def examples(encoder, gen):
    # Slide a window of 3 items over the generator, encode each item,
    # flatten the window into one list and convert it to a numpy array.
    exes = tz.thread_last(
        gen,
        (tz.sliding_window, 3),
        (map, lambda x: [encoder.encode_raw(i) for i in x]),
        (map, tz.compose(list, tz.concat)),
        (map, np.array))
    return exes
def despam_results(results):
    """ Remove spammy looking posts. """
    def _filters(result):
        # do not remove this filter, the app requires it
        if not result.get('json_metadata') or type(result['json_metadata']) != dict:
            return False
        # filter out spam and unloved posts
        if len(result['json_metadata'].get('links', [])) > 15:
            return False
        if len(result['json_metadata'].get('users', [])) > 10:
            return False
        if result.get('net_votes', 0) < 5:
            return False
        if int(result.get('author_reputation', -1)) < 0:
            return False
        if int(result.get('net_rshares')) < 0:
            return False
        # todo add more filters
        return True

    def _clean_body(result):
        # todo sanitize non https links
        if result.get('body'):
            result['body'] = result['body'].replace('steemitboard.com', 'localhost')
        return result

    return thread_last(results,
                       (filter, _filters),
                       (map, _clean_body),
                       list)
def remove_whitespace(string):
    """Removes spaces, tabs and newlines from the string"""
    return thread_last(string,
                       (re.sub, r'\t*', ''),
                       (re.sub, r' *', ''),
                       (re.sub, r'\n*', ''))
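# A minimal, standalone sketch of the same pattern as remove_whitespace above
# (the input string is illustrative only): thread_last passes the threaded
# value as the *last* argument of each (re.sub, pattern, replacement) form.
import re
from toolz import thread_last

cleaned = thread_last("a b\tc\nd",
                      (re.sub, r'\t', ''),
                      (re.sub, r' ', ''),
                      (re.sub, r'\n', ''))
print(cleaned)  # => abcd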
def get_groups(ip):
    def _get_infs(record):
        name = re_find(r'(Smartgroup:\d+)', record)
        if name:
            name = name.lower().replace(':', '')
        infs = re_all(r'(x?gei_\d+/\d+/\d+)\s?selected', record)
        return dict(name=name, infs=infs)

    def _get_desc_mode(child, group):
        rslt = do_some(child, 'show run int {name}'.format(name=group['name']))
        desc = re_find(r'description\s+(\S+)', rslt)
        group['desc'] = desc
        rslt = do_some(child, 'show run int {inf}'.format(inf=group['infs'][0]))
        mode = re_find(r'smartgroup\s\d+\smode\s(\S+)', rslt)
        group['mode'] = mode
        return group

    try:
        child = telnet(ip)
        rslt = re.split(r'\r\n\s*\r\n', do_some(child, 'show lacp internal'))
        groups = thread_last(rslt,
                             (lmap, _get_infs),
                             (select, lambda x: x['name'] and x['infs']))
        lmap(partial(_get_desc_mode, child), groups)
        close(child)
    except (pexpect.EOF, pexpect.TIMEOUT) as e:
        return ('fail', None, ip)
    return ('success', groups, ip)
def collect_tweets(
    es_client,
    track,
    twitter_consumer_key,
    twitter_consumer_secret,
    twitter_access_token_key,
    twitter_access_token_secret,
    elasticsearch_index="profanity-power-index",
    drop_index=False,
    batch_size=10,
):
    if es_client.indices.exists(elasticsearch_index):
        logger.warning(f"Index {elasticsearch_index} exists.")
        if drop_index:
            logger.warning(f"Dropping {elasticsearch_index}.")
            es_client.indices.delete(elasticsearch_index)
            logger.info(f"Creating {elasticsearch_index}.")
            es_client.indices.create(index=elasticsearch_index, body=TWEET_MAPPING)
    else:
        logger.info(f"Creating {elasticsearch_index}.")
        es_client.indices.create(index=elasticsearch_index, body=TWEET_MAPPING)
        logger.info(f"{elasticsearch_index} successfully created.")

    api = twitter.Api(
        consumer_key=twitter_consumer_key,
        consumer_secret=twitter_consumer_secret,
        access_token_key=twitter_access_token_key,
        access_token_secret=twitter_access_token_secret,
    )
    logger.info(f"Connecting to twitter stream. Tracking {', '.join(track)}.")
    tweet_stream = api.GetStreamFilter(track=track)
    tweet_to_bulk = curry(_tweet_to_bulk)(elasticsearch_index)

    tweet_doc_stream = thread_last(
        tweet_stream,
        # Filter out tweets that don't contain profanity.
        (filter, _contains_profanity),
        # Convert the tweets to a bulk-indexable document.
        (map, tweet_to_bulk),
        # Partition for bulk writes.
        (partition_all, batch_size),
    )
    logger.info(f"Sending tweets to {elasticsearch_index}.")
    failed = 0
    succeeded = 0
    logger.info(f"{failed + succeeded} tweets processed: "
                f"{succeeded} succeeded, {failed} failed.")
    # Since the doc stream is partitioned we get the tweets in batches.
    for tweet_batch in tweet_doc_stream:
        ok, fail = es_bulk(es_client, tweet_batch, stats_only=True)
        succeeded += ok
        failed += fail
        if (failed + succeeded) % 100 == 0:
            logger.info(f"{failed + succeeded} tweets processed: "
                        f"{succeeded} succeeded, {failed} failed.")
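# Isolating the batching step from collect_tweets above: partition_all groups
# the incoming document stream into fixed-size chunks suitable for bulk
# indexing. The data and batch size here are illustrative only.
from toolz import thread_last, partition_all

batches = thread_last(range(7),
                      (partition_all, 3),
                      (map, list),
                      list)
print(batches)  # => [[0, 1, 2], [3, 4, 5], [6]]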
def find_tme_files(dir):
    return tz.thread_last(
        dir,
        os.listdir,
        (filter, lambda x: re.search(r".*\.html$", x)),
        (map, lambda x: f"{dir}/{x}"),
        sorted,
    )
def groupby_and_summarize(dataframe, col, funcs=[], fnames=[]):
    return thread_last(
        dataframe,
        lambda x: x.groupby(col),
        (map, snd),
        (map, summarize(funcs=funcs, fnames=fnames)),
        pd.concat,
        reset_index)
def search_boards(key, token, board_name):
    method = "get"
    path = "members/me/boards"
    all_boards = execute_request(key, token, method, path,
                                 filter="open", fields="name")
    try:
        return toolz.thread_last(
            all_boards,
            (filter, lambda x: x["name"] == board_name),
            toolz.first,
            (toolz.get, "id"))
    except:
        raise ValueError("No board found with that name")
def invariant_output(string):
    """Removes all changing elements from the string

    * Replaces all numbers in a string with the 'X' character
    * Removes all appearances of activate_autocompletion
    * Removes all newlines and whitespace from the string
    """
    numbers_regexp = r'\d'
    activate_autocomp_regexp = (r'((\n?[ ]*)|,)'
                                'activate_autocompletion'
                                '(:.*\n)?')
    return thread_last(string,
                       (re.sub, numbers_regexp, 'X'),
                       (re.sub, activate_autocomp_regexp, ''))
def tokenize(doc, with_stem=False):
    """Given a document string, return a list of tokens."""
    pipeline = [
        (filter, is_alpha),
        (filter, not_proper),
        (map, lower),
        (filter, not_stopword)]
    if with_stem:
        pipeline += [(map, stem)]
    pipeline += [(map, remove_pos)]
    return list(tz.thread_last(
        nltk.tag.pos_tag(nltk.tokenize.word_tokenize(doc)),
        *pipeline))
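# A self-contained sketch of the same idea as tokenize above: build the
# pipeline as a list of forms and splat it into thread_last. The predicates
# here are stand-ins for the project helpers (is_alpha, not_proper, ...).
import toolz as tz

pipeline = [(filter, str.isalpha),
            (map, str.lower)]
tokens = list(tz.thread_last(["Hello", "world", "42"], *pipeline))
print(tokens)  # => ['hello', 'world']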
def parse_file(content: str) -> Tuple[str, Set[str], str]:
    lines = content.splitlines()
    if not lines[0] == '---' and lines[1:].count('---') == 1:
        error('''
            The md file must contain exactly 2 lines consisting of ---
            The first of which must be the first line.
            Please correct the file by calling `mdn edit _e`''')
    front_matter = AttrDict(
        yaml.safe_load('\n'.join(lines[1:lines[1:].index('---') + 1])))
    assert_front_matter_correct(front_matter)
    tags = t.thread_last(tag_pattern.findall(content),
                         (map, str.lower),
                         set)
    return front_matter.title, tags, front_matter.group, \
        front_matter.get("doi", None)
def normalize_data(sentences: np.ndarray):
    def stringify(x):
        return str(x)

    def unicode2ascii(s):
        return ''.join(c for c in unicodedata.normalize('NFD', s)
                       if unicodedata.category(c) != 'Mn')

    def normalize_string(s):
        s = unicode2ascii(s.lower().strip())
        s = re.sub(r"([.!?])", r" \1", s)
        s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
        return s

    return tlz.thread_last(sentences,
                           (map, stringify),
                           (map, normalize_string))
def parse_label_group(string):
    """
    Takes string containing all data for one field, and creates a tidy
    dataframe with two columns: 'Well Name', and field.
    """
    letters = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    raw_dataframe = pd.read_csv(StringIO(string))
    label_name = raw_dataframe.columns[0]
    return thread_last(
        raw_dataframe.values[:, 1:],
        lambda values: df(values, columns=map(lambda num: stringify(num, 2),
                                              range(1, values.shape[1] + 1))),
        lambda dataframe: add_col(dataframe, 'Row', pd.Series(letters[:len(dataframe)])),
        lambda dataframe: pd.melt(dataframe, id_vars=['Row']),
        lambda dataframe: add_col(dataframe, 'Well Name', dataframe['Row'] + dataframe['variable']),
        lambda dataframe: dataframe.drop(['Row', 'variable'], axis=1),
        lambda dataframe: dataframe.rename(columns={'value': label_name}),
        lambda dataframe: dataframe[['Well Name', label_name]])
def get_layout_data(path):
    """
    Given a path to a file with proper format (see below), return a dataframe
    with 'Well Name' column and additional columns for each provided parameter.

    Format:
        Parameter Name, 1, 2 ...
        A, Value, Value ...
        B, Value, Value ...

    Notes:
        '\r' is present in csv output on windows (or google docs) and can
        confuse pandas `read_csv` function.

        Algorithm partitions by whether row is empty (each section of data
        should be separated by a blank line), then filters out groups where
        row is empty (text of row contains only commas).
        ...
    """
    return thread_last(
        path,
        from_file,
        split_on_newlines,
        (map, lambda line: line.rstrip(',')),
        (partitionby, string_is_empty),
        (filter, lambda group: not string_is_empty(group[0])),
        (map, lambda strings: str.join('\n', strings)),
        (map, parse_label_group),
        (reduce, lambda left, right: pd.merge(left, right, on='Well Name')))
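# A minimal sketch of the partition-then-filter step described in the
# docstring above, on plain strings: blank lines separate the sections, empty
# groups are dropped, and each remaining group is re-joined. The sample lines
# are made up for illustration.
from toolz import thread_last, partitionby

lines = ["Drug, 1, 2", "A, 10, 20", "", "Dose, 1, 2", "A, 0.1, 0.2"]
sections = thread_last(lines,
                       (partitionby, lambda line: line == ''),
                       (filter, lambda group: group[0] != ''),
                       (map, '\n'.join),
                       list)
print(sections)  # => ['Drug, 1, 2\nA, 10, 20', 'Dose, 1, 2\nA, 0.1, 0.2']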
def resolve(query: Querylike[T],
            api: Api[T_auth],
            loaders: load.Registry,
            auth: T_auth,
            sender: http.Sender) -> T:
    """resolve a querylike object.

    Parameters
    ----------
    query
        the querylike object to evaluate
    api
        the API to handle the request
    loaders
        The registry of object loaders
    auth
        The authentication object
    sender
        The request sender
    """
    return thread_last(query,
                       attrgetter('__req__'),
                       api.prepare,
                       (flip(api.add_auth), auth),
                       sender,
                       api.parse,
                       loaders(query.__rtype__))
def get_ports(ip):
    def _get_info(record):
        name = re_find(r'^((?:xg|g|f)ei\S+) is \w+ ?\w+,', record)
        state = re_find(r'^(?:xg|g|f)ei\S+ is (\w+ ?\w+),', record)
        desc = re_find(r'Description is (\S+ *\S+)', record)
        inTraffic = int(
            re_find(r'120 seconds input.*:\s+(\d+)\sBps', record) or 0) * 8 / 1000000
        outTraffic = int(
            re_find(r'120 seconds output.*:\s+(\d+)\sBps', record) or 0) * 8 / 1000000
        return dict(name=name, desc=desc, state=state,
                    inTraffic=inTraffic, outTraffic=outTraffic)

    try:
        child = telnet(ip)
        rslt = do_some(child, 'show interface')
        close(child)
    except (pexpect.EOF, pexpect.TIMEOUT) as e:
        return ('fail', None, ip)
    rslt = thread_last(rslt,
                       (re.split, r'\r\r\n *\r\r\n *'),
                       (select, r'^(?:xg|g|f)ei_'),
                       (lmap, _get_info))
    return ('success', rslt, ip)
def process_msg(q):
    # Pull raw messages off the queue, decode the JSON payload and keep only
    # the events whose name passes str_filter.
    return toolz.thread_last(qget(q),
                             (map, json.loads),
                             (map, lambda v: v['event']),
                             (filter, lambda v: str_filter(v['event_name'])))
def create_well_df(cell_dict):
    return thread_last(cell_dict,
                       (mapdict, lambda k, v: {"Cell Type": k, "Well Name": v}),
                       (map, df),
                       pd.concat)
import functools
from toolz import thread_last, curry, pipe

print(list(map(lambda x: x + 10, [1, 2, 3])))
# => [11, 12, 13]
print(list(filter(lambda x: x > 5, [3, 4, 5, 6, 7])))
# => [6, 7]
print(functools.reduce(lambda x, y: x + y, [1, 2, 3, 4, 5]))
# => 15

x = functools.reduce(lambda x, y: x + y,
                     map(lambda x: x + 10,
                         filter(lambda x: x > 5, [3, 4, 5, 6, 7])))
# => 33
print('combine all operations together:', x)

x = thread_last([3, 4, 5, 6, 7],
                (filter, lambda x: x > 5),
                (map, lambda x: x + 10),
                (functools.reduce, lambda x, y: x + y))
print('use toolz:', x)

from operator import add, lt
from toolz.curried import filter, map, reduce

add = curry(add)
lt = curry(lt)
x = pipe([3, 4, 5, 6, 7], filter(lt(5)), map(add(10)), reduce(add))
print('another toolz style:', x)
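# For reference, the difference between toolz's two threading helpers:
# thread_first inserts the threaded value as the first argument of each form,
# while thread_last appends it as the last one.
from toolz import thread_first, thread_last

print(thread_first(5, (pow, 2)))  # pow(5, 2) => 25
print(thread_last(5, (pow, 2)))   # pow(2, 5) => 32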
    Tuple,
    TypeVar,
)

from toolz import excepts, identity, memoize, thread_last, update_in

A = TypeVar("A")
K = TypeVar("K")
V = TypeVar("V")
A_List = Tuple[Tuple[K, V], ...]

_ingredient_patterns: Iterable[Pattern] = thread_last(
    [
        r"(?P<quantity>\d*\.?\d+)\s(?P<unit>\w+)\.?\s(of\s)?(?P<ingredient>.+)",
        r"(?P<quantity>\d*\.?\d+)\s(?P<ingredient>.+)",
        r"(?P<ingredient>.+)",
    ],
    (map, re.compile),
    tuple,
)
_ingredient_cleanup_funcs: A_List[str, Optional[Callable]] = (
    ("quantity", None),
    ("unit", lambda s: s[0].lower() if s else None),
    ("ingredient", None),
)
next_or_none: Callable[[Iterator[A]], Optional[A]] = excepts(
    StopIteration, lambda a: next(filter(None, a)), lambda __: None)
def execute_request(key, token, method, path, *args, **kwargs):
    url = "https://api.trello.com/1/{0}".format(path).format(**kwargs)
    payload = toolz.thread_last(kwargs,
                                (remove_used_fields, path),
                                (bundle_auth, key, token))
    req = requests.request(method, url, data=payload)
    req.raise_for_status()
    return req.json()
def parse(patterns: Iterable[Pattern],
          post_func_map: A_List[str, Optional[Callable]],
          s: str) -> Mapping:
    return thread_last(s,
                       (pattern_match, patterns),
                       groupdict,
                       (post_process, post_func_map))
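# Illustrative sketch only: parse() above relies on project helpers
# (pattern_match, post_process) that are not shown here. This reproduces the
# core idea with plain re, using the first ingredient pattern defined earlier.
import re

pattern = re.compile(
    r"(?P<quantity>\d*\.?\d+)\s(?P<unit>\w+)\.?\s(of\s)?(?P<ingredient>.+)")
match = pattern.match("2 cups of flour")
print(match.groupdict())
# => {'quantity': '2', 'unit': 'cups', 'ingredient': 'flour'}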
def load_opensudoku(difficulty):
    resp = requests.get(
        f'https://opensudoku.moire.org/sudoku/{difficulty}.opensudoku')
    parsed = xmltodict.parse(resp.text)
    return thread_last(parsed,
                       (get_in, ['opensudoku', 'game']),
                       (pluck, '@data'),
                       tuple)
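# Standalone sketch of the get_in + pluck extraction above, against a
# hand-made dict that mimics the shape of the parsed opensudoku XML (the
# structure and values here are assumptions for illustration).
from toolz import thread_last, get_in, pluck

parsed = {'opensudoku': {'game': [{'@data': '530070000...'},
                                  {'@data': '600195000...'}]}}
puzzles = thread_last(parsed,
                      (get_in, ['opensudoku', 'game']),
                      (pluck, '@data'),
                      tuple)
print(puzzles)  # => ('530070000...', '600195000...')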
def from_string(cls, raw):
    return thread_last(raw, parse_raw, cls)
from toolz import thread_last

from sudoku.fp.load import load_opensudoku
from sudoku.fp.solve import combined_step
from sudoku.mp.base import Sudoku
from sudoku.mp.solve import Solver

thread_last(
    load_opensudoku('easy')[3],
    Sudoku.from_string,
    Solver(combined_step, max_tries=100),
    print)