def list(self) -> QueryResult[Planet]:
    """Return a lazily-evaluated QueryResult over all planets.

    Fix: the return annotation previously said ``QueryResult[Starship]``,
    but the body fetches the ``"planets"`` collection and wraps each row in
    ``Planet`` (compare the Vehicle variant of this method); the annotation
    now matches the actual element type.

    Returns:
        QueryResult[Planet]: count taken from the first response page,
        iterator flattening every page's results into Planet instances.
    """
    responses = self.store.get("planets")
    # peek reads the first page for its count without consuming the stream;
    # the returned iterator still yields that first page
    response, responses = toolz.peek(responses)
    return (
        QueryResultBuilder()
        .count(response.count)
        .iterator(toolz.mapcat(lambda r: (Planet(p) for p in r.results), responses))
        .build()
    )
def searchdirectories(filepattern: str, directories: Iterable[str]) -> List[str]:
    """Search directories for files matching filepattern regex.

    Args:
        filepattern: regex applied to candidate file names.
        directories: directory paths to crawl.

    Returns:
        Sorted list of matching file paths.
    """
    # sorted() already returns a list, so the extra list() wrapper was redundant
    return sorted(
        mapcat(_crawldirectoryforfilesmatching(filepattern),
               findfiles(directories)))
def list(self) -> QueryResult[Vehicle]:
    """Build a QueryResult that streams every Vehicle from the store."""
    pages = self.store.get("vehicle")
    # Peek at the first page to learn the total count; `pages` still yields it.
    first_page, pages = toolz.peek(pages)
    vehicles = toolz.mapcat(
        lambda page: (Vehicle(item) for item in page.results), pages)
    builder = QueryResultBuilder()
    builder = builder.count(first_page.count)
    builder = builder.iterator(vehicles)
    return builder.build()
def tokenize(self, message: str, stopwords=None) -> List[str]:
    """Tokenize the string passed in.

    If ``message`` is not a string or is the empty string, return [].

    Args:
        message: raw text to tokenize.
        stopwords: optional collection of lowercase words to drop from the
            result. Defaults to None (no filtering). This replaces the
            previous mutable default argument ``stopwords=[]``, which is a
            well-known pitfall (a single list shared across calls); behavior
            is unchanged because the value is only truth-tested and read.

    Returns:
        List of word tokens with falsey sentences removed and, when
        ``stopwords`` is given, stopwords filtered out.
    """
    if not isinstance(message, str) or message == "":
        print("received bad input...")
        try:
            print(f"message length = {len(message)}")
        except TypeError as e:
            # input has no len() (e.g. None, int)
            print(e)
        print(f"message is {str(message)}")
        return []
    else:
        # explicitly run all other methods on input
        sents = self.run_pipeline(message)
        # we now have a list of sentences, remove falsey elements
        sents = t.remove(self.empty_sent_re.match, sents)
        # tokenize each sentence and concat resultant arrays
        tokens = list(t.mapcat(nltk.word_tokenize, sents))
        if stopwords:
            tokens = [x for x in tokens if x.lower() not in stopwords]
        return tokens
def functional():
    """Count log entries per hour over the trailing `days_of_logs` days."""
    # Each stage below is lazy, so the whole pipeline streams exactly as the
    # original nested-call composition did.
    dates = map(lambda days_ago: today - timedelta(days=days_ago),
                range(1, days_of_logs + 1))
    outputs = map(lambda date: logs[date.strftime('%Y/%m/%d')], dates)
    lines = mapcat(lambda output: output.strip().split('\n'), outputs)
    records = map(json.loads, filter(None, lines))
    return count_by(itemgetter('hour'), records)
def merge_related_elements(interval_group):
    """Merge block and superblock ids for a group of identical intervals.

    Args:
        interval_group (list): list of identical intervals

    Returns:
        BaseInterval: unified interval with merged related element ids
    """
    # The first interval supplies the shared (non-id) fields.
    base_fields = interval_group[0][:6]
    # Concatenate the related-element ids from every interval in the group.
    merged_block_ids = list(mapcat(attrgetter('block_ids'), interval_group))
    merged_superblock_ids = list(mapcat(attrgetter('superblock_ids'), interval_group))
    return BaseInterval(*base_fields,
                        block_ids=merged_block_ids,
                        superblock_ids=merged_superblock_ids)
def piped():
    """Count log entries per hour using the placeholder-pipe DSL.

    ``_ | stage | ... | _`` threads the data left-to-right through each
    stage (pipetools-style); equivalent to the nested functional()
    composition elsewhere in this file.
    """
    return (_
            | range(1, days_of_logs + 1)
            # offsets -> dates
            | map(lambda days_ago: today - timedelta(days=days_ago))
            # dates -> raw per-day log blobs
            | map(lambda date: logs[date.strftime('%Y/%m/%d')])
            # blobs -> individual lines
            | mapcat(lambda output: output.strip().split('\n'))
            # drop empty lines
            | filter(None)
            | map(json.loads)
            | count_by(itemgetter('hour'))
            | _)
def piped():
    """Count log entries per hour via the placeholder-pipe DSL.

    NOTE(review): near-duplicate of the other ``piped`` in this file
    (differs only in spacing around the ``_`` placeholders) — presumably
    from parallel examples; confirm whether both are needed.
    """
    return (_| range(1, days_of_logs + 1)
             # offsets -> dates -> per-day log text
             | map(lambda days_ago: today - timedelta(days=days_ago))
             | map(lambda date: logs[date.strftime('%Y/%m/%d')])
             # split blobs into lines, drop empties, parse JSON records
             | mapcat(lambda output: output.strip().split('\n'))
             | filter(None)
             | map(json.loads)
             | count_by(itemgetter('hour'))
             |_)
def eqn_nums(nums: Numbers) -> Equations:
    """Generate every candidate equation `[n1, ..., nk, op1, ..., op(k-1)]`."""
    n_ops = len(nums) - 1
    # Every multiset of operators, then every ordering of each multiset.
    op_orderings = mapcat(permutations,
                          combinations_with_replacement("+-/*", n_ops))
    # Materialize the distinct number orderings now (not a generator);
    # taking the set saves on redundant equations.
    distinct_num_orderings = set(permutations(nums))
    return (list(numbers) + list(operators)
            for numbers, operators in product(distinct_num_orderings, op_orderings))
def p(new_df: pd.DataFrame) -> pd.DataFrame:
    """One-hot encode the categorized columns of new_df.

    For each column in ``columns_to_categorize``, adds one indicator column
    per known category (``fklearn_feat__<col>==<categ>``) taken from the
    closed-over ``vec`` mapping, optionally skipping the first category
    (``drop_first_column``) and optionally adding a ``==nan`` indicator for
    values not seen in ``vec`` (``hardcode_nans``); the original columns are
    then dropped.

    NOTE(review): relies on closure variables ``vec``, ``drop_first_column``,
    ``hardcode_nans``, ``columns_to_categorize`` defined outside this view.
    """
    # indicator series for every retained category of one column
    make_dummies = lambda col: dict(
        map(lambda categ: ("fklearn_feat__" + col + "==" + str(categ),
                           (new_df[col] == categ).astype(int)),
            # int(drop_first_column) skips index 0 when truthy
            vec[col][int(drop_first_column):]))
    # merge per-column dummies with the optional unseen-value ("nan") flag,
    # then flatten all columns' items into one dict of new feature columns
    oh_cols = dict(
        mapcat(lambda col: merge(
            make_dummies(col),
            {"fklearn_feat__" + col + "==" + "nan":
                 (~new_df[col].isin(vec[col])).astype(int)} if hardcode_nans else {}).items(),
            columns_to_categorize))
    return new_df.assign(**oh_cols).drop(columns_to_categorize, axis=1)
def functional():
    """Tally log records by hour across the trailing ``days_of_logs`` days."""
    day_offsets = range(1, days_of_logs + 1)
    as_date = lambda days_ago: today - timedelta(days=days_ago)
    day_log = lambda date: logs[date.strftime('%Y/%m/%d')]
    split_lines = lambda output: output.strip().split('\n')
    # lazy chain: offsets -> dates -> raw logs -> lines
    raw_lines = mapcat(split_lines, map(day_log, map(as_date, day_offsets)))
    # drop blank lines, parse each JSON record, count by hour
    parsed = map(json.loads, filter(None, raw_lines))
    return count_by(itemgetter('hour'), parsed)
def main():
    """Walk args.base_path, add every media file to a mediaStruct, and save it.

    Non-media extensions are tallied and reported at the end.

    NOTE(review): Python 2 only — uses print statements and a
    tuple-parameter lambda; under Python 2, filter()/map() return lists,
    which len() below depends on.
    """
    args = parse_cli()
    not_media = Counter()  # extension -> count of skipped (non-media) files
    # extension after the last '.'
    getType = lambda x: x.rsplit('.', 1)[-1]
    # side effect only: tally the skipped extension (update returns None)
    notMedia = lambda path: not_media.update([getType(path)])
    isMedia = lambda x: getType(x).lower() in media_types
    # full path for media files; None (via notMedia's tally) otherwise
    buildFullPath = toolz.curry(lambda path, file: '/'.join([path, file]) if isMedia(file) else notMedia(file))
    # unpack each os.walk triple; map every file name to a path or None
    iterFiles = lambda (path, dirs, files): map(buildFullPath(path), files)
    # drop the Nones left by non-media files
    file_list = filter(None, toolz.mapcat(iterFiles, os.walk(args.base_path)))
    print '{0} total files'.format(len(file_list))
    image_scan = mediaStruct()
    for media in file_list:
        image_scan.add_media(media)
    image_scan.save(args.output_file)
    print 'file types not imported', not_media
def iter_unlooped_nodes(node, loop_variables_nodes, unloop_keys=None):
    """Yield one unlooped copy of ``node`` per combination of loop-variable values.

    Each loop-variable node contributes a name and a value sequence (its
    enumerations flattened via ``enumeration_node_to_sequence``); the
    Cartesian product of those sequences drives one ``unlooped(...)`` call
    per combination.

    NOTE(review): assumes ``loop_variables_nodes`` is non-empty — with an
    empty input the ``zip(*...)`` unpacking below would raise.
    """
    # Transpose [(name, sequence), ...] into (names, sequences).
    loop_variables_names, sequences = zip(*map(
        lambda loop_variable_node: (
            loop_variable_node['name'],
            # flatten each enumeration node into its concrete values
            mapcat(
                enumeration_node_to_sequence,
                loop_variable_node['enumerations'],
            ),
        ),
        loop_variables_nodes,
    ))
    # One tuple of values per combination across all loop variables.
    loop_variables_values_list = itertools.product(*sequences)
    for loop_variables_values in loop_variables_values_list:
        value_by_loop_variable_name = dict(zip(loop_variables_names, loop_variables_values))
        yield unlooped(
            node=node,
            unloop_keys=unloop_keys,
            value_by_loop_variable_name=value_by_loop_variable_name,
        )
def findfiles(patterns: Iterable[str]) -> List[str]:
    """Expand every file pattern and return all matches as one sorted list.

    Args:
        patterns: file patterns to expand via ``_matchfilepattern``.

    Returns:
        Sorted list of all matched file paths.
    """
    # sorted() already returns a list; the extra list() wrapper was redundant
    return sorted(mapcat(_matchfilepattern, patterns))
def unzip(path, tree):
    """Yield every root-to-leaf path through ``tree``.

    A leaf is a string or any non-iterable; each yielded value is ``path``
    extended with the labels down to (and including) that leaf. The head
    element ``tree[0]`` labels the node; the tail elements are its subtrees.
    """
    is_leaf = isinstance(tree, str) or not hasattr(tree, '__iter__')
    if is_leaf:
        yield path + [tree]
        return
    label = tree[0:1]
    # mapcat(partial(unzip, path + label), tree[1:]) spelled as a plain loop:
    # recurse into each subtree in order, flattening the yielded paths.
    for subtree in tree[1:]:
        yield from unzip(path + label, subtree)
def visit_regle(node):
    """Visit every formula of a regle node, flattening pour_formula results."""
    def expand(formula_node):
        # pour_formula visits already yield an iterable of results;
        # every other formula's single result is wrapped in a list so
        # mapcat can concatenate uniformly.
        if formula_node['type'] == 'pour_formula':
            return visit_node(formula_node)
        return [visit_node(formula_node)]
    return mapcat(expand, node['formulas'])
"""Count co-occurrence frequencies of item pairs within sentences.

Reads a word2vec LineSentence corpus from the path given as the first CLI
argument, keeps sentences with at least two distinct tokens, and prints
every unordered token pair with its frequency, most frequent first.
"""
import sys
import itertools

import toolz
from gensim.models import word2vec

data_file = sys.argv[1]
# keep only sentences containing at least two distinct tokens
sentences = [
    s for s in word2vec.LineSentence(data_file)
    if toolz.count(toolz.unique(s)) >= 2
]
# frequency of each unordered pair of distinct tokens co-occurring in a
# sentence (sorted so (a, b) and (b, a) count as the same pair)
cmb = toolz.frequencies(
    toolz.mapcat(lambda s: itertools.combinations(sorted(toolz.unique(s)), 2),
                 sentences))
# print pairs, most frequent first (negated count = descending sort)
for (k1, k2), v in sorted(cmb.items(), key=lambda x: -x[1]):
    print(f"item1 = {k1}, item2 = {k2}, freq = {v}")