def update(self):
    with log_errors():
        state = {'all': valmap(len, self.plugin.all),
                 'nbytes': self.plugin.nbytes}
        for k in ['memory', 'erred', 'released', 'processing']:
            state[k] = valmap(len, self.plugin.state[k])
        if not state['all'] and not len(self.source.data['all']):
            return

        d = progress_quads(state)
        update(self.source, d)

        totals = {k: sum(state[k].values())
                  for k in ['all', 'memory', 'erred', 'released']}
        totals['processing'] = totals['all'] - sum(
            v for k, v in totals.items() if k != 'all')

        self.root.title.text = (
            "Progress -- total: %(all)s, "
            "in-memory: %(memory)s, processing: %(processing)s, "
            "erred: %(erred)s" % totals)
def _createIndiceConverter(
        innerIndiceMap: Dict[Indice, int],
        coeffs: List[Sequence[float]],
        oi: int,
        _ls: List[List[Sequence[float]]],
        _boole: bool = False) -> List[Sequence[float]]:
    if list(innerIndiceMap.keys())[0] == ():
        return coeffs
    else:
        innerIndiceMapGroup: Dict[int, List[Tuple[Indice, int]]] = compose(
            valmap(compose(
                lambda v: sorted(v, key=lambda k: k[0]),
                map(lambda l: (l[0][1:], l[1])))),
            groupby(lambda kv: kv[0][0]))(innerIndiceMap.items())
        outArr = list(innerIndiceMapGroup.keys())
        inArr = list(valmap(lambda v: v[0][-1], innerIndiceMapGroup).values())
        coeff = self.getIndiceTransformCoeffs(outArr, inArr, oi, _ls, _boole)
        nextInnerIndiceMapGroup: Dict[int, Dict[Indice, int]] = valmap(
            dict, innerIndiceMapGroup)
        coeffsList = [
            _createIndiceConverter(
                self.applyIndiceTransform(nextInnerIndiceMap, key, coeff),
                [*coeffs, coeff],
                oi + 1,
                _ls,
                _boole)
            for key, nextInnerIndiceMap in nextInnerIndiceMapGroup.items()]
        if allSame(coeffsList):
            return coeffsList[0]
        else:
            raise LinearError
def getIndexMap(
        self,
        insIndex: List[InIndex],
        outIndex: OutIndex) -> Dict[Indice, LabelInIndice]:
    labeledInsIndex = [valmap(lambda x, i=i: (i, x), inIndex)
                       for (i, inIndex) in enumerate(insIndex)]
    return valmap(self.getCorrespondIndex(labeledInsIndex), outIndex)
def createIndexConverter(
        self,
        indexMap: IndexMap,
        inShape: Shape,
        outShape: Shape,
        boole: bool = False) -> IndexConverter:
    if indexMap == {}:
        return NullIndexConverter()
    if boole and set(indexMap.values()) == {(0,)}:
        return UnitIndexConverter(len(self.outAst.shape), 1)
    try:
        indiceConverters = list(reduce(
            lambda ls, i: ls + [self.getIndiceConverter(
                valmap(lambda v, i=i: v[i], indexMap), ls, boole)],
            range(len(inShape)),
            [[(0, 0, 0, 1)] * len(outShape)]))[1:]
        if not indiceConverters:
            return ZeroIndexConverter()
        genCoeffs = lambda func, func0=identity: [
            func0([func(c) for c in l]) for l in indiceConverters]
        linearCoeffs = genCoeffs(nth(0))
        bs = genCoeffs(nth(1), sum)
        indexModCoeffs = genCoeffs(nth(2))
        indexModValues = genCoeffs(nth(3))
        if boole:
            return LinearIndiceConverter(linearCoeffs[0], bs[0], inShape[0],
                                         indexModCoeffs[0], indexModValues[0])
        else:
            return LinearIndexConverter(linearCoeffs, bs, inShape,
                                        indexModCoeffs, indexModValues)
    except LinearError:
        if all([s != -1 for s in outShape]):
            return FixIndexConverter(indexMap)
        else:
            raise TransformError("Failed to find a transform.")
def update(self):
    with log_errors():
        processing = valmap(len, self.scheduler.processing)
        workers = list(self.scheduler.workers)

        bokeh_addresses = []
        for worker in workers:
            addr = self.scheduler.get_worker_service_addr(worker, 'bokeh')
            bokeh_addresses.append('%s:%d' % addr if addr is not None else '')

        y = list(range(len(workers)))
        nprocessing = [processing[w] for w in workers]
        processing_color = []
        for w in workers:
            if w in self.scheduler.idle:
                processing_color.append('red')
            elif w in self.scheduler.saturated:
                processing_color.append('green')
            else:
                processing_color.append('blue')

        nbytes = [self.scheduler.worker_bytes[w] for w in workers]
        nbytes_color = []
        max_limit = 0
        for w, nb in zip(workers, nbytes):
            try:
                limit = self.scheduler.worker_info[w]['memory_limit']
            except KeyError:
                limit = 16e9
            if limit > max_limit:
                max_limit = limit

            if nb > limit:
                nbytes_color.append('red')
            elif nb > limit / 2:
                nbytes_color.append('orange')
            else:
                nbytes_color.append('blue')

        now = time()
        if any(nprocessing) or self.last + 1 < now:
            self.last = now
            result = {'nprocessing': nprocessing,
                      'nprocessing-half': [np / 2 for np in nprocessing],
                      'nprocessing-color': processing_color,
                      'nbytes': nbytes,
                      'nbytes-half': [nb / 2 for nb in nbytes],
                      'nbytes-color': nbytes_color,
                      'bokeh_address': bokeh_addresses,
                      'worker': workers,
                      'y': y}

            self.nbytes_figure.title.text = 'Bytes stored: ' + format_bytes(
                sum(nbytes))

            update(self.source, result)
def applyIndiceTransform(
        IndiceMapGroup: Dict[Indice, int],
        key: int,
        coeff: Sequence[float]) -> Dict[Indice, int]:
    a, b, c, d = coeff
    return valmap(
        lambda v: v - (np.floor(a * key) + b + np.floor(c * (key % d))),
        IndiceMapGroup)
def grouped(self, group, key=lambda x: x.weight):
    if group == 'siblings' and not self.parent:
        return {self.weight: [self]}
    elif group in {'siblings', 'children'}:
        agg = self.siblings if group == 'siblings' else self.children
        return cc.pipe(
            ((key(x), x) for x in agg),
            cc.groupby(lambda x: x[0]),
            cc.valmap(lambda x: [y[1] for y in x]))
    else:
        return {}
def get_interactions():
    dates = sorted(set(map(_g('date'), data['interactions'])))
    d = t.pipe(
        data['interactions'],
        tc.groupby(lambda i: i.student),
        tc.valmap(lambda x: t.pipe(
            t.groupby(lambda i: i.date, x),
            tc.valmap(lambda v: [v[0].time_in, v[0].time_out]))))
    mat = [['student'] + dates]
    for student, attendance in d.items():
        record = [student]
        for dt in dates:
            if dt in attendance:
                record.append(attendance[dt])
            elif dt in data['students'][student].absences:
                record.append(('', ''))
            else:
                record.append((None, None))
        mat.append(record)
    return {'interactions': mat}
def indice2Arr(indice: Indice) -> Union[np.ndarray, float]:
    whichIn = insDispatcher(indice)[0]
    doFuncs: Dict[str, bool] = valmap(lambda v: bool(v(indice)[0]), funcsIndex)
    whichFunc = list(valfilter(identity, doFuncs).keys())
    if not whichFunc:
        func = lambda x: x
    else:
        if whichFunc[0] not in f2:
            raise ValueError("No function called {}.".format(whichFunc[0]))
        func = f2[whichFunc[0]]
    return func(self.getInputElement(
        inArrs[whichIn],
        tuple(indexConverters2[whichIn](indice))))
def getIndexConverters(
        self,
        indexMap: Dict[Indice, LabelInIndice]) -> List[IndexConverter]:
    return [
        self.getIndexConverter(
            i,
            compose(
                valmap(lambda v: v[1]),
                valfilter(lambda v, i=i: v[0] == i))(indexMap))
        for i in range(len(self.inAsts))]
def embedding_groups(
        node_list: List[T],
        persona_embedding_list: List[np.ndarray]) -> Dict[T, List[np.ndarray]]:
    """
    Utility function which, given an aligned list of nodes and the list of
    embeddings returned by the model.predict function, builds a dictionary
    from base graph nodes to a list of embeddings.

    The embeddings for a base node are not in any particular order, and the
    order may differ between calls.

    :param node_list: list of base nodes, which may contain duplicates
    :param persona_embedding_list: corresponding embeddings
    :return: dictionary mapping base nodes to all their embeddings
    """
    return pipe(
        zip(node_list, persona_embedding_list),
        groupby(0),
        valmap(lambda x: list(map(getter(1), x))),
    )
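# A minimal usage sketch on toy data (the node labels and embeddings below are
# illustrative only; assumes numpy is imported as np, as in the function above):
#
#   embedding_groups(['a', 'a', 'b'],
#                    [np.array([0.1]), np.array([0.2]), np.array([0.3])])
#   -> {'a': [array([0.1]), array([0.2])], 'b': [array([0.3])]}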
def kfold(df, n_splits, random_state=0):
    kf = KFold(n_splits, random_state=random_state, shuffle=True)
    pos_df = df[df['label'] == 1]
    neg_df = df[df['label'] == 0]
    dataset = TellusDataset(df, has_y=True)

    splits = pipe(
        zip(kf.split(pos_df), kf.split(neg_df)),
        map(lambda x: {
            "train_pos": pos_df.index[x[0][0]],
            "val_pos": pos_df.index[x[0][1]],
            "train_neg": neg_df.index[x[1][0]],
            "val_neg": neg_df.index[x[1][1]],
        }),
        map(valmap(lambda x: Subset(dataset, x))),
        list,
    )
    return splits
selected_bin = 0
dist = data_input
seen_dists_dict = {}
cycle_len_dict = {}
while not cc.valfilter(lambda x: x >= 2, seen_dists_dict):
    dist_tuple = tuple(dist)
    if dist_tuple not in seen_dists_dict:
        seen_dists_dict[dist_tuple] = 1
    else:
        seen_dists_dict[dist_tuple] += 1
    if dist_tuple not in cycle_len_dict:
        cycle_len_dict[dist_tuple] = 0
    cycle_len_dict = cc.valmap(lambda x: x + 1, cycle_len_dict)

    selected_bin = argmax(dist)
    selected_bin_val = dist[selected_bin]
    dist[selected_bin] = 0
    while selected_bin_val > 0:
        selected_bin = (selected_bin + 1) % len(data_input)
        dist[selected_bin] += 1
        selected_bin_val -= 1

cycle_len_dict = cc.valmap(lambda x: x - 1, cycle_len_dict)
cycles = cc.valfilter(lambda x: x >= 2, seen_dists_dict)
key = tuple(cycles.keys())[0]
print(key)
    },
    'Embarked': {
        'funcs': [
            delayed(lambda df: df['Embarked']),
            delayed(lambda s: s.fillna(s.mode()[0])),
            delayed(label_encode),
            delayed(one_hot),
        ]
    },
}

train_df = delayed(pd.read_csv)('/store/kaggle/titanic/train.csv')
preprocessed_train_df = pipe(
    preprocess_params,
    valmap(lambda x: compose(*reversed(x['funcs']))(train_df)),
    delayed(pd.DataFrame),
)
train_dataset = delayed(TitanicDataset)(
    x_df=preprocessed_train_df,
    y_df=delayed(lambda x: x['Survived'])(train_df),
)
val_dataset = delayed(TitanicDataset)(
    x_df=preprocessed_train_df,
    y_df=None,
)
train_result = delayed(train)(
    model_path='/store/kaggle/titanic/model.pt',
# In[29]:

clf.fit(docs, labels)
clf.predict(docs)

# In[30]:

def get_step_by_name(pipe, name):
    return [trans for name_, trans in pipe.steps if name_.startswith(name)][0]

# In[31]:

cnt_vects_pipe = get_step_by_name(tfidf_pipe, "cnt_vects")
cnt_vects = [
    get_step_by_name(pipe, "cnt_vect_")
    for _name, pipe in cnt_vects_pipe.transformer_list
]
vocabulary_map = pipe(
    enumerate(concat(cnt_vect.vocabulary_ for cnt_vect in cnt_vects)),
    groupby(get(1)),
    valmap(lambda vals: list(pluck(0, vals))),
)
vocabulary_map

# In[ ]:
def getInsDispatcher(self, indexMap: Dict[Indice, LabelInIndice]) -> IndexConverter:
    return self.createIndexConverter(
        valmap(lambda x: (x[0],), indexMap),
        (len(self.inAsts),),
        self.outAst.shape,
        True)
block_formatter = apply_formatters_to_dict(BLOCK_FORMATTERS)

SYNCING_FORMATTERS = {
    'startingBlock': to_integer_if_hex,
    'currentBlock': to_integer_if_hex,
    'highestBlock': to_integer_if_hex,
    'knownStates': to_integer_if_hex,
    'pulledStates': to_integer_if_hex,
}

syncing_formatter = apply_formatters_to_dict(SYNCING_FORMATTERS)

TRANSACTION_POOL_CONTENT_FORMATTERS = {
    'pending': compose(
        keymap(to_ascii_if_bytes),
        valmap(transaction_formatter),
    ),
    'queued': compose(
        keymap(to_ascii_if_bytes),
        valmap(transaction_formatter),
    ),
}

transaction_pool_content_formatter = apply_formatters_to_dict(
    TRANSACTION_POOL_CONTENT_FORMATTERS)

TRANSACTION_POOL_INSPECT_FORMATTERS = {
    'pending': keymap(to_ascii_if_bytes),
    'queued': keymap(to_ascii_if_bytes),
}
remaining.extend(current.children)


tree_val_re = re.compile(r'([a-z]{1,}).*\(([0-9]{1,})\)')
input_file = cc.pipe(open(r'day07.in'), list)
data_input = cc.pipe(
    sys.stdin.readlines(),
    cc.map(lambda x: x.replace('\n', '')),
    cc.map(lambda x: x.split('->')),
    cc.map(lambda x: (x[0], [] if len(x) == 1 else cc.pipe(
        x[1], lambda x: x.split(','), cc.map(str.strip), list))),
    list)
tree_val_dict = cc.pipe(
    data_input,
    cc.map(cc.first),
    cc.map(lambda x: [tree_val_re.match(x).group(y) for y in (1, 2)]),
    dict,
    cc.valmap(int))
tree_mapping_dict = cc.pipe(
    data_input,
    cc.map(lambda x: (tree_val_re.match(x[0]).group(1), x[1])),
    dict)
root = cc.pipe(
    tree_mapping_dict.keys(),
    cc.filter(lambda x: x not in cc.concat(tree_mapping_dict.values())),
    cc.first)
tree = Tree(root, tree_mapping_dict, tree_val_dict)
unbalanced = tree.find_unbalanced()
unbalanced_self_weight = unbalanced.weight - sum(x.weight for x in unbalanced.children)
def getFuncsMap(self, funcsIndex: Dict[str, IndexMap]) -> Dict[str, IndexConverter]:
    return valmap(
        lambda v: self.createIndexConverter(v, (2,), self.outAst.shape, True),
        funcsIndex)
def process(workbook: Any, content: str) -> Any:
    """Process Storage-Groups worksheet

    :param workbook:
    :param content:
    :return:
    """
    worksheet_name = 'Storage-Groups'
    worksheet = workbook.get_sheet_by_name(worksheet_name)

    headers = list(concat([
        get_parser_header(PORT_TMPL),
        get_parser_header(STORAGEGROUP_TMPL)[3:],
    ]))
    RowTuple = namedtuple('RowTuple', headers)  # pylint: disable=invalid-name

    build_header(worksheet, headers)

    cmd_storagegroup_out = run_parser_over(content, STORAGEGROUP_TMPL)
    cmd_port_out = run_parser_over(content, PORT_TMPL)

    common_columns = (0, 1)
    server_names_grouped = compose(
        valmap(compose(list, set, map(last))),
        groupby(itemgetter(*common_columns)))(cmd_port_out)

    cmd_port_relevant = map(
        juxt(
            compose(first, first),
            compose(second, first),
            second))(server_names_grouped.items())

    common_columns_getter = itemgetter(*common_columns)
    cmd_merged_out = join(
        common_columns_getter, cmd_port_relevant,
        common_columns_getter, cmd_storagegroup_out)
    cmd_merged_out = sorted(cmd_merged_out)

    rows = list(map(
        compose(
            list,
            concat,
            juxt(
                first,
                compose(drop(3), second))))(cmd_merged_out))

    portcmd = {(array, grp) for array, grp, *other in rows}
    strgp = {(array, grp) for array, grp, *other in cmd_storagegroup_out}
    no_server_groups = strgp - portcmd

    storage_list = list(filter(
        lambda storage_gr: any(
            fnmatch(str((storage_gr[0], storage_gr[1])), str(ctrlServer))
            for ctrlServer in no_server_groups),
        cmd_storagegroup_out))
    storage_list = check_empty_arrays(
        list(unique(storage_list + rows, key=itemgetter(0, 1))))

    final_col, final_row = 0, 0
    for row_n, row_tuple in enumerate(map(RowTuple._make, storage_list), 2):
        for col_n, col_value in \
                enumerate(row_tuple._asdict().values(), ord('A')):
            cell = worksheet['{}{}'.format(chr(col_n), row_n)]
            if isinstance(col_value, str):
                cell.value = str.strip(col_value)
            else:
                cell.alignment = Alignment(wrapText=True)
                cell.value = '\n'.join(col_value)
            style_value_cell(cell)
            set_cell_to_number(cell)
            final_col = col_n
            final_row = row_n

    sheet_process_output(
        worksheet,
        'StorageGroupsTable',
        'Storage-Groups',
        final_col,
        final_row)

    return [[lun_map[0], lun_map[1], lun_map[4]] for lun_map in storage_list]
print(acc1)

# List comprehension version (more Pythonic):
acc2 = [(name, balance) for (id, name, balance, gender) in accounts
        if balance > 150]
print(acc2)

# II. SPLIT-APPLY-COMBINE WITH `GROUPBY` AND `REDUCEBY`:
# 1. Split the dataset into groups by some property
# 2. Reduce each of the groups with some synopsis function

# In-Memory Split-Apply-Combine
# SELECT gender, SUM(balance) FROM accounts GROUP BY gender;
print(groupby(get(3), accounts))
# {'M': [(2, 'Bob', 200, 'M'), (3, 'Charlie', 150, 'M'), (4, 'Dennis', 50, 'M')],
#  'F': [(1, 'Alice', 100, 'F'), (5, 'Edith', 300, 'F')]}

print(pipe(accounts, groupby(get(3)), valmap(compose(sum, pluck(2)))))
# {'M': 400, 'F': 400}  (pluck item 2, the balance, from each account)

# Streaming Split-Apply-Combine
# The groupby operation is not streaming, so this approach is limited to
# datasets that fit comfortably into memory.
# To achieve streaming split-apply-combine, use `reduceby()`.
# The `reduceby` operation takes a key function, like `get(3)` or
# `lambda x: x[3]`, and a binary operator like `add` or
# `lesser = lambda acc, x: acc if acc < x else x`.
# It applies the key function to each item in succession, accumulating a
# running total for each key by combining each new value with the previous
# one using the binary operator.
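# A minimal sketch of the streaming variant described above, assuming
# `reduceby` is imported from toolz alongside the functions already used here;
# the binary operator below is chosen for illustration:
add_balance = lambda total, account: total + account[2]
print(reduceby(get(3), add_balance, accounts, 0))
# {'M': 400, 'F': 400}  -- same result as the groupby/valmap pipeline,
# computed in a single streaming pass without materializing the groups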