Example #1
    def update(self):
        with log_errors():
            state = {
                'all': valmap(len, self.plugin.all),
                'nbytes': self.plugin.nbytes
            }
            for k in ['memory', 'erred', 'released', 'processing']:
                state[k] = valmap(len, self.plugin.state[k])
            if not state['all'] and not len(self.source.data['all']):
                return

            d = progress_quads(state)

            update(self.source, d)

            totals = {
                k: sum(state[k].values())
                for k in ['all', 'memory', 'erred', 'released']
            }
            totals['processing'] = totals['all'] - sum(
                v for k, v in totals.items() if k != 'all')

            self.root.title.text = (
                "Progress -- total: %(all)s, "
                "in-memory: %(memory)s, processing: %(processing)s, "
                "erred: %(erred)s" % totals)
Example #2
 def _createIndiceConverter(
         innerIndiceMap: Dict[Indice, int], coeffs: List[Sequence[float]], oi: int,
         _ls: List[List[Sequence[float]]], _boole: bool = False
 ) -> List[Sequence[float]]:
     if list(innerIndiceMap.keys())[0] == ():
         return coeffs
     else:
         innerIndiceMapGroup: Dict[int, List[Tuple[Indice, int]]] = compose(
             valmap(compose(
                 lambda v: sorted(v, key=lambda k: k[0]),
                 map(lambda l: (l[0][1:], l[1])))),
             groupby(lambda kv: kv[0][0]))(innerIndiceMap.items())
         outArr = list(innerIndiceMapGroup.keys())
         inArr = list(valmap(lambda v: v[0][-1], innerIndiceMapGroup).values())
         coeff = self.getIndiceTransformCoeffs(outArr, inArr, oi, _ls, _boole)
         nextInnerIndiceMapGroup: Dict[int, Dict[Indice, int]] = valmap(dict, innerIndiceMapGroup)
         coeffsList = [
             _createIndiceConverter(
                 self.applyIndiceTransform(nextInnerIndiceMap, key, coeff),
                 [*coeffs, coeff], oi + 1, _ls, _boole
             )
             for key, nextInnerIndiceMap in nextInnerIndiceMapGroup.items()
         ]
         if allSame(coeffsList):
             return coeffsList[0]
         else:
             raise LinearError
Example #3
 def getIndexMap(
         self, insIndex: List[InIndex], outIndex: OutIndex
 ) -> Dict[Indice, LabelInIndice]:
     labeledInsIndex = [
         valmap(lambda x, i=i: (i, x), inIndex)
         for (i, inIndex) in enumerate(insIndex)
     ]
     return valmap(self.getCorrespondIndex(labeledInsIndex), outIndex)
Example #4
 def createIndexConverter(
         self, indexMap: IndexMap, inShape: Shape, outShape: Shape, boole: bool = False
 ) -> IndexConverter:
     if indexMap == {}:
         return NullIndexConverter()
     if boole and set(indexMap.values()) == {(0,)}:
         return UnitIndexConverter(len(self.outAst.shape), 1)
     try:
         indiceConverters = list(reduce(
             lambda ls, i: ls + [self.getIndiceConverter(valmap(lambda v, i=i: v[i], indexMap), ls, boole)],
             range(len(inShape)), [[(0, 0, 0, 1)] * len(outShape)]))[1:]
         if not indiceConverters:
             return ZeroIndexConverter()
         genCoeffs = lambda func, func0=identity: [func0([func(c) for c in l]) for l in indiceConverters]
         linearCoeffs = genCoeffs(nth(0))
         bs = genCoeffs(nth(1), sum)
         indexModCoeffs = genCoeffs(nth(2))
         indexModValues = genCoeffs(nth(3))
         if boole:
             return LinearIndiceConverter(linearCoeffs[0], bs[0], inShape[0], indexModCoeffs[0], indexModValues[0])
         else:
             return LinearIndexConverter(linearCoeffs, bs, inShape, indexModCoeffs, indexModValues)
     except LinearError:
         if all([s != -1 for s in outShape]):
             return FixIndexConverter(indexMap)
         else:
             raise TransformError("Failed to find a transform.")
Example #5
    def update(self):
        with log_errors():
            processing = valmap(len, self.scheduler.processing)
            workers = list(self.scheduler.workers)

            bokeh_addresses = []
            for worker in workers:
                addr = self.scheduler.get_worker_service_addr(worker, 'bokeh')
                bokeh_addresses.append(
                    '%s:%d' % addr if addr is not None else '')

            y = list(range(len(workers)))
            nprocessing = [processing[w] for w in workers]
            processing_color = []
            for w in workers:
                if w in self.scheduler.idle:
                    processing_color.append('red')
                elif w in self.scheduler.saturated:
                    processing_color.append('green')
                else:
                    processing_color.append('blue')

            nbytes = [self.scheduler.worker_bytes[w] for w in workers]
            nbytes_color = []
            max_limit = 0
            for w, nb in zip(workers, nbytes):
                try:
                    limit = self.scheduler.worker_info[w]['memory_limit']
                except KeyError:
                    limit = 16e9
                if limit > max_limit:
                    max_limit = limit

                if nb > limit:
                    nbytes_color.append('red')
                elif nb > limit / 2:
                    nbytes_color.append('orange')
                else:
                    nbytes_color.append('blue')

            now = time()
            if any(nprocessing) or self.last + 1 < now:
                self.last = now
                result = {
                    'nprocessing': nprocessing,
                    'nprocessing-half': [np / 2 for np in nprocessing],
                    'nprocessing-color': processing_color,
                    'nbytes': nbytes,
                    'nbytes-half': [nb / 2 for nb in nbytes],
                    'nbytes-color': nbytes_color,
                    'bokeh_address': bokeh_addresses,
                    'worker': workers,
                    'y': y
                }

                self.nbytes_figure.title.text = 'Bytes stored: ' + format_bytes(
                    sum(nbytes))

                update(self.source, result)
Example #6
 def applyIndiceTransform(
         IndiceMapGroup: Dict[Indice, int], key: int, coeff: Sequence[float]
 ) -> Dict[Indice, int]:
     a, b, c, d = coeff
     return valmap(
         lambda v: v - (np.floor(a * key) + b + np.floor(c * (key % d))),
         IndiceMapGroup,
     )
Example #7
 def grouped(self, group, key=lambda x: x.weight):
     if group == 'siblings' and not self.parent:
         return {self.weight: [self]}
     elif group in {'siblings', 'children'}:
         agg = self.siblings if group == 'siblings' else self.children
         return cc.pipe(((key(x), x) for x in agg),
                        cc.groupby(lambda x: x[0]),
                        cc.valmap(lambda x: [y[1] for y in x]))
     else:
         return {}
Example #8
def get_interactions():
    dates = sorted(set(map(_g('date'), data['interactions'])))
    d = t.pipe(data['interactions'],
               tc.groupby(lambda i: i.student),
               tc.valmap(lambda x: t.pipe(t.groupby(lambda i: i.date, x),
                                          tc.valmap(lambda v: [v[0].time_in, v[0].time_out]))))

    mat = [['student'] + dates]
    for student, attendance in d.items():
        record = [student]
        for dt in dates:
            if dt in attendance:
                record.append(attendance[dt])
            elif dt in data['students'][student].absences:
                record.append(('',''))
            else:
                record.append((None,None))
        mat.append(record)

    return {'interactions': mat}
Example #9
 def indice2Arr(indice: Indice) -> Union[np.ndarray, float]:
     whichIn = insDispatcher(indice)[0]
     doFuncs: Dict[str, bool] = valmap(lambda v: bool(v(indice)[0]), funcsIndex)
     whichFunc = list(valfilter(identity, doFuncs).keys())
     if not whichFunc:
         func = lambda x: x
     else:
         if whichFunc[0] not in f2:
             raise ValueError("No function called {}.".format(whichFunc[0]))
         func = f2[whichFunc[0]]
     return func(
         self.getInputElement(inArrs[whichIn], tuple(indexConverters2[whichIn](indice)))
     )
Example #10
 def getIndexConverters(
         self, indexMap: Dict[Indice, LabelInIndice]
 ) -> List[IndexConverter]:
     return [
         self.getIndexConverter(
             i,
             compose(
                 valmap(lambda v: v[1]),
                 valfilter(lambda v, i=i: v[0] == i),
             )(indexMap),
         )
         for i in range(len(self.inAsts))
     ]
Example #11
def embedding_groups(
        node_list: List[T],
        persona_embedding_list: List[np.ndarray]) -> Dict[T, List[np.ndarray]]:
    """
    Utility function, which given aligned list of nodes and embedding lists from the model.predict function,
    obtain a dictionary from base graph nodes to a list of embeddings. The order of the embeddings for the
    base nodes is not ordered, and the order may differ on different calls.

    :param node_list: list of base nodes, which is duplicated
    :param persona_embedding_list: corresponding embeddings
    :return: dictionary mapping base nodes to all their embeddings
    """
    return pipe(
        zip(node_list, persona_embedding_list),
        groupby(0),
        valmap(lambda x: list(map(getter(1), x))),
    )
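A quick usage sketch (the node labels and 2-D vectors here are made-up illustration data, not from the original source): node 'a' has two persona embeddings, so it maps to a list of two arrays.

    import numpy as np

    nodes = ['a', 'b', 'a']
    vectors = [np.array([0.1, 0.2]), np.array([0.3, 0.4]), np.array([0.5, 0.6])]

    embedding_groups(nodes, vectors)
    # {'a': [array([0.1, 0.2]), array([0.5, 0.6])], 'b': [array([0.3, 0.4])]}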
Example #12
def kfold(df, n_splits, random_state=0):
    kf = KFold(n_splits, random_state=random_state, shuffle=True)
    pos_df = df[df['label'] == 1]
    neg_df = df[df['label'] == 0]
    dataset = TellusDataset(df, has_y=True)

    splits = pipe(
        zip(kf.split(pos_df), kf.split(neg_df)),
        map(lambda x: {
            "train_pos": pos_df.index[x[0][0]],
            "val_pos": pos_df.index[x[0][1]],
            "train_neg": neg_df.index[x[1][0]],
            "val_neg": neg_df.index[x[1][1]],
        }),
        map(valmap(lambda x: Subset(dataset, x))),
        list
    )

    return splits
Example #13
selected_bin = 0
dist = data_input
seen_dists_dict = {}
cycle_len_dict = {}

while not cc.valfilter(lambda x: x >= 2, seen_dists_dict):
    dist_tuple = tuple(dist)
    if dist_tuple not in seen_dists_dict:
        seen_dists_dict[dist_tuple] = 1
    else:
        seen_dists_dict[dist_tuple] += 1

    if dist_tuple not in cycle_len_dict:
        cycle_len_dict[dist_tuple] = 0
    cycle_len_dict = cc.valmap(lambda x: x + 1, cycle_len_dict)

    selected_bin = argmax(dist)
    selected_bin_val = dist[selected_bin]
    dist[selected_bin] = 0

    while selected_bin_val > 0:
        selected_bin = (selected_bin + 1) % len(data_input)
        dist[selected_bin] += 1
        selected_bin_val -= 1

cycle_len_dict = cc.valmap(lambda x: x - 1, cycle_len_dict)
cycles = cc.valfilter(lambda x: x >= 2, seen_dists_dict)
key = tuple(cycles.keys())[0]

print(key)
Example #14
    },
    'Embarked': {
        'funcs': [
            delayed(lambda df: df['Embarked']),
            delayed(lambda s: s.fillna(s.mode()[0])),
            delayed(label_encode),
            delayed(one_hot),
        ]
    },
}

train_df = delayed(pd.read_csv)('/store/kaggle/titanic/train.csv')

preprocessed_train_df = pipe(
    preprocess_params,
    valmap(lambda x: compose(*reversed(x['funcs']))(train_df)),
    delayed(pd.DataFrame),
)

train_dataset = delayed(TitanicDataset)(
    x_df=preprocessed_train_df,
    y_df=delayed(lambda x: x['Survived'])(train_df),
)

val_dataset = delayed(TitanicDataset)(
    x_df=preprocessed_train_df,
    y_df=None,
)

train_result = delayed(train)(
    model_path='/store/kaggle/titanic/model.pt',
Example #15
# In[29]:

clf.fit(docs, labels)
clf.predict(docs)

# In[30]:


def get_step_by_name(pipe, name):
    return [trans for name_, trans in pipe.steps if name_.startswith(name)][0]


# In[31]:

cnt_vects_pipe = get_step_by_name(tfidf_pipe, "cnt_vects")

cnt_vects = [
    get_step_by_name(pipe, "cnt_vect_")
    for _name, pipe in cnt_vects_pipe.transformer_list
]

vocabulary_map = pipe(
    enumerate(concat(cnt_vect.vocabulary_ for cnt_vect in cnt_vects)),
    groupby(get(1)),
    valmap(lambda vals: list(pluck(0, vals))),
)
vocabulary_map

# In[ ]:
Example #16
 def getInsDispatcher(self, indexMap: Dict[Indice, LabelInIndice]) -> IndexConverter:
     return self.createIndexConverter(
         valmap(lambda x: (x[0],), indexMap), (len(self.inAsts),), self.outAst.shape, True
     )
Example #17
block_formatter = apply_formatters_to_dict(BLOCK_FORMATTERS)

SYNCING_FORMATTERS = {
    'startingBlock': to_integer_if_hex,
    'currentBlock': to_integer_if_hex,
    'highestBlock': to_integer_if_hex,
    'knownStates': to_integer_if_hex,
    'pulledStates': to_integer_if_hex,
}

syncing_formatter = apply_formatters_to_dict(SYNCING_FORMATTERS)

TRANSACTION_POOL_CONTENT_FORMATTERS = {
    'pending': compose(
        keymap(to_ascii_if_bytes),
        valmap(transaction_formatter),
    ),
    'queued': compose(
        keymap(to_ascii_if_bytes),
        valmap(transaction_formatter),
    ),
}

transaction_pool_content_formatter = apply_formatters_to_dict(
    TRANSACTION_POOL_CONTENT_FORMATTERS)

TRANSACTION_POOL_INSPECT_FORMATTERS = {
    'pending': keymap(to_ascii_if_bytes),
    'queued': keymap(to_ascii_if_bytes),
}
Example #18
            remaining.extend(current.children)


tree_val_re = re.compile(r'([a-z]+).*\(([0-9]+)\)')
input_file = cc.pipe(open(r'day07.in'), list)

data_input = cc.pipe(
    sys.stdin.readlines(), cc.map(lambda x: x.replace('\n', '')),
    cc.map(lambda x: x.split('->')),
    cc.map(lambda x: (x[0], [] if len(x) == 1 else cc.pipe(
        x[1], lambda x: x.split(','), cc.map(str.strip), list))), list)

tree_val_dict = cc.pipe(
    data_input, cc.map(cc.first),
    cc.map(lambda x: [tree_val_re.match(x).group(y) for y in (1, 2)]), dict,
    cc.valmap(int))

tree_mapping_dict = cc.pipe(
    data_input, cc.map(lambda x: (tree_val_re.match(x[0]).group(1), x[1])),
    dict)

root = cc.pipe(
    tree_mapping_dict.keys(),
    cc.filter(lambda x: x not in cc.concat(tree_mapping_dict.values())),
    cc.first)

tree = Tree(root, tree_mapping_dict, tree_val_dict)

unbalanced = tree.find_unbalanced()
unbalanced_self_weight = unbalanced.weight - sum(x.weight
                                                 for x in unbalanced.children)
Example #19
 def getFuncsMap(self, funcsIndex: Dict[str, IndexMap]) -> Dict[str, IndexConverter]:
     return valmap(lambda v: self.createIndexConverter(v, (2,), self.outAst.shape, True), funcsIndex)
Example #21
def process(workbook: Any, content: str) -> Any:
    """Process Storage-Groups worksheet

    :param workbook:
    :param content:
    :return:
    """

    worksheet_name = 'Storage-Groups'
    worksheet = workbook.get_sheet_by_name(worksheet_name)

    headers = list(concat([
        get_parser_header(PORT_TMPL),
        get_parser_header(STORAGEGROUP_TMPL)[3:],
    ]))
    RowTuple = namedtuple('RowTuple', headers)  # pylint: disable=invalid-name

    build_header(worksheet, headers)

    cmd_storagegroup_out = run_parser_over(content, STORAGEGROUP_TMPL)
    cmd_port_out = run_parser_over(content, PORT_TMPL)

    common_columns = (0, 1)
    server_names_grouped = compose(
        valmap(
            compose(list, set, map(last))),
        groupby(
            itemgetter(*common_columns))
    )(cmd_port_out)

    cmd_port_relevant = map(
        juxt(
            compose(first, first),
            compose(second, first),
            second)
    )(server_names_grouped.items())

    common_columns_getter = itemgetter(*common_columns)
    cmd_merged_out = join(
        common_columns_getter, cmd_port_relevant,
        common_columns_getter, cmd_storagegroup_out)

    cmd_merged_out = sorted(cmd_merged_out)

    rows = list(map(
        compose(
            list,
            concat,
            juxt(
                first,
                compose(
                    drop(3),
                    second)))
    )(cmd_merged_out))

    portcmd = {(array, grp) for array, grp, *other in rows}
    strgp = {(array, grp) for array, grp, *other in cmd_storagegroup_out}
    no_server_groups = strgp - portcmd

    storage_list = list(filter(
        lambda storage_gr: any(
            fnmatch(str((storage_gr[0], storage_gr[1])), str(ctrlServer))
            for ctrlServer in no_server_groups),
        cmd_storagegroup_out))

    storage_list = check_empty_arrays(
        list(unique(storage_list + rows, key=itemgetter(0, 1))))

    final_col, final_row = 0, 0
    for row_n, row_tuple in enumerate(map(RowTuple._make, storage_list), 2):
        for col_n, col_value in \
                enumerate(row_tuple._asdict().values(), ord('A')):
            cell = worksheet['{}{}'.format(chr(col_n), row_n)]
            if isinstance(col_value, str):
                cell.value = str.strip(col_value)
            else:
                cell.alignment = Alignment(wrapText=True)
                cell.value = '\n'.join(col_value)
            style_value_cell(cell)
            set_cell_to_number(cell)
            final_col = col_n
        final_row = row_n

    sheet_process_output(
        worksheet,
        'StorageGroupsTable',
        'Storage-Groups',
        final_col,
        final_row)

    return [[lun_map[0], lun_map[1], lun_map[4]] for lun_map in storage_list]
print(acc1)

# List comprehensions version (more Pythonic):
acc2 = [(name, balance) for (id, name, balance, gender) in accounts
        if balance > 150]
print(acc2)

# II. SPLIT-APPLY-COMBINE WITH `GROUPBY` AND `REDUCEBY`:
# 1. Split the dataset into groups by some property
# 2. Reduce each of the groups with some synopsis function

# In Memory Split-Apply-Combine
# SELECT gender, SUM(balance) FROM accounts GROUP BY gender;
print(groupby(get(3), accounts))
# {'M': [(2, 'Bob', 200, 'M'), (3, 'Charlie', 150, 'M'), (4, 'Dennis', 50, 'M')], 'F': [(1, 'Alice', 100, 'F'), (5, 'Edith', 300, 'F')]}
print(pipe(accounts, groupby(get(3)), valmap(compose(sum, pluck(2)))))

# {'M': 400, 'F': 400}  (pluck(2) extracts the balance column before summing)

# Streaming Split-Apply-Combine
# The groupby operation is not streaming, so this approach is limited
# to datasets that fit comfortably into memory.
# To achieve streaming split-apply-combine, use `reduceby()`.


# The `reduceby` operation takes a key function,
# like `get(3)` or `lambda x: x[3]`, and a binary operator like
# `add` or `lesser = lambda acc, x: acc if acc < x else x`.
# It applies the key function to each item in succession,
# accumulating a running total for each key by combining each new value
# with the previous one using the binary operator, as sketched below.
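
# A minimal sketch of the streaming version, assuming the same `accounts`
# tuples and the curried `get` used above. reduceby(key, binop, seq, init)
# computes the grouped reduction in one pass, without materializing groups.
from toolz import reduceby

# Fold one account's balance into the running total for its gender key.
add_balance = lambda total, account: total + account[2]

print(reduceby(get(3), add_balance, accounts, 0))
# {'M': 400, 'F': 400}  (same result as groupby + valmap, in a single pass)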