Example #1
    def serde_with_class(cls):
        # (attribute, source-path) pairs; the path defaults to the field name
        from_fields = list(
            map(lambda a: (a, get_in([from_key], a.metadata, [a.name])),
                fields(cls)))

        to_fields = pipe(
            fields(cls),
            map(lambda a: (a, get_in([to_key], a.metadata))),
            filter(lambda f: f[1]),
            list,
        )

        def from_dict(d):
            return cls(**dict(
                map(
                    lambda f: (f[0].name, get_in(f[1], d, f[0].default)),
                    from_fields,
                )))

        def to_dict(self):
            d = asdict(self)
            return reduce(
                lambda acc, f: update_in(acc, f[1], lambda _: d[f[0].name]),
                to_fields,
                {},
            )

        cls.from_dict = staticmethod(from_dict)
        cls.to_dict = to_dict
        return cls
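A minimal usage sketch (hypothetical, since the excerpt does not show its enclosing scope: assume the decorator closes over from_key = 'from' and to_key = 'to', with fields/asdict from attr and the curried map/filter/get_in/update_in/reduce from toolz.curried):

import attr

@serde_with_class
@attr.s
class Point:
    x = attr.ib(default=0, metadata={'from': ['pos', 'x'], 'to': ['pos', 'x']})
    y = attr.ib(default=0, metadata={'from': ['pos', 'y'], 'to': ['pos', 'y']})

p = Point.from_dict({'pos': {'x': 1, 'y': 2}})  # Point(x=1, y=2)
p.to_dict()                                     # {'pos': {'x': 1, 'y': 2}}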
Example #2
def montage_stream(ims, montage_order=None, channel_order=[0, 1, 2],
                   clear_none=True):
    """From a sequence of single-channel field images, montage multichannels.

    Suppose the input is a list:

    ```
    ims = [green1a, blue1a, red1a, green1b, blue1b, red1b,
           green2a, blue2a, red2a, green2b, blue2b, red2b]
    ```

    with channel order ``[2, 0, 1]`` and montage order ``[1, 0]``, then
    the output will be:

    ```
    [rgb1_ba, rgb2_ba]
    ```

    Parameters
    ----------
    ims : iterator of array, shape (M, N)
        An iterator of images in which consecutive images represent single
        channels of the same image. (See example.)
    montage_order : array-like of int, optional
        The order of the montage tiles (in 1D or 2D).
    channel_order : list of int, optional
        The order in which the channels appear.
    clear_none : bool, optional
        If True, ignore ``None`` entries in ``channel_order`` when counting
        the number of channels per multi-channel image.

    Returns
    -------
    montaged_stream : iterator of arrays
        An iterator of the images composed into multi-channel montages.

    Examples
    --------
    >>> images = (i * np.ones((4, 5), dtype=np.uint8) for i in range(24))
    >>> montaged = list(montage_stream(images, [[0, 1], [2, 3]], [2, 0, 1]))
    >>> len(montaged)
    2
    >>> montaged[0].shape
    (8, 10, 3)
    >>> montaged[0][0, 0, :]
    array([2, 0, 1], dtype=uint8)
    >>> montaged[0][4, 5, :]
    array([11,  9, 10], dtype=uint8)
    >>> montaged[1][4, 5, :]
    array([23, 21, 22], dtype=uint8)
    """
    if montage_order is None:
        montage_order = cellomics.SPIRAL_CLOCKWISE_RIGHT_25
    montage_order = np.array(montage_order)
    ntiles = montage_order.size
    if clear_none:
        nchannels = len([i for i in channel_order if i is not None])
    else:
        nchannels = len(channel_order)
    return tz.pipe(ims, c.partition(nchannels),
                        c.map(stack_channels(order=channel_order)),
                        c.partition(ntiles),
                        c.map(montage(order=montage_order)))
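The pipeline shape is easier to see on plain integers; a toy sketch with the same partition/stack/partition/montage structure (tuple and list stand in for stack_channels and montage):

import toolz as tz
import toolz.curried as c

fields = list(range(12))          # 12 single-channel field images
nchannels, ntiles = 3, 2
tz.pipe(fields,
        c.partition(nchannels),   # consecutive channels of one field
        c.map(tuple),             # stand-in for stack_channels
        c.partition(ntiles),      # the tiles of one montage
        c.map(list),              # stand-in for montage
        list)
# [[(0, 1, 2), (3, 4, 5)], [(6, 7, 8), (9, 10, 11)]]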
Example #3
    def find_domain_urls(self, domain: str) -> List[str]:
        """
        Get all known urls for domain.

        Returns
        -------
        all_urls : iterator
        """
        def _urlkey_to_url(urlkey):
            try:
                # very rare bugged urlkeys appear
                domain, path = urlkey.split(')/', 1)
            except ValueError:
                return
            domain = domain.split(',')
            domain.reverse()
            domain = '.'.join(domain)
            if path:
                return '/'.join([domain, path])
            return domain

        urls_by_index = map(
            lambda ind: self.__get_domain_urls_in_index(ind, domain),
            self.indexes)
        all_urls = pipe(urls_by_index, concat, map(bytes.decode),
                        map(_urlkey_to_url), filter(None), map(unquote),
                        map(lambda x: x.strip()), unique, list)
        return all_urls
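For reference, a standalone sketch of the SURT-style urlkey format that _urlkey_to_url reverses (hypothetical input):

urlkey = 'com,example)/about/index.html'
domain, path = urlkey.split(')/', 1)            # 'com,example', 'about/index.html'
domain = '.'.join(reversed(domain.split(',')))  # 'example.com'
'/'.join([domain, path])                        # 'example.com/about/index.html'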
Example #4
def batch_aug(aug, batch, ch=3):
    # apply `aug` separately to the two ch-channel halves of each sample,
    # then re-concatenate the halves and restack the batch
    return pipe(
        batch,
        map(lambda x: [aug(x[0:ch, :, :]), aug(x[ch:2*ch, :, :])]),
        map(lambda x: torch.cat(x, dim=0)),
        list,
        torch.stack
    )
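A minimal usage sketch, assuming each batch element stacks two ch-channel images along the channel axis:

import torch

batch = [torch.randn(6, 32, 32) for _ in range(4)]  # two 3-channel images per sample
out = batch_aug(lambda t: t.flip(-1), batch, ch=3)  # same aug applied to both halves
out.shape                                           # torch.Size([4, 6, 32, 32])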
Example #5
def process(workbook: Workbook, content: list) -> None:
    """Process LUNsPivot worksheet

    :param workbook:
    :param content:
    """
    worksheet_name = 'LUN-Storage_Pivot'
    worksheet = workbook.get_sheet_by_name(worksheet_name)

    state_header = ['ArrayName', 'LUNCount', 'SumOfLUNCapacityTB']

    StateTuple = namedtuple('StateTuple', state_header)

    build_header(worksheet, state_header)

    array_storage_groups = groupby(itemgetter(0, 3), content)
    array_groups = groupby(itemgetter(0), array_storage_groups)

    state_rows, grand_total = [], [0, 0]  # type: list, list
    for array in array_groups:
        lun_count, lun_capacity, storage_array_rows = 0, 0, []
        for array_group in array_groups[array]:
            array_luns = list(zip(*array_storage_groups[array_group]))

            row = [
                array_group[1],
                len(array_storage_groups[array_group]),
                sum(map(float, array_luns[12]))
            ]

            lun_count += row[1]
            lun_capacity += row[2]
            storage_array_rows.append(map(str, row))

        grand_total[0], grand_total[1] = grand_total[0] + lun_count, \
            grand_total[1] + lun_capacity
        state_rows += [[array, str(lun_count),
                        str(lun_capacity)], *storage_array_rows]

    state_rows.append(['Grand Total', *grand_total])
    final_col, final_row = 0, 0
    for row_n, row_tuple in enumerate(map(StateTuple._make, state_rows), 2):
        for col_n, col_value in \
                enumerate(row_tuple._asdict().values(), ord('A')):
            cell = worksheet['{}{}'.format(chr(col_n), row_n)]
            col_value = str(col_value) \
                if not isinstance(col_value, str) else col_value
            cell.value = col_value
            style_value_cell(cell)
            if row_tuple.ArrayName \
                    in list(array_groups) + ['Grand Total']:
                cell.font = Font(bold=True, size=11)
            set_cell_to_number(cell)
            final_col = col_n
        final_row = row_n

    sheet_process_output(worksheet, 'LUNStorageTable', 'LUN-Storage_Pivot',
                         final_col, final_row)
Example #6
def test_flip():
    writer = SummaryWriter(f'{config["TENSORBORAD_LOG_DIR"]}/test')
    dataset_df = load_dataset_df('/store/kaggle/tgs')
    dataset = TgsSaltDataset(dataset_df)
    writer.add_image(
        "flip",
        vutils.make_grid(
            # fetch the same sample repeatedly to visualize its random flips
            pipe(range(8), map(lambda _: dataset[12]),
                 map(lambda x: [x['image'], x['mask']]), concat, list)),
    )
Example #7
def varlabels2df(vlbls, yr=None):
    return thread_last(
        vlbls.items(),
        # items() yields (var, {code: label}) tuples, so unpack inside a
        # single-argument lambda (map would fail with a two-argument lambda)
        map(lambda kv: pd.DataFrame({'code': list(kv[1].keys()),
                                     'label': list(kv[1].values()),
                                     'var': kv[0]})),
        map(lambda df: df.assign(year=yr) if yr else df),
        pd.concat,
        lambda df: (df.set_index(['var', 'year', 'code'])
                    if yr else df.set_index(['var', 'code']))
    )
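A small sketch with hypothetical data, showing the resulting index shape:

vlbls = {'q1': {1: 'Yes', 2: 'No'}}
varlabels2df(vlbls, yr=2019)
# roughly:
#                label
# var year code
# q1  2019 1       Yes
#          2       No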
Example #8
    def __init__(
        self,
        id,
        dataset_dir,
        output_dir,
        n_splits,
        base_train_config,
        folds,
    ):
        params = locals()
        torch.manual_seed(0)

        ids = pipe(range(n_splits), filter(lambda x: x in folds), list)

        train_df_path = delayed(load_train_df)(
            dataset_dir=join(dataset_dir, 'train'),
            output=join(output_dir, 'train.pqt'))

        train_df = delayed(pd.read_parquet)(train_df_path)

        kfolded = delayed(kfold)(train_df, n_splits)

        train_sets = pipe(ids, map(lambda x: delayed(lambda i: i[x])(kfolded)),
                          list)

        model_paths = pipe(
            zip(ids, train_sets),
            map(lambda x: delayed(train_fusion)(
                **base_train_config,
                model_path=join(output_dir, f"{id}-fold-{x[0]}-base-model.pt"),
                sets=x[1],
                log_dir=f'{config["TENSORBORAD_LOG_DIR"]}/{id}/{x[0]}/base',
            )), list)

        test_df_path = load_test_df(dataset_dir='/store/tellus/test',
                                    output=join(output_dir, 'test.pqt'))
        test_df = delayed(pd.read_parquet)(test_df_path)
        test_dataset = delayed(TellusDataset)(
            test_df,
            has_y=False,
        )

        submission_df_path = delayed(predict)(
            model_paths=model_paths,
            log_dir=f'{config["TENSORBORAD_LOG_DIR"]}/{id}/sub',
            dataset=test_dataset,
            log_interval=10,
            out_path=f'{output_dir}/{id}_submission.tsv',
        )

        self.output = delayed(lambda x: x)((
            model_paths,
            submission_df_path,
        ))
Example #9
    def update(self):
        with log_errors():
            log = self.steal.log
            n = self.steal.count - self.last
            log = [log[-i] for i in range(1, n + 1)]
            self.last = self.steal.count

            if log:
                new = pipe(log, map(groupby(1)), map(dict.values), concat,
                           map(self.convert), list, transpose)
                self.source.stream(new, 10000)
Example #10
    def __init__(self, epoch_size, len_indices, shuffle=True, start_at=0):
        self.shuffle = shuffle
        self.epoch_size = epoch_size
        self.len_indices = len_indices
        indices = range(len_indices)
        # split the index range into epoch-sized chunks, dropping the
        # remainder; e.g. epoch_size=3, len_indices=8 -> [[0, 1, 2], [3, 4, 5]]
        self.chunks = pipe(
            range(0, len_indices // epoch_size),
            map(lambda x: indices[x * epoch_size:(x + 1) * epoch_size]),
            map(list),
            list,
        )
        self.chunk_idx = start_at
Example #11
def parse_variable_labels(txt, repl, lbls_to_lower=True):
    b2d = curry(block2dict)(repl=repl, to_lower=lbls_to_lower)
    labels = thread_last(
        txt.split(';'),
        filter(lambda x: x.strip().lower().startswith('value')),
        map(lambda x: x.strip().split('\n')),
        map(lambda x: (x[0].split()[1].lower(), b2d(x[1:]))),
        dict
    )
    logger.info('parsed varlabels from format txt',
                nlabeled=len(labels), nrepl=len(repl))
    return labels
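The expected input is PROC FORMAT-style text; a hypothetical sketch, assuming block2dict turns lines like "1='Yes'" into a code-to-label dict:

txt = '''
value yesno
1='Yes'
2='No'
;
value grade
1='9th'
2='10th'
'''
parse_variable_labels(txt, repl={})
# {'yesno': {...}, 'grade': {...}}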
Example #12
    def __locate_url(self, index: str, params: Dict) -> Optional[Iterator[Dict]]:
        # returns a lazy iterator of records, not a single Dict
        # (Iterator comes from typing, like Dict and Optional)
        resp = self.session.get(index, params=params)
        if resp.status_code == 503:
            # back off briefly, then retry on server overload
            time.sleep(random.randint(1, 4))
            return self.__locate_url(index, params)
        elif resp.status_code in range(200, 300):
            content = resp.content.splitlines()
            results = map(json.loads, content)
            # tag each record with the index it came from
            results = map(lambda x: {**x, 'index': index}, results)
            return results
        else:
            return None
Example #13
def get_metadata_socrata_denovo(soc_cfg):
    g = soc_cfg
    revmap = {v: k for k, v in g.mapcols.items()}
    url = '{api_url}?' + \
          '$select={cols}' + \
          '&$order={ocols}'
    meta_diff = set(g.qn_meta).difference(g.computed)
    meta_diff = list(meta_diff)
    qncols = ','.join([revmap.get(k, k) for k in meta_diff])

    ocols = ','.join([revmap['qid'], 'year'])

    logger.info('loading SODA meta data')
    res = thread_last(
        g.soda_api,
        map(lambda x: url.format(api_url=x, cols=qncols, ocols=ocols)),
        map(dl.df_from_socrata_url),
        curry(pd.concat)(ignore_index=True))
    # transformation steps disabled in the source, kept for reference:
    '''
        lambda xf: xf.applymap(lambda x: (re.sub('\xa0', '', x)).strip()),
        lambda xf: xf.rename(index=str, columns={x: x.lower() for x in
                                                 xf.columns}),
        lambda xf: xf if not g.mapcols else xf.rename(index=str,
                                                      columns=g.mapcols),
        curry(apply_fn2vals)(fns=g.apply_fn),
        lambda xf: xf if not g.mapvals else xf.replace(g.mapvals),
        lambda xf: xf if not g.mapvals else
            xf.applymap(lambda x: g.mapvals[x.lower().strip()] if
                        x.lower().strip() in g.mapvals else x),
        lambda xf: xf[g.qn_meta])
    '''
    logger.info('finished transformations', res=res.head())
    # pull out question -> response breakouts
    qns = res[['qid', 'year', 'topic', 'subtopic',
               'question', 'response']].drop_duplicates().reset_index(drop=True)
    # since facets are questions as well
    # update the dict with response value from fc_res
    # overriding the original var (N.B.)
    yrvec = (res[['year']]
             .drop_duplicates()
             .assign(facet='year')
             .rename(index=str, columns={'year': 'facet_level'}))
    stvec = (res[['sitecode']]
             .drop_duplicates()
             .assign(facet='sitecode')
             .rename(index=str, columns={'sitecode': 'facet_level'}))
    facs = pd.concat([res[['facet', 'facet_level']].drop_duplicates(),
                      yrvec, stvec], axis=0).reset_index(drop=True)
    logger.info('created qn and facs', qn=qns.head(), fac=facs.head())
    return (qns, facs)
Example #14
    def __get_domain_urls_in_index(self, index: str, domain: str) -> Iterator[bytes]:
        pages_number = self._get_pages_number(index, domain)
        params = {
            'url': domain,
            'matchType': 'domain',
            'output': 'text',
            'filter': 'mime:text/html',
            'fl': 'urlkey'
        }
        resps = map(lambda page: self.__query_index(page, params, index),
                    range(pages_number))
        # each response is raw bytes; yield one urlkey (bytes) per line,
        # lazily, so the annotation is Iterator[bytes] rather than List[str]
        urls = pipe(resps, map(bytes.splitlines), concat)
        return urls
Example #15
def get_qids_by_year(soc_cfg):
    g = soc_cfg
    revmap = {v: k for k, v in g.mapcols.items()}
    url = '{api_url}?' + \
          '$select=year,{qnkey},count(year)' + \
          '&$group=year,{qnkey}' + \
          '&$order={qnkey},year'
    qid = revmap['qid']
    df = thread_last(g.soda_api,
                     map(lambda x: url.format(api_url=x, qnkey=qid)),
                     map(dl.df_from_socrata_url),
                     curry(pd.concat)(ignore_index=True))
    df.to_csv(sys.stdout)
Example #16
def to_string_pairs(segmentsbytxt, separator=" + "):
    """
    segmentsbytxt - Output from dual_segment_many.

    Returns a lazy iterator of string pairs.

    >>> exdata = [[(["foo"], ["foo"])], [(["foo", "bar", "baz"], ["foo", "bar", "baz"])]]

    >>> list(to_string_pairs(exdata))
    [('foo', 'foo'), ('foo + bar + baz', 'foo + bar + baz')]

    >>> list(to_string_pairs(exdata, separator=", "))
    [('foo', 'foo'), ('foo, bar, baz', 'foo, bar, baz')]
    """
    return tlz.pipe(segmentsbytxt, tlz.concat,
                    tlzc.map(tlz.compose(tuple, tlzc.map(separator.join))))
Example #17
def connect_to_twitter_filtered_stream(stream_key, saving_function):
    """Connect to & consume a filtered Twitter stream, where Twitter does
    some of the filtering"""
    stream = tz.pipe(
        ## Connect
        start_stream_twitter(**CONFIG['twitter_filter']),
        tz.map(print_twitter_stall_warning),
        ## Filter
        tz.filter(is_tweet), # filter to tweets
        ## Parse
        tz.map(parse_tweet), # parse into a flat dictionary
    )

    ## Collect
    saving_function(stream_key, stream)
Example #18
def connect_to_twitter_stream(stream_key, saving_function):
    """Connect to & consume a Twitter stream"""
    stream = tz.pipe(
        ## Connect
        start_stream_twitter(), # public sampled stream
        tz.map(print_twitter_stall_warning),
        ## Filter
        tz.filter(is_tweet), # filter to tweets
        # tz.filter(is_user_lang_tweet(["en", "en-AU", "en-au", "en-GB", "en-gb"])), # filter to English
        ## Parse
        tz.map(parse_tweet), # parse into a flat dictionary
    )

    # Collect
    saving_function(stream_key, stream)
Example #19
    def update(self):
        with log_errors():
            log = self.steal.log
            n = self.steal.count - self.last
            log = [log[-i] for i in range(1, n + 1)]
            self.last = self.steal.count

            if log:
                new = pipe(log, map(groupby(1)), map(dict.values), concat,
                           map(self.convert), list, transpose)
                if PROFILING:
                    curdoc().add_next_tick_callback(
                        lambda: self.source.stream(new, 10000))
                else:
                    self.source.stream(new, 10000)
Example #20
    def __init__(self,
                 feature_size=8,
                 depth=3,
                 ):
        super().__init__()
        self.down_layers = nn.ModuleList([
            DownSample(1, feature_size * 2 ** depth),
            *pipe(
                range(depth),
                reversed,
                map(lambda x: DownSample(
                    feature_size * (2 ** (x + 1)),
                    feature_size * (2 ** x),
                )),
                list,
            )
        ])

        self.center = DownSample(
            in_ch=feature_size,
            out_ch=feature_size,
        )

        self.up_layers = nn.ModuleList([
            *pipe(
                self.down_layers,
                reversed,
                map(lambda x: x.out_ch),
                take(depth),
                map(lambda x: UpSample(
                    feature_size,
                    feature_size,
                    x,
                )),
                list,
            ),
            UpSample(
                feature_size,
                feature_size,
                feature_size * 2 ** depth,
            ),
        ])

        self._output = nn.Conv2d(
            feature_size,
            2,
            kernel_size=3
        )
Example #21
def get_hashtag_string(given_item):
    """Return a string of hashtags associated with the given item"""
    return tz.pipe(
        tz.get_in(['entities', 'hashtags'], given_item, default=[]),
        tz.map(lambda x: tz.get_in(['text'], x, default=None)),
        tz.filter(lambda x: x is not None),
        lambda x: ", ".join(x))
Example #22
File: multi.py Project: 908kre/aplf
def validate(predicts, dataset, batch_size):
    loader = DataLoader(
        dataset,
        batch_size=batch_size,
        pin_memory=True,
        shuffle=False,
    )
    y_preds = np.array(predicts).mean(axis=0).argmax(axis=1)
    y_trues = pipe(
        loader,
        map(lambda x: x['label'].cpu().detach().tolist()),
        reduce(lambda x, y: x + y),
        np.array,
    )

    score = iou(
        y_preds,
        y_trues,
    )
    tn, fp, fn, tp = confusion_matrix(y_trues, y_preds).ravel()
    return {
        'TPR': tp / (tp + fn),
        'FNR': fn / (tp + fn),
        'FPR': fp / (fp + tn),
        'acc': (tp + tn) / (tp + tn + fp + fn),
        'pre': tp / (tp + fp),
        'iou': tp / (fn + tp + fp),
    }
Example #23
    def forward(self, x, others, size):
        # resize every input to a common spatial size, then fuse
        out = pipe([x, *others],
                   map(lambda t: F.interpolate(t, mode='bilinear', size=size)),
                   list)
        out = torch.cat(out, 1)
        out = self.block(out)
        return out
Example #24
def take_topk(scores, paths, top_num):
    return pipe(
        zip(scores, paths),
        lambda x: topk(top_num, x, key=lambda y: y[0]),
        map(lambda x: x[1]),
        list
    )
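For example:

scores = [0.2, 0.9, 0.5]
paths = ['a.png', 'b.png', 'c.png']
take_topk(scores, paths, 2)  # ['b.png', 'c.png']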
Example #25
    def _createIndiceConverter(
            innerIndiceMap: Dict[Indice, int], coeffs: List[Sequence[float]], oi: int,
            _ls: List[List[Sequence[float]]], _boole: bool = False
    ) -> List[Sequence[float]]:
        # compare with == : `is ()` relies on identity with a literal tuple,
        # which is unreliable and a SyntaxWarning on modern Python
        if list(innerIndiceMap.keys())[0] == ():
            return coeffs
        else:
            innerIndiceMapGroup: Dict[int, List[Tuple[Indice, int]]] = compose(
                valmap(compose(
                    lambda v: sorted(v, key=lambda k: k[0]),
                    map(lambda l: (l[0][1:], l[1])))),
                groupby(lambda kv: kv[0][0]))(innerIndiceMap.items())
            outArr = list(innerIndiceMapGroup.keys())
            inArr = list(valmap(lambda v: v[0][-1], innerIndiceMapGroup).values())
            coeff = self.getIndiceTransformCoeffs(outArr, inArr, oi, _ls, _boole)
            nextInnerIndiceMapGroup: Dict[int, Dict[Indice, int]] = valmap(dict, innerIndiceMapGroup)
            coeffsList = [
                _createIndiceConverter(
                    self.applyIndiceTransform(nextInnerIndiceMap, key, coeff),
                    [*coeffs, coeff], oi + 1, _ls, _boole
                )
                for key, nextInnerIndiceMap in nextInnerIndiceMapGroup.items()
            ]
            if allSame(coeffsList):
                return coeffsList[0]
            else:
                raise LinearError
Example #26
def process(workbook: Any, content: str) -> None:
    """Process Perf_DISKS worksheet

    :param workbook:
    :param content:
    """
    worksheet_name = 'Perf_DISKS'
    worksheet = workbook.get_sheet_by_name(worksheet_name)

    headers = get_parser_header(DISKS_TMPL)
    RowTuple = namedtuple('RowTuple', headers)

    build_header(worksheet, headers)
    list_wwn_out = run_parser_over(content, DISKS_TMPL)
    final_col, final_row = 0, 0
    for row_n, row_tuple in enumerate(map(RowTuple._make, list_wwn_out), 2):
        for col_n, col_value in \
                enumerate(row_tuple._asdict().values(), ord('A')):
            cell = worksheet['{}{}'.format(chr(col_n), row_n)]
            cell.value = str.strip(col_value)
            style_value_cell(cell)
            set_cell_to_number(cell)
            final_col = col_n
        final_row = row_n

    sheet_process_output(
        worksheet,
        'PerfDISKSTable',
        'Perf_DISKS',
        final_col,
        final_row)
Example #27
def process(workbook: Any, content: str) -> None:
    """Process Data Protection Groups worksheet (XtremIO)

    :param workbook:
    :param content:
    """
    worksheet = workbook.get_sheet_by_name('Data Protection Groups')

    headers = get_parser_header(SHOW_DATA_PROTECTION_TMPL)

    RowTuple = namedtuple('RowTuple', headers)  # pylint: disable=invalid-name

    build_header(worksheet, headers)

    show_targets_out = run_parser_over(content, SHOW_DATA_PROTECTION_TMPL)

    final_col, final_row = 0, 0
    for row_n, row_tuple in enumerate(map(RowTuple._make, show_targets_out),
                                      2):
        for col_n, col_value in \
                enumerate(row_tuple._asdict().values(), ord('A')):
            cell = worksheet['{}{}'.format(chr(col_n), row_n)]
            cell.value = str.strip(col_value)
            style_value_cell(cell)
            set_cell_to_number(cell)
            final_col = col_n
        final_row = row_n

    sheet_process_output(worksheet, 'DataProtectionGroupsTable',
                         'Data Protection Groups', final_col, final_row)
Example #28
    def fetch_stats(self, qn, vars=[], filt={}):
        vars = self.mapper(vars)
        filt = self.mapper(filt)
        lvls = self.responses_for_qn(qn)
        res = map(lambda r: fetch_stats(self.des, qn, r, vars, filt), lvls)
        dfz = pd.concat(res, ignore_index=True)
        return dfz
Example #29
def connect_to_wordpress_stream(stream_key, saving_function):
    """Connect to & consume a WordPress event stream"""
    parse_functions = {
        'posts': parse_post,
        'likes': parse_like,
        'comments': parse_comment}
    stream = tz.pipe(
        ## Connect
        start_wordpress_stream(CONFIG['stream_urls'][stream_key]),
        ## Parse
        tz.map(permissive_json_load), # parse the JSON, or return an empty dictionary
        tz.map(parse_functions[stream_key]), # parse into a flat dictionary
    )

    # Collect
    saving_function(stream_key, stream)
Example #30
def write_excel(rows: Iterable, worksheet: Any, RowTuple: Any,
                start_col: str) -> tuple:
    """Writes rows in excel from a specified start column

    :param rows:
    :param worksheet:
    :param RowTuple:
    :param start_col:
    :return:
    """
    final_col, final_row = 0, 0
    for row_n, row_tuple in enumerate(map(RowTuple._make, rows), 2):
        for col_n, col_value in \
                enumerate(row_tuple._asdict().values(), ord(start_col)):
            cell = worksheet['{}{}'.format(chr(col_n), row_n)]
            if isinstance(col_value, str):
                cell.value = col_value.strip()
            else:
                # assumes non-string values are iterables of strings; the
                # previous unconditional str() cast made this branch dead
                cell.alignment = Alignment(wrapText=True)
                cell.value = '\n'.join(col_value)
            style_value_cell(cell)
            set_cell_to_number(cell)
            final_col = col_n
        final_row = row_n
    return final_col, final_row
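One caveat: chr(col_n) only addresses columns 'A' through 'Z'. For wider sheets, openpyxl ships a converter from a 1-based column index to a letter:

from openpyxl.utils import get_column_letter

get_column_letter(1)   # 'A'
get_column_letter(27)  # 'AA'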
Example #31
def get_metadata_socrata(soc_cfg, soc_df, facets):
    g = soc_cfg
    # pull out question -> response breakouts
    qns = soc_df[g.qn_meta].drop_duplicates().reset_index(drop=True)
    # since facets are questions as well
    # update the dict with response value from fc_res
    # overriding the original var (N.B.)
    yrvec = summarize_column(soc_df, 'year')
    stvec = summarize_column(soc_df, 'sitecode')
    in_facets = list(set(facets).intersection(soc_df.columns))
    miss_facets = list(set(facets).difference(soc_df.columns))
    logger.warn('missing facets in generating socrata metadata', f=miss_facets)
    logger.warn('generating summary columns for facets', f=in_facets)
    facs = None
    if 'facet' in soc_df.columns:
        facs = (pd.concat(
            [soc_df[['facet', 'facet_level']].drop_duplicates(), yrvec, stvec],
            axis=0, ignore_index=True).reset_index(drop=True)
        )
    else:
        summs = list(map(lambda k: summarize_column(soc_df, k), in_facets))
        facs = pd.concat(summs + [yrvec, stvec], 
                         axis=0, ignore_index=True).reset_index(drop=True)
        facs = facs[facs.facet_level != "Total"]
    logger.info('created qn and fac metadata',
                qn=qns.dtypes.to_dict(),
                fac=list(facs.facet.drop_duplicates()))
    return (qns.reset_index(drop=True), facs.reset_index(drop=True))
Example #32
def process(workbook: Any, content: str) -> None:
    """Process Volumes worksheet (XtremIO)

    :param workbook:
    :param content:
    """
    worksheet = workbook.get_sheet_by_name('Volumes')

    headers = get_parser_header(SHOW_VOLUMES_TMPL)

    RowTuple = namedtuple('RowTuple', headers)  # pylint: disable=invalid-name

    build_header(worksheet, headers)

    show_volumes_out = run_parser_over(content, SHOW_VOLUMES_TMPL)

    final_col, final_row = 0, 0
    for row_n, row_tuple in enumerate(map(RowTuple._make, show_volumes_out),
                                      2):
        for col_n, col_value in \
                enumerate(row_tuple._asdict().values(), ord('A')):
            cell = worksheet['{}{}'.format(chr(col_n), row_n)]
            if isinstance(col_value, str):
                cell.value = str.strip(col_value)
            else:
                cell.alignment = Alignment(wrapText=True)
                cell.value = '\n'.join(col_value)
            style_value_cell(cell)
            set_cell_to_number(cell)
            final_col = col_n
        final_row = row_n

    sheet_process_output(worksheet, 'VolumesTable', 'Volumes', final_col,
                         final_row)
Example #33
def _get_wf_call_failures(metadata, opts):
    calls = []
    if 'calls' in opts:
        calls = opts['calls'].split(',')
    else:
        calls = metadata['calls'].keys()

    jobids = None
    if 'jobids' in opts:
        jobids = set(opts['jobids'].split(','))

    fails = {}

    for c in calls:
        tasks = metadata['calls'][c]
        failures = pipe(
            tasks,
            filter(lambda x: get('executionStatus', x) == 'Failed'),
            filter(lambda x: _valid_job_id(jobids, get('jobId', x))),
            map(
                lambda x: {
                    'jobId': get('jobId', x),
                    # 'inputs': get('inputs', x),
                    'stderr': get('stderr', x),
                    'shard': get('shardIndex', x),
                    'err_msg': get_in(['failures', 0, 'message'], x, 'NA'),
                    # 'jes': get('jes', x),
                    # 'runtime': get('runtimeAttributes', x),
                    'rc': get('returnCode', x, 'NA'),
                }),
            list)
        fails[c] = failures

    return fails
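The get_in calls with a default are what keep the nested lookups safe on irregular metadata; a standalone illustration:

from toolz.curried import get_in

call = {'jobId': '42', 'failures': [{'message': 'PAPI error'}]}
get_in(['failures', 0, 'message'], call, 'NA')  # 'PAPI error'
get_in(['failures', 0, 'message'], {}, 'NA')    # 'NA'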
Example #34
        def to_dict(self,
                    convert_values: bool = False) -> MutableMapping[str, Any]:
            to_fields = curried.pipe(
                fields(self.__class__),
                curried.map(lambda a:
                            (a, curried.get_in([to_key], a.metadata))),
                curried.filter(lambda f: f[1]),
                list,
            )

            if convert_values:
                d = asdict(self)
            else:
                d = {
                    a.name: getattr(self, a.name)
                    for a in fields(self.__class__)
                }

            if not to_fields:
                return d

            return curried.reduce(
                lambda acc, f: curried.update_in(
                    acc, f[1], lambda _: d[f[0].name]),
                to_fields,
                {},
            )
Example #35
def get_categories(given_dict):
    """Return a string of the categories associated with a post"""
    return tz.pipe(
        tz.get_in(['object', 'tags'], given_dict, default=[]),
        tz.filter(lambda x: tz.get_in(['objectType'], x, default=None) == 'category'),
        tz.map(lambda x: tz.get_in(['displayName'], x, default=None)),
        lambda x: ", ".join(x)
    )