Example 1
 def __init__(self, module, node_dim, edge_dim):
     super().__init__()
     self.module = module
     sig_gain = nn.init.calculate_gain('sigmoid')
     self.prev_node_gate = Linear(
         node_dim,
         node_dim,
         bias=False,
         init=ct.curry(nn.init.xavier_normal_)(gain=sig_gain))
     self.curr_node_gate = Linear(
         node_dim,
         node_dim,
         bias=True,
         init=ct.curry(nn.init.xavier_normal_)(gain=sig_gain))
     self.prev_edge_gate = Linear(
         edge_dim,
         edge_dim,
         bias=False,
         init=ct.curry(nn.init.xavier_normal_)(gain=sig_gain))
     self.curr_edge_gate = Linear(
         edge_dim,
         edge_dim,
         bias=True,
         init=ct.curry(nn.init.xavier_normal_)(gain=sig_gain))
     nn.init.zeros_(self.curr_node_gate.bias.data)
     nn.init.zeros_(self.curr_edge_gate.bias.data)
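
The ct.curry(nn.init.xavier_normal_)(gain=...) pattern partially applies the
PyTorch initializer so the custom Linear can later call it with just a weight
tensor. A minimal sketch of what the curried object does:

    import cytoolz as ct
    import torch
    import torch.nn as nn

    init = ct.curry(nn.init.xavier_normal_)(gain=nn.init.calculate_gain('sigmoid'))
    w = torch.empty(4, 4)
    init(w)  # same as nn.init.xavier_normal_(w, gain=1.0); fills w in place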
Example 2
def hydrate_dataset_part(part, dbc, cdir, dsid, as_blaze=True):
    if dbc is not None:
        logger.info('hydrating with database table')
        res = pipe(part.value, lambda x: DBTBL_FMT.format(dsid=dsid, part=x),
                   dbc.resolve_table)
        res = res if as_blaze else odo(res, pd.DataFrame)
        return res
    else:
        logger.info('hydrating with feather file')
        bzfn = bz.data if as_blaze else identity
        try:
            res = pipe(
                part.value,
                curry(get_datafile_path)(dsid=dsid,
                                         cdir=cdir,
                                         ftyp=DatasetFileType.FEATHER),
                feather.read_dataframe, bzfn)
        except Exception:
            # fall back to JSON records when the feather read fails
            res = pipe(
                part.value,
                curry(get_datafile_path)(dsid=dsid,
                                         cdir=cdir,
                                         ftyp=DatasetFileType.JSONREC),
                curry(pd.read_json), bzfn)
        return res
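
The curried get_datafile_path pins its keyword arguments so pipe only has to
supply the leading positional part value. A toy equivalent of that keyword
partial application (get_path is a hypothetical stand-in):

    from toolz import curry, pipe

    def get_path(part, dsid, cdir, ftyp):  # hypothetical stand-in
        return f'{cdir}/{dsid}/{part}.{ftyp}'

    pipe('part-0', curry(get_path)(dsid='ds1', cdir='/tmp', ftyp='feather'))
    # -> '/tmp/ds1/part-0.feather'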
Example 3
 def facet_map(self):
     facs = (self.flevels.groupby(['facet']).agg({
         'facet_level':
         lambda x: x.dropna().drop_duplicates().tolist()
     }).pipe(lambda xf: u.fill_none(xf)).to_dict(orient='index'))
     return pipe(facs,
                 curry(valmap)(lambda x: x['facet_level']),
                 curry(keyfilter)(lambda x: x != 'Overall'),
                 lambda x: merge(x, self.flevels_r))
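
The tail of the pipeline reduces each facet record to its level list, drops
the 'Overall' pseudo-facet, and merges in the reverse lookup. With toy data
(illustrative values only):

    from toolz import pipe
    from toolz.curried import keyfilter, valmap

    facs = {'Overall': {'facet_level': ['Total']},
            'sex': {'facet_level': ['Female', 'Male']}}
    pipe(facs,
         valmap(lambda x: x['facet_level']),
         keyfilter(lambda k: k != 'Overall'))
    # -> {'sex': ['Female', 'Male']}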
Example 4
 def __init__(self,
              in_features,
              out_features,
              n_linears=1,
              bias=True,
              init=ct.curry(nn.init.xavier_normal_)(
                  gain=nn.init.calculate_gain('relu'))):
     super().__init__()
     self.out_features = out_features
     self.n_linears = n_linears
     self.in_features = in_features
     self.init = init
     weights = torch.zeros(n_linears,
                           in_features,
                           out_features,
                           dtype=torch.float32)
     self.init(weights)  # apply the configured initializer once, in place
     self.lin = nn.Parameter(weights)
     if bias:
         b = torch.zeros((n_linears, 1, self.out_features),
                         dtype=torch.float32)
         self.bias = nn.Parameter(b)
     else:
         self.bias = None
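
Given the (n_linears, in_features, out_features) weight and
(n_linears, 1, out_features) bias shapes, the forward pass presumably batches
the independent linears with torch.bmm. A hypothetical sketch (the source
does not show forward):

    def forward(self, x):              # x: (n_linears, batch, in_features)
        y = torch.bmm(x, self.lin)     # -> (n_linears, batch, out_features)
        if self.bias is not None:
            y = y + self.bias          # (n_linears, 1, out_features) broadcasts
        return y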
Example 5
 def __init__(self,
              in_features,
              out_features,
              bias=True,
              init=ct.curry(nn.init.xavier_normal_)(gain=1.414)):
     # set before super().__init__(), which calls reset_parameters()
     # (presumably overridden to use self.init)
     self.init = init
     super().__init__(in_features, out_features, bias)
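
The snippet does not show reset_parameters(), but nn.Linear.__init__ invokes
it, so presumably the subclass overrides it to route through self.init. A
hypothetical sketch:

    def reset_parameters(self):
        self.init(self.weight.data)    # hypothetical override
        if getattr(self, 'bias', None) is not None:
            nn.init.zeros_(self.bias.data)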
Example 6
def map_with_dict(d, val):
    repl_f = curry(get_if)(d=d)
    typ = type(val)
    if typ == str:
        return repl_f(val)
    if typ in (list, set):
        return typ(map(repl_f, val))
    if typ == dict:
        return keymap(repl_f, val)
    # any other type falls through, returning None
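
Assuming get_if(val, d=d) returns d[val] when the key is present and val
otherwise, the type dispatch behaves like this (illustrative):

    d = {'a': 'A', 'b': 'B'}
    map_with_dict(d, 'a')               # -> 'A'
    map_with_dict(d, ['a', 'b', 'z'])   # -> ['A', 'B', 'z']
    map_with_dict(d, {'a': 1, 'z': 2})  # -> {'A': 1, 'z': 2}  (keys remapped)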
Example 7
def process_sas_survey(svy_cfg, facets, client=None, lgr=logger):
    g = svy_cfg
    prefix = g.s3_url_prefix
    lgr.bind(p=prefix)
    evalr = asteval.Interpreter()
    evalr.symtable['pd.util'] = pd.util
    fn = g.rename_cols
    map_fn = evalr(fn)
    df_munger = curry(sdf.munge_df)(facets=facets, qids=g.qids,
                                    na_syns=g.na_synonyms, col_fn=map_fn,
                                    fmts=g.patch_format, fpc=g.fpc, lgr=lgr)
    lbl_loader = curry(load_variable_labels)(repl=g.replace_labels)
    xpt_loader = curry(load_sas_xport_df)(lgr=lgr)
    dfs = map(
        lambda r: pipe(prefix+r.xpt,
                       delayed(xpt_loader),
                       delayed(df_munger(r=r,
                                         lbls=lbl_loader(prefix+r.format,
                                                         prefix+r.formas)))),
        [r for idx, r in g.meta.iterrows()])
    lgr.info('merging SAS dfs')
    dfs = delayed(pd.concat)(dfs, ignore_index=True)
    scols = delayed(
        lambda xf: list(xf.columns
                          .intersection(set(g.qids)
                                        .union(facets))))(dfs)
    lgr.info('re-filtering question and facet columns to cast '
             'to category dtype', cols=scols)
    dfz = (dfs
           .apply(lambda x: x.astype('category'))
           .reset_index(drop=True)
           .assign(year=dfs['year'].astype(int),
                   sitecode=dfs['sitecode'].astype('category'),
                   weight=dfs['weight'].astype(float),
                   strata=dfs['strata'].astype(int, errors='ignore'),
                   psu=dfs['psu'].astype(int, errors='ignore'))
           .reset_index(drop=True))
    if g.fpc:
        dfz = (dfz.assign(fpc=dfs['fpc'].astype(int, errors='ignore'),
                          sample_ct=dfs['sample_ct'].astype(int,
                                                            errors='ignore'))
                  .reset_index(drop=True))
    dfz.visualize()
    lgr.info('merged SAS dfs')
    lgr.unbind('p')
    return dfz
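
Note that dfs and dfz are dask Delayed objects: the attribute accesses,
assign calls, and indexing above only extend the task graph, and
dfz.visualize() renders that graph rather than computing it. The caller
presumably materializes the frame, e.g.:

    df = dfz.compute()  # hypothetical call site; triggers the delayed pipeline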
Example 8
def parse_variable_labels(txt, repl, lbls_to_lower=True):
    b2d = curry(block2dict)(repl=repl, to_lower=lbls_to_lower)
    # NB: map/filter here must be the curried (toolz.curried) variants for
    # the single-argument staging to work
    labels = thread_last(
        txt.split(';'),
        filter(lambda x: x.strip().lower().startswith('value')),
        map(lambda x: x.strip().split('\n')),
        map(lambda x: (x[0].split()[1].lower(), b2d(x[1:]))),
        dict
    )
    logger.info('parsed varlabels from format txt',
                nlabeled=len(labels), nrepl=len(repl))
    return labels
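
On a typical SAS PROC FORMAT block the parser maps each format name to the
code->label dict produced by block2dict (Example 13). Illustrative input:

    txt = 'value SEXF\n    1 = "Male"\n    2 = "Female";'
    parse_variable_labels(txt, repl={})
    # -> {'sexf': {1: 'male', 2: 'female'}}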
Example 9
def get_metadata_socrata_denovo(soc_cfg):
    g = soc_cfg
    revmap = {v: k for k, v in g.mapcols.items()}
    url = ('{api_url}?'
           '$select={cols}'
           '&$order={ocols}')
    meta_diff = list(set(g.qn_meta).difference(g.computed))
    qncols = ','.join([revmap.get(k, k) for k in meta_diff])

    ocols = ','.join([revmap['qid'], 'year'])

    logger.info('loading SODA meta data')
    res = thread_last(
        g.soda_api,
        map(lambda x: url.format(api_url=x, cols=qncols, ocols=ocols)),
        map(dl.df_from_socrata_url),
        curry(pd.concat)(ignore_index=True),
        lambda xf: xf.applymap(lambda x: re.sub('\xa0', '', x).strip()),
        lambda xf: xf.rename(index=str,
                             columns={x: x.lower() for x in xf.columns}),
        lambda xf: (xf if not g.mapcols
                    else xf.rename(index=str, columns=g.mapcols)),
        curry(apply_fn2vals)(fns=g.apply_fn),
        lambda xf: xf if not g.mapvals else xf.replace(g.mapvals),
        lambda xf: (xf if not g.mapvals
                    else xf.applymap(lambda x: g.mapvals[x.lower().strip()]
                                     if x.lower().strip() in g.mapvals
                                     else x)),
        lambda xf: xf[g.qn_meta])
    logger.info('finished transformations', res=res.head())
    # pull out question -> response breakouts
    qns = (res[['qid', 'year', 'topic', 'subtopic', 'question', 'response']]
           .drop_duplicates()
           .reset_index(drop=True))
    # facets are themselves questions: build (facet, facet_level) vectors
    # for year and sitecode and append them to the facet table, overriding
    # the original values (N.B.)
    yrvec = (res[['year']]
             .drop_duplicates()
             .assign(facet='year')
             .rename(index=str, columns={'year': 'facet_level'}))
    stvec = (res[['sitecode']]
             .drop_duplicates()
             .assign(facet='sitecode')
             .rename(index=str, columns={'sitecode': 'facet_level'}))
    facs = pd.concat([res[['facet', 'facet_level']].drop_duplicates(),
                      yrvec, stvec], axis=0).reset_index(drop=True)
    logger.info('created qn and facs', qn=qns.head(), fac=facs.head())
    return (qns, facs)
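
The url template expands into one SoQL query per endpoint, for instance
(hypothetical endpoint id):

    url = '{api_url}?$select={cols}&$order={ocols}'
    url.format(api_url='https://data.example.gov/resource/abcd-1234.json',
               cols='year,qid,topic', ocols='qid,year')
    # -> 'https://data.example.gov/resource/abcd-1234.json?$select=year,qid,topic&$order=qid,year'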
Example 10
def get_qids_by_year(soc_cfg):
    g = soc_cfg
    revmap = {v: k for k, v in g.mapcols.items()}
    url = ('{api_url}?'
           '$select=year,{qnkey},count(year)'
           '&$group=year,{qnkey}'
           '&$order={qnkey},year')
    qid = revmap['qid']
    df = thread_last(g.soda_api,
                     map(lambda x: url.format(api_url=x, qnkey=qid)),
                     map(dl.df_from_socrata_url),
                     curry(pd.concat)(ignore_index=True))
    df.to_csv(sys.stdout)  # writes qid-by-year counts to stdout; returns None
Example 11
def munge_df(df,
             r,
             lbls,
             facets,
             qids,
             na_syns,
             col_fn,
             fmts,
             fpc=False,
             lgr=logger):
    year = r['year']
    lgr.bind(year=year)
    lgr.info('filtering, applying varlabels, munging',
             patch_fmts=fmts.keys(),
             colfn=col_fn,
             shp=df.shape,
             lbls=lbls)
    # invert: source column name (r[k]) -> canonical facet name
    facets = {r[k]: k for k in facets}
    if not qids:
        qids = list(
            set(lbls.keys()).difference(
                [r['sitecode'], r['year'], r['weight'], r['psu'], r['strata']]
                + [k for k in facets]))
    ncols = {k: k.lower() for k in list(df.columns)}
    ndf = (df
           .rename(index=str, columns=ncols)
           .pipe(lambda xdf: filter_columns(xdf, facets, qids))
           .reset_index(drop=True)
           .apply(lambda x: eager_convert_categorical(x, lbls, fmts, lgr))
           .rename(index=str, columns=facets)
           .pipe(curry(find_na_synonyms)(na_syns))
           .reset_index(drop=True)
           # r['year'] may hold a literal year or the name of a year column
           .assign(year=(int(year) if type(year) == int
                         else df[year].astype(int)),
                   sitecode=(df[r['sitecode']]
                             .apply(SITECODE_TRANSLATORS[r['sitecode_type']])
                             .astype('category')),
                   weight=df[r['weight']].astype(float),
                   strata=df[r['strata']].astype(int),
                   psu=df[r['psu']].astype(int))
           .reset_index(drop=True))
    if fpc:
        ndf = (ndf.assign(
            fpc=df[r['fpc']].astype(float),
            sample_ct=df[r['sample_ct']].astype(int)).reset_index(drop=True))
    ndf.columns = list(map(pdutil.undash, list(ndf.columns)))
    lgr.info('completed SAS df munging')
    lgr.unbind('year')
    return ndf
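
The facets inversion near the top maps each source column to its canonical
facet name, which is what lets the later rename(index=str, columns=facets)
canonicalize the headers. With illustrative row metadata:

    facets = ['sex', 'grade']
    r = {'sex': 'q2', 'grade': 'q3'}  # hypothetical row values
    {r[k]: k for k in facets}         # -> {'q2': 'sex', 'q3': 'grade'}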
Example 12
 def __init__(self,
              node_dim,
              n_heads,
              attn_key='emb',
              msg_key='emb',
              alpha=.2):
     super().__init__()
     self.attn = MultiLinear(
         node_dim,
         1,
         n_heads,
         bias=False,
         init=ct.curry(nn.init.xavier_normal_)(
             gain=nn.init.calculate_gain('leaky_relu', alpha)))
     self.leaky_relu = nn.LeakyReLU(alpha)
     self.n_heads = n_heads
     self.msg_key = msg_key
     self.attn_key = attn_key
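
For leaky_relu, calculate_gain evaluates sqrt(2 / (1 + alpha**2)), so the
attention weights are scaled to suit the LeakyReLU that follows:

    import math
    import torch.nn as nn

    nn.init.calculate_gain('leaky_relu', 0.2)  # -> 1.3867...
    math.sqrt(2 / (1 + 0.2 ** 2))              # same value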
Example 13
def block2dict(lines, repl, to_lower=False):
    f_lwr = str.lower if to_lower else identity
    f_repl = curry(lambda k, r: r[k] if k in r else k)(r=repl)
    rqt = re.compile(r'[\"\']')  # match quote chars
    rws = re.compile(r'\s')        # match whitespace
    # keep only alnum and a few unreserved symbols
    ruri = re.compile(r'(?![\w\s\-\_\.\'\$\-\+\(\)\/]|\.).')
    d = thread_last(
        lines,
        map(lambda x: x.replace('\x92', "'")),
        map(lambda x: rqt.sub('', x.strip()).split('=')),
        map(lambda x: (rws.sub('', x[0].strip()), ruri.sub('', x[1].strip()))),
        filter(lambda x: x[0].find('-') == -1),  # no support for ranges
        (mapcat, lambda x: map(lambda y: (y, x[1]), x[0].split(','))),
        filter(lambda x: x[0].isnumeric()),  # remove non-numeric codes
        map(lambda x: (int(x[0]),  # cat codes are ints
                       pipe(x[1], f_lwr, f_repl))),
        dict
    )
    # d[-1] = np.nan  # use NA as a marker for unmapped vals
    return d
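
On typical format lines the parser strips quotes, fans out comma-separated
code lists, and lower-cases labels (illustrative input):

    block2dict(['1 = "Strongly Agree"', '2,3 = "Other"'], repl={}, to_lower=True)
    # -> {1: 'strongly agree', 2: 'other', 3: 'other'}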
Example 14
def load_variable_labels(format_f, formas_f, repl, year=None):
    logger.info("loading format labels", file=format_f)
    labels = thread_last(
        format_f,
        dl.fetch_data_from_url,
        lambda x: x.read(),
        lambda t: (t.decode('utf-8', errors='ignore')
                   if type(t) is bytes else t),
        curry(parse_variable_labels)(repl=repl)
    )
    logger.info("loaded format labels", lbls=labels)
    logger.info("loading format assignments", file=formas_f)
    assignments = thread_last(
        formas_f,
        dl.fetch_data_from_url,
        lambda x: x.read(),
        lambda t: (t.decode('utf-8', errors='ignore')
                   if type(t) is bytes else t),
        parse_format_assignments
    )
    logger.info("loaded format assns", ass=assignments)
    return {k: labels[v] for k, v in assignments.items() if v in labels}
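
The final comprehension joins the two parses: assignments map variables to
format names, which are then replaced by their label dicts (toy values):

    labels = {'sexf': {1: 'male', 2: 'female'}}  # from parse_variable_labels
    assignments = {'q2': 'sexf'}                 # from the assignments parse
    {k: labels[v] for k, v in assignments.items() if v in labels}
    # -> {'q2': {1: 'male', 2: 'female'}}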
Example 15
    step_fn = next(step_fns)
    rotation_break_seq = set()

    while True:
        if is_odd_square(n - 1):
            step_fn = next(step_fns)
            side_length += 2
            delta_seq = [
                side_length - 2, side_length - 2 + 1, side_length - 2 + 1
            ]

            rotation_break_seq = cc.pipe(it.accumulate([n] + delta_seq),
                                         cc.drop(1), set)
        elif n in rotation_break_seq:
            step_fn = next(step_fns)

        sum_dict[pos] = neighbors_sum(pos, sum_dict)

        yield (pos, sum_dict[pos])

        pos = step_fn(pos)
        n += 1


answer = cc.pipe(sum_path(),
                 cc.curry(it.dropwhile)(lambda x: x[1] <= int(sys.argv[1])),
                 cc.take(1), list, lambda x: x[0][1])

pp(answer)
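
The snippet starts mid-way through sum_path(), a generator that walks a
square spiral, stores each visited cell's neighbor sum, and rotates its step
function at ring boundaries; the driver then drops values until one exceeds
the command-line threshold. The helpers it assumes might look like this
(a hypothetical sketch, not the original code):

    def is_odd_square(n):
        # spiral rings close at odd perfect squares: 1, 9, 25, ...
        root = int(round(n ** 0.5))
        return root * root == n and root % 2 == 1

    def neighbors_sum(pos, sums):
        # sum of the (up to 8) already-visited neighbors of pos
        x, y = pos
        return sum(sums.get((x + dx, y + dy), 0)
                   for dx in (-1, 0, 1) for dy in (-1, 0, 1)
                   if (dx, dy) != (0, 0))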