def __init__(self, module, node_dim, edge_dim):
    super().__init__()
    self.module = module
    sig_gain = nn.init.calculate_gain('sigmoid')
    self.prev_node_gate = Linear(
        node_dim, node_dim, bias=False,
        init=ct.curry(nn.init.xavier_normal_)(gain=sig_gain))
    self.curr_node_gate = Linear(
        node_dim, node_dim, bias=True,
        init=ct.curry(nn.init.xavier_normal_)(gain=sig_gain))
    self.prev_edge_gate = Linear(
        edge_dim, edge_dim, bias=False,
        init=ct.curry(nn.init.xavier_normal_)(gain=sig_gain))
    self.curr_edge_gate = Linear(
        edge_dim, edge_dim, bias=True,
        init=ct.curry(nn.init.xavier_normal_)(gain=sig_gain))
    # zero-initialize the biases of the "current" gates
    nn.init.zeros_(self.curr_node_gate.bias.data)
    nn.init.zeros_(self.curr_edge_gate.bias.data)
def hydrate_dataset_part(part, dbc, cdir, dsid, as_blaze=True):
    if dbc is not None:
        logger.info('hydrating with database table')
        res = pipe(part.value,
                   lambda x: DBTBL_FMT.format(dsid=dsid, part=x),
                   dbc.resolve_table)
        return res if as_blaze else odo(res, pd.DataFrame)
    logger.info('hydrating with feather file')
    bzfn = bz.data if as_blaze else identity
    try:
        res = pipe(
            part.value,
            curry(get_datafile_path)(dsid=dsid, cdir=cdir,
                                     ftyp=DatasetFileType.FEATHER),
            feather.read_dataframe,
            bzfn)
    except Exception:
        # fall back to the JSON-records copy if the feather file is
        # missing or unreadable
        res = pipe(
            part.value,
            curry(get_datafile_path)(dsid=dsid, cdir=cdir,
                                     ftyp=DatasetFileType.JSONREC),
            curry(pd.read_json),
            bzfn)
    return res
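# Minimal usage sketch (hypothetical: `DatasetPart` is the enum whose
# members carry the part name in .value; dbc=None forces the file path):
#
#   df = hydrate_dataset_part(DatasetPart.TRAIN, dbc=None, cdir='/data',
#                             dsid='ds01', as_blaze=False)
#   # reads the feather copy (JSON-records on failure) and returns a
#   # plain pandas DataFrame because as_blaze=False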
def facet_map(self):
    facs = (self.flevels
            .groupby(['facet'])
            .agg({'facet_level':
                  lambda x: x.dropna().drop_duplicates().tolist()})
            .pipe(lambda xf: u.fill_none(xf))
            .to_dict(orient='index'))
    return pipe(facs,
                curry(valmap)(lambda x: x['facet_level']),
                curry(keyfilter)(lambda x: x != 'Overall'),
                lambda x: merge(x, self.flevels_r))
def __init__(self, in_features, out_features, n_linears=1, bias=True,
             init=ct.curry(nn.init.xavier_normal_)(
                 gain=nn.init.calculate_gain('relu'))):
    super().__init__()
    self.out_features = out_features
    self.n_linears = n_linears
    self.in_features = in_features
    self.init = init
    weights = torch.zeros(n_linears, in_features, out_features,
                          dtype=torch.float32)
    self.lin = nn.Parameter(weights)
    # initialize once, in place, on the parameter's storage
    # (the original also called init(weights) before wrapping, which
    # was redundant)
    self.init(self.lin.data)
    if bias:
        b = torch.zeros((n_linears, 1, self.out_features),
                        dtype=torch.float32)
        self.bias = nn.Parameter(b)
    else:
        self.bias = None
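# Shape sketch for the multi-head linear above (hypothetical usage; the
# module's forward is not shown here, so the batched-matmul form below is
# an assumption based on the parameter shapes):
#
#   ml = MultiLinear(in_features=16, out_features=8, n_linears=4)
#   x = torch.randn(4, 32, 16)          # (n_linears, batch, in_features)
#   y = torch.bmm(x, ml.lin) + ml.bias  # -> (4, 32, 8)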
def __init__(self, in_features, out_features, bias=True,
             init=ct.curry(nn.init.xavier_normal_)(gain=1.414)):
    # stash init before nn.Linear.__init__, which calls
    # reset_parameters() (presumably overridden to use self.init)
    self.init = init
    super().__init__(in_features, out_features, bias)
def map_with_dict(d, val):
    repl_f = curry(get_if)(d=d)
    typ = type(val)
    if typ == str:
        return repl_f(val)
    if typ in (list, set):
        # rebuild the same container type with mapped members
        return typ(map(repl_f, val))
    if typ == dict:
        # only keys are mapped; values pass through unchanged
        return keymap(repl_f, val)
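# Examples (assuming get_if(val, d=d) looks val up in d and falls back to
# val itself when the key is absent):
#
#   map_with_dict({'ny': 'New York'}, 'ny')          # -> 'New York'
#   map_with_dict({'ny': 'New York'}, ['ny', 'ca'])  # -> ['New York', 'ca']
#   map_with_dict({'ny': 'New York'}, {'ny': 1})     # -> {'New York': 1}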
def process_sas_survey(svy_cfg, facets, client=None, lgr=logger):
    g = svy_cfg
    prefix = g.s3_url_prefix
    lgr.bind(p=prefix)
    evalr = asteval.Interpreter()
    evalr.symtable['pd.util'] = pd.util
    fn = g.rename_cols
    map_fn = evalr(fn)
    df_munger = curry(sdf.munge_df)(
        facets=facets, qids=g.qids, na_syns=g.na_synonyms, col_fn=map_fn,
        fmts=g.patch_format, fpc=g.fpc, lgr=lgr)
    lbl_loader = curry(load_variable_labels)(repl=g.replace_labels)
    xpt_loader = curry(load_sas_xport_df)(lgr=lgr)
    dfs = map(
        lambda r: pipe(prefix + r.xpt,
                       delayed(xpt_loader),
                       delayed(df_munger(
                           r=r,
                           lbls=lbl_loader(prefix + r.format,
                                           prefix + r.formas)))),
        [r for idx, r in g.meta.iterrows()])
    lgr.info('merging SAS dfs')
    dfs = delayed(pd.concat)(dfs, ignore_index=True)
    scols = delayed(
        lambda xf: list(xf.columns
                        .intersection(set(g.qids)
                                      .union(facets))))(dfs)
    lgr.info('re-filtering question and facet columns '
             'to cast to category dtype', cols=scols)
    dfz = (dfs
           .apply(lambda x: x.astype('category'))
           .reset_index(drop=True)
           .assign(year=dfs['year'].astype(int),
                   sitecode=dfs['sitecode'].astype('category'),
                   weight=dfs['weight'].astype(float),
                   strata=dfs['strata'].astype(int, errors='ignore'),
                   psu=dfs['psu'].astype(int, errors='ignore'))
           .reset_index(drop=True))
    if g.fpc:
        dfz = (dfz.assign(
                   fpc=dfs['fpc'].astype(int, errors='ignore'),
                   sample_ct=dfs['sample_ct'].astype(int, errors='ignore'))
               .reset_index(drop=True))
    dfz.visualize()
    lgr.info('merged SAS dfs')
    lgr.unbind('p')
    return dfz
def parse_variable_labels(txt, repl, lbls_to_lower=True):
    b2d = curry(block2dict)(repl=repl, to_lower=lbls_to_lower)
    labels = thread_last(
        txt.split(';'),
        filter(lambda x: x.strip().lower().startswith('value')),
        map(lambda x: x.strip().split('\n')),
        # first line is "value <format name>"; the rest are code=label pairs
        map(lambda x: (x[0].split()[1].lower(), b2d(x[1:]))),
        dict)
    logger.info('parsed varlabels from format txt',
                nlabeled=len(labels.keys()), nrepl=len(repl.keys()))
    return labels
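# Worked example on an illustrative SAS format block (not source data):
#
#   txt = "value yesno\n1='Yes'\n2='No'\n;"
#   parse_variable_labels(txt, repl={})
#   # -> {'yesno': {1: 'yes', 2: 'no'}}   (labels lowercased by default)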
def get_metadata_socrata_denovo(soc_cfg):
    g = soc_cfg
    revmap = {v: k for k, v in g.mapcols.items()}
    url = ('{api_url}?'
           '$select={cols}'
           '&$order={ocols}')
    meta_diff = list(set(g.qn_meta).difference(g.computed))
    qncols = ','.join([revmap.get(k, k) for k in meta_diff])
    ocols = ','.join([revmap['qid'], 'year'])
    logger.info('loading SODA meta data')
    res = thread_last(
        g.soda_api,
        map(lambda x: url.format(api_url=x, cols=qncols, ocols=ocols)),
        map(dl.df_from_socrata_url),
        curry(pd.concat)(ignore_index=True))
    # disabled post-processing steps, kept for reference:
    # lambda xf: xf.applymap(lambda x: (re.sub('\xa0', '', x)).strip()),
    # lambda xf: xf.rename(index=str,
    #                      columns={x: x.lower() for x in xf.columns}),
    # lambda xf: xf if not g.mapcols else xf.rename(index=str,
    #                                               columns=g.mapcols),
    # curry(apply_fn2vals)(fns=g.apply_fn),
    # lambda xf: xf if not g.mapvals else xf.replace(g.mapvals),
    # lambda xf: xf if not g.mapvals else xf.applymap(
    #     lambda x: g.mapvals[x.lower().strip()]
    #     if x.lower().strip() in g.mapvals else x),
    # lambda xf: xf[g.qn_meta])
    logger.info('finished transformations', res=res.head())
    # pull out question -> response breakouts
    qns = (res[['qid', 'year', 'topic', 'subtopic', 'question', 'response']]
           .drop_duplicates()
           .reset_index(drop=True))
    # since facets are questions as well, update the dict with response
    # values from fc_res, overriding the original var (N.B.)
    yrvec = (res[['year']]
             .drop_duplicates()
             .assign(facet='year')
             .rename(index=str, columns={'year': 'facet_level'}))
    stvec = (res[['sitecode']]
             .drop_duplicates()
             .assign(facet='sitecode')
             .rename(index=str, columns={'sitecode': 'facet_level'}))
    facs = pd.concat(
        [res[['facet', 'facet_level']].drop_duplicates(), yrvec, stvec],
        axis=0).reset_index(drop=True)
    logger.info('created qn and facs', qn=qns.head(), fac=facs.head())
    return (qns, facs)
def get_qids_by_year(soc_cfg):
    g = soc_cfg
    revmap = {v: k for k, v in g.mapcols.items()}
    url = ('{api_url}?'
           '$select=year,{qnkey},count(year)'
           '&$group=year,{qnkey}'
           '&$order={qnkey},year')
    qid = revmap['qid']
    df = thread_last(
        g.soda_api,
        map(lambda x: url.format(api_url=x, qnkey=qid)),
        map(dl.df_from_socrata_url),
        curry(pd.concat)(ignore_index=True))
    df.to_csv(sys.stdout)
def munge_df(df, r, lbls, facets, qids, na_syns, col_fn, fmts,
             fpc=False, lgr=logger):
    year = r['year']
    lgr.bind(year=year)
    lgr.info('filtering, applying varlabels, munging',
             patch_fmts=fmts.keys(), colfn=col_fn, shp=df.shape, lbls=lbls)
    # get mapping into table for each facet
    facets = {r[k]: k for k in facets}
    if not qids:
        qids = list(
            set(lbls.keys()).difference(
                [r['sitecode'], r['year'], r['weight'],
                 r['psu'], r['strata']] +
                [k for k in facets]))
    ncols = {k: k.lower() for k in list(df.columns)}
    ndf = (df.rename(index=str, columns=ncols)
           .pipe(lambda xdf: filter_columns(xdf, facets, qids))
           .reset_index(drop=True)
           .apply(lambda x: eager_convert_categorical(x, lbls, fmts, lgr))
           .rename(index=str, columns=facets)
           .pipe(curry(find_na_synonyms)(na_syns))
           .reset_index(drop=True)
           .assign(year=(int(year) if type(year) == int
                         else df[year].astype(int)),
                   sitecode=(df[r['sitecode']]
                             .apply(SITECODE_TRANSLATORS[r['sitecode_type']])
                             .astype('category')),
                   weight=df[r['weight']].astype(float),
                   strata=df[r['strata']].astype(int),
                   psu=df[r['psu']].astype(int))
           .reset_index(drop=True))
    if fpc:
        ndf = (ndf.assign(fpc=df[r['fpc']].astype(float),
                          sample_ct=df[r['sample_ct']].astype(int))
               .reset_index(drop=True))
    ndf.columns = list(map(pdutil.undash, list(ndf.columns)))
    lgr.info('completed SAS df munging')
    lgr.unbind('year')
    return ndf
def __init__(self, node_dim, n_heads, attn_key='emb', msg_key='emb',
             alpha=.2):
    super().__init__()
    self.attn = MultiLinear(
        node_dim, 1, n_heads, bias=False,
        init=ct.curry(nn.init.xavier_normal_)(
            gain=nn.init.calculate_gain('leaky_relu', alpha)))
    self.leaky_relu = nn.LeakyReLU(alpha)
    self.n_heads = n_heads
    self.msg_key = msg_key
    self.attn_key = attn_key
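# Note (sketch, not part of the module): PyTorch computes the leaky_relu
# gain as sqrt(2 / (1 + negative_slope**2)), so with alpha=0.2 the xavier
# gain used above is roughly 1.39:
#
#   import math
#   math.sqrt(2 / (1 + 0.2 ** 2))  # ~1.387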
def block2dict(lines, repl, to_lower=False):
    f_lwr = str.lower if to_lower else identity
    f_repl = curry(lambda k, r: r[k] if k in r else k)(r=repl)
    rqt = re.compile(r'[\"\']')  # match quote chars
    rws = re.compile(r'\s')  # match whitespace
    # keep only alnum and a few unreserved symbols
    ruri = re.compile(r'(?![\w\s\-\_\.\'\$\-\+\(\)\/]|\.).')
    d = thread_last(
        lines,
        map(lambda x: x.replace('\x92', "'")),
        map(lambda x: rqt.sub('', x.strip()).split('=')),
        map(lambda x: (rws.sub('', x[0].strip()),
                       ruri.sub('', x[1].strip()))),
        filter(lambda x: x[0].find('-') == -1),  # no support for ranges
        (mapcat, lambda x: map(lambda y: (y, x[1]), x[0].split(','))),
        filter(lambda x: x[0].isnumeric()),  # remove non-numeric codes
        map(lambda x: (int(x[0]),  # cat codes are ints
                       pipe(x[1], f_lwr, f_repl))),
        dict)
    # d[-1] = np.nan  # use NA as a marker for unmapped vals
    return d
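# Example (illustrative input): comma-separated codes fan out to one
# entry per code, and repl swaps a parsed label for a replacement:
#
#   block2dict(["1,2='Agree'", "3='Disagree'"],
#              repl={'agree': 'yes'}, to_lower=True)
#   # -> {1: 'yes', 2: 'yes', 3: 'disagree'}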
def load_variable_labels(format_f, formas_f, repl, year=None):
    logger.info("loading format labels", file=format_f)
    labels = thread_last(
        format_f,
        dl.fetch_data_from_url,
        lambda x: x.read(),
        lambda t: (t.decode('utf-8', errors='ignore')
                   if type(t) is bytes else t),
        curry(parse_variable_labels)(repl=repl))
    logger.info("loaded format labels", lbls=labels)
    logger.info("loading format assignments", file=formas_f)
    assignments = thread_last(
        formas_f,
        dl.fetch_data_from_url,
        lambda x: x.read(),
        lambda t: (t.decode('utf-8', errors='ignore')
                   if type(t) is bytes else t),
        parse_format_assignments)
    logger.info("loaded format assns", ass=assignments)
    # resolve each variable to the label dict of its assigned format
    return {k: labels[v] for k, v in assignments.items() if v in labels}
import itertools as it
import math
import sys
from pprint import pprint as pp

import cytoolz.curried as cc

# Assumed scaffolding: the imports, the helpers is_odd_square and
# neighbors_sum, the step_fns cycle, and the starting state inside
# sum_path are reconstructions of the spiral-memory walk this loop
# implies; only the loop body and the final pipe follow the source.


def is_odd_square(x):
    # True when x is the square of an odd integer (ring corner cells)
    r = math.isqrt(x)
    return r * r == x and r % 2 == 1


def neighbors_sum(pos, sums):
    x, y = pos
    return sum(sums.get((x + dx, y + dy), 0)
               for dx in (-1, 0, 1) for dy in (-1, 0, 1)
               if (dx, dy) != (0, 0))


def sum_path():
    # counter-clockwise step functions: right, up, left, down
    step_fns = it.cycle([lambda p: (p[0] + 1, p[1]),
                         lambda p: (p[0], p[1] + 1),
                         lambda p: (p[0] - 1, p[1]),
                         lambda p: (p[0], p[1] - 1)])
    n, pos, side_length = 2, (1, 0), 1  # assumed start: cell 2, right of origin
    sum_dict = {(0, 0): 1}  # cell 1 holds 1
    step_fn = next(step_fns)
    rotation_break_seq = set()
    while True:
        if is_odd_square(n - 1):
            # just stepped off a completed ring: turn, grow the ring, and
            # precompute the cells on the new ring where the walk turns
            step_fn = next(step_fns)
            side_length += 2
            delta_seq = [side_length - 2,
                         side_length - 2 + 1,
                         side_length - 2 + 1]
            rotation_break_seq = cc.pipe(
                it.accumulate([n] + delta_seq), cc.drop(1), set)
        elif n in rotation_break_seq:
            step_fn = next(step_fns)
        sum_dict[pos] = neighbors_sum(pos, sum_dict)
        yield (pos, sum_dict[pos])
        pos = step_fn(pos)
        n += 1


# first spiral sum strictly greater than the target on the command line
answer = cc.pipe(sum_path(),
                 cc.curry(it.dropwhile)(lambda x: x[1] <= int(sys.argv[1])),
                 cc.take(1),
                 list,
                 lambda x: x[0][1])
pp(answer)