def hydrate_dataset_part(part, dbc, cdir, dsid, as_blaze=True):
    if dbc is not None:
        logger.info('hydrating with database table')
        res = pipe(
            part.value,
            lambda x: DBTBL_FMT.format(dsid=dsid, part=x),
            dbc.resolve_table)
        res = res if as_blaze else odo(res, pd.DataFrame)
        return res
    else:
        logger.info('hydrating with feather file')
        bzfn = bz.data if as_blaze else identity
        try:
            res = pipe(
                part.value,
                curry(get_datafile_path)(dsid=dsid, cdir=cdir,
                                         ftyp=DatasetFileType.FEATHER),
                feather.read_dataframe,
                bzfn)
        except Exception:
            res = pipe(
                part.value,
                curry(get_datafile_path)(dsid=dsid, cdir=cdir,
                                         ftyp=DatasetFileType.JSONREC),
                curry(pd.read_json),
                bzfn)
        return res
def post_process_compiled_contracts(compiled_contracts):
    for contract_data in compiled_contracts:
        bytecode = contract_data.get('bytecode')
        if is_string(bytecode):
            bytecode_placeholder_locations = find_placeholder_locations(bytecode)
            bytecode_link_references = normalize_placeholder_link_references(
                bytecode_placeholder_locations,
                compiled_contracts,
            )
        else:
            bytecode_link_references = tuple()

        bytecode_runtime = contract_data.get('bytecode_runtime')
        if is_string(bytecode_runtime):
            bytecode_runtime_placeholder_locations = find_placeholder_locations(
                bytecode_runtime,
            )
            bytecode_runtime_link_references = normalize_placeholder_link_references(
                bytecode_runtime_placeholder_locations,
                compiled_contracts,
            )
        else:
            bytecode_runtime_link_references = tuple()

        yield pipe(
            contract_data,
            partial(assoc, key='linkrefs', value=bytecode_link_references),
            partial(assoc, key='linkrefs_runtime', value=bytecode_runtime_link_references),
        )
def get_compiled_contracts(self, source_file_paths, import_remappings):
    self.logger.debug("Import remappings: %s", import_remappings)
    self.logger.debug("Compiler Settings: %s", pprint.pformat(self.compiler_settings))

    if 'import_remappings' in self.compiler_settings and import_remappings is not None:
        self.logger.warn(
            "Import remappings setting will be overridden by backend settings"
        )

    try:
        compilation_result = compile_files(
            source_file_paths,
            import_remappings=import_remappings,
            **self.compiler_settings)
    except ContractsNotFound:
        return {}

    compiled_contracts = pipe(
        compilation_result,
        normalize_compilation_result,
        post_process_compiled_contracts,
    )
    return compiled_contracts
def facet_map(self):
    facs = (self.flevels.groupby(['facet'])
            .agg({'facet_level':
                  lambda x: x.dropna().drop_duplicates().tolist()})
            .pipe(lambda xf: u.fill_none(xf))
            .to_dict(orient='index'))
    return pipe(facs,
                curry(valmap)(lambda x: x['facet_level']),
                curry(keyfilter)(lambda x: x != 'Overall'),
                lambda x: merge(x, self.flevels_r))
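# Note on the pattern above: curry(valmap)(f) and curry(keyfilter)(pred) each
# return one-argument callables, which is exactly what pipe expects. A quick
# sketch with plain toolz (assuming the same imports as above):
#   curry(valmap)(len)({'a': [1, 2], 'b': []})               -> {'a': 2, 'b': 0}
#   curry(keyfilter)(lambda k: k != 'b')({'a': 1, 'b': 2})   -> {'a': 1}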
def process_text(self, input_text: str, **kwargs) -> str:
    return pipe(
        input_text,
        lambda x: self.clean_pattern.sub(' ', x),
        normalize_hyphenated_words,
        normalize_quotation_marks,
        normalize_unicode,
        normalize_whitespace)
def normalize(text: str) -> str:
    space = len(text) > 0 and text[-1] in string.whitespace
    text = text.lower()
    text = pipe(text, remove_accents, remove_punctuation, normalize_whitespace)
    if space:
        return f'{text} '
    return text
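# Rough behavior sketch, assuming remove_accents / remove_punctuation /
# normalize_whitespace act like textacy's preprocessing helpers (strip
# accents, drop punctuation, collapse and trim runs of whitespace):
#   normalize('Café,')   -> 'cafe'
#   normalize('Café, ')  -> 'cafe '   # the trailing space is deliberately kept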
def normalize_compilation_result(compilation_result):
    for key_from_compiler, raw_contract_data in compilation_result.items():
        contract_data = normalize_combined_json_contract_data(raw_contract_data)
        source_path, contract_name = normalize_combined_json_contract_key(
            key_from_compiler,
            contract_data,
        )
        yield pipe(
            contract_data,
            partial(assoc, key='source_path', value=source_path),
            partial(assoc, key='name', value=contract_name),
        )
def validate_unique(values):
    if not isdistinct(values):
        duplicates = pipe(
            values,
            frequencies,  # get the frequencies
            partial(valfilter, lambda v: v > 1),  # filter to ones that occur > 1
            sorted,  # sort them
            tuple,  # cast them to an immutable form
        )
        raise ValidationError(
            "The values provided are not unique. Duplicates: {0}".format(
                ', '.join((str(value) for value in duplicates))))
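# Example of the duplicate-detection pipeline, assuming ValidationError is
# this module's exception class:
#   validate_unique([1, 2, 2, 3, 3])
#   # frequencies -> {1: 1, 2: 2, 3: 2}; valfilter keeps {2: 2, 3: 2};
#   # sorted over that dict yields its keys, so the error reports "2, 3".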
def normalize_compilation_result(compilation_result):
    """
    Take the result from the --standard-json compilation and flatten it
    into an iterable of contract data dictionaries.
    """
    for source_path, file_contracts in compilation_result['contracts'].items():
        for contract_name, raw_contract_data in file_contracts.items():
            contract_data = normalize_standard_json_contract_data(raw_contract_data)
            yield pipe(
                contract_data,
                partial(assoc, key='source_path', value=source_path),
                partial(assoc, key='name', value=contract_name),
            )
def upgrade_user_config(user_config, to_version=LATEST_VERSION):
    try:
        current_version = user_config['version']
    except KeyError:
        raise KeyError("No version key found in user config file:\n\n{0}".format(
            pprint.pformat(user_config),
        ))
    upgrade_sequence = get_upgrade_sequence(current_version, to_version, KNOWN_USER_VERSIONS)
    upgrade_functions = tuple(
        USER_UPGRADE_FUNCTIONS[version]
        for version in upgrade_sequence
    )
    upgraded_user_config = pipe(user_config, *upgrade_functions)
    return upgraded_user_config
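# Minimal sketch of the pipe-over-upgrade-functions idea with hypothetical
# version bumps (the real functions live in USER_UPGRADE_FUNCTIONS):
from toolz import pipe

def _v1_to_v2(cfg):
    return {**cfg, 'version': '2'}

def _v2_to_v3(cfg):
    return {**cfg, 'version': '3'}

assert pipe({'version': '1'}, _v1_to_v2, _v2_to_v3)['version'] == '3'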
def validate_unique(values, title="Value"):
    if not isdistinct(values):
        duplicates = pipe(
            values,
            frequencies,  # get the frequencies
            partial(valfilter, lambda v: v > 1),  # filter to ones that occur > 1
            sorted,  # sort them
            tuple,  # cast them to an immutable form
        )
        raise ValidationError(
            "{title} does not contain unique items. Duplicates: {0}".format(
                ', '.join((str(value) for value in duplicates)),
                title=title,
            ))
def serialize_full_transaction(transaction, block, transaction_index, is_pending):
    if is_pending:
        block_number = None
        block_hash = None
        transaction_index = None
    else:
        block_number = block['number']
        block_hash = block['hash']

    return pipe(
        transaction,
        partial(assoc, key='block_number', value=block_number),
        partial(assoc, key='block_hash', value=block_hash),
        partial(assoc, key='transaction_index', value=transaction_index),
    )
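# Quick check of the partial(assoc, ...) pattern used throughout these
# snippets. toolz.assoc is non-mutating, so each pipe step returns a new
# dict (the tx values here are made up):
from functools import partial
from toolz import assoc, pipe

tx = {'hash': '0x00'}
out = pipe(
    tx,
    partial(assoc, key='block_number', value=1),
    partial(assoc, key='block_hash', value='0xff'),
)
assert out == {'hash': '0x00', 'block_number': 1, 'block_hash': '0xff'}
assert tx == {'hash': '0x00'}  # the input dict is left untouched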
def main():
    # corpus = "\n".join([gt.raw(fileid) for fileid in gt.fileids()])
    # corpus = '\n'.join([' '.join(s) for s in brown.sents()])
    text = open('src/synonymize/hounds_sherlock.txt').read().replace('_', ' ')
    bot = POSifiedText(text)
    try:
        while True:
            cmd = input('Generate sentence?')
            if cmd.lower() in ['n', 'no', 'q', 'quit']:
                break
            print(pipe(bot.make_sentence()))
    except KeyboardInterrupt:
        pass
def embedding_groups(
        node_list: List[T],
        persona_embedding_list: List[np.ndarray]) -> Dict[T, List[np.ndarray]]:
    """
    Utility function which, given the aligned node list and embedding list
    from the model.predict function, builds a dictionary from base graph
    nodes to lists of embeddings. The embeddings for a given base node are
    in no guaranteed order, and the order may differ between calls.

    :param node_list: list of base nodes, which may contain duplicates
    :param persona_embedding_list: corresponding embeddings
    :return: dictionary mapping base nodes to all their embeddings
    """
    return pipe(
        zip(node_list, persona_embedding_list),
        groupby(0),
        valmap(lambda x: list(map(getter(1), x))),
    )
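# Self-contained check of the grouping above, assuming the curried toolz API
# and that getter is operator.itemgetter:
import numpy as np
from operator import itemgetter as getter
from toolz import pipe
from toolz.curried import groupby, valmap

nodes = ['a', 'a', 'b']
embs = [np.zeros(2), np.ones(2), np.full(2, 2.0)]
grouped = pipe(
    zip(nodes, embs),
    groupby(0),                                   # group (node, emb) pairs by node
    valmap(lambda pairs: [p[1] for p in pairs]),  # keep just the embeddings
)
assert sorted(grouped) == ['a', 'b'] and len(grouped['a']) == 2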
def process_sas_survey(svy_cfg, facets, client=None, lgr=logger):
    g = svy_cfg
    prefix = g.s3_url_prefix
    lgr.bind(p=prefix)
    evalr = asteval.Interpreter()
    evalr.symtable['pd.util'] = pd.util
    fn = g.rename_cols
    map_fn = evalr(fn)
    df_munger = curry(sdf.munge_df)(
        facets=facets, qids=g.qids, na_syns=g.na_synonyms,
        col_fn=map_fn, fmts=g.patch_format, fpc=g.fpc, lgr=lgr)
    lbl_loader = curry(load_variable_labels)(repl=g.replace_labels)
    xpt_loader = curry(load_sas_xport_df)(lgr=lgr)
    dfs = map(
        lambda r: pipe(prefix + r.xpt,
                       delayed(xpt_loader),
                       delayed(df_munger(
                           r=r,
                           lbls=lbl_loader(prefix + r.format, prefix + r.formas)))),
        [r for idx, r in g.meta.iterrows()])
    lgr.info('merging SAS dfs')
    dfs = delayed(pd.concat)(dfs, ignore_index=True)
    scols = delayed(
        lambda xf: list(xf.columns
                        .intersection(set(g.qids)
                                      .union(facets))))(dfs)
    lgr.info('re-filtering question and facet columns to cast to category dtype',
             cols=scols)
    dfz = (dfs
           .apply(lambda x: x.astype('category'))
           .reset_index(drop=True)
           .assign(year=dfs['year'].astype(int),
                   sitecode=dfs['sitecode'].astype('category'),
                   weight=dfs['weight'].astype(float),
                   strata=dfs['strata'].astype(int, errors='ignore'),
                   psu=dfs['psu'].astype(int, errors='ignore'))
           .reset_index(drop=True))
    if g.fpc:
        dfz = (dfz.assign(fpc=dfs['fpc'].astype(int, errors='ignore'),
                          sample_ct=dfs['sample_ct'].astype(int, errors='ignore'))
               .reset_index(drop=True))
    dfz.visualize()
    lgr.info('merged SAS dfs')
    lgr.unbind('p')
    return dfz
def add_full_dependencies_to_compiled_contracts(compiled_contracts):
    dependency_graph = compute_direct_dependency_graph(compiled_contracts)
    deploy_order = compute_deploy_order(dependency_graph)

    for contract_data in compiled_contracts:
        full_dependencies = compute_recursive_contract_dependencies(
            contract_data['name'],
            dependency_graph,
        )
        ordered_full_dependencies = tuple(
            contract_name
            for contract_name in deploy_order
            if contract_name in full_dependencies)
        yield pipe(
            contract_data,
            partial(assoc, key='full_dependencies', value=full_dependencies),
            partial(assoc, key='ordered_full_dependencies', value=ordered_full_dependencies),
        )
def upgrade_config(config, config_context, to_version=LATEST_VERSION):
    if config_context == ConfigContext.USER:
        known_versions = KNOWN_USER_VERSIONS
    elif config_context == ConfigContext.LEGACY:
        known_versions = KNOWN_LEGACY_VERSIONS
    else:
        # guard against `known_versions` being unbound below
        raise ValueError("Unknown config_context: {0}".format(config_context))
    try:
        current_version = config['version']
    except KeyError:
        raise KeyError("No version key found in config file:\n\n{0}".format(
            pprint.pformat(config),
        ))
    upgrade_sequence = get_upgrade_sequence(current_version, to_version, known_versions)
    upgrade_functions = tuple(
        UPGRADE_FUNCTIONS[version]
        for version in upgrade_sequence
    )
    upgraded_config = pipe(config, *upgrade_functions)
    return upgraded_config
def process_text(self, input_text: str, **kwargs) -> str:
    temperature = kwargs.get('temperature', self.temperature or 0.25)
    result: List[str] = []
    for token in self.nlp(input_text >> self.cleaner):
        if any(x not in string.ascii_lowercase for x in token.orth_):
            result.append(token.orth_)
        else:
            new_token = pipe(
                token.orth_.lower(),
                lambda x: self.pin.manipulate(x, temperature=temperature))
            if token.orth_ == token.orth_.capitalize():
                new_token = new_token.capitalize()
            result.append(new_token)
    return ' '.join(result)
def block2dict(lines, repl, to_lower=False):
    f_lwr = str.lower if to_lower else identity
    f_repl = curry(lambda k, r: r[k] if k in r else k)(r=repl)
    rqt = re.compile(r'[\"\']')  # match quote chars
    rws = re.compile(r'\s')  # match whitespace
    # keep only alnum and a few unreserved symbols
    ruri = re.compile(r'(?![\w\s\-\_\.\'\$\-\+\(\)\/]|\.).')
    d = thread_last(
        lines,
        map(lambda x: x.replace('\x92', "'")),
        map(lambda x: rqt.sub('', x.strip()).split('=')),
        map(lambda x: (rws.sub('', x[0].strip()),
                       ruri.sub('', x[1].strip()))),
        filter(lambda x: x[0].find('-') == -1),  # no support for ranges
        (mapcat, lambda x: map(lambda y: (y, x[1]), x[0].split(','))),
        filter(lambda x: x[0].isnumeric()),  # remove non-numeric codes
        map(lambda x: (int(x[0]),  # cat codes are ints
                       pipe(x[1], f_lwr, f_repl))),
        dict
    )
    # d[-1] = np.nan  # use NA as a marker for unmapped vals
    return d
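# Hypothetical input/output sketch (assumes the curried toolz imports used
# above; note values are lowered *before* the repl lookup, so repl keys
# should be lowercase when to_lower=True):
#   block2dict(['1,2 = Yes', '3 = "No"'], repl={'no': 'negative'}, to_lower=True)
#   -> {1: 'yes', 2: 'yes', 3: 'negative'}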
def __init__(self, input_text: str, state_size: int = 2):
    nltk.download('brown')
    nltk.download('gutenberg')
    self.nlp = spacy.load('en_core_web_lg')
    self.synonyms: Dict[str, List[str]] = defaultdict(list)
    self.entities: Dict[str, List[str]] = defaultdict(list)
    input_text = pipe(
        input_text,
        # lambda x: x.replace('\n', ' '),
        lambda x: self.clean_pattern.sub(' ', x),
        normalize_hyphenated_words,
        normalize_quotation_marks,
        normalize_unicode,
        normalize_whitespace)
    markovify.Text.__init__(self, input_text, state_size, retain_original=False)
    self.grammar = Grammar({**self.synonyms, **self.entities})
    self.grammar.add_modifiers(base_english)
def post_process_compiled_contracts(compiled_contracts):
    return pipe(
        compiled_contracts,
        add_direct_dependencies_to_compiled_contracts,
        add_full_dependencies_to_compiled_contracts,
    )
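# Assuming both add_* steps are generator functions (as
# add_full_dependencies_to_compiled_contracts above is), this pipe wires up a
# lazy pipeline: no contract is processed until the returned iterator is
# consumed, e.g. via tuple(post_process_compiled_contracts(...)).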
def test_pipe():
    assert pipe(1, inc) == 2
    assert pipe(1, inc, inc) == 3
    assert pipe(1, double, inc, iseven) is False
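# pipe threads its first argument through the remaining callables left to
# right, i.e. pipe(x, f, g, h) == h(g(f(x))); the last assert above is
# iseven(inc(double(1))) == iseven(3) == False.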
def find_mismatched_levels(self):
    return pipe(self.meta.qns[ID_COLUMN],
                set,
                map(self.compare_levels),
                filter(lambda x: set(x['surveys']) != set(x['socrata'])))
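# For this to work, map and filter must be the curried toolz versions
# (from toolz.curried import map, filter): map(f) and filter(pred) return
# one-argument callables, so the pipe unrolls to
#   filter(pred)(map(self.compare_levels)(set(ids)))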
def _fetch_user_profiles(keyword: str) -> List[str]:
    return pipe(keyword,
                request_users_from_keyword_search,
                _get_json_from_response,
                get_users_from_json)
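# Equivalent nested form of the pipeline above:
#   get_users_from_json(
#       _get_json_from_response(
#           request_users_from_keyword_search(keyword)))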