def _get_column_lists(columns):
    """Return the column names and the value converters for special columns.

    ``columns`` is an iterable of mappings with ``"name"`` and ``"type"``
    keys. The result is ``(fieldnames, special_columns)``: every column name
    in order, plus a dict mapping the names of boolean, date and datetime
    columns to the callable that converts their values. The date/datetime
    formats are derived from the current organization's ``date_format`` and
    ``time_format`` settings.
    """
    date_format = _convert_format(current_org.get_setting("date_format"))

    date_setting = current_org.get_setting("date_format")
    time_setting = current_org.get_setting("time_format")
    datetime_format = _convert_format(
        "{} {}".format(date_setting, time_setting)
    )

    # Column type -> converter applied to each value of such a column.
    converters = {
        TYPE_BOOLEAN: _convert_bool,
        TYPE_DATE: rpartial(_convert_datetime, date_format),
        TYPE_DATETIME: rpartial(_convert_datetime, datetime_format),
    }

    fieldnames = []
    special_columns = {}
    for column in columns:
        name = column["name"]
        fieldnames.append(name)
        for known_type, converter in converters.items():
            if column["type"] == known_type:
                special_columns[name] = converter

    return fieldnames, special_columns
def get_arch_recipes(arch: str, dag: networkx.DiGraph):
    """Lazily select the root recipes of ``dag`` that support ``arch``.

    A node is kept when ``dag.in_degree(node)`` equals 0 and ``arch`` is
    contained in the node's ``architectures`` attribute. The result is a
    ``filter`` iterator, so nothing is evaluated until it is consumed.
    """
    in_degree = dag.in_degree

    def is_root(recipe):
        return 0 == in_degree(recipe)

    def supports_arch(recipe):
        return arch in recipe.architectures

    return filter(supports_arch, filter(is_root, dag.nodes))
def get_balances(self):
    """Return the account balances grouped into buckets.

    The result has the keys ``available``, ``savings``, ``rewards`` and
    ``total``. Each bucket maps an asset symbol to the ``amount`` of the
    ``Amount`` parsed from the matching account field; ``total`` holds the
    per-asset sums rounded to 3 decimal places.
    """
    def amount_of(field):
        return Amount(self[field]).amount

    available = {
        'BEARS': amount_of('balance'),
        'BSD': amount_of('bsd_balance'),
        'COINS': amount_of('coining_shares'),
    }
    savings = {
        'BEARS': amount_of('savings_balance'),
        'BSD': amount_of('savings_bsd_balance'),
    }
    rewards = {
        'BEARS': amount_of('reward_bears_balance'),
        'BSD': amount_of('reward_bsd_balance'),
        'COINS': amount_of('reward_coining_balance'),
    }

    # COINS has no savings bucket, so only two terms are summed for it.
    totals = {
        'BEARS': sum((available['BEARS'], savings['BEARS'], rewards['BEARS'])),
        'BSD': sum((available['BSD'], savings['BSD'], rewards['BSD'])),
        'COINS': sum((available['COINS'], rewards['COINS'])),
    }
    total = {asset: round(value, 3) for asset, value in totals.items()}

    return {
        'available': available,
        'savings': savings,
        'rewards': rewards,
        'total': total,
    }
def read_config_header(filepath, defaults=None, encoding=DEFAULT_ENCODING):
    """Read and parse the configuration header at the top of ``filepath``.

    Args:
        filepath: Path (or path-like string) of the file to inspect.
        defaults: Optional mapping of default values. It is copied and
            returned when the file has no header, and otherwise passed to
            ``parse_config``.
        encoding: Text encoding used to open the file.

    Returns:
        The result of ``parse_config(header, defaults)``, or a copy of
        ``defaults`` (``{}`` when it is empty or ``None``) if
        ``has_config_header`` reports no header.
    """
    filepath = Path(filepath)
    if not has_config_header(filepath):
        return defaults.copy() if defaults else {}

    # Honour the caller's ``encoding``; previously the parameter was ignored
    # and DEFAULT_ENCODING was always used.
    with open(filepath, mode="r", encoding=encoding) as fi:
        # Skip the first line, then collect lines until one starts with a
        # "---\n" or "...\n" delimiter.
        header = "".join(
            fn.takewhile(
                fn.none_fn(
                    fn.rpartial(str.startswith, "---\n"),
                    fn.rpartial(str.startswith, "...\n"),
                ),
                fn.rest(fi),
            )
        )
    return parse_config(header, defaults)
def get_balances(self):
    """Return the account balances grouped into buckets.

    The result has the keys ``available``, ``savings``, ``rewards`` and
    ``total``. Each bucket maps an asset symbol to the ``amount`` of the
    ``Amount`` parsed from the matching account field; ``total`` holds the
    per-asset sums rounded to 3 decimal places.
    """
    def read(field):
        return Amount(self[field]).amount

    available = {
        'HIVE': read('balance'),
        'HBD': read('sbd_balance'),
        'VESTS': read('vesting_shares'),
    }
    savings = {
        'HIVE': read('savings_balance'),
        'HBD': read('savings_sbd_balance'),
    }
    rewards = {
        'HIVE': read('reward_steem_balance'),
        'HBD': read('reward_sbd_balance'),
        'VESTS': read('reward_vesting_balance'),
    }

    # VESTS has no savings bucket, so only two terms are summed for it.
    totals = {
        'HIVE': sum((available['HIVE'], savings['HIVE'], rewards['HIVE'])),
        'HBD': sum((available['HBD'], savings['HBD'], rewards['HBD'])),
        'VESTS': sum((available['VESTS'], rewards['VESTS'])),
    }
    total = {asset: round(value, 3) for asset, value in totals.items()}

    return {
        'available': available,
        'savings': savings,
        'rewards': rewards,
        'total': total,
    }
def estimation_error(T, T_hat, metric, params):
    """
    Computes the error between an estimated transition model and the corresponding ground truth based on a given
    evaluation metric (with optional parameters).

    :param T: [S x S x A] ground truth transition model
    :param T_hat: [S x S x A] estimated transition model
    :param metric: string specifying the evaluation metric ('emd' or 'hellinger')
    :param params: parameters of the evaluation metric (passed as the last argument to `emd`; unused for 'hellinger')
    :return: tuple (mean, std) of the metric values over all state-action pairs
    :raises ValueError: if `metric` is not a supported metric name
    """
    # select metric
    if metric == 'emd':
        fun = rpartial(emd, params)
    elif metric == 'hellinger':
        fun = hellinger
    else:
        # fail early with a clear message instead of an UnboundLocalError further down
        raise ValueError('unknown metric: {!r}'.format(metric))

    # MDP dimensions
    _, nStates, nActions = T.shape

    # evaluate the metric for each state and action
    E = np.zeros([nStates, nActions])
    for s, a in np.ndindex(nStates, nActions):
        E[s, a] = fun(T[:, s, a], T_hat[:, s, a])

    # return mean and std metric values (computed over the state space of the MDP)
    return E.mean(), E.std()
def get_balances(self):
    """Return the account balances grouped into buckets.

    The result has the keys ``available``, ``savings`` and ``total``. Each
    bucket maps an asset symbol to the ``amount`` of the ``Amount`` parsed
    from the matching account field; ``total`` holds the per-asset sums
    rounded to 3 decimal places.
    """
    def amount_of(field):
        return Amount(self[field]).amount

    available = {
        'GOLOS': amount_of('balance'),
        'GBG': amount_of('sbd_balance'),
        'GESTS': amount_of('vesting_shares'),
    }
    savings = {
        'GOLOS': amount_of('savings_balance'),
        'GBG': amount_of('savings_sbd_balance'),
    }

    # GESTS only exists in the available bucket.
    totals = {
        'GOLOS': sum((available['GOLOS'], savings['GOLOS'])),
        'GBG': sum((available['GBG'], savings['GBG'])),
        'GESTS': sum((available['GESTS'],)),
    }
    total = {asset: round(value, 3) for asset, value in totals.items()}

    return {
        'available': available,
        'savings': savings,
        'total': total,
    }
def wrapper(data, *args):
    """Apply ``f`` to every string inside ``data``, recursing into containers.

    Mappings are rebuilt with both keys and values processed, instances of
    ``seq`` are rebuilt as the same type with each element processed, strings
    are passed to ``f(data, *args)``, and anything else is returned as is.
    """
    def recurse(item):
        return wrapper(item, *args)

    if isinstance(data, Mapping):
        return {recurse(key): recurse(value) for key, value in data.items()}
    if isinstance(data, seq):
        container = type(data)
        return container(recurse(item) for item in data)
    if isinstance(data, str):
        return f(data, *args)
    return data
def __init__(self, info):
    """Initialise the post from the raw ``info`` mapping.

    The parent class receives ``_to_publication_info(info)``; the raw data is
    kept on ``self.info`` as an ``AttrDict``.
    """
    super().__init__(_to_publication_info(info))
    self.info = AttrDict(info)

    # Factory with this post's owner id and post id bound as the trailing
    # arguments of ``VkComment``.
    self._to_comment = rpartial(VkComment, self.owner_id, self.id)

    self._useful_post_info = {
        'owner_id': self.owner_id,
        'post_id': self.id,
    }
def resolve(src, context):
    """Recursively resolve every string found in ``src`` against ``context``.

    Mapping values (not keys) and the elements of lists, tuples and sets are
    resolved recursively; strings go through ``_resolve_str``; any other
    value is returned unchanged.
    """
    Seq = (list, tuple, set)

    def apply_value(value):
        return resolve(value, context)

    if isinstance(src, Mapping):
        resolved = {}
        for key, value in src.items():
            resolved[key] = apply_value(value)
        return resolved
    if isinstance(src, Seq):
        return type(src)(apply_value(item) for item in src)
    if isinstance(src, str):
        return _resolve_str(src, context)
    return src
def depobjs(self):
    """Return ``-l<name>`` linker flags for object dependencies, reversed.

    Walks the out-edges of this node in ``self.binding_dgraph``, keeps the
    edge targets whose ``build`` is ``'object'`` and turns each target name
    into a ``-l`` flag with the first ``"lib"`` occurrence removed. The flags
    are returned as a reverse iterator over the collected list.
    """
    link_flags = []
    for edge in self.binding_dgraph.out_edges(self):
        dependency = edge[1]
        if 'object' == dependency.build:
            link_flags.append(f'-l{dependency.target.replace("lib", "", 1)}')
    return reversed(link_flags)
def generate_makefile(arch: str, dag: networkx.DiGraph):
    """Bind every node of ``dag`` for ``arch`` and produce the Makefile rules.

    Returns a 2-tuple:

    * a lazy ``map`` yielding ``str()`` of each default rule followed by each
      root recipe compatible with ``arch``;
    * a ``frozenset`` of ``/sbin/<target>`` paths, one per node whose
      ``build`` is ``'application'``.
    """
    def get_arch_recipes(arch: str, dag: networkx.DiGraph):
        """Get all recipes compatible with the desired build architecture"""
        # Inner filter: nodes with in-degree 0. Outer filter: nodes whose
        # ``architectures`` attribute contains ``arch``. Both are lazy.
        return filter(
            funcy.compose(funcy.rpartial(operator.__contains__, arch),
                          funcy.rpartial(getattr, 'architectures')),
            filter(
                funcy.compose(functools.partial(operator.__eq__, 0),
                              dag.in_degree), dag.nodes))

    def get_default_rules():
        """Return the fixed ``all``, ``image``, ``clean`` and ``qemu`` rules."""
        return [
            # 'all' is given the targets of the arch-compatible root recipes
            # and one mkdir+cp command per application node.
            make.MakeRule(
                'all',
                list(
                    map(funcy.rpartial(getattr, 'target'),
                        get_arch_recipes(arch, dag))),
                list(
                    map(
                        lambda target: f'mkdir -p {os.path.dirname(target.destination)}; '
                        f'cp build/target/{target.target} {target.destination}',
                        [
                            target for target in dag
                            if target.build == 'application'
                        ]))),
            make.MakeRule(
                'image',
                body=[
                    Architecture(arch).getbootboot,
                    'echo "kernel=sbin/cpu_driver\\n" >> boot/BOOTBOOT/CONFIG',
                    '(cd initrd; tar -czf ../boot/BOOTBOOT/INITRD *)',
                    'cp /usr/share/ovmf/OVMF.fd .',
                    f'dd if=/dev/zero of=fakix_{arch}_image bs=1M count=4',
                    f'mkfs.fat fakix_{arch}_image',
                    f'mcopy -i fakix_{arch}_image -s boot/* ::'
                ]),
            make.MakeRule(
                'clean',
                body=['rm -rf build/target/* build/obj/* build/generated/*']),
            make.MakeRule(
                'qemu',
                body=[
                    f'{Architecture(arch).qemu} '
                    f'-drive format=raw,file=fakix_{arch}_image -nographic'
                ])
        ]

    # Unpacking exhausts the lazy map, so PykeTransform.bind(node, dag, arch)
    # runs for every node. The results are discarded. The pattern needs at
    # least one element, so an empty ``dag`` raises ValueError here.
    _, *_ = map(funcy.rpartial(PykeTransform.bind, dag, arch), dag)
    # get_default_rules() runs now; stringification is deferred until the
    # returned map is iterated.
    return map(
        str,
        itertools.chain(get_default_rules(), get_arch_recipes(
            arch, dag))), frozenset(f'/sbin/{pt.target}' for pt in dag
                                    if pt.build == 'application')
def _get_column_lists(columns):
    """Return the column names and the value converters for special columns.

    ``columns`` is an iterable of mappings with ``'name'`` and ``'type'``
    keys. The result is ``(fieldnames, special_columns)``: every column name
    in order, plus a dict mapping the names of boolean, date and datetime
    columns to the callable that converts their values. The date/datetime
    formats are derived from the current organization's ``date_format`` and
    ``time_format`` settings.
    """
    date_format = _convert_format(current_org.get_setting('date_format'))

    raw_datetime_format = '{} {}'.format(
        current_org.get_setting('date_format'),
        current_org.get_setting('time_format'),
    )
    datetime_format = _convert_format(raw_datetime_format)

    # Column type -> converter applied to each value of such a column.
    type_converters = {
        TYPE_BOOLEAN: _convert_bool,
        TYPE_DATE: rpartial(_convert_datetime, date_format),
        TYPE_DATETIME: rpartial(_convert_datetime, datetime_format),
    }

    fieldnames = []
    special_columns = {}
    for column in columns:
        column_name = column['name']
        fieldnames.append(column_name)
        for type_name, convert in type_converters.items():
            if column['type'] == type_name:
                special_columns[column_name] = convert

    return fieldnames, special_columns
def read_contents(filepath, encoding=DEFAULT_ENCODING):
    """Return the contents of ``filepath`` without its config header.

    When ``has_config_header`` reports a header, the first line and every
    line up to and including the first one that starts with ``"---\\n"`` or
    ``"...\\n"`` are skipped, and the remaining text is returned. Without a
    header the whole file is returned: as bytes when ``encoding`` is
    ``None``, otherwise as text decoded with ``encoding``.
    """
    filepath = Path(filepath)

    if has_config_header(filepath):
        with open(filepath, mode="r", encoding=encoding) as fi:
            not_delimiter = fn.none_fn(
                fn.rpartial(str.startswith, "---\n"),
                fn.rpartial(str.startswith, "...\n"),
            )
            # Drop the opening line, then the header lines; the outer
            # ``rest`` drops the closing delimiter itself.
            from_delimiter = fn.dropwhile(not_delimiter, fn.rest(fi))
            return "".join(fn.rest(from_delimiter))

    if encoding is None:
        with open(filepath, mode="rb") as fi:
            return fi.read()

    with open(filepath, mode="r", encoding=encoding) as fi:
        return fi.read()
def test_collect_metrics():
    """Collecting metrics every 2s from a dummy container yields 3-4 samples.

    The span between the first and last ``read`` timestamps must be at least
    6 and less than 10 seconds.
    """
    id_ = hlp.dummy_container(10, False)
    stats = cgroup.collect_runtime_metrics(id_, interval=2)
    hlp.clean_up_container(id_)

    timepoints = [funcy.get_in(stat, ['read']) for stat in stats]
    sample_count = len(timepoints)
    assert sample_count >= 3
    assert sample_count < 5

    first_read, last_read = timepoints[0], timepoints[-1]
    duration = cgroup.time_diff_in_seconds(first_read, last_read)
    assert duration >= 6
    assert duration < 10
def test_collect_metrics():
    """Check sample count and time span of metrics from a dummy container.

    With a 2s interval, 3 or 4 samples are expected, and the span between the
    first and last ``read`` timestamps must be in ``[6, 10)`` seconds.
    """
    container_id = hlp.dummy_container(10, False)
    stats = cgroup.collect_runtime_metrics(container_id, interval=2)
    hlp.clean_up_container(container_id)

    timepoints = []
    for stat in stats:
        timepoints.append(funcy.get_in(stat, ['read']))

    assert len(timepoints) >= 3
    assert len(timepoints) < 5

    duration = cgroup.time_diff_in_seconds(timepoints[0], timepoints[-1])
    assert duration >= 6
    assert duration < 10
def _map_gdl_to_publication(data_dict, obj):
    """Convert a GDL record into a ``publications`` dataset dict.

    ``data_dict`` is the harvested record; ``obj.source.url`` is used to
    build the download URL of the attached file. The dataset id is a UUID3
    derived from the record id. ``member_countries`` defaults to ``'other'``
    and is replaced when ``relatedCountry`` matches the label of one of the
    schema's ``member_countries`` choices.
    """
    record_id = str(data_dict['id'])

    # Not mapped yet: subject, contributor, publication_type, format,
    # language, relation, spatial, rights.
    dataset = {
        "id": str(uuid.uuid3(uuid.NAMESPACE_DNS, record_id)),
        "type": "publications",
        "title": data_dict['title'],
        "creator": [author['name'] for author in data_dict['authors']],
        "notes": data_dict['description'],
        "publisher": data_dict.get('relatedOrganisation'),
        "date": data_dict.get('created'),
        "metadata_modified": data_dict.get('created'),
        "identifier": data_dict['identifier'],
        "source": data_dict.get('source'),
        "license_id": 'notspecified',
        "member_countries": 'other',  # relatedCountry, optional
        "harvest_source": 'GDL'
    }

    thematic_area = data_dict.get('thematicArea', {}).get('area')
    if thematic_area:
        mapped_area = thematic_area_mapping.get(thematic_area)
        dataset["thematic_area_string"] = mapped_area

    related_country = data_dict.get('relatedCountry')
    if related_country:
        schema = sh.scheming_get_dataset_schema('publications')
        country_field = sh.scheming_field_by_name(
            schema['dataset_fields'], 'member_countries')
        # First choice whose label contains the related country, if any.
        member_country = F.first(
            choice for choice in country_field['choices']
            if contains(choice['label'], related_country))
        if member_country:
            dataset['member_countries'] = member_country['value']
            spatial = get_extent_for_country(member_country['label'])
            if spatial:
                dataset['spatial'] = spatial['value']

    if data_dict['file']:
        res_url = _gl_url(obj.source.url, 'download') + '?id=' + record_id
        file_name = data_dict['file']
        dataset['resources'] = [{
            'name': file_name,
            'url': res_url,
            'format': splitext(file_name)[1].lstrip('.'),
        }]

    return dataset
def predict(self, text, threshold=0.5):
    """Predict the topics of ``text`` whose score reaches ``threshold``.

    The model output is binarised at ``threshold`` and decoded back into
    topic chunks. Scores at or above the threshold are paired with the
    chunks, ordered from highest to lowest, converted with
    ``topic_result.init`` and de-duplicated by ``id``.
    """
    tokens = data.tokenize([text])
    result = self.model.predict(tokens)

    above = result >= threshold
    probabilities = list(result[above])

    chosen = np.copy(result)
    chosen[above] = 1
    chosen[result < threshold] = 0

    encoder = data.topic_chunk_encoder()
    topic_chunks = encoder.inverse_transform(chosen)[0]

    scored = zip(probabilities, topic_chunks)
    ranked = reversed(sorted(key=first)(scored))
    results = list_map(lambda pair: topic_result.init(*pair))(ranked)
    return list(distinct(results, attr('id')))
def topics_frame(passage_or_passages, df=None):
    """Return the rows of ``df`` that share a reference with the passage(s).

    ``passage_or_passages`` is either one object with a ``references``
    attribute or a list of such objects, whose references are pooled. When
    ``df`` is omitted it is built with
    ``by_topic_nodes(references=True, use_set=True)``.
    """
    if df is None:
        df = by_topic_nodes(references=True, use_set=True)

    if isinstance(passage_or_passages, list):
        references = frozenset(
            concat(passage.references for passage in passage_or_passages))
    else:
        references = frozenset(passage_or_passages.references)

    def shares_reference(row_references):
        return len(row_references & references) > 0

    overlapping = df.references.apply(shares_reference)
    return df[overlapping]
def __to_str(self):
    """Render this node and its dependencies as Makefile text.

    Returns ``''`` when ``self.comp`` is already set. Otherwise it sets
    ``self.comp``, builds ``self.make`` on the first call (guarded by
    ``self.generated_make``) and returns ``self.make``.
    """
    if self.comp:
        return ''

    def link_body():
        """Return the link command list; empty for bundle/object builds."""
        if self.build == 'bundle' or self.build == 'object':
            return []
        objstring = ' '.join(self.objs())
        l_flags = self.l_flags + (f' -o build/target/{self.target}')
        ld_command = f'{self.selected_arch.linker} {l_flags} {objstring}'
        # With a linker script: preprocess it, strip lines starting with '#',
        # write it under build/generated/, then link. Without one: link only.
        return ([
            f'{self.selected_arch.preprocessor} {self.includes} {self.l_file} | grep -v \'^#\' > build/generated/'
            + os.path.basename(self.l_file) + f'; {ld_command}'
        ] if self.l_file is not None else
                [f'{self.selected_arch.linker} {l_flags} {objstring}'])

    self.comp = True
    if not self.generated_make:
        # Lazy: PykeTransform.bind(v, graph, arch) runs for each out-edge
        # target only when ``edges`` is consumed below.
        edges = map(
            funcy.rpartial(PykeTransform.bind, self.binding_dgraph,
                           self.selected_arch),
            [v for u, v in self.binding_dgraph.out_edges(self)])
        depstring = ' '.join(self.dependencies)
        # Pairs of (PykeTransform.build_string(source), source).
        s_files = [(PykeTransform.build_string(s_file), s_file)
                   for s_file in self.s_files]
        c_files = [(PykeTransform.build_string(c_file), c_file)
                   for c_file in self.c_files]
        objstring = ' '.join(self.objs())
        self.make = '\n'.join([
            # Dependency rules first; empty renderings are dropped.
            *filter(len, map(str, edges)),
            '\n\t'.join([
                f'{self.target}: {depstring}',
                *itertools.starmap(
                    lambda o, c: f'{self.selected_arch.compiler} {self.c_flags} {self.includes} {c} -o {o}',
                    c_files),
                # NOTE(review): the attribute is spelled ``assember``; confirm
                # it matches the name defined on the architecture object.
                *itertools.starmap(
                    lambda o, s: f'{self.selected_arch.preprocessor} -DASM_FILE {s} {self.includes} | {self.selected_arch.assember} {self.s_flags} -o {o}',
                    s_files),
                *link_body(),
                # Object builds are archived instead of linked.
                *([
                    f'{self.selected_arch.archive} rcs build/target/{self.target}.a {objstring}'
                ] if self.build == 'object' else [])
            ])
        ])
        self.generated_make = True
    return self.make
def build_qc_dict(destination, thresholds, analysis, status):
    """
    Build a dict QC containing all data about this evaluation.

    One QC node is built per entry of ``status[thresholds]['thresholds']``
    via ``build_qc_node(threshold, status[analysis])``. The resulting dict is
    stored under ``status[destination]`` with the keys:

    - ``pass``: True when no node failed
    - ``fail_codes``: list of distinct fail codes of the failing nodes
    - ``evaluation``: list of all QC nodes

    Returns the (mutated) ``status`` mapping.
    """
    analysis_results = status[analysis]
    # Materialise the nodes: they are read twice (for the failures and for
    # 'evaluation'), which a lazy Python 3 ``map`` would not survive.
    nodes = [build_qc_node(threshold, analysis_results)
             for threshold in status[thresholds]['thresholds']]
    # ``funcy.distinct`` may return an iterator; a list is needed for
    # ``len()`` and so that the stored value is not a one-shot iterator.
    failures = list(funcy.distinct(
        fail_code(node) for node in nodes if not does_pass(node)))
    qc_dict = {
        'pass': len(failures) == 0,
        'fail_codes': failures,
        'evaluation': nodes
    }
    status[destination] = qc_dict
    return status
def init_deps(stage: PipelineStage) -> List["Dependency"]:
    """Create placeholder files/directories for the stage's missing deps.

    Dependencies whose ``exists`` is falsy are collected. A file is always
    created for params dependencies; other dependencies become a file or a
    directory depending on ``is_file(dep.fs_path)``. Returns the list of
    missing dependencies.
    """
    from funcy import rpartial

    from dvc.dependency import ParamsDependency
    from dvc.fs import localfs

    new_deps = []
    for dep in stage.deps:
        if not dep.exists:
            new_deps.append(dep)

    is_params_dep = rpartial(isinstance, ParamsDependency)
    params, deps = lsplit(is_params_dep, new_deps)

    # always create a file for params, detect file/folder based on extension
    # for other dependencies
    dir_paths = [dep.fs_path for dep in deps if not is_file(dep.fs_path)]
    file_paths = [
        dep.fs_path for dep in deps + params if is_file(dep.fs_path)
    ]

    for dir_path in dir_paths:
        localfs.makedirs(dir_path)

    for file_path in file_paths:
        parent = localfs.path.parent(file_path)
        localfs.makedirs(parent, exist_ok=True)
        localfs.touch(file_path)

    return new_deps
def load_config(config):
    """Validate the ``basicauth`` section of ``config`` and load its users.

    The section must contain ``passwd`` (path of a file with one
    ``user:password-hash`` entry per line) and may contain ``realm``
    (default ``"kdb"``) and ``hasher`` (default ``"sha"``, must be a key of
    ``HASHERS``).

    Returns:
        A ``(users, realm, hasher)`` tuple, where ``users`` maps user names
        to the text after the first colon of their entry.

    Raises:
        ConfigError: if the section or ``passwd`` is missing, or the hasher
            is not supported.
        ValueError: if a non-blank line of the passwd file has no colon.
    """
    if "basicauth" not in config:
        raise ConfigError("Basic Auth not configured!")

    for param in ("passwd",):
        if param not in config["basicauth"]:
            raise ConfigError("Basic Auth not configured! Missing: {0}".format(repr(param)))

    config = config["basicauth"]

    realm = config.get("realm", "kdb")
    hasher = config.get("hasher", "sha")

    if hasher not in HASHERS:
        raise ConfigError("Unsupported hasher: {0}".format(repr(hasher)))

    with open(config["passwd"], "r") as f:
        entries = (line.strip() for line in f)
        # Skip blank lines (e.g. a trailing newline) instead of crashing, and
        # split on the first colon only so the hash part is kept intact.
        users = dict(entry.split(":", 1) for entry in entries if entry)

    return users, realm, hasher
def resolve(self, src, unwrap=True):
    """Recursively resolves interpolation and returns resolved data.

    Mappings are rebuilt as plain dicts with both keys and values resolved;
    lists, tuples and sets are rebuilt as the same type; strings go through
    ``resolve_str``; anything else is returned unchanged.

    Args:
        src: Data (str/list/dict etc.) to resolve
        unwrap: Unwrap CtxDict/CtxList/Value to its original data if
            inside `src`. Defaults to True.

    >>> c = Context({"three": 3})
    >>> c.resolve({"lst": [1, 2, "${three}"]})
    {'lst': [1, 2, 3]}
    """
    Seq = (list, tuple, set)

    def _resolve(item):
        return self.resolve(item, unwrap)

    if isinstance(src, Mapping):
        return {_resolve(key): _resolve(value) for key, value in src.items()}
    if isinstance(src, Seq):
        return type(src)(_resolve(item) for item in src)
    if isinstance(src, str):
        return self.resolve_str(src, unwrap=unwrap)
    return src
def get_balances(self):
    """Return the account balances grouped into buckets.

    The result has the keys ``available``, ``savings``, ``accumulative``,
    ``tip`` and ``total``. Each bucket maps an asset symbol to the ``amount``
    of the ``Amount`` parsed from the matching account field; ``total`` holds
    the per-asset sums rounded to 3 decimal places.
    """
    def amount_of(field):
        return Amount(self[field]).amount

    available = {
        "GOLOS": amount_of("balance"),
        "GBG": amount_of("sbd_balance"),
        "GESTS": amount_of("vesting_shares"),
    }
    savings = {
        "GOLOS": amount_of("savings_balance"),
        "GBG": amount_of("savings_sbd_balance"),
    }
    accumulative = {"GOLOS": amount_of("accumulative_balance")}
    tip = {"GOLOS": amount_of("tip_balance")}

    # Only GOLOS has accumulative and tip balances.
    golos_parts = (
        available["GOLOS"],
        savings["GOLOS"],
        accumulative["GOLOS"],
        tip["GOLOS"],
    )
    totals = {
        "GOLOS": sum(golos_parts),
        "GBG": sum((available["GBG"], savings["GBG"])),
        "GESTS": sum((available["GESTS"],)),
    }
    total = {asset: round(value, 3) for asset, value in totals.items()}

    return {
        "available": available,
        "savings": savings,
        "accumulative": accumulative,
        "tip": tip,
        "total": total,
    }
def get_default_rules():
    """Return the fixed Makefile rules: ``all``, ``image``, ``clean``, ``qemu``.

    ``arch`` and ``dag`` come from the enclosing scope. The ``all`` rule is
    given the targets of the arch-compatible recipes and one mkdir+cp command
    per node whose ``build`` is ``'application'``.
    """
    recipe_targets = [
        recipe.target for recipe in get_arch_recipes(arch, dag)
    ]
    install_commands = [
        f'mkdir -p {os.path.dirname(app.destination)}; '
        f'cp build/target/{app.target} {app.destination}'
        for app in dag
        if app.build == 'application'
    ]
    all_rule = make.MakeRule('all', recipe_targets, install_commands)

    image_steps = [
        Architecture(arch).getbootboot,
        'echo "kernel=sbin/cpu_driver\\n" >> boot/BOOTBOOT/CONFIG',
        '(cd initrd; tar -czf ../boot/BOOTBOOT/INITRD *)',
        'cp /usr/share/ovmf/OVMF.fd .',
        f'dd if=/dev/zero of=fakix_{arch}_image bs=1M count=4',
        f'mkfs.fat fakix_{arch}_image',
        f'mcopy -i fakix_{arch}_image -s boot/* ::',
    ]
    image_rule = make.MakeRule('image', body=image_steps)

    clean_rule = make.MakeRule(
        'clean',
        body=['rm -rf build/target/* build/obj/* build/generated/*'])

    qemu_command = (f'{Architecture(arch).qemu} '
                    f'-drive format=raw,file=fakix_{arch}_image -nographic')
    qemu_rule = make.MakeRule('qemu', body=[qemu_command])

    return [all_rule, image_rule, clean_rule, qemu_rule]
def generatePolicy(policy, mdp, planner, params):
    """Generates a (strategy) policy based on its name.

    ``policy`` selects both the belief transition model (sparse Dirichlet,
    uniform Dirichlet or PG) and whether planning uses model samples
    (``*_sampling``) or the mean (``*_mean``). Raises ``ValueError`` for any
    other name. Returns a ``PlanningPolicy``.
    """
    sampling_policies = ('dir_sparse_sampling', 'dir_uniform_sampling', 'pg_sampling')
    mean_policies = ('dir_sparse_mean', 'dir_uniform_mean', 'pg_mean')

    # decision strategy for exploration policy
    decisionStrategy = rpartial(epsilonGreedy, params['epsilon'])

    # common parameters for all policies
    pi_params = {
        'nStates': mdp.nStates,
        'nActions': mdp.nActions,
        'decisionStrategy': decisionStrategy,
        'planner': planner,
        'updateFreq': params['planningUpdateFreq'],
    }

    # switch between variational sampling and variational mean
    if policy in sampling_policies:
        nSamples = params['nModelSamples']
    elif policy in mean_policies:
        nSamples = None
    else:
        raise ValueError('unknown policy')
    pi_params['nSamples'] = nSamples

    if policy in ('dir_sparse_sampling', 'dir_sparse_mean'):
        # sparse Dirichlet model
        beliefModel = DirichletTransitionModel(
            mdp.nStates, mdp.nActions, alpha=1e-3 * np.ones_like(mdp.T))
        pi_params['beliefTransitionModel'] = beliefModel
    elif policy in ('dir_uniform_sampling', 'dir_uniform_mean'):
        # uniform Dirichlet model
        beliefModel = DirichletTransitionModel(mdp.nStates, mdp.nActions)
        pi_params['beliefTransitionModel'] = beliefModel
    elif policy in ('pg_sampling', 'pg_mean'):
        # PG model
        distmat = positions2distmat(mdp.statePositions)
        distmat_kernel = normalize01(distmat + distmat.T) ** 2
        Sigma = NegExponential(distmat_kernel)
        beliefModel = PGVarStateTransitionModel(
            mdp.nStates, mdp.nActions, nonInformative=False, Sigma=Sigma)
        pi_params['beliefTransitionModel'] = beliefModel

    return PlanningPolicy(**pi_params)
def min_balance_for_period(eth_address: str,
                           created_at: dt.datetime,
                           lookback_days: int = 7):
    """
    Return the minimum VIEW balance (in whole ether units) that
    ``eth_address`` held at randomly sampled blocks during the
    ``lookback_days`` before ``created_at``.

    The purpose of this method is to prevent abuse caused by people who
    are voting and moving their tokens in an attempt to be able to vote
    again. The intent is to sample roughly one random block per hour and to
    treat the lowest balance seen as the voting power.
    """
    w3 = get_infura_web3()

    period_end = int(created_at.timestamp())
    lookback = dt.timedelta(days=lookback_days)
    period_start = int((created_at - lookback).timestamp())

    first_block = find_block_from_timestamp(w3, period_start).number
    last_block = find_block_from_timestamp(w3, period_end).number
    review_block_range = [first_block, last_block]

    # size of a chunk holding ~1 hour worth of blocks
    chunk_size = (last_block - first_block) // (lookback_days * 24)

    # NOTE(review): ``chunks`` receives the two-element [first, last] list,
    # not a range of block numbers. Confirm it really yields one
    # (start, stop) pair per hour, and that ``chunk_size`` cannot be 0 for
    # short block ranges.
    return min(
        int(from_wei(
            view_token_balance(
                eth_address, block_num=random.randrange(*chunk_range)),
            'ether'))
        for chunk_range in chunks(chunk_size, review_block_range))
import json
from os import getenv, environ, getcwd, path

from dotenv import load_dotenv, find_dotenv
# rpartial and pipe are no longer needed by load_json_file; the imports are
# kept in case other code relies on them.
from funcy import rpartial
from toolz import pipe


def load_json_file(file_path):
    """Read ``file_path`` and return its parsed JSON content.

    The file is opened in a ``with`` block so the handle is closed once the
    content has been parsed.
    """
    with open(file_path) as json_file:
        return json.load(json_file)


def config_folder_prefix():
    """Return the path of the ``alpha-2/config`` folder.

    The path is derived from the current working directory by cutting it at
    the first occurrence of ``'alpha-2'``.
    """
    # todo: improve this quick hack
    # (currently tightly coupled to alpha-2 folder)
    # It should find the correct config file path when
    # ran from src/, tests/ or Docker based paths.
    return path.join(getcwd().split('alpha-2')[0], 'alpha-2/config')


def load_json_config(name):
    """Load ``<name>.prod.json`` or ``<name>.dev.json`` from the config folder.

    The ``prod`` variant is used when ``IS_PRODUCTION`` is true.
    """
    env = 'prod' if IS_PRODUCTION else 'dev'
    return load_json_file(f'{config_folder_prefix()}/{name}.{env}.json')


# load default config
# Any non-empty PRODUCTION value (including "0" or "false") enables
# production mode, because the raw string is passed to bool().
IS_PRODUCTION = bool(getenv('PRODUCTION', False))
if not IS_PRODUCTION:
    load_dotenv(find_dotenv())

# FLASK_ENV is mandatory: a missing variable raises KeyError at import time.
FLASK_ENV = environ['FLASK_ENV'].lower()

# base config
SECRET_KEY = getenv('SECRET_KEY', 'not_a_good_secret')
# Sort keys paired with the ordering terms handed to ``_order_results``;
# a leading '-' marks the descending variant.
order_map = {
    'name': 'lowercase_name',
    '-name': '-lowercase_name',
    'created_at': 'created_at',
    '-created_at': '-created_at',
    'schedule': 'schedule',
    '-schedule': '-schedule',
    'runtime': 'query_results-runtime',
    '-runtime': '-query_results-runtime',
    'executed_at': 'query_results-retrieved_at',
    '-executed_at': '-query_results-retrieved_at',
    'created_by': 'users-name',
    '-created_by': '-users-name',
}

# ``_order_results`` with '-created_at' and ``order_map`` bound as its
# trailing arguments.
order_results = rpartial(_order_results, '-created_at', order_map)


@routes.route(org_scoped_rule('/api/queries/format'), methods=['POST'])
@login_required
def format_sql_query(org_slug=None):
    """
    Formats an SQL query using the Python ``sqlparse`` formatter.

    The query is re-indented and its keywords are upper-cased; a missing
    ``query`` field is treated as an empty string.

    :<json string query: The SQL text to format
    :>json string query: Formatted SQL text
    """
    payload = request.get_json(force=True)
    raw_sql = payload.get("query", "")
    formatted_sql = sqlparse.format(raw_sql, reindent=True, keyword_case='upper')
    return jsonify({'query': formatted_sql})
_order_results) from redash.serializers import serialize_dashboard from redash.permissions import (can_modify, require_admin_or_owner, require_object_modify_permission, require_permission) from sqlalchemy.orm.exc import StaleDataError # Ordering map for relationships order_map = { 'name': 'lowercase_name', '-name': '-lowercase_name', 'created_at': 'created_at', '-created_at': '-created_at', } order_results = rpartial(_order_results, '-created_at', order_map) class DashboardListResource(BaseResource): @require_permission('list_dashboards') def get(self): """ Lists all accessible dashboards. :qparam number page_size: Number of queries to return per page :qparam number page: Page number to retrieve :qparam number order: Name of column to order by :qparam number q: Full text search term Responds with an array of :ref:`dashboard <dashboard-response-label>` objects.
def discover(
    features: List['ballet.feature.Feature'],
    X_df: Optional[pd.DataFrame],
    y_df: Optional[pd.DataFrame],
    y: Optional[np.ndarray],
    input: Optional[str] = None,
    primitive: Optional[str] = None,
    expensive_stats: bool = False,
) -> pd.DataFrame:
    """Discover existing features

    Display information about existing features including summary statistics
    on the development dataset. If the feature extracts multiple feature
    values, then the summary statistics (e.g. mean, std, nunique) are
    computed for each feature value and then averaged. If the development
    dataset cannot be loaded (any of ``X_df``, ``y_df``, ``y`` is None),
    computation of summary statistics is skipped.

    The following information is shown:

    - name: the name of the feature
    - description: the description of the feature
    - input: the variables that are used as input to the feature
    - transformer: the transformer/transformer pipeline
    - output: the output columns of the feature (not usually specified)
    - author: the GitHub username of the feature's author
    - source: the fully-qualified name of the Python module that contains
      the feature
    - mutual_information: estimated mutual information between the feature
      (or averaged over feature values) and the target on the development
      dataset split
    - conditional_mutual_information: estimated conditional mutual
      information between the feature (or averaged over feature values) and
      the target conditional on all other features on the development
      dataset split
    - ninputs: the number of input columns to the feature
    - nvalues: the number of feature values this feature extracts (i.e. 1
      for a scalar-valued feature and >1 for a vector-valued feature)
    - ncontinuous: the number of feature values this feature extracts that
      are continuous-valued
    - ndiscrete: the number of feature values this feature extracts that
      are discrete-valued
    - mean: mean of the feature on the development dataset split
    - std: standard deviation of the feature (or averaged over feature
      values) on the development dataset split
    - var: variance of the feature (or averaged over feature values) on the
      development dataset split
    - min: minimum of the feature on the development dataset split
    - median: median of the feature (or median over feature values) on the
      development dataset split
    - max: maximum of the feature on the development dataset split
    - nunique: number of unique values of the feature (or averaged over
      feature values) on the development dataset split

    The following query operators are supported:

    - input (str): filter to only features that have ``input`` in their
      input/list of inputs
    - primitive (str): filter to only features that use primitive
      ``primitive`` (i.e. a class with name ``primitive``) in the
      transformer/transformer pipeline

    For other queries, you should just use normal DataFrame indexing::

        >>> features_df[features_df['author'] == 'jane']
        >>> features_df[features_df['name'].str.contains('married')]
        >>> features_df[features_df['mutual_information'] > 0.05]
        >>> features_df[features_df['input'].apply(
                lambda input: 'A' in input and 'B' in input)]

    Returns:
        data frame with features on the row index and columns as described
        above
    """
    have_data = X_df is not None and y_df is not None and y is not None

    if have_data:

        @fy.ignore(Exception)
        def get_feature_values(feature):
            pipeline = feature.as_feature_engineering_pipeline()
            return asarray2d(pipeline.fit_transform(X_df, y_df))

        # A feature whose pipeline raises gets ``None`` as its values.
        values = {
            feature: get_feature_values(feature)
            for feature in features
        }
        target = asarray2d(y)
    else:
        values = None
        target = None

    def excluded_by_input(feature):
        if not input:
            return False
        if not isinstance(feature.input, Container):  # avoid callables
            return False
        return input not in feature.input and input != feature.input

    def excluded_by_primitive(feature):
        if not primitive:
            return False
        used = get_transformer_primitives(feature.transformer)
        return primitive not in used

    records = []
    for feature in tqdm(features):
        if excluded_by_input(feature):
            continue
        if excluded_by_primitive(feature):
            continue
        records.append(
            _summarize_feature(feature, values, target, expensive_stats))

    return pd.DataFrame.from_records(records)
def split_params_deps(stage):
    """Split ``stage.deps`` into params dependencies and all other deps.

    Returns whatever ``lsplit`` produces for the predicate "is an instance
    of ``ParamsDependency``" applied to ``stage.deps``.
    """
    from ..dependency import ParamsDependency

    def is_params_dependency(dep):
        return isinstance(dep, ParamsDependency)

    return lsplit(is_params_dependency, stage.deps)