def filter_data(field, yaml_data):
    """Extract a field of data from the YAML files.

    Args:
      field: the name of the field to extract
      yaml_data: the benchmark YAML data

    Returns:
      the filtered data from the YAML data
    """
    return pipe(
        yaml_data,
        dict,
        valmap(lambda val: val["data"]),
        valmap(filter(lambda item: item["name"].lower() == field)),
        valmap(list),
        valmap(get(0, default=None)),
        valfilter(lambda x: x is not None),
        itemmap(lambda item: (item[0], update_dict(item[1], name=item[0]))),
        lambda dict_: sorted(list(dict_.values()), key=lambda item: item["name"]),
        map(
            update_in(
                keys=["transform"],
                func=lambda x: x + [dict(expr="datum.x > 0.01", type="filter")],
            )
        ),
    )

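A minimal, self-contained sketch (not from the original module) of the curried toolz building blocks the pipeline above composes; the toy mapping and the inline name-stamping stand in for the YAML input and the custom update_dict helper.

from toolz.curried import pipe, valmap, valfilter, itemmap, get

toy = {
    "run_a": {"data": [{"name": "speed", "value": 1.2}]},
    "run_b": {"data": []},
}
result = pipe(
    toy,
    valmap(get("data")),                 # pull the "data" list out of each entry
    valmap(get(0, default=None)),        # keep the first item, or None if empty
    valfilter(lambda x: x is not None),  # drop entries that had no data
    itemmap(lambda kv: (kv[0], {**kv[1], "name": kv[0]})),  # stamp the key into the value
)
print(result)  # {'run_a': {'name': 'run_a', 'value': 1.2}}
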
def decision_function(self, X, exposure=None):
    if not hasattr(self, 'estimator_'):
        raise NotFittedError()
    pred_args = valmap(growd(2), valfilter(notnone, dict(X=X, exposure=exposure)))
    score = self.estimator_.predict(**pred_args)
    return score

def transform(self, X, exposure=None):
    data = valmap(growd(2), valfilter(notnone, dict(X=X, exposure=exposure)))
    return np.concatenate(
        tuple(map(compose(growd(2), methodcaller('predict', **data)),
                  self.estimators)),
        axis=1)

def async_request_logger(logger):
    no_nones = valfilter(lambda x: x)

    async def request_logger_middleware(ctx, next):
        req = ctx[_REQ_HTTP]
        context = ctx.get(_CONTEXT, {})
        msg = "before request.http"
        log = await logger.bind(**context)
        await log.info(msg, url=req.url, method=req.method, params=no_nones(req.params))
        await log.debug(msg, headers=req.headers)

        ctx = await next(ctx)

        res = ctx[_RES_HTTP]
        msg = "after response.http"
        await log.info(
            msg,
            url=res.request.url,
            status=res.status_code,
            method=res.request.method,
            elapsed=res.elapsed,
            size=len(res.parsed_content if hasattr(res, "parsed_content") else res.content),
            duration_us=ctx.get(_REQ_DURATION, None),
        )
        await log.debug(msg, headers=res.headers)
        return ctx

    return request_logger_middleware

def load(
    refresh,
    bq_read,
    sqlfn="../fis/data/hist_data_proto.sql",
    dest="/tmp/hists.pq",
):
    if refresh:
        sql = read(sqlfn)
        df_ = bq_read(sql)
        hist_cols = get_hist_cols_raw_dl(df_)
        h_kw = {
            h: lambda df, h=h: df[h].map(arr_of_str2dict)
            for h in hist_cols
        }
        df_ = df_.assign(**h_kw)
        # hist_cols = get_hist_cols(df)
        hist_cols_asn = {
            c: lambda x, c=c: x[c].map(z.keymap(str))
            for c in hist_cols
        }
        ds = df_.assign(**hist_cols_asn)
        ds.to_parquet(dest)
    ds = pd.read_parquet(dest)
    # print(ds.cycle_collector)
    # return ds
    fn = z.compose(typed_dict, z.keymap(int), z.valfilter(lambda x: x is not None))
    hist_cols = get_dict_hist_cols(ds)
    hist_cols_asn = {c: lambda df, c=c: df[c].map(fn) for c in hist_cols}
    # return ds
    df = ds.assign(**hist_cols_asn)
    return df

def request_logger(logger):
    no_nones = valfilter(lambda x: x)

    def request_logger_middleware(ctx, next):
        req = ctx[_REQ_HTTP]
        context = ctx.get(_CONTEXT, {})
        msg = "request.http"
        log = logger.bind(**context)
        log.info(msg, url=req.url, method=req.method, params=no_nones(req.params))
        log.debug(msg, headers=req.headers)

        ctx = next(ctx)

        res = ctx[_RES_HTTP]
        msg = "response.http"
        log.info(
            msg,
            url=res.request.url,
            status=res.status_code,
            method=res.request.method,
            elapsed=res.elapsed,
            size=len(res.content),
            duration_us=ctx.get(_REQ_DURATION, None),
        )
        log.debug(msg, headers=res.headers)
        return ctx

    return request_logger_middleware

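Both request loggers above rely on partially applying valfilter to build the no_nones helper; a small sketch of that trick, assuming the curried variant from toolz.curried:

from toolz.curried import valfilter

# Calling curried valfilter with only the predicate returns a reusable function
# that strips falsy values from whatever mapping is passed to it later.
no_nones = valfilter(lambda x: x)
print(no_nones({"q": "ping", "page": None, "limit": 0, "tag": "net"}))
# {'q': 'ping', 'tag': 'net'}
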
def get_ping(host, n):
    output = ping_output(host, n)
    return _.pipe(
        ping_re,
        _.itemmap(__.vcall(lambda key, func: (key, func(output)))),
        _.valfilter(lambda v: v),
    )

def get_xhe_to_full_transformer() -> Dict[str, List[str]]:
    return pipe(
        FullToTwoTable.select(),
        map(lambda e: (e.full, e.two)),
        groupby(lambda e: e[1]),
        itemmap(lambda kv: (
            kv[0],
            list(filter(lambda e: e != kv[0], map(lambda e: e[0], kv[1]))))),
        itemmap(lambda kv: (kv[0], kv[1] if len(kv[1]) > 0 else [kv[0]])),
        valfilter(lambda e: len(e) == 1),
        dict)

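A hypothetical, self-contained version of the pipeline above, using a plain list of (full, two) pairs in place of the FullToTwoTable query; all data here is made up for illustration.

from typing import Dict, List
from toolz.curried import pipe, groupby, itemmap, valfilter

pairs = [("nihao", "ni"), ("nimen", "ni"), ("hao", "ha")]
table: Dict[str, List[str]] = pipe(
    pairs,
    groupby(lambda e: e[1]),                                   # group pairs by the two-letter code
    itemmap(lambda kv: (kv[0], [full for full, _ in kv[1] if full != kv[0]])),
    itemmap(lambda kv: (kv[0], kv[1] if kv[1] else [kv[0]])),  # fall back to the code itself
    valfilter(lambda fulls: len(fulls) == 1),                  # keep only unambiguous codes
    dict,
)
print(table)  # {'ha': ['hao']}
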
def __init__(self, **kwargs):
    self.attrs = pipe(
        self.imports,
        reversed,
        map(vars),
        merge,
        keyfilter(compose(str.islower, first)),
        valfilter(callable),
    )
    self.attrs.update()

def main(filter_func, j2_file_name):
    """Generate the chart JSON files

    Args:
      filter_func: function to filter simulation YAML data
      j2_file_name: the j2 file name to insert the data into

    Returns:
      list of (filepath, chart_json) pairs
    """
    return pipe(
        get_data(filter_func),
        valfilter(lambda x: len(x) > 0),
        itemmap(lambda item: (item[0], process_chart(item[0], item[1], j2_file_name))),
        itemmap(write_chart_json(j2_file_name)))

def args_extractor(f, merge_defaults=False):
    """
    Takes a function, inspects its parameter list, and returns a function
    that will return all of the named and keyword arguments back as a
    dictionary. The varargs, which don't have names, are also returned.
    """
    spec = inspect.getfullargspec(f)
    if spec.defaults:
        param_defaults = dict(
            zip(spec.args[-len(spec.defaults):], spec.defaults))
    else:
        param_defaults = {}
    named_param_defaults = spec.kwonlydefaults or {}
    default_dicts = {}
    num_named_args = len(spec.args)

    if merge_defaults is True and hasattr(f, '__merge_defaults__'):
        merge_defaults = f.__merge_defaults__

    if merge_defaults:
        default_dicts = t.pipe(
            t.merge(named_param_defaults, param_defaults),
            tc.valfilter(lambda v: isinstance(v, dict)),
        )
        if isinstance(merge_defaults, Sequence):
            default_dicts = {k: default_dicts[k] for k in merge_defaults}

        def _args_dict(args, kargs):
            unnamed_args = dict(zip(spec.args, args[0:num_named_args]))
            varargs = args[num_named_args:]
            kargs = t.merge(kargs, unnamed_args)
            for k, d in default_dicts.items():
                kargs[k] = t.merge(d, kargs.get(k) or {})
            return varargs, kargs
    else:
        def _args_dict(args, kargs):
            unnamed_args = dict(zip(spec.args, args[0:num_named_args]))
            varargs = args[num_named_args:]
            kargs = t.merge(kargs, unnamed_args)
            return varargs, kargs

    return _args_dict

def predict(self, X, exposure=None):
    data = valmap(growd(2), valfilter(notnone, dict(X=X, exposure=exposure)))
    prediction = self.coefficients[0] * self.estimators[0].predict(**data)
    if len(prediction.shape) == 2 and prediction.shape[1] == 1:
        prediction = np.ravel(prediction)
        ravel = True
    elif len(prediction.shape) == 1:
        ravel = True
    else:
        ravel = False
    for i, estimator in enumerate(self.estimators[1:]):
        prediction += self.coefficients[i + 1] * (
            np.ravel(estimator.predict(**data)) if ravel
            else estimator.predict(**data))
    return prediction

def __init__(
        self,
        data=None,
        index=None,
        columns=None,
        estimator=None,
        parent=None,
        feature_level=None,
        copy=False,
        extensions=[
            'harness.python.ext.base.JinjaExtension',
            'harness.python.ext.SciKit.SciKitExtension',
            'harness.python.ext.Bokeh.BokehModelsExtension',
            'harness.python.ext.Bokeh.BokehPlottingExtension',
            'harness.python.ext.Bokeh.BokehChartsExtension'
        ],
):
    kwargs = dict(
        estimator=estimator,
        parent=parent,
        feature_level=feature_level,
        extensions=extensions,
    )
    self.set_params(**kwargs)
    for ext in self.extensions:
        if ext not in self.env.extensions:
            self.env.add_extension(ext)
        ext = self.env.extensions[ext]
        if (not (ext.mixin is None)
                and not (ext.mixin in self.__class__.__bases__)):
            self.__class__.__bases__ += (ext.mixin, )
    kwargs = pipe(
        locals(),
        keyfilter(partial(operator.contains, self._blacklist)),
        valfilter(complement(lambda x: x is None)))
    super().__init__(**kwargs)

def remove_by_feature_shuffling(log: LogType,
                                predict_fn: PredictFnType,
                                eval_fn: EvalFnType,
                                eval_data: pd.DataFrame,
                                extractor: ExtractorFnType,
                                metric_name: str,
                                max_removed_by_step: int = 50,
                                threshold: float = 0.005,
                                speed_up_by_importance: bool = False,
                                parallel: bool = False,
                                nthread: int = 1,
                                seed: int = 7) -> List[str]:
    """
    Performs feature selection based on the evaluation of the test vs the
    evaluation of the test with randomly shuffled features.

    Parameters
    ----------
    log : LogType
        Dictionaries of evaluations.

    predict_fn: function pandas.DataFrame -> pandas.DataFrame
        A partially defined predictor that takes a DataFrame and returns the
        predicted score for this dataframe.

    eval_fn : function DataFrame -> log dict
        A partially defined evaluation function that takes a dataset with
        prediction and returns the evaluation logs.

    eval_data: pandas.DataFrame
        Data used to evaluate the model after shuffling.

    extractor: function str -> float
        An extractor that takes a string and returns the value of that string
        on a dict.

    metric_name: str
        String with the name of the column that refers to the metric column
        to be extracted.

    max_removed_by_step: int (default 50)
        The maximum number of features to remove. It will only consider the
        least max_removed_by_step in terms of feature importance.
        If speed_up_by_importance=True it will first filter the least relevant
        features and shuffle only those. If speed_up_by_importance=False it
        will shuffle all features and drop the last max_removed_by_step in
        terms of PIMP. In both cases, the features will only be removed if
        the drop in performance is up to the defined threshold.

    threshold: float (default 0.005)
        Threshold for model performance comparison.

    speed_up_by_importance: bool (default False)
        If it should narrow the search by looking at feature importance first
        before getting PIMP importance. If True, will only shuffle the top
        num_removed_by_step in terms of feature importance.

    parallel: bool (default False)

    nthread: int (default 1)

    seed: int (default 7)
        Random seed.

    Returns
    -------
    features: list of str
        The remaining features after removing based on feature importance.
    """
    random.seed(seed)

    curr_metric = get_avg_metric_from_extractor(log, extractor, metric_name)
    eval_size = eval_data.shape[0]

    features_to_shuffle = order_feature_importance_avg_from_logs(log)[-max_removed_by_step:] \
        if speed_up_by_importance else get_used_features(log)

    def shuffle(feature: str) -> pd.DataFrame:
        return eval_data.assign(**{feature: eval_data[feature].sample(frac=1.0)})

    feature_to_delta_metric = compose(
        lambda m: curr_metric - m,
        get_avg_metric_from_extractor(extractor=extractor, metric_name=metric_name),
        gen_validator_log(fold_num=0, test_size=eval_size),
        eval_fn, predict_fn, shuffle)

    if parallel:
        metrics = Parallel(n_jobs=nthread, backend="threading")(
            delayed(feature_to_delta_metric)(feature) for feature in features_to_shuffle)
        feature_to_delta_metric = dict(zip(features_to_shuffle, metrics))
        gc.collect()
    else:
        feature_to_delta_metric = {
            feature: feature_to_delta_metric(feature)
            for feature in features_to_shuffle
        }

    return pipe(feature_to_delta_metric,
                valfilter(lambda delta_metric: delta_metric < threshold),
                sorted(key=lambda f: feature_to_delta_metric.get(f)),
                take(max_removed_by_step),
                list)

def split_paths(split_paths, graph_in):
    debug("____")
    debug("split_paths:", split_paths)
    debug("graph_in:", graph_in)

    # Convert list of split_paths into list of vertex indices. Ignores
    # split_paths which don't match any vertices in the graph.
    # All edges pointing at the indices will be deleted from the graph.
    split_path_indices = list(unnest_iterable(map(
        split_path_spec_to_indices(graph_in),
        split_paths
    )))

    debug("split_path_indices:", split_path_indices)

    # Short circuit if there is nothing to do (split_paths didn't match any
    # vertices in the graph).
    if len(split_path_indices) == 0:
        return {"rest": graph_in}

    # If graph has multiple roots, add a single one connecting all existing
    # roots to make it easy to split the graph into 2 sets of vertices after
    # deleting edges pointing at split_path_indices.
    fake_root_name = "__root__"
    graph, root_name = add_root(fake_root_name, graph_in)

    debug("root_name", root_name)

    if (
        find_vertex_by_name_or_none(graph)(root_name).index
        in split_path_indices
    ):
        return {"main": graph_in}

    # Copy graph if add_root has not already created a copy, since we are
    # going to mutate the graph and don't want to mutate a function argument.
    graph = graph if graph is not graph_in else graph.copy()

    if DEBUG_PLOT:
        layout = graph.layout('tree')
        debug_plot(graph, layout=layout)

    # Get indices of all vertices which can be reached from split_path_indices
    # (including split_path_indices). This is a set of all split_paths and
    # their dependencies.
    split_off_vertex_indices = frozenset(
        subcomponent_multi(graph, split_path_indices))
    debug("split_off_vertex_indices", split_off_vertex_indices)

    # Delete edges which point at any of the vertices in split_path_indices.
    graph.delete_edges(_target_in=split_path_indices)

    if DEBUG_PLOT:
        debug_plot(graph, layout=layout)

    # Get indices of all vertices which can be reached from the root. Since
    # edges pointing at split_path_indices have been deleted, none of the
    # split_path_indices will be included. Dependencies of rest_with_common
    # will only be included if they can be reached from any vertex which is
    # itself not in split_off_vertex_indices.
    rest_with_common = frozenset(graph.subcomponent(root_name, mode="out"))
    debug("rest_with_common", rest_with_common)

    # Get a set of all dependencies common to split_path_indices and the rest
    # of the graph.
    common = split_off_vertex_indices.intersection(rest_with_common)
    debug("common", common)

    # Get a set of vertices which cannot be reached from split_path_indices.
    rest_without_common = rest_with_common.difference(common)
    debug("rest_without_common", rest_without_common)

    # Get a set of split_path_indices and their dependencies which cannot be
    # reached from the rest of the graph.
    split_off_without_common = split_off_vertex_indices.difference(common)
    debug("split_off_without_common", split_off_without_common)

    if DEBUG_PLOT:
        def choose_color(index):
            if index in split_off_without_common:
                return "green"
            elif index in rest_without_common:
                return "red"
            else:
                return "purple"

        vertex_color = [choose_color(v.index) for v in graph.vs]

        debug_plot(
            graph,
            layout=layout,
            vertex_color=vertex_color
        )

    # Return subgraphs based on calculated sets of vertices.
    result_keys = ["main", "common", "rest"]
    result_values = [
        # Split paths and their deps (unreachable from rest of the graph).
        graph.induced_subgraph(split_off_without_common),
        # Dependencies of split paths which can be reached from the rest of
        # the graph.
        graph.induced_subgraph(common),
        # Rest of the graph (without dependencies common with split paths).
        graph.induced_subgraph(rest_without_common),
    ]

    debug('result_values', result_values[0].vs["name"])

    return tlz.valfilter(
        tlz.complement(graph_is_empty),
        dict(zip(
            result_keys,
            (
                result_values
                if root_name != fake_root_name
                # If root was added, remove it.
                else tlz.map(remove_added_root(fake_root_name), result_values)
            )
        ))
    )

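A small sketch of the final valfilter/complement step above (assuming import toolz as tlz): keep only the entries whose value does not satisfy the predicate, here a simple stand-in for dropping empty subgraphs.

import toolz as tlz

def is_empty(xs):
    # Stand-in for graph_is_empty from the original module.
    return len(xs) == 0

parts = {"main": ["a", "b"], "common": [], "rest": ["c"]}
print(tlz.valfilter(tlz.complement(is_empty), parts))
# {'main': ['a', 'b'], 'rest': ['c']}
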
def nonempty_search_kw(search_kw):
    return pipe(
        search_kw.items(),
        valfilter(not_null),
    )

def calculate():
    # Process controller arguments.
    calculees_arg = request.args.getlist('calculee') or None
    saisies_arg = request.args.get('saisies')
    saisie_variables = {}
    if saisies_arg is not None:
        try:
            saisie_variables = json.loads(saisies_arg)
        except ValueError:
            raise BadRequest('"saisies" GET parameter must contain a valid JSON.')
        # Accept aliases
        saisie_variables = dict(iter_saisie_variables_or_aliases(saisie_variables))
        wrong_saisie_variable_names = list(filter(
            lambda variable_name:
                state.variables_definitions.get_type(variable_name) != 'variable_saisie',
            saisie_variables.keys(),
        ))
        if wrong_saisie_variable_names:
            raise BadRequest([
                '"saisies" GET parameter contains the variable "{}" which is not '
                'a "saisie" variable.'.format(variable_name)
                for variable_name in wrong_saisie_variable_names
            ])

    warning_messages_by_section = defaultdict(list)

    if calculees_arg is None:
        calculee_variable_names = state.variables_definitions.filter_calculees(kind='restituee')
    else:
        calculee_variable_names = calculees_arg
        for calculee_variable_name in calculee_variable_names:
            if not state.variables_definitions.is_calculee(calculee_variable_name, kind='restituee'):
                warning_messages_by_section['saisies'].append(
                    'Variable "{}" is not a variable of type "calculee restituee"'.format(
                        calculee_variable_name)
                )

    if 'V_ANREV' not in saisie_variables:
        warning_messages_by_section['saisies'].append(
            'V_ANREV should be given as a "saisie" variable. Hint: saisies={"V_ANREV":2014}.'
        )

    # Load formula functions with a new cache for each HTTP request
    result_by_formula_name_cache = {}
    formulas_functions = formulas.get_formulas(
        cache=result_by_formula_name_cache,
        constants=state.constants,
        saisie_variables=saisie_variables,
    )

    # Apply verifs
    errors = verifs.get_errors(
        formulas=formulas_functions,
        saisie_variables=saisie_variables,
    )
    if errors is not None:
        warning_messages_by_section['verif_errors'] = [
            (error, state.definition_by_error_name.get(error, {}).get('description'))
            for error in unique(errors)  # Keep order
        ]

    # Calculate results
    results = {
        calculee_variable_name: formulas_functions[calculee_variable_name]()
        for calculee_variable_name in calculee_variable_names
    }
    if calculees_arg is None:
        results = valfilter(lambda val: val != 0, results)

    return jsonify(valfilter(
        lambda val: val is not None,
        {
            'calculate_results': results,
            'warnings': warning_messages_by_section or None,
        },
    ))

def parse_tiles(lines):
    tiles = dict()
    for chunk in chunker(lines):
        tile_id = chunk.pop(0)
        tile_id = int(re.match("Tile (\d+):", tile_id).groups()[0])  # noqa
        tiles[tile_id] = Tile(copy(chunk))
    return tiles


def tiles_match(t1, t2):
    """
    Checks if there exists any orientation of tile t1 and tile t2 such that
    they share a matching edge.
    """
    for e1, e2 in product(t1.edges(), t2.edges()):
        if e1 == e2:
            return True
    return False


tiles = parse_tiles(lines)

matches = defaultdict(bool)
for tid1, tid2 in combinations(tiles.keys(), 2):
    if tiles_match(tiles[tid1], tiles[tid2]):
        matches[tid1] += 1
        matches[tid2] += 1

corner_ids = pipe(matches, valfilter(lambda x: x == 2), dict.keys)
assert len(corner_ids) == 4
print(reduce(mul, corner_ids))

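A self-contained sketch of the corner-finding idiom above: curried valfilter inside pipe keeps only the tile ids whose match count equals 2. The counts below are made up for illustration.

from toolz.curried import pipe, valfilter

match_counts = {11: 2, 12: 3, 13: 2, 14: 4}
corner_ids = pipe(match_counts, valfilter(lambda n: n == 2), dict.keys)
print(sorted(corner_ids))  # [11, 13]
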
def get_vertex_attrs(g):
    return tlz.valfilter(not_None, g.vs.find(name).attributes())

        7: 8,
        9: 10
    }),
),
"keyfilter": (
    chained(dict, curried.keyfilter(lambda x: x > 5)),
    dict.items({
        1: 2,
        3: 4,
        5: 6,
        7: 8,
        9: 10
    }),
),
"valfilter": (
    chained(dict, curried.valfilter(lambda x: x > 5)),
    dict.items({
        1: 2,
        3: 4,
        5: 6,
        7: 8,
        9: 10
    }),
),
"itemfilter": (
    chained(dict, curried.itemfilter(lambda i: i[0] % 2 == 0 and i[1] < 4)),
    dict.items({
        1: 2,
        2: 3,
        3: 4,