Example #1
def filter_data(field, yaml_data):
    """Extract a field of data from the YAML files.

    Args:
      field: the name of the field to extract
      yaml_data: the benchmark YAML data

    Returns:
      the filtered data from the YAML data
    """
    return pipe(
        yaml_data,
        dict,
        valmap(lambda val: val["data"]),
        valmap(filter(lambda item: item["name"].lower() == field)),
        valmap(list),
        valmap(get(0, default=None)),
        valfilter(lambda x: x is not None),
        itemmap(lambda item: (item[0], update_dict(item[1], name=item[0]))),
        lambda dict_: sorted(list(dict_.values()), key=lambda item: item["name"]),
        map(
            update_in(
                keys=["transform"],
                func=lambda x: x + [dict(expr="datum.x > 0.01", type="filter")],
            )
        ),
    )
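
A minimal sketch of the curried-toolz pattern used above, on toy data (hypothetical dict, assuming from toolz.curried import): values are unwrapped with valmap, then valfilter drops the entries that came back empty.

from toolz.curried import pipe, valmap, valfilter

raw = {"a": {"data": 1}, "b": {"data": None}}
pipe(
    raw,
    valmap(lambda val: val["data"]),     # unwrap the "data" field
    valfilter(lambda x: x is not None),  # drop entries with no data
)
# -> {"a": 1}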
Example #2
 def decision_function(self, X, exposure=None):
     if not hasattr(self, 'estimator_'):
         raise NotFittedError()
     pred_args = valmap(growd(2),
                        valfilter(notnone, dict(X=X, exposure=exposure)))
     score = self.estimator_.predict(**pred_args)
     return score
 def transform(self, X, exposure=None):
     data = valmap(growd(2), valfilter(notnone, dict(X=X,
                                                     exposure=exposure)))
     return np.concatenate(tuple(
         map(compose(growd(2), methodcaller('predict', **data)),
             self.estimators)),
                           axis=1)
Example #4
def async_request_logger(logger):
    no_nones = valfilter(lambda x: x)

    async def request_logger_middleware(ctx, next):
        req = ctx[_REQ_HTTP]
        context = ctx.get(_CONTEXT, {})
        msg = "before request.http"

        log = await logger.bind(**context)
        await log.info(msg,
                       url=req.url,
                       method=req.method,
                       params=no_nones(req.params))
        await log.debug(msg, headers=req.headers)

        ctx = await next(ctx)

        res = ctx[_RES_HTTP]

        msg = "after response.http"
        await log.info(
            msg,
            url=res.request.url,
            status=res.status_code,
            method=res.request.method,
            elapsed=res.elapsed,
            size=len(res.parsed_content
                     if hasattr(res, "parsed_content")
                     else res.content),
            duration_us=ctx.get(_REQ_DURATION, None),
        )
        await log.debug(msg, headers=res.headers)
        return ctx

    return request_logger_middleware
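
Note that valfilter with a truthiness predicate, as in no_nones above, drops every falsy value (None, empty strings, 0, False), not only None. A quick sketch with made-up request params:

from toolz.curried import valfilter

no_nones = valfilter(bool)
no_nones({"q": "search", "page": None, "debug": False, "limit": 0})
# -> {"q": "search"}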
Example #5
def load(
    refresh,
    bq_read,
    sqlfn="../fis/data/hist_data_proto.sql",
    dest="/tmp/hists.pq",
):
    if refresh:
        sql = read(sqlfn)
        df_ = bq_read(sql)
        hist_cols = get_hist_cols_raw_dl(df_)
        h_kw = {
            h: lambda df, h=h: df[h].map(arr_of_str2dict)
            for h in hist_cols
        }
        df_ = df_.assign(**h_kw)
        # hist_cols = get_hist_cols(df)
        hist_cols_asn = {
            c: lambda x, c=c: x[c].map(z.keymap(str))
            for c in hist_cols
        }
        ds = df_.assign(**hist_cols_asn)
        ds.to_parquet(dest)

    ds = pd.read_parquet(dest)
    # print(ds.cycle_collector)
    # return ds

    fn = z.compose(typed_dict, z.keymap(int),
                   z.valfilter(lambda x: x is not None))
    hist_cols = get_dict_hist_cols(ds)
    hist_cols_asn = {c: lambda df, c=c: df[c].map(fn) for c in hist_cols}
    #     return ds
    df = ds.assign(**hist_cols_asn)
    return df
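
A sketch of what the fn composition does to a single histogram cell, with a plain dict standing in for the project's typed_dict helper (an assumption) and z being toolz.curried as in the example:

import toolz.curried as z

fn = z.compose(dict, z.keymap(int), z.valfilter(lambda x: x is not None))
fn({"1": 3, "2": None, "10": 7})
# -> {1: 3, 10: 7}  (null counts dropped, then string keys cast to int)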
Example #6
def request_logger(logger):
    no_nones = valfilter(lambda x: x)

    def request_logger_middleware(ctx, next):
        req = ctx[_REQ_HTTP]
        context = ctx.get(_CONTEXT, {})
        msg = "request.http"

        log = logger.bind(**context)
        log.info(msg,
                 url=req.url,
                 method=req.method,
                 params=no_nones(req.params))
        log.debug(msg, headers=req.headers)

        ctx = next(ctx)

        res = ctx[_RES_HTTP]

        msg = "response.http"
        log.info(
            msg,
            url=res.request.url,
            status=res.status_code,
            method=res.request.method,
            elapsed=res.elapsed,
            size=len(res.content),
            duration_us=ctx.get(_REQ_DURATION, None),
        )
        log.debug(msg, headers=res.headers)
        return ctx

    return request_logger_middleware
Example #7
def get_ping(host, n):
    output = ping_output(host, n)

    return _.pipe(
        ping_re,
        _.itemmap(__.vcall(lambda key, func: (key, func(output)))),
        _.valfilter(lambda v: v),
    )
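
The same shape with toolz directly and a plain lambda in place of the project's vcall helper (hypothetical ping_re mapping): each matcher is run against the ping output, and valfilter keeps only the patterns that actually matched.

import re
from toolz.curried import pipe, itemmap, valfilter

output = "4 packets transmitted, 4 received, 0% packet loss"
ping_re = {"loss": re.compile(r"(\d+)% packet loss").search,
           "rtt": re.compile(r"rtt .* = ([\d./]+) ms").search}

pipe(
    ping_re,
    itemmap(lambda kv: (kv[0], kv[1](output))),  # run each matcher on output
    valfilter(lambda v: v),                      # keep successful matches only
)
# -> {"loss": <re.Match ...>}  ("rtt" is dropped, nothing matched)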
Example #8
def get_xhe_to_full_transformer() -> Dict[str, List[str]]:
    return pipe(
        FullToTwoTable.select(), map(lambda e: (e.full, e.two)),
        groupby(lambda e: e[1]),
        itemmap(lambda kv: (
            kv[0],
            list(filter(lambda e: e != kv[0], map(lambda e: e[0], kv[1]))))),
        itemmap(lambda kv: (kv[0], kv[1] if len(kv[1]) > 0 else [kv[0]])),
        valfilter(lambda e: len(e) == 1), dict)
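
The closing valfilter is what enforces uniqueness here: only keys whose candidate list collapsed to a single entry survive. In isolation, on made-up data:

from toolz import valfilter

candidates = {"ao": ["ao"], "uo": ["wo", "wuo"]}
valfilter(lambda e: len(e) == 1, candidates)
# -> {"ao": ["ao"]}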
Example #9
 def __init__(self, **kwargs):
     self.attrs = pipe(
         self.imports,
         reversed,
         map(vars),
         merge,
         keyfilter(compose(str.islower, first), ),
         valfilter(callable),
     )
     self.attrs.update()
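
Roughly, this harvests every lowercase-named callable from the imported modules into one dict. A sketch of the same pipe with the standard library's math module standing in for self.imports (an assumption, purely for illustration):

import math
from toolz.curried import pipe, map, merge, keyfilter, valfilter, compose, first

attrs = pipe(
    [math],
    map(vars),                               # module -> its attribute dict
    merge,                                   # merge all modules into one dict
    keyfilter(compose(str.islower, first)),  # keep names starting lowercase
    valfilter(callable),                     # keep only callables
)
# attrs["sqrt"] is math.sqrt; dunders and plain constants like math.pi are gone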
Example #10
def main(filter_func, j2_file_name):
    """Generate the chart JSON files

    Args:
      filter_func: function to filter simulation YAML data
      j2_file_name: the j2 file name to insert the data into

    Returns:
      list of (filepath, chart_json) pairs
    """
    return pipe(
        get_data(filter_func), valfilter(lambda x: len(x) > 0),
        itemmap(lambda item:
                (item[0], process_chart(item[0], item[1], j2_file_name))),
        itemmap(write_chart_json(j2_file_name)))
Example #11
def args_extractor(f, merge_defaults=False):
    """
    Takes a function, inspects its parameter list, and returns a
    function that will return all of the named and keyword arguments
    back as a dictionary. The varargs, which don't have names, are
    also returned.

    """
    spec = inspect.getfullargspec(f)
    if spec.defaults:
        param_defaults = dict(
            zip(spec.args[-len(spec.defaults):], spec.defaults))
    else:
        param_defaults = {}
    named_param_defaults = spec.kwonlydefaults or {}
    default_dicts = {}
    num_named_args = len(spec.args)

    if merge_defaults is True and hasattr(f, '__merge_defaults__'):
        merge_defaults = f.__merge_defaults__

    if merge_defaults:
        default_dicts = t.pipe(
            t.merge(named_param_defaults, param_defaults),
            tc.valfilter(lambda v: isinstance(v, dict)),
        )

        if isinstance(merge_defaults, Sequence):
            default_dicts = {k: default_dicts[k] for k in merge_defaults}

        def _args_dict(args, kargs):
            unnamed_args = dict(zip(spec.args, args[0:num_named_args]))
            varargs = args[num_named_args:]
            kargs = t.merge(kargs, unnamed_args)
            for k, d in default_dicts.items():
                kargs[k] = t.merge(d, kargs.get(k) or {})
            return varargs, kargs

    else:

        def _args_dict(args, kargs):
            unnamed_args = dict(zip(spec.args, args[0:num_named_args]))
            varargs = args[num_named_args:]
            kargs = t.merge(kargs, unnamed_args)
            return varargs, kargs

    return _args_dict
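
A usage sketch, assuming the module's inspect and toolz (as t) imports from the definition above, with a hypothetical function:

def greet(name, greeting="hello", *rest, **extra):
    pass

extract = args_extractor(greet)
varargs, kwargs = extract(("Ada", "hi", 1, 2), {"punct": "!"})
# varargs == (1, 2)
# kwargs == {"punct": "!", "name": "Ada", "greeting": "hi"}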
Example #12
    def predict(self, X, exposure=None):
        data = valmap(growd(2), valfilter(notnone, dict(X=X,
                                                        exposure=exposure)))
        prediction = self.coefficients[0] * self.estimators[0].predict(**data)
        if len(prediction.shape) == 2 and prediction.shape[1] == 1:
            prediction = np.ravel(prediction)
            ravel = True
        elif len(prediction.shape) == 1:
            ravel = True
        else:
            ravel = False
        for i, estimator in enumerate(self.estimators[1:]):

            prediction += self.coefficients[i + 1] * (np.ravel(
                estimator.predict(**data)) if ravel else estimator.predict(
                    **data))
        return prediction
Example #13
    def __init__(
        self,
        data=None,
        index=None,
        columns=None,
        estimator=None,
        parent=None,
        feature_level=None,
        copy=False,
        extensions=[
            'harness.python.ext.base.JinjaExtension',
            'harness.python.ext.SciKit.SciKitExtension',
            'harness.python.ext.Bokeh.BokehModelsExtension',
            'harness.python.ext.Bokeh.BokehPlottingExtension',
            'harness.python.ext.Bokeh.BokehChartsExtension'
        ],
    ):
        kwargs = dict(
            estimator=estimator,
            parent=parent,
            feature_level=feature_level,
            extensions=extensions,
        )

        self.set_params(**kwargs)

        for ext in self.extensions:
            if ext not in self.env.extensions:
                self.env.add_extension(ext)
            ext = self.env.extensions[ext]
            if (not (ext.mixin is None)
                    and not (ext.mixin in self.__class__.__bases__)):
                self.__class__.__bases__ += (ext.mixin, )

        kwargs = pipe(locals(),
                      keyfilter(partial(operator.contains, self._blacklist)),
                      valfilter(complement(lambda x: x is None)))

        super().__init__(**kwargs)
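
valfilter(complement(lambda x: x is None)) is the same null-stripping filter other examples spell as valfilter(lambda x: x is not None); a quick standalone check:

from toolz import complement, valfilter

valfilter(complement(lambda x: x is None),
          {"estimator": "ols", "parent": None, "feature_level": 2})
# -> {"estimator": "ols", "feature_level": 2}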
Example #14
def remove_by_feature_shuffling(log: LogType,
                                predict_fn: PredictFnType,
                                eval_fn: EvalFnType,
                                eval_data: pd.DataFrame,
                                extractor: ExtractorFnType,
                                metric_name: str,
                                max_removed_by_step: int = 50,
                                threshold: float = 0.005,
                                speed_up_by_importance: bool = False,
                                parallel: bool = False,
                                nthread: int = 1,
                                seed: int = 7) -> List[str]:
    """
        Performs feature selection based on the evaluation of the test set vs the
        evaluation of the test set with randomly shuffled features.

        Parameters
        ----------
        log : LogType
            Dictionary of evaluation logs.

        predict_fn: function pandas.DataFrame -> pandas.DataFrame
            A partially defined predictor that takes a DataFrame and returns the
            predicted score for this dataframe

        eval_fn : function DataFrame -> log dict
            A partially defined evaluation function that takes a dataset with prediction and
            returns the evaluation logs.

        eval_data: pandas.DataFrame
            Data used to evaluate the model after shuffling

        extractor: function str -> float
            An extractor that takes a string and returns the value of that key in a dict.

        metric_name: str
            String with the name of the column that refers to the metric column to be extracted

        max_removed_by_step: int (default 50)
            The maximum number of features to remove. It will only consider the max_removed_by_step least important
            features. If speed_up_by_importance=True it will first filter the least relevant features and
            shuffle only those. If speed_up_by_importance=False it will shuffle all features and drop the last
            max_removed_by_step in terms of PIMP. In both cases, a feature is only removed if the drop in
            performance is at most the defined threshold.

        threshold: float (default 0.005)
            Threshold for model performance comparison

        speed_up_by_importance: bool (default False)
            If it should narrow the search by looking at feature importance first, before computing PIMP importance.
            If True, it will only shuffle the max_removed_by_step least important features.

        parallel: bool (default False)

        nthread: int (default 1)

        seed: int (default 7)
            Random seed

        Returns
        ----------
        features: list of str
            The remaining features after removing based on feature importance

    """
    random.seed(seed)

    curr_metric = get_avg_metric_from_extractor(log, extractor, metric_name)
    eval_size = eval_data.shape[0]

    features_to_shuffle = order_feature_importance_avg_from_logs(log)[-max_removed_by_step:] \
        if speed_up_by_importance else get_used_features(log)

    def shuffle(feature: str) -> pd.DataFrame:
        return eval_data.assign(
            **{feature: eval_data[feature].sample(frac=1.0)})

    feature_to_delta_metric = compose(
        lambda m: curr_metric - m,
        get_avg_metric_from_extractor(extractor=extractor,
                                      metric_name=metric_name),
        gen_validator_log(fold_num=0, test_size=eval_size), eval_fn,
        predict_fn, shuffle)

    if parallel:
        metrics = Parallel(n_jobs=nthread, backend="threading")(
            delayed(feature_to_delta_metric)(feature)
            for feature in features_to_shuffle)
        feature_to_delta_metric = dict(zip(features_to_shuffle, metrics))
        gc.collect()

    else:
        feature_to_delta_metric = {
            feature: feature_to_delta_metric(feature)
            for feature in features_to_shuffle
        }

    return pipe(feature_to_delta_metric,
                valfilter(lambda delta_metric: delta_metric < threshold),
                lambda d: sorted(d, key=lambda f: feature_to_delta_metric.get(f)),
                take(max_removed_by_step), list)
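
A toy view of the closing pipe (hypothetical delta metrics): features whose shuffled-evaluation drop stays below the threshold are kept as removal candidates, least impactful first.

from toolz.curried import pipe, valfilter, take

feature_to_delta_metric = {"age": 0.001, "income": 0.020, "zip3": -0.002}
threshold, max_removed_by_step = 0.005, 50

pipe(feature_to_delta_metric,
     valfilter(lambda delta: delta < threshold),
     lambda d: sorted(d, key=d.get),
     take(max_removed_by_step),
     list)
# -> ['zip3', 'age']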
Example #15
def split_paths(split_paths, graph_in):
    debug("____")
    debug("split_paths:", split_paths)
    debug("graph_in:", graph_in)

    # Convert list of split_paths into list of vertex indices. Ignores
    # split_paths which don't match any vertices in the graph.
    # All edges pointing at the indices will be deleted from the graph.
    split_path_indices = list(unnest_iterable(map(
        split_path_spec_to_indices(graph_in),
        split_paths
    )))

    debug("split_path_indices:", split_path_indices)

    # Short circuit if there is nothing to do (split_paths didn't match any
    # vertices in the graph).
    if len(split_path_indices) == 0:
        return {"rest": graph_in}

    # If graph has multiple roots, add a single one connecting all existing
    # roots to make it easy to split the graph into 2 sets of vertices after
    # deleting edges pointing at split_path_indices.
    fake_root_name = "__root__"
    graph, root_name = add_root(fake_root_name, graph_in)

    debug("root_name", root_name)

    if (
        find_vertex_by_name_or_none(graph)(root_name).index
        in split_path_indices
    ):
        return {"main": graph_in}

    # Copy graph if add_root has not already created a copy, since we are
    # going to mutate the graph and don't want to mutate a function argument.
    graph = graph if graph is not graph_in else graph.copy()

    if DEBUG_PLOT:
        layout = graph.layout('tree')
        debug_plot(graph, layout=layout)

    # Get indices of all vertices which can be reached from split_path_indices
    # (including split_path_indices). This is a set of all split_paths and their
    # dependencies.
    split_off_vertex_indices = frozenset(
        subcomponent_multi(graph, split_path_indices))
    debug("split_off_vertex_indices", split_off_vertex_indices)

    # Delete edges which point at any of the vertices in split_path_indices.
    graph.delete_edges(_target_in=split_path_indices)

    if DEBUG_PLOT:
        debug_plot(graph, layout=layout)

    # Get indices of all vertices which can be reached from the root. Since
    # edges pointing at split_path_indices have been deleted, none of the
    # split_path_indices will be included. Dependencies of rest_with_common will
    # only be included if they can be reached from any vertex which is itself
    # not in split_off_vertex_indices.
    rest_with_common = frozenset(graph.subcomponent(root_name, mode="out"))
    debug("rest_with_common", rest_with_common)

    # Get a set of all dependencies common to split_path_indices and the rest
    # of the graph.
    common = split_off_vertex_indices.intersection(rest_with_common)
    debug("common", common)

    # Get a set of vertices which cannot be reached from split_path_indices.
    rest_without_common = rest_with_common.difference(common)
    debug("rest_without_common", rest_without_common)

    # Get a set of split_path_indices and their dependencies which cannot be
    # reached from the rest of the graph.
    split_off_without_common = split_off_vertex_indices.difference(common)
    debug("split_off_without_common", split_off_without_common)

    if DEBUG_PLOT:
        def choose_color(index):
            if (index in split_off_without_common):
                return "green"
            elif (index in rest_without_common):
                return "red"
            else:
                return "purple"

        vertex_color = [choose_color(v.index) for v in graph.vs]

        debug_plot(
            graph,
            layout=layout,
            vertex_color=vertex_color
        )

    # Return subgraphs based on calculated sets of vertices.

    result_keys = ["main", "common", "rest"]
    result_values = [
        # Split paths and their deps (unreachable from rest of the graph).
        graph.induced_subgraph(split_off_without_common),
        # Dependencies of split paths which can be reached from the rest of the
        # graph.
        graph.induced_subgraph(common),
        # Rest of the graph (without dependencies common with split paths).
        graph.induced_subgraph(rest_without_common),
    ]

    debug('result_values', result_values[0].vs["name"])

    return tlz.valfilter(
        tlz.complement(graph_is_empty),
        dict(zip(
            result_keys,
            (
                result_values if root_name != fake_root_name
                # If root was added, remove it
                else tlz.map(remove_added_root(fake_root_name), result_values)
            )
        ))
    )
Example #16
def nonempty_search_kw(search_kw):
    return pipe(
        search_kw,
        valfilter(not_null),
    )
Example #17
def calculate():
    # Process controller arguments.

    calculees_arg = request.args.getlist('calculee') or None
    saisies_arg = request.args.get('saisies')
    saisie_variables = {}
    if saisies_arg is not None:
        try:
            saisie_variables = json.loads(saisies_arg)
        except ValueError:
            raise BadRequest('"saisies" GET parameter must contain a valid JSON.')

    # Accept aliases

    saisie_variables = dict(iter_saisie_variables_or_aliases(saisie_variables))

    wrong_saisie_variable_names = list(filter(
        lambda variable_name: state.variables_definitions.get_type(variable_name) != 'variable_saisie',
        saisie_variables.keys(),
        ))
    if wrong_saisie_variable_names:
        raise BadRequest([
            '"saisies" GET parameter contains the variable "{}" which is not a "saisie" variable.'.format(variable_name)
            for variable_name in wrong_saisie_variable_names
            ])

    warning_messages_by_section = defaultdict(list)

    if calculees_arg is None:
        calculee_variable_names = state.variables_definitions.filter_calculees(kind='restituee')
    else:
        calculee_variable_names = calculees_arg
        for calculee_variable_name in calculee_variable_names:
            if not state.variables_definitions.is_calculee(calculee_variable_name, kind='restituee'):
                warning_messages_by_section['saisies'].append(
                    'Variable "{}" is not a variable of type "calculee restituee"'.format(calculee_variable_name)
                    )

    if 'V_ANREV' not in saisie_variables:
        warning_messages_by_section['saisies'].append(
            'V_ANREV should be given as a "saisie" variable. Hint: saisies={"V_ANREV":2014}.'
            )

    # Load formula functions with a new cache for each HTTP request

    result_by_formula_name_cache = {}
    formulas_functions = formulas.get_formulas(
        cache=result_by_formula_name_cache,
        constants=state.constants,
        saisie_variables=saisie_variables,
        )

    # Apply verifs

    errors = verifs.get_errors(
        formulas=formulas_functions,
        saisie_variables=saisie_variables,
        )
    if errors is not None:
        warning_messages_by_section['verif_errors'] = [
            (error, state.definition_by_error_name.get(error, {}).get('description'))
            for error in unique(errors)  # Keep order
            ]

    # Calculate results

    results = {
        calculee_variable_name: formulas_functions[calculee_variable_name]()
        for calculee_variable_name in calculee_variable_names
        }
    if calculees_arg is None:
        results = valfilter(lambda val: val != 0, results)

    return jsonify(valfilter(
        lambda val: val is not None,
        {
            'calculate_results': results,
            'warnings': warning_messages_by_section or None,
            },
        ))
Example #18
def parse_tiles(lines):
    tiles = dict()
    for chunk in chunker(lines):
        tile_id = chunk.pop(0)
        tile_id = int(re.match(r"Tile (\d+):", tile_id).groups()[0])
        tiles[tile_id] = Tile(copy(chunk))
    return tiles


def tiles_match(t1, t2):
    """
    Checks if there exists any orientation of tile t1 and tile t2 such that
    they share a matching edge.
    """
    for e1, e2 in product(t1.edges(), t2.edges()):
        if e1 == e2:
            return True
    return False


tiles = parse_tiles(lines)
matches = defaultdict(int)
for tid1, tid2 in combinations(tiles.keys(), 2):
    if tiles_match(tiles[tid1], tiles[tid2]):
        matches[tid1] += 1
        matches[tid2] += 1

corner_ids = pipe(matches, valfilter(lambda x: x == 2), dict.keys)
assert len(corner_ids) == 4
print(reduce(mul, corner_ids))
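
The corner-finding trick in isolation (made-up match counts): only the corner tiles of the assembled image have exactly two matching neighbours, so valfilter on the match count is enough to pick them out.

from toolz.curried import pipe, valfilter

matches = {1951: 2, 2311: 3, 3079: 2, 2729: 4}
pipe(matches, valfilter(lambda x: x == 2), dict.keys)
# -> dict_keys([1951, 3079])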
Example #19
 def get_vertex_attrs(g):
     return tlz.valfilter(not_None, g.vs.find(name).attributes())
Example #20
         7: 8,
         9: 10
     }),
 ),
 "keyfilter": (
     chained(dict, curried.keyfilter(lambda x: x > 5)),
     dict.items({
         1: 2,
         3: 4,
         5: 6,
         7: 8,
         9: 10
     }),
 ),
 "valfilter": (
     chained(dict, curried.valfilter(lambda x: x > 5)),
     dict.items({
         1: 2,
         3: 4,
         5: 6,
         7: 8,
         9: 10
     }),
 ),
 "itemfilter": (
     chained(dict,
             curried.itemfilter(lambda i: i[0] % 2 == 0 and i[1] < 4)),
     dict.items({
         1: 2,
         2: 3,
         3: 4,