Example #1
def solve(params, set_eta, calc_d2f):
    """Solve the phase field problem with FiPy

    Args:
      params: dictionary of parameter values
      set_eta: function to set the initial value of the phase field
      calc_d2f: function to calculate the second derivative of the
        free energy function

    Returns:
      dictionary of the equation, variables and residuals
    """
    def sweep_wrapper(kwargs):
        """Wrapper for sweep function

        Ensures that residuals tuple has the residual appended to it.
        """
        return pipe(
            dissoc(kwargs, "residuals"),
            lambda x: sweep(params["dt"], calc_d2f, **x),
            lambda x: kwargs["residuals"] + (x, ),
            assoc(kwargs, "residuals"),
        )

    return pipe(
        params,
        get_mesh,
        get_vars(params, set_eta),
        lambda x: assoc(x, "equation", get_eq(params, **x)),
        lambda x: assoc(x, "residuals", ()),
        iterate_(sweep_wrapper, params["fipy_iter"]),
    )
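The last pipe step above works because assoc is used in its curried form: called with only a dictionary and a key, it returns a partial that waits for the value. A minimal stand-alone sketch of that pattern, assuming the curried variants from toolz.curried and a made-up state dictionary in place of the real kwargs:

from toolz.curried import assoc, dissoc, pipe

state = {"residuals": (1.0,), "value": 2}     # hypothetical stand-in for kwargs
pipe(
    dissoc(state, "residuals"),               # drop the key before the computation
    lambda x: x["value"] * 10,                # stand-in for the real sweep() call
    lambda r: state["residuals"] + (r,),      # append the new residual
    assoc(state, "residuals"),                # curried assoc: still waiting for its value
)
# -> {'residuals': (1.0, 20), 'value': 2}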
Example #2
def test_sfepy():
    """Run some tests
    """
    assert np.allclose(
        run_sfepy_fake(assoc(get_params(), "delta", 0.1), (10, 10), 1.0)[1][0, 0],
        [-0.00515589, -0.00515589],
    )
    assert np.allclose(
        run_sfepy_fake(assoc(get_params(), "delta", 0.1), (10, 10), 0.1)[1][0, 0],
        [-0.00051559, -0.00051559],
    )
Example #3
def split_cf_messages(format_message, var_length_key, event, separator=', ',
                      max_length=255):
    """
    Try to split cloud feed log events out into multiple events if the message
    is too long (the variable-length variable would cause the message to be
    too long.)

    :param str format_message: The format string to use to format the event
    :param str var_length_key: The key in the event dictionary that contains
        the variable-length part of the formatted message.
    :param dict event: The event dictionary
    :param str separator: The separator to use to join the various elements
        that should be varied.  (e.g. if the elements in "var_length_key" are
        ["1", "2", "3"] and the separator is "; ", "var_length_key" will be
        represented as "1; 2; 3")
    :param int max_length: The maximum length of the formatted message.

    :return: `list` of event dictionaries with the formatted message and
        the split event field.
    """
    def length_calc(e):
        return len(format_message.format(**e))

    render = compose(assoc(event, var_length_key), separator.join,
                     curry(map, str))

    if length_calc(event) <= max_length:
        return [(render(event[var_length_key]), format_message)]

    events = split(render, event[var_length_key], max_length, length_calc)
    return [(e, format_message) for e in events]
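For reference, the render helper composes right to left: stringify each element, join with the separator, then associate the joined string back onto the event. A small sketch under the assumption that assoc here is the curried version (e.g. from toolz.curried), with an illustrative event dict:

from toolz import compose, curry
from toolz.curried import assoc

event = {"id": 42, "servers": None}           # illustrative event
render = compose(assoc(event, "servers"), "; ".join, curry(map, str))
render([1, 2, 3])
# -> {'id': 42, 'servers': '1; 2; 3'}  (a new dict; `event` itself is unchanged)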
Example #4
def split_list_servers(event, maxlength=event_max_length):
    """
    Split response_body in the listing-servers-detail log such that each
    log's response_body is < maxlength. Since response_body is the only
    large part of this event, it is enough to keep just that part under
    maxlength, maxlength being a general guideline for event length.

    :param dict event: Event to split
    :param int maxlength: Event JSON max length

    :return: List of (event, formatted message) tuples
    """
    message = "Listing server details succeeded"

    _json = json.dumps(event["response_body"])
    if len(_json) < maxlength:
        event["response_body"] = _json
        return [(event, message)]

    def part_json(servers):
        return json.dumps({"servers": servers})

    parts = split(part_json, event["response_body"]["servers"], maxlength, len)
    del event["response_body"]
    return [(assoc(event, "response_body", part), message) for part in parts]
Example #5
 def _join_split_log(
         log_tuple: Tuple[LogType, LogType]) -> Tuple[LogType, LogType]:
     train_log = {}
     split_log, validator_log = log_tuple
     train_log["train_log"] = validator_log["train_log"]
     return train_log, assoc(dissoc(validator_log, "train_log"),
                             "split_log", split_log)
Example #6
def split_cf_messages(format_message,
                      var_length_key,
                      event,
                      separator=', ',
                      max_length=255):
    """
    Try to split cloud feed log events out into multiple events if the message
    is too long (the variable-length variable would cause the message to be
    too long.)

    :param str format_message: The format string to use to format the event
    :param str var_length_key: The key in the event dictionary that contains
        the variable-length part of the formatted message.
    :param dict event: The event dictionary
    :param str separator: The separator to use to join the various elements
        that should be varied.  (e.g. if the elements in "var_length_key" are
        ["1", "2", "3"] and the separator is "; ", "var_length_key" will be
        represented as "1; 2; 3")
    :param int max_length: The maximum length of the formatted message.

    :return: `list` of event dictionaries with the formatted message and
        the split event field.
    """
    def length_calc(e):
        return len(format_message.format(**e))

    render = compose(assoc(event, var_length_key), separator.join,
                     curry(map, str))

    if length_calc(event) <= max_length:
        return [(render(event[var_length_key]), format_message)]

    events = split(render, event[var_length_key], max_length, length_calc)
    return [(e, format_message) for e in events]
Example #7
def split_list_servers(event, maxlength=event_max_length):
    """
    Split response_body in the listing-servers-detail log such that each
    log's response_body is < maxlength. Since response_body is the only
    large part of this event, it is enough to keep just that part under
    maxlength, maxlength being a general guideline for event length.

    :param dict event: Event to split
    :param int maxlength: Event JSON max length

    :return: List of (event, formatted message) tuples
    """
    message = "Listing server details succeeded"

    _json = json.dumps(event["response_body"])
    if len(_json) < maxlength:
        event["response_body"] = _json
        return [(event, message)]

    def part_json(servers):
        return json.dumps({"servers": servers})

    parts = split(part_json, event["response_body"]["servers"], maxlength, len)
    del event["response_body"]
    return [(assoc(event, "response_body", part), message) for part in parts]
Example #8
 def _process_config(self, config: Mapping) -> Mapping:
     processed_config = pipe(
         config,
         assoc(key='Tags',
               value=merge(standard_tags(self), config.get('Tags', {}))),
         # original tags take precedence if there is a conflict
         super()._process_config)
     return processed_config
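The assoc(key='Tags', value=...) step works because a curried assoc called with only keyword arguments returns a partial; pipe then supplies the config dict as the missing positional argument. A minimal sketch, assuming toolz.curried and a made-up config:

from toolz.curried import assoc, pipe

config = {"Name": "demo"}                     # hypothetical config
pipe(config, assoc(key="Tags", value={"team": "infra"}))
# -> {'Name': 'demo', 'Tags': {'team': 'infra'}}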
Example #9
 def _process_config(self, config: Mapping) -> Mapping:
     tags_dict = merge(standard_tags(self), config.get('Tags', {}))
     tags_list = [{'Key': k, 'Value': v} for k, v in tags_dict.items()]
     processed_config = pipe(
         config,
         assoc(key='Tags', value=tags_list),
         super()._process_config,
     )
     return processed_config
Example #10
def validator(train_data: pd.DataFrame, split_fn: SplitterFnType,
              train_fn: LearnerFnType,
              eval_fn: EvalFnType) -> ValidatorReturnType:
    """
    Splits the training data into folds given by the split function and
    performs a train-evaluation sequence on each fold by calling
    ``validator_iteration``.

    Parameters
    ----------
    train_data : pandas.DataFrame
        A Pandas' DataFrame with training data

    split_fn : function pandas.DataFrame ->  list of tuple
        Partially defined split function that takes a dataset and returns
        a list of folds. Each fold is a Tuple of arrays. The first array in
        each tuple contains training indexes while the second array
        contains validation indexes.

    train_fn : function pandas.DataFrame -> prediction_function, predictions_dataset, logs
        A partially defined learning function that takes a training set and
        returns a predict function, a dataset with training predictions and training
        logs.

    eval_fn : function pandas.DataFrame -> dict
        A partially defined evaluation function that takes a dataset with predictions and
        returns the evaluation logs.

    Returns
    ----------
    A list of log-like dictionary evaluations.
    """

    folds, logs = split_fn(train_data)

    def fold_iter(fold: Tuple[int, Tuple[pd.Index, pd.Index]]) -> LogType:
        (fold_num, (train_index, test_indexes)) = fold
        return validator_iteration(train_data, train_index, test_indexes,
                                   fold_num, train_fn, eval_fn)

    zipped_logs = pipe(folds, enumerate, map(fold_iter), partial(zip, logs))

    def _join_split_log(
            log_tuple: Tuple[LogType, LogType]) -> Tuple[LogType, LogType]:
        train_log = {}
        split_log, validator_log = log_tuple
        train_log["train_log"] = validator_log["train_log"]
        return train_log, assoc(dissoc(validator_log, "train_log"),
                                "split_log", split_log)

    train_logs, validator_logs = zip(*map(_join_split_log, zipped_logs))
    first_train_log = first(train_logs)
    return assoc(first_train_log, "validator_log", list(validator_logs))
Example #11
    def sweep_wrapper(kwargs):
        """Wrapper for sweep function

        Ensures that residuals tuple has the residual appended to it.
        """
        return pipe(
            dissoc(kwargs, "residuals"),
            lambda x: sweep(params["dt"], calc_d2f, **x),
            lambda x: kwargs["residuals"] + (x, ),
            assoc(kwargs, "residuals"),
        )
Example #12
def test_combined():
    """Run a combined test
    """
    assert pipe(
        get_params(),
        assoc(key="fipy_iter", value=2),
        run_main,
        get("eta"),
        np.array,
        np.sum,
        lambda x: np.allclose(x, 1515.784),
    )
Example #13
    def _process_config(self, config: Mapping) -> Mapping:
        tags = [{
            'Key': k,
            'Value': v
        } for k, v in merge(standard_tags(self), config.get('Tags',
                                                            {})).items()]

        processed_config = pipe(
            config,
            assoc(key='Tags', value=tags),
            # original tags take precedence if there is a conflict
            super()._process_config)
        return processed_config
Example #14
 def scatter_drag(self,
                  x_points: 'Array',
                  y_points: 'Array',
                  *,
                  show_eqn=True,
                  options={}):
     options = tz.assoc(options, '_fig', self.figure)
     box = scatter_drag(x_points,
                        y_points,
                        show_eqn=show_eqn,
                        options=options)
     widget = box.children[0]
     self.widgets.append(widget)
     return self
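Note that options={} is a shared mutable default argument; tz.assoc adds '_fig' by returning a fresh dict, so the default is never written to and repeated calls do not leak state into each other. A small sketch of the difference, assuming import toolz as tz:

import toolz as tz

def mutating(options={}):
    options["_fig"] = "fig"                   # writes into the shared default dict
    return options

def non_mutating(options={}):
    return tz.assoc(options, "_fig", "fig")   # returns a new dict; default stays empty

mutating() is mutating()                      # True: both calls return the same object
non_mutating() is non_mutating()              # False: each call builds its own dict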
Example #15
def test_fipy():
    """Run the FiPy tests
    """
    assert pipe(
        dict(e11=0.0, e12=0.0, e22=0.0),
        lambda x: np.allclose(
            fipy_solve(
                assoc(get_params(), "fipy_iter", 2),
                set_eta(None),
                calc_d2f(get_params(), x),
            )["residuals"][-1],
            60.736145628467526,
        ),
    )
Example #16
    def _process_config(self, config: Mapping) -> Mapping:
        self.role = role = config['Role']
        assert isinstance(self.role, Role)

        processed_config = pipe(
            config,
            assoc(key='Role', value=role.arn),
            assoc(key='Tags',
                  value=merge(standard_tags(self), config.get('Tags', {}))),
            # original tags take precedence if there is a conflict
            super()._process_config,
            dict)

        for config_key, model in self.non_creation_parameters.items():
            if config_key in processed_config:
                operation_name = getattr(
                    self.service_client,
                    model.create_name + '_' + model.sdk_name)
                operation_model = get_operation_model(self.service_client,
                                                      operation_name)
                value = self._process_config_value(
                    None, processed_config[config_key])

                if model.is_collection:
                    processed_value = [
                        self._process_config_value(operation_model.input_shape,
                                                   elt) for elt in value
                    ]
                else:
                    processed_value = self._process_config_value(
                        operation_model.input_shape.members[config_key],
                        processed_config[config_key])

                processed_config[config_key] = processed_value

        return processed_config
Example #17
def fipy_iter(params, data):
    """One FiPy iteration

    Args:
      params: the parameter dictionary
      data: dictionary of the phase field and total strain fields

    Returns:
      updated data dictionary
    """
    return pipe(
        dissoc(data, "eta"),
        calc_d2f(params),
        lambda x: fipy_solve(params, set_eta(data["eta"]), x)["eta"],
        lambda x: assoc(data, "eta", x),
    )
Example #18
def one_iter(params, data):
    """Do one iteration

    Args:
      params: the parameter dictionary
      data: dictionary of the phase field and strain fields

    Returns:
      dictionary of the phase field and strain fields
    """
    return pipe(
        data,
        fipy_iter(params),
        sfepy_iter(params),
        lambda x: assoc(x, "step_counter", x["step_counter"] + 1),
    )
Example #19
def add_weight(answer: dict):
    def is_a_matching_question(answer):
        return pipe(
            [answer_keys.match_left, answer_keys.incorrect],
            map(lambda k: k in answer),
            any,
        )

    needs_weight = compose(
        any,
        juxt(complement(is_a_matching_question), ),
    )

    if needs_weight(answer):
        return assoc(answer, answer_keys.weight,
                     int(answer.get(answer_keys.weight, 0) and 100))

    return answer
Example #20
def split_execute_convergence(event, max_length=event_max_length):
    """
    Try to split execute-convergence event out into multiple events if there
    are too many CLB nodes, too many servers, or too many steps.

    The problem is mainly the servers, since they take up the most space.

    Experimentally determined that logs probably get cut off at around 75k
    characters - we're going to limit it to 50k.

    :param dict event: The 'execute-convergence' type event dictionary to split
    :param int max_length: The maximum length of the entire JSON-formatted
        dictionary.

    :return: `list` of `tuple` of (`dict`, `str`).  The `dict`s in the tuple
        represent the split-up event dicts, and the `str` the format string
        for each.  If the event does not need to be split, the list will only
        have one tuple.
    """
    message = "Executing convergence"
    if _json_len(event) <= max_length:
        return [(event, message)]

    events = [(event, message)]
    large_things = sorted(('servers', 'lb_nodes'),
                          key=compose(_json_len, event.get),
                          reverse=True)

    # simplified event which serves as a base for the split out events
    base_event = keyfilter(
        lambda k: k not in ('desired', 'servers', 'lb_nodes', 'steps'),
        event)

    for thing in large_things:
        split_up_events = split(
            assoc(base_event, thing), event[thing], max_length,
            _json_len)
        events.extend([(e, message) for e in split_up_events])
        del event[thing]
        if _json_len(event) <= max_length:
            break

    return events
Example #21
def make_csv(columns, number, size, filename):
    return pipe(
        'data.json',
        lambda x: loadfn(x, cls=MontyDecoder)[:number],
        map(
            lambda x: assoc(
                x,
                key='formula',
                value=x['final_str'].composition.reduced_formula
            ),
        ),
        list,
        lambda x: pandas.DataFrame(x),
        lambda x: x[columns],
        lambda x: x.to_csv('tmp.csv', index=False),
        lambda _: pandas.read_csv('tmp.csv', na_values=['None', 'na']),
        lambda x: x.dropna().reset_index(drop=True).ix[:size],
        lambda x: x.to_csv(filename, index=False)
    )
Example #22
def split_execute_convergence(event, max_length=event_max_length):
    """
    Try to split execute-convergence event out into multiple events if there
    are too many CLB nodes, too many servers, or too many steps.

    The problem is mainly the servers, since they take up the most space.

    Experimentally determined that logs probably get cut off at around 75k
    characters - we're going to limit it to 50k.

    :param dict event: The 'execute-convergence' type event dictionary to split
    :param int max_length: The maximum length of the entire JSON-formatted
        dictionary.

    :return: `list` of `tuple` of (`dict`, `str`).  The `dict`s in the tuple
        represent the split-up event dicts, and the `str` the format string
        for each.  If the event does not need to be split, the list will only
        have one tuple.
    """
    message = "Executing convergence"
    if _json_len(event) <= max_length:
        return [(event, message)]

    events = [(event, message)]
    large_things = sorted(('servers', 'lb_nodes'),
                          key=compose(_json_len, event.get),
                          reverse=True)

    # simplified event which serves as a base for the split out events
    base_event = keyfilter(
        lambda k: k not in ('desired', 'servers', 'lb_nodes', 'steps'), event)

    for thing in large_things:
        split_up_events = split(assoc(base_event, thing), event[thing],
                                max_length, _json_len)
        events.extend([(e, message) for e in split_up_events])
        del event[thing]
        if _json_len(event) <= max_length:
            break

    return events
Example #23
def calc_gradient_free_energy(data):
    """Calculate the gradient free energy for one time step

    Args:
      data: dictionary of data from an output file for a given time step

    Returns:
      a float representing the gradient free energy for a given time
      step
    """
    func = sequence(
        lambda x: get_vars(x, set_eta(data["eta"]), get_mesh(x)),
        get("eta"),
        lambda x: x.grad.mag ** 2,
    )
    return pipe(
        data["params"].item(),
        lambda x: assoc(x, "dx", x["lx"] / x["nx"]),
        lambda x: func(x) * (x["kappa"] / 2) * calc_dx2(x),
        np.array,
        np.sum,
    )
Example #24
    read_and_plot(calc_position_d)(ctx)


@cli.command()
@click.pass_context
def elastic_free_energy(ctx):
    """Command to plot the elastic free energy
    """
    read_and_plot(calc_elastic_free_energy)(ctx)


calc_dx2 = lambda x: (x["lx"] / x["nx"]) ** 2


calc_elastic_free_energy = sequence(
    lambda x: assoc(x, "params", x["params"].item()),
    lambda x: assoc(x, "dx", x["params"]["lx"] / x["params"]["nx"]),
    lambda x: assoc(x, "total_strain", dict(e11=x["e11"], e22=x["e22"], e12=x["e12"])),
    lambda x: calc_elastic_f(x["params"], x["total_strain"], x["eta"])
    * calc_dx2(x["params"]),
    np.sum,
)


@cli.command()
@click.pass_context
def bulk_free_energy(ctx):
    """Command to plot the bulk free energy
    """
    read_and_plot(calc_bulk_free_energy)(ctx)
Example #25
def validator(train_data: pd.DataFrame,
              split_fn: SplitterFnType,
              train_fn: LearnerFnType,
              eval_fn: EvalFnType,
              perturb_fn_train: PerturbFnType = identity,
              perturb_fn_test: PerturbFnType = identity,
              predict_oof: bool = False) -> ValidatorReturnType:
    """
    Splits the training data into folds given by the split function and
    performs a train-evaluation sequence on each fold by calling
    ``validator_iteration``.

    Parameters
    ----------
    train_data : pandas.DataFrame
        A Pandas' DataFrame with training data

    split_fn : function pandas.DataFrame ->  list of tuple
        Partially defined split function that takes a dataset and returns
        a list of folds. Each fold is a Tuple of arrays. The first array in
        each tuple contains training indexes while the second array
        contains validation indexes.

    train_fn : function pandas.DataFrame -> prediction_function, predictions_dataset, logs
        A partially defined learning function that takes a training set and
        returns a predict function, a dataset with training predictions and training
        logs.

    eval_fn : function pandas.DataFrame -> dict
        A partially defined evaluation function that takes a dataset with predictions and
        returns the evaluation logs.

    perturb_fn_train : PerturbFnType
        A partially defined corruption function that takes a dataset and returns
        a corrupted dataset. Perturbation applied at train-time.

    perturb_fn_test : PerturbFnType
        A partially defined corruption function that takes a dataset and returns
        a corrupted dataset. Perturbation applied at test-time.

    predict_oof : bool
        Whether to return out of fold predictions on the logs

    Returns
    ----------
    A list of log-like dictionary evaluations.
    """

    folds, logs = split_fn(train_data)

    train_fn = compose(train_fn, perturb_fn_train)
    eval_fn = compose(eval_fn, perturb_fn_test)

    def fold_iter(fold: Tuple[int, Tuple[pd.Index, pd.Index]]) -> LogType:
        (fold_num, (train_index, test_indexes)) = fold
        return validator_iteration(train_data, train_index, test_indexes,
                                   fold_num, train_fn, eval_fn, predict_oof)

    zipped_logs = pipe(folds, enumerate, map(fold_iter), partial(zip, logs))

    def _join_split_log(
            log_tuple: Tuple[LogType, LogType]) -> Tuple[LogType, LogType]:
        train_log = {}
        split_log, validator_log = log_tuple
        train_log["train_log"] = validator_log["train_log"]
        return train_log, assoc(dissoc(validator_log, "train_log"),
                                "split_log", split_log)

    def get_perturbed_columns(perturbator: PerturbFnType) -> List[str]:
        args = inspect.getfullargspec(perturbator).kwonlydefaults
        return args['cols'] if args else []

    train_logs, validator_logs = zip(*map(_join_split_log, zipped_logs))
    first_train_log = first(train_logs)

    perturbator_log = {
        'perturbated_train': [],
        'perturbated_test': []
    }  # type: LogType
    if perturb_fn_train != identity:
        perturbator_log['perturbated_train'] = get_perturbed_columns(
            perturb_fn_train)
    if perturb_fn_test != identity:
        perturbator_log['perturbated_test'] = get_perturbed_columns(
            perturb_fn_test)
    first_train_log = assoc(first_train_log, "perturbator_log",
                            perturbator_log)

    return assoc(first_train_log, "validator_log", list(validator_logs))
Example #26
def parallel_validator(train_data: pd.DataFrame,
                       split_fn: SplitterFnType,
                       train_fn: LearnerFnType,
                       eval_fn: EvalFnType,
                       n_jobs: int = 1,
                       predict_oof: bool = False) -> ValidatorReturnType:
    """
    Splits the training data into folds given by the split function and
    performs a train-evaluation sequence on each fold. Tries to run each
    fold in parallel using up to n_jobs processes.

    Parameters
    ----------
    train_data : pandas.DataFrame
        A Pandas' DataFrame with training data

    split_fn : function pandas.DataFrame ->  list of tuple
        Partially defined split function that takes a dataset and returns
        a list of folds. Each fold is a Tuple of arrays. The first array in
        each tuple contains training indexes while the second array
        contains validation indexes.

    train_fn : function pandas.DataFrame -> prediction_function, predictions_dataset, logs
        A partially defined learning function that takes a training set and
        returns a predict function, a dataset with training predictions and training
        logs.

    eval_fn : function pandas.DataFrame -> dict
        A partially defined evaluation function that takes a dataset with predictions and
        returns the evaluation logs.

    n_jobs : int
        Number of parallel processes to spawn.

    predict_oof : bool
        Whether to return out of fold predictions on the logs

    Returns
    ----------
    A list of log-like dictionary evaluations.
    """
    folds, logs = split_fn(train_data)

    dumped_train_fn = cloudpickle.dumps(train_fn)
    dumped_eval_fn = cloudpickle.dumps(eval_fn)

    result = Parallel(n_jobs=n_jobs, backend="threading")(
        delayed(parallel_validator_iteration)(train_data, x, dumped_train_fn,
                                              dumped_eval_fn, predict_oof)
        for x in enumerate(folds))
    gc.collect()

    train_log = {
        "train_log": [fold_result["train_log"] for fold_result in result]
    }

    @curry
    def kwdissoc(d: Dict, key: str) -> Dict:
        return dissoc(d, key)

    validator_logs = pipe(
        result, partial(zip, logs),
        map(lambda log_tuple: assoc(log_tuple[1], "split_log", log_tuple[0])),
        map(kwdissoc(key="train_log")), list)

    return assoc(train_log, "validator_log", validator_logs)
Example #27
def validator_iteration(data: pd.DataFrame,
                        train_index: pd.Index,
                        test_indexes: pd.Index,
                        fold_num: int,
                        train_fn: LearnerFnType,
                        eval_fn: EvalFnType,
                        predict_oof: bool = False) -> LogType:
    """
    Perform an iteration of train test split, training and evaluation.

    Parameters
    ----------
    data : pandas.DataFrame
        A Pandas' DataFrame with training and testing subsets

    train_index : numpy.Array
        The index of the training subset of `data`.

    test_indexes : list of numpy.Array
        A list of indexes of the testing subsets of `data`.

    fold_num : int
        The number of the fold in the current iteration

    train_fn : function pandas.DataFrame -> prediction_function, predictions_dataset, logs
        A partially defined learning function that takes a training set and
        returns a predict function, a dataset with training predictions and training
        logs.

    eval_fn : function pandas.DataFrame -> dict
        A partially defined evaluation function that takes a dataset with predictions and
        returns the evaluation logs.

    predict_oof : bool
        Whether to return out of fold predictions on the logs

    Returns
    ----------
    A log-like dictionary of evaluations.
    """

    train_data = data.iloc[train_index]

    empty_set_warn = "Splitter on validator_iteration is generating an empty training dataset. train_data.shape is %s" \
                     % str(train_data.shape)
    warnings.warn(
        empty_set_warn) if train_data.shape[0] == 0 else None  # type: ignore

    predict_fn, train_out, train_log = train_fn(train_data)

    eval_results = []
    oof_predictions = []
    for test_index in test_indexes:
        test_predictions = predict_fn(data.iloc[test_index])
        eval_results.append(eval_fn(test_predictions))
        if predict_oof:
            oof_predictions.append(test_predictions)

    logs = {
        'fold_num': fold_num,
        'train_log': train_log,
        'eval_results': eval_results
    }

    return assoc(logs, "oof_predictions",
                 oof_predictions) if predict_oof else logs
Example #28
 def hist(self, hist_function, *, options={}, **interact_params):
     options = tz.assoc(options, '_fig', self.figure)
     box = hist(hist_function, options=options, **interact_params)
     widget = box.children[0]
     self.widgets.append(widget)
     return self
Example #29
 def _process_config(self, config: Mapping) -> Mapping:
     tags_dict = merge(standard_tags(self), config.get('Description', {}))
     processed_config = pipe(config,
                             assoc(key='Description', value=tags_dict),
                             super()._process_config)
     return processed_config
Example #30
 def rename_key(d, key_name, key_new_name):
     return assoc(dissoc(d, key_name), key_new_name, d[key_name])
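rename_key builds the renamed dictionary out of dissoc and assoc, so the input dict is left untouched. A quick illustrative call (the dict contents are made up):

from toolz import assoc, dissoc

def rename_key(d, key_name, key_new_name):
    return assoc(dissoc(d, key_name), key_new_name, d[key_name])

d = {"name": "V_X", "linecol": (3, 7)}
rename_key(d, "linecol", "tgvh_linecol")
# -> {'name': 'V_X', 'tgvh_linecol': (3, 7)}; d itself is unchanged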
Example #31
def load_regles_nodes(json_file_name):
    return pipe(
        read_ast_json_file(json_file_name),
        filter(lambda node: 'batch' in node['applications']),
        map(lambda d: assoc(d, 'source_file_name', '{}.m'.format(os.path.splitext(json_file_name)[0]))),
        )
Example #32
def main():
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-d', '--debug', action='store_true', default=False, help='Display debug messages')
    parser.add_argument('-v', '--verbose', action='store_true', default=False, help='Increase output verbosity')
    global args
    args = parser.parse_args()
    logging.basicConfig(
        level=logging.DEBUG if args.debug else (logging.INFO if args.verbose else logging.WARNING),
        stream=sys.stdout,
        )

    if not os.path.isdir(json_dir_path):
        os.mkdir(json_dir_path)
    if not os.path.isdir(ast_dir_path):
        os.mkdir(ast_dir_path)

    # Load variables definitions

    tgvh_infos = list(load_tgvH_file())

    # Write constants

    constant_by_name = pipe(
         tgvh_infos,
         filter(lambda val: val['type'] == 'variable_const'),
         map(lambda d: (d['name'], d['value'])),
         dict,
         )
    write_json_file(data=constant_by_name, file_name='constants.json')

    # Write variables dependencies

    regles_nodes = list(mapcat(load_regles_nodes, iter_json_file_names('chap-*.json', 'res-ser*.json')))
    dependencies_by_formula_name = dict(list(mapcat(dependencies_visitors.visit_node, regles_nodes)))
    write_json_file(data=dependencies_by_formula_name, file_name='formulas_dependencies.json')

    # Write variables definitions

    ast_infos_by_variable_name = {}
    for regle_node in regles_nodes:
        regle_infos = {
            'regle_applications': regle_node['applications'],
            'regle_linecol': regle_node['linecol'],
            'regle_name': regle_node['name'],
            'source_file_name': regle_node['source_file_name'],
            }
        regle_tags = list(pluck('value', regle_node.get('tags', [])))
        if regle_tags:
            regle_infos['regle_tags'] = regle_tags
        for formula_node in regle_node['formulas']:
            if formula_node['type'] == 'formula':
                ast_infos_by_variable_name[formula_node['name']] = assoc(
                    regle_infos, 'formula_linecol', formula_node['linecol'])
            elif formula_node['type'] == 'pour_formula':
                for unlooped_formula_node in unloop_helpers.iter_unlooped_nodes(
                        loop_variables_nodes=formula_node['loop_variables'],
                        node=formula_node['formula'],
                        unloop_keys=['name'],
                        ):
                    pour_formula_infos = merge(regle_infos, {
                        'pour_formula_linecol': formula_node['formula']['linecol'],
                        'pour_formula_name': formula_node['formula']['name'],
                        })
                    ast_infos_by_variable_name[unlooped_formula_node['name']] = pour_formula_infos
            else:
                assert False, 'Unhandled formula_node type: {}'.format(formula_node)

    def rename_key(d, key_name, key_new_name):
        return assoc(dissoc(d, key_name), key_new_name, d[key_name])

    tgvh_infos_by_variable_name = pipe(
        tgvh_infos,
        filter(lambda d: d['type'] in ('variable_calculee', 'variable_saisie')),
        map(lambda d: rename_key(d, 'linecol', 'tgvh_linecol')),
        map(lambda d: (d['name'], d)),  # Index by name
        dict,
        )

    definition_by_variable_name = merge_with(merge, ast_infos_by_variable_name, tgvh_infos_by_variable_name)

    write_json_file(data=definition_by_variable_name, file_name='variables_definitions.json')

    return 0
Example #33
 def line(self, x_fn, y_fn, *, options={}, **interact_params):
     options = tz.assoc(options, '_fig', self.figure)
     box = line(x_fn, y_fn, options=options, **interact_params)
     widget = box.children[0]
     self.widgets.append(widget)
     return self
Example #34
def spatial_learning_curve_splitter(train_data: pd.DataFrame,
                                    space_column: str,
                                    time_column: str,
                                    training_limit: DateType,
                                    holdout_gap: timedelta = timedelta(days=0),
                                    train_percentages: Iterable[float] = (0.25, 0.5, 0.75, 1.0),
                                    random_state: int = None) -> SplitterReturnType:
    """
    Splits the data for a spatial learning curve. Progressively adds more and
    more examples to the training in order to verify the impact of having more
    data available on a validation set.

    The validation set starts after the training set, with an optional time gap.

    Similar to the temporal learning curves, but with spatial increases in the training set.

    Parameters
    ----------

    train_data : pandas.DataFrame
        A Pandas' DataFrame that will be split for learning curve estimation.

    space_column : str
        The name of the ID column of `train_data`.

    time_column : str
        The name of the temporal column of `train_data`.

    training_limit: datetime or str
        The date limiting the training (after which the holdout begins).

    holdout_gap: timedelta
        The gap between the end of training and the start of the holdout.
        If you have censored data, use a gap similar to the censor time.

    train_percentages: list or tuple of floats
        A list containing the percentages of IDs to use in the training.
        Defaults to (0.25, 0.5, 0.75, 1.0). For example: For the default value,
        there would be four model trainings, containing respectively 25%, 50%,
        75%, and 100% of the IDs that are not part of the held out set.

    random_state : int
        A seed for the random number generator that shuffles the IDs.
    """
    if np.min(train_percentages) < 0 or np.max(train_percentages) > 1:
        raise ValueError('Train percentages must be between 0 and 1')

    if isinstance(training_limit, str):
        training_limit = datetime.strptime(training_limit, "%Y-%m-%d")

    if training_limit < train_data[time_column].min() or training_limit > train_data[time_column].max():
        raise ValueError('Temporal training limit should be within datasets temporal bounds (min and max times)')
    if timedelta(days=0) > holdout_gap:
        raise ValueError('Holdout gap cannot be negative')
    if holdout_gap >= (train_data[time_column].max() - training_limit):
        raise ValueError('After taking the gap into account, there should be enough time for the holdout set')

    train_data = train_data.reset_index()

    # We need to sample the space column before getting its unique values so their order in the DF won't matter here
    spatial_ids = train_data[space_column].sample(frac=1, random_state=random_state).unique()

    cumulative_ids = pipe(
        spatial_ids,
        lambda ids: (np.array(train_percentages) * len(ids)).astype(int),  # Get the corresponding indices for each %
        lambda idx: np.split(spatial_ids, idx)[:-1],  # Split spatial ids by the indices
        lambda l: map(lambda x: x.tolist(), l),  # Transform sub-arrays into sub-lists
        lambda l: filter(None, l),  # Drop empty sub-lists
        accumulate(operator.add)  # Cumulative sum of lists
    )

    validation_set = train_data[train_data[time_column] > (training_limit + holdout_gap)]
    train_data = train_data[train_data[time_column] <= training_limit]

    folds = [(train_data[train_data[space_column].isin(ids)][time_column], validation_set[time_column])
             for ids in cumulative_ids]

    folds_indices = _lc_fold_to_indexes(folds)  # final formatting with idx

    logs = [assoc(learner, "percentage", p) for learner, p in zip(map(_log_time_fold, folds), train_percentages)]

    return folds_indices, logs
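The cumulative_ids pipeline above is dense; here is a toy run with four made-up IDs and the default percentages, assuming the curried map, filter and accumulate from toolz.curried:

import operator
import numpy as np
from toolz.curried import accumulate, filter, map, pipe

spatial_ids = np.array(["a", "b", "c", "d"])
train_percentages = (0.25, 0.5, 0.75, 1.0)

list(pipe(
    spatial_ids,
    lambda ids: (np.array(train_percentages) * len(ids)).astype(int),  # [1, 2, 3, 4]
    lambda idx: np.split(spatial_ids, idx)[:-1],   # [['a'], ['b'], ['c'], ['d']]
    map(lambda x: x.tolist()),                     # sub-arrays -> sub-lists
    filter(None),                                  # drop empty sub-lists
    accumulate(operator.add),                      # running concatenation
))
# -> [['a'], ['a', 'b'], ['a', 'b', 'c'], ['a', 'b', 'c', 'd']]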
Example #35
def modulemap(root, io):
    modules = dirs(root, io)
    return pipe(modules, map(lambda m: assoc({}, basename(m), io.yaml(join(m, RUNNER_YAML)))), # noqa
                         filter(lambda m: m[first(m)] is not None),
                         merge) # noqa yapf: disable
Example #36
 def transform_year(d):
     if 'year' in d:
         return _.assoc(d, 'year', int(d['year']))
     return d