コード例 #1
0
    def _filter_parameters_to_hash(self, parameters):
        """Strip hyperparameters that should not influence the hash (verbosity settings, seeds,
        random states, job/thread counts) from the model parameter groups of `parameters`

        Parameters
        ----------
        parameters: Dict
            Full dictionary of initial parameters. Must contain the keys "model_init_params" and
            "model_extra_params"

        Returns
        -------
        parameters: Dict
            The same `parameters` object that was given, whose "model_init_params" and
            "model_extra_params" entries have been replaced by their filtered versions

        Notes
        -----
        The given `parameters` is updated in place, not copied. Callers that need the unfiltered
        values afterwards must pass in a copy"""
        ignored_names = [
            "verbose",
            "verbosity",
            "silent",
            "random_state",
            "random_seed",
            "seed",
            "n_jobs",
            "nthread",
        ]

        # Keras tasks additionally ignore the `build_fn` entry
        if self.is_task_keras:
            ignored_names.append("build_fn")

        for group_name in ("model_init_params", "model_extra_params"):
            parameters[group_name] = subdict(parameters[group_name], drop=ignored_names)

        return parameters
コード例 #2
0
def merge_compile_params(compile_params, dummified_params):
    """Rebuild `compile_params`, swapping in the values recorded in `dummified_params` wherever
    their paths match

    Parameters
    ----------
    compile_params: Dict
        All the compile parameters provided to a dummy model's `compile` method, or their default
        values if they were not explicitly given. Values that were originally hyperparameter space
        choices are expected to currently hold the dummy chosen for them
    dummified_params: Dict
        Mapping whose keys are paths (indexable sequences, compared against tuple paths) into
        `compile_params`. A leading "params" element is stripped from a path before matching. The
        value stored under a matching path replaces the corresponding value of `compile_params`

    Returns
    -------
    merged_params: Dict
        Result of remapping `compile_params`, in which each item whose (path + key) location is
        present in the processed `dummified_params` has been given the value stored there. All
        other items keep their original values

    Notes
    -----
    `dummified_params` itself is not modified: a copy is handed to `subdict`, and matched entries
    are popped from the result of that call (presumably a new dict - confirm against `subdict`)"""
    # FLAG: Deal with capitalization conflicts when comparing similar experiments: `optimizer`="Adam" vs "adam"
    space_choices = subdict(
        dummified_params.copy(),
        key=lambda path: path[1:] if path[0] == "params" else path,
    )

    def _swap_in_choice(path, key, value):
        """Return `key` paired with the recorded choice for (`path` + `key`), if there is one.
        Otherwise, return the `key`, `value` pair unchanged"""
        full_path = path + (key,)

        if not len(space_choices) or full_path not in space_choices:
            return (key, value)

        # Each recorded choice is consumed the first time its location is visited
        return (key, space_choices.pop(full_path))

    return remap(compile_params, visit=_swap_in_choice)
コード例 #3
0
    def format_result(self):
        """Assemble :attr:`result`: an OrderedDict of the Experiment's identifying attributes,
        evaluation results, hyperparameters, and other descriptive information

        Notes
        -----
        After :attr:`result` is set, the "model_init_params" entry of its "hyperparameters" value
        is replaced by a version filtered through `subdict` with "random_state" and "seed" dropped.
        Because the "hyperparameters" value is the very object held by
        `self.hyperparameter_key.parameters`, that object receives the filtered entry as well"""
        summary = OrderedDict()

        #################### Identification ####################
        summary["experiment_id"] = self.experiment_id
        summary["algorithm_name"] = self.algorithm_name
        summary["module_name"] = self.module_name
        summary["hyperparameter_key"] = self.hyperparameter_key.key
        summary["cross_experiment_key"] = self.cross_experiment_key.key

        #################### Results and Parameters ####################
        summary["final_evaluations"] = self.last_evaluation_results
        summary["hyperparameters"] = self.hyperparameter_key.parameters
        summary["cross_experiment_parameters"] = self.cross_experiment_key.parameters
        summary["train_features"] = None  # TODO: Record the column features in train df

        #################### Miscellaneous Information ####################
        summary["platform"] = node()
        summary["source_script"] = self.source_script
        summary["notes"] = self.notes or ""
        summary["aggregates"] = self.stat_aggregates

        self.result = summary

        #################### Filter Hyperparameters' model_init_params ####################
        hyperparameters = self.result["hyperparameters"]
        hyperparameters["model_init_params"] = subdict(
            hyperparameters["model_init_params"], drop=["random_state", "seed"]
        )
コード例 #4
0
    def get_datasets_for_f(self, datasets: DFDict) -> DFDict:
        """Build the dict of datasets requested in :attr:`params`: resolve the merged datasets
        named there, add them, and keep only the requested entries

        Parameters
        ----------
        datasets: DFDict
            Original dict of datasets, containing all datasets provided to
            :meth:`EngineerStep.__call__`, some of which may be superfluous, or may require
            additional processing to resolve merged/coupled datasets

        Returns
        -------
        DFDict
            Result of `subdict(datasets, keep=self.params)`, evaluated after the merged datasets
            have been added to `datasets`

        Notes
        -----
        * :attr:`merged_datasets` is set to the result of `validate_dataset_names`
        * Each merged dataset is stored directly in the given `datasets`, so the caller's dict
          gains those entries, and every later `merge_dfs` call sees the ones added before it"""
        merged_names: List[str] = validate_dataset_names(self.params, self.stage)
        self.merged_datasets = merged_names

        for merged_name in merged_names:
            datasets[merged_name] = merge_dfs(merged_name, self.stage, datasets)

        return subdict(datasets, keep=self.params)
コード例 #5
0
    def __init__(self, parameters, cross_experiment_key, **kwargs):
        """Initialize a KeyMaker dedicated to hyperparameter keys, which are used to determine when
        experiments were executed with identical hyperparameters. Two separate instances of
        :class:`experiments.CVExperiment` should produce identical `hyperparameter_key` s if their
        hyperparameters are the same (or close enough)

        Parameters
        ----------
        parameters: Dict
            All the parameters to be included when creating the key hash. Keys should correspond to
            parameter names, and values should be the values of the corresponding keys. For Keras
            tasks, must contain "model_initializer", "model_init_params" (with a "build_fn" entry),
            and "model_extra_params"
        cross_experiment_key: Str
            The key produced by the active Environment via
            :class:`key_handler.CrossExperimentKeyMaker`, used for determining when a
            hyperparameter key has already been tested under the same cross-experiment parameters
        **kwargs: Dict
            Additional arguments supplied to :meth:`key_handler.KeyMaker.__init__`

        Notes
        -----
        For Keras tasks, `parameters` is deep-copied before being modified, so the caller's dict is
        left as given. The copy gains "layers" and "compile_params" entries in its
        "model_init_params", taken from a compiled dummy model, and its "model_extra_params" is
        passed through `subdict` with "params" dropped"""
        self.cross_experiment_key = cross_experiment_key

        # Holds the result of the `and` chain as-is, so it is not necessarily a bool
        self.is_task_keras = (
            hasattr(G.Env, "current_task")
            and G.Env.current_task
            and G.Env.current_task.module_name == "keras"
        )

        if self.is_task_keras:
            parameters = deepcopy(parameters)
            input_dimension_keys = ("input_shape", "input_dim")

            # noinspection PyUnusedLocal
            def _is_not_input_dimension(path, key, value):
                """Return False if `key` is "input_shape" or "input_dim". Else, return True"""
                return key not in input_dimension_keys

            #################### Initialize and Parameterize Dummy Model ####################
            dummy_model = initialize_dummy_model(
                parameters["model_initializer"],
                parameters["model_init_params"]["build_fn"],
                parameters["model_extra_params"],
            )
            dummy_layers, dummy_compile_params = parameterize_compiled_keras_model(dummy_model)

            #################### Process Parameters ####################
            # NOTE(review): Presumably `remap` drops items for which the visitor returns False
            #   (boltons semantics) - confirm against the `remap` imported by this module
            filtered_layers = remap(dummy_layers, visit=_is_not_input_dimension)

            init_params = parameters["model_init_params"]
            init_params["layers"] = filtered_layers
            init_params["compile_params"] = dummy_compile_params

            parameters["model_extra_params"] = subdict(
                parameters["model_extra_params"], drop=["params"]
            )

        KeyMaker.__init__(self, parameters, **kwargs)
コード例 #6
0
def get_concise_params_dict(params, split_args=False):
    """Condense `params` into a dict of its remaining entries plus the resolved values of its
    expected positional arguments and default keyword arguments

    Parameters
    ----------
    params: Dict
        Dictionary that may hold entries under the keys `U_ARGS`, `D_ARGS`, `U_KWARGS`, and
        `D_KWARGS` (judging by their use here: used args, default/expected arg names, used kwargs,
        and default kwargs, respectively), alongside any other entries
    split_args: Boolean, default=False
        If True, the resolved values are nested under the keys "arg_vals" and "kwarg_vals".
        Otherwise, they are merged into the top level of the result, with kwarg values taking
        priority over arg values, and both taking priority over the other entries

    Returns
    -------
    new_params: Dict
        `params` with the keys in `HH_ARG_ATTRS` dropped (via `subdict`), combined with the
        resolved arg and kwarg values as dictated by `split_args`

    Raises
    ------
    SyntaxError
        If a surplus positional value corresponds to a default kwarg that was also explicitly
        given in the used kwargs
    IndexError
        If an expected arg has no positional value and is not among the used kwargs

    Notes
    -----
    `params` is modified in place when more positional values were used than expected: the surplus
    values are moved into `params[U_KWARGS]`, and `params[U_ARGS]` is truncated accordingly"""
    new_params = subdict(params, drop=HH_ARG_ATTRS)
    arg_vals = {}
    kwarg_vals = {}

    #################### Resolve Kwargs Used as Args ####################
    if len(params.get(U_ARGS, [])) > len(params.get(D_ARGS, [])):
        n_expected = len(params[D_ARGS])

        for offset, surplus_value in enumerate(params[U_ARGS][n_expected:]):
            # Find the kwarg key that probably should have been used
            target_kwarg = list(params[D_KWARGS])[offset]
            used_kwargs = params[U_KWARGS]

            if target_kwarg in used_kwargs:
                raise SyntaxError(
                    f"Misplaced argument (i={n_expected + offset}/{target_kwarg}): {surplus_value}"
                )
            used_kwargs[target_kwarg] = surplus_value  # Move arg to kwargs

        params[U_ARGS] = params[U_ARGS][:n_expected]  # Remove args (now in kwargs)

    #################### Gather Args ####################
    for position, expected_arg in enumerate(params.get(D_ARGS, [])):
        try:
            resolved = params[U_ARGS][position]
        except IndexError:
            # Not given positionally, so it must have been given by name
            if expected_arg not in params[U_KWARGS]:
                raise
            resolved = params[U_KWARGS][expected_arg]
        arg_vals[expected_arg] = resolved

    #################### Gather Kwargs ####################
    # Only kwargs that have a default are included, and used values take priority over defaults.
    # Consequently, args given as kwargs are excluded here because they have no default values, and
    # nonsensical kwargs are excluded because the defaults are the point of reference
    for kwarg_name, default_value in params.get(D_KWARGS, {}).items():
        kwarg_vals[kwarg_name] = params[U_KWARGS].get(kwarg_name, default_value)

    #################### Consolidate ####################
    if split_args:
        return dict(**new_params, arg_vals=arg_vals, kwarg_vals=kwarg_vals)

    return {**new_params, **arg_vals, **kwarg_vals}
コード例 #7
0
def est_except(skip: str) -> list:
    """Collect the "est" values of every entry in `EST_OPT_PRO_PAIRS`, other than the entry
    stored under the key `skip`, and return them as a single flattened list

    Parameters
    ----------
    skip: String
        Key in `EST_OPT_PRO_PAIRS`, declaring the `base_estimator` values to exclude from the result

    Returns
    -------
    List
        Flat list of `base_estimator` values in `EST_OPT_PRO_PAIRS`, less those specified by `skip`

    Examples
    --------
    >>> est_except("gbrt")  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
    ['GP', GaussianProcessRegressor(...),
     'RF', RandomForestRegressor(...),
     'ET', ExtraTreesRegressor(...),
     'DUMMY']
    """
    remaining_pairs = subdict(EST_OPT_PRO_PAIRS, drop=[skip])
    return flatten([pair["est"] for pair in remaining_pairs.values()])
コード例 #8
0
def est_except(skip: str) -> list:
    """Collect the "est" values of every entry in `scenario_pairs`, other than the entry stored
    under the key `skip`, and return the result of passing them to `flatten`

    Parameters
    ----------
    skip: String
        Key in `scenario_pairs` whose "est" values should be left out of the result

    Returns
    -------
    List
        The "est" values of the remaining entries of `scenario_pairs`, as returned by `flatten`
        (presumably a single flat list - confirm against `flatten`)"""
    kept_pairs = subdict(scenario_pairs, drop=[skip])
    est_values = [pair["est"] for pair in kept_pairs.values()]
    return flatten(est_values)