def link_choice_ids(layers, compile_params, extra_params, dimensions):
    """Annotate hyperparameter space choices with a 'location' attribute describing their
    position among all hyperparameters

    `layers`, `compile_params` and `extra_params` are each traversed with `remap`. Whenever a
    visited value is a `Real`, `Integer`, or `Categorical` whose `id` equals the `id` of an
    element of `dimensions`, a 'location' tuple is set on both that element and the value itself

    Parameters
    ----------
    layers: List
        A list of dicts, in which each dict describes a network layer
    compile_params: Dict
        A dict containing the hyperparameters supplied to the model's `compile` call
    extra_params: Dict
        A dict containing the hyperparameters for the model's extra methods, such as `fit`,
        `predict`, and `predict_proba`. Its 'params' key (if any) is excluded from traversal
    dimensions: List
        A list containing descendants of :class:`space.Dimension`, representing the entire
        hyperparameter search space

    Returns
    -------
    extra_params: Dict
        The very same `extra_params` object that was given. No new structure is returned; the
        'location' attributes are set in place on the dimension objects found during traversal"""

    def visit_builder(param_type):
        """Define a visit function that prepends `param_type` to the 'location' tuple added in
        `_visit`"""
        prefix = param_type if isinstance(param_type, tuple) else (param_type,)

        def _visit(path, key, value):
            """If `value` is a descendant of :class:`space.Dimension`, add 'location' to itself
            and its copy in `dimensions`"""
            if isinstance(value, (Real, Integer, Categorical)):
                location = prefix + path + (key,)
                for dimension in dimensions:
                    if dimension.id == value.id:
                        dimension.location = location
                        value.location = location
            return (key, value)

        return _visit

    def _enter(path, key, value):
        """If `value` is in `keras.callbacks`, enter as a dict, iterating over non-magic
        attributes. Else, `default_enter`"""
        if isinstance(value, base_keras_callback):
            attr_names = [name for name in dir(value) if not name.startswith('__')]
            return dict(), [(name, getattr(value, name)) for name in attr_names]
        return default_enter(path, key, value)

    # The `remap` calls below are made only for the side effects of the visit functions (setting
    # 'location' attributes); the remapped structures themselves are intentionally discarded
    remap(layers.copy(), visit=visit_builder(('model_init_params', 'layers')))
    remap(compile_params.copy(), visit=visit_builder(('model_init_params', 'compile_params')))
    remap(
        {_k: _v for _k, _v in extra_params.items() if _k != 'params'},
        visit=visit_builder('model_extra_params'),
        enter=_enter,
    )

    # `extra_params` has locations for `layers`, `compile_params`, `extra_params` - Of form
    # expected by `build_fn` (less choices)
    return extra_params
def generate_cross_experiment_key(self):
    """Generate a key to describe the current Environment's cross-experiment parameters

    Gathers the Environment's cross-experiment attributes into a dict, renames any keys for which
    an alias is recorded under the "__hh_aliases_used" attribute, and stores a
    `CrossExperimentKeyMaker` built from that dict in :attr:`cross_experiment_key`

    Notes
    -----
    Each recorded alias is applied to at most one key: the first matching key visited by `remap`.
    The alias record is copied before being consumed, so the attribute stored on `self` is left
    untouched and repeated calls rename the same keys"""
    parameters = dict(
        metrics_params=self.metrics_params,
        cv_params=self.cv_params,
        target_column=self.target_column,
        id_column=self.id_column,
        do_predict_proba=self.do_predict_proba,
        prediction_formatter=self.prediction_formatter,
        train_dataset=self.train_dataset,
        test_dataset=self.test_dataset,
        holdout_dataset=self.holdout_dataset,
        cross_experiment_params=self.cross_experiment_params.copy(),
        to_csv_params=self.to_csv_params,
    )

    #################### Revert Aliases for Compatibility ####################
    # If any aliases were used during call to `Environment.__init__`, replace the default names
    # in `parameters` with the alias used. This ensures compatibility with Environment keys
    # made in earlier versions
    # Work on a copy: `_visit` pops entries as it uses them, and popping from the dict stored on
    # `self` would make a second call to this method skip the alias reversion entirely
    aliases_used = dict(getattr(self, "__hh_aliases_used", None) or {})

    # noinspection PyUnusedLocal
    def _visit(path, key, value):
        """Swap `key` for its recorded alias (consuming the record). Else, default return"""
        if key in aliases_used:
            key = aliases_used.pop(key)
        return (key, value)

    if aliases_used:
        parameters = remap(parameters, visit=_visit)

    #################### Make `cross_experiment_key` ####################
    self.cross_experiment_key = CrossExperimentKeyMaker(parameters)
def get_choice_dimensions(params, iter_attrs=None):
    """Collect every hyperparameter space choice found in the nested structure `params`

    Parameters
    ----------
    params: Dict
        Parameters that may be nested and that may contain hyperparameter space choices to collect
    iter_attrs: Callable, list of callables, or None, default=None
        Forwarded unchanged to `extra_enter_attrs`, whose result is used as the `enter` function
        of `remap`. If callable, must evaluate to True or False when given three inputs:
        (path, key, value). Callable should return True if the current value should be entered by
        `remap`. If callable returns False, `default_enter` will be called. If `iter_attrs` is a
        list of callables, the value will be entered if any evaluates to True. If None,
        `default_enter` will be called

    Returns
    -------
    choices: List
        A list of tuple pairs, in which `choices[<index>][0]` is a tuple path specifying the
        location of the hyperparameter given a choice, and `choices[<index>][1]` is the space
        choice instance for that hyperparameter"""
    found = []

    def _collect(path, key, value):
        """Record (`path` + `key`, `value`) when `value` is a `Real`, `Integer`, or `Categorical`.
        Return whether it was recorded"""
        is_choice = isinstance(value, (Real, Integer, Categorical))
        if is_choice:
            found.append((path + (key,), value))
        return is_choice

    # Only the side effect of `_collect` matters here; the remapped structure is thrown away
    remap(params, visit=_collect, enter=extra_enter_attrs(iter_attrs))
    return found
def merge_compile_params(compile_params, dummified_params):
    """Restore hyperparameter space choices in `compile_params` that had been replaced by dummies

    Parameters
    ----------
    compile_params: Dict
        All the compile parameters provided to a dummy model's `compile` method, or their default
        values if they were not explicitly given. If the original value of one of the keys in
        `compile_params` was a hyperparameter space choice, its current value will be the dummy
        chosen for it, and this change will be reflected by the contents of `dummified_params`
    dummified_params: Dict
        A mapping of tuple paths in `compile_params` (possibly nested) to the values that should
        be placed at those paths. A leading "params" element in a path is stripped before matching

    Returns
    -------
    merged_params: Dict
        A dictionary that mirrors `compile_params`, except where an element of `dummified_params`
        has the same path/key, in which case the value from `dummified_params` is used"""
    # FLAG: Deal with capitalization conflicts when comparing similar experiments: `optimizer`='Adam' vs 'adam'
    pending = {}
    for location, replacement in dummified_params.items():
        if location[0] == "params":
            location = location[1:]
        pending[location] = replacement

    def _restore(path, key, value):
        """If (`path` + `key`) in `pending`, return its value instead. Else, default"""
        location = path + (key,)
        if pending and location in pending:
            return (key, pending.pop(location))
        return (key, value)

    return remap(compile_params, visit=_restore)
def __init__(
    self,
    algorithm_name,
    module_name,
    cross_experiment_key,
    target_metric,
    hyperparameter_space,
    leaderboard_path,
    descriptions_dir,
    model_params,
):
    """ResultFinder for locating saved Keras Experiments compatible with the given constraints

    After the parent initializer runs, any Keras callbacks inside :attr:`model_params` are
    replaced by their dict representations, and the "params" entry of "model_extra_params" is
    removed if present

    Parameters
    ----------
    algorithm_name: String
        The name of the algorithm whose hyperparameters are being optimized
    module_name: String
        The name of the module from whence the algorithm being used came
    cross_experiment_key: String
        The cross_experiment_key produced by the current :class:`environment.Environment`
    target_metric: Tuple
        Path denoting the metric to be used. The first value should be one of ['oof',
        'holdout', 'in_fold'], and the second value should be the name of a metric supplied in
        :attr:`environment.Environment.metrics_params`
    hyperparameter_space: :class:`space.Space`
        Hyperparameter search space constraints
    leaderboard_path: String
        Path to a leaderboard file, whose listed Experiments will be tested for compatibility
    descriptions_dir: String
        Path to a directory containing the description files of saved Experiments
    model_params: Dict
        Concrete hyperparameters for the model. Common keys include 'model_init_params', and
        'model_extra_params', both of which can be pointers to dicts of hyperparameters"""
    super().__init__(
        algorithm_name=algorithm_name,
        module_name=module_name,
        cross_experiment_key=cross_experiment_key,
        target_metric=target_metric,
        hyperparameter_space=hyperparameter_space,
        leaderboard_path=leaderboard_path,
        descriptions_dir=descriptions_dir,
        model_params=model_params,
    )

    # Imported here rather than at module level, so Keras is only needed when this class is used
    from keras.callbacks import Callback as BaseKerasCallback

    # noinspection PyUnusedLocal
    def _callbacks_to_dicts(path, key, value):
        """If `value` is `BaseKerasCallback`, return dict representation. Else default_visit"""
        is_callback = isinstance(value, BaseKerasCallback)
        return (key, keras_callback_to_dict(value) if is_callback else value)

    self.model_params = remap(self.model_params, visit=_callbacks_to_dicts)

    # Best-effort removal: a missing "model_extra_params" or "params" key is not an error
    try:
        del self.model_params["model_extra_params"]["params"]
    except KeyError:
        pass
def handle_complex_types(self):
    """Locate complex types in :attr:`parameters`, create hashes for them, add lookup entries
    linking their original values to their hashes, then update their values in
    :attr:`parameters` to their hashes to facilitate Description saving

    Does nothing when :attr:`tested_keys_dir` is None. After remapping, a warning is issued
    through `G.warn` for every group of DataFrame parameters that share an identical hash"""
    if self.tested_keys_dir is None:  # Key-making blacklisted
        return

    # Maps each DataFrame hash to the parameter names that produced it, to detect duplicates
    dataframe_hashes = {}

    def visit(path, key, value):
        """Check whether a parameter is of a complex type. If not, return it unchanged.
        Otherwise, 1) create a hash for its value; 2) save a complex type lookup entry linking
        `key`, `value`, and the hash for `value`; and 3) return the hashed value with `key`,
        instead of the original complex-typed `value`

        Parameters
        ----------
        path: Tuple
            The path of keys that leads to `key`
        key: Str
            The parameter name
        value: *
            The value of the parameter `key`

        Returns
        -------
        Tuple of (`key`, value), in which value is either unchanged, a dict representation of a
        Keras callback, the `sentinel` attribute of a `Sentinel`, or a hash for the original
        `value`"""
        if isinstance(value, BaseKerasCallback):
            return (key, keras_callback_to_dict(value))
        if isinstance(value, Sentinel):
            return (key, value.sentinel)
        if callable(value) or isinstance(value, pd.DataFrame):
            hashed_value = make_hash_sha256(value)

            if isinstance(value, pd.DataFrame):
                dataframe_hashes.setdefault(hashed_value, []).append(key)

            try:
                self.add_complex_type_lookup_entry(path, key, value, hashed_value)
            except FileNotFoundError:
                # `exist_ok=True`: if the directory already exists (or is created concurrently),
                # the retry below should surface the real error instead of a `FileExistsError`
                os.makedirs(self.key_attribute_lookup_dir, exist_ok=True)
                self.add_complex_type_lookup_entry(path, key, value, hashed_value)
            return (key, hashed_value)
        return (key, value)

    self.parameters = remap(self.parameters, visit=visit)

    #################### Check for Identical DataFrames ####################
    for df_hash, df_names in dataframe_hashes.items():
        if len(df_names) > 1:
            G.warn(
                f"The dataframes: {df_names} have an identical hash: {df_hash!s}. This implies the dataframes are "
                + "identical, which is probably unintentional. If left alone, scores may be misleading!"
            )
def __init__(self, parameters, cross_experiment_key, **kwargs):
    """A KeyMaker class dedicated to creating hyperparameter keys, which determine when
    experiments were executed using identical hyperparameters. Two separate instances of
    :class:`experiments.CVExperiment` should produce identical `hyperparameter_key` s if their
    hyperparameters are the same (or close enough)

    When the current task's module is "keras", `parameters` is deep-copied and its
    "model_init_params" gain "layers" and "compile_params" entries extracted from a dummy model,
    and any "params" key is left out of "model_extra_params"

    Parameters
    ----------
    parameters: Dict
        All the parameters to be included when creating the key hash. Keys should correspond to
        parameter names, and values should be the values of the corresponding keys
    cross_experiment_key: Str
        The key produced by the active Environment via
        :class:`key_handler.CrossExperimentKeyMaker`, used for determining when a
        hyperparameter key has already been tested under the same cross-experiment parameters
    **kwargs: Dict
        Additional arguments supplied to :meth:`key_handler.KeyMaker.__init__`"""
    self.cross_experiment_key = cross_experiment_key
    self.is_task_keras = (
        hasattr(G.Env, "current_task")
        and G.Env.current_task
        and G.Env.current_task.module_name == "keras"
    )

    if self.is_task_keras:
        # Copy first, so the caller's `parameters` are not altered by the edits below
        parameters = deepcopy(parameters)

        #################### Initialize and Parameterize Dummy Model ####################
        dummy_model = initialize_dummy_model(
            parameters["model_initializer"],
            parameters["model_init_params"]["build_fn"],
            parameters["model_extra_params"],
        )
        dummy_layers, dummy_compile_params = parameterize_compiled_keras_model(dummy_model)

        #################### Process Parameters ####################
        # noinspection PyUnusedLocal
        def _keep_item(path, key, value):
            """Return False for the keys "input_shape" and "input_dim". Else, return True"""
            return key != "input_shape" and key != "input_dim"

        init_params = parameters["model_init_params"]
        init_params["layers"] = remap(dummy_layers, visit=_keep_item)
        init_params["compile_params"] = dummy_compile_params

        extra_params = parameters["model_extra_params"]
        if "params" in extra_params:
            parameters["model_extra_params"] = {
                name: val for name, val in extra_params.items() if name != "params"
            }

    KeyMaker.__init__(self, parameters, **kwargs)
def _filter_by_guidelines_multi(self, location):
    """Helper to filter by guidelines when one of the guideline hyperparameters is directly
    affected by a hyperparameter that is given as a space choice

    Parameters
    ----------
    location: Tuple
        Location of the hyperparameter space choice that affects the acceptable guideline values
        of a particular hyperparameter. In other words, this is the path of a hyperparameter,
        which, if changed, would change the expected default value of another hyperparameter.
        Only ("model_init_params", "compile_params", "optimizer") is handled

    Raises
    ------
    ValueError
        If `location` is anything other than the single handled location

    Notes
    -----
    This is used for Keras Experiments when the `optimizer` value in a model's `compile_params`
    is given as a hyperparameter space choice. Each possible value of `optimizer` prescribes
    different default values for the `optimizer_params` argument, so special measures need to be
    taken to ensure the correct Experiments are declared to fit within the constraints"""
    _model_params = deepcopy(self.model_params)

    if location != ("model_init_params", "compile_params", "optimizer"):
        raise ValueError("Received unhandled location: {}".format(location))

    from keras.optimizers import get as k_opt_get

    update_location = ("model_init_params", "compile_params", "optimizer_params")
    allowed_values = get_path(_model_params, location).bounds

    def _make_visit(replacement):
        """Build a visit function that places `replacement` at `update_location`"""

        def _visit(path, key, value):
            """If `path` + `key` == `update_location`, return default for this choice. Else,
            default_visit"""
            if path + (key,) == update_location:
                return (key, replacement)
            return (key, value)

        return _visit

    #################### Handle First Value (Dummy) ####################
    self._filter_by_guidelines()

    #################### Handle Remaining Values ####################
    for allowed_val in allowed_values[1:]:
        optimizer_defaults = k_opt_get(allowed_val).get_config()
        candidate_params = remap(_model_params, visit=_make_visit(optimizer_defaults))
        self._filter_by_guidelines(model_params=candidate_params)

    # Order by the second element of each entry, largest first
    self.similar_experiments = sorted(
        self.similar_experiments, key=lambda entry: entry[1], reverse=True
    )
def locate_sentinels(parameters):
    """Produce a mirrored `parameters` dict, wherein `Sentinel` values are converted to the
    objects they represent

    Parameters
    ----------
    parameters: Dict
        Dict of parameters, which may contain nested `Sentinel` values

    Returns
    -------
    Dict
        The given `parameters` object itself when `G.sentinel_registry` is empty. Otherwise, the
        result of remapping `parameters` with `_sentinel_visitor`"""
    if len(G.sentinel_registry) > 0:
        return remap(parameters, visit=_sentinel_visitor)
    # Nothing registered, so there is nothing to substitute
    return parameters
def deep_restricted_update(default_vals, new_vals, iter_attrs=None):
    """Return an updated dictionary that mirrors `default_vals`, except where the key in
    `new_vals` matches the path in `default_vals`, in which case the `new_vals` value is used

    Parameters
    ----------
    default_vals: Dict
        Dict containing the values to return if an alternative is not found in `new_vals`. If
        falsy (None or empty), it is returned as-is without any traversal
    new_vals: Dict
        Dict whose keys are expected to be tuples corresponding to key paths in `default_vals`.
        Keys that match no path in `default_vals` are ignored
    iter_attrs: Callable, list of callables, or None, default=None
        If callable, must evaluate to True or False when given three inputs: (path, key, value).
        Callable should return True if the current value should be entered by `remap`. If
        callable returns False, `default_enter` will be called. If `iter_attrs` is a list of
        callables, the value will be entered if any evaluates to True. If None, `default_enter`
        will be called

    Returns
    -------
    Dict, or None

    Examples
    --------
    >>> deep_restricted_update({'a': 1, 'b': 2}, {('b',): 'foo', ('c',): 'bar'})
    {'a': 1, 'b': 'foo'}
    >>> deep_restricted_update({'a': 1, 'b': {'b1': 2, 'b2': 3}}, {('b', 'b1'): 'foo', ('c', 'c1'): 'bar'})
    {'a': 1, 'b': {'b1': 'foo', 'b2': 3}}"""
    if not default_vals:
        return default_vals

    if not iter_attrs:
        iter_attrs = [lambda *_args: False]
    elif not isinstance(iter_attrs, list):
        iter_attrs = [iter_attrs]

    def _visit(path, key, value):
        """If (`path` + `key`) is a key in `new_vals`, return its value. Else, default return"""
        location = path + (key,)
        # Direct lookup instead of scanning every item of `new_vals` for each visited node
        if location in new_vals:
            return (key, new_vals[location])
        return (key, value)

    def _enter(path, key, value):
        """If any in `iter_attrs` is True, enter `value` as a dict, iterating over non-magic
        attributes. Else, `default_enter`"""
        if any(check(path, key, value) for check in iter_attrs):
            included_attrs = [name for name in dir(value) if not name.startswith("__")]
            return dict(), [(name, getattr(value, name)) for name in included_attrs]
        return default_enter(path, key, value)

    return remap(default_vals, visit=_visit, enter=_enter)
def __init__(self, params: List[str], stage: str):
    """Characterize the relationships between the dataset names `params`

    Parameters
    ----------
    params: List[str]
        Dataset names requested by a feature engineering step callable. Must be a subset of
        {"train_data", "train_inputs", "train_targets", "validation_data", "validation_inputs",
        "validation_targets", "holdout_data", "holdout_inputs", "holdout_targets",
        "test_inputs", "all_data", "all_inputs", "all_targets", "non_train_data",
        "non_train_inputs", "non_train_targets"}
    stage: String in {"pre_cv", "intra_cv"}
        Feature engineering stage during which the datasets `params` are requested

    Attributes
    ----------
    merged_datasets: List[tuple]
        Tuples of strings denoting paths to datasets that represent a merge between multiple
        datasets. Merged datasets are those prefixed with either "all" or "non_train". These
        paths are locations in `descendants`
    coupled_datasets: List[tuple]
        Tuples of strings denoting paths to datasets that represent a coupling of "inputs" and
        "targets" datasets. Coupled datasets are those suffixed with "data". These paths are
        locations in `descendants`, and the values at each path should be a dict containing keys
        with "inputs" and "targets" suffixes
    leaves: Dict[tuple, str]
        Mapping of full path tuples in `descendants` to their leaf values. Tuple paths represent
        the steps necessary to reach the standard dataset leaf value in `descendants` by
        traversing merged and coupled datasets. Values in `leaves` should be identical to the
        last element of the corresponding tuple key
    descendants: DescendantsType
        Nested dict in which all keys are dataset name strings, and all leaf values are `None`.
        Represents the structure of the requested dataset names, traversing over merged and
        coupled datasets (if necessary) in order to reach the standard dataset leaves"""
    self.params: List[str] = params
    self.stage: str = stage

    # Initialized empty before `descendants` is built by the `remap` call below
    self.merged_datasets: List[tuple] = []
    self.coupled_datasets: List[tuple] = []
    self.leaves: Dict[tuple, str] = {}

    # Start from a dict mapping every requested name to itself
    requested = {name: name for name in self.params}
    self.descendants: DescendantsType = remap(
        requested, visit=self._visit, enter=self._enter, use_registry=False
    )
def get_choice_dimensions(params, iter_attrs=None):
    """Collect every hyperparameter space choice found in the nested structure `params`

    Parameters
    ----------
    params: Dict
        Parameters that may be nested and that may contain hyperparameter space choices to collect
    iter_attrs: Callable, list of callables, or None, default=None
        If callable, must evaluate to True or False when given three inputs: (path, key, value).
        Callable should return True if the current value should be entered by `remap`. If
        callable returns False, `default_enter` will be called. If `iter_attrs` is a list of
        callables, the value will be entered if any evaluates to True. If None, `default_enter`
        will be called

    Returns
    -------
    choices: List
        A list of tuple pairs, in which `choices[<index>][0]` is a tuple path specifying the
        location of the hyperparameter given a choice, and `choices[<index>][1]` is the space
        choice instance for that hyperparameter"""
    # Normalize `iter_attrs` to a list of predicates; a falsy value means "never enter specially"
    if not iter_attrs:
        enter_checks = [lambda *_args: False]
    elif isinstance(iter_attrs, list):
        enter_checks = iter_attrs
    else:
        enter_checks = [iter_attrs]

    located = []

    def _collect(path, key, value):
        """Record (`path` + `key`, `value`) when `value` is a `Real`, `Integer`, or `Categorical`.
        Return whether it was recorded"""
        is_choice = isinstance(value, (Real, Integer, Categorical))
        if is_choice:
            located.append((path + (key,), value))
        return is_choice

    def _enter(path, key, value):
        """If any in `enter_checks` is True, enter `value` as a dict, iterating over non-magic
        attributes. Else, `default_enter`"""
        verdicts = [check(path, key, value) for check in enter_checks]
        if any(verdicts):
            attr_names = [name for name in dir(value) if not name.startswith("__")]
            return dict(), [(name, getattr(value, name)) for name in attr_names]
        return default_enter(path, key, value)

    # Only the side effect of `_collect` matters here; the remapped structure is thrown away
    remap(params, visit=_collect, enter=_enter)
    return located
def check_dummy_params(params):
    """Locate and dummify hyperparameter space choices in `params`, if the hyperparameter is used
    for model compilation

    Parameters
    ----------
    params: Dict
        A dictionary of hyperparameters, in which values may be hyperparameter space choices

    Returns
    -------
    checked_params: Dict
        A replica of `params`, in which space choices found under a compile-related key are
        replaced with the first element of their `bounds`
    dummified_params: Dict
        A record of the replaced choices, mapping each tuple path (`path` + `key`) to the
        original space choice found there"""
    compile_keys = [
        "optimizer",
        "loss",
        "metrics",
        "loss_weights",
        "sample_weight_mode",
        "weighted_metrics",
        "target_tensors",
    ]
    replaced = {}

    # noinspection PyUnusedLocal
    def _dummify(path, key, value):
        """If `key` is a compile key and `value` is a descendant of :class:`space.Dimension`,
        collect it and return the first of its bounds. Else, default return"""
        if key in compile_keys and isinstance(value, (Real, Integer, Categorical)):
            replaced[path + (key,)] = value
            return (key, value.bounds[0])
        return (key, value)

    return remap(params, visit=_dummify), replaced
def deep_restricted_update(default_vals, new_vals, iter_attrs=None):
    """Return an updated dictionary that mirrors `default_vals`, except where the key in
    `new_vals` matches the path in `default_vals`, in which case the `new_vals` value is used

    Parameters
    ----------
    default_vals: Dict
        Dict containing the values to return if an alternative is not found in `new_vals`. If
        falsy (None or empty), it is returned as-is without any traversal
    new_vals: Dict
        Dict whose keys are expected to be tuples corresponding to key paths in `default_vals`.
        Keys that match no path in `default_vals` are ignored
    iter_attrs: Callable, list of callables, or None, default=None
        Forwarded unchanged to `extra_enter_attrs`, whose result is used as the `enter` function
        of `remap`. If callable, must evaluate to True or False when given three inputs:
        (path, key, value). Callable should return True if the current value should be entered by
        `remap`. If callable returns False, `default_enter` will be called. If `iter_attrs` is a
        list of callables, the value will be entered if any evaluates to True. If None,
        `default_enter` will be called

    Returns
    -------
    Dict, or None

    Examples
    --------
    >>> deep_restricted_update({'a': 1, 'b': 2}, {('b',): 'foo', ('c',): 'bar'})
    {'a': 1, 'b': 'foo'}
    >>> deep_restricted_update({'a': 1, 'b': {'b1': 2, 'b2': 3}}, {('b', 'b1'): 'foo', ('c', 'c1'): 'bar'})
    {'a': 1, 'b': {'b1': 'foo', 'b2': 3}}"""
    if not default_vals:
        return default_vals

    def _visit(path, key, value):
        """If (`path` + `key`) is a key in `new_vals`, return its value. Else, default return"""
        location = path + (key,)
        # Direct lookup instead of scanning every item of `new_vals` for each visited node
        if location in new_vals:
            return (key, new_vals[location])
        return (key, value)

    return remap(default_vals, visit=_visit, enter=extra_enter_attrs(iter_attrs))
def filter_by_guidelines(
    hyperparameters_and_scores,
    space,
    model_init_params,
    model_extra_params,
    feature_engineer,
    feature_selector,
    **kwargs,
):
    """Reject any `hyperparameters_and_scores` tuples whose hyperparameters do not match
    guideline hyperparameters (all hyperparameters not in `space`), after ignoring unimportant
    hyperparameters

    Parameters
    ----------
    hyperparameters_and_scores: List of tuples
        Each tuple should be of form (hyperparameters <dict>, evaluation <float>), in which
        hyperparameters contains at least the keys: ['model_init_params', 'model_extra_params',
        'feature_engineer', 'feature_selector']
    space: `space.space_core.Space`
        The boundaries of the hyperparameters to be searched
    model_init_params: Dict
    model_extra_params: Dict, or None
    feature_engineer: Dict
    feature_selector: List of column names, callable, list of booleans, or None
    **kwargs: Dict
        Extra parameter dicts to include in `guidelines`. For example, if filtering the
        hyperparameters of a Keras neural network, this should contain the following keys:
        'layers', 'compile_params'

    Returns
    -------
    hyperparameters_and_scores: List of tuples
        Filtered to include only those whose hyperparameters matched guideline hyperparameters"""
    # Space dimension names given as plain strings are taken to live under "model_init_params"
    dimensions = [
        ("model_init_params", name) if isinstance(name, str) else name for name in space.names()
    ]
    # `dimensions` = hyperparameters to be ignored. Filter by all remaining

    dimensions_to_ignore = [
        ("model_initializer",),
        ("model_init_params", "build_fn"),
        (None, "verbose"),
        (None, "silent"),
        (None, "random_state"),
        (None, "seed"),
        ("model_init_params", "n_jobs"),
        ("model_init_params", "nthread"),
        # TODO: Remove below once loss_functions are hashed in description files
        ("model_init_params", "compile_params", "loss_functions"),
    ]
    excluded_paths = dimensions + dimensions_to_ignore

    #################### Prepare `feature_engineer` ####################
    feature_engineer = feature_engineer and feature_engineer.get_key_data()
    # Dataset hashes in `feature_engineer` and candidates can be ignored, since it is assumed
    # that candidates here had matching `Environment`s

    def _fallback(given, default):
        """Return `default` when `given` is None. Else, `given`"""
        return default if given is None else given

    temp_guidelines = {
        "model_init_params": _fallback(model_init_params, {}),
        "model_extra_params": _fallback(model_extra_params, {}),
        "feature_engineer": _fallback(feature_engineer, {}),
        "feature_selector": _fallback(feature_selector, []),
    }
    temp_guidelines.update(kwargs)

    def _visit(path, key, value):
        """Return False if element in space dimensions, or in dimensions being ignored. Else,
        return True. If `value` is of type tuple or set, it will be converted to a list in order
        to simplify comparisons to the JSON-formatted `hyperparameters_and_scores`"""
        if path and path[0] == "model_extra_params" and value == {}:
            # Remove empty dicts in ("model_extra_params"). Simplify comparison between
            # experiments with no `model_extra_params` and, for example,
            # `dict(fit=dict(verbose=True))`
            return False

        #################### Clean `feature_engineer` ####################
        try:
            return visit_feature_engineer(path, key, value)
        except ContinueRemap:
            # `visit_feature_engineer` declined this element; continue with the checks below
            pass

        location = path + (key,)
        if any(
            location == excluded or (excluded[0] is None and excluded[-1] == key)
            for excluded in excluded_paths
        ):
            return False
        if isinstance(value, (tuple, set)):
            return key, list(value)
        return True

    guidelines = remap(temp_guidelines, visit=_visit)
    # `guidelines` = `temp_guidelines` that are neither `space` choices, nor
    # `dimensions_to_ignore`

    return [
        candidate
        for candidate in hyperparameters_and_scores
        if remap(candidate[0], visit=_visit) == guidelines
    ]
def does_match_guidelines(
    candidate_params: dict,
    space: Space,
    template_params: dict,
    visitors=(),
    dims_to_ignore: List[tuple] = None,
) -> bool:
    """Check candidate compatibility with template guideline hyperparameters

    Parameters
    ----------
    candidate_params: Dict
        Candidate Experiment hyperparameters to be compared to `template_params` after processing
    space: Space
        Hyperparameter search space constraints for the current template
    template_params: Dict
        Template hyperparameters to which `candidate_params` will be compared after processing.
        Although the name of the function implies that these will all be guideline
        hyperparameters, this is not a requirement, as any non-guideline hyperparameters will be
        removed during processing by checking `space.names`
    visitors: Callable, or Tuple[callable] (optional)
        Extra `visit` function(s) invoked when
        :func:`~hyperparameter_hunter.utils.boltons_utils.remap`-ing both `template_params` and
        `candidate_params`. Can be used to filter out unwanted values, or to pre-process selected
        values (and more) prior to performing the final compatibility check between the
        processed `candidate_params` and guidelines in `template_params`
    dims_to_ignore: List[tuple] (optional)
        Paths to hyperparameter(s) that should be ignored when comparing `candidate_params` and
        `template_params`. By default, hyperparameters pertaining to verbosity and random states
        are ignored. Paths should be tuples of the form expected by
        :func:`~hyperparameter_hunter.utils.boltons_utils.get_path`. Additionally a path may
        start with None, which acts as a wildcard, matching any hyperparameters whose terminal
        path nodes match the value following None. For example, ``(None, "verbose")`` would match
        paths such as ``("model_init_params", "a", "verbose")`` and
        ``("model_extra_params", "b", 2, "verbose")``. Only honored when given as a list; any
        other type is silently ignored

    Returns
    -------
    Boolean
        True if the processed version of `candidate_params` is equal to the extracted and
        processed guidelines from `template_params`. Else, False"""
    dimensions_to_ignore = [
        (None, "verbose"),
        (None, "silent"),
        (None, "random_state"),
        (None, "seed"),
    ]
    if isinstance(dims_to_ignore, list):
        dimensions_to_ignore.extend(dims_to_ignore)
    # `dimensions_to_ignore` = hyperparameters to be ignored. Filter by all remaining (less
    # dimensions in `space`, which are also ignored)

    # Computed once here, rather than on every visited node: neither operand changes during the
    # two `remap` passes below
    excluded_dims = space.names() + dimensions_to_ignore

    def _visit(path, key, value):
        """Return False if element in space dimensions, or in dimensions being ignored. Else,
        return True. If `value` is of type tuple or set, it will be converted to a list in order
        to simplify comparisons to the JSON-formatted `candidate_params`"""
        # Remove elements whose full paths are in `dimensions_to_ignore` or `space.names()`
        location = path + (key,)
        for dim in excluded_dims:
            if (location == dim) or (dim[0] is None and dim[-1] == key):
                return False
        # Convert tuples/sets to lists
        if isinstance(value, (tuple, set)):
            return key, list(value)
        return True

    #################### Chain Together Visit Functions ####################
    if callable(visitors):
        visitors = (visitors,)
    visit = multi_visit(*visitors, _visit)
    # Extra `visitors` will be called first, with `_visit` acting as the default visit, called last

    guidelines = remap(template_params, visit=visit)
    # `guidelines` = `template_params` that are neither `space` choices, nor `dimensions_to_ignore`
    return remap(candidate_params, visit=visit) == guidelines
def link_choice_ids(layers, compile_params, extra_params, dimensions):
    """Tag every search-space choice found in the Keras parameters with a "location" attribute
    that records its position among all hyperparameters

    The given structures are walked with `remap` purely for the side effect of tagging. The
    remapped copies are discarded

    Parameters
    ----------
    layers: List
        A list of dicts, in which each dict describes a network layer
    compile_params: Dict
        Hyperparameters supplied to the model's `compile` call
    extra_params: Dict
        Hyperparameters for the model's extra methods, such as `fit`, `predict`, and
        `predict_proba`. Its "params" key is skipped when walking
    dimensions: List
        Descendants of :class:`space.Dimension`, representing the entire hyperparameter search
        space. Members whose `id` matches a choice found while walking are tagged as well

    Returns
    -------
    extra_params: Dict
        The very object given as `extra_params`. Any `Real`, `Integer` or `Categorical` reached
        while walking (and its `id`-matched entry in `dimensions`) now has a "location"
        attribute"""

    def visit_as(param_type):
        """Make visit func that prepends `param_type` to the "location" tuple added in `_visit`"""
        prefix = param_type if isinstance(param_type, tuple) else (param_type,)

        def _visit(path, key, value):
            """If `value` is a `Real`, `Integer` or `Categorical`, set "location" on it and on
            every member of `dimensions` that shares its `id`"""
            if isinstance(value, (Real, Integer, Categorical)):
                location = prefix + path + (key,)
                for dimension in dimensions:
                    #################### Add `location` Attribute ####################
                    if dimension.id == value.id:
                        dimension.location = location
                        # `value` is only tagged when a matching dimension exists
                        value.location = location
            return (key, value)

        return _visit

    #################### Enter Keras Callbacks ####################
    def _enter(path, key, value):
        """If `value` is a Keras callback, enter as a dict, iterating over non-magic attributes.
        Else, `default_enter`"""
        if isinstance(value, base_keras_callback):
            attrs = [(name, getattr(value, name)) for name in dir(value) if not name.startswith("__")]
            return dict(), attrs
        return default_enter(path, key, value)

    #################### Enter Keras Initializer ####################
    def layer_enter(path, key, value):
        """If Keras `Initializer`, enter as dict, iterating over every attribute that is neither
        "__hh_previous_frame" nor ends with a double underscore. Else, `default_enter`"""
        if isinstance(value, BaseKerasInitializer):
            attrs = [
                (name, getattr(value, name))
                for name in dir(value)
                if name != "__hh_previous_frame" and not name.endswith("__")
            ]
            return dict(), attrs
        return default_enter(path, key, value)

    # TODO: Merge "__hh" attrs above into a single dict of attributes for initializers
    # TODO: Entering layer initializers like above will break matching when using default values
    # TODO: Currently, path is set to use "__hh_used_kwargs", which won't match if the default value is used

    # The `remap` results below are intentionally dropped: only the tagging side effect matters
    remap(layers.copy(), visit=visit_as(("model_init_params", "layers")), enter=layer_enter)
    remap(compile_params.copy(), visit=visit_as(("model_init_params", "compile_params")))

    _extra_params = {k: v for k, v in extra_params.items() if k != "params"}
    # TODO: Replace above with `general_utils.subdict`
    remap(_extra_params, visit=visit_as("model_extra_params"), enter=_enter)

    # `extra_params` has locations for `layers`, `compile_params`, `extra_params` - Of form
    # expected by `build_fn` (less choices)
    return extra_params
def handle_complex_types(self):
    """Replace complex-typed values in :attr:`parameters` with representations that can be
    saved in a Description

    Keras callbacks become dicts and `Sentinel` instances become their `sentinel` value.
    Callables and DataFrames are replaced with their hash, with a lookup entry linking the
    original value to the hash, unless :attr:`tested_keys_dir` is None. A warning is issued
    when several DataFrames share one hash"""
    dataframe_hashes = {}

    def enter(path, key, value):
        """Expose the "name", "metric_function" and "direction" attributes of
        :class:`metrics.Metric` instances for remapping. Else, `default_enter`"""
        if not isinstance(value, Metric):
            return default_enter(path, key, value)
        exposed = ("name", "metric_function", "direction")
        return ({}, [(attr, getattr(value, attr)) for attr in exposed])

    def visit(path, key, value):
        """Return `key` with either the unchanged `value`, or its saveable replacement

        Parameters
        ----------
        path: Tuple
            The path of keys that leads to `key`
        key: Str
            The parameter name
        value: *
            The value of the parameter `key`

        Returns
        -------
        Tuple of (`key`, value), in which value is the original `value`, a dict for a Keras
        callback, the `sentinel` of a `Sentinel`, or a hash for a callable/DataFrame"""
        if isinstance(value, BaseKerasCallback):
            return (key, keras_callback_to_dict(value))
        if isinstance(value, Sentinel):
            return (key, value.sentinel)
        if not (callable(value) or isinstance(value, pd.DataFrame)):
            return (key, value)

        # TODO: Check here if callable, and using a `Trace`d model/model_initializer
        # TODO: If so, pass extra kwargs to below `make_hash_sha256`, which are eventually given to `hash_callable`
        # TODO: Notably, `ignore_source_lines=True` should be included
        # FLAG: Also, look into adding package version number to hashed attributes
        digest = make_hash_sha256(value)

        if isinstance(value, pd.DataFrame):
            # Remember which keys produced this hash, so duplicates can be reported below
            dataframe_hashes.setdefault(digest, []).append(key)

        if self.tested_keys_dir is not None:  # Key-making not blacklisted
            try:
                self.add_complex_type_lookup_entry(path, key, value, digest)
            except (FileNotFoundError, OSError):
                # Create the lookup directory for this path, then retry once
                make_dirs(os.path.join(self.lookup_dir, *path), exist_ok=False)
                self.add_complex_type_lookup_entry(path, key, value, digest)
        return (key, digest)

    self.parameters = remap(self.parameters, visit=visit, enter=enter)

    #################### Check for Identical DataFrames ####################
    for df_hash, df_names in dataframe_hashes.items():
        if len(df_names) <= 1:
            continue
        G.warn(
            f"The dataframes: {df_names} have an identical hash: {df_hash!s}. This implies the dataframes are "
            + "identical, which is probably unintentional. If left alone, scores may be misleading!"
        )
def does_match_init_params_guidelines_multi(self, exp_id, params, score, location) -> bool:
    """Check a candidate against the `model_init_params` guidelines when the expected value of
    one guideline depends on another hyperparameter that is a space choice

    Parameters
    ----------
    exp_id: String
        Candidate Experiment ID
    params: Dict
        Candidate "model_init_params" to compare to the template in :attr:`model_params`
    score: Number
        Value of the candidate Experiment's target metric
    location: Tuple
        Path of the space choice whose value changes the expected default of another
        hyperparameter. Only ``("compile_params", "optimizer")`` is handled

    Returns
    -------
    Boolean
        Result of the last `does_match_init_params_guidelines` call made. The search stops at
        the first call that returns True

    Raises
    ------
    ValueError
        If `location` is anything other than ``("compile_params", "optimizer")``

    Notes
    -----
    This is used for Keras Experiments when the `optimizer` value in a model's `compile_params`
    is given as a hyperparameter space choice. Each possible value of `optimizer` prescribes
    different default values for `optimizer_params`, so the template is re-checked once per
    allowed optimizer, with `optimizer_params` swapped for that optimizer's config"""
    template_init_params = deepcopy(self.model_params["model_init_params"])

    if location != ("compile_params", "optimizer"):
        raise ValueError("Received unhandled location: {}".format(location))

    from keras.optimizers import get as k_opt_get

    # Path to the hyperparameter whose default value depends on the choice at `location`
    dependent_path = ("compile_params", "optimizer_params")
    # Acceptable `("model_init_params", "compile_params", "optimizer")` values
    choices = get_path(template_init_params, location).bounds

    #################### Handle First Value (Dummy) ####################
    # The unmodified template already carries the value at `dependent_path` that goes with
    # `choices[0]`, so the first choice needs no substitution
    is_match = self.does_match_init_params_guidelines(exp_id, params, score)
    if is_match is True:
        return is_match

    #################### Handle Remaining Values ####################
    for choice_index in range(1, len(choices)):
        # Default config of the dependent hyperparameter under this optimizer choice
        default_config = k_opt_get(choices[choice_index]).get_config()

        def _substitute_default(path, key, value):
            """If `path` + `key` == `dependent_path`, return default for this choice. Else,
            return the entry unchanged"""
            if path + (key,) == dependent_path:
                return (key, default_config)
            return (key, value)

        is_match = self.does_match_init_params_guidelines(
            exp_id,
            params,
            score,
            template_params=remap(template_init_params, visit=_substitute_default),
        )
        # Stop at the first match, so the result recorded for `exp_id` stays truthy
        if is_match is True:
            break

    return is_match
def __init__(
    self,
    algorithm_name,
    module_name,
    cross_experiment_key,
    target_metric,
    space,
    leaderboard_path,
    descriptions_dir,
    model_params,
    sort=None,  # TODO: Unfinished - To be used in `_get_scored_params`/`_get_ids`
):
    """ResultFinder for locating saved Keras Experiments compatible with the given constraints

    All arguments are forwarded to the parent initializer. Afterwards, Keras callbacks inside
    :attr:`model_params` are replaced with dicts, and the "params" entry of
    "model_extra_params" is removed if present

    Parameters
    ----------
    algorithm_name: String
        The name of the algorithm whose hyperparameters are being optimized
    module_name: String
        The name of the module from whence the algorithm being used came
    cross_experiment_key: String
        The cross_experiment_key produced by the current :class:`environment.Environment`
    target_metric: Tuple
        Path denoting the metric to be used. The first value should be one of ['oof',
        'holdout', 'in_fold'], and the second value should be the name of a metric supplied in
        :attr:`environment.Environment.metrics_params`
    space: :class:`space.Space`
        Hyperparameter search space constraints
    leaderboard_path: String
        Path to a leaderboard file, whose listed Experiments will be tested for compatibility
    descriptions_dir: String
        Path to a directory containing the description files of saved Experiments
    model_params: Dict
        Concrete hyperparameters for the model. Common keys include 'model_init_params', and
        'model_extra_params', both of which can be pointers to dicts of hyperparameters
    sort: "target_asc", "target_desc", "chronological", "reverse_chronological", int
        How to sort the experiment results that fit within the given constraints

        * "target_asc": Sort from experiments with the lowest value for `target_metric` to
          those with the greatest
        * "target_desc": Sort from experiments with the highest value for `target_metric` to
          those with the lowest
        * "chronological": Sort from oldest experiments to newest
        * "reverse_chronological": Sort from newest experiments to oldest
        * int: Random seed with which to shuffle experiments"""
    super().__init__(
        algorithm_name=algorithm_name,
        module_name=module_name,
        cross_experiment_key=cross_experiment_key,
        target_metric=target_metric,
        space=space,
        leaderboard_path=leaderboard_path,
        descriptions_dir=descriptions_dir,
        model_params=model_params,
        sort=sort,  # TODO: Unfinished - To be used in `_get_scored_params`/`_get_ids`
    )

    from keras.callbacks import Callback as BaseKerasCallback

    # noinspection PyUnusedLocal
    def _callbacks_to_dicts(path, key, value):
        """Swap a `BaseKerasCallback` for its dict representation. Else, keep `value`"""
        is_callback = isinstance(value, BaseKerasCallback)
        return (key, keras_callback_to_dict(value) if is_callback else value)

    self.model_params = remap(self.model_params, visit=_callbacks_to_dicts)

    # Drop the "params" entry of "model_extra_params". Nothing to do if either key is absent
    try:
        extra_params = self.model_params["model_extra_params"]
        del extra_params["params"]
    except KeyError:
        pass
def __init__(
    self,
    algorithm_name,
    module_name,
    cross_experiment_key,
    target_metric,
    space,
    leaderboard_path,
    descriptions_dir,
    model_params,
    sort=None,  # TODO: Unfinished - To be used in `get_scored_params`/`experiment_ids`
):
    """ResultFinder for locating saved Keras Experiments compatible with the given constraints

    All arguments are forwarded to the parent initializer. Afterwards, Keras callbacks and
    initializers inside :attr:`model_params` are replaced with dicts, and the temporary
    "params" entry of "model_extra_params" is removed if present

    Parameters
    ----------
    algorithm_name: String
        Name of the algorithm whose hyperparameters are being optimized
    module_name: String
        Name of the module from whence the algorithm being used came
    cross_experiment_key: String
        :attr:`hyperparameter_hunter.environment.Environment.cross_experiment_key` produced by
        the current `Environment`
    target_metric: Tuple
        Path denoting the metric to be used. The first value should be one of {"oof",
        "holdout", "in_fold"}, and the second value should be the name of a metric supplied in
        :attr:`hyperparameter_hunter.environment.Environment.metrics_params`
    space: Space
        Instance of :class:`~hyperparameter_hunter.space.space_core.Space`, defining
        hyperparameter search space constraints
    leaderboard_path: String
        Path to a leaderboard file, whose listed Experiments will be tested for compatibility
    descriptions_dir: String
        Path to a directory containing the description files of saved Experiments
    model_params: Dict
        Concrete hyperparameters for the model. Common keys include "model_init_params" and
        "model_extra_params", both of which can be pointers to dicts of hyperparameters.
        Additionally, "feature_engineer" may be included with an instance of
        :class:`~hyperparameter_hunter.feature_engineering.FeatureEngineer`
    sort: {"target_asc", "target_desc", "chronological", "reverse_chronological"}, or int
        ... Experimental...
        How to sort the experiment results that fit within the given constraints

        * "target_asc": Sort from experiments with the lowest value for `target_metric` to
          those with the greatest
        * "target_desc": Sort from experiments with the highest value for `target_metric` to
          those with the lowest
        * "chronological": Sort from oldest experiments to newest
        * "reverse_chronological": Sort from newest experiments to oldest
        * int: Random seed with which to shuffle experiments"""
    super().__init__(
        algorithm_name=algorithm_name,
        module_name=module_name,
        cross_experiment_key=cross_experiment_key,
        target_metric=target_metric,
        space=space,
        leaderboard_path=leaderboard_path,
        descriptions_dir=descriptions_dir,
        model_params=model_params,
        sort=sort,
    )

    from keras.callbacks import Callback as BaseKerasCallback
    from keras.initializers import Initializer as BaseKerasInitializer

    # Checked in order: callbacks first, then initializers
    dict_converters = (
        (BaseKerasCallback, keras_callback_to_dict),
        (BaseKerasInitializer, keras_initializer_to_dict),
    )

    # noinspection PyUnusedLocal
    def _keras_objects_to_dicts(path, key, value):
        """Swap a `BaseKerasCallback` or `BaseKerasInitializer` for its dict representation.
        Else, keep `value`"""
        for keras_type, to_dict in dict_converters:
            if isinstance(value, keras_type):
                return (key, to_dict(value))
        return (key, value)

    self.model_params = remap(self.model_params, visit=_keras_objects_to_dicts)

    # Below cleans out the temporary "params" dict built by `keras_optimization_helper`.
    # It exists in order to pass concrete values for choices during optimization through the
    # Keras model `build_fn`. However, at this stage, it just gets in the way since
    # :attr:`space` defines the choices, and their `location`s point to where they are within
    # :attr:`model_params`. Not deleting them would basically duplicate all choice Dimensions
    try:
        extra_params = self.model_params["model_extra_params"]
        del extra_params["params"]
    except KeyError:
        pass
def handle_complex_types(self):
    """Replace complex-typed values in :attr:`parameters` with representations that can be
    saved in a Description

    Keras callbacks and initializers become dicts, and `Sentinel` instances become their
    `sentinel` value. Callables and DataFrames are replaced with their hash, with a lookup
    entry linking the original value to the hash, unless :attr:`tested_keys_dir` is None. A
    warning is issued when several DataFrames share one hash"""
    dataframe_hashes = {}

    def enter(path, key, value):
        """Expose the items to remap for `Metric`, `EngineerStep` and `FeatureEngineer`
        instances. Else, `default_enter`"""
        if isinstance(value, Metric):
            exposed = ("name", "metric_function", "direction")
            return ({}, [(attr, getattr(value, attr)) for attr in exposed])
        if isinstance(value, (EngineerStep, FeatureEngineer)):
            # Both types are entered through the items of their `get_key_data` result
            return ({}, list(value.get_key_data().items()))
        return default_enter(path, key, value)

    def visit(path, key, value):
        """Return `key` with either the unchanged `value`, or its saveable replacement

        Parameters
        ----------
        path: Tuple
            The path of keys that leads to `key`
        key: Str
            The parameter name
        value: *
            The value of the parameter `key`

        Returns
        -------
        Tuple of (`key`, value), in which value is the original `value`, a dict for a Keras
        callback/initializer, the `sentinel` of a `Sentinel`, or a hash for a
        callable/DataFrame"""
        if isinstance(value, BaseKerasCallback):
            return (key, keras_callback_to_dict(value))
        if isinstance(value, BaseKerasInitializer):
            return (key, keras_initializer_to_dict(value))
        if isinstance(value, Sentinel):
            return (key, value.sentinel)
        if not (callable(value) or isinstance(value, pd.DataFrame)):
            return (key, value)

        # FLAG: Look into adding package version number to hashed attributes
        digest = make_hash_sha256(value)

        if isinstance(value, pd.DataFrame):
            # Remember which keys produced this hash, so duplicates can be reported below
            dataframe_hashes.setdefault(digest, []).append(key)

        if self.tested_keys_dir is not None:  # Key-making not blacklisted
            self.add_complex_type_lookup_entry(path, key, value, digest)
        return (key, digest)

    self.parameters = remap(self.parameters, visit=visit, enter=enter)

    #################### Check for Identical DataFrames ####################
    for df_hash, df_names in dataframe_hashes.items():
        if len(df_names) <= 1:
            continue
        G.warn(f"The dataframes: {df_names} are identical. Scores may be misleading!")
def filter_by_guidelines(
    hyperparameters_and_scores,
    hyperparameter_space,
    model_init_params,
    model_extra_params,
    preprocessing_pipeline,
    preprocessing_params,
    feature_selector,
    **kwargs
):
    """Reject any `hyperparameters_and_scores` tuples whose hyperparameters do not match the
    guideline hyperparameters (all hyperparameters not in `hyperparameter_space`), after
    ignoring unimportant hyperparameters

    Parameters
    ----------
    hyperparameters_and_scores: List of tuples
        Each tuple should be of form (hyperparameters <dict>, evaluation <float>), in which
        hyperparameters contains at least the keys: ['model_init_params',
        'model_extra_params', 'preprocessing_pipeline', 'preprocessing_params',
        'feature_selector']
    hyperparameter_space: instance of :class:`space.Space`
        The boundaries of the hyperparameters to be searched. Names given as plain strings by
        its `get_names` are treated as keys of "model_init_params"
    model_init_params: Dict
    model_extra_params: Dict, or None
    preprocessing_pipeline: Dict, or None
    preprocessing_params: Dict, or None
    feature_selector: List of column names, callable, list of booleans, or None
    **kwargs: Dict
        Extra parameter dicts to include in `guidelines`. For example, if filtering the
        hyperparameters of a Keras neural network, this should contain the following keys:
        'layers', 'compile_params'

    Returns
    -------
    hyperparameters_and_scores: List of tuples
        New list holding only the tuples whose pruned hyperparameters equal the pruned
        guidelines, in their original order"""
    # `dimensions` = hyperparameters being searched, which are ignored when comparing
    dimensions = [
        ("model_init_params", name) if isinstance(name, str) else name
        for name in hyperparameter_space.get_names()
    ]
    dimensions_to_ignore = [
        ("model_initializer",),
        ("model_init_params", "build_fn"),
        (None, "verbose"),
        (None, "silent"),
        (None, "random_state"),
        (None, "seed"),
        ("model_init_params", "n_jobs"),
        ("model_init_params", "nthread"),
        # TODO: Remove this once loss_functions are hashed in description files
        ("model_init_params", "compile_params", "loss_functions"),
    ]
    # Built once here, rather than on every visited element
    excluded_paths = dimensions + dimensions_to_ignore

    temp_guidelines = dict(
        model_init_params=model_init_params,
        model_extra_params=model_extra_params,
        preprocessing_pipeline=preprocessing_pipeline,
        preprocessing_params=preprocessing_params,
        feature_selector=feature_selector,
        **kwargs
    )

    # noinspection PyUnusedLocal
    def _visit(path, key, value):
        """Return False if element in hyperparameter_space dimensions, or in dimensions being
        ignored. Else, return True. A path starting with None matches on `key` alone"""
        full_path = path + (key,)
        return not any(
            full_path == dimension or (dimension[0] is None and dimension[1] == key)
            for dimension in excluded_paths
        )

    # `guidelines` = `temp_guidelines` that are neither `hyperparameter_space` choices, nor in
    # `dimensions_to_ignore` (Hyperparameters that were set values and affect Experiment results)
    guidelines = remap(temp_guidelines, visit=_visit)

    return [
        candidate
        for candidate in hyperparameters_and_scores
        if remap(candidate[0], visit=_visit) == guidelines
    ]