コード例 #1
0
    def __call__(self, wrapped, instance, args, kwargs):
        """Call `wrapped`, filling in missing keyword arguments from a JSON parameters file

        The file is :attr:`file` when that is set. Otherwise, if :attr:`key` is not None, the file
        is looked up in the call's own arguments: first as `kwargs[key]`, then as `args[key]`.
        Values from the file never override keyword arguments already present in `kwargs`, and
        only keyword-only parameters of `wrapped` may be set from the file

        Parameters
        ----------
        wrapped: Callable
            Callable being invoked, whose signature decides which file keys are valid
        instance: Object
            Not used by this method; accepted as part of the call signature
        args: Sequence
            Positional arguments forwarded to `wrapped`
        kwargs: Dict
            Keyword arguments forwarded to `wrapped`. Updated in place with values from the file

        Returns
        -------
        Object
            Whatever `wrapped(*args, **kwargs)` returns

        Raises
        ------
        TypeError
            If the contents read from the parameters file are not a dict"""
        file = self.file
        file_params = {}

        #################### Locate Parameters File ####################
        if not file and self.key is not None:
            # `TypeError`: a str `key` was used to index `args`. `IndexError`: an int `key` lies
            # beyond `args`. Either way no file was supplied, so `file_params` stays empty
            with suppress(TypeError, IndexError):
                file = kwargs.get(self.key, None) or args[self.key]

        if file:  # If `file=None`, continue with empty dict of `file_params`
            file_params = read_json(file)

        if not isinstance(file_params, dict):
            raise TypeError("{} must have dict, not {}".format(
                file, file_params))

        #################### Check Valid Parameters for `wrapped` ####################
        ok_keys = {
            k for k, v in signature(wrapped).parameters.items()
            if v.kind == v.KEYWORD_ONLY
        }

        for k, v in file_params.items():
            if k not in ok_keys:
                if self.verbose:
                    G.warn(
                        f"Invalid key ({k}) in user parameters file: {file}")
                # Skip invalid keys: forwarding them may make `wrapped` fail with an unexpected
                # keyword argument, which contradicts the warning above
                continue
            if k not in kwargs:
                kwargs[k] = v
                if self.verbose:
                    G.debug(
                        f"Parameter `{k}` set to user default in parameters file: '{file}'"
                    )

        return wrapped(*args, **kwargs)
コード例 #2
0
    def update_custom_environment_params(self):
        """Fill attributes that are still None from the user's parameter file, then `DEFAULT_PARAMS`

        Only keyword-only parameters of `Environment` are considered. Keys in the file outside
        that set are reported through `G.warn` and otherwise ignored

        Raises
        ------
        TypeError
            If `environment_params_path` is neither a str nor None, if reading a non-None path
            raises TypeError, or if the contents read from the file are not a dict"""
        valid_names = [
            name for name, param in signature(Environment).parameters.items()
            if param.kind == param.KEYWORD_ONLY
        ]
        user_defaults = {}

        if not (self.environment_params_path is None or isinstance(self.environment_params_path, str)):
            raise TypeError('environment_params_path must be a str, not {}: {}'.format(*type_val(self.environment_params_path)))

        # A TypeError is tolerated only when no path was given; any other error propagates
        try:
            user_defaults = read_json(self.environment_params_path)
        except TypeError:
            if self.environment_params_path is not None:
                raise

        if not isinstance(user_defaults, dict):
            raise TypeError('environment_params_path must contain a dict. Received {}: {}'.format(*type_val(user_defaults)))

        #################### Check user_defaults ####################
        invalid_key_template = '\n\t'.join([
            'Invalid key ({}) in user-defined default Environment parameter file at "{}". If expected to do something,',
            'it really won\'t, so it should be removed or fixed. The following are valid default keys: {}'
        ])

        for name, value in user_defaults.items():
            if name not in valid_names:
                G.warn(invalid_key_template.format(name, self.environment_params_path, valid_names))
                continue

            if getattr(self, name) is None:
                setattr(self, name, value)
                G.debug('Environment kwarg "{}" was set to user default at "{}"'.format(name, self.environment_params_path))

        #################### Check Module Default Environment Arguments ####################
        for name in valid_names:
            if getattr(self, name) is None:
                setattr(self, name, self.DEFAULT_PARAMS.get(name, None))
コード例 #3
0
 def _random_seed_initializer(self):
     """Create one random seed per (repetition, fold, run) unless seeds were already provided"""
     if self.experiment_params['random_seeds'] is None:
         seed_bounds = self.experiment_params['random_seed_bounds']
         # One axis each for repetitions, folds and runs
         seed_shape = (
             self.cross_validation_params.get('n_repeats', 1),
             self.cross_validation_params['n_splits'],
             self.experiment_params['runs'],
         )
         generated = np.random.randint(*seed_bounds, size=seed_shape)
         self.experiment_params['random_seeds'] = generated.tolist()
     G.debug('BaseExperiment._random_seed_initializer() done')
コード例 #4
0
 def preparation_workflow(self):
     """Run every step that has to happen before the experiment itself starts

     In order: generate the experiment ID, back up the script, validate parameters, generate the
     hyperparameter key, then run any additional preparation steps"""
     G.debug('Starting preparation_workflow...')

     # The steps run strictly in this order
     self._generate_experiment_id()
     self._create_script_backup()
     self._validate_parameters()
     self._generate_hyperparameter_key()
     self._additional_preparation_steps()

     G.debug('Completed preparation_workflow')
コード例 #5
0
    def _validate_parameters(self):
        """Normalize :attr:`target_metric` and fill in :attr:`feature_selector` when it is None"""
        #################### target_metric ####################
        self.target_metric = get_formatted_target_metric(self.target_metric, self.metrics_map)

        #################### feature_selector ####################
        if self.feature_selector is None:
            # Default to every train column except the target and id columns that are set
            excluded_columns = [
                column for column in (self.target_column, self.id_column) if column is not None
            ]
            self.feature_selector = [
                column for column in self.train_dataset.columns.values
                if column not in excluded_columns
            ]

        G.debug('Experiment parameters have been validated')
コード例 #6
0
 def _random_seed_initializer(self):
     """Create one random seed per (repetition, fold, run) unless seeds were already provided"""
     if self.experiment_params["random_seeds"] is None:
         seed_bounds = self.experiment_params["random_seed_bounds"]
         # One axis each for repetitions, folds and runs
         seed_shape = (
             self.cv_params.get("n_repeats", 1),
             self.cv_params["n_splits"],
             self.experiment_params["runs"],
         )
         generated = np.random.randint(*seed_bounds, size=seed_shape)
         self.experiment_params["random_seeds"] = generated.tolist()
     G.debug("BaseExperiment._random_seed_initializer() done")
コード例 #7
0
    def on_repetition_end(self):
        """Report the finished repetition: fold/run label, evaluation results and elapsed time

        The message goes to `G.log` when `G.Env.verbose >= 2` and more than one repeat is
        configured in `G.Env.cv_params`; otherwise it goes to `G.debug`"""
        message = format_fold_run(rep=self._rep, fold="-", run="-")
        if not message.endswith(" "):
            message += self.log_separator
        message += format_evaluation(self.last_evaluation_results,
                                     float_format=self.float_format)
        if not message.endswith(" "):
            message += self.log_separator
        message += self.__elapsed_helper("reps")

        # Captured here, inside this method, so that it is the frame of this method's caller
        caller_frame = inspect.currentframe().f_back
        show_in_log = G.Env.verbose >= 2 and G.Env.cv_params.get("n_repeats", 1) > 1
        emit = G.log if show_in_log else G.debug
        emit(message, previous_frame=caller_frame)
        super().on_repetition_end()
コード例 #8
0
    def on_run_end(self):
        """Report the finished run: fold/run label, evaluation results and elapsed time

        The message goes to `G.log` when `G.Env.verbose >= 3` and `G.Env.runs > 1`; otherwise it
        goes to `G.debug`"""
        message = self.log_separator.join([
            format_fold_run(rep=self._rep, fold=self._fold, run=self._run),
            format_evaluation(self.last_evaluation_results,
                              float_format=self.float_format),
            self.__elapsed_helper("runs"),
        ])

        # Captured here, inside this method, so that it is the frame of this method's caller
        caller_frame = inspect.currentframe().f_back
        if G.Env.verbose >= 3 and G.Env.runs > 1:
            G.log(message, previous_frame=caller_frame)
        else:
            G.debug(message, previous_frame=caller_frame)
        super().on_run_end()
コード例 #9
0
    def _generate_hyperparameter_key(self):
        """Build :attr:`hyperparameter_key` from the model and feature settings, then log it"""
        key_inputs = {
            "model_initializer": self.model_initializer,
            "model_init_params": self.model_init_params,
            "model_extra_params": self.model_extra_params,
            "feature_engineer": self.feature_engineer,
            "feature_selector": self.feature_selector,
            # FLAG: Should probably add :attr:`target_metric` to key - With option to ignore it?
        }
        key_maker = HyperparameterKeyMaker(key_inputs, self.cross_experiment_key)
        self.hyperparameter_key = key_maker

        G.log("Hyperparameter Key:     '{}'".format(self.hyperparameter_key))
        G.debug("Raw hyperparameters...")
        G.debug(self.hyperparameter_key.parameters)
コード例 #10
0
    def update_custom_environment_params(self):
        """Fill attributes that are still None from the user's parameter file, then `DEFAULT_PARAMS`

        Only keyword-only parameters of `Environment` are considered. Keys in the file outside
        that set are reported through `G.warn` and otherwise ignored

        Raises
        ------
        TypeError
            If `environment_params_path` is neither a str nor None, if reading a non-None path
            raises TypeError, or if the contents read from the file are not a dict"""
        valid_names = [
            name for name, param in signature(Environment).parameters.items()
            if param.kind == param.KEYWORD_ONLY
        ]
        user_defaults = {}

        if not (self.environment_params_path is None
                or isinstance(self.environment_params_path, str)):
            raise TypeError(
                f"Non-str `environment_params_path`: {self.environment_params_path}"
            )

        # If `environment_params_path=None`, the TypeError is dropped and `user_defaults` stays {}.
        # Every other error, including a missing file, propagates to the caller
        try:
            user_defaults = read_json(self.environment_params_path)
        except TypeError:
            if self.environment_params_path is not None:
                raise

        if not isinstance(user_defaults, dict):
            raise TypeError(
                "environment_params_path must have dict, not {}".format(
                    user_defaults))

        #################### Check user_defaults ####################
        invalid_key_template = "\n\t".join([
            "Invalid key ({}) in user-defined default Environment parameter file at '{}'. If expected to do something,",
            "it really won't, so it should be removed or fixed. The following are valid default keys: {}",
        ])

        for name, value in user_defaults.items():
            if name not in valid_names:
                G.warn(invalid_key_template.format(
                    name, self.environment_params_path, valid_names))
                continue

            if getattr(self, name) is None:
                setattr(self, name, value)
                G.debug(
                    f"Environment.`{name}` set to user default: '{self.environment_params_path}'"
                )

        #################### Check Module Default Environment Arguments ####################
        for name in valid_names:
            if getattr(self, name) is None:
                setattr(self, name, self.DEFAULT_PARAMS.get(name, None))
コード例 #11
0
    def on_fold_end(self):
        """Report the finished fold: rep/fold label, evaluation results and elapsed time

        The message goes to `G.log` when `G.Env.verbose >= 2` and more than one split is
        configured in `G.Env.cross_validation_params`; otherwise it goes to `G.debug`"""
        # TODO: Prepend rep count
        message = "F{}.{} AVG:   ".format(self._rep, self._fold)
        message += format_evaluation(self.last_evaluation_results,
                                     float_format=self.float_format)
        if not message.endswith(" "):
            message += self.log_separator
        message += self.__elapsed_helper("folds")

        # Captured here, inside this method, so that it is the frame of this method's caller
        caller_frame = inspect.currentframe().f_back
        show_in_log = G.Env.verbose >= 2 and G.Env.cross_validation_params["n_splits"] > 1
        emit = G.log if show_in_log else G.debug
        emit(message, previous_frame=caller_frame, add_time=False)
        super().on_fold_end()
コード例 #12
0
    def on_fold_end(self):
        """Report the finished fold: fold/run label, evaluation results and elapsed time

        The message goes to `G.log` when `G.Env.verbose >= 2` and more than one split is
        configured in `G.Env.cv_params`; otherwise it goes to `G.debug`"""
        message = format_fold_run(rep=self._rep, fold=self._fold, run="-")
        if not message.endswith(" "):
            message += self.log_separator
        message += format_evaluation(self.last_evaluation_results,
                                     float_format=self.float_format)
        if not message.endswith(" "):
            message += self.log_separator
        message += self.__elapsed_helper("folds")

        # Captured here, inside this method, so that it is the frame of this method's caller
        caller_frame = inspect.currentframe().f_back
        if G.Env.verbose >= 2 and G.Env.cv_params["n_splits"] > 1:
            G.log(message, previous_frame=caller_frame, add_time=False)
        else:
            G.debug(message, previous_frame=caller_frame, add_time=False)
        super().on_fold_end()
コード例 #13
0
    def on_run_start(self):
        """Report the starting run: fold/run label, plus the current seed when it is truthy

        The message goes to `G.log` when `G.Env.verbose >= 4` and `G.Env.runs > 1`; otherwise it
        goes to `G.debug`"""
        message = format_fold_run(rep=self._rep,
                                  fold=self._fold,
                                  run=self._run)
        if self.current_seed:
            # The separator is only needed when a label precedes the seed
            if message != "":
                message += format(self.log_separator)
            message += "Seed: {}".format(self.current_seed)

        # Captured here, inside this method, so that it is the frame of this method's caller
        caller_frame = inspect.currentframe().f_back
        show_in_log = G.Env.verbose >= 4 and G.Env.runs > 1
        emit = G.log if show_in_log else G.debug
        emit(message, previous_frame=caller_frame, add_time=True)
        super().on_run_start()
コード例 #14
0
    def experiment_workflow(self):
        """Run the experiment itself: execute it, save its results, then clean up

        Raises
        ------
        RepeatedExperimentError
            If the hyperparameter key already exists and :attr:`do_raise_repeated` is True.
            Cleanup runs before the error is raised"""
        already_run = self.hyperparameter_key.exists is True
        if already_run:
            duplicate_message = f"{self!r} has already been run"
            if self.do_raise_repeated is True:
                self._clean_up()
                raise RepeatedExperimentError(duplicate_message)
            # Duplicates are tolerated here, so only report them
            G.debug(duplicate_message)
            G.warn("WARNING: Duplicate experiment!")

        self._initialize_random_seeds()
        self.execute()

        #################### Save Experiment Results ####################
        result_recorders = RecorderList(
            file_blacklist=G.Env.file_blacklist,
            extra_recorders=G.Env.experiment_recorders,
        )
        result_recorders.format_result()
        G.log(f"Saving results for Experiment: '{self.experiment_id}'")
        result_recorders.save_result()
        self._clean_up()
コード例 #15
0
 def _update_model_params(self):
     """Set the seed entry of :attr:`model_init_params` to :attr:`current_seed`

     "random_state" is tried first, then "seed". If neither key is present, a debug message is
     emitted. Any exception raised along the way is logged instead of propagated"""
     # TODO: Add this to some workflow in Experiment class. For now it is never used, unless the subclass decides to...
     # `model_init_params` initialized to all algorithm hyperparameters - Works even if 'random_state' not explicitly given
     try:
         for seed_key in ("random_state", "seed"):
             if seed_key in self.model_init_params:
                 self.model_init_params[seed_key] = self.current_seed
                 break
         else:
             G.debug(
                 "WARNING: Model has no random_state/seed parameter to update"
             )
             # FLAG: HIGH PRIORITY BELOW
             # TODO: BELOW IS NOT THE CASE IF MODEL IS NN - SETTING THE GLOBAL RANDOM SEED DOES SOMETHING
             # TODO: If this is logged, there is no reason to execute multiple-run-averaging, so don't
             # TODO: ... Either 1) Set `runs` = 1 (this would mess with the environment key), or...
             # TODO: ... 2) Set the results of all subsequent runs to the results of the first run (this could be difficult)
             # FLAG: HIGH PRIORITY ABOVE
     except Exception as error:
         failure_message = "WARNING: Failed to update model's random_state     {}".format(
             repr(error))
         G.log(failure_message)
コード例 #16
0
    def update_custom_environment_params(self):
        """Fill attributes that are still None from the user's parameter file, then `DEFAULT_PARAMS`

        Only keyword-only parameters of `Environment` are considered. Keys in the file outside
        that set are reported through `G.warn` and otherwise ignored

        Raises
        ------
        TypeError
            If the contents read from the file are not a dict, or if reading a non-None
            `environment_params_path` raises TypeError
        OSError
            If reading a non-None `environment_params_path` raises OSError"""
        valid_names = [
            name for name, param in signature(Environment).parameters.items()
            if param.kind == param.KEYWORD_ONLY
        ]
        user_defaults = {}

        # If `environment_params_path=None`, the error is dropped and `user_defaults` stays {}
        try:
            user_defaults = read_json(self.environment_params_path)
        except (TypeError, OSError):
            if self.environment_params_path is not None:
                raise

        if not isinstance(user_defaults, dict):
            raise TypeError(
                "environment_params_path must have dict, not {}".format(
                    user_defaults))

        #################### Check user_defaults ####################
        for name, value in user_defaults.items():
            if name not in valid_names:
                G.warn(
                    f"Invalid key ({name}) in user Environment parameters: {self.environment_params_path}"
                )
                continue

            if getattr(self, name) is None:
                setattr(self, name, value)
                G.debug(
                    f"Environment.`{name}` set to user default: '{self.environment_params_path}'"
                )

        #################### Check Module Default Environment Arguments ####################
        for name in valid_names:
            if getattr(self, name) is None:
                setattr(self, name, self.DEFAULT_PARAMS.get(name, None))
コード例 #17
0
 def on_fold_start(self):
     """Run at the start of a fold in an Experiment's cross-validation scheme

     This implementation only emits a debug message"""
     G.debug("BaseCallback.on_fold_start()")
コード例 #18
0
 def on_run_start(self):
     """Run at the start of a run in an Experiment's multiple-run-averaging phase

     This implementation only emits a debug message"""
     G.debug("BaseCallback.on_run_start()")
コード例 #19
0
 def _initialize_random_seeds(self):
     """Seed NumPy and `random` with the global seed, then generate per-stage seeds if needed"""
     global_seed = self.experiment_params["global_random_seed"]
     np.random.seed(global_seed)
     random.seed(global_seed)

     # Seeds for the individual stages are only generated when none were provided
     self._random_seed_initializer()
     G.debug("Initialized random seeds for experiment")
コード例 #20
0
 def on_repetition_end(self):
     """Run at the end of a repetition in an Experiment's repeated cross-validation scheme

     This implementation only emits a debug message"""
     G.debug("BaseCallback.on_repetition_end()")
コード例 #21
0
 def on_fold_end(self):
     """Run at the end of a fold in an Experiment's cross-validation scheme

     This implementation only emits a debug message"""
     G.debug("BaseCallback.on_fold_end()")
コード例 #22
0
 def on_experiment_start(self):
     """Run when an Experiment is started

     This implementation only emits a debug message"""
     G.debug("BaseCallback.on_experiment_start()")
コード例 #23
0
 def on_experiment_end(self):
     """Run when an Experiment ends

     This implementation only emits a debug message"""
     G.debug("BaseCallback.on_experiment_end()")
コード例 #24
0
 def on_run_end(self):
     """Run at the end of a run in an Experiment's multiple-run-averaging phase

     This implementation only emits a debug message"""
     G.debug("BaseCallback.on_run_end()")
コード例 #25
0
 def on_run_start(self):
     """Run at the start of a run in an Experiment's multiple-run-averaging phase

     This implementation only emits a debug message"""
     G.debug("BaseCallback.on_run_start()")
コード例 #26
0
 def on_fold_end(self):
     """Run at the end of a fold in an Experiment's cross-validation scheme

     This implementation only emits a debug message"""
     G.debug("BaseCallback.on_fold_end()")
コード例 #27
0
 def _initialize_random_seeds(self):
     """Seed NumPy and `random` with the global seed, then generate per-fold/run seeds if needed"""
     global_seed = self.experiment_params["global_random_seed"]
     np.random.seed(global_seed)
     random.seed(global_seed)

     # Seeds for the individual folds/runs are only generated when none were provided
     self._random_seed_initializer()
     G.debug("Initialized random seeds for experiment")
コード例 #28
0
 def on_repetition_end(self):
     """Run at the end of a repetition in an Experiment's repeated cross-validation scheme

     This implementation only emits a debug message"""
     G.debug("BaseCallback.on_repetition_end()")