Ejemplo n.º 1
0
 def environment_workflow(self):
     """Run every setup step needed to validate the environment before Experiments execute.

     The steps are invoked in order: :meth:`update_custom_environment_params`,
     :meth:`validate_parameters`, :meth:`format_result_paths` and
     :meth:`generate_cross_experiment_key`. The resulting :attr:`cross_experiment_key` is
     then logged"""
     setup_steps = (
         "update_custom_environment_params",
         "validate_parameters",
         "format_result_paths",
         "generate_cross_experiment_key",
     )
     # Each step is looked up only when its turn comes, exactly as sequential calls would do
     for step_name in setup_steps:
         getattr(self, step_name)()

     G.log(f"Cross-Experiment Key:   '{self.cross_experiment_key!s}'")
 def on_run_start(self):
     """Log the fold/run label, plus the current seed when one is set, then call the parent hook.

     The seed portion is appended only when :attr:`current_seed` is truthy. It is preceded by
     :attr:`log_separator` only when the fold/run label is non-empty"""
     message = format_fold_run(fold=self._fold, run=self._run)

     if self.current_seed:
         if message != "":
             message += format(self.log_separator)
         message += f"Seed: {self.current_seed}"

     G.log(message, previous_frame=inspect.currentframe().f_back, add_time=True)
     super().on_run_start()
Ejemplo n.º 3
0
    def data_scaling(self, which_sets=None):
        """Standard-scale the feature columns of the input datasets, if enabled.

        Scaling happens only when ``preprocessing_params["apply_standard_scale"]`` is exactly
        ``True``. A ``StandardScaler`` is fit on the training features, then used to transform
        each of the train/holdout/test input sets whose name is in :attr:`all_input_sets`.
        A completion message is logged whether or not scaling was applied

        Parameters
        ----------
        which_sets: Optional
            Falls back to :attr:`fit_input_sets` when falsy. It is resolved here but not yet
            consulted by the scaling logic (see the TODO below)"""
        which_sets = which_sets or self.fit_input_sets

        # TODO: Expand method to include other scaling types by sending string param or callable for apply_scale arg
        apply_standard_scale = self.preprocessing_params.get('apply_standard_scale', False)

        if apply_standard_scale is True:
            scaler = StandardScaler()

            # TODO: Modify fitting process to use 'which_sets' and 'self.fit_input_sets' like 'data_imputation' method
            scaler.fit(self.train_input_data[self.features].values)

            for set_name in ('train_input_data', 'holdout_input_data', 'test_input_data'):
                if set_name not in self.all_input_sets:
                    continue
                dataset = getattr(self, set_name)
                dataset[self.features] = scaler.transform(dataset[self.features].values)

        G.log(
            'Completed data_scaling preprocessing. preprocessing_params["apply_standard_scale"]={}'
            .format(apply_standard_scale))
Ejemplo n.º 4
0
    def _create_script_backup(self):
        """Back up the script that initialized the Experiment, when that is permitted.

        If :attr:`source_script` does not end with ".py", the "script_backup" entry of
        `G.Env.result_paths` is set to None. The copy (via :meth:`_source_copy_helper`) is then
        made only when that entry is not None. The outcome is logged either way

        Raises
        ------
        EnvironmentInactiveError
            If an AttributeError occurs and `G.Env` is None
        EnvironmentInvalidError
            If `G.Env` has no "result_paths" attribute, or if `G.Env.result_paths` has no
            "script_backup" key"""
        try:
            # Anything that is not a ".py" file disables the backup for this Environment
            if not self.source_script.endswith(".py"):
                G.Env.result_paths["script_backup"] = None

            if G.Env.result_paths["script_backup"] is None:
                G.log(f"Skipped source backup:  '{self.source_script}'", 4)
            else:
                self._source_copy_helper()
                G.log(f"Created source backup:  '{self.source_script}'", 4)
        except AttributeError as _ex:
            # Translate the two recognizable Environment problems; re-raise anything else
            if G.Env is None:
                raise EnvironmentInactiveError(extra=f"\n{_ex!s}")
            if not hasattr(G.Env, "result_paths"):
                raise EnvironmentInvalidError(extra=f"G.Env lacks 'result_paths' attr\n{_ex!s}")
            raise
        except KeyError as _ex:
            if "script_backup" not in G.Env.result_paths:
                raise EnvironmentInvalidError(
                    extra=f"G.Env.result_paths lacks 'script_backup' key\n{_ex!s}"
                )
            raise
    def cross_validation_workflow(self):
        """Execute workflow for cross-validation process, consisting of the following tasks:
        1) Create train and validation split indices for all folds, 2) Iterate through
        repetitions and folds, performing `cv_fold_workflow` for each fold, 3) Average
        accumulated predictions over fold splits, 4) Evaluate final predictions, 5) Format final
        predictions to prepare for saving. Tasks 3-5 are not performed in this method body; they
        are presumably handled by the `on_*` callbacks invoked here

        The flat sequence of (train, validation) index pairs produced by :attr:`folds` is grouped
        into `n_repeats` consecutive chunks of `n_splits` pairs each. :attr:`_rep`,
        :attr:`_fold`, :attr:`train_index` and :attr:`validation_index` are updated on `self`
        as iteration proceeds

        Raises
        ------
        ValueError
            If the number of index pairs yielded by :attr:`folds` does not equal
            `n_repeats` * `n_splits`"""
        self.on_experiment_start()

        cv_indices = list(
            self.folds.split(self.train_input_data, self.train_target_data.iloc[:, 0])
        )
        n_repeats = self.cross_validation_params.get("n_repeats", 1)
        n_splits = self.cross_validation_params["n_splits"]

        # Group with plain slicing instead of `np.reshape(np.array(...))`. Train and validation
        # index arrays usually differ in length, and building a ragged array from them is an
        # error in recent NumPy releases. When the lengths happen to match, NumPy builds a 3-D
        # numeric array whose size no longer fits the (n_repeats, n_splits, 2) target shape
        if len(cv_indices) != n_repeats * n_splits:
            raise ValueError(
                "Expected {} cross-validation splits (n_repeats={} * n_splits={}), but received {}".format(
                    n_repeats * n_splits, n_repeats, n_splits, len(cv_indices)
                )
            )

        for rep in range(n_repeats):
            self._rep = rep
            repetition_indices = cv_indices[rep * n_splits:(rep + 1) * n_splits]
            self.on_repetition_start()

            for self._fold, (self.train_index, self.validation_index) in enumerate(
                repetition_indices
            ):
                self.cv_fold_workflow()

            self.on_repetition_end()
        self.on_experiment_end()

        G.log("")
 def _create_script_backup(self):
     """Create and save a copy of the script that initialized the Experiment

     The copy is attempted only when ``G.Env.result_paths["script_backup"]`` is not None. If
     the first attempt raises FileNotFoundError, a directory is created at
     ``self.result_paths["script_backup"]`` and the copy is retried once. The outcome is
     logged either way

     Raises
     ------
     EnvironmentInactiveError
         If an AttributeError occurs and `G.Env` is None
     EnvironmentInvalidError
         If `G.Env` has no "result_paths" attribute, or if `G.Env.result_paths` has no
         "script_backup" key
     FileNotFoundError
         If the retried copy still cannot find a required path"""
     #################### Attempt to Copy Source Script if Allowed ####################
     try:
         if G.Env.result_paths["script_backup"] is not None:
             try:
                 self._source_copy_helper()
             except FileNotFoundError:
                 # `exist_ok=True`: the failure may not be due to a missing backup directory.
                 # If the directory already exists, FileExistsError must not mask the real
                 # problem, which the retry below surfaces as FileNotFoundError
                 os.makedirs(self.result_paths["script_backup"],
                             exist_ok=True)
                 self._source_copy_helper()
             G.log("Created backup of file: '{}'".format(
                 self.source_script))
         else:
             G.log("Skipped creating backup of file: '{}'".format(
                 self.source_script))
     #################### Exception Handling ####################
     except AttributeError as _ex:
         # Translate the two recognizable Environment problems; re-raise anything else
         if G.Env is None:
             raise EnvironmentInactiveError(extra="\n{!s}".format(_ex))
         if not hasattr(G.Env, "result_paths"):
             raise EnvironmentInvalidError(
                 extra=f"G.Env lacks 'result_paths' attr\n{_ex!s}")
         raise
     except KeyError as _ex:
         if "script_backup" not in G.Env.result_paths:
             raise EnvironmentInvalidError(
                 extra=
                 f"G.Env.result_paths lacks 'script_backup' key\n{_ex!s}")
         raise
Ejemplo n.º 7
0
    def on_experiment_start(self):
        """Log a newline attributed to the caller's frame, then call the parent hook"""
        G.log('\n', previous_frame=inspect.currentframe().f_back)

        super().on_experiment_start()
Ejemplo n.º 8
0
 def save_key(self):
     """Write an empty JSON file for this key unless :attr:`exists` is already truthy.

     The file is written to "<tested_keys_dir>/<key>.json". After writing, :attr:`exists` is
     set to True. The outcome is logged either way"""
     if self.exists:
         G.log(f'{self.key_type}_key "{self.key}" already exists - Skipped saving')
         return

     key_path = f'{self.tested_keys_dir}/{self.key}.json'
     write_json(key_path, {})
     self.exists = True
     G.log(f'Saved {self.key_type}_key: "{self.key}"')
Ejemplo n.º 9
0
    def on_fold_start(self):
        """Log an empty message attributed to the caller's frame, then call the parent hook"""
        G.log('', previous_frame=inspect.currentframe().f_back)

        super().on_fold_start()
Ejemplo n.º 10
0
    def on_run_end(self):
        """Log the fold/run label, the latest evaluation results and the time taken by the most
        recent run, joined by :attr:`log_separator`, then call the parent hook"""
        segments = [
            format_fold_run(fold=self._fold, run=self._run),
            format_evaluation_results(self.last_evaluation_results, float_format=self.float_format),
            'Time Elapsed: {}'.format(
                sec_to_hms(self.stat_aggregates['times']['runs'][-1], as_str=True)
            ),
        ]
        message = self.log_separator.join(segments)

        G.log(message, previous_frame=inspect.currentframe().f_back)
        super().on_run_end()
Ejemplo n.º 11
0
 def _validate_environment(self):
     """Claim the active Environment for this task, or fail if that is not possible.

     Raises
     ------
     EnvironmentInactiveError
         If `G.Env` is None
     EnvironmentInvalidError
         If `G.Env.current_task` is already set"""
     if G.Env is None:
         raise EnvironmentInactiveError('')
     if G.Env.current_task is not None:
         raise EnvironmentInvalidError('An experiment is in progress. It must finish before a new one can be started')

     G.Env.current_task = self
     G.log(f'Validated Environment with key: "{self.cross_experiment_key}"')
Ejemplo n.º 12
0
 def environment_workflow(self):
     """Run every setup step needed to validate the environment before Experiments execute.

     After updating and validating parameters, :attr:`train_dataset` and
     :attr:`holdout_dataset` are replaced by the pair returned from `define_holdout_set`.
     Result paths are then formatted, and the cross-experiment key is generated and logged"""
     self.update_custom_environment_params()
     self.validate_parameters()

     datasets = define_holdout_set(self.train_dataset, self.holdout_dataset, self.target_column)
     self.train_dataset, self.holdout_dataset = datasets

     self.format_result_paths()
     self.generate_cross_experiment_key()
     G.log(f"Cross-Experiment Key:   '{self.cross_experiment_key!s}'")
Ejemplo n.º 13
0
    def cv_fold_workflow(self):
        """Execute workflow for an individual fold, consisting of the following tasks:
        1) Execute overridden :meth:`on_fold_start` tasks, 2) Perform :meth:`cv_run_workflow`
        for each run, setting :attr:`_run` to the run index beforehand, 3) Execute overridden
        :meth:`on_fold_end` tasks

        The number of runs is ``experiment_params["runs"]``, defaulting to 1"""
        self.on_fold_start()
        G.log("Intra-CV preprocessing stage complete", 4)

        n_runs = self.experiment_params.get("runs", 1)
        for run_index in range(n_runs):
            self._run = run_index
            self.cv_run_workflow()

        self.on_fold_end()
Ejemplo n.º 14
0
 def _validate_environment(self):
     """Claim the active Environment for this task, or fail if that is not possible.

     Raises
     ------
     EnvironmentInactiveError
         If `G.Env` is None
     EnvironmentInvalidError
         If `G.Env.current_task` is already set"""
     if G.Env is None:
         raise EnvironmentInactiveError("")
     if G.Env.current_task is not None:
         raise EnvironmentInvalidError("Current experiment must finish before starting another")

     G.Env.current_task = self
     G.log(f"Validated Environment:  '{self.cross_experiment_key}'")
Ejemplo n.º 15
0
    def on_repetition_end(self):
        """Log a summary line for the finished repetition, then call the parent hook.

        The line holds the repetition number, the latest evaluation results and the time taken
        by the most recent repetition. :attr:`log_separator` is inserted before the elapsed time
        unless the text so far already ends with a space. An empty message is logged first"""
        summary = f'Repetition {self._rep} AVG:   ' + format_evaluation_results(
            self.last_evaluation_results, float_format=self.float_format
        )
        if not summary.endswith(' '):
            summary += self.log_separator

        elapsed = sec_to_hms(self.stat_aggregates['times']['reps'][-1], as_str=True)
        summary += f'Time Elapsed: {elapsed}'

        caller_frame = inspect.currentframe().f_back
        G.log('', previous_frame=caller_frame)
        G.log(summary, previous_frame=caller_frame)

        super().on_repetition_end()
Ejemplo n.º 16
0
    def on_experiment_end(self):
        """Log the final summary line for the Experiment, then call the parent hook.

        The line holds the latest evaluation results and the total elapsed time.
        :attr:`log_separator` is inserted between them unless the text so far already ends with
        a space. An empty message is logged first"""
        summary = 'FINAL:    ' + format_evaluation_results(
            self.last_evaluation_results, float_format=self.float_format
        )
        if not summary.endswith(' '):
            summary += self.log_separator

        elapsed = sec_to_hms(self.stat_aggregates['times']['total_elapsed'], as_str=True)
        summary += f'Time Elapsed: {elapsed}'

        G.log('')
        G.log(summary, previous_frame=inspect.currentframe().f_back, add_time=False)
        super().on_experiment_end()
    def target_data_transformation(self, which_sets=None):
        """Apply the callable target transformation, if any, to the selected target datasets.

        The transformation is read from ``preprocessing_params["target_transformation"]``. When
        it is callable, every attribute named in `which_sets` whose value is not None is replaced
        by the result of calling the transformation on it. A completion message is always logged

        Parameters
        ----------
        which_sets: Optional
            Names of the attributes of `self` to transform. Falls back to
            :attr:`fit_target_sets` when falsy"""
        transformation = self.preprocessing_params.get("target_transformation", None)
        which_sets = which_sets if which_sets else self.fit_target_sets

        for data_key in which_sets:
            data = self.__getattribute__(data_key)

            # Test `data is not None` (as `data_imputation` does) instead of truthiness. The
            # target sets are presumably pandas objects, whose truth value is ambiguous and
            # raises ValueError
            if callable(transformation) and data is not None:
                # TODO: Send either "self" or all attributes in self as other input to "transformation"
                # TODO: Force callable "transformation" to have **kwargs, or check for the args it expects and send only those
                self.__setattr__(data_key, transformation(data))

        G.log("Completed target_data_transformation preprocessing")
Ejemplo n.º 18
0
    def on_experiment_end(self):
        """Log the final summary line for the Experiment, then call the parent hook.

        The line holds the latest evaluation results and the "total_elapsed" text from the
        elapsed-time helper. :attr:`log_separator` is inserted between them unless the text so
        far already ends with a space. An empty message is logged first"""
        summary = "FINAL:    " + format_evaluation(
            self.last_evaluation_results, float_format=self.float_format
        )
        if not summary.endswith(" "):
            summary += self.log_separator
        summary += self.__elapsed_helper("total_elapsed")

        G.log("")
        G.log(summary, previous_frame=inspect.currentframe().f_back, add_time=False)
        super().on_experiment_end()
Ejemplo n.º 19
0
    def on_repetition_end(self):
        """Report the finished repetition, then call the parent hook.

        The message holds the repetition label, the latest evaluation results and the "reps"
        text from the elapsed-time helper. :attr:`log_separator` is inserted between parts
        unless the text so far already ends with a space. `G.log` is used when
        `G.Env.verbose` >= 2 and more than one repetition is configured; otherwise `G.debug`"""
        message = format_fold_run(rep=self._rep, fold="-", run="-")
        if not message.endswith(" "):
            message += self.log_separator

        message += format_evaluation(self.last_evaluation_results, float_format=self.float_format)
        if not message.endswith(" "):
            message += self.log_separator

        message += self.__elapsed_helper("reps")

        is_prominent = G.Env.verbose >= 2 and G.Env.cv_params.get("n_repeats", 1) > 1
        emit = G.log if is_prominent else G.debug
        emit(message, previous_frame=inspect.currentframe().f_back)
        super().on_repetition_end()
Ejemplo n.º 20
0
    def _generate_hyperparameter_key(self):
        """Build :attr:`hyperparameter_key` from the experiment's hyperparameters and log it.

        The key is a `HyperparameterKeyMaker` constructed from the model, preprocessing and
        feature-selection settings together with :attr:`cross_experiment_key`"""
        # FLAG: Should probably add :attr:`target_metric` to key - With option to ignore it?
        parameters = {
            'model_initializer': self.model_initializer,
            'model_init_params': self.model_init_params,
            'model_extra_params': self.model_extra_params,
            'preprocessing_pipeline': self.preprocessing_pipeline,
            'preprocessing_params': self.preprocessing_params,
            'feature_selector': self.feature_selector,
        }

        self.hyperparameter_key = HyperparameterKeyMaker(parameters, self.cross_experiment_key)
        G.log(f'Generated hyperparameter key: {self.hyperparameter_key}')
Ejemplo n.º 21
0
    def on_repetition_end(self):
        """Log a summary line for the finished repetition, then call the parent hook.

        The line holds the repetition number, the latest evaluation results and the time taken
        by the most recent repetition. :attr:`log_separator` is inserted before the elapsed time
        unless the text so far already ends with a space. An empty message is logged first"""
        summary = f"Repetition {self._rep} AVG:   " + format_evaluation_results(
            self.last_evaluation_results, float_format=self.float_format
        )
        if not summary.endswith(" "):
            summary += self.log_separator

        elapsed = sec_to_hms(self.stat_aggregates["times"]["reps"][-1], as_str=True)
        summary += f"Time Elapsed: {elapsed}"

        caller_frame = inspect.currentframe().f_back
        G.log("", previous_frame=caller_frame)
        G.log(summary, previous_frame=caller_frame)
        super().on_repetition_end()
Ejemplo n.º 22
0
    def on_run_end(self):
        """Report the finished run, then call the parent hook.

        The message joins the rep/fold/run label, the latest evaluation results and the "runs"
        text from the elapsed-time helper with :attr:`log_separator`. `G.log` is used when
        `G.Env.verbose` >= 3 and `G.Env.runs` > 1; otherwise `G.debug`"""
        label = format_fold_run(rep=self._rep, fold=self._fold, run=self._run)
        evaluation = format_evaluation(self.last_evaluation_results, float_format=self.float_format)
        elapsed = self.__elapsed_helper("runs")

        emit = G.log if (G.Env.verbose >= 3 and G.Env.runs > 1) else G.debug
        emit(
            self.log_separator.join([label, evaluation, elapsed]),
            previous_frame=inspect.currentframe().f_back,
        )
        super().on_run_end()
Ejemplo n.º 23
0
    def on_experiment_end(self):
        """Log the final summary line for the Experiment, then call the parent hook.

        The line holds the latest evaluation results and the total elapsed time.
        :attr:`log_separator` is inserted between them unless the text so far already ends with
        a space. An empty message is logged first"""
        summary = "FINAL:    " + format_evaluation_results(
            self.last_evaluation_results, float_format=self.float_format
        )
        if not summary.endswith(" "):
            summary += self.log_separator

        elapsed = sec_to_hms(self.stat_aggregates["times"]["total_elapsed"], as_str=True)
        summary += f"Time Elapsed: {elapsed}"

        G.log("")
        G.log(summary, previous_frame=inspect.currentframe().f_back, add_time=False)
        super().on_experiment_end()
Ejemplo n.º 24
0
    def save_key(self):
        """Register :attr:`key` in the JSON file of its cross-experiment key, unless
        :attr:`exists` is already truthy.

        An empty list is added under :attr:`key` in
        "<tested_keys_dir>/<cross_experiment_key.key>.json", on the condition that the key is
        not already present. :attr:`exists` is then set to True. The outcome is logged either way

        Raises
        ------
        ValueError
            If `cross_experiment_key.exists` is False, i.e. the cross-experiment key has not
            been saved yet"""
        if self.exists:
            G.log(f'{self.key_type}_key "{self.key}" already exists - Skipped saving', 4)
            return

        if self.cross_experiment_key.exists is False:
            raise ValueError(
                f"Cannot save hyperparameter_key: '{self.key}', "
                f"before cross_experiment_key '{self.cross_experiment_key.key}'"
            )

        key_path = f"{self.tested_keys_dir}/{self.cross_experiment_key.key}.json"
        add_to_json(
            key_path, [], key=self.key, condition=lambda saved: self.key not in saved.keys()
        )

        self.exists = True
        G.log(f'Saved {self.key_type}_key: "{self.key}"', 4)
Ejemplo n.º 25
0
    def save_key(self):
        """Register :attr:`key` in the JSON file of its cross-experiment key, unless
        :attr:`exists` is already truthy.

        An empty list is added under :attr:`key` in
        "<tested_keys_dir>/<cross_experiment_key.key>.json", on the condition that the key is
        not already present. :attr:`exists` is then set to True. The outcome is logged either way

        Raises
        ------
        ValueError
            If `cross_experiment_key.exists` is False, i.e. the cross-experiment key has not
            been saved yet"""
        if self.exists:
            G.log(f'{self.key_type}_key "{self.key}" already exists - Skipped saving')
            return

        if self.cross_experiment_key.exists is False:
            raise ValueError(
                f'Cannot save hyperparameter_key: "{self.key}", before cross_experiment_key '
                f'"{self.cross_experiment_key.key}" has been saved'
            )

        key_path = f'{self.tested_keys_dir}/{self.cross_experiment_key.key}.json'
        add_to_json(
            file_path=key_path,
            data_to_add=[],
            key=self.key,
            condition=lambda saved: self.key not in saved.keys(),
        )

        self.exists = True
        G.log(f'Saved {self.key_type}_key: "{self.key}"')
Ejemplo n.º 26
0
    def _generate_hyperparameter_key(self):
        """Build :attr:`hyperparameter_key` from the experiment's hyperparameters and log it.

        The key is a `HyperparameterKeyMaker` constructed from the model, feature-engineering
        and feature-selection settings together with :attr:`cross_experiment_key`. The key's
        `parameters` attribute is also sent to the debug log"""
        # FLAG: Should probably add :attr:`target_metric` to key - With option to ignore it?
        parameters = {
            "model_initializer": self.model_initializer,
            "model_init_params": self.model_init_params,
            "model_extra_params": self.model_extra_params,
            "feature_engineer": self.feature_engineer,
            "feature_selector": self.feature_selector,
        }

        self.hyperparameter_key = HyperparameterKeyMaker(parameters, self.cross_experiment_key)
        G.log(f"Hyperparameter Key:     '{self.hyperparameter_key}'")
        G.debug("Raw hyperparameters...")
        G.debug(self.hyperparameter_key.parameters)
Ejemplo n.º 27
0
    def on_fold_end(self):
        """Report the finished fold, then call the parent hook.

        The message holds the rep/fold label, the latest evaluation results and the "folds"
        text from the elapsed-time helper. :attr:`log_separator` is inserted between parts
        unless the text so far already ends with a space. `G.log` is used when
        `G.Env.verbose` >= 2 and ``G.Env.cv_params["n_splits"]`` > 1; otherwise `G.debug`"""
        message = format_fold_run(rep=self._rep, fold=self._fold, run="-")
        if not message.endswith(" "):
            message += self.log_separator

        message += format_evaluation(self.last_evaluation_results, float_format=self.float_format)
        if not message.endswith(" "):
            message += self.log_separator

        message += self.__elapsed_helper("folds")

        is_prominent = G.Env.verbose >= 2 and G.Env.cv_params["n_splits"] > 1
        emit = G.log if is_prominent else G.debug
        emit(message, previous_frame=inspect.currentframe().f_back, add_time=False)
        super().on_fold_end()
Ejemplo n.º 28
0
    def data_imputation(self, which_sets=None):
        """Impute the selected input datasets using ``preprocessing_params["imputer"]``.

        Every attribute named in `which_sets` whose value is not None is replaced: by
        ``imputer(data)`` when the imputer is callable, or by ``data.fillna(imputer)`` when it is
        an int or float. Any other imputer leaves the data untouched. A completion message is
        always logged

        Parameters
        ----------
        which_sets: Optional
            Names of the attributes of `self` to impute. Falls back to :attr:`fit_input_sets`
            when falsy"""
        imputer = self.preprocessing_params.get('imputer', None)
        which_sets = which_sets or self.fit_input_sets

        for set_name in which_sets:
            dataset = self.__getattribute__(set_name)
            if dataset is None:
                continue

            if callable(imputer):
                # Apply function to impute data
                # TODO: Send either "self" or all attributes in self as other input to "imputer"
                # TODO: Force callable "imputer" to have **kwargs, or check for the args it expects and send only those
                self.__setattr__(set_name, imputer(dataset))
            elif isinstance(imputer, (int, float)):
                # Fill null data with the given value
                self.__setattr__(set_name, dataset.fillna(imputer))

        G.log('Completed data_imputation preprocessing')
Ejemplo n.º 29
0
    def on_fold_end(self):
        """Report the finished fold, then call the parent hook.

        The message holds an "F<rep>.<fold> AVG" label, the latest evaluation results and the
        "folds" text from the elapsed-time helper. :attr:`log_separator` is inserted before the
        elapsed text unless the text so far already ends with a space. `G.log` is used when
        `G.Env.verbose` >= 2 and ``G.Env.cross_validation_params["n_splits"]`` > 1; otherwise
        `G.debug`"""
        # TODO: Prepend rep count
        message = f"F{self._rep}.{self._fold} AVG:   " + format_evaluation(
            self.last_evaluation_results, float_format=self.float_format
        )
        if not message.endswith(" "):
            message += self.log_separator
        message += self.__elapsed_helper("folds")

        is_prominent = G.Env.verbose >= 2 and G.Env.cross_validation_params["n_splits"] > 1
        emit = G.log if is_prominent else G.debug
        emit(message, previous_frame=inspect.currentframe().f_back, add_time=False)
        super().on_fold_end()
Ejemplo n.º 30
0
    def on_run_start(self):
        """Report the start of a run, then call the parent hook.

        The message is the rep/fold/run label, plus the current seed when
        :attr:`current_seed` is truthy. The seed is preceded by :attr:`log_separator` only when
        the label is non-empty. `G.log` is used when `G.Env.verbose` >= 4 and `G.Env.runs` > 1;
        otherwise `G.debug`"""
        message = format_fold_run(rep=self._rep, fold=self._fold, run=self._run)

        if self.current_seed:
            if message != "":
                message += format(self.log_separator)
            message += f"Seed: {self.current_seed}"

        emit = G.log if (G.Env.verbose >= 4 and G.Env.runs > 1) else G.debug
        emit(message, previous_frame=inspect.currentframe().f_back, add_time=True)
        super().on_run_start()