Ejemplo n.º 1
0
 def cv_type(self, value):
     """Store the cross-validation type on `self._cv_type`

     Parameters
     ----------
     value: str, or object
         If a string, it is looked up as an attribute name on `sk_cv`, and the resolved attribute
         is stored. Any other value is stored unchanged, without validation

     Raises
     ------
     AttributeError
         If `value` is a string that does not name an attribute of `sk_cv`"""
     if not isinstance(value, str):
         # Assumed to be a valid CV class
         self._cv_type = value
         return

     try:
         # Only the lookup is guarded, so an error raised while assigning to `self` is never
         # misreported as a missing `sk_cv` attribute
         resolved = getattr(sk_cv, value)
     except AttributeError:
         # The message below already names the missing attribute, so the low-level lookup
         # error is suppressed from the traceback
         raise AttributeError(f"'{value}' not in `sklearn.model_selection._split`") from None
     self._cv_type = resolved
Ejemplo n.º 2
0
    def validate_parameters(self):
        """Ensure the provided parameters are valid and properly formatted

        Attributes are checked and normalized in place, in this order: `root_results_path`,
        `verbose`, `file_blacklist`, `train_dataset`/`test_dataset`, `metrics_map`/`metrics_params`,
        `cross_validation_type`, `to_csv_params`, `cross_experiment_params`, `experiment_callbacks`

        Raises
        ------
        TypeError
            If `root_results_path` is neither None nor a str, if `verbose` is not a bool, or if any
            element of `experiment_callbacks` is not a class
        ValueError
            If `metrics_map` is given both directly and as a key in `metrics_params`, or if any
            element of `experiment_callbacks` is a class not named "LambdaCallback"
        KeyError
            If `metrics_map` is None and `metrics_params` (assumed to be a dict) has no
            "metrics_map" key
        AttributeError
            If `cross_validation_type` is a string that does not name an attribute of `sk_cv`"""
        #################### root_results_path ####################
        if self.root_results_path is None:
            G.warn('Received root_results_path=None. Results will not be stored at all.')
        elif isinstance(self.root_results_path, str):
            # Append `ASSETS_DIRNAME` unless the given path already ends with it
            if not self.root_results_path.endswith(ASSETS_DIRNAME):
                self.root_results_path = os.path.join(self.root_results_path, ASSETS_DIRNAME)
                self.result_paths['root'] = self.root_results_path
            if not os.path.exists(self.root_results_path):
                os.makedirs(self.root_results_path, exist_ok=True)
        else:
            raise TypeError('root_results_path must be None or str, not {}: {}'.format(*type_val(self.root_results_path)))

        #################### verbose ####################
        if not isinstance(self.verbose, bool):
            raise TypeError('verbose must be a boolean. Received {}: {}'.format(*type_val(self.verbose)))

        #################### file_blacklist ####################
        self.file_blacklist = validate_file_blacklist(self.file_blacklist)

        #################### Train/Test Datasets ####################
        # String datasets are read with `pd.read_csv`; any other value is left untouched
        if isinstance(self.train_dataset, str):
            self.train_dataset = pd.read_csv(self.train_dataset)
        if isinstance(self.test_dataset, str):
            self.test_dataset = pd.read_csv(self.test_dataset)

        #################### metrics_params/metrics_map ####################
        if (self.metrics_map is not None) and ('metrics_map' in self.metrics_params):
            raise ValueError(
                '`metrics_map` may be provided as a kwarg, or as a key in `metrics_params`, but NOT BOTH. Received: ' +
                f'\n `metrics_map`={self.metrics_map}\n `metrics_params`={self.metrics_params}'
            )
        if self.metrics_map is None:
            self.metrics_map = self.metrics_params['metrics_map']
        # Mirror `metrics_map` into `metrics_params`, so both attributes carry the same value
        self.metrics_params = {'metrics_map': self.metrics_map, **self.metrics_params}

        #################### cross_validation_type ####################
        if isinstance(self.cross_validation_type, str):
            try:
                # Only the lookup is guarded; `getattr` replaces the direct dunder call
                cv_class = getattr(sk_cv, self.cross_validation_type)
            except AttributeError:
                # The message already names the missing attribute, so the low-level lookup
                # error is suppressed from the traceback
                raise AttributeError(
                    '`sklearn.model_selection._split` has no attribute "{}".'.format(self.cross_validation_type)
                ) from None
            self.cross_validation_type = cv_class

        #################### to_csv_params ####################
        # Drop any caller-supplied 'path_or_buf' entry
        self.to_csv_params = {_k: _v for _k, _v in self.to_csv_params.items() if _k != 'path_or_buf'}

        #################### cross_experiment_params ####################
        self.cross_experiment_params = dict(
            cross_validation_type=self.cross_validation_type,
            runs=self.runs,
            global_random_seed=self.global_random_seed,
            random_seeds=self.random_seeds,
            random_seed_bounds=self.random_seed_bounds,
        )

        #################### experiment_callbacks ####################
        # A single non-list value is wrapped so the checks below always iterate a list
        if not isinstance(self.experiment_callbacks, list):
            self.experiment_callbacks = [self.experiment_callbacks]
        for callback in self.experiment_callbacks:
            if not isclass(callback):
                raise TypeError(f'experiment_callbacks must be classes. Received {type(callback)}: {callback}')
            if callback.__name__ != 'LambdaCallback':
                raise ValueError(f'experiment_callbacks must be LambdaCallback instances, not {callback.__name__}: {callback}')
Ejemplo n.º 3
0
    def validate_parameters(self):
        """Ensure the provided parameters are valid and properly formatted

        Attributes are checked and normalized in place, in this order: `root_results_path`,
        `target_column`, `file_blacklist`, `train_dataset`/`test_dataset`,
        `metrics_map`/`metrics_params`, `cross_validation_type`, `to_csv_params`,
        `cross_experiment_params`, `experiment_callbacks`

        Raises
        ------
        TypeError
            If `root_results_path` is neither None nor a str, or if any element of
            `experiment_callbacks` is not a class
        ValueError
            If `metrics_map` is given both directly and as a key in `metrics_params`, or if any
            element of `experiment_callbacks` is a class not named "LambdaCallback"
        KeyError
            If `metrics_map` is None and `metrics_params` (assumed to be a dict) has no
            "metrics_map" key
        AttributeError
            If `cross_validation_type` is a string that does not name an attribute of `sk_cv`"""
        #################### root_results_path ####################
        if self.root_results_path is None:
            G.warn(
                "Received root_results_path=None. Results will not be stored at all."
            )
        elif isinstance(self.root_results_path, str):
            # Append `ASSETS_DIRNAME` unless the given path already ends with it
            if not self.root_results_path.endswith(ASSETS_DIRNAME):
                self.root_results_path = os.path.join(self.root_results_path,
                                                      ASSETS_DIRNAME)
                self.result_paths["root"] = self.root_results_path
            if not os.path.exists(self.root_results_path):
                make_dirs(self.root_results_path, exist_ok=True)
        else:
            raise TypeError(
                f"root_results_path must be None or str, not {self.root_results_path}"
            )

        #################### target_column ####################
        # A single column name is wrapped in a list
        if isinstance(self.target_column, str):
            self.target_column = [self.target_column]

        #################### file_blacklist ####################
        self.file_blacklist = validate_file_blacklist(self.file_blacklist)

        # With no results directory, the validated blacklist is overridden with "ALL"
        if self.root_results_path is None:
            self.file_blacklist = "ALL"

        #################### Train/Test Datasets ####################
        # String datasets are read with `pd.read_csv`; any other value is left untouched
        if isinstance(self.train_dataset, str):
            self.train_dataset = pd.read_csv(self.train_dataset)
        if isinstance(self.test_dataset, str):
            self.test_dataset = pd.read_csv(self.test_dataset)

        #################### metrics_params/metrics_map ####################
        if (self.metrics_map is not None) and ("metrics_map"
                                               in self.metrics_params):
            raise ValueError(
                "`metrics_map` may be provided as a kwarg, or as a `metrics_params` key, but NOT BOTH. Received: "
                +
                f"\n `metrics_map`={self.metrics_map}\n `metrics_params`={self.metrics_params}"
            )
        if self.metrics_map is None:
            self.metrics_map = self.metrics_params["metrics_map"]
        self.metrics_map = format_metrics_map(self.metrics_map)
        # Mirror `metrics_map` into `metrics_params`. Entries already in `metrics_params` win,
        # so a "metrics_map" supplied there keeps its unformatted value, as before
        self.metrics_params = {
            "metrics_map": self.metrics_map,
            **self.metrics_params,
        }

        #################### cross_validation_type ####################
        if isinstance(self.cross_validation_type, str):
            try:
                # Only the lookup is guarded; `getattr` replaces the direct dunder call
                cv_class = getattr(sk_cv, self.cross_validation_type)
            except AttributeError:
                # The message already names the missing attribute, so the low-level lookup
                # error is suppressed from the traceback
                raise AttributeError(
                    f"'{self.cross_validation_type}' not in `sklearn.model_selection._split`"
                ) from None
            self.cross_validation_type = cv_class

        #################### to_csv_params ####################
        # Drop any caller-supplied "path_or_buf" entry
        self.to_csv_params = {
            k: v
            for k, v in self.to_csv_params.items() if k != "path_or_buf"
        }

        #################### cross_experiment_params ####################
        self.cross_experiment_params = dict(
            cross_validation_type=self.cross_validation_type,
            runs=self.runs,
            global_random_seed=self.global_random_seed,
            random_seeds=self.random_seeds,
            random_seed_bounds=self.random_seed_bounds,
        )

        #################### experiment_callbacks ####################
        # A single non-list value is wrapped so the checks below always iterate a list
        if not isinstance(self.experiment_callbacks, list):
            self.experiment_callbacks = [self.experiment_callbacks]
        for cb in self.experiment_callbacks:
            if not isclass(cb):
                raise TypeError(
                    f"experiment_callbacks must be classes, not {type(cb)}: {cb}"
                )
            if cb.__name__ != "LambdaCallback":
                raise ValueError(
                    f"experiment_callbacks must be LambdaCallback instances, not {cb}"
                )