def setUpClass(cls):
    """Prepare the class-level fixture: training data and one model config.

    Reads the adult CSV sample, stores it on ``cls.data`` (every column but
    the last as ``X``, the ``income`` column as ``y``), then picks one
    registered binary-classification model through a NumPy permutation and
    stores its full training configuration on ``cls.train_params``.
    """
    # Seeding with None re-seeds NumPy's global RNG non-deterministically,
    # so the selected model may differ between test runs.
    np.random.seed(None)

    frame = pd.read_csv(
        "tests/data/adult_missing_values_missing_target_500rows.csv"
    )
    feature_columns = frame.columns[:-1]
    cls.data = {"train": {"X": frame[feature_columns], "y": frame["income"]}}

    # Pick the first entry of a random permutation of the registered models.
    registry = ModelsRegistry.registry[BINARY_CLASSIFICATION]
    candidates = list(registry.keys())
    chosen_type = np.random.permutation(candidates)[0]
    info = registry[chosen_type]

    # Learner parameters are requested before the preprocessing parameters;
    # keep this order in case either helper consumes random state.
    learner_params = RandomParameters.get(info["params"])
    preprocessing = PreprocessingTuner.get(
        info["required_preprocessing"], cls.data, BINARY_CLASSIFICATION
    )

    # A k-fold alternative ("validation_type": "kfold", "k_folds": 5) was
    # previously sketched here; the fixture uses a shuffled 80% split.
    validation = {
        "validation_type": "split",
        "train_ratio": 0.8,
        "shuffle": True,
    }
    learner = {
        "model_type": info["class"].algorithm_short_name,
        **learner_params,
    }

    cls.train_params = {
        "additional": info["additional"],
        "preprocessing": preprocessing,
        "validation": validation,
        "learner": learner,
    }
def _get_model_params(self, model_type, X, y, models_cnt):
    """Assemble the training configuration for one model of ``model_type``.

    Args:
        model_type: Key into ``AlgorithmsRegistry.registry[self._ml_task]``.
        X: Training features, forwarded to ``PreprocessingTuner.get``.
        y: Training target; also used to count classes for multiclass tasks.
        models_cnt: Added to ``self._seed`` and passed as the second
            argument of ``RandomParameters.get``.

    Returns:
        dict with the keys ``additional``, ``preprocessing``, ``validation``,
        ``learner`` and ``ml_task``.
    """
    task = self._ml_task
    info = AlgorithmsRegistry.registry[task][model_type]

    # Learner parameters first, preprocessing second (original call order).
    learner_params = RandomParameters.get(
        info["params"], models_cnt + self._seed
    )
    train_data = {"train": {"X": X, "y": y}}
    preprocessing = PreprocessingTuner.get(
        info["required_preprocessing"], train_data, task
    )

    learner = {
        "model_type": info["class"].algorithm_short_name,
        "ml_task": task,
        **learner_params,
    }
    config = {
        "additional": info["additional"],
        "preprocessing": preprocessing,
        "validation": self._validation,
        "learner": learner,
    }

    if task == MULTICLASS_CLASSIFICATION:
        # Count distinct target values, ignoring missing entries.
        observed = y[~pd.isnull(y)]
        learner["num_class"] = len(np.unique(observed))

    config["ml_task"] = task
    return config
def _get_model_params(self, model_type, X, y):
    """Assemble a binary-classification training configuration.

    Args:
        model_type: Key into ``ModelsRegistry.registry[BINARY_CLASSIFICATION]``.
        X: Training features, forwarded to ``PreprocessingTuner.get``.
        y: Training target, forwarded to ``PreprocessingTuner.get``.

    Returns:
        dict with the keys ``additional``, ``preprocessing``, ``validation``
        and ``learner``.
    """
    info = ModelsRegistry.registry[BINARY_CLASSIFICATION][model_type]

    # Learner parameters first, preprocessing second (original call order).
    learner_params = RandomParameters.get(info["params"])
    train_data = {"train": {"X": X, "y": y}}
    preprocessing = PreprocessingTuner.get(
        info["required_preprocessing"], train_data, BINARY_CLASSIFICATION
    )

    learner = {
        "model_type": info["class"].algorithm_short_name,
        **learner_params,
    }
    return {
        "additional": info["additional"],
        "preprocessing": preprocessing,
        "validation": self._validation,
        "learner": learner,
    }
def _get_model_params(self, model_type, seed, params_type="random"):
    """Assemble the training configuration for one model of ``model_type``.

    Args:
        model_type: Key into ``AlgorithmsRegistry.registry[self._ml_task]``.
        seed: With ``params_type="default"`` it is stored as the ``seed``
            learner parameter; otherwise ``seed + self._seed`` is passed as
            the second argument of ``RandomParameters.get``.
        params_type: ``"default"`` uses the registry's ``default_params``;
            any other value obtains parameters from ``RandomParameters.get``.

    Returns:
        dict with the keys ``additional``, ``preprocessing``,
        ``validation_strategy``, ``learner``, ``ml_task`` and
        ``explain_level``, or ``None`` when no learner parameters were
        obtained.
    """
    model_info = AlgorithmsRegistry.registry[self._ml_task][model_type]

    if params_type == "default":
        # Work on a copy: the registry's default_params dict is shared by
        # every caller, so writing the seed into it would leak this call's
        # seed into all later uses of the defaults.
        learner_params = {**model_info["default_params"], "seed": seed}
    else:
        learner_params = RandomParameters.get(
            model_info["params"], seed + self._seed
        )
    if learner_params is None:
        return None

    preprocessing_params = PreprocessingTuner.get(
        model_info["required_preprocessing"], self._data_info, self._ml_task
    )

    model_params = {
        "additional": model_info["additional"],
        "preprocessing": preprocessing_params,
        "validation_strategy": self._validation_strategy,
        "learner": {
            "model_type": model_info["class"].algorithm_short_name,
            "ml_task": self._ml_task,
            "n_jobs": self._n_jobs,
            **learner_params,
        },
    }

    # Only set num_class when the data info provides one.
    num_class = self._data_info.get("num_class")
    if num_class is not None:
        model_params["learner"]["num_class"] = num_class

    model_params["ml_task"] = self._ml_task
    model_params["explain_level"] = self._explain_level
    return model_params
def _get_model_params(self, model_type, seed, params_type="random"):
    """Assemble the training configuration for one model of ``model_type``.

    Besides the learner hyperparameters, the evaluation-metric settings that
    match the algorithm's short name are added to the learner section.

    Args:
        model_type: Key into ``AlgorithmsRegistry.registry[self._ml_task]``.
        seed: With ``params_type="default"`` it is stored as the ``seed``
            learner parameter; otherwise ``seed + self._seed`` is passed as
            the second argument of ``RandomParameters.get``.
        params_type: ``"default"`` uses the registry's ``default_params``;
            any other value obtains parameters from ``RandomParameters.get``.

    Returns:
        dict with the keys ``additional``, ``preprocessing``,
        ``validation_strategy``, ``learner``, ``automl_random_state``,
        ``ml_task`` and ``explain_level``, or ``None`` when no learner
        parameters were obtained.
    """
    model_info = AlgorithmsRegistry.registry[self._ml_task][model_type]

    if params_type == "default":
        # Work on a copy: the registry's default_params dict is shared by
        # every caller, and this method writes the seed and the metric
        # settings into the parameters. Mutating the shared dict would leak
        # those values into all later uses of the defaults.
        learner_params = {**model_info["default_params"], "seed": seed}
    else:
        learner_params = RandomParameters.get(
            model_info["params"], seed + self._seed
        )
    if learner_params is None:
        return None

    # Set the eval metric under the parameter name each algorithm expects.
    short_name = model_info["class"].algorithm_short_name
    if short_name == "Xgboost":
        learner_params["eval_metric"] = xgboost_eval_metric(
            self._ml_task, self._eval_metric
        )
    elif short_name == "LightGBM":
        metric, custom_metric = lightgbm_eval_metric(
            self._ml_task, self._eval_metric
        )
        learner_params["metric"] = metric
        learner_params["custom_eval_metric_name"] = custom_metric
    elif short_name == "CatBoost":
        learner_params["eval_metric"] = catboost_eval_metric(
            self._ml_task, self._eval_metric
        )
    elif short_name in ("Random Forest", "Extra Trees"):
        learner_params["eval_metric_name"] = self._eval_metric
        # The learner section below carries the same "ml_task" value for
        # every algorithm, so this entry does not change the result.
        learner_params["ml_task"] = self._ml_task

    preprocessing_params = PreprocessingTuner.get(
        model_info["required_preprocessing"], self._data_info, self._ml_task
    )

    model_params = {
        "additional": model_info["additional"],
        "preprocessing": preprocessing_params,
        "validation_strategy": self._validation_strategy,
        "learner": {
            "model_type": short_name,
            "ml_task": self._ml_task,
            "n_jobs": self._n_jobs,
            **learner_params,
        },
        "automl_random_state": self._seed,
    }

    # Only set num_class when the data info provides one.
    num_class = self._data_info.get("num_class")
    if num_class is not None:
        model_params["learner"]["num_class"] = num_class

    model_params["ml_task"] = self._ml_task
    model_params["explain_level"] = self._explain_level
    return model_params