Ejemplo n.º 1
0
    def __init__(
        self,
        ml_task,
        X_train,
        y_train,
        sample_weight,
        X_validation,
        y_validation,
        sample_weight_validation,
        eval_metric,
        cat_features_indices,
        n_jobs,
        random_state,
    ):
        """Store the data splits and fixed training settings for LightGBM.

        Wraps the training and validation splits in ``lgb.Dataset`` objects
        (a pandas ``DataFrame`` is converted with ``to_numpy()`` first) and
        resolves the metric names, the optional custom metric callable and
        the objective for ``ml_task``.

        Args:
            ml_task: Task identifier; compared against
                ``MULTICLASS_CLASSIFICATION`` and forwarded to
                ``lightgbm_eval_metric`` and ``lightgbm_objective``.
            X_train: Training features (``DataFrame`` or passed through as-is).
            y_train: Training labels.
            sample_weight: Weights for the training rows.
            X_validation: Validation features (same handling as ``X_train``).
            y_validation: Validation labels.
            sample_weight_validation: Weights for the validation rows.
            eval_metric: Object exposing a ``name`` attribute.
            cat_features_indices: Stored on the instance; not used here.
            n_jobs: Stored on the instance; ``-1`` is stored as ``0``.
            random_state: Stored as ``self.seed``.
        """
        # Raw splits and their weights are kept alongside the datasets.
        self.X_train = X_train
        self.y_train = y_train
        self.sample_weight = sample_weight
        self.X_validation = X_validation
        self.y_validation = y_validation
        self.sample_weight_validation = sample_weight_validation

        # The datasets are built from plain arrays when a DataFrame is given.
        train_features = X_train
        if isinstance(train_features, pd.DataFrame):
            train_features = train_features.to_numpy()
        self.dtrain = lgb.Dataset(
            train_features,
            label=y_train,
            weight=sample_weight,
        )

        validation_features = X_validation
        if isinstance(validation_features, pd.DataFrame):
            validation_features = validation_features.to_numpy()
        self.dvalid = lgb.Dataset(
            validation_features,
            label=y_validation,
            weight=sample_weight_validation,
        )

        self.cat_features_indices = cat_features_indices
        self.eval_metric = eval_metric

        # Fixed training settings.
        self.learning_rate = 0.025
        self.rounds = 1000
        self.early_stopping_rounds = 50
        self.seed = random_state

        # A request for -1 jobs is stored as 0.
        self.n_jobs = 0 if n_jobs == -1 else n_jobs

        # Placeholders; both are overwritten further down.
        self.objective = ""
        self.eval_metric_name = ""

        metric_name = eval_metric.name
        resolved_metric = lightgbm_eval_metric(ml_task, metric_name)
        self.eval_metric_name, self.custom_eval_metric_name = resolved_metric

        # Metrics that need a custom evaluation callable; any other metric
        # name leaves ``custom_eval_metric`` as None.
        custom_metric_by_name = {
            "r2": lightgbm_eval_metric_r2,
            "spearman": lightgbm_eval_metric_spearman,
            "pearson": lightgbm_eval_metric_pearson,
            "f1": lightgbm_eval_metric_f1,
            "average_precision": lightgbm_eval_metric_average_precision,
            "accuracy": lightgbm_eval_metric_accuracy,
        }
        self.custom_eval_metric = custom_metric_by_name.get(metric_name)

        # The class count is only computed for multiclass classification.
        if ml_task == MULTICLASS_CLASSIFICATION:
            self.num_class = len(np.unique(y_train))
        else:
            self.num_class = None
        self.objective = lightgbm_objective(ml_task, metric_name)
Ejemplo n.º 2
0
    def _get_model_params(self, model_type, seed, params_type="random"):
        """Assemble the full parameter dictionary for one ``model_type`` model.

        Args:
            model_type: Key into ``AlgorithmsRegistry.registry[self._ml_task]``.
            seed: With ``params_type="default"`` it is stored as the ``"seed"``
                hyperparameter; otherwise ``seed + self._seed`` is passed to
                ``RandomParameters.get``.
            params_type: ``"default"`` starts from the registry's
                ``default_params``; any other value draws the hyperparameters
                with ``RandomParameters.get``.

        Returns:
            A dict with the keys ``"additional"``, ``"preprocessing"``,
            ``"validation_strategy"``, ``"learner"``,
            ``"automl_random_state"``, ``"ml_task"`` and ``"explain_level"``,
            or ``None`` when no hyperparameters were obtained.
        """
        model_info = AlgorithmsRegistry.registry[self._ml_task][model_type]

        if params_type == "default":
            # Work on a copy: the seed and metric keys written below must not
            # end up in the registry entry shared by every later call.
            learner_params = dict(model_info["default_params"])
            learner_params["seed"] = seed
        else:
            learner_params = RandomParameters.get(
                model_info["params"], seed + self._seed
            )
        if learner_params is None:
            return None

        # Set the evaluation metric using each algorithm's own key names.
        algorithm_name = model_info["class"].algorithm_short_name
        if algorithm_name == "Xgboost":
            learner_params["eval_metric"] = xgboost_eval_metric(
                self._ml_task, self._eval_metric
            )
        elif algorithm_name == "LightGBM":
            metric, custom_metric = lightgbm_eval_metric(
                self._ml_task, self._eval_metric
            )
            learner_params["metric"] = metric
            learner_params["custom_eval_metric_name"] = custom_metric
        elif algorithm_name == "CatBoost":
            learner_params["eval_metric"] = catboost_eval_metric(
                self._ml_task, self._eval_metric
            )
        elif algorithm_name in ("Random Forest", "Extra Trees"):
            learner_params["eval_metric_name"] = self._eval_metric
            learner_params["ml_task"] = self._ml_task

        preprocessing_params = PreprocessingTuner.get(
            model_info["required_preprocessing"], self._data_info, self._ml_task
        )

        params = {
            "additional": model_info["additional"],
            "preprocessing": preprocessing_params,
            "validation_strategy": self._validation_strategy,
            "learner": {
                "model_type": algorithm_name,
                "ml_task": self._ml_task,
                "n_jobs": self._n_jobs,
                # Spread last so tuned hyperparameters take precedence over
                # the three generic keys above.
                **learner_params,
            },
            "automl_random_state": self._seed,
        }

        num_class = self._data_info.get("num_class")
        if num_class is not None:
            params["learner"]["num_class"] = num_class

        params["ml_task"] = self._ml_task
        params["explain_level"] = self._explain_level

        return params