Example #1
    # Snippet from an AutoML wrapper around CatBoost. `Pool` is catboost's
    # dataset class; catboost_objective, catboost_eval_metric and the
    # CatBoostEvalMetric* classes are project helpers defined elsewhere.
    def __init__(
        self,
        ml_task,
        X_train,
        y_train,
        sample_weight,
        X_validation,
        y_validation,
        sample_weight_validation,
        eval_metric,
        cat_features_indices,
        n_jobs,
        random_state,
    ):
        self.ml_task = ml_task
        self.X_train = X_train
        self.y_train = y_train
        self.sample_weight = sample_weight
        self.X_validation = X_validation
        self.y_validation = y_validation
        self.eval_metric = eval_metric
        self.cat_features = cat_features_indices
        self.eval_set = Pool(
            data=X_validation,
            label=y_validation,
            cat_features=self.cat_features,
            weight=sample_weight_validation,
        )
        self.n_jobs = n_jobs
        self.rounds = 1000
        self.learning_rate = 0.0125
        self.early_stopping_rounds = 50
        self.seed = random_state

        # Translate the task/metric pair into CatBoost's objective and
        # eval-metric names; metrics CatBoost lacks natively are handled
        # by the custom callback objects assigned below.
        self.objective = catboost_objective(ml_task, self.eval_metric.name)
        self.eval_metric_name = catboost_eval_metric(ml_task, self.eval_metric.name)
        self.custom_eval_metric = None
        if self.eval_metric_name == "spearman":
            self.custom_eval_metric = CatBoostEvalMetricSpearman()
        elif self.eval_metric_name == "pearson":
            self.custom_eval_metric = CatBoostEvalMetricPearson()
        elif self.eval_metric_name == "average_precision":
            self.custom_eval_metric = CatBoostEvalMetricAveragePrecision()
        elif self.eval_metric_name == "mse":
            self.custom_eval_metric = CatBoostEvalMetricMSE()
        elif self.eval_metric_name == "user_defined_metric":
            self.custom_eval_metric = CatBoostEvalMetricUserDefined()
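
The validation Pool built in Example #1 is what CatBoost's fit() consumes for early stopping. Below is a minimal, self-contained sketch of that pattern on synthetic data; the iteration count, learning rate, and early-stopping window mirror the snippet's defaults, everything else is illustrative:

import numpy as np
from catboost import CatBoostClassifier, Pool

rng = np.random.default_rng(42)
X_train, y_train = rng.normal(size=(500, 10)), rng.integers(0, 2, 500)
X_valid, y_valid = rng.normal(size=(200, 10)), rng.integers(0, 2, 200)

# Validation Pool, as in the snippet (no categorical features here).
eval_set = Pool(data=X_valid, label=y_valid)

model = CatBoostClassifier(
    iterations=1000,       # self.rounds
    learning_rate=0.0125,  # self.learning_rate
    random_seed=42,        # self.seed
    verbose=False,
    allow_writing_files=False,
)
model.fit(
    X_train,
    y_train,
    eval_set=eval_set,
    early_stopping_rounds=50,  # self.early_stopping_rounds
)
print(model.get_best_iteration())

With early stopping, training halts once the validation metric stops improving for 50 consecutive rounds, and get_best_iteration() reports which tree count to keep.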
Example #2
    def __init__(self, params):
        super(CatBoostAlgorithm, self).__init__(params)
        self.library_version = catboost.__version__
        self.snapshot_file_path = "training_snapshot"

        self.explain_level = params.get("explain_level", 0)
        # `additional` is a module-level dict of library defaults
        # (not shown in this snippet).
        self.rounds = additional.get("max_rounds", 10000)
        self.max_iters = 1
        self.early_stopping_rounds = additional.get("early_stopping_rounds", 50)

        # Pick the estimator class and default loss from the ML task;
        # regression switches to CatBoostRegressor.
        Algo = CatBoostClassifier
        loss_function = "Logloss"
        if self.params["ml_task"] == BINARY_CLASSIFICATION:
            loss_function = self.params.get("loss_function", "Logloss")
        elif self.params["ml_task"] == MULTICLASS_CLASSIFICATION:
            loss_function = self.params.get("loss_function", "MultiClass")
        elif self.params["ml_task"] == REGRESSION:
            loss_function = self.params.get("loss_function", "RMSE")
            Algo = CatBoostRegressor

        cat_params = {
            "iterations": self.params.get("num_boost_round", self.rounds),
            "learning_rate": self.params.get("learning_rate", 0.1),
            "depth": self.params.get("depth", 3),
            "rsm": self.params.get("rsm", 1.0),
            "l2_leaf_reg": self.params.get("l2_leaf_reg", 3.0),
            "random_strength": self.params.get("random_strength", 1.0),
            "loss_function": loss_function,
            "eval_metric": self.params.get("eval_metric", loss_function),
            # "custom_metric": self.params.get("eval_metric", loss_function),
            "thread_count": self.params.get("n_jobs", -1),
            "verbose": False,
            "allow_writing_files": False,
            "random_seed": self.params.get("seed", 1),
        }

        # Optional tuned parameters: copy them over only when present.
        for extra_param in [
            "min_data_in_leaf",
            "bootstrap_type",
            "bagging_temperature",
            "subsample",
            "border_count",
        ]:
            if extra_param in self.params:
                cat_params[extra_param] = self.params[extra_param]

        # Replace metric name strings that CatBoost does not support natively
        # with the project's custom eval-metric objects, keeping a readable
        # name for logging.
        self.log_metric_name = cat_params["eval_metric"]
        if cat_params["eval_metric"] == "spearman":
            cat_params["eval_metric"] = CatBoostEvalMetricSpearman()
            self.log_metric_name = "CatBoostEvalMetricSpearman"
        elif cat_params["eval_metric"] == "pearson":
            cat_params["eval_metric"] = CatBoostEvalMetricPearson()
            self.log_metric_name = "CatBoostEvalMetricPearson"
        elif cat_params["eval_metric"] == "average_precision":
            cat_params["eval_metric"] = CatBoostEvalMetricAveragePrecision()
            self.log_metric_name = "CatBoostEvalMetricAveragePrecision"
        elif cat_params["eval_metric"] == "mse":
            cat_params["eval_metric"] = CatBoostEvalMetricMSE()
            self.log_metric_name = "CatBoostEvalMetricMSE"

        self.model = Algo(**cat_params)
        self.cat_features = None
        self.best_ntree_limit = 0  # updated after training with the best iteration

        logger.debug("CatBoostAlgorithm.__init__")
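
Example #2 swaps metric name strings such as "mse" for custom objects because CatBoost only accepts non-built-in metrics through its custom eval-metric interface (evaluate / is_max_optimal / get_final_error). The following is a hypothetical reconstruction of a class like CatBoostEvalMetricMSE following that documented interface; the class name and details are assumptions, not the project's actual code:

import numpy as np
from catboost import CatBoostRegressor

class EvalMetricMSE:
    # Custom CatBoost eval metric: weighted mean squared error.
    def is_max_optimal(self):
        return False  # lower MSE is better

    def evaluate(self, approxes, target, weight):
        # approxes holds one container of raw predictions per dimension;
        # weight is None when no sample weights are set.
        preds = np.array(approxes[0])
        target = np.array(target)
        w = np.ones_like(target) if weight is None else np.array(weight)
        error_sum = float(np.sum(w * (preds - target) ** 2))
        return error_sum, float(np.sum(w))

    def get_final_error(self, error, weight):
        return error / (weight + 1e-38)

rng = np.random.default_rng(0)
X, y = rng.normal(size=(200, 5)), rng.normal(size=200)

model = CatBoostRegressor(
    iterations=50,
    eval_metric=EvalMetricMSE(),
    verbose=False,
    allow_writing_files=False,
)
model.fit(X[:150], y[:150], eval_set=(X[150:], y[150:]))

Custom Python metrics are evaluated on every round and add overhead, which is why the snippet passes plain metric name strings through unchanged whenever CatBoost supports the metric natively.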