Esempio n. 1
0
    def optimize_model_params(self, X: pd.DataFrame, y: pd.Series) -> None:
        """
        Optimize params of LightGBM regressor.

        Builds the full hyper-parameter grid, shuffles it, and passes one
        grid point at a time to ``self.validate_fit`` (together with the
        current ``self.best_params["params"]``) for as long as
        ``self.training_time_left`` exceeds ``self.worst_training_time``
        and untried grid points remain.

        :param X: Data
        :param y: Target
        :return: None. Saving of the best model is presumably done by
            ``validate_fit`` (not visible here) -- confirm.
        """
        candidates = list(ParameterGrid({
            "learning_rate": [0.05],
            "n_estimators": [1000],
            "num_leaves": [15, 31, 63, 127, 255],
            "min_child_samples": [3, 20, 50, 150],
            "subsample_freq": [1, 5, 25, 50],
            "colsample_bytree": [1.0, 0.8, 0.6],
            "subsample": [1.0, 0.8, 0.6],
            "lambda_l2": [0, 0.1, 1, 10],
            "random_state": [2020],
        }))
        # Shuffle so that a search cut short by the time budget still
        # samples the grid in random order rather than one corner of it.
        random.shuffle(candidates)
        print(f"Number of grid points: {len(candidates)}")

        # Logical ``and`` (not bitwise ``&``): short-circuits and does not
        # depend on parentheses to get the precedence right.
        while (self.training_time_left > self.worst_training_time
               and candidates):
            model_params = candidates.pop()
            self.validate_fit(X, y, {
                "model_params": model_params,
                "params": self.best_params["params"],
            })
Esempio n. 2
0
    def optimize_params(self, X: pd.DataFrame, y: pd.Series) -> None:
        """
        Optimize main parameters of pipeline.

        Builds the grid of pipeline settings, drops the point equal to the
        current ``self.best_params["params"]``, shuffles the rest, and
        passes one point at a time to ``self.validate_fit`` for as long as
        ``self.training_time_left`` exceeds ``self.worst_training_time``
        and untried grid points remain.

        :param X: Data
        :param y: Target
        :return: None. Saving of the best model is presumably done by
            ``validate_fit`` (not visible here) -- confirm.
        """
        current_best = self.best_params["params"]
        # Skip the configuration that is already recorded as the best one.
        candidates = [
            point
            for point in ParameterGrid({
                "id_columns": [self.dtype_cols['idd']],
                "cat_columns": [self.dtype_cols['cat']],
                "encode_type": ["catboost"],
                "target_process": ["diff", "none"],
                "prev_target_col": [f"{self.label}__shift-1"],
                "apply_weights": ["none"],
                "use_features": ["all"],
            })
            if point != current_best
        ]
        random.shuffle(candidates)
        print(f"Number of grid points: {len(candidates)}")
        pprint(candidates)

        # Logical ``and`` (not bitwise ``&``): short-circuits and does not
        # depend on parentheses to get the precedence right.
        while (self.training_time_left > self.worst_training_time
               and candidates):
            pipeline_params = candidates.pop()
            self.validate_fit(X, y, {"params": pipeline_params})