Example #1
    def mutate_params(self, **kwargs):
        fake_lgbm_model = LightGBMModel(**self.input_dict)
        fake_lgbm_model.params = self.params
        fake_lgbm_model.mutate_params(**kwargs)
        self.params = fake_lgbm_model.lightgbm_params

        # see what else can mutate; we need to know which things we don't want to preserve
        uses_gpus, n_gpus = self.get_uses_gpus(self.params)
        if not uses_gpus:
            self.params['colsample_bylevel'] = MainModel.get_one(
                [0.3, 0.5, 0.9, 1.0])

        if not (uses_gpus and self.num_classes > 2):
            self.params['boosting_type'] = MainModel.get_one(
                ['Plain', 'Ordered'])

        if self._can_handle_categorical:
            max_cat_to_onehot_list = [
                4, 10, 20, 40, config.max_int_as_cat_uniques
            ]
            self.params['one_hot_max_size'] = MainModel.get_one(
                max_cat_to_onehot_list)
            if uses_gpus:
                self.params['one_hot_max_size'] = min(
                    self.params['one_hot_max_size'], 255)
            else:
                self.params['one_hot_max_size'] = min(
                    self.params['one_hot_max_size'], 65535)

        if not uses_gpus:
            self.params['sampling_frequency'] = MainModel.get_one(
                ['PerTree', 'PerTreeLevel', 'PerTreeLevel', 'PerTreeLevel'])

        bootstrap_type_list = [
            'Bayesian', 'Bayesian', 'Bayesian', 'Bayesian', 'Bernoulli', 'MVS',
            'Poisson', 'No'
        ]
        if not uses_gpus:
            bootstrap_type_list.remove('Poisson')
        if uses_gpus:
            bootstrap_type_list.remove('MVS')  # undocumented CPU only
        self.params['bootstrap_type'] = MainModel.get_one(bootstrap_type_list)

        if self.params['bootstrap_type'] in ['Poisson', 'Bernoulli']:
            self.params['subsample'] = MainModel.get_one(
                [0.5, 0.66, 0.66,
                 0.9])  # will get popped if not Poisson/Bernoulli

        if self.params['bootstrap_type'] in ['Bayesian']:
            self.params['bagging_temperature'] = MainModel.get_one(
                [0, 0.1, 0.5, 0.9, 1.0])
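
Note the repeated entries in the lists above: MainModel.get_one appears to draw a single value, so duplicates act as sampling weights ('Bayesian' fills four of eight slots, 'PerTreeLevel' three of four). A minimal stand-in sketch, assuming get_one is essentially a random draw (the real DAI helper is not shown in these snippets):

    import random

    def get_one(values):
        # Hypothetical stand-in for MainModel.get_one: a uniform draw over
        # the list, so repeated entries raise a value's selection probability.
        return random.choice(values)

    # 'Bayesian' fills 4 of 8 slots, so it is drawn with probability 0.5
    print(get_one(['Bayesian'] * 4 + ['Bernoulli', 'MVS', 'Poisson', 'No']))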
Example #2
 def set_default_params(self,
                        accuracy=None,
                        time_tolerance=None,
                        interpretability=None,
                        **kwargs):
     # First call the LightGBM set_default_params
     # This populates all model parameters just as DAI would.
     LightGBMModel.set_default_params(self,
                                      accuracy=accuracy,
                                      time_tolerance=time_tolerance,
                                      interpretability=interpretability,
                                      **kwargs)
     # Now we just need to tell LightGBM to do quantile regression
     self.params["objective"] = "quantile"
     self.params["alpha"] = QuantileRegressionLightGBMModel._quantile
Example #3
 def set_default_params(self,
                        accuracy=None, time_tolerance=None, interpretability=None,
                        **kwargs):
     # First call the parent set_default_params
     LightGBMModel.set_default_params(
         self,
         accuracy=accuracy,
         time_tolerance=time_tolerance,
         interpretability=interpretability,
         **kwargs
     )
     # Then modify the parameters
     self.params["grow_policy"] = "lossguide"
     self.params["max_leaves"] = 8192
     self.params["max_depth"] = -1
Example #4
 def set_default_params(self,
                        accuracy=None, time_tolerance=None, interpretability=None,
                        **kwargs):
     # First call the LightGBM set_default_params
     # This populates all model parameters just as DAI would.
     LightGBMModel.set_default_params(
         self,
         accuracy=accuracy,
         time_tolerance=time_tolerance,
         interpretability=interpretability,
         **kwargs
     )
     # Now we just need to tell LightGBM to use tweedie distribution
     self.params["objective"] = "tweedie"
     self.params["tweedie_variance_power"] = TweedieLightGBMModel._tweedie_variance_power
Example #5
 def mutate_params(self,
                   get_best=False,
                   time_tolerance=None,
                   accuracy=None,
                   imbalance_ratio=None,
                   train_shape=None,
                   ncol_effective=None,
                   time_series=False,
                   ensemble_level=None,
                   score_f_name: str = None,
                   **kwargs):
     # If we didn't override the parent mutate_params method, DAI could
     # mutate the objective away from ours while selecting the winner.
     # For demonstration purposes we make sure the objective stays the one
     # we want, so first call the parent method to mutate the parameters
     params = LightGBMModel.mutate_params(self,
                                          get_best=get_best,
                                          time_tolerance=time_tolerance,
                                          accuracy=accuracy,
                                          imbalance_ratio=imbalance_ratio,
                                          train_shape=train_shape,
                                          ncol_effective=ncol_effective,
                                          time_series=time_series,
                                          ensemble_level=ensemble_level,
                                          score_f_name=score_f_name,
                                          **kwargs)
     # Now set the objective
     params["objective"] = custom_asymmetric_objective
Example #6
    def set_default_params(self,
                           accuracy=None,
                           time_tolerance=None,
                           interpretability=None,
                           **kwargs):
        # Define the global loss
        # global custom_asymmetric_objective

        # First call the LightGBM set_default_params
        # This populates all model parameters just as DAI would.
        LightGBMModel.set_default_params(self,
                                         accuracy=accuracy,
                                         time_tolerance=time_tolerance,
                                         interpretability=interpretability,
                                         **kwargs)
        # Now we just need to tell LightGBM that it has to optimize for our custom objective
        # And we are done
        self.params["objective"] = custom_asymmetric_objective
Example #7
    def mutate_params(self, **kwargs):
        fake_lgbm_model = LightGBMModel(**self.input_dict)
        fake_lgbm_model.params = self.params.copy()
        fake_lgbm_model.params_base = self.params_base.copy()
        for k, v in fake_lgbm_model.params_base.items():
            if k in fake_lgbm_model.params:
                fake_lgbm_model.params[k] = fake_lgbm_model.params_base[k]
        kwargs['train_shape'] = kwargs.get('train_shape', (10000, 500))
        kwargs['from_catboost'] = True
        fake_lgbm_model.mutate_params(**kwargs)
        self.params.update(fake_lgbm_model.params)
        fake_lgbm_model.transcribe_params(params=self.params, **kwargs)
        self.params.update(fake_lgbm_model.lightgbm_params)

        get_best = kwargs.get('get_best', True)
        if get_best is None:
            get_best = True
        trial = kwargs.get('trial', False)
        if trial is None:
            trial = False

        # see what else can mutate; we need to know which things we don't want to preserve
        uses_gpus, n_gpus = self.get_uses_gpus(self.params)
        if not uses_gpus:
            colsample_bylevel_list = [0.3, 0.5, 0.9, 1.0]
            self.params['colsample_bylevel'] = MainModel.get_one(
                colsample_bylevel_list,
                get_best=get_best,
                best_type="first",
                name="colsample_bylevel",
                trial=trial)

        if not (uses_gpus and self.num_classes > 2):
            boosting_type_list = ['Plain', 'Ordered']
            self.params['boosting_type'] = MainModel.get_one(
                boosting_type_list,
                get_best=get_best,
                best_type="first",
                name="boosting_type",
                trial=trial)

        if self._can_handle_categorical:
            max_cat_to_onehot_list = [
                4, 10, 20, 40, config.max_int_as_cat_uniques
            ]
            if uses_gpus:
                max_one_hot_max_size = 255
            else:
                max_one_hot_max_size = 65535
            max_cat_to_onehot_list = sorted(
                set([
                    min(x, max_one_hot_max_size)
                    for x in max_cat_to_onehot_list
                ]))
            log = max(max_cat_to_onehot_list) > 1000  # sample on a log scale when the range is wide
            self.params['one_hot_max_size'] = MainModel.get_one(
                max_cat_to_onehot_list,
                get_best=get_best,
                best_type="max",
                name="one_hot_max_size",
                trial=trial,
                log=log)

        if not uses_gpus:
            sampling_frequency_list = [
                'PerTree', 'PerTreeLevel', 'PerTreeLevel', 'PerTreeLevel'
            ]
            self.params['sampling_frequency'] = MainModel.get_one(
                sampling_frequency_list,
                get_best=get_best,
                best_type="first",
                name="sampling_frequency",
                trial=trial)

        bootstrap_type_list = [
            'Bayesian', 'Bayesian', 'Bayesian', 'Bayesian', 'Bernoulli', 'MVS',
            'Poisson', 'No'
        ]
        if not uses_gpus:
            bootstrap_type_list.remove('Poisson')
        if uses_gpus:
            bootstrap_type_list.remove('MVS')  # undocumented CPU only
        self.params['bootstrap_type'] = MainModel.get_one(
            bootstrap_type_list,
            get_best=get_best,
            best_type="first",
            name="bootstrap_type",
            trial=trial)

        # lgbm usage already sets subsample
        #if self.params['bootstrap_type'] in ['Poisson', 'Bernoulli']:
        #    subsample_list = [0.5, 0.66, 0.66, 0.9]
        #    # will get pop'ed if not Poisson/Bernoulli
        #    self.params['subsample'] = MainModel.get_one(subsample_list, get_best=get_best, best_type="first", name="subsample", trial=trial)

        if self.params['bootstrap_type'] in ['Bayesian']:
            bagging_temperature_list = [0.0, 0.1, 0.5, 0.9, 1.0]
            self.params['bagging_temperature'] = MainModel.get_one(
                bagging_temperature_list,
                get_best=get_best,
                best_type="first",
                name="bagging_temperature",
                trial=trial)

        # overfit protection is sometimes handled differently than early_stopping_rounds
        # self.params['od_type']
        # self.params['od_pval']
        # self.params['od_wait']
        self.params['learning_rate'] = max(config.min_learning_rate,
                                           self._min_learning_rate_catboost,
                                           self.params['learning_rate'])
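
Example #7's core trick is to run the mutation in LightGBM's parameter space on a throwaway fake_lgbm_model and then transcribe the names into this model's (apparently CatBoost) space via transcribe_params. A hypothetical sketch of that transcription idea; the mapping below is illustrative only, not DAI's actual table:

    # assumed LightGBM -> CatBoost name mapping, for illustration only
    LGBM_TO_CATBOOST = {
        "num_leaves": "max_leaves",
        "min_child_samples": "min_data_in_leaf",
        "reg_lambda": "l2_leaf_reg",
    }

    def transcribe(lgbm_params):
        # rename the keys we know how to map, keep unknown keys as-is
        return {LGBM_TO_CATBOOST.get(k, k): v for k, v in lgbm_params.items()}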