def mutate_params(self, **kwargs):
    fake_lgbm_model = LightGBMModel(**self.input_dict)
    fake_lgbm_model.params = self.params.copy()
    fake_lgbm_model.params_base = self.params_base.copy()
    fake_lgbm_model.params.update(fake_lgbm_model.params_base)
    kwargs['train_shape'] = kwargs.get('train_shape', (10000, 500))
    fake_lgbm_model.mutate_params(**kwargs)
    self.params.update(fake_lgbm_model.params)
    fake_lgbm_model.transcribe_params(params=self.params)
    self.params.update(fake_lgbm_model.lightgbm_params)

    # see what else can mutate; need to know which things we don't want to preserve
    uses_gpus, n_gpus = self.get_uses_gpus(self.params)

    if not uses_gpus:
        self.params['colsample_bylevel'] = MainModel.get_one([0.3, 0.5, 0.9, 1.0])
    if not (uses_gpus and self.num_classes > 2):
        self.params['boosting_type'] = MainModel.get_one(['Plain', 'Ordered'])

    if self._can_handle_categorical:
        max_cat_to_onehot_list = [4, 10, 20, 40, config.max_int_as_cat_uniques]
        self.params['one_hot_max_size'] = MainModel.get_one(max_cat_to_onehot_list)
        if uses_gpus:
            self.params['one_hot_max_size'] = min(self.params['one_hot_max_size'], 255)
        else:
            self.params['one_hot_max_size'] = min(self.params['one_hot_max_size'], 65535)

    if not uses_gpus:
        self.params['sampling_frequency'] = MainModel.get_one(
            ['PerTree', 'PerTreeLevel', 'PerTreeLevel', 'PerTreeLevel'])

    bootstrap_type_list = ['Bayesian', 'Bayesian', 'Bayesian', 'Bayesian',
                           'Bernoulli', 'MVS', 'Poisson', 'No']
    if not uses_gpus:
        bootstrap_type_list.remove('Poisson')
    if uses_gpus:
        bootstrap_type_list.remove('MVS')  # undocumented CPU only
    self.params['bootstrap_type'] = MainModel.get_one(bootstrap_type_list)

    if self.params['bootstrap_type'] in ['Poisson', 'Bernoulli']:
        # will get pop'ed if not Poisson/Bernoulli
        self.params['subsample'] = MainModel.get_one([0.5, 0.66, 0.66, 0.9])
    if self.params['bootstrap_type'] in ['Bayesian']:
        self.params['bagging_temperature'] = MainModel.get_one([0, 0.1, 0.5, 0.9, 1.0])
def mutate_params(self, **kwargs):
    fake_lgbm_model = LightGBMModel(**self.input_dict)
    fake_lgbm_model.params = self.params.copy()
    fake_lgbm_model.params_base = self.params_base.copy()
    for k, v in fake_lgbm_model.params_base.items():
        if k in fake_lgbm_model.params:
            fake_lgbm_model.params[k] = fake_lgbm_model.params_base[k]
    kwargs['train_shape'] = kwargs.get('train_shape', (10000, 500))
    kwargs['from_catboost'] = True
    fake_lgbm_model.mutate_params(**kwargs)
    self.params.update(fake_lgbm_model.params)
    fake_lgbm_model.transcribe_params(params=self.params, **kwargs)
    self.params.update(fake_lgbm_model.lightgbm_params)

    get_best = kwargs.get('get_best', True)
    if get_best is None:
        get_best = True
    trial = kwargs.get('trial', False)
    if trial is None:
        trial = False

    # see what else can mutate; need to know which things we don't want to preserve
    uses_gpus, n_gpus = self.get_uses_gpus(self.params)

    if not uses_gpus:
        colsample_bylevel_list = [0.3, 0.5, 0.9, 1.0]
        self.params['colsample_bylevel'] = MainModel.get_one(colsample_bylevel_list, get_best=get_best,
                                                             best_type="first", name="colsample_bylevel",
                                                             trial=trial)

    if not (uses_gpus and self.num_classes > 2):
        boosting_type_list = ['Plain', 'Ordered']
        self.params['boosting_type'] = MainModel.get_one(boosting_type_list, get_best=get_best,
                                                         best_type="first", name="boosting_type", trial=trial)

    if self._can_handle_categorical:
        max_cat_to_onehot_list = [4, 10, 20, 40, config.max_int_as_cat_uniques]
        if uses_gpus:
            max_one_hot_max_size = 255
        else:
            max_one_hot_max_size = 65535
        max_cat_to_onehot_list = sorted(set([min(x, max_one_hot_max_size) for x in max_cat_to_onehot_list]))
        log = True if max(max_cat_to_onehot_list) > 1000 else False
        self.params['one_hot_max_size'] = MainModel.get_one(max_cat_to_onehot_list, get_best=get_best,
                                                            best_type="max", name="one_hot_max_size",
                                                            trial=trial, log=log)

    if not uses_gpus:
        sampling_frequency_list = ['PerTree', 'PerTreeLevel', 'PerTreeLevel', 'PerTreeLevel']
        self.params['sampling_frequency'] = MainModel.get_one(sampling_frequency_list, get_best=get_best,
                                                              best_type="first", name="sampling_frequency",
                                                              trial=trial)

    bootstrap_type_list = ['Bayesian', 'Bayesian', 'Bayesian', 'Bayesian',
                           'Bernoulli', 'MVS', 'Poisson', 'No']
    if not uses_gpus:
        bootstrap_type_list.remove('Poisson')
    if uses_gpus:
        bootstrap_type_list.remove('MVS')  # undocumented CPU only
    self.params['bootstrap_type'] = MainModel.get_one(bootstrap_type_list, get_best=get_best,
                                                      best_type="first", name="bootstrap_type", trial=trial)

    # lgbm usage already sets subsample
    # if self.params['bootstrap_type'] in ['Poisson', 'Bernoulli']:
    #     subsample_list = [0.5, 0.66, 0.66, 0.9]
    #     # will get pop'ed if not Poisson/Bernoulli
    #     self.params['subsample'] = MainModel.get_one(subsample_list, get_best=get_best, best_type="first",
    #                                                  name="subsample", trial=trial)

    if self.params['bootstrap_type'] in ['Bayesian']:
        bagging_temperature_list = [0.0, 0.1, 0.5, 0.9, 1.0]
        self.params['bagging_temperature'] = MainModel.get_one(bagging_temperature_list, get_best=get_best,
                                                               best_type="first", name="bagging_temperature",
                                                               trial=trial)

    # overfit protection, sometimes different compared to early_stopping_rounds
    # self.params['od_type']
    # self.params['od_pval']
    # self.params['od_wait']

    self.params['learning_rate'] = max(config.min_learning_rate,
                                       max(self._min_learning_rate_catboost, self.params['learning_rate']))
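# Illustrative only: a minimal sketch of what a get_one-style chooser might do, assuming it
# returns a preferred default when get_best is set and otherwise samples from the candidate
# list (optionally via an Optuna-style trial keyed by name). The function name, behavior,
# and trial handling below are assumptions for illustration; this is NOT the actual
# MainModel.get_one implementation.
import random


def example_get_one(values, get_best=True, best_type="first", name=None, trial=None, log=False):
    # 'log' would hint that large numeric ranges should be sampled on a log scale (unused here)
    if get_best:
        # "first" -> take the leading (default) entry, "max" -> take the largest candidate
        return values[0] if best_type == "first" else max(values)
    if trial:
        # hypothetical Optuna-style categorical suggestion, keyed by the parameter name
        return trial.suggest_categorical(name, sorted(set(values)))
    # duplicated entries in the candidate list act as simple sampling weights
    return random.choice(values)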