def set_default_params(self, accuracy=10, time_tolerance=10, interpretability=1, **kwargs):
    # https://catboost.ai/docs/concepts/python-reference_parameters-list.html
    # https://catboost.ai/docs/concepts/python-reference_catboostclassifier.html
    # optimize for final model as transcribed from best lightgbm model
    n_estimators = self.params_base.get('n_estimators', 100)
    learning_rate = self.params_base.get('learning_rate', config.min_learning_rate)
    early_stopping_rounds_default = min(500, max(1, int(n_estimators / 4)))
    early_stopping_rounds = self.params_base.get('early_stopping_rounds', early_stopping_rounds_default)
    self.params = {'bootstrap_type': 'Bayesian',
                   'n_estimators': n_estimators,
                   'learning_rate': learning_rate,
                   'early_stopping_rounds': early_stopping_rounds,
                   'max_depth': 8,
                   }
    dummy = kwargs.get('dummy', False)
    ensemble_level = kwargs.get('ensemble_level', 0)
    train_shape = kwargs.get('train_shape', (1, 1))
    valid_shape = kwargs.get('valid_shape', (1, 1))
    self.get_gbm_main_params_evolution(self.params, dummy, accuracy, self.num_classes,
                                       ensemble_level, train_shape, valid_shape)
    # self.params['has_time']  # should use this if TS problem
    if self._can_handle_categorical:
        # less than 2 is risky, can get stuck in learning
        max_cat_to_onehot_list = [4, 10, 20, 40, config.max_int_as_cat_uniques]
        self.params['one_hot_max_size'] = MainModel.get_one(max_cat_to_onehot_list, get_best=True)
        uses_gpus, n_gpus = self.get_uses_gpus(self.params)
        if uses_gpus:
            self.params['one_hot_max_size'] = min(self.params['one_hot_max_size'], 255)
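
# --- Sketch: the early-stopping default above is just "a quarter of the tree
# budget, clamped to [1, 500]". A minimal standalone illustration of that
# arithmetic; nothing here depends on the surrounding framework:
def _default_early_stopping_rounds(n_estimators):
    # quarter of the tree budget, never below 1, never above 500
    return min(500, max(1, int(n_estimators / 4)))

assert _default_early_stopping_rounds(100) == 25
assert _default_early_stopping_rounds(2) == 1  # floor kicks in
assert _default_early_stopping_rounds(4000) == 500  # ceiling kicks in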
def mutate_params(self, **kwargs):
    fake_lgbm_model = LightGBMModel(**self.input_dict)
    fake_lgbm_model.params = self.params.copy()
    fake_lgbm_model.params_base = self.params_base.copy()
    fake_lgbm_model.params.update(fake_lgbm_model.params_base)
    kwargs['train_shape'] = kwargs.get('train_shape', (10000, 500))
    fake_lgbm_model.mutate_params(**kwargs)
    self.params.update(fake_lgbm_model.params)
    fake_lgbm_model.transcribe_params(params=self.params)
    self.params.update(fake_lgbm_model.lightgbm_params)

    # see what else can mutate; need to know which things we don't want to preserve
    uses_gpus, n_gpus = self.get_uses_gpus(self.params)

    if not uses_gpus:
        self.params['colsample_bylevel'] = MainModel.get_one([0.3, 0.5, 0.9, 1.0])
    if not (uses_gpus and self.num_classes > 2):
        self.params['boosting_type'] = MainModel.get_one(['Plain', 'Ordered'])

    if self._can_handle_categorical:
        max_cat_to_onehot_list = [4, 10, 20, 40, config.max_int_as_cat_uniques]
        self.params['one_hot_max_size'] = MainModel.get_one(max_cat_to_onehot_list)
        if uses_gpus:
            self.params['one_hot_max_size'] = min(self.params['one_hot_max_size'], 255)
        else:
            self.params['one_hot_max_size'] = min(self.params['one_hot_max_size'], 65535)

    if not uses_gpus:
        self.params['sampling_frequency'] = MainModel.get_one(['PerTree', 'PerTreeLevel',
                                                               'PerTreeLevel', 'PerTreeLevel'])

    bootstrap_type_list = ['Bayesian', 'Bayesian', 'Bayesian', 'Bayesian',
                           'Bernoulli', 'MVS', 'Poisson', 'No']
    if not uses_gpus:
        bootstrap_type_list.remove('Poisson')
    if uses_gpus:
        bootstrap_type_list.remove('MVS')  # undocumented CPU only
    self.params['bootstrap_type'] = MainModel.get_one(bootstrap_type_list)

    if self.params['bootstrap_type'] in ['Poisson', 'Bernoulli']:
        # will get popped if not Poisson/Bernoulli
        self.params['subsample'] = MainModel.get_one([0.5, 0.66, 0.66, 0.9])
    if self.params['bootstrap_type'] in ['Bayesian']:
        self.params['bagging_temperature'] = MainModel.get_one([0, 0.1, 0.5, 0.9, 1.0])
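
# --- Sketch: MainModel.get_one belongs to the surrounding framework; assuming
# it draws uniformly from the supplied list (an assumption, not confirmed in
# this file), repeating an entry is a cheap way to weight the draw. The same
# idiom with plain random.choice:
import random
from collections import Counter

_bootstrap_type_list = ['Bayesian'] * 4 + ['Bernoulli', 'MVS', 'Poisson', 'No']
_draws = Counter(random.choice(_bootstrap_type_list) for _ in range(10000))
# 'Bayesian' should win roughly half of all draws (4 of the 8 entries)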
def set_default_params(self, accuracy=10, time_tolerance=10, interpretability=1, **kwargs):
    # https://catboost.ai/docs/concepts/python-reference_parameters-list.html
    # https://catboost.ai/docs/concepts/python-reference_catboostclassifier.html
    # optimize for final model as transcribed from best lightgbm model
    n_estimators = self.params_base.get('n_estimators', 100)
    learning_rate = self.params_base.get('learning_rate', config.min_learning_rate)
    early_stopping_rounds_default = min(500, max(1, int(n_estimators / 4)))
    early_stopping_rounds = self.params_base.get('early_stopping_rounds', early_stopping_rounds_default)
    self.params = {'bootstrap_type': 'Bayesian',
                   'n_estimators': n_estimators,
                   'learning_rate': learning_rate,
                   'early_stopping_rounds': early_stopping_rounds,
                   'max_depth': 8,
                   'grow_policy': 'depthwise',
                   }
    dummy = kwargs.get('dummy', False)
    ensemble_level = kwargs.get('ensemble_level', 0)
    train_shape = kwargs.get('train_shape', (1, 1))
    valid_shape = kwargs.get('valid_shape', (1, 1))
    self.get_gbm_main_params_evolution(params=self.params,
                                       dummy=dummy,
                                       accuracy=accuracy,
                                       num_classes=self.num_classes,
                                       ensemble_level=ensemble_level,
                                       train_shape=train_shape,
                                       valid_shape=valid_shape)
    for k in kwargs:
        if k in self.params:
            self.params[k] = copy.deepcopy(kwargs[k])
    # self.params['has_time']  # should use this if TS problem
    if self._can_handle_categorical:
        # less than 2 is risky, can get stuck in learning
        max_cat_to_onehot_list = [4, 10, 20, 40, config.max_int_as_cat_uniques]
        self.params['one_hot_max_size'] = MainModel.get_one(max_cat_to_onehot_list, get_best=True)
        uses_gpus, n_gpus = self.get_uses_gpus(self.params)
        if uses_gpus:
            self.params['one_hot_max_size'] = min(self.params['one_hot_max_size'], 255)
        else:
            self.params['one_hot_max_size'] = min(self.params['one_hot_max_size'], 65535)
    self.params['learning_rate'] = max(self._min_learning_rate_catboost, self.params['learning_rate'])

    # fill mutatable params with best values for anything the defaults didn't set
    params = copy.deepcopy(self.params)
    self.mutate_params(accuracy=accuracy, time_tolerance=time_tolerance,
                       interpretability=interpretability, get_best=True, **kwargs)
    params_from_mutate = copy.deepcopy(self.params)
    for k in params_from_mutate:
        if k not in params:
            params[k] = params_from_mutate[k]
    self.params = copy.deepcopy(params)
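
# --- Sketch: the tail of this version snapshots the defaults, runs
# mutate_params(get_best=True), then keeps mutated values only for keys the
# defaults never set. That merge is equivalent to a plain dict update with the
# defaults applied last; names below are illustrative, not framework API:
import copy

_defaults = {'max_depth': 8, 'learning_rate': 0.05}
_mutated = {'max_depth': 6, 'learning_rate': 0.1, 'bagging_temperature': 0.5}
_merged = copy.deepcopy(_mutated)
_merged.update(_defaults)  # defaults win; mutation only fills the gaps
assert _merged == {'max_depth': 8, 'learning_rate': 0.05, 'bagging_temperature': 0.5}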
def mutate_params(self, **kwargs):
    fake_lgbm_model = LightGBMModel(**self.input_dict)
    fake_lgbm_model.params = self.params.copy()
    fake_lgbm_model.params_base = self.params_base.copy()
    for k, v in fake_lgbm_model.params_base.items():
        if k in fake_lgbm_model.params:
            fake_lgbm_model.params[k] = fake_lgbm_model.params_base[k]
    kwargs['train_shape'] = kwargs.get('train_shape', (10000, 500))
    kwargs['from_catboost'] = True
    fake_lgbm_model.mutate_params(**kwargs)
    self.params.update(fake_lgbm_model.params)
    fake_lgbm_model.transcribe_params(params=self.params, **kwargs)
    self.params.update(fake_lgbm_model.lightgbm_params)

    get_best = kwargs.get('get_best', True)
    if get_best is None:
        get_best = True
    trial = kwargs.get('trial', False)
    if trial is None:
        trial = False

    # see what else can mutate; need to know which things we don't want to preserve
    uses_gpus, n_gpus = self.get_uses_gpus(self.params)

    if not uses_gpus:
        colsample_bylevel_list = [0.3, 0.5, 0.9, 1.0]
        self.params['colsample_bylevel'] = MainModel.get_one(colsample_bylevel_list, get_best=get_best,
                                                             best_type="first", name="colsample_bylevel",
                                                             trial=trial)
    if not (uses_gpus and self.num_classes > 2):
        boosting_type_list = ['Plain', 'Ordered']
        self.params['boosting_type'] = MainModel.get_one(boosting_type_list, get_best=get_best,
                                                         best_type="first", name="boosting_type",
                                                         trial=trial)

    if self._can_handle_categorical:
        max_cat_to_onehot_list = [4, 10, 20, 40, config.max_int_as_cat_uniques]
        if uses_gpus:
            max_one_hot_max_size = 255
        else:
            max_one_hot_max_size = 65535
        max_cat_to_onehot_list = sorted(set([min(x, max_one_hot_max_size) for x in max_cat_to_onehot_list]))
        log = max(max_cat_to_onehot_list) > 1000
        self.params['one_hot_max_size'] = MainModel.get_one(max_cat_to_onehot_list, get_best=get_best,
                                                            best_type="max", name="one_hot_max_size",
                                                            trial=trial, log=log)

    if not uses_gpus:
        sampling_frequency_list = ['PerTree', 'PerTreeLevel', 'PerTreeLevel', 'PerTreeLevel']
        self.params['sampling_frequency'] = MainModel.get_one(sampling_frequency_list, get_best=get_best,
                                                              best_type="first", name="sampling_frequency",
                                                              trial=trial)

    bootstrap_type_list = ['Bayesian', 'Bayesian', 'Bayesian', 'Bayesian',
                           'Bernoulli', 'MVS', 'Poisson', 'No']
    if not uses_gpus:
        bootstrap_type_list.remove('Poisson')
    if uses_gpus:
        bootstrap_type_list.remove('MVS')  # undocumented CPU only
    self.params['bootstrap_type'] = MainModel.get_one(bootstrap_type_list, get_best=get_best,
                                                      best_type="first", name="bootstrap_type",
                                                      trial=trial)

    # lgbm usage already sets subsample
    # if self.params['bootstrap_type'] in ['Poisson', 'Bernoulli']:
    #     subsample_list = [0.5, 0.66, 0.66, 0.9]
    #     # will get popped if not Poisson/Bernoulli
    #     self.params['subsample'] = MainModel.get_one(subsample_list, get_best=get_best,
    #                                                  best_type="first", name="subsample", trial=trial)
    if self.params['bootstrap_type'] in ['Bayesian']:
        bagging_temperature_list = [0.0, 0.1, 0.5, 0.9, 1.0]
        self.params['bagging_temperature'] = MainModel.get_one(bagging_temperature_list, get_best=get_best,
                                                               best_type="first", name="bagging_temperature",
                                                               trial=trial)

    # overfit protection, sometimes different from early_stopping_rounds
    # self.params['od_type']
    # self.params['od_pval']
    # self.params['od_wait']

    self.params['learning_rate'] = max(config.min_learning_rate,
                                       max(self._min_learning_rate_catboost, self.params['learning_rate']))
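
# --- Sketch: the bootstrap gating above matches CatBoost's documented
# constraints as I understand them: 'Poisson' is GPU-only, 'subsample' applies
# to 'Poisson'/'Bernoulli', and 'bagging_temperature' only to 'Bayesian'.
# A self-contained sampler following those rules; values are illustrative:
import random

def _sample_bootstrap_params(uses_gpus):
    choices = ['Bayesian', 'Bernoulli', 'No'] + (['Poisson'] if uses_gpus else ['MVS'])
    params = {'bootstrap_type': random.choice(choices)}
    if params['bootstrap_type'] in ('Poisson', 'Bernoulli'):
        params['subsample'] = random.choice([0.5, 0.66, 0.9])
    elif params['bootstrap_type'] == 'Bayesian':
        params['bagging_temperature'] = random.choice([0.0, 0.1, 0.5, 0.9, 1.0])
    return params

# e.g. _sample_bootstrap_params(uses_gpus=False)
#   -> {'bootstrap_type': 'MVS'}
#   -> {'bootstrap_type': 'Bayesian', 'bagging_temperature': 0.5}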