# NOTE: these train() variants assume module-level imports along the lines of
# the following (the TrainingMode path is a guess; Strategy and CrossValidation
# are imported from models.* further below in this file):
#   import random
#   from models.training_mode import TrainingMode  # hypothetical path

def train(self):
    """
    ## Train Single Model ##

    Model Name:
        'lr':      Logistic Regression
        'rf':      Random Forest
        'et':      Extra Trees
        'gb':      GradientBoosting
        'xgb':     XGBoost
        'xgb_sk':  XGBoost using scikit-learn module
        'lgb':     LightGBM
        'lgb_sk':  LightGBM using scikit-learn module
        'cb':      CatBoost
    """
    TM = TrainingMode()

    """Global Seed"""
    train_seed = random.randint(0, 1000)
    cv_seed = random.randint(0, 1000)
    # train_seed = 666
    # cv_seed = 216  # 425 48 461 157

    """Training Arguments"""
    train_args = {'use_global_valid': False,
                  'use_custom_obj': False,
                  'show_importance': False,
                  'save_final_pred': True,
                  'save_final_pred_train': False,
                  'save_cv_pred': True,
                  'save_cv_pred_train': False,
                  'save_csv_log': True,
                  'loss_fuc': self.rmse,
                  'append_info': 'Yuanan Bike'}

    """Cross Validation Arguments"""
    cv_args = {'n_cv': 10}

    """Base Parameters"""
    base_parameters = self.get_base_params('dnn')

    """Train Single Model"""
    TM.train_single_model('dnn', train_seed, cv_seed,
                          # num_boost_round=1000,
                          base_parameters=base_parameters,
                          train_args=train_args, cv_args=cv_args)

    print('======================================================')
    print('Global Train Seed: {}'.format(train_seed))
    print('Global Cross Validation Seed: {}'.format(cv_seed))
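# train_args['loss_fuc'] above points at self.rmse, which is not defined in
# this snippet (the key spelling follows the framework's API). A minimal
# sketch of such a loss function, assuming it receives arrays of true and
# predicted values (hypothetical implementation, not the project's method;
# in the real code it would live on the same class):
import numpy as np

def rmse(y_true, y_pred):
    """Root mean squared error between targets and predictions."""
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.sqrt(np.mean((y_true - y_pred) ** 2))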
def train(self):
    """
    ## Auto Train with Logs of Boost Round ##

    Model Name:
        'lr':      Logistic Regression
        'rf':      Random Forest
        'et':      Extra Trees
        'gb':      GradientBoosting
        'xgb':     XGBoost
        'xgb_sk':  XGBoost using scikit-learn module
        'lgb':     LightGBM
        'lgb_sk':  LightGBM using scikit-learn module
        'cb':      CatBoost
    """
    TM = TrainingMode()

    """Training Arguments"""
    train_args = {'use_global_valid': False,
                  'use_custom_obj': False,
                  'show_importance': False,
                  'save_final_pred': True,
                  'save_final_pred_train': False,
                  'save_cv_pred': True,
                  'save_cv_pred_train': False,
                  'save_csv_log': True,
                  'loss_fuc': self.rmse,
                  'append_info': 'Yuanan Bike'}

    """Cross Validation Arguments"""
    cv_args = {'n_cv': 10}

    """Base Parameters"""
    base_parameters = self.get_base_params('dnn')

    """Auto Train with Logs of Boost Round"""
    pg_list = [[['learning_rate', [0.05]]]]
    train_seed_list = [68]
    cv_seed_list = [95]
    TM.auto_train_boost_round('dnn', num_boost_round=10, n_epoch=1, full_grid_search=True,
                              train_seed_list=train_seed_list, cv_seed_list=cv_seed_list,
                              base_parameters=base_parameters, parameter_grid_list=pg_list,
                              save_final_pred=True, train_args=train_args, cv_args=cv_args)

    """Train Different Rounds"""
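# self.get_base_params('dnn') above is assumed to return a plain dict of
# model hyperparameters. A hypothetical sketch, using only parameter names
# that appear in the grids elsewhere in this file (learning_rate,
# keep_probability, unit_number, epochs); the values are illustrative
# placeholders, not the project's real defaults:
def get_base_params(model_name):
    """Return the base hyperparameter dict for a model name (sketch)."""
    presets = {'dnn': {'unit_number': [64, 32, 16],
                       'learning_rate': 0.05,
                       'keep_probability': 0.8,
                       'epochs': 2}}
    return presets[model_name]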
def train(self):
    """
    Model Name:
        'lr':          Logistic Regression
        'rf':          Random Forest
        'et':          Extra Trees
        'ab':          AdaBoost
        'gb':          GradientBoosting
        'xgb':         XGBoost
        'xgb_sk':      XGBoost using scikit-learn module
        'lgb':         LightGBM
        'lgb_sk':      LightGBM using scikit-learn module
        'cb':          CatBoost
        'dnn':         Deep Neural Networks
        'stack_lgb':   LightGBM for stack layer
        'christar':    Christar1991
        'prejudge_b':  PrejudgeBinary
        'prejudge_m':  PrejudgeMultiClass
        'stack_t':     StackTree
    """
    TM = TrainingMode()

    """Global Seed"""
    train_seed = random.randint(0, 500)
    cv_seed = random.randint(0, 500)
    # train_seed = 666
    # cv_seed = 216  # 425 48 461 157

    """Training Arguments"""
    train_args = {'train_seed': train_seed,
                  'prescale': False,
                  'postscale': False,
                  'use_scale_pos_weight': False,
                  'use_global_valid': False,
                  'use_custom_obj': False,
                  'show_importance': False,
                  'show_accuracy': True,
                  'save_final_pred': True,
                  'save_final_prob_train': False,
                  'save_cv_pred': False,
                  'save_cv_prob_train': False,
                  'save_csv_log': True,
                  'append_info': None}

    """Cross Validation Arguments"""
    cv_args = {'n_valid': 4,
               'n_cv': 20,
               'n_era': 20}
    # cv_args = self.get_cv_args('lgb_fi')

    """Reduced Features"""
    reduced_feature_list = None

    """Base Parameters"""
    base_parameters = self.get_base_params('xgb')
    # base_parameters = None

    """Train Single Model"""
    # TM.train_single_model('xgb', train_seed, cv_seed, num_boost_round=88,
    #                       reduced_feature_list=reduced_feature_list, base_parameters=base_parameters,
    #                       train_args=train_args, use_multi_group=False)

    """Auto Train with Logs of Boost Round"""
    pg_list = [
        [['learning_rate', [0.003]]]
    ]
    # train_seed_list = [666]
    # cv_seed_list = [216]
    train_seed_list = None
    cv_seed_list = None
    TM.auto_train_boost_round('xgb', num_boost_round=100, grid_search_n_cv=20, n_epoch=100,
                              full_grid_search=False, use_multi_group=True,
                              train_seed_list=train_seed_list, cv_seed_list=cv_seed_list,
                              base_parameters=base_parameters, parameter_grid_list=pg_list,
                              save_final_pred=True, reduced_feature_list=reduced_feature_list,
                              train_args=train_args, cv_args=cv_args)

    """Auto Grid Search Parameters"""
    # pg_list = [
    #     [['max_depth', (8, 9, 10, 11, 12)],
    #      ['feature_fraction', (0.5, 0.6, 0.7, 0.8, 0.9)],
    #      ['bagging_fraction', (0.6, 0.7, 0.8, 0.9)],
    #      ['bagging_freq', (1, 3, 5, 7)]]
    # ]
    # train_seed_list = [999]
    # cv_seed_list = [888]
    # # train_seed_list = None
    # # cv_seed_list = None
    # TM.auto_grid_search('lgb', num_boost_round=65, grid_search_n_cv=5, n_epoch=1, use_multi_group=True,
    #                     full_grid_search=True, train_seed_list=train_seed_list, cv_seed_list=cv_seed_list,
    #                     parameter_grid_list=pg_list, base_parameters=base_parameters, save_final_pred=False,
    #                     reduced_feature_list=reduced_feature_list, train_args=train_args, cv_args=cv_args)

    """Auto Train"""
    # TM.auto_train('lgb', n_epoch=10000, base_parameters=base_parameters,
    #               reduced_feature_list=reduced_feature_list, train_args=train_args, use_multi_group=False)

    """Others"""
    # TM.train_single_model('dnn', train_seed, cv_seed, reduced_feature_list=reduced_feature_list,
    #                       base_parameters=base_parameters, train_args=train_args, use_multi_group=False)
    # TM.train_single_model('prejudge_b', train_seed, cv_seed, load_pickle=False,
    #                       base_parameters=base_parameters, reduced_feature_list=reduced_feature_list,
    #                       train_args=train_args, use_multi_group=False)
    # TM.train_single_model('stack_lgb', train_seed, cv_seed, auto_idx=1,
    #                       base_parameters=base_parameters, reduced_feature_list=reduced_feature_list,
    #                       train_args=train_args, use_multi_group=False)
    # TM.train_single_model('stack_pt', train_seed, cv_seed, reduced_feature_list=reduced_feature_list,
    #                       base_parameters=base_parameters, train_args=train_args, use_multi_group=False)

    # pg_list = [
    #     [['learning_rate', [0.00005]]],
    #     [['keep_probability', [0.4, 0.5, 0.6, 0.7, 0.8, 0.9]]],
    #     # [['unit_number',
    #     #   [[32, 16, 8],
    #     #    [48, 24, 12],
    #     #    [64, 32], [64, 32, 16],
    #     #    [128, 64], [128, 64, 32], [128, 64, 32, 16],
    #     #    [256, 128], [256, 128, 64], [256, 128, 64, 32], [256, 128, 64, 32, 16],
    #     #    [200, 100, 50],
    #     #    [2048, 512],
    #     #    [288, 144, 72], [288, 144, 72, 36],
    #     #    [216, 108, 54], [216, 108, 54, 27],
    #     #    [128, 256, 128, 64], [64, 128, 64, 32], [128, 256, 128], [64, 128, 64]]]]
    # ]
    # train_seed_list = [666]
    # cv_seed_list = [216]
    # TM.auto_train_boost_round('dnn', train_seed_list, cv_seed_list, n_epoch=1, base_parameters=base_parameters,
    #                           epochs=2, parameter_grid_list=pg_list, save_final_pred=True,
    #                           reduced_feature_list=reduced_feature_list, grid_search_n_cv=20,
    #                           train_args=train_args, use_multi_group=False)
    # TM.auto_train('stack_t', n_epoch=2, stack_final_epochs=10, base_parameters=base_parameters,
    #               reduced_feature_list=reduced_feature_list, train_args=train_args, use_multi_group=False)

    print('======================================================')
    print('Global Train Seed: {}'.format(train_seed))
    print('Global Cross Validation Seed: {}'.format(cv_seed))
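# How parameter_grid_list entries expand is not shown in this file. A guess
# at the semantics (hypothetical, not the framework's actual code): with
# full_grid_search=True every combination of the value lists is tried
# (Cartesian product); with full_grid_search=False the value lists are walked
# in parallel, which is consistent with the parallel n_cv_list /
# valid_rate_list construction in the commented-out grids elsewhere here.
from itertools import product

def expand_grid(grid, full_grid_search):
    """Yield one parameter dict per candidate setting of a single grid."""
    names = [name for name, _ in grid]
    value_lists = [values for _, values in grid]
    combos = product(*value_lists) if full_grid_search else zip(*value_lists)
    for combo in combos:
        yield dict(zip(names, combo))

# Example: a 2x2 grid yields 4 dicts with full_grid_search=True, 2 otherwise:
# list(expand_grid([['max_depth', (8, 9)], ['subsample', (0.8, 0.9)]], True))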
def train(self):
    """
    ## Auto Train with Logs of Boost Round ##

    Model Name:
        'lr':      Logistic Regression
        'rf':      Random Forest
        'et':      Extra Trees
        'gb':      GradientBoosting
        'xgb':     XGBoost
        'xgb_sk':  XGBoost using scikit-learn module
        'lgb':     LightGBM
        'lgb_sk':  LightGBM using scikit-learn module
        'cb':      CatBoost
    """
    TM = TrainingMode()

    """Training Arguments"""
    train_args = {'use_global_valid': False,
                  'use_custom_obj': False,
                  'show_importance': False,
                  'save_final_pred': True,
                  'save_final_pred_train': False,
                  'save_cv_pred': False,
                  'save_cv_pred_train': False,
                  'save_csv_log': True,
                  'loss_fuc': self.rmse,
                  'append_info': 'Yuanan Bike'}

    """Cross Validation Arguments"""
    cv_args = {'n_cv': 10}
    # cv_args = self.get_cv_args('xgb')

    """Base Parameters"""
    # base_parameters = self.get_base_params('xgb')
    base_parameters = None

    """Auto Train with Logs of Boost Round"""
    pg_list = [
        # [['n_cv', [5, 7, 9]],
        #  ['valid_rate', valid_rate_list]],
        [['n_cv', [5, 7, 9]],
         ['max_depth', [12, 13, 14]],           # best: 14
         ['learning_rate', [0.15, 0.2, 0.25]],  # best: 0.15
         # ['min_child_weight', [9]],
         ['subsample', [0.75, 0.8]],            # best: 0.8
         ['colsample_bytree', [0.7]],           # best: 0.7
         ['colsample_bylevel', [0.77]]]         # best: 0.77
    ]
    train_seed_list = None
    cv_seed_list = None
    TM.auto_train_boost_round('xgb', num_boost_round=600, n_epoch=3, full_grid_search=True,
                              train_seed_list=train_seed_list, cv_seed_list=cv_seed_list,
                              base_parameters=base_parameters, parameter_grid_list=pg_list,
                              save_final_pred=True, train_args=train_args, cv_args=cv_args)

    """Train Different Rounds"""
def train(self):
    """
    ## Auto Train ##

    Model Name:
        'lr':          Logistic Regression
        'rf':          Random Forest
        'et':          Extra Trees
        'ab':          AdaBoost
        'gb':          GradientBoosting
        'xgb':         XGBoost
        'xgb_sk':      XGBoost using scikit-learn module
        'lgb':         LightGBM
        'lgb_sk':      LightGBM using scikit-learn module
        'cb':          CatBoost
        'dnn':         Deep Neural Networks
        'stack_lgb':   LightGBM for stack layer
        'christar':    Christar1991
        'prejudge_b':  PrejudgeBinary
        'prejudge_m':  PrejudgeMultiClass
        'stack_t':     StackTree
    """
    TM = TrainingMode()

    """Training Arguments"""
    train_args = {'prescale': False,
                  'postscale': False,
                  'use_scale_pos_weight': False,
                  'use_global_valid': False,
                  'use_custom_obj': False,
                  'show_importance': False,
                  'show_accuracy': True,
                  'save_final_pred': True,
                  'save_final_prob_train': False,
                  'save_cv_pred': False,
                  'save_cv_prob_train': False,
                  'save_csv_log': True,
                  'append_info': None}

    """Cross Validation Arguments"""
    # cv_args = {'n_valid': 4,
    #            'n_cv': 20,
    #            'n_era': 20}
    cv_args = self.get_cv_args('lgb_fi')

    """Reduced Features"""
    reduced_feature_list = None

    """Base Parameters"""
    base_parameters = self.get_base_params('xgb')
    # base_parameters = None

    """Auto Train"""
    TM.auto_train('xgb', n_epoch=200, base_parameters=base_parameters, use_multi_group=False,
                  reduced_feature_list=reduced_feature_list, train_args=train_args, cv_args=cv_args)
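# self.get_cv_args(...) above is assumed to return a cross-validation
# settings dict keyed by a preset name. A hypothetical sketch, built from the
# literal dicts that appear (commented out) in this file:
def get_cv_args(preset):
    """Return the CV settings dict for a named preset (sketch)."""
    presets = {'lgb_fi': {'n_valid': 4, 'n_cv': 20, 'n_era': 20},
               'xgb': {'n_splits': 10, 'n_cv': 10}}
    return presets[preset]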
def train(self):
    """
    ## Auto Train with Logs of Boost Round ##

    Model Name:
        'lr':          Logistic Regression
        'rf':          Random Forest
        'et':          Extra Trees
        'ab':          AdaBoost
        'gb':          GradientBoosting
        'xgb':         XGBoost
        'xgb_sk':      XGBoost using scikit-learn module
        'lgb':         LightGBM
        'lgb_sk':      LightGBM using scikit-learn module
        'cb':          CatBoost
        'dnn':         Deep Neural Networks
        'stack_lgb':   LightGBM for stack layer
        'christar':    Christar1991
        'prejudge_b':  PrejudgeBinary
        'prejudge_m':  PrejudgeMultiClass
        'stack_t':     StackTree
    """
    TM = TrainingMode()

    """Strategy Arguments"""
    from models.strategy import Strategy
    strategy_args = {'use_strategy': True,
                     'f_strategy': Strategy.calc_profit,
                     'buy_count': 15,
                     'fee': 0.0034,
                     'ascending': False,
                     'save_result': True,
                     'save_image': True}

    """Training Arguments"""
    train_args = {'prescale': False,
                  'postscale': True,
                  'use_scale_pos_weight': False,
                  'use_global_valid': False,
                  'use_custom_obj': False,
                  'show_importance': False,
                  'show_accuracy': True,
                  'save_final_pred': True,
                  'save_final_prob_train': False,
                  'save_cv_pred': True,
                  'save_cv_prob_train': False,
                  'save_csv_log': True,
                  'strategy_args': strategy_args,
                  'append_info': 'forward_window'}

    """Cross Validation Arguments"""
    from models.cross_validation import CrossValidation
    cv_args = {'ensemble': True,
               'n_cv': 12,
               'n_era': 90,
               'cv_generator': CrossValidation.forward_window,
               'window_size': 40}
    # cv_args = self.get_cv_args('xgb_fw')

    """Reduced Features"""
    reduced_feature_list = None

    """Base Parameters"""
    base_parameters = self.get_base_params('xgb_fw')
    # base_parameters = None

    """Auto Train with Logs of Boost Round"""
    # cv_weights_range = [self.get_cv_weight('range', 1, i + 1) for i in [5, 8, 10, 12, 15, 20]]
    # cv_weights_log = [self.get_cv_weight('log', 1, i + 1) for i in [5, 8, 10, 12, 15, 20]]
    # n_cv_list = [5, 8, 10, 12, 15, 20] * 6
    # import numpy as np
    # valid_rate_list = np.array([[i] * 6 for i in [0.075, 0.1, 0.125, 0.15, 0.175, 0.2]]).reshape(-1,).tolist()
    # cv_weights_list = cv_weights_log * 6
    pg_list = [
        # [['n_cv', (8, 9, 10), (11, 12, 13), [15], (18, 20)],
        #  ['valid_rate', (0.075, 0.1, 0.125, 0.15, 0.166, 0.175, 0.2)],
        #  ['window_size', (32, 34, 36, 40, 42, 44, 46, 48)]]
        # [['n_cv', n_cv_list],
        #  ['valid_rate', valid_rate_list],
        #  ['cv_weights', cv_weights_list]]
        # [['max_depth', [11]],
        #  ['min_child_weight', [9]],
        #  ['subsample', (0.88, 0.90, 0.92)],
        #  ['colsample_bytree', (0.86, 0.88, 0.9)],
        #  ['colsample_bylevel', (0.7, 0.75, 0.8)]]
        [['learning_rate', [0.003]]]
    ]
    train_seed_list = [999]
    cv_seed_list = [95]
    TM.auto_train_boost_round('xgb', num_boost_round=100, n_epoch=1, full_grid_search=True,
                              use_multi_group=False, train_seed_list=train_seed_list,
                              cv_seed_list=cv_seed_list, base_parameters=base_parameters,
                              parameter_grid_list=pg_list, save_final_pred=False,
                              reduced_feature_list=reduced_feature_list,
                              train_args=train_args, cv_args=cv_args)
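# CrossValidation.forward_window is used as cv_generator above but is not
# shown here. Judging by its arguments (n_era=90, n_cv=12, window_size=40),
# it looks like a walk-forward scheme: train on a sliding window of eras and
# validate on the block immediately after it. A minimal sketch of that idea
# (hypothetical semantics, not the repo's implementation):
def forward_window(n_era, n_cv, window_size):
    """Yield (train_eras, valid_eras) index lists for each fold."""
    step = max(1, (n_era - window_size) // n_cv)
    for i in range(n_cv):
        start = i * step
        train_eras = list(range(start, start + window_size))
        valid_end = min(start + window_size + step, n_era)
        valid_eras = list(range(start + window_size, valid_end))
        yield train_eras, valid_eras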
def train(self):
    """
    ## Auto Grid Search Parameters ##

    Model Name:
        'lr':          Logistic Regression
        'rf':          Random Forest
        'et':          Extra Trees
        'ab':          AdaBoost
        'gb':          GradientBoosting
        'xgb':         XGBoost
        'xgb_sk':      XGBoost using scikit-learn module
        'lgb':         LightGBM
        'lgb_sk':      LightGBM using scikit-learn module
        'cb':          CatBoost
        'dnn':         Deep Neural Networks
        'stack_lgb':   LightGBM for stack layer
        'christar':    Christar1991
        'prejudge_b':  PrejudgeBinary
        'prejudge_m':  PrejudgeMultiClass
        'stack_t':     StackTree
    """
    TM = TrainingMode()

    """Training Arguments"""
    train_args = {'prescale': False,
                  'postscale': True,
                  'use_scale_pos_weight': False,
                  'use_global_valid': False,
                  'use_custom_obj': False,
                  'show_importance': False,
                  'show_accuracy': False,
                  'save_final_pred': True,
                  'save_final_prob_train': False,
                  'save_cv_pred': False,
                  'save_cv_prob_train': False,
                  'save_csv_log': True,
                  'append_info': 'forward_window_postscale'}

    """Cross Validation Arguments"""
    # cv_args = {'n_valid': 4,
    #            'n_cv': 20,
    #            'n_era': 20}
    cv_args = self.get_cv_args('lgb_fi')

    """Reduced Features"""
    reduced_feature_list = None

    """Base Parameters"""
    base_parameters = self.get_base_params('xgb_fw')
    # base_parameters = None

    """Auto Grid Search Parameters"""
    pg_list = [
        [['max_depth', (8, 9, 10)],
         ['min_child_weight', (2, 4, 6, 8)],
         ['subsample', (0.81, 0.84, 0.87, 0.9)],
         ['colsample_bytree', (0.8, 0.85, 0.9)],
         ['colsample_bylevel', (0.7, 0.75, 0.8)]]
    ]
    train_seed_list = [999]
    cv_seed_list = [95]
    # train_seed_list = None
    # cv_seed_list = None
    TM.auto_grid_search('xgb', num_boost_round=95, n_epoch=1, full_grid_search=True,
                        use_multi_group=False, train_seed_list=train_seed_list,
                        cv_seed_list=cv_seed_list, parameter_grid_list=pg_list,
                        base_parameters=base_parameters, save_final_pred=False,
                        reduced_feature_list=reduced_feature_list,
                        train_args=train_args, cv_args=cv_args)
def train(self):
    """
    Model Name:
        'lr':      Logistic Regression
        'rf':      Random Forest
        'ab':      AdaBoost
        'gb':      GradientBoosting
        'xgb':     XGBoost
        'xgb_sk':  XGBoost using scikit-learn module
        'lgb':     LightGBM
        'lgb_sk':  LightGBM using scikit-learn module
        'cb':      CatBoost
    """
    TM = TrainingMode()

    """Global Seed"""
    train_seed = random.randint(0, 500)
    cv_seed = random.randint(0, 500)
    # train_seed = 666
    # cv_seed = 216  # 425 48 461 157

    """Training Arguments"""
    train_args = {'use_global_valid': False,
                  'use_custom_obj': False,
                  'show_importance': False,
                  'save_final_pred': True,
                  'save_final_pred_train': False,
                  'save_cv_pred': False,
                  'save_cv_pred_train': False,
                  'save_csv_log': True,
                  'loss_fuc': None,
                  'append_info': 'forward_window_postscale_mdp-11_sub'}

    """Cross Validation Arguments"""
    # cv_args = {'n_splits': 10,
    #            'n_cv': 10}
    cv_args = self.get_cv_args('xgb')

    """Base Parameters"""
    # base_parameters = self.get_base_params('xgb')
    base_parameters = None

    """Train Single Model"""
    # TM.train_single_model('xgb', train_seed, cv_seed, num_boost_round=88,
    #                       reduced_feature_list=reduced_feature_list, base_parameters=base_parameters,
    #                       train_args=train_args, use_multi_group=False)

    """Auto Train with Logs of Boost Round"""
    pg_list = [
        # [['n_cv', n_cv_list],
        #  ['valid_rate', valid_rate_list]]
        [['max_depth', [11]],
         ['min_child_weight', [9]],
         ['subsample', (0.88, 0.90, 0.92)],
         ['colsample_bytree', (0.86, 0.88, 0.9)],
         ['colsample_bylevel', (0.7, 0.75, 0.8)]]
        # [['learning_rate', [0.003]]]
    ]
    train_seed_list = [999]
    cv_seed_list = [95]
    TM.auto_train_boost_round('xgb', num_boost_round=100, n_epoch=1, full_grid_search=True,
                              train_seed_list=train_seed_list, cv_seed_list=cv_seed_list,
                              base_parameters=base_parameters, parameter_grid_list=pg_list,
                              save_final_pred=False, train_args=train_args, cv_args=cv_args)

    """Train Different Rounds"""
    # num_boost_round_list = [83, 85, 87]
    # self.train_diff_round('xgb', TM, num_boost_round_list=num_boost_round_list, n_epoch=1,
    #                       full_grid_search=True, train_seed_list=train_seed_list,
    #                       cv_seed_list=cv_seed_list, base_parameters=base_parameters,
    #                       parameter_grid_list=pg_list, save_final_pred=True,
    #                       train_args=train_args, cv_args=cv_args)

    """Auto Grid Search Parameters"""
    # pg_list = [
    #     [['max_depth', (8, 9, 10)],
    #      ['min_child_weight', (2, 4, 6, 8)],
    #      ['subsample', (0.81, 0.84, 0.87, 0.9)],
    #      ['colsample_bytree', (0.8, 0.85, 0.9)],
    #      ['colsample_bylevel', (0.7, 0.75, 0.8)]]
    # ]
    # train_seed_list = [999]
    # cv_seed_list = [95]
    # # train_seed_list = None
    # # cv_seed_list = None
    # TM.auto_grid_search('xgb', num_boost_round=95, n_epoch=1, full_grid_search=True,
    #                     train_seed_list=train_seed_list, cv_seed_list=cv_seed_list,
    #                     parameter_grid_list=pg_list, base_parameters=base_parameters,
    #                     save_final_pred=False, train_args=train_args, cv_args=cv_args)

    print('======================================================')
    print('Global Train Seed: {}'.format(train_seed))
    print('Global Cross Validation Seed: {}'.format(cv_seed))
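# self.train_diff_round (commented out above) is not defined in this snippet.
# It is presumably a thin wrapper that repeats auto_train_boost_round once per
# entry of num_boost_round_list. A hypothetical sketch of such a wrapper:
def train_diff_round(model_name, TM, num_boost_round_list, **kwargs):
    """Run one boost-round training pass per round count (sketch)."""
    for num_boost_round in num_boost_round_list:
        TM.auto_train_boost_round(model_name, num_boost_round=num_boost_round,
                                  **kwargs)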
def train(self):
    """
    ## Train Single Model ##

    Model Name:
        'lr':          Logistic Regression
        'rf':          Random Forest
        'et':          Extra Trees
        'ab':          AdaBoost
        'gb':          GradientBoosting
        'xgb':         XGBoost
        'xgb_sk':      XGBoost using scikit-learn module
        'lgb':         LightGBM
        'lgb_sk':      LightGBM using scikit-learn module
        'cb':          CatBoost
        'dnn':         Deep Neural Networks
        'stack_lgb':   LightGBM for stack layer
        'christar':    Christar1991
        'prejudge_b':  PrejudgeBinary
        'prejudge_m':  PrejudgeMultiClass
        'stack_t':     StackTree
    """
    TM = TrainingMode()

    """Global Seed"""
    train_seed = random.randint(0, 1000)
    cv_seed = random.randint(0, 1000)
    # train_seed = 666
    # cv_seed = 216  # 425 48 461 157

    """Training Arguments"""
    train_args = {'prescale': False,
                  'postscale': True,
                  'use_scale_pos_weight': False,
                  'use_global_valid': False,
                  'use_custom_obj': False,
                  'show_importance': False,
                  'show_accuracy': False,
                  'save_final_pred': True,
                  'save_final_prob_train': False,
                  'save_cv_pred': False,
                  'save_cv_prob_train': False,
                  'save_csv_log': True,
                  'append_info': 'fw_v0.2_c20_w35'}

    """Cross Validation Arguments"""
    # cv_args = {'n_valid': 4,
    #            'n_cv': 20,
    #            'n_era': 20}
    cv_args = self.get_cv_args('lgb_fi')

    """Reduced Features"""
    reduced_feature_list = None

    """Base Parameters"""
    base_parameters = self.get_base_params('lgb')
    # base_parameters = None

    """Train Single Model"""
    TM.train_single_model('lgb', train_seed, cv_seed, num_boost_round=100,
                          reduced_feature_list=reduced_feature_list, base_parameters=base_parameters,
                          train_args=train_args, cv_args=cv_args, use_multi_group=True)

    print('======================================================')
    print('Global Train Seed: {}'.format(train_seed))
    print('Global Cross Validation Seed: {}'.format(cv_seed))
def train(self):
    """
    ## Auto Grid Search Parameters ##

    Model Name:
        'lr':      Logistic Regression
        'rf':      Random Forest
        'et':      Extra Trees
        'gb':      GradientBoosting
        'xgb':     XGBoost
        'xgb_sk':  XGBoost using scikit-learn module
        'lgb':     LightGBM
        'lgb_sk':  LightGBM using scikit-learn module
        'cb':      CatBoost
    """
    TM = TrainingMode()

    """Training Arguments"""
    train_args = {'use_global_valid': False,
                  'use_custom_obj': False,
                  'show_importance': False,
                  'save_final_pred': True,
                  'save_final_pred_train': False,
                  'save_cv_pred': False,
                  'save_cv_pred_train': False,
                  'save_csv_log': True,
                  'loss_fuc': None,
                  'append_info': 'forward_window_postscale_mdp-11_sub'}

    """Cross Validation Arguments"""
    # cv_args = {'n_splits': 10,
    #            'n_cv': 10}
    cv_args = self.get_cv_args('xgb')

    """Base Parameters"""
    # base_parameters = self.get_base_params('xgb')
    base_parameters = None

    """Auto Grid Search Parameters"""
    pg_list = [
        [['max_depth', (8, 9, 10)],
         ['min_child_weight', (2, 4, 6, 8)],
         ['subsample', (0.81, 0.84, 0.87, 0.9)],
         ['colsample_bytree', (0.8, 0.85, 0.9)],
         ['colsample_bylevel', (0.7, 0.75, 0.8)]]
    ]
    train_seed_list = [999]
    cv_seed_list = [95]
    # train_seed_list = None
    # cv_seed_list = None
    TM.auto_grid_search('xgb', num_boost_round=95, n_epoch=1, full_grid_search=True,
                        train_seed_list=train_seed_list, cv_seed_list=cv_seed_list,
                        parameter_grid_list=pg_list, base_parameters=base_parameters,
                        save_final_pred=False, train_args=train_args, cv_args=cv_args)
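# With full_grid_search=True, the grid above is presumably expanded as a full
# Cartesian product, so one epoch trains 3 * 4 * 4 * 3 * 3 = 432 parameter
# combinations per (train_seed, cv_seed) pair. A quick sanity check:
import math

grid_sizes = [3, 4, 4, 3, 3]  # lengths of the five value tuples above
assert math.prod(grid_sizes) == 432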