Example #1
    def train(self):
        """
            ## Auto Train with Logs of Boost Round ##

            Model Name:
            'lr':           Logistic Regression
            'rf':           Random Forest
            'et':           Extra Trees
            'gb':           GradientBoosting
            'xgb':          XGBoost
            'xgb_sk':       XGBoost using scikit-learn module
            'lgb':          LightGBM
            'lgb_sk':       LightGBM using scikit-learn module
            'cb':           CatBoost
        """
        TM = TrainingMode()
        """
            Training Arguments
        """
        train_args = {
            'use_global_valid': False,
            'use_custom_obj': False,
            'show_importance': False,
            'save_final_pred': True,
            'save_final_pred_train': False,
            'save_cv_pred': True,
            'save_cv_pred_train': False,
            'save_csv_log': True,
            'loss_fuc': self.rmse,
            'append_info': 'Yuanan Bike'
        }
        """
            Cross Validation Arguments
        """
        cv_args = {'n_cv': 10}
        """
            Base Parameters
        """
        base_parameters = self.get_base_params('dnn')
        """
            Auto Train with Logs of Boost Round
        """
        pg_list = [[['learning_rate', [0.05]]]]
        train_seed_list = [68]
        cv_seed_list = [95]
        TM.auto_train_boost_round('dnn',
                                  num_boost_round=10,
                                  n_epoch=1,
                                  full_grid_search=True,
                                  train_seed_list=train_seed_list,
                                  cv_seed_list=cv_seed_list,
                                  base_parameters=base_parameters,
                                  parameter_grid_list=pg_list,
                                  save_final_pred=True,
                                  train_args=train_args,
                                  cv_args=cv_args)
        """Train Different Rounds"""
Example #2
    def train(self):
        """
            Model Name:
            'lr':           Logistic Regression
            'rf':           Random Forest
            'et':           Extra Trees
            'ab':           AdaBoost
            'gb':           GradientBoosting
            'xgb':          XGBoost
            'xgb_sk':       XGBoost using scikit-learn module
            'lgb':          LightGBM
            'lgb_sk':       LightGBM using scikit-learn module
            'cb':           CatBoost
            'dnn':          Deep Neural Networks
            'stack_lgb':    LightGBM for stack layer
            'christar':     Christar1991
            'prejudge_b':   PrejudgeBinary
            'prejudge_m':   PrejudgeMultiClass
            'stack_t':      StackTree
        """
        TM = TrainingMode()

        """
            Global Seed
        """
        train_seed = random.randint(0, 500)
        cv_seed = random.randint(0, 500)
        # train_seed = 666
        # cv_seed = 216  # 425 48 461 157

        """
            Training Arguments
        """
        train_args = {'train_seed': train_seed,
                      'prescale': False,
                      'postscale': False,
                      'use_scale_pos_weight': False,
                      'use_global_valid': False,
                      'use_custom_obj': False,
                      'show_importance': False,
                      'show_accuracy': True,
                      'save_final_pred': True,
                      'save_final_prob_train': False,
                      'save_cv_pred': False,
                      'save_cv_prob_train': False,
                      'save_csv_log': True,
                      'append_info': None}

        """
            Cross Validation Arguments
        """
        cv_args = {'n_valid': 4,
                   'n_cv': 20,
                   'n_era': 20}

        # cv_args = self.get_cv_args('lgb_fi')

        """
            Reduced Features
        """
        reduced_feature_list = None

        """
            Base Parameters
        """
        base_parameters = self.get_base_params('xgb')

        # base_parameters = None

        """
            Train Single Model
        """
        # TM.train_single_model('xgb', train_seed, cv_seed, num_boost_round=88,
        #                       reduced_feature_list=reduced_feature_list, base_parameters=base_parameters,
        #                       train_args=train_args, use_multi_group=False)

        """
            Auto Train with Logs of Boost Round
        """
        pg_list = [
            [['learning_rate', [0.003]]]
        ]
        # train_seed_list = [666]
        # cv_seed_list = [216]
        train_seed_list = None
        cv_seed_list = None
        TM.auto_train_boost_round('xgb', num_boost_round=100, grid_search_n_cv=20, n_epoch=100, full_grid_search=False,
                                  use_multi_group=True, train_seed_list=train_seed_list, cv_seed_list=cv_seed_list,
                                  base_parameters=base_parameters, parameter_grid_list=pg_list, save_final_pred=True,
                                  reduced_feature_list=reduced_feature_list, train_args=train_args, cv_args=cv_args)

        """
            Auto Grid Search Parameters
        """
        # pg_list = [
        #            [['max_depth', (8, 9, 10, 11, 12)],
        #             ['feature_fraction', (0.5, 0.6, 0.7, 0.8, 0.9)],
        #             ['bagging_fraction', (0.6, 0.7, 0.8, 0.9)],
        #             ['bagging_freq', (1, 3, 5, 7)]]
        #            ]
        # train_seed_list = [999]
        # cv_seed_list = [888]
        # # train_seed_list = None
        # # cv_seed_list = None
        # TM.auto_grid_search('lgb', num_boost_round=65, grid_search_n_cv=5, n_epoch=1, use_multi_group=True,
        #                     full_grid_search=True, train_seed_list=train_seed_list, cv_seed_list=cv_seed_list,
        #                     parameter_grid_list=pg_list, base_parameters=base_parameters, save_final_pred=False,
        #                     reduced_feature_list=reduced_feature_list, train_args=train_args, cv_args=cv_args)

        """
            Auto Train
        """
        # TM.auto_train('lgb', n_epoch=10000, base_parameters=base_parameters,
        #               reduced_feature_list=reduced_feature_list, train_args=train_args, use_multi_group=False)

        """
            Others
        """
        # TM.train_single_model('dnn', train_seed, cv_seed,  reduced_feature_list=reduced_feature_list,
        #                       base_parameters=base_parameters, train_args=train_args,  use_multi_group=False)
        # TM.train_single_model('prejudge_b', train_seed, cv_seed, load_pickle=False,
        #                       base_parameters=base_parameters, reduced_feature_list=reduced_feature_list,
        #                       train_args=train_args, use_multi_group=False)
        # TM.train_single_model('stack_lgb', train_seed, cv_seed, auto_idx=1,
        #                       base_parameters=base_parameters, reduced_feature_list=reduced_feature_list,
        #                       train_args=train_args, use_multi_group=False)
        # TM.train_single_model('stack_pt', train_seed, cv_seed, reduced_feature_list=reduced_feature_list,
        #                       base_parameters=base_parameters, train_args=train_args, use_multi_group=False)

        # pg_list = [
        #            [['learning_rate', [0.00005]]],
        #            [['keep_probability', [0.4, 0.5, 0.6, 0.7, 0.8, 0.9]]],
        #            # [['unit_number',
        #            #   [
        #            #    [32, 16, 8],
        #            #    [48, 24, 12],
        #            #    [64, 32], [64, 32, 16],
        #            #    [128, 64], [128, 64, 32], [128, 64, 32, 16],
        #            #    [256, 128], [256, 128, 64], [256, 128, 64, 32], [256, 128, 64, 32, 16],
        #            #    [200, 100, 50],
        #            #    [2048, 512],
        #            #    [288, 144, 72], [288, 144, 72, 36],
        #            #    [216, 108, 54], [216, 108, 54, 27],
        #            #    [128, 256, 128, 64], [64, 128, 64, 32], [128, 256, 128], [64, 128, 64]
        #            #    ]]]
        #            ]
        # train_seed_list = [666]
        # cv_seed_list = [216]
        # TM.auto_train_boost_round('dnn', train_seed_list, cv_seed_list, n_epoch=1, base_parameters=base_parameters,
        #                           epochs=2, parameter_grid_list=pg_list, save_final_pred=True,
        #                           reduced_feature_list=reduced_feature_list, grid_search_n_cv=20,
        #                           train_args=train_args, use_multi_group=False)

        # TM.auto_train('stack_t', n_epoch=2, stack_final_epochs=10, base_parameters=base_parameters,
        #               reduced_feature_list=reduced_feature_list, train_args=train_args, use_multi_group=False)

        print('======================================================')
        print('Global Train Seed: {}'.format(train_seed))
        print('Global Cross Validation Seed: {}'.format(cv_seed))
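
The `parameter_grid_list` entries above are lists of grids, each grid a list of `[name, values]` pairs. With `full_grid_search=True`, every combination of values is presumably trained once; a standalone sketch of that expansion (a hypothetical helper, not TrainingMode's internals):

from itertools import product

def expand_grid(grid):
    # grid: list of [parameter_name, values] pairs.
    # Yields one parameter dict per point of the Cartesian product.
    names = [name for name, _ in grid]
    for combo in product(*(values for _, values in grid)):
        yield dict(zip(names, combo))

# The commented-out grid from Example #2 would expand to 5 * 5 * 4 * 4 points:
grid = [['max_depth', (8, 9, 10, 11, 12)],
        ['feature_fraction', (0.5, 0.6, 0.7, 0.8, 0.9)],
        ['bagging_fraction', (0.6, 0.7, 0.8, 0.9)],
        ['bagging_freq', (1, 3, 5, 7)]]
print(sum(1 for _ in expand_grid(grid)))  # 400 combinations
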
Example #3
    def train(self):
        """
            ## Auto Train with Logs of Boost Round ##

            Model Name:
            'lr':           Logistic Regression
            'rf':           Random Forest
            'et':           Extra Trees
            'gb':           GradientBoosting
            'xgb':          XGBoost
            'xgb_sk':       XGBoost using scikit-learn module
            'lgb':          LightGBM
            'lgb_sk':       LightGBM using scikit-learn module
            'cb':           CatBoost
        """
        TM = TrainingMode()
        """
            Training Arguments
        """
        train_args = {
            'use_global_valid': False,
            'use_custom_obj': False,
            'show_importance': False,
            'save_final_pred': True,
            'save_final_pred_train': False,
            'save_cv_pred': False,
            'save_cv_pred_train': False,
            'save_csv_log': True,
            'loss_fuc': self.rmse,
            'append_info': 'Yuanan Bike'
        }
        """
            Cross Validation Arguments
        """
        cv_args = {'n_cv': 10}

        # cv_args = self.get_cv_args('xgb')
        """
            Base Parameters
        """
        # base_parameters = self.get_base_params('xgb')
        base_parameters = None
        """
            Auto Train with Logs of Boost Round
        """
        pg_list = [
            # [
            #     ['n_cv', [5, 7, 9]],
            #     ['valid_rate', valid_rate_list]
            # ],
            [
                ['n_cv', [5, 7, 9]],
                ['max_depth', [12, 13, 14]],  # best: 14
                ['learning_rate', [0.15, 0.2, 0.25]],  # best: 0.15
                # ['min_child_weight', [9]],
                ['subsample', [0.75, 0.8]],  # best: 0.8
                ['colsample_bytree', [0.7]],  # best: 0.7
                ['colsample_bylevel', [0.77]],  # best: 0.77
            ]
        ]
        train_seed_list = None
        cv_seed_list = None
        TM.auto_train_boost_round('xgb',
                                  num_boost_round=600,
                                  n_epoch=3,
                                  full_grid_search=True,
                                  train_seed_list=train_seed_list,
                                  cv_seed_list=cv_seed_list,
                                  base_parameters=base_parameters,
                                  parameter_grid_list=pg_list,
                                  save_final_pred=True,
                                  train_args=train_args,
                                  cv_args=cv_args)
        """Train Different Rounds"""
Example #4
    def train(self):
        """
            ## Auto Train with Logs of Boost Round ##

            Model Name:
            'lr':           Logistic Regression
            'rf':           Random Forest
            'et':           Extra Trees
            'ab':           AdaBoost
            'gb':           GradientBoosting
            'xgb':          XGBoost
            'xgb_sk':       XGBoost using scikit-learn module
            'lgb':          LightGBM
            'lgb_sk':       LightGBM using scikit-learn module
            'cb':           CatBoost
            'dnn':          Deep Neural Networks
            'stack_lgb':    LightGBM for stack layer
            'christar':     Christar1991
            'prejudge_b':   PrejudgeBinary
            'prejudge_m':   PrejudgeMultiClass
            'stack_t':      StackTree
        """
        TM = TrainingMode()

        """
            Strategy Arguments
        """
        from models.strategy import Strategy
        strategy_args = {'use_strategy': True,
                         'f_strategy': Strategy.calc_profit,
                         'buy_count': 15,
                         'fee': 0.0034,
                         'ascending': False,
                         'save_result': True,
                         'save_image': True}
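        # Hypothetical shape of the profit callback configured above (the real
        # Strategy.calc_profit lives in models.strategy and may differ): rank
        # the predictions, take the top `buy_count` picks (descending when
        # ascending=False), and net out the `fee`, roughly:
        #     order = predictions.argsort()
        #     picks = order[:buy_count] if ascending else order[-buy_count:]
        #     profit = returns[picks].mean() - fee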

        """
            Training Arguments
        """
        train_args = {'prescale': False,
                      'postscale': True,
                      'use_scale_pos_weight': False,
                      'use_global_valid': False,
                      'use_custom_obj': False,
                      'show_importance': False,
                      'show_accuracy': True,
                      'save_final_pred': True,
                      'save_final_prob_train': False,
                      'save_cv_pred': True,
                      'save_cv_prob_train': False,
                      'save_csv_log': True,
                      'strategy_args': strategy_args,
                      'append_info': 'forward_window'}

        """
            Cross Validation Arguments
        """
        from models.cross_validation import CrossValidation
        cv_args = {'ensemble': True,
                   'n_cv': 12,
                   'n_era': 90,
                   'cv_generator': CrossValidation.forward_window,
                   'window_size': 40}
        # cv_args = self.get_cv_args('xgb_fw')

        """
            Reduced Features
        """
        reduced_feature_list = None

        """
            Base Parameters
        """
        base_parameters = self.get_base_params('xgb_fw')

        # base_parameters = None

        """
            Auto Train with Logs of Boost Round
        """
        # cv_weights_range = [self.get_cv_weight('range', 1, i+1) for i in [5, 8, 10, 12, 15, 20]]
        # cv_weights_log = [self.get_cv_weight('log', 1, i+1) for i in [5, 8, 10, 12, 15, 20]]
        # n_cv_list = [5, 8, 10, 12, 15, 20] * 6
        # import numpy as np
        # valid_rate_list = np.array([[i]*6 for i in [0.075, 0.1, 0.125, 0.15, 0.175, 0.2]]).reshape(-1,).tolist()
        # cv_weights_list = cv_weights_log*6
        pg_list = [
                   # [['n_cv', (8, 9, 10), (11, 12, 13), [15], (18, 20)],
                   #  ['valid_rate', (0.075, 0.1, 0.125, 0.15, 0.166, 0.175, 0.2)],
                   #  ['window_size', (32, 34, 36, 40, 42, 44, 46, 48)]]
                   # [['n_cv', n_cv_list],
                   #  ['valid_rate', valid_rate_list],
                   #  ['cv_weights', cv_weights_list]]
                   # [
                   #  ['max_depth', [11]],
                   #  ['min_child_weight', [9]],
                   #  ['subsample', (0.88, 0.90, 0.92)],
                   #  ['colsample_bytree', (0.86, 0.88, 0.9)],
                   #  ['colsample_bylevel', (0.7, 0.75, 0.8)]
                   #  ]
                   [['learning_rate', [0.003]]]
                   ]
        train_seed_list = [999]
        cv_seed_list = [95]
        TM.auto_train_boost_round('xgb', num_boost_round=100, n_epoch=1, full_grid_search=True,
                                  use_multi_group=False, train_seed_list=train_seed_list, cv_seed_list=cv_seed_list,
                                  base_parameters=base_parameters, parameter_grid_list=pg_list, save_final_pred=False,
                                  reduced_feature_list=reduced_feature_list, train_args=train_args, cv_args=cv_args)
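
Example #4 swaps in `CrossValidation.forward_window` with `n_era=90` and `window_size=40`, i.e. walk-forward validation over time-ordered eras. A self-contained sketch of one plausible such generator (an illustration of the idea, not the project's implementation):

def forward_window(n_era, n_cv, window_size):
    # Slide a fixed-size training window over time-ordered eras; validate
    # on the eras immediately following each window position.
    step = max(1, (n_era - window_size) // n_cv)
    for i in range(n_cv):
        start = i * step
        train_eras = list(range(start, start + window_size))
        valid_eras = list(range(start + window_size,
                                min(start + window_size + step, n_era)))
        if not valid_eras:
            break
        yield train_eras, valid_eras

for train_eras, valid_eras in forward_window(n_era=90, n_cv=12, window_size=40):
    print(train_eras[0], train_eras[-1], valid_eras)
    # first fold trains on eras 0-39 and validates on eras 40-43
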
Example #5
    def train(self):
        """
            Model Name:
            'lr':           Logistic Regression
            'rf':           Random Forest
            'ab':           AdaBoost
            'gb':           GradientBoosting
            'xgb':          XGBoost
            'xgb_sk':       XGBoost using scikit-learn module
            'lgb':          LightGBM
            'lgb_sk':       LightGBM using scikit-learn module
            'cb':           CatBoost
        """
        TM = TrainingMode()
        """
            Global Seed
        """
        train_seed = random.randint(0, 500)
        cv_seed = random.randint(0, 500)
        # train_seed = 666
        # cv_seed = 216  # 425 48 461 157
        """
            Training Arguments
        """
        train_args = {
            'use_global_valid': False,
            'use_custom_obj': False,
            'show_importance': False,
            'save_final_pred': True,
            'save_final_pred_train': False,
            'save_cv_pred': False,
            'save_cv_pred_train': False,
            'save_csv_log': True,
            'loss_fuc': None,
            'append_info': 'forward_window_postscale_mdp-11_sub'
        }
        """
            Cross Validation Arguments
        """
        # cv_args = {'n_splits': 10,
        #            'n_cv': 10}

        cv_args = self.get_cv_args('xgb')
        """
            Base Parameters
        """
        # base_parameters = self.get_base_params('xgb')
        base_parameters = None
        """
            Train Single Model
        """
        # TM.train_single_model('xgb', train_seed, cv_seed, num_boost_round=88,
        #                       reduced_feature_list=reduced_feature_list, base_parameters=base_parameters,
        #                       train_args=train_args, use_multi_group=False)
        """
            Auto Train with Logs of Boost Round
        """
        pg_list = [
            # [
            #  ['n_cv', n_cv_list],
            #  ['valid_rate', valid_rate_list]
            #  ]
            [['max_depth', [11]], ['min_child_weight', [9]],
             ['subsample', (0.88, 0.90, 0.92)],
             ['colsample_bytree', (0.86, 0.88, 0.9)],
             ['colsample_bylevel', (0.7, 0.75, 0.8)]]
            # [['learning_rate', [0.003]]]
        ]
        train_seed_list = [999]
        cv_seed_list = [95]
        TM.auto_train_boost_round('xgb',
                                  num_boost_round=100,
                                  n_epoch=1,
                                  full_grid_search=True,
                                  train_seed_list=train_seed_list,
                                  cv_seed_list=cv_seed_list,
                                  base_parameters=base_parameters,
                                  parameter_grid_list=pg_list,
                                  save_final_pred=False,
                                  train_args=train_args,
                                  cv_args=cv_args)
        """Train Different Rounds"""
        # num_boost_round_list = [83, 85, 87]
        # self.train_diff_round('xgb', TM, num_boost_round_list=num_boost_round_list, n_epoch=1, full_grid_search=True,
        #                       train_seed_list=train_seed_list, cv_seed_list=cv_seed_list,
        #                       base_parameters=base_parameters, parameter_grid_list=pg_list, save_final_pred=True,
        #                       train_args=train_args, cv_args=cv_args)
        """
            Auto Grid Search Parameters
        """
        # pg_list = [
        #            [
        #             ['max_depth', (8, 9, 10)],
        #             ['min_child_weight', (2, 4, 6, 8)],
        #             ['subsample', (0.81, 0.84, 0.87, 0.9)],
        #             ['colsample_bytree', (0.8, 0.85, 0.9)],
        #             ['colsample_bylevel', (0.7, 0.75, 0.8)],
        #             ]
        #            ]
        # train_seed_list = [999]
        # cv_seed_list = [95]
        # # train_seed_list = None
        # # cv_seed_list = None
        # TM.auto_grid_search('xgb', num_boost_round=95, n_epoch=1, full_grid_search=True,
        #                     train_seed_list=train_seed_list, cv_seed_list=cv_seed_list,
        #                     parameter_grid_list=pg_list, base_parameters=base_parameters, save_final_pred=False,
        #                     train_args=train_args, cv_args=cv_args)

        print('======================================================')
        print('Global Train Seed: {}'.format(train_seed))
        print('Global Cross Validation Seed: {}'.format(cv_seed))
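
All five examples lean on `self.get_base_params(...)` and `self.get_cv_args(...)` keyed by tags such as 'xgb', 'xgb_fw', 'lgb_fi', and 'dnn'. Those helpers are defined elsewhere in the project; one plausible shape for such a preset lookup (the parameter values below are placeholders drawn from the grids above, not the project's tuned settings):

def get_base_params(self, model_name):
    # Named parameter presets per model tag; values are illustrative only.
    presets = {
        'xgb': {'max_depth': 11, 'learning_rate': 0.003,
                'subsample': 0.9, 'colsample_bytree': 0.88},
        'dnn': {'unit_number': [128, 64, 32], 'keep_probability': 0.7,
                'learning_rate': 0.00005},
    }
    return presets[model_name]
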