def tune_gbdt_params(model, X, y, n_splits) -> dict:
    '''
    Tune num_boost_round by training with early stopping on time-series CV
    folds; also records per-fold validation AUC and feature importances.
    '''
    # start tuning train log
    create_logger('train', **c.log)
    logger_train = getLogger('train')
    logger_train.debug('{}\t{}\t{}\t{}'.format(
        'fold', 'iteration', 'train_auc', 'val_auc'))

    aucs = list()
    feature_importances = pd.DataFrame()
    feature_importances['feature'] = X.columns

    # split data into train, validation
    folds = TimeSeriesSplit(n_splits=n_splits)
    for i, (idx_train, idx_val) in enumerate(folds.split(X, y)):
        fold = i + 1
        with blocktimer(f'Fold {fold}'):
            # prepare
            logger.info(f'Training on fold {fold}')
            X_train = X.iloc[idx_train]
            y_train = y.iloc[idx_train]
            X_val = X.iloc[idx_val]
            y_val = y.iloc[idx_val]

            # train
            model = model.train_and_validate(X_train, y_train, X_val, y_val,
                                             logger_train, fold)

            # record result
            feature_importances[f'fold_{fold}'] = model.feature_importance
            aucs.append(model.validation_auc)
            # TODO: save model at each step
            logger.debug(f'Fold {fold} finished')

    logger.info('Training has finished.')
    logger.debug(f'Mean AUC: {np.mean(aucs)}')
    # TODO: save feature importance and other

    # make optimal config from result
    optimal_c_model = model.config
    if model.best_iteration is not None:
        # new param
        optimal_c_model.params['num_boost_round'] = model.best_iteration
    else:
        logger.warning(
            'Did not meet early stopping. Try larger num_boost_round.')
    # not needed once num_boost_round is optimized
    del optimal_c_model.params['early_stopping_rounds']
    return optimal_c_model
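
# TimeSeriesSplit (from scikit-learn) is what gives the folds above their
# expanding-window shape: each fold trains on an earlier prefix of the rows and
# validates on the block that immediately follows, so no future data leaks into
# training. A small standalone illustration on synthetic data (not part of the
# experiment, included only to show the split pattern, assuming rows are in
# time order):
import numpy as np
from sklearn.model_selection import TimeSeriesSplit

X_demo = np.arange(10).reshape(-1, 1)
for k, (idx_tr, idx_va) in enumerate(
        TimeSeriesSplit(n_splits=3).split(X_demo), start=1):
    print(f'fold {k}: train={idx_tr.tolist()} val={idx_va.tolist()}')
# fold 1: train=[0, 1, 2, 3] val=[4, 5]
# fold 2: train=[0, 1, 2, 3, 4, 5] val=[6, 7]
# fold 3: train=[0, 1, 2, 3, 4, 5, 6, 7] val=[8, 9]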
def tune_gbdt_params(model, X, y, n_splits) -> dict:
    '''
    Tune parameter num_boost_round
    '''
    # start tuning train log
    create_logger('train', **c.log)
    logger_train = getLogger('train')
    logger_train.debug('{}\t{}\t{}\t{}'.format(
        'fold', 'iteration', 'train_auc', 'val_auc'))
    # aucs = list()

    # split data into train, validation
    folds = TimeSeriesSplit(n_splits=n_splits)
    for i, (idx_train, idx_val) in enumerate(folds.split(X, y)):
        fold = i + 1
        with blocktimer(f'Training on Fold {fold}'):
            X_train = X.iloc[idx_train]
            y_train = y.iloc[idx_train]
            X_val = X.iloc[idx_val]
            y_val = y.iloc[idx_val]

            # train
            model = model.train_and_validate(X_train, y_train, X_val, y_val,
                                             logger_train, fold)
            model.save(
                c.model.dir /
                f'model_{c.runtime.VERSION}_{c.model.TYPE}_fold{fold}.pkl')

            # record result
            # aucs.append(model.val_auc)
            # logger.info(f'train_auc: {model.train_auc} val_auc: {model.val_auc}')
    # logger.info(f'Mean AUC: {np.mean(aucs)}')

    # make optimal config from result
    optimal_c_model = model.config
    if model.best_iteration is not None:
        optimal_c_model.params['num_boost_round'] = model.best_iteration
    else:
        logger.warning(
            'Did not meet early stopping. Try larger num_boost_round.')
    # not needed once num_boost_round is optimized
    del optimal_c_model.params['early_stopping_rounds']
    return optimal_c_model
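
# `blocktimer` is used in both versions of the tuning loop above but is not
# defined in this excerpt. Below is a minimal sketch of a compatible context
# manager (an assumption about the real helper, which may format or route the
# timing message differently):
import time
from contextlib import contextmanager
from logging import getLogger


@contextmanager
def blocktimer(name):
    # Log wall-clock time spent inside the `with` block.
    start = time.time()
    getLogger('main').info(f'{name} started')
    try:
        yield
    finally:
        getLogger('main').info(f'{name} done in {time.time() - start:.1f}s')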
if __name__ == "__main__":
    gc.enable()
    warnings.filterwarnings('ignore')

    # read config & apply options
    c = EasyDict(config)
    opt = parse_option()
    c.transformer.USE_SMALL_DATA = opt.small
    c.log.slackauth.NO_SEND_MESSAGE = opt.nomsg

    seed_everything(c.runtime.RANDOM_SEED)

    create_logger('main', **c.log)
    logger = getLogger('main')
    logger.info(
        f':thinking_face: Starting experiment {c.runtime.VERSION}_{c.runtime.DESCRIPTION}')
    try:
        main(c)
        logger.info(
            f':sunglasses: Finished experiment {c.runtime.VERSION}_{c.runtime.DESCRIPTION}')
    except Exception:
        logger.critical(
            f':smiling_imp: Exception occurred\n{traceback.format_exc()}')
        logger.critical(
            f':skull: Stopped experiment {c.runtime.VERSION}_{c.runtime.DESCRIPTION}')
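
# `parse_option` and `seed_everything` are called above but not defined in this
# excerpt. Minimal sketches compatible with how they are used (`opt.small`,
# `opt.nomsg`, a single integer seed); the flag names and the exact set of
# seeded libraries are assumptions, not the project's definitive versions:
import argparse
import os
import random

import numpy as np


def parse_option():
    parser = argparse.ArgumentParser()
    parser.add_argument('--small', action='store_true',
                        help='run on a small subset of the data')
    parser.add_argument('--nomsg', action='store_true',
                        help='do not send log messages to Slack')
    return parser.parse_args()


def seed_everything(seed):
    # Fix the sources of randomness the pipeline touches.
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)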
seed_everything(c.runtime.random_seed)

# check config.experiment_type
if c.experiment_type != experiment_type:
    raise ValueError(
        f'experiment_type in config: {c.experiment_type} does not match this script')

# start logging
r.paths.main_log_path = f'log/main_{c.runtime.version}{c.runtime.dsize}.log'
r.paths.train_log_path = f'log/train_{c.runtime.version}{c.runtime.dsize}.tsv'
slackauth = EasyDict(json.load(open('./slackauth.json', 'r')))
slackauth.token_path = Path().home() / slackauth.token_file
create_logger('main',
              version=c.runtime.version,
              log_path=r.paths.main_log_path,
              slackauth=slackauth,
              no_send_message=c.runtime.no_send_message)
create_logger('train',
              version=c.runtime.version,
              log_path=r.paths.train_log_path,
              slackauth=slackauth,
              no_send_message=c.runtime.no_send_message)
logger = getLogger('main')
logger.info(
    f':thinking_face: Starting experiment {c.runtime.version}_{c.model.type}{c.runtime.dsize}')
logger.info(f'Options indicated: {opt}')
'''
logger_train = getLogger('train')
logger_train.debug('{}\t{}\t{}\t{}'.format(
    'fold', 'iteration', 'train_auc', 'val_auc'))