def cross_validation(train, params, ID_COLUMN_NAME, LABEL_COLUMN_NAME, N_FOLD=5):
    """Train LightGBM on stratified folds and return the mean macro-F1.

    :param train: DataFrame holding the feature columns plus the id and
        label columns.
    :param params: parameter dict handed unchanged to ``lgb.train``.
    :param ID_COLUMN_NAME: name of the id column; excluded from the features.
    :param LABEL_COLUMN_NAME: name of the label column; used as the training
        target and as the stratification key.
    :param N_FOLD: number of stratified folds.
    :return: macro-averaged F1 score, averaged over all ``N_FOLD`` validation
        folds. This is a score (higher is better), not a loss.
    """
    NUM_BOOST_ROUND = 1000
    EARLY_STOPPING_ROUNDS = 50

    # Cross validation model
    folds = StratifiedKFold(n_splits=N_FOLD, shuffle=True, random_state=1001)
    feats = [
        f for f in train.columns
        if f not in (LABEL_COLUMN_NAME, ID_COLUMN_NAME)
    ]

    fold_scores = []
    for i_fold, (train_idx, valid_idx) in enumerate(
            folds.split(train[feats], train[LABEL_COLUMN_NAME])):
        dtrain = lgb.Dataset(data=train[feats].iloc[train_idx],
                             label=train[LABEL_COLUMN_NAME].iloc[train_idx],
                             free_raw_data=False,
                             silent=True)
        dvalid = lgb.Dataset(data=train[feats].iloc[valid_idx],
                             label=train[LABEL_COLUMN_NAME].iloc[valid_idx],
                             free_raw_data=False,
                             silent=True)

        with timer('cross validation-fold {} train model'.format(i_fold)):
            log.info('params is {}'.format(params))
            clf = lgb.train(num_boost_round=NUM_BOOST_ROUND,
                            params=params,
                            verbose_eval=10,
                            train_set=dtrain,
                            valid_sets=[dvalid],
                            early_stopping_rounds=EARLY_STOPPING_ROUNDS)

        with timer('cross validation-fold {} predict'.format(i_fold)):
            v_data = clf.predict(dvalid.data)
            # Index of the highest score in each row; ties resolve to the
            # lowest index. The row width is used instead of a fixed class
            # count so the code follows whatever `num_class` params requests.
            y_pre = [
                max(range(len(row)), key=row.__getitem__) for row in v_data
            ]

        fold_scores.append(f1_score(dvalid.label, y_pre, average='macro'))

    # Average over every fold so the result is a cross-validated estimate
    # rather than the score of one arbitrary split.
    return sum(fold_scores) / len(fold_scores)
def write2file(col_id, pre_label, name=None):
    """Write one predicted label per id to ``result{name}.csv``.

    :param col_id: values stored in the ``ID`` column of the output.
    :param pre_label: model output; converted with ``one_hot2label_index``
        and then ``index2label`` to obtain the ``predict`` column.
    :param name: suffix inserted into the timer title and the file name.
    """
    timer_title = 'write result {}'.format(name)
    out_path = 'result{}.csv'.format(name)
    with timer(timer_title):
        label_indices = one_hot2label_index(pre_label)
        result = pd.DataFrame()
        result[ID] = col_id
        result['predict'] = index2label(label_indices)
        result.to_csv(out_path, index=False)
def data_prepare(df_train, df_test):
    """Pre-process the train/test frames and persist them as CSV files.

    The continuous columns are handed to ``normalize_process``, the label
    column of ``df_train`` is handed to ``base_data_process.label2index``, the
    resulting encode/decode tables are pickled, and both frames are written
    under ``origin_data/``.

    :param df_train: training DataFrame (must contain the ``LABEL`` column).
    :param df_test: test DataFrame.
    """
    continuous_columns = [
        '1_total_fee', '2_total_fee', '3_total_fee', '4_total_fee',
        'contract_time', 'former_complaint_fee', 'former_complaint_num',
        'last_month_traffic', 'local_caller_time', 'local_trafffic_month',
        'month_traffic', 'online_time', 'pay_num', 'pay_times',
        'service1_caller_time', 'service2_caller_time', 'pay_num_per_time',
        'll'
    ]
    # presumably normalises both frames in place - the return value is unused
    normalize_process(df_train, df_test, continuous_columns)

    # label 2 index
    base_data_process.label2index(df_train, LABEL)
    label_tables = (base_data_process.encode_map,
                    base_data_process.decode_list)
    base_util.pickle_dump(label_tables, 'origin_data/label2index.pkl')

    outputs = (
        ('save train data', df_train, 'origin_data/train_modified.csv'),
        ('save test data', df_test, 'origin_data/test_modified.csv'),
    )
    for title, frame, path in outputs:
        with timer(title):
            frame.to_csv(path, index=False)
def optimization():
    """Run the hyperopt TPE search over the LightGBM parameter space.

    ``objective`` is minimised for ``config_dict['max_evals']`` evaluations;
    the best assignment is logged and the ``Trials`` object is pickled to
    ``model_trials.pkl``.
    """
    leaf_options = [15, 20, 30, 50, 65, 80, 100, 150, 400]
    bin_sample_options = [10000, 20000, 60000, 100000, 200000]
    alpha_options = [0, 0.001, 0.01, 0.1, 0.2]
    lambda_options = [0, 0.001, 0.01, 0.1, 0.2]
    bagging_freq_options = [0, 2, 6, 10, 16]

    space = {
        'learning_rate': 0.1,
        'boosting_type': hp.choice('boosting_type', ['gbdt']),
        'num_leaves': hp.choice('num_leaves', leaf_options),
        'bin_construct_sample_cnt': hp.choice('bin_construct_sample_cnt',
                                              bin_sample_options),
        'min_data_in_leaf': hp.quniform('min_data_in_leaf', 20, 500, 10),
        'reg_alpha': hp.choice('reg_alpha', alpha_options),
        'reg_lambda': hp.choice('reg_lambda', lambda_options),
        'feature_fraction': hp.uniform('feature_fraction', 0.8, 1.0),
        'bagging_fraction': hp.uniform('bagging_fraction', 0.8, 1.0),
        'bagging_freq': hp.choice('bagging_freq', bagging_freq_options),
        'is_unbalance': hp.choice('is_unbalance', [True, False]),
        'num_threads': 40,
        'objective': 'multiclass',
        'num_class': 15,
        'verbose': -1
    }

    trials = Trials()
    max_evals = config_dict['max_evals']
    with timer('optimization'):
        # Run optimization
        best = fmin(fn=objective,
                    space=space,
                    algo=tpe.suggest,
                    trials=trials,
                    max_evals=max_evals)

    separator = '-' * 100
    print(separator)
    log.warn(best)

    # Keep the full search history so it can be inspected later.
    with open('model_trials.pkl', mode='wb') as mt:
        pickle.dump(trials, mt)
def write2file(col_id, pre_label, name=None):
    """Write the decoded arg-max prediction of every row to ``result{name}.csv``.

    :param col_id: values stored in the ``user_id`` column of the output.
    :param pre_label: iterable of per-class score rows (one row per id).
    :param name: suffix inserted into the timer title and the file name.
    """
    with timer('write result {}'.format(name)):
        # Position of the highest score in each row (ties resolve to the
        # lowest index), translated back to a label through `decode_list`.
        # The row width is used rather than a fixed class count.
        y_pre = [
            decode_list[max(range(len(scores)), key=scores.__getitem__)]
            for scores in pre_label
        ]
        df = pd.DataFrame()
        df['user_id'] = col_id
        df['predict'] = y_pre
        df.to_csv('result{}.csv'.format(name), index=False)
def objective(hyperparameters):
    """Evaluate one hyper-parameter assignment and report it as a loss.

    Increments the module-level ``ITERATION`` counter, runs
    ``cross_validation`` on ``config_dict['train']`` and appends one row to
    ``hyperparameters.csv``.

    :param hyperparameters: dict of sampled parameters; the integer-valued
        entries are cast to ``int`` in place before use.
    :return: dict with ``loss`` (``1 - f1**2``), the evaluated
        ``hyperparameters``, the ``iteration`` number, ``train_time`` and
        ``status`` set to ``STATUS_OK``.
    """
    # Keep track of evals
    global ITERATION
    ITERATION += 1

    # Make sure parameters that need to be integers are integers
    for parameter_name in ('num_leaves', 'bin_construct_sample_cnt',
                           'bagging_freq', 'min_data_in_leaf'):
        hyperparameters[parameter_name] = int(hyperparameters[parameter_name])

    with timer('run lgb') as ti:
        # Perform n_folds cross validation
        f1 = cross_validation(config_dict['train'], hyperparameters,
                              'user_id', 'current_service')
        loss = 1 - f1**2
        run_time = ti.get_delay_t0()

    # Append to the csv file. The context manager closes the handle even if
    # writing fails, and newline='' lets the csv module control line endings.
    with open('hyperparameters.csv', 'a', newline='') as of_connection:
        writer = csv.writer(of_connection)
        writer.writerow(
            [loss, hyperparameters, ITERATION, run_time, 1 - loss])

    log.info('iteration-{} f1:{} loss:{} train_time:{}'.format(
        ITERATION, f1, loss, run_time))

    # Dictionary with information for evaluation
    return {
        'loss': loss,
        'hyperparameters': hyperparameters,
        'iteration': ITERATION,
        'train_time': run_time,
        'status': STATUS_OK
    }
def main(limit):
    """
    This method is used to generate processed data for train and test.

    For each of the modes ``train``, ``valid`` and ``test`` the matching files
    in ``INPUT_DIR`` are loaded and two pickles are written.

    :param limit: maximum number of examples per mode; stored in the global
        ``LIMITED``. A value ``<= 0`` means "no limit".
    :return: No return, only write files
        (1) {mode}_{LIMITED}_wfd.pkl write index, words, tags, len_w
        (2) {mode}_{LIMITED}_wbd.pkl write bert embedding in line
    """
    # change LIMITED
    global LIMITED
    LIMITED = limit
    modes = ['train', 'valid', 'test']
    mode_map = {'train': [0, 1, 2, 3, 4], 'valid': [5], 'test': [6]}
    mod_num = 7
    import os
    for mode in modes:
        if mode == 'valid':
            # NOTE(review): the reduced limit stays in effect for 'test' as
            # well because LIMITED is never restored - confirm this is intended.
            LIMITED = LIMITED // 5
        word_flag_data = []
        word_bert_emb_data = []
        origin_bert_emb = collections.OrderedDict()
        tags = collections.OrderedDict()
        tokens = collections.OrderedDict()
        for f in os.listdir(INPUT_DIR):
            if ('json' not in f
                    and bu.get_str_index(f, mod_num) in mode_map[mode]):
                data_file = INPUT_DIR + f
                # Keep loading while unlimited (LIMITED <= 0) or while fewer
                # than LIMITED entries are loaded. The former guard required
                # LIMITED > 0 and therefore loaded nothing without a limit,
                # contradicting the "no limit" handling in the loop below.
                if LIMITED <= 0 or len(origin_bert_emb) < LIMITED:
                    with bu.timer(f'load {data_file} bert emb'):
                        load_bert_embedding(data_file, origin_bert_emb, tags,
                                            tokens)
        index = 0
        # `sent_tags` (not `tags`) so the loop does not rebind the dict that
        # was handed to the generator.
        for i, words_emb_bert, words, sent_tags, len_w in generator_fn(
                origin_bert_emb, tags, tokens):
            # used to generate small dataset if set LIMITED's value
            if LIMITED > 0 and index >= LIMITED:
                break
            if words_emb_bert is None:
                continue
            if i % 1000 == 0:
                print(f'{mode} index:{i} finished!')
            assert len(words_emb_bert) == len(
                words) == len_w, f'length not match in {i},' \
                                 f'{len(words_emb_bert)}-{len(words)}-{len_w}'
            word_bert_emb_data.append(words_emb_bert)
            word_flag_data.append((index, words, sent_tags, len_w))
            index += 1
        with open(DATA_DIR + f'/processed/{mode}_{LIMITED}_wfd.pkl',
                  'wb') as wfd, open(
                      DATA_DIR + f'/processed/{mode}_{LIMITED}_wbd.pkl',
                      'wb') as wbd:
            with bu.timer(f'write {mode} to file'):
                # each line is (index, words, tags, len_w)
                pickle.dump(word_flag_data, wfd)
                # each line is word's bert context embedding
                pickle.dump(word_bert_emb_data, wbd)
def model(train, test, num_folds=5, stratified=True, num_boost_round=1000):
    """Train one LightGBM multiclass model per fold and write test predictions.

    For every fold the model is trained with early stopping on the held-out
    part, the macro-F1 on that part is logged, the test predictions are added
    to a running sum and that sum is written out through ``write2file``.
    Feature importances of all folds are plotted at the end.

    :param train: training DataFrame containing the ``LABEL`` and ``ID``
        columns next to the features.
    :param test: test DataFrame containing ``ID`` and the feature columns.
    :param num_folds: number of cross-validation folds.
    :param stratified: use ``StratifiedKFold`` when true, ``KFold`` otherwise.
    :param num_boost_round: upper bound on boosting rounds per fold.
    """
    LABEL_SIZE = train[LABEL].value_counts().count()
    print("Starting LightGBM. Train shape: {}, test shape: {}".format(
        train.shape, test.shape))
    gc.collect()

    # Cross validation model
    if stratified:
        folds = StratifiedKFold(n_splits=num_folds,
                                shuffle=True,
                                random_state=1001)
    else:
        folds = KFold(n_splits=num_folds, shuffle=True, random_state=1001)

    # Create arrays and dataframes to store results
    sub_preds = np.zeros(shape=(test.shape[0], LABEL_SIZE))
    fold_importances = []
    feats = [f for f in train.columns if f not in (LABEL, ID)]

    # Fold-independent, so built once. `num_class` follows LABEL_SIZE so the
    # prediction width always matches `sub_preds`.
    params = {
        'bagging_fraction': 0.94795171020152,
        'bagging_freq': 6,
        'bin_construct_sample_cnt': 200000,
        'boosting_type': 'gbdt',
        'feature_fraction': 0.9953235660931046,
        'is_unbalance': False,
        'learning_rate': 0.001,
        'min_data_in_leaf': 30,
        'num_class': int(LABEL_SIZE),
        'num_leaves': 80,
        'num_threads': 40,
        'objective': 'multiclass',
        'reg_alpha': 0.001,
        'reg_lambda': 0.1,
        'verbose': -1
    }

    for i_fold, (train_idx, valid_idx) in enumerate(
            folds.split(train[feats], train[LABEL])):
        dtrain = lgb.Dataset(data=train[feats].iloc[train_idx],
                             label=train[LABEL].iloc[train_idx],
                             free_raw_data=False,
                             silent=True)
        dvalid = lgb.Dataset(data=train[feats].iloc[valid_idx],
                             label=train[LABEL].iloc[valid_idx],
                             free_raw_data=False,
                             silent=True)

        with timer('fold {} train model'.format(i_fold)):
            # A fresh copy per fold keeps the shared dict untouched.
            clf = lgb.train(num_boost_round=num_boost_round,
                            params=dict(params),
                            train_set=dtrain,
                            valid_sets=[dvalid],
                            early_stopping_rounds=50)

        with timer('fold {} predict'.format(i_fold)):
            v_data = clf.predict(dvalid.data)
            y_pre = one_hot2label_index(v_data)
            # Running sum over folds; the file written for fold k therefore
            # reflects folds 0..k combined.
            sub_preds += clf.predict(test[feats])
            write2file(test[ID], sub_preds, i_fold)

        fold_importance_df = pd.DataFrame()
        fold_importance_df["feature"] = feats
        fold_importance_df["importance"] = clf.feature_importance(
            importance_type='gain')
        fold_importance_df["fold"] = i_fold + 1
        fold_importances.append(fold_importance_df)

        f1 = f1_score(dvalid.label, y_pre, average='macro')
        log.warn('Fold {} f1 : {} score {}'.format(i_fold + 1, f1, f1**2))
        del clf, dtrain, dvalid
        gc.collect()

    # One concat at the end instead of re-copying the frame every fold.
    feature_importance_df = pd.concat(fold_importances, axis=0)
    display_importances(feature_importance_df)
# NOTE(review): the four statements below arrive here without an enclosing
# definition. They read `col_id`, `y_pre` and `name`, none of which is defined
# at this point, so they look like the tail of a result-writing helper whose
# header is not visible - confirm their scope before relying on them.
df = pd.DataFrame()
df[ID] = col_id
df['predict'] = index2label(y_pre)
df.to_csv('result{}.csv'.format(name), index=False)


# Display/plot feature importance
def display_importances(feature_importance_df_):
    """Plot per-feature importances and save the figure.

    The 40 features with the highest mean ``importance`` are selected and all
    of their rows are drawn as a horizontal bar plot, which is saved to
    ``lgbm_importances01.png``.

    :param feature_importance_df_: DataFrame with at least the columns
        ``feature`` and ``importance``.
    """
    # Names of the 40 features with the highest mean importance.
    cols = feature_importance_df_[[
        "feature", "importance"
    ]].groupby("feature").mean().sort_values(by="importance",
                                              ascending=False)[:40].index
    # Keep every row of those features so the bar plot can aggregate them.
    best_features = feature_importance_df_.loc[
        feature_importance_df_.feature.isin(cols)]
    plt.figure(figsize=(8, 10))
    sns.barplot(x="importance",
                y="feature",
                data=best_features.sort_values(by="importance",
                                               ascending=False))
    plt.title('LightGBM Features (avg over folds)')
    plt.tight_layout()
    plt.savefig('lgbm_importances01.png')


# Script entry point: build the data, encode the labels, then train.
if __name__ == '__main__':
    with timer('data process'):
        df_train, df_test = eda()
        label2index(df_train, LABEL)
    with timer('model process'):
        model(df_train, df_test, num_folds=5, num_boost_round=10000)
def main():
    """Train a ``Bert_CRF`` tagger from command-line arguments.

    Parses the arguments, builds the tag vocabulary, optionally restores
    pre-trained weights, then trains with Adam. After every epoch the model is
    evaluated on ``--valid_input`` (and, unless ``--undo_train_valid`` is
    given, on a random sample of the training set). The best model according
    to ``--monitor`` is saved; a checkpoint is also saved every 5 epochs.
    Training stops once the monitored value has not improved for more than 8
    epochs and no learning-rate decays (``--lr_times``) are left.
    """

    def _str2bool(value):
        # argparse's type=bool treats every non-empty string - including
        # "False" - as True, so boolean text is parsed explicitly.
        lowered = value.strip().lower()
        if lowered in ('1', 'true', 't', 'yes', 'y', 'on'):
            return True
        if lowered in ('', '0', 'false', 'f', 'no', 'n', 'off'):
            return False
        raise argparse.ArgumentTypeError(
            'expected a boolean value, got {!r}'.format(value))

    params = {
        'output_dir': str(Path(RESULT_DIR, 'res_torch')),
        'checkpoint': str(Path(RESULT_DIR, 'res_torch/model')),
        'glove_dim': 300,
        'vocab_tags': str(Path(DATA_DIR, 'processed/vocab.tags.txt')),
        'glove': str(Path(DATA_DIR, 'embedding/glove.npz')),
        'words': str(Path(DATA_DIR, 'processed/vocab.words.txt')),
        'tags': str(Path(DATA_DIR, 'processed/vocab.tags.txt')),
    }

    parser = argparse.ArgumentParser()
    parser.add_argument('--undo_train_valid',
                        help="undo train data as valid",
                        action='store_true',
                        default=False)
    parser.add_argument('--input',
                        help="input dir or file",
                        type=str,
                        required=True)
    parser.add_argument('--valid_input',
                        help="valid data input dir or file",
                        type=str,
                        required=True)
    parser.add_argument('--output',
                        help="output file dir for writing result",
                        type=str,
                        default=params['output_dir'])
    parser.add_argument('--limit',
                        help="if use data limit",
                        type=int,
                        default=0)
    parser.add_argument('--gpu_index',
                        help="gpu index must>-1,if use gpu",
                        type=int,
                        default=0)
    parser.add_argument('--dropout',
                        help="dropout rate in embed and liner layer",
                        type=float,
                        default=0.2)
    parser.add_argument('--batch_size',
                        help="batch size od data",
                        type=int,
                        default=32)
    parser.add_argument('--hidden_size',
                        help="set the hidden size",
                        type=int,
                        default=128)
    parser.add_argument('--epochs',
                        help="epochs of train",
                        type=int,
                        default=100)
    parser.add_argument('--monitor',
                        help="monitor f1,acc,precision or recall, "
                        "value like ORG:f1 or PER:acc or LOC:recall",
                        type=str,
                        default='ORG:f1')
    parser.add_argument('--use_glove',
                        help="denote whether use use_glove",
                        type=_str2bool,
                        default=False)
    parser.add_argument('--model_name',
                        help="file name of model file",
                        type=str,
                        default='ner_model_crf')
    parser.add_argument(
        '--mode_type',
        help="choose transformer(t) or biLstm(b) or only crf(c)",
        choices=['b', 't', 'c', 'bt', 'cnn'],
        type=str,
        default='b')
    parser.add_argument('--bert_dim', help="bert dim", type=int, default=768)
    parser.add_argument('--te_dropout',
                        help="te dropout",
                        type=float,
                        default=0.1)
    parser.add_argument('--lr',
                        help="learning rate",
                        type=float,
                        default=3e-4)
    parser.add_argument('--lr_times',
                        help="learning rate decay times",
                        type=int,
                        default=0)
    parser.add_argument('--wd',
                        help="weight decay",
                        type=float,
                        default=1e-3)
    parser.add_argument('--head_num',
                        help="set the head num",
                        type=int,
                        default=8)
    parser.add_argument('--vip',
                        help="the ip or domain of visdom server",
                        type=str,
                        default='')
    parser.add_argument('--env',
                        help="the name of env of visdom",
                        type=str,
                        default='ner')
    parser.add_argument('--pre_model_path',
                        help="the pre model path",
                        type=str,
                        default='')
    parser.add_argument('--use_cross_entropy',
                        help="use cross entropy loss",
                        action='store_true',
                        default=False)
    args = parser.parse_args()

    params['dropout'] = args.dropout
    params['use_glove'] = args.use_glove
    params['bert_dim'] = args.bert_dim
    params['mode_type'] = args.mode_type
    params['hidden_size'] = args.hidden_size
    # just for transformer
    params['te_dropout'] = args.te_dropout
    params['head_num'] = args.head_num
    params['use_cross_entropy'] = args.use_cross_entropy

    # Split once up front so a malformed --monitor fails before training
    # instead of after the first full epoch.
    tag_type, monitor_type = args.monitor.split(':')

    model_time_str = args.model_name + '_' + bu.get_time_str()
    log = bu.get_logger(model_time_str)

    if args.vip:
        vis = visdom.Visdom(args.vip, env=args.env)
    else:
        vis = None

    word_to_ix = {'<pad>': 0}
    if params['use_glove']:
        with open(params['words']) as wvf:
            for word in wvf:
                word = word.strip()
                if word not in word_to_ix:
                    word_to_ix[word] = len(word_to_ix)

    tag_to_ix = {'O': 0}
    with open(params['tags']) as wvf:
        for tag in wvf:
            tag = tag.strip()
            if tag not in tag_to_ix:
                tag_to_ix[tag] = len(tag_to_ix)
    idx_to_tag = {tag_to_ix[key]: key for key in tag_to_ix}

    if args.gpu_index > -1:
        device = torch.device(f'cuda:{args.gpu_index}')
    else:
        device = torch.device('cpu')

    model = Bert_CRF(tag_to_ix, params, device)
    model.to(device)

    if args.pre_model_path:
        with Path(args.pre_model_path).open('rb') as mp:
            # Remap stored tensors to the CPU when no GPU is requested.
            if args.gpu_index < 0:
                ml = 'cpu'
            else:
                ml = None
            best_state_dict = torch.load(mp, map_location=ml)
            # strict=False: tolerate missing/unexpected keys in the checkpoint
            model.load_state_dict(best_state_dict, False)

    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           weight_decay=args.wd)

    # begin to train model
    step_index = 0
    # model, bert_dim, tag_to_ix, word_to_ix, rw, batch
    collate_fn = functools.partial(data_provider.collect_fn, model,
                                   params['bert_dim'], tag_to_ix, None, False)
    with bu.timer('load train data'):
        dataset = data_provider.BBNDatasetCombine(args.input, args.limit)
    data_loader = tud.DataLoader(dataset,
                                 args.batch_size,
                                 shuffle=True,
                                 collate_fn=collate_fn,
                                 drop_last=True)
    if not args.undo_train_valid:
        # Random 5000-example sample of the training set, used to report
        # training-side metrics after each epoch.
        sampler = tud.RandomSampler(data_source=dataset,
                                    replacement=True,
                                    num_samples=5000)
    else:
        sampler = None

    log.info('begin to train')
    Path(params['checkpoint']).mkdir(parents=True, exist_ok=True)
    monitor_best = 0
    wait = 0
    loss_train_epoch = []
    loss_valid_epoch = []
    loss_train_t = []
    loss_train_valid = []
    criterion_key = ['f1', 'precision', 'recall']
    criterion_map = {}
    lr_times = args.lr_times
    lr = args.lr
    for epoch in range(args.epochs):
        loss_train = []
        # index_batch, words_batch, words_ids_batch, len_w_batch, tags_batch
        # sentence_batch
        for i, w, wi, l, t, _ in data_loader:
            # Step 1. Remember that Pytorch accumulates gradients.
            model.zero_grad()
            # Step 2. Run our forward pass.
            # words, words_ids, len_w, tags
            loss = model.neg_log_likelihood(w, wi, l, t)
            # Step 3. Compute the loss, gradients, and update the parameters by
            # calling optimizer.step()
            ls = loss.mean()
            ls.backward()
            optimizer.step()
            step_index += 1
            step_loss = ls.item()
            log.info(
                f'global step:{step_index} epoch:{epoch} loss:{step_loss}')
            loss_train.append(step_loss)
            loss_train_t.append(step_loss)
            plot(vis, loss_train_t, args.model_name, ['train_loss'])

        if sampler:
            # collate_fn, model, args, tag_to_ix = None, idx_to_tag = None,
            # fpr = True, get_loss = False, input_dir = None, dataset_in = None,
            # sampler = None
            criterion, loss_valid_ = evaluate(collate_fn,
                                              model,
                                              args,
                                              tag_to_ix,
                                              idx_to_tag,
                                              True,
                                              True,
                                              dataset_in=dataset,
                                              sampler=sampler)
            for k in criterion:
                # ['f1', 'precision', 'recall']
                for ck in criterion_key:
                    key = f'train_{k}_{ck}'
                    if key not in criterion_map:
                        criterion_map[key] = []
                    criterion_map[key].append(criterion[k][ck])
            loss_train_valid.append(np.mean(loss_valid_))

        criterion, loss_valid = evaluate(collate_fn,
                                         model,
                                         args,
                                         tag_to_ix,
                                         idx_to_tag,
                                         True,
                                         True,
                                         input_dir=args.valid_input)
        loss_train_epoch.append(np.mean(loss_train))
        loss_valid_epoch.append(np.mean(loss_valid))
        for k in criterion:
            # ['f1', 'precision', 'recall']
            for ck in criterion_key:
                key = f'valid_{k}_{ck}'
                if key not in criterion_map:
                    criterion_map[key] = []
                criterion_map[key].append(criterion[k][ck])

        plot_data = []
        keys = list(criterion_map.keys())
        for k in criterion_map:
            plot_data.append(criterion_map[k])
        if sampler:
            legend = ['train_loss', 'valid_loss', 'train_loss_t'] + keys
            x_in = zip(loss_train_epoch, loss_valid_epoch, loss_train_valid,
                       *plot_data)
        else:
            legend = ['train_loss', 'valid_loss'] + keys
            x_in = zip(loss_train_epoch, loss_valid_epoch, *plot_data)
        plot(vis, x_in, args.model_name, legend)
        log.info(f'valid:{criterion}')

        # The first epoch always counts as an improvement (monitor_best == 0).
        if (criterion[tag_type][monitor_type] > monitor_best
                or monitor_best == 0):
            monitor_best = criterion[tag_type][monitor_type]
            wait = 0
            best_state_dict = model.state_dict()
            if monitor_best:
                save_mode(best_state_dict, params, tag_to_ix, args.model_name)
        else:
            wait += 1

        if (epoch + 1) % 5 == 0:
            temp_name = f't_{args.model_name}_{epoch+1}'
            save_mode(model.state_dict(), params, tag_to_ix, temp_name)

        if wait > 8:
            if lr_times:
                # Spend one decay: cut the learning rate to a third, rebuild
                # the optimizer and allow a shorter patience window.
                lr_times -= 1
                wait = 3
                lr /= 3
                optimizer = optim.Adam(model.parameters(),
                                       lr=lr,
                                       weight_decay=args.wd)
            else:
                log.warn(f'meat early stopping! best score is {monitor_best}')
                break
    log.info('finish train')
def main():
    """Load a saved ``Bert_CRF`` model and evaluate it on ``--input``.

    The weights (``{model_name}_torch.pkl``) and the pickled
    ``(params, tag_to_ix)`` pair (``{model_name}_config.pkl``) are read from
    the checkpoint directory; ``evaluate`` is then called with batch size 1
    and the output path ``{output}/{model_name}.txt``.
    """
    params = {
        'output_dir': str(Path(RESULT_DIR, 'res_torch')),
        'checkpoint': str(Path(RESULT_DIR, 'res_torch/model')),
        'glove_dim': 300,
        'vocab_tags': str(Path(DATA_DIR, 'processed/vocab.tags.txt')),
        'glove': str(Path(DATA_DIR, 'embedding/glove.npz')),
        'words': str(Path(DATA_DIR, 'processed/vocab.words.txt')),
        'tags': str(Path(DATA_DIR, 'processed/vocab.tags.txt')),
    }

    parser = argparse.ArgumentParser()
    parser.add_argument('--input',
                        help="input dir or file",
                        type=str,
                        required=True)
    parser.add_argument('--output',
                        help="output file dir for writing result",
                        type=str,
                        default=params['output_dir'])
    parser.add_argument('--limit',
                        help="if use data limit",
                        type=int,
                        default=0)
    parser.add_argument('--gpu_index',
                        help="gpu index must>-1,if use gpu",
                        type=int,
                        default=0)
    parser.add_argument('--model_name',
                        help="file name of model file",
                        type=str,
                        default='ner_model_crf')
    args = parser.parse_args()

    log = bu.get_logger(args.model_name + '_' + bu.get_time_str())
    log.info('begin predict')

    checkpoint_dir = params['checkpoint']
    fn_model = checkpoint_dir + f'/{args.model_name}_torch.pkl'
    fn_config = checkpoint_dir + f'/{args.model_name}_config.pkl'

    use_gpu = args.gpu_index > -1

    with Path(fn_model).open('rb') as mp:
        # Remap stored tensors to the CPU when no GPU is requested.
        ml = None if use_gpu else 'cpu'
        best_state_dict = torch.load(mp, map_location=ml)

    # The saved configuration replaces the defaults built above.
    with Path(fn_config).open('rb') as mp:
        params, tag_to_ix = pickle.load(mp)
    print(tag_to_ix)
    idx_to_tag = {ix: tag for tag, ix in tag_to_ix.items()}

    device = torch.device(f'cuda:{args.gpu_index}' if use_gpu else 'cpu')
    model = Bert_CRF(tag_to_ix, params, device)
    model.to(device)
    model.load_state_dict(best_state_dict, strict=False)

    with bu.timer('load data'):
        dataset = data_provider.BBNDatasetCombine(args.input, args.limit)

    # change batch_size to 1
    args.batch_size = 1
    # model, bert_dim, tag_to_ix, word_to_ix, rw, batch
    collate_fn = functools.partial(data_provider.collect_fn, model,
                                   params['bert_dim'], tag_to_ix, None, True)

    banner = '-' * 25
    log.warn(f"{banner}test_valid{banner}")
    result_path = f"{args.output}/{args.model_name}.txt"
    evaluate(collate_fn,
             model,
             args,
             tag_to_ix,
             idx_to_tag,
             True,
             False,
             result_path,
             dataset_in=dataset)
def model(train, test, num_folds=5, stratified=True, num_boost_round=1000):
    """Train one LightGBM multiclass model per fold and write test predictions.

    For every fold the model is trained with early stopping on the held-out
    part, the macro-F1 on that part is logged, the test predictions are added
    to a running sum and that sum is written out through ``write2file``.
    Feature importances of all folds are plotted at the end.

    :param train: training DataFrame containing the ``user_id`` and
        ``current_service`` columns next to the features.
    :param test: test DataFrame containing ``user_id`` and the feature columns.
    :param num_folds: number of cross-validation folds.
    :param stratified: use ``StratifiedKFold`` when true, ``KFold`` otherwise.
    :param num_boost_round: upper bound on boosting rounds per fold.
    """
    # Divide in training/validation and test data
    ID_COLUMN_NAME = 'user_id'
    LABEL_COLUMN_NAME = 'current_service'
    LABEL_SIZE = train[LABEL_COLUMN_NAME].value_counts().count()

    print("Starting LightGBM. Train shape: {}, test shape: {}".format(train.shape, test.shape))
    gc.collect()

    # Cross validation model
    if stratified:
        folds = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=1001)
    else:
        folds = KFold(n_splits=num_folds, shuffle=True, random_state=1001)

    # Create arrays and dataframes to store results
    sub_preds = np.zeros(shape=(test.shape[0], LABEL_SIZE))
    fold_importances = []
    feats = [f for f in train.columns if f not in (LABEL_COLUMN_NAME, ID_COLUMN_NAME)]

    # LightGBM parameters found by Bayesian optimization
    # {'boosting_type': 'dart', 'colsample_bytree': 0.9577639825746964, 'is_unbalance': False,
    # 'learning_rate': 0.11102546218712299, 'min_child_samples': 355, 'min_data_in_leaf': 101, 'num_class': 15,
    # 'num_leaves': 22, 'num_threads': 35, 'objective': 'multiclass', 'reg_alpha': 0.12542902430757463,
    # 'reg_lambda': 0.15833387646203106, 'subsample_for_bin': 260000, 'verbose': -1, 'subsample': 0.738876981095225}
    # Fold-independent, so built once outside the loop.
    params = {
        'objective': 'multiclass',
        'boosting_type': 'gbdt',
        'learning_rate': 0.1,
        'num_leaves': 80,
        'feature_fraction': 0.85,
        'bagging_fraction': 0.9,
        'bagging_freq': 10,
        'num_threads': 35,
        'verbose': -1,
        'max_bin': 550,
        'num_class': int(LABEL_SIZE)
    }

    for i_fold, (train_idx, valid_idx) in enumerate(folds.split(train[feats], train[LABEL_COLUMN_NAME])):
        dtrain = lgb.Dataset(data=train[feats].iloc[train_idx],
                             label=train[LABEL_COLUMN_NAME].iloc[train_idx],
                             free_raw_data=False,
                             silent=True)
        dvalid = lgb.Dataset(data=train[feats].iloc[valid_idx],
                             label=train[LABEL_COLUMN_NAME].iloc[valid_idx],
                             free_raw_data=False,
                             silent=True)

        with timer('fold {} train model'.format(i_fold)):
            # A fresh copy per fold keeps the shared dict untouched.
            clf = lgb.train(
                num_boost_round=num_boost_round,
                params=dict(params),
                train_set=dtrain,
                valid_sets=[dvalid],
                early_stopping_rounds=50
            )

        with timer('fold {} predict'.format(i_fold)):
            v_data = clf.predict(dvalid.data)
            # Most probable class per row; uses the actual prediction width,
            # which is LABEL_SIZE, rather than a fixed class count.
            y_pre = np.argmax(v_data, axis=1)
            # Running sum over folds; the file written for fold k therefore
            # reflects folds 0..k combined.
            sub_preds += clf.predict(test[feats])
            write2file(test[ID_COLUMN_NAME], sub_preds, i_fold)

        fold_importance_df = pd.DataFrame()
        fold_importance_df["feature"] = feats
        fold_importance_df["importance"] = clf.feature_importance(importance_type='gain')
        fold_importance_df["fold"] = i_fold + 1
        fold_importances.append(fold_importance_df)

        f1 = f1_score(dvalid.label, y_pre, average='macro')
        log.warn('Fold {} f1 : {} score {}'.format(i_fold + 1, f1, f1 ** 2))
        del clf, dtrain, dvalid
        gc.collect()

    # One concat at the end instead of re-copying the frame every fold.
    feature_importance_df = pd.concat(fold_importances, axis=0)
    display_importances(feature_importance_df)