def __call__(self, trial):
    """
    Optuna objective for one hyper-parameter configuration: samples the
    scoring-function and model parameters, then evaluates them over k folds by
    running a nested one-trial study per fold and averaging nDCG@1 on each
    fold's test set.
    :param trial: the optuna trial used to sample hyper-parameters
    :return: the k-fold average nDCG@1 (the value optuna maximizes)
    """
    # sample scoring-function & model parameters for this trial
    sf_para_dict = self.sf_parameter.grid_search(trial)
    if sf_para_dict['id'] == 'ffnns':
        sf_para_dict['ffnns'].update(dict(num_features=self.data_dict['num_features']))
    model_para_dict = self.model_parameter.grid_search(trial)

    k_fold_average = 0.
    for i in range(self.fold_num):  # evaluation over k-fold data
        fold_k = i + 1
        study = self.k_studies[i]
        train_data, test_data, vali_data = self.load_data(self.eval_dict, self.data_dict, fold_k)
        ranker = self.load_ranker(model_para_dict=model_para_dict, sf_para_dict=sf_para_dict)
        if self.eval_dict['vali_obj']:
            study.optimize(LTRObjective(ranker=ranker, epochs=self.epochs,
                                        label_type=train_data.label_type,
                                        train_data=train_data, vali_objective=True,
                                        vali_k=self.vali_k, vali_data=vali_data),
                           n_trials=1)
        else:
            study.optimize(LTRObjective(ranker=ranker, epochs=self.epochs,
                                        label_type=train_data.label_type,
                                        train_data=train_data, vali_objective=False),
                           n_trials=1)
        # n_trials=1: each nested per-fold study runs exactly one training trial.
        # (With n_trials=None optuna would keep creating trials until it received
        # a termination signal such as Ctrl+C or SIGTERM.)

        # score the trained ranker on this fold's test split with nDCG@1
        # NOTE(review): the objective aggregates nDCG on the *test* split —
        # confirm this is intended rather than the validation split.
        test_eval_tmp = ndcg_at_k(ranker=ranker, test_data=test_data, k=1,
                                  label_type=test_data.label_type,
                                  gpu=self.gpu, device=self.device)
        test_eval_v = test_eval_tmp.data.numpy()
        k_fold_average += test_eval_v

    # average the per-fold scores
    k_fold_average /= self.fold_num
    return k_fold_average
def __call__(self, trial):
    """
    Optuna objective for a single training run of ``self.ranker``.

    When ``self.vali_objective`` is True, the objective is nDCG@vali_k computed
    on the validation data (reported per epoch so pruners can stop the trial);
    otherwise the objective is the negated accumulated epoch loss.
    :param trial: the optuna trial, used for intermediate reporting and pruning
    :return: the final objective value (validation nDCG or negated epoch loss)
    :raises optuna.TrialPruned: when the pruner decides to stop this trial early
    """
    if self.vali_objective:
        for epoch_k in range(1, self.epochs + 1):
            # one-epoch fitting over the entire training data
            presort = self.train_data.presort
            for qid, batch_rankings, batch_stds in self.train_data:
                # _, [batch, ranking_size, num_features], [batch, ranking_size]
                if self.gpu:
                    batch_rankings, batch_stds = batch_rankings.to(self.device), batch_stds.to(self.device)
                batch_loss, stop_training = self.ranker.train(
                    batch_rankings, batch_stds, qid=qid, epoch_k=epoch_k,
                    presort=presort, label_type=self.label_type)
                #print(batch_loss)
                if stop_training:
                    break

            # Report intermediate objective value: nDCG@vali_k on validation data.
            vali_eval_tmp = ndcg_at_k(ranker=self.ranker, test_data=self.vali_data,
                                      k=self.vali_k, label_type=self.label_type,
                                      gpu=self.gpu, device=self.device)
            vali_eval_v = vali_eval_tmp.data.numpy()
            print(vali_eval_v)
            intermediate_value = vali_eval_v
            trial.report(intermediate_value, epoch_k)
            '''
            Report an objective function value for a given step.
            The reported values are used by the pruners to determine
            whether this trial should be pruned.
            '''
            # Handle pruning based on the intermediate value.
            if trial.should_prune():
                raise optuna.TrialPruned()

        # using validation data again: final objective after all epochs complete
        vali_eval_tmp = ndcg_at_k(ranker=self.ranker, test_data=self.vali_data,
                                  k=self.vali_k, label_type=self.label_type,
                                  gpu=self.gpu, device=self.device)
        vali_eval_v = vali_eval_tmp.data.numpy()
        return vali_eval_v
    else:
        for epoch_k in range(1, self.epochs + 1):
            # one-epoch fitting over the entire training data, accumulating the
            # per-batch losses into epoch_loss (on GPU when available)
            epoch_loss = torch.cuda.FloatTensor([0.0]) if self.gpu else torch.FloatTensor([0.0])
            presort = self.train_data.presort
            for qid, batch_rankings, batch_stds in self.train_data:
                # _, [batch, ranking_size, num_features], [batch, ranking_size]
                if self.gpu:
                    batch_rankings, batch_stds = batch_rankings.to(self.device), batch_stds.to(self.device)
                batch_loss, stop_training = self.ranker.train(
                    batch_rankings, batch_stds, qid=qid, epoch_k=epoch_k,
                    presort=presort, label_type=self.label_type)
                #print(batch_loss)
                if stop_training:
                    break
                else:
                    epoch_loss += batch_loss.item()

            intermediate_value = -epoch_loss  # due to the maximize setting
            trial.report(intermediate_value, epoch_k)
            '''
            Report an objective function value for a given step.
            The reported values are used by the pruners to determine
            whether this trial should be pruned.
            '''
            # Handle pruning based on the intermediate value.
            if trial.should_prune():
                raise optuna.TrialPruned()
        # negate so that smaller loss maps to a larger (maximized) objective
        return -epoch_loss
def ad_cv_eval(self, data_dict=None, eval_dict=None, ad_para_dict=None, sf_para_dict=None):
    """
    Adversarial training and evaluation via k-fold cross validation.

    Trains a generator/discriminator pair per fold, optionally tracking
    per-epoch validation scores to checkpoint the best models, then reports
    fold-wise and averaged nDCG for both generator and discriminator.
    :param data_dict: settings w.r.t. data
    :param eval_dict: settings w.r.t. evaluation
    :param ad_para_dict: settings w.r.t. the adversarial model
    :param sf_para_dict: settings w.r.t. the scoring function
    :return: None (results are printed; checkpoints are written under self.dir_run)
    """
    self.check_consistency(data_dict, eval_dict)
    self.display_information(data_dict, model_para_dict=ad_para_dict)
    self.setup_eval(data_dict, eval_dict, sf_para_dict, model_para_dict=ad_para_dict)

    model_id = ad_para_dict['model_id']
    fold_num = data_dict['fold_num']
    # for quick access of common evaluation settings
    epochs, loss_guided = eval_dict['epochs'], eval_dict['loss_guided']
    vali_k, log_step, cutoffs = eval_dict['vali_k'], eval_dict['log_step'], eval_dict['cutoffs']
    do_vali, do_summary = eval_dict['do_validation'], eval_dict['do_summary']

    # the scoring function must know the input feature dimensionality
    if sf_para_dict['id'] == 'ffnns':
        sf_para_dict['ffnns'].update(dict(num_features=data_dict['num_features']))
    else:
        raise NotImplementedError

    ad_machine = self.get_ad_machine(eval_dict=eval_dict, data_dict=data_dict,
                                     sf_para_dict=sf_para_dict, ad_para_dict=ad_para_dict)

    time_begin = datetime.datetime.now()  # timing
    # fold averages for generator (G) and discriminator (D), one slot per cutoff
    g_l2r_cv_avg_scores, d_l2r_cv_avg_scores = np.zeros(len(cutoffs)), np.zeros(len(cutoffs))

    for fold_k in range(1, fold_num + 1):
        dict_buffer = dict()  # for buffering frequently used objs
        # fresh G/D parameters per fold so folds do not leak into each other
        ad_machine.reset_generator_discriminator()
        fold_optimal_checkpoint = '-'.join(['Fold', str(fold_k)])
        train_data, test_data, vali_data = self.load_data(eval_dict, data_dict, fold_k)

        if do_vali:
            g_fold_optimal_ndcgk, d_fold_optimal_ndcgk = 0.0, 0.0
        if do_summary:
            list_epoch_loss = []  # not used yet
            g_list_fold_k_train_eval_track, g_list_fold_k_test_eval_track, g_list_fold_k_vali_eval_track = [], [], []
            d_list_fold_k_train_eval_track, d_list_fold_k_test_eval_track, d_list_fold_k_vali_eval_track = [], [], []

        # warm-up passes before adversarial training proper
        for _ in range(10):
            ad_machine.burn_in(train_data=train_data)

        for epoch_k in range(1, epochs + 1):
            # IR_GMAN_List uses a pool of discriminators; other models use a single one
            if model_id == 'IR_GMAN_List':
                stop_training = ad_machine.mini_max_train(
                    train_data=train_data, generator=ad_machine.generator,
                    pool_discriminator=ad_machine.pool_discriminator, dict_buffer=dict_buffer)
                g_ranker = ad_machine.get_generator()
                d_ranker = ad_machine.pool_discriminator[0]
            else:
                stop_training = ad_machine.mini_max_train(
                    train_data=train_data, generator=ad_machine.generator,
                    discriminator=ad_machine.discriminator, dict_buffer=dict_buffer)
                g_ranker = ad_machine.get_generator()
                d_ranker = ad_machine.get_discriminator()

            if stop_training:
                print('training is failed !')
                break

            if (do_summary or do_vali) and (epoch_k % log_step == 0 or epoch_k == 1):  # stepwise check
                if do_vali:
                    # per-step validation score for both G and D
                    g_vali_eval_tmp = ndcg_at_k(ranker=g_ranker, test_data=vali_data, k=vali_k,
                                                multi_level_rele=self.data_setting.data_dict['multi_level_rele'],
                                                batch_mode=True)
                    d_vali_eval_tmp = ndcg_at_k(ranker=d_ranker, test_data=vali_data, k=vali_k,
                                                multi_level_rele=self.data_setting.data_dict['multi_level_rele'],
                                                batch_mode=True)
                    g_vali_eval_v, d_vali_eval_v = g_vali_eval_tmp.data.numpy(), d_vali_eval_tmp.data.numpy()
                    if epoch_k > 1:
                        # checkpoint whichever of G/D improved its fold-best validation score
                        g_buffer, g_tmp_metric_val, g_tmp_epoch = \
                            self.per_epoch_validation(ranker=g_ranker, curr_metric_val=g_vali_eval_v,
                                                      fold_optimal_metric_val=g_fold_optimal_ndcgk,
                                                      curr_epoch=epoch_k, id_str='G',
                                                      fold_optimal_checkpoint=fold_optimal_checkpoint,
                                                      epochs=epochs)
                        # observe better performance
                        if g_buffer:
                            g_fold_optimal_ndcgk, g_fold_optimal_epoch_val = g_tmp_metric_val, g_tmp_epoch
                        d_buffer, d_tmp_metric_val, d_tmp_epoch = \
                            self.per_epoch_validation(ranker=d_ranker, curr_metric_val=d_vali_eval_v,
                                                      fold_optimal_metric_val=d_fold_optimal_ndcgk,
                                                      curr_epoch=epoch_k, id_str='D',
                                                      fold_optimal_checkpoint=fold_optimal_checkpoint,
                                                      epochs=epochs)
                        if d_buffer:
                            d_fold_optimal_ndcgk, d_fold_optimal_epoch_val = d_tmp_metric_val, d_tmp_epoch

                if do_summary:
                    # summarize per-step performance w.r.t. train, test
                    self.per_epoch_summary_step1(ranker=g_ranker, train_data=train_data, test_data=test_data,
                                                 list_fold_k_train_eval_track=g_list_fold_k_train_eval_track,
                                                 list_fold_k_test_eval_track=g_list_fold_k_test_eval_track,
                                                 vali_eval_v=g_vali_eval_v,
                                                 list_fold_k_vali_eval_track=g_list_fold_k_vali_eval_track,
                                                 cutoffs=cutoffs, do_vali=do_vali)
                    self.per_epoch_summary_step1(ranker=d_ranker, train_data=train_data, test_data=test_data,
                                                 list_fold_k_train_eval_track=d_list_fold_k_train_eval_track,
                                                 list_fold_k_test_eval_track=d_list_fold_k_test_eval_track,
                                                 vali_eval_v=d_vali_eval_v,
                                                 list_fold_k_vali_eval_track=d_list_fold_k_vali_eval_track,
                                                 cutoffs=cutoffs, do_vali=do_vali)

        if do_summary:
            # persist the per-epoch tracking lists gathered above
            self.per_epoch_summary_step2(id_str='G', fold_k=fold_k,
                                         list_fold_k_train_eval_track=g_list_fold_k_train_eval_track,
                                         list_fold_k_test_eval_track=g_list_fold_k_test_eval_track,
                                         do_vali=do_vali,
                                         list_fold_k_vali_eval_track=g_list_fold_k_vali_eval_track)
            self.per_epoch_summary_step2(id_str='D', fold_k=fold_k,
                                         list_fold_k_train_eval_track=d_list_fold_k_train_eval_track,
                                         list_fold_k_test_eval_track=d_list_fold_k_test_eval_track,
                                         do_vali=do_vali,
                                         list_fold_k_vali_eval_track=d_list_fold_k_vali_eval_track)

        if do_vali:
            # using the fold-wise optimal model for later testing based on validation data
            g_buffered_model = '_'.join(['net_params_epoch', str(g_fold_optimal_epoch_val), 'G']) + '.pkl'
            g_ranker.load(self.dir_run + fold_optimal_checkpoint + '/' + g_buffered_model)
            g_fold_optimal_ranker = g_ranker
            d_buffered_model = '_'.join(['net_params_epoch', str(d_fold_optimal_epoch_val), 'D']) + '.pkl'
            d_ranker.load(self.dir_run + fold_optimal_checkpoint + '/' + d_buffered_model)
            d_fold_optimal_ranker = d_ranker
        else:
            # using default G
            # buffer the model after a fixed number of training-epoches if no validation is deployed
            g_ranker.save(dir=self.dir_run + fold_optimal_checkpoint + '/',
                          name='_'.join(['net_params_epoch', str(epoch_k), 'G']) + '.pkl')
            g_fold_optimal_ranker = g_ranker
            d_ranker.save(dir=self.dir_run + fold_optimal_checkpoint + '/',
                          name='_'.join(['net_params_epoch', str(epoch_k), 'D']) + '.pkl')
            d_fold_optimal_ranker = d_ranker

        # final fold-wise test evaluation for both rankers at all cutoffs
        g_torch_fold_ndcg_ks = ndcg_at_ks(ranker=g_fold_optimal_ranker, test_data=test_data, ks=cutoffs,
                                          multi_level_rele=self.data_setting.data_dict['multi_level_rele'],
                                          batch_mode=True)
        g_fold_ndcg_ks = g_torch_fold_ndcg_ks.data.numpy()
        d_torch_fold_ndcg_ks = ndcg_at_ks(ranker=d_fold_optimal_ranker, test_data=test_data, ks=cutoffs,
                                          multi_level_rele=self.data_setting.data_dict['multi_level_rele'],
                                          batch_mode=True)
        d_fold_ndcg_ks = d_torch_fold_ndcg_ks.data.numpy()

        performance_list = [' Fold-' + str(fold_k)]  # fold-wise performance
        performance_list.append('Generator')
        for i, co in enumerate(cutoffs):
            performance_list.append('nDCG@{}:{:.4f}'.format(co, g_fold_ndcg_ks[i]))
        performance_list.append('\nDiscriminator')
        for i, co in enumerate(cutoffs):
            performance_list.append('nDCG@{}:{:.4f}'.format(co, d_fold_ndcg_ks[i]))
        performance_str = '\t'.join(performance_list)
        print('\t', performance_str)

        g_l2r_cv_avg_scores = np.add(g_l2r_cv_avg_scores, g_fold_ndcg_ks)  # sum for later cv-performance
        d_l2r_cv_avg_scores = np.add(d_l2r_cv_avg_scores, d_fold_ndcg_ks)

    time_end = datetime.datetime.now()  # overall timing
    elapsed_time_str = str(time_end - time_begin)
    print('Elapsed time:\t', elapsed_time_str + "\n\n")

    # begin to print either cv or average performance
    g_l2r_cv_avg_scores = np.divide(g_l2r_cv_avg_scores, fold_num)
    d_l2r_cv_avg_scores = np.divide(d_l2r_cv_avg_scores, fold_num)
    if do_vali:
        eval_prefix = str(fold_num) + '-fold cross validation scores:'
    else:
        eval_prefix = str(fold_num) + '-fold average scores:'
    print('Generator', eval_prefix,
          metric_results_to_string(list_scores=g_l2r_cv_avg_scores, list_cutoffs=cutoffs))
    print('Discriminator', eval_prefix,
          metric_results_to_string(list_scores=d_l2r_cv_avg_scores, list_cutoffs=cutoffs))
def kfold_cv_eval(self, data_dict=None, eval_dict=None, sf_para_dict=None, model_para_dict=None):
    """
    Evaluation learning-to-rank methods via k-fold cross validation if there are k folds, otherwise one fold.

    Per fold: reset the ranker, train for the configured number of epochs
    (optionally checkpointing the validation-best model and/or stopping early
    based on epoch loss), then evaluate the chosen model on the test split.
    :param data_dict: settings w.r.t. data
    :param eval_dict: settings w.r.t. evaluation
    :param sf_para_dict: settings w.r.t. scoring function
    :param model_para_dict: settings w.r.t. the ltr_adhoc model
    :return: the k-fold average nDCG scores at the configured cutoffs
    """
    self.display_information(data_dict, model_para_dict)
    self.check_consistency(data_dict, eval_dict, sf_para_dict)
    self.setup_eval(data_dict, eval_dict, sf_para_dict, model_para_dict)

    model_id = model_para_dict['model_id']
    fold_num = data_dict['fold_num']
    # for quick access of common evaluation settings
    epochs, loss_guided = eval_dict['epochs'], eval_dict['loss_guided']
    vali_k, log_step, cutoffs = eval_dict['vali_k'], eval_dict['log_step'], eval_dict['cutoffs']
    do_vali, do_summary = eval_dict['do_validation'], eval_dict['do_summary']

    ranker = self.load_ranker(model_para_dict=model_para_dict, sf_para_dict=sf_para_dict)

    time_begin = datetime.datetime.now()  # timing
    l2r_cv_avg_scores = np.zeros(len(cutoffs))  # fold average

    for fold_k in range(1, fold_num + 1):  # evaluation over k-fold data
        ranker.reset_parameters()  # reset with the same random initialization
        train_data, test_data, vali_data = self.load_data(eval_dict, data_dict, fold_k)

        if do_vali:
            fold_optimal_ndcgk = 0.0
        if do_summary:
            list_epoch_loss, list_fold_k_train_eval_track, list_fold_k_test_eval_track, list_fold_k_vali_eval_track = [], [], [], []
        if not do_vali and loss_guided:
            # loss-guided early stopping: start from an effectively infinite threshold
            first_round = True
            threshold_epoch_loss = torch.cuda.FloatTensor([10000000.0]) if self.gpu else torch.FloatTensor([10000000.0])

        for epoch_k in range(1, epochs + 1):
            torch_fold_k_epoch_k_loss, stop_training = self.train_ranker(
                ranker=ranker, train_data=train_data,
                model_para_dict=model_para_dict, epoch_k=epoch_k)
            ranker.scheduler.step()  # adaptive learning rate with step_size=40, gamma=0.5

            if stop_training:
                print('training is failed !')
                break

            if (do_summary or do_vali) and (epoch_k % log_step == 0 or epoch_k == 1):  # stepwise check
                if do_vali:  # per-step validation score
                    vali_eval_tmp = ndcg_at_k(ranker=ranker, test_data=vali_data, k=vali_k,
                                              gpu=self.gpu, device=self.device,
                                              label_type=self.data_setting.data_dict['label_type'])
                    vali_eval_v = vali_eval_tmp.data.numpy()
                    if epoch_k > 1:  # further validation comparison
                        curr_vali_ndcg = vali_eval_v
                        if (curr_vali_ndcg > fold_optimal_ndcgk) or \
                                (epoch_k == epochs and curr_vali_ndcg == fold_optimal_ndcgk):
                            # we need at least a reference, in case all zero
                            print('\t', epoch_k, '- nDCG@{} - '.format(vali_k), curr_vali_ndcg)
                            fold_optimal_ndcgk = curr_vali_ndcg
                            fold_optimal_checkpoint = '-'.join(['Fold', str(fold_k)])
                            fold_optimal_epoch_val = epoch_k
                            ranker.save(dir=self.dir_run + fold_optimal_checkpoint + '/',
                                        name='_'.join(['net_params_epoch', str(epoch_k)]) + '.pkl')
                            # buffer currently optimal model
                        else:
                            print('\t\t', epoch_k, '- nDCG@{} - '.format(vali_k), curr_vali_ndcg)

                if do_summary:  # summarize per-step performance w.r.t. train, test
                    fold_k_epoch_k_train_ndcg_ks = ndcg_at_ks(ranker=ranker, test_data=train_data, ks=cutoffs,
                                                              gpu=self.gpu, device=self.device,
                                                              label_type=self.data_setting.data_dict['label_type'])
                    np_fold_k_epoch_k_train_ndcg_ks = fold_k_epoch_k_train_ndcg_ks.cpu().numpy() \
                        if self.gpu else fold_k_epoch_k_train_ndcg_ks.data.numpy()
                    list_fold_k_train_eval_track.append(np_fold_k_epoch_k_train_ndcg_ks)
                    fold_k_epoch_k_test_ndcg_ks = ndcg_at_ks(ranker=ranker, test_data=test_data, ks=cutoffs,
                                                             gpu=self.gpu, device=self.device,
                                                             label_type=self.data_setting.data_dict['label_type'])
                    np_fold_k_epoch_k_test_ndcg_ks = fold_k_epoch_k_test_ndcg_ks.cpu().numpy() \
                        if self.gpu else fold_k_epoch_k_test_ndcg_ks.data.numpy()
                    list_fold_k_test_eval_track.append(np_fold_k_epoch_k_test_ndcg_ks)
                    fold_k_epoch_k_loss = torch_fold_k_epoch_k_loss.cpu().numpy() \
                        if self.gpu else torch_fold_k_epoch_k_loss.data.numpy()
                    list_epoch_loss.append(fold_k_epoch_k_loss)
                    if do_vali:
                        list_fold_k_vali_eval_track.append(vali_eval_v)

            elif loss_guided:  # stopping check via epoch-loss
                if first_round and torch_fold_k_epoch_k_loss >= threshold_epoch_loss:
                    print('Bad threshold: ', torch_fold_k_epoch_k_loss, threshold_epoch_loss)
                if torch_fold_k_epoch_k_loss < threshold_epoch_loss:
                    first_round = False
                    print('\tFold-', str(fold_k), ' Epoch-', str(epoch_k), 'Loss: ', torch_fold_k_epoch_k_loss)
                    threshold_epoch_loss = torch_fold_k_epoch_k_loss
                else:
                    print('\tStopped according epoch-loss!', torch_fold_k_epoch_k_loss, threshold_epoch_loss)
                    break

        if do_summary:  # track: persist per-epoch evaluation histories for this fold
            sy_prefix = '_'.join(['Fold', str(fold_k)])
            fold_k_train_eval = np.vstack(list_fold_k_train_eval_track)
            fold_k_test_eval = np.vstack(list_fold_k_test_eval_track)
            pickle_save(fold_k_train_eval, file=self.dir_run + '_'.join([sy_prefix, 'train_eval.np']))
            pickle_save(fold_k_test_eval, file=self.dir_run + '_'.join([sy_prefix, 'test_eval.np']))
            fold_k_epoch_loss = np.hstack(list_epoch_loss)
            pickle_save((fold_k_epoch_loss, train_data.__len__()),
                        file=self.dir_run + '_'.join([sy_prefix, 'epoch_loss.np']))
            if do_vali:
                fold_k_vali_eval = np.hstack(list_fold_k_vali_eval_track)
                pickle_save(fold_k_vali_eval, file=self.dir_run + '_'.join([sy_prefix, 'vali_eval.np']))

        if do_vali:  # using the fold-wise optimal model for later testing based on validation data
            buffered_model = '_'.join(['net_params_epoch', str(fold_optimal_epoch_val)]) + '.pkl'
            ranker.load(self.dir_run + fold_optimal_checkpoint + '/' + buffered_model)
            fold_optimal_ranker = ranker
        else:  # buffer the model after a fixed number of training-epoches if no validation is deployed
            fold_optimal_checkpoint = '-'.join(['Fold', str(fold_k)])
            ranker.save(dir=self.dir_run + fold_optimal_checkpoint + '/',
                        name='_'.join(['net_params_epoch', str(epoch_k)]) + '.pkl')
            fold_optimal_ranker = ranker

        # final fold-wise evaluation on the test split at all cutoffs
        torch_fold_ndcg_ks = ndcg_at_ks(ranker=fold_optimal_ranker, test_data=test_data, ks=cutoffs,
                                        gpu=self.gpu, device=self.device,
                                        label_type=self.data_setting.data_dict['label_type'])
        fold_ndcg_ks = torch_fold_ndcg_ks.data.numpy()

        performance_list = [model_id + ' Fold-' + str(fold_k)]  # fold-wise performance
        for i, co in enumerate(cutoffs):
            performance_list.append('nDCG@{}:{:.4f}'.format(co, fold_ndcg_ks[i]))
        performance_str = '\t'.join(performance_list)
        print('\t', performance_str)

        l2r_cv_avg_scores = np.add(l2r_cv_avg_scores, fold_ndcg_ks)  # sum for later cv-performance

    time_end = datetime.datetime.now()  # overall timing
    elapsed_time_str = str(time_end - time_begin)
    print('Elapsed time:\t', elapsed_time_str + "\n\n")

    l2r_cv_avg_scores = np.divide(l2r_cv_avg_scores, fold_num)
    eval_prefix = str(fold_num) + '-fold cross validation scores:' if do_vali \
        else str(fold_num) + '-fold average scores:'
    print(model_id, eval_prefix,
          metric_results_to_string(list_scores=l2r_cv_avg_scores, list_cutoffs=cutoffs))
    # print either cv or average performance
    return l2r_cv_avg_scores
def __call__(self, trial):
    """
    Optuna objective for one LightGBM hyper-parameter configuration: samples the
    model parameters, converts each fold's LETOR data to libsvm format, trains
    via a nested one-trial study per fold, then averages nDCG@vali_k on the
    validation split across folds.
    :param trial: the optuna trial used to sample hyper-parameters
    :return: the k-fold average validation nDCG (the value optuna maximizes)
    """
    # sample params
    model_id = self.model_parameter.model_id
    para_dict = self.model_parameter.grid_search(trial)
    self.setup_eval(data_dict=self.data_dict, eval_dict=self.eval_dict)

    k_fold_average = 0.
    for i in range(self.fold_num):  # evaluation over k-fold data
        fold_k = i + 1
        study = self.k_studies[i]
        train_data, test_data, vali_data = self.load_data(self.eval_dict, self.data_dict, fold_k)
        data_id = self.data_dict['data_id']
        train_presort, validation_presort, test_presort = \
            self.data_dict['train_presort'], self.data_dict['validation_presort'], \
            self.data_dict['test_presort']
        file_train, file_vali, file_test = self.determine_files(data_dict=self.data_dict, fold_k=fold_k)
        self.update_save_model_dir(data_dict=self.data_dict, fold_k=fold_k)

        # prepare training & testing datasets in libsvm format for LightGBM
        file_train_data, file_train_group = load_letor_data_as_libsvm_data(
            file_train, split_type=SPLIT_TYPE.Train, data_dict=self.data_dict,
            eval_dict=self.eval_dict, presort=train_presort)
        x_train, y_train = load_svmlight_file(file_train_data)
        group_train = np.loadtxt(file_train_group)
        train_set = Dataset(data=x_train, label=y_train, group=group_train)

        # NOTE(review): the test split is converted and loaded but no test Dataset
        # is built here — presumably only the file-conversion side effect is
        # needed downstream; confirm before removing.
        file_test_data, file_test_group = load_letor_data_as_libsvm_data(
            file_test, split_type=SPLIT_TYPE.Test, data_dict=self.data_dict,
            eval_dict=self.eval_dict, presort=test_presort)
        x_test, y_test = load_svmlight_file(file_test_data)
        group_test = np.loadtxt(file_test_group)

        file_vali_data, file_vali_group = load_letor_data_as_libsvm_data(
            file_vali, split_type=SPLIT_TYPE.Validation, data_dict=self.data_dict,
            eval_dict=self.eval_dict, presort=validation_presort)
        x_valid, y_valid = load_svmlight_file(file_vali_data)
        group_valid = np.loadtxt(file_vali_group)
        valid_set = Dataset(data=x_valid, label=y_valid, group=group_valid)

        # either a scikit-learn-style LGBMRanker instance, or the lgbm module
        # itself (whose train() API the nested objective can use)
        if para_dict['custom_dict']['custom'] and para_dict['custom_dict']['use_LGBMRanker']:
            lgbm_ranker = lgbm.LGBMRanker()
        else:
            lgbm_ranker = lgbm

        # n_trials=1: run exactly one nested training trial for this fold
        study.optimize(TreeLTRObjective(model_id=model_id, data_id=data_id,
                                        x_train=x_train, y_train=y_train, group_train=group_train,
                                        train_set=train_set, x_valid=x_valid, y_valid=y_valid,
                                        group_valid=group_valid, valid_set=valid_set,
                                        ranker=lgbm_ranker, fold_k=fold_k, para_dict=para_dict,
                                        data_dict=self.data_dict, eval_dict=self.eval_dict,
                                        save_model_dir=self.save_model_dir),
                       n_trials=1)

        # reload the model buffered by the nested objective and score it on the
        # validation split (Yahoo! LTR data uses a single fixed model filename)
        if data_id in YAHOO_LTR:
            model_file = self.save_model_dir + 'model.txt'
        else:
            model_file = self.save_model_dir + '_'.join(['fold', str(fold_k), 'model']) + '.txt'
        lgbm_ranker = lgbm.Booster(model_file=model_file)
        vali_eval_tmp = ndcg_at_k(ranker=lgbm_ranker, test_data=vali_data, k=self.vali_k,
                                  label_type=vali_data.label_type, gpu=self.gpu, device=self.device)
        vali_eval_v = vali_eval_tmp.data.numpy()
        k_fold_average += vali_eval_v

    # average k-fold validation score
    k_fold_average /= self.fold_num
    return k_fold_average