def per_epoch_summary_step1(self, ranker, train_data, list_fold_k_train_eval_track, test_data, list_fold_k_test_eval_track, vali_eval_v, list_fold_k_vali_eval_track, cutoffs, do_vali):
    """
    Append this epoch's nDCG@k vectors (over train and test data) to the fold-level
    tracking lists, and the validation score as well when validation is enabled.

    :param ranker: the ranker to evaluate at the current epoch
    :param train_data: training dataset, evaluated for the train-side track
    :param list_fold_k_train_eval_track: accumulator of per-epoch train nDCG@k arrays (mutated)
    :param test_data: test dataset, evaluated for the test-side track
    :param list_fold_k_test_eval_track: accumulator of per-epoch test nDCG@k arrays (mutated)
    :param vali_eval_v: the already-computed validation score for this epoch
    :param list_fold_k_vali_eval_track: accumulator of per-epoch validation scores (mutated)
    :param cutoffs: the k values for nDCG@k
    :param do_vali: whether validation tracking is enabled
    """
    label_type = self.data_setting.data_dict['label_type']

    def _as_numpy(t):
        # Tensors living on GPU must be moved to host memory before numpy conversion.
        return t.cpu().numpy() if self.gpu else t.data.numpy()

    epoch_train_ndcgs = ndcg_at_ks(ranker=ranker, test_data=train_data, ks=cutoffs,
                                   gpu=self.gpu, device=self.device, label_type=label_type)
    list_fold_k_train_eval_track.append(_as_numpy(epoch_train_ndcgs))

    epoch_test_ndcgs = ndcg_at_ks(ranker=ranker, test_data=test_data, ks=cutoffs,
                                  gpu=self.gpu, device=self.device, label_type=label_type)
    list_fold_k_test_eval_track.append(_as_numpy(epoch_test_ndcgs))

    if do_vali:
        list_fold_k_vali_eval_track.append(vali_eval_v)
def per_epoch_summary_step1(self, ranker, train_data, list_fold_k_train_eval_track, test_data, list_fold_k_test_eval_track, vali_eval_v, list_fold_k_vali_eval_track, cutoffs, do_vali):
    """
    Append this epoch's nDCG@k vectors (over train and test data) to the fold-level
    tracking lists, and the validation score as well when validation is enabled.

    NOTE(review): this is a duplicate definition of per_epoch_summary_step1 — being defined
    later at the same scope, it shadows the earlier version (which uses the
    gpu/device/label_type signature of ndcg_at_ks). One of the two should be removed;
    confirm which ndcg_at_ks signature the project currently exposes.

    :param ranker: the ranker to evaluate at the current epoch
    :param train_data: training dataset, evaluated for the train-side track
    :param list_fold_k_train_eval_track: accumulator of per-epoch train nDCG@k arrays (mutated)
    :param test_data: test dataset, evaluated for the test-side track
    :param list_fold_k_test_eval_track: accumulator of per-epoch test nDCG@k arrays (mutated)
    :param vali_eval_v: the already-computed validation score for this epoch
    :param list_fold_k_vali_eval_track: accumulator of per-epoch validation scores (mutated)
    :param cutoffs: the k values for nDCG@k
    :param do_vali: whether validation tracking is enabled
    """
    fold_k_epoch_k_train_ndcg_ks = ndcg_at_ks(
        ranker=ranker, test_data=train_data, ks=cutoffs,
        multi_level_rele=self.data_setting.data_dict['multi_level_rele'], batch_mode=True)
    # Fix: the original referenced a bare `gpu` name, undefined in this method's scope
    # (NameError at runtime); use self.gpu as the rest of the class does.
    np_fold_k_epoch_k_train_ndcg_ks = fold_k_epoch_k_train_ndcg_ks.cpu().numpy() \
        if self.gpu else fold_k_epoch_k_train_ndcg_ks.data.numpy()
    list_fold_k_train_eval_track.append(np_fold_k_epoch_k_train_ndcg_ks)

    fold_k_epoch_k_test_ndcg_ks = ndcg_at_ks(
        ranker=ranker, test_data=test_data, ks=cutoffs,
        multi_level_rele=self.data_setting.data_dict['multi_level_rele'], batch_mode=True)
    np_fold_k_epoch_k_test_ndcg_ks = fold_k_epoch_k_test_ndcg_ks.cpu().numpy() \
        if self.gpu else fold_k_epoch_k_test_ndcg_ks.data.numpy()
    list_fold_k_test_eval_track.append(np_fold_k_epoch_k_test_ndcg_ks)

    if do_vali:
        list_fold_k_vali_eval_track.append(vali_eval_v)
def naive_train(self, ranker, eval_dict, train_data=None, test_data=None, vali_data=None):
    """
    A simple train and test, namely train based on training data & test based on testing data.
    After every epoch, nDCG@k is computed over both the training and testing datasets.

    :param ranker: the ranking model; reset to the same random initialization before training
    :param eval_dict: evaluation settings; reads 'epochs' and 'cutoffs'
    :param train_data: iterable of (qid, batch_rankings, batch_stds) batches (required)
    :param test_data: test dataset (required)
    :param vali_data: unused here; kept for interface symmetry with other evaluation methods
    :return: (list_losses, train_ndcgs, test_ndcgs) — per-epoch losses, and per-epoch nDCG@k
             arrays stacked row-wise (one row per epoch)
    """
    ranker.reset_parameters()  # reset with the same random initialization
    assert train_data is not None
    assert test_data is not None

    list_losses = []
    list_train_ndcgs = []
    list_test_ndcgs = []
    epochs, cutoffs = eval_dict['epochs'], eval_dict['cutoffs']

    for i in range(epochs):
        # Fix: the original used bare `gpu` / `device` names, which are not defined in this
        # scope; use self.gpu / self.device, consistent with the other methods of this class.
        epoch_loss = torch.zeros(1).to(self.device) if self.gpu else torch.zeros(1)
        for qid, batch_rankings, batch_stds in train_data:
            if self.gpu:
                batch_rankings, batch_stds = batch_rankings.to(self.device), batch_stds.to(self.device)
            batch_loss, stop_training = ranker.train(batch_rankings, batch_stds, qid=qid)
            epoch_loss += batch_loss.item()

        np_epoch_loss = epoch_loss.cpu().numpy() if self.gpu else epoch_loss.data.numpy()
        list_losses.append(np_epoch_loss)

        # NOTE(review): these calls use an older ndcg_at_ks signature (multi_level_rele) while
        # kfold_cv_eval uses gpu/device/label_type — confirm which signature the imported
        # ndcg_at_ks actually exposes.
        test_ndcg_ks = ndcg_at_ks(ranker=ranker, test_data=test_data, ks=cutoffs, multi_level_rele=True)
        np_test_ndcg_ks = test_ndcg_ks.data.numpy()
        list_test_ndcgs.append(np_test_ndcg_ks)

        train_ndcg_ks = ndcg_at_ks(ranker=ranker, test_data=train_data, ks=cutoffs, multi_level_rele=True)
        np_train_ndcg_ks = train_ndcg_ks.data.numpy()
        list_train_ndcgs.append(np_train_ndcg_ks)

    test_ndcgs = np.vstack(list_test_ndcgs)
    train_ndcgs = np.vstack(list_train_ndcgs)

    return list_losses, train_ndcgs, test_ndcgs
def kfold_cv_eval(self, data_dict=None, eval_dict=None, sf_para_dict=None, model_para_dict=None):
    """
    Evaluation learning-to-rank methods via k-fold cross validation if there are k folds, otherwise one fold.

    Per fold: the ranker is re-initialized, trained for eval_dict['epochs'] epochs, optionally
    checkpointed on validation nDCG@vali_k (do_validation) and/or tracked per log_step
    (do_summary), then the fold-optimal (or final) model is scored on the fold's test data.
    Fold-wise nDCG@k vectors are averaged over folds and printed.

    :param data_dict: settings w.r.t. data
    :param eval_dict: settings w.r.t. evaluation
    :param sf_para_dict: settings w.r.t. scoring function
    :param model_para_dict: settings w.r.t. the ltr_adhoc model
    :return: the fold-averaged nDCG@k scores (one entry per cutoff)
    """
    self.display_information(data_dict, model_para_dict)
    self.check_consistency(data_dict, eval_dict, sf_para_dict)
    self.setup_eval(data_dict, eval_dict, sf_para_dict, model_para_dict)

    model_id = model_para_dict['model_id']
    fold_num = data_dict['fold_num']
    # for quick access of common evaluation settings
    epochs, loss_guided = eval_dict['epochs'], eval_dict['loss_guided']
    vali_k, log_step, cutoffs = eval_dict['vali_k'], eval_dict['log_step'], eval_dict['cutoffs']
    do_vali, do_summary = eval_dict['do_validation'], eval_dict['do_summary']

    ranker = self.load_ranker(model_para_dict=model_para_dict, sf_para_dict=sf_para_dict)

    time_begin = datetime.datetime.now()  # timing
    l2r_cv_avg_scores = np.zeros(len(cutoffs))  # fold average

    for fold_k in range(1, fold_num + 1):  # evaluation over k-fold data
        ranker.reset_parameters()  # reset with the same random initialization

        train_data, test_data, vali_data = self.load_data(eval_dict, data_dict, fold_k)

        if do_vali: fold_optimal_ndcgk = 0.0
        if do_summary: list_epoch_loss, list_fold_k_train_eval_track, list_fold_k_test_eval_track, list_fold_k_vali_eval_track = [], [], [], []
        if not do_vali and loss_guided:
            # loss-guided early stopping: start from a huge sentinel threshold
            first_round = True
            threshold_epoch_loss = torch.cuda.FloatTensor([10000000.0]) if self.gpu else torch.FloatTensor([10000000.0])

        for epoch_k in range(1, epochs + 1):
            torch_fold_k_epoch_k_loss, stop_training = self.train_ranker(ranker=ranker, train_data=train_data, model_para_dict=model_para_dict, epoch_k=epoch_k)

            ranker.scheduler.step()  # adaptive learning rate with step_size=40, gamma=0.5

            if stop_training:
                print('training is failed !')
                break

            if (do_summary or do_vali) and (epoch_k % log_step == 0 or epoch_k == 1):  # stepwise check
                if do_vali:  # per-step validation score
                    vali_eval_tmp = ndcg_at_k(ranker=ranker, test_data=vali_data, k=vali_k, gpu=self.gpu, device=self.device, label_type=self.data_setting.data_dict['label_type'])
                    vali_eval_v = vali_eval_tmp.data.numpy()
                    if epoch_k > 1:  # further validation comparison
                        curr_vali_ndcg = vali_eval_v
                        # NOTE(review): fold_optimal_checkpoint / fold_optimal_epoch_val are only
                        # bound inside this branch; if no epoch > 1 is ever checked (e.g. when
                        # log_step > epochs), the do_vali block after the epoch loop would raise
                        # NameError — confirm the expected eval_dict configurations.
                        if (curr_vali_ndcg > fold_optimal_ndcgk) or (epoch_k == epochs and curr_vali_ndcg == fold_optimal_ndcgk):  # we need at least a reference, in case all zero
                            print('\t', epoch_k, '- nDCG@{} - '.format(vali_k), curr_vali_ndcg)
                            fold_optimal_ndcgk = curr_vali_ndcg
                            fold_optimal_checkpoint = '-'.join(['Fold', str(fold_k)])
                            fold_optimal_epoch_val = epoch_k
                            ranker.save(dir=self.dir_run + fold_optimal_checkpoint + '/', name='_'.join(['net_params_epoch', str(epoch_k)]) + '.pkl')  # buffer currently optimal model
                        else:
                            print('\t\t', epoch_k, '- nDCG@{} - '.format(vali_k), curr_vali_ndcg)

                if do_summary:  # summarize per-step performance w.r.t. train, test
                    fold_k_epoch_k_train_ndcg_ks = ndcg_at_ks(ranker=ranker, test_data=train_data, ks=cutoffs, gpu=self.gpu, device=self.device, label_type=self.data_setting.data_dict['label_type'])
                    # GPU tensors must be moved to host memory before numpy conversion
                    np_fold_k_epoch_k_train_ndcg_ks = fold_k_epoch_k_train_ndcg_ks.cpu().numpy() if self.gpu else fold_k_epoch_k_train_ndcg_ks.data.numpy()
                    list_fold_k_train_eval_track.append(np_fold_k_epoch_k_train_ndcg_ks)

                    fold_k_epoch_k_test_ndcg_ks = ndcg_at_ks(ranker=ranker, test_data=test_data, ks=cutoffs, gpu=self.gpu, device=self.device, label_type=self.data_setting.data_dict['label_type'])
                    np_fold_k_epoch_k_test_ndcg_ks = fold_k_epoch_k_test_ndcg_ks.cpu().numpy() if self.gpu else fold_k_epoch_k_test_ndcg_ks.data.numpy()
                    list_fold_k_test_eval_track.append(np_fold_k_epoch_k_test_ndcg_ks)

                    fold_k_epoch_k_loss = torch_fold_k_epoch_k_loss.cpu().numpy() if self.gpu else torch_fold_k_epoch_k_loss.data.numpy()
                    list_epoch_loss.append(fold_k_epoch_k_loss)

                    if do_vali: list_fold_k_vali_eval_track.append(vali_eval_v)

            elif loss_guided:  # stopping check via epoch-loss
                if first_round and torch_fold_k_epoch_k_loss >= threshold_epoch_loss:
                    print('Bad threshold: ', torch_fold_k_epoch_k_loss, threshold_epoch_loss)

                if torch_fold_k_epoch_k_loss < threshold_epoch_loss:
                    first_round = False
                    print('\tFold-', str(fold_k), ' Epoch-', str(epoch_k), 'Loss: ', torch_fold_k_epoch_k_loss)
                    threshold_epoch_loss = torch_fold_k_epoch_k_loss
                else:
                    print('\tStopped according epoch-loss!', torch_fold_k_epoch_k_loss, threshold_epoch_loss)
                    break

        if do_summary:  # track: persist the per-epoch evaluation curves for this fold
            sy_prefix = '_'.join(['Fold', str(fold_k)])
            fold_k_train_eval = np.vstack(list_fold_k_train_eval_track)
            fold_k_test_eval = np.vstack(list_fold_k_test_eval_track)
            pickle_save(fold_k_train_eval, file=self.dir_run + '_'.join([sy_prefix, 'train_eval.np']))
            pickle_save(fold_k_test_eval, file=self.dir_run + '_'.join([sy_prefix, 'test_eval.np']))

            fold_k_epoch_loss = np.hstack(list_epoch_loss)
            pickle_save((fold_k_epoch_loss, train_data.__len__()), file=self.dir_run + '_'.join([sy_prefix, 'epoch_loss.np']))
            if do_vali:
                fold_k_vali_eval = np.hstack(list_fold_k_vali_eval_track)
                pickle_save(fold_k_vali_eval, file=self.dir_run + '_'.join([sy_prefix, 'vali_eval.np']))

        if do_vali:  # using the fold-wise optimal model for later testing based on validation data
            buffered_model = '_'.join(['net_params_epoch', str(fold_optimal_epoch_val)]) + '.pkl'
            ranker.load(self.dir_run + fold_optimal_checkpoint + '/' + buffered_model)
            fold_optimal_ranker = ranker
        else:  # buffer the model after a fixed number of training-epoches if no validation is deployed
            fold_optimal_checkpoint = '-'.join(['Fold', str(fold_k)])
            ranker.save(dir=self.dir_run + fold_optimal_checkpoint + '/', name='_'.join(['net_params_epoch', str(epoch_k)]) + '.pkl')
            fold_optimal_ranker = ranker

        # score the fold-optimal model on the fold's test data
        torch_fold_ndcg_ks = ndcg_at_ks(ranker=fold_optimal_ranker, test_data=test_data, ks=cutoffs, gpu=self.gpu, device=self.device, label_type=self.data_setting.data_dict['label_type'])
        fold_ndcg_ks = torch_fold_ndcg_ks.data.numpy()

        performance_list = [model_id + ' Fold-' + str(fold_k)]  # fold-wise performance
        for i, co in enumerate(cutoffs):
            performance_list.append('nDCG@{}:{:.4f}'.format(co, fold_ndcg_ks[i]))
        performance_str = '\t'.join(performance_list)
        print('\t', performance_str)

        l2r_cv_avg_scores = np.add(l2r_cv_avg_scores, fold_ndcg_ks)  # sum for later cv-performance

    time_end = datetime.datetime.now()  # overall timing
    elapsed_time_str = str(time_end - time_begin)
    print('Elapsed time:\t', elapsed_time_str + "\n\n")

    l2r_cv_avg_scores = np.divide(l2r_cv_avg_scores, fold_num)
    eval_prefix = str(fold_num) + '-fold cross validation scores:' if do_vali else str(fold_num) + '-fold average scores:'
    print(model_id, eval_prefix, metric_results_to_string(list_scores=l2r_cv_avg_scores, list_cutoffs=cutoffs))  # print either cv or average performance

    return l2r_cv_avg_scores
def ad_cv_eval(self, data_dict=None, eval_dict=None, ad_para_dict=None, sf_para_dict=None):
    """
    Adversarial training and evaluation via k-fold cross validation.

    Per fold: generator and discriminator are reset, burned in, then trained with
    mini-max updates for eval_dict['epochs'] epochs; both are optionally checkpointed on
    validation nDCG@vali_k (do_validation) and tracked per log_step (do_summary).
    Fold-wise test nDCG@k for both G and D are averaged over folds and printed.

    :param data_dict: settings w.r.t. data
    :param eval_dict: settings w.r.t. evaluation
    :param ad_para_dict: settings w.r.t. the adversarial model (reads 'model_id')
    :param sf_para_dict: settings w.r.t. scoring function (only 'ffnns' is supported)
    :return: None — results are printed
    """
    self.check_consistency(data_dict, eval_dict)
    self.display_information(data_dict, model_para_dict=ad_para_dict)
    self.setup_eval(data_dict, eval_dict, sf_para_dict, model_para_dict=ad_para_dict)

    model_id = ad_para_dict['model_id']
    fold_num = data_dict['fold_num']
    # for quick access of common evaluation settings
    epochs, loss_guided = eval_dict['epochs'], eval_dict['loss_guided']
    vali_k, log_step, cutoffs = eval_dict['vali_k'], eval_dict['log_step'], eval_dict['cutoffs']
    do_vali, do_summary = eval_dict['do_validation'], eval_dict['do_summary']

    if sf_para_dict['id'] == 'ffnns':
        sf_para_dict['ffnns'].update(dict(num_features=data_dict['num_features']))
    else:
        # only feed-forward scoring functions are supported for adversarial training
        raise NotImplementedError

    ad_machine = self.get_ad_machine(eval_dict=eval_dict, data_dict=data_dict, sf_para_dict=sf_para_dict, ad_para_dict=ad_para_dict)

    time_begin = datetime.datetime.now()  # timing
    g_l2r_cv_avg_scores, d_l2r_cv_avg_scores = np.zeros(len(cutoffs)), np.zeros(len(cutoffs))  # fold average

    for fold_k in range(1, fold_num + 1):
        dict_buffer = dict()  # for buffering frequently used objs
        ad_machine.reset_generator_discriminator()

        fold_optimal_checkpoint = '-'.join(['Fold', str(fold_k)])

        train_data, test_data, vali_data = self.load_data(eval_dict, data_dict, fold_k)

        if do_vali: g_fold_optimal_ndcgk, d_fold_optimal_ndcgk = 0.0, 0.0
        if do_summary:
            list_epoch_loss = []  # not used yet
            g_list_fold_k_train_eval_track, g_list_fold_k_test_eval_track, g_list_fold_k_vali_eval_track = [], [], []
            d_list_fold_k_train_eval_track, d_list_fold_k_test_eval_track, d_list_fold_k_vali_eval_track = [], [], []

        # warm up generator/discriminator before the adversarial epochs
        for _ in range(10):
            ad_machine.burn_in(train_data=train_data)

        for epoch_k in range(1, epochs + 1):

            if model_id == 'IR_GMAN_List':
                # multi-discriminator variant: a pool of discriminators is trained
                stop_training = ad_machine.mini_max_train(train_data=train_data, generator=ad_machine.generator, pool_discriminator=ad_machine.pool_discriminator, dict_buffer=dict_buffer)

                g_ranker = ad_machine.get_generator()
                d_ranker = ad_machine.pool_discriminator[0]
            else:
                stop_training = ad_machine.mini_max_train(train_data=train_data, generator=ad_machine.generator, discriminator=ad_machine.discriminator, dict_buffer=dict_buffer)

                g_ranker = ad_machine.get_generator()
                d_ranker = ad_machine.get_discriminator()

            if stop_training:
                print('training is failed !')
                break

            if (do_summary or do_vali) and (epoch_k % log_step == 0 or epoch_k == 1):  # stepwise check
                if do_vali:
                    g_vali_eval_tmp = ndcg_at_k(ranker=g_ranker, test_data=vali_data, k=vali_k, multi_level_rele=self.data_setting.data_dict['multi_level_rele'], batch_mode=True)
                    d_vali_eval_tmp = ndcg_at_k(ranker=d_ranker, test_data=vali_data, k=vali_k, multi_level_rele=self.data_setting.data_dict['multi_level_rele'], batch_mode=True)
                    g_vali_eval_v, d_vali_eval_v = g_vali_eval_tmp.data.numpy(), d_vali_eval_tmp.data.numpy()

                    if epoch_k > 1:
                        g_buffer, g_tmp_metric_val, g_tmp_epoch = \
                            self.per_epoch_validation(ranker=g_ranker, curr_metric_val=g_vali_eval_v,
                                                      fold_optimal_metric_val=g_fold_optimal_ndcgk, curr_epoch=epoch_k,
                                                      id_str='G', fold_optimal_checkpoint=fold_optimal_checkpoint, epochs=epochs)
                        # observe better performance
                        if g_buffer: g_fold_optimal_ndcgk, g_fold_optimal_epoch_val = g_tmp_metric_val, g_tmp_epoch

                        d_buffer, d_tmp_metric_val, d_tmp_epoch = \
                            self.per_epoch_validation(ranker=d_ranker, curr_metric_val=d_vali_eval_v,
                                                      fold_optimal_metric_val=d_fold_optimal_ndcgk, curr_epoch=epoch_k,
                                                      id_str='D', fold_optimal_checkpoint=fold_optimal_checkpoint, epochs=epochs)
                        if d_buffer: d_fold_optimal_ndcgk, d_fold_optimal_epoch_val = d_tmp_metric_val, d_tmp_epoch

                if do_summary:  # summarize per-step performance w.r.t. train, test
                    self.per_epoch_summary_step1(ranker=g_ranker, train_data=train_data, test_data=test_data,
                                                 list_fold_k_train_eval_track=g_list_fold_k_train_eval_track,
                                                 list_fold_k_test_eval_track=g_list_fold_k_test_eval_track,
                                                 vali_eval_v=g_vali_eval_v,
                                                 list_fold_k_vali_eval_track=g_list_fold_k_vali_eval_track,
                                                 cutoffs=cutoffs, do_vali=do_vali)

                    self.per_epoch_summary_step1(ranker=d_ranker, train_data=train_data, test_data=test_data,
                                                 list_fold_k_train_eval_track=d_list_fold_k_train_eval_track,
                                                 list_fold_k_test_eval_track=d_list_fold_k_test_eval_track,
                                                 vali_eval_v=d_vali_eval_v,
                                                 list_fold_k_vali_eval_track=d_list_fold_k_vali_eval_track,
                                                 cutoffs=cutoffs, do_vali=do_vali)

        if do_summary:
            self.per_epoch_summary_step2(id_str='G', fold_k=fold_k,
                                         list_fold_k_train_eval_track=g_list_fold_k_train_eval_track,
                                         list_fold_k_test_eval_track=g_list_fold_k_test_eval_track,
                                         do_vali=do_vali,
                                         list_fold_k_vali_eval_track=g_list_fold_k_vali_eval_track)

            self.per_epoch_summary_step2(id_str='D', fold_k=fold_k,
                                         list_fold_k_train_eval_track=d_list_fold_k_train_eval_track,
                                         list_fold_k_test_eval_track=d_list_fold_k_test_eval_track,
                                         do_vali=do_vali,
                                         list_fold_k_vali_eval_track=d_list_fold_k_vali_eval_track)

        if do_vali:  # using the fold-wise optimal model for later testing based on validation data
            # NOTE(review): g_fold_optimal_epoch_val / d_fold_optimal_epoch_val are only bound when
            # per_epoch_validation buffered an improvement (epoch_k > 1) — confirm expected configs.
            g_buffered_model = '_'.join(['net_params_epoch', str(g_fold_optimal_epoch_val), 'G']) + '.pkl'
            g_ranker.load(self.dir_run + fold_optimal_checkpoint + '/' + g_buffered_model)
            g_fold_optimal_ranker = g_ranker

            d_buffered_model = '_'.join(['net_params_epoch', str(d_fold_optimal_epoch_val), 'D']) + '.pkl'
            d_ranker.load(self.dir_run + fold_optimal_checkpoint + '/' + d_buffered_model)
            d_fold_optimal_ranker = d_ranker
        else:  # using default G
            # buffer the model after a fixed number of training-epoches if no validation is deployed
            # (epoch_k deliberately leaks out of the epoch loop here)
            g_ranker.save(dir=self.dir_run + fold_optimal_checkpoint + '/', name='_'.join(['net_params_epoch', str(epoch_k), 'G']) + '.pkl')
            g_fold_optimal_ranker = g_ranker

            d_ranker.save(dir=self.dir_run + fold_optimal_checkpoint + '/', name='_'.join(['net_params_epoch', str(epoch_k), 'D']) + '.pkl')
            d_fold_optimal_ranker = d_ranker

        # score both fold-optimal models on the fold's test data
        g_torch_fold_ndcg_ks = ndcg_at_ks(ranker=g_fold_optimal_ranker, test_data=test_data, ks=cutoffs, multi_level_rele=self.data_setting.data_dict['multi_level_rele'], batch_mode=True)
        g_fold_ndcg_ks = g_torch_fold_ndcg_ks.data.numpy()

        d_torch_fold_ndcg_ks = ndcg_at_ks(ranker=d_fold_optimal_ranker, test_data=test_data, ks=cutoffs, multi_level_rele=self.data_setting.data_dict['multi_level_rele'], batch_mode=True)
        d_fold_ndcg_ks = d_torch_fold_ndcg_ks.data.numpy()

        performance_list = [' Fold-' + str(fold_k)]  # fold-wise performance

        performance_list.append('Generator')
        for i, co in enumerate(cutoffs):
            performance_list.append('nDCG@{}:{:.4f}'.format(co, g_fold_ndcg_ks[i]))

        performance_list.append('\nDiscriminator')
        for i, co in enumerate(cutoffs):
            performance_list.append('nDCG@{}:{:.4f}'.format(co, d_fold_ndcg_ks[i]))

        performance_str = '\t'.join(performance_list)
        print('\t', performance_str)

        g_l2r_cv_avg_scores = np.add(g_l2r_cv_avg_scores, g_fold_ndcg_ks)  # sum for later cv-performance
        d_l2r_cv_avg_scores = np.add(d_l2r_cv_avg_scores, d_fold_ndcg_ks)

    time_end = datetime.datetime.now()  # overall timing
    elapsed_time_str = str(time_end - time_begin)
    print('Elapsed time:\t', elapsed_time_str + "\n\n")

    # begin to print either cv or average performance
    g_l2r_cv_avg_scores = np.divide(g_l2r_cv_avg_scores, fold_num)
    d_l2r_cv_avg_scores = np.divide(d_l2r_cv_avg_scores, fold_num)

    if do_vali:
        eval_prefix = str(fold_num) + '-fold cross validation scores:'
    else:
        eval_prefix = str(fold_num) + '-fold average scores:'

    print('Generator', eval_prefix, metric_results_to_string(list_scores=g_l2r_cv_avg_scores, list_cutoffs=cutoffs))
    print('Discriminator', eval_prefix, metric_results_to_string(list_scores=d_l2r_cv_avg_scores, list_cutoffs=cutoffs))