def get_foldIDsampleID_strata_dict(data, train_test_indices, kfold=10):
    # For each fold, record the ground-truth team size (number of nonzero
    # skill entries) of every test sample; used later for stratified eval.
    # Note: the holder here is keyed by fold id, not by @k.
    evaluation_k_set = np.arange(1, kfold + 1, 1)
    foldIDsampleID_strata_dict = dblp_eval.init_eval_holder(evaluation_k_set)
    for fold_counter in evaluation_k_set:
        _, _, x_test, _ = get_fold_data(fold_counter, data, train_test_indices, mute=True)
        for sample in x_test:
            foldIDsampleID_strata_dict[fold_counter].append(len(sample[0].nonzero()[1]))
    return foldIDsampleID_strata_dict
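
# (sketch) init_eval_holder is not defined in this file; every call site treats
# its return value as a dict mapping each key to a growable list. A stand-in
# consistent with that usage (assumed from the call sites, not a confirmed API):
import numpy as np

def init_eval_holder_sketch(key_set):
    return {key: [] for key in key_set}

strata = init_eval_holder_sketch(np.arange(1, 11, 1))
strata[1].append(4)  # fold 1: a test sample whose ground truth has 4 members
strata[1].append(7)
print(np.mean(strata[1]))  # mean ground-truth team size in fold 1 -> 5.5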
# Select the two models to compare (1-based user input -> 0-based indices)
model1 = int(models.split()[0]) - 1
model2 = int(models.split()[1]) - 1
file_name1 = file_names[model1]
file_name2 = file_names[model2]

# Load the two models' prediction outputs
method_name1, pred_indices1, true_indices1, _, _, k_fold1, _ = dblp_eval.load_output_file(
    file_name1, foldIDsampleID_strata_dict)
method_name2, pred_indices2, true_indices2, _, _, _, _ = dblp_eval.load_output_file(
    file_name2, foldIDsampleID_strata_dict)

# Eval settings
fold_set = np.arange(1, k_fold1 + 1, 1)

# Initialize metric holders
holder_ndcg = dblp_eval.init_eval_holder(at_k_set)
holder_map = dblp_eval.init_eval_holder(at_k_set)
holder_mrr = dblp_eval.init_eval_holder(at_k_set)

# Calculate the per-fold diff between the two methods
for k in at_k_set:
    for i in fold_set:
        truth1 = true_indices1[i]
        pred1 = pred_indices1[i]
        truth2 = true_indices2[i]
        pred2 = pred_indices2[i]
        print('{} & {}, fold {}, @ {}'.format(method_name1, method_name2, i, k))
        # if metric.lower() == 'coverage':  (use '==' for string comparison, not 'is')
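
# (sketch) The loop body after the print is truncated above, so the actual
# per-metric diff is not shown. One plausible shape is a paired comparison of
# the two methods' per-fold scores; scipy's paired t-test is standard, but the
# per-fold score lists below are illustrative, not the repo's values.
from scipy import stats

def paired_fold_diff(scores1, scores2):
    # One metric value per fold for each method; returns the mean per-fold
    # difference and the paired t-test p-value.
    diffs = [s1 - s2 for s1, s2 in zip(scores1, scores2)]
    t_stat, p_value = stats.ttest_rel(scores1, scores2)
    return sum(diffs) / len(diffs), p_value

diff, p = paired_fold_diff([0.42, 0.39, 0.44], [0.37, 0.36, 0.40])
print('mean diff = {:.3f}, p = {:.3f}'.format(diff, p))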
RRN = '../output/predictions/RRN_output.csv'
BL2009 = '../output/predictions/BL2009_output.csv'
BL2017 = '../output/predictions/BL2017_output.csv'
file_names = [
    OKLO, OKLU, OVAEO, OVAEU, SKLO, SKLU, SVAEO, SVAEU,
    Sapienza, SVDpp, RRN, BL2009, BL2017
]

for file_name in file_names:
    method_name, pred_indices, true_indices, calc_user_time, calc_skill_time, k_fold, k_max = \
        dblp_eval.load_output_file(file_name, foldIDsampleID_strata_dict)

    # Eval settings
    evaluation_k_set = np.arange(1, k_max + 1, 1)
    fold_set = np.arange(1, k_fold + 1, 1)

    # Initialize metric holders
    Coverage = dblp_eval.init_eval_holder(evaluation_k_set)
    nDCG = dblp_eval.init_eval_holder(evaluation_k_set)
    MAP = dblp_eval.init_eval_holder(evaluation_k_set)
    MRR = dblp_eval.init_eval_holder(evaluation_k_set)
    Quality = dblp_eval.init_eval_holder(evaluation_k_set)
    Hindex_min = dblp_eval.init_eval_holder(evaluation_k_set)
    Hindex_avg = dblp_eval.init_eval_holder(evaluation_k_set)
    Hindex_max = dblp_eval.init_eval_holder(evaluation_k_set)
    Hindex_diff = dblp_eval.init_eval_holder(evaluation_k_set)

    # Write the per-method strata results file
    result_output_name = "../output/eval_results/{}_strata.csv".format(method_name)
    with open(result_output_name, 'w') as file:
        writer = csv.writer(file)
        writer.writerow(['User Quantity Strata Computation Time'])
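
# (sketch) The "_strata" file groups results by ground-truth team size (the
# values collected per fold earlier). A minimal bucketing helper, assuming
# per-sample metric values aligned index-for-index with
# foldIDsampleID_strata_dict[fold]; the helper name is ours, not the repo's:
from collections import defaultdict

def group_by_stratum(team_sizes, metric_values):
    buckets = defaultdict(list)
    for size, value in zip(team_sizes, metric_values):
        buckets[size].append(value)
    return buckets

# Samples with team sizes [2, 3, 2] and r@k values [0.5, 1.0, 0.0]:
print(dict(group_by_stratum([2, 3, 2], [0.5, 1.0, 0.0])))  # {2: [0.5, 0.0], 3: [1.0]}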
member_size_filter=min_member_size)
if not dblp.preprocessed_dataset_exist() or not dblp.train_test_indices_exist():
    dblp.dataset_preprocessing(
        dblp.load_ae_dataset(file_path='../dataset/ae_dataset.pkl'),
        seed=seed,
        kfolds=k_fold)
dataset = dblp.load_preprocessed_dataset()
train_test_indices = dblp.load_train_test_indices()

# k-fold cross-validation
cvscores = []

# Evaluation score holders for train data
r_at_k_all_train = dblp_eval.init_eval_holder(evaluation_k_set)      # per-instance r@k, per fold and k
r_at_k_overall_train = dblp_eval.init_eval_holder(evaluation_k_set)  # overall r@k, per fold and k

# Evaluation score holders for test data
r_at_k_all = dblp_eval.init_eval_holder(evaluation_k_set)      # per-instance r@k, per fold and k
r_at_k_overall = dblp_eval.init_eval_holder(evaluation_k_set)  # overall r@k, per fold and k
mapk = dblp_eval.init_eval_holder(evaluation_k_set)            # MAP@k, per fold and k
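
# (sketch) The per-fold loop that fills these holders follows in the original
# script; a minimal version, assuming get_fold_data (seen earlier) and some
# trained 'model' with a predict() method for the fold. Dense arrays are
# assumed here; the real data may be sparse.
for fold_counter in range(1, k_fold + 1):
    x_train, y_train, x_test, y_test = get_fold_data(fold_counter, dataset, train_test_indices)
    y_pred = model.predict(x_test)  # 'model' is a placeholder for the fold's trained network
    for k in evaluation_k_set:
        for truth, pred in zip(y_test, y_pred):
            n_true = np.count_nonzero(truth)
            if n_true == 0:
                continue
            top_k = np.asarray(pred).argsort()[-k:][::-1]
            hits = np.intersect1d(np.argwhere(truth).ravel(), top_k)
            r_at_k_all[k].append(len(hits) / n_true)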
            ret += (2**scores[i] - 1) / np.log2(i + 2)
        return ret

    # nDCG = DCG of the predicted ranking / DCG of the ideal ranking
    ideal_sorted_scores = np.sort(y_true)[::-1]
    ideal_dcg_score = dcg(ideal_sorted_scores, k=k, powered=powered)
    pred_sorted_ind = np.argsort(y_pred)[::-1]
    pred_sorted_scores = y_true[pred_sorted_ind]
    dcg_score = dcg(pred_sorted_scores, k=k, powered=powered)
    return dcg_score / ideal_dcg_score


predictions = algo.test(data_test)
r_at_k = dblp_eval.init_eval_holder(evaluation_k_set)  # per-instance r@k, per fold and k
p_at_k = dblp_eval.init_eval_holder(evaluation_k_set)  # per-instance p@k, per fold and k
ndcg = dblp_eval.init_eval_holder(evaluation_k_set)    # per-instance nDCG@k, per fold and k
mrr = dblp_eval.init_eval_holder(evaluation_k_set)     # per-instance MRR@k, per fold and k
for k in evaluation_k_set:
    precisions, recalls = precision_recall_at_k(predictions, k=k, threshold=0.5)
    rak = np.mean(list(recalls.values()))
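
# (sketch) Standalone sanity check of the powered-DCG formula used above,
# (2**rel - 1) / log2(i + 2), with hand-checkable numbers:
import numpy as np

def dcg_powered(scores, k):
    scores = np.asarray(scores, dtype=float)[:k]
    return sum((2**s - 1) / np.log2(i + 2) for i, s in enumerate(scores))

y_true_demo = np.array([3, 2, 0, 1])                  # graded relevances
y_pred_demo = np.array([0.9, 0.2, 0.8, 0.1])          # model scores
ideal = dcg_powered(np.sort(y_true_demo)[::-1], k=4)  # best possible ordering
ranked = y_true_demo[np.argsort(y_pred_demo)[::-1]]   # relevances in predicted order
print(dcg_powered(ranked, k=4) / ideal)               # nDCG@4 ~= 0.95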
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))
print(x_train.shape)
print(x_test.shape)

autoencoder.load_weights('./KL.h5')

# Unit test for the recall@k and precision@k methods
y_pred = autoencoder.predict(x_test)
k_set = np.arange(1, 51, 1)
p_at_k = dblp_eval.init_eval_holder(k_set)  # per-instance p@k, per fold and k
r_at_k = dblp_eval.init_eval_holder(k_set)  # per-instance r@k, per fold and k
for k in k_set:
    print('Calculating for @{}'.format(k))
    # all_precision = []
    # for pred, t in zip(y_pred, y_test):
    #     t = np.asarray(t)
    #     pred = np.asarray(pred)
    #
    #     t_indices = np.argwhere(t)
    #     if len(t_indices) == 0:
    #         continue
    #     pred_indices = pred.argsort()[-k:][::-1]
    #
    #     precision = 0
    #     for pred_index in pred_indices:
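
# (sketch) A compact, self-contained version of the check the commented-out
# block sketches by hand, following the same argsort pattern; the helper name
# is ours, not the repo's. Works on one binary target row and one score row.
import numpy as np

def pr_at_k(y_true_row, y_pred_row, k):
    true_idx = np.argwhere(np.asarray(y_true_row)).ravel()
    if len(true_idx) == 0:
        return None  # skip samples with no positives, as the commented code does
    top_k = np.asarray(y_pred_row).argsort()[-k:][::-1]
    hits = len(np.intersect1d(true_idx, top_k))
    return hits / k, hits / len(true_idx)  # (precision@k, recall@k)

# Positives at indices 1 and 3; the top-2 predictions hit one of them.
print(pr_at_k([0, 1, 0, 1], [0.1, 0.9, 0.7, 0.2], k=2))  # (0.5, 0.5)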
ME_M2V_SVAEO = '../output/predictions/ME_M2V_S_VAE_O_output.csv'
MET_M2V_SVAEO = '../output/predictions/MET_M2V_S_VAE_O_output.csv'
file_names = [
    MET_M2V_SVAEO, ME_M2V_SVAEO, BL2017, BL2009, RRN, SVDpp, Sapienza, SVAEO
]

for file_name in file_names:
    method_name, pred_indices, true_indices, _, calc_skill_time, k_fold, k_max = \
        dblp_eval.load_output_file(file_name, foldIDsampleID_strata_dict)
    k_max = 10  # cap evaluation at @10 regardless of the k_max stored in the file

    # Eval settings
    evaluation_k_set = np.arange(1, k_max + 1, 1)
    fold_set = np.arange(1, k_fold + 1, 1)

    # Initialize metric holders
    Coverage = dblp_eval.init_eval_holder(evaluation_k_set)
    Sensitivity = dblp_eval.init_eval_holder(evaluation_k_set)
    nDCG = dblp_eval.init_eval_holder(evaluation_k_set)
    MAP = dblp_eval.init_eval_holder(evaluation_k_set)
    MRR = dblp_eval.init_eval_holder(evaluation_k_set)
    Quality = dblp_eval.init_eval_holder(evaluation_k_set)
    team_personal = dblp_eval.init_eval_holder(evaluation_k_set)

    # Write the per-method results file
    result_output_name = "../output/eval_results/{}.csv".format(method_name)
    with open(result_output_name, 'w') as file:
        writer = csv.writer(file)
        writer.writerow([
            '@K', 'Coverage Mean', 'Coverage STDev', 'nDCG Mean', 'nDCG STDev',
            'MAP Mean', 'MAP STDev', 'MRR Mean', 'MRR STDev',
            'Quality Mean', 'Quality STDev',
            'Team Personal Mean', 'Team Personal STDev'
        ])
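        # (sketch) The summary rows that presumably follow the header, assuming
        # each holder maps k -> per-fold values; np.mean/np.std pairs match the
        # (Mean, STDev) column pairs above.
        for k in evaluation_k_set:
            writer.writerow([
                k,
                np.mean(Coverage[k]), np.std(Coverage[k]),
                np.mean(nDCG[k]), np.std(nDCG[k]),
                np.mean(MAP[k]), np.std(MAP[k]),
                np.mean(MRR[k]), np.std(MRR[k]),
                np.mean(Quality[k]), np.std(Quality[k]),
                np.mean(team_personal[k]), np.std(team_personal[k]),
            ])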