def compute_ranks(feat_orders, gt_order, query_img_index, include_query=False):
    stat_indexes = []
    max_feats_len, min_feats_len = utils.max_min_length(feat_orders + [gt_order])

    # Prepare the axis for computing log-scale recall-at-k.
    log_base = 1.3
    logscale_ub = np.floor(math.log(max_feats_len, log_base))
    k_logscale_axis = np.floor(
        np.power(log_base, np.array(range(int(logscale_ub)))))
    k_logscale_axis = np.unique(k_logscale_axis)
    k_logscale_axis = k_logscale_axis.astype(int)

    feat_distances = utils.build_feat_dict(feat_orders,
                                           query_img_index,
                                           min_length=min_feats_len,
                                           include_query=include_query,
                                           cache_fld='DOP_cache_bis')
    gt_distance = utils.build_feat_dict([gt_order],
                                        query_img_index,
                                        min_length=min_feats_len,
                                        include_query=include_query,
                                        cache_fld='DOP_cache_bis')
    assert len(gt_distance) == 1, 'More than one ground truth!'
    dist_gt, _, perm_gt = list(gt_distance.values())[0]

    # Calculate stats for every feature.
    for name, (dist, _, permut) in feat_distances.items():
        k_logscale = {k: recall_at(permut, perm_gt, k) for k in k_logscale_axis}
        norm_gt_similarities = 1 - (dist_gt / max(dist_gt))
        norm_similarities = 1 - (dist / max(dist))
        stat_indexes.append({
            'label': name,
            'kendall-tau': kendalltau(dist, dist_gt)[0],
            'spearmanr': spearmanr(dist, dist_gt)[0],
            'nDCG': metrics.ndcg_score(norm_gt_similarities, norm_similarities, 20),
            'recall-at-10': recall_at(permut, perm_gt, 10),
            'recall-at-100': recall_at(permut, perm_gt, 100),
            'recall-at-1000': recall_at(permut, perm_gt, 1000),
            'recall-at-k': dict(k_logscale)
        })

    return stat_indexes
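# `recall_at` is referenced above but not defined in this snippet. A minimal
# sketch, assuming `permut` and `perm_gt` are rankings of the same item
# indices (best match first): recall@k is then the fraction of the top-k
# ground-truth items that also appear in the top-k of the evaluated ranking.
import numpy as np

def recall_at(permut, perm_gt, k):
    retrieved = set(np.asarray(permut)[:k])   # top-k of the evaluated ranking
    relevant = set(np.asarray(perm_gt)[:k])   # top-k of the ground-truth ranking
    return len(retrieved & relevant) / float(k)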
def score(self, k=10):
    """Calculate mean NDCG over users in the test set."""
    score = []
    for userId, df in self.test.gr_users:
        if userId in self.train.gr_users_pos.groups.keys():
            not_watched = tensor(self.train.not_liked_movies(userId),
                                 device=self.device)
            order = self.predict(userId, not_watched).argsort(descending=True)
            top = torch.take(not_watched, order).cpu().numpy()
            gain = df.set_index('movieId').loc[top, 'rating'].fillna(0)
            # The ideal ranking orders ratings from best to worst.
            best = df.sort_values('rating', ascending=False)['rating']
            score.append(ndcg_score(best, gain, k=k))
    return np.mean(score)
def scatterplot_methods_varying_beta(frame, store_path, metric_order):
    # Number of methods being plotted
    n = frame['method'].unique().shape[0]

    # Create the figure
    _ = plt.figure(figsize=(6, 3))

    frame['metric'] = pd.Categorical(frame['metric'], metric_order)
    f1 = frame.loc[frame['metric'].isin(
        ['tss_combined'])].sort_values('method')['val'].values.flatten()
    vals = []
    for metric in metric_order:
        f2 = frame.loc[frame['metric'].isin(
            [metric])].sort_values('method')['val'].values.flatten()
        # vals.append(weightedtau(f1, f2))
        vals.append(ndcg_score((f2 - min(f2)) / (max(f2) - min(f2)), f1))
        # print(("WT", weightedtau(f1.flatten(), f2.flatten())))
        # print(("Sp", spearmanr(f1.flatten(), f2.flatten())))
    plt.plot(range(len(vals)), vals, marker='o', linewidth=5, markersize=12)

    # Fix the y-axis extent and labels
    plt.ylim([0, 1.05])
    plt.yticks(fontsize=14)
    # plt.ylabel('Weighted Kendall-Tau', fontsize=16)
    plt.ylabel('NDCG', fontsize=16)
    plt.xlabel(r'$\beta$', fontsize=16)
    plt.xticks(range(len(vals)),
               [r'$%s$' % e for e in [
                   '0.0', '0.1', '0.2', '0.5', r'{\bf 1.0}',
                   '2.0', '5.0', '10.0', r'\infty'
               ]],
               fontsize=14)

    # Add the legend
    # plt.legend(ncol=3)

    # Save the plot
    plt.savefig(store_path, bbox_inches='tight')
    plt.close()
def test(net, test_seq):
    k_index = [3, 5, 10]
    net.eval()
    # Each user gets a 50-item slate: one positive followed by four negatives,
    # repeated ten times.
    label = np.array([1, 0, 0, 0, 0] * 10)
    prec, ap, ndcg, rr_list = [[], [], []], [[], [], []], [[], [], []], []
    num_users = int(test_seq.shape[0] / 50)
    for i in range(num_users):
        score = F.softmax(net(test_seq[i * 50:(i + 1) * 50])[0],
                          dim=1)[:, 1].cpu().detach().numpy()
        ordered = sorted(zip(label, score), key=itemgetter(1), reverse=True)
        ordered_label = [pair[0] for pair in ordered]
        # Note: use a loop variable distinct from the outer `i`.
        for (j, k) in zip(range(0, 3), k_index):
            prec[j].append(prec_score(ordered_label, k))
            ap[j].append(ap_score(ordered_label, k))
            ndcg[j].append(ndcg_score(ordered_label, k))
        rr_list.append(1 + ordered_label.index(1))
    rr = np.mean(1 / np.array(rr_list))
    result = ([np.mean(v) for v in prec] + [np.mean(v) for v in ap] +
              [np.mean(v) for v in ndcg] + [rr])
    net.train()
    return result
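# prec_score, ap_score, and ndcg_score are not defined in this snippet.
# Minimal sketches, assuming each takes a binary relevance list already
# ordered by descending predicted score (as built above):
import numpy as np

def prec_score(ordered_label, k):
    # Precision@k: fraction of the top-k items that are relevant.
    return sum(ordered_label[:k]) / k

def ap_score(ordered_label, k):
    # AP@k: mean of precision@i taken at each relevant position in the top k.
    hits, total = 0, 0.0
    for i, rel in enumerate(ordered_label[:k], start=1):
        if rel:
            hits += 1
            total += hits / i
    return total / max(hits, 1)

def ndcg_score(ordered_label, k):
    # Binary NDCG@k with log2 discounts; the ideal list puts positives first.
    dcg = sum(rel / np.log2(i + 1)
              for i, rel in enumerate(ordered_label[:k], start=1))
    ideal = sorted(ordered_label, reverse=True)
    idcg = sum(rel / np.log2(i + 1)
               for i, rel in enumerate(ideal[:k], start=1))
    return dcg / idcg if idcg > 0 else 0.0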
def run_evaluation(self, model, data_iter, k_vals=None, max_grade=2):
    """Run inference with the model and compute performance metrics."""
    if k_vals is None:
        k_vals = []
    sum_ndcg_at_k = [0.0 for _ in range(len(k_vals))]
    sum_err_at_k = [0.0 for _ in range(len(k_vals))]
    sample_count = 0
    for _, batch in enumerate(data_iter):
        labels = batch[1]
        features = batch[2]
        for lbls, scrs in zip(labels.cpu().numpy(),
                              model.forward(features).cpu().detach().numpy()):
            sum_ndcg_at_k = list(
                map(operator.add, sum_ndcg_at_k,
                    [metrics.ndcg_score(lbls, scrs, k=k) for k in k_vals]))
            sum_err_at_k = list(
                map(operator.add, sum_err_at_k,
                    [metrics.err(lbls, scrs, k=k, max_grade=max_grade)
                     for k in k_vals]))
            sample_count += 1
    return ([x / sample_count for x in sum_ndcg_at_k],
            [x / sample_count for x in sum_err_at_k])
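# `metrics.err` above is Expected Reciprocal Rank. A minimal sketch under the
# standard Chapelle et al. (2009) cascade definition, assuming integer
# relevance grades in `lbls` and model scores in `scrs` (names from the
# snippet; the implementation itself is an assumption):
import numpy as np

def err(lbls, scrs, k=10, max_grade=2):
    order = np.argsort(scrs)[::-1][:k]           # rank items by descending score
    graded = np.asarray(lbls)[order]
    p_continue = 1.0
    err_total = 0.0
    for rank, g in enumerate(graded, start=1):
        r = (2.0 ** g - 1.0) / 2.0 ** max_grade  # stop probability at this rank
        err_total += p_continue * r / rank
        p_continue *= 1.0 - r
    return err_total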
def rank_users(users):
    global fact_to_words
    print("Creating nodes")
    user_to_links, user_to_weight = get_user_edges(users)
    X_train, X_test, y_train, y_test = train_test_split(user_to_links,
                                                        user_to_weight)
    print("Building graph..")
    G = build_graph(user_to_links, user_to_weight)
    graph_plot(G)
    pr = nx.pagerank(G)
    pr_cred_users = {u: v for u, v in list(pr.items()) if u in user_to_links}
    # print(sorted([(v, y[1]) for u, v in pr_cred_users.items()
    #               for y in user_to_weight if u == y[0]],
    #              reverse=True, key=lambda x: x[0]))
    pred = get_ranks(X_test, G, pr)
    print(sorted(np.asarray([e for e in zip(pred, [y[1] for y in y_test])]),
                 reverse=True, key=lambda x: x[0]))
    ndcg = ndcg_score([y[1] for y in y_test], pred)
    print("NDCG: {}".format(ndcg))
def score_classifier(clf, X_test, y_test):
    """Given a classifier clf and a test set (X_test, y_test),
    score it by returning its NDCG@5."""
    probabilities = clf.predict_proba(X_test)
    return ndcg_score(y_test, probabilities, k=5)
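# A minimal usage sketch for score_classifier, assuming a scikit-learn-style
# classifier and an ndcg_score helper (not shown here) that accepts true class
# labels plus the per-class probability matrix, Kaggle-Airbnb style. The
# dataset and model below are illustrative choices, not from the original.
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
clf = RandomForestClassifier(random_state=0).fit(X_train, y_train)
print(score_classifier(clf, X_test, y_test))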
def eval():
    # Algorithm:
    #   Pick N random samples from query.txt
    #   Get top 10 results from bool query for each rnd query
    #   Get top 10 results from vector query for each rnd query
    #   Compute NDCG btn bool query results and qrels.txt
    #   Compute NDCG btn vector query results and qrels.txt
    #   Get p-value btn bool and vector

    # Get the query collection
    qc = loadCranQry(query_path)
    poss_queries = list(qc)

    # Load up the inverted index
    ii = InvertedIndex()
    ii.load(index_file)

    # Load up the document collection
    cf = CranFile("cran.all")

    # Get ground-truth results from qrels.txt
    with open(qrels_path) as f:
        qrels = f.readlines()

    # Index qrels into a dict
    qrel_dict = {}
    for qrel in qrels:
        qrel_split = qrel.split()
        if int(qrel_split[0]) in qrel_dict:
            qrel_dict[int(qrel_split[0])].append(int(qrel_split[1]))
        else:
            qrel_dict[int(qrel_split[0])] = [int(qrel_split[1])]

    # Run over N random queries, collecting NDCGs
    bool_ndcgs = []
    vector_ndcgs = []
    for _ in range(n):
        # Get a random query ID
        query_id = choice(poss_queries)

        # Get the query (IDs are zero-padded to three digits)
        if 0 < int(query_id) < 10:
            query_id = '00' + str(int(query_id))
        elif 9 < int(query_id) < 100:
            query_id = '0' + str(int(query_id))
        try:
            query = qc[query_id].text
        except KeyError:
            print("Invalid query id", query_id)
            return

        # Initialize the query processor
        qp = QueryProcessor(query, ii, cf)

        # Run the boolean query
        bool_result = qp.booleanQuery()[:10]

        # Run the vector query
        vector_result = qp.vectorQuery(10)

        # Pull the top 10 ground-truth results from the qrels dict
        gt_results = qrel_dict[poss_queries.index(query_id) + 1][:10]

        # Compute NDCG for the boolean query
        # NOTE: There is no weighting on the bool query, so give all an even 1
        truth_vector = list(map(lambda x: x in gt_results, bool_result))
        bool_ndcg = ndcg_score(truth_vector, [1] * len(truth_vector),
                               k=len(truth_vector))

        # Compute NDCG for the vector query
        vector_docs = []
        vector_scores = []
        for v in vector_result:
            vector_docs.append(v[0])
            vector_scores.append(v[1])
        truth_vector = list(map(lambda x: x in gt_results, vector_docs))
        vector_ndcg = ndcg_score(truth_vector, vector_scores,
                                 k=len(truth_vector))

        # Accumulate NDCGs
        bool_ndcgs.append(bool_ndcg)
        vector_ndcgs.append(vector_ndcg)

    # Average the score lists
    bool_avg = sum(bool_ndcgs) / len(bool_ndcgs)
    vector_avg = sum(vector_ndcgs) / len(vector_ndcgs)

    # Present averages and p-values
    print("Boolean NDCG average:", bool_avg)
    print("Vector NDCG average:", vector_avg)
    if n > 19:
        print("Wilcoxon p-value:", wilcoxon(bool_ndcgs, vector_ndcgs).pvalue)
    else:
        print("Wilcoxon p-value: Sample size too small to be significant")
    print("T-Test p-value:", ttest_ind(bool_ndcgs, vector_ndcgs).pvalue)
def eval(testOn):
    k = 10  # number of top (docID, similarity) pairs to get from vectorQuery
    dictQ_ID = []
    indexFile = sys.argv[1]  # e.g. "src/Data/tempFile"
    queryText = sys.argv[2]
    qrelsText = sys.argv[3]
    dictOfQuery = {}
    dictQrelsText = {}
    docCollection = CranFile('./CranfieldDataset/cran.all')
    NDCGScoreBool = []
    numberOfQueries = int(sys.argv[4])
    NDCGScoreVector = []
    # indexFile = "src/Data/tempFile"
    # queryText = 'src/CranfieldDataset/query.text'
    # qrelsText = 'src/CranfieldDataset/qrels.text'
    # numberOfQueries = 50
    numberOfTimeToLoop = 5

    # Load files
    listOfQueryRelsMaping = readFile(qrelsText)
    queryFile = loadCranQry(queryText)

    for i in range(numberOfTimeToLoop):
        # Get random queries
        dictOfQuery = getRandomQuery(queryFile, numberOfQueries)
        if testOn:
            assert len(dictOfQuery) == numberOfQueries, \
                "Error getting random queries"

        # Return all queries instead:
        # dictOfQuery = getAllDataItems(queryFile)
        # if testOn:
        #     assert len(dictOfQuery) == 225, "Error getting random queries"

        # Get the list of query results from qrels.text
        dictQrelsText = getResultsFrom_QrelsFile(listOfQueryRelsMaping,
                                                 dictOfQuery)
        if testOn:
            assert len(dictQrelsText) == numberOfQueries, \
                "Error: number of queries too large"

        start = timer()
        # Creating the QueryProcessor is an extremely expensive operation.
        queryProcessor = QueryProcessor("", indexFile, docCollection.docs)
        end = timer()
        if testOn:
            print("Time for creating QueryProcessor:", end - start)

        countDoc = 0
        start = timer()
        dictQ_ID = []
        for qid, queryText in dictOfQuery.items():
            countDoc += 1
            dictQ_ID.append(qid)
            if testOn:
                print("QID:", qid)

            start = timer()
            queryProcessor.loadQuery(queryText)
            end = timer()
            if testOn:
                print("Time for Load:", end - start)
                print("qrels:", dictQrelsText[qid])

            start = timer()
            # booleanQuery returns data like [12, 14, 78, 141, 486, 746, ...]
            docIDs = queryProcessor.booleanQuery()
            # docIDs_1 = queryProcessor.booleanQuery_1()
            end = timer()
            if testOn:
                print("Time for booleanQuery:", end - start)

            start = timer()
            # vectorQuery returns data like, for k=3:
            # [[625, 0.8737], [401, 0.8698], [943, 0.8425]]
            listOfDocIDAndSimilarity = queryProcessor.vectorQuery(k)
            end = timer()
            if testOn:
                print("Time for vectorQuery:", end - start)
                print("booleanQuery:", docIDs)

            # Boolean part
            start = timer()
            yTrue = []
            yScore = []
            for docID in docIDs:
                yScore.append(1)
                if docID in dictQrelsText[qid]:
                    yTrue.append(1)
                else:
                    yTrue.append(0)
            yTrue.sort(reverse=True)
            score = metrics.ndcg_score(yTrue[:k], yScore[:k], k, "exponential")
            if math.isnan(score):
                NDCGScoreBool.append(0)
            else:
                NDCGScoreBool.append(score)
            end = timer()
            if testOn:
                print("Time for Boolean ndcg:", end - start)

            # Vector part
            start = timer()
            yTrue = []
            yScore = []
            if testOn:
                print("vectorQuery:", listOfDocIDAndSimilarity)
            for docID_Score in listOfDocIDAndSimilarity:
                yScore.append(float(docID_Score[1]))
                if docID_Score[0] in dictQrelsText[qid]:
                    yTrue.append(1)
                else:
                    yTrue.append(0)
            yTrue.sort(reverse=True)
            score = metrics.ndcg_score(yTrue[:k], yScore[:k], k, "exponential")
            if math.isnan(score):
                NDCGScoreVector.append(0)
            else:
                NDCGScoreVector.append(score)
            end = timer()
            if testOn:
                print("Time for Vector ndcg:", end - start)

        print("\nRunning queries, iteration:(", str(i + 1), ")\n", dictQ_ID)
        if testOn:
            for QID, boolScore, vectorScore in zip(dictQ_ID, NDCGScoreBool,
                                                   NDCGScoreVector):
                print("QID", QID, "Boolean Model:", boolScore,
                      "Vector Model", vectorScore)
        print("\nThe length of both NDCG score lists is:",
              len(NDCGScoreBool), "==", len(NDCGScoreVector))

    print('\nThe Avg NDCG Score')
    vectorAvg = avg(NDCGScoreVector)
    boolAvg = avg(NDCGScoreBool)
    print("Avg NDCG Score for Bool:", boolAvg,
          "\nAvg NDCG Score for Vector:", vectorAvg)
    end = timer()
    if testOn:
        print("\n\nTime for running", countDoc, "queries:", end - start)

    print('\nThe P-Value')
    p_va_ttest = stats.ttest_ind(NDCGScoreBool, NDCGScoreVector)
    p_va_wilcoxon = stats.wilcoxon(NDCGScoreBool, NDCGScoreVector)
    print("T-Test P-value:", p_va_ttest)
    print("Wilcoxon P-value:", p_va_wilcoxon)
    print('Done')
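# metrics.ndcg_score is called above with a fourth "exponential" argument. A
# minimal sketch in the widely used rank-metrics style, where gains are either
# exponential (2**rel - 1) or linear; this matches the NaN handling above,
# since the ratio is NaN when the ideal DCG is zero:
import numpy as np

def dcg_score(y_true, y_score, k=10, gains="exponential"):
    order = np.argsort(y_score)[::-1]
    y_true = np.take(y_true, order[:k])
    if gains == "exponential":
        gains = 2 ** y_true - 1
    else:  # "linear"
        gains = y_true
    discounts = np.log2(np.arange(len(y_true)) + 2)
    return np.sum(gains / discounts)

def ndcg_score(y_true, y_score, k=10, gains="exponential"):
    best = dcg_score(y_true, y_true, k, gains)
    actual = dcg_score(y_true, y_score, k, gains)
    return actual / best  # NaN when best == 0, handled by the caller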
def eval(indexfilename, queryfilename, queryrefilename, numberofrandomqueries):
    actual = []
    if numberofrandomqueries > 225:
        raise Exception('please enter a query count less than or equal to 225')
    qrys = loadCranQry("query.text")
    validqueries = []
    querycounter = 0
    for q in qrys:
        validqueries.append(int(q))

    loadiindex = InvertedIndex()
    loadiindex = loadiindex.load("index_file.pickle")
    # print("index loaded")
    cf = CranFile('cran.all')
    # QueryProcessor.numberofresult = 10
    # qp = QueryProcessor(qrys, loadiindex, cf.docs, 10)

    queryRelevence = dict()
    for line in open(queryrefilename):
        fields = line.split(" ")
        fields[0] = '%0*d' % (3, int(fields[0]))
        if fields[0] in queryRelevence:
            # extract the data
            queryRelevence[fields[0]].append(fields[1])
        else:
            # create a new array in this slot
            queryRelevence[fields[0]] = [fields[1]]

    replacecounter = 0
    queryRelevenceUpdated = {}
    for k in queryRelevence:
        queryRelevenceUpdated['%0*d' % (3, int(validqueries[replacecounter]))] = \
            queryRelevence.get(k)
        replacecounter = replacecounter + 1

    # relevent = list(queryRelevence.keys())
    # relevent = list(map(int, relevent))
    # samplespace = np.intersect1d(relevent, validqueries)
    list_of_random_items = random.sample(validqueries, numberofrandomqueries)
    tempcounter2 = 0
    booleanndcg = []
    vectorndcg = []

    while tempcounter2 < numberofrandomqueries:
        list_of_random_items[tempcounter2] = '%0*d' % (
            3, int(list_of_random_items[tempcounter2]))
        print('query for which ndcg is calculated '
              + str(list_of_random_items[tempcounter2]))
        y = str(list_of_random_items[tempcounter2])

        vectorresult = query(indexfilename, '1', queryfilename,
                             str(list_of_random_items[tempcounter2]), 10)
        # vectorresult = ['573', '51', '944', '878', '12', '486', '875',
        #                 '879', '746', '665']
        # print(vectorresult)
        tempcounter = 0
        for z in vectorresult:
            if z in queryRelevenceUpdated[str(list_of_random_items[tempcounter2])]:
                vectorresult[tempcounter] = 1
            else:
                vectorresult[tempcounter] = 0
            tempcounter = tempcounter + 1
        # print(vectorresult)
        idealvectorresult = vectorresult.copy()
        idealvectorresult.sort(reverse=True)
        # print(idealvectorresult)
        if sum(idealvectorresult) == 0:
            ndcgscore = 0
        else:
            ndcgscore = ndcg_score(idealvectorresult, vectorresult)
        # print(ndcgscore)
        vectorndcg.append(ndcgscore)

        tempcounter3 = 0
        booleanqueryresult = query(indexfilename, '0', queryfilename,
                                   str(list_of_random_items[tempcounter2]), 10)
        # booleanqueryresult = ['462', '462', '462', '462', '462', '462',
        #                       '462', '462', '462']
        booleanquery = booleanqueryresult.copy()
        for g in booleanquery:
            if g in queryRelevenceUpdated[str(list_of_random_items[tempcounter2])]:
                booleanquery[tempcounter3] = 1
            else:
                booleanquery[tempcounter3] = 0
            tempcounter3 = tempcounter3 + 1
        # print(booleanquery)
        tempcounter4 = len(booleanquery)
        while tempcounter4 < 10:
            booleanquery.append(0)
            tempcounter4 = tempcounter4 + 1

        idealbooleanresult = []
        for i in range(0, 10):
            if i < len(queryRelevenceUpdated[str(list_of_random_items[tempcounter2])]):
                idealbooleanresult.append(1)
            else:
                idealbooleanresult.append(0)
        idealbooleanresult.sort(reverse=True)
        if sum(booleanquery) == 0:
            ndcgscoreboolean = 0
        else:
            ndcgscoreboolean = ndcg_score(booleanquery, idealbooleanresult)
        booleanndcg.append(ndcgscoreboolean)
        tempcounter2 = tempcounter2 + 1

    print('P value for all the queries processed is:')
    print(scipy.stats.wilcoxon(vectorndcg, booleanndcg,
                               zero_method='wilcox', correction=False))
    print('Done')
def test(args):
    if args.enable_hvd:
        import horovod.torch as hvd

    hvd_size, hvd_rank, hvd_local_rank = utils.init_hvd_cuda(
        args.enable_hvd, args.enable_gpu)

    if args.load_ckpt_name is not None:
        # TODO: choose ckpt_path
        ckpt_path = utils.get_checkpoint(args.model_dir, args.load_ckpt_name)
    else:
        ckpt_path = utils.latest_checkpoint(args.model_dir)

    assert ckpt_path is not None, 'No ckpt found'
    checkpoint = torch.load(ckpt_path)

    if 'subcategory_dict' in checkpoint:
        subcategory_dict = checkpoint['subcategory_dict']
    else:
        subcategory_dict = {}

    category_dict = checkpoint['category_dict']
    word_dict = checkpoint['word_dict']
    domain_dict = checkpoint['domain_dict']

    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    config = AutoConfig.from_pretrained("bert-base-uncased",
                                        output_hidden_states=True)
    bert_model = AutoModel.from_pretrained("bert-base-uncased", config=config)

    model = ModelBert(args, bert_model, len(category_dict), len(domain_dict),
                      len(subcategory_dict))

    if args.enable_gpu:
        model.cuda()

    model.load_state_dict(checkpoint['model_state_dict'])
    logging.info(f"Model loaded from {ckpt_path}")

    if args.enable_hvd:
        hvd.broadcast_parameters(model.state_dict(), root_rank=0)

    model.eval()
    torch.set_grad_enabled(False)

    news, news_index, category_dict, domain_dict, subcategory_dict = read_news_bert(
        os.path.join(args.root_data_dir,
                     f'{args.market}/{args.test_dir}/news.tsv'),
        args, tokenizer)

    news_title, news_title_type, news_title_attmask, \
        news_abstract, news_abstract_type, news_abstract_attmask, \
        news_body, news_body_type, news_body_attmask, \
        news_category, news_domain, news_subcategory = get_doc_input_bert(
            news, news_index, category_dict, domain_dict, subcategory_dict, args)

    news_combined = np.concatenate([
        x for x in [news_title, news_title_type, news_title_attmask,
                    news_abstract, news_abstract_type, news_abstract_attmask,
                    news_body, news_body_type, news_body_attmask,
                    news_category, news_domain, news_subcategory]
        if x is not None], axis=1)

    class NewsDataset(Dataset):
        def __init__(self, data):
            self.data = data

        def __getitem__(self, idx):
            return self.data[idx]

        def __len__(self):
            return self.data.shape[0]

    def news_collate_fn(arr):
        arr = torch.LongTensor(arr)
        return arr

    news_dataset = NewsDataset(news_combined)
    news_dataloader = DataLoader(news_dataset,
                                 batch_size=args.batch_size * 4,
                                 num_workers=args.num_workers,
                                 collate_fn=news_collate_fn)

    news_scoring = []
    with torch.no_grad():
        for input_ids in tqdm(news_dataloader):
            input_ids = input_ids.cuda()
            news_vec = model.news_encoder(input_ids)
            news_vec = news_vec.to(torch.device("cpu")).detach().numpy()
            news_scoring.extend(news_vec)

    news_scoring = np.array(news_scoring)
    logging.info("news scoring num: {}".format(news_scoring.shape[0]))

    dataloader = DataLoaderTest(
        news_index=news_index,
        news_scoring=news_scoring,
        word_dict=word_dict,
        news_bias_scoring=None,
        data_dir=os.path.join(args.root_data_dir,
                              f'{args.market}/{args.test_dir}'),
        filename_pat=args.filename_pat,
        args=args,
        world_size=hvd_size,
        worker_rank=hvd_rank,
        cuda_device_idx=hvd_local_rank,
        enable_prefetch=True,
        enable_shuffle=False,
        enable_gpu=args.enable_gpu,
    )

    from metrics import roc_auc_score, ndcg_score, mrr_score, ctr_score

    AUC = []
    MRR = []
    nDCG5 = []
    nDCG10 = []

    def print_metrics(hvd_local_rank, cnt, x):
        logging.info("[{}] Ed: {}: {}".format(
            hvd_local_rank, cnt,
            '\t'.join(["{:0.2f}".format(i * 100) for i in x])))

    def get_mean(arr):
        return [np.array(i).mean() for i in arr]

    for cnt, (log_vecs, log_mask, news_vecs, news_bias,
              labels) in enumerate(dataloader):
        his_lens = torch.sum(log_mask,
                             dim=-1).to(torch.device("cpu")).detach().numpy()

        if args.enable_gpu:
            log_vecs = log_vecs.cuda(non_blocking=True)
            log_mask = log_mask.cuda(non_blocking=True)

        user_vecs = model.user_encoder(log_vecs, log_mask).to(
            torch.device("cpu")).detach().numpy()

        for index, user_vec, news_vec, bias, label, his_len in zip(
                range(len(labels)), user_vecs, news_vecs, news_bias, labels,
                his_lens):
            if label.mean() == 0 or label.mean() == 1:
                continue

            score = np.dot(news_vec, user_vec)

            auc = roc_auc_score(label, score)
            mrr = mrr_score(label, score)
            ndcg5 = ndcg_score(label, score, k=5)
            ndcg10 = ndcg_score(label, score, k=10)

            AUC.append(auc)
            MRR.append(mrr)
            nDCG5.append(ndcg5)
            nDCG10.append(ndcg10)

        if cnt % args.log_steps == 0:
            print_metrics(hvd_rank, cnt * args.batch_size,
                          get_mean([AUC, MRR, nDCG5, nDCG10]))

    # stop scoring
    dataloader.join()

    for i in range(2):
        print_metrics(hvd_rank, cnt * args.batch_size,
                      get_mean([AUC, MRR, nDCG5, nDCG10]))
def test(rank, args):
    if rank is None:
        is_distributed = False
        rank = 0
    else:
        is_distributed = True

    if is_distributed:
        utils.setuplogger()
        dist.init_process_group('nccl',
                                world_size=args.nGPU,
                                init_method='env://',
                                rank=rank)
        torch.cuda.set_device(rank)

    if args.load_ckpt_name is not None:
        ckpt_path = utils.get_checkpoint(args.model_dir, args.load_ckpt_name)
    assert ckpt_path is not None, 'No checkpoint found.'
    checkpoint = torch.load(ckpt_path, map_location='cpu')

    subcategory_dict = checkpoint['subcategory_dict']
    category_dict = checkpoint['category_dict']
    word_dict = checkpoint['word_dict']

    dummy_embedding_matrix = np.zeros(
        (len(word_dict) + 1, args.word_embedding_dim))
    module = importlib.import_module(f'model.{args.model}')
    model = module.Model(args, dummy_embedding_matrix, len(category_dict),
                         len(subcategory_dict))
    model.load_state_dict(checkpoint['model_state_dict'])
    logging.info(f"Model loaded from {ckpt_path}")

    if args.enable_gpu:
        model.cuda(rank)
    model.eval()
    torch.set_grad_enabled(False)

    news, news_index = read_news(os.path.join(args.test_data_dir, 'news.tsv'),
                                 args, mode='test')
    news_title, news_category, news_subcategory = get_doc_input(
        news, news_index, category_dict, subcategory_dict, word_dict, args)
    news_combined = np.concatenate([
        x for x in [news_title, news_category, news_subcategory]
        if x is not None], axis=-1)

    news_dataset = NewsDataset(news_combined)
    news_dataloader = DataLoader(news_dataset,
                                 batch_size=args.batch_size,
                                 num_workers=4)

    news_scoring = []
    with torch.no_grad():
        for input_ids in tqdm(news_dataloader):
            input_ids = input_ids.cuda(rank)
            news_vec = model.news_encoder(input_ids)
            news_vec = news_vec.to(torch.device("cpu")).detach().numpy()
            news_scoring.extend(news_vec)

    news_scoring = np.array(news_scoring)
    logging.info("news scoring num: {}".format(news_scoring.shape[0]))

    if rank == 0:
        # Estimate average pairwise document similarity by random sampling.
        doc_sim = 0
        for _ in tqdm(range(1000000)):
            i = random.randrange(1, len(news_scoring))
            j = random.randrange(1, len(news_scoring))
            if i != j:
                doc_sim += np.dot(news_scoring[i], news_scoring[j]) / (
                    np.linalg.norm(news_scoring[i]) *
                    np.linalg.norm(news_scoring[j]))
        logging.info(f'News doc-sim: {doc_sim / 1000000}')

    data_file_path = os.path.join(args.test_data_dir, f'behaviors_{rank}.tsv')

    def collate_fn(tuple_list):
        log_vecs = torch.FloatTensor([x[0] for x in tuple_list])
        log_mask = torch.FloatTensor([x[1] for x in tuple_list])
        news_vecs = [x[2] for x in tuple_list]
        labels = [x[3] for x in tuple_list]
        return (log_vecs, log_mask, news_vecs, labels)

    dataset = DatasetTest(data_file_path, news_index, news_scoring, args)
    dataloader = DataLoader(dataset,
                            batch_size=args.batch_size,
                            collate_fn=collate_fn)

    from metrics import roc_auc_score, ndcg_score, mrr_score

    AUC = []
    MRR = []
    nDCG5 = []
    nDCG10 = []

    def print_metrics(rank, cnt, x):
        logging.info("[{}] {} samples: {}".format(
            rank, cnt, '\t'.join(["{:0.2f}".format(i * 100) for i in x])))

    def get_mean(arr):
        return [np.array(i).mean() for i in arr]

    def get_sum(arr):
        return [np.array(i).sum() for i in arr]

    local_sample_num = 0

    for cnt, (log_vecs, log_mask, news_vecs, labels) in enumerate(dataloader):
        local_sample_num += log_vecs.shape[0]

        if args.enable_gpu:
            log_vecs = log_vecs.cuda(rank, non_blocking=True)
            log_mask = log_mask.cuda(rank, non_blocking=True)

        user_vecs = model.user_encoder(log_vecs, log_mask).to(
            torch.device("cpu")).detach().numpy()

        for user_vec, news_vec, label in zip(user_vecs, news_vecs, labels):
            if label.mean() == 0 or label.mean() == 1:
                continue

            score = np.dot(news_vec, user_vec)

            auc = roc_auc_score(label, score)
            mrr = mrr_score(label, score)
            ndcg5 = ndcg_score(label, score, k=5)
            ndcg10 = ndcg_score(label, score, k=10)

            AUC.append(auc)
            MRR.append(mrr)
            nDCG5.append(ndcg5)
            nDCG10.append(ndcg10)

        if cnt % args.log_steps == 0:
            print_metrics(rank, local_sample_num,
                          get_mean([AUC, MRR, nDCG5, nDCG10]))

    logging.info('[{}] local_sample_num: {}'.format(rank, local_sample_num))
    if is_distributed:
        local_sample_num = torch.tensor(local_sample_num).cuda(rank)
        dist.reduce(local_sample_num, dst=0, op=dist.ReduceOp.SUM)
        local_metrics_sum = torch.FloatTensor(
            get_sum([AUC, MRR, nDCG5, nDCG10])).cuda(rank)
        dist.reduce(local_metrics_sum, dst=0, op=dist.ReduceOp.SUM)
        if rank == 0:
            print_metrics('*', local_sample_num,
                          local_metrics_sum / local_sample_num)
    else:
        print_metrics('*', local_sample_num,
                      get_mean([AUC, MRR, nDCG5, nDCG10]))
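# The `metrics` module imported in the two functions above is not shown. A
# minimal sketch of mrr_score and ndcg_score in the style commonly used for
# MIND-like news-recommendation evaluation (binary labels, one score per
# candidate); roc_auc_score can come from sklearn.metrics:
import numpy as np

def dcg_score(y_true, y_score, k=10):
    order = np.argsort(y_score)[::-1]
    y_true = np.take(y_true, order[:k])
    gains = 2 ** y_true - 1
    discounts = np.log2(np.arange(len(y_true)) + 2)
    return np.sum(gains / discounts)

def ndcg_score(y_true, y_score, k=10):
    best = dcg_score(y_true, y_true, k)
    actual = dcg_score(y_true, y_score, k)
    return actual / best

def mrr_score(y_true, y_score):
    order = np.argsort(y_score)[::-1]
    y_true = np.take(y_true, order)
    rr_score = y_true / (np.arange(len(y_true)) + 1)
    return np.sum(rr_score) / np.sum(y_true)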
def to_ndcg(qrels, q_text, idx_file, tk=10, n=2):
    # Column names for creating a DataFrame for easier data manipulation
    column_names = ['qid', 'docid', 'bool_rel', 'vec_rel']
    # df_qrels = pd.read_csv('../CranfieldDataset/qrels.text',
    #                        names=column_names, sep=' ')  # can test by hard-coding
    df_qrels = pd.read_csv('../CranfieldDataset/qrels.sample',
                           names=column_names, sep=' ')  # can test by hard-coding
    # df_qrels = pd.read_csv(qrels, names=column_names, sep=' ')
    # print(df_qrels)

    unique_qids = list(set(list(df_qrels.qid.values)))
    random.shuffle(unique_qids)
    random_qids = unique_qids[0:n]

    # qrys is a dict --- hard-coded path for testing
    qrys = cranqry.loadCranQry('../CranfieldDataset/query.text')
    # qrys = cranqry.loadCranQry(q_text)
    qrys_ids = [key for key, val in qrys.items()]

    II = index.InvertedIndex()
    index_file = II.load("index_file.json")  # hard-coded path for testing
    # index_file = II.load(idx_file)

    # Lists for storing aggregate NDCG scores
    vec_agg_ndcg, bool_agg_ndcg = list(), list()
    for qid in random_qids:
        print(qid)
        # DataFrame for one query id --- note the comparison of an integer
        # qid against a string qid
        df_qid = df_qrels[df_qrels["qid"] == qid]
        # List of doc ids for a randomly chosen query id from qrels.text ---
        # to be used for ndcg_score
        qid_docids = list(df_qid['docid'])
        print(qid_docids)
        # Very important: the decimal qid in random_qids must be matched to
        # the zero-padded query ids used in the Cranfield dataset.
        st_qid = str(qid)
        if len(st_qid) == 1:
            st_qid = "00" + st_qid
        elif len(st_qid) == 2:
            st_qid = "0" + st_qid

        if st_qid in qrys_ids:
            qp = QueryProcessor(qrys[st_qid].text, index_file, 'cran.all')
            bool_array = qp.booleanQuery()
            vec_array = qp.vectorQuery(10)  # change back to 'tk'
            print(bool_array)
            bool_array = [int(v) for v in bool_array]
            print(bool_array)

            # NDCG for the boolean model
            bool_list = [(0, 0)] * 10  # change back to tk
            idx = 0
            for doc_id in bool_array:
                # Iteratively check whether a docid returned by the boolean
                # model is present in qrels.text for this query (qid)
                if doc_id in qid_docids:
                    bool_list[idx] = (1, 1)
                else:
                    bool_list[idx] = (0, 1)
                idx += 1
                if idx == 10:
                    break
            # print(bool_list)
            y_true = [int(bool_id[0]) for bool_id in bool_list]
            y_score = [int(bool_id[1]) for bool_id in bool_list]
            print("bool", y_true)
            print("bool", y_score)
            bool_agg_ndcg.append(metrics.ndcg_score(y_true, y_score, 10))

            # NDCG for the vector model
            print(vec_array)
            # y_score passed to ndcg_score is the list of cosine similarities
            y_score = [vec_id[1] for vec_id in vec_array]
            # List of docids from the (docid, similarity_score) tuples
            vec_ids = [int(vec_id[0]) for vec_id in vec_array]
            # print(vec_ids)
            y_true = [0] * 10  # added on 0317 --- change back to tk
            idx = 0
            for doc_id in vec_ids:
                # Iteratively check whether a docid returned by the vector
                # model is present in qrels.text for this query (qid)
                if doc_id in qid_docids:
                    y_true[idx] = 1
                idx += 1
            print("vec", y_true)
            print("vec", y_score)
            vec_agg_ndcg.append(metrics.ndcg_score(y_true, y_score, 10))
            del qp  # garbage collection
    return bool_agg_ndcg, vec_agg_ndcg
def eval(index_file, query_file, qrels_File, number_of_queries):
    # Read the query file and index file
    queries = loadCranQry(query_file)
    queries_id_list = [str(int(x)) for x in queries.keys()]
    # print(queries_id_list)

    # Read qrels.text
    qrels_dict = process_querls_file(qrels_File, queries_id_list)
    inputdocument = cran.CranFile("cran.all")
    # Load the index file saved in part 1
    index = InvertedIndex().load(index_file)
    qp = QueryProcessor(queries, index, inputdocument, number_of_queries)

    queries_id_list_int = [int(x) for x in qrels_dict.keys()]
    queries_id_ls = [int(x) for x in queries.keys()]
    # IdeaVectorsforQuery_ids = {}
    sumbooleanNADC = []
    sumvectorNADC = []
    with open('Evaluation_search.csv', 'w') as f:
        f.write("%s,%s,%s,%s\n" % ("Iteration", "AverageNDCG-booleanModel",
                                   "AverageNDCG-vectorModel", "P-value"))
        for i in range(0, 5):
            vectorNADC = []
            booleanNADC = []
            intersection_queries = list(
                set(queries_id_list_int) & set(queries_id_ls))
            random_query_id_list = random.sample(queries_id_list_int,
                                                 number_of_queries)
            # random_query_id_list = [153, 18]
            # print(random_query_id_list)
            for q_id in random_query_id_list:
                print("Processing for Query ID ::", q_id)
                qp.querynumber = q_id
                # boolean_res = qp.booleanQuery()
                vector_top3 = qp.vectorQuery(5)
                # vector_top3 = [('12', 0.34), ('746', 0.33), ('875', 0.24)]
                # print(boolean_res)
                print("Output for Vector Model Result::", vector_top3)
                if len(vector_top3) < 1:
                    vectorNADC.append(0)
                else:
                    vector_label = [x[0] for x in vector_top3]
                    score = [x[1] for x in vector_top3]
                    print("DocumentIDs of Vector Model Result:: ", vector_label)
                    print("Scores of Vector Model Result::", score)
                    true_label = vector_label.copy()
                    query_id = str(q_id)
                    for x in vector_label:
                        # str_x = "{0:0=3d}".format(x)
                        ind = vector_label.index(x)
                        if x in qrels_dict.get(query_id):
                            true_label[ind] = 1
                        else:
                            true_label[ind] = 0
                    if len(true_label) < 5:
                        len_val = 10 - len(true_label)
                        true_label.extend([0] * len_val)
                    print("Actual Vector:: ", true_label)
                    print("Predicted Vector:: ", score)
                    if sum(true_label) == 0:
                        vectorNADC.append(0)
                    else:
                        ndcg = metrics.ndcg_score(true_label, score, 5)
                        print("Calculated NDCG for Vector::", ndcg)
                        vectorNADC.append(ndcg)

                boolean_res = qp.booleanQuery()
                print("Output of boolean_res:: ", boolean_res)
                if len(boolean_res) < 1:
                    booleanNADC.append(0)
                else:
                    score = [1] * len(boolean_res)
                    if len(score) < 5:
                        leng = 5 - len(score)
                        score.extend([0] * leng)
                    true_label = boolean_res.copy()
                    query_id = str(q_id)
                    for x in boolean_res:
                        ind = boolean_res.index(x)
                        if x in qrels_dict.get(query_id):
                            true_label[ind] = 1
                        else:
                            true_label[ind] = 0
                    if len(true_label) < 5:
                        len_val = 10 - len(true_label)
                        true_label.extend([0] * len_val)
                    print("Actual boolean:: ", true_label)
                    print("Predicted boolean:: ", score)
                    if sum(true_label) == 0:
                        booleanNADC.append(0)
                    else:
                        ndcg = metrics.ndcg_score(true_label, score, 5)
                        print("Calculated NDCG for Boolean::", ndcg)
                        booleanNADC.append(ndcg)

            print("Calculated NDCG scores for all queries", vectorNADC)
            average_vectorNADC = float(sum(vectorNADC) / number_of_queries)
            print("Calculated NDCG scores for all queries", booleanNADC)
            average_booleanNADC = float(sum(booleanNADC) / number_of_queries)
            print("Average NDCG Vector::", average_vectorNADC)
            print("Average NDCG boolean::", average_booleanNADC)
            p_value = scipy.stats.wilcoxon(vectorNADC, booleanNADC,
                                           zero_method='wilcox',
                                           correction=False)
            print(i, str(average_booleanNADC), str(average_vectorNADC),
                  str(p_value[1]))
            p = "%.20f" % float(str(p_value[1]))
            print('P value for all the queries processed is:', p)
            f.write("%s,%s,%s,%s\n" % (i + 1, str(average_booleanNADC),
                                       str(average_vectorNADC), str(p)))
    print('Done')
def VectorCompare():
    queries = loadCranQry("query.text")
    queries_id_list = [str(int(x)) for x in queries.keys()]
    inputdocument = cran.CranFile("cran.all")
    # Load the index file saved in part 1
    index = InvertedIndex().load("index_file")
    qp = QueryProcessor(queries, index, inputdocument, 10)
    # print(queries_id_list)

    # Read qrels.text
    qrels_dict = process_querls_file("qrels.text", queries_id_list)
    # IdeaVectorsforQuery_ids = {}
    # random_query_id_list = [153, 18]
    query_ids = [4, 29, 53, 58, 100]
    vectorNADC1 = []
    vectorNADC2 = []
    for q_id in query_ids:
        qp.querynumber = q_id
        # Run the vector model twice: default and the alternative variant
        vector_top3 = qp.vectorQuery(5)
        vector2_top3 = qp.vectorQuery(5, True)
        # vector_top3 = [('12', 0.34), ('746', 0.33), ('875', 0.24)]
        print("Output for Vector Model Result::", vector_top3)
        if len(vector_top3) < 1:
            vectorNADC1.append(0)
        else:
            vector_label = [x[0] for x in vector_top3]
            score = [x[1] for x in vector_top3]
            print("DocumentIDs of Vector Model Result:: ", vector_label)
            print("Scores of Vector Model Result::", score)
            true_label = vector_label.copy()
            query_id_str = str(q_id)
            for x in vector_label:
                # str_x = "{0:0=3d}".format(x)
                ind = vector_label.index(x)
                if x in qrels_dict.get(query_id_str):
                    true_label[ind] = 1
                else:
                    true_label[ind] = 0
            if len(true_label) < 5:
                true_label.extend([0] * (10 - len(true_label)))
            print("Actual Vector:: ", true_label)
            print("Predicted Vector:: ", score)
            if sum(true_label) == 0:
                vectorNADC1.append(0)
            else:
                ndcg = metrics.ndcg_score(true_label, score, 5)
                print("Calculated NDCG for Vector::", ndcg)
                vectorNADC1.append(ndcg)

        if len(vector2_top3) < 1:
            vectorNADC2.append(0)
        else:
            vector_label = [x[0] for x in vector2_top3]
            score = [x[1] for x in vector2_top3]
            print("DocumentIDs of Vector Model Result:: ", vector_label)
            print("Scores of Vector Model Result::", score)
            true_label = vector_label.copy()
            query_id_str = str(q_id)
            for x in vector_label:
                # str_x = "{0:0=3d}".format(x)
                ind = vector_label.index(x)
                if x in qrels_dict.get(query_id_str):
                    true_label[ind] = 1
                else:
                    true_label[ind] = 0
            if len(true_label) < 5:
                true_label.extend([0] * (10 - len(true_label)))
            print("Actual Vector:: ", true_label)
            print("Predicted Vector:: ", score)
            if sum(true_label) == 0:
                vectorNADC2.append(0)
            else:
                ndcg = metrics.ndcg_score(true_label, score, 5)
                print("Calculated NDCG for Vector::", ndcg)
                vectorNADC2.append(ndcg)

    print("Calculated NDCG scores for all queries", vectorNADC1)
    average_vectorNADC = float(sum(vectorNADC1) / 5)
    print("Calculated NDCG scores for all queries", vectorNADC2)
    average_vectorNADC2 = float(sum(vectorNADC2) / 5)
    print("Average NDCG Vector::", average_vectorNADC)
    print("Average NDCG Vector (variant)::", average_vectorNADC2)
    print(vectorNADC1)
    print(vectorNADC2)
    p_value = scipy.stats.wilcoxon(vectorNADC1, vectorNADC2,
                                   zero_method='wilcox', correction=False)
    p = "%.20f" % float(str(p_value[1]))
    print('P value for all the queries processed is:', p)
def eval(index_file, query_text, qrels, n):
    qrys = cranqry.loadCranQry(query_text)
    queries = {}
    for q in qrys:
        queries[q] = qrys[q].text
    query_ids = list(queries.keys())
    query_ids.sort()
    query_ids_ints = []
    for k in range(0, len(query_ids)):
        query_ids_ints.append(int(query_ids[k]))

    # Generate n distinct random queries
    set1 = set()
    while len(set1) != n:
        set1.add(random.choice(query_ids_ints))
    selected_queries = list(set1)

    docs = set()
    qrels = {}
    # Parse the relevance judgements (qrels.text)
    f = open("qrels.text", "r")
    l = f.readline()
    while l:
        j = l.split(" ")
        if query_ids_ints[int(j[0]) - 1] in qrels.keys():
            qrels[query_ids_ints[int(j[0]) - 1]].append(int(j[1]))
        else:
            qrels[query_ids_ints[int(j[0]) - 1]] = [int(j[1])]
        l = f.readline()

    # Match queries in query.text and qrels.text
    cranqryobj = cranqry.loadCranQry(query_text)
    dict_query = {}
    for q in cranqryobj:
        dict_query[int(q)] = cranqryobj[q].text

    indexObject = index.InvertedIndex()
    items = indexObject.load(index_file)
    vector_ndcg_score = {}
    vector_score_dict = {}
    for q in selected_queries:
        print(q)
        query_raw = dict_query[q]
        QPobj = QueryProcessor(query_raw, items, index_file)
        QPobj.preprocessing()
        # Fetch the first 10 documents for a query using the vector model
        result_list = QPobj.vectorQuery(10)
        # Fetch documents for a query using the boolean model
        boolean_result_list = QPobj.booleanQuery()
        print("Boolean query result : ", boolean_result_list)
        truth_list = qrels[q]
        boolean_output_list = []
        rank_doc_list = list(map(lambda x: int(x[0]), result_list))
        # Relevant documents for the query
        print("Relevant documents for this query : ", truth_list)
        # Document result list for the vector model
        print("Vector model result : ", rank_doc_list)
        vector_score_list = []

        # Calculate the predicted scores for the boolean model
        for id in boolean_result_list:
            if int(id) in truth_list:
                boolean_output_list.append(1)
            else:
                boolean_output_list.append(0)
        # Truncate to 10 entries, then pad with zeros up to 10
        # (padding an over-long list would otherwise never terminate).
        boolean_score_list = boolean_output_list[:10]
        while len(boolean_score_list) != 10:
            boolean_score_list.append(0)

        # Calculate the predicted scores for the vector model
        for id in rank_doc_list:
            if id in truth_list:
                vector_score_list.append(1)
            else:
                vector_score_list.append(0)
        vector_score_dict[q] = vector_score_list

        # Calculate the ground-truth scores for the vector model
        truth_score_list = []
        for i in range(0, len(vector_score_list)):
            truth_score_list.append(vector_score_list[i])
        truth_score_list.sort(reverse=True)

        # Calculate the ground-truth scores for the boolean model
        boolean_truth_score_list = []
        for i in range(0, len(boolean_score_list)):
            boolean_truth_score_list.append(boolean_score_list[i])
        boolean_truth_score_list.sort(reverse=True)

        print("Vector model ground_truth list is:\n", truth_score_list)
        print("Vector ranking score list is:\n", vector_score_list)
        print("Boolean model ground_truth list is:\n", boolean_truth_score_list)
        print("Boolean model score list is:\n", boolean_score_list)
        vector_ndcg_score[q] = [
            ndcg_score(np.array(boolean_truth_score_list),
                       np.array(boolean_score_list)),
            ndcg_score(np.array(truth_score_list),
                       np.array(vector_score_list))
        ]

    # Collect the NDCG scores of the boolean and vector models over all the
    # randomly generated queries
    vector_list = []
    boolean_list = []
    for qu in vector_ndcg_score:
        vector_list.append(vector_ndcg_score[qu][1])
        boolean_list.append(vector_ndcg_score[qu][0])
    print("ndcg score of boolean and vector models for all the queries:\n",
          vector_ndcg_score)
    print("ndcg scores list for boolean model for all the queries:\n",
          boolean_list)
    print("ndcg scores list for vector model for all the queries:\n",
          vector_list)

    # Calculate p-values for the boolean and vector models using the
    # Wilcoxon test and the t-test
    p_value_wilcoxon = stats.wilcoxon(np.array(boolean_list),
                                      np.array(vector_list))
    p_value_ttest = stats.ttest_ind(np.array(boolean_list),
                                    np.array(vector_list),
                                    equal_var=False)
    print("wilcoxon test p value is:", p_value_wilcoxon[1])
    print("ttest p value is :", p_value_ttest[1])