def ns_ptspr():
    """Write the no-search personalized TSPR ranking to a text file.

    The query-based PageRank result comes from
    ``pts_pagerank.online_tspr()`` and the indri files from
    ``file_scanner()``.  Entries are visited in sorted key order; the n-th
    entry is scored with the n-th row of the PageRank result.  Documents
    found in the entry's indri file are ordered by PageRank score alone,
    highest first, and one line per document is written to
    ``rank/ns_ptspr_rank.txt``::

        <query_id> Q0 <doc_id + 1> <rank> <score> run-1

    NOTE(review): this file contains a second, later definition of
    ``ns_ptspr`` with the same behaviour; at import time the later one
    replaces this one.  One of the two should be removed.
    """
    # get the query-based pagerank result
    ptspr_mtx = pts_pagerank.online_tspr()
    # get the indri file names
    indri_names = file_scanner()

    # `with` guarantees the file is closed even if a query fails midway.
    with open('rank/ns_ptspr_rank.txt', 'w') as f:
        for query_count, cur_num in enumerate(sorted(indri_names)):
            query_id = indri_names[cur_num][0]
            file_name = indri_names[cur_num][1]

            # doc id in the current indri file
            doc_id = doc_extracter(file_name)
            doc_id_arr = np.array(doc_id)

            # PageRank scores of exactly these documents for this query
            scores = ptspr_mtx[query_count][doc_id]

            # positions into doc_id, best score first
            order = np.argsort(scores)[::-1]

            for rank_num, pos in enumerate(order, 1):
                f.write("{} Q0 {} {} {} run-1\n".format(
                    query_id, doc_id_arr[pos] + 1, rank_num, scores[pos]))

    # Parenthesised single argument prints identically on Python 2 and 3.
    print("No-search QTSPR ranking finished." + '\n')
def ws_ptspr():
    """Write the weighted-sum personalized TSPR ranking to a text file.

    For each entry returned by ``file_scanner()`` (visited in sorted key
    order, the n-th entry paired with the n-th row of
    ``pts_pagerank.online_tspr()``):

    * the indri scores from ``score_extracter`` are exponentiated
      (``e ** score``) so they are all positive, then divided by their sum;
    * the PageRank scores of the same documents are divided by their sum;
    * the two are combined as ``0.95 * indri + 0.05 * pagerank``.

    Documents are ordered by the combined score, highest first, and written
    to ``rank/ws_ptspr_rank.txt`` as::

        <query_id> Q0 <doc_id + 1> <rank> <score> run-1
    """
    # get the query-based pagerank result
    ptspr_mtx = pts_pagerank.online_tspr()
    # get the indri file names
    indri_names = file_scanner()

    # `with` guarantees the file is closed even if a query fails midway.
    with open('rank/ws_ptspr_rank.txt', 'w') as f:
        for query_count, cur_num in enumerate(sorted(indri_names)):
            query_id = indri_names[cur_num][0]
            file_name = indri_names[cur_num][1]

            # doc id in the current indri file
            doc_id = doc_extracter(file_name)

            # normalize indri score for each doc; exponentiating first
            # transforms every score to a positive value
            indri_score_pos = np.power(math.e, score_extracter(file_name))
            indri_total = sum(indri_score_pos)  # computed once, not per element
            indri_norm = [float(v) / indri_total for v in indri_score_pos]

            # normalize pagerank value
            ptspr_value = ptspr_mtx[query_count][doc_id]
            ptspr_total = sum(ptspr_value)  # computed once, not per element
            ptspr_norm = [float(v) / ptspr_total for v in ptspr_value]

            # combine indri and pagerank score
            ws_score = np.add(np.multiply(indri_norm, 0.95),
                              np.multiply(ptspr_norm, 0.05))

            # positions into doc_id, best combined score first
            order = np.argsort(ws_score)[::-1]
            doc_id_arr = np.array(doc_id)

            # Look the score up by position rather than doc_id.index(): that
            # avoids a linear search per row and stays correct when the same
            # doc id occurs more than once in the file.
            for rank_num, pos in enumerate(order, 1):
                f.write("{} Q0 {} {} {} run-1\n".format(
                    query_id, doc_id_arr[pos] + 1, rank_num, ws_score[pos]))

    # Parenthesised single argument prints identically on Python 2 and 3.
    print("Weighted Sum Personalized TSPR ranking finished." + '\n')
def cm_ptspr():
    """Write the custom-combination personalized TSPR ranking to a text file.

    For each entry returned by ``file_scanner()`` (visited in sorted key
    order, the n-th entry paired with the n-th row of
    ``pts_pagerank.online_tspr()``):

    * indri scores are exponentiated (``e ** score``) and divided by their
      sum; PageRank scores of the same documents are divided by their sum;
    * a per-position indri weight ``mu2`` is obtained by passing a
      descending ramp over ``[0.85, 0.95)`` through ``cosine_inter``
      (defined elsewhere in the project);
    * a per-position PageRank weight ``mu3`` is ``(1 - mu2)`` scaled by an
      ascending ramp over ``[0.45, 1.0)``;
    * the final score is ``indri * mu2 - pagerank * mu3`` (the PageRank
      term is subtracted, not added).

    Documents are ordered by the final score, highest first, and written to
    ``rank/cm_ptspr_rank.txt`` as::

        <query_id> Q0 <doc_id + 1> <rank> <score> run-1

    Entries whose indri file yields no documents are skipped.
    """
    # get the query-based pagerank result
    ptspr_mtx = pts_pagerank.online_tspr()
    # get the indri file names
    indri_names = file_scanner()

    # `with` guarantees the file is closed even if a query fails midway.
    with open('rank/cm_ptspr_rank.txt', 'w') as f:
        for query_count, cur_num in enumerate(sorted(indri_names)):
            query_id = indri_names[cur_num][0]
            file_name = indri_names[cur_num][1]

            # doc id in the current indri file
            doc_id = doc_extracter(file_name)
            doc_num = len(doc_id)
            if doc_num == 0:
                # Nothing to rank; also avoids dividing the ramp step by zero.
                continue

            # normalize indri score for each doc; exponentiating first
            # transforms every score to a positive value
            indri_score_pos = np.power(math.e, score_extracter(file_name))
            indri_total = sum(indri_score_pos)  # computed once, not per element
            indri_norm = [float(v) / indri_total for v in indri_score_pos]

            # normalize pagerank value
            ptspr_value = ptspr_mtx[query_count][doc_id]
            ptspr_total = sum(ptspr_value)  # computed once, not per element
            ptspr_norm = [float(v) / ptspr_total for v in ptspr_value]

            # use custom method to combine.
            # np.arange with a fractional step may emit one element too many
            # because of rounding; truncating to doc_num keeps the ramps the
            # same length as the score vectors.
            mu_list = np.arange(0.85, 0.95, 0.1 / doc_num)[:doc_num][::-1]
            decay_list = np.arange(0.45, 1.0, 0.55 / doc_num)[:doc_num]
            mu2_list = cosine_inter(mu_list)
            mu3_list = np.multiply(np.subtract(1.0, mu2_list), decay_list)
            ws_score = np.subtract(np.multiply(indri_norm, mu2_list),
                                   np.multiply(ptspr_norm, mu3_list))

            # positions into doc_id, best final score first
            order = np.argsort(ws_score)[::-1]
            doc_id_arr = np.array(doc_id)

            # Look the score up by position rather than doc_id.index(): that
            # avoids a linear search per row and stays correct when the same
            # doc id occurs more than once in the file.
            for rank_num, pos in enumerate(order, 1):
                f.write("{} Q0 {} {} {} run-1\n".format(
                    query_id, doc_id_arr[pos] + 1, rank_num, ws_score[pos]))

    # Parenthesised single argument prints identically on Python 2 and 3.
    print("Custom method personalized TSPR ranking finished." + '\n')
def ns_ptspr():
    """Rank each query's documents by personalized TSPR score only.

    NOTE(review): ``ns_ptspr`` is also defined earlier in this file with the
    same behaviour.  Being the later definition, this is the one callers
    actually get; the earlier copy is dead code and should be removed.

    The PageRank result is taken from ``pts_pagerank.online_tspr()`` and the
    indri files from ``file_scanner()``.  Entries are processed in sorted
    key order, the n-th entry using the n-th row of the PageRank result.
    The documents listed in an entry's indri file are sorted by their
    PageRank score in descending order and appended to
    ``rank/ns_ptspr_rank.txt``, one line each::

        <query_id> Q0 <doc_id + 1> <rank> <score> run-1
    """
    # get the query-based pagerank result
    ptspr_mtx = pts_pagerank.online_tspr()
    # get the indri file names
    indri_names = file_scanner()

    # The context manager closes the file on every exit path, including
    # exceptions raised while a query is being processed.
    with open('rank/ns_ptspr_rank.txt', 'w') as out:
        for query_count, cur_num in enumerate(sorted(indri_names)):
            entry = indri_names[cur_num]
            query_id = entry[0]
            file_name = entry[1]

            # doc id in the current indri file
            doc_id = doc_extracter(file_name)
            doc_id_arr = np.array(doc_id)

            # scores for just the documents of this query
            doc_scores = ptspr_mtx[query_count][doc_id]

            # sort by descending order
            ranked_positions = np.argsort(doc_scores)[::-1]

            for rank_num, pos in enumerate(ranked_positions, 1):
                out.write("{} Q0 {} {} {} run-1\n".format(
                    query_id, doc_id_arr[pos] + 1, rank_num, doc_scores[pos]))

    # Parenthesised single argument prints identically on Python 2 and 3.
    print("No-search QTSPR ranking finished." + '\n')