def ws_gpr():
    """Write a weighted-sum ranking of Indri and global PageRank scores.

    For every query returned by ``file_scanner()`` the Indri scores are
    exponentiated and normalized to sum to 1, the PageRank values of the same
    documents are normalized to sum to 1, and the two are mixed as
    ``0.95 * indri + 0.05 * pagerank``.  Documents are written to
    ``rank/ws_gpr_rank.txt`` in descending order of the mixed score, one line
    per document: ``<query_id> Q0 <doc_id + 1> <rank> <score> run-1``.

    Returns:
        None.  A completion message is printed once every query is written.
    """
    # get the global pagerank result
    gpr_mtx = global_pagerank.gpr()
    # get the indri file names
    indri_names = file_scanner()
    # `with` closes the output file even if a query fails half-way through
    with open('rank/ws_gpr_rank.txt', 'w') as f:
        for cur_num in sorted(indri_names):
            query_id = indri_names[cur_num][0]
            file_name = indri_names[cur_num][1]
            # doc ids in the current indri file
            doc_id = doc_extracter(file_name)
            doc_id_arr = np.array(doc_id)
            # exponentiate so every indri score is positive before normalizing
            # (presumably the raw scores are log-probabilities -- TODO confirm)
            indri_score_pos = np.power(math.e, score_extracter(file_name))
            # compute each total once instead of once per element
            indri_total = sum(indri_score_pos)
            indri_norm = [float(i) / indri_total for i in indri_score_pos]
            # normalize pagerank value
            gpr_value = gpr_mtx[doc_id]
            gpr_total = sum(gpr_value)
            gpr_norm = [float(i) / gpr_total for i in gpr_value]
            # combine indri and pagerank score; np.add yields an indexable
            # array on both Python 2 and 3 (a bare map() is lazy on Python 3)
            ws_score = np.add(np.multiply(indri_norm, 0.95),
                              np.multiply(gpr_norm, 0.05))
            # positions of the documents, best score first
            order = np.argsort(ws_score)[::-1]
            # index by position rather than doc_id.index(...): linear time, and
            # the right score is written even if a doc id appears twice
            for rank_num, pos in enumerate(order, 1):
                f.write("{} Q0 {} {} {} run-1\n".format(
                    query_id, doc_id_arr[pos] + 1, rank_num, ws_score[pos]))
    print("Weighted Sum GPR ranking finished." + '\n')
def ns_gpr():
    """Write a ranking based on global PageRank only (no search score).

    For every query returned by ``file_scanner()`` the documents listed in its
    Indri file are ordered by descending global PageRank value and written to
    ``rank/ns_gpr_rank.txt``, one line per document:
    ``<query_id> Q0 <doc_id + 1> <rank> <pagerank> run-1``.

    Returns:
        None.  A completion message is printed once every query is written.
    """
    # get the global pagerank result
    gpr_mtx = global_pagerank.gpr()
    # get the indri file names
    indri_names = file_scanner()
    # `with` closes the output file even if a query fails half-way through
    with open('rank/ns_gpr_rank.txt', 'w') as f:
        for cur_num in sorted(indri_names):
            query_id = indri_names[cur_num][0]
            file_name = indri_names[cur_num][1]
            # doc ids in the current indri file
            doc_id = doc_extracter(file_name)
            # positions of the documents, highest pagerank first
            order = np.argsort(gpr_mtx[doc_id])[::-1].tolist()
            gpr_rank = np.array(doc_id)[order]
            # ranks are 1-based
            for rank_num, idx in enumerate(gpr_rank, 1):
                f.write("{} Q0 {} {} {} run-1\n".format(
                    query_id, idx + 1, rank_num, gpr_mtx[idx]))
    print("No-search GPR ranking finished." + '\n')
# Example no. 3
# 0
def cm_gpr():
    """Write a ranking that mixes Indri and PageRank with per-position weights.

    For every query returned by ``file_scanner()`` the Indri scores are
    exponentiated and normalized to sum to 1, and the PageRank values of the
    same documents are normalized to sum to 1.  Each document position gets its
    own Indri weight (``cosine_inter`` applied to a descending 0.85-0.95 ramp)
    and its own PageRank weight (``1 - indri_weight`` scaled by a 0.65-1.0
    ramp).  The final score is ``indri * w_indri - pagerank * w_pagerank``;
    note that the PageRank term is subtracted, not added.  Documents are
    written to ``rank/cm_gpr_rank.txt`` in descending score order, one line per
    document: ``<query_id> Q0 <doc_id + 1> <rank> <score> run-1``.

    Queries whose Indri file yields no documents are skipped.

    Returns:
        None.  A completion message is printed once every query is written.
    """
    # get the global pagerank result
    gpr_mtx = global_pagerank.gpr()
    # get the indri file names
    indri_names = file_scanner()
    # `with` closes the output file even if a query fails half-way through
    with open('rank/cm_gpr_rank.txt', 'w') as f:
        for cur_num in sorted(indri_names):
            query_id = indri_names[cur_num][0]
            file_name = indri_names[cur_num][1]
            # doc ids in the current indri file
            doc_id = doc_extracter(file_name)
            doc_num = len(doc_id)
            if doc_num == 0:
                # nothing to rank; also avoids dividing the step sizes by zero
                continue
            doc_id_arr = np.array(doc_id)
            # exponentiate so every indri score is positive before normalizing
            # (presumably the raw scores are log-probabilities -- TODO confirm)
            indri_score_pos = np.power(math.e, score_extracter(file_name))
            # compute each total once instead of once per element
            indri_total = sum(indri_score_pos)
            indri_norm = [float(i) / indri_total for i in indri_score_pos]
            # normalize pagerank value
            gpr_value = gpr_mtx[doc_id]
            gpr_total = sum(gpr_value)
            gpr_norm = [float(i) / gpr_total for i in gpr_value]
            # np.arange with a float step may return one element too many
            # because of rounding; trim to exactly one weight per document
            mu_list = np.arange(0.85, 0.95, 0.1 / doc_num)[:doc_num][::-1]
            decay_list = np.arange(0.65, 1.0, 0.35 / doc_num)[:doc_num]
            mu2_list = cosine_inter(mu_list)
            mu3_list = np.multiply(np.subtract(1.0, mu2_list), decay_list)
            # weighted indri score minus the decayed, weighted pagerank score
            ws_score = np.subtract(np.multiply(indri_norm, mu2_list),
                                   np.multiply(gpr_norm, mu3_list))
            # positions of the documents, best score first
            order = np.argsort(ws_score)[::-1]
            # index by position rather than doc_id.index(...): linear time, and
            # the right score is written even if a doc id appears twice
            for rank_num, pos in enumerate(order, 1):
                f.write("{} Q0 {} {} {} run-1\n".format(
                    query_id, doc_id_arr[pos] + 1, rank_num, ws_score[pos]))
    print("Custom method GPR ranking finished." + '\n')
# Example no. 4
# 0
def ns_gpr():
    """Rank each query's documents by global PageRank alone.

    The documents listed in every Indri file reported by ``file_scanner()``
    are sorted by descending global PageRank value and appended to
    ``rank/ns_gpr_rank.txt`` (the file is truncated first), one line per
    document: ``<query_id> Q0 <doc_id + 1> <rank> <pagerank> run-1``.

    Returns:
        None.  A completion message is printed once every query is written.
    """
    # get the global pagerank result
    gpr_mtx = global_pagerank.gpr()
    # get the indri file names
    indri_names = file_scanner()
    # the context manager guarantees the file is flushed and closed, even
    # when extracting or ranking a query raises
    with open('rank/ns_gpr_rank.txt', 'w') as f:
        for cur_num in sorted(indri_names):
            query_id = indri_names[cur_num][0]
            file_name = indri_names[cur_num][1]
            # doc ids in the current indri file
            doc_id = doc_extracter(file_name)
            # sort by descending order of pagerank value
            gpr_score = np.argsort(gpr_mtx[doc_id])[::-1].tolist()
            doc_id_arr = np.array(doc_id)
            gpr_rank = doc_id_arr[gpr_score]
            # enumerate from 1 replaces the hand-maintained rank counter
            for rank_num, idx in enumerate(gpr_rank, 1):
                f.write("{} Q0 {} {} {} run-1\n".format(query_id, idx + 1,
                                                        rank_num, gpr_mtx[idx]))
    print("No-search GPR ranking finished." + '\n')