Beispiel #1
0
def ns_qtspr():
    """Rank every query's documents by query-based TSPR score alone.

    For each entry returned by ``file_scanner()`` (visited in sorted key
    order) the document ids listed in the entry's indri file are ordered by
    descending value of the matching row of ``qts_pagerank.online_tspr()``.
    One line per document is written to ``rank/ns_qtspr_rank.txt`` in the
    form ``<query_id> Q0 <doc_id + 1> <rank> <score> run-1``.

    Returns nothing; the result is the file on disk plus a completion
    message on stdout.
    """
    # get the query-based pagerank result
    qtspr_mtx = qts_pagerank.online_tspr()
    # get the indri file names
    indri_names = file_scanner()
    # `with` closes (and flushes) the rank file even if a query fails
    # part-way through, instead of leaking the handle.
    with open('rank/ns_qtspr_rank.txt', 'w') as f:
        # NOTE(review): assumes row i of qtspr_mtx belongs to the i-th key of
        # sorted(indri_names) -- confirm against online_tspr().
        for query_count, cur_num in enumerate(sorted(indri_names)):
            query_id = indri_names[cur_num][0]
            file_name = indri_names[cur_num][1]
            # doc id in the current indri file
            doc_id = doc_extracter(file_name)
            # positions into doc_id, best pagerank score first
            qtspr_score = np.argsort(qtspr_mtx[query_count][doc_id])[::-1].tolist()
            doc_id_arr = np.array(doc_id)
            qtspr_rank = doc_id_arr[qtspr_score]
            for rank_num, idx in enumerate(qtspr_rank, 1):
                # ids are written shifted by one (presumably 0-based -> 1-based)
                f.write("{} Q0 {} {} {} run-1\n".format(
                    query_id, idx + 1, rank_num, qtspr_mtx[query_count][idx]))
    # parenthesised form prints the same text on Python 2 and Python 3
    print("No-search QTSPR ranking finished." + '\n')
def ws_qtspr():
    """Rank documents by a fixed weighted sum of indri and TSPR scores.

    For each entry returned by ``file_scanner()`` (visited in sorted key
    order):

    * the indri scores from ``score_extracter`` are exponentiated and
      normalised to sum to one,
    * the query-based TSPR values of the same documents are normalised to
      sum to one,
    * the two are combined as ``0.95 * indri + 0.05 * pagerank``.

    Documents are written to ``rank/ws_qtspr_rank.txt`` in descending order
    of the combined score, one line each, in the form
    ``<query_id> Q0 <doc_id + 1> <rank> <score> run-1``.

    Returns nothing; the result is the file on disk plus a completion
    message on stdout.
    """
    # get the query-based pagerank result
    qtspr_mtx = qts_pagerank.online_tspr()
    # get the indri file names
    indri_names = file_scanner()
    # `with` closes (and flushes) the rank file even if a query fails
    # part-way through, instead of leaking the handle.
    with open('rank/ws_qtspr_rank.txt', 'w') as f:
        # NOTE(review): assumes row i of qtspr_mtx belongs to the i-th key of
        # sorted(indri_names) -- confirm against online_tspr().
        for query_count, cur_num in enumerate(sorted(indri_names)):
            query_id = indri_names[cur_num][0]
            file_name = indri_names[cur_num][1]
            # doc id in the current indri file
            doc_id = doc_extracter(file_name)
            # exponentiate so every indri score is positive before normalising
            indri_score = score_extracter(file_name)
            indri_score_pos = np.asarray(np.power(math.e, indri_score), dtype=float)
            # the total is computed once, not once per element
            indri_norm = indri_score_pos / sum(indri_score_pos)
            # normalize pagerank value
            qtspr_value = np.asarray(qtspr_mtx[query_count][doc_id], dtype=float)
            qtspr_norm = qtspr_value / sum(qtspr_value)
            # combine indri and pagerank score; array arithmetic keeps this
            # independent of whether map() returns a list or an iterator
            ws_score = 0.95 * indri_norm + 0.05 * qtspr_norm
            # positions into doc_id / ws_score, best combined score first
            order = np.argsort(ws_score)[::-1].tolist()
            for rank_num, pos in enumerate(order, 1):
                # Look the score up by position: a doc_id.index() search is
                # linear per line and picks the wrong score for repeated ids.
                # ids are written shifted by one (presumably 0-based -> 1-based)
                f.write("{} Q0 {} {} {} run-1\n".format(query_id, doc_id[pos] + 1, rank_num, ws_score[pos]))
    # parenthesised form prints the same text on Python 2 and Python 3
    print("Weighted Sum Query-based TSPR ranking finished." + '\n')
Beispiel #3
0
def cm_qtspr():
    """Rank documents with position-dependent indri / TSPR weights.

    For each entry returned by ``file_scanner()`` (visited in sorted key
    order):

    * the indri scores from ``score_extracter`` are exponentiated and
      normalised to sum to one,
    * the query-based TSPR values of the same documents are normalised to
      sum to one,
    * a per-position indri weight is produced by passing a descending ramp
      over ``[0.86, 0.96)`` through ``cosine_inter``,
    * the pagerank weight is ``(1 - indri weight)`` scaled by an ascending
      ramp over ``[0.65, 1.0)``,
    * the final score is ``indri * weight - pagerank * pagerank_weight``.

    Documents are written to ``rank/cm_qtspr_rank.txt`` in descending order
    of the final score, one line each, in the form
    ``<query_id> Q0 <doc_id + 1> <rank> <score> run-1``. A query whose indri
    file lists no documents contributes no lines.

    Returns nothing; the result is the file on disk plus a completion
    message on stdout.
    """
    # get the query-based pagerank result
    qtspr_mtx = qts_pagerank.online_tspr()
    # get the indri file names
    indri_names = file_scanner()
    # `with` closes (and flushes) the rank file even if a query fails
    # part-way through, instead of leaking the handle.
    with open('rank/cm_qtspr_rank.txt', 'w') as f:
        # NOTE(review): assumes row i of qtspr_mtx belongs to the i-th key of
        # sorted(indri_names) -- confirm against online_tspr().
        for query_count, cur_num in enumerate(sorted(indri_names)):
            query_id = indri_names[cur_num][0]
            file_name = indri_names[cur_num][1]
            # doc id in the current indri file
            doc_id = doc_extracter(file_name)
            doc_num = len(doc_id)
            if doc_num == 0:
                # Nothing to rank; the weight ramps below are undefined for
                # zero documents (the step size used to divide by zero).
                continue
            # exponentiate so every indri score is positive before normalising
            indri_score = score_extracter(file_name)
            indri_score_pos = np.asarray(np.power(math.e, indri_score), dtype=float)
            # the total is computed once, not once per element
            indri_norm = indri_score_pos / sum(indri_score_pos)
            # normalize pagerank value
            qtspr_value = np.asarray(qtspr_mtx[query_count][doc_id], dtype=float)
            qtspr_norm = qtspr_value / sum(qtspr_value)
            # use custom method to combine
            # linspace(..., endpoint=False) yields exactly doc_num evenly
            # spaced values; arange with a fractional step can round to a
            # different length and break the element-wise products below.
            mu_list = np.linspace(0.86, 0.96, doc_num, endpoint=False)[::-1]
            decay_list = np.linspace(0.65, 1.0, doc_num, endpoint=False)
            mu2_list = cosine_inter(mu_list)
            mu3_list = np.multiply(np.subtract(1.0, mu2_list), decay_list)
            # the pagerank term is subtracted, i.e. it acts as a penalty
            ws_score = np.subtract(np.multiply(indri_norm, mu2_list), np.multiply(qtspr_norm, mu3_list))
            # positions into doc_id / ws_score, best final score first
            order = np.argsort(ws_score)[::-1].tolist()
            for rank_num, pos in enumerate(order, 1):
                # Look the score up by position: a doc_id.index() search is
                # linear per line and picks the wrong score for repeated ids.
                # ids are written shifted by one (presumably 0-based -> 1-based)
                f.write("{} Q0 {} {} {} run-1\n".format(query_id, doc_id[pos] + 1, rank_num, ws_score[pos]))
    # parenthesised form prints the same text on Python 2 and Python 3
    print("Custom method query-based TSPR ranking finished." + '\n')
def ns_qtspr():
    """Rank every query's documents by query-based TSPR score alone.

    For each entry returned by ``file_scanner()`` (visited in sorted key
    order) the document ids listed in the entry's indri file are ordered by
    descending value of the matching row of ``qts_pagerank.online_tspr()``.
    One line per document is written to ``rank/ns_qtspr_rank.txt`` in the
    form ``<query_id> Q0 <doc_id + 1> <rank> <score> run-1``.

    Returns nothing; the result is the file on disk plus a completion
    message on stdout.
    """
    # get the query-based pagerank result
    qtspr_mtx = qts_pagerank.online_tspr()
    # get the indri file names
    indri_names = file_scanner()
    # `with` closes (and flushes) the rank file even if a query fails
    # part-way through, instead of leaking the handle.
    with open('rank/ns_qtspr_rank.txt', 'w') as f:
        # NOTE(review): assumes row i of qtspr_mtx belongs to the i-th key of
        # sorted(indri_names) -- confirm against online_tspr().
        for query_count, cur_num in enumerate(sorted(indri_names)):
            query_id = indri_names[cur_num][0]
            file_name = indri_names[cur_num][1]
            # doc id in the current indri file
            doc_id = doc_extracter(file_name)
            # positions into doc_id, best pagerank score first
            qtspr_score = np.argsort(qtspr_mtx[query_count][doc_id])[::-1].tolist()
            doc_id_arr = np.array(doc_id)
            qtspr_rank = doc_id_arr[qtspr_score]
            for rank_num, idx in enumerate(qtspr_rank, 1):
                # ids are written shifted by one (presumably 0-based -> 1-based)
                f.write("{} Q0 {} {} {} run-1\n".format(query_id, idx + 1, rank_num, qtspr_mtx[query_count][idx]))
    # parenthesised form prints the same text on Python 2 and Python 3
    print("No-search QTSPR ranking finished." + '\n')