Python get_BM25_scores Examples, BM25_function.get_BM25_scores Python Examples

Example #1

0

Show file

File: get_subgraph.py Project: vikas95/AIR-retriever

def get_BM25_subgraph(ques_text, answer_text,
                      justifications):
    """Rank the justifications by BM25 score against question + answer.

    The query passed to ``get_BM25_scores`` is ``ques_text`` and
    ``answer_text`` separated by a single space.

    Returns:
        The indexes of ``justifications`` ordered from the highest score to
        the lowest (an ascending ``np.argsort`` result, reversed). Only the
        ranking is returned; no passage text is assembled here.
    """
    query = ques_text + " " + answer_text
    scores = get_BM25_scores(justifications, query)
    ascending_order = np.argsort(scores)
    return ascending_order[::-1]

Example #2

0

Show file

def get_BM25_subgraph(ques_text, answer_text, justifications,
                      subgraph_size):
    """Build a passage from the ``subgraph_size`` best BM25 justifications.

    The query passed to ``get_BM25_scores`` is ``ques_text`` and
    ``answer_text`` separated by a single space.

    Returns:
        A 2-tuple ``(passage, indexes)`` where ``indexes`` holds the first
        ``subgraph_size`` entries of the descending score ranking and
        ``passage`` is the matching justifications concatenated in that
        order, each one followed by a single space (so a non-empty passage
        ends with a trailing space).
    """
    query = ques_text + " " + answer_text
    ranked = np.argsort(get_BM25_scores(justifications, query))[::-1]
    selected = ranked[:subgraph_size]

    # Every justification carries its own trailing space, hence the empty
    # separator.
    passage = "".join(justifications[idx] + " " for idx in selected)
    return passage, selected

Example #3

0

Show file

def get_POC_subgraph_BM25_filtered(
        ques_text,
        answer_text,
        justifications,
        subgraph_size=5,
        BM25_threshold=15):
    """Pick a justification subgraph among the top BM25-ranked candidates.

    Steps:
      1. Score every justification with ``get_BM25_scores`` against
         ``ques_text + " " + answer_text``.
      2. Add ``abs(min(scores))`` to every score, which lifts negative
         scores to be non-negative.
      3. Keep only the ``BM25_threshold`` highest-scoring justifications
         (all of them when there are fewer than that).
      4. Hand the surviving candidates to
         ``get_all_combination_withCoverage_best_graph_Cand_boost`` together
         with the preprocessed question/answer terms and ``subgraph_size``.

    Args:
        ques_text: Question text.
        answer_text: Candidate answer text.
        justifications: Sequence of justification sentences (strings).
        subgraph_size: Forwarded to the subgraph search; also caps the number
            of indexes reported by the fallback path.
        BM25_threshold: Maximum number of candidates kept after BM25 ranking.

    Returns:
        ``(passage, indexes)``: the chosen justifications joined by single
        spaces, and their integer indexes into ``justifications``. If the
        search returns the sentinel string ``"Crashed"``, the passage is all
        justifications joined and the indexes are the first
        ``min(subgraph_size, len(justifications))`` positions. An empty
        ``justifications`` yields ``("", [])``.
    """
    if len(justifications) == 0:
        # Nothing to rank; min() below would raise on an empty score list.
        return "", []

    ques_terms = Preprocess_QA_sentences(ques_text, 1)
    answer_terms = Preprocess_QA_sentences(answer_text, 0)

    raw_scores = get_BM25_scores(justifications,
                                 ques_text + " " + answer_text)

    # Shift by |min| so that any negative scores become non-negative.
    shift = abs(min(raw_scores))
    shifted_scores = [shift + score for score in raw_scores]

    # np.argsort sorts ascending, so reverse before slicing to keep the
    # best-scoring candidates rather than the worst ones. A set makes the
    # membership test below O(1).
    keep_count = min(len(justifications), BM25_threshold)
    top_indexes = {
        int(idx) for idx in np.argsort(shifted_scores)[::-1][:keep_count]
    }

    # Both dicts are keyed by the justification's original index and filled
    # in ascending index order.
    All_justification_terms = {}
    BM25_scores = {}
    for jind1, just1 in enumerate(justifications):
        if jind1 not in top_indexes:
            continue
        All_justification_terms[jind1] = Preprocess_QA_sentences(just1, 1)
        BM25_scores[jind1] = shifted_scores[jind1]

    best_subgraph = get_all_combination_withCoverage_best_graph_Cand_boost(
        All_justification_terms, BM25_scores, ques_terms, answer_terms,
        subgraph_size)

    if isinstance(best_subgraph, str) and best_subgraph == "Crashed":
        # Fallback: return every justification, and never report an index
        # that lies beyond the end of the list.
        best_justifications = justifications
        best_subgraph_indexes = list(
            range(min(subgraph_size, len(justifications))))
    else:
        best_subgraph_indexes = [int(s1) for s1 in best_subgraph]
        best_justifications = [
            justifications[s1] for s1 in best_subgraph_indexes
        ]

    return " ".join(best_justifications), best_subgraph_indexes

Example #4

0

Show file

File: get_subgraph.py Project: vikas95/AIR-retriever

def get_LEXICAL_justification(ques_terms, answer_terms, justifications):
    """Rank justifications by BM25 and report the query terms each one misses.

    The BM25 query is the concatenation of ``ques_terms`` and
    ``answer_terms`` joined by single spaces. For every justification, the
    "remaining" terms are the query terms that do not appear among the terms
    produced by ``Preprocess_QA_sentences(justification, 1)``.

    Args:
        ques_terms: Question terms (concatenated with ``answer_terms`` using
            ``+`` and joined as strings, so presumably a list of str).
        answer_terms: Answer terms, same type as ``ques_terms``.
        justifications: Sequence of justification sentences.

    Returns:
        A 3-tuple:
          * list of justification indexes ordered from highest to lowest
            BM25 score,
          * the remaining terms of the top-ranked justification,
          * dict mapping every justification index to its list of remaining
            terms (list order follows set iteration and is not meaningful).

    Raises:
        IndexError: if the ranking is empty, since the top-ranked entry is
            looked up unconditionally.
    """
    combined_terms = ques_terms + answer_terms
    All_BM25_scores = get_BM25_scores(justifications,
                                      " ".join(combined_terms))
    query_terms = set(combined_terms)

    Justification_ques_ans_remaining_terms = {
        jind1: list(query_terms - set(Preprocess_QA_sentences(just1, 1)))
        for jind1, just1 in enumerate(justifications)
    }

    # argsort is ascending; reverse it so the best-scoring index comes first.
    Final_index = list(np.argsort(All_BM25_scores)[::-1])

    return (Final_index,
            Justification_ques_ans_remaining_terms[Final_index[0]],
            Justification_ques_ans_remaining_terms)

Example #5

0

Show file

def get_POC_subgraph_LR(ques_text,
                        answer_text,
                        justifications,
                        IDF_vals,
                        All_x_features,
                        All_y_features,
                        gold_labels,
                        subgraph_size=5,
                        return_ROCC_vals=0):
    """Select a justification subgraph and collect features for the LR model.

    Every justification is scored with ``get_BM25_scores`` against
    ``ques_text + " " + answer_text``; ``abs(min(scores))`` is added to each
    score (lifting negative scores to be non-negative). All justifications,
    their preprocessed terms and shifted scores are then passed to
    ``get_all_combination_withCoverage_best_graph_Cand_boost_withIDF_forLR``,
    which returns the chosen subgraph plus updated feature collections.

    Args:
        ques_text: Question text.
        answer_text: Candidate answer text.
        justifications: Sequence of justification sentences (strings).
        IDF_vals: Forwarded unchanged to the subgraph search.
        All_x_features: Forwarded to the search; the value it returns is
            passed back to the caller.
        All_y_features: Same handling as ``All_x_features``.
        gold_labels: Forwarded unchanged to the subgraph search.
        subgraph_size: Forwarded to the search; also caps the number of
            indexes reported by the fallback path.
        return_ROCC_vals: When equal to ``1`` and the search succeeds, the
            returned ROCC dict is populated ("indexes", "R", "C_ques",
            "C_ans"); its "O" entry is always left empty.

    Returns:
        ``(passage, indexes, rocc_vals, All_x_features, All_y_features)``.
        On success the indexes are sorted ascending and the passage is the
        matching justifications joined by single spaces. If the search
        returns the sentinel string ``"Crashed"``, the passage is all
        justifications joined and the indexes are the first
        ``min(subgraph_size, len(justifications))`` positions. An empty
        ``justifications`` yields an empty passage, no indexes, and the
        feature collections untouched.
    """
    best_subgraph_ROCC_vals = {
        "indexes": [],
        "R": [],
        "O": [],
        "C_ans": [],
        "C_ques": []
    }

    if len(justifications) == 0:
        # Nothing to rank; min() below would raise on an empty score list.
        return ("", [], best_subgraph_ROCC_vals, All_x_features,
                All_y_features)

    ques_terms = Preprocess_QA_sentences(ques_text, 1)
    answer_terms = Preprocess_QA_sentences(answer_text, 1)

    raw_scores = get_BM25_scores(justifications,
                                 ques_text + " " + answer_text)

    # Shift by |min| so that any negative scores become non-negative.
    shift = abs(min(raw_scores))
    shifted_scores = [shift + score for score in raw_scores]

    # Both dicts are keyed by the justification's index.
    All_justification_terms = {
        jind1: Preprocess_QA_sentences(just1, 1)
        for jind1, just1 in enumerate(justifications)
    }
    BM25_scores = dict(enumerate(shifted_scores))

    # The overlap scores returned by the search are not used here.
    (best_subgraph, _overlap_scores, ques_coverage_scores,
     ans_coverage_scores, All_x_features, All_y_features
     ) = get_all_combination_withCoverage_best_graph_Cand_boost_withIDF_forLR(
         All_justification_terms, BM25_scores, ques_terms, answer_terms,
         subgraph_size, IDF_vals, All_x_features, All_y_features, gold_labels)

    if isinstance(best_subgraph, str) and best_subgraph == "Crashed":
        # Fallback: return every justification, and never report an index
        # that lies beyond the end of the list.
        best_justifications = justifications
        best_subgraph_indexes = list(
            range(min(subgraph_size, len(justifications))))
    else:
        best_subgraph = sorted(best_subgraph)
        best_subgraph_indexes = [int(s1) for s1 in best_subgraph]
        best_justifications = [
            justifications[s1] for s1 in best_subgraph_indexes
        ]

        if return_ROCC_vals == 1:
            best_subgraph_ROCC_vals["indexes"] = best_subgraph_indexes
            best_subgraph_ROCC_vals["R"] = [
                BM25_scores[s1] for s1 in best_subgraph_indexes
            ]
            best_subgraph_ROCC_vals["C_ques"] = ques_coverage_scores
            best_subgraph_ROCC_vals["C_ans"] = ans_coverage_scores

    return (" ".join(best_justifications), best_subgraph_indexes,
            best_subgraph_ROCC_vals, All_x_features, All_y_features)