Ejemplo n.º 1
0
def make_scores_lstm(my_rnn, query, answers, emb_size=768, hidden_size=64):
    """Score each answer against the query using final recurrent hidden states.

    The query and every answer are turned into tensors by ``create_tensor``,
    given a leading dimension with ``unsqueeze(0)`` and passed through
    ``my_rnn``. The score of an answer is the cosine similarity between the
    query's last hidden state and that answer's last hidden state.

    Args:
        my_rnn: Callable returning ``(outputs, (last_hidden, last_state))``.
            Presumably a ``torch.nn.LSTM`` built with ``batch_first=True``
            -- TODO confirm against the caller.
        query: Raw query text.
        answers: Iterable of raw answer texts.
        emb_size: Unused; retained so existing callers keep working.
        hidden_size: Unused; retained so existing callers keep working.

    Returns:
        A list with one similarity score per answer, in input order. An empty
        ``answers`` yields an empty list.
    """
    # Same preprocessing as for the answers, instead of an inline copy of it.
    _, (last_query_hidden, _) = my_rnn(create_tensor(query).unsqueeze(0))
    print("all_query_hidden")

    last_answers_hidden = []
    for ind, answer in enumerate(answers):
        print(ind)
        # [1][0] picks the last hidden state out of (outputs, (hidden, state)).
        last_answers_hidden.append(
            my_rnn(create_tensor(answer).unsqueeze(0))[1][0])
    print("last_answers_hidden")

    # Convert the query vector once; it is identical for every comparison.
    # squeeze(0) assumes a single layer/direction so the result is 2-D, which
    # is what cosine_similarity is indexed as below -- TODO confirm.
    query_vec = last_query_hidden.squeeze(0).detach().numpy()

    return [
        cosine_similarity(
            query_vec,
            last_answer_hidden.squeeze(0).detach().numpy())[0][0]
        for last_answer_hidden in last_answers_hidden
    ]
Ejemplo n.º 2
0
def make_scores_cam(query, titles, answers, threshold=0.2):
    """Count the comparative sentences found in each title + answer pair.

    The objects being compared are extracted from the HTML-stripped query.
    For every answer, its title and its sentences are classified, and the
    sentences whose "BETTER" or "WORSE" value reaches ``threshold`` are
    counted.

    Args:
        query: Raw query text (may contain HTML).
        titles: Sequence of raw answer titles, aligned by index with
            ``answers``.
        answers: Sequence of raw answer bodies.
        threshold: Minimum "BETTER"/"WORSE" value for a sentence to count as
            comparative. Defaults to the previously hard-coded 0.2.

    Returns:
        A list of ints, one per answer: the number of rows kept after
        filtering.
    """
    print ("make_scores_cam")
    query = cleanhtml(query)
    answers_clean = [cleanhtml(answer) for answer in answers]
    titles_clean = [cleanhtml(title) for title in titles]
    obj1, obj2, pred, asp = extract_objs_asp(extr, query)
    print ("obj1, obj2, pred, asp", obj1, obj2, pred, asp, "\n")
    number_of_comparative_sentences = []
    for ind, answer in enumerate(answers_clean):
        # Use the cleaned title: the raw one may still carry HTML markup,
        # while the answer sentences next to it are already stripped.
        sentences = [titles_clean[ind]] + nltk.tokenize.sent_tokenize(answer)
        dframe = prepare_sentence_DF(sentences, obj1, obj2)
        answ = classify_sentences(dframe, 'infersent')
        filt = (answ["BETTER"] >= threshold) | (answ["WORSE"] >= threshold)
        # where() blanks non-matching rows to NaN and dropna() removes them
        # (along with any matching row that already contained a NaN).
        new_answ_df = answ.where(filt).dropna()
        number_of_comparative_sentences.append(len(new_answ_df))
    return number_of_comparative_sentences
Ejemplo n.º 3
0
def make_scores_obj(query, answers):
    """Score every answer with ``count_score`` against the query's extraction.

    ``extract_objs_asp`` is run once on the query; the four values it yields
    are passed, as a tuple, together with each HTML-stripped answer to
    ``count_score``.

    Args:
        query: Query text handed to ``extract_objs_asp`` as-is.
        answers: Iterable of raw answer texts.

    Returns:
        A list holding the ``count_score`` result for each answer, in order.
    """
    obj1, obj2, pred, asp = extract_objs_asp(extr, query)
    print("in make scores", obj1, obj2, pred, asp)

    extracted = (obj1, obj2, pred, asp)
    scores_answers = []
    for answer in answers:
        scores_answers.append(count_score(cleanhtml(answer), extracted))
    return scores_answers
Ejemplo n.º 4
0
def make_scores_1(query, answer_titles):
    """Return the cosine similarity between the query and each answer title.

    The query is encoded as given; each title is HTML-stripped before being
    encoded. Both encodings are reshaped to a single row before comparison.

    Args:
        query: Query text.
        answer_titles: Iterable of raw answer titles.

    Returns:
        A list with one similarity value per title, in input order.
    """
    query_row = bc.encode([query]).reshape(1, -1)

    scores = []
    for answer_title in answer_titles:
        title_row = bc.encode([cleanhtml(answer_title)]).reshape(1, -1)
        similarity = cosine_similarity(query_row, title_row)
        scores.append(similarity[0][0])
    return scores
Ejemplo n.º 5
0
def create_tensor(strng):
    """Encode a raw string token by token and return the result as a tensor.

    The text is passed through ``cleanhtml`` and ``clean_punct``, split on
    whitespace, and the resulting tokens are encoded with ``bc.encode``.

    Args:
        strng: Raw input text.

    Returns:
        A ``torch`` tensor built from a copy of the encoder output.
    """
    tokens = clean_punct(cleanhtml(strng)).split()
    embeddings = bc.encode(tokens)
    # NOTE(review): the copy presumably guards against a read-only or
    # non-owned buffer coming back from the encoder -- confirm.
    return torch.tensor(np.copy(embeddings))