def make_scores_lstm(my_rnn, query, answers, emb_size=768, hidden_size=64):
    """Score each answer against the query using final hidden states of ``my_rnn``.

    The query and every answer are converted to tensors with
    ``create_tensor``, given a leading dimension via ``unsqueeze(0)`` and fed
    through ``my_rnn``.  The final hidden state of the query is then compared
    with the final hidden state of each answer using ``cosine_similarity``.

    Args:
        my_rnn: Callable returning ``(all_hidden, (last_hidden, last_state))``
            for a tensor input (presumably a ``torch.nn.LSTM`` with
            ``batch_first=True`` -- TODO confirm against the caller).
        query: Raw query string (may contain HTML / punctuation).
        answers: Iterable of raw answer strings.
        emb_size: Unused; kept for backward compatibility with callers.
        hidden_size: Unused; kept for backward compatibility with callers.

    Returns:
        A list with one similarity score per answer, in the order of
        ``answers``.  An empty ``answers`` yields an empty list.
    """
    # Encode the query the same way the answers are encoded.
    query_tensor = create_tensor(query)
    _, (last_query_hidden, _) = my_rnn(query_tensor.unsqueeze(0))
    print("all_query_hidden")

    last_answers_hidden = []
    for ind, answer in enumerate(answers):
        print(ind)
        # ``[1][0]`` picks the final hidden state out of
        # ``(all_hidden, (last_hidden, last_state))``.
        last_answers_hidden.append(
            my_rnn(create_tensor(answer).unsqueeze(0))[1][0])
    print("last_answers_hidden")

    # The query vector does not change between answers, so convert it once.
    # NOTE(review): ``squeeze(0)`` must leave a 2-D array for
    # ``cosine_similarity``; this assumes a single-layer, unidirectional RNN.
    query_vec = last_query_hidden.squeeze(0).detach().numpy()
    scores = [
        cosine_similarity(
            query_vec,
            last_answer_hidden.squeeze(0).detach().numpy())[0][0]
        for last_answer_hidden in last_answers_hidden
    ]
    return scores
def make_scores_cam(query, titles, answers):
    """Count the comparative sentences found in each answer (title included).

    The query is stripped of HTML and passed to ``extract_objs_asp`` to obtain
    the two compared objects.  For every answer, its cleaned title plus the
    sentences of the cleaned answer body are classified with
    ``classify_sentences(..., 'infersent')``; a sentence counts as
    comparative when its ``BETTER`` or ``WORSE`` value is at least 0.2.

    Args:
        query: Raw query string (may contain HTML).
        titles: Sequence of raw title strings, indexed in parallel with
            ``answers``; must be at least as long as ``answers``.
        answers: Iterable of raw answer strings.

    Returns:
        A list of ints, one per answer: the number of rows that pass the
        0.2 threshold and contain no NaN values.
    """
    print ("make_scores_cam")
    query = cleanhtml(query)
    answers_clean = [cleanhtml(answer) for answer in answers]
    titles_clean = [cleanhtml(title) for title in titles]
    obj1, obj2, pred, asp = extract_objs_asp(extr, query)
    print ("obj1, obj2, pred, asp", obj1, obj2, pred, asp, "\n")

    number_of_comparative_sentences = []
    for ind, answer in enumerate(answers_clean):
        # Use the cleaned title: previously the raw title was used here, so
        # HTML markup in titles reached the classifier while the answer body
        # was cleaned, and ``titles_clean`` was computed for nothing.
        sentences = [titles_clean[ind]] + nltk.tokenize.sent_tokenize(answer)
        dframe = prepare_sentence_DF(sentences, obj1, obj2)
        answ = classify_sentences(dframe, 'infersent')
        filt = (answ["BETTER"] >= 0.2) | (answ["WORSE"] >= 0.2)
        # ``where`` blanks non-matching rows to NaN and ``dropna`` removes
        # them (along with any matching row that has a NaN in any column).
        new_answ_df = answ.where(filt).dropna()
        number_of_comparative_sentences.append(len(new_answ_df))
    return number_of_comparative_sentences
def make_scores_obj(query, answers):
    """Score every answer with ``count_score`` against the query's extraction.

    ``extract_objs_asp`` is run once on the query as given; its four results
    are printed and then passed as a tuple, together with each HTML-stripped
    answer, to ``count_score``.

    Args:
        query: Query string handed to ``extract_objs_asp`` unchanged.
        answers: Iterable of raw answer strings.

    Returns:
        A list with one ``count_score`` result per answer, in input order.
    """
    obj1, obj2, pred, asp = extract_objs_asp(extr, query)
    print("in make scores", obj1, obj2, pred, asp)

    extracted = (obj1, obj2, pred, asp)
    scores_answers = []
    for answer in answers:
        cleaned_answer = cleanhtml(answer)
        scores_answers.append(count_score(cleaned_answer, extracted))
    return scores_answers
def make_scores_1(query, answer_titles):
    """Return the similarity between the query and each answer title.

    The query is encoded once with ``bc.encode``; every title is stripped of
    HTML, encoded on its own, and compared with the query embedding through
    ``cosine_similarity`` after both are reshaped to a single row.

    Args:
        query: Query string, encoded as given (no HTML stripping).
        answer_titles: Iterable of raw title strings.

    Returns:
        A list with one similarity score per title, in input order.
    """
    query_emb = bc.encode([query])

    scores = []
    for answer_title in answer_titles:
        query_row = query_emb.reshape(1, -1)
        title_row = bc.encode([cleanhtml(answer_title)]).reshape(1, -1)
        similarity = cosine_similarity(query_row, title_row)
        scores.append(similarity[0][0])
    return scores
def create_tensor(strng):
    """Encode a raw string into a torch tensor.

    The string is stripped of HTML with ``cleanhtml`` and passed through
    ``clean_punct``, split on whitespace, and the resulting tokens are
    encoded with ``bc.encode``.

    Args:
        strng: Raw input string.

    Returns:
        A ``torch.Tensor`` built from a copy of the encoder output.
    """
    tokens = clean_punct(cleanhtml(strng)).split()
    embeddings = bc.encode(tokens)
    # Copy the encoder output before handing it to torch.
    embeddings_copy = np.copy(embeddings)
    return torch.tensor(embeddings_copy)