def compute_sentence_similarity(summaries=None):
    """Score the similarity of each adjacent sentence pair in each summary.

    For every summary (a list of sentence strings), computes a similarity
    between each consecutive pair of sentences using spaCy's WMD similarity
    hook, squashed through a shifted sigmoid: 1 / (1 + exp(-sim + 7)).

    Args:
        summaries: Optional list of summaries, each a list of sentence
            strings. Defaults to the module-level ``all_summary`` for
            backward compatibility with the original zero-argument call.

    Returns:
        A list parallel to ``summaries``; each element is a list of floats
        of length ``len(summary) - 1``, or ``[1.0]`` for a single-sentence
        summary.
    """
    if summaries is None:
        summaries = all_summary  # original behavior: read the module-level data
    # NOTE(review): loads the small model here even though the module level
    # loads 'en_core_web_md' — confirm the mismatch is intentional.
    nlp = spacy.load('en_core_web_sm')
    nlp.add_pipe(WMD.SpacySimilarityHook(nlp), last=True)
    all_score = []
    for summary in summaries:
        if len(summary) == 1:
            # Single-sentence summaries get a perfect score by convention.
            all_score.append([1.0])
            continue
        score = []
        # Pairwise iteration over consecutive sentences (was range(1, len(...))).
        for prev_sent, curr_sent in zip(summary, summary[1:]):
            doc1 = nlp(prev_sent)
            doc2 = nlp(curr_sent)
            try:
                score.append(1.0 / (1.0 + math.exp(-doc1.similarity(doc2) + 7)))
            except Exception:
                # Best-effort fallback kept from the original (narrowed from a
                # bare except): if the similarity hook fails, record 1.0
                # instead of crashing the whole pass.
                score.append(1.0)
        all_score.append(score)
    return all_score
import pandas as pd import re import glob import sys sys.path.append("./BERT/pytorch-pretrained-BERT-master") sys.path.append("./BERT") from pytorch_pretrained_bert import BertTokenizer, BertModel from wmd import WMD from torch.nn.modules.distance import CosineSimilarity torch_emb_sim = CosineSimilarity() from bert_score import score as bert_score nlp = spacy.load('en_core_web_md') nlp.add_pipe(WMD.SpacySimilarityHook(nlp), last=True) def _clean_text(txt): return txt.lower() class CFRInstance(object): def __init__( self, original_context: str, cf_context: str, original_ending: str, predicted_ending: str, gold_cf_endings: List[str], ):
def SimilarityHook(doc):
    """Construct and return a WMD ``SpacySimilarityHook`` for *doc*.

    Thin convenience wrapper around ``WMD.SpacySimilarityHook`` so callers
    need not reference the ``WMD`` class directly.
    """
    hook = WMD.SpacySimilarityHook(doc)
    return hook