def test_ParaphraseMiningEvaluator(self):
    """Tests that the ParaphraseMiningEvaluator can be loaded"""
    # Two near-duplicate pairs: (0, 1) differ only by punctuation,
    # (2, 3) are word-order paraphrases of each other.
    corpus = {
        0: "Hello World",
        1: "Hello World!",
        2: "The cat is on the table",
        3: "On the table the cat is",
    }
    gold_pairs = [(0, 1), (2, 3)]

    encoder = SentenceTransformer('paraphrase-distilroberta-base-v1')
    evaluator = evaluation.ParaphraseMiningEvaluator(corpus, gold_pairs)

    # The evaluator mines high-similarity pairs and scores them against
    # the gold duplicates; these trivial paraphrases should score ~1.0.
    result = evaluator(encoder)
    assert result > 0.99
# NOTE(review): this excerpt begins INSIDE a csv-reader loop whose header
# (and the enclosing `with open(...)`) lies before this chunk — the
# indentation below reconstructs that scope; confirm against the full file.
        dev_sentences[row['qid']] = row['question']
        # Cap the dev sentence pool — `max_dev_samples` is defined earlier
        # in the file (outside this excerpt).
        if len(dev_sentences) >= max_dev_samples:
            break

# Collect gold duplicate pairs, keeping only pairs whose both questions
# made it into the (capped) dev sentence pool above.
with open(os.path.join(dataset_path, "duplicate-mining/dev_duplicates.tsv"), encoding='utf8') as fIn:
    reader = csv.DictReader(fIn, delimiter='\t', quoting=csv.QUOTE_NONE)
    for row in reader:
        if row['qid1'] in dev_sentences and row['qid2'] in dev_sentences:
            dev_duplicates.append([row['qid1'], row['qid2']])

# The ParaphraseMiningEvaluator computes the cosine similarity between all sentences and
# extracts a list with the pairs that have the highest similarity. Given the duplicate
# information in dev_duplicates, it then computes and F1 score how well our duplicate mining worked
paraphrase_mining_evaluator = evaluation.ParaphraseMiningEvaluator(dev_sentences, dev_duplicates, name='dev')
evaluators.append(paraphrase_mining_evaluator)

###### Duplicate Questions Information Retrieval ######
# Given a question and a large corpus of thousands questions, find the most relevant (i.e. duplicate) question
# in that corpus.

# For faster processing, we limit the development corpus to only 10,000 sentences.
# NOTE(review): the comment above says 10,000 but the constant is 100,000 —
# flagging the mismatch; left as-is since this is a formatting-only pass.
max_corpus_size = 100000

ir_queries = {}  #Our queries (qid => question)
ir_needed_qids = set()  #QIDs we need in the corpus
ir_corpus = {}  #Our corpus (qid => question)
ir_relevant_docs = {}  #Mapping of relevant documents for a given query (qid => set([relevant_question_ids])