def test_textual_entailment(self):
    """Smoke-test the ELMo decomposable-attention SNLI predictor.

    Runs one example per label class and checks the predicted
    probability mass lands on the expected label index:
    0 = entailment, 1 = contradiction, 2 = neutral.
    """
    predictor = pretrained.decomposable_attention_with_elmo_parikh_2017()
    # (premise, hypothesis, expected label index, minimum probability)
    cases = (
        ("An interplanetary spacecraft is in orbit around a gas giant's icy moon.",
         "The spacecraft has the ability to travel between planets.",
         0, 0.7),  # entailment
        ("Two women are wandering along the shore drinking iced tea.",
         "Two women are sitting on a blanket near some rocks talking about politics.",
         1, 0.8),  # contradiction
        ("A large, gray elephant walked beside a herd of zebras.",
         "The elephant was lost.",
         2, 0.6),  # neutral
    )
    for premise, hypothesis, label_index, threshold in cases:
        result = predictor.predict_json({"premise": premise, "hypothesis": hypothesis})
        assert result["label_probs"][label_index] > threshold
def IR(sent_select_method):
    """Run document retrieval over the claim data set.

    For every record in ``DATA_SET``, parse the claim with a constituency
    parser, extract noun phrases (NPs), keep the NPs that exactly match a
    known page title, and overwrite the record's ``evidence`` with
    ``[[title, 0], ...]`` (sentence index 0 is a placeholder — this stage
    does document-level retrieval only).

    Args:
        sent_select_method: legacy selector name (``'esim'``, ``'entail'``,
            or anything else for embedding similarity).  The sentence
            selection step is currently disabled, so this argument no
            longer affects the output; it is kept so existing callers
            keep working.

    Returns:
        dict mapping record id -> record, with ``record['evidence']``
        replaced by the retrieved ``[[title, 0], ...]`` pairs.
    """
    # NOTE(review): the ESIM / decomposable-attention / spaCy-vector models
    # were previously loaded here per `sent_select_method`, but the
    # sentence-selection code that used them was commented out, so the
    # (expensive) loads produced unused objects.  The dead loads and the
    # commented-out selection block have been removed; restore both
    # together if sentence selection is re-enabled.
    titles_dict = xdb_query.load_xapian_titles(Args.OBJECTS, Args.TITLES)
    predictor = get_constituency_parser()

    with open(DATA_SET, 'r') as data_set_f:
        data_set = json.load(data_set_f)

    output_content = {}
    for id_, record in tqdm(data_set.items()):
        # Constituency-parse the claim, then collect candidate NPs from
        # both the generic and the customised extraction passes.
        parse_result = predictor.predict_json({"sentence": record['claim']})
        NPs = get_constituency_parsing_NPs(parse_result, NPs=set())
        NPs = get_customised_NPs(parse_result, NPs=NPs)

        if Args.LOG_MISSING_DOCS:
            # Compare gold evidence titles against the extracted NPs and
            # log the ones retrieval would miss.  Must run before
            # record['evidence'] is overwritten below.
            evidence = [item[0] for item in record['evidence']]
            _matched_titles, missing = result_stat(evidence, NPs)
            log_missing(missing, record, NPs, parse_result)

        matched = [NP for NP in NPs if NP in titles_dict]
        # 'sentence 0' placeholder for each retrieved document.
        record['evidence'] = [[title, 0] for title in matched]
        output_content[id_] = record
    return output_content