import json
import re

import spacy
import torch
import torch.nn.functional as F
from tqdm import tqdm
from transformers import pipeline

# `unpack`, `search_span_endpoints`, and `_TQDM_OPTIONS` are helpers defined
# elsewhere in this module.


def write_predictions(args, model, dataset):
    """
    Writes model predictions to an output file. The official QA metrics
    (EM/F1) can be computed using `evaluation.py`.

    Args:
        args: `argparse` object.
        model: Instance of the PyTorch model.
        dataset: Test dataset (technically, the development dataset since
            the official test datasets are blind and hosted by official
            servers).
    """
    # Load model checkpoint.
    model.load_state_dict(torch.load(args.model_path, map_location='cpu'))
    model.eval()

    # Set up test dataloader.
    test_dataloader = tqdm(
        dataset.get_batch(shuffle_examples=False),
        **_TQDM_OPTIONS,
    )

    # Output predictions.
    outputs = []

    with torch.no_grad():
        for (i, batch) in enumerate(test_dataloader):
            # Forward inputs.
            start_logits, end_logits = model(batch)

            # Form distributions over start and end positions.
            batch_start_probs = F.softmax(start_logits, 1)
            batch_end_probs = F.softmax(end_logits, 1)

            for j in range(start_logits.size(0)):
                # Find question index and passage.
                sample_index = args.batch_size * i + j
                qid, passage, _, _, _ = dataset.samples[sample_index]

                # Unpack start and end probabilities. Find the constrained
                # (start, end) pair that has the highest joint probability.
                start_probs = unpack(batch_start_probs[j])
                end_probs = unpack(batch_end_probs[j])
                start_index, end_index = search_span_endpoints(
                    start_probs, end_probs)

                # Grab predicted span.
                pred_span = ' '.join(passage[start_index:(end_index + 1)])

                # Add prediction to outputs.
                outputs.append({'qid': qid, 'answer': pred_span})

    # Write predictions to output file.
    with open(args.output_path, 'w+') as f:
        for elem in outputs:
            f.write(f'{json.dumps(elem)}\n')
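
# `search_span_endpoints` is invoked above but defined elsewhere in the repo.
# As a reference point, the sketch below shows one way such a constrained
# joint-probability search could look. It is illustrative only: the `window`
# cap on answer length is an assumption, not the repo's actual implementation.
def _search_span_endpoints_sketch(start_probs, end_probs, window=15):
    """Returns the (start, end) pair maximizing
    start_probs[start] * end_probs[end], subject to
    start <= end <= start + window."""
    best_score = float('-inf')
    best_pair = (0, 0)
    for start, p_start in enumerate(start_probs):
        # Only consider end positions at or after the start, within the window.
        for end in range(start, min(start + window + 1, len(end_probs))):
            score = p_start * end_probs[end]
            if score > best_score:
                best_score = score
                best_pair = (start, end)
    return best_pair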

def write_predictions(args, model, dataset):
    """
    Writes model predictions to an output file. The official QA metrics
    (EM/F1) can be computed using `evaluation.py`.

    Args:
        args: `argparse` object.
        model: Instance of the PyTorch model.
        dataset: Test dataset (technically, the development dataset since
            the official test datasets are blind and hosted by official
            servers).
    """
    # Load model checkpoint.
    model.load_state_dict(torch.load(args.model_path, map_location='cpu'))
    model.eval()

    # Set up test dataloader.
    test_dataloader = tqdm(
        dataset.get_batch(shuffle_examples=False),
        **_TQDM_OPTIONS,
    )

    # Output predictions.
    outputs = []

    # Task 1 setup: POS tags to keep and a lemmatizing tokenizer (assumes a
    # spaCy pipeline `nlp` loaded elsewhere in the module).
    keep = {'PROPN', 'NUM', 'VERB', 'NOUN', 'ADJ'}
    tokenize = lambda text: [token.lemma_ for token in nlp(text)]

    # Task 2 setup: off-the-shelf extractive QA pipeline.
    question_answering = pipeline('question-answering')

    with torch.no_grad():
        for (i, batch) in enumerate(test_dataloader):
            # Forward inputs.
            start_logits, end_logits = model(batch)

            # Form distributions over start and end positions.
            batch_start_probs = F.softmax(start_logits, 1)
            batch_end_probs = F.softmax(end_logits, 1)

            for j in range(start_logits.size(0)):
                # Find question index and passage.
                sample_index = args.batch_size * i + j
                qid, context, question, ans_start, ans_end = dataset.samples[
                    sample_index]

                if args.task == 2:
                    # Task 2: delegate span extraction to the pretrained
                    # pipeline instead of the local model's logits.
                    result = question_answering(question=' '.join(question),
                                                context=' '.join(context))
                    outputs.append({'qid': qid, 'answer': result['answer']})
                else:
                    # Unpack start and end probabilities. Find the constrained
                    # (start, end) pair that has the highest joint probability.
                    start_probs = unpack(batch_start_probs[j])
                    end_probs = unpack(batch_end_probs[j])
                    start_index, end_index = search_span_endpoints(
                        start_probs, end_probs, args, context, question,
                        ans_start, ans_end)

                    # Grab predicted span.
                    pred_span = ' '.join(context[start_index:(end_index + 1)])

                    # Add prediction to outputs.
                    outputs.append({'qid': qid, 'answer': pred_span})

    # Write predictions to output file.
    with open(args.output_path, 'w+') as f:
        for elem in outputs:
            f.write(f'{json.dumps(elem)}\n')
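
# A standalone sanity check for the Task 2 path above (a minimal sketch, not
# part of the original module). The Hugging Face `question-answering`
# pipeline returns a dict with keys 'score', 'start', 'end' (character
# offsets into the context), and 'answer'; the example question/context
# strings here are made up.
def _demo_qa_pipeline():
    qa = pipeline('question-answering')  # downloads a default model on first use
    result = qa(
        question='Who wrote Hamlet?',
        context='Hamlet is a tragedy written by William Shakespeare.',
    )
    # result looks like {'score': float, 'start': int, 'end': int, 'answer': str}
    return result['answer']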

def _char_to_token_maps(tokens):
    """Maps character offsets in `' '.join(tokens)` back to token indices.

    Returns (starts, ends): starts[c] is the index of the token beginning at
    character offset c; ends[c] is the index of the token ending (one past
    its last character) at offset c.
    """
    starts, ends = {}, {}
    offset = 0
    for idx, token in enumerate(tokens):
        starts[offset] = idx
        offset += len(token)
        ends[offset] = idx
        offset += 1  # skip the joining space
    return starts, ends


def write_predictions(args, model, dataset):
    """
    Writes model predictions to an output file. The official QA metrics
    (EM/F1) can be computed using `evaluation.py`.

    Args:
        args: `argparse` object.
        model: Instance of the PyTorch model.
        dataset: Test dataset (technically, the development dataset since
            the official test datasets are blind and hosted by official
            servers).
    """
    # Load model checkpoint.
    model.load_state_dict(torch.load(args.model_path, map_location='cpu'))
    model.eval()

    # Set up test dataloader.
    test_dataloader = tqdm(
        dataset.get_batch(shuffle_examples=False),
        **_TQDM_OPTIONS,
    )

    # Output predictions.
    outputs = []

    # Load the spaCy NER model. The lg package must be downloaded separately
    # before running, using:
    #     python -m spacy download en_core_web_lg
    ner = spacy.load('en_core_web_lg')
    prob_diff = 0.10

    with torch.no_grad():
        for (i, batch) in enumerate(test_dataloader):
            print(f'Starting batch {i}')

            # Forward inputs.
            start_logits, end_logits = model(batch)

            # Form distributions over start and end positions.
            batch_start_probs = F.softmax(start_logits, 1)
            batch_end_probs = F.softmax(end_logits, 1)

            for j in range(start_logits.size(0)):
                # Find question index and passage. (The re-weighting loops
                # below must not reuse `i` or `j`, or this index breaks.)
                sample_index = args.batch_size * i + j
                qid, passage, question, _, _ = dataset.samples[sample_index]

                # Unpack start and end probabilities. Find the constrained
                # (start, end) pair that has the highest joint probability.
                start_probs = unpack(batch_start_probs[j])
                end_probs = unpack(batch_end_probs[j])

                question_joined = ' '.join(question)
                passage_joined = ' '.join(passage)

                # Run NER over the passage, then map each entity's character
                # span back to token indices so they can be compared against
                # the token-level probability vectors. Entities whose
                # boundaries do not line up with a whitespace token boundary
                # (e.g. spaCy split punctuation off a token) are skipped.
                ner_passage = ner(passage_joined)
                token_starts, token_ends = _char_to_token_maps(passage)
                entity_start_indices = {
                    token_starts[ent.start_char]
                    for ent in ner_passage.ents
                    if ent.start_char in token_starts
                }
                entity_end_indices = {
                    token_ends[ent.end_char]
                    for ent in ner_passage.ents
                    if ent.end_char in token_ends
                }

                # 1. Loop through the passage-length distributions.
                # 2. If a position is NOT an entity boundary,
                # 3. then decrement its probability.
                for k in range(len(start_probs)):
                    if k not in entity_start_indices:
                        start_probs[k] -= prob_diff
                for k in range(len(end_probs)):
                    if k not in entity_end_indices:
                        end_probs[k] -= prob_diff

                # Question-type heuristics: who -> PERSON, when -> DATE/TIME,
                # where -> LOC. (spaCy also tags places as GPE/FAC, but only
                # LOC is masked for here.)
                target_labels = None
                if re.match(r'whom?\b', question_joined, re.IGNORECASE):
                    target_labels = {'PERSON'}
                elif re.match(r'when\b', question_joined, re.IGNORECASE):
                    target_labels = {'DATE', 'TIME'}
                elif re.match(r'where\b', question_joined, re.IGNORECASE):
                    target_labels = {'LOC'}

                if target_labels is not None:
                    # Mask everything except entities of the target label(s):
                    # boost their boundary positions, penalize all others.
                    boost_starts = {
                        token_starts[ent.start_char]
                        for ent in ner_passage.ents
                        if ent.label_ in target_labels
                        and ent.start_char in token_starts
                    }
                    boost_ends = {
                        token_ends[ent.end_char]
                        for ent in ner_passage.ents
                        if ent.label_ in target_labels
                        and ent.end_char in token_ends
                    }
                    for k in range(len(start_probs)):
                        if k in boost_starts:
                            start_probs[k] += prob_diff
                        else:
                            start_probs[k] -= prob_diff
                    for k in range(len(end_probs)):
                        if k in boost_ends:
                            end_probs[k] += prob_diff
                        else:
                            end_probs[k] -= prob_diff

                # At this point, start_probs and end_probs have had the
                # probabilities of positions associated with O (non-entity)
                # tokens decremented by prob_diff. Additionally, if the
                # question starts with some form of "who", we are probably
                # looking for a person, so the PERSON entities spaCy found in
                # the passage are used to add prob_diff to the corresponding
                # positions (and likewise for "when" and "where").
                #
                # Expectation: incrementing the probability of the right
                # tokens should correspond to a greater chance of predicting
                # the right answer, while decrementing the probability of O
                # tokens should prevent the model from starting or ending the
                # span on a word that doesn't really mean anything.
                start_index, end_index = search_span_endpoints(
                    start_probs, end_probs
                )

                # Grab predicted span.
                pred_span = ' '.join(passage[start_index:(end_index + 1)])

                # Add prediction to outputs.
                outputs.append({'qid': qid, 'answer': pred_span})

    # Write predictions to output file.
    with open(args.output_path, 'w+') as f:
        for elem in outputs:
            f.write(f'{json.dumps(elem)}\n')
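
# A toy walkthrough of the NER re-weighting idea above (illustrative only;
# the passage, entity position, and probabilities are made up). Boosting
# entity-aligned positions while penalizing the rest shifts the argmax
# toward a PERSON span for a "who" question.
def _demo_ner_boost():
    # Uniform start distribution over the toy passage
    # ['Obama', 'visited', 'Paris', 'in', '2009'].
    start_probs = [0.2] * 5
    person_starts = {0}  # 'Obama' would be tagged PERSON by spaCy
    boosted = [p + 0.10 if k in person_starts else p - 0.10
               for k, p in enumerate(start_probs)]
    # The PERSON-aligned position now dominates: [0.3, 0.1, 0.1, 0.1, 0.1].
    assert boosted.index(max(boosted)) == 0
    return boosted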