import collections

# Note: run_squad, tf, FLAGS, RawResult, SimpleNamespace and the other helper
# functions referenced below are imported/defined elsewhere in this repository.


def get_predictions(example, result, n_best_size, max_answer_length):
    """Collect the n-best span predictions for a single example.

    This function has been mostly copied from run_squad.py. Unfortunate, but it
    had to be duplicated here in order to return local variables from it.

    :param example: feature/example object with tokens and offset maps
    :param result: model output carrying the span membership information
    :param n_best_size: number of candidate start/end indexes to consider
    :param max_answer_length: maximum allowed answer length, in tokens
    :return: list of n-best prediction dicts (text, offsets, probability, logits)
    """
    _PrelimPrediction = collections.namedtuple(  # pylint: disable=invalid-name
        "PrelimPrediction",
        ["feature_index", "start_index", "end_index", "start_logit", "end_logit"])

    prelim_predictions = []
    # keep track of the minimum score of null start+end of position 0
    null_start_logit = 0  # the start logit at the slice with min null score
    null_end_logit = 0  # the end logit at the slice with min null score

    # start_indexes = run_squad._get_best_indexes(result.start_logits, n_best_size)
    # end_indexes = run_squad._get_best_indexes(result.end_logits, n_best_size)
    start_indexes, end_indexes = get_nbest_bounds_from_membership(
        result.membership, n_best_size)

    for start_index in start_indexes:
        for end_index in end_indexes:
            # We could hypothetically create invalid predictions, e.g., predict
            # that the start of the span is in the question. We throw out all
            # invalid predictions.
            if start_index >= len(example.tokens):
                continue
            if end_index >= len(example.tokens):
                continue
            if start_index not in example.token_to_orig_map:
                continue
            if end_index not in example.token_to_orig_map:
                continue
            if not example.token_is_max_context.get(start_index, False):
                continue
            if end_index < start_index:
                continue
            length = end_index - start_index + 1
            if length > max_answer_length:
                continue
            prelim_predictions.append(
                _PrelimPrediction(
                    # feature_index=feature_index,
                    feature_index=0,
                    start_index=start_index,
                    end_index=end_index,
                    # Membership-based bounds carry no per-index logits, so
                    # placeholder scores are used here.
                    start_logit=1,
                    end_logit=1))
                    # start_logit=result.start_logits[start_index],
                    # end_logit=result.end_logits[end_index]))

    prelim_predictions = sorted(
        prelim_predictions,
        key=lambda x: (x.start_logit + x.end_logit),
        reverse=True)

    _NbestPrediction = collections.namedtuple(  # pylint: disable=invalid-name
        "NbestPrediction",
        ["text", "start_logit", "end_logit", "start_offset", "end_offset"])

    seen_predictions = set()
    nbest = []
    for pred in prelim_predictions:
        if len(nbest) >= n_best_size:
            break
        # feature = features[pred.feature_index]
        if pred.start_index > 0:  # this is a non-null prediction
            start_offset = example.token_to_orig_map[pred.start_index]
            end_offset = example.token_to_orig_map[pred.end_index] + 1
            try:
                final_text = ' '.join(example.doc_tokens[start_offset:end_offset])
            except AttributeError:
                # example may not carry doc_tokens; fall back to empty text
                final_text = ''
            if (start_offset, end_offset) in seen_predictions:
                continue
            seen_predictions.add((start_offset, end_offset))
        else:
            final_text = ""
            start_offset = 0
            end_offset = 0
            seen_predictions.add((0, 0))
        nbest.append(
            _NbestPrediction(
                text=final_text,
                start_logit=pred.start_logit,
                end_logit=pred.end_logit,
                start_offset=start_offset,
                end_offset=end_offset))

    # if we didn't include the empty option in the n-best, include it
    if (0, 0) not in seen_predictions:
        nbest.append(
            _NbestPrediction(
                text="",
                start_logit=null_start_logit,
                end_logit=null_end_logit,
                start_offset=0,
                end_offset=0))

    # In very rare edge cases we could have no valid predictions. So we
    # just create a nonce prediction in this case to avoid failure.
    if not nbest:
        nbest.append(
            _NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0,
                             start_offset=0, end_offset=0))

    assert len(nbest) >= 1

    total_scores = []
    best_non_null_entry = None
    for entry in nbest:
        total_scores.append(entry.start_logit + entry.end_logit)
        if not best_non_null_entry:
            if entry.text:
                best_non_null_entry = entry

    probs = run_squad._compute_softmax(total_scores)

    nbest_json = []
    for (i, entry) in enumerate(nbest):
        output = collections.OrderedDict()
        output["text"] = entry.text
        output["start_offset"] = entry.start_offset
        output["end_offset"] = entry.end_offset
        output["probability"] = probs[i]
        output["start_logit"] = entry.start_logit
        output["end_logit"] = entry.end_logit
        nbest_json.append(output)

    assert len(nbest_json) >= 1
    return nbest_json
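

# For reference, the run_squad.py helpers relied on throughout this file
# (_get_best_indexes / _get_best_indices and _compute_softmax) behave roughly as
# sketched below. These are hedged re-implementations for readability, not the
# exact code imported by this module.
import math


def _get_best_indexes_sketch(logits, n_best_size):
    """Return the indexes of the n_best_size largest logits, best first."""
    index_and_score = sorted(enumerate(logits), key=lambda x: x[1], reverse=True)
    return [index for index, _ in index_and_score[:n_best_size]]


def _compute_softmax_sketch(scores):
    """Numerically stable softmax over a plain list of scores."""
    if not scores:
        return []
    max_score = max(scores)
    exp_scores = [math.exp(s - max_score) for s in scores]
    total = sum(exp_scores)
    return [e / total for e in exp_scores]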
def write_predictions(self, all_examples, all_features, all_results, n_best_size,
                      max_answer_length, do_lower_case, output_prediction_file,
                      output_nbest_file, output_null_log_odds_file):
    """Write final predictions to the json file and log-odds of null if needed."""
    tf.logging.info("Writing predictions to: %s" % (output_prediction_file))
    tf.logging.info("Writing nbest to: %s" % (output_nbest_file))

    example_index_to_features = collections.defaultdict(list)
    for feature in all_features:
        example_index_to_features[feature.example_index].append(feature)

    unique_id_to_result = {}
    for result in all_results:
        unique_id_to_result[result.unique_id] = result

    _PrelimPrediction = collections.namedtuple(  # pylint: disable=invalid-name
        "PrelimPrediction", [
            "feature_index", "start_index", "end_index", "start_logit", "end_logit"
        ])

    all_predictions = collections.OrderedDict()
    all_nbest_json = collections.OrderedDict()
    scores_diff_json = collections.OrderedDict()

    for (example_index, example) in enumerate(all_examples):
        features = example_index_to_features[example_index]

        prelim_predictions = []
        # keep track of the minimum score of null start+end of position 0
        score_null = 1000000  # large and positive
        min_null_feature_index = 0  # the paragraph slice with min null score
        null_start_logit = 0  # the start logit at the slice with min null score
        null_end_logit = 0  # the end logit at the slice with min null score
        for (feature_index, feature) in enumerate(features):
            result = unique_id_to_result[feature.unique_id]
            start_indexes = run_squad._get_best_indexes(
                result.start_logits, n_best_size)
            end_indexes = run_squad._get_best_indexes(
                result.end_logits, n_best_size)
            # if we could have irrelevant answers, get the min score of irrelevant
            if FLAGS.version_2_with_negative:
                feature_null_score = result.start_logits[0] + result.end_logits[0]
                if feature_null_score < score_null:
                    score_null = feature_null_score
                    min_null_feature_index = feature_index
                    null_start_logit = result.start_logits[0]
                    null_end_logit = result.end_logits[0]
            for start_index in start_indexes:
                for end_index in end_indexes:
                    # We could hypothetically create invalid predictions, e.g., predict
                    # that the start of the span is in the question. We throw out all
                    # invalid predictions.
                    if start_index >= len(feature.tokens):
                        continue
                    if end_index >= len(feature.tokens):
                        continue
                    if start_index not in feature.token_to_orig_map:
                        continue
                    if end_index not in feature.token_to_orig_map:
                        continue
                    if not feature.token_is_max_context.get(start_index, False):
                        continue
                    if end_index < start_index:
                        continue
                    length = end_index - start_index + 1
                    if length > max_answer_length:
                        continue
                    prelim_predictions.append(
                        _PrelimPrediction(
                            feature_index=feature_index,
                            start_index=start_index,
                            end_index=end_index,
                            start_logit=result.start_logits[start_index],
                            end_logit=result.end_logits[end_index]))

        if FLAGS.version_2_with_negative:
            prelim_predictions.append(
                _PrelimPrediction(feature_index=min_null_feature_index,
                                  start_index=0,
                                  end_index=0,
                                  start_logit=null_start_logit,
                                  end_logit=null_end_logit))
        prelim_predictions = sorted(prelim_predictions,
                                    key=lambda x: (x.start_logit + x.end_logit),
                                    reverse=True)

        _NbestPrediction = collections.namedtuple(  # pylint: disable=invalid-name
            "NbestPrediction", ["text", "start_logit", "end_logit"])

        seen_predictions = {}
        nbest = []
        for pred in prelim_predictions:
            if len(nbest) >= n_best_size:
                break
            feature = features[pred.feature_index]
            if pred.start_index > 0:  # this is a non-null prediction
                tok_tokens = feature.tokens[pred.start_index:(pred.end_index + 1)]
                orig_doc_start = feature.token_to_orig_map[pred.start_index]
                orig_doc_end = feature.token_to_orig_map[pred.end_index]
                orig_tokens = example.doc_tokens[orig_doc_start:(orig_doc_end + 1)]
                tok_text = " ".join(tok_tokens)

                # De-tokenize WordPieces that have been split off.
                tok_text = tok_text.replace(" ##", "")
                tok_text = tok_text.replace("##", "")

                # Clean whitespace
                tok_text = tok_text.strip()
                tok_text = " ".join(tok_text.split())
                orig_text = " ".join(orig_tokens)

                final_text = run_squad.get_final_text(
                    tok_text, orig_text, do_lower_case)
                if final_text in seen_predictions:
                    continue
                seen_predictions[final_text] = True
            else:
                final_text = ""
                seen_predictions[final_text] = True

            nbest.append(
                _NbestPrediction(text=final_text,
                                 start_logit=pred.start_logit,
                                 end_logit=pred.end_logit))

        # if we didn't include the empty option in the n-best, include it
        if FLAGS.version_2_with_negative:
            if "" not in seen_predictions:
                nbest.append(
                    _NbestPrediction(text="",
                                     start_logit=null_start_logit,
                                     end_logit=null_end_logit))

        # In very rare edge cases we could have no valid predictions. So we
        # just create a nonce prediction in this case to avoid failure.
        if not nbest:
            nbest.append(
                _NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0))

        assert len(nbest) >= 1

        total_scores = []
        best_non_null_entry = None
        for entry in nbest:
            total_scores.append(entry.start_logit + entry.end_logit)
            if not best_non_null_entry:
                if entry.text:
                    best_non_null_entry = entry

        probs = run_squad._compute_softmax(total_scores)

        nbest_json = []
        for (i, entry) in enumerate(nbest):
            output = collections.OrderedDict()
            output["text"] = entry.text
            output["probability"] = probs[i]
            output["start_logit"] = entry.start_logit
            output["end_logit"] = entry.end_logit
            nbest_json.append(output)

        assert len(nbest_json) >= 1

        if not FLAGS.version_2_with_negative:
            all_predictions[example.qas_id] = nbest_json[0]["text"]
        else:
            # predict "" iff the null score - the score of best non-null > threshold
            score_diff = (score_null
                          - best_non_null_entry.start_logit
                          - best_non_null_entry.end_logit)
            scores_diff_json[example.qas_id] = score_diff
            if score_diff > FLAGS.null_score_diff_threshold:
                all_predictions[example.qas_id] = ""
            else:
                all_predictions[example.qas_id] = best_non_null_entry.text
        all_nbest_json[example.qas_id] = nbest_json

    if FLAGS.version_2_with_negative:
        return all_predictions, all_nbest_json, scores_diff_json
    return all_predictions, all_nbest_json
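

# The SQuAD v2 null-answer decision above comes down to a single comparison. The
# hypothetical helper below (names are illustrative, not part of this codebase)
# restates the rule in isolation: predict the empty string iff the null score
# exceeds the best non-null span score by more than the threshold.
def choose_final_answer_sketch(score_null, best_non_null_entry, null_score_diff_threshold):
    """Return (final_text, score_diff) using the same rule as write_predictions."""
    score_diff = (score_null
                  - best_non_null_entry.start_logit
                  - best_non_null_entry.end_logit)
    if score_diff > null_score_diff_threshold:
        return "", score_diff
    return best_non_null_entry.text, score_diff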
def get_answer(doc_tokens, tokens_for_postprocessing, start_logits, end_logits, args):
    result = RawResult(start_logits=start_logits, end_logits=end_logits)
    predictions = []
    Prediction = collections.namedtuple('Prediction', ['text', 'start_logit', 'end_logit'])

    if args.version_2_with_negative:
        null_val = (float("inf"), 0, 0)

    start_indices = _get_best_indices(result.start_logits, args.n_best_size)
    end_indices = _get_best_indices(result.end_logits, args.n_best_size)
    prelim_predictions = get_valid_prelim_predictions(
        start_indices, end_indices, tokens_for_postprocessing, result, args)
    prelim_predictions = sorted(prelim_predictions,
                                key=lambda x: (x.start_logit + x.end_logit),
                                reverse=True)

    if args.version_2_with_negative:
        score = result.start_logits[0] + result.end_logits[0]
        if score < null_val[0]:
            null_val = (score, result.start_logits[0], result.end_logits[0])

    doc_tokens_obj = {
        'doc_tokens': doc_tokens,
    }
    doc_tokens_obj = SimpleNamespace(**doc_tokens_obj)

    curr_predictions = []
    seen_predictions = []
    for pred in prelim_predictions:
        if len(curr_predictions) == args.n_best_size:
            break
        if pred.end_index > 0:  # this is a non-null prediction
            final_text = get_answer_text(doc_tokens_obj, tokens_for_postprocessing, pred, args)
            if final_text in seen_predictions:
                continue
        else:
            final_text = ""
        seen_predictions.append(final_text)
        curr_predictions.append(Prediction(final_text, pred.start_logit, pred.end_logit))
    predictions += curr_predictions

    # add empty prediction
    if args.version_2_with_negative:
        predictions.append(Prediction('', null_val[1], null_val[2]))

    nbest_answers = []
    answer = None
    nbest = sorted(predictions,
                   key=lambda x: (x.start_logit + x.end_logit),
                   reverse=True)[:args.n_best_size]
    total_scores = []
    best_non_null_entry = None
    for entry in nbest:
        total_scores.append(entry.start_logit + entry.end_logit)
        if not best_non_null_entry and entry.text:
            best_non_null_entry = entry
    probs = _compute_softmax(total_scores)
    for (i, entry) in enumerate(nbest):
        output = collections.OrderedDict()
        output["text"] = entry.text
        output["probability"] = probs[i]
        output["start_logit"] = entry.start_logit
        output["end_logit"] = entry.end_logit
        nbest_answers.append(output)

    if args.version_2_with_negative:
        score_diff = (null_val[0]
                      - best_non_null_entry.start_logit
                      - best_non_null_entry.end_logit)
        if score_diff > args.null_score_diff_threshold:
            answer = ""
        else:
            answer = best_non_null_entry.text
    else:
        answer = nbest_answers[0]['text']
    return answer, nbest_answers
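

# get_answer reads args.version_2_with_negative, args.n_best_size and
# args.null_score_diff_threshold directly, and forwards args to
# get_valid_prelim_predictions and get_answer_text. A minimal ad-hoc args object
# could be built as sketched below; max_answer_length and do_lower_case are
# assumptions about what those helpers may consume, not fields read by
# get_answer itself.
from types import SimpleNamespace

_example_args_sketch = SimpleNamespace(
    version_2_with_negative=True,    # enable SQuAD v2-style null answers
    n_best_size=20,                  # number of candidate spans to keep
    null_score_diff_threshold=0.0,   # predict "" when the null score wins by more than this
    max_answer_length=30,            # assumed: likely enforced in get_valid_prelim_predictions
    do_lower_case=True,              # assumed: likely consumed by get_answer_text
)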
def predict(self, all_examples, all_features, all_results, n_best_size,
            max_answer_length, do_lower_case):
    example_index_to_features = collections.defaultdict(list)
    for feature in all_features:
        example_index_to_features[feature.example_index].append(feature)

    unique_id_to_result = {}
    for result in all_results:
        unique_id_to_result[result.unique_id] = result

    _PrelimPrediction = collections.namedtuple(  # pylint: disable=invalid-name
        "PrelimPrediction", [
            "feature_index", "start_index", "end_index", "start_logit", "end_logit"
        ])

    all_predictions = collections.OrderedDict()
    all_nbest_json = collections.OrderedDict()

    for (example_index, example) in enumerate(all_examples):
        features = example_index_to_features[example_index]

        prelim_predictions = []
        for (feature_index, feature) in enumerate(features):
            result = unique_id_to_result[feature.unique_id]
            start_indexes = _get_best_indexes(result.start_logits, n_best_size)
            end_indexes = _get_best_indexes(result.end_logits, n_best_size)
            # if we could have irrelevant answers, get the min score of irrelevant
            for start_index in start_indexes:
                for end_index in end_indexes:
                    # We could hypothetically create invalid predictions, e.g., predict
                    # that the start of the span is in the question. We throw out all
                    # invalid predictions.
                    if (start_index >= len(feature.tokens)
                            or end_index >= len(feature.tokens)
                            or start_index not in feature.token_to_orig_map
                            or end_index not in feature.token_to_orig_map
                            or not feature.token_is_max_context.get(start_index, False)
                            or end_index < start_index):
                        continue
                    length = end_index - start_index + 1
                    if length > max_answer_length:
                        continue
                    prelim_predictions.append(
                        _PrelimPrediction(
                            feature_index=feature_index,
                            start_index=start_index,
                            end_index=end_index,
                            start_logit=result.start_logits[start_index],
                            end_logit=result.end_logits[end_index]))

        prelim_predictions = sorted(prelim_predictions,
                                    key=lambda x: (x.start_logit + x.end_logit),
                                    reverse=True)

        _NbestPrediction = collections.namedtuple(  # pylint: disable=invalid-name
            "NbestPrediction", ["text", "start_logit", "end_logit"])

        seen_predictions = {}
        nbest = []
        for pred in prelim_predictions:
            if len(nbest) >= n_best_size:
                break
            feature = features[pred.feature_index]
            if pred.start_index > 0:  # this is a non-null prediction
                tok_tokens = feature.tokens[pred.start_index:(pred.end_index + 1)]
                orig_doc_start = feature.token_to_orig_map[pred.start_index]
                orig_doc_end = feature.token_to_orig_map[pred.end_index]
                orig_tokens = example.doc_tokens[orig_doc_start:(orig_doc_end + 1)]
                tok_text = " ".join(tok_tokens)

                # De-tokenize WordPieces that have been split off.
                tok_text = tok_text.replace(" ##", "")
                tok_text = tok_text.replace("##", "")

                # Clean whitespace
                tok_text = tok_text.strip()
                tok_text = " ".join(tok_text.split())
                orig_text = " ".join(orig_tokens)

                final_text = get_final_text(tok_text, orig_text, do_lower_case)
                if final_text in seen_predictions:
                    continue
                seen_predictions[final_text] = True
            else:
                final_text = ""
                seen_predictions[final_text] = True

            nbest.append(
                _NbestPrediction(text=final_text,
                                 start_logit=pred.start_logit,
                                 end_logit=pred.end_logit))

        # In very rare edge cases we could have no valid predictions. So we
        # just create a nonce prediction in this case to avoid failure.
        if not nbest:
            nbest.append(
                _NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0))

        assert len(nbest) >= 1

        total_scores = []
        best_non_null_entry = None
        for entry in nbest:
            total_scores.append(entry.start_logit + entry.end_logit)
            if not best_non_null_entry:
                if entry.text:
                    best_non_null_entry = entry

        probs = _compute_softmax(total_scores)

        nbest_json = []
        for (i, entry) in enumerate(nbest):
            output = collections.OrderedDict()
            output["text"] = entry.text
            output["probability"] = probs[i]
            output["start_logit"] = entry.start_logit
            output["end_logit"] = entry.end_logit
            nbest_json.append(output)

        assert len(nbest_json) >= 1

        all_predictions[example.qas_id] = nbest_json[0]["text"]
        all_nbest_json[example.qas_id] = nbest_json

    return all_nbest_json
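

# The WordPiece de-tokenization used in write_predictions and predict is easy to
# sanity-check in isolation. The token values below are illustrative only; the
# replace(" ##", "") / replace("##", "") steps mirror the code above.
def _wordpiece_detok_example():
    tok_tokens = ["the", "emp", "##ire", "state", "building"]
    tok_text = " ".join(tok_tokens)          # -> "the emp ##ire state building"
    tok_text = tok_text.replace(" ##", "")   # -> "the empire state building"
    tok_text = tok_text.replace("##", "")    # only matters if the first token starts with "##"
    tok_text = " ".join(tok_text.strip().split())
    assert tok_text == "the empire state building"
    return tok_text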
                start_logit=pred.start_logit,
                end_logit=pred.end_logit))

# In very rare edge cases we could have no valid predictions. So we
# just create a nonce prediction in this case to avoid failure.
if not nbest:
    nbest.append(_NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0))
assert len(nbest) >= 1

total_scores = []
for entry in nbest:
    total_scores.append(entry.start_logit + entry.end_logit)
probs = run_squad._compute_softmax(total_scores)
print(nbest[0].text)

#END SECTION TRYING A NEW APPROACH
#END SECTION TRYING A NEW APPROACH
#END SECTION TRYING A NEW APPROACH
#END SECTION TRYING A NEW APPROACH

i = 0
example_index_routes = example_indices_routes[0]
# start_logits and end_logits are both lists of len 384
start_logits_routes = batch_start_logits_routes[i].detach().cpu().tolist()
end_logits_routes = batch_end_logits_routes[i].detach().cpu().tolist()
# remember from above, eval_features is a list of objects, each with fields for
# tokens, input_mask, input_ids, segment_ids, etc.
eval_feature_routes = eval_features_routes[