def testSpanEqual(self):
    """Checks util.nonnull_span_equal on matching and mismatching spans."""
    # Constructor arguments for the two span layouts exercised below: one
    # with only the first two fields set, one with only the last two set
    # (-1 fills the unset pair).
    leading_pair_args = (100, 102, -1, -1)
    trailing_pair_args = (-1, -1, 100, 102)

    # Two separately constructed spans with identical leading fields match.
    lhs = util.Span(*leading_pair_args)
    rhs = util.Span(*leading_pair_args)
    self.assertTrue(util.nonnull_span_equal(lhs, rhs))

    # Two separately constructed spans with identical trailing fields match.
    lhs = util.Span(*trailing_pair_args)
    rhs = util.Span(*trailing_pair_args)
    self.assertTrue(util.nonnull_span_equal(lhs, rhs))

    # The same numbers placed in different field pairs must not match.
    lhs = util.Span(*leading_pair_args)
    rhs = util.Span(*trailing_pair_args)
    self.assertFalse(util.nonnull_span_equal(lhs, rhs))
def score_long_answer(gold_label_list, pred_label):
    """Scores a long answer as correct or not.

    1) First decide if there is a gold long answer with
       LONG_NO_NULL_THRESHOLD.
    2) The prediction will get a match if:
       a. There is a gold long answer.
       b. The prediction span matches exactly with *one* of the non-null
          gold long answer spans.

    Args:
      gold_label_list: A list of NQLabel, could be None.
      pred_label: A single NQLabel, could be None.

    Returns:
      A tuple (gold_has_answer, pred_has_answer, is_correct, score):
        gold_has_answer: bool, truthiness of
          util.gold_has_long_answer(gold_label_list).
        pred_has_answer: bool, True iff pred_label is truthy and its long
          answer span is not a null span.
        is_correct: bool, True iff both flags above are True and the
          predicted span equals at least one non-null gold span.
        score: pred_label.long_score, or None when pred_label is None.
    """
    # Coerce to a real bool: the helper's return type is not visible here,
    # and callers should be able to rely on a plain True/False flag.
    gold_has_answer = bool(util.gold_has_long_answer(gold_label_list))

    # `pred_label and (...)` would yield None (or the label object itself)
    # when pred_label is falsy; normalise that to False.
    pred_has_answer = bool(
        pred_label and not pred_label.long_answer_span.is_null_span())

    # A missing prediction has no score; reading the attribute
    # unconditionally would raise AttributeError on None.
    score = pred_label.long_score if pred_label is not None else None

    is_correct = False

    # Both sides are non-null spans.
    if gold_has_answer and pred_has_answer:
        pred_span = pred_label.long_answer_span
        # While the voting results indicate there is a long answer, each
        # annotator might still say there is no long answer, so null gold
        # spans are skipped before the span comparison is attempted.
        is_correct = any(
            not gold_label.long_answer_span.is_null_span()
            and util.nonnull_span_equal(gold_label.long_answer_span,
                                        pred_span)
            for gold_label in gold_label_list)

    return gold_has_answer, pred_has_answer, is_correct, score