def testIntervalsCount(self):
    """Check intervals_count_compare on exact-span findings of mixed types.

    The predicted and golden sets each hold three findings:
      * 'The quick' (0-9) is NAME in both sets.
      * 'brown fox' (10-19) is ID in the predictions but AGE in the golden set.
      * 'jumps over' (20-30) is only predicted and 'lazy dog' (35-43) is only
        golden; their spans do not overlap.

    The test asserts the type-agnostic stats, the overall typed stats and the
    per-type breakdown returned by the comparison.
    """
    make_finding = eval_lib.Finding
    predicted = {
        make_finding('NAME', 0, 9, 'The quick'),
        make_finding('ID', 10, 19, 'brown fox'),
        make_finding('ORGANIZATION', 20, 30, 'jumps over'),
    }
    golden = {
        make_finding('NAME', 0, 9, 'The quick'),
        make_finding('AGE', 10, 19, 'brown fox'),
        make_finding('DATE', 35, 43, 'lazy dog'),
    }

    outcome = eval_lib.intervals_count_compare(
        predicted, golden, record_id='')

    # Ignoring types, two of the three spans line up.
    want_typeless = results_pb2.Stats(
        true_positives=2,
        false_positives=1,
        false_negatives=1,
        precision=0.666667,
        recall=0.666667,
        f_score=0.666667)
    self.assertEqual(normalize_floats(want_typeless),
                     normalize_floats(outcome.typeless))

    # With types taken into account only the NAME finding still matches.
    want_total = results_pb2.Stats(
        true_positives=1,
        false_positives=2,
        false_negatives=2,
        precision=0.333333,
        recall=0.333333,
        f_score=0.333333)
    self.assertEqual(normalize_floats(want_total),
                     normalize_floats(outcome.stats))

    # Per-type stats carry raw counts only; no ratios are set on them here.
    want_per_type = {
        'NAME': results_pb2.Stats(true_positives=1),
        'ID': results_pb2.Stats(false_positives=1),
        'AGE': results_pb2.Stats(false_negatives=1),
        'ORGANIZATION': results_pb2.Stats(false_positives=1),
        'DATE': results_pb2.Stats(false_negatives=1),
    }
    self.assertEqual(want_per_type, outcome.per_type)
def testIntervalsCountNotExactMatch(self):
    """Check intervals_count_compare when spans only partially overlap.

    Every finding is of type NAME, and each predicted span overlaps exactly
    one golden span without being equal to it. The three pairs cover the
    cases "golden encloses predicted", "predicted encloses golden" and
    "partial intersection".

    The test asserts that the typeless stats, the overall stats and the NAME
    per-type stats all report 3 true positives, 3 false positives and
    3 false negatives.
    NOTE(review): presumably each partial overlap contributes to all three
    counters - confirm against intervals_count_compare.
    """
    make_finding = eval_lib.Finding
    predicted = {
        make_finding('NAME', 1, 8, 'he quic'),      # Inside golden 0-9.
        make_finding('NAME', 10, 19, 'brown fox'),  # Encloses golden 11-18.
        make_finding('NAME', 20, 30, 'jumps over'), # Overlaps golden 26-34.
    }
    golden = {
        make_finding('NAME', 0, 9, 'The quick'),    # Encloses predicted 1-8.
        make_finding('NAME', 11, 18, 'rown fo'),    # Inside predicted 10-19.
        make_finding('NAME', 26, 34, 'over the'),   # Overlaps predicted 20-30.
    }

    outcome = eval_lib.intervals_count_compare(
        predicted, golden, record_id='')

    want_typeless = results_pb2.Stats(
        true_positives=3,
        false_positives=3,
        false_negatives=3,
        precision=0.5,
        recall=0.5,
        f_score=0.5)
    self.assertEqual(normalize_floats(want_typeless),
                     normalize_floats(outcome.typeless))

    # All findings share one type, so typed totals equal the typeless ones.
    want_total = results_pb2.Stats(
        true_positives=3,
        false_positives=3,
        false_negatives=3,
        precision=0.5,
        recall=0.5,
        f_score=0.5)
    self.assertEqual(normalize_floats(want_total),
                     normalize_floats(outcome.stats))

    # Per-type stats carry raw counts only; no ratios are set on them here.
    want_per_type = {
        'NAME': results_pb2.Stats(
            true_positives=3,
            false_positives=3,
            false_negatives=3),
    }
    self.assertEqual(want_per_type, outcome.per_type)