Example #1
import logging
import posixpath

from google.cloud import storage

# Project-local modules (assumed): gcsutil provides GCS path parsing, and
# results_pb2 is the generated protobuf module for the Results message.
import gcsutil
import results_pb2


def write_aggregate_results_to_gcs(results_bytes, results_dir):
    """Write the aggregate results to results_dir."""
    storage_client = storage.Client()
    results = results_pb2.Results()
    results.ParseFromString(results_bytes)

    filename = gcsutil.GcsFileName.from_path(
        posixpath.join(results_dir, 'aggregate_results.txt'))
    logging.info('Writing aggregate results to %s', filename.string())
    bucket = storage_client.lookup_bucket(filename.bucket)
    blob = bucket.blob(filename.blob)
    blob.upload_from_string(str(results))
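
A minimal usage sketch, assuming the imports above; the bucket path is a placeholder:

results = results_pb2.Results()
# The writer expects the binary wire format, so serialize before calling.
write_aggregate_results_to_gcs(results.SerializeToString(),
                               'gs://my-bucket/results')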
Example #2
from google.protobuf import text_format

import results_pb2  # generated protobuf module (assumed project-local)


def format_aggregate_text_for_bq(text_aggregate_results, timestamp):
    """Format text-format results as BigQuery rows (column name to value dicts)."""
    ret = []
    aggregate_results = results_pb2.Results()
    text_format.Merge(text_aggregate_results, aggregate_results)
    binary_token_results = aggregate_results.binary_token_matching_results
    ret.append(
        _create_row(binary_token_results.micro_average_results, timestamp,
                    [('info_type', 'ALL')]))
    for result in binary_token_results.per_type_micro_average_results:
        ret.append(
            _create_row(result.stats, timestamp,
                        [('info_type', result.info_type_category)]))
    return ret
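
_create_row is not shown in these examples; a plausible sketch, inferred from the BigQuery rows asserted in the test under Example #4 (the field set and the timestamp handling are assumptions):

def _create_row(stats, timestamp, extra_columns):
    # Hypothetical helper: flatten a stats proto into a BigQuery row dict,
    # then attach any extra (column, value) pairs such as ('info_type', ...).
    row = {
        'recall': stats.recall,
        'precision': stats.precision,
        'f_score': stats.f_score,
        'true_positives': stats.true_positives,
        'false_positives': stats.false_positives,
        'false_negatives': stats.false_negatives,
        'timestamp': timestamp,
    }
    row.update(dict(extra_columns))
    return row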
Example #3
import results_pb2  # generated protobuf module (assumed project-local)


def format_aggregate_results_for_bq(aggregate_results_bytes, now):
    """Format serialized results as BigQuery rows (column name to value dicts)."""
    ret = []
    aggregate_results = results_pb2.Results()
    aggregate_results.ParseFromString(aggregate_results_bytes)
    binary_token_results = aggregate_results.binary_token_matching_results
    ret.append(
        _create_row(binary_token_results.micro_average_results, now,
                    [('info_type', 'ALL')]))
    for result in binary_token_results.per_type_micro_average_results:
        ret.append(
            _create_row(result.stats, now,
                        [('info_type', result.info_type_category)]))
    return ret
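
Examples #2 and #3 differ only in the input encoding: #2 parses protobuf text format with text_format.Merge, while #3 parses the binary wire format with ParseFromString. A small round-trip sketch, reusing the imports above:

results = results_pb2.Results()

# Text format (Example #2's input): human-readable, produced by str(proto).
text_format.Merge(str(results), results_pb2.Results())

# Binary wire format (Example #3's input): compact, produced by
# SerializeToString().
results_pb2.Results().ParseFromString(results.SerializeToString())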
Example #4
    def to_results_proto(self):
        """Convert to results_pb2.Results."""
        results = results_pb2.Results()
        eval_lib.calculate_stats(self.strict_entity_matching.micro)
        results.strict_entity_matching_results.micro_average_results.CopyFrom(
            self.strict_entity_matching.micro)
        results.strict_entity_matching_results.macro_average_results.CopyFrom(
            self.strict_entity_matching.macro.calculate_stats())
        r = results.strict_entity_matching_results.per_type_micro_average_results
        r.extend(self.strict_entity_matching.per_type_protos())

        eval_lib.calculate_stats(self.binary_token_matching.typeless_micro)
        results.binary_token_matching_results.micro_average_results.CopyFrom(
            self.binary_token_matching.typeless_micro)
        results.binary_token_matching_results.macro_average_results.CopyFrom(
            self.binary_token_matching.typeless_macro.calculate_stats())
        results.binary_token_matching_results.per_type_micro_average_results.extend(
            self.binary_token_matching.per_type_protos())

        return results
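
to_results_proto fills both micro- and macro-averaged stats: micro averaging pools raw true/false positive counts across all notes before computing ratios, while macro averaging averages the already-computed per-note scores. A minimal sketch of these definitions (eval_lib's actual implementation may differ):

def micro_average(per_note_stats):
    # Micro: pool the raw counts, then compute each ratio once.
    tp = sum(s.true_positives for s in per_note_stats)
    fp = sum(s.false_positives for s in per_note_stats)
    fn = sum(s.false_negatives for s in per_note_stats)
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    return precision, recall, 2 * precision * recall / (precision + recall)


def macro_average(per_note_f_scores):
    # Macro: average the per-note scores directly, weighting notes equally.
    return sum(per_note_f_scores) / len(per_note_f_scores)
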
    def testE2eGCS(self, fake_client_fn, mock_bq_sink_fn, mock_utcnow_fn):
        def make_sink(table_name, schema, write_disposition):  # pylint: disable=unused-argument
            return beam_testutil.FakeSink(table_name)

        mock_bq_sink_fn.side_effect = make_sink
        now = 'current time'
        mock_utcnow_fn.return_value = now

        input_pattern = 'gs://bucketname/input/*'
        golden_dir = 'gs://bucketname/goldens'
        results_dir = 'gs://bucketname/results'
        storage_client = testutil.FakeStorageClient()
        fake_client_fn.return_value = storage_client

        tp_tag = tag_template.format('TypeA', 0, 5)
        fp_tag = tag_template.format('TypeA', 8, 10)
        fn_tag = tag_template.format('TypeA', 11, 13)
        fn2_tag = tag_template.format('TypeA', 15, 19)
        findings_tags = '\n'.join([tp_tag, fp_tag])
        golden_tags = '\n'.join([tp_tag, fn_tag, fn2_tag])
        testutil.set_gcs_file('bucketname/input/1-1.xml',
                              xml_template.format(findings_tags))
        testutil.set_gcs_file('bucketname/goldens/1-1.xml',
                              xml_template.format(golden_tags))

        tp2_tag = tag_template.format('TypeB', 20, 21)
        # False negative + false positive for entity matching, but true positive for
        # binary token matching.
        entity_fp_tag = tag_template.format('TypeX', 30, 35)
        entity_fn_tag = tag_template.format('TypeY', 30, 35)
        # Three tokens are tagged as a single entity in the golden. This is not a
        # match for entity matching, but counts as three matches for binary token
        # matching.
        partial_tag1 = tag_template.format('TypeA', 36, 41)
        partial_tag2 = tag_template.format('TypeA', 42, 47)
        partial_tag3 = tag_template.format('TypeA', 48, 54)
        multi_token_tag = tag_template.format('TypeA', 36, 54)
        ignored_tag = tag_template.format('ignore', 55, 57)
        findings_tags = '\n'.join([
            tp_tag, tp2_tag, entity_fp_tag, partial_tag1, partial_tag2,
            partial_tag3, ignored_tag
        ])
        golden_tags = '\n'.join(
            [tp_tag, tp2_tag, entity_fn_tag, multi_token_tag])
        testutil.set_gcs_file('bucketname/input/1-2.xml',
                              xml_template.format(findings_tags))
        testutil.set_gcs_file('bucketname/goldens/1-2.xml',
                              xml_template.format(golden_tags))
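        # For note 1-2, binary token matching should yield six true positives:
        # word1 (tp_tag), 5 (tp2_tag), word7 (the TypeX/TypeY tags cover the
        # same span, so the token matches), and multi/token/entity (the three
        # tokens under multi_token_tag). Tags typed 'ignore' are dropped via
        # types_to_ignore below.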
        self.old_write_to_text = beam.io.WriteToText
        beam.io.WriteToText = beam_testutil.DummyWriteTransform
        types_to_ignore = ['ignore']
        mae_input_query = None
        mae_golden_table = None
        try:
            run_pipeline_lib.run_pipeline(input_pattern,
                                          golden_dir,
                                          results_dir,
                                          mae_input_query,
                                          mae_golden_table,
                                          True,
                                          'results_table',
                                          'per_note_results_table',
                                          'debug_output_table',
                                          types_to_ignore,
                                          pipeline_args=None)
        finally:
            # Restore the patched transform even if the pipeline raises.
            beam.io.WriteToText = self.old_write_to_text

        # Check we wrote the correct results to BigQuery.
        expected_results = [{
            'info_type': 'ALL',
            'recall': 0.7777777777777778,
            'precision': 0.875,
            'f_score': 0.823529411764706,
            'true_positives': 7,
            'false_positives': 1,
            'false_negatives': 2
        }, {
            'info_type': u'TypeA',
            'recall': 0.7142857142857143,
            'precision': 0.8333333333333334,
            'f_score': 0.7692307692307694,
            'true_positives': 5,
            'false_positives': 1,
            'false_negatives': 2
        }, {
            'info_type': u'TypeB',
            'recall': 1.0,
            'precision': 1.0,
            'f_score': 1.0,
            'true_positives': 1,
            'false_positives': 0,
            'false_negatives': 0
        }, {
            'info_type': u'TypeY',
            'recall': 1.0,
            'precision': 1.0,
            'f_score': 1.0,
            'true_positives': 1,
            'false_positives': 0,
            'false_negatives': 0
        }]
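
        # Aggregate counts: 1 TP + 1 FP + 2 FN from note 1-1 plus 6 TPs from
        # note 1-2, so precision = 7 / (7 + 1), recall = 7 / (7 + 2), and
        # f_score = 2 * P * R / (P + R) ~= 0.8235.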
        for r in expected_results:
            r.update({'timestamp': now})
        actual_results = sorted(beam_testutil.get_table('results_table'),
                                key=lambda x: x['info_type'])
        self.assertEqual([normalize_dict_floats(r) for r in expected_results],
                         [normalize_dict_floats(r) for r in actual_results])

        full_text = 'word1   w2 w3  wrd4 5 word6   word7 multi token entity w8'

        def debug_info(record_id, classification, text, info_type, start, end):
            location = full_text.find(text)
            context = (full_text[0:location] + '{[--' + text + '--]}' +
                       full_text[location + len(text):])
            return {
                'record_id': record_id,
                'classification': classification,
                'text': text,
                'info_type': info_type,
                'context': context,
                'start': start,
                'end': end
            }
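
        # For example, debug_info('1-1', 'true_positive', 'word1', 'TypeA', 0, 5)
        # yields context '{[--word1--]}   w2 w3  wrd4 5 word6   ...': the
        # matched text is bracketed in place within the full note text.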

        expected_debug_info = [
            debug_info('1-1', 'true_positive', 'word1', 'TypeA', 0, 5),
            debug_info('1-1', 'false_positive', 'w2', 'TypeA', 8, 10),
            debug_info('1-1', 'false_negative', 'w3', 'TypeA', 11, 13),
            debug_info('1-1', 'false_negative', 'wrd4', 'TypeA', 15, 19),
            debug_info('1-2', 'true_positive', 'word1', 'TypeA', 0, 5),
            debug_info('1-2', 'true_positive', '5', 'TypeB', 20, 21),
            debug_info('1-2', 'true_positive', 'word7', 'TypeY', 30, 35),
            debug_info('1-2', 'true_positive', 'multi', 'TypeA', 36, 41),
            debug_info('1-2', 'true_positive', 'token', 'TypeA', 42, 47),
            debug_info('1-2', 'true_positive', 'entity', 'TypeA', 48, 54),
        ]
        for r in expected_debug_info:
            r.update({'timestamp': now})

        def s(l):
            return sorted(l, key=lambda x: x['record_id'] + x['context'])

        self.assertEqual(s(expected_debug_info),
                         s(beam_testutil.get_table('debug_output_table')))

        expected_per_note = [{
            'record_id': '1-1',
            'precision': 0.5,
            'recall': 0.3333333333333333,
            'f_score': 0.4,
            'true_positives': 1,
            'false_positives': 1,
            'false_negatives': 2
        }, {
            'record_id': '1-2',
            'precision': 1.0,
            'recall': 1.0,
            'f_score': 1.0,
            'true_positives': 6,
            'false_positives': 0,
            'false_negatives': 0
        }]
        for r in expected_per_note:
            r.update({'timestamp': now})
        actual_results = sorted(
            beam_testutil.get_table('per_note_results_table'),
            key=lambda x: x['record_id'])
        self.assertEqual([normalize_dict_floats(r) for r in expected_per_note],
                         [normalize_dict_floats(r) for r in actual_results])

        # Check we wrote the correct results to GCS.
        with open(os.path.join(TESTDATA_DIR, 'expected_results')) as f:
            expected_text = f.read()
        expected_results = results_pb2.Results()
        text_format.Merge(expected_text, expected_results)
        results = results_pb2.Results()
        text_format.Merge(
            testutil.get_gcs_file('bucketname/results/aggregate_results.txt'),
            results)
        self.assertEqual(normalize_floats(expected_results),
                         normalize_floats(results))

        # Check the per-file results were written correctly.
        expected_result1 = results_pb2.IndividualResult()
        text_format.Merge(
            """
record_id: "1-1"
stats {
  true_positives: 1
  false_positives: 1
  false_negatives: 2
  precision: 0.5
  recall: 0.333333333333
  f_score: 0.4
}""", expected_result1)
        expected_result2 = results_pb2.IndividualResult()
        text_format.Merge(
            """
record_id: "1-2"
stats {
  true_positives: 6
  precision: 1.0
  recall: 1.0
  f_score: 1.0
}""", expected_result2)
        normalize_floats(expected_result1)
        normalize_floats(expected_result2)
        full_text = testutil.get_gcs_file(
            'bucketname/results/per-note-results')
        actual_results = []
        for record in sorted(full_text.split('\n\n')):
            if not record:
                continue
            actual_result = results_pb2.IndividualResult()
            text_format.Merge(record, actual_result)
            actual_results.append(normalize_floats(actual_result))

        self.assertEqual([expected_result1, expected_result2], actual_results)
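
normalize_floats and normalize_dict_floats are defined elsewhere in the test module; a plausible sketch of both (the rounding precision and traversal are assumptions) that rounds floats so the comparisons above tolerate representation noise:

def normalize_dict_floats(row):
    # Assumed helper: round float values so dict equality ignores
    # floating-point representation noise.
    return {key: round(value, 6) if isinstance(value, float) else value
            for key, value in row.items()}


def normalize_floats(proto):
    # Assumed helper: recursively round every singular float/double field
    # that is set on the proto, returning it so calls nest inside assertions.
    for descriptor, value in proto.ListFields():
        if descriptor.label != descriptor.LABEL_REPEATED and descriptor.type in (
                descriptor.TYPE_FLOAT, descriptor.TYPE_DOUBLE):
            setattr(proto, descriptor.name, round(value, 6))
        elif descriptor.type == descriptor.TYPE_MESSAGE:
            items = value if descriptor.label == descriptor.LABEL_REPEATED else [value]
            for item in items:
                normalize_floats(item)
    return proto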