def write_aggregate_results_to_gcs(results_bytes, results_dir):
  """Write the aggregate results to results_dir.

  Args:
    results_bytes: Serialized results_pb2.Results proto.
    results_dir: GCS directory (gs://...) that aggregate_results.txt is
      written into.
  """
  parsed = results_pb2.Results()
  parsed.ParseFromString(results_bytes)
  target = gcsutil.GcsFileName.from_path(
      posixpath.join(results_dir, 'aggregate_results.txt'))
  logging.info('Writing aggregate results to %s', target.string())
  # Upload the proto in its human-readable text form.
  client = storage.Client()
  destination_bucket = client.lookup_bucket(target.bucket)
  destination_bucket.blob(target.blob).upload_from_string(str(parsed))
def format_aggregate_text_for_bq(text_aggregate_results, timestamp):
  """Format results as a BigQuery row from a text input.

  Args:
    text_aggregate_results: Text-format results_pb2.Results.
    timestamp: Timestamp value passed through to every row.

  Returns:
    List of row dicts: one overall ('ALL') row followed by one row per
    info type.
  """
  parsed = results_pb2.Results()
  text_format.Merge(text_aggregate_results, parsed)
  token_results = parsed.binary_token_matching_results
  # Overall micro-averaged row first, then the per-info-type breakdown.
  rows = [
      _create_row(token_results.micro_average_results, timestamp,
                  [('info_type', 'ALL')])
  ]
  rows.extend(
      _create_row(per_type.stats, timestamp,
                  [('info_type', per_type.info_type_category)])
      for per_type in token_results.per_type_micro_average_results)
  return rows
def format_aggregate_results_for_bq(aggregate_results_bytes, now):
  """Format results as a BigQuery row (dict from column name to value).

  Args:
    aggregate_results_bytes: Serialized results_pb2.Results proto.
    now: Timestamp value passed through to every row.

  Returns:
    List of row dicts: one overall ('ALL') row followed by one row per
    info type.
  """
  aggregate = results_pb2.Results()
  aggregate.ParseFromString(aggregate_results_bytes)
  token_results = aggregate.binary_token_matching_results
  # Per-info-type breakdown rows.
  per_type_rows = [
      _create_row(per_type.stats, now,
                  [('info_type', per_type.info_type_category)])
      for per_type in token_results.per_type_micro_average_results
  ]
  # Overall micro-averaged row comes first.
  return [
      _create_row(token_results.micro_average_results, now,
                  [('info_type', 'ALL')])
  ] + per_type_rows
def to_results_proto(self):
  """Convert to results_pb2.Results."""
  results = results_pb2.Results()

  # Strict entity matching: calculate_stats mutates the micro proto in place,
  # so it must run before CopyFrom.
  strict = results.strict_entity_matching_results
  eval_lib.calculate_stats(self.strict_entity_matching.micro)
  strict.micro_average_results.CopyFrom(self.strict_entity_matching.micro)
  strict.macro_average_results.CopyFrom(
      self.strict_entity_matching.macro.calculate_stats())
  strict.per_type_micro_average_results.extend(
      self.strict_entity_matching.per_type_protos())

  # Binary token matching: same calculate-then-copy pattern on the typeless
  # aggregates.
  binary = results.binary_token_matching_results
  eval_lib.calculate_stats(self.binary_token_matching.typeless_micro)
  binary.micro_average_results.CopyFrom(
      self.binary_token_matching.typeless_micro)
  binary.macro_average_results.CopyFrom(
      self.binary_token_matching.typeless_macro.calculate_stats())
  binary.per_type_micro_average_results.extend(
      self.binary_token_matching.per_type_protos())
  return results
def testE2eGCS(self, fake_client_fn, mock_bq_sink_fn, mock_utcnow_fn):
  """End-to-end run of the eval pipeline against fake GCS and BigQuery.

  Sets up findings and golden MAE-style XML files in a fake GCS bucket, runs
  run_pipeline_lib.run_pipeline, then verifies the aggregate, per-note, and
  debug outputs written to the (fake) BigQuery tables and to GCS.

  Args:
    fake_client_fn: patched storage client factory; set to return a
      testutil.FakeStorageClient.
    mock_bq_sink_fn: patched BigQuery sink factory; set to return
      beam_testutil.FakeSink instances that capture rows per table name.
    mock_utcnow_fn: patched clock; set to return a fixed timestamp string.
  """
  # Route BigQuery writes into in-memory fake sinks keyed by table name.
  def make_sink(table_name, schema, write_disposition):  # pylint: disable=unused-argument
    return beam_testutil.FakeSink(table_name)
  mock_bq_sink_fn.side_effect = make_sink
  now = 'current time'
  mock_utcnow_fn.return_value = now

  input_pattern = 'gs://bucketname/input/*'
  golden_dir = 'gs://bucketname/goldens'
  results_dir = 'gs://bucketname/results'
  storage_client = testutil.FakeStorageClient()
  fake_client_fn.return_value = storage_client

  # Note 1-1: one true positive, one false positive, two false negatives.
  tp_tag = tag_template.format('TypeA', 0, 5)
  fp_tag = tag_template.format('TypeA', 8, 10)
  fn_tag = tag_template.format('TypeA', 11, 13)
  fn2_tag = tag_template.format('TypeA', 15, 19)
  findings_tags = '\n'.join([tp_tag, fp_tag])
  golden_tags = '\n'.join([tp_tag, fn_tag, fn2_tag])
  testutil.set_gcs_file('bucketname/input/1-1.xml',
                        xml_template.format(findings_tags))
  testutil.set_gcs_file('bucketname/goldens/1-1.xml',
                        xml_template.format(golden_tags))

  # Note 1-2 exercises the entity-vs-token matching distinctions.
  tp2_tag = tag_template.format('TypeB', 20, 21)
  # False negative + false positive for entity matching, but true positive for
  # binary token matching.
  entity_fp_tag = tag_template.format('TypeX', 30, 35)
  entity_fn_tag = tag_template.format('TypeY', 30, 35)
  # Several tokens are tagged as one in the golden. This is not a match for
  # entity matching, but each covered token ('multi', 'token', 'entity') is a
  # match for binary token matching.
  partial_tag1 = tag_template.format('TypeA', 36, 41)
  partial_tag2 = tag_template.format('TypeA', 42, 47)
  partial_tag3 = tag_template.format('TypeA', 48, 54)
  multi_token_tag = tag_template.format('TypeA', 36, 54)
  # Type 'ignore' is in types_to_ignore below, so this tag must not count.
  ignored_tag = tag_template.format('ignore', 55, 57)
  findings_tags = '\n'.join([
      tp_tag, tp2_tag, entity_fp_tag, partial_tag1, partial_tag2, partial_tag3,
      ignored_tag
  ])
  golden_tags = '\n'.join(
      [tp_tag, tp2_tag, entity_fn_tag, multi_token_tag])
  testutil.set_gcs_file('bucketname/input/1-2.xml',
                        xml_template.format(findings_tags))
  testutil.set_gcs_file('bucketname/goldens/1-2.xml',
                        xml_template.format(golden_tags))

  # Swap the text sink for an in-memory transform so per-note results are
  # captured instead of written to real files; restored after the run.
  self.old_write_to_text = beam.io.WriteToText
  beam.io.WriteToText = beam_testutil.DummyWriteTransform
  types_to_ignore = ['ignore']
  mae_input_query = None
  mae_golden_table = None
  run_pipeline_lib.run_pipeline(input_pattern, golden_dir, results_dir,
                                mae_input_query, mae_golden_table, True,
                                'results_table', 'per_note_results_table',
                                'debug_output_table', types_to_ignore,
                                pipeline_args=None)
  beam.io.WriteToText = self.old_write_to_text

  # Check we wrote the correct results to BigQuery.
  expected_results = [{
      'info_type': 'ALL',
      'recall': 0.7777777777777778,
      'precision': 0.875,
      'f_score': 0.823529411764706,
      'true_positives': 7,
      'false_positives': 1,
      'false_negatives': 2
  }, {
      'info_type': u'TypeA',
      'recall': 0.7142857142857143,
      'precision': 0.8333333333333334,
      'f_score': 0.7692307692307694,
      'true_positives': 5,
      'false_positives': 1,
      'false_negatives': 2
  }, {
      'info_type': u'TypeB',
      'recall': 1.0,
      'precision': 1.0,
      'f_score': 1.0,
      'true_positives': 1,
      'false_positives': 0,
      'false_negatives': 0
  }, {
      'info_type': u'TypeY',
      'recall': 1.0,
      'precision': 1.0,
      'f_score': 1.0,
      'true_positives': 1,
      'false_positives': 0,
      'false_negatives': 0
  }]
  for r in expected_results:
    r.update({'timestamp': now})
  # Sort both sides by info_type so the comparison is order-independent.
  actual_results = sorted(beam_testutil.get_table('results_table'),
                          key=lambda x: x['info_type'])
  self.assertEqual([normalize_dict_floats(r) for r in expected_results],
                   [normalize_dict_floats(r) for r in actual_results])

  # The note text the tag offsets above refer to.
  full_text = 'word1 w2 w3 wrd4 5 word6 word7 multi token entity w8'

  # Build an expected debug_output_table row; 'context' is full_text with the
  # finding highlighted as {[--text--]}.
  def debug_info(record_id, classification, text, info_type, start, end):
    location = full_text.find(text)
    context = (full_text[0:location] + '{[--' + text + '--]}' +
               full_text[location + len(text):])
    return {
        'record_id': record_id,
        'classification': classification,
        'text': text,
        'info_type': info_type,
        'context': context,
        'start': start,
        'end': end
    }

  expected_debug_info = [
      debug_info('1-1', 'true_positive', 'word1', 'TypeA', 0, 5),
      debug_info('1-1', 'false_positive', 'w2', 'TypeA', 8, 10),
      debug_info('1-1', 'false_negative', 'w3', 'TypeA', 11, 13),
      debug_info('1-1', 'false_negative', 'wrd4', 'TypeA', 15, 19),
      debug_info('1-2', 'true_positive', 'word1', 'TypeA', 0, 5),
      debug_info('1-2', 'true_positive', '5', 'TypeB', 20, 21),
      debug_info('1-2', 'true_positive', 'word7', 'TypeY', 30, 35),
      debug_info('1-2', 'true_positive', 'multi', 'TypeA', 36, 41),
      debug_info('1-2', 'true_positive', 'token', 'TypeA', 42, 47),
      debug_info('1-2', 'true_positive', 'entity', 'TypeA', 48, 54),
  ]
  for r in expected_debug_info:
    r.update({'timestamp': now})

  # Canonical ordering for the debug rows before comparing.
  def s(l):
    return sorted(l, key=lambda x: x['record_id'] + x['context'])
  self.assertEqual(s(expected_debug_info),
                   s(beam_testutil.get_table('debug_output_table')))

  expected_per_note = [{
      'record_id': '1-1',
      'precision': 0.5,
      'recall': 0.3333333333333333,
      'f_score': 0.4,
      'true_positives': 1,
      'false_positives': 1,
      'false_negatives': 2
  }, {
      'record_id': '1-2',
      'precision': 1.0,
      'recall': 1.0,
      'f_score': 1.0,
      'true_positives': 6,
      'false_positives': 0,
      'false_negatives': 0
  }]
  for r in expected_per_note:
    r.update({'timestamp': now})
  actual_results = sorted(
      beam_testutil.get_table('per_note_results_table'),
      key=lambda x: x['record_id'])
  self.assertEqual([normalize_dict_floats(r) for r in expected_per_note],
                   [normalize_dict_floats(r) for r in actual_results])

  # Check we wrote the correct results to GCS.
  expected_text = ''
  with open(os.path.join(TESTDATA_DIR, 'expected_results')) as f:
    expected_text = f.read()
  expected_results = results_pb2.Results()
  text_format.Merge(expected_text, expected_results)
  results = results_pb2.Results()
  text_format.Merge(
      testutil.get_gcs_file('bucketname/results/aggregate_results.txt'),
      results)
  self.assertEqual(normalize_floats(expected_results),
                   normalize_floats(results))

  # Check the per-file results were written correctly.
  expected_result1 = results_pb2.IndividualResult()
  text_format.Merge(
      """
      record_id: "1-1"
      stats {
        true_positives: 1
        false_positives: 1
        false_negatives: 2
        precision: 0.5
        recall: 0.333333333333
        f_score: 0.4
      }""", expected_result1)
  expected_result2 = results_pb2.IndividualResult()
  text_format.Merge(
      """
      record_id: "1-2"
      stats {
        true_positives: 6
        precision: 1.0
        recall: 1.0
        f_score: 1.0
      }""", expected_result2)
  normalize_floats(expected_result1)
  normalize_floats(expected_result2)

  # Per-note results are written as blank-line-separated text protos.
  full_text = testutil.get_gcs_file(
      'bucketname/results/per-note-results')
  actual_results = []
  for record in sorted(full_text.split('\n\n')):
    if not record:
      continue
    actual_result = results_pb2.IndividualResult()
    text_format.Merge(record, actual_result)
    actual_results.append(normalize_floats(actual_result))
  self.assertEqual([expected_result1, expected_result2], actual_results)