def main(argv):
  """Assembles and launches the Bundle + EventLabel -> SequenceExample job.

  Bundles are read from FLAGS.input_filepattern and event labels from
  FLAGS.labels_filepattern (both TFRecord files of serialized protos). They
  are joined through the bundle_to_seqex helpers, converted by
  BundleAndLabelsToSeqexDoFn, and written as SequenceExample TFRecords to
  FLAGS.output_filepattern. The object returned by run() is logged.

  Args:
    argv: Command-line arguments; ignored.
  """
  del argv  # Unused.

  pipeline = beam.Pipeline()
  version_config = _get_version_config(FLAGS.fhir_version_config)

  # Bundles, keyed via KeyBundleByPatientIdFn.
  raw_bundles = pipeline | 'readBundles' >> beam.io.ReadFromTFRecord(
      FLAGS.input_filepattern,
      coder=beam.coders.ProtoCoder(resources_pb2.Bundle))
  keyed_bundles = raw_bundles | 'KeyBundlesByPatientId' >> beam.ParDo(
      bundle_to_seqex.KeyBundleByPatientIdFn())

  # Event labels, turned into keyed (trigger, labels) pair lists.
  event_labels = pipeline | 'readEventLabels' >> beam.io.ReadFromTFRecord(
      FLAGS.labels_filepattern,
      coder=beam.coders.ProtoCoder(google_extensions_pb2.EventLabel))
  keyed_event_labels = bundle_to_seqex.CreateTriggerLabelsPairLists(
      event_labels)

  bundles_and_labels = bundle_to_seqex.CreateBundleAndLabels(
      keyed_bundles, keyed_event_labels)

  # A reshuffle sits on either side of the SequenceExample generation step.
  shuffled_inputs = bundles_and_labels | 'Reshuffle1' >> beam.Reshuffle()
  seqex = shuffled_inputs | 'GenerateSeqex' >> beam.ParDo(
      bundle_to_seqex.BundleAndLabelsToSeqexDoFn(
          version_config=version_config, enable_attribution=False))
  shuffled_seqex = seqex | 'Reshuffle2' >> beam.Reshuffle()
  _ = shuffled_seqex | 'WriteSeqex' >> beam.io.WriteToTFRecord(
      FLAGS.output_filepattern,
      coder=beam.coders.ProtoCoder(example_pb2.SequenceExample))

  job_result = pipeline.run()
  logging.info('Job result: %s', job_result)
def testCreateTriggerLabelsPairLists(self):
  """Checks that two labels of one patient yield two (trigger, [label]) pairs.

  Two EventLabels for patient "14" are fed through
  CreateTriggerLabelsPairLists. The output must hold a single element keyed
  by b"Patient/14" whose value contains one pair per label, each pairing the
  expected EventTrigger with a one-element list holding that label.
  """
  want_trigger_a = text_format.Parse(
      """
      event_time { value_us: 1417392000000000 } # "2014-12-01T00:00:00+00:00"
      source { encounter_id { value: "1" } }
      """, google_extensions_pb2.EventTrigger())
  want_trigger_b = text_format.Parse(
      """
      event_time { value_us: 1417428000000000 } # "2014-12-01T01:00:00+00:00"
      """, google_extensions_pb2.EventTrigger())
  label_a = text_format.Parse(
      """
      patient { patient_id { value: "14" } }
      type { code { value: "test1" } }
      event_time { value_us: 1417392000000000 } # "2014-12-01T00:00:00+00:00"
      source { encounter_id { value: "1" } }
      """, google_extensions_pb2.EventLabel())
  label_b = text_format.Parse(
      """
      patient { patient_id { value: "14" } }
      type { code { value: "test2" } }
      event_time { value_us: 1417428000000000 } # "2014-12-01T01:00:00+00:00"
      label { class_name {
        system { value: "urn:test:label" }
        code { value: "green" }
      } }
      """, google_extensions_pb2.EventLabel())

  # Expected (trigger, label) pairs in ascending event-time order.
  expected_pairs = ((want_trigger_a, label_a), (want_trigger_b, label_b))

  with test_pipeline.TestPipeline() as p:
    labels_pcoll = p | "CreateEventLabels" >> beam.Create([label_a, label_b])
    result = bundle_to_seqex.CreateTriggerLabelsPairLists(labels_pcoll)

    def check_result(got):
      """Validates the materialized output of the transform under test."""
      try:
        self.assertLen(got, 1)
        patient_key, trigger_label_pairs = got[0]
        self.assertEqual(b"Patient/14", patient_key)
        self.assertLen(trigger_label_pairs, 2)
        # The order of the pairs is not guaranteed, so sort them by trigger
        # event time before comparing against the expectations.
        ordered_pairs = sorted(
            trigger_label_pairs,
            key=lambda pair: pair[0].event_time.value_us)
        for (got_trigger, got_labels), (want_trigger, want_label) in zip(
            ordered_pairs, expected_pairs):
          self.assertProtoEqual(got_trigger, want_trigger)
          self.assertLen(got_labels, 1)
          self.assertProtoEqual(got_labels[0], want_label)
      except AssertionError as err:
        # Beam's assert_that expects failures as BeamAssertException.
        raise util.BeamAssertException(err)

    util.assert_that(result, check_result)
def main(argv):
  """Builds the Bundle + EventLabel -> SequenceExample pipeline and runs it.

  The pipeline is pinned to the DirectRunner. It reads Bundle protos from
  FLAGS.bundle_path and EventLabel protos from FLAGS.label_path (TFRecord
  files), joins them through the bundle_to_seqex helpers, converts them with
  BundleAndLabelsToSeqexDoFn, and writes SequenceExample TFRecords under
  FLAGS.output_path using the '.tfrecords' suffix and
  FLAGS.num_output_shards shards.

  This function blocks until the pipeline has finished, so the output is
  complete when it returns and pipeline failures propagate to the caller.

  Args:
    argv: Command-line arguments; ignored.
  """
  del argv  # Unused.

  # Always use DirectRunner.
  options = PipelineOptions()
  options.view_as(StandardOptions).runner = 'DirectRunner'
  p = beam.Pipeline(options=options)
  version_config = _get_version_config(FLAGS.fhir_version_config)

  keyed_bundles = (
      p
      | 'readBundles' >> beam.io.ReadFromTFRecord(
          FLAGS.bundle_path,
          coder=beam.coders.ProtoCoder(resources_pb2.Bundle))
      | 'KeyBundlesByPatientId' >> beam.ParDo(
          bundle_to_seqex.KeyBundleByPatientIdFn()))
  event_labels = (
      p
      | 'readEventLabels' >> beam.io.ReadFromTFRecord(
          FLAGS.label_path,
          coder=beam.coders.ProtoCoder(google_extensions_pb2.EventLabel)))
  keyed_event_labels = bundle_to_seqex.CreateTriggerLabelsPairLists(
      event_labels)
  bundles_and_labels = bundle_to_seqex.CreateBundleAndLabels(
      keyed_bundles, keyed_event_labels)
  _ = (
      bundles_and_labels
      | 'Reshuffle1' >> beam.Reshuffle()
      | 'GenerateSeqex' >> beam.ParDo(
          bundle_to_seqex.BundleAndLabelsToSeqexDoFn(
              version_config=version_config,
              enable_attribution=False,
              generate_sequence_label=False))
      | 'Reshuffle2' >> beam.Reshuffle()
      | 'WriteSeqex' >> beam.io.WriteToTFRecord(
          FLAGS.output_path,
          coder=beam.coders.ProtoCoder(example_pb2.SequenceExample),
          file_name_suffix='.tfrecords',
          num_shards=FLAGS.num_output_shards))

  # run() may return before execution has completed; wait for the result so
  # the process does not exit with partially written output and so errors
  # raised inside the pipeline are surfaced here.
  p.run().wait_until_finish()