def main(argv):
  """Builds and runs the Beam pipeline that converts bundles to seqex.

  Reads Bundle protos from FLAGS.input_filepattern and EventLabel protos from
  FLAGS.labels_filepattern, combines them with the bundle_to_seqex helpers,
  and writes SequenceExample protos to FLAGS.output_filepattern.

  Args:
    argv: Command-line arguments; not used.
  """
  del argv  # Unused.

  pipeline = beam.Pipeline()
  fhir_version_config = _get_version_config(FLAGS.fhir_version_config)

  # Input 1: bundles, keyed by KeyBundleByPatientIdFn.
  bundle_source = beam.io.ReadFromTFRecord(
      FLAGS.input_filepattern,
      coder=beam.coders.ProtoCoder(resources_pb2.Bundle))
  bundles = pipeline | 'readBundles' >> bundle_source
  bundles_by_patient = bundles | 'KeyBundlesByPatientId' >> beam.ParDo(
      bundle_to_seqex.KeyBundleByPatientIdFn())

  # Input 2: event labels, turned into keyed (trigger, labels) pair lists.
  label_source = beam.io.ReadFromTFRecord(
      FLAGS.labels_filepattern,
      coder=beam.coders.ProtoCoder(google_extensions_pb2.EventLabel))
  labels = pipeline | 'readEventLabels' >> label_source
  trigger_labels_by_patient = bundle_to_seqex.CreateTriggerLabelsPairLists(
      labels)

  # Combine both inputs, generate sequence examples, and write them out.
  joined = bundle_to_seqex.CreateBundleAndLabels(
      bundles_by_patient, trigger_labels_by_patient)
  seqex = (
      joined
      | 'Reshuffle1' >> beam.Reshuffle()
      | 'GenerateSeqex' >> beam.ParDo(
          bundle_to_seqex.BundleAndLabelsToSeqexDoFn(
              version_config=fhir_version_config,
              enable_attribution=False)))
  _ = (
      seqex
      | 'Reshuffle2' >> beam.Reshuffle()
      | 'WriteSeqex' >> beam.io.WriteToTFRecord(
          FLAGS.output_filepattern,
          coder=beam.coders.ProtoCoder(example_pb2.SequenceExample)))

  job_result = pipeline.run()
  logging.info('Job result: %s', job_result)
def main(argv):
  """Runs the bundle-to-seqex pipeline on the DirectRunner.

  Reads Bundle protos from FLAGS.bundle_path and EventLabel protos from
  FLAGS.label_path, combines them with the bundle_to_seqex helpers, and writes
  SequenceExample protos to FLAGS.output_path as '.tfrecords' files split into
  FLAGS.num_output_shards shards.

  Args:
    argv: Command-line arguments; not used.
  """
  del argv  # Unused.

  # Always use DirectRunner.
  pipeline_options = PipelineOptions()
  standard_options = pipeline_options.view_as(StandardOptions)
  standard_options.runner = 'DirectRunner'
  pipeline = beam.Pipeline(options=pipeline_options)

  fhir_version_config = _get_version_config(FLAGS.fhir_version_config)

  # Input 1: bundles, keyed by KeyBundleByPatientIdFn.
  bundle_source = beam.io.ReadFromTFRecord(
      FLAGS.bundle_path,
      coder=beam.coders.ProtoCoder(resources_pb2.Bundle))
  bundles = pipeline | 'readBundles' >> bundle_source
  bundles_by_patient = bundles | 'KeyBundlesByPatientId' >> beam.ParDo(
      bundle_to_seqex.KeyBundleByPatientIdFn())

  # Input 2: event labels, turned into keyed (trigger, labels) pair lists.
  label_source = beam.io.ReadFromTFRecord(
      FLAGS.label_path,
      coder=beam.coders.ProtoCoder(google_extensions_pb2.EventLabel))
  labels = pipeline | 'readEventLabels' >> label_source
  trigger_labels_by_patient = bundle_to_seqex.CreateTriggerLabelsPairLists(
      labels)

  # Combine both inputs, generate sequence examples, and write them out.
  joined = bundle_to_seqex.CreateBundleAndLabels(
      bundles_by_patient, trigger_labels_by_patient)
  seqex = (
      joined
      | 'Reshuffle1' >> beam.Reshuffle()
      | 'GenerateSeqex' >> beam.ParDo(
          bundle_to_seqex.BundleAndLabelsToSeqexDoFn(
              version_config=fhir_version_config,
              enable_attribution=False,
              generate_sequence_label=False)))
  _ = (
      seqex
      | 'Reshuffle2' >> beam.Reshuffle()
      | 'WriteSeqex' >> beam.io.WriteToTFRecord(
          FLAGS.output_path,
          coder=beam.coders.ProtoCoder(example_pb2.SequenceExample),
          file_name_suffix='.tfrecords',
          num_shards=FLAGS.num_output_shards))

  pipeline.run()
def testCreateBundleAndLabels(self):
  """Checks CreateBundleAndLabels output for partially overlapping keys.

  Bundles are supplied for patients 14 and 30 and trigger lists for patients
  14 and 20; the test expects exactly one output element, keyed by patient 14,
  carrying that patient's bundle and its single (trigger, empty labels) pair.
  """
  patient14_bundle = text_format.Parse(
      """
    entry { resource { patient {
      id { value: "14" }
    } } }
    entry { resource { condition {
      id { value: "1" }
      subject { patient_id { value: "14" } }
      code {
        coding {
          system { value: "http://hl7.org/fhir/sid/icd-9-cm/diagnosis" }
          code { value: "bar" }
        }
      }
      asserted_date {
        value_us: 1417392000000000 # "2014-12-01T00:00:00+00:00"
      }
    } } }""", resources_pb2.Bundle())
  patient14_trigger = text_format.Parse(
      """
    event_time { value_us: 1388566800000000 } # "2014-01-01T09:00:00+00:00"
  """, google_extensions_pb2.EventTrigger())
  # For the purpose of testing, bundle2 does not exist.
  patient20_trigger = text_format.Parse(
      """
    event_time { value_us: 1388566800000000 } # "2014-01-01T09:00:00+00:00"
  """, google_extensions_pb2.EventTrigger())
  patient30_bundle = text_format.Parse(
      """
    entry { resource { patient {
      id { value: "30" }
    } } }""", resources_pb2.Bundle())

  # Each trigger is paired with an empty list of labels.
  patient14_trigger_labels = [(patient14_trigger, [])]
  patient20_trigger_labels = [(patient20_trigger, [])]

  def check_result(got):
    # Re-raise assertion failures as BeamAssertException for assert_that.
    try:
      self.assertLen(got, 1)
      (key, bundle_and_labels) = got[0]
      self.assertEqual(b"Patient/14", key)
      (bundle, trigger_labels) = bundle_and_labels
      self.assertProtoEqual(bundle, patient14_bundle)
      self.assertLen(trigger_labels, 1)
      (trigger, labels) = (trigger_labels[0][0], trigger_labels[0][1])
      self.assertProtoEqual(trigger, patient14_trigger)
      self.assertFalse(len(labels))
    except AssertionError as err:
      raise util.BeamAssertException(err)

  with test_pipeline.TestPipeline() as p:
    keyed_bundles = [
        (b"Patient/14", patient14_bundle),
        (b"Patient/30", patient30_bundle),
    ]
    bundle_pcoll = p | "CreateBundles" >> beam.Create(keyed_bundles)

    keyed_trigger_lists = [
        (b"Patient/14", patient14_trigger_labels),
        (b"Patient/20", patient20_trigger_labels),
    ]
    trigger_list_pcoll = (
        p | "CreateTriggerLists" >> beam.Create(keyed_trigger_lists))

    result = bundle_to_seqex.CreateBundleAndLabels(
        bundle_pcoll, trigger_list_pcoll)
    util.assert_that(result, check_result)