def main(argv):
  """Assemble and launch the Beam job that turns bundles and labels into seqex.

  Bundle protos are read from FLAGS.input_filepattern and EventLabel protos
  from FLAGS.labels_filepattern. The two collections are combined through the
  bundle_to_seqex helpers, passed to BundleAndLabelsToSeqexDoFn, and the
  resulting SequenceExample protos are written to FLAGS.output_filepattern.

  Args:
    argv: Command-line arguments; not used.
  """
  del argv  # Unused.

  pipeline = beam.Pipeline()
  version_config = _get_version_config(FLAGS.fhir_version_config)

  # Bundles: read from TFRecord, then run through KeyBundleByPatientIdFn.
  raw_bundles = pipeline | 'readBundles' >> beam.io.ReadFromTFRecord(
      FLAGS.input_filepattern,
      coder=beam.coders.ProtoCoder(resources_pb2.Bundle))
  keyed_bundles = raw_bundles | 'KeyBundlesByPatientId' >> beam.ParDo(
      bundle_to_seqex.KeyBundleByPatientIdFn())

  # Event labels: read from TFRecord, then hand to the pairing helper.
  event_labels = pipeline | 'readEventLabels' >> beam.io.ReadFromTFRecord(
      FLAGS.labels_filepattern,
      coder=beam.coders.ProtoCoder(google_extensions_pb2.EventLabel))
  keyed_event_labels = bundle_to_seqex.CreateTriggerLabelsPairLists(
      event_labels)

  bundles_and_labels = bundle_to_seqex.CreateBundleAndLabels(
      keyed_bundles, keyed_event_labels)

  # Reshuffle -> generate seqex -> reshuffle -> write, one named step each.
  shuffled_inputs = bundles_and_labels | 'Reshuffle1' >> beam.Reshuffle()
  seqex_fn = bundle_to_seqex.BundleAndLabelsToSeqexDoFn(
      version_config=version_config, enable_attribution=False)
  sequence_examples = shuffled_inputs | 'GenerateSeqex' >> beam.ParDo(seqex_fn)
  shuffled_examples = sequence_examples | 'Reshuffle2' >> beam.Reshuffle()
  _ = shuffled_examples | 'WriteSeqex' >> beam.io.WriteToTFRecord(
      FLAGS.output_filepattern,
      coder=beam.coders.ProtoCoder(example_pb2.SequenceExample))

  result = pipeline.run()
  logging.info('Job result: %s', result)
def main(argv):
  """Builds the bundle-to-SequenceExample pipeline and runs it locally.

  The pipeline is always executed with the DirectRunner. Bundle protos are
  read from FLAGS.bundle_path and EventLabel protos from FLAGS.label_path;
  they are combined via the bundle_to_seqex helpers and fed to
  BundleAndLabelsToSeqexDoFn (attribution and sequence-label generation both
  disabled). The resulting SequenceExample protos are written to
  FLAGS.output_path as '.tfrecords' files split into FLAGS.num_output_shards
  shards.

  Args:
    argv: Command-line arguments; not used.
  """
  del argv  # Unused.

  # Always use DirectRunner.
  options = PipelineOptions()
  options.view_as(StandardOptions).runner = 'DirectRunner'
  p = beam.Pipeline(options=options)

  version_config = _get_version_config(FLAGS.fhir_version_config)

  keyed_bundles = (
      p
      | 'readBundles' >> beam.io.ReadFromTFRecord(
          FLAGS.bundle_path,
          coder=beam.coders.ProtoCoder(resources_pb2.Bundle))
      | 'KeyBundlesByPatientId' >> beam.ParDo(
          bundle_to_seqex.KeyBundleByPatientIdFn()))

  event_labels = (
      p
      | 'readEventLabels' >> beam.io.ReadFromTFRecord(
          FLAGS.label_path,
          coder=beam.coders.ProtoCoder(google_extensions_pb2.EventLabel)))
  keyed_event_labels = bundle_to_seqex.CreateTriggerLabelsPairLists(
      event_labels)

  bundles_and_labels = bundle_to_seqex.CreateBundleAndLabels(
      keyed_bundles, keyed_event_labels)

  _ = (
      bundles_and_labels
      | 'Reshuffle1' >> beam.Reshuffle()
      | 'GenerateSeqex' >> beam.ParDo(
          bundle_to_seqex.BundleAndLabelsToSeqexDoFn(
              version_config=version_config,
              enable_attribution=False,
              generate_sequence_label=False))
      | 'Reshuffle2' >> beam.Reshuffle()
      | 'WriteSeqex' >> beam.io.WriteToTFRecord(
          FLAGS.output_path,
          coder=beam.coders.ProtoCoder(example_pb2.SequenceExample),
          file_name_suffix='.tfrecords',
          num_shards=FLAGS.num_output_shards))

  # Block until the local run has finished so that main() does not return
  # (and the process exit) before the output shards are completely written,
  # and so that a pipeline failure is raised here instead of being dropped.
  p.run().wait_until_finish()
def testKeyBundleByPatientId(self):
  """Checks that KeyBundleByPatientIdFn emits one (key, bundle) pair.

  A bundle holding a single patient with id "14" is run through the DoFn; the
  output must be exactly one element whose key is b"Patient/14" and whose
  value equals the input bundle.
  """
  bundle = text_format.Parse(
      """ entry { resource { patient { id { value: "14" } } } }""",
      resources_pb2.Bundle())

  def _verify_keyed_output(elements):
    # Beam matchers must signal failure with BeamAssertException, so plain
    # assertion failures from the unittest helpers are re-wrapped.
    try:
      self.assertLen(elements, 1)
      got_key, got_bundle = elements[0]
      self.assertEqual(b"Patient/14", got_key)
      self.assertProtoEqual(got_bundle, bundle)
    except AssertionError as err:
      raise util.BeamAssertException(err)

  with test_pipeline.TestPipeline() as pipeline:
    bundles = pipeline | "CreateBundles" >> beam.Create([bundle])
    keyed = bundles | "KeyBundleByPatientId" >> beam.ParDo(
        bundle_to_seqex.KeyBundleByPatientIdFn())
    util.assert_that(keyed, _verify_keyed_output)