Example #1
0
def main(argv):
  """Builds and runs the Beam pipeline that turns bundles and labels into seqex.

  Bundle protos are read from the TFRecord files matching
  FLAGS.input_filepattern and EventLabel protos from those matching
  FLAGS.labels_filepattern. The two inputs are combined with the
  bundle_to_seqex helpers, passed through BundleAndLabelsToSeqexDoFn, and the
  results are written as SequenceExample TFRecords to
  FLAGS.output_filepattern.

  Args:
    argv: Command-line arguments; unused.
  """
  del argv  # Unused.
  pipeline = beam.Pipeline()

  version_config = _get_version_config(FLAGS.fhir_version_config)

  # Input 1: Bundle protos, passed through KeyBundleByPatientIdFn.
  raw_bundles = pipeline | 'readBundles' >> beam.io.ReadFromTFRecord(
      FLAGS.input_filepattern,
      coder=beam.coders.ProtoCoder(resources_pb2.Bundle))
  keyed_bundles = raw_bundles | 'KeyBundlesByPatientId' >> beam.ParDo(
      bundle_to_seqex.KeyBundleByPatientIdFn())

  # Input 2: EventLabel protos, grouped by CreateTriggerLabelsPairLists.
  event_labels = pipeline | 'readEventLabels' >> beam.io.ReadFromTFRecord(
      FLAGS.labels_filepattern,
      coder=beam.coders.ProtoCoder(google_extensions_pb2.EventLabel))
  keyed_event_labels = bundle_to_seqex.CreateTriggerLabelsPairLists(
      event_labels)

  # Combine both inputs, then generate and write the sequence examples.
  bundles_and_labels = bundle_to_seqex.CreateBundleAndLabels(
      keyed_bundles, keyed_event_labels)
  shuffled_inputs = bundles_and_labels | 'Reshuffle1' >> beam.Reshuffle()
  seqex_fn = bundle_to_seqex.BundleAndLabelsToSeqexDoFn(
      version_config=version_config, enable_attribution=False)
  seqexes = shuffled_inputs | 'GenerateSeqex' >> beam.ParDo(seqex_fn)
  shuffled_seqexes = seqexes | 'Reshuffle2' >> beam.Reshuffle()
  _ = shuffled_seqexes | 'WriteSeqex' >> beam.io.WriteToTFRecord(
      FLAGS.output_filepattern,
      coder=beam.coders.ProtoCoder(example_pb2.SequenceExample))

  # Launch the job and log whatever result object the runner hands back.
  logging.info('Job result: %s', pipeline.run())
Example #2
0
def main(argv):
  """Runs the bundle-to-SequenceExample conversion on the DirectRunner.

  Reads Bundle protos from the TFRecord file(s) at FLAGS.bundle_path and
  EventLabel protos from FLAGS.label_path, combines them with the
  bundle_to_seqex helpers, runs BundleAndLabelsToSeqexDoFn over the result,
  and writes SequenceExample protos to FLAGS.output_path as
  FLAGS.num_output_shards files with a '.tfrecords' suffix.

  The call blocks until the pipeline has finished, so the output is complete
  (or the failure has been raised) by the time this function returns.

  Args:
    argv: Command-line arguments; unused.
  """
  del argv  # Unused.

  # Always use DirectRunner.
  options = PipelineOptions()
  options.view_as(StandardOptions).runner = 'DirectRunner'
  p = beam.Pipeline(options=options)

  version_config = _get_version_config(FLAGS.fhir_version_config)

  keyed_bundles = (
      p
      | 'readBundles' >> beam.io.ReadFromTFRecord(
          FLAGS.bundle_path, coder=beam.coders.ProtoCoder(resources_pb2.Bundle))
      | 'KeyBundlesByPatientId' >> beam.ParDo(
          bundle_to_seqex.KeyBundleByPatientIdFn()))
  event_labels = (
      p | 'readEventLabels' >> beam.io.ReadFromTFRecord(
          FLAGS.label_path,
          coder=beam.coders.ProtoCoder(google_extensions_pb2.EventLabel)))
  keyed_event_labels = bundle_to_seqex.CreateTriggerLabelsPairLists(
      event_labels)
  bundles_and_labels = bundle_to_seqex.CreateBundleAndLabels(
      keyed_bundles, keyed_event_labels)
  _ = (
      bundles_and_labels
      | 'Reshuffle1' >> beam.Reshuffle()
      | 'GenerateSeqex' >> beam.ParDo(
          bundle_to_seqex.BundleAndLabelsToSeqexDoFn(
              version_config=version_config,
              enable_attribution=False,
              generate_sequence_label=False))
      | 'Reshuffle2' >> beam.Reshuffle()
      | 'WriteSeqex' >> beam.io.WriteToTFRecord(
          FLAGS.output_path,
          coder=beam.coders.ProtoCoder(example_pb2.SequenceExample),
          file_name_suffix='.tfrecords',
          num_shards=FLAGS.num_output_shards))

  # run() only starts the pipeline; wait for it to reach a terminal state so
  # that main() does not return before the output shards have been written.
  p.run().wait_until_finish()
Example #3
0
    def testKeyBundleByPatientId(self):
        """Checks that KeyBundleByPatientIdFn keys a bundle as b"Patient/14".

        A bundle holding a single patient with id "14" is fed through the
        DoFn; exactly one (key, bundle) pair is expected, with the bundle
        unchanged.
        """
        expected_bundle = text_format.Parse(
            """
      entry { resource { patient {
        id { value: "14" }
      } } }""", resources_pb2.Bundle())
        with test_pipeline.TestPipeline() as pipeline:
            bundles = pipeline | "CreateBundles" >> beam.Create(
                [expected_bundle])
            keyed = bundles | "KeyBundleByPatientId" >> beam.ParDo(
                bundle_to_seqex.KeyBundleByPatientIdFn())

            def verify_output(actual):
                # Re-raise assertion failures as BeamAssertException, the
                # exception type used by the Beam testing util module.
                try:
                    self.assertLen(actual, 1)
                    actual_key, actual_bundle = actual[0]
                    self.assertEqual(b"Patient/14", actual_key)
                    self.assertProtoEqual(actual_bundle, expected_bundle)
                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(keyed, verify_output)