예제 #1
0
def main(argv):
  """Builds and launches the Beam pipeline that turns bundles into seqex.

  The pipeline reads `Bundle` protos from `FLAGS.input_filepattern` and
  `EventLabel` protos from `FLAGS.labels_filepattern` (both TFRecord inputs),
  combines them through the `bundle_to_seqex` helpers, runs
  `BundleAndLabelsToSeqexDoFn` over the combined records, and writes the
  resulting `SequenceExample` protos to `FLAGS.output_filepattern`.

  Args:
    argv: Command-line arguments; ignored.
  """
  del argv  # Unused.
  pipeline = beam.Pipeline()

  version_config = _get_version_config(FLAGS.fhir_version_config)

  # Input 1: FHIR bundles, passed through KeyBundleByPatientIdFn.
  raw_bundles = pipeline | 'readBundles' >> beam.io.ReadFromTFRecord(
      FLAGS.input_filepattern,
      coder=beam.coders.ProtoCoder(resources_pb2.Bundle))
  keyed_bundles = raw_bundles | 'KeyBundlesByPatientId' >> beam.ParDo(
      bundle_to_seqex.KeyBundleByPatientIdFn())

  # Input 2: event labels, turned into keyed (trigger, labels) pair lists.
  event_labels = pipeline | 'readEventLabels' >> beam.io.ReadFromTFRecord(
      FLAGS.labels_filepattern,
      coder=beam.coders.ProtoCoder(google_extensions_pb2.EventLabel))
  keyed_event_labels = bundle_to_seqex.CreateTriggerLabelsPairLists(
      event_labels)

  bundles_and_labels = bundle_to_seqex.CreateBundleAndLabels(
      keyed_bundles, keyed_event_labels)

  # Conversion stage, with a reshuffle on either side of the DoFn.
  reshuffled_inputs = bundles_and_labels | 'Reshuffle1' >> beam.Reshuffle()
  seqex = reshuffled_inputs | 'GenerateSeqex' >> beam.ParDo(
      bundle_to_seqex.BundleAndLabelsToSeqexDoFn(
          version_config=version_config, enable_attribution=False))
  reshuffled_seqex = seqex | 'Reshuffle2' >> beam.Reshuffle()

  # Sink: the write transform's output is not needed afterwards.
  _ = reshuffled_seqex | 'WriteSeqex' >> beam.io.WriteToTFRecord(
      FLAGS.output_filepattern,
      coder=beam.coders.ProtoCoder(example_pb2.SequenceExample))

  job_result = pipeline.run()
  logging.info('Job result: %s', job_result)
예제 #2
0
def main(argv):
  """Runs the bundle-to-SequenceExample pipeline on the DirectRunner.

  Reads `Bundle` protos from `FLAGS.bundle_path` and `EventLabel` protos from
  `FLAGS.label_path` (both TFRecord inputs), combines them through the
  `bundle_to_seqex` helpers, runs `BundleAndLabelsToSeqexDoFn` over the
  combined records, and writes `SequenceExample` protos to
  `FLAGS.output_path` as `FLAGS.num_output_shards` files with a `.tfrecords`
  suffix. Blocks until the pipeline has finished.

  Args:
    argv: Command-line arguments; unused.
  """
  del argv  # Unused.

  # Always use DirectRunner.
  options = PipelineOptions()
  options.view_as(StandardOptions).runner = 'DirectRunner'
  p = beam.Pipeline(options=options)

  version_config = _get_version_config(FLAGS.fhir_version_config)

  keyed_bundles = (
      p
      | 'readBundles' >> beam.io.ReadFromTFRecord(
          FLAGS.bundle_path, coder=beam.coders.ProtoCoder(resources_pb2.Bundle))
      | 'KeyBundlesByPatientId' >> beam.ParDo(
          bundle_to_seqex.KeyBundleByPatientIdFn()))
  event_labels = (
      p | 'readEventLabels' >> beam.io.ReadFromTFRecord(
          FLAGS.label_path,
          coder=beam.coders.ProtoCoder(google_extensions_pb2.EventLabel)))
  keyed_event_labels = bundle_to_seqex.CreateTriggerLabelsPairLists(
      event_labels)
  bundles_and_labels = bundle_to_seqex.CreateBundleAndLabels(
      keyed_bundles, keyed_event_labels)
  _ = (
      bundles_and_labels
      | 'Reshuffle1' >> beam.Reshuffle()
      | 'GenerateSeqex' >> beam.ParDo(
          bundle_to_seqex.BundleAndLabelsToSeqexDoFn(
              version_config=version_config,
              enable_attribution=False,
              generate_sequence_label=False))
      | 'Reshuffle2' >> beam.Reshuffle()
      | 'WriteSeqex' >> beam.io.WriteToTFRecord(
          FLAGS.output_path,
          coder=beam.coders.ProtoCoder(example_pb2.SequenceExample),
          file_name_suffix='.tfrecords',
          num_shards=FLAGS.num_output_shards))

  # run() only hands back a result handle. Wait on it explicitly so that main
  # does not return before the local run has completed, and so that a failed
  # run is reported here rather than being dropped.
  p.run().wait_until_finish()
예제 #3
0
  def testCreateBundleAndLabels(self):
    """Checks CreateBundleAndLabels on partially overlapping keys.

    The bundle input holds entries for Patient/14 and Patient/30, while the
    trigger input holds entries for Patient/14 and Patient/20. The test
    expects exactly one output element, keyed by Patient/14, carrying the
    original bundle together with its single (trigger, empty labels) pair.
    """
    # Both triggers are parsed from the same textproto.
    trigger_textproto = """
      event_time { value_us: 1388566800000000 }  # "2014-01-01T09:00:00+00:00"
    """

    patient14_bundle = text_format.Parse(
        """
      entry { resource { patient {
        id { value: "14" }
      } } }
      entry { resource { condition {
        id { value: "1" }
        subject { patient_id { value: "14" } }
        code {
          coding {
            system { value: "http://hl7.org/fhir/sid/icd-9-cm/diagnosis" }
            code { value: "bar" }
          }
        }
        asserted_date {
          value_us: 1417392000000000 # "2014-12-01T00:00:00+00:00"
        }
      } } }""", resources_pb2.Bundle())
    patient14_trigger = text_format.Parse(
        trigger_textproto, google_extensions_pb2.EventTrigger())

    # Patient/20 deliberately has a trigger but no bundle.
    patient20_trigger = text_format.Parse(
        trigger_textproto, google_extensions_pb2.EventTrigger())

    # Patient/30 has a bundle but no trigger.
    patient30_bundle = text_format.Parse(
        """
      entry { resource { patient {
        id { value: "30" }
      } } }""", resources_pb2.Bundle())

    # Each patient gets one (trigger, labels) pair with no labels attached.
    patient14_pairs = [(patient14_trigger, [])]
    patient20_pairs = [(patient20_trigger, [])]

    keyed_bundle_inputs = [
        (b"Patient/14", patient14_bundle),
        (b"Patient/30", patient30_bundle),
    ]
    keyed_trigger_inputs = [
        (b"Patient/14", patient14_pairs),
        (b"Patient/20", patient20_pairs),
    ]

    with test_pipeline.TestPipeline() as p:
      bundles = p | "CreateBundles" >> beam.Create(keyed_bundle_inputs)
      trigger_lists = p | "CreateTriggerLists" >> beam.Create(
          keyed_trigger_inputs)
      joined = bundle_to_seqex.CreateBundleAndLabels(bundles, trigger_lists)

      def verify_output(actual):
        """Matcher for assert_that; wraps failures in BeamAssertException."""
        try:
          self.assertLen(actual, 1)
          (actual_key, actual_value) = actual[0]
          self.assertEqual(b"Patient/14", actual_key)
          (actual_bundle, actual_pairs) = actual_value
          self.assertProtoEqual(actual_bundle, patient14_bundle)
          self.assertLen(actual_pairs, 1)
          only_pair = actual_pairs[0]
          self.assertProtoEqual(only_pair[0], patient14_trigger)
          # The labels list for the trigger must be empty.
          self.assertFalse(len(only_pair[1]))
        except AssertionError as err:
          raise util.BeamAssertException(err)

      util.assert_that(joined, verify_output)