Beispiel #1
0
def main(argv):
  """Builds and starts the Beam pipeline that writes SequenceExamples.

  Two TFRecord inputs are read: `resources_pb2.Bundle` protos from
  `FLAGS.input_filepattern` and `google_extensions_pb2.EventLabel` protos from
  `FLAGS.labels_filepattern`. They are combined through the `bundle_to_seqex`
  helpers, and the `example_pb2.SequenceExample` protos that result are
  written as TFRecords to `FLAGS.output_filepattern`.

  Args:
    argv: Command-line arguments; ignored.
  """
  del argv  # Unused.
  pipeline = beam.Pipeline()

  fhir_version_config = _get_version_config(FLAGS.fhir_version_config)

  # Bundle branch: read Bundle protos, then key them (by patient id, going by
  # the DoFn's name).
  raw_bundles = pipeline | 'readBundles' >> beam.io.ReadFromTFRecord(
      FLAGS.input_filepattern,
      coder=beam.coders.ProtoCoder(resources_pb2.Bundle))
  keyed_bundles = raw_bundles | 'KeyBundlesByPatientId' >> beam.ParDo(
      bundle_to_seqex.KeyBundleByPatientIdFn())

  # Label branch: read EventLabel protos and turn them into keyed
  # trigger/labels pair lists.
  event_labels = pipeline | 'readEventLabels' >> beam.io.ReadFromTFRecord(
      FLAGS.labels_filepattern,
      coder=beam.coders.ProtoCoder(google_extensions_pb2.EventLabel))
  keyed_event_labels = bundle_to_seqex.CreateTriggerLabelsPairLists(
      event_labels)

  # Combine both branches, then generate and write the SequenceExamples. A
  # Reshuffle sits on each side of the generation step.
  bundles_and_labels = bundle_to_seqex.CreateBundleAndLabels(
      keyed_bundles, keyed_event_labels)
  reshuffled_inputs = bundles_and_labels | 'Reshuffle1' >> beam.Reshuffle()
  seqex = reshuffled_inputs | 'GenerateSeqex' >> beam.ParDo(
      bundle_to_seqex.BundleAndLabelsToSeqexDoFn(
          version_config=fhir_version_config, enable_attribution=False))
  reshuffled_seqex = seqex | 'Reshuffle2' >> beam.Reshuffle()
  _ = reshuffled_seqex | 'WriteSeqex' >> beam.io.WriteToTFRecord(
      FLAGS.output_filepattern,
      coder=beam.coders.ProtoCoder(example_pb2.SequenceExample))

  # Start the pipeline and log the result object returned by the runner.
  job_result = pipeline.run()
  logging.info('Job result: %s', job_result)
    def testCreateTriggerLabelsPairLists(self):
        """Two labels of one patient end up as two (trigger, [label]) pairs.

        Both input labels belong to patient "14". The expected output is a
        single element keyed by b"Patient/14" whose value holds one pair per
        label, each pair made of the matching trigger and a one-element label
        list.
        """
        # Text-format protos for the expected triggers and the input labels.
        first_trigger_text = """
      event_time { value_us: 1417392000000000 } # "2014-12-01T00:00:00+00:00"
      source { encounter_id { value: "1" } }
    """
        second_trigger_text = """
      event_time { value_us: 1417428000000000 } # "2014-12-01T01:00:00+00:00"
    """
        first_label_text = """
      patient { patient_id { value: "14" } }
      type { code { value: "test1" } }
      event_time { value_us: 1417392000000000 } # "2014-12-01T00:00:00+00:00"
      source { encounter_id { value: "1" } }
    """
        second_label_text = """
      patient { patient_id { value: "14" } }
      type { code { value: "test2" } }
      event_time { value_us: 1417428000000000 } # "2014-12-01T01:00:00+00:00"
      label { class_name {
        system { value: "urn:test:label" }
        code { value: "green" }
      } }
    """

        first_trigger = text_format.Parse(
            first_trigger_text, google_extensions_pb2.EventTrigger())
        second_trigger = text_format.Parse(
            second_trigger_text, google_extensions_pb2.EventTrigger())
        first_label = text_format.Parse(
            first_label_text, google_extensions_pb2.EventLabel())
        second_label = text_format.Parse(
            second_label_text, google_extensions_pb2.EventLabel())

        # Expected (trigger, label) pairs, in ascending event_time order.
        expected_pairs = [
            (first_trigger, first_label),
            (second_trigger, second_label),
        ]

        with test_pipeline.TestPipeline() as p:
            labels_pcoll = (
                p
                | "CreateEventLabels" >> beam.Create(
                    [first_label, second_label]))
            result = bundle_to_seqex.CreateTriggerLabelsPairLists(
                labels_pcoll)

            def verify_output(actual):
                try:
                    self.assertLen(actual, 1)
                    patient_key, trigger_label_pairs = actual[0]
                    self.assertEqual(b"Patient/14", patient_key)
                    self.assertLen(trigger_label_pairs, 2)
                    # The order of the pairs is not relied upon, so sort them
                    # by the trigger's event_time before comparing.
                    by_event_time = sorted(
                        trigger_label_pairs,
                        key=lambda pair: pair[0].event_time.value_us)
                    for got_pair, want_pair in zip(by_event_time,
                                                   expected_pairs):
                        got_trigger, got_labels = got_pair
                        want_trigger, want_label = want_pair
                        self.assertProtoEqual(got_trigger, want_trigger)
                        self.assertLen(got_labels, 1)
                        self.assertProtoEqual(got_labels[0], want_label)
                except AssertionError as err:
                    # Re-raise as the exception type used by Beam's testing
                    # utilities.
                    raise util.BeamAssertException(err)

            util.assert_that(result, verify_output)
Beispiel #3
0
def main(argv):
  """Runs the bundle-to-SequenceExample pipeline locally and waits for it.

  `resources_pb2.Bundle` protos are read from `FLAGS.bundle_path` and
  `google_extensions_pb2.EventLabel` protos from `FLAGS.label_path` (both
  TFRecord files). They are combined through the `bundle_to_seqex` helpers,
  and the resulting `example_pb2.SequenceExample` protos are written to
  `FLAGS.output_path` as `FLAGS.num_output_shards` TFRecord shards with a
  `.tfrecords` suffix.

  The pipeline is always executed with the DirectRunner, and this function
  blocks until it has finished.

  Args:
    argv: Command-line arguments; ignored.
  """
  del argv  # Unused.

  # Always use DirectRunner.
  options = PipelineOptions()
  options.view_as(StandardOptions).runner = 'DirectRunner'
  p = beam.Pipeline(options=options)

  version_config = _get_version_config(FLAGS.fhir_version_config)

  # Bundle branch: read Bundle protos, then key them (by patient id, going by
  # the DoFn's name).
  keyed_bundles = (
      p
      | 'readBundles' >> beam.io.ReadFromTFRecord(
          FLAGS.bundle_path, coder=beam.coders.ProtoCoder(resources_pb2.Bundle))
      | 'KeyBundlesByPatientId' >> beam.ParDo(
          bundle_to_seqex.KeyBundleByPatientIdFn()))
  # Label branch: read EventLabel protos and turn them into keyed
  # trigger/labels pair lists.
  event_labels = (
      p | 'readEventLabels' >> beam.io.ReadFromTFRecord(
          FLAGS.label_path,
          coder=beam.coders.ProtoCoder(google_extensions_pb2.EventLabel)))
  keyed_event_labels = bundle_to_seqex.CreateTriggerLabelsPairLists(
      event_labels)
  bundles_and_labels = bundle_to_seqex.CreateBundleAndLabels(
      keyed_bundles, keyed_event_labels)
  _ = (
      bundles_and_labels
      | 'Reshuffle1' >> beam.Reshuffle()
      | 'GenerateSeqex' >> beam.ParDo(
          bundle_to_seqex.BundleAndLabelsToSeqexDoFn(
              version_config=version_config,
              enable_attribution=False,
              generate_sequence_label=False))
      | 'Reshuffle2' >> beam.Reshuffle()
      | 'WriteSeqex' >> beam.io.WriteToTFRecord(
          FLAGS.output_path,
          coder=beam.coders.ProtoCoder(example_pb2.SequenceExample),
          file_name_suffix='.tfrecords',
          num_shards=FLAGS.num_output_shards))

  # Block until the pipeline has completed. Without this the script may
  # return before the output is fully written, and pipeline failures are not
  # reliably propagated to the caller.
  p.run().wait_until_finish()