Ejemplo n.º 1
0
 def read_from_file(self):
     """Build the event PCollection from the text source ``self.args.input``.

     The lines read are run through ``nexmark_util.ParseJsonEventFn`` and
     each resulting element is wrapped in a ``window.TimestampedValue``
     whose timestamp is the element's own ``date_time`` attribute.

     Returns:
         The PCollection produced by the final 'timestamping' step.
     """
     raw_lines = self.pipeline | 'reading_from_file' >> beam.io.ReadFromText(
         self.args.input)
     parsed_events = raw_lines | 'deserialization' >> beam.ParDo(
         nexmark_util.ParseJsonEventFn())
     # Stamp every element with the time carried in its date_time attribute.
     return parsed_events | 'timestamping' >> beam.Map(
         lambda event: window.TimestampedValue(event, event.date_time))
Ejemplo n.º 2
0
 def read_from_pubsub(self):
     """Build the event PCollection from a Pub/Sub source.

     Reads from ``self.subscription_name`` when it is truthy and from
     ``self.topic_name`` otherwise. In both cases messages are read with
     attributes and ``'timestamp'`` is passed as the timestamp attribute.
     The ``data`` of every message is then run through
     ``nexmark_util.ParseJsonEventFn``.

     Returns:
         The PCollection produced by the final 'deserialization' step.
     """
     from_subscription = bool(self.subscription_name)
     root = self.pipeline

     # Only the step label and the source keyword differ between the two
     # read modes; everything else about the read is shared.
     if from_subscription:
         read_label = 'ReadPubSub_sub'
         source_kwargs = {'subscription': self.subscription_name}
     else:
         read_label = 'ReadPubSub_topic'
         source_kwargs = {'topic': self.topic_name}

     messages = root | read_label >> beam.io.ReadFromPubSub(
         with_attributes=True,
         timestamp_attribute='timestamp',
         **source_kwargs)

     # Drop the message wrapper and keep only its data before parsing.
     payloads = messages | 'pubsub_unwrap' >> beam.Map(lambda msg: msg.data)
     return payloads | 'deserialization' >> beam.ParDo(
         nexmark_util.ParseJsonEventFn())