Beispiel #1
0
 def expand(self, pcoll):
     """Build the write sub-pipeline for ``self.sink`` and return its result.

     A one-element collection ('DoOnce') feeds a step that calls
     ``initialize_write()`` on the sink; that step's output is handed as a
     singleton side input to the bundle writers and to ``_finalize_write``.

     Two layouts are produced, depending on the sink:

     * The sink has a truthy ``num_shards``: elements are keyed (a single
       ``None`` key when the count is 1, otherwise via ``_RoundRobinKeyFn``),
       grouped by key in the global window and written by
       ``_WriteKeyedBundleDoFn``.
     * Otherwise: elements are written by ``_WriteBundleDoFn``; the write
       results are then paired with a ``None`` key, grouped in the global
       window and flattened back out.

     Args:
         pcoll: The collection whose elements are written to the sink.

     Returns:
         The collection produced by the 'FinalizeWrite' step.
     """
     seed = pcoll.pipeline | 'DoOnce' >> core.Create([None])
     init_step = 'InitializeWrite' >> core.Map(
         lambda _, sink: sink.initialize_write(), self.sink)
     init_result = seed | init_step

     if not getattr(self.sink, 'num_shards', 0):
         # No shard count requested by the sink: write first, then collect
         # every write result under one key so finalization sees them all.
         min_shards = 1
         written = pcoll | 'WriteBundles' >> core.ParDo(
             _WriteBundleDoFn(self.sink), AsSingleton(init_result))
         regrouped = (
             written
             | 'Pair' >> core.Map(lambda x: (None, x))
             | core.WindowInto(window.GlobalWindows())
             | core.GroupByKey())
         write_results = regrouped | 'Extract' >> core.FlatMap(
             lambda x: x[1])
     else:
         # Shard count requested: key the elements first so that grouping
         # happens before the keyed writer runs.
         min_shards = self.sink.num_shards
         if min_shards == 1:
             key_step = core.Map(lambda x: (None, x))
         else:
             key_step = core.ParDo(_RoundRobinKeyFn(min_shards))
         grouped = (
             pcoll
             | key_step
             | core.WindowInto(window.GlobalWindows())
             | core.GroupByKey())
         write_results = grouped | 'WriteBundles' >> core.ParDo(
             _WriteKeyedBundleDoFn(self.sink), AsSingleton(init_result))

     finalize_step = core.FlatMap(
         _finalize_write, self.sink, AsSingleton(init_result),
         AsIter(write_results), min_shards)
     return seed | 'FinalizeWrite' >> finalize_step
 def expand(self, pcoll):
     """Run the elements of ``pcoll`` through ``DeduplicatePerKey`` by value.

     Each element becomes the key of an ``(element, None)`` pair, the pairs
     go through ``DeduplicatePerKey`` configured with this transform's
     ``processing_time_duration`` and ``event_time_duration``, and the key is
     then extracted from every surviving pair. What counts as a duplicate is
     decided by ``DeduplicatePerKey``, which is defined outside this block.

     Args:
         pcoll: The input collection.

     Returns:
         A collection holding the first component of each pair emitted by
         ``DeduplicatePerKey``.
     """
     as_keys = pcoll | 'Use Value as Key' >> core.Map(lambda x: (x, None))
     dedup = DeduplicatePerKey(
         processing_time_duration=self.processing_time_duration,
         event_time_duration=self.event_time_duration)
     surviving_pairs = as_keys | 'DeduplicatePerKey' >> dedup
     return surviving_pairs | 'Output Value' >> core.Map(lambda kv: kv[0])
Beispiel #3
0
 def expand(self, pcoll):
     """Pair every element with ``None`` and combine per key.

     Each element ``x`` of ``pcoll`` is mapped to ``(x, None)`` and the pairs
     are combined per key with ``CountCombineFn``.

     The pairing step is labelled with the ``'label' >> transform`` form.
     Passing the label as the first positional argument of ``core.Map`` is a
     retired calling convention: the first argument must be the callable.

     Args:
         pcoll: The input collection; its ``element_type`` is used to build
             the output type hint of the pairing step.

     Returns:
         The collection produced by ``CombinePerKey(CountCombineFn())``.
     """
     paired_with_void_type = KV[pcoll.element_type, Any]
     return (pcoll
             | ('%s:PairWithVoid' % self.label >> core.Map(
                 lambda x: (x, None)).with_output_types(paired_with_void_type))
             | core.CombinePerKey(CountCombineFn()))
Beispiel #4
0
 def expand(self, pcoll):
     """Pair every element with ``None`` and combine per key, with type hints.

     Each element ``x`` of ``pcoll`` is mapped to ``(x, None)`` under the
     label ``'<self.label>:PairWithVoid'``, and the pairs are combined per
     key with ``CountCombineFn``. Both steps carry explicit output type hints
     derived from ``pcoll.element_type``.

     Args:
         pcoll: The input collection.

     Returns:
         The collection produced by the ``CombinePerKey`` step, hinted as
         ``KV[element_type, int]``.
     """
     element_type = pcoll.element_type
     pair_hint = typehints.Tuple[element_type, Any]
     count_hint = typehints.KV[element_type, int]

     pair_label = '%s:PairWithVoid' % self.label
     pair_step = core.Map(lambda x: (x, None)).with_output_types(pair_hint)
     paired = pcoll | pair_label >> pair_step

     count_step = core.CombinePerKey(
         CountCombineFn()).with_output_types(count_hint)
     return paired | count_step