Example #1
0
 def expand(self, pcoll):
     """Assemble the write sub-pipeline for ``self.sink``.

     Three stages are wired together:

     1. ``sink.initialize_write()`` is mapped over a one-element
        collection.
     2. The elements of ``pcoll`` are written. When the sink exposes a
        truthy ``num_shards`` the input is keyed and grouped before
        writing; otherwise it is written first and the write results
        are gathered under a single key afterwards.
     3. ``_finalize_write`` is flat-mapped over the one-element
        collection, with the initialization result and all write
        results supplied as side inputs.

     Args:
         pcoll: The collection whose elements are handed to the sink.

     Returns:
         The output of the 'FinalizeWrite' step.
     """
     # One-element collection that drives the one-off steps.
     seed = pcoll.pipeline | 'DoOnce' >> core.Create([None])
     init_result = seed | 'InitializeWrite' >> core.Map(
         lambda _, sink: sink.initialize_write(), self.sink)

     if not getattr(self.sink, 'num_shards', 0):
         # No shard count on the sink: write first, then collect every
         # write result under the single key ``None``.
         min_shards = 1
         written = pcoll | 'WriteBundles' >> core.ParDo(
             _WriteBundleDoFn(self.sink), AsSingleton(init_result))
         paired = written | 'Pair' >> core.Map(lambda x: (None, x))
         rewindowed = paired | core.WindowInto(window.GlobalWindows())
         gathered = rewindowed | core.GroupByKey()
         write_results = gathered | 'Extract' >> core.FlatMap(
             lambda x: x[1])
     else:
         # The sink asks for a shard count: key the input first so that
         # the grouped values are what gets written.
         min_shards = self.sink.num_shards
         keying = (
             core.Map(lambda x: (None, x)) if min_shards == 1
             else core.ParDo(_RoundRobinKeyFn(min_shards)))
         keyed = pcoll | keying
         rewindowed = keyed | core.WindowInto(window.GlobalWindows())
         gathered = rewindowed | core.GroupByKey()
         write_results = gathered | 'WriteBundles' >> core.ParDo(
             _WriteKeyedBundleDoFn(self.sink), AsSingleton(init_result))

     finalize = core.FlatMap(
         _finalize_write, self.sink, AsSingleton(init_result),
         AsIter(write_results), min_shards)
     return seed | 'FinalizeWrite' >> finalize
Example #2
0
 def expand(self, pcoll):
     """Apply the top-``n`` computation to ``pcoll``.

     Two strategies are used:

     * When no extra positional or keyword arguments were supplied and
       ``pcoll`` uses default windowing, candidates are computed with
       ``_TopPerBundle``, flattened together with one empty accumulator,
       grouped, and merged with ``_MergeTopPerBundle``.
     * Otherwise the work is delegated to ``core.CombineGlobally`` with
       a ``TopCombineFn``; ``without_defaults()`` is applied when
       ``self.has_defaults`` is falsy.

     Args:
         pcoll: The input collection.

     Returns:
         The collection produced by whichever strategy was selected.
     """
     compare = self._compare
     use_bundle_algorithm = (
         not (self._args or self._kwargs)
         and pcoll.windowing.is_default())

     if not use_bundle_algorithm:
         # General path: the combine fn receives the untouched comparator
         # together with the reverse flag.
         combine = core.CombineGlobally(
             TopCombineFn(self._n, compare, self._key, self._reverse),
             *self._args, **self._kwargs)
         if not self.has_defaults:
             combine = combine.without_defaults()
         return pcoll | combine

     if self._reverse:
         # The per-bundle DoFns take no reverse flag, so the reversal is
         # folded into the comparator itself.
         if compare is not None and compare is not operator.lt:
             forward_compare = compare
             compare = lambda a, b: forward_compare(b, a)
         else:
             compare = operator.gt

     # This is a more efficient global algorithm.
     per_bundle_tops = pcoll | core.ParDo(
         _TopPerBundle(self._n, compare, self._key))
     # If pcoll is empty, we can't guarantee that per_bundle_tops
     # won't be empty, so inject at least one empty accumulator
     # so that downstream is guaranteed to produce non-empty output.
     empty_accumulator = pcoll.pipeline | core.Create([(None, [])])
     flattened = (per_bundle_tops, empty_accumulator) | core.Flatten()
     grouped = flattened | core.GroupByKey()
     return grouped | core.ParDo(
         _MergeTopPerBundle(self._n, compare, self._key))
    def expand(self, pcoll):
        """Write ``pcoll`` to ``self.sink`` and then run the success step.

        The main write is applied to ``pcoll``; its result is passed as an
        iterable side input to ``self._success_write``, which is
        flat-mapped over a one-element collection.

        Args:
            pcoll: The collection to write.

        Returns:
            The output of the 'SuccessWrite' step.
        """
        # One-element collection that drives the success step.
        trigger = pcoll.pipeline | 'DoOnceSuccess' >> core.Create([None])
        write_outcome = pcoll | 'MainWrite' >> Write(self.sink)

        success_step = core.FlatMap(
            self._success_write, pvalue.AsIter(write_outcome))
        return trigger | 'SuccessWrite' >> success_step
Example #4
0
 def expand(self, pcoll):
     """Apply the top-``n`` computation to ``pcoll``.

     With default windowing, candidates are computed with
     ``_TopPerBundle``, flattened together with one empty accumulator,
     grouped, and merged with ``_MergeTopPerBundle``. For any other
     windowing the work is delegated to ``core.CombineGlobally`` with a
     ``TopCombineFn``; ``without_defaults()`` is applied when
     ``self.has_defaults`` is falsy.

     Args:
         pcoll: The input collection.

     Returns:
         The collection produced by whichever strategy was selected.
     """
     if not pcoll.windowing.is_default():
         # Non-default windowing: fall back to a global combine.
         combine = core.CombineGlobally(
             TopCombineFn(self._n, self._key, self._reverse))
         if not self.has_defaults:
             combine = combine.without_defaults()
         return pcoll | combine

     # This is a more efficient global algorithm.
     per_bundle_tops = pcoll | core.ParDo(
         _TopPerBundle(self._n, self._key, self._reverse))
     # If pcoll is empty, we can't guarantee that per_bundle_tops
     # won't be empty, so inject at least one empty accumulator
     # so that downstream is guaranteed to produce non-empty output.
     empty_accumulator = pcoll.pipeline | core.Create([(None, [])])
     flattened = (per_bundle_tops, empty_accumulator) | core.Flatten()
     grouped = flattened | core.GroupByKey()
     return grouped | core.ParDo(
         _MergeTopPerBundle(self._n, self._key, self._reverse))