Ejemplo n.º 1
0
 def __init__(self, name_context, spec, counter_factory, state_sampler):
     """Set up the combine function and the per-key accumulator table.

     Args:
       name_context: forwarded unchanged to the parent constructor.
       spec: operation spec; ``spec.combine_fn`` must unpickle to a
         sequence whose first three items are ``(fn, args, kwargs)``.
       counter_factory: forwarded unchanged to the parent constructor.
       state_sampler: forwarded unchanged to the parent constructor.
     """
     super(PGBKCVOperation, self).__init__(name_context, spec,
                                           counter_factory, state_sampler)
     # Combiners do not accept deferred side-inputs (the ignored fourth
     # argument) and therefore the code to handle the extra args/kwargs is
     # simpler than for the DoFn's of ParDo.
     fn, args, kwargs = pickler.loads(self.spec.combine_fn)[:3]
     self.combine_fn = curry_combine_fn(fn, args, kwargs)
     # Compare the underlying function objects. On Python 2 a method looked
     # up on the class is an unbound method exposing __func__; on Python 3 it
     # is already the plain function and has no __func__, so fall back to the
     # attribute itself instead of raising AttributeError.
     base_add_input = getattr(core.CombineFn.add_input, '__func__',
                              core.CombineFn.add_input)
     # A missing __func__ on fn.add_input (e.g. a non-Python callable) yields
     # None, which never matches and therefore selects the direct path.
     if getattr(fn.add_input, '__func__', None) is base_add_input:
         # Old versions of the SDK have CombineFns that don't implement add_input.
         self.combine_fn_add_input = (
             lambda a, e: self.combine_fn.add_inputs(a, [e]))
     else:
         self.combine_fn_add_input = self.combine_fn.add_input
     # Optimization for the (known tiny accumulator, often wide keyspace)
     # combine functions.
     # TODO(b/36567833): Bound by in-memory size rather than key count.
     self.max_keys = (
         1000 * 1000 if
         isinstance(fn,
                    (combiners.CountCombineFn, combiners.MeanCombineFn)) or
         # TODO(b/36597732): Replace this 'or' part by adding the 'cy' optimized
         # combiners to the short list above.
         (isinstance(fn, core.CallableWrapperCombineFn)
          and fn._fn in (min, max, sum)) else 100 * 1000)  # pylint: disable=protected-access
     # Number of keys currently held, and the table that holds them; both
     # start out empty.
     self.key_count = 0
     self.table = {}
Ejemplo n.º 2
0
 def __init__(self, operation_name, spec, counter_factory, state_sampler):
   """Set up the combine function and the per-key accumulator table.

   Args:
     operation_name: forwarded unchanged to the parent constructor.
     spec: operation spec; ``spec.combine_fn`` must unpickle to a sequence
       whose first three items are ``(fn, args, kwargs)``.
     counter_factory: forwarded unchanged to the parent constructor.
     state_sampler: forwarded unchanged to the parent constructor.
   """
   super(PGBKCVOperation, self).__init__(
       operation_name, spec, counter_factory, state_sampler)
   # Combiners do not accept deferred side-inputs (the ignored fourth
   # argument) and therefore the code to handle the extra args/kwargs is
   # simpler than for the DoFn's of ParDo.
   fn, args, kwargs = pickler.loads(self.spec.combine_fn)[:3]
   self.combine_fn = curry_combine_fn(fn, args, kwargs)
   # Compare the underlying function objects. On Python 2 a method looked up
   # on the class is an unbound method exposing __func__; on Python 3 it is
   # already the plain function and has no __func__, so fall back to the
   # attribute itself instead of raising AttributeError.
   base_add_input = getattr(
       core.CombineFn.add_input, '__func__', core.CombineFn.add_input)
   # A missing __func__ on fn.add_input (e.g. a non-Python callable) yields
   # None, which never matches and therefore selects the direct path.
   if getattr(fn.add_input, '__func__', None) is base_add_input:
     # Old versions of the SDK have CombineFns that don't implement add_input.
     self.combine_fn_add_input = (
         lambda a, e: self.combine_fn.add_inputs(a, [e]))
   else:
     self.combine_fn_add_input = self.combine_fn.add_input
   # Optimization for the (known tiny accumulator, often wide keyspace)
   # combine functions.
   # TODO(b/36567833): Bound by in-memory size rather than key count.
   self.max_keys = (
       1000 * 1000 if
       isinstance(fn, (combiners.CountCombineFn, combiners.MeanCombineFn)) or
       # TODO(b/36597732): Replace this 'or' part by adding the 'cy' optimized
       # combiners to the short list above.
       (isinstance(fn, core.CallableWrapperCombineFn) and
        fn._fn in (min, max, sum)) else 100 * 1000)  # pylint: disable=protected-access
   # Number of keys currently held, and the table that holds them; both
   # start out empty.
   self.key_count = 0
   self.table = {}
Ejemplo n.º 3
0
 def __init__(self,
              name_context,
              spec,
              counter_factory,
              state_sampler,
              windowing=None):
     """Prepare the combine function, windowing info and accumulator table.

     Args:
       name_context: forwarded unchanged to the parent constructor.
       spec: operation spec; ``spec.combine_fn`` must unpickle to a
         sequence whose first three items are ``(fn, args, kwargs)``.
       counter_factory: forwarded unchanged to the parent constructor.
       state_sampler: forwarded unchanged to the parent constructor.
       windowing: optional; when truthy it is queried for ``is_default()``,
         ``timestamp_combiner`` and ``windowfn``.
     """
     super(PGBKCVOperation, self).__init__(name_context, spec,
                                           counter_factory, state_sampler)

     # Only the first three pickled items are used: combiners take no
     # deferred side-inputs (the ignored fourth item), which keeps the
     # args/kwargs handling simpler than for the DoFns of ParDo.
     user_fn, fn_args, fn_kwargs = pickler.loads(self.spec.combine_fn)[:3]
     self.combine_fn = curry_combine_fn(user_fn, fn_args, fn_kwargs)
     self.combine_fn_add_input = self.combine_fn.add_input

     # Pick the object that an inherited (non-overridden) compact resolves
     # to on the running interpreter.
     if sys.version_info >= (3, ):
         base_compact = core.CombineFn.compact
     else:
         base_compact = core.CombineFn.compact.__func__
     compact_is_inherited = (
         self.combine_fn.compact.__func__ is base_compact)
     # None records that compact was not overridden.
     self.combine_fn_compact = (
         None if compact_is_inherited else self.combine_fn.compact)

     if not windowing:
         self.is_default_windowing = False  # unknown
         self.timestamp_combiner = None
     else:
         self.is_default_windowing = windowing.is_default()
         tsc_type = windowing.timestamp_combiner
         if tsc_type == window.TimestampCombiner.OUTPUT_AT_EOW:
             # No combiner implementation is kept for OUTPUT_AT_EOW.
             self.timestamp_combiner = None
         else:
             self.timestamp_combiner = window.TimestampCombiner.get_impl(
                 tsc_type, windowing.windowfn)

     # Optimization for the (known tiny accumulator, often wide keyspace)
     # combine functions: they get a ten times larger key budget.
     # TODO(b/36567833): Bound by in-memory size rather than key count.
     # TODO(b/36597732): Replace the CallableWrapperCombineFn clause by
     # adding the 'cy' optimized combiners to the isinstance list.
     has_tiny_accumulator = (
         isinstance(user_fn,
                    (combiners.CountCombineFn, combiners.MeanCombineFn))
         or (isinstance(user_fn, core.CallableWrapperCombineFn)
             and user_fn._fn in (min, max, sum)))  # pylint: disable=protected-access
     if has_tiny_accumulator:
         self.max_keys = 1000 * 1000
     else:
         self.max_keys = 100 * 1000

     # Number of keys currently held, and the table that holds them; both
     # start out empty.
     self.key_count = 0
     self.table = {}