def __init__(self, name_context, spec, counter_factory, state_sampler):
  """Set up the combine function and the in-memory combining table.

  The four arguments are forwarded unchanged to the parent constructor.
  The pickled ``combine_fn`` is then read from ``self.spec`` (presumably
  stored there by the parent constructor -- confirm), unpickled, and its
  first three items are used as the combine function plus its extra
  positional and keyword arguments.

  Args:
    name_context: forwarded to the parent constructor.
    spec: forwarded to the parent constructor; expected to carry a pickled
      ``combine_fn`` attribute.
    counter_factory: forwarded to the parent constructor.
    state_sampler: forwarded to the parent constructor.
  """
  super(PGBKCVOperation, self).__init__(
      name_context, spec, counter_factory, state_sampler)
  # Combiners do not accept deferred side-inputs (the ignored fourth
  # argument) and therefore the code to handle the extra args/kwargs is
  # simpler than for the DoFn's of ParDo.
  fn, args, kwargs = pickler.loads(self.spec.combine_fn)[:3]
  self.combine_fn = curry_combine_fn(fn, args, kwargs)

  # Resolve the plain function behind the base-class add_input. On Python 2
  # the class attribute is an unbound method exposing __func__; on Python 3
  # it is already a plain function without __func__, so fall back to the
  # attribute itself instead of raising AttributeError.
  base_add_input = core.CombineFn.add_input
  base_add_input = getattr(base_add_input, '__func__', base_add_input)
  # Bound methods expose __func__ on both Python 2 and 3 (im_func is
  # Python 2 only). Anything that is not a bound method yields None and is
  # treated as a custom add_input.
  if getattr(fn.add_input, '__func__', None) is base_add_input:
    # Old versions of the SDK have CombineFns that don't implement add_input.
    self.combine_fn_add_input = (
        lambda a, e: self.combine_fn.add_inputs(a, [e]))
  else:
    self.combine_fn_add_input = self.combine_fn.add_input

  # Optimization for the (known tiny accumulator, often wide keyspace)
  # combine functions.
  # TODO(b/36567833): Bound by in-memory size rather than key count.
  has_tiny_accumulator = (
      isinstance(fn, (combiners.CountCombineFn, combiners.MeanCombineFn)) or
      # TODO(b/36597732): Replace this 'or' part by adding the 'cy' optimized
      # combiners to the short list above.
      (isinstance(fn, core.CallableWrapperCombineFn) and
       fn._fn in (min, max, sum)))  # pylint: disable=protected-access
  self.max_keys = 1000 * 1000 if has_tiny_accumulator else 100 * 1000
  self.key_count = 0
  self.table = {}
def __init__(self, operation_name, spec, counter_factory, state_sampler):
  """Set up the combine function and the in-memory combining table.

  The four arguments are forwarded unchanged to the parent constructor.
  The pickled ``combine_fn`` is then read from ``self.spec`` (presumably
  stored there by the parent constructor -- confirm), unpickled, and its
  first three items are used as the combine function plus its extra
  positional and keyword arguments.

  Args:
    operation_name: forwarded to the parent constructor.
    spec: forwarded to the parent constructor; expected to carry a pickled
      ``combine_fn`` attribute.
    counter_factory: forwarded to the parent constructor.
    state_sampler: forwarded to the parent constructor.
  """
  super(PGBKCVOperation, self).__init__(
      operation_name, spec, counter_factory, state_sampler)
  # Combiners do not accept deferred side-inputs (the ignored fourth
  # argument) and therefore the code to handle the extra args/kwargs is
  # simpler than for the DoFn's of ParDo.
  fn, args, kwargs = pickler.loads(self.spec.combine_fn)[:3]
  self.combine_fn = curry_combine_fn(fn, args, kwargs)

  # Resolve the plain function behind the base-class add_input. On Python 2
  # the class attribute is an unbound method exposing __func__; on Python 3
  # it is already a plain function without __func__, so fall back to the
  # attribute itself instead of raising AttributeError.
  inherited_add_input = core.CombineFn.add_input
  inherited_add_input = getattr(
      inherited_add_input, '__func__', inherited_add_input)
  # Bound methods expose __func__ on both Python 2 and 3 (im_func is
  # Python 2 only). Anything that is not a bound method yields None and is
  # treated as a custom add_input.
  if getattr(fn.add_input, '__func__', None) is inherited_add_input:
    # Old versions of the SDK have CombineFns that don't implement add_input.
    self.combine_fn_add_input = (
        lambda a, e: self.combine_fn.add_inputs(a, [e]))
  else:
    self.combine_fn_add_input = self.combine_fn.add_input

  # Optimization for the (known tiny accumulator, often wide keyspace)
  # combine functions.
  # TODO(b/36567833): Bound by in-memory size rather than key count.
  uses_small_accumulator = (
      isinstance(fn, (combiners.CountCombineFn, combiners.MeanCombineFn)) or
      # TODO(b/36597732): Replace this 'or' part by adding the 'cy' optimized
      # combiners to the short list above.
      (isinstance(fn, core.CallableWrapperCombineFn) and
       fn._fn in (min, max, sum)))  # pylint: disable=protected-access
  self.max_keys = 1000 * 1000 if uses_small_accumulator else 100 * 1000
  self.key_count = 0
  self.table = {}
def __init__(self, name_context, spec, counter_factory, state_sampler,
             windowing=None):
  """Set up the combine function, windowing info and the combining table.

  The first four arguments are forwarded unchanged to the parent
  constructor. The pickled ``combine_fn`` is then read from ``self.spec``
  (presumably stored there by the parent constructor -- confirm),
  unpickled, and its first three items are used as the combine function
  plus its extra positional and keyword arguments.

  Args:
    name_context: forwarded to the parent constructor.
    spec: forwarded to the parent constructor; expected to carry a pickled
      ``combine_fn`` attribute.
    counter_factory: forwarded to the parent constructor.
    state_sampler: forwarded to the parent constructor.
    windowing: optional windowing description. When falsy, the windowing is
      treated as unknown: ``is_default_windowing`` is False and no timestamp
      combiner is set.
  """
  super(PGBKCVOperation, self).__init__(
      name_context, spec, counter_factory, state_sampler)
  # Combiners do not accept deferred side-inputs (the ignored fourth
  # argument) and therefore the code to handle the extra args/kwargs is
  # simpler than for the DoFn's of ParDo.
  unpickled = pickler.loads(self.spec.combine_fn)
  fn, args, kwargs = unpickled[:3]
  self.combine_fn = curry_combine_fn(fn, args, kwargs)
  self.combine_fn_add_input = self.combine_fn.add_input

  # Only keep a compact hook when the combiner overrides the base-class
  # compact. The base function is reached differently on Python 2 and 3.
  if sys.version_info >= (3, ):
    default_compact = core.CombineFn.compact
  else:
    default_compact = core.CombineFn.compact.__func__
  if self.combine_fn.compact.__func__ is default_compact:
    self.combine_fn_compact = None
  else:
    self.combine_fn_compact = self.combine_fn.compact

  if not windowing:
    self.is_default_windowing = False  # unknown
    self.timestamp_combiner = None
  else:
    self.is_default_windowing = windowing.is_default()
    tsc_type = windowing.timestamp_combiner
    # No combiner implementation is stored for OUTPUT_AT_EOW.
    if tsc_type == window.TimestampCombiner.OUTPUT_AT_EOW:
      self.timestamp_combiner = None
    else:
      self.timestamp_combiner = window.TimestampCombiner.get_impl(
          tsc_type, windowing.windowfn)

  # Optimization for the (known tiny accumulator, often wide keyspace)
  # combine functions.
  # TODO(b/36567833): Bound by in-memory size rather than key count.
  if (isinstance(fn, (combiners.CountCombineFn, combiners.MeanCombineFn)) or
      # TODO(b/36597732): Replace this 'or' part by adding the 'cy' optimized
      # combiners to the short list above.
      (isinstance(fn, core.CallableWrapperCombineFn) and
       fn._fn in (min, max, sum))):  # pylint: disable=protected-access
    self.max_keys = 1000 * 1000
  else:
    self.max_keys = 100 * 1000
  self.key_count = 0
  self.table = {}