def test_implicit_payload_builder_with_bytes(self):
    """Check the implicit-schema payload built from the non-unicode fixtures.

    The payload is built from ``PayloadBase.bytes_values``. On Python 3 it
    must equal the payload for ``PayloadBase.args``; on Python 2 the string
    fields are expected to carry the ``beam:coder:bytes:v1`` URN instead.
    """
    values = PayloadBase.bytes_values
    result = ImplicitSchemaPayloadBuilder(values).build()

    if sys.version_info.major >= 3:
        expected_args = PayloadBase.args
    else:

        def nested(coder, key):
            # Nested-context encoding of the fixture value stored under `key`.
            return coder.get_impl().encode_nested(values[key])

        # in python 2.x bytes coder will be inferred
        expected_args = {
            'integer_example': ConfigValue(
                coder_urn=['beam:coder:varint:v1'],
                payload=nested(VarIntCoder(), 'integer_example')),
            'string_example': ConfigValue(
                coder_urn=['beam:coder:bytes:v1'],
                payload=nested(StrUtf8Coder(), 'string_example')),
            'list_of_strings': ConfigValue(
                coder_urn=['beam:coder:iterable:v1', 'beam:coder:bytes:v1'],
                payload=nested(
                    IterableCoder(StrUtf8Coder()), 'list_of_strings')),
            'optional_kv': ConfigValue(
                coder_urn=[
                    'beam:coder:kv:v1',
                    'beam:coder:bytes:v1',
                    'beam:coder:double:v1',
                ],
                payload=nested(
                    TupleCoder([StrUtf8Coder(), FloatCoder()]),
                    'optional_kv')),
        }

    self.assertEqual(result, get_payload(expected_args))
def get_restriction_coder(self):
    # type: () -> Optional[TupleCoder]

    """Get coder for a restriction when processing an SDF.

    Returns:
      A ``TupleCoder`` whose components are, in order, the restriction
      provider's ``restriction_coder()`` and the watermark estimator
      provider's ``estimator_state_coder()``; ``None`` when
      ``is_splittable_dofn()`` is false.
    """
    if not self.is_splittable_dofn():
        return None

    restriction_coder = self.get_restriction_provider().restriction_coder()
    estimator_state_coder = (
        self.get_watermark_estimator_provider().estimator_state_coder())
    return TupleCoder([restriction_coder, estimator_state_coder])
def _encode_map(dict_obj):
    """Encode a str-to-str mapping as a ``ConfigValue``.

    Every key and value is UTF-8 encoded to bytes, and the resulting pairs are
    serialized with an iterable-of-KV coder whose key and value components are
    length-prefixed bytes coders.

    Args:
      dict_obj: mapping whose keys and values both provide
        ``.encode('utf-8')``.

    Returns:
      A ``ConfigValue`` carrying the iterable/kv/bytes/bytes coder URNs and
      the encoded payload.
    """
    encoded_pairs = [
        (name.encode('utf-8'), text.encode('utf-8'))
        for name, text in dict_obj.items()
    ]

    # Key and value each get their own length-prefixed bytes coder instance.
    pair_coder = TupleCoder(
        [LengthPrefixCoder(BytesCoder()), LengthPrefixCoder(BytesCoder())])
    payload = IterableCoder(pair_coder).encode(encoded_pairs)

    return ConfigValue(
        coder_urn=[
            'beam:coder:iterable:v1',
            'beam:coder:kv:v1',
            'beam:coder:bytes:v1',
            'beam:coder:bytes:v1',
        ],
        payload=payload)
class PayloadBase(object):
    """Shared fixtures and test cases for payload builders.

    Subclasses supply ``get_payload_from_typing_hints`` and
    ``get_payload_from_beam_typehints``; the base versions raise
    ``NotImplementedError``. The test methods call ``assertEqual`` and
    ``assertRaises`` on ``self``, so this class is presumably combined with
    ``unittest.TestCase`` by its subclasses (not visible here).
    """

    # Input values with unicode string literals.
    values = {
        'integer_example': 1,
        'boolean': True,
        'string_example': u'thing',
        'list_of_strings': [u'foo', u'bar'],
        'optional_kv': (u'key', 1.1),
        'optional_integer': None,
    }

    # The same inputs written with plain (non-``u``) string literals.
    bytes_values = {
        'integer_example': 1,
        'boolean': True,
        'string_example': 'thing',
        'list_of_strings': ['foo', 'bar'],
        'optional_kv': ('key', 1.1),
        'optional_integer': None,
    }

    # Expected per-field ConfigValues for `values`. There is no entry for
    # 'optional_integer', whose input value is None.
    args = {
        'integer_example': ConfigValue(
            coder_urn=['beam:coder:varint:v1'],
            payload=VarIntCoder().get_impl().encode_nested(
                values['integer_example'])),
        'boolean': ConfigValue(
            coder_urn=['beam:coder:bool:v1'],
            payload=BooleanCoder().get_impl().encode_nested(
                values['boolean'])),
        'string_example': ConfigValue(
            coder_urn=['beam:coder:string_utf8:v1'],
            payload=StrUtf8Coder().get_impl().encode_nested(
                values['string_example'])),
        'list_of_strings': ConfigValue(
            coder_urn=['beam:coder:iterable:v1', 'beam:coder:string_utf8:v1'],
            payload=IterableCoder(StrUtf8Coder()).get_impl().encode_nested(
                values['list_of_strings'])),
        'optional_kv': ConfigValue(
            coder_urn=[
                'beam:coder:kv:v1',
                'beam:coder:string_utf8:v1',
                'beam:coder:double:v1',
            ],
            payload=TupleCoder(
                [StrUtf8Coder(), FloatCoder()]).get_impl().encode_nested(
                    values['optional_kv'])),
    }

    def get_payload_from_typing_hints(self, values):
        """Build an ExternalConfigurationPayload using python typing hints.

        Must be overridden; the base implementation always raises
        ``NotImplementedError``.
        """
        raise NotImplementedError

    def get_payload_from_beam_typehints(self, values):
        """Build an ExternalConfigurationPayload using beam typehints.

        Must be overridden; the base implementation always raises
        ``NotImplementedError``.
        """
        raise NotImplementedError

    def test_typing_payload_builder(self):
        """The typing-hints payload for ``values`` matches ``args``."""
        self.assertEqual(
            self.get_payload_from_typing_hints(self.values),
            get_payload(self.args))

    def test_typing_payload_builder_with_bytes(self):
        """The typing-hints payload for ``bytes_values`` matches ``args``.

        The string_utf8 coder is expected even when the values are not
        unicode in python 2.x.
        """
        self.assertEqual(
            self.get_payload_from_typing_hints(self.bytes_values),
            get_payload(self.args))

    def test_typehints_payload_builder(self):
        """The beam-typehints payload for ``values`` matches ``args``."""
        self.assertEqual(
            self.get_payload_from_beam_typehints(self.values),
            get_payload(self.args))

    def test_typehints_payload_builder_with_bytes(self):
        """The beam-typehints payload for ``bytes_values`` matches ``args``.

        The string_utf8 coder is expected even when the values are not
        unicode in python 2.x.
        """
        self.assertEqual(
            self.get_payload_from_beam_typehints(self.bytes_values),
            get_payload(self.args))

    def test_optional_error(self):
        """A value may only be None when its typehint is Optional."""
        with self.assertRaises(RuntimeError):
            # Same keys as `values`, every value None.
            self.get_payload_from_typing_hints(dict.fromkeys(self.values))
class GeneralTriggerManagerDoFn(DoFn):
    """A trigger manager that supports all windowing / triggering cases.

    This implements a DoFn that manages triggering in a per-key basis. All
    elements for a single key are processed together. Per-key state holds data
    related to all windows.
    """

    # TODO(BEAM-12026) Add support for Global and custom window fns.
    # Windows recorded for the key; only read and written by `process` when
    # the window fn is merging.
    KNOWN_WINDOWS = SetStateSpec('known_windows', IntervalWindowCoder())
    # Windows whose trigger reported "finished" when it fired.
    FINISHED_WINDOWS = SetStateSpec('finished_windows', IntervalWindowCoder())
    # Max-combined timestamps recorded by the two timer callbacks.
    LAST_KNOWN_TIME = CombiningValueStateSpec(
        'last_known_time', combine_fn=max)
    LAST_KNOWN_WATERMARK = CombiningValueStateSpec(
        'last_known_watermark', combine_fn=max)
    # TODO(pabloem) What's the coder for the elements/keys here?
    # Buffered (window, TimestampedValue) pairs awaiting a firing.
    WINDOW_ELEMENT_PAIRS = BagStateSpec(
        'all_elements', TupleCoder([IntervalWindowCoder(), PickleCoder()]))
    # Only handed to the trigger context here; presumably per-window trigger
    # tag state - confirm against FnRunnerStatefulTriggerContext.
    WINDOW_TAG_VALUES = BagStateSpec(
        'per_window_per_tag_value_state',
        TupleCoder([IntervalWindowCoder(), StrUtf8Coder(), VarIntCoder()]))

    PROCESSING_TIME_TIMER = TimerSpec(
        'processing_time_timer', TimeDomain.REAL_TIME)
    WATERMARK_TIMER = TimerSpec('watermark_timer', TimeDomain.WATERMARK)

    def __init__(self, windowing: Windowing):
        """Store the windowing strategy and whether its window fn merges.

        Args:
          windowing: strategy supplying the window fn, trigger fn, allowed
            lateness and accumulation mode used by this DoFn.
        """
        self.windowing = windowing
        # Only session windows are merging. Other windows are non-merging.
        self.merging_windows = self.windowing.windowfn.is_merging()

    def process(
        self,
        element: typing.Tuple[
            K, typing.Iterable[windowed_value.WindowedValue]],
        all_elements: BagRuntimeState = DoFn.StateParam(
            WINDOW_ELEMENT_PAIRS),  # type: ignore
        latest_processing_time: AccumulatingRuntimeState = DoFn.StateParam(
            LAST_KNOWN_TIME),  # type: ignore
        latest_watermark: AccumulatingRuntimeState = DoFn.
        StateParam(  # type: ignore
            LAST_KNOWN_WATERMARK),
        window_tag_values: BagRuntimeState = DoFn.StateParam(
            WINDOW_TAG_VALUES),  # type: ignore
        windows_state: SetRuntimeState = DoFn.StateParam(
            KNOWN_WINDOWS),  # type: ignore
        finished_windows_state: SetRuntimeState = DoFn.
        StateParam(  # type: ignore
            FINISHED_WINDOWS),
        processing_time_timer=DoFn.TimerParam(PROCESSING_TIME_TIMER),
        watermark_timer=DoFn.TimerParam(WATERMARK_TIMER),
        *args,
        **kwargs):
        """Buffer the incoming values per window and fire eligible windows.

        Values whose window is expired (the watermark is past the window end
        plus the allowed lateness) or already finished are dropped. For a
        merging window fn the surviving windows are merged with the known
        windows first. Every surviving value is added to the element state
        and reported to the trigger fn.

        Args:
          element: a ``(key, windowed_values)`` pair.
          all_elements: bag state of buffered ``(window, value)`` pairs.
          latest_processing_time: combining state handed to the context.
          latest_watermark: combining state from which the current watermark
            is read.
          window_tag_values: bag state handed to the context.
          windows_state: set state of known windows (merging only).
          finished_windows_state: set state of finished windows.
          processing_time_timer: timer handed to the context.
          watermark_timer: timer handed to the context.
          *args: unused.
          **kwargs: unused.

        Returns:
          The iterator produced by ``_fire_eligible_windows`` for the
          WATERMARK time domain, restricted to the windows seen in this call.
        """
        context = FnRunnerStatefulTriggerContext(
            processing_time_timer=processing_time_timer,
            watermark_timer=watermark_timer,
            latest_processing_time=latest_processing_time,
            latest_watermark=latest_watermark,
            all_elements_state=all_elements,
            window_tag_values=window_tag_values,
            finished_windows_state=finished_windows_state)
        key, windowed_values = element
        watermark = read_watermark(latest_watermark)

        # Nothing in the loop below writes to the finished-windows state, so
        # read it once instead of once per (value, window) pair.
        finished_windows: typing.Set[BoundedWindow] = set(
            finished_windows_state.read())

        windows_to_elements = collections.defaultdict(list)
        for wv in windowed_values:
            for window in wv.windows:
                # ignore expired windows
                if watermark > window.end + self.windowing.allowed_lateness:
                    continue
                if window in finished_windows:
                    continue
                windows_to_elements[window].append(
                    TimestampedValue(wv.value, wv.timestamp))

        # Processing merging of windows
        if self.merging_windows:
            old_windows = set(windows_state.read())
            all_windows = old_windows.union(list(windows_to_elements))
            if all_windows != old_windows:
                merge_context = TriggerMergeContext(
                    all_windows, context, self.windowing)
                self.windowing.windowfn.merge(merge_context)

                merged_windows_to_elements = collections.defaultdict(list)
                for window, values in windows_to_elements.items():
                    # Follow the chain of merges to the surviving window.
                    while window in merge_context.merged_away:
                        window = merge_context.merged_away[window]
                    merged_windows_to_elements[window].extend(values)
                windows_to_elements = merged_windows_to_elements

            for w in windows_to_elements:
                windows_state.add(w)
        # Done processing merging of windows

        seen_windows = set()
        for w in windows_to_elements:
            window_context = context.for_window(w)
            seen_windows.add(w)
            for value_w_timestamp in windows_to_elements[w]:
                _LOGGER.debug(value_w_timestamp)
                all_elements.add((w, value_w_timestamp))
                self.windowing.triggerfn.on_element(
                    windowed_values, w, window_context)

        return self._fire_eligible_windows(
            key, TimeDomain.WATERMARK, watermark, None, context, seen_windows)

    def _fire_eligible_windows(
        self,
        key: K,
        time_domain,
        timestamp: Timestamp,
        timer_tag: typing.Optional[str],
        context: 'FnRunnerStatefulTriggerContext',
        windows_of_interest: typing.Optional[
            typing.Set[BoundedWindow]] = None):
        """Yield ``(key, values)`` for every window whose trigger should fire.

        This is a generator: none of its body, including the initial clearing
        of the element state, runs until the returned iterator is consumed.
        Once all windows have been examined, buffered elements are written
        back to state unless their window is finished, or it fired and the
        accumulation mode is DISCARDING.

        Args:
          key: the key being processed; emitted with every firing.
          time_domain: time domain passed to the trigger's ``should_fire``.
          timestamp: timestamp passed to ``should_fire`` and ``on_fire``.
          timer_tag: only used in the debug log line.
          context: trigger context giving access to the per-key state.
          windows_of_interest: when not None, only these windows may fire.

        Yields:
          ``(key, [WindowedValue, ...])`` for each fired window.
        """
        windows_to_elements = context.windows_to_elements_map()
        context.all_elements_state.clear()

        fired_windows = set()
        _LOGGER.debug(
            '%s - tag %s - timestamp %s', time_domain, timer_tag, timestamp)
        for w, elems in windows_to_elements.items():
            if (windows_of_interest is not None and
                    w not in windows_of_interest):
                # windows_of_interest=None means that we care about all
                # windows. If we care only about some windows, and this window
                # is not one of them, then we do not intend to fire this
                # window.
                continue
            window_context = context.for_window(w)
            if self.windowing.triggerfn.should_fire(
                    time_domain, timestamp, w, window_context):
                finished = self.windowing.triggerfn.on_fire(
                    timestamp, w, window_context)
                _LOGGER.debug(
                    'Firing on window %s. Finished: %s', w, finished)
                fired_windows.add(w)
                if finished:
                    context.finished_windows_state.add(w)
                # TODO(pabloem): Format the output: e.g. pane info
                fired_values = [
                    WindowedValue(e.value, e.timestamp, (w, )) for e in elems
                ]
                yield (key, fired_values)

        finished_windows: typing.Set[BoundedWindow] = set(
            context.finished_windows_state.read())
        # Add elements that were not fired back into state.
        for w, elems in windows_to_elements.items():
            # The decision depends only on the window, so make it once per
            # window rather than once per element.
            if (w in finished_windows or
                (w in fired_windows and self.windowing.accumulation_mode
                 == AccumulationMode.DISCARDING)):
                continue
            for e in elems:
                context.all_elements_state.add((w, e))

    @on_timer(PROCESSING_TIME_TIMER)
    def processing_time_trigger(
        self,
        key=DoFn.KeyParam,
        timer_tag=DoFn.DynamicTimerTagParam,
        timestamp=DoFn.TimestampParam,
        latest_processing_time=DoFn.StateParam(LAST_KNOWN_TIME),
        all_elements=DoFn.StateParam(WINDOW_ELEMENT_PAIRS),
        processing_time_timer=DoFn.TimerParam(PROCESSING_TIME_TIMER),
        window_tag_values: BagRuntimeState = DoFn.StateParam(
            WINDOW_TAG_VALUES),  # type: ignore
        finished_windows_state: SetRuntimeState = DoFn.
        StateParam(  # type: ignore
            FINISHED_WINDOWS),
        watermark_timer=DoFn.TimerParam(WATERMARK_TIMER)):
        """Timer callback: fire eligible windows in the REAL_TIME domain.

        All windows are considered (no ``windows_of_interest`` filter) and
        the context is built without a watermark state.

        Returns:
          The iterator produced by ``_fire_eligible_windows``.
        """
        context = FnRunnerStatefulTriggerContext(
            processing_time_timer=processing_time_timer,
            watermark_timer=watermark_timer,
            latest_processing_time=latest_processing_time,
            latest_watermark=None,
            all_elements_state=all_elements,
            window_tag_values=window_tag_values,
            finished_windows_state=finished_windows_state)
        result = self._fire_eligible_windows(
            key, TimeDomain.REAL_TIME, timestamp, timer_tag, context)
        # `result` is a lazy generator, so this timestamp is recorded before
        # any of the firing logic actually runs.
        latest_processing_time.add(timestamp)
        return result

    @on_timer(WATERMARK_TIMER)
    def watermark_trigger(
        self,
        key=DoFn.KeyParam,
        timer_tag=DoFn.DynamicTimerTagParam,
        timestamp=DoFn.TimestampParam,
        latest_watermark=DoFn.StateParam(LAST_KNOWN_WATERMARK),
        all_elements=DoFn.StateParam(WINDOW_ELEMENT_PAIRS),
        processing_time_timer=DoFn.TimerParam(PROCESSING_TIME_TIMER),
        window_tag_values: BagRuntimeState = DoFn.StateParam(
            WINDOW_TAG_VALUES),  # type: ignore
        finished_windows_state: SetRuntimeState = DoFn.
        StateParam(  # type: ignore
            FINISHED_WINDOWS),
        watermark_timer=DoFn.TimerParam(WATERMARK_TIMER)):
        """Timer callback: fire eligible windows in the WATERMARK domain.

        All windows are considered (no ``windows_of_interest`` filter) and
        the context is built without a processing-time state.

        Returns:
          The iterator produced by ``_fire_eligible_windows``.
        """
        context = FnRunnerStatefulTriggerContext(
            processing_time_timer=processing_time_timer,
            watermark_timer=watermark_timer,
            latest_processing_time=None,
            latest_watermark=latest_watermark,
            all_elements_state=all_elements,
            window_tag_values=window_tag_values,
            finished_windows_state=finished_windows_state)
        result = self._fire_eligible_windows(
            key, TimeDomain.WATERMARK, timestamp, timer_tag, context)
        # `result` is a lazy generator, so this timestamp is recorded before
        # any of the firing logic actually runs.
        latest_watermark.add(timestamp)
        return result