def start_bundle(self):
    """Create the trigger driver and record the state class for this bundle.

    The trigger helpers are imported inside the method rather than at module
    level, which is why the pylint import-position checks are suppressed
    around the imports.
    """
    # pylint: disable=wrong-import-order, wrong-import-position
    from apache_beam.transforms.trigger import InMemoryUnmergedState
    from apache_beam.transforms.trigger import create_trigger_driver
    # pylint: enable=wrong-import-order, wrong-import-position

    # NOTE(review): the positional ``True`` is presumably the batch-mode
    # flag of create_trigger_driver -- confirm against its signature.
    bundle_driver = create_trigger_driver(self.windowing, True)
    self.driver = bundle_driver
    self.state_type = InMemoryUnmergedState
def __iter__(self):
    """Encode all grouped values and iterate over the single encoded blob.

    Every entry of ``self._table`` is decoded to its key, run through the
    trigger driver's ``process_entire_key``, and each resulting value is
    encoded into one shared output stream.

    Returns:
        An iterator over a one-element list holding the contents of the
        output stream.
    """
    output_stream = create_OutputStream()
    trigger_driver = trigger.create_trigger_driver(self._windowing, True)

    # Look the coder implementation up once instead of once per encoded
    # element. This assumes get_impl() returns an equivalent implementation
    # on every call, which the other __iter__ variant in this file (the one
    # with the default-windowing fast path) already relies on.
    coder_impl = self._post_grouped_coder.get_impl()

    for encoded_key, windowed_values in self._table.items():
        key = self._key_coder.decode(encoded_key)
        for wkvs in trigger_driver.process_entire_key(key, windowed_values):
            # NOTE(review): the trailing ``True`` is presumably the
            # "nested" encoding flag -- confirm against the coder impl API.
            coder_impl.encode_to_stream(wkvs, output_stream, True)
    return iter([output_stream.get()])
def start_bundle(self):
    """Initialise the per-bundle state of this evaluator.

    Sets the single output PCollection, the step context, the trigger
    driver built from the transform's windowing, empty buffers for items
    and keyed holds, and the coder for the key type taken from the
    transform's input type hints.
    """
    assert len(self._outputs) == 1
    outputs = list(self._outputs)
    self.output_pcollection = outputs[0]
    self.step_context = self._execution_context.get_step_context()

    transform = self._applied_ptransform.transform
    self.driver = create_trigger_driver(transform.windowing)
    self.gabw_items = []
    self.keyed_holds = {}

    # A GroupAlsoByWindow takes KV[Any, Iter[Any]] (or something more
    # specific) as input, so the key type is the first element of that
    # tuple type.
    input_types = transform.get_type_hints().input_types
    kv_type_hint = input_types[0]
    key_type_hint = kv_type_hint[0].tuple_types[0]
    self.key_coder = coders.registry.get_coder(key_type_hint)
def start_bundle(self):
    """Initialise the per-bundle state of this evaluator.

    Sets the single output PCollection, a trigger driver built from the
    transform's windowing and the watermark manager's clock, empty buffers
    for items and keyed holds, and the coder for the key type.
    """
    assert len(self._outputs) == 1
    outputs = list(self._outputs)
    self.output_pcollection = outputs[0]

    applied = self._applied_ptransform
    windowing = applied.transform.windowing
    clock = self._evaluation_context._watermark_manager._clock
    self.driver = create_trigger_driver(windowing, clock=clock)
    self.gabw_items = []
    self.keyed_holds = {}

    # GroupAlsoByWindow has the same input and output type:
    # Tuple[Any, Iter[Any]] or something more specific. Without an element
    # type hint the key type falls back to Any.
    kv_type_hint = applied.outputs[None].element_type
    if kv_type_hint:
        key_type_hint = kv_type_hint.tuple_types[0]
    else:
        key_type_hint = Any
    self.key_coder = coders.registry.get_coder(key_type_hint)
def start_bundle(self):
    """Initialise the per-bundle state of this evaluator.

    Sets the single output PCollection, a trigger driver built from the
    transform's windowing and the watermark manager's clock, empty buffers
    for items and keyed holds, and the coder for the key type.
    """
    assert len(self._outputs) == 1
    outputs = list(self._outputs)
    self.output_pcollection = outputs[0]

    applied = self._applied_ptransform
    windowing = applied.transform.windowing
    clock = self._evaluation_context._watermark_manager._clock
    self.driver = create_trigger_driver(windowing, clock=clock)
    self.gabw_items = []
    self.keyed_holds = {}

    # GroupAlsoByWindow has the same input and output type:
    # KV[Any, Iter[Any]] or something more specific. Without an element
    # type hint the key type falls back to typehints.Any.
    kv_type_hint = applied.outputs[None].element_type
    if kv_type_hint:
        key_type_hint = kv_type_hint.tuple_types[0]
    else:
        key_type_hint = typehints.Any
    self.key_coder = coders.registry.get_coder(key_type_hint)
def __iter__(self):
    """Encode all grouped values and iterate over the single encoded blob.

    With default windowing each (key, values) pair is wrapped directly in a
    globally-windowed value; otherwise the pairs go through the trigger
    driver's ``process_entire_key``. Every resulting value is encoded into
    one shared output stream.

    Returns:
        An iterator over a one-element list holding the contents of the
        output stream.
    """
    output_stream = create_OutputStream()

    if not self._windowing.is_default():
        trigger_driver = trigger.create_trigger_driver(self._windowing, True)
        windowed_key_values = trigger_driver.process_entire_key
    else:
        globally_window = GlobalWindows.windowed_value(None).with_value

        def windowed_key_values(key, values):
            # Fast path: one globally-windowed element per key.
            return [globally_window((key, values))]

    grouped_impl = self._post_grouped_coder.get_impl()
    key_impl = self._key_coder.get_impl()

    for encoded_key, windowed_values in self._table.items():
        decoded_key = key_impl.decode(encoded_key)
        for wkvs in windowed_key_values(decoded_key, windowed_values):
            grouped_impl.encode_to_stream(wkvs, output_stream, True)

    encoded = output_stream.get()
    return iter([encoded])
def partition(self, n):
    # type: (int) -> List[List[bytes]]
    """Split the grouped, encoded contents of this buffer into ``n`` parts.

    The result is computed on the first call and stored in
    ``self._grouped_output``; once a non-empty result is stored, later calls
    return it unchanged whatever ``n`` they pass. Re-partitioning with a
    different ``n`` is not supported.

    Keys are spread over the parts round-robin by their position in
    ``self._table``, and the table is cleared after encoding.

    Returns:
        A list of ``n`` single-element lists, each holding the contents of
        one output stream.
    """
    if self._grouped_output:
        return self._grouped_output

    if self._windowing.is_default():
        globally_window = GlobalWindows.windowed_value(
            None,
            timestamp=GlobalWindow().max_timestamp(),
            pane_info=windowed_value.PaneInfo(
                is_first=True,
                is_last=True,
                timing=windowed_value.PaneInfoTiming.ON_TIME,
                index=0,
                nonspeculative_index=0)).with_value

        def windowed_key_values(key, values):
            # Fast path: one globally-windowed element per key.
            return [globally_window((key, values))]
    else:
        # TODO(pabloem, BEAM-7514): Trigger driver needs access to the clock
        #   note that this only comes through if windowing is default - but
        #   what about having multiple firings on the global window.
        #   May need to revise.
        trigger_driver = trigger.create_trigger_driver(self._windowing, True)
        windowed_key_values = trigger_driver.process_entire_key

    grouped_impl = self._post_grouped_coder.get_impl()
    key_impl = self._key_coder.get_impl()

    self._grouped_output = [[] for _ in range(n)]
    streams = [create_OutputStream() for _ in range(n)]

    for position, entry in enumerate(self._table.items()):
        encoded_key, windowed_values = entry
        decoded_key = key_impl.decode(encoded_key)
        # Round-robin: the key's position in the table picks its stream.
        for wkvs in windowed_key_values(decoded_key, windowed_values):
            grouped_impl.encode_to_stream(wkvs, streams[position % n], True)

    for part, stream in enumerate(streams):
        self._grouped_output[part] = [stream.get()]

    self._table.clear()
    return self._grouped_output
def start_bundle(self):
    """Create the trigger driver used for this bundle.

    ``create_trigger_driver`` is imported inside the method rather than at
    module level, which is why the pylint import-position checks are
    suppressed around the import.
    """
    # pylint: disable=wrong-import-order, wrong-import-position
    from apache_beam.transforms.trigger import create_trigger_driver
    # pylint: enable=wrong-import-order, wrong-import-position

    # NOTE(review): the positional ``True`` is presumably the batch-mode
    # flag of create_trigger_driver -- confirm against its signature.
    bundle_driver = create_trigger_driver(self.windowing, True)
    self.driver = bundle_driver