예제 #1
0
 def start_bundle(self):
   """Create the trigger driver for this bundle and record the state class.

   The trigger helpers are imported inside the method instead of at module
   level, which is why the pylint import-position checks are suppressed
   around the import.
   """
   # pylint: disable=wrong-import-order, wrong-import-position
   from apache_beam.transforms.trigger import (
       InMemoryUnmergedState, create_trigger_driver)
   # pylint: enable=wrong-import-order, wrong-import-position
   bundle_driver = create_trigger_driver(self.windowing, True)
   self.driver = bundle_driver
   self.state_type = InMemoryUnmergedState
예제 #2
0
 def __iter__(self):
   """Iterate over the grouped, encoded contents of the table.

   Each ``(encoded key, values)`` entry of ``self._table`` has its key decoded
   with ``self._key_coder``. The key and values are handed to a trigger driver
   built from ``self._windowing``, and every element the driver produces is
   appended to a single output stream via ``self._post_grouped_coder``.

   Returns:
     An iterator over a one-element list holding the output stream's contents.
   """
   output_stream = create_OutputStream()
   trigger_driver = trigger.create_trigger_driver(self._windowing, True)
   # The coder implementation does not depend on the key or element being
   # processed, so fetch it once instead of once per encoded element.
   coder_impl = self._post_grouped_coder.get_impl()
   for encoded_key, windowed_values in self._table.items():
     key = self._key_coder.decode(encoded_key)
     for wkvs in trigger_driver.process_entire_key(key, windowed_values):
       coder_impl.encode_to_stream(wkvs, output_stream, True)
   return iter([output_stream.get()])
예제 #3
0
 def __iter__(self):
   """Encode all grouped results into one stream and iterate over it.

   For every entry in ``self._table`` the encoded key is decoded with
   ``self._key_coder`` and passed, together with its values, to
   ``process_entire_key`` on a trigger driver created from
   ``self._windowing``. Each element produced is written to the same output
   stream with the implementation of ``self._post_grouped_coder``.

   Returns:
     An iterator over a list whose only item is the output stream's contents.
   """
   output_stream = create_OutputStream()
   trigger_driver = trigger.create_trigger_driver(self._windowing, True)
   # Loop-invariant: look the coder implementation up a single time rather
   # than repeating the call for each element written to the stream.
   post_grouped_impl = self._post_grouped_coder.get_impl()
   for encoded_key, windowed_values in self._table.items():
     key = self._key_coder.decode(encoded_key)
     for wkvs in trigger_driver.process_entire_key(key, windowed_values):
       post_grouped_impl.encode_to_stream(wkvs, output_stream, True)
   return iter([output_stream.get()])
예제 #4
0
  def start_bundle(self):
    """Initialise the per-bundle attributes.

    Sets the single output PCollection, the step context, a trigger driver
    built from the applied transform's windowing, empty ``gabw_items`` and
    ``keyed_holds`` containers, and the coder for the key type taken from the
    transform's input type hints.
    """
    assert len(self._outputs) == 1
    outputs = list(self._outputs)
    self.output_pcollection = outputs[0]
    self.step_context = self._execution_context.get_step_context()

    transform = self._applied_ptransform.transform
    self.driver = create_trigger_driver(transform.windowing)
    self.gabw_items = []
    self.keyed_holds = {}

    # A GroupAlsoByWindow takes KV[Any, Iter[Any]] (or something more
    # specific) as input, so the key type is the first tuple component of the
    # first input type hint.
    input_hint = transform.get_type_hints().input_types[0]
    key_hint = input_hint[0].tuple_types[0]
    self.key_coder = coders.registry.get_coder(key_hint)
예제 #5
0
    def start_bundle(self):
        """Initialise the per-bundle attributes.

        Sets the single output PCollection, a trigger driver built from the
        applied transform's windowing and the watermark manager's clock, empty
        ``gabw_items`` and ``keyed_holds`` containers, and the key coder.
        """
        assert len(self._outputs) == 1
        outputs = list(self._outputs)
        self.output_pcollection = outputs[0]

        windowing = self._applied_ptransform.transform.windowing
        clock = self._evaluation_context._watermark_manager._clock
        self.driver = create_trigger_driver(windowing, clock=clock)
        self.gabw_items = []
        self.keyed_holds = {}

        # For a GroupAlsoByWindow the input type equals the output type:
        # Tuple[Any, Iter[Any]] or something more specific. Fall back to Any
        # when the output carries no (truthy) element type.
        element_hint = self._applied_ptransform.outputs[None].element_type
        if element_hint:
            key_hint = element_hint.tuple_types[0]
        else:
            key_hint = Any
        self.key_coder = coders.registry.get_coder(key_hint)
  def start_bundle(self):
    """Prepare the attributes used while processing one bundle.

    Records the only output PCollection, builds a trigger driver from the
    applied transform's windowing with the watermark manager's clock, resets
    ``gabw_items`` and ``keyed_holds``, and resolves the coder for the key
    type.
    """
    assert len(self._outputs) == 1
    self.output_pcollection = list(self._outputs)[0]

    applied = self._applied_ptransform
    self.driver = create_trigger_driver(
        applied.transform.windowing,
        clock=self._evaluation_context._watermark_manager._clock)
    self.gabw_items = []
    self.keyed_holds = {}

    # Input and output types of a GroupAlsoByWindow coincide:
    # KV[Any, Iter[Any]] or more specific. Without a (truthy) element type
    # the key type defaults to typehints.Any.
    output_hint = self._applied_ptransform.outputs[None].element_type
    if not output_hint:
      key_hint = typehints.Any
    else:
      key_hint = output_hint.tuple_types[0]
    self.key_coder = coders.registry.get_coder(key_hint)
예제 #7
0
 def __iter__(self):
   """Iterate over the grouped, encoded contents of the table.

   With default windowing every ``(key, values)`` pair is wrapped directly
   through ``GlobalWindows.windowed_value(None).with_value``; otherwise the
   pair is expanded by ``process_entire_key`` on a trigger driver created
   from ``self._windowing``. All resulting elements are written to a single
   output stream.

   Returns:
     An iterator over a one-element list holding the output stream's contents.
   """
   stream = create_OutputStream()
   if not self._windowing.is_default():
     driver = trigger.create_trigger_driver(self._windowing, True)
     expand = driver.process_entire_key
   else:
     wrap_globally = GlobalWindows.windowed_value(None).with_value

     def expand(key, values):
       return [wrap_globally((key, values))]

   element_impl = self._post_grouped_coder.get_impl()
   key_impl = self._key_coder.get_impl()
   for raw_key, values in self._table.items():
     decoded_key = key_impl.decode(raw_key)
     for element in expand(decoded_key, values):
       element_impl.encode_to_stream(element, stream, True)
   return iter([stream.get()])
예제 #8
0
 def __iter__(self):
   """Encode every grouped entry into one stream and iterate over it.

   The per-key expansion depends on ``self._windowing.is_default()``: if it
   is true, each ``(key, values)`` pair becomes a single value produced by
   ``GlobalWindows.windowed_value(None).with_value``; if not, a trigger
   driver's ``process_entire_key`` supplies the elements.

   Returns:
     An iterator over a list whose only item is the output stream's contents.
   """
   sink = create_OutputStream()
   if self._windowing.is_default():
     as_global = GlobalWindows.windowed_value(None).with_value

     def per_key_outputs(key, values):
       # A single element holding the whole (key, values) pair.
       return [as_global((key, values))]
   else:
     per_key_outputs = trigger.create_trigger_driver(
         self._windowing, True).process_entire_key

   out_impl = self._post_grouped_coder.get_impl()
   key_decoder = self._key_coder.get_impl()
   for key_bytes, grouped_values in self._table.items():
     current_key = key_decoder.decode(key_bytes)
     for produced in per_key_outputs(current_key, grouped_values):
       out_impl.encode_to_stream(produced, sink, True)
   return iter([sink.get()])
예제 #9
0
 def partition(self, n):
     # type: (int) -> List[List[bytes]]
     """Split the buffered table into ``n`` encoded parts.

     The grouped output is built on the first call and stored in
     ``self._grouped_output``. While that stored value is truthy, later calls
     return it unchanged whatever ``n`` is, so re-partitioning with a
     different ``n`` is not supported. ``self._table`` is cleared once the
     output has been built.

     Args:
       n: Number of parts to produce.

     Returns:
       ``self._grouped_output``: one single-element list per part, each
       holding the contents of that part's output stream.
     """
     if self._grouped_output:
         return self._grouped_output

     if not self._windowing.is_default():
         # TODO(pabloem, BEAM-7514): Trigger driver needs access to the clock
         #   note that this only comes through if windowing is default - but
         #   what about having multiple firings on the global window.
         #   May need to revise.
         driver = trigger.create_trigger_driver(self._windowing, True)
         expand = driver.process_entire_key
     else:
         end_of_window = GlobalWindow().max_timestamp()
         only_pane = windowed_value.PaneInfo(
             is_first=True,
             is_last=True,
             timing=windowed_value.PaneInfoTiming.ON_TIME,
             index=0,
             nonspeculative_index=0)
         in_global_window = GlobalWindows.windowed_value(
             None, timestamp=end_of_window, pane_info=only_pane).with_value

         def expand(key, values):
             return [in_global_window((key, values))]

     element_impl = self._post_grouped_coder.get_impl()
     key_impl = self._key_coder.get_impl()
     self._grouped_output = [[] for _ in range(n)]
     streams = [create_OutputStream() for _ in range(n)]
     # Table entries are spread over the streams by their enumeration index.
     for position, (raw_key, values) in enumerate(self._table.items()):
         decoded_key = key_impl.decode(raw_key)
         for element in expand(decoded_key, values):
             element_impl.encode_to_stream(
                 element, streams[position % n], True)
     for slot, stream in enumerate(streams):
         self._grouped_output[slot] = [stream.get()]
     self._table.clear()
     return self._grouped_output
예제 #10
0
 def start_bundle(self):
     """Create the trigger driver used for this bundle.

     The factory is imported inside the method instead of at module level,
     hence the pylint import-position suppression around the import.
     """
     # pylint: disable=wrong-import-order, wrong-import-position
     from apache_beam.transforms.trigger import (
         create_trigger_driver as make_driver)
     # pylint: enable=wrong-import-order, wrong-import-position
     self.driver = make_driver(self.windowing, True)