Exemple #1
0
    def process(
        self,
        element,
        window=DoFn.WindowParam,
        element_state_0=DoFn.StateParam(ELEMENT_STATE_0),
        element_state_1=DoFn.StateParam(ELEMENT_STATE_1),
        element_state_2=DoFn.StateParam(ELEMENT_STATE_2),
        element_state_3=DoFn.StateParam(ELEMENT_STATE_3),
        count_state=DoFn.StateParam(COUNT_STATE),
        window_timer=DoFn.TimerParam(WINDOW_TIMER),
        buffering_timer=DoFn.TimerParam(BUFFERING_TIMER)):
      """Buffer one element and flush once the batch is full.

      The element is appended to one of the four element-state cells, picked
      by the running count modulo the number of cells. ``batch_size``,
      ``max_buffering_duration_secs`` and ``clock`` are free variables taken
      from a scope that is not visible in this snippet.

      Args:
        element: The value to buffer.
        window: Window of the element; its ``end`` is used to arm
          ``window_timer``.
        element_state_0: First element buffer.
        element_state_1: Second element buffer.
        element_state_2: Third element buffer.
        element_state_3: Fourth element buffer.
        count_state: Counter state; incremented by one, then read back.
        window_timer: Timer set to ``window.end`` on every call.
        buffering_timer: Timer set to ``clock() + max_buffering_duration_secs``
          when the count read back is 1 and a positive limit is configured.

      Returns:
        The result of ``self.flush_batch(...)`` once the count reaches
        ``batch_size``; otherwise ``None``.
      """
      # Allowed lateness not supported in Python SDK
      # https://beam.apache.org/documentation/programming-guide/#watermarks-and-late-data
      window_timer.set(window.end)

      count_state.add(1)
      buffered = count_state.read()

      # The cell is chosen from the count that already includes this element.
      cells = [
          element_state_0,
          element_state_1,
          element_state_2,
          element_state_3,
      ]
      cells[buffered % len(cells)].add(element)

      starts_new_batch = buffered == 1
      if starts_new_batch and max_buffering_duration_secs > 0:
        # First element of the batch: start the buffering clock, but only
        # when a limit was set.
        buffering_timer.set(clock() + max_buffering_duration_secs)

      if buffered < batch_size:
        # Batch not full yet; nothing to emit for this element.
        return None
      return self.flush_batch(cells, count_state, buffering_timer)
Exemple #2
0
 def process(self,
             element: Tuple[str, bytes],
             window=DoFn.WindowParam,
             values_state=DoFn.StateParam(VALUES_STATE),
             end_of_window_timer=DoFn.TimerParam(END_OF_WINDOW_TIMER)):
     """Store the value of a keyed element and arm the end-of-window timer.

     Args:
       element: A ``(key, value)`` pair; only the value is stored.
       window: Window of the element; its ``end`` is the timer's firing time.
       values_state: State cell that receives the value via ``add``.
       end_of_window_timer: Timer set to ``window.end`` on every call.

     Returns:
       None. This method emits nothing itself.
     """
     logging.info('start process.')
     # The key is not used here, but unpacking still enforces a 2-item element.
     _key, payload = element
     end_of_window_timer.set(window.end)
     values_state.add(payload)
     logging.info('end process.')
Exemple #3
0
    def on_window_timer(
        self,
        element_state_0=DoFn.StateParam(ELEMENT_STATE_0),
        element_state_1=DoFn.StateParam(ELEMENT_STATE_1),
        element_state_2=DoFn.StateParam(ELEMENT_STATE_2),
        element_state_3=DoFn.StateParam(ELEMENT_STATE_3),
        count_state=DoFn.StateParam(COUNT_STATE),
        buffering_timer=DoFn.TimerParam(BUFFERING_TIMER)):
      """Flush whatever is buffered in the four element-state cells.

      NOTE(review): presumably registered as the callback for the window
      timer; the decorator is not visible in this snippet -- confirm.

      Args:
        element_state_0: First element buffer.
        element_state_1: Second element buffer.
        element_state_2: Third element buffer.
        element_state_3: Fourth element buffer.
        count_state: Counter state handed to ``flush_batch``.
        buffering_timer: Buffering timer handed to ``flush_batch``.

      Returns:
        Whatever ``self.flush_batch`` returns.
      """
      return self.flush_batch(
          [element_state_0, element_state_1, element_state_2, element_state_3],
          count_state,
          buffering_timer)