def process(
        self,
        element,
        window=DoFn.WindowParam,
        element_state_0=DoFn.StateParam(ELEMENT_STATE_0),
        element_state_1=DoFn.StateParam(ELEMENT_STATE_1),
        element_state_2=DoFn.StateParam(ELEMENT_STATE_2),
        element_state_3=DoFn.StateParam(ELEMENT_STATE_3),
        count_state=DoFn.StateParam(COUNT_STATE),
        window_timer=DoFn.TimerParam(WINDOW_TIMER),
        buffering_timer=DoFn.TimerParam(BUFFERING_TIMER)):
    """Buffer one element and flush once the batch is large enough.

    Each call sets ``window_timer`` to the end of the current window,
    increments ``count_state`` and appends ``element`` to one of the four
    element states, selected by the updated count modulo 4.

    ``batch_size``, ``max_buffering_duration_secs`` and ``clock`` are not
    parameters; they are resolved from a scope outside this block.

    Args:
        element: The value to buffer.
        window: The window of ``element``; only ``window.end`` is read.
        element_state_0: First of the four element buffers.
        element_state_1: Second of the four element buffers.
        element_state_2: Third of the four element buffers.
        element_state_3: Fourth of the four element buffers.
        count_state: State counting the elements buffered so far.
        window_timer: Timer set to ``window.end`` on every call.
        buffering_timer: Timer started on the first element of a batch when
            ``max_buffering_duration_secs`` is positive.

    Returns:
        The result of ``self.flush_batch(...)`` when the count has reached
        ``batch_size``; otherwise ``None``.
    """
    shards = [
        element_state_0,
        element_state_1,
        element_state_2,
        element_state_3,
    ]

    # Allowed lateness not supported in Python SDK
    # https://beam.apache.org/documentation/programming-guide/#watermarks-and-late-data
    window_timer.set(window.end)

    count_state.add(1)
    buffered = count_state.read()

    # The count is read after the increment, so the first element of a batch
    # lands in shard 1, the second in shard 2, and so on, wrapping around.
    shards[buffered % 4].add(element)

    if buffered == 1:
        # This is the first element in batch. Start counting buffering time
        # if a limit was set.
        if max_buffering_duration_secs > 0:
            buffering_timer.set(clock() + max_buffering_duration_secs)

    if buffered >= batch_size:
        return self.flush_batch(shards, count_state, buffering_timer)
    return None
def process(
        self,
        element: Tuple[str, bytes],
        window=DoFn.WindowParam,
        values_state=DoFn.StateParam(VALUES_STATE),
        end_of_window_timer=DoFn.TimerParam(END_OF_WINDOW_TIMER)):
    """Store the value of a keyed element and arm the end-of-window timer.

    Args:
        element: A ``(key, value)`` pair. Only the value is stored; the key
            is unpacked but not otherwise used in this method.
        window: The window of ``element``; only ``window.end`` is read.
        values_state: State to which the value is appended.
        end_of_window_timer: Timer set to ``window.end`` on every call.

    Returns:
        None. This method emits nothing itself.
    """
    logging.info('start process.')

    # Unpacking (rather than indexing) keeps the requirement that the
    # element has exactly two items.
    _unused_key, payload = element

    end_of_window_timer.set(window.end)
    values_state.add(payload)

    logging.info('end process.')
def on_window_timer(
        self,
        element_state_0=DoFn.StateParam(ELEMENT_STATE_0),
        element_state_1=DoFn.StateParam(ELEMENT_STATE_1),
        element_state_2=DoFn.StateParam(ELEMENT_STATE_2),
        element_state_3=DoFn.StateParam(ELEMENT_STATE_3),
        count_state=DoFn.StateParam(COUNT_STATE),
        buffering_timer=DoFn.TimerParam(BUFFERING_TIMER)):
    """Hand all four element buffers to ``flush_batch``.

    NOTE(review): presumably invoked when the window timer fires; the timer
    registration is not part of this block, so confirm against the
    surrounding class.

    Args:
        element_state_0: First of the four element buffers.
        element_state_1: Second of the four element buffers.
        element_state_2: Third of the four element buffers.
        element_state_3: Fourth of the four element buffers.
        count_state: State counting the buffered elements.
        buffering_timer: The buffering timer, passed through to
            ``flush_batch``.

    Returns:
        Whatever ``self.flush_batch(...)`` returns.
    """
    return self.flush_batch(
        [
            element_state_0,
            element_state_1,
            element_state_2,
            element_state_3,
        ],
        count_state,
        buffering_timer,
    )