def open(self, function_context: FunctionContext):
    """Build the runtime helpers this operator needs before processing rows.

    In order, this creates the internal timer service, opens the window
    aggregator, picks the window process function that matches the type of
    the window assigner, opens the trigger context and finally opens the
    window function with a freshly built window context.

    :param function_context: context forwarded to the
        ``PerWindowStateDataViewStore`` handed to the window aggregator.
    """
    backend = self._state_backend
    assigner = self._window_assigner
    aggregator = self._window_aggregator

    timer_service = InternalTimerServiceImpl(backend)
    self._internal_timer_service = timer_service

    aggregator.open(PerWindowStateDataViewStore(function_context, backend))

    # Every window process function takes the same leading arguments; only
    # the merging variant additionally needs the state backend.
    shared_args = (self._allowed_lateness, assigner, aggregator)
    if isinstance(assigner, PanedWindowAssigner):
        window_function = PanedWindowProcessFunction(*shared_args)
    elif isinstance(assigner, MergingWindowAssigner):
        window_function = MergingWindowProcessFunction(*shared_args, backend)
    else:
        window_function = GeneralWindowProcessFunction(*shared_args)
    self._window_function = window_function

    trigger_context = TriggerContext(self._trigger, timer_service, backend)
    self._trigger_context = trigger_context
    trigger_context.open()

    window_context = WindowContext(
        self,
        trigger_context,
        backend,
        self._state_value_coder,
        timer_service,
        assigner.is_event_time())
    self._window_context = window_context
    window_function.open(window_context)
class GroupWindowAggFunctionBase(Generic[K, W]):
    """Base class for group window aggregate functions.

    For every input row the class assigns the row to windows, folds it into
    the per-window accumulators kept in the ``"window_state"`` value state,
    asks the trigger whether the window should fire and registers a cleanup
    timer. Subclasses provide :meth:`_emit_window_result` to turn a fired
    window into output.
    """

    # Fixed reference points that allow datetime <-> epoch-millisecond
    # conversions to be done with exact integer arithmetic (no floats).
    _EPOCH_UTC = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
    _ONE_MILLISECOND = datetime.timedelta(milliseconds=1)

    def __init__(self,
                 allowed_lateness: int,
                 key_selector: RowKeySelector,
                 state_backend: RemoteKeyedStateBackend,
                 state_value_coder: Coder,
                 window_assigner: WindowAssigner[W],
                 window_aggregator: NamespaceAggsHandleFunctionBase[W],
                 trigger: Trigger[W],
                 rowtime_index: int,
                 shift_timezone: str):
        """Store the collaborators and create the window accumulator state.

        :param allowed_lateness: value added to a window's max timestamp when
            computing its event-time cleanup time.
        :param key_selector: extracts the grouping key from a row's values.
        :param state_backend: keyed state backend used for all state access.
        :param state_value_coder: coder for the accumulators stored in state.
        :param window_assigner: assigns rows to windows.
        :param window_aggregator: accumulates/retracts rows per window.
        :param trigger: decides when a window fires.
        :param rowtime_index: index of the rowtime field inside a row's values
            (only read when the assigner is event-time based).
        :param shift_timezone: name of the timezone timestamps are shifted to;
            ``"UTC"`` disables shifting.
        """
        self._allowed_lateness = allowed_lateness
        self._key_selector = key_selector
        self._state_backend = state_backend
        self._state_value_coder = state_value_coder
        self._window_assigner = window_assigner
        self._window_aggregator = window_aggregator
        self._rowtime_index = rowtime_index
        self._shift_timezone = shift_timezone
        # The following collaborators are created lazily in open().
        self._window_function = None  # type: InternalWindowProcessFunction[K, W]
        self._internal_timer_service = None  # type: InternalTimerServiceImpl
        self._window_context = None  # type: WindowContext
        self._trigger = trigger
        self._trigger_context = None  # type: TriggerContext
        self._window_state = self._state_backend.get_value_state(
            "window_state", state_value_coder)

    def open(self, function_context: FunctionContext):
        """Create the timer service, window function and trigger/window contexts.

        The concrete window process function is chosen from the type of the
        window assigner (paned, merging or general).

        :param function_context: context forwarded to the
            ``PerWindowStateDataViewStore`` handed to the window aggregator.
        """
        self._internal_timer_service = InternalTimerServiceImpl(
            self._state_backend)
        self._window_aggregator.open(
            PerWindowStateDataViewStore(function_context, self._state_backend))

        if isinstance(self._window_assigner, PanedWindowAssigner):
            self._window_function = PanedWindowProcessFunction(
                self._allowed_lateness, self._window_assigner,
                self._window_aggregator)
        elif isinstance(self._window_assigner, MergingWindowAssigner):
            self._window_function = MergingWindowProcessFunction(
                self._allowed_lateness, self._window_assigner,
                self._window_aggregator, self._state_backend)
        else:
            self._window_function = GeneralWindowProcessFunction(
                self._allowed_lateness, self._window_assigner,
                self._window_aggregator)

        self._trigger_context = TriggerContext(self._trigger,
                                               self._internal_timer_service,
                                               self._state_backend)
        self._trigger_context.open()

        self._window_context = WindowContext(
            self, self._trigger_context, self._state_backend,
            self._state_value_coder, self._internal_timer_service,
            self._window_assigner.is_event_time())
        self._window_function.open(self._window_context)

    def process_element(self, input_row: Row):
        """Fold one row into its windows and collect results of fired windows.

        :param input_row: the incoming row; its ``_values`` are used for key
            extraction, rowtime lookup and window assignment.
        :return: list with one emitted result per window whose trigger fired
            for this element.
        """
        input_value = input_row._values
        current_key = self._key_selector.get_key(input_value)
        self._state_backend.set_current_key(current_key)

        if self._window_assigner.is_event_time():
            # The rowtime field is a datetime that is interpreted as UTC
            # wall-clock time. Convert it to epoch milliseconds with exact
            # integer arithmetic: going through float ``timestamp()`` and
            # ``int()`` truncates toward zero, which is wrong for pre-epoch
            # values with a fractional second and loses precision for values
            # far from the epoch.
            rowtime = input_value[self._rowtime_index]
            timestamp = (
                (rowtime.replace(tzinfo=datetime.timezone.utc) - self._EPOCH_UTC)
                // self._ONE_MILLISECOND)
        else:
            timestamp = self._internal_timer_service.current_processing_time()

        timestamp = self.to_utc_timestamp_mills(timestamp)

        # the windows which the input row should be placed into
        affected_windows = self._window_function.assign_state_namespace(
            input_value, timestamp)
        for window in affected_windows:
            self._window_state.set_current_namespace(window)
            acc = self._window_state.value()  # type: List
            if acc is None:
                acc = self._window_aggregator.create_accumulators()
            self._window_aggregator.set_accumulators(window, acc)

            if input_row._is_accumulate_msg():
                self._window_aggregator.accumulate(input_row)
            else:
                self._window_aggregator.retract(input_row)
            acc = self._window_aggregator.get_accumulators()
            self._window_state.update(acc)

        # the actual windows which the input row belongs to
        actual_windows = self._window_function.assign_actual_windows(
            input_value, timestamp)
        result = []
        for window in actual_windows:
            self._trigger_context.window = window
            trigger_result = self._trigger_context.on_element(
                input_row, timestamp)
            if trigger_result:
                result.append(self._emit_window_result(current_key, window))
            self._register_cleanup_timer(window)
        return result

    def process_watermark(self, watermark: int):
        """Forward the watermark to the internal timer service."""
        self._internal_timer_service.advance_watermark(watermark)

    def on_event_time(self, timer: InternalTimer):
        """Handle an event-time timer.

        :param timer: the timer; its key becomes the current key and its
            namespace is used as the window.
        :return: list holding the emitted window result if the trigger fired,
            otherwise an empty list.
        """
        result = []
        timestamp = timer.get_timestamp()
        key = timer.get_key()
        self._state_backend.set_current_key(key)
        window = timer.get_namespace()
        self._trigger_context.window = window
        if self._trigger_context.on_event_time(timestamp):
            # fire
            result.append(self._emit_window_result(key, window))

        # Window cleanup is only driven by timers of the assigner's own
        # time domain.
        if self._window_assigner.is_event_time():
            self._window_function.clean_window_if_needed(window, timestamp)
        return result

    def on_processing_time(self, timer: InternalTimer):
        """Handle a processing-time timer.

        :param timer: the timer; its key becomes the current key and its
            namespace is used as the window.
        :return: list holding the emitted window result if the trigger fired,
            otherwise an empty list.
        """
        result = []
        timestamp = timer.get_timestamp()
        key = timer.get_key()
        self._state_backend.set_current_key(key)
        window = timer.get_namespace()
        self._trigger_context.window = window
        if self._trigger_context.on_processing_time(timestamp):
            # fire
            result.append(self._emit_window_result(key, window))

        # Window cleanup is only driven by timers of the assigner's own
        # time domain.
        if not self._window_assigner.is_event_time():
            self._window_function.clean_window_if_needed(window, timestamp)
        return result

    def get_timers(self):
        """Yield the keys of the timer service's ``timers`` mapping.

        The mapping is cleared once the generator has been fully consumed.
        """
        yield from self._internal_timer_service.timers.keys()
        self._internal_timer_service.timers.clear()

    def to_utc_timestamp_mills(self, epoch_mills):
        """Shift an epoch-millisecond timestamp into the configured timezone.

        The instant is rendered as wall-clock time in ``shift_timezone`` and
        that wall-clock time is then read back as if it were UTC.

        :param epoch_mills: milliseconds since the epoch.
        :return: ``epoch_mills`` unchanged when the shift timezone is
            ``"UTC"``, otherwise the shifted value in milliseconds.
        """
        if self._shift_timezone == "UTC":
            return epoch_mills

        timezone = pytz.timezone(self._shift_timezone)
        # Stay in integer/timedelta arithmetic end to end. Multiplying
        # ``total_seconds()`` by 1000.0 and truncating with ``int()`` can lose
        # a millisecond (1.001 * 1000.0 == 1000.9999999999999).
        utc_date_time = self._EPOCH_UTC + datetime.timedelta(
            milliseconds=epoch_mills)
        local_date_time = utc_date_time.astimezone(timezone).replace(tzinfo=None)
        naive_epoch = self._EPOCH_UTC.replace(tzinfo=None)
        return (local_date_time - naive_epoch) // self._ONE_MILLISECOND

    def close(self):
        """Close the window aggregator."""
        self._window_aggregator.close()

    def _register_cleanup_timer(self, window: W):
        """Register the timer that will clean up ``window``.

        Nothing is registered when the cleanup time equals ``MAX_LONG_VALUE``.
        The timer is registered in the time domain of the window assigner.
        """
        cleanup_time = self.cleanup_time(window)
        if cleanup_time == MAX_LONG_VALUE:
            return

        if self._window_assigner.is_event_time():
            self._trigger_context.register_event_time_timer(cleanup_time)
        else:
            self._trigger_context.register_processing_time_timer(cleanup_time)

    def cleanup_time(self, window: W) -> int:
        """Return the time at which ``window`` may be cleaned up.

        For event time this is the window's max timestamp plus the allowed
        lateness (never negative); for processing time it is the max
        timestamp itself (never negative).

        :return: the cleanup time, or ``MAX_LONG_VALUE`` when the event-time
            cleanup time would fall outside the valid range.
        """
        if self._window_assigner.is_event_time():
            max_timestamp = window.max_timestamp()
            cleanup_time = max(0, max_timestamp + self._allowed_lateness)
            # Python integers never wrap around, so comparing against the max
            # timestamp alone cannot detect an out-of-range sum. Clamp
            # explicitly so no timer is registered beyond MAX_LONG_VALUE.
            # NOTE(review): assumes MAX_LONG_VALUE is the largest valid
            # timestamp -- confirm against its definition.
            if max_timestamp <= cleanup_time <= MAX_LONG_VALUE:
                return cleanup_time
            return MAX_LONG_VALUE
        else:
            return max(0, window.max_timestamp())

    @abstractmethod
    def _emit_window_result(self, key: List, window: W):
        """Build the output for a fired window; implemented by subclasses.

        :param key: the current grouping key.
        :param window: the window whose trigger fired.
        """
        pass
def on_merge(self, window: W, merge_context: TriggerContext):
    """Merge this trigger's count state when windows are merged.

    :param window: the window involved in the merge (not used here).
    :param merge_context: trigger context asked to merge the partitioned
        state described by ``self._count_state_desc``.
    """
    count_state_descriptor = self._count_state_desc
    merge_context.merge_partitioned_state(count_state_descriptor)