def __init__(self, name, spec, counter_factory, sampler, consumers, operation_cls):
    """
    Build the wrapped operation, open it and attach the single output processor.

    :param name: forwarded unchanged to the base-class constructor
    :param spec: operation spec; ``spec.serialized_fn.profile_enabled`` decides whether a
                 profiler is created
    :param counter_factory: forwarded unchanged to the base-class constructor
    :param sampler: forwarded unchanged to the base-class constructor
    :param consumers: mapping from output tag to a list of consumers; only the first
                      consumer registered under ``'output'`` is used
    :param operation_cls: stored on the instance (presumably consumed by
                          ``generate_operation()`` -- confirm in the subclasses)
    """
    super(FunctionOperation, self).__init__(name, spec, counter_factory, sampler)

    # A DataOutputOperation consumer gets the network processor; every other kind of
    # consumer gets the intermediate one.
    consumer = consumers['output'][0]
    processor_cls = (
        NetworkOutputProcessor
        if isinstance(consumer, DataOutputOperation)
        else IntermediateOutputProcessor)
    self._output_processor = processor_cls(consumer)

    self.operation_cls = operation_cls
    self.operation = self.generate_operation()
    # Expose the wrapped operation's bound method directly on this instance.
    self.process_element = self.operation.process_element
    self.operation.open()

    # The profiler stays None unless profiling is switched on in the serialized function.
    self._profiler = Profiler() if spec.serialized_fn.profile_enabled else None
def __init__(self, name, spec, counter_factory, sampler, consumers, operation_cls):
    """
    Build the wrapped operation, open it and cache the value coder of the consumer.

    :param name: forwarded unchanged to the base-class constructor
    :param spec: operation spec; ``spec.serialized_fn.profile_enabled`` decides whether a
                 profiler is created
    :param counter_factory: forwarded unchanged to the base-class constructor
    :param sampler: forwarded unchanged to the base-class constructor
    :param consumers: mapping from output tag to a list of consumers; only the first
                      consumer registered under ``'output'`` is used
    :param operation_cls: stored on the instance (presumably consumed by
                          ``generate_operation()`` -- confirm in the subclasses)
    """
    super(FunctionOperation, self).__init__(name, spec, counter_factory, sampler)

    self.consumer = consumers['output'][0]
    # Walk from the consumer's windowed coder to the coder of the wrapped value and keep
    # its implementation object on the instance.
    windowed_coder = self.consumer.windowed_coder
    value_coder = windowed_coder.wrapped_value_coder
    self._value_coder_impl = value_coder.get_impl()

    self.operation_cls = operation_cls
    self.operation = self.generate_operation()
    # Expose the wrapped operation's bound method directly on this instance.
    self.process_element = self.operation.process_element
    self.operation.open()

    # The profiler stays None unless profiling is switched on in the serialized function.
    self._profiler = Profiler() if spec.serialized_fn.profile_enabled else None
def __init__(self, name, spec, counter_factory, sampler, consumers, operation_cls):
    """
    Build the wrapped operation, open it and prepare one output processor per consumer.

    :param name: forwarded unchanged to the base-class constructor
    :param spec: operation spec; ``spec.serialized_fn`` supplies ``profile_enabled`` and,
                 for a UserDefinedDataStreamFunction, ``has_side_output``
    :param counter_factory: forwarded unchanged to the base-class constructor
    :param sampler: forwarded unchanged to the base-class constructor
    :param consumers: mapping from output tag to a list of consumers, handed to
                      ``_create_output_processors``
    :param operation_cls: stored on the instance (presumably consumed by
                          ``generate_operation()`` -- confirm in the subclasses)
    """
    super(FunctionOperation, self).__init__(name, spec, counter_factory, sampler)

    self._output_processors = self._create_output_processors(
        consumers)  # type: Dict[str, List[OutputProcessor]]

    self.operation_cls = operation_cls
    self.operation = self.generate_operation()
    # Expose the wrapped operation's bound method directly on this instance.
    self.process_element = self.operation.process_element
    self.operation.open()

    serialized_fn = spec.serialized_fn
    # The profiler stays None unless profiling is switched on in the serialized function.
    self._profiler = Profiler() if serialized_fn.profile_enabled else None

    # it doesn't support side output in Table API & SQL, so only a
    # UserDefinedDataStreamFunction is asked whether it has one.
    self._has_side_output = (
        serialized_fn.has_side_output
        if isinstance(serialized_fn, UserDefinedDataStreamFunction)
        else False)

    if not self._has_side_output:
        # Without side outputs everything goes to the first processor of the default tag.
        default_processors = self._output_processors[DEFAULT_OUTPUT_TAG]
        self._main_output_processor = default_processors[0]
class FunctionOperation(Operation):
    """
    Common base for function operations, which run a StatelessFunction or a
    StatefulFunction against every input element.

    Subclasses supply the wrapped operation through :meth:`generate_operation`.
    """

    def __init__(self, name, spec, counter_factory, sampler, consumers, operation_cls):
        """
        Build the wrapped operation, open it and prepare one output processor per consumer.

        :param name: forwarded unchanged to the base-class constructor
        :param spec: operation spec; ``spec.serialized_fn`` supplies ``profile_enabled``
                     and, for a UserDefinedDataStreamFunction, ``has_side_output``
        :param counter_factory: forwarded unchanged to the base-class constructor
        :param sampler: forwarded unchanged to the base-class constructor
        :param consumers: mapping from output tag to a list of consumers
        :param operation_cls: stored on the instance (presumably consumed by
                              ``generate_operation()`` -- confirm in the subclasses)
        """
        super().__init__(name, spec, counter_factory, sampler)

        self._output_processors = self._create_output_processors(
            consumers)  # type: Dict[str, List[OutputProcessor]]

        self.operation_cls = operation_cls
        self.operation = self.generate_operation()
        # Expose the wrapped operation's bound method directly on this instance.
        self.process_element = self.operation.process_element
        self.operation.open()

        serialized_fn = spec.serialized_fn
        # The profiler stays None unless profiling is switched on.
        self._profiler = Profiler() if serialized_fn.profile_enabled else None

        # it doesn't support side output in Table API & SQL, so only a
        # UserDefinedDataStreamFunction is asked whether it has one.
        self._has_side_output = (
            serialized_fn.has_side_output
            if isinstance(serialized_fn, UserDefinedDataStreamFunction)
            else False)

        if not self._has_side_output:
            # Without side outputs everything goes to the first processor of the default tag.
            default_processors = self._output_processors[DEFAULT_OUTPUT_TAG]
            self._main_output_processor = default_processors[0]

    def setup(self):
        """Delegate to the base-class ``setup``."""
        super().setup()

    def start(self):
        """Start the base operation and, when one exists, the profiler."""
        with self.scoped_start_state:
            super().start()
            profiler = self._profiler
            if profiler:
                profiler.start()

    def finish(self):
        """Finish the base operation and the wrapped operation, then close the profiler."""
        with self.scoped_finish_state:
            super().finish()
            self.operation.finish()
            profiler = self._profiler
            if profiler:
                profiler.close()

    def needs_finalization(self):
        """Always report that no finalization is needed."""
        return False

    def reset(self):
        """Delegate to the base-class ``reset``."""
        super().reset()

    def teardown(self):
        """Close the wrapped operation, then every output processor of every tag."""
        with self.scoped_finish_state:
            self.operation.close()
            for tagged_processors in self._output_processors.values():
                for processor in tagged_processors:
                    processor.close()

    def progress_metrics(self):
        """
        Return the base-class progress metrics with the output element counts replaced
        by a single entry taken from the first receiver.
        """
        metrics = super().progress_metrics()
        output_counts = metrics.processed_elements.measured.output_element_counts
        output_counts.clear()
        # The entry is keyed by the string form of a None tag.
        tag = None
        first_receiver = self.receivers[0]
        output_counts[str(tag)] = first_receiver.opcounter.element_counter.value()
        return metrics

    def process(self, o: WindowedValue):
        """
        Run the wrapped operation over every value carried by ``o`` and hand the results
        to the output processors.

        :param o: windowed value whose ``value`` attribute is iterated element by element
        """
        with self.scoped_process_state:
            if self._has_side_output:
                # Each result is a (tag, row) pair and is routed to the processors
                # registered under that tag; an unknown tag has no processors.
                for element in o.value:
                    for tag, row in self.process_element(element):
                        for processor in self._output_processors.get(tag, []):
                            processor.process_outputs(o, [row])
                return

            if not isinstance(self.operation, BundleOperation):
                # Results are emitted element by element.
                for element in o.value:
                    self._main_output_processor.process_outputs(
                        o, self.operation.process_element(element))
                return

            # Bundle operation: feed every element first, then emit what
            # finish_bundle() returns in a single call.
            for element in o.value:
                self.process_element(element)
            self._main_output_processor.process_outputs(
                o, self.operation.finish_bundle())

    def monitoring_infos(self, transform_id, tag_to_pcollection_id):
        """
        Only the user metrics are passed on to Java.

        :param transform_id: forwarded to ``user_monitoring_infos``
        :param tag_to_pcollection_id: not used for user metrics
        """
        return super().user_monitoring_infos(transform_id)

    @staticmethod
    def _create_output_processors(consumers_map):
        """
        Wrap every consumer in an output processor, keeping the tag -> list layout.

        :param consumers_map: mapping from output tag to an iterable of consumers
        :return: dict mapping each tag to the list of processors created for its consumers
        """
        processors_by_tag = {}
        for tag, consumers in consumers_map.items():
            processors = []
            for consumer in consumers:
                # A DataOutputOperation consumer gets the network processor; every
                # other kind of consumer gets the intermediate one.
                if isinstance(consumer, DataOutputOperation):
                    processors.append(NetworkOutputProcessor(consumer))
                else:
                    processors.append(IntermediateOutputProcessor(consumer))
            processors_by_tag[tag] = processors
        return processors_by_tag

    @abstractmethod
    def generate_operation(self):
        """Create the wrapped operation; its result is stored as ``self.operation``."""
        pass
class FunctionOperation(Operation):
    """
    Common base for function operations, which run a StatelessFunction or a
    StatefulFunction against every input element.

    Subclasses supply the wrapped operation through :meth:`generate_operation`.
    """

    def __init__(self, name, spec, counter_factory, sampler, consumers, operation_cls):
        """
        Build the wrapped operation, open it and attach the single output processor.

        :param name: forwarded unchanged to the base-class constructor
        :param spec: operation spec; ``spec.serialized_fn.profile_enabled`` decides
                     whether a profiler is created
        :param counter_factory: forwarded unchanged to the base-class constructor
        :param sampler: forwarded unchanged to the base-class constructor
        :param consumers: mapping from output tag to a list of consumers; only the first
                          consumer registered under ``'output'`` is used
        :param operation_cls: stored on the instance (presumably consumed by
                              ``generate_operation()`` -- confirm in the subclasses)
        """
        super().__init__(name, spec, counter_factory, sampler)

        # A DataOutputOperation consumer gets the network processor; every other kind
        # of consumer gets the intermediate one.
        consumer = consumers['output'][0]
        processor_cls = (
            NetworkOutputProcessor
            if isinstance(consumer, DataOutputOperation)
            else IntermediateOutputProcessor)
        self._output_processor = processor_cls(consumer)

        self.operation_cls = operation_cls
        self.operation = self.generate_operation()
        # Expose the wrapped operation's bound method directly on this instance.
        self.process_element = self.operation.process_element
        self.operation.open()

        # The profiler stays None unless profiling is switched on.
        self._profiler = Profiler() if spec.serialized_fn.profile_enabled else None

    def setup(self):
        """Delegate to the base-class ``setup``."""
        super().setup()

    def start(self):
        """Start the base operation and, when one exists, the profiler."""
        with self.scoped_start_state:
            super().start()
            profiler = self._profiler
            if profiler:
                profiler.start()

    def finish(self):
        """Finish the base operation and the wrapped operation, then close the profiler."""
        with self.scoped_finish_state:
            super().finish()
            self.operation.finish()
            profiler = self._profiler
            if profiler:
                profiler.close()

    def needs_finalization(self):
        """Always report that no finalization is needed."""
        return False

    def reset(self):
        """Delegate to the base-class ``reset``."""
        super().reset()

    def teardown(self):
        """Close the wrapped operation, then the output processor."""
        with self.scoped_finish_state:
            self.operation.close()
            self._output_processor.close()

    def progress_metrics(self):
        """
        Return the base-class progress metrics with the output element counts replaced
        by a single entry taken from the first receiver.
        """
        metrics = super().progress_metrics()
        output_counts = metrics.processed_elements.measured.output_element_counts
        output_counts.clear()
        # The entry is keyed by the string form of a None tag.
        tag = None
        first_receiver = self.receivers[0]
        output_counts[str(tag)] = first_receiver.opcounter.element_counter.value()
        return metrics

    def process(self, o: WindowedValue):
        """
        Run the wrapped operation over every value carried by ``o`` and hand the results
        to the output processor.

        :param o: windowed value whose ``value`` attribute is iterated element by element
        """
        with self.scoped_process_state:
            if not isinstance(self.operation, BundleOperation):
                # Results are emitted element by element.
                for element in o.value:
                    self._output_processor.process_outputs(
                        o, self.process_element(element))
                return

            # Bundle operation: feed every element first, then emit what
            # finish_bundle() returns in a single call.
            for element in o.value:
                self.process_element(element)
            self._output_processor.process_outputs(o, self.operation.finish_bundle())

    def monitoring_infos(self, transform_id, tag_to_pcollection_id):
        """
        Only the user metrics are passed on to Java.

        :param transform_id: forwarded to ``user_monitoring_infos``
        :param tag_to_pcollection_id: not used for user metrics
        """
        return super().user_monitoring_infos(transform_id)

    @abstractmethod
    def generate_operation(self):
        """Create the wrapped operation; its result is stored as ``self.operation``."""
        pass