def reduce(self, func: Union[Callable, ReduceFunction]) -> 'DataStream':
    """
    Applies a reduce transformation on the grouped data stream grouped on by the given
    key position. The `ReduceFunction` will receive input values based on the key value.
    Only input values with the same key will go to the same reducer.

    Example:
    ::

        >>> ds = env.from_collection([(1, 'a'), (2, 'a'), (3, 'a'), (4, 'b')])
        >>> ds.key_by(lambda x: x[1]).reduce(lambda a, b: (a[0] + b[0], b[1]))

    :param func: The ReduceFunction that is called for each element of the DataStream.
    :return: The transformed DataStream.
    """
    # Accept either a ReduceFunction instance or a plain callable; callables are
    # wrapped so the rest of the pipeline only deals with ReduceFunction objects.
    if not isinstance(func, ReduceFunction):
        if callable(func):
            func = ReduceFunctionWrapper(func)
        else:
            raise TypeError(
                "The input must be a ReduceFunction or a callable function!")

    # Imported locally to avoid paying the protobuf import cost at module load.
    from pyflink.fn_execution.flink_fn_execution_pb2 import UserDefinedDataStreamFunction
    func_name = "m_reduce_" + str(func)
    # Build the Java operator and its output type info for the Python reduce function.
    j_python_data_stream_scalar_function_operator, j_output_type_info = \
        self._get_java_python_function_operator(func, None, func_name,
                                                UserDefinedDataStreamFunction.REDUCE)
    return DataStream(self._j_data_stream.transform(
        "Keyed Reduce",
        j_output_type_info,
        j_python_data_stream_scalar_function_operator))
def __init__(self, name: str, reduce_function, type_info: TypeInformation):
    """
    Constructor of the ReducingStateDescriptor.

    :param name: The name of the state.
    :param reduce_function: The ReduceFunction used to aggregate the state.
    :param type_info: The type of the values in the state.
    """
    super(ReducingStateDescriptor, self).__init__(name, type_info)
    # Imported here (not at module level) to avoid a circular import with
    # pyflink.datastream.functions.
    from pyflink.datastream.functions import ReduceFunction, ReduceFunctionWrapper
    # A bare callable is wrapped so that downstream code can rely on always
    # holding a ReduceFunction instance; anything else is rejected.
    if isinstance(reduce_function, ReduceFunction):
        self._reduce_function = reduce_function
    elif callable(reduce_function):
        self._reduce_function = ReduceFunctionWrapper(reduce_function)  # type: ignore
    else:
        raise TypeError("The input must be a ReduceFunction or a callable function!")
def __init__(self, name: str, reduce_function, type_info: TypeInformation):
    """
    Constructor of the ReducingStateDescriptor.

    :param name: The name of the state.
    :param reduce_function: The ReduceFunction used to aggregate the state.
    :param type_info: The type of the values in the state.
    """
    super(ReducingStateDescriptor, self).__init__(name, type_info)
    # Imported locally to avoid a circular import with pyflink.datastream.functions.
    from pyflink.datastream.functions import ReduceFunction, ReduceFunctionWrapper
    # Normalize the reducer: wrap plain callables, reject anything else.
    if not isinstance(reduce_function, ReduceFunction):
        if not callable(reduce_function):
            raise TypeError(
                "The input must be a ReduceFunction or a callable function!")
        reduce_function = ReduceFunctionWrapper(reduce_function)  # type: ignore
    # Only pickled-bytes type info is supported for this state at the moment.
    if not isinstance(type_info, PickledBytesTypeInfo):
        raise ValueError(
            "The type information of the state could only be PickledBytesTypeInfo "
            "(created via Types.PICKLED_BYTE_ARRAY()) currently, got %s"
            % type(type_info))
    self._reduce_function = reduce_function