Example #1
0
    def reduce(self, func: Union[Callable, ReduceFunction]) -> 'DataStream':
        """
        Applies a reduce transformation on the grouped data stream grouped on by the given
        key position. The `ReduceFunction` will receive input values based on the key value.
        Only input values with the same key will go to the same reducer.

        Example:
        ::
            >>> ds = env.from_collection([(1, 'a'), (2, 'a'), (3, 'a'), (4, 'b')])
            >>> ds.key_by(lambda x: x[1]).reduce(lambda a, b: (a[0] + b[0], b[1]))

        :param func: The ReduceFunction that is called for each element of the DataStream.
        :return: The transformed DataStream.
        """

        # Accept either a ReduceFunction instance or a plain callable; a callable
        # is wrapped so the rest of the pipeline only deals with ReduceFunction.
        if not isinstance(func, ReduceFunction):
            if callable(func):
                func = ReduceFunctionWrapper(func)
            else:
                raise TypeError(
                    "The input must be a ReduceFunction or a callable function!"
                )

        # Local import of the generated protobuf module (presumably to defer the
        # heavy/generated-code import until a reduce is actually built).
        from pyflink.fn_execution.flink_fn_execution_pb2 import UserDefinedDataStreamFunction
        func_name = "m_reduce_" + str(func)
        # Build the Java-side operator that will execute the Python reduce
        # function, together with its output type information.
        j_python_data_stream_scalar_function_operator, j_output_type_info = \
            self._get_java_python_function_operator(func,
                                                    None,
                                                    func_name,
                                                    UserDefinedDataStreamFunction.REDUCE)
        # Wrap the transformed Java stream back into a Python DataStream.
        return DataStream(
            self._j_data_stream.transform(
                "Keyed Reduce", j_output_type_info,
                j_python_data_stream_scalar_function_operator))
Example #2
0
    def __init__(self,
                 name: str,
                 reduce_function,
                 type_info: TypeInformation):
        """
        Constructor of the ReducingStateDescriptor.

        :param name: The name of the state.
        :param reduce_function: The ReduceFunction used to aggregate the state.
        :param type_info: The type of the values in the state.
        """
        super(ReducingStateDescriptor, self).__init__(name, type_info)
        from pyflink.datastream.functions import ReduceFunction, ReduceFunctionWrapper
        # Normalize the user-supplied function: a ReduceFunction is stored as-is,
        # a bare callable is promoted via the wrapper, anything else is rejected.
        if isinstance(reduce_function, ReduceFunction):
            self._reduce_function = reduce_function
        elif callable(reduce_function):
            self._reduce_function = ReduceFunctionWrapper(reduce_function)  # type: ignore
        else:
            raise TypeError("The input must be a ReduceFunction or a callable function!")
Example #3
0
    def __init__(self, name: str, reduce_function, type_info: TypeInformation):
        """
        Constructor of the ReducingStateDescriptor.

        :param name: The name of the state.
        :param reduce_function: The ReduceFunction used to aggregate the state.
        :param type_info: The type of the values in the state.
        """
        super(ReducingStateDescriptor, self).__init__(name, type_info)
        from pyflink.datastream.functions import ReduceFunction, ReduceFunctionWrapper
        # Promote a plain callable to a ReduceFunction; reject anything that is
        # neither a ReduceFunction nor callable (guard-clause form).
        if not isinstance(reduce_function, ReduceFunction):
            if not callable(reduce_function):
                raise TypeError(
                    "The input must be a ReduceFunction or a callable function!"
                )
            reduce_function = ReduceFunctionWrapper(
                reduce_function)  # type: ignore
        # Only pickled byte-array type information is accepted for this state.
        if not isinstance(type_info, PickledBytesTypeInfo):
            raise ValueError(
                "The type information of the state could only be PickledBytesTypeInfo "
                "(created via Types.PICKLED_BYTE_ARRAY()) currently, got %s" %
                type(type_info))
        self._reduce_function = reduce_function