Code example #1
def data_layer_not_check(name, shape, dtype='float32', lod_level=0):
    """
    This function creates a Tensor on the global block. The created Tensor
    does not check the dtype or the shape of the fed data, because dygraph
    input data can have variable length. This API is used when translating a
    dygraph program into a static graph.

    Note:
        The default :code:`stop_gradient` attribute of the Tensor created by
        this API is True, which means the gradient won't be passed backward
        through the data Tensor. Set :code:`var.stop_gradient = False` if you
        want the gradient to be passed backward.

    Args:
       name (str): The name/alias of the Tensor, see :ref:`api_guide_Name`
           for more details.
       shape (list|tuple): List|Tuple of integers declaring the shape. You can
           set "None" at a dimension to indicate that the dimension can be of
           any size; for example, it is useful to set a changeable batch size
           as "None".
       dtype (np.dtype|VarType|str, optional): The type of the data. Supported
           dtype: bool, float16, float32, float64, int8, int16, int32, int64,
           uint8. Default: float32
       lod_level (int, optional): The LoD level of the LoDTensor. Usually users
           don't have to set this value. For more details about when and how to
           use LoD level, see :ref:`user_guide_lod_tensor` . Default: 0

    Returns:
        Tensor: The global Tensor that gives access to the data.
    """
    helper = LayerHelper('data', **locals())
    shape = list(shape)
    for i in six.moves.range(len(shape)):
        if shape[i] is None:
            shape[i] = -1

    return helper.create_global_variable(
        name=name,
        shape=shape,
        dtype=dtype,
        type=core.VarDesc.VarType.LOD_TENSOR,
        stop_gradient=True,
        lod_level=lod_level,
        is_data=True,
        need_check_feed=False)
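A minimal usage sketch (not part of the original source) is shown below, assuming data_layer_not_check from the code above is in scope in a static-graph program. Because the variable is created with need_check_feed=False, the Executor accepts fed arrays whose shape differs from the declaration.

import numpy as np
import paddle

paddle.enable_static()

# Declared with a fixed second dimension of 3 ...
x = data_layer_not_check(name='x', shape=[None, 3], dtype='float32')
y = paddle.mean(x)

exe = paddle.static.Executor(paddle.CPUPlace())
# ... yet an array with a different second dimension is still accepted,
# since no feed check is performed for this variable.
feed_arr = np.random.rand(4, 5).astype('float32')
out, = exe.run(paddle.static.default_main_program(),
               feed={'x': feed_arr},
               fetch_list=[y])
print(out)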
Code example #2
def data(name, shape, dtype='float32', lod_level=0):
    """
    **Data Layer**

    This function creates a variable on the global block. The global variable
    can be accessed by all the following operators in the graph. The variable
    is a placeholder that can be fed with input data; for example, an Executor
    can feed input data into the variable.

    Note:
        `paddle.fluid.layers.data` is deprecated. It will be removed in a
        future version. Please use `paddle.fluid.data` instead.

        `paddle.fluid.layers.data` sets the shape and dtype at compile time
        but does NOT check the shape or the dtype of the fed data; this
        `paddle.fluid.data` checks the shape and the dtype of data fed by
        Executor or ParallelExecutor at run time.

        To feed variable size inputs, users can set None or -1 on the variable
        dimension when using :code:`paddle.fluid.data`, or feed variable size
        inputs directly to :code:`paddle.fluid.layers.data` and PaddlePaddle
        will fit the size accordingly.

        The default :code:`stop_gradient` attribute of the Variable created by
        this API is True, which means the gradient won't be passed backward
        through the data Variable. Set :code:`var.stop_gradient = False` if you
        want the gradient to be passed backward.

    Args:
       name (str): The name/alias of the variable, see :ref:`api_guide_Name`
           for more details.
       shape (list|tuple): List|Tuple of integers declaring the shape. You can
           set "None" or -1 at a dimension to indicate the dimension can be of any
           size. For example, it is useful to set changeable batch size as "None" or -1.
       dtype (np.dtype|VarType|str, optional): The type of the data. Supported
           dtype: bool, float16, float32, float64, int8, int16, int32, int64,
           uint8. Default: float32.
       lod_level (int, optional): The LoD level of the LoDTensor. Usually users
           don't have to set this value. For more details about when and how to
           use LoD level, see :ref:`user_guide_lod_tensor` . Default: 0.

    Returns:
        Variable: The global variable that gives access to the data.

    Examples:
        .. code-block:: python

          import paddle.fluid as fluid
          import numpy as np

          # Creates a variable with fixed size [3, 2, 1]
          # User can only feed data of the same shape to x
          x = fluid.data(name='x', shape=[3, 2, 1], dtype='float32')

          # Creates a variable with changeable batch size -1.
          # Users can feed data of any batch size into y,
          # but size of each data sample has to be [2, 1]
          y = fluid.data(name='y', shape=[-1, 2, 1], dtype='float32')

          z = x + y

          # In this example, we will feed x and y with np-ndarray "1"
          # and fetch z, like implementing "1 + 1 = 2" in PaddlePaddle
          feed_data = np.ones(shape=[3, 2, 1], dtype=np.float32)

          exe = fluid.Executor(fluid.CPUPlace())
          out = exe.run(fluid.default_main_program(),
                        feed={
                            'x': feed_data,
                            'y': feed_data
                        },
                        fetch_list=[z.name])

          # np-ndarray of shape=[3, 2, 1], dtype=float32, whose elements are 2
          print(out)

    """
    helper = LayerHelper('data', **locals())

    check_type(name, 'name', (six.binary_type, six.text_type), 'data')
    check_type(shape, 'shape', (list, tuple), 'data')

    shape = list(shape)
    for i in six.moves.range(len(shape)):
        if shape[i] is None:
            shape[i] = -1

    return helper.create_global_variable(name=name,
                                         shape=shape,
                                         dtype=dtype,
                                         type=core.VarDesc.VarType.LOD_TENSOR,
                                         stop_gradient=True,
                                         lod_level=lod_level,
                                         is_data=True,
                                         need_check_feed=True)
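A short complementary sketch (not from the original) of the run-time feed check the docstring describes; unlike data_layer_not_check in code example #1, fluid.data is created with need_check_feed=True, so a feed whose shape disagrees with the declaration is expected to be rejected.

import numpy as np
import paddle.fluid as fluid

x = fluid.data(name='x', shape=[3, 2, 1], dtype='float32')
exe = fluid.Executor(fluid.CPUPlace())

try:
    # The declared shape is [3, 2, 1]; feeding [4, 2, 1] should fail the check.
    exe.run(fluid.default_main_program(),
            feed={'x': np.ones([4, 2, 1], dtype=np.float32)},
            fetch_list=[x.name])
except Exception as e:
    print('feed check failed as expected:', type(e).__name__)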
Code example #3
File: input.py  Project: goodcoder-cnn/Paddle
def data(name, shape, dtype=None, lod_level=0):
    """
    **Data Layer**

    This function creates a variable on the global block. The global variable
    can be accessed by all the following operators in the graph. The variable
    is a placeholder that can be fed with input data; for example, an Executor
    can feed input data into the variable. When `dtype` is None, the dtype is
    taken from the global default dtype via `paddle.get_default_dtype()`.

    Args:
       name (str): The name/alias of the variable, see :ref:`api_guide_Name`
           for more details.
       shape (list|tuple): List|Tuple of integers declaring the shape. You can
           set "None" or -1 at a dimension to indicate the dimension can be of any
           size. For example, it is useful to set changeable batch size as "None" or -1.
       dtype (np.dtype|str, optional): The type of the data. Supported
           dtype: bool, float16, float32, float64, int8, int16, int32, int64,
           uint8. Default: None. When `dtype` is not set, the dtype is taken
           from the global default dtype via `paddle.get_default_dtype()`.
       lod_level (int, optional): The LoD level of the LoDTensor. Usually users
           don't have to set this value. For more details about when and how to
           use LoD level, see :ref:`user_guide_lod_tensor` . Default: 0.

    Returns:
        Variable: The global variable that gives access to the data.

    Examples:
        .. code-block:: python

          import numpy as np
          import paddle

          # Creates a variable with fixed size [3, 2, 1]
          # User can only feed data of the same shape to x
          # the dtype is not set, so it will set "float32" by
          # paddle.get_default_dtype(). You can use paddle.get_default_dtype() to
          # change the global dtype
          x = paddle.static.data(name='x', shape=[3, 2, 1])

          # Creates a variable with changeable batch size -1.
          # Users can feed data of any batch size into y,
          # but size of each data sample has to be [2, 1]
          y = paddle.static.data(name='y', shape=[-1, 2, 1], dtype='float32')

          z = x + y

          # In this example, we will feed x and y with np-ndarray "1"
          # and fetch z, like implementing "1 + 1 = 2" in PaddlePaddle
          feed_data = np.ones(shape=[3, 2, 1], dtype=np.float32)

          exe = paddle.static.Executor(paddle.framework.CPUPlace())
          out = exe.run(paddle.static.default_main_program(),
                        feed={
                            'x': feed_data,
                            'y': feed_data
                        },
                        fetch_list=[z.name])

          # np-ndarray of shape=[3, 2, 1], dtype=float32, whose elements are 2
          print(out)

    """
    helper = LayerHelper('data', **locals())
    check_type(name, 'name', (six.binary_type, six.text_type), 'data')
    check_type(shape, 'shape', (list, tuple), 'data')

    shape = list(shape)
    for i in six.moves.range(len(shape)):
        if shape[i] is None:
            shape[i] = -1

    if dtype:
        return helper.create_global_variable(
            name=name,
            shape=shape,
            dtype=dtype,
            type=core.VarDesc.VarType.LOD_TENSOR,
            stop_gradient=True,
            lod_level=lod_level,
            is_data=True,
            need_check_feed=True)
    else:
        return helper.create_global_variable(
            name=name,
            shape=shape,
            dtype=paddle.get_default_dtype(),
            type=core.VarDesc.VarType.LOD_TENSOR,
            stop_gradient=True,
            lod_level=lod_level,
            is_data=True,
            need_check_feed=True)
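A minimal sketch (hypothetical shapes, not from the original) of how the dtype=None default interacts with the global default dtype; paddle.set_default_dtype changes the value that paddle.get_default_dtype() returns.

import paddle

paddle.enable_static()

paddle.set_default_dtype('float64')
# dtype omitted: the variable picks up the global default dtype (float64 here).
x = paddle.static.data(name='x', shape=[None, 8])
# dtype given explicitly: the global default is ignored.
y = paddle.static.data(name='y', shape=[None, 8], dtype='float32')

print(x.dtype)  # float64, from paddle.get_default_dtype()
print(y.dtype)  # float32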
Code example #4
File: quantize_transpiler.py  Project: pyqt1/MyPaddle
class QuantizeTranspiler(object):
    def __init__(self,
                 weight_bits=8,
                 activation_bits=8,
                 activation_quantize_type='abs_max',
                 weight_quantize_type='abs_max',
                 window_size=10000,
                 moving_rate=0.9):
        """
        Convert and rewrite the fluid Program according to weight and
        activation quantization type.

        Args:
            weight_bits (int): quantization bit number for weights;
                the bias is not quantized.
            activation_bits (int): quantization bit number for activations.
            activation_quantize_type (str): quantization type for activations,
                now supports 'abs_max', 'range_abs_max' and
                'moving_average_abs_max'. With 'abs_max', the quantization
                scale is calculated dynamically at each step during both
                training and testing. With 'range_abs_max', a static
                quantization scale is calculated during training and used for
                inference.
            weight_quantize_type (str): quantization type for weights,
                supports 'abs_max'. 'range_abs_max' is usually not used for
                weights, since weights are fixed once the model is trained.
            window_size (int): the window size for 'range_abs_max' quantization.
            moving_rate (float): the decay coefficient of the moving average
                for 'moving_average_abs_max' quantization. Default: 0.9.

        Examples:

        .. code-block:: python

            # The program will be rewritten in place. If you don't want to
            # change the original program, clone it first:
            # program = program.clone()
            t = fluid.QuantizeTranspiler()
            t.training_transpile(program)

        """
        self.weight_bits = weight_bits
        self.activation_bits = activation_bits
        quant_type = ['abs_max', 'range_abs_max', 'moving_average_abs_max']
        if weight_quantize_type not in quant_type:
            raise ValueError(
                "Unknown weight_quantize_type: '%s'. It can only be "
                "'abs_max', 'range_abs_max' or 'moving_average_abs_max'." %
                str(weight_quantize_type))
        if activation_quantize_type not in quant_type:
            raise ValueError(
                "Unknown activation_quantize_type: '%s'. It can only be "
                "'abs_max', 'range_abs_max' or 'moving_average_abs_max'." %
                str(activation_quantize_type))

        self.weight_quantize_type = weight_quantize_type
        self.activation_quantize_type = activation_quantize_type

        self.window_size = window_size
        self.moving_rate = moving_rate
        self.helper = LayerHelper(self.__class__.__name__)
        self.fake_quant_op_types = [
            'fake_quantize_abs_max', 'fake_quantize_range_abs_max',
            'fake_quantize_moving_average_abs_max'
        ]
        self.fake_dequant_op_types = ['fake_dequantize_max_abs']
        self.is_test = None
        self.global_step = None

    def training_transpile(self, program=None, startup_program=None):
        """Rewrites a training input program in place for simulated
        quantization. Insert fake quantization and de-quantization ops into
        program to simulate the error introduced by quantization. And change
        the graident ops' input by using the faked quantization weights and
        activation. Since the program is transformed in place, the graph
        connection will change.

        Args:
            program (Program): the input program to be transpile.
        """
        self.is_test = False
        program = default_main_program() if program is None else program
        startup_program = default_startup_program() if startup_program is \
            None else startup_program

        # record the variables which have been quantized and dequantized.
        dequanted_vars = [
            collections.OrderedDict() for _ in range(len(program.blocks))
        ]
        grad_op_types = ['%s_grad' % (type) for type in _QUANTIZABLE_OP_TYPES]

        params = [p.name for p in program.global_block().iter_parameters()]

        def _transpile_forward(block, op):
            idx = block.ops.index(op)
            block_id = block.idx
            # insert quant op and dequant op
            for name in op.input_arg_names:
                # reuse the dequantized var if the input is shared between ops
                if name in dequanted_vars[block_id]:
                    dequant_var = dequanted_vars[block_id][name]
                else:
                    var = block.var(name)
                    quant_bits = self.weight_bits if var.name in params \
                                 else self.activation_bits
                    quant_type = self.weight_quantize_type if var.name \
                        in params else self.activation_quantize_type

                    quant_var, scale_var = self._insert_quant_op(
                        block, idx, var, quant_bits, quant_type)
                    dequant_var = self._insert_dequant_op(
                        block, idx + 1, quant_var, scale_var, quant_bits)
                    dequanted_vars[block_id][name] = dequant_var
                # rename the forward op inputs
                op._rename_input(name, dequant_var.name)

        def _transpile_backward(block, op):
            block_id = block.idx
            no_dequanted_input_vars = True
            for name in op.input_arg_names:
                if name in dequanted_vars[block_id]:
                    dequant_var = dequanted_vars[block_id][name]
                    op._rename_input(name, dequant_var.name)
                    no_dequanted_input_vars = False
            if no_dequanted_input_vars:
                raise ValueError("There is no dequanted inputs for op %s." %
                                 (op.type))

        with program_guard(program, startup_program):
            self._create_global_step()
            for block in program.blocks:
                ops = list(block.ops)
                block_id = block.idx
                for op in ops:
                    # rewrite the forward ProgramDesc
                    if op.type in _QUANTIZABLE_OP_TYPES:
                        _transpile_forward(block, op)
                    # rename the backward op inputs
                    if op.type in grad_op_types:
                        _transpile_backward(block, op)

    def _create_global_step(self):
        if self.weight_quantize_type == 'range_abs_max' or \
            self.activation_quantize_type == 'range_abs_max':
            self.global_step = autoincreased_step_counter()

    def freeze_program(self, program, place, scope=None):
        """Freeze input training program for inference.

        Args:
            program (Program): the input program to be transpile.
        """

        self.is_test = True
        scope = global_scope() if scope is None else scope
        program = default_main_program() if program is None else program

        persistable_vars = [
            v.name
            for v in filter(lambda var: var.persistable, program.list_vars())
        ]
        op_in_rename_map = [
            collections.OrderedDict() for _ in range(len(program.blocks))
        ]
        op_out_rename_map = [
            collections.OrderedDict() for _ in range(len(program.blocks))
        ]
        var_scale_map = [
            collections.OrderedDict() for _ in range(len(program.blocks))
        ]

        def _remove_fake_quant_and_dequant_op(block, op):
            idx = block.ops.index(op)
            block_id = block.idx
            k = op.output('Out')[0]
            v = op.input('X')[0]
            if v not in op_in_rename_map[block_id]:
                op_in_rename_map[block_id][k] = v
            else:
                op_in_rename_map[block_id][k] = op_in_rename_map[block_id][v]
            block._remove_op(idx)

        def _insert_post_dequant_op(block, op):
            idx = block.ops.index(op)
            block_id = block.idx
            max_range = None
            scale_var = None
            for name in op.input_arg_names:
                # rename the op's input to the input of the fake quant/dequant
                # op that has just been removed
                if name in op_in_rename_map[block_id]:
                    op._rename_input(name, op_in_rename_map[block_id][name])

                scale_v = var_scale_map[block_id][_original_var_name(name)]
                if _original_var_name(name) in persistable_vars:
                    param_range = (1 << (self.weight_bits - 1)) - 1
                    act_range = (1 << (self.activation_bits - 1)) - 1
                    assert _is_float(scale_v)
                    max_range = param_range * act_range / scale_v
                else:
                    assert isinstance(scale_v, Variable)
                    scale_var = scale_v

            if len(op.output_arg_names) != 1:
                raise ValueError("Only support one output, but op %s has"
                                 " more than one output." % (op.type))
            out_var = block.var(op.output_arg_names[0])
            dequant_var = block.create_var(name=_dequantized_var_name(
                out_var.name),
                                           type=out_var.type,
                                           shape=out_var.shape,
                                           dtype=out_var.dtype)
            # insert fake_dequantize_op
            dequant_op = block._insert_op(
                idx + 1,
                type="fake_dequantize_max_abs",
                attrs={'max_range': float(max_range)},
                inputs={
                    "X": out_var,
                    'Scale': scale_var
                },
                outputs={"Out": dequant_var})
            op_out_rename_map[block_id][out_var.name] = dequant_var.name
            return dequant_var

        def _load_var(name):
            return np.array(scope.find_var(name).get_tensor())

        def _restore_var(name, arr):
            t = scope.find_var(name).get_tensor()
            t.set(arr, place)

        for block in program.blocks:
            ops = list(block.ops)
            block_id = block.idx
            for op in ops:
                op_type = op.type

                # A dequant_op is inserted after fc/conv, so the inputs of the
                # ops that follow fc/conv need to be renamed to the dequant_op
                for name in op.input_arg_names:
                    if name in op_out_rename_map[block_id]:
                        op._rename_input(name,
                                         op_out_rename_map[block_id][name])

                if op_type in self.fake_quant_op_types:
                    in_arg_name = op.input('X')[0]
                    if in_arg_name in persistable_vars:
                        if self.weight_quantize_type == 'abs_max':
                            param = _load_var(in_arg_name)
                            scale_v = np.max(np.abs(param))
                        else:
                            scale_v = _load_var(op.output('OutScale')[0])
                        var_scale_map[block_id][in_arg_name] = scale_v
                    else:
                        scale_v = block.var(op.output('OutScale')[0])
                        var_scale_map[block_id][in_arg_name] = scale_v

                    if in_arg_name in persistable_vars:
                        _remove_fake_quant_and_dequant_op(block, op)
                        # quantize weight and restore
                        param_t = _load_var(in_arg_name)
                        param_q_t = quant(param_t, scale_v, self.weight_bits)
                        _restore_var(in_arg_name, param_q_t)

                if op_type in self.fake_dequant_op_types:
                    _remove_fake_quant_and_dequant_op(block, op)

                if op_type in _QUANTIZABLE_OP_TYPES:
                    dequant_var = _insert_post_dequant_op(block, op)

        # remove the unused var in ProgramDesc
        self._remove_unused_var(program)
        #program = program.clone()

    def convert_to_int8(self, program, place, scope=None):
        scope = global_scope() if scope is None else scope
        program = default_main_program() if program is None else program

        def _load_var(name):
            return np.array(scope.find_var(name).get_tensor())

        global_block = program.global_block()

        def convert_to_int8(var):
            int8_var_name = var.name + ".int8"
            int8_var = global_block.create_parameter(
                name=int8_var_name.encode('ascii'),
                type=var.type,
                dtype=core.VarDesc.VarType.INT8,
                shape=var.shape)

            tensor = _load_var(var.name)

            scope.var(int8_var_name)
            int8_tensor = scope.find_var(int8_var_name).get_tensor()
            int8_tensor.set(tensor.astype(np.int8), place)
            return int8_var

        input_map = {}
        for block in program.blocks:
            for op in list(block.ops):
                if op.type in _QUANTIZABLE_OP_TYPES:
                    for name in op.input_arg_names:
                        var = block.var(name)
                        if var.persistable:
                            if name not in input_map:
                                int8_var = convert_to_int8(var)
                                input_map[name] = int8_var.name
                            op._rename_input(name, input_map[name])
        self._remove_unused_var(program)

    def _remove_unused_var(self, program):
        all_remove_vars = []
        for block in program.blocks:
            args = []
            for op in block.ops:
                args += op.input_arg_names
                args += op.output_arg_names
            args = list(set(args))  #vals of all left ops
            var_names = block.vars.keys()  # all vals
            sub_block_remove_vars = []
            for var in var_names:
                if var not in args:
                    sub_block_remove_vars.append(var)
            all_remove_vars.append(sub_block_remove_vars)

        remove_vars = [list(set(v)) for v in all_remove_vars]
        for i, block in enumerate(program.blocks):
            for v in remove_vars[i]:
                block._remove_var(v)

    def _insert_quant_abs_max_op(self, block, idx, var, quant_bits):
        """Insert fake_quantize_abs_max op.
        """
        quant_var = block.create_var(name=_quantized_var_name(var.name),
                                     type=var.type,
                                     shape=var.shape,
                                     dtype=var.dtype)
        scale = block.create_var(name=_quantized_scale_name(var.name),
                                 type=var.type,
                                 shape=var.shape,
                                 dtype=var.dtype)
        quant_op = block._insert_op(idx,
                                    type='fake_quantize_abs_max',
                                    attrs={'bit_length': quant_bits},
                                    inputs={'X': var},
                                    outputs={
                                        'Out': quant_var,
                                        'OutScale': scale
                                    })
        return quant_var, scale

    def _insert_quant_range_abs_max_op(self, block, idx, var, quant_bits):
        """Insert fake_quantize_range_abs_max
        """
        quant_var = block.create_var(name=_quantized_var_name(var.name),
                                     type=var.type,
                                     shape=var.shape,
                                     dtype=var.dtype)
        scale = self.helper.create_parameter(attr=ParamAttr(
            name=_quantized_scale_name(var.name),
            initializer=Constant(0.001),
            trainable=False),
                                             shape=[1],
                                             dtype=var.dtype)
        scale.stop_gradient = True

        ins = {'X': var, 'InScale': scale}
        outs = {'Out': quant_var, 'OutScale': scale}
        if not self.is_test:
            # A persistable variable recording the scales of the last
            # `window_size` steps
            scales = self.helper.create_global_variable(
                name=unique_name.generate('scales'),
                persistable=True,
                dtype=var.dtype,
                shape=[self.window_size])
            self.helper.set_variable_initializer(scales,
                                                 initializer=Constant(value=0))

            ins['Iter'] = self.global_step
            outs['OutScales'] = scales

        attrs = {
            'window_size': self.window_size,
            'bit_length': quant_bits,
            'is_test': self.is_test
        }

        quant_op = block._insert_op(idx,
                                    type='fake_quantize_range_abs_max',
                                    attrs=attrs,
                                    inputs=ins,
                                    outputs=outs)

        return quant_var, scale

    def _insert_quant_moving_average_abs_max_op(self, block, idx, var,
                                                quant_bits):
        """Insert fake_quantize_moving_average_abs_max
        """
        quant_var = block.create_var(name=_quantized_var_name(var.name),
                                     type=var.type,
                                     shape=var.shape,
                                     dtype=var.dtype)
        state = self.helper.create_global_variable(
            name=unique_name.generate('state'),
            persistable=True,
            dtype=var.dtype,
            shape=[1])
        self.helper.set_variable_initializer(state,
                                             initializer=Constant(value=1))
        accum = self.helper.create_global_variable(
            name=unique_name.generate('accum'),
            persistable=True,
            dtype=var.dtype,
            shape=[1])
        self.helper.set_variable_initializer(accum,
                                             initializer=Constant(value=1))
        scale = self.helper.create_parameter(attr=ParamAttr(
            name=_quantized_scale_name(var.name),
            initializer=Constant(0.001),
            trainable=False),
                                             shape=[1],
                                             dtype=var.dtype)
        scale.stop_gradient = True

        ins = {'X': var, 'InScale': scale}
        outs = {'Out': quant_var, 'OutScale': scale}
        if not self.is_test:
            ins['InState'] = state
            ins['InAccum'] = accum
            outs['OutState'] = state
            outs['OutAccum'] = accum

        attrs = {
            'bit_length': quant_bits,
            'moving_rate': self.moving_rate,
            'is_test': self.is_test
        }

        quant_op = block._insert_op(
            idx,
            type='fake_quantize_moving_average_abs_max',
            attrs=attrs,
            inputs=ins,
            outputs=outs)

        return quant_var, scale

    def _insert_quant_op(self, block, idx, var, quant_bits, quant_type):
        """
        Insert fake_quantize_op
        """
        if quant_type == 'abs_max':
            return self._insert_quant_abs_max_op(block, idx, var, quant_bits)
        elif quant_type == 'range_abs_max':
            return self._insert_quant_range_abs_max_op(block, idx, var,
                                                       quant_bits)
        elif quant_type == 'moving_average_abs_max':
            return self._insert_quant_moving_average_abs_max_op(
                block, idx, var, quant_bits)

    def _insert_dequant_op(self, block, idx, var, scale, quant_bits):
        """
        Insert fake_quantize_op
        """
        dequant_var = block.create_var(name=_dequantized_var_name(var.name),
                                       type=var.type,
                                       shape=var.shape,
                                       dtype=var.dtype)
        # insert fake_dequantize_op
        max_range = (1 << (quant_bits - 1)) - 1
        dequant_op = block._insert_op(idx,
                                      type="fake_dequantize_max_abs",
                                      attrs={'max_range': float(max_range)},
                                      inputs={
                                          "X": var,
                                          'Scale': scale
                                      },
                                      outputs={"Out": dequant_var})
        return dequant_var
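A rough end-to-end sketch (not part of the original; the small fc network and variable names are illustrative) of how the three public steps of QuantizeTranspiler fit together: training_transpile for quantization-aware training, freeze_program to fold the learned scales into an inference program, and convert_to_int8 to store the quantizable weights as int8 parameters.

import paddle.fluid as fluid

image = fluid.data(name='image', shape=[None, 784], dtype='float32')
label = fluid.data(name='label', shape=[None, 1], dtype='int64')
fc = fluid.layers.fc(input=image, size=10, act='softmax')
loss = fluid.layers.mean(fluid.layers.cross_entropy(input=fc, label=label))
fluid.optimizer.SGD(learning_rate=0.001).minimize(loss)

main_prog = fluid.default_main_program()
startup_prog = fluid.default_startup_program()
place = fluid.CPUPlace()
test_prog = main_prog.clone(for_test=True)

t = QuantizeTranspiler(weight_bits=8,
                       activation_bits=8,
                       activation_quantize_type='moving_average_abs_max')
# 1) Insert fake quant/dequant ops so training simulates quantization error.
t.training_transpile(main_prog, startup_prog)
t.training_transpile(test_prog, startup_prog)

exe = fluid.Executor(place)
exe.run(startup_prog)
# ... run training iterations on main_prog here ...

# 2) Fold the learned scales into the inference program (rewritten in place).
t.freeze_program(test_prog, place)
# 3) Optionally store the quantizable weights (conv/mul inputs) as int8.
t.convert_to_int8(test_prog, place)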
Code example #5
def ctr_metric_bundle(input, label):
    """
    CTR-related metric layer.

    This function helps compute the CTR-related metrics: RMSE, MAE, predicted_ctr and q_value.
    To compute the final values of these metrics, do the following computations with the
    total instance number:

        MAE = local_abserr / instance_number
        RMSE = sqrt(local_sqrerr / instance_number)
        predicted_ctr = local_prob / instance_number
        q = local_q / instance_number

    Note that in a distributed job, you should all-reduce these metrics and the instance
    number first.

    Args:
        input(Variable): A floating-point 2D Variable, values are in the range
                         [0, 1]. Each row is sorted in descending order. This
                         input should be the output of topk. Typically, this
                         Variable indicates the probability of each label.
        label(Variable): A 2D int Variable indicating the label of the training
                         data. The height is batch size and width is always 1.

    Returns:
        local_sqrerr(Variable): Local sum of squared error
        local_abserr(Variable): Local sum of absolute error
        local_prob(Variable): Local sum of predicted ctr
        local_q(Variable): Local sum of q value
        local_pos_num(Variable): Local sum of positive labels
        local_ins_num(Variable): Local instance count

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid
            data = fluid.layers.data(name="data", shape=[32, 32], dtype="float32")
            label = fluid.layers.data(name="label", shape=[1], dtype="int32")
            predict = fluid.layers.sigmoid(fluid.layers.fc(input=data, size=1))
            auc_out = fluid.contrib.layers.ctr_metric_bundle(input=predict, label=label)
    """
    assert input.shape == label.shape
    helper = LayerHelper("ctr_metric_bundle", **locals())

    local_abserr = helper.create_global_variable(persistable=True,
                                                 dtype='float32',
                                                 shape=[1])
    local_sqrerr = helper.create_global_variable(persistable=True,
                                                 dtype='float32',
                                                 shape=[1])
    local_prob = helper.create_global_variable(persistable=True,
                                               dtype='float32',
                                               shape=[1])
    local_q = helper.create_global_variable(persistable=True,
                                            dtype='float32',
                                            shape=[1])
    local_pos_num = helper.create_global_variable(persistable=True,
                                                  dtype='float32',
                                                  shape=[1])
    local_ins_num = helper.create_global_variable(persistable=True,
                                                  dtype='float32',
                                                  shape=[1])

    tmp_res_elesub = helper.create_global_variable(persistable=False,
                                                   dtype='float32',
                                                   shape=[-1])
    tmp_res_sigmoid = helper.create_global_variable(persistable=False,
                                                    dtype='float32',
                                                    shape=[-1])
    tmp_ones = helper.create_global_variable(persistable=False,
                                             dtype='float32',
                                             shape=[-1])

    batch_prob = helper.create_global_variable(persistable=False,
                                               dtype='float32',
                                               shape=[1])
    batch_abserr = helper.create_global_variable(persistable=False,
                                                 dtype='float32',
                                                 shape=[1])
    batch_sqrerr = helper.create_global_variable(persistable=False,
                                                 dtype='float32',
                                                 shape=[1])
    batch_q = helper.create_global_variable(persistable=False,
                                            dtype='float32',
                                            shape=[1])
    batch_pos_num = helper.create_global_variable(persistable=False,
                                                  dtype='float32',
                                                  shape=[1])
    batch_ins_num = helper.create_global_variable(persistable=False,
                                                  dtype='float32',
                                                  shape=[1])
    for var in [
            local_abserr, batch_abserr, local_sqrerr, batch_sqrerr, local_prob,
            batch_prob, local_q, batch_q, batch_pos_num, batch_ins_num,
            local_pos_num, local_ins_num
    ]:
        helper.set_variable_initializer(var, Constant(value=0.0,
                                                      force_cpu=True))

    helper.append_op(type="elementwise_sub",
                     inputs={
                         "X": [input],
                         "Y": [label]
                     },
                     outputs={"Out": [tmp_res_elesub]})

    helper.append_op(type="squared_l2_norm",
                     inputs={"X": [tmp_res_elesub]},
                     outputs={"Out": [batch_sqrerr]})
    helper.append_op(type="elementwise_add",
                     inputs={
                         "X": [batch_sqrerr],
                         "Y": [local_sqrerr]
                     },
                     outputs={"Out": [local_sqrerr]})

    helper.append_op(type="l1_norm",
                     inputs={"X": [tmp_res_elesub]},
                     outputs={"Out": [batch_abserr]})
    helper.append_op(type="elementwise_add",
                     inputs={
                         "X": [batch_abserr],
                         "Y": [local_abserr]
                     },
                     outputs={"Out": [local_abserr]})

    helper.append_op(type="reduce_sum",
                     inputs={"X": [input]},
                     outputs={"Out": [batch_prob]})
    helper.append_op(type="elementwise_add",
                     inputs={
                         "X": [batch_prob],
                         "Y": [local_prob]
                     },
                     outputs={"Out": [local_prob]})
    helper.append_op(type="sigmoid",
                     inputs={"X": [input]},
                     outputs={"Out": [tmp_res_sigmoid]})
    helper.append_op(type="reduce_sum",
                     inputs={"X": [tmp_res_sigmoid]},
                     outputs={"Out": [batch_q]})
    helper.append_op(type="elementwise_add",
                     inputs={
                         "X": [batch_q],
                         "Y": [local_q]
                     },
                     outputs={"Out": [local_q]})

    helper.append_op(type="reduce_sum",
                     inputs={"X": [label]},
                     outputs={"Out": [batch_pos_num]})
    helper.append_op(type="elementwise_add",
                     inputs={
                         "X": [batch_pos_num],
                         "Y": [local_pos_num]
                     },
                     outputs={"Out": [local_pos_num]})

    helper.append_op(type='fill_constant_batch_size_like',
                     inputs={"Input": label},
                     outputs={'Out': [tmp_ones]},
                     attrs={
                         'shape': [-1, 1],
                         'dtype': tmp_ones.dtype,
                         'value': float(1.0),
                     })
    helper.append_op(type="reduce_sum",
                     inputs={"X": [tmp_ones]},
                     outputs={"Out": [batch_ins_num]})
    helper.append_op(type="elementwise_add",
                     inputs={
                         "X": [batch_ins_num],
                         "Y": [local_ins_num]
                     },
                     outputs={"Out": [local_ins_num]})

    return local_sqrerr, local_abserr, local_prob, local_q, local_pos_num, local_ins_num
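A small post-processing sketch (not part of the original source): after fetching the six accumulators returned above as numpy values, the final metrics follow the formulas given in the docstring. The 'positive_ratio' entry is an illustrative addition derived from the two extra accumulators, not something the docstring names.

import math

def finalize_ctr_metrics(local_sqrerr, local_abserr, local_prob, local_q,
                         local_pos_num, local_ins_num):
    # Inputs are the fetched scalar values of the accumulators above.
    ins_num = float(local_ins_num)
    return {
        'MAE': float(local_abserr) / ins_num,
        'RMSE': math.sqrt(float(local_sqrerr) / ins_num),
        'predicted_ctr': float(local_prob) / ins_num,
        'q': float(local_q) / ins_num,
        # Illustrative: fraction of positive instances seen so far.
        'positive_ratio': float(local_pos_num) / ins_num,
    }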