Example #1
def create_parameter(layers, shape, dtype):
    # use layerhelper to init bias, scale, mean, variance
    helper = LayerHelper("batch_norm", **locals())
    param_name = "batch_norm_" + str(layers)
    scale = helper.create_parameter(attr=fluid.ParamAttr(name=param_name +
                                                         '.w' + '_0'),
                                    shape=[shape],
                                    dtype=dtype,
                                    default_initializer=Constant(1.0))
    scale.stop_gradient = True

    bias = helper.create_parameter(attr=fluid.ParamAttr(name=param_name +
                                                        '.b' + '_0'),
                                   shape=[shape],
                                   dtype=dtype,
                                   is_bias=True)
    bias.stop_gradient = True

    mean = helper.create_parameter(attr=ParamAttr(name=param_name + '.w' +
                                                  '_1',
                                                  initializer=Constant(0.0),
                                                  trainable=False),
                                   shape=[shape],
                                   dtype=dtype)
    mean.stop_gradient = True

    variance = helper.create_parameter(attr=ParamAttr(
        name=param_name + '.w' + '_2',
        initializer=Constant(1.0),
        trainable=False),
                                       shape=[shape],
                                       dtype=dtype)
    variance.stop_gradient = True

    return scale, bias, mean, variance
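A minimal call sketch for the helper above; the layer index, channel count, and dtype are illustrative assumptions, and the call relies on the same fluid/LayerHelper imports the function itself uses:

# Hypothetical call; the arguments are illustrative only.
scale, bias, mean, variance = create_parameter(0, 64, 'float32')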
def layer_norm(x,
               begin_norm_axis=1,
               epsilon=1e-12,
               param_attr=None,
               bias_attr=None):
    """
    Replace the built-in layer_norm op with this function
    """
    helper = LayerHelper('layer_norm', **locals())
    mean = layers.reduce_mean(x, dim=begin_norm_axis, keep_dim=True)
    shift_x = layers.elementwise_sub(x=x, y=mean, axis=0)
    variance = layers.reduce_mean(
        layers.square(shift_x), dim=begin_norm_axis, keep_dim=True)
    r_stdev = layers.rsqrt(variance + epsilon)
    norm_x = layers.elementwise_mul(x=shift_x, y=r_stdev, axis=0)

    param_shape = [reduce(lambda x, y: x * y, norm_x.shape[begin_norm_axis:])]
    param_dtype = norm_x.dtype
    scale = helper.create_parameter(
        attr=param_attr,
        shape=param_shape,
        dtype=param_dtype,
        default_initializer=fluid.initializer.Constant(1.))
    bias = helper.create_parameter(
        attr=bias_attr,
        shape=param_shape,
        dtype=param_dtype,
        is_bias=True,
        default_initializer=fluid.initializer.Constant(0.))

    out = layers.elementwise_mul(x=norm_x, y=scale, axis=-1)
    out = layers.elementwise_add(x=out, y=bias, axis=-1)

    return out
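A hedged usage sketch for the custom layer_norm above; the tensor name and shape are illustrative assumptions, and the snippet presumes the same fluid/layers imports used by the function:

import paddle.fluid as fluid
# Hypothetical 2-D input; normalization runs over the trailing dimension.
x = fluid.data(name="x", shape=[None, 256], dtype="float32")
out = layer_norm(x, begin_norm_axis=1)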
Example #3
File: nn.py Project: iducn/Paddle
def rank_attention(input,
                   rank_offset,
                   rank_param_shape,
                   rank_param_attr,
                   max_rank=3):
    """
    **Rank Attention layer**
    This Op calculates rank attention between input and rank_param, where
    rank_param describes the organization of the data. Note: it currently
    supports GPU devices only.
    This Op lives in contrib, which means it is not exposed in the public API.
    Args:
        input: Tensor with data type float32, float64.
        rank_offset: Tensor with data type int32.
        rank_param_shape: The shape of rank_param.
        rank_param_attr: Attribute initializer of rank_param.
        max_rank: The max rank of input's ranks.
    Returns:
        Variable: A Tensor with the same data type as input's.
    Examples:
        .. code-block:: python
           import paddle.fluid as fluid
           import numpy as np
           
           input = fluid.data(name="input", shape=[None, 2], dtype="float32")
           rank_offset = fluid.data(name="rank_offset", shape=[None, 7], dtype="int32")
           out = fluid.contrib.layers.rank_attention(input=input,
                                                     rank_offset=rank_offset,
                                                     rank_param_shape=[18,3],
                                                     rank_param_attr=
                                                       fluid.ParamAttr(learning_rate=1.0,
                                                                     name="ubm_rank_param.w_0",
                                                                     initializer=
                                                                     fluid.initializer.Xavier(uniform=False)),
                                                      max_rank=3)
    """
    helper = LayerHelper('rank_attention', **locals())
    dtype = helper.input_dtype(input_param_name='input')
    input_shape = input.shape
    assert input_shape[1] * max_rank * max_rank == rank_param_shape[0]

    rank_param = helper.create_parameter(attr=rank_param_attr,
                                         shape=rank_param_shape,
                                         dtype=dtype)
    rank_param.stop_gradient = False

    output = helper.create_variable_for_type_inference(dtype)
    ins_rank = helper.create_variable_for_type_inference(dtype=dtype,
                                                         stop_gradient=True)

    helper.append_op(type="rank_attention",
                     inputs={
                         "X": input,
                         "RankOffset": rank_offset,
                         "RankParam": rank_param
                     },
                     outputs={"Out": output},
                     attrs={"MaxRank": max_rank})

    return output
Example #4
class L1(fluid.imperative.Layer):
    def __init__(self, prefix):
        super(L1, self).__init__(prefix)
        self._helper = LayerHelper(
            self.full_name(),
            param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant(
                value=0.1)))

        self.w1 = self._helper.create_parameter(attr=self._helper.param_attr,
                                                shape=[2, 2],
                                                dtype='float32',
                                                is_bias=False)
        self.w2 = self._helper.create_parameter(attr=self._helper.param_attr,
                                                shape=[2, 2],
                                                dtype='float32',
                                                is_bias=False)

    def forward(self):
        return self.w1 + self.w2
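A hedged sketch of running the L1 layer above eagerly; it assumes the old fluid.imperative API that the class itself targets, and the name scope "l1" is illustrative:

with fluid.imperative.guard():
    layer = L1("l1")          # illustrative name scope
    out = layer()             # forward() returns w1 + w2, a 2x2 tensor of 0.2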
Example #5
def _l2_norm_scale(self, input, init_scale=1.0, channel_shared=False):
    from paddle.fluid.layer_helper import LayerHelper
    helper = LayerHelper("Scale")
    l2_norm = fluid.layers.l2_normalize(
        input, axis=1)  # l2 norm along channel
    shape = [1] if channel_shared else [input.shape[1]]
    scale = helper.create_parameter(
        attr=helper.param_attr,
        shape=shape,
        dtype=input.dtype,
        default_initializer=Constant(init_scale))
    out = fluid.layers.elementwise_mul(
        x=l2_norm, y=scale, axis=-1 if channel_shared else 1)
    return out
Example #6
def pact(x):
    helper = LayerHelper("pact", **locals())
    dtype = 'float32'
    init_thres = 20
    u_param_attr = paddle.ParamAttr(
        name=x.name + '_pact',
        initializer=paddle.nn.initializer.Constant(value=init_thres),
        regularizer=paddle.regularizer.L2Decay(0.0001),
        learning_rate=1)
    u_param = helper.create_parameter(attr=u_param_attr, shape=[1], dtype=dtype)

    part_a = paddle.nn.functional.relu(x - u_param)
    part_b = paddle.nn.functional.relu(-u_param - x)
    x = x - part_a + part_b
    return x
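The pact() preprocessor above clips activations to the learnable threshold u_param: subtracting relu(x - u_param) caps values above the threshold, and adding relu(-u_param - x) floors values below its negative. A small sketch of the equivalent clip form, written with paddle 2.x ops as an assumption (the constant mirrors init_thres above):

import paddle
# x - relu(x - u) + relu(-u - x) is the same as clipping x into [-u, u]
x = paddle.to_tensor([-30.0, -5.0, 0.0, 5.0, 30.0])
clipped = paddle.clip(x, min=-20.0, max=20.0)  # -> [-20, -5, 0, 5, 20]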
Example #7
def pact(x, name=None):
    helper = LayerHelper("pact", **locals())
    dtype = 'float32'
    init_thres = 20
    u_param_attr = fluid.ParamAttr(
        name=x.name + '_pact',
        initializer=fluid.initializer.ConstantInitializer(value=init_thres),
        regularizer=fluid.regularizer.L2Decay(0.0001),
        learning_rate=1)
    u_param = helper.create_parameter(attr=u_param_attr, shape=[1], dtype=dtype)
    x = fluid.layers.elementwise_sub(
        x, fluid.layers.relu(fluid.layers.elementwise_sub(x, u_param)))
    x = fluid.layers.elementwise_add(
        x, fluid.layers.relu(fluid.layers.elementwise_sub(-u_param, x)))

    return x
Example #8
    def pact(x):
        helper = LayerHelper("pact", **locals())
        dtype = 'float32'
        init_thres = values[x.name.split('_tmp_input')[0]]
        u_param_attr = fluid.ParamAttr(
            name=x.name + '_pact',
            initializer=fluid.initializer.ConstantInitializer(
                value=init_thres),
            regularizer=fluid.regularizer.L2Decay(0.0001),
            learning_rate=1)
        u_param = helper.create_parameter(attr=u_param_attr,
                                          shape=[1],
                                          dtype=dtype)

        part_a = fluid.layers.relu(fluid.layers.elementwise_sub(x, u_param))
        part_b = fluid.layers.relu(fluid.layers.elementwise_sub(-u_param, x))
        x = x - part_a + part_b
        return x
Example #9
def pact(x):
    """
    Process a variable using the pact method you define
    Args:
        x(Tensor): Paddle Tensor, need to be preprocess before quantization
    Returns:
        The processed Tensor x.
    """
    helper = LayerHelper("pact", **locals())
    dtype = 'float32'
    init_thres = 20
    u_param_attr = fluid.ParamAttr(
        name=x.name + '_pact',
        initializer=fluid.initializer.ConstantInitializer(value=init_thres),
        regularizer=fluid.regularizer.L2Decay(0.0001),
        learning_rate=1)
    u_param = helper.create_parameter(attr=u_param_attr, shape=[1], dtype=dtype)
    x = fluid.layers.elementwise_sub(
        x, fluid.layers.relu(fluid.layers.elementwise_sub(x, u_param)))
    x = fluid.layers.elementwise_add(
        x, fluid.layers.relu(fluid.layers.elementwise_sub(-u_param, x)))
    return x
Example #10
File: nn.py Project: iducn/Paddle
def tdm_child(x, node_nums, child_nums, param_attr=None, dtype='int32'):
    """
    **Tdm Child**
    According to the input node_id on the given tree, return the corresponding child node_id
    and whether the child is a leaf node via the leaf_mask value.
    .. code-block:: text

        Given:
            tree[[0], [1, 2], [3, 4], [5, 6]] # A binary tree with seven nodes
            x = [[2], [3]]
            node_nums = 7
            child_nums = 2

          we get:
            child = [[5, 6],
                     [0, 0]]
            leaf_mask = [[1, 1],
                         [0, 0]]
    Args:
        x(Variable): Variable contained the node_id information, dtype support int32/int64.
        node_nums(int): Number of total nodes.
        child_nums(int): Maximum number of child nodes per node.
        param_attr(ParamAttr): To specify the tdm-tree-info parameter property. Default: None, which means the
            default weight parameter property is used. See usage for details in :ref:`api_fluid_ParamAttr`. It should
            have shape (node_nums, 3 + child_nums) and dtype int32/int64.
            dimension[1] of tdm-tree-info contains the following:
            1. Item_id(int, shape(1)): if the node is a leaf node, give its item_id corresponding to node_id, else give 0.
            2. Layer_id(int, shape(1)): indicates which layer the node is on.
            3. Parent_id(int, shape(1)): the node_id of this node's parent node.
            4. Child_id(int, shape(child_nums)): all child node_ids of this node should be given.
               If the number of child nodes is insufficient, pad with 0 until it equals child_nums.
        dtype(str): The data type of output child and leaf_mask, support int32/int64.

    Returns:
        tuple: A tuple including the input node's child(Variable) and leaf_mask(Variable).
            If the child is a leaf node, leaf_mask equals 1, otherwise 0.

    Examples:
        .. code-block:: python
            import paddle.fluid as fluid
            import numpy as np
            x = fluid.data(name="x", shape=[None, 1], dtype="int32", lod_level=1)
            tree_info = [[0,0,0,1,2],
                         [0,1,0,3,4],[0,1,0,5,6],
                         [0,2,1,0,0],[1,2,1,0,0],[2,2,2,0,0],[3,2,2,0,0]]
            tree_info_np = np.array(tree_info)
            tree_info_np = np.reshape(tree_info_np, (7,5))
            node_nums = 7
            child_nums = 2
            child, leaf_mask = fluid.contrib.layers.tdm_child(x, node_nums, child_nums,
                                    param_attr=fluid.ParamAttr(
                                        initializer=fluid.initializer.NumpyArrayInitializer(
                                                                                tree_info_np)))
            place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            exe.run(fluid.default_startup_program())
            xx = np.array([[2],[3]]).reshape((2,1)).astype("int32")
            child_res, leaf_mask_res = exe.run(feed={"x":xx}, fetch_list=[child, leaf_mask])
    """
    helper = LayerHelper("tdm_child", **locals())
    check_dtype(dtype, 'dtype', ['int32', 'int64'],
                'fluid.contrib.layers.tdm_child')
    c_dtype = convert_np_dtype_to_dtype_(dtype)
    tree_info = helper.create_parameter(attr=helper.param_attr,
                                        shape=[node_nums, 3 + child_nums],
                                        dtype=dtype,
                                        default_initializer=Constant(0))
    tree_info.stop_gradient = True

    child = helper.create_variable_for_type_inference(dtype=dtype)
    leaf_mask = helper.create_variable_for_type_inference(dtype=dtype)

    helper.append_op(type='tdm_child',
                     inputs={
                         'X': x,
                         'TreeInfo': tree_info
                     },
                     outputs={
                         'Child': child,
                         'LeafMask': leaf_mask
                     },
                     attrs={
                         'child_nums': child_nums,
                         'dtype': c_dtype
                     },
                     stop_gradient=True)
    return (child, leaf_mask)
Example #11
File: nn.py Project: iducn/Paddle
def search_pyramid_hash(input,
                        num_emb,
                        space_len,
                        pyramid_layer,
                        rand_len,
                        drop_out_percent,
                        is_training,
                        use_filter,
                        white_list_len,
                        black_list_len,
                        seed,
                        lr,
                        param_attr=None,
                        param_attr_wl=None,
                        param_attr_bl=None,
                        name=None,
                        distribute_update_vars=None,
                        dtype='float32'):
    """
    **Pyramid hash embedding**

    Args:
        input (Variable): LoDTensor<int32> Variable containing the IDs' information.
        num_emb (int): The embedding size of output.
        space_len (int): The length of pyramid hash embedding space.
        pyramid_layer (int): The number of pyramid layers. It should be greater than 2.
        rand_len (int): The minimum length of pyramid hash cell.
        drop_out_percent (float): The probability of randomly dropping out an input token.
            It should be in the range [0., 1.].
        is_training (bool): Whether in the training or testing phase.
        use_filter(bool): If set True, the white filter and black filter should be given by
            :attr:`param_attr_wl` and :attr:`param_attr_bl` .
        white_list_len(int): If set :math:`white_list_len>0` , white filter with shape [white_list_len, 1]
            should be provided by param_attr_wl.
        black_list_len(int): If set :math:`black_list_len>0` , black filter with shape [black_list_len, 1]
            should be provided by param_attr_bl.
        seed(int): The random seed.
        lr(float): The learning rate of weight created by :attr:`param_attr` with shape [space_len+rand_len, 1]
            in this layer.
        param_attr(ParamAttr): To specify the weight parameter property. Default: None, which means the
            default weight parameter property is used. See usage for details in :ref:`api_fluid_ParamAttr` .
        param_attr_wl(ParamAttr): Specified parameters of white filter.
        param_attr_bl(ParamAttr): Specified parameters of black filter.
        distribute_update_vars(list[ParamAttr.name]): Decided which params should be updated in distribute training.
            Used in Distribute Transpiler to create a trainer/server program.
        name(str, optional): The default value is None.  Normally there is no need for user to set this property.
            For more information, please refer to :ref:`api_guide_Name` .
        dtype(str): The data type of output variable, float32.
    Returns:
        Variable: LoDTensor of pyramid hash embedding.
    """
    helper = LayerHelper('search_pyramid_hash', **locals())

    w_shape = [space_len + rand_len, 1]
    w = helper.create_parameter(attr=param_attr,
                                shape=w_shape,
                                dtype=dtype,
                                is_bias=False)
    w.stop_gradient = True

    input_vars = {'X': input, 'W': w}
    if white_list_len > 0:
        wl_shape = [white_list_len, 1]
        white_list = helper.create_parameter(attr=param_attr_wl,
                                             shape=wl_shape,
                                             dtype=dtype,
                                             is_bias=False)
        white_list.stop_gradient = True
        input_vars['WhiteList'] = white_list

    if black_list_len >= 0:
        bl_shape = [black_list_len, 1]
        black_list = helper.create_parameter(attr=param_attr_bl,
                                             shape=bl_shape,
                                             dtype=dtype,
                                             is_bias=False)
        black_list.stop_gradient = True
        input_vars['BlackList'] = black_list

    distribute_update_vars_str = ""
    if distribute_update_vars:
        assert isinstance(distribute_update_vars, list)
        special_name_list = []
        if param_attr:
            special_name_list.append(param_attr.name)
        if param_attr_wl:
            special_name_list.append(param_attr_wl.name)
        if param_attr_bl:
            special_name_list.append(param_attr_bl.name)
        for param in distribute_update_vars:
            if param not in special_name_list:
                raise ValueError(
                    "Pyramid Hash layer didn't have parameter {}".format(
                        param))
        distribute_update_vars_str = ",".join(distribute_update_vars)

    res = helper.create_variable_for_type_inference(dtype)
    drop_pos = helper.create_variable_for_type_inference(dtype)
    x_temp_out = helper.create_variable_for_type_inference(dtype)
    helper.append_op(type='pyramid_hash',
                     inputs=input_vars,
                     outputs={
                         "Out": res,
                         "X_Temp_Out": x_temp_out,
                         'DropPos': drop_pos
                     },
                     attrs={
                         'num_emb': num_emb,
                         'space_len': space_len,
                         'pyramid_layer': pyramid_layer,
                         'rand_len': rand_len,
                         'drop_out_percent': drop_out_percent,
                         'is_training': is_training,
                         'use_filter': use_filter,
                         'white_list_len': white_list_len,
                         'black_list_len': black_list_len,
                         'seed': seed,
                         'lr': lr,
                         'distribute_update_vars': distribute_update_vars_str
                     })

    return res
Example #12
File: nn.py Project: iducn/Paddle
def fused_embedding_seq_pool(input,
                             size,
                             is_sparse=False,
                             padding_idx=None,
                             combiner='sum',
                             param_attr=None,
                             dtype='float32'):
    """
    **Embedding Sequence pool**

    This layer is the fusion of lookup table and sequence_pool.

    Args:
        input (Variable): Input is a Tensor<int64> Variable, which contains the IDs' information.
            The value of the input IDs should satisfy :math:`0<= id < size[0]`.
        size (tuple|list): The shape of the lookup_table parameter. It should
            have two elements which indicate the size of the dictionary of
            embedding and the size of each embedding vector respectively.
        is_sparse (bool): The flag indicating whether to use sparse update.
            Default: False.
        padding_idx (int|long|None): It will output all-zero padding data whenever
            lookup encounters :math:`padding\_idx` in Ids. If set to :attr:`None`, it has
            no effect on the output. If :math:`padding\_idx < 0`, the :math:`padding\_idx`
            will automatically be converted to :math:`size[0] + padding\_idx` before use.
            Default: None.
        combiner (str): The pooling type of sequence_pool, and only support `sum`.
            Default: sum.
        param_attr (ParamAttr): Parameters for this layer.
        dtype (np.dtype|core.VarDesc.VarType|str): The dtype refers to the data type of the output
            tensor. It can be float32, float16, int, etc.
    Returns:
        The sequence pooling variable which is a Tensor.
    Examples:
        .. code-block:: python
            import numpy as np
            import paddle.fluid as fluid

            dict_size = 20
            data_t = fluid.layers.data(
                name='word', shape=[1], dtype='int64', lod_level=1)
            padding_idx = np.random.randint(1, 10)
            out = fluid.contrib.fused_embedding_seq_pool(
                input=data_t,
                size=[dict_size, 32],
                param_attr='w',
                padding_idx=padding_idx,
                is_sparse=False)
    """
    helper = LayerHelper('fused_embedding_seq_pool', **locals())
    w = helper.create_parameter(attr=helper.param_attr,
                                shape=size,
                                dtype=dtype,
                                is_bias=False)
    out = helper.create_variable_for_type_inference(dtype)
    padding_idx = -1 if padding_idx is None else padding_idx if padding_idx >= 0 else (
        size[0] + padding_idx)
    helper.append_op(type='fused_embedding_seq_pool',
                     inputs={
                         'Ids': input,
                         'W': w
                     },
                     outputs={'Out': out},
                     attrs={
                         'is_sparse': is_sparse,
                         'combiner': combiner,
                         'padding_idx': padding_idx
                     })
    return out
Example #13
File: nn.py Project: iducn/Paddle
def tree_conv(nodes_vector,
              edge_set,
              output_size,
              num_filters=1,
              max_depth=2,
              act='tanh',
              param_attr=None,
              bias_attr=None,
              name=None):
    """
    ${comment}

    Args:
        nodes_vector(${nodes_vector_type}): ${nodes_vector_comment}
        edge_set(${edge_set_type}): ${edge_set_comment}
        output_size(int): output feature width
        num_filters(int): number of filters, Default 1
        max_depth(int): max depth of filters, Default 2
        act(str): activation function, Default tanh
        param_attr(ParamAttr): the parameter attribute for the filters, Default None
        bias_attr(ParamAttr): the parameter attribute for the bias of this layer, Default None
        name(str): a name of this layer(optional). If set None, the layer will be named automatically, Default None

    Returns:
        out(${out_type}): ${out_comment}

    Examples:
        .. code-block:: python

          import paddle.fluid as fluid
          # 10 for max_node_size of dataset, 5 for vector width
          nodes_vector = fluid.layers.data(
              name='vectors', shape=[10, 5], dtype='float32')
          # 10 for max_node_size of dataset, 2 for every edge has two nodes
          # edges must be directional
          edge_set = fluid.layers.data(name='edge_set', shape=[
                                       10, 2], dtype='float32')
          # the shape of output will be [10, 6, 1],
          # 10 for max_node_size of dataset, 6 for output size, 1 for 1 filter
          out_vector = fluid.layers.tree_conv(nodes_vector, edge_set, 6, 1, 2)
          # After reshape, output tensor could be nodes_vector for next tree convolution
          out_vector = fluid.layers.reshape(out_vector, shape=[-1, 10, 6])
          out_vector_2 = fluid.layers.tree_conv(out_vector, edge_set, 3, 4, 2)
          # also output tensor could be pooling(the pooling in paper called global pooling)
          pooled = fluid.layers.reduce_max(out_vector, dim=2) # global pooling
    """
    helper = LayerHelper("tree_conv", **locals())
    dtype = helper.input_dtype('nodes_vector')
    feature_size = nodes_vector.shape[2]
    W_shape = [feature_size, 3, output_size, num_filters]
    W = helper.create_parameter(attr=param_attr,
                                shape=W_shape,
                                dtype=dtype,
                                is_bias=False)
    out = helper.create_variable_for_type_inference(dtype=dtype)
    helper.append_op(type='tree_conv',
                     inputs={
                         'NodesVector': nodes_vector,
                         'EdgeSet': edge_set,
                         'Filter': W
                     },
                     outputs={
                         'Out': out,
                     },
                     attrs={'max_depth': max_depth})
    if helper.bias_attr:
        pre_activation = helper.append_bias_op(out)
    else:
        pre_activation = out
    return helper.append_activation(pre_activation)
Example #14
File: nn.py Project: iducn/Paddle
def match_matrix_tensor(x,
                        y,
                        channel_num,
                        act=None,
                        param_attr=None,
                        dtype='float32',
                        name=None):
    """
    Calculate the semantic matching matrix of two word sequences with variable length.
    Given a query A of length `n` and a title B of length `m`, the input shapes are
    [n, h] and [m, h] respectively, where h is hidden_size. If :attr:`channel_num` is set to 3,
    it will generate a learnable parameter matrix W with shape [h, 3, h].
    Then the semantic matching matrix of query A and title B is calculated by
    A * W * B.T = [n, h]*[h, 3, h]*[h, m] = [n, 3, m]. The learnable parameter matrix `W`
    is equivalent to a fully connected layer in the calculation process. If :attr:`act` is provided,
    the corresponding activation function will be applied to the output matrix.
    Both :attr:`x` and :attr:`y` should be 1-level LoDTensor; only one level of LoD is supported.

    .. code-block:: text

            Given a 1-level LoDTensor x:
                x.lod =  [
                    [2,                     3,                               ]]
                x.data = [[0.3, 0.1], [0.2, 0.3], [0.5, 0.6], [0.7, 0.1], [0.3, 0.4]]
                x.dims = [5, 2]
            y is a Tensor:
                y.lod =  [[3,                                 1,       ]]
                y.data = [[0.1, 0.2], [0.3, 0.7], [0.9, 0.2], [0.4, 0.1]]
                y.dims = [4, 2]
            set channel_num 2, then we get a 1-level LoDTensor:
                # where 12 = channel_num * x.lod[0][0] * y.lod[0][0]
                out.lod =  [[12, 6]]
                out.dims = [18, 1]     # where 18 = 12 + 6

    Args:
        x (Variable): Input variable x which should be 1-level LodTensor.
        y (Variable): Input variable y which should be 1-level LodTensor.
        channel_num (int): The channel number of learnable parameter W.
        act (str, default None): Activation to be applied to the output of this layer.
        param_attr (ParamAttr|list of ParamAttr, default None): The parameter attribute for learnable
            parameters/weights of this layer.
        dtype ('float32'): The data type of w data.
        name (str|None): A name for this layer(optional). If set None, the layer will be named automatically. Default: None

    Returns:
        Variable: output with LoD specified by this layer.

    Examples:
        .. code-block:: python

            import numpy as np
            from paddle.fluid import layers
            from paddle.fluid import contrib

            x_lod_tensor = layers.data(name='x', shape=[10], lod_level=1)
            y_lod_tensor = layers.data(name='y', shape=[10], lod_level=1)
            out, out_tmp = contrib.match_matrix_tensor(
                x=x_lod_tensor, y=y_lod_tensor, channel_num=3)
    """
    helper = LayerHelper('match_matrix_tensor', **locals())

    x_shape = list(x.shape)
    y_shape = list(y.shape)
    assert len(x_shape) == 2 and len(
        y_shape) == 2 and x_shape[-1] == y_shape[-1]

    weight_shape = [x_shape[-1], channel_num, y_shape[-1]]
    w = helper.create_parameter(attr=helper.param_attr,
                                shape=weight_shape,
                                dtype=dtype,
                                is_bias=False)
    mm_res = helper.create_variable_for_type_inference(dtype)
    tmp_res = helper.create_variable_for_type_inference(dtype,
                                                        stop_gradient=True)
    helper.append_op(type='match_matrix_tensor',
                     inputs={
                         'X': x,
                         'Y': y,
                         'W': w,
                     },
                     outputs={
                         "Out": mm_res,
                         "Tmp": tmp_res
                     },
                     attrs={'dim_t': channel_num})

    return helper.append_activation(mm_res), tmp_res
Example #15
File: nn.py Project: iducn/Paddle
def var_conv_2d(input,
                row,
                col,
                input_channel,
                output_channel,
                filter_size,
                stride=1,
                param_attr=None,
                act=None,
                dtype='float32',
                name=None):
    """
    The var_conv_2d layer calculates the output based on the :attr:`input` with variable length,
    row, col, input channel, filter size and strides. :attr:`input`, :attr:`row`,
    and :attr:`col` are all 1-level LoDTensor. The convolution operation is the same as the
    conv2d layer with padding. Besides, input.dims[1] should be 1.

    .. code-block:: text

            If input_channel is 2 and given row lodTensor and col lodTensor as follows:
                row.lod = [[5, 4]]
                col.lod = [[6, 7]]
            input is a lodTensor:
                input.lod = [[60, 56]]	# where 60 = input_channel * 5 * 6
                input.dims = [116, 1]	# where 116 = 60 + 56

            If set output_channel is 3, filter_size is [3, 3], stride is [1, 1]:
                # where 90 = output_channel * [(5-1)/stride + 1] * [(6-1)/stride + 1]
                output.lod = [[90, 84]]
                output.dims = [174, 1]  # where 174 = 90 + 84

    Args:
        input (Variable): The input should be 1-level LodTensor with dims[1] equals 1.
        row (Variable): The row should be 1-level LodTensor to provide height information.
        col (Variable): The col should be 1-level LodTensor to provide width information.
        input_channel (int): The number of input channel.
        output_channel (int): The number of output channel.
        filter_size (int|tuple|None): The filter size. If filter_size is a tuple,
            it must contain two integers, (filter_size_H, filter_size_W).
            Otherwise, the filter will be a square.
        stride (int|tuple): The stride size. If stride is a tuple, it must
            contain two integers, (stride_H, stride_W). Otherwise, the
            stride_H = stride_W = stride. Default: stride = 1.
        param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
            of var_conv2d. If it is set to None or one attribute of ParamAttr, var_conv2d
            will create ParamAttr as param_attr. If the Initializer of the param_attr
            is not set, the parameter is initialized with :math:`Normal(0.0, std)`,
            and the :math:`std` is :math:`(\\frac{2.0 }{filter\_elem\_num})^{0.5}`. Default: None.
        act (str): Activation type, if it is set to None, activation is not appended.
            Default: None
        dtype ('float32'): The data type of parameter and output.
        name (str|None): A name for this layer(optional). If set None, the layer
            will be named automatically. Default: None

    Returns:
        Variable: Output variable with LoD specified by this layer.

    Examples:
        .. code-block:: python

            import numpy as np
            from paddle.fluid import layers
            from paddle.fluid import contrib

            x_lod_tensor = layers.data(name='x', shape=[1], lod_level=1)
            row_lod_tensor = layers.data(name='row', shape=[6], lod_level=1)
            col_lod_tensor = layers.data(name='col', shape=[6], lod_level=1)
            out = contrib.var_conv_2d(input=x_lod_tensor,
                                     row=row_lod_tensor,
                                     col=col_lod_tensor,
                                     input_channel=3,
                                     output_channel=5,
                                     filter_size=[3, 3],
                                     stride=1)
    """
    helper = LayerHelper('var_conv_2d', **locals())
    x_shape = list(input.shape)
    assert len(x_shape) == 2

    filter_size = utils.convert_to_list(filter_size, 2, 'filter_size')
    stride = utils.convert_to_list(stride, 2, 'stride')

    filter_shape = [
        int(output_channel),
        int(input_channel) * filter_size[0] * filter_size[1]
    ]
    filter_param = helper.create_parameter(
        attr=helper.param_attr,
        shape=filter_shape,
        dtype=dtype,
    )

    conv_res = helper.create_variable_for_type_inference(dtype)
    tmp_res = helper.create_variable_for_type_inference(dtype,
                                                        stop_gradient=True)

    helper.append_op(type='var_conv_2d',
                     inputs={
                         'X': input,
                         'ROW': row,
                         'COLUMN': col,
                         'W': filter_param,
                     },
                     outputs={
                         "Out": conv_res,
                         "Col": tmp_res
                     },
                     attrs={
                         'InputChannel': input_channel,
                         'OutputChannel': output_channel,
                         'StrideH': stride[0],
                         'StrideW': stride[1],
                         'KernelH': filter_size[0],
                         'KernelW': filter_size[1],
                     })

    return helper.append_activation(conv_res)
class SimpleRNNCell(fluid.imperative.Layer):
    def __init__(self, name_scope, step_input_size, hidden_size, output_size,
                 param_attr):
        super(SimpleRNNCell, self).__init__(name_scope)
        self.step_input_size = step_input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self._dtype = core.VarDesc.VarType.FP32
        from paddle.fluid.layer_helper import LayerHelper
        self._helper = LayerHelper(
            'SimpleRNNCell', act="tanh", param_attr=param_attr)

    def _build_once(self, inputs, pre_hidden):
        i2h_param_shape = [self.step_input_size, self.hidden_size]
        h2h_param_shape = [self.hidden_size, self.hidden_size]
        h2o_param_shape = [self.output_size, self.hidden_size]
        self._i2h_w = self._helper.create_parameter(
            attr=self._helper.param_attr,
            shape=i2h_param_shape,
            dtype=self._dtype,
            is_bias=False)
        self._h2h_w = self._helper.create_parameter(
            attr=self._helper.param_attr,
            shape=h2h_param_shape,
            dtype=self._dtype,
            is_bias=False)
        self._h2o_w = self._helper.create_parameter(
            attr=self._helper.param_attr,
            shape=h2o_param_shape,
            dtype=self._dtype,
            is_bias=False)

    def forward(self, input, pre_hidden):

        tmp_i2h = self._helper.create_variable_for_type_inference(self._dtype)
        tmp_h2h = self._helper.create_variable_for_type_inference(self._dtype)
        hidden = self._helper.create_variable_for_type_inference(self._dtype)
        out = self._helper.create_variable_for_type_inference(self._dtype)
        softmax_out = self._helper.create_variable_for_type_inference(
            self._dtype)
        reduce_out = self._helper.create_variable_for_type_inference(
            self._dtype)
        self._helper.append_op(
            type="mul",
            inputs={"X": input,
                    "Y": self._i2h_w},
            outputs={"Out": tmp_i2h},
            attrs={"x_num_col_dims": 1,
                   "y_num_col_dims": 1})

        self._helper.append_op(
            type="mul",
            inputs={"X": pre_hidden,
                    "Y": self._h2h_w},
            outputs={"Out": tmp_h2h},
            attrs={"x_num_col_dims": 1,
                   "y_num_col_dims": 1})

        self._helper.append_op(
            type="elementwise_add",
            inputs={'X': tmp_h2h,
                    'Y': tmp_i2h},
            outputs={'Out': hidden},
            attrs={'axis': -1,
                   'use_mkldnn': False})
        hidden = self._helper.append_activation(hidden)

        self._helper.append_op(
            type="mul",
            inputs={"X": hidden,
                    "Y": self._h2o_w},
            outputs={"Out": out},
            attrs={"x_num_col_dims": 1,
                   "y_num_col_dims": 1})

        self._helper.append_op(
            type="softmax",
            inputs={"X": out},
            outputs={"Out": softmax_out},
            attrs={"use_cudnn": False})

        self._helper.append_op(
            type='reduce_sum',
            inputs={'X': softmax_out},
            outputs={'Out': reduce_out},
            attrs={'dim': None,
                   'keep_dim': False,
                   'reduce_all': True})

        return reduce_out, hidden
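A hedged instantiation sketch for the cell above; it assumes the same fluid.imperative era API (guard/to_variable), and the sizes are illustrative, with output_size set equal to hidden_size so the final mul's shapes line up:

import numpy as np
# Hypothetical single step through the cell; shapes are illustrative.
with fluid.imperative.guard():
    cell = SimpleRNNCell("rnn_cell", step_input_size=3, hidden_size=4,
                         output_size=4,
                         param_attr=fluid.ParamAttr(
                             initializer=fluid.initializer.Constant(0.1)))
    step_in = fluid.imperative.to_variable(np.ones([1, 3], dtype='float32'))
    pre_h = fluid.imperative.to_variable(np.zeros([1, 4], dtype='float32'))
    reduce_out, hidden = cell(step_in, pre_h)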
Example #17
class QuantizeTranspiler(object):
    def __init__(self,
                 weight_bits=8,
                 activation_bits=8,
                 activation_quantize_type='abs_max',
                 weight_quantize_type='abs_max',
                 window_size=10000,
                 moving_rate=0.9):
        """
        Convert and rewrite the fluid Program according to weight and
        activation quantization type.

        Args:
            weight_bits (int): quantization bit number for weights,
                the bias is not quantized.
            activation_bits (int): quantization bit number for activation.
            activation_quantize_type (str): quantization type for activations,
                now supporting 'abs_max', 'range_abs_max' and
                'moving_average_abs_max'. In 'abs_max' mode, the quantization
                scale is calculated dynamically at each step in both training
                and testing. In 'range_abs_max' mode, a static quantization
                scale is calculated during training and used in inference.
            weight_quantize_type (str): quantization type for weights,
                supporting 'abs_max'. 'range_abs_max' is usually not used for
                weights, since weights are fixed once the model is well trained.
            window_size (int): the window size for 'range_abs_max' quantization.
            moving_rate (float): the decay coefficient of the moving average for
                'moving_average_abs_max' quantization. Default: 0.9.

        Examples:

        .. code-block:: python

            # the original program will be rewritten; if you don't want to
            # change it, please clone it first.
            # quantize_program = program.clone()
            t = fluid.QuantizeTranspiler()
            t.transpile(quantize_program)

        """
        self.weight_bits = weight_bits
        self.activation_bits = activation_bits
        quant_type = ['abs_max', 'range_abs_max', 'moving_average_abs_max']
        if weight_quantize_type not in quant_type:
            raise ValueError(
                "Unknown weight_quantize_type: '%s'. It can only be "
                "'abs_max', 'range_abs_max' or 'moving_average_abs_max'." %
                str(weight_quantize_type))
        if activation_quantize_type not in quant_type:
            raise ValueError(
                "Unknown activation_quantize_type: '%s'. It can only be "
                "'abs_max', 'range_abs_max' or 'moving_average_abs_max'." %
                str(activation_quantize_type))

        self.weight_quantize_type = weight_quantize_type
        self.activation_quantize_type = activation_quantize_type

        self.window_size = window_size
        self.moving_rate = moving_rate
        self.helper = LayerHelper(self.__class__.__name__)
        self.fake_quant_op_types = [
            'fake_quantize_abs_max', 'fake_quantize_range_abs_max',
            'fake_quantize_moving_average_abs_max'
        ]
        self.fake_dequant_op_types = ['fake_dequantize_max_abs']
        self.is_test = None
        self.global_step = None

    def training_transpile(self, program=None, startup_program=None):
        """Rewrites a training input program in place for simulated
        quantization. Insert fake quantization and de-quantization ops into
        program to simulate the error introduced by quantization. And change
        the graident ops' input by using the faked quantization weights and
        activation. Since the program is transformed in place, the graph
        connection will change.

        Args:
            program (Program): the input program to be transpiled.
        """
        self.is_test = False
        program = default_main_program() if program is None else program
        startup_program = default_startup_program() if startup_program is \
            None else startup_program

        # mark the variables which have been quantized and dequantized.
        dequanted_vars = [
            collections.OrderedDict() for _ in range(len(program.blocks))
        ]
        grad_op_types = ['%s_grad' % (type) for type in _QUANTIZABLE_OP_TYPES]

        params = [p.name for p in program.global_block().iter_parameters()]

        def _transpile_forward(block, op):
            idx = block.ops.index(op)
            block_id = block.idx
            # insert quant op and dequant op
            for name in op.input_arg_names:
                # if the input is shared between ops
                if name in dequanted_vars[block_id]:
                    dequant_var = dequanted_vars[block_id][name]
                else:
                    var = block.var(name)
                    quant_bits = self.weight_bits if var.name in params \
                                 else self.activation_bits
                    quant_type = self.weight_quantize_type if var.name \
                        in params else self.activation_quantize_type

                    quant_var, scale_var = self._insert_quant_op(
                        block, idx, var, quant_bits, quant_type)
                    dequant_var = self._insert_dequant_op(
                        block, idx + 1, quant_var, scale_var, quant_bits)
                    dequanted_vars[block_id][name] = dequant_var
                # rename the forward op inputs
                op._rename_input(name, dequant_var.name)

        def _transpile_backward(block, op):
            block_id = block.idx
            no_dequanted_input_vars = True
            for name in op.input_arg_names:
                if name in dequanted_vars[block_id]:
                    dequant_var = dequanted_vars[block_id][name]
                    op._rename_input(name, dequant_var.name)
                    no_dequanted_input_vars = False
            if no_dequanted_input_vars:
                raise ValueError("There are no dequantized inputs for op %s." %
                                 (op.type))

        with program_guard(program, startup_program):
            self._create_global_step()
            for block in program.blocks:
                ops = list(block.ops)
                block_id = block.idx
                for op in ops:
                    # rewrite the forward ProgramDesc
                    if op.type in _QUANTIZABLE_OP_TYPES:
                        _transpile_forward(block, op)
                    # rename the backward op inputs
                    if op.type in grad_op_types:
                        _transpile_backward(block, op)

    def _create_global_step(self):
        if self.weight_quantize_type == 'range_abs_max' or \
            self.activation_quantize_type == 'range_abs_max':
            self.global_step = autoincreased_step_counter()

    def freeze_program(self, program, place, scope=None):
        """Freeze input training program for inference.

        Args:
            program (Program): the input program to be transpiled.
        """

        self.is_test = True
        scope = global_scope() if scope is None else scope
        program = default_main_program() if program is None else program

        persistable_vars = [
            v.name
            for v in filter(lambda var: var.persistable, program.list_vars())
        ]
        op_in_rename_map = [
            collections.OrderedDict() for _ in range(len(program.blocks))
        ]
        op_out_rename_map = [
            collections.OrderedDict() for _ in range(len(program.blocks))
        ]
        var_scale_map = [
            collections.OrderedDict() for _ in range(len(program.blocks))
        ]

        def _remove_fake_quant_and_dequant_op(block, op):
            idx = block.ops.index(op)
            block_id = block.idx
            k = op.output('Out')[0]
            v = op.input('X')[0]
            if v not in op_in_rename_map[block_id]:
                op_in_rename_map[block_id][k] = v
            else:
                op_in_rename_map[block_id][k] = op_in_rename_map[block_id][v]
            block._remove_op(idx)

        def _insert_post_dequant_op(block, op):
            idx = block.ops.index(op)
            block_id = block.idx
            max_range = None
            scale_var = None
            for name in op.input_arg_names:
                # rename the op's input to the input of the last op, which has been removed
                if name in op_in_rename_map[block_id]:
                    op._rename_input(name, op_in_rename_map[block_id][name])

                scale_v = var_scale_map[block_id][_original_var_name(name)]
                if _original_var_name(name) in persistable_vars:
                    param_range = (1 << (self.weight_bits - 1)) - 1
                    act_range = (1 << (self.activation_bits - 1)) - 1
                    assert _is_float(scale_v)
                    max_range = param_range * act_range / scale_v
                else:
                    assert isinstance(scale_v, Variable)
                    scale_var = scale_v

            if len(op.output_arg_names) != 1:
                raise ValueError("Only support one output, but op %s has"
                                 " more than one output." % (op.type))
            out_var = block.var(op.output_arg_names[0])
            dequant_var = block.create_var(name=_dequantized_var_name(
                out_var.name),
                                           type=out_var.type,
                                           shape=out_var.shape,
                                           dtype=out_var.dtype)
            # insert fake_dequantize_op
            dequant_op = block._insert_op(
                idx + 1,
                type="fake_dequantize_max_abs",
                attrs={'max_range': float(max_range)},
                inputs={
                    "X": out_var,
                    'Scale': scale_var
                },
                outputs={"Out": dequant_var})
            op_out_rename_map[block_id][out_var.name] = dequant_var.name
            return dequant_var

        def _load_var(name):
            return np.array(scope.find_var(name).get_tensor())

        def _restore_var(name, arr):
            t = scope.find_var(name).get_tensor()
            t.set(arr, place)

        for block in program.blocks:
            ops = list(block.ops)
            block_id = block.idx
            for op in ops:
                op_type = op.type

                # insert dequant_op after fc/conv; the inputs of the ops that
                # follow fc/conv need to be renamed to the dequant_op output
                for name in op.input_arg_names:
                    if name in op_out_rename_map[block_id]:
                        op._rename_input(name,
                                         op_out_rename_map[block_id][name])

                if op_type in self.fake_quant_op_types:
                    in_arg_name = op.input('X')[0]
                    if in_arg_name in persistable_vars:
                        if self.weight_quantize_type == 'abs_max':
                            param = _load_var(in_arg_name)
                            scale_v = np.max(np.abs(param))
                        else:
                            scale_v = _load_var(op.output('OutScale')[0])
                        var_scale_map[block_id][in_arg_name] = scale_v
                    else:
                        scale_v = block.var(op.output('OutScale')[0])
                        var_scale_map[block_id][in_arg_name] = scale_v

                    if in_arg_name in persistable_vars:
                        _remove_fake_quant_and_dequant_op(block, op)
                        # quantize weight and restore
                        param_t = _load_var(in_arg_name)
                        param_q_t = quant(param_t, scale_v, self.weight_bits)
                        _restore_var(in_arg_name, param_q_t)

                if op_type in self.fake_dequant_op_types:
                    _remove_fake_quant_and_dequant_op(block, op)

                if op_type in _QUANTIZABLE_OP_TYPES:
                    dequant_var = _insert_post_dequant_op(block, op)

        # remove the unused var in ProgramDesc
        self._remove_unused_var(program)
        #program = program.clone()

    def convert_to_int8(self, program, place, scope=None):
        scope = global_scope() if scope is None else scope
        program = default_main_program() if program is None else program

        def _load_var(name):
            return np.array(scope.find_var(name).get_tensor())

        global_block = program.global_block()

        def convert_to_int8(var):
            int8_var_name = var.name + ".int8"
            int8_var = global_block.create_parameter(
                name=int8_var_name.encode('ascii'),
                type=var.type,
                dtype=core.VarDesc.VarType.INT8,
                shape=var.shape)

            tensor = _load_var(var.name)

            scope.var(int8_var_name)
            int8_tensor = scope.find_var(int8_var_name).get_tensor()
            int8_tensor.set(tensor.astype(np.int8), place)
            return int8_var

        input_map = {}
        for block in program.blocks:
            for op in list(block.ops):
                if op.type in _QUANTIZABLE_OP_TYPES:
                    for name in op.input_arg_names:
                        var = block.var(name)
                        if var.persistable:
                            if name not in input_map:
                                int8_var = convert_to_int8(var)
                                input_map[name] = int8_var.name
                            op._rename_input(name, input_map[name])
        self._remove_unused_var(program)

    def _remove_unused_var(self, program):
        all_remove_vars = []
        for block in program.blocks:
            args = []
            for op in block.ops:
                args += op.input_arg_names
                args += op.output_arg_names
            args = list(set(args))  # vars used by all remaining ops
            var_names = block.vars.keys()  # all vars in the block
            sub_block_remove_vars = []
            for var in var_names:
                if var not in args:
                    sub_block_remove_vars.append(var)
            all_remove_vars.append(sub_block_remove_vars)

        remove_vars = [list(set(v)) for v in all_remove_vars]
        for i, block in enumerate(program.blocks):
            for v in remove_vars[i]:
                block._remove_var(v)

    def _insert_quant_abs_max_op(self, block, idx, var, quant_bits):
        """Insert fake_quantize_abs_max op.
        """
        quant_var = block.create_var(name=_quantized_var_name(var.name),
                                     type=var.type,
                                     shape=var.shape,
                                     dtype=var.dtype)
        scale = block.create_var(name=_quantized_scale_name(var.name),
                                 type=var.type,
                                 shape=var.shape,
                                 dtype=var.dtype)
        quant_op = block._insert_op(idx,
                                    type='fake_quantize_abs_max',
                                    attrs={'bit_length': quant_bits},
                                    inputs={'X': var},
                                    outputs={
                                        'Out': quant_var,
                                        'OutScale': scale
                                    })
        return quant_var, scale

    def _insert_quant_range_abs_max_op(self, block, idx, var, quant_bits):
        """Insert fake_quantize_range_abs_max
        """
        quant_var = block.create_var(name=_quantized_var_name(var.name),
                                     type=var.type,
                                     shape=var.shape,
                                     dtype=var.dtype)
        scale = self.helper.create_parameter(attr=ParamAttr(
            name=_quantized_scale_name(var.name),
            initializer=Constant(0.001),
            trainable=False),
                                             shape=[1],
                                             dtype=var.dtype)
        scale.stop_gradient = True

        ins = {'X': var, 'InScale': scale}
        outs = {'Out': quant_var, 'OutScale': scale}
        if not self.is_test:
            # A global variable that records the scales of the last window_size steps
            scales = self.helper.create_global_variable(
                name=unique_name.generate('scales'),
                persistable=True,
                dtype=var.dtype,
                shape=[self.window_size])
            self.helper.set_variable_initializer(scales,
                                                 initializer=Constant(value=0))

            ins['Iter'] = self.global_step
            outs['OutScales'] = scales

        attrs = {
            'window_size': self.window_size,
            'bit_length': quant_bits,
            'is_test': self.is_test
        }

        quant_op = block._insert_op(idx,
                                    type='fake_quantize_range_abs_max',
                                    attrs=attrs,
                                    inputs=ins,
                                    outputs=outs)

        return quant_var, scale

    def _insert_quant_moving_average_abs_max_op(self, block, idx, var,
                                                quant_bits):
        """Insert fake_quantize_moving_average_abs_max
        """
        quant_var = block.create_var(name=_quantized_var_name(var.name),
                                     type=var.type,
                                     shape=var.shape,
                                     dtype=var.dtype)
        state = self.helper.create_global_variable(
            name=unique_name.generate('state'),
            persistable=True,
            dtype=var.dtype,
            shape=[1])
        self.helper.set_variable_initializer(state,
                                             initializer=Constant(value=1))
        accum = self.helper.create_global_variable(
            name=unique_name.generate('accum'),
            persistable=True,
            dtype=var.dtype,
            shape=[1])
        self.helper.set_variable_initializer(accum,
                                             initializer=Constant(value=1))
        scale = self.helper.create_parameter(attr=ParamAttr(
            name=_quantized_scale_name(var.name),
            initializer=Constant(0.001),
            trainable=False),
                                             shape=[1],
                                             dtype=var.dtype)
        scale.stop_gradient = True

        ins = {'X': var, 'InScale': scale}
        outs = {'Out': quant_var, 'OutScale': scale}
        if not self.is_test:
            ins['InState'] = state
            ins['InAccum'] = accum
            outs['OutState'] = state
            outs['OutAccum'] = accum

        attrs = {
            'bit_length': quant_bits,
            'moving_rate': self.moving_rate,
            'is_test': self.is_test
        }

        quant_op = block._insert_op(
            idx,
            type='fake_quantize_moving_average_abs_max',
            attrs=attrs,
            inputs=ins,
            outputs=outs)

        return quant_var, scale

    def _insert_quant_op(self, block, idx, var, quant_bits, quant_type):
        """
        Insert fake_quantize_op
        """
        if quant_type == 'abs_max':
            return self._insert_quant_abs_max_op(block, idx, var, quant_bits)
        elif quant_type == 'range_abs_max':
            return self._insert_quant_range_abs_max_op(block, idx, var,
                                                       quant_bits)
        elif quant_type == 'moving_average_abs_max':
            return self._insert_quant_moving_average_abs_max_op(
                block, idx, var, quant_bits)

    def _insert_dequant_op(self, block, idx, var, scale, quant_bits):
        """
        Insert fake_dequantize_max_abs op.
        """
        dequant_var = block.create_var(name=_dequantized_var_name(var.name),
                                       type=var.type,
                                       shape=var.shape,
                                       dtype=var.dtype)
        # insert fake_dequantize_op
        max_range = (1 << (quant_bits - 1)) - 1
        dequant_op = block._insert_op(idx,
                                      type="fake_dequantize_max_abs",
                                      attrs={'max_range': float(max_range)},
                                      inputs={
                                          "X": var,
                                          'Scale': scale
                                      },
                                      outputs={"Out": dequant_var})
        return dequant_var
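
For intuition about what these fake-quantize/dequantize ops compute, here is a minimal NumPy sketch of the abs-max scheme and of the moving-average scale update (an illustration of the math under stated assumptions, not Paddle's kernels; the exact semantics are defined by fake_quantize_abs_max, fake_quantize_moving_average_abs_max and fake_dequantize_max_abs):

import numpy as np

def fake_quantize_abs_max(x, quant_bits=8):
    # scale is the tensor's absolute maximum; values are rounded onto a
    # signed grid of (1 << (quant_bits - 1)) - 1 bins but stay in float.
    bin_cnt = (1 << (quant_bits - 1)) - 1
    scale = np.abs(x).max()
    return np.round(x / scale * bin_cnt), scale

def fake_dequantize_max_abs(q, scale, quant_bits=8):
    # inverse mapping: grid values back to the original range.
    max_range = (1 << (quant_bits - 1)) - 1
    return q * scale / max_range

def moving_average_abs_max_update(x, state, accum, moving_rate=0.9):
    # assumed form of the moving-average scale update driven by the
    # `state`/`accum` variables created above (both initialized to 1).
    state = moving_rate * state + 1.0
    accum = moving_rate * accum + np.abs(x).max()
    return accum / state, state, accum

x = np.array([-0.8, 0.05, 0.4], dtype=np.float32)
q, scale = fake_quantize_abs_max(x)
x_hat = fake_dequantize_max_abs(q, scale)      # equals x up to rounding error
scale_ma, state, accum = moving_average_abs_max_update(x, state=1.0, accum=1.0)

The quantize/dequantize round trip is what the transpiler inserts around weights and activations, so the training graph sees the rounding error while still operating in floating point.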
Example n. 18
0
class TestPrimDistOp(unittest.TestCase):
    def setUp(self):
        self.main_program = paddle.static.Program()
        self.startup_program = paddle.static.Program()
        self.layer_help = LayerHelper('TestPrimDistOp')

        with paddle.static.program_guard(self.main_program,
                                         self.startup_program):
            self.init_prog()

    def init_prog(self):
        # block = self.main_program.global_block()
        self.w = self.layer_help.create_parameter(
            dtype="float", shape=[20], attr=None)
        self.w_grad = paddle.static.data(
            name='w_grad', shape=[20], dtype='float')
        self.tmp1 = paddle.static.data(name='tmp1', shape=[20], dtype='float')
        self.tmp2 = paddle.static.data(name='tmp2', shape=[20], dtype='float')
        self.batch_reduced = paddle.static.data(
            name='batch_reduced', shape=[1], dtype='float')
        self.attrs = {}

        default_dist_context = get_default_distributed_context()
        _global_process_mesh = auto.ProcessMesh(list(range(nranks)))
        tensor_dist_attr = set_var_dist_attr(
            default_dist_context,
            self.tmp1, [-1],
            _global_process_mesh,
            mark_annotated=True)

        op = self.layer_help.append_op(
            type="add_p",
            inputs={'X': self.tmp1,
                    'Y': self.w},
            outputs={'Z': self.w_grad},
            attrs=self.attrs)

        op = self.layer_help.append_op(
            type="reduce_p",
            inputs={'X': self.tmp2},
            outputs={'Y': self.batch_reduced},
            attrs={"axis": [0]})

    def test_loss_and_grad_allreduce(self):

        dist_context = DistributedContext(self.main_program,
                                          self.startup_program)
        completer = Completer(dist_context)
        completer.complete_prim_annotation(self.main_program)
        dist_context.block_state.parse_forward_blocks(self.main_program)
        dist_context.block_state.parse_backward_blocks(self.main_program)
        dist_context.grads_params = dict()
        dist_context.grads_params[self.w_grad.name] = self.w.name
        dist_context.synced_gradient = set()
        dist_context.data_parallel_group = list(range(nranks))
        partitioner = Partitioner(dist_context, rank)
        dist_main_prog, dist_startup_prog, _ = partitioner.partition(
            self.main_program, self.startup_program, [(self.w, self.w_grad)])
        ops = dist_main_prog.global_block().ops

        self.assertTrue(ops[1].type == "c_allreduce_sum")
        self.assertTrue(ops[3].type == "c_allreduce_sum")
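
The assertions above check that the partitioner inserted c_allreduce_sum ops for the data-parallel gradient and the reduced batch statistic. Conceptually, an allreduce-sum leaves every rank holding the element-wise sum of all ranks' tensors; a small NumPy sketch of that reduction (illustration only, the real op runs as a collective over the process group):

import numpy as np

def allreduce_sum(per_rank_tensors):
    # every rank ends up with the same element-wise sum
    total = np.sum(per_rank_tensors, axis=0)
    return [total.copy() for _ in per_rank_tensors]

grads = [np.array([0.1, -0.2]), np.array([0.3, 0.5])]   # gradients from two ranks
synced = allreduce_sum(grads)                            # both ranks hold [0.4, 0.3]
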
class SimpleLSTMRNN(fluid.imperative.Layer):
    def __init__(self,
                 name_scope,
                 hidden_size,
                 num_steps,
                 num_layers=2,
                 init_scale=0.1,
                 dropout=None):
        super(SimpleLSTMRNN, self).__init__(name_scope)
        self._hidden_size = hidden_size
        self._num_layers = num_layers
        self._init_scale = init_scale
        self._dropout = dropout
        self._input = None
        self._num_steps = num_steps
        from paddle.fluid.layer_helper import LayerHelper
        self._helper = LayerHelper('SimpleLSTMRNN', act="tanh")

    def _build_once(self, input_embedding, init_hidden=None, init_cell=None):
        self.weight_1_arr = []
        self.weight_2_arr = []
        self.bias_arr = []
        self.hidden_array = []
        self.cell_array = []
        self.mask_array = []

        for i in range(self._num_layers):
            weight_1 = self._helper.create_parameter(
                attr=fluid.ParamAttr(
                    initializer=fluid.initializer.UniformInitializer(
                        low=-self._init_scale, high=self._init_scale)),
                shape=[self._hidden_size * 2, self._hidden_size * 4],
                dtype="float32",
                default_initializer=fluid.initializer.UniformInitializer(
                    low=-self._init_scale, high=self._init_scale))
            self.weight_1_arr.append(weight_1)
            bias_1 = self._helper.create_parameter(
                attr=fluid.ParamAttr(
                    initializer=fluid.initializer.UniformInitializer(
                        low=-self._init_scale, high=self._init_scale)),
                shape=[self._hidden_size * 4],
                dtype="float32",
                default_initializer=fluid.initializer.Constant(0.0))
            self.bias_arr.append(bias_1)

            pre_hidden = fluid.layers.slice(
                init_hidden, axes=[0], starts=[i], ends=[i + 1])
            pre_cell = fluid.layers.slice(
                init_cell, axes=[0], starts=[i], ends=[i + 1])
            pre_hidden = fluid.layers.reshape(
                pre_hidden, shape=[-1, self._hidden_size])
            pre_cell = fluid.layers.reshape(
                pre_cell, shape=[-1, self._hidden_size])
            self.hidden_array.append(pre_hidden)
            self.cell_array.append(pre_cell)

    def forward(self, input_embedding, init_hidden=None, init_cell=None):
        res = []
        for index in range(self._num_steps):
            self._input = fluid.layers.slice(
                input_embedding, axes=[1], starts=[index], ends=[index + 1])
            self._input = fluid.layers.reshape(
                self._input, shape=[-1, self._hidden_size])
            for k in range(self._num_layers):
                pre_hidden = self.hidden_array[k]
                pre_cell = self.cell_array[k]
                weight_1 = self.weight_1_arr[k]
                bias = self.bias_arr[k]

                nn = fluid.layers.concat([self._input, pre_hidden], 1)
                gate_input = fluid.layers.matmul(x=nn, y=weight_1)

                gate_input = fluid.layers.elementwise_add(gate_input, bias)
                i, j, f, o = fluid.layers.split(
                    gate_input, num_or_sections=4, dim=-1)
                c = pre_cell * fluid.layers.sigmoid(f) + fluid.layers.sigmoid(
                    i) * fluid.layers.tanh(j)
                m = fluid.layers.tanh(c) * fluid.layers.sigmoid(o)
                self.hidden_array[k] = m
                self.cell_array[k] = c
                self._input = m

                if self._dropout is not None and self._dropout > 0.0:
                    self._input = fluid.layers.dropout(
                        self._input,
                        dropout_prob=self._dropout,
                        dropout_implementation='upscale_in_train')
            res.append(
                fluid.layers.reshape(
                    self._input, shape=[1, -1, self._hidden_size]))
        real_res = fluid.layers.concat(res, 0)
        real_res = fluid.layers.transpose(x=real_res, perm=[1, 0, 2])
        last_hidden = fluid.layers.concat(self.hidden_array, 1)
        last_hidden = fluid.layers.reshape(
            last_hidden, shape=[-1, self._num_layers, self._hidden_size])
        last_hidden = fluid.layers.transpose(x=last_hidden, perm=[1, 0, 2])
        last_cell = fluid.layers.concat(self.cell_array, 1)
        last_cell = fluid.layers.reshape(
            last_cell, shape=[-1, self._num_layers, self._hidden_size])
        last_cell = fluid.layers.transpose(x=last_cell, perm=[1, 0, 2])
        return real_res, last_hidden, last_cell
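
The per-step arithmetic in forward above is a standard LSTM cell with a single fused gate matrix: the input and previous hidden state are concatenated, projected to four gate pre-activations, and combined into the new cell and hidden states. A NumPy sketch of one step, using the same [2 * hidden, 4 * hidden] weight layout as weight_1 above (illustrative only):

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def lstm_step(x, pre_h, pre_c, w, b):
    # w: [2 * hidden, 4 * hidden], b: [4 * hidden]
    gate_input = np.concatenate([x, pre_h], axis=-1) @ w + b
    i, j, f, o = np.split(gate_input, 4, axis=-1)
    c = pre_c * sigmoid(f) + sigmoid(i) * np.tanh(j)
    h = np.tanh(c) * sigmoid(o)
    return h, c

hidden = 4
x = np.random.randn(2, hidden)
h0 = np.zeros((2, hidden))
c0 = np.zeros((2, hidden))
w = np.random.randn(2 * hidden, 4 * hidden) * 0.1
b = np.zeros(4 * hidden)
h1, c1 = lstm_step(x, h0, c0, w, b)
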
class PtbModel(fluid.imperative.Layer):
    def __init__(self,
                 name_scope,
                 hidden_size,
                 vocab_size,
                 num_layers=2,
                 num_steps=20,
                 init_scale=0.1,
                 dropout=None):
        super(PtbModel, self).__init__(name_scope)
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.init_scale = init_scale
        self.num_layers = num_layers
        self.num_steps = num_steps
        self.dropout = dropout
        from paddle.fluid.layer_helper import LayerHelper
        self._helper = LayerHelper('PtbModel', act="tanh")
        self.simple_lstm_rnn = SimpleLSTMRNN(
            self.full_name(),
            hidden_size,
            num_steps,
            num_layers=num_layers,
            init_scale=init_scale,
            dropout=dropout)
        self.embedding = Embedding(
            self.full_name(),
            size=[vocab_size, hidden_size],
            dtype='float32',
            is_sparse=False,
            param_attr=fluid.ParamAttr(
                name='embedding_para',
                initializer=fluid.initializer.UniformInitializer(
                    low=-init_scale, high=init_scale)))
        self.softmax_weight = self._helper.create_parameter(
            attr=fluid.ParamAttr(),
            shape=[self.hidden_size, self.vocab_size],
            dtype="float32",
            default_initializer=fluid.initializer.UniformInitializer(
                low=-self.init_scale, high=self.init_scale))
        self.softmax_bias = self._helper.create_parameter(
            attr=fluid.ParamAttr(),
            shape=[self.vocab_size],
            dtype="float32",
            default_initializer=fluid.initializer.UniformInitializer(
                low=-self.init_scale, high=self.init_scale))

    def _build_once(self, input, label, init_hidden, init_cell):
        pass

    def forward(self, input, label, init_hidden, init_cell):

        init_h = fluid.layers.reshape(
            init_hidden, shape=[self.num_layers, -1, self.hidden_size])

        init_c = fluid.layers.reshape(
            init_cell, shape=[self.num_layers, -1, self.hidden_size])

        x_emb = self.embedding(input)
        x_emb = fluid.layers.reshape(
            x_emb, shape=[-1, self.num_steps, self.hidden_size])
        if self.dropout is not None and self.dropout > 0.0:
            x_emb = fluid.layers.dropout(
                x_emb,
                dropout_prob=self.dropout,
                dropout_implementation='upscale_in_train')
        rnn_out, last_hidden, last_cell = self.simple_lstm_rnn(x_emb, init_h,
                                                               init_c)
        rnn_out = fluid.layers.reshape(
            rnn_out, shape=[-1, self.num_steps, self.hidden_size])
        projection = fluid.layers.matmul(rnn_out, self.softmax_weight)
        projection = fluid.layers.elementwise_add(projection, self.softmax_bias)
        projection = fluid.layers.reshape(
            projection, shape=[-1, self.vocab_size])
        loss = fluid.layers.softmax_with_cross_entropy(
            logits=projection, label=label, soft_label=False)
        loss = fluid.layers.reshape(loss, shape=[-1, self.num_steps])
        loss = fluid.layers.reduce_mean(loss, dim=[0])
        loss = fluid.layers.reduce_sum(loss)
        loss.permissions = True

        return loss, last_hidden, last_cell
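
The tail of forward computes a per-token softmax cross-entropy on the projection, averages over the batch dimension and sums over time steps. A NumPy sketch of that reduction with made-up shapes (the batch size, num_steps and vocab_size below are placeholders):

import numpy as np

def ptb_loss(projection, label, batch_size, num_steps):
    # projection: [batch * steps, vocab]; label: [batch * steps] integer ids
    shifted = projection - projection.max(axis=-1, keepdims=True)
    log_probs = shifted - np.log(np.exp(shifted).sum(axis=-1, keepdims=True))
    token_loss = -log_probs[np.arange(label.size), label]   # softmax_with_cross_entropy
    token_loss = token_loss.reshape(batch_size, num_steps)
    return token_loss.mean(axis=0).sum()                     # mean over batch, sum over steps

proj = np.random.randn(3 * 20, 100)                          # batch=3, steps=20, vocab=100
lbl = np.random.randint(0, 100, size=3 * 20)
loss = ptb_loss(proj, lbl, batch_size=3, num_steps=20)
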
Example n. 21
0
def fluid_batch_norm(input,
               act=None,
               is_test=False,
               momentum=0.9,
               epsilon=1e-05,
               param_attr=None,
               bias_attr=None,
               mean_attr=None,
               var_attr=None,
               data_layout='NCHW',
               in_place=False,
               name=None,
               moving_mean_name=None,
               moving_variance_name=None,
               do_model_average_for_mean_and_var=False,
               fuse_with_relu=False):
    """
    **Batch Normalization Layer**
    Edited by Lihang Liu to expose mean_attr and var_attr.

    Can be used as a normalizer function for conv2d and fully_connected operations.
    The required data format for this layer is one of the following:

    1. NHWC `[batch, in_height, in_width, in_channels]`

    2. NCHW `[batch, in_channels, in_height, in_width]`

    Refer to `Batch Normalization: Accelerating Deep Network Training by Reducing
    Internal Covariate Shift <https://arxiv.org/pdf/1502.03167.pdf>`_
    for more details.

    :math:`input` is the input features over a mini-batch.

    ..  math::

        \\mu_{\\beta} &\\gets \\frac{1}{m} \\sum_{i=1}^{m} x_i \\qquad &//\\
        \ mini-batch\ mean \\\\
        \\sigma_{\\beta}^{2} &\\gets \\frac{1}{m} \\sum_{i=1}^{m}(x_i - \\
        \\mu_{\\beta})^2 \\qquad &//\ mini-batch\ variance \\\\
        \\hat{x_i} &\\gets \\frac{x_i - \\mu_\\beta} {\\sqrt{\\
        \\sigma_{\\beta}^{2} + \\epsilon}} \\qquad &//\ normalize \\\\
        y_i &\\gets \\gamma \\hat{x_i} + \\beta \\qquad &//\ scale\ and\ shift

    Args:
        input(variable): The input variable which is a LoDTensor.
        act(string, Default None): Activation type, linear|relu|prelu|...
        is_test(bool, Default False): A flag indicating whether it is in test (inference) phase or not.
        momentum(float, Default 0.9): Momentum used for the moving mean and moving variance.
        epsilon(float, Default 1e-05): A small value added to the variance for numerical stability.
        param_attr(ParamAttr|None): The parameter attribute for Parameter `scale`
             of batch_norm. If it is set to None or one attribute of ParamAttr, batch_norm
             will create ParamAttr as param_attr. If the Initializer of the param_attr
             is not set, the parameter is initialized with Xavier. Default: None.
        bias_attr(ParamAttr|None): The parameter attribute for the bias of batch_norm.
             If it is set to None or one attribute of ParamAttr, batch_norm
             will create ParamAttr as bias_attr. If the Initializer of the bias_attr
             is not set, the bias is initialized zero. Default: None.
        mean_attr(ParamAttr|None): The parameter attribute for the moving mean. If None,
             a non-trainable parameter named by moving_mean_name is created. Default: None.
        var_attr(ParamAttr|None): The parameter attribute for the moving variance. If None,
             a non-trainable parameter named by moving_variance_name is created. Default: None.
        data_layout(string, default NCHW): NCHW|NHWC
        in_place(bool, Default False): Make the input and output of batch norm reuse memory.
        name(string, Default None): A name for this layer(optional). If set None, the layer
            will be named automatically.
        moving_mean_name(string, Default None): The name of moving_mean which store the global Mean.
        moving_variance_name(string, Default None): The name of the moving_variance which store the global Variance.
        do_model_average_for_mean_and_var(bool, Default False): Do model average for mean and variance or not.
        fuse_with_relu (bool): if True, this OP performs relu after batch norm.

    Returns:
        Variable: A tensor variable which is the result after applying batch normalization on the input.

    Examples:

        .. code-block:: python

            hidden1 = fluid.layers.fc(input=x, size=200, param_attr='fc1.w')
            hidden2 = fluid.layers.batch_norm(input=hidden1)
    """
    assert bias_attr is not False, "bias_attr should not be False in batch_norm."
    helper = LayerHelper('batch_norm', **locals())
    dtype = helper.input_dtype()

    input_shape = input.shape
    if data_layout == 'NCHW':
        channel_num = input_shape[1]
    elif data_layout == 'NHWC':
        channel_num = input_shape[-1]
    else:
        raise ValueError("unsupported data layout:" + data_layout)

    param_shape = [channel_num]

    # create parameter
    scale = helper.create_parameter(
        attr=helper.param_attr,
        shape=param_shape,
        dtype=dtype,
        default_initializer=Constant(1.0))

    bias = helper.create_parameter(
        attr=helper.bias_attr, shape=param_shape, dtype=dtype, is_bias=True)

    if mean_attr is None:
        mean = helper.create_parameter(
            attr=ParamAttr(
                name=moving_mean_name,
                initializer=Constant(0.0),
                trainable=False,
                do_model_average=do_model_average_for_mean_and_var),
            shape=param_shape,
            dtype=input.dtype)
    else:
        mean = helper.create_parameter(
            attr=mean_attr,
            shape=param_shape,
            dtype=input.dtype)
    mean.stop_gradient = True

    if var_attr is None:
        variance = helper.create_parameter(
            attr=ParamAttr(
                name=moving_variance_name,
                initializer=Constant(1.0),
                trainable=False,
                do_model_average=do_model_average_for_mean_and_var),
            shape=param_shape,
            dtype=input.dtype)
    else:
        variance = helper.create_parameter(
            attr=var_attr,
            shape=param_shape,
            dtype=input.dtype)
    variance.stop_gradient = True

    # create output
    # mean and mean_out share the same memory
    mean_out = mean
    # variance and variance out share the same memory
    variance_out = variance
    saved_mean = helper.create_variable_for_type_inference(
        dtype=dtype, stop_gradient=True)
    saved_variance = helper.create_variable_for_type_inference(
        dtype=dtype, stop_gradient=True)

    batch_norm_out = input if in_place else helper.create_variable_for_type_inference(
        dtype)

    helper.append_op(
        type="batch_norm",
        inputs={
            "X": input,
            "Scale": scale,
            "Bias": bias,
            "Mean": mean,
            "Variance": variance
        },
        outputs={
            "Y": batch_norm_out,
            "MeanOut": mean_out,
            "VarianceOut": variance_out,
            "SavedMean": saved_mean,
            "SavedVariance": saved_variance
        },
        attrs={
            "momentum": momentum,
            "epsilon": epsilon,
            "is_test": is_test,
            "use_mkldnn": False,
            "fuse_with_relu": fuse_with_relu
        })

    return helper.append_activation(batch_norm_out)
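
As a cross-check on the formulas in the docstring, here is a minimal NumPy sketch of training-mode batch normalization for NCHW input, including the moving-statistics update driven by momentum (an illustration of the math, not Paddle's batch_norm kernel):

import numpy as np

def batch_norm_train(x, scale, bias, moving_mean, moving_var,
                     momentum=0.9, epsilon=1e-5):
    # x: [N, C, H, W]; statistics are computed per channel over N, H, W.
    mean = x.mean(axis=(0, 2, 3))
    var = x.var(axis=(0, 2, 3))
    x_hat = (x - mean[None, :, None, None]) / np.sqrt(var[None, :, None, None] + epsilon)
    y = scale[None, :, None, None] * x_hat + bias[None, :, None, None]
    moving_mean = momentum * moving_mean + (1 - momentum) * mean
    moving_var = momentum * moving_var + (1 - momentum) * var
    return y, moving_mean, moving_var

x = np.random.randn(8, 3, 4, 4).astype(np.float32)
y, mm, mv = batch_norm_train(x, np.ones(3), np.zeros(3), np.zeros(3), np.ones(3))
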
Example n. 22
0
class CW_L2_Attack(Attack):
    """
    Uses Adam to minimize the CW L2 objective function

    Paper link: https://arxiv.org/abs/1608.04644
    """
    def __init__(self, model, learning_rate):
        super(CW_L2_Attack, self).__init__(model)
        self._predicts_normalized = None
        self._adversary = None  # type: Adversary
        #########################################
        # build cw attack computation graph
        # use CPU
        self.place = fluid.CPUPlace()
        # use GPU
        # place = fluid.CUDAPlace(0)
        self.exe = fluid.Executor(self.place)

        # clone the prebuilt program that has cnn to attack
        self.attack_main_program = fluid.Program()  # prebuilt_program.clone(for_test=False)
        # create an empty program for variable init
        self.attack_startup_program = fluid.Program()  # start_up_program.clone(for_test=False)

        # build cw attack compute graph within attack programs
        with fluid.program_guard(main_program=self.attack_main_program,
                                 startup_program=self.attack_startup_program):
            img_0_1_placehold = fluid.layers.data(name='img_data_scaled',
                                                  shape=[1, 28, 28],
                                                  dtype="float32")
            target_placehold = fluid.layers.data(name='target',
                                                 shape=[10],
                                                 dtype="float32")
            shape_placehold = fluid.layers.data(name="shape",
                                                shape=[1],
                                                dtype="float32")
            # k_placehold = fluid.layers.data(name='k',shape=[1],dtype="float32")
            c_placehold = fluid.layers.data(name='c',
                                            shape=[1],
                                            dtype="float32")

            # get fluid.layer object from prebuilt program
            # img_placehold_from_prebuilt_program = attack_main_program.block(0).var(self.model._input_name)
            # softmax_from_prebuilt_program = attack_main_program.block(0).var(self.model._softmax_name)
            # logits_from_prebuilt_program = attack_main_program.block(0).var(self.model._predict_name)

            t0, t1, t2, t3, t4 = self._loss_cw(img_0_1_placehold,
                                               target_placehold,
                                               shape_placehold,
                                               c_placehold)  # ,
            # img_placehold_from_prebuilt_program,
            # softmax_from_prebuilt_program,
            # logits_from_prebuilt_program)

            # Adam optimizer as suggested in paper
            optimizer = fluid.optimizer.Adam(learning_rate=learning_rate)
            optimizer.minimize(t2, parameter_list=['parameter'])

        # initialize variables and parameters before every attack
        self.exe.run(self.attack_startup_program)
        # initialize the adversarial perturbation
        ret = fluid.global_scope().find_var("parameter").get_tensor()
        # print(np.array(ret))
        ret.set(0.001 * np.random.random_sample((1, 28, 28)).astype('float32'),
                self.place)
        # print(np.array(ret))
        # print(attack_main_program.current_block()["parameter"])
        # pdb.set_trace()
        c1 = self.attack_main_program.block(0).var("conv2d_2.b_0")
        c2 = self.attack_main_program.block(0).var("conv2d_2.w_0")
        c3 = self.attack_main_program.block(0).var("conv2d_3.b_0")
        c4 = self.attack_main_program.block(0).var("conv2d_3.w_0")
        f1 = self.attack_main_program.block(0).var("fc_2.b_0")
        f2 = self.attack_main_program.block(0).var("fc_2.w_0")
        f3 = self.attack_main_program.block(0).var("fc_3.b_0")
        f4 = self.attack_main_program.block(0).var("fc_3.w_0")
        var_list = [c1, c2, c3, c4, f1, f2, f3, f4]

        fluid.io.load_vars(
            executor=self.exe,
            dirname="../advbox/attacks/mnist/",
            vars=var_list,
            main_program=self.attack_main_program)  # ../advbox/attacks/mnist/
        #########################################

    def _apply(self,
               adversary,
               nb_classes=10,
               learning_rate=0.01,
               attack_iterations=100,
               epsilon=1,
               targeted=True,
               k=0,
               noise=2):

        # keep the adversary instance on the attack object so the other methods can access it
        self._adversary = adversary

        if not adversary.is_targeted_attack:
            raise ValueError(
                "This attack method only supports targeted attacks!")

        # locate the range of c which makes the attack successful
        c = epsilon
        img = self._adversary.original  # original image to be attacked
        '''
        guess = self.model.predict(img)
        print('guess img before preprocess:',guess)
        '''
        for i in range(10):
            c = 2 * c
            print('Checking if the range {0:f} includes a successful c.'.format(
                c))
            is_adversary, f6 = self._cwb(img,
                                         c,
                                         attack_steps=attack_iterations,
                                         k=k,
                                         learning_rate=learning_rate,
                                         noise=noise,
                                         nb_classes=nb_classes)
            if is_adversary:
                break
        if not is_adversary:
            logging.info('This CW attack failed!')
            return adversary

        # binary search for a smaller c that still makes f6 <= 0
        print('searching for the smallest c that makes attack possible.')
        c_low = 0
        c_high = c
        while c_high - c_low >= epsilon:
            logging.info('c_high={}, c_low={}, diff={}, epsilon={}'.format(
                c_high, c_low, c_high - c_low, epsilon))

            c_half = (c_low + c_high) / 2

            is_adversary, f6 = self._cwb(img,
                                         c_half,
                                         attack_steps=attack_iterations,
                                         k=k,
                                         learning_rate=learning_rate,
                                         noise=noise,
                                         nb_classes=nb_classes)
            # pdb.set_trace()
            is_f6_smaller_than_0 = f6 <= 0
            if is_adversary and is_f6_smaller_than_0:
                c_high = c_half
            else:
                c_low = c_half

        return adversary

    def _cwb(self, img, c, attack_steps, k, learning_rate, noise, nb_classes):
        '''
        use CW attack on an original image for a
        limited number of iterations
        :return bool
        '''

        smallest_f6 = None
        corresponding_constrained = None

        # initial data
        screen_nontarget_logit = np.zeros(shape=[nb_classes], dtype="float32")
        screen_nontarget_logit[self._adversary.target_label] = 1

        feeder = fluid.DataFeeder(
            feed_list=["img_data_scaled", "target", "shape",
                       "c"],  # self.model._input_name,self.model._logits_name,
            place=self.place,
            program=self.attack_main_program)

        sub = -1
        div = 2

        img_0_1 = self._process_input(img, sub, div)
        # pdb.set_trace()

        for i in range(attack_steps):
            # print("steps:",i)
            result = self.exe.run(
                self.attack_main_program,
                feed=feeder.feed([(img_0_1, screen_nontarget_logit,
                                   np.zeros(shape=[1], dtype='float32'), c)
                                  ]),  # img_0_1,0,
                fetch_list=[
                    self.maxlogit_i_not_t, self.maxlogit_target, self.loss,
                    self.logits_i_not_t, self.constrained, self.softmax
                ])
            '''
            print("maxlogit_i_not_t:",result[0],\
                  "maxlogit_target:",result[1],\
                  "loss:",result[2],
                  "logits_i_not_t:",result[3],\
                  "softmax:",result[5])
            '''
            f6 = result[0] - result[1]
            if i == 0:
                smallest_f6 = f6
                corresponding_constrained = result[4]
            if f6 < smallest_f6:
                smallest_f6 = f6
                corresponding_constrained = result[4]

                ######
        # pdb.set_trace()
        # print(corresponding_constrained)
        # recover image (-1,1) from corresponding_constrained which is within (0,1)
        img_ad = self.reconstruct(corresponding_constrained)
        # convert into img.shape
        img_ad = np.squeeze(img_ad)
        img_ad = img_ad.reshape(img.shape)
        # let model guess
        adv_label = np.argmax(self.model.predict(img_ad))  # img,img_ad
        '''
        print(self._adversary.original_label,self.model.predict(img))
        print(self._adversary.target_label,screen_nontarget_logit)
        print(adv_label,self.model.predict(img_ad))
        #pdb.set_trace()
        '''
        # try to accept new result, success or fail
        return self._adversary.try_accept_the_example(
            img_ad, adv_label), f6  # img,img_ad

    # this builds up the CW attack computation graph in Paddle
    def _loss_cw(self, img_0_1, target, shape,
                 c):  # ,img_input_entrance,softmax_entrance,logits_entrance
        ####
        # use layerhelper to init w
        self.helper = LayerHelper("Jay")
        # give the parameter a fixed name so it can be fetched from the scope later
        self.param_attr = ParamAttr(name="parameter")

        # the perturbation is added in w space; the result is then reconstructed as an image within (0, 1)
        self.ad_perturbation = self.helper.create_parameter(
            attr=self.param_attr,
            shape=[1, 28, 28],
            dtype='float32',
            is_bias=False)

        self.y = 2 * img_0_1 - 1
        # compute arctan for y to get w
        self.xplus1 = 1 + self.y
        self.xminus1 = 1 - self.y
        self.ln = fluid.layers.log(self.xplus1 / self.xminus1)
        self.w = fluid.layers.scale(x=self.ln, scale=0.5)
        self.w_ad = self.w + self.ad_perturbation
        self.tanh_w = fluid.layers.tanh(self.w_ad)
        self.constrained = 0.5 * (self.tanh_w + 1)

        self.softmax, self.logits = mnist_cnn_model(self.constrained)

        self.sub = fluid.layers.elementwise_sub(img_0_1, self.constrained)
        self.squared = fluid.layers.elementwise_mul(self.sub, self.sub)
        self.distance_L2 = fluid.layers.reduce_sum(self.squared)

        self.negative_screen_nontarget_logit = fluid.layers.scale(target,
                                                                  scale=-1.0)
        self.screen_target_logit = self.negative_screen_nontarget_logit + \
            fluid.layers.ones(shape=[10], dtype="float32")

        self.logits_i_not_t = fluid.layers.elementwise_mul(
            self.screen_target_logit, self.logits)
        self.logit_target = fluid.layers.elementwise_mul(target, self.logits)

        self.maxlogit_i_not_t = fluid.layers.reduce_max(self.logits_i_not_t)
        self.maxlogit_target = fluid.layers.reduce_sum(self.logit_target)

        self.difference_between_two_logits = self.maxlogit_i_not_t - self.maxlogit_target

        self.f6 = fluid.layers.relu(self.difference_between_two_logits)

        self.loss = c * self.f6 + self.distance_L2

        return self.maxlogit_i_not_t, self.maxlogit_target, self.loss, self.logits_i_not_t, self.constrained  # distance_L2

    # reconstruct corresponding_constrained to an image in MNIST format
    def reconstruct(self, corresponding_constrained):
        """
        Restore the img from corresponding_constrained float32
        :return: numpy.ndarray
        """
        return corresponding_constrained * 2 - 1  # mnist is belong to (-1,1)

    def _f6(self, w):
        '''
        _f6 is the special f function CW chose as part of the
        objective function; it returns the value directly.
        :return float32
        '''
        target = self._adversary.target_label
        img = (np.tanh(w) + 1) / 2
        Z_output = self._Z(img)
        f6 = max(
            max([Z for i, Z in enumerate(Z_output) if i != target]) -
            Z_output[target], 0)

        return f6

    def _Z(self, img):
        """
        Get the Zx logits as a numpy array.
        :return: numpy.ndarray
        """
        return self.model.get_logits(img)

    def _process_input(self, input_, sub, div):
        res = None

        if np.any(sub != 0):
            res = input_ - sub
        if not np.all(div == 1):
            if res is None:  # "res = input_ - sub" is not executed!
                res = input_ / div
            else:
                res /= div
        if res is None:  # "res = (input_ - sub)/ div" is not executed!
            return input_

        res = np.where(res == 0, 0.00001, res)
        res = np.where(res == 1, 0.99999, res)  # no 0 or 1

        return res
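
Taken together, _process_input, _loss_cw and reconstruct implement the usual CW L2 pipeline: map the image into (0, 1), move to tanh space, optimize a perturbation there, and map the result back. A NumPy sketch of the objective and the two mappings, assuming a caller-supplied logits_fn in place of the CNN built by mnist_cnn_model (illustration only, not the graph above):

import numpy as np

def to_unit_interval(img, sub=-1.0, div=2.0, eps=1e-5):
    # same role as _process_input with sub=-1, div=2: (-1, 1) -> (0, 1),
    # clamped away from the endpoints so the arctanh below stays finite.
    return np.clip((img - sub) / div, eps, 1.0 - eps)

def cw_l2_objective(img_0_1, delta, target, c, logits_fn):
    w = np.arctanh(2.0 * img_0_1 - 1.0)        # change of variables as in _loss_cw
    adv = 0.5 * (np.tanh(w + delta) + 1.0)     # constrained adversarial image in (0, 1)
    z = logits_fn(adv)                         # model logits Z(adv)
    f6 = max(np.max(np.delete(z, target)) - z[target], 0.0)
    l2 = np.sum((adv - img_0_1) ** 2)
    return c * f6 + l2, f6

def reconstruct(img_0_1):
    # (0, 1) -> (-1, 1), as in reconstruct() above
    return img_0_1 * 2.0 - 1.0

# toy usage with a hypothetical linear "model"
rng = np.random.default_rng(0)
W = rng.standard_normal((10, 28 * 28))
logits_fn = lambda x: W @ x.reshape(-1)
img = rng.uniform(-1.0, 1.0, size=(28, 28)).astype(np.float32)
img_0_1 = to_unit_interval(img)
loss, f6 = cw_l2_objective(img_0_1, np.zeros((28, 28)), target=3, c=1.0, logits_fn=logits_fn)
img_back = reconstruct(img_0_1)
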