Beispiel #1
0
    def log_prob(self, value):
        """Log probability density/mass function.

        Evaluates ``log(mask) - log(high - low)``, where ``mask`` is 1 for
        elements satisfying ``low < value < high`` (both comparisons are
        strict) and 0 for every other element.

        Args:
          value (Tensor): The input tensor.

        Returns:
          Tensor: log probability. The data type is same with value.

        """
        value = self._check_values_dtype_in_probs(self.low, value)

        # Boolean masks selecting the elements strictly inside (low, high).
        above_low = self.low < value
        below_high = value < self.high

        if _non_static_mode():
            above_low_num = _C_ops.cast(above_low, 'in_dtype',
                                        above_low.dtype, 'out_dtype',
                                        value.dtype)
            below_high_num = _C_ops.cast(below_high, 'in_dtype',
                                         below_high.dtype, 'out_dtype',
                                         value.dtype)
            log_mask = nn.log(above_low_num * below_high_num)
            log_width = nn.log(self.high - self.low)
            return log_mask - log_width

        # Static graph: same arithmetic, with the final op given an explicit
        # name derived from the distribution's name.
        op_name = self.name + '_log_prob'
        above_low_num = tensor.cast(above_low, dtype=value.dtype)
        below_high_num = tensor.cast(below_high, dtype=value.dtype)
        log_mask = nn.log(above_low_num * below_high_num)
        log_width = nn.log(self.high - self.low)
        return elementwise_sub(log_mask, log_width, name=op_name)
Beispiel #2
0
    def probs(self, value):
        """Probability density/mass function.

        Evaluates ``mask / (high - low)``, where ``mask`` is 1 for elements
        satisfying ``low < value < high`` (both comparisons are strict) and 0
        for every other element.

        Args:
          value (Tensor): The input tensor.

        Returns:
          Tensor: probability. The data type is same with value.

        """
        value = self._check_values_dtype_in_probs(self.low, value)

        # Boolean masks selecting the elements strictly inside (low, high).
        above_low = self.low < value
        below_high = value < self.high

        if _non_static_mode():
            above_low_num = _C_ops.cast(above_low, 'in_dtype',
                                        above_low.dtype, 'out_dtype',
                                        value.dtype)
            below_high_num = _C_ops.cast(below_high, 'in_dtype',
                                         below_high.dtype, 'out_dtype',
                                         value.dtype)
            inside = above_low_num * below_high_num
            return inside / (self.high - self.low)

        # Static graph: same arithmetic, with the final op given an explicit
        # name derived from the distribution's name.
        op_name = self.name + '_probs'
        above_low_num = tensor.cast(above_low, dtype=value.dtype)
        below_high_num = tensor.cast(below_high, dtype=value.dtype)
        inside = above_low_num * below_high_num
        return elementwise_div(inside, (self.high - self.low), name=op_name)
Beispiel #3
0
    def _check_values_dtype_in_probs(self, param, value):
        """
        Align the dtype of ``value`` with the dtype of a distribution parameter.

        Log_prob and probs methods take an input ``value``; when its dtype
        differs from ``param``'s dtype a warning is issued and ``value`` is
        cast to ``param``'s dtype.

        Args:
            param (Tensor): low and high in Uniform class, loc and scale in Normal class.
            value (Tensor): The input tensor.

        Returns:
            value (Tensor): ``value`` cast to ``param.dtype`` when the dtypes
            differ, otherwise the input as given.
        """
        mismatch_warning = "dtype of input 'value' needs to be the same as parameters of distribution class. dtype of 'value' will be converted."

        if in_dygraph_mode():
            # In this mode only float32/float64 inputs are converted; any
            # other input is handed back untouched.
            convertible = (value.dtype != param.dtype and convert_dtype(
                value.dtype) in ['float32', 'float64'])
            if not convertible:
                return value
            warnings.warn(mismatch_warning)
            return _C_ops.cast(value, 'in_dtype', value.dtype, 'out_dtype',
                               param.dtype)

        # Outside that mode the input is validated first, then cast on mismatch.
        check_variable_and_dtype(value, 'value', ['float32', 'float64'],
                                 'log_prob')
        if value.dtype != param.dtype:
            warnings.warn(mismatch_warning)
            value = tensor.cast(value, dtype=param.dtype)
        return value
Beispiel #4
0
    def astype(self, dtype):
        """

        Cast a Tensor to a specified data type.

        Args:
            dtype: The target data type. Anything that is not already a
                ``core.VarDesc.VarType`` is converted with
                ``convert_np_dtype_to_dtype_`` first.

        Returns:
            Tensor: a new Tensor with target dtype

        Examples:
            .. code-block:: python

                import paddle

                original_tensor = paddle.ones([2, 2])
                print("original tensor's dtype is: {}".format(original_tensor.dtype))
                new_tensor = original_tensor.astype('float64')
                print("new tensor's dtype is: {}".format(new_tensor.dtype))

        """
        target_dtype = (dtype if isinstance(dtype, core.VarDesc.VarType) else
                        convert_np_dtype_to_dtype_(dtype))
        return _C_ops.cast(self, 'in_dtype', self.dtype, 'out_dtype',
                           target_dtype)
Beispiel #5
0
    def __call__(self, var, block=None):
        """Initialize the input tensor with Numpy array.

        FP16/BF16 targets are filled through a temporary FP32 variable that is
        cast back to the target dtype afterwards.

        Args:
            var(Tensor): Tensor that needs to be initialized.
            block(Block, optional): The block in which initialization ops
                   should be added. Used in static graph only, default None.

        Returns:
            The ``assign_value`` op in static graph mode; None in
            non-static (dynamic) mode.

        Raises:
            ValueError: If the dtype to assign is neither FP32 nor INT32, or
                if the array holds more than 1024 * 1024 * 1024 elements.
        """
        block = self._check_block(block)

        assert isinstance(var, framework.Variable)
        assert isinstance(block, framework.Block)

        # to be compatible of fp16 initializers
        needs_cast = var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]
        if needs_cast:
            out_dtype = VarDesc.VarType.FP32
            out_var = block.create_var(name=unique_name.generate(".".join(
                ['numpy_array_init', var.name, 'tmp'])),
                                       shape=var.shape,
                                       dtype=out_dtype,
                                       type=VarDesc.VarType.LOD_TENSOR,
                                       persistable=False)
        else:
            out_var = var
            out_dtype = var.dtype

        if out_dtype == VarDesc.VarType.FP32:
            value_name, to_scalar = "fp32_values", float
        elif out_dtype == VarDesc.VarType.INT32:
            value_name, to_scalar = "int32_values", int
        else:
            # Interpolate the dtype into the message; passing it as a second
            # exception argument leaves a literal "%s" in the text.
            raise ValueError("Unsupported dtype %s" % (self._value.dtype, ))
        if self._value.size > 1024 * 1024 * 1024:
            raise ValueError("The size of input is too big. Please consider "
                             "saving it to file and 'load_op' to load it")

        # Materialize the Python value list only after validation, so an
        # oversized array is rejected without first being copied.
        np_value = self._value.astype("float32") if needs_cast else self._value
        values = [to_scalar(v) for v in np_value.flat]

        if framework._non_static_mode():
            _C_ops.assign_value(out_var, 'shape', list(self._value.shape),
                                'dtype', out_dtype, value_name, values)
            if needs_cast:
                var_tmp = _C_ops.cast(out_var, 'in_dtype', out_var.dtype,
                                      'out_dtype', var.dtype)
                var_tmp._share_underline_tensor_to(var)
            else:
                out_var._share_underline_tensor_to(var)
            return None

        op = block.append_op(type='assign_value',
                             outputs={'Out': out_var},
                             attrs={
                                 'dtype': out_dtype,
                                 'shape': list(self._value.shape),
                                 value_name: values
                             },
                             stop_gradient=True)

        if needs_cast:
            block.append_op(type="cast",
                            inputs={"X": out_var},
                            outputs={"Out": var},
                            attrs={
                                "in_dtype": out_var.dtype,
                                "out_dtype": var.dtype
                            })

        var.op = op
        return op
Beispiel #6
0
    def __call__(self, var, block=None):
        """Initialize the input tensor with Bilinear initialization.

        Every ``shape[2] x shape[3]`` slice of the tensor is filled with the
        same separable kernel ``(1 - |x / f - c|) * (1 - |y / f - c|)``, where
        ``x`` and ``y`` are the integer column and row of the element inside
        the slice. FP16/BF16/FP64 targets are filled through a temporary FP32
        variable that is cast back to the target dtype afterwards.

        Args:
            var(Tensor): Tensor that needs to be initialized.
            block(Block, optional): The block in which initialization ops
                   should be added. Used in static graph only, default None.

        Returns:
            The ``assign_value`` op in static graph mode; None in
            non-static (dynamic) mode.

        Raises:
            ValueError: If ``var`` or ``block`` has the wrong type, if the
                shape is not 4-D with ``shape[2] == shape[3]``, or if the
                tensor holds more than 1024 * 1024 elements.
            TypeError: If the dtype to assign is not FP32.
        """
        block = self._check_block(block)

        if not isinstance(var, framework.Variable):
            raise ValueError("var must be framework.Variable.")

        if not isinstance(block, framework.Block):
            raise ValueError("block must be framework.Block.")

        shape = var.shape
        if len(shape) != 4:
            raise ValueError("the length of shape must be 4.")
        if shape[2] != shape[3]:
            raise ValueError("shape[2] must be equal to shape[3].")

        # to be compatible of fp16 initializers
        needs_cast = var.dtype in [
            VarDesc.VarType.FP16, VarDesc.VarType.BF16, VarDesc.VarType.FP64
        ]
        out_dtype = VarDesc.VarType.FP32 if needs_cast else var.dtype

        # Validate before doing any per-element work.
        if out_dtype != VarDesc.VarType.FP32:
            # Interpolate the dtype into the message; passing it as a second
            # exception argument leaves a literal "%s" in the text.
            raise TypeError("Unsupported dtype %s" % (var.dtype, ))

        if np.prod(shape) > 1024 * 1024:
            raise ValueError("The size of input is too big. ")

        weight = np.zeros(np.prod(var.shape), dtype='float32')
        size = shape[3]
        # factor
        f = np.ceil(size / 2.)
        # center
        c = (2 * f - 1 - f % 2) / (2. * f)
        for i in range(np.prod(shape)):
            x = i % size
            # Floor division: the row index must be an integer. True division
            # (Python 3 `/`) yields fractional rows and a wrong kernel.
            y = (i // size) % size
            weight[i] = (1 - abs(x / f - c)) * (1 - abs(y / f - c))
        weight = np.reshape(weight, shape)

        if needs_cast:
            out_var = block.create_var(name=unique_name.generate(".".join(
                ['bilinear_init', var.name, 'tmp'])),
                                       shape=var.shape,
                                       dtype=out_dtype,
                                       type=VarDesc.VarType.LOD_TENSOR,
                                       persistable=False)
        else:
            out_var = var

        value_name = "fp32_values"
        values = [float(v) for v in weight.flat]

        if framework._non_static_mode():
            _C_ops.assign_value(out_var, 'shape', list(shape), 'dtype',
                                out_dtype, value_name, values)
            if needs_cast:
                var_tmp = _C_ops.cast(out_var, 'in_dtype', out_var.dtype,
                                      'out_dtype', var.dtype)
                var_tmp._share_underline_tensor_to(var)
            else:
                out_var._share_underline_tensor_to(var)
            return None

        op = block.append_op(type='assign_value',
                             outputs={'Out': [out_var]},
                             attrs={
                                 'dtype': out_dtype,
                                 'shape': list(shape),
                                 value_name: values
                             })

        if needs_cast:
            block.append_op(type="cast",
                            inputs={"X": out_var},
                            outputs={"Out": var},
                            attrs={
                                "in_dtype": out_var.dtype,
                                "out_dtype": var.dtype
                            })

        var.op = op
        return op
Beispiel #7
0
    def __call__(self, var, block=None):
        """Initialize the input tensor with MSRA initialization.

        Samples from a uniform distribution bounded by
        ``sqrt(6 / fan_in)`` when ``self._uniform`` is set, otherwise from a
        normal distribution with standard deviation ``sqrt(2 / fan_in)``.
        FP16 targets (and BF16 targets in the normal case) are sampled into
        FP32 and cast to the target dtype afterwards.

        Args:
            var(Tensor): Tensor that needs to be initialized.
            block(Block, optional): The block in which initialization ops
                   should be added. Used in static graph only, default None.

        Returns:
            The random-sampling op in static graph mode; None in
            non-static (dynamic) mode.
        """
        block = self._check_block(block)

        assert isinstance(var, framework.Variable)
        assert isinstance(block, framework.Block)
        computed_fan_in, _ = self._compute_fans(var)

        # An explicitly configured fan_in takes priority over the computed one.
        fan_in = self._fan_in if self._fan_in is not None else computed_fan_in

        if self._seed == 0:
            self._seed = block.program.random_seed

        # to be compatible of fp16 initializers
        use_fp32_tmp = var.dtype == VarDesc.VarType.FP16 or (
            var.dtype == VarDesc.VarType.BF16 and not self._uniform)
        if use_fp32_tmp:
            out_dtype = VarDesc.VarType.FP32
            out_var = block.create_var(name=unique_name.generate(".".join(
                ['masra_init', var.name, 'tmp'])),
                                       shape=var.shape,
                                       dtype=out_dtype,
                                       type=VarDesc.VarType.LOD_TENSOR,
                                       persistable=False)
        else:
            out_dtype = var.dtype
            out_var = var

        if framework._non_static_mode():
            if self._uniform:
                bound = math.sqrt(6.0 / float(fan_in))
                sampled = _C_ops.uniform_random('shape', out_var.shape, 'min',
                                                -bound, 'max', bound, 'seed',
                                                self._seed, 'dtype',
                                                int(out_dtype))
            else:
                sigma = math.sqrt(2.0 / float(fan_in))
                if in_dygraph_mode():
                    place = _current_expected_place()
                    sampled = _C_ops.final_state_gaussian_random(
                        out_var.shape, 0.0, sigma, self._seed, out_dtype,
                        place)
                else:
                    sampled = _C_ops.gaussian_random('shape', out_var.shape,
                                                     'dtype', int(out_dtype),
                                                     'mean', 0.0, 'std',
                                                     sigma, 'seed',
                                                     self._seed)

            if use_fp32_tmp:
                sampled = _C_ops.cast(sampled, 'in_dtype', sampled.dtype,
                                      'out_dtype', var.dtype)
            sampled._share_underline_tensor_to(var)
            return None

        # Static graph: append the sampling op (and a cast when needed).
        if self._uniform:
            bound = math.sqrt(6.0 / float(fan_in))
            op = block.append_op(type="uniform_random",
                                 inputs={},
                                 outputs={"Out": out_var},
                                 attrs={
                                     "shape": out_var.shape,
                                     "dtype": int(out_dtype),
                                     "min": -bound,
                                     "max": bound,
                                     "seed": self._seed
                                 },
                                 stop_gradient=True)
        else:
            sigma = math.sqrt(2.0 / float(fan_in))
            op = block.append_op(type="gaussian_random",
                                 outputs={"Out": out_var},
                                 attrs={
                                     "shape": out_var.shape,
                                     "dtype": int(out_dtype),
                                     "mean": 0.0,
                                     "std": sigma,
                                     "seed": self._seed
                                 },
                                 stop_gradient=True)

        if use_fp32_tmp:
            block.append_op(type="cast",
                            inputs={"X": out_var},
                            outputs={"Out": var},
                            attrs={
                                "in_dtype": out_var.dtype,
                                "out_dtype": var.dtype
                            })

        var.op = op
        return op
Beispiel #8
0
    def __call__(self, var, block=None):
        """Initialize the input tensor with TruncatedNormal distribution.

        FP16/BF16 targets are sampled in FP32 and cast to the target dtype
        afterwards.

        Args:
            var(Tensor): Tensor that needs to be initialized.
            block(Block, optional): The block in which initialization ops
                   should be added. Used in static graph only, default None.

        Returns:
            The ``truncated_gaussian_random`` op in static graph mode; None
            in either dynamic mode.
        """
        block = self._check_block(block)

        assert isinstance(var, framework.Variable)
        assert isinstance(block, framework.Block)

        if self._seed == 0:
            self._seed = block.program.random_seed

        # to be compatible of fp16 initializers
        low_precision = var.dtype in [
            VarDesc.VarType.FP16, VarDesc.VarType.BF16
        ]
        if low_precision:
            out_dtype = VarDesc.VarType.FP32
            out_var = block.create_var(name=unique_name.generate(".".join(
                ['truncated_gaussian_random', var.name, 'tmp'])),
                                       shape=var.shape,
                                       dtype=out_dtype,
                                       type=VarDesc.VarType.LOD_TENSOR,
                                       persistable=False)
        else:
            out_dtype = var.dtype
            out_var = var

        if in_dygraph_mode():
            sampled = _C_ops.final_state_truncated_gaussian_random(
                var.shape, self._mean, self._std_dev, self._seed, out_dtype,
                _current_expected_place())
            if low_precision:
                sampled = _C_ops.final_state_cast(sampled, var.dtype)
            sampled._share_underline_tensor_to(var)
            return None

        if _in_legacy_dygraph():
            sampled = _C_ops.truncated_gaussian_random('shape', var.shape,
                                                       'dtype', out_dtype,
                                                       'mean', self._mean,
                                                       'std', self._std_dev,
                                                       'seed', self._seed)
            if low_precision:
                sampled = _C_ops.cast(sampled, 'in_dtype', sampled.dtype,
                                      'out_dtype', var.dtype)
            sampled._share_underline_tensor_to(var)
            return None

        # Static graph: append the sampling op (and a cast when needed).
        op = block.append_op(type="truncated_gaussian_random",
                             outputs={"Out": out_var},
                             attrs={
                                 "shape": var.shape,
                                 "dtype": out_dtype,
                                 "mean": self._mean,
                                 "std": self._std_dev,
                                 "seed": self._seed
                             },
                             stop_gradient=True)

        if low_precision:
            block.append_op(type="cast",
                            inputs={"X": out_var},
                            outputs={"Out": var},
                            attrs={
                                "in_dtype": out_var.dtype,
                                "out_dtype": var.dtype
                            })
        var.op = op
        return op
Beispiel #9
0
    def __call__(self, var, block=None):
        """Initialize the input tensor with Uniform distribution.

        FP16 targets are sampled in FP32 and cast to the target dtype
        afterwards; every other accepted dtype is sampled directly.

        Args:
            var(Tensor): Tensor that needs to be initialized.
            block(Block, optional): The block in which initialization ops
                   should be added. Used in static graph only, default None.

        Returns:
            The ``uniform_random`` op in static graph mode; None in
            non-static (dynamic) mode.
        """
        block = self._check_block(block)

        assert isinstance(block, framework.Block)
        check_variable_and_dtype(var, "Out",
                                 ["uint16", "float16", "float32", "float64"],
                                 "uniform_random")

        if self._seed == 0:
            self._seed = block.program.random_seed

        # to be compatible of fp16 initializers
        is_fp16 = var.dtype == VarDesc.VarType.FP16
        if is_fp16:
            out_dtype = VarDesc.VarType.FP32
            out_var = block.create_var(name=unique_name.generate(".".join(
                ['uniform_random', var.name, 'tmp'])),
                                       shape=var.shape,
                                       dtype=out_dtype,
                                       type=VarDesc.VarType.LOD_TENSOR,
                                       persistable=False)
        else:
            out_dtype = var.dtype
            out_var = var

        if framework._non_static_mode():
            sampled = _C_ops.uniform_random(
                'shape', var.shape, 'min', self._low, 'max', self._high,
                'seed', self._seed, 'dtype', out_dtype, 'diag_num',
                self._diag_num, 'diag_step', self._diag_step, 'diag_val',
                self._diag_val)
            if is_fp16:
                sampled = _C_ops.cast(sampled, 'in_dtype', sampled.dtype,
                                      'out_dtype', var.dtype)
            sampled._share_underline_tensor_to(var)
            return None

        # Static graph: append the sampling op (and a cast when needed).
        op = block.append_op(type="uniform_random",
                             inputs={},
                             outputs={"Out": out_var},
                             attrs={
                                 "shape": var.shape,
                                 "dtype": out_dtype,
                                 "min": self._low,
                                 "max": self._high,
                                 "seed": self._seed,
                                 "diag_num": self._diag_num,
                                 "diag_step": self._diag_step,
                                 "diag_val": self._diag_val
                             },
                             stop_gradient=True)

        if is_fp16:
            block.append_op(type="cast",
                            inputs={"X": out_var},
                            outputs={"Out": var},
                            attrs={
                                "in_dtype": out_var.dtype,
                                "out_dtype": var.dtype
                            })

        var.op = op
        return op
Beispiel #10
0
    def __call__(self, var, block=None):
        """Initialize the input tensor with dirac initializer.

        The tensor is zero-filled, flattened to 1-D, has the value 1.0
        scattered at a computed list of flat offsets, and is reshaped back to
        its original shape. Tensors whose dtype is not FP32 are built in a
        temporary FP32 variable and cast to the target dtype at the end.

        Args:
            var(Tensor): Tensor that needs to be initialized.
            block(Block, optional): The block in which initialization ops
                   should be added. Used in static graph only, default None.

        Returns:
            The most critical OP(scatter) in this initializer, which contains 7~8 ops in total.
            When the ``framework.in_dygraph_mode()`` branches are taken no op
            is recorded and None is returned.
        """
        block = self._check_block(block)
        assert isinstance(var, framework.Parameter)
        assert isinstance(block, framework.Block)
        check_variable_and_dtype(var, "Out",
                                 ['float16', 'bfloat16', 'float32', 'float64'],
                                 'Dirac')

        assert len(var.shape) in [
            3, 4, 5
        ], "Only Tensor with 3/4/5 dimensions can be initialized by Dirac"
        assert (var.shape[0] % self._groups
                ) == 0, "Tensor 0-dimension must be divisible by groups"

        # Work in FP32: a temporary variable is used for every other dtype and
        # cast into ``var`` at the very end.
        if var.dtype != VarDesc.VarType.FP32:
            out_var = block.create_var(name=unique_name.generate(".".join(
                ['dirac', var.name, 'tmp'])),
                                       shape=var.shape,
                                       dtype=VarDesc.VarType.FP32,
                                       type=VarDesc.VarType.LOD_TENSOR,
                                       persistable=False)
        else:
            out_var = var
        # ``op`` is only assigned in the static-graph scatter branch below.
        op = None
        # Step 1: fill the working tensor with zeros.
        if framework.in_dygraph_mode():
            with fluid.dygraph.no_grad():
                _C_ops.fill_constant(out_var, 'value', float(0), 'force_cpu',
                                     False,
                                     'dtype', out_var.dtype, 'str_value',
                                     str(float(0)), 'shape', out_var.shape)
        else:
            block.append_op(type='fill_constant',
                            inputs={},
                            outputs={'Out': out_var},
                            attrs={
                                'value': float(0),
                                'dtype': out_var.dtype,
                                'shape': out_var.shape,
                            },
                            stop_gradient=True)

        origin_shape = var.shape
        num_per_group = origin_shape[0] // self._groups
        # Number of ones written per group: bounded by both the group size
        # along dim 0 and the extent of dim 1.
        min_shape = min(num_per_group, origin_shape[1])

        idx_list = []
        value_list = []
        # Row-major strides of ``origin_shape`` (last dimension has stride 1).
        strides = []
        prod = 1
        for dim in reversed(origin_shape):
            strides.insert(0, prod)
            prod *= dim
        # Step 2: compute the flat offset of every element to set to 1.0.
        # For group i and position j the element sits at index
        # (j + i * num_per_group) on dim 0, j on dim 1, and the middle index
        # (extent // 2) on every remaining dimension.
        for i in range(self._groups):
            for j in range(min_shape):
                value_list.append(1.0)
                offset = 0
                for (k, stride) in enumerate(strides):
                    if (k == 0):
                        offset += (j + i * num_per_group) * stride
                    elif (k == 1):
                        offset += j * stride
                    else:
                        offset += origin_shape[k] // 2 * stride
                idx_list.append(offset)
        # Step 3: flatten the working tensor to 1-D so that the flat offsets
        # can be used as scatter indices.
        if framework.in_dygraph_mode():
            with fluid.dygraph.no_grad():
                tmp_out, _ = _C_ops.reshape2(out_var, None, 'shape', [-1])
                tmp_out._share_underline_tensor_to(out_var)
        else:
            x_shape = block.create_var(name=unique_name.generate(".".join(
                [out_var.name, "XShape"])),
                                       dtype=out_var.dtype,
                                       shape=out_var.shape,
                                       type=VarDesc.VarType.LOD_TENSOR,
                                       persistable=False,
                                       stop_gradient=True)
            block.append_op(type="reshape2",
                            inputs={"X": out_var},
                            attrs={'shape': [-1]},
                            outputs={
                                "Out": out_var,
                                "XShape": x_shape
                            },
                            stop_gradient=True)

        # Step 4: materialize the offsets as an INT64 index tensor.
        index_tensor = block.create_var(
            name=unique_name.generate('scatter_index'),
            persistable=False,
            stop_gradient=True)

        if framework.in_dygraph_mode():
            with fluid.dygraph.no_grad():
                tmp_tensor = framework._varbase_creator()
                _C_ops.assign_value(tmp_tensor, 'shape', [len(idx_list)],
                                    'dtype', VarDesc.VarType.INT64,
                                    'int64_values', idx_list)
                tmp_tensor._share_underline_tensor_to(index_tensor)
        else:
            block.append_op(type='assign_value',
                            outputs={'Out': index_tensor},
                            attrs={
                                'dtype': VarDesc.VarType.INT64,
                                'shape': [len(idx_list)],
                                'int64_values': idx_list
                            },
                            stop_gradient=True)

        # Step 5: materialize the values to scatter (all 1.0) as an FP32 tensor.
        value_tensor = block.create_var(
            name=unique_name.generate('scatter_value'),
            persistable=False,
            stop_gradient=True)

        if framework.in_dygraph_mode():
            with fluid.dygraph.no_grad():
                tmp_tensor = framework._varbase_creator()
                _C_ops.assign_value(tmp_tensor, 'shape', [len(value_list)],
                                    'dtype', VarDesc.VarType.FP32,
                                    'fp32_values', value_list)
                tmp_tensor._share_underline_tensor_to(value_tensor)
        else:
            block.append_op(type='assign_value',
                            outputs={'Out': value_tensor},
                            attrs={
                                'dtype': VarDesc.VarType.FP32,
                                'shape': [len(value_list)],
                                'fp32_values': value_list
                            },
                            stop_gradient=True)

        # Step 6: scatter the ones into the flattened tensor (overwrite mode),
        # restore the original shape, and cast into ``var`` when a temporary
        # FP32 variable was used.
        if framework.in_dygraph_mode():
            with fluid.dygraph.no_grad():
                tmp_out = _C_ops.final_state_scatter(out_var, index_tensor,
                                                     value_tensor, True)
                tmp_out._share_underline_tensor_to(out_var)
                tmp_reshape_out, _ = _C_ops.reshape2(out_var, None, 'shape',
                                                     origin_shape)
                tmp_reshape_out._share_underline_tensor_to(out_var)
                if var.dtype != VarDesc.VarType.FP32:
                    tmp_cast_out = _C_ops.cast(out_var, 'in_dtype',
                                               out_var.dtype, 'out_dtype',
                                               var.dtype)
                    tmp_cast_out._share_underline_tensor_to(var)

        else:
            # The scatter op is the one returned to the caller.
            op = block.append_op(type="scatter",
                                 inputs={
                                     "X": out_var,
                                     "Ids": index_tensor,
                                     "Updates": value_tensor
                                 },
                                 attrs={'overwrite': True},
                                 outputs={"Out": out_var},
                                 stop_gradient=True)
            x_shape = block.create_var(name=unique_name.generate(".".join(
                [out_var.name, "XShape"])),
                                       dtype=out_var.dtype,
                                       shape=out_var.shape,
                                       type=VarDesc.VarType.LOD_TENSOR,
                                       persistable=False,
                                       stop_gradient=True)
            block.append_op(type="reshape2",
                            inputs={"X": out_var},
                            attrs={'shape': origin_shape},
                            outputs={
                                "Out": out_var,
                                "XShape": x_shape
                            },
                            stop_gradient=True)
            if var.dtype != VarDesc.VarType.FP32:
                block.append_op(type="cast",
                                inputs={"X": out_var},
                                outputs={"Out": var},
                                attrs={
                                    "in_dtype": out_var.dtype,
                                    "out_dtype": var.dtype
                                },
                                stop_gradient=True)
        # NOTE(review): this guard uses ``in_dynamic_mode()`` while the
        # branches above use ``framework.in_dygraph_mode()`` — confirm the two
        # predicates are meant to agree here.
        if not in_dynamic_mode():
            var.op = op
        return op
Beispiel #11
0
def softmax(x, axis=-1, dtype=None, name=None):
    r"""
    Apply the softmax function along one axis of ``x``.

    Conceptually the computation proceeds in three steps:

    1. The dimension :attr:`axis` of ``x`` is permuted to the last position.

    2. ``x`` is then logically flattened to a 2-D matrix whose second
    dimension (row length) equals the size of dimension :attr:`axis` of ``x``
    and whose first dimension (column length) is the product of all other
    dimensions of ``x``. Each row, a K-dimensional vector of arbitrary real
    values (K being the size of dimension :attr:`axis`), is squashed to a
    K-dimensional vector of values in the range [0, 1] that add up to 1.

    3. The inverse of steps 1 and 2 is applied, so the result has the same
    shape as ``x``.

    Every output element is the exponential of the corresponding input
    element divided by the sum of the exponentials of all elements in the
    same row.

    For each row :math:`i` and each column :math:`j` of the matrix:

    .. math::

        softmax[i, j] = \frac{\exp(x[i, j])}{\sum_j \exp(x[i, j])}

    Example:

    .. code-block:: text

        Case 1:
          Input:
            x.shape = [2, 3, 4]
            x.data = [[[2.0, 3.0, 4.0, 5.0],
                       [3.0, 4.0, 5.0, 6.0],
                       [7.0, 8.0, 8.0, 9.0]],
                      [[1.0, 2.0, 3.0, 4.0],
                       [5.0, 6.0, 7.0, 8.0],
                       [6.0, 7.0, 8.0, 9.0]]]

          Attrs:
            axis = -1

          Output:
            out.shape = [2, 3, 4]
            out.data = [[[0.0320586 , 0.08714432, 0.23688282, 0.64391426],
                         [0.0320586 , 0.08714432, 0.23688282, 0.64391426],
                         [0.07232949, 0.19661193, 0.19661193, 0.53444665]],
                        [[0.0320586 , 0.08714432, 0.23688282, 0.64391426],
                         [0.0320586 , 0.08714432, 0.23688282, 0.64391426],
                         [0.0320586 , 0.08714432, 0.23688282, 0.64391426]]]

        Case 2:
          Input:
            x.shape = [2, 3, 4]
            x.data = [[[2.0, 3.0, 4.0, 5.0],
                       [3.0, 4.0, 5.0, 6.0],
                       [7.0, 8.0, 8.0, 9.0]],
                      [[1.0, 2.0, 3.0, 4.0],
                       [5.0, 6.0, 7.0, 8.0],
                       [6.0, 7.0, 8.0, 9.0]]]
          Attrs:
            axis = 1

          Output:
            out.shape = [2, 3, 4]
            out.data = [[[0.00657326, 0.00657326, 0.01714783, 0.01714783],
                         [0.01786798, 0.01786798, 0.04661262, 0.04661262],
                         [0.97555875, 0.97555875, 0.93623955, 0.93623955]],
                        [[0.00490169, 0.00490169, 0.00490169, 0.00490169],
                         [0.26762315, 0.26762315, 0.26762315, 0.26762315],
                         [0.72747516, 0.72747516, 0.72747516, 0.72747516]]]

    Parameters:
        x (Tensor): The input Tensor with data type float32, float64
            (float16 also passes the static-graph type check when ``dtype``
            is None).
        axis (int, optional): The axis along which to perform softmax
            calculations. It should be in range [-D, D), where D is the
            dimensions of ``x`` . If ``axis`` < 0, it works the same way as
            :math:`axis + D` . Default is -1.
        dtype (str|np.dtype|core.VarDesc.VarType, optional): The data type of
            the output tensor, can be float32, float64. When given, ``x`` is
            cast to ``dtype`` before softmax is computed. Default is None,
            which keeps the data type of ``x``.
        name (str, optional): Name for the operation (optional, default is None).
            For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        A Tensor with the same shape and data type (use ``dtype`` if it is
        specified) as x.

    Examples:
        .. code-block:: python

            import paddle
            import paddle.nn.functional as F
            import numpy as np

            x = np.array([[[2.0, 3.0, 4.0, 5.0],
                        [3.0, 4.0, 5.0, 6.0],
                        [7.0, 8.0, 8.0, 9.0]],
                        [[1.0, 2.0, 3.0, 4.0],
                        [5.0, 6.0, 7.0, 8.0],
                        [6.0, 7.0, 8.0, 9.0]]], 'float32')
            x = paddle.to_tensor(x)
            out1 = F.softmax(x)
            out2 = F.softmax(x, dtype='float64')
            # out1's data type is float32; out2's data type is float64
            # out1 and out2's value is as follows:
            # [[[0.0320586 , 0.08714432, 0.23688282, 0.64391426],
            #   [0.0320586 , 0.08714432, 0.23688282, 0.64391426],
            #   [0.07232949, 0.19661193, 0.19661193, 0.53444665]],
            # [[0.0320586 , 0.08714432, 0.23688282, 0.64391426],
            #   [0.0320586 , 0.08714432, 0.23688282, 0.64391426],
            #   [0.0320586 , 0.08714432, 0.23688282, 0.64391426]]]
    """

    # Bring any dtype spec that is not already a VarType into VarType form.
    if dtype is not None and not isinstance(dtype, core.VarDesc.VarType):
        dtype = convert_np_dtype_to_dtype_(dtype)
    use_cudnn = True

    # Dygraph mode: invoke the ops directly, casting first only if requested.
    if in_dygraph_mode():
        if dtype is None:
            return _C_ops.softmax(x, 'axis', axis, 'use_cudnn', use_cudnn)
        casted = _C_ops.cast(x, 'in_dtype', x.dtype, 'out_dtype', dtype)
        return _C_ops.softmax(casted, 'axis', axis, 'use_cudnn', use_cudnn)

    # Static mode: validate the requested dtype when one is given, otherwise
    # validate the input's own dtype.
    if dtype is not None:
        check_dtype(dtype, 'dtype', ['float32', 'float64'], 'softmax',
                    'If dtype is not None, it only support float32 or float64.')
    else:
        check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
                                 'softmax')

    # NOTE: ``**locals()`` forwards exactly x, axis, dtype, name and use_cudnn
    # here; do not bind any other local before this line on the static path.
    helper = LayerHelper("softmax", **locals())

    if dtype is None:
        softmax_in = x
    else:
        # Emit an explicit cast op so softmax runs in the requested dtype.
        softmax_in = helper.create_variable_for_type_inference(dtype)
        helper.append_op(type='cast',
                         inputs={'X': x},
                         outputs={'Out': softmax_in},
                         attrs={
                             'in_dtype': x.dtype,
                             'out_dtype': dtype
                         })

    result = helper.create_variable_for_type_inference(softmax_in.dtype)
    helper.append_op(type='softmax',
                     inputs={'X': softmax_in},
                     outputs={'Out': result},
                     attrs={
                         'axis': axis,
                         'use_cudnn': use_cudnn
                     })
    return result
Beispiel #12
0
def log_softmax(x, axis=-1, dtype=None, name=None):
    r"""
    Compute the logarithm of softmax along one axis of ``x``:

    .. math::

        \begin{aligned}
        log\_softmax[i, j] &= \log(softmax(x)) \\
        &= \log\left(\frac{\exp(X[i, j])}{\sum_j \exp(X[i, j])}\right)
        \end{aligned}

    Parameters:
        x (Tensor): The input Tensor with data type float32, float64
            (float16 also passes the static-graph type check when ``dtype``
            is None).
        axis (int, optional): The axis along which to perform log_softmax
            calculations. It should be in range [-D, D), where D is the
            dimensions of ``x`` . If ``axis`` < 0, it works the same way as
            :math:`axis + D` . Default is -1.
        dtype (str|np.dtype|core.VarDesc.VarType, optional): The desired data
            type of the output tensor. If dtype is specified, ``x`` is cast
            to ``dtype`` before the operation is performed. This is useful for
            preventing data type overflows. Supported dtype: float32, float64.
            If ``dtype`` is None, the output Tensor has the same dtype as x.
            Default is None.
        name (str, optional): Name for the operation (optional, default is None).
            For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        A Tensor with the same shape and data type (use ``dtype`` if it is
        specified) as x.

    Examples:
        .. code-block:: python

            import paddle
            import paddle.nn.functional as F

            x = [[[-2.0, 3.0, -4.0, 5.0],
                  [3.0, -4.0, 5.0, -6.0],
                  [-7.0, -8.0, 8.0, 9.0]],
                 [[1.0, -2.0, -3.0, 4.0],
                  [-5.0, 6.0, 7.0, -8.0],
                  [6.0, 7.0, 8.0, 9.0]]]
            x = paddle.to_tensor(x)
            out1 = F.log_softmax(x)
            out2 = F.log_softmax(x, dtype='float64')
            # out1's data type is float32; out2's data type is float64
            # out1 and out2's value is as follows:
            # [[[ -7.1278396   -2.1278396   -9.127839    -0.12783948]
            #   [ -2.1270514   -9.127051    -0.12705144 -11.127051  ]
            #   [-16.313261   -17.313261    -1.3132617   -0.31326184]]
            #  [[ -3.0518122   -6.051812    -7.051812    -0.051812  ]
            #   [-12.313267    -1.3132664   -0.3132665  -15.313267  ]
            #   [ -3.4401896   -2.4401896   -1.4401896   -0.44018966]]]
    """

    # Bring any dtype spec that is not already a VarType into VarType form.
    if dtype is not None and not isinstance(dtype, core.VarDesc.VarType):
        dtype = convert_np_dtype_to_dtype_(dtype)

    # Dygraph mode: invoke the ops directly, casting first only if requested.
    if in_dygraph_mode():
        source = x if dtype is None else _C_ops.cast(
            x, 'in_dtype', x.dtype, 'out_dtype', dtype)
        return _C_ops.log_softmax(source, 'axis', axis)

    # Static mode: validate the requested dtype when one is given, otherwise
    # validate the input's own dtype.
    if dtype is not None:
        check_dtype(dtype, 'dtype', ['float32', 'float64'], 'log_softmax',
                    'If dtype is not None, it only support float32 or float64.')
    else:
        check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
                                 'log_softmax')

    # NOTE: ``**locals()`` forwards exactly x, axis, dtype and name here; do
    # not bind any other local before this line on the static path.
    helper = LayerHelper("log_softmax", **locals())

    if dtype is None:
        op_input = x
    else:
        # Emit an explicit cast op so log_softmax runs in the requested dtype.
        op_input = helper.create_variable_for_type_inference(dtype)
        helper.append_op(type='cast',
                         inputs={'X': x},
                         outputs={'Out': op_input},
                         attrs={
                             'in_dtype': x.dtype,
                             'out_dtype': dtype
                         })

    result = helper.create_variable_for_type_inference(op_input.dtype)
    helper.append_op(type='log_softmax',
                     inputs={'X': op_input},
                     outputs={'Out': result},
                     attrs={'axis': axis})
    return result