def log_prob(self, value):
    """Evaluate the log of the probability density/mass function.

    ``value`` is first normalised by ``_check_values_dtype_in_probs`` against
    ``self.low``. The result is ``log(mask) - log(high - low)`` where ``mask``
    is the product of the two indicator tensors ``low < value`` and
    ``value < high`` cast to the dtype of ``value``.

    Args:
      value (Tensor): The input tensor.

    Returns:
      Tensor: log probability.

    """
    value = self._check_values_dtype_in_probs(self.low, value)

    if not _non_static_mode():
        # Static graph: build the same expression out of graph layers and
        # give the final subtraction a distribution-specific name.
        name = self.name + '_log_prob'
        above_low = self.low < value
        below_high = value < self.high
        lower_mask = tensor.cast(above_low, dtype=value.dtype)
        upper_mask = tensor.cast(below_high, dtype=value.dtype)
        log_support = nn.log(lower_mask * upper_mask)
        log_width = nn.log(self.high - self.low)
        return elementwise_sub(log_support, log_width, name=name)

    # Dynamic mode: indicator masks for the (strict) bounds.
    above_low = self.low < value
    below_high = value < self.high
    lower_mask = _C_ops.cast(above_low, 'in_dtype', above_low.dtype,
                             'out_dtype', value.dtype)
    upper_mask = _C_ops.cast(below_high, 'in_dtype', below_high.dtype,
                             'out_dtype', value.dtype)
    log_support = nn.log(lower_mask * upper_mask)
    log_width = nn.log(self.high - self.low)
    return log_support - log_width
def probs(self, value):
    """Evaluate the probability density/mass function.

    ``value`` is first normalised by ``_check_values_dtype_in_probs`` against
    ``self.low``. The result is ``mask / (high - low)`` where ``mask`` is the
    product of the two indicator tensors ``low < value`` and ``value < high``
    cast to the dtype of ``value``.

    Args:
      value (Tensor): The input tensor.

    Returns:
      Tensor: probability.

    """
    value = self._check_values_dtype_in_probs(self.low, value)

    if not _non_static_mode():
        # Static graph: same expression, with a named division layer.
        name = self.name + '_probs'
        above_low = self.low < value
        below_high = value < self.high
        lower_mask = tensor.cast(above_low, dtype=value.dtype)
        upper_mask = tensor.cast(below_high, dtype=value.dtype)
        support = lower_mask * upper_mask
        width = self.high - self.low
        return elementwise_div(support, width, name=name)

    # Dynamic mode: indicator masks for the (strict) bounds.
    above_low = self.low < value
    below_high = value < self.high
    lower_mask = _C_ops.cast(above_low, 'in_dtype', above_low.dtype,
                             'out_dtype', value.dtype)
    upper_mask = _C_ops.cast(below_high, 'in_dtype', below_high.dtype,
                             'out_dtype', value.dtype)
    support = lower_mask * upper_mask
    width = self.high - self.low
    return support / width
def _check_values_dtype_in_probs(self, param, value):
    """
    Align the dtype of ``value`` with the dtype of a distribution parameter.

    Used by ``log_prob`` and ``probs``: when ``value`` and ``param`` disagree
    on dtype, a warning is emitted and ``value`` is cast to ``param.dtype``.

    Args:
        param (Tensor): low and high in Uniform class, loc and scale in Normal class.
        value (Tensor): The input tensor.

    Returns:
        value (Tensor): ``value`` cast to ``param.dtype`` when the dtypes
        differ (in dygraph mode only if ``value`` is float32/float64),
        otherwise ``value`` unchanged.
    """
    message = ("dtype of input 'value' needs to be the same as parameters "
               "of distribution class. dtype of 'value' will be converted.")

    if in_dygraph_mode():
        # Only floating-point inputs are converted here; anything else is
        # passed through untouched.
        needs_cast = (value.dtype != param.dtype
                      and convert_dtype(value.dtype) in ['float32', 'float64'])
        if not needs_cast:
            return value
        warnings.warn(message)
        return _C_ops.cast(value, 'in_dtype', value.dtype, 'out_dtype',
                           param.dtype)

    # Static graph: validate the variable first, then convert on mismatch.
    check_variable_and_dtype(value, 'value', ['float32', 'float64'],
                             'log_prob')
    dtype_differs = value.dtype != param.dtype
    if not dtype_differs:
        return value
    warnings.warn(message)
    return tensor.cast(value, dtype=param.dtype)
def astype(self, dtype):
    """
    Convert this Tensor to the given data type.

    Args:
        dtype: The target data type. A value that is not already a
            ``core.VarDesc.VarType`` is translated with
            ``convert_np_dtype_to_dtype_`` before the cast.

    Returns:
        Tensor: a new Tensor with target dtype

    Examples:
        .. code-block:: python

            import paddle
            import numpy as np

            original_tensor = paddle.ones([2, 2])
            print("original tensor's dtype is: {}".format(original_tensor.dtype))
            new_tensor = original_tensor.astype('float32')
            print("new tensor's dtype is: {}".format(new_tensor.dtype))

    """
    if isinstance(dtype, core.VarDesc.VarType):
        target_dtype = dtype
    else:
        target_dtype = convert_np_dtype_to_dtype_(dtype)
    return _C_ops.cast(self, 'in_dtype', self.dtype, 'out_dtype',
                       target_dtype)
def __call__(self, var, block=None):
    """Initialize the input tensor with Numpy array.

    FP16/BF16 targets are filled through a temporary FP32 variable that is
    cast back to the target dtype afterwards.

    Args:
        var(Tensor): Tensor that needs to be initialized.
        block(Block, optional): The block in which initialization ops
               should be added. Used in static graph only, default None.

    Returns:
        The ``assign_value`` op in static graph mode, ``None`` in dynamic
        mode.

    Raises:
        ValueError: If the dtype used for assignment is neither FP32 nor
            INT32, or if the array holds more than 1024 * 1024 * 1024
            elements.
    """
    block = self._check_block(block)

    assert isinstance(var, framework.Variable)
    assert isinstance(block, framework.Block)

    # to be compatible of fp16 initalizers
    low_precision = var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]
    if low_precision:
        out_dtype = VarDesc.VarType.FP32
        np_value = self._value.astype("float32")
        out_var = block.create_var(name=unique_name.generate(".".join(
            ['numpy_array_init', var.name, 'tmp'])),
                                   shape=var.shape,
                                   dtype=out_dtype,
                                   type=VarDesc.VarType.LOD_TENSOR,
                                   persistable=False)
    else:
        out_var = var
        out_dtype = var.dtype
        np_value = self._value

    if out_dtype == VarDesc.VarType.FP32:
        value_name = "fp32_values"
        to_scalar = float
    elif out_dtype == VarDesc.VarType.INT32:
        value_name = "int32_values"
        to_scalar = int
    else:
        # The dtype must be interpolated into the message; passing it as a
        # second positional argument leaves the "%s" unformatted.
        raise ValueError("Unsupported dtype %s" % (self._value.dtype, ))

    # Reject oversized inputs before materialising a Python list of every
    # element.
    if self._value.size > 1024 * 1024 * 1024:
        raise ValueError("The size of input is too big. Please consider "
                         "saving it to file and 'load_op' to load it")

    values = [to_scalar(v) for v in np_value.flat]

    if framework._non_static_mode():
        _C_ops.assign_value(out_var, 'shape', list(self._value.shape),
                            'dtype', out_dtype, value_name, values)
        if low_precision:
            var_tmp = _C_ops.cast(out_var, 'in_dtype', out_var.dtype,
                                  'out_dtype', var.dtype)
            var_tmp._share_underline_tensor_to(var)
        else:
            out_var._share_underline_tensor_to(var)
        return None

    op = block.append_op(type='assign_value',
                         outputs={'Out': out_var},
                         attrs={
                             'dtype': out_dtype,
                             'shape': list(self._value.shape),
                             value_name: values
                         },
                         stop_gradient=True)

    if low_precision:
        # Cast the FP32 temporary back into the low-precision target.
        block.append_op(type="cast",
                        inputs={"X": out_var},
                        outputs={"Out": var},
                        attrs={
                            "in_dtype": out_var.dtype,
                            "out_dtype": var.dtype
                        })

    var.op = op
    return op
def __call__(self, var, block=None):
    """Initialize the input tensor with Bilinear initialization.

    Every ``shape[2] x shape[3]`` slice of ``var`` is filled with the same
    separable kernel ``(1 - |x / f - c|) * (1 - |y / f - c|)``, where ``x``
    and ``y`` are the integer column and row indices inside the slice,
    ``f = ceil(size / 2)`` and ``c = (2f - 1 - f % 2) / (2f)``.

    FP16/BF16/FP64 targets are filled through a temporary FP32 variable that
    is cast back to the target dtype afterwards.

    Args:
        var(Tensor): Tensor that needs to be initialized.
        block(Block, optional): The block in which initialization ops
               should be added. Used in static graph only, default None.

    Returns:
        The ``assign_value`` op in static graph mode, ``None`` in dynamic
        mode.

    Raises:
        ValueError: If ``var``/``block`` have the wrong type, ``var`` is not
            4-D, its last two dimensions differ, or it has more than
            1024 * 1024 elements.
        TypeError: If the dtype used for assignment is not FP32.
    """
    block = self._check_block(block)

    if not isinstance(var, framework.Variable):
        raise ValueError("var must be framework.Variable.")

    if not isinstance(block, framework.Block):
        raise ValueError("block must be framework.Block.")

    shape = var.shape
    if len(shape) != 4:
        raise ValueError("the length of shape must be 4.")
    if shape[2] != shape[3]:
        raise ValueError("shape[2] must be equal to shape[3].")

    # to be compatible of fp16 initalizers
    needs_cast = var.dtype in [
        VarDesc.VarType.FP16, VarDesc.VarType.BF16, VarDesc.VarType.FP64
    ]
    if needs_cast:
        out_dtype = VarDesc.VarType.FP32
        out_var = block.create_var(name=unique_name.generate(".".join(
            ['bilinear_init', var.name, 'tmp'])),
                                   shape=var.shape,
                                   dtype=out_dtype,
                                   type=VarDesc.VarType.LOD_TENSOR,
                                   persistable=False)
    else:
        out_dtype = var.dtype
        out_var = var

    if out_dtype == VarDesc.VarType.FP32:
        value_name = "fp32_values"
    else:
        # The dtype must be interpolated into the message; passing it as a
        # second positional argument leaves the "%s" unformatted.
        raise TypeError("Unsupported dtype %s" % (var.dtype, ))

    # Validate the size before running the per-element Python loop below.
    if np.prod(shape) > 1024 * 1024:
        raise ValueError("The size of input is too big. ")

    weight = np.zeros(np.prod(var.shape), dtype='float32')
    size = shape[3]
    # factor
    f = np.ceil(size / 2.)
    # center
    c = (2 * f - 1 - f % 2) / (2. * f)
    for i in range(np.prod(shape)):
        x = i % size
        # Floor division: y must be the integer row index. True division
        # ("/") yields a fractional row and a non-separable kernel.
        y = (i // size) % size
        weight[i] = (1 - abs(x / f - c)) * (1 - abs(y / f - c))
    weight = np.reshape(weight, shape)
    values = [float(v) for v in weight.flat]

    if framework._non_static_mode():
        _C_ops.assign_value(out_var, 'shape', list(shape), 'dtype',
                            out_dtype, value_name, values)
        if needs_cast:
            var_tmp = _C_ops.cast(out_var, 'in_dtype', out_var.dtype,
                                  'out_dtype', var.dtype)
            var_tmp._share_underline_tensor_to(var)
        else:
            out_var._share_underline_tensor_to(var)
        return None

    op = block.append_op(type='assign_value',
                         outputs={'Out': [out_var]},
                         attrs={
                             'dtype': out_dtype,
                             'shape': list(shape),
                             value_name: values
                         })

    if needs_cast:
        # Cast the FP32 temporary back into the target dtype.
        block.append_op(type="cast",
                        inputs={"X": out_var},
                        outputs={"Out": var},
                        attrs={
                            "in_dtype": out_var.dtype,
                            "out_dtype": var.dtype
                        })

    var.op = op
    return op
def __call__(self, var, block=None):
    """Initialize the input tensor with MSRA initialization.

    Samples are drawn either uniformly from ``[-limit, limit]`` with
    ``limit = sqrt(6 / fan_in)`` or from a Gaussian with mean 0 and
    ``std = sqrt(2 / fan_in)``, depending on ``self._uniform``. FP16 targets
    (and BF16 targets in the Gaussian case) are sampled into an FP32
    temporary and cast back afterwards.

    Args:
        var(Tensor): Tensor that needs to be initialized.
        block(Block, optional): The block in which initialization ops
               should be added. Used in static graph only, default None.

    Returns:
        The random op in static graph mode, ``None`` in dynamic mode.
    """
    block = self._check_block(block)

    assert isinstance(var, framework.Variable)
    assert isinstance(block, framework.Block)
    computed_fan_in, _ = self._compute_fans(var)

    # An explicitly configured fan_in overrides the computed one.
    fan_in = self._fan_in if self._fan_in is not None else computed_fan_in

    if self._seed == 0:
        self._seed = block.program.random_seed

    # to be compatible of fp16 initalizers
    via_fp32 = var.dtype == VarDesc.VarType.FP16 or (
        var.dtype == VarDesc.VarType.BF16 and not self._uniform)
    if via_fp32:
        out_dtype = VarDesc.VarType.FP32
        tmp_name = unique_name.generate(".".join(
            ['masra_init', var.name, 'tmp']))
        out_var = block.create_var(name=tmp_name,
                                   shape=var.shape,
                                   dtype=out_dtype,
                                   type=VarDesc.VarType.LOD_TENSOR,
                                   persistable=False)
    else:
        out_dtype = var.dtype
        out_var = var

    # Uniform bound when self._uniform is set, Gaussian std otherwise.
    gain = 6.0 if self._uniform else 2.0
    scale = math.sqrt(gain / float(fan_in))

    if framework._non_static_mode():
        if self._uniform:
            sample = _C_ops.uniform_random('shape', out_var.shape, 'min',
                                           -scale, 'max', scale, 'seed',
                                           self._seed, 'dtype',
                                           int(out_dtype))
        elif in_dygraph_mode():
            place = _current_expected_place()
            sample = _C_ops.final_state_gaussian_random(
                out_var.shape, 0.0, scale, self._seed, out_dtype, place)
        else:
            sample = _C_ops.gaussian_random('shape', out_var.shape, 'dtype',
                                            int(out_dtype), 'mean', 0.0,
                                            'std', scale, 'seed', self._seed)

        result = sample
        if via_fp32:
            result = _C_ops.cast(sample, 'in_dtype', sample.dtype,
                                 'out_dtype', var.dtype)
        result._share_underline_tensor_to(var)
        return None

    if self._uniform:
        uniform_attrs = {
            "shape": out_var.shape,
            "dtype": int(out_dtype),
            "min": -scale,
            "max": scale,
            "seed": self._seed
        }
        op = block.append_op(type="uniform_random",
                             inputs={},
                             outputs={"Out": out_var},
                             attrs=uniform_attrs,
                             stop_gradient=True)
    else:
        gaussian_attrs = {
            "shape": out_var.shape,
            "dtype": int(out_dtype),
            "mean": 0.0,
            "std": scale,
            "seed": self._seed
        }
        op = block.append_op(type="gaussian_random",
                             outputs={"Out": out_var},
                             attrs=gaussian_attrs,
                             stop_gradient=True)

    if via_fp32:
        # Cast the FP32 temporary back into the target dtype.
        block.append_op(type="cast",
                        inputs={"X": out_var},
                        outputs={"Out": var},
                        attrs={
                            "in_dtype": out_var.dtype,
                            "out_dtype": var.dtype
                        })

    var.op = op
    return op
def __call__(self, var, block=None):
    """Initialize the input tensor with TruncatedNormal distribution.

    Samples use ``self._mean``, ``self._std_dev`` and ``self._seed``. FP16
    and BF16 targets are sampled as FP32 and cast back afterwards.

    Args:
        var(Tensor): Tensor that needs to be initialized.
        block(Block, optional): The block in which initialization ops
               should be added. Used in static graph only, default None.

    Returns:
        The ``truncated_gaussian_random`` op in static graph mode, ``None``
        in either dynamic mode.
    """
    block = self._check_block(block)

    assert isinstance(var, framework.Variable)
    assert isinstance(block, framework.Block)

    if self._seed == 0:
        self._seed = block.program.random_seed

    # to be compatible of fp16 initalizers
    low_precision = var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]
    if low_precision:
        out_dtype = VarDesc.VarType.FP32
        tmp_name = unique_name.generate(".".join(
            ['truncated_gaussian_random', var.name, 'tmp']))
        out_var = block.create_var(name=tmp_name,
                                   shape=var.shape,
                                   dtype=out_dtype,
                                   type=VarDesc.VarType.LOD_TENSOR,
                                   persistable=False)
    else:
        out_dtype = var.dtype
        out_var = var

    if in_dygraph_mode():
        sample = _C_ops.final_state_truncated_gaussian_random(
            var.shape, self._mean, self._std_dev, self._seed, out_dtype,
            _current_expected_place())
        if low_precision:
            sample = _C_ops.final_state_cast(sample, var.dtype)
        sample._share_underline_tensor_to(var)
        return None

    if _in_legacy_dygraph():
        sample = _C_ops.truncated_gaussian_random('shape', var.shape,
                                                  'dtype', out_dtype, 'mean',
                                                  self._mean, 'std',
                                                  self._std_dev, 'seed',
                                                  self._seed)
        if low_precision:
            sample = _C_ops.cast(sample, 'in_dtype', sample.dtype,
                                 'out_dtype', var.dtype)
        sample._share_underline_tensor_to(var)
        return None

    # Static graph: append the sampling op and, if needed, the cast.
    sampling_attrs = {
        "shape": var.shape,
        "dtype": out_dtype,
        "mean": self._mean,
        "std": self._std_dev,
        "seed": self._seed
    }
    op = block.append_op(type="truncated_gaussian_random",
                         outputs={"Out": out_var},
                         attrs=sampling_attrs,
                         stop_gradient=True)

    if low_precision:
        block.append_op(type="cast",
                        inputs={"X": out_var},
                        outputs={"Out": var},
                        attrs={
                            "in_dtype": out_var.dtype,
                            "out_dtype": var.dtype
                        })
    var.op = op
    return op
def __call__(self, var, block=None):
    """Initialize the input tensor with Uniform distribution.

    Samples use ``self._low``, ``self._high``, ``self._seed`` and the
    ``diag_*`` settings of this initializer. FP16 targets are sampled as
    FP32 and cast back afterwards.

    Args:
        var(Tensor): Tensor that needs to be initialized.
        block(Block, optional): The block in which initialization ops
               should be added. Used in static graph only, default None.

    Returns:
        The ``uniform_random`` op in static graph mode, ``None`` in dynamic
        mode.
    """
    block = self._check_block(block)

    assert isinstance(block, framework.Block)
    check_variable_and_dtype(var, "Out",
                             ["uint16", "float16", "float32", "float64"],
                             "uniform_random")

    if self._seed == 0:
        self._seed = block.program.random_seed

    # to be compatible of fp16 initializers
    is_fp16 = var.dtype == VarDesc.VarType.FP16
    if is_fp16:
        out_dtype = VarDesc.VarType.FP32
        tmp_name = unique_name.generate(".".join(
            ['uniform_random', var.name, 'tmp']))
        out_var = block.create_var(name=tmp_name,
                                   shape=var.shape,
                                   dtype=out_dtype,
                                   type=VarDesc.VarType.LOD_TENSOR,
                                   persistable=False)
    else:
        out_dtype = var.dtype
        out_var = var

    if framework._non_static_mode():
        sample = _C_ops.uniform_random(
            'shape', var.shape, 'min', self._low, 'max', self._high, 'seed',
            self._seed, 'dtype', out_dtype, 'diag_num', self._diag_num,
            'diag_step', self._diag_step, 'diag_val', self._diag_val)
        if is_fp16:
            sample = _C_ops.cast(sample, 'in_dtype', sample.dtype,
                                 'out_dtype', var.dtype)
        sample._share_underline_tensor_to(var)
        return None

    # Static graph: append the sampling op and, if needed, the cast.
    sampling_attrs = {
        "shape": var.shape,
        "dtype": out_dtype,
        "min": self._low,
        "max": self._high,
        "seed": self._seed,
        "diag_num": self._diag_num,
        "diag_step": self._diag_step,
        "diag_val": self._diag_val
    }
    op = block.append_op(type="uniform_random",
                         inputs={},
                         outputs={"Out": out_var},
                         attrs=sampling_attrs,
                         stop_gradient=True)

    if is_fp16:
        block.append_op(type="cast",
                        inputs={"X": out_var},
                        outputs={"Out": var},
                        attrs={
                            "in_dtype": out_var.dtype,
                            "out_dtype": var.dtype
                        })

    var.op = op
    return op
def __call__(self, var, block=None):
    """Initialize the input tensor with dirac initializer.

    The tensor is zero-filled, flattened, and 1.0 is scattered into selected
    flat positions; it is then reshaped back to its original shape. For each
    group ``i`` and each ``j < min(shape[0] // groups, shape[1])`` the
    selected position is ``(j + i * (shape[0] // groups), j, shape[2] // 2,
    ...)``, i.e. the centre of every trailing dimension. When ``var`` is not
    FP32, the work is done on an FP32 temporary that is cast into ``var`` at
    the end.

    Args:
        var(Tensor): Tensor that needs to be initialized.
        block(Block, optional): The block in which initialization ops
               should be added. Used in static graph only, default None.

    Returns:
        The most critical OP(scatter) in this initializer, which contains 7~8 ops in total.
        ``None`` is returned when the ``framework.in_dygraph_mode()`` branch
        is taken, because no op is appended to the block there.
    """
    block = self._check_block(block)
    assert isinstance(var, framework.Parameter)
    assert isinstance(block, framework.Block)
    check_variable_and_dtype(var, "Out",
                             ['float16', 'bfloat16', 'float32', 'float64'],
                             'Dirac')

    assert len(var.shape) in [
        3, 4, 5
    ], "Only Tensor with 3/4/5 dimensions can be initialized by Dirac"
    assert (var.shape[0] % self._groups
            ) == 0, "Tensor 0-dimension must be divisible by groups"

    # Non-FP32 targets are built in an FP32 temporary and cast at the end.
    if var.dtype != VarDesc.VarType.FP32:
        out_var = block.create_var(name=unique_name.generate(".".join(
            ['dirac', var.name, 'tmp'])),
                                   shape=var.shape,
                                   dtype=VarDesc.VarType.FP32,
                                   type=VarDesc.VarType.LOD_TENSOR,
                                   persistable=False)
    else:
        out_var = var
    # Stays None unless the static branch below appends the scatter op.
    op = None
    # Step 1: zero-fill the working tensor.
    if framework.in_dygraph_mode():
        with fluid.dygraph.no_grad():
            _C_ops.fill_constant(out_var, 'value', float(0), 'force_cpu',
                                 False, 'dtype', out_var.dtype, 'str_value',
                                 str(float(0)), 'shape', out_var.shape)
    else:
        block.append_op(type='fill_constant',
                        inputs={},
                        outputs={'Out': out_var},
                        attrs={
                            'value': float(0),
                            'dtype': out_var.dtype,
                            'shape': out_var.shape,
                        },
                        stop_gradient=True)

    origin_shape = var.shape
    num_per_group = origin_shape[0] // self._groups
    min_shape = min(num_per_group, origin_shape[1])

    # Step 2: compute, in plain Python, the flat indices that receive 1.0.
    idx_list = []
    value_list = []
    strides = []
    prod = 1
    # strides[k] is the product of all dimensions after k, i.e. the flat
    # offset of one step along dimension k.
    for dim in reversed(origin_shape):
        strides.insert(0, prod)
        prod *= dim
    for i in range(self._groups):
        for j in range(min_shape):
            value_list.append(1.0)
            offset = 0
            for (k, stride) in enumerate(strides):
                if (k == 0):
                    # dimension 0: j-th entry inside group i
                    offset += (j + i * num_per_group) * stride
                elif (k == 1):
                    # dimension 1: index j
                    offset += j * stride
                else:
                    # remaining dimensions: centre position
                    offset += origin_shape[k] // 2 * stride
            idx_list.append(offset)
    # Step 3: flatten the working tensor to 1-D so flat indices apply.
    if framework.in_dygraph_mode():
        with fluid.dygraph.no_grad():
            tmp_out, _ = _C_ops.reshape2(out_var, None, 'shape', [-1])
            tmp_out._share_underline_tensor_to(out_var)
    else:
        x_shape = block.create_var(name=unique_name.generate(".".join(
            [out_var.name, "XShape"])),
                                   dtype=out_var.dtype,
                                   shape=out_var.shape,
                                   type=VarDesc.VarType.LOD_TENSOR,
                                   persistable=False,
                                   stop_gradient=True)
        block.append_op(type="reshape2",
                        inputs={"X": out_var},
                        attrs={'shape': [-1]},
                        outputs={
                            "Out": out_var,
                            "XShape": x_shape
                        },
                        stop_gradient=True)

    # Step 4: materialise the index list as an INT64 tensor.
    index_tensor = block.create_var(
        name=unique_name.generate('scatter_index'),
        persistable=False,
        stop_gradient=True)

    if framework.in_dygraph_mode():
        with fluid.dygraph.no_grad():
            tmp_tensor = framework._varbase_creator()
            _C_ops.assign_value(tmp_tensor, 'shape', [len(idx_list)],
                                'dtype', VarDesc.VarType.INT64,
                                'int64_values', idx_list)
            tmp_tensor._share_underline_tensor_to(index_tensor)
    else:
        block.append_op(type='assign_value',
                        outputs={'Out': index_tensor},
                        attrs={
                            'dtype': VarDesc.VarType.INT64,
                            'shape': [len(idx_list)],
                            'int64_values': idx_list
                        },
                        stop_gradient=True)

    # Step 5: materialise the list of 1.0 values as an FP32 tensor.
    value_tensor = block.create_var(
        name=unique_name.generate('scatter_value'),
        persistable=False,
        stop_gradient=True)

    if framework.in_dygraph_mode():
        with fluid.dygraph.no_grad():
            tmp_tensor = framework._varbase_creator()
            _C_ops.assign_value(tmp_tensor, 'shape', [len(value_list)],
                                'dtype', VarDesc.VarType.FP32,
                                'fp32_values', value_list)
            tmp_tensor._share_underline_tensor_to(value_tensor)
    else:
        block.append_op(type='assign_value',
                        outputs={'Out': value_tensor},
                        attrs={
                            'dtype': VarDesc.VarType.FP32,
                            'shape': [len(value_list)],
                            'fp32_values': value_list
                        },
                        stop_gradient=True)

    # Step 6: scatter the values, restore the original shape, and cast into
    # ``var`` when an FP32 temporary was used.
    if framework.in_dygraph_mode():
        with fluid.dygraph.no_grad():
            # The trailing True presumably corresponds to the static op's
            # 'overwrite' attribute -- TODO confirm against the op signature.
            tmp_out = _C_ops.final_state_scatter(out_var, index_tensor,
                                                 value_tensor, True)
            tmp_out._share_underline_tensor_to(out_var)
            tmp_reshape_out, _ = _C_ops.reshape2(out_var, None, 'shape',
                                                 origin_shape)
            tmp_reshape_out._share_underline_tensor_to(out_var)
            if var.dtype != VarDesc.VarType.FP32:
                tmp_cast_out = _C_ops.cast(out_var, 'in_dtype',
                                           out_var.dtype, 'out_dtype',
                                           var.dtype)
                tmp_cast_out._share_underline_tensor_to(var)

    else:
        op = block.append_op(type="scatter",
                             inputs={
                                 "X": out_var,
                                 "Ids": index_tensor,
                                 "Updates": value_tensor
                             },
                             attrs={'overwrite': True},
                             outputs={"Out": out_var},
                             stop_gradient=True)
        x_shape = block.create_var(name=unique_name.generate(".".join(
            [out_var.name, "XShape"])),
                                   dtype=out_var.dtype,
                                   shape=out_var.shape,
                                   type=VarDesc.VarType.LOD_TENSOR,
                                   persistable=False,
                                   stop_gradient=True)
        block.append_op(type="reshape2",
                        inputs={"X": out_var},
                        attrs={'shape': origin_shape},
                        outputs={
                            "Out": out_var,
                            "XShape": x_shape
                        },
                        stop_gradient=True)
        if var.dtype != VarDesc.VarType.FP32:
            block.append_op(type="cast",
                            inputs={"X": out_var},
                            outputs={"Out": var},
                            attrs={
                                "in_dtype": out_var.dtype,
                                "out_dtype": var.dtype
                            },
                            stop_gradient=True)
    # NOTE(review): the branches above test framework.in_dygraph_mode() while
    # this guard tests in_dynamic_mode(); confirm the two predicates agree in
    # every mode this initializer is expected to run in.
    if not in_dynamic_mode():
        var.op = op
    return op
def softmax(x, axis=-1, dtype=None, name=None):
    r"""
    This operator implements the softmax layer. The calculation process is as follows:

    1. The dimension :attr:`axis` of ``x`` will be permuted to the last.

    2. Then ``x`` will be logically flattened to a 2-D matrix. The matrix's second
    dimension(row length) is the same as the dimension :attr:`axis` of ``x``,
    and the first dimension(column length) is the product of all other dimensions
    of ``x``. For each row of the matrix, the softmax operator squashes the
    K-dimensional(K is the width of the matrix, which is also the size of ``x``'s
    dimension :attr:`axis`) vector of arbitrary real values to a K-dimensional
    vector of real values in the range [0, 1] that add up to 1.

    3. After the softmax operation is completed, the inverse operations of steps 1 and 2
    are performed to restore the two-dimensional matrix to the same dimension as the ``x`` .

    It computes the exponential of the given dimension and the sum of exponential
    values of all the other dimensions in the K-dimensional vector input.
    Then the ratio of the exponential of the given dimension and the sum of
    exponential values of all the other dimensions is the output of the softmax
    operator.

    For each row :math:`i` and each column :math:`j` in the matrix, we have:

    .. math::

        softmax[i, j] = \frac{\exp(x[i, j])}{\sum_j \exp(x[i, j])}

    Example:

    .. code-block:: text

        Case 1:
          Input:
            x.shape = [2, 3, 4]
            x.data = [[[2.0, 3.0, 4.0, 5.0],
                       [3.0, 4.0, 5.0, 6.0],
                       [7.0, 8.0, 8.0, 9.0]],
                      [[1.0, 2.0, 3.0, 4.0],
                       [5.0, 6.0, 7.0, 8.0],
                       [6.0, 7.0, 8.0, 9.0]]]

          Attrs:
            axis = -1

          Output:
            out.shape = [2, 3, 4]
            out.data = [[[0.0320586 , 0.08714432, 0.23688282, 0.64391426],
                         [0.0320586 , 0.08714432, 0.23688282, 0.64391426],
                         [0.07232949, 0.19661193, 0.19661193, 0.53444665]],
                        [[0.0320586 , 0.08714432, 0.23688282, 0.64391426],
                         [0.0320586 , 0.08714432, 0.23688282, 0.64391426],
                         [0.0320586 , 0.08714432, 0.23688282, 0.64391426]]]

        Case 2:
          Input:
            x.shape = [2, 3, 4]
            x.data = [[[2.0, 3.0, 4.0, 5.0],
                       [3.0, 4.0, 5.0, 6.0],
                       [7.0, 8.0, 8.0, 9.0]],
                      [[1.0, 2.0, 3.0, 4.0],
                       [5.0, 6.0, 7.0, 8.0],
                       [6.0, 7.0, 8.0, 9.0]]]
          Attrs:
            axis = 1

          Output:
            out.shape = [2, 3, 4]
            out.data = [[[0.00657326, 0.00657326, 0.01714783, 0.01714783],
                         [0.01786798, 0.01786798, 0.04661262, 0.04661262],
                         [0.97555875, 0.97555875, 0.93623955, 0.93623955]],
                        [[0.00490169, 0.00490169, 0.00490169, 0.00490169],
                         [0.26762315, 0.26762315, 0.26762315, 0.26762315],
                         [0.72747516, 0.72747516, 0.72747516, 0.72747516]]]

    Parameters:
        x (Tensor): The input Tensor with data type float32, float64.
        axis (int, optional): The axis along which to perform softmax
            calculations. It should be in range [-D, D), where D is the
            dimensions of ``x`` . If ``axis`` < 0, it works the same way as
            :math:`axis + D` . Default is -1.
        dtype (str|np.dtype|core.VarDesc.VarType, optional): The data type of
            the output tensor, can be float32, float64. If specified, ``x`` is
            cast to ``dtype`` before the softmax is computed. Default is None.
        name (str, optional): Name for the operation (optional, default is None).
            For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        A Tensor with the same shape and data type (use ``dtype`` if it is
        specified) as x.

    Examples:
        .. code-block:: python

            import paddle
            import paddle.nn.functional as F
            import numpy as np

            x = np.array([[[2.0, 3.0, 4.0, 5.0],
                        [3.0, 4.0, 5.0, 6.0],
                        [7.0, 8.0, 8.0, 9.0]],
                        [[1.0, 2.0, 3.0, 4.0],
                        [5.0, 6.0, 7.0, 8.0],
                        [6.0, 7.0, 8.0, 9.0]]], 'float32')
            x = paddle.to_tensor(x)
            out1 = F.softmax(x)
            out2 = F.softmax(x, dtype='float64')
            # out1's data type is float32; out2's data type is float64
            # out1 and out2's value is as follows:
            # [[[0.0320586 , 0.08714432, 0.23688282, 0.64391426],
            #   [0.0320586 , 0.08714432, 0.23688282, 0.64391426],
            #   [0.07232949, 0.19661193, 0.19661193, 0.53444665]],
            # [[0.0320586 , 0.08714432, 0.23688282, 0.64391426],
            #   [0.0320586 , 0.08714432, 0.23688282, 0.64391426],
            #   [0.0320586 , 0.08714432, 0.23688282, 0.64391426]]]
    """

    # Normalise a user-supplied dtype to the framework's VarType enum.
    if (dtype is not None) and (not isinstance(dtype, core.VarDesc.VarType)):
        dtype = convert_np_dtype_to_dtype_(dtype)
    use_cudnn = True

    # Dynamic mode: optionally cast, then call the op directly.
    if in_dygraph_mode():
        outs_cast = x if dtype is None \
            else _C_ops.cast(x, 'in_dtype', x.dtype, 'out_dtype', dtype)
        return _C_ops.softmax(outs_cast, 'axis', axis, 'use_cudnn', use_cudnn)

    # Static graph: the input is checked only when no dtype is requested;
    # otherwise only the requested dtype is checked.
    if dtype is None:
        check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
                                 'softmax')
    else:
        check_dtype(dtype, 'dtype', ['float32', 'float64'], 'softmax',
                    'If dtype is not None, it only support float32 or float64.')

    # NOTE: every local name defined at this point is forwarded to
    # LayerHelper through **locals(); adding or renaming locals above this
    # line changes what the helper receives.
    helper = LayerHelper("softmax", **locals())
    outs_cast = x
    if dtype is not None:
        outs_cast = helper.create_variable_for_type_inference(dtype)
        helper.append_op(
            type='cast',
            inputs={'X': x},
            outputs={'Out': outs_cast},
            attrs={'in_dtype': x.dtype,
                   'out_dtype': dtype})

    outs_softmax = helper.create_variable_for_type_inference(outs_cast.dtype)
    helper.append_op(
        type='softmax',
        inputs={'X': outs_cast},
        outputs={'Out': outs_softmax},
        attrs={'axis': axis,
               'use_cudnn': use_cudnn})

    return outs_softmax
def log_softmax(x, axis=-1, dtype=None, name=None):
    r"""
    This operator implements the log_softmax layer. The calculation process is
    as follows:

    .. math::

        \begin{aligned}
        log\_softmax[i, j] &= \log(softmax(x)) \\
        &= \log\left(\frac{\exp(X[i, j])}{\sum_j \exp(X[i, j])}\right)
        \end{aligned}

    Parameters:
        x (Tensor): The input Tensor with data type float32, float64.
        axis (int, optional): The axis along which to perform log_softmax
            calculations. It should be in range [-D, D), where D is the
            dimensions of ``x`` . If ``axis`` < 0, it works the same way as
            :math:`axis + D` . Default is -1.
        dtype (str|np.dtype|core.VarDesc.VarType, optional): The desired data
            type of the output tensor. If dtype is specified, ``x`` is casted
            to ``dtype`` before the operation is performed. This is useful for
            preventing data type overflows. Supported dtype: float32, float64.
            If ``dtype`` is None, the output Tensor has the same dtype as x.
            Default is None.
        name (str, optional): Name for the operation (optional, default is None).
            For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        A Tensor with the same shape and data type (use ``dtype`` if it is
        specified) as x.

    Examples:
        .. code-block:: python

            import paddle
            import paddle.nn.functional as F

            x = [[[-2.0, 3.0, -4.0, 5.0],
                  [3.0, -4.0, 5.0, -6.0],
                  [-7.0, -8.0, 8.0, 9.0]],
                 [[1.0, -2.0, -3.0, 4.0],
                  [-5.0, 6.0, 7.0, -8.0],
                  [6.0, 7.0, 8.0, 9.0]]]
            x = paddle.to_tensor(x)
            out1 = F.log_softmax(x)
            out2 = F.log_softmax(x, dtype='float64')
            # out1's data type is float32; out2's data type is float64
            # out1 and out2's value is as follows:
            # [[[ -7.1278396   -2.1278396   -9.127839    -0.12783948]
            #   [ -2.1270514   -9.127051    -0.12705144 -11.127051  ]
            #   [-16.313261   -17.313261    -1.3132617   -0.31326184]]
            #  [[ -3.0518122   -6.051812    -7.051812    -0.051812  ]
            #   [-12.313267    -1.3132664   -0.3132665  -15.313267  ]
            #   [ -3.4401896   -2.4401896   -1.4401896   -0.44018966]]]
    """

    # Normalise a user-supplied dtype to the framework's VarType enum.
    if (dtype is not None) and (not isinstance(dtype, core.VarDesc.VarType)):
        dtype = convert_np_dtype_to_dtype_(dtype)

    # Dynamic mode: optionally cast, then call the op directly.
    if in_dygraph_mode():
        if dtype is not None:
            x = _C_ops.cast(x, 'in_dtype', x.dtype, 'out_dtype', dtype)
        return _C_ops.log_softmax(x, 'axis', axis)

    # Static graph: the input is checked only when no dtype is requested;
    # otherwise only the requested dtype is checked.
    if dtype is None:
        check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
                                 'log_softmax')
    else:
        check_dtype(dtype, 'dtype', ['float32', 'float64'], 'log_softmax',
                    'If dtype is not None, it only support float32 or float64.')

    # NOTE: every local name defined at this point is forwarded to
    # LayerHelper through **locals(); adding or renaming locals above this
    # line changes what the helper receives.
    helper = LayerHelper("log_softmax", **locals())
    out_cast = x
    if dtype is not None:
        out_cast = helper.create_variable_for_type_inference(dtype)
        helper.append_op(
            type='cast',
            inputs={'X': x},
            outputs={'Out': out_cast},
            attrs={'in_dtype': x.dtype,
                   'out_dtype': dtype})

    out = helper.create_variable_for_type_inference(out_cast.dtype)
    helper.append_op(
        type='log_softmax',
        inputs={'X': out_cast},
        outputs={'Out': out},
        attrs={'axis': axis})

    return out