def test_global_properties(self):
    print("Test_global_properties")
    _disable_legacy_dygraph()
    self.assertTrue(in_dygraph_mode())
    with _test_eager_guard():
        self.assertTrue(in_dygraph_mode())
    self.assertFalse(in_dygraph_mode())
def forward(self, input):
    if in_dygraph_mode():
        attrs = ('moving_rate', self._moving_rate, 'is_test',
                 not self.training)
        state = self._state if self.training else None
        accum = self._accum if self.training else None
        self._scale, _, _ = core.ops.moving_average_abs_max_scale(
            input, accum, state, self._scale, state, accum, *attrs)
        return self._scale

    check_variable_and_dtype(input, 'input', ['float32', 'float64'],
                             'MovingAverageAbsMaxScale')

    attrs = {'moving_rate': self._moving_rate, 'is_test': not self.training}
    inputs = {"X": [input]}
    outputs = {"OutScale": [self._scale]}

    if self.training:
        inputs['InState'] = [self._state]
        inputs['InAccum'] = [self._accum]
        outputs['OutState'] = [self._state]
        outputs['OutAccum'] = [self._accum]

    self._helper.append_op(type="moving_average_abs_max_scale",
                           inputs=inputs,
                           outputs=outputs,
                           attrs=attrs)

    return self._scale
def forward(self, input):
    if not in_dygraph_mode():
        _logger.error("NOT support static graph")

    feature_dim = int(input.shape[1])
    weight = self.weight[:feature_dim]
    bias = self.bias[:feature_dim]
    mean = self._mean[:feature_dim]
    variance = self._variance[:feature_dim]
    mean_out = mean
    variance_out = variance

    attrs = ("momentum", self._momentum, "epsilon", self._epsilon, "is_test",
             not self.training, "data_layout", self._data_layout,
             "use_mkldnn", False, "fuse_with_relu", self._fuse_with_relu,
             "use_global_stats", self._use_global_stats,
             'trainable_statistics', self._trainable_statistics)
    batch_norm_out, _, _, _, _, _ = core.ops.batch_norm(
        input, weight, bias, mean, variance, mean_out, variance_out, *attrs)
    return dygraph_utils._append_activation_in_dygraph(batch_norm_out,
                                                       act=self._act)
def forward(self, input):
    if in_dygraph_mode():
        attrs = ('pooling_type', self._pool_type, 'ksize', self._pool_size,
                 'global_pooling', self._global_pooling, 'strides',
                 self._pool_stride, 'paddings', self._pool_padding,
                 'use_cudnn', self._use_cudnn, 'ceil_mode', self._ceil_mode,
                 'use_mkldnn', False, 'exclusive', self._exclusive)
        return core.ops.pool3d(input, *attrs)

    check_variable_and_dtype(
        input, 'input', ['int8', 'uint8', 'float16', 'float32', 'float64'],
        'Pool3D')

    attrs = {
        "pooling_type": self._pool_type,
        "ksize": self._pool_size,
        "global_pooling": self._global_pooling,
        "strides": self._pool_stride,
        "paddings": self._pool_padding,
        "use_cudnn": self._use_cudnn,
        "ceil_mode": self._ceil_mode,
        "use_mkldnn": False,
        "exclusive": self._exclusive,
    }
    inputs = {"X": [input]}

    pool_out = self._helper.create_variable_for_type_inference(self._dtype)

    self._helper.append_op(type=self._l_type,
                           inputs={"X": input},
                           outputs={"Out": pool_out},
                           attrs=attrs)
    return pool_out
def param_guard(parameters):
    # Note: parameters is a reference of self._parameters or self._buffers
    if not framework.in_dygraph_mode() and parameters:
        origin_parameters = parameters.copy()
        for name, var_base in parameters.items():
            if isinstance(var_base, core.VarBase):
                # Convert ParamBase into Parameter with same attributes in dy2stat.
                if isinstance(var_base, framework.ParamBase):
                    new_var = var_base._to_static_var(to_parameter=True)
                else:
                    # Check whether it has been created before.
                    if var_base.name in var_base.block.vars:
                        new_var = var_base.block.vars[var_base.name]
                    # Note(Aurelius84): Convert VarBase in self._buffers into Variable with
                    # same attributes and set persistable=True to allow saving this var.
                    # Because users can create a VarBase in `__init__` like a
                    # `mask` Tensor or `hidden_0` in RNN layers, which is equivalent to a Parameter
                    # and necessary for inferring. It will be pruned if it's not necessary for inferring.
                    else:
                        # But if its shape is empty while created from `create_variable()`, we consider
                        # this buffer non-persistable. See case of `drop_state` in lstm api.
                        is_persistable = len(var_base.shape) > 0
                        new_var = var_base._to_static_var(
                            to_parameter=False, persistable=is_persistable)
                parameters[name] = new_var
        yield
        parameters.update(origin_parameters)
    else:
        yield
def _check_values_dtype_in_probs(self, param, value):
    """
    The log_prob and probs methods take an input ``value``. If value's dtype is
    different from param's, convert value's dtype to be consistent with param's dtype.

    Args:
        param (Tensor): low and high in Uniform class, loc and scale in Normal class.
        value (Tensor): The input tensor.

    Returns:
        value (Tensor): Change value's dtype if value's dtype is different from param.
    """
    if in_dygraph_mode():
        if value.dtype != param.dtype and convert_dtype(
                value.dtype) in ['float32', 'float64']:
            warnings.warn(
                "dtype of input 'value' needs to be the same as parameters of distribution class. dtype of 'value' will be converted."
            )
            return core.ops.cast(value, 'in_dtype', value.dtype, 'out_dtype',
                                 param.dtype)
        return value

    check_variable_and_dtype(value, 'value', ['float32', 'float64'],
                             'log_prob')
    if value.dtype != param.dtype:
        warnings.warn(
            "dtype of input 'value' needs to be the same as parameters of distribution class. dtype of 'value' will be converted."
        )
        return tensor.cast(value, dtype=param.dtype)
    return value
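# A minimal usage sketch (an assumption, not part of the source above): the
# distribution classes such as paddle.distribution.Normal run their `value`
# input through this dtype check, so a float64 `value` is cast to match
# float32 parameters and the warning above is emitted.
import paddle
from paddle.distribution import Normal

normal = Normal(loc=paddle.to_tensor(0.0), scale=paddle.to_tensor(1.0))
value = paddle.to_tensor([0.5], dtype='float64')
log_p = normal.log_prob(value)  # 'value' is cast to float32 to match loc/scale
print(log_p.dtype)              # expected: paddle.float32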
def enabled():
    """
    This function checks whether the program runs in dynamic graph mode or not.
    You can enter dynamic graph mode with :ref:`api_fluid_dygraph_guard` api,
    or enable and disable dynamic graph mode with :ref:`api_fluid_dygraph_enable_dygraph`
    and :ref:`api_fluid_dygraph_disable_dygraph` api .

    **Note**:
        ``fluid.dygraph.enabled`` is the alias of ``fluid.in_dygraph_mode``, and
        ``fluid.in_dygraph_mode`` is recommended to use.

    Returns:
        bool: Whether the program is running in dynamic graph mode.

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid

            fluid.enable_dygraph()  # Now we are in dygraph mode
            print(fluid.dygraph.enabled())  # True
            fluid.disable_dygraph()
            print(fluid.dygraph.enabled())  # False
    """
    return framework.in_dygraph_mode()
def segment_max(data, segment_ids, name=None):
    r"""
    Segment max operator.

    This operator calculates the maximum elements of input `data` which share
    the same index in `segment_ids`.
    It computes a tensor such that $out_i = \\max_{j} data_{j}$
    where max is over j such that `segment_ids[j] == i`.

    Args:
        data (tensor): a tensor, available data type float32, float64, int32, int64.
        segment_ids (tensor): a 1-d tensor, which has the same size as the first dimension of input data.
                            available data type is int32, int64.
        name (str, optional): Name for the operation (optional, default is None).
                            For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        output (Tensor): the reduced result.

    Examples:
        .. code-block:: python

            import paddle

            data = paddle.to_tensor([[1, 2, 3], [3, 2, 1], [4, 5, 6]], dtype='float32')
            segment_ids = paddle.to_tensor([0, 0, 1], dtype='int32')
            out = paddle.incubate.segment_max(data, segment_ids)
            #Outputs: [[3., 2., 3.], [4., 5., 6.]]

    """
    if in_dygraph_mode():
        out, tmp = _C_ops.final_state_segment_pool(data, segment_ids, "MAX")
        return out

    if _non_static_mode():
        out, tmp = _C_ops.segment_pool(data, segment_ids, 'pooltype', "MAX")
        return out

    check_variable_and_dtype(data, "X",
                             ("float32", "float64", "int32", "int64"),
                             "segment_pool")
    check_variable_and_dtype(segment_ids, "SegmentIds", ("int32", "int64"),
                             "segment_pool")

    helper = LayerHelper("segment_max", **locals())
    out = helper.create_variable_for_type_inference(dtype=data.dtype)
    summed_ids = helper.create_variable_for_type_inference(dtype=data.dtype)
    helper.append_op(type="segment_pool",
                     inputs={
                         "X": data,
                         "SegmentIds": segment_ids
                     },
                     outputs={
                         "Out": out,
                         "SummedIds": summed_ids
                     },
                     attrs={"pooltype": "MAX"})
    return out
def _ndarray_to_tensor(obj, return_numpy):
    if return_numpy:
        return obj
    if in_dygraph_mode():
        return paddle.to_tensor(obj)
    else:
        return _to_LodTensor(obj)
def frobenius_norm(input, dim=None, keepdim=False, name=None):
    """
    The frobenius norm OP is to calculate the frobenius norm of certain two
    dimensions of Tensor `input`.

    Args:
        input (Variable): Tensor, data type float32, float64.
        dim (list, optional): None for last two dimensions. Default None.
        keepdim (bool, optional): Whether to keep the dimensions as the `input`. Default False.
    """
    if dim is not None and not (isinstance(dim, list) and len(dim) == 2):
        raise ValueError(
            "The dim of frobenius norm op should be None or two elements list!"
        )
    if in_dygraph_mode():
        if dim is None:
            return core.ops.frobenius_norm(input, 'keep_dim', keepdim,
                                           'reduce_all', True)
        return core.ops.frobenius_norm(input, 'dim', dim, 'keep_dim', keepdim,
                                       'reduce_all', False)
    attrs = {'dim': dim, 'keep_dim': keepdim, 'reduce_all': False}
    if dim is None:
        attrs['reduce_all'] = True
    check_variable_and_dtype(input, 'input', ['float32', 'float64'],
                             'frobenius_norm')

    helper = LayerHelper('frobenius_norm', **locals())
    out = helper.create_variable_for_type_inference(
        dtype=helper.input_dtype())

    helper.append_op(type='frobenius_norm',
                     inputs={'X': input},
                     outputs={'Out': out},
                     attrs=attrs)
    return out
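# A minimal usage sketch (an assumption: this helper backs the public
# paddle.linalg.norm API when p='fro'): compute the Frobenius norm of a 2-D
# tensor, with and without keeping the reduced dimensions.
import paddle

x = paddle.to_tensor([[1.0, 2.0], [3.0, 4.0]])
fro = paddle.linalg.norm(x, p='fro')                              # sqrt(1+4+9+16) ~= 5.477
fro_keep = paddle.linalg.norm(x, p='fro', axis=[0, 1], keepdim=True)
print(float(fro), fro_keep.shape)                                 # 5.477..., [1, 1]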
def func_test_to_api_numpy_dtype(self):
    self.linear.to(dtype=np.float64)
    self.assertEqual(self.linear.weight.dtype,
                     paddle.fluid.core.VarDesc.VarType.FP64)
    self.assertEqual(self.linear.buf_name.dtype,
                     paddle.fluid.core.VarDesc.VarType.FP64)
    self.assertTrue(
        np.allclose(self.linear.weight.grad.numpy(), self.new_grad))
    self.assertEqual(self.linear.weight._grad_ivar().dtype,
                     paddle.fluid.core.VarDesc.VarType.FP64)

    self.linear.to()
    self.assertEqual(self.linear.weight.dtype,
                     paddle.fluid.core.VarDesc.VarType.FP64)
    self.assertEqual(self.linear.buf_name.dtype,
                     paddle.fluid.core.VarDesc.VarType.FP64)
    self.assertTrue(
        np.allclose(self.linear.weight.grad.numpy(), self.new_grad))
    self.assertEqual(self.linear.weight._grad_ivar().dtype,
                     paddle.fluid.core.VarDesc.VarType.FP64)

    for p in self.linear.parameters():
        if in_dygraph_mode():
            self.assertTrue(
                isinstance(p, paddle.fluid.framework.EagerParamBase))
        else:
            self.assertTrue(isinstance(p, paddle.fluid.framework.ParamBase))
def func(x, name=None):
    final_state_op_type = "final_state_%s" % op_type
    if in_dygraph_mode() and hasattr(_C_ops, final_state_op_type):
        op = getattr(_C_ops, final_state_op_type)
        return op(x)
    # TODO(dev): Because some ops' yaml has not been migrated.
    # Replace it with _in_legacy_dygraph while all yaml work is done.
    if _non_static_mode():
        op = getattr(_C_ops, op_type)
        return op(x)

    if op_type not in ["abs", "exp", "square"]:
        check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
                                 op_type)
    else:
        # abs exp square ops support dtype(int32, int64, float16, float32, float64)
        check_variable_and_dtype(x, 'x', [
            'int32', 'int64', 'float16', 'float32', 'float64', 'complex64',
            'complex128'
        ], op_type)

    helper = LayerHelper(op_type, **locals())

    output = helper.create_variable_for_type_inference(dtype=x.dtype)
    helper.append_op(type=op_type, inputs={"X": x}, outputs={"Out": output})
    return output
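# A minimal usage sketch (an assumption: `func` above is the body of unary
# element-wise ops such as paddle.sqrt and paddle.exp, generated from a
# template closed over `op_type`): the same call works in dygraph mode
# (dispatched to _C_ops) and in static graph mode (via LayerHelper.append_op).
import paddle

x = paddle.to_tensor([1.0, 4.0, 9.0])
print(paddle.sqrt(x).numpy())                  # [1. 2. 3.]
print(paddle.exp(paddle.zeros([2])).numpy())   # [1. 1.]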
def _parse_load_result(obj, return_numpy):

    def is_layer(obj):
        return isinstance(obj, fluid.Layer)

    def parse_layer(obj):
        temp_dict = _parse_load_result(obj.__dict__, False)
        obj.__dict__.update(temp_dict)
        return obj

    if _contain_x(obj, is_layer):
        if not in_dygraph_mode():
            raise ValueError(
                "Layer can only be loaded in dynamic graph mode, but now in static graph mode."
            )
        _parse_every_object(obj, is_layer, parse_layer)

    def tuple_to_tensor(obj):
        return _tuple_to_tensor(obj, return_numpy=return_numpy)

    def ndarray_to_tensor(obj):
        return _ndarray_to_tensor(obj, return_numpy=return_numpy)

    # tuple(name, ndarray) was converted from varbase of paddle2.1,
    # and all tuple(name, ndarray) are converted to tensor.
    if _contain_x(obj, _transformed_from_varbase):
        return _parse_every_object(obj, _transformed_from_varbase,
                                   tuple_to_tensor)
    # If there is no tuple(name, ndarray), it is considered to be saved by paddle2.0
    # or converted from LoDTensor, and all ndarrays are converted to tensor.
    else:
        return _parse_every_object(obj, _transformed_from_lodtensor,
                                   ndarray_to_tensor)
def forward(self, input):
    in_nc = int(input.shape[1])
    scale = self.scale[:in_nc]
    bias = self.bias[:in_nc]
    if in_dygraph_mode():
        out, _, _ = core.ops.instance_norm(input, scale, bias, 'epsilon',
                                           self._epsilon)
        return out

    check_variable_and_dtype(input, 'input', ['float32', 'float64'],
                             "SuperInstanceNorm")

    attrs = {"epsilon": self._epsilon}
    inputs = {"X": [input], "Scale": [scale], "Bias": [bias]}
    saved_mean = self._helper.create_variable_for_type_inference(
        dtype=self._dtype, stop_gradient=True)
    saved_variance = self._helper.create_variable_for_type_inference(
        dtype=self._dtype, stop_gradient=True)
    instance_norm_out = self._helper.create_variable_for_type_inference(
        self._dtype)

    outputs = {
        "Y": [instance_norm_out],
        "SavedMean": [saved_mean],
        "SavedVariance": [saved_variance]
    }

    self._helper.append_op(type="instance_norm",
                           inputs=inputs,
                           outputs=outputs,
                           attrs=attrs)
    return instance_norm_out
def forward(self, input, expand_ratio=None, channel=None):
    if not in_dygraph_mode():
        _logger.error("NOT support static graph")

    ### weight: (Cin, Cout)
    in_nc = int(input.shape[1])
    assert (
        expand_ratio == None or channel == None
    ), "expand_ratio and channel CANNOT be NOT None at the same time."
    if expand_ratio != None:
        out_nc = int(expand_ratio * self.base_output_dim)
    elif channel != None:
        out_nc = int(channel)
    else:
        out_nc = self.output_dim

    weight = self.weight[:in_nc, :out_nc]
    if self._bias_attr != False:
        bias = self.bias[:out_nc]
        use_bias = True

    pre_bias = _varbase_creator(dtype=input.dtype)
    core.ops.matmul(input, weight, pre_bias, 'transpose_X', False,
                    'transpose_Y', False, "alpha", 1)
    if self._bias_attr != False:
        pre_act = dygraph_utils._append_bias_in_dygraph(
            pre_bias, bias, axis=len(input.shape) - 1)
    else:
        pre_act = pre_bias

    return dygraph_utils._append_activation_in_dygraph(pre_act, self._act)
def segment_pool(data, segment_ids, pool_type, name=None):
    """
    Segment Operator.
    """
    pool_type = pool_type.upper()
    if in_dygraph_mode():
        out, tmp = core.ops.segment_pool(data, segment_ids, 'pooltype',
                                         pool_type)
        return out

    check_variable_and_dtype(data, "X", ("float32", "float64"),
                             "segment_pool")
    check_variable_and_dtype(segment_ids, "SegmentIds", ("int32", "int64"),
                             "segment_pool")

    helper = LayerHelper("segment_pool", **locals())
    out = helper.create_variable_for_type_inference(dtype=data.dtype)
    pool_ids = helper.create_variable_for_type_inference(dtype=data.dtype)
    helper.append_op(type="segment_pool",
                     inputs={
                         "X": data,
                         "SegmentIds": segment_ids
                     },
                     outputs={
                         "Out": out,
                         "SummedIds": pool_ids
                     },
                     attrs={"pooltype": pool_type})
    return out
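# A minimal usage sketch (an assumption: the public paddle.incubate.segment_*
# APIs dispatch into a generic segment_pool helper like the one above with the
# matching pool_type): rows 0 and 1 share segment 0, row 2 forms segment 1.
import paddle

data = paddle.to_tensor([[1., 2., 3.], [3., 2., 1.], [4., 5., 6.]])
segment_ids = paddle.to_tensor([0, 0, 1], dtype='int32')
print(paddle.incubate.segment_sum(data, segment_ids).numpy())
# [[4. 4. 4.]
#  [4. 5. 6.]]
print(paddle.incubate.segment_mean(data, segment_ids).numpy())
# [[2. 2. 2.]
#  [4. 5. 6.]]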
def fused_allreduce_gradients(parameter_list, hcg):
    data_parallel_group = None if hcg is None else hcg.get_data_parallel_group()
    logger.debug("dp start fuse allreduce gradients")
    apply_func = _apply_collective_grads_eager if in_dygraph_mode(
    ) else _apply_collective_grads
    with framework.no_grad():
        apply_func(parameter_list, data_parallel_group)
def forward(self, x, y):
    if in_dygraph_mode():
        sub = _C_ops.elementwise_sub(x, y)
        return _C_ops.final_state_p_norm(sub, self.p, 1, self.epsilon,
                                         self.keepdim, False)

    if _in_legacy_dygraph():
        sub = _C_ops.elementwise_sub(x, y)
        return _C_ops.p_norm(sub, 'axis', 1, 'porder', self.p, 'keepdim',
                             self.keepdim, 'epsilon', self.epsilon)

    check_variable_and_dtype(x, 'x', ['float32', 'float64'],
                             'PairwiseDistance')
    check_variable_and_dtype(y, 'y', ['float32', 'float64'],
                             'PairwiseDistance')
    sub = paddle.subtract(x, y)

    helper = LayerHelper("PairwiseDistance", name=self.name)
    attrs = {
        'axis': 1,
        'porder': self.p,
        'keepdim': self.keepdim,
        'epsilon': self.epsilon,
    }
    out = helper.create_variable_for_type_inference(dtype=x.dtype)
    helper.append_op(type='p_norm',
                     inputs={'X': sub},
                     outputs={'Out': out},
                     attrs=attrs)

    return out
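# A minimal usage sketch (an assumption: this forward belongs to
# paddle.nn.PairwiseDistance): the p-norm of (x - y) is reduced along axis 1.
import paddle

x = paddle.to_tensor([[1., 3.], [3., 3.]])
y = paddle.to_tensor([[5., 6.], [6., 7.]])
dist = paddle.nn.PairwiseDistance(p=2)
print(dist(x, y).numpy())  # [5. 5.]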
def _split_tensors(coalesced_grads_and_grad_vars):
    if _in_legacy_dygraph():
        for coalesced_grad, origin_grad_vars, grad_shapes in coalesced_grads_and_grad_vars:
            grad_var_len = [np.prod(g_shape) for g_shape in grad_shapes]
            framework._dygraph_tracer().trace_op(type='split',
                                                 inputs={'X': coalesced_grad},
                                                 outputs={'Out': origin_grad_vars},
                                                 attrs={
                                                     'sections': grad_var_len,
                                                     'axis': 0
                                                 })
            for g_var, g_shape in zip(origin_grad_vars, grad_shapes):
                _reshape_inplace(x=g_var, shape=g_shape)
                assert g_var.shape == g_shape
    elif in_dygraph_mode():
        for coalesced_grad, origin_grad_vars, grad_shapes in coalesced_grads_and_grad_vars:
            grad_var_len = [np.prod(g_shape) for g_shape in grad_shapes]
            attrs = ()
            attrs += ('sections', grad_var_len)
            attrs += ('axis', 0)
            _C_ops.split(coalesced_grad, origin_grad_vars, *attrs)
            for g_var, g_shape in zip(origin_grad_vars, grad_shapes):
                g_var.reshape_(shape=g_shape)
                assert g_var.shape == g_shape
def prepare_context(strategy=None):
    '''
    :api_attr: imperative
    '''
    if strategy is None:
        strategy = ParallelStrategy()
        strategy.nranks = Env().nranks
        strategy.local_rank = Env().local_rank
        strategy.trainer_endpoints = Env().trainer_endpoints
        strategy.current_endpoint = Env().current_endpoint
    if strategy.nranks < 2:
        return
    assert framework.in_dygraph_mode() is True, \
        "dygraph.prepare_context should be used with dygraph mode."
    place = framework._current_expected_place()
    assert place is not None, \
        "dygraph.prepare_context should be used in fluid.dygraph.guard(place) guard."
    if not parallel_helper._is_parallel_ctx_initialized():
        if isinstance(place, core.CUDAPlace):
            parallel_helper._set_parallel_ctx(
                core.NCCLParallelContext(strategy, place))
        else:
            # TODO(Yancey1989): add Gloo Parallel Context to support CPU parallel computation
            assert False, "Only support CUDAPlace for now."
        parallel_helper._init_parallel_ctx()
    return strategy
def histogram(input, bins=100, min=0, max=0):
    """
    Computes the histogram of a tensor. The elements are sorted into equal
    width bins between min and max. If min and max are both zero, the minimum
    and maximum values of the data are used.

    Args:
        input (Variable): A Tensor(or LoDTensor) with shape :math:`[N_1, N_2,..., N_k]` .
            The data type of the input Tensor should be float32, float64, int32, int64.
        bins (int): number of histogram bins
        min (int): lower end of the range (inclusive)
        max (int): upper end of the range (inclusive)

    Returns:
        Variable: Tensor or LoDTensor calculated by histogram layer. The data type is int64.

    Code Example 1:
        .. code-block:: python

            import paddle
            import numpy as np
            startup_program = paddle.static.Program()
            train_program = paddle.static.Program()
            with paddle.static.program_guard(train_program, startup_program):
                inputs = paddle.data(name='input', dtype='int32', shape=[2, 3])
                output = paddle.histogram(inputs, bins=5, min=1, max=5)
                place = paddle.CPUPlace()
                exe = paddle.static.Executor(place)
                exe.run(startup_program)
                img = np.array([[2, 4, 2], [2, 5, 4]]).astype(np.int32)
                res = exe.run(train_program, feed={'input': img}, fetch_list=[output])
                print(np.array(res[0]))  # [0,3,0,2,1]

    Code Example 2:
        .. code-block:: python

            import paddle
            paddle.disable_static(paddle.CPUPlace())
            inputs = paddle.to_tensor([1, 2, 1])
            result = paddle.histogram(inputs, bins=4, min=0, max=3)
            print(result)  # [0, 2, 1, 0]
            paddle.enable_static()
    """
    if in_dygraph_mode():
        return core.ops.histogram(input, "bins", bins, "min", min, "max", max)

    helper = LayerHelper('histogram', **locals())
    check_variable_and_dtype(input, 'X',
                             ['int32', 'int64', 'float32', 'float64'],
                             'histogram')
    out = helper.create_variable_for_type_inference(VarDesc.VarType.INT64)
    helper.append_op(type='histogram',
                     inputs={'X': input},
                     outputs={'Out': out},
                     attrs={
                         'bins': bins,
                         'min': min,
                         'max': max
                     })
    return out
def to_variable(value, block=None, name=None, zero_copy=None):
    """
    The API will create a ``Variable`` object from numpy\.ndarray or Variable object.

    Parameters:
        value(ndarray): The numpy\.ndarray object that needs to be converted, it can be
            multi-dimension, and the data type is one of numpy\.{float16, float32,
            float64, int16, int32, int64, uint8, uint16}.
        block(fluid.Block, optional): Which block this variable will be in. Default: None.
        name(str, optional): The default value is None. Normally there is no need for user
            to set this property. For more information, please refer to :ref:`api_guide_Name`
        zero_copy(bool, optional): Whether to share memory with the input numpy array.
            This parameter only works with CPUPlace and will be set to True when it is None.
            Default: None.

    Returns:
        Variable: ``Tensor`` created from the specified numpy\.ndarray object, data type
        and shape is the same as ``value`` .

    Examples:
        .. code-block:: python

            import numpy as np
            import paddle.fluid as fluid

            with fluid.dygraph.guard(fluid.CPUPlace()):
                x = np.ones([2, 2], np.float32)
                y = fluid.dygraph.to_variable(x, zero_copy=False)
                x[0][0] = -1
                y[0][0].numpy()  # array([1.], dtype=float32)
                y = fluid.dygraph.to_variable(x)
                x[0][0] = 0
                y[0][0].numpy()  # array([0.], dtype=float32)
    """
    if isinstance(value, np.ndarray):
        assert framework.in_dygraph_mode(
        ), "to_variable could only be called in dygraph mode"
        if not block:
            block = framework.default_main_program().current_block()
        py_var = framework.Variable(block,
                                    type=core.VarDesc.VarType.LOD_TENSOR,
                                    name=name,
                                    shape=value.shape,
                                    dtype=value.dtype,
                                    stop_gradient=True)
        var = py_var._ivar.value()
        tensor = var.get_tensor()
        if isinstance(framework._current_expected_place(),
                      framework.core.CPUPlace):
            if zero_copy is None:
                zero_copy = True
            tensor.set(value, framework._current_expected_place(), zero_copy)
        else:
            assert not zero_copy, "zero_copy mode can only be used with CPUPlace"
            tensor.set(value, framework._current_expected_place(), False)
        return py_var
    elif isinstance(value, framework.Variable):
        return value
    else:
        raise TypeError(
            "to_variable only accepts 'ndarray' and 'Variable' as value's input"
        )
def __call__(self, outputs, labels):
    labels = to_list(labels)
    if in_dygraph_mode():
        labels = [to_variable(l) for l in labels]
    losses = to_list(self.forward(to_list(outputs), labels))
    if not self.average:
        return losses
    return [fluid.layers.reduce_mean(l) for l in losses]
def __call__(self, var, block=None):
    """Initialize the input tensor with MSRA initialization.

    Args:
        var(Tensor): Tensor that needs to be initialized.
        block(Block, optional): The block in which initialization ops
            should be added. Used in static graph only, default None.

    Returns:
        The initialization op
    """
    block = self._check_block(block)

    assert isinstance(var, framework.Variable)
    assert isinstance(block, framework.Block)
    f_in, f_out = self._compute_fans(var)

    if self._seed == 0:
        self._seed = block.program.random_seed

    # to be compatible with fp16 initializers
    if var.dtype == paddle_dtypes.t_float16:
        out_dtype = paddle_dtypes.t_float32
        out_var = block.create_var(name=unique_name.generate(".".join(
            ['masra_init', var.name, 'tmp'])),
                                   shape=var.shape,
                                   dtype=out_dtype,
                                   type=VarDesc.VarType.LOD_TENSOR,
                                   persistable=False)
    else:
        out_dtype = var.dtype
        out_var = var

    fan = _calculate_correct_fan(var, self.mode)
    gain = _calculate_gain(self.nonlinearity, self.a)
    std = gain / math.sqrt(fan)
    op = block._prepend_op(type="gaussian_random",
                           outputs={"Out": out_var},
                           attrs={
                               "shape": out_var.shape,
                               "dtype": int(out_dtype),
                               "mean": 0.0,
                               "std": std,
                               "seed": self._seed
                           },
                           stop_gradient=True)

    if var.dtype == VarDesc.VarType.FP16:
        block.append_op(type="cast",
                        inputs={"X": out_var},
                        outputs={"Out": var},
                        attrs={
                            "in_dtype": out_var.dtype,
                            "out_dtype": var.dtype
                        })

    if not framework.in_dygraph_mode():
        var.op = op
        return op
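# A minimal usage sketch (an assumption: the __call__ above is a variant of
# the MSRA/Kaiming initializer behind paddle.nn.initializer.KaimingNormal):
# the weight is drawn from N(0, std^2) with std = gain / sqrt(fan).
import math
import paddle

init = paddle.nn.initializer.KaimingNormal()
linear = paddle.nn.Linear(64, 32,
                          weight_attr=paddle.ParamAttr(initializer=init))
# with fan_in = 64 and a relu-style gain of sqrt(2), the expected std is:
expected_std = math.sqrt(2.0) / math.sqrt(64)
print(expected_std, float(paddle.std(linear.weight)))  # sample std should be close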
def _find_varbase(self, obj):
    var_type = core.eager.Tensor if in_dygraph_mode() else core.VarBase
    if isinstance(obj, var_type):
        return [obj]
    if isinstance(obj, (list, tuple)):
        return itertools.chain(*map(self._find_varbase, obj))
    if isinstance(obj, dict):
        return itertools.chain(*map(self._find_varbase, obj.values()))
    return []
def __init__(self,
             size,
             num_partitions=1,
             gather_out=True,
             param_attr=None,
             bias_attr=None,
             name=None):
    super().__init__()

    if in_dygraph_mode():
        rank = paddle.distributed.get_rank()
        nranks = paddle.distributed.get_world_size()
    else:
        assert fleet._role_maker, ("To use paddle.distributed.split, "
                                   "you must call fleet.init() firstly.")
        rank = fleet.worker_index()
        nranks = fleet.worker_num()

    # rank within a model parallel group
    inner_rank = rank % num_partitions

    self.gather_out = gather_out
    assert size[1] % num_partitions == 0, (
        "Number of column of the weight for linear ({}) must be"
        " divisible by num_partitions ({})".format(size[1], num_partitions))
    self.per_part_size = size[1] // num_partitions
    linear_size = (size[0], self.per_part_size)

    num_rows, num_cols = linear_size

    if not name:
        name = "fc_by_col_rank_%d" % inner_rank
    else:
        name = name + "_by_col_rank_%d" % inner_rank

    self.linear = paddle.nn.Linear(num_rows,
                                   num_cols,
                                   weight_attr=param_attr,
                                   bias_attr=bias_attr,
                                   name=name)

    weight = self.linear.weight
    weight.is_distributed = True
    # alias for weight tensor
    self.weight = self.linear.weight

    startup_block = paddle.static.default_startup_program().global_block()
    main_block = paddle.static.default_main_program().global_block()
    startup_block.vars[weight.name].is_distributed = True
    main_block.vars[weight.name].is_distributed = True

    # set is_distributed for the split bias
    # if a linear layer is split by col, the bias is also split across ranks like its weight
    if self.linear._bias_attr != False:
        startup_block.vars[self.linear.bias.name].is_distributed = True
        main_block.vars[self.linear.bias.name].is_distributed = True
        self.bias = self.linear.bias
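# A minimal sketch of the idea behind the column-parallel split (not tied to
# the distributed runtime): splitting the weight by columns across two "ranks"
# and concatenating the partial outputs reproduces the full linear result,
# which is what this layer relies on when gather_out is True.
import paddle

x = paddle.rand([4, 8])
w = paddle.rand([8, 6])
full = paddle.matmul(x, w)
part0 = paddle.matmul(x, w[:, :3])   # "rank 0" holds the first 3 columns
part1 = paddle.matmul(x, w[:, 3:])   # "rank 1" holds the last 3 columns
print(bool(paddle.allclose(full, paddle.concat([part0, part1], axis=1))))  # True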
def step(self, time, inputs, states, **kwargs):
    # Steps for decoding.
    # Compared to RNN, Transformer has 3D data at every decoding step
    inputs = paddle.reshape(inputs, [-1, 1])  # token
    pos = paddle.ones_like(inputs) * time  # pos

    cell_states = map_structure(self._merge_batch_beams_with_var_dim,
                                states.cell_states)
    cell_outputs, next_cell_states = self.cell((inputs, pos), cell_states,
                                               **kwargs)

    # Squeeze to adapt to BeamSearchDecoder which use 2D logits
    cell_outputs = map_structure(
        lambda x: paddle.squeeze(x, [1]) if len(x.shape) == 3 else x,
        cell_outputs)
    cell_outputs = map_structure(self._split_batch_beams, cell_outputs)
    next_cell_states = map_structure(self._split_batch_beams_with_var_dim,
                                     next_cell_states)

    beam_search_output, beam_search_state = self._beam_search_step(
        time=time,
        logits=cell_outputs,
        next_cell_states=next_cell_states,
        beam_state=states)

    if kwargs.get("trg_word", None) is not None:
        if in_dygraph_mode():
            if paddle.shape(kwargs.get("trg_word"))[1] > time:
                beam_search_output, beam_search_state = self.force_decoding(
                    beam_search_output, beam_search_state,
                    kwargs.get("trg_word"), kwargs.get("trg_length"), time)
        else:

            def condition(trg_word, time):
                return paddle.shape(trg_word)[1] > time

            def default_fn(beam_search_output, beam_search_state):
                return beam_search_output, beam_search_state

            from functools import partial
            beam_search_output, beam_search_state = paddle.static.nn.case(
                [(condition(kwargs.get("trg_word"), time),
                  partial(self.force_decoding,
                          beam_search_output=beam_search_output,
                          beam_search_state=beam_search_state,
                          trg_word=kwargs.get("trg_word"),
                          trg_length=kwargs.get("trg_length"),
                          time=time))],
                default=partial(default_fn,
                                beam_search_output=beam_search_output,
                                beam_search_state=beam_search_state))

    next_inputs, finished = (beam_search_output.predicted_ids,
                             beam_search_state.finished)

    return (beam_search_output, beam_search_state, next_inputs, finished)
def init_reducer(self):
    layers_param = []
    params_set = set()
    for sublayer in self.sublayers():
        for _, param in sublayer.named_parameters(include_sublayers=False):
            if param is None or param in params_set:
                continue
            params_set.add(param)
            if not isinstance(param, self.var_dtype):
                raise TypeError("The data type of '%s' must be '%s'" %
                                (param.name, self.var_dtype))
            if param.trainable:
                layers_param.append((sublayer, param))

    trainable_parameters = [param for _, param in layers_param]

    assert len(trainable_parameters) > 0, \
        "This model does not have any parameters to train, and " \
        "does not need to use DataParallel"

    # NOTE(shenliang03): Here we can only use the attributes to judge whether
    # parameter is sparse (or SelectedRows). The reason is that the sparse message
    # can't be obtained when bp hasn't happened yet. So if layer supports sparse parameter,
    # we should add the layer here like "paddle.nn.layer.common.Embedding".
    def check_layer_sparse(sublayer):
        if isinstance(sublayer, paddle.nn.layer.common.Embedding):
            return sublayer._sparse
        # NOTE(shenliang03): This is for compatibility. If paddle.fluid.dygraph.Embedding
        # is removed in the future, the check will also be removed here.
        if isinstance(sublayer, paddle.fluid.dygraph.Embedding):
            return sublayer._is_sparse
        return False

    is_sparse_gradient = [
        check_layer_sparse(sublayer) for sublayer, _ in layers_param
    ]

    if in_dygraph_mode():
        self.group_indices = core.eager_assign_group_by_size(
            trainable_parameters, is_sparse_gradient,
            [self.last_comm_buffer_size, self.comm_buffer_size])

        self._reducer = core.EagerReducer(
            trainable_parameters, list(reversed(self.group_indices)),
            is_sparse_gradient, self.group.process_group,
            [self.last_comm_buffer_size, self.comm_buffer_size],
            self.find_unused_parameters)
    elif _in_legacy_dygraph():
        self.group_indices = core.assign_group_by_size(
            trainable_parameters, is_sparse_gradient,
            [self.last_comm_buffer_size, self.comm_buffer_size])

        self._reducer = core.Reducer(
            trainable_parameters, list(reversed(self.group_indices)),
            is_sparse_gradient, parallel_helper.__parallel_ctx__clz__,
            [self.last_comm_buffer_size, self.comm_buffer_size],
            self.find_unused_parameters)
def forward(self, input, config):
    in_nc = int(input.shape[1])
    out_nc = int(config['channel'])
    weight = self.weight[:in_nc, :out_nc, :, :]
    if in_dygraph_mode():
        op = getattr(core.ops, self._op_type)
        out = op(input, weight, 'output_size', self._output_size, 'strides',
                 self._stride, 'paddings', self._padding, 'dilations',
                 self._dilation, 'groups', self._groups, 'use_cudnn',
                 self._use_cudnn)
        pre_bias = out
        if self.bias is not None:
            bias = self.bias[:out_nc]
            pre_act = dygraph_utils._append_bias_in_dygraph(pre_bias, bias, 1)
        else:
            pre_act = pre_bias

        return dygraph_utils._append_activation_in_dygraph(pre_act,
                                                           act=self._act)

    check_variable_and_dtype(input, 'input',
                             ['float16', 'float32', 'float64'],
                             "SuperConv2DTranspose")

    inputs = {'Input': [input], 'Filter': [weight]}
    attrs = {
        'output_size': self._output_size,
        'strides': self._stride,
        'paddings': self._padding,
        'dilations': self._dilation,
        'groups': self._groups,
        'use_cudnn': self._use_cudnn
    }

    pre_bias = self._helper.create_variable_for_type_inference(
        dtype=input.dtype)
    self._helper.append_op(type=self._op_type,
                           inputs=inputs,
                           outputs={'Output': pre_bias},
                           attrs=attrs)

    if self.bias is not None:
        bias = self.bias[:out_nc]
        pre_act = self._helper.create_variable_for_type_inference(
            dtype=self._dtype)
        self._helper.append_op(type='elementwise_add',
                               inputs={
                                   'X': [pre_bias],
                                   'Y': [bias]
                               },
                               outputs={'Out': [pre_act]},
                               attrs={'axis': 1})
    else:
        pre_act = pre_bias

    out = self._helper.append_activation(pre_act, act=self._act)
    return out
def bernoulli(x, name=None):
    """
    For each element :math:`x_i` in input ``x``, take a sample from the
    Bernoulli distribution, also called two-point distribution, with success
    probability :math:`x_i`. The Bernoulli distribution with success probability
    :math:`x_i` is a discrete probability distribution with probability mass
    function

    .. math::
        p(y)=\\begin{cases}
            x_i,&y=1\\\\
            1-x_i,&y=0
        \\end{cases}.

    Args:
        x (Tensor): The input Tensor, it's data type should be float32, float64.
        name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.

    Returns:
        Tensor: A Tensor filled samples from Bernoulli distribution, whose shape and dtype are same as ``x``.

    Examples:
        .. code-block:: python
            :name: bernoulli-example

            import paddle

            paddle.set_device('cpu')  # on CPU device
            paddle.seed(100)

            x = paddle.rand([2,3])
            print(x)
            # [[0.55355281, 0.20714243, 0.01162981],
            #  [0.51577556, 0.36369765, 0.26091650]]

            out = paddle.bernoulli(x)
            print(out)
            # [[1., 0., 1.],
            #  [0., 1., 0.]]

    """
    if in_dygraph_mode():
        return _C_ops.final_state_bernoulli(x)

    if _in_legacy_dygraph():
        return _C_ops.bernoulli(x)

    check_variable_and_dtype(x, "x", ["float32", "float64"], "bernoulli")

    helper = LayerHelper("randint", **locals())
    out = helper.create_variable_for_type_inference(
        dtype=x.dtype)  # maybe set out to int32 ?
    helper.append_op(type='bernoulli',
                     inputs={"X": x},
                     outputs={'Out': out},
                     attrs={})
    out.stop_gradient = True
    return out