def SerializeMultiPassDesc(self):
    """Convert all registered (pattern, replace) pairs into a serialized MultiPassDesc.

    The conversion runs in static-graph mode. When the caller is in dynamic
    mode, static mode is enabled for the duration of the conversion and
    dynamic mode is restored afterwards, including when the conversion raises.

    Returns:
        The result of ``MultiPassDesc.SerializeToString()``.
    """
    switch_static_mode = paddle.in_dynamic_mode()
    if switch_static_mode:
        paddle.enable_static()
    try:
        multi_pass_desc = pass_desc_pb2.MultiPassDesc()
        multi_pass_desc.pass_type = self._pass_type
        # Traverse all pass pairs and convert them to PassDesc data.
        # Here need to add cache in the future.
        for (pattern, replace) in self._pass_pairs:
            pass_desc = multi_pass_desc.pass_descs.add()
            # Convert ProgramDescs of pattern and replace subgraphs.
            pattern_vars, pattern_ops = self._func_to_program_desc(
                pattern, pass_desc.pattern)
            replace_vars, replace_ops = self._func_to_program_desc(
                replace, pass_desc.replace)
            self._convert_vars_to_pass_desc(pattern_vars, replace_vars,
                                            pass_desc)
            self._convert_ops_to_pass_desc(pattern_ops, replace_ops,
                                           pass_desc)
    finally:
        # Always hand the caller back the mode it started in; otherwise a
        # failing pattern/replace function would leave the process in
        # static mode.
        if switch_static_mode:
            paddle.disable_static()
    return multi_pass_desc.SerializeToString()
def save_pretrained(self, save_dir):
    """
    Write the model configuration and the model state into `save_dir`.

    The configuration is written by `self.save_model_config(save_dir)`. The
    model state is written to the file whose name is the first value of
    `self.resource_files_names`. The state is only saved in dynamic (dygraph)
    mode; in static mode a warning is logged instead.

    The `save_dir` can afterwards be passed to `from_pretrained` as
    `pretrained_model_name_or_path` to re-load the trained model.

    Args:
        save_dir (str): Directory to save files into. It is created if it
            does not exist and must not be an existing file.

    Example:
        .. code-block::

            from paddlenlp.transformers import BertForSequenceClassification

            model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
            model.save_pretrained('./trained_model/')
            # reload from save_directory
            model = BertForSequenceClassification.from_pretrained('./trained_model/')
    """
    assert not os.path.isfile(save_dir), \
        "Saving directory ({}) should be a directory, not a file".format(
            save_dir)
    os.makedirs(save_dir, exist_ok=True)

    # Configuration first, then the weights.
    self.save_model_config(save_dir)

    if not paddle.in_dynamic_mode():
        logger.warning(
            "Save pretrained model only supported dygraph mode for now!")
        return

    state_file = list(self.resource_files_names.values())[0]
    state_path = os.path.join(save_dir, state_file)
    paddle.save(self.state_dict(), state_path)
def hook(layer, input, output):
    """Forward hook that records one summary entry for ``layer``.

    The entry is stored in the enclosing ``summary`` mapping under the key
    ``"<ClassName>-<index>"`` and contains ``input_shape``, ``output_shape``,
    ``trainable`` (only when the layer has parameters) and ``nb_params``.
    The leading dimension of the recorded shapes is replaced by the enclosing
    ``batch_size`` (or ``-1`` for list/tuple outputs).

    Args:
        layer: The layer the hook is attached to.
        input: The layer inputs; only ``input[0].shape`` is read.
        output: The layer output, either a single object with ``.shape`` or
            a list/tuple of such objects.
    """
    class_name = str(layer.__class__).split(".")[-1].split("'")[0]

    try:
        layer_idx = int(layer._full_name.split('_')[-1])
    except Exception:
        # No usable numeric suffix on the layer name: number the entry by
        # its position in the summary instead. `Exception` (not a bare
        # except) so KeyboardInterrupt/SystemExit still propagate.
        layer_idx = len(summary)

    m_key = "%s-%i" % (class_name, layer_idx + 1)
    info = OrderedDict()
    summary[m_key] = info

    info["input_shape"] = list(input[0].shape)
    info["input_shape"][0] = batch_size
    if isinstance(output, (list, tuple)):
        info["output_shape"] = [[-1] + list(o.shape)[1:] for o in output]
    else:
        info["output_shape"] = list(output.shape)
        info["output_shape"][0] = batch_size

    params = 0
    if paddle.in_dynamic_mode():
        layer_state_dict = layer._parameters
    else:
        layer_state_dict = layer.state_dict()

    for k, v in layer_state_dict.items():
        params += np.prod(v.shape)
        # The flag is overwritten for every parameter, so the value kept is
        # the one computed for the last parameter iterated.
        try:
            param = getattr(layer, k)
            info["trainable"] = bool(
                getattr(param, 'trainable')
                and not getattr(param, 'stop_gradient'))
        except Exception:
            # Attribute lookup failed: keep the original best-effort default.
            info["trainable"] = True
    info["nb_params"] = params
def read_file_decode_jpeg(self):
    """Check that ``decode_jpeg`` output has the same CHW shape as the cv2 image.

    Does nothing when Paddle is not compiled with CUDA. The file
    ``'fake.jpg'`` is read and decoded with the ``'gray'``, ``'rgb'`` and
    default modes; only the default-mode result is compared against
    ``cv2.imread('fake.jpg')`` transposed to (2, 0, 1).
    """
    if not paddle.is_compiled_with_cuda():
        return

    img_bytes = read_file('fake.jpg')

    # Every mode is exercised; each call overwrites the previous result.
    img = decode_jpeg(img_bytes, mode='gray')
    img = decode_jpeg(img_bytes, mode='rgb')
    img = decode_jpeg(img_bytes)

    img_cv2 = cv2.imread('fake.jpg')

    if paddle.in_dynamic_mode():
        decoded_shape = img.shape
    else:
        # Static graph: the decoded image must be fetched through an executor.
        executor = paddle.static.Executor(paddle.CUDAPlace(0))
        executor.run(paddle.static.default_startup_program())
        fetched = executor.run(paddle.static.default_main_program(),
                               fetch_list=[img])
        decoded_shape = fetched[0].shape

    np.testing.assert_equal(decoded_shape, img_cv2.transpose(2, 0, 1).shape)
def setUp(self):
    """Prepare inputs, attributes and expected outputs for the ``lu_unpack`` op test.

    A random matrix of shape ``self.x_shape`` is LU-factorized with
    ``paddle.linalg.lu`` (directly in dynamic mode, through an executor in
    static mode). The factorization result is used as the op inputs, and
    ``self.set_output(x)`` is called before ``self.P``, ``self.L`` and
    ``self.U`` are read as the expected outputs.
    """
    self.op_type = "lu_unpack"
    self.config()
    x = np.random.random(self.x_shape).astype(self.dtype)

    if paddle.in_dynamic_mode():
        lu_t, pivots_t = paddle.linalg.lu(paddle.to_tensor(x))
        lu = lu_t.numpy()
        pivots = pivots_t.numpy()
    else:
        with fluid.program_guard(fluid.Program(), fluid.Program()):
            place = fluid.CPUPlace()
            if core.is_compiled_with_cuda():
                place = fluid.CUDAPlace(0)
            x_var = paddle.fluid.data(name="input",
                                      shape=self.x_shape,
                                      dtype=self.dtype)
            lu_var, pivots_var = paddle.linalg.lu(x_var)
            executor = fluid.Executor(place)
            results = executor.run(fluid.default_main_program(),
                                   feed={"input": x},
                                   fetch_list=[lu_var, pivots_var])
            lu = results[0]
            pivots = results[1]

    self.inputs = {'X': lu, 'Pivots': pivots}
    self.attrs = {
        'unpack_ludata': self.unpack_ludata,
        'unpack_pivots': self.unpack_pivots
    }
    self.set_output(x)
    self.outputs = {
        'Pmat': self.P,
        'L': self.L,
        'U': self.U,
    }
def sin(x, name=None):
    """
    Compute the sine of a sparse tensor. ``x`` must be a sparse coo or
    sparse csr tensor, and the call is only supported in dynamic mode.

    .. math::

        out = sin(x)

    Parameters:
        x (Tensor): The input Sparse Tensor with data type float32, float64.
        name (str, optional): Name for the operation (optional, default is None).
            For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        A Sparse Tensor with the same data type and shape as ``x`` .

    Raises:
        ValueError: If ``x`` is neither a sparse coo nor a sparse csr tensor.

    Examples:
        .. code-block:: python

            import paddle
            from paddle.fluid.framework import _test_eager_guard

            with _test_eager_guard():
                dense_x = paddle.to_tensor([-2, 0, 3], dtype='float32')
                sparse_x = dense_x.to_sparse_coo(1)
                out = paddle.incubate.sparse.sin(sparse_x)
    """
    assert in_dynamic_mode(), "Currently, Sparse API only support dynamic mode"

    is_sparse_input = x.is_sparse_coo() or x.is_sparse_csr()
    if not is_sparse_input:
        raise ValueError(
            "Currently, sparse.sin only support the input of SparseCooTensor or SparseCsrTensor"
        )
    return _C_ops.final_state_sparse_sin(x)
def randint_like(x, low=0, high=None, dtype=None, name=None):
    """
    Returns a Tensor filled with random integers from a discrete uniform
    distribution in the range [``low``, ``high``), with the same shape as ``x``.
    (use ``dtype`` if ``dtype`` is not None)
    If ``high`` is None (the default), the range is [0, ``low``).

    The values are always sampled as int64 and then cast to the requested
    ``dtype``.

    Args:
        x (Tensor): The input tensor which specifies shape. The dtype of ``x``
            can be bool, int32, int64, float16, float32, float64.
        low (int): The lower bound on the range of random values to generate.
            The ``low`` is included in the range. If ``high`` is None, the
            range is [0, ``low``) and ``low`` must be greater than 0.
            Default is 0.
        high (int, optional): The upper bound on the range of random values to
            generate, the ``high`` is excluded in the range. Default is None
            (see above for behavior if high = None).
        dtype (str|np.dtype, optional): The data type of the
            output tensor. Supported data types: bool, int32, int64, float16,
            float32, float64. If ``dtype`` is None, the data type is the
            same as x's data type. Default is None.
        name (str, optional): The default value is None.  Normally there is no
            need for user to set this property.  For more information, please
            refer to :ref:`api_guide_Name`.

    Returns:
        Tensor: A Tensor filled with random integers from a discrete uniform
        distribution in the range [``low``, ``high``), with ``shape`` and ``dtype``.

    Raises:
        ValueError: If ``high`` is None and ``low`` is not greater than 0, or
            if ``low`` is not less than ``high``.

    Examples:
        .. code-block:: python

            import paddle

            # example 1:
            # dtype is None and the dtype of x is float16
            x = paddle.zeros((1,2)).astype("float16")
            out1 = paddle.randint_like(x, low=-5, high=5)
            print(out1)
            print(out1.dtype)
            # [[0, -3]]  # random
            # paddle.float16

            # example 2:
            # dtype is None and the dtype of x is float32
            x = paddle.zeros((1,2)).astype("float32")
            out2 = paddle.randint_like(x, low=-5, high=5)
            print(out2)
            print(out2.dtype)
            # [[0, -3]]  # random
            # paddle.float32

            # example 3:
            # dtype is None and the dtype of x is float64
            x = paddle.zeros((1,2)).astype("float64")
            out3 = paddle.randint_like(x, low=-5, high=5)
            print(out3)
            print(out3.dtype)
            # [[0, -3]]  # random
            # paddle.float64

            # example 4:
            # dtype is None and the dtype of x is int32
            x = paddle.zeros((1,2)).astype("int32")
            out4 = paddle.randint_like(x, low=-5, high=5)
            print(out4)
            print(out4.dtype)
            # [[0, -3]]  # random
            # paddle.int32

            # example 5:
            # dtype is None and the dtype of x is int64
            x = paddle.zeros((1,2)).astype("int64")
            out5 = paddle.randint_like(x, low=-5, high=5)
            print(out5)
            print(out5.dtype)
            # [[0, -3]]  # random
            # paddle.int64

            # example 6:
            # dtype is float64 and the dtype of x is float32
            x = paddle.zeros((1,2)).astype("float32")
            out6 = paddle.randint_like(x, low=-5, high=5, dtype="float64")
            print(out6)
            print(out6.dtype)
            # [[0, -1]]  # random
            # paddle.float64

            # example 7:
            # dtype is bool and the dtype of x is float32
            x = paddle.zeros((1,2)).astype("float32")
            out7 = paddle.randint_like(x, low=-5, high=5, dtype="bool")
            print(out7)
            print(out7.dtype)
            # [[0, -1]]  # random
            # paddle.bool

            # example 8:
            # dtype is int32 and the dtype of x is float32
            x = paddle.zeros((1,2)).astype("float32")
            out8 = paddle.randint_like(x, low=-5, high=5, dtype="int32")
            print(out8)
            print(out8.dtype)
            # [[0, -1]]  # random
            # paddle.int32

            # example 9:
            # dtype is int64 and the dtype of x is float32
            x = paddle.zeros((1,2)).astype("float32")
            out9 = paddle.randint_like(x, low=-5, high=5, dtype="int64")
            print(out9)
            print(out9.dtype)
            # [[0, -1]]  # random
            # paddle.int64

            # example 10:
            # dtype is int64 and the dtype of x is bool
            x = paddle.zeros((1,2)).astype("bool")
            out10 = paddle.randint_like(x, low=-5, high=5, dtype="int64")
            print(out10)
            print(out10.dtype)
            # [[0, -1]]  # random
            # paddle.int64

    """
    # Single-bound form: `low` is really the exclusive upper bound.
    if high is None:
        if low <= 0:
            raise ValueError(
                "If high is None, low must be greater than 0, but received low = {0}."
                .format(low))
        high = low
        low = 0
    # Default to the dtype of `x`, then normalize to a VarType.
    if dtype is None:
        dtype = x.dtype
    if not isinstance(dtype, core.VarDesc.VarType):
        dtype = convert_np_dtype_to_dtype_(dtype)
    shape = x.shape

    if low >= high:
        raise ValueError(
            "randint_like's low must less then high, but received low = {0}, "
            "high = {1}".format(low, high))

    # Dynamic mode: sample int64 values directly, then cast to `dtype`.
    if paddle.in_dynamic_mode():
        shape = utils.convert_shape_to_list(shape)
        out = _C_ops.randint('shape', shape, 'low', low, 'high', high, 'seed',
                             0, 'dtype', core.VarDesc.VarType.INT64)
        out = paddle.cast(out, dtype)
        return out

    # Static mode: validate, then append a `randint` op followed by a cast.
    check_shape(shape, 'randint_like')
    check_dtype(dtype, 'dtype',
                ['bool', 'float16', 'float32', 'float64', 'int32', 'int64'],
                'randint_like')

    inputs = dict()
    attrs = {
        'low': low,
        'high': high,
        'seed': 0,
        'dtype': core.VarDesc.VarType.INT64
    }
    utils.get_shape_tensor_inputs(inputs=inputs,
                                  attrs=attrs,
                                  shape=shape,
                                  op_type='randint_like')

    # NOTE: every local defined above is forwarded to LayerHelper as a keyword
    # argument, so renaming or adding locals before this line changes the call.
    helper = LayerHelper("randint", **locals())
    out = helper.create_variable_for_type_inference(
        dtype=core.VarDesc.VarType.INT64)
    helper.append_op(type='randint',
                     inputs=inputs,
                     outputs={'Out': out},
                     attrs=attrs)
    out.stop_gradient = True
    out = paddle.cast(out, dtype)
    return out
def construct_samples_and_shuffle_data(name, data_prefix, documents, sizes,
                                       num_samples, seq_length, seed,
                                       build_data_file):
    """
    Build (or wait for) the doc/sample/shuffle index files, then load them.

    When ``build_data_file`` is true and any of the three ``.npy`` index
    files is missing, all three are built and saved. Otherwise this call
    polls every 3 seconds until the three files exist and the shuffle index
    can be loaded. The mappings are finally loaded memory-mapped and returned
    as ``(doc_idx, sample_idx, shuffle_idx)``.

    documents: document index from 0 to len(docs)
    sizes: the length list of all docs.
    num_samples: total step*bs iterations of data.
    seq_length: the sequence length.

    sum(sizes) = tokens_per_epoch
    data_nums = num_samples * micro_batch_size
    num_epochs = (data_nums + 1) // sum(sizes)
    len(doc_idx) = num_epochs * sum(sizes)
    """
    # Number of tokens in each epoch and number of required epochs.
    tokens_per_epoch = _num_tokens(documents, sizes)
    num_epochs = _num_epochs(tokens_per_epoch, seq_length, num_samples)

    # Rng state
    np_rng = np.random.RandomState(seed=seed)

    # Common prefix of the three index-mapping files.
    index_prefix = (data_prefix + '_{}_indexmap'.format(name) +
                    '_{}ns'.format(num_samples) +
                    '_{}sl'.format(seq_length))
    doc_idx_filename = index_prefix + '_doc_idx.npy'
    sample_idx_filename = index_prefix + '_sample_idx.npy'
    shuffle_idx_filename = index_prefix + '_shuffle_idx.npy'
    index_files = (doc_idx_filename, sample_idx_filename,
                   shuffle_idx_filename)

    def _any_index_missing():
        # True as soon as one of the three mapping files is not on disk.
        return not all(os.path.isfile(path) for path in index_files)

    if build_data_file:
        # Build the indexed mapping if not exist.
        if _any_index_missing():
            if num_epochs == 1:
                separate_last_epoch = False
            else:
                num_samples_from_epochs_minus_one = (
                    (num_epochs - 1) * tokens_per_epoch - 1) // seq_length
                last_epoch_num_samples = (num_samples -
                                          num_samples_from_epochs_minus_one)
                assert last_epoch_num_samples >= 0, \
                    'last epoch number of samples should be non-negative.'
                num_samples_per_epoch = (tokens_per_epoch - 1) // seq_length
                assert last_epoch_num_samples < (num_samples_per_epoch + 1), \
                    'last epoch number of samples exceeded max value.'
                separate_last_epoch = (last_epoch_num_samples < int(
                    0.80 * num_samples_per_epoch))

            # doc-idx. Note. len(doc_idx) = num_epochs * len(doc)
            tic = time.time()
            doc_idx = _build_doc_idx(documents, num_epochs, np_rng,
                                     separate_last_epoch)
            np.save(doc_idx_filename, doc_idx, allow_pickle=True)
            print(' > elasped time to build and save doc-idx mapping '
                  '(seconds): {:4f}'.format(time.time() - tic))

            # sample-idx. pos of each seq_len of data.
            tic = time.time()
            assert doc_idx.dtype == np.int32
            assert sizes.dtype == np.int32
            import data_tools.helpers as helpers
            sample_idx = helpers.build_sample_idx(sizes, doc_idx, seq_length,
                                                  num_epochs,
                                                  tokens_per_epoch)
            np.save(sample_idx_filename, sample_idx, allow_pickle=True)
            print(' > elasped time to build and save sample-idx mapping '
                  '(seconds): {:4f}'.format(time.time() - tic))

            # shuffle-idx.
            tic = time.time()
            if separate_last_epoch:
                num_samples_ = num_samples_from_epochs_minus_one
            else:
                num_samples_ = sample_idx.shape[0] - 1
            # Shuffle all seq len data.
            shuffle_idx = _build_shuffle_idx(num_samples_,
                                             sample_idx.shape[0] - 1, np_rng)
            np.save(shuffle_idx_filename, shuffle_idx, allow_pickle=True)
            print(' > elasped time to build and save shuffle-idx mapping'
                  ' (seconds): {:4f}'.format(time.time() - tic))
    else:
        # Not the builder: poll until the files exist and are loadable.
        while True:
            if _any_index_missing():
                time.sleep(3)
                continue
            try:
                np.load(shuffle_idx_filename,
                        allow_pickle=True,
                        mmap_mode='r')
                break
            except Exception:
                print(
                    "%s file is still writing or damaged, please wait a moment."
                    % shuffle_idx_filename)
                time.sleep(3)

    if paddle.distributed.get_world_size() > 1 and paddle.in_dynamic_mode():
        paddle.distributed.barrier()

    # Load mappings.
    doc_idx = np.load(doc_idx_filename, allow_pickle=True, mmap_mode='r')
    sample_idx = np.load(sample_idx_filename,
                         allow_pickle=True,
                         mmap_mode='r')
    shuffle_idx = np.load(shuffle_idx_filename,
                          allow_pickle=True,
                          mmap_mode='r')
    return doc_idx, sample_idx, shuffle_idx
def where(condition, x=None, y=None, name=None):
    r"""
    Return a tensor of elements selected from either $x$ or $y$, depending on $condition$.

    **Note**:
        ``paddle.where(condition)`` is identical to ``paddle.nonzero(condition, as_tuple=True)``.

    .. math::

        out_i =
        \begin{cases}
        x_i, \quad  \text{if}  \ condition_i \  is \ True \\
        y_i, \quad  \text{if}  \ condition_i \  is \ False \\
        \end{cases}

    Scalar ``x`` / ``y`` are first converted to 1-element tensors. When the
    shapes of ``condition``, ``x`` and ``y`` are not all equal, the three are
    broadcast against each other before the selection.

    Args:
        condition(Tensor): The condition to choose x or y. When True(nonzero), yield x, otherwise yield y.
        x(Tensor or Scalar, optional): x is a Tensor or Scalar with data type float32, float64, int32, int64. Either both or neither of x and y should be given.
        y(Tensor or Scalar, optional): y is a Tensor or Scalar with data type float32, float64, int32, int64. Either both or neither of x and y should be given.

        name(str, optional): The default value is None. Normally there is no
            need for user to set this property. For more information, please
            refer to :ref:`api_guide_Name`.

    Returns:
        Tensor: A Tensor with the same data type as x.

    Raises:
        ValueError: If exactly one of ``x`` and ``y`` is given.

    Examples:
        .. code-block:: python

          import paddle

          x = paddle.to_tensor([0.9383, 0.1983, 3.2, 1.2])
          y = paddle.to_tensor([1.0, 1.0, 1.0, 1.0])
          out = paddle.where(x>1, x, y)

          print(out)
          #out: [1.0, 1.0, 3.2, 1.2]

          out = paddle.where(x>1)
          print(out)
          #out: (Tensor(shape=[2, 1], dtype=int64, place=CPUPlace, stop_gradient=True,
          #            [[2],
          #             [3]]),)
    """
    # Promote Python/NumPy scalars to 1-element tensors of the matching dtype.
    if np.isscalar(x):
        x = paddle.full([1], x, np.array([x]).dtype.name)

    if np.isscalar(y):
        y = paddle.full([1], y, np.array([y]).dtype.name)

    # Condition-only form.
    if x is None and y is None:
        return nonzero(condition, as_tuple=True)

    if x is None or y is None:
        raise ValueError("either both or neither of x and y should be given")

    if not paddle.in_dynamic_mode():
        check_variable_and_dtype(condition, 'condition', ['bool'], 'where')
        check_variable_and_dtype(x, 'x',
                                 ['float32', 'float64', 'int32', 'int64'],
                                 'where')
        check_variable_and_dtype(y, 'y',
                                 ['float32', 'float64', 'int32', 'int64'],
                                 'where')

    condition_shape = list(condition.shape)
    x_shape = list(x.shape)
    y_shape = list(y.shape)

    if x_shape == y_shape and condition_shape == x_shape:
        # All shapes already agree: no broadcasting needed.
        broadcast_condition = condition
        broadcast_x = x
        broadcast_y = y
    else:
        if core.is_compiled_with_xpu():
            # XPU builds compute the selection arithmetically:
            # x * cond + y * (not cond).
            cond_int = paddle.cast(condition, x.dtype)
            cond_not_int = paddle.cast(logical_not(condition), x.dtype)
            out1 = paddle.multiply(x, cond_int)
            out2 = paddle.multiply(y, cond_not_int)
            out = paddle.add(out1, out2)
            return out

        # Broadcast by adding a zero tensor that carries the combined shape of
        # x, y and condition to each operand.
        zeros_like_x = paddle.zeros_like(x)
        zeros_like_y = paddle.zeros_like(y)
        zeros_like_condition = paddle.zeros_like(condition)
        zeros_like_condition = paddle.cast(zeros_like_condition, x.dtype)
        cast_cond = paddle.cast(condition, x.dtype)

        broadcast_zeros = paddle.add(zeros_like_x, zeros_like_y)
        broadcast_zeros = paddle.add(broadcast_zeros, zeros_like_condition)
        broadcast_x = paddle.add(x, broadcast_zeros)
        broadcast_y = paddle.add(y, broadcast_zeros)
        broadcast_condition = paddle.add(cast_cond, broadcast_zeros)
        broadcast_condition = paddle.cast(broadcast_condition, 'bool')

    if in_dygraph_mode():
        return _C_ops.final_state_where(broadcast_condition, broadcast_x,
                                        broadcast_y)
    else:
        if _in_legacy_dygraph():
            return _C_ops.where(broadcast_condition, broadcast_x, broadcast_y)
        else:
            # NOTE: every local defined above is forwarded to LayerHelper as a
            # keyword argument, so locals must not be renamed or added here.
            helper = LayerHelper("where", **locals())
            out = helper.create_variable_for_type_inference(dtype=x.dtype)

            helper.append_op(type='where',
                             inputs={
                                 'Condition': broadcast_condition,
                                 'X': broadcast_x,
                                 'Y': broadcast_y
                             },
                             outputs={'Out': [out]})

            return out
def get_samples_mapping(indexed_dataset, data_prefix, num_epochs,
                        max_num_samples, max_seq_length, short_seq_prob, seed,
                        name, binary_head, share_folder):
    """Get a list that maps a sample index to a starting sentence index,
    end sentence index, and length.

    The mapping is cached in a ``.npy`` file whose name is derived from the
    arguments. Rank 0 builds and saves the file when it is missing; every
    other caller polls every 3 seconds until the file exists and can be
    loaded. The file is then loaded memory-mapped and returned.

    Raises:
        ValueError: If neither ``num_epochs`` nor ``max_num_samples`` is set.
    """
    # Sentinel values meaning "no limit" for the unspecified argument.
    unlimited_epochs = np.iinfo(np.int32).max - 1
    unlimited_samples = np.iinfo(np.int64).max - 1

    if not num_epochs:
        if not max_num_samples:
            raise ValueError("Need to specify either max_num_samples "
                             "or num_epochs")
        num_epochs = unlimited_epochs
    if not max_num_samples:
        max_num_samples = unlimited_samples

    # Filename of the index mapping
    name_parts = [data_prefix, '_{}_indexmap'.format(name)]
    if num_epochs != unlimited_epochs:
        name_parts.append('_{}ep'.format(num_epochs))
    if max_num_samples != unlimited_samples:
        name_parts.append('_{}mns'.format(max_num_samples))
    name_parts.append('_{}msl'.format(max_seq_length))
    name_parts.append('_{:0.2f}ssp'.format(short_seq_prob))
    name_parts.append('_{}s'.format(seed))
    name_parts.append('.npy')
    indexmap_filename = ''.join(name_parts)

    if fleet.local_rank() is None:
        local_rank = 0
    else:
        local_rank = int(fleet.local_rank())
    if share_folder:
        local_rank = fleet.worker_index()

    # Build the indexed mapping if not exist.
    must_build = local_rank == 0 and not os.path.isfile(indexmap_filename)
    if must_build:
        print(' > WARNING: could not find index map file {}, building '
              'the indices on rank 0 ...'.format(indexmap_filename))

        # Make sure the types match the helpers input types.
        assert indexed_dataset.doc_idx.dtype == np.int64
        print(indexed_dataset.sizes.dtype)
        assert indexed_dataset.sizes.dtype == np.int32

        # Build samples mapping
        verbose = local_rank == 0
        build_started = time.time()
        print_rank_0(
            ' > building sapmles index mapping for {} ...'.format(name))
        # First compile and then import.
        if local_rank == 0:
            compile_helper()
        import data_tools.helpers as helpers
        samples_mapping = helpers.build_mapping(indexed_dataset.doc_idx,
                                                indexed_dataset.sizes,
                                                num_epochs, max_num_samples,
                                                max_seq_length,
                                                short_seq_prob, seed, verbose,
                                                2 if binary_head else 1)
        print_rank_0(' > done building sapmles index maping')
        np.save(indexmap_filename, samples_mapping, allow_pickle=True)
        print_rank_0(
            ' > saved the index mapping in {}'.format(indexmap_filename))
        # Make sure all the ranks have built the mapping
        print_rank_0(' > elasped time to build and save samples mapping '
                     '(seconds): {:4f}'.format(time.time() - build_started))
    else:
        # Wait until the mapping file exists and is fully written.
        while True:
            if not os.path.isfile(indexmap_filename):
                time.sleep(3)
                continue
            try:
                np.load(indexmap_filename, allow_pickle=True, mmap_mode='r')
                break
            except Exception:
                print(
                    "%s file is still writing or damaged, please wait a moment."
                    % indexmap_filename)
                time.sleep(3)

    # This should be a barrier but nccl barrier assumes
    # device_index=rank which is not the case for model
    # parallel case
    if paddle.distributed.get_world_size() > 1 and paddle.in_dynamic_mode():
        paddle.distributed.barrier()

    # Load indexed dataset.
    print_rank_0(
        ' > loading indexed mapping from {}'.format(indexmap_filename))
    load_started = time.time()
    samples_mapping = np.load(indexmap_filename,
                              allow_pickle=True,
                              mmap_mode='r')
    print_rank_0(
        '    loaded indexed file in {:3.3f} seconds'.format(time.time() -
                                                            load_started))
    print_rank_0('    total number of samples: {}'.format(
        samples_mapping.shape[0]))

    return samples_mapping
def save_quantized_model(self, layer, path, input_spec=None, **config):
    """
    Save the quantized model for the inference.

    The layer is first saved with ``paddle.jit.save``. The saved inference
    program is then reloaded in static mode, the collected output scales in
    ``self._out_scale_dict`` are written to the matching ops as the
    ``out_threshold`` attribute, and the program is saved again in place.
    If the caller was in dynamic mode, dynamic mode is restored before
    returning, including when the static-mode post-processing raises.

    Args:
        layer (Layer): The Layer to be saved.
        path (str): The path prefix to save model. The format is
            ``dirname/file_prefix`` or ``file_prefix``.
        input_spec (list[InputSpec|Tensor], optional): Describes the input
            of the saved model's forward method, which can be described by
            InputSpec or example Tensor. If None, all input variables of
            the original Layer's forward method would be the inputs of
            the saved model. Default None.
        **config (dict, optional): Other save configuration options for
            compatibility. We do not recommend using these configurations,
            they may be removed in the future. If not necessary, DO NOT use
            them. Default None.
            The following options are currently supported:
            (1) output_spec (list[Tensor]): Selects the output targets of
            the saved model. By default, all return variables of original
            Layer's forward method are kept as the output of the saved model.
            If the provided ``output_spec`` list is not all output variables,
            the saved model will be pruned according to the given
            ``output_spec`` list.

    Returns:
        None
    """
    assert isinstance(
        layer, dygraph.Layer), "model must be the instance of dygraph.Layer"

    is_dynamic_mode = False
    with dygraph.guard():
        layer.eval()
        # The scale-collecting hooks are no longer needed once saving starts.
        for handle in self._register_hook_handle_list:
            handle.remove()
        # Convert every collected scale to a plain Python float.
        for key in self._out_scale_dict:
            self._out_scale_dict[key] = float(
                self._out_scale_dict[key].numpy())

    paddle.jit.save(layer=layer, path=path, input_spec=input_spec, **config)

    if paddle.in_dynamic_mode():
        is_dynamic_mode = True
        paddle.enable_static()

    try:
        if core.is_compiled_with_cuda():
            place = core.CUDAPlace(0)
        else:
            place = core.CPUPlace()
        exe = Executor(place)

        file_prefix = os.path.basename(path)
        dirname = os.path.dirname(path)
        model_filename = file_prefix + INFER_MODEL_SUFFIX
        params_filename = file_prefix + INFER_PARAMS_SUFFIX

        [inference_program, feed_target_names, fetch_targets
         ] = (load_inference_model(dirname=dirname,
                                   executor=exe,
                                   model_filename=model_filename,
                                   params_filename=params_filename))

        # Traverse all ops in the program and find out the op matching
        # the Layer in the dynamic graph.
        layer_var_dict = {}
        for block in inference_program.blocks:
            for op in block.ops:
                if op.type in _op_real_in_out_name:
                    output_var_names = quantization_pass._get_op_output_var_names(
                        op)
                    for output_var_name in output_var_names:
                        output_var_tensor = block.var(output_var_name)
                        if output_var_tensor.dtype not in [
                                core.VarDesc.VarType.FP64,
                                core.VarDesc.VarType.FP32
                        ]:
                            continue
                        # Because the Layer in dygraph may correspond to multiple ops
                        # in static program after being saved. To ensure correctness,
                        # the outscale collected for output of dygraph Layer can only
                        # be set to the last op in the corresponding ops in static program.
                        #
                        # We can judge the execution order of the ops which corresponding
                        # to dygraph Layer by the name of output. And use dict to save
                        # the corresponding relationship between the dygraph Layer and the
                        # static graph op that needs to set the outscale attribute.
                        if '.' not in output_var_name:
                            continue
                        dynamic_layer_name, var_name_suffix = output_var_name.split(
                            ".")
                        if dynamic_layer_name in layer_var_dict:
                            # NOTE(review): suffixes are compared as strings
                            # here, not numerically - confirm that matches
                            # the variable naming scheme.
                            if layer_var_dict[dynamic_layer_name][
                                    0] < var_name_suffix:
                                layer_var_dict[dynamic_layer_name] = [
                                    var_name_suffix, op
                                ]
                        else:
                            layer_var_dict[dynamic_layer_name] = [
                                var_name_suffix, op
                            ]

        # Because the naming styles of static and dynamic graph are different,
        # in order to avoid mistakes, we unify the name here.
        for (layer_name, var_name_op_list) in layer_var_dict.items():
            if 'prelu' in layer_name:
                layer_name = layer_name.replace('prelu', 'p_re_lu')
            if 'relu' in layer_name:
                layer_name = layer_name.replace('relu', 're_lu')
            if layer_name not in self._out_scale_dict:
                continue
            var_name_op_list[1]._set_attr('out_threshold',
                                          self._out_scale_dict[layer_name])

        # Save the processed program.
        save_inference_model(dirname=dirname,
                             feeded_var_names=feed_target_names,
                             target_vars=fetch_targets,
                             executor=exe,
                             main_program=inference_program.clone(),
                             model_filename=model_filename,
                             params_filename=params_filename)
    finally:
        # Restore the caller's mode even if loading, patching or saving the
        # inference program failed; otherwise the process stays in static
        # mode.
        if is_dynamic_mode:
            paddle.disable_static()
def test_dygraph_mode(self):
    """Assert that the test is running in dynamic (dygraph) mode."""
    running_dynamic = paddle.in_dynamic_mode()
    self.assertTrue(
        running_dynamic,
        'Default Mode of Unittest should be dygraph mode, but get static mode.'
    )
def local_response_norm(x,
                        size,
                        alpha=1e-4,
                        beta=0.75,
                        k=1.,
                        data_format="NCHW",
                        name=None):
    r"""
    Local Response Normalization performs a type of "lateral inhibition" by
    normalizing over local input regions. For more information, please refer to
    `ImageNet Classification with Deep Convolutional Neural Networks <https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf>`_

    The formula is as follows:

    .. math::

        Output(i, x, y) = Input(i, x, y) / \left(k + \alpha \sum\limits^{\min(C-1, i + size/2)}_{j = \max(0, i - size/2)}(Input(j, x, y))^2\right)^{\beta}

    In the above equation:

    - :math:`size` : The number of channels to sum over.
    - :math:`k` : The offset (avoid being divided by 0).
    - :math:`\alpha` : The scaling parameter.
    - :math:`\beta` : The exponent parameter.

    The squared input is padded along the channel axis and average-pooled
    with a window of ``size`` channels; the result is scaled by ``alpha``,
    offset by ``k``, raised to ``beta`` and used to divide ``x``.

    Args:
        x (Tensor): The input 3-D/4-D/5-D tensor. The data type is float32.
        size (int): The number of channels to sum over.
        alpha (float, optional): The scaling parameter, positive. Default:1e-4
        beta (float, optional): The exponent, positive. Default:0.75
        k (float, optional): An offset, positive. Default: 1.0
        data_format (str, optional): Specify the data format of the input, and the data format of the output
            will be consistent with that of the input. An optional string from:
            If x is 3-D Tensor, the string could be `"NCL"` or `"NLC"` . When it is `"NCL"`,
            the data is stored in the order of: `[batch_size, input_channels, feature_length]`.
            If x is 4-D Tensor, the string could be  `"NCHW"`, `"NHWC"`. When it is `"NCHW"`,
            the data is stored in the order of: `[batch_size, input_channels, input_height, input_width]`.
            If x is 5-D Tensor, the string could be  `"NCDHW"`, `"NDHWC"` . When it is `"NCDHW"`,
            the data is stored in the order of: `[batch_size, input_channels, input_depth, input_height, input_width]`.
        name (str, optional): Name for the operation (optional, default is None). For more information,
            please refer to :ref:`api_guide_Name`.

    Returns:
        A tensor storing the transformation result with the same shape and data type as input.

    Raises:
        ValueError: If ``data_format`` is not supported, if ``x`` has fewer
            than 3 dimensions, or if any non-batch dimension is not larger
            than 0.

    Examples:

    .. code-block:: python

        import paddle

        x = paddle.rand(shape=(3, 3, 112, 112), dtype="float32")
        y = paddle.nn.functional.local_response_norm(x, size=5)
        print(y.shape)  # [3, 3, 112, 112]
    """
    if not in_dynamic_mode():
        check_variable_and_dtype(x, 'x', ['float32'], 'local_response_norm')

    supported_formats = ['NCL', 'NLC', 'NCHW', 'NHWC', 'NCDHW', 'NDHWC']
    if data_format not in supported_formats:
        raise ValueError(
            "data_format should be in one of [NCL, NCHW, NCDHW, NLC, NHWC, NDHWC], " \
            "but got {}".format(data_format))

    sizes = x.shape
    dim = len(sizes)
    if dim < 3:
        raise ValueError(
            'Expected 3D or higher dimensionality input, but got {} dimensions'
            .format(dim))

    # The batch dimension (index 0) is exempt from the positivity check.
    for i, sz in enumerate(sizes):
        if not sz > 0 and i > 0:
            raise ValueError("Expected every dim's size to be larger than 0, "
                             "but the size of the {}-th dim is {}".format(
                                 i, sz))

    channel_last = data_format[-1] == "C"

    from functools import reduce
    sum_sizes = reduce(lambda a, b: a * b, sizes[1:])

    # Squared input with an extra axis so the channel window can be pooled.
    div = paddle.unsqueeze(paddle.multiply(x, x), axis=1)

    pad_front = size // 2
    pad_back = (size - 1) // 2
    if channel_last:
        pad4d_shape = [pad_front, pad_back, 0, 0]
        pool2d_shape = (1, size)
        reshape_shape = [
            sizes[0], 1, sizes[1],
            int(sum_sizes / (sizes[1] * sizes[-1])), sizes[-1]
        ]
        pad5d_shape = [pad_front, pad_back, 0, 0, 0, 0]
        pool3d_shape = (1, 1, size)
    else:
        pad4d_shape = [0, 0, pad_front, pad_back]
        pool2d_shape = (size, 1)
        reshape_shape = [
            sizes[0], 1, sizes[1], sizes[2],
            int(sum_sizes / (sizes[1] * sizes[2]))
        ]
        pad5d_shape = [0, 0, 0, 0, pad_front, pad_back]
        pool3d_shape = (size, 1, 1)

    if dim == 3:
        div = paddle.nn.functional.pad(div, pad=pad4d_shape)
        div = paddle.nn.functional.avg_pool2d(div,
                                              kernel_size=pool2d_shape,
                                              stride=1)
        div = paddle.squeeze(div, axis=1)
    else:
        # 4-D and 5-D inputs share one 5-D pooling path via a reshape.
        div = paddle.reshape(div, shape=reshape_shape)
        div = paddle.nn.functional.pad(div,
                                       pad=pad5d_shape,
                                       data_format='NCDHW')
        div = paddle.nn.functional.avg_pool3d(div,
                                              kernel_size=pool3d_shape,
                                              stride=1)
        div = paddle.reshape(paddle.squeeze(div, axis=1), sizes)

    div = paddle.scale(div, scale=alpha, bias=k)
    div = paddle.pow(div, beta)
    return paddle.divide(x, div, name=name)
def affine_grid(theta, out_shape, align_corners=True, name=None):
    """
    Generate a grid of (x,y) coordinates from the parameters of an affine
    transformation. The grid corresponds to the set of points where the input
    feature map should be sampled to produce the transformed output feature
    map.

    Args:
        theta (Tensor) - A tensor with shape [N, 2, 3]. It contains a batch of affine transform parameters.
                           The data type can be float32 or float64.
        out_shape (Tensor | list | tuple): The shape of target output with format [batch_size, channel, height, width].
                                             ``out_shape`` can be a Tensor or a list or tuple. The data
                                             type must be int32.
        align_corners(bool): Whether to align corners of target feature map and source feature map. Default: True.
        name(str|None): The default value is None.  Normally there is no need for user to set this property.  For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        Tensor, A Tensor with shape [batch_size, H, W, 2] while 'H' and 'W' are the height and width of feature map in affine transformation. The data type is the same as `theta`.

    Raises:
        ValueError: If ``theta`` is not a Tensor, or if ``out_shape`` is not
            a list, tuple or Tensor.

    Examples:

        .. code-block:: python

            import paddle
            import paddle.nn.functional as F
            import numpy as np
            # theta shape = [1, 2, 3]
            theta = np.array([[[-0.7, -0.4, 0.3],
                               [ 0.6,  0.5, 1.5]]]).astype("float32")
            theta_t = paddle.to_tensor(theta)
            y_t = F.affine_grid(
                    theta_t,
                    [1, 2, 3, 3],
                    align_corners=False)
            print(y_t)

            #[[[[ 1.0333333   0.76666665]
            #   [ 0.76666665  1.0999999 ]
            #   [ 0.5         1.4333333 ]]
            #
            #  [[ 0.5666667   1.1666666 ]
            #   [ 0.3         1.5       ]
            #   [ 0.03333333  1.8333334 ]]
            #
            #  [[ 0.10000002  1.5666667 ]
            #   [-0.16666666  1.9000001 ]
            #   [-0.43333334  2.2333333 ]]]]
    """
    if not isinstance(theta, Variable):
        raise ValueError("The theta should be a Tensor.")

    # cuDNN is only used from version 6000 on and only with aligned corners.
    cudnn_version = get_cudnn_version()
    use_cudnn = bool(cudnn_version is not None and cudnn_version >= 6000
                     and align_corners)
    if is_compiled_with_rocm():
        use_cudnn = False  # ROCM platform do not have MIOPEN kernel for affine_grid

    if not isinstance(out_shape, (list, tuple, Variable)):
        raise ValueError("The out_shape should be a list, tuple or Tensor.")

    if in_dynamic_mode():
        # The dynamic op takes the shape as a plain list attribute.
        if isinstance(out_shape, Variable):
            _out_shape = out_shape.numpy().tolist()
        else:
            _out_shape = out_shape
        return _C_ops.affine_grid(theta, "output_shape", _out_shape,
                                  "align_corners", align_corners, "use_cudnn",
                                  use_cudnn)

    helper = LayerHelper('affine_grid')
    check_variable_and_dtype(theta, 'theta', ['float32', 'float64'],
                             'affine_grid')
    out = helper.create_variable_for_type_inference(theta.dtype)

    op_inputs = {'Theta': theta}
    op_attrs = {"align_corners": align_corners, "use_cudnn": use_cudnn}
    # A Tensor shape is an op input; a list/tuple shape is an op attribute.
    if isinstance(out_shape, Variable):
        op_inputs['OutputShape'] = out_shape
        check_variable_and_dtype(out_shape, 'out_shape', ['int32'],
                                 'affine_grid')
    else:
        op_attrs['output_shape'] = out_shape

    helper.append_op(type='affine_grid',
                     inputs=op_inputs,
                     outputs={'Output': out},
                     attrs=op_attrs if op_attrs else None)
    return out
def grid_sample(x,
                grid,
                mode='bilinear',
                padding_mode='zeros',
                align_corners=True,
                name=None):
    """
    This operation samples input X by using bilinear interpolation or
    nearest interpolation based on flow field grid, which is usually
    generated by :code:`affine_grid` . The grid of shape [N, H, W, 2]
    is the concatenation of (x, y) coordinates with shape [N, H, W] each,
    where x is indexing the 4th dimension (in width dimension) of input
    data x and y is indexing the 3rd dimension (in height dimension),
    finally results is the bilinear interpolation or nearest value of 4
    nearest corner points. The output tensor shape will be [N, C, H, W].

    Step 1:

    Get (x, y) grid coordinates and scale to [0, H-1/W-1].

    .. code-block:: text

        grid_x = 0.5 * (grid[:, :, :, 0] + 1) * (W - 1)
        grid_y = 0.5 * (grid[:, :, :, 1] + 1) * (H - 1)

    Step 2:

    Indices input data X with grid (x, y) in each [H, W] area, and bilinear
    interpolate point value by 4 nearest points or nearest interpolate point
    value by nearest point.

    .. code-block:: text

        wn ------- y_n ------- en
        |           |           |
        |          d_n          |
        |           |           |
        x_w --d_w-- grid--d_e-- x_e
        |           |           |
        |          d_s          |
        |           |           |
        ws ------- y_s ------- es

        For bilinear interpolation:
        x_w = floor(x)              // west side x coord
        x_e = x_w + 1               // east side x coord
        y_n = floor(y)              // north side y coord
        y_s = y_n + 1               // south side y coord
        d_w = grid_x - x_w          // distance to west side
        d_e = x_e - grid_x          // distance to east side
        d_n = grid_y - y_n          // distance to north side
        d_s = y_s - grid_y          // distance to south side
        wn = X[:, :, y_n, x_w]      // north-west point value
        en = X[:, :, y_n, x_e]      // north-east point value
        ws = X[:, :, y_s, x_w]      // south-west point value
        es = X[:, :, y_s, x_e]      // south-east point value

        output = wn * d_e * d_s + en * d_w * d_s
                + ws * d_e * d_n + es * d_w * d_n

    Args:
        x(Tensor): The input tensor, which is a 4-d tensor with shape
                     [N, C, H, W], N is the batch size, C is the channel
                     number, H and W is the feature height and width.
                     The data type is float32 or float64.
        grid(Tensor): Input grid tensor of shape [N, grid_H, grid_W, 2]. The
                        data type is float32 or float64.
        mode(str, optional): The interpolation method which can be 'bilinear' or 'nearest'.
                         Default: 'bilinear'.
        padding_mode(str, optional): The padding method used when source index
                   is out of input images. It can be 'zeros', 'reflection' and 'border'.
                   Default: zeros.
        align_corners(bool, optional): If `align_corners` is true, it will projects
                   -1 and 1 to the centers of the corner pixels. Otherwise, it will
                   projects -1 and 1 to the image edges. Must be a bool.
                   Default: True.
        name(str, optional): For detailed information, please refer
                             to :ref:`api_guide_Name`. Usually name is no need to set and
                             None by default.

    Returns:
        Tensor, The shape of output is [N, C, grid_H, grid_W] in which `grid_H` is the height of grid
                 and `grid_W` is the width of grid. The data type is same as input tensor.

    Raises:
        ValueError: If ``mode`` or ``padding_mode`` is not one of the
            supported values, or ``align_corners`` is not a bool.

    Examples:

        .. code-block:: python

            import paddle
            import paddle.nn.functional as F
            import numpy as np

            # shape=[1, 1, 3, 3]
            x = np.array([[[[-0.6,  0.8, -0.5],
                            [-0.5,  0.2,  1.2],
                            [ 1.4,  0.3, -0.2]]]]).astype("float64")

            # grid shape = [1, 3, 4, 2]
            grid = np.array(
                         [[[[ 0.2,  0.3],
                            [-0.4, -0.3],
                            [-0.9,  0.3],
                            [-0.9, -0.6]],
                           [[ 0.4,  0.1],
                            [ 0.9, -0.8],
                            [ 0.4,  0.5],
                            [ 0.5, -0.2]],
                           [[ 0.1, -0.8],
                            [-0.3, -1. ],
                            [ 0.7,  0.4],
                            [ 0.2,  0.8]]]]).astype("float64")


            x = paddle.to_tensor(x)
            grid = paddle.to_tensor(grid)
            y_t = F.grid_sample(
                x,
                grid,
                mode='bilinear',
                padding_mode='border',
                align_corners=True)
            print(y_t)

            # output shape = [1, 1, 3, 4]
            # [[[[ 0.34   0.016  0.086 -0.448]
            #    [ 0.55  -0.076  0.35   0.59 ]
            #    [ 0.596  0.38   0.52   0.24 ]]]]
    """

    _modes = ['bilinear', 'nearest']
    _padding_modes = ['zeros', 'reflection', 'border']

    # Validate the string options and the flag before touching any tensor.
    if mode not in _modes:
        raise ValueError(
            "The mode of grid sample function should be in {}, but got: {}".
            format(_modes, mode))
    if padding_mode not in _padding_modes:
        raise ValueError(
            "The padding mode of grid sample function should be in {}, but got: {}"
            .format(_padding_modes, padding_mode))

    if not isinstance(align_corners, bool):
        raise ValueError(
            "The align corners should be bool, but got: {}".format(
                align_corners))

    cudnn_version = get_cudnn_version()
    use_cudnn = False

    # cuDNN is only selected on non-ROCm builds with cuDNN available, and only
    # for the bilinear / zeros / align_corners=True combination.
    if not is_compiled_with_rocm() and (
            cudnn_version is not None
    ) and align_corners and mode == 'bilinear' and padding_mode == 'zeros':
        use_cudnn = True
        # CUDNN always computes gradients for all inputs
        # NOTE: this mutates the caller's `x` and `grid` tensors in place.
        x.stop_gradient = False
        grid.stop_gradient = False

    if in_dynamic_mode():
        # Dynamic mode passes attributes as a flat (name, value, ...) sequence.
        attrs = ('mode', mode, 'padding_mode', padding_mode, 'align_corners',
                 align_corners, 'use_cudnn', use_cudnn)
        out = getattr(_C_ops, 'grid_sampler')(x, grid, *attrs)
    else:
        # `**locals()` forwards every local defined so far (arguments
        # included) as keyword arguments, so renaming or adding locals above
        # this line changes what LayerHelper receives.
        helper = LayerHelper("grid_sample", **locals())
        check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'grid_sample')
        check_variable_and_dtype(grid, 'grid', ['float32', 'float64'],
                                 'grid_sample')
        ipts = {'X': x, 'Grid': grid}
        attrs = {
            'mode': mode,
            'padding_mode': padding_mode,
            'align_corners': align_corners,
            'use_cudnn': use_cudnn
        }
        out = helper.create_variable_for_type_inference(x.dtype)
        helper.append_op(type='grid_sampler',
                         inputs=ipts,
                         attrs=attrs,
                         outputs={'Output': out})
    return out
def diag_embed(input, offset=0, dim1=-2, dim2=-1):
    """
    This OP creates a tensor whose diagonals of certain 2D planes (specified by dim1 and dim2)
    are filled by ``input``. By default, a 2D plane formed by the last two dimensions
    of the returned tensor will be selected.

    The argument ``offset`` determines which diagonal is generated:

    - If offset = 0, it is the main diagonal.
    - If offset > 0, it is above the main diagonal.
    - If offset < 0, it is below the main diagonal.

    Args:
        input(Tensor|numpy.ndarray): The input tensor. Must be at least 1-dimensional. The input data type should be float16, float32, float64, int32, int64.
            A non-Tensor input is converted with ``assign`` first.
        offset(int, optional): Which diagonal to consider. Default: 0 (main diagonal).
        dim1(int, optional): The first dimension with respect to which to take diagonal. Default: -2.
        dim2(int, optional): The second dimension with respect to which to take diagonal. Default: -1.

    Returns:
        Tensor, the output data type is the same as input data type.
        ``stop_gradient`` is set to True on the returned tensor.

    Raises:
        AssertionError: In static mode only, if ``input`` has no dimensions,
            if ``dim1`` or ``dim2`` lies outside
            ``[-(rank + 1), rank]`` (``rank`` being the rank of ``input``),
            or if both refer to the same dimension.

    Examples:
        .. code-block:: python

            import paddle.nn.functional as F
            import numpy as np

            diag_embed = np.random.randn(2, 3).astype('float32')
            # [[ 0.7545889 , -0.25074545,  0.5929117 ],
            #  [-0.6097662 , -0.01753256,  0.619769  ]]

            data1 = F.diag_embed(diag_embed)
            data1.numpy()
            # [[[ 0.7545889 ,  0.        ,  0.        ],
            #  [ 0.        , -0.25074545,  0.        ],
            #   [ 0.        ,  0.        ,  0.5929117 ]],

            # [[-0.6097662 ,  0.        ,  0.        ],
            #  [ 0.        , -0.01753256,  0.        ],
            #  [ 0.        ,  0.        ,  0.619769  ]]]

            data2 = F.diag_embed(diag_embed, offset=-1, dim1=0, dim2=2)
            data2.numpy()
            # [[[ 0.        ,  0.        ,  0.        ,  0.        ],
            #   [ 0.7545889 ,  0.        ,  0.        ,  0.        ],
            #   [ 0.        , -0.25074545,  0.        ,  0.        ],
            #   [ 0.        ,  0.        ,  0.5929117 ,  0.        ]],
            #
            #  [[ 0.        ,  0.        ,  0.        ,  0.        ],
            #   [-0.6097662 ,  0.        ,  0.        ,  0.        ],
            #   [ 0.        , -0.01753256,  0.        ,  0.        ],
            #   [ 0.        ,  0.        ,  0.619769  ,  0.        ]]]

            data3 = F.diag_embed(diag_embed, offset=1, dim1=0, dim2=2)
            data3.numpy()
            # [[[ 0.        ,  0.7545889 ,  0.        ,  0.        ],
            #   [ 0.        , -0.6097662 ,  0.        ,  0.        ]],
            #
            #  [[ 0.        ,  0.        , -0.25074545,  0.        ],
            #   [ 0.        ,  0.        , -0.01753256,  0.        ]],
            #
            #  [[ 0.        ,  0.        ,  0.        ,  0.5929117 ],
            #   [ 0.        ,  0.        ,  0.        ,  0.619769  ]],
            #
            #  [[ 0.        ,  0.        ,  0.        ,  0.        ],
            #   [ 0.        ,  0.        ,  0.        ,  0.        ]]]
    """
    # NOTE: `inputs` and `attrs` are not referenced by name again, but they
    # (like every other local here) are forwarded to LayerHelper through
    # `**locals()` below, so the set of local names is kept unchanged.
    inputs = {'Input': [input]}
    attrs = {'offset': offset, 'dim1': dim1, 'dim2': dim2}

    if not isinstance(input, Variable):
        input = assign(input)

    def __check_input(input, offset, dim1, dim2):
        check_dtype(input.dtype, 'Input',
                    ['int32', 'int64', 'float16', 'float32', 'float64'],
                    'diag_embed')

        rank = len(list(input.shape))
        assert rank >= 1,                         \
                "Input must be at least 1-dimensional, "   \
                "But received Input's dimensional: %s.\n" %  \
                rank

        # The result has `rank + 1` dimensions, so valid indices run from
        # -(rank + 1) to rank inclusive. A symmetric `abs(dim) <= rank` test
        # would wrongly reject -(rank + 1), which the message below (and the
        # normalisation further down) treats as valid.
        assert -(rank + 1) <= dim1 <= rank,     \
            "Dim1 is out of range (expected to be in range of [%d, %d], but got %d).\n"  \
            % (-(rank + 1), rank, dim1)

        assert -(rank + 1) <= dim2 <= rank,      \
            "Dim2 is out of range (expected to be in range of [%d, %d], but got %d).\n"  \
            % (-(rank + 1), rank, dim2)

        # Normalise negative indices against the output rank (rank + 1).
        dim1_ = dim1 if dim1 >= 0 else rank + dim1 + 1
        dim2_ = dim2 if dim2 >= 0 else rank + dim2 + 1
        assert dim1_ != dim2_,       \
               "dim1 and dim2 cannot be the same dimension." \
                "But received dim1 = %d, dim2 = %d\n"%(dim1, dim2)

    # Argument validation is only performed outside dynamic mode.
    if not in_dynamic_mode():
        __check_input(input, offset, dim1, dim2)
    helper = LayerHelper("diag_embed", **locals())

    out = helper.create_variable_for_type_inference(dtype=input.dtype)

    helper.append_op(type='diag_embed',
                     inputs={'Input': [input]},
                     attrs={
                         'offset': offset,
                         'dim1': dim1,
                         'dim2': dim2
                     },
                     outputs={'Out': [out]})
    out.stop_gradient = True
    return out
def summary(net, input_size, batch_size=None, dtypes=None):
    """Prints a string summary of the network.

    Args:
        net (Layer): the network which must be a subinstance of Layer.
        input_size (tuple|int|InputSpec|list[tuple|int|InputSpec]): size of input tensor. if model only
                    have one input, input_size can be tuple, int or InputSpec. if model
                    have multiple input, input_size must be a list which contain
                    every input's shape. For an InputSpec the first dimension
                    of its shape is treated as the batch dimension.
        batch_size (int, optional): batch size of input tensor. When it is
                    None it is taken from the first dimension of an InputSpec
                    if one is given, and falls back to -1 otherwise.
                    Default: None.
        dtypes (str, optional): if dtypes is None, 'float32' will be used, Default: None.

    Returns:
        Dict: a summary of the network including total params and total trainable params.

    Raises:
        AssertionError: If ``input_size`` is a list containing an item that
            is not an int, tuple or InputSpec.

    Examples:
        .. code-block:: python

            import paddle
            import paddle.nn as nn

            class LeNet(nn.Layer):
                def __init__(self, num_classes=10):
                    super(LeNet, self).__init__()
                    self.num_classes = num_classes
                    self.features = nn.Sequential(
                        nn.Conv2d(
                            1, 6, 3, stride=1, padding=1),
                        nn.ReLU(),
                        nn.MaxPool2d(2, 2),
                        nn.Conv2d(
                            6, 16, 5, stride=1, padding=0),
                        nn.ReLU(),
                        nn.MaxPool2d(2, 2))

                    if num_classes > 0:
                        self.fc = nn.Sequential(
                            nn.Linear(400, 120),
                            nn.Linear(120, 84),
                            nn.Linear(
                                84, 10))

                def forward(self, inputs):
                    x = self.features(inputs)

                    if self.num_classes > 0:
                        x = paddle.flatten(x, 1)
                        x = self.fc(x)
                    return x

            lenet = LeNet()

            params_info = paddle.summary(lenet, (1, 28, 28))
            print(params_info)

    """
    if isinstance(input_size, InputSpec):
        # Drop the leading (batch) dimension; reuse it as batch size if the
        # caller did not give one.
        _input_size = tuple(input_size.shape[1:])
        if batch_size is None:
            batch_size = input_size.shape[0]
    elif isinstance(input_size, list):
        _input_size = []
        for item in input_size:
            if isinstance(item, int):
                item = (item, )
            # The message is built by implicit string concatenation. A
            # backslash continuation inside the literal would leak the source
            # indentation into the text shown to the user.
            assert isinstance(item, (tuple, InputSpec)), (
                'When input_size is list, '
                'expect item in input_size is a tuple or InputSpec, '
                'but got {}'.format(type(item)))

            if isinstance(item, InputSpec):
                _input_size.append(tuple(item.shape[1:]))
                # Only the first InputSpec seen can set the batch size.
                if batch_size is None:
                    batch_size = item.shape[0]
            else:
                _input_size.append(item)
    elif isinstance(input_size, int):
        _input_size = (input_size, )
    else:
        _input_size = input_size

    if batch_size is None:
        batch_size = -1

    if not paddle.in_dynamic_mode():
        warnings.warn(
            "Your model was created in static mode, this may not get correct summary information!"
        )

    result, params_info = summary_string(net, _input_size, batch_size, dtypes)
    print(result)

    return params_info
def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
    """
    Creates an instance of `PretrainedModel`. Model weights are loaded
    by specifying name of a built-in pretrained model, or a community contributed model,
    or a local file directory path.

    Args:
        pretrained_model_name_or_path (str): Name of pretrained model or dir path
            to load from. The string can be:

            - Name of a built-in pretrained model
            - Name of a community-contributed pretrained model.
            - Local directory path which contains model weights file("model_state.pdparams")
              and model config file ("model_config.json").
        *args (tuple): Position arguments for model `__init__`. If provided,
            use these as position argument values for model initialization.
        **kwargs (dict): Keyword arguments for model `__init__`. If provided,
            use these to update pre-defined keyword argument values for model
            initialization. Keywords that appear in the `__init__` signature
            of the base model update the base model's arguments; when a
            derived model is built, keywords that appear in the derived
            model's `__init__` signature update the derived model's arguments.

    Returns:
        PretrainedModel: An instance of `PretrainedModel`. When not in
        dynamic mode, the weights are not set on the model and a tuple
        ``(model, state_to_load)`` is returned instead.

    Raises:
        RuntimeError: If a required resource file can not be downloaded.

    Example:
        .. code-block::

            from paddlenlp.transformers import BertForSequenceClassification

            # Name of built-in pretrained model
            model = BertForSequenceClassification.from_pretrained('bert-base-uncased')

            # Name of community-contributed pretrained model
            model = BertForSequenceClassification.from_pretrained('yingyibiao/bert-base-uncased-sst-2-finetuned')

            # Load from local directory path
            model = BertForSequenceClassification.from_pretrained('./my_bert/')
    """
    resource_files = {}
    init_configuration = {}

    # From built-in pretrained models
    if pretrained_model_name_or_path in cls.pretrained_init_configuration:
        for file_id, map_list in cls.pretrained_resource_files_map.items():
            resource_files[file_id] = map_list[pretrained_model_name_or_path]
        init_configuration = copy.deepcopy(
            cls.pretrained_init_configuration[pretrained_model_name_or_path])
    # From local dir path
    elif os.path.isdir(pretrained_model_name_or_path):
        for file_id, file_name in cls.resource_files_names.items():
            full_file_name = os.path.join(pretrained_model_name_or_path,
                                          file_name)
            resource_files[file_id] = full_file_name
        resource_files["model_config_file"] = os.path.join(
            pretrained_model_name_or_path, cls.model_config_file)
    else:
        # Assuming from community-contributed pretrained models
        for file_id, file_name in cls.resource_files_names.items():
            full_file_name = os.path.join(COMMUNITY_MODEL_PREFIX,
                                          pretrained_model_name_or_path,
                                          file_name)
            resource_files[file_id] = full_file_name
        resource_files["model_config_file"] = os.path.join(
            COMMUNITY_MODEL_PREFIX, pretrained_model_name_or_path,
            cls.model_config_file)

    # Resolve every resource to a local file: use it directly if it already
    # is one, else reuse the cached copy, else download it into the cache.
    default_root = os.path.join(MODEL_HOME, pretrained_model_name_or_path)
    resolved_resource_files = {}
    for file_id, file_path in resource_files.items():
        if file_path is None or os.path.isfile(file_path):
            resolved_resource_files[file_id] = file_path
            continue
        path = os.path.join(default_root, file_path.split('/')[-1])
        if os.path.exists(path):
            logger.info("Already cached %s" % path)
            resolved_resource_files[file_id] = path
        else:
            logger.info("Downloading %s and saved to %s" %
                        (file_path, default_root))
            try:
                resolved_resource_files[file_id] = get_path_from_url(
                    file_path, default_root)
            except RuntimeError as err:
                logger.error(err)
                # Chain the original error so the root cause stays visible.
                raise RuntimeError(
                    f"Can't load weights for '{pretrained_model_name_or_path}'.\n"
                    f"Please make sure that '{pretrained_model_name_or_path}' is:\n"
                    "- a correct model-identifier of built-in pretrained models,\n"
                    "- or a correct model-identifier of community-contributed pretrained models,\n"
                    "- or the correct path to a directory containing relevant modeling files(model_weights and model_config).\n"
                ) from err

    # Prepare model initialization kwargs
    # Did we save some inputs and kwargs to reload?
    model_config_file = resolved_resource_files.pop("model_config_file", None)
    if model_config_file is not None:
        with io.open(model_config_file, encoding="utf-8") as f:
            init_kwargs = json.load(f)
    else:
        init_kwargs = init_configuration
    # position args are stored in kwargs, maybe better not include
    init_args = init_kwargs.pop("init_args", ())
    # class name corresponds to this configuration
    init_class = init_kwargs.pop("init_class", cls.base_model_class.__name__)
    base_class_name = cls.base_model_class.__name__

    # Check if the loaded config matches the current model class's __init__
    # arguments. If not match, the loaded config is for the base model class.
    if init_class == base_class_name:
        base_args = init_args
        base_kwargs = init_kwargs
        derived_args = ()
        derived_kwargs = {}
        base_arg_index = None
    else:
        # extract config for base model
        derived_args = list(init_args)
        derived_kwargs = init_kwargs
        base_arg = None
        base_arg_index = None
        for i, arg in enumerate(init_args):
            if isinstance(arg, dict) and "init_class" in arg:
                # Pop outside the assert: asserts are stripped under
                # `python -O`, which would otherwise leave "init_class" in
                # the base model's keyword arguments.
                base_init_class = arg.pop("init_class")
                assert base_init_class == base_class_name, (
                    "pretrained base model should be {}"
                ).format(base_class_name)
                base_arg_index = i
                base_arg = arg
                break
        for arg_name, arg in init_kwargs.items():
            if isinstance(arg, dict) and "init_class" in arg:
                base_init_class = arg.pop("init_class")
                assert base_init_class == base_class_name, (
                    "pretrained base model should be {}"
                ).format(base_class_name)
                base_arg_index = arg_name
                base_arg = arg
                break

        base_args = base_arg.pop("init_args", ())
        base_kwargs = base_arg

    if cls == cls.base_model_class:
        # Update with newly provided args and kwargs for base model
        base_args = base_args if not args else args
        base_kwargs.update(kwargs)
        model = cls(*base_args, **base_kwargs)
    else:
        # Update with newly provided args and kwargs for derived model
        base_parameters_dict = inspect.signature(
            cls.base_model_class.__init__).parameters
        for k, v in kwargs.items():
            if k in base_parameters_dict:
                base_kwargs[k] = v
        base_model = cls.base_model_class(*base_args, **base_kwargs)
        if base_arg_index is None:
            derived_args = (base_model, )  # assume at the first position
        elif isinstance(base_arg_index, int):
            # The base config was a positional argument: swap it in place.
            derived_args[base_arg_index] = base_model
        else:
            # The base config was a keyword argument: `base_arg_index` is its
            # name, and indexing the positional list with it would raise
            # TypeError, so the built base model replaces the keyword value.
            derived_kwargs[base_arg_index] = base_model
        derived_args = derived_args if not args else args
        derived_parameters_dict = inspect.signature(cls.__init__).parameters
        for k, v in kwargs.items():
            if k in derived_parameters_dict:
                derived_kwargs[k] = v
        model = cls(*derived_args, **derived_kwargs)

    # Maybe need more ways to load resources.
    weight_path = resolved_resource_files["model_state"]
    assert weight_path.endswith(
        ".pdparams"), "suffix of weight must be .pdparams"

    state_dict = paddle.load(weight_path)

    # Make sure we are able to load base models as well as derived models
    # (with heads)
    start_prefix = ""
    model_to_load = model
    state_to_load = state_dict
    unexpected_keys = []
    missing_keys = []
    has_prefixed_keys = any(
        s.startswith(cls.base_model_prefix) for s in state_dict)
    if not hasattr(model, cls.base_model_prefix) and has_prefixed_keys:
        # base model: strip the base-model prefix from the checkpoint keys
        state_to_load = {}
        start_prefix = cls.base_model_prefix + "."
        for k, v in state_dict.items():
            if k.startswith(cls.base_model_prefix):
                state_to_load[k[len(start_prefix):]] = v
            else:
                unexpected_keys.append(k)
    if hasattr(model, cls.base_model_prefix) and not has_prefixed_keys:
        # derived model (base model with heads): only the base part is loaded
        model_to_load = getattr(model, cls.base_model_prefix)
        for k in model.state_dict().keys():
            if not k.startswith(cls.base_model_prefix):
                missing_keys.append(k)
    if len(missing_keys) > 0:
        logger.info(
            "Weights of {} not initialized from pretrained model: {}".format(
                model.__class__.__name__, missing_keys))
    if len(unexpected_keys) > 0:
        logger.info("Weights from pretrained model not used in {}: {}".format(
            model.__class__.__name__, unexpected_keys))
    if paddle.in_dynamic_mode():
        model_to_load.set_state_dict(state_to_load)
        return model
    return model, state_to_load
def __call__(self, var, block=None):
    """Initialize the input tensor with dirac initializer.

    The tensor is zero-filled, flattened, and 1.0 is scattered (with
    overwrite) into the flat positions computed below; it is then reshaped
    back to its original shape. When ``var`` is not FP32 the work is done on
    a temporary FP32 variable and the result is cast into ``var``.

    Args:
        var(Tensor): Tensor that needs to be initialized. Must be a
            ``framework.Parameter`` with 3, 4 or 5 dimensions whose first
            dimension is divisible by ``self._groups``.
        block(Block, optional): The block in which initialization ops should be added. Used in static graph only, default None.

    Returns:
        The ``scatter`` op appended to ``block`` when the non-dygraph branch
        is taken; ``None`` when the dygraph branch runs (``op`` is never
        reassigned there).
    """
    block = self._check_block(block)
    assert isinstance(var, framework.Parameter)
    assert isinstance(block, framework.Block)
    check_variable_and_dtype(var, "Out",
                             ['float16', 'bfloat16', 'float32', 'float64'],
                             'Dirac')

    assert len(var.shape) in [
        3, 4, 5
    ], "Only Tensor with 3/4/5 dimensions can be initialized by Dirac"
    assert (var.shape[0] % self._groups
            ) == 0, "Tensor 0-dimension must be divisible by groups"

    # Non-FP32 parameters are initialized through a temporary FP32 variable
    # and cast back at the end.
    if var.dtype != VarDesc.VarType.FP32:
        out_var = block.create_var(name=unique_name.generate(".".join(
            ['dirac', var.name, 'tmp'])),
                                   shape=var.shape,
                                   dtype=VarDesc.VarType.FP32,
                                   type=VarDesc.VarType.LOD_TENSOR,
                                   persistable=False)
    else:
        out_var = var
    op = None
    # Step 1: fill the whole tensor with zeros.
    if framework.in_dygraph_mode():
        with fluid.dygraph.no_grad():
            _C_ops.fill_constant(out_var, 'value', float(0), 'force_cpu',
                                 False, 'dtype', out_var.dtype, 'str_value',
                                 str(float(0)), 'shape', out_var.shape)
    else:
        block.append_op(type='fill_constant',
                        inputs={},
                        outputs={'Out': out_var},
                        attrs={
                            'value': float(0),
                            'dtype': out_var.dtype,
                            'shape': out_var.shape,
                        },
                        stop_gradient=True)

    origin_shape = var.shape
    num_per_group = origin_shape[0] // self._groups
    min_shape = min(num_per_group, origin_shape[1])

    idx_list = []
    value_list = []
    # strides[k] is the product of all dimensions after k, i.e. the step in
    # the flattened tensor for a unit move along dimension k.
    strides = []
    prod = 1
    for dim in reversed(origin_shape):
        strides.insert(0, prod)
        prod *= dim
    # Step 2: for group i and channel j, compute the flat offset of element
    # [i * num_per_group + j, j, shape[2] // 2, ...] and pair it with 1.0.
    for i in range(self._groups):
        for j in range(min_shape):
            value_list.append(1.0)
            offset = 0
            for (k, stride) in enumerate(strides):
                if (k == 0):
                    offset += (j + i * num_per_group) * stride
                elif (k == 1):
                    offset += j * stride
                else:
                    offset += origin_shape[k] // 2 * stride
            idx_list.append(offset)
    # Step 3: flatten the tensor to 1-D so the flat offsets can be scattered.
    if framework.in_dygraph_mode():
        with fluid.dygraph.no_grad():
            tmp_out, _ = _C_ops.reshape2(out_var, None, 'shape', [-1])
            tmp_out._share_underline_tensor_to(out_var)
    else:
        x_shape = block.create_var(name=unique_name.generate(".".join(
            [out_var.name, "XShape"])),
                                   dtype=out_var.dtype,
                                   shape=out_var.shape,
                                   type=VarDesc.VarType.LOD_TENSOR,
                                   persistable=False,
                                   stop_gradient=True)
        block.append_op(type="reshape2",
                        inputs={"X": out_var},
                        attrs={'shape': [-1]},
                        outputs={
                            "Out": out_var,
                            "XShape": x_shape
                        },
                        stop_gradient=True)

    # Step 4: materialize the INT64 index tensor from idx_list.
    index_tensor = block.create_var(
        name=unique_name.generate('scatter_index'),
        persistable=False,
        stop_gradient=True)

    if framework.in_dygraph_mode():
        with fluid.dygraph.no_grad():
            tmp_tensor = framework._varbase_creator()
            _C_ops.assign_value(tmp_tensor, 'shape', [len(idx_list)],
                                'dtype', VarDesc.VarType.INT64,
                                'int64_values', idx_list)
            tmp_tensor._share_underline_tensor_to(index_tensor)
    else:
        block.append_op(type='assign_value',
                        outputs={'Out': index_tensor},
                        attrs={
                            'dtype': VarDesc.VarType.INT64,
                            'shape': [len(idx_list)],
                            'int64_values': idx_list
                        },
                        stop_gradient=True)

    # Step 5: materialize the FP32 value tensor (all 1.0) from value_list.
    value_tensor = block.create_var(
        name=unique_name.generate('scatter_value'),
        persistable=False,
        stop_gradient=True)

    if framework.in_dygraph_mode():
        with fluid.dygraph.no_grad():
            tmp_tensor = framework._varbase_creator()
            _C_ops.assign_value(tmp_tensor, 'shape', [len(value_list)],
                                'dtype', VarDesc.VarType.FP32, 'fp32_values',
                                value_list)
            tmp_tensor._share_underline_tensor_to(value_tensor)
    else:
        block.append_op(type='assign_value',
                        outputs={'Out': value_tensor},
                        attrs={
                            'dtype': VarDesc.VarType.FP32,
                            'shape': [len(value_list)],
                            'fp32_values': value_list
                        },
                        stop_gradient=True)

    # Step 6: scatter the ones (overwrite), restore the original shape, and
    # cast back into `var` when a temporary FP32 variable was used.
    if framework.in_dygraph_mode():
        with fluid.dygraph.no_grad():
            tmp_out = _C_ops.final_state_scatter(out_var, index_tensor,
                                                 value_tensor, True)
            tmp_out._share_underline_tensor_to(out_var)
            tmp_reshape_out, _ = _C_ops.reshape2(out_var, None, 'shape',
                                                 origin_shape)
            tmp_reshape_out._share_underline_tensor_to(out_var)
            if var.dtype != VarDesc.VarType.FP32:
                tmp_cast_out = _C_ops.cast(out_var, 'in_dtype',
                                           out_var.dtype, 'out_dtype',
                                           var.dtype)
                tmp_cast_out._share_underline_tensor_to(var)

    else:
        op = block.append_op(type="scatter",
                             inputs={
                                 "X": out_var,
                                 "Ids": index_tensor,
                                 "Updates": value_tensor
                             },
                             attrs={'overwrite': True},
                             outputs={"Out": out_var},
                             stop_gradient=True)
        x_shape = block.create_var(name=unique_name.generate(".".join(
            [out_var.name, "XShape"])),
                                   dtype=out_var.dtype,
                                   shape=out_var.shape,
                                   type=VarDesc.VarType.LOD_TENSOR,
                                   persistable=False,
                                   stop_gradient=True)
        block.append_op(type="reshape2",
                        inputs={"X": out_var},
                        attrs={'shape': origin_shape},
                        outputs={
                            "Out": out_var,
                            "XShape": x_shape
                        },
                        stop_gradient=True)
        if var.dtype != VarDesc.VarType.FP32:
            block.append_op(type="cast",
                            inputs={"X": out_var},
                            outputs={"Out": var},
                            attrs={
                                "in_dtype": out_var.dtype,
                                "out_dtype": var.dtype
                            },
                            stop_gradient=True)
    # NOTE(review): this guard uses `in_dynamic_mode()` while the branches
    # above use `framework.in_dygraph_mode()` — confirm the two are meant to
    # differ here.
    if not in_dynamic_mode():
        var.op = op
    return op
def save_quantized_model(self,
                         model,
                         path,
                         input_spec=None,
                         onnx_format=False,
                         **config):
    """
    Save the quantized model for the inference.

    The model is first saved with ``paddle.jit.save``, then reloaded as an
    inference program in static mode, post-processed (scales gathered,
    ``moving_average_abs_max_scale`` nodes removed, skip-quant attributes
    set, optional ONNX-format passes applied) and saved again to the same
    file names. If the call started in dynamic mode, dynamic mode is
    restored before returning, even when an error is raised.

    Args:
        model (Layer): The model to be saved.
        path (str): The path prefix to save model. The format is
            ``dirname/file_prefix`` or ``file_prefix``.
        input_spec (list[InputSpec|Tensor], optional): Describes the input
            of the saved model's forward method, which can be described by
            InputSpec or example Tensor. If None, all input variables of
            the original Layer's forward method would be the inputs of
            the saved model. Default None.
        onnx_format (bool, optional): Whether to export the quantized model
            with format of ONNX. Default is False.
        **config (dict, optional): Other save configuration options for
            compatibility. We do not recommend using these configurations,
            they may be removed in the future. If not necessary, DO NOT use
            them. Default None.
            The following options are currently supported:
            (1) output_spec (list[Tensor]): Selects the output targets of
            the saved model. By default, all return variables of original
            Layer's forward method are kept as the output of the saved model.
            If the provided ``output_spec`` list is not all output variables,
            the saved model will be pruned according to the given
            ``output_spec`` list.

    Returns:
        None
    """
    assert isinstance(model, dygraph.Layer), \
        "The model must be the instance of dygraph.Layer."

    paddle.jit.save(layer=model, path=path, input_spec=input_spec, **config)

    is_dynamic_mode = False
    if paddle.in_dynamic_mode():
        is_dynamic_mode = True
        paddle.enable_static()

    # Everything below runs in static mode. The try/finally guarantees that
    # the caller's dynamic mode is restored even if loading, a graph pass or
    # saving raises.
    try:
        place = core.CPUPlace()
        scope = global_scope()
        exe = Executor(place)

        dirname = os.path.dirname(path)
        basename = os.path.basename(path)
        model_filename = basename + INFER_MODEL_SUFFIX
        params_filename = basename + INFER_PARAMS_SUFFIX

        [infer_program, feed_target_names, fetch_targets
         ] = (load_inference_model(dirname=dirname,
                                   executor=exe,
                                   model_filename=model_filename,
                                   params_filename=params_filename))

        self._gather_scales(infer_program, scope, fetch_targets)

        # Remove `moving_average_abs_max_scale` node in sub graphs.
        graph = IrGraph(core.Graph(infer_program.desc), for_test=False)
        for sub_graph in graph.all_sub_graphs():
            for _op in sub_graph.all_op_nodes():
                if _op.name() == "moving_average_abs_max_scale":
                    sub_graph.safe_remove_nodes(_op)
            sub_graph.resolve_hazard()
        infer_program = graph.to_program()

        self._set_skip_quant_attr(infer_program)

        clip_extra = False
        if onnx_format:
            # Extra passes applied only for the ONNX-style export.
            graph = IrGraph(core.Graph(infer_program.desc), for_test=False)
            transform_pass = ReplaceFakeQuantDequantPass(scope, place)
            transform_pass.apply(graph)

            quant_weight_pass = QuantWeightPass(scope, place)
            quant_weight_pass.apply(graph)
            infer_program = graph.to_program()

            clip_extra = True

        save_inference_model(dirname=dirname,
                             feeded_var_names=feed_target_names,
                             target_vars=fetch_targets,
                             executor=exe,
                             main_program=infer_program.clone(),
                             model_filename=model_filename,
                             params_filename=params_filename,
                             clip_extra=clip_extra)
    finally:
        if is_dynamic_mode:
            paddle.disable_static()
def max_pool3d(x,
               kernel_size,
               stride=None,
               padding=0,
               ceil_mode=False,
               data_format="NDHWC",
               name=None):
    """
    Implements sparse max pooling 3d operation.
    See more details in :ref:`api_sparse_pooling_MaxPool3d` .

    Args:
        x (Tensor): The input SparseCooTensor of pooling operator, which is a 5-D tensor with
                          shape [N, D, H, W, C]. The format of input tensor `"NDHWC"`, where N represents batch size, C represents the number of channels, D, H and W represent the depth, height and width of the feature respectively.
        kernel_size (int|list|tuple): The pool kernel size. If the kernel size
            is a tuple or list, it must contain three integers,
            (kernel_size_Depth, kernel_size_Height, kernel_size_Width).
            Otherwise, the pool kernel size will be the cube of an int.
        stride (int|list|tuple, optional): The pool stride size. If pool stride size is a tuple or list,
            it must contain three integers, [stride_Depth, stride_Height, stride_Width].
            Otherwise, the pool stride size will be a cube of an int.
            When it is None, the stride is set to ``kernel_size``. Default: None.
        padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
            1. A string in ['valid', 'same'].
            2. An int, which means the feature map is zero padded by size of `padding` on every sides.
            3. A list[int] or tuple(int) whose length is 3, [pad_depth, pad_height, pad_width] whose value means the padding size of each dimension.
            4. A list[int] or tuple(int) whose length is 6. [pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side.
            5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
            The default value is 0.
        ceil_mode (bool): Forwarded to the padding computation
            (``_update_padding_nd``); it is not passed to the sparse kernel
            call itself. Default: False.
        data_format (string): The data format of the input and output data.
                        Currently only `"NDHWC"` is supported, which is also
                        the default; any other value fails an assertion.
        name(str, optional): For detailed information, please refer
                             to :ref:`api_guide_Name`. Usually name is no need to set and
                             None by default.

    Returns:
        Tensor: The output tensor of pooling result. The data type is same as input tensor.

    Raises:
        AssertionError: If not in dynamic mode, if ``x`` is not a
            SparseCooTensor, or if ``data_format`` is not ``'NDHWC'``.

    Examples:
        .. code-block:: python

            import paddle
            from paddle.fluid.framework import _test_eager_guard

            with _test_eager_guard():
                dense_x = paddle.randn((1, 4, 4, 4, 3))
                sparse_x = dense_x.to_sparse_coo(4)
                kernel_sizes = [3, 3, 3]
                paddings = [0, 0, 0]
                strides = [1, 1, 1]
                out = paddle.incubate.sparse.nn.functional.max_pool3d(sparse_x, kernel_sizes, stride=strides, padding=paddings)
                #[1, 2, 2, 2, 3]
    """

    assert in_dynamic_mode(), "Currently, Sparse API only support dynamic mode"
    # The message names this API (it previously said `sparse.relu`, a
    # copy-paste slip that pointed users at the wrong function).
    assert x.is_sparse_coo(
    ), "Currently, sparse.max_pool3d only support the input of SparseCooTensor"

    assert data_format == 'NDHWC', "Currently, sparse.max_pool3d only support data format of 'NDHWC'"

    kernel_size = utils.convert_to_list(kernel_size, 3, 'pool_size')
    if stride is None:
        stride = kernel_size
    else:
        stride = utils.convert_to_list(stride, 3, 'pool_stride')

    # Only NDHWC is accepted above, so the layout is always channel-last.
    # The padding algorithm returned alongside the padding is not used here.
    padding, _padding_algorithm = _update_padding_nd(padding,
                                                     3,
                                                     channel_last=True,
                                                     ceil_mode=ceil_mode)

    #TODO(zkh2016): remove the dependency on dilation from the backend
    dilation = [1, 1, 1]

    return _C_ops.final_state_sparse_maxpool(x, kernel_size, padding, dilation,
                                             stride)
def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
    """
    Creates an instance of `PretrainedModel`. Model weights are loaded
    by specifying name of a built-in pretrained model, or a community contributed model,
    or a local file directory path.

    Args:
        pretrained_model_name_or_path (str): Name of pretrained model or dir path
            to load from. The string can be:

            - Name of a built-in pretrained model
            - Name of a community-contributed pretrained model.
            - Local directory path which contains model weights file("model_state.pdparams")
              and model config file ("model_config.json").
        *args (tuple): Position arguments for model `__init__`. If provided,
            use these as position argument values for model initialization.
        **kwargs (dict): Keyword arguments for model `__init__`. If provided,
            use these to update pre-defined keyword argument values for model
            initialization. If the keyword is in `__init__` argument names of
            base model, update argument values of the base model; else update
            argument values of derived model.
        load_state_as_np (bool, optional): The weights read in can be chosen
            to be placed on CPU or GPU though the model is on the default device.
            If `True`, load the model weights as `numpy.ndarray` on CPU.
            Otherwise, weights would be loaded as tensors on the default
            device. Note that if on GPU, the latter would create extra
            temporary tensors in addition to the model weights, which
            doubles the memory usage. Thus it is suggested to use `True`
            for big models on GPU. Default to `False`.

    Returns:
        PretrainedModel: An instance of `PretrainedModel` in dynamic mode;
        otherwise a tuple ``(model, state_to_load)``.

    Raises:
        RuntimeError: If a resource file can not be downloaded.
        ValueError: If the loaded config belongs to a derived model but
            contains no nested configuration of the base model.

    Example:
        .. code-block::

            from paddlenlp.transformers import BertForSequenceClassification

            # Name of built-in pretrained model
            model = BertForSequenceClassification.from_pretrained('bert-base-uncased')

            # Name of community-contributed pretrained model
            model = BertForSequenceClassification.from_pretrained('yingyibiao/bert-base-uncased-sst-2-finetuned')

            # Load from local directory path
            model = BertForSequenceClassification.from_pretrained('./my_bert/')
    """
    pretrained_models = list(cls.pretrained_init_configuration.keys())
    resource_files = {}
    init_configuration = {}
    load_state_as_np = kwargs.pop("load_state_as_np", False)

    # From built-in pretrained models
    if pretrained_model_name_or_path in pretrained_models:
        for file_id, map_list in cls.pretrained_resource_files_map.items():
            resource_files[file_id] = map_list[pretrained_model_name_or_path]
        init_configuration = copy.deepcopy(
            cls.pretrained_init_configuration[pretrained_model_name_or_path])
    # From local dir path
    elif os.path.isdir(pretrained_model_name_or_path):
        for file_id, file_name in cls.resource_files_names.items():
            full_file_name = os.path.join(pretrained_model_name_or_path,
                                          file_name)
            resource_files[file_id] = full_file_name
        resource_files["model_config_file"] = os.path.join(
            pretrained_model_name_or_path, cls.model_config_file)
    else:
        # Assuming from community-contributed pretrained models
        for file_id, file_name in cls.resource_files_names.items():
            full_file_name = os.path.join(COMMUNITY_MODEL_PREFIX,
                                          pretrained_model_name_or_path,
                                          file_name)
            resource_files[file_id] = full_file_name
        resource_files["model_config_file"] = os.path.join(
            COMMUNITY_MODEL_PREFIX, pretrained_model_name_or_path,
            cls.model_config_file)

    default_root = os.path.join(MODEL_HOME, pretrained_model_name_or_path)
    resolved_resource_files = {}
    for file_id, file_path in resource_files.items():
        # Missing or already-local resources are used as they are.
        if file_path is None or os.path.isfile(file_path):
            resolved_resource_files[file_id] = file_path
            continue
        path = os.path.join(default_root, file_path.split('/')[-1])
        if os.path.exists(path):
            logger.info("Already cached %s" % path)
            resolved_resource_files[file_id] = path
        else:
            logger.info("Downloading %s and saved to %s" %
                        (file_path, default_root))
            try:
                resolved_resource_files[file_id] = get_path_from_url(
                    file_path, default_root)
            except RuntimeError as err:
                logger.error(err)
                raise RuntimeError(
                    f"Can't load weights for '{pretrained_model_name_or_path}'.\n"
                    f"Please make sure that '{pretrained_model_name_or_path}' is:\n"
                    "- a correct model-identifier of built-in pretrained models,\n"
                    "- or a correct model-identifier of community-contributed pretrained models,\n"
                    "- or the correct path to a directory containing relevant modeling files(model_weights and model_config).\n"
                ) from err

    # Prepare model initialization kwargs
    # Did we saved some inputs and kwargs to reload ?
    model_config_file = resolved_resource_files.pop("model_config_file", None)
    if model_config_file is not None:
        with io.open(model_config_file, encoding="utf-8") as f:
            init_kwargs = json.load(f)
    else:
        init_kwargs = init_configuration
    # position args are stored in kwargs, maybe better not include
    init_args = init_kwargs.pop("init_args", ())
    # class name corresponds to this configuration
    init_class = init_kwargs.pop("init_class", cls.base_model_class.__name__)

    def _pop_base_init_class(config):
        # Pop outside of the assert: with `python -O` asserts are stripped,
        # and the "init_class" key must still be removed from the config
        # before it is expanded into the base model's keyword arguments.
        init_class_name = config.pop("init_class")
        assert init_class_name == cls.base_model_class.__name__, (
            "pretrained base model should be {}").format(
                cls.base_model_class.__name__)

    # Check if the loaded config matches the current model class's __init__
    # arguments. If not match, the loaded config is for the base model class.
    if init_class == cls.base_model_class.__name__:
        base_args = init_args
        base_kwargs = init_kwargs
        derived_args = ()
        derived_kwargs = {}
        base_arg_index = None
    else:
        # extract config for base model
        derived_args = list(init_args)
        derived_kwargs = init_kwargs
        base_arg = None
        base_arg_index = None
        for i, arg in enumerate(init_args):
            if isinstance(arg, dict) and "init_class" in arg:
                _pop_base_init_class(arg)
                base_arg_index = i
                base_arg = arg
                break
        for arg_name, arg in init_kwargs.items():
            if isinstance(arg, dict) and "init_class" in arg:
                _pop_base_init_class(arg)
                base_arg_index = arg_name
                base_arg = arg
                break
        if base_arg is None:
            raise ValueError(
                f"Can't find the configuration of base model "
                f"'{cls.base_model_class.__name__}' in the loaded "
                f"configuration of '{pretrained_model_name_or_path}'.")
        base_args = base_arg.pop("init_args", ())
        base_kwargs = base_arg

    if cls == cls.base_model_class:
        # Update with newly provided args and kwargs for base model
        base_args = base_args if not args else args
        base_kwargs.update(kwargs)
        model = cls(*base_args, **base_kwargs)
    else:
        # Update with newly provided args and kwargs for derived model
        base_parameters_dict = inspect.signature(
            cls.base_model_class.__init__).parameters
        for k, v in kwargs.items():
            if k in base_parameters_dict:
                base_kwargs[k] = v
        base_model = cls.base_model_class(*base_args, **base_kwargs)
        if base_arg_index is None:
            derived_args = (base_model, )  # assume at the first position
        elif isinstance(base_arg_index, str):
            # The base model config was stored as a keyword argument, so the
            # built base model replaces it there (a list can't take a str index).
            derived_kwargs[base_arg_index] = base_model
        else:
            derived_args[base_arg_index] = base_model
        derived_args = derived_args if not args else args
        derived_parameters_dict = inspect.signature(cls.__init__).parameters
        for k, v in kwargs.items():
            if k in derived_parameters_dict:
                derived_kwargs[k] = v
        model = cls(*derived_args, **derived_kwargs)

    # Maybe need more ways to load resources.
    weight_path = resolved_resource_files["model_state"]
    assert weight_path.endswith(
        ".pdparams"), "suffix of weight must be .pdparams"

    # NOTE: Allow to load partial model for model parallel.
    # TODO(guosheng): To make model loading for the model parallel automatic,
    # maybe we should make rank 0 worker load weights of the full model on
    # CPU, then split weights into multiple parts and pickle separately.
    # The other workers wait util pickle finish and then load the corresponding
    # partial weights. Also we can directly use separate weight files for
    # simplicity.
    state_dict = paddle.load(weight_path, return_numpy=load_state_as_np)

    # Make sure we are able to load base models as well as derived models
    # (with heads)
    model_to_load = model
    state_to_load = state_dict
    unexpected_keys = []
    missing_keys = []
    model_has_base = hasattr(model, cls.base_model_prefix)
    state_has_base = any(
        s.startswith(cls.base_model_prefix) for s in state_dict.keys())
    if not model_has_base and state_has_base:
        # base model: strip the "<prefix>." part of the checkpoint keys
        state_to_load = {}
        start_prefix = cls.base_model_prefix + "."
        for k, v in state_dict.items():
            if k.startswith(cls.base_model_prefix):
                state_to_load[k[len(start_prefix):]] = v
            else:
                unexpected_keys.append(k)
    if model_has_base and not state_has_base:
        # derived model (base model with heads)
        model_to_load = getattr(model, cls.base_model_prefix)
        for k in model.state_dict().keys():
            if not k.startswith(cls.base_model_prefix):
                missing_keys.append(k)
    if missing_keys:
        logger.info(
            "Weights of {} not initialized from pretrained model: {}".format(
                model.__class__.__name__, missing_keys))
    if unexpected_keys:
        logger.info("Weights from pretrained model not used in {}: {}".format(
            model.__class__.__name__, unexpected_keys))

    # Allow the float16 model to load float32 weights, which decreases memory
    # usage in model loading stage and is useful to big models.
    dtype_prefix_len = len("paddle.")  # paddle.float16
    for k, v in model_to_load.state_dict().items():
        if k not in state_to_load:
            continue
        # The target dtype is resolved for every parameter, so an ndarray
        # parameter can not reuse the dtype of a previously visited one.
        if isinstance(v, np.ndarray):
            dtype = str(v.dtype)
        else:
            dtype = str(v.dtype)[dtype_prefix_len:]
        # TODO(guosheng): add warnings for unmatched dtypes
        state_to_load[k] = state_to_load[k].astype(dtype)

    # Logging model download statistics
    download_check(pretrained_model_name_or_path, "from_pretrained")

    # For model parallel if FasterGeneration
    # To avoid recursive import temporarily.
    import paddlenlp.ops.faster_transformer.transformer.decoding as ft_decoding
    state_to_load = ft_decoding.get_ft_para_conf().fit_partial_model(
        model_to_load, state_to_load)

    if paddle.in_dynamic_mode():
        model_to_load.set_state_dict(state_to_load)
        return model
    return model, state_to_load
def summary(net, input_size, dtypes=None):
    """Prints a string summary of the network.

    Args:
        net (Layer): the network which must be a subinstance of Layer.
        input_size (tuple|InputSpec|list[tuple|InputSpec]): size of input tensor. if model only
                    have one input, input_size can be tuple or InputSpec. if model
                    have multiple input, input_size must be a list which contain
                    every input's shape. Note that input_size only dim of
                    batch_size can be None or -1.
        dtypes (str, optional): if dtypes is None, 'float32' will be used, Default: None.

    Returns:
        Dict: a summary of the network including total params and total trainable params.

    Raises:
        ValueError: if more than one dimension of a shape is None or -1, or
            if a numeric dimension is not greater than zero.

    Examples:
        .. code-block:: python

            import paddle
            import paddle.nn as nn

            class LeNet(nn.Layer):
                def __init__(self, num_classes=10):
                    super(LeNet, self).__init__()
                    self.num_classes = num_classes
                    self.features = nn.Sequential(
                        nn.Conv2D(
                            1, 6, 3, stride=1, padding=1),
                        nn.ReLU(),
                        nn.MaxPool2D(2, 2),
                        nn.Conv2D(
                            6, 16, 5, stride=1, padding=0),
                        nn.ReLU(),
                        nn.MaxPool2D(2, 2))

                    if num_classes > 0:
                        self.fc = nn.Sequential(
                            nn.Linear(400, 120),
                            nn.Linear(120, 84),
                            nn.Linear(
                                84, 10))

                def forward(self, inputs):
                    x = self.features(inputs)

                    if self.num_classes > 0:
                        x = paddle.flatten(x, 1)
                        x = self.fc(x)
                    return x

            lenet = LeNet()

            params_info = paddle.summary(lenet, (1, 1, 28, 28))
            print(params_info)

    """

    def _is_shape(shape):
        # A shape is flat: none of its items is itself a list or tuple.
        return not any(isinstance(item, (list, tuple)) for item in shape)

    def _check_shape(shape):
        num_unknown = 0
        new_shape = []
        for item in shape:
            if item is None or item == -1:
                num_unknown += 1
                if num_unknown > 1:
                    raise ValueError(
                        'Option input_size only the dim of batch_size can be None or -1.'
                    )
                # The unknown (batch) dimension is replaced by 1.
                item = 1
            elif isinstance(item, numbers.Number):
                if item <= 0:
                    raise ValueError(
                        "Expected element in input size greater than zero, but got {}".
                        format(item))
            new_shape.append(item)
        return tuple(new_shape)

    def _check_input(input_size):
        if isinstance(input_size, (list, tuple)) and _is_shape(input_size):
            return _check_shape(input_size)
        else:
            return [_check_input(i) for i in input_size]

    # Normalise every accepted form of `input_size` to tuples / list of tuples.
    if isinstance(input_size, InputSpec):
        _input_size = tuple(input_size.shape)
    elif isinstance(input_size, list):
        _input_size = []
        for item in input_size:
            if isinstance(item, int):
                item = (item, )
            assert isinstance(item, (tuple, InputSpec)), 'When input_size is list, \
            expect item in input_size is a tuple or InputSpec, but got {}'.format(
                type(item))

            if isinstance(item, InputSpec):
                _input_size.append(tuple(item.shape))
            else:
                _input_size.append(item)
    elif isinstance(input_size, int):
        _input_size = (input_size, )
    else:
        _input_size = input_size

    if not paddle.in_dynamic_mode():
        warnings.warn(
            "Your model was created in static mode, this may not get correct summary information!"
        )
        in_train_mode = False
    else:
        in_train_mode = net.training

    if in_train_mode:
        net.eval()

    try:
        _input_size = _check_input(_input_size)
        result, params_info = summary_string(net, _input_size, dtypes)
        print(result)
    finally:
        # Put the network back into training mode even when validation or the
        # summary itself raises, so a failed call does not leave it in eval mode.
        if in_train_mode:
            net.train()

    return params_info
def __init__(
    self,
    img_size=224,
    patch_size=16,
    in_chans=3,
    embed_dim=768,
    depth=12,
    num_heads=12,
    mlp_ratio=4,
    qkv_bias=False,
    qk_scale=None,
    drop_rate=0.0,
    attn_drop_rate=0.0,
    drop_path_rate=0.0,
    norm_layer=nn.LayerNorm,
    epsilon=1e-5,
    class_dim=1000,
):
    """Assemble patch embedding, `depth` transformer blocks, a final norm
    layer and, when `class_dim` is positive, a linear classifier head.
    """
    super().__init__()
    self.class_dim = class_dim
    self.num_features = embed_dim
    self.embed_dim = embed_dim

    self.patch_embed = PatchEmbed(
        img_size=img_size,
        patch_size=patch_size,
        in_chans=in_chans,
        embed_dim=embed_dim,
    )

    # One position per patch plus one extra slot (the class token).
    token_count = self.patch_embed.num_patches + 1
    self.pos_embed = add_parameter(
        self, paddle.zeros((1, token_count, embed_dim))
    )
    self.cls_token = add_parameter(self, paddle.zeros((1, 1, embed_dim)))
    self.pos_drop = nn.Dropout(p=drop_rate)

    # Drop-path rate grows linearly from 0 to `drop_path_rate` over the blocks.
    drop_path_rates = np.linspace(0, drop_path_rate, depth)
    layers = []
    for block_index in range(depth):
        layers.append(
            Block(
                dim=embed_dim,
                num_heads=num_heads,
                mlp_ratio=mlp_ratio,
                qkv_bias=qkv_bias,
                qk_scale=qk_scale,
                drop=drop_rate,
                attn_drop=attn_drop_rate,
                drop_path=drop_path_rates[block_index],
                norm_layer=norm_layer,
                epsilon=epsilon,
            )
        )
    self.blocks = nn.LayerList(layers)

    self.norm = norm_layer(embed_dim, epsilon=epsilon)

    # Classifier head
    if class_dim > 0:
        self.head = nn.Linear(embed_dim, class_dim)

    # Custom initialisation is applied in dynamic mode only.
    if paddle.in_dynamic_mode():
        trunc_normal_(self.pos_embed)
        trunc_normal_(self.cls_token)
        self.apply(self._init_weights)
def nonzero(x, as_tuple=False):
    """
    Return the indices of all non-zero elements of ``x``.

    For an n-dimensional ``x`` holding ``z`` non-zero elements:

    - ``as_tuple=False``: one tensor of shape [z, n] is returned, each row
      being the index of one non-zero element.
    - ``as_tuple=True``: a tuple of ``n`` tensors is returned, one for each
      dimension of ``x``; every tensor has shape [z, 1] and holds the indices
      of the non-zero elements along that dimension.

    Args:
        x (Tensor): The input tensor variable.
        as_tuple (bool): Return type, Tensor or tuple of Tensor.

    Returns:
        Tensor. The data type is int64.

    Examples:

        .. code-block:: python

            import paddle

            x1 = paddle.to_tensor([[1.0, 0.0, 0.0],
                                   [0.0, 2.0, 0.0],
                                   [0.0, 0.0, 3.0]])
            x2 = paddle.to_tensor([0.0, 1.0, 0.0, 3.0])
            out_z1 = paddle.nonzero(x1)
            print(out_z1)
            #[[0 0]
            # [1 1]
            # [2 2]]
            out_z1_tuple = paddle.nonzero(x1, as_tuple=True)
            for out in out_z1_tuple:
                print(out)
            #[[0]
            # [1]
            # [2]]
            #[[0]
            # [1]
            # [2]]
            out_z2 = paddle.nonzero(x2)
            print(out_z2)
            #[[1]
            # [3]]
            out_z2_tuple = paddle.nonzero(x2, as_tuple=True)
            for out in out_z2_tuple:
                print(out)
            #[[1]
            # [3]]

    """
    rank = len(x.shape)

    if in_dygraph_mode():
        outs = _C_ops.final_state_where_index(x)
    elif paddle.in_dynamic_mode():
        outs = _C_ops.where_index(x)
    else:
        # Static graph: append a `where_index` op writing into a new int64 variable.
        helper = LayerHelper("where_index", **locals())
        outs = helper.create_variable_for_type_inference(
            dtype=core.VarDesc.VarType.INT64)
        helper.append_op(type='where_index',
                         inputs={'Condition': x},
                         outputs={'Out': [outs]})

    if not as_tuple:
        return outs
    if rank == 1:
        # A single dimension: the index tensor is already the only element.
        return (outs, )
    # Column i of `outs` holds the indices along dimension i.
    return tuple(
        paddle.slice(outs, axes=[1], starts=[dim], ends=[dim + 1])
        for dim in range(rank))
def sparse_attention(query,
                     key,
                     value,
                     sparse_csr_offset,
                     sparse_csr_columns,
                     key_padding_mask=None,
                     attn_mask=None,
                     name=None):
    r"""
    Sparsify the Attention matrix of a Transformer module in order to reduce
    memory consumption and computation. The sparse layout is given in CSR
    format through two parameters, ``offset`` and ``columns``. The computed
    quantity is:

    .. math::

        result=softmax(\frac{ Q * K^T }{\sqrt{d}}) * V

    where ``Q``, ``K`` and ``V`` are the three inputs of the attention module,
    all of the same dimensions, and ``d`` is the size of their last dimension.

    Warning:
        This API is only used in ``CUDA 11.3`` and above versions.

    Args:
        query(Tensor): The query tensor in the Attention module.
                        4-D tensor with shape:
                        [batch_size, num_heads, seq_len, head_dim].
                        The dtype can be float32 and float64.
        key(Tensor): The key tensor in the Attention module.
                        4-D tensor with shape:
                        [batch_size, num_heads, seq_len, head_dim].
                        The dtype can be float32 and float64.
        value(Tensor): The value tensor in the Attention module.
                        4-D tensor with shape:
                        [batch_size, num_heads, seq_len, head_dim].
                        The dtype can be float32 and float64.
        sparse_csr_offset(Tensor): CSR offsets of the sparsity layout; the
                        offset represents the number of non-zero elements in
                        each row of the matrix.
                        3-D tensor with shape:
                        [batch_size, num_heads, seq_len + 1].
                        The dtype should be int32.
        sparse_csr_columns(Tensor): CSR columns of the sparsity layout; the
                        column index values of the non-zero elements in the
                        matrix.
                        3-D tensor with shape:
                        [batch_size, num_heads, sparse_nnz].
                        The dtype should be int32.
        key_padding_mask(Tensor, optional): The key padding mask tensor in the
                        Attention module. 2-D tensor with shape:
                        [batch_size, seq_len].
                        The dtype can be float32 and float64.
                        A value of 0 means that the position is masked.
        attn_mask(Tensor, optional): The attention mask tensor in the
                        Attention module. 2-D tensor with shape:
                        [seq_len, seq_len].
                        The dtype can be float32 and float64.
                        A value of 0 means that the position is masked.
        name(str, optional): The default value is None. Normally there is no
                        need for user to set this property. For more
                        information, please refer to :ref:`api_guide_Name`.

    Returns:
        4-D tensor with shape:
        [batch_size, num_heads, seq_len, head_dim].
        The dtype can be float32 or float64.

    Examples:
        .. code-block:: python

            # required: skiptest
            import paddle
            import numpy as np

            query_data = np.array([[[[0, 1,], [2, 3],
                    [ 0, 1], [2, 3]]]]).astype("float32")
            key_data = np.array([[[[0, 1,], [2, 3],
                            [ 0, 1], [2, 3]]]]).astype("float32")
            value_data = np.array([[[[0, 1,], [2, 3],
                            [ 0, 1], [2, 3]]]]).astype("float32")
            sparse_csr_offset_data = np.array([[[0, 2,
                            4, 6, 8]]]).astype("int32")
            sparse_csr_columns_data = np.array([[[0, 1,
                            0, 1, 2, 3, 2, 3]]]).astype("int32")
            key_padding_mask_data = np.array([[1,1,1,0]]).astype("float32")
            attention_mask_data = np.array([[1,0,1,1],[1,1,1,1],[1,1,1,1],[1,1,1,1]]).astype("float32")
            print(query_data.shape)
            # (1, 1, 4, 2)
            print(sparse_csr_offset_data.shape)
            # (1, 1, 5)
            print(sparse_csr_columns_data.shape)
            # (1, 1, 8)
            paddle.disable_static()
            query = paddle.to_tensor(query_data, stop_gradient=False,
                            place=paddle.CUDAPlace(0))
            key = paddle.to_tensor(key_data, stop_gradient=False,
                            place=paddle.CUDAPlace(0))
            value = paddle.to_tensor(value_data, stop_gradient=False,
                            place=paddle.CUDAPlace(0))
            offset = paddle.to_tensor(sparse_csr_offset_data, stop_gradient=False,
                            place=paddle.CUDAPlace(0))
            columns = paddle.to_tensor(sparse_csr_columns_data, stop_gradient=False,
                            place=paddle.CUDAPlace(0))
            key_padding_mask = paddle.to_tensor(key_padding_mask_data, stop_gradient=False,
                            place=paddle.CUDAPlace(0))
            attention_mask = paddle.to_tensor(attention_mask_data, stop_gradient=False,
                            place=paddle.CUDAPlace(0))
            output_mask = paddle.nn.functional.sparse_attention(query, key,
                            value, offset, columns,
                            key_padding_mask=key_padding_mask, attn_mask=attention_mask)
            print(output_mask)
            # [[[[0.        , 1.        ],
            #    [1.99830270, 2.99830270],
            #    [0.        , 1.        ],
            #    [0.        , 1.        ]]]]
            output = paddle.nn.functional.sparse_attention(query, key,
                            value, offset, columns)
            print(output)
            # [[[[1.60885942, 2.60885954],
            #       [1.99830270, 2.99830270],
            #       [1.60885942, 2.60885954],
            #       [1.99830270, 2.99830270]]]]
    """
    if in_dynamic_mode():
        # The op yields three results; only the attention output is returned.
        attention, _sdd, _softmax = _C_ops.sparse_attention(
            query, key, value, sparse_csr_offset, sparse_csr_columns,
            key_padding_mask, attn_mask)
        return attention

    # Static graph: declare the three outputs and append the op.
    helper = LayerHelper('sparse_attention', **locals())
    # NOTE(review): 'Q' is not among this function's argument names handed to
    # LayerHelper through locals(); confirm the dtype resolved here is intended.
    dtype = helper.input_dtype(input_param_name='Q')
    out = helper.create_variable_for_type_inference(dtype)
    result_sdd = helper.create_variable_for_type_inference(dtype)
    result_softmax = helper.create_variable_for_type_inference(dtype)
    helper.append_op(type='sparse_attention',
                     inputs={
                         'Q': query,
                         'K': key,
                         'V': value,
                         'Offset': sparse_csr_offset,
                         'Columns': sparse_csr_columns,
                         'KeyPaddingMask': key_padding_mask,
                         'AttnMask': attn_mask,
                     },
                     outputs={
                         'Out': out,
                         'SparseDotSdd': result_sdd,
                         'Softmax': result_softmax
                     })
    return out
def __impl__(*args, **kwargs):
    """Call `func`, entering a dygraph guard first when not in dynamic mode."""
    if not paddle.in_dynamic_mode():
        with fluid.dygraph.guard():
            return func(*args, **kwargs)
    return func(*args, **kwargs)
def kl_div(input, label, reduction='mean', name=None):
    r"""
    Compute the Kullback-Leibler divergence loss between Input(X) and
    Input(Target). Note that Input(X) is the log-probability and
    Input(Target) is the probability.

    KL divergence loss is calculated as follows:

    $$l(x, y) = y * (\log(y) - x)$$

    where :math:`x` is input and :math:`y` is label.

    When :attr:`reduction` is :attr:`none`, the output loss has the same
    shape as input; the loss at each point is calculated separately and no
    reduction is applied.

    When :attr:`reduction` is :attr:`mean`, the output loss has shape [1]
    and its value is the mean of all losses.

    When :attr:`reduction` is :attr:`sum`, the output loss has shape [1]
    and its value is the sum of all losses.

    When :attr:`reduction` is :attr:`batchmean`, the output loss has shape
    [1] and its value is the sum of all losses divided by the batch size.

    Args:
        input (Tensor): The input tensor. The shape is [N, *], where N is batch size and `*` means
             any number of additional dimensions. Its data type should be float32, float64.
        label (Tensor): label. The shape is [N, *], same shape as ``input`` . Its data type should be float32, float64.
        reduction (str, optional): Indicate how to average the loss,
             the candidates are ``'none'`` | ``'batchmean'`` | ``'mean'`` | ``'sum'``.
             If `reduction` is ``'mean'``, the reduced mean loss is returned;
             If `reduction` is ``'batchmean'``, the sum loss divided by batch size is returned;
             if `reduction` is ``'sum'``, the reduced sum loss is returned;
             if `reduction` is ``'none'``, no reduction will be applied.
             Default is ``'mean'``.
        name(str, optional): Name for the operation (optional, default is None). For more information,
            please refer to :ref:`api_guide_Name`.

    Returns:
        Tensor: The KL divergence loss. The data type is same as input tensor

    Examples:
        .. code-block:: python

            import paddle
            import numpy as np
            import paddle.nn.functional as F

            paddle.disable_static()

            shape = (5, 20)
            input = np.random.uniform(-10, 10, shape).astype('float32')
            target = np.random.uniform(-10, 10, shape).astype('float32')

            # 'batchmean' reduction, loss shape will be [1]
            pred_loss = F.kl_div(paddle.to_tensor(input),
                                 paddle.to_tensor(target), reduction='batchmean')
            # shape=[1]

            # 'mean' reduction, loss shape will be [1]
            pred_loss = F.kl_div(paddle.to_tensor(input),
                                 paddle.to_tensor(target), reduction='mean')
            # shape=[1]

            # 'sum' reduction, loss shape will be [1]
            pred_loss = F.kl_div(paddle.to_tensor(input),
                                 paddle.to_tensor(target), reduction='sum')
            # shape=[1]

            # 'none' reduction, loss shape is same with input shape
            pred_loss = F.kl_div(paddle.to_tensor(input),
                                 paddle.to_tensor(target), reduction='none')
            # shape=[5, 20]

    """
    # Dynamic mode: run the kernel directly, without any argument checking.
    if paddle.in_dynamic_mode():
        return core.ops.kldiv_loss(input, label, 'reduction', reduction)

    # Static graph: validate the arguments, then append the op.
    helper = LayerHelper('kl_div', **locals())

    for tensor, arg_name in ((input, 'input'), (label, 'label')):
        fluid.data_feeder.check_variable_and_dtype(tensor, arg_name,
                                                   ['float32', 'float64'],
                                                   'kl_div')
    fluid.data_feeder.check_type(reduction, 'reduction', str, 'kl_div')

    loss = helper.create_variable_for_type_inference(dtype=input.dtype)
    helper.append_op(type='kldiv_loss',
                     inputs={
                         'X': input,
                         'Target': label
                     },
                     outputs={'Loss': loss},
                     attrs={'reduction': reduction})
    return loss
def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
    """
    Instantiate an instance of `PretrainedModel` from a predefined
    model specified by name or path.

    Args:
        pretrained_model_name_or_path (str): A name of or a file path to a
            pretrained model.
        *args (tuple): position arguments for `__init__`. If provide, use
            this as position argument values for model initialization.
        **kwargs (dict): keyword arguments for `__init__`. If provide, use
            this to update pre-defined keyword argument values for model
            initialization.

    Returns:
        PretrainedModel: An instance of PretrainedModel in dynamic mode;
        otherwise a tuple ``(model, state_to_load)``.

    Raises:
        ValueError: If `pretrained_model_name_or_path` is neither a known
            model identifier nor a directory, or if the loaded config
            belongs to a derived model but contains no nested configuration
            of the base model.
    """
    pretrained_models = list(cls.pretrained_init_configuration.keys())
    resource_files = {}
    init_configuration = {}
    if pretrained_model_name_or_path in pretrained_models:
        for file_id, map_list in cls.pretrained_resource_files_map.items():
            resource_files[file_id] = map_list[pretrained_model_name_or_path]
        init_configuration = copy.deepcopy(
            cls.pretrained_init_configuration[pretrained_model_name_or_path])
    else:
        if os.path.isdir(pretrained_model_name_or_path):
            for file_id, file_name in cls.resource_files_names.items():
                full_file_name = os.path.join(pretrained_model_name_or_path,
                                              file_name)
                resource_files[file_id] = full_file_name
            resource_files["model_config_file"] = os.path.join(
                pretrained_model_name_or_path, cls.model_config_file)
        else:
            raise ValueError(
                "Calling {}.from_pretrained() with a model identifier or the "
                "path to a directory instead. The supported model "
                "identifiers are as follows: {}".format(
                    cls.__name__, cls.pretrained_init_configuration.keys()))
    # FIXME(chenzeyu01): We should use another data path for storing model
    default_root = os.path.join(DATA_HOME, pretrained_model_name_or_path)
    resolved_resource_files = {}
    for file_id, file_path in resource_files.items():
        # Check for None before touching the path: `file_path.split` used to
        # run first and raised AttributeError for a missing resource.
        if file_path is None or os.path.isfile(file_path):
            resolved_resource_files[file_id] = file_path
            continue
        path = os.path.join(default_root, file_path.split('/')[-1])
        if os.path.exists(path):
            logger.info("Already cached %s" % path)
            resolved_resource_files[file_id] = path
        else:
            logger.info("Downloading %s and saved to %s" %
                        (file_path, default_root))
            resolved_resource_files[file_id] = get_path_from_url(
                file_path, default_root)

    # Prepare model initialization kwargs
    # Did we saved some inputs and kwargs to reload ?
    model_config_file = resolved_resource_files.pop("model_config_file", None)
    if model_config_file is not None:
        with io.open(model_config_file, encoding="utf-8") as f:
            init_kwargs = json.load(f)
    else:
        init_kwargs = init_configuration
    # position args are stored in kwargs, maybe better not include
    init_args = init_kwargs.pop("init_args", ())
    # class name corresponds to this configuration
    init_class = init_kwargs.pop("init_class", cls.base_model_class.__name__)

    def _pop_base_init_class(config):
        # Pop outside of the assert: with `python -O` asserts are stripped,
        # and the "init_class" key must still be removed from the config
        # before it is expanded into the base model's keyword arguments.
        init_class_name = config.pop("init_class")
        assert init_class_name == cls.base_model_class.__name__, (
            "pretrained base model should be {}").format(
                cls.base_model_class.__name__)

    # Check if the loaded config matches the current model class's __init__
    # arguments. If not match, the loaded config is for the base model class.
    if init_class == cls.base_model_class.__name__:
        base_args = init_args
        base_kwargs = init_kwargs
        derived_args = ()
        derived_kwargs = {}
        base_arg_index = None
    else:
        # extract config for base model
        derived_args = list(init_args)
        derived_kwargs = init_kwargs
        # The matched config is tracked explicitly: the loop variable `arg`
        # is rebound by the keyword scan even when nothing matches there, so
        # it can not be relied on after the loops.
        base_arg = None
        base_arg_index = None
        for i, arg in enumerate(init_args):
            if isinstance(arg, dict) and "init_class" in arg:
                _pop_base_init_class(arg)
                base_arg_index = i
                base_arg = arg
                break
        for arg_name, arg in init_kwargs.items():
            if isinstance(arg, dict) and "init_class" in arg:
                _pop_base_init_class(arg)
                base_arg_index = arg_name
                base_arg = arg
                break
        if base_arg is None:
            raise ValueError(
                "Can't find the configuration of base model '{}' in the "
                "loaded configuration of '{}'.".format(
                    cls.base_model_class.__name__,
                    pretrained_model_name_or_path))
        base_args = base_arg.pop("init_args", ())
        base_kwargs = base_arg

    if cls == cls.base_model_class:
        # Update with newly provided args and kwargs for base model
        base_args = base_args if not args else args
        base_kwargs.update(kwargs)
        model = cls(*base_args, **base_kwargs)
    else:
        # Update with newly provided args and kwargs for derived model
        base_model = cls.base_model_class(*base_args, **base_kwargs)
        if base_arg_index is None:
            derived_args = (base_model, )  # assume at the first position
        elif isinstance(base_arg_index, str):
            # The base model config was stored as a keyword argument, so the
            # built base model replaces it there (a list can't take a str index).
            derived_kwargs[base_arg_index] = base_model
        else:
            derived_args[base_arg_index] = base_model
        derived_args = derived_args if not args else args
        derived_kwargs.update(kwargs)
        model = cls(*derived_args, **derived_kwargs)

    # Maybe need more ways to load resources.
    weight_path = list(resolved_resource_files.values())[0]
    assert weight_path.endswith(
        ".pdparams"), "suffix of weight must be .pdparams"
    state_dict = paddle.load(weight_path)

    # Make sure we are able to load base models as well as derived models
    # (with heads)
    model_to_load = model
    state_to_load = state_dict
    unexpected_keys = []
    missing_keys = []
    model_has_base = hasattr(model, cls.base_model_prefix)
    state_has_base = any(
        s.startswith(cls.base_model_prefix) for s in state_dict.keys())
    if not model_has_base and state_has_base:
        # base model: strip the "<prefix>." part of the checkpoint keys
        state_to_load = {}
        start_prefix = cls.base_model_prefix + "."
        for k, v in state_dict.items():
            if k.startswith(cls.base_model_prefix):
                state_to_load[k[len(start_prefix):]] = v
            else:
                unexpected_keys.append(k)
    if model_has_base and not state_has_base:
        # derived model (base model with heads)
        model_to_load = getattr(model, cls.base_model_prefix)
        for k in model.state_dict().keys():
            if not k.startswith(cls.base_model_prefix):
                missing_keys.append(k)
    if missing_keys:
        logger.info(
            "Weights of {} not initialized from pretrained model: {}".format(
                model.__class__.__name__, missing_keys))
    if unexpected_keys:
        logger.info("Weights from pretrained model not used in {}: {}".format(
            model.__class__.__name__, unexpected_keys))
    model_to_load.set_state_dict(state_to_load)
    if paddle.in_dynamic_mode():
        return model
    return model, state_to_load
def _compute_quantile(x, q, axis=None, keepdim=False, ignore_nan=False):
    """
    Compute the quantile of the input along the specified axis.

    Args:
        x (Tensor): The input Tensor, its data type can be float32, float64.
        q (int|float|list|tuple): The q for calculate quantile, which should be in
            range [0, 1]. If q is a list or tuple with more than one element, each q
            will be calculated and the first dimension of output is same to the
            number of ``q`` .
        axis (int|list, optional): The axis along which to calculate quantile. ``axis``
            should be int or list of int. ``axis`` should be in range [-D, D), where D
            is the dimensions of ``x`` . If ``axis`` is less than 0, it works the same
            way as :math:`axis + D`. If ``axis`` is a list, quantile is calculated over
            all elements of given axes. If ``axis`` is None, quantile is calculated
            over all elements of ``x``. Default is None.
        keepdim (bool, optional): Whether to reserve the reduced dimension(s) in the
            output Tensor. If ``keepdim`` is True, the dimensions of the output Tensor
            is the same as ``x`` except in the reduced dimensions(it is of size 1 in
            this case). Otherwise, the shape of the output Tensor is squeezed in
            ``axis`` . Default is False.
        ignore_nan (bool, optional): Whether to ignore NaN of input Tensor. If
            ``ignore_nan`` is True, it will calculate nanquantile. Otherwise it will
            calculate quantile. Default is False.

    Returns:
        Tensor, results of quantile along ``axis`` of ``x``.
        In order to obtain higher precision, data type of results will be float64.

    Raises:
        TypeError: If ``x`` is not a Tensor, or ``q`` is not an int, float, list
            or tuple.
        ValueError: If ``q`` is empty or contains a value outside [0, 1], if
            ``axis`` is an empty list, or if an axis is not an int in [-D, D).
    """
    # Validate x
    if not isinstance(x, Variable):
        raise TypeError("input x should be a Tensor.")

    # Validate q
    if isinstance(q, (int, float)):
        q = [q]
    elif isinstance(q, (list, tuple)):
        if len(q) <= 0:
            raise ValueError("q should not be empty")
    else:
        raise TypeError("Type of q should be int, float, list or tuple.")
    # Range-check every q up front so that invalid input fails fast, before
    # any tensor operation is issued on x.
    for q_num in q:
        if q_num < 0 or q_num > 1:
            raise ValueError("q should be in range [0, 1]")

    # Validate axis
    axis_error = ("Axis should be None, int, or a list, element should in "
                  "range [-rank(x), rank(x)).")
    dims = len(x.shape)
    # Shape used when keepdim=True: the input shape with every reduced axis
    # replaced by 1.
    out_shape = list(x.shape)
    if axis is None:
        x = paddle.flatten(x)
        axis = 0
        out_shape = [1] * dims
    elif isinstance(axis, list):
        if len(axis) <= 0:
            raise ValueError("axis should not be empty")
        axis_src = []
        for axis_single in axis:
            if not isinstance(axis_single, int) or not (
                    -dims <= axis_single < dims):
                raise ValueError(axis_error)
            if axis_single < 0:
                axis_single += dims
            axis_src.append(axis_single)
            out_shape[axis_single] = 1
        # Move the reduced axes to the end and merge them into one axis.
        # The position of that merged axis is computed here with
        # non-negative indices instead of being re-read from the destination
        # list after the call, so the reduction axis does not depend on how
        # paddle.moveaxis treats its list arguments.
        merged_axis = dims - len(axis)
        axis_dst = list(range(merged_axis, dims))
        x = paddle.moveaxis(x, axis_src, axis_dst)
        x = paddle.flatten(x, merged_axis, dims - 1)
        axis = merged_axis
    else:
        if not isinstance(axis, int) or not (-dims <= axis < dims):
            raise ValueError(axis_error)
        if axis < 0:
            axis += dims
        out_shape[axis] = 1

    mask = x.isnan()
    # Number of non-NaN entries along the reduction axis (kept as a size-1 axis).
    valid_counts = mask.logical_not().sum(axis=axis,
                                          keepdim=True,
                                          dtype='float64')

    # Values that do not depend on the individual q are computed once.
    in_dynamic_mode = paddle.in_dynamic_mode()
    if not ignore_nan:
        has_nan = mask.any(axis=axis, keepdim=True)
        last_index = x.shape[axis] - 1

    indices = []
    for q_num in q:
        if in_dynamic_mode:
            q_num = paddle.to_tensor(q_num, dtype='float64')
        # Fractional position of the quantile among the non-NaN entries.
        index = q_num * (valid_counts - 1)
        if not ignore_nan:
            # TODO(Asthestarsfalll): Use paddle.index_fill instead of where
            # Slices containing a NaN are pointed at the last position.
            # NOTE(review): presumably relies on paddle.sort placing NaN last
            # so that such slices evaluate to NaN -- confirm.
            nums = paddle.full_like(index, fill_value=last_index)
            index = paddle.where(has_nan, nums, index)
        indices.append(index)

    sorted_tensor = paddle.sort(x, axis)

    outputs = []

    # TODO(chenjianye): replace the for-loop to directly take elements.
    for index in indices:
        indices_below = paddle.floor(index).astype(paddle.int32)
        indices_upper = paddle.ceil(index).astype(paddle.int32)
        tensor_upper = paddle.take_along_axis(sorted_tensor,
                                              indices_upper,
                                              axis=axis)
        tensor_below = paddle.take_along_axis(sorted_tensor,
                                              indices_below,
                                              axis=axis)
        # Linear interpolation between the two neighbouring sorted values,
        # weighted by the fractional part of the position.
        weights = index - indices_below.astype('float64')
        out = paddle.lerp(tensor_below.astype('float64'),
                          tensor_upper.astype('float64'), weights)
        if keepdim:
            out = out.reshape(out_shape)
        else:
            out = paddle.squeeze(out, axis=axis)
        outputs.append(out)

    # Only stack along a new leading dimension when several q were requested.
    if len(q) > 1:
        return paddle.stack(outputs, 0)
    return outputs[0]