def forward(self, input):
    if in_dygraph_mode():
        # dygraph: run the fused fake quant-dequant op eagerly
        attrs = ('bit_length', self._quant_bits, 'quant_axis',
                 self._quant_axis)
        quant_out = _varbase_creator(
            type=input.type,
            name="{}.quantized.dequantized".format(input.name),
            shape=input.shape,
            dtype=input.dtype,
            persistable=False)
        out_scale = self._scale
        if out_scale is None:
            out_scale = _varbase_creator(
                type=core.VarDesc.VarType.LOD_TENSOR,
                name=self._scale_name,
                shape=[self._channel_num],
                dtype=self._dtype,
                persistable=False)
            out_scale.stop_gradient = True
        out, _ = _C_ops.fake_channel_wise_quantize_dequantize_abs_max(
            input, quant_out, out_scale, *attrs)
        return out

    # static graph: append the op to the current program
    check_variable_and_dtype(input, 'input', ['float32'],
                             "FakeQuantChannelWiseAbsMax")
    attrs = {'bit_length': self._quant_bits, 'quant_axis': self._quant_axis}
    inputs = {"X": [input]}
    quant_out = self._helper.create_variable(
        name="{}.quantized.dequantized".format(input.name),
        dtype=input.dtype,
        type=core.VarDesc.VarType.LOD_TENSOR,
        persistable=False,
        stop_gradient=False)
    out_scale = self._scale
    if not out_scale:
        out_scale = self._helper.create_variable(
            name=self._scale_name,
            dtype=self._dtype,
            type=core.VarDesc.VarType.LOD_TENSOR,
            persistable=False,
            stop_gradient=True)
    outputs = {"Out": [quant_out], "OutScale": [out_scale]}

    self._helper.append_op(
        type="fake_channel_wise_quantize_dequantize_abs_max",
        inputs=inputs,
        outputs=outputs,
        attrs=attrs)

    return quant_out
def _load_state_dict_from_save_params(model_path):
    # Try to load every file in the directory as a VarBase;
    # the relative file name is used as the name of the VarBase.
    load_var_list = []

    # 1. collect file names
    var_name_list = []
    for root, _, files in os.walk(model_path):
        for filename in files:
            file_path = os.path.join(root, filename)
            tmp_var_name = os.path.relpath(file_path, model_path)
            var_name = tmp_var_name.replace("\\", "/")
            var_name_list.append(var_name)

    # 2. create and load VarBase
    with fluid.dygraph.guard():
        for name in var_name_list:
            new_var = _varbase_creator(name=name, persistable=True)
            _dygraph_tracer().trace_op(
                type='load',
                inputs={},
                outputs={'Out': new_var},
                attrs={'file_path': os.path.join(model_path, name)})
            load_var_list.append(new_var)

    # 3. construct state_dict
    load_param_dict = dict()
    for var in load_var_list:
        load_param_dict[var.name] = var.numpy()

    return load_param_dict
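# A minimal usage sketch for the loader above (hedged: the directory name
# below is hypothetical; it is expected to hold the per-variable files
# written by fluid.io.save_params):
state_dict = _load_state_dict_from_save_params("./saved_params")
for name, value in state_dict.items():
    print(name, value.shape)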
def get_active_filter(self, in_nc, out_nc, kernel_size):
    ### Unsupported for asymmetric kernels
    if self._filter_size[0] != self._filter_size[1]:
        return self.weight[:in_nc, :out_nc, :, :]
    start, end = compute_start_end(self._filter_size[0], kernel_size)
    filters = self.weight[:in_nc, :out_nc, start:end, start:end]
    if self.transform_kernel != False and kernel_size < self._filter_size[0]:
        # Progressively shrink the largest kernel down to `kernel_size`,
        # applying the learned '%dto%d' transform matrix at every step.
        start_filter = self.weight[:in_nc, :out_nc, :, :]
        for i in range(len(self.ks_set) - 1, 0, -1):
            src_ks = self.ks_set[i]
            if src_ks <= kernel_size:
                break
            target_ks = self.ks_set[i - 1]
            start, end = compute_start_end(src_ks, target_ks)
            _input_filter = start_filter[:, :, start:end, start:end]
            _input_filter = fluid.layers.reshape(
                _input_filter,
                shape=[(_input_filter.shape[0] * _input_filter.shape[1]), -1])
            _tmp_filter = _varbase_creator(dtype=_input_filter.dtype)
            core.ops.matmul(_input_filter,
                            self.__getattr__('%dto%d_matrix' %
                                             (src_ks, target_ks)),
                            _tmp_filter, 'transpose_X', False, 'transpose_Y',
                            False, "alpha", 1)
            _tmp_filter = fluid.layers.reshape(
                _tmp_filter,
                shape=[
                    filters.shape[0], filters.shape[1], target_ks, target_ks
                ])
            start_filter = _tmp_filter
        filters = start_filter
    return filters
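# `compute_start_end` is not defined in this listing. A plausible
# implementation, assuming it returns the slice bounds of a centered
# `sub_kernel_size` window inside the full kernel (a sketch, not
# necessarily the original helper):
def compute_start_end(kernel_size, sub_kernel_size):
    # Center crop: weight[..., start:end, start:end] picks the middle
    # sub_kernel_size rows/columns of the full kernel.
    center = kernel_size // 2
    sub_center = sub_kernel_size // 2
    start = center - sub_center
    end = start + sub_kernel_size
    return start, end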
def forward(self, input, expand_ratio=None, channel=None):
    self.cur_config = {'expand_ratio': expand_ratio, 'channel': channel}
    ### weight: (Cin, Cout)
    in_nc = int(input.shape[-1])
    assert (
        expand_ratio is None or channel is None
    ), "expand_ratio and channel cannot both be set at the same time."
    if expand_ratio is not None:
        out_nc = int(expand_ratio * self.base_output_dim)
    elif channel is not None:
        out_nc = int(channel)
    else:
        out_nc = self.output_dim
    weight = self.weight[:in_nc, :out_nc]
    if self._bias_attr != False:
        bias = self.bias[:out_nc]
        use_bias = True

    pre_bias = _varbase_creator(dtype=input.dtype)
    core.ops.matmul(input, weight, pre_bias, 'transpose_X', False,
                    'transpose_Y', False, "alpha", 1)
    if self._bias_attr != False:
        pre_act = dygraph_utils._append_bias_in_dygraph(
            pre_bias, bias, axis=len(input.shape) - 1)
    else:
        pre_act = pre_bias

    return dygraph_utils._append_activation_in_dygraph(pre_act, self._act)
def _load_persistable_vars_by_program(model_path,
                                      program_holder,
                                      params_filename=None):
    # make sure the path has been checked
    persistable_vars = _get_persistable_vars(program_holder.infer_program)
    load_var_dict = {}
    for each_var in persistable_vars:
        orig_each_name = program_holder._suffix_varname_dict[each_var.name()]
        if _is_parameter(each_var, program_holder.infer_program):
            # create output varbase
            new_var = framework.ParamBase(shape=each_var.shape(),
                                          dtype=each_var.dtype(),
                                          name=each_var.name(),
                                          type=each_var.type(),
                                          persistable=True)
        else:
            new_var = framework._varbase_creator(type=each_var.type(),
                                                 name=each_var.name(),
                                                 shape=each_var.shape(),
                                                 dtype=each_var.dtype(),
                                                 persistable=True)
        if params_filename is None:
            framework._dygraph_tracer().trace_op(
                type='load',
                inputs={},
                outputs={'Out': new_var},
                attrs={'file_path': os.path.join(model_path, orig_each_name)})
        new_var.stop_gradient = False
        load_var_dict[each_var.name()] = new_var

    if params_filename is not None:
        load_var_list = []
        for name in sorted(load_var_dict.keys()):
            load_var_list.append(load_var_dict[name])
        framework._dygraph_tracer().trace_op(
            type='load_combine',
            inputs={},
            outputs={'Out': load_var_list},
            attrs={'file_path': os.path.join(model_path, params_filename)})

        for each_var in persistable_vars:
            if not _is_parameter(each_var, program_holder.infer_program):
                continue
            param = load_var_dict[each_var.name()]
            param.stop_gradient = False

    # NOTE: [Recover stop_gradient information based on the program]
    # After loading the model, the stop_gradient information of the
    # original variables is lost. But if a parameter has no corresponding
    # @GRAD variable in the backward program, it must also be stop_gradient.
    all_var_names = _get_all_var_names(program_holder.train_program)
    for var_name in load_var_dict:
        grad_var_name = var_name + core.grad_var_suffix()
        if grad_var_name not in all_var_names:
            load_var_dict[var_name].stop_gradient = True

    return load_var_dict
def forward(self, x):
    self.weight = self.spectral_norm(self.weight_orig)
    pre_bias = _varbase_creator(dtype='float32')
    # y = x * W^T; core.ops.matmul writes into pre_bias and returns it
    y = fluid.core.ops.matmul(x, self.weight, pre_bias, 'transpose_X', False,
                              'transpose_Y', True, 'alpha', 1)
    if self.bias is None:
        return y
    return y + L.unsqueeze(self.bias, 0)
def _reshape_inplace(x, shape):
    x_shape = framework._varbase_creator(dtype=x.dtype)
    framework._dygraph_tracer().trace_op(type="reshape2",
                                         inputs={'X': x},
                                         outputs={
                                             'Out': x,
                                             'XShape': x_shape
                                         },
                                         attrs={'shape': shape})
def parameters_to_vector(parameters, name=None):
    """
    Flatten parameters to a 1-D Tensor.

    Args:
        parameters(Iterable[Tensor]): Iterable Tensors that are trainable
            parameters of a Layer.
        name(str, optional): The default value is None. Normally there is
            no need for user to set this property. For more information,
            please refer to :ref:`api_guide_Name`.

    Returns:
        A 1-D Tensor, which represents the parameters of a Layer.

    Examples:
        .. code-block:: python

            import paddle
            linear = paddle.nn.Linear(10, 15)

            paddle.nn.utils.parameters_to_vector(linear.parameters())
            # 1-D Tensor: [165]
    """
    dtype = parameters[0].dtype
    origin_shapes = []
    for param in parameters:
        origin_shapes.append(param.shape)
        _inplace_reshape_dygraph(param, [-1])

    out = _varbase_creator(dtype=dtype)
    if in_dygraph_mode():
        with paddle.fluid.dygraph.no_grad():
            tmp = _varbase_creator()
            _C_ops.concat(parameters, tmp, 'axis', 0)
            tmp._share_underline_tensor_to(out)
    else:
        _dygraph_tracer().trace_op(type='concat',
                                   inputs={'X': parameters},
                                   outputs={'Out': [out]},
                                   attrs={'axis': 0},
                                   stop_gradient=True)

    for i, param in enumerate(parameters):
        _inplace_reshape_dygraph(param, origin_shapes[i])
    return out
def _inplace_reshape_dygraph(x, shape):
    x_shape = _varbase_creator(dtype=x.dtype)
    # Routing 'Out' back to x makes the reshape in-place; 'XShape'
    # records the original shape for the backward pass.
    _dygraph_tracer().trace_op(type="reshape2",
                               inputs={'X': x},
                               outputs={
                                   'Out': x,
                                   'XShape': x_shape
                               },
                               attrs={'shape': shape},
                               stop_gradient=True)
def vector_to_parameters(vec, parameters, name=None):
    """
    Transform a 1-D Tensor to the input ``parameters``.

    Args:
        vec (Tensor): A 1-D Tensor, which will be sliced and copied to the
            input ``parameters``.
        parameters (Iterable[Tensor]): Iterable Tensors that are trainable
            parameters of a Layer.
        name(str, optional): The default value is None. Normally there is
            no need for user to set this property. For more information,
            please refer to :ref:`api_guide_Name`.

    Examples:
        .. code-block:: python

            import paddle
            weight_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Constant(3.))
            linear1 = paddle.nn.Linear(10, 15, weight_attr)

            vec = paddle.nn.utils.parameters_to_vector(linear1.parameters())

            linear2 = paddle.nn.Linear(10, 15)
            # copy weight of linear1 to linear2
            paddle.nn.utils.vector_to_parameters(vec, linear2.parameters())
            # weight: Tensor(shape=[10, 15], dtype=float32, place=CUDAPlace(0), stop_gradient=False,
            #                 [[3. , ..., 3. ],
            #                  [..., ..., ...],
            #                  [3. , ..., 3. ]])
    """
    origin_shapes = []
    sections = []
    for param in parameters:
        shape = param.shape
        origin_shapes.append(shape)
        numel = reduce(lambda x, y: x * y, shape)
        sections.append(numel)

    if in_dygraph_mode():
        with paddle.fluid.dygraph.no_grad():
            res = [_varbase_creator() for n in range(len(parameters))]
            _C_ops.split(vec, res, 'axis', 0, 'sections', sections)
            for i in range(0, len(res)):
                res[i]._share_underline_tensor_to(parameters[i])
    else:
        _dygraph_tracer().trace_op(type='split',
                                   inputs={'X': [vec]},
                                   outputs={'Out': parameters},
                                   attrs={
                                       'axis': 0,
                                       'sections': sections
                                   },
                                   stop_gradient=True)

    for i, param in enumerate(parameters):
        _inplace_reshape_dygraph(param, origin_shapes[i])
    return
def _inplace_reshape_dygraph(x, shape):
    x_shape = _varbase_creator(dtype='int64')
    if in_dygraph_mode():
        with paddle.fluid.dygraph.no_grad():
            tmp_out, _ = _C_ops.reshape2(x, None, 'shape', shape)
            tmp_out._share_underline_tensor_to(x)
    else:
        _dygraph_tracer().trace_op(type="reshape2",
                                   inputs={'X': x},
                                   outputs={
                                       'Out': x,
                                       'XShape': x_shape
                                   },
                                   attrs={'shape': shape},
                                   stop_gradient=True)
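# A minimal usage sketch for the helper above (assumes an active dygraph
# context; paddle.nn.Linear is used here only to obtain a parameter):
import paddle

w = paddle.nn.Linear(10, 15).weight    # shape [10, 15]
_inplace_reshape_dygraph(w, [-1])      # w is reshaped in place to [150]
_inplace_reshape_dygraph(w, [10, 15])  # original shape restored
print(w.shape)                         # [10, 15]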
def forward(self, input):
    if in_dygraph_mode():
        attrs = ('moving_rate', self._moving_rate, 'bit_length',
                 self._quant_bits, 'is_test', not self.training)
        quant_out = _varbase_creator(
            type=input.type,
            name="{}.quantized.dequantized".format(input.name),
            shape=input.shape,
            dtype=input.dtype,
            persistable=False)
        state = self._state if self.training else None
        accum = self._accum if self.training else None

        out, _, _, _ = _C_ops.fake_quantize_dequantize_moving_average_abs_max(
            input, self._scale, accum, state, quant_out, self._scale, state,
            accum, *attrs)
        return out

    check_variable_and_dtype(input, 'input', ['float32'],
                             "FakeQuantMovingAverageAbsMax")
    attrs = {
        'moving_rate': self._moving_rate,
        'bit_length': self._quant_bits,
        'is_test': not self.training
    }
    inputs = {"X": [input], "InScale": [self._scale]}
    quant_out = self._helper.create_variable(
        name="{}.quantized.dequantized".format(input.name),
        dtype=input.dtype,
        type=core.VarDesc.VarType.LOD_TENSOR,
        persistable=False,
        stop_gradient=False)
    outputs = {"Out": [quant_out], "OutScale": [self._scale]}

    if self.training:
        inputs['InState'] = [self._state]
        inputs['InAccum'] = [self._accum]
        outputs['OutState'] = [self._state]
        outputs['OutAccum'] = [self._accum]

    self._helper.append_op(
        type="fake_quantize_dequantize_moving_average_abs_max",
        inputs=inputs,
        outputs=outputs,
        attrs=attrs)

    return quant_out
def prepare_dygraph_output(self):

    def create_var_base(is_input, name):
        var = framework._varbase_creator(dtype=None, shape=None, name=name)
        var.stop_gradient = False
        return var

    # build outputs
    outputs = {}
    outputs['Out'] = []
    for name in self.output_names['Out']:
        outputs['Out'].append(create_var_base(False, name))

    outputs['OutScope'] = framework._varbase_creator(
        type=core.VarDesc.VarType.STEP_SCOPES,
        name="program_out_scope",
        persistable=True)

    inner_scope = core.Scope()
    outputs['OutScope'].value().set_scope(inner_scope)
    return outputs
def forward(self, input):
    quant_input = self._fake_quant_input(input)
    quant_weight = self._fake_quant_weight(self.weight)
    if in_dygraph_mode():
        pre_bias = _varbase_creator(dtype=input.dtype)
        core.ops.matmul(quant_input, quant_weight, pre_bias, 'transpose_X',
                        False, 'transpose_Y', False, "alpha", 1)
        pre_act = dygraph_utils._append_bias_in_dygraph(
            pre_bias, self.bias, axis=len(input.shape) - 1)
        return dygraph_utils._append_activation_in_dygraph(
            pre_act, self._act)

    check_variable_and_dtype(input, 'input',
                             ['float16', 'float32', 'float64'],
                             "QuantizedLinear")
    attrs = {
        "transpose_X": False,
        "transpose_Y": False,
        "alpha": 1,
    }
    inputs = {"X": [quant_input], "Y": [quant_weight]}
    mul_out = self._helper.create_variable_for_type_inference(self._dtype)

    self._helper.append_op(type="matmul",
                           inputs=inputs,
                           outputs={"Out": [mul_out]},
                           attrs=attrs)
    if self.bias is not None:
        pre_activation = self._helper.create_variable_for_type_inference(
            dtype=self._dtype)
        self._helper.append_op(type='elementwise_add',
                               inputs={
                                   'X': [mul_out],
                                   'Y': [self.bias]
                               },
                               outputs={'Out': [pre_activation]},
                               attrs={'axis': len(input.shape) - 1})
    else:
        pre_activation = mul_out
    return self._helper.append_activation(pre_activation, act=self._act)
def _load_persistable_vars(model_path,
                           var_info_path,
                           program_holder,
                           separate_params=False,
                           params_filename=None):
    # 1. load extra var info
    with open(var_info_path, 'rb') as f:
        extra_var_info = pickle.load(f)

    # 2. construct var dict
    load_var_dict = dict()
    load_var_list = []
    inv_suffix_varname_dict = {
        value: key
        for key, value in program_holder._suffix_varname_dict.items()
    }

    # NOTE(chenweihang): we need to load persistable vars based on the
    # program, because the program may have been pruned by
    # `save_inference_model`, so some vars in `extra_var_info` may no
    # longer exist
    for name in sorted(inv_suffix_varname_dict):
        if name not in extra_var_info:
            raise RuntimeError(
                "The model to be loaded is not complete. "
                "The variable `%s` of program cannot be found in loaded model."
                % name)
        # get suffix var name, see [why need to append suffix to persistable vars]
        new_name = inv_suffix_varname_dict[name]
        # create output varbase
        if extra_var_info[name].get('trainable', None) is not None:
            # use default shape and dtype
            new_var = framework.ParamBase(
                shape=[1],  # only to pass check, this shape is not meaningful
                dtype=core.VarDesc.VarType.FP32,
                name=new_name,
                persistable=True)
        else:
            new_var = framework._varbase_creator(name=new_name,
                                                 persistable=True)

        # load separate vars
        if separate_params is True:
            framework._dygraph_tracer().trace_op(
                type='load',
                inputs={},
                outputs={'Out': new_var},
                attrs={'file_path': os.path.join(model_path, name)})

        new_var.stop_gradient = extra_var_info[name]['stop_gradient']
        load_var_dict[new_name] = new_var
        load_var_list.append(new_var)

    # 3. load all vars
    if separate_params is False:
        if params_filename is not None:
            var_file_path = os.path.join(model_path, params_filename)
        else:
            var_file_path = os.path.join(model_path, VARIABLE_FILENAME)
        framework._dygraph_tracer().trace_op(
            type='load_combine',
            inputs={},
            outputs={'Out': load_var_list},
            attrs={'file_path': var_file_path})

    return load_var_dict
def _lod_tensor2varbase(tensor):
    return_var = _varbase_creator()
    return_var.value().get_tensor().set(tensor, _current_expected_place())
    return return_var
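# A minimal usage sketch (assumes an active dygraph guard; the underlying
# Tensor.set also accepts a numpy array, which stands in here for the
# LoDTensor data):
import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    data = np.arange(6, dtype='float32').reshape(2, 3)
    var = _lod_tensor2varbase(data)
    print(var.shape)  # [2, 3]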