def transpile(self, program, place, scope=None):
    '''
    Rewrite ``program`` in place so that it can run float16 inference.

    Operators pick their compute kernel from the data type of their input
    tensors, so the program desc itself does not strictly have to change to
    run in float16. Callers, however, usually feed and fetch float32
    tensors, and converting at both ends by hand is awkward. This method
    therefore adjusts the feed/fetch handling so that float inputs and
    outputs keep working, converts the weights held in the scope to
    float16 parameters, and redirects the operators to those converted
    parameters.

    :param program: program to transpile
    :type program: Program
    :param place: inference place
    :type place: Place
    :param scope: inference scope; the global scope is used when None
    :type scope: Scope
    :raises TypeError: if any argument has an unexpected type
    '''
    if not isinstance(program, Program):
        raise TypeError("program should be as Program type")
    if not isinstance(place, (core.CPUPlace, core.CUDAPlace)):
        raise TypeError("place should be as CPUPlace/CUDAPlace type")

    target_scope = global_scope() if scope is None else scope
    if not isinstance(target_scope, core.Scope):
        raise TypeError("scope should be as Scope type or None")

    self.scope = target_scope
    self.place = place
    self.block = program.block(0)
    # Filled by the steps below with the input names that must be adjusted.
    self.input_map = {}

    # The four rewriting steps run strictly in this order.
    self._modify_feed_fetch()
    self._convert_param_to_float16()
    self._adjust_input(skip=True)
    self._remove_unused_var()

    # TODO(luotao): clone() is called only to force program.desc to be
    # flushed, because a large program.desc is not always flushed
    # immediately. The clone is bound to the local name only; a better
    # solution will be considered later.
    program = program.clone()
def sample_data(self):
    '''
    Record the current tensor value of every variable being sampled.

    Each variable of ``self.sampling_program`` whose name is listed in
    ``self.sampling_vars`` is read from the global scope, converted to a
    numpy array and appended to its list in ``self._sampling_data``.
    '''
    for var in self.sampling_program.list_vars():
        if var.name not in self.sampling_vars:
            continue
        tensor = global_scope().find_var(var.name).get_tensor()
        snapshot = np.array(tensor)
        # The first sample of a variable creates its list.
        self._sampling_data.setdefault(var.name, []).append(snapshot)
def _set_var(var, ndarray):
    """
    Write ``ndarray`` into the global-scope tensor named ``var.name``.

    The data is set on the same kind of place the tensor currently reports:
    CPU, CUDA pinned memory, or otherwise the CUDA device given by the
    tensor's place.
    """
    tensor = global_scope().find_var(var.name).get_tensor()

    def _matching_place():
        # Build a concrete place object equivalent to the tensor's place.
        current = tensor._place()
        if current.is_cpu_place():
            return core.CPUPlace()
        if current.is_cuda_pinned_place():
            return core.CUDAPinnedPlace()
        generic = core.Place()
        generic.set_place(tensor._place())
        return core.CUDAPlace(generic.gpu_device_id())

    tensor.set(ndarray, _matching_place())
def _load_optimizer(self, state):
    """
    Restore optimizer variables of the 'train' program from ``state``.

    ``state`` maps variable names to values. Every optimizer variable found
    in the program must be present in it, otherwise an AssertionError is
    raised. Nothing happens when the program has no optimizer variables.
    """
    train_prog = self._progs.get('train', None)
    optimizer_vars = [
        v for v in train_prog.list_vars() if is_belong_to_optimizer(v)
    ]
    if not optimizer_vars:
        return

    # Create the variables in the global scope before assigning values.
    native_executor = self._executor._default_executor
    fluid.core._create_loaded_parameter(optimizer_vars, global_scope(),
                                        native_executor)

    for opt_var in optimizer_vars:
        assert opt_var.name in state, \
            "variable [{}] is not in optimizer state file".format(opt_var.name)
        self._set_var(opt_var, state[opt_var.name])
def _get_parameter(self, name, scope=None): if scope is None: scope = global_scope() p_t = scope.find_var(name).get_tensor() master_name = self._used_master_weights.get(name) if master_name is not None: master_p_t = scope.find_var(master_name).get_tensor() assert master_p_t._dtype() != p_t._dtype() assert master_p_t.shape() == p_t.shape() else: master_p_t = None return p_t, master_p_t
def load(self, param_state_pairs, optim_state):
    """
    Restore parameter values and, when applicable, optimizer state.

    ``param_state_pairs`` holds ``(param, state)`` pairs. ``optim_state`` is
    passed on to ``self._load_optimizer`` only when the model has an
    optimizer and ``optim_state`` is non-empty.
    """
    # Use the existing executor, or a temporary CPU one if none exists yet.
    if self._executor is not None:
        executor = self._executor._default_executor
    else:
        executor = fluid.Executor(fluid.CPUPlace())._default_executor

    # restore parameter states
    params = [param for param, _ in param_state_pairs]
    fluid.core._create_loaded_parameter(params, global_scope(), executor)
    for param, value in param_state_pairs:
        self._set_var(param, value)

    # restore optimizer states
    # FIXME what if a different optimizer is used?
    if self.model._optimizer and optim_state:
        self._load_optimizer(optim_state, executor)
def _get_parameter(self, name, scope=None):
    """
    Return the tensor of parameter ``name`` with its FP32 master tensor.

    The parameter must be registered in ``self._param_to_master_param`` and
    its master tensor must be FP32. For an FP32 parameter the two tensors
    are asserted to share the same pointer and ``(param, None)`` is
    returned. Otherwise the parameter must be FP16 with the master's shape,
    and ``(param, master)`` is returned. The global scope is used when
    ``scope`` is None.
    """
    active_scope = global_scope() if scope is None else scope
    fp32 = core.VarDesc.VarType.FP32

    master_name = self._param_to_master_param.get(name)
    assert master_name is not None
    master_tensor = active_scope.find_var(master_name).get_tensor()
    assert master_tensor._dtype() == fp32

    tensor = active_scope.find_var(name).get_tensor()
    if tensor._dtype() != fp32:
        assert tensor._dtype() == core.VarDesc.VarType.FP16
        assert tensor.shape() == master_tensor.shape()
        return tensor, master_tensor

    # An FP32 parameter is expected to be its own master weight.
    assert tensor._ptr() == master_tensor._ptr()
    return tensor, None
def _serialize_persistables(program, executor):
    """
    Serialize the persistable variables of ``program`` and return the bytes.

    A temporary program with a single ``save_combine`` op (saving to memory)
    is built and run with ``executor``. Returns None, after a warning, when
    the program has no persistable variable.
    """
    persistables = [v for v in program.list_vars() if is_persistable(v)]
    # warn if no variable found in model
    if not persistables:
        warnings.warn("no variable in your model, please ensure there are any "
                      "variables in your model to save")
        return None

    # Build a fresh program and clone every non-RAW persistable into it.
    save_program = Program()
    save_block = save_program.global_block()
    vars_by_clone_name = {}
    for var in persistables:
        if var.type == core.VarDesc.VarType.RAW:
            continue
        cloned = _clone_var_in_block(save_block, var)
        vars_by_clone_name[cloned.name] = var

    # Inputs are ordered by name; the output is a RAW persistable variable.
    in_vars = [vars_by_clone_name[key] for key in sorted(vars_by_clone_name)]
    out_var_name = unique_name.generate("out_var")
    out_var = save_block.create_var(type=core.VarDesc.VarType.RAW,
                                    name=out_var_name)
    out_var.desc.set_persistable(True)
    save_attrs = {'file_path': '', 'save_to_memory': True}
    save_block.append_op(type='save_combine',
                         inputs={'X': in_vars},
                         outputs={'Y': out_var},
                         attrs=save_attrs)

    # NOTE(zhiqiu): the save op adds variable kLookupTablePath to
    # save_program.desc, which makes save_program differ from its desc.
    # _sync_with_cpp brings them back in line before running.
    save_program._sync_with_cpp()
    executor.run(save_program)

    # The serialized bytes end up in the output variable.
    return global_scope().find_var(out_var_name).get_bytes()
def load(self, path):
    """
    Load parameters from ``path + ".pdparams"`` and, when available,
    optimizer state from ``path + ".pdopt"``.

    Every key of the model's state dict must be present in the parameter
    file. Optimizer state is applied immediately when an executor already
    exists; otherwise it is stored in ``self._lazy_load_optimizer``.
    """

    def _read_pickle(file_path):
        # Returns None when the file does not exist.
        # NOTE: pickle must only be used on trusted files.
        if os.path.exists(file_path):
            with open(file_path, 'rb') as f:
                return pickle.load(f)
        return None

    param_path = path + ".pdparams"
    param_state = _read_pickle(param_path)
    assert param_state, "failed to load parameters, please check path"

    # Use the existing executor, or a temporary CPU one if none exists yet.
    if self._executor is not None:
        executor = self._executor._default_executor
    else:
        executor = fluid.Executor(fluid.CPUPlace())._default_executor

    model_vars = list(self.model.state_dict().values())
    fluid.core._create_loaded_parameter(model_vars, global_scope(), executor)

    for key, var in self.model.state_dict().items():
        assert key in param_state, \
            "parameter [{}] is not found in model file [{}]".format(
                key, param_path)
        self._set_var(var, param_state[key])

    # FIXME what if a different optimizer is used?
    if not self.model._optimizer:
        return

    optim_state = _read_pickle(path + ".pdopt")
    if optim_state is None:
        return

    assert '__static_graph_only__' in optim_state, \
        "optimizer saved in dygraph mode is not usable in static graph"

    if self._executor is None:
        # No executor yet: keep the state so it can be applied later.
        self._lazy_load_optimizer = optim_state
    else:
        self._load_optimizer(optim_state)
def _initialize(self, mode):
    """
    Prepare programs, communication groups and the executor for ``mode``.

    The serial and distributed programs and the feed/fetch variables are
    copied from the distributed context of ``mode``. With more than one
    rank, the process groups containing the current rank are instantiated.
    If no executor exists yet, one is created and the pruned startup
    program is run for the variables not yet initialized in the global
    scope.
    """
    # Get the current content from the distributed context
    dist_context = self._dist_contexts[mode]
    self._serial_main_progs[mode] = dist_context.serial_main_program
    self._serial_startup_progs[mode] = dist_context.serial_startup_program
    self._dist_main_progs[mode] = dist_context.dist_main_programs
    self._dist_startup_progs[mode] = dist_context.dist_startup_programs
    self._feed_vars[mode] = dist_context.serial_feed_vars
    self._fetch_vars[mode] = dist_context.serial_fetch_vars

    if self._nranks > 1:
        # Instantiate communication only for the groups this rank is in.
        for group in get_all_process_groups():
            if self._cur_rank in group.ranks:
                group.instantiate()

    # initialize
    self._place = _get_device()
    if isinstance(self._place, fluid.CUDAPlace):
        self._place = fluid.CUDAPlace(ParallelEnv().dev_id)

    if self._executor is not None:
        return

    self._executor = paddle.static.Executor(self._place)
    startup_prog = self._dist_startup_progs[mode][self._cur_rank]

    def _already_initialized(var):
        # True when the scope holds an initialized tensor for this variable.
        scope_var = global_scope().find_var(var.name)
        return scope_var and scope_var.get_tensor()._is_initialized()

    pending = [v for v in startup_prog.list_vars()
               if not _already_initialized(v)]
    if pending:
        self._executor.run(startup_prog._prune(pending))
def convert_to_int8(self, program, place, scope=None):
    """
    Give quantizable ops int8 copies of their persistable inputs.

    For every op whose type is in ``_QUANTIZABLE_OP_TYPES``, each
    persistable input gets a new ``<name>.int8`` parameter, created once
    per name, holding the original tensor cast to ``np.int8``. The op input
    is renamed to it. Unused variables are removed from the program at the
    end. The default main program and the global scope are used when the
    corresponding argument is None.
    """
    if scope is None:
        scope = global_scope()
    if program is None:
        program = default_main_program()

    def _read_array(name):
        # Current value of the scope tensor as a numpy array.
        return np.array(scope.find_var(name).get_tensor())

    global_block = program.global_block()

    def _make_int8_param(var):
        # Create the int8 parameter and fill it with the cast weights.
        int8_name = var.name + ".int8"
        int8_param = global_block.create_parameter(
            name=int8_name.encode('ascii'),
            type=var.type,
            dtype=core.VarDesc.VarType.INT8,
            shape=var.shape)

        values = _read_array(var.name)

        scope.var(int8_name)
        int8_tensor = scope.find_var(int8_name).get_tensor()
        int8_tensor.set(values.astype(np.int8), place)
        return int8_param

    # Original input name -> name of its int8 replacement.
    renamed = {}
    for block in program.blocks:
        for op in list(block.ops):
            if op.type not in _QUANTIZABLE_OP_TYPES:
                continue
            for arg_name in op.input_arg_names:
                if not block.var(arg_name).persistable:
                    continue
                if arg_name not in renamed:
                    renamed[arg_name] = _make_int8_param(
                        block.var(arg_name)).name
                op._rename_input(arg_name, renamed[arg_name])

    self._remove_unused_var(program)
def set_state_dict(self,
                   state_dict,
                   include_sublayers=True,
                   use_structured_name=True):
    '''
    Set parameters and persistable buffers from state_dict. Every parameter
    or buffer with a matching entry is overwritten by the tensor found in
    state_dict. Entries that are missing or whose shape does not match are
    skipped with a warning.

    Parameters:
        state_dict(dict) : Dict contains all the parameters and persistable buffers.
        include_sublayers(bool, optional) : If true, also include the parameters and persistable buffers from sublayers. Default: True
        use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter or buffer name as key.
                                              Default: True
    Returns:
        None

    Examples:
        .. code-block:: python

            import paddle

            paddle.disable_static()

            emb = paddle.nn.Embedding([10, 10])

            state_dict = emb.state_dict()
            paddle.save(state_dict, "paddle_dy")

            para_state_dict, _ = paddle.load("paddle_dy")

            emb.set_state_dict(para_state_dict)

    '''

    def _pair_with_state(lookup_key, param):
        # Return (param, state) or raise ValueError describing the mismatch.
        state = state_dict.get(lookup_key, None)
        if state is None:
            raise ValueError(
                "{} is not found in the provided dict.".format(lookup_key))
        state_shape = list(state.shape)
        if state_shape != list(param.shape):
            raise ValueError(
                "{} receives a shape {}, but the expected shape is {}.".
                format(lookup_key, list(state.shape), list(param.shape)))
        return param, state

    matched = []
    for key, param in self.state_dict().items():
        lookup_key = key if use_structured_name else param.name
        try:
            pair = _pair_with_state(lookup_key, param)
        except ValueError as err:
            warnings.warn(("Skip loading for {}. ".format(key) + str(err)))
        else:
            matched.append(pair)

    if in_dygraph_mode():
        for param, state in matched:
            param.set_value(state)
        return

    def _assign(var, ndarray):
        # Write ndarray into the scope tensor on the place it already uses.
        tensor = global_scope().find_var(var.name).get_tensor()
        current = tensor._place()
        if current.is_cpu_place():
            target = core.CPUPlace()
        elif current.is_cuda_pinned_place():
            target = core.CUDAPinnedPlace()
        else:
            generic = core.Place()
            generic.set_place(tensor._place())
            target = core.CUDAPlace(generic.gpu_device_id())
        tensor.set(ndarray, target)

    executor = Executor(_get_device())._default_executor
    # restore parameter states
    core._create_loaded_parameter([param for param, _ in matched],
                                  global_scope(), executor)
    for param, state in matched:
        _assign(param, state)
def train_loop(args, train_program, reader, py_reader, loss, trainer_id):
    """
    Run CPU training for ``args.num_passes`` passes with a ParallelExecutor.

    Args:
        args: parsed options. This function reads ``batch_size``,
            ``with_hs``, ``with_nce``, ``num_passes``, ``with_speed``,
            ``with_infer_test`` and ``model_output_dir``.
        train_program: main program handed to the ParallelExecutor.
        reader: dataset object whose ``train(...)`` creates the sample reader.
        py_reader: reader that feeds the program; it is started at the
            beginning of every pass and reset when the pass ends.
        loss: loss variable; its name is fetched on every step.
        trainer_id: only trainer 0 saves the persistables at the end of a
            pass.

    The ``CPU_NUM`` environment variable must be set to an integer; it
    determines the number of executor threads.
    """
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            reader.train((args.with_hs or (not args.with_nce))),
            buf_size=args.batch_size * 100),
        batch_size=args.batch_size)

    py_reader.decorate_paddle_reader(train_reader)

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    exec_strategy = fluid.ExecutionStrategy()

    print("CPU_NUM:" + str(os.getenv("CPU_NUM")))
    # Parsed once; like before, this raises if CPU_NUM is not set.
    cpu_num = int(os.getenv("CPU_NUM"))
    exec_strategy.num_threads = cpu_num

    build_strategy = fluid.BuildStrategy()
    if cpu_num > 1:
        build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce

    train_exe = fluid.ParallelExecutor(
        use_cuda=False,
        loss_name=loss.name,
        main_program=train_program,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    # The profiler covers steps [profiler_step_start, profiler_step_end],
    # counted over the whole run rather than per pass.
    profile_state = "CPU"
    profiler_step = 0
    profiler_step_start = 20
    profiler_step_end = 30

    for pass_id in range(args.num_passes):
        epoch_start = time.time()
        py_reader.start()
        batch_id = 0
        # time.clock() was removed in Python 3.8 and measured CPU time on
        # Unix. Wall-clock time is what a samples-per-second figure needs.
        start = time.time()

        try:
            while True:

                if profiler_step == profiler_step_start:
                    fluid.profiler.start_profiler(profile_state)

                loss_val = train_exe.run(fetch_list=[loss.name])
                loss_val = np.mean(loss_val)

                if profiler_step == profiler_step_end:
                    fluid.profiler.stop_profiler('total', 'trainer_profile.log')
                profiler_step += 1

                if batch_id % 50 == 0:
                    logger.info(
                        "TRAIN --> pass: {} batch: {} loss: {} reader queue:{}".
                        format(pass_id, batch_id,
                               loss_val.mean() / args.batch_size,
                               py_reader.queue.size()))

                if args.with_speed:
                    if batch_id % 1000 == 0 and batch_id != 0:
                        now = time.time()
                        elapsed = now - start
                        start = now
                        samples = 1001 * args.batch_size * cpu_num
                        logger.info("Time used: {}, Samples/Sec: {}".format(
                            elapsed, samples / elapsed))

                # calculate infer result every 1000 batches when using
                # --with_infer_test
                if args.with_infer_test:
                    if batch_id % 1000 == 0 and batch_id != 0:
                        model_dir = args.model_output_dir + '/batch-' + str(
                            batch_id)
                        inference_test(global_scope(), model_dir, args)

                # Periodic checkpoint inside a pass.
                if batch_id % 500000 == 0 and batch_id != 0:
                    model_dir = args.model_output_dir + '/batch-' + str(
                        batch_id)
                    fluid.io.save_persistables(executor=exe, dirname=model_dir)
                    with open(model_dir + "/_success", 'w+') as f:
                        f.write(str(batch_id))

                batch_id += 1

        except fluid.core.EOFException:
            # The reader is exhausted: this is the normal end of a pass.
            py_reader.reset()
            epoch_end = time.time()
            logger.info("Epoch: {0}, Train total expend: {1} ".format(
                pass_id, epoch_end - epoch_start))

            model_dir = args.model_output_dir + '/pass-' + str(pass_id)
            if trainer_id == 0:
                fluid.io.save_persistables(executor=exe, dirname=model_dir)
                with open(model_dir + "/_success", 'w+') as f:
                    f.write(str(pass_id))
def get_tensor(var_name):
    """Return the global-scope tensor named ``var_name`` as a numpy array."""
    scope_var = global_scope().find_var(var_name)
    return numpy.array(scope_var.get_tensor())
def save_quantized_model(self,
                         model,
                         path,
                         input_spec=None,
                         onnx_format=False,
                         **config):
    """
    Save the quantized model for the inference.

    The model is first exported with ``paddle.jit.save``. The exported
    inference program is then reloaded in static mode, post-processed
    (scales gathered, ``moving_average_abs_max_scale`` ops removed,
    skip-quant attributes set, optional ONNX-format passes applied) and
    saved again under the same file names. If the call started in dynamic
    mode, dynamic mode is restored before returning, even when the
    post-processing raises.

    Args:
        model (Layer): The model to be saved.
        path (str): The path prefix to save model. The format is
            ``dirname/file_prefix`` or ``file_prefix``.
        input_spec (list[InputSpec|Tensor], optional): Describes the input
            of the saved model's forward method, which can be described by
            InputSpec or example Tensor. If None, all input variables of
            the original Layer's forward method would be the inputs of
            the saved model. Default None.
        onnx_format (bool, optional): Whether to export the quantized model
            with format of ONNX. Default is False.
        **config (dict, optional): Other save configuration options for
            compatibility. We do not recommend using these configurations,
            they may be removed in the future. If not necessary, DO NOT use
            them. Default None.
            The following options are currently supported:
            (1) output_spec (list[Tensor]): Selects the output targets of
            the saved model. By default, all return variables of original
            Layer's forward method are kept as the output of the saved model.
            If the provided ``output_spec`` list is not all output variables,
            the saved model will be pruned according to the given
            ``output_spec`` list.

    Returns:
        None
    """
    assert isinstance(model, dygraph.Layer), \
        "The model must be the instance of dygraph.Layer."

    paddle.jit.save(layer=model, path=path, input_spec=input_spec, **config)

    is_dynamic_mode = False
    if paddle.in_dynamic_mode():
        is_dynamic_mode = True
        paddle.enable_static()

    # Static mode is a global switch. The try/finally guarantees that a
    # failure below does not leave the caller stuck in static mode.
    try:
        place = core.CPUPlace()
        scope = global_scope()
        exe = Executor(place)

        dirname = os.path.dirname(path)
        basename = os.path.basename(path)
        model_filename = basename + INFER_MODEL_SUFFIX
        params_filename = basename + INFER_PARAMS_SUFFIX

        [infer_program, feed_target_names, fetch_targets
         ] = (load_inference_model(dirname=dirname,
                                   executor=exe,
                                   model_filename=model_filename,
                                   params_filename=params_filename))

        self._gather_scales(infer_program, scope, fetch_targets)

        # Remove `moving_average_abs_max_scale` node in sub graphs.
        graph = IrGraph(core.Graph(infer_program.desc), for_test=False)
        for sub_graph in graph.all_sub_graphs():
            for _op in sub_graph.all_op_nodes():
                if _op.name() == "moving_average_abs_max_scale":
                    sub_graph.safe_remove_nodes(_op)
            sub_graph.resolve_hazard()
        infer_program = graph.to_program()

        self._set_skip_quant_attr(infer_program)

        clip_extra = False
        if onnx_format:
            graph = IrGraph(core.Graph(infer_program.desc), for_test=False)
            transform_pass = ReplaceFakeQuantDequantPass(scope, place)
            transform_pass.apply(graph)

            quant_weight_pass = QuantWeightPass(scope, place)
            quant_weight_pass.apply(graph)

            infer_program = graph.to_program()

            clip_extra = True

        save_inference_model(dirname=dirname,
                             feeded_var_names=feed_target_names,
                             target_vars=fetch_targets,
                             executor=exe,
                             main_program=infer_program.clone(),
                             model_filename=model_filename,
                             params_filename=params_filename,
                             clip_extra=clip_extra)
    finally:
        if is_dynamic_mode:
            paddle.disable_static()
def create_quant_model(model,
                       params,
                       activation_quantize_type='moving_average_abs_max',
                       weight_quantize_type='channel_wise_abs_max',
                       save=False):
    """
    Build a quantized inference program from a saved inference model and
    return it in serialized form.

    The model is loaded with ``paddle.static.load_inference_model`` on
    ``CUDAPlace(0)``, rewritten by ``QuantizationTransformPass``, tagged
    with a fixed ``out_threshold`` attribute, frozen by
    ``QuantizationFreezePass`` and converted back to a program. The scale
    tensors of the resulting quantize/dequantize ops are then overwritten
    with ones in the global scope.

    Args:
        model: passed as ``model_filename`` to ``load_inference_model``.
        params: passed as ``params_filename`` to ``load_inference_model``.
        activation_quantize_type: forwarded to ``QuantizationTransformPass``.
        weight_quantize_type: forwarded to both quantization passes.
        save: when true, the program is also saved with
            ``fluid.io.save_inference_model`` under 'test_inference_model'.

    Returns:
        tuple: ``(serialized_program, serialized_params)`` as produced by
        ``paddle.static.serialize_program`` and
        ``paddle.static.serialize_persistables``.
    """
    place = paddle.CUDAPlace(0)
    scope = global_scope()
    exe = paddle.static.Executor(place)
    [inference_program, feed_target_names, fetch_targets
     ] = paddle.static.load_inference_model(path_prefix=None,
                                            executor=exe,
                                            model_filename=model,
                                            params_filename=params)
    graph = IrGraph(core.Graph(inference_program.desc), for_test=True)

    # Op types that receive the "out_threshold" attribute below.
    out_scale_op_list = [
        "conv2d",
        "depthwise_conv2d",
        "mul",
        "matmul",
        "relu",
        "leaky_relu",
        "relu6",
        "sigmoid",
        "tanh",
        "prelu",
        "swish",
        "softmax",
        "batch_norm",
        "layer_norm",
        "elementwise_add",
        "pool2d",
        "reshape2",
        "transpose2",
        "concat",
        "elementwise_mul",
        "scale",
        "slice",
        "hard_swish",
        "hard_sigmoid",
        "conv2d_transpose",
        "gru",
        "bilinear_interp",
        "nearest_interp",
        "trilinear_interp",
        "flatten",
        "flatten2",
        "transpose",
        "pad2d",
        "reshape",
        "layer_norm",
    ]
    # Op type -> [input slot names, output slot names].
    op_real_in_out_name = {
        "conv2d": [["Input", "Filter"], ["Output"]],
        "depthwise_conv2d": [["Input", "Filter"], ["Output"]],
        "conv2d_transpose": [["Input", "Filter"], ["Output"]],
        "mul": [["X", "Y"], ["Out"]],
        "matmul": [["X", "Y"], ["Out"]],
        "pool2d": [["X"], ["Out"]],
        "elementwise_add": [["X", "Y"], ["Out"]],
        "concat": [["X"], ["Out"]],
        "softmax": [["X"], ["Out"]],
        "argmax": [["X"], ["Out"]],
        "transpose": [["X"], ["Out"]],
        "equal": [["X", "Y"], ["Out"]],
        "gather": [["X"], ["Out"]],
        "greater_equal": [["X", "Y"], ["Out"]],
        "greater_than": [["X", "Y"], ["Out"]],
        "less_equal": [["X", "Y"], ["Out"]],
        "less_than": [["X", "Y"], ["Out"]],
        "mean": [["X"], ["Out"]],
        "not_equal": [["X", "Y"], ["Out"]],
        "reshape": [["X"], ["Out"]],
        "reshape2": [["X"], ["Out"]],
        "transpose2": [["X"], ["Out"]],
        "bilinear_interp": [["X"], ["Out"]],
        "nearest_interp": [["X"], ["Out"]],
        "trilinear_interp": [["X"], ["Out"]],
        "slice": [["Input"], ["Out"]],
        "squeeze": [["X"], ["Out"]],
        "elementwise_sub": [["X", "Y"], ["Out"]],
        "relu": [["X"], ["Out"]],
        "relu6": [["X"], ["Out"]],
        "leaky_relu": [["X"], ["Out"]],
        "prelu": [["X"], ["Out"]],
        "tanh": [["X"], ["Out"]],
        "swish": [["X"], ["Out"]],
        "dropout": [["X"], ["Out"]],
        "batch_norm": [["X"], ["Y"]],
        "layer_norm": [["X"], ["Y"]],
        "sigmoid": [["X"], ["Out"]],
        "elementwise_mul": [["X", "Y"], ["Out"]],
        "scale": [["X"], ["Out"]],
        "hard_swish": [["X"], ["Out"]],
        "hard_sigmoid": [["X"], ["Out"]],
        "gru": [["Input", "Weight"], ["Hidden"]],
        "lstm": [["Input", "Weight"], ["Hidden"]],
        "pad2d": [["X"], ["Out"]],
        "flatten": [["X"], ["Out"]],
        "flatten2": [["X"], ["Out"]],
    }

    def _get_op_output_var_names(op):
        """
        Return the variable names bound to the output slots that
        ``op_real_in_out_name`` lists for this op type, or an empty list
        when the op type is not in that table.
        """
        assert isinstance(op, (IrNode, Operator)), \
            "The input op should be IrNode or Operator."
        var_names = []
        # IrNode exposes the op type through name(), Operator through .type.
        op_name = op.name() if isinstance(op, IrNode) \
            else op.type
        if op_name not in op_real_in_out_name:
            return []

        name_list = op_real_in_out_name[op_name][1]
        for name in name_list:
            var_name = op.output(name)
            # op.output may yield one name or a list of names.
            if isinstance(var_name, list):
                var_names.extend(var_name)
            else:
                var_names.append(var_name)
        return var_names

    # Rewrite the graph with the quantization transform pass.
    transform_pass = QuantizationTransformPass(
        scope=scope,
        place=place,
        activation_quantize_type=activation_quantize_type,
        weight_quantize_type=weight_quantize_type)
    transform_pass.apply(graph)

    # Set a constant out_threshold on listed ops that have at least one
    # FP32/FP64 output variable.
    op_nodes = graph.all_op_nodes()
    for op_node in op_nodes:
        if op_node.name() in out_scale_op_list:
            var_names = _get_op_output_var_names(op_node)
            for var_name in var_names:
                in_node = graph._find_node_by_name(op_node.outputs, var_name)
                if in_node.dtype() not in \
                    [core.VarDesc.VarType.FP64, core.VarDesc.VarType.FP32]:
                    continue

                op_node.op()._set_attr("out_threshold", 3.0)

    # Freeze graph for inference, but the weight of fc/conv is still float type.
    freeze_pass = QuantizationFreezePass(
        scope=scope, place=place, weight_quantize_type=weight_quantize_type)
    freeze_pass.apply(graph)

    main_program = graph.to_program()

    # modify fake_quantize_moving_average_abs_max(InScale) and fake_channel_wise_dequantize_max_abs(Scales)
    # The scale tensors in the scope are overwritten with ones.
    op_nodes = graph.all_op_nodes()
    for op_node in op_nodes:
        if op_node.name() == 'fake_quantize_moving_average_abs_max':
            var_name = op_node.input("InScale")[0]
            tensor = scope.var(var_name).get_tensor()
            tensor.set(np.array([1], dtype=np.float32), place)
        elif op_node.name() == 'fake_channel_wise_dequantize_max_abs':
            var_name = op_node.input("Scales")[0]
            tensor = scope.var(var_name).get_tensor()
            tensor.set(np.ones(tensor.shape(), dtype=np.float32), place)

    if save:
        fluid.io.save_inference_model('test_inference_model',
                                      feed_target_names,
                                      fetch_targets,
                                      exe,
                                      main_program=main_program)

    # Serialize the program and its persistables for the caller.
    feed_vars = [
        main_program.global_block().var(name) for name in feed_target_names
    ]
    serialized_program = paddle.static.serialize_program(feed_vars,
                                                         fetch_targets,
                                                         program=main_program)
    serialized_params = paddle.static.serialize_persistables(
        feed_vars, fetch_targets, executor=exe, program=main_program)
    return serialized_program, serialized_params
def freeze_program(self, program, place, scope=None):
    """Freeze input training program for inference.

    Fake quantize ops on persistable inputs and all fake dequantize ops are
    removed, the affected weights are quantized in place in the scope, and
    a ``fake_dequantize_max_abs`` op is inserted after every op whose type
    is in ``_QUANTIZABLE_OP_TYPES``. ``self.is_test`` is set to True.

    Args:
        program (Program): the input program to be transpiled. The default
            main program is used when None.
        place: place on which the quantized weights are set.
        scope: scope holding the weights. The global scope is used when
            None.

    Raises:
        ValueError: if a quantizable op does not have exactly one output.
    """

    self.is_test = True
    scope = global_scope() if scope is None else scope
    program = default_main_program() if program is None else program

    # Names of all persistable variables; used to recognize weights.
    persistable_vars = [
        v.name
        for v in filter(lambda var: var.persistable, program.list_vars())
    ]
    # Per-block maps, indexed by block id:
    #   op_in_rename_map:  output name of a removed op -> name to use instead
    #   op_out_rename_map: output of a quantizable op -> its dequantized var
    #   var_scale_map:     quantized input name -> scale (value or Variable)
    op_in_rename_map = [
        collections.OrderedDict() for _ in range(len(program.blocks))
    ]
    op_out_rename_map = [
        collections.OrderedDict() for _ in range(len(program.blocks))
    ]
    var_scale_map = [
        collections.OrderedDict() for _ in range(len(program.blocks))
    ]

    def _remove_fake_quant_and_dequant_op(block, op):
        # Remove the op and remember that its output should be read from
        # its input, following an existing rename of that input if any.
        idx = block.ops.index(op)
        block_id = block.idx
        k = op.output('Out')[0]
        v = op.input('X')[0]
        if v not in op_in_rename_map[block_id]:
            op_in_rename_map[block_id][k] = v
        else:
            op_in_rename_map[block_id][k] = op_in_rename_map[block_id][v]
        block._remove_op(idx)

    def _insert_post_dequant_op(block, op):
        # Insert a fake_dequantize_max_abs op right after `op` and return
        # the new dequantized output variable.
        idx = block.ops.index(op)
        block_id = block.idx
        max_range = None
        scale_var = None
        for name in op.input_arg_names:
            # rename the input to the input of the removed op that used to
            # produce it
            if name in op_in_rename_map[block_id]:
                op._rename_input(name, op_in_rename_map[block_id][name])

            scale_v = var_scale_map[block_id][_original_var_name(name)]
            if _original_var_name(name) in persistable_vars:
                # Weight input: the scale is a number, folded into max_range.
                param_range = (1 << (self.weight_bits - 1)) - 1
                act_range = (1 << (self.activation_bits - 1)) - 1
                assert _is_float(scale_v)
                max_range = param_range * act_range / scale_v
            else:
                # Other input: the scale is a Variable passed to the op.
                assert isinstance(scale_v, Variable)
                scale_var = scale_v

        if len(op.output_arg_names) != 1:
            raise ValueError("Only support one output, but op %s has"
                             " more than one output." % (op.type))
        out_var = block.var(op.output_arg_names[0])
        dequant_var = block.create_var(name=_dequantized_var_name(
            out_var.name),
                                       type=out_var.type,
                                       shape=out_var.shape,
                                       dtype=out_var.dtype)
        # insert fake_dequantize_op
        # NOTE(review): max_range stays None if no input of the op is a
        # persistable variable, and float(None) would then raise.
        dequant_op = block._insert_op(
            idx + 1,
            type="fake_dequantize_max_abs",
            attrs={'max_range': float(max_range)},
            inputs={
                "X": out_var,
                'Scale': scale_var
            },
            outputs={"Out": dequant_var})
        op_out_rename_map[block_id][out_var.name] = dequant_var.name
        return dequant_var

    def _load_var(name):
        # Current value of the scope tensor as a numpy array.
        return np.array(scope.find_var(name).get_tensor())

    def _restore_var(name, arr):
        # Write `arr` back into the scope tensor on `place`.
        t = scope.find_var(name).get_tensor()
        t.set(arr, place)

    for block in program.blocks:
        # Iterate over a copy, since ops are removed and inserted below.
        ops = list(block.ops)
        block_id = block.idx
        for op in ops:
            op_type = op.type

            # insert dequant_op after fc/conv, need to rename
            # input of the followed ops(of fc/conv) to the dquant_op
            for name in op.input_arg_names:
                if name in op_out_rename_map[block_id]:
                    op._rename_input(name,
                                     op_out_rename_map[block_id][name])

            if op_type in self.fake_quant_op_types:
                in_arg_name = op.input('X')[0]
                if in_arg_name in persistable_vars:
                    # Weight: the scale is a concrete value, either the max
                    # absolute weight or the stored OutScale tensor.
                    if self.weight_quantize_type == 'abs_max':
                        param = _load_var(in_arg_name)
                        scale_v = np.max(np.abs(param))
                    else:
                        scale_v = _load_var(op.output('OutScale')[0])
                    var_scale_map[block_id][in_arg_name] = scale_v
                else:
                    # Non-persistable input: keep the OutScale Variable.
                    scale_v = block.var(op.output('OutScale')[0])
                    var_scale_map[block_id][in_arg_name] = scale_v

                if in_arg_name in persistable_vars:
                    _remove_fake_quant_and_dequant_op(block, op)
                    # quantize weight and restore
                    param_t = _load_var(in_arg_name)
                    param_q_t = quant(param_t, scale_v, self.weight_bits)
                    _restore_var(in_arg_name, param_q_t)

            if op_type in self.fake_dequant_op_types:
                _remove_fake_quant_and_dequant_op(block, op)

            if op_type in _QUANTIZABLE_OP_TYPES:
                dequant_var = _insert_post_dequant_op(block, op)

    # remove the unused var in ProgramDesc
    self._remove_unused_var(program)
def _load_optimizer(self, state, executor):
    """
    Restore optimizer variables of the 'train' program from ``state``.

    ``state`` maps saved variable names to values. Names saved in dygraph
    mode are converted to the names used by the static-graph program
    before being assigned. Nothing happens when the program has no
    optimizer variables.

    Args:
        state (dict): saved optimizer state.
        executor: executor passed to ``_create_loaded_parameter``.

    Raises:
        AssertionError: if a variable has no value after conversion.
    """
    prog = self._progs.get('train', None)
    optim = list(filter(is_belong_to_optimizer, prog.list_vars()))
    if not optim:
        return

    fluid.core._create_loaded_parameter(optim, global_scope(), executor)

    # Work on a copy so that the caller's dict is not modified.
    converted_state = dict(state)
    for var in optim:
        if var.name in ["@LR_DECAY_COUNTER@", "global_step"]:
            # When using learning rate scheduler, dygraph would name the
            # global step var as "global_step" to save, while static-graph
            # would has a state var named as "@LR_DECAY_COUNTER@".
            # NOTE: dygraph saved global_step is 1 larger than that in
            # static-graph, since the time of global_step to increase is
            # different.
            state_val = (
                np.array(converted_state.pop("global_step")) - 1
            ) if "global_step" in converted_state else converted_state.pop(
                "@LR_DECAY_COUNTER@", None)
            if state_val is not None:
                converted_state[var.name] = state_val
        elif var.name.startswith("learning_rate_"):
            # When using static learning rate, static-graph would make it
            # a persistable var named 'unique_name.generate("learning_rate")',
            # However, dygraph wouldn't save it.
            if var.name not in state:
                continue
        else:
            # moment and other accumulators
            if var.name not in converted_state:
                # try to convert from dygraph name
                opt_name = self.model._optimizer._name
                opt_cls_name = self.model._optimizer.__class__.__name__
                opt_unq_name = None
                for name in self.model._optimizer._accumulators.keys():
                    # Strip the "<opt_name>_" prefix from the accumulator
                    # name when the optimizer has a name.
                    accum_name = name if opt_name is None else name[
                        len(opt_name) + 1:]
                    for param_name, state_var in self.model._optimizer._accumulators[
                            name].items():
                        if opt_unq_name is None:
                            # can not infer out the exact unique(opt_name),
                            # thus try to extract rather than generate
                            # Keys are scanned from longest to shortest.
                            for state_key in sorted(state.keys(),
                                                    key=lambda x: len(x),
                                                    reverse=True):
                                prefix = param_name + "_" + (
                                    opt_cls_name
                                    if opt_name is None else opt_name) + "_"
                                if state_key.startswith(prefix):
                                    prefix_offset = state_key[len(
                                        prefix):].find("_") + len(prefix)
                                    opt_unq_name = state_key[
                                        len(param_name + "_"):prefix_offset]
                                    # TODO: assert
                                    # assert opt_unq_name is None
                        # gen(param.name + "_" + gen(opt_name) + "_" + accum_name)
                        # always end with "_0" since the unique optimizer._name
                        dy_state_name = (param_name + "_" + opt_unq_name +
                                         "_" + accum_name + "_0")
                        # Move the value from the dygraph key to the name
                        # of the static-graph state variable.
                        converted_state[
                            state_var.name] = converted_state.pop(
                                dy_state_name)

        assert var.name in converted_state, \
            "variable [{}] is not in optimizer state file".format(var.name)
        self._set_var(var, converted_state[var.name])
def save_vars(executor,
              dirname,
              main_program=None,
              vars=None,
              predicate=None,
              filename=None):
    """
    Save selected variables of a `Program` to files or to memory.

    The variables can be selected in two ways: pass them as a list in
    `vars`, or pass a `predicate` function, in which case every variable
    of the program with `predicate(variable) == True` is saved. `vars`
    takes priority when both are given.

    `dirname` is the folder the variables are written to. Leave `filename`
    as None to get one file per variable inside that folder, or set
    `filename` to store all variables in a single file. When both `dirname`
    and `filename` are None the variables are saved to memory instead.

    Args:
        executor(Executor): The executor to run for saving variables.
        dirname(str): The folder where to save variables. Set it to None
                      (together with `filename`) to save to memory.
        main_program(Program, optional): The program whose variables will be saved.
                                         If it is None, the default main program will
                                         be used automatically.
                                         Default: None
        vars(list[Variable], optional): The list contains all variables to be saved.
                                        Default: None
        predicate(function, optional): The function selects the variables that make
                                       `predicate(variable) == True`.
                                       Default: None
        filename(str, optional): If you prefer to save all variables in a single file,
                                 use `filename` to specify it. Otherwise, let `filename` be None.
                                 Default: None

    Returns:
        str: When saving parameters to a file, returns None.
             When saving parameters to memory, returns a binary string containing parameters.

    Raises:
        TypeError: If `main_program` is not an instance of Program nor None.

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid

            main_prog = fluid.Program()
            startup_prog = fluid.Program()
            with fluid.program_guard(main_prog, startup_prog):
                data = fluid.layers.data(name="img", shape=[64, 784], append_batch_size=False)
                w = fluid.layers.create_parameter(shape=[784, 200], dtype='float32', name='fc_w')
                b = fluid.layers.create_parameter(shape=[200], dtype='float32', name='fc_b')
                hidden_w = fluid.layers.matmul(x=data, y=w)
                hidden_b = fluid.layers.elementwise_add(hidden_w, b)
            place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            exe.run(startup_prog)

            # The first usage: use `vars` to set the saved variables.
            var_list = [w, b]
            path = "./my_paddle_vars"
            fluid.io.save_vars(executor=exe, dirname=path, vars=var_list,
                            filename="vars_file")
            # w and b will be saved in a file named "vars_file".

            # The second usage: use `predicate` to select the saved variable.
            def name_has_fc(var):
                res = "fc" in var.name
                return res
            param_path = "./my_paddle_model"
            fluid.io.save_vars(executor=exe, dirname=param_path, main_program=main_prog, vars=None, predicate = name_has_fc)
            # all variables whose names contain "fc" are saved.
    """
    save_to_memory = dirname is None and filename is None

    main_program = _get_valid_program(main_program)

    if vars is None:
        # Select through the predicate, then re-enter with an explicit list.
        selected = list(filter(predicate, main_program.list_vars()))
        return save_vars(executor,
                         main_program=main_program,
                         dirname=dirname,
                         vars=selected,
                         filename=filename)

    params_var_name = unique_name.generate("saved_params")
    # give warning when there is no var in model
    if not list(vars):
        warnings.warn(
            "no variable in your model, please ensure there are any variables in your model to save"
        )
        return None

    save_program = Program()
    save_block = save_program.global_block()

    # A single combined save is used for one file or for memory; otherwise
    # each variable gets its own `save` op.
    combine = filename is not None or save_to_memory

    combined_vars = {}
    for src_var in vars:
        # NOTE: don't save the variable which type is RAW
        if src_var.type == core.VarDesc.VarType.RAW:
            continue
        cloned = _clone_var_in_block_(save_block, src_var)
        if combine:
            combined_vars[cloned.name] = cloned
            continue
        target_path = os.path.join(os.path.normpath(dirname), cloned.name)
        save_block.append_op(
            type='save',
            inputs={'X': [cloned]},
            outputs={},
            attrs={'file_path': os.path.normpath(target_path)})

    if combine:
        # Variables are combined in name order.
        ordered_vars = [combined_vars[key] for key in sorted(combined_vars)]

        # The path stays empty when saving to memory.
        save_path = str()
        if not save_to_memory:
            save_path = os.path.join(os.path.normpath(dirname), filename)

        saved_params = save_block.create_var(type=core.VarDesc.VarType.RAW,
                                             name=params_var_name)
        saved_params.desc.set_persistable(True)

        save_block.append_op(type='save_combine',
                             inputs={'X': ordered_vars},
                             outputs={'Y': saved_params},
                             attrs={
                                 'file_path': save_path,
                                 'save_to_memory': save_to_memory
                             })

    # NOTE(zhiqiu): the save op adds variable kLookupTablePath to
    # save_program.desc, which makes save_program differ from its desc.
    # _sync_with_cpp brings them back in line before running.
    save_program._sync_with_cpp()
    executor.run(save_program)
    if save_to_memory:
        return global_scope().find_var(params_var_name).get_bytes()
def __get_reader__():
    """Return the underlying reader object held by the reader variable.

    The variable is looked up in the global scope by ``reader.name``;
    ``reader`` itself is taken from the enclosing scope, not passed in.
    """
    return global_scope().find_var(reader.name).get_reader()
def to_numpy(var):
    """Convert a variable to a numpy array.

    Args:
        var: a ``Variable`` or a ``fluid.core.VarBase``.

    Returns:
        ``var.numpy()`` when ``var`` is a ``fluid.core.VarBase``; otherwise
        ``np.array`` of the tensor stored under ``var.name`` in the global
        scope.

    Raises:
        AssertionError: if ``var`` is neither of the accepted types.
    """
    assert isinstance(var, (Variable, fluid.core.VarBase)), "not a variable"
    if not isinstance(var, fluid.core.VarBase):
        # Plain Variable: its value lives in the global scope under its name.
        tensor = global_scope().find_var(var.name).get_tensor()
        return np.array(tensor)
    return var.numpy()
def ctr_reader(
        feed_dict,
        file_type,  # gzip or plain
        file_format,  # csv or svm
        dense_slot_index,
        sparse_slot_index,
        capacity,
        thread_num,
        batch_size,
        file_list,
        slots,
        name=None):
    """
    Create a CTR reader for data feeding in Python

    This layer returns a Reader Variable. It creates a blocking queue
    variable in the global scope, adds a :code:`create_ctr_reader` op to the
    startup program, copies the resulting reader variable into the main
    program, patches it via :code:`monkey_patch_reader_methods`, and appends
    a :code:`read` op to the main program whose outputs are
    :code:`feed_dict`.

    Args:
       feed_dict(list(variable)): a list of data variable. Their dtypes are
           set on the reader variable, and they are the outputs of the
           :code:`read` op.
       file_type('gzip'|'plain'): the type of the data file
       file_format('csv'|'svm'): csv data or svm data format.
           csv data format is :
               label dense_fea,dense_fea sparse_fea,sparse_fea
           the svm data format is :
               label slot1:fea_sign slot2:fea_sign slot1:fea_sign
       dense_slot_index(list(int)): the index of dense slots
       sparse_slot_index(list(int)): the index of sparse slots
       capacity(int): The buffer capacity of the blocking queue.
       thread_num(int): the thread num to read files by cpp reader.
       batch_size(int): batch size of data.
       file_list(list(str)): List of file names that need to read.
       slots(list(int64)): list of slot id.
       name(string): The prefix of the queue name and the Reader name. When
           None, both names are generated automatically.

    Returns:
       Variable: A Reader from which we can get feeding data.

    Examples:

        1. The basic usage of :code:`ctr_reader` is as follows:

        .. code-block:: python

            py_reader = fluid.contrib.ctr_reader.ctr_reader(
                feed_dict=datas,
                file_type='plain',
                file_format='csv',
                file_list=file_list,
                dense_slot_index=[1, 2, 3, 4],
                sparse_slot_index=[],
                capacity=64,
                thread_num=20,
                batch_size=1000,
                slots=[],
                name='ctr_reader')
    """
    if name is not None:
        queue_name = "_".join((name, "queue"))
        reader_name = "_".join((name, "reader"))
    else:
        # NOTE(review): `unique_name` is called directly here, so it is
        # assumed to be bound to a name-generating callable rather than a
        # module -- confirm against this file's imports.
        queue_name = unique_name('lod_tensor_blocking_queue')
        reader_name = unique_name('create_ctr_reader')

    # The blocking queue lives in the global scope under `queue_name`.
    queue_var = global_scope().var(queue_name)
    feed_queue = core.init_lod_tensor_blocking_queue(queue_var, capacity)

    # Declare the reader variable and its creation op in the startup program.
    startup_block = default_startup_program().current_block()
    startup_reader = startup_block.create_var(name=reader_name)
    reader_attrs = {
        'use_data_config': False,
        'thread_num': thread_num,
        'batch_size': batch_size,
        'file_list': file_list,
        'file_type': file_type,
        'file_format': file_format,
        'dense_slot_index': dense_slot_index,
        'sparse_slot_index': sparse_slot_index,
        'sparse_slots': slots,
        'ranks': [],
        'lod_levels': [],
        'shape_concat': []
    }
    startup_block.append_op(
        type='create_ctr_reader',
        inputs={'blocking_queue': [queue_name]},
        outputs={'Out': [startup_reader]},
        attrs=reader_attrs)

    startup_reader.desc.set_dtypes([item.dtype for item in feed_dict])
    startup_reader.persistable = True

    # Mirror the reader variable into the main program, then monkey patch
    # the py_reader special methods onto it.
    main_reader_var = _copy_reader_var_(
        default_main_program().current_block(), startup_reader)
    reader = monkey_patch_reader_methods(main_reader_var)
    reader.queue = feed_queue
    reader.exited = False

    # The `read` op fills the variables in `feed_dict` from the reader.
    default_main_program().current_block().append_op(
        type='read',
        inputs={'Reader': [reader]},
        attrs={'infer_out': False},
        outputs={'Out': feed_dict})

    return reader