def test_save_inference_model_with_auc(self):
    MODEL_DIR = "./tmp/inference_model4"
    init_program = Program()
    program = Program()

    # fake program without feed/fetch
    with program_guard(program, init_program):
        x = layers.data(name='x', shape=[2], dtype='float32')
        y = layers.data(name='y', shape=[1], dtype='int32')
        predict = fluid.layers.fc(input=x, size=2, act='softmax')
        acc = fluid.layers.accuracy(input=predict, label=y)
        auc_var, batch_auc_var, auc_states = fluid.layers.auc(
            input=predict, label=y)
        cost = fluid.layers.cross_entropy(input=predict, label=y)
        avg_cost = fluid.layers.mean(x=cost)

    place = core.CPUPlace()
    exe = executor.Executor(place)
    exe.run(init_program, feed={}, fetch_list=[])

    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        save_inference_model(MODEL_DIR, ["x", "y"], [avg_cost], exe, program)
        expected_warn = "please ensure that you have set the auc states to zeros before saving inference model"
        self.assertTrue(len(w) > 0)
        self.assertTrue(expected_warn == str(w[0].message))

def test_save_inference_model(self):
    MODEL_DIR = "./tmp/inference_model3"
    init_program = Program()
    program = Program()

    # fake program without feed/fetch
    with program_guard(program, init_program):
        x = layers.data(name='x', shape=[2], dtype='float32')
        y = layers.data(name='y', shape=[1], dtype='float32')
        y_predict = layers.fc(input=x, size=1, act=None)
        cost = layers.square_error_cost(input=y_predict, label=y)
        avg_cost = layers.mean(cost)

    place = core.CPUPlace()
    exe = executor.Executor(place)
    exe.run(init_program, feed={}, fetch_list=[])

    # will print warning message
    cp_prog = CompiledProgram(program).with_data_parallel(
        loss_name=avg_cost.name)

    save_inference_model(MODEL_DIR, ["x", "y"], [avg_cost], exe, cp_prog)
    self.assertRaises(TypeError, save_inference_model,
                      [MODEL_DIR, ["x", "y"], [avg_cost], [], cp_prog])

def save_inference_model(self,
                         executor,
                         dirname,
                         feeded_var_names=None,
                         target_vars=None,
                         main_program=None,
                         export_for_deployment=True):
    """
    Prune the given `main_program` to build a new program especially for
    inference, and then save it and all related parameters to the given
    `dirname` with the `executor`.
    """
    assert isinstance(executor, Executor), \
        "In fleet.save_inference_model(), executor must be an " \
        "Executor instance."

    if main_program is None:
        main_program = self._origin_program
    assert isinstance(main_program, Program), \
        "In fleet.save_inference_model(), main_program must be a " \
        "Program instance."

    io.save_inference_model(dirname, feeded_var_names, target_vars,
                            executor, main_program, None, None,
                            export_for_deployment)

def __save_offline_model(self):
    '''
    Save the quantized model to the disk.
    '''
    io.save_inference_model(self.output, self.feed_var_names,
                            self.fetch_list, self.exe,
                            self.sampling_program)

def test_fit_line_inference_model(self):
    MODEL_DIR = "./tmp/inference_model"
    init_program = Program()
    program = Program()

    with program_guard(program, init_program):
        x = layers.data(name='x', shape=[2], dtype='float32')
        y = layers.data(name='y', shape=[1], dtype='float32')

        y_predict = layers.fc(input=x, size=1, act=None)

        cost = layers.square_error_cost(input=y_predict, label=y)
        avg_cost = layers.mean(cost)

        sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
        sgd_optimizer.minimize(avg_cost, init_program)

    place = core.CPUPlace()
    exe = executor.Executor(place)

    exe.run(init_program, feed={}, fetch_list=[])

    for i in six.moves.xrange(100):
        tensor_x = np.array(
            [[1, 1], [1, 2], [3, 4], [5, 2]]).astype("float32")
        tensor_y = np.array([[-2], [-3], [-7], [-7]]).astype("float32")

        exe.run(program,
                feed={'x': tensor_x,
                      'y': tensor_y},
                fetch_list=[avg_cost])

    save_inference_model(MODEL_DIR, ["x", "y"], [avg_cost], exe, program)

    expected = exe.run(program,
                       feed={'x': tensor_x,
                             'y': tensor_y},
                       fetch_list=[avg_cost])[0]

    six.moves.reload_module(executor)  # reload to build a new scope
    exe = executor.Executor(place)

    [infer_prog, feed_var_names, fetch_vars] = load_inference_model(
        MODEL_DIR, exe)

    outs = exe.run(
        infer_prog,
        feed={feed_var_names[0]: tensor_x,
              feed_var_names[1]: tensor_y},
        fetch_list=fetch_vars)
    actual = outs[0]

    self.assertEqual(feed_var_names, ["x", "y"])
    self.assertEqual(len(fetch_vars), 1)
    self.assertEqual(str(fetch_vars[0]), str(avg_cost))
    self.assertEqual(expected, actual)

def save_inference_model(self,
                         executor,
                         dirname,
                         feeded_var_names=None,
                         target_vars=None,
                         main_program=None,
                         export_for_deployment=True):
    io.save_inference_model(dirname, feeded_var_names, target_vars,
                            executor, main_program, None, None,
                            export_for_deployment)

def save_model(model_path, feeded_var_names, target_vars, exe):
    """
    Save the inference model. This function is only used by the inference module!

    :param model_path: directory to save the inference model to (must exist)
    :param feeded_var_names: list of input variable names of the inference model
    :param target_vars: list of output variables of the inference model
    :param exe: executor used to save the model
    :return:
    """
    assert os.path.exists(model_path), "[%s] can't be found." % model_path
    io.save_inference_model(model_path, feeded_var_names, target_vars, exe)

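# A minimal usage sketch for the save_model() wrapper above, not taken from the
# original source: the directory name is hypothetical and must already exist
# because of the os.path.exists assertion, and x, y, avg_cost, exe are assumed to
# come from a trained program like the fit-a-line tests in this collection.
import os

MODEL_DIR = "./tmp/inference_model_wrapper"  # hypothetical output directory
if not os.path.exists(MODEL_DIR):
    os.makedirs(MODEL_DIR)

# Feed names "x"/"y" and target avg_cost mirror the fit-a-line programs above.
save_model(MODEL_DIR, ["x", "y"], [avg_cost], exe)
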
def save_inference_model(self,
                         executor,
                         dirname,
                         feeded_var_names,
                         target_vars,
                         main_program=None,
                         export_for_deployment=True):
    """
    Prune the given `main_program` to build a new program especially for
    inference, and then save it and all related parameters to the given
    `dirname` with the `executor`.
    """
    io.save_inference_model(dirname, feeded_var_names, target_vars,
                            executor, main_program, None, None,
                            export_for_deployment)

def save_inference_model(self, dirname, feed=None, fetch=None):
    """
    Save the current model as an inference model.
    """
    program_cache = self.get_program_cache()
    if feed is None:
        feeded_var_names = [i.name for i in program_cache.inputs]
    else:
        feeded_var_names = [program_cache.inputs[i].name for i in feed]

    target_vars = program_cache.outputs
    from paddle.fluid.io import save_inference_model
    save_inference_model(
        dirname=dirname,
        feeded_var_names=feeded_var_names,
        target_vars=target_vars,
        executor=self._exe,
        main_program=self.main_program.clone())

def save_inference_model(self,
                         executor,
                         dirname,
                         feeded_var_names,
                         target_vars,
                         main_program=None,
                         export_for_deployment=True):
    """
    Prune the given `main_program` to build a new program especially for
    inference, and then save it and all related parameters to the given
    `dirname` with the `executor`.
    """
    if isinstance(executor, ParallelExecutor):
        raise TypeError(
            "In fleet.save_inference_model(), executor must be an Executor "
            "instance; ParallelExecutor is not allowed")

    if not isinstance(executor, Executor):
        raise TypeError(
            "In fleet.save_inference_model(), executor must be an Executor "
            "instance")

    if main_program is not None:
        if isinstance(main_program, CompiledProgram):
            raise TypeError(
                "In fleet.save_inference_model(), main_program must be a "
                "Program instance; CompiledProgram is not allowed")
        io.save_inference_model(dirname, feeded_var_names, target_vars,
                                executor, main_program, None, None,
                                export_for_deployment)
    else:
        io.save_inference_model(dirname, feeded_var_names, target_vars,
                                executor, self._origin_program, None, None,
                                export_for_deployment, True)

        model_basename = "__model__"
        model_filename = os.path.join(dirname, model_basename)
        with open(model_filename, "rb") as f:
            program_desc_str = f.read()

        program = Program.parse_from_string(program_desc_str)
        program._copy_dist_param_info_from(self.main_program)
        self.save_persistables(executor, dirname, program)

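# A hedged sketch of how the fleet-style save_inference_model() above might be
# called after distributed training. `fleet` stands in for whatever object exposes
# the method (not shown here); the directory and variable names are examples that
# mirror the fit-a-line programs in this collection, not an authoritative recipe.
exe = Executor(core.CPUPlace())

fleet.save_inference_model(
    executor=exe,
    dirname="./tmp/fleet_inference_model",  # hypothetical output directory
    feeded_var_names=["x", "y"],
    target_vars=[avg_cost],
    main_program=None,  # None: fall back to the captured origin program
    export_for_deployment=True)
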
def test_save_inference_model(self):
    MODEL_DIR = "./tmp/inference_model2"
    init_program = Program()
    program = Program()

    # fake program without feed/fetch
    with program_guard(program, init_program):
        x = layers.data(name='x', shape=[2], dtype='float32')
        y = layers.data(name='y', shape=[1], dtype='float32')
        y_predict = layers.fc(input=x, size=1, act=None)
        cost = layers.square_error_cost(input=y_predict, label=y)
        avg_cost = layers.mean(cost)

    place = core.CPUPlace()
    exe = executor.Executor(place)
    exe.run(init_program, feed={}, fetch_list=[])

    save_inference_model(MODEL_DIR, ["x", "y"], [avg_cost], exe, program)

def test_fit_line_inference_model(self):
    MODEL_DIR = "./tmp/inference_model"
    UNI_MODEL_DIR = "./tmp/inference_model1"

    init_program = Program()
    program = Program()

    with program_guard(program, init_program):
        x = layers.data(name='x', shape=[2], dtype='float32')
        y = layers.data(name='y', shape=[1], dtype='float32')

        y_predict = layers.fc(input=x, size=1, act=None)

        cost = layers.square_error_cost(input=y_predict, label=y)
        avg_cost = layers.mean(cost)

        sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
        sgd_optimizer.minimize(avg_cost, init_program)

    place = core.CPUPlace()
    exe = executor.Executor(place)

    exe.run(init_program, feed={}, fetch_list=[])

    for i in six.moves.xrange(100):
        tensor_x = np.array([[1, 1], [1, 2], [3, 4],
                             [5, 2]]).astype("float32")
        tensor_y = np.array([[-2], [-3], [-7], [-7]]).astype("float32")

        exe.run(program,
                feed={'x': tensor_x,
                      'y': tensor_y},
                fetch_list=[avg_cost])

    # Separated model and unified model
    save_inference_model(MODEL_DIR, ["x", "y"], [avg_cost], exe, program)
    save_inference_model(UNI_MODEL_DIR, ["x", "y"], [avg_cost], exe, program,
                         'model', 'params')

    main_program = program.clone()._prune_with_input(
        feeded_var_names=["x", "y"], targets=[avg_cost])
    params_str = save_persistables(exe, None, main_program, None)

    expected = exe.run(program,
                       feed={'x': tensor_x,
                             'y': tensor_y},
                       fetch_list=[avg_cost])[0]

    six.moves.reload_module(executor)  # reload to build a new scope

    model_0 = InferModel(load_inference_model(MODEL_DIR, exe))
    with open(os.path.join(UNI_MODEL_DIR, 'model'), "rb") as f:
        model_str = f.read()
    model_1 = InferModel(
        load_inference_model(None, exe, model_str, params_str))

    for model in [model_0, model_1]:
        outs = exe.run(model.program,
                       feed={
                           model.feed_var_names[0]: tensor_x,
                           model.feed_var_names[1]: tensor_y
                       },
                       fetch_list=model.fetch_vars)
        actual = outs[0]

        self.assertEqual(model.feed_var_names, ["x", "y"])
        self.assertEqual(len(model.fetch_vars), 1)
        print("fetch %s" % str(model.fetch_vars[0]))
        self.assertEqual(expected, actual)

    self.assertRaises(ValueError, fluid.io.load_inference_model, None, exe,
                      model_str, None)

def save_model(server_model_folder,
               client_config_folder,
               feed_var_dict,
               fetch_var_dict,
               main_program=None):
    executor = Executor(place=CPUPlace())

    feed_var_names = [feed_var_dict[x].name for x in feed_var_dict]
    target_vars = []
    target_var_names = []
    for key in sorted(fetch_var_dict.keys()):
        target_vars.append(fetch_var_dict[key])
        target_var_names.append(key)

    save_inference_model(
        server_model_folder,
        feed_var_names,
        target_vars,
        executor,
        main_program=main_program)

    config = model_conf.GeneralModelConfig()

    # int64 = 0; float32 = 1; int32 = 2;
    for key in feed_var_dict:
        feed_var = model_conf.FeedVar()
        feed_var.alias_name = key
        feed_var.name = feed_var_dict[key].name
        feed_var.is_lod_tensor = feed_var_dict[key].lod_level >= 1
        if feed_var_dict[key].dtype == core.VarDesc.VarType.INT64:
            feed_var.feed_type = 0
        if feed_var_dict[key].dtype == core.VarDesc.VarType.FP32:
            feed_var.feed_type = 1
        if feed_var_dict[key].dtype == core.VarDesc.VarType.INT32:
            feed_var.feed_type = 2
        if feed_var.is_lod_tensor:
            feed_var.shape.extend([-1])
        else:
            tmp_shape = []
            for v in feed_var_dict[key].shape:
                if v >= 0:
                    tmp_shape.append(v)
            feed_var.shape.extend(tmp_shape)
        config.feed_var.extend([feed_var])

    for key in target_var_names:
        fetch_var = model_conf.FetchVar()
        fetch_var.alias_name = key
        fetch_var.name = fetch_var_dict[key].name
        fetch_var.is_lod_tensor = fetch_var_dict[key].lod_level >= 1
        if fetch_var_dict[key].dtype == core.VarDesc.VarType.INT64:
            fetch_var.fetch_type = 0
        if fetch_var_dict[key].dtype == core.VarDesc.VarType.FP32:
            fetch_var.fetch_type = 1
        if fetch_var_dict[key].dtype == core.VarDesc.VarType.INT32:
            fetch_var.fetch_type = 2
        if fetch_var.is_lod_tensor:
            fetch_var.shape.extend([-1])
        else:
            tmp_shape = []
            for v in fetch_var_dict[key].shape:
                if v >= 0:
                    tmp_shape.append(v)
            fetch_var.shape.extend(tmp_shape)
        config.fetch_var.extend([fetch_var])

    cmd = "mkdir -p {}".format(client_config_folder)
    os.system(cmd)

    with open("{}/serving_client_conf.prototxt".format(client_config_folder),
              "w") as fout:
        fout.write(str(config))
    with open("{}/serving_server_conf.prototxt".format(server_model_folder),
              "w") as fout:
        fout.write(str(config))
    with open("{}/serving_client_conf.stream.prototxt".format(
            client_config_folder), "wb") as fout:
        fout.write(config.SerializeToString())
    with open("{}/serving_server_conf.stream.prototxt".format(
            server_model_folder), "wb") as fout:
        fout.write(config.SerializeToString())

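# A hypothetical usage sketch for the serving-oriented save_model() above. The
# feed/fetch dicts map serving alias names to program variables; x, y, avg_cost
# and program are assumed to come from a fit-a-line style program as in the tests
# in this collection, and both folder names are examples only.
feed_var_dict = {"x": x, "y": y}     # alias name -> feed Variable
fetch_var_dict = {"cost": avg_cost}  # alias name -> fetch Variable

save_model("serving_server_model",   # receives __model__, params and server conf
           "serving_client_conf",    # receives the client prototxt configs
           feed_var_dict,
           fetch_var_dict,
           main_program=program)
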
def save_quantized_model(self, layer, path, input_spec=None, **config):
    """
    Save the quantized model for inference.

    Args:
        layer (Layer): The Layer to be saved.
        path (str): The path prefix to save model. The format is
            ``dirname/file_prefix`` or ``file_prefix``.
        input_spec (list[InputSpec|Tensor], optional): Describes the input
            of the saved model's forward method, which can be described by
            InputSpec or example Tensor. If None, all input variables of
            the original Layer's forward method would be the inputs of
            the saved model. Default None.
        **configs (dict, optional): Other save configuration options for
            compatibility. We do not recommend using these configurations,
            they may be removed in the future. If not necessary, DO NOT use
            them. Default None.
            The following options are currently supported:
            (1) output_spec (list[Tensor]): Selects the output targets of
            the saved model. By default, all return variables of the
            original Layer's forward method are kept as the output of the
            saved model. If the provided ``output_spec`` list is not all
            output variables, the saved model will be pruned according to
            the given ``output_spec`` list.

    Returns:
        None
    """
    assert isinstance(layer, dygraph.Layer), \
        "The model must be an instance of dygraph.Layer."

    self._gather_output_scale(layer)

    with dygraph.guard():
        layer.eval()
        for handle in self._register_hook_handle_list:
            handle.remove()

    paddle.jit.save(layer=layer, path=path, input_spec=input_spec, **config)

    if len(self._out_scale_dict) == 0:
        warnings.warn("Warning: No Layer of the model to be saved contains "
                      "the out_threshold attribute, so the generated "
                      "inference model will not contain the out_threshold "
                      "attribute.")
        return

    # load static model
    is_dynamic_mode = False
    if paddle.in_dynamic_mode():
        is_dynamic_mode = True
        paddle.enable_static()

    place = core.CUDAPlace(0) if core.is_compiled_with_cuda() \
        else core.CPUPlace()
    exe = Executor(place)

    dirname = os.path.dirname(path)
    basename = os.path.basename(path)
    model_filename = basename + INFER_MODEL_SUFFIX
    params_filename = basename + INFER_PARAMS_SUFFIX
    [infer_program, feed_target_names, fetch_targets] = (
        load_inference_model(dirname=dirname,
                             executor=exe,
                             model_filename=model_filename,
                             params_filename=params_filename))

    # TODO(jc): analyse whether the dygraph model has
    # several blocks before applying qat
    assert infer_program.num_blocks == 1, \
        "Quantization aware training (QAT) currently requires the program " \
        "to have only one block. When the model contains if-else or " \
        "while, the program will have several blocks."

    # set output scales to the static model
    self._save_output_scale(infer_program)

    # process skip quant
    self._set_skip_quant_attr(infer_program)

    # save the final quantized model that has output scales
    save_inference_model(dirname=dirname,
                         feeded_var_names=feed_target_names,
                         target_vars=fetch_targets,
                         executor=exe,
                         main_program=infer_program.clone(),
                         model_filename=model_filename,
                         params_filename=params_filename)

    if is_dynamic_mode:
        paddle.disable_static()

def save_inference_model(self, dirname, feed=None, fetch=None):
    """
    Save the TracedLayer to a model for inference. The saved
    inference model can be loaded by C++ inference APIs.

    Args:
        dirname (str): the directory to save the inference model.
        feed (list[int], optional): the input variable indices of the saved
            inference model. If None, all input variables of the
            TracedLayer object would be the inputs of the saved inference
            model. Default None.
        fetch (list[int], optional): the output variable indices of the
            saved inference model. If None, all output variables of the
            TracedLayer object would be the outputs of the saved inference
            model. Default None.

    Returns:
        None

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid
            from paddle.fluid.dygraph import Linear, to_variable, TracedLayer
            import numpy as np

            class ExampleLayer(fluid.dygraph.Layer):
                def __init__(self):
                    super(ExampleLayer, self).__init__()
                    self._fc = Linear(3, 10)

                def forward(self, input):
                    return self._fc(input)

            save_dirname = './saved_infer_model'
            in_np = np.random.random([2, 3]).astype('float32')

            with fluid.dygraph.guard():
                layer = ExampleLayer()
                in_var = to_variable(in_np)
                out_dygraph, static_layer = TracedLayer.trace(layer, inputs=[in_var])
                static_layer.save_inference_model(save_dirname, feed=[0], fetch=[0])

            place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            program, feed_vars, fetch_vars = fluid.io.load_inference_model(save_dirname, exe)

            fetch, = exe.run(program, feed={feed_vars[0]: in_np}, fetch_list=fetch_vars)
            print(fetch.shape)  # (2, 10)
    """
    from paddle.fluid.io import save_inference_model

    def get_feed_fetch(all_vars, partial_vars):
        if partial_vars is None:
            return all_vars

        return [all_vars[idx] for idx in partial_vars]

    with scope_guard(self._scope):
        feeded_var_names = get_feed_fetch(self._feed_names, feed)
        target_var_names = get_feed_fetch(self._fetch_names, fetch)
        target_vars = []
        for name in target_var_names:
            target_var = self._program.global_block().vars.get(name, None)
            assert target_var is not None, "{} cannot be found".format(name)
            target_vars.append(target_var)

        save_inference_model(
            dirname=dirname,
            feeded_var_names=feeded_var_names,
            target_vars=target_vars,
            executor=self._exe,
            main_program=self._program.clone())

def test_fit_line_inference_model(self):
    MODEL_DIR = "./tmp/inference_model"
    init_program = Program()
    program = Program()

    with program_guard(program, init_program):
        x = layers.data(name='x', shape=[2], dtype='float32')
        y = layers.data(name='y', shape=[1], dtype='float32')

        y_predict = layers.fc(input=x, size=1, act=None)

        cost = layers.square_error_cost(input=y_predict, label=y)
        avg_cost = layers.mean(cost)

        sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
        sgd_optimizer.minimize(avg_cost, init_program)

    place = core.CPUPlace()
    exe = executor.Executor(place)

    exe.run(init_program, feed={}, fetch_list=[])

    for i in six.moves.xrange(100):
        tensor_x = np.array([[1, 1], [1, 2], [3, 4],
                             [5, 2]]).astype("float32")
        tensor_y = np.array([[-2], [-3], [-7], [-7]]).astype("float32")

        exe.run(program,
                feed={'x': tensor_x,
                      'y': tensor_y},
                fetch_list=[avg_cost])

    save_inference_model(MODEL_DIR, ["x", "y"], [avg_cost], exe, program)

    expected = exe.run(program,
                       feed={'x': tensor_x,
                             'y': tensor_y},
                       fetch_list=[avg_cost])[0]

    six.moves.reload_module(executor)  # reload to build a new scope
    exe = executor.Executor(place)

    [infer_prog, feed_var_names, fetch_vars] = load_inference_model(MODEL_DIR,
                                                                    exe)

    outs = exe.run(infer_prog,
                   feed={feed_var_names[0]: tensor_x,
                         feed_var_names[1]: tensor_y},
                   fetch_list=fetch_vars)
    actual = outs[0]

    self.assertEqual(feed_var_names, ["x", "y"])
    self.assertEqual(len(fetch_vars), 1)
    print("fetch %s" % str(fetch_vars[0]))
    self.assertTrue("scale" in str(fetch_vars[0]))
    self.assertEqual(expected, actual)

def save_quantized_model(self,
                         model,
                         path,
                         input_spec=None,
                         onnx_format=False,
                         **config):
    """
    Save the quantized model for inference.

    Args:
        model (Layer): The model to be saved.
        path (str): The path prefix to save model. The format is
            ``dirname/file_prefix`` or ``file_prefix``.
        input_spec (list[InputSpec|Tensor], optional): Describes the input
            of the saved model's forward method, which can be described by
            InputSpec or example Tensor. If None, all input variables of
            the original Layer's forward method would be the inputs of
            the saved model. Default None.
        onnx_format (bool, optional): Whether to export the quantized model
            in ONNX format. Default is False.
        **configs (dict, optional): Other save configuration options for
            compatibility. We do not recommend using these configurations,
            they may be removed in the future. If not necessary, DO NOT use
            them. Default None.
            The following options are currently supported:
            (1) output_spec (list[Tensor]): Selects the output targets of
            the saved model. By default, all return variables of the
            original Layer's forward method are kept as the output of the
            saved model. If the provided ``output_spec`` list is not all
            output variables, the saved model will be pruned according to
            the given ``output_spec`` list.

    Returns:
        None
    """
    assert isinstance(model, dygraph.Layer), \
        "The model must be an instance of dygraph.Layer."

    paddle.jit.save(layer=model, path=path, input_spec=input_spec, **config)

    is_dynamic_mode = False
    if paddle.in_dynamic_mode():
        is_dynamic_mode = True
        paddle.enable_static()

    place = core.CPUPlace()
    scope = global_scope()
    exe = Executor(place)

    dirname = os.path.dirname(path)
    basename = os.path.basename(path)
    model_filename = basename + INFER_MODEL_SUFFIX
    params_filename = basename + INFER_PARAMS_SUFFIX

    [infer_program, feed_target_names, fetch_targets] = (
        load_inference_model(dirname=dirname,
                             executor=exe,
                             model_filename=model_filename,
                             params_filename=params_filename))

    self._gather_scales(infer_program, scope, fetch_targets)

    # Remove `moving_average_abs_max_scale` nodes in sub graphs.
    graph = IrGraph(core.Graph(infer_program.desc), for_test=False)
    for sub_graph in graph.all_sub_graphs():
        for _op in sub_graph.all_op_nodes():
            if _op.name() == "moving_average_abs_max_scale":
                sub_graph.safe_remove_nodes(_op)
        sub_graph.resolve_hazard()
    infer_program = graph.to_program()

    self._set_skip_quant_attr(infer_program)

    clip_extra = False
    if onnx_format:
        graph = IrGraph(core.Graph(infer_program.desc), for_test=False)
        transform_pass = ReplaceFakeQuantDequantPass(scope, place)
        transform_pass.apply(graph)

        quant_weight_pass = QuantWeightPass(scope, place)
        quant_weight_pass.apply(graph)
        infer_program = graph.to_program()

        clip_extra = True

    save_inference_model(dirname=dirname,
                         feeded_var_names=feed_target_names,
                         target_vars=fetch_targets,
                         executor=exe,
                         main_program=infer_program.clone(),
                         model_filename=model_filename,
                         params_filename=params_filename,
                         clip_extra=clip_extra)

    if is_dynamic_mode:
        paddle.disable_static()

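# A hedged usage sketch for save_quantized_model() above. `quanter` stands in for
# the (unshown) quantization helper object that owns the method and `quant_model`
# for a dygraph Layer it has already prepared; the path and InputSpec shape/dtype
# are example values only, not taken from the original source.
import paddle
from paddle.static import InputSpec

quanter.save_quantized_model(
    model=quant_model,
    path="./quant_infer/lenet",  # saved as lenet + INFER_MODEL/PARAMS suffixes
    input_spec=[InputSpec(shape=[None, 1, 28, 28], dtype='float32')],
    onnx_format=False)
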
def save_quantized_model(self, layer, path, input_spec=None, **config):
    """
    Save the quantized model for inference.

    Args:
        layer (Layer): The Layer to be saved.
        path (str): The path prefix to save model. The format is
            ``dirname/file_prefix`` or ``file_prefix``.
        input_spec (list[InputSpec|Tensor], optional): Describes the input
            of the saved model's forward method, which can be described by
            InputSpec or example Tensor. If None, all input variables of
            the original Layer's forward method would be the inputs of
            the saved model. Default None.
        **configs (dict, optional): Other save configuration options for
            compatibility. We do not recommend using these configurations,
            they may be removed in the future. If not necessary, DO NOT use
            them. Default None.
            The following options are currently supported:
            (1) output_spec (list[Tensor]): Selects the output targets of
            the saved model. By default, all return variables of the
            original Layer's forward method are kept as the output of the
            saved model. If the provided ``output_spec`` list is not all
            output variables, the saved model will be pruned according to
            the given ``output_spec`` list.

    Returns:
        None
    """
    assert isinstance(layer, dygraph.Layer), \
        "model must be an instance of dygraph.Layer"

    is_dynamic_mode = False
    with dygraph.guard():
        layer.eval()
        for handle in self._register_hook_handle_list:
            handle.remove()
        for key in self._out_scale_dict:
            self._out_scale_dict[key] = float(
                self._out_scale_dict[key].numpy())

    paddle.jit.save(layer=layer, path=path, input_spec=input_spec, **config)

    if paddle.in_dynamic_mode():
        is_dynamic_mode = True
        paddle.enable_static()

    if core.is_compiled_with_cuda():
        place = core.CUDAPlace(0)
    else:
        place = core.CPUPlace()
    exe = Executor(place)

    file_prefix = os.path.basename(path)
    dirname = os.path.dirname(path)
    model_filename = file_prefix + INFER_MODEL_SUFFIX
    params_filename = file_prefix + INFER_PARAMS_SUFFIX

    [inference_program, feed_target_names, fetch_targets] = (
        load_inference_model(dirname=dirname,
                             executor=exe,
                             model_filename=model_filename,
                             params_filename=params_filename))

    # Traverse all ops in the program and find out the op matching
    # the Layer in the dynamic graph.
    layer_var_dict = {}
    for block in inference_program.blocks:
        for op in block.ops:
            if op.type in _op_real_in_out_name:
                output_var_names = quantization_pass._get_op_output_var_names(
                    op)
                for output_var_name in output_var_names:
                    output_var_tensor = block.var(output_var_name)
                    if output_var_tensor.dtype not in [
                            core.VarDesc.VarType.FP64,
                            core.VarDesc.VarType.FP32
                    ]:
                        continue
                    # A Layer in dygraph may correspond to multiple ops in the
                    # static program after being saved. To ensure correctness,
                    # the out_scale collected for the output of a dygraph Layer
                    # is only set on the last of its corresponding static ops.
                    #
                    # The execution order of the ops that correspond to a
                    # dygraph Layer can be judged from the output names, so a
                    # dict records which static-graph op should receive the
                    # out_scale attribute for each dygraph Layer.
                    if '.' not in output_var_name:
                        continue
                    dynamic_layer_name, var_name_suffix = output_var_name.split(
                        ".")
                    if dynamic_layer_name in layer_var_dict:
                        if layer_var_dict[dynamic_layer_name][
                                0] < var_name_suffix:
                            layer_var_dict[dynamic_layer_name] = [
                                var_name_suffix, op
                            ]
                    else:
                        layer_var_dict[dynamic_layer_name] = [
                            var_name_suffix, op
                        ]

    # Because the naming styles of static and dynamic graph are different,
    # the names are unified here to avoid mismatches.
    for (layer_name, var_name_op_list) in layer_var_dict.items():
        if 'prelu' in layer_name:
            layer_name = layer_name.replace('prelu', 'p_re_lu')
        if 'relu' in layer_name:
            layer_name = layer_name.replace('relu', 're_lu')
        if layer_name not in self._out_scale_dict:
            continue
        var_name_op_list[1]._set_attr('out_threshold',
                                      self._out_scale_dict[layer_name])

    # Save the processed program.
    save_inference_model(dirname=dirname,
                         feeded_var_names=feed_target_names,
                         target_vars=fetch_targets,
                         executor=exe,
                         main_program=inference_program.clone(),
                         model_filename=model_filename,
                         params_filename=params_filename)

    if is_dynamic_mode:
        paddle.disable_static()

def save_inference_model(self, dirname, feed=None, fetch=None):
    """
    Save the TracedLayer to a model for inference. The saved
    inference model can be loaded by C++ inference APIs.

    Args:
        dirname (str): the directory to save the inference model.
        feed (list[int], optional): the input variable indices of the saved
            inference model. If None, all input variables of the
            TracedLayer object would be the inputs of the saved inference
            model. Default None.
        fetch (list[int], optional): the output variable indices of the
            saved inference model. If None, all output variables of the
            TracedLayer object would be the outputs of the saved inference
            model. Default None.

    Returns:
        The fetch variables' name list

    Return Type:
        list(str)

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid
            from paddle.fluid.dygraph import FC, to_variable, TracedLayer
            import paddle.fluid.dygraph.jit as jit
            import numpy as np

            class ExampleLayer(fluid.dygraph.Layer):
                def __init__(self, name_scope):
                    super(ExampleLayer, self).__init__(name_scope)
                    self._fc = FC(self.full_name(), 10)

                def forward(self, input):
                    return self._fc(input)

            with fluid.dygraph.guard():
                layer = ExampleLayer("example_layer")
                in_np = np.random.random([2, 3]).astype('float32')
                in_var = to_variable(in_np)
                out_dygraph, static_layer = TracedLayer.trace(layer, inputs=[in_var])
                static_layer.save_inference_model('./saved_infer_model')
    """
    def get_feed_fetch(all_vars, partial_vars):
        if partial_vars is None:
            return all_vars

        return [all_vars[idx] for idx in partial_vars]

    with scope_guard(self._scope):
        feeded_var_names = get_feed_fetch(self._feed_names, feed)
        target_var_names = get_feed_fetch(self._fetch_names, fetch)
        target_vars = []
        for name in target_var_names:
            target_var = self._program.global_block().vars.get(name, None)
            assert target_var is not None, "{} cannot be found".format(name)
            target_vars.append(target_var)

        return fluid_io.save_inference_model(
            dirname=dirname,
            feeded_var_names=feeded_var_names,
            target_vars=target_vars,
            executor=self._exe,
            main_program=self._program.clone())

def save_inference_model(self, dirname, feed=None, fetch=None):
    """
    Saves the current model as an inference model. It will prune the
    main_program to build a new program especially for inference, and then
    save it and all related parameters to the given `dirname`. The saved
    inference model can be loaded by :ref:`api_fluid_io_load_inference_model`
    or C++ inference APIs.

    Args:
        dirname (str): the directory to save the inference model.
        feed (list[int], optional): the input variable indices of the saved
            inference model. If None, all input variables of the
            ProgramTranslator would be the inputs of the saved inference
            model. Default None.
        fetch (list[int], optional): the output variable indices of the
            saved inference model. If None, all output variables of the
            ProgramTranslator would be the outputs of the saved inference
            model. Default None.

    Returns:
        None

    Examples:
        .. code-block:: python

            import numpy as np
            import paddle.fluid as fluid
            from paddle.fluid.dygraph import Linear
            from paddle.fluid.dygraph import declarative
            from paddle.fluid.dygraph import ProgramTranslator

            class SimpleNet(fluid.dygraph.Layer):
                def __init__(self, in_size, out_size):
                    super(SimpleNet, self).__init__()
                    self._linear = Linear(in_size, out_size)

                @declarative
                def forward(self, x):
                    y = self._linear(x)
                    z = self._linear(y)
                    loss = fluid.layers.mean(z)
                    return z, loss

            with fluid.dygraph.guard(fluid.CPUPlace()):
                net = SimpleNet(8, 8)
                adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters())
                x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32'))
                for i in range(10):
                    out, loss = net(x)
                    loss.backward()
                    adam.minimize(loss)
                    net.clear_gradients()
            # Save inference model.
            # Note that fetch=[0] means we set 'z' as the inference output.
            prog_trans = ProgramTranslator()
            prog_trans.save_inference_model("./dy2stat_infer_model", fetch=[0])

            # In this example, the inference model will be pruned based on input (x) and
            # output (z). The pruned inference program is going to be saved in the folder
            # "./dy2stat_infer_model" and parameters are going to be saved in separate
            # files in the folder.
    """
    def get_feed_fetch(var_list, partial_vars, return_name=False):
        vars = [
            var for var in var_list if isinstance(var, framework.Variable)
        ]
        if partial_vars:
            vars = [vars[idx] for idx in partial_vars]
        if return_name:
            vars = [var.name for var in vars]

        return vars

    func_spec, (concrete_program,
                partial_layer) = self._program_cache.last()

    # share ParamBase data with parameter
    scope = core.Scope()
    for param_base in concrete_program.parameters:
        param_tensor = scope.var(param_base.name).get_tensor()
        src_tensor = param_base.value().get_tensor()
        param_tensor._share_data_with(src_tensor)

    feed_var_names = get_feed_fetch(concrete_program.inputs, feed, True)
    fetch_vars = get_feed_fetch(concrete_program.outputs, fetch)

    from paddle.fluid.io import save_inference_model
    with scope_guard(scope):
        save_inference_model(
            dirname=dirname,
            feeded_var_names=feed_var_names,
            target_vars=fetch_vars,
            executor=executor.Executor(
                framework._current_expected_place()),
            main_program=concrete_program.main_program.clone())