コード例 #1
0
def load_model(model_path, exe):
    """
    加载预测模型,此函数仅用于预测模块!
    :param model_path: 模型二进制文件的存放地址
    :param exe: 加载模型的 executor
    :return: program,feed_target_names,fetch_targets
    """
    assert os.path.exists(model_path), "[%s] can't be found." % model_path
    return io.load_inference_model(dirname=model_path, executor=exe)
コード例 #2
0
    def test_fit_line_inference_model(self):
        MODEL_DIR = "./tmp/inference_model"

        init_program = Program()
        program = Program()

        with program_guard(program, init_program):
            x = layers.data(name='x', shape=[2], dtype='float32')
            y = layers.data(name='y', shape=[1], dtype='float32')

            y_predict = layers.fc(input=x, size=1, act=None)

            cost = layers.square_error_cost(input=y_predict, label=y)
            avg_cost = layers.mean(cost)

            sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
            sgd_optimizer.minimize(avg_cost, init_program)

        place = core.CPUPlace()
        exe = executor.Executor(place)

        exe.run(init_program, feed={}, fetch_list=[])

        for i in xrange(100):
            tensor_x = np.array(
                [[1, 1], [1, 2], [3, 4], [5, 2]]).astype("float32")
            tensor_y = np.array([[-2], [-3], [-7], [-7]]).astype("float32")

            exe.run(program,
                    feed={'x': tensor_x,
                          'y': tensor_y},
                    fetch_list=[avg_cost])

        save_inference_model(MODEL_DIR, ["x", "y"], [avg_cost], exe, program)
        expected = exe.run(program,
                           feed={'x': tensor_x,
                                 'y': tensor_y},
                           fetch_list=[avg_cost])[0]

        reload(executor)  # reload to build a new scope
        exe = executor.Executor(place)

        [infer_prog, feed_var_names, fetch_vars] = load_inference_model(
            MODEL_DIR, exe)

        outs = exe.run(
            infer_prog,
            feed={feed_var_names[0]: tensor_x,
                  feed_var_names[1]: tensor_y},
            fetch_list=fetch_vars)
        actual = outs[0]

        self.assertEqual(feed_var_names, ["x", "y"])
        self.assertEqual(len(fetch_vars), 1)
        self.assertEqual(str(fetch_vars[0]), str(avg_cost))
        self.assertEqual(expected, actual)
コード例 #3
0
    def test_fit_line_inference_model(self):
        MODEL_DIR = "./tmp/inference_model"
        UNI_MODEL_DIR = "./tmp/inference_model1"

        init_program = Program()
        program = Program()

        with program_guard(program, init_program):
            x = layers.data(name='x', shape=[2], dtype='float32')
            y = layers.data(name='y', shape=[1], dtype='float32')

            y_predict = layers.fc(input=x, size=1, act=None)

            cost = layers.square_error_cost(input=y_predict, label=y)
            avg_cost = layers.mean(cost)

            sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
            sgd_optimizer.minimize(avg_cost, init_program)

        place = core.CPUPlace()
        exe = executor.Executor(place)

        exe.run(init_program, feed={}, fetch_list=[])

        for i in six.moves.xrange(100):
            tensor_x = np.array([[1, 1], [1, 2], [3, 4],
                                 [5, 2]]).astype("float32")
            tensor_y = np.array([[-2], [-3], [-7], [-7]]).astype("float32")

            exe.run(program,
                    feed={
                        'x': tensor_x,
                        'y': tensor_y
                    },
                    fetch_list=[avg_cost])

        # Separated model and unified model
        save_inference_model(MODEL_DIR, ["x", "y"], [avg_cost], exe, program)
        save_inference_model(UNI_MODEL_DIR, ["x", "y"], [avg_cost], exe,
                             program, 'model', 'params')
        main_program = program.clone()._prune_with_input(
            feeded_var_names=["x", "y"], targets=[avg_cost])
        params_str = save_persistables(exe, None, main_program, None)

        expected = exe.run(program,
                           feed={
                               'x': tensor_x,
                               'y': tensor_y
                           },
                           fetch_list=[avg_cost])[0]

        six.moves.reload_module(executor)  # reload to build a new scope

        model_0 = InferModel(load_inference_model(MODEL_DIR, exe))
        with open(os.path.join(UNI_MODEL_DIR, 'model'), "rb") as f:
            model_str = f.read()
        model_1 = InferModel(
            load_inference_model(None, exe, model_str, params_str))

        for model in [model_0, model_1]:
            outs = exe.run(model.program,
                           feed={
                               model.feed_var_names[0]: tensor_x,
                               model.feed_var_names[1]: tensor_y
                           },
                           fetch_list=model.fetch_vars)
            actual = outs[0]

            self.assertEqual(model.feed_var_names, ["x", "y"])
            self.assertEqual(len(model.fetch_vars), 1)
            print("fetch %s" % str(model.fetch_vars[0]))
            self.assertEqual(expected, actual)

        self.assertRaises(ValueError, fluid.io.load_inference_model, None, exe,
                          model_str, None)
コード例 #4
0
    def test_fit_line_inference_model(self):
        MODEL_DIR = "./tmp/inference_model"

        init_program = Program()
        program = Program()

        with program_guard(program, init_program):
            x = layers.data(name='x', shape=[2], dtype='float32')
            y = layers.data(name='y', shape=[1], dtype='float32')

            y_predict = layers.fc(input=x, size=1, act=None)

            cost = layers.square_error_cost(input=y_predict, label=y)
            avg_cost = layers.mean(cost)

            sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
            sgd_optimizer.minimize(avg_cost, init_program)

        place = core.CPUPlace()
        exe = executor.Executor(place)

        exe.run(init_program, feed={}, fetch_list=[])

        for i in six.moves.xrange(100):
            tensor_x = np.array([[1, 1], [1, 2], [3, 4],
                                 [5, 2]]).astype("float32")
            tensor_y = np.array([[-2], [-3], [-7], [-7]]).astype("float32")

            exe.run(program,
                    feed={
                        'x': tensor_x,
                        'y': tensor_y
                    },
                    fetch_list=[avg_cost])

        save_inference_model(MODEL_DIR, ["x", "y"], [avg_cost], exe, program)
        expected = exe.run(program,
                           feed={
                               'x': tensor_x,
                               'y': tensor_y
                           },
                           fetch_list=[avg_cost])[0]

        six.moves.reload_module(executor)  # reload to build a new scope
        exe = executor.Executor(place)

        [infer_prog, feed_var_names,
         fetch_vars] = load_inference_model(MODEL_DIR, exe)

        outs = exe.run(infer_prog,
                       feed={
                           feed_var_names[0]: tensor_x,
                           feed_var_names[1]: tensor_y
                       },
                       fetch_list=fetch_vars)
        actual = outs[0]

        self.assertEqual(feed_var_names, ["x", "y"])
        self.assertEqual(len(fetch_vars), 1)
        print("fetch %s" % str(fetch_vars[0]))
        self.assertTrue("scale" in str(fetch_vars[0]))
        self.assertEqual(expected, actual)
コード例 #5
0
    def save_quantized_model(self, layer, path, input_spec=None, **config):
        """
        Save the quantized model for the inference.

        Args:
            layer (Layer): The Layer to be saved.
            path (str): The path prefix to save model. The format is ``dirname/file_prefix`` or ``file_prefix``.
            input_spec (list[InputSpec|Tensor], optional): Describes the input of the saved model's forward 
                method, which can be described by InputSpec or example Tensor. If None, all input variables of 
                the original Layer's forward method would be the inputs of the saved model. Default None.
            **configs (dict, optional): Other save configuration options for compatibility. We do not 
                recommend using these configurations, they may be removed in the future. If not necessary, 
                DO NOT use them. Default None.
                The following options are currently supported:
                (1) output_spec (list[Tensor]): Selects the output targets of the saved model.
                By default, all return variables of original Layer's forward method are kept as the 
                output of the saved model. If the provided ``output_spec`` list is not all output variables, 
                the saved model will be pruned according to the given ``output_spec`` list. 

        Returns:
            None
        """

        assert isinstance(
            layer,
            dygraph.Layer), "model must be the instance of dygraph.Layer"
        is_dynamic_mode = False
        with dygraph.guard():
            layer.eval()
            for handle in self._register_hook_handle_list:
                handle.remove()
            for key in self._out_scale_dict:
                self._out_scale_dict[key] = float(
                    self._out_scale_dict[key].numpy())

        paddle.jit.save(layer=layer,
                        path=path,
                        input_spec=input_spec,
                        **config)

        if paddle.in_dynamic_mode():
            is_dynamic_mode = True
            paddle.enable_static()

        if core.is_compiled_with_cuda():
            place = core.CUDAPlace(0)
        else:
            place = core.CPUPlace()
        exe = Executor(place)

        file_prefix = os.path.basename(path)
        dirname = os.path.dirname(path)
        model_filename = file_prefix + INFER_MODEL_SUFFIX
        params_filename = file_prefix + INFER_PARAMS_SUFFIX

        [inference_program, feed_target_names, fetch_targets
         ] = (load_inference_model(dirname=dirname,
                                   executor=exe,
                                   model_filename=model_filename,
                                   params_filename=params_filename))

        # Traverse all ops in the program and find out the op matching
        # the Layer in the dynamic graph.
        layer_var_dict = {}
        for block in inference_program.blocks:
            for op in block.ops:
                if op.type in _op_real_in_out_name:
                    output_var_names = quantization_pass._get_op_output_var_names(
                        op)
                    for output_var_name in output_var_names:
                        output_var_tensor = block.var(output_var_name)
                        if output_var_tensor.dtype not in [
                                core.VarDesc.VarType.FP64,
                                core.VarDesc.VarType.FP32
                        ]:
                            continue
                        # Because the Layer in dygraph may correspond to multiple ops
                        # in static program after being saved. To ensure correctness,
                        # the outscale collected for output of dygraph Layer can only
                        # be set to the last op in the corresponding ops in static program.
                        #
                        # We can judge the execution order of the ops which corresponding
                        # to dygraph Layer by the name of output. And use dict to save
                        # the corresponding relationship between the dygraph Layer and the
                        # static graph op that needs to set the outscale attribute.
                        if '.' not in output_var_name:
                            continue
                        dynamic_layer_name, var_name_suffix = output_var_name.split(
                            ".")
                        if dynamic_layer_name in layer_var_dict:
                            if layer_var_dict[dynamic_layer_name][
                                    0] < var_name_suffix:
                                layer_var_dict[dynamic_layer_name] = [
                                    var_name_suffix, op
                                ]
                        else:
                            layer_var_dict[dynamic_layer_name] = [
                                var_name_suffix, op
                            ]

        # Because the naming styles of static and dynamic graph are different,
        # in order to avoid mistakes, we unify the name here.
        for (layer_name, var_name_op_list) in layer_var_dict.items():
            if 'prelu' in layer_name:
                layer_name = layer_name.replace('prelu', 'p_re_lu')
            if 'relu' in layer_name:
                layer_name = layer_name.replace('relu', 're_lu')
            if layer_name not in self._out_scale_dict:
                continue
            var_name_op_list[1]._set_attr('out_threshold',
                                          self._out_scale_dict[layer_name])

        # Save the processed program.
        save_inference_model(dirname=dirname,
                             feeded_var_names=feed_target_names,
                             target_vars=fetch_targets,
                             executor=exe,
                             main_program=inference_program.clone(),
                             model_filename=model_filename,
                             params_filename=params_filename)

        if is_dynamic_mode:
            paddle.disable_static()
コード例 #6
0
    def save_quantized_model(self,
                             model,
                             path,
                             input_spec=None,
                             onnx_format=False,
                             **config):
        """
        Save the quantized model for the inference.

        Args:
            model (Layer): The model to be saved.
            path (str): The path prefix to save model. The format is 
                ``dirname/file_prefix`` or ``file_prefix``.
            input_spec (list[InputSpec|Tensor], optional): Describes the input
                of the saved model's forward method, which can be described by
                InputSpec or example Tensor. If None, all input variables of 
                the original Layer's forward method would be the inputs of
                the saved model. Default None.
            onnx_format (bool, optional): Whether to export the quantized model 
                with format of ONNX. Default is False.
            **configs (dict, optional): Other save configuration options for
                compatibility. We do not recommend using these configurations,
                they may be removed in the future. If not necessary, DO NOT use
                them. Default None.
                The following options are currently supported:
                (1) output_spec (list[Tensor]): Selects the output targets of
                the saved model. By default, all return variables of original
                Layer's forward method are kept as the output of the saved model.
                If the provided ``output_spec`` list is not all output variables, 
                the saved model will be pruned according to the given
                ``output_spec`` list. 

        Returns:
            None
        """
        assert isinstance(model, dygraph.Layer), \
            "The model must be the instance of dygraph.Layer."

        paddle.jit.save(layer=model,
                        path=path,
                        input_spec=input_spec,
                        **config)

        is_dynamic_mode = False
        if paddle.in_dynamic_mode():
            is_dynamic_mode = True
            paddle.enable_static()

        place = core.CPUPlace()
        scope = global_scope()
        exe = Executor(place)

        dirname = os.path.dirname(path)
        basename = os.path.basename(path)
        model_filename = basename + INFER_MODEL_SUFFIX
        params_filename = basename + INFER_PARAMS_SUFFIX

        [infer_program, feed_target_names, fetch_targets
         ] = (load_inference_model(dirname=dirname,
                                   executor=exe,
                                   model_filename=model_filename,
                                   params_filename=params_filename))

        self._gather_scales(infer_program, scope, fetch_targets)

        # Remove `moving_average_abs_max_scale` node in sub graphs.
        graph = IrGraph(core.Graph(infer_program.desc), for_test=False)
        for sub_graph in graph.all_sub_graphs():
            for _op in sub_graph.all_op_nodes():
                if _op.name() == "moving_average_abs_max_scale":
                    sub_graph.safe_remove_nodes(_op)
            sub_graph.resolve_hazard()
        infer_program = graph.to_program()

        self._set_skip_quant_attr(infer_program)

        clip_extra = False
        if onnx_format:
            graph = IrGraph(core.Graph(infer_program.desc), for_test=False)
            transform_pass = ReplaceFakeQuantDequantPass(scope, place)
            transform_pass.apply(graph)

            quant_weight_pass = QuantWeightPass(scope, place)
            quant_weight_pass.apply(graph)
            infer_program = graph.to_program()

            clip_extra = True

        save_inference_model(dirname=dirname,
                             feeded_var_names=feed_target_names,
                             target_vars=fetch_targets,
                             executor=exe,
                             main_program=infer_program.clone(),
                             model_filename=model_filename,
                             params_filename=params_filename,
                             clip_extra=clip_extra)

        if is_dynamic_mode:
            paddle.disable_static()
コード例 #7
0
    def save_quantized_model(self, layer, path, input_spec=None, **config):
        """
        Save the quantized model for the inference.

        Args:
            layer (Layer): The Layer to be saved.
            path (str): The path prefix to save model. The format is 
                ``dirname/file_prefix`` or ``file_prefix``.
            input_spec (list[InputSpec|Tensor], optional): Describes the input
                of the saved model's forward method, which can be described by
                InputSpec or example Tensor. If None, all input variables of 
                the original Layer's forward method would be the inputs of
                the saved model. Default None.
            **configs (dict, optional): Other save configuration options for
                compatibility. We do not recommend using these configurations,
                they may be removed in the future. If not necessary, DO NOT use
                them. Default None.
                The following options are currently supported:
                (1) output_spec (list[Tensor]): Selects the output targets of
                the saved model. By default, all return variables of original
                Layer's forward method are kept as the output of the saved model.
                If the provided ``output_spec`` list is not all output variables, 
                the saved model will be pruned according to the given
                ``output_spec`` list. 

        Returns:
            None
        """

        assert isinstance(layer, dygraph.Layer), \
            "The model must be the instance of dygraph.Layer."

        self._gather_output_scale(layer)

        with dygraph.guard():
            layer.eval()
            for handle in self._register_hook_handle_list:
                handle.remove()
        paddle.jit.save(layer=layer,
                        path=path,
                        input_spec=input_spec,
                        **config)

        if len(self._out_scale_dict) == 0:
            warnings.warn("Warning: No Layer of the model while to be " \
                          "saved contains the out_threshold attribute, so the " \
                          "generated inference model would not contain the " \
                          "out_threshold.")
            return

        # load static model
        is_dynamic_mode = False
        if paddle.in_dynamic_mode():
            is_dynamic_mode = True
            paddle.enable_static()

        place = core.CUDAPlace(0) if core.is_compiled_with_cuda() \
            else core.CPUPlace()
        exe = Executor(place)

        dirname = os.path.dirname(path)
        basename = os.path.basename(path)
        model_filename = basename + INFER_MODEL_SUFFIX
        params_filename = basename + INFER_PARAMS_SUFFIX

        [infer_program, feed_target_names, fetch_targets
         ] = (load_inference_model(dirname=dirname,
                                   executor=exe,
                                   model_filename=model_filename,
                                   params_filename=params_filename))

        # TODO(jc): analyse whether the dygraph model has
        # several blocks before applying qat
        assert infer_program.num_blocks == 1, \
            "Quantization aware training (QAT) requires the program " \
            "only has a block for now. When the model has if-else or " \
            "while, the program will have several blocks."

        # set output scales to the static model
        self._save_output_scale(infer_program)

        # process skip quant
        self._set_skip_quant_attr(infer_program)

        # save the final quantized model that has output scales
        save_inference_model(dirname=dirname,
                             feeded_var_names=feed_target_names,
                             target_vars=fetch_targets,
                             executor=exe,
                             main_program=infer_program.clone(),
                             model_filename=model_filename,
                             params_filename=params_filename)

        if is_dynamic_mode:
            paddle.disable_static()