Example #1
def quant_embedding(program, place, config, scope=None):
    """quantize lookup_table op parameters

    Args:
        program(fluid.Program): infer program
        scope(fluid.Scope): Scope records the mapping between variable names and variables, similar to brackets in programming languages. Usually users can use `fluid.global_scope() <https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api_cn/executor_cn/global_scope_cn.html>`_ . When ``None`` will use `fluid.global_scope() <https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api_cn/executor_cn/global_scope_cn.html>`_. Default : ``None``.
        place(fluid.CPUPlace or fluid.CUDAPlace): This parameter represents the executor run on which device.
        config(dict): config to quantize. The keys are 'params_name', 'quantize_type', \
                'quantize_bits', 'dtype', 'threshold'. \
                ``params_name`` is parameter name to quantize, must be set.
                ``quantize_type`` is  quantize type, supported types are ['abs_max'], default is "abs_max".
                ``quantize_bits`` supported bits are [8] and default is 8.
                ``dtype`` is quantize dtype, supported dtype are ['int8'], default is 'int8'.
                ``threshold`` is threshold to clip tensor before quant. When threshold is not set, \
                        tensor will not be clipped.

    Returns:
        None
    """
    assert isinstance(config, dict), "config must be a dict"
    config = _merge_config(copy.deepcopy(default_config), config)
    scope = fluid.global_scope() if scope is None else scope

    graph = IrGraph(core.Graph(program.desc), for_test=True)
    if config['quantize_type'] == 'abs_max':
        _quant_embedding_abs_max(graph, scope, place, config)

    return graph.to_program()
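A minimal usage sketch for this variant (the inference program and the parameter name 'emb' are hypothetical; 'params_name' must name an existing lookup_table parameter):

import paddle.fluid as fluid

place = fluid.CPUPlace()
config = {
    'params_name': 'emb',        # required: lookup_table parameter to quantize
    'quantize_type': 'abs_max',  # only 'abs_max' is supported by this variant
    'quantize_bits': 8,
    'dtype': 'int8',
    # 'threshold': 1.0,          # optional: clip the tensor before quantization
}
quant_program = quant_embedding(infer_program, place, config)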
Example #2
def transform_and_save_model(original_path, save_path, save_type):
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    inference_scope = fluid.executor.global_scope()
    with fluid.scope_guard(inference_scope):
        if os.path.exists(os.path.join(original_path, '__model__')):
            [inference_program, feed_target_names, fetch_targets
             ] = fluid.io.load_inference_model(original_path, exe)
        else:
            [inference_program, feed_target_names, fetch_targets
             ] = fluid.io.load_inference_model(original_path, exe, 'model',
                                               'params')

        quantized_ops = set(test_args.quantized_ops.split(','))
        transform_to_mkldnn_int8_pass = Qat2Int8MkldnnPass(
            quantized_ops, _scope=inference_scope, _place=place, _core=core)

        graph = IrGraph(core.Graph(inference_program.desc), for_test=True)
        if save_type == 'FP32':
            graph = transform_to_mkldnn_int8_pass.apply_fp32(graph)
        elif save_type == 'INT8':
            graph = transform_to_mkldnn_int8_pass.apply(graph)
        inference_program = graph.to_program()
        with fluid.scope_guard(inference_scope):
            fluid.io.save_inference_model(save_path, feed_target_names,
                                          fetch_targets, exe,
                                          inference_program)
        print(
            "Success! Transformed QAT_{0} model can be found at {1}\n".format(
                save_type, save_path))
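A hedged invocation sketch; the directories are hypothetical, and test_args.quantized_ops must already hold a comma-separated op list such as 'conv2d,pool2d':

# Save an optimized FP32 model and an INT8 model from the same QAT checkpoint.
transform_and_save_model('./qat_model_dir', './fp32_model_dir', 'FP32')
transform_and_save_model('./qat_model_dir', './int8_model_dir', 'INT8')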
Example #3
    def residual_block_quant(self, quant_type):
        main = fluid.Program()
        startup = fluid.Program()
        with fluid.program_guard(main, startup):
            loss = residual_block(2)
            opt = fluid.optimizer.Adam(learning_rate=0.001)
            opt.minimize(loss)
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        graph = IrGraph(core.Graph(main.desc), for_test=False)
        transform_pass = QuantizationTransformPass(
            scope=fluid.global_scope(),
            place=place,
            activation_quantize_type=quant_type)
        transform_pass.apply(graph)
        marked_nodes = set()
        for op in graph.all_op_nodes():
            if op.name().find('quantize') > -1:
                marked_nodes.add(op)
        graph.draw('.', 'quantize_residual_' + quant_type, marked_nodes)
        program = graph.to_program()
        self.check_program(transform_pass, program)
        val_graph = IrGraph(core.Graph(program.desc), for_test=False)
        val_marked_nodes = set()
        for op in val_graph.all_op_nodes():
            if op.name().find('quantize') > -1:
                val_marked_nodes.add(op)
        val_graph.draw('.', 'val_residual_' + quant_type, val_marked_nodes)
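A sketch of a driver for this test helper. The three activation quantization types are an assumption based on the QuantizationTransformPass API rather than on this snippet, and graph.draw additionally needs the graphviz 'dot' binary on the PATH:

    def test_residual_block_quant(self):
        # Hypothetical driver: exercise the helper for each activation type.
        for quant_type in ['abs_max', 'range_abs_max', 'moving_average_abs_max']:
            self.residual_block_quant(quant_type)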
Example #4
def quant_embedding(program, place, config=None, scope=None):
    """quantize lookup_table op parameters

    Args:
        program(paddle.static.Program): infer program
        scope(paddle.static.Scope, optional): Scope records the mapping between variable names and variables, similar to brackets in programming languages. Usually users can use `paddle.static.global_scope() <https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api_cn/executor_cn/global_scope_cn.html>`_ . When ``None`` will use `paddle.static.global_scope() <https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api_cn/executor_cn/global_scope_cn.html>`_. Default : ``None``.
        place(paddle.CPUPlace or paddle.CUDAPlace): This parameter represents the executor run on which device.
        config(dict, optional): config to quantize. The keys are 'quantize_op_types'. For op in quantize_op_types, you can define 'quantize_type', \
                'quantize_bits', 'dtype', 'threshold'. \
                ``quantize_type`` is  quantize type, supported types are ['abs_max'], default is "abs_max".
                ``quantize_bits`` supported bits are [8] and default is 8.
                ``dtype`` is quantize dtype, supported dtype are ['int8'], default is 'int8'.
                ``threshold`` is threshold to clip tensor before quant. When threshold is not set, \
                        tensor will not be clipped.

    Returns:
        None
    """
    config = config or {}
    config = _merge_config(copy.deepcopy(_default_config), config)
    scope = paddle.static.global_scope() if scope is None else scope

    graph = IrGraph(core.Graph(program.desc), for_test=True)
    quantize_params_map = {}
    all_op = graph.all_op_nodes()
    for op in all_op:
        if op.inputs == [] and op.outputs == []:
            continue
        op_type = op.name()
        if op_type in config['quantize_op_types']:
            weight_name = op.input('W')[0]
            if weight_name in quantize_params_map.values():
                continue
            embedding_node = graph._find_node_by_name(op.inputs,
                                                      op.input('W')[0])
            for op_node in embedding_node.outputs:
                if op_node.name() == 'fused_embedding_seq_pool':
                    _split_embedding_seq_pool(graph, op_node)
            if config[op_type]['quantize_type'] == 'abs_max':
                _quant_embedding_abs_max(graph, scope, place, config[op_type],
                                         weight_name, embedding_node)
            elif config[op_type]['quantize_type'] == 'log':
                _quant_embedding_log(graph, scope, place, config[op_type],
                                     weight_name, embedding_node)
            quantize_params_map[weight_name] = _get_quant_var_name(weight_name)
    for op in all_op:
        if op.name() == 'fused_embedding_seq_pool':
            graph.safe_remove_nodes(op)

    return graph.to_program()
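A usage sketch for this per-op variant; the inference program is hypothetical, and the nested layout mirrors how the function reads config[op_type]. Treating 'lookup_table' as a member of 'quantize_op_types' is an assumption about _default_config:

import paddle

place = paddle.CPUPlace()
config = {
    'quantize_op_types': ['lookup_table'],
    'lookup_table': {
        'quantize_type': 'abs_max',  # this variant also accepts 'log'
        'quantize_bits': 8,
        'dtype': 'int8',
    },
}
quant_program = quant_embedding(infer_program, place, config=config)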
Example #5
    def _predict(self, test_reader=None, model_path=None):
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        inference_scope = fluid.executor.global_scope()
        with fluid.scope_guard(inference_scope):
            if os.path.exists(os.path.join(model_path, '__model__')):
                [inference_program, feed_target_names, fetch_targets
                 ] = fluid.io.load_inference_model(model_path, exe)
            else:
                [inference_program, feed_target_names, fetch_targets
                 ] = fluid.io.load_inference_model(model_path, exe, 'model',
                                                   'params')

            use_mkldnn = fluid.core.globals()["FLAGS_use_mkldnn"]
            if use_mkldnn:
                graph = IrGraph(core.Graph(inference_program.desc),
                                for_test=True)
                graph = self._transform_depthwise_conv(graph)
                inference_program = graph.to_program()

            dshape = [3, 224, 224]
            top1 = 0.0
            top5 = 0.0
            total_samples = 0
            for batch_id, data in enumerate(test_reader()):
                if six.PY2:
                    images = map(lambda x: x[0].reshape(dshape), data)
                if six.PY3:
                    images = list(map(lambda x: x[0].reshape(dshape), data))
                images = np.array(images).astype('float32')
                labels = np.array([x[1] for x in data]).astype("int64")
                labels = labels.reshape([-1, 1])
                fluid.core.set_num_threads(int(os.environ['CPU_NUM_THREADS']))
                out = exe.run(inference_program,
                              feed={
                                  feed_target_names[0]: images,
                                  feed_target_names[1]: labels
                              },
                              fetch_list=fetch_targets)
                fluid.core.set_num_threads(1)
                top1 += np.sum(out[1]) * len(data)
                top5 += np.sum(out[2]) * len(data)
                total_samples += len(data)
                if (batch_id + 1) % 100 == 0:
                    _logger.info(
                        '{} images have been predicted'.format(total_samples))
            return top1 / total_samples, top5 / total_samples
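A hedged call sketch; 'val_reader' is a hypothetical reader yielding (image, label) pairs and the model directory is illustrative:

top1_acc, top5_acc = self._predict(
    test_reader=paddle.batch(val_reader, batch_size=50),
    model_path='./int8_model_dir')
_logger.info('top1: {}, top5: {}'.format(top1_acc, top5_acc))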
Example #6
def eval(args):
    # parameters from arguments

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    val_program, feed_names, fetch_targets = fluid.io.load_inference_model(
        args.model_path,
        exe,
        model_filename="__model__.infer",
        params_filename="__params__")
    val_reader = paddle.batch(reader.val(), batch_size=128)
    feeder = fluid.DataFeeder(
        place=place, feed_list=feed_names, program=val_program)

    results = []
    for batch_id, data in enumerate(val_reader()):
        image = [[d[0]] for d in data]
        label = [[d[1]] for d in data]
        feed_data = feeder.feed(image)
        pred = exe.run(val_program, feed=feed_data, fetch_list=fetch_targets)
        pred = np.array(pred[0])
        label = np.array(label)
        sort_array = pred.argsort(axis=1)
        top_1_pred = sort_array[:, -1:][:, ::-1]
        top_1 = np.mean(label == top_1_pred)
        top_5_pred = sort_array[:, -5:][:, ::-1]
        acc_num = 0
        for i in range(len(label)):
            if label[i][0] in top_5_pred[i]:
                acc_num += 1
        top_5 = acc_num / len(label)
        results.append([top_1, top_5])

    result = np.mean(np.array(results), axis=0)
    print("top1_acc/top5_acc= {}".format(result))
    sys.stdout.flush()

    _logger.info("freeze the graph for inference")
    test_graph = IrGraph(core.Graph(val_program.desc), for_test=True)

    freeze_pass = QuantizationFreezePass(
        scope=fluid.global_scope(),
        place=place,
        weight_quantize_type=args.weight_quant_type)
    freeze_pass.apply(test_graph)
    server_program = test_graph.to_program()
    fluid.io.save_inference_model(
        dirname=os.path.join(args.save_path, 'float'),
        feeded_var_names=feed_names,
        target_vars=fetch_targets,
        executor=exe,
        main_program=server_program,
        model_filename='model',
        params_filename='weights')

    _logger.info("convert the weights into int8 type")
    convert_int8_pass = ConvertToInt8Pass(
        scope=fluid.global_scope(), place=place)
    convert_int8_pass.apply(test_graph)
    server_int8_program = test_graph.to_program()
    fluid.io.save_inference_model(
        dirname=os.path.join(args.save_path, 'int8'),
        feeded_var_names=feed_names,
        target_vars=fetch_targets,
        executor=exe,
        main_program=server_int8_program,
        model_filename='model',
        params_filename='weights')
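The float and int8 programs saved above can be reloaded with the matching filenames; a short sketch grounded in the save calls:

# Reload the int8 model written by eval(); filenames match the save call above.
int8_program, feed_names, fetch_targets = fluid.io.load_inference_model(
    dirname=os.path.join(args.save_path, 'int8'),
    executor=exe,
    model_filename='model',
    params_filename='weights')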
Example #7
def create_quant_model(model,
                       params,
                       activation_quantize_type='moving_average_abs_max',
                       weight_quantize_type='channel_wise_abs_max',
                       save=False):
    place = paddle.CUDAPlace(0)
    scope = global_scope()
    exe = paddle.static.Executor(place)
    [inference_program, feed_target_names, fetch_targets
     ] = paddle.static.load_inference_model(path_prefix=None,
                                            executor=exe,
                                            model_filename=model,
                                            params_filename=params)
    graph = IrGraph(core.Graph(inference_program.desc), for_test=True)

    out_scale_op_list = [
        "conv2d",
        "depthwise_conv2d",
        "mul",
        "matmul",
        "relu",
        "leaky_relu",
        "relu6",
        "sigmoid",
        "tanh",
        "prelu",
        "swish",
        "softmax",
        "batch_norm",
        "layer_norm",
        "elementwise_add",
        "pool2d",
        "reshape2",
        "transpose2",
        "concat",
        "elementwise_mul",
        "scale",
        "slice",
        "hard_swish",
        "hard_sigmoid",
        "conv2d_transpose",
        "gru",
        "bilinear_interp",
        "nearest_interp",
        "trilinear_interp",
        "flatten",
        "flatten2",
        "transpose",
        "pad2d",
        "reshape",
        "layer_norm",
    ]
    op_real_in_out_name = {
        "conv2d": [["Input", "Filter"], ["Output"]],
        "depthwise_conv2d": [["Input", "Filter"], ["Output"]],
        "conv2d_transpose": [["Input", "Filter"], ["Output"]],
        "mul": [["X", "Y"], ["Out"]],
        "matmul": [["X", "Y"], ["Out"]],
        "pool2d": [["X"], ["Out"]],
        "elementwise_add": [["X", "Y"], ["Out"]],
        "concat": [["X"], ["Out"]],
        "softmax": [["X"], ["Out"]],
        "argmax": [["X"], ["Out"]],
        "transpose": [["X"], ["Out"]],
        "equal": [["X", "Y"], ["Out"]],
        "gather": [["X"], ["Out"]],
        "greater_equal": [["X", "Y"], ["Out"]],
        "greater_than": [["X", "Y"], ["Out"]],
        "less_equal": [["X", "Y"], ["Out"]],
        "less_than": [["X", "Y"], ["Out"]],
        "mean": [["X"], ["Out"]],
        "not_equal": [["X", "Y"], ["Out"]],
        "reshape": [["X"], ["Out"]],
        "reshape2": [["X"], ["Out"]],
        "transpose2": [["X"], ["Out"]],
        "bilinear_interp": [["X"], ["Out"]],
        "nearest_interp": [["X"], ["Out"]],
        "trilinear_interp": [["X"], ["Out"]],
        "slice": [["Input"], ["Out"]],
        "squeeze": [["X"], ["Out"]],
        "elementwise_sub": [["X", "Y"], ["Out"]],
        "relu": [["X"], ["Out"]],
        "relu6": [["X"], ["Out"]],
        "leaky_relu": [["X"], ["Out"]],
        "prelu": [["X"], ["Out"]],
        "tanh": [["X"], ["Out"]],
        "swish": [["X"], ["Out"]],
        "dropout": [["X"], ["Out"]],
        "batch_norm": [["X"], ["Y"]],
        "layer_norm": [["X"], ["Y"]],
        "sigmoid": [["X"], ["Out"]],
        "elementwise_mul": [["X", "Y"], ["Out"]],
        "scale": [["X"], ["Out"]],
        "hard_swish": [["X"], ["Out"]],
        "hard_sigmoid": [["X"], ["Out"]],
        "gru": [["Input", "Weight"], ["Hidden"]],
        "lstm": [["Input", "Weight"], ["Hidden"]],
        "pad2d": [["X"], ["Out"]],
        "flatten": [["X"], ["Out"]],
        "flatten2": [["X"], ["Out"]],
    }

    def _get_op_output_var_names(op):
        """ """
        assert isinstance(op, (IrNode, Operator)), \
            "The input op should be IrNode or Operator."
        var_names = []
        op_name = op.name() if isinstance(op, IrNode) \
            else op.type
        if op_name not in op_real_in_out_name:
            return []

        name_list = op_real_in_out_name[op_name][1]
        for name in name_list:
            var_name = op.output(name)
            if isinstance(var_name, list):
                var_names.extend(var_name)
            else:
                var_names.append(var_name)
        return var_names

    transform_pass = QuantizationTransformPass(
        scope=scope,
        place=place,
        activation_quantize_type=activation_quantize_type,
        weight_quantize_type=weight_quantize_type)
    transform_pass.apply(graph)

    op_nodes = graph.all_op_nodes()
    for op_node in op_nodes:
        if op_node.name() in out_scale_op_list:
            var_names = _get_op_output_var_names(op_node)
            for var_name in var_names:
                in_node = graph._find_node_by_name(op_node.outputs, var_name)
                if in_node.dtype() not in \
                    [core.VarDesc.VarType.FP64, core.VarDesc.VarType.FP32]:
                    continue

                op_node.op()._set_attr("out_threshold", 3.0)

    # Freeze graph for inference, but the weight of fc/conv is still float type.
    freeze_pass = QuantizationFreezePass(
        scope=scope, place=place, weight_quantize_type=weight_quantize_type)
    freeze_pass.apply(graph)

    main_program = graph.to_program()

    # Overwrite the scale tensors of fake_quantize_moving_average_abs_max (InScale)
    # and fake_channel_wise_dequantize_max_abs (Scales) with dummy values.
    op_nodes = graph.all_op_nodes()
    for op_node in op_nodes:
        if op_node.name() == 'fake_quantize_moving_average_abs_max':
            var_name = op_node.input("InScale")[0]
            tensor = scope.var(var_name).get_tensor()
            tensor.set(np.array([1], dtype=np.float32), place)
        elif op_node.name() == 'fake_channel_wise_dequantize_max_abs':
            var_name = op_node.input("Scales")[0]
            tensor = scope.var(var_name).get_tensor()
            tensor.set(np.ones(tensor.shape(), dtype=np.float32), place)

    if save:
        fluid.io.save_inference_model('test_inference_model',
                                      feed_target_names,
                                      fetch_targets,
                                      exe,
                                      main_program=main_program)

    feed_vars = [
        main_program.global_block().var(name) for name in feed_target_names
    ]
    serialized_program = paddle.static.serialize_program(feed_vars,
                                                         fetch_targets,
                                                         program=main_program)
    serialized_params = paddle.static.serialize_persistables(
        feed_vars, fetch_targets, executor=exe, program=main_program)
    return serialized_program, serialized_params
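A hedged usage sketch; the paths are hypothetical, and note that the function hard-codes paddle.CUDAPlace(0), so it needs a CUDA device. Both return values are serialized bytes and can be written straight to disk:

program_bytes, params_bytes = create_quant_model(
    model='./fp32/model.pdmodel',
    params='./fp32/model.pdiparams')
with open('./quant/model.pdmodel', 'wb') as f:
    f.write(program_bytes)
with open('./quant/model.pdiparams', 'wb') as f:
    f.write(params_bytes)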
Example #8
    def save_quantized_model(self,
                             model,
                             path,
                             input_spec=None,
                             onnx_format=False,
                             **config):
        """
        Save the quantized model for the inference.

        Args:
            model (Layer): The model to be saved.
            path (str): The path prefix to save model. The format is 
                ``dirname/file_prefix`` or ``file_prefix``.
            input_spec (list[InputSpec|Tensor], optional): Describes the input
                of the saved model's forward method, which can be described by
                InputSpec or example Tensor. If None, all input variables of 
                the original Layer's forward method would be the inputs of
                the saved model. Default None.
            onnx_format (bool, optional): Whether to export the quantized model 
                with format of ONNX. Default is False.
            **configs (dict, optional): Other save configuration options for
                compatibility. We do not recommend using these configurations,
                they may be removed in the future. If not necessary, DO NOT use
                them. Default None.
                The following options are currently supported:
                (1) output_spec (list[Tensor]): Selects the output targets of
                the saved model. By default, all return variables of original
                Layer's forward method are kept as the output of the saved model.
                If the provided ``output_spec`` list is not all output variables, 
                the saved model will be pruned according to the given
                ``output_spec`` list. 

        Returns:
            None
        """
        assert isinstance(model, dygraph.Layer), \
            "The model must be the instance of dygraph.Layer."

        paddle.jit.save(layer=model,
                        path=path,
                        input_spec=input_spec,
                        **config)

        is_dynamic_mode = False
        if paddle.in_dynamic_mode():
            is_dynamic_mode = True
            paddle.enable_static()

        place = core.CPUPlace()
        scope = global_scope()
        exe = Executor(place)

        dirname = os.path.dirname(path)
        basename = os.path.basename(path)
        model_filename = basename + INFER_MODEL_SUFFIX
        params_filename = basename + INFER_PARAMS_SUFFIX

        [infer_program, feed_target_names, fetch_targets
         ] = (load_inference_model(dirname=dirname,
                                   executor=exe,
                                   model_filename=model_filename,
                                   params_filename=params_filename))

        self._gather_scales(infer_program, scope, fetch_targets)

        # Remove `moving_average_abs_max_scale` node in sub graphs.
        graph = IrGraph(core.Graph(infer_program.desc), for_test=False)
        for sub_graph in graph.all_sub_graphs():
            for _op in sub_graph.all_op_nodes():
                if _op.name() == "moving_average_abs_max_scale":
                    sub_graph.safe_remove_nodes(_op)
            sub_graph.resolve_hazard()
        infer_program = graph.to_program()

        self._set_skip_quant_attr(infer_program)

        clip_extra = False
        if onnx_format:
            graph = IrGraph(core.Graph(infer_program.desc), for_test=False)
            transform_pass = ReplaceFakeQuantDequantPass(scope, place)
            transform_pass.apply(graph)

            quant_weight_pass = QuantWeightPass(scope, place)
            quant_weight_pass.apply(graph)
            infer_program = graph.to_program()

            clip_extra = True

        save_inference_model(dirname=dirname,
                             feeded_var_names=feed_target_names,
                             target_vars=fetch_targets,
                             executor=exe,
                             main_program=infer_program.clone(),
                             model_filename=model_filename,
                             params_filename=params_filename,
                             clip_extra=clip_extra)

        if is_dynamic_mode:
            paddle.disable_static()
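A hedged usage sketch; 'quanter' (the object exposing this method) and 'net' (a Layer the quantizer has already processed) are hypothetical names:

quanter.save_quantized_model(
    net,
    path='./quant_model/lenet',
    input_spec=[
        paddle.static.InputSpec(shape=[None, 1, 28, 28], dtype='float32')
    ],
    onnx_format=False)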
Example #9
    def test_out_scale_acc(self):
        def _build_static_lenet(main, startup, is_test=False, seed=1000):
            with fluid.unique_name.guard():
                with fluid.program_guard(main, startup):
                    main.random_seed = seed
                    startup.random_seed = seed
                    img = fluid.layers.data(name='image',
                                            shape=[1, 28, 28],
                                            dtype='float32')
                    label = fluid.layers.data(name='label',
                                              shape=[1],
                                              dtype='int64')
                    prediction = StaticLenet(img)
                    if not is_test:
                        loss = fluid.layers.cross_entropy(input=prediction,
                                                          label=label)
                        avg_loss = fluid.layers.mean(loss)
                    else:
                        avg_loss = prediction
            return img, label, avg_loss

        reader = paddle.batch(paddle.dataset.mnist.test(),
                              batch_size=32,
                              drop_last=True)
        weight_quantize_type = 'abs_max'
        activation_quant_type = 'moving_average_abs_max'
        param_init_map = {}
        seed = 1000
        lr = 0.1
        dynamic_out_scale_list = []
        static_out_scale_list = []

        # imperative train
        _logger.info(
            "--------------------------dynamic graph qat--------------------------"
        )
        imperative_out_scale = ImperativeQuantAware()

        with fluid.dygraph.guard():
            np.random.seed(seed)
            fluid.default_main_program().random_seed = seed
            fluid.default_startup_program().random_seed = seed
            lenet = ImperativeLenet()
            fixed_state = {}
            for name, param in lenet.named_parameters():
                p_shape = param.numpy().shape
                p_value = param.numpy()
                if name.endswith("bias"):
                    value = np.zeros_like(p_value).astype('float32')
                else:
                    value = np.random.normal(loc=0.0,
                                             scale=0.01,
                                             size=np.product(p_shape)).reshape(
                                                 p_shape).astype('float32')
                fixed_state[name] = value
                param_init_map[param.name] = value
            lenet.set_dict(fixed_state)
            imperative_out_scale.quantize(lenet)
            adam = AdamOptimizer(learning_rate=lr,
                                 parameter_list=lenet.parameters())
            dynamic_loss_rec = []
            lenet.train()
            for batch_id, data in enumerate(reader()):
                x_data = np.array([x[0].reshape(1, 28, 28)
                                   for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data
                                   ]).astype('int64').reshape(-1, 1)

                img = fluid.dygraph.to_variable(x_data)
                label = fluid.dygraph.to_variable(y_data)

                out = lenet(img)
                loss = fluid.layers.cross_entropy(out, label)
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()
                adam.minimize(avg_loss)
                lenet.clear_gradients()
                dynamic_loss_rec.append(avg_loss.numpy()[0])
                if batch_id % 100 == 0:
                    _logger.info('{}: {}'.format('loss', avg_loss.numpy()))

            lenet.eval()

        path = "./dynamic_outscale_infer_model/lenet"
        dynamic_save_dir = "./dynamic_outscale_infer_model"

        imperative_out_scale.save_quantized_model(
            layer=lenet,
            path=path,
            input_spec=[
                paddle.static.InputSpec(shape=[None, 1, 28, 28],
                                        dtype='float32')
            ])

        _logger.info(
            "--------------------------static graph qat--------------------------"
        )
        static_loss_rec = []
        if core.is_compiled_with_cuda():
            place = core.CUDAPlace(0)
        else:
            place = core.CPUPlace()
        exe = fluid.Executor(place)

        main = fluid.Program()
        infer = fluid.Program()
        startup = fluid.Program()
        static_img, static_label, static_loss = _build_static_lenet(
            main, startup, False, seed)
        infer_img, _, infer_pre = _build_static_lenet(infer, startup, True,
                                                      seed)
        with fluid.unique_name.guard():
            with fluid.program_guard(main, startup):
                opt = AdamOptimizer(learning_rate=lr)
                opt.minimize(static_loss)

        scope = core.Scope()
        with fluid.scope_guard(scope):
            exe.run(startup)
        for param in main.all_parameters():
            param_tensor = scope.var(param.name).get_tensor()
            param_tensor.set(param_init_map[param.name], place)
        main_graph = IrGraph(core.Graph(main.desc), for_test=False)
        infer_graph = IrGraph(core.Graph(infer.desc), for_test=True)
        transform_pass = QuantizationTransformPass(
            scope=scope,
            place=place,
            activation_quantize_type=activation_quant_type,
            weight_quantize_type=weight_quantize_type,
            quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul'])
        transform_pass.apply(main_graph)
        transform_pass.apply(infer_graph)
        outscale_pass = OutScaleForTrainingPass(scope=scope, place=place)
        outscale_pass.apply(main_graph)
        build_strategy = fluid.BuildStrategy()
        build_strategy.fuse_all_reduce_ops = False
        binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
            loss_name=static_loss.name, build_strategy=build_strategy)

        feeder = fluid.DataFeeder(feed_list=[static_img, static_label],
                                  place=place)
        with fluid.scope_guard(scope):
            for batch_id, data in enumerate(reader()):
                loss_v, = exe.run(binary,
                                  feed=feeder.feed(data),
                                  fetch_list=[static_loss])
                static_loss_rec.append(loss_v[0])
                if batch_id % 100 == 0:
                    _logger.info('{}: {}'.format('loss', loss_v))
        scale_inference_pass = OutScaleForInferencePass(scope=scope)
        scale_inference_pass.apply(infer_graph)

        save_program = infer_graph.to_program()
        static_save_dir = "./static_outscale_infer_model"
        with fluid.scope_guard(scope):
            fluid.io.save_inference_model(
                dirname=static_save_dir,
                feeded_var_names=[infer_img.name],
                target_vars=[infer_pre],
                executor=exe,
                main_program=save_program,
                model_filename="lenet" + INFER_MODEL_SUFFIX,
                params_filename="lenet" + INFER_PARAMS_SUFFIX)

        rtol = 1e-05
        atol = 1e-08
        for i, (loss_d,
                loss_s) in enumerate(zip(dynamic_loss_rec, static_loss_rec)):
            diff = np.abs(loss_d - loss_s)
            if diff > (atol + rtol * np.abs(loss_s)):
                _logger.info(
                    "diff({}) at {}, dynamic loss = {}, static loss = {}".
                    format(diff, i, loss_d, loss_s))
                break

        self.assertTrue(np.allclose(np.array(dynamic_loss_rec),
                                    np.array(static_loss_rec),
                                    rtol=rtol,
                                    atol=atol,
                                    equal_nan=True),
                        msg='Failed to do the imperative qat.')

        # load dynamic model
        [dynamic_inference_program, feed_target_names,
         fetch_targets] = (fluid.io.load_inference_model(
             dirname=dynamic_save_dir,
             executor=exe,
             model_filename="lenet" + INFER_MODEL_SUFFIX,
             params_filename="lenet" + INFER_PARAMS_SUFFIX))
        # load static model
        [static_inference_program, feed_target_names,
         fetch_targets] = (fluid.io.load_inference_model(
             dirname=static_save_dir,
             executor=exe,
             model_filename="lenet" + INFER_MODEL_SUFFIX,
             params_filename="lenet" + INFER_PARAMS_SUFFIX))

        dynamic_ops = dynamic_inference_program.global_block().ops
        static_ops = static_inference_program.global_block().ops

        for op in dynamic_ops[:]:
            if op.type == "flatten2" or 'fake' in op.type:
                dynamic_ops.remove(op)

        for op in static_ops[:]:
            if 'fake' in op.type:
                static_ops.remove(op)

        for i in range(len(dynamic_ops)):
            if dynamic_ops[i].has_attr("out_threshold"):
                self.assertTrue(dynamic_ops[i].type == static_ops[i].type)
                self.assertTrue(dynamic_ops[i].attr("out_threshold") ==
                                static_ops[i].attr("out_threshold"))