コード例 #1
0
def train(dot_save_dir, prefix, seed=1234):
    """Train the program from ``build_program`` for 100 steps.

    The NumPy and Paddle seeds are fixed and, on CUDA builds, cuDNN is put
    into deterministic mode. The build strategy's graphviz debug path is set
    to ``os.path.join(dot_save_dir, prefix)``.

    Returns:
        list: one loss value per step, taken from ``fetched[0][0][0]``.
    """
    np.random.seed(seed)
    paddle.seed(seed)
    if paddle.is_compiled_with_cuda():
        paddle.set_flags({'FLAGS_cudnn_deterministic': 1})

    startup_prog = paddle.static.Program()
    main_prog = paddle.static.Program()
    img, label, loss = build_program(main_prog, startup_prog)

    # Prefer the first GPU when available, otherwise fall back to the CPU.
    if paddle.is_compiled_with_cuda():
        device = paddle.CUDAPlace(0)
    else:
        device = paddle.CPUPlace()
    executor = paddle.static.Executor(device)
    executor.run(startup_prog)

    strategy = paddle.static.BuildStrategy()
    strategy.debug_graphviz_path = os.path.join(dot_save_dir, prefix)
    program = paddle.static.CompiledProgram(main_prog, strategy)
    program = program.with_data_parallel(loss_name=loss.name)

    num_iters = 100
    batches = rand_data(img.name, label.name, num_iters)
    losses = []
    for step in range(num_iters):
        fetched = executor.run(program,
                               feed=batches[step],
                               fetch_list=[loss],
                               return_merged=False)
        losses.append(fetched[0][0][0])
    return losses
コード例 #2
0
 def setUp(self):
     """Collect the custom ops, dtypes and devices exercised by the tests."""
     self.custom_ops = [
         custom_module.custom_relu, custom_module.custom_relu_dup
     ]
     has_cuda = paddle.is_compiled_with_cuda()
     # float16 and the GPU device are only added on CUDA builds.
     self.dtypes = ['float32', 'float64'] + (['float16'] if has_cuda else [])
     self.devices = ['cpu'] + (['gpu'] if has_cuda else [])
コード例 #3
0
    def test_large_data(self):
        """Check that dygraph and static-graph gather agree on a large input.

        Skipped (returns early) when Paddle is not compiled with CUDA.
        """
        if not paddle.is_compiled_with_cuda():
            return

        x = np.random.rand(226862, 256).astype("float32")
        index = np.random.randint(0, 22682, size=(11859027))

        def run_dygraph():
            # Imperative-mode gather, converted back to a NumPy array.
            with fluid.dygraph.guard():
                x_tensor = paddle.to_tensor(x)
                index_tensor = paddle.to_tensor(index)
                return paddle.gather(x_tensor, index_tensor).numpy()

        @switch_to_static_graph
        def run_static_graph():
            main_prog = paddle.static.Program()
            startup_prog = paddle.static.Program()
            with paddle.static.program_guard(main_prog, startup_prog):
                x_var = paddle.static.data(name="x",
                                           dtype=x.dtype,
                                           shape=x.shape)
                index_var = paddle.static.data(name="index",
                                               dtype=index.dtype,
                                               shape=index.shape)
                gathered = paddle.gather(x_var, index_var)
                feed = {x_var.name: x, index_var.name: index}
                fetch = [gathered]

                executor = paddle.static.Executor(paddle.CUDAPlace(0))
                return executor.run(feed=feed, fetch_list=fetch)[0]

        dygraph_out = run_dygraph()
        static_out = run_static_graph()
        self.assertTrue(np.array_equal(dygraph_out, static_out))
コード例 #4
0
 def test_set_current_stream_raise_error(self):
     """`_set_current_stream` must raise TypeError for non-stream arguments."""
     if not paddle.is_compiled_with_cuda():
         return
     # Neither a NumPy array nor None is an acceptable stream argument.
     for invalid_stream in (np.zeros(5), None):
         self.assertRaises(TypeError,
                           paddle.device.cuda._set_current_stream,
                           invalid_stream)
コード例 #5
0
    def test_sparse_coo_tensor_sorted(self):
        """Check the indices/values reported for unsorted, duplicated COO input.

        Runs on the CPU and, when Paddle is compiled with CUDA, on the GPU.
        """
        with _test_eager_guard():
            for device in devices:
                runnable = device == 'cpu' or (
                    device == 'gpu' and paddle.is_compiled_with_cuda())
                if not runnable:
                    continue
                paddle.device.set_device(device)

                # Unsorted indices in which coordinate (0, 1) appears twice.
                coo_indices = paddle.to_tensor([[1, 0, 0], [0, 1, 1]],
                                               dtype='int32')
                scalar_values = paddle.to_tensor([1.0, 2.0, 3.0],
                                                 dtype='float32')
                sparse_x = paddle.incubate.sparse.sparse_coo_tensor(
                    coo_indices, scalar_values)
                # Expected: sorted coordinates, duplicates summed (2.0 + 3.0).
                expected_indices = [[0, 1], [1, 0]]
                expected_values = [5.0, 1.0]
                assert np.array_equal(expected_indices,
                                      sparse_x.indices().numpy())
                assert np.array_equal(expected_values,
                                      sparse_x.values().numpy())

                # Same check when every non-zero entry is a vector.
                vector_values = paddle.to_tensor(
                    [[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]], dtype='float32')
                sparse_x = paddle.incubate.sparse.sparse_coo_tensor(
                    coo_indices, vector_values)
                expected_values = [[5.0, 5.0], [1.0, 1.0]]
                assert np.array_equal(expected_indices,
                                      sparse_x.indices().numpy())
                assert np.array_equal(expected_values,
                                      sparse_x.values().numpy())
コード例 #6
0
def run_test(clip_after_allreduce=True,
             max_global_norm=-1.0,
             gradient_merge_steps=1):
    """Launch the test file through ``paddle.distributed.launch``.

    The three arguments are passed to the child process as environment
    variables. The run succeeds when the command exits with status 0 and the
    touch file named by ``SUCCESS_TOUCH_FILE`` exists afterwards. Nothing is
    run on non-CUDA builds or on Windows.
    """
    if not paddle.is_compiled_with_cuda() or os.name == 'nt':
        return
    # Snapshot the arguments now, before any other local is defined, so the
    # failure message reports only the test parameters.
    args = locals()

    log_dir = 'log_{}'.format(os.getpid())
    launch_argv = [
        sys.executable,
        '-u',
        '-m',
        'paddle.distributed.launch',
        '--log_dir',
        log_dir,
        get_test_file(),
    ]
    cmd = ' '.join(shlex.quote(arg) for arg in launch_argv)

    os.environ['CLIP_AFTER_ALLREDUCE'] = str(clip_after_allreduce)
    os.environ['MAX_GLOBAL_NORM'] = str(max_global_norm)
    os.environ['GRADIENT_MERGE_STEPS'] = str(gradient_merge_steps)

    touch_file_name = 'distributed_fused_lamb_touch_file_{}'.format(os.getpid())
    os.environ['SUCCESS_TOUCH_FILE'] = touch_file_name
    remove_file_if_exists(touch_file_name)
    try:
        exit_status = os.system(cmd)
        assert exit_status == 0 and os.path.exists(
            touch_file_name), 'Test failed when {}'.format(args)
    finally:
        # Clean up the marker file and the launcher logs in every case.
        remove_file_if_exists(touch_file_name)
        remove_file_if_exists(log_dir)
コード例 #7
0
    def test_fast_math(self):
        """Compare approximate GELU (forward and grad) with and without fast math.

        Skipped (returns early) when Paddle is not compiled with CUDA. The
        ``FLAGS_use_fast_math`` flag is always switched back off, even if the
        fast-math run raises, so the setting cannot leak into other tests.
        """
        if not paddle.is_compiled_with_cuda():
            return

        def use_fast_math(enabled):
            paddle.set_flags({'FLAGS_use_fast_math': enabled})

        shape = [11, 17, 8]
        x_np = np.random.uniform(-1, 1, size=shape).astype(np.float16)
        y_g_np = np.random.uniform(-1, 1, size=shape).astype(np.float16)

        def run_gelu_op(approximate):
            # Returns the GELU output and the gradient w.r.t. x for the
            # upstream gradient y_g_np.
            with dg.guard():
                x = paddle.to_tensor(x_np)
                x.stop_gradient = False
                y = F.gelu(x, approximate=approximate)
                x_grad = paddle.grad([y], [x], [paddle.to_tensor(y_g_np)])[0]
                return y.numpy(), x_grad.numpy()

        use_fast_math(True)
        try:
            y_fast_math, x_g_fast_math = run_gelu_op(True)
        finally:
            # Restore the flag regardless of the outcome of the fast run.
            use_fast_math(False)

        y_ref, x_g_ref = run_gelu_op(True)
        self.assertTrue(np.allclose(y_ref, y_fast_math, rtol=1e-5, atol=5e-4))

        self.assertTrue(
            np.allclose(x_g_ref, x_g_fast_math, rtol=1e-5, atol=5e-4))
コード例 #8
0
    def run_program(self, device_type):
        """Compare gradients of a multi-place run with single-place runs.

        The same network is compiled twice with the ``_NoReduce`` strategy:
        once over all ``places`` and once over ``places[0]`` only. The
        multi-place gradients are compared against the single-place gradients
        computed on each piece of the split feed, concatenated along axis 0
        and divided by the number of places.

        Args:
            device_type: ``DeviceType.CUDA`` or ``DeviceType.CPU``. For CUDA
                the method returns early when Paddle is not compiled with
                CUDA; any other value must equal ``DeviceType.CPU``.
        """
        if device_type == DeviceType.CUDA:
            if not paddle.is_compiled_with_cuda():
                return
            places = paddle.static.cuda_places()
        else:
            self.assertEqual(device_type, DeviceType.CPU)
            places = paddle.static.cpu_places(4)

        paddle.seed(10)
        with paddle.fluid.unique_name.guard():
            main = paddle.static.Program()
            startup = paddle.static.Program()
            with paddle.static.program_guard(main, startup):
                loss = simple_fc_net(use_feed=True)
                # NOTE(review): learning_rate=0.0 presumably keeps the
                # parameters unchanged between runs - confirm.
                optimizer = paddle.optimizer.SGD(learning_rate=0.0)
                optimizer.minimize(loss)

        # Gradient variable names of every parameter in the main program.
        grads = [p.name + '@GRAD' for p in main.all_parameters()]
        no_reduce = paddle.static.BuildStrategy.ReduceStrategy._NoReduce

        # Program compiled over all places.
        build_strategy = paddle.static.BuildStrategy()
        build_strategy.reduce_strategy = no_reduce
        main_multi_place = paddle.static.CompiledProgram(
            main).with_data_parallel(loss_name=loss.name,
                                     build_strategy=build_strategy,
                                     places=places)

        # Clone of the same program compiled for the first place only.
        build_strategy = paddle.static.BuildStrategy()
        build_strategy.reduce_strategy = no_reduce
        main_single_place = paddle.static.CompiledProgram(
            main.clone()).with_data_parallel(loss_name=loss.name,
                                             build_strategy=build_strategy,
                                             places=places[0])

        image, label = init_data()
        feed = {'image': image, 'label': label}
        exe = paddle.static.Executor(places[0])
        scope = paddle.static.Scope()
        with paddle.static.scope_guard(scope):
            exe.run(startup)
            grads_multi_place = exe.run(main_multi_place,
                                        feed=feed,
                                        fetch_list=[grads])

            # Run the single-place program once per piece of the split feed
            # and collect each gradient separately.
            feeds = self.split_feed(feed, len(places))
            grads_single_place = [list() for _ in range(len(grads))]
            for f in feeds:
                gs = exe.run(main_single_place, feed=f, fetch_list=[grads])
                for i, g in enumerate(gs):
                    grads_single_place[i].append(g)

            # Concatenate the per-piece gradients and scale by 1/len(places)
            # before comparing with the multi-place result.
            for i in range(len(grads)):
                grads_single_place[i] = np.concatenate(grads_single_place[i],
                                                       axis=0) / len(places)

        self.assertEqual(len(grads_multi_place), len(grads_single_place))
        for g1, g2 in zip(grads_multi_place, grads_single_place):
            self.assertTrue(np.allclose(g1, g2),
                            'g1 = {}\ng2 = {}\n'.format(g1, g2))
コード例 #9
0
    def test_combined_loss(self):
        """Compare ``CombinedLoss`` (DML, softmax) with the NumPy reference.

        Runs on the CPU and, when Paddle is compiled with CUDA, on the GPU.
        """
        shape = [32, 16]
        pairs = [["student", "teacher"]]
        paddle.seed(0)
        predicts = {
            "student": paddle.rand(shape),
            "teacher": paddle.rand(shape),
        }

        devices = ["cpu"] + (["gpu"] if paddle.is_compiled_with_cuda() else [])

        dml_cfg = {
            "loss_function": "DMLLoss",
            "weight": 1.0,
            "act": "softmax",
            "model_name_pairs": pairs
        }
        loss_cfg_list = [dml_cfg]

        for device in devices:
            paddle.set_device(device)
            combined_loss = CombinedLoss(loss_config_list=loss_cfg_list)
            paddle_losses = combined_loss(predicts, None)
            numpy_losses = self.np_combined_loss(predicts, loss_cfg_list)
            for loss_name in paddle_losses:
                actual = paddle_losses[loss_name].numpy()
                expected = numpy_losses[loss_name]
                self.assertTrue(np.allclose(expected, actual))
コード例 #10
0
    def calc_distillation_distance_loss(self, predicts, pairs, key=None):
        """Check ``DistillationLoss`` (DistanceLoss) against the NumPy reference.

        Every combination of mode ("l1", "l2", "smooth_l1") and reduction
        ("none", "mean", "sum") is evaluated on the CPU and, when Paddle is
        compiled with CUDA, on the GPU.

        Args:
            predicts: model outputs passed to both the Paddle loss and the
                NumPy reference implementation.
            pairs: model-name pairs forwarded as ``model_name_pairs``.
            key: optional layer key; when given, ``[key, key]`` is passed as
                ``layers_name``, otherwise ``None`` is passed.
        """
        modes = ["l1", "l2", "smooth_l1"]
        reductions = ["none", "mean", "sum"]
        devices = ["cpu"]
        if paddle.is_compiled_with_cuda():
            devices.append("gpu")

        # Identity test against None: `!=` would invoke a custom __ne__.
        layers_name = [key, key] if key is not None else None

        for device in devices:
            paddle.set_device(device)
            for reduction in reductions:
                for mode in modes:
                    loss_func = DistillationLoss(
                        mode=mode,
                        loss_function='DistanceLoss',
                        model_name_pairs=pairs,
                        layers_name=layers_name,
                        reduction=reduction)
                    np_result_dict = self.dist_np_distance_loss(
                        predicts,
                        loss_function='DistanceLoss',
                        mode=mode,
                        reduction=reduction,
                        model_name_pairs=pairs,
                        key=key)
                    pd_result_dict = loss_func(predicts, None)
                    for k in np_result_dict:
                        pd_result = pd_result_dict[k].numpy()
                        np_result = np_result_dict[k]
                        self.assertTrue(np.allclose(np_result, pd_result))
    def __init__(self,
                 net,
                 size,
                 mean=0.0,
                 std=1.0,
                 nms_method=None,
                 iou_threshold=0.3,
                 filter_threshold=0.01,
                 candidate_size=200,
                 sigma=0.5,
                 device=None):
        """Store the network and prediction settings and move the net to a device.

        Args:
            net: network to run; it is moved to the selected device and put
                into eval mode.
            size, mean, std: forwarded to ``PredictionTransform``.
            nms_method, iou_threshold, filter_threshold, candidate_size,
                sigma: stored on the instance unchanged (presumably consumed
                by post-processing elsewhere in the class).
            device: device to use. When falsy, the GPU is selected if Paddle
                is compiled with CUDA, otherwise the CPU.
        """
        self.net = net
        self.transform = PredictionTransform(size, mean, std)
        self.iou_threshold = iou_threshold
        self.filter_threshold = filter_threshold
        self.candidate_size = candidate_size
        self.nms_method = nms_method

        self.sigma = sigma
        if device:
            self.device = device
        else:
            # Paddle names its CUDA device "gpu"; "cuda" is not one of the
            # documented device strings for paddle.set_device.
            self.device = paddle.set_device(
                "gpu" if paddle.is_compiled_with_cuda() else "cpu")

        self.net.to(self.device)
        self.net.eval()

        self.timer = Timer()
コード例 #12
0
    def test_synchronize(self):
        """``cuda.synchronize`` returns None for valid devices, rejects "gpu:0"."""
        if not paddle.is_compiled_with_cuda():
            return

        # No argument, a device index and a CUDAPlace are all accepted.
        default_result = cuda.synchronize()
        self.assertIsNone(default_result)
        index_result = cuda.synchronize(0)
        self.assertIsNone(index_result)
        place_result = cuda.synchronize(paddle.CUDAPlace(0))
        self.assertIsNone(place_result)

        # A device string is expected to raise ValueError.
        self.assertRaises(ValueError, cuda.synchronize, "gpu:0")
コード例 #13
0
    def run_main(self, use_fp16, use_master_param_norm=True):
        """Compare ``run_model`` results with and without the distributed LAMB.

        Skipped (returns early) when Paddle is not compiled with CUDA. Without
        fp16, ``use_master_param_norm`` must be true. The allowed maximum
        absolute difference depends on the precision settings.
        """
        if not paddle.is_compiled_with_cuda():
            return

        if not use_fp16:
            self.assertTrue(use_master_param_norm)

        base_config = self.config()

        def build_config(use_distributed_lamb):
            # Copy of the base config with the options under test filled in.
            cfg = dict(base_config)
            cfg['use_distributed_lamb'] = use_distributed_lamb
            cfg['use_fp16'] = use_fp16
            cfg['use_master_param_norm'] = use_master_param_norm
            return cfg

        distributed_config = build_config(True)
        reference_config = build_config(False)

        distributed_results = run_model(**distributed_config)
        reference_results = run_model(**reference_config)
        self.assertEqual(len(distributed_results), len(reference_results))

        if not use_fp16:
            atol = 1e-7
        elif use_master_param_norm:
            atol = 8e-4
        else:
            atol = 1e-3

        for lhs, rhs in zip(distributed_results, reference_results):
            max_diff = np.max(np.abs(lhs - rhs))
            msg = 'max_diff = {} atol = {} when use_fp16 = {} , use_master_param_norm = {}'.format(
                max_diff, atol, use_fp16, use_master_param_norm)
            self.assertTrue(max_diff < atol, msg)
            print(msg)
コード例 #14
0
ファイル: test_eigh_op.py プロジェクト: sandyhouse/Paddle
 def test_check_output_gpu(self):
     """Validate ``paddle.linalg.eigh`` on the GPU in dygraph mode."""
     if not paddle.is_compiled_with_cuda():
         return
     paddle.disable_static(place=paddle.CUDAPlace(0))
     x_tensor = paddle.to_tensor(self.x_np)
     w_tensor, v_tensor = paddle.linalg.eigh(x_tensor, self.UPLO)
     valid_eigh_result(self.x_np, w_tensor.numpy(), v_tensor.numpy(),
                       self.UPLO)
コード例 #15
0
 def test_check_output_gpu(self):
     """Compare ``paddle.linalg.eigvalsh`` on the GPU with NumPy's result."""
     if not paddle.is_compiled_with_cuda():
         return
     paddle.disable_static(place=paddle.CUDAPlace(0))
     x_tensor = paddle.to_tensor(self.x_np)
     # NumPy provides the reference values.
     reference_w = np.linalg.eigvalsh(self.x_np)
     paddle_w = paddle.linalg.eigvalsh(x_tensor)
     compare_result(paddle_w.numpy(), reference_w)
コード例 #16
0
 def _enable_gpu(self):
     """Return ``resource_quota.on_gpu``, validating that CUDA is available.

     Raises:
         BentoMLException: if GPU use is requested but Paddle is not
             compiled with CUDA.
     """
     on_gpu = self.resource_quota.on_gpu
     if not on_gpu:
         return on_gpu
     if paddle.is_compiled_with_cuda():  # type: ignore
         return on_gpu
     raise BentoMLException(
         "`resource_quota.on_gpu=True` while CUDA is not currently supported by existing paddlepaddle."
         " Make sure to install `paddlepaddle-gpu` and try again.")
コード例 #17
0
    def _build_predict_fn(self, rebuild: bool = False):
        """Create ``self.predict_fn`` if it is missing or a rebuild is requested.

        The generated function takes a 4-D batch ``[bs, h, w, 3]``, runs the
        model without gradients and returns the softmax probabilities as a
        NumPy array.

        Args:
            rebuild: when True, replace an existing ``predict_fn``.
        """
        if self.predict_fn is not None:
            assert callable(self.predict_fn), "predict_fn is predefined before, but is not callable. " \
                "Check it again."

        import paddle
        if self.predict_fn is None or rebuild:
            if not paddle.is_compiled_with_cuda() and self.device.startswith('gpu'):
                print("Paddle is not installed with GPU support. Change to CPU version now.")
                self.device = 'cpu'

            # set device. self.device is one of ['cpu', 'gpu:0', 'gpu:1', ...]
            paddle.set_device(self.device)

            # Only forward passes are needed here (predict_fn runs under
            # no_grad), so the model is put into eval mode.
            self.paddle_model.eval()

            def predict_fn(data):
                assert len(data.shape) == 4  # [bs, h, w, 3]

                with paddle.no_grad():
                    logits = self.paddle_model(paddle.to_tensor(data))  # get logits, [bs, num_c]
                    probas = paddle.nn.functional.softmax(logits, axis=1)  # get probabilities.
                return probas.numpy()

            self.predict_fn = predict_fn
0
    def train(self, place, iters, feed, use_cinn=False, seed=1234):
        """Run ``iters`` training steps on ``place`` and return the losses.

        Seeds NumPy and Paddle, enables deterministic cuDNN on CUDA builds and
        applies the CINN flag before the program is built. ``feed[step]`` is
        fed at each step; the loss is read from ``fetched[0][0]``.
        """
        np.random.seed(seed)
        paddle.seed(seed)
        if paddle.is_compiled_with_cuda():
            paddle.set_flags({'FLAGS_cudnn_deterministic': 1})
        set_cinn_flag(use_cinn)

        startup_prog = paddle.static.Program()
        main_prog = paddle.static.Program()

        loss = self.build_program(main_prog, startup_prog)
        executor = paddle.static.Executor(place)

        compiled = paddle.static.CompiledProgram(main_prog)
        compiled = compiled.with_data_parallel(loss_name=loss.name)
        losses = []

        # Run inside a fresh scope so variables do not leak between calls.
        with paddle.static.scope_guard(paddle.static.Scope()):
            executor.run(startup_prog)
            for step in range(iters):
                fetched = executor.run(compiled,
                                       feed=feed[step],
                                       fetch_list=[loss],
                                       return_numpy=True)
                losses.append(fetched[0][0])
        return losses
コード例 #19
0
    def test_fixed_random_number(self):
        """Check that GPU ``paddle.bernoulli`` is reproducible for a fixed seed.

        Skipped (returns early) when Paddle is not compiled with CUDA. Static
        mode is re-enabled in a ``finally`` clause so a failing assertion does
        not leave the process in dygraph mode for later tests.
        """
        # Test GPU Fixed random number, which is generated by 'curandStatePhilox4_32_10_t'
        if not paddle.is_compiled_with_cuda():
            return

        print("Test Fixed Random number on GPU------>")
        paddle.disable_static()
        try:
            paddle.set_device('gpu')
            paddle.seed(100)
            np.random.seed(100)

            x_np = np.random.rand(32, 1024, 1024)

            # float64 input: compare index sums of the non-zero samples and a
            # short slice against recorded reference values.
            x = paddle.to_tensor(x_np, dtype='float64')
            y = paddle.bernoulli(x).numpy()
            index0, index1, index2 = np.nonzero(y)
            self.assertEqual(np.sum(index0), 260028995)
            self.assertEqual(np.sum(index1), 8582429431)
            self.assertEqual(np.sum(index2), 8581445798)
            expect = [0., 0., 0., 0., 0., 0., 0., 1., 1., 1.]
            self.assertTrue(np.array_equal(y[16, 500, 500:510], expect))

            # float32 input: same checks with its own reference values.
            x = paddle.to_tensor(x_np, dtype='float32')
            y = paddle.bernoulli(x).numpy()
            index0, index1, index2 = np.nonzero(y)
            self.assertEqual(np.sum(index0), 260092343)
            self.assertEqual(np.sum(index1), 8583509076)
            self.assertEqual(np.sum(index2), 8582778540)
            expect = [0., 0., 1., 1., 1., 1., 0., 1., 1., 1.]
            self.assertTrue(np.array_equal(y[16, 500, 500:510], expect))
        finally:
            # Restore static mode whether or not the assertions passed.
            paddle.enable_static()
コード例 #20
0
    def test_identity(self):
        """Run ``_test_identity`` on the CPU and, on CUDA builds, on GPU 0."""
        self.place = paddle.CPUPlace()
        self._test_identity()

        if not paddle.is_compiled_with_cuda():
            return
        self.place = paddle.CUDAPlace(0)
        self._test_identity()
コード例 #21
0
    def setUp(self):
        """Install the custom relu op, import it and prepare the test matrix."""
        cur_dir = os.path.dirname(os.path.abspath(__file__))
        on_windows = os.name == 'nt'

        # Build and install the custom op egg into site-packages.
        if on_windows:
            cmd = 'cd /d {} && python custom_relu_setup.py install'.format(
                cur_dir)
        else:
            cmd = 'cd {} && python custom_relu_setup.py install'.format(
                cur_dir)
        run_cmd(cmd)

        # NOTE(Aurelius84): Users normally do not need the following code.
        # The install above happens while this process is already running, so
        # the interpreter does not notice the new egg; sys.path is therefore
        # extended by hand.
        # See: https://stackoverflow.com/questions/56974185/import-runtime-installed-module-using-pip-in-python-3
        # NOTE(zhouwei25): on Windows getsitepackages returns
        # [python install dir, site packages dir], hence index 1 there.
        site_dir = site.getsitepackages()[1 if on_windows else 0]
        custom_egg_path = [
            entry for entry in os.listdir(site_dir)
            if 'custom_relu_module_setup' in entry
        ]
        assert len(custom_egg_path
                   ) == 1, "Matched egg number is %d." % len(custom_egg_path)
        sys.path.append(os.path.join(site_dir, custom_egg_path[0]))

        # usage: import the package directly
        import custom_relu_module_setup
        # `custom_relu_dup` is the same as `custom_relu`
        self.custom_ops = [
            custom_relu_module_setup.custom_relu,
            custom_relu_module_setup.custom_relu_dup
        ]

        has_cuda = paddle.is_compiled_with_cuda()
        self.dtypes = ['float32', 'float64'] + (['float16'] if has_cuda else [])
        self.devices = ['cpu'] + (['gpu'] if has_cuda else [])

        # config seed
        SEED = 2021
        paddle.seed(SEED)
        paddle.framework.random._manual_program_seed(SEED)
コード例 #22
0
ファイル: test_eigh_op.py プロジェクト: sandyhouse/Paddle
 def setUp(self):
     """Prepare input data, tolerances and the execution place."""
     self.init_input_data()
     self.UPLO = 'L'
     # Both tolerances are used by test_eigh_grad.
     self.rtol = self.atol = 1e-5
     if paddle.is_compiled_with_cuda():
         self.place = paddle.CUDAPlace(0)
     else:
         self.place = paddle.CPUPlace()
     np.random.seed(123)
コード例 #23
0
def execute(main_program, startup_program):
    """Run the startup program and then the main program once.

    GPU 0 is used when Paddle is compiled with CUDA, otherwise the CPU.
    """
    use_cuda = paddle.is_compiled_with_cuda()
    device = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
    executor = paddle.static.Executor(device)
    for program in (startup_program, main_program):
        executor.run(program)
コード例 #24
0
    def setUpClass(cls):
        """Enable static mode and initialise fleet; no-op on non-CUDA builds."""
        if paddle.is_compiled_with_cuda():
            paddle.enable_static()
            paddle.set_flags({'FLAGS_cudnn_deterministic': True})
            _clip_by_global_norm_using_mp_type(True)
            role_maker = get_role_maker()
            fleet.init(role_maker=role_maker)
コード例 #25
0
 def test_check_output_gpu(self):
     """Compare GPU ``paddle.linalg.eigvalsh`` with NumPy within tolerance."""
     if not paddle.is_compiled_with_cuda():
         return
     paddle.disable_static(place=paddle.CUDAPlace(0))
     x_tensor = paddle.to_tensor(self.x_np)
     # NumPy provides the reference values.
     reference_w = np.linalg.eigvalsh(self.x_np)
     paddle_w = paddle.linalg.eigvalsh(x_tensor)
     np.testing.assert_allclose(paddle_w,
                                reference_w,
                                rtol=self.rtol,
                                atol=self.atol)
コード例 #26
0
    def test_stream_guard_default_stream(self):
        """Guarding with the current stream must leave that stream current."""
        if not paddle.is_compiled_with_cuda():
            return

        cuda_api = paddle.device.cuda
        stream_before = cuda_api.current_stream()
        with cuda_api.stream_guard(stream_before):
            pass
        stream_after = cuda_api.current_stream()

        # The very same stream object is expected before and after the guard.
        self.assertTrue(id(stream_before) == id(stream_after))
コード例 #27
0
 def setUp(self):
     """Set the seed, layer sizes, batch size and devices for the tests."""
     self.seed = 2021
     self.in_size = self.out_size = 10
     self.batch_size = 4
     # The GPU is only exercised on CUDA builds.
     gpu_devices = ["gpu"] if paddle.is_compiled_with_cuda() else []
     self.devices = ["cpu"] + gpu_devices
コード例 #28
0
 def setUp(self):
     """Prepare dtypes, devices and NumPy input, weight and bias arrays."""
     self.dtypes = ['float32', 'float64']
     gpu_devices = ['gpu'] if paddle.is_compiled_with_cuda() else []
     self.devices = ['cpu'] + gpu_devices
     # Random (3, 2) input, constant 0.5 weight of shape (2, 4), ones bias.
     self.np_x = np.random.random((3, 2)).astype("float32")
     self.np_weight = np.full([2, 4], fill_value=0.5, dtype="float32")
     self.np_bias = np.ones([4], dtype="float32")
コード例 #29
0
 def setUp(self):
     """Register two seed generators and list the places to test on."""
     random_api = paddle.framework.random
     generator_names = ('seed0', 'seed1')
     for generator_name in generator_names:
         random_api.set_random_seed_generator(generator_name, 123)
     # Look both generators up again, as the original set-up does; the
     # returned objects are not used afterwards.
     for generator_name in generator_names:
         random_api.get_random_seed_generator(generator_name)

     gpu_places = ([paddle.CUDAPlace(0)]
                   if paddle.is_compiled_with_cuda() else [])
     self.places = [paddle.CPUPlace()] + gpu_places
コード例 #30
0
ファイル: test_ptq.py プロジェクト: itminner/PaddleSlim
    def test_ptq(self):
        """Train LeNet on MNIST, quantize it with PTQ and compare top-1 accuracy.

        The test passes when the quantized model's top-1 accuracy is less than
        0.002 below that of the fp32 model.
        """
        seed = 1
        np.random.seed(seed)
        paddle.static.default_main_program().random_seed = seed
        paddle.static.default_startup_program().random_seed = seed

        _logger.info("create the fp32 model")
        fp32_lenet = ImperativeLenet()

        _logger.info("prepare data")
        batch_size = 64
        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
        train_dataset = paddle.vision.datasets.MNIST(mode='train',
                                                     backend='cv2',
                                                     transform=transform)
        val_dataset = paddle.vision.datasets.MNIST(mode='test',
                                                   backend='cv2',
                                                   transform=transform)

        if paddle.is_compiled_with_cuda():
            device = paddle.CUDAPlace(0)
        else:
            device = paddle.CPUPlace()
        train_loader = paddle.io.DataLoader(train_dataset,
                                            drop_last=True,
                                            places=device,
                                            batch_size=batch_size,
                                            return_list=True)
        test_loader = paddle.io.DataLoader(val_dataset,
                                           places=device,
                                           batch_size=batch_size,
                                           return_list=True)

        _logger.info("train the fp32 model")
        self.model_train(fp32_lenet, train_loader)

        _logger.info("test fp32 model")
        fp32_top1, fp32_top5 = self.model_test(fp32_lenet, test_loader)

        _logger.info("quantize the fp32 model")
        quanter = PTQ()
        quant_lenet = quanter.quantize(fp32_lenet, fuse=True)

        _logger.info("calibrate")
        self.calibrate(quant_lenet, test_loader)

        _logger.info("save and test the quantized model")
        save_path = "./tmp/model"
        image_spec = paddle.static.InputSpec(shape=[None, 1, 28, 28],
                                             dtype='float32')
        quanter.save_quantized_model(quant_lenet,
                                     save_path,
                                     input_spec=[image_spec])
        quant_top1, quant_top5 = self.model_test(quant_lenet, test_loader)

        _logger.info("FP32 acc: top1: {}, top5: {}".format(fp32_top1,
                                                           fp32_top5))
        _logger.info("Int acc: top1: {}, top5: {}".format(quant_top1,
                                                          quant_top5))

        # Largest tolerated top-1 accuracy drop after quantization.
        max_top1_drop = 0.002
        top1_drop = fp32_top1 - quant_top1
        self.assertTrue(
            top1_drop < max_top1_drop,
            msg="The acc of quant model is too lower than fp32 model")