Exemple #1
0
 def residual_block_quant(self, quant_type):
     """Apply the quantization training transpile to a residual-block program.

     A fresh main/startup program pair is filled with the loss returned by
     ``residual_block(2)`` and an Adam optimizer (learning rate 0.001).  The
     main program is then rewritten by ``QuantizeTranspiler`` using
     ``quant_type`` as the activation quantize type, and the result is
     handed to ``self.check_program``.
     """
     main_prog, startup_prog = fluid.Program(), fluid.Program()
     with fluid.program_guard(main_prog, startup_prog):
         net_loss = residual_block(2)
         fluid.optimizer.Adam(learning_rate=0.001).minimize(net_loss)
         # Transpile while the guard is still active, as the original did.
         transpiler = QuantizeTranspiler(
             activation_quantize_type=quant_type)
         transpiler.training_transpile(main_prog)
         self.check_program(main_prog)
def infer():
    """Quantize a saved inference model and run it on one input image.

    Steps:
      1. Load the inference model stored at ``args.model``.
      2. Remove every ``fetch`` op from the loaded program.
      3. Optionally replace the feed / fetch variables with the ones named
         in ``args.input_ops`` / ``args.output_ops`` (comma-separated
         variable names; empty names are ignored).
      4. Apply ``QuantizeTranspiler`` (training transpile, then freeze).
      5. Run the program on the float32 data read from ``args.input_image``
         (reshaped to 3x224x224) and print at most 20 evenly strided values
         of each fetched output, all on one space-separated line.

    Reads the module-level ``args`` namespace; returns nothing.
    """
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    [program, feed, fetch] = fluid.io.load_inference_model(args.model, exe)

    # remove fetch ops in origin program
    for block in program.blocks:
        ops = list(block.ops)
        # Remove from the back: deleting an op shifts every later op down by
        # one, so walking the indices in reverse keeps the indices of the
        # ops still to be visited valid even with several fetch ops.
        for idx in reversed(range(len(ops))):
            if ops[idx].type == "fetch":
                block._remove_op(idx)

    # set feed and fetch list
    if args.input_ops is not None:
        # str.split never returns an empty list, so drop empty names
        # explicitly; an empty option then keeps the model's own feed list.
        feed_names = [name for name in args.input_ops.split(',') if name]
        if feed_names:
            feed = [
                fluid.framework._get_var(name, program) for name in feed_names
            ]
    if args.output_ops is not None:
        fetch_names = [name for name in args.output_ops.split(',') if name]
        if fetch_names:
            fetch = [
                fluid.framework._get_var(name, program)
                for name in fetch_names
            ]

    # quantize weights
    quant_transpiler = QuantizeTranspiler()
    quant_transpiler.training_transpile(program)

    # read test image
    image = np.fromfile(args.input_image, dtype=np.float32)
    test_data = [[image.reshape([3, 224, 224])]]

    # infer
    with fluid.program_guard(program):
        quant_transpiler.freeze_program(program, place)
        feeder = fluid.DataFeeder(feed_list=feed, place=place)
        fetch_out = exe.run(program=program,
                            feed=feeder.feed(test_data),
                            fetch_list=fetch)

    # print result
    samples = []
    for out in fetch_out:
        if out.size == 0:
            # Nothing to sample; also avoids a zero stride below.
            continue
        stride = (out.size + 19) // 20  # ceil(size / 20)
        count = out.size // stride
        samples.extend(str(out.flat[i * stride]) for i in range(count))
    if samples:
        # A single parenthesised argument prints identically on Python 2
        # and Python 3.
        print(" ".join(samples))
def convert():
    """Quantize a saved inference model to int8 and save the result.

    Steps:
      1. Load the inference model stored at ``args.model``.
      2. Remove every ``fetch`` op from the loaded program.
      3. Optionally replace the feed / fetch variables with the ones named
         in ``args.input_ops`` / ``args.output_ops`` (comma-separated
         variable names; empty names are ignored).
      4. Apply ``QuantizeTranspiler``: training transpile, freeze, then
         convert the parameters to int8.
      5. Rename the fake quantize / dequantize ops to ``quantize`` /
         ``dequantize`` and save the model to ``args.output``.

    Reads the module-level ``args`` namespace; returns nothing.
    """
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    [program, feed, fetch] = fluid.io.load_inference_model(args.model, exe)

    # remove fetch ops in origin program
    for block in program.blocks:
        ops = list(block.ops)
        # Remove from the back: deleting an op shifts every later op down by
        # one, so walking the indices in reverse keeps the indices of the
        # ops still to be visited valid even with several fetch ops.
        for idx in reversed(range(len(ops))):
            if ops[idx].type == "fetch":
                block._remove_op(idx)

    # set feed and fetch list
    if args.input_ops is not None:
        # str.split never returns an empty list, so drop empty names
        # explicitly; an empty option then keeps the model's own feed list.
        feed_names = [name for name in args.input_ops.split(',') if name]
        if feed_names:
            feed = [
                fluid.framework._get_var(name, program) for name in feed_names
            ]
    if args.output_ops is not None:
        fetch_names = [name for name in args.output_ops.split(',') if name]
        if fetch_names:
            fetch = [
                fluid.framework._get_var(name, program)
                for name in fetch_names
            ]

    # quantize weights and save model
    quant_transpiler = QuantizeTranspiler()
    quant_transpiler.training_transpile(program)

    with fluid.program_guard(program):
        quant_transpiler.freeze_program(program, place)
        quant_transpiler.convert_to_int8(program, place)
        # NOTE(review): presumably "quantize"/"dequantize" are the op names
        # the consuming runtime expects -- confirm against the deployment
        # target.
        for block in program.blocks:
            for op in block.ops:
                op_type = op.type
                if op_type == "fake_dequantize_max_abs":
                    op.desc.set_type("dequantize")
                elif op_type in ("fake_quantize_abs_max",
                                 "fake_quantize_range_abs_max"):
                    op.desc.set_type("quantize")
        fluid.io.save_inference_model(args.output, feed, fetch, exe, program)
Exemple #4
0
    def freeze_program(self, use_cuda, seed):
        """Train briefly with quantization ops, then check freeze and int8 conversion.

        Builds a training program and a test program around ``conv_net``,
        applies ``QuantizeTranspiler.training_transpile`` to both, runs a few
        training iterations on MNIST batches, and then verifies on one test
        batch that:

        * the loss after ``freeze_program`` stays within 5e-3 of the loss
          before freezing;
        * after ``convert_to_int8`` and a save/load round trip through
          ``'model_8bit'``, the variable ``conv2d_1.w_0.int8`` has dtype
          ``np.int8`` and the same element sum as the frozen
          ``conv2d_1.w_0`` weight.

        Args:
            use_cuda: if true, run on ``fluid.CUDAPlace(0)``; otherwise on
                ``fluid.CPUPlace()``.
            seed: value assigned to ``random_seed`` of the programs built
                by ``build_program``.
        """
        def build_program(main, startup, is_test):
            """Fill ``main``/``startup`` with the data layers and the ``conv_net`` loss.

            An Adam optimizer (learning rate 0.001) is added unless
            ``is_test`` is true.  Returns ``([img, label], loss)``.
            """
            main.random_seed = seed
            startup.random_seed = seed
            # NOTE(review): presumably the unique_name guard makes both
            # build_program calls generate the same variable names -- confirm.
            with fluid.unique_name.guard():
                with fluid.program_guard(main, startup):
                    img = fluid.layers.data(
                        name='image', shape=[1, 28, 28], dtype='float32')
                    label = fluid.layers.data(
                        name='label', shape=[1], dtype='int64')
                    loss = conv_net(img, label)
                    if not is_test:
                        opt = fluid.optimizer.Adam(learning_rate=0.001)
                        opt.minimize(loss)
            return [img, label], loss

        main = fluid.Program()
        startup = fluid.Program()
        test_program = fluid.Program()

        # Seed the Python and NumPy random generators as well.
        import random
        random.seed(0)
        np.random.seed(0)

        # Only the handles returned for the training program are kept; the
        # test program is later fed and fetched through these same `feeds`
        # and `loss` objects.
        feeds, loss = build_program(main, startup, False)
        build_program(test_program, startup, True)
        test_program = test_program.clone(for_test=True)

        quant_type = 'range_abs_max'  # 'range_abs_max' or 'abs_max'
        quant_transpiler = QuantizeTranspiler(
            activation_quantize_type=quant_type)
        # Both programs are transpiled against the shared startup program.
        quant_transpiler.training_transpile(main, startup)
        quant_transpiler.training_transpile(test_program, startup)

        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
        exe = fluid.Executor(place)
        iters = 5
        batch_size = 8
        class_num = 10  # not referenced anywhere else in this method
        exe.run(startup)

        train_reader = paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.mnist.train(), buf_size=500),
            batch_size=batch_size)
        test_reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=batch_size)
        feeder = fluid.DataFeeder(feed_list=feeds, place=place)

        # Run `iters` training steps.  train_reader() is called afresh on
        # every iteration, so each step consumes the first batch of a newly
        # created reader.  The fetched loss_v is not inspected.
        with fluid.program_guard(main):
            for _ in range(iters):
                data = next(train_reader())
                loss_v = exe.run(program=main,
                                 feed=feeder.feed(data),
                                 fetch_list=[loss])

        with fluid.program_guard(test_program):
            # A single test batch is reused for every evaluation below.
            test_data = next(test_reader())
            # NOTE(review): 'conv2d_1.w_0.quantized' is presumably a variable
            # inserted by training_transpile -- confirm.
            w_var = fluid.framework._get_var('conv2d_1.w_0.quantized',
                                             test_program)
            # Testing during training
            test_loss1, w_quant = exe.run(program=test_program,
                                          feed=feeder.feed(test_data),
                                          fetch_list=[loss, w_var])

            # Freeze program for inference, but the weight of fc/conv is still float type.
            quant_transpiler.freeze_program(test_program, place)
            test_loss2, = exe.run(program=test_program,
                                  feed=feeder.feed(test_data),
                                  fetch_list=[loss])
            # Freezing must leave the loss on the same batch (almost) unchanged.
            self.assertAlmostEqual(test_loss1, test_loss2, delta=5e-3)
            # Read the frozen weight from the global scope.
            w_freeze = np.array(fluid.global_scope().find_var('conv2d_1.w_0')
                                .get_tensor())
            # Disabled check: comparing the sums of w_freeze and w_quant
            # fails (-432.0 != -433.0), which is due to the calculation precision.
            #self.assertAlmostEqual(np.sum(w_freeze), np.sum(w_quant))

            # Convert parameter to 8-bit.
            quant_transpiler.convert_to_int8(test_program, place)
            # Save the 8-bit parameter and model file.
            fluid.io.save_inference_model(
                'model_8bit', ['image', 'label'], [loss],
                exe,
                test_program,
                clip_extra=True)
            # Test whether the 8-bit parameter and model file can be loaded successfully.
            # The returned program and feed/fetch lists are not used afterwards.
            [infer, feed, fetch] = fluid.io.load_inference_model('model_8bit',
                                                                 exe)
            # Check the loaded 8-bit weight.
            w_8bit = np.array(fluid.global_scope().find_var('conv2d_1.w_0.int8')
                              .get_tensor())

            # The int8 weight must have the int8 dtype and the same element
            # sum as the frozen weight read above.
            self.assertEqual(w_8bit.dtype, np.int8)
            self.assertEqual(np.sum(w_8bit), np.sum(w_freeze))