예제 #1
0
    def test_quant_op(self):
        startup_prog, train_prog = self.get_model()
        place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda(
        ) else fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(startup_prog)
        config_1 = {
            'weight_quantize_type': 'channel_wise_abs_max',
            'activation_quantize_type': 'moving_average_abs_max',
            'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
        }

        quant_prog_1 = quant_aware(
            train_prog, place, config=config_1, for_test=True)
        op_nums_1, quant_op_nums_1 = self.get_op_number(quant_prog_1)
        convert_prog_1 = convert(quant_prog_1, place, config=config_1)
        convert_op_nums_1, convert_quant_op_nums_1 = self.get_op_number(
            convert_prog_1)

        config_1['not_quant_pattern'] = ['last_fc']
        quant_prog_2 = quant_aware(
            train_prog, place, config=config_1, for_test=True)
        op_nums_2, quant_op_nums_2 = self.get_op_number(quant_prog_2)
        convert_prog_2 = convert(quant_prog_2, place, config=config_1)
        convert_op_nums_2, convert_quant_op_nums_2 = self.get_op_number(
            convert_prog_2)

        self.assertTrue(op_nums_1 == op_nums_2)
        # test quant_aware op numbers
        self.assertTrue(op_nums_1 * 4 == quant_op_nums_1)
        # test convert op numbers
        self.assertTrue(convert_op_nums_1 * 2 == convert_quant_op_nums_1)
        # test skip_quant
        self.assertTrue(quant_op_nums_1 - 4 == quant_op_nums_2)
        self.assertTrue(convert_quant_op_nums_1 - 2 == convert_quant_op_nums_2)
예제 #2
0
def main():
    cfg = load_config(FLAGS.config)

    if 'architecture' in cfg:
        main_arch = cfg.architecture
    else:
        raise ValueError("'architecture' not specified in config file.")

    merge_config(FLAGS.opt)

    # Use CPU for exporting inference model instead of GPU
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    model = create(main_arch)

    startup_prog = fluid.Program()
    infer_prog = fluid.Program()
    with fluid.program_guard(infer_prog, startup_prog):
        with fluid.unique_name.guard():
            inputs_def = cfg['TestReader']['inputs_def']
            inputs_def['use_dataloader'] = False
            feed_vars, _ = model.build_inputs(**inputs_def)
            test_fetches = model.test(feed_vars)
    infer_prog = infer_prog.clone(True)

    not_quant_pattern = []
    if FLAGS.not_quant_pattern:
        not_quant_pattern = FLAGS.not_quant_pattern
    config = {
        'weight_quantize_type': 'channel_wise_abs_max',
        'activation_quantize_type': 'moving_average_abs_max',
        'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
        'not_quant_pattern': not_quant_pattern
    }

    infer_prog = quant_aware(infer_prog, place, config, for_test=True)

    exe.run(startup_prog)
    checkpoint.load_params(exe, infer_prog, cfg.weights)

    infer_prog, int8_program = convert(infer_prog,
                                       place,
                                       config,
                                       save_int8=True)

    save_infer_model(os.path.join(FLAGS.output_dir, 'float'), exe, feed_vars,
                     test_fetches, infer_prog)

    save_infer_model(os.path.join(FLAGS.output_dir, 'int'), exe, feed_vars,
                     test_fetches, int8_program)
예제 #3
0
def export_inference_model(args):
    """
    Export PaddlePaddle inference model for prediction depolyment and serving.
    """
    print("Exporting inference model...")
    startup_prog = fluid.Program()
    infer_prog = fluid.Program()
    image, logit_out = build_model(infer_prog,
                                   startup_prog,
                                   phase=ModelPhase.PREDICT)

    # Use CPU for exporting inference model instead of GPU
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)
    infer_prog = infer_prog.clone(for_test=True)
    not_quant_pattern_list = []
    if args.not_quant_pattern is not None:
        not_quant_pattern_list = args.not_quant_pattern

    config = {
        'weight_quantize_type': 'channel_wise_abs_max',
        'activation_quantize_type': 'moving_average_abs_max',
        'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
        'not_quant_pattern': not_quant_pattern_list
    }

    infer_prog = quant_aware(infer_prog, place, config, for_test=True)
    if os.path.exists(cfg.TEST.TEST_MODEL):
        fluid.io.load_persistables(exe,
                                   cfg.TEST.TEST_MODEL,
                                   main_program=infer_prog)
    else:
        print("TEST.TEST_MODEL diretory is empty!")
        exit(-1)

    infer_prog = convert(infer_prog, place, config)

    fluid.io.save_inference_model(cfg.FREEZE.SAVE_DIR,
                                  feeded_var_names=[image.name],
                                  target_vars=[logit_out],
                                  executor=exe,
                                  main_program=infer_prog,
                                  model_filename=cfg.FREEZE.MODEL_FILENAME,
                                  params_filename=cfg.FREEZE.PARAMS_FILENAME)
    print("Inference model exported!")
    print("Exporting inference model config...")
    deploy_cfg_path = export_inference_config()
    print("Inference model saved : [%s]" % (deploy_cfg_path))
예제 #4
0
    def test_accuracy(self):
        image = paddle.static.data(
            name='image', shape=[None, 1, 28, 28], dtype='float32')
        image.stop_gradient = False
        label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
        model = MobileNet()
        out = model.net(input=image, class_dim=10)
        cost = paddle.nn.functional.loss.cross_entropy(input=out, label=label)
        avg_cost = paddle.mean(x=cost)
        acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
        acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
        optimizer = paddle.optimizer.Momentum(
            momentum=0.9,
            learning_rate=0.01,
            weight_decay=paddle.regularizer.L2Decay(4e-5))
        optimizer.minimize(avg_cost)
        main_prog = paddle.static.default_main_program()
        val_prog = main_prog.clone(for_test=True)

        place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda(
        ) else paddle.CPUPlace()
        exe = paddle.static.Executor(place)
        exe.run(paddle.static.default_startup_program())

        def transform(x):
            return np.reshape(x, [1, 28, 28])

        train_dataset = paddle.vision.datasets.MNIST(
            mode='train', backend='cv2', transform=transform)
        test_dataset = paddle.vision.datasets.MNIST(
            mode='test', backend='cv2', transform=transform)
        train_loader = paddle.io.DataLoader(
            train_dataset,
            places=place,
            feed_list=[image, label],
            drop_last=True,
            return_list=False,
            batch_size=64)
        valid_loader = paddle.io.DataLoader(
            test_dataset,
            places=place,
            feed_list=[image, label],
            batch_size=64,
            return_list=False)

        def train(program):
            iter = 0
            for data in train_loader():
                cost, top1, top5 = exe.run(
                    program,
                    feed=data,
                    fetch_list=[avg_cost, acc_top1, acc_top5])
                iter += 1
                if iter % 100 == 0:
                    print(
                        'train iter={}, avg loss {}, acc_top1 {}, acc_top5 {}'.
                        format(iter, cost, top1, top5))

        def test(program):
            iter = 0
            result = [[], [], []]
            for data in valid_loader():
                cost, top1, top5 = exe.run(
                    program,
                    feed=data,
                    fetch_list=[avg_cost, acc_top1, acc_top5])
                iter += 1
                if iter % 100 == 0:
                    print('eval iter={}, avg loss {}, acc_top1 {}, acc_top5 {}'.
                          format(iter, cost, top1, top5))
                result[0].append(cost)
                result[1].append(top1)
                result[2].append(top5)
            print(' avg loss {}, acc_top1 {}, acc_top5 {}'.format(
                np.mean(result[0]), np.mean(result[1]), np.mean(result[2])))
            return np.mean(result[1]), np.mean(result[2])

        train(main_prog)
        top1_1, top5_1 = test(main_prog)

        config = {
            'weight_quantize_type': 'channel_wise_abs_max',
            'activation_quantize_type': 'moving_average_abs_max',
            'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
        }
        quant_train_prog_pact = quant_aware(
            main_prog,
            place,
            config,
            for_test=False,
            act_preprocess_func=pact,
            optimizer_func=get_optimizer,
            executor=exe)

        quant_eval_prog = quant_aware(val_prog, place, config, for_test=True)
        train(quant_train_prog_pact)
        quant_eval_prog, int8_prog = convert(
            quant_eval_prog, place, config, save_int8=True)
        top1_2, top5_2 = test(quant_eval_prog)
        # values before quantization and after quantization should be close
        print("before quantization: top1: {}, top5: {}".format(top1_1, top5_1))
        print("after quantization: top1: {}, top5: {}".format(top1_2, top5_2))
예제 #5
0
def compress(args):
    ############################################################################################################
    # 1. quantization configs
    ############################################################################################################
    quant_config = {
        # weight quantize type, default is 'channel_wise_abs_max'
        'weight_quantize_type': 'channel_wise_abs_max',
        # activation quantize type, default is 'moving_average_abs_max'
        'activation_quantize_type': 'moving_average_abs_max',
        # weight quantize bit num, default is 8
        'weight_bits': 8,
        # activation quantize bit num, default is 8
        'activation_bits': 8,
        # ops of name_scope in not_quant_pattern list, will not be quantized
        'not_quant_pattern': ['skip_quant'],
        # ops of type in quantize_op_types, will be quantized
        'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'],
        # data type after quantization, such as 'uint8', 'int8', etc. default is 'int8'
        'dtype': 'int8',
        # window size for 'range_abs_max' quantization. defaulf is 10000
        'window_size': 10000,
        # The decay coefficient of moving average, default is 0.9
        'moving_rate': 0.9,
    }

    if args.data == "mnist":
        train_dataset = paddle.vision.datasets.MNIST(mode='train')
        val_dataset = paddle.vision.datasets.MNIST(mode='test')
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_dataset = reader.ImageNetDataset(mode='train')
        val_dataset = reader.ImageNetDataset(mode='val')
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))

    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = paddle.static.data(name='image',
                               shape=[None] + image_shape,
                               dtype='float32')
    label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = paddle.nn.functional.loss.cross_entropy(input=out, label=label)
    avg_cost = paddle.mean(x=cost)
    acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
    acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)

    train_prog = paddle.static.default_main_program()
    val_program = paddle.static.default_main_program().clone(for_test=True)

    place = paddle.CUDAPlace(0) if args.use_gpu else paddle.CPUPlace()
    ############################################################################################################
    # 2. quantization transform programs (training aware)
    #    Make some quantization transforms in the graph before training and testing.
    #    According to the weight and activation quantization type, the graph will be added
    #    some fake quantize operators and fake dequantize operators.
    ############################################################################################################
    val_program = quant_aware(val_program,
                              place,
                              quant_config,
                              scope=None,
                              for_test=True)
    compiled_train_prog = quant_aware(train_prog,
                                      place,
                                      quant_config,
                                      scope=None,
                                      for_test=False)
    opt = create_optimizer(args)
    opt.minimize(avg_cost)

    exe = paddle.static.Executor(place)
    exe.run(paddle.static.default_startup_program())

    assert os.path.exists(
        args.pretrained_model), "pretrained_model doesn't exist"

    if args.pretrained_model:
        paddle.static.load(train_prog, args.pretrained_model, exe)

    places = paddle.static.cuda_places(
    ) if args.use_gpu else paddle.static.cpu_places()

    train_loader = paddle.io.DataLoader(train_dataset,
                                        places=places,
                                        feed_list=[image, label],
                                        drop_last=True,
                                        batch_size=args.batch_size,
                                        return_list=False,
                                        use_shared_memory=True,
                                        shuffle=True,
                                        num_workers=4)
    valid_loader = paddle.io.DataLoader(val_dataset,
                                        places=place,
                                        feed_list=[image, label],
                                        drop_last=False,
                                        return_list=False,
                                        batch_size=args.batch_size,
                                        use_shared_memory=True,
                                        shuffle=False)

    def test(epoch, program):
        batch_id = 0
        acc_top1_ns = []
        acc_top5_ns = []
        for data in valid_loader():
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program, feed=data, fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, np.mean(acc_top1_n),
                            np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))
            batch_id += 1

        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(acc_top1_ns)),
                np.mean(np.array(acc_top5_ns))))
        return np.mean(np.array(acc_top1_ns))

    def train(epoch, compiled_train_prog):

        batch_id = 0
        for data in train_loader():
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n = exe.run(
                compiled_train_prog,
                feed=data,
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] - loss: {}; acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n,
                            end_time - start_time))
            batch_id += 1

    build_strategy = paddle.static.BuildStrategy()
    build_strategy.memory_optimize = False
    build_strategy.enable_inplace = False
    build_strategy.fuse_all_reduce_ops = False
    build_strategy.sync_batch_norm = False
    exec_strategy = paddle.static.ExecutionStrategy()
    compiled_train_prog = compiled_train_prog.with_data_parallel(
        loss_name=avg_cost.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    ############################################################################################################
    # train loop
    ############################################################################################################
    best_acc1 = 0.0
    best_epoch = 0
    for i in range(args.num_epochs):
        train(i, compiled_train_prog)
        acc1 = test(i, val_program)
        paddle.static.save(program=val_program,
                           model_path=os.path.join(args.checkpoint_dir,
                                                   str(i)))
        if acc1 > best_acc1:
            best_acc1 = acc1
            best_epoch = i
            paddle.static.save(program=val_program,
                               model_path=os.path.join(args.checkpoint_dir,
                                                       'best_model'))
    if os.path.exists(os.path.join(args.checkpoint_dir, 'best_model')):
        paddle.static.load(executor=exe,
                           model_path=os.path.join(args.checkpoint_dir,
                                                   'best_model'),
                           program=val_program)
    ############################################################################################################
    # 3. Freeze the graph after training by adjusting the quantize
    #    operators' order for the inference.
    #    The dtype of float_program's weights is float32, but in int8 range.
    ############################################################################################################
    float_program, int8_program = convert(val_program, place, quant_config, \
                                                        scope=None, \
                                                        save_int8=True)
    print("eval best_model after convert")
    final_acc1 = test(best_epoch, float_program)
    ############################################################################################################
    # 4. Save inference model
    ############################################################################################################
    model_path = os.path.join(
        quantization_model_save_dir, args.model,
        'act_' + quant_config['activation_quantize_type'] + '_w_' +
        quant_config['weight_quantize_type'])
    float_path = os.path.join(model_path, 'float')
    if not os.path.isdir(model_path):
        os.makedirs(model_path)

    paddle.fluid.io.save_inference_model(dirname=float_path,
                                         feeded_var_names=[image.name],
                                         target_vars=[out],
                                         executor=exe,
                                         main_program=float_program,
                                         model_filename=float_path + '/model',
                                         params_filename=float_path +
                                         '/params')
예제 #6
0
def compress(args):
    num_workers = 4
    shuffle = True
    if args.ce_test:
        # set seed
        seed = 111
        paddle.seed(seed)
        np.random.seed(seed)
        random.seed(seed)
        num_workers = 0
        shuffle = False

    if args.data == "mnist":
        train_dataset = paddle.vision.datasets.MNIST(mode='train')
        val_dataset = paddle.vision.datasets.MNIST(mode='test')
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_dataset = reader.ImageNetDataset(mode='train')
        val_dataset = reader.ImageNetDataset(mode='val')
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))

    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = paddle.static.data(name='image',
                               shape=[None] + image_shape,
                               dtype='float32')
    if args.use_pact:
        image.stop_gradient = False
    label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = paddle.nn.functional.loss.cross_entropy(input=out, label=label)
    avg_cost = paddle.mean(x=cost)
    acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
    acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)

    train_prog = paddle.static.default_main_program()
    val_program = paddle.static.default_main_program().clone(for_test=True)

    if not args.analysis:
        learning_rate, opt = create_optimizer(args)
        opt.minimize(avg_cost)

    place = paddle.CUDAPlace(0) if args.use_gpu else paddle.CPUPlace()
    places = paddle.static.cuda_places(
    ) if args.use_gpu else paddle.static.cpu_places()
    exe = paddle.static.Executor(place)
    exe.run(paddle.static.default_startup_program())

    train_loader = paddle.io.DataLoader(train_dataset,
                                        places=places,
                                        feed_list=[image, label],
                                        drop_last=True,
                                        return_list=False,
                                        batch_size=args.batch_size,
                                        use_shared_memory=True,
                                        shuffle=shuffle,
                                        num_workers=num_workers)

    valid_loader = paddle.io.DataLoader(val_dataset,
                                        places=place,
                                        feed_list=[image, label],
                                        drop_last=False,
                                        return_list=False,
                                        batch_size=args.batch_size,
                                        use_shared_memory=True,
                                        shuffle=False)

    if args.analysis:
        # get all activations names
        activates = [
            'pool2d_1.tmp_0', 'tmp_35', 'batch_norm_21.tmp_2', 'tmp_26',
            'elementwise_mul_5.tmp_0', 'pool2d_5.tmp_0',
            'elementwise_add_5.tmp_0', 'relu_2.tmp_0', 'pool2d_3.tmp_0',
            'conv2d_40.tmp_2', 'elementwise_mul_0.tmp_0', 'tmp_62',
            'elementwise_add_8.tmp_0', 'batch_norm_39.tmp_2',
            'conv2d_32.tmp_2', 'tmp_17', 'tmp_5', 'elementwise_add_9.tmp_0',
            'pool2d_4.tmp_0', 'relu_0.tmp_0', 'tmp_53', 'relu_3.tmp_0',
            'elementwise_add_4.tmp_0', 'elementwise_add_6.tmp_0', 'tmp_11',
            'conv2d_36.tmp_2', 'relu_8.tmp_0', 'relu_5.tmp_0',
            'pool2d_7.tmp_0', 'elementwise_add_2.tmp_0',
            'elementwise_add_7.tmp_0', 'pool2d_2.tmp_0', 'tmp_47',
            'batch_norm_12.tmp_2', 'elementwise_mul_6.tmp_0',
            'elementwise_mul_7.tmp_0', 'pool2d_6.tmp_0', 'relu_6.tmp_0',
            'elementwise_add_0.tmp_0', 'elementwise_mul_3.tmp_0',
            'conv2d_12.tmp_2', 'elementwise_mul_2.tmp_0', 'tmp_8', 'tmp_2',
            'conv2d_8.tmp_2', 'elementwise_add_3.tmp_0',
            'elementwise_mul_1.tmp_0', 'pool2d_8.tmp_0', 'conv2d_28.tmp_2',
            'image', 'conv2d_16.tmp_2', 'batch_norm_33.tmp_2', 'relu_1.tmp_0',
            'pool2d_0.tmp_0', 'tmp_20', 'conv2d_44.tmp_2', 'relu_10.tmp_0',
            'tmp_41', 'relu_4.tmp_0', 'elementwise_add_1.tmp_0', 'tmp_23',
            'batch_norm_6.tmp_2', 'tmp_29', 'elementwise_mul_4.tmp_0', 'tmp_14'
        ]
        var_collector = VarCollector(train_prog, activates, use_ema=True)
        values = var_collector.abs_max_run(train_loader,
                                           exe,
                                           step=None,
                                           loss_name=avg_cost.name)
        np.save('pact_thres.npy', values)
        _logger.info(values)
        _logger.info("PACT threshold have been saved as pact_thres.npy")

        # Draw Histogram in 'dist_pdf/result.pdf'
        # var_collector.pdf(values)

        return

    values = defaultdict(lambda: 20)
    try:
        values = np.load("pact_thres.npy", allow_pickle=True).item()
        values.update(tmp)
        _logger.info("pact_thres.npy info loaded.")
    except:
        _logger.info(
            "cannot find pact_thres.npy. Set init PACT threshold as 20.")
    _logger.info(values)

    # 1. quantization configs
    quant_config = {
        # weight quantize type, default is 'channel_wise_abs_max'
        'weight_quantize_type': 'channel_wise_abs_max',
        # activation quantize type, default is 'moving_average_abs_max'
        'activation_quantize_type': 'moving_average_abs_max',
        # weight quantize bit num, default is 8
        'weight_bits': 8,
        # activation quantize bit num, default is 8
        'activation_bits': 8,
        # ops of name_scope in not_quant_pattern list, will not be quantized
        'not_quant_pattern': ['skip_quant'],
        # ops of type in quantize_op_types, will be quantized
        'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'],
        # data type after quantization, such as 'uint8', 'int8', etc. default is 'int8'
        'dtype': 'int8',
        # window size for 'range_abs_max' quantization. defaulf is 10000
        'window_size': 10000,
        # The decay coefficient of moving average, default is 0.9
        'moving_rate': 0.9,
    }

    # 2. quantization transform programs (training aware)
    #    Make some quantization transforms in the graph before training and testing.
    #    According to the weight and activation quantization type, the graph will be added
    #    some fake quantize operators and fake dequantize operators.

    def pact(x):
        helper = LayerHelper("pact", **locals())
        dtype = 'float32'
        init_thres = values[x.name.split('_tmp_input')[0]]
        u_param_attr = paddle.ParamAttr(
            name=x.name + '_pact',
            initializer=paddle.nn.initializer.Constant(value=init_thres),
            regularizer=paddle.regularizer.L2Decay(0.0001),
            learning_rate=1)
        u_param = helper.create_parameter(attr=u_param_attr,
                                          shape=[1],
                                          dtype=dtype)

        part_a = paddle.nn.functional.relu(x - u_param)
        part_b = paddle.nn.functional.relu(-u_param - x)
        x = x - part_a + part_b
        return x

    def get_optimizer():
        return paddle.optimizer.Momentum(args.lr, 0.9)

    if args.use_pact:
        act_preprocess_func = pact
        optimizer_func = get_optimizer
        executor = exe
    else:
        act_preprocess_func = None
        optimizer_func = None
        executor = None

    val_program = quant_aware(val_program,
                              place,
                              quant_config,
                              scope=None,
                              act_preprocess_func=act_preprocess_func,
                              optimizer_func=optimizer_func,
                              executor=executor,
                              for_test=True)
    compiled_train_prog = quant_aware(train_prog,
                                      place,
                                      quant_config,
                                      scope=None,
                                      act_preprocess_func=act_preprocess_func,
                                      optimizer_func=optimizer_func,
                                      executor=executor,
                                      for_test=False)

    assert os.path.exists(
        args.pretrained_model), "pretrained_model doesn't exist"

    if args.pretrained_model:
        paddle.static.load(train_prog, args.pretrained_model, exe)

    def test(epoch, program):
        batch_id = 0
        acc_top1_ns = []
        acc_top5_ns = []
        for data in valid_loader():
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program, feed=data, fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {:.6f}; acc_top5: {:.6f}; time: {:.3f}"
                    .format(epoch, batch_id, np.mean(acc_top1_n),
                            np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))
            batch_id += 1

        _logger.info(
            "Final eval epoch[{}] - acc_top1: {:.6f}; acc_top5: {:.6f}".format(
                epoch, np.mean(np.array(acc_top1_ns)),
                np.mean(np.array(acc_top5_ns))))
        return np.mean(np.array(acc_top1_ns))

    def train(epoch, compiled_train_prog, lr):

        batch_id = 0
        for data in train_loader():
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n = exe.run(
                compiled_train_prog,
                feed=data,
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])

            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] lr: {:.6f} - loss: {:.6f}; acc_top1: {:.6f}; acc_top5: {:.6f}; time: {:.3f}"
                    .format(epoch, batch_id, learning_rate.get_lr(), loss_n,
                            acc_top1_n, acc_top5_n, end_time - start_time))

            if args.use_pact and batch_id % 1000 == 0:
                threshold = {}
                for var in val_program.list_vars():
                    if 'pact' in var.name:
                        array = np.array(paddle.static.global_scope().find_var(
                            var.name).get_tensor())
                        threshold[var.name] = array[0]
                _logger.info(threshold)
            batch_id += 1
            lr.step()

    build_strategy = paddle.static.BuildStrategy()
    build_strategy.enable_inplace = False
    build_strategy.fuse_all_reduce_ops = False
    exec_strategy = paddle.static.ExecutionStrategy()
    compiled_train_prog = compiled_train_prog.with_data_parallel(
        loss_name=avg_cost.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    # train loop
    best_acc1 = 0.0
    best_epoch = 0

    start_epoch = 0
    if args.checkpoint_dir is not None:
        ckpt_path = args.checkpoint_dir
        assert args.checkpoint_epoch is not None, "checkpoint_epoch must be set"
        start_epoch = args.checkpoint_epoch
        paddle.static.load(executor=exe,
                           model_path=args.checkpoint_dir,
                           program=val_program)

    best_eval_acc1 = 0
    best_acc1_epoch = 0
    for i in range(start_epoch, args.num_epochs):
        train(i, compiled_train_prog, learning_rate)
        acc1 = test(i, val_program)
        if acc1 > best_eval_acc1:
            best_eval_acc1 = acc1
            best_acc1_epoch = i
        _logger.info("Best Validation Acc1: {:.6f}, at epoch {}".format(
            best_eval_acc1, best_acc1_epoch))
        paddle.static.save(model_path=os.path.join(args.output_dir, str(i)),
                           program=val_program)
        if acc1 > best_acc1:
            best_acc1 = acc1
            best_epoch = i
            paddle.static.save(model_path=os.path.join(args.output_dir,
                                                       'best_model'),
                               program=val_program)

    if os.path.exists(os.path.join(args.output_dir, 'best_model.pdparams')):
        paddle.static.load(executor=exe,
                           model_path=os.path.join(args.output_dir,
                                                   'best_model'),
                           program=val_program)

    # 3. Freeze the graph after training by adjusting the quantize
    #    operators' order for the inference.
    #    The dtype of float_program's weights is float32, but in int8 range.
    float_program, int8_program = convert(val_program, place, quant_config, \
                                                        scope=None, \
                                                        save_int8=True)
    _logger.info("eval best_model after convert")
    final_acc1 = test(best_epoch, float_program)
    _logger.info("final acc:{}".format(final_acc1))

    # 4. Save inference model
    model_path = os.path.join(
        quantization_model_save_dir, args.model,
        'act_' + quant_config['activation_quantize_type'] + '_w_' +
        quant_config['weight_quantize_type'])
    float_path = os.path.join(model_path, 'float')
    if not os.path.isdir(model_path):
        os.makedirs(model_path)

    paddle.fluid.io.save_inference_model(dirname=float_path,
                                         feeded_var_names=[image.name],
                                         target_vars=[out],
                                         executor=exe,
                                         main_program=float_program,
                                         model_filename=float_path + '/model',
                                         params_filename=float_path +
                                         '/params')
예제 #7
0
def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
    np.set_printoptions(precision=5, suppress=True)

    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    dataset = SegDataset(file_list=cfg.DATASET.VAL_FILE_LIST,
                         mode=ModelPhase.EVAL,
                         data_dir=cfg.DATASET.DATA_DIR)

    def data_generator():
        #TODO: check is batch reader compatitable with Windows
        if use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()

        for b in data_gen:
            yield b[0], b[1], b[2]

    data_loader, avg_loss, pred, grts, masks = build_model(
        test_prog, startup_prog, phase=ModelPhase.EVAL)

    data_loader.set_sample_generator(data_generator,
                                     drop_last=False,
                                     batch_size=cfg.BATCH_SIZE)

    # Get device environment
    places = fluid.cuda_places() if use_gpu else fluid.cpu_places()
    place = places[0]
    dev_count = len(places)
    print("#Device count: {}".format(dev_count))

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    test_prog = test_prog.clone(for_test=True)
    not_quant_pattern_list = []
    if kwargs['not_quant_pattern'] is not None:
        not_quant_pattern_list = kwargs['not_quant_pattern']
    config = {
        'weight_quantize_type': 'channel_wise_abs_max',
        'activation_quantize_type': 'moving_average_abs_max',
        'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
        'not_quant_pattern': not_quant_pattern_list
    }
    test_prog = quant_aware(test_prog, place, config, for_test=True)

    ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir

    if not os.path.exists(ckpt_dir):
        raise ValueError(
            'The TEST.TEST_MODEL {} is not found'.format(ckpt_dir))

    if ckpt_dir is not None:
        print('load test model:', ckpt_dir)
        fluid.io.load_persistables(exe, ckpt_dir, main_program=test_prog)
    if kwargs['convert']:
        test_prog = convert(test_prog, place, config)
    # Use streaming confusion matrix to calculate mean_iou
    np.set_printoptions(precision=4,
                        suppress=True,
                        linewidth=160,
                        floatmode="fixed")
    conf_mat = ConfusionMatrix(cfg.DATASET.NUM_CLASSES, streaming=True)
    fetch_list = [avg_loss.name, pred.name, grts.name, masks.name]
    num_images = 0
    step = 0
    all_step = cfg.DATASET.TEST_TOTAL_IMAGES // cfg.BATCH_SIZE + 1
    timer = Timer()
    timer.start()
    data_loader.start()
    while True:
        try:
            step += 1
            loss, pred, grts, masks = exe.run(test_prog,
                                              fetch_list=fetch_list,
                                              return_numpy=True)

            loss = np.mean(np.array(loss))

            num_images += pred.shape[0]
            conf_mat.calculate(pred, grts, masks)
            _, iou = conf_mat.mean_iou()
            _, acc = conf_mat.accuracy()

            speed = 1.0 / timer.elapsed_time()

            print(
                "[EVAL]step={} loss={:.5f} acc={:.4f} IoU={:.4f} step/sec={:.2f} | ETA {}"
                .format(step, loss, acc, iou, speed,
                        calculate_eta(all_step - step, speed)))
            timer.restart()
            sys.stdout.flush()
        except fluid.core.EOFException:
            break

    category_iou, avg_iou = conf_mat.mean_iou()
    category_acc, avg_acc = conf_mat.accuracy()
    print("[EVAL]#image={} acc={:.4f} IoU={:.4f}".format(
        num_images, avg_acc, avg_iou))
    print("[EVAL]Category IoU:", category_iou)
    print("[EVAL]Category Acc:", category_acc)
    print("[EVAL]Kappa:{:.4f}".format(conf_mat.kappa()))

    return category_iou, avg_iou, category_acc, avg_acc
예제 #8
0
def main():
    """
    Main evaluate function
    """
    cfg = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    check_config(cfg)
    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)
    # check if paddlepaddle version is satisfied
    check_version()

    main_arch = cfg.architecture

    # define executor
    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    # build program
    model = create(main_arch)
    startup_prog = fluid.Program()
    eval_prog = fluid.Program()
    with fluid.program_guard(eval_prog, startup_prog):
        with fluid.unique_name.guard():
            inputs_def = cfg['EvalReader']['inputs_def']
            test_feed_vars, loader = model.build_inputs(**inputs_def)
            test_fetches = model.eval(test_feed_vars)
    eval_prog = eval_prog.clone(True)

    reader = create_reader(cfg.EvalReader)
    loader.set_sample_list_generator(reader, place)

    # eval already exists json file
    if FLAGS.json_eval:
        logger.info(
            "In json_eval mode, PaddleDetection will evaluate json files in "
            "output_eval directly. And proposal.json, bbox.json and mask.json "
            "will be detected by default.")
        json_eval_results(cfg.metric,
                          json_directory=FLAGS.output_eval,
                          dataset=dataset)
        return

    assert cfg.metric != 'OID', "eval process of OID dataset \
                          is not supported."

    if cfg.metric == "WIDERFACE":
        raise ValueError("metric type {} does not support in tools/eval.py, "
                         "please use tools/face_eval.py".format(cfg.metric))
    assert cfg.metric in ['COCO', 'VOC'], \
            "unknown metric type {}".format(cfg.metric)
    extra_keys = []

    if cfg.metric == 'COCO':
        extra_keys = ['im_info', 'im_id', 'im_shape']
    if cfg.metric == 'VOC':
        extra_keys = ['gt_bbox', 'gt_class', 'is_difficult']

    keys, values, cls = parse_fetches(test_fetches, eval_prog, extra_keys)

    # whether output bbox is normalized in model output layer
    is_bbox_normalized = False
    if hasattr(model, 'is_bbox_normalized') and \
            callable(model.is_bbox_normalized):
        is_bbox_normalized = model.is_bbox_normalized()

    dataset = cfg['EvalReader']['dataset']

    sub_eval_prog = None
    sub_keys = None
    sub_values = None

    not_quant_pattern = []
    if FLAGS.not_quant_pattern:
        not_quant_pattern = FLAGS.not_quant_pattern
    config = {
        'weight_quantize_type': 'channel_wise_abs_max',
        'activation_quantize_type': 'moving_average_abs_max',
        'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
        'not_quant_pattern': not_quant_pattern
    }

    eval_prog = quant_aware(eval_prog, place, config, for_test=True)

    # load model
    exe.run(startup_prog)
    if 'weights' in cfg:
        checkpoint.load_params(exe, eval_prog, cfg.weights)
    eval_prog = convert(eval_prog, place, config, save_int8=False)

    compile_program = fluid.compiler.CompiledProgram(
        eval_prog).with_data_parallel()

    results = eval_run(exe, compile_program, loader, keys, values, cls, cfg,
                       sub_eval_prog, sub_keys, sub_values)

    # evaluation
    resolution = None
    if 'mask' in results[0]:
        resolution = model.mask_head.resolution
    # if map_type not set, use default 11point, only use in VOC eval
    map_type = cfg.map_type if 'map_type' in cfg else '11point'
    eval_results(results,
                 cfg.metric,
                 cfg.num_classes,
                 resolution,
                 is_bbox_normalized,
                 FLAGS.output_eval,
                 map_type,
                 dataset=dataset)
예제 #9
0
def main():
    cfg = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    check_config(cfg)
    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)
    # check if paddlepaddle version is satisfied
    check_version()

    main_arch = cfg.architecture

    dataset = cfg.TestReader['dataset']

    test_images = get_test_images(FLAGS.infer_dir, FLAGS.infer_img)
    dataset.set_images(test_images)

    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    model = create(main_arch)

    startup_prog = fluid.Program()
    infer_prog = fluid.Program()
    with fluid.program_guard(infer_prog, startup_prog):
        with fluid.unique_name.guard():
            inputs_def = cfg['TestReader']['inputs_def']
            feed_vars, loader = model.build_inputs(**inputs_def)
            test_fetches = model.test(feed_vars)
    infer_prog = infer_prog.clone(True)

    reader = create_reader(cfg.TestReader)
    # When iterable mode, set set_sample_list_generator(reader, place)
    loader.set_sample_list_generator(reader)
    not_quant_pattern = []
    if FLAGS.not_quant_pattern:
        not_quant_pattern = FLAGS.not_quant_pattern
    config = {
        'weight_quantize_type': 'channel_wise_abs_max',
        'activation_quantize_type': 'moving_average_abs_max',
        'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
        'not_quant_pattern': not_quant_pattern
    }

    infer_prog = quant_aware(infer_prog, place, config, for_test=True)

    exe.run(startup_prog)

    if cfg.weights:
        checkpoint.load_params(exe, infer_prog, cfg.weights)
    infer_prog = convert(infer_prog, place, config, save_int8=False)

    # parse infer fetches
    assert cfg.metric in ['COCO', 'VOC', 'OID', 'WIDERFACE'], \
            "unknown metric type {}".format(cfg.metric)
    extra_keys = []
    if cfg['metric'] in ['COCO', 'OID']:
        extra_keys = ['im_info', 'im_id', 'im_shape']
    if cfg['metric'] == 'VOC' or cfg['metric'] == 'WIDERFACE':
        extra_keys = ['im_id', 'im_shape']
    keys, values, _ = parse_fetches(test_fetches, infer_prog, extra_keys)

    # parse dataset category
    if cfg.metric == 'COCO':
        from ppdet.utils.coco_eval import bbox2out, mask2out, get_category_info
    if cfg.metric == 'OID':
        from ppdet.utils.oid_eval import bbox2out, get_category_info
    if cfg.metric == "VOC":
        from ppdet.utils.voc_eval import bbox2out, get_category_info
    if cfg.metric == "WIDERFACE":
        from ppdet.utils.widerface_eval_utils import bbox2out, get_category_info

    anno_file = dataset.get_anno()
    with_background = dataset.with_background
    use_default_label = dataset.use_default_label

    clsid2catid, catid2name = get_category_info(anno_file, with_background,
                                                use_default_label)

    # whether output bbox is normalized in model output layer
    is_bbox_normalized = False
    if hasattr(model, 'is_bbox_normalized') and \
            callable(model.is_bbox_normalized):
        is_bbox_normalized = model.is_bbox_normalized()

    imid2path = dataset.get_imid2path()
    iter_id = 0
    try:
        loader.start()
        while True:
            outs = exe.run(infer_prog, fetch_list=values, return_numpy=False)
            res = {
                k: (np.array(v), v.recursive_sequence_lengths())
                for k, v in zip(keys, outs)
            }
            logger.info('Infer iter {}'.format(iter_id))
            iter_id += 1
            bbox_results = None
            mask_results = None
            if 'bbox' in res:
                bbox_results = bbox2out([res], clsid2catid, is_bbox_normalized)
            if 'mask' in res:
                mask_results = mask2out([res], clsid2catid,
                                        model.mask_head.resolution)

            # visualize result
            im_ids = res['im_id'][0]
            for im_id in im_ids:
                image_path = imid2path[int(im_id)]
                image = Image.open(image_path).convert('RGB')

                image = visualize_results(image, int(im_id), catid2name,
                                          FLAGS.draw_threshold, bbox_results,
                                          mask_results)

                save_name = get_save_image_name(FLAGS.output_dir, image_path)
                logger.info(
                    "Detection bbox results save in {}".format(save_name))
                image.save(save_name, quality=95)
    except (StopIteration, fluid.core.EOFException):
        loader.reset()
예제 #10
0
def main():
    # 1. quantization configs
    quant_config = {
        # weight quantize type, default is 'channel_wise_abs_max'
        'weight_quantize_type': 'channel_wise_abs_max',
        # activation quantize type, default is 'moving_average_abs_max'
        'activation_quantize_type': 'moving_average_abs_max',
        # weight quantize bit num, default is 8
        'weight_bits': 8,
        # activation quantize bit num, default is 8
        'activation_bits': 8,
        # ops of name_scope in not_quant_pattern list, will not be quantized
        'not_quant_pattern': ['skip_quant'],
        # ops of type in quantize_op_types, will be quantized
        'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'],
        # data type after quantization, such as 'uint8', 'int8', etc. default is 'int8'
        'dtype': 'int8',
        # window size for 'range_abs_max' quantization. defaulf is 10000
        'window_size': 10000,
        # The decay coefficient of moving average, default is 0.9
        'moving_rate': 0.9,
    }

    startup_prog, eval_program, place, config, alg_type = program.preprocess()

    feeded_var_names, target_vars, fetches_var_name = program.build_export(
        config, eval_program, startup_prog)

    eval_program = eval_program.clone(for_test=True)
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    eval_program = quant_aware(
        eval_program, place, quant_config, scope=None, for_test=True)

    init_model(config, eval_program, exe)

    # 2. Convert the program before save inference program
    #    The dtype of eval_program's weights is float32, but in int8 range.

    eval_program = convert(eval_program, place, quant_config, scope=None)

    eval_fetch_name_list = fetches_var_name
    eval_fetch_varname_list = [v.name for v in target_vars]
    eval_reader = reader_main(config=config, mode="eval")
    quant_info_dict = {'program':eval_program,\
        'reader':eval_reader,\
        'fetch_name_list':eval_fetch_name_list,\
        'fetch_varname_list':eval_fetch_varname_list}

    if alg_type == 'det':
        final_metrics = eval_det_run(exe, config, quant_info_dict, "eval")
    else:
        final_metrics = eval_rec_run(exe, config, quant_info_dict, "eval")
    print(final_metrics)

    # 3. Save inference model
    model_path = "./quant_model"
    if not os.path.isdir(model_path):
        os.makedirs(model_path)

    fluid.io.save_inference_model(
        dirname=model_path,
        feeded_var_names=feeded_var_names,
        target_vars=target_vars,
        executor=exe,
        main_program=eval_program,
        model_filename=model_path + '/model',
        params_filename=model_path + '/params')
    print("model saved as {}".format(model_path))
    def test_accuracy(self):
        image = fluid.layers.data(name='image',
                                  shape=[1, 28, 28],
                                  dtype='float32')
        image.stop_gradient = False
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        model = MobileNet()
        out = model.net(input=image, class_dim=10)
        cost = fluid.layers.cross_entropy(input=out, label=label)
        avg_cost = fluid.layers.mean(x=cost)
        acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
        acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
        optimizer = fluid.optimizer.Momentum(
            momentum=0.9,
            learning_rate=0.01,
            regularization=fluid.regularizer.L2Decay(4e-5))
        optimizer.minimize(avg_cost)
        main_prog = fluid.default_main_program()
        val_prog = main_prog.clone(for_test=True)

        place = fluid.CUDAPlace(
            0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())
        feeder = fluid.DataFeeder([image, label], place, program=main_prog)
        train_reader = paddle.fluid.io.batch(paddle.dataset.mnist.train(),
                                             batch_size=64)
        eval_reader = paddle.fluid.io.batch(paddle.dataset.mnist.test(),
                                            batch_size=64)

        def train(program):
            iter = 0
            for data in train_reader():
                cost, top1, top5 = exe.run(
                    program,
                    feed=feeder.feed(data),
                    fetch_list=[avg_cost, acc_top1, acc_top5])
                iter += 1
                if iter % 100 == 0:
                    print(
                        'train iter={}, avg loss {}, acc_top1 {}, acc_top5 {}'.
                        format(iter, cost, top1, top5))

        def test(program):
            iter = 0
            result = [[], [], []]
            for data in eval_reader():
                cost, top1, top5 = exe.run(
                    program,
                    feed=feeder.feed(data),
                    fetch_list=[avg_cost, acc_top1, acc_top5])
                iter += 1
                if iter % 100 == 0:
                    print(
                        'eval iter={}, avg loss {}, acc_top1 {}, acc_top5 {}'.
                        format(iter, cost, top1, top5))
                result[0].append(cost)
                result[1].append(top1)
                result[2].append(top5)
            print(' avg loss {}, acc_top1 {}, acc_top5 {}'.format(
                np.mean(result[0]), np.mean(result[1]), np.mean(result[2])))
            return np.mean(result[1]), np.mean(result[2])

        train(main_prog)
        top1_1, top5_1 = test(main_prog)

        config = {
            'weight_quantize_type': 'channel_wise_abs_max',
            'activation_quantize_type': 'moving_average_abs_max',
            'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
        }
        quant_train_prog_pact = quant_aware(main_prog,
                                            place,
                                            config,
                                            for_test=False,
                                            act_preprocess_func=pact,
                                            optimizer_func=get_optimizer,
                                            executor=exe)

        quant_eval_prog = quant_aware(val_prog, place, config, for_test=True)
        train(quant_train_prog_pact)
        quant_eval_prog, int8_prog = convert(quant_eval_prog,
                                             place,
                                             config,
                                             save_int8=True)
        top1_2, top5_2 = test(quant_eval_prog)
        # values before quantization and after quantization should be close
        print("before quantization: top1: {}, top5: {}".format(top1_1, top5_1))
        print("after quantization: top1: {}, top5: {}".format(top1_2, top5_2))
예제 #12
0
def export_quant_infermodel(
        executor,
        place=None,
        scope=None,
        quant_config=None,
        train_config=None,
        checkpoint_path=None,
        export_inference_model_path_prefix="./export_quant_infermodel"):
    """export quant model checkpoints to infermodel.
    Args:
        executor(paddle.static.Executor): The executor to load, run and save the
            quantized model.
        place(paddle.CPUPlace or paddle.CUDAPlace): This parameter represents
            the executor run on which device.
        scope(paddle.static.Scope, optional):  Scope records the mapping between
            variable names and variables, similar to brackets in
            programming languages. Usually users can use
            `paddle.static.global_scope <https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api_cn/executor_cn/global_scope_cn.html>`_.
            When ``None`` will use
            `paddle.static.global_scope() <https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api_cn/executor_cn/global_scope_cn.html>`_
            . Default: ``None``.
        quant_config(dict, optional): configs for convert. if set None, will use
                default config. It must be same with config that used in
                'quant_aware'. Default is None.
        train_config(dict):train aware configs, include num_epoch, save_iter_step, learning_rate,
                weight_decay, use_pact, quant_model_ckpt_path,
                model_path_prefix, teacher_model_path_prefix, 
                distill_node_pair(teacher_node_name1, node_name1, teacher_node_name2, teacher_node_name2, ...)
        checkpoint_path(str): checkpoint path need to export quant infer model.
        export_inference_model_path_prefix(str): export infer model path prefix, storage directory of model + model name (excluding suffix).
    Returns:
        None
    """
    scope = paddle.static.global_scope() if not scope else scope
    # parse quant config
    if quant_config is None:
        quant_config = _quant_config_default
    else:
        assert isinstance(quant_config, dict), "quant config must be dict"
        quant_config = _parse_configs(quant_config)
    _logger.info("quant_aware config {}".format(quant_config))

    train_config = _parse_train_configs(train_config)
    distill_program_info = build_distill_prog_with_infermodel(
        executor, place, train_config)
    test_program = distill_program_info.test_program
    test_feed_names = distill_program_info.test_feed_names
    test_fetch_list = distill_program_info.test_fetch_list

    ############################################################################
    # quant
    ############################################################################
    use_pact = False  # export model should set use_pact is False
    if use_pact:
        act_preprocess_func = pact
        optimizer_func = get_pact_optimizer
        pact_executor = executor
    else:
        act_preprocess_func = None
        optimizer_func = None
        pact_executor = None

    test_program = quant_aware(test_program,
                               place,
                               quant_config,
                               scope=scope,
                               act_preprocess_func=act_preprocess_func,
                               optimizer_func=optimizer_func,
                               executor=pact_executor,
                               for_test=True)

    paddle.static.load(executor=executor,
                       model_path=os.path.join(checkpoint_path),
                       program=test_program)
    ############################################################################################################
    # 3. Freeze the graph after training by adjusting the quantize
    #    operators' order for the inference.
    #    The dtype of float_program's weights is float32, but in int8 range.
    ############################################################################################################
    float_program, int8_program = convert(test_program, place, quant_config, \
                                          scope=scope, \
                                          save_int8=True)
    ############################################################################################################
    # 4. Save inference model
    ############################################################################################################
    export_model_dir = os.path.abspath(
        os.path.join(export_inference_model_path_prefix, os.path.pardir))
    if not os.path.exists(export_model_dir):
        os.makedirs(export_model_dir)

    feed_vars = []
    for name in test_feed_names:
        for var in float_program.list_vars():
            if var.name == name:
                feed_vars.append(var)
                break
    assert len(feed_vars) > 0, "can not find feed vars in quant program"
    paddle.static.save_inference_model(
        path_prefix=export_inference_model_path_prefix,
        feed_vars=feed_vars,
        fetch_vars=test_fetch_list,
        executor=executor,
        program=float_program)
예제 #13
0
    def test_accuracy(self):
        image = paddle.static.data(name='image',
                                   shape=[None, 1, 28, 28],
                                   dtype='float32')
        label = paddle.static.data(name='label',
                                   shape=[None, 1],
                                   dtype='int64')
        model = MobileNet()
        out = model.net(input=image, class_dim=10)
        cost = paddle.nn.functional.loss.cross_entropy(input=out, label=label)
        avg_cost = paddle.mean(x=cost)
        acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
        acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
        optimizer = paddle.optimizer.Momentum(
            momentum=0.9,
            learning_rate=0.01,
            weight_decay=paddle.regularizer.L2Decay(4e-5))
        optimizer.minimize(avg_cost)
        main_prog = paddle.static.default_main_program()
        val_prog = main_prog.clone(for_test=True)

        place = paddle.CUDAPlace(0) if paddle.fluid.is_compiled_with_cuda(
        ) else paddle.static.CPUPlace()
        exe = paddle.static.Executor(place)
        exe.run(paddle.static.default_startup_program())
        train_loader = paddle.io.DataLoader.from_generator(
            feed_list=[image, label],
            capacity=512,
            use_double_buffer=True,
            iterable=True)
        valid_loader = paddle.io.DataLoader.from_generator(
            feed_list=[image, label],
            capacity=512,
            use_double_buffer=True,
            iterable=True)
        train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                    batch_size=64)
        eval_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=64)
        train_loader.set_sample_list_generator(train_reader, place)
        valid_loader.set_sample_list_generator(eval_reader, place)

        def train(program):
            iter = 0
            for data in train_loader():
                cost, top1, top5 = exe.run(
                    program,
                    feed=data,
                    fetch_list=[avg_cost, acc_top1, acc_top5])
                iter += 1
                if iter % 100 == 0:
                    print(
                        'train iter={}, avg loss {}, acc_top1 {}, acc_top5 {}'.
                        format(iter, cost, top1, top5))

        def test(program):
            iter = 0
            result = [[], [], []]
            for data in valid_loader():
                cost, top1, top5 = exe.run(
                    program,
                    feed=data,
                    fetch_list=[avg_cost, acc_top1, acc_top5])
                iter += 1
                if iter % 100 == 0:
                    print(
                        'eval iter={}, avg loss {}, acc_top1 {}, acc_top5 {}'.
                        format(iter, cost, top1, top5))
                result[0].append(cost)
                result[1].append(top1)
                result[2].append(top5)
            print(' avg loss {}, acc_top1 {}, acc_top5 {}'.format(
                np.mean(result[0]), np.mean(result[1]), np.mean(result[2])))
            return np.mean(result[1]), np.mean(result[2])

        train(main_prog)
        top1_1, top5_1 = test(main_prog)

        config = {
            'weight_quantize_type': 'channel_wise_abs_max',
            'activation_quantize_type': 'moving_average_abs_max',
            'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
        }
        quant_train_prog = quant_aware(main_prog,
                                       place,
                                       config,
                                       for_test=False)
        quant_eval_prog = quant_aware(val_prog, place, config, for_test=True)
        train(quant_train_prog)
        quant_eval_prog, int8_prog = convert(quant_eval_prog,
                                             place,
                                             config,
                                             save_int8=True)
        top1_2, top5_2 = test(quant_eval_prog)
        # values before quantization and after quantization should be close
        print("before quantization: top1: {}, top5: {}".format(top1_1, top5_1))
        print("after quantization: top1: {}, top5: {}".format(top1_2, top5_2))
예제 #14
0
def compress(args):
    # 1. quantization configs
    quant_config = {
        # weight quantize type, default is 'channel_wise_abs_max'
        'weight_quantize_type': 'channel_wise_abs_max',
        # activation quantize type, default is 'moving_average_abs_max'
        'activation_quantize_type': 'moving_average_abs_max',
        # weight quantize bit num, default is 8
        'weight_bits': 8,
        # activation quantize bit num, default is 8
        'activation_bits': 8,
        # ops of name_scope in not_quant_pattern list, will not be quantized
        'not_quant_pattern': ['skip_quant'],
        # ops of type in quantize_op_types, will be quantized
        'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'],
        # data type after quantization, such as 'uint8', 'int8', etc. default is 'int8'
        'dtype': 'int8',
        # window size for 'range_abs_max' quantization. defaulf is 10000
        'window_size': 10000,
        # The decay coefficient of moving average, default is 0.9
        'moving_rate': 0.9,
    }

    train_reader = None
    test_reader = None
    if args.data == "mnist":
        import paddle.dataset.mnist as reader
        train_reader = reader.train()
        val_reader = reader.test()
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_reader = reader.train()
        val_reader = reader.val()
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))

    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    if args.use_pact:
        image.stop_gradient = False
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)

    train_prog = fluid.default_main_program()
    val_program = fluid.default_main_program().clone(for_test=True)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    opt = create_optimizer(args)
    opt.minimize(avg_cost)

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # 2. quantization transform programs (training aware)
    #    Make some quantization transforms in the graph before training and testing.
    #    According to the weight and activation quantization type, the graph will be added
    #    some fake quantize operators and fake dequantize operators.

    if args.use_pact:
        act_preprocess_func = pact
        optimizer_func = get_optimizer
        executor = exe
    else:
        act_preprocess_func = None
        optimizer_func = None
        executor = None

    val_program = quant_aware(val_program,
                              place,
                              quant_config,
                              scope=None,
                              act_preprocess_func=act_preprocess_func,
                              optimizer_func=optimizer_func,
                              executor=executor,
                              for_test=True)
    compiled_train_prog = quant_aware(train_prog,
                                      place,
                                      quant_config,
                                      scope=None,
                                      act_preprocess_func=act_preprocess_func,
                                      optimizer_func=optimizer_func,
                                      executor=executor,
                                      for_test=False)

    assert os.path.exists(
        args.pretrained_model), "pretrained_model doesn't exist"

    if args.pretrained_model:

        def if_exist(var):
            return os.path.exists(os.path.join(args.pretrained_model,
                                               var.name))

        fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)

    val_reader = paddle.fluid.io.batch(val_reader, batch_size=args.batch_size)
    train_reader = paddle.fluid.io.batch(train_reader,
                                         batch_size=args.batch_size,
                                         drop_last=True)

    train_feeder = feeder = fluid.DataFeeder([image, label], place)
    val_feeder = feeder = fluid.DataFeeder([image, label],
                                           place,
                                           program=val_program)

    def test(epoch, program):
        batch_id = 0
        acc_top1_ns = []
        acc_top5_ns = []
        for data in val_reader():
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program,
                feed=train_feeder.feed(data),
                fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, np.mean(acc_top1_n),
                            np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))
            batch_id += 1

        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(acc_top1_ns)),
                np.mean(np.array(acc_top5_ns))))
        return np.mean(np.array(acc_top1_ns))

    def train(epoch, compiled_train_prog):

        batch_id = 0
        for data in train_reader():
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n = exe.run(
                compiled_train_prog,
                feed=train_feeder.feed(data),
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] - loss: {}; acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n,
                            end_time - start_time))

            if args.use_pact and batch_id % 1000 == 0:
                threshold = {}
                for var in val_program.list_vars():
                    if 'pact' in var.name:
                        array = np.array(fluid.global_scope().find_var(
                            var.name).get_tensor())
                        threshold[var.name] = array[0]
                print(threshold)

            batch_id += 1

    build_strategy = fluid.BuildStrategy()
    build_strategy.memory_optimize = False
    build_strategy.enable_inplace = False
    build_strategy.fuse_all_reduce_ops = False
    build_strategy.sync_batch_norm = False
    exec_strategy = fluid.ExecutionStrategy()
    compiled_train_prog = compiled_train_prog.with_data_parallel(
        loss_name=avg_cost.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    # train loop
    best_acc1 = 0.0
    best_epoch = 0
    for i in range(args.num_epochs):
        train(i, compiled_train_prog)
        acc1 = test(i, val_program)
        fluid.io.save_persistables(exe,
                                   dirname=os.path.join(
                                       args.checkpoint_dir, str(i)),
                                   main_program=val_program)
        if acc1 > best_acc1:
            best_acc1 = acc1
            best_epoch = i
            fluid.io.save_persistables(exe,
                                       dirname=os.path.join(
                                           args.checkpoint_dir, 'best_model'),
                                       main_program=val_program)
    if os.path.exists(os.path.join(args.checkpoint_dir, 'best_model')):
        fluid.io.load_persistables(exe,
                                   dirname=os.path.join(
                                       args.checkpoint_dir, 'best_model'),
                                   main_program=val_program)
    # 3. Freeze the graph after training by adjusting the quantize
    #    operators' order for the inference.
    #    The dtype of float_program's weights is float32, but in int8 range.
    float_program, int8_program = convert(val_program, place, quant_config, \
                                                        scope=None, \
                                                        save_int8=True)
    print("eval best_model after convert")
    final_acc1 = test(best_epoch, float_program)
    # 4. Save inference model
    model_path = os.path.join(
        quantization_model_save_dir, args.model,
        'act_' + quant_config['activation_quantize_type'] + '_w_' +
        quant_config['weight_quantize_type'])
    float_path = os.path.join(model_path, 'float')
    int8_path = os.path.join(model_path, 'int8')
    if not os.path.isdir(model_path):
        os.makedirs(model_path)

    fluid.io.save_inference_model(dirname=float_path,
                                  feeded_var_names=[image.name],
                                  target_vars=[out],
                                  executor=exe,
                                  main_program=float_program,
                                  model_filename=float_path + '/model',
                                  params_filename=float_path + '/params')

    fluid.io.save_inference_model(dirname=int8_path,
                                  feeded_var_names=[image.name],
                                  target_vars=[out],
                                  executor=exe,
                                  main_program=int8_program,
                                  model_filename=int8_path + '/model',
                                  params_filename=int8_path + '/params')