Code Example #1
    def test_quant_op(self):
        startup_prog, train_prog = self.get_model()
        place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda(
        ) else fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(startup_prog)
        config_1 = {
            'weight_quantize_type': 'channel_wise_abs_max',
            'activation_quantize_type': 'moving_average_abs_max',
            'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
        }

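        # quant_aware inserts fake quantize/dequantize ops for the op types in
        # 'quantize_op_types'; for_test=True builds the evaluation variant.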
        quant_prog_1 = quant_aware(
            train_prog, place, config=config_1, for_test=True)
        op_nums_1, quant_op_nums_1 = self.get_op_number(quant_prog_1)
        convert_prog_1 = convert(quant_prog_1, place, config=config_1)
        convert_op_nums_1, convert_quant_op_nums_1 = self.get_op_number(
            convert_prog_1)

        config_1['not_quant_pattern'] = ['last_fc']
        quant_prog_2 = quant_aware(
            train_prog, place, config=config_1, for_test=True)
        op_nums_2, quant_op_nums_2 = self.get_op_number(quant_prog_2)
        convert_prog_2 = convert(quant_prog_2, place, config=config_1)
        convert_op_nums_2, convert_quant_op_nums_2 = self.get_op_number(
            convert_prog_2)

        self.assertEqual(op_nums_1, op_nums_2)
        # test quant_aware op numbers
        self.assertEqual(op_nums_1 * 4, quant_op_nums_1)
        # test convert op numbers
        self.assertEqual(convert_op_nums_1 * 2, convert_quant_op_nums_1)
        # test skip_quant
        self.assertEqual(quant_op_nums_1 - 4, quant_op_nums_2)
        self.assertEqual(convert_quant_op_nums_1 - 2, convert_quant_op_nums_2)
Code Example #2
def main():
    cfg = load_config(FLAGS.config)

    if 'architecture' in cfg:
        main_arch = cfg.architecture
    else:
        raise ValueError("'architecture' not specified in config file.")

    merge_config(FLAGS.opt)

    # Use CPU for exporting inference model instead of GPU
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    model = create(main_arch)

    startup_prog = fluid.Program()
    infer_prog = fluid.Program()
    with fluid.program_guard(infer_prog, startup_prog):
        with fluid.unique_name.guard():
            inputs_def = cfg['TestReader']['inputs_def']
            inputs_def['use_dataloader'] = False
            feed_vars, _ = model.build_inputs(**inputs_def)
            test_fetches = model.test(feed_vars)
    infer_prog = infer_prog.clone(True)

    not_quant_pattern = []
    if FLAGS.not_quant_pattern:
        not_quant_pattern = FLAGS.not_quant_pattern
    config = {
        'weight_quantize_type': 'channel_wise_abs_max',
        'activation_quantize_type': 'moving_average_abs_max',
        'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
        'not_quant_pattern': not_quant_pattern
    }

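    # Rewrite the inference program with fake quant/dequant ops before the
    # trained quantization parameters are loaded below.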
    infer_prog = quant_aware(infer_prog, place, config, for_test=True)

    exe.run(startup_prog)
    checkpoint.load_params(exe, infer_prog, cfg.weights)

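    # convert freezes the quantized program for inference; save_int8=True
    # additionally returns a program whose weights are stored as int8.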
    infer_prog, int8_program = convert(infer_prog,
                                       place,
                                       config,
                                       save_int8=True)

    save_infer_model(os.path.join(FLAGS.output_dir, 'float'), exe, feed_vars,
                     test_fetches, infer_prog)

    save_infer_model(os.path.join(FLAGS.output_dir, 'int'), exe, feed_vars,
                     test_fetches, int8_program)
Code Example #3
File: export_model.py  Project: yuantao15/PaddleSeg
def export_inference_model(args):
    """
    Export PaddlePaddle inference model for prediction deployment and serving.
    """
    print("Exporting inference model...")
    startup_prog = fluid.Program()
    infer_prog = fluid.Program()
    image, logit_out = build_model(infer_prog,
                                   startup_prog,
                                   phase=ModelPhase.PREDICT)

    # Use CPU for exporting inference model instead of GPU
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)
    infer_prog = infer_prog.clone(for_test=True)
    not_quant_pattern_list = []
    if args.not_quant_pattern is not None:
        not_quant_pattern_list = args.not_quant_pattern

    config = {
        'weight_quantize_type': 'channel_wise_abs_max',
        'activation_quantize_type': 'moving_average_abs_max',
        'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
        'not_quant_pattern': not_quant_pattern_list
    }

    infer_prog = quant_aware(infer_prog, place, config, for_test=True)
    if os.path.exists(cfg.TEST.TEST_MODEL):
        fluid.io.load_persistables(exe,
                                   cfg.TEST.TEST_MODEL,
                                   main_program=infer_prog)
    else:
        print("TEST.TEST_MODEL diretory is empty!")
        exit(-1)

    infer_prog = convert(infer_prog, place, config)

    fluid.io.save_inference_model(cfg.FREEZE.SAVE_DIR,
                                  feeded_var_names=[image.name],
                                  target_vars=[logit_out],
                                  executor=exe,
                                  main_program=infer_prog,
                                  model_filename=cfg.FREEZE.MODEL_FILENAME,
                                  params_filename=cfg.FREEZE.PARAMS_FILENAME)
    print("Inference model exported!")
    print("Exporting inference model config...")
    deploy_cfg_path = export_inference_config()
    print("Inference model saved : [%s]" % (deploy_cfg_path))
Code Example #4
File: train.py  Project: itminner/PaddleSlim
def compress(args):
    num_workers = 4
    shuffle = True
    if args.ce_test:
        # set seed
        seed = 111
        paddle.seed(seed)
        np.random.seed(seed)
        random.seed(seed)
        num_workers = 0
        shuffle = False

    if args.data == "mnist":
        train_dataset = paddle.vision.datasets.MNIST(mode='train')
        val_dataset = paddle.vision.datasets.MNIST(mode='test')
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_dataset = reader.ImageNetDataset(mode='train')
        val_dataset = reader.ImageNetDataset(mode='val')
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))

    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = paddle.static.data(name='image',
                               shape=[None] + image_shape,
                               dtype='float32')
    if args.use_pact:
        image.stop_gradient = False
    label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = paddle.nn.functional.loss.cross_entropy(input=out, label=label)
    avg_cost = paddle.mean(x=cost)
    acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
    acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)

    train_prog = paddle.static.default_main_program()
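    # Clone for evaluation before opt.minimize below so that val_program
    # contains no backward or optimizer ops.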
    val_program = paddle.static.default_main_program().clone(for_test=True)

    if not args.analysis:
        learning_rate, opt = create_optimizer(args)
        opt.minimize(avg_cost)

    place = paddle.CUDAPlace(0) if args.use_gpu else paddle.CPUPlace()
    places = paddle.static.cuda_places(
    ) if args.use_gpu else paddle.static.cpu_places()
    exe = paddle.static.Executor(place)
    exe.run(paddle.static.default_startup_program())

    train_loader = paddle.io.DataLoader(train_dataset,
                                        places=places,
                                        feed_list=[image, label],
                                        drop_last=True,
                                        return_list=False,
                                        batch_size=args.batch_size,
                                        use_shared_memory=True,
                                        shuffle=shuffle,
                                        num_workers=num_workers)

    valid_loader = paddle.io.DataLoader(val_dataset,
                                        places=place,
                                        feed_list=[image, label],
                                        drop_last=False,
                                        return_list=False,
                                        batch_size=args.batch_size,
                                        use_shared_memory=True,
                                        shuffle=False)

    if args.analysis:
        # get all activation names
        activates = [
            'pool2d_1.tmp_0', 'tmp_35', 'batch_norm_21.tmp_2', 'tmp_26',
            'elementwise_mul_5.tmp_0', 'pool2d_5.tmp_0',
            'elementwise_add_5.tmp_0', 'relu_2.tmp_0', 'pool2d_3.tmp_0',
            'conv2d_40.tmp_2', 'elementwise_mul_0.tmp_0', 'tmp_62',
            'elementwise_add_8.tmp_0', 'batch_norm_39.tmp_2',
            'conv2d_32.tmp_2', 'tmp_17', 'tmp_5', 'elementwise_add_9.tmp_0',
            'pool2d_4.tmp_0', 'relu_0.tmp_0', 'tmp_53', 'relu_3.tmp_0',
            'elementwise_add_4.tmp_0', 'elementwise_add_6.tmp_0', 'tmp_11',
            'conv2d_36.tmp_2', 'relu_8.tmp_0', 'relu_5.tmp_0',
            'pool2d_7.tmp_0', 'elementwise_add_2.tmp_0',
            'elementwise_add_7.tmp_0', 'pool2d_2.tmp_0', 'tmp_47',
            'batch_norm_12.tmp_2', 'elementwise_mul_6.tmp_0',
            'elementwise_mul_7.tmp_0', 'pool2d_6.tmp_0', 'relu_6.tmp_0',
            'elementwise_add_0.tmp_0', 'elementwise_mul_3.tmp_0',
            'conv2d_12.tmp_2', 'elementwise_mul_2.tmp_0', 'tmp_8', 'tmp_2',
            'conv2d_8.tmp_2', 'elementwise_add_3.tmp_0',
            'elementwise_mul_1.tmp_0', 'pool2d_8.tmp_0', 'conv2d_28.tmp_2',
            'image', 'conv2d_16.tmp_2', 'batch_norm_33.tmp_2', 'relu_1.tmp_0',
            'pool2d_0.tmp_0', 'tmp_20', 'conv2d_44.tmp_2', 'relu_10.tmp_0',
            'tmp_41', 'relu_4.tmp_0', 'elementwise_add_1.tmp_0', 'tmp_23',
            'batch_norm_6.tmp_2', 'tmp_29', 'elementwise_mul_4.tmp_0', 'tmp_14'
        ]
        var_collector = VarCollector(train_prog, activates, use_ema=True)
        values = var_collector.abs_max_run(train_loader,
                                           exe,
                                           step=None,
                                           loss_name=avg_cost.name)
        np.save('pact_thres.npy', values)
        _logger.info(values)
        _logger.info("PACT threshold have been saved as pact_thres.npy")

        # Draw Histogram in 'dist_pdf/result.pdf'
        # var_collector.pdf(values)

        return

    # Activations missing from pact_thres.npy fall back to a threshold of 20
    values = defaultdict(lambda: 20)
    try:
        # update() keeps the defaultdict fallback for unseen activation names
        values.update(np.load("pact_thres.npy", allow_pickle=True).item())
        _logger.info("pact_thres.npy info loaded.")
    except OSError:
        _logger.info(
            "cannot find pact_thres.npy. Set init PACT threshold as 20.")
    _logger.info(values)

    # 1. quantization configs
    quant_config = {
        # weight quantize type, default is 'channel_wise_abs_max'
        'weight_quantize_type': 'channel_wise_abs_max',
        # activation quantize type, default is 'moving_average_abs_max'
        'activation_quantize_type': 'moving_average_abs_max',
        # weight quantize bit num, default is 8
        'weight_bits': 8,
        # activation quantize bit num, default is 8
        'activation_bits': 8,
        # ops of name_scope in not_quant_pattern list, will not be quantized
        'not_quant_pattern': ['skip_quant'],
        # ops of type in quantize_op_types, will be quantized
        'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'],
        # data type after quantization, such as 'uint8', 'int8', etc. default is 'int8'
        'dtype': 'int8',
        # window size for 'range_abs_max' quantization. default is 10000
        'window_size': 10000,
        # The decay coefficient of moving average, default is 0.9
        'moving_rate': 0.9,
    }

    # 2. quantization transform programs (training aware)
    #    Apply quantization transforms to the graph before training and testing.
    #    According to the weight and activation quantization types, fake
    #    quantize and fake dequantize operators are inserted into the graph.

    def pact(x):
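        # PACT (learnable clipping): the transform below clips the activation
        # to [-u_param, u_param], with u_param initialized from the collected
        # per-activation abs-max thresholds.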
        helper = LayerHelper("pact", **locals())
        dtype = 'float32'
        init_thres = values[x.name.split('_tmp_input')[0]]
        u_param_attr = paddle.ParamAttr(
            name=x.name + '_pact',
            initializer=paddle.nn.initializer.Constant(value=init_thres),
            regularizer=paddle.regularizer.L2Decay(0.0001),
            learning_rate=1)
        u_param = helper.create_parameter(attr=u_param_attr,
                                          shape=[1],
                                          dtype=dtype)

        part_a = paddle.nn.functional.relu(x - u_param)
        part_b = paddle.nn.functional.relu(-u_param - x)
        x = x - part_a + part_b
        return x

    def get_optimizer():
        return paddle.optimizer.Momentum(args.lr, 0.9)

    if args.use_pact:
        act_preprocess_func = pact
        optimizer_func = get_optimizer
        executor = exe
    else:
        act_preprocess_func = None
        optimizer_func = None
        executor = None

    val_program = quant_aware(val_program,
                              place,
                              quant_config,
                              scope=None,
                              act_preprocess_func=act_preprocess_func,
                              optimizer_func=optimizer_func,
                              executor=executor,
                              for_test=True)
    compiled_train_prog = quant_aware(train_prog,
                                      place,
                                      quant_config,
                                      scope=None,
                                      act_preprocess_func=act_preprocess_func,
                                      optimizer_func=optimizer_func,
                                      executor=executor,
                                      for_test=False)

    if args.pretrained_model:
        assert os.path.exists(
            args.pretrained_model), "pretrained_model doesn't exist"
        paddle.static.load(train_prog, args.pretrained_model, exe)

    def test(epoch, program):
        batch_id = 0
        acc_top1_ns = []
        acc_top5_ns = []
        for data in valid_loader():
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program, feed=data, fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {:.6f}; acc_top5: {:.6f}; time: {:.3f}"
                    .format(epoch, batch_id, np.mean(acc_top1_n),
                            np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))
            batch_id += 1

        _logger.info(
            "Final eval epoch[{}] - acc_top1: {:.6f}; acc_top5: {:.6f}".format(
                epoch, np.mean(np.array(acc_top1_ns)),
                np.mean(np.array(acc_top5_ns))))
        return np.mean(np.array(acc_top1_ns))

    def train(epoch, compiled_train_prog, lr):

        batch_id = 0
        for data in train_loader():
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n = exe.run(
                compiled_train_prog,
                feed=data,
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])

            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] lr: {:.6f} - loss: {:.6f}; acc_top1: {:.6f}; acc_top5: {:.6f}; time: {:.3f}"
                    .format(epoch, batch_id, learning_rate.get_lr(), loss_n,
                            acc_top1_n, acc_top5_n, end_time - start_time))

            if args.use_pact and batch_id % 1000 == 0:
                threshold = {}
                for var in val_program.list_vars():
                    if 'pact' in var.name:
                        array = np.array(paddle.static.global_scope().find_var(
                            var.name).get_tensor())
                        threshold[var.name] = array[0]
                _logger.info(threshold)
            batch_id += 1
            lr.step()

    build_strategy = paddle.static.BuildStrategy()
    build_strategy.enable_inplace = False
    build_strategy.fuse_all_reduce_ops = False
    exec_strategy = paddle.static.ExecutionStrategy()
    compiled_train_prog = compiled_train_prog.with_data_parallel(
        loss_name=avg_cost.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    # train loop
    best_acc1 = 0.0
    best_epoch = 0

    start_epoch = 0
    if args.checkpoint_dir is not None:
        ckpt_path = args.checkpoint_dir
        assert args.checkpoint_epoch is not None, "checkpoint_epoch must be set"
        start_epoch = args.checkpoint_epoch
        paddle.static.load(executor=exe,
                           model_path=args.checkpoint_dir,
                           program=val_program)

    best_eval_acc1 = 0
    best_acc1_epoch = 0
    for i in range(start_epoch, args.num_epochs):
        train(i, compiled_train_prog, learning_rate)
        acc1 = test(i, val_program)
        if acc1 > best_eval_acc1:
            best_eval_acc1 = acc1
            best_acc1_epoch = i
        _logger.info("Best Validation Acc1: {:.6f}, at epoch {}".format(
            best_eval_acc1, best_acc1_epoch))
        paddle.static.save(model_path=os.path.join(args.output_dir, str(i)),
                           program=val_program)
        if acc1 > best_acc1:
            best_acc1 = acc1
            best_epoch = i
            paddle.static.save(model_path=os.path.join(args.output_dir,
                                                       'best_model'),
                               program=val_program)

    if os.path.exists(os.path.join(args.output_dir, 'best_model.pdparams')):
        paddle.static.load(executor=exe,
                           model_path=os.path.join(args.output_dir,
                                                   'best_model'),
                           program=val_program)

    # 3. Freeze the graph after training by adjusting the quantize
    #    operators' order for the inference.
    #    The dtype of float_program's weights is float32, but in int8 range.
    float_program, int8_program = convert(val_program, place, quant_config, \
                                                        scope=None, \
                                                        save_int8=True)
    _logger.info("eval best_model after convert")
    final_acc1 = test(best_epoch, float_program)
    _logger.info("final acc:{}".format(final_acc1))

    # 4. Save inference model
    model_path = os.path.join(
        quantization_model_save_dir, args.model,
        'act_' + quant_config['activation_quantize_type'] + '_w_' +
        quant_config['weight_quantize_type'])
    float_path = os.path.join(model_path, 'float')
    if not os.path.isdir(model_path):
        os.makedirs(model_path)

    paddle.fluid.io.save_inference_model(dirname=float_path,
                                         feeded_var_names=[image.name],
                                         target_vars=[out],
                                         executor=exe,
                                         main_program=float_program,
                                         model_filename=float_path + '/model',
                                         params_filename=float_path +
                                         '/params')
Code Example #5
File: eval_quant.py  Project: zhang-deep/PaddleSeg
def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
    np.set_printoptions(precision=5, suppress=True)

    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    dataset = SegDataset(file_list=cfg.DATASET.VAL_FILE_LIST,
                         mode=ModelPhase.EVAL,
                         data_dir=cfg.DATASET.DATA_DIR)

    def data_generator():
        # TODO: check whether the batch reader is compatible with Windows
        if use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()

        for b in data_gen:
            yield b[0], b[1], b[2]

    data_loader, avg_loss, pred, grts, masks = build_model(
        test_prog, startup_prog, phase=ModelPhase.EVAL)

    data_loader.set_sample_generator(data_generator,
                                     drop_last=False,
                                     batch_size=cfg.BATCH_SIZE)

    # Get device environment
    places = fluid.cuda_places() if use_gpu else fluid.cpu_places()
    place = places[0]
    dev_count = len(places)
    print("#Device count: {}".format(dev_count))

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    test_prog = test_prog.clone(for_test=True)
    not_quant_pattern_list = []
    if kwargs['not_quant_pattern'] is not None:
        not_quant_pattern_list = kwargs['not_quant_pattern']
    config = {
        'weight_quantize_type': 'channel_wise_abs_max',
        'activation_quantize_type': 'moving_average_abs_max',
        'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
        'not_quant_pattern': not_quant_pattern_list
    }
    test_prog = quant_aware(test_prog, place, config, for_test=True)

    ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir

    if not os.path.exists(ckpt_dir):
        raise ValueError(
            'The TEST.TEST_MODEL {} is not found'.format(ckpt_dir))

    if ckpt_dir is not None:
        print('load test model:', ckpt_dir)
        fluid.io.load_persistables(exe, ckpt_dir, main_program=test_prog)
    if kwargs['convert']:
        test_prog = convert(test_prog, place, config)
    # Use streaming confusion matrix to calculate mean_iou
    np.set_printoptions(precision=4,
                        suppress=True,
                        linewidth=160,
                        floatmode="fixed")
    conf_mat = ConfusionMatrix(cfg.DATASET.NUM_CLASSES, streaming=True)
    fetch_list = [avg_loss.name, pred.name, grts.name, masks.name]
    num_images = 0
    step = 0
    all_step = cfg.DATASET.TEST_TOTAL_IMAGES // cfg.BATCH_SIZE + 1
    timer = Timer()
    timer.start()
    data_loader.start()
    while True:
        try:
            step += 1
            loss, pred, grts, masks = exe.run(test_prog,
                                              fetch_list=fetch_list,
                                              return_numpy=True)

            loss = np.mean(np.array(loss))

            num_images += pred.shape[0]
            conf_mat.calculate(pred, grts, masks)
            _, iou = conf_mat.mean_iou()
            _, acc = conf_mat.accuracy()

            speed = 1.0 / timer.elapsed_time()

            print(
                "[EVAL]step={} loss={:.5f} acc={:.4f} IoU={:.4f} step/sec={:.2f} | ETA {}"
                .format(step, loss, acc, iou, speed,
                        calculate_eta(all_step - step, speed)))
            timer.restart()
            sys.stdout.flush()
        except fluid.core.EOFException:
            break

    category_iou, avg_iou = conf_mat.mean_iou()
    category_acc, avg_acc = conf_mat.accuracy()
    print("[EVAL]#image={} acc={:.4f} IoU={:.4f}".format(
        num_images, avg_acc, avg_iou))
    print("[EVAL]Category IoU:", category_iou)
    print("[EVAL]Category Acc:", category_acc)
    print("[EVAL]Kappa:{:.4f}".format(conf_mat.kappa()))

    return category_iou, avg_iou, category_acc, avg_acc
Code Example #6
def main():
    """
    Main evaluate function
    """
    cfg = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    check_config(cfg)
    # check whether use_gpu=True is set in a CPU-only PaddlePaddle build
    check_gpu(cfg.use_gpu)
    # check whether the installed PaddlePaddle version satisfies the requirement
    check_version()

    main_arch = cfg.architecture

    # define executor
    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    # build program
    model = create(main_arch)
    startup_prog = fluid.Program()
    eval_prog = fluid.Program()
    with fluid.program_guard(eval_prog, startup_prog):
        with fluid.unique_name.guard():
            inputs_def = cfg['EvalReader']['inputs_def']
            test_feed_vars, loader = model.build_inputs(**inputs_def)
            test_fetches = model.eval(test_feed_vars)
    eval_prog = eval_prog.clone(True)

    reader = create_reader(cfg.EvalReader)
    loader.set_sample_list_generator(reader, place)

    dataset = cfg['EvalReader']['dataset']

    # evaluate already-existing json files
    if FLAGS.json_eval:
        logger.info(
            "In json_eval mode, PaddleDetection will evaluate json files in "
            "output_eval directly. And proposal.json, bbox.json and mask.json "
            "will be detected by default.")
        json_eval_results(cfg.metric,
                          json_directory=FLAGS.output_eval,
                          dataset=dataset)
        return

    assert cfg.metric != 'OID', "eval process of OID dataset is not supported."

    if cfg.metric == "WIDERFACE":
        raise ValueError("metric type {} does not support in tools/eval.py, "
                         "please use tools/face_eval.py".format(cfg.metric))
    assert cfg.metric in ['COCO', 'VOC'], \
            "unknown metric type {}".format(cfg.metric)
    extra_keys = []

    if cfg.metric == 'COCO':
        extra_keys = ['im_info', 'im_id', 'im_shape']
    if cfg.metric == 'VOC':
        extra_keys = ['gt_bbox', 'gt_class', 'is_difficult']

    keys, values, cls = parse_fetches(test_fetches, eval_prog, extra_keys)

    # whether output bbox is normalized in model output layer
    is_bbox_normalized = False
    if hasattr(model, 'is_bbox_normalized') and \
            callable(model.is_bbox_normalized):
        is_bbox_normalized = model.is_bbox_normalized()

    sub_eval_prog = None
    sub_keys = None
    sub_values = None

    not_quant_pattern = []
    if FLAGS.not_quant_pattern:
        not_quant_pattern = FLAGS.not_quant_pattern
    config = {
        'weight_quantize_type': 'channel_wise_abs_max',
        'activation_quantize_type': 'moving_average_abs_max',
        'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
        'not_quant_pattern': not_quant_pattern
    }

    eval_prog = quant_aware(eval_prog, place, config, for_test=True)

    # load model
    exe.run(startup_prog)
    if 'weights' in cfg:
        checkpoint.load_params(exe, eval_prog, cfg.weights)
    eval_prog = convert(eval_prog, place, config, save_int8=False)

    compile_program = fluid.compiler.CompiledProgram(
        eval_prog).with_data_parallel()

    results = eval_run(exe, compile_program, loader, keys, values, cls, cfg,
                       sub_eval_prog, sub_keys, sub_values)

    # evaluation
    resolution = None
    if 'mask' in results[0]:
        resolution = model.mask_head.resolution
    # if map_type not set, use default 11point, only use in VOC eval
    map_type = cfg.map_type if 'map_type' in cfg else '11point'
    eval_results(results,
                 cfg.metric,
                 cfg.num_classes,
                 resolution,
                 is_bbox_normalized,
                 FLAGS.output_eval,
                 map_type,
                 dataset=dataset)
Code Example #7
File: train_quant.py  Project: zjhellofss/PaddleSeg
def train_quant(cfg):
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    if args.enable_ce:
        startup_prog.random_seed = 1000
        train_prog.random_seed = 1000
    drop_last = True

    dataset = SegDataset(file_list=cfg.DATASET.TRAIN_FILE_LIST,
                         mode=ModelPhase.TRAIN,
                         shuffle=True,
                         data_dir=cfg.DATASET.DATA_DIR)

    def data_generator():
        if args.use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()

        batch_data = []
        for b in data_gen:
            batch_data.append(b)
            if len(batch_data) == (cfg.BATCH_SIZE // cfg.NUM_TRAINERS):
                for item in batch_data:
                    yield item[0], item[1], item[2]
                batch_data = []
        # If the sync batch norm strategy is used, drop the last batch when the
        # number of samples in batch_data is less than cfg.BATCH_SIZE to avoid
        # NCCL hang issues
        if not cfg.TRAIN.SYNC_BATCH_NORM:
            for item in batch_data:
                yield item[0], item[1], item[2]

    # Get device environment
    # places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()
    # place = places[0]
    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
    places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()

    # Get number of GPU
    dev_count = cfg.NUM_TRAINERS if cfg.NUM_TRAINERS > 1 else len(places)
    print_info("#Device count: {}".format(dev_count))

    # Make sure BATCH_SIZE is divisible by the number of GPU cards
    assert cfg.BATCH_SIZE % dev_count == 0, (
        'BATCH_SIZE:{} not divisible by number of GPUs:{}'.format(
            cfg.BATCH_SIZE, dev_count))
    # In multi-GPU training mode, batch data is allocated evenly to each GPU
    batch_size_per_dev = cfg.BATCH_SIZE // dev_count
    print_info("batch_size_per_dev: {}".format(batch_size_per_dev))

    data_loader, avg_loss, lr, pred, grts, masks = build_model(
        train_prog, startup_prog, phase=ModelPhase.TRAIN)
    data_loader.set_sample_generator(data_generator,
                                     batch_size=batch_size_per_dev,
                                     drop_last=drop_last)

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    exec_strategy = fluid.ExecutionStrategy()
    if args.use_gpu:
        exec_strategy.num_threads = fluid.core.get_cuda_device_count()
    # Clear temporary variables every 100 iterations
    exec_strategy.num_iteration_per_drop_scope = 100
    build_strategy = fluid.BuildStrategy()

    if cfg.NUM_TRAINERS > 1 and args.use_gpu:
        dist_utils.prepare_for_multi_process(exe, build_strategy, train_prog)
        exec_strategy.num_threads = 1

    # Resume training
    begin_epoch = cfg.SOLVER.BEGIN_EPOCH
    if cfg.TRAIN.RESUME_MODEL_DIR:
        begin_epoch = load_checkpoint(exe, train_prog)
    # Load pretrained model
    elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR):
        load_pretrained_weights(exe, train_prog,
                                cfg.TRAIN.PRETRAINED_MODEL_DIR)
    else:
        print_info(
            'Pretrained model dir {} does not exist, training from scratch...'.
            format(cfg.TRAIN.PRETRAINED_MODEL_DIR))

    fetch_list = [avg_loss.name, lr.name]
    if args.debug:
        # Fetch more variable info and use streaming confusion matrix to
        # calculate IoU results if in debug mode
        np.set_printoptions(precision=4,
                            suppress=True,
                            linewidth=160,
                            floatmode="fixed")
        fetch_list.extend([pred.name, grts.name, masks.name])
        cm = ConfusionMatrix(cfg.DATASET.NUM_CLASSES, streaming=True)

    not_quant_pattern = []
    if args.not_quant_pattern:
        not_quant_pattern = args.not_quant_pattern
    config = {
        'weight_quantize_type': 'channel_wise_abs_max',
        'activation_quantize_type': 'moving_average_abs_max',
        'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
        'not_quant_pattern': not_quant_pattern
    }
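    # quant_aware is applied twice to the same train_prog: once to build the
    # training program (for_test=False) and once to derive the eval program
    # (for_test=True).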
    compiled_train_prog = quant_aware(train_prog,
                                      place,
                                      config,
                                      for_test=False)
    eval_prog = quant_aware(train_prog, place, config, for_test=True)
    build_strategy.fuse_all_reduce_ops = False
    build_strategy.sync_batch_norm = False
    compiled_train_prog = compiled_train_prog.with_data_parallel(
        loss_name=avg_loss.name,
        exec_strategy=exec_strategy,
        build_strategy=build_strategy)

    # trainer_id = int(os.getenv("PADDLE_TRAINER_ID", 0))
    # num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
    global_step = 0
    all_step = cfg.DATASET.TRAIN_TOTAL_IMAGES // cfg.BATCH_SIZE
    if cfg.DATASET.TRAIN_TOTAL_IMAGES % cfg.BATCH_SIZE and not drop_last:
        all_step += 1
    all_step *= (cfg.SOLVER.NUM_EPOCHS - begin_epoch + 1)

    avg_loss = 0.0
    best_mIoU = 0.0

    timer = Timer()
    timer.start()
    if begin_epoch > cfg.SOLVER.NUM_EPOCHS:
        raise ValueError((
            "begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]").format(
                begin_epoch, cfg.SOLVER.NUM_EPOCHS))

    if args.use_mpio:
        print_info("Use multiprocess reader")
    else:
        print_info("Use multi-thread reader")

    for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1):
        data_loader.start()
        while True:
            try:
                if args.debug:
                    # Print category IoU and accuracy to check whether the
                    # training process matches expectations
                    loss, lr, pred, grts, masks = exe.run(
                        program=compiled_train_prog,
                        fetch_list=fetch_list,
                        return_numpy=True)
                    cm.calculate(pred, grts, masks)
                    avg_loss += np.mean(np.array(loss))
                    global_step += 1

                    if global_step % args.log_steps == 0:
                        speed = args.log_steps / timer.elapsed_time()
                        avg_loss /= args.log_steps
                        category_acc, mean_acc = cm.accuracy()
                        category_iou, mean_iou = cm.mean_iou()

                        print_info((
                            "epoch={} step={} lr={:.5f} loss={:.4f} acc={:.5f} mIoU={:.5f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, global_step, lr[0], avg_loss, mean_acc,
                                 mean_iou, speed,
                                 calculate_eta(all_step - global_step, speed)))
                        print_info("Category IoU: ", category_iou)
                        print_info("Category Acc: ", category_acc)
                        sys.stdout.flush()
                        avg_loss = 0.0
                        cm.zero_matrix()
                        timer.restart()
                else:
                    # If not in debug mode, avoid unnecessary logging and calculation
                    loss, lr = exe.run(program=compiled_train_prog,
                                       fetch_list=fetch_list,
                                       return_numpy=True)
                    avg_loss += np.mean(np.array(loss))
                    global_step += 1

                    if global_step % args.log_steps == 0 and cfg.TRAINER_ID == 0:
                        avg_loss /= args.log_steps
                        speed = args.log_steps / timer.elapsed_time()
                        print((
                            "epoch={} step={} lr={:.5f} loss={:.4f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, global_step, lr[0], avg_loss, speed,
                                 calculate_eta(all_step - global_step, speed)))
                        sys.stdout.flush()
                        avg_loss = 0.0
                        timer.restart()

            except fluid.core.EOFException:
                data_loader.reset()
                break
            except Exception as e:
                print(e)

        if (epoch % cfg.TRAIN.SNAPSHOT_EPOCH == 0
                or epoch == cfg.SOLVER.NUM_EPOCHS) and cfg.TRAINER_ID == 0:
            ckpt_dir = save_checkpoint(exe, eval_prog, epoch)

            if args.do_eval:
                print("Evaluation start")
                _, mean_iou, _, mean_acc = evaluate(
                    cfg=cfg,
                    ckpt_dir=ckpt_dir,
                    use_gpu=args.use_gpu,
                    use_mpio=args.use_mpio,
                    not_quant_pattern=args.not_quant_pattern,
                    convert=False)

                if mean_iou > best_mIoU:
                    best_mIoU = mean_iou
                    update_best_model(ckpt_dir)
                    print_info(
                        "Save best model {} to {}, mIoU = {:.4f}".format(
                            ckpt_dir,
                            os.path.join(cfg.TRAIN.MODEL_SAVE_DIR,
                                         'best_model'), mean_iou))

    # save final model
    if cfg.TRAINER_ID == 0:
        save_checkpoint(exe, eval_prog, 'final')
Code Example #8
def main():
    if FLAGS.eval is False:
        raise ValueError(
            "Training with quantization currently requires `--eval=True`.")
    env = os.environ
    FLAGS.dist = 'PADDLE_TRAINER_ID' in env and 'PADDLE_TRAINERS_NUM' in env
    if FLAGS.dist:
        trainer_id = int(env['PADDLE_TRAINER_ID'])
        import random
        local_seed = (99 + trainer_id)
        random.seed(local_seed)
        np.random.seed(local_seed)

    cfg = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    check_config(cfg)
    # check whether use_gpu=True is set in a CPU-only PaddlePaddle build
    check_gpu(cfg.use_gpu)
    # check whether the installed PaddlePaddle version satisfies the requirement
    check_version()

    main_arch = cfg.architecture

    if cfg.use_gpu:
        devices_num = fluid.core.get_cuda_device_count()
    else:
        devices_num = int(os.environ.get('CPU_NUM', 1))

    if 'FLAGS_selected_gpus' in env:
        device_id = int(env['FLAGS_selected_gpus'])
    else:
        device_id = 0
    place = fluid.CUDAPlace(device_id) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    lr_builder = create('LearningRate')
    optim_builder = create('OptimizerBuilder')

    # build program
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    with fluid.program_guard(train_prog, startup_prog):
        with fluid.unique_name.guard():
            model = create(main_arch)
            inputs_def = cfg['TrainReader']['inputs_def']
            feed_vars, train_loader = model.build_inputs(**inputs_def)
            if FLAGS.use_pact:
                feed_vars['image'].stop_gradient = False
            train_fetches = model.train(feed_vars)
            loss = train_fetches['loss']
            lr = lr_builder()
            optimizer = optim_builder(lr)
            optimizer.minimize(loss)

    # parse train fetches
    train_keys, train_values, _ = parse_fetches(train_fetches)
    train_values.append(lr)

    if FLAGS.eval:
        eval_prog = fluid.Program()
        with fluid.program_guard(eval_prog, startup_prog):
            with fluid.unique_name.guard():
                model = create(main_arch)
                inputs_def = cfg['EvalReader']['inputs_def']
                feed_vars, eval_loader = model.build_inputs(**inputs_def)
                fetches = model.eval(feed_vars)
        eval_prog = eval_prog.clone(True)

        eval_reader = create_reader(cfg.EvalReader)
        # In iterable mode, call set_sample_list_generator(eval_reader, place)
        eval_loader.set_sample_list_generator(eval_reader)

        # parse eval fetches
        extra_keys = []
        if cfg.metric == 'COCO':
            extra_keys = ['im_info', 'im_id', 'im_shape']
        if cfg.metric == 'VOC':
            extra_keys = ['gt_bbox', 'gt_class', 'is_difficult']
        if cfg.metric == 'WIDERFACE':
            extra_keys = ['im_id', 'im_shape', 'gt_bbox']
        eval_keys, eval_values, eval_cls = parse_fetches(
            fetches, eval_prog, extra_keys)

    # compile program for multi-devices
    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_all_optimizer_ops = False
    build_strategy.fuse_elewise_add_act_ops = True
    build_strategy.fuse_all_reduce_ops = False

    # only enable sync_bn in multi GPU devices
    sync_bn = getattr(model.backbone, 'norm_type', None) == 'sync_bn'
    sync_bn = False
    build_strategy.sync_batch_norm = sync_bn and devices_num > 1 \
        and cfg.use_gpu

    exec_strategy = fluid.ExecutionStrategy()
    # iteration number when CompiledProgram tries to drop local execution scopes.
    # Set it to 1 to save memory usage, so that unused variables in
    # local execution scopes can be deleted after each iteration.
    exec_strategy.num_iteration_per_drop_scope = 1
    if FLAGS.dist:
        dist_utils.prepare_for_multi_process(exe, build_strategy, startup_prog,
                                             train_prog)
        exec_strategy.num_threads = 1

    exe.run(startup_prog)
    not_quant_pattern = []
    if FLAGS.not_quant_pattern:
        not_quant_pattern = FLAGS.not_quant_pattern
    config = {
        'weight_quantize_type': 'channel_wise_abs_max',
        'activation_quantize_type': 'moving_average_abs_max',
        'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
        'not_quant_pattern': not_quant_pattern
    }

    ignore_params = cfg.finetune_exclude_pretrained_params \
                 if 'finetune_exclude_pretrained_params' in cfg else []

    fuse_bn = getattr(model.backbone, 'norm_type', None) == 'affine_channel'

    if cfg.pretrain_weights and fuse_bn and not ignore_params:
        checkpoint.load_and_fusebn(exe, train_prog, cfg.pretrain_weights)
    elif cfg.pretrain_weights:
        checkpoint.load_params(exe,
                               train_prog,
                               cfg.pretrain_weights,
                               ignore_params=ignore_params)

    if FLAGS.use_pact:
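        # 'pact' and 'get_optimizer' are assumed to be defined at module
        # level, analogous to the PACT helpers in Code Example #4.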
        act_preprocess_func = pact
        optimizer_func = get_optimizer
        executor = exe
    else:
        act_preprocess_func = None
        optimizer_func = None
        executor = None
    # insert quantize op in train_prog, return type is CompiledProgram
    train_prog_quant = quant_aware(train_prog,
                                   place,
                                   config,
                                   scope=None,
                                   act_preprocess_func=act_preprocess_func,
                                   optimizer_func=optimizer_func,
                                   executor=executor,
                                   for_test=False)

    compiled_train_prog = train_prog_quant.with_data_parallel(
        loss_name=loss.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    if FLAGS.eval:
        # insert quantize op in eval_prog
        eval_prog = quant_aware(eval_prog,
                                place,
                                config,
                                scope=None,
                                act_preprocess_func=act_preprocess_func,
                                optimizer_func=optimizer_func,
                                executor=executor,
                                for_test=True)
        compiled_eval_prog = fluid.CompiledProgram(eval_prog)

    start_iter = 0

    train_reader = create_reader(cfg.TrainReader,
                                 (cfg.max_iters - start_iter) * devices_num)
    # In iterable mode, call set_sample_list_generator(train_reader, place)
    train_loader.set_sample_list_generator(train_reader)

    # whether output bbox is normalized in model output layer
    is_bbox_normalized = False
    if hasattr(model, 'is_bbox_normalized') and \
            callable(model.is_bbox_normalized):
        is_bbox_normalized = model.is_bbox_normalized()

    # if map_type not set, use default 11point, only use in VOC eval
    map_type = cfg.map_type if 'map_type' in cfg else '11point'

    train_stats = TrainingStats(cfg.log_iter, train_keys)
    train_loader.start()
    start_time = time.time()
    end_time = time.time()

    cfg_name = os.path.basename(FLAGS.config).split('.')[0]
    save_dir = os.path.join(cfg.save_dir, cfg_name)
    time_stat = deque(maxlen=cfg.log_iter)
    best_box_ap_list = [0.0, 0]  #[map, iter]

    for it in range(start_iter, cfg.max_iters):
        start_time = end_time
        end_time = time.time()
        time_stat.append(end_time - start_time)
        time_cost = np.mean(time_stat)
        eta_sec = (cfg.max_iters - it) * time_cost
        eta = str(datetime.timedelta(seconds=int(eta_sec)))
        outs = exe.run(compiled_train_prog, fetch_list=train_values)
        stats = {k: np.array(v).mean() for k, v in zip(train_keys, outs[:-1])}

        train_stats.update(stats)
        logs = train_stats.log()
        if it % cfg.log_iter == 0 and (not FLAGS.dist or trainer_id == 0):
            strs = 'iter: {}, lr: {:.6f}, {}, time: {:.3f}, eta: {}'.format(
                it, np.mean(outs[-1]), logs, time_cost, eta)
            logger.info(strs)

        if (it > 0 and it % cfg.snapshot_iter == 0 or it == cfg.max_iters - 1) \
           and (not FLAGS.dist or trainer_id == 0):
            save_name = str(it) if it != cfg.max_iters - 1 else "model_final"

            if FLAGS.eval:
                # evaluation
                results = eval_run(exe,
                                   compiled_eval_prog,
                                   eval_loader,
                                   eval_keys,
                                   eval_values,
                                   eval_cls,
                                   cfg=cfg)
                resolution = None
                if 'mask' in results[0]:
                    resolution = model.mask_head.resolution
                box_ap_stats = eval_results(results, cfg.metric,
                                            cfg.num_classes, resolution,
                                            is_bbox_normalized,
                                            FLAGS.output_eval, map_type,
                                            cfg['EvalReader']['dataset'])

                if box_ap_stats[0] > best_box_ap_list[0]:
                    best_box_ap_list[0] = box_ap_stats[0]
                    best_box_ap_list[1] = it
                    save_checkpoint(exe, eval_prog,
                                    os.path.join(save_dir, "best_model"),
                                    train_prog)
                logger.info("Best test box ap: {}, in iter: {}".format(
                    best_box_ap_list[0], best_box_ap_list[1]))

    train_loader.reset()
Code Example #9
    def test_accuracy(self):
        image = fluid.layers.data(name='image',
                                  shape=[1, 28, 28],
                                  dtype='float32')
        image.stop_gradient = False
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        model = MobileNet()
        out = model.net(input=image, class_dim=10)
        cost = fluid.layers.cross_entropy(input=out, label=label)
        avg_cost = fluid.layers.mean(x=cost)
        acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
        acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
        optimizer = fluid.optimizer.Momentum(
            momentum=0.9,
            learning_rate=0.01,
            regularization=fluid.regularizer.L2Decay(4e-5))
        optimizer.minimize(avg_cost)
        main_prog = fluid.default_main_program()
        val_prog = main_prog.clone(for_test=True)

        place = fluid.CUDAPlace(
            0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())
        feeder = fluid.DataFeeder([image, label], place, program=main_prog)
        train_reader = paddle.fluid.io.batch(paddle.dataset.mnist.train(),
                                             batch_size=64)
        eval_reader = paddle.fluid.io.batch(paddle.dataset.mnist.test(),
                                            batch_size=64)

        def train(program):
            iter = 0
            for data in train_reader():
                cost, top1, top5 = exe.run(
                    program,
                    feed=feeder.feed(data),
                    fetch_list=[avg_cost, acc_top1, acc_top5])
                iter += 1
                if iter % 100 == 0:
                    print(
                        'train iter={}, avg loss {}, acc_top1 {}, acc_top5 {}'.
                        format(iter, cost, top1, top5))

        def test(program):
            iter = 0
            result = [[], [], []]
            for data in eval_reader():
                cost, top1, top5 = exe.run(
                    program,
                    feed=feeder.feed(data),
                    fetch_list=[avg_cost, acc_top1, acc_top5])
                iter += 1
                if iter % 100 == 0:
                    print(
                        'eval iter={}, avg loss {}, acc_top1 {}, acc_top5 {}'.
                        format(iter, cost, top1, top5))
                result[0].append(cost)
                result[1].append(top1)
                result[2].append(top5)
            print(' avg loss {}, acc_top1 {}, acc_top5 {}'.format(
                np.mean(result[0]), np.mean(result[1]), np.mean(result[2])))
            return np.mean(result[1]), np.mean(result[2])

        train(main_prog)
        top1_1, top5_1 = test(main_prog)

        config = {
            'weight_quantize_type': 'channel_wise_abs_max',
            'activation_quantize_type': 'moving_average_abs_max',
            'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
        }
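        # 'pact' and 'get_optimizer' below are assumed to be the PACT
        # activation-preprocess and optimizer builders from Code Example #4.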
        quant_train_prog_pact = quant_aware(main_prog,
                                            place,
                                            config,
                                            for_test=False,
                                            act_preprocess_func=pact,
                                            optimizer_func=get_optimizer,
                                            executor=exe)

        quant_eval_prog = quant_aware(val_prog, place, config, for_test=True)
        train(quant_train_prog_pact)
        quant_eval_prog, int8_prog = convert(quant_eval_prog,
                                             place,
                                             config,
                                             save_int8=True)
        top1_2, top5_2 = test(quant_eval_prog)
        # values before quantization and after quantization should be close
        print("before quantization: top1: {}, top5: {}".format(top1_1, top5_1))
        print("after quantization: top1: {}, top5: {}".format(top1_2, top5_2))
Code Example #10
def export_quant_infermodel(
        executor,
        place=None,
        scope=None,
        quant_config=None,
        train_config=None,
        checkpoint_path=None,
        export_inference_model_path_prefix="./export_quant_infermodel"):
    """export quant model checkpoints to infermodel.
    Args:
        executor(paddle.static.Executor): The executor to load, run and save the
            quantized model.
        place(paddle.CPUPlace or paddle.CUDAPlace): This parameter represents
            the executor run on which device.
        scope(paddle.static.Scope, optional):  Scope records the mapping between
            variable names and variables, similar to brackets in
            programming languages. Usually users can use
            `paddle.static.global_scope <https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api_cn/executor_cn/global_scope_cn.html>`_.
            When ``None`` will use
            `paddle.static.global_scope() <https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api_cn/executor_cn/global_scope_cn.html>`_
            . Default: ``None``.
        quant_config(dict, optional): configs for convert. If None, the
                default config is used. It must be the same config that was
                used in 'quant_aware'. Default is None.
        train_config(dict): train-aware configs, including num_epoch, save_iter_step, learning_rate,
                weight_decay, use_pact, quant_model_ckpt_path,
                model_path_prefix, teacher_model_path_prefix,
                distill_node_pair(teacher_node_name1, node_name1, teacher_node_name2, node_name2, ...)
        checkpoint_path(str): path of the checkpoint from which to export the quant inference model.
        export_inference_model_path_prefix(str): export inference model path prefix: the storage directory of the model plus the model name (excluding suffix).
    Returns:
        None
    """
    scope = paddle.static.global_scope() if not scope else scope
    # parse quant config
    if quant_config is None:
        quant_config = _quant_config_default
    else:
        assert isinstance(quant_config, dict), "quant config must be dict"
        quant_config = _parse_configs(quant_config)
    _logger.info("quant_aware config {}".format(quant_config))

    train_config = _parse_train_configs(train_config)
    distill_program_info = build_distill_prog_with_infermodel(
        executor, place, train_config)
    test_program = distill_program_info.test_program
    test_feed_names = distill_program_info.test_feed_names
    test_fetch_list = distill_program_info.test_fetch_list

    ############################################################################
    # quant
    ############################################################################
    use_pact = False  # use_pact must be False when exporting the model
    if use_pact:
        act_preprocess_func = pact
        optimizer_func = get_pact_optimizer
        pact_executor = executor
    else:
        act_preprocess_func = None
        optimizer_func = None
        pact_executor = None

    test_program = quant_aware(test_program,
                               place,
                               quant_config,
                               scope=scope,
                               act_preprocess_func=act_preprocess_func,
                               optimizer_func=optimizer_func,
                               executor=pact_executor,
                               for_test=True)

    paddle.static.load(executor=executor,
                       model_path=os.path.join(checkpoint_path),
                       program=test_program)
    ############################################################################################################
    # 3. Freeze the graph after training by adjusting the quantize
    #    operators' order for the inference.
    #    The dtype of float_program's weights is float32, but in int8 range.
    ############################################################################################################
    float_program, int8_program = convert(test_program, place, quant_config, \
                                          scope=scope, \
                                          save_int8=True)
    ############################################################################################################
    # 4. Save inference model
    ############################################################################################################
    export_model_dir = os.path.abspath(
        os.path.join(export_inference_model_path_prefix, os.path.pardir))
    if not os.path.exists(export_model_dir):
        os.makedirs(export_model_dir)

    feed_vars = []
    for name in test_feed_names:
        for var in float_program.list_vars():
            if var.name == name:
                feed_vars.append(var)
                break
    assert len(feed_vars) > 0, "can not find feed vars in quant program"
    paddle.static.save_inference_model(
        path_prefix=export_inference_model_path_prefix,
        feed_vars=feed_vars,
        fetch_vars=test_fetch_list,
        executor=executor,
        program=float_program)
Code Example #11
def quant_aware_with_infermodel(executor,
                                place,
                                scope=None,
                                train_reader=None,
                                quant_config=None,
                                train_config=None,
                                test_callback=None):
    """train aware quantization with infermodel
    Args:
        executor(paddle.static.Executor): The executor to load, run and save the
            quantized model.
        place(paddle.CPUPlace or paddle.CUDAPlace): This parameter represents
            the executor run on which device.
        scope(paddle.static.Scope, optional):  Scope records the mapping between
            variable names and variables, similar to brackets in
            programming languages. Usually users can use
            `paddle.static.global_scope <https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api_cn/executor_cn/global_scope_cn.html>`_.
            When ``None``,
            `paddle.static.global_scope() <https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api_cn/executor_cn/global_scope_cn.html>`_
            is used. Default: ``None``.
        train_reader(data generator): data generator that yields feed
                dictionaries, e.g. {feed_name[0]: data[0], feed_name[1]: data[1]}.
        quant_config(dict, optional): config for quantization. If None, the
                default config is used. It must be the same config as the one
                used in 'quant_aware' and 'convert'. Default: None.
        train_config(dict): training-aware configs, including num_epoch,
                save_iter_step, learning_rate, weight_decay, use_pact,
                quant_model_ckpt_path, model_path_prefix,
                teacher_model_path_prefix and
                distill_node_pair(teacher_node_name1, node_name1,
                teacher_node_name2, node_name2, ...).
        test_callback(callable): callback invoked with the compiled test quant
                program, the test feed names, the test fetch list and the
                checkpoint filename; users can implement their test logic here.
    Returns:
        None
    """
    scope = paddle.static.global_scope() if not scope else scope
    # parse quant config
    if quant_config is None:
        quant_config = _quant_config_default
    else:
        assert isinstance(quant_config, dict), "quant config must be dict"
        quant_config = _parse_configs(quant_config)
    _logger.info("quant_aware config {}".format(quant_config))

    train_config = _parse_train_configs(train_config)
    distill_program_info = build_distill_prog_with_infermodel(
        executor, place, train_config)
    startup_program = distill_program_info.startup_program
    train_program = distill_program_info.train_program
    train_feed_names = distill_program_info.train_feed_names
    train_fetch_list = distill_program_info.train_fetch_list
    optimizer = distill_program_info.optimizer
    test_program = distill_program_info.test_program
    test_feed_names = distill_program_info.test_feed_names
    test_fetch_list = distill_program_info.test_fetch_list

    ############################################################################
    # quant
    ############################################################################
    use_pact = train_config["use_pact"]
    if use_pact:
        act_preprocess_func = pact
        optimizer_func = get_pact_optimizer
        pact_executor = executor
    else:
        act_preprocess_func = None
        optimizer_func = None
        pact_executor = None

    test_program = quant_aware(test_program,
                               place,
                               quant_config,
                               scope=scope,
                               act_preprocess_func=act_preprocess_func,
                               optimizer_func=optimizer_func,
                               executor=pact_executor,
                               for_test=True)
    train_program = quant_aware(train_program,
                                place,
                                quant_config,
                                scope=scope,
                                act_preprocess_func=act_preprocess_func,
                                optimizer_func=optimizer_func,
                                executor=pact_executor,
                                for_test=False,
                                return_program=True)

    executor.run(startup_program)
    compiled_train_prog = _compile_program(train_program,
                                           train_fetch_list[0].name)
    compiled_test_prog = _compile_program(test_program,
                                          test_fetch_list[0].name)
    num_epoch = train_config["num_epoch"]
    save_iter_step = train_config["save_iter_step"]
    iter_sum = 0
    for epoch in range(num_epoch):
        for iter_num, feed_dict in enumerate(train_reader()):
            np_probs_float = executor.run(compiled_train_prog, \
                feed=feed_dict, \
                fetch_list=train_fetch_list)
            print("loss: ", np_probs_float)

            if iter_num > 0 and iter_num % save_iter_step == 0:
                checkpoint_name = "epoch_" + str(epoch) + "_iter_" + str(
                    iter_num)
                if not os.path.exists(train_config["quant_model_ckpt_path"]):
                    os.makedirs(train_config["quant_model_ckpt_path"])
                paddle.static.save(program=test_program,
                                   model_path=os.path.join(
                                       train_config["quant_model_ckpt_path"],
                                       checkpoint_name))
                test_callback(compiled_test_prog, test_feed_names,
                              test_fetch_list, checkpoint_name)
            iter_sum += 1
            if train_config["max_iter"] >= 0 and iter_sum > train_config[
                    "max_iter"]:
                return
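
A hedged usage sketch for quant_aware_with_infermodel; every path, node name and hyper-parameter below is illustrative rather than taken from the original:

import paddle
from paddleslim.quant import quant_aware_with_infermodel

paddle.enable_static()
place = paddle.CPUPlace()
exe = paddle.static.Executor(place)

def train_reader():
    # yield feed dictionaries, e.g. {'image': batch_images, 'label': batch_labels}
    ...

def eval_callback(compiled_test_prog, test_feed_names, test_fetch_list,
                  checkpoint_name):
    # run compiled_test_prog on a validation set and log metrics per checkpoint
    ...

train_config = {
    "num_epoch": 1, "max_iter": -1, "save_iter_step": 1000,
    "learning_rate": 0.0001, "weight_decay": 0.0001, "use_pact": False,
    "quant_model_ckpt_path": "./quant_ckpt",             # assumed path
    "model_path_prefix": "./fp32_infer/model",           # assumed path
    "teacher_model_path_prefix": "./fp32_infer/model",   # assumed path
    "distill_node_pair": ["teacher_fc_0.tmp_0", "fc_0.tmp_0"],  # assumed names
}
quant_aware_with_infermodel(exe, place,
                            train_reader=train_reader,
                            quant_config=None,  # fall back to the default config
                            train_config=train_config,
                            test_callback=eval_callback)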
Code Example #12
    def test_accuracy(self):
        image = paddle.static.data(name='image',
                                   shape=[None, 1, 28, 28],
                                   dtype='float32')
        label = paddle.static.data(name='label',
                                   shape=[None, 1],
                                   dtype='int64')
        model = MobileNet()
        out = model.net(input=image, class_dim=10)
        cost = paddle.nn.functional.loss.cross_entropy(input=out, label=label)
        avg_cost = paddle.mean(x=cost)
        acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
        acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
        optimizer = paddle.optimizer.Momentum(
            momentum=0.9,
            learning_rate=0.01,
            weight_decay=paddle.regularizer.L2Decay(4e-5))
        optimizer.minimize(avg_cost)
        main_prog = paddle.static.default_main_program()
        val_prog = main_prog.clone(for_test=True)

        place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda(
        ) else paddle.CPUPlace()
        exe = paddle.static.Executor(place)
        exe.run(paddle.static.default_startup_program())
        train_loader = paddle.io.DataLoader.from_generator(
            feed_list=[image, label],
            capacity=512,
            use_double_buffer=True,
            iterable=True)
        valid_loader = paddle.io.DataLoader.from_generator(
            feed_list=[image, label],
            capacity=512,
            use_double_buffer=True,
            iterable=True)
        train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                    batch_size=64)
        eval_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=64)
        train_loader.set_sample_list_generator(train_reader, place)
        valid_loader.set_sample_list_generator(eval_reader, place)

        def train(program):
            iter = 0
            for data in train_loader():
                cost, top1, top5 = exe.run(
                    program,
                    feed=data,
                    fetch_list=[avg_cost, acc_top1, acc_top5])
                iter += 1
                if iter % 100 == 0:
                    print(
                        'train iter={}, avg loss {}, acc_top1 {}, acc_top5 {}'.
                        format(iter, cost, top1, top5))

        def test(program):
            iter = 0
            result = [[], [], []]
            for data in valid_loader():
                cost, top1, top5 = exe.run(
                    program,
                    feed=data,
                    fetch_list=[avg_cost, acc_top1, acc_top5])
                iter += 1
                if iter % 100 == 0:
                    print(
                        'eval iter={}, avg loss {}, acc_top1 {}, acc_top5 {}'.
                        format(iter, cost, top1, top5))
                result[0].append(cost)
                result[1].append(top1)
                result[2].append(top5)
            print(' avg loss {}, acc_top1 {}, acc_top5 {}'.format(
                np.mean(result[0]), np.mean(result[1]), np.mean(result[2])))
            return np.mean(result[1]), np.mean(result[2])

        train(main_prog)
        top1_1, top5_1 = test(main_prog)

        config = {
            'weight_quantize_type': 'channel_wise_abs_max',
            'activation_quantize_type': 'moving_average_abs_max',
            'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
        }
        quant_train_prog = quant_aware(main_prog,
                                       place,
                                       config,
                                       for_test=False)
        quant_eval_prog = quant_aware(val_prog, place, config, for_test=True)
        train(quant_train_prog)
        quant_eval_prog, int8_prog = convert(quant_eval_prog,
                                             place,
                                             config,
                                             save_int8=True)
        top1_2, top5_2 = test(quant_eval_prog)
        # values before quantization and after quantization should be close
        print("before quantization: top1: {}, top5: {}".format(top1_1, top5_1))
        print("after quantization: top1: {}, top5: {}".format(top1_2, top5_2))
Code Example #13
File: quant.py  Project: Lzp970904/LZPSelect
def main():
    train_build_outputs = program.build(config,
                                        train_program,
                                        startup_program,
                                        mode='train')
    train_loader = train_build_outputs[0]
    train_fetch_name_list = train_build_outputs[1]
    train_fetch_varname_list = train_build_outputs[2]
    train_opt_loss_name = train_build_outputs[3]
    model_average = train_build_outputs[-1]

    eval_program = fluid.Program()
    eval_build_outputs = program.build(config,
                                       eval_program,
                                       startup_program,
                                       mode='eval')
    eval_fetch_name_list = eval_build_outputs[1]
    eval_fetch_varname_list = eval_build_outputs[2]
    eval_program = eval_program.clone(for_test=True)

    train_reader = reader_main(config=config, mode="train")
    train_loader.set_sample_list_generator(train_reader, places=place)

    eval_reader = reader_main(config=config, mode="eval")

    exe = fluid.Executor(place)
    exe.run(startup_program)

    # 1. quantization configs
    quant_config = {
        # weight quantize type, default is 'channel_wise_abs_max'
        'weight_quantize_type': 'channel_wise_abs_max',
        # activation quantize type, default is 'moving_average_abs_max'
        'activation_quantize_type': 'moving_average_abs_max',
        # weight quantize bit num, default is 8
        'weight_bits': 8,
        # activation quantize bit num, default is 8
        'activation_bits': 8,
        # ops whose name_scope matches an entry in not_quant_pattern will not be quantized
        'not_quant_pattern': ['skip_quant'],
        # ops whose type is listed in quantize_op_types will be quantized
        'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'],
        # data type after quantization, such as 'uint8' or 'int8'. default is 'int8'
        'dtype': 'int8',
        # window size for 'range_abs_max' quantization. default is 10000
        'window_size': 10000,
        # The decay coefficient of moving average, default is 0.9
        'moving_rate': 0.9,
    }

    # 2. quantization transform programs (training aware)
    #    Make some quantization transforms in the graph before training and testing.
    #    According to the weight and activation quantization type, the graph will be added
    #    some fake quantize operators and fake dequantize operators.
    act_preprocess_func = pact
    optimizer_func = get_optimizer
    executor = exe

    eval_program = quant_aware(eval_program,
                               place,
                               quant_config,
                               scope=None,
                               act_preprocess_func=act_preprocess_func,
                               optimizer_func=optimizer_func,
                               executor=executor,
                               for_test=True)
    quant_train_program = quant_aware(train_program,
                                      place,
                                      quant_config,
                                      scope=None,
                                      act_preprocess_func=act_preprocess_func,
                                      optimizer_func=optimizer_func,
                                      executor=executor,
                                      for_test=False)

    # compile program for multi-devices
    train_compile_program = program.create_multi_devices_program(
        quant_train_program, train_opt_loss_name, for_quant=True)

    init_model(config, train_program, exe)

    train_info_dict = {'compile_program':train_compile_program,\
        'train_program':quant_train_program,\
        'reader':train_loader,\
        'fetch_name_list':train_fetch_name_list,\
        'fetch_varname_list':train_fetch_varname_list,\
        'model_average': model_average}

    eval_info_dict = {'program':eval_program,\
        'reader':eval_reader,\
        'fetch_name_list':eval_fetch_name_list,\
        'fetch_varname_list':eval_fetch_varname_list}

    if train_alg_type == 'det':
        program.train_eval_det_run(config,
                                   exe,
                                   train_info_dict,
                                   eval_info_dict,
                                   is_slim="quant")
    elif train_alg_type == 'rec':
        program.train_eval_rec_run(config,
                                   exe,
                                   train_info_dict,
                                   eval_info_dict,
                                   is_slim="quant")
    else:
        program.train_eval_cls_run(config,
                                   exe,
                                   train_info_dict,
                                   eval_info_dict,
                                   is_slim="quant")
Code Example #14
def compress(args):
    # 1. quantization configs
    quant_config = {
        # weight quantize type, default is 'channel_wise_abs_max'
        'weight_quantize_type': 'channel_wise_abs_max',
        # activation quantize type, default is 'moving_average_abs_max'
        'activation_quantize_type': 'moving_average_abs_max',
        # weight quantize bit num, default is 8
        'weight_bits': 8,
        # activation quantize bit num, default is 8
        'activation_bits': 8,
        # ops whose name_scope matches an entry in not_quant_pattern will not be quantized
        'not_quant_pattern': ['skip_quant'],
        # ops whose type is listed in quantize_op_types will be quantized
        'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'],
        # data type after quantization, such as 'uint8' or 'int8'. default is 'int8'
        'dtype': 'int8',
        # window size for 'range_abs_max' quantization. default is 10000
        'window_size': 10000,
        # The decay coefficient of moving average, default is 0.9
        'moving_rate': 0.9,
    }

    train_reader = None
    test_reader = None
    if args.data == "mnist":
        import paddle.dataset.mnist as reader
        train_reader = reader.train()
        val_reader = reader.test()
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_reader = reader.train()
        val_reader = reader.val()
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))

    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    if args.use_pact:
        image.stop_gradient = False
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)

    train_prog = fluid.default_main_program()
    val_program = fluid.default_main_program().clone(for_test=True)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    opt = create_optimizer(args)
    opt.minimize(avg_cost)

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # 2. quantization transform programs (training aware)
    #    Make some quantization transforms in the graph before training and testing.
    #    According to the weight and activation quantization type, the graph will be added
    #    some fake quantize operators and fake dequantize operators.

    if args.use_pact:
        act_preprocess_func = pact
        optimizer_func = get_optimizer
        executor = exe
    else:
        act_preprocess_func = None
        optimizer_func = None
        executor = None

    val_program = quant_aware(val_program,
                              place,
                              quant_config,
                              scope=None,
                              act_preprocess_func=act_preprocess_func,
                              optimizer_func=optimizer_func,
                              executor=executor,
                              for_test=True)
    compiled_train_prog = quant_aware(train_prog,
                                      place,
                                      quant_config,
                                      scope=None,
                                      act_preprocess_func=act_preprocess_func,
                                      optimizer_func=optimizer_func,
                                      executor=executor,
                                      for_test=False)

    assert os.path.exists(
        args.pretrained_model), "pretrained_model doesn't exist"

    if args.pretrained_model:

        def if_exist(var):
            return os.path.exists(os.path.join(args.pretrained_model,
                                               var.name))

        fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)

    val_reader = paddle.fluid.io.batch(val_reader, batch_size=args.batch_size)
    train_reader = paddle.fluid.io.batch(train_reader,
                                         batch_size=args.batch_size,
                                         drop_last=True)

    train_feeder = fluid.DataFeeder([image, label], place)
    val_feeder = fluid.DataFeeder([image, label],
                                  place,
                                  program=val_program)

    def test(epoch, program):
        batch_id = 0
        acc_top1_ns = []
        acc_top5_ns = []
        for data in val_reader():
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program,
                feed=train_feeder.feed(data),
                fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, np.mean(acc_top1_n),
                            np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))
            batch_id += 1

        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(acc_top1_ns)),
                np.mean(np.array(acc_top5_ns))))
        return np.mean(np.array(acc_top1_ns))

    def train(epoch, compiled_train_prog):

        batch_id = 0
        for data in train_reader():
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n = exe.run(
                compiled_train_prog,
                feed=train_feeder.feed(data),
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] - loss: {}; acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n,
                            end_time - start_time))

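            # Every 1000 batches, dump the learned PACT clipping thresholds,
            # which live in parameters whose name contains 'pact'.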
            if args.use_pact and batch_id % 1000 == 0:
                threshold = {}
                for var in val_program.list_vars():
                    if 'pact' in var.name:
                        array = np.array(fluid.global_scope().find_var(
                            var.name).get_tensor())
                        threshold[var.name] = array[0]
                print(threshold)

            batch_id += 1

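    # Graph-optimization passes (memory reuse, inplace, reduce fusion) are
    # disabled here: they can interfere with the inserted fake quant/dequant
    # ops during quantization-aware training.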
    build_strategy = fluid.BuildStrategy()
    build_strategy.memory_optimize = False
    build_strategy.enable_inplace = False
    build_strategy.fuse_all_reduce_ops = False
    build_strategy.sync_batch_norm = False
    exec_strategy = fluid.ExecutionStrategy()
    compiled_train_prog = compiled_train_prog.with_data_parallel(
        loss_name=avg_cost.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    # train loop
    best_acc1 = 0.0
    best_epoch = 0
    for i in range(args.num_epochs):
        train(i, compiled_train_prog)
        acc1 = test(i, val_program)
        fluid.io.save_persistables(exe,
                                   dirname=os.path.join(
                                       args.checkpoint_dir, str(i)),
                                   main_program=val_program)
        if acc1 > best_acc1:
            best_acc1 = acc1
            best_epoch = i
            fluid.io.save_persistables(exe,
                                       dirname=os.path.join(
                                           args.checkpoint_dir, 'best_model'),
                                       main_program=val_program)
    if os.path.exists(os.path.join(args.checkpoint_dir, 'best_model')):
        fluid.io.load_persistables(exe,
                                   dirname=os.path.join(
                                       args.checkpoint_dir, 'best_model'),
                                   main_program=val_program)
    # 3. Freeze the graph after training by adjusting the quantize
    #    operators' order for the inference.
    #    The dtype of float_program's weights is float32, but in int8 range.
    float_program, int8_program = convert(val_program, place, quant_config, \
                                                        scope=None, \
                                                        save_int8=True)
    print("eval best_model after convert")
    final_acc1 = test(best_epoch, float_program)
    # 4. Save inference model
    model_path = os.path.join(
        quantization_model_save_dir, args.model,
        'act_' + quant_config['activation_quantize_type'] + '_w_' +
        quant_config['weight_quantize_type'])
    float_path = os.path.join(model_path, 'float')
    int8_path = os.path.join(model_path, 'int8')
    if not os.path.isdir(model_path):
        os.makedirs(model_path)

    fluid.io.save_inference_model(dirname=float_path,
                                  feeded_var_names=[image.name],
                                  target_vars=[out],
                                  executor=exe,
                                  main_program=float_program,
                                  model_filename=float_path + '/model',
                                  params_filename=float_path + '/params')

    fluid.io.save_inference_model(dirname=int8_path,
                                  feeded_var_names=[image.name],
                                  target_vars=[out],
                                  executor=exe,
                                  main_program=int8_program,
                                  model_filename=int8_path + '/model',
                                  params_filename=int8_path + '/params')
Code Example #15
def main():
    # 1. quantization configs
    quant_config = {
        # weight quantize type, default is 'channel_wise_abs_max'
        'weight_quantize_type': 'channel_wise_abs_max',
        # activation quantize type, default is 'moving_average_abs_max'
        'activation_quantize_type': 'moving_average_abs_max',
        # weight quantize bit num, default is 8
        'weight_bits': 8,
        # activation quantize bit num, default is 8
        'activation_bits': 8,
        # ops whose name_scope matches an entry in not_quant_pattern will not be quantized
        'not_quant_pattern': ['skip_quant'],
        # ops whose type is listed in quantize_op_types will be quantized
        'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'],
        # data type after quantization, such as 'uint8' or 'int8'. default is 'int8'
        'dtype': 'int8',
        # window size for 'range_abs_max' quantization. default is 10000
        'window_size': 10000,
        # The decay coefficient of moving average, default is 0.9
        'moving_rate': 0.9,
    }

    startup_prog, eval_program, place, config, alg_type = program.preprocess()

    feeded_var_names, target_vars, fetches_var_name = program.build_export(
        config, eval_program, startup_prog)

    eval_program = eval_program.clone(for_test=True)
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    eval_program = quant_aware(
        eval_program, place, quant_config, scope=None, for_test=True)

    init_model(config, eval_program, exe)

    # 2. Convert the program before save inference program
    #    The dtype of eval_program's weights is float32, but in int8 range.

    eval_program = convert(eval_program, place, quant_config, scope=None)

    eval_fetch_name_list = fetches_var_name
    eval_fetch_varname_list = [v.name for v in target_vars]
    eval_reader = reader_main(config=config, mode="eval")
    quant_info_dict = {'program':eval_program,\
        'reader':eval_reader,\
        'fetch_name_list':eval_fetch_name_list,\
        'fetch_varname_list':eval_fetch_varname_list}

    if alg_type == 'det':
        final_metrics = eval_det_run(exe, config, quant_info_dict, "eval")
    else:
        final_metrics = eval_rec_run(exe, config, quant_info_dict, "eval")
    print(final_metrics)

    # 3. Save inference model
    model_path = "./quant_model"
    if not os.path.isdir(model_path):
        os.makedirs(model_path)

    fluid.io.save_inference_model(
        dirname=model_path,
        feeded_var_names=feeded_var_names,
        target_vars=target_vars,
        executor=exe,
        main_program=eval_program,
        model_filename=model_path + '/model',
        params_filename=model_path + '/params')
    print("model saved as {}".format(model_path))
Code Example #16
def compress(args):
    ############################################################################################################
    # 1. quantization configs
    ############################################################################################################
    quant_config = {
        # weight quantize type, default is 'channel_wise_abs_max'
        'weight_quantize_type': 'channel_wise_abs_max',
        # activation quantize type, default is 'moving_average_abs_max'
        'activation_quantize_type': 'moving_average_abs_max',
        # weight quantize bit num, default is 8
        'weight_bits': 8,
        # activation quantize bit num, default is 8
        'activation_bits': 8,
        # ops whose name_scope matches an entry in not_quant_pattern will not be quantized
        'not_quant_pattern': ['skip_quant'],
        # ops whose type is listed in quantize_op_types will be quantized
        'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'],
        # data type after quantization, such as 'uint8' or 'int8'. default is 'int8'
        'dtype': 'int8',
        # window size for 'range_abs_max' quantization. default is 10000
        'window_size': 10000,
        # The decay coefficient of moving average, default is 0.9
        'moving_rate': 0.9,
    }

    if args.data == "mnist":
        train_dataset = paddle.vision.datasets.MNIST(mode='train')
        val_dataset = paddle.vision.datasets.MNIST(mode='test')
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_dataset = reader.ImageNetDataset(mode='train')
        val_dataset = reader.ImageNetDataset(mode='val')
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))

    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = paddle.static.data(name='image',
                               shape=[None] + image_shape,
                               dtype='float32')
    label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = paddle.nn.functional.loss.cross_entropy(input=out, label=label)
    avg_cost = paddle.mean(x=cost)
    acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
    acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)

    train_prog = paddle.static.default_main_program()
    val_program = paddle.static.default_main_program().clone(for_test=True)

    place = paddle.CUDAPlace(0) if args.use_gpu else paddle.CPUPlace()
    ############################################################################################################
    # 2. quantization transform programs (training aware)
    #    Make some quantization transforms in the graph before training and testing.
    #    According to the weight and activation quantization type, the graph will be added
    #    some fake quantize operators and fake dequantize operators.
    ############################################################################################################
    val_program = quant_aware(val_program,
                              place,
                              quant_config,
                              scope=None,
                              for_test=True)
    compiled_train_prog = quant_aware(train_prog,
                                      place,
                                      quant_config,
                                      scope=None,
                                      for_test=False)
    opt = create_optimizer(args)
    opt.minimize(avg_cost)

    exe = paddle.static.Executor(place)
    exe.run(paddle.static.default_startup_program())

    assert os.path.exists(
        args.pretrained_model), "pretrained_model doesn't exist"

    if args.pretrained_model:
        paddle.static.load(train_prog, args.pretrained_model, exe)

    places = paddle.static.cuda_places(
    ) if args.use_gpu else paddle.static.cpu_places()

    train_loader = paddle.io.DataLoader(train_dataset,
                                        places=places,
                                        feed_list=[image, label],
                                        drop_last=True,
                                        batch_size=args.batch_size,
                                        return_list=False,
                                        use_shared_memory=True,
                                        shuffle=True,
                                        num_workers=4)
    valid_loader = paddle.io.DataLoader(val_dataset,
                                        places=place,
                                        feed_list=[image, label],
                                        drop_last=False,
                                        return_list=False,
                                        batch_size=args.batch_size,
                                        use_shared_memory=True,
                                        shuffle=False)

    def test(epoch, program):
        batch_id = 0
        acc_top1_ns = []
        acc_top5_ns = []
        for data in valid_loader():
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program, feed=data, fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, np.mean(acc_top1_n),
                            np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))
            batch_id += 1

        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(acc_top1_ns)),
                np.mean(np.array(acc_top5_ns))))
        return np.mean(np.array(acc_top1_ns))

    def train(epoch, compiled_train_prog):

        batch_id = 0
        for data in train_loader():
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n = exe.run(
                compiled_train_prog,
                feed=data,
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] - loss: {}; acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n,
                            end_time - start_time))
            batch_id += 1

    build_strategy = paddle.static.BuildStrategy()
    build_strategy.memory_optimize = False
    build_strategy.enable_inplace = False
    build_strategy.fuse_all_reduce_ops = False
    build_strategy.sync_batch_norm = False
    exec_strategy = paddle.static.ExecutionStrategy()
    compiled_train_prog = compiled_train_prog.with_data_parallel(
        loss_name=avg_cost.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    ############################################################################################################
    # train loop
    ############################################################################################################
    best_acc1 = 0.0
    best_epoch = 0
    for i in range(args.num_epochs):
        train(i, compiled_train_prog)
        acc1 = test(i, val_program)
        paddle.static.save(program=val_program,
                           model_path=os.path.join(args.checkpoint_dir,
                                                   str(i)))
        if acc1 > best_acc1:
            best_acc1 = acc1
            best_epoch = i
            paddle.static.save(program=val_program,
                               model_path=os.path.join(args.checkpoint_dir,
                                                       'best_model'))
    if os.path.exists(os.path.join(args.checkpoint_dir, 'best_model')):
        paddle.static.load(executor=exe,
                           model_path=os.path.join(args.checkpoint_dir,
                                                   'best_model'),
                           program=val_program)
    ############################################################################################################
    # 3. Freeze the graph after training by adjusting the quantize
    #    operators' order for the inference.
    #    The dtype of float_program's weights is float32, but in int8 range.
    ############################################################################################################
    float_program, int8_program = convert(val_program, place, quant_config, \
                                                        scope=None, \
                                                        save_int8=True)
    print("eval best_model after convert")
    final_acc1 = test(best_epoch, float_program)
    ############################################################################################################
    # 4. Save inference model
    ############################################################################################################
    model_path = os.path.join(
        quantization_model_save_dir, args.model,
        'act_' + quant_config['activation_quantize_type'] + '_w_' +
        quant_config['weight_quantize_type'])
    float_path = os.path.join(model_path, 'float')
    if not os.path.isdir(model_path):
        os.makedirs(model_path)

    paddle.fluid.io.save_inference_model(dirname=float_path,
                                         feeded_var_names=[image.name],
                                         target_vars=[out],
                                         executor=exe,
                                         main_program=float_program,
                                         model_filename=float_path + '/model',
                                         params_filename=float_path +
                                         '/params')
Code Example #17
File: infer.py  Project: zyx1996/PaddleDetection
def main():
    cfg = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    check_config(cfg)
    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)
    # check if paddlepaddle version is satisfied
    check_version()

    main_arch = cfg.architecture

    dataset = cfg.TestReader['dataset']

    test_images = get_test_images(FLAGS.infer_dir, FLAGS.infer_img)
    dataset.set_images(test_images)

    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    model = create(main_arch)

    startup_prog = fluid.Program()
    infer_prog = fluid.Program()
    with fluid.program_guard(infer_prog, startup_prog):
        with fluid.unique_name.guard():
            inputs_def = cfg['TestReader']['inputs_def']
            feed_vars, loader = model.build_inputs(**inputs_def)
            test_fetches = model.test(feed_vars)
    infer_prog = infer_prog.clone(True)

    reader = create_reader(cfg.TestReader)
    # When the loader is iterable, pass the place: set_sample_list_generator(reader, place)
    loader.set_sample_list_generator(reader)
    not_quant_pattern = []
    if FLAGS.not_quant_pattern:
        not_quant_pattern = FLAGS.not_quant_pattern
    config = {
        'weight_quantize_type': 'channel_wise_abs_max',
        'activation_quantize_type': 'moving_average_abs_max',
        'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
        'not_quant_pattern': not_quant_pattern
    }

    infer_prog = quant_aware(infer_prog, place, config, for_test=True)

    exe.run(startup_prog)

    if cfg.weights:
        checkpoint.load_params(exe, infer_prog, cfg.weights)
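    # save_int8=False: convert() returns only the float program, whose weights
    # stay float32 but are already quantized to the int8 grid.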
    infer_prog = convert(infer_prog, place, config, save_int8=False)

    # parse infer fetches
    assert cfg.metric in ['COCO', 'VOC', 'OID', 'WIDERFACE'], \
            "unknown metric type {}".format(cfg.metric)
    extra_keys = []
    if cfg['metric'] in ['COCO', 'OID']:
        extra_keys = ['im_info', 'im_id', 'im_shape']
    if cfg['metric'] == 'VOC' or cfg['metric'] == 'WIDERFACE':
        extra_keys = ['im_id', 'im_shape']
    keys, values, _ = parse_fetches(test_fetches, infer_prog, extra_keys)

    # parse dataset category
    if cfg.metric == 'COCO':
        from ppdet.utils.coco_eval import bbox2out, mask2out, get_category_info
    if cfg.metric == 'OID':
        from ppdet.utils.oid_eval import bbox2out, get_category_info
    if cfg.metric == "VOC":
        from ppdet.utils.voc_eval import bbox2out, get_category_info
    if cfg.metric == "WIDERFACE":
        from ppdet.utils.widerface_eval_utils import bbox2out, get_category_info

    anno_file = dataset.get_anno()
    with_background = dataset.with_background
    use_default_label = dataset.use_default_label

    clsid2catid, catid2name = get_category_info(anno_file, with_background,
                                                use_default_label)

    # whether output bbox is normalized in model output layer
    is_bbox_normalized = False
    if hasattr(model, 'is_bbox_normalized') and \
            callable(model.is_bbox_normalized):
        is_bbox_normalized = model.is_bbox_normalized()

    imid2path = dataset.get_imid2path()
    iter_id = 0
    try:
        loader.start()
        while True:
            outs = exe.run(infer_prog, fetch_list=values, return_numpy=False)
            res = {
                k: (np.array(v), v.recursive_sequence_lengths())
                for k, v in zip(keys, outs)
            }
            logger.info('Infer iter {}'.format(iter_id))
            iter_id += 1
            bbox_results = None
            mask_results = None
            if 'bbox' in res:
                bbox_results = bbox2out([res], clsid2catid, is_bbox_normalized)
            if 'mask' in res:
                mask_results = mask2out([res], clsid2catid,
                                        model.mask_head.resolution)

            # visualize result
            im_ids = res['im_id'][0]
            for im_id in im_ids:
                image_path = imid2path[int(im_id)]
                image = Image.open(image_path).convert('RGB')

                image = visualize_results(image, int(im_id), catid2name,
                                          FLAGS.draw_threshold, bbox_results,
                                          mask_results)

                save_name = get_save_image_name(FLAGS.output_dir, image_path)
                logger.info(
                    "Detection bbox results save in {}".format(save_name))
                image.save(save_name, quality=95)
    except (StopIteration, fluid.core.EOFException):
        loader.reset()
Code Example #18
    def test_accuracy(self):
        image = paddle.static.data(
            name='image', shape=[None, 1, 28, 28], dtype='float32')
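        # PACT (used below via act_preprocess_func=pact) adds a learnable
        # clipping op on activations, so gradients must be allowed to reach
        # the input; hence stop_gradient is switched off.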
        image.stop_gradient = False
        label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
        model = MobileNet()
        out = model.net(input=image, class_dim=10)
        cost = paddle.nn.functional.loss.cross_entropy(input=out, label=label)
        avg_cost = paddle.mean(x=cost)
        acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
        acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
        optimizer = paddle.optimizer.Momentum(
            momentum=0.9,
            learning_rate=0.01,
            weight_decay=paddle.regularizer.L2Decay(4e-5))
        optimizer.minimize(avg_cost)
        main_prog = paddle.static.default_main_program()
        val_prog = main_prog.clone(for_test=True)

        place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda(
        ) else paddle.CPUPlace()
        exe = paddle.static.Executor(place)
        exe.run(paddle.static.default_startup_program())

        def transform(x):
            return np.reshape(x, [1, 28, 28])

        train_dataset = paddle.vision.datasets.MNIST(
            mode='train', backend='cv2', transform=transform)
        test_dataset = paddle.vision.datasets.MNIST(
            mode='test', backend='cv2', transform=transform)
        train_loader = paddle.io.DataLoader(
            train_dataset,
            places=place,
            feed_list=[image, label],
            drop_last=True,
            return_list=False,
            batch_size=64)
        valid_loader = paddle.io.DataLoader(
            test_dataset,
            places=place,
            feed_list=[image, label],
            batch_size=64,
            return_list=False)

        def train(program):
            iter = 0
            for data in train_loader():
                cost, top1, top5 = exe.run(
                    program,
                    feed=data,
                    fetch_list=[avg_cost, acc_top1, acc_top5])
                iter += 1
                if iter % 100 == 0:
                    print(
                        'train iter={}, avg loss {}, acc_top1 {}, acc_top5 {}'.
                        format(iter, cost, top1, top5))

        def test(program):
            iter = 0
            result = [[], [], []]
            for data in valid_loader():
                cost, top1, top5 = exe.run(
                    program,
                    feed=data,
                    fetch_list=[avg_cost, acc_top1, acc_top5])
                iter += 1
                if iter % 100 == 0:
                    print('eval iter={}, avg loss {}, acc_top1 {}, acc_top5 {}'.
                          format(iter, cost, top1, top5))
                result[0].append(cost)
                result[1].append(top1)
                result[2].append(top5)
            print(' avg loss {}, acc_top1 {}, acc_top5 {}'.format(
                np.mean(result[0]), np.mean(result[1]), np.mean(result[2])))
            return np.mean(result[1]), np.mean(result[2])

        train(main_prog)
        top1_1, top5_1 = test(main_prog)

        config = {
            'weight_quantize_type': 'channel_wise_abs_max',
            'activation_quantize_type': 'moving_average_abs_max',
            'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
        }
        quant_train_prog_pact = quant_aware(
            main_prog,
            place,
            config,
            for_test=False,
            act_preprocess_func=pact,
            optimizer_func=get_optimizer,
            executor=exe)

        quant_eval_prog = quant_aware(val_prog, place, config, for_test=True)
        train(quant_train_prog_pact)
        quant_eval_prog, int8_prog = convert(
            quant_eval_prog, place, config, save_int8=True)
        top1_2, top5_2 = test(quant_eval_prog)
        # values before quantization and after quantization should be close
        print("before quantization: top1: {}, top5: {}".format(top1_1, top5_1))
        print("after quantization: top1: {}, top5: {}".format(top1_2, top5_2))
Code Example #19
        if iter % 100 == 0:
            print('test iter={}, top1={}, top5={}, loss={}'.format(
                iter, acc1.mean(), acc5.mean(), loss.mean()))
        res[0].append(acc1.mean())
        res[1].append(acc5.mean())
        iter += 1
    print('final test result top1={}, top5={}'.format(
        np.array(res[0]).mean(),
        np.array(res[1]).mean()))


train(train_program)

test(val_program)

# Quantize the model
place = exe.place
config = {
    'weight_quantize_type': 'channel_wise_abs_max',
    'activation_quantize_type': 'moving_average_abs_max',
    'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d']
}
quant_program = quant.quant_aware(train_program, place, config,
                                  for_test=False)  # add your code here
val_quant_program = quant.quant_aware(val_program,
                                      place,
                                      config,
                                      for_test=True)  # add your code here
train(quant_program)
test(val_quant_program)
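
The tutorial snippet stops after evaluating the quantized programs. Following the pattern of the earlier examples, one would then freeze and export; a sketch that assumes the tutorial's feed variable image and fetch variable out, with an illustrative output directory:

# Freeze the quantized eval program: weights stay float32 but in int8 range.
final_program = quant.convert(val_quant_program, place, config)

fluid.io.save_inference_model(
    dirname='./quant_infer_model',   # illustrative output directory
    feeded_var_names=[image.name],   # assumes the tutorial's input variable
    target_vars=[out],               # assumes the tutorial's output variable
    executor=exe,
    main_program=final_program)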