Example #1
def main(config, device, logger, vdl_writer):
    # init dist environment
    if config['Global']['distributed']:
        dist.init_parallel_env()

    global_config = config['Global']

    # build dataloader
    train_dataloader = build_dataloader(config, 'Train', device, logger)
    if config['Eval']:
        valid_dataloader = build_dataloader(config, 'Eval', device, logger)
    else:
        valid_dataloader = None

    # build post process
    post_process_class = build_post_process(config['PostProcess'],
                                            global_config)

    # build model
    # for rec algorithm
    if hasattr(post_process_class, 'character'):
        char_num = len(getattr(post_process_class, 'character'))
        config['Architecture']["Head"]['out_channels'] = char_num
    model = build_model(config['Architecture'])

    # prepare to quant: insert the fake-quant ops before the model is wrapped
    # for distributed training and before the optimizer is built, so the
    # learnable PACT thresholds are registered as model parameters.
    # NOTE: quant_config and the PACT preprocess class are assumed to be
    # defined elsewhere in this file.
    quanter = QAT(config=quant_config, act_preprocess=PACT)
    quanter.quantize(model)

    if config['Global']['distributed']:
        model = paddle.DataParallel(model)

    # build loss
    loss_class = build_loss(config['Loss'])

    # build optim
    optimizer, lr_scheduler = build_optimizer(
        config['Optimizer'],
        epochs=config['Global']['epoch_num'],
        step_each_epoch=len(train_dataloader),
        parameters=model.parameters())

    # build metric
    eval_class = build_metric(config['Metric'])
    # load pretrain model
    pre_best_model_dict = init_model(config, model, logger, optimizer)

    logger.info('train dataloader has {} iters'.format(len(train_dataloader)))
    if valid_dataloader is not None:
        logger.info('valid dataloader has {} iters'.format(
            len(valid_dataloader)))

    # start train
    program.train(config, train_dataloader, valid_dataloader, device, model,
                  loss_class, optimizer, lr_scheduler, post_process_class,
                  eval_class, pre_best_model_dict, logger, vdl_writer)
Example #2
def get_quanter(config, model):
    if config.get("Slim", False) and config["Slim"].get("quant", False):
        from paddleslim.dygraph.quant import QAT
        assert config["Slim"]["quant"]["name"].lower() == 'pact', \
            'Only the PACT quantization method is supported now'
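        # QUANT_CONFIG and logger are assumed to be module-level globals in
        # the source file.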
        QUANT_CONFIG["activation_preprocess_type"] = "PACT"
        quanter = QAT(config=QUANT_CONFIG)
        quanter.quantize(model)
        logger.info("QAT model summary:")
        paddle.summary(model, (1, 3, 224, 224))
    else:
        quanter = None
    return quanter
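
A typical call site for the helper above, shown as a hedged sketch: the config shape follows the keys get_quanter reads, the model is any paddle.nn.Layer, and QUANT_CONFIG and logger are the module-level globals the function references.

import paddle

config = {"Slim": {"quant": {"name": "pact"}}}  # hypothetical minimal config
model = paddle.vision.models.resnet18()

quanter = get_quanter(config, model)  # quantizes `model` in place

# After (re)training, export the quantized inference model:
if quanter is not None:
    quanter.save_quantized_model(
        model,
        "./output/inference",
        input_spec=[
            paddle.static.InputSpec(shape=[None, 3, 224, 224],
                                    dtype='float32')
        ])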
Example #3
def main():
    args = parse_args()

    net = architectures.__dict__[args.model]
    model = Net(net, args.class_dim, args.model)

    # get QAT model
    quant_config = get_default_quant_config()
    # TODO(littletomatodonkey): add PACT for export model
    # quant_config["activation_preprocess_type"] = "PACT"
    quanter = QAT(config=quant_config)
    quanter.quantize(model)

    load_dygraph_pretrain(model.pre_net,
                          path=args.pretrained_model,
                          load_static_weights=args.load_static_weights)
    model.eval()

    save_path = os.path.join(args.output_path, "inference")
    quanter.save_quantized_model(model,
                                 save_path,
                                 input_spec=[
                                     paddle.static.InputSpec(shape=[
                                         None, 3, args.img_size, args.img_size
                                     ],
                                                             dtype='float32')
                                 ])
    print('inference QAT model is saved to {}'.format(save_path))
Example #4
File: qat.py  Project: zzjjay/PaddleSlim
def main():
    model_list = [x for x in models.__dict__["__all__"]]
    assert FLAGS.arch in model_list, "Expected FLAGS.arch in {}, but received {}".format(
        model_list, FLAGS.arch)
    model = models.__dict__[FLAGS.arch](pretrained=True)

    if FLAGS.enable_quant:
        print("quantize model")
        quant_config = {
            'weight_preprocess_type': None,
            'activation_preprocess_type': 'PACT' if FLAGS.use_pact else None,
            'weight_quantize_type': "channel_wise_abs_max",
            'activation_quantize_type': 'moving_average_abs_max',
            'weight_bits': 8,
            'activation_bits': 8,
            'window_size': 10000,
            'moving_rate': 0.9,
            'quantizable_layer_type': ['Conv2D', 'Linear'],
        }
        dygraph_qat = QAT(quant_config)
        dygraph_qat.quantize(model)

    model = hapi.Model(model)

    train_dataset = dataset.ImageNetDataset(data_dir=FLAGS.data, mode='train')
    val_dataset = dataset.ImageNetDataset(data_dir=FLAGS.data, mode='val')

    optim = paddle.optimizer.SGD(learning_rate=FLAGS.lr,
                                 parameters=model.parameters(),
                                 weight_decay=FLAGS.weight_decay)

    model.prepare(optim, paddle.nn.CrossEntropyLoss(), Accuracy(topk=(1, 5)))

    checkpoint_dir = os.path.join(
        FLAGS.output_dir, "checkpoint", FLAGS.arch + "_checkpoint",
        time.strftime('%Y-%m-%d-%H-%M', time.localtime()))
    model.fit(train_dataset,
              val_dataset,
              batch_size=FLAGS.batch_size,
              epochs=FLAGS.epoch,
              save_dir=checkpoint_dir,
              num_workers=FLAGS.num_workers)

    if FLAGS.enable_quant:
        quant_output_dir = os.path.join(FLAGS.output_dir, "quant_dygraph",
                                        FLAGS.arch, "int8_infer")
        input_spec = paddle.static.InputSpec(
            shape=[None, 3, 224, 224], dtype='float32')
        dygraph_qat.save_quantized_model(model.network, quant_output_dir,
                                         [input_spec])
        print("Save quantized inference model in " + quant_output_dir)
Example #5
    def prepare(self):
        self.quanter = QAT()
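
Called with no config, QAT falls back to its defaults (channel_wise_abs_max weights, moving_average_abs_max activations, 8 bits, as spelled out in the configs in later examples). A minimal end-to-end sketch of that default workflow, assuming paddleslim is installed and the model is a plain paddle.nn.Layer:

import paddle
from paddleslim.dygraph.quant import QAT

model = paddle.vision.models.mobilenet_v1()

quanter = QAT()          # default quantization config
quanter.quantize(model)  # inserts fake-quant ops in place

# ... quantization-aware training loop goes here ...

quanter.save_quantized_model(
    model,
    "./tmp/qat_default",
    input_spec=[
        paddle.static.InputSpec(shape=[None, 3, 224, 224], dtype='float32')
    ])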
Example #6
class TestQAT(unittest.TestCase):
    """
    QAT = quantization-aware training
    This test case uses the default quantization config; weight_quantize_type
    is channel_wise_abs_max.
    """
    def set_seed(self):
        seed = 1
        np.random.seed(seed)
        paddle.static.default_main_program().random_seed = seed
        paddle.static.default_startup_program().random_seed = seed

    def prepare(self):
        self.quanter = QAT()

    def test_qat_acc(self):
        self.prepare()
        self.set_seed()

        fp32_lenet = ImperativeLenet()

        place = paddle.CUDAPlace(0) \
            if paddle.is_compiled_with_cuda() else paddle.CPUPlace()

        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])

        train_dataset = paddle.vision.datasets.MNIST(mode='train',
                                                     backend='cv2',
                                                     transform=transform)
        val_dataset = paddle.vision.datasets.MNIST(mode='test',
                                                   backend='cv2',
                                                   transform=transform)

        train_reader = paddle.io.DataLoader(train_dataset,
                                            drop_last=True,
                                            places=place,
                                            batch_size=64,
                                            return_list=True)
        test_reader = paddle.io.DataLoader(val_dataset,
                                           places=place,
                                           batch_size=64,
                                           return_list=True)

        def train(model):
            adam = paddle.optimizer.Adam(learning_rate=0.0001,
                                         parameters=model.parameters())
            epoch_num = 1
            for epoch in range(epoch_num):
                model.train()
                for batch_id, data in enumerate(train_reader):
                    img = paddle.to_tensor(data[0])
                    label = paddle.to_tensor(data[1])
                    img = paddle.reshape(img, [-1, 1, 28, 28])
                    label = paddle.reshape(label, [-1, 1])

                    out = model(img)
                    acc = paddle.metric.accuracy(out, label)
                    loss = paddle.nn.functional.loss.cross_entropy(out, label)
                    avg_loss = paddle.mean(loss)
                    avg_loss.backward()
                    adam.minimize(avg_loss)
                    model.clear_gradients()
                    if batch_id % 100 == 0:
                        _logger.info(
                            "Train | At epoch {} step {}: loss = {:}, acc= {:}"
                            .format(epoch, batch_id, avg_loss.numpy(),
                                    acc.numpy()))

        def test(model):
            model.eval()
            avg_acc = [[], []]
            for batch_id, data in enumerate(test_reader):
                img = paddle.to_tensor(data[0])
                img = paddle.reshape(img, [-1, 1, 28, 28])
                label = paddle.to_tensor(data[1])
                label = paddle.reshape(label, [-1, 1])

                out = model(img)
                acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
                acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
                avg_acc[0].append(acc_top1.numpy())
                avg_acc[1].append(acc_top5.numpy())
                if batch_id % 100 == 0:
                    _logger.info(
                        "Test | step {}: acc1 = {:}, acc5 = {:}".format(
                            batch_id, acc_top1.numpy(), acc_top5.numpy()))

            _logger.info("Test | Average: acc_top1 {}, acc_top5 {}".format(
                np.mean(avg_acc[0]), np.mean(avg_acc[1])))
            return np.mean(avg_acc[0]), np.mean(avg_acc[1])

        train(fp32_lenet)
        top1_1, top5_1 = test(fp32_lenet)

        fp32_lenet.__init__()
        quant_lenet = self.quanter.quantize(fp32_lenet)
        train(quant_lenet)
        top1_2, top5_2 = test(quant_lenet)
        self.quanter.save_quantized_model(quant_lenet,
                                          './tmp/qat',
                                          input_spec=[
                                              paddle.static.InputSpec(
                                                  shape=[None, 1, 28, 28],
                                                  dtype='float32')
                                          ])

        # values before quantization and after quantization should be close
        _logger.info("Before quantization: top1: {}, top5: {}".format(
            top1_1, top5_1))
        _logger.info("After quantization: top1: {}, top5: {}".format(
            top1_2, top5_2))
        _logger.info("\n")

        diff = 0.002
        self.assertTrue(
            top1_1 - top1_2 < diff,
            msg="The acc of quant model is too lower than fp32 model")
Example #7
    def prepare(self):
        quant_config = {
            'activation_preprocess_type': 'PACT',
        }
        self.quanter = QAT(config=quant_config)
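
For context, PACT replaces a fixed activation range with a learnable clipping threshold alpha, so outliers no longer inflate the quantization scale. The standalone layer below is only an illustrative sketch of that idea (PACTClip and init_alpha are made-up names); PaddleSlim ships its own PACT implementation, which the config above selects.

import paddle

class PACTClip(paddle.nn.Layer):
    """Sketch of the PACT idea: clip activations to [-alpha, alpha] with a learnable alpha."""

    def __init__(self, init_alpha=20.0):
        super().__init__()
        self.alpha = self.create_parameter(
            shape=[1],
            default_initializer=paddle.nn.initializer.Constant(init_alpha))

    def forward(self, x):
        # Elementwise min/max keep the clip differentiable w.r.t. alpha,
        # so the threshold is learned jointly with the network weights.
        x = paddle.minimum(x, self.alpha)
        x = paddle.maximum(x, -self.alpha)
        return x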
Example #8
def main():
    ############################################################################################################
    # 1. quantization configs
    ############################################################################################################
    quant_config = {
        # weight preprocess type, default is None and no preprocessing is performed.
        'weight_preprocess_type': None,
        # activation preprocess type, default is None and no preprocessing is performed.
        'activation_preprocess_type': None,
        # weight quantize type, default is 'channel_wise_abs_max'
        'weight_quantize_type': 'channel_wise_abs_max',
        # activation quantize type, default is 'moving_average_abs_max'
        'activation_quantize_type': 'moving_average_abs_max',
        # weight quantize bit num, default is 8
        'weight_bits': 8,
        # activation quantize bit num, default is 8
        'activation_bits': 8,
        # data type after quantization, such as 'uint8', 'int8', etc. default is 'int8'
        'dtype': 'int8',
        # window size for 'range_abs_max' quantization. default is 10000
        'window_size': 10000,
        # The decay coefficient of moving average, default is 0.9
        'moving_rate': 0.9,
        # for dygraph quantization, layers of type in quantizable_layer_type will be quantized
        'quantizable_layer_type': ['Conv2D', 'Linear'],
    }
    FLAGS = ArgsParser().parse_args()
    config = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    logger = get_logger()
    # build post process

    post_process_class = build_post_process(config['PostProcess'],
                                            config['Global'])

    # build model
    # for rec algorithm
    if hasattr(post_process_class, 'character'):
        char_num = len(getattr(post_process_class, 'character'))
        # distillation model
        if config['Architecture']["algorithm"] in ["Distillation"]:
            for key in config['Architecture']["Models"]:
                config['Architecture']["Models"][key]["Head"][
                    'out_channels'] = char_num
        else:  # base rec model
            config['Architecture']["Head"]['out_channels'] = char_num

    model = build_model(config['Architecture'])

    # get QAT model
    quanter = QAT(config=quant_config)
    quanter.quantize(model)

    init_model(config, model)
    model.eval()

    # build metric
    eval_class = build_metric(config['Metric'])

    # build dataloader
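    # `device` is assumed to be set earlier in the source file
    # (e.g. via paddle.set_device).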
    valid_dataloader = build_dataloader(config, 'Eval', device, logger)

    use_srn = config['Architecture']['algorithm'] == "SRN"
    model_type = config['Architecture']['model_type']
    # start eval
    metric = program.eval(model, valid_dataloader, post_process_class,
                          eval_class, model_type, use_srn)

    logger.info('metric eval ***************')
    for k, v in metric.items():
        logger.info('{}:{}'.format(k, v))

    if config['Architecture']['model_type'] != "det":
        infer_shape = [3, 32, 100]
    else:
        infer_shape = [3, 640, 640]

    save_path = config["Global"]["save_inference_dir"]

    arch_config = config["Architecture"]
    # distillation model
    if arch_config["algorithm"] in ["Distillation"]:
        for idx, name in enumerate(model.model_name_list):
            sub_model_save_path = os.path.join(save_path, name, "inference")
            export_single_model(quanter, model.model_list[idx], infer_shape,
                                sub_model_save_path, logger)
    else:
        save_path = os.path.join(save_path, "inference")
        export_single_model(quanter, model, infer_shape, save_path, logger)
Example #9
    def test_qat_acc(self):
        lenet = ImperativeLenet()
        quanter = QAT()
        quanter.quantize(lenet)

        place = paddle.CUDAPlace(
            0) if paddle.is_compiled_with_cuda() else paddle.CPUPlace()

        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])

        train_dataset = paddle.vision.datasets.MNIST(mode='train',
                                                     backend='cv2',
                                                     transform=transform)
        val_dataset = paddle.vision.datasets.MNIST(mode='test',
                                                   backend='cv2',
                                                   transform=transform)

        train_reader = paddle.io.DataLoader(train_dataset,
                                            drop_last=True,
                                            places=place,
                                            batch_size=64,
                                            return_list=True)
        test_reader = paddle.io.DataLoader(val_dataset,
                                           places=place,
                                           batch_size=64,
                                           return_list=True)

        def train(model):
            adam = paddle.optimizer.Adam(learning_rate=0.0001,
                                         parameters=model.parameters())
            epoch_num = 1
            for epoch in range(epoch_num):
                model.train()
                for batch_id, data in enumerate(train_reader):
                    img = paddle.to_tensor(data[0])
                    label = paddle.to_tensor(data[1])
                    img = paddle.reshape(img, [-1, 1, 28, 28])
                    label = paddle.reshape(label, [-1, 1])

                    out = model(img)
                    acc = paddle.metric.accuracy(out, label)
                    loss = paddle.nn.functional.loss.cross_entropy(out, label)
                    avg_loss = paddle.mean(loss)
                    avg_loss.backward()
                    adam.minimize(avg_loss)
                    model.clear_gradients()
                    if batch_id % 100 == 0:
                        _logger.info(
                            "Train | At epoch {} step {}: loss = {:}, acc= {:}"
                            .format(epoch, batch_id, avg_loss.numpy(),
                                    acc.numpy()))

        def test(model):
            model.eval()
            avg_acc = [[], []]
            for batch_id, data in enumerate(test_reader):
                img = paddle.to_tensor(data[0])
                img = paddle.reshape(img, [-1, 1, 28, 28])
                label = paddle.to_tensor(data[1])
                label = paddle.reshape(label, [-1, 1])

                out = model(img)
                acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
                acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
                avg_acc[0].append(acc_top1.numpy())
                avg_acc[1].append(acc_top5.numpy())
                if batch_id % 100 == 0:
                    _logger.info(
                        "Test | step {}: acc1 = {:}, acc5 = {:}".format(
                            batch_id, acc_top1.numpy(), acc_top5.numpy()))

            _logger.info("Test |Average: acc_top1 {}, acc_top5 {}".format(
                np.mean(avg_acc[0]), np.mean(avg_acc[1])))
            return np.mean(avg_acc[0]), np.mean(avg_acc[1])

        train(lenet)
        top1_1, top5_1 = test(lenet)

        lenet.__init__()
        train(lenet)
        top1_2, top5_2 = test(lenet)

        # values before quantization and after quantization should be close
        _logger.info("Before quantization: top1: {}, top5: {}".format(
            top1_2, top5_2))
        _logger.info("After quantization: top1: {}, top5: {}".format(
            top1_1, top5_1))
Example #10
            save_backbone,
            os.path.join(os.path.join(os.getcwd(), MODEL_ROOT),
                         "Backbone_epoch{}".format(epoch)))
    else:
        quant_config = {
            'weight_preprocess_type': 'PACT',
            'weight_quantize_type': 'channel_wise_abs_max',
            'activation_quantize_type': 'moving_average_abs_max',
            'weight_bits': 8,
            'activation_bits': 8,
            'dtype': 'int8',
            'window_size': 10000,
            'moving_rate': 0.9,
            'quantizable_layer_type': ['Conv2D', 'Linear'],
        }
        quanter = QAT(config=quant_config)
        quanter.quantize(BACKBONE)
        for epoch in tqdm(range(NUM_EPOCH),
                          ncols=80):  # start training process
            # Adjust the LR for each training stage after warm-up; you can
            # also choose to adjust the LR manually (with slight modification)
            # once a plateau is observed.
            if epoch in (STAGES[0], STAGES[1], STAGES[2]):
                schedule_lr(OPTIMIZER)

            losses = AverageMeter()
            top1 = AverageMeter()
            top5 = AverageMeter()
Example #11
def main(args):
    env_info = get_sys_env()

    place = 'gpu' if env_info['Paddle compiled with cuda'] and env_info[
        'GPUs used'] else 'cpu'
    paddle.set_device(place)

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    cfg = Config(args.cfg,
                 iters=args.retraining_iters,
                 batch_size=args.batch_size,
                 learning_rate=args.learning_rate)

    train_dataset = cfg.train_dataset
    if not train_dataset:
        raise RuntimeError(
            'The training dataset is not specified in the configuration file.')

    val_dataset = cfg.val_dataset
    if not val_dataset:
        raise RuntimeError(
            'The validation dataset is not specified in the configuration file.'
        )
    os.environ['PADDLESEG_EXPORT_STAGE'] = 'True'
    net = cfg.model

    if args.model_path:
        para_state_dict = paddle.load(args.model_path)
        net.set_dict(para_state_dict)
        logger.info('Loaded trained params of model successfully')

    logger.info('Step 1/2: Start quantizing the model...')
    quantizer = QAT(config=get_quant_config())
    quantizer.quantize(net)
    logger.info('Model quantization completed.')

    logger.info('Step 2/2: Start retraining the quantized model.')
    train(net,
          train_dataset,
          optimizer=cfg.optimizer,
          save_dir=args.save_dir,
          num_workers=args.num_workers,
          iters=cfg.iters,
          batch_size=cfg.batch_size,
          losses=cfg.loss)

    evaluate(net, val_dataset)

    if paddle.distributed.get_rank() == 0:
        save_path = os.path.join(args.save_dir, 'model')
        input_var = paddle.ones([1] + list(val_dataset[0][0].shape))
        quantizer.save_quantized_model(net, save_path, input_spec=[input_var])

        yml_file = os.path.join(args.save_dir, 'deploy.yaml')
        with open(yml_file, 'w') as file:
            transforms = cfg.dic['val_dataset']['transforms']
            data = {
                'Deploy': {
                    'transforms': transforms,
                    'model': 'model.pdmodel',
                    'params': 'model.pdiparams'
                }
            }
            yaml.dump(data, file)

        ckpt = os.path.join(args.save_dir, f'iter_{args.retraining_iters}')
        if os.path.exists(ckpt):
            shutil.rmtree(ckpt)

    logger.info(
        f'Model retraining complete. The quantized model is saved in {args.save_dir}.'
    )
Example #12
def main(args):
    paddle.seed(12345)

    config = get_config(args.config, overrides=args.override, show=True)
    # assign the place
    use_gpu = config.get("use_gpu", True)
    place = paddle.set_device('gpu' if use_gpu else 'cpu')

    trainer_num = paddle.distributed.get_world_size()
    use_data_parallel = trainer_num != 1
    config["use_data_parallel"] = use_data_parallel

    if config["use_data_parallel"]:
        paddle.distributed.init_parallel_env()

    net = program.create_model(config.ARCHITECTURE, config.classes_num)

    # prepare to quant
    quant_config = get_default_quant_config()
    quant_config["activation_preprocess_type"] = "PACT"
    quanter = QAT(config=quant_config)
    quanter.quantize(net)

    optimizer, lr_scheduler = program.create_optimizer(
        config, parameter_list=net.parameters())

    init_model(config, net, optimizer)

    if config["use_data_parallel"]:
        net = paddle.DataParallel(net)

    train_dataloader = Reader(config, 'train', places=place)()

    if config.validate:
        valid_dataloader = Reader(config, 'valid', places=place)()

    last_epoch_id = config.get("last_epoch", -1)
    best_top1_acc = 0.0  # best top1 acc record
    best_top1_epoch = last_epoch_id
    for epoch_id in range(last_epoch_id + 1, config.epochs):
        net.train()
        # 1. train with train dataset
        program.run(train_dataloader, config, net, optimizer, lr_scheduler,
                    epoch_id, 'train')

        # 2. validate with validate dataset
        if config.validate and epoch_id % config.valid_interval == 0:
            net.eval()
            with paddle.no_grad():
                top1_acc = program.run(valid_dataloader, config, net, None,
                                       None, epoch_id, 'valid')
            if top1_acc > best_top1_acc:
                best_top1_acc = top1_acc
                best_top1_epoch = epoch_id
                model_path = os.path.join(config.model_save_dir,
                                          config.ARCHITECTURE["name"])
                save_model(net, optimizer, model_path, "best_model")
            message = "The best top1 acc {:.5f}, in epoch: {:d}".format(
                best_top1_acc, best_top1_epoch)
            logger.info(message)

        # 3. save the persistable model
        if epoch_id % config.save_interval == 0:
            model_path = os.path.join(config.model_save_dir,
                                      config.ARCHITECTURE["name"])
            save_model(net, optimizer, model_path, epoch_id)
Example #13
def main():
    ############################################################################################################
    # 1. quantization configs
    ############################################################################################################
    quant_config = {
        # weight preprocess type, default is None and no preprocessing is performed.
        'weight_preprocess_type': None,
        # activation preprocess type, default is None and no preprocessing is performed.
        'activation_preprocess_type': None,
        # weight quantize type, default is 'channel_wise_abs_max'
        'weight_quantize_type': 'channel_wise_abs_max',
        # activation quantize type, default is 'moving_average_abs_max'
        'activation_quantize_type': 'moving_average_abs_max',
        # weight quantize bit num, default is 8
        'weight_bits': 8,
        # activation quantize bit num, default is 8
        'activation_bits': 8,
        # data type after quantization, such as 'uint8', 'int8', etc. default is 'int8'
        'dtype': 'int8',
        # window size for 'range_abs_max' quantization. default is 10000
        'window_size': 10000,
        # The decay coefficient of moving average, default is 0.9
        'moving_rate': 0.9,
        # for dygraph quantization, layers of type in quantizable_layer_type will be quantized
        'quantizable_layer_type': ['Conv2D', 'Linear'],
    }
    FLAGS = ArgsParser().parse_args()
    config = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    logger = get_logger()
    # build post process

    post_process_class = build_post_process(config['PostProcess'],
                                            config['Global'])

    # build model
    # for rec algorithm
    if hasattr(post_process_class, 'character'):
        char_num = len(getattr(post_process_class, 'character'))
        config['Architecture']["Head"]['out_channels'] = char_num
    model = build_model(config['Architecture'])

    # get QAT model
    quanter = QAT(config=quant_config)
    quanter.quantize(model)

    init_model(config, model, logger)
    model.eval()

    # build metric
    eval_class = build_metric(config['Metric'])

    # build dataloader
    valid_dataloader = build_dataloader(config, 'Eval', device, logger)

    # start eval
    metric = program.eval(model, valid_dataloader, post_process_class,
                          eval_class)
    logger.info('metric eval ***************')
    for k, v in metric.items():
        logger.info('{}:{}'.format(k, v))

    save_path = '{}/inference'.format(config['Global']['save_inference_dir'])
    if config['Architecture']['model_type'] != "det":
        infer_shape = [3, 32, 100]
    else:
        infer_shape = [3, 640, 640]

    quanter.save_quantized_model(model,
                                 save_path,
                                 input_spec=[
                                     paddle.static.InputSpec(shape=[None] +
                                                             infer_shape,
                                                             dtype='float32')
                                 ])
    logger.info('inference QAT model is saved to {}'.format(save_path))
Example #14
def compress(args):
    if args.data == "cifar10":
        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
        train_dataset = paddle.vision.datasets.Cifar10(mode="train",
                                                       backend="cv2",
                                                       transform=transform)
        val_dataset = paddle.vision.datasets.Cifar10(mode="test",
                                                     backend="cv2",
                                                     transform=transform)
        class_dim = 10
        image_shape = [3, 32, 32]
        pretrain = False
        args.total_images = 50000
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_dataset = reader.ImageNetDataset(mode='train')
        val_dataset = reader.ImageNetDataset(mode='val')
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))

    trainer_num = paddle.distributed.get_world_size()
    use_data_parallel = trainer_num != 1

    place = paddle.set_device('gpu' if args.use_gpu else 'cpu')
    # model definition
    if use_data_parallel:
        paddle.distributed.init_parallel_env()

    pretrain = args.data == "imagenet"
    if args.model == "mobilenet_v1":
        net = mobilenet_v1(pretrained=pretrain, num_classes=class_dim)
    elif args.model == "mobilenet_v3":
        net = MobileNetV3_large_x1_0(class_dim=class_dim)
        if pretrain:
            load_dygraph_pretrain(net, args.pretrained_model, True)
    else:
        raise ValueError("{} is not supported.".format(args.model))
    _logger.info("Origin model summary:")
    paddle.summary(net, (1, 3, 224, 224))

    ############################################################################################################
    # 1. quantization configs
    ############################################################################################################
    quant_config = {
        # weight preprocess type, default is None and no preprocessing is performed.
        'weight_preprocess_type': None,
        # activation preprocess type, default is None and no preprocessing is performed.
        'activation_preprocess_type': None,
        # weight quantize type, default is 'channel_wise_abs_max'
        'weight_quantize_type': 'channel_wise_abs_max',
        # activation quantize type, default is 'moving_average_abs_max'
        'activation_quantize_type': 'moving_average_abs_max',
        # weight quantize bit num, default is 8
        'weight_bits': 8,
        # activation quantize bit num, default is 8
        'activation_bits': 8,
        # data type after quantization, such as 'uint8', 'int8', etc. default is 'int8'
        'dtype': 'int8',
        # window size for 'range_abs_max' quantization. default is 10000
        'window_size': 10000,
        # The decay coefficient of moving average, default is 0.9
        'moving_rate': 0.9,
        # for dygraph quantization, layers of type in quantizable_layer_type will be quantized
        'quantizable_layer_type': ['Conv2D', 'Linear'],
    }

    if args.use_pact:
        quant_config['activation_preprocess_type'] = 'PACT'

    ############################################################################################################
    # 2. Quantize the model with QAT (quant aware training)
    ############################################################################################################

    quanter = QAT(config=quant_config)
    quanter.quantize(net)

    _logger.info("QAT model summary:")
    paddle.summary(net, (1, 3, 224, 224))

    opt, lr = create_optimizer(net, trainer_num, args)

    if use_data_parallel:
        net = paddle.DataParallel(net)

    train_batch_sampler = paddle.io.DistributedBatchSampler(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=True)
    train_loader = paddle.io.DataLoader(train_dataset,
                                        batch_sampler=train_batch_sampler,
                                        places=place,
                                        return_list=True,
                                        num_workers=4)

    valid_loader = paddle.io.DataLoader(val_dataset,
                                        places=place,
                                        batch_size=args.batch_size,
                                        shuffle=False,
                                        drop_last=False,
                                        return_list=True,
                                        num_workers=4)

    @paddle.no_grad()
    def test(epoch, net):
        net.eval()
        batch_id = 0
        acc_top1_ns = []
        acc_top5_ns = []

        eval_reader_cost = 0.0
        eval_run_cost = 0.0
        total_samples = 0
        reader_start = time.time()
        for data in valid_loader():
            eval_reader_cost += time.time() - reader_start
            image = data[0]
            label = data[1]
            if args.data == "cifar10":
                label = paddle.reshape(label, [-1, 1])

            eval_start = time.time()

            out = net(image)
            acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
            acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)

            eval_run_cost += time.time() - eval_start
            batch_size = image.shape[0]
            total_samples += batch_size

            if batch_id % args.log_period == 0:
                log_period = 1 if batch_id == 0 else args.log_period
                _logger.info(
                    "Eval epoch[{}] batch[{}] - top1: {:.6f}; top5: {:.6f}; avg_reader_cost: {:.6f} s, avg_batch_cost: {:.6f} s, avg_samples: {}, avg_ips: {:.3f} images/s"
                    .format(epoch, batch_id, np.mean(acc_top1.numpy()),
                            np.mean(acc_top5.numpy()),
                            eval_reader_cost / log_period,
                            (eval_reader_cost + eval_run_cost) / log_period,
                            total_samples / log_period, total_samples /
                            (eval_reader_cost + eval_run_cost)))
                eval_reader_cost = 0.0
                eval_run_cost = 0.0
                total_samples = 0
            acc_top1_ns.append(np.mean(acc_top1.numpy()))
            acc_top5_ns.append(np.mean(acc_top5.numpy()))
            batch_id += 1
            reader_start = time.time()

        _logger.info(
            "Final eval epoch[{}] - acc_top1: {:.6f}; acc_top5: {:.6f}".format(
                epoch, np.mean(np.array(acc_top1_ns)),
                np.mean(np.array(acc_top5_ns))))
        return np.mean(np.array(acc_top1_ns))

    def cross_entropy(input, target, ls_epsilon):
        if ls_epsilon > 0:
            if target.shape[-1] != class_dim:
                target = paddle.nn.functional.one_hot(target, class_dim)
            target = paddle.nn.functional.label_smooth(target,
                                                       epsilon=ls_epsilon)
            target = paddle.reshape(target, shape=[-1, class_dim])
            input = -paddle.nn.functional.log_softmax(input, axis=-1)
            cost = paddle.sum(target * input, axis=-1)
        else:
            cost = paddle.nn.functional.cross_entropy(input=input,
                                                      label=target)
        avg_cost = paddle.mean(cost)
        return avg_cost

    def train(epoch, net):

        net.train()
        batch_id = 0

        train_reader_cost = 0.0
        train_run_cost = 0.0
        total_samples = 0
        reader_start = time.time()
        for data in train_loader():
            train_reader_cost += time.time() - reader_start

            image = data[0]
            label = data[1]
            if args.data == "cifar10":
                label = paddle.reshape(label, [-1, 1])

            train_start = time.time()
            out = net(image)
            avg_cost = cross_entropy(out, label, args.ls_epsilon)

            acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
            acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
            avg_cost.backward()
            opt.step()
            opt.clear_grad()
            lr.step()

            loss_n = np.mean(avg_cost.numpy())
            acc_top1_n = np.mean(acc_top1.numpy())
            acc_top5_n = np.mean(acc_top5.numpy())

            train_run_cost += time.time() - train_start
            batch_size = image.shape[0]
            total_samples += batch_size

            if batch_id % args.log_period == 0:
                log_period = 1 if batch_id == 0 else args.log_period
                _logger.info(
                    "epoch[{}]-batch[{}] lr: {:.6f} - loss: {:.6f}; top1: {:.6f}; top5: {:.6f}; avg_reader_cost: {:.6f} s, avg_batch_cost: {:.6f} s, avg_samples: {}, avg_ips: {:.3f} images/s"
                    .format(
                        epoch, batch_id, lr.get_lr(), loss_n, acc_top1_n,
                        acc_top5_n, train_reader_cost / log_period,
                        (train_reader_cost + train_run_cost) / log_period,
                        total_samples / log_period,
                        total_samples / (train_reader_cost + train_run_cost)))
                train_reader_cost = 0.0
                train_run_cost = 0.0
                total_samples = 0
            batch_id += 1
            reader_start = time.time()

    ############################################################################################################
    # train loop
    ############################################################################################################
    best_acc1 = 0.0
    best_epoch = 0
    for i in range(args.num_epochs):
        train(i, net)
        acc1 = test(i, net)
        if paddle.distributed.get_rank() == 0:
            model_prefix = os.path.join(args.model_save_dir, "epoch_" + str(i))
            paddle.save(net.state_dict(), model_prefix + ".pdparams")
            paddle.save(opt.state_dict(), model_prefix + ".pdopt")

        if acc1 > best_acc1:
            best_acc1 = acc1
            best_epoch = i
            if paddle.distributed.get_rank() == 0:
                model_prefix = os.path.join(args.model_save_dir, "best_model")
                paddle.save(net.state_dict(), model_prefix + ".pdparams")
                paddle.save(opt.state_dict(), model_prefix + ".pdopt")

    ############################################################################################################
    # 3. Save quant aware model
    ############################################################################################################
    if paddle.distributed.get_rank() == 0:
        # load best model
        load_dygraph_pretrain(net,
                              os.path.join(args.model_save_dir, "best_model"))

        path = os.path.join(args.model_save_dir, "inference_model",
                            'qat_model')
        quanter.save_quantized_model(net,
                                     path,
                                     input_spec=[
                                         paddle.static.InputSpec(
                                             shape=[None, 3, 224, 224],
                                             dtype='float32')
                                     ])
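
The exported model can be loaded back for a quick smoke test. A hedged sketch (the path is hypothetical and must match the prefix passed to save_quantized_model above):

import paddle

qat_model = paddle.jit.load("output/inference_model/qat_model")
qat_model.eval()
x = paddle.randn([1, 3, 224, 224], dtype='float32')
print(qat_model(x).shape)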
Example #15
def main(args):
    if args.output_dir:
        utils.mkdir(args.output_dir)

    print(args)

    try:
        paddle.set_device(args.device)
    except Exception:
        print("failed to set the device, using the default device...")

    # multi cards
    if paddle.distributed.get_world_size() > 1:
        paddle.distributed.init_parallel_env()

    train_dir = os.path.join(args.data_path, 'train')
    val_dir = os.path.join(args.data_path, 'val')
    dataset, dataset_test, train_sampler, test_sampler = load_data(
        train_dir, val_dir, args)
    train_batch_sampler = train_sampler
    # train_batch_sampler = paddle.io.BatchSampler(
    #     sampler=train_sampler, batch_size=args.batch_size)
    data_loader = paddle.io.DataLoader(dataset=dataset,
                                       num_workers=args.workers,
                                       return_list=True,
                                       batch_sampler=train_batch_sampler)
    test_batch_sampler = paddle.io.BatchSampler(sampler=test_sampler,
                                                batch_size=args.batch_size)
    data_loader_test = paddle.io.DataLoader(dataset_test,
                                            batch_sampler=test_batch_sampler,
                                            num_workers=args.workers)

    print("Creating model")
    model = paddlevision.models.__dict__[args.model](
        pretrained=args.pretrained)

    if args.pact_quant:
        try:
            from paddleslim.dygraph.quant import QAT
        except ImportError:
            print('Unable to import QAT; please install paddleslim, '
                  'e.g. `pip install paddleslim`.')
            return

        quant_config = {
            # activation preprocess type, default is None and no preprocessing is performed.
            'activation_preprocess_type': 'PACT',
            # weight preprocess type, default is None and no preprocessing is performed.
            'weight_preprocess_type': None,
            # weight quantize type, default is 'channel_wise_abs_max'
            'weight_quantize_type': 'channel_wise_abs_max',
            # activation quantize type, default is 'moving_average_abs_max'
            'activation_quantize_type': 'moving_average_abs_max',
            # weight quantize bit num, default is 8
            'weight_bits': 8,
            # activation quantize bit num, default is 8
            'activation_bits': 8,
            # data type after quantization, such as 'uint8', 'int8', etc. default is 'int8'
            'dtype': 'int8',
            # window size for 'range_abs_max' quantization. default is 10000
            'window_size': 10000,
            # The decay coefficient of moving average, default is 0.9
            'moving_rate': 0.9,
            # for dygraph quantization, layers of type in quantizable_layer_type will be quantized
            'quantizable_layer_type': ['Conv2D', 'Linear'],
        }

        quanter = QAT(config=quant_config)
        quanter.quantize(model)
        print("Quanted model")

    criterion = nn.CrossEntropyLoss()

    lr_scheduler = paddle.optimizer.lr.StepDecay(args.lr,
                                                 step_size=args.lr_step_size,
                                                 gamma=args.lr_gamma)

    opt_name = args.opt.lower()
    if opt_name == 'sgd':
        optimizer = paddle.optimizer.Momentum(learning_rate=lr_scheduler,
                                              momentum=args.momentum,
                                              parameters=model.parameters(),
                                              weight_decay=args.weight_decay)
    elif opt_name == 'rmsprop':
        # paddle's RMSProp uses `epsilon`/`rho` rather than torch-style `eps`/`alpha`
        optimizer = paddle.optimizer.RMSProp(learning_rate=lr_scheduler,
                                             momentum=args.momentum,
                                             parameters=model.parameters(),
                                             weight_decay=args.weight_decay,
                                             epsilon=0.0316,
                                             rho=0.9)
    else:
        raise RuntimeError(
            "Invalid optimizer {}. Only SGD and RMSprop are supported.".format(
                args.opt))

    if args.resume:
        # args.resume is expected to be a checkpoint path prefix,
        # e.g. ./output/model_10 (without the .pdparams/.pdopt suffix).
        layer_state_dict = paddle.load(args.resume + '.pdparams')
        model.set_state_dict(layer_state_dict)
        opt_state_dict = paddle.load(args.resume + '.pdopt')
        optimizer.set_state_dict(opt_state_dict)

    scaler = None
    if args.amp_level is not None:
        scaler = paddle.amp.GradScaler(init_loss_scaling=1024)
        if args.amp_level == 'O2':
            model = paddle.amp.decorate(models=model,
                                        level='O2',
                                        save_dtype="float32")

    # multi cards
    if paddle.distributed.get_world_size() > 1:
        model = paddle.DataParallel(model)

    if args.test_only and paddle.distributed.get_rank() == 0:
        top1 = evaluate(model,
                        criterion,
                        data_loader_test,
                        amp_level=args.amp_level)
        return top1

    print("Start training")
    start_time = time.time()
    best_top1 = 0.0

    for epoch in range(args.start_epoch, args.epochs):
        train_one_epoch(model, criterion, optimizer, data_loader, epoch,
                        args.print_freq, args.amp_level, scaler)
        lr_scheduler.step()
        if paddle.distributed.get_rank() == 0:
            top1 = evaluate(model,
                            criterion,
                            data_loader_test,
                            amp_level=args.amp_level)
            if args.output_dir:
                paddle.save(
                    model.state_dict(),
                    os.path.join(args.output_dir,
                                 'model_{}.pdparams'.format(epoch)))
                paddle.save(
                    optimizer.state_dict(),
                    os.path.join(args.output_dir,
                                 'model_{}.pdopt'.format(epoch)))
                paddle.save(model.state_dict(),
                            os.path.join(args.output_dir, 'latest.pdparams'))
                paddle.save(optimizer.state_dict(),
                            os.path.join(args.output_dir, 'latest.pdopt'))
                if top1 > best_top1:
                    best_top1 = top1
                    paddle.save(model.state_dict(),
                                os.path.join(args.output_dir, 'best.pdparams'))
                    paddle.save(optimizer.state_dict(),
                                os.path.join(args.output_dir, 'best.pdopt'))

    if args.pact_quant:
        input_spec = [InputSpec(shape=[None, 3, 224, 224], dtype='float32')]
        quanter.save_quantized_model(model,
                                     os.path.join(args.output_dir,
                                                  "qat_inference"),
                                     input_spec=input_spec)
        print("QAT inference model saved in {args.output_dir}")

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))
    return best_top1