def main():
    args = parse_args()
    net = architectures.__dict__[args.model]
    model = Net(net, args.class_dim, args.model)

    # get QAT model
    quant_config = get_default_quant_config()
    # TODO(littletomatodonkey): add PACT for export model
    # quant_config["activation_preprocess_type"] = "PACT"
    quanter = QAT(config=quant_config)
    quanter.quantize(model)

    load_dygraph_pretrain(model.pre_net,
                          path=args.pretrained_model,
                          load_static_weights=args.load_static_weights)
    model.eval()

    save_path = os.path.join(args.output_path, "inference")
    quanter.save_quantized_model(model,
                                 save_path,
                                 input_spec=[
                                     paddle.static.InputSpec(
                                         shape=[None, 3, args.img_size, args.img_size],
                                         dtype='float32')
                                 ])
    print('inference QAT model is saved to {}'.format(save_path))
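# A minimal sketch (not part of the export script above) of how the exported
# "inference.pdmodel" / "inference.pdiparams" pair written by save_quantized_model
# could be loaded and run with the Paddle Inference API. The path prefix, batch
# size, and image size here are illustrative assumptions only.
import numpy as np
from paddle import inference


def run_qat_inference(save_path, img_size=224):
    config = inference.Config(save_path + ".pdmodel", save_path + ".pdiparams")
    predictor = inference.create_predictor(config)

    # feed a dummy image just to exercise the quantized inference graph
    input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
    input_handle.copy_from_cpu(
        np.random.rand(1, 3, img_size, img_size).astype("float32"))
    predictor.run()

    output_handle = predictor.get_output_handle(predictor.get_output_names()[0])
    return output_handle.copy_to_cpu()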
def main(config, device, logger, vdl_writer):
    # init dist environment
    if config['Global']['distributed']:
        dist.init_parallel_env()

    global_config = config['Global']

    # build dataloader
    train_dataloader = build_dataloader(config, 'Train', device, logger)
    if config['Eval']:
        valid_dataloader = build_dataloader(config, 'Eval', device, logger)
    else:
        valid_dataloader = None

    # build post process
    post_process_class = build_post_process(config['PostProcess'],
                                            global_config)

    # build model
    # for rec algorithm
    if hasattr(post_process_class, 'character'):
        char_num = len(getattr(post_process_class, 'character'))
        config['Architecture']["Head"]['out_channels'] = char_num
    model = build_model(config['Architecture'])

    if config['Global']['distributed']:
        model = paddle.DataParallel(model)

    # build loss
    loss_class = build_loss(config['Loss'])

    # build optim
    optimizer, lr_scheduler = build_optimizer(
        config['Optimizer'],
        epochs=config['Global']['epoch_num'],
        step_each_epoch=len(train_dataloader),
        parameters=model.parameters())

    # build metric
    eval_class = build_metric(config['Metric'])

    # load pretrain model
    pre_best_model_dict = init_model(config, model, logger, optimizer)

    logger.info(
        'train dataloader has {} iters, valid dataloader has {} iters'.format(
            len(train_dataloader), len(valid_dataloader)))

    # get QAT model; `quant_config` and the PACT preprocess layer are expected
    # to be defined at module level in this script
    quanter = QAT(config=quant_config, act_preprocess=PACT)
    quanter.quantize(model)

    # start train
    program.train(config, train_dataloader, valid_dataloader, device, model,
                  loss_class, optimizer, lr_scheduler, post_process_class,
                  eval_class, pre_best_model_dict, logger, vdl_writer)
def get_quaner(config, model):
    if config.get("Slim", False) and config["Slim"].get("quant", False):
        from paddleslim.dygraph.quant import QAT
        assert config["Slim"]["quant"]["name"].lower(
        ) == 'pact', 'Only PACT quantization method is supported now'
        QUANT_CONFIG["activation_preprocess_type"] = "PACT"
        quanter = QAT(config=QUANT_CONFIG)
        quanter.quantize(model)
        logger.info("QAT model summary:")
        paddle.summary(model, (1, 3, 224, 224))
    else:
        quanter = None
    return quanter
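# Usage sketch for get_quaner, assuming `config` and `model` are already built as
# in the surrounding scripts and that QUANT_CONFIG is the module-level dict the
# helper mutates. The save path and input shape below are illustrative only.
import paddle

quanter = get_quaner(config, model)
# ... run the regular training / fine-tuning loop on `model` here ...
if quanter is not None:
    quanter.save_quantized_model(
        model,
        "./inference/quant_model",  # hypothetical output prefix
        input_spec=[
            paddle.static.InputSpec(shape=[None, 3, 224, 224], dtype='float32')
        ])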
def main():
    model_list = [x for x in models.__dict__["__all__"]]
    assert FLAGS.arch in model_list, "Expected FLAGS.arch in {}, but received {}".format(
        model_list, FLAGS.arch)
    model = models.__dict__[FLAGS.arch](pretrained=True)

    if FLAGS.enable_quant:
        print("quantize model")
        quant_config = {
            'weight_preprocess_type': None,
            'activation_preprocess_type': 'PACT' if FLAGS.use_pact else None,
            'weight_quantize_type': "channel_wise_abs_max",
            'activation_quantize_type': 'moving_average_abs_max',
            'weight_bits': 8,
            'activation_bits': 8,
            'window_size': 10000,
            'moving_rate': 0.9,
            'quantizable_layer_type': ['Conv2D', 'Linear'],
        }
        dygraph_qat = QAT(quant_config)
        dygraph_qat.quantize(model)

    model = hapi.Model(model)

    train_dataset = dataset.ImageNetDataset(data_dir=FLAGS.data, mode='train')
    val_dataset = dataset.ImageNetDataset(data_dir=FLAGS.data, mode='val')

    optim = paddle.optimizer.SGD(learning_rate=FLAGS.lr,
                                 parameters=model.parameters(),
                                 weight_decay=FLAGS.weight_decay)

    model.prepare(optim, paddle.nn.CrossEntropyLoss(), Accuracy(topk=(1, 5)))

    checkpoint_dir = os.path.join(
        FLAGS.output_dir, "checkpoint", FLAGS.arch + "_checkpoint",
        time.strftime('%Y-%m-%d-%H-%M', time.localtime()))
    model.fit(train_dataset,
              val_dataset,
              batch_size=FLAGS.batch_size,
              epochs=FLAGS.epoch,
              save_dir=checkpoint_dir,
              num_workers=FLAGS.num_workers)

    if FLAGS.enable_quant:
        quant_output_dir = os.path.join(FLAGS.output_dir, "quant_dygraph",
                                        FLAGS.arch, "int8_infer")
        input_spec = paddle.static.InputSpec(shape=[None, 3, 224, 224],
                                             dtype='float32')
        dygraph_qat.save_quantized_model(model.network, quant_output_dir,
                                         [input_spec])
        print("Save quantized inference model in " + quant_output_dir)
class TestQAT(unittest.TestCase):
    """
    QAT = quantization-aware training.
    This test case uses the default quantization config, whose
    weight_quantize_type is channel_wise_abs_max.
    """

    def set_seed(self):
        seed = 1
        np.random.seed(seed)
        paddle.static.default_main_program().random_seed = seed
        paddle.static.default_startup_program().random_seed = seed

    def prepare(self):
        self.quanter = QAT()

    def test_qat_acc(self):
        self.prepare()
        self.set_seed()

        fp32_lenet = ImperativeLenet()

        place = paddle.CUDAPlace(0) \
            if paddle.is_compiled_with_cuda() else paddle.CPUPlace()

        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
        train_dataset = paddle.vision.datasets.MNIST(mode='train',
                                                     backend='cv2',
                                                     transform=transform)
        val_dataset = paddle.vision.datasets.MNIST(mode='test',
                                                   backend='cv2',
                                                   transform=transform)

        train_reader = paddle.io.DataLoader(train_dataset,
                                            drop_last=True,
                                            places=place,
                                            batch_size=64,
                                            return_list=True)
        test_reader = paddle.io.DataLoader(val_dataset,
                                           places=place,
                                           batch_size=64,
                                           return_list=True)

        def train(model):
            adam = paddle.optimizer.Adam(learning_rate=0.0001,
                                         parameters=model.parameters())
            epoch_num = 1
            for epoch in range(epoch_num):
                model.train()
                for batch_id, data in enumerate(train_reader):
                    img = paddle.to_tensor(data[0])
                    label = paddle.to_tensor(data[1])
                    img = paddle.reshape(img, [-1, 1, 28, 28])
                    label = paddle.reshape(label, [-1, 1])

                    out = model(img)
                    acc = paddle.metric.accuracy(out, label)
                    loss = paddle.nn.functional.loss.cross_entropy(out, label)
                    avg_loss = paddle.mean(loss)
                    avg_loss.backward()
                    adam.minimize(avg_loss)
                    model.clear_gradients()
                    if batch_id % 100 == 0:
                        _logger.info(
                            "Train | At epoch {} step {}: loss = {:}, acc = {:}"
                            .format(epoch, batch_id, avg_loss.numpy(),
                                    acc.numpy()))

        def test(model):
            model.eval()
            avg_acc = [[], []]
            for batch_id, data in enumerate(test_reader):
                img = paddle.to_tensor(data[0])
                img = paddle.reshape(img, [-1, 1, 28, 28])
                label = paddle.to_tensor(data[1])
                label = paddle.reshape(label, [-1, 1])

                out = model(img)
                acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
                acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
                avg_acc[0].append(acc_top1.numpy())
                avg_acc[1].append(acc_top5.numpy())
                if batch_id % 100 == 0:
                    _logger.info(
                        "Test | step {}: acc1 = {:}, acc5 = {:}".format(
                            batch_id, acc_top1.numpy(), acc_top5.numpy()))

            _logger.info("Test | Average: acc_top1 {}, acc_top5 {}".format(
                np.mean(avg_acc[0]), np.mean(avg_acc[1])))
            return np.mean(avg_acc[0]), np.mean(avg_acc[1])

        train(fp32_lenet)
        top1_1, top5_1 = test(fp32_lenet)

        fp32_lenet.__init__()
        quant_lenet = self.quanter.quantize(fp32_lenet)

        train(quant_lenet)
        top1_2, top5_2 = test(quant_lenet)

        self.quanter.save_quantized_model(quant_lenet,
                                          './tmp/qat',
                                          input_spec=[
                                              paddle.static.InputSpec(
                                                  shape=[None, 1, 28, 28],
                                                  dtype='float32')
                                          ])

        # values before quantization and after quantization should be close
        _logger.info("Before quantization: top1: {}, top5: {}".format(
            top1_1, top5_1))
        _logger.info("After quantization: top1: {}, top5: {}".format(
            top1_2, top5_2))
        _logger.info("\n")

        diff = 0.002
        self.assertTrue(
            top1_1 - top1_2 < diff,
            msg="The acc of the quant model is much lower than the fp32 model")
def main():
    ############################################################################################################
    # 1. quantization configs
    ############################################################################################################
    quant_config = {
        # weight preprocess type, default is None and no preprocessing is performed.
        'weight_preprocess_type': None,
        # activation preprocess type, default is None and no preprocessing is performed.
        'activation_preprocess_type': None,
        # weight quantize type, default is 'channel_wise_abs_max'
        'weight_quantize_type': 'channel_wise_abs_max',
        # activation quantize type, default is 'moving_average_abs_max'
        'activation_quantize_type': 'moving_average_abs_max',
        # weight quantize bit num, default is 8
        'weight_bits': 8,
        # activation quantize bit num, default is 8
        'activation_bits': 8,
        # data type after quantization, such as 'uint8', 'int8', etc. default is 'int8'
        'dtype': 'int8',
        # window size for 'range_abs_max' quantization. default is 10000
        'window_size': 10000,
        # The decay coefficient of moving average, default is 0.9
        'moving_rate': 0.9,
        # for dygraph quantization, layers of type in quantizable_layer_type will be quantized
        'quantizable_layer_type': ['Conv2D', 'Linear'],
    }
    FLAGS = ArgsParser().parse_args()
    config = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    logger = get_logger()

    # build post process
    post_process_class = build_post_process(config['PostProcess'],
                                            config['Global'])

    # build model
    # for rec algorithm
    if hasattr(post_process_class, 'character'):
        char_num = len(getattr(post_process_class, 'character'))
        if config['Architecture']["algorithm"] in ["Distillation"]:
            # distillation model
            for key in config['Architecture']["Models"]:
                config['Architecture']["Models"][key]["Head"][
                    'out_channels'] = char_num
        else:  # base rec model
            config['Architecture']["Head"]['out_channels'] = char_num

    model = build_model(config['Architecture'])

    # get QAT model
    quanter = QAT(config=quant_config)
    quanter.quantize(model)

    init_model(config, model)
    model.eval()

    # build metric
    eval_class = build_metric(config['Metric'])

    # build dataloader
    valid_dataloader = build_dataloader(config, 'Eval', device, logger)

    use_srn = config['Architecture']['algorithm'] == "SRN"
    model_type = config['Architecture']['model_type']

    # start eval
    metric = program.eval(model, valid_dataloader, post_process_class,
                          eval_class, model_type, use_srn)

    logger.info('metric eval ***************')
    for k, v in metric.items():
        logger.info('{}:{}'.format(k, v))

    infer_shape = [3, 32, 100] if config['Architecture'][
        'model_type'] != "det" else [3, 640, 640]

    save_path = config["Global"]["save_inference_dir"]

    arch_config = config["Architecture"]
    if arch_config["algorithm"] in ["Distillation"]:
        # distillation model
        for idx, name in enumerate(model.model_name_list):
            sub_model_save_path = os.path.join(save_path, name, "inference")
            export_single_model(quanter, model.model_list[idx], infer_shape,
                                sub_model_save_path, logger)
    else:
        save_path = os.path.join(save_path, "inference")
        export_single_model(quanter, model, infer_shape, save_path, logger)
def test_qat_acc(self):
    lenet = ImperativeLenet()
    quanter = QAT()
    quanter.quantize(lenet)

    place = paddle.CUDAPlace(0) \
        if paddle.is_compiled_with_cuda() else paddle.CPUPlace()

    transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
    train_dataset = paddle.vision.datasets.MNIST(mode='train',
                                                 backend='cv2',
                                                 transform=transform)
    val_dataset = paddle.vision.datasets.MNIST(mode='test',
                                               backend='cv2',
                                               transform=transform)

    train_reader = paddle.io.DataLoader(train_dataset,
                                        drop_last=True,
                                        places=place,
                                        batch_size=64,
                                        return_list=True)
    test_reader = paddle.io.DataLoader(val_dataset,
                                       places=place,
                                       batch_size=64,
                                       return_list=True)

    def train(model):
        adam = paddle.optimizer.Adam(learning_rate=0.0001,
                                     parameters=model.parameters())
        epoch_num = 1
        for epoch in range(epoch_num):
            model.train()
            for batch_id, data in enumerate(train_reader):
                img = paddle.to_tensor(data[0])
                label = paddle.to_tensor(data[1])
                img = paddle.reshape(img, [-1, 1, 28, 28])
                label = paddle.reshape(label, [-1, 1])

                out = model(img)
                acc = paddle.metric.accuracy(out, label)
                loss = paddle.nn.functional.loss.cross_entropy(out, label)
                avg_loss = paddle.mean(loss)
                avg_loss.backward()
                adam.minimize(avg_loss)
                model.clear_gradients()
                if batch_id % 100 == 0:
                    _logger.info(
                        "Train | At epoch {} step {}: loss = {:}, acc = {:}"
                        .format(epoch, batch_id, avg_loss.numpy(),
                                acc.numpy()))

    def test(model):
        model.eval()
        avg_acc = [[], []]
        for batch_id, data in enumerate(test_reader):
            img = paddle.to_tensor(data[0])
            img = paddle.reshape(img, [-1, 1, 28, 28])
            label = paddle.to_tensor(data[1])
            label = paddle.reshape(label, [-1, 1])

            out = model(img)
            acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
            acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
            avg_acc[0].append(acc_top1.numpy())
            avg_acc[1].append(acc_top5.numpy())
            if batch_id % 100 == 0:
                _logger.info(
                    "Test | step {}: acc1 = {:}, acc5 = {:}".format(
                        batch_id, acc_top1.numpy(), acc_top5.numpy()))

        _logger.info("Test | Average: acc_top1 {}, acc_top5 {}".format(
            np.mean(avg_acc[0]), np.mean(avg_acc[1])))
        return np.mean(avg_acc[0]), np.mean(avg_acc[1])

    # train and evaluate the quantized model
    train(lenet)
    top1_1, top5_1 = test(lenet)

    # re-initialize the parameters and train again as the pre-quantization baseline
    lenet.__init__()
    train(lenet)
    top1_2, top5_2 = test(lenet)

    # values before quantization and after quantization should be close
    _logger.info("Before quantization: top1: {}, top5: {}".format(
        top1_2, top5_2))
    _logger.info("After quantization: top1: {}, top5: {}".format(
        top1_1, top5_1))
        os.path.join(os.path.join(os.getcwd(), MODEL_ROOT),
                     "Backbone_epoch{}".format(epoch)))
else:
    quant_config = {
        'weight_preprocess_type': 'PACT',
        'weight_quantize_type': 'channel_wise_abs_max',
        'activation_quantize_type': 'moving_average_abs_max',
        'weight_bits': 8,
        'activation_bits': 8,
        'dtype': 'int8',
        'window_size': 10000,
        'moving_rate': 0.9,
        'quantizable_layer_type': ['Conv2D', 'Linear'],
    }
    quanter = QAT(config=quant_config)
    quanter.quantize(BACKBONE)

for epoch in tqdm(range(NUM_EPOCH), ncols=80):  # start training process
    # adjust LR for each training stage after warm up; you can also choose to
    # adjust LR manually (with a slight modification) once a plateau is observed
    if epoch == STAGES[0]:
        schedule_lr(OPTIMIZER)
    if epoch == STAGES[1]:
        schedule_lr(OPTIMIZER)
    if epoch == STAGES[2]:
        schedule_lr(OPTIMIZER)

    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    for iters, data in enumerate(train_loader()):
def main(args):
    env_info = get_sys_env()
    place = 'gpu' if env_info['Paddle compiled with cuda'] and env_info[
        'GPUs used'] else 'cpu'
    paddle.set_device(place)

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    cfg = Config(args.cfg,
                 iters=args.retraining_iters,
                 batch_size=args.batch_size,
                 learning_rate=args.learning_rate)

    train_dataset = cfg.train_dataset
    if not train_dataset:
        raise RuntimeError(
            'The training dataset is not specified in the configuration file.')

    val_dataset = cfg.val_dataset
    if not val_dataset:
        raise RuntimeError(
            'The validation dataset is not specified in the configuration file.'
        )

    os.environ['PADDLESEG_EXPORT_STAGE'] = 'True'
    net = cfg.model

    if args.model_path:
        para_state_dict = paddle.load(args.model_path)
        net.set_dict(para_state_dict)
        logger.info('Loaded trained params of model successfully')

    logger.info('Step 1/2: Start to quantize the model...')
    quantizer = QAT(config=get_quant_config())
    quantizer.quantize(net)
    logger.info('Model quantization completed.')

    logger.info('Step 2/2: Start retraining the quantized model.')
    train(net,
          train_dataset,
          optimizer=cfg.optimizer,
          save_dir=args.save_dir,
          num_workers=args.num_workers,
          iters=cfg.iters,
          batch_size=cfg.batch_size,
          losses=cfg.loss)

    evaluate(net, val_dataset)

    if paddle.distributed.get_rank() == 0:
        save_path = os.path.join(args.save_dir, 'model')
        input_var = paddle.ones([1] + list(val_dataset[0][0].shape))
        quantizer.save_quantized_model(net, save_path, input_spec=[input_var])

        yml_file = os.path.join(args.save_dir, 'deploy.yaml')
        with open(yml_file, 'w') as file:
            transforms = cfg.dic['val_dataset']['transforms']
            data = {
                'Deploy': {
                    'transforms': transforms,
                    'model': 'model.pdmodel',
                    'params': 'model.pdiparams'
                }
            }
            yaml.dump(data, file)

    ckpt = os.path.join(args.save_dir, f'iter_{args.retraining_iters}')
    if os.path.exists(ckpt):
        shutil.rmtree(ckpt)

    logger.info(
        f'Model retraining complete. The quantized model is saved in {args.save_dir}.'
    )
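# A sketch of what the get_quant_config() used above might return; this mirrors
# the quant_config dictionaries in the other scripts in this collection and is
# an assumption, not the actual implementation behind that helper.
def get_quant_config():
    return {
        'weight_preprocess_type': None,
        'activation_preprocess_type': None,
        'weight_quantize_type': 'channel_wise_abs_max',
        'activation_quantize_type': 'moving_average_abs_max',
        'weight_bits': 8,
        'activation_bits': 8,
        'dtype': 'int8',
        'window_size': 10000,
        'moving_rate': 0.9,
        'quantizable_layer_type': ['Conv2D', 'Linear'],
    }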
def main(args):
    paddle.seed(12345)

    config = get_config(args.config, overrides=args.override, show=True)
    # assign the place
    use_gpu = config.get("use_gpu", True)
    place = paddle.set_device('gpu' if use_gpu else 'cpu')

    trainer_num = paddle.distributed.get_world_size()
    use_data_parallel = trainer_num != 1
    config["use_data_parallel"] = use_data_parallel

    if config["use_data_parallel"]:
        paddle.distributed.init_parallel_env()

    net = program.create_model(config.ARCHITECTURE, config.classes_num)

    # prepare to quant
    quant_config = get_default_quant_config()
    quant_config["activation_preprocess_type"] = "PACT"
    quanter = QAT(config=quant_config)
    quanter.quantize(net)

    optimizer, lr_scheduler = program.create_optimizer(
        config, parameter_list=net.parameters())

    init_model(config, net, optimizer)

    if config["use_data_parallel"]:
        net = paddle.DataParallel(net)

    train_dataloader = Reader(config, 'train', places=place)()

    if config.validate:
        valid_dataloader = Reader(config, 'valid', places=place)()

    last_epoch_id = config.get("last_epoch", -1)
    best_top1_acc = 0.0  # best top1 acc record
    best_top1_epoch = last_epoch_id

    for epoch_id in range(last_epoch_id + 1, config.epochs):
        net.train()
        # 1. train with train dataset
        program.run(train_dataloader, config, net, optimizer, lr_scheduler,
                    epoch_id, 'train')

        # 2. validate with validate dataset
        if config.validate and epoch_id % config.valid_interval == 0:
            net.eval()
            with paddle.no_grad():
                top1_acc = program.run(valid_dataloader, config, net, None,
                                       None, epoch_id, 'valid')
            if top1_acc > best_top1_acc:
                best_top1_acc = top1_acc
                best_top1_epoch = epoch_id
                model_path = os.path.join(config.model_save_dir,
                                          config.ARCHITECTURE["name"])
                save_model(net, optimizer, model_path, "best_model")
            message = "The best top1 acc {:.5f}, in epoch: {:d}".format(
                best_top1_acc, best_top1_epoch)
            logger.info(message)

        # 3. save the persistable model
        if epoch_id % config.save_interval == 0:
            model_path = os.path.join(config.model_save_dir,
                                      config.ARCHITECTURE["name"])
            save_model(net, optimizer, model_path, epoch_id)
def main():
    ############################################################################################################
    # 1. quantization configs
    ############################################################################################################
    quant_config = {
        # weight preprocess type, default is None and no preprocessing is performed.
        'weight_preprocess_type': None,
        # activation preprocess type, default is None and no preprocessing is performed.
        'activation_preprocess_type': None,
        # weight quantize type, default is 'channel_wise_abs_max'
        'weight_quantize_type': 'channel_wise_abs_max',
        # activation quantize type, default is 'moving_average_abs_max'
        'activation_quantize_type': 'moving_average_abs_max',
        # weight quantize bit num, default is 8
        'weight_bits': 8,
        # activation quantize bit num, default is 8
        'activation_bits': 8,
        # data type after quantization, such as 'uint8', 'int8', etc. default is 'int8'
        'dtype': 'int8',
        # window size for 'range_abs_max' quantization. default is 10000
        'window_size': 10000,
        # The decay coefficient of moving average, default is 0.9
        'moving_rate': 0.9,
        # for dygraph quantization, layers of type in quantizable_layer_type will be quantized
        'quantizable_layer_type': ['Conv2D', 'Linear'],
    }
    FLAGS = ArgsParser().parse_args()
    config = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    logger = get_logger()

    # build post process
    post_process_class = build_post_process(config['PostProcess'],
                                            config['Global'])

    # build model
    # for rec algorithm
    if hasattr(post_process_class, 'character'):
        char_num = len(getattr(post_process_class, 'character'))
        config['Architecture']["Head"]['out_channels'] = char_num
    model = build_model(config['Architecture'])

    # get QAT model
    quanter = QAT(config=quant_config)
    quanter.quantize(model)

    init_model(config, model, logger)
    model.eval()

    # build metric
    eval_class = build_metric(config['Metric'])

    # build dataloader
    valid_dataloader = build_dataloader(config, 'Eval', device, logger)

    # start eval
    metric = program.eval(model, valid_dataloader, post_process_class,
                          eval_class)
    logger.info('metric eval ***************')
    for k, v in metric.items():
        logger.info('{}:{}'.format(k, v))

    save_path = '{}/inference'.format(config['Global']['save_inference_dir'])
    infer_shape = [3, 32, 100] if config['Architecture'][
        'model_type'] != "det" else [3, 640, 640]

    quanter.save_quantized_model(model,
                                 save_path,
                                 input_spec=[
                                     paddle.static.InputSpec(
                                         shape=[None] + infer_shape,
                                         dtype='float32')
                                 ])
    logger.info('inference QAT model is saved to {}'.format(save_path))
def compress(args):
    if args.data == "cifar10":
        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
        train_dataset = paddle.vision.datasets.Cifar10(mode="train",
                                                       backend="cv2",
                                                       transform=transform)
        val_dataset = paddle.vision.datasets.Cifar10(mode="test",
                                                     backend="cv2",
                                                     transform=transform)
        class_dim = 10
        image_shape = [3, 32, 32]
        pretrain = False
        args.total_images = 50000
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_dataset = reader.ImageNetDataset(mode='train')
        val_dataset = reader.ImageNetDataset(mode='val')
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))

    trainer_num = paddle.distributed.get_world_size()
    use_data_parallel = trainer_num != 1
    place = paddle.set_device('gpu' if args.use_gpu else 'cpu')

    # model definition
    if use_data_parallel:
        paddle.distributed.init_parallel_env()

    pretrain = True if args.data == "imagenet" else False
    if args.model == "mobilenet_v1":
        net = mobilenet_v1(pretrained=pretrain, num_classes=class_dim)
    elif args.model == "mobilenet_v3":
        net = MobileNetV3_large_x1_0(class_dim=class_dim)
        if pretrain:
            load_dygraph_pretrain(net, args.pretrained_model, True)
    else:
        raise ValueError("{} is not supported.".format(args.model))
    _logger.info("Origin model summary:")
    paddle.summary(net, (1, 3, 224, 224))

    ############################################################################################################
    # 1. quantization configs
    ############################################################################################################
    quant_config = {
        # weight preprocess type, default is None and no preprocessing is performed.
        'weight_preprocess_type': None,
        # activation preprocess type, default is None and no preprocessing is performed.
        'activation_preprocess_type': None,
        # weight quantize type, default is 'channel_wise_abs_max'
        'weight_quantize_type': 'channel_wise_abs_max',
        # activation quantize type, default is 'moving_average_abs_max'
        'activation_quantize_type': 'moving_average_abs_max',
        # weight quantize bit num, default is 8
        'weight_bits': 8,
        # activation quantize bit num, default is 8
        'activation_bits': 8,
        # data type after quantization, such as 'uint8', 'int8', etc. default is 'int8'
        'dtype': 'int8',
        # window size for 'range_abs_max' quantization. default is 10000
        'window_size': 10000,
        # The decay coefficient of moving average, default is 0.9
        'moving_rate': 0.9,
        # for dygraph quantization, layers of type in quantizable_layer_type will be quantized
        'quantizable_layer_type': ['Conv2D', 'Linear'],
    }

    if args.use_pact:
        quant_config['activation_preprocess_type'] = 'PACT'

    ############################################################################################################
    # 2. Quantize the model with QAT (quant aware training)
    ############################################################################################################
    quanter = QAT(config=quant_config)
    quanter.quantize(net)
    _logger.info("QAT model summary:")
    paddle.summary(net, (1, 3, 224, 224))

    opt, lr = create_optimizer(net, trainer_num, args)

    if use_data_parallel:
        net = paddle.DataParallel(net)

    train_batch_sampler = paddle.io.DistributedBatchSampler(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=True)
    train_loader = paddle.io.DataLoader(train_dataset,
                                        batch_sampler=train_batch_sampler,
                                        places=place,
                                        return_list=True,
                                        num_workers=4)
    valid_loader = paddle.io.DataLoader(val_dataset,
                                        places=place,
                                        batch_size=args.batch_size,
                                        shuffle=False,
                                        drop_last=False,
                                        return_list=True,
                                        num_workers=4)

    @paddle.no_grad()
    def test(epoch, net):
        net.eval()
        batch_id = 0

        acc_top1_ns = []
        acc_top5_ns = []

        eval_reader_cost = 0.0
        eval_run_cost = 0.0
        total_samples = 0
        reader_start = time.time()
        for data in valid_loader():
            eval_reader_cost += time.time() - reader_start
            image = data[0]
            label = data[1]
            if args.data == "cifar10":
                label = paddle.reshape(label, [-1, 1])
            eval_start = time.time()

            out = net(image)
            acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
            acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)

            eval_run_cost += time.time() - eval_start
            batch_size = image.shape[0]
            total_samples += batch_size

            if batch_id % args.log_period == 0:
                log_period = 1 if batch_id == 0 else args.log_period
                _logger.info(
                    "Eval epoch[{}] batch[{}] - top1: {:.6f}; top5: {:.6f}; "
                    "avg_reader_cost: {:.6f} s, avg_batch_cost: {:.6f} s, "
                    "avg_samples: {}, avg_ips: {:.3f} images/s".format(
                        epoch, batch_id, np.mean(acc_top1.numpy()),
                        np.mean(acc_top5.numpy()),
                        eval_reader_cost / log_period,
                        (eval_reader_cost + eval_run_cost) / log_period,
                        total_samples / log_period,
                        total_samples / (eval_reader_cost + eval_run_cost)))
                eval_reader_cost = 0.0
                eval_run_cost = 0.0
                total_samples = 0
            acc_top1_ns.append(np.mean(acc_top1.numpy()))
            acc_top5_ns.append(np.mean(acc_top5.numpy()))
            batch_id += 1
            reader_start = time.time()

        _logger.info(
            "Final eval epoch[{}] - acc_top1: {:.6f}; acc_top5: {:.6f}".format(
                epoch, np.mean(np.array(acc_top1_ns)),
                np.mean(np.array(acc_top5_ns))))
        return np.mean(np.array(acc_top1_ns))

    def cross_entropy(input, target, ls_epsilon):
        if ls_epsilon > 0:
            if target.shape[-1] != class_dim:
                target = paddle.nn.functional.one_hot(target, class_dim)
            target = paddle.nn.functional.label_smooth(target,
                                                       epsilon=ls_epsilon)
            target = paddle.reshape(target, shape=[-1, class_dim])
            input = -paddle.nn.functional.log_softmax(input, axis=-1)
            cost = paddle.sum(target * input, axis=-1)
        else:
            cost = paddle.nn.functional.cross_entropy(input=input,
                                                      label=target)
        avg_cost = paddle.mean(cost)
        return avg_cost

    def train(epoch, net):
        net.train()
        batch_id = 0

        train_reader_cost = 0.0
        train_run_cost = 0.0
        total_samples = 0
        reader_start = time.time()
        for data in train_loader():
            train_reader_cost += time.time() - reader_start

            image = data[0]
            label = data[1]
            if args.data == "cifar10":
                label = paddle.reshape(label, [-1, 1])
            train_start = time.time()

            out = net(image)
            avg_cost = cross_entropy(out, label, args.ls_epsilon)

            acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
            acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
            avg_cost.backward()
            opt.step()
            opt.clear_grad()
            lr.step()

            loss_n = np.mean(avg_cost.numpy())
            acc_top1_n = np.mean(acc_top1.numpy())
            acc_top5_n = np.mean(acc_top5.numpy())

            train_run_cost += time.time() - train_start
            batch_size = image.shape[0]
            total_samples += batch_size

            if batch_id % args.log_period == 0:
                log_period = 1 if batch_id == 0 else args.log_period
                _logger.info(
                    "epoch[{}]-batch[{}] lr: {:.6f} - loss: {:.6f}; top1: {:.6f}; top5: {:.6f}; "
                    "avg_reader_cost: {:.6f} s, avg_batch_cost: {:.6f} s, "
                    "avg_samples: {}, avg_ips: {:.3f} images/s".format(
                        epoch, batch_id, lr.get_lr(), loss_n, acc_top1_n,
                        acc_top5_n, train_reader_cost / log_period,
                        (train_reader_cost + train_run_cost) / log_period,
                        total_samples / log_period,
                        total_samples / (train_reader_cost + train_run_cost)))
                train_reader_cost = 0.0
                train_run_cost = 0.0
                total_samples = 0
            batch_id += 1
            reader_start = time.time()

    ############################################################################################################
    # train loop
    ############################################################################################################
    best_acc1 = 0.0
    best_epoch = 0
    for i in range(args.num_epochs):
        train(i, net)
        acc1 = test(i, net)
        if paddle.distributed.get_rank() == 0:
            model_prefix = os.path.join(args.model_save_dir, "epoch_" + str(i))
            paddle.save(net.state_dict(), model_prefix + ".pdparams")
            paddle.save(opt.state_dict(), model_prefix + ".pdopt")
        if acc1 > best_acc1:
            best_acc1 = acc1
            best_epoch = i
            if paddle.distributed.get_rank() == 0:
                model_prefix = os.path.join(args.model_save_dir, "best_model")
                paddle.save(net.state_dict(), model_prefix + ".pdparams")
                paddle.save(opt.state_dict(), model_prefix + ".pdopt")

    ############################################################################################################
    # 3. Save quant aware model
    ############################################################################################################
    if paddle.distributed.get_rank() == 0:
        # load best model
        load_dygraph_pretrain(net,
                              os.path.join(args.model_save_dir, "best_model"))

        path = os.path.join(args.model_save_dir, "inference_model", 'qat_model')
        quanter.save_quantized_model(net,
                                     path,
                                     input_spec=[
                                         paddle.static.InputSpec(
                                             shape=[None, 3, 224, 224],
                                             dtype='float32')
                                     ])
def main(args):
    if args.output_dir:
        utils.mkdir(args.output_dir)

    print(args)

    try:
        paddle.set_device(args.device)
    except:
        print("device set error, use default device...")

    # multi cards
    if paddle.distributed.get_world_size() > 1:
        paddle.distributed.init_parallel_env()

    train_dir = os.path.join(args.data_path, 'train')
    val_dir = os.path.join(args.data_path, 'val')
    dataset, dataset_test, train_sampler, test_sampler = load_data(
        train_dir, val_dir, args)

    train_batch_sampler = train_sampler
    # train_batch_sampler = paddle.io.BatchSampler(
    #     sampler=train_sampler, batch_size=args.batch_size)
    data_loader = paddle.io.DataLoader(dataset=dataset,
                                       num_workers=args.workers,
                                       return_list=True,
                                       batch_sampler=train_batch_sampler)

    test_batch_sampler = paddle.io.BatchSampler(sampler=test_sampler,
                                                batch_size=args.batch_size)
    data_loader_test = paddle.io.DataLoader(dataset_test,
                                            batch_sampler=test_batch_sampler,
                                            num_workers=args.workers)

    print("Creating model")
    model = paddlevision.models.__dict__[args.model](
        pretrained=args.pretrained)

    if args.pact_quant:
        try:
            from paddleslim.dygraph.quant import QAT
        except Exception as e:
            print(
                'Unable to use QAT, please install paddleslim, for example: `pip install paddleslim`'
            )
            return

        quant_config = {
            # activation preprocess type, default is None and no preprocessing is performed.
            'activation_preprocess_type': 'PACT',
            # weight preprocess type, default is None and no preprocessing is performed.
            'weight_preprocess_type': None,
            # weight quantize type, default is 'channel_wise_abs_max'
            'weight_quantize_type': 'channel_wise_abs_max',
            # activation quantize type, default is 'moving_average_abs_max'
            'activation_quantize_type': 'moving_average_abs_max',
            # weight quantize bit num, default is 8
            'weight_bits': 8,
            # activation quantize bit num, default is 8
            'activation_bits': 8,
            # data type after quantization, such as 'uint8', 'int8', etc. default is 'int8'
            'dtype': 'int8',
            # window size for 'range_abs_max' quantization. default is 10000
            'window_size': 10000,
            # The decay coefficient of moving average, default is 0.9
            'moving_rate': 0.9,
            # for dygraph quantization, layers of type in quantizable_layer_type will be quantized
            'quantizable_layer_type': ['Conv2D', 'Linear'],
        }

        quanter = QAT(config=quant_config)
        quanter.quantize(model)
        print("Quantized model")

    criterion = nn.CrossEntropyLoss()

    lr_scheduler = paddle.optimizer.lr.StepDecay(args.lr,
                                                 step_size=args.lr_step_size,
                                                 gamma=args.lr_gamma)

    opt_name = args.opt.lower()
    if opt_name == 'sgd':
        optimizer = paddle.optimizer.Momentum(learning_rate=lr_scheduler,
                                              momentum=args.momentum,
                                              parameters=model.parameters(),
                                              weight_decay=args.weight_decay)
    elif opt_name == 'rmsprop':
        optimizer = paddle.optimizer.RMSprop(learning_rate=lr_scheduler,
                                             momentum=args.momentum,
                                             parameters=model.parameters(),
                                             weight_decay=args.weight_decay,
                                             eps=0.0316,
                                             alpha=0.9)
    else:
        raise RuntimeError(
            "Invalid optimizer {}. Only SGD and RMSprop are supported.".format(
                args.opt))
    if args.resume:
        layer_state_dict = paddle.load(os.path.join(args.resume, '.pdparams'))
        model.set_state_dict(layer_state_dict)
        opt_state_dict = paddle.load(os.path.join(args.resume, '.pdopt'))
        optimizer.load_state_dict(opt_state_dict)

    scaler = None
    if args.amp_level is not None:
        scaler = paddle.amp.GradScaler(init_loss_scaling=1024)
        if args.amp_level == 'O2':
            model = paddle.amp.decorate(models=model,
                                        level='O2',
                                        save_dtype="float32")

    # multi cards
    if paddle.distributed.get_world_size() > 1:
        model = paddle.DataParallel(model)

    if args.test_only and paddle.distributed.get_rank() == 0:
        top1 = evaluate(model,
                        criterion,
                        data_loader_test,
                        amp_level=args.amp_level)
        return top1

    print("Start training")
    start_time = time.time()
    best_top1 = 0.0

    for epoch in range(args.start_epoch, args.epochs):
        train_one_epoch(model, criterion, optimizer, data_loader, epoch,
                        args.print_freq, args.amp_level, scaler)
        lr_scheduler.step()
        if paddle.distributed.get_rank() == 0:
            top1 = evaluate(model,
                            criterion,
                            data_loader_test,
                            amp_level=args.amp_level)
            if args.output_dir:
                paddle.save(
                    model.state_dict(),
                    os.path.join(args.output_dir,
                                 'model_{}.pdparams'.format(epoch)))
                paddle.save(
                    optimizer.state_dict(),
                    os.path.join(args.output_dir,
                                 'model_{}.pdopt'.format(epoch)))
                paddle.save(model.state_dict(),
                            os.path.join(args.output_dir, 'latest.pdparams'))
                paddle.save(optimizer.state_dict(),
                            os.path.join(args.output_dir, 'latest.pdopt'))
                if top1 > best_top1:
                    best_top1 = top1
                    paddle.save(model.state_dict(),
                                os.path.join(args.output_dir, 'best.pdparams'))
                    paddle.save(optimizer.state_dict(),
                                os.path.join(args.output_dir, 'best.pdopt'))

    if args.pact_quant:
        input_spec = [InputSpec(shape=[None, 3, 224, 224], dtype='float32')]
        quanter.save_quantized_model(model,
                                     os.path.join(args.output_dir,
                                                  "qat_inference"),
                                     input_spec=input_spec)
        print(f"QAT inference model saved in {args.output_dir}")

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))
    return best_top1