def test_quant_op(self):
    startup_prog, train_prog = self.get_model()
    place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() \
        else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)
    config_1 = {
        'weight_quantize_type': 'channel_wise_abs_max',
        'activation_quantize_type': 'moving_average_abs_max',
        'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
    }

    quant_prog_1 = quant_aware(
        train_prog, place, config=config_1, for_test=True)
    op_nums_1, quant_op_nums_1 = self.get_op_number(quant_prog_1)
    convert_prog_1 = convert(quant_prog_1, place, config=config_1)
    convert_op_nums_1, convert_quant_op_nums_1 = self.get_op_number(
        convert_prog_1)

    config_1['not_quant_pattern'] = ['last_fc']
    quant_prog_2 = quant_aware(
        train_prog, place, config=config_1, for_test=True)
    op_nums_2, quant_op_nums_2 = self.get_op_number(quant_prog_2)
    convert_prog_2 = convert(quant_prog_2, place, config=config_1)
    convert_op_nums_2, convert_quant_op_nums_2 = self.get_op_number(
        convert_prog_2)

    self.assertTrue(op_nums_1 == op_nums_2)
    # test quant_aware op numbers
    self.assertTrue(op_nums_1 * 4 == quant_op_nums_1)
    # test convert op numbers
    self.assertTrue(convert_op_nums_1 * 2 == convert_quant_op_nums_1)
    # test skip_quant
    self.assertTrue(quant_op_nums_1 - 4 == quant_op_nums_2)
    self.assertTrue(convert_quant_op_nums_1 - 2 == convert_quant_op_nums_2)
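# The test above relies on a get_op_number helper that is not part of this
# snippet. Below is a minimal sketch of what that method on the test class
# presumably looks like (an assumption inferred from the op-count assertions):
# it walks the program's graph, counting quantizable ops and the fake
# quantize/dequantize ops inserted by quant_aware/convert.
def get_op_number(self, prog):
    from paddle.fluid import core
    from paddle.fluid.framework import IrGraph
    graph = IrGraph(core.Graph(prog.desc), for_test=False)
    quant_op_nums = 0
    op_nums = 0
    for op in graph.all_op_nodes():
        if op.name() in ['conv2d', 'depthwise_conv2d', 'mul']:
            # ops that quant_aware is configured to quantize
            op_nums += 1
        elif 'fake_' in op.name():
            # fake quantize / fake dequantize ops added by the transform
            quant_op_nums += 1
    return op_nums, quant_op_nums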
def main():
    cfg = load_config(FLAGS.config)

    if 'architecture' in cfg:
        main_arch = cfg.architecture
    else:
        raise ValueError("'architecture' not specified in config file.")

    merge_config(FLAGS.opt)

    # Use CPU for exporting inference model instead of GPU
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    model = create(main_arch)

    startup_prog = fluid.Program()
    infer_prog = fluid.Program()
    with fluid.program_guard(infer_prog, startup_prog):
        with fluid.unique_name.guard():
            inputs_def = cfg['TestReader']['inputs_def']
            inputs_def['use_dataloader'] = False
            feed_vars, _ = model.build_inputs(**inputs_def)
            test_fetches = model.test(feed_vars)
    infer_prog = infer_prog.clone(True)

    not_quant_pattern = []
    if FLAGS.not_quant_pattern:
        not_quant_pattern = FLAGS.not_quant_pattern
    config = {
        'weight_quantize_type': 'channel_wise_abs_max',
        'activation_quantize_type': 'moving_average_abs_max',
        'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
        'not_quant_pattern': not_quant_pattern
    }

    infer_prog = quant_aware(infer_prog, place, config, for_test=True)

    exe.run(startup_prog)
    checkpoint.load_params(exe, infer_prog, cfg.weights)

    infer_prog, int8_program = convert(
        infer_prog, place, config, save_int8=True)

    save_infer_model(
        os.path.join(FLAGS.output_dir, 'float'), exe, feed_vars,
        test_fetches, infer_prog)
    save_infer_model(
        os.path.join(FLAGS.output_dir, 'int'), exe, feed_vars,
        test_fetches, int8_program)
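# save_infer_model is not defined in this snippet; the sketch below is a
# hedged reconstruction inferred from the call sites above. The parameter
# names, the dict shapes of feed_vars/test_fetches, and the params_filename
# are assumptions, not the tool's confirmed implementation.
def save_infer_model(save_dir, exe, feed_vars, test_fetches, infer_prog):
    # feed_vars is assumed to be a name -> Variable mapping from build_inputs
    feed_var_names = [var.name for var in feed_vars.values()]
    # sort fetches by key so the exported fetch order is deterministic
    fetch_list = sorted(test_fetches.items(), key=lambda i: i[0])
    target_vars = [var for _, var in fetch_list]
    fluid.io.save_inference_model(
        save_dir,
        feeded_var_names=feed_var_names,
        target_vars=target_vars,
        executor=exe,
        main_program=infer_prog,
        params_filename="__params__")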
def export_inference_model(args):
    """
    Export PaddlePaddle inference model for prediction deployment and serving.
    """
    print("Exporting inference model...")
    startup_prog = fluid.Program()
    infer_prog = fluid.Program()
    image, logit_out = build_model(
        infer_prog, startup_prog, phase=ModelPhase.PREDICT)

    # Use CPU for exporting inference model instead of GPU
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)
    infer_prog = infer_prog.clone(for_test=True)

    not_quant_pattern_list = []
    if args.not_quant_pattern is not None:
        not_quant_pattern_list = args.not_quant_pattern

    config = {
        'weight_quantize_type': 'channel_wise_abs_max',
        'activation_quantize_type': 'moving_average_abs_max',
        'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
        'not_quant_pattern': not_quant_pattern_list
    }

    infer_prog = quant_aware(infer_prog, place, config, for_test=True)

    if os.path.exists(cfg.TEST.TEST_MODEL):
        fluid.io.load_persistables(
            exe, cfg.TEST.TEST_MODEL, main_program=infer_prog)
    else:
        print("TEST.TEST_MODEL directory is empty!")
        exit(-1)

    infer_prog = convert(infer_prog, place, config)

    fluid.io.save_inference_model(
        cfg.FREEZE.SAVE_DIR,
        feeded_var_names=[image.name],
        target_vars=[logit_out],
        executor=exe,
        main_program=infer_prog,
        model_filename=cfg.FREEZE.MODEL_FILENAME,
        params_filename=cfg.FREEZE.PARAMS_FILENAME)
    print("Inference model exported!")

    print("Exporting inference model config...")
    deploy_cfg_path = export_inference_config()
    print("Inference model saved : [%s]" % deploy_cfg_path)
def test_accuracy(self):
    image = paddle.static.data(
        name='image', shape=[None, 1, 28, 28], dtype='float32')
    image.stop_gradient = False
    label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
    model = MobileNet()
    out = model.net(input=image, class_dim=10)
    cost = paddle.nn.functional.loss.cross_entropy(input=out, label=label)
    avg_cost = paddle.mean(x=cost)
    acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
    acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
    optimizer = paddle.optimizer.Momentum(
        momentum=0.9,
        learning_rate=0.01,
        weight_decay=paddle.regularizer.L2Decay(4e-5))
    optimizer.minimize(avg_cost)
    main_prog = paddle.static.default_main_program()
    val_prog = main_prog.clone(for_test=True)

    place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda() \
        else paddle.CPUPlace()
    exe = paddle.static.Executor(place)
    exe.run(paddle.static.default_startup_program())

    def transform(x):
        return np.reshape(x, [1, 28, 28])

    train_dataset = paddle.vision.datasets.MNIST(
        mode='train', backend='cv2', transform=transform)
    test_dataset = paddle.vision.datasets.MNIST(
        mode='test', backend='cv2', transform=transform)
    train_loader = paddle.io.DataLoader(
        train_dataset,
        places=place,
        feed_list=[image, label],
        drop_last=True,
        return_list=False,
        batch_size=64)
    valid_loader = paddle.io.DataLoader(
        test_dataset,
        places=place,
        feed_list=[image, label],
        batch_size=64,
        return_list=False)

    def train(program):
        iter = 0
        for data in train_loader():
            cost, top1, top5 = exe.run(
                program,
                feed=data,
                fetch_list=[avg_cost, acc_top1, acc_top5])
            iter += 1
            if iter % 100 == 0:
                print('train iter={}, avg loss {}, acc_top1 {}, acc_top5 {}'.
                      format(iter, cost, top1, top5))

    def test(program):
        iter = 0
        result = [[], [], []]
        for data in valid_loader():
            cost, top1, top5 = exe.run(
                program,
                feed=data,
                fetch_list=[avg_cost, acc_top1, acc_top5])
            iter += 1
            if iter % 100 == 0:
                print('eval iter={}, avg loss {}, acc_top1 {}, acc_top5 {}'.
                      format(iter, cost, top1, top5))
            result[0].append(cost)
            result[1].append(top1)
            result[2].append(top5)
        print(' avg loss {}, acc_top1 {}, acc_top5 {}'.format(
            np.mean(result[0]), np.mean(result[1]), np.mean(result[2])))
        return np.mean(result[1]), np.mean(result[2])

    train(main_prog)
    top1_1, top5_1 = test(main_prog)

    config = {
        'weight_quantize_type': 'channel_wise_abs_max',
        'activation_quantize_type': 'moving_average_abs_max',
        'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
    }
    quant_train_prog_pact = quant_aware(
        main_prog,
        place,
        config,
        for_test=False,
        act_preprocess_func=pact,
        optimizer_func=get_optimizer,
        executor=exe)
    quant_eval_prog = quant_aware(val_prog, place, config, for_test=True)
    train(quant_train_prog_pact)
    quant_eval_prog, int8_prog = convert(
        quant_eval_prog, place, config, save_int8=True)
    top1_2, top5_2 = test(quant_eval_prog)
    # values before quantization and after quantization should be close
    print("before quantization: top1: {}, top5: {}".format(top1_1, top5_1))
    print("after quantization: top1: {}, top5: {}".format(top1_2, top5_2))
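# The pact and get_optimizer helpers passed to quant_aware above are not part
# of this snippet. A minimal sketch follows, assuming they match the PACT
# definitions used in the compress example later in this section; the fixed
# clipping threshold of 20.0 is an assumption.
def pact(x):
    from paddle.fluid.layer_helper import LayerHelper
    helper = LayerHelper("pact", **locals())
    u_param_attr = paddle.ParamAttr(
        name=x.name + '_pact',
        initializer=paddle.nn.initializer.Constant(value=20.0),
        regularizer=paddle.regularizer.L2Decay(0.0001),
        learning_rate=1)
    # learnable clipping threshold u; activations are clipped to [-u, u]
    u_param = helper.create_parameter(
        attr=u_param_attr, shape=[1], dtype='float32')
    x = x - paddle.nn.functional.relu(x - u_param) + \
        paddle.nn.functional.relu(-u_param - x)
    return x

def get_optimizer():
    # optimizer used to train the PACT thresholds
    return paddle.optimizer.Momentum(0.01, 0.9)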
def compress(args):
    ############################################################################
    # 1. quantization configs
    ############################################################################
    quant_config = {
        # weight quantize type, default is 'channel_wise_abs_max'
        'weight_quantize_type': 'channel_wise_abs_max',
        # activation quantize type, default is 'moving_average_abs_max'
        'activation_quantize_type': 'moving_average_abs_max',
        # weight quantize bit num, default is 8
        'weight_bits': 8,
        # activation quantize bit num, default is 8
        'activation_bits': 8,
        # ops whose name_scope matches an entry in not_quant_pattern will not be quantized
        'not_quant_pattern': ['skip_quant'],
        # ops of the types in quantize_op_types will be quantized
        'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'],
        # data type after quantization, such as 'uint8', 'int8'. default is 'int8'
        'dtype': 'int8',
        # window size for 'range_abs_max' quantization. default is 10000
        'window_size': 10000,
        # the decay coefficient of moving average, default is 0.9
        'moving_rate': 0.9,
    }

    if args.data == "mnist":
        train_dataset = paddle.vision.datasets.MNIST(mode='train')
        val_dataset = paddle.vision.datasets.MNIST(mode='test')
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_dataset = reader.ImageNetDataset(mode='train')
        val_dataset = reader.ImageNetDataset(mode='val')
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))

    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = paddle.static.data(
        name='image', shape=[None] + image_shape, dtype='float32')
    label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = paddle.nn.functional.loss.cross_entropy(input=out, label=label)
    avg_cost = paddle.mean(x=cost)
    acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
    acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)

    train_prog = paddle.static.default_main_program()
    val_program = paddle.static.default_main_program().clone(for_test=True)

    place = paddle.CUDAPlace(0) if args.use_gpu else paddle.CPUPlace()

    ############################################################################
    # 2. quantization transform programs (training aware)
    #    Apply quantization transforms to the graph before training and testing.
    #    According to the weight and activation quantization types, fake
    #    quantize and fake dequantize operators are inserted into the graph.
    ############################################################################
    val_program = quant_aware(
        val_program, place, quant_config, scope=None, for_test=True)
    compiled_train_prog = quant_aware(
        train_prog, place, quant_config, scope=None, for_test=False)
    opt = create_optimizer(args)
    opt.minimize(avg_cost)

    exe = paddle.static.Executor(place)
    exe.run(paddle.static.default_startup_program())

    assert os.path.exists(
        args.pretrained_model), "pretrained_model doesn't exist"
    if args.pretrained_model:
        paddle.static.load(train_prog, args.pretrained_model, exe)

    places = paddle.static.cuda_places() \
        if args.use_gpu else paddle.static.cpu_places()
    train_loader = paddle.io.DataLoader(
        train_dataset,
        places=places,
        feed_list=[image, label],
        drop_last=True,
        batch_size=args.batch_size,
        return_list=False,
        use_shared_memory=True,
        shuffle=True,
        num_workers=4)
    valid_loader = paddle.io.DataLoader(
        val_dataset,
        places=place,
        feed_list=[image, label],
        drop_last=False,
        return_list=False,
        batch_size=args.batch_size,
        use_shared_memory=True,
        shuffle=False)

    def test(epoch, program):
        batch_id = 0
        acc_top1_ns = []
        acc_top5_ns = []
        for data in valid_loader():
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program,
                feed=data,
                fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}".
                    format(epoch, batch_id, np.mean(acc_top1_n),
                           np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))
            batch_id += 1

        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(acc_top1_ns)),
                np.mean(np.array(acc_top5_ns))))
        return np.mean(np.array(acc_top1_ns))

    def train(epoch, compiled_train_prog):
        batch_id = 0
        for data in train_loader():
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n = exe.run(
                compiled_train_prog,
                feed=data,
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] - loss: {}; acc_top1: {}; acc_top5: {}; time: {}".
                    format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n,
                           end_time - start_time))
            batch_id += 1

    build_strategy = paddle.static.BuildStrategy()
    build_strategy.memory_optimize = False
    build_strategy.enable_inplace = False
    build_strategy.fuse_all_reduce_ops = False
    build_strategy.sync_batch_norm = False
    exec_strategy = paddle.static.ExecutionStrategy()
    compiled_train_prog = compiled_train_prog.with_data_parallel(
        loss_name=avg_cost.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    ############################################################################
    # train loop
    ############################################################################
    best_acc1 = 0.0
    best_epoch = 0
    for i in range(args.num_epochs):
        train(i, compiled_train_prog)
        acc1 = test(i, val_program)
        paddle.static.save(
            program=val_program,
            model_path=os.path.join(args.checkpoint_dir, str(i)))
        if acc1 > best_acc1:
            best_acc1 = acc1
            best_epoch = i
            paddle.static.save(
                program=val_program,
                model_path=os.path.join(args.checkpoint_dir, 'best_model'))
    if os.path.exists(os.path.join(args.checkpoint_dir, 'best_model')):
        paddle.static.load(
            executor=exe,
            model_path=os.path.join(args.checkpoint_dir, 'best_model'),
            program=val_program)

    ############################################################################
    # 3. Freeze the graph after training by adjusting the quantize
    #    operators' order for inference.
    #    The dtype of float_program's weights is float32, but their values are
    #    in the int8 range.
    ############################################################################
    float_program, int8_program = convert(
        val_program, place, quant_config, scope=None, save_int8=True)
    print("eval best_model after convert")
    final_acc1 = test(best_epoch, float_program)

    ############################################################################
    # 4. Save inference model
    ############################################################################
    model_path = os.path.join(
        quantization_model_save_dir, args.model,
        'act_' + quant_config['activation_quantize_type'] + '_w_' +
        quant_config['weight_quantize_type'])
    float_path = os.path.join(model_path, 'float')
    if not os.path.isdir(model_path):
        os.makedirs(model_path)

    paddle.fluid.io.save_inference_model(
        dirname=float_path,
        feeded_var_names=[image.name],
        target_vars=[out],
        executor=exe,
        main_program=float_program,
        model_filename=float_path + '/model',
        params_filename=float_path + '/params')
def compress(args):
    num_workers = 4
    shuffle = True
    if args.ce_test:
        # set seed
        seed = 111
        paddle.seed(seed)
        np.random.seed(seed)
        random.seed(seed)
        num_workers = 0
        shuffle = False

    if args.data == "mnist":
        train_dataset = paddle.vision.datasets.MNIST(mode='train')
        val_dataset = paddle.vision.datasets.MNIST(mode='test')
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_dataset = reader.ImageNetDataset(mode='train')
        val_dataset = reader.ImageNetDataset(mode='val')
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))
    image_shape = [int(m) for m in image_shape.split(",")]

    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = paddle.static.data(
        name='image', shape=[None] + image_shape, dtype='float32')
    if args.use_pact:
        image.stop_gradient = False
    label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = paddle.nn.functional.loss.cross_entropy(input=out, label=label)
    avg_cost = paddle.mean(x=cost)
    acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
    acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)

    train_prog = paddle.static.default_main_program()
    val_program = paddle.static.default_main_program().clone(for_test=True)

    if not args.analysis:
        learning_rate, opt = create_optimizer(args)
        opt.minimize(avg_cost)

    place = paddle.CUDAPlace(0) if args.use_gpu else paddle.CPUPlace()
    places = paddle.static.cuda_places() \
        if args.use_gpu else paddle.static.cpu_places()
    exe = paddle.static.Executor(place)
    exe.run(paddle.static.default_startup_program())

    train_loader = paddle.io.DataLoader(
        train_dataset,
        places=places,
        feed_list=[image, label],
        drop_last=True,
        return_list=False,
        batch_size=args.batch_size,
        use_shared_memory=True,
        shuffle=shuffle,
        num_workers=num_workers)
    valid_loader = paddle.io.DataLoader(
        val_dataset,
        places=place,
        feed_list=[image, label],
        drop_last=False,
        return_list=False,
        batch_size=args.batch_size,
        use_shared_memory=True,
        shuffle=False)

    if args.analysis:
        # names of all activations to collect
        activates = [
            'pool2d_1.tmp_0', 'tmp_35', 'batch_norm_21.tmp_2', 'tmp_26',
            'elementwise_mul_5.tmp_0', 'pool2d_5.tmp_0',
            'elementwise_add_5.tmp_0', 'relu_2.tmp_0', 'pool2d_3.tmp_0',
            'conv2d_40.tmp_2', 'elementwise_mul_0.tmp_0', 'tmp_62',
            'elementwise_add_8.tmp_0', 'batch_norm_39.tmp_2',
            'conv2d_32.tmp_2', 'tmp_17', 'tmp_5', 'elementwise_add_9.tmp_0',
            'pool2d_4.tmp_0', 'relu_0.tmp_0', 'tmp_53', 'relu_3.tmp_0',
            'elementwise_add_4.tmp_0', 'elementwise_add_6.tmp_0', 'tmp_11',
            'conv2d_36.tmp_2', 'relu_8.tmp_0', 'relu_5.tmp_0',
            'pool2d_7.tmp_0', 'elementwise_add_2.tmp_0',
            'elementwise_add_7.tmp_0', 'pool2d_2.tmp_0', 'tmp_47',
            'batch_norm_12.tmp_2', 'elementwise_mul_6.tmp_0',
            'elementwise_mul_7.tmp_0', 'pool2d_6.tmp_0', 'relu_6.tmp_0',
            'elementwise_add_0.tmp_0', 'elementwise_mul_3.tmp_0',
            'conv2d_12.tmp_2', 'elementwise_mul_2.tmp_0', 'tmp_8', 'tmp_2',
            'conv2d_8.tmp_2', 'elementwise_add_3.tmp_0',
            'elementwise_mul_1.tmp_0', 'pool2d_8.tmp_0', 'conv2d_28.tmp_2',
            'image', 'conv2d_16.tmp_2', 'batch_norm_33.tmp_2', 'relu_1.tmp_0',
            'pool2d_0.tmp_0', 'tmp_20', 'conv2d_44.tmp_2', 'relu_10.tmp_0',
            'tmp_41', 'relu_4.tmp_0', 'elementwise_add_1.tmp_0', 'tmp_23',
            'batch_norm_6.tmp_2', 'tmp_29', 'elementwise_mul_4.tmp_0',
            'tmp_14'
        ]
        var_collector = VarCollector(train_prog, activates, use_ema=True)
        values = var_collector.abs_max_run(
            train_loader, exe, step=None, loss_name=avg_cost.name)
        np.save('pact_thres.npy', values)
        _logger.info(values)
        _logger.info("PACT thresholds have been saved as pact_thres.npy")
        # Draw histograms into 'dist_pdf/result.pdf'
        # var_collector.pdf(values)
        return

    values = defaultdict(lambda: 20)
    try:
        # merge the thresholds saved by the analysis pass into the defaults
        tmp = np.load("pact_thres.npy", allow_pickle=True).item()
        values.update(tmp)
        _logger.info("pact_thres.npy info loaded.")
    except Exception:
        _logger.info(
            "cannot find pact_thres.npy. Set init PACT threshold as 20.")
    _logger.info(values)

    # 1. quantization configs
    quant_config = {
        # weight quantize type, default is 'channel_wise_abs_max'
        'weight_quantize_type': 'channel_wise_abs_max',
        # activation quantize type, default is 'moving_average_abs_max'
        'activation_quantize_type': 'moving_average_abs_max',
        # weight quantize bit num, default is 8
        'weight_bits': 8,
        # activation quantize bit num, default is 8
        'activation_bits': 8,
        # ops whose name_scope matches an entry in not_quant_pattern will not be quantized
        'not_quant_pattern': ['skip_quant'],
        # ops of the types in quantize_op_types will be quantized
        'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'],
        # data type after quantization, such as 'uint8', 'int8'. default is 'int8'
        'dtype': 'int8',
        # window size for 'range_abs_max' quantization. default is 10000
        'window_size': 10000,
        # The decay coefficient of moving average, default is 0.9
        'moving_rate': 0.9,
    }

    # 2. quantization transform programs (training aware)
    #    Apply quantization transforms to the graph before training and testing.
    #    According to the weight and activation quantization types, fake
    #    quantize and fake dequantize operators are inserted into the graph.
    def pact(x):
        helper = LayerHelper("pact", **locals())
        dtype = 'float32'
        init_thres = values[x.name.split('_tmp_input')[0]]
        u_param_attr = paddle.ParamAttr(
            name=x.name + '_pact',
            initializer=paddle.nn.initializer.Constant(value=init_thres),
            regularizer=paddle.regularizer.L2Decay(0.0001),
            learning_rate=1)
        u_param = helper.create_parameter(
            attr=u_param_attr, shape=[1], dtype=dtype)
        part_a = paddle.nn.functional.relu(x - u_param)
        part_b = paddle.nn.functional.relu(-u_param - x)
        x = x - part_a + part_b
        return x

    def get_optimizer():
        return paddle.optimizer.Momentum(args.lr, 0.9)

    if args.use_pact:
        act_preprocess_func = pact
        optimizer_func = get_optimizer
        executor = exe
    else:
        act_preprocess_func = None
        optimizer_func = None
        executor = None

    val_program = quant_aware(
        val_program,
        place,
        quant_config,
        scope=None,
        act_preprocess_func=act_preprocess_func,
        optimizer_func=optimizer_func,
        executor=executor,
        for_test=True)
    compiled_train_prog = quant_aware(
        train_prog,
        place,
        quant_config,
        scope=None,
        act_preprocess_func=act_preprocess_func,
        optimizer_func=optimizer_func,
        executor=executor,
        for_test=False)

    assert os.path.exists(
        args.pretrained_model), "pretrained_model doesn't exist"
    if args.pretrained_model:
        paddle.static.load(train_prog, args.pretrained_model, exe)

    def test(epoch, program):
        batch_id = 0
        acc_top1_ns = []
        acc_top5_ns = []
        for data in valid_loader():
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program,
                feed=data,
                fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {:.6f}; acc_top5: {:.6f}; time: {:.3f}".
                    format(epoch, batch_id, np.mean(acc_top1_n),
                           np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))
            batch_id += 1

        _logger.info(
            "Final eval epoch[{}] - acc_top1: {:.6f}; acc_top5: {:.6f}".format(
                epoch, np.mean(np.array(acc_top1_ns)),
                np.mean(np.array(acc_top5_ns))))
        return np.mean(np.array(acc_top1_ns))

    def train(epoch, compiled_train_prog, lr):
        batch_id = 0
        for data in train_loader():
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n = exe.run(
                compiled_train_prog,
                feed=data,
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] lr: {:.6f} - loss: {:.6f}; acc_top1: {:.6f}; acc_top5: {:.6f}; time: {:.3f}".
                    format(epoch, batch_id, lr.get_lr(), loss_n, acc_top1_n,
                           acc_top5_n, end_time - start_time))
            if args.use_pact and batch_id % 1000 == 0:
                threshold = {}
                for var in val_program.list_vars():
                    if 'pact' in var.name:
                        array = np.array(paddle.static.global_scope().find_var(
                            var.name).get_tensor())
                        threshold[var.name] = array[0]
                _logger.info(threshold)
            batch_id += 1
            lr.step()

    build_strategy = paddle.static.BuildStrategy()
    build_strategy.enable_inplace = False
    build_strategy.fuse_all_reduce_ops = False
    exec_strategy = paddle.static.ExecutionStrategy()
    compiled_train_prog = compiled_train_prog.with_data_parallel(
        loss_name=avg_cost.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    # train loop
    best_acc1 = 0.0
    best_epoch = 0
    start_epoch = 0
    if args.checkpoint_dir is not None:
        ckpt_path = args.checkpoint_dir
        assert args.checkpoint_epoch is not None, "checkpoint_epoch must be set"
        start_epoch = args.checkpoint_epoch
        paddle.static.load(
            executor=exe,
            model_path=args.checkpoint_dir,
            program=val_program)

    best_eval_acc1 = 0
    best_acc1_epoch = 0
    for i in range(start_epoch, args.num_epochs):
        train(i, compiled_train_prog, learning_rate)
        acc1 = test(i, val_program)
        if acc1 > best_eval_acc1:
            best_eval_acc1 = acc1
            best_acc1_epoch = i
        _logger.info("Best Validation Acc1: {:.6f}, at epoch {}".format(
            best_eval_acc1, best_acc1_epoch))
        paddle.static.save(
            model_path=os.path.join(args.output_dir, str(i)),
            program=val_program)
        if acc1 > best_acc1:
            best_acc1 = acc1
            best_epoch = i
            paddle.static.save(
                model_path=os.path.join(args.output_dir, 'best_model'),
                program=val_program)
    if os.path.exists(os.path.join(args.output_dir, 'best_model.pdparams')):
        paddle.static.load(
            executor=exe,
            model_path=os.path.join(args.output_dir, 'best_model'),
            program=val_program)

    # 3. Freeze the graph after training by adjusting the quantize
    #    operators' order for inference. The dtype of float_program's weights
    #    is float32, but their values are in the int8 range.
    float_program, int8_program = convert(
        val_program, place, quant_config, scope=None, save_int8=True)
    _logger.info("eval best_model after convert")
    final_acc1 = test(best_epoch, float_program)
    _logger.info("final acc:{}".format(final_acc1))

    # 4. Save inference model
    model_path = os.path.join(
        quantization_model_save_dir, args.model,
        'act_' + quant_config['activation_quantize_type'] + '_w_' +
        quant_config['weight_quantize_type'])
    float_path = os.path.join(model_path, 'float')
    if not os.path.isdir(model_path):
        os.makedirs(model_path)

    paddle.fluid.io.save_inference_model(
        dirname=float_path,
        feeded_var_names=[image.name],
        target_vars=[out],
        executor=exe,
        main_program=float_program,
        model_filename=float_path + '/model',
        params_filename=float_path + '/params')
def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
    np.set_printoptions(precision=5, suppress=True)

    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    dataset = SegDataset(
        file_list=cfg.DATASET.VAL_FILE_LIST,
        mode=ModelPhase.EVAL,
        data_dir=cfg.DATASET.DATA_DIR)

    def data_generator():
        # TODO: check whether the batch reader is compatible with Windows
        if use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()
        for b in data_gen:
            yield b[0], b[1], b[2]

    data_loader, avg_loss, pred, grts, masks = build_model(
        test_prog, startup_prog, phase=ModelPhase.EVAL)

    data_loader.set_sample_generator(
        data_generator, drop_last=False, batch_size=cfg.BATCH_SIZE)

    # Get device environment
    places = fluid.cuda_places() if use_gpu else fluid.cpu_places()
    place = places[0]
    dev_count = len(places)
    print("#Device count: {}".format(dev_count))

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    test_prog = test_prog.clone(for_test=True)

    not_quant_pattern_list = []
    if kwargs['not_quant_pattern'] is not None:
        not_quant_pattern_list = kwargs['not_quant_pattern']
    config = {
        'weight_quantize_type': 'channel_wise_abs_max',
        'activation_quantize_type': 'moving_average_abs_max',
        'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
        'not_quant_pattern': not_quant_pattern_list
    }
    test_prog = quant_aware(test_prog, place, config, for_test=True)

    ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir
    if not os.path.exists(ckpt_dir):
        raise ValueError(
            'The TEST.TEST_MODEL {} is not found'.format(ckpt_dir))

    if ckpt_dir is not None:
        print('load test model:', ckpt_dir)
        fluid.io.load_persistables(exe, ckpt_dir, main_program=test_prog)

    if kwargs['convert']:
        test_prog = convert(test_prog, place, config)

    # Use streaming confusion matrix to calculate mean_iou
    np.set_printoptions(
        precision=4, suppress=True, linewidth=160, floatmode="fixed")
    conf_mat = ConfusionMatrix(cfg.DATASET.NUM_CLASSES, streaming=True)
    fetch_list = [avg_loss.name, pred.name, grts.name, masks.name]
    num_images = 0
    step = 0
    all_step = cfg.DATASET.TEST_TOTAL_IMAGES // cfg.BATCH_SIZE + 1
    timer = Timer()
    timer.start()
    data_loader.start()
    while True:
        try:
            step += 1
            loss, pred, grts, masks = exe.run(
                test_prog, fetch_list=fetch_list, return_numpy=True)
            loss = np.mean(np.array(loss))
            num_images += pred.shape[0]
            conf_mat.calculate(pred, grts, masks)
            _, iou = conf_mat.mean_iou()
            _, acc = conf_mat.accuracy()
            speed = 1.0 / timer.elapsed_time()
            print(
                "[EVAL]step={} loss={:.5f} acc={:.4f} IoU={:.4f} step/sec={:.2f} | ETA {}".
                format(step, loss, acc, iou, speed,
                       calculate_eta(all_step - step, speed)))
            timer.restart()
            sys.stdout.flush()
        except fluid.core.EOFException:
            break

    category_iou, avg_iou = conf_mat.mean_iou()
    category_acc, avg_acc = conf_mat.accuracy()
    print("[EVAL]#image={} acc={:.4f} IoU={:.4f}".format(
        num_images, avg_acc, avg_iou))
    print("[EVAL]Category IoU:", category_iou)
    print("[EVAL]Category Acc:", category_acc)
    print("[EVAL]Kappa:{:.4f}".format(conf_mat.kappa()))

    return category_iou, avg_iou, category_acc, avg_acc
def main(): """ Main evaluate function """ cfg = load_config(FLAGS.config) merge_config(FLAGS.opt) check_config(cfg) # check if set use_gpu=True in paddlepaddle cpu version check_gpu(cfg.use_gpu) # check if paddlepaddle version is satisfied check_version() main_arch = cfg.architecture # define executor place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace() exe = fluid.Executor(place) # build program model = create(main_arch) startup_prog = fluid.Program() eval_prog = fluid.Program() with fluid.program_guard(eval_prog, startup_prog): with fluid.unique_name.guard(): inputs_def = cfg['EvalReader']['inputs_def'] test_feed_vars, loader = model.build_inputs(**inputs_def) test_fetches = model.eval(test_feed_vars) eval_prog = eval_prog.clone(True) reader = create_reader(cfg.EvalReader) loader.set_sample_list_generator(reader, place) # eval already exists json file if FLAGS.json_eval: logger.info( "In json_eval mode, PaddleDetection will evaluate json files in " "output_eval directly. And proposal.json, bbox.json and mask.json " "will be detected by default.") json_eval_results(cfg.metric, json_directory=FLAGS.output_eval, dataset=dataset) return assert cfg.metric != 'OID', "eval process of OID dataset \ is not supported." if cfg.metric == "WIDERFACE": raise ValueError("metric type {} does not support in tools/eval.py, " "please use tools/face_eval.py".format(cfg.metric)) assert cfg.metric in ['COCO', 'VOC'], \ "unknown metric type {}".format(cfg.metric) extra_keys = [] if cfg.metric == 'COCO': extra_keys = ['im_info', 'im_id', 'im_shape'] if cfg.metric == 'VOC': extra_keys = ['gt_bbox', 'gt_class', 'is_difficult'] keys, values, cls = parse_fetches(test_fetches, eval_prog, extra_keys) # whether output bbox is normalized in model output layer is_bbox_normalized = False if hasattr(model, 'is_bbox_normalized') and \ callable(model.is_bbox_normalized): is_bbox_normalized = model.is_bbox_normalized() dataset = cfg['EvalReader']['dataset'] sub_eval_prog = None sub_keys = None sub_values = None not_quant_pattern = [] if FLAGS.not_quant_pattern: not_quant_pattern = FLAGS.not_quant_pattern config = { 'weight_quantize_type': 'channel_wise_abs_max', 'activation_quantize_type': 'moving_average_abs_max', 'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'], 'not_quant_pattern': not_quant_pattern } eval_prog = quant_aware(eval_prog, place, config, for_test=True) # load model exe.run(startup_prog) if 'weights' in cfg: checkpoint.load_params(exe, eval_prog, cfg.weights) eval_prog = convert(eval_prog, place, config, save_int8=False) compile_program = fluid.compiler.CompiledProgram( eval_prog).with_data_parallel() results = eval_run(exe, compile_program, loader, keys, values, cls, cfg, sub_eval_prog, sub_keys, sub_values) # evaluation resolution = None if 'mask' in results[0]: resolution = model.mask_head.resolution # if map_type not set, use default 11point, only use in VOC eval map_type = cfg.map_type if 'map_type' in cfg else '11point' eval_results(results, cfg.metric, cfg.num_classes, resolution, is_bbox_normalized, FLAGS.output_eval, map_type, dataset=dataset)
def main():
    cfg = load_config(FLAGS.config)

    merge_config(FLAGS.opt)
    check_config(cfg)
    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)
    # check if paddlepaddle version is satisfied
    check_version()

    main_arch = cfg.architecture

    dataset = cfg.TestReader['dataset']

    test_images = get_test_images(FLAGS.infer_dir, FLAGS.infer_img)
    dataset.set_images(test_images)

    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    model = create(main_arch)

    startup_prog = fluid.Program()
    infer_prog = fluid.Program()
    with fluid.program_guard(infer_prog, startup_prog):
        with fluid.unique_name.guard():
            inputs_def = cfg['TestReader']['inputs_def']
            feed_vars, loader = model.build_inputs(**inputs_def)
            test_fetches = model.test(feed_vars)
    infer_prog = infer_prog.clone(True)

    reader = create_reader(cfg.TestReader)
    # in iterable mode, this would be loader.set_sample_list_generator(reader, place)
    loader.set_sample_list_generator(reader)

    not_quant_pattern = []
    if FLAGS.not_quant_pattern:
        not_quant_pattern = FLAGS.not_quant_pattern
    config = {
        'weight_quantize_type': 'channel_wise_abs_max',
        'activation_quantize_type': 'moving_average_abs_max',
        'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
        'not_quant_pattern': not_quant_pattern
    }
    infer_prog = quant_aware(infer_prog, place, config, for_test=True)

    exe.run(startup_prog)
    if cfg.weights:
        checkpoint.load_params(exe, infer_prog, cfg.weights)
    infer_prog = convert(infer_prog, place, config, save_int8=False)

    # parse infer fetches
    assert cfg.metric in ['COCO', 'VOC', 'OID', 'WIDERFACE'], \
        "unknown metric type {}".format(cfg.metric)
    extra_keys = []
    if cfg['metric'] in ['COCO', 'OID']:
        extra_keys = ['im_info', 'im_id', 'im_shape']
    if cfg['metric'] == 'VOC' or cfg['metric'] == 'WIDERFACE':
        extra_keys = ['im_id', 'im_shape']
    keys, values, _ = parse_fetches(test_fetches, infer_prog, extra_keys)

    # parse dataset category
    if cfg.metric == 'COCO':
        from ppdet.utils.coco_eval import bbox2out, mask2out, get_category_info
    if cfg.metric == 'OID':
        from ppdet.utils.oid_eval import bbox2out, get_category_info
    if cfg.metric == "VOC":
        from ppdet.utils.voc_eval import bbox2out, get_category_info
    if cfg.metric == "WIDERFACE":
        from ppdet.utils.widerface_eval_utils import bbox2out, get_category_info

    anno_file = dataset.get_anno()
    with_background = dataset.with_background
    use_default_label = dataset.use_default_label

    clsid2catid, catid2name = get_category_info(anno_file, with_background,
                                                use_default_label)

    # whether the output bbox is normalized in the model's output layer
    is_bbox_normalized = False
    if hasattr(model, 'is_bbox_normalized') and \
            callable(model.is_bbox_normalized):
        is_bbox_normalized = model.is_bbox_normalized()

    imid2path = dataset.get_imid2path()

    iter_id = 0
    try:
        loader.start()
        while True:
            outs = exe.run(infer_prog, fetch_list=values, return_numpy=False)
            res = {
                k: (np.array(v), v.recursive_sequence_lengths())
                for k, v in zip(keys, outs)
            }
            logger.info('Infer iter {}'.format(iter_id))
            iter_id += 1

            bbox_results = None
            mask_results = None
            if 'bbox' in res:
                bbox_results = bbox2out([res], clsid2catid,
                                        is_bbox_normalized)
            if 'mask' in res:
                mask_results = mask2out([res], clsid2catid,
                                        model.mask_head.resolution)

            # visualize result
            im_ids = res['im_id'][0]
            for im_id in im_ids:
                image_path = imid2path[int(im_id)]
                image = Image.open(image_path).convert('RGB')
                image = visualize_results(image, int(im_id), catid2name,
                                          FLAGS.draw_threshold, bbox_results,
                                          mask_results)
                save_name = get_save_image_name(FLAGS.output_dir, image_path)
                logger.info("Detection bbox results saved in {}".format(
                    save_name))
                image.save(save_name, quality=95)
    except (StopIteration, fluid.core.EOFException):
        loader.reset()
def main():
    # 1. quantization configs
    quant_config = {
        # weight quantize type, default is 'channel_wise_abs_max'
        'weight_quantize_type': 'channel_wise_abs_max',
        # activation quantize type, default is 'moving_average_abs_max'
        'activation_quantize_type': 'moving_average_abs_max',
        # weight quantize bit num, default is 8
        'weight_bits': 8,
        # activation quantize bit num, default is 8
        'activation_bits': 8,
        # ops whose name_scope matches an entry in not_quant_pattern will not be quantized
        'not_quant_pattern': ['skip_quant'],
        # ops of the types in quantize_op_types will be quantized
        'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'],
        # data type after quantization, such as 'uint8', 'int8'. default is 'int8'
        'dtype': 'int8',
        # window size for 'range_abs_max' quantization. default is 10000
        'window_size': 10000,
        # The decay coefficient of moving average, default is 0.9
        'moving_rate': 0.9,
    }

    startup_prog, eval_program, place, config, alg_type = program.preprocess()

    feeded_var_names, target_vars, fetches_var_name = program.build_export(
        config, eval_program, startup_prog)
    eval_program = eval_program.clone(for_test=True)
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    eval_program = quant_aware(
        eval_program, place, quant_config, scope=None, for_test=True)

    init_model(config, eval_program, exe)

    # 2. Convert the program before saving the inference program.
    #    The dtype of eval_program's weights is float32, but their values are
    #    in the int8 range.
    eval_program = convert(eval_program, place, quant_config, scope=None)

    eval_fetch_name_list = fetches_var_name
    eval_fetch_varname_list = [v.name for v in target_vars]
    eval_reader = reader_main(config=config, mode="eval")
    quant_info_dict = {
        'program': eval_program,
        'reader': eval_reader,
        'fetch_name_list': eval_fetch_name_list,
        'fetch_varname_list': eval_fetch_varname_list
    }

    if alg_type == 'det':
        final_metrics = eval_det_run(exe, config, quant_info_dict, "eval")
    else:
        final_metrics = eval_rec_run(exe, config, quant_info_dict, "eval")
    print(final_metrics)

    # 3. Save inference model
    model_path = "./quant_model"
    if not os.path.isdir(model_path):
        os.makedirs(model_path)
    fluid.io.save_inference_model(
        dirname=model_path,
        feeded_var_names=feeded_var_names,
        target_vars=target_vars,
        executor=exe,
        main_program=eval_program,
        model_filename=model_path + '/model',
        params_filename=model_path + '/params')
    print("model saved as {}".format(model_path))
def test_accuracy(self):
    image = fluid.layers.data(
        name='image', shape=[1, 28, 28], dtype='float32')
    image.stop_gradient = False
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    model = MobileNet()
    out = model.net(input=image, class_dim=10)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    optimizer = fluid.optimizer.Momentum(
        momentum=0.9,
        learning_rate=0.01,
        regularization=fluid.regularizer.L2Decay(4e-5))
    optimizer.minimize(avg_cost)
    main_prog = fluid.default_main_program()
    val_prog = main_prog.clone(for_test=True)

    place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() \
        else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    feeder = fluid.DataFeeder([image, label], place, program=main_prog)
    train_reader = paddle.fluid.io.batch(
        paddle.dataset.mnist.train(), batch_size=64)
    eval_reader = paddle.fluid.io.batch(
        paddle.dataset.mnist.test(), batch_size=64)

    def train(program):
        iter = 0
        for data in train_reader():
            cost, top1, top5 = exe.run(
                program,
                feed=feeder.feed(data),
                fetch_list=[avg_cost, acc_top1, acc_top5])
            iter += 1
            if iter % 100 == 0:
                print('train iter={}, avg loss {}, acc_top1 {}, acc_top5 {}'.
                      format(iter, cost, top1, top5))

    def test(program):
        iter = 0
        result = [[], [], []]
        for data in eval_reader():
            cost, top1, top5 = exe.run(
                program,
                feed=feeder.feed(data),
                fetch_list=[avg_cost, acc_top1, acc_top5])
            iter += 1
            if iter % 100 == 0:
                print('eval iter={}, avg loss {}, acc_top1 {}, acc_top5 {}'.
                      format(iter, cost, top1, top5))
            result[0].append(cost)
            result[1].append(top1)
            result[2].append(top5)
        print(' avg loss {}, acc_top1 {}, acc_top5 {}'.format(
            np.mean(result[0]), np.mean(result[1]), np.mean(result[2])))
        return np.mean(result[1]), np.mean(result[2])

    train(main_prog)
    top1_1, top5_1 = test(main_prog)

    config = {
        'weight_quantize_type': 'channel_wise_abs_max',
        'activation_quantize_type': 'moving_average_abs_max',
        'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
    }
    quant_train_prog_pact = quant_aware(
        main_prog,
        place,
        config,
        for_test=False,
        act_preprocess_func=pact,
        optimizer_func=get_optimizer,
        executor=exe)
    quant_eval_prog = quant_aware(val_prog, place, config, for_test=True)
    train(quant_train_prog_pact)
    quant_eval_prog, int8_prog = convert(
        quant_eval_prog, place, config, save_int8=True)
    top1_2, top5_2 = test(quant_eval_prog)
    # values before quantization and after quantization should be close
    print("before quantization: top1: {}, top5: {}".format(top1_1, top5_1))
    print("after quantization: top1: {}, top5: {}".format(top1_2, top5_2))
def export_quant_infermodel(
        executor,
        place=None,
        scope=None,
        quant_config=None,
        train_config=None,
        checkpoint_path=None,
        export_inference_model_path_prefix="./export_quant_infermodel"):
    """Export quant model checkpoints to an inference model.

    Args:
        executor(paddle.static.Executor): The executor used to load, run and
            save the quantized model.
        place(paddle.CPUPlace or paddle.CUDAPlace): The device the executor
            runs on.
        scope(paddle.static.Scope, optional): Scope records the mapping between
            variable names and variables, similar to brackets in programming
            languages. Usually users can use
            `paddle.static.global_scope <https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api_cn/executor_cn/global_scope_cn.html>`_.
            When ``None``, `paddle.static.global_scope()
            <https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api_cn/executor_cn/global_scope_cn.html>`_
            is used. Default: ``None``.
        quant_config(dict, optional): Configs for convert. If None, the default
            config is used. It must be the same config that was used in
            'quant_aware'. Default: None.
        train_config(dict): Train-aware configs, including num_epoch,
            save_iter_step, learning_rate, weight_decay, use_pact,
            quant_model_ckpt_path, model_path_prefix,
            teacher_model_path_prefix, distill_node_pair(teacher_node_name1,
            node_name1, teacher_node_name2, node_name2, ...).
        checkpoint_path(str): Path of the checkpoint to export as a quant
            inference model.
        export_inference_model_path_prefix(str): Export path prefix for the
            inference model: storage directory + model name (without suffix).
    Returns:
        None
    """
    scope = paddle.static.global_scope() if not scope else scope
    # parse quant config
    if quant_config is None:
        quant_config = _quant_config_default
    else:
        assert isinstance(quant_config, dict), "quant config must be dict"
        quant_config = _parse_configs(quant_config)
    _logger.info("quant_aware config {}".format(quant_config))

    train_config = _parse_train_configs(train_config)
    distill_program_info = build_distill_prog_with_infermodel(
        executor, place, train_config)
    test_program = distill_program_info.test_program
    test_feed_names = distill_program_info.test_feed_names
    test_fetch_list = distill_program_info.test_fetch_list

    ############################################################################
    # quant
    ############################################################################
    use_pact = False  # use_pact must be False when exporting the model
    if use_pact:
        act_preprocess_func = pact
        optimizer_func = get_pact_optimizer
        pact_executor = executor
    else:
        act_preprocess_func = None
        optimizer_func = None
        pact_executor = None

    test_program = quant_aware(
        test_program,
        place,
        quant_config,
        scope=scope,
        act_preprocess_func=act_preprocess_func,
        optimizer_func=optimizer_func,
        executor=pact_executor,
        for_test=True)

    paddle.static.load(
        executor=executor,
        model_path=os.path.join(checkpoint_path),
        program=test_program)

    ############################################################################
    # 3. Freeze the graph after training by adjusting the quantize
    #    operators' order for inference.
    #    The dtype of float_program's weights is float32, but their values are
    #    in the int8 range.
    ############################################################################
    float_program, int8_program = convert(
        test_program, place, quant_config, scope=scope, save_int8=True)

    ############################################################################
    # 4. Save inference model
    ############################################################################
    export_model_dir = os.path.abspath(
        os.path.join(export_inference_model_path_prefix, os.path.pardir))
    if not os.path.exists(export_model_dir):
        os.makedirs(export_model_dir)

    feed_vars = []
    for name in test_feed_names:
        for var in float_program.list_vars():
            if var.name == name:
                feed_vars.append(var)
                break
    assert len(feed_vars) > 0, "can not find feed vars in quant program"

    paddle.static.save_inference_model(
        path_prefix=export_inference_model_path_prefix,
        feed_vars=feed_vars,
        fetch_vars=test_fetch_list,
        executor=executor,
        program=float_program)
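# A hedged usage sketch for export_quant_infermodel. The paths and the
# train_config values below are illustrative assumptions; the keys mirror the
# ones named in the docstring above.
train_config = {
    'num_epoch': 1,
    'save_iter_step': 1000,
    'learning_rate': 0.0001,
    'weight_decay': 0.0001,
    'use_pact': False,
    'quant_model_ckpt_path': './quant_ckpt/',
    'model_path_prefix': './infermodel/model',
    'teacher_model_path_prefix': './teacher_infermodel/model',
    'distill_node_pair': ['teacher_fc_0.tmp_0', 'fc_0.tmp_0'],
}
exe = paddle.static.Executor(paddle.CPUPlace())
export_quant_infermodel(
    exe,
    place=paddle.CPUPlace(),
    quant_config=None,  # fall back to the defaults shared with quant_aware
    train_config=train_config,
    checkpoint_path='./quant_ckpt/epoch_0_iter_1000',
    export_inference_model_path_prefix='./export_quant_infermodel/model')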
def test_accuracy(self):
    image = paddle.static.data(
        name='image', shape=[None, 1, 28, 28], dtype='float32')
    label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
    model = MobileNet()
    out = model.net(input=image, class_dim=10)
    cost = paddle.nn.functional.loss.cross_entropy(input=out, label=label)
    avg_cost = paddle.mean(x=cost)
    acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
    acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
    optimizer = paddle.optimizer.Momentum(
        momentum=0.9,
        learning_rate=0.01,
        weight_decay=paddle.regularizer.L2Decay(4e-5))
    optimizer.minimize(avg_cost)
    main_prog = paddle.static.default_main_program()
    val_prog = main_prog.clone(for_test=True)

    place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda() \
        else paddle.CPUPlace()
    exe = paddle.static.Executor(place)
    exe.run(paddle.static.default_startup_program())

    train_loader = paddle.io.DataLoader.from_generator(
        feed_list=[image, label],
        capacity=512,
        use_double_buffer=True,
        iterable=True)
    valid_loader = paddle.io.DataLoader.from_generator(
        feed_list=[image, label],
        capacity=512,
        use_double_buffer=True,
        iterable=True)
    train_reader = paddle.batch(paddle.dataset.mnist.train(), batch_size=64)
    eval_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=64)
    train_loader.set_sample_list_generator(train_reader, place)
    valid_loader.set_sample_list_generator(eval_reader, place)

    def train(program):
        iter = 0
        for data in train_loader():
            cost, top1, top5 = exe.run(
                program,
                feed=data,
                fetch_list=[avg_cost, acc_top1, acc_top5])
            iter += 1
            if iter % 100 == 0:
                print('train iter={}, avg loss {}, acc_top1 {}, acc_top5 {}'.
                      format(iter, cost, top1, top5))

    def test(program):
        iter = 0
        result = [[], [], []]
        for data in valid_loader():
            cost, top1, top5 = exe.run(
                program,
                feed=data,
                fetch_list=[avg_cost, acc_top1, acc_top5])
            iter += 1
            if iter % 100 == 0:
                print('eval iter={}, avg loss {}, acc_top1 {}, acc_top5 {}'.
                      format(iter, cost, top1, top5))
            result[0].append(cost)
            result[1].append(top1)
            result[2].append(top5)
        print(' avg loss {}, acc_top1 {}, acc_top5 {}'.format(
            np.mean(result[0]), np.mean(result[1]), np.mean(result[2])))
        return np.mean(result[1]), np.mean(result[2])

    train(main_prog)
    top1_1, top5_1 = test(main_prog)

    config = {
        'weight_quantize_type': 'channel_wise_abs_max',
        'activation_quantize_type': 'moving_average_abs_max',
        'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
    }
    quant_train_prog = quant_aware(main_prog, place, config, for_test=False)
    quant_eval_prog = quant_aware(val_prog, place, config, for_test=True)
    train(quant_train_prog)
    quant_eval_prog, int8_prog = convert(
        quant_eval_prog, place, config, save_int8=True)
    top1_2, top5_2 = test(quant_eval_prog)
    # values before quantization and after quantization should be close
    print("before quantization: top1: {}, top5: {}".format(top1_1, top5_1))
    print("after quantization: top1: {}, top5: {}".format(top1_2, top5_2))
def compress(args):
    # 1. quantization configs
    quant_config = {
        # weight quantize type, default is 'channel_wise_abs_max'
        'weight_quantize_type': 'channel_wise_abs_max',
        # activation quantize type, default is 'moving_average_abs_max'
        'activation_quantize_type': 'moving_average_abs_max',
        # weight quantize bit num, default is 8
        'weight_bits': 8,
        # activation quantize bit num, default is 8
        'activation_bits': 8,
        # ops whose name_scope matches an entry in not_quant_pattern will not be quantized
        'not_quant_pattern': ['skip_quant'],
        # ops of the types in quantize_op_types will be quantized
        'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'],
        # data type after quantization, such as 'uint8', 'int8'. default is 'int8'
        'dtype': 'int8',
        # window size for 'range_abs_max' quantization. default is 10000
        'window_size': 10000,
        # The decay coefficient of moving average, default is 0.9
        'moving_rate': 0.9,
    }

    train_reader = None
    test_reader = None
    if args.data == "mnist":
        import paddle.dataset.mnist as reader
        train_reader = reader.train()
        val_reader = reader.test()
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_reader = reader.train()
        val_reader = reader.val()
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))

    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = fluid.layers.data(
        name='image', shape=image_shape, dtype='float32')
    if args.use_pact:
        image.stop_gradient = False
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)

    train_prog = fluid.default_main_program()
    val_program = fluid.default_main_program().clone(for_test=True)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    opt = create_optimizer(args)
    opt.minimize(avg_cost)

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # 2. quantization transform programs (training aware)
    #    Apply quantization transforms to the graph before training and testing.
    #    According to the weight and activation quantization types, fake
    #    quantize and fake dequantize operators are inserted into the graph.
    if args.use_pact:
        act_preprocess_func = pact
        optimizer_func = get_optimizer
        executor = exe
    else:
        act_preprocess_func = None
        optimizer_func = None
        executor = None

    val_program = quant_aware(
        val_program,
        place,
        quant_config,
        scope=None,
        act_preprocess_func=act_preprocess_func,
        optimizer_func=optimizer_func,
        executor=executor,
        for_test=True)
    compiled_train_prog = quant_aware(
        train_prog,
        place,
        quant_config,
        scope=None,
        act_preprocess_func=act_preprocess_func,
        optimizer_func=optimizer_func,
        executor=executor,
        for_test=False)

    assert os.path.exists(
        args.pretrained_model), "pretrained_model doesn't exist"
    if args.pretrained_model:

        def if_exist(var):
            return os.path.exists(
                os.path.join(args.pretrained_model, var.name))

        fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)

    val_reader = paddle.fluid.io.batch(val_reader, batch_size=args.batch_size)
    train_reader = paddle.fluid.io.batch(
        train_reader, batch_size=args.batch_size, drop_last=True)

    train_feeder = fluid.DataFeeder([image, label], place)
    val_feeder = fluid.DataFeeder([image, label], place, program=val_program)

    def test(epoch, program):
        batch_id = 0
        acc_top1_ns = []
        acc_top5_ns = []
        for data in val_reader():
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program,
                feed=val_feeder.feed(data),
                fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}".
                    format(epoch, batch_id, np.mean(acc_top1_n),
                           np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))
            batch_id += 1

        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(acc_top1_ns)),
                np.mean(np.array(acc_top5_ns))))
        return np.mean(np.array(acc_top1_ns))

    def train(epoch, compiled_train_prog):
        batch_id = 0
        for data in train_reader():
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n = exe.run(
                compiled_train_prog,
                feed=train_feeder.feed(data),
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] - loss: {}; acc_top1: {}; acc_top5: {}; time: {}".
                    format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n,
                           end_time - start_time))
            if args.use_pact and batch_id % 1000 == 0:
                threshold = {}
                for var in val_program.list_vars():
                    if 'pact' in var.name:
                        array = np.array(fluid.global_scope().find_var(
                            var.name).get_tensor())
                        threshold[var.name] = array[0]
                print(threshold)
            batch_id += 1

    build_strategy = fluid.BuildStrategy()
    build_strategy.memory_optimize = False
    build_strategy.enable_inplace = False
    build_strategy.fuse_all_reduce_ops = False
    build_strategy.sync_batch_norm = False
    exec_strategy = fluid.ExecutionStrategy()
    compiled_train_prog = compiled_train_prog.with_data_parallel(
        loss_name=avg_cost.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    # train loop
    best_acc1 = 0.0
    best_epoch = 0
    for i in range(args.num_epochs):
        train(i, compiled_train_prog)
        acc1 = test(i, val_program)
        fluid.io.save_persistables(
            exe,
            dirname=os.path.join(args.checkpoint_dir, str(i)),
            main_program=val_program)
        if acc1 > best_acc1:
            best_acc1 = acc1
            best_epoch = i
            fluid.io.save_persistables(
                exe,
                dirname=os.path.join(args.checkpoint_dir, 'best_model'),
                main_program=val_program)
    if os.path.exists(os.path.join(args.checkpoint_dir, 'best_model')):
        fluid.io.load_persistables(
            exe,
            dirname=os.path.join(args.checkpoint_dir, 'best_model'),
            main_program=val_program)

    # 3. Freeze the graph after training by adjusting the quantize
    #    operators' order for inference. The dtype of float_program's weights
    #    is float32, but their values are in the int8 range.
    float_program, int8_program = convert(
        val_program, place, quant_config, scope=None, save_int8=True)
    print("eval best_model after convert")
    final_acc1 = test(best_epoch, float_program)

    # 4. Save inference model
    model_path = os.path.join(
        quantization_model_save_dir, args.model,
        'act_' + quant_config['activation_quantize_type'] + '_w_' +
        quant_config['weight_quantize_type'])
    float_path = os.path.join(model_path, 'float')
    int8_path = os.path.join(model_path, 'int8')
    if not os.path.isdir(model_path):
        os.makedirs(model_path)

    fluid.io.save_inference_model(
        dirname=float_path,
        feeded_var_names=[image.name],
        target_vars=[out],
        executor=exe,
        main_program=float_program,
        model_filename=float_path + '/model',
        params_filename=float_path + '/params')
    fluid.io.save_inference_model(
        dirname=int8_path,
        feeded_var_names=[image.name],
        target_vars=[out],
        executor=exe,
        main_program=int8_program,
        model_filename=int8_path + '/model',
        params_filename=int8_path + '/params')