def main():
    train_prog = fluid.Program()
    startup_prog = fluid.Program()
    with fluid.program_guard(train_prog, startup_prog):
        with fluid.unique_name.guard():
            loss, train_reader = network(True)
            adam = fluid.optimizer.Adam(learning_rate=0.01)
            adam.minimize(loss)

    test_prog = fluid.Program()
    test_startup = fluid.Program()
    with fluid.program_guard(test_prog, test_startup):
        with fluid.unique_name.guard():
            test_loss, test_reader = network(False)

    use_cuda = fluid.core.is_compiled_with_cuda()
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    fluid.Executor(place).run(startup_prog)
    fluid.Executor(place).run(test_startup)

    trainer = fluid.ParallelExecutor(
        use_cuda=use_cuda, loss_name=loss.name, main_program=train_prog)
    tester = fluid.ParallelExecutor(
        use_cuda=use_cuda, share_vars_from=trainer, main_program=test_prog)

    train_reader.decorate_paddle_reader(
        paddle.reader.shuffle(paddle.batch(mnist.train(), 512), buf_size=8192))
    test_reader.decorate_paddle_reader(paddle.batch(mnist.test(), 512))

    for epoch_id in range(10):
        train_reader.start()
        try:
            while True:
                print('train_loss', numpy.array(
                    trainer.run(fetch_list=[loss.name])))
        except fluid.core.EOFException:
            print('End of epoch', epoch_id)
            train_reader.reset()

        test_reader.start()
        try:
            while True:
                print('test loss', numpy.array(
                    tester.run(fetch_list=[test_loss.name])))
        except fluid.core.EOFException:
            print('End of testing')
            test_reader.reset()
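# A small sanity check one could add inside main() right after both programs
# are built (a sketch; the names are those defined above, not part of the
# original demo): because both networks are created under
# fluid.unique_name.guard(), their parameter names line up, which is exactly
# what lets share_vars_from=trainer reuse the trained weights at test time.
train_params = {p.name for p in train_prog.global_block().all_parameters()}
test_params = {p.name for p in test_prog.global_block().all_parameters()}
assert test_params <= train_params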
def export_model(args):
    if args.data == "mnist":
        import paddle.dataset.mnist as reader
        train_reader = reader.train()
        val_reader = reader.test()
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_reader = reader.train()
        val_reader = reader.val()
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))
    image_shape = [int(m) for m in image_shape.split(",")]
    image = fluid.data(
        name='image', shape=[None] + image_shape, dtype='float32')
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    val_program = fluid.default_main_program().clone(for_test=True)
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if args.pretrained_model:
        def if_exist(var):
            return os.path.exists(
                os.path.join(args.pretrained_model, var.name))
        fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)
    else:
        assert False, "args.pretrained_model must be set"

    fluid.io.save_inference_model(
        './inference_model/' + args.model,
        feeded_var_names=[image.name],
        target_vars=[out],
        executor=exe,
        main_program=val_program,
        model_filename='model',
        params_filename='weights')
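# A minimal sketch (not part of the original demo) of loading the exported
# inference model back and running one forward pass. The directory and model
# name below are hypothetical; fluid.io.load_inference_model returns the
# inference program plus the feed names and fetch targets saved with it.
import numpy as np

infer_exe = fluid.Executor(fluid.CPUPlace())
[infer_prog, feed_names, fetch_targets] = fluid.io.load_inference_model(
    dirname='./inference_model/MobileNet',  # hypothetical model directory
    executor=infer_exe,
    model_filename='model',
    params_filename='weights')
fake_image = np.random.random([1, 3, 224, 224]).astype('float32')
result = infer_exe.run(infer_prog,
                       feed={feed_names[0]: fake_image},
                       fetch_list=fetch_targets)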
# Run the startup programs to initialize parameters
exe.run(train_startup)
exe.run(test_startup)

# Compile programs
# train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(loss_name=train_loss.name)
# test_prog = fluid.CompiledProgram(test_prog).with_data_parallel(share_vars_from=train_prog)

ITERABLE = True

# Set the data sources of the DataLoaders
places = fluid.cuda_places() if ITERABLE else None
train_loader.set_sample_list_generator(
    fluid.io.shuffle(fluid.io.batch(mnist.train(), 512), buf_size=1024),
    places=places)
test_loader.set_sample_list_generator(
    fluid.io.batch(mnist.test(), 512), places=places)

def run_iterable(program, exe, loss, data_loader):
    for data in data_loader():
        loss_value = exe.run(program=program, feed=data, fetch_list=[loss])
        print('loss is {}'.format(loss_value))

for epoch_id in range(10):
    run_iterable(train_prog, exe, train_loss, train_loader)
    run_iterable(test_prog, exe, test_loss, test_loader)
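# A minimal sketch of the non-iterable mode (ITERABLE = False), assuming the
# loaders were created with iterable=False so they feed data by themselves:
# the loop is then driven by start()/reset() and terminates on
# fluid.core.EOFException instead of iterating over the loader.
def run_non_iterable(program, exe, loss, data_loader):
    data_loader.start()
    try:
        while True:
            loss_value = exe.run(program=program, fetch_list=[loss])
            print('loss is {}'.format(loss_value))
    except fluid.core.EOFException:
        print('End of epoch')
        data_loader.reset()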
def compress(args):
    test_reader = None
    if args.data == "mnist":
        import paddle.dataset.mnist as reader
        val_reader = reader.test()
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        val_reader = reader.val()
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))
    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    val_program = fluid.default_main_program().clone(for_test=True)
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if args.pretrained_model:
        def if_exist(var):
            return os.path.exists(
                os.path.join(args.pretrained_model, var.name))
        fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)

    val_reader = paddle.batch(val_reader, batch_size=args.batch_size)
    val_feeder = fluid.DataFeeder([image, label], place, program=val_program)

    def test(program):
        batch_id = 0
        acc_top1_ns = []
        acc_top5_ns = []
        for data in val_reader():
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program,
                feed=val_feeder.feed(data),
                fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval batch[{}] - acc_top1: {}; acc_top5: {}; time: {}".
                    format(batch_id, np.mean(acc_top1_n), np.mean(acc_top5_n),
                           end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))
            batch_id += 1
        _logger.info("Final eval - acc_top1: {}; acc_top5: {}".format(
            np.mean(np.array(acc_top1_ns)), np.mean(np.array(acc_top5_ns))))
        return np.mean(np.array(acc_top1_ns))

    params = []
    for param in fluid.default_main_program().global_block().all_parameters():
        if "_sep_weights" in param.name:
            params.append(param.name)

    sensitivity(
        val_program,
        place,
        params,
        test,
        sensitivities_file="sensitivities_0.data",
        pruned_ratios=[0.1, 0.2, 0.3, 0.4])

    sensitivity(
        val_program,
        place,
        params,
        test,
        sensitivities_file="sensitivities_1.data",
        pruned_ratios=[0.5, 0.6, 0.7])

    sens = merge_sensitive(
        ["./sensitivities_0.data", "./sensitivities_1.data"])
    ratios = get_ratios_by_loss(sens, 0.01)
    print(ratios)
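# A hypothetical follow-up (not part of the original demo): the dict returned
# by get_ratios_by_loss maps parameter names to prune ratios, so it can be fed
# straight into PaddleSlim's Pruner, as the other pruning demos in this
# section do. Import and placement inside compress() are assumptions.
from paddleslim.prune import Pruner

pruner = Pruner()
pruned_val_program, _, _ = pruner.prune(
    val_program,
    fluid.global_scope(),
    params=list(ratios.keys()),
    ratios=list(ratios.values()),
    place=place)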
def compress(args):
    train_reader = None
    test_reader = None
    if args.data == "mnist":
        import paddle.dataset.mnist as reader
        train_reader = reader.train()
        val_reader = reader.test()
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_reader = reader.train()
        val_reader = reader.val()
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))
    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    val_program = fluid.default_main_program().clone(for_test=True)
    opt = create_optimizer(args)
    opt.minimize(avg_cost)
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if args.pretrained_model:
        def if_exist(var):
            return os.path.exists(
                os.path.join(args.pretrained_model, var.name))
        fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)

    val_reader = paddle.fluid.io.batch(val_reader, batch_size=args.batch_size)
    train_reader = paddle.fluid.io.batch(
        train_reader, batch_size=args.batch_size, drop_last=True)
    train_feeder = fluid.DataFeeder([image, label], place)
    val_feeder = fluid.DataFeeder([image, label], place, program=val_program)

    def test(epoch, program):
        batch_id = 0
        acc_top1_ns = []
        acc_top5_ns = []
        for data in val_reader():
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program,
                feed=val_feeder.feed(data),
                fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {:.3f}; acc_top5: {:.3f}; time: {:.3f}"
                    .format(epoch, batch_id, np.mean(acc_top1_n),
                            np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))
            batch_id += 1
        _logger.info(
            "Final eval epoch[{}] - acc_top1: {:.3f}; acc_top5: {:.3f}".format(
                epoch, np.mean(np.array(acc_top1_ns)),
                np.mean(np.array(acc_top5_ns))))
        return np.mean(np.array(acc_top1_ns))

    def train(epoch, program):
        build_strategy = fluid.BuildStrategy()
        exec_strategy = fluid.ExecutionStrategy()
        train_program = fluid.compiler.CompiledProgram(
            program).with_data_parallel(loss_name=avg_cost.name,
                                        build_strategy=build_strategy,
                                        exec_strategy=exec_strategy)
        batch_id = 0
        for data in train_reader():
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n = exe.run(
                train_program,
                feed=train_feeder.feed(data),
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] - loss: {:.3f}; acc_top1: {:.3f}; acc_top5: {:.3f}; time: {:.3f}"
                    .format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n,
                            end_time - start_time))
            batch_id += 1

    params = []
    for param in fluid.default_main_program().global_block().all_parameters():
        if "_sep_weights" in param.name:
            params.append(param.name)

    def eval_func(program):
        return test(0, program)

    if args.data == "mnist":
        train(0, fluid.default_main_program())

    pruner = SensitivePruner(place, eval_func, checkpoints=args.checkpoints)
    pruned_program, pruned_val_program, iter = pruner.restore()

    if pruned_program is None:
        pruned_program = fluid.default_main_program()
    if pruned_val_program is None:
        pruned_val_program = val_program

    base_flops = flops(val_program)

    start = iter
    end = args.prune_steps
    for iter in range(start, end):
        pruned_program, pruned_val_program = pruner.greedy_prune(
            pruned_program, pruned_val_program, params, 0.03, topk=1)
        current_flops = flops(pruned_val_program)
        print("iter:{}; pruned FLOPS: {}".format(
            iter, float(base_flops - current_flops) / base_flops))
        acc = None
        for i in range(args.retrain_epoch):
            train(i, pruned_program)
            acc = test(i, pruned_val_program)
        print("iter:{}; pruned FLOPS: {}; acc: {}".format(
            iter, float(base_flops - current_flops) / base_flops, acc))
        pruner.save_checkpoint(pruned_program, pruned_val_program)
def compress(args):
    train_reader = None
    test_reader = None
    if args.data == "mnist":
        import paddle.dataset.mnist as reader
        train_reader = reader.train()
        val_reader = reader.test()
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_reader = reader.train()
        val_reader = reader.val()
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))
    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    val_program = fluid.default_main_program().clone(for_test=True)
    opt = create_optimizer(args)
    opt.minimize(avg_cost)
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if args.pretrained_model:
        def if_exist(var):
            return os.path.exists(
                os.path.join(args.pretrained_model, var.name))
        _logger.info("Load pretrained model from {}".format(
            args.pretrained_model))
        fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)

    val_reader = paddle.fluid.io.batch(val_reader, batch_size=args.batch_size)
    train_reader = paddle.fluid.io.batch(
        train_reader, batch_size=args.batch_size, drop_last=True)
    train_feeder = fluid.DataFeeder([image, label], place)
    val_feeder = fluid.DataFeeder([image, label], place, program=val_program)

    def test(epoch, program):
        batch_id = 0
        acc_top1_ns = []
        acc_top5_ns = []
        for data in val_reader():
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program,
                feed=val_feeder.feed(data),
                fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, np.mean(acc_top1_n),
                            np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))
            batch_id += 1
        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(acc_top1_ns)),
                np.mean(np.array(acc_top5_ns))))

    def train(epoch, program):
        build_strategy = fluid.BuildStrategy()
        exec_strategy = fluid.ExecutionStrategy()
        train_program = fluid.compiler.CompiledProgram(
            program).with_data_parallel(loss_name=avg_cost.name,
                                        build_strategy=build_strategy,
                                        exec_strategy=exec_strategy)
        batch_id = 0
        for data in train_reader():
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n = exe.run(
                train_program,
                feed=train_feeder.feed(data),
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] - loss: {}; acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n,
                            end_time - start_time))
            batch_id += 1

    test(0, val_program)
    params = get_pruned_params(args, fluid.default_main_program())
    _logger.info("FLOPs before pruning: {}".format(
        flops(fluid.default_main_program())))

    pruner = Pruner(args.criterion)
    pruned_val_program, _, _ = pruner.prune(
        val_program,
        fluid.global_scope(),
        params=params,
        ratios=[args.pruned_ratio] * len(params),
        place=place,
        only_graph=True)
    pruned_program, _, _ = pruner.prune(
        fluid.default_main_program(),
        fluid.global_scope(),
        params=params,
        ratios=[args.pruned_ratio] * len(params),
        place=place)
    _logger.info("FLOPs after pruning: {}".format(flops(pruned_program)))

    for i in range(args.num_epochs):
        train(i, pruned_program)
        if i % args.test_period == 0:
            test(i, pruned_val_program)
            save_model(exe, pruned_val_program,
                       os.path.join(args.model_path, str(i)))
        if args.save_inference:
            infer_model_path = os.path.join(args.model_path, "infer_models",
                                            str(i))
            fluid.io.save_inference_model(infer_model_path, ["image"], [out],
                                          exe, pruned_val_program)
            _logger.info(
                "Saved inference model into [{}]".format(infer_model_path))
def compress(args):
    train_reader = None
    test_reader = None
    if args.data == "mnist":
        import paddle.dataset.mnist as reader
        train_reader = reader.train()
        val_reader = reader.test()
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_reader = reader.train()
        val_reader = reader.val()
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))
    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    val_program = fluid.default_main_program().clone(for_test=True)
    opt = create_optimizer(args)
    opt.minimize(avg_cost)
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if args.pretrained_model:
        def if_exist(var):
            return os.path.exists(
                os.path.join(args.pretrained_model, var.name))
        fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)

    val_reader = paddle.fluid.io.batch(val_reader, batch_size=args.batch_size)
    train_reader = paddle.fluid.io.batch(
        train_reader, batch_size=args.batch_size, drop_last=True)
    train_feeder = fluid.DataFeeder([image, label], place)
    val_feeder = fluid.DataFeeder([image, label], place, program=val_program)

    def test(epoch, program):
        batch_id = 0
        acc_top1_ns = []
        acc_top5_ns = []
        for data in val_reader():
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program,
                feed=val_feeder.feed(data),
                fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, np.mean(acc_top1_n),
                            np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))
            batch_id += 1
        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(acc_top1_ns)),
                np.mean(np.array(acc_top5_ns))))
        return np.mean(np.array(acc_top1_ns))

    def train(epoch, program):
        build_strategy = fluid.BuildStrategy()
        exec_strategy = fluid.ExecutionStrategy()
        train_program = fluid.compiler.CompiledProgram(
            program).with_data_parallel(loss_name=avg_cost.name,
                                        build_strategy=build_strategy,
                                        exec_strategy=exec_strategy)
        batch_id = 0
        for data in train_reader():
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n = exe.run(
                train_program,
                feed=train_feeder.feed(data),
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] - loss: {}; acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n,
                            end_time - start_time))
            batch_id += 1

    params = []
    for param in fluid.default_main_program().global_block().all_parameters():
        if "_sep_weights" in param.name:
            params.append(param.name)

    pruner = AutoPruner(
        val_program,
        fluid.global_scope(),
        place,
        params=params,
        init_ratios=[0.33] * len(params),
        pruned_flops=0.5,
        pruned_latency=None,
        server_addr=("", 0),
        init_temperature=100,
        reduce_rate=0.85,
        max_try_times=300,
        max_client_num=10,
        search_steps=100,
        max_ratios=0.9,
        min_ratios=0.,
        is_server=True,
        key="auto_pruner")

    while True:
        pruned_program, pruned_val_program = pruner.prune(
            fluid.default_main_program(), val_program)
        for i in range(1):
            train(i, pruned_program)
        score = test(0, pruned_val_program)
        pruner.reward(score)
def compress(args):
    ############################################################################
    # 1. quantization configs
    ############################################################################
    quant_config = {
        # weight quantize type, default is 'channel_wise_abs_max'
        'weight_quantize_type': 'channel_wise_abs_max',
        # activation quantize type, default is 'moving_average_abs_max'
        'activation_quantize_type': 'moving_average_abs_max',
        # weight quantize bit num, default is 8
        'weight_bits': 8,
        # activation quantize bit num, default is 8
        'activation_bits': 8,
        # ops whose name_scope is in the not_quant_pattern list will not be quantized
        'not_quant_pattern': ['skip_quant'],
        # ops whose type is in quantize_op_types will be quantized
        'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'],
        # data type after quantization, such as 'uint8', 'int8', etc. default is 'int8'
        'dtype': 'int8',
        # window size for 'range_abs_max' quantization. default is 10000
        'window_size': 10000,
        # the decay coefficient of moving average, default is 0.9
        'moving_rate': 0.9,
    }

    train_reader = None
    test_reader = None
    if args.data == "mnist":
        import paddle.dataset.mnist as reader
        train_reader = reader.train()
        val_reader = reader.test()
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_reader = reader.train()
        val_reader = reader.val()
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))
    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = paddle.static.data(
        name='image', shape=[None] + image_shape, dtype='float32')
    label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = paddle.nn.functional.loss.cross_entropy(input=out, label=label)
    avg_cost = paddle.mean(x=cost)
    acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
    acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)

    train_prog = paddle.static.default_main_program()
    val_program = paddle.static.default_main_program().clone(for_test=True)

    place = paddle.CUDAPlace(0) if args.use_gpu else paddle.CPUPlace()

    ############################################################################
    # 2. quantization transform programs (training aware)
    #    Insert quantization transforms into the graph before training and
    #    testing. According to the weight and activation quantization types,
    #    fake quantize and fake dequantize operators are added to the graph.
    ############################################################################
    val_program = quant_aware(
        val_program, place, quant_config, scope=None, for_test=True)
    compiled_train_prog = quant_aware(
        train_prog, place, quant_config, scope=None, for_test=False)

    opt = create_optimizer(args)
    opt.minimize(avg_cost)

    exe = paddle.static.Executor(place)
    exe.run(paddle.static.default_startup_program())

    assert os.path.exists(
        args.pretrained_model), "pretrained_model doesn't exist"
    if args.pretrained_model:
        paddle.static.load(train_prog, args.pretrained_model, exe)

    val_reader = paddle.batch(val_reader, batch_size=args.batch_size)
    train_reader = paddle.batch(
        train_reader, batch_size=args.batch_size, drop_last=True)

    places = paddle.static.cuda_places(
    ) if args.use_gpu else paddle.static.cpu_places()
    train_loader = paddle.io.DataLoader.from_generator(
        feed_list=[image, label],
        capacity=512,
        use_double_buffer=True,
        iterable=True)
    valid_loader = paddle.io.DataLoader.from_generator(
        feed_list=[image, label],
        capacity=512,
        use_double_buffer=True,
        iterable=True)
    train_loader.set_sample_list_generator(train_reader, places)
    valid_loader.set_sample_list_generator(val_reader, places[0])

    def test(epoch, program):
        batch_id = 0
        acc_top1_ns = []
        acc_top5_ns = []
        for data in valid_loader():
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program, feed=data, fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}".
                    format(epoch, batch_id, np.mean(acc_top1_n),
                           np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))
            batch_id += 1
        _logger.info("Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
            epoch, np.mean(np.array(acc_top1_ns)),
            np.mean(np.array(acc_top5_ns))))
        return np.mean(np.array(acc_top1_ns))

    def train(epoch, compiled_train_prog):
        batch_id = 0
        for data in train_loader():
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n = exe.run(
                compiled_train_prog,
                feed=data,
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] - loss: {}; acc_top1: {}; acc_top5: {}; time: {}".
                    format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n,
                           end_time - start_time))
            batch_id += 1

    build_strategy = paddle.static.BuildStrategy()
    build_strategy.memory_optimize = False
    build_strategy.enable_inplace = False
    build_strategy.fuse_all_reduce_ops = False
    build_strategy.sync_batch_norm = False
    exec_strategy = paddle.static.ExecutionStrategy()
    compiled_train_prog = compiled_train_prog.with_data_parallel(
        loss_name=avg_cost.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    ############################################################################
    # train loop
    ############################################################################
    best_acc1 = 0.0
    best_epoch = 0
    for i in range(args.num_epochs):
        train(i, compiled_train_prog)
        acc1 = test(i, val_program)
        paddle.static.save(
            program=val_program,
            model_path=os.path.join(args.checkpoint_dir, str(i)))
        if acc1 > best_acc1:
            best_acc1 = acc1
            best_epoch = i
            paddle.static.save(
                program=val_program,
                model_path=os.path.join(args.checkpoint_dir, 'best_model'))

    if os.path.exists(os.path.join(args.checkpoint_dir, 'best_model')):
        paddle.static.load(
            program=val_program,
            model_path=os.path.join(args.checkpoint_dir, 'best_model'),
            executor=exe)

    ############################################################################
    # 3. Freeze the graph after training by adjusting the order of the
    #    quantize operators for inference.
    #    The dtype of float_program's weights is float32, but their values
    #    are in the int8 range.
    ############################################################################
    float_program, int8_program = convert(
        val_program, place, quant_config, scope=None, save_int8=True)
    print("eval best_model after convert")
    final_acc1 = test(best_epoch, float_program)

    ############################################################################
    # 4. Save inference model
    ############################################################################
    model_path = os.path.join(
        quantization_model_save_dir, args.model,
        'act_' + quant_config['activation_quantize_type'] + '_w_' +
        quant_config['weight_quantize_type'])
    float_path = os.path.join(model_path, 'float')
    if not os.path.isdir(model_path):
        os.makedirs(model_path)

    # paddle.static.save_inference_model (2.x) takes a path prefix plus the
    # feed/fetch variables; it writes float_path + '.pdmodel' / '.pdiparams'.
    paddle.static.save_inference_model(
        path_prefix=float_path,
        feed_vars=[image],
        fetch_vars=[out],
        executor=exe,
        program=float_program)
def compress(args):
    if args.data == "mnist":
        import paddle.dataset.mnist as reader
        train_reader = reader.train()
        val_reader = reader.test()
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_reader = reader.train()
        val_reader = reader.val()
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))
    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = paddle.static.data(
        name='image', shape=[None] + image_shape, dtype='float32')
    if args.use_pact:
        image.stop_gradient = False
    label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = paddle.nn.functional.loss.cross_entropy(input=out, label=label)
    avg_cost = paddle.mean(x=cost)
    acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
    acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)

    train_prog = paddle.static.default_main_program()
    val_program = paddle.static.default_main_program().clone(for_test=True)

    if not args.analysis:
        learning_rate, opt = create_optimizer(args)
        opt.minimize(avg_cost)

    place = paddle.CUDAPlace(0) if args.use_gpu else paddle.CPUPlace()
    exe = paddle.static.Executor(place)
    exe.run(paddle.static.default_startup_program())

    train_reader = paddle.batch(
        train_reader, batch_size=args.batch_size, drop_last=True)
    train_loader = paddle.io.DataLoader.from_generator(
        feed_list=[image, label],
        capacity=512,
        use_double_buffer=True,
        iterable=True)
    places = paddle.static.cuda_places(
    ) if args.use_gpu else paddle.static.cpu_places()
    train_loader.set_sample_list_generator(train_reader, places)

    val_reader = paddle.batch(val_reader, batch_size=args.batch_size)
    valid_loader = paddle.io.DataLoader.from_generator(
        feed_list=[image, label],
        capacity=512,
        use_double_buffer=True,
        iterable=True)
    valid_loader.set_sample_list_generator(val_reader, places[0])

    if args.analysis:
        # names of all activations to collect
        activates = [
            'pool2d_1.tmp_0', 'tmp_35', 'batch_norm_21.tmp_2', 'tmp_26',
            'elementwise_mul_5.tmp_0', 'pool2d_5.tmp_0',
            'elementwise_add_5.tmp_0', 'relu_2.tmp_0', 'pool2d_3.tmp_0',
            'conv2d_40.tmp_2', 'elementwise_mul_0.tmp_0', 'tmp_62',
            'elementwise_add_8.tmp_0', 'batch_norm_39.tmp_2',
            'conv2d_32.tmp_2', 'tmp_17', 'tmp_5', 'elementwise_add_9.tmp_0',
            'pool2d_4.tmp_0', 'relu_0.tmp_0', 'tmp_53', 'relu_3.tmp_0',
            'elementwise_add_4.tmp_0', 'elementwise_add_6.tmp_0', 'tmp_11',
            'conv2d_36.tmp_2', 'relu_8.tmp_0', 'relu_5.tmp_0',
            'pool2d_7.tmp_0', 'elementwise_add_2.tmp_0',
            'elementwise_add_7.tmp_0', 'pool2d_2.tmp_0', 'tmp_47',
            'batch_norm_12.tmp_2', 'elementwise_mul_6.tmp_0',
            'elementwise_mul_7.tmp_0', 'pool2d_6.tmp_0', 'relu_6.tmp_0',
            'elementwise_add_0.tmp_0', 'elementwise_mul_3.tmp_0',
            'conv2d_12.tmp_2', 'elementwise_mul_2.tmp_0', 'tmp_8', 'tmp_2',
            'conv2d_8.tmp_2', 'elementwise_add_3.tmp_0',
            'elementwise_mul_1.tmp_0', 'pool2d_8.tmp_0', 'conv2d_28.tmp_2',
            'image', 'conv2d_16.tmp_2', 'batch_norm_33.tmp_2', 'relu_1.tmp_0',
            'pool2d_0.tmp_0', 'tmp_20', 'conv2d_44.tmp_2', 'relu_10.tmp_0',
            'tmp_41', 'relu_4.tmp_0', 'elementwise_add_1.tmp_0', 'tmp_23',
            'batch_norm_6.tmp_2', 'tmp_29', 'elementwise_mul_4.tmp_0',
            'tmp_14'
        ]
        var_collector = VarCollector(train_prog, activates, use_ema=True)
        values = var_collector.abs_max_run(
            train_loader, exe, step=None, loss_name=avg_cost.name)
        np.save('pact_thres.npy', values)
        _logger.info(values)
        _logger.info("PACT thresholds have been saved as pact_thres.npy")
        # Draw the histograms into 'dist_pdf/result.pdf'
        # var_collector.pdf(values)
        return

    values = defaultdict(lambda: 20)
    try:
        tmp = np.load("pact_thres.npy", allow_pickle=True).item()
        values.update(tmp)
        _logger.info("pact_thres.npy info loaded.")
    except Exception:
        _logger.info(
            "cannot find pact_thres.npy. Set init PACT threshold as 20.")
    _logger.info(values)

    # 1. quantization configs
    quant_config = {
        # weight quantize type, default is 'channel_wise_abs_max'
        'weight_quantize_type': 'channel_wise_abs_max',
        # activation quantize type, default is 'moving_average_abs_max'
        'activation_quantize_type': 'moving_average_abs_max',
        # weight quantize bit num, default is 8
        'weight_bits': 8,
        # activation quantize bit num, default is 8
        'activation_bits': 8,
        # ops whose name_scope is in the not_quant_pattern list will not be quantized
        'not_quant_pattern': ['skip_quant'],
        # ops whose type is in quantize_op_types will be quantized
        'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'],
        # data type after quantization, such as 'uint8', 'int8', etc. default is 'int8'
        'dtype': 'int8',
        # window size for 'range_abs_max' quantization. default is 10000
        'window_size': 10000,
        # the decay coefficient of moving average, default is 0.9
        'moving_rate': 0.9,
    }

    # 2. quantization transform programs (training aware)
    #    Insert quantization transforms into the graph before training and
    #    testing. According to the weight and activation quantization types,
    #    fake quantize and fake dequantize operators are added to the graph.
    def pact(x):
        helper = LayerHelper("pact", **locals())
        dtype = 'float32'
        init_thres = values[x.name.split('_tmp_input')[0]]
        u_param_attr = paddle.ParamAttr(
            name=x.name + '_pact',
            initializer=paddle.nn.initializer.Constant(value=init_thres),
            regularizer=paddle.regularizer.L2Decay(0.0001),
            learning_rate=1)
        u_param = helper.create_parameter(
            attr=u_param_attr, shape=[1], dtype=dtype)
        # clamp x to the learnable range [-u, u]
        part_a = paddle.nn.functional.relu(x - u_param)
        part_b = paddle.nn.functional.relu(-u_param - x)
        x = x - part_a + part_b
        return x

    def get_optimizer():
        return paddle.optimizer.Momentum(args.lr, 0.9)

    if args.use_pact:
        act_preprocess_func = pact
        optimizer_func = get_optimizer
        executor = exe
    else:
        act_preprocess_func = None
        optimizer_func = None
        executor = None

    val_program = quant_aware(
        val_program,
        place,
        quant_config,
        scope=None,
        act_preprocess_func=act_preprocess_func,
        optimizer_func=optimizer_func,
        executor=executor,
        for_test=True)
    compiled_train_prog = quant_aware(
        train_prog,
        place,
        quant_config,
        scope=None,
        act_preprocess_func=act_preprocess_func,
        optimizer_func=optimizer_func,
        executor=executor,
        for_test=False)

    assert os.path.exists(
        args.pretrained_model), "pretrained_model doesn't exist"
    if args.pretrained_model:
        paddle.static.load(train_prog, args.pretrained_model, exe)

    def test(epoch, program):
        batch_id = 0
        acc_top1_ns = []
        acc_top5_ns = []
        for data in valid_loader():
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program, feed=data, fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {:.6f}; acc_top5: {:.6f}; time: {:.3f}"
                    .format(epoch, batch_id, np.mean(acc_top1_n),
                            np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))
            batch_id += 1
        _logger.info(
            "Final eval epoch[{}] - acc_top1: {:.6f}; acc_top5: {:.6f}".format(
                epoch, np.mean(np.array(acc_top1_ns)),
                np.mean(np.array(acc_top5_ns))))
        return np.mean(np.array(acc_top1_ns))

    def train(epoch, compiled_train_prog, lr):
        batch_id = 0
        for data in train_loader():
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n = exe.run(
                compiled_train_prog,
                feed=data,
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] lr: {:.6f} - loss: {:.6f}; acc_top1: {:.6f}; acc_top5: {:.6f}; time: {:.3f}"
                    .format(epoch, batch_id, lr.get_lr(), loss_n, acc_top1_n,
                            acc_top5_n, end_time - start_time))
            if args.use_pact and batch_id % 1000 == 0:
                threshold = {}
                for var in val_program.list_vars():
                    if 'pact' in var.name:
                        array = np.array(paddle.static.global_scope().find_var(
                            var.name).get_tensor())
                        threshold[var.name] = array[0]
                _logger.info(threshold)
            batch_id += 1
        lr.step()

    build_strategy = paddle.static.BuildStrategy()
    build_strategy.enable_inplace = False
    build_strategy.fuse_all_reduce_ops = False
    exec_strategy = paddle.static.ExecutionStrategy()
    compiled_train_prog = compiled_train_prog.with_data_parallel(
        loss_name=avg_cost.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    # train loop
    best_acc1 = 0.0
    best_epoch = 0
    start_epoch = 0
    if args.checkpoint_dir is not None:
        ckpt_path = args.checkpoint_dir
        assert args.checkpoint_epoch is not None, "checkpoint_epoch must be set"
        start_epoch = args.checkpoint_epoch
        paddle.static.load_vars(
            exe, dirname=args.checkpoint_dir, main_program=val_program)
        start_step = start_epoch * int(
            math.ceil(float(args.total_images) / args.batch_size))
        v = paddle.static.global_scope().find_var(
            '@LR_DECAY_COUNTER@').get_tensor()
        v.set(np.array([start_step]).astype(np.float32), place)

    for i in range(start_epoch, args.num_epochs):
        train(i, compiled_train_prog, learning_rate)
        acc1 = test(i, val_program)
        paddle.static.save(val_program, os.path.join(args.output_dir, str(i)))
        if acc1 > best_acc1:
            best_acc1 = acc1
            best_epoch = i
            paddle.static.save(val_program,
                               os.path.join(args.output_dir, 'best_model'))
        _logger.info("Best Validation Acc1: {:.6f}, at epoch {}".format(
            best_acc1, best_epoch))

    if os.path.exists(os.path.join(args.output_dir, 'best_model')):
        paddle.static.load(val_program,
                           os.path.join(args.output_dir, 'best_model'), exe)

    # 3. Freeze the graph after training by adjusting the order of the
    #    quantize operators for inference. The dtype of float_program's
    #    weights is float32, but their values are in the int8 range.
    float_program, int8_program = convert(
        val_program, place, quant_config, scope=None, save_int8=True)
    _logger.info("eval best_model after convert")
    final_acc1 = test(best_epoch, float_program)
    _logger.info("final acc:{}".format(final_acc1))

    # 4. Save inference model
    model_path = os.path.join(
        quantization_model_save_dir, args.model,
        'act_' + quant_config['activation_quantize_type'] + '_w_' +
        quant_config['weight_quantize_type'])
    float_path = os.path.join(model_path, 'float')
    if not os.path.isdir(model_path):
        os.makedirs(model_path)

    # paddle.static.save_inference_model (2.x) takes a path prefix plus the
    # feed/fetch variables; it writes float_path + '.pdmodel' / '.pdiparams'.
    paddle.static.save_inference_model(
        path_prefix=float_path,
        feed_vars=[image],
        fetch_vars=[out],
        executor=exe,
        program=float_program)
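# A quick numeric check (a pure-NumPy sketch, not part of the original demo)
# of the identity the pact() preprocessing above relies on:
# x - relu(x - u) + relu(-u - x) == clip(x, -u, u), i.e. activations are
# clamped to the learnable range [-u, u] before fake quantization.
import numpy as np

x = np.linspace(-40, 40, 9)
u = 20.0
pact_out = x - np.maximum(x - u, 0) + np.maximum(-u - x, 0)
assert np.allclose(pact_out, np.clip(x, -u, u))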
                             batch_label).astype("int64")

# start training
step = 0
for batch_img, batch_label in batch_generator(mnist.train(), batch_size,
                                              epochs):
    step += 1
    out_loss = exe.run(feed={"img": batch_img, "label": batch_label},
                       fetch_list=[loss.name])
    if step % 100 == 0:
        print("step %d, loss %.3f" % (step, out_loss[0]))

# start testing
accuracy = fluid.metrics.Accuracy()
for batch_img, batch_label in batch_generator(mnist.test(), batch_size, 1):
    out_pred = exe.run(program=test_program,
                       feed={"img": batch_img, "label": batch_label},
                       fetch_list=[acc.name])
    accuracy.update(value=out_pred[0], weight=len(batch_img))
print("test acc: %.3f" % accuracy.eval())
    0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace()

# ## 3. Define the input data
#
# To keep this example quick to run, we use the simple MNIST dataset; the
# `paddle.dataset.mnist` package of the Paddle framework handles downloading
# and reading the MNIST data. The code is as follows:

# In[14]:

import paddle.dataset.mnist as reader
train_reader = paddle.batch(reader.train(), batch_size=128, drop_last=True)
test_reader = paddle.batch(reader.test(), batch_size=128, drop_last=True)
data_feeder = fluid.DataFeeder(inputs, place)

# ## 4. Train and test
#
# First define the train and test functions, so that both normal training and
# quantization-aware training only need to call them. The train function runs
# one epoch of training; because MNIST is small, a single epoch is enough to
# bring top-1 accuracy above 95%.

# In[15]:

def train(prog):
    iter = 0
    for data in train_reader():
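        # (The original notebook cell is truncated at this point. A minimal
        # sketch of how such a loop body typically continues, assuming the
        # `exe`, `avg_cost`, `acc`, and `data_feeder` defined earlier in the
        # notebook:)
        loss_np, acc_np = exe.run(prog,
                                  feed=data_feeder.feed(data),
                                  fetch_list=[avg_cost, acc])
        if iter % 100 == 0:
            print('iter={}, loss={}, acc={}'.format(
                iter, np.mean(loss_np), np.mean(acc_np)))
        iter += 1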
def compress(args):
    # 1. quantization configs
    quant_config = {
        # weight quantize type, default is 'channel_wise_abs_max'
        'weight_quantize_type': 'channel_wise_abs_max',
        # activation quantize type, default is 'moving_average_abs_max'
        'activation_quantize_type': 'moving_average_abs_max',
        # weight quantize bit num, default is 8
        'weight_bits': 8,
        # activation quantize bit num, default is 8
        'activation_bits': 8,
        # ops whose name_scope is in the not_quant_pattern list will not be quantized
        'not_quant_pattern': ['skip_quant'],
        # ops whose type is in quantize_op_types will be quantized
        'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'],
        # data type after quantization, such as 'uint8', 'int8', etc. default is 'int8'
        'dtype': 'int8',
        # window size for 'range_abs_max' quantization. default is 10000
        'window_size': 10000,
        # the decay coefficient of moving average, default is 0.9
        'moving_rate': 0.9,
    }

    train_reader = None
    test_reader = None
    if args.data == "mnist":
        import paddle.dataset.mnist as reader
        train_reader = reader.train()
        val_reader = reader.test()
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_reader = reader.train()
        val_reader = reader.val()
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))
    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    if args.use_pact:
        image.stop_gradient = False
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)

    train_prog = fluid.default_main_program()
    val_program = fluid.default_main_program().clone(for_test=True)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    opt = create_optimizer(args)
    opt.minimize(avg_cost)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # 2. quantization transform programs (training aware)
    #    Insert quantization transforms into the graph before training and
    #    testing. According to the weight and activation quantization types,
    #    fake quantize and fake dequantize operators are added to the graph.
    if args.use_pact:
        act_preprocess_func = pact
        optimizer_func = get_optimizer
        executor = exe
    else:
        act_preprocess_func = None
        optimizer_func = None
        executor = None

    val_program = quant_aware(
        val_program,
        place,
        quant_config,
        scope=None,
        act_preprocess_func=act_preprocess_func,
        optimizer_func=optimizer_func,
        executor=executor,
        for_test=True)
    compiled_train_prog = quant_aware(
        train_prog,
        place,
        quant_config,
        scope=None,
        act_preprocess_func=act_preprocess_func,
        optimizer_func=optimizer_func,
        executor=executor,
        for_test=False)

    assert os.path.exists(
        args.pretrained_model), "pretrained_model doesn't exist"
    if args.pretrained_model:
        def if_exist(var):
            return os.path.exists(
                os.path.join(args.pretrained_model, var.name))
        fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)

    val_reader = paddle.fluid.io.batch(val_reader, batch_size=args.batch_size)
    train_reader = paddle.fluid.io.batch(
        train_reader, batch_size=args.batch_size, drop_last=True)
    train_feeder = fluid.DataFeeder([image, label], place)
    val_feeder = fluid.DataFeeder([image, label], place, program=val_program)

    def test(epoch, program):
        batch_id = 0
        acc_top1_ns = []
        acc_top5_ns = []
        for data in val_reader():
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program,
                feed=val_feeder.feed(data),
                fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, np.mean(acc_top1_n),
                            np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))
            batch_id += 1
        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(acc_top1_ns)),
                np.mean(np.array(acc_top5_ns))))
        return np.mean(np.array(acc_top1_ns))

    def train(epoch, compiled_train_prog):
        batch_id = 0
        for data in train_reader():
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n = exe.run(
                compiled_train_prog,
                feed=train_feeder.feed(data),
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] - loss: {}; acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n,
                            end_time - start_time))
            if args.use_pact and batch_id % 1000 == 0:
                threshold = {}
                for var in val_program.list_vars():
                    if 'pact' in var.name:
                        array = np.array(fluid.global_scope().find_var(
                            var.name).get_tensor())
                        threshold[var.name] = array[0]
                print(threshold)
            batch_id += 1

    build_strategy = fluid.BuildStrategy()
    build_strategy.memory_optimize = False
    build_strategy.enable_inplace = False
    build_strategy.fuse_all_reduce_ops = False
    build_strategy.sync_batch_norm = False
    exec_strategy = fluid.ExecutionStrategy()
    compiled_train_prog = compiled_train_prog.with_data_parallel(
        loss_name=avg_cost.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    # train loop
    best_acc1 = 0.0
    best_epoch = 0
    start_epoch = 0
    if args.checkpoint_dir is not None:
        ckpt_path = args.checkpoint_dir
        assert args.checkpoint_epoch is not None, "checkpoint_epoch must be set"
        start_epoch = args.checkpoint_epoch
        fluid.io.load_persistables(
            exe, dirname=args.checkpoint_dir, main_program=val_program)
        start_step = start_epoch * int(
            math.ceil(float(args.total_images) / args.batch_size))
        v = fluid.global_scope().find_var('@LR_DECAY_COUNTER@').get_tensor()
        v.set(np.array([start_step]).astype(np.float32), place)

    for i in range(start_epoch, args.num_epochs):
        train(i, compiled_train_prog)
        acc1 = test(i, val_program)
        fluid.io.save_persistables(
            exe,
            dirname=os.path.join(args.output_dir, str(i)),
            main_program=val_program)
        if acc1 > best_acc1:
            best_acc1 = acc1
            best_epoch = i
            fluid.io.save_persistables(
                exe,
                dirname=os.path.join(args.output_dir, 'best_model'),
                main_program=val_program)

    if os.path.exists(os.path.join(args.output_dir, 'best_model')):
        fluid.io.load_persistables(
            exe,
            dirname=os.path.join(args.output_dir, 'best_model'),
            main_program=val_program)

    # 3. Freeze the graph after training by adjusting the order of the
    #    quantize operators for inference. The dtype of float_program's
    #    weights is float32, but their values are in the int8 range.
    float_program, int8_program = convert(
        val_program, place, quant_config, scope=None, save_int8=True)
    print("eval best_model after convert")
    final_acc1 = test(best_epoch, float_program)

    # 4. Save inference models
    model_path = os.path.join(
        quantization_model_save_dir, args.model,
        'act_' + quant_config['activation_quantize_type'] + '_w_' +
        quant_config['weight_quantize_type'])
    float_path = os.path.join(model_path, 'float')
    int8_path = os.path.join(model_path, 'int8')
    if not os.path.isdir(model_path):
        os.makedirs(model_path)

    fluid.io.save_inference_model(
        dirname=float_path,
        feeded_var_names=[image.name],
        target_vars=[out],
        executor=exe,
        main_program=float_program,
        model_filename=float_path + '/model',
        params_filename=float_path + '/params')
    fluid.io.save_inference_model(
        dirname=int8_path,
        feeded_var_names=[image.name],
        target_vars=[out],
        executor=exe,
        main_program=int8_program,
        model_filename=int8_path + '/model',
        params_filename=int8_path + '/params')
def compress(args):
    ############################################################################
    # 1. quantization configs
    ############################################################################
    quant_config = {
        # weight quantize type, default is 'abs_max'
        'weight_quantize_type': 'abs_max',
        # activation quantize type, default is 'abs_max'
        'activation_quantize_type': 'moving_average_abs_max',
        # weight quantize bit num, default is 8
        'weight_bits': 8,
        # activation quantize bit num, default is 8
        'activation_bits': 8,
        # ops whose name_scope is in the not_quant_pattern list will not be quantized
        'not_quant_pattern': ['skip_quant'],
        # ops whose type is in quantize_op_types will be quantized
        'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'],
        # data type after quantization, default is 'int8'
        'dtype': 'int8',
        # window size for 'range_abs_max' quantization. default is 10000
        'window_size': 10000,
        # the decay coefficient of moving average, default is 0.9
        'moving_rate': 0.9,
        # if quant_weight_only is True, only the parameters of layers that
        # need quantization are quantized, and anti-quantization ops are
        # inserted for those parameters
        'quant_weight_only': False
    }

    train_reader = None
    test_reader = None
    if args.data == "mnist":
        import paddle.dataset.mnist as reader
        train_reader = reader.train()
        val_reader = reader.test()
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_reader = reader.train()
        val_reader = reader.val()
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))
    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)

    train_prog = fluid.default_main_program()
    val_program = fluid.default_main_program().clone(for_test=True)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()

    ############################################################################
    # 2. quantization transform programs (training aware)
    #    Insert quantization transforms into the graph before training and
    #    testing. According to the weight and activation quantization types,
    #    fake quantize and fake dequantize operators are added to the graph.
    ############################################################################
    val_program = quant_aware(
        val_program, place, quant_config, scope=None, for_test=True)
    compiled_train_prog = quant_aware(
        train_prog, place, quant_config, scope=None, for_test=False)

    opt = create_optimizer(args)
    opt.minimize(avg_cost)

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if args.pretrained_model:
        def if_exist(var):
            return os.path.exists(
                os.path.join(args.pretrained_model, var.name))
        fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)

    val_reader = paddle.batch(val_reader, batch_size=args.batch_size)
    train_reader = paddle.batch(
        train_reader, batch_size=args.batch_size, drop_last=True)
    train_feeder = fluid.DataFeeder([image, label], place)
    val_feeder = fluid.DataFeeder([image, label], place, program=val_program)

    def test(epoch, program):
        batch_id = 0
        acc_top1_ns = []
        acc_top5_ns = []
        for data in val_reader():
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program,
                feed=val_feeder.feed(data),
                fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, np.mean(acc_top1_n),
                            np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))
            batch_id += 1
        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(acc_top1_ns)),
                np.mean(np.array(acc_top5_ns))))
        return np.mean(np.array(acc_top1_ns))

    def train(epoch, compiled_train_prog):
        build_strategy = fluid.BuildStrategy()
        build_strategy.memory_optimize = False
        build_strategy.enable_inplace = False
        build_strategy.fuse_all_reduce_ops = False
        build_strategy.sync_batch_norm = False
        exec_strategy = fluid.ExecutionStrategy()
        compiled_train_prog = compiled_train_prog.with_data_parallel(
            loss_name=avg_cost.name,
            build_strategy=build_strategy,
            exec_strategy=exec_strategy)
        batch_id = 0
        for data in train_reader():
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n = exe.run(
                compiled_train_prog,
                feed=train_feeder.feed(data),
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] - loss: {}; acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n,
                            end_time - start_time))
            batch_id += 1

    ############################################################################
    # train loop
    ############################################################################
    for i in range(args.num_epochs):
        train(i, compiled_train_prog)
        if i % args.test_period == 0:
            test(i, val_program)

    ############################################################################
    # 3. Freeze the graph after training by adjusting the order of the
    #    quantize operators for inference.
    #    The dtype of float_program's weights is float32, but their values
    #    are in the int8 range.
    ############################################################################
    float_program, int8_program = convert(
        val_program, place, quant_config, scope=None, save_int8=True)

    ############################################################################
    # 4. Save inference models
    ############################################################################
    model_path = os.path.join(
        quantization_model_save_dir, args.model,
        'act_' + quant_config['activation_quantize_type'] + '_w_' +
        quant_config['weight_quantize_type'])
    float_path = os.path.join(model_path, 'float')
    int8_path = os.path.join(model_path, 'int8')
    if not os.path.isdir(model_path):
        os.makedirs(model_path)

    fluid.io.save_inference_model(
        dirname=float_path,
        feeded_var_names=[image.name],
        target_vars=[out],
        executor=exe,
        main_program=float_program,
        model_filename=float_path + '/model',
        params_filename=float_path + '/params')
    fluid.io.save_inference_model(
        dirname=int8_path,
        feeded_var_names=[image.name],
        target_vars=[out],
        executor=exe,
        main_program=int8_program,
        model_filename=int8_path + '/model',
        params_filename=int8_path + '/params')
# Get the loss and accuracy functions
cost = fluid.layers.cross_entropy(input=model, label=label)
avg_cost = fluid.layers.mean(cost)
acc = fluid.layers.accuracy(input=model, label=label)

# Get the training and test programs
test_program = fluid.default_main_program().clone(for_test=True)

# Define the optimization method
optimizer = fluid.optimizer.AdamOptimizer(learning_rate=0.001)
opts = optimizer.minimize(avg_cost)

# Get the MNIST data
train_reader = paddle.batch(mnist.train(), batch_size=128)
test_reader = paddle.batch(mnist.test(), batch_size=128)

# Define an executor that uses the CPU
place = fluid.CPUPlace()
exe = fluid.Executor(place)
# Initialize the parameters
exe.run(fluid.default_startup_program())

# Define the input data layout
feeder = fluid.DataFeeder(place=place, feed_list=[image, label])

# Start training and testing
for pass_id in range(10):
    # Training
    for batch_id, data in enumerate(train_reader()):
        train_cost, train_acc = exe.run(program=fluid.default_main_program(),
def eval(args):
    train_reader = None
    test_reader = None
    if args.data == "mnist":
        import paddle.dataset.mnist as reader
        train_reader = reader.train()
        val_reader = reader.test()
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_reader = reader.train()
        val_reader = reader.val()
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))
    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    val_program = fluid.default_main_program().clone(for_test=True)
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    val_reader = paddle.batch(val_reader, batch_size=args.batch_size)
    val_feeder = fluid.DataFeeder([image, label], place, program=val_program)

    load_model(val_program, "./model/mobilenetv1_prune_50")

    batch_id = 0
    acc_top1_ns = []
    acc_top5_ns = []
    for data in val_reader():
        start_time = time.time()
        acc_top1_n, acc_top5_n = exe.run(
            val_program,
            feed=val_feeder.feed(data),
            fetch_list=[acc_top1.name, acc_top5.name])
        end_time = time.time()
        if batch_id % args.log_period == 0:
            _logger.info(
                "Eval batch[{}] - acc_top1: {}; acc_top5: {}; time: {}".format(
                    batch_id, np.mean(acc_top1_n), np.mean(acc_top5_n),
                    end_time - start_time))
        acc_top1_ns.append(np.mean(acc_top1_n))
        acc_top5_ns.append(np.mean(acc_top5_n))
        batch_id += 1
    _logger.info("Final eval - acc_top1: {}; acc_top5: {}".format(
        np.mean(np.array(acc_top1_ns)), np.mean(np.array(acc_top5_ns))))
            buf_size=500))
        train_loss = network(train_reader)  # some network definition
        adam = fluid.optimizer.Adam(learning_rate=0.01)
        adam.minimize(train_loss)

# Create test_main_prog and test_startup_prog
test_main_prog = fluid.Program()
test_startup_prog = fluid.Program()
with fluid.program_guard(test_main_prog, test_startup_prog):
    # Use fluid.unique_name.guard() to share parameters with the train program
    with fluid.unique_name.guard():
        test_reader = fluid.layers.py_reader(capacity=32,
                                             shapes=[(-1, 1, 28, 28), (-1, 1)],
                                             dtypes=['float32', 'int64'],
                                             name='test_reader')
        test_reader.decorate_paddle_reader(paddle.batch(mnist.test(), 512))
        test_loss = network(test_reader)

fluid.Executor(fluid.CUDAPlace(0)).run(train_startup_prog)
fluid.Executor(fluid.CUDAPlace(0)).run(test_startup_prog)

train_exe = fluid.ParallelExecutor(use_cuda=True,
                                   loss_name=train_loss.name,
                                   main_program=train_main_prog)
test_exe = fluid.ParallelExecutor(use_cuda=True,
                                  loss_name=test_loss.name,
                                  main_program=test_main_prog)

for epoch_id in range(10):
    train_reader.start()
    try:
        while True:
            train_exe.run(fetch_list=[train_loss.name])
    except fluid.core.EOFException:
        train_reader.reset()
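The per-epoch test pass mirrors the training loop: start the py_reader, run until it signals EOF, then reset it. A minimal sketch reusing test_exe, test_loss, and test_reader from this snippet; it would sit inside the same epoch loop:

    # inside the epoch loop above:
    test_reader.start()
    try:
        while True:
            test_exe.run(fetch_list=[test_loss.name])
    except fluid.core.EOFException:
        test_reader.reset()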
def build_model(self):
    image_batch = layers.data(name='image_batch', shape=[-1, 1, 28, 28],
                              dtype='float32')
    label_batch = layers.data(name='label_batch', shape=[-1, 1], dtype='int64')
    noise = layers.data(name='noise', shape=[-1, self.cfg.latent_size],
                        dtype='float32')
    sampled_labels = layers.data(name='sampled_labels', shape=[-1, 1],
                                 dtype='int64')
    x = layers.data(name='x', shape=[-1, 1, 28, 28], dtype='float32')
    y = layers.data(name='y', shape=[-1, 1], dtype='float32')
    aux_y = layers.data(name='aux_y', shape=[-1, 1], dtype='int64')
    trick = layers.data(name='trick', shape=[-1, 1], dtype='float32')

    g_train = GTrain(sampled_labels, noise, trick, self.cfg)
    d_train = DTrain(x, y, aux_y, self.cfg)

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    g_train_prog = fluid.CompiledProgram(g_train.program)
    d_train_prog = fluid.CompiledProgram(d_train.program)

    train_history = defaultdict(list)
    test_history = defaultdict(list)

    for epoch in range(1, self.cfg.epochs + 1):
        print('Epoch {}/{}'.format(epoch, self.cfg.epochs))
        num_batches = int(np.ceil(60000 / float(self.cfg.batch_size)))
        progress_bar = Bar('Training', max=num_batches)

        epoch_gen_loss = []
        epoch_disc_loss = []

        train_reader = paddle.batch(
            paddle.reader.shuffle(mnist.train(), buf_size=60000),
            batch_size=self.cfg.batch_size,
            drop_last=True)
        test_reader = mnist.test()

        step = 0
        for i, data in enumerate(train_reader()):
            image_batch = np.array(
                [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
            label_batch = np.array([[x[1]] for x in data]).astype('int64')
            if len(image_batch) != self.cfg.batch_size:
                continue

            # generate a new batch of noise
            noise_np = np.random.uniform(
                -1, 1,
                (self.cfg.batch_size, self.cfg.latent_size)).astype('float32')

            # sample some labels from p_c
            sampled_labels_np = np.random.randint(
                0, self.cfg.num_classes, self.cfg.batch_size).astype('int64')
            sampled_labels_np = np.expand_dims(sampled_labels_np, axis=1)

            # generate a batch of fake images, using the generated labels as
            # a conditioner. We reshape the sampled labels to be
            # (self.cfg.batch_size, 1) so that we can feed them into the
            # embedding layer as a length one sequence
            generated_images = exe.run(g_train.infer_program,
                                       feed={
                                           'sampled_labels': sampled_labels_np,
                                           'noise': noise_np
                                       },
                                       fetch_list=[g_train.fake_img])[0]

            x_np = np.concatenate((image_batch, generated_images))

            # use one-sided soft real/fake labels
            # Salimans et al., 2016
            # https://arxiv.org/pdf/1606.03498.pdf (Section 3.4)
            soft_zero, soft_one = 0, 0.95
            y_np = np.array(
                [[soft_one]] * len(image_batch) +
                [[soft_zero]] * len(image_batch)).astype('float32')
            aux_y_np = np.concatenate((label_batch, sampled_labels_np), axis=0)

            # see if the discriminator can figure itself out...
            epoch_disc_loss.append(
                exe.run(d_train_prog,
                        feed={'x': x_np, 'y': y_np, 'aux_y': aux_y_np},
                        fetch_list=[d_train.loss])[0])

            # make new noise. We generate 2 * batch size here such that we
            # have the generator optimize over an identical number of images
            # as the discriminator
            noise_np = np.random.uniform(
                -1, 1,
                (2 * self.cfg.batch_size,
                 self.cfg.latent_size)).astype('float32')
            sampled_labels_np = np.random.randint(
                0, self.cfg.num_classes,
                2 * self.cfg.batch_size).astype('int64')
            sampled_labels_np = np.expand_dims(sampled_labels_np, axis=1)

            # we want to train the generator to trick the discriminator.
            # For the generator, we want all the {fake, not-fake} labels
            # to say not-fake
            trick_np = np.array(
                [[soft_one]] * 2 * self.cfg.batch_size).astype('float32')

            epoch_gen_loss.append(
                exe.run(g_train_prog,
                        feed={
                            'sampled_labels': sampled_labels_np,
                            'noise': noise_np,
                            'trick': trick_np
                        },
                        fetch_list=[g_train.loss])[0])
            step += 1
            progress_bar.next()
        progress_bar.finish()

        print('Testing for epoch {}'.format(epoch))

        # evaluate the testing loss here

        # generate a new batch of noise
        noise_np = np.random.uniform(
            -1, 1,
            (self.cfg.test_size, self.cfg.latent_size)).astype('float32')

        # sample some labels from p_c and generate images from them
        sampled_labels_np = np.random.randint(
            0, self.cfg.num_classes, self.cfg.test_size).astype('int64')
        sampled_labels_np = np.expand_dims(sampled_labels_np, axis=1)
        generated_images = exe.run(g_train.infer_program,
                                   feed={
                                       'sampled_labels': sampled_labels_np,
                                       'noise': noise_np
                                   },
                                   fetch_list=[g_train.fake_img])[0]

        x_test, y_test = [], []
        for data in test_reader():
            x_test.append(np.reshape(data[0], [1, 28, 28]))
            y_test.append([data[1]])
            if len(x_test) >= self.cfg.test_size:
                break
        x_test = np.array(x_test).astype('float32')
        y_test = np.array(y_test).astype('int64')

        x_np = np.concatenate((x_test, generated_images))
        y_np = np.array(
            [[1]] * self.cfg.test_size +
            [[0]] * self.cfg.test_size).astype('float32')
        aux_y_np = np.concatenate((y_test, sampled_labels_np), axis=0)

        # see if the discriminator can figure itself out...
        discriminator_test_loss = exe.run(
            d_train.infer_program,
            feed={'x': x_np, 'y': y_np, 'aux_y': aux_y_np},
            fetch_list=[d_train.unweighted_loss])[0][0]
        discriminator_train_loss = np.mean(np.array(epoch_disc_loss))

        # make new noise
        noise_np = np.random.uniform(
            -1, 1,
            (2 * self.cfg.test_size, self.cfg.latent_size)).astype('float32')
        sampled_labels_np = np.random.randint(
            0, self.cfg.num_classes, 2 * self.cfg.test_size).astype('int64')
        sampled_labels_np = np.expand_dims(sampled_labels_np, axis=1)
        trick_np = np.array([[1]] * 2 * self.cfg.test_size).astype('float32')

        generated_images = exe.run(g_train.infer_program,
                                   feed={
                                       'sampled_labels': sampled_labels_np,
                                       'noise': noise_np
                                   },
                                   fetch_list=[g_train.fake_img])[0]
        generator_test_loss = exe.run(
            d_train.infer_program,
            feed={
                'x': generated_images,
                'y': trick_np,
                'aux_y': sampled_labels_np
            },
            fetch_list=[d_train.unweighted_loss])[0][0]
        generator_train_loss = np.mean(np.array(epoch_gen_loss))

        # generate an epoch report on performance
        train_history['generator'].append(generator_train_loss)
        train_history['discriminator'].append(discriminator_train_loss)
        test_history['generator'].append(generator_test_loss)
        test_history['discriminator'].append(discriminator_test_loss)
        print('train g loss', generator_train_loss)
        print('train d loss', discriminator_train_loss)
        print('test g loss', generator_test_loss)
        print('test d loss', discriminator_test_loss)

        # generate some digits to display
        num_rows = 4
        noise_np = np.tile(
            np.random.uniform(-1, 1, (num_rows, self.cfg.latent_size)),
            (self.cfg.num_classes, 1)).astype('float32')
        sampled_labels_np = np.array(
            [[i] * num_rows for i in range(self.cfg.num_classes)]).reshape(
                -1, 1).astype('int64')
        generated_images = exe.run(g_train.infer_program,
                                   feed={
                                       'sampled_labels': sampled_labels_np,
                                       'noise': noise_np
                                   },
                                   fetch_list=[g_train.fake_img])[0]

        def save_images(generated_images, epoch):
            for i in range(len(generated_images)):
                fname = './data/image_epoch_%d_%d.jpeg' % (epoch, i)
                img = np.array(
                    generated_images[i]).astype('float32').reshape((28, 28))
                img = img * 127.5 + 127.5
                img = np.clip(img, 0, 255).astype('uint8')
                img = Image.fromarray(img, 'L')
                img.save(fname, format='JPEG')

        save_images(generated_images, epoch)

    with open('acgan-history.pkl', 'wb') as f:
        pickle.dump({'train': train_history, 'test': test_history}, f)
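Once training has finished, the pickled history can be read back to inspect the loss curves. A minimal sketch over the 'acgan-history.pkl' file written above:

import pickle

with open('acgan-history.pkl', 'rb') as f:
    history = pickle.load(f)

# Print the per-epoch generator/discriminator training losses.
for epoch, (g_loss, d_loss) in enumerate(
        zip(history['train']['generator'],
            history['train']['discriminator']), 1):
    print('epoch {}: g loss {}, d loss {}'.format(epoch, g_loss, d_loss))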