# Build the loss and accuracy ops on top of the network output `model`.
cost = fluid.layers.cross_entropy(input=model, label=label)
avg_cost = fluid.layers.mean(cost)
acc = fluid.layers.accuracy(input=model, label=label)

# Clone the main program for evaluation. This happens before
# optimizer.minimize() below, so the clone carries no optimizer ops.
test_program = fluid.default_main_program().clone(for_test=True)

# Define the optimization method.
optimizer = fluid.optimizer.MomentumOptimizer(learning_rate=1e-3, momentum=0.9)
opts = optimizer.minimize(avg_cost)

# Build batched readers over the CIFAR-10 train and test sets.
train_reader = paddle.batch(cifar.train10(), batch_size=32)
test_reader = paddle.batch(cifar.test10(), batch_size=32)

# Create the executor. GPU 0 is used when this Paddle build has CUDA support;
# otherwise fall back to the CPU so the script still runs on CPU-only installs.
if fluid.is_compiled_with_cuda():
    place = fluid.CUDAPlace(0)
else:
    place = fluid.CPUPlace()
exe = fluid.Executor(place)

# Initialize the parameters.
exe.run(fluid.default_startup_program())

# Load previously saved persistable variables, if any, as a pretrained model.
save_path = 'models/persistables_model/'
if os.path.exists(save_path):
    print('使用持久性变量模型作为预训练模型')
    fluid.io.load_persistables(executor=exe, dirname=save_path)

# Define the input data dimensions.
def compress(args):
    """Train a student network with a teacher's soft labels and evaluate it.

    The function builds a student program (classification loss plus top-1 and
    top-5 accuracy), builds a teacher program and loads its pretrained
    variables, merges the teacher into the student program, adds a soft-label
    distillation loss, and then runs a train/validate loop for
    ``args.num_epochs`` epochs, logging through ``_logger``.

    Args:
        args: Object exposing the attributes read here: ``data``
            ("cifar10" or "imagenet"), ``model``, ``teacher_model``,
            ``teacher_pretrained_model``, ``use_gpu``, ``batch_size``,
            ``num_epochs`` and ``log_period``. It is also passed unchanged to
            ``create_optimizer``.

    Raises:
        ValueError: If ``args.data`` is neither "cifar10" nor "imagenet".
        AssertionError: If ``args.model`` is not in ``model_list``, or if
            ``args.teacher_pretrained_model`` is unset or does not exist
            after the download attempt.
    """
    if args.data == "cifar10":
        import paddle.dataset.cifar as reader
        train_reader = reader.train10()
        val_reader = reader.test10()
        class_dim = 10
        image_shape = "3,32,32"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_reader = reader.train()
        val_reader = reader.val()
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))
    image_shape = [int(m) for m in image_shape.split(",")]

    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)

    # ---- Student program: inputs, loaders, network, loss and metrics. ----
    student_program = fluid.Program()
    s_startup = fluid.Program()
    with fluid.program_guard(student_program, s_startup):
        with fluid.unique_name.guard():
            image = fluid.layers.data(
                name='image', shape=image_shape, dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            train_loader = fluid.io.DataLoader.from_generator(
                feed_list=[image, label],
                capacity=64,
                use_double_buffer=True,
                iterable=True)
            valid_loader = fluid.io.DataLoader.from_generator(
                feed_list=[image, label],
                capacity=64,
                use_double_buffer=True,
                iterable=True)
            # Model definition.
            model = models.__dict__[args.model]()
            out = model.net(input=image, class_dim=class_dim)
            cost = fluid.layers.cross_entropy(input=out, label=label)
            avg_cost = fluid.layers.mean(x=cost)
            acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
            acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    train_reader = paddle.batch(
        train_reader, batch_size=args.batch_size, drop_last=True)
    # NOTE(review): drop_last=True also discards the final partial validation
    # batch, so reported accuracy may skip a few samples -- confirm intended.
    val_reader = paddle.batch(
        val_reader, batch_size=args.batch_size, drop_last=True)
    # Clone for evaluation now, before the teacher is merged in and before
    # the optimizer is attached to the student program.
    val_program = student_program.clone(for_test=True)

    # Training feeds every available device; validation feeds only `place`.
    places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()
    train_loader.set_sample_list_generator(train_reader, places)
    valid_loader.set_sample_list_generator(val_reader, place)

    # ---- Teacher program. ----
    teacher_model = models.__dict__[args.teacher_model]()
    teacher_program = fluid.Program()
    t_startup = fluid.Program()
    with fluid.program_guard(teacher_program, t_startup):
        with fluid.unique_name.guard():
            image = fluid.layers.data(
                name='image', shape=image_shape, dtype='float32')
            # Called for its side effect of building the teacher graph; the
            # returned prediction variable is not used directly.
            teacher_model.net(image, class_dim=class_dim)

    exe.run(t_startup)

    # Only probe the filesystem when a path was actually supplied:
    # os.path.exists(None) raises TypeError, which would hide the explicit
    # assertion message below.
    teacher_dir = args.teacher_pretrained_model
    if teacher_dir and not os.path.exists(teacher_dir):
        # NOTE(review): the archive is always ResNet50_vd, regardless of
        # args.teacher_model -- confirm this matches the intended teacher.
        _download(
            'http://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar',
            '.')
        _decompress('./ResNet50_vd_pretrained.tar')
    assert teacher_dir and os.path.exists(
        teacher_dir
    ), "teacher_pretrained_model should be set when teacher_model is not None."

    def if_exist(var):
        """Return True when a file named after `var` exists in the teacher dir."""
        return os.path.exists(os.path.join(teacher_dir, var.name))

    fluid.io.load_vars(
        exe,
        teacher_dir,
        main_program=teacher_program,
        predicate=if_exist)

    # ---- Merge teacher into student and add the distillation loss. ----
    data_name_map = {'image': 'image'}
    merge(teacher_program, student_program, data_name_map, place)

    with fluid.program_guard(student_program, s_startup):
        # The "teacher_" prefix on the first variable name is presumably
        # added by merge() -- TODO confirm against its implementation.
        distill_loss = soft_label_loss("teacher_fc_0.tmp_0", "fc_0.tmp_0",
                                       student_program)
        loss = avg_cost + distill_loss
        lr, opt = create_optimizer(args)
        opt.minimize(loss)
    exe.run(s_startup)

    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_all_reduce_ops = False
    parallel_main = fluid.CompiledProgram(student_program).with_data_parallel(
        loss_name=loss.name, build_strategy=build_strategy)

    # ---- Train, then validate, once per epoch. ----
    for epoch_id in range(args.num_epochs):
        for step_id, data in enumerate(train_loader):
            lr_np, loss_1, loss_2, loss_3 = exe.run(
                parallel_main,
                feed=data,
                fetch_list=[
                    lr.name, loss.name, avg_cost.name, distill_loss.name
                ])
            if step_id % args.log_period == 0:
                _logger.info(
                    "train_epoch {} step {} lr {:.6f}, loss {:.6f}, class loss {:.6f}, distill loss {:.6f}"
                    .format(epoch_id, step_id, lr_np[0], loss_1[0], loss_2[0],
                            loss_3[0]))

        val_acc1s = []
        val_acc5s = []
        for step_id, data in enumerate(valid_loader):
            val_loss, val_acc1, val_acc5 = exe.run(
                val_program,
                data,
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
            val_acc1s.append(val_acc1)
            val_acc5s.append(val_acc5)
            if step_id % args.log_period == 0:
                _logger.info(
                    "valid_epoch {} step {} loss {:.6f}, top1 {:.6f}, top5 {:.6f}"
                    .format(epoch_id, step_id, val_loss[0], val_acc1[0],
                            val_acc5[0]))
        _logger.info("epoch {} top1 {:.6f}, top5 {:.6f}".format(
            epoch_id, np.mean(val_acc1s), np.mean(val_acc5s)))