def main(args):
    devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
    devices_num = len(devices.split(","))  # note: unused in this function

    # Build the inference program and switch it to test mode.
    startup_prog = fluid.Program()
    infer_prog = fluid.Program()
    infer_fetch_list = build_program(
        main_prog=infer_prog, startup_prog=startup_prog, args=args)
    infer_prog = infer_prog.clone(for_test=True)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    valid_reader = reader.train_valid(
        batch_size=args.batch_size,
        is_train=False,
        is_shuffle=False,
        args=args)

    # Load trained weights, compile the program, and evaluate top-1 accuracy.
    fluid.io.load_persistables(exe, args.model_dir, main_program=infer_prog)
    infer_prog = fluid.CompiledProgram(infer_prog)

    top1 = infer(infer_prog, exe, valid_reader, infer_fetch_list, args)
    logger.info("test_acc {:.6f}".format(top1))
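# --- Hypothetical helper, not part of the original script: a runnable sketch
# of how the device count above is derived from CUDA_VISIBLE_DEVICES. One
# subtlety worth knowing: an unset or empty variable still yields a count of
# 1, because "".split(",") == [""].
def _demo_devices_num():
    for env in ("", "0", "0,1,2,3"):
        print(repr(env), len(env.split(",")))
    # '' -> 1, '0' -> 1, '0,1,2,3' -> 4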
def main(args):
    # Pick the device: CPU, a single GPU, or this rank's GPU under data parallelism.
    if not args.use_gpu:
        place = fluid.CPUPlace()
    elif not args.use_data_parallel:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id)

    train_reader = reader.train_valid(
        batch_size=args.batch_size, is_train=True, is_shuffle=True)
    valid_reader = reader.train_valid(
        batch_size=args.batch_size, is_train=False, is_shuffle=False)

    with fluid.dygraph.guard(place):
        # Two peer models trained jointly (mutual learning).
        models = [MobileNetV1(), MobileNetV1()]
        parallel_models = None
        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()
            parallel_models = [
                fluid.dygraph.parallel.DataParallel(model, strategy)
                for model in models
            ]
            train_reader = fluid.contrib.reader.distributed_batch_reader(
                train_reader)

        train_loader = fluid.io.DataLoader.from_generator(
            capacity=1024,
            use_double_buffer=True,
            iterable=True,
            return_list=True,
            use_multiprocess=True)
        valid_loader = fluid.io.DataLoader.from_generator(
            capacity=1024,
            use_double_buffer=True,
            iterable=True,
            return_list=True,
            use_multiprocess=True)
        train_loader.set_batch_generator(train_reader, places=place)
        valid_loader.set_batch_generator(valid_reader, places=place)
        dataloaders = [train_loader, valid_loader]

        # Piecewise lr schedule: decay 10x at epochs 60, 120, and 180.
        device_num = fluid.dygraph.parallel.Env().nranks
        step = int(args.trainset_num / (args.batch_size * device_num))
        epochs = [60, 120, 180]
        bd = [step * e for e in epochs]
        lr = [args.init_lr * (0.1**i) for i in range(len(bd) + 1)]
        lr_a = fluid.dygraph.PiecewiseDecay(bd, lr, 0)
        lr_b = fluid.dygraph.PiecewiseDecay(bd, lr, 0)

        # One Momentum optimizer per peer model.
        opt_a = fluid.optimizer.MomentumOptimizer(
            lr_a,
            0.9,
            parameter_list=models[0].parameters(),
            use_nesterov=True,
            regularization=fluid.regularizer.L2DecayRegularizer(5e-4))
        opt_b = fluid.optimizer.MomentumOptimizer(
            lr_b,
            0.9,
            parameter_list=models[1].parameters(),
            use_nesterov=True,
            regularization=fluid.regularizer.L2DecayRegularizer(5e-4))
        optimizers = [opt_a, opt_b]

        trainer = Trainer(models, parallel_models, optimizers, dataloaders,
                          args.epochs, args.log_freq)
        trainer.train()
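# --- Hypothetical helper, not part of the original script: a standalone,
# runnable sketch of the piecewise schedule built in main() above. The values
# trainset_num=50000, batch_size=256, a single device, and init_lr=0.1 are
# illustrative assumptions; with them, the rate drops 10x at the step
# boundaries corresponding to epochs 60/120/180.
def _demo_piecewise_schedule():
    trainset_num, batch_size, device_num, init_lr = 50000, 256, 1, 0.1
    step = int(trainset_num / (batch_size * device_num))     # 195 steps per epoch
    bd = [step * e for e in [60, 120, 180]]                  # [11700, 23400, 35100]
    lr = [init_lr * (0.1**i) for i in range(len(bd) + 1)]    # [0.1, 0.01, 0.001, 0.0001]
    return bd, lr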
def main(args):
    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
        if args.use_data_parallel else fluid.CUDAPlace(0)

    with fluid.dygraph.guard(place):
        # Instantiate the searched architecture by name.
        genotype = eval("genotypes.%s" % args.arch)
        model = Network(
            C=args.init_channels,
            num_classes=args.class_num,
            layers=args.layers,
            auxiliary=args.auxiliary,
            genotype=genotype)

        logger.info("param size = {:.6f}MB".format(
            count_parameters_in_MB(model.parameters())))

        # Cosine lr decay with global-norm gradient clipping.
        device_num = fluid.dygraph.parallel.Env().nranks
        step_per_epoch = int(args.trainset_num /
                             (args.batch_size * device_num))
        learning_rate = fluid.dygraph.CosineDecay(args.learning_rate,
                                                  step_per_epoch, args.epochs)
        clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=args.grad_clip)
        optimizer = fluid.optimizer.MomentumOptimizer(
            learning_rate,
            momentum=args.momentum,
            regularization=fluid.regularizer.L2Decay(args.weight_decay),
            parameter_list=model.parameters(),
            grad_clip=clip)

        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()
            model = fluid.dygraph.parallel.DataParallel(model, strategy)

        train_loader = fluid.io.DataLoader.from_generator(
            capacity=64,
            use_double_buffer=True,
            iterable=True,
            return_list=True,
            use_multiprocess=args.use_multiprocess)
        valid_loader = fluid.io.DataLoader.from_generator(
            capacity=64,
            use_double_buffer=True,
            iterable=True,
            return_list=True,
            use_multiprocess=args.use_multiprocess)

        train_reader = reader.train_valid(
            batch_size=args.batch_size,
            is_train=True,
            is_shuffle=True,
            args=args)
        valid_reader = reader.train_valid(
            batch_size=args.batch_size,
            is_train=False,
            is_shuffle=False,
            args=args)
        if args.use_data_parallel:
            train_reader = fluid.contrib.reader.distributed_batch_reader(
                train_reader)
        train_loader.set_batch_generator(train_reader, places=place)
        valid_loader.set_batch_generator(valid_reader, places=place)

        # Only rank 0 saves checkpoints under data parallelism.
        save_parameters = (not args.use_data_parallel) or (
            args.use_data_parallel and
            fluid.dygraph.parallel.Env().local_rank == 0)

        best_acc = 0
        for epoch in range(args.epochs):
            # Linearly ramp the drop-path probability over training.
            drop_path_prob = args.drop_path_prob * epoch / args.epochs
            logger.info('Epoch {}, lr {:.6f}'.format(
                epoch, optimizer.current_step_lr()))
            train_top1 = train(model, train_loader, optimizer, epoch,
                               drop_path_prob, args)
            logger.info("Epoch {}, train_acc {:.6f}".format(epoch,
                                                            train_top1))
            valid_top1 = valid(model, valid_loader, epoch, args)
            if valid_top1 > best_acc:
                best_acc = valid_top1
                if save_parameters:
                    fluid.save_dygraph(model.state_dict(),
                                       args.model_save_dir + "/best_model")
            logger.info("Epoch {}, valid_acc {:.6f}, best_valid_acc {:.6f}".
                        format(epoch, valid_top1, best_acc))
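# --- Hypothetical helper, not part of the original script: a runnable sketch
# of the linear drop-path ramp computed per epoch in main() above. The values
# drop_path_prob=0.2 and epochs=600 are assumptions for illustration (the
# settings commonly used for DARTS CIFAR evaluation); the probability climbs
# linearly from 0 toward the configured maximum.
def _demo_drop_path_ramp():
    drop_path_prob, epochs = 0.2, 600
    return [(e, drop_path_prob * e / epochs) for e in (0, 150, 300, 599)]
    # -> [(0, 0.0), (150, 0.05), (300, 0.1), (599, 0.19966...)]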
def main(args):
    devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
    devices_num = len(devices.split(","))
    is_shuffle = True

    # Separate programs for initialization, training, and evaluation.
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    test_prog = fluid.Program()

    train_fetch_list, train_loader = build_program(
        main_prog=train_prog,
        startup_prog=startup_prog,
        is_train=True,
        args=args)
    valid_fetch_list, valid_loader = build_program(
        main_prog=test_prog,
        startup_prog=startup_prog,
        is_train=False,
        args=args)
    logger.info("param size = {:.6f}MB".format(
        utility.count_parameters_in_MB(
            train_prog.global_block().all_parameters(), 'model')))
    test_prog = test_prog.clone(for_test=True)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    train_reader = reader.train_valid(
        batch_size=args.batch_size,
        is_train=True,
        is_shuffle=is_shuffle,
        args=args)
    valid_reader = reader.train_valid(
        batch_size=args.batch_size,
        is_train=False,
        is_shuffle=False,
        args=args)
    # Training feeds all visible devices; the test program runs on one place.
    places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()
    train_loader.set_batch_generator(train_reader, places=places)
    valid_loader.set_batch_generator(valid_reader, places=place)

    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.num_threads = 4 * devices_num
    build_strategy = fluid.BuildStrategy()
    if args.with_mem_opt:
        # Mark fetched vars persistable so memory optimization won't reuse them.
        for i in range(len(train_fetch_list)):
            train_fetch_list[i].persistable = True
        build_strategy.enable_inplace = True
        build_strategy.memory_optimize = True

    parallel_train_prog = fluid.CompiledProgram(
        train_prog).with_data_parallel(
            loss_name=train_fetch_list[0].name,
            build_strategy=build_strategy,
            exec_strategy=exec_strategy)
    test_prog = fluid.CompiledProgram(test_prog)

    def save_model(postfix, program):
        model_path = os.path.join(args.model_save_dir, postfix)
        if os.path.isdir(model_path):
            shutil.rmtree(model_path)
        logger.info('save models to %s' % (model_path))
        fluid.io.save_persistables(exe, model_path, main_program=program)

    best_acc = 0
    for epoch_id in range(args.epochs):
        train_top1 = train(parallel_train_prog, exe, epoch_id, train_loader,
                           train_fetch_list, args)
        logger.info("Epoch {}, train_acc {:.6f}".format(epoch_id, train_top1))
        valid_top1 = valid(test_prog, exe, epoch_id, valid_loader,
                           valid_fetch_list, args)
        if valid_top1 > best_acc:
            best_acc = valid_top1
            save_model('cifar10_model', train_prog)
        logger.info("Epoch {}, valid_acc {:.6f}, best_valid_acc {:.6f}".format(
            epoch_id, valid_top1, best_acc))
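# --- Hypothetical launcher for the static-graph trainer above: a minimal
# sketch, not the original script's CLI. The flag names mirror the `args`
# attributes `main` reads directly; the defaults are illustrative assumptions,
# and `build_program`, `reader.train_valid`, `train`, and `valid` may consume
# further attributes not listed here.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser("train")
    parser.add_argument('--use_gpu', action='store_true')       # assumed boolean flag
    parser.add_argument('--with_mem_opt', action='store_true')  # assumed boolean flag
    parser.add_argument('--batch_size', type=int, default=96)   # assumed default
    parser.add_argument('--epochs', type=int, default=600)      # assumed default
    parser.add_argument('--model_save_dir', type=str, default='./output')
    main(parser.parse_args())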