def main(args):
    """Evaluate a TAPE model on the dataset given by ``args.test_data``.

    Builds the test program, restores parameters from ``args.init_model``
    (required), runs inference over the test data loader, and prints the
    task metric.

    Args:
        args: parsed command-line arguments; uses ``model_config``,
            ``use_cuda``, ``thread_num``, ``test_data``, ``batch_size``
            and ``init_model``.

    Raises:
        RuntimeError: if ``args.init_model`` is unset or empty.
    """
    paddle.enable_static()
    # Context manager so the config file handle is closed promptly
    # (the original left the open() handle dangling).
    with open(args.model_config, 'r') as f:
        model_config = json.load(f)

    # Evaluation is always single-process: is_distributed is hard-coded False.
    exe_params = default_exe_params(False, args.use_cuda, args.thread_num)
    exe = exe_params['exe']
    trainer_num = exe_params['trainer_num']
    trainer_id = exe_params['trainer_id']
    places = exe_params['places']

    task = model_config['task']
    model = TAPEModel(model_config=model_config, name=task)

    test_program = fluid.Program()
    test_startup = fluid.Program()
    with fluid.program_guard(test_program, test_startup):
        with fluid.unique_name.guard():
            model.forward(True)  # True => test/inference mode
            model.cal_loss()
            test_data_loader = setup_data_loader(
                    model.input_list,
                    model_config,
                    args.test_data,
                    trainer_id,
                    trainer_num,
                    places,
                    args.batch_size)
    exe.run(test_startup)

    test_metric = get_metric(task)

    if args.init_model is not None and args.init_model != "":
        load_partial_params(exe, args.init_model, test_program)
    else:
        raise RuntimeError('Please set init_model.')

    # BUG FIX: the original line ended with a stray comma
    # (`model.get_fetch_list(),`), which wrapped the fetch list in a
    # 1-tuple and corrupted exe.run's fetch_list argument.
    test_fetch_list = model.get_fetch_list()

    for data in test_data_loader():
        results = exe.run(
                program=test_program,
                feed=data,
                fetch_list=test_fetch_list,
                return_numpy=False)
        update_metric(task, test_metric, results)
    test_metric.show()
def main(args):
    """Train a TAPE model, optionally evaluating on a test set each epoch.

    Builds the train (and optional test) programs, restores parameters
    from ``args.init_model`` if given, then runs ``args.max_epoch``
    epochs. Trainer 0 saves parameters under ``args.model_dir`` after
    every epoch.

    Args:
        args: parsed command-line arguments; uses ``model_config``,
            ``is_distributed``, ``use_cuda``, ``thread_num``, ``lr``,
            ``warmup_steps``, ``max_grad_norm``, ``train_data``,
            ``test_data``, ``batch_size``, ``init_model``, ``max_epoch``
            and ``model_dir``.
    """
    paddle.enable_static()
    # Context manager so the config file handle is closed promptly.
    with open(args.model_config, 'r') as f:
        model_config = json.load(f)

    exe_params = default_exe_params(
            args.is_distributed, args.use_cuda, args.thread_num)
    exe = exe_params['exe']
    trainer_num = exe_params['trainer_num']
    trainer_id = exe_params['trainer_id']
    dist_strategy = exe_params['dist_strategy']
    places = exe_params['places']

    task = model_config['task']
    model = TAPEModel(model_config=model_config, name=task)

    train_program = fluid.Program()
    train_startup = fluid.Program()
    with fluid.program_guard(train_program, train_startup):
        with fluid.unique_name.guard():
            model.forward(False)  # False => training mode
            model.cal_loss()
            optimizer = default_optimizer(
                    args.lr, args.warmup_steps, args.max_grad_norm)
            setup_optimizer(
                    optimizer, model, args.use_cuda,
                    args.is_distributed, dist_strategy)
            optimizer.minimize(model.loss)
            train_data_loader = setup_data_loader(
                    model.input_list,
                    model_config,
                    args.train_data,
                    trainer_id,
                    trainer_num,
                    places,
                    args.batch_size)
    exe.run(train_startup)
    train_metric = get_metric(task)
    train_fetch_list = model.get_fetch_list()

    # CONSISTENCY FIX: the original mixed `is not None` with
    # `is not None and != ""` guards, so test_data == "" built a test
    # program that was never used. One flag, used everywhere.
    has_test = args.test_data is not None and args.test_data != ""
    if has_test:
        test_program = fluid.Program()
        test_startup = fluid.Program()
        with fluid.program_guard(test_program, test_startup):
            with fluid.unique_name.guard():
                model.forward(True)  # test/inference mode
                model.cal_loss()
                # BUG FIX: the original passed `model` here; every other
                # call site of setup_data_loader passes `model.input_list`.
                test_data_loader = setup_data_loader(
                        model.input_list,
                        model_config,
                        args.test_data,
                        trainer_id,
                        trainer_num,
                        places,
                        args.batch_size)
        exe.run(test_startup)
        test_metric = get_metric(task)
        test_fetch_list = model.get_fetch_list()

    if args.init_model is not None and args.init_model != "":
        # BUG FIX: the original restored into `test_program`, which is a
        # NameError when no test set is given; a training run should
        # restore into the train program. Done before CompiledProgram
        # wrapping so we load into the plain Program.
        load_partial_params(exe, args.init_model, train_program)

    if not args.is_distributed:
        train_program = fluid.compiler.CompiledProgram(
                train_program).with_data_parallel(loss_name=model.loss.name)
        if has_test:
            test_program = fluid.compiler.CompiledProgram(
                    test_program).with_data_parallel(
                            loss_name=model.loss.name)

    for epoch_id in range(args.max_epoch):
        print(time.time(), 'Start epoch %d' % epoch_id)
        print('Train:')
        train_metric.clear()
        for data in train_data_loader():
            results = exe.run(
                    program=train_program,
                    feed=data,
                    fetch_list=train_fetch_list,
                    return_numpy=False)
            update_metric(task, train_metric, results)
        train_metric.show()
        print()

        if has_test:
            print('Test:')
            test_metric.clear()
            for data in test_data_loader():
                results = exe.run(
                        program=test_program,
                        feed=data,
                        fetch_list=test_fetch_list,
                        return_numpy=False)
                update_metric(task, test_metric, results)
            test_metric.show()
            print()

        # Only the chief trainer checkpoints, to avoid concurrent writes.
        if trainer_id == 0:
            print(time.time(), "Save model epoch%d." % epoch_id)
            # exist_ok avoids the check-then-create race of the original
            # os.path.exists() + os.makedirs() pair.
            os.makedirs(args.model_dir, exist_ok=True)
            fluid.io.save_params(
                    exe,
                    '%s/epoch%d' % (args.model_dir, epoch_id),
                    train_program)