def do_eval(args):
    """Evaluate a trained ERNIE sequence-labeling model on the test set.

    Builds an inference-only program, restores parameters from
    ``args.init_checkpoint`` and delegates metric computation to the shared
    ``evaluate`` helper.

    Raises:
        ValueError: if ``args.init_checkpoint`` is not set.
    """
    # Select the execution device; FLAGS_selected_gpus picks the GPU card.
    if args.use_cuda:
        run_place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
    else:
        run_place = fluid.CPUPlace()

    config = ErnieConfig(args.ernie_config_path)
    config.print_config()

    eval_program = fluid.Program()
    with fluid.program_guard(eval_program, fluid.default_startup_program()):
        with fluid.unique_name.guard():
            eval_ret = creator.create_ernie_model(args, config)
    # Freeze the graph for test-time execution (drops backward ops).
    eval_program = eval_program.clone(for_test=True)

    data_loader = creator.create_pyreader(
        args,
        file_name=args.test_data,
        feed_list=eval_ret['feed_list'],
        model="ernie",
        place=run_place,
        mode='test')

    print('program startup')
    executor = fluid.Executor(run_place)
    executor.run(fluid.default_startup_program())

    print('program loading')
    # Evaluation is meaningless without trained weights — fail fast.
    if not args.init_checkpoint:
        raise ValueError(
            "args 'init_checkpoint' should be set if only doing test or infer!")
    utils.init_checkpoint(executor, args.init_checkpoint, eval_program)

    evaluate(executor, eval_program, data_loader, eval_ret)
def do_infer(args):
    """Run inference with a trained ERNIE sequence-labeling model.

    Restores parameters from ``args.init_checkpoint``, reads samples from
    ``args.test_data`` and prints one line of ``(token, tag)`` pairs per
    sentence.

    Raises:
        ValueError: if ``args.init_checkpoint`` is not set.
    """
    # init executor
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
    else:
        place = fluid.CPUPlace()

    # define network and reader
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()
    infer_program = fluid.Program()
    with fluid.program_guard(infer_program, fluid.default_startup_program()):
        with fluid.unique_name.guard():
            # NOTE(review): is_prediction=False during inference looks odd —
            # confirm against create_ernie_model's contract.
            infer_ret = creator.create_ernie_model(
                args, ernie_config, is_prediction=False)
    infer_program = infer_program.clone(for_test=True)
    print(args.test_data)
    # BUGFIX: sibling callers (do_eval/do_train) pass model="ernie"; the
    # previous `mode="ernie"` left the model selector at its default and set
    # the reader mode to a bogus value.
    pyreader, reader = creator.create_pyreader(
        args,
        file_name=args.test_data,
        feed_list=infer_ret['feed_list'],
        model="ernie",
        place=place,
        iterable=True,
        return_reader=True,
        for_test=True)

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # load model
    if not args.init_checkpoint:
        raise ValueError(
            "args 'init_checkpoint' should be set if only doing test or infer!"
        )
    utils.init_checkpoint(exe, args.init_checkpoint, infer_program)

    # Build id -> token / id -> label maps so results print human-readably.
    id2word_dict = {
        str(word_id): word for word, word_id in reader.vocab.items()
    }
    id2label_dict = {
        str(label_id): label for label, label_id in reader.label_map.items()
    }
    Dataset = namedtuple("Dataset", ["id2word_dict", "id2label_dict"])
    dataset = Dataset(id2word_dict, id2label_dict)

    # make prediction
    for data in pyreader():
        (words, crf_decode) = exe.run(
            infer_program,
            fetch_list=[infer_ret["words"], infer_ret["crf_decode"]],
            feed=data[0],
            return_numpy=False)
        # User should notice that words had been clipped if longer than
        # args.max_seq_len
        results = utils.parse_result(words, crf_decode, dataset)
        for sent, tags in results:
            result_list = [
                '(%s, %s)' % (ch, tag) for ch, tag in zip(sent, tags)
            ]
            print(''.join(result_list))
def do_eval(args):
    """Evaluate a trained ERNIE model and serve it as a distillation teacher.

    Restores parameters from ``args.init_checkpoint``, then starts a
    ``Teacher`` knowledge service on ``args.out_port`` that streams CRF
    decoding results (``crf_decode``, ``seq_lens``) to student trainers.

    NOTE(review): this redefines ``do_eval`` — if both versions live in the
    same module, this one shadows the earlier definition; confirm intent.

    Raises:
        ValueError: if ``args.init_checkpoint`` is not set.
    """
    # init executor
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
    else:
        place = fluid.CPUPlace()
    print('ernie config')
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    test_program = fluid.Program()
    print('test program')
    with fluid.program_guard(test_program, fluid.default_startup_program()):
        with fluid.unique_name.guard():
            test_ret = creator.create_ernie_model(args, ernie_config)
    test_program = test_program.clone(for_test=True)

    # BUGFIX: with return_reader=True, create_pyreader returns a
    # (pyreader, reader) pair (see do_infer); the old single-name assignment
    # handed the whole tuple to the teacher's batch_generator.
    pyreader, reader = creator.create_pyreader(
        args,
        file_name=args.test_data,
        feed_list=[ret.name for ret in test_ret['feed_list']],
        model="ernie",
        place=place,
        return_reader=True,
        mode='test')

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # load model
    if not args.init_checkpoint:
        raise ValueError(
            "args 'init_checkpoint' should be set if only doing test or infer!")
    model_utils.init_checkpoint(exe, args.init_checkpoint, test_program)

    teacher = Teacher(out_path=None, out_port=int(args.out_port))
    teacher.start()
    print('run teacher......')

    test_ret["chunk_evaluator"].reset()
    reader_config = {"batch_generator": pyreader}
    # Blocks serving batches; `times` bounds how many passes are served.
    teacher.start_knowledge_service(
        feed_list=[
            test_ret["words"].name, test_ret["sent_ids"].name,
            test_ret["pos_ids"].name, test_ret["input_mask"].name,
            test_ret["labels"].name, test_ret["seq_lens"].name
        ],
        schema={
            "crf_decode": test_ret["crf_decode"],
            "seq_lens": test_ret["seq_lens"]
        },
        program=test_program,
        reader_config=reader_config,
        exe=exe,
        times=10)
def do_train(args):
    """Main Function: train the ERNIE sequence-labeling model.

    Builds the train/test programs, optionally warm-starts from a checkpoint
    or pretrained parameters, then runs the training loop with periodic
    logging (every ``args.print_steps``), checkpointing
    (``args.save_steps``) and validation (``args.validation_steps``).
    """
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = 1
    else:
        # Cap the worker count at the machine's actual CPU count.
        dev_count = min(multiprocessing.cpu_count(), args.cpu_num)
        if (dev_count < args.cpu_num):
            print(
                "WARNING: The total CPU NUM in this machine is %d, which is less than cpu_num parameter you set. "
                "Change the cpu_num from %d to %d" %
                (dev_count, args.cpu_num, dev_count))
        os.environ['CPU_NUM'] = str(dev_count)
        place = fluid.CPUPlace()

    exe = fluid.Executor(place)

    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    train_program = fluid.Program()
    with fluid.program_guard(train_program, startup_prog):
        with fluid.unique_name.guard():
            # user defined model based on ernie embeddings
            train_ret = creator.create_ernie_model(args, ernie_config)

            # ernie pyreader
            train_pyreader = creator.create_pyreader(
                args,
                file_name=args.train_data,
                feed_list=train_ret['feed_list'],
                model="ernie",
                place=place)
            # Clone for test BEFORE adding optimizer ops to the train graph.
            test_program = train_program.clone(for_test=True)
            test_pyreader = creator.create_pyreader(
                args,
                file_name=args.test_data,
                feed_list=train_ret['feed_list'],
                model="ernie",
                place=place)
            clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)
            optimizer = fluid.optimizer.Adam(
                learning_rate=args.base_learning_rate, grad_clip=clip)
            optimizer.minimize(train_ret["avg_cost"])

    lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
        program=train_program, batch_size=args.batch_size)
    print("Theoretical memory usage in training: %.3f - %.3f %s" %
          (lower_mem, upper_mem, unit))
    print("Device count: %d" % dev_count)

    exe.run(startup_prog)

    # load checkpoints
    if args.init_checkpoint and args.init_pretraining_params:
        # BUGFIX: the warning literal previously contained a raw pasted line
        # break; rejoined via implicit string concatenation.
        print("WARNING: args 'init_checkpoint' and 'init_pretraining_params' "
              "both are set! Only arg 'init_checkpoint' is made valid.")
    if args.init_checkpoint:
        utils.init_checkpoint(exe, args.init_checkpoint, startup_prog)
    elif args.init_pretraining_params:
        utils.init_pretraining_params(exe, args.init_pretraining_params,
                                      startup_prog)

    if dev_count > 1 and not args.use_cuda:
        # use_cuda is False in this branch, so device is always "CPU".
        device = "CPU"
        print("%d %s are used to train model" % (dev_count, device))

        # multi cpu/gpu config
        exec_strategy = fluid.ExecutionStrategy()
        build_strategy = fluid.BuildStrategy()
        compiled_prog = fluid.compiler.CompiledProgram(
            train_program).with_data_parallel(
                loss_name=train_ret['avg_cost'].name,
                build_strategy=build_strategy,
                exec_strategy=exec_strategy)
    else:
        compiled_prog = fluid.compiler.CompiledProgram(train_program)

    # start training
    steps = 0
    for epoch_id in range(args.epoch):
        for data in train_pyreader():
            steps += 1
            # Fetch metrics only on logging steps to avoid per-step overhead.
            if steps % args.print_steps == 0:
                fetch_list = [
                    train_ret["avg_cost"],
                    train_ret["precision"],
                    train_ret["recall"],
                    train_ret["f1_score"],
                ]
            else:
                fetch_list = []

            start_time = time.time()
            outputs = exe.run(program=compiled_prog,
                              feed=data[0],
                              fetch_list=fetch_list)
            end_time = time.time()
            if steps % args.print_steps == 0:
                # np.mean averages per-device results under data parallelism.
                loss, precision, recall, f1_score = [
                    np.mean(x) for x in outputs
                ]
                print(
                    "[train] batch_id = %d, loss = %.5f, P: %.5f, R: %.5f, F1: %.5f, elapsed time %.5f, "
                    "pyreader queue_size: %d " %
                    (steps, loss, precision, recall, f1_score,
                     end_time - start_time, train_pyreader.queue.size()))

            if steps % args.save_steps == 0:
                save_path = os.path.join(args.model_save_dir,
                                         "step_" + str(steps), "checkpoint")
                print("\tsaving model as %s" % (save_path))
                fluid.save(train_program, save_path)

            if steps % args.validation_steps == 0:
                evaluate(exe, test_program, test_pyreader, train_ret)

    # Final checkpoint after the last step.
    save_path = os.path.join(args.model_save_dir, "step_" + str(steps),
                             "checkpoint")
    fluid.save(train_program, save_path)