def extract_weights(args):
    """Load ERNIE parameters from disk and return them under Gluon names.

    The model graph is built on CPU, the variables found by ``if_exist`` are
    loaded from ``args.init_pretraining_params``, and every entry of the
    mapping returned by ``build_weight_map()`` is copied into an ordered
    dictionary as a float32 numpy array.

    Args:
        args: Parsed options. ``max_seq_len``, ``use_fp16``, ``num_labels``
            and ``loss_scaling`` are overwritten in place; the function reads
            ``ernie_config_path`` and ``init_pretraining_params``.

    Returns:
        collections.OrderedDict: Gluon parameter name -> numpy array.
    """
    banner_width = 60
    print('extract weights start'.center(banner_width, '='))

    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(startup_prog)

    # Fixed settings used to build the graph for extraction.
    overrides = (
        ('max_seq_len', 512),
        ('use_fp16', False),
        ('num_labels', 2),
        ('loss_scaling', 1.0),
    )
    for attr_name, attr_value in overrides:
        setattr(args, attr_name, attr_value)

    print('model config:')
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    with fluid.program_guard(test_prog, startup_prog):
        with fluid.unique_name.guard():
            # Only the side effect of building the graph is needed here.
            _reader, _graph = create_model(
                args, pyreader_name='train', ernie_config=ernie_config)

    fluid.io.load_vars(
        exe,
        args.init_pretraining_params,
        main_program=test_prog,
        predicate=if_exist)

    state_dict = collections.OrderedDict()
    for ernie_name, gluon_name in build_weight_map().items():
        tensor = fluid.global_scope().find_var(ernie_name).get_tensor()
        weights = np.array(tensor, dtype=np.float32)
        # Parameters whose name contains 'w_0' are stored transposed.
        weights = weights.transpose() if 'w_0' in ernie_name else weights
        state_dict[gluon_name] = weights
        print('{} -> {} {}'.format(ernie_name, gluon_name, weights.shape))

    print('extract weights done!'.center(banner_width, '='))
    return state_dict
def main(args, init_checkpoint):
    """Build the prediction graph, load a checkpoint and export it.

    The exported inference model is written to
    ``<args.save_inference_model_path>/<checkpoint dir name>_inference_model``.

    Args:
        args: Parsed options; reads ``ernie_config_path``, ``ernie_version``,
            ``use_cuda`` and ``save_inference_model_path``.
        init_checkpoint: Directory holding the parameters to load.

    Raises:
        ValueError: If ``init_checkpoint`` is empty.
        AssertionError: If ``args.save_inference_model_path`` is empty.
    """
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    predict_prog = fluid.Program()
    predict_startup = fluid.Program()
    with fluid.program_guard(predict_prog, predict_startup):
        with fluid.unique_name.guard():
            model_outputs = create_model(
                args,
                pyreader_name='predict_reader',
                ernie_config=ernie_config,
                is_classify=True,
                is_prediction=True,
                ernie_version=args.ernie_version)
            predict_pyreader, probs, feed_target_names = model_outputs

    predict_prog = predict_prog.clone(for_test=True)

    if args.use_cuda == True:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(predict_startup)

    if not init_checkpoint:
        raise ValueError(
            "args 'init_checkpoint' should be set for prediction!")
    init_pretraining_params(exe, init_checkpoint, predict_prog)

    # Save the model.
    assert args.save_inference_model_path, "args save_inference_model_path should be set for prediction"
    checkpoint_name = os.path.basename(init_checkpoint.rstrip('/'))
    model_path = os.path.join(args.save_inference_model_path,
                              checkpoint_name + '_inference_model')
    print("save inference model to %s" % model_path)
    fluid.io.save_inference_model(
        model_path,
        feed_target_names, [probs],
        exe,
        main_program=predict_prog)
raise ValueError("args 'init_checkpoint' should be set if" "only doing validation or testing!") ernie_config = ErnieConfig(args.ernie_config_path) ernie_config.print_config() place = fluid.CPUPlace() exe = fluid.Executor(place) startup_prog = fluid.Program() test_program = fluid.Program() with fluid.program_guard(test_program, startup_prog): with fluid.unique_name.guard(): _, _ = create_model(args, pyreader_name='test_reader', ernie_config=ernie_config) exe.run(startup_prog) init_pretraining_params( exe, args.init_checkpoint, main_program=test_program, #main_program=startup_prog, use_fp16=args.use_fp16) name2params = {} prefix = args.init_checkpoint for var in startup_prog.list_vars(): path = os.path.join(prefix, var.name)
def main(args):
    """Export an inference model from a checkpoint, reload it and predict.

    Steps: build the prediction graph, load ``args.init_checkpoint``, save an
    inference model under ``args.save_inference_model_path``, load that model
    back, and print one line of probabilities per example of
    ``args.predict_set``.

    Args:
        args: Parsed options; reads ``ernie_config_path``, ``vocab_path``,
            ``label_map_config``, ``max_seq_len``, ``do_lower_case``,
            ``use_cuda``, ``init_checkpoint``, ``save_inference_model_path``,
            ``predict_set`` and ``batch_size``.

    Raises:
        ValueError: If ``args.init_checkpoint`` is empty.
        AssertionError: If ``args.save_inference_model_path`` is empty.
    """
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    reader = ClassifyReader(
        vocab_path=args.vocab_path,
        label_map_config=args.label_map_config,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case,
        in_tokens=False,
        is_inference=True)

    predict_prog = fluid.Program()
    predict_startup = fluid.Program()
    with fluid.program_guard(predict_prog, predict_startup):
        with fluid.unique_name.guard():
            # The pyreader returned first is not used: data is fed explicitly.
            _, probs, feed_target_names = create_model(
                args,
                pyreader_name='predict_reader',
                ernie_config=ernie_config,
                is_prediction=True)

    predict_prog = predict_prog.clone(for_test=True)

    # Single device selection. The previous code chose the place twice and
    # computed an unused device count, whose int(CPU_NUM) parse could fail.
    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(predict_startup)

    if not args.init_checkpoint:
        raise ValueError(
            "args 'init_checkpoint' should be set for prediction!")
    init_pretraining_params(exe, args.init_checkpoint, predict_prog)

    assert args.save_inference_model_path, "args save_inference_model_path should be set for prediction"
    _, ckpt_dir = os.path.split(args.init_checkpoint.rstrip('/'))
    dir_name = ckpt_dir + '_inference_model'
    model_path = os.path.join(args.save_inference_model_path, dir_name)
    print("save inference model to %s" % model_path)
    fluid.io.save_inference_model(
        model_path,
        feed_target_names, [probs],
        exe,
        main_program=predict_prog)

    print("load inference model from %s" % model_path)
    infer_program, feed_target_names, probs = fluid.io.load_inference_model(
        model_path, exe)

    # The first four feed targets are paired with the first four fields of
    # each batch produced by the reader.
    src_ids = feed_target_names[0]
    sent_ids = feed_target_names[1]
    pos_ids = feed_target_names[2]
    input_mask = feed_target_names[3]

    predict_data_generator = reader.data_generator(
        input_file=args.predict_set,
        batch_size=args.batch_size,
        epoch=1,
        shuffle=False)

    print("-------------- prediction results --------------")
    np.set_printoptions(precision=4, suppress=True)
    index = 0  # running example index across all batches
    for sample in predict_data_generator():
        output = exe.run(
            infer_program,
            feed={
                src_ids: sample[0],
                sent_ids: sample[1],
                pos_ids: sample[2],
                input_mask: sample[3],
            },
            fetch_list=probs)
        for single_result in output[0]:
            print("example_index:{}\t{}".format(index, single_result))
            index += 1
def main(args):
    """Export an inference model and benchmark it with a PaddlePredictor.

    Steps: build the prediction graph, load ``args.init_checkpoint``, save an
    inference model, create a predictor from it, print the tab-separated
    probabilities of every example in ``args.predict_set`` and log the
    measured throughput.

    Args:
        args: Parsed options; reads ``ernie_config_path``, ``vocab_path``,
            ``label_map_config``, ``max_seq_len``, ``do_lower_case``,
            ``use_cuda``, ``init_checkpoint``, ``save_inference_model_path``,
            ``predict_set`` and ``batch_size``.

    Raises:
        ValueError: If ``args.init_checkpoint`` is empty.
        AssertionError: If ``args.save_inference_model_path`` is empty.
    """
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    reader = ClassifyReader(
        vocab_path=args.vocab_path,
        label_map_config=args.label_map_config,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case,
        in_tokens=False,
        is_inference=True)

    predict_prog = fluid.Program()
    predict_startup = fluid.Program()
    with fluid.program_guard(predict_prog, predict_startup):
        with fluid.unique_name.guard():
            # The pyreader returned first is not used by this script.
            _, probs, feed_target_names = create_model(
                args,
                pyreader_name='predict_reader',
                ernie_config=ernie_config,
                is_classify=True,
                is_prediction=True)

    predict_prog = predict_prog.clone(for_test=True)

    # Single device selection. The previous code chose the place twice and
    # computed an unused device count, whose int(CPU_NUM) parse could fail.
    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(predict_startup)

    if not args.init_checkpoint:
        raise ValueError("args 'init_checkpoint' should be set for prediction!")
    init_pretraining_params(exe, args.init_checkpoint, predict_prog)

    assert args.save_inference_model_path, "args save_inference_model_path should be set for prediction"
    _, ckpt_dir = os.path.split(args.init_checkpoint.rstrip('/'))
    dir_name = ckpt_dir + '_inference_model'
    model_path = os.path.join(args.save_inference_model_path, dir_name)
    log.info("save inference model to %s" % model_path)
    fluid.io.save_inference_model(
        model_path,
        feed_target_names, [probs],
        exe,
        main_program=predict_prog)

    # Configure the predictor from the directory that was just written.
    config = AnalysisConfig(model_path)
    if not args.use_cuda:
        log.info("disable gpu")
        config.disable_gpu()
        config.switch_ir_optim(True)
    else:
        log.info("using gpu")
        config.enable_use_gpu(1024)

    # Create PaddlePredictor
    predictor = create_paddle_predictor(config)

    predict_data_generator = reader.data_generator(
        input_file=args.predict_set,
        batch_size=args.batch_size,
        epoch=1,
        shuffle=False)

    log.info("-------------- prediction results --------------")
    np.set_printoptions(precision=4, suppress=True)
    index = 0
    total_time = 0
    for sample in predict_data_generator():
        # sample[3] (bound to `task_ids` in the previous code) is not fed to
        # the predictor; the mask is taken from sample[4].
        src_ids = sample[0]
        sent_ids = sample[1]
        pos_ids = sample[2]
        input_mask = sample[4]

        inputs = [
            array2tensor(ndarray)
            for ndarray in [src_ids, sent_ids, pos_ids, input_mask]
        ]
        begin_time = time.time()
        outputs = predictor.run(inputs)
        end_time = time.time()
        total_time += end_time - begin_time

        # parse outputs
        batch_result = outputs[0].as_ndarray()
        for single_example_probs in batch_result:
            print('\t'.join(map(str, single_example_probs.tolist())))
            index += 1

    # An empty predict set (or a timer that did not advance) leaves
    # total_time at 0; report 0 qps instead of raising ZeroDivisionError.
    qps = index / total_time if total_time > 0 else 0.0
    log.info("qps:{}\ttotal_time:{}\ttotal_example:{}\tbatch_size:{}".format(
        qps, total_time, index, args.batch_size))
def main(args):
    """Train and/or evaluate a model according to the ``do_*`` flags.

    When ``args.do_train`` is set, a training program is built and run until
    the reader raises ``fluid.core.EOFException``, logging every
    ``args.skip_steps`` steps, saving every ``args.save_steps`` steps and
    evaluating every ``args.validation_steps`` steps. When ``args.do_val`` or
    ``args.do_test`` is set, a test program is built and a final evaluation is
    run on the corresponding set.

    Raises:
        ValueError: If none of ``do_train``/``do_val``/``do_test`` is set, or
            if only evaluation is requested without ``args.init_checkpoint``.
        FileExistsError: If logging is enabled and ``args.log_path`` exists.
    """
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    # Pick the execution device and the device count used to size the steps.
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    exe = fluid.Executor(place)

    reader = task_reader.ClassifyReader(
        vocab_path=args.vocab_path,
        label_map_config=args.label_map_config,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case,
        in_tokens=args.in_tokens,
        random_seed=args.random_seed)

    if not (args.do_train or args.do_val or args.do_test):
        raise ValueError("For args `do_train`, `do_val` and `do_test`, at "
                         "least one of them must be True.")

    # One startup program is shared by the train and test programs.
    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    if args.do_train:
        train_data_generator = reader.data_generator(
            input_file=args.train_set,
            batch_size=args.batch_size,
            epoch=args.epoch,
            shuffle=True,
            phase="train")

        num_train_examples = reader.get_num_examples(args.train_set)

        # With in_tokens the batch size is divided by max_seq_len before
        # computing the number of steps.
        if args.in_tokens:
            max_train_steps = args.epoch * num_train_examples // (
                args.batch_size // args.max_seq_len) // dev_count
        else:
            max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count

        warmup_steps = int(max_train_steps * args.warmup_proportion)
        print("Device count: %d" % dev_count)
        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)
        print("Num warmup steps: %d" % warmup_steps)

        train_program = fluid.Program()

        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                train_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='train_reader',
                    ernie_config=ernie_config)
                scheduled_lr = optimization(
                    loss=graph_vars["loss"],
                    warmup_steps=warmup_steps,
                    num_train_steps=max_train_steps,
                    learning_rate=args.learning_rate,
                    train_program=train_program,
                    startup_prog=startup_prog,
                    weight_decay=args.weight_decay,
                    scheduler=args.lr_scheduler,
                    use_fp16=args.use_fp16,
                    loss_scaling=args.loss_scaling)

                # The variables fetched during training are excluded from
                # memory optimization.
                fluid.memory_optimize(
                    input_program=train_program,
                    skip_opt_set=[
                        graph_vars["loss"].name,
                        graph_vars["probs"].name,
                        graph_vars["accuracy"].name,
                        graph_vars["num_seqs"].name,
                    ])

        if args.verbose:
            if args.in_tokens:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program,
                    batch_size=args.batch_size // args.max_seq_len)
            else:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program, batch_size=args.batch_size)
            print("Theoretical memory usage in training: %.3f - %.3f %s" %
                  (lower_mem, upper_mem, unit))

    if args.do_val or args.do_test:
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                # NOTE: this rebinds graph_vars to the dict returned for the
                # test program; the training loop below uses that dict too.
                test_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='test_reader',
                    ernie_config=ernie_config)

        test_prog = test_prog.clone(for_test=True)

    exe.run(startup_prog)

    # Parameter initialisation: init_checkpoint takes precedence over
    # init_pretraining_params when training; evaluation requires a checkpoint.
    if args.do_train:
        if args.init_checkpoint and args.init_pretraining_params:
            print(
                "WARNING: args 'init_checkpoint' and 'init_pretraining_params' "
                "both are set! Only arg 'init_checkpoint' is made valid.")
        if args.init_checkpoint:
            init_checkpoint(
                exe,
                args.init_checkpoint,
                main_program=startup_prog,
                use_fp16=args.use_fp16)
        elif args.init_pretraining_params:
            init_pretraining_params(
                exe,
                args.init_pretraining_params,
                main_program=startup_prog,
                use_fp16=args.use_fp16)
    elif args.do_val or args.do_test:
        if not args.init_checkpoint:
            raise ValueError("args 'init_checkpoint' should be set if"
                             "only doing validation or testing!")
        init_checkpoint(
            exe,
            args.init_checkpoint,
            main_program=startup_prog,
            use_fp16=args.use_fp16)

    if args.do_train:
        exec_strategy = fluid.ExecutionStrategy()
        if args.use_fast_executor:
            exec_strategy.use_experimental_executor = True
        exec_strategy.num_threads = dev_count
        exec_strategy.num_iteration_per_drop_scope = args.num_iteration_per_drop_scope

        train_exe = fluid.ParallelExecutor(
            use_cuda=args.use_cuda,
            loss_name=graph_vars["loss"].name,
            exec_strategy=exec_strategy,
            main_program=train_program)

        train_pyreader.decorate_tensor_provider(train_data_generator)
    else:
        train_exe = None

    if args.do_train:
        train_pyreader.start()
        steps = 0
        # The scheduled learning rate is only fetched when warmup is active.
        if warmup_steps > 0:
            graph_vars["learning_rate"] = scheduled_lr

        # Refuse to overwrite an existing log file; start it with a timestamp.
        if args.save_log and args.log_path:
            if os.path.exists(args.log_path):
                raise FileExistsError("Logging file already exists!")
            with open(args.log_path, 'w') as logfile:
                logfile.write('%s\n' % time.asctime())
            print('Writing logs into %s' % args.log_path)

        time_begin = time.time()
        while True:
            try:
                steps += 1
                # Only every skip_steps-th step fetches and reports metrics.
                if steps % args.skip_steps != 0:
                    train_exe.run(fetch_list=[])
                else:
                    outputs = evaluate(train_exe, train_program, train_pyreader,
                                       graph_vars, "train")

                    if args.verbose:
                        verbose = "train pyreader queue size: %d, " % train_pyreader.queue.size(
                        )
                        verbose += "learning rate: %f" % (
                            outputs["learning_rate"]
                            if warmup_steps > 0 else args.learning_rate)
                        print(verbose)

                    current_example, current_epoch = reader.get_train_progress()
                    time_end = time.time()
                    used_time = time_end - time_begin
                    print("epoch: %d, progress: %d/%d, step: %d, "
                          "ave loss: %.4f, micro_f1: %.4f, micro_p: %.4f, micro_r: %.4f, "
                          "speed: %f steps/s" %
                          (current_epoch, current_example, num_train_examples,
                           steps, outputs["loss"], outputs["micro_f"],
                           outputs["micro_p"], outputs["micro_r"],
                           args.skip_steps / used_time))
                    # TODO: complete logging function
                    # TODO: print more useful metrics: f1/p/r instead of acc
                    # The log file line additionally records the accuracy.
                    if args.save_log and args.log_path:
                        with open(args.log_path, 'a') as logfile:
                            logfile.write("epoch: %d, progress: %d/%d, step: %d, "
                                          "ave loss: %.4f, ave_acc: %.4f, micro_f1: %.4f, micro_p: %.4f, micro_r: %.4f, "
                                          "speed: %f steps/s\n" %
                                          (current_epoch, current_example, num_train_examples,
                                           steps, outputs["loss"], outputs["accuracy"],
                                           outputs["micro_f"], outputs["micro_p"],
                                           outputs["micro_r"],
                                           args.skip_steps / used_time))
                    time_begin = time.time()

                if steps % args.save_steps == 0:
                    save_path = os.path.join(args.checkpoints,
                                             "step_" + str(steps))
                    fluid.io.save_persistables(exe, save_path, train_program)

                if steps % args.validation_steps == 0:
                    # evaluate dev set
                    if args.do_val:
                        test_pyreader.decorate_tensor_provider(
                            reader.data_generator(
                                args.dev_set,
                                batch_size=args.batch_size,
                                epoch=1,
                                shuffle=False))
                        evaluate(exe, test_prog, test_pyreader, graph_vars,
                                 "dev")
                    # evaluate test set
                    if args.do_test:
                        test_pyreader.decorate_tensor_provider(
                            reader.data_generator(
                                args.test_set,
                                batch_size=args.batch_size,
                                epoch=1,
                                shuffle=False))
                        evaluate(exe, test_prog, test_pyreader, graph_vars,
                                 "test")
            except fluid.core.EOFException:
                # Reader exhausted: save a final checkpoint and stop training.
                save_path = os.path.join(args.checkpoints, "step_" + str(steps))
                fluid.io.save_persistables(exe, save_path, train_program)
                train_pyreader.reset()
                break

    # final eval on dev set
    if args.do_val:
        test_pyreader.decorate_tensor_provider(
            reader.data_generator(
                args.dev_set,
                batch_size=args.batch_size,
                epoch=1,
                shuffle=False))
        print("Final validation result:")
        evaluate(exe, test_prog, test_pyreader, graph_vars, "dev")

    # final eval on test set
    if args.do_test:
        test_pyreader.decorate_tensor_provider(
            reader.data_generator(
                args.test_set,
                batch_size=args.batch_size,
                epoch=1,
                shuffle=False))
        print("Final test result:")
        evaluate(exe, test_prog, test_pyreader, graph_vars, "test")
def main(args):
    """Fine-tune and/or evaluate a classification or regression model.

    Depending on ``args.do_train``, ``args.do_val`` and ``args.do_test`` this
    builds the train and test programs, optionally transpiles for nccl2
    distributed training, initialises parameters, runs the training loop with
    periodic checkpointing and evaluation, and finally evaluates on the dev,
    test and diagnostic sets.

    Raises:
        ValueError: If none of ``do_train``/``do_val``/``do_test`` is set, or
            if only evaluation is requested without ``args.init_checkpoint``.
        AssertionError: If ``do_test`` is set without ``args.test_save``, or
            if the diagnostic prediction returns mismatched lengths.
    """
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    # Pick the execution device and the device count used to size the steps.
    if args.use_cuda:
        dev_list = fluid.cuda_places()
        place = dev_list[0]
        dev_count = len(dev_list)
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    exe = fluid.Executor(place)

    reader = task_reader.ClassifyReader(
        vocab_path=args.vocab_path,
        label_map_config=args.label_map_config,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case,
        in_tokens=args.in_tokens,
        random_seed=args.random_seed,
        tokenizer=args.tokenizer,
        is_classify=args.is_classify,
        is_regression=args.is_regression,
        for_cn=args.for_cn,
        task_id=args.task_id)

    if not (args.do_train or args.do_val or args.do_test):
        raise ValueError("For args `do_train`, `do_val` and `do_test`, at "
                         "least one of them must be True.")

    if args.do_test:
        assert args.test_save is not None

    # One startup program is shared by the train and test programs.
    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    if args.do_train:
        train_data_generator = reader.data_generator(
            input_file=args.train_set,
            batch_size=args.batch_size,
            epoch=args.epoch,
            dev_count=dev_count,
            shuffle=True,
            phase="train")

        num_train_examples = reader.get_num_examples(args.train_set)

        # With in_tokens the batch size is divided by max_seq_len before
        # computing the number of steps.
        if args.in_tokens:
            max_train_steps = args.epoch * num_train_examples // (
                args.batch_size // args.max_seq_len) // dev_count
        else:
            max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count

        warmup_steps = int(max_train_steps * args.warmup_proportion)
        log.info("Device count: %d" % dev_count)
        log.info("Num train examples: %d" % num_train_examples)
        log.info("Max train steps: %d" % max_train_steps)
        log.info("Num warmup steps: %d" % warmup_steps)

        train_program = fluid.Program()
        if args.random_seed is not None and args.enable_ce:
            train_program.random_seed = args.random_seed

        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                train_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='train_reader',
                    ernie_config=ernie_config,
                    is_classify=args.is_classify,
                    is_regression=args.is_regression)
                scheduled_lr, loss_scaling = optimization(
                    loss=graph_vars["loss"],
                    warmup_steps=warmup_steps,
                    num_train_steps=max_train_steps,
                    learning_rate=args.learning_rate,
                    train_program=train_program,
                    startup_prog=startup_prog,
                    weight_decay=args.weight_decay,
                    scheduler=args.lr_scheduler,
                    use_fp16=args.use_fp16,
                    use_dynamic_loss_scaling=args.use_dynamic_loss_scaling,
                    init_loss_scaling=args.init_loss_scaling,
                    incr_every_n_steps=args.incr_every_n_steps,
                    decr_every_n_nan_or_inf=args.decr_every_n_nan_or_inf,
                    incr_ratio=args.incr_ratio,
                    decr_ratio=args.decr_ratio)

        if args.verbose:
            if args.in_tokens:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program,
                    batch_size=args.batch_size // args.max_seq_len)
            else:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program, batch_size=args.batch_size)
            log.info("Theoretical memory usage in training: %.3f - %.3f %s" %
                     (lower_mem, upper_mem, unit))

    if args.do_val or args.do_test:
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                # NOTE: this rebinds graph_vars to the dict returned for the
                # test program; the training loop below uses that dict too.
                test_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='test_reader',
                    ernie_config=ernie_config,
                    is_classify=args.is_classify,
                    is_regression=args.is_regression)

        test_prog = test_prog.clone(for_test=True)

    nccl2_num_trainers = 1
    nccl2_trainer_id = 0
    if args.is_distributed:
        trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
        worker_endpoints_env = os.getenv("PADDLE_TRAINER_ENDPOINTS")
        current_endpoint = os.getenv("PADDLE_CURRENT_ENDPOINT")
        worker_endpoints = worker_endpoints_env.split(",")
        trainers_num = len(worker_endpoints)

        # Implicit string concatenation keeps source indentation out of the
        # logged message (the old backslash continuation embedded it).
        log.info("worker_endpoints:{} trainers_num:{} current_endpoint:{} "
                 "trainer_id:{}".format(worker_endpoints, trainers_num,
                                        current_endpoint, trainer_id))

        # prepare nccl2 env.
        config = fluid.DistributeTranspilerConfig()
        config.mode = "nccl2"
        t = fluid.DistributeTranspiler(config=config)
        t.transpile(
            trainer_id,
            trainers=worker_endpoints_env,
            current_endpoint=current_endpoint,
            program=train_program if args.do_train else test_prog,
            startup_program=startup_prog)
        nccl2_num_trainers = trainers_num
        nccl2_trainer_id = trainer_id

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    # Parameter initialisation: init_checkpoint takes precedence over
    # init_pretraining_params when training; evaluation requires a checkpoint.
    if args.do_train:
        if args.init_checkpoint and args.init_pretraining_params:
            log.warning(
                "WARNING: args 'init_checkpoint' and 'init_pretraining_params' "
                "both are set! Only arg 'init_checkpoint' is made valid.")
        if args.init_checkpoint:
            init_checkpoint(
                exe,
                args.init_checkpoint,
                main_program=startup_prog,
                use_fp16=args.use_fp16)
        elif args.init_pretraining_params:
            init_pretraining_params(
                exe,
                args.init_pretraining_params,
                main_program=startup_prog,
                use_fp16=args.use_fp16)
    elif args.do_val or args.do_test:
        if not args.init_checkpoint:
            raise ValueError("args 'init_checkpoint' should be set if"
                             "only doing validation or testing!")
        init_checkpoint(
            exe,
            args.init_checkpoint,
            main_program=startup_prog,
            use_fp16=args.use_fp16)

    if args.do_train:
        exec_strategy = fluid.ExecutionStrategy()
        if args.use_fast_executor:
            exec_strategy.use_experimental_executor = True
        exec_strategy.num_threads = dev_count
        exec_strategy.num_iteration_per_drop_scope = args.num_iteration_per_drop_scope

        train_exe = fluid.ParallelExecutor(
            use_cuda=args.use_cuda,
            loss_name=graph_vars["loss"].name,
            exec_strategy=exec_strategy,
            main_program=train_program,
            num_trainers=nccl2_num_trainers,
            trainer_id=nccl2_trainer_id)

        train_pyreader.decorate_tensor_provider(train_data_generator)
    else:
        train_exe = None

    test_exe = exe
    if args.do_val or args.do_test:
        if args.use_multi_gpu_test:
            test_exe = fluid.ParallelExecutor(
                use_cuda=args.use_cuda,
                main_program=test_prog,
                share_vars_from=train_exe)

    # Defined up front so the final evaluation below also works for
    # evaluation-only runs; previously these names existed only when training
    # and the final eval raised NameError otherwise.
    steps = 0
    current_epoch = 0

    if args.do_train:
        train_pyreader.start()
        # The scheduled learning rate is only fetched when warmup is active.
        if warmup_steps > 0:
            graph_vars["learning_rate"] = scheduled_lr

        ce_info = []
        time_begin = time.time()
        last_epoch = 0
        while True:
            try:
                steps += 1
                # Only every skip_steps-th step fetches and reports metrics.
                if steps % args.skip_steps != 0:
                    train_exe.run(fetch_list=[])
                else:
                    outputs = evaluate(
                        train_exe,
                        train_program,
                        train_pyreader,
                        graph_vars,
                        "train",
                        metric=args.metric,
                        is_classify=args.is_classify,
                        is_regression=args.is_regression)

                    if args.verbose:
                        verbose = "train pyreader queue size: %d, " % train_pyreader.queue.size(
                        )
                        verbose += "learning rate: %f" % (
                            outputs["learning_rate"]
                            if warmup_steps > 0 else args.learning_rate)
                        log.info(verbose)

                    current_example, current_epoch = reader.get_train_progress()
                    time_end = time.time()
                    used_time = time_end - time_begin

                    if args.is_classify:
                        log.info(
                            "epoch: %d, progress: %d/%d, step: %d, ave loss: %f, "
                            "ave acc: %f, speed: %f steps/s" %
                            (current_epoch, current_example, num_train_examples,
                             steps, outputs["loss"], outputs["accuracy"],
                             args.skip_steps / used_time))
                        ce_info.append(
                            [outputs["loss"], outputs["accuracy"], used_time])
                    if args.is_regression:
                        log.info(
                            "epoch: %d, progress: %d/%d, step: %d, ave loss: %f, "
                            " speed: %f steps/s" %
                            (current_epoch, current_example, num_train_examples,
                             steps, outputs["loss"],
                             args.skip_steps / used_time))
                    time_begin = time.time()

                # Only trainer 0 writes checkpoints and runs evaluation.
                if nccl2_trainer_id == 0:
                    if steps % args.save_steps == 0:
                        save_path = os.path.join(args.checkpoints,
                                                 "step_" + str(steps))
                        fluid.io.save_persistables(exe, save_path,
                                                   train_program)

                    if steps % args.validation_steps == 0 or last_epoch != current_epoch:
                        # evaluate dev set
                        if args.do_val:
                            evaluate_wrapper(args, reader, exe, test_prog,
                                             test_pyreader, graph_vars,
                                             current_epoch, steps)

                        if args.do_test:
                            predict_wrapper(args, reader, exe, test_prog,
                                            test_pyreader, graph_vars,
                                            current_epoch, steps)

                if last_epoch != current_epoch:
                    last_epoch = current_epoch

            except fluid.core.EOFException:
                # Reader exhausted: save a final checkpoint and stop training.
                save_path = os.path.join(args.checkpoints, "step_" + str(steps))
                fluid.io.save_persistables(exe, save_path, train_program)
                train_pyreader.reset()
                break

        if args.enable_ce:
            card_num = get_cards()
            ce_loss = 0
            ce_acc = 0
            ce_time = 0
            try:
                # The second-to-last recorded entry is the one reported.
                ce_loss = ce_info[-2][0]
                ce_acc = ce_info[-2][1]
                ce_time = ce_info[-2][2]
            except IndexError:
                # Fewer than two entries were recorded; keep the zero defaults.
                log.info("ce info error")
            log.info("kpis\ttrain_duration_card%s\t%s" % (card_num, ce_time))
            log.info("kpis\ttrain_loss_card%s\t%f" % (card_num, ce_loss))
            log.info("kpis\ttrain_acc_card%s\t%f" % (card_num, ce_acc))

    # final eval on dev set
    if args.do_val:
        evaluate_wrapper(args, reader, exe, test_prog, test_pyreader,
                         graph_vars, current_epoch, steps)

    # final eval on test set
    if args.do_test:
        predict_wrapper(args, reader, exe, test_prog, test_pyreader,
                        graph_vars, current_epoch, steps)

    # final eval on diagnostic, hack for glue-ax
    if args.diagnostic:
        test_pyreader.decorate_tensor_provider(
            reader.data_generator(
                args.diagnostic,
                batch_size=args.batch_size,
                epoch=1,
                dev_count=1,
                shuffle=False))

        log.info("Final diagnostic")
        qids, preds, probs = predict(
            test_exe,
            test_prog,
            test_pyreader,
            graph_vars,
            is_classify=args.is_classify,
            is_regression=args.is_regression)
        assert len(qids) == len(preds), '{} v.s. {}'.format(
            len(qids), len(preds))
        with open(args.diagnostic_save, 'w') as f:
            for qid, s, p in zip(qids, preds, probs):
                f.write('{}\t{}\t{}\n'.format(qid, s, p))

        log.info("Done final diagnostic, saving to {}".format(
            args.diagnostic_save))
def main(args):
    """Fine-tune a classifier, checkpointing only when dev accuracy improves.

    During training, every ``args.skip_steps`` steps the train accuracy is
    fetched. When it exceeds the best value so far (or 0.95), the dev set is
    evaluated, and if the dev accuracy beats the previous best, the model is
    saved and test predictions are written. A diagnostic prediction is run at
    the end when ``args.diagnostic`` is set.

    Raises:
        ValueError: If none of ``do_train``/``do_val``/``do_test`` is set, or
            if only evaluation is requested without ``args.init_checkpoint``.
        AssertionError: If ``do_test`` is set without ``args.test_save``, or
            if the diagnostic prediction returns mismatched lengths.
    """
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    # Pick the execution device and the device count used to size the steps.
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    exe = fluid.Executor(place)

    reader = task_reader.ClassifyReader(vocab_path=args.vocab_path,
                                        label_map_config=args.label_map_config,
                                        max_seq_len=args.max_seq_len,
                                        do_lower_case=args.do_lower_case,
                                        in_tokens=args.in_tokens,
                                        random_seed=args.random_seed,
                                        tokenizer=args.tokenizer,
                                        is_classify=args.is_classify,
                                        is_regression=args.is_regression,
                                        for_cn=args.for_cn,
                                        task_id=args.task_id)

    if not (args.do_train or args.do_val or args.do_test):
        raise ValueError("For args `do_train`, `do_val` and `do_test`, at "
                         "least one of them must be True.")

    if args.do_test:
        assert args.test_save is not None

    # One startup program is shared by the train and test programs.
    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    if args.predict_batch_size is None:
        args.predict_batch_size = args.batch_size

    if args.do_train:
        train_data_generator = reader.data_generator(
            input_file=args.train_set,
            batch_size=args.batch_size,
            epoch=args.epoch,
            dev_count=dev_count,
            shuffle=True,
            phase="train")

        num_train_examples = reader.get_num_examples(args.train_set)

        # With in_tokens the batch size is divided by max_seq_len before
        # computing the number of steps.
        if args.in_tokens:
            max_train_steps = args.epoch * num_train_examples // (
                args.batch_size // args.max_seq_len) // dev_count
        else:
            max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count

        warmup_steps = int(max_train_steps * args.warmup_proportion)
        print("Device count: %d" % dev_count)
        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)
        print("Num warmup steps: %d" % warmup_steps)

        train_program = fluid.Program()
        # The string statement below is seeding code that was disabled by
        # wrapping it in quotes; it has no effect at runtime.
        """ if args.random_seed is not None and args.enable_ce: train_program.random_seed = args.random_seed """

        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                train_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='train_reader',
                    ernie_config=ernie_config,
                    is_classify=args.is_classify,
                    is_regression=args.is_regression)
                scheduled_lr, loss_scaling = optimization(
                    loss=graph_vars["loss"],
                    warmup_steps=warmup_steps,
                    num_train_steps=max_train_steps,
                    learning_rate=args.learning_rate,
                    train_program=train_program,
                    startup_prog=startup_prog,
                    weight_decay=args.weight_decay,
                    scheduler=args.lr_scheduler,
                    use_fp16=args.use_fp16)

        if args.verbose:
            if args.in_tokens:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program,
                    batch_size=args.batch_size // args.max_seq_len)
            else:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program, batch_size=args.batch_size)
            print("Theoretical memory usage in training: %.3f - %.3f %s" %
                  (lower_mem, upper_mem, unit))

    if args.do_val or args.do_test:
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                # NOTE: this rebinds graph_vars to the dict returned for the
                # test program; the training loop below uses that dict too.
                test_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='test_reader',
                    ernie_config=ernie_config,
                    is_classify=args.is_classify,
                    is_regression=args.is_regression)

        test_prog = test_prog.clone(for_test=True)

    # Single-trainer values passed to the ParallelExecutor below.
    nccl2_num_trainers = 1
    nccl2_trainer_id = 0

    exe.run(startup_prog)

    # Parameter initialisation: init_checkpoint takes precedence over
    # init_pretraining_params when training; evaluation requires a checkpoint.
    if args.do_train:
        if args.init_checkpoint and args.init_pretraining_params:
            print(
                "WARNING: args 'init_checkpoint' and 'init_pretraining_params' "
                "both are set! Only arg 'init_checkpoint' is made valid.")
        if args.init_checkpoint:
            init_checkpoint(exe,
                            args.init_checkpoint,
                            main_program=startup_prog,
                            use_fp16=args.use_fp16)
        elif args.init_pretraining_params:
            init_pretraining_params(exe,
                                    args.init_pretraining_params,
                                    main_program=startup_prog,
                                    use_fp16=args.use_fp16)
    elif args.do_val or args.do_test:
        if not args.init_checkpoint:
            raise ValueError("args 'init_checkpoint' should be set if"
                             "only doing validation or testing!")
        init_checkpoint(exe,
                        args.init_checkpoint,
                        main_program=startup_prog,
                        use_fp16=args.use_fp16)

    if args.do_train:
        exec_strategy = fluid.ExecutionStrategy()
        if args.use_fast_executor:
            exec_strategy.use_experimental_executor = True
        exec_strategy.num_threads = dev_count
        exec_strategy.num_iteration_per_drop_scope = args.num_iteration_per_drop_scope

        train_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                           loss_name=graph_vars["loss"].name,
                                           exec_strategy=exec_strategy,
                                           main_program=train_program,
                                           num_trainers=nccl2_num_trainers,
                                           trainer_id=nccl2_trainer_id)

        train_pyreader.decorate_tensor_provider(train_data_generator)
    else:
        train_exe = None

    test_exe = exe
    if args.do_val or args.do_test:
        if args.use_multi_gpu_test:
            test_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                              main_program=test_prog,
                                              share_vars_from=train_exe)

    # Placeholder values for runs without training; overwritten below when
    # args.do_train is set.
    steps = 10000
    current_epoch = 1
    if args.do_train:
        train_pyreader.start()
        steps = 0
        # The scheduled learning rate is only fetched when warmup is active.
        if warmup_steps > 0:
            graph_vars["learning_rate"] = scheduled_lr

        ce_info = []
        time_begin = time.time()
        last_epoch = 0
        current_epoch = 0
        # Hard-coded starting thresholds: dev evaluation starts once train
        # accuracy passes 0.90, and a model is saved once dev accuracy passes
        # 0.80.
        previous_eval_acc = 0.80
        previous_train_acc = 0.90
        while True:
            try:
                steps += 1
                # Only every skip_steps-th step fetches and reports metrics.
                if steps % args.skip_steps != 0:
                    train_exe.run(fetch_list=[])
                else:
                    outputs = evaluate(train_exe,
                                       train_program,
                                       train_pyreader,
                                       graph_vars,
                                       "train",
                                       metric=args.metric,
                                       is_classify=args.is_classify,
                                       is_regression=args.is_regression)
                    acc = outputs["accuracy"]
                    if acc > previous_train_acc or acc > 0.95:
                        print(
                            "previous train accuracy is %f and current train accuracy is %f "
                            % (previous_train_acc, acc))
                        previous_train_acc = acc
                        # NOTE(review): evaluate_wrapper is called regardless
                        # of args.do_val, but test_prog/test_pyreader only
                        # exist when do_val or do_test is set — confirm this
                        # script is always run with one of them.
                        eval_acc = evaluate_wrapper(args, reader, exe,
                                                    test_prog, test_pyreader,
                                                    graph_vars, current_epoch,
                                                    steps)
                        print(
                            "previous evaluate accuracy is %f and current evaluate accuracy is %f "
                            % (previous_eval_acc, eval_acc))
                        if eval_acc > previous_eval_acc:
                            previous_eval_acc = eval_acc
                            # The checkpoint directory is named after the
                            # digits following the decimal point of eval_acc.
                            # NOTE(review): this indexing assumes
                            # str(eval_acc) contains a '.' — confirm the type
                            # returned by evaluate_wrapper.
                            save_path = os.path.join(
                                args.checkpoints,
                                "evalacc_" + str(eval_acc).split('.')[1])
                            fluid.io.save_persistables(exe, save_path,
                                                       train_program)
                            predict_wrapper(args,
                                            reader,
                                            exe,
                                            test_prog,
                                            test_pyreader,
                                            graph_vars,
                                            current_epoch,
                                            steps="evalacc_" +
                                            str(eval_acc).split('.')[1])
                            print(
                                "predict and save model!!!!!!!!!!!!!!!!!!!!!!!!!! in %s"
                                % (save_path))

                    if args.verbose:
                        verbose = "train pyreader queue size: %d, " % train_pyreader.queue.size(
                        )
                        verbose += "learning rate: %f" % (
                            outputs["learning_rate"]
                            if warmup_steps > 0 else args.learning_rate)
                        print(verbose)

                    current_example, current_epoch = reader.get_train_progress(
                    )
                    time_end = time.time()
                    used_time = time_end - time_begin
                    print(
                        "epoch: %d, progress: %d/%d, step: %d, ave loss: %f, "
                        "ave acc: %f, speed: %f steps/s" %
                        (current_epoch, current_example, num_train_examples,
                         steps, outputs["loss"], outputs["accuracy"],
                         args.skip_steps / used_time))
                    ce_info.append(
                        [outputs["loss"], outputs["accuracy"], used_time])
                    time_begin = time.time()

                # Periodic step-based checkpointing and evaluation are
                # disabled in this variant; the accuracy-gated logic above
                # replaces them.

                if last_epoch != current_epoch:
                    last_epoch = current_epoch

            except fluid.core.EOFException:
                # Reader exhausted: save a final checkpoint and stop training.
                save_path = os.path.join(args.checkpoints, "step_" + str(steps))
                fluid.io.save_persistables(exe, save_path, train_program)
                train_pyreader.reset()
                break

    # The final dev and test evaluations are disabled in this variant.
    steps = 0

    # final eval on diagnostic, hack for glue-ax
    if args.diagnostic:
        test_pyreader.decorate_tensor_provider(
            reader.data_generator(args.diagnostic,
                                  batch_size=args.batch_size,
                                  epoch=1,
                                  dev_count=1,
                                  shuffle=False))

        print("Final diagnostic")
        qids, preds, probs = predict(test_exe,
                                     test_prog,
                                     test_pyreader,
                                     graph_vars,
                                     is_classify=args.is_classify,
                                     is_regression=args.is_regression)
        assert len(qids) == len(preds), '{} v.s. {}'.format(
            len(qids), len(preds))
        with open(args.diagnostic_save, 'w') as f:
            for id, s, p in zip(qids, preds, probs):
                f.write('{}\t{}\t{}\n'.format(id, s, p))

        print("Done final diagnostic, saving to {}".format(
            args.diagnostic_save))
def main(args):
    """Run ERNIE classification inference on ``args.predict_set`` and print the results.

    Builds the prediction program, restores parameters from
    ``args.init_checkpoint``, feeds the prediction set through the py-reader
    until it is exhausted, and prints one line per example in the form
    ``<index>\\t<fetched probs row>``.

    Args:
        args: Parsed command-line namespace. Reads ``ernie_config_path``,
            ``vocab_path``, ``label_map_config``, ``max_seq_len``,
            ``do_lower_case``, ``use_cuda``, ``init_checkpoint``,
            ``predict_set`` and ``batch_size``.

    Raises:
        ValueError: If ``args.init_checkpoint`` is not set.
    """
    # Fail fast: there is no point building the graph or touching the device
    # when the one mandatory argument is missing.
    if not args.init_checkpoint:
        raise ValueError("args 'init_checkpoint' should be set for prediction!")

    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    reader = ClassifyReader(
        vocab_path=args.vocab_path,
        label_map_config=args.label_map_config,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case,
        in_tokens=False)

    predict_prog = fluid.Program()
    predict_startup = fluid.Program()
    with fluid.program_guard(predict_prog, predict_startup):
        with fluid.unique_name.guard():
            predict_pyreader, probs, feed_target_names = create_model(
                args,
                pyreader_name='predict_reader',
                ernie_config=ernie_config,
                is_prediction=True)

    predict_prog = predict_prog.clone(for_test=True)

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(predict_startup)
    init_pretraining_params(exe, args.init_checkpoint, predict_prog)

    predict_exe = fluid.Executor(place)
    predict_pyreader.decorate_tensor_provider(
        reader.data_generator(
            input_file=args.predict_set,
            batch_size=args.batch_size,
            epoch=1,
            shuffle=False))
    predict_pyreader.start()

    all_results = []
    while True:
        try:
            results = predict_exe.run(program=predict_prog,
                                      fetch_list=[probs.name])
            all_results.extend(results[0])
        except fluid.core.EOFException:
            # The reader signals end of data by raising; reset it and stop.
            predict_pyreader.reset()
            break

    np.set_printoptions(precision=4, suppress=True)
    print("-------------- prediction results --------------")
    for index, result in enumerate(all_results):
        print(str(index) + '\t{}'.format(result))
def main(args):
    """Fine-tune and/or evaluate a UNIMO classifier as selected by the ``do_*`` flags.

    The function builds the train and/or test programs, optionally sets up
    nccl2 distributed training, restores parameters, runs the training loop
    (with periodic checkpointing, evaluation and prediction on trainer 0),
    and finally runs one last round of prediction/evaluation on trainer 0.

    Args:
        args: Parsed command-line namespace. Besides model/data paths and
            optimisation hyper-parameters it carries the switches
            ``do_train``, ``do_val``, ``do_val_hard``, ``do_test``,
            ``do_test_hard``, ``do_pred``, ``do_pred_hard`` and
            ``do_diagnostic``.

    Raises:
        ValueError: If none of the train/val/test/diagnostic switches is set,
            or if no training is requested and ``args.init_checkpoint`` is
            missing.
    """
    model_config = UNIMOConfig(args.unimo_config_path)
    model_config.print_config()

    gpu_id = 0
    gpus = fluid.core.get_cuda_device_count()
    if args.is_distributed and os.getenv("FLAGS_selected_gpus") is not None:
        gpu_list = os.getenv("FLAGS_selected_gpus").split(",")
        gpus = len(gpu_list)
        gpu_id = int(gpu_list[0])

    if args.use_cuda:
        place = fluid.CUDAPlace(gpu_id)
        dev_count = gpus
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    tokenizer = GptBpeTokenizer(vocab_file=args.unimo_vocab_file,
                                encoder_json_file=args.encoder_json_file,
                                vocab_bpe_file=args.vocab_bpe_file,
                                do_lower_case=args.do_lower_case)

    data_reader = ClassifyReader(tokenizer, args)

    # NOTE(review): `do_pred` / `do_pred_hard` are not part of this check
    # although they are honoured everywhere below, so a pred-only run is
    # rejected here. Left unchanged; confirm whether that is intended.
    if not (args.do_train or args.do_val or args.do_val_hard
            or args.do_test or args.do_test_hard or args.do_diagnostic):
        raise ValueError("For args `do_train`, `do_val`, `do_val_hard`, `do_test`,"
                         " `do_test_hard` and `do_diagnostic`, at least one of them must be True.")

    # True when any evaluation / prediction pass needs the test program.
    needs_test_prog = bool(
        args.do_val or args.do_val_hard or args.do_test or args.do_test_hard
        or args.do_pred or args.do_pred_hard or args.do_diagnostic)

    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    if args.do_train:
        trainers_num = int(os.getenv("PADDLE_TRAINERS_NUM", "1"))
        train_data_generator = data_reader.data_generator(
            input_file=args.train_set,
            batch_size=args.batch_size,
            epoch=args.epoch,
            dev_count=trainers_num,
            shuffle=True,
            phase="train")

        num_train_examples = data_reader.get_num_examples(args.train_set)

        if args.in_tokens:
            # batch_size counts tokens here, so convert it to sequences.
            max_train_steps = args.epoch * num_train_examples // (
                args.batch_size // args.max_seq_len) // trainers_num
        else:
            max_train_steps = args.epoch * num_train_examples // args.batch_size // trainers_num

        warmup_steps = int(max_train_steps * args.warmup_proportion)
        print("Device count: %d, gpu_id: %d" % (dev_count, gpu_id))
        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)
        print("Num warmup steps: %d" % warmup_steps)

        train_program = fluid.Program()

        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                train_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='train_reader',
                    config=model_config)
                scheduled_lr, _ = optimization(
                    loss=graph_vars["loss"],
                    warmup_steps=warmup_steps,
                    num_train_steps=max_train_steps,
                    learning_rate=args.learning_rate,
                    train_program=train_program,
                    weight_decay=args.weight_decay,
                    scheduler=args.lr_scheduler,
                    use_fp16=args.use_fp16,
                    use_dynamic_loss_scaling=args.use_dynamic_loss_scaling,
                    init_loss_scaling=args.init_loss_scaling,
                    beta1=args.beta1,
                    beta2=args.beta2,
                    epsilon=args.epsilon)

        if args.verbose:
            if args.in_tokens:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program,
                    batch_size=args.batch_size // args.max_seq_len)
            else:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program, batch_size=args.batch_size)
            print("Theoretical memory usage in training: %.3f - %.3f %s" %
                  (lower_mem, upper_mem, unit))

    if needs_test_prog:
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                # Rebinds graph_vars to the test graph's variables; the code
                # below looks variables up by name through this dict.
                test_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='test_reader',
                    config=model_config)

        test_prog = test_prog.clone(for_test=True)

    nccl2_num_trainers = 1
    nccl2_trainer_id = 0
    print("args.is_distributed:", args.is_distributed)
    if args.is_distributed:
        trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
        worker_endpoints_env = os.getenv("PADDLE_TRAINER_ENDPOINTS")
        current_endpoint = os.getenv("PADDLE_CURRENT_ENDPOINT")
        worker_endpoints = worker_endpoints_env.split(",")
        trainers_num = len(worker_endpoints)

        print("worker_endpoints:{} trainers_num:{} current_endpoint:{} "
              "trainer_id:{}".format(worker_endpoints, trainers_num,
                                     current_endpoint, trainer_id))

        # prepare nccl2 env.
        config = fluid.DistributeTranspilerConfig()
        config.mode = "nccl2"
        if args.nccl_comm_num > 1:
            config.nccl_comm_num = args.nccl_comm_num
        if args.use_hierarchical_allreduce and trainers_num > args.hierarchical_allreduce_inter_nranks:
            config.use_hierarchical_allreduce = args.use_hierarchical_allreduce
            config.hierarchical_allreduce_inter_nranks = args.hierarchical_allreduce_inter_nranks

            assert config.hierarchical_allreduce_inter_nranks > 1
            assert trainers_num % config.hierarchical_allreduce_inter_nranks == 0

            # Floor division keeps this an int on Python 3; exact because
            # divisibility is asserted just above.
            config.hierarchical_allreduce_exter_nranks = \
                trainers_num // config.hierarchical_allreduce_inter_nranks

        t = fluid.DistributeTranspiler(config=config)
        t.transpile(trainer_id,
                    trainers=worker_endpoints_env,
                    current_endpoint=current_endpoint,
                    program=train_program if args.do_train else test_prog,
                    startup_program=startup_prog)
        nccl2_num_trainers = trainers_num
        nccl2_trainer_id = trainer_id

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    if args.do_train:
        if args.init_checkpoint and args.init_pretraining_params:
            print(
                "WARNING: args 'init_checkpoint' and 'init_pretraining_params' "
                "both are set! Only arg 'init_checkpoint' is made valid.")
        if args.init_checkpoint:
            init_checkpoint(exe,
                            args.init_checkpoint,
                            main_program=train_program)
        elif args.init_pretraining_params:
            init_pretraining_params(exe,
                                    args.init_pretraining_params,
                                    main_program=train_program)
    elif needs_test_prog:
        if not args.init_checkpoint:
            raise ValueError("args 'init_checkpoint' should be set if "
                             "only doing validation or testing!")
        init_checkpoint(exe,
                        args.init_checkpoint,
                        main_program=startup_prog)

    if args.do_train:
        exec_strategy = fluid.ExecutionStrategy()
        if args.use_fast_executor:
            exec_strategy.use_experimental_executor = True
        exec_strategy.num_threads = dev_count
        exec_strategy.num_iteration_per_drop_scope = args.num_iteration_per_drop_scope

        train_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                           loss_name=graph_vars["loss"].name,
                                           exec_strategy=exec_strategy,
                                           main_program=train_program,
                                           num_trainers=nccl2_num_trainers,
                                           trainer_id=nccl2_trainer_id)

        train_pyreader.decorate_tensor_provider(train_data_generator)
    else:
        train_exe = None

    test_exe = exe
    if needs_test_prog and args.use_multi_gpu_test:
        test_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                          main_program=test_prog,
                                          share_vars_from=train_exe)

    def _feed_test_reader(input_file):
        # Point the shared test py-reader at one pass over `input_file`.
        test_pyreader.decorate_tensor_provider(
            data_reader.data_generator(input_file,
                                       batch_size=args.batch_size,
                                       epoch=1,
                                       dev_count=1,
                                       shuffle=False))

    def _evaluate_split(input_file, phase, history, step, banner=None):
        # Evaluate one split and record (step, metric name, metric value).
        _feed_test_reader(input_file)
        if banner is not None:
            print(banner)
        result = evaluate(args, test_exe, test_prog, test_pyreader,
                          graph_vars, phase)
        history.append((step, result['key_eval'], result[result['key_eval']]))

    def _predict_split(input_file, tag, step):
        # Predict one split and dump "<qid>\t<pred>\t<prob>" lines to
        # "<pred_save>.<tag>.<step>.txt".
        _feed_test_reader(input_file)
        qids, preds, probs = predict(test_exe,
                                     test_prog,
                                     test_pyreader,
                                     graph_vars,
                                     dev_count=1)
        save_path = args.pred_save + '.' + tag + '.' + str(step) + '.txt'
        print("testing {}, save to {}".format(input_file, save_path))
        with open(save_path, 'w') as f:
            for qid, pred, prob in zip(qids, preds, probs):
                f.write('{}\t{}\t{}\n'.format(qid, pred, prob))

    def _print_best(history, template):
        # Report the record with the highest metric; the earliest wins ties.
        best = max(history, key=lambda record: record[2])
        print(template % (best[0], best[1], best[2]))

    dev_ret_history = []  # (steps, key_eval, eval)
    dev_hard_ret_history = []  # (steps, key_eval, eval)
    test_ret_history = []  # (steps, key_eval, eval)
    test_hard_ret_history = []  # (steps, key_eval, eval)

    # Initialised outside the training branch because the final
    # prediction/evaluation below also reads it when no training ran.
    steps = 0
    if args.do_train:
        train_pyreader.start()
        if warmup_steps > 0:
            graph_vars["learning_rate"] = scheduled_lr

        time_begin = time.time()

        skip_steps = args.skip_steps
        while True:
            try:
                steps += 1
                if steps % skip_steps == 0:
                    # Logging step: fetch the metrics along with the update.
                    train_fetch_list = [
                        graph_vars["loss"].name,
                        graph_vars["accuracy"].name,
                        graph_vars["num_seqs"].name,
                    ]
                    if "learning_rate" in graph_vars:
                        train_fetch_list.append(
                            graph_vars["learning_rate"].name)
                    res = train_exe.run(fetch_list=train_fetch_list)

                    outputs = {"loss": np.mean(res[0])}
                    if "learning_rate" in graph_vars:
                        # Index 3 is the learning rate appended above.
                        outputs["learning_rate"] = float(res[3][0])

                    if args.verbose:
                        verbose = "train pyreader queue size: %d, " % train_pyreader.queue.size()
                        verbose += "learning rate: %f" % (
                            outputs["learning_rate"]
                            if warmup_steps > 0 else args.learning_rate)
                        print(verbose)

                    current_example, current_epoch = data_reader.get_train_progress()
                    time_end = time.time()
                    used_time = time_end - time_begin
                    print("%s - epoch: %d, progress: %d/%d, step: %d, ave loss: %f, speed: %f steps/s" %
                          (get_time(), current_epoch, current_example, num_train_examples, steps,
                           outputs["loss"], args.skip_steps / used_time))
                    time_begin = time.time()
                else:
                    train_exe.run(fetch_list=[])

                if nccl2_trainer_id == 0:
                    if steps % args.save_steps == 0 and args.save_checkpoints:
                        save_path = os.path.join(args.checkpoints,
                                                 "step_" + str(steps))
                        fluid.io.save_persistables(exe, save_path,
                                                   train_program)

                    if steps % args.validation_steps == 0:
                        # evaluate dev set
                        if args.do_val:
                            _evaluate_split(args.dev_set, "dev",
                                            dev_ret_history, steps)
                        # evaluate dev_hard set
                        if args.do_val_hard:
                            _evaluate_split(args.dev_hard_set, "dev_hard",
                                            dev_hard_ret_history, steps)
                        # evaluate test set
                        if args.do_test:
                            _evaluate_split(args.test_set, "test",
                                            test_ret_history, steps)
                        # evaluate test_hard set
                        if args.do_test_hard:
                            _evaluate_split(args.test_hard_set, "test_hard",
                                            test_hard_ret_history, steps)
                        # pred diagnostic set
                        if args.do_diagnostic:
                            _predict_split(args.diagnostic_set, "diagnostic",
                                           steps)
                        # pred test set
                        if args.do_pred:
                            _predict_split(args.test_set, "test", steps)
                        # pred test hard set
                        if args.do_pred_hard:
                            _predict_split(args.test_hard_set, "test_hard",
                                           steps)

            except fluid.core.EOFException:
                # The reader raises once the training data is exhausted.
                if args.save_checkpoints:
                    save_path = os.path.join(args.checkpoints,
                                             "step_" + str(steps))
                    fluid.io.save_persistables(exe, save_path, train_program)
                train_pyreader.reset()
                break

    if nccl2_trainer_id == 0:
        # final pred on diagnostic set
        if args.do_diagnostic:
            _predict_split(args.diagnostic_set, "diagnostic", steps)

        # final pred on test set
        if args.do_pred:
            _predict_split(args.test_set, "test", steps)

        # final pred on test_hard set
        if args.do_pred_hard:
            _predict_split(args.test_hard_set, "test_hard", steps)

        # final eval on test set
        if args.do_test:
            _evaluate_split(args.test_set, "test", test_ret_history, steps,
                            banner="Final test result:")
            _print_best(test_ret_history,
                        "Best testing result: step %d %s %f")

        # final eval on test hard set
        if args.do_test_hard:
            _evaluate_split(args.test_hard_set, "test_hard",
                            test_hard_ret_history, steps,
                            banner="Final test_hard result:")
            _print_best(test_hard_ret_history,
                        "Best testing hard result: step %d %s %f")

        # final eval on dev set
        if args.do_val:
            _evaluate_split(args.dev_set, "dev", dev_ret_history, steps,
                            banner="Final validation result:")
            _print_best(dev_ret_history,
                        "Best validation result: step %d %s %f")

        # final eval on dev hard set
        if args.do_val_hard:
            _evaluate_split(args.dev_hard_set, "dev_hard",
                            dev_hard_ret_history, steps,
                            banner="Final validation_hard result:")
            _print_best(dev_hard_ret_history,
                        "Best validation_hard result: step %d %s %f")
def _ernie_to_huggingface_name(name):
    """Translate one ERNIE variable name into its HuggingFace BERT key.

    Args:
        name: Variable name as listed by the ERNIE program.

    Returns:
        A ``(huggingface_key, needs_transpose)`` tuple, or ``None`` when the
        name is not one of the recognised parameters.
    """
    direct = {
        "word_embedding": ("embeddings.word_embeddings.weight", False),
        "pos_embedding": ("embeddings.position_embeddings.weight", False),
        "sent_embedding": ("embeddings.token_type_embeddings.weight", False),
        "pre_encoder_layer_norm_scale": ("embeddings.LayerNorm.gamma", False),
        "pre_encoder_layer_norm_bias": ("embeddings.LayerNorm.beta", False),
        "pooled_fc.w_0": ("pooler.dense.weight", True),
        "pooled_fc.b_0": ("pooler.dense.bias", False),
    }
    if name in direct:
        return direct[name]
    if not name.startswith("encoder_layer_"):
        return None

    splits = name.split(".")
    # Only "<prefix>.<suffix>" names carry a usable suffix.
    suffix = splits[1] if len(splits) == 2 else None
    parts = splits[0].split("_")
    if len(parts) < 4:
        return None

    layer = "encoder.layer." + parts[2]
    kind = parts[3]

    if kind == "multi":
        if len(parts) < 7:
            return None
        is_weight = suffix == "w_0"
        tail = ".weight" if is_weight else ".bias"
        if parts[6] == "output":
            return layer + ".attention.output.dense" + tail, is_weight
        return layer + ".attention.self." + parts[6] + tail, is_weight

    if kind == "post":
        if len(parts) < 5:
            return None
        tail = ".gamma" if name.endswith("scale") else ".beta"
        if parts[4] == "att":
            return layer + ".attention.output.LayerNorm" + tail, False
        if parts[4] == "ffn":
            return layer + ".output.LayerNorm" + tail, False
        return None

    if kind == "ffn":
        if len(parts) < 6:
            return None
        is_weight = suffix == "w_0"
        tail = ".weight" if is_weight else ".bias"
        if parts[5] == "0":
            return layer + ".intermediate.dense" + tail, is_weight
        if parts[5] == "1":
            return layer + ".output.dense" + tail, is_weight
        return None

    return None


def gen_huggingface_bert_model(params_path):
    """Load ERNIE parameters and return them under HuggingFace BERT key names.

    Builds the ERNIE classifier graph on CPU, restores the parameters stored
    at ``params_path``, and copies every recognised variable into an ordered
    dict as a float32 numpy array. Fully-connected weights (``*.w_0``) are
    transposed. Variables whose names are not recognised are skipped.

    Args:
        params_path: Directory containing the ERNIE pretraining parameters.

    Returns:
        ``collections.OrderedDict`` mapping HuggingFace key to numpy array, in
        the order the variables are listed by the startup program.
    """
    import paddle.fluid as fluid
    import sys

    ernie_root = "./LARK/ERNIE"
    if ernie_root not in sys.path:
        # Avoid growing sys.path on every call.
        sys.path.append(ernie_root)
    from model.ernie import ErnieConfig
    from finetune.classifier import create_model
    from utils.init import init_pretraining_params

    ernie_config = ErnieConfig("./LARK/ERNIE/config/ernie_config.json")
    startup_prog = fluid.default_startup_program()
    test_prog = fluid.Program()

    # NOTE(review): `args` is not a parameter; this relies on a module-level
    # `args` object being defined elsewhere in the file - confirm.
    args.max_seq_len = 512
    args.use_fp16 = False
    args.num_labels = 2
    args.loss_scaling = 1.0

    with fluid.program_guard(test_prog, startup_prog):
        with fluid.unique_name.guard():
            _, _ = create_model(args,
                                pyreader_name="test",
                                ernie_config=ernie_config)

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)
    init_pretraining_params(exe, params_path, main_program=startup_prog)

    scope = fluid.global_scope()
    new_model = collections.OrderedDict()
    for var in startup_prog.list_vars():
        mapping = _ernie_to_huggingface_name(var.name)
        if mapping is None:
            # Reader variables and anything without a BERT counterpart.
            continue
        target_name, needs_transpose = mapping
        weight = np.array(scope.find_var(var.name).get_tensor(),
                          dtype=np.float32)
        if needs_transpose:
            weight = weight.transpose()
        new_model[target_name] = weight
    return new_model
raise ValueError("args 'init_checkpoint' should be set if" "only doing validation or testing!") ernie_config = ErnieConfig(args.ernie_config_path) ernie_config.print_config() place = fluid.CPUPlace() exe = fluid.Executor(place) startup_prog = fluid.Program() test_program = fluid.Program() with fluid.program_guard(test_program, startup_prog): with fluid.unique_name.guard(): _, _ = create_model(args, pyreader_name='test_reader', ernie_config=ernie_config, is_classify=True) exe.run(startup_prog) init_pretraining_params( exe, args.init_checkpoint, main_program=test_program, #main_program=startup_prog, use_fp16=args.use_fp16) name2params = {} prefix = args.init_checkpoint for var in startup_prog.list_vars(): path = os.path.join(prefix, var.name)