def extract_weights(args):
    """Load pretrained ERNIE Paddle parameters and return them as numpy arrays.

    Builds the Paddle graph on CPU so the persistable variables exist, loads
    the pretraining params from ``args.init_pretraining_params``, then copies
    every mapped variable into an ``OrderedDict`` keyed by its Gluon name.

    Args:
        args: parsed CLI namespace. NOTE: ``max_seq_len``, ``use_fp16``,
            ``num_labels`` and ``loss_scaling`` are overwritten below before
            the model is built.

    Returns:
        ``collections.OrderedDict`` mapping gluon parameter name to a
        ``np.float32`` array.
    """
    # add ERNIE to environment
    print('extract weights start'.center(60, '='))
    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)
    # Fixed hyper-parameters used only to instantiate the graph for extraction.
    args.max_seq_len = 512
    args.use_fp16 = False
    args.num_labels = 2
    args.loss_scaling = 1.0
    print('model config:')
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()
    with fluid.program_guard(test_prog, startup_prog):
        with fluid.unique_name.guard():
            _, _ = create_model(
                args, pyreader_name='train', ernie_config=ernie_config)
    fluid.io.load_vars(
        exe,
        args.init_pretraining_params,
        main_program=test_prog,
        predicate=if_exist)
    state_dict = collections.OrderedDict()
    weight_map = build_weight_map()
    for ernie_name, gluon_name in weight_map.items():
        fluid_tensor = fluid.global_scope().find_var(ernie_name).get_tensor()
        fluid_array = np.array(fluid_tensor, dtype=np.float32)
        # 'w_0' names FC weight matrices; presumably transposed because Paddle
        # and Gluon store them with opposite (in, out) orientation — TODO confirm.
        if 'w_0' in ernie_name:
            fluid_array = fluid_array.transpose()
        state_dict[gluon_name] = fluid_array
        print('{} -> {} {}'.format(ernie_name, gluon_name, fluid_array.shape))
    print('extract weights done!'.center(60, '='))
    return state_dict
def test(args):
    """Run the pretraining test set once and log loss / ppl / next-sentence acc.

    Args:
        args: parsed CLI namespace; ``ernie_config_path`` and ``use_cuda`` are
            read here (``predict_wrapper`` reads further fields).
    """
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    test_prog = fluid.Program()
    test_startup = fluid.Program()
    with fluid.program_guard(test_prog, test_startup):
        with fluid.unique_name.guard():
            test_pyreader, next_sent_acc, mask_lm_loss, total_loss = create_model(
                pyreader_name='test_reader', ernie_config=ernie_config)
    # Freeze the graph for inference (disables dropout etc.).
    test_prog = test_prog.clone(for_test=True)

    # Fix: PEP 8 — never compare booleans with `== True`.
    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(test_startup)

    predict = predict_wrapper(
        args,
        exe,
        ernie_config,
        test_prog=test_prog,
        pyreader=test_pyreader,
        fetch_list=[next_sent_acc.name, mask_lm_loss.name, total_loss.name])

    print("test begin")
    loss, lm_loss, acc, steps, speed = predict()
    print(
        "[test_set] loss: %f, global ppl: %f, next_sent_acc: %f, speed: %f steps/s"
        % (np.mean(np.array(loss) / steps),
           np.exp(np.mean(np.array(lm_loss) / steps)),
           np.mean(np.array(acc) / steps), speed))
def convert(args):
    """Load Paddle-format ERNIE params and pickle all persistables to disk.

    Expects ``{args.ernie_path}/paddle`` to contain the unpacked param archive
    (``params/``, ``ernie_config.json``, ``vocab.txt``); exits with status 1
    if anything is missing.

    Returns:
        The fluid train program (useful for inspecting the variable list).
    """
    ernie_export_path = f'{args.ernie_path}/ernie_persistables.pkl'
    pretraining_params_path = f'{args.ernie_path}/paddle/params'
    ernie_config_path = f'{args.ernie_path}/paddle/ernie_config.json'
    ernie_vocab_path = f'{args.ernie_path}/paddle/vocab.txt'
    unzip_message = f"Please unzip ERNIE paddle param archive into {args.ernie_path}/paddle"
    # Fix: the three copy-pasted existence checks collapsed into one loop
    # (same check order, same messages, same exit code).
    for required in (pretraining_params_path, ernie_config_path, ernie_vocab_path):
        if not os.path.exists(required):
            print(f"{required} does not exist.", file=sys.stderr)
            print(unzip_message, file=sys.stderr)
            sys.exit(1)

    ernie_config = ErnieConfig(ernie_config_path)
    # Fix missing use_task_id
    ernie_config._config_dict['use_task_id'] = True
    ernie_config.print_config()

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    startup_prog = fluid.Program()
    train_program = fluid.Program()
    inference_scope = fluid.core.Scope()
    with fluid.scope_guard(inference_scope):
        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                _ = create_model(args, ernie_config=ernie_config)
        init_pretraining_params(
            exe,
            pretraining_params_path,
            main_program=startup_prog,
            use_fp16=args.use_fp16)
        persistables = dict()
        for var in filter(fluid.io.is_persistable, train_program.list_vars()):
            numpy_value = fetch_var(var.name, inference_scope)
            persistables[var.name] = numpy_value
            if args.verbose:
                print(var.name)
        print("totally", len(persistables), "persistables")
        with open(ernie_export_path, 'wb') as f:
            pickle.dump(persistables, f)
    return train_program
def save_model(state_dict, dump_path):
    """Persist converted weights, config and vocab in PyTorch/BERT layout.

    Args:
        state_dict: mapping of parameter name -> array. NOTE: mutated in
            place — values are replaced by ``torch.FloatTensor``.
        dump_path: output directory, created if missing.
    """
    print('save model start'.center(60, '='))
    if not os.path.exists(dump_path):
        os.makedirs(dump_path)
    # save model
    for key in state_dict:
        state_dict[key] = torch.FloatTensor(state_dict[key])
    torch.save(state_dict, os.path.join(dump_path, "pytorch_model.bin"))
    print('finish save model')
    # save config
    # NOTE(review): reads the module-level `args`, not a parameter — confirm
    # `args` is always defined before this is called.
    ernie_config = ErnieConfig(args.ernie_config_path)._config_dict
    # set layer_norm_eps, more detail see: https://github.com/PaddlePaddle/LARK/issues/75
    ernie_config['layer_norm_eps'] = 1e-5
    with open(os.path.join(dump_path, "bert_config.json"), 'wt', encoding='utf-8') as f:
        json.dump(ernie_config, f, indent=4)
    print('finish save config')
    # save vocab.txt: keep only the token (first tab-separated column) per line.
    # Fix: output file is now context-managed, so it is closed even on error
    # (it was previously opened bare and closed manually).
    with open(os.path.join(dump_path, "vocab.txt"), "wt", encoding='utf-8') as vocab_f, \
         open("./LARK/ERNIE/config/vocab.txt", "rt", encoding='utf-8') as f:
        for line in f:
            data = line.strip().split("\t")
            vocab_f.write(data[0] + "\n")
    print('finish save vocab')
    print('save model done!'.center(60, '='))
def __init__(self, args, task):
    """Build the graph-ERNIE model for one of the supported tasks.

    Args:
        args: parsed CLI namespace (reads ``norm_score``,
            ``ernie_config_path``, ``max_seq_len``; ``build_model`` reads more).
        task: one of "predict_query", "predict_poi", "pointwise", "pairwise",
            "listwise", "listwise_hinge".

    Raises:
        ValueError: if ``task`` is not a supported task name.
    """
    candi_tasks = [
        "predict_query", "predict_poi", "pointwise", "pairwise",
        "listwise", "listwise_hinge"]
    if task not in candi_tasks:
        raise ValueError("task %s not in %s" % (task, candi_tasks))
    self.norm_score = args.norm_score
    self.ernie_config = ErnieConfig(args.ernie_config_path)
    self.ernie_config.print_config()
    # vocabulary size of the city embedding table
    self.city_size = 20000
    # output width of the final query/poi projection layers
    self.hidden_size = 64
    # feed holders collected while the graph is built
    self._holder_list = []
    # per-node ERNIE inputs carried on the graph
    node_feature = [
        ('src_ids', [None, args.max_seq_len], "int64"),
        ('pos_ids', [None, args.max_seq_len], "int64"),
        ('sent_ids', [None, args.max_seq_len], "int64"),
        ('input_mask', [None, args.max_seq_len], "float32"),
        ('node_types', [None], "int32"),
    ]
    if task != 'predict_query':
        self.graph_wrapper = GraphWrapper(
            name="graph", place=F.CPUPlace(), node_feat=node_feature)
        self._holder_list.extend(self.graph_wrapper.holder_list)
    elif task == "predict_query":
        # This is for save_inference_mode for query
        self.graph_wrapper = FakeGraphWrapper(
            node_feat=node_feature)
        self._holder_list.extend(self.graph_wrapper.holder_list)
    self.build_model(args, task)
def main(args, init_checkpoint):
    """Build the classification graph, load a checkpoint and export it as an
    inference model.

    Args:
        args: parsed CLI namespace (reads ``ernie_config_path``, ``use_cuda``,
            ``ernie_version``, ``save_inference_model_path``).
        init_checkpoint: path of the checkpoint to load; required.

    Raises:
        ValueError: if ``init_checkpoint`` is falsy.
    """
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    predict_prog = fluid.Program()
    predict_startup = fluid.Program()
    with fluid.program_guard(predict_prog, predict_startup):
        with fluid.unique_name.guard():
            predict_pyreader, probs, feed_target_names = create_model(
                args,
                pyreader_name='predict_reader',
                ernie_config=ernie_config,
                is_classify=True,
                is_prediction=True,
                ernie_version=args.ernie_version)
    predict_prog = predict_prog.clone(for_test=True)

    # Fix: PEP 8 — never compare booleans with `== True`.
    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(predict_startup)

    if init_checkpoint:
        init_pretraining_params(exe, init_checkpoint, predict_prog)
    else:
        raise ValueError(
            "args 'init_checkpoint' should be set for prediction!")

    # save the inference model
    assert args.save_inference_model_path, "args save_inference_model_path should be set for prediction"
    _, ckpt_dir = os.path.split(init_checkpoint.rstrip('/'))
    dir_name = ckpt_dir + '_inference_model'
    model_path = os.path.join(args.save_inference_model_path, dir_name)
    print("save inference model to %s" % model_path)
    fluid.io.save_inference_model(
        model_path,
        feed_target_names, [probs],
        exe,
        main_program=predict_prog)
def main(args):
    """Build the evaluation/test graph and initialize the executor.

    Publishes the constructed objects through module-level globals so other
    helpers in this script can reuse them.
    """
    global test_pyreader, reader, exe, test_prog, test_graph_vars, ernie_config, startup_prog
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    exe = fluid.Executor(place)

    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    # Fix: PEP 8 — compare to None with `is`, not `==`.
    if args.predict_batch_size is None:
        args.predict_batch_size = args.batch_size

    if args.do_val or args.do_test:
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                test_pyreader, test_graph_vars = create_model(
                    args,
                    pyreader_name='test_reader',
                    ernie_config=ernie_config,
                    is_training=False)
        test_prog = test_prog.clone(for_test=True)

    nccl2_num_trainers = 1
    nccl2_trainer_id = 0
    exe.run(startup_prog)
def main(args):
    """Run batch prediction for a classification task and print probabilities."""
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    reader = ClassifyReader(
        vocab_path=args.vocab_path,
        label_map_config=args.label_map_config,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case,
        in_tokens=False)

    predict_prog = fluid.Program()
    predict_startup = fluid.Program()
    with fluid.program_guard(predict_prog, predict_startup):
        with fluid.unique_name.guard():
            predict_pyreader, probs, feed_target_names = create_model(
                args,
                pyreader_name='predict_reader',
                ernie_config=ernie_config,
                is_prediction=True)
    predict_prog = predict_prog.clone(for_test=True)

    # Fix: `place` used to be computed here and then immediately overwritten by
    # an equivalent `== True` ternary; the redundant reassignment is removed.
    if args.use_cuda:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    exe = fluid.Executor(place)
    exe.run(predict_startup)

    if args.init_checkpoint:
        init_pretraining_params(exe, args.init_checkpoint, predict_prog)
    else:
        raise ValueError("args 'init_checkpoint' should be set for prediction!")

    predict_exe = fluid.Executor(place)

    predict_data_generator = reader.data_generator(
        input_file=args.predict_set,
        batch_size=args.batch_size,
        epoch=1,
        shuffle=False)
    predict_pyreader.decorate_tensor_provider(predict_data_generator)

    predict_pyreader.start()
    all_results = []
    time_begin = time.time()
    while True:
        try:
            results = predict_exe.run(program=predict_prog,
                                      fetch_list=[probs.name])
            all_results.extend(results[0])
        except fluid.core.EOFException:
            # Reader exhausted: one full pass over the prediction set is done.
            predict_pyreader.reset()
            break
    time_end = time.time()

    np.set_printoptions(precision=4, suppress=True)
    print("-------------- prediction results --------------")
    for index, result in enumerate(all_results):
        print(str(index) + '\t{}'.format(result))
def main(args):
    """Dump model outputs for one or more test sets, one save dir per set."""
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    if args.use_cuda:
        dev_list = fluid.cuda_places()
        place = dev_list[0]
        dev_count = len(dev_list)
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    exe = fluid.Executor(place)

    reader = reader_de_infer.ClassifyReader(
        vocab_path=args.vocab_path,
        label_map_config=args.label_map_config,
        q_max_seq_len=args.q_max_seq_len,
        p_max_seq_len=args.p_max_seq_len,
        do_lower_case=args.do_lower_case,
        in_tokens=args.in_tokens,
        random_seed=args.random_seed,
        tokenizer=args.tokenizer,
        for_cn=args.for_cn,
        task_id=args.task_id)

    assert args.test_save is not None
    startup_prog = fluid.Program()

    test_prog = fluid.Program()
    with fluid.program_guard(test_prog, startup_prog):
        with fluid.unique_name.guard():
            test_pyreader, graph_vars = create_model(
                args,
                pyreader_name='test_reader',
                ernie_config=ernie_config,
                batch_size=args.batch_size,
                is_prediction=True)
    test_prog = test_prog.clone(for_test=True)

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    if not args.init_checkpoint:
        # Fix: the adjacent string literals used to concatenate to
        # "...set ifonly doing..." — a separating space was missing.
        raise ValueError("args 'init_checkpoint' should be set if "
                         "only doing validation or testing!")
    init_checkpoint(exe, args.init_checkpoint, main_program=startup_prog)

    test_sets = args.test_set.split(',')
    save_dirs = args.test_save.split(',')
    assert len(test_sets) == len(save_dirs)

    batch_size = args.batch_size if args.predict_batch_size is None else args.predict_batch_size
    for test_f, save_f in zip(test_sets, save_dirs):
        test_pyreader.decorate_tensor_provider(
            reader.data_generator(
                test_f,
                batch_size=batch_size,
                epoch=1,
                dev_count=1,
                shuffle=False))
        save_path = save_f
        log.info("testing {}, save to {}".format(test_f, save_path))
        predict(
            args,
            exe,
            test_prog,
            test_pyreader,
            graph_vars,
            output_item=args.output_item,
            output_file_name=args.output_file_name,
            hidden_size=ernie_config['hidden_size'])
class BaseGraphErnie(object):
    """Base Graph Model.

    Encodes graph nodes with ERNIE (or a CNN encoder), builds query and POI
    representations, and attaches a task-specific loss ("pointwise",
    "pairwise", "listwise", "listwise_hinge") or a prediction-only graph
    ("predict_query", "predict_poi").
    """

    def __init__(self, args, task):
        # Validate task name before building any graph state.
        candi_tasks = [
            "predict_query", "predict_poi", "pointwise", "pairwise",
            "listwise", "listwise_hinge"]
        if task not in candi_tasks:
            raise ValueError("task %s not in %s" % (task, candi_tasks))
        self.norm_score = args.norm_score
        self.ernie_config = ErnieConfig(args.ernie_config_path)
        self.ernie_config.print_config()
        # vocabulary size of the city embedding table
        self.city_size = 20000
        # output width of the final query/poi projections
        self.hidden_size = 64
        # feed holders collected while the graph is built
        self._holder_list = []
        # per-node ERNIE inputs carried on the graph
        node_feature = [
            ('src_ids', [None, args.max_seq_len], "int64"),
            ('pos_ids', [None, args.max_seq_len], "int64"),
            ('sent_ids', [None, args.max_seq_len], "int64"),
            ('input_mask', [None, args.max_seq_len], "float32"),
            ('node_types', [None], "int32"),
        ]
        if task != 'predict_query':
            self.graph_wrapper = GraphWrapper(
                name="graph", place=F.CPUPlace(), node_feat=node_feature)
            self._holder_list.extend(self.graph_wrapper.holder_list)
        elif task == "predict_query":
            # This is for save_inference_mode for query
            self.graph_wrapper = FakeGraphWrapper(
                node_feat=node_feature)
            self._holder_list.extend(self.graph_wrapper.holder_list)
        self.build_model(args, task)

    @property
    def holder_list(self):
        """holder list"""
        return self._holder_list

    def city_embedding(self, input):
        """add city_embedding: look up a city id in a shared embedding table."""
        input = L.unsqueeze(input, axes=-1)
        return L.embedding(
            input,
            #size=(self.city_size, 20),
            size=(self.city_size, self.hidden_size),
            param_attr=F.ParamAttr(name="city_embed"))

    def build_model(self, args, task):
        """build graph model"""
        # geo features for queries (80-d) and POIs (40-d); semantics of the
        # individual dimensions are defined by the feed side — not visible here.
        self.query_geo = L.data(name="query_geo", shape=[-1, 80], dtype="float32")
        self.holder_list.append(self.query_geo)
        self.poi_geo = L.data(name="poi_geo", shape=[-1, 40], dtype="float32")
        self.holder_list.append(self.poi_geo)
        if task != "predict_query":
            self.city_id = L.data(name="city_id", shape=[-1], dtype="int64")
            self.holder_list.append(self.city_id)
            poi_city_embed = self.city_embedding(self.city_id)
            self.poi_index = L.data(name="poi_index", shape=[-1], dtype="int64")
            self.holder_list.append(self.poi_index)
        if task != "predict_poi":
            self.query_city = L.data(name="query_city", shape=[-1], dtype="int64")
            self.holder_list.append(self.query_city)
            query_city_embed = self.city_embedding(self.query_city)
            self.query_index = L.data(
                name="query_index", shape=[-1], dtype="int64")
            self.holder_list.append(self.query_index)
        # Task-specific label holders; prediction tasks need none.
        if task == 'pointwise':
            self.labels = L.data(name="labels", shape=[-1], dtype="float32")
            self.holder_list.append(self.labels)
        elif task == "pairwise":
            self.labels = L.data(name="labels", shape=[-1], dtype="float32")
            self.holder_list.append(self.labels)
            self.labels = L.reshape(self.labels, [-1, 1])
            self.labels.stop_gradients = True
        elif task == "listwise" or task == "listwise_hinge":
            self.labels = L.data(name="labels", shape=[-1], dtype="int64")
            self.holder_list.append(self.labels)
            self.labels = L.reshape(self.labels, [-1, 1])
            self.labels.stop_gradients = True
        elif task == "predict_query":
            pass
        elif task == "predict_poi":
            pass

        # Encode every graph node with the shared encoder.
        src_ids = self.graph_wrapper.node_feat["src_ids"]
        pos_ids = self.graph_wrapper.node_feat["pos_ids"]
        sent_ids = self.graph_wrapper.node_feat["sent_ids"]
        input_mask = self.graph_wrapper.node_feat["input_mask"]
        src_ids = ernie_unsqueeze(src_ids)
        pos_ids = ernie_unsqueeze(pos_ids)
        sent_ids = ernie_unsqueeze(sent_ids)
        input_mask = ernie_unsqueeze(input_mask)
        # All nodes use task id 0.
        task_ids = L.zeros_like(sent_ids)
        task_ids = L.cast(task_ids, dtype="int64")

        if args.model_type == "cnn":
            encoder_model = CnnModel
        elif args.model_type == "ernie":
            encoder_model = ErnieModel
        else:
            raise ValueError("model type %s not exists." % args.model_type)

        ernie = encoder_model(
            src_ids=src_ids,
            position_ids=pos_ids,
            sentence_ids=sent_ids,
            input_mask=input_mask,
            config=self.ernie_config,
            task_ids=task_ids,
        )

        if task != "predict_query":
            # POI address text is encoded with a second encoder instance.
            args.max_addr_len = args.max_seq_len
            addr_src_ids = L.data(
                name='addr_src_ids',
                shape=[None, args.max_addr_len],
                dtype="int64")
            self.holder_list.append(addr_src_ids)
            addr_pos_ids = L.data(
                name='addr_pos_ids',
                shape=[None, args.max_addr_len],
                dtype="int64")
            self.holder_list.append(addr_pos_ids)
            addr_sent_ids = L.data(
                name='addr_sent_ids',
                shape=[None, args.max_addr_len],
                dtype="int64")
            self.holder_list.append(addr_sent_ids)
            addr_input_mask = L.data(
                name='addr_input_mask',
                shape=[None, args.max_addr_len],
                dtype="float32")
            self.holder_list.append(addr_input_mask)
            addr_src_ids = ernie_unsqueeze(addr_src_ids)
            addr_pos_ids = ernie_unsqueeze(addr_pos_ids)
            addr_sent_ids = ernie_unsqueeze(addr_sent_ids)
            addr_input_mask = ernie_unsqueeze(addr_input_mask)
            addr_task_ids = L.zeros_like(addr_sent_ids)
            addr_task_ids = L.cast(addr_task_ids, dtype="int64")
            addr_ernie = encoder_model(
                src_ids=addr_src_ids,
                position_ids=addr_pos_ids,
                sentence_ids=addr_sent_ids,
                input_mask=addr_input_mask,
                config=self.ernie_config,
                task_ids=addr_task_ids,
            )
            addr_repr = addr_ernie.get_pooled_output()

        # get first token as sentence repr
        sent_repr = ernie.get_pooled_output()

        if task != "predict_poi":
            self.query_repr = L.gather(
                sent_repr, self.query_index, overwrite=False)
            self.query_city_embed = query_city_embed
            for_concat = []
            if args.with_city:
                for_concat.append(query_city_embed)
            if args.with_geo_id:
                for_concat.append(self.query_geo)
            if len(for_concat) > 0:
                self.query_repr = L.concat(
                    [self.query_repr] + for_concat, axis=-1)
            # NOTE(review): fc placed outside the `if` so query_repr is always
            # projected to hidden_size (required by the city-score product
            # below) — confirm against the original, unmangled source.
            self.query_repr = L.fc(self.query_repr, self.hidden_size,
                                   act="tanh", name="query_fc")
            self.query_city_score = L.reduce_sum(
                L.l2_normalize(self.query_city_embed, -1) *
                L.l2_normalize(self.query_repr, -1), -1)

        if task != "predict_query":
            # Optional neighborhood signal; the base class returns None.
            neigh_repr = self.neighbor_aggregator(sent_repr)
            self.poi_repr = L.gather(sent_repr, self.poi_index, overwrite=False)
            for_concat = [self.poi_repr, addr_repr, ]
            if args.with_city:
                for_concat.append(poi_city_embed)
            if args.with_geo_id:
                for_concat.append(self.poi_geo)
            if neigh_repr is not None:
                poi_neigh_repr = L.gather(
                    neigh_repr, self.poi_index, overwrite=False)
                for_concat.append(poi_neigh_repr)
            self.poi_repr = L.concat(for_concat, axis=-1)
            self.poi_repr = L.fc(self.poi_repr, self.hidden_size,
                                 act="tanh", name="pos_fc")

        if task == "pointwise":
            self.pointwise_loss()
        elif task == "pairwise":
            self.pairwise_loss()
        elif task == "listwise":
            self.listwise_loss(args)
        elif task == "listwise_hinge":
            self.listwise_hinge_loss()

    def pointwise_loss(self):
        """point wise model: sigmoid cross-entropy on query·poi logits."""
        self.logits = L.reduce_sum(self.query_repr * self.poi_repr, -1)
        self.score = L.sigmoid(self.logits)
        self.loss = L.sigmoid_cross_entropy_with_logits(
            L.reshape(self.logits, [-1, 1]),
            L.reshape(self.labels, [-1, 1]))
        auc_label = L.cast(self.labels, dtype="int64")
        auc_label.stop_gradients = True
        _, self.batch_auc, _ = L.auc(
            L.reshape(self.score, [-1, 1]),
            L.reshape(auc_label, [-1, 1]))
        self.metrics = [L.reduce_mean(self.loss), self.batch_auc]
        self.loss = L.reduce_mean(self.loss)

    def pairwise_loss(self):
        """pairwise model: hinge loss over (positive, negative) POI halves."""
        # poi_repr batches positives then negatives; split along dim 0.
        poi_repr = L.split(self.poi_repr, 2, dim=0)
        pos_repr, neg_repr = poi_repr
        pos_pred = L.cos_sim(self.query_repr, pos_repr)
        neg_pred = L.cos_sim(self.query_repr, neg_repr)
        mode = 'hinge_loss'
        # log(1 + e-z), max(0, 1 - z)
        if 'hinge_loss' == mode:
            theta_z = L.relu(1 + neg_pred - pos_pred)
        elif 'logistic_loss' == mode:
            theta_z = L.log(1 + L.exp(neg_pred - pos_pred))
        self.loss = L.reduce_mean(theta_z)
        # fraction of correctly ordered pairs (with epsilon against /0)
        pos_cnt = L.reduce_sum(
            L.cast(L.greater_than(pos_pred, neg_pred), dtype="float32"))
        neg_cnt = L.reduce_sum(
            L.cast(L.less_than(pos_pred, neg_pred), dtype="float32"))
        self.order = pos_cnt / (1e-5 + neg_cnt)
        self.metrics = [self.loss, self.order]

    def listwise_loss(self, args):
        """listwise model: softmax cross-entropy over all in-batch POIs."""
        self.logits = L.matmul(
            self.query_repr, self.poi_repr, transpose_y=True)
        if self.norm_score:
            self.logits = L.softsign(self.logits)
        if args.scale_softmax:
            # learnable temperature (scale**2 keeps it non-negative) and bias
            scale = L.create_parameter(
                shape=[1],
                dtype="float32",
                name="final_scale",
                default_initializer=F.initializer.ConstantInitializer(value=1.0))
            bias = L.create_parameter(
                shape=[1],
                dtype="float32",
                name="final_bias",
                default_initializer=F.initializer.ConstantInitializer(value=0.0))
            self.logits = self.logits * scale * scale + bias
        self.score = L.softmax(self.logits)
        self.loss = L.softmax_with_cross_entropy(self.logits, self.labels)
        self.loss = L.reduce_mean(self.loss)
        self.acc = L.accuracy(L.softmax(self.logits), self.labels)
        self.metrics = [self.loss, self.acc]

    def listwise_hinge_loss(self):
        """listwise hinge loss model: margin 0.3 on cosine similarities."""
        self.poi_repr = L.l2_normalize(self.poi_repr, -1)
        self.query_repr = L.l2_normalize(self.query_repr, -1)
        pos_logits = L.reduce_sum(
            self.query_repr * self.poi_repr, -1, keep_dim=True)
        neg_logits = L.matmul(
            self.query_repr, self.poi_repr, transpose_y=True)
        self.loss = L.reduce_mean(L.relu(neg_logits - pos_logits + 0.3))
        self.acc = L.accuracy(L.softmax(neg_logits), self.labels)
        self.metrics = [self.loss, self.acc]

    def neighbor_aggregator(self, sent_repr):
        """neighbor aggregation: no-op hook, subclasses may override."""
        return None
def main(args):
    """Finetune/evaluate an ERNIE ranking model.

    Drives the whole lifecycle from ``args``: build reader and programs,
    optionally transpile for NCCL2 distributed training, restore a checkpoint
    or pretraining params, train with periodic save/eval, then run the final
    dev/test/diagnostic passes.
    """
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    if args.use_cuda:
        dev_list = fluid.cuda_places()
        place = dev_list[0]
        dev_count = len(dev_list)
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    exe = fluid.Executor(place)

    reader = task_reader.RankReader(
        vocab_path=args.vocab_path,
        label_map_config=args.label_map_config,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case,
        in_tokens=args.in_tokens,
        random_seed=args.random_seed,
        tokenizer=args.tokenizer,
        is_classify=args.is_classify,
        is_regression=args.is_regression,
        for_cn=args.for_cn,
        task_id=args.task_id,
    )

    if not (args.do_train or args.do_val or args.do_test):
        raise ValueError(
            "For args `do_train`, `do_val` and `do_test`, at "
            "least one of them must be True.",
        )

    if args.do_test:
        assert args.test_save is not None

    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    if args.do_train:
        train_data_generator = reader.data_generator(
            input_file=args.train_set,
            batch_size=args.batch_size,
            epoch=args.epoch,
            dev_count=dev_count,
            shuffle=True,
            phase="train",
        )

        num_train_examples = reader.get_num_examples(args.train_set)

        # In token mode batch_size counts tokens, so steps divide differently.
        if args.in_tokens:
            if args.batch_size < args.max_seq_len:
                raise ValueError(
                    'if in_tokens=True, batch_size should greater than max_sqelen, \
got batch_size:%d seqlen:%d' % (args.batch_size, args.max_seq_len))
            max_train_steps = args.epoch * num_train_examples // (
                args.batch_size // args.max_seq_len) // dev_count
        else:
            max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count

        warmup_steps = int(max_train_steps * args.warmup_proportion)
        log.info("Device count: %d" % dev_count)
        log.info("Num train examples: %d" % num_train_examples)
        log.info("Max train steps: %d" % max_train_steps)
        log.info("Num warmup steps: %d" % warmup_steps)

        train_program = fluid.Program()
        if args.random_seed is not None and args.enable_ce:
            train_program.random_seed = args.random_seed

        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                train_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='train_reader',
                    ernie_config=ernie_config,
                    is_classify=args.is_classify,
                    is_regression=args.is_regression,
                )
                scheduled_lr, loss_scaling = optimization(
                    loss=graph_vars["loss"],
                    warmup_steps=warmup_steps,
                    num_train_steps=max_train_steps,
                    learning_rate=args.learning_rate,
                    train_program=train_program,
                    startup_prog=startup_prog,
                    weight_decay=args.weight_decay,
                    scheduler=args.lr_scheduler,
                    use_fp16=args.use_fp16,
                    use_dynamic_loss_scaling=args.use_dynamic_loss_scaling,
                    init_loss_scaling=args.init_loss_scaling,
                    incr_every_n_steps=args.incr_every_n_steps,
                    decr_every_n_nan_or_inf=args.decr_every_n_nan_or_inf,
                    incr_ratio=args.incr_ratio,
                    decr_ratio=args.decr_ratio,
                )

        if args.verbose:
            if args.in_tokens:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program,
                    batch_size=args.batch_size // args.max_seq_len,
                )
            else:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program,
                    batch_size=args.batch_size,
                )
            log.info("Theoretical memory usage in training: %.3f - %.3f %s"
                     % (lower_mem, upper_mem, unit))

    if args.do_val or args.do_test:
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                test_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='test_reader',
                    ernie_config=ernie_config,
                    is_classify=args.is_classify,
                    is_regression=args.is_regression,
                )
        test_prog = test_prog.clone(for_test=True)

    nccl2_num_trainers = 1
    nccl2_trainer_id = 0
    if args.is_distributed:
        trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
        worker_endpoints_env = os.getenv("PADDLE_TRAINER_ENDPOINTS")
        current_endpoint = os.getenv("PADDLE_CURRENT_ENDPOINT")
        worker_endpoints = worker_endpoints_env.split(",")
        trainers_num = len(worker_endpoints)
        log.info("worker_endpoints:{} trainers_num:{} current_endpoint:{} \
trainer_id:{}".format(worker_endpoints, trainers_num, current_endpoint,
                      trainer_id))
        config = fluid.DistributeTranspilerConfig()
        config.mode = "nccl2"
        t = fluid.DistributeTranspiler(config=config)
        t.transpile(
            trainer_id,
            trainers=worker_endpoints_env,
            current_endpoint=current_endpoint,
            program=train_program if args.do_train else test_prog,
            startup_program=startup_prog,
        )
        nccl2_num_trainers = trainers_num
        nccl2_trainer_id = trainer_id

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    if args.do_train:
        if args.init_checkpoint and args.init_pretraining_params:
            log.warning(
                "WARNING: args 'init_checkpoint' and 'init_pretraining_params' "
                "both are set! Only arg 'init_checkpoint' is made valid.",
            )
        if args.init_checkpoint:
            init_checkpoint(
                exe,
                args.init_checkpoint,
                main_program=startup_prog,
                use_fp16=args.use_fp16,
            )
        elif args.init_pretraining_params:
            init_pretraining_params(
                exe,
                args.init_pretraining_params,
                main_program=startup_prog,
                use_fp16=args.use_fp16,
            )
    elif args.do_val or args.do_test:
        if not args.init_checkpoint:
            # Fix: the adjacent literals used to concatenate without a space
            # ("...set ifonly doing...").
            raise ValueError(
                "args 'init_checkpoint' should be set if "
                "only doing validation or testing!",
            )
        init_checkpoint(
            exe,
            args.init_checkpoint,
            main_program=startup_prog,
            use_fp16=args.use_fp16,
        )

    if args.do_train:
        exec_strategy = fluid.ExecutionStrategy()
        if args.use_fast_executor:
            exec_strategy.use_experimental_executor = True
        exec_strategy.num_threads = dev_count
        exec_strategy.num_iteration_per_drop_scope = args.num_iteration_per_drop_scope

        train_exe = fluid.ParallelExecutor(
            use_cuda=args.use_cuda,
            loss_name=graph_vars["loss"].name,
            exec_strategy=exec_strategy,
            main_program=train_program,
            num_trainers=nccl2_num_trainers,
            trainer_id=nccl2_trainer_id,
        )
        train_pyreader.decorate_tensor_provider(train_data_generator)
    else:
        train_exe = None

    test_exe = exe
    if args.do_val or args.do_test:
        if args.use_multi_gpu_test:
            test_exe = fluid.ParallelExecutor(
                use_cuda=args.use_cuda,
                main_program=test_prog,
                share_vars_from=train_exe,
            )

    if args.do_train:
        train_pyreader.start()
        steps = 0
        if warmup_steps > 0:
            graph_vars["learning_rate"] = scheduled_lr
        ce_info = []
        time_begin = time.time()
        last_epoch = 0
        current_epoch = 0
        while True:
            try:
                steps += 1
                if steps % args.skip_steps != 0:
                    # Cheap step: run without fetching anything.
                    train_exe.run(fetch_list=[])
                else:
                    outputs = evaluate(
                        train_exe,
                        train_program,
                        train_pyreader,
                        graph_vars,
                        "train",
                        metric=args.metric,
                        is_classify=args.is_classify,
                        is_regression=args.is_regression,
                    )
                    if args.verbose:
                        verbose = "train pyreader queue size: %d, " % train_pyreader.queue.size()
                        verbose += "learning rate: %f" % (
                            outputs["learning_rate"]
                            if warmup_steps > 0 else args.learning_rate)
                        log.info(verbose)

                    current_example, current_epoch = reader.get_train_progress()
                    time_end = time.time()
                    used_time = time_end - time_begin

                    if args.is_classify:
                        log.info(
                            "epoch: %d, progress: %d/%d, step: %d, ave loss: %f, "
                            "ave acc: %f, speed: %f steps/s" % (
                                current_epoch,
                                current_example,
                                num_train_examples,
                                steps,
                                outputs["loss"],
                                outputs['acc'],
                                args.skip_steps / used_time,
                            ),
                        )
                        # NOTE(review): rows have 2 fields but index 2 is read
                        # in the enable_ce block below — looks like a latent
                        # bug; kept as-is to preserve behavior.
                        ce_info.append([outputs["loss"], used_time], )
                    if args.is_regression:
                        log.info(
                            "epoch: %d, progress: %d/%d, step: %d, ave loss: %f, "
                            " speed: %f steps/s" % (
                                current_epoch,
                                current_example,
                                num_train_examples,
                                steps,
                                outputs["loss"],
                                args.skip_steps / used_time,
                            ),
                        )
                    time_begin = time.time()

                if nccl2_trainer_id == 0:
                    if steps % args.save_steps == 0:
                        save_path = os.path.join(
                            args.checkpoints,
                            "step_" + str(steps),
                        )
                        fluid.io.save_persistables(
                            exe,
                            save_path,
                            train_program,
                        )
                    if steps % args.validation_steps == 0 or last_epoch != current_epoch:
                        # evaluate dev set
                        if args.do_val:
                            evaluate_wrapper(
                                args,
                                reader,
                                exe,
                                test_prog,
                                test_pyreader,
                                graph_vars,
                                current_epoch,
                                steps,
                            )
                        if args.do_test:
                            predict_wrapper(
                                args,
                                reader,
                                exe,
                                test_prog,
                                test_pyreader,
                                graph_vars,
                                current_epoch,
                                steps,
                            )

                if last_epoch != current_epoch:
                    last_epoch = current_epoch

            except fluid.core.EOFException:
                # End of data: save a final checkpoint and stop training.
                save_path = os.path.join(
                    args.checkpoints,
                    "step_" + str(steps),
                )
                fluid.io.save_persistables(exe, save_path, train_program)
                train_pyreader.reset()
                break

    if args.enable_ce:
        card_num = get_cards()
        ce_loss = 0
        ce_acc = 0
        ce_time = 0
        try:
            ce_loss = ce_info[-2][0]
            ce_acc = ce_info[-2][1]
            ce_time = ce_info[-2][2]
        # Fix: was a bare `except:` (also swallowed SystemExit/KeyboardInterrupt).
        except Exception:
            log.info("ce info error")
        log.info("kpis\ttrain_duration_card%s\t%s" % (card_num, ce_time))
        log.info("kpis\ttrain_loss_card%s\t%f" % (card_num, ce_loss))
        log.info("kpis\ttrain_acc_card%s\t%f" % (card_num, ce_acc))

    # final eval on dev set
    if args.do_val:
        evaluate_wrapper(
            args,
            reader,
            exe,
            test_prog,
            test_pyreader,
            graph_vars,
            current_epoch,
            steps,
        )

    # final eval on test set
    if args.do_test:
        predict_wrapper(
            args,
            reader,
            exe,
            test_prog,
            test_pyreader,
            graph_vars,
            current_epoch,
            steps,
        )

    # final eval on dianostic, hack for glue-ax
    if args.diagnostic:
        test_pyreader.decorate_tensor_provider(
            reader.data_generator(
                args.diagnostic,
                batch_size=args.batch_size,
                epoch=1,
                dev_count=1,
                shuffle=False,
            ),
        )
        log.info("Final diagnostic")
        qids, preds, probs = predict(
            test_exe,
            test_prog,
            test_pyreader,
            graph_vars,
            is_classify=args.is_classify,
            is_regression=args.is_regression,
        )
        assert len(qids) == len(preds), '{} v.s. {}'.format(
            len(qids),
            len(preds),
        )
        with open(args.diagnostic_save, 'w') as f:
            for id, s, p in zip(qids, preds, probs):
                f.write('{}\t{}\t{}\n'.format(id, s, p))
        log.info("Done final diagnostic, saving to {}".format(
            args.diagnostic_save,
        ))
def get_role_init_dict(args, suf):
    """Build and return the inference context for the role sequence-labeling task.

    Loads the label map, the ERNIE config and the role reader, builds the
    test program (when ``do_val``/``do_test`` is set), runs the startup
    program and restores parameters from ``args.init_checkpoint``, then
    bundles everything a caller needs into one dict.

    Args:
        args: parsed command-line arguments for the role task; ``num_labels``
            is set on it as a side effect.
        suf: suffix appended to the pyreader name so several instances can
            coexist in one process.

    Returns:
        dict with keys: log, args, labels_map, ernie_config, place,
        dev_count, reader, startup_prog, test_prog, test_pyreader,
        graph_vars, nccl2_num_trainers, nccl2_trainer_id, exe.
    """
    log = logging.getLogger(__name__)
    check_cuda(args.use_cuda)

    # label name -> integer id, one "name\tid" pair per line
    labels_map = {}
    for line in utils.read_by_lines(args.label_map_config):
        arr = line.split("\t")
        labels_map[arr[0]] = int(arr[1])
    args.num_labels = len(labels_map)

    print("=========ERNIE CONFIG============")
    ernie_config = ErnieConfig(args.ernie_config_path)
    print("=========ERNIE CONFIG============")

    if args.use_cuda:
        dev_list = fluid.cuda_places()
        place = dev_list[0]
        print("==============place==================", place)
        dev_count = len(dev_list)
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    print("==============place, dev_count==================", place, dev_count)

    reader = task_reader.RoleSequenceLabelReader(
        vocab_path=args.vocab_path,
        labels_map=labels_map,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case,
        in_tokens=args.in_tokens,
        random_seed=args.random_seed,
        task_id=args.task_id)

    if not (args.do_train or args.do_val or args.do_test):
        raise ValueError("For args `do_train`, `do_val` and `do_test`, at "
                         "least one of them must be True.")

    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    # NOTE(review): test_prog/test_pyreader/graph_vars are only bound when
    # do_val or do_test is set; calling this with only do_train set would
    # raise NameError in the return dict below — confirm callers always
    # enable one of do_val/do_test.
    if args.do_val or args.do_test:
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                # TODO: adjust pyreader_name again so it differs per instance
                test_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='test_reader_role' + suf,
                    ernie_config=ernie_config)
        test_prog = test_prog.clone(for_test=True)

    nccl2_num_trainers = 1
    nccl2_trainer_id = 0
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    if args.do_val or args.do_test:
        if not args.init_checkpoint:
            raise ValueError("args 'init_checkpoint' should be set if"
                             "only doing validation or testing!")
        init_checkpoint(exe,
                        args.init_checkpoint,
                        main_program=startup_prog,
                        use_fp16=args.use_fp16)

    # Bundle everything the caller needs into one dict.
    return {
        'log': log,
        'args': args,
        'labels_map': labels_map,
        'ernie_config': ernie_config,
        'place': place,
        'dev_count': dev_count,
        'reader': reader,
        'startup_prog': startup_prog,
        'test_prog': test_prog,
        'test_pyreader': test_pyreader,
        'graph_vars': graph_vars,
        'nccl2_num_trainers': nccl2_num_trainers,
        'nccl2_trainer_id': nccl2_trainer_id,
        'exe': exe,
    }
def main(args):
    """Export a classification inference model and run batch prediction.

    Builds the prediction program, restores parameters from
    ``args.init_checkpoint``, saves an inference model under
    ``args.save_inference_model_path``, then runs the saved model through
    the native analysis predictor over ``args.predict_set``, printing
    per-example probabilities and a throughput summary.

    Raises:
        ValueError: if ``args.init_checkpoint`` is not set.
        AssertionError: if ``args.save_inference_model_path`` is not set.
    """
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    reader = ClassifyReader(
        vocab_path=args.vocab_path,
        label_map_config=args.label_map_config,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case,
        in_tokens=False,
        is_inference=True)

    predict_prog = fluid.Program()
    predict_startup = fluid.Program()
    with fluid.program_guard(predict_prog, predict_startup):
        with fluid.unique_name.guard():
            predict_pyreader, probs, feed_target_names = create_model(
                args,
                pyreader_name='predict_reader',
                ernie_config=ernie_config,
                is_classify=True,
                is_prediction=True)

    predict_prog = predict_prog.clone(for_test=True)

    # Pick the execution place once (previously it was selected here and then
    # immediately overwritten by a redundant `== True` ternary).
    if args.use_cuda:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    exe = fluid.Executor(place)
    exe.run(predict_startup)

    if args.init_checkpoint:
        init_pretraining_params(exe, args.init_checkpoint, predict_prog)
    else:
        raise ValueError("args 'init_checkpoint' should be set for prediction!")

    assert args.save_inference_model_path, "args save_inference_model_path should be set for prediction"
    # Name the export dir after the checkpoint dir, e.g. "step_100_inference_model".
    _, ckpt_dir = os.path.split(args.init_checkpoint.rstrip('/'))
    dir_name = ckpt_dir + '_inference_model'
    model_path = os.path.join(args.save_inference_model_path, dir_name)
    log.info("save inference model to %s" % model_path)
    fluid.io.save_inference_model(model_path,
                                  feed_target_names, [probs],
                                  exe,
                                  main_program=predict_prog)

    # Configure the native predictor over the just-saved inference model.
    config = AnalysisConfig(model_path)
    if not args.use_cuda:
        log.info("disable gpu")
        config.disable_gpu()
        config.switch_ir_optim(True)
    else:
        log.info("using gpu")
        config.enable_use_gpu(1024)

    # Create PaddlePredictor
    predictor = create_paddle_predictor(config)

    predict_data_generator = reader.data_generator(
        input_file=args.predict_set,
        batch_size=args.batch_size,
        epoch=1,
        shuffle=False)

    log.info("-------------- prediction results --------------")
    np.set_printoptions(precision=4, suppress=True)
    index = 0
    total_time = 0
    for sample in predict_data_generator():
        # sample layout: src_ids, sent_ids, pos_ids, task_ids, input_mask
        src_ids = sample[0]
        sent_ids = sample[1]
        pos_ids = sample[2]
        task_ids = sample[3]  # not part of this model's feed list
        input_mask = sample[4]
        inputs = [
            array2tensor(ndarray)
            for ndarray in [src_ids, sent_ids, pos_ids, input_mask]
        ]
        begin_time = time.time()
        outputs = predictor.run(inputs)
        end_time = time.time()
        total_time += end_time - begin_time

        # parse outputs: one tab-joined probability row per example
        output = outputs[0]
        batch_result = output.as_ndarray()
        for single_example_probs in batch_result:
            print('\t'.join(map(str, single_example_probs.tolist())))
            index += 1

    # Guard against an empty predict set, where total_time stays 0 and the
    # original code raised ZeroDivisionError.
    qps = index / total_time if total_time > 0 else 0.0
    log.info("qps:{}\ttotal_time:{}\ttotal_example:{}\tbatch_size:{}".format(
        qps, total_time, index, args.batch_size))
def main(args):
    """Fine-tune / evaluate ERNIE on the misspelling sequence-labeling task.

    Builds train/test programs as requested by ``args.do_train`` /
    ``args.do_val`` / ``args.do_test``, optionally wires the nccl2
    distributed transpiler, restores a checkpoint or pretraining params,
    runs the training loop (logging f1/precision/recall every
    ``skip_steps``), and finishes with a final dev/test evaluation.
    """
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    # Device selection: first CUDA place, or CPU with CPU_NUM workers.
    if args.use_cuda:
        dev_list = fluid.cuda_places()
        place = dev_list[0]
        dev_count = len(dev_list)
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    reader = task_reader.MisspellingReader(
        vocab_path=args.vocab_path,
        label_map_config=args.label_map_config,
        max_seq_len=args.max_seq_len,
        tokenizer=args.tokenizer,
        do_lower_case=args.do_lower_case,
        in_tokens=args.in_tokens,
        random_seed=args.random_seed,
        task_id=args.task_id)

    if not (args.do_train or args.do_val or args.do_test):
        raise ValueError("For args `do_train`, `do_val` and `do_test`, at "
                         "least one of them must be True.")

    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    if args.do_train:
        train_data_generator = reader.data_generator(
            input_file=args.train_set,
            batch_size=args.batch_size,
            epoch=args.epoch,
            shuffle=True,
            phase="train")

        num_train_examples = reader.get_num_examples(args.train_set)

        # In token mode the effective batch count is batch_size // max_seq_len.
        if args.in_tokens:
            if args.batch_size < args.max_seq_len:
                raise ValueError(
                    'if in_tokens=True, batch_size should greater than max_sqelen, got batch_size:%d seqlen:%d'
                    % (args.batch_size, args.max_seq_len))
            max_train_steps = args.epoch * num_train_examples // (
                args.batch_size // args.max_seq_len) // dev_count
        else:
            max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count

        warmup_steps = int(max_train_steps * args.warmup_proportion)
        log.info("Device count: %d" % dev_count)
        log.info("Num train examples: %d" % num_train_examples)
        log.info("Max train steps: %d" % max_train_steps)
        log.info("Num warmup steps: %d" % warmup_steps)

        train_program = fluid.Program()

        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                train_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='train_reader',
                    ernie_config=ernie_config)
                scheduled_lr, loss_scaling = optimization(
                    loss=graph_vars["loss"],
                    warmup_steps=warmup_steps,
                    num_train_steps=max_train_steps,
                    learning_rate=args.learning_rate,
                    train_program=train_program,
                    startup_prog=startup_prog,
                    weight_decay=args.weight_decay,
                    scheduler=args.lr_scheduler,
                    use_fp16=args.use_fp16,
                    use_dynamic_loss_scaling=args.use_dynamic_loss_scaling,
                    init_loss_scaling=args.init_loss_scaling,
                    incr_every_n_steps=args.incr_every_n_steps,
                    decr_every_n_nan_or_inf=args.decr_every_n_nan_or_inf,
                    incr_ratio=args.incr_ratio,
                    decr_ratio=args.decr_ratio)

        if args.verbose:
            if args.in_tokens:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program,
                    batch_size=args.batch_size // args.max_seq_len)
            else:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program, batch_size=args.batch_size)
            log.info("Theoretical memory usage in training: %.3f - %.3f %s" %
                     (lower_mem, upper_mem, unit))

    if args.do_val or args.do_test:
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                test_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='test_reader',
                    ernie_config=ernie_config)
        test_prog = test_prog.clone(for_test=True)

    nccl2_num_trainers = 1
    nccl2_trainer_id = 0
    if args.is_distributed:
        # Endpoints/ranks come from the PADDLE_* launcher environment.
        trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
        worker_endpoints_env = os.getenv("PADDLE_TRAINER_ENDPOINTS")
        current_endpoint = os.getenv("PADDLE_CURRENT_ENDPOINT")
        worker_endpoints = worker_endpoints_env.split(",")
        trainers_num = len(worker_endpoints)

        log.info("worker_endpoints:{} trainers_num:{} current_endpoint:{} \
              trainer_id:{}".format(worker_endpoints, trainers_num,
                                    current_endpoint, trainer_id))

        # prepare nccl2 env.
        config = fluid.DistributeTranspilerConfig()
        config.mode = "nccl2"
        t = fluid.DistributeTranspiler(config=config)
        t.transpile(trainer_id,
                    trainers=worker_endpoints_env,
                    current_endpoint=current_endpoint,
                    program=train_program if args.do_train else test_prog,
                    startup_program=startup_prog)
        nccl2_num_trainers = trainers_num
        nccl2_trainer_id = trainer_id

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    # Parameter initialization: a full checkpoint wins over pretraining params.
    if args.do_train:
        if args.init_checkpoint and args.init_pretraining_params:
            log.info(
                "WARNING: args 'init_checkpoint' and 'init_pretraining_params' "
                "both are set! Only arg 'init_checkpoint' is made valid.")
        if args.init_checkpoint:
            init_checkpoint(exe,
                            args.init_checkpoint,
                            main_program=startup_prog,
                            use_fp16=args.use_fp16)
        elif args.init_pretraining_params:
            init_pretraining_params(exe,
                                    args.init_pretraining_params,
                                    main_program=startup_prog,
                                    use_fp16=args.use_fp16)
    elif args.do_val or args.do_test:
        if not args.init_checkpoint:
            raise ValueError("args 'init_checkpoint' should be set if"
                             "only doing validation or testing!")
        init_checkpoint(exe,
                        args.init_checkpoint,
                        main_program=startup_prog,
                        use_fp16=args.use_fp16)

    if args.do_train:
        exec_strategy = fluid.ExecutionStrategy()
        if args.use_fast_executor:
            exec_strategy.use_experimental_executor = True
        exec_strategy.num_threads = dev_count
        exec_strategy.num_iteration_per_drop_scope = args.num_iteration_per_drop_scope

        train_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                           loss_name=graph_vars["loss"].name,
                                           exec_strategy=exec_strategy,
                                           main_program=train_program,
                                           num_trainers=nccl2_num_trainers,
                                           trainer_id=nccl2_trainer_id)

        train_pyreader.set_batch_generator(train_data_generator)
    else:
        train_exe = None

    if args.do_val or args.do_test:
        # NOTE(review): share_vars_from=train_exe is None when do_train is
        # False — confirm ParallelExecutor tolerates that in this version.
        test_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                          main_program=test_prog,
                                          share_vars_from=train_exe)

    if args.do_train:
        train_pyreader.start()
        steps = 0
        graph_vars["learning_rate"] = scheduled_lr
        time_begin = time.time()

        # Loop until the pyreader signals end-of-data via EOFException.
        while True:
            try:
                steps += 1
                if steps % args.skip_steps != 0:
                    # Fast path: run without fetching anything.
                    train_exe.run(fetch_list=[])
                else:
                    fetch_list = [
                        graph_vars["num_infer"].name,
                        graph_vars["num_label"].name,
                        graph_vars["num_correct"].name,
                        graph_vars["loss"].name,
                        graph_vars['learning_rate'].name,
                    ]
                    out = train_exe.run(fetch_list=fetch_list)
                    num_infer, num_label, num_correct, np_loss, np_lr = out
                    lr = float(np_lr[0])
                    loss = np_loss.mean()
                    precision, recall, f1 = calculate_f1(
                        num_label, num_infer, num_correct)
                    if args.verbose:
                        log.info(
                            "train pyreader queue size: %d, learning rate: %f"
                            % (train_pyreader.queue.size(),
                               lr if warmup_steps > 0 else args.learning_rate))

                    current_example, current_epoch = reader.get_train_progress()
                    time_end = time.time()
                    used_time = time_end - time_begin
                    log.info(
                        "epoch: %d, progress: %d/%d, step: %d, loss: %f, "
                        "f1: %f, precision: %f, recall: %f, speed: %f steps/s"
                        % (current_epoch, current_example, num_train_examples,
                           steps, loss, f1, precision, recall,
                           args.skip_steps / used_time))
                    time_begin = time.time()

                # Only the chief trainer writes checkpoints.
                if nccl2_trainer_id == 0 and steps % args.save_steps == 0:
                    save_path = os.path.join(args.checkpoints,
                                             "step_" + str(steps))
                    latest_path = os.path.join(
                        args.checkpoints, "latest"
                    )  # Always save the current copy and cover with the latest copy
                    fluid.io.save_persistables(exe, save_path, train_program)
                    fluid.io.save_persistables(exe, latest_path, train_program)

                # NOTE(review): current_epoch/current_example are only set on
                # logging steps — if validation_steps is not a multiple of
                # skip_steps the first validation may hit an unbound name;
                # confirm the configured step values.
                if nccl2_trainer_id == 0 and steps % args.validation_steps == 0:
                    # evaluate dev set
                    if args.do_val:
                        evaluate_wrapper(reader, exe, test_prog, test_pyreader,
                                         graph_vars, current_epoch, steps)
                    # evaluate test set
                    if args.do_test:
                        predict_wrapper(reader, exe, test_prog, test_pyreader,
                                        graph_vars, current_epoch, steps)

            except fluid.core.EOFException:
                # Data exhausted: save a final checkpoint and stop.
                save_path = os.path.join(args.checkpoints,
                                         "step_" + str(steps))
                fluid.io.save_persistables(exe, save_path, train_program)
                train_pyreader.reset()
                break

    # final eval on dev set
    if nccl2_trainer_id == 0 and args.do_val:
        current_example, current_epoch = reader.get_train_progress()
        evaluate_wrapper(reader, exe, test_prog, test_pyreader, graph_vars,
                         current_epoch, 'final')

    # final eval on test set
    if nccl2_trainer_id == 0 and args.do_test:
        current_example, current_epoch = reader.get_train_progress()
        predict_wrapper(reader, exe, test_prog, test_pyreader, graph_vars,
                        current_epoch, 'final')
def train(args):
    """Run ERNIE pretraining (masked LM + next-sentence prediction).

    Builds the train/test programs, optionally sets up nccl2 multi-trainer
    training from the ``worker_endpoints``/``current_endpoint`` environment,
    then consumes ``ErnieDataReader`` batches until ``args.num_train_steps``,
    periodically logging metrics, saving persistables, and running
    validation via ``predict_wrapper``.
    """
    print("pretraining start")
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    train_program = fluid.Program()
    startup_prog = fluid.Program()
    with fluid.program_guard(train_program, startup_prog):
        with fluid.unique_name.guard():
            train_pyreader, next_sent_acc, mask_lm_loss, total_loss = create_model(
                pyreader_name='train_reader', ernie_config=ernie_config)
            scheduled_lr = optimization(loss=total_loss,
                                        warmup_steps=args.warmup_steps,
                                        num_train_steps=args.num_train_steps,
                                        learning_rate=args.learning_rate,
                                        train_program=train_program,
                                        startup_prog=startup_prog,
                                        weight_decay=args.weight_decay,
                                        scheduler=args.lr_scheduler,
                                        use_fp16=args.use_fp16,
                                        loss_scaling=args.loss_scaling)

            # Keep the fetched metrics out of memory reuse so they stay valid.
            fluid.memory_optimize(input_program=train_program,
                                  skip_opt_set=[
                                      next_sent_acc.name, mask_lm_loss.name,
                                      total_loss.name
                                  ])

    # Test program reuses the same variable names (unique_name guard) so it
    # shares parameters with the train program.
    test_prog = fluid.Program()
    with fluid.program_guard(test_prog, startup_prog):
        with fluid.unique_name.guard():
            test_pyreader, next_sent_acc, mask_lm_loss, total_loss = create_model(
                pyreader_name='test_reader', ernie_config=ernie_config)

    test_prog = test_prog.clone(for_test=True)

    if args.use_cuda:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    print("Device count %d" % dev_count)
    print("theoretical memory usage: ")
    if args.in_tokens:
        print(
            fluid.contrib.memory_usage(program=train_program,
                                       batch_size=args.batch_size //
                                       args.max_seq_len))
    else:
        print(
            fluid.contrib.memory_usage(program=train_program,
                                       batch_size=args.batch_size))

    nccl2_num_trainers = 1
    nccl2_trainer_id = 0
    print("args.is_distributed:", args.is_distributed)
    if args.is_distributed:
        # Cluster layout comes from the worker_endpoints/current_endpoint env.
        worker_endpoints_env = os.getenv("worker_endpoints")
        worker_endpoints = worker_endpoints_env.split(",")
        trainers_num = len(worker_endpoints)
        current_endpoint = os.getenv("current_endpoint")
        trainer_id = worker_endpoints.index(current_endpoint)
        if trainer_id == 0:
            # Give the other workers time to come up before the chief starts.
            print("train_id == 0, sleep 60s")
            time.sleep(60)
        print("worker_endpoints:{} trainers_num:{} current_endpoint:{} \
              trainer_id:{}".format(worker_endpoints, trainers_num,
                                    current_endpoint, trainer_id))

        # prepare nccl2 env.
        config = fluid.DistributeTranspilerConfig()
        config.mode = "nccl2"
        t = fluid.DistributeTranspiler(config=config)
        t.transpile(trainer_id,
                    trainers=worker_endpoints_env,
                    current_endpoint=current_endpoint,
                    program=train_program,
                    startup_program=startup_prog)
        nccl2_num_trainers = trainers_num
        nccl2_trainer_id = trainer_id

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    if args.init_checkpoint and args.init_checkpoint != "":
        init_checkpoint(exe, args.init_checkpoint, train_program,
                        args.use_fp16)

    data_reader = ErnieDataReader(
        filelist=args.train_filelist,
        batch_size=args.batch_size,
        vocab_path=args.vocab_path,
        voc_size=ernie_config['vocab_size'],
        epoch=args.epoch,
        max_seq_len=args.max_seq_len,
        generate_neg_sample=args.generate_neg_sample,
        in_tokens=args.in_tokens,
        is_bidirection=args.is_bidirection)

    exec_strategy = fluid.ExecutionStrategy()
    if args.use_fast_executor:
        exec_strategy.use_experimental_executor = True
    exec_strategy.num_threads = dev_count
    exec_strategy.num_iteration_per_drop_scope = min(10, args.skip_steps)

    build_strategy = fluid.BuildStrategy()
    build_strategy.remove_unnecessary_lock = False

    train_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                       loss_name=total_loss.name,
                                       build_strategy=build_strategy,
                                       exec_strategy=exec_strategy,
                                       main_program=train_program,
                                       num_trainers=nccl2_num_trainers,
                                       trainer_id=nccl2_trainer_id)

    # Validation closure is only created when a valid filelist is given.
    if args.valid_filelist and args.valid_filelist != "":
        predict = predict_wrapper(args,
                                  exe,
                                  ernie_config,
                                  test_prog=test_prog,
                                  pyreader=test_pyreader,
                                  fetch_list=[
                                      next_sent_acc.name, mask_lm_loss.name,
                                      total_loss.name
                                  ])

    train_pyreader.decorate_tensor_provider(data_reader.data_generator())
    train_pyreader.start()
    steps = 0
    cost = []
    lm_cost = []
    acc = []
    time_begin = time.time()
    while steps < args.num_train_steps:
        try:
            # Each local step represents one step per trainer in the job.
            steps += nccl2_num_trainers
            skip_steps = args.skip_steps * nccl2_num_trainers

            # Non-chief trainers run without fetching or logging.
            if nccl2_trainer_id != 0:
                train_exe.run(fetch_list=[])
                continue

            if steps % skip_steps != 0:
                train_exe.run(fetch_list=[])
            else:
                each_next_acc, each_mask_lm_cost, each_total_cost, np_lr = train_exe.run(
                    fetch_list=[
                        next_sent_acc.name, mask_lm_loss.name,
                        total_loss.name, scheduled_lr.name
                    ])
                acc.extend(each_next_acc)
                lm_cost.extend(each_mask_lm_cost)
                cost.extend(each_total_cost)

                print("feed_queue size", train_pyreader.queue.size())
                time_end = time.time()
                used_time = time_end - time_begin
                epoch, current_file_index, total_file, current_file, mask_type = data_reader.get_progress(
                )
                print("current learning_rate:%f" % np_lr[0])
                print(
                    "epoch: %d, progress: %d/%d, step: %d, loss: %f, "
                    "ppl: %f, next_sent_acc: %f, speed: %f steps/s, file: %s, mask_type: %s"
                    % (epoch, current_file_index, total_file, steps,
                       np.mean(np.array(cost)),
                       np.mean(np.exp(np.array(lm_cost))),
                       np.mean(np.array(acc)), skip_steps / used_time,
                       current_file, mask_type))
                # Reset windowed metric accumulators after each log line.
                cost = []
                lm_cost = []
                acc = []
                time_begin = time.time()

            if steps % args.save_steps == 0:
                save_path = os.path.join(args.checkpoints,
                                         "step_" + str(steps))
                fluid.io.save_persistables(exe, save_path, train_program)

            if args.valid_filelist and steps % args.validation_steps == 0:
                vali_cost, vali_lm_cost, vali_acc, vali_steps, vali_speed = predict(
                )
                print("[validation_set] epoch: %d, step: %d, "
                      "loss: %f, global ppl: %f, batch-averged ppl: %f, "
                      "next_sent_acc: %f, speed: %f steps/s" %
                      (epoch, steps,
                       np.mean(np.array(vali_cost) / vali_steps),
                       np.exp(np.mean(np.array(vali_lm_cost) / vali_steps)),
                       np.mean(np.exp(np.array(vali_lm_cost) / vali_steps)),
                       np.mean(np.array(vali_acc) / vali_steps), vali_speed))
        except fluid.core.EOFException:
            # Data exhausted before num_train_steps was reached.
            train_pyreader.reset()
            break
def main(args):
    """Export the rank model for inference and run batch prediction.

    Builds the triple-input ranking program, restores parameters from
    ``args.init_checkpoint``, saves an inference model, runs it over
    ``args.predict_set`` with the native analysis predictor, accumulates
    scores across batches, and hands them to ``predict_post_process``.
    """
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    reader = task_reader.RankReader(
        vocab_path=args.vocab_path,
        label_map_config=args.label_map_config,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case,
        in_tokens=False,
        is_inference=True,
    )

    predict_prog = fluid.Program()
    predict_startup = fluid.Program()
    with fluid.program_guard(predict_prog, predict_startup):
        with fluid.unique_name.guard():
            ret = create_model(
                args,
                pyreader_name='predict_reader',
                ernie_config=ernie_config,
                is_classify=True,
                is_prediction=True,
            )
            predict_pyreader = ret['pyreader']
            left_score = ret['left_probs']
            right_score = ret['right_probs']
            type_probs = ret['type_probs']
            feed_targets_name = ret['feed_targets_name']

    predict_prog = predict_prog.clone(for_test=True)

    if args.use_cuda:
        dev_list = fluid.cuda_places()
        place = dev_list[0]
        print('----------place-----------')
        print(place)
        dev_count = len(dev_list)
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    # NOTE(review): this overwrites the place chosen above (dev_list[0] vs
    # CUDAPlace(0) are normally the same device) — looks redundant; confirm.
    place = fluid.CUDAPlace(0) if args.use_cuda == True else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(predict_startup)

    if args.init_checkpoint:
        init_pretraining_params(exe, args.init_checkpoint, predict_prog)
    else:
        raise ValueError(
            "args 'init_checkpoint' should be set for prediction!",
        )

    assert args.save_inference_model_path, \
        "args save_inference_model_path should be set for prediction"
    # Name the export dir after the checkpoint dir, e.g. "step_100_inference_model".
    _, ckpt_dir = os.path.split(args.init_checkpoint.rstrip('/'))
    dir_name = ckpt_dir + '_inference_model'
    model_path = os.path.join(args.save_inference_model_path, dir_name)
    log.info("save inference model to %s" % model_path)
    fluid.io.save_inference_model(
        model_path,
        feed_targets_name,
        [left_score, right_score, type_probs],
        exe,
        main_program=predict_prog,
    )

    # Configure the native predictor over the just-saved inference model.
    config = AnalysisConfig(model_path)
    if not args.use_cuda:
        log.info("disable gpu")
        config.disable_gpu()
    else:
        log.info("using gpu")
        config.enable_use_gpu(1024)

    # Create PaddlePredictor
    predictor = create_paddle_predictor(config)

    predict_data_generator = reader.data_generator(
        input_file=args.predict_set,
        batch_size=args.batch_size,
        epoch=1,
        shuffle=False,
    )

    log.info("-------------- prediction results --------------")
    np.set_printoptions(precision=4, suppress=True)
    index = 0
    total_time = 0
    # Cross-batch accumulators; None until the first batch arrives.
    qid_total = None
    left_score_total = None
    right_score_total = None
    type_prob_total = None
    ent_id_total = None
    for sample in predict_data_generator():
        # Each sample carries three (src, sent, pos, task, mask) input groups
        # plus query ids and entity ids.
        src_ids_1 = sample[0]
        sent_ids_1 = sample[1]
        pos_ids_1 = sample[2]
        task_ids_1 = sample[3]
        input_mask_1 = sample[4]
        src_ids_2 = sample[5]
        sent_ids_2 = sample[6]
        pos_ids_2 = sample[7]
        task_ids_2 = sample[8]
        input_mask_2 = sample[9]
        src_ids_3 = sample[10]
        sent_ids_3 = sample[11]
        pos_ids_3 = sample[12]
        task_ids_3 = sample[13]
        input_mask_3 = sample[14]
        qids = sample[15]
        ent_ids = sample[16]
        inputs = [
            array2tensor(ndarray) for ndarray in [
                src_ids_1,
                sent_ids_1,
                pos_ids_1,
                task_ids_1,
                input_mask_1,
                src_ids_2,
                sent_ids_2,
                pos_ids_2,
                task_ids_2,
                input_mask_2,
                src_ids_3,
                sent_ids_3,
                pos_ids_3,
                task_ids_3,
                input_mask_3,
                qids,
            ]
        ]
        begin_time = time.time()
        outputs = predictor.run(inputs)
        end_time = time.time()
        total_time += end_time - begin_time

        # Outputs arrive as flat float buffers; reshape to batch matrices.
        output_l = outputs[0]
        output_r = outputs[1]
        output_t = outputs[2]
        output_left = output_l.data.float_data()
        output_right = output_r.data.float_data()
        output_type = output_t.data.float_data()
        output_type = np.array(output_type)
        batch_result_left = np.array(output_left).reshape(output_l.shape)
        batch_result_right = np.array(output_right).reshape(output_r.shape)
        # Type probabilities come back flat with 24 classes per example.
        batch_result_type = np.array(output_type).reshape(
            int(output_type.shape[0] / 24),
            24,
        )
        if ent_id_total is None:
            ent_id_total = ent_ids
        else:
            ent_id_total = np.concatenate((ent_id_total, ent_ids), axis=0)
        if qid_total is None:
            qid_total = qids
        else:
            qid_total = np.concatenate((qid_total, qids), axis=0)
        if left_score_total is None:
            left_score_total = batch_result_left
        else:
            left_score_total = np.concatenate(
                (left_score_total, batch_result_left),
                axis=0,
            )
        if right_score_total is None:
            right_score_total = batch_result_right
        else:
            right_score_total = np.concatenate(
                (right_score_total, batch_result_right),
                axis=0,
            )
        if type_prob_total is None:
            type_prob_total = batch_result_type
        else:
            type_prob_total = np.concatenate(
                (type_prob_total, batch_result_type),
                axis=0,
            )

    # NOTE(review): right_score_total is accumulated above but never added to
    # predict_res — confirm whether predict_post_process should receive it.
    predict_res = {}
    predict_res['qid_total'] = qid_total
    predict_res['left_score_total'] = left_score_total
    predict_res['type_prob_total'] = type_prob_total
    predict_res['ent_id_total'] = ent_id_total

    predict_post_process(predict_res)
def predict(self):
    """Run classification over the request file and return class-1 probabilities.

    Reads examples from ``data_http.tsv`` next to this module, feeds them
    through the loaded inference program (``self.infer_program``) using the
    exported feed names in ``self.feed_target_names``, and returns the
    probability of class 1 for each example.
    """
    # Input file produced by the companion request-writing method.
    dir_path = os.path.join(os.path.dirname(__file__), "data_http.tsv")
    args = parser.parse_args()
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()
    reader = ClassifyReader(vocab_path=args.vocab_path,
                            label_map_config=args.label_map_config,
                            max_seq_len=args.max_seq_len,
                            do_lower_case=args.do_lower_case,
                            in_tokens=False,
                            is_inference=True)
    # Feed names in the order the inference model was exported with.
    src_ids = self.feed_target_names[0]
    sent_ids = self.feed_target_names[1]
    pos_ids = self.feed_target_names[2]
    input_mask = self.feed_target_names[3]
    if args.ernie_version == "2.0":
        # ERNIE 2.0 models take an extra task-id input.
        task_ids = self.feed_target_names[4]
    # compute similarity
    predict_data_generator = reader.data_generator(
        input_file=dir_path,  # path the companion method writes to
        batch_size=args.batch_size,
        epoch=1,
        shuffle=False)
    print("-------------- prediction results --------------")
    np.set_printoptions(precision=4, suppress=True)
    for sample in predict_data_generator():
        src_ids_data = sample[0]
        sent_ids_data = sample[1]
        pos_ids_data = sample[2]
        task_ids_data = sample[3]
        input_mask_data = sample[4]
        if args.ernie_version == "1.0":
            output = self.exe.run(self.infer_program,
                                  feed={
                                      src_ids: src_ids_data,
                                      sent_ids: sent_ids_data,
                                      pos_ids: pos_ids_data,
                                      input_mask: input_mask_data
                                  },
                                  fetch_list=self.probs)
        elif args.ernie_version == "2.0":
            output = self.exe.run(self.infer_program,
                                  feed={
                                      src_ids: src_ids_data,
                                      sent_ids: sent_ids_data,
                                      pos_ids: pos_ids_data,
                                      task_ids: task_ids_data,
                                      input_mask: input_mask_data
                                  },
                                  fetch_list=self.probs)
        else:
            raise ValueError("ernie_version must be 1.0 or 2.0")
        #print(output)
        # Keep only the probability of the positive class (index 1).
        output_list = []
        for output_temp in output[0]:
            output_list.append(output_temp[1])
        print(output_list)
        # NOTE(review): returns after the first batch — assumes the request
        # file fits in one batch; confirm against the HTTP handler.
        return output_list
def gen_huggingface_bert_model(params_path):
    """Convert ERNIE fluid parameters into a HuggingFace-BERT state dict.

    Loads pretrained parameters from ``params_path`` into a freshly built
    ERNIE program, then renames (and transposes fc weights in) each tensor
    to the corresponding parameter name used by HuggingFace's BERT
    implementation (``embeddings.*``, ``encoder.layer.*``, ``pooler.*``).

    Args:
        params_path: directory holding fluid pretraining parameters.

    Returns:
        collections.OrderedDict mapping HuggingFace parameter names to
        float32 numpy arrays.
    """
    import paddle.fluid as fluid
    import sys
    sys.path.append("./LARK/ERNIE")
    from model.ernie import ErnieConfig
    from finetune.classifier import create_model
    from utils.init import init_pretraining_params
    ernie_config = ErnieConfig("./LARK/ERNIE/config/ernie_config.json")
    startup_prog = fluid.default_startup_program()
    test_prog = fluid.Program()
    # NOTE(review): mutates the module-level `args` namespace as a side
    # effect; create_model reads these fields.
    args.max_seq_len = 512
    args.use_fp16 = False
    args.num_labels = 2
    args.loss_scaling = 1.0
    with fluid.program_guard(test_prog, startup_prog):
        with fluid.unique_name.guard():
            _, _ = create_model(args,
                                pyreader_name="test",
                                ernie_config=ernie_config)
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)
    init_pretraining_params(exe, params_path, main_program=startup_prog)
    sc = fluid.global_scope()
    new_model = collections.OrderedDict()
    for each in startup_prog.list_vars():
        name = each.name
        # Skip the reader variable; everything else is a parameter tensor.
        if name == "test_reader":
            continue
        fc_w = sc.find_var(name).get_tensor()
        fc_w = np.array(fc_w, dtype=np.float32)
        # Embedding tables and pre-encoder LayerNorm map 1:1 (no transpose).
        if name == "word_embedding":
            new_model["embeddings.word_embeddings.weight"] = fc_w
        if name == "pos_embedding":
            new_model["embeddings.position_embeddings.weight"] = fc_w
        if name == "sent_embedding":
            new_model["embeddings.token_type_embeddings.weight"] = fc_w
        if name == "pre_encoder_layer_norm_scale":
            new_model["embeddings.LayerNorm.gamma"] = fc_w
        if name == "pre_encoder_layer_norm_bias":
            new_model["embeddings.LayerNorm.beta"] = fc_w
        if name.startswith("encoder_layer_"):
            # ERNIE names look like encoder_layer_<i>_<submodule...>.{w_0,b_0}
            # or encoder_layer_<i>_post_*_layer_norm_{scale,bias}.
            splits = name.split(".")
            if len(splits) == 2:
                prefix, suffix = splits
            else:
                prefix = splits[0]
            prefixs = prefix.split("_")
            if prefixs[3] == "multi":
                # Multi-head attention projections; fc weights are
                # transposed to match torch's nn.Linear layout.
                new_suffix = ".weight" if suffix == "w_0" else ".bias"
                if new_suffix == ".weight":
                    fc_w = fc_w.transpose()
                if prefixs[6] == "output":
                    all_name = "encoder.layer." + prefixs[2] + \
                        ".attention.output.dense" + new_suffix
                else:
                    all_name = "encoder.layer." + prefixs[2] + \
                        ".attention.self." + prefixs[6] + new_suffix
            elif prefixs[3] == "post":
                # Post-attention / post-FFN LayerNorm parameters.
                new_suffix = ".gamma" if name.endswith("scale") else ".beta"
                if prefixs[4] == "att":
                    all_name = "encoder.layer." + prefixs[2] + \
                        ".attention.output.LayerNorm" + new_suffix
                elif prefixs[4] == "ffn":
                    all_name = "encoder.layer." + prefixs[2] + \
                        ".output.LayerNorm" + new_suffix
            elif prefixs[3] == "ffn":
                # Feed-forward: fc_0 -> intermediate, fc_1 -> output.
                new_suffix = ".weight" if suffix == "w_0" else ".bias"
                if new_suffix == ".weight":
                    fc_w = fc_w.transpose()
                if prefixs[5] == "0":
                    all_name = "encoder.layer." + prefixs[2] + \
                        ".intermediate.dense" + new_suffix
                elif prefixs[5] == "1":
                    all_name = "encoder.layer." + prefixs[2] + \
                        ".output.dense" + new_suffix
            # NOTE(review): assumes one of the branches above always binds
            # all_name for every encoder_layer_* var — an unexpected name
            # would raise NameError here; confirm the naming scheme.
            new_model[all_name] = fc_w
        if name == "pooled_fc.w_0":
            fc_w = fc_w.transpose()
            new_model["pooler.dense.weight"] = fc_w
        if name == "pooled_fc.b_0":
            new_model["pooler.dense.bias"] = fc_w
    return new_model
def create_model(args, phase, micro_bsz, dp_sharding_rank, dp_worldsize, topo):
    """Build the ERNIE pretraining graph for pipeline/sharded training.

    Declares the input placeholders on the first pipeline stage, wires a
    DataLoader over this data-parallel rank's shard of the pretraining
    dataset, assembles the ERNIE model, and attaches the MLM loss (plus the
    SOP loss when ``args.use_sop``) on the last pipeline stage.

    Args:
        args: training arguments (use_sop, max_seq_len, data_dir, vocab_file,
            ernie_config_file, preln, num_pp, ...).
        phase: phase tag; not referenced in this function body.
        micro_bsz: micro batch size handed to the dataset builder.
        dp_sharding_rank: this worker's data-parallel/sharding rank.
        dp_worldsize: total number of data-parallel workers.
        topo: parallelism topology (mp/pp sizes) used for weight sharing and
            device placement.

    Returns:
        dict of graph vars: data_loader, mask_lm_loss, mean_mask_lm_loss,
        total_loss, checkpoints (plus sop_loss/sop_acc when use_sop).
    """
    # Dataset builder differs depending on whether SOP is trained.
    if args.use_sop:
        from reader.pretraining_ds_ernie_full_sent import make_pretrain_dataset
    else:
        from reader.pretraining_ds_mlm import make_pretrain_dataset

    # mask_label, mask_pos for mlm, labels for sop
    if args.use_sop:
        input_fields = {
            'names': ['src_ids', 'sent_ids', 'mask_label', 'mask_pos', 'labels'],
            'shapes': [[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                       [-1, 1], [-1, 1], [-1, 1]],
            'dtypes': ['int64', 'int64', 'int64', 'int64', 'int64'],
            'lod_levels': [0, 0, 0, 0, 0],
        }
    else:
        input_fields = {
            'names': ['src_ids', 'sent_ids', 'mask_label', 'mask_pos'],
            'shapes': [[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                       [-1, 1], [-1, 1]],
            'dtypes': ['int64', 'int64', 'int64', 'int64'],
            'lod_levels': [0, 0, 0, 0],
        }

    # Inputs are placed on the first pipeline stage.
    with fluid.device_guard("gpu:0"):
        inputs = [
            fluid.data(name=input_fields['names'][i],
                       shape=input_fields['shapes'][i],
                       dtype=input_fields['dtypes'][i],
                       lod_level=input_fields['lod_levels'][i])
            for i in range(len(input_fields['names']))
        ]
    if args.use_sop:
        (src_ids, sent_ids, mask_label, mask_pos, labels) = inputs
    else:
        (src_ids, sent_ids, mask_label, mask_pos) = inputs

    train_file_list = glob.glob(args.data_dir + "/*")

    # Vocab file: one "token<TAB>id" pair per line.
    vocab = {}
    with open(args.vocab_file) as r:
        for line in r:
            lines = line.strip().split('\t')
            vocab[lines[0]] = int(lines[1])
    log.debug("========= worker: {} of {} ==========".format(
        dp_sharding_rank, dp_worldsize))
    data_reader = make_pretrain_dataset('pt', train_file_list, True, vocab,
                                        micro_bsz, len(vocab),
                                        args.max_seq_len, dp_sharding_rank,
                                        dp_worldsize)
    with fluid.device_guard("gpu:0"):
        data_loader = fluid.io.DataLoader.from_generator(feed_list=inputs,
                                                         capacity=70,
                                                         iterable=False)
    places = fluid.CUDAPlace(int(os.environ.get('FLAGS_selected_gpus', 0)))

    def data_gen():
        # Thin generator adapter so the loader can re-iterate the reader.
        yield from data_reader

    data_loader.set_batch_generator(data_gen, places)

    ernie_config = ErnieConfig(args.ernie_config_file)._config_dict
    ernie_config["preln"] = args.preln
    weight_sharing = (topo.mp.size == 1 and topo.pp.size == 1
                      )  # pp mp should not do weight sharing
    with fluid.device_guard("gpu:0"):
        ernie = ErnieModel(src_ids,
                           sent_ids,
                           ernie_config,
                           weight_sharing=weight_sharing,
                           topo=topo)
    checkpoints = ernie._checkpoints
    # Drop the last recompute checkpoint (it overlaps the loss stage).
    checkpoints.pop(-1)
    # Losses are computed on the last pipeline stage.
    with fluid.device_guard(f'gpu:{args.num_pp-1}'):
        mask_lm_loss, mean_mask_lm_loss = ernie.get_lm_output(
            mask_label, mask_pos)
        total_loss = mean_mask_lm_loss
        if args.use_sop:
            sop_acc, mean_sop_loss = ernie.get_next_sentence_output(labels)
            total_loss += mean_sop_loss

        if topo.pp.size > 1:
            # Mark cross-stage fetch targets persistable in pipeline mode so
            # they survive scope cleanup between micro-batches.
            mask_lm_loss.persistable = True
            mean_mask_lm_loss.persistable = True
            # checkpoints.extend([mask_lm_loss.name, mean_mask_lm_loss.name])
            if args.use_sop:
                mean_sop_loss.persistable = True
                sop_acc.persistable = True
                # checkpoints.extend([mean_sop_loss.name, sop_acc.name])
            total_loss.persistable = True
            # checkpoints.append(total_loss.name)

    if args.use_sop:
        graph_vars = {
            'data_loader': data_loader,
            'mask_lm_loss': mask_lm_loss,
            'mean_mask_lm_loss': mean_mask_lm_loss,
            'sop_loss': mean_sop_loss,
            'sop_acc': sop_acc,
            'total_loss': total_loss,
            'checkpoints': checkpoints
        }
    else:
        graph_vars = {
            'data_loader': data_loader,
            'mask_lm_loss': mask_lm_loss,
            'mean_mask_lm_loss': mean_mask_lm_loss,
            'total_loss': total_loss,
            'checkpoints': checkpoints,
        }
    return graph_vars
def main(args):
    """Fine-tune / evaluate an ERNIE-gen style seq2seq model.

    Builds train/test programs per args.do_train / do_val / do_test /
    do_pred, optionally wires NCCL2 distributed training via the transpiler,
    then runs the training loop with periodic logging, checkpointing and
    evaluation.
    """
    ernie_config = ErnieConfig(args.ernie_config_path)
    # Dialog tasks carry extra role/turn embedding sizes in the config.
    if args.task_type == "dialog":
        ernie_config["role_type_size"] = args.role_type_size
        ernie_config["turn_type_size"] = args.turn_type_size
    # Negative CLI values mean "keep the config file's dropout settings".
    if args.hidden_dropout_prob >= 0:
        ernie_config["hidden_dropout_prob"] = args.hidden_dropout_prob
    if args.attention_probs_dropout_prob >= 0:
        ernie_config[
            "attention_probs_dropout_prob"] = args.attention_probs_dropout_prob
    ernie_config.print_config()
    if args.pred_batch_size <= 0:
        args.pred_batch_size = args.batch_size
    gpu_id = 0
    gpus = fluid.core.get_cuda_device_count()
    if args.is_distributed:
        # FLAGS_selected_gpus is a comma-separated list; the first entry is
        # this worker's device.
        gpus = os.getenv("FLAGS_selected_gpus").split(",")
        gpu_id = int(gpus[0])
    if args.use_cuda:
        place = fluid.CUDAPlace(gpu_id)
        # Non-distributed: `gpus` is still the int device count here.
        dev_count = len(gpus) if args.is_distributed else gpus
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    reader = Seq2SeqReader(args)
    ernie_gen = ErnieGenFinetune(args, ernie_config, reader.tokenizer)
    if not (args.do_train or args.do_val or args.do_test or args.do_pred):
        raise ValueError("For args `do_train`, `do_val` and `do_test`, at "
                         "least one of them must be True.")
    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed
    if args.do_train:
        trainers_num = int(os.getenv("PADDLE_TRAINERS_NUM"))
        train_data_generator = reader.data_generator(
            input_file=args.train_set,
            batch_size=args.batch_size,
            epoch=args.epoch,
            dev_count=trainers_num,
            shuffle=True,
            phase="train")
        num_train_examples = reader.get_num_examples(args.train_set)
        if args.in_tokens:
            # batch_size is a token budget in this mode.
            max_train_steps = args.epoch * num_train_examples // (
                args.batch_size // args.max_seq_len) // trainers_num
        else:
            max_train_steps = args.epoch * num_train_examples // args.batch_size // trainers_num
        warmup_steps = int(max_train_steps * args.warmup_proportion)
        print("Device count: %d, gpu_id: %d" % (dev_count, gpu_id))
        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)
        print("Num warmup steps: %d" % warmup_steps)
        train_program = fluid.Program()
        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                train_pyreader, graph_vars = ernie_gen.create_model()
                scheduled_lr, loss_scaling = optimization(
                    loss=graph_vars["loss"],
                    warmup_steps=warmup_steps,
                    num_train_steps=max_train_steps,
                    learning_rate=args.learning_rate,
                    train_program=train_program,
                    startup_prog=startup_prog,
                    weight_decay=args.weight_decay,
                    scheduler=args.lr_scheduler,
                    use_fp16=args.use_fp16,
                    use_dynamic_loss_scaling=args.use_dynamic_loss_scaling,
                    init_loss_scaling=args.init_loss_scaling,
                    incr_every_n_steps=args.incr_every_n_steps,
                    decr_every_n_nan_or_inf=args.decr_every_n_nan_or_inf,
                    incr_ratio=args.incr_ratio,
                    decr_ratio=args.decr_ratio)
        if args.verbose:
            if args.in_tokens:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program,
                    batch_size=args.batch_size // args.max_seq_len)
            else:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program, batch_size=args.batch_size)
            print("Theoretical memory usage in training: %.3f - %.3f %s" %
                  (lower_mem, upper_mem, unit))
    if args.do_val or args.do_test or args.do_pred:
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                test_pyreader, test_graph_vars = ernie_gen.create_model(
                    decoding=args.do_decode)
        test_prog = test_prog.clone(for_test=True)
    nccl2_num_trainers = 1
    nccl2_trainer_id = 0
    print("args.is_distributed:", args.is_distributed)
    if args.is_distributed:
        trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
        worker_endpoints_env = os.getenv("PADDLE_TRAINER_ENDPOINTS")
        current_endpoint = os.getenv("PADDLE_CURRENT_ENDPOINT")
        worker_endpoints = worker_endpoints_env.split(",")
        trainers_num = len(worker_endpoints)
        print("worker_endpoints:{} trainers_num:{} current_endpoint:{} \
              trainer_id:{}".format(worker_endpoints, trainers_num,
                                    current_endpoint, trainer_id))
        # prepare nccl2 env.
        config = fluid.DistributeTranspilerConfig()
        config.mode = "nccl2"
        t = fluid.DistributeTranspiler(config=config)
        # `trainers` takes the comma-joined endpoint string in nccl2 mode.
        t.transpile(trainer_id,
                    trainers=worker_endpoints_env,
                    current_endpoint=current_endpoint,
                    program=train_program if args.do_train else test_prog,
                    startup_program=startup_prog)
        nccl2_num_trainers = trainers_num
        nccl2_trainer_id = trainer_id
    exe = fluid.Executor(place)
    exe.run(startup_prog)
    init_model(args, exe, startup_prog)
    if args.do_train:
        exec_strategy = fluid.ExecutionStrategy()
        if args.use_fast_executor:
            exec_strategy.use_experimental_executor = True
        exec_strategy.num_threads = dev_count
        exec_strategy.num_iteration_per_drop_scope = args.num_iteration_per_drop_scope
        train_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                           loss_name=graph_vars["loss"].name,
                                           exec_strategy=exec_strategy,
                                           main_program=train_program,
                                           num_trainers=nccl2_num_trainers,
                                           trainer_id=nccl2_trainer_id)
        train_pyreader.set_batch_generator(train_data_generator)
        train_resource = {
            "exe": train_exe,
            "program": train_program,
            "pyreader": train_pyreader
        }
        save_model = partial(save_checkpoint, program=train_program, exe=exe)
    test_dev_count = 1
    if args.do_val or args.do_test or args.do_pred:
        test_exe = exe
        if args.use_multi_gpu_test:
            # NOTE(review): `trainers_num` is only bound when do_train or
            # is_distributed ran above — otherwise this raises NameError.
            test_dev_count = min(trainers_num,
                                 int(os.getenv("PADDLE_PROC_PER_NODE", "1")))
        test_resource = {
            "exe": test_exe,
            "program": test_prog,
            "pyreader": test_pyreader
        }
        eval_data_generator = partial(reader.data_generator,
                                      batch_size=args.pred_batch_size,
                                      epoch=1,
                                      dev_count=test_dev_count,
                                      shuffle=False,
                                      do_decode=args.do_decode,
                                      place=place)
        # NOTE(review): `trainer_id` is only defined when is_distributed —
        # confirm non-distributed eval paths never reach here.
        eval_func = partial(ernie_gen.evaluate,
                            resource=test_resource,
                            graph_vars=test_graph_vars,
                            dev_count=test_dev_count,
                            output_path=args.checkpoints,
                            gpu_id=trainer_id)
        evaluate = partial(evaluate_datasets,
                           pyreader=test_pyreader,
                           reader=reader,
                           eval_func=eval_func,
                           data_generator=eval_data_generator)
    if args.do_train:
        train_pyreader.start()
        steps = 0
        last_epoch = 0
        if warmup_steps > 0:
            graph_vars["learning_rate"] = scheduled_lr
        time_begin = time.time()
        skip_steps = args.skip_steps
        while True:
            try:
                steps += 1
                if args.save_and_valid_by_epoch:
                    suffix = "epoch_" + str(last_epoch)
                else:
                    suffix = "step_" + str(steps)
                if steps % skip_steps == 0:
                    # Logging step: fetch metrics instead of a bare run.
                    outputs = ernie_gen.evaluate(train_resource, "train",
                                                 graph_vars)
                    if args.verbose:
                        verbose = "train pyreader queue size: %d, " % train_pyreader.queue.size(
                        )
                        verbose += "learning rate: %f" % (
                            outputs["learning_rate"]
                            if warmup_steps > 0 else args.learning_rate)
                        print(verbose)
                    if args.in_tokens:
                        current_example, current_epoch = reader.get_train_progress(
                        )
                    else:
                        current_epoch = steps * args.batch_size * trainers_num // num_train_examples
                        current_example = steps * args.batch_size * trainers_num % num_train_examples
                    time_end = time.time()
                    used_time = time_end - time_begin
                    print("epoch: %d, progress: %d/%d, step: %d, loss: %f, "
                          "ppl: %f, speed: %f steps/s" %
                          (current_epoch, current_example, num_train_examples,
                           steps, outputs["loss"], outputs["ppl"],
                           args.skip_steps / used_time))
                    time_begin = time.time()
                else:
                    train_exe.run(fetch_list=[])
                # Trainers beyond the eval worker set only train.
                if nccl2_trainer_id >= test_dev_count:
                    continue
                do_save = False
                do_eval = False
                if not args.save_and_valid_by_epoch:
                    if steps % args.save_steps == 0 and nccl2_trainer_id == 0:
                        do_save = True
                    if steps % args.validation_steps == 0:
                        do_eval = True
                else:
                    if args.in_tokens:
                        current_example, current_epoch = reader.get_train_progress(
                        )
                    else:
                        current_epoch = steps * args.batch_size * trainers_num // num_train_examples
                    if current_epoch != last_epoch:
                        if nccl2_trainer_id == 0:
                            do_save = True
                        do_eval = True
                if do_save:
                    save_model(suffix=suffix)
                if do_eval:
                    evaluate(suffix=suffix)
                # NOTE(review): on non-logging steps with
                # save_and_valid_by_epoch=False, `current_epoch` may be unbound
                # here — confirm against the upstream script.
                last_epoch = current_epoch
            except fluid.core.EOFException:
                save_model(suffix=suffix)
                train_pyreader.reset()
                break
    if nccl2_trainer_id >= test_dev_count:
        return
    if args.do_val or args.do_test or args.do_pred:
        suffix = "output"
        if args.do_train:
            if not args.save_and_valid_by_epoch:
                suffix = "step_" + str(steps)
            else:
                suffix = "epoch_" + str(last_epoch)
        evaluate(suffix=suffix, do_pred=True)
def main(args):
    """Fine-tune / evaluate an ERNIE classifier.

    Trains with an accuracy-gated checkpoint policy (saves only when eval
    accuracy improves) and optionally runs the GLUE-AX diagnostic set at the
    end.
    """
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    exe = fluid.Executor(place)
    reader = task_reader.ClassifyReader(vocab_path=args.vocab_path,
                                        label_map_config=args.label_map_config,
                                        max_seq_len=args.max_seq_len,
                                        do_lower_case=args.do_lower_case,
                                        in_tokens=args.in_tokens,
                                        random_seed=args.random_seed,
                                        tokenizer=args.tokenizer,
                                        is_classify=args.is_classify,
                                        is_regression=args.is_regression,
                                        for_cn=args.for_cn,
                                        task_id=args.task_id)
    if not (args.do_train or args.do_val or args.do_test):
        raise ValueError("For args `do_train`, `do_val` and `do_test`, at "
                         "least one of them must be True.")
    if args.do_test:
        assert args.test_save is not None
    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed
    if args.predict_batch_size is None:
        args.predict_batch_size = args.batch_size
    if args.do_train:
        train_data_generator = reader.data_generator(
            input_file=args.train_set,
            batch_size=args.batch_size,
            epoch=args.epoch,
            dev_count=dev_count,
            shuffle=True,
            phase="train")
        num_train_examples = reader.get_num_examples(args.train_set)
        if args.in_tokens:
            # batch_size is a token budget in this mode.
            max_train_steps = args.epoch * num_train_examples // (
                args.batch_size // args.max_seq_len) // dev_count
        else:
            max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count
        warmup_steps = int(max_train_steps * args.warmup_proportion)
        print("Device count: %d" % dev_count)
        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)
        print("Num warmup steps: %d" % warmup_steps)
        train_program = fluid.Program()
        """
        if args.random_seed is not None and args.enable_ce:
            train_program.random_seed = args.random_seed
        """
        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                train_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='train_reader',
                    ernie_config=ernie_config,
                    is_classify=args.is_classify,
                    is_regression=args.is_regression)
                scheduled_lr, loss_scaling = optimization(
                    loss=graph_vars["loss"],
                    warmup_steps=warmup_steps,
                    num_train_steps=max_train_steps,
                    learning_rate=args.learning_rate,
                    train_program=train_program,
                    startup_prog=startup_prog,
                    weight_decay=args.weight_decay,
                    scheduler=args.lr_scheduler,
                    use_fp16=args.use_fp16)
        if args.verbose:
            if args.in_tokens:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program,
                    batch_size=args.batch_size // args.max_seq_len)
            else:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program, batch_size=args.batch_size)
            print("Theoretical memory usage in training: %.3f - %.3f %s" %
                  (lower_mem, upper_mem, unit))
    if args.do_val or args.do_test:
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                # NOTE(review): this intentionally rebinds `graph_vars`; the
                # variables share names across train/test programs, so the
                # later uses still resolve correctly.
                test_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='test_reader',
                    ernie_config=ernie_config,
                    is_classify=args.is_classify,
                    is_regression=args.is_regression)
        test_prog = test_prog.clone(for_test=True)
    nccl2_num_trainers = 1
    nccl2_trainer_id = 0
    exe.run(startup_prog)
    if args.do_train:
        if args.init_checkpoint and args.init_pretraining_params:
            print(
                "WARNING: args 'init_checkpoint' and 'init_pretraining_params' "
                "both are set! Only arg 'init_checkpoint' is made valid.")
        if args.init_checkpoint:
            init_checkpoint(exe,
                            args.init_checkpoint,
                            main_program=startup_prog,
                            use_fp16=args.use_fp16)
        elif args.init_pretraining_params:
            init_pretraining_params(exe,
                                    args.init_pretraining_params,
                                    main_program=startup_prog,
                                    use_fp16=args.use_fp16)
    elif args.do_val or args.do_test:
        if not args.init_checkpoint:
            raise ValueError("args 'init_checkpoint' should be set if"
                             "only doing validation or testing!")
        init_checkpoint(exe,
                        args.init_checkpoint,
                        main_program=startup_prog,
                        use_fp16=args.use_fp16)
    if args.do_train:
        exec_strategy = fluid.ExecutionStrategy()
        if args.use_fast_executor:
            exec_strategy.use_experimental_executor = True
        exec_strategy.num_threads = dev_count
        exec_strategy.num_iteration_per_drop_scope = args.num_iteration_per_drop_scope
        train_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                           loss_name=graph_vars["loss"].name,
                                           exec_strategy=exec_strategy,
                                           main_program=train_program,
                                           num_trainers=nccl2_num_trainers,
                                           trainer_id=nccl2_trainer_id)
        train_pyreader.decorate_tensor_provider(train_data_generator)
    else:
        train_exe = None
    test_exe = exe
    if args.do_val or args.do_test:
        if args.use_multi_gpu_test:
            test_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                              main_program=test_prog,
                                              share_vars_from=train_exe)
    steps = 10000
    current_epoch = 1
    if args.do_train:
        train_pyreader.start()
        steps = 0
        if warmup_steps > 0:
            graph_vars["learning_rate"] = scheduled_lr
        ce_info = []
        time_begin = time.time()
        last_epoch = 0
        current_epoch = 0
        # Baselines for the accuracy-gated save policy below.
        previous_eval_acc = 0.80
        previous_train_acc = 0.90
        while True:
            try:
                steps += 1
                if steps % args.skip_steps != 0:
                    train_exe.run(fetch_list=[])
                else:
                    outputs = evaluate(train_exe,
                                       train_program,
                                       train_pyreader,
                                       graph_vars,
                                       "train",
                                       metric=args.metric,
                                       is_classify=args.is_classify,
                                       is_regression=args.is_regression)
                    acc = outputs["accuracy"]
                    # Only run (and possibly save after) a dev evaluation when
                    # train accuracy improves or is already very high.
                    if acc > previous_train_acc or acc > 0.95:
                        print(
                            "previous train accuracy is %f and current train accuracy is %f "
                            % (previous_train_acc, acc))
                        previous_train_acc = acc
                        eval_acc = evaluate_wrapper(args, reader, exe,
                                                    test_prog, test_pyreader,
                                                    graph_vars, current_epoch,
                                                    steps)
                        print(
                            "previous evaluate accuracy is %f and current evaluate accuracy is %f "
                            % (previous_eval_acc, eval_acc))
                        if eval_acc > previous_eval_acc:
                            previous_eval_acc = eval_acc
                            save_path = os.path.join(
                                args.checkpoints,
                                "evalacc_" + str(eval_acc).split('.')[1])
                            fluid.io.save_persistables(exe, save_path,
                                                       train_program)
                            predict_wrapper(args,
                                            reader,
                                            exe,
                                            test_prog,
                                            test_pyreader,
                                            graph_vars,
                                            current_epoch,
                                            steps="evalacc_" +
                                            str(eval_acc).split('.')[1])
                            print(
                                "predict and save model!!!!!!!!!!!!!!!!!!!!!!!!!! in %s"
                                % (save_path))
                    if args.verbose:
                        verbose = "train pyreader queue size: %d, " % train_pyreader.queue.size(
                        )
                        verbose += "learning rate: %f" % (
                            outputs["learning_rate"]
                            if warmup_steps > 0 else args.learning_rate)
                        print(verbose)
                    current_example, current_epoch = reader.get_train_progress(
                    )
                    time_end = time.time()
                    used_time = time_end - time_begin
                    print(
                        "epoch: %d, progress: %d/%d, step: %d, ave loss: %f, "
                        "ave acc: %f, speed: %f steps/s" %
                        (current_epoch, current_example, num_train_examples,
                         steps, outputs["loss"], outputs["accuracy"],
                         args.skip_steps / used_time))
                    ce_info.append(
                        [outputs["loss"], outputs["accuracy"], used_time])
                    time_begin = time.time()
                # if steps % args.save_steps == 0:
                #     save_path = os.path.join(args.checkpoints,
                #                              "step_" + str(steps))
                #     fluid.io.save_persistables(exe, save_path, train_program)
                # if steps % args.validation_steps == 0 or last_epoch != current_epoch:
                #     # evaluate dev set
                #     if args.do_val:
                #         ret=evaluate_wrapper(args, reader, exe, test_prog,
                #                          test_pyreader, graph_vars,
                #                          current_epoch, steps)
                #     if args.do_test:
                #         predict_wrapper(args, reader, exe,
                #                         test_prog, test_pyreader, graph_vars,
                #                         current_epoch, steps)
                if last_epoch != current_epoch:
                    last_epoch = current_epoch
            except fluid.core.EOFException:
                save_path = os.path.join(args.checkpoints,
                                         "step_" + str(steps))
                fluid.io.save_persistables(exe, save_path, train_program)
                train_pyreader.reset()
                break
    # final eval on dev set
    # if args.do_val:
    #     evaluate_wrapper(args, reader, exe, test_prog, test_pyreader,
    #                      graph_vars, current_epoch, steps)
    # final eval on test set
    steps = 0
    # if args.do_test:
    #     current_epoch = 0
    #     predict_wrapper(args, reader, exe, test_prog, test_pyreader, graph_vars,
    #                     current_epoch, steps)
    # final eval on dianostic, hack for glue-ax
    if args.diagnostic:
        test_pyreader.decorate_tensor_provider(
            reader.data_generator(args.diagnostic,
                                  batch_size=args.batch_size,
                                  epoch=1,
                                  dev_count=1,
                                  shuffle=False))
        print("Final diagnostic")
        qids, preds, probs = predict(test_exe,
                                     test_prog,
                                     test_pyreader,
                                     graph_vars,
                                     is_classify=args.is_classify,
                                     is_regression=args.is_regression)
        assert len(qids) == len(preds), '{} v.s. {}'.format(
            len(qids), len(preds))
        with open(args.diagnostic_save, 'w') as f:
            for id, s, p in zip(qids, preds, probs):
                f.write('{}\t{}\t{}\n'.format(id, s, p))
        print("Done final diagnostic, saving to {}".format(
            args.diagnostic_save))
def train(args):
    """Run ERNIE multi-task pretraining with an explicit feed loop.

    Builds train/test programs from the task_group json, restores pretraining
    params, then drives training by pulling batches from ErnieDataReader and
    feeding them by name. Logs metrics every `skip_steps`, saves persistables
    every `save_steps` and validates every `validation_steps`.
    """
    print("pretraining start")
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()
    with open(args.task_group_json) as f:
        task_group = json.load(f)
    exec_strategy = fluid.ExecutionStrategy()
    if args.use_fast_executor:
        exec_strategy.use_experimental_executor = True
    exec_strategy.num_threads = 4 if args.use_amp else 2
    exec_strategy.num_iteration_per_drop_scope = min(1, args.skip_steps)
    node_nums = 1  #int(os.getenv("PADDLE_NODES_NUM"))
    print("args.is_distributed:", args.is_distributed)
    num_trainers = 1
    trainer_id = 0
    # NOTE(review): dist_strategy stays None in this trimmed-down script; see
    # the use_recompute note below.
    dist_strategy = None
    gpu_id = 0
    gpus = 1  #fluid.core.get_cuda_device_count()
    print(gpus)
    if args.is_distributed:
        gpus = os.getenv("FLAGS_selected_gpus").split(",")
        gpu_id = int(gpus[0])
    place = fluid.CPUPlace()
    dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    print("Device count %d, gpu_id:%d" % (dev_count, gpu_id))
    train_program = fluid.Program()
    startup_prog = fluid.Program()
    with fluid.program_guard(train_program, startup_prog):
        with fluid.unique_name.guard():
            fetch_vars, train_data_names = create_model(
                pyreader_name='train_reader',
                ernie_config=ernie_config,
                task_group=task_group)
            graph_vars = fetch_vars["graph_vars"]
            checkpoints = fetch_vars["checkpoints"]
            # Convention: the last graph var is the total loss.
            total_loss = graph_vars[-1]
            if args.use_recompute:
                # NOTE(review): dist_strategy is None here, so enabling
                # use_recompute raises AttributeError — confirm whether a
                # DistributedStrategy should be constructed first.
                dist_strategy.recompute_checkpoints = checkpoints
            fetch_list_ascend = [var for var in graph_vars]
            scheduled_lr, loss_scaling = optimization(
                loss=total_loss,
                warmup_steps=args.warmup_steps,
                num_train_steps=args.num_train_steps,
                learning_rate=args.learning_rate,
                train_program=train_program,
                startup_prog=startup_prog,
                weight_decay=args.weight_decay,
                scheduler=args.lr_scheduler,
                use_fp16=args.use_amp,
                use_dynamic_loss_scaling=args.use_dynamic_loss_scaling,
                init_loss_scaling=args.init_loss_scaling,
                incr_every_n_steps=args.incr_every_n_steps,
                decr_every_n_nan_or_inf=args.decr_every_n_nan_or_inf,
                incr_ratio=args.incr_ratio,
                decr_ratio=args.decr_ratio,
                fetch_list=fetch_list_ascend,
                dist_strategy=dist_strategy)
    origin_train_program = train_program
    test_prog = fluid.Program()
    with fluid.program_guard(test_prog, startup_prog):
        with fluid.unique_name.guard():
            fetch_vars, test_data_names = create_model(
                pyreader_name='test_reader',
                ernie_config=ernie_config,
                task_group=task_group)
            graph_vars = fetch_vars["graph_vars"]
            total_loss = graph_vars[-1]
    test_prog = test_prog.clone(for_test=True)
    exe = fluid.Executor(place)
    exe.run(startup_prog)
    if args.init_checkpoint and args.init_checkpoint != "":
        #init_checkpoint(exe, args.init_checkpoint, origin_train_program, args.use_amp)
        init_pretraining_params(exe, args.init_checkpoint,
                                origin_train_program, args.use_amp)
    data_reader = ErnieDataReader(task_group,
                                  False,
                                  batch_size=args.batch_size,
                                  vocab_path=args.vocab_path,
                                  voc_size=ernie_config['vocab_size'],
                                  epoch=args.epoch,
                                  max_seq_len=args.max_seq_len,
                                  generate_neg_sample=args.generate_neg_sample,
                                  hack_old_trainset=args.hack_old_data)
    #only fleet
    train_exe = exe
    predict = predict_wrapper(args,
                              exe,
                              ernie_config,
                              task_group,
                              test_prog=test_prog,
                              data_names=test_data_names,
                              fetch_list=[var for var in graph_vars])
    #train_pyreader.set_batch_generator(data_reader.data_generator())
    #train_pyreader.start()
    train_data_generator = data_reader.data_generator()
    steps = 0
    time_begin = time.time()
    feed_list = {}
    while True:  #steps < args.num_train_steps:
        try:
            steps += 1
            #node_nums
            skip_steps = args.skip_steps  # * node_nums
            # NOTE(review): this calls train_data_generator() anew each step;
            # if data_generator() returned a generator *function*, this
            # restarts the dataset every iteration — verify the reader API.
            input_list = next(train_data_generator(), None)
            # Feed by name, in the order create_model declared the inputs.
            for index in range(len(input_list)):
                feed_list[train_data_names[index]] = input_list[index]
            fetch_list = []
            if trainer_id == 0 and steps % skip_steps == 0:
                fetch_list = [var for var in graph_vars] + [scheduled_lr.name]
                if args.use_amp:
                    fetch_list.append(loss_scaling.name)
            outputs = train_exe.run(feed=feed_list,
                                    fetch_list=fetch_list,
                                    program=train_program)
            time_end = time.time()
            used_time = time_end - time_begin
            if outputs:
                # Layout: [mask_lm_cost, lm_weight, (acc, weight) per task...,
                #          constract_loss, total_cost, lr(, loss_scaling)]
                each_mask_lm_cost, lm_w = outputs[:2]
                if args.use_amp:
                    each_total_constract_loss, each_total_cost, np_lr, l_scaling = outputs[
                        -4:]
                else:
                    each_total_constract_loss, each_total_cost, np_lr = outputs[
                        -3:]
                acc_list = []
                index = 2
                for task in task_group:
                    each_task_acc = outputs[index]
                    task_w = outputs[index + 1]
                    acc = np.sum(each_task_acc * task_w) / np.sum(task_w)
                    acc_list.append("%s acc: %f" % (task["task_name"], acc))
                    index += 2
                epoch, current_file_index, total_file, current_file, mask_type = data_reader.get_progress(
                )
                if args.use_amp:
                    print("current learning_rate:%f, loss scaling:%f" %
                          (np_lr[0], l_scaling[0]))
                else:
                    print("current learning_rate:%f" % np_lr[0])
                print(
                    "epoch: %d, progress: %d/%d, step: %d, constract_loss: %f, loss: %f, "
                    "ppl: %f, %s, speed: %f steps/s, file: %s, mask_type: %s"
                    % (epoch, current_file_index, total_file, steps,
                       np.mean(each_total_constract_loss),
                       np.mean(each_total_cost),
                       np.exp(np.sum(each_mask_lm_cost * lm_w) / np.sum(lm_w)),
                       ", ".join(acc_list), skip_steps / used_time,
                       current_file, mask_type))
                time_begin = time.time()
            elif steps % skip_steps == 0:
                epoch, current_file_index, total_file, current_file, mask_type = data_reader.get_progress(
                )
                print("epoch: %d, progress: %d/%d, step: %d, "
                      "speed: %f steps/s, file: %s, mask_type: %s" %
                      (epoch, current_file_index, total_file, steps,
                       skip_steps / used_time, current_file, mask_type))
                time_begin = time.time()
            if not trainer_id == 0:
                continue
            if steps % args.save_steps == 0:
                save_path = os.path.join(args.checkpoints,
                                         "step_" + str(steps))
                fluid.io.save_persistables(exe, save_path,
                                           origin_train_program)
            if steps % args.validation_steps == 0:
                valid_list = predict()
                print("[validation_set] epoch: %d, step: %d, %s" % \
                      (epoch, steps, ", ".join(valid_list)))
        except fluid.core.EOFException:
            # NOTE(review): `train_pyreader` is never defined in this function
            # (its setup is commented out above) — this handler would raise
            # NameError if reached. Likely leftover from the pyreader version.
            train_pyreader.reset()
            break
def train(args):
    """Train and/or predict an ERNIE machine-reading-comprehension model.

    do_train: builds the training program, runs the loop with periodic
    logging and checkpointing. do_predict: builds a test program and runs
    prediction over every file matched by args.predict_file patterns.
    """
    ernie_config = ErnieConfig(args.ernie_config)
    ernie_config.print_config()
    if not (args.do_train or args.do_predict):
        raise ValueError("For args `do_train` and `do_predict`, at "
                         "least one of them must be True.")
    if args.use_cuda:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    exe = fluid.Executor(place)
    processor = DataProcessor(vocab_path=args.vocab_path,
                              do_lower_case=args.do_lower_case,
                              max_seq_length=args.max_seq_len,
                              in_tokens=args.in_tokens,
                              doc_stride=args.doc_stride,
                              max_query_length=args.max_query_length)
    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed
    if args.do_train:
        train_data_generator = processor.data_generator(
            data_path=args.train_file,
            batch_size=args.batch_size,
            phase='train',
            shuffle=True,
            dev_count=dev_count,
            version_2_with_negative=args.version_2_with_negative,
            epoch=args.epoch)
        num_train_examples = processor.get_num_examples(phase='train')
        if args.in_tokens:
            # batch_size is a token budget in this mode.
            max_train_steps = args.epoch * num_train_examples // (
                args.batch_size // args.max_seq_len) // dev_count
        else:
            max_train_steps = args.epoch * num_train_examples // (
                args.batch_size) // dev_count
        warmup_steps = int(max_train_steps * args.warmup_proportion)
        print("Device count: %d" % dev_count)
        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)
        print("Num warmup steps: %d" % warmup_steps)
        train_program = fluid.Program()
        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                train_data_loader, loss, num_seqs = create_model(
                    ernie_config=ernie_config, is_training=True)
                scheduled_lr, loss_scaling = optimization(
                    loss=loss,
                    warmup_steps=warmup_steps,
                    num_train_steps=max_train_steps,
                    learning_rate=args.learning_rate,
                    train_program=train_program,
                    startup_prog=startup_prog,
                    weight_decay=args.weight_decay,
                    scheduler=args.lr_scheduler,
                    use_fp16=args.use_fp16,
                    use_dynamic_loss_scaling=args.use_dynamic_loss_scaling,
                    init_loss_scaling=args.init_loss_scaling,
                    incr_every_n_steps=args.incr_every_n_steps,
                    decr_every_n_nan_or_inf=args.decr_every_n_nan_or_inf,
                    incr_ratio=args.incr_ratio,
                    decr_ratio=args.decr_ratio)
    if args.do_predict:
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                test_data_loader, unique_ids, start_logits, end_logits, num_seqs = create_model(
                    ernie_config=ernie_config, is_training=False)
        test_prog = test_prog.clone(for_test=True)
    exe.run(startup_prog)
    if args.do_train:
        if args.init_checkpoint and args.init_pretraining_params:
            print(
                "WARNING: args 'init_checkpoint' and 'init_pretraining_params' "
                "both are set! Only arg 'init_checkpoint' is made valid.")
        if args.init_checkpoint:
            init_checkpoint(exe,
                            args.init_checkpoint,
                            main_program=startup_prog,
                            use_fp16=args.use_fp16)
        elif args.init_pretraining_params:
            init_pretraining_params(exe,
                                    args.init_pretraining_params,
                                    main_program=startup_prog,
                                    use_fp16=args.use_fp16)
    elif args.do_predict:
        if not args.init_checkpoint:
            raise ValueError("args 'init_checkpoint' should be set if"
                             "only doing prediction!")
        init_checkpoint(exe,
                        args.init_checkpoint,
                        main_program=startup_prog,
                        use_fp16=args.use_fp16)
    if args.do_train:
        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.use_experimental_executor = args.use_fast_executor
        exec_strategy.num_threads = dev_count
        exec_strategy.num_iteration_per_drop_scope = args.num_iteration_per_drop_scope
        train_compiled_program = fluid.CompiledProgram(
            train_program).with_data_parallel(loss_name=loss.name,
                                              exec_strategy=exec_strategy)
        train_data_loader.set_batch_generator(train_data_generator, place)
        train_data_loader.start()
        steps = 0
        total_cost, total_num_seqs = [], []
        time_begin = time.time()
        while True:
            try:
                steps += 1
                # Only fetch metrics on logging steps; fetching forces a sync.
                if steps % args.skip_steps == 0:
                    if args.use_fp16:
                        fetch_list = [
                            loss.name, scheduled_lr.name, num_seqs.name,
                            loss_scaling.name
                        ]
                    else:
                        fetch_list = [
                            loss.name, scheduled_lr.name, num_seqs.name
                        ]
                else:
                    fetch_list = []
                outputs = exe.run(train_compiled_program,
                                  fetch_list=fetch_list)
                if steps % args.skip_steps == 0:
                    if args.use_fp16:
                        np_loss, np_lr, np_num_seqs, np_scaling = outputs
                    else:
                        np_loss, np_lr, np_num_seqs = outputs
                    # Accumulate a sequence-weighted loss for the average below.
                    total_cost.extend(np_loss * np_num_seqs)
                    total_num_seqs.extend(np_num_seqs)
                    if args.verbose:
                        verbose = "train data_loader queue size: %d, " % train_data_loader.queue.size(
                        )
                        verbose += "learning rate: %f " % np_lr[0]
                        if args.use_fp16:
                            verbose += ", loss scaling: %f" % np_scaling[0]
                        print(verbose)
                    time_end = time.time()
                    used_time = time_end - time_begin
                    current_example, epoch = processor.get_train_progress()
                    print("epoch: %d, progress: %d/%d, step: %d, loss: %f, "
                          "speed: %f steps/s" %
                          (epoch, current_example, num_train_examples, steps,
                           np.sum(total_cost) / np.sum(total_num_seqs),
                           args.skip_steps / used_time))
                    total_cost, total_num_seqs = [], []
                    time_begin = time.time()
                if steps % args.save_steps == 0 or steps == max_train_steps:
                    save_path = os.path.join(args.checkpoints,
                                             "step_" + str(steps))
                    fluid.io.save_persistables(exe, save_path, train_program)
            except fluid.core.EOFException:
                save_path = os.path.join(args.checkpoints,
                                         "step_" + str(steps) + "_final")
                fluid.io.save_persistables(exe, save_path, train_program)
                train_data_loader.reset()
                break
    if args.do_predict:
        # Expand the glob pattern(s) into the concrete prediction files.
        input_files = []
        for input_pattern in args.predict_file:
            input_files.extend(glob.glob(input_pattern))
        assert len(input_files) > 0, 'Can not find predict_file {}'.format(
            args.predict_file)
        for input_file in input_files:
            print('Run prediction on {}'.format(input_file))
            prefix = os.path.basename(input_file)
            prefix = re.sub('.json', '', prefix)
            test_data_loader.set_batch_generator(
                processor.data_generator(data_path=input_file,
                                         batch_size=args.batch_size,
                                         phase='predict',
                                         shuffle=False,
                                         dev_count=1,
                                         epoch=1), place)
            predict(exe,
                    test_prog,
                    test_data_loader, [
                        unique_ids.name, start_logits.name, end_logits.name,
                        num_seqs.name
                    ],
                    processor,
                    prefix=prefix)
def main(args):
    """Fine-tune / evaluate / predict an ERNIE sequence-labeling model."""
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    exe = fluid.Executor(place)
    reader = task_reader.SequenceLabelReader(
        vocab_path=args.vocab_path,
        label_map_config=args.label_map_config,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case,
        in_tokens=args.in_tokens,
        random_seed=args.random_seed)
    if not (args.do_train or args.do_val or args.do_test):
        raise ValueError("For args `do_train`, `do_val` and `do_test`, at "
                         "least one of them must be True.")
    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed
    if args.do_train:
        train_data_generator = reader.data_generator(
            input_file=args.train_set,
            batch_size=args.batch_size,
            epoch=args.epoch,
            shuffle=True,
            phase="train")
        num_train_examples = reader.get_num_examples(args.train_set)
        if args.in_tokens:
            # batch_size is a token budget in this mode.
            max_train_steps = args.epoch * num_train_examples // (
                args.batch_size // args.max_seq_len) // dev_count
        else:
            max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count
        warmup_steps = int(max_train_steps * args.warmup_proportion)
        print("Device count: %d" % dev_count)
        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)
        print("Num warmup steps: %d" % warmup_steps)
        train_program = fluid.Program()
        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                train_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='train_reader',
                    ernie_config=ernie_config)
                # NOTE: this optimization() variant returns only the lr var
                # (loss_scaling passed in, not returned).
                scheduled_lr = optimization(
                    loss=graph_vars["loss"],
                    warmup_steps=warmup_steps,
                    num_train_steps=max_train_steps,
                    learning_rate=args.learning_rate,
                    train_program=train_program,
                    startup_prog=startup_prog,
                    weight_decay=args.weight_decay,
                    scheduler=args.lr_scheduler,
                    use_fp16=args.use_fp16,
                    loss_scaling=args.loss_scaling)
                # fluid.memory_optimize is deprecated in later Paddle
                # releases; kept for the 1.x API this script targets.
                fluid.memory_optimize(
                    input_program=train_program,
                    skip_opt_set=[
                        graph_vars["loss"].name,
                        graph_vars["labels"].name,
                        graph_vars["infers"].name,
                        graph_vars["seq_lens"].name
                    ])
        if args.verbose:
            if args.in_tokens:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program,
                    batch_size=args.batch_size // args.max_seq_len)
            else:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program, batch_size=args.batch_size)
            print("Theoretical memory usage in training: %.3f - %.3f %s" %
                  (lower_mem, upper_mem, unit))
    if args.do_val or args.do_test:
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                # NOTE(review): rebinds `graph_vars`; variable names are shared
                # across programs so later train/test uses still resolve.
                test_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='test_reader',
                    ernie_config=ernie_config)
        test_prog = test_prog.clone(for_test=True)
    exe.run(startup_prog)
    if args.do_train:
        if args.init_checkpoint and args.init_pretraining_params:
            print(
                "WARNING: args 'init_checkpoint' and 'init_pretraining_params' "
                "both are set! Only arg 'init_checkpoint' is made valid.")
        if args.init_checkpoint:
            init_checkpoint(
                exe,
                args.init_checkpoint,
                main_program=startup_prog,
                use_fp16=args.use_fp16)
        elif args.init_pretraining_params:
            init_pretraining_params(
                exe,
                args.init_pretraining_params,
                main_program=startup_prog,
                use_fp16=args.use_fp16)
    elif args.do_val or args.do_test:
        if not args.init_checkpoint:
            raise ValueError("args 'init_checkpoint' should be set if"
                             "only doing validation or testing!")
        init_checkpoint(
            exe,
            args.init_checkpoint,
            main_program=startup_prog,
            use_fp16=args.use_fp16)
    if args.do_train:
        exec_strategy = fluid.ExecutionStrategy()
        if args.use_fast_executor:
            exec_strategy.use_experimental_executor = True
        exec_strategy.num_threads = dev_count
        exec_strategy.num_iteration_per_drop_scope = args.num_iteration_per_drop_scope
        train_exe = fluid.ParallelExecutor(
            use_cuda=args.use_cuda,
            loss_name=graph_vars["loss"].name,
            exec_strategy=exec_strategy,
            main_program=train_program)
        train_pyreader.decorate_tensor_provider(train_data_generator)
    else:
        train_exe = None
    if args.do_val or args.do_test:
        test_exe = fluid.ParallelExecutor(
            use_cuda=args.use_cuda,
            main_program=test_prog,
            share_vars_from=train_exe)
    if args.do_train:
        train_pyreader.start()
        steps = 0
        if warmup_steps > 0:
            graph_vars["learning_rate"] = scheduled_lr
        if args.save_log and args.log_path:
            if os.path.exists(args.log_path):
                raise FileExistsError("Logging file already exists!")
            with open(args.log_path, 'w') as logfile:
                logfile.write('%s\n' % time.asctime())
            print('Writing logs into %s' % args.log_path)
        time_begin = time.time()
        while True:
            try:
                steps += 1
                if steps % args.skip_steps != 0:
                    train_exe.run(fetch_list=[])
                else:
                    # Logging step: fetch train metrics.
                    outputs = evaluate(train_exe, train_program,
                                       train_pyreader, graph_vars,
                                       args.num_labels, "train", dev_count)
                    if args.verbose:
                        verbose = "train pyreader queue size: %d, " % train_pyreader.queue.size(
                        )
                        verbose += "learning rate: %f" % (
                            outputs["lr"]
                            if warmup_steps > 0 else args.learning_rate)
                        print(verbose)
                    current_example, current_epoch = reader.get_train_progress()
                    time_end = time.time()
                    used_time = time_end - time_begin
                    print("epoch: %d, progress: %d/%d, step: %d, loss: %f, "
                          "f1: %f, precision: %f, recall: %f, speed: %f steps/s"
                          % (current_epoch, current_example,
                             num_train_examples, steps, outputs["loss"],
                             outputs["f1"], outputs["precision"],
                             outputs["recall"], args.skip_steps / used_time))
                    if args.save_log and args.log_path:
                        with open(args.log_path, 'a') as logfile:
                            logfile.write(
                                "epoch: %d, progress: %d/%d, step: %d, loss: %f, "
                                "f1: %f, precision: %f, recall: %f\n" %
                                (current_epoch, current_example,
                                 num_train_examples, steps, outputs["loss"],
                                 outputs["f1"], outputs["precision"],
                                 outputs["recall"]))
                    time_begin = time.time()
                if steps % args.save_steps == 0:
                    save_path = os.path.join(args.checkpoints,
                                             "step_" + str(steps))
                    fluid.io.save_persistables(exe, save_path, train_program)
                if steps % args.validation_steps == 0:
                    # evaluate dev set
                    if args.do_val:
                        test_pyreader.decorate_tensor_provider(
                            reader.data_generator(
                                args.dev_set,
                                batch_size=args.batch_size,
                                epoch=1,
                                shuffle=False))
                        evaluate(exe, test_prog, test_pyreader, graph_vars,
                                 args.num_labels, "dev")
                    # evaluate test set
                    if args.do_test:
                        test_pyreader.decorate_tensor_provider(
                            reader.data_generator(
                                args.test_set,
                                batch_size=args.batch_size,
                                epoch=1,
                                shuffle=False))
                        evaluate(exe, test_prog, test_pyreader, graph_vars,
                                 args.num_labels, "test")
            except fluid.core.EOFException:
                save_path = os.path.join(args.checkpoints,
                                         "step_" + str(steps))
                fluid.io.save_persistables(exe, save_path, train_program)
                train_pyreader.reset()
                break
    # final eval on dev set
    if args.do_val:
        test_pyreader.decorate_tensor_provider(
            reader.data_generator(
                args.dev_set,
                batch_size=args.batch_size,
                epoch=1,
                shuffle=False))
        print("Final validation result:")
        evaluate(exe, test_prog, test_pyreader, graph_vars, args.num_labels,
                 "dev")
        if args.do_predict:
            print("Saving predicted results...")
            predict(exe, test_prog, test_pyreader, graph_vars,
                    args.label_map_config, "test",
                    output_dir="./predicted_results")
    # final eval on test set
    if args.do_test:
        test_pyreader.decorate_tensor_provider(
            reader.data_generator(
                args.test_set,
                batch_size=args.batch_size,
                epoch=1,
                shuffle=False))
        print("Final test result:")
        evaluate(exe, test_prog, test_pyreader, graph_vars, args.num_labels,
                 "test")
        if args.do_predict:
            print("Saving predicted results...")
            predict(exe, test_prog, test_pyreader, graph_vars,
                    args.label_map_config, "test",
                    output_dir="./predicted_results")
def main(args):
    """Export a trained ERNIE classifier as an inference model, reload it,
    and print predictions for `args.predict_set`.

    Side effects: writes the inference model under
    `args.save_inference_model_path/<ckpt>_inference_model` and prints one
    line per example. Raises ValueError if `args.init_checkpoint` is unset.
    """
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    reader = ClassifyReader(vocab_path=args.vocab_path,
                            label_map_config=args.label_map_config,
                            max_seq_len=args.max_seq_len,
                            do_lower_case=args.do_lower_case,
                            in_tokens=False,
                            is_inference=True)

    # Build the prediction graph in its own program pair.
    predict_prog = fluid.Program()
    predict_startup = fluid.Program()
    with fluid.program_guard(predict_prog, predict_startup):
        with fluid.unique_name.guard():
            predict_pyreader, probs, feed_target_names = create_model(
                args,
                pyreader_name='predict_reader',
                ernie_config=ernie_config,
                is_prediction=True)

    predict_prog = predict_prog.clone(for_test=True)

    # Fix: the original selected `place` via an if/else (also computing an
    # unused dev_count) and then immediately overwrote it with a redundant
    # `... if args.use_cuda == True else ...` ternary. One idiomatic
    # selection is enough; behavior is unchanged.
    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(predict_startup)

    if args.init_checkpoint:
        init_pretraining_params(exe, args.init_checkpoint, predict_prog)
    else:
        raise ValueError(
            "args 'init_checkpoint' should be set for prediction!")

    assert args.save_inference_model_path, "args save_inference_model_path should be set for prediction"
    # Derive the output directory name from the checkpoint directory name.
    _, ckpt_dir = os.path.split(args.init_checkpoint.rstrip('/'))
    dir_name = ckpt_dir + '_inference_model'
    model_path = os.path.join(args.save_inference_model_path, dir_name)
    print("save inference model to %s" % model_path)
    fluid.io.save_inference_model(model_path,
                                  feed_target_names, [probs],
                                  exe,
                                  main_program=predict_prog)

    # Round-trip: reload the just-saved model so prediction exercises the
    # exact artifact that was written to disk.
    print("load inference model from %s" % model_path)
    infer_program, feed_target_names, probs = fluid.io.load_inference_model(
        model_path, exe)

    # Feed-name order is fixed by create_model: src, sent, pos, mask.
    src_ids = feed_target_names[0]
    sent_ids = feed_target_names[1]
    pos_ids = feed_target_names[2]
    input_mask = feed_target_names[3]

    predict_data_generator = reader.data_generator(
        input_file=args.predict_set,
        batch_size=args.batch_size,
        epoch=1,
        shuffle=False)

    print("-------------- prediction results --------------")
    np.set_printoptions(precision=4, suppress=True)
    index = 0
    for sample in predict_data_generator():
        src_ids_data = sample[0]
        sent_ids_data = sample[1]
        pos_ids_data = sample[2]
        input_mask_data = sample[3]
        output = exe.run(infer_program,
                         feed={src_ids: src_ids_data,
                               sent_ids: sent_ids_data,
                               pos_ids: pos_ids_data,
                               input_mask: input_mask_data},
                         fetch_list=probs)
        for single_result in output[0]:
            print("example_index:{}\t{}".format(index, single_result))
            index += 1
def train(args):
    """ERNIE pretraining driver: builds train/test programs, optionally wraps
    them with fleet for distributed training, then runs the training loop
    with periodic logging, checkpointing, and validation.

    Side effects: reads env vars (PADDLE_NODES_NUM, PADDLE_CURRENT_ENDPOINT,
    FLAGS_selected_gpus), writes checkpoints under `args.checkpoints`.
    """
    print("pretraining start")
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    with open(args.task_group_json) as f:
        task_group = json.load(f)

    exec_strategy = fluid.ExecutionStrategy()
    if args.use_fast_executor:
        exec_strategy.use_experimental_executor = True
    exec_strategy.num_threads = 4 if args.use_amp else 2
    # NOTE(review): min(1, skip_steps) is 1 for any skip_steps >= 1;
    # possibly max() or min(10, ...) was intended — kept as-is, confirm.
    exec_strategy.num_iteration_per_drop_scope = min(1, args.skip_steps)

    node_nums = int(os.getenv("PADDLE_NODES_NUM"))
    print("args.is_distributed:", args.is_distributed)
    num_trainers = 1
    trainer_id = 0

    if args.is_distributed:
        role = role_maker.PaddleCloudRoleMaker(is_collective=True)
        fleet.init(role)
        trainer_id = fleet.worker_index()
        current_endpoint = os.getenv("PADDLE_CURRENT_ENDPOINT")
        worker_endpoints = fleet.worker_endpoints()
        trainers_num = len(worker_endpoints)
        print("worker_endpoints:{} trainers_num:{} current_endpoint:{} trainer_id:{}"
              .format(worker_endpoints, trainers_num, current_endpoint,
                      trainer_id))

        dist_strategy = DistributedStrategy()
        dist_strategy.exec_strategy = exec_strategy
        dist_strategy.remove_unnecessary_lock = False  # not useful
        dist_strategy.fuse_all_reduce_ops = True if args.use_fuse else False
        dist_strategy.nccl_comm_num = args.nccl_comm_num

        if args.use_hierarchical_allreduce \
                and trainers_num > args.hierarchical_allreduce_inter_nranks:
            dist_strategy.use_hierarchical_allreduce = \
                args.use_hierarchical_allreduce
            dist_strategy.hierarchical_allreduce_inter_nranks = \
                args.hierarchical_allreduce_inter_nranks
            # Fix: the original asserted `use_hierarchical_allreduce > 1`,
            # i.e. a boolean compared to 1, which is always False here and
            # crashed whenever hierarchical allreduce was enabled. The
            # intended sanity check is on the inter-node rank count.
            assert dist_strategy.hierarchical_allreduce_inter_nranks > 1
            assert trainers_num % \
                dist_strategy.hierarchical_allreduce_inter_nranks == 0
            # Fix: use integer division — exter_nranks is a rank count and
            # `/` yields a float under Python 3.
            dist_strategy.hierarchical_allreduce_exter_nranks = \
                trainers_num // dist_strategy.hierarchical_allreduce_inter_nranks

        if args.use_amp:
            dist_strategy.use_amp = True
            dist_strategy.amp_loss_scaling = args.init_loss_scaling
        if args.use_recompute:
            dist_strategy.forward_recompute = True
            dist_strategy.enable_sequential_execution = True

        trainer_id = fleet.worker_index()
        current_endpoint = os.getenv("PADDLE_CURRENT_ENDPOINT")
        worker_endpoints = fleet.worker_endpoints()
        trainers_num = len(worker_endpoints)
        print("worker_endpoints:{} trainers_num:{} current_endpoint:{} trainer_id:{}"
              .format(worker_endpoints, trainers_num, current_endpoint,
                      trainer_id))
    else:
        dist_strategy = None

    gpu_id = 0
    gpus = fluid.core.get_cuda_device_count()
    if args.is_distributed:
        gpus = os.getenv("FLAGS_selected_gpus").split(",")
        gpu_id = int(gpus[0])

    if args.use_cuda:
        place = fluid.CUDAPlace(gpu_id)
        # len(gpus) works for both branches above: int count is not used
        # here when non-distributed... NOTE(review): when not distributed,
        # `gpus` is an int and len(gpus) would raise — confirm the
        # non-distributed CUDA path is exercised.
        dev_count = len(gpus)
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    print("Device count %d, gpu_id:%d" % (dev_count, gpu_id))

    # Build the training graph.
    train_program = fluid.Program()
    startup_prog = fluid.Program()
    with fluid.program_guard(train_program, startup_prog):
        with fluid.unique_name.guard():
            train_pyreader, fetch_vars = create_model(
                pyreader_name='train_reader',
                ernie_config=ernie_config,
                task_group=task_group)
            graph_vars = fetch_vars["graph_vars"]
            checkpoints = fetch_vars["checkpoints"]
            total_loss = graph_vars[-1]
            if args.use_recompute:
                dist_strategy.recompute_checkpoints = checkpoints
            scheduled_lr, loss_scaling = optimization(
                loss=total_loss,
                warmup_steps=args.warmup_steps,
                num_train_steps=args.num_train_steps,
                learning_rate=args.learning_rate,
                train_program=train_program,
                startup_prog=startup_prog,
                weight_decay=args.weight_decay,
                scheduler=args.lr_scheduler,
                use_fp16=args.use_amp,
                use_dynamic_loss_scaling=args.use_dynamic_loss_scaling,
                init_loss_scaling=args.init_loss_scaling,
                incr_every_n_steps=args.incr_every_n_steps,
                decr_every_n_nan_or_inf=args.decr_every_n_nan_or_inf,
                incr_ratio=args.incr_ratio,
                decr_ratio=args.decr_ratio,
                dist_strategy=dist_strategy)

    origin_train_program = train_program
    if args.is_distributed:
        # Fleet rewrites the program; run fleet's main_program but keep the
        # original for checkpoint saving/loading.
        train_program = fleet.main_program
        origin_train_program = fleet._origin_program

    # Build the evaluation graph (shares parameters via startup_prog).
    test_prog = fluid.Program()
    with fluid.program_guard(test_prog, startup_prog):
        with fluid.unique_name.guard():
            test_pyreader, fetch_vars = create_model(
                pyreader_name='test_reader',
                ernie_config=ernie_config,
                task_group=task_group)
            graph_vars = fetch_vars["graph_vars"]
            total_loss = graph_vars[-1]

    test_prog = test_prog.clone(for_test=True)

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    if args.init_checkpoint and args.init_checkpoint != "":
        # init_checkpoint(exe, args.init_checkpoint, origin_train_program, args.use_amp)
        init_pretraining_params(exe, args.init_checkpoint,
                                origin_train_program, args.use_amp)

    data_reader = ErnieDataReader(
        task_group,
        False,
        batch_size=args.batch_size,
        vocab_path=args.vocab_path,
        voc_size=ernie_config['vocab_size'],
        epoch=args.epoch,
        max_seq_len=args.max_seq_len,
        generate_neg_sample=args.generate_neg_sample,
        hack_old_trainset=args.hack_old_data)

    # only fleet
    train_exe = exe

    predict = predict_wrapper(
        args,
        exe,
        ernie_config,
        task_group,
        test_prog=test_prog,
        pyreader=test_pyreader,
        fetch_list=[var.name for var in graph_vars])

    train_pyreader.set_batch_generator(data_reader.data_generator())
    train_pyreader.start()
    # NOTE(review): step counter starts at 112000 — looks like a hard-coded
    # resume point left over from a specific run; confirm intent.
    steps = 112000
    time_begin = time.time()
    node_nums = int(os.getenv("PADDLE_NODES_NUM"))
    while True:  # steps < args.num_train_steps:
        try:
            steps += 1  # node_nums
            skip_steps = args.skip_steps  # * node_nums

            # Only trainer 0 fetches metrics, and only every skip_steps.
            fetch_list = []
            if trainer_id == 0 and steps % skip_steps == 0:
                fetch_list = [var.name for var in graph_vars] + \
                    [scheduled_lr.name]
                if args.use_amp:
                    fetch_list.append(loss_scaling.name)

            outputs = train_exe.run(fetch_list=fetch_list,
                                    program=train_program)
            time_end = time.time()
            used_time = time_end - time_begin

            if outputs:
                # Layout: [mask_lm_cost, lm_w, (acc, w) per task...,
                #          constract_loss, total_cost, lr, (loss_scaling)]
                each_mask_lm_cost, lm_w = outputs[:2]
                if args.use_amp:
                    each_total_constract_loss, each_total_cost, np_lr, \
                        l_scaling = outputs[-4:]
                else:
                    each_total_constract_loss, each_total_cost, np_lr = \
                        outputs[-3:]
                acc_list = []
                index = 2
                for task in task_group:
                    each_task_acc = outputs[index]
                    task_w = outputs[index + 1]
                    acc = np.sum(each_task_acc * task_w) / np.sum(task_w)
                    acc_list.append("%s acc: %f" % (task["task_name"], acc))
                    index += 2

                print("feed_queue size", train_pyreader.queue.size())
                epoch, current_file_index, total_file, current_file, \
                    mask_type = data_reader.get_progress()
                if args.use_amp:
                    print("current learning_rate:%f, loss scaling:%f" %
                          (np_lr[0], l_scaling[0]))
                else:
                    print("current learning_rate:%f" % np_lr[0])
                print(
                    "epoch: %d, progress: %d/%d, step: %d, constract_loss: %f, loss: %f, "
                    "ppl: %f, %s, speed: %f steps/s, file: %s, mask_type: %s"
                    % (epoch, current_file_index, total_file, steps,
                       np.mean(each_total_constract_loss),
                       np.mean(each_total_cost),
                       np.exp(np.sum(each_mask_lm_cost * lm_w) / np.sum(lm_w)),
                       ", ".join(acc_list), skip_steps / used_time,
                       current_file, mask_type))
                time_begin = time.time()
            elif steps % skip_steps == 0:
                # Non-zero trainers log progress without metrics.
                epoch, current_file_index, total_file, current_file, \
                    mask_type = data_reader.get_progress()
                print("feed_queue size", train_pyreader.queue.size())
                print("epoch: %d, progress: %d/%d, step: %d, "
                      "speed: %f steps/s, file: %s, mask_type: %s"
                      % (epoch, current_file_index, total_file, steps,
                         skip_steps / used_time, current_file, mask_type))
                time_begin = time.time()

            # Only trainer 0 saves checkpoints and runs validation.
            if trainer_id != 0:
                continue

            if steps % args.save_steps == 0:
                save_path = os.path.join(args.checkpoints,
                                         "step_" + str(steps))
                fluid.io.save_persistables(exe, save_path,
                                           origin_train_program)

            if steps % args.validation_steps == 0:
                valid_list = predict()
                print("[validation_set] epoch: %d, step: %d, %s" %
                      (epoch, steps, ", ".join(valid_list)))
        except fluid.core.EOFException:
            train_pyreader.reset()
            break
help=".") parser.add_argument("--max_seq_len", default=128, type=int, help=".") parser.add_argument("--num_labels", default=2, type=int, help=".") parser.add_argument("--use_fp16", type=bool, default=False, help="Whether to use fp16 mixed precision training.") args = parser.parse_args() if __name__ == '__main__': if not args.init_checkpoint: raise ValueError("args 'init_checkpoint' should be set if" "only doing validation or testing!") ernie_config = ErnieConfig(args.ernie_config_path) ernie_config.print_config() place = fluid.CPUPlace() exe = fluid.Executor(place) startup_prog = fluid.Program() test_program = fluid.Program() with fluid.program_guard(test_program, startup_prog): with fluid.unique_name.guard(): _, _ = create_model(args, pyreader_name='test_reader', ernie_config=ernie_config) exe.run(startup_prog)
def main(args):
    """Run ERNIE forward over `args.data_set` and dump [CLS] embeddings and
    top-layer token embeddings as .npy files into `args.output_dir`.

    Raises ValueError if `args.init_pretraining_params` is unset.
    """
    # NOTE(review): `args` is immediately overwritten from the module-level
    # parser, so the caller's argument is ignored — confirm this is intended.
    args = parser.parse_args()
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    exe = fluid.Executor(place)

    reader = task_reader.ExtractEmbeddingReader(
        vocab_path=args.vocab_path,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case)

    startup_prog = fluid.Program()

    data_generator = reader.data_generator(input_file=args.data_set,
                                           batch_size=args.batch_size,
                                           epoch=1,
                                           shuffle=False)

    total_examples = reader.get_num_examples(args.data_set)

    print("Device count: %d" % dev_count)
    print("Total num examples: %d" % total_examples)

    infer_program = fluid.Program()
    with fluid.program_guard(infer_program, startup_prog):
        with fluid.unique_name.guard():
            pyreader, graph_vars = create_model(args,
                                                pyreader_name='reader',
                                                ernie_config=ernie_config)
    fluid.memory_optimize(input_program=infer_program)
    infer_program = infer_program.clone(for_test=True)

    exe.run(startup_prog)

    if args.init_pretraining_params:
        init_pretraining_params(exe,
                                args.init_pretraining_params,
                                main_program=startup_prog)
    else:
        raise ValueError(
            "WARNING: args 'init_pretraining_params' must be specified")

    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.num_threads = dev_count

    pyreader.decorate_tensor_provider(data_generator)
    pyreader.start()

    total_cls_emb = []
    total_top_layer_emb = []
    while True:
        try:
            cls_emb, unpad_top_layer_emb = exe.run(
                program=infer_program,
                fetch_list=[
                    graph_vars["cls_embeddings"].name,
                    graph_vars["top_layer_embeddings"].name
                ],
                return_numpy=False)
            # batch_size * embedding_size
            total_cls_emb.append(np.array(cls_emb))
            total_top_layer_emb.append(np.array(unpad_top_layer_emb))
        except fluid.core.EOFException:
            break

    total_cls_emb = np.concatenate(total_cls_emb)
    total_top_layer_emb = np.concatenate(total_top_layer_emb)

    # Fix: the original opened the files in text mode ("w") and passed the
    # handle to np.save, which writes bytes and fails under Python 3.
    # np.save accepts a path directly and handles binary mode itself
    # (it does not append another ".npy" since the name already ends in it).
    np.save(os.path.join(args.output_dir, "cls_emb.npy"), total_cls_emb)
    np.save(os.path.join(args.output_dir, "top_layer_emb.npy"),
            total_top_layer_emb)
def main(args):
    """MRC (machine reading comprehension) fine-tuning driver: builds the
    train/eval programs, loads checkpoints or pretrained params, runs the
    training loop with periodic checkpointing and evaluation, then performs
    final dev/test evaluation.

    Side effects: writes checkpoints under `args.checkpoints`; prints
    progress. Raises ValueError if no task flag or (for eval-only) no
    checkpoint is set.
    """
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    exe = fluid.Executor(place)

    reader = task_reader.MRCReader(
        vocab_path=args.vocab_path,
        label_map_config=args.label_map_config,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case,
        in_tokens=args.in_tokens,
        random_seed=args.random_seed,
        tokenizer=args.tokenizer,
        is_classify=args.is_classify,
        is_regression=args.is_regression,
        for_cn=args.for_cn,
        task_id=args.task_id,
        doc_stride=args.doc_stride,
        max_query_length=args.max_query_length)

    if not (args.do_train or args.do_val or args.do_test):
        raise ValueError("For args `do_train`, `do_val` and `do_test`, at "
                         "least one of them must be True.")

    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    # Fix: compare to None with `is`, not `==`.
    if args.predict_batch_size is None:
        args.predict_batch_size = args.batch_size

    if args.do_train:
        train_data_generator = reader.data_generator(
            input_file=args.train_set,
            batch_size=args.batch_size,
            epoch=args.epoch,
            dev_count=dev_count,
            shuffle=True,
            phase="train")

        num_train_examples = reader.get_num_examples("train")

        # In token mode, batch_size is a token budget; convert to an
        # approximate example count before deriving step counts.
        if args.in_tokens:
            max_train_steps = args.epoch * num_train_examples // (
                args.batch_size // args.max_seq_len) // dev_count
        else:
            max_train_steps = args.epoch * num_train_examples \
                // args.batch_size // dev_count

        warmup_steps = int(max_train_steps * args.warmup_proportion)
        print("Device count: %d" % dev_count)
        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)
        print("Num warmup steps: %d" % warmup_steps)

        train_program = fluid.Program()

        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                train_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='train_reader',
                    ernie_config=ernie_config,
                    is_training=True)

                scheduled_lr, loss_scaling = optimization(
                    loss=graph_vars["loss"],
                    warmup_steps=warmup_steps,
                    num_train_steps=max_train_steps,
                    learning_rate=args.learning_rate,
                    train_program=train_program,
                    startup_prog=startup_prog,
                    weight_decay=args.weight_decay,
                    scheduler=args.lr_scheduler,
                    use_fp16=args.use_fp16)

        if args.verbose:
            if args.in_tokens:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program,
                    batch_size=args.batch_size // args.max_seq_len)
            else:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program, batch_size=args.batch_size)
            print("Theoretical memory usage in training: %.3f - %.3f %s" %
                  (lower_mem, upper_mem, unit))

    if args.do_val or args.do_test:
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                test_pyreader, test_graph_vars = create_model(
                    args,
                    pyreader_name='test_reader',
                    ernie_config=ernie_config,
                    is_training=False)
        test_prog = test_prog.clone(for_test=True)

    nccl2_num_trainers = 1
    nccl2_trainer_id = 0

    exe.run(startup_prog)

    # Parameter initialization: checkpoint wins over pretraining params.
    if args.do_train:
        if args.init_checkpoint and args.init_pretraining_params:
            print(
                "WARNING: args 'init_checkpoint' and 'init_pretraining_params' "
                "both are set! Only arg 'init_checkpoint' is made valid.")
        if args.init_checkpoint:
            init_checkpoint(exe,
                            args.init_checkpoint,
                            main_program=startup_prog,
                            use_fp16=args.use_fp16)
        elif args.init_pretraining_params:
            init_pretraining_params(exe,
                                    args.init_pretraining_params,
                                    main_program=startup_prog,
                                    use_fp16=args.use_fp16)
    elif args.do_val or args.do_test:
        if not args.init_checkpoint:
            raise ValueError("args 'init_checkpoint' should be set if"
                             "only doing validation or testing!")
        init_checkpoint(exe,
                        args.init_checkpoint,
                        main_program=startup_prog,
                        use_fp16=args.use_fp16)

    if args.do_train:
        exec_strategy = fluid.ExecutionStrategy()
        if args.use_fast_executor:
            exec_strategy.use_experimental_executor = True
        exec_strategy.num_threads = dev_count
        exec_strategy.num_iteration_per_drop_scope = \
            args.num_iteration_per_drop_scope

        train_exe = fluid.ParallelExecutor(
            use_cuda=args.use_cuda,
            loss_name=graph_vars["loss"].name,
            exec_strategy=exec_strategy,
            main_program=train_program,
            num_trainers=nccl2_num_trainers,
            trainer_id=nccl2_trainer_id)

        train_pyreader.decorate_tensor_provider(train_data_generator)
    else:
        train_exe = None

    if args.do_train:
        train_pyreader.start()
        steps = 0
        if warmup_steps > 0:
            graph_vars["learning_rate"] = scheduled_lr

        time_begin = time.time()
        while True:
            try:
                steps += 1
                if steps % args.skip_steps != 0:
                    # Fast path: run without fetching metrics.
                    train_exe.run(fetch_list=[])
                else:
                    outputs = evaluate(train_exe, train_program,
                                       train_pyreader, graph_vars, "train")

                    if args.verbose:
                        verbose = "train pyreader queue size: %d, " \
                            % train_pyreader.queue.size()
                        verbose += "learning rate: %f" % (
                            outputs["learning_rate"]
                            if warmup_steps > 0 else args.learning_rate)
                        print(verbose)

                    current_example, current_epoch = \
                        reader.get_train_progress()
                    time_end = time.time()
                    used_time = time_end - time_begin
                    print(
                        "epoch: %d, progress: %d/%d, step: %d, ave loss: %f, "
                        "speed: %f steps/s" %
                        (current_epoch, current_example, num_train_examples,
                         steps, outputs["loss"],
                         args.skip_steps / used_time))
                    time_begin = time.time()

                if steps % args.save_steps == 0:
                    save_path = os.path.join(args.checkpoints,
                                             "step_" + str(steps))
                    fluid.io.save_persistables(exe, save_path, train_program)

                if steps % args.validation_steps == 0:
                    if args.do_val:
                        test_pyreader.decorate_tensor_provider(
                            reader.data_generator(args.dev_set,
                                                  batch_size=args.batch_size,
                                                  epoch=1,
                                                  dev_count=1,
                                                  shuffle=False,
                                                  phase="dev"))
                        evaluate(exe, test_prog, test_pyreader,
                                 test_graph_vars,
                                 str(steps) + "_dev",
                                 examples=reader.get_examples("dev"),
                                 features=reader.get_features("dev"),
                                 args=args)
                    if args.do_test:
                        test_pyreader.decorate_tensor_provider(
                            reader.data_generator(args.test_set,
                                                  batch_size=args.batch_size,
                                                  epoch=1,
                                                  dev_count=1,
                                                  shuffle=False,
                                                  phase="test"))
                        evaluate(exe, test_prog, test_pyreader,
                                 test_graph_vars,
                                 str(steps) + "_test",
                                 examples=reader.get_examples("test"),
                                 features=reader.get_features("test"),
                                 args=args)
            except fluid.core.EOFException:
                # Data exhausted: save a final checkpoint and stop.
                save_path = os.path.join(args.checkpoints,
                                         "step_" + str(steps))
                fluid.io.save_persistables(exe, save_path, train_program)
                train_pyreader.reset()
                break

    # final eval on dev set
    if args.do_val:
        print("Final validation result:")
        test_pyreader.decorate_tensor_provider(
            reader.data_generator(args.dev_set,
                                  batch_size=args.batch_size,
                                  epoch=1,
                                  dev_count=1,
                                  shuffle=False,
                                  phase="dev"))
        evaluate(exe, test_prog, test_pyreader, test_graph_vars,
                 "dev",
                 examples=reader.get_examples("dev"),
                 features=reader.get_features("dev"),
                 args=args)

    # final eval on test set
    if args.do_test:
        print("Final test result:")
        test_pyreader.decorate_tensor_provider(
            reader.data_generator(args.test_set,
                                  batch_size=args.batch_size,
                                  epoch=1,
                                  dev_count=1,
                                  shuffle=False,
                                  phase="test"))
        evaluate(exe, test_prog, test_pyreader, test_graph_vars,
                 "test",
                 examples=reader.get_examples("test"),
                 features=reader.get_features("test"),
                 args=args)
def main(args):
    """Classification fine-tuning driver using the fleet collective API:
    builds train/eval programs, runs the distributed training loop (only
    worker 0 logs/saves/evaluates), then performs final dev/test evaluation
    and an optional diagnostic prediction dump (GLUE-AX hack).

    Side effects: writes checkpoints under `args.checkpoints`, prediction
    files via predict_wrapper, and `args.diagnostic_save` when requested.
    """
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    if args.use_cuda:
        dev_list = fluid.cuda_places()
        place = dev_list[0]
        dev_count = len(dev_list)
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    exe = fluid.Executor(place)

    reader = reader_ce.ClassifyReader(
        vocab_path=args.vocab_path,
        label_map_config=args.label_map_config,
        max_seq_len=args.max_seq_len,
        total_num=args.train_data_size,
        do_lower_case=args.do_lower_case,
        in_tokens=args.in_tokens,
        random_seed=args.random_seed,
        tokenizer=args.tokenizer,
        for_cn=args.for_cn,
        task_id=args.task_id)

    if not (args.do_train or args.do_val or args.do_test):
        raise ValueError("For args `do_train`, `do_val` and `do_test`, at "
                         "least one of them must be True.")

    if args.do_test:
        assert args.test_save is not None

    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    # Fix: compare to None with `is`, not `==`.
    if args.predict_batch_size is None:
        args.predict_batch_size = args.batch_size

    if args.do_train:
        role = role_maker.PaddleCloudRoleMaker(is_collective=True)
        fleet.init(role)
        dev_count = fleet.worker_num()

        train_data_generator = reader.data_generator(
            input_file=args.train_set,
            batch_size=args.batch_size,
            epoch=args.epoch,
            dev_count=1,
            trainer_id=fleet.worker_index(),
            trainer_num=fleet.worker_num(),
            shuffle=True,
            phase="train")

        num_train_examples = reader.get_num_examples(args.train_set)

        if args.in_tokens:
            max_train_steps = args.epoch * num_train_examples // (
                args.batch_size // args.max_seq_len) // dev_count
        else:
            max_train_steps = args.epoch * num_train_examples \
                // args.batch_size // dev_count

        warmup_steps = int(max_train_steps * args.warmup_proportion)
        log.info("Device count: %d" % dev_count)
        log.info("Num train examples: %d" % num_train_examples)
        log.info("Max train steps: %d" % max_train_steps)
        log.info("Num warmup steps: %d" % warmup_steps)

        train_program = fluid.Program()

        # use fleet api
        exec_strategy = fluid.ExecutionStrategy()
        if args.use_fast_executor:
            exec_strategy.use_experimental_executor = True
        exec_strategy.num_threads = dev_count
        if args.is_distributed:
            exec_strategy.num_threads = 3

        exec_strategy.num_iteration_per_drop_scope = \
            args.num_iteration_per_drop_scope

        dist_strategy = DistributedStrategy()
        dist_strategy.exec_strategy = exec_strategy
        dist_strategy.nccl_comm_num = 1
        if args.is_distributed:
            dist_strategy.nccl_comm_num = 2
            dist_strategy.use_hierarchical_allreduce = True
        if args.use_mix_precision:
            dist_strategy.use_amp = True

        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                train_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='train_reader',
                    ernie_config=ernie_config)
                scheduled_lr = optimization(
                    loss=graph_vars["loss"],
                    warmup_steps=warmup_steps,
                    num_train_steps=max_train_steps,
                    learning_rate=args.learning_rate,
                    train_program=train_program,
                    startup_prog=startup_prog,
                    weight_decay=args.weight_decay,
                    scheduler=args.lr_scheduler,
                    use_dynamic_loss_scaling=args.use_dynamic_loss_scaling,
                    incr_every_n_steps=args.incr_every_n_steps,
                    decr_every_n_nan_or_inf=args.decr_every_n_nan_or_inf,
                    incr_ratio=args.incr_ratio,
                    decr_ratio=args.decr_ratio,
                    dist_strategy=dist_strategy)

        if args.verbose:
            if args.in_tokens:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program,
                    batch_size=args.batch_size // args.max_seq_len)
            else:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program, batch_size=args.batch_size)
            log.info("Theoretical memory usage in training: %.3f - %.3f %s" %
                     (lower_mem, upper_mem, unit))

    if args.do_val or args.do_test:
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                test_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='test_reader',
                    ernie_config=ernie_config,
                    is_prediction=True)
        test_prog = test_prog.clone(for_test=True)

    # NOTE(review): this runs even when do_train is False, in which case
    # fleet.init() was never called — likely fails for eval-only runs;
    # confirm whether it should be guarded by `if args.do_train`.
    train_program = fleet.main_program

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    # Parameter initialization: checkpoint wins over pretraining params.
    if args.do_train:
        if args.init_checkpoint and args.init_pretraining_params:
            log.warning(
                "WARNING: args 'init_checkpoint' and 'init_pretraining_params' "
                "both are set! Only arg 'init_checkpoint' is made valid.")
        if args.init_checkpoint:
            init_checkpoint(exe, args.init_checkpoint,
                            main_program=startup_prog)
        elif args.init_pretraining_params:
            init_pretraining_params(exe, args.init_pretraining_params,
                                    main_program=startup_prog)
    elif args.do_val or args.do_test:
        if not args.init_checkpoint:
            raise ValueError("args 'init_checkpoint' should be set if"
                             "only doing validation or testing!")
        init_checkpoint(exe, args.init_checkpoint,
                        main_program=startup_prog)

    if args.do_train:
        train_exe = exe
        train_pyreader.decorate_tensor_provider(train_data_generator)
    else:
        train_exe = None

    test_exe = exe
    # if args.do_val or args.do_test:
    #     if args.use_multi_gpu_test:
    #         test_exe = fluid.ParallelExecutor(
    #             use_cuda=args.use_cuda,
    #             main_program=test_prog,
    #             share_vars_from=train_exe)

    current_epoch = 0
    steps = 0
    if args.do_train:
        train_pyreader.start()
        if warmup_steps > 0:
            graph_vars["learning_rate"] = scheduled_lr
        ce_info = []
        time_begin = time.time()
        last_epoch = 0
        while True:
            try:
                steps += 1
                # log.info("step: %d" % steps)
                # Non-zero workers only step the graph; worker 0 does
                # logging, checkpointing and evaluation below.
                if fleet.worker_index() != 0:
                    train_exe.run(fetch_list=[], program=train_program)
                    continue

                if steps % args.skip_steps != 0:
                    train_exe.run(fetch_list=[], program=train_program)
                else:
                    outputs = evaluate(train_exe, train_program,
                                       train_pyreader, graph_vars, "train",
                                       metric=args.metric)

                    if args.verbose:
                        verbose = "train pyreader queue size: %d, " \
                            % train_pyreader.queue.size()
                        verbose += "learning rate: %f" % (
                            outputs["learning_rate"]
                            if warmup_steps > 0 else args.learning_rate)
                        log.info(verbose)

                    current_example, current_epoch = \
                        reader.get_train_progress()
                    time_end = time.time()
                    used_time = time_end - time_begin

                    log.info(
                        "epoch: %d, progress: %d/%d, step: %d, ave loss: %f, "
                        "ave acc: %f, speed: %f steps/s" %
                        (current_epoch, current_example * dev_count,
                         num_train_examples, steps, outputs["loss"],
                         outputs["accuracy"], args.skip_steps / used_time))
                    ce_info.append(
                        [outputs["loss"], outputs["accuracy"], used_time])
                    time_begin = time.time()

                if steps % args.save_steps == 0:
                    save_path = os.path.join(args.checkpoints,
                                             "step_" + str(steps))
                    # Save the pre-fleet program so checkpoints reload
                    # cleanly outside distributed mode.
                    fluid.io.save_persistables(exe, save_path,
                                               fleet._origin_program)

                # if steps % args.validation_steps == 0 or last_epoch != current_epoch:
                if steps % args.validation_steps == 0:
                    # evaluate dev set
                    if args.do_val:
                        evaluate_wrapper(args, reader, exe, test_prog,
                                         test_pyreader, graph_vars,
                                         current_epoch, steps)
                    if args.do_test:
                        predict_wrapper(args, reader, exe, test_prog,
                                        test_pyreader, graph_vars,
                                        current_epoch, steps)

                if last_epoch != current_epoch:
                    last_epoch = current_epoch
            except fluid.core.EOFException:
                save_path = os.path.join(args.checkpoints,
                                         "step_" + str(steps))
                fluid.io.save_persistables(exe, save_path,
                                           fleet._origin_program)
                train_pyreader.reset()
                break

    # final eval on dev set
    if args.do_val:
        evaluate_wrapper(args, reader, exe, test_prog, test_pyreader,
                         graph_vars, current_epoch, steps)

    # final eval on test set
    if args.do_test:
        predict_wrapper(args, reader, exe, test_prog, test_pyreader,
                        graph_vars, current_epoch, steps)

    # final eval on diagnostic, hack for glue-ax
    if args.diagnostic:
        test_pyreader.decorate_tensor_provider(
            reader.data_generator(args.diagnostic,
                                  batch_size=args.batch_size,
                                  epoch=1,
                                  dev_count=1,
                                  shuffle=False))

        log.info("Final diagnostic")
        qids, preds, probs = predict(test_exe, test_prog, test_pyreader,
                                     graph_vars)
        assert len(qids) == len(preds), '{} v.s. {}'.format(
            len(qids), len(preds))
        with open(args.diagnostic_save, 'w') as f:
            # Fix: loop variable renamed from `id` (shadowed the builtin).
            for qid, s, p in zip(qids, preds, probs):
                f.write('{}\t{}\t{}\n'.format(qid, s, p))

        log.info("Done final diagnostic, saving to {}".format(
            args.diagnostic_save))