def do_eval(args):
    """Evaluate a trained LAC model on ``args.test_data``.

    Builds the test network, restores weights from
    ``args.init_checkpoint + '.pdckpt'`` and runs ``test_process``.

    Args:
        args: parsed command-line namespace (use_cuda, test_data,
            init_checkpoint, batch settings consumed by creator/reader).
    """
    dataset = reader.Dataset(args)

    # Build the evaluation network inside its own program so it does not
    # pollute the default main program; unique_name.guard keeps parameter
    # names aligned with the ones saved at training time.
    test_program = fluid.Program()
    with fluid.program_guard(test_program, fluid.default_startup_program()):
        with fluid.unique_name.guard():
            test_ret = creator.create_model(
                args, dataset.vocab_size, dataset.num_labels, mode='test')
    # Freeze the graph for inference/eval (drops backward ops, fixes BN/dropout).
    test_program = test_program.clone(for_test=True)

    # init executor
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
    else:
        place = fluid.CPUPlace()

    pyreader = creator.create_pyreader(args, file_name=args.test_data,
                                       feed_list=test_ret['feed_list'],
                                       place=place, mode='lac',
                                       reader=dataset, iterable=True,
                                       for_test=True)

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # load model
    # NOTE(review): unlike the sibling eval/infer functions, there is no guard
    # for an unset args.init_checkpoint here; the '.pdckpt' suffix is assumed
    # by the checkpoint-saving convention of this project — confirm.
    utils.init_checkpoint(exe, args.init_checkpoint+'.pdckpt', test_program)
    test_process(exe=exe,
                 program=test_program,
                 reader=pyreader,
                 test_ret=test_ret)
def do_infer(args):
    """Run ERNIE-based sequence-labeling inference on ``args.test_data``.

    Restores weights from ``args.init_checkpoint``, decodes every batch with
    the CRF layer and prints ``(char, tag)`` pairs per sentence to stdout.

    Raises:
        ValueError: if ``args.init_checkpoint`` is not set.
    """
    # init executor
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
    else:
        place = fluid.CPUPlace()

    # define network and reader
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    # Build the inference graph in an isolated program; clone(for_test=True)
    # strips training-only ops before execution.
    infer_program = fluid.Program()
    with fluid.program_guard(infer_program, fluid.default_startup_program()):
        with fluid.unique_name.guard():
            infer_ret = creator.create_ernie_model(args, ernie_config, is_prediction=False)
    infer_program = infer_program.clone(for_test=True)
    print(args.test_data)
    # return_reader=True also hands back the underlying reader so its
    # vocab/label maps can be used to build the id->token dictionaries below.
    pyreader, reader = creator.create_pyreader(
        args,
        file_name=args.test_data,
        feed_list=infer_ret['feed_list'],
        mode="ernie",
        place=place,
        iterable=True,
        return_reader=True,
        for_test=True)

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # load model
    if not args.init_checkpoint:
        raise ValueError(
            "args 'init_checkpoint' should be set if only doing test or infer!"
        )
    utils.init_checkpoint(exe, args.init_checkpoint, infer_program)

    # Build reverse (id -> token / id -> label) lookups; keys are stringified
    # ids because utils.parse_result presumably indexes with strings — confirm.
    id2word_dict = dict([(str(word_id), word)
                         for word, word_id in reader.vocab.items()])
    id2label_dict = dict([(str(label_id), label)
                          for label, label_id in reader.label_map.items()])
    Dataset = namedtuple("Dataset", ["id2word_dict", "id2label_dict"])
    dataset = Dataset(id2word_dict, id2label_dict)

    # make prediction
    for data in pyreader():
        (words, crf_decode) = exe.run(
            infer_program,
            fetch_list=[infer_ret["words"], infer_ret["crf_decode"]],
            feed=data[0],
            return_numpy=False)
        # NOTE: words may have been clipped if the input was longer than
        # args.max_seq_len.
        results = utils.parse_result(words, crf_decode, dataset)
        for sent, tags in results:
            result_list = [
                '(%s, %s)' % (ch, tag) for ch, tag in zip(sent, tags)
            ]
            print(''.join(result_list))
def do_eval(args):
    """Evaluate an ERNIE-based model on ``args.test_data``.

    Builds the test graph, restores ``args.init_checkpoint`` and delegates
    metric computation to ``evaluate``.

    Raises:
        ValueError: if ``args.init_checkpoint`` is not set.
    """
    # init executor
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
    else:
        place = fluid.CPUPlace()

    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    test_program = fluid.Program()
    with fluid.program_guard(test_program, fluid.default_startup_program()):
        with fluid.unique_name.guard():
            test_ret = creator.create_ernie_model(args, ernie_config)
    # Freeze graph for evaluation (no backward ops, eval-mode BN/dropout).
    test_program = test_program.clone(for_test=True)

    # NOTE(review): this call passes both model="ernie" and mode='test' while
    # sibling functions pass only one of the two — verify against the
    # creator.create_pyreader signature.
    pyreader = creator.create_pyreader(args, file_name=args.test_data,
                                       feed_list=test_ret['feed_list'],
                                       model="ernie",
                                       place=place,
                                       mode='test',)

    print('program startup')

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    print('program loading')
    # load model
    if not args.init_checkpoint:
        raise ValueError("args 'init_checkpoint' should be set if only doing test or infer!")
    utils.init_checkpoint(exe, args.init_checkpoint, test_program)

    evaluate(exe, test_program, pyreader, test_ret)
def save_inference_model(args):
    """Export a trained LAC model in inference format.

    Restores weights from ``args.init_checkpoint + '.pdckpt'`` and writes an
    inference model (model.pdmodel / params.pdparams) taking the 'words'
    input and producing the CRF decode output to ``args.inference_save_dir``.
    """
    # model definition
    exec_place = (fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
                  if args.use_cuda else fluid.CPUPlace())

    lac_dataset = reader.Dataset(args)

    # Build the inference graph in a dedicated program, then freeze it.
    program = fluid.Program()
    with fluid.program_guard(program, fluid.default_startup_program()):
        with fluid.unique_name.guard():
            model_outputs = creator.create_model(args,
                                                 lac_dataset.vocab_size,
                                                 lac_dataset.num_labels,
                                                 mode='infer')
    program = program.clone(for_test=True)

    # load pretrain check point
    executor = fluid.Executor(exec_place)
    executor.run(fluid.default_startup_program())
    utils.init_checkpoint(executor, args.init_checkpoint + '.pdckpt', program)

    fluid.io.save_inference_model(
        args.inference_save_dir,
        ['words'],
        model_outputs['crf_decode'],
        executor,
        main_program=program,
        model_filename='model.pdmodel',
        params_filename='params.pdparams',
    )
def do_save_inference_model(args):
    """Export the classification model in inference format.

    Restores weights from ``args.init_checkpoint`` and writes an inference
    model (model.pdmodel / params.pdparams) to ``args.inference_model_dir``.

    Raises:
        ValueError: if ``args.init_checkpoint`` is not set — saving without
            restoring a checkpoint would export uninitialized parameters.
    """
    # FIX: the original used `assert (args.init_checkpoint)` followed by a
    # redundant `if args.init_checkpoint:` guard. Asserts are stripped under
    # `python -O`, in which case the checkpoint load was silently skipped and
    # an uninitialized model was saved. Validate explicitly instead, matching
    # the ValueError convention used by the sibling eval/infer functions.
    if not args.init_checkpoint:
        raise ValueError(
            "args 'init_checkpoint' should be set when saving an inference model!")

    if args.use_cuda:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()

    # Build the prediction graph in the default main program, then freeze it.
    test_prog = fluid.default_main_program()
    startup_prog = fluid.default_startup_program()
    with fluid.program_guard(test_prog, startup_prog):
        with fluid.unique_name.guard():
            infer_loader, probs, feed_target_names = create_model(
                args, num_labels=args.num_labels, is_prediction=True)
    test_prog = test_prog.clone(for_test=True)

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    utils.init_checkpoint(exe, args.init_checkpoint, test_prog)

    fluid.io.save_inference_model(
        args.inference_model_dir,
        feeded_var_names=feed_target_names,
        target_vars=[probs],
        executor=exe,
        main_program=test_prog,
        model_filename="model.pdmodel",
        params_filename="params.pdparams")

    print("save inference model at %s" % (args.inference_model_dir))
def do_save_inference_model(args):
    """Export the ERNIE-based classification model in inference format.

    Builds the ERNIE encoder (locally or via PaddleHub), restores weights
    from ``args.init_checkpoint`` and writes an inference model
    (model.pdmodel / params.pdparams) to ``args.inference_model_dir``.

    Raises:
        ValueError: if ``args.init_checkpoint`` is not set — saving without
            restoring a checkpoint would export uninitialized parameters.
    """
    # FIX: the original used `assert (args.init_checkpoint)` plus a redundant
    # `if` guard. Asserts are stripped under `python -O`, silently skipping the
    # checkpoint load and exporting uninitialized parameters. Validate
    # explicitly, matching the ValueError convention of the sibling functions.
    if not args.init_checkpoint:
        raise ValueError(
            "args 'init_checkpoint' should be set when saving an inference model!")

    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    if args.use_cuda:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    # Build the prediction graph in dedicated programs, then freeze it.
    test_prog = fluid.Program()
    startup_prog = fluid.Program()
    with fluid.program_guard(test_prog, startup_prog):
        with fluid.unique_name.guard():
            infer_pyreader, ernie_inputs, labels = ernie_pyreader(
                args, pyreader_name="infer_reader")
            # Either fetch the encoder from PaddleHub or build it locally.
            if args.use_paddle_hub:
                embeddings = ernie_encoder_with_paddle_hub(
                    ernie_inputs, args.max_seq_len)
            else:
                embeddings = ernie_encoder(ernie_inputs,
                                           ernie_config=ernie_config)
            probs = create_model(args,
                                 embeddings,
                                 labels=labels,
                                 is_prediction=True)
    test_prog = test_prog.clone(for_test=True)
    exe.run(startup_prog)

    utils.init_checkpoint(exe, args.init_checkpoint, test_prog)

    fluid.io.save_inference_model(args.inference_model_dir,
                                  feeded_var_names=[
                                      ernie_inputs["src_ids"].name,
                                      ernie_inputs["sent_ids"].name,
                                      ernie_inputs["pos_ids"].name,
                                      ernie_inputs["input_mask"].name,
                                      ernie_inputs["seq_lens"].name
                                  ],
                                  target_vars=[probs],
                                  executor=exe,
                                  main_program=test_prog,
                                  model_filename="model.pdmodel",
                                  params_filename="params.pdparams")

    print("save inference model at %s" % (args.inference_model_dir))
def load(self, model_dir, is_checkpoint=False):
    """Restore model state from *model_dir*.

    Args:
        model_dir: directory holding the saved persistables/parameters.
        is_checkpoint: when True, restore full training persistables via
            ``init_checkpoint``; otherwise restore pretraining parameters
            via ``init_pretraining_params``.
    """
    # TODO: support dygraph.
    restore = init_checkpoint if is_checkpoint else init_pretraining_params
    restore(self.exe, model_dir, self.program)
    return
def _build_programs(self):
    """
    Build programs.

    Build train_program, eval_program and inference_program. Only use in
    static graph mode.

    In ``run_infer`` mode only the inference program is built; otherwise a
    train program is built, cloned for evaluation, and then extended with
    optimizer ops. In both branches the startup program is run and any
    configured pretraining parameters / checkpoint are restored into the
    program that will be executed.
    """
    if self.run_infer:
        self.startup_program = fluid.Program()
        # build infer program
        self.infer_program = fluid.Program()
        with fluid.program_guard(self.infer_program, self.startup_program):
            with fluid.unique_name.guard():
                self.infer_feed_dict = inputs = self._get_feed_dict(is_infer=True)
                outputs = self.forward(inputs, is_infer=True)
                predictions = self.infer(inputs, outputs)
                self.infer_fetch_dict = predictions
        # Freeze the graph for inference (drops training-only ops).
        self.infer_program = self.infer_program.clone(for_test=True)

        self.exe.run(self.startup_program)
        # Restore weights: pretraining params first, then (optionally) a full
        # checkpoint which takes precedence for overlapping variables.
        if self.init_pretraining_params is not None:
            init_pretraining_params(self.exe, self.init_pretraining_params, self.infer_program)
        if self.init_checkpoint is not None:
            init_checkpoint(self.exe, self.init_checkpoint, self.infer_program)
    else:
        self.startup_program = fluid.Program()
        # build train program
        self.train_program = fluid.Program()
        with fluid.program_guard(self.train_program, self.startup_program):
            with fluid.unique_name.guard():
                self.feed_dict = inputs = self._get_feed_dict()
                outputs = self.forward(inputs)
                metrics, statistics = self.get_metrics_and_statistics(inputs, outputs)

                # build eval program
                # NOTE: the eval program must be cloned BEFORE optimize() so
                # it shares parameters but contains no backward/optimizer ops.
                self.eval_program = self.train_program.clone(for_test=True)
                self.eval_fetch_dict = {**metrics, **statistics}

                self.optimize(metrics)
                self.train_fetch_dict = metrics

        self.exe.run(self.startup_program)
        # Restore weights as above, into the train program.
        if self.init_pretraining_params is not None:
            init_pretraining_params(self.exe, self.init_pretraining_params, self.train_program)
        if self.init_checkpoint is not None:
            init_checkpoint(self.exe, self.init_checkpoint, self.train_program)
    return
def do_infer(args):
    """Run LAC inference on ``args.infer_data`` and write results to a file.

    Restores weights from ``args.init_checkpoint`` and writes one line of
    ``(char, tag)`` pairs per sentence to ``../processed.txt``.
    """
    dataset = reader.Dataset(args)

    # Build the inference graph in its own program, then freeze it.
    infer_program = fluid.Program()
    with fluid.program_guard(infer_program, fluid.default_startup_program()):
        with fluid.unique_name.guard():
            infer_ret = creator.create_model(args,
                                             dataset.vocab_size,
                                             dataset.num_labels,
                                             mode='infer')
    infer_program = infer_program.clone(for_test=True)

    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
    else:
        place = fluid.CPUPlace()

    # NOTE(review): this call passes model='lac' while the sibling do_eval
    # passes mode='lac' — verify against the creator.create_pyreader signature.
    pyreader = creator.create_pyreader(args, file_name=args.infer_data,
                                       feed_list=infer_ret['feed_list'],
                                       place=place, model='lac',
                                       reader=dataset, mode='infer')

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # load model
    utils.init_checkpoint(exe, args.init_checkpoint, infer_program)

    result = infer_process(
        exe=exe,
        program=infer_program,
        reader=pyreader,
        fetch_vars=[infer_ret['words'], infer_ret['crf_decode']],
        dataset=dataset)
    # Persist the decoded (char, tag) pairs, one sentence per line.
    with open('../processed.txt', 'w') as f:
        for sent, tags in result:
            result_list = [
                '(%s, %s)' % (ch, tag) for ch, tag in zip(sent, tags)
            ]
            f.write(''.join(result_list) + '\n')
def do_infer(args):
    """Run LAC inference on ``args.infer_data`` and print the results.

    Restores weights from ``args.init_checkpoint``, feeds batches through a
    ``fluid.io.PyReader`` and prints one line of ``(char, tag)`` pairs per
    sentence to stdout.
    """
    data_source = reader.Dataset(args)

    # Build the inference graph in its own program, then freeze it.
    program = fluid.Program()
    with fluid.program_guard(program, fluid.default_startup_program()):
        with fluid.unique_name.guard():
            model = creator.create_model(args,
                                         data_source.vocab_size,
                                         data_source.num_labels,
                                         mode='infer')
    program = program.clone(for_test=True)

    device = (fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
              if args.use_cuda else fluid.CPUPlace())

    # Iterable PyReader feeding the 'words' input from batched raw data.
    loader = fluid.io.PyReader(feed_list=[model['words']],
                               capacity=10,
                               iterable=True,
                               return_list=False)
    batched = paddle.batch(data_source.file_reader(args.infer_data, mode='infer'),
                           batch_size=args.batch_size)
    loader.decorate_sample_list_generator(batched, places=device)

    executor = fluid.Executor(device)
    executor.run(fluid.default_startup_program())

    # load model
    utils.init_checkpoint(executor, args.init_checkpoint, program)

    predictions = infer_process(
        exe=executor,
        program=program,
        reader=loader,
        fetch_vars=[model['words'], model['crf_decode']],
        dataset=data_source)
    for sent, tags in predictions:
        pairs = ['(%s, %s)' % (ch, tag) for ch, tag in zip(sent, tags)]
        print(''.join(pairs))
def do_train(args):
    """
    Main Function

    Train the ERNIE-based sequence-labeling model: builds the train/test
    programs, restores a checkpoint or pretraining params, then runs the
    epoch loop with periodic logging, checkpointing and validation.
    """
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = 1
    else:
        # Cap the worker count at the actual number of CPUs available.
        dev_count = min(multiprocessing.cpu_count(), args.cpu_num)
        if (dev_count < args.cpu_num):
            print(
                "WARNING: The total CPU NUM in this machine is %d, which is less than cpu_num parameter you set. "
                "Change the cpu_num from %d to %d" %
                (dev_count, args.cpu_num, dev_count))
        os.environ['CPU_NUM'] = str(dev_count)
        place = fluid.CPUPlace()

    exe = fluid.Executor(place)

    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    train_program = fluid.Program()
    with fluid.program_guard(train_program, startup_prog):
        with fluid.unique_name.guard():
            # user defined model based on ernie embeddings
            train_ret = creator.create_ernie_model(args, ernie_config)

            # ernie pyreader
            train_pyreader = creator.create_pyreader(
                args,
                file_name=args.train_data,
                feed_list=train_ret['feed_list'],
                model="ernie",
                place=place)

            # Clone for test BEFORE adding optimizer ops so the test program
            # shares parameters but has no backward pass.
            test_program = train_program.clone(for_test=True)
            test_pyreader = creator.create_pyreader(
                args,
                file_name=args.test_data,
                feed_list=train_ret['feed_list'],
                model="ernie",
                place=place)

            # Global-norm gradient clipping stabilizes fine-tuning.
            clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)
            optimizer = fluid.optimizer.Adam(
                learning_rate=args.base_learning_rate, grad_clip=clip)
            optimizer.minimize(train_ret["avg_cost"])

    lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
        program=train_program, batch_size=args.batch_size)
    print("Theoretical memory usage in training: %.3f - %.3f %s" %
          (lower_mem, upper_mem, unit))
    print("Device count: %d" % dev_count)

    exe.run(startup_prog)

    # load checkpoints
    if args.init_checkpoint and args.init_pretraining_params:
        print("WARNING: args 'init_checkpoint' and 'init_pretraining_params' "
              "both are set! "
              "Only arg 'init_checkpoint' is made valid.")
    if args.init_checkpoint:
        utils.init_checkpoint(exe, args.init_checkpoint, startup_prog)
    elif args.init_pretraining_params:
        utils.init_pretraining_params(exe, args.init_pretraining_params,
                                      startup_prog)

    if dev_count > 1 and not args.use_cuda:
        device = "GPU" if args.use_cuda else "CPU"
        print("%d %s are used to train model" % (dev_count, device))

        # multi cpu/gpu config
        exec_strategy = fluid.ExecutionStrategy()
        build_strategy = fluid.BuildStrategy()
        compiled_prog = fluid.compiler.CompiledProgram(
            train_program).with_data_parallel(
                loss_name=train_ret['avg_cost'].name,
                build_strategy=build_strategy,
                exec_strategy=exec_strategy)
    else:
        compiled_prog = fluid.compiler.CompiledProgram(train_program)

    # start training
    steps = 0
    for epoch_id in range(args.epoch):
        for data in train_pyreader():
            steps += 1
            # Only fetch metrics on print steps to avoid slowing training.
            if steps % args.print_steps == 0:
                fetch_list = [
                    train_ret["avg_cost"],
                    train_ret["precision"],
                    train_ret["recall"],
                    train_ret["f1_score"],
                ]
            else:
                fetch_list = []

            start_time = time.time()
            outputs = exe.run(program=compiled_prog,
                              feed=data[0],
                              fetch_list=fetch_list)
            end_time = time.time()

            if steps % args.print_steps == 0:
                loss, precision, recall, f1_score = [
                    np.mean(x) for x in outputs
                ]
                print(
                    "[train] batch_id = %d, loss = %.5f, P: %.5f, R: %.5f, F1: %.5f, elapsed time %.5f, "
                    "pyreader queue_size: %d " %
                    (steps, loss, precision, recall, f1_score,
                     end_time - start_time, train_pyreader.queue.size()))

            if steps % args.save_steps == 0:
                save_path = os.path.join(args.model_save_dir,
                                         "step_" + str(steps), "checkpoint")
                print("\tsaving model as %s" % (save_path))
                fluid.save(train_program, save_path)

            if steps % args.validation_steps == 0:
                evaluate(exe, test_program, test_pyreader, train_ret)

    # Final checkpoint after the last step.
    save_path = os.path.join(args.model_save_dir, "step_" + str(steps),
                             "checkpoint")
    fluid.save(train_program, save_path)
def infer(conf_dict, args):
    """
    run predict

    Build the SimNet matching network described by ``conf_dict``, restore
    ``args.init_checkpoint`` and write one prediction per input pair to
    ``args.infer_result_path`` (tab-separated with the raw input).
    """
    if args.use_cuda:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    vocab = utils.load_vocab(args.vocab_path)
    simnet_process = reader.SimNetProcessor(args, vocab)

    startup_prog = fluid.Program()

    get_infer_examples = simnet_process.get_infer_reader
    batch_data = fluid.io.batch(get_infer_examples,
                                args.batch_size,
                                drop_last=False)

    test_prog = fluid.Program()

    conf_dict['dict_size'] = len(vocab)

    # Dynamically import the configured matching network class.
    net = utils.import_class("../shared_modules/models/matching",
                             conf_dict["net"]["module_name"],
                             conf_dict["net"]["class_name"])(conf_dict)

    # Pairwise mode scores a (query, positive) pair; pointwise mode predicts
    # a class distribution over (left, right).
    if args.task_mode == "pairwise":
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                infer_loader, left, pos_right = create_model(args,
                                                             is_inference=True)
                left_feat, pos_score = net.predict(left, pos_right)
                pred = pos_score
        test_prog = test_prog.clone(for_test=True)
    else:
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                infer_loader, left, right = create_model(args,
                                                         is_inference=True)
                left_feat, pred = net.predict(left, right)
        test_prog = test_prog.clone(for_test=True)

    exe.run(startup_prog)

    utils.init_checkpoint(exe, args.init_checkpoint, main_program=test_prog)

    test_exe = exe
    infer_loader.set_sample_list_generator(batch_data)

    logging.info("start test process ...")
    preds_list = []
    fetch_list = [pred.name]
    output = []
    infer_loader.start()
    # Drain the loader; EOFException signals the end of the dataset.
    while True:
        try:
            output = test_exe.run(program=test_prog, fetch_list=fetch_list)
            if args.task_mode == "pairwise":
                # Map the raw score from [-1, 1] to [0, 1].
                preds_list += list(
                    map(lambda item: str((item[0] + 1) / 2), output[0]))
            else:
                preds_list += map(lambda item: str(np.argmax(item)),
                                  output[0])
        except fluid.core.EOFException:
            infer_loader.reset()
            break

    # Write predictions next to their source rows.
    with io.open(args.infer_result_path, "w", encoding="utf8") as infer_file:
        for _data, _pred in zip(simnet_process.get_infer_data(), preds_list):
            infer_file.write(_data + "\t" + _pred + "\n")
    logging.info("infer result saved in %s" %
                 os.path.join(os.getcwd(), args.infer_result_path))
def test(conf_dict, args):
    """
    Evaluation Function

    Build the SimNet matching network described by ``conf_dict``, restore
    ``args.init_checkpoint``, dump per-example predictions to
    predictions.txt, and log AUC (plus accuracy when requested).
    """
    if args.use_cuda:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    vocab = utils.load_vocab(args.vocab_path)
    simnet_process = reader.SimNetProcessor(args, vocab)

    startup_prog = fluid.Program()

    get_test_examples = simnet_process.get_reader("test")
    batch_data = fluid.io.batch(get_test_examples,
                                args.batch_size,
                                drop_last=False)
    test_prog = fluid.Program()

    conf_dict['dict_size'] = len(vocab)

    # Dynamically import the configured matching network class.
    net = utils.import_class("../shared_modules/models/matching",
                             conf_dict["net"]["module_name"],
                             conf_dict["net"]["class_name"])(conf_dict)

    metric = fluid.metrics.Auc(name="auc")

    with io.open("predictions.txt", "w", encoding="utf8") as predictions_file:
        # Pairwise mode scores a (query, positive) pair; pointwise mode
        # predicts a class distribution over (left, right).
        if args.task_mode == "pairwise":
            with fluid.program_guard(test_prog, startup_prog):
                with fluid.unique_name.guard():
                    test_loader, left, pos_right = create_model(
                        args, is_inference=True)
                    left_feat, pos_score = net.predict(left, pos_right)
                    pred = pos_score
            test_prog = test_prog.clone(for_test=True)
        else:
            with fluid.program_guard(test_prog, startup_prog):
                with fluid.unique_name.guard():
                    test_loader, left, right = create_model(args,
                                                            is_inference=True)
                    left_feat, pred = net.predict(left, right)
            test_prog = test_prog.clone(for_test=True)

        exe.run(startup_prog)

        utils.init_checkpoint(exe, args.init_checkpoint, main_program=test_prog)

        test_exe = exe
        test_loader.set_sample_list_generator(batch_data)

        logging.info("start test process ...")
        test_loader.start()
        pred_list = []
        fetch_list = [pred.name]
        output = []
        # Drain the loader; EOFException signals the end of the dataset.
        while True:
            try:
                output = test_exe.run(program=test_prog,
                                      fetch_list=fetch_list)
                if args.task_mode == "pairwise":
                    pred_list += list(
                        map(lambda item: float(item[0]), output[0]))
                    # Map raw scores from [-1, 1] to [0, 1] for the dump.
                    predictions_file.write(u"\n".join(
                        map(lambda item: str((item[0] + 1) / 2), output[0]))
                                           + "\n")
                else:
                    pred_list += map(lambda item: item, output[0])
                    predictions_file.write(u"\n".join(
                        map(lambda item: str(np.argmax(item)), output[0]))
                                           + "\n")
            except fluid.core.EOFException:
                test_loader.reset()
                break

        if args.task_mode == "pairwise":
            # Convert scores to two-column [P(neg), P(pos)] for the AUC metric.
            pred_list = np.array(pred_list).reshape((-1, 1))
            pred_list = (pred_list + 1) / 2
            pred_list = np.hstack(
                (np.ones_like(pred_list) - pred_list, pred_list))
        else:
            pred_list = np.array(pred_list)
        labels = simnet_process.get_test_label()

        metric.update(pred_list, labels)
        if args.compute_accuracy:
            acc = utils.get_accuracy(pred_list, labels, args.task_mode,
                                     args.lamda)
            logging.info("AUC of test is %f, Accuracy of test is %f" %
                         (metric.eval(), acc))
        else:
            logging.info("AUC of test is %f" % metric.eval())

    if args.verbose_result:
        utils.get_result_file(args)
        logging.info("test result saved in %s" %
                     os.path.join(os.getcwd(), args.test_result_path))
def do_train(args):
    """Train the LAC model.

    Builds the train/test programs, restores an optional checkpoint, runs the
    epoch loop with periodic logging/validation/checkpointing, tracks the best
    F1 model, and finally emits CE (continuous-evaluation) KPI lines when
    ``args.enable_ce`` is set.
    """
    best_score = -999  # best test-set F1 seen so far

    train_program = fluid.default_main_program()
    startup_program = fluid.default_startup_program()

    dataset = reader.Dataset(args)
    with fluid.program_guard(train_program, startup_program):
        train_program.random_seed = args.random_seed
        startup_program.random_seed = args.random_seed

        with fluid.unique_name.guard():
            train_ret = creator.create_model(args,
                                             dataset.vocab_size,
                                             dataset.num_labels,
                                             mode='train')
            # Clone BEFORE adding optimizer ops so the test program shares
            # parameters but has no backward pass.
            test_program = train_program.clone(for_test=True)

            optimizer = fluid.optimizer.Adam(
                learning_rate=args.base_learning_rate)
            optimizer.minimize(train_ret["avg_cost"])

    # init executor
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        # Cap the worker count at the actual number of CPUs available.
        dev_count = min(multiprocessing.cpu_count(), args.cpu_num)
        if (dev_count < args.cpu_num):
            print(
                "WARNING: The total CPU NUM in this machine is %d, which is less than cpu_num parameter you set. "
                "Change the cpu_num from %d to %d" %
                (dev_count, args.cpu_num, dev_count))
        os.environ['CPU_NUM'] = str(dev_count)
        place = fluid.CPUPlace()

    train_reader = creator.create_pyreader(args, file_name=args.train_data,
                                           feed_list=train_ret['feed_list'],
                                           place=place, mode='lac',
                                           reader=dataset,
                                           iterable=True)

    test_reader = creator.create_pyreader(args, file_name=args.test_data,
                                          feed_list=train_ret['feed_list'],
                                          place=place, mode='lac',
                                          reader=dataset,
                                          iterable=True,
                                          for_test=True)

    exe = fluid.Executor(place)
    exe.run(startup_program)

    if args.init_checkpoint:
        utils.init_checkpoint(exe, args.init_checkpoint, train_program)
    if dev_count > 1:
        device = "GPU" if args.use_cuda else "CPU"
        print("%d %s are used to train model" % (dev_count, device))

        # multi cpu/gpu config
        exec_strategy = fluid.ExecutionStrategy()
        # exec_strategy.num_threads = dev_count * 6
        build_strategy = fluid.compiler.BuildStrategy()
        # build_strategy.enable_inplace = True

        compiled_prog = fluid.compiler.CompiledProgram(
            train_program).with_data_parallel(
                loss_name=train_ret['avg_cost'].name,
                build_strategy=build_strategy,
                exec_strategy=exec_strategy)
    else:
        compiled_prog = fluid.compiler.CompiledProgram(train_program)

    # start training
    num_train_examples = dataset.get_num_examples(args.train_data)
    max_train_steps = args.epoch * num_train_examples // args.batch_size
    print("Num train examples: %d" % num_train_examples)
    print("Max train steps: %d" % max_train_steps)

    ce_info = []
    step = 0
    print_start_time = time.time()
    for epoch_id in range(args.epoch):
        ce_time = 0
        for data in train_reader():
            # this is for minimizing the fetching op, saving the training speed.
            if step % args.print_steps == 0:
                fetch_list = [
                    train_ret["avg_cost"], train_ret["precision"],
                    train_ret["recall"], train_ret["f1_score"]
                ]
            else:
                fetch_list = []

            outputs = exe.run(
                compiled_prog,
                fetch_list=fetch_list,
                feed=data[0],
            )

            if step % args.print_steps == 0:
                print_end_time = time.time()
                avg_cost, precision, recall, f1_score = [
                    np.mean(x) for x in outputs
                ]
                print(
                    "[train] step = %d, loss = %.5f, P: %.5f, R: %.5f, F1: %.5f, elapsed time %.5f"
                    % (step, avg_cost, precision, recall, f1_score,
                       print_end_time - print_start_time))
                print_start_time = time.time()

            if step % args.validation_steps == 0:
                valid_end_time = time.time()
                test_f1 = test_process(exe, test_program, test_reader,
                                       train_ret)
                valid_start_time = time.time()
                # Keep a copy of the best-F1 model separately.
                if test_f1 > best_score:
                    best_score = test_f1
                    save_path = os.path.join(args.model_save_dir, "best_mode")
                    fluid.io.save_persistables(exe, save_path, train_program)
                ce_time += valid_end_time - valid_start_time
                ce_info.append(
                    [ce_time, avg_cost, precision, recall, f1_score])

            # save checkpoints
            if step % args.save_steps == 0 and step != 0:
                save_path = os.path.join(args.model_save_dir,
                                         "step_" + str(step))
                fluid.io.save_persistables(exe, save_path, train_program)
            step += 1

    if args.enable_ce:
        # Emit CE KPI lines from the second-to-last recorded validation point.
        card_num = get_cards()
        ce_cost = 0
        ce_f1 = 0
        ce_p = 0
        ce_r = 0
        ce_time = 0
        try:
            ce_time = ce_info[-2][0]
            ce_cost = ce_info[-2][1]
            ce_p = ce_info[-2][2]
            ce_r = ce_info[-2][3]
            ce_f1 = ce_info[-2][4]
        except:
            print("ce info error")
        print("kpis\teach_step_duration_card%s\t%s" % (card_num, ce_time))
        print("kpis\ttrain_cost_card%s\t%f" % (card_num, ce_cost))
        print("kpis\ttrain_precision_card%s\t%f" % (card_num, ce_p))
        print("kpis\ttrain_recall_card%s\t%f" % (card_num, ce_r))
        print("kpis\ttrain_f1_card%s\t%f" % (card_num, ce_f1))
def main(args):
    """
    Main Function

    EmoTect driver: depending on ``args.do_train`` / ``do_val`` / ``do_infer``
    builds the corresponding program(s), restores checkpoints, runs the
    training loop with periodic save/validation, and finally runs test-set
    evaluation and/or inference.
    """
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
    else:
        place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    task_name = args.task_name.lower()
    processor = reader.EmoTectProcessor(data_dir=args.data_dir,
                                        vocab_path=args.vocab_path,
                                        random_seed=args.random_seed)
    #num_labels = len(processor.get_labels())
    num_labels = args.num_labels

    if not (args.do_train or args.do_val or args.do_infer):
        raise ValueError("For args `do_train`, `do_val` and `do_infer`, at "
                         "least one of them must be True.")

    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    if args.do_train:
        train_data_generator = processor.data_generator(
            batch_size=args.batch_size, phase='train', epoch=args.epoch)

        num_train_examples = processor.get_num_examples(phase="train")
        max_train_steps = args.epoch * num_train_examples // args.batch_size + 1

        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)

        train_program = fluid.Program()
        if args.random_seed is not None:
            train_program.random_seed = args.random_seed

        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                train_loader, loss, accuracy, num_seqs = create_model(
                    args, num_labels=num_labels, is_prediction=False)
                sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=args.lr)
                sgd_optimizer.minimize(loss)

        if args.verbose:
            lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                program=train_program, batch_size=args.batch_size)
            print("Theoretical memory usage in training: %.3f - %.3f %s" %
                  (lower_mem, upper_mem, unit))

    if args.do_val:
        # During training, validate on 'dev'; standalone validation uses 'test'.
        if args.do_train:
            test_data_generator = processor.data_generator(
                batch_size=args.batch_size, phase='dev', epoch=1)
        else:
            test_data_generator = processor.data_generator(
                batch_size=args.batch_size, phase='test', epoch=1)

        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                test_loader, loss, accuracy, num_seqs = create_model(
                    args, num_labels=num_labels, is_prediction=False)
        test_prog = test_prog.clone(for_test=True)

    if args.do_infer:
        infer_data_generator = processor.data_generator(
            batch_size=args.batch_size, phase='infer', epoch=1)

        # NOTE(review): do_infer reuses the name test_prog and so overwrites
        # the validation program when both do_val and do_infer are set without
        # do_train — confirm this combination is not used.
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                infer_loader, probs, _ = create_model(args,
                                                      num_labels=num_labels,
                                                      is_prediction=True)
        test_prog = test_prog.clone(for_test=True)

    exe.run(startup_prog)

    if args.do_train:
        if args.init_checkpoint:
            utils.init_checkpoint(exe,
                                  args.init_checkpoint,
                                  main_program=startup_prog)
    elif args.do_val or args.do_infer:
        if not args.init_checkpoint:
            raise ValueError("args 'init_checkpoint' should be set if"
                             "only doing validation or infer!")
        utils.init_checkpoint(exe, args.init_checkpoint,
                              main_program=test_prog)

    if args.do_train:
        train_exe = exe
        train_loader.set_sample_list_generator(train_data_generator)
    else:
        train_exe = None

    if args.do_val:
        test_exe = exe
        test_loader.set_sample_list_generator(test_data_generator)
    if args.do_infer:
        test_exe = exe
        infer_loader.set_sample_list_generator(infer_data_generator)

    if args.do_train:
        train_loader.start()
        steps = 0
        total_cost, total_acc, total_num_seqs = [], [], []
        time_begin = time.time()
        ce_info = []
        while True:
            try:
                steps += 1
                # Only fetch metrics every skip_steps to keep training fast.
                if steps % args.skip_steps == 0:
                    fetch_list = [loss.name, accuracy.name, num_seqs.name]
                else:
                    fetch_list = []

                outputs = train_exe.run(program=train_program,
                                        fetch_list=fetch_list,
                                        return_numpy=False)
                if steps % args.skip_steps == 0:
                    np_loss, np_acc, np_num_seqs = outputs
                    np_loss = np.array(np_loss)
                    np_acc = np.array(np_acc)
                    np_num_seqs = np.array(np_num_seqs)
                    # Weight batch metrics by sequence count for correct
                    # averaging across variable-sized batches.
                    total_cost.extend(np_loss * np_num_seqs)
                    total_acc.extend(np_acc * np_num_seqs)
                    total_num_seqs.extend(np_num_seqs)

                    if args.verbose:
                        verbose = "train loader queue size: %d, " % train_loader.queue.size(
                        )
                        print(verbose)

                    time_end = time.time()
                    used_time = time_end - time_begin
                    print("step: %d, avg loss: %f, "
                          "avg acc: %f, speed: %f steps/s" %
                          (steps, np.sum(total_cost) / np.sum(total_num_seqs),
                           np.sum(total_acc) / np.sum(total_num_seqs),
                           args.skip_steps / used_time))
                    ce_info.append([
                        np.sum(total_cost) / np.sum(total_num_seqs),
                        np.sum(total_acc) / np.sum(total_num_seqs), used_time
                    ])
                    total_cost, total_acc, total_num_seqs = [], [], []
                    time_begin = time.time()

                if steps % args.save_steps == 0:
                    save_path = os.path.join(args.save_checkpoint_dir,
                                             "step_" + str(steps))
                    fluid.save(train_program, save_path)

                if steps % args.validation_steps == 0:
                    # evaluate on dev set
                    if args.do_val:
                        evaluate(test_exe, test_prog, test_loader,
                                 [loss.name, accuracy.name, num_seqs.name],
                                 "dev")
            except fluid.core.EOFException:
                # End of data: final save + optional final dev evaluation.
                print("final step: %d " % steps)
                if args.do_val:
                    evaluate(test_exe, test_prog, test_loader,
                             [loss.name, accuracy.name, num_seqs.name], "dev")
                save_path = os.path.join(args.save_checkpoint_dir,
                                         "step_" + str(steps))
                fluid.save(train_program, save_path)
                train_loader.reset()
                break

    if args.do_train and args.enable_ce:
        # Emit CE KPI lines from the second-to-last recorded point.
        card_num = get_cards()
        ce_loss = 0
        ce_acc = 0
        ce_time = 0
        try:
            ce_loss = ce_info[-2][0]
            ce_acc = ce_info[-2][1]
            ce_time = ce_info[-2][2]
        except:
            print("ce info error")
        print("kpis\teach_step_duration_%s_card%s\t%s" %
              (task_name, card_num, ce_time))
        print("kpis\ttrain_loss_%s_card%s\t%f" %
              (task_name, card_num, ce_loss))
        print("kpis\ttrain_acc_%s_card%s\t%f" %
              (task_name, card_num, ce_acc))

    # evaluate on test set
    if not args.do_train and args.do_val:
        print("Final test result:")
        evaluate(test_exe, test_prog, test_loader,
                 [loss.name, accuracy.name, num_seqs.name], "test")

    # infer
    if args.do_infer:
        print("Final infer result:")
        infer(test_exe, test_prog, infer_loader, [probs.name], "infer")
def main(args):
    """LAC driver (legacy decorated-pyreader style).

    Depending on ``args.do_train`` / ``do_test`` / ``do_infer`` builds the
    corresponding program(s), restores checkpoints, runs the training loop
    with periodic save/evaluation, emits CE KPI lines when requested, and
    finally runs standalone test and/or inference.
    """
    startup_program = fluid.Program()
    if args.random_seed is not None:
        startup_program.random_seed = args.random_seed

    # prepare dataset
    dataset = reader.Dataset(args)

    if args.do_train:
        train_program = fluid.Program()
        if args.random_seed is not None:
            train_program.random_seed = args.random_seed
        with fluid.program_guard(train_program, startup_program):
            with fluid.unique_name.guard():
                train_ret = create_model(args, "train_reader",
                                         dataset.vocab_size,
                                         dataset.num_labels)
                # Shuffled, batched feed for training.
                train_ret["pyreader"].decorate_paddle_reader(
                    paddle.batch(paddle.reader.shuffle(
                        dataset.file_reader(args.train_data),
                        buf_size=args.traindata_shuffle_buffer),
                                 batch_size=args.batch_size))

                optimizer = fluid.optimizer.Adam(
                    learning_rate=args.base_learning_rate)
                optimizer.minimize(train_ret["avg_cost"])

    if args.do_test:
        test_program = fluid.Program()
        with fluid.program_guard(test_program, startup_program):
            with fluid.unique_name.guard():
                test_ret = create_model(args, "test_reader",
                                        dataset.vocab_size,
                                        dataset.num_labels)
                test_ret["pyreader"].decorate_paddle_reader(
                    paddle.batch(dataset.file_reader(args.test_data),
                                 batch_size=args.batch_size))
        test_program = test_program.clone(
            for_test=True)  # to share parameters with train model

    if args.do_infer:
        infer_program = fluid.Program()
        with fluid.program_guard(infer_program, startup_program):
            with fluid.unique_name.guard():
                infer_ret = create_model(args, "infer_reader",
                                         dataset.vocab_size,
                                         dataset.num_labels)
                infer_ret["pyreader"].decorate_paddle_reader(
                    paddle.batch(dataset.file_reader(args.infer_data),
                                 batch_size=args.batch_size))
        infer_program = infer_program.clone(for_test=True)

    # init executor
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = multiprocessing.cpu_count()
    exe = fluid.Executor(place)
    exe.run(startup_program)

    # load checkpoints
    if args.do_train:
        if args.init_checkpoint:
            utils.init_checkpoint(exe, args.init_checkpoint, train_program)
    elif args.do_test:
        if not args.init_checkpoint:
            raise ValueError(
                "args 'init_checkpoint' should be set if only doing validation or testing!"
            )
        utils.init_checkpoint(exe, args.init_checkpoint, test_program)
    if args.do_infer:
        utils.init_checkpoint(exe, args.init_checkpoint, infer_program)

    # do start to train
    if args.do_train:
        num_train_examples = dataset.get_num_examples(args.train_data)
        max_train_steps = args.epoch * num_train_examples // args.batch_size
        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)

        ce_info = []
        batch_id = 0
        for epoch_id in range(args.epoch):
            train_ret["pyreader"].start()
            ce_time = 0
            # Drain the pyreader; EOFException marks the end of the epoch.
            try:
                while True:
                    start_time = time.time()
                    avg_cost, nums_infer, nums_label, nums_correct = exe.run(
                        train_program,
                        fetch_list=[
                            train_ret["avg_cost"],
                            train_ret["num_infer_chunks"],
                            train_ret["num_label_chunks"],
                            train_ret["num_correct_chunks"],
                        ],
                    )
                    end_time = time.time()
                    # Per-batch chunk metrics (evaluator reset each batch).
                    train_ret["chunk_evaluator"].reset()
                    train_ret["chunk_evaluator"].update(
                        nums_infer, nums_label, nums_correct)
                    precision, recall, f1_score = train_ret[
                        "chunk_evaluator"].eval()
                    batch_id += 1
                    print(
                        "[train] batch_id = %d, loss = %.5f, P: %.5f, R: %.5f, F1: %.5f, elapsed time %.5f "
                        % (batch_id, avg_cost, precision, recall, f1_score,
                           end_time - start_time))
                    ce_time += end_time - start_time
                    ce_info.append(
                        [ce_time, avg_cost, precision, recall, f1_score])

                    # save checkpoints
                    if (batch_id % args.save_model_per_batches == 0):
                        save_path = os.path.join(args.model_save_dir,
                                                 "step_" + str(batch_id))
                        fluid.io.save_persistables(exe, save_path,
                                                   train_program)

                    # evaluate
                    if (batch_id % args.valid_model_per_batches == 0
                        ) and args.do_test:
                        evaluate(exe, test_program, test_ret)

            except fluid.core.EOFException:
                save_path = os.path.join(args.model_save_dir,
                                         "step_" + str(batch_id))
                fluid.io.save_persistables(exe, save_path, train_program)
                train_ret["pyreader"].reset()
                # break?

    if args.do_train and args.enable_ce:
        # Emit CE KPI lines from the second-to-last recorded batch.
        card_num = get_cards()
        ce_cost = 0
        ce_f1 = 0
        ce_p = 0
        ce_r = 0
        ce_time = 0
        try:
            ce_time = ce_info[-2][0]
            ce_cost = ce_info[-2][1]
            ce_p = ce_info[-2][2]
            ce_r = ce_info[-2][3]
            ce_f1 = ce_info[-2][4]
        except:
            print("ce info error")
        print("kpis\teach_step_duration_card%s\t%s" % (card_num, ce_time))
        print("kpis\ttrain_cost_card%s\t%f" % (card_num, ce_cost))
        print("kpis\ttrain_precision_card%s\t%f" % (card_num, ce_p))
        print("kpis\ttrain_recall_card%s\t%f" % (card_num, ce_r))
        print("kpis\ttrain_f1_card%s\t%f" % (card_num, ce_f1))

    # only test
    if args.do_test:
        evaluate(exe, test_program, test_ret)

    if args.do_infer:
        infer_ret["pyreader"].start()
        while True:
            try:
                (
                    words,
                    crf_decode,
                ) = exe.run(infer_program,
                            fetch_list=[
                                infer_ret["words"],
                                infer_ret["crf_decode"],
                            ],
                            return_numpy=False)
                results = utils.parse_result(words, crf_decode, dataset)
                for result in results:
                    print(result)
            except fluid.core.EOFException:
                infer_ret["pyreader"].reset()
                break
def main():
    """Drive a full NFS factorization run.

    Parses the command line, prepares the work directory, checkpoint file and
    parameter snapshot, decides which stages must (re)run, executes the four
    stages (polysel, sieving, linalg, sqrt), and logs timings and factors.
    """
    signal.signal(signal.SIGINT, signal_handler)

    parser = argparse.ArgumentParser(description="Integer Factorization with "
                                     "the Number Field Sieve")
    parser.add_argument("parameters", help="A file with the parameters to use")
    parser.add_argument("options", metavar="OPTION", help="An option as in "
                        "parameter file (format: key=value)", nargs="*")
    parser.add_argument('--resume', '-r', help="checkpoint file to resume from")
    parser.add_argument(
        '--stage',
        '-s',
        action='append',
        help=
        "stage to complete ('start','polysel','sieving','linalg','complete'), add + to run all subsequent stages"
    )
    args = parser.parse_args()

    parameters = utils.get_params(args.parameters, args.options)
    name = parameters.myparams({"name": str}, ['tasks'])["name"]
    workdir = parameters.myparams({"workdir": str}, ['tasks'])["workdir"]
    if not os.path.exists(workdir):
        logger.info("Creating work directory %s", workdir)
        os.makedirs(workdir)
    setup_logging(workdir, name)

    # Load or create initial checkpoint
    checkpoint_file = args.resume
    if not checkpoint_file:
        checkpoint_file = os.path.join(workdir, "checkpoint.dat")
    utils.init_checkpoint(checkpoint_file)

    # set parameters that are unlikely to change from run to run, such as filenames and directories
    parameters = set_static_parameters(parameters)
    # check that all required parameters are present
    params = check_parameters(parameters)
    utils.update_checkpoint({'params': params})
    # set parameters that will likely change from run to run
    parameters = set_dynamic_parameters(parameters)

    # Write a snapshot of the parameters to a file
    snapshot_filename = "%s/%s.parameters_snapshot" % (workdir, name)
    with open(snapshot_filename, "w") as snapshot_file:
        logger.debug("Writing parameter snapshot to %s", snapshot_filename)
        snapshot_file.write(str(parameters))
        snapshot_file.write("\n")

    start_time = time.time()

    # For each checkpointed stage, check if the stage should be run again.
    # A stage should be run again under the following circumstances:
    # - The user manually requested to run the stage
    # - No checkpoint exists for the stage
    # - A stage on which this stage depends will be re-run
    # - Parameters on which the stage depends have been changed since the last run
    if args.stage:
        # Iterate over a copy: unknown stage names are removed from args.stage
        # inside the loop, and removing from the list being iterated skips items.
        for stage in list(args.stage):
            if stage.endswith('+'):
                stage = stage[:-1]
                if stage not in stages:
                    continue
                # '+' means run this stage and every subsequent one.
                # Materialize as a list so it behaves like the append-built case.
                stage_required.manual_stages = list(
                    range(stages.index(stage), len(stages)))
                break
            if stage not in stages:
                # BUG FIX: list.pop() takes an index; removing by value
                # requires list.remove(). pop(stage) raised TypeError.
                args.stage.remove(stage)
            else:
                stage_required.manual_stages.append(stages.index(stage))
    else:
        # since no stage were specified to run manually, choose the first stage based on the checkpoint file
        stage_required.manual_stages = set_manual_stages(params)

    # Run polynomial selection
    polysel_result = do_polysel(parameters)
    # Run sieving
    sieve_result = do_sieve(parameters, polysel_result)
    # Run linalg
    linalg_result = do_linalg(parameters, sieve_result)
    # Run square root
    sqrt_result = do_sqrt(parameters, linalg_result)

    factoring_duration = polysel_result['duration'] + sieve_result[
        'duration'] + linalg_result['duration'] + sqrt_result['duration']
    logger.info('Factoring completed in %s',
                utils.str_time(factoring_duration))
    logger.info('\tPolysel in real/cpu %s/%s',
                utils.str_time(polysel_result['duration']),
                utils.str_time(polysel_result['cputime']))
    logger.info("\tSieving in real/cpu %s/%s",
                utils.str_time(sieve_result['duration']),
                utils.str_time(sieve_result['cputime']))
    logger.info("\tLinalg in %s", utils.str_time(linalg_result['duration']))
    logger.info("\tSqrt in %s", utils.str_time(sqrt_result['duration']))
    logger.info("\tFactors %s", ','.join(sqrt_result['factors']))

    # Optional user-supplied command to run once factoring is done.
    post_factor = parameters.myparams({
        'post_factor': None
    }, ['commands']).get('post_factor')
    if post_factor is not None:  # PEP 8: compare to None with 'is not'
        logger.info('Post-factor command %s', post_factor)
        utils.run_command(post_factor, logger=logger)
def main():
    """Entry point for the NFS factorization pipeline.

    Sets up logging, checkpointing and parameters, selects which stages need
    to run, executes polysel -> sieving -> linalg -> sqrt, and reports the
    factors and per-stage timings.
    """
    signal.signal(signal.SIGINT, signal_handler)

    parser = argparse.ArgumentParser(description="Integer Factorization with "
                                     "the Number Field Sieve")
    parser.add_argument("parameters", help="A file with the parameters to use")
    parser.add_argument("options", metavar="OPTION", help="An option as in "
                        "parameter file (format: key=value)", nargs="*")
    parser.add_argument('--resume', '-r', help="checkpoint file to resume from")
    parser.add_argument('--stage', '-s', action='append',
                        help="stage to complete ('start','polysel','sieving','linalg','complete'), add + to run all subsequent stages")
    args = parser.parse_args()

    parameters = utils.get_params(args.parameters, args.options)
    name = parameters.myparams({"name": str}, ['tasks'])["name"]
    workdir = parameters.myparams({"workdir": str}, ['tasks'])["workdir"]
    if not os.path.exists(workdir):
        logger.info("Creating work directory %s", workdir)
        os.makedirs(workdir)
    setup_logging(workdir, name)

    # Load or create initial checkpoint
    checkpoint_file = args.resume
    if not checkpoint_file:
        checkpoint_file = os.path.join(workdir, "checkpoint.dat")
    utils.init_checkpoint(checkpoint_file)

    # set parameters that are unlikely to change from run to run, such as filenames and directories
    parameters = set_static_parameters(parameters)
    # check that all required parameters are present
    params = check_parameters(parameters)
    utils.update_checkpoint({'params': params})
    # set parameters that will likely change from run to run
    parameters = set_dynamic_parameters(parameters)

    # Write a snapshot of the parameters to a file
    snapshot_filename = "%s/%s.parameters_snapshot" % (workdir, name)
    with open(snapshot_filename, "w") as snapshot_file:
        logger.debug("Writing parameter snapshot to %s", snapshot_filename)
        snapshot_file.write(str(parameters))
        snapshot_file.write("\n")

    start_time = time.time()

    # For each checkpointed stage, check if the stage should be run again.
    # A stage should be run again under the following circumstances:
    # - The user manually requested to run the stage
    # - No checkpoint exists for the stage
    # - A stage on which this stage depends will be re-run
    # - Parameters on which the stage depends have been changed since the last run
    if args.stage:
        # Iterate a copy because invalid entries are removed from args.stage
        # below; mutating the list under iteration would skip elements.
        for stage in list(args.stage):
            if stage.endswith('+'):
                stage = stage[:-1]
                if stage not in stages:
                    continue
                # Trailing '+' selects this stage and all later stages.
                # Use a list so the type matches the append-built branch.
                stage_required.manual_stages = list(
                    range(stages.index(stage), len(stages)))
                break
            if stage not in stages:
                # BUG FIX: was args.stage.pop(stage) — pop() takes an index,
                # so passing a string raised TypeError; remove() deletes by value.
                args.stage.remove(stage)
            else:
                stage_required.manual_stages.append(stages.index(stage))
    else:
        # since no stage were specified to run manually, choose the first stage based on the checkpoint file
        stage_required.manual_stages = set_manual_stages(params)

    # Run polynomial selection
    polysel_result = do_polysel(parameters)
    # Run sieving
    sieve_result = do_sieve(parameters, polysel_result)
    # Run linalg
    linalg_result = do_linalg(parameters, sieve_result)
    # Run square root
    sqrt_result = do_sqrt(parameters, linalg_result)

    factoring_duration = polysel_result['duration'] + sieve_result[
        'duration'] + linalg_result['duration'] + sqrt_result['duration']
    logger.info('Factoring completed in %s',
                utils.str_time(factoring_duration))
    logger.info('\tPolysel in real/cpu %s/%s',
                utils.str_time(polysel_result['duration']),
                utils.str_time(polysel_result['cputime']))
    logger.info("\tSieving in real/cpu %s/%s",
                utils.str_time(sieve_result['duration']),
                utils.str_time(sieve_result['cputime']))
    logger.info("\tLinalg in %s", utils.str_time(linalg_result['duration']))
    logger.info("\tSqrt in %s", utils.str_time(sqrt_result['duration']))
    logger.info("\tFactors %s", ','.join(sqrt_result['factors']))

    # Optional post-run hook supplied via the parameter file.
    post_factor = parameters.myparams({'post_factor': None},
                                      ['commands']).get('post_factor')
    if post_factor is not None:  # PEP 8: use 'is not None', not '!= None'
        logger.info('Post-factor command %s', post_factor)
        utils.run_command(post_factor, logger=logger)
def main(args):
    """
    Main Function for the Senta classifier.

    Builds the train/eval/infer programs, optionally restores a checkpoint,
    runs the training loop (periodic logging, checkpoint saving and dev-set
    evaluation), then performs final validation and inference as requested
    by the do_train/do_val/do_infer flags.
    """
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = 1
    exe = fluid.Executor(place)

    task_name = args.task_name.lower()
    processor = reader.SentaProcessor(data_dir=args.data_dir,
                                      vocab_path=args.vocab_path,
                                      random_seed=args.random_seed,
                                      max_seq_len=args.max_seq_len)
    num_labels = len(processor.get_labels())

    if not (args.do_train or args.do_val or args.do_infer):
        raise ValueError("For args `do_train`, `do_val` and `do_infer`, at "
                         "least one of them must be True.")

    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    if args.do_train:
        # BUG FIX: '/' is float division in Python 3, which produced a float
        # batch size; '//' keeps it an int (matches max_train_steps below).
        train_data_generator = processor.data_generator(
            batch_size=args.batch_size // dev_count,
            phase='train',
            epoch=args.epoch,
            shuffle=True)
        num_train_examples = processor.get_num_examples(phase="train")
        max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count
        print("Device count: %d" % dev_count)
        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)

        train_program = fluid.Program()
        if args.enable_ce and args.random_seed is not None:
            train_program.random_seed = args.random_seed
        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                train_reader, loss, accuracy, num_seqs = create_model(
                    args,
                    pyreader_name='train_reader',
                    num_labels=num_labels,
                    is_prediction=False)
                sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=args.lr)
                sgd_optimizer.minimize(loss)
        if args.verbose:
            lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                program=train_program, batch_size=args.batch_size)
            print("Theoretical memory usage in training: %.3f - %.3f %s" %
                  (lower_mem, upper_mem, unit))

    if args.do_val:
        test_data_generator = processor.data_generator(
            batch_size=args.batch_size // dev_count,  # BUG FIX: int division
            phase='dev',
            epoch=1,
            shuffle=False)
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                test_reader, loss, accuracy, num_seqs = create_model(
                    args,
                    pyreader_name='test_reader',
                    num_labels=num_labels,
                    is_prediction=False)
        test_prog = test_prog.clone(for_test=True)

    if args.do_infer:
        infer_data_generator = processor.data_generator(
            batch_size=args.batch_size // dev_count,  # BUG FIX: int division
            phase='infer',
            epoch=1,
            shuffle=False)
        infer_prog = fluid.Program()
        with fluid.program_guard(infer_prog, startup_prog):
            with fluid.unique_name.guard():
                # NOTE(review): create_model is unpacked into 3 values here
                # (is_prediction=True) vs 4 above — presumably the prediction
                # graph returns fewer outputs; confirm against create_model.
                infer_reader, prop, _ = create_model(
                    args,
                    pyreader_name='infer_reader',
                    num_labels=num_labels,
                    is_prediction=True)
        infer_prog = infer_prog.clone(for_test=True)

    exe.run(startup_prog)

    if args.do_train:
        if args.init_checkpoint:
            init_checkpoint(exe,
                            args.init_checkpoint,
                            main_program=startup_prog)
    elif args.do_val or args.do_infer:
        if not args.init_checkpoint:
            # BUG FIX: added the missing space — adjacent literals previously
            # concatenated to "...should be set ifonly doing...".
            raise ValueError("args 'init_checkpoint' should be set if "
                             "only doing validation or testing!")
        init_checkpoint(exe, args.init_checkpoint, main_program=startup_prog)

    if args.do_train:
        train_exe = exe
        train_reader.set_sample_list_generator(train_data_generator)
    else:
        train_exe = None
    if args.do_val:
        test_exe = exe
        test_reader.set_sample_list_generator(test_data_generator)
    if args.do_infer:
        test_exe = exe
        infer_reader.set_sample_list_generator(infer_data_generator)

    if args.do_train:
        train_reader.start()
        steps = 0
        total_cost, total_acc, total_num_seqs = [], [], []
        time_begin = time.time()
        while True:
            try:
                steps += 1
                # Only pay the fetch cost every skip_steps batches.
                if steps % args.skip_steps == 0:
                    fetch_list = [loss.name, accuracy.name, num_seqs.name]
                else:
                    fetch_list = []
                outputs = train_exe.run(program=train_program,
                                        fetch_list=fetch_list,
                                        return_numpy=False)
                if steps % args.skip_steps == 0:
                    np_loss, np_acc, np_num_seqs = outputs
                    np_loss = np.array(np_loss)
                    np_acc = np.array(np_acc)
                    np_num_seqs = np.array(np_num_seqs)
                    # Weight batch metrics by sequence count for averaging.
                    total_cost.extend(np_loss * np_num_seqs)
                    total_acc.extend(np_acc * np_num_seqs)
                    total_num_seqs.extend(np_num_seqs)
                    if args.verbose:
                        # BUG FIX: was train_pyreader.queue.size() — an
                        # undefined name (the reader here is train_reader).
                        verbose = "train pyreader queue size: %d, " % train_reader.queue.size(
                        )
                        print(verbose)
                    time_end = time.time()
                    used_time = time_end - time_begin
                    print("step: %d, ave loss: %f, "
                          "ave acc: %f, speed: %f steps/s" %
                          (steps, np.sum(total_cost) / np.sum(total_num_seqs),
                           np.sum(total_acc) / np.sum(total_num_seqs),
                           args.skip_steps / used_time))
                    total_cost, total_acc, total_num_seqs = [], [], []
                    time_begin = time.time()
                if steps % args.save_steps == 0:
                    save_path = os.path.join(args.checkpoints,
                                             "step_" + str(steps),
                                             "checkpoint")
                    fluid.save(train_program, save_path)
                if steps % args.validation_steps == 0:
                    # evaluate dev set
                    if args.do_val:
                        print("do evalatation")
                        evaluate(exe, test_prog, test_reader,
                                 [loss.name, accuracy.name, num_seqs.name],
                                 "dev")
            except fluid.core.EOFException:
                # Data exhausted: save a final checkpoint and stop.
                save_path = os.path.join(args.checkpoints,
                                         "step_" + str(steps), "checkpoint")
                fluid.save(train_program, save_path)
                train_reader.reset()
                break

    # final eval on dev set
    if args.do_val:
        print("Final validation result:")
        evaluate(exe, test_prog, test_reader,
                 [loss.name, accuracy.name, num_seqs.name], "dev")
    # final eval on test set
    if args.do_infer:
        print("Final test result:")
        inference(exe, infer_prog, infer_reader, [prop.name], "infer")
def do_compress(args):
    """Run model compression for the lexical-analysis model via Compressor.

    Builds the training graph (and a forward-only clone for evaluation),
    wires up batched train/test readers, optionally restores a checkpoint,
    then configures and runs the Compressor with ``args.compress_config``.
    """
    train_program = fluid.default_main_program()
    startup_program = fluid.default_startup_program()
    dataset = reader.Dataset(args)
    with fluid.program_guard(train_program, startup_program):
        train_program.random_seed = args.random_seed
        startup_program.random_seed = args.random_seed
        with fluid.unique_name.guard():
            # train_ret exposes the variables (words/targets/avg_cost/...)
            # used to build the feed/fetch lists below.
            train_ret = creator.create_model(args,
                                             dataset.vocab_size,
                                             dataset.num_labels,
                                             mode='train')
            # Forward-only clone for evaluation; the optimizer is handed to
            # Compressor below rather than applied here.
            test_program = train_program.clone()
            optimizer = fluid.optimizer.Adam(
                learning_rate=args.base_learning_rate)

    # init executor
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        # Cap the requested CPU worker count at what the machine actually has.
        dev_count = min(multiprocessing.cpu_count(), args.cpu_num)
        if (dev_count < args.cpu_num):
            print(
                "WARNING: The total CPU NUM in this machine is %d, which is less than cpu_num parameter you set. "
                "Change the cpu_num from %d to %d" %
                (dev_count, args.cpu_num, dev_count))
        os.environ['CPU_NUM'] = str(dev_count)
        place = fluid.CPUPlace()

    # Batched readers over the raw file readers.
    train_reader = paddle.batch(dataset.file_reader(args.train_data),
                                batch_size=args.batch_size)
    test_reader = paddle.batch(dataset.file_reader(args.test_data),
                               batch_size=args.batch_size)

    exe = fluid.Executor(place)
    exe.run(startup_program)

    if args.init_checkpoint:
        # NOTE(review): the '.pdckpt' suffix matches how do_eval loads
        # checkpoints elsewhere in this file — confirm it is intended here too.
        utils.init_checkpoint(exe, args.init_checkpoint + '.pdckpt',
                              train_program)

    # (name, variable-name) pairs telling Compressor what to feed and fetch.
    train_feed_list = [('words', train_ret['words'].name),
                       ("targets", train_ret["targets"].name)]
    train_fetch_list = [('loss', train_ret['avg_cost'].name)]
    test_feed_list = [('words', train_ret['words'].name),
                      ("targets", train_ret["targets"].name)]
    test_fetch_list = [('f1_score', train_ret['f1_score'].name)]
    print(train_ret['crf_decode'].name)

    com_pass = Compressor(place,
                          fluid.global_scope(),
                          train_program=train_program,
                          train_reader=train_reader,
                          train_feed_list=train_feed_list,
                          train_fetch_list=train_fetch_list,
                          eval_program=test_program,
                          eval_reader=test_reader,
                          eval_feed_list=test_feed_list,
                          eval_fetch_list=test_fetch_list,
                          teacher_programs=[],
                          train_optimizer=optimizer,
                          distiller_optimizer=None)
    com_pass.config(args.compress_config)
    com_pass.run()
    def _build_programs(self):
        """
        Build programs.

        Build train_program, eval_program and inference_program. Only use in
        static graph mode.

        In inference mode (`self.run_infer`) only the infer program is built.
        Otherwise the train program is built (with distributed strategy when
        `self.is_distributed`), the eval program is cloned from it before the
        optimizer is attached, and parameters are initialized from pretraining
        params or a checkpoint if configured.
        """
        if self.run_infer:
            self.startup_program = fluid.Program()
            # build infer program
            self.infer_program = fluid.Program()
            with fluid.program_guard(self.infer_program, self.startup_program):
                with fluid.unique_name.guard():
                    self.infer_feed_dict = inputs = self._get_feed_dict(
                        is_infer=True)
                    outputs = self.forward(inputs, is_infer=True)
                    predictions = self.infer(inputs, outputs)
                    self.infer_fetch_dict = predictions
            # Strip training-only ops/attributes from the inference graph.
            self.infer_program = self.infer_program.clone(for_test=True)

            self.program = self.infer_program
        else:
            if self.is_distributed:
                exec_strategy = fluid.ExecutionStrategy()
                exec_strategy.use_experimental_executor = True
                exec_strategy.num_threads = 4
                exec_strategy.num_iteration_per_drop_scope = 1

                dist_strategy = DistributedStrategy()
                dist_strategy.exec_strategy = exec_strategy
                dist_strategy.nccl_comm_num = 1
                dist_strategy.fuse_all_reduce_ops = True
                if self.use_recompute:
                    # Trade compute for memory: recompute activations in backward.
                    dist_strategy.forward_recompute = True
                    dist_strategy.enable_sequential_execution = True
                if self.use_amp:
                    # Automatic mixed precision with a fixed loss scale.
                    dist_strategy.use_amp = True
                    dist_strategy.amp_loss_scaling = self.amp_loss_scaling
                self.dist_strategy = dist_strategy

            self.startup_program = fluid.Program()
            # build train program
            self.train_program = fluid.Program()
            with fluid.program_guard(self.train_program, self.startup_program):
                with fluid.unique_name.guard():
                    self.feed_dict = inputs = self._get_feed_dict()
                    outputs = self.forward(inputs)
                    if self.is_distributed and self.use_recompute:
                        # Recompute needs the forward checkpoints exposed by
                        # the model's forward pass.
                        self.dist_strategy.recompute_checkpoints = outputs[
                            "checkpoints"]
                    metrics, statistics = self.get_metrics_and_statistics(
                        inputs, outputs)

                    # build eval program
                    # Clone BEFORE optimize() so the eval graph has no
                    # backward/optimizer ops.
                    self.eval_program = self.train_program.clone(for_test=True)
                    self.eval_fetch_dict = {**metrics, **statistics}

                    scheduled_lr = self.optimize(metrics)
                    metrics["scheduled_lr"] = scheduled_lr
                    self.train_fetch_dict = metrics

            self.program = self.train_program
            if self.is_distributed:
                # Fleet rewrites the train program for distributed execution.
                self.train_program = fleet.main_program

        self.exe.run(self.startup_program)
        # Parameter init priority: pretraining params, then checkpoint.
        if self.init_pretraining_params != "":
            init_pretraining_params(self.exe, self.init_pretraining_params,
                                    self.program)
        elif self.init_checkpoint != "":
            init_checkpoint(self.exe, self.init_checkpoint, self.program)
        return
def main(args):
    """
    Main Function for the ERNIE-based classifier.

    Builds train/eval/infer programs on top of ERNIE embeddings, restores a
    checkpoint when required, runs the training loop with periodic logging,
    checkpoint saving and dev-set evaluation, then performs final evaluation
    and inference according to the do_train/do_val/do_infer flags.
    """
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    exe = fluid.Executor(place)

    reader = task_reader.ClassifyReader(vocab_path=args.vocab_path,
                                        label_map_config=args.label_map_config,
                                        max_seq_len=args.max_seq_len,
                                        do_lower_case=args.do_lower_case,
                                        random_seed=args.random_seed)

    if not (args.do_train or args.do_val or args.do_infer):
        raise ValueError("For args `do_train`, `do_val` and `do_infer`, at "
                         "least one of them must be True.")

    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    if args.do_train:
        train_data_generator = reader.data_generator(
            input_file=args.train_set,
            batch_size=args.batch_size,
            epoch=args.epoch,
            shuffle=True,
            phase="train")
        num_train_examples = reader.get_num_examples(args.train_set)
        max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count
        print("Device count: %d" % dev_count)
        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)

        train_program = fluid.Program()
        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                # create ernie_pyreader
                train_pyreader, ernie_inputs, labels = ernie_pyreader(
                    args, pyreader_name='train_pyreader')
                # get ernie_embeddings
                if args.use_paddle_hub:
                    embeddings = ernie_encoder_with_paddle_hub(
                        ernie_inputs, args.max_seq_len)
                else:
                    embeddings = ernie_encoder(ernie_inputs,
                                               ernie_config=ernie_config)
                # user defined model based on ernie embeddings
                loss, accuracy, num_seqs = create_model(args,
                                                        embeddings,
                                                        labels=labels,
                                                        is_prediction=False)
                optimizer = fluid.optimizer.Adam(learning_rate=args.lr)
                optimizer.minimize(loss)
        if args.verbose:
            lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                program=train_program, batch_size=args.batch_size)
            print("Theoretical memory usage in training: %.3f - %.3f %s" %
                  (lower_mem, upper_mem, unit))

    if args.do_val:
        test_data_generator = reader.data_generator(input_file=args.dev_set,
                                                    batch_size=args.batch_size,
                                                    phase='dev',
                                                    epoch=1,
                                                    shuffle=False)
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                # create ernie_pyreader
                test_pyreader, ernie_inputs, labels = ernie_pyreader(
                    args, pyreader_name='eval_reader')
                # get ernie_embeddings
                if args.use_paddle_hub:
                    embeddings = ernie_encoder_with_paddle_hub(
                        ernie_inputs, args.max_seq_len)
                else:
                    embeddings = ernie_encoder(ernie_inputs,
                                               ernie_config=ernie_config)
                # user defined model based on ernie embeddings
                loss, accuracy, num_seqs = create_model(args,
                                                        embeddings,
                                                        labels=labels,
                                                        is_prediction=False)
        test_prog = test_prog.clone(for_test=True)

    if args.do_infer:
        infer_data_generator = reader.data_generator(
            input_file=args.test_set,
            batch_size=args.batch_size,
            phase='infer',
            epoch=1,
            shuffle=False)
        infer_prog = fluid.Program()
        with fluid.program_guard(infer_prog, startup_prog):
            with fluid.unique_name.guard():
                infer_pyreader, ernie_inputs, labels = ernie_pyreader(
                    args, pyreader_name="infer_pyreader")
                # get ernie_embeddings
                if args.use_paddle_hub:
                    embeddings = ernie_encoder_with_paddle_hub(
                        ernie_inputs, args.max_seq_len)
                else:
                    embeddings = ernie_encoder(ernie_inputs,
                                               ernie_config=ernie_config)
                probs = create_model(args,
                                     embeddings,
                                     labels=labels,
                                     is_prediction=True)
        infer_prog = infer_prog.clone(for_test=True)

    exe.run(startup_prog)

    # NOTE(review): the elif chain means that when do_val and do_infer are
    # both set, the checkpoint is loaded against test_prog only — presumably
    # fine because programs share the global scope; confirm.
    if args.do_train:
        if args.init_checkpoint:
            init_checkpoint(exe,
                            args.init_checkpoint,
                            main_program=train_program)
    elif args.do_val:
        if not args.init_checkpoint:
            # BUG FIX: added the missing trailing space — the adjacent string
            # literals previously concatenated to "...set ifonly doing...".
            raise ValueError("args 'init_checkpoint' should be set if "
                             "only doing validation or testing!")
        init_checkpoint(exe, args.init_checkpoint, main_program=test_prog)
    elif args.do_infer:
        if not args.init_checkpoint:
            # BUG FIX: same missing-space fix as above.
            raise ValueError("args 'init_checkpoint' should be set if "
                             "only doing validation or testing!")
        init_checkpoint(exe, args.init_checkpoint, main_program=infer_prog)

    if args.do_train:
        train_exe = exe
        train_pyreader.set_batch_generator(train_data_generator)
    else:
        train_exe = None
    if args.do_val:
        test_exe = exe
        test_pyreader.set_batch_generator(test_data_generator)
    if args.do_infer:
        test_exe = exe
        infer_pyreader.set_batch_generator(infer_data_generator)

    if args.do_train:
        train_pyreader.start()
        steps = 0
        total_cost, total_acc, total_num_seqs = [], [], []
        time_begin = time.time()
        while True:
            try:
                steps += 1
                # Only pay the fetch cost every skip_steps batches.
                if steps % args.skip_steps == 0:
                    fetch_list = [loss.name, accuracy.name, num_seqs.name]
                else:
                    fetch_list = []
                outputs = train_exe.run(program=train_program,
                                        fetch_list=fetch_list,
                                        return_numpy=False)
                if steps % args.skip_steps == 0:
                    np_loss, np_acc, np_num_seqs = outputs
                    np_loss = np.array(np_loss)
                    np_acc = np.array(np_acc)
                    np_num_seqs = np.array(np_num_seqs)
                    # Weight batch metrics by sequence count for averaging.
                    total_cost.extend(np_loss * np_num_seqs)
                    total_acc.extend(np_acc * np_num_seqs)
                    total_num_seqs.extend(np_num_seqs)
                    if args.verbose:
                        verbose = "train pyreader queue size: %d, " % train_pyreader.queue.size(
                        )
                        print(verbose)
                    time_end = time.time()
                    used_time = time_end - time_begin
                    print("step: %d, ave loss: %f, "
                          "ave acc: %f, speed: %f steps/s" %
                          (steps, np.sum(total_cost) / np.sum(total_num_seqs),
                           np.sum(total_acc) / np.sum(total_num_seqs),
                           args.skip_steps / used_time))
                    total_cost, total_acc, total_num_seqs = [], [], []
                    time_begin = time.time()
                if steps % args.save_steps == 0:
                    save_path = os.path.join(args.checkpoints,
                                             "step_" + str(steps),
                                             "checkpoint")
                    fluid.save(train_program, save_path)
                if steps % args.validation_steps == 0:
                    # evaluate dev set
                    if args.do_val:
                        evaluate(exe, test_prog, test_pyreader,
                                 [loss.name, accuracy.name, num_seqs.name],
                                 "dev")
            except fluid.core.EOFException:
                # Data exhausted: save a final checkpoint and stop.
                save_path = os.path.join(args.checkpoints,
                                         "step_" + str(steps), "checkpoint")
                fluid.save(train_program, save_path)
                train_pyreader.reset()
                break

    # final eval on dev set
    if args.do_val:
        print("Final validation result:")
        evaluate(exe, test_prog, test_pyreader,
                 [loss.name, accuracy.name, num_seqs.name], "dev")

    # final eval on test set
    if args.do_infer:
        print("Final test result:")
        infer(exe, infer_prog, infer_pyreader, [probs.name], "infer")
def train(conf_dict, args):
    """
    Train process for the SimNet matching model.

    Dynamically loads the network, loss and optimizer classes named in
    conf_dict, builds pairwise or pointwise train/valid programs, runs the
    training loop with periodic validation and inference-model saving, emits
    continuous-evaluation KPIs when enabled, and optionally evaluates on the
    test set.
    """
    # loading vocabulary
    vocab = utils.load_vocab(args.vocab_path)
    # get vocab size
    conf_dict['dict_size'] = len(vocab)
    # Load network structure dynamically
    net = utils.import_class("../shared_modules/models/matching",
                             conf_dict["net"]["module_name"],
                             conf_dict["net"]["class_name"])(conf_dict)
    # Load loss function dynamically
    loss = utils.import_class("../shared_modules/models/matching/losses",
                              conf_dict["loss"]["module_name"],
                              conf_dict["loss"]["class_name"])(conf_dict)
    # Load Optimization method
    optimizer = utils.import_class(
        "../shared_modules/models/matching/optimizers", "paddle_optimizers",
        conf_dict["optimizer"]["class_name"])(conf_dict)
    # load auc method
    metric = fluid.metrics.Auc(name="auc")
    # Get device
    if args.use_cuda:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    startup_prog = fluid.Program()
    train_program = fluid.Program()

    # used for continuous evaluation
    if args.enable_ce:
        SEED = 102
        startup_prog.random_seed = SEED
        train_program.random_seed = SEED

    simnet_process = reader.SimNetProcessor(args, vocab)
    if args.task_mode == "pairwise":
        # Build network: a shared tower scores (left, pos) and (left, neg);
        # the loss contrasts the two scores.
        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                train_loader, left, pos_right, neg_right = create_model(args)
                left_feat, pos_score = net.predict(left, pos_right)
                pred = pos_score
                _, neg_score = net.predict(left, neg_right)
                avg_cost = loss.compute(pos_score, neg_score)
                avg_cost.persistable = True
                optimizer.ops(avg_cost)
        # Get Reader
        get_train_examples = simnet_process.get_reader("train",
                                                       epoch=args.epoch)
        if args.do_valid:
            test_prog = fluid.Program()
            with fluid.program_guard(test_prog, startup_prog):
                with fluid.unique_name.guard():
                    test_loader, left, pos_right = create_model(
                        args, is_inference=True)
                    left_feat, pos_score = net.predict(left, pos_right)
                    pred = pos_score
            test_prog = test_prog.clone(for_test=True)
    else:
        # Build network: pointwise scoring against an explicit label.
        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                train_loader, left, right, label = create_model(
                    args, is_pointwise=True)
                left_feat, pred = net.predict(left, right)
                avg_cost = loss.compute(pred, label)
                avg_cost.persistable = True
                optimizer.ops(avg_cost)
        # Get Feeder and Reader
        get_train_examples = simnet_process.get_reader("train",
                                                       epoch=args.epoch)
        if args.do_valid:
            test_prog = fluid.Program()
            with fluid.program_guard(test_prog, startup_prog):
                with fluid.unique_name.guard():
                    test_loader, left, right = create_model(args,
                                                            is_inference=True)
                    left_feat, pred = net.predict(left, right)
            test_prog = test_prog.clone(for_test=True)

    # BUG FIX: was `is not ""` — identity comparison with a string literal is
    # implementation-dependent (and a SyntaxWarning); compare by value.
    # NOTE(review): this runs before exe.run(startup_prog) below — confirm
    # utils.init_checkpoint does not rely on already-initialized parameters.
    if args.init_checkpoint != "":
        utils.init_checkpoint(exe, args.init_checkpoint, startup_prog)

    def valid_and_test(test_program, test_loader, get_valid_examples, process,
                       mode, exe, fetch_list):
        """
        Run the eval program over the valid/test split and return auc
        (and accuracy when args.compute_accuracy is set).
        """
        # Get Batch Data
        batch_data = fluid.io.batch(get_valid_examples,
                                    args.batch_size,
                                    drop_last=False)
        test_loader.set_sample_list_generator(batch_data)
        test_loader.start()
        pred_list = []
        while True:
            try:
                _pred = exe.run(program=test_program,
                                fetch_list=[pred.name])
                pred_list += list(_pred)
            except fluid.core.EOFException:
                test_loader.reset()
                break
        pred_list = np.vstack(pred_list)
        if mode == "test":
            label_list = process.get_test_label()
        elif mode == "valid":
            label_list = process.get_valid_label()
        if args.task_mode == "pairwise":
            # Map scores from [-1, 1] to [0, 1] and build the two-column
            # (negative, positive) probability layout Auc expects.
            pred_list = (pred_list + 1) / 2
            pred_list = np.hstack(
                (np.ones_like(pred_list) - pred_list, pred_list))
        metric.reset()
        metric.update(pred_list, label_list)
        auc = metric.eval()
        if args.compute_accuracy:
            acc = utils.get_accuracy(pred_list, label_list, args.task_mode,
                                     args.lamda)
            return auc, acc
        else:
            return auc

    # run train
    logging.info("start train process ...")
    # set global step
    global_step = 0
    ce_info = []
    train_exe = exe

    # used for continuous evaluation: keep sample order deterministic
    if args.enable_ce:
        train_batch_data = fluid.io.batch(get_train_examples,
                                          args.batch_size,
                                          drop_last=False)
    else:
        train_batch_data = fluid.io.batch(fluid.io.shuffle(get_train_examples,
                                                           buf_size=10000),
                                          args.batch_size,
                                          drop_last=False)
    train_loader.set_sample_list_generator(train_batch_data)
    train_loader.start()
    exe.run(startup_prog)
    losses = []
    start_time = time.time()
    while True:
        try:
            global_step += 1
            fetch_list = [avg_cost.name]
            avg_loss = train_exe.run(program=train_program,
                                     fetch_list=fetch_list)
            losses.append(np.mean(avg_loss[0]))
            if args.do_valid and global_step % args.validation_steps == 0:
                get_valid_examples = simnet_process.get_reader("valid")
                valid_result = valid_and_test(test_prog, test_loader,
                                              get_valid_examples,
                                              simnet_process, "valid", exe,
                                              [pred.name])
                if args.compute_accuracy:
                    valid_auc, valid_acc = valid_result
                    logging.info(
                        "global_steps: %d, valid_auc: %f, valid_acc: %f, valid_loss: %f"
                        % (global_step, valid_auc, valid_acc, np.mean(losses)))
                else:
                    valid_auc = valid_result
                    logging.info(
                        "global_steps: %d, valid_auc: %f, valid_loss: %f" %
                        (global_step, valid_auc, np.mean(losses)))
            if global_step % args.save_steps == 0:
                model_save_dir = os.path.join(args.output_dir,
                                              conf_dict["model_path"])
                model_path = os.path.join(model_save_dir, str(global_step))
                if not os.path.exists(model_save_dir):
                    os.makedirs(model_save_dir)
                if args.task_mode == "pairwise":
                    feed_var_names = [left.name, pos_right.name]
                    target_vars = [left_feat, pos_score]
                else:
                    feed_var_names = [
                        left.name,
                        right.name,
                    ]
                    target_vars = [left_feat, pred]
                fluid.io.save_inference_model(model_path, feed_var_names,
                                              target_vars, exe, test_prog)
                logging.info("saving infer model in %s" % model_path)
        except fluid.core.EOFException:
            train_loader.reset()
            break
    end_time = time.time()
    ce_info.append([np.mean(losses), end_time - start_time])

    # final save
    logging.info("the final step is %s" % global_step)
    model_save_dir = os.path.join(args.output_dir, conf_dict["model_path"])
    model_path = os.path.join(model_save_dir, str(global_step))
    if not os.path.exists(model_save_dir):
        os.makedirs(model_save_dir)
    if args.task_mode == "pairwise":
        feed_var_names = [left.name, pos_right.name]
        target_vars = [left_feat, pos_score]
    else:
        feed_var_names = [
            left.name,
            right.name,
        ]
        target_vars = [left_feat, pred]
    fluid.io.save_inference_model(model_path, feed_var_names, target_vars,
                                  exe, test_prog)
    logging.info("saving infer model in %s" % model_path)

    # used for continuous evaluation
    if args.enable_ce:
        card_num = get_cards()
        ce_loss = 0
        ce_time = 0
        try:
            ce_loss = ce_info[-1][0]
            ce_time = ce_info[-1][1]
        except IndexError:  # narrowed from a bare except: only the [-1] can fail
            logging.info("ce info err!")
        print("kpis\teach_step_duration_%s_card%s\t%s" %
              (args.task_name, card_num, ce_time))
        print("kpis\ttrain_loss_%s_card%s\t%f" %
              (args.task_name, card_num, ce_loss))

    if args.do_test:
        # Get Feeder and Reader (identical for both task modes; the original
        # duplicated this line across an if/else).
        get_test_examples = simnet_process.get_reader("test")
        test_result = valid_and_test(test_prog, test_loader,
                                     get_test_examples, simnet_process,
                                     "test", exe, [pred.name])
        if args.compute_accuracy:
            test_auc, test_acc = test_result
            logging.info("AUC of test is %f, Accuracy of test is %f" %
                         (test_auc, test_acc))
        else:
            test_auc = test_result
            logging.info("AUC of test is %f" % test_auc)
def main(args):
    """Entry point for ERNIE-based sequence labeling: train / test / infer.

    Builds the fluid programs selected by ``args.do_train`` /
    ``args.do_test`` / ``args.do_infer``, loads checkpoints or pretrained
    parameters, then runs the requested phases.

    Args:
        args: parsed command-line namespace (overwritten below by a fresh
            ``parser.parse_args()`` call, preserving the original behavior).

    Raises:
        ValueError: if none of do_train/do_test/do_infer is set, or if a
            test/infer-only run has no ``init_checkpoint``.
    """
    args = parser.parse_args()
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    exe = fluid.Executor(place)

    reader = task_reader.SequenceLabelReader(
        vocab_path=args.vocab_path,
        label_map_config=args.label_map_config,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case,
        in_tokens=False,
        random_seed=args.random_seed)

    if not (args.do_train or args.do_test or args.do_infer):
        # BUGFIX: message previously named `do_val`, which is not one of the
        # flags actually tested here.
        raise ValueError("For args `do_train`, `do_test` and `do_infer`, at "
                         "least one of them must be True.")

    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    if args.do_train:
        num_train_examples = reader.get_num_examples(args.train_set)
        # steps are counted per device, hence the dev_count divisor
        max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count
        print("Device count: %d" % dev_count)
        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)

        train_program = fluid.Program()

        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                # create ernie_pyreader
                train_pyreader, ernie_inputs, words, labels = ernie_pyreader(
                    args, pyreader_name='train_reader')
                train_pyreader.decorate_tensor_provider(
                    reader.data_generator(args.train_set,
                                          args.batch_size,
                                          args.epoch,
                                          shuffle=True,
                                          phase="train"))
                # get ernie_embeddings
                embeddings = ernie_encoder(ernie_inputs, ernie_config=ernie_config)
                # user defined model based on ernie embeddings
                train_ret = create_model(args,
                                         embeddings,
                                         labels=labels,
                                         is_prediction=False)

                optimizer = fluid.optimizer.Adam(learning_rate=args.lr)
                # global-norm gradient clipping must be registered before minimize()
                fluid.clip.set_gradient_clip(
                    clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0))
                optimizer.minimize(train_ret["loss"])

        lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
            program=train_program, batch_size=args.batch_size)
        print("Theoretical memory usage in training: %.3f - %.3f %s" %
              (lower_mem, upper_mem, unit))

    if args.do_test:
        test_program = fluid.Program()
        with fluid.program_guard(test_program, startup_prog):
            with fluid.unique_name.guard():
                # create ernie_pyreader
                test_pyreader, ernie_inputs, words, labels = ernie_pyreader(
                    args, pyreader_name='test_reader')
                test_pyreader.decorate_tensor_provider(
                    reader.data_generator(args.test_set,
                                          args.batch_size,
                                          phase='test',
                                          epoch=1,
                                          shuffle=False))
                # get ernie_embeddings
                embeddings = ernie_encoder(ernie_inputs, ernie_config=ernie_config)
                # user defined model based on ernie embeddings
                test_ret = create_model(args,
                                        embeddings,
                                        labels=labels,
                                        is_prediction=False)

        test_program = test_program.clone(for_test=True)

    if args.do_infer:
        infer_program = fluid.Program()
        with fluid.program_guard(infer_program, startup_prog):
            with fluid.unique_name.guard():
                # create ernie_pyreader
                infer_pyreader, ernie_inputs, words, labels = ernie_pyreader(
                    args, pyreader_name='infer_reader')
                infer_pyreader.decorate_tensor_provider(
                    reader.data_generator(args.infer_set,
                                          args.batch_size,
                                          phase='infer',
                                          epoch=1,
                                          shuffle=False))
                # get ernie_embeddings
                embeddings = ernie_encoder(ernie_inputs, ernie_config=ernie_config)
                # user defined model based on ernie embeddings
                infer_ret = create_model(args,
                                         embeddings,
                                         labels=labels,
                                         is_prediction=True)
                # keep word ids so predictions can be mapped back to tokens
                infer_ret["words"] = words

        infer_program = infer_program.clone(for_test=True)

    exe.run(startup_prog)

    # load checkpoints
    if args.do_train:
        if args.init_checkpoint and args.init_pretraining_params:
            print("WARNING: args 'init_checkpoint' and 'init_pretraining_params' "
                  "both are set! Only arg 'init_checkpoint' is made valid.")
        if args.init_checkpoint:
            utils.init_checkpoint(exe, args.init_checkpoint, startup_prog)
        elif args.init_pretraining_params:
            utils.init_pretraining_params(exe, args.init_pretraining_params,
                                          startup_prog)
    elif args.do_test or args.do_infer:
        if not args.init_checkpoint:
            raise ValueError(
                "args 'init_checkpoint' should be set if only doing test or infer!"
            )
        utils.init_checkpoint(exe, args.init_checkpoint, startup_prog)

    if args.do_train:
        train_pyreader.start()
        steps = 0
        total_cost, total_acc, total_num_seqs = [], [], []
        while True:
            try:
                steps += 1
                # only fetch metrics on logging steps to keep other steps cheap
                if steps % args.skip_steps == 0:
                    fetch_list = [
                        train_ret["loss"],
                        train_ret["num_infer_chunks"],
                        train_ret["num_label_chunks"],
                        train_ret["num_correct_chunks"],
                    ]
                else:
                    fetch_list = []

                start_time = time.time()
                outputs = exe.run(program=train_program, fetch_list=fetch_list)
                end_time = time.time()
                if steps % args.skip_steps == 0:
                    loss, nums_infer, nums_label, nums_correct = outputs
                    train_ret["chunk_evaluator"].reset()
                    train_ret["chunk_evaluator"].update(
                        nums_infer, nums_label, nums_correct)
                    precision, recall, f1_score = train_ret[
                        "chunk_evaluator"].eval()
                    print(
                        "[train] batch_id = %d, loss = %.5f, P: %.5f, R: %.5f, F1: %.5f, elapsed time %.5f, "
                        "pyreader queue_size: %d " %
                        (steps, loss, precision, recall, f1_score,
                         end_time - start_time, train_pyreader.queue.size()))

                if steps % args.save_steps == 0:
                    save_path = os.path.join(args.checkpoints,
                                             "step_" + str(steps))
                    print("\tsaving model as %s" % (save_path))
                    fluid.io.save_persistables(exe, save_path, train_program)

                if steps % args.validation_steps == 0:
                    # evaluate test set
                    if args.do_test:
                        evaluate(exe, test_program, test_pyreader, test_ret)

            except fluid.core.EOFException:
                # data exhausted: save a final checkpoint and leave the loop
                save_path = os.path.join(args.checkpoints, "step_" + str(steps))
                fluid.io.save_persistables(exe, save_path, train_program)
                train_pyreader.reset()
                break

    # final eval on test set
    if args.do_test:
        evaluate(exe, test_program, test_pyreader, test_ret)

    if args.do_infer:
        # create dict mapping ids back to words/labels for result decoding
        id2word_dict = dict([(str(word_id), word)
                             for word, word_id in reader.vocab.items()])
        id2label_dict = dict([(str(label_id), label)
                              for label, label_id in reader.label_map.items()])
        Dataset = namedtuple("Dataset", ["id2word_dict", "id2label_dict"])
        dataset = Dataset(id2word_dict, id2label_dict)

        infer_pyreader.start()
        while True:
            try:
                (words, crf_decode) = exe.run(
                    infer_program,
                    fetch_list=[infer_ret["words"], infer_ret["crf_decode"]],
                    return_numpy=False)
                # User should notice that words had been clipped if long than args.max_seq_len
                results = utils.parse_result(words, crf_decode, dataset)
                for result in results:
                    print(result)
            except fluid.core.EOFException:
                infer_pyreader.reset()
                break
def main(args):
    """Entry point for the intent-classification driver: train / eval / test.

    Selects CPU or CUDA places, builds dictionaries and the graph, loads an
    optional checkpoint (best-effort), compiles the programs requested by
    ``args.do_train`` / ``args.do_eval`` / ``args.do_test``, and runs them.

    Args:
        args: parsed command-line namespace.

    Raises:
        ValueError: if none of do_train/do_eval/do_test is set.
    """
    global DEV_COUNT
    startup_prog = fluid.default_startup_program()
    random.seed(args.random_seed)
    model_config = ConfigReader.read_conf(args.config_path)
    if args.use_cuda:
        # single-GPU place for test, all visible GPUs for training
        test_place = fluid.cuda_places(0)
        place = fluid.cuda_places()
        DEV_COUNT = len(place)
    else:
        test_place = fluid.cpu_places(1)
        os.environ['CPU_NUM'] = str(args.cpu_num)
        place = fluid.cpu_places()
        DEV_COUNT = args.cpu_num
    logger.info("Dev Num is %s" % str(DEV_COUNT))
    exe = fluid.Executor(place[0])
    if args.do_train and args.build_dict:
        DataProcesser.build_dict(args.data_dir + "train.txt", args.data_dir)
    # read dict
    char_dict = DataProcesser.read_dict(args.data_dir + "char.dict")
    dict_dim = len(char_dict)
    intent_dict = DataProcesser.read_dict(args.data_dir + "domain.dict")
    # invert the label map: id -> intent name, used when decoding predictions
    id2intent = {int(value): key for key, value in intent_dict.items()}
    num_labels = len(intent_dict)
    # build model
    loader_res = build_data_loader(args, char_dict, intent_dict)
    build_res = build_graph(args, model_config, num_labels, dict_dim, place,
                            test_place, loader_res)
    build_res["place"] = place
    build_res["test_place"] = test_place
    if not (args.do_train or args.do_eval or args.do_test):
        raise ValueError("For args `do_train`, `do_eval` and `do_test`, at "
                         "least one of them must be True.")

    exe.run(startup_prog)
    if args.init_checkpoint and args.init_checkpoint != "None":
        # best-effort restore: a failed load is logged but does not abort,
        # so training can still start from random initialization
        try:
            init_checkpoint(exe, args.init_checkpoint, main_program=startup_prog)
            logger.info("Load model from %s" % args.init_checkpoint)
        except Exception as e:
            logger.exception(str(e))
            # BUGFIX: corrected "Faild" -> "Failed" in the log message
            logger.error("Failed load model from %s [%s]" %
                         (args.init_checkpoint, str(e)))
    build_strategy = fluid.compiler.BuildStrategy()
    build_strategy.fuse_all_reduce_ops = False
    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.num_threads = 1
    # add compiled prog
    if args.do_train:
        compiled_prog = fluid.compiler.CompiledProgram(
            build_res["train_prog"]).with_data_parallel(
                loss_name=build_res["cost"].name,
                build_strategy=build_strategy,
                exec_strategy=exec_strategy)
        build_res["compiled_prog"] = compiled_prog
    if args.do_test:
        test_compiled_prog = fluid.compiler.CompiledProgram(
            build_res["test_prog"])
        build_res["test_compiled_prog"] = test_compiled_prog
    if args.do_eval:
        eval_compiled_prog = fluid.compiler.CompiledProgram(
            build_res["eval_prog"])
        build_res["eval_compiled_prog"] = eval_compiled_prog

    if args.do_train:
        train(args, exe, build_res, place)
    if args.do_eval:
        evaluate(args, exe, build_res, "eval",
                 save_result=True, id2intent=id2intent)
    if args.do_test:
        evaluate(args, exe, build_res, "test",
                 save_result=True, id2intent=id2intent)