def save_inference_model(args):
    """Build the inference graph, restore a trained checkpoint, and export
    the model in Paddle's inference-model format.

    Args:
        args: parsed command-line namespace; uses ``use_cuda``,
            ``init_checkpoint`` and ``inference_save_dir``.
    """
    # Select the execution device.
    exec_place = (fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
                  if args.use_cuda else fluid.CPUPlace())

    corpus = reader.Dataset(args)

    # Build the inference graph in a dedicated program so it stays separate
    # from the default main program.
    export_program = fluid.Program()
    with fluid.program_guard(export_program, fluid.default_startup_program()):
        with fluid.unique_name.guard():
            model_outputs = creator.create_model(
                args, corpus.vocab_size, corpus.num_labels, mode='infer')
            export_program = export_program.clone(for_test=True)

    # Initialize parameters, then overwrite them from the checkpoint.
    executor = fluid.Executor(exec_place)
    executor.run(fluid.default_startup_program())
    utils.init_checkpoint(executor, args.init_checkpoint + '.pdckpt',
                          export_program)

    # Serialize graph + parameters for deployment.
    fluid.io.save_inference_model(
        args.inference_save_dir,
        ['words'],
        model_outputs['crf_decode'],
        executor,
        main_program=export_program,
        model_filename='model.pdmodel',
        params_filename='params.pdparams',
    )
def do_eval(args):
    """Evaluate a trained model on ``args.test_data`` after restoring a
    checkpoint.

    Args:
        args: parsed command-line namespace; uses ``use_cuda``, ``test_data``
            and ``init_checkpoint``.
    """
    corpus = reader.Dataset(args)

    # Assemble an evaluation-only program.
    eval_program = fluid.Program()
    with fluid.program_guard(eval_program, fluid.default_startup_program()):
        with fluid.unique_name.guard():
            eval_outputs = creator.create_model(
                args, corpus.vocab_size, corpus.num_labels, mode='test')
    eval_program = eval_program.clone(for_test=True)

    # init executor
    run_place = (fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
                 if args.use_cuda else fluid.CPUPlace())

    data_loader = creator.create_pyreader(args, file_name=args.test_data,
                                          feed_list=eval_outputs['feed_list'],
                                          place=run_place,
                                          mode='lac',
                                          reader=corpus,
                                          iterable=True,
                                          for_test=True)

    executor = fluid.Executor(run_place)
    executor.run(fluid.default_startup_program())

    # Restore trained weights before evaluating.
    utils.init_checkpoint(executor, args.init_checkpoint + '.pdckpt',
                          eval_program)

    test_process(exe=executor, program=eval_program, reader=data_loader,
                 test_ret=eval_outputs)
def do_infer(args):
    """Run inference over ``args.infer_data`` and write the resulting
    "(token, tag)" pairs to ``../processed.txt``, one sentence per line.

    Args:
        args: parsed command-line namespace; uses ``use_cuda``,
            ``infer_data`` and ``init_checkpoint``.
    """
    dataset = reader.Dataset(args)

    # Build the inference graph in its own program so it does not touch the
    # default main program.
    infer_program = fluid.Program()
    with fluid.program_guard(infer_program, fluid.default_startup_program()):
        with fluid.unique_name.guard():
            infer_ret = creator.create_model(
                args, dataset.vocab_size, dataset.num_labels, mode='infer')
            infer_program = infer_program.clone(for_test=True)

    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
    else:
        place = fluid.CPUPlace()

    # NOTE(review): the sibling do_eval passes mode='lac' here while this call
    # uses model='lac' together with mode='infer'; confirm against
    # create_pyreader's actual signature before assuming either is a typo.
    pyreader = creator.create_pyreader(args, file_name=args.infer_data,
                                       feed_list=infer_ret['feed_list'],
                                       place=place,
                                       model='lac',
                                       reader=dataset,
                                       mode='infer')

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # load model
    utils.init_checkpoint(exe, args.init_checkpoint, infer_program)

    result = infer_process(
        exe=exe,
        program=infer_program,
        reader=pyreader,
        fetch_vars=[infer_ret['words'], infer_ret['crf_decode']],
        dataset=dataset)

    # Persist predictions: each sentence becomes a line of concatenated
    # "(char, tag)" pairs.
    with open('../processed.txt', 'w') as f:
        for sent, tags in result:
            result_list = [
                '(%s, %s)' % (ch, tag) for ch, tag in zip(sent, tags)
            ]
            f.write(''.join(result_list) + '\n')
def do_infer(args):
    """Decode ``args.infer_data`` with a trained model and print each
    sentence's "(char, tag)" pairs to stdout."""
    corpus = reader.Dataset(args)

    # Build the inference graph in a dedicated program.
    decode_program = fluid.Program()
    with fluid.program_guard(decode_program, fluid.default_startup_program()):
        with fluid.unique_name.guard():
            model_ends = creator.create_model(
                args, corpus.vocab_size, corpus.num_labels, mode='infer')
            decode_program = decode_program.clone(for_test=True)

    run_place = (fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
                 if args.use_cuda else fluid.CPUPlace())

    # Feed batches of word ids straight from the raw file reader.
    loader = fluid.io.PyReader(feed_list=[model_ends['words']],
                               capacity=10,
                               iterable=True,
                               return_list=False)
    batched_source = paddle.batch(
        corpus.file_reader(args.infer_data, mode='infer'),
        batch_size=args.batch_size)
    loader.decorate_sample_list_generator(batched_source, places=run_place)

    executor = fluid.Executor(run_place)
    executor.run(fluid.default_startup_program())

    # load model
    utils.init_checkpoint(executor, args.init_checkpoint, decode_program)

    result = infer_process(
        exe=executor,
        program=decode_program,
        reader=loader,
        fetch_vars=[model_ends['words'], model_ends['crf_decode']],
        dataset=corpus)

    for sent, tags in result:
        pairs = ['(%s, %s)' % (ch, tag) for ch, tag in zip(sent, tags)]
        print(''.join(pairs))
def do_train(args):
    """Train the model, periodically evaluating and saving checkpoints, and
    finally emit CE (continuous-evaluation) KPI lines when ``args.enable_ce``
    is set.

    Args:
        args: parsed command-line namespace; uses ``use_cuda``, ``cpu_num``,
            ``random_seed``, ``train_data``/``test_data``, ``batch_size``,
            ``epoch``, ``base_learning_rate``, ``init_checkpoint``,
            ``print_steps``, ``validation_steps``, ``save_steps``,
            ``model_save_dir`` and ``enable_ce``.
    """
    # init executor / device selection
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        dev_count = min(multiprocessing.cpu_count(), args.cpu_num)
        if dev_count < args.cpu_num:
            print(
                "WARNING: The total CPU NUM in this machine is %d, which is less than cpu_num parameter you set. "
                "Change the cpu_num from %d to %d" %
                (dev_count, args.cpu_num, dev_count))
        os.environ['CPU_NUM'] = str(dev_count)
        place = fluid.CPUPlace()

    train_program = fluid.Program()
    test_program = fluid.Program()
    startup_program = fluid.Program()
    dataset = reader.Dataset(args)

    with fluid.program_guard(train_program, startup_program):
        # train_program.random_seed = args.random_seed
        startup_program.random_seed = args.random_seed
        with fluid.unique_name.guard():
            train_ret = creator.create_model(
                args, dataset.vocab_size, dataset.num_labels, mode='train')
            optimizer = fluid.optimizer.Adam(
                learning_rate=args.base_learning_rate)
            optimizer.minimize(train_ret["avg_cost"])

    with fluid.program_guard(test_program, startup_program):
        with fluid.unique_name.guard():
            test_ret = creator.create_model(
                args, dataset.vocab_size, dataset.num_labels, mode='test')
        test_program = test_program.clone(for_test=True)

    exe = fluid.Executor(place)
    exe.run(startup_program)
    if args.init_checkpoint:
        model_utils.init_checkpoint(exe, args.init_checkpoint, train_program)

    if dev_count > 1:
        device = "GPU" if args.use_cuda else "CPU"
        print("%d %s are used to train model" % (dev_count, device))
        # multi cpu/gpu config
        exec_strategy = fluid.ExecutionStrategy()
        build_strategy = fluid.compiler.BuildStrategy()
        compiled_prog = fluid.compiler.CompiledProgram(
            train_program).with_data_parallel(
                loss_name=train_ret['avg_cost'].name,
                build_strategy=build_strategy,
                exec_strategy=exec_strategy)
    else:
        compiled_prog = fluid.compiler.CompiledProgram(train_program)

    # start training
    num_train_examples = dataset.get_num_examples(args.train_data)
    max_train_steps = args.epoch * num_train_examples // args.batch_size
    print("Num train examples: %d" % num_train_examples)
    print("Max train steps: %d" % max_train_steps)

    train_generator = creator.create_lexnet_data_generator(
        args, reader=dataset, file_name=args.train_data, place=place,
        mode='train')
    test_generator = creator.create_lexnet_data_generator(
        args, reader=dataset, file_name=args.test_data, place=place,
        mode='test')

    train_reader, test_reader = train_ret['pyreader'], test_ret['pyreader']
    train_reader.set_batch_generator(train_generator, places=place)
    test_reader.set_batch_generator(test_generator, places=place)

    ce_info = []
    step = 0
    ce_time = 0
    train_reader.start()
    while True:
        try:
            # this is for minimizing the fetching op, saving the training speed.
            if step % args.print_steps == 0:
                fetch_list = [
                    train_ret["avg_cost"], train_ret["precision"],
                    train_ret["recall"], train_ret["f1_score"],
                    train_ret["crf_avg_cost"], train_ret["teacher_cost"]
                ]
            else:
                fetch_list = []

            start_time = time.time()
            outputs = exe.run(program=compiled_prog, fetch_list=fetch_list)
            end_time = time.time()

            if step % args.print_steps == 0:
                (avg_cost, precision, recall, f1_score, crf_avg_cost,
                 teacher_cost) = [np.mean(x) for x in outputs]
                print("Data loader queue size: %d " %
                      train_reader.queue.size())
                print(
                    "[train] step = %d, loss = %.5f, P: %.5f, R: %.5f, F1: %.5f, crf_avg_cost: %.5f, teacher_cost: %.5f, elapsed time %.5f"
                    % (step, avg_cost, precision, recall, f1_score,
                       crf_avg_cost, teacher_cost, end_time - start_time))

            if step % args.validation_steps == 0:
                test_process(exe, test_program, test_reader, test_ret)
                # CE bookkeeping uses the metrics fetched above; they are only
                # fresh when this step also hit the print_steps branch
                # (validation_steps should be a multiple of print_steps).
                ce_time += end_time - start_time
                ce_info.append(
                    [ce_time, avg_cost, precision, recall, f1_score])

            # save checkpoints
            if step % args.save_steps == 0 and step != 0:
                save_path = os.path.join(args.model_save_dir,
                                         "step_" + str(step))
                fluid.io.save_persistables(exe, save_path, train_program)
            step += 1
        except fluid.core.EOFException:
            # Data exhausted: reset the reader and leave the training loop.
            train_reader.reset()
            break

    if args.enable_ce:
        card_num = get_cards()
        ce_cost = 0
        ce_f1 = 0
        ce_p = 0
        ce_r = 0
        ce_time = 0
        # FIX: was a bare `except:`, which also swallowed KeyboardInterrupt
        # and SystemExit; the only expected failure is ce_info holding fewer
        # than two entries, i.e. IndexError.
        try:
            ce_time = ce_info[-2][0]
            ce_cost = ce_info[-2][1]
            ce_p = ce_info[-2][2]
            ce_r = ce_info[-2][3]
            ce_f1 = ce_info[-2][4]
        except IndexError:
            print("ce info error")
        print("kpis\teach_step_duration_card%s\t%s" % (card_num, ce_time))
        print("kpis\ttrain_cost_card%s\t%f" % (card_num, ce_cost))
        print("kpis\ttrain_precision_card%s\t%f" % (card_num, ce_p))
        print("kpis\ttrain_recall_card%s\t%f" % (card_num, ce_r))
        print("kpis\ttrain_f1_card%s\t%f" % (card_num, ce_f1))
def do_compress(args):
    """Run a PaddleSlim ``Compressor`` pass over the model, configured by
    ``args.compress_config``.

    Args:
        args: parsed command-line namespace; uses ``use_cuda``, ``cpu_num``,
            ``random_seed``, ``train_data``/``test_data``, ``batch_size``,
            ``base_learning_rate``, ``init_checkpoint`` and
            ``compress_config``.
    """
    # Unlike the other entry points, this builds into the *default* programs,
    # which is what Compressor operates on.
    train_program = fluid.default_main_program()
    startup_program = fluid.default_startup_program()

    dataset = reader.Dataset(args)
    with fluid.program_guard(train_program, startup_program):
        train_program.random_seed = args.random_seed
        startup_program.random_seed = args.random_seed
        with fluid.unique_name.guard():
            train_ret = creator.create_model(
                args, dataset.vocab_size, dataset.num_labels, mode='train')
            # Clone the graph for evaluation before any optimizer ops are
            # added; the optimizer itself is handed to Compressor below.
            test_program = train_program.clone()
            optimizer = fluid.optimizer.Adam(
                learning_rate=args.base_learning_rate)

    # init executor
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        dev_count = min(multiprocessing.cpu_count(), args.cpu_num)
        if (dev_count < args.cpu_num):
            print(
                "WARNING: The total CPU NUM in this machine is %d, which is less than cpu_num parameter you set. "
                "Change the cpu_num from %d to %d" %
                (dev_count, args.cpu_num, dev_count))
        os.environ['CPU_NUM'] = str(dev_count)
        place = fluid.CPUPlace()

    # Plain batched readers feed Compressor directly (no PyReader here).
    train_reader = paddle.batch(dataset.file_reader(args.train_data),
                                batch_size=args.batch_size)
    test_reader = paddle.batch(dataset.file_reader(args.test_data),
                               batch_size=args.batch_size)

    exe = fluid.Executor(place)
    exe.run(startup_program)
    if args.init_checkpoint:
        # Warm-start from a pretrained checkpoint when one is provided.
        utils.init_checkpoint(exe, args.init_checkpoint + '.pdckpt',
                              train_program)

    # (name, variable-name) pairs wiring the readers to graph inputs and the
    # metrics Compressor should fetch.
    train_feed_list = [('words', train_ret['words'].name),
                       ("targets", train_ret["targets"].name)]
    train_fetch_list = [('loss', train_ret['avg_cost'].name)]
    test_feed_list = [('words', train_ret['words'].name),
                      ("targets", train_ret["targets"].name)]
    test_fetch_list = [('f1_score', train_ret['f1_score'].name)]
    print(train_ret['crf_decode'].name)

    com_pass = Compressor(place,
                          fluid.global_scope(),
                          train_program=train_program,
                          train_reader=train_reader,
                          train_feed_list=train_feed_list,
                          train_fetch_list=train_fetch_list,
                          eval_program=test_program,
                          eval_reader=test_reader,
                          eval_feed_list=test_feed_list,
                          eval_fetch_list=test_fetch_list,
                          teacher_programs=[],
                          train_optimizer=optimizer,
                          distiller_optimizer=None)
    com_pass.config(args.compress_config)
    com_pass.run()