def do_save_inference_model(args):
    """Build the inference network, restore its weights and export it.

    The network is created with ``create_net(is_training=False, ...)`` inside
    the default main/startup programs, parameters are restored through
    ``save_load_io`` and the result is written with
    ``fluid.io.save_inference_model``.

    Args:
        args: Configuration object. The attributes read directly here are
            ``random_seed``, ``use_cuda``, ``init_from_params``,
            ``init_from_pretrain_model`` and ``inference_model_dir``; the
            object is also forwarded to ``create_net`` and ``save_load_io``.

    Raises:
        AssertionError: If neither ``args.init_from_params`` nor
            ``args.init_from_pretrain_model`` is set.
    """
    main_prog = fluid.default_main_program()
    init_prog = fluid.default_startup_program()

    with fluid.program_guard(main_prog, init_prog):
        main_prog.random_seed = args.random_seed
        init_prog.random_seed = args.random_seed

        with fluid.unique_name.guard():
            context_wordseq = fluid.data(
                name='context_wordseq',
                shape=[-1, 1],
                dtype='int64',
                lod_level=1)
            response_wordseq = fluid.data(
                name='response_wordseq',
                shape=[-1, 1],
                dtype='int64',
                lod_level=1)
            labels = fluid.data(name='labels', shape=[-1, 1], dtype='int64')

            feed_vars = [context_wordseq, response_wordseq, labels]
            input_field = InputField(feed_vars)
            # NOTE(review): the reader is never consumed in this function; it
            # is kept because constructing it may modify the program --
            # confirm before removing.
            data_reader = fluid.io.PyReader(
                feed_list=feed_vars, capacity=4, iterable=False)

            logits = create_net(
                is_training=False, model_input=input_field, args=args)

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()

    exe = fluid.Executor(place)
    exe.run(init_prog)

    # At least one source of trained weights must be configured.
    assert args.init_from_params or args.init_from_pretrain_model

    # init_from_params takes precedence when both sources are given.
    if args.init_from_params:
        save_load_io.init_from_params(args, exe, main_prog)
    elif args.init_from_pretrain_model:
        save_load_io.init_from_pretrain_model(args, exe, main_prog)

    # Only the two word sequences are fed at inference time; labels are not.
    feed_names = [
        input_field.context_wordseq.name,
        input_field.response_wordseq.name,
    ]
    fluid.io.save_inference_model(
        args.inference_model_dir,
        feeded_var_names=feed_names,
        target_vars=[logits],
        executor=exe,
        main_program=main_prog,
        model_filename="model.pdmodel",
        params_filename="params.pdparams")

    print("save inference model at %s" % (args.inference_model_dir))
def do_predict(args):
    """Run the network over the prediction file and write one score per line.

    The inference network is built with ``create_net(is_training=False, ...)``,
    weights are loaded with ``fluid.load`` and batches produced by
    ``reader.DataProcessor`` are run until the reader signals end of data.
    The collected scores are truncated to the number of test examples and
    written to ``args.output_prediction_file`` as ``<index>\\t<score>`` lines.

    Args:
        args: Configuration object. The attributes read directly here are
            ``random_seed``, ``use_cuda``, ``init_from_params``,
            ``init_from_pretrain_model``, ``predict_file``, ``max_seq_len``,
            ``batch_size`` and ``output_prediction_file``; the object is also
            forwarded to ``create_net``.

    Raises:
        AssertionError: If neither ``args.init_from_params`` nor
            ``args.init_from_pretrain_model`` is set.
    """
    test_prog = fluid.default_main_program()
    startup_prog = fluid.default_startup_program()

    with fluid.program_guard(test_prog, startup_prog):
        test_prog.random_seed = args.random_seed
        startup_prog.random_seed = args.random_seed

        with fluid.unique_name.guard():
            context_wordseq = fluid.data(
                name='context_wordseq',
                shape=[-1, 1],
                dtype='int64',
                lod_level=1)
            response_wordseq = fluid.data(
                name='response_wordseq',
                shape=[-1, 1],
                dtype='int64',
                lod_level=1)
            labels = fluid.data(name='labels', shape=[-1, 1], dtype='int64')

            input_inst = [context_wordseq, response_wordseq, labels]
            input_field = InputField(input_inst)
            data_reader = fluid.io.DataLoader.from_generator(
                feed_list=input_inst, capacity=4, iterable=False)

            logits = create_net(
                is_training=False, model_input=input_field, args=args)

            fetch_list = [logits.name]

    # for_test=True switches the is_test attribute of the operators to True.
    test_prog = test_prog.clone(for_test=True)

    if args.use_cuda:
        # The GPU index comes from FLAGS_selected_gpus, defaulting to GPU 0.
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
    else:
        place = fluid.CPUPlace()

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    # At least one source of trained weights must be configured.
    assert (args.init_from_params) or (args.init_from_pretrain_model)

    # Both loads run when both are set, so the pretrain model is loaded last.
    if args.init_from_params:
        fluid.load(test_prog, args.init_from_params, executor=exe)
    if args.init_from_pretrain_model:
        fluid.load(test_prog, args.init_from_pretrain_model, executor=exe)

    compiled_test_prog = fluid.CompiledProgram(test_prog)

    processor = reader.DataProcessor(
        data_path=args.predict_file,
        max_seq_length=args.max_seq_len,
        batch_size=args.batch_size)

    batch_generator = processor.data_generator(
        place=place, phase="test", shuffle=False, sample_pro=1)
    num_test_examples = processor.get_num_examples(phase='test')

    data_reader.set_batch_generator(batch_generator, places=place)
    data_reader.start()

    scores = []
    while True:
        try:
            results = exe.run(compiled_test_prog, fetch_list=fetch_list)
            scores.extend(results[0])
        except fluid.core.EOFException:
            # The reader is exhausted: reset it and stop.
            data_reader.reset()
            break

    # Keep at most one score per test example.
    scores = scores[:num_test_examples]

    print("Write the predicted results into the output_prediction_file")
    # The context manager closes (and therefore flushes) the file even if a
    # write fails; the handle was previously left open.
    with io.open(args.output_prediction_file, 'w', encoding="utf8") as fw:
        for index, score in enumerate(scores):
            fw.write(u"%s\t%s\n" % (index, score[0]))
    print("finish........................................")
def do_train(args):
    """Train the network and periodically save checkpoints/parameters.

    The training network is built with ``create_net(is_training=True, ...)``,
    optimized with Adam under value-based gradient clipping, and run with a
    data-parallel compiled program for ``args.epoch`` epochs.

    Args:
        args: Configuration object. The attributes read directly here are
            ``random_seed``, ``learning_rate``, ``use_cuda``,
            ``training_file``, ``max_seq_len``, ``batch_size``,
            ``sample_pro``, ``epoch``, ``init_from_checkpoint``,
            ``init_from_pretrain_model``, ``word_emb_init``, ``print_steps``,
            ``save_steps``, ``save_checkpoint``, ``save_param`` and
            ``enable_ce``; the object is also forwarded to ``create_net`` and
            ``save_load_io``.

    Raises:
        AssertionError: If both ``args.init_from_checkpoint`` and
            ``args.init_from_pretrain_model`` are non-empty.
    """
    train_prog = fluid.default_main_program()
    startup_prog = fluid.default_startup_program()

    with fluid.program_guard(train_prog, startup_prog):
        train_prog.random_seed = args.random_seed
        startup_prog.random_seed = args.random_seed

        with fluid.unique_name.guard():
            context_wordseq = fluid.layers.data(
                name='context_wordseq',
                shape=[1],
                dtype='int64',
                lod_level=1)
            response_wordseq = fluid.layers.data(
                name='response_wordseq',
                shape=[1],
                dtype='int64',
                lod_level=1)
            labels = fluid.layers.data(
                name='labels', shape=[1], dtype='int64')

            input_inst = [context_wordseq, response_wordseq, labels]
            input_field = InputField(input_inst)
            data_reader = fluid.io.PyReader(
                feed_list=input_inst, capacity=4, iterable=False)

            loss = create_net(
                is_training=True, model_input=input_field, args=args)
            loss.persistable = True

            # Gradient clipping: clamp every gradient value to [-1.0, 1.0].
            fluid.clip.set_gradient_clip(clip=fluid.clip.GradientClipByValue(
                max=1.0, min=-1.0))
            optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
            optimizer.minimize(loss)

            if args.use_cuda:
                dev_count = fluid.core.get_cuda_device_count()
                place = fluid.CUDAPlace(
                    int(os.getenv('FLAGS_selected_gpus', '0')))
            else:
                dev_count = int(
                    os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
                place = fluid.CPUPlace()

            processor = reader.DataProcessor(
                data_path=args.training_file,
                max_seq_length=args.max_seq_len,
                batch_size=args.batch_size)

            batch_generator = processor.data_generator(
                place=place,
                phase="train",
                shuffle=True,
                sample_pro=args.sample_pro)

            num_train_examples = processor.get_num_examples(phase='train')
            max_train_steps = (args.epoch * num_train_examples // dev_count //
                               args.batch_size)

            print("Num train examples: %d" % num_train_examples)
            print("Max train steps: %d" % max_train_steps)

            data_reader.decorate_batch_generator(batch_generator)

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    # Resuming from a checkpoint and starting from a pretrained model are
    # mutually exclusive.
    assert (args.init_from_checkpoint == "") or (
        args.init_from_pretrain_model == "")

    # Init from some checkpoint, to resume the previous training.
    if args.init_from_checkpoint:
        save_load_io.init_from_checkpoint(args, exe, train_prog)
    # Init from some pretrain models, to better solve the current task.
    if args.init_from_pretrain_model:
        save_load_io.init_from_pretrain_model(args, exe, train_prog)

    if args.word_emb_init:
        print("start loading word embedding init ...")
        # NOTE(review): pickle.load can execute arbitrary code contained in
        # the file; word_emb_init must only point at trusted files.
        # The context manager closes the file; it was previously left open.
        with open(args.word_emb_init, 'rb') as emb_file:
            if six.PY2:
                raw_emb = pickle.load(emb_file)
            else:
                raw_emb = pickle.load(emb_file, encoding="bytes")
        word_emb = np.array(raw_emb).astype('float32')
        set_word_embedding(word_emb, place)
        print("finish init word embedding ...")

    build_strategy = fluid.compiler.BuildStrategy()
    build_strategy.enable_inplace = True

    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=loss.name, build_strategy=build_strategy)

    fetch_list = [loss.name]
    steps = 0
    begin_time = time.time()

    for epoch_step in range(args.epoch):
        data_reader.start()
        sum_loss = 0.0
        ce_loss = 0.0
        while True:
            try:
                outputs = exe.run(compiled_train_prog, fetch_list=fetch_list)
            except fluid.core.EOFException:
                # The reader is exhausted: reset it and end this epoch.
                data_reader.reset()
                break

            # Count a step only after a batch actually ran, so the
            # end-of-data attempt of each epoch does not inflate the counter.
            steps += 1
            batch_loss = np.array(outputs).mean()
            sum_loss += batch_loss
            ce_loss = batch_loss

            if steps % args.print_steps == 0:
                print('epoch: %d, step: %s, avg loss %s' %
                      (epoch_step, steps, sum_loss / args.print_steps))
                sum_loss = 0.0

            if steps % args.save_steps == 0:
                if args.save_checkpoint:
                    save_load_io.save_checkpoint(args, exe, train_prog,
                                                 "step_" + str(steps))
                if args.save_param:
                    save_load_io.save_param(args, exe, train_prog,
                                            "step_" + str(steps))

    if args.save_checkpoint:
        save_load_io.save_checkpoint(args, exe, train_prog, "step_final")
    if args.save_param:
        save_load_io.save_param(args, exe, train_prog, "step_final")

    def get_cards():
        """Return the number of entries in CUDA_VISIBLE_DEVICES (0 if unset)."""
        num = 0
        cards = os.environ.get('CUDA_VISIBLE_DEVICES', '')
        if cards != '':
            num = len(cards.split(","))
        return num

    if args.enable_ce:
        # Report the duration and the last batch loss as "kpis" lines.
        card_num = get_cards()
        pass_time_cost = time.time() - begin_time
        print("test_card_num", card_num)
        print("kpis\ttrain_duration_card%s\t%s" % (card_num, pass_time_cost))
        print("kpis\ttrain_loss_card%s\t%f" % (card_num, ce_loss))