def do_eval(args):
    """Evaluate a trained LAC model on the test set.

    Builds the test-mode network, restores the checkpoint named by
    ``args.init_checkpoint`` (with a ``.pdckpt`` suffix) and delegates the
    actual evaluation loop to ``test_process``.
    """
    dataset = reader.Dataset(args)

    # Build the test network in its own program so it can be cloned
    # for inference-only execution.
    test_program = fluid.Program()
    with fluid.program_guard(test_program, fluid.default_startup_program()):
        with fluid.unique_name.guard():
            test_ret = creator.create_model(
                args, dataset.vocab_size, dataset.num_labels, mode='test')
    test_program = test_program.clone(for_test=True)

    # Select the execution device.
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
    else:
        place = fluid.CPUPlace()

    # NOTE(review): sibling functions in this file pass model='lac' plus a
    # separate mode=...; here 'lac' is passed as mode= — confirm against
    # creator.create_pyreader's signature.
    pyreader = creator.create_pyreader(args,
                                       file_name=args.test_data,
                                       feed_list=test_ret['feed_list'],
                                       place=place,
                                       mode='lac',
                                       reader=dataset,
                                       iterable=True,
                                       for_test=True)

    executor = fluid.Executor(place)
    executor.run(fluid.default_startup_program())

    # Restore trained parameters before evaluating.
    utils.init_checkpoint(executor, args.init_checkpoint + '.pdckpt',
                          test_program)

    test_process(exe=executor,
                 program=test_program,
                 reader=pyreader,
                 test_ret=test_ret)
def eval_model_once(data_path, model_dir):
    """Evaluate the MentorNet model once from its latest checkpoint.

    Args:
      data_path: path where the data is stored.
      model_dir: path where the model checkpoints are stored.

    Returns:
      average loss. NOTE(review): if no checkpoint is found the function
      falls through and implicitly returns None — confirm callers handle
      that case.
    """
    tf.reset_default_graph()
    mini_batch_size = 32
    test_data = reader.Dataset(data_path, 'ts')

    # Placeholders for one mini-batch of features and target v-values.
    input_data_pl = tf.placeholder(
        tf.float32,
        shape=(mini_batch_size, test_data.feature_dim),
        name='input_data_pl')
    v_truth_pl = tf.placeholder(
        tf.float32, shape=(mini_batch_size, 1), name='v_truth_pl')

    # pylint: disable=undefined-variable
    v = utils.mentornet_nn(input_data_pl)
    loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=v_truth_pl, logits=v))

    saver = tf.train.Saver()
    np.random.seed(0)  # deterministic evaluation batches

    with tf.Session() as session:
        ckpt = tf.train.get_checkpoint_state(model_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(session, ckpt.model_checkpoint_path)
            return _eval_ts_once(test_data, session, loss, v, input_data_pl,
                                 v_truth_pl, mini_batch_size)
def save_inference_model(args):
    """Export the trained LAC model in deployable inference format.

    Restores the checkpoint at ``args.init_checkpoint`` (``.pdckpt``) and
    writes ``model.pdmodel`` / ``params.pdparams`` under
    ``args.inference_save_dir``.
    """
    # Choose the device used to run the export.
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
    else:
        place = fluid.CPUPlace()

    dataset = reader.Dataset(args)

    # Build the inference graph in its own program.
    infer_program = fluid.Program()
    with fluid.program_guard(infer_program, fluid.default_startup_program()):
        with fluid.unique_name.guard():
            infer_ret = creator.create_model(
                args, dataset.vocab_size, dataset.num_labels, mode='infer')
    infer_program = infer_program.clone(for_test=True)

    # Restore the pretrained checkpoint.
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    utils.init_checkpoint(exe, args.init_checkpoint + '.pdckpt',
                          infer_program)

    # 'words' is the only feed target; crf_decode is the fetch target.
    fluid.io.save_inference_model(
        args.inference_save_dir,
        ['words'],
        infer_ret['crf_decode'],
        exe,
        main_program=infer_program,
        model_filename='model.pdmodel',
        params_filename='params.pdparams',
    )
def train():
    """Train the CTR model (classification or regression variant)."""
    args = parse_args()
    # --model_type=0,1 selects classification vs. regression.
    args.model_type = ModelType(args.model_type)

    # CPU only, with a single trainer thread.
    paddle.init(use_gpu=False, trainer_count=1)

    # e.g. dnn_input_dim: 61, lr_input_dim: 10040001
    dnn_input_dim, lr_input_dim = reader.load_data_meta(args.data_meta_file)

    # Create the CTR model.
    model = CTRmodel(dnn_layer_dims,
                     dnn_input_dim,
                     lr_input_dim,
                     model_type=args.model_type,
                     is_infer=False)
    params = paddle.parameters.create(model.train_cost)
    optimizer = paddle.optimizer.AdaGrad()  # learning-rate optimizer
    trainer = paddle.trainer.SGD(cost=model.train_cost,
                                 parameters=params,
                                 update_equation=optimizer)
    dataset = reader.Dataset()

    def __event_handler__(event):
        # Log progress every 100 batches; test + snapshot every 1000.
        if isinstance(event, paddle.event.EndIteration):
            num_samples = event.batch_id * args.batch_size
            if event.batch_id % 100 == 0:
                logger.warning("Pass %d, Samples %d, Cost %f, %s" %
                               (event.pass_id, num_samples, event.cost,
                                event.metrics))
            if event.batch_id % 1000 == 0:
                if args.test_data_path:
                    result = trainer.test(
                        reader=paddle.batch(
                            dataset.test(args.test_data_path),
                            batch_size=args.batch_size),
                        feeding=reader.feeding_index)
                    logger.warning("Test %d-%d, Cost %f, %s" %
                                   (event.pass_id, event.batch_id,
                                    result.cost, result.metrics))
                    path = "{}-pass-{}-batch-{}-test-{}.tar.gz".format(
                        args.model_output_prefix, event.pass_id,
                        event.batch_id, result.cost)
                    with gzip.open(path, 'w') as f:
                        trainer.save_parameter_to_tar(f)

    trainer.train(reader=paddle.batch(
        paddle.reader.shuffle(dataset.train(args.train_data_path),
                              buf_size=500),
        batch_size=args.batch_size),
                  feeding=reader.feeding_index,
                  event_handler=__event_handler__,
                  num_passes=args.num_passes)
def train():
    """Train the DeepFM model, periodically testing and checkpointing."""
    args = parse_args()

    if not os.path.isdir(args.model_output_dir):
        os.mkdir(args.model_output_dir)

    paddle.init(use_gpu=False, trainer_count=1)
    optimizer = paddle.optimizer.Adam(learning_rate=1e-4)
    model = DeepFM(args.factor_size)
    params = paddle.parameters.create(model)
    trainer = paddle.trainer.SGD(cost=model,
                                 parameters=params,
                                 update_equation=optimizer)
    dataset = reader.Dataset()

    def __event_handler__(event):
        # Log every 100 batches; test and snapshot roughly every 10000.
        if isinstance(event, paddle.event.EndIteration):
            num_samples = event.batch_id * args.batch_size
            if event.batch_id % 100 == 0:
                logger.warning("Pass %d, Batch %d, Samples %d, Cost %f, %s" %
                               (event.pass_id, event.batch_id, num_samples,
                                event.cost, event.metrics))
            if event.batch_id % 10000 == 1:
                if args.test_data_path:
                    result = trainer.test(
                        reader=paddle.batch(
                            dataset.test(args.test_data_path),
                            batch_size=args.batch_size),
                        feeding=reader.feeding)
                    logger.warning("Test %d-%d, Cost %f, %s" %
                                   (event.pass_id, event.batch_id,
                                    result.cost, result.metrics))
                path = "{}/model-pass-{}-batch-{}.tar.gz".format(
                    args.model_output_dir, event.pass_id, event.batch_id)
                with gzip.open(path, 'w') as f:
                    trainer.save_parameter_to_tar(f)

    trainer.train(
        reader=paddle.batch(
            paddle.reader.shuffle(dataset.train(args.train_data_path),
                                  buf_size=args.batch_size * 10000),
            batch_size=args.batch_size),
        feeding=reader.feeding,
        event_handler=__event_handler__,
        num_passes=args.num_passes)
def do_eval(args):
    """Evaluate a LAC model with the dygraph API and print P/R/F1."""
    dataset = reader.Dataset(args)
    if args.use_cuda:
        place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
            if args.use_data_parallel else fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()

    with fluid.dygraph.guard(place):
        test_loader = reader.create_dataloader(args,
                                               file_name=args.test_data,
                                               place=place,
                                               model='lac',
                                               reader=dataset,
                                               mode='test')
        model = lex_net(args, dataset.vocab_size, dataset.num_labels)

        # Restore weights; the CRF decoding weight is reused as the
        # linear-chain CRF weight in the state dict.
        load_path = args.init_checkpoint
        state_dict, _ = fluid.dygraph.load_dygraph(load_path)
        state_dict["linear_chain_crf.weight"] = state_dict[
            "crf_decoding.weight"]
        model.set_dict(state_dict)
        model.eval()

        chunk_eval = Chunk_eval(
            int(math.ceil((dataset.num_labels - 1) / 2.0)), "IOB")
        chunk_evaluator = fluid.metrics.ChunkEvaluator()
        chunk_evaluator.reset()

        def test_process(reader, chunk_evaluator):
            # Accumulate chunk counts over the full test set, then report
            # aggregate precision / recall / F1.
            start_time = time.time()
            for batch in reader():
                words, targets, length = batch
                crf_decode = model(words, length=length)
                (precision, recall, f1_score, num_infer_chunks,
                 num_label_chunks, num_correct_chunks) = chunk_eval(
                     input=crf_decode, label=targets, seq_length=length)
                chunk_evaluator.update(num_infer_chunks.numpy(),
                                       num_label_chunks.numpy(),
                                       num_correct_chunks.numpy())
            precision, recall, f1 = chunk_evaluator.eval()
            end_time = time.time()
            print("[test] P: %.5f, R: %.5f, F1: %.5f, elapsed time: %.3f s" %
                  (precision, recall, f1, end_time - start_time))

        test_process(test_loader, chunk_evaluator)
def infer(self, data_path):
    """Run inference on ``data_path`` and write one prediction per line.

    Predictions are written to ``args.prediction_output_path``.

    Args:
        data_path: path of the data file to run inference on.
    """
    logger.info("infer data...")
    dataset = reader.Dataset()
    # Fix: honour the data_path argument — the original ignored it and
    # read args.data_path instead, making the parameter dead.
    infer_reader = paddle.batch(dataset.infer(data_path), batch_size=1000)
    logger.warning('write predictions to %s' % args.prediction_output_path)
    # Fix: context-manage the output file so the handle is always closed.
    with open(args.prediction_output_path, 'w') as output_f:
        for batch in infer_reader():
            res = self.inferer.infer(input=batch)
            # Each batch yields nested prediction rows; flatten them.
            predictions = [x for x in itertools.chain.from_iterable(res)]
            assert len(batch) == len(
                predictions), "predict error, %d inputs, but %d predictions" % (
                    len(batch), len(predictions))
            output_f.write('\n'.join(map(str, predictions)) + '\n')
def do_eval(args):
    """Dump the test set to a flat binary file at ``args.save_bin_path``.

    Layout: int64 sentence count, int64 total word count, uint64
    per-sentence lengths, int64 word ids, int64 target ids.
    """
    words = fluid.data(name='words', shape=[None, 1], dtype='int64',
                       lod_level=1)
    targets = fluid.data(name='targets', shape=[None, 1], dtype='int64',
                         lod_level=1)
    dataset = reader.Dataset(args)
    pyreader = creator.create_pyreader(args,
                                       file_name=args.test_data,
                                       feed_list=[words, targets],
                                       place=fluid.CPUPlace(),
                                       model='lac',
                                       reader=dataset,
                                       mode='test')

    all_lods = []
    all_words = []
    all_targets = []
    sum_words = 0
    sum_sentences = 0
    for data in pyreader():
        # Debug trace of the incoming LoD tensor.
        print(len(data[0]['words'].lod()[0]))
        print(data[0]['words'])
        new_lod = data[0]['words'].lod()[0][1]
        new_words = np.array(data[0]['words'])
        new_targets = np.array(data[0]['targets'])
        assert new_lod == len(new_words)
        assert new_lod == len(new_targets)
        all_lods.append(new_lod)
        all_words.extend(new_words.flatten())
        all_targets.extend(new_targets.flatten())
        sum_sentences += 1
        sum_words += new_lod

    with open(args.save_bin_path, "w+b") as out_file:
        out_file.write(np.array(int(sum_sentences)).astype('int64').tobytes())
        out_file.write(np.array(int(sum_words)).astype('int64').tobytes())
        out_file.write(np.array(all_lods).astype('uint64').tobytes())
        out_file.write(np.array(all_words).astype('int64').tobytes())
        out_file.write(np.array(all_targets).astype('int64').tobytes())
    print(
        "SUCCESS!! Binary file saved at ",
        args.save_bin_path,
    )
def infer(self, data_path):
    """Predict for ``data_path`` and write results to the configured path."""
    logger.info("infer data...")
    dataset = reader.Dataset(train_paths=data_path,
                             test_paths=None,
                             source_dic_path=self.source_dic_path,
                             target_dic_path=self.target_dic_path)
    infer_reader = paddle.batch(dataset.infer, batch_size=1000)

    prediction_output_path = config.config["prediction_output_path"]
    logger.warning("write prediction to %s" % prediction_output_path)
    with open(prediction_output_path, "w") as f:
        for batch_id, batch in enumerate(infer_reader()):
            res = self.inferer.infer(input=batch)
            prediction = [" ".join(map(str, x)) for x in res]
            assert len(batch) == len(prediction), (
                "predict error, %d inputs,"
                "but %d predictions") % (len(batch), len(prediction))
            f.write("\n".join(map(str, prediction)) + "\n")
def do_infer(args):
    """Run LAC inference and write (char, tag) pairs to ../processed.txt."""
    dataset = reader.Dataset(args)

    # Build the inference graph.
    infer_program = fluid.Program()
    with fluid.program_guard(infer_program, fluid.default_startup_program()):
        with fluid.unique_name.guard():
            infer_ret = creator.create_model(
                args, dataset.vocab_size, dataset.num_labels, mode='infer')
    infer_program = infer_program.clone(for_test=True)

    # Select the execution device.
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
    else:
        place = fluid.CPUPlace()

    pyreader = creator.create_pyreader(args,
                                       file_name=args.infer_data,
                                       feed_list=infer_ret['feed_list'],
                                       place=place,
                                       model='lac',
                                       reader=dataset,
                                       mode='infer')

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # Restore trained parameters.
    utils.init_checkpoint(exe, args.init_checkpoint, infer_program)

    result = infer_process(
        exe=exe,
        program=infer_program,
        reader=pyreader,
        fetch_vars=[infer_ret['words'], infer_ret['crf_decode']],
        dataset=dataset)

    with open('../processed.txt', 'w') as f:
        for sent, tags in result:
            pairs = ['(%s, %s)' % (ch, tag) for ch, tag in zip(sent, tags)]
            f.write(''.join(pairs) + '\n')
def do_infer(args):
    """Run LAC inference via a raw PyReader and print (char, tag) pairs."""
    dataset = reader.Dataset(args)

    # Build the inference graph.
    infer_program = fluid.Program()
    with fluid.program_guard(infer_program, fluid.default_startup_program()):
        with fluid.unique_name.guard():
            infer_ret = creator.create_model(
                args, dataset.vocab_size, dataset.num_labels, mode='infer')
    infer_program = infer_program.clone(for_test=True)

    # Select the execution device.
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
    else:
        place = fluid.CPUPlace()

    # Feed only 'words'; batches come straight from the file reader.
    pyreader = fluid.io.PyReader(feed_list=[infer_ret['words']],
                                 capacity=10,
                                 iterable=True,
                                 return_list=False)
    pyreader.decorate_sample_list_generator(
        paddle.batch(dataset.file_reader(args.infer_data, mode='infer'),
                     batch_size=args.batch_size),
        places=place)

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # Restore trained parameters.
    utils.init_checkpoint(exe, args.init_checkpoint, infer_program)

    result = infer_process(
        exe=exe,
        program=infer_program,
        reader=pyreader,
        fetch_vars=[infer_ret['words'], infer_ret['crf_decode']],
        dataset=dataset)
    for sent, tags in result:
        pairs = ['(%s, %s)' % (ch, tag) for ch, tag in zip(sent, tags)]
        print(''.join(pairs))
def infer(self, data_path):
    """Run inference over ``data_path`` and persist the predictions.

    One space-joined prediction row per line, written to
    ``args.prediction_output_path``.

    Args:
        data_path: path of the data file to run inference on.
    """
    dataset = reader.Dataset(
        train_path=data_path,
        test_path=None,
        source_dic_path=args.source_dic_path,
        target_dic_path=args.target_dic_path,
        model_type=args.model_type,
    )
    infer_reader = paddle.batch(dataset.infer, batch_size=1000)
    logger.warning("Write predictions to %s." % args.prediction_output_path)
    # Fix: use a context manager so the output file is always closed
    # (the original leaked the handle).
    with open(args.prediction_output_path, "w") as output_f:
        for batch in infer_reader():
            res = self.inferer.infer(input=batch)
            predictions = [" ".join(map(str, x)) for x in res]
            assert len(batch) == len(predictions), (
                "Error! %d inputs are given, "
                "but only %d predictions are returned.") % (len(batch),
                                                            len(predictions))
            output_f.write("\n".join(map(str, predictions)) + "\n")
def infer(self, data_path):
    """Run inference over ``data_path`` and write predictions to disk.

    Args:
        data_path: path of the data file to run inference on.
    """
    logger.info("infer data...")
    dataset = reader.Dataset(
        train_path=data_path,
        test_path=None,
        source_dic_path=args.source_dic_path,
        target_dic_path=args.target_dic_path,
        model_type=args.model_type,
    )
    infer_reader = paddle.batch(dataset.infer, batch_size=1000)
    logger.warning('write predictions to %s' % args.prediction_output_path)
    # Fix: context-manage the output file (the original never closed it).
    with open(args.prediction_output_path, 'w') as output_f:
        for batch in infer_reader():
            res = self.inferer.infer(input=batch)
            predictions = [' '.join(map(str, x)) for x in res]
            assert len(batch) == len(
                predictions
            ), "predict error, %d inputs, but %d predictions" % (
                len(batch), len(predictions))
            output_f.write('\n'.join(map(str, predictions)) + '\n')
def infer():
    """Load a trained DeepFM model and write predictions for args.data_path."""
    args = parse_args()
    paddle.init(use_gpu=False, trainer_count=1)

    # Restore parameters from the gzipped tarball checkpoint.
    model = DeepFM(args.factor_size, infer=True)
    parameters = paddle.parameters.Parameters.from_tar(
        gzip.open(args.model_gz_path, 'r'))
    inferer = paddle.inference.Inference(output_layer=model,
                                         parameters=parameters)
    dataset = reader.Dataset()
    infer_reader = paddle.batch(dataset.infer(args.data_path),
                                batch_size=1000)

    with open(args.prediction_output_path, 'w') as out:
        for batch in infer_reader():
            res = inferer.infer(input=batch)
            predictions = list(itertools.chain.from_iterable(res))
            out.write('\n'.join(map(str, predictions)) + '\n')
def do_train(args):
    """Train the LAC model with static-graph executors.

    Builds the train/test programs, optionally restores a checkpoint,
    compiles for single- or multi-device execution, then runs the training
    loop with periodic logging, validation and checkpointing, and finally
    emits continuous-evaluation (CE) KPIs when enabled.
    """
    # ---- choose device(s) -------------------------------------------------
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        dev_count = min(multiprocessing.cpu_count(), args.cpu_num)
        if (dev_count < args.cpu_num):
            print(
                "WARNING: The total CPU NUM in this machine is %d, which is less than cpu_num parameter you set. "
                "Change the cpu_num from %d to %d" %
                (dev_count, args.cpu_num, dev_count))
        os.environ['CPU_NUM'] = str(dev_count)
        place = fluid.CPUPlace()

    train_program = fluid.Program()
    test_program = fluid.Program()
    startup_program = fluid.Program()
    dataset = reader.Dataset(args)

    # ---- build train graph ------------------------------------------------
    with fluid.program_guard(train_program, startup_program):
        startup_program.random_seed = args.random_seed
        with fluid.unique_name.guard():
            train_ret = creator.create_model(
                args, dataset.vocab_size, dataset.num_labels, mode='train')
            optimizer = fluid.optimizer.Adam(
                learning_rate=args.base_learning_rate)
            optimizer.minimize(train_ret["avg_cost"])

    # ---- build test graph (shares parameters with train) ------------------
    with fluid.program_guard(test_program, startup_program):
        with fluid.unique_name.guard():
            test_ret = creator.create_model(
                args, dataset.vocab_size, dataset.num_labels, mode='test')
    test_program = test_program.clone(for_test=True)

    exe = fluid.Executor(place)
    exe.run(startup_program)
    if args.init_checkpoint:
        model_utils.init_checkpoint(exe, args.init_checkpoint, train_program)

    if dev_count > 1:
        device = "GPU" if args.use_cuda else "CPU"
        print("%d %s are used to train model" % (dev_count, device))
        # multi cpu/gpu config
        exec_strategy = fluid.ExecutionStrategy()
        build_strategy = fluid.compiler.BuildStrategy()
        compiled_prog = fluid.compiler.CompiledProgram(
            train_program).with_data_parallel(
                loss_name=train_ret['avg_cost'].name,
                build_strategy=build_strategy,
                exec_strategy=exec_strategy)
    else:
        compiled_prog = fluid.compiler.CompiledProgram(train_program)

    # ---- data pipelines ---------------------------------------------------
    num_train_examples = dataset.get_num_examples(args.train_data)
    max_train_steps = args.epoch * num_train_examples // args.batch_size
    print("Num train examples: %d" % num_train_examples)
    print("Max train steps: %d" % max_train_steps)

    train_generator = creator.create_lexnet_data_generator(
        args,
        reader=dataset,
        file_name=args.train_data,
        place=place,
        mode='train')
    test_generator = creator.create_lexnet_data_generator(
        args,
        reader=dataset,
        file_name=args.test_data,
        place=place,
        mode='test')

    train_reader, test_reader = train_ret['pyreader'], test_ret['pyreader']
    train_reader.set_batch_generator(train_generator, places=place)
    test_reader.set_batch_generator(test_generator, places=place)

    # ---- training loop ----------------------------------------------------
    ce_info = []
    step = 0
    ce_time = 0
    train_reader.start()
    while True:
        try:
            # this is for minimizing the fetching op, saving the training speed.
            if step % args.print_steps == 0:
                fetch_list = [
                    train_ret["avg_cost"], train_ret["precision"],
                    train_ret["recall"], train_ret["f1_score"],
                    train_ret["crf_avg_cost"], train_ret["teacher_cost"]
                ]
            else:
                fetch_list = []

            start_time = time.time()
            outputs = exe.run(program=compiled_prog, fetch_list=fetch_list)
            end_time = time.time()
            if step % args.print_steps == 0:
                avg_cost, precision, recall, f1_score, crf_avg_cost, teacher_cost = [
                    np.mean(x) for x in outputs
                ]
                print("Data loader queue size: %d " %
                      train_reader.queue.size())
                print(
                    "[train] step = %d, loss = %.5f, P: %.5f, R: %.5f, F1: %.5f, crf_avg_cost: %.5f, teacher_cost: %.5f, elapsed time %.5f"
                    % (step, avg_cost, precision, recall, f1_score,
                       crf_avg_cost, teacher_cost, end_time - start_time))

                if step % args.validation_steps == 0:
                    test_process(exe, test_program, test_reader, test_ret)

                ce_time += end_time - start_time
                ce_info.append(
                    [ce_time, avg_cost, precision, recall, f1_score])

            # save checkpoints
            if step % args.save_steps == 0 and step != 0:
                save_path = os.path.join(args.model_save_dir,
                                         "step_" + str(step))
                fluid.io.save_persistables(exe, save_path, train_program)
            step += 1
        except fluid.core.EOFException:
            train_reader.reset()
            break

    # ---- continuous-evaluation reporting ----------------------------------
    if args.enable_ce:
        card_num = get_cards()
        ce_cost = 0
        ce_f1 = 0
        ce_p = 0
        ce_r = 0
        ce_time = 0
        try:
            ce_time = ce_info[-2][0]
            ce_cost = ce_info[-2][1]
            ce_p = ce_info[-2][2]
            ce_r = ce_info[-2][3]
            ce_f1 = ce_info[-2][4]
        except IndexError:
            # Fix: catch only the expected "fewer than two CE entries"
            # failure instead of a bare except that hid real bugs.
            print("ce info error")
        print("kpis\teach_step_duration_card%s\t%s" % (card_num, ce_time))
        print("kpis\ttrain_cost_card%s\t%f" % (card_num, ce_cost))
        print("kpis\ttrain_precision_card%s\t%f" % (card_num, ce_p))
        print("kpis\ttrain_recall_card%s\t%f" % (card_num, ce_r))
        print("kpis\ttrain_f1_card%s\t%f" % (card_num, ce_f1))
def do_compress(args):
    """Compress the LAC model with PaddleSlim's Compressor.

    Builds the training graph, restores a checkpoint when given, wires
    feed/fetch lists by variable name, and runs the compression pass
    configured by ``args.compress_config``.
    """
    train_program = fluid.default_main_program()
    startup_program = fluid.default_startup_program()
    dataset = reader.Dataset(args)

    with fluid.program_guard(train_program, startup_program):
        train_program.random_seed = args.random_seed
        startup_program.random_seed = args.random_seed
        with fluid.unique_name.guard():
            train_ret = creator.create_model(
                args, dataset.vocab_size, dataset.num_labels, mode='train')
            test_program = train_program.clone()
            optimizer = fluid.optimizer.Adam(
                learning_rate=args.base_learning_rate)

    # Select the execution device(s).
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        dev_count = min(multiprocessing.cpu_count(), args.cpu_num)
        if (dev_count < args.cpu_num):
            print(
                "WARNING: The total CPU NUM in this machine is %d, which is less than cpu_num parameter you set. "
                "Change the cpu_num from %d to %d" %
                (dev_count, args.cpu_num, dev_count))
        os.environ['CPU_NUM'] = str(dev_count)
        place = fluid.CPUPlace()

    train_reader = paddle.batch(dataset.file_reader(args.train_data),
                                batch_size=args.batch_size)
    test_reader = paddle.batch(dataset.file_reader(args.test_data),
                               batch_size=args.batch_size)

    exe = fluid.Executor(place)
    exe.run(startup_program)
    if args.init_checkpoint:
        utils.init_checkpoint(exe, args.init_checkpoint + '.pdckpt',
                              train_program)

    # Feed/fetch wiring for the compressor, addressed by variable name.
    train_feed_list = [('words', train_ret['words'].name),
                       ("targets", train_ret["targets"].name)]
    train_fetch_list = [('loss', train_ret['avg_cost'].name)]
    test_feed_list = [('words', train_ret['words'].name),
                      ("targets", train_ret["targets"].name)]
    test_fetch_list = [('f1_score', train_ret['f1_score'].name)]
    print(train_ret['crf_decode'].name)

    com_pass = Compressor(place,
                          fluid.global_scope(),
                          train_program=train_program,
                          train_reader=train_reader,
                          train_feed_list=train_feed_list,
                          train_fetch_list=train_fetch_list,
                          eval_program=test_program,
                          eval_reader=test_reader,
                          eval_feed_list=test_feed_list,
                          eval_fetch_list=test_fetch_list,
                          teacher_programs=[],
                          train_optimizer=optimizer,
                          distiller_optimizer=None)
    com_pass.config(args.compress_config)
    com_pass.run()
print("Load inference model from %s" % (model_dir)) # get lac result crf_decode = exe.run(inferencer, feed={feed_target_names[0]: tensor_words}, fetch_list=fetch_targets, return_numpy=False) # parse the crf_decode result result = utils.parse_result(tensor_words, crf_decode[0], dataset) for i, (sent, tags) in enumerate(result): result_list = [ '(%s, %s)' % (ch, tag) for ch, tag in zip(sent, tags) ] print(''.join(result_list)) if __name__ == "__main__": parser = argparse.ArgumentParser(__doc__) utils.load_yaml(parser, 'conf/args.yaml') args = parser.parse_args() check_cuda(args.use_cuda) print("save inference model") #save_inference_model(args) #print("inference model save in %s"%args.inference_save_dir) print("test inference model") dataset = reader.Dataset(args) test_data = [u'百度是一家高科技公司', u'中山大学是岭南第一学府'] test_inference_model(args.inference_save_dir, test_data, dataset)
def train(train_data_path=None,
          test_data_path=None,
          source_dic_path=None,
          target_dic_path=None,
          model_type=ModelType.create_classification(),
          model_arch=ModelArch.create_cnn(),
          batch_size=10,
          num_passes=10,
          share_semantic_generator=False,
          share_embed=False,
          class_num=None,
          num_workers=1,
          use_gpu=False):
    '''
    Train the DSSM.

    When no data paths are supplied, falls back to the bundled sample
    data for the selected task (rank vs. classification).
    '''
    default_train_path = './data/rank/train.txt'
    default_test_path = './data/rank/test.txt'
    default_dic_path = './data/vocab.txt'
    if not model_type.is_rank():
        default_train_path = './data/classification/train.txt'
        default_test_path = './data/classification/test.txt'

    use_default_data = not train_data_path
    if use_default_data:
        train_data_path = default_train_path
        test_data_path = default_test_path
        source_dic_path = default_dic_path
        target_dic_path = default_dic_path

    dataset = reader.Dataset(
        train_path=train_data_path,
        test_path=test_data_path,
        source_dic_path=source_dic_path,
        target_dic_path=target_dic_path,
        model_type=model_type,
    )

    train_reader = paddle.batch(
        paddle.reader.shuffle(dataset.train, buf_size=1000),
        batch_size=batch_size)
    test_reader = paddle.batch(
        paddle.reader.shuffle(dataset.test, buf_size=1000),
        batch_size=batch_size)

    paddle.init(use_gpu=use_gpu, trainer_count=num_workers)

    # Build the network and its optimization target.
    cost, prediction, label = DSSM(
        dnn_dims=layer_dims,
        vocab_sizes=[
            len(load_dic(path))
            for path in [source_dic_path, target_dic_path]
        ],
        model_type=model_type,
        model_arch=model_arch,
        share_semantic_generator=share_semantic_generator,
        class_num=class_num,
        share_embed=share_embed)()

    parameters = paddle.parameters.create(cost)
    adam_optimizer = paddle.optimizer.Adam(
        learning_rate=1e-3,
        regularization=paddle.optimizer.L2Regularization(rate=1e-3),
        model_average=paddle.optimizer.ModelAverage(average_window=0.5))

    # AUC is only meaningful for the non-rank (classification) task.
    trainer = paddle.trainer.SGD(
        cost=cost,
        extra_layers=paddle.evaluator.auc(input=prediction, label=label)
        if not model_type.is_rank() else None,
        parameters=parameters,
        update_equation=adam_optimizer)

    # Map input slots to data-reader columns per task type.
    feeding = {}
    if model_type.is_classification() or model_type.is_regression():
        feeding = {'source_input': 0, 'target_input': 1, 'label_input': 2}
    else:
        feeding = {
            'source_input': 0,
            'left_target_input': 1,
            'right_target_input': 2,
            'label_input': 3
        }

    def _event_handler(event):
        '''
        Define batch handler
        '''
        if isinstance(event, paddle.event.EndIteration):
            # output train log
            if event.batch_id % args.num_batches_to_log == 0:
                logger.info("Pass %d, Batch %d, Cost %f, %s" %
                            (event.pass_id, event.batch_id, event.cost,
                             event.metrics))
            # test model
            if event.batch_id > 0 and event.batch_id % args.num_batches_to_test == 0:
                if test_reader is not None:
                    if model_type.is_classification():
                        result = trainer.test(reader=test_reader,
                                              feeding=feeding)
                        logger.info("Test at Pass %d, %s" %
                                    (event.pass_id, result.metrics))
                    else:
                        result = None
            # save model
            if event.batch_id > 0 and event.batch_id % args.num_batches_to_save_model == 0:
                model_desc = "{type}_{arch}".format(
                    type=str(args.model_type), arch=str(args.model_arch))
                with open(
                        "%sdssm_%s_pass_%05d.tar" %
                    (args.model_output_prefix, model_desc, event.pass_id),
                        "w") as f:
                    parameters.to_tar(f)

    trainer.train(reader=train_reader,
                  event_handler=_event_handler,
                  feeding=feeding,
                  num_passes=num_passes)

    logger.info("Training has finished.")
def do_train(args):
    """Train LAC in dygraph mode, with optional data parallelism.

    Restores nothing by default; runs the epoch/step loop with periodic
    train-metric printing, validation, dygraph checkpointing, and emits
    continuous-evaluation (CE) KPIs when enabled.
    """
    dataset = reader.Dataset(args)
    if args.use_cuda:
        place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
            if args.use_data_parallel else fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()

    with fluid.dygraph.guard(place):
        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()
        if args.enable_ce:
            # Fixed seeds for continuous-evaluation reproducibility.
            fluid.default_startup_program().random_seed = 102
            fluid.default_main_program().random_seed = 102
            np.random.seed(102)
            random.seed(102)

        train_loader = reader.create_dataloader(args,
                                                file_name=args.train_data,
                                                place=place,
                                                model='lac',
                                                reader=dataset)
        if args.use_data_parallel:
            train_loader = fluid.contrib.reader.distributed_batch_reader(
                train_loader)
        test_loader = reader.create_dataloader(args,
                                               file_name=args.test_data,
                                               place=place,
                                               model='lac',
                                               reader=dataset,
                                               mode='test')

        model = lex_net(args, dataset.vocab_size, dataset.num_labels)
        if args.use_data_parallel:
            model = fluid.dygraph.parallel.DataParallel(model, strategy)

        optimizer = fluid.optimizer.AdamOptimizer(
            learning_rate=args.base_learning_rate,
            parameter_list=model.parameters())
        chunk_eval = Chunk_eval(
            int(math.ceil((dataset.num_labels - 1) / 2.0)), "IOB")

        num_train_examples = dataset.get_num_examples(args.train_data)
        max_train_steps = args.epoch * num_train_examples // args.batch_size
        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)

        step = 0
        print_start_time = time.time()
        chunk_evaluator = fluid.metrics.ChunkEvaluator()
        chunk_evaluator.reset()

        def test_process(reader, chunk_evaluator):
            # Evaluate on the test loader and print aggregate P/R/F1,
            # restoring train mode afterwards.
            model.eval()
            chunk_evaluator.reset()
            start_time = time.time()
            for batch in reader():
                words, targets, length = batch
                crf_decode = model(words, length=length)
                (precision, recall, f1_score, num_infer_chunks,
                 num_label_chunks, num_correct_chunks) = chunk_eval(
                     input=crf_decode, label=targets, seq_length=length)
                chunk_evaluator.update(num_infer_chunks.numpy(),
                                       num_label_chunks.numpy(),
                                       num_correct_chunks.numpy())
            precision, recall, f1 = chunk_evaluator.eval()
            end_time = time.time()
            print("[test] P: %.5f, R: %.5f, F1: %.5f, elapsed time: %.3f s" %
                  (precision, recall, f1, end_time - start_time))
            model.train()

        ce_time = []
        ce_infor = []
        for epoch_id in range(args.epoch):
            for batch in train_loader():
                words, targets, length = batch
                start_time = time.time()
                avg_cost, crf_decode = model(words, targets, length)
                if args.use_data_parallel:
                    avg_cost = model.scale_loss(avg_cost)
                    avg_cost.backward()
                    model.apply_collective_grads()
                else:
                    avg_cost.backward()
                optimizer.minimize(avg_cost)
                model.clear_gradients()
                end_time = time.time()

                if step % args.print_steps == 0:
                    (precision, recall, f1_score, num_infer_chunks,
                     num_label_chunks, num_correct_chunks) = chunk_eval(
                         input=crf_decode, label=targets, seq_length=length)
                    outputs = [avg_cost, precision, recall, f1_score]
                    avg_cost, precision, recall, f1_score = [
                        np.mean(x.numpy()) for x in outputs
                    ]
                    print(
                        "[train] step = %d, loss = %.5f, P: %.5f, R: %.5f, F1: %.5f, elapsed time %.5f"
                        % (step, avg_cost, precision, recall, f1_score,
                           end_time - start_time))
                    ce_time.append(end_time - start_time)
                    ce_infor.append([precision, recall, f1_score])

                if step % args.validation_steps == 0:
                    test_process(test_loader, chunk_evaluator)

                # save checkpoints
                if step % args.save_steps == 0 and step != 0:
                    save_path = os.path.join(args.model_save_dir,
                                             "step_" + str(step))
                    paddle.fluid.save_dygraph(model.state_dict(), save_path)
                step += 1

        if args.enable_ce and fluid.dygraph.parallel.Env().local_rank == 0:
            card_num = fluid.core.get_cuda_device_count()
            _p = 0
            _r = 0
            _f1 = 0
            _time = 0
            try:
                _time = ce_time[-1]
                _p = ce_infor[-1][0]
                _r = ce_infor[-1][1]
                _f1 = ce_infor[-1][2]
            except IndexError:
                # Fix: catch only the expected "no CE entries" failure
                # instead of a bare except that hid real bugs.
                print("ce info error")
            print("kpis\ttrain_duration_card%s\t%s" % (card_num, _time))
            print("kpis\ttrain_p_card%s\t%f" % (card_num, _p))
            print("kpis\ttrain_r_card%s\t%f" % (card_num, _r))
            print("kpis\ttrain_f1_card%s\t%f" % (card_num, _f1))
def train(train_data_paths=None,
          test_data_paths=None,
          source_dic_path=None,
          target_dic_path=None,
          model_arch=ModelArch.create_rnn(),
          batch_size=10,
          num_passes=10,
          share_semantic_generator=False,
          share_embed=False,
          class_num=2,
          num_workers=1,
          use_gpu=False):
    """
    train DSSM

    Falls back to the bundled classification sample data when no paths
    are supplied.
    """
    default_train_paths = [
        "./data/classification/train/right.txt",
        "./data/classification/train/wrong.txt"
    ]
    default_test_paths = [
        "./data/classification/test/right.txt",
        "./data/classification/test/wrong.txt"
    ]
    default_dic_path = "./data/vocab.txt"
    layer_dims = [int(i) for i in config.config['dnn_dims'].split(',')]

    use_default_data = not train_data_paths
    if use_default_data:
        train_data_paths = default_train_paths
        test_data_paths = default_test_paths
        source_dic_path = default_dic_path
        target_dic_path = default_dic_path

    dataset = reader.Dataset(
        train_paths=train_data_paths,
        test_paths=test_data_paths,
        source_dic_path=source_dic_path,
        target_dic_path=target_dic_path
    )

    train_reader = paddle.batch(
        paddle.reader.shuffle(dataset.train, buf_size=1000),
        batch_size=batch_size)
    test_reader = paddle.batch(
        paddle.reader.shuffle(dataset.test, buf_size=1000),
        batch_size=batch_size)

    paddle.init(use_gpu=use_gpu, trainer_count=num_workers)

    # DSSM
    cost, prediction, label = DSSM(
        dnn_dims=layer_dims,
        vocab_sizes=[
            len(load_dic(path))
            for path in [source_dic_path, target_dic_path]
        ],
        model_arch=model_arch,
        share_semantic_generator=share_semantic_generator,
        class_num=class_num,
        share_embed=share_embed)()

    parameters = paddle.parameters.create(cost)
    adam_optimizer = paddle.optimizer.Adam(
        learning_rate=1e-3,
        regularization=paddle.optimizer.L2Regularization(rate=1e-3),
        model_average=paddle.optimizer.ModelAverage(average_window=0.5))

    trainer = paddle.trainer.SGD(
        cost=cost,
        extra_layers=paddle.evaluator.auc(input=prediction, label=label),
        parameters=parameters,
        update_equation=adam_optimizer)

    feeding = {"source_input": 0, "target_input": 1, "label_input": 2}

    def _event_handler(event):
        """
        Define batch handler
        :param event:
        :return:
        """
        if isinstance(event, paddle.event.EndIteration):
            # output train log
            if event.batch_id % config.config['num_batches_to_log'] == 0:
                logger.info("Pass %d, Batch %d, Cost %f, %s" %
                            (event.pass_id, event.batch_id, event.cost,
                             event.metrics))
            # test model
            if event.batch_id > 0 and event.batch_id % config.config['num_batches_to_test'] == 0:
                if test_reader is not None:
                    result = trainer.test(reader=test_reader,
                                          feeding=feeding)
                    logger.info("Test at Pass %d, %s" %
                                (event.pass_id, result.metrics))
            # save model
            if event.batch_id > 0 and event.batch_id % config.config['num_batches_to_save_model'] == 0:
                model_desc = "classification_{arch}".format(
                    arch=str(model_arch))
                with open("%sdssm_%s_pass_%05d.tar" %
                          (config.config['model_output_prefix'], model_desc,
                           event.pass_id), "w") as f:
                    parameters.to_tar(f)
                logger.info("save model: %sdssm_%s_pass_%05d.tar" %
                            (config.config['model_output_prefix'],
                             model_desc, event.pass_id))

    trainer.train(reader=train_reader,
                  event_handler=_event_handler,
                  feeding=feeding,
                  num_passes=num_passes)

    logger.info("training finish.")
def main(args):
    """Run LAC training / evaluation / inference as selected by `args`.

    Builds (at most) three fluid programs that share parameters through a
    common startup program, then, depending on the do_train / do_test /
    do_infer flags: trains with periodic checkpointing and evaluation,
    prints CE KPI lines, evaluates on the test set, and/or runs inference.

    Args:
        args: parsed command-line namespace. Fields read here include
            random_seed, do_train/do_test/do_infer, train/test/infer_data,
            batch_size, traindata_shuffle_buffer, base_learning_rate,
            use_cuda, init_checkpoint, epoch, save_model_per_batches,
            valid_model_per_batches, model_save_dir, enable_ce.
    """
    startup_program = fluid.Program()
    if args.random_seed is not None:
        startup_program.random_seed = args.random_seed

    # prepare dataset
    dataset = reader.Dataset(args)

    if args.do_train:
        train_program = fluid.Program()
        if args.random_seed is not None:
            train_program.random_seed = args.random_seed
        with fluid.program_guard(train_program, startup_program):
            with fluid.unique_name.guard():
                train_ret = create_model(
                    args, "train_reader", dataset.vocab_size,
                    dataset.num_labels)
                train_ret["pyreader"].decorate_paddle_reader(
                    paddle.batch(
                        paddle.reader.shuffle(
                            dataset.file_reader(args.train_data),
                            buf_size=args.traindata_shuffle_buffer),
                        batch_size=args.batch_size))

                optimizer = fluid.optimizer.Adam(
                    learning_rate=args.base_learning_rate)
                optimizer.minimize(train_ret["avg_cost"])

    if args.do_test:
        test_program = fluid.Program()
        with fluid.program_guard(test_program, startup_program):
            with fluid.unique_name.guard():
                test_ret = create_model(
                    args, "test_reader", dataset.vocab_size,
                    dataset.num_labels)
                test_ret["pyreader"].decorate_paddle_reader(
                    paddle.batch(
                        dataset.file_reader(args.test_data),
                        batch_size=args.batch_size))
        test_program = test_program.clone(
            for_test=True)  # to share parameters with train model

    if args.do_infer:
        infer_program = fluid.Program()
        with fluid.program_guard(infer_program, startup_program):
            with fluid.unique_name.guard():
                infer_ret = create_model(
                    args, "infer_reader", dataset.vocab_size,
                    dataset.num_labels)
                infer_ret["pyreader"].decorate_paddle_reader(
                    paddle.batch(
                        dataset.file_reader(args.infer_data),
                        batch_size=args.batch_size))
        infer_program = infer_program.clone(for_test=True)

    # init executor
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
    else:
        place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_program)

    # load checkpoints: optional warm start for training; mandatory for a
    # test-only run; inference always restores from init_checkpoint.
    if args.do_train:
        if args.init_checkpoint:
            utils.init_checkpoint(exe, args.init_checkpoint, train_program)
    elif args.do_test:
        if not args.init_checkpoint:
            raise ValueError(
                "args 'init_checkpoint' should be set if only doing validation or testing!"
            )
        utils.init_checkpoint(exe, args.init_checkpoint, test_program)
    if args.do_infer:
        utils.init_checkpoint(exe, args.init_checkpoint, infer_program)

    # do start to train
    if args.do_train:
        num_train_examples = dataset.get_num_examples(args.train_data)
        max_train_steps = args.epoch * num_train_examples // args.batch_size
        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)

        ce_info = []  # per-batch [cumulative time, cost, P, R, F1] for CE
        batch_id = 0
        for epoch_id in range(args.epoch):
            train_ret["pyreader"].start()
            ce_time = 0
            try:
                while True:
                    start_time = time.time()
                    avg_cost, nums_infer, nums_label, nums_correct = exe.run(
                        train_program,
                        fetch_list=[
                            train_ret["avg_cost"],
                            train_ret["num_infer_chunks"],
                            train_ret["num_label_chunks"],
                            train_ret["num_correct_chunks"],
                        ],
                    )
                    end_time = time.time()
                    # Reset then update so the metrics reflect only the
                    # current batch.
                    train_ret["chunk_evaluator"].reset()
                    train_ret["chunk_evaluator"].update(
                        nums_infer, nums_label, nums_correct)
                    precision, recall, f1_score = train_ret[
                        "chunk_evaluator"].eval()
                    batch_id += 1
                    print(
                        "[train] batch_id = %d, loss = %.5f, P: %.5f, R: %.5f, F1: %.5f, elapsed time %.5f "
                        % (batch_id, avg_cost, precision, recall, f1_score,
                           end_time - start_time))
                    ce_time += end_time - start_time
                    ce_info.append(
                        [ce_time, avg_cost, precision, recall, f1_score])

                    # save checkpoints
                    if (batch_id % args.save_model_per_batches == 0):
                        save_path = os.path.join(args.model_save_dir,
                                                 "step_" + str(batch_id))
                        fluid.io.save_persistables(exe, save_path,
                                                   train_program)

                    # evaluate
                    if (batch_id % args.valid_model_per_batches == 0
                        ) and args.do_test:
                        evaluate(exe, test_program, test_ret)
            except fluid.core.EOFException:
                # End of the epoch's data: checkpoint and reset the reader
                # so the next epoch can restart it.
                save_path = os.path.join(args.model_save_dir,
                                         "step_" + str(batch_id))
                fluid.io.save_persistables(exe, save_path, train_program)
                train_ret["pyreader"].reset()

    if args.do_train and args.enable_ce:
        card_num = get_cards()
        ce_cost = 0
        ce_f1 = 0
        ce_p = 0
        ce_r = 0
        ce_time = 0
        # Use the second-to-last batch's stats; keep the zero defaults when
        # fewer than two batches ran. Only IndexError is expected here —
        # a bare except would also swallow KeyboardInterrupt etc.
        try:
            ce_time = ce_info[-2][0]
            ce_cost = ce_info[-2][1]
            ce_p = ce_info[-2][2]
            ce_r = ce_info[-2][3]
            ce_f1 = ce_info[-2][4]
        except IndexError:
            print("ce info error")
        print("kpis\teach_step_duration_card%s\t%s" % (card_num, ce_time))
        print("kpis\ttrain_cost_card%s\t%f" % (card_num, ce_cost))
        print("kpis\ttrain_precision_card%s\t%f" % (card_num, ce_p))
        print("kpis\ttrain_recall_card%s\t%f" % (card_num, ce_r))
        print("kpis\ttrain_f1_card%s\t%f" % (card_num, ce_f1))

    # only test
    if args.do_test:
        evaluate(exe, test_program, test_ret)

    if args.do_infer:
        infer_ret["pyreader"].start()
        while True:
            try:
                (
                    words,
                    crf_decode,
                ) = exe.run(infer_program,
                            fetch_list=[
                                infer_ret["words"],
                                infer_ret["crf_decode"],
                            ],
                            return_numpy=False)
                results = utils.parse_result(words, crf_decode, dataset)
                for result in results:
                    print(result)
            except fluid.core.EOFException:
                infer_ret["pyreader"].reset()
                break