def infer():
    """Run sentiment-classification inference in dygraph mode.

    Builds the network selected by ``args.model_type``, restores weights
    from ``args.checkpoints`` and prints the token-weighted average
    accuracy plus throughput over the whole ``infer`` split.

    Relies on module-level ``args``, ``place``, ``reader``, ``nets``,
    ``fluid``, ``to_variable``, ``np`` and ``time``.

    Raises:
        ValueError: if ``args.model_type`` names an unknown architecture.
    """
    with fluid.dygraph.guard(place):
        processor = reader.SentaProcessor(data_dir=args.data_dir,
                                          vocab_path=args.vocab_path,
                                          random_seed=args.random_seed)
        infer_data_generator = processor.data_generator(
            batch_size=args.batch_size,
            phase='infer',
            epoch=args.epoch,
            shuffle=False)
        # Dispatch on the configured architecture.
        if args.model_type == 'cnn_net':
            model_infer = nets.CNN(args.vocab_size, args.batch_size,
                                   args.padding_size)
        elif args.model_type == 'bow_net':
            model_infer = nets.BOW(args.vocab_size, args.batch_size,
                                   args.padding_size)
        elif args.model_type == 'gru_net':
            model_infer = nets.GRU(args.vocab_size, args.batch_size,
                                   args.padding_size)
        elif args.model_type == 'bigru_net':
            model_infer = nets.BiGRU(args.vocab_size, args.batch_size,
                                     args.padding_size)
        else:
            # BUGFIX: an unrecognized model_type previously left
            # ``model_infer`` unbound and failed later with a confusing
            # NameError; fail fast with a clear message instead.
            raise ValueError("Unknown model_type: %s" % args.model_type)
        print('Do inferring ...... ')
        restore, _ = fluid.load_dygraph(args.checkpoints)
        model_infer.set_dict(restore)
        model_infer.eval()
        total_acc, total_num_seqs = [], []
        steps = 0
        time_begin = time.time()
        for batch_id, data in enumerate(infer_data_generator()):
            steps += 1
            # Truncate/pad each sample to padding_size, using vocab_size
            # as the padding id, then flatten to a 1-D int64 array.
            np_doc = np.array([
                np.pad(x[0][0:args.padding_size],
                       (0, args.padding_size - len(x[0][0:args.padding_size])),
                       'constant',
                       constant_values=(args.vocab_size)) for x in data
            ]).astype('int64').reshape(-1)
            doc = to_variable(np_doc)
            label = to_variable(
                np.array([x[1] for x in data]).astype('int64').reshape(
                    args.batch_size, 1))
            _, _, acc = model_infer(doc, label)
            # Weight each batch's accuracy by its count of real
            # (non-padding) tokens so short batches don't dominate.
            mask = (np_doc != args.vocab_size).astype('int32')
            word_num = np.sum(mask)
            total_acc.append(acc.numpy() * word_num)
            total_num_seqs.append(word_num)
        time_end = time.time()
        used_time = time_end - time_begin
        print("Final infer result: ave acc: %f, speed: %f steps/s" %
              (np.sum(total_acc) / np.sum(total_num_seqs),
               steps / used_time))
def test_inference_model(args):
    """Load a saved inference model and run prediction on the infer split.

    Rebuilds the prediction graph with ``create_model``, loads the
    persisted inference model from ``args.inference_model_dir`` and feeds
    the ``infer`` data split through the pyreader-backed ``inference``
    helper.

    Args:
        args: parsed command-line namespace (use_cuda, data_dir,
            vocab_path, batch_size, num_labels, inference_model_dir, ...).
    """
    if args.use_cuda:
        dev_count = fluid.core.get_cuda_device_count()
        place = fluid.CUDAPlace(0)
    else:
        dev_count = int(os.environ.get('CPU_NUM', 1))
        place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    test_prog = fluid.Program()
    startup_prog = fluid.Program()
    with fluid.program_guard(test_prog, startup_prog):
        with fluid.unique_name.guard():
            infer_pyreader, probs, feed_target_names = create_model(
                args,
                pyreader_name='infer_reader',
                num_labels=args.num_labels,
                is_prediction=True)
    test_prog = test_prog.clone(for_test=True)
    # BUGFIX: a second, redundant ``fluid.Executor(place)`` was created
    # here, shadowing the one above; a single executor suffices.
    exe.run(startup_prog)
    processor = reader.SentaProcessor(data_dir=args.data_dir,
                                      vocab_path=args.vocab_path,
                                      random_seed=args.random_seed,
                                      max_seq_len=args.max_seq_len)
    num_labels = len(processor.get_labels())
    assert (args.inference_model_dir)
    # NOTE(review): the loaded program/targets are not used below —
    # inference runs on ``test_prog``; the load acts as a validity check
    # of the exported model files. Confirm this is intentional.
    infer_program, feed_names, fetch_targets = fluid.io.load_inference_model(
        dirname=args.inference_model_dir,
        executor=exe,
        model_filename="model.pdmodel",
        params_filename="params.pdparams")
    # BUGFIX: ``/`` produced a float batch size under Python 3; the data
    # generator needs an integer, so use floor division.
    infer_data_generator = processor.data_generator(
        batch_size=args.batch_size // dev_count,
        phase="infer",
        epoch=1,
        shuffle=False)
    infer_pyreader.set_sample_list_generator(infer_data_generator)
    inference(exe, test_prog, infer_pyreader, [probs.name], "infer")
def train():
    """Train the dygraph CNN sentiment model with per-step timing meters.

    Benchmark-oriented variant: uses AverageMeter/ProgressMeter to report
    data-loading and batch times. Relies on module-level ``args``,
    ``place``, ``dev_count``, ``reader``, ``nets``, ``AverageMeter``,
    ``ProgressMeter``, ``Tools``, ``fluid``, ``to_variable``, ``np``,
    ``time``.
    """
    with fluid.dygraph.guard(place):
        # Benchmark mode caps the run at a single epoch.
        if args.benchmark:
            args.epoch = 1
        processor = reader.SentaProcessor(data_dir=args.data_dir,
                                          vocab_path=args.vocab_path,
                                          random_seed=args.random_seed)
        num_labels = len(processor.get_labels())
        num_train_examples = processor.get_num_examples(phase="train")
        max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count
        train_data_generator = processor.data_generator(
            batch_size=args.batch_size,
            phase='train',
            epoch=args.epoch,
            shuffle=True)
        eval_data_generator = processor.data_generator(
            batch_size=args.batch_size,
            phase='dev',
            epoch=args.epoch,
            shuffle=False)
        cnn_net = nets.CNN("cnn_net", args.vocab_size, args.batch_size,
                           args.padding_size)
        # NOTE(review): old dygraph optimizer API — no parameter_list is
        # passed here; confirm this matches the installed Paddle version.
        sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=args.lr)
        steps = 0
        total_cost, total_acc, total_num_seqs = [], [], []
        # NOTE(review): this fully consumes one generator instance (all
        # configured epochs of data) just to count batches for the
        # progress meter — expensive for large datasets; confirm intended.
        length = len(list(enumerate(train_data_generator())))
        for eop in range(args.epoch):
            time_begin = time.time()
            batch_time = AverageMeter('Time', ':6.3f')
            data_time = AverageMeter('Data', ':6.3f')
            progress = ProgressMeter(length,
                                     batch_time,
                                     data_time,
                                     prefix="epoch: [{}]".format(eop))
            end = Tools.time()
            for batch_id, data in enumerate(train_data_generator()):
                # Time spent waiting on the data pipeline for this batch.
                data_time.update(Tools.time() - end)
                steps += 1
                # Truncate/pad each sample to padding_size (pad id =
                # vocab_size) and shape as a column vector of token ids.
                doc = to_variable(
                    np.array([
                        np.pad(x[0][0:args.padding_size],
                               (0, args.padding_size -
                                len(x[0][0:args.padding_size])),
                               'constant',
                               constant_values=(args.vocab_size))
                        for x in data
                    ]).astype('int64').reshape(-1, 1))
                label = to_variable(
                    np.array([x[1] for x in data
                              ]).astype('int64').reshape(args.batch_size, 1))
                cnn_net.train()
                avg_cost, prediction, acc = cnn_net(doc, label)
                avg_cost.backward()
                batch_time.update(Tools.time() - end)
                # Count real (non-padding) tokens to weight the metrics.
                np_mask = (doc.numpy() != args.vocab_size).astype('int32')
                word_num = np.sum(np_mask)
                sgd_optimizer.minimize(avg_cost)
                cnn_net.clear_gradients()
                total_cost.append(avg_cost.numpy() * word_num)
                total_acc.append(acc.numpy() * word_num)
                total_num_seqs.append(word_num)
                # Periodic progress report; accumulators reset afterwards.
                if steps % args.skip_steps == 0:
                    time_end = time.time()
                    used_time = time_end - time_begin
                    progress.print(batch_id + 1)
                    #print("step: %d, ave loss: %f, "
                    #      "ave acc: %f, speed: %f steps/s" %
                    #      (steps, np.sum(total_cost) / np.sum(total_num_seqs),
                    #       np.sum(total_acc) / np.sum(total_num_seqs),
                    #       args.skip_steps / used_time))
                    total_cost, total_acc, total_num_seqs = [], [], []
                    time_begin = time.time()
                # Periodic full pass over the dev split.
                if steps % args.validation_steps == 0:
                    total_eval_cost, total_eval_acc, total_eval_num_seqs = [], [], []
                    cnn_net.eval()
                    eval_steps = 0
                    for eval_batch_id, eval_data in enumerate(
                            eval_data_generator()):
                        eval_np_doc = np.array([
                            np.pad(x[0][0:args.padding_size],
                                   (0, args.padding_size -
                                    len(x[0][0:args.padding_size])),
                                   'constant',
                                   constant_values=(args.vocab_size))
                            for x in eval_data
                        ]).astype('int64').reshape(1, -1)
                        eval_label = to_variable(
                            np.array([x[1] for x in eval_data
                                      ]).astype('int64').reshape(
                                          args.batch_size, 1))
                        eval_doc = to_variable(eval_np_doc.reshape(-1, 1))
                        eval_avg_cost, eval_prediction, eval_acc = cnn_net(
                            eval_doc, eval_label)
                        # Token-weighted metrics, as in the train loop.
                        eval_np_mask = (
                            eval_np_doc != args.vocab_size).astype('int32')
                        eval_word_num = np.sum(eval_np_mask)
                        total_eval_cost.append(eval_avg_cost.numpy() *
                                               eval_word_num)
                        total_eval_acc.append(eval_acc.numpy() *
                                              eval_word_num)
                        total_eval_num_seqs.append(eval_word_num)
                        eval_steps += 1
                    time_end = time.time()
                    used_time = time_end - time_begin
                    print(
                        "Final validation result: step: %d, ave loss: %f, "
                        "ave acc: %f, speed: %f steps/s" %
                        (steps, np.sum(total_eval_cost) /
                         np.sum(total_eval_num_seqs),
                         np.sum(total_eval_acc) /
                         np.sum(total_eval_num_seqs),
                         eval_steps / used_time))
                    time_begin = time.time()
                # if steps % args.save_steps == 0:
                #     save_path = "save_dir_" + str(steps)
                #     print('save model to: ' + save_path)
                #     fluid.dygraph.save_persistables(cnn_net.state_dict(),
                #                                     save_path)
                # Reset the reference point for the next batch's timers.
                end = Tools.time()
def main(args):
    """
    Main Function.

    Static-graph driver: builds train/dev/infer Programs as requested by
    the ``do_train`` / ``do_val`` / ``do_infer`` flags, optionally restores
    an initial checkpoint, then runs the pyreader-fed training loop with
    periodic logging, checkpointing and dev-set evaluation, followed by
    final evaluation/inference passes.

    Raises:
        ValueError: if none of do_train/do_val/do_infer is set, or if
            validation/inference is requested without an init checkpoint.
    """
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = 1
    exe = fluid.Executor(place)
    task_name = args.task_name.lower()
    processor = reader.SentaProcessor(data_dir=args.data_dir,
                                      vocab_path=args.vocab_path,
                                      random_seed=args.random_seed,
                                      max_seq_len=args.max_seq_len)
    num_labels = len(processor.get_labels())
    if not (args.do_train or args.do_val or args.do_infer):
        raise ValueError("For args `do_train`, `do_val` and `do_infer`, at "
                         "least one of them must be True.")
    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed
    if args.do_train:
        # NOTE(review): ``/`` yields a float under Python 3 — confirm the
        # data generator tolerates a non-int batch_size (same below).
        train_data_generator = processor.data_generator(
            batch_size=args.batch_size / dev_count,
            phase='train',
            epoch=args.epoch,
            shuffle=True)
        num_train_examples = processor.get_num_examples(phase="train")
        max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count
        print("Device count: %d" % dev_count)
        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)
        train_program = fluid.Program()
        if args.enable_ce and args.random_seed is not None:
            train_program.random_seed = args.random_seed
        # Build the training graph and attach the optimizer inside the
        # program/unique-name guards so parameter names stay consistent.
        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                train_reader, loss, accuracy, num_seqs = create_model(
                    args,
                    pyreader_name='train_reader',
                    num_labels=num_labels,
                    is_prediction=False)
                sgd_optimizer = fluid.optimizer.Adagrad(
                    learning_rate=args.lr)
                sgd_optimizer.minimize(loss)
        if args.verbose:
            lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                program=train_program, batch_size=args.batch_size)
            print("Theoretical memory usage in training: %.3f - %.3f %s" %
                  (lower_mem, upper_mem, unit))
    if args.do_val:
        test_data_generator = processor.data_generator(
            batch_size=args.batch_size / dev_count,
            phase='dev',
            epoch=1,
            shuffle=False)
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                test_reader, loss, accuracy, num_seqs = create_model(
                    args,
                    pyreader_name='test_reader',
                    num_labels=num_labels,
                    is_prediction=False)
        test_prog = test_prog.clone(for_test=True)
    if args.do_infer:
        infer_data_generator = processor.data_generator(
            batch_size=args.batch_size / dev_count,
            phase='infer',
            epoch=1,
            shuffle=False)
        infer_prog = fluid.Program()
        with fluid.program_guard(infer_prog, startup_prog):
            with fluid.unique_name.guard():
                infer_reader, prop, _ = create_model(
                    args,
                    pyreader_name='infer_reader',
                    num_labels=num_labels,
                    is_prediction=True)
        infer_prog = infer_prog.clone(for_test=True)
    # Initialize all parameters, then optionally overwrite from checkpoint.
    exe.run(startup_prog)
    if args.do_train:
        if args.init_checkpoint:
            init_checkpoint(exe,
                            args.init_checkpoint,
                            main_program=startup_prog)
    elif args.do_val or args.do_infer:
        if not args.init_checkpoint:
            raise ValueError("args 'init_checkpoint' should be set if"
                             "only doing validation or testing!")
        init_checkpoint(exe,
                        args.init_checkpoint,
                        main_program=startup_prog)
    if args.do_train:
        train_exe = exe
        train_reader.set_sample_list_generator(train_data_generator)
    else:
        train_exe = None
    if args.do_val:
        test_exe = exe
        test_reader.set_sample_list_generator(test_data_generator)
    if args.do_infer:
        test_exe = exe
        infer_reader.set_sample_list_generator(infer_data_generator)
    if args.do_train:
        train_reader.start()
        steps = 0
        total_cost, total_acc, total_num_seqs = [], [], []
        time_begin = time.time()
        # Loop until the pyreader signals end-of-data via EOFException.
        while True:
            try:
                steps += 1
                #print("steps...")
                # Only fetch metrics on logging steps to save overhead.
                if steps % args.skip_steps == 0:
                    fetch_list = [loss.name, accuracy.name, num_seqs.name]
                else:
                    fetch_list = []
                outputs = train_exe.run(program=train_program,
                                        fetch_list=fetch_list,
                                        return_numpy=False)
                #print("finished one step")
                if steps % args.skip_steps == 0:
                    np_loss, np_acc, np_num_seqs = outputs
                    np_loss = np.array(np_loss)
                    np_acc = np.array(np_acc)
                    np_num_seqs = np.array(np_num_seqs)
                    # Sequence-count-weighted running averages.
                    total_cost.extend(np_loss * np_num_seqs)
                    total_acc.extend(np_acc * np_num_seqs)
                    total_num_seqs.extend(np_num_seqs)
                    if args.verbose:
                        # NOTE(review): ``train_pyreader`` is not defined
                        # in this function (the reader is named
                        # ``train_reader``) — this branch would raise a
                        # NameError; confirm and fix upstream.
                        verbose = "train pyreader queue size: %d, " % train_pyreader.queue.size(
                        )
                        print(verbose)
                    time_end = time.time()
                    used_time = time_end - time_begin
                    print("step: %d, ave loss: %f, "
                          "ave acc: %f, speed: %f steps/s" %
                          (steps, np.sum(total_cost) / np.sum(total_num_seqs),
                           np.sum(total_acc) / np.sum(total_num_seqs),
                           args.skip_steps / used_time))
                    total_cost, total_acc, total_num_seqs = [], [], []
                    time_begin = time.time()
                if steps % args.save_steps == 0:
                    save_path = os.path.join(args.checkpoints,
                                             "step_" + str(steps),
                                             "checkpoint")
                    fluid.save(train_program, save_path)
                if steps % args.validation_steps == 0:
                    # evaluate dev set
                    if args.do_val:
                        # NOTE(review): "evalatation" is a typo in a
                        # runtime log string; left unchanged here.
                        print("do evalatation")
                        evaluate(exe, test_prog, test_reader,
                                 [loss.name, accuracy.name, num_seqs.name],
                                 "dev")
            except fluid.core.EOFException:
                # Data exhausted: save a final checkpoint and stop.
                save_path = os.path.join(args.checkpoints,
                                         "step_" + str(steps), "checkpoint")
                fluid.save(train_program, save_path)
                train_reader.reset()
                break
    # final eval on dev set
    if args.do_val:
        print("Final validation result:")
        evaluate(exe, test_prog, test_reader,
                 [loss.name, accuracy.name, num_seqs.name], "dev")
    # final eval on test set
    if args.do_infer:
        print("Final test result:")
        inference(exe, infer_prog, infer_reader, [prop.name], "infer")
def train():
    """Train the dygraph CNN model under an optional profiling context.

    Each batch runs inside ``profile_context(enable_profile)``; once
    ``steps`` exceeds ``args.profile_steps`` the function dumps the
    profile and returns early. CE mode fixes all random seeds and
    disables shuffling for reproducible benchmarking. Relies on
    module-level ``args``, ``place``, ``dev_count``, ``reader``,
    ``nets``, ``profile_context``, ``fluid``, ``to_variable``, ``np``,
    ``time``.
    """
    with fluid.dygraph.guard(place):
        if args.ce:
            print("ce mode")
            # Fixed seed for continuous-evaluation reproducibility.
            seed = 90
            np.random.seed(seed)
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
        processor = reader.SentaProcessor(data_dir=args.data_dir,
                                          vocab_path=args.vocab_path,
                                          random_seed=args.random_seed)
        num_labels = len(processor.get_labels())
        num_train_examples = processor.get_num_examples(phase="train")
        max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count
        # CE mode differs only in disabling shuffle on the train split.
        if not args.ce:
            train_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='train',
                epoch=args.epoch,
                shuffle=True)
            eval_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='dev',
                epoch=args.epoch,
                shuffle=False)
        else:
            train_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='train',
                epoch=args.epoch,
                shuffle=False)
            eval_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='dev',
                epoch=args.epoch,
                shuffle=False)
        cnn_net = nets.CNN("cnn_net", args.vocab_size, args.batch_size,
                           args.padding_size)
        # NOTE(review): old dygraph API — no parameter_list passed to the
        # optimizer; confirm compatibility with the Paddle version in use.
        sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=args.lr)
        steps = 0
        total_cost, total_acc, total_num_seqs = [], [], []
        for eop in range(args.epoch):
            time_begin = time.time()
            for batch_id, data in enumerate(train_data_generator()):
                # Profiling kicks in once enough warm-up steps have run.
                enable_profile = steps > args.profile_steps
                with profile_context(enable_profile):
                    steps += 1
                    # Truncate/pad to padding_size (pad id = vocab_size),
                    # shaped as a column of token ids.
                    doc = to_variable(
                        np.array([
                            np.pad(x[0][0:args.padding_size],
                                   (0, args.padding_size -
                                    len(x[0][0:args.padding_size])),
                                   'constant',
                                   constant_values=(args.vocab_size))
                            for x in data
                        ]).astype('int64').reshape(-1, 1))
                    label = to_variable(
                        np.array([x[1] for x in data]).astype('int64').reshape(
                            args.batch_size, 1))
                    cnn_net.train()
                    avg_cost, prediction, acc = cnn_net(doc, label)
                    avg_cost.backward()
                    # Token-weighted metric accounting (non-padding only).
                    np_mask = (doc.numpy() != args.vocab_size).astype('int32')
                    word_num = np.sum(np_mask)
                    sgd_optimizer.minimize(avg_cost)
                    cnn_net.clear_gradients()
                    total_cost.append(avg_cost.numpy() * word_num)
                    total_acc.append(acc.numpy() * word_num)
                    total_num_seqs.append(word_num)
                    # Periodic progress report; accumulators reset after.
                    if steps % args.skip_steps == 0:
                        time_end = time.time()
                        used_time = time_end - time_begin
                        print("step: %d, ave loss: %f, "
                              "ave acc: %f, speed: %f steps/s" %
                              (steps,
                               np.sum(total_cost) / np.sum(total_num_seqs),
                               np.sum(total_acc) / np.sum(total_num_seqs),
                               args.skip_steps / used_time))
                        total_cost, total_acc, total_num_seqs = [], [], []
                        time_begin = time.time()
                    # Periodic full dev-set evaluation.
                    if steps % args.validation_steps == 0:
                        total_eval_cost, total_eval_acc, total_eval_num_seqs = [], [], []
                        cnn_net.eval()
                        eval_steps = 0
                        for eval_batch_id, eval_data in enumerate(
                                eval_data_generator()):
                            eval_np_doc = np.array([
                                np.pad(x[0][0:args.padding_size],
                                       (0, args.padding_size -
                                        len(x[0][0:args.padding_size])),
                                       'constant',
                                       constant_values=(args.vocab_size))
                                for x in eval_data
                            ]).astype('int64').reshape(1, -1)
                            eval_label = to_variable(
                                np.array([x[1] for x in eval_data
                                          ]).astype('int64').reshape(
                                              args.batch_size, 1))
                            eval_doc = to_variable(eval_np_doc.reshape(-1, 1))
                            eval_avg_cost, eval_prediction, eval_acc = cnn_net(
                                eval_doc, eval_label)
                            eval_np_mask = (
                                eval_np_doc != args.vocab_size).astype('int32')
                            eval_word_num = np.sum(eval_np_mask)
                            total_eval_cost.append(eval_avg_cost.numpy() *
                                                   eval_word_num)
                            total_eval_acc.append(eval_acc.numpy() *
                                                  eval_word_num)
                            total_eval_num_seqs.append(eval_word_num)
                            eval_steps += 1
                        time_end = time.time()
                        used_time = time_end - time_begin
                        print(
                            "Final validation result: step: %d, ave loss: %f, "
                            "ave acc: %f, speed: %f steps/s" %
                            (steps, np.sum(total_eval_cost) /
                             np.sum(total_eval_num_seqs),
                             np.sum(total_eval_acc) /
                             np.sum(total_eval_num_seqs),
                             eval_steps / used_time))
                        time_begin = time.time()
                        # CE mode emits KPI lines for the dashboard.
                        if args.ce:
                            print("kpis\ttrain_loss\t%0.3f" %
                                  (np.sum(total_eval_cost) /
                                   np.sum(total_eval_num_seqs)))
                            print("kpis\ttrain_acc\t%0.3f" %
                                  (np.sum(total_eval_acc) /
                                   np.sum(total_eval_num_seqs)))
                    # Periodic checkpoint.
                    if steps % args.save_steps == 0:
                        save_path = "save_dir_" + str(steps)
                        print('save model to: ' + save_path)
                        # NOTE(review): save_persistables is a deprecated
                        # dygraph API — confirm it exists in the target
                        # Paddle version.
                        fluid.dygraph.save_persistables(
                            cnn_net.state_dict(), save_path)
                # Once profiling was active for this batch, dump and stop.
                if enable_profile:
                    print('save profile result into /tmp/profile_file')
                    return
def train():
    """Train a selectable dygraph model (CNN/BOW/GRU/BiGRU) with CE KPIs.

    Architecture is chosen by ``args.model_type``; training runs inside
    ``profile_context`` with an early return after ``args.profile_steps``.
    In CE mode, timing/accuracy KPI lines are printed at the end. Relies
    on module-level ``args``, ``place``, ``dev_count``, ``reader``,
    ``nets``, ``profile_context``, ``get_cards``, ``fluid``,
    ``to_variable``, ``np``, ``time``.
    """
    with fluid.dygraph.guard(place):
        if args.ce:
            print("ce mode")
            # Fixed seed for continuous-evaluation reproducibility.
            seed = 90
            np.random.seed(seed)
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
        processor = reader.SentaProcessor(data_dir=args.data_dir,
                                          vocab_path=args.vocab_path,
                                          random_seed=args.random_seed)
        num_labels = len(processor.get_labels())
        num_train_examples = processor.get_num_examples(phase="train")
        max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count
        # CE mode differs only in disabling shuffle on the train split.
        if not args.ce:
            train_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='train',
                epoch=args.epoch,
                shuffle=True)
            eval_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='dev',
                epoch=args.epoch,
                shuffle=False)
        else:
            train_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='train',
                epoch=args.epoch,
                shuffle=False)
            eval_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='dev',
                epoch=args.epoch,
                shuffle=False)
        # NOTE(review): no ``else`` branch — an unknown model_type leaves
        # ``model`` unbound and fails later with a NameError.
        if args.model_type == 'cnn_net':
            model = nets.CNN(args.vocab_size, args.batch_size,
                             args.padding_size)
        elif args.model_type == 'bow_net':
            model = nets.BOW(args.vocab_size, args.batch_size,
                             args.padding_size)
        elif args.model_type == 'gru_net':
            model = nets.GRU(args.vocab_size, args.batch_size,
                             args.padding_size)
        elif args.model_type == 'bigru_net':
            model = nets.BiGRU(args.vocab_size, args.batch_size,
                               args.padding_size)
        sgd_optimizer = fluid.optimizer.Adagrad(
            learning_rate=args.lr, parameter_list=model.parameters())
        steps = 0
        total_cost, total_acc, total_num_seqs = [], [], []
        # NOTE(review): appears unused by the CNN path; presumably a
        # leftover initial hidden state for the GRU variants — confirm.
        gru_hidden_data = np.zeros((args.batch_size, 128), dtype='float32')
        ce_time, ce_infor = [], []
        for eop in range(args.epoch):
            time_begin = time.time()
            for batch_id, data in enumerate(train_data_generator()):
                # Profiling kicks in once enough warm-up steps have run.
                enable_profile = steps > args.profile_steps
                with profile_context(enable_profile):
                    steps += 1
                    # Truncate/pad to padding_size (pad id = vocab_size),
                    # flattened to a 1-D token-id array.
                    doc = to_variable(
                        np.array([
                            np.pad(x[0][0:args.padding_size],
                                   (0, args.padding_size -
                                    len(x[0][0:args.padding_size])),
                                   'constant',
                                   constant_values=(args.vocab_size))
                            for x in data
                        ]).astype('int64').reshape(-1))
                    label = to_variable(
                        np.array([x[1] for x in data]).astype('int64').reshape(
                            args.batch_size, 1))
                    model.train()
                    avg_cost, prediction, acc = model(doc, label)
                    avg_cost.backward()
                    # Token-weighted metric accounting (non-padding only).
                    np_mask = (doc.numpy() != args.vocab_size).astype('int32')
                    word_num = np.sum(np_mask)
                    sgd_optimizer.minimize(avg_cost)
                    model.clear_gradients()
                    total_cost.append(avg_cost.numpy() * word_num)
                    total_acc.append(acc.numpy() * word_num)
                    total_num_seqs.append(word_num)
                    # Periodic progress report; accumulators reset after.
                    if steps % args.skip_steps == 0:
                        time_end = time.time()
                        used_time = time_end - time_begin
                        print("step: %d, ave loss: %f, "
                              "ave acc: %f, speed: %f steps/s" %
                              (steps,
                               np.sum(total_cost) / np.sum(total_num_seqs),
                               np.sum(total_acc) / np.sum(total_num_seqs),
                               args.skip_steps / used_time))
                        ce_time.append(used_time)
                        ce_infor.append(
                            np.sum(total_acc) / np.sum(total_num_seqs))
                        total_cost, total_acc, total_num_seqs = [], [], []
                        time_begin = time.time()
                    # Periodic full dev-set evaluation.
                    if steps % args.validation_steps == 0:
                        total_eval_cost, total_eval_acc, total_eval_num_seqs = [], [], []
                        model.eval()
                        eval_steps = 0
                        gru_hidden_data = np.zeros((args.batch_size, 128),
                                                   dtype='float32')
                        for eval_batch_id, eval_data in enumerate(
                                eval_data_generator()):
                            eval_np_doc = np.array([
                                np.pad(x[0][0:args.padding_size],
                                       (0, args.padding_size -
                                        len(x[0][0:args.padding_size])),
                                       'constant',
                                       constant_values=(args.vocab_size))
                                for x in eval_data
                            ]).astype('int64').reshape(-1)
                            eval_label = to_variable(
                                np.array([x[1] for x in eval_data
                                          ]).astype('int64').reshape(
                                              args.batch_size, 1))
                            eval_doc = to_variable(eval_np_doc)
                            eval_avg_cost, eval_prediction, eval_acc = model(
                                eval_doc, eval_label)
                            eval_np_mask = (
                                eval_np_doc != args.vocab_size).astype('int32')
                            eval_word_num = np.sum(eval_np_mask)
                            total_eval_cost.append(eval_avg_cost.numpy() *
                                                   eval_word_num)
                            total_eval_acc.append(eval_acc.numpy() *
                                                  eval_word_num)
                            total_eval_num_seqs.append(eval_word_num)
                            eval_steps += 1
                        time_end = time.time()
                        used_time = time_end - time_begin
                        print(
                            "Final validation result: step: %d, ave loss: %f, "
                            "ave acc: %f, speed: %f steps/s" %
                            (steps, np.sum(total_eval_cost) /
                             np.sum(total_eval_num_seqs),
                             np.sum(total_eval_acc) /
                             np.sum(total_eval_num_seqs),
                             eval_steps / used_time))
                        time_begin = time.time()
                        # CE mode emits KPI lines for the dashboard.
                        if args.ce:
                            print("kpis\ttrain_loss\t%0.3f" %
                                  (np.sum(total_eval_cost) /
                                   np.sum(total_eval_num_seqs)))
                            print("kpis\ttrain_acc\t%0.3f" %
                                  (np.sum(total_eval_acc) /
                                   np.sum(total_eval_num_seqs)))
                    # Periodic checkpoint under args.checkpoints.
                    if steps % args.save_steps == 0:
                        save_path = args.checkpoints + "/" + "save_dir_" + str(
                            steps)
                        print('save model to: ' + save_path)
                        fluid.dygraph.save_dygraph(model.state_dict(),
                                                   save_path)
                # Once profiling was active for this batch, dump and stop.
                if enable_profile:
                    print('save profile result into /tmp/profile_file')
                    return
        # Final CE KPI summary: last recorded window's time and accuracy.
        if args.ce:
            card_num = get_cards()
            _acc = 0
            _time = 0
            try:
                _time = ce_time[-1]
                _acc = ce_infor[-1]
            except:
                print("ce info error")
            print("kpis\ttrain_duration_card%s\t%s" % (card_num, _time))
            print("kpis\ttrain_acc_card%s\t%f" % (card_num, _acc))
def train():
    """Train the CNN model with the Paddle 2.x dygraph API (migration WIP).

    Experimental variant: dumps initial parameters to ``./paramters.npz``
    (zeroing row 0 of the embedding), times the data-preparation stage
    separately, passes explicit sequence lengths to the model, and uses
    pad id 0 instead of vocab_size. In-loop validation and checkpointing
    are currently disabled. Relies on module-level ``args``, ``reader``,
    ``nets``, ``get_cards``, ``fluid``, ``paddle``, ``np``, ``time``.
    """
    # with fluid.dygraph.guard(place):
    with fluid.dygraph.guard():
        if args.ce:
            print("ce mode")
            seed = args.random_seed
            np.random.seed(seed)
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
        processor = reader.SentaProcessor(data_dir=args.data_dir,
                                          vocab_path=args.vocab_path,
                                          random_seed=args.random_seed)
        num_labels = len(processor.get_labels())
        # CE mode differs only in disabling shuffle on the train split.
        if not args.ce:
            train_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='train',
                epoch=args.epoch,
                shuffle=True)
            eval_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='dev',
                epoch=args.epoch,
                shuffle=False)
        else:
            train_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='train',
                epoch=args.epoch,
                shuffle=False)
            eval_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='dev',
                epoch=args.epoch,
                shuffle=False)
        model = nets.CNN(args.vocab_size)
        # save initial param to files
        # Row 0 of the embedding (the pad id) is zeroed before dumping and
        # written back into the model.
        param_dict = {}
        for param_name in model.state_dict():
            param_dict[param_name] = model.state_dict()[param_name].numpy()
            if 'embedding' in param_name:
                state_dict = model.state_dict()
                param_dict[param_name][0] = 0
                state_dict[param_name] = paddle.to_tensor(
                    param_dict[param_name])
                model.set_dict(state_dict)
                # print(param_dict[param_name][0])
        # NOTE(review): "paramters" is a typo in the output filename;
        # runtime string left unchanged here.
        np.savez('./paramters.npz', **param_dict)
        # Debug dump of parameter names (and embedding row shape).
        for parameters in model.named_parameters():
            print(parameters[0])
            if 'embedding' in parameters[0]:
                print(model.state_dict()[parameters[0]][0].shape)
        # sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=args.lr,parameter_list=model.parameters())
        sgd_optimizer = paddle.fluid.optimizer.SGD(
            learning_rate=args.lr, parameter_list=model.parameters())
        steps = 0
        total_cost, total_acc, total_num_seqs = [], [], []
        # NOTE(review): appears unused in this variant — presumably a
        # leftover GRU initial hidden state; confirm before removal.
        gru_hidden_data = np.zeros((args.batch_size, 128), dtype='float64')
        ce_time, ce_infor = [], []
        reader_time = 0.0
        num_train_examples = processor.get_num_examples(phase="train")
        for eop in range(args.epoch):
            time_begin = time.time()
            for batch_id, data in enumerate(train_data_generator()):
                # Time the numpy-side batch preparation separately so the
                # log can report reader throughput.
                reader_begin = time.time()
                seq_len_arr = np.array([len(x[0]) for x in data],
                                       dtype="int64")
                steps += 1
                seq_len = paddle.to_tensor(seq_len_arr)
                # Truncate/pad to padding_size; this variant pads with 0
                # (not vocab_size) and keeps the (batch, padding_size)
                # shape rather than flattening.
                doc = paddle.to_tensor(
                    np.array([
                        np.pad(x[0][0:args.padding_size],
                               (0, args.padding_size -
                                len(x[0][0:args.padding_size])),
                               'constant',
                               constant_values=0) for x in data
                    ]).astype('int64'))
                label = paddle.to_tensor(
                    np.array([x[1] for x in data
                              ]).astype('int64').reshape(args.batch_size, 1))
                reader_end = time.time()
                reader_time += (reader_end - reader_begin)
                model.train()
                avg_cost, prediction, acc = model(doc, seq_len,
                                                  args.padding_size, label)
                # Gradients are cleared before backward here (instead of
                # after minimize) — equivalent per-step ordering.
                model.clear_gradients()
                avg_cost.backward()
                sgd_optimizer.minimize(avg_cost)
                # np_mask = (doc.numpy() != 0).astype('int32')
                # word_num = np.sum(np_mask)
                # Real-token count now comes from the raw lengths.
                word_num = np.sum(seq_len_arr)
                total_cost.append(avg_cost.numpy() * word_num)
                total_acc.append(acc.numpy() * word_num)
                total_num_seqs.append(word_num)
                # Periodic progress report; accumulators reset after.
                if steps % args.skip_steps == 0:
                    time_end = time.time()
                    used_time = time_end - time_begin
                    print(
                        "step: %d, ave loss: %f, "
                        "ave acc: %f, speed: %f steps/s, reader speed: %f steps/s"
                        % (steps, np.sum(total_cost) / np.sum(total_num_seqs),
                           np.sum(total_acc) / np.sum(total_num_seqs),
                           args.skip_steps / used_time,
                           args.skip_steps / reader_time))
                    reader_time = 0.0
                    ce_time.append(used_time)
                    ce_infor.append(
                        np.sum(total_acc) / np.sum(total_num_seqs))
                    total_cost, total_acc, total_num_seqs = [], [], []
                    time_begin = time.time()
                # NOTE(review): the original in-loop dev-set validation,
                # CE KPI printing and periodic checkpoint saving were
                # commented out here (a large dead block, removed for
                # readability); restore from version control if needed.
        # Final CE KPI summary: last recorded window's time and accuracy.
        if args.ce:
            card_num = get_cards()
            _acc = 0
            _time = 0
            try:
                _time = ce_time[-1]
                _acc = ce_infor[-1]
            except:
                print("ce info error")
            print("kpis\ttrain_duration_card%s\t%s" % (card_num, _time))
            print("kpis\ttrain_acc_card%s\t%f" % (card_num, _acc))