def get_feed_data_reader():
    global feed_data_reader
    if feed_data_reader is not None:
        return feed_data_reader

    reader = paddle.batch(
        wmt16.train(ModelHyperParams.src_vocab_size,
                    ModelHyperParams.trg_vocab_size),
        batch_size=transformer_model.batch_size)
    all_batch_tensors = []
    for batch in reader():
        tensors = []
        for tensor in prepare_batch_input(batch, ModelHyperParams.src_pad_idx,
                                          ModelHyperParams.trg_pad_idx,
                                          ModelHyperParams.n_head):
            tensors.append(np.array(tensor))
        all_batch_tensors.append(tensors)

    def __reader__():
        for t in all_batch_tensors:
            yield t

    feed_data_reader = FeedDataReader(
        feed_list=transformer_model.build_inputs(
            ModelHyperParams.max_length + 1, ModelHyperParams.n_head),
        reader=__reader__)
    return feed_data_reader
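# A minimal, self-contained sketch (hypothetical names, not part of the test
# above) of the lazy "build once, replay many times" pattern used by
# get_feed_data_reader(): the expensive reader is materialized into numpy
# arrays on first use, and the cached copy is returned on every later call so
# repeated iterations see identical data.
import numpy as np

_cached_batches = None

def get_cached_batches(make_batches):
    """make_batches() is any callable yielding lists of array-like fields."""
    global _cached_batches
    if _cached_batches is None:
        _cached_batches = [[np.array(f) for f in fields]
                           for fields in make_batches()]
    return _cached_batches

if __name__ == '__main__':
    demo = lambda: [[[1, 2, 3], [4, 5]]]   # one batch with two fields
    print(get_cached_batches(demo))        # cached numpy copies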
def __for_train__():
    train_reader = paddle.batch(
        wmt16.train(args.src_vocab_size, args.trg_vocab_size),
        batch_size=args.batch_size)
    for batch in train_reader():
        tensors = prepare_train_input(batch, args.eos_idx, args.eos_idx,
                                      args.n_head)
        yield tensors
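# A short usage sketch (assuming the `args` namespace above is populated):
# each item the generator yields is the group of numpy arrays produced by
# prepare_train_input for one mini-batch, ready to be fed to the program.
for step, tensors in enumerate(__for_train__()):
    print("step {}: {} fields".format(step, len(tensors)))
    if step >= 2:   # peek at the first few batches only
        break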
@classmethod
def setUpClass(cls):
    reader = paddle.batch(
        wmt16.train(ModelHyperParams.src_vocab_size,
                    ModelHyperParams.trg_vocab_size),
        batch_size=transformer_model.batch_size)
    with fluid.recordio_writer.create_recordio_writer(
            WMT16_RECORDIO_FILE) as writer:
        for batch in reader():
            for tensor in prepare_batch_input(batch,
                                              ModelHyperParams.src_pad_idx,
                                              ModelHyperParams.trg_pad_idx,
                                              ModelHyperParams.n_head):
                t = fluid.LoDTensor()
                t.set(tensor, fluid.CPUPlace())
                writer.append_tensor(t)
            writer.complete_append_tensor()
def train():
    """
    Train the transformer model in dygraph mode on a single card.
    :return:
    """
    with guard():
        transformer = TransFormer(
            'transformer', ModelHyperParams.src_vocab_size,
            ModelHyperParams.trg_vocab_size, ModelHyperParams.max_length + 1,
            ModelHyperParams.n_layer, ModelHyperParams.n_head,
            ModelHyperParams.d_key, ModelHyperParams.d_value,
            ModelHyperParams.d_model, ModelHyperParams.d_inner_hid,
            ModelHyperParams.prepostprocess_dropout,
            ModelHyperParams.attention_dropout, ModelHyperParams.relu_dropout,
            ModelHyperParams.preprocess_cmd, ModelHyperParams.postprocess_cmd,
            ModelHyperParams.weight_sharing, TrainTaskConfig.label_smooth_eps)
        optimizer = fluid.optimizer.SGD(learning_rate=0.003)
        reader = paddle.batch(
            wmt16.train(ModelHyperParams.src_vocab_size,
                        ModelHyperParams.trg_vocab_size),
            batch_size=TrainTaskConfig.batch_size)
        for i in range(200):
            dy_step = 0
            for batch in reader():
                np_values = prepare_batch_input(
                    batch, ModelHyperParams.src_pad_idx,
                    ModelHyperParams.trg_pad_idx, ModelHyperParams.n_head)
                enc_inputs, dec_inputs, label, weights = create_data(np_values)
                dy_sum_cost, dy_avg_cost, dy_predict, dy_token_num = transformer(
                    enc_inputs, dec_inputs, label, weights)
                # One dygraph step: backward, SGD update, clear gradients.
                dy_avg_cost.backward()
                optimizer.minimize(dy_avg_cost)
                transformer.clear_gradients()
                dy_step = dy_step + 1
                if dy_step % 10 == 0:
                    print("pass num : {}, batch_id: {}, dy_graph avg loss: {}".
                          format(i, dy_step, dy_avg_cost.numpy()))
            print("pass : {} finished".format(i))
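# A minimal sketch of the same dygraph step order (forward -> backward ->
# minimize -> clear_gradients) on a toy model, assuming a Paddle/Fluid version
# that provides fluid.dygraph.Linear and the parameter_list argument of the
# SGD optimizer; the transformer above follows the identical sequence.
import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    model = fluid.dygraph.Linear(4, 1)
    opt = fluid.optimizer.SGD(learning_rate=0.003,
                              parameter_list=model.parameters())
    x = fluid.dygraph.to_variable(np.random.rand(8, 4).astype('float32'))
    y = fluid.dygraph.to_variable(np.random.rand(8, 1).astype('float32'))
    loss = fluid.layers.reduce_mean(fluid.layers.square(model(x) - y))
    loss.backward()            # accumulate gradients
    opt.minimize(loss)         # apply the SGD update
    model.clear_gradients()    # reset gradients before the next step
    print(loss.numpy())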
def train():
    """
    Train the transformer model, using multi-card data parallelism when
    args.use_data_parallel is set.
    :return:
    """
    trainer_count = fluid.dygraph.parallel.Env().nranks
    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
        if args.use_data_parallel else fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place):
        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()
        transformer = TransFormer(
            'transformer', ModelHyperParams.src_vocab_size,
            ModelHyperParams.trg_vocab_size, ModelHyperParams.max_length + 1,
            ModelHyperParams.n_layer, ModelHyperParams.n_head,
            ModelHyperParams.d_key, ModelHyperParams.d_value,
            ModelHyperParams.d_model, ModelHyperParams.d_inner_hid,
            ModelHyperParams.prepostprocess_dropout,
            ModelHyperParams.attention_dropout, ModelHyperParams.relu_dropout,
            ModelHyperParams.preprocess_cmd, ModelHyperParams.postprocess_cmd,
            ModelHyperParams.weight_sharing, TrainTaskConfig.label_smooth_eps)
        optimizer = fluid.optimizer.SGD(learning_rate=0.003)
        if args.use_data_parallel:
            transformer = fluid.dygraph.parallel.DataParallel(
                transformer, strategy)
        reader = paddle.batch(
            wmt16.train(ModelHyperParams.src_vocab_size,
                        ModelHyperParams.trg_vocab_size),
            batch_size=TrainTaskConfig.batch_size)
        if args.use_data_parallel:
            # Shard batches across trainers so each card sees a distinct slice.
            reader = fluid.contrib.reader.distributed_batch_reader(reader)
        for i in range(200):
            dy_step = 0
            batch_time = AverageMeter('Time', ':6.3f')
            data_time = AverageMeter('Data', ':6.3f')
            progress = ProgressMeter(
                len(list(reader())) - 1,
                batch_time,
                data_time,
                prefix="epoch: [{}]".format(i))
            end = Tools.time()
            for batch in reader():
                data_time.update(Tools.time() - end)
                np_values = prepare_batch_input(
                    batch, ModelHyperParams.src_pad_idx,
                    ModelHyperParams.trg_pad_idx, ModelHyperParams.n_head)
                enc_inputs, dec_inputs, label, weights = create_data(np_values)
                dy_sum_cost, dy_avg_cost, dy_predict, dy_token_num = transformer(
                    enc_inputs, dec_inputs, label, weights)
                if args.use_data_parallel:
                    # Scale the loss so gradients are averaged across cards,
                    # then all-reduce them before the optimizer step.
                    dy_avg_cost = transformer.scale_loss(dy_avg_cost)
                    dy_avg_cost.backward()
                    transformer.apply_collective_grads()
                else:
                    dy_avg_cost.backward()
                optimizer.minimize(dy_avg_cost)
                transformer.clear_gradients()
                batch_time.update(Tools.time() - end)
                dy_step = dy_step + 1
                if dy_step % 1 == 0:
                    progress.print(dy_step)
                    print("pass num : {}, batch_id: {}, dy_graph avg loss: {}".
                          format(i, dy_step, dy_avg_cost.numpy()))
                end = Tools.time()
            print("pass : {} finished".format(i))
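# A minimal sketch (toy fluid.dygraph.Linear model, not the transformer above)
# of the data-parallel wrapping used in train(); it is meant to be started via
# `python -m paddle.distributed.launch` so that fluid.dygraph.parallel.Env()
# is populated with the device id of each trainer process.
import numpy as np
import paddle.fluid as fluid

place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id)
with fluid.dygraph.guard(place):
    strategy = fluid.dygraph.parallel.prepare_context()
    model = fluid.dygraph.parallel.DataParallel(
        fluid.dygraph.Linear(4, 1), strategy)
    opt = fluid.optimizer.SGD(learning_rate=0.003,
                              parameter_list=model.parameters())
    x = fluid.dygraph.to_variable(np.random.rand(8, 4).astype('float32'))
    loss = fluid.layers.reduce_mean(model(x))
    loss = model.scale_loss(loss)      # rescale so gradients average per card
    loss.backward()
    model.apply_collective_grads()     # all-reduce gradients across cards
    opt.minimize(loss)
    model.clear_gradients()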