def get_feed_data_reader():
    """Build (once) and return the cached FeedDataReader over WMT16 batches.

    The first call reads every training batch, converts the prepared
    tensors to numpy arrays, and caches them; the returned FeedDataReader
    replays the cached batches on each pass. Later calls reuse the cache.
    """
    global feed_data_reader
    if feed_data_reader is None:
        batched = paddle.batch(wmt16.train(ModelHyperParams.src_vocab_size,
                                           ModelHyperParams.trg_vocab_size),
                               batch_size=transformer_model.batch_size)
        cached_batches = [
            [np.array(t) for t in prepare_batch_input(
                raw_batch, ModelHyperParams.src_pad_idx,
                ModelHyperParams.trg_pad_idx, ModelHyperParams.n_head)]
            for raw_batch in batched()
        ]

        def __reader__():
            # Replay the pre-converted batches on every iteration pass.
            for batch_tensors in cached_batches:
                yield batch_tensors

        feed_data_reader = FeedDataReader(
            feed_list=transformer_model.build_inputs(
                ModelHyperParams.max_length + 1, ModelHyperParams.n_head),
            reader=__reader__)

    return feed_data_reader
# Ejemplo n.º 2
# 0
 def __for_train__():
     """Yield prepared training tensors for each batched WMT16 sample."""
     batched = paddle.batch(
         wmt16.train(args.src_vocab_size, args.trg_vocab_size),
         batch_size=args.batch_size)
     for raw_batch in batched():
         # args.eos_idx is passed for both index slots, mirroring the
         # original call — presumably bos/eos share one id; confirm.
         yield prepare_train_input(raw_batch, args.eos_idx, args.eos_idx,
                                   args.n_head)
    def setUpClass(cls):
        """Serialize prepared WMT16 training batches into a RecordIO file.

        Each batch becomes a group of LoDTensors (one per prepared input),
        closed with complete_append_tensor() so batch boundaries survive.
        """
        batched_reader = paddle.batch(
            wmt16.train(ModelHyperParams.src_vocab_size,
                        ModelHyperParams.trg_vocab_size),
            batch_size=transformer_model.batch_size)

        with fluid.recordio_writer.create_recordio_writer(
                WMT16_RECORDIO_FILE) as writer:
            for raw_batch in batched_reader():
                prepared = prepare_batch_input(
                    raw_batch, ModelHyperParams.src_pad_idx,
                    ModelHyperParams.trg_pad_idx, ModelHyperParams.n_head)
                for np_tensor in prepared:
                    lod_tensor = fluid.LoDTensor()
                    lod_tensor.set(np_tensor, fluid.CPUPlace())
                    writer.append_tensor(lod_tensor)
                writer.complete_append_tensor()
    def setUpClass(cls):
        """Write all prepared WMT16 batches to the RecordIO fixture file."""
        data_reader = paddle.batch(
            wmt16.train(ModelHyperParams.src_vocab_size,
                        ModelHyperParams.trg_vocab_size),
            batch_size=transformer_model.batch_size)

        with fluid.recordio_writer.create_recordio_writer(
                WMT16_RECORDIO_FILE) as rec_writer:
            for batch in data_reader():
                inputs = prepare_batch_input(batch,
                                             ModelHyperParams.src_pad_idx,
                                             ModelHyperParams.trg_pad_idx,
                                             ModelHyperParams.n_head)
                for arr in inputs:
                    lod = fluid.LoDTensor()
                    lod.set(arr, fluid.CPUPlace())
                    rec_writer.append_tensor(lod)
                # Mark the end of this batch's tensor group.
                rec_writer.complete_append_tensor()
# Ejemplo n.º 5
# 0
def train():
    """
    Train a dygraph TransFormer on WMT16 for 200 passes with plain SGD.
    :return: None
    """

    with guard():
        model = TransFormer(
            'transformer', ModelHyperParams.src_vocab_size,
            ModelHyperParams.trg_vocab_size, ModelHyperParams.max_length + 1,
            ModelHyperParams.n_layer, ModelHyperParams.n_head,
            ModelHyperParams.d_key, ModelHyperParams.d_value,
            ModelHyperParams.d_model, ModelHyperParams.d_inner_hid,
            ModelHyperParams.prepostprocess_dropout,
            ModelHyperParams.attention_dropout, ModelHyperParams.relu_dropout,
            ModelHyperParams.preprocess_cmd, ModelHyperParams.postprocess_cmd,
            ModelHyperParams.weight_sharing, TrainTaskConfig.label_smooth_eps)

        sgd = fluid.optimizer.SGD(learning_rate=0.003)

        batched_reader = paddle.batch(
            wmt16.train(ModelHyperParams.src_vocab_size,
                        ModelHyperParams.trg_vocab_size),
            batch_size=TrainTaskConfig.batch_size)

        for pass_id in range(200):
            for step, raw_batch in enumerate(batched_reader(), start=1):
                np_values = prepare_batch_input(
                    raw_batch, ModelHyperParams.src_pad_idx,
                    ModelHyperParams.trg_pad_idx, ModelHyperParams.n_head)
                enc_inputs, dec_inputs, label, weights = create_data(np_values)

                # Model returns (sum_cost, avg_cost, predict, token_num);
                # only the average cost drives the update.
                dy_sum_cost, dy_avg_cost, dy_predict, dy_token_num = model(
                    enc_inputs, dec_inputs, label, weights)

                dy_avg_cost.backward()
                sgd.minimize(dy_avg_cost)
                model.clear_gradients()

                if step % 10 == 0:
                    print("pass num : {}, batch_id: {}, dy_graph avg loss: {}".
                          format(pass_id, step, dy_avg_cost.numpy()))
            print("pass : {} finished".format(pass_id))
# Ejemplo n.º 6
# 0
def train():
    """
    Train a dygraph TransFormer on WMT16, optionally with data parallelism.

    Runs 200 passes of SGD. When ``args.use_data_parallel`` is set, the
    model is wrapped in ``DataParallel``, the reader is sharded across
    trainers, and the loss is scaled / gradients all-reduced before each
    optimizer step. Per-step timing is reported via AverageMeter /
    ProgressMeter.
    :return: None
    """

    trainer_count = fluid.dygraph.parallel.Env().nranks
    # NOTE(review): the single-card branch also selects CUDAPlace(0), so a
    # GPU build is required either way — confirm that is intended.
    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
        if args.use_data_parallel else fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place):
        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()

        transformer = TransFormer(
            'transformer', ModelHyperParams.src_vocab_size,
            ModelHyperParams.trg_vocab_size, ModelHyperParams.max_length + 1,
            ModelHyperParams.n_layer, ModelHyperParams.n_head,
            ModelHyperParams.d_key, ModelHyperParams.d_value,
            ModelHyperParams.d_model, ModelHyperParams.d_inner_hid,
            ModelHyperParams.prepostprocess_dropout,
            ModelHyperParams.attention_dropout, ModelHyperParams.relu_dropout,
            ModelHyperParams.preprocess_cmd, ModelHyperParams.postprocess_cmd,
            ModelHyperParams.weight_sharing, TrainTaskConfig.label_smooth_eps)

        optimizer = fluid.optimizer.SGD(learning_rate=0.003)

        if args.use_data_parallel:
            transformer = fluid.dygraph.parallel.DataParallel(
                transformer, strategy)

        reader = paddle.batch(wmt16.train(ModelHyperParams.src_vocab_size,
                                          ModelHyperParams.trg_vocab_size),
                              batch_size=TrainTaskConfig.batch_size)
        if args.use_data_parallel:
            reader = fluid.contrib.reader.distributed_batch_reader(reader)

        # FIX: the batch count is invariant across passes; the old code ran
        # len(list(reader())) inside the epoch loop, materializing the whole
        # dataset once per epoch just to size the progress meter.
        total_batches = sum(1 for _ in reader())

        for i in range(200):
            dy_step = 0
            batch_time = AverageMeter('Time', ':6.3f')
            data_time = AverageMeter('Data', ':6.3f')
            progress = ProgressMeter(total_batches - 1,
                                     batch_time,
                                     data_time,
                                     prefix="epoch: [{}]".format(i))
            end = Tools.time()
            for batch in reader():
                data_time.update(Tools.time() - end)
                np_values = prepare_batch_input(batch,
                                                ModelHyperParams.src_pad_idx,
                                                ModelHyperParams.trg_pad_idx,
                                                ModelHyperParams.n_head)

                enc_inputs, dec_inputs, label, weights = create_data(np_values)
                dy_sum_cost, dy_avg_cost, dy_predict, dy_token_num = transformer(
                    enc_inputs, dec_inputs, label, weights)

                # Data-parallel mode scales the loss and all-reduces the
                # gradients before the optimizer consumes them.
                if args.use_data_parallel:
                    dy_avg_cost = transformer.scale_loss(dy_avg_cost)
                    dy_avg_cost.backward()
                    transformer.apply_collective_grads()
                else:
                    dy_avg_cost.backward()

                optimizer.minimize(dy_avg_cost)
                transformer.clear_gradients()
                batch_time.update(Tools.time() - end)
                dy_step = dy_step + 1
                # FIX: `dy_step % 1 == 0` was always true — the guard was
                # dead code; logging happens every step, unconditionally.
                progress.print(dy_step)
                print("pass num : {}, batch_id: {}, dy_graph avg loss: {}".
                      format(i, dy_step, dy_avg_cost.numpy()))
                end = Tools.time()
            print("pass : {} finished".format(i))