Example #1
def create_data_generator(word_dict, batch_size, num_epochs=1, is_train=False):
    """样本生成器创建函数,用于创建样本生成器。
        Args: 
            - batch_size: 训练和推理时传入网络的batch的大小
            - num_epochs: 对数据集的遍历次数
            - is_train: 训练/推理标志位
        Return:
            - data_generator_fn: 样本生成器函数"""

    if is_train:
        examples = [i for i in imdb.train(word_dict)()]
        np.random.shuffle(examples)
    else:
        examples = [i for i in imdb.test(word_dict)()]

    def data_generator_fn():
        batch_x = []
        batch_y = []
        for i in range(num_epochs):
            print('Training epoch {}:'.format(i))
            for _x, _y in examples:
                # Truncate each sentence to 800 tokens to avoid running out of GPU memory on very long samples
                batch_x.append(_x[:800])
                batch_y.append(_y)
                if len(batch_x) == batch_size:
                    batch_x = array_normalize(batch_x, return_lod_tensor=False)
                    batch_y = array_normalize(batch_y)
                    yield [batch_x, batch_y]
                    batch_x = []
                    batch_y = []

    return data_generator_fn
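
# A minimal usage sketch for create_data_generator (not part of the original
# snippet; assumes imdb from paddle.dataset, numpy as np, and the project's
# own array_normalize helper are in scope, as in the function above):
word_dict = imdb.word_dict()
train_generator = create_data_generator(word_dict, batch_size=32,
                                        num_epochs=2, is_train=True)
for batch_x, batch_y in train_generator():
    pass  # feed batch_x (padded word IDs) and batch_y (labels) to the network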
Example #2
def get_model(args, is_train, main_prog, startup_prog):
    if args.use_reader_op:
        raise Exception(
            "stacked_dynamic_lstm does not support the reader op for now.")
    lstm_size = 512
    emb_dim = 512
    crop_size = 1500

    with fluid.program_guard(main_prog, startup_prog):
        with fluid.unique_name.guard():
            data = fluid.layers.data(name="words",
                                     shape=[1],
                                     lod_level=1,
                                     dtype='int64')
            sentence = fluid.layers.embedding(input=data,
                                              size=[len(word_dict), emb_dim])
            logit = lstm_net(sentence, lstm_size)
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            loss = fluid.layers.cross_entropy(input=logit, label=label)
            loss = fluid.layers.mean(x=loss)

            # add accuracy metric, reusing the label layer defined above
            batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
            batch_acc = fluid.layers.accuracy(input=logit, label=label,
                                              total=batch_size_tensor)

            adam = None
            if is_train:
                adam = fluid.optimizer.Adam()
                adam.minimize(loss)

    if is_train:
        reader = crop_sentence(imdb.train(word_dict), crop_size)
    else:
        reader = crop_sentence(imdb.test(word_dict), crop_size)

    batched_reader = paddle.batch(paddle.reader.shuffle(reader,
                                                        buf_size=25000),
                                  batch_size=args.batch_size * args.gpus)

    return loss, adam, [batch_acc], batched_reader, None
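
# crop_sentence is not defined in this snippet. A plausible sketch, assuming
# it simply drops samples whose sentences exceed crop_size words (the real
# helper may differ):
def crop_sentence(reader, crop_size):
    def cropped_reader():
        for sentence, label in reader():
            if len(sentence) <= crop_size:
                yield sentence, label

    return cropped_reader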
Example #3
def main():
    args = parse_args()
    lstm_size = args.hidden_dim

    data = fluid.layers.data(name="words",
                             shape=[1],
                             lod_level=1,
                             dtype='int64')
    sentence = fluid.layers.embedding(input=data,
                                      size=[len(word_dict), args.emb_dim])

    sentence = fluid.layers.fc(input=sentence, size=lstm_size, act='tanh')

    rnn = fluid.layers.DynamicRNN()
    with rnn.block():
        word = rnn.step_input(sentence)
        prev_hidden = rnn.memory(value=0.0, shape=[lstm_size])
        prev_cell = rnn.memory(value=0.0, shape=[lstm_size])

        def gate_common(ipt, hidden, size):
            gate0 = fluid.layers.fc(input=ipt, size=size, bias_attr=True)
            gate1 = fluid.layers.fc(input=hidden, size=size, bias_attr=False)
            gate = fluid.layers.sums(input=[gate0, gate1])
            return gate
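
        # The gates below implement the standard LSTM cell equations (only
        # the input projection carries a bias, since gate1 uses bias_attr=False):
        #   f_t = sigmoid(W_f x_t + U_f h_{t-1} + b_f)   (forget gate)
        #   i_t = sigmoid(W_i x_t + U_i h_{t-1} + b_i)   (input gate)
        #   o_t = sigmoid(W_o x_t + U_o h_{t-1} + b_o)   (output gate)
        #   g_t = tanh(W_g x_t + U_g h_{t-1} + b_g)      (cell candidate)
        #   c_t = f_t * c_{t-1} + i_t * g_t
        #   h_t = o_t * tanh(c_t)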

        forget_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        input_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        output_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        cell_gate = fluid.layers.tanh(
            x=gate_common(word, prev_hidden, lstm_size))

        cell = fluid.layers.sums(input=[
            fluid.layers.elementwise_mul(x=forget_gate, y=prev_cell),
            fluid.layers.elementwise_mul(x=input_gate, y=cell_gate)
        ])

        hidden = fluid.layers.elementwise_mul(x=output_gate,
                                              y=fluid.layers.tanh(x=cell))

        rnn.update_memory(prev_cell, cell)
        rnn.update_memory(prev_hidden, hidden)
        rnn.output(hidden)

    last = fluid.layers.sequence_pool(rnn(), 'last')
    logit = fluid.layers.fc(input=last, size=2, act='softmax')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    loss = fluid.layers.cross_entropy(input=logit, label=label)
    loss = fluid.layers.mean(x=loss)

    # add accuracy metric, reusing the label layer defined above
    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
    batch_acc = fluid.layers.accuracy(input=logit, label=label,
                                      total=batch_size_tensor)

    adam = fluid.optimizer.Adam()
    adam.minimize(loss)

    fluid.memory_optimize(fluid.default_main_program())

    place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    train_reader = batch(
        paddle.reader.shuffle(
            crop_sentence(imdb.train(word_dict), args.crop_size),
            buf_size=25000),
        batch_size=args.batch_size)

    train_acc_kpi = None
    train_speed_kpi = None
    for kpi in tracking_kpis:
        if kpi.name == 'imdb_%s_train_acc' % (args.batch_size):
            train_acc_kpi = kpi
        elif kpi.name == 'imdb_%s_train_speed' % (args.batch_size):
            train_speed_kpi = kpi

    iters, num_samples, start_time = 0, 0, time.time()
    for pass_id in range(args.pass_num):
        train_accs = []
        train_losses = []
        for batch_id, data in enumerate(train_reader()):
            if iters == args.skip_batch_num:
                start_time = time.time()
                num_samples = 0
            if iters == args.iterations:
                break
            tensor_words = to_lodtensor([x[0] for x in data], place)
            label = numpy.array([x[1] for x in data]).astype("int64")
            label = label.reshape((-1, 1))
            loss_np, acc, weight = exe.run(
                fluid.default_main_program(),
                feed={
                    "words": tensor_words,
                    "label": label
                },
                fetch_list=[loss, batch_acc, batch_size_tensor])
            iters += 1
            for x in data:
                num_samples += len(x[0])
            print(
                "Pass = %d, Iter = %d, Loss = %f, Accuracy = %f" %
                (pass_id, iters, loss_np, acc)
            )  # The accuracy is accumulated over batches, not just the current batch.

        train_elapsed = time.time() - start_time
        examples_per_sec = num_samples / train_elapsed
        print('\nTotal examples: %d, total time: %.5f, %.5f examples/sec\n' %
              (num_samples, train_elapsed, examples_per_sec))
        train_speed_kpi.add_record(np.array(examples_per_sec, dtype='float32'))
        break
    train_speed_kpi.persist()
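
# Example #3 also relies on a to_lodtensor helper that is not shown. A minimal
# sketch for old-style fluid (an assumption, not necessarily the author's
# exact code): it flattens variable-length ID sequences into one LoDTensor.
def to_lodtensor(data, place):
    lod = [0]
    for seq in data:
        lod.append(lod[-1] + len(seq))
    flattened = np.concatenate(data, axis=0).astype("int64")
    flattened = flattened.reshape([len(flattened), 1])
    res = fluid.LoDTensor()
    res.set(flattened, place)
    res.set_lod([lod])
    return res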
Example #4
# https://aistudio.baidu.com/aistudio/projectdetail/96357
# Import the required packages
import paddle
import paddle.dataset.imdb as imdb
import paddle.fluid as fluid
import numpy as np
import os

word_dict = imdb.word_dict()
dict_dim = len(word_dict)
train_reader = paddle.batch(paddle.reader.shuffle(imdb.train(word_dict), 512), batch_size=128)
test_reader = paddle.batch(imdb.test(word_dict), batch_size=128)


# Define the LSTM network

def lstm_net(ipt, input_dim):
    # Take the word IDs of the data as input
    emb = fluid.layers.embedding(input=ipt, size=[input_dim, 128], is_sparse=True)
    # First fully connected layer
    fc1 = fluid.layers.fc(input=emb, size=128)
    # Apply an LSTM operation
    lstm1, _ = fluid.layers.dynamic_lstm(input=fc1,  # returns the hidden state and the LSTM cell state
                                         size=128)  # size = 4 * hidden_size
    # First max sequence-pooling operation
    fc2 = fluid.layers.sequence_pool(input=fc1, pool_type='max')
    # Second max sequence-pooling operation
    lstm2 = fluid.layers.sequence_pool(input=lstm1, pool_type='max')
    # Fully connected output layer with softmax activation; size 2 for the
    # positive/negative sentiment classes
    out = fluid.layers.fc(input=[fc2, lstm2], size=2, act='softmax')
    return out
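
# Example #5 below refers to words, label, avg_cost and acc, which this
# snippet does not define. A minimal sketch of the missing glue code (assumed
# names, following the tutorial linked above):
words = fluid.layers.data(name='words', shape=[1], dtype='int64', lod_level=1)
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
model = lstm_net(words, dict_dim)
avg_cost = fluid.layers.mean(fluid.layers.cross_entropy(input=model, label=label))
acc = fluid.layers.accuracy(input=model, label=label)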
Example #5
test_program = fluid.default_main_program().clone(for_test=True)

# Define the optimization method
optimizer = fluid.optimizer.AdagradOptimizer(learning_rate=0.002)
opt = optimizer.minimize(avg_cost)

# Create an executor; training on the CPU is relatively slow
# place = fluid.CPUPlace()
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
# Initialize the parameters
exe.run(fluid.default_startup_program())

# Obtain the training and test data
print("Loading training data...")
train_reader = paddle.batch(paddle.reader.shuffle(imdb.train(word_dict),
                                                  25000),
                            batch_size=128)
print("加载测试数据中...")
test_reader = paddle.batch(imdb.test(word_dict), batch_size=128)

# Define the feeder for the input data
feeder = fluid.DataFeeder(place=place, feed_list=[words, label])

# Start training
for pass_id in range(1):
    # Run a training pass
    train_cost = 0
    for batch_id, data in enumerate(train_reader()):
        train_cost = exe.run(program=fluid.default_main_program(),
                             feed=feeder.feed(data),
                             fetch_list=[avg_cost])
Example #6
def get_model(args):
    if args.use_reader_op:
        raise Exception(
            "stacked_dynamic_lstm does not support the reader op for now.")
    lstm_size = 512
    emb_dim = 512
    crop_size = 1500

    data = fluid.layers.data(
        name="words", shape=[1], lod_level=1, dtype='int64')
    sentence = fluid.layers.embedding(
        input=data, size=[len(word_dict), emb_dim])

    sentence = fluid.layers.fc(input=sentence, size=lstm_size, act='tanh')

    rnn = fluid.layers.DynamicRNN()
    with rnn.block():
        word = rnn.step_input(sentence)
        prev_hidden = rnn.memory(value=0.0, shape=[lstm_size])
        prev_cell = rnn.memory(value=0.0, shape=[lstm_size])

        def gate_common(ipt, hidden, size):
            gate0 = fluid.layers.fc(input=ipt, size=size, bias_attr=True)
            gate1 = fluid.layers.fc(input=hidden, size=size, bias_attr=False)
            gate = fluid.layers.sums(input=[gate0, gate1])
            return gate

        forget_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        input_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        output_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        cell_gate = fluid.layers.tanh(
            x=gate_common(word, prev_hidden, lstm_size))

        cell = fluid.layers.sums(input=[
            fluid.layers.elementwise_mul(x=forget_gate, y=prev_cell),
            fluid.layers.elementwise_mul(x=input_gate, y=cell_gate)
        ])

        hidden = fluid.layers.elementwise_mul(
            x=output_gate, y=fluid.layers.tanh(x=cell))

        rnn.update_memory(prev_cell, cell)
        rnn.update_memory(prev_hidden, hidden)
        rnn.output(hidden)

    last = fluid.layers.sequence_pool(rnn(), 'last')
    logit = fluid.layers.fc(input=last, size=2, act='softmax')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    loss = fluid.layers.cross_entropy(input=logit, label=label)
    loss = fluid.layers.mean(x=loss)

    # add accuracy metric, reusing the label layer defined above
    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
    batch_acc = fluid.layers.accuracy(input=logit, label=label,
                                      total=batch_size_tensor)

    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        inference_program = fluid.io.get_inference_program(
            target_vars=[batch_acc, batch_size_tensor])

    adam = fluid.optimizer.Adam()

    train_reader = batch(
        paddle.reader.shuffle(
            crop_sentence(imdb.train(word_dict), crop_size), buf_size=25000),
        batch_size=args.batch_size * args.gpus)
    test_reader = batch(
        paddle.reader.shuffle(
            crop_sentence(imdb.test(word_dict), crop_size), buf_size=25000),
        batch_size=args.batch_size)

    return loss, inference_program, adam, train_reader, test_reader, batch_acc
Example #7
def get_model(args):
    lstm_size = 512
    emb_dim = 512
    crop_size = 1500

    data = fluid.layers.data(
        name="words", shape=[1], lod_level=1, dtype='int64')
    sentence = fluid.layers.embedding(
        input=data, size=[len(word_dict), emb_dim])

    sentence = fluid.layers.fc(input=sentence, size=lstm_size, act='tanh')

    rnn = fluid.layers.DynamicRNN()
    with rnn.block():
        word = rnn.step_input(sentence)
        prev_hidden = rnn.memory(value=0.0, shape=[lstm_size])
        prev_cell = rnn.memory(value=0.0, shape=[lstm_size])

        def gate_common(ipt, hidden, size):
            gate0 = fluid.layers.fc(input=ipt, size=size, bias_attr=True)
            gate1 = fluid.layers.fc(input=hidden, size=size, bias_attr=False)
            gate = fluid.layers.sums(input=[gate0, gate1])
            return gate

        forget_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        input_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        output_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        cell_gate = fluid.layers.tanh(
            x=gate_common(word, prev_hidden, lstm_size))

        cell = fluid.layers.sums(input=[
            fluid.layers.elementwise_mul(x=forget_gate, y=prev_cell),
            fluid.layers.elementwise_mul(x=input_gate, y=cell_gate)
        ])

        hidden = fluid.layers.elementwise_mul(
            x=output_gate, y=fluid.layers.tanh(x=cell))

        rnn.update_memory(prev_cell, cell)
        rnn.update_memory(prev_hidden, hidden)
        rnn.output(hidden)

    last = fluid.layers.sequence_pool(rnn(), 'last')
    logit = fluid.layers.fc(input=last, size=2, act='softmax')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    loss = fluid.layers.cross_entropy(input=logit, label=label)
    loss = fluid.layers.mean(x=loss)

    # add accuracy metric, reusing the label layer defined above
    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
    batch_acc = fluid.layers.accuracy(input=logit, label=label,
                                      total=batch_size_tensor)

    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        inference_program = fluid.io.get_inference_program(
            target_vars=[batch_acc, batch_size_tensor])

    adam = fluid.optimizer.Adam()

    train_reader = batch(
        paddle.reader.shuffle(
            crop_sentence(imdb.train(word_dict), crop_size), buf_size=25000),
        batch_size=args.batch_size)
    test_reader = batch(
        paddle.reader.shuffle(
            crop_sentence(imdb.test(word_dict), crop_size), buf_size=25000),
        batch_size=args.batch_size)

    return loss, inference_program, adam, train_reader, test_reader, batch_acc
Example #8
# Define the optimization method
optimizer = fluid.optimizer.AdagradOptimizer(learning_rate=0.002)
opt = optimizer.minimize(avg_cost)

# Create an executor; training on the CPU is relatively slow
# place = fluid.CPUPlace()
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
# Initialize the parameters
exe.run(fluid.default_startup_program())

# Obtain the training and test data
print("Loading training data...")
train_reader = paddle.batch(
    paddle.reader.shuffle(imdb.train(word_dict), 25000), batch_size=128)
print("加载测试数据中...")
test_reader = paddle.batch(imdb.test(word_dict), batch_size=128)

# Define the feeder for the input data
feeder = fluid.DataFeeder(place=place, feed_list=[words, label])

# Start training
for pass_id in range(1):
    # Run a training pass
    train_cost = 0
    for batch_id, data in enumerate(train_reader()):
        train_cost, train_acc = exe.run(program=fluid.default_main_program(),
                                        feed=feeder.feed(data),
                                        fetch_list=[avg_cost, acc])
        if batch_id % 10 == 0:
            print('Pass: %d, Batch: %d, Cost: %0.5f, Accuracy: %0.5f' %
                  (pass_id, batch_id, train_cost[0], train_acc[0]))
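
# The test_reader defined above would then be used for evaluation. A minimal
# sketch (an assumption: it reuses feeder, avg_cost and acc from above, plus a
# test_program cloned with for_test=True as in Example #5):
test_costs, test_accs = [], []
for data in test_reader():
    cost_val, acc_val = exe.run(program=test_program,
                                feed=feeder.feed(data),
                                fetch_list=[avg_cost, acc])
    test_costs.append(cost_val[0])
    test_accs.append(acc_val[0])
print('Test cost: %0.5f, accuracy: %0.5f' %
      (sum(test_costs) / len(test_costs), sum(test_accs) / len(test_accs)))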