Example #1
def create_data_generator(word_dict, batch_size, num_epochs=1, is_train=False):
    """样本生成器创建函数,用于创建样本生成器。
        Args: 
            - batch_size: 训练和推理时传入网络的batch的大小
            - num_epochs: 对数据集的遍历次数
            - is_train: 训练/推理标志位
        Return:
            - data_generator_fn: 样本生成器函数"""

    if is_train:
        examples = [i for i in imdb.train(word_dict)()]
        np.random.shuffle(examples)
    else:
        examples = [i for i in imdb.test(word_dict)()]

    def data_generator_fn():
        batch_x = []
        batch_y = []
        for i in range(num_epochs):
            print('Training epoch {}:'.format(i))
            for _x, _y in examples:
                # Truncate each sentence to 800 tokens to avoid running out of GPU memory on overly long samples
                batch_x.append(_x[:800])
                batch_y.append(_y)
                if len(batch_x) == batch_size:
                    batch_x = array_normalize(batch_x, return_lod_tensor=False)
                    batch_y = array_normalize(batch_y)
                    yield [batch_x, batch_y]
                    batch_x = []
                    batch_y = []

    return data_generator_fn
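
A minimal usage sketch for the generator above (imdb, np, and the array_normalize helper are assumed to be importable in the surrounding module, just as the snippet itself assumes):

word_dict = imdb.word_dict()
train_generator_fn = create_data_generator(word_dict, batch_size=64,
                                           num_epochs=1, is_train=True)
for batch_x, batch_y in train_generator_fn():
    # each yielded pair has already been normalized by array_normalize
    print(type(batch_x), type(batch_y))
    break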
Example #2
def get_model(args, is_train, main_prog, startup_prog):
    if args.use_reader_op:
        raise Exception(
            "stacked_dynamic_lstm does not support the reader op for now.")
    lstm_size = 512
    emb_dim = 512
    crop_size = 1500

    with fluid.program_guard(main_prog, startup_prog):
        with fluid.unique_name.guard():
            data = fluid.layers.data(name="words",
                                     shape=[1],
                                     lod_level=1,
                                     dtype='int64')
            sentence = fluid.layers.embedding(input=data,
                                              size=[len(word_dict), emb_dim])
            logit = lstm_net(sentence, lstm_size)
            loss = fluid.layers.cross_entropy(input=logit,
                                              label=fluid.layers.data(
                                                  name='label',
                                                  shape=[1],
                                                  dtype='int64'))
            loss = fluid.layers.mean(x=loss)

            # accuracy metric over the current batch
            batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
            batch_acc = fluid.layers.accuracy(
                input=logit,
                label=fluid.layers.data(name='label', shape=[1], dtype='int64'),
                total=batch_size_tensor)

            # Only attach the optimizer when training; return None for inference
            adam = None
            if is_train:
                adam = fluid.optimizer.Adam()
                adam.minimize(loss)

    if is_train:
        reader = crop_sentence(imdb.train(word_dict), crop_size)
    else:
        reader = crop_sentence(imdb.test(word_dict), crop_size)

    batched_reader = paddle.batch(paddle.reader.shuffle(reader,
                                                        buf_size=25000),
                                  batch_size=args.batch_size * args.gpus)

    return loss, adam, [batch_acc], batched_reader, None
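
crop_sentence is used but not defined in this snippet; below is a minimal sketch of a compatible reader decorator, assuming it simply limits each review to its first crop_size word IDs:

def crop_sentence(reader, crop_size):
    # Reader decorator: truncate each (word_ids, label) sample to at most crop_size tokens.
    def cropped_reader():
        for word_ids, label in reader():
            yield word_ids[:crop_size], label
    return cropped_reader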
Example #3
# https://aistudio.baidu.com/aistudio/projectdetail/96357
# Import the required packages
import paddle
import paddle.dataset.imdb as imdb
import paddle.fluid as fluid
import numpy as np
import os

word_dict = imdb.word_dict()
dict_dim = len(word_dict)
train_reader = paddle.batch(paddle.reader.shuffle(imdb.train(word_dict), 512), batch_size=128)
test_reader = paddle.batch(imdb.test(word_dict), batch_size=128)


# Define the long short-term memory (LSTM) network

def lstm_net(ipt, input_dim):
    # Use the word IDs as input and map them to embeddings
    emb = fluid.layers.embedding(input=ipt, size=[input_dim, 128], is_sparse=True)
    # First fully connected layer
    fc1 = fluid.layers.fc(input=emb, size=128)
    # LSTM layer; returns the hidden state and the LSTM cell state
    lstm1, _ = fluid.layers.dynamic_lstm(input=fc1,
                                         size=128)  # size must equal 4 * hidden_size
    # Max sequence pooling over the fully connected features
    fc2 = fluid.layers.sequence_pool(input=fc1, pool_type='max')
    # Max sequence pooling over the LSTM hidden states
    lstm2 = fluid.layers.sequence_pool(input=lstm1, pool_type='max')
    # Fully connected output layer with softmax, size 2 for positive/negative sentiment
    out = fluid.layers.fc(input=[fc2, lstm2], size=2, act='softmax')
    return out
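
A minimal training sketch for lstm_net with the readers defined above; the optimizer choice and the feed/executor setup follow the usual fluid 1.x pattern and are assumptions rather than part of the original project:

words = fluid.layers.data(name='words', shape=[1], dtype='int64', lod_level=1)
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
prediction = lstm_net(words, dict_dim)
avg_cost = fluid.layers.mean(fluid.layers.cross_entropy(input=prediction, label=label))
acc = fluid.layers.accuracy(input=prediction, label=label)
fluid.optimizer.Adagrad(learning_rate=0.002).minimize(avg_cost)

# Run one pass over the training reader on CPU.
place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
feeder = fluid.DataFeeder(place=place, feed_list=[words, label])
for batch_id, data in enumerate(train_reader()):
    cost_val, acc_val = exe.run(fluid.default_main_program(),
                                feed=feeder.feed(data),
                                fetch_list=[avg_cost, acc])
    if batch_id % 100 == 0:
        print('Batch {}: cost {}, acc {}'.format(batch_id, cost_val[0], acc_val[0]))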
Example #4
def get_model(args):
    if args.use_reader_op:
        raise Exception(
            "stacked_dynamic_lstm does not support the reader op for now.")
    lstm_size = 512
    emb_dim = 512
    crop_size = 1500

    data = fluid.layers.data(
        name="words", shape=[1], lod_level=1, dtype='int64')
    sentence = fluid.layers.embedding(
        input=data, size=[len(word_dict), emb_dim])

    sentence = fluid.layers.fc(input=sentence, size=lstm_size, act='tanh')

    rnn = fluid.layers.DynamicRNN()
    with rnn.block():
        word = rnn.step_input(sentence)
        prev_hidden = rnn.memory(value=0.0, shape=[lstm_size])
        prev_cell = rnn.memory(value=0.0, shape=[lstm_size])

        def gate_common(ipt, hidden, size):
            # Each gate is an affine map of the current input and the previous
            # hidden state: W_x * x_t + W_h * h_{t-1} + b
            gate0 = fluid.layers.fc(input=ipt, size=size, bias_attr=True)
            gate1 = fluid.layers.fc(input=hidden, size=size, bias_attr=False)
            gate = fluid.layers.sums(input=[gate0, gate1])
            return gate

        forget_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        input_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        output_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        cell_gate = fluid.layers.tanh(
            x=gate_common(word, prev_hidden, lstm_size))

        cell = fluid.layers.sums(input=[
            fluid.layers.elementwise_mul(
                x=forget_gate, y=prev_cell), fluid.layers.elementwise_mul(
                    x=input_gate, y=cell_gate)
        ])

        hidden = fluid.layers.elementwise_mul(
            x=output_gate, y=fluid.layers.tanh(x=cell))

        rnn.update_memory(prev_cell, cell)
        rnn.update_memory(prev_hidden, hidden)
        rnn.output(hidden)

    last = fluid.layers.sequence_pool(rnn(), 'last')
    logit = fluid.layers.fc(input=last, size=2, act='softmax')
    loss = fluid.layers.cross_entropy(
        input=logit,
        label=fluid.layers.data(
            name='label', shape=[1], dtype='int64'))
    loss = fluid.layers.mean(x=loss)

    # accuracy metric over the current batch
    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
    batch_acc = fluid.layers.accuracy(
        input=logit,
        label=fluid.layers.data(name='label', shape=[1], dtype='int64'),
        total=batch_size_tensor)

    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        inference_program = fluid.io.get_inference_program(
            target_vars=[batch_acc, batch_size_tensor])

    adam = fluid.optimizer.Adam()

    train_reader = batch(
        paddle.reader.shuffle(
            crop_sentence(imdb.train(word_dict), crop_size), buf_size=25000),
        batch_size=args.batch_size * args.gpus)
    test_reader = batch(
        paddle.reader.shuffle(
            crop_sentence(imdb.test(word_dict), crop_size), buf_size=25000),
        batch_size=args.batch_size)

    return loss, inference_program, adam, train_reader, test_reader, batch_acc
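
A sketch of how a benchmark driver might consume this return tuple; the driver itself is not part of the snippet, so the place selection and the lookup of the 'words' and 'label' variables by name are assumptions:

loss, inference_program, adam, train_reader, test_reader, batch_acc = get_model(args)
adam.minimize(loss)  # the optimizer is returned unapplied, so the driver attaches it here

place = fluid.CUDAPlace(0) if args.gpus else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())

main_block = fluid.default_main_program().global_block()
feeder = fluid.DataFeeder(place=place,
                          feed_list=[main_block.var('words'), main_block.var('label')])
for data in train_reader():
    loss_val, acc_val = exe.run(fluid.default_main_program(),
                                feed=feeder.feed(data),
                                fetch_list=[loss, batch_acc])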
Example #5
def get_model(args):
    lstm_size = 512
    emb_dim = 512
    crop_size = 1500

    data = fluid.layers.data(
        name="words", shape=[1], lod_level=1, dtype='int64')
    sentence = fluid.layers.embedding(
        input=data, size=[len(word_dict), emb_dim])

    sentence = fluid.layers.fc(input=sentence, size=lstm_size, act='tanh')

    rnn = fluid.layers.DynamicRNN()
    with rnn.block():
        word = rnn.step_input(sentence)
        prev_hidden = rnn.memory(value=0.0, shape=[lstm_size])
        prev_cell = rnn.memory(value=0.0, shape=[lstm_size])

        def gate_common(ipt, hidden, size):
            # Each gate is an affine map of the current input and the previous
            # hidden state: W_x * x_t + W_h * h_{t-1} + b
            gate0 = fluid.layers.fc(input=ipt, size=size, bias_attr=True)
            gate1 = fluid.layers.fc(input=hidden, size=size, bias_attr=False)
            gate = fluid.layers.sums(input=[gate0, gate1])
            return gate

        forget_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        input_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        output_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        cell_gate = fluid.layers.tanh(
            x=gate_common(word, prev_hidden, lstm_size))

        cell = fluid.layers.sums(input=[
            fluid.layers.elementwise_mul(
                x=forget_gate, y=prev_cell), fluid.layers.elementwise_mul(
                    x=input_gate, y=cell_gate)
        ])

        hidden = fluid.layers.elementwise_mul(
            x=output_gate, y=fluid.layers.tanh(x=cell))

        rnn.update_memory(prev_cell, cell)
        rnn.update_memory(prev_hidden, hidden)
        rnn.output(hidden)

    last = fluid.layers.sequence_pool(rnn(), 'last')
    logit = fluid.layers.fc(input=last, size=2, act='softmax')
    loss = fluid.layers.cross_entropy(
        input=logit,
        label=fluid.layers.data(
            name='label', shape=[1], dtype='int64'))
    loss = fluid.layers.mean(x=loss)

    # accuracy metric over the current batch
    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
    batch_acc = fluid.layers.accuracy(
        input=logit,
        label=fluid.layers.data(name='label', shape=[1], dtype='int64'),
        total=batch_size_tensor)

    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        inference_program = fluid.io.get_inference_program(
            target_vars=[batch_acc, batch_size_tensor])

    adam = fluid.optimizer.Adam()

    train_reader = batch(
        paddle.reader.shuffle(
            crop_sentence(imdb.train(word_dict), crop_size), buf_size=25000),
        batch_size=args.batch_size)
    test_reader = batch(
        paddle.reader.shuffle(
            crop_sentence(imdb.test(word_dict), crop_size), buf_size=25000),
        batch_size=args.batch_size)

    return loss, inference_program, adam, train_reader, test_reader, batch_acc