def create_data_generator(word_dict, batch_size, num_epochs=1, is_train=False):
    """Create a sample-generator function over the IMDB dataset.

    The examples are read eagerly, once, when this factory is called; the
    training split is additionally shuffled in place with ``np.random.shuffle``.

    Args:
        word_dict: dictionary handed to ``imdb.train`` / ``imdb.test``.
        batch_size: number of samples per batch fed to the network during
            training and inference.
        num_epochs: number of passes over the dataset.
        is_train: training/inference flag; selects ``imdb.train`` when true,
            ``imdb.test`` otherwise.

    Returns:
        data_generator_fn: a zero-argument generator function yielding
        ``[batch_x, batch_y]`` lists produced by ``array_normalize``.

    Note:
        The sample buffers are created once, before the epoch loop, so samples
        left over at the end of an epoch are completed with samples from the
        next epoch, and a final partial batch is never yielded.
    """
    read_split = imdb.train if is_train else imdb.test
    examples = list(read_split(word_dict)())
    if is_train:
        np.random.shuffle(examples)

    def data_generator_fn():
        pending_x, pending_y = [], []
        for epoch in range(num_epochs):
            print('Training epoch {}:'.format(epoch))
            for sentence, label in examples:
                # Truncate each sentence to 800 tokens so that an overly long
                # sample cannot exhaust GPU memory.
                pending_x.append(sentence[:800])
                pending_y.append(label)
                if len(pending_x) != batch_size:
                    continue
                yield [
                    array_normalize(pending_x, return_lod_tensor=False),
                    array_normalize(pending_y),
                ]
                pending_x, pending_y = [], []

    return data_generator_fn
def get_model(args, is_train, main_prog, startup_prog):
    """Build the LSTM sentiment model inside the given fluid programs.

    Args:
        args: options object; this function reads ``use_reader_op``,
            ``batch_size`` and ``gpus``.
        is_train: when true, an Adam optimizer is created and
            ``minimize(loss)`` is called, and the IMDB training split is read;
            otherwise the test split is read and no optimizer is created.
        main_prog: program passed to ``fluid.program_guard`` as main program.
        startup_prog: program passed to ``fluid.program_guard`` as startup
            program.

    Returns:
        A 5-tuple ``(loss, adam, [batch_acc], batched_reader, None)``.
        ``adam`` is ``None`` when ``is_train`` is false.

    Raises:
        Exception: if ``args.use_reader_op`` is truthy.
    """
    if args.use_reader_op:
        raise Exception(
            "stacked_dynamic_lstm do not support reader op for now.")
    lstm_size = 512
    emb_dim = 512
    crop_size = 1500

    # Bind the optimizer slot up front: it is part of the return value even in
    # inference mode, where previously the name was never assigned and the
    # return statement raised UnboundLocalError.
    adam = None

    with fluid.program_guard(main_prog, startup_prog):
        with fluid.unique_name.guard():
            data = fluid.layers.data(
                name="words", shape=[1], lod_level=1, dtype='int64')
            sentence = fluid.layers.embedding(
                input=data, size=[len(word_dict), emb_dim])
            logit = lstm_net(sentence, lstm_size)
            loss = fluid.layers.cross_entropy(
                input=logit,
                label=fluid.layers.data(
                    name='label', shape=[1], dtype='int64'))
            loss = fluid.layers.mean(x=loss)

            # Accuracy metric; batch_size_tensor is passed as `total`.
            # NOTE(review): the 'label' data layer is declared a second time
            # with the same name, as in the original -- confirm fluid resolves
            # both declarations to a single variable.
            batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
            batch_acc = fluid.layers.accuracy(
                input=logit,
                label=fluid.layers.data(
                    name='label', shape=[1], dtype='int64'),
                total=batch_size_tensor)

            if is_train:
                adam = fluid.optimizer.Adam()
                adam.minimize(loss)

    if is_train:
        reader = crop_sentence(imdb.train(word_dict), crop_size)
    else:
        reader = crop_sentence(imdb.test(word_dict), crop_size)

    batched_reader = paddle.batch(
        paddle.reader.shuffle(reader, buf_size=25000),
        batch_size=args.batch_size * args.gpus)

    return loss, adam, [batch_acc], batched_reader, None
# https://aistudio.baidu.com/aistudio/projectdetail/96357
# Import the required packages
import paddle
import paddle.dataset.imdb as imdb
import paddle.fluid as fluid
import numpy as np
import os

# IMDB word dictionary and its size.
word_dict = imdb.word_dict()
dict_dim = len(word_dict)

# Batched readers (batch size 128); only the training reader is shuffled
# (shuffle buffer argument 512).
train_reader = paddle.batch(
    paddle.reader.shuffle(imdb.train(word_dict), 512),
    batch_size=128,
)
test_reader = paddle.batch(
    imdb.test(word_dict),
    batch_size=128,
)


# Define the long short-term memory (LSTM) network
def lstm_net(ipt, input_dim):
    """Build the LSTM classifier and return its softmax output layer.

    Args:
        ipt: input variable holding the word IDs; fed to the embedding layer.
        input_dim: first dimension of the embedding table size
            (the embedding is created with ``size=[input_dim, 128]``).

    Returns:
        The output of a fully connected softmax layer of size 2
        (positive / negative), fed by the max-pooled FC features and the
        max-pooled LSTM hidden states.
    """
    # Use the IDs of the data as input to a sparse embedding lookup.
    embedded = fluid.layers.embedding(
        input=ipt,
        size=[input_dim, 128],
        is_sparse=True,
    )

    # First fully connected layer.
    projected = fluid.layers.fc(input=embedded, size=128)

    # Run one LSTM operation. It returns the hidden state and the LSTM cell
    # state; only the hidden state is used. Per the original note,
    # `size` is 4 * hidden_size.
    hidden_seq, _cell_seq = fluid.layers.dynamic_lstm(input=projected, size=128)

    # First max sequence-pooling operation (over the FC features).
    pooled_fc = fluid.layers.sequence_pool(input=projected, pool_type='max')

    # Second max sequence-pooling operation (over the LSTM hidden states).
    pooled_lstm = fluid.layers.sequence_pool(input=hidden_seq, pool_type='max')

    # Fully connected output layer with softmax, size 2: positive / negative.
    return fluid.layers.fc(
        input=[pooled_fc, pooled_lstm],
        size=2,
        act='softmax',
    )
def get_model(args):
    """Build the dynamic-RNN LSTM sentiment model in the default program.

    Pipeline: word-ID input -> embedding -> tanh FC -> hand-written LSTM cell
    unrolled by ``fluid.layers.DynamicRNN`` -> last-step pooling -> 2-way
    softmax, with mean cross-entropy loss and a batch accuracy metric.

    Args:
        args: options object; this function reads ``use_reader_op``,
            ``batch_size`` and ``gpus``.

    Returns:
        A 6-tuple ``(loss, inference_program, adam, train_reader,
        test_reader, batch_acc)``. The Adam optimizer is returned as created;
        ``minimize`` is not called here.

    Raises:
        Exception: if ``args.use_reader_op`` is truthy.
    """
    if args.use_reader_op:
        raise Exception(
            "stacked_dynamic_lstm do not support reader op for now.")

    hidden_dim = 512
    embedding_dim = 512
    max_sentence_len = 1500

    def gate_preactivation(step_input, previous_hidden):
        # Sum of an FC over the step input (with bias) and an FC over the
        # previous hidden state (without bias).
        from_input = fluid.layers.fc(
            input=step_input, size=hidden_dim, bias_attr=True)
        from_hidden = fluid.layers.fc(
            input=previous_hidden, size=hidden_dim, bias_attr=False)
        return fluid.layers.sums(input=[from_input, from_hidden])

    words = fluid.layers.data(
        name="words", shape=[1], lod_level=1, dtype='int64')
    embedded = fluid.layers.embedding(
        input=words, size=[len(word_dict), embedding_dim])
    projected = fluid.layers.fc(input=embedded, size=hidden_dim, act='tanh')

    rnn = fluid.layers.DynamicRNN()
    with rnn.block():
        step = rnn.step_input(projected)
        h_prev = rnn.memory(value=0.0, shape=[hidden_dim])
        c_prev = rnn.memory(value=0.0, shape=[hidden_dim])

        # Gates are built in the order forget, input, output, candidate.
        f_gate = fluid.layers.sigmoid(x=gate_preactivation(step, h_prev))
        i_gate = fluid.layers.sigmoid(x=gate_preactivation(step, h_prev))
        o_gate = fluid.layers.sigmoid(x=gate_preactivation(step, h_prev))
        candidate = fluid.layers.tanh(x=gate_preactivation(step, h_prev))

        # New cell state: forget_gate * prev_cell + input_gate * candidate.
        retained = fluid.layers.elementwise_mul(x=f_gate, y=c_prev)
        written = fluid.layers.elementwise_mul(x=i_gate, y=candidate)
        c_next = fluid.layers.sums(input=[retained, written])

        # New hidden state: output_gate * tanh(cell).
        squashed_cell = fluid.layers.tanh(x=c_next)
        h_next = fluid.layers.elementwise_mul(x=o_gate, y=squashed_cell)

        rnn.update_memory(c_prev, c_next)
        rnn.update_memory(h_prev, h_next)
        rnn.output(h_next)

    final_hidden = fluid.layers.sequence_pool(rnn(), 'last')
    logit = fluid.layers.fc(input=final_hidden, size=2, act='softmax')

    loss_label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    per_sample_loss = fluid.layers.cross_entropy(input=logit, label=loss_label)
    loss = fluid.layers.mean(x=per_sample_loss)

    # Accuracy metric; batch_size_tensor is passed as `total`.
    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
    acc_label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    batch_acc = fluid.layers.accuracy(
        input=logit, label=acc_label, total=batch_size_tensor)

    # Clone the main program before any optimizer is attached and reduce it to
    # what is needed for the accuracy targets.
    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        inference_program = fluid.io.get_inference_program(
            target_vars=[batch_acc, batch_size_tensor])

    adam = fluid.optimizer.Adam()

    train_samples = crop_sentence(imdb.train(word_dict), max_sentence_len)
    shuffled_train = paddle.reader.shuffle(train_samples, buf_size=25000)
    train_reader = batch(
        shuffled_train, batch_size=args.batch_size * args.gpus)

    test_samples = crop_sentence(imdb.test(word_dict), max_sentence_len)
    shuffled_test = paddle.reader.shuffle(test_samples, buf_size=25000)
    test_reader = batch(shuffled_test, batch_size=args.batch_size)

    return loss, inference_program, adam, train_reader, test_reader, batch_acc
def get_model(args):
    """Build the dynamic-RNN LSTM sentiment model in the default program.

    Pipeline: word-ID input -> embedding -> tanh FC -> hand-written LSTM cell
    unrolled by ``fluid.layers.DynamicRNN`` -> last-step pooling -> 2-way
    softmax, with mean cross-entropy loss and a batch accuracy metric.

    Args:
        args: options object; this function reads only ``batch_size``, which
            is used for both the training and the test reader.

    Returns:
        A 6-tuple ``(loss, inference_program, adam, train_reader,
        test_reader, batch_acc)``. The Adam optimizer is returned as created;
        ``minimize`` is not called here.
    """
    hidden_dim = 512
    embedding_dim = 512
    max_sentence_len = 1500

    def gate_preactivation(step_input, previous_hidden):
        # Sum of an FC over the step input (with bias) and an FC over the
        # previous hidden state (without bias).
        from_input = fluid.layers.fc(
            input=step_input, size=hidden_dim, bias_attr=True)
        from_hidden = fluid.layers.fc(
            input=previous_hidden, size=hidden_dim, bias_attr=False)
        return fluid.layers.sums(input=[from_input, from_hidden])

    words = fluid.layers.data(
        name="words", shape=[1], lod_level=1, dtype='int64')
    embedded = fluid.layers.embedding(
        input=words, size=[len(word_dict), embedding_dim])
    projected = fluid.layers.fc(input=embedded, size=hidden_dim, act='tanh')

    rnn = fluid.layers.DynamicRNN()
    with rnn.block():
        step = rnn.step_input(projected)
        h_prev = rnn.memory(value=0.0, shape=[hidden_dim])
        c_prev = rnn.memory(value=0.0, shape=[hidden_dim])

        # Gates are built in the order forget, input, output, candidate.
        f_gate = fluid.layers.sigmoid(x=gate_preactivation(step, h_prev))
        i_gate = fluid.layers.sigmoid(x=gate_preactivation(step, h_prev))
        o_gate = fluid.layers.sigmoid(x=gate_preactivation(step, h_prev))
        candidate = fluid.layers.tanh(x=gate_preactivation(step, h_prev))

        # New cell state: forget_gate * prev_cell + input_gate * candidate.
        retained = fluid.layers.elementwise_mul(x=f_gate, y=c_prev)
        written = fluid.layers.elementwise_mul(x=i_gate, y=candidate)
        c_next = fluid.layers.sums(input=[retained, written])

        # New hidden state: output_gate * tanh(cell).
        squashed_cell = fluid.layers.tanh(x=c_next)
        h_next = fluid.layers.elementwise_mul(x=o_gate, y=squashed_cell)

        rnn.update_memory(c_prev, c_next)
        rnn.update_memory(h_prev, h_next)
        rnn.output(h_next)

    final_hidden = fluid.layers.sequence_pool(rnn(), 'last')
    logit = fluid.layers.fc(input=final_hidden, size=2, act='softmax')

    loss_label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    per_sample_loss = fluid.layers.cross_entropy(input=logit, label=loss_label)
    loss = fluid.layers.mean(x=per_sample_loss)

    # Accuracy metric; batch_size_tensor is passed as `total`.
    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
    acc_label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    batch_acc = fluid.layers.accuracy(
        input=logit, label=acc_label, total=batch_size_tensor)

    # Clone the main program before any optimizer is attached and reduce it to
    # what is needed for the accuracy targets.
    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        inference_program = fluid.io.get_inference_program(
            target_vars=[batch_acc, batch_size_tensor])

    adam = fluid.optimizer.Adam()

    train_samples = crop_sentence(imdb.train(word_dict), max_sentence_len)
    shuffled_train = paddle.reader.shuffle(train_samples, buf_size=25000)
    train_reader = batch(shuffled_train, batch_size=args.batch_size)

    test_samples = crop_sentence(imdb.test(word_dict), max_sentence_len)
    shuffled_test = paddle.reader.shuffle(test_samples, buf_size=25000)
    test_reader = batch(shuffled_test, batch_size=args.batch_size)

    return loss, inference_program, adam, train_reader, test_reader, batch_acc