def create_data_generator(word_dict, batch_size, num_epochs=1, is_train=False):
    """Create a sample generator for training or inference.

    Args:
        word_dict: vocabulary used to encode the IMDB samples
        batch_size: batch size fed to the network during training/inference
        num_epochs: number of passes over the dataset
        is_train: training/inference flag
    Return:
        data_generator_fn: the sample generator function
    """
    if is_train:
        examples = [i for i in imdb.train(word_dict)()]
        np.random.shuffle(examples)
    else:
        examples = [i for i in imdb.test(word_dict)()]

    def data_generator_fn():
        batch_x = []
        batch_y = []
        for i in range(num_epochs):
            print('Training epoch {}:'.format(i))
            for _x, _y in examples:
                # Truncate sentences to 800 tokens so that overly long
                # samples do not exhaust GPU memory.
                batch_x.append(_x[:800])
                batch_y.append(_y)
                if len(batch_x) == batch_size:
                    batch_x = array_normalize(batch_x, return_lod_tensor=False)
                    batch_y = array_normalize(batch_y)
                    yield [batch_x, batch_y]
                    batch_x = []
                    batch_y = []

    return data_generator_fn
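# A minimal usage sketch for the generator above, assuming the imports from
# the snippets in this file (paddle.dataset.imdb as imdb) and that
# array_normalize is defined elsewhere in this project:
word_dict = imdb.word_dict()
train_generator = create_data_generator(word_dict, batch_size=128,
                                        num_epochs=1, is_train=True)
for batch_x, batch_y in train_generator():
    pass  # feed batch_x / batch_y to the executor here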
def get_model(args, is_train, main_prog, startup_prog):
    if args.use_reader_op:
        raise Exception(
            "stacked_dynamic_lstm does not support the reader op for now.")
    lstm_size = 512
    emb_dim = 512
    crop_size = 1500
    with fluid.program_guard(main_prog, startup_prog):
        with fluid.unique_name.guard():
            data = fluid.layers.data(
                name="words", shape=[1], lod_level=1, dtype='int64')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            sentence = fluid.layers.embedding(
                input=data, size=[len(word_dict), emb_dim])
            logit = lstm_net(sentence, lstm_size)
            loss = fluid.layers.cross_entropy(input=logit, label=label)
            loss = fluid.layers.mean(x=loss)

            # accuracy metric; batch_size_tensor receives the actual batch size
            batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
            batch_acc = fluid.layers.accuracy(
                input=logit, label=label, total=batch_size_tensor)

            adam = None  # only created on the training path
            if is_train:
                adam = fluid.optimizer.Adam()
                adam.minimize(loss)

    if is_train:
        reader = crop_sentence(imdb.train(word_dict), crop_size)
    else:
        reader = crop_sentence(imdb.test(word_dict), crop_size)
    batched_reader = paddle.batch(
        paddle.reader.shuffle(reader, buf_size=25000),
        batch_size=args.batch_size * args.gpus)

    return loss, adam, [batch_acc], batched_reader, None
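# Hedged sketch of the crop_sentence helper used above (the benchmark's real
# implementation may differ slightly): it filters out samples whose effective
# length reaches crop_size, so very long reviews never enter the network.
def crop_sentence(reader, crop_size):
    unk_value = word_dict['<unk>']

    def __impl__():
        for item in reader():
            if len([x for x in item[0] if x != unk_value]) < crop_size:
                yield item

    return __impl__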
def main():
    args = parse_args()
    lstm_size = args.hidden_dim

    data = fluid.layers.data(
        name="words", shape=[1], lod_level=1, dtype='int64')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    sentence = fluid.layers.embedding(
        input=data, size=[len(word_dict), args.emb_dim])
    sentence = fluid.layers.fc(input=sentence, size=lstm_size, act='tanh')

    rnn = fluid.layers.DynamicRNN()
    with rnn.block():
        word = rnn.step_input(sentence)
        prev_hidden = rnn.memory(value=0.0, shape=[lstm_size])
        prev_cell = rnn.memory(value=0.0, shape=[lstm_size])

        def gate_common(ipt, hidden, size):
            # the input projection carries the bias; the recurrent one does not
            gate0 = fluid.layers.fc(input=ipt, size=size, bias_attr=True)
            gate1 = fluid.layers.fc(input=hidden, size=size, bias_attr=False)
            return fluid.layers.sums(input=[gate0, gate1])

        forget_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        input_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        output_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        cell_gate = fluid.layers.tanh(
            x=gate_common(word, prev_hidden, lstm_size))

        cell = fluid.layers.sums(input=[
            fluid.layers.elementwise_mul(x=forget_gate, y=prev_cell),
            fluid.layers.elementwise_mul(x=input_gate, y=cell_gate)
        ])
        hidden = fluid.layers.elementwise_mul(
            x=output_gate, y=fluid.layers.tanh(x=cell))

        rnn.update_memory(prev_cell, cell)
        rnn.update_memory(prev_hidden, hidden)
        rnn.output(hidden)

    last = fluid.layers.sequence_pool(rnn(), 'last')
    logit = fluid.layers.fc(input=last, size=2, act='softmax')
    loss = fluid.layers.cross_entropy(input=logit, label=label)
    loss = fluid.layers.mean(x=loss)

    # accuracy metric; batch_size_tensor receives the actual batch size
    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
    batch_acc = fluid.layers.accuracy(
        input=logit, label=label, total=batch_size_tensor)

    adam = fluid.optimizer.Adam()
    adam.minimize(loss)

    fluid.memory_optimize(fluid.default_main_program())

    place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            crop_sentence(imdb.train(word_dict), args.crop_size),
            buf_size=25000),
        batch_size=args.batch_size)

    train_acc_kpi = None
    for kpi in tracking_kpis:
        if kpi.name == 'imdb_%s_train_acc' % (args.batch_size):
            train_acc_kpi = kpi
    train_speed_kpi = None
    for kpi in tracking_kpis:
        if kpi.name == 'imdb_%s_train_speed' % (args.batch_size):
            train_speed_kpi = kpi

    iters, num_samples, start_time = 0, 0, time.time()
    for pass_id in range(args.pass_num):
        train_accs = []
        train_losses = []
        for batch_id, data in enumerate(train_reader()):
            if iters == args.skip_batch_num:
                start_time = time.time()
                num_samples = 0
            if iters == args.iterations:
                break
            tensor_words = to_lodtensor([x[0] for x in data], place)
            label_data = np.array([x[1] for x in data]).astype("int64")
            label_data = label_data.reshape((-1, 1))
            loss_np, acc, weight = exe.run(
                fluid.default_main_program(),
                feed={"words": tensor_words,
                      "label": label_data},
                fetch_list=[loss, batch_acc, batch_size_tensor])
            iters += 1
            for x in data:
                num_samples += len(x[0])
            # Note: the accuracy is accumulated over batches, not just
            # the current batch.
            print("Pass = %d, Iter = %d, Loss = %f, Accuracy = %f" %
                  (pass_id, iters, loss_np, acc))
        train_elapsed = time.time() - start_time
        examples_per_sec = num_samples / train_elapsed
        print('\nTotal examples: %d, total time: %.5f, %.5f examples/sec\n' %
              (num_samples, train_elapsed, examples_per_sec))
        train_speed_kpi.add_record(np.array(examples_per_sec, dtype='float32'))
        break
    train_speed_kpi.persist()
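# Hedged sketch of the to_lodtensor helper used in main() (the project's
# real implementation may differ): it flattens a batch of variable-length
# word-id sequences into a single LoDTensor whose LoD records where each
# sequence starts and ends. Assumes numpy as np and paddle.fluid as fluid.
def to_lodtensor(data, place):
    seq_lens = [len(seq) for seq in data]
    cur_len = 0
    lod = [cur_len]
    for seq_len in seq_lens:
        cur_len += seq_len
        lod.append(cur_len)
    flattened = np.concatenate(data, axis=0).astype("int64")
    flattened = flattened.reshape([len(flattened), 1])
    res = fluid.core.LoDTensor()
    res.set(flattened, place)
    res.set_lod([lod])
    return res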
# https://aistudio.baidu.com/aistudio/projectdetail/96357
# Import the required packages
import paddle
import paddle.dataset.imdb as imdb
import paddle.fluid as fluid
import numpy as np
import os

word_dict = imdb.word_dict()
dict_dim = len(word_dict)
train_reader = paddle.batch(
    paddle.reader.shuffle(imdb.train(word_dict), 512), batch_size=128)
test_reader = paddle.batch(imdb.test(word_dict), batch_size=128)


# Define the long short-term memory (LSTM) network
def lstm_net(ipt, input_dim):
    # The input is the word IDs of each sample
    emb = fluid.layers.embedding(input=ipt, size=[input_dim, 128],
                                 is_sparse=True)
    # First fully connected layer
    fc1 = fluid.layers.fc(input=emb, size=128)
    # LSTM layer; returns the hidden state and the cell state
    lstm1, _ = fluid.layers.dynamic_lstm(input=fc1, size=128)  # size = 4 * hidden_size
    # Max sequence pooling over the fully connected layer
    fc2 = fluid.layers.sequence_pool(input=fc1, pool_type='max')
    # Max sequence pooling over the LSTM output
    lstm2 = fluid.layers.sequence_pool(input=lstm1, pool_type='max')
    # Softmax output layer of size 2, i.e. positive/negative sentiment
    out = fluid.layers.fc(input=[fc2, lstm2], size=2, act='softmax')
    return out
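# The training snippet below references `words`, `label`, `avg_cost` and
# `acc`. A hedged sketch of how they are presumably defined on top of
# lstm_net (names chosen to match the snippet; the tutorial's exact code
# may differ):
words = fluid.layers.data(name='words', shape=[1], dtype='int64', lod_level=1)
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
model = lstm_net(words, dict_dim)
cost = fluid.layers.cross_entropy(input=model, label=label)
avg_cost = fluid.layers.mean(cost)
acc = fluid.layers.accuracy(input=model, label=label)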
test_program = fluid.default_main_program().clone(for_test=True)

# Define the optimizer
optimizer = fluid.optimizer.AdagradOptimizer(learning_rate=0.002)
opt = optimizer.minimize(avg_cost)

# Create an executor; training on the CPU is rather slow
# place = fluid.CPUPlace()
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
# Initialize the parameters
exe.run(fluid.default_startup_program())

# Fetch the training and test data
print("Loading training data...")
train_reader = paddle.batch(
    paddle.reader.shuffle(imdb.train(word_dict), 25000), batch_size=128)
print("Loading test data...")
test_reader = paddle.batch(imdb.test(word_dict), batch_size=128)

# Define the feeder for the input data
feeder = fluid.DataFeeder(place=place, feed_list=[words, label])

# Start training
for pass_id in range(1):
    # Train one pass
    train_cost = 0
    for batch_id, data in enumerate(train_reader()):
        train_cost = exe.run(program=fluid.default_main_program(),
                             feed=feeder.feed(data),
                             fetch_list=[avg_cost])
def get_model(args):
    if args.use_reader_op:
        raise Exception(
            "stacked_dynamic_lstm does not support the reader op for now.")
    lstm_size = 512
    emb_dim = 512
    crop_size = 1500

    data = fluid.layers.data(
        name="words", shape=[1], lod_level=1, dtype='int64')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    sentence = fluid.layers.embedding(
        input=data, size=[len(word_dict), emb_dim])
    sentence = fluid.layers.fc(input=sentence, size=lstm_size, act='tanh')

    rnn = fluid.layers.DynamicRNN()
    with rnn.block():
        word = rnn.step_input(sentence)
        prev_hidden = rnn.memory(value=0.0, shape=[lstm_size])
        prev_cell = rnn.memory(value=0.0, shape=[lstm_size])

        def gate_common(ipt, hidden, size):
            # the input projection carries the bias; the recurrent one does not
            gate0 = fluid.layers.fc(input=ipt, size=size, bias_attr=True)
            gate1 = fluid.layers.fc(input=hidden, size=size, bias_attr=False)
            return fluid.layers.sums(input=[gate0, gate1])

        forget_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        input_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        output_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        cell_gate = fluid.layers.tanh(
            x=gate_common(word, prev_hidden, lstm_size))

        cell = fluid.layers.sums(input=[
            fluid.layers.elementwise_mul(x=forget_gate, y=prev_cell),
            fluid.layers.elementwise_mul(x=input_gate, y=cell_gate)
        ])
        hidden = fluid.layers.elementwise_mul(
            x=output_gate, y=fluid.layers.tanh(x=cell))

        rnn.update_memory(prev_cell, cell)
        rnn.update_memory(prev_hidden, hidden)
        rnn.output(hidden)

    last = fluid.layers.sequence_pool(rnn(), 'last')
    logit = fluid.layers.fc(input=last, size=2, act='softmax')
    loss = fluid.layers.cross_entropy(input=logit, label=label)
    loss = fluid.layers.mean(x=loss)

    # accuracy metric; batch_size_tensor receives the actual batch size
    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
    batch_acc = fluid.layers.accuracy(
        input=logit, label=label, total=batch_size_tensor)

    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        inference_program = fluid.io.get_inference_program(
            target_vars=[batch_acc, batch_size_tensor])

    adam = fluid.optimizer.Adam()

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            crop_sentence(imdb.train(word_dict), crop_size), buf_size=25000),
        batch_size=args.batch_size * args.gpus)
    test_reader = paddle.batch(
        paddle.reader.shuffle(
            crop_sentence(imdb.test(word_dict), crop_size), buf_size=25000),
        batch_size=args.batch_size)

    return loss, inference_program, adam, train_reader, test_reader, batch_acc
def get_model(args):
    lstm_size = 512
    emb_dim = 512
    crop_size = 1500

    data = fluid.layers.data(
        name="words", shape=[1], lod_level=1, dtype='int64')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    sentence = fluid.layers.embedding(
        input=data, size=[len(word_dict), emb_dim])
    sentence = fluid.layers.fc(input=sentence, size=lstm_size, act='tanh')

    rnn = fluid.layers.DynamicRNN()
    with rnn.block():
        word = rnn.step_input(sentence)
        prev_hidden = rnn.memory(value=0.0, shape=[lstm_size])
        prev_cell = rnn.memory(value=0.0, shape=[lstm_size])

        def gate_common(ipt, hidden, size):
            # the input projection carries the bias; the recurrent one does not
            gate0 = fluid.layers.fc(input=ipt, size=size, bias_attr=True)
            gate1 = fluid.layers.fc(input=hidden, size=size, bias_attr=False)
            return fluid.layers.sums(input=[gate0, gate1])

        forget_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        input_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        output_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        cell_gate = fluid.layers.tanh(
            x=gate_common(word, prev_hidden, lstm_size))

        cell = fluid.layers.sums(input=[
            fluid.layers.elementwise_mul(x=forget_gate, y=prev_cell),
            fluid.layers.elementwise_mul(x=input_gate, y=cell_gate)
        ])
        hidden = fluid.layers.elementwise_mul(
            x=output_gate, y=fluid.layers.tanh(x=cell))

        rnn.update_memory(prev_cell, cell)
        rnn.update_memory(prev_hidden, hidden)
        rnn.output(hidden)

    last = fluid.layers.sequence_pool(rnn(), 'last')
    logit = fluid.layers.fc(input=last, size=2, act='softmax')
    loss = fluid.layers.cross_entropy(input=logit, label=label)
    loss = fluid.layers.mean(x=loss)

    # accuracy metric; batch_size_tensor receives the actual batch size
    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
    batch_acc = fluid.layers.accuracy(
        input=logit, label=label, total=batch_size_tensor)

    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        inference_program = fluid.io.get_inference_program(
            target_vars=[batch_acc, batch_size_tensor])

    adam = fluid.optimizer.Adam()

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            crop_sentence(imdb.train(word_dict), crop_size), buf_size=25000),
        batch_size=args.batch_size)
    test_reader = paddle.batch(
        paddle.reader.shuffle(
            crop_sentence(imdb.test(word_dict), crop_size), buf_size=25000),
        batch_size=args.batch_size)

    return loss, inference_program, adam, train_reader, test_reader, batch_acc
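# Hedged usage sketch for get_model. The wiring below is an assumption about
# how the benchmark harness consumes the returned tuple (parse_args and
# args.device are hypothetical names, not confirmed by this file):
args = parse_args()  # hypothetical argument parser from the harness
loss, inference_program, adam, train_reader, test_reader, batch_acc = \
    get_model(args)
adam.minimize(loss)  # the optimizer is returned un-applied; the caller applies it
place = fluid.CUDAPlace(0) if args.device == 'GPU' else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())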
# Define the optimizer
optimizer = fluid.optimizer.AdagradOptimizer(learning_rate=0.002)
opt = optimizer.minimize(avg_cost)

# Create an executor; training on the CPU is rather slow
# place = fluid.CPUPlace()
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
# Initialize the parameters
exe.run(fluid.default_startup_program())

# Fetch the training and test data
print("Loading training data...")
train_reader = paddle.batch(
    paddle.reader.shuffle(imdb.train(word_dict), 25000), batch_size=128)
print("Loading test data...")
test_reader = paddle.batch(imdb.test(word_dict), batch_size=128)

# Define the feeder for the input data
feeder = fluid.DataFeeder(place=place, feed_list=[words, label])

# Start training
for pass_id in range(1):
    # Train one pass
    train_cost = 0
    for batch_id, data in enumerate(train_reader()):
        train_cost, train_acc = exe.run(
            program=fluid.default_main_program(),
            feed=feeder.feed(data),
            fetch_list=[avg_cost, acc])
        if batch_id % 10 == 0:
            print('Pass: %d, Batch: %d, Cost: %f, Accuracy: %f' %
                  (pass_id, batch_id, train_cost[0], train_acc[0]))
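# Hedged sketch of the matching evaluation pass (assumes the test_program
# clone from the earlier snippet; the tutorial's exact code may differ):
test_costs, test_accs = [], []
for data in test_reader():
    test_cost, test_acc = exe.run(program=test_program,
                                  feed=feeder.feed(data),
                                  fetch_list=[avg_cost, acc])
    test_costs.append(test_cost[0])
    test_accs.append(test_acc[0])
print('Test Cost: %f, Accuracy: %f' %
      (sum(test_costs) / len(test_costs), sum(test_accs) / len(test_accs)))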