# Dropout keep probability is supplied at run time through this placeholder.
with tf.name_scope('dropout'):
    keep_prob = tf.placeholder("float", name='keep_prob')

# In[5]:

# One dropout-wrapped LSTM cell per layer, stacked into a single multi-layer cell.
cells = [
    rnn_cell.DropoutWrapper(
        rnn_cell.BasicLSTMCell(num_hidden),
        output_keep_prob=keep_prob,
    )
    for _ in range(num_layers)
]
stacked_lstm = rnn_cell.MultiRNNCell(cells)

# Positional arguments shared by the two graph variants built below. They are
# kept positional on purpose so the call binds exactly as the original did.
_seq2seq_args = (
    encode_input,
    decode_input,
    stacked_lstm,
    vocab_size,
    vocab_size,
    num_hidden,
)

with tf.variable_scope("decoders") as scope:
    # First graph: decoder consumes the provided decode_input at every step.
    decode_outputs, decode_state = seq2seq.embedding_attention_seq2seq(
        *_seq2seq_args)

    # Second graph reuses the variables created above, but is built with
    # feed_previous=True.
    scope.reuse_variables()
    decode_outputs_test, decode_state_test = seq2seq.embedding_attention_seq2seq(
        *_seq2seq_args, feed_previous=True)

# In[6]:
def model(self, mode="train", num_layers=1, cell_size=32, cell_type="BasicLSTMCell",
          embedding_size=20, learning_rate=0.0001, tensorboard_verbose=0,
          checkpoint_path=None):
    '''
    Build the seq2seq graph and wrap it in a TFLearn DNN model.

    A single int32 input named "XY" carries, per example, the encoder
    sequence (first self.in_seq_len columns) followed by the decoder sequence
    (next self.out_seq_len columns). The decoder inputs are shifted right by
    one step: a GO symbol is prepended and the last decoder input is dropped.

    mode = string, either "train" or "predict"; any mode other than "train"
           builds the graph with feed_previous=True
    num_layers = number of RNN cell layers to use
    cell_size = size for the hidden layer in the RNN cell
    cell_type = attribute of rnn_cell specifying which RNN cell type to use;
                the class is always called with state_is_tuple=True, so it
                must accept that keyword
    embedding_size = embedding size handed to the seq2seq function
    learning_rate = learning rate handed to tflearn.regression (adam)
    tensorboard_verbose = forwarded to tflearn.DNN
    checkpoint_path = checkpoint location; defaults to "s2s_checkpoint.tfl",
                      placed under self.data_dir when that is set

    Side effects: sets self.n_input_symbols and self.n_output_symbols.

    Raises AssertionError for an unknown mode and Exception for an unknown
    self.seq2seq_model.

    Return TFLearn model instance (tflearn.DNN).
    '''
    assert mode in ["train", "predict"]

    checkpoint_path = checkpoint_path or (
        "%s%ss2s_checkpoint.tfl" % (self.data_dir or "", "/" if self.data_dir else ""))

    # Unique integer value used to trigger decoder outputs in the seq2seq RNN.
    GO_VALUE = self.out_max_int + 1

    network = tflearn.input_data(
        shape=[None, self.in_seq_len + self.out_seq_len], dtype=tf.int32, name="XY")

    # Encoder inputs: first in_seq_len columns, as a list of per-step tensors.
    encoder_inputs = tf.slice(network, [0, 0], [-1, self.in_seq_len], name="enc_in")
    encoder_inputs = tf.unstack(encoder_inputs, axis=1)

    # Decoder inputs: the following out_seq_len columns, as a list of per-step tensors.
    decoder_inputs = tf.slice(
        network, [0, self.in_seq_len], [-1, self.out_seq_len], name="dec_in")
    decoder_inputs = tf.unstack(decoder_inputs, axis=1)

    # Insert "GO" as the first decoder input and drop the last decoder input.
    go_input = tf.multiply(tf.ones_like(decoder_inputs[0], dtype=tf.int32), GO_VALUE)
    decoder_inputs = [go_input] + decoder_inputs[:self.out_seq_len - 1]

    feed_previous = not (mode == "train")

    if self.verbose > 3:
        print("feed_previous = %s" % str(feed_previous))
        print("encoder inputs: %s" % str(encoder_inputs))
        print("decoder inputs: %s" % str(decoder_inputs))
        print("len decoder inputs: %s" % len(decoder_inputs))

    self.n_input_symbols = self.in_max_int + 1    # symbols 0 .. in_max_int
    self.n_output_symbols = self.out_max_int + 2  # extra "GO" symbol for decoder inputs

    def make_cell():
        # A fresh cell object per layer. Reusing one object for every layer
        # (``[cell] * num_layers``) is rejected by some TF 1.x releases and
        # makes the layers share weights in others.
        return getattr(rnn_cell, cell_type)(cell_size, state_is_tuple=True)

    if num_layers == 1:
        cell = make_cell()
    else:
        cell = rnn_cell.MultiRNNCell([make_cell() for _ in range(num_layers)])

    if self.seq2seq_model == "embedding_rnn":
        model_outputs, states = seq2seq.embedding_rnn_seq2seq(
            encoder_inputs,
            decoder_inputs,
            cell,
            num_encoder_symbols=self.n_input_symbols,
            num_decoder_symbols=self.n_output_symbols,
            embedding_size=embedding_size,
            feed_previous=feed_previous)
    elif self.seq2seq_model == "embedding_attention":
        model_outputs, states = seq2seq.embedding_attention_seq2seq(
            encoder_inputs,
            decoder_inputs,
            cell,
            num_encoder_symbols=self.n_input_symbols,
            num_decoder_symbols=self.n_output_symbols,
            embedding_size=embedding_size,
            num_heads=1,
            initial_state_attention=False,
            feed_previous=feed_previous)
    else:
        raise Exception('[TFLearnSeq2Seq] Unknown seq2seq model %s' % self.seq2seq_model)

    # Registered so TFLearn knows what to save and restore.
    tf.add_to_collection(
        tf.GraphKeys.LAYER_VARIABLES + '/' + "seq2seq_model", model_outputs)

    # model_outputs is a list with one tensor per decoder input.
    if self.verbose > 2:
        print("model outputs: %s" % model_outputs)

    # Stack the per-step outputs along a new axis 1:
    # [-1, n_decoder_inputs (= self.out_seq_len), num_decoder_symbols]
    network = tf.stack(model_outputs, axis=1)
    if self.verbose > 2:
        print("packed model outputs: %s" % network)

    if self.verbose > 3:
        # GLOBAL_VARIABLES is the non-deprecated name of GraphKeys.VARIABLES.
        all_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        print("all_vars = %s" % all_vars)

    # Placeholder for the target variable (i.e. trainY input).
    with tf.name_scope("TargetsData"):
        targetY = tf.placeholder(shape=[None, self.out_seq_len], dtype=tf.int32, name="Y")

    network = tflearn.regression(
        network,
        placeholder=targetY,
        optimizer='adam',
        learning_rate=learning_rate,
        loss=self.sequence_loss,
        metric=self.accuracy,
        name="Y")

    model = tflearn.DNN(
        network,
        tensorboard_verbose=tensorboard_verbose,
        checkpoint_path=checkpoint_path)
    return model
def _prepare_model(self):
    """Build the training and test seq2seq graphs, the loss and the train op.

    Reads self.seq_in_len, self.seq_out_len, self.cell_units,
    self.vocab_in_size, self.vocab_out_size, self.embedding_dim and
    self.params (``attention``; optionally ``keep_probability`` and
    ``num_layers``).

    Sets self.encode_in, self.labels, self.decode_outs_test, self.loss,
    self.optimizer and self.train_op, plus self.keep_prob when self.params
    has a ``keep_probability`` attribute.
    """
    # One int32 placeholder per encoder step and per target step.
    self.encode_in = [
        tf.placeholder(tf.int32, shape=(None, ), name="ei_%i" % i)
        for i in range(self.seq_in_len)
    ]
    self.labels = [
        tf.placeholder(tf.int32, shape=(None, ), name="l_%i" % i)
        for i in range(self.seq_out_len)
    ]
    # Every target position gets weight 1.0 in the loss.
    loss_weights = [tf.ones_like(l, dtype=tf.float32) for l in self.labels]

    # Decoder input is the labels shifted right by one, led by an all-zero "GO" step.
    decode_in = [
        tf.zeros_like(self.encode_in[0], dtype=np.int32, name="GO")
    ] + self.labels[:-1]

    use_dropout = hasattr(self.params, 'keep_probability')
    if use_dropout:
        self.keep_prob = tf.placeholder("float")

    def make_cell():
        # A fresh GRU cell (and dropout wrapper) per layer. Reusing one object
        # for every layer (``[cell] * n``) is rejected by some TF 1.x releases
        # and makes the layers share weights in others.
        layer = rnn_cell.GRUCell(self.cell_units)
        if use_dropout:
            layer = rnn_cell.DropoutWrapper(layer, output_keep_prob=self.keep_prob)
        return layer

    if hasattr(self.params, 'num_layers'):
        cell = rnn_cell.MultiRNNCell(
            [make_cell() for _ in range(self.params.num_layers)])
    else:
        cell = make_cell()

    # Both variants take the same arguments, so pick the function once.
    if self.params.attention:
        build_seq2seq = seq2seq.embedding_attention_seq2seq
    else:
        build_seq2seq = seq2seq.embedding_rnn_seq2seq

    shared_kwargs = dict(
        encoder_inputs=self.encode_in,
        decoder_inputs=decode_in,
        cell=cell,
        num_encoder_symbols=self.vocab_in_size,
        num_decoder_symbols=self.vocab_out_size,
        embedding_size=self.embedding_dim,
    )

    with tf.variable_scope("decoders") as scope:
        # Training graph: decoder reads the provided decoder inputs.
        decode_outs, decode_state = build_seq2seq(
            feed_previous=False, **shared_kwargs)
        # Test graph: same variables, built with feed_previous=True.
        scope.reuse_variables()
        self.decode_outs_test, decode_state_test = build_seq2seq(
            feed_previous=True, **shared_kwargs)

    # NOTE(review): the 4th positional argument is passed through unchanged;
    # its meaning depends on the installed TensorFlow version - confirm.
    self.loss = seq2seq.sequence_loss(
        decode_outs, self.labels, loss_weights, self.vocab_out_size)
    self.optimizer = tf.train.AdamOptimizer(1e-4)
    self.train_op = self.optimizer.minimize(self.loss)
def model(self, mode="train", num_layers=1, cell_size=32, cell_type="BasicLSTMCell",
          embedding_size=20, learning_rate=0.0001, tensorboard_verbose=0,
          checkpoint_path=None):
    '''
    Build tensor specifying graph of operations for the seq2seq neural
    network model, and return it wrapped in a TFLearn DNN.

    The int32 "XY" input concatenates, per example, self.in_seq_len encoder
    symbols and self.out_seq_len decoder symbols. Decoder inputs are the
    decoder symbols shifted right by one step behind a GO symbol.

    mode = string, either "train" or "predict"; "predict" builds the graph
           with feed_previous=True
    num_layers = number of RNN cell layers to use
    cell_size = size for the hidden layer in the RNN cell
    cell_type = attribute of rnn_cell specifying which RNN cell type to use
                (called with state_is_tuple=True)
    embedding_size = embedding size passed to the seq2seq function
    learning_rate = learning rate passed to tflearn.regression
    tensorboard_verbose = passed to tflearn.DNN
    checkpoint_path = where checkpoints go; when falsy, "s2s_checkpoint.tfl"
                      is used, prefixed with self.data_dir + "/" if set

    Also stores self.n_input_symbols and self.n_output_symbols.

    Raises AssertionError if mode is not "train"/"predict", and Exception if
    self.seq2seq_model is not a known model name.

    Return TFLearn model instance. Use DNN model for this.
    '''
    assert mode in ["train", "predict"]

    if not checkpoint_path:
        prefix = self.data_dir or ""
        separator = "/" if self.data_dir else ""
        checkpoint_path = "%s%ss2s_checkpoint.tfl" % (prefix, separator)

    # Unique integer value used to trigger decoder outputs in the seq2seq RNN.
    GO_VALUE = self.out_max_int + 1

    total_len = self.in_seq_len + self.out_seq_len
    network = tflearn.input_data(shape=[None, total_len], dtype=tf.int32, name="XY")

    # Split the joint input into encoder / decoder parts, then into per-step lists.
    enc_slice = tf.slice(network, [0, 0], [-1, self.in_seq_len], name="enc_in")
    encoder_inputs = tf.unstack(enc_slice, axis=1)
    dec_slice = tf.slice(
        network, [0, self.in_seq_len], [-1, self.out_seq_len], name="dec_in")
    decoder_inputs = tf.unstack(dec_slice, axis=1)

    # Insert "GO" symbol as the first decoder input; drop the last decoder input.
    go_input = tf.multiply(tf.ones_like(decoder_inputs[0], dtype=tf.int32), GO_VALUE)
    decoder_inputs = [go_input] + decoder_inputs[:self.out_seq_len - 1]

    feed_previous = not (mode == "train")

    if self.verbose > 3:
        print("feed_previous = %s" % str(feed_previous))
        print("encoder inputs: %s" % str(encoder_inputs))
        print("decoder inputs: %s" % str(decoder_inputs))
        print("len decoder inputs: %s" % len(decoder_inputs))

    self.n_input_symbols = self.in_max_int + 1    # symbols 0 .. in_max_int
    self.n_output_symbols = self.out_max_int + 2  # extra "GO" symbol for decoder inputs

    # Build an independent cell object for each layer. Repeating one object
    # (``[single_cell] * num_layers``) is rejected by some TF 1.x releases and
    # ties the layers' weights together in others.
    cell_class = getattr(rnn_cell, cell_type)
    layer_cells = [cell_class(cell_size, state_is_tuple=True)
                   for _ in range(max(num_layers, 1) if num_layers == 1 else num_layers)]
    if num_layers == 1:
        cell = layer_cells[0]
    else:
        cell = rnn_cell.MultiRNNCell(layer_cells)

    if self.seq2seq_model == "embedding_rnn":
        model_outputs, states = seq2seq.embedding_rnn_seq2seq(
            encoder_inputs,
            decoder_inputs,
            cell,
            num_encoder_symbols=self.n_input_symbols,
            num_decoder_symbols=self.n_output_symbols,
            embedding_size=embedding_size,
            feed_previous=feed_previous)
    elif self.seq2seq_model == "embedding_attention":
        model_outputs, states = seq2seq.embedding_attention_seq2seq(
            encoder_inputs,
            decoder_inputs,
            cell,
            num_encoder_symbols=self.n_input_symbols,
            num_decoder_symbols=self.n_output_symbols,
            embedding_size=embedding_size,
            num_heads=1,
            initial_state_attention=False,
            feed_previous=feed_previous)
    else:
        raise Exception('[TFLearnSeq2Seq] Unknown seq2seq model %s' % self.seq2seq_model)

    # For TFLearn to know what to save and restore.
    tf.add_to_collection(
        tf.GraphKeys.LAYER_VARIABLES + '/' + "seq2seq_model", model_outputs)

    # model_outputs: list of the same length as decoder_inputs.
    if self.verbose > 2:
        print("model outputs: %s" % model_outputs)

    # shape [-1, n_decoder_inputs (= self.out_seq_len), num_decoder_symbols]
    network = tf.stack(model_outputs, axis=1)
    if self.verbose > 2:
        print("packed model outputs: %s" % network)

    if self.verbose > 3:
        # GraphKeys.VARIABLES is deprecated; GLOBAL_VARIABLES is its replacement.
        all_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        print("all_vars = %s" % all_vars)

    # Placeholder for target variable (i.e. trainY input).
    with tf.name_scope("TargetsData"):
        targetY = tf.placeholder(shape=[None, self.out_seq_len], dtype=tf.int32, name="Y")

    network = tflearn.regression(
        network,
        placeholder=targetY,
        optimizer='adam',
        learning_rate=learning_rate,
        loss=self.sequence_loss,
        metric=self.accuracy,
        name="Y")

    model = tflearn.DNN(
        network,
        tensorboard_verbose=tensorboard_verbose,
        checkpoint_path=checkpoint_path)
    return model
def __init__(self,args,infer=False):
    """Build the attention seq2seq graph, its loss and the training op.

    args:  settings object; this method reads ``model`` ('rnn', 'gru' or
           'lstm'), ``rnn_size``, ``num_layers``, ``batch_size``,
           ``seq_length``, ``vocab_size`` and ``grad_clip``.
    infer: when true, ``args.batch_size`` and ``args.seq_length`` are both
           overwritten with 1 on the caller's ``args`` object before the
           graph is built.

    Sets self.args, self.cell, self.input_data, self.targets,
    self.initial_state, self.saved_outputs, self.logits, self.probs,
    self.cost, self.final_state, self.lr and self.train_op.

    Raises Exception when ``args.model`` is not a supported cell type.
    """
    self.args=args
    if infer:
        # Inference works on one symbol of one sequence at a time.
        args.batch_size=1
        args.seq_length=1

    if args.model=='rnn':
        cell_fn=rnn_cell.BasicRNNCell
    elif args.model=='gru':
        cell_fn=rnn_cell.GRUCell
    elif args.model=='lstm':
        cell_fn=rnn_cell.BasicLSTMCell
    else:
        raise Exception("模型不支持:{}".format(args.model))

    # One cell object per layer. Repeating a single object (``[cell] * n``)
    # is rejected by some TF 1.x releases and ties the layers' weights
    # together in others.
    layer_cells=[cell_fn(args.rnn_size) for _ in range(args.num_layers)]
    self.cell=cell=rnn_cell.MultiRNNCell(layer_cells)

    # Integer symbol ids, one row per sequence in the batch.
    self.input_data=tf.placeholder(tf.int32,[args.batch_size,args.seq_length])
    self.targets=tf.placeholder(tf.int32,[args.batch_size,args.seq_length])
    self.initial_state=cell.zero_state(args.batch_size,tf.float32)

    # Variables are created with tf.get_variable() inside a variable scope so
    # they can be shared: unlike tf.Variable(), which always creates a new
    # variable, get_variable() can hand back an existing variable of that name.
    # NOTE(review): the logits below no longer use softmax_w / softmax_b (the
    # projection line is disabled), but both variables are still created.
    with tf.variable_scope("rnnlm"):
        softmax_w=tf.get_variable("softmax_w",[args.rnn_size,args.vocab_size])
        softmax_b=tf.get_variable("softmax_b",[args.vocab_size])

    # An earlier variant (removed here) looked the inputs up in an explicit
    # "embedding" variable on the CPU, and at inference time used a loop
    # function that fed the argmax of the previous step's projected output
    # back in as the next input. Decoding through seq2seq.attention_decoder
    # or seq2seq.rnn_decoder was also tried.

    # NOTE(review): SOURCE has lost its indentation; the statements below are
    # assumed to sit outside the "rnnlm" variable scope - confirm.

    # Cut the input into seq_length per-step slices and drop the step axis
    # of each slice. The (axis, num_splits, value) argument order is the
    # pre-1.0 TensorFlow tf.split signature.
    inputs=tf.split(1,args.seq_length,self.input_data)
    inputs=[tf.squeeze(input_,[1]) for input_ in inputs]

    # The same per-step list is used as encoder input and as decoder input.
    outputs,last_state=seq2seq.embedding_attention_seq2seq(
        inputs,inputs,cell,args.vocab_size,args.vocab_size,
        args.rnn_size)
    self.saved_outputs=outputs

    # Join the per-step outputs along axis 1, then flatten to rows of
    # vocab_size values (pre-1.0 tf.concat(axis, values) signature).
    output=tf.reshape(tf.concat(1,outputs),[-1,args.vocab_size])

    # The seq2seq outputs are used directly as logits; the extra
    # softmax_w / softmax_b projection is disabled.
    self.logits=output
    self.probs=tf.nn.softmax(self.logits)

    # Loss inputs: logits flattened as above, targets flattened to one
    # dimension, and a weight of 1.0 for each of the batch_size * seq_length
    # positions. See
    # https://blog.csdn.net/xyz1584172808/article/details/83056179
    # for a walkthrough of the output dimensions.
    # NOTE(review): the 4th positional argument is passed through unchanged;
    # its meaning depends on the installed TensorFlow version - confirm.
    loss=seq2seq.sequence_loss_by_example(
        [self.logits],
        [tf.reshape(self.targets,[-1])],
        [tf.ones([args.batch_size*args.seq_length])],
        args.vocab_size)
    self.cost=tf.reduce_sum(loss)/args.batch_size/args.seq_length
    self.final_state=last_state

    # Learning rate lives in a non-trainable variable, initialised to 0.0.
    self.lr=tf.Variable(0.0,trainable=False)
    tvars=tf.trainable_variables()
    # Clip gradients by global norm before applying them.
    grads,_=tf.clip_by_global_norm(tf.gradients(self.cost,tvars),args.grad_clip)
    optimizer=tf.train.AdamOptimizer(self.lr)
    self.train_op=optimizer.apply_gradients(zip(grads,tvars))