def embedding_rnn_seq2seq(encoder_inputs,
                          decoder_inputs,
                          cell,
                          num_encoder_symbols,
                          num_decoder_symbols,
                          embedding_size,
                          output_projection=None,
                          feed_previous=False,
                          dtype=None,
                          scope=None):
  with variable_scope.variable_scope(scope or "embedding_rnn_seq2seq") as scope:
    if dtype is not None:
      scope.set_dtype(dtype)
    else:
      dtype = scope.dtype

    # Encoder.
    encoder_cell = rnn.EmbeddingWrapper(
        cell,
        embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    _, encoder_state = rnn.static_rnn(encoder_cell, encoder_inputs, dtype=dtype)

    # Decoder.
    if output_projection is None:
      cell = rnn.OutputProjectionWrapper(cell, num_decoder_symbols)

    if isinstance(feed_previous, bool):
      return embedding_rnn_decoder(
          decoder_inputs,
          encoder_state,
          cell,
          num_decoder_symbols,
          embedding_size,
          output_projection=output_projection,
          feed_previous=feed_previous)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(), reuse=reuse):
        outputs, state = embedding_rnn_decoder(
            decoder_inputs,
            encoder_state,
            cell,
            num_decoder_symbols,
            embedding_size,
            output_projection=output_projection,
            feed_previous=feed_previous_bool,
            update_embedding_for_previous=False)
        state_list = [state]
        if nest.is_sequence(state):
          state_list = nest.flatten(state)
        return outputs + state_list

    outputs_and_state = control_flow_ops.cond(feed_previous,
                                              lambda: decoder(True),
                                              lambda: decoder(False))
    outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
    state_list = outputs_and_state[outputs_len:]
    state = state_list[0]
    if nest.is_sequence(encoder_state):
      state = nest.pack_sequence_as(structure=encoder_state,
                                    flat_sequence=state_list)
    return outputs_and_state[:outputs_len], state
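For orientation, a minimal driver for the helper above; a sketch only, assuming TensorFlow 1.x with tf.contrib available and that embedding_rnn_seq2seq and its decoder dependencies are importable. All sizes and names are illustrative:

import tensorflow as tf
from tensorflow.contrib import rnn

# Illustrative sizes; a real task would use its own vocabulary and lengths.
seq_len, vocab, emb = 5, 1000, 64

encoder_inputs = [tf.placeholder(tf.int32, [None], name="enc%d" % i)
                  for i in range(seq_len)]
decoder_inputs = [tf.placeholder(tf.int32, [None], name="dec%d" % i)
                  for i in range(seq_len)]

outputs, state = embedding_rnn_seq2seq(
    encoder_inputs, decoder_inputs, rnn.GRUCell(128),
    num_encoder_symbols=vocab, num_decoder_symbols=vocab,
    embedding_size=emb, feed_previous=True)
# outputs: seq_len tensors of shape [batch_size, vocab]; the projection to
# vocab size is applied internally because output_projection is None.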
def testEmbeddingWrapperWithDynamicRnn(self):
  with self.test_session() as sess:
    with variable_scope.variable_scope("root"):
      inputs = ops.convert_to_tensor([[[0], [0]]], dtype=dtypes.int64)
      input_lengths = ops.convert_to_tensor([2], dtype=dtypes.int64)
      embedding_cell = contrib_rnn.EmbeddingWrapper(
          rnn_cell_impl.BasicLSTMCell(1, state_is_tuple=True),
          embedding_classes=1,
          embedding_size=2)
      outputs, _ = rnn.dynamic_rnn(
          cell=embedding_cell,
          inputs=inputs,
          sequence_length=input_lengths,
          dtype=dtypes.float32)
      sess.run([variables_lib.global_variables_initializer()])
      # This will fail if output's dtype is inferred from input's.
      sess.run(outputs)
def testEmbeddingWrapper(self):
  with self.test_session() as sess:
    with variable_scope.variable_scope(
        "root", initializer=init_ops.constant_initializer(0.5)):
      x = array_ops.zeros([1, 1], dtype=dtypes.int32)
      m = array_ops.zeros([1, 2])
      embedding_cell = contrib_rnn.EmbeddingWrapper(
          rnn_cell_impl.GRUCell(2), embedding_classes=3, embedding_size=2)
      self.assertEqual(embedding_cell.output_size, 2)
      g, new_m = embedding_cell(x, m)
      sess.run([variables_lib.global_variables_initializer()])
      res = sess.run([g, new_m],
                     {x.name: np.array([[1]]),
                      m.name: np.array([[0.1, 0.1]])})
      self.assertEqual(res[1].shape, (1, 2))
      # The numbers in results were not calculated, this is just a smoke test.
      self.assertAllClose(res[0], [[0.17139, 0.17139]])
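As a reading aid for the two tests above, a conceptual sketch of what EmbeddingWrapper does per step: it owns an [embedding_classes, embedding_size] matrix, looks the integer inputs up in it, and feeds the embedded vectors to the wrapped cell. The function name below is illustrative, not the library's internals:

import tensorflow as tf

def embedding_wrapper_step(cell, ids, state, embedding_classes, embedding_size):
  # Conceptual equivalent of one EmbeddingWrapper call: embed, then step
  # the wrapped cell on the embedded input.
  embedding = tf.get_variable("embedding",
                              [embedding_classes, embedding_size])
  # ids: int tensor of shape [batch_size, 1] -> embedded [batch_size, embedding_size]
  embedded = tf.nn.embedding_lookup(embedding, tf.reshape(ids, [-1]))
  return cell(embedded, state)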
def create_cell_scopes(self):
  self.enc_cells_text = rnn.EmbeddingWrapper(
      self.cell_type(self.cell_size), self.decoder_words,
      self.text_embedding_size)
  self.enc_scope_text = "encoder_text"
  max_val = np.sqrt(6. / (self.image_rep_size + self.image_embedding_size))
  self.W_enc_img = tf.Variable(
      tf.random_uniform([self.image_rep_size, self.image_embedding_size],
                        -1. * max_val, max_val),
      name="W_enc_img")
  self.b_enc_img = tf.Variable(
      tf.constant(0., shape=[self.image_embedding_size]), name="b_enc_img")
  self.enc_scope_img = "encoder_img"
  self.enc_cells_utter = self.cell_type(self.cell_size)
  self.enc_scope_utter = "encoder_utter"
  if self.task_type == "text":
    self.dec_cells_text = self.cell_type(self.cell_size)
    self.dec_scope_text = "decoder_text"
  if self.task_type == "image":
    self.tgt_scope_img = "target_encoder_img"
    self.W_enc_tgt_img = tf.Variable(
        tf.random_uniform([self.image_rep_size, self.image_embedding_size],
                          -1. * max_val, max_val),
        name="W_enc_tgt_img")
    self.b_enc_tgt_img = tf.Variable(
        tf.constant(0., shape=[self.image_embedding_size]),
        name="b_enc_tgt_img")
  max_val = np.sqrt(6. / (self.cell_size + self.image_embedding_size))
  self.proj_scope_utter = "proj_utter"
  self.W_proj_utter = tf.Variable(
      tf.random_uniform([self.cell_size, self.image_embedding_size],
                        -1. * max_val, max_val),
      name="W_proj_utter")
  self.b_proj_utter = tf.Variable(
      tf.constant(0., shape=[self.image_embedding_size]),
      name="b_proj_utter")
def embedding_attention_encoder(encoder_inputs,
                                cell,
                                num_encoder_symbols,
                                embedding_size,
                                dtype=None,
                                scope=None):
  """Encoder half of an embedding sequence-to-sequence model with attention.

  This model first embeds encoder_inputs by a newly created embedding (of
  shape [num_encoder_symbols x input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. It keeps the outputs of this
  RNN at every step for a downstream attention decoder to attend to.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    embedding_size: Integer, the length of the embedding vector for each
      symbol.
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_encoder".

  Returns:
    A tuple of the form (encoder_state, attention_states), where:
      encoder_state: The state of the encoder cell at the final time-step.
      attention_states: A 3D Tensor of shape
        [batch_size x len(encoder_inputs) x cell.output_size] containing the
        concatenated encoder outputs, to put attention on.
  """
  with variable_scope.variable_scope(
      scope or "embedding_attention_encoder", dtype=dtype) as scope:
    dtype = scope.dtype
    # Encoder.
    encoder_cell = rnn.EmbeddingWrapper(
        cell,
        embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    encoder_outputs, encoder_state = rnn.static_rnn(
        encoder_cell, encoder_inputs, dtype=dtype)

    # First calculate a concatenation of encoder outputs to put attention on.
    top_states = [
        array_ops.reshape(e, [-1, 1, cell.output_size])
        for e in encoder_outputs
    ]
    attention_states = array_ops.concat(top_states, 1)
    return encoder_state, attention_states
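A hedged usage sketch for the encoder half, assuming TensorFlow 1.x; sizes are illustrative. The returned attention_states tensor is what a downstream attention decoder attends to:

import tensorflow as tf
from tensorflow.contrib import rnn

seq_len = 8  # illustrative
encoder_inputs = [tf.placeholder(tf.int32, [None]) for _ in range(seq_len)]

encoder_state, attention_states = embedding_attention_encoder(
    encoder_inputs, rnn.GRUCell(128),
    num_encoder_symbols=1000, embedding_size=64)
# attention_states: [batch_size, seq_len, 128], ready to hand to an
# attention decoder together with encoder_state.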
def one2many_rnn_seq2seq(encoder_inputs,
                         decoder_inputs_dict,
                         cell,
                         num_encoder_symbols,
                         num_decoder_symbols_dict,
                         embedding_size,
                         feed_previous=False,
                         dtype=None,
                         scope=None):
  outputs_dict = {}
  state_dict = {}
  with variable_scope.variable_scope(
      scope or "one2many_rnn_seq2seq", dtype=dtype) as scope:
    dtype = scope.dtype

    # Encoder.
    encoder_cell = rnn.EmbeddingWrapper(
        cell,
        embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    _, encoder_state = rnn.static_rnn(encoder_cell, encoder_inputs, dtype=dtype)

    # Decoder.
    for name, decoder_inputs in decoder_inputs_dict.items():
      num_decoder_symbols = num_decoder_symbols_dict[name]
      with variable_scope.variable_scope("one2many_decoder_" + str(name)):
        decoder_cell = rnn.OutputProjectionWrapper(cell, num_decoder_symbols)
        if isinstance(feed_previous, bool):
          outputs, state = embedding_rnn_decoder(
              decoder_inputs,
              encoder_state,
              decoder_cell,
              num_decoder_symbols,
              embedding_size,
              feed_previous=feed_previous)
        else:
          # If feed_previous is a Tensor, we construct 2 graphs and use cond.
          def filled_embedding_rnn_decoder(feed_previous):
            """The current decoder with a fixed feed_previous parameter."""
            # pylint: disable=cell-var-from-loop
            reuse = None if feed_previous else True
            vs = variable_scope.get_variable_scope()
            with variable_scope.variable_scope(vs, reuse=reuse):
              outputs, state = embedding_rnn_decoder(
                  decoder_inputs,
                  encoder_state,
                  decoder_cell,
                  num_decoder_symbols,
                  embedding_size,
                  feed_previous=feed_previous)
            # pylint: enable=cell-var-from-loop
            state_list = [state]
            if nest.is_sequence(state):
              state_list = nest.flatten(state)
            return outputs + state_list

          outputs_and_state = control_flow_ops.cond(
              feed_previous,
              lambda: filled_embedding_rnn_decoder(True),
              lambda: filled_embedding_rnn_decoder(False))
          # Outputs length is the same as for decoder inputs.
          outputs_len = len(decoder_inputs)
          outputs = outputs_and_state[:outputs_len]
          state_list = outputs_and_state[outputs_len:]
          state = state_list[0]
          if nest.is_sequence(encoder_state):
            state = nest.pack_sequence_as(structure=encoder_state,
                                          flat_sequence=state_list)

      outputs_dict[name] = outputs
      state_dict[name] = state

  return outputs_dict, state_dict
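A hedged sketch of driving the one-to-many model, where several decoders share one encoder; assumes TensorFlow 1.x, and the decoder names and vocabulary sizes are illustrative:

import tensorflow as tf
from tensorflow.contrib import rnn

enc_len, dec_len = 6, 7  # illustrative
encoder_inputs = [tf.placeholder(tf.int32, [None]) for _ in range(enc_len)]
decoder_inputs_dict = {
    "fr": [tf.placeholder(tf.int32, [None]) for _ in range(dec_len)],
    "de": [tf.placeholder(tf.int32, [None]) for _ in range(dec_len)],
}

outputs_dict, state_dict = one2many_rnn_seq2seq(
    encoder_inputs, decoder_inputs_dict, rnn.GRUCell(128),
    num_encoder_symbols=10000,
    num_decoder_symbols_dict={"fr": 8000, "de": 8000},
    embedding_size=64, feed_previous=False)
# outputs_dict["fr"] and outputs_dict["de"] are separate decoder outputs
# computed from the single shared encoder state.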
def embedding_attention_seq2seq(encoder_inputs,  # [T, batch_size]
                                decoder_inputs,  # [T, batch_size]
                                cell,
                                num_encoder_symbols,
                                num_decoder_symbols,
                                embedding_size,
                                num_heads=1,  # number of attention heads
                                output_projection=None,  # decoder projection (W, B)
                                feed_previous=False,
                                dtype=None,
                                scope=None,
                                initial_state_attention=False,
                                loop_fn_factory=_extract_argmax_and_embed):
  """
  :param encoder_inputs: encoder inputs, a list of int32 id tensors
  :param decoder_inputs: decoder inputs, a list of int32 id tensors
  :param cell: an RNNCell instance
  :param num_encoder_symbols: number of encoder symbols, i.e. the source vocabulary size
  :param num_decoder_symbols: number of decoder symbols, i.e. the target vocabulary size
  :param embedding_size: dimensionality of the embedding vectors
  :param num_heads: number of attention heads; each head is one way of
      computing a weighted sum over the attention states
  :param output_projection: the projection matrix and bias (W, B) used to
      project decoder output vectors onto the vocabulary space; W has shape
      [output_size, num_decoder_symbols] and B has shape
      [num_decoder_symbols]. If provided and feed_previous=True, each
      previous decoder output is multiplied by W and added to B before being
      fed to the next step.
  :param feed_previous: if True, only the first decoder input (the "GO"
      symbol) is used, and every later decoder input is derived from the
      previous step's output; typically used at test time
  :param dtype:
  :param scope:
  :param initial_state_attention: defaults to False, meaning the initial
      attentions are zero; if True, attention is initialized from the
      initial state and the attention states
  :param loop_fn_factory:
  :return:
  """
  with variable_scope.variable_scope(
      scope or "embedding_attention_seq2seq", dtype=dtype) as scope:
    dtype = scope.dtype
    # Encoder.
    # Creates an embedding matrix, computes the encoder outputs and state,
    # and builds the attention states used to compute attention.
    encoder_cell = rnn.EmbeddingWrapper(
        # EmbeddingWrapper adds an embedding layer in front of an RNNCell,
        # so the resulting encoder_cell can take word ids directly as input.
        cell,
        embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    encoder_outputs, encoder_state = rnn.static_rnn(
        encoder_cell, encoder_inputs, dtype=dtype)  # [T, batch_size, size]

    # First calculate a concatenation of encoder outputs to put attention on.
    top_states = [
        array_ops.reshape(e, [-1, 1, cell.output_size])
        for e in encoder_outputs
    ]  # T * [batch_size, 1, size]
    attention_states = array_ops.concat(top_states, 1)  # [batch_size, T, size]

    # Decoder.
    # Build the decoder cell by wrapping the input cell in an
    # OutputProjectionWrapper, which maps outputs to the desired dimension.
    output_size = None
    if output_projection is None:
      cell = rnn.OutputProjectionWrapper(cell, num_decoder_symbols)
      output_size = num_decoder_symbols

    if isinstance(feed_previous, bool):
      return embedding_attention_decoder(
          decoder_inputs,
          encoder_state,
          attention_states,
          cell,
          num_decoder_symbols,
          embedding_size,
          num_heads=num_heads,
          output_size=output_size,
          output_projection=output_projection,
          feed_previous=feed_previous,
          initial_state_attention=initial_state_attention,
          loop_fn_factory=loop_fn_factory)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(), reuse=reuse):
        outputs, state = embedding_attention_decoder(
            decoder_inputs,
            encoder_state,
            attention_states,
            cell,
            num_decoder_symbols,
            embedding_size,
            num_heads=num_heads,
            output_size=output_size,
            output_projection=output_projection,
            feed_previous=feed_previous_bool,
            update_embedding_for_previous=False,
            initial_state_attention=initial_state_attention,
            loop_fn_factory=loop_fn_factory)
        state_list = [state]
        if nest.is_sequence(state):
          state_list = nest.flatten(state)
        return outputs + state_list

    outputs_and_state = control_flow_ops.cond(feed_previous,
                                              lambda: decoder(True),
                                              lambda: decoder(False))
    outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
    state_list = outputs_and_state[outputs_len:]
    state = state_list[0]
    if nest.is_sequence(encoder_state):
      state = nest.pack_sequence_as(structure=encoder_state,
                                    flat_sequence=state_list)
    return outputs_and_state[:outputs_len], state
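One detail worth showing: feed_previous may be a scalar bool Tensor, in which case the function builds both the teacher-forced and the feed-previous graphs and selects one at run time via cond. A hedged sketch of that pattern, assuming TensorFlow 1.x and illustrative sizes (it also assumes _extract_argmax_and_embed is defined, as the default loop_fn_factory requires):

import tensorflow as tf
from tensorflow.contrib import rnn

seq_len = 5  # illustrative
encoder_inputs = [tf.placeholder(tf.int32, [None]) for _ in range(seq_len)]
decoder_inputs = [tf.placeholder(tf.int32, [None]) for _ in range(seq_len)]
# Run-time switch: False for teacher forcing during training, True to
# decode from the model's own previous outputs at inference.
feed_previous = tf.placeholder_with_default(False, [], name="feed_previous")

outputs, state = embedding_attention_seq2seq(
    encoder_inputs, decoder_inputs, rnn.GRUCell(128),
    num_encoder_symbols=1000, num_decoder_symbols=1000,
    embedding_size=64, feed_previous=feed_previous)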
def embedding_attention_seq2seq(encoder_inputs,
                                decoder_inputs,
                                enc_cell,
                                dec_cell,
                                num_encoder_symbols,
                                num_decoder_symbols,
                                embedding_size,
                                num_heads=1,
                                output_projection=None,
                                feed_previous=False,
                                dtype=None,
                                scope=None,
                                initial_state_attention=False):
  """Embedding sequence-to-sequence model with attention.

  This model first embeds encoder_inputs by a newly created embedding (of
  shape [num_encoder_symbols x input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. It keeps the outputs of this
  RNN at every step to use for attention later. Next, it embeds
  decoder_inputs by another newly created embedding (of shape
  [num_decoder_symbols x input_size]). Then it runs attention decoder,
  initialized with the last encoder state, on embedded decoder_inputs and
  attending to encoder outputs.

  Warning: when output_projection is None, the size of the attention vectors
  and variables will be made proportional to num_decoder_symbols, can be
  large.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    enc_cell: tf.nn.rnn_cell.RNNCell defining the encoder cell function and
      size.
    dec_cell: tf.nn.rnn_cell.RNNCell defining the decoder cell function and
      size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each
      symbol.
    num_heads: Number of attention heads that read from attention_states.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has shape
      [num_decoder_symbols]; if provided and feed_previous=True, each fed
      previous output will first be multiplied by W and added B.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in
      embedding_rnn_decoder). If False, decoder_inputs are used as given (the
      standard decoder case).
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_seq2seq".
    initial_state_attention: If False (default), initial attentions are zero.
      If True, initialize the attentions from the initial state and attention
      states.

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell at the final time-step. It is a
        2D Tensor of shape [batch_size x cell.state_size].
  """
  with tf.variable_scope(
      scope or "embedding_attention_seq2seq", dtype=dtype) as scope:
    dtype = scope.dtype
    # Encoder.
    encoder_cell = rnn.EmbeddingWrapper(
        enc_cell,
        embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    encoder_outputs, encoder_state = rnn.static_rnn(
        encoder_cell, encoder_inputs, dtype=dtype)

    # First calculate a concatenation of encoder outputs to put attention on.
    top_states = [
        tf.reshape(e, [-1, 1, encoder_cell.output_size])
        for e in encoder_outputs
    ]
    attention_states = tf.concat(top_states, 1)

    # Decoder.
    output_size = None
    if output_projection is None:
      dec_cell = rnn.OutputProjectionWrapper(dec_cell, num_decoder_symbols)
      output_size = num_decoder_symbols

    if isinstance(feed_previous, bool):
      return embedding_attention_decoder(
          decoder_inputs,
          encoder_state,
          attention_states,
          dec_cell,
          num_decoder_symbols,
          embedding_size,
          num_heads=num_heads,
          output_size=output_size,
          output_projection=output_projection,
          feed_previous=feed_previous,
          initial_state_attention=initial_state_attention)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      reuse = None if feed_previous_bool else True
      with tf.variable_scope(tf.get_variable_scope(), reuse=reuse):
        outputs, state = embedding_attention_decoder(
            decoder_inputs,
            encoder_state,
            attention_states,
            dec_cell,
            num_decoder_symbols,
            embedding_size,
            num_heads=num_heads,
            output_size=output_size,
            output_projection=output_projection,
            feed_previous=feed_previous_bool,
            update_embedding_for_previous=False,
            initial_state_attention=initial_state_attention)
        state_list = [state]
        if nest.is_sequence(state):
          state_list = nest.flatten(state)
        return outputs + state_list

    outputs_and_state = tf.cond(feed_previous,
                                lambda: decoder(True),
                                lambda: decoder(False))
    outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
    state_list = outputs_and_state[outputs_len:]
    state = state_list[0]
    if nest.is_sequence(encoder_state):
      state = nest.pack_sequence_as(structure=encoder_state,
                                    flat_sequence=state_list)
    return outputs_and_state[:outputs_len], state
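This variant takes distinct encoder and decoder cells; because the decoder is initialized from encoder_state, the two cells' state sizes must still agree. A hedged usage sketch, assuming TensorFlow 1.x and illustrative sizes:

import tensorflow as tf
from tensorflow.contrib import rnn

seq_len = 5  # illustrative
encoder_inputs = [tf.placeholder(tf.int32, [None]) for _ in range(seq_len)]
decoder_inputs = [tf.placeholder(tf.int32, [None]) for _ in range(seq_len)]
enc_cell = rnn.LSTMCell(128)
dec_cell = rnn.LSTMCell(128)  # state size must match enc_cell's

outputs, state = embedding_attention_seq2seq(
    encoder_inputs, decoder_inputs, enc_cell, dec_cell,
    num_encoder_symbols=1000, num_decoder_symbols=1000,
    embedding_size=64, feed_previous=True)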
def __init__(self, config, vocab_size, name_scope, dtype=tf.float32):
  emb_dim = config.embed_dim
  num_layers = config.num_layers
  num_class = config.num_class
  buckets = config.buckets
  self.lr = config.lr
  self.global_step = tf.Variable(initial_value=0, trainable=False)

  self.query = []
  self.answer = []
  for i in range(buckets[-1][0]):
    self.query.append(
        tf.placeholder(dtype=tf.int32, shape=[None],
                       name="query{0}".format(i)))
  for i in range(buckets[-1][1]):
    self.answer.append(
        tf.placeholder(dtype=tf.int32, shape=[None],
                       name="answer{0}".format(i)))
  self.target = tf.placeholder(dtype=tf.int64, shape=[None], name="target")

  encoder_cell = tf.nn.rnn_cell.LSTMCell(emb_dim)
  encoder_multi = tf.nn.rnn_cell.MultiRNNCell([encoder_cell] * num_layers)
  encoder_emb = rnn.EmbeddingWrapper(
      encoder_multi, embedding_classes=vocab_size, embedding_size=emb_dim)
  context_cell = tf.nn.rnn_cell.LSTMCell(num_units=emb_dim)
  context_multi = tf.nn.rnn_cell.MultiRNNCell([context_cell] * num_layers)

  self.b_query_state = []
  self.b_answer_state = []
  self.b_state = []
  self.b_logits = []
  self.b_loss = []
  self.b_train_op = []
  for i, bucket in enumerate(buckets):
    with tf.variable_scope(
        name_or_scope="Hier_RNN_encoder",
        reuse=True if i > 0 else None) as var_scope:
      # output: [max_len, batch_size, emb_dim]
      # state: [num_layer, 2, batch_size, emb_dim]
      query_output, query_state = rnn.static_rnn(
          encoder_emb, inputs=self.query[:bucket[0]], dtype=tf.float32)
      var_scope.reuse_variables()
      answer_output, answer_state = rnn.static_rnn(
          encoder_emb, inputs=self.answer[:bucket[1]], dtype=tf.float32)
      self.b_query_state.append(query_state)
      self.b_answer_state.append(answer_state)
      context_input = [query_state[-1][1], answer_state[-1][1]]

    with tf.variable_scope(
        name_or_scope="Hier_RNN_context", reuse=True if i > 0 else None):
      output, state = rnn.static_rnn(context_multi, context_input,
                                     dtype=tf.float32)
      self.b_state.append(state)
      top_state = state[-1][1]  # [batch_size, emb_dim]

    with tf.variable_scope("Softmax_layer_and_output",
                           reuse=True if i > 0 else None):
      softmax_w = tf.get_variable("softmax_w", [emb_dim, num_class],
                                  dtype=tf.float32)
      softmax_b = tf.get_variable("softmax_b", [num_class], dtype=tf.float32)
      logits = tf.matmul(top_state, softmax_w) + softmax_b
      self.b_logits.append(logits)

    with tf.name_scope("loss"):
      loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
          labels=self.target, logits=logits)
      mean_loss = tf.reduce_mean(loss)
      self.b_loss.append(mean_loss)

    with tf.name_scope("gradient_descent"):
      disc_params = [
          var for var in tf.trainable_variables() if name_scope in var.name
      ]
      grads, norm = tf.clip_by_global_norm(
          tf.gradients(mean_loss, disc_params), config.max_grad_norm)
      optimizer = tf.train.AdamOptimizer(self.lr)
      train_op = optimizer.apply_gradients(
          zip(grads, disc_params), global_step=self.global_step)
      self.b_train_op.append(train_op)

  all_variables = [v for v in tf.global_variables() if name_scope in v.name]
  self.saver = tf.train.Saver(all_variables)
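A hedged instantiation sketch for the hierarchical model above, assuming only the config attributes that __init__ actually reads; the enclosing class name is not shown, so HierRNNModel and all field values are placeholders:

import collections

# Hypothetical config carrying exactly the attributes __init__ reads.
Config = collections.namedtuple(
    "Config", ["embed_dim", "num_layers", "num_class", "buckets",
               "lr", "max_grad_norm"])
config = Config(embed_dim=64, num_layers=2, num_class=2,
                buckets=[(10, 10), (20, 20)], lr=1e-3, max_grad_norm=5.0)

# HierRNNModel is a placeholder for the class whose __init__ appears above.
model = HierRNNModel(config, vocab_size=5000, name_scope="disc")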