def create_cell_scopes(self):
    # Text encoder: embedding wrapper around the base RNN cell.
    self.enc_cells_text = EmbeddingWrapper(self.cell_type(self.cell_size),
                                           self.decoder_words,
                                           self.text_embedding_size)
    self.enc_scope_text = "encoder_text"
    # Image encoder: Xavier-initialized projection of the image representation.
    max_val = np.sqrt(6. / (self.image_rep_size + self.image_embedding_size))
    self.W_enc_img = tf.Variable(
        tf.random_uniform([self.image_rep_size, self.image_embedding_size],
                          -1. * max_val, max_val),
        name="W_enc_img")
    self.b_enc_img = tf.Variable(tf.constant(0., shape=[self.image_embedding_size]),
                                 name="b_enc_img")
    self.enc_scope_img = "encoder_img"
    # Utterance-level (context) encoder: two stacked cells.
    self.enc_cells_utter = [self.cell_type(self.cell_size), self.cell_type(self.cell_size)]
    self.enc_scope_utter = "encoder_utter"
    if self.task_type == "text":
        self.dec_cells_text = self.cell_type(self.cell_size)
        self.dec_scope_text = "decoder_text"
    if self.task_type == "image":
        # Target image encoder with its own Xavier-initialized projection.
        self.tgt_scope_img = "target_encoder_img"
        max_val = np.sqrt(6. / (self.image_rep_size + self.image_embedding_size))
        self.W_enc_tgt_img = tf.Variable(
            tf.random_uniform([self.image_rep_size, self.image_embedding_size],
                              -1. * max_val, max_val),
            name="W_enc_tgt_img")
        self.b_enc_tgt_img = tf.Variable(tf.constant(0., shape=[self.image_embedding_size]),
                                         name="b_enc_tgt_img")
        # Projection from the utterance encoder state to the image embedding space.
        max_val = np.sqrt(6. / (self.cell_size + self.image_embedding_size))
        self.proj_scope_utter = "proj_utter"
        self.W_proj_utter = tf.Variable(
            tf.random_uniform([self.cell_size, self.image_embedding_size],
                              -1. * max_val, max_val),
            name="W_proj_utter")
        self.b_proj_utter = tf.Variable(tf.constant(0., shape=[self.image_embedding_size]),
                                        name="b_proj_utter")
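# A minimal sketch of how the method above might be wired into a host class.
# The class name and all hyperparameter values below are hypothetical
# assumptions; the original model reads them from its own configuration.
import numpy as np
import tensorflow as tf
from tensorflow.contrib.rnn import EmbeddingWrapper, GRUCell


class MultimodalSeq2SeqModel(object):

    create_cell_scopes = create_cell_scopes  # bind the function above as a method

    def __init__(self):
        self.cell_type = GRUCell
        self.cell_size = 512
        self.decoder_words = 20000        # decoder vocabulary size (assumed)
        self.text_embedding_size = 300
        self.image_rep_size = 4096        # e.g. a CNN feature vector (assumed)
        self.image_embedding_size = 512
        self.task_type = "text"           # or "image"
        self.create_cell_scopes()


model = MultimodalSeq2SeqModel()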
def embedding_attention_seq2seq(encoder_inputs, decoder_inputs, cell,
                                num_encoder_symbols, num_decoder_symbols,
                                embedding_size, num_heads=1,
                                output_projection=None, feed_previous=False,
                                dtype=dtypes.float32, scope=None):
  """Embedding sequence-to-sequence model with attention.

  This model first embeds encoder_inputs by a newly created embedding (of shape
  [num_encoder_symbols x cell.input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. It keeps the outputs of this RNN
  at every step to use for attention later. Next, it embeds decoder_inputs by
  another newly created embedding (of shape [num_decoder_symbols x
  cell.input_size]). Then it runs an attention decoder, initialized with the
  last encoder state, on embedded decoder_inputs and attending to encoder
  outputs.

  Args:
    encoder_inputs: a list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: a list of 1D int32 Tensors of shape [batch_size].
    cell: RNNCell defining the cell function and size.
    num_encoder_symbols: integer; number of symbols on the encoder side.
    num_decoder_symbols: integer; number of symbols on the decoder side.
    embedding_size: integer; length of the embedding vector for each symbol.
    num_heads: number of attention heads that read from attention_states.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [cell.output_size x num_decoder_symbols] and B has
      shape [num_decoder_symbols]; if provided and feed_previous=True, each fed
      previous output will first be multiplied by W and added B.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first of
      decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_seq2seq".

  Returns:
    outputs: A list of the same length as decoder_inputs of 2D Tensors with
      shape [batch_size x num_decoder_symbols] containing the generated outputs.
    states: The state of each decoder cell in each time-step. This is a list
      with length len(decoder_inputs) -- one item for each time-step. Each item
      is a 2D Tensor of shape [batch_size x cell.state_size].
  """
  with vs.variable_scope(scope or "embedding_attention_seq2seq"):
    # Encoder.
    encoder_cell = EmbeddingWrapper(cell, num_encoder_symbols, embedding_size)
    encoder_outputs, encoder_states = rnn(
        encoder_cell, encoder_inputs, dtype=dtype)

    # First calculate a concatenation of encoder outputs to put attention on.
    top_states = [array_ops.reshape(e, [-1, 1, cell.output_size])
                  for e in encoder_outputs]
    attention_states = array_ops.concat(top_states, 1)

    # Decoder.
    output_size = None
    if output_projection is None:
      cell = OutputProjectionWrapper(cell, num_decoder_symbols)
      output_size = num_decoder_symbols

    if isinstance(feed_previous, bool):
      return embedding_attention_decoder(
          decoder_inputs, encoder_states[-1], attention_states, cell,
          num_decoder_symbols, embedding_size, num_heads, output_size,
          output_projection, feed_previous)
    else:
      # If feed_previous is a Tensor, we construct 2 graphs and use cond.
      outputs1, states1 = embedding_attention_decoder(
          decoder_inputs, encoder_states[-1], attention_states, cell,
          num_decoder_symbols, embedding_size, num_heads, output_size,
          output_projection, True)
      vs.get_variable_scope().reuse_variables()
      outputs2, states2 = embedding_attention_decoder(
          decoder_inputs, encoder_states[-1], attention_states, cell,
          num_decoder_symbols, embedding_size, num_heads, output_size,
          output_projection, False)
      outputs = control_flow_ops.cond(feed_previous,
                                      lambda: outputs1, lambda: outputs2)
      states = control_flow_ops.cond(feed_previous,
                                     lambda: states1, lambda: states2)
      return outputs, states
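# A minimal usage sketch for the legacy-style embedding_attention_seq2seq
# defined above. Vocabulary sizes, sequence lengths and the GRU cell are
# illustrative assumptions, not values taken from the original code.
import tensorflow as tf

enc_len, dec_len = 10, 12
src_vocab, tgt_vocab, emb_dim = 5000, 5000, 128

# Time-major lists of int32 token-id tensors, as the docstring requires.
encoder_inputs = [tf.placeholder(tf.int32, [None], name="enc%d" % i)
                  for i in range(enc_len)]
decoder_inputs = [tf.placeholder(tf.int32, [None], name="dec%d" % i)
                  for i in range(dec_len)]

cell = tf.contrib.rnn.GRUCell(256)
outputs, states = embedding_attention_seq2seq(
    encoder_inputs, decoder_inputs, cell,
    num_encoder_symbols=src_vocab, num_decoder_symbols=tgt_vocab,
    embedding_size=emb_dim, num_heads=1, feed_previous=False)
# outputs: dec_len tensors of shape [batch, tgt_vocab] (logits after the
# internal output projection, since output_projection is None here).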
def embedding_attention_seq2seq_context(encoder_inputs, decoder_inputs, cell,
                                        num_encoder_symbols, num_decoder_symbols,
                                        embedding_size, num_heads=1,
                                        output_projection=None, feed_previous=False,
                                        dtype=dtypes.float32, scope=None):
    """A seq2seq architecture with two encoders, one for the context and one for
    the input DA. The decoder uses twice the cell size. Code adapted from the
    TensorFlow examples."""
    with vs.variable_scope(scope or "embedding_attention_seq2seq_context"):
        # Split context and real inputs into separate vectors
        # (integer division so the slice indices stay ints under Python 3).
        context_inputs = encoder_inputs[0:len(encoder_inputs) // 2]
        encoder_inputs = encoder_inputs[len(encoder_inputs) // 2:]

        # Build separate encoders.
        encoder_cell = EmbeddingWrapper(cell, num_encoder_symbols, embedding_size)
        with vs.variable_scope("context_rnn") as scope:
            context_outputs, context_states = tf06s2s.rnn(
                encoder_cell, context_inputs, dtype=dtype, scope=scope)
        with vs.variable_scope("input_rnn") as scope:
            encoder_outputs, encoder_states = tf06s2s.rnn(
                encoder_cell, encoder_inputs, dtype=dtype, scope=scope)

        # Concatenate outputs & states: join the two encoders' outputs, cell
        # states and hidden states along the feature axis.
        encoder_outputs = [
            array_ops.concat([co, eo], axis=1, name="context-and-encoder-output")
            for co, eo in zip(context_outputs, encoder_outputs)
        ]
        encoder_states = [
            (array_ops.concat([c1, c2], axis=1), array_ops.concat([h1, h2], axis=1))
            for (c1, h1), (c2, h2) in zip(context_states, encoder_states)
        ]

        # Calculate a concatenation of encoder outputs to put attention on.
        top_states = [
            array_ops.reshape(e, [-1, 1, cell.output_size * 2])
            for e in encoder_outputs
        ]
        # Pass axis/values as keyword arguments so they cannot be mixed up.
        attention_states = array_ops.concat(axis=1, values=top_states)

        # Change the decoder cell to accommodate the wider input.
        # TODO this will work for BasicLSTMCell and GRUCell, but not for others
        cell = type(cell)(num_units=(cell.output_size * 2))

        # Decoder.
        output_size = None
        if output_projection is None:
            cell = OutputProjectionWrapper(cell, num_decoder_symbols)
            output_size = num_decoder_symbols

        if isinstance(feed_previous, bool):
            return tf06s2s.embedding_attention_decoder(
                decoder_inputs, encoder_states[-1], attention_states, cell,
                num_decoder_symbols, embedding_size, num_heads, output_size,
                output_projection, feed_previous)
        else:
            # If feed_previous is a Tensor, we construct 2 graphs and use cond.
            outputs1, states1 = tf06s2s.embedding_attention_decoder(
                decoder_inputs, encoder_states[-1], attention_states, cell,
                num_decoder_symbols, embedding_size, num_heads, output_size,
                output_projection, True)
            vs.get_variable_scope().reuse_variables()
            outputs2, states2 = tf06s2s.embedding_attention_decoder(
                decoder_inputs, encoder_states[-1], attention_states, cell,
                num_decoder_symbols, embedding_size, num_heads, output_size,
                output_projection, False)
            outputs = control_flow_ops.cond(feed_previous,
                                            lambda: outputs1, lambda: outputs2)
            states = control_flow_ops.cond(feed_previous,
                                           lambda: states1, lambda: states2)
            return outputs, states
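# A usage sketch for the two-encoder variant above: the first half of the
# encoder input list is the context, the second half the input DA, so both
# halves must have the same length. All sizes here are illustrative
# assumptions, and an LSTM cell is used because the state concatenation
# above expects (c, h) state tuples.
import tensorflow as tf

ctx_len = da_len = 15
vocab, emb_dim = 2000, 50

context_ids = [tf.placeholder(tf.int32, [None]) for _ in range(ctx_len)]
da_ids = [tf.placeholder(tf.int32, [None]) for _ in range(da_len)]
decoder_ids = [tf.placeholder(tf.int32, [None]) for _ in range(20)]

cell = tf.contrib.rnn.BasicLSTMCell(128)
outputs, states = embedding_attention_seq2seq_context(
    context_ids + da_ids,   # context first, then the input DA
    decoder_ids, cell,
    num_encoder_symbols=vocab, num_decoder_symbols=vocab,
    embedding_size=emb_dim, feed_previous=True)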
def embedding_attention_seq2seq(encoder_inputs, decoder_inputs, cell,
                                num_encoder_symbols, num_decoder_symbols,
                                embedding_size, num_heads=1,
                                output_projection=None, feed_previous=False,
                                dtype=None, scope=None,
                                initial_state_attention=False, copy=False,
                                attn_type="linear"):
  """Embedding sequence-to-sequence model with attention.

  This model first embeds encoder_inputs by a newly created embedding (of shape
  [num_encoder_symbols x input_size]). Then it runs an RNN to encode embedded
  encoder_inputs into a state vector. It keeps the outputs of this RNN at every
  step to use for attention later. Next, it embeds decoder_inputs by another
  newly created embedding (of shape [num_decoder_symbols x input_size]). Then
  it runs an attention decoder, initialized with the last encoder state, on
  embedded decoder_inputs and attending to encoder outputs.

  Warning: when output_projection is None, the size of the attention vectors
  and variables will be made proportional to num_decoder_symbols, which can be
  large.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each symbol.
    num_heads: Number of attention heads that read from attention_states.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has shape
      [num_decoder_symbols]; if provided and feed_previous=True, each fed
      previous output will first be multiplied by W and added B.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first of
      decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_seq2seq".
    initial_state_attention: If False (default), initial attentions are zero.
      If True, initialize the attentions from the initial state and attention
      states.
    copy: If True, use a copy mechanism in decoding to copy from the encoder
      inputs.
    attn_type: Attention type to use.

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell at the final time-step. It is a 2D
        Tensor of shape [batch_size x cell.state_size].
  """
  with variable_scope.variable_scope(
      scope or "embedding_attention_seq2seq", dtype=dtype) as scope:
    dtype = scope.dtype
    # Encoder.
    encoder_cell = EmbeddingWrapper(cell,
                                    embedding_classes=num_encoder_symbols,
                                    embedding_size=embedding_size)
    encoder_outputs, encoder_state = rnn.static_rnn(encoder_cell,
                                                    encoder_inputs,
                                                    dtype=dtype)

    # First calculate a concatenation of encoder outputs to put attention on.
    top_states = [
        array_ops.reshape(e, [-1, 1, cell.output_size])
        for e in encoder_outputs
    ]
    attention_states = array_ops.concat(top_states, 1)

    # Decoder.
    output_size = None
    if output_projection is None:
      cell = OutputProjectionWrapper(cell, num_decoder_symbols)
      output_size = num_decoder_symbols

    # Modify num_decoder symbols to include len of src
    if isinstance(feed_previous, bool):
      return embedding_attention_decoder(
          decoder_inputs,
          encoder_inputs,
          encoder_state,
          attention_states,
          cell,
          num_decoder_symbols,
          embedding_size,
          num_heads=num_heads,
          output_size=output_size,
          output_projection=output_projection,
          feed_previous=feed_previous,
          initial_state_attention=initial_state_attention,
          copy=copy,
          attn_type=attn_type)

    def decoder(feed_previous_bool):
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(), reuse=reuse) as scope:
        # Pass encoder_inputs, copy and attn_type here as well so that this
        # branch calls the decoder with the same signature as the call above.
        outputs, state = embedding_attention_decoder(
            decoder_inputs,
            encoder_inputs,
            encoder_state,
            attention_states,
            cell,
            num_decoder_symbols,
            embedding_size,
            num_heads=num_heads,
            output_size=output_size,
            output_projection=output_projection,
            feed_previous=feed_previous_bool,
            update_embedding_for_previous=False,
            initial_state_attention=initial_state_attention,
            copy=copy,
            attn_type=attn_type)
        state_list = [state]
        if nest.is_sequence(state):
          state_list = nest.flatten(state)
        return outputs + state_list

    outputs_and_state = control_flow_ops.cond(feed_previous,
                                              lambda: decoder(True),
                                              lambda: decoder(False))
    outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
    state_list = outputs_and_state[outputs_len:]
    state = state_list[0]
    if nest.is_sequence(encoder_state):
      state = nest.pack_sequence_as(structure=encoder_state,
                                    flat_sequence=state_list)
    return outputs_and_state[:outputs_len], state
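# A usage sketch for the copy-attention variant above. The copy mechanism
# itself lives inside embedding_attention_decoder (not shown here); this only
# illustrates the call signature, with assumed toy sizes.
import tensorflow as tf

src_len, tgt_len, vocab, emb_dim = 20, 20, 8000, 100
encoder_inputs = [tf.placeholder(tf.int32, [None]) for _ in range(src_len)]
decoder_inputs = [tf.placeholder(tf.int32, [None]) for _ in range(tgt_len)]

cell = tf.contrib.rnn.GRUCell(256)
outputs, state = embedding_attention_seq2seq(
    encoder_inputs, decoder_inputs, cell,
    num_encoder_symbols=vocab, num_decoder_symbols=vocab,
    embedding_size=emb_dim, feed_previous=False,
    copy=True, attn_type="linear")   # enable copying from the encoder inputs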
import tensorflow as tf
from tensorflow.contrib.rnn import BasicLSTMCell
from tensorflow.contrib.rnn import DropoutWrapper
from tensorflow.contrib.rnn import MultiRNNCell
from tensorflow.contrib.rnn import EmbeddingWrapper
from tensorflow.contrib.rnn import static_rnn
import tensorflow.contrib.seq2seq as seq2seq

tf.reset_default_graph()
sess = tf.Session()

# Assumed hyperparameters and inputs; the original snippet defines these elsewhere.
embedding_dim = 128
input_vocab_size = 10000
input_seq_length = 30
encode_input = [tf.placeholder(tf.int32, [None], name="ei_%d" % i)
                for i in range(input_seq_length)]

# encoder == RNN(EmbeddingWrapper(cell))
lstm_cell = BasicLSTMCell(num_units=embedding_dim)
encoder_cell = EmbeddingWrapper(cell=lstm_cell,
                                embedding_classes=input_vocab_size,
                                embedding_size=embedding_dim)
encoder_outputs, encoder_state = static_rnn(cell=encoder_cell,
                                            inputs=encode_input,
                                            dtype=tf.float32)

# Attention ==
# BahdanauAttention expects its memory as a single [batch, time, depth] tensor,
# so the per-step outputs from static_rnn are stacked along the time axis.
attn_mech = seq2seq.BahdanauAttention(num_units=input_seq_length,
                                      memory=tf.stack(encoder_outputs, axis=1),
                                      normalize=False,
                                      name='BahdanauAttention')
lstm_cell_decoder = BasicLSTMCell(embedding_dim)
attn_cell = seq2seq.DynamicAttentionWrapper(cell=lstm_cell_decoder,
                                            attention_mechanism=attn_mech,
                                            attention_size=embedding_dim)  # completion assumed; original is cut off here
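# A hedged continuation of the snippet above, assuming the TF 1.1-era
# tf.contrib.seq2seq API it targets (TrainingHelper / BasicDecoder /
# dynamic_decode). The decoder-side tensors decoder_embedded and
# decoder_lengths are hypothetical and would come from the rest of the model.
decoder_embedded = tf.placeholder(tf.float32, [None, None, embedding_dim])
decoder_lengths = tf.placeholder(tf.int32, [None])

helper = seq2seq.TrainingHelper(inputs=decoder_embedded,
                                sequence_length=decoder_lengths)
batch_size = tf.shape(encode_input[0])[0]
decoder = seq2seq.BasicDecoder(cell=attn_cell, helper=helper,
                               initial_state=attn_cell.zero_state(batch_size, tf.float32))
# dynamic_decode unrolls the attention-wrapped decoder cell; it returns the
# decoder outputs and final state (later TF versions also return lengths).
decode_result = seq2seq.dynamic_decode(decoder)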