def translate_model(X, y):
    byte_list = skflow.ops.one_hot_matrix(X, 256)
    in_X, in_y, out_y = skflow.ops.seq2seq_inputs(
        byte_list, y, MAX_DOCUMENT_LENGTH, MAX_DOCUMENT_LENGTH)
    cell = rnn_cell.OutputProjectionWrapper(rnn_cell.GRUCell(HIDDEN_SIZE), 256)
    decoding, _, sampling_decoding, _ = rnn_seq2seq(in_X, in_y, cell)
    return skflow.ops.sequence_classifier(decoding, out_y, sampling_decoding)
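# For context: a model function like the one above was typically handed to a
# skflow estimator. A minimal wiring sketch, assuming the legacy
# skflow.TensorFlowEstimator API; the constants and training data here are
# illustrative, not from the original source.
MAX_DOCUMENT_LENGTH = 10  # assumed sequence length for this sketch
HIDDEN_SIZE = 20          # assumed GRU size for this sketch

translator = skflow.TensorFlowEstimator(
    model_fn=translate_model,
    n_classes=256,  # byte-level vocabulary
    batch_size=32, steps=100)
# translator.fit(X_train, y_train)  # X_train/y_train: byte-encoded sequences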
def embedding_rnn_seq2seq(encoder_inputs, decoder_inputs, cell,
                          num_encoder_symbols, num_decoder_symbols,
                          embedding_size, output_projection=None,
                          feed_previous=False, dtype=dtypes.float32,
                          scope=None, beam_search=True, beam_size=10):
  """Embedding RNN sequence-to-sequence model.

  This model first embeds encoder_inputs by a newly created embedding (of
  shape [num_encoder_symbols x input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. Next, it embeds decoder_inputs
  by another newly created embedding (of shape [num_decoder_symbols x
  input_size]). Then it runs an RNN decoder, initialized with the last
  encoder state, on embedded decoder_inputs.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each
      symbol.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has shape
      [num_decoder_symbols]; if provided and feed_previous=True, each fed
      previous output will first be multiplied by W and have B added.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in
      embedding_rnn_decoder). If False, decoder_inputs are used as given (the
      standard decoder case).
    dtype: The dtype of the initial state for both the encoder and decoder
      RNN cells (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_rnn_seq2seq".
    beam_search: Boolean, forwarded to embedding_rnn_decoder; if True, decode
      with beam search instead of greedy argmax.
    beam_size: Integer beam width used when beam_search is True.

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell in each time-step. This is a list
        with length len(decoder_inputs) -- one item for each time-step. It is
        a 2D Tensor of shape [batch_size x cell.state_size].
  """
  with variable_scope.variable_scope(scope or "embedding_rnn_seq2seq"):
    # Encoder.
    encoder_cell = rnn_cell.EmbeddingWrapper(
        cell, embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    _, encoder_state = rnn.rnn(encoder_cell, encoder_inputs, dtype=dtype)

    # Decoder.
    if output_projection is None:
      cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)

    return embedding_rnn_decoder(
        decoder_inputs, encoder_state, cell, num_decoder_symbols,
        embedding_size, output_projection=output_projection,
        feed_previous=feed_previous, beam_search=beam_search,
        beam_size=beam_size)
def embedding_rnn_seq2seq(encoder_inputs, decoder_inputs, cell,
                          num_encoder_symbols, num_decoder_symbols,
                          embedding_size, output_projection=None,
                          feed_previous=False, dtype=None, scope=None,
                          beam_search=True, beam_size=10):
  with variable_scope.variable_scope(scope or "embedding_rnn_seq2seq") as scope:
    if dtype is not None:
      scope.set_dtype(dtype)
    else:
      dtype = scope.dtype

    # Encoder.
    encoder_cell = tf.contrib.rnn.EmbeddingWrapper(
        cell, embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    _, encoder_state = tf.contrib.rnn.static_rnn(encoder_cell, encoder_inputs,
                                                 dtype=dtype)

    # Decoder.
    if output_projection is None:
      cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)

    if isinstance(feed_previous, bool):
      return embedding_rnn_decoder(
          decoder_inputs, encoder_state, cell, num_decoder_symbols,
          embedding_size, output_projection=output_projection,
          feed_previous=feed_previous, scope=scope,
          beam_search=beam_search, beam_size=beam_size)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(), reuse=reuse) as scope:
        outputs, state = embedding_rnn_decoder(
            decoder_inputs, encoder_state, cell, num_decoder_symbols,
            embedding_size, output_projection=output_projection,
            feed_previous=feed_previous_bool,
            update_embedding_for_previous=False,
            beam_search=beam_search, beam_size=beam_size)
        state_list = [state]
        if nest.is_sequence(state):
          state_list = nest.flatten(state)
        return outputs + state_list

    outputs_and_state = control_flow_ops.cond(feed_previous,
                                              lambda: decoder(True),
                                              lambda: decoder(False))
    outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
    state_list = outputs_and_state[outputs_len:]
    state = state_list[0]
    if nest.is_sequence(encoder_state):
      state = nest.pack_sequence_as(structure=encoder_state,
                                    flat_sequence=state_list)
    return outputs_and_state[:outputs_len], state
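# The pattern above, in isolation: when feed_previous is a Tensor, both
# decoder graphs are built and tf.cond picks one at run time, while variable
# reuse makes the two branches share weights. A minimal sketch assuming the
# TF 1.x graph-mode API these snippets are written against.
import tensorflow as tf

flag = tf.placeholder(tf.bool, [])

def branch(scale, reuse):
    # reuse=None creates "w"; reuse=True reuses that same variable, so both
    # cond branches share weights even though the graph is built twice.
    with tf.variable_scope(tf.get_variable_scope(), reuse=reuse):
        w = tf.get_variable("w", [], initializer=tf.ones_initializer())
        return w * scale

# The true branch is traced first, so it is the one that creates variables,
# mirroring decoder(True) with reuse=None above.
out = tf.cond(flag, lambda: branch(2.0, None), lambda: branch(3.0, True))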
def embedding_rnn_seq2seq(encoder_inputs, decoder_inputs, cell,
                          num_encoder_symbols, num_decoder_symbols,
                          embedding_size, output_projection=None,
                          feed_previous=False, dtype=dtypes.float32,
                          scope=None):
  with variable_scope.variable_scope(scope or "embedding_rnn_seq2seq"):
    # Encoder.
    encoder_cell = rnn_cell.EmbeddingWrapper(
        cell, embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    _, encoder_state = rnn.rnn(encoder_cell, encoder_inputs, dtype=dtype)

    # Decoder.
    if output_projection is None:
      cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)

    if isinstance(feed_previous, bool):
      return embedding_rnn_decoder(
          decoder_inputs, encoder_state, cell, num_decoder_symbols,
          embedding_size, output_projection=output_projection,
          feed_previous=feed_previous)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(), reuse=reuse):
        outputs, state = embedding_rnn_decoder(
            decoder_inputs, encoder_state, cell, num_decoder_symbols,
            embedding_size, output_projection=output_projection,
            feed_previous=feed_previous_bool,
            update_embedding_for_previous=False)
        return outputs + [state]

    outputs_and_state = control_flow_ops.cond(feed_previous,
                                              lambda: decoder(True),
                                              lambda: decoder(False))
    return outputs_and_state[:-1], outputs_and_state[-1]
def testOutputProjectionWrapper(self):
  with self.test_session() as sess:
    with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)):
      x = tf.zeros([1, 3])
      m = tf.zeros([1, 3])
      cell = rnn_cell.OutputProjectionWrapper(rnn_cell.GRUCell(3), 2)
      g, new_m = cell(x, m)
      sess.run([tf.initialize_all_variables()])
      res = sess.run([g, new_m], {
          x.name: np.array([[1., 1., 1.]]),
          m.name: np.array([[0.1, 0.1, 0.1]])
      })
      self.assertEqual(res[1].shape, (1, 3))
      # The numbers in results were not calculated, this is just a smoke test.
      self.assertAllClose(res[0], [[0.231907, 0.231907]])
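# Conceptually (not the TF implementation), OutputProjectionWrapper applies a
# learned linear map y_t = h_t * W + b to every cell output, shrinking it
# from cell.output_size to the projection size while leaving the recurrent
# state untouched -- which is why res[1] above still has shape (1, 3) while
# res[0] has shape (1, 2). A NumPy sketch with made-up numbers:
import numpy as np

def project_output(h, W, b):
    """h: [batch, cell_output_size]; W: [cell_output_size, num_proj]."""
    return np.dot(h, W) + b

h = np.array([[0.2, -0.1, 0.4]], dtype=np.float32)  # stand-in GRU output
W = np.full((3, 2), 0.5, dtype=np.float32)
b = np.zeros(2, dtype=np.float32)
print(project_output(h, W, b))  # [[0.25 0.25]]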
def embedding_attention_seq2seq(encoder_inputs, decoder_inputs, cell,
                                num_encoder_symbols, num_decoder_symbols,
                                embedding_size, num_heads=1, num_layers=3,
                                output_projection=None, feed_previous=False,
                                dtype=None, scope=None,
                                initial_state_attention=False):
  """Embedding sequence-to-sequence model with attention.

  This model first embeds encoder_inputs by a newly created embedding (of
  shape [num_encoder_symbols x input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. It keeps the outputs of this
  RNN at every step to use for attention later. Next, it embeds
  decoder_inputs by another newly created embedding (of shape
  [num_decoder_symbols x input_size]). Then it runs an attention decoder,
  initialized with the last encoder state, on embedded decoder_inputs and
  attending to encoder outputs.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each
      symbol.
    num_heads: Number of attention heads that read from attention_states.
    num_layers: Integer; number of LSTM layers in the stacked residual
      encoder built below.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has shape
      [num_decoder_symbols]; if provided and feed_previous=True, each fed
      previous output will first be multiplied by W and have B added.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in
      embedding_rnn_decoder). If False, decoder_inputs are used as given (the
      standard decoder case).
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_seq2seq".
    initial_state_attention: If False (default), initial attentions are zero.
      If True, initialize the attentions from the initial state and attention
      states.

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell at the final time-step. It is a
        2D Tensor of shape [batch_size x cell.state_size].
  """
  with variable_scope.variable_scope(
      scope or "embedding_attention_seq2seq", dtype=dtype) as scope:
    dtype = scope.dtype
    # Encoder: bidirectional embedding layer split across two GPUs.
    with tf.device('/gpu:0'):
      single_cell_1 = rnn_cell.LSTMCell(embedding_size)
    with tf.device('/gpu:1'):
      single_cell_2 = rnn_cell.LSTMCell(embedding_size)
    encoder_fw_cell = rnn_cell.EmbeddingWrapper(
        single_cell_1, embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    encoder_bw_cell = rnn_cell.EmbeddingWrapper(
        single_cell_2, embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    outputs, _, _ = rnn.bidirectional_rnn(
        encoder_fw_cell, encoder_bw_cell, encoder_inputs, dtype=dtype)

    # Stacked residual LSTM encoder, alternating layers between two GPUs.
    list_of_cell = []
    for layer in xrange(num_layers):
      if layer % 2 == 0:
        with tf.device('/gpu:0'):
          single_cell = tf.nn.rnn_cell.LSTMCell(embedding_size)
          list_of_cell.append(single_cell)
      else:
        with tf.device('/gpu:1'):
          single_cell = tf.nn.rnn_cell.LSTMCell(embedding_size)
          list_of_cell.append(single_cell)
    cell2 = Stack_Residual_RNNCell.Stack_Residual_RNNCell(list_of_cell)
    encoder_outputs, encoder_state = rnn.rnn(cell2, outputs, dtype=dtype)

    # First calculate a concatenation of encoder outputs to put attention on.
    top_states = [array_ops.reshape(e, [-1, 1, cell.output_size])
                  for e in encoder_outputs]
    attention_states = array_ops.concat(1, top_states)

    # Decoder.
    output_size = None
    if output_projection is None:
      cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
      output_size = num_decoder_symbols

    if isinstance(feed_previous, bool):
      return embedding_attention_decoder(
          decoder_inputs, encoder_state, attention_states, cell,
          num_decoder_symbols, embedding_size, num_heads=num_heads,
          output_size=output_size, output_projection=output_projection,
          feed_previous=feed_previous,
          initial_state_attention=initial_state_attention)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(), reuse=reuse) as scope:
        outputs, state = embedding_attention_decoder(
            decoder_inputs, encoder_state, attention_states, cell,
            num_decoder_symbols, embedding_size, num_heads=num_heads,
            output_size=output_size, output_projection=output_projection,
            feed_previous=feed_previous_bool,
            update_embedding_for_previous=False,
            initial_state_attention=initial_state_attention)
        state_list = [state]
        if nest.is_sequence(state):
          state_list = nest.flatten(state)
        return outputs + state_list

    outputs_and_state = control_flow_ops.cond(feed_previous,
                                              lambda: decoder(True),
                                              lambda: decoder(False))
    outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
    state_list = outputs_and_state[outputs_len:]
    state = state_list[0]
    if nest.is_sequence(encoder_state):
      state = nest.pack_sequence_as(structure=encoder_state,
                                    flat_sequence=state_list)
    return outputs_and_state[:outputs_len], state
def embedding_tied_rnn_seq2seq(encoder_inputs, decoder_inputs, cell,
                               num_symbols, embedding_size,
                               num_decoder_symbols=None,
                               output_projection=None, feed_previous=False,
                               dtype=None, scope=None):
  """Embedding RNN sequence-to-sequence model with tied (shared) parameters.

  This model first embeds encoder_inputs by a newly created embedding (of
  shape [num_symbols x input_size]). Then it runs an RNN to encode embedded
  encoder_inputs into a state vector. Next, it embeds decoder_inputs using
  the same embedding. Then it runs an RNN decoder, initialized with the last
  encoder state, on embedded decoder_inputs. The decoder output is over
  symbols from 0 to num_decoder_symbols - 1 if num_decoder_symbols is not
  None; otherwise it is over 0 to num_symbols - 1.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_symbols: Integer; number of symbols for both encoder and decoder.
    embedding_size: Integer, the length of the embedding vector for each
      symbol.
    num_decoder_symbols: Integer; number of output symbols for decoder. If
      provided, the decoder output is over symbols 0 to
      num_decoder_symbols - 1. Otherwise, decoder output is over symbols 0 to
      num_symbols - 1. Note that this assumes that the vocabulary is set up
      such that the first num_decoder_symbols of num_symbols are part of
      decoding.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_symbols] and B has shape
      [num_symbols]; if provided and feed_previous=True, each fed previous
      output will first be multiplied by W and have B added.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in
      embedding_rnn_decoder). If False, decoder_inputs are used as given (the
      standard decoder case).
    dtype: The dtype to use for the initial RNN states (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_tied_rnn_seq2seq".

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x output_symbols] containing the generated outputs
        where output_symbols = num_decoder_symbols if num_decoder_symbols is
        not None, otherwise output_symbols = num_symbols.
      state: The state of each decoder cell at the final time-step. It is a
        2D Tensor of shape [batch_size x cell.state_size].

  Raises:
    ValueError: When output_projection has the wrong shape.
  """
  with variable_scope.variable_scope(
      scope or "embedding_tied_rnn_seq2seq", dtype=dtype) as scope:
    dtype = scope.dtype

    if output_projection is not None:
      proj_weights = ops.convert_to_tensor(output_projection[0], dtype=dtype)
      proj_weights.get_shape().assert_is_compatible_with([None, num_symbols])
      proj_biases = ops.convert_to_tensor(output_projection[1], dtype=dtype)
      proj_biases.get_shape().assert_is_compatible_with([num_symbols])

    embedding = variable_scope.get_variable(
        "embedding", [num_symbols, embedding_size], dtype=dtype)

    emb_encoder_inputs = [embedding_ops.embedding_lookup(embedding, x)
                          for x in encoder_inputs]
    emb_decoder_inputs = [embedding_ops.embedding_lookup(embedding, x)
                          for x in decoder_inputs]

    output_symbols = num_symbols
    if num_decoder_symbols is not None:
      output_symbols = num_decoder_symbols
    if output_projection is None:
      cell = rnn_cell.OutputProjectionWrapper(cell, output_symbols)

    if isinstance(feed_previous, bool):
      loop_function = _extract_argmax_and_embed(
          embedding, output_projection, True) if feed_previous else None
      return tied_rnn_seq2seq(emb_encoder_inputs, emb_decoder_inputs, cell,
                              loop_function=loop_function, dtype=dtype)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      loop_function = _extract_argmax_and_embed(
          embedding, output_projection, False) if feed_previous_bool else None
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(variable_scope.get_variable_scope(),
                                         reuse=reuse):
        outputs, state = tied_rnn_seq2seq(
            emb_encoder_inputs, emb_decoder_inputs, cell,
            loop_function=loop_function, dtype=dtype)
        state_list = [state]
        if nest.is_sequence(state):
          state_list = nest.flatten(state)
        return outputs + state_list

    outputs_and_state = control_flow_ops.cond(feed_previous,
                                              lambda: decoder(True),
                                              lambda: decoder(False))
    outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
    state_list = outputs_and_state[outputs_len:]
    state = state_list[0]

    # Calculate zero-state to know its structure.
    static_batch_size = encoder_inputs[0].get_shape()[0]
    for inp in encoder_inputs[1:]:
      static_batch_size.merge_with(inp.get_shape()[0])
    batch_size = static_batch_size.value
    if batch_size is None:
      batch_size = array_ops.shape(encoder_inputs[0])[0]
    zero_state = cell.zero_state(batch_size, dtype)
    if nest.is_sequence(zero_state):
      state = nest.pack_sequence_as(structure=zero_state,
                                    flat_sequence=state_list)
    return outputs_and_state[:outputs_len], state
def basic_rnn_seq2seq(encoder_inputs, en_seq_length, decoder_inputs, cell,
                      num_decoder_symbols, output_projection=None,
                      feed_previous=False, dtype=None, scope=None):
  """Basic RNN sequence-to-sequence model with explicit encoder lengths.

  This model runs an RNN to encode encoder_inputs into a state vector
  (respecting en_seq_length), then runs an RNN decoder, initialized with the
  last encoder state, on decoder_inputs. Unlike the embedding variants,
  inputs are expected to be already-embedded 2D Tensors.

  Args:
    encoder_inputs: A list of 2D Tensors of shape [batch_size x input_size].
    en_seq_length: 1D int32 Tensor of shape [batch_size]; the true length of
      each sequence in encoder_inputs.
    decoder_inputs: A list of 2D Tensors of shape [batch_size x input_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has shape
      [num_decoder_symbols].
    feed_previous: Currently unused; see the TODO below.
    dtype: The dtype of the initial state for both the encoder and decoder
      RNN cells (default: the scope's dtype).
    scope: VariableScope for the created subgraph; defaults to
      "basic_rnn_seq2seq".

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors. The
        output is of shape [batch_size x cell.output_size] when
        output_projection is not None (and represents the dense
        representation of predicted tokens). It is of shape
        [batch_size x num_decoder_symbols] when output_projection is None.
      state: The state of each decoder cell in each time-step. This is a list
        with length len(decoder_inputs) -- one item for each time-step. It is
        a 2D Tensor of shape [batch_size x cell.state_size].
  """
  with variable_scope.variable_scope(scope or "basic_rnn_seq2seq") as scope:
    if dtype is not None:
      scope.set_dtype(dtype)
    else:
      dtype = scope.dtype

    # Encoder.
    _, encoder_state = rnn.rnn(cell, encoder_inputs,
                               sequence_length=en_seq_length,
                               scope='rnn_encoder', dtype=dtype)

    # Decoder.
    if output_projection is None:
      cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)

    # TODO(ysu): embedding and feed_previous
    # loop_function = _extract_argmax_and_embed(
    #     embedding, output_projection,
    #     update_embedding_for_previous) if feed_previous else None
    return rnn_decoder(decoder_inputs, encoder_state, cell,
                       scope='rnn_decoder')
def embedding_tied_rnn_seq2seq(encoder_inputs, decoder_inputs, cell,
                               num_symbols, output_projection=None,
                               feed_previous=False, dtype=dtypes.float32,
                               scope=None):
  """Embedding RNN sequence-to-sequence model with tied (shared) parameters.

  This model first embeds encoder_inputs by a newly created embedding (of
  shape [num_symbols x cell.input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. Next, it embeds decoder_inputs
  using the same embedding. Then it runs an RNN decoder, initialized with the
  last encoder state, on embedded decoder_inputs.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_symbols: Integer; number of symbols for both encoder and decoder.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [cell.output_size x num_symbols] and B has shape
      [num_symbols]; if provided and feed_previous=True, each fed previous
      output will first be multiplied by W and have B added.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in
      embedding_rnn_decoder). If False, decoder_inputs are used as given (the
      standard decoder case).
    dtype: The dtype to use for the initial RNN states (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_tied_rnn_seq2seq".

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_symbols] containing the generated outputs.
      state: The state of each decoder cell at the final time-step. It is a
        2D Tensor of shape [batch_size x cell.state_size].

  Raises:
    ValueError: When output_projection has the wrong shape.
  """
  if output_projection is not None:
    proj_weights = ops.convert_to_tensor(output_projection[0], dtype=dtype)
    proj_weights.get_shape().assert_is_compatible_with([cell.output_size,
                                                        num_symbols])
    proj_biases = ops.convert_to_tensor(output_projection[1], dtype=dtype)
    proj_biases.get_shape().assert_is_compatible_with([num_symbols])

  with vs.variable_scope(scope or "embedding_tied_rnn_seq2seq"):
    with ops.device("/cpu:0"):
      embedding = vs.get_variable("embedding", [num_symbols, cell.input_size])

    emb_encoder_inputs = [embedding_ops.embedding_lookup(embedding, x)
                          for x in encoder_inputs]
    emb_decoder_inputs = [embedding_ops.embedding_lookup(embedding, x)
                          for x in decoder_inputs]

    def extract_argmax_and_embed(prev, _):
      """Loop_function that extracts the symbol from prev and embeds it."""
      if output_projection is not None:
        prev = nn_ops.xw_plus_b(prev, output_projection[0],
                                output_projection[1])
      prev_symbol = array_ops.stop_gradient(math_ops.argmax(prev, 1))
      return embedding_ops.embedding_lookup(embedding, prev_symbol)

    if output_projection is None:
      cell = rnn_cell.OutputProjectionWrapper(cell, num_symbols)

    if isinstance(feed_previous, bool):
      loop_function = extract_argmax_and_embed if feed_previous else None
      return tied_rnn_seq2seq(emb_encoder_inputs, emb_decoder_inputs, cell,
                              loop_function=loop_function, dtype=dtype)
    else:
      # If feed_previous is a Tensor, we construct 2 graphs and use cond.
      outputs1, state1 = tied_rnn_seq2seq(
          emb_encoder_inputs, emb_decoder_inputs, cell,
          loop_function=extract_argmax_and_embed, dtype=dtype)
      vs.get_variable_scope().reuse_variables()
      outputs2, state2 = tied_rnn_seq2seq(
          emb_encoder_inputs, emb_decoder_inputs, cell, dtype=dtype)
      outputs = control_flow_ops.cond(feed_previous,
                                      lambda: outputs1, lambda: outputs2)
      state = control_flow_ops.cond(feed_previous,
                                    lambda: state1, lambda: state2)
      return outputs, state
def many2one_attention_seq2seq(encoder_inputs_list, decoder_inputs, text_len,
                               speech_len, feat_dim, text_cell, speech_cell,
                               parse_cell, num_encoder_symbols,
                               num_decoder_symbols, embedding_size,
                               attention_vec_size, fixed_word_length,
                               filter_sizes, num_filters,
                               output_projection=None, feed_previous=False,
                               dtype=dtypes.float32, scope=None,
                               initial_state_attention=False):
  text_encoder_inputs, speech_encoder_inputs = encoder_inputs_list
  encoder_size = len(text_encoder_inputs)
  # speech_encoder_inputs has shape
  # [seq_len, batch_size, fixed_word_length, feat_dim].
  with variable_scope.variable_scope(scope or "many2one_attention_seq2seq"):
    with ops.device("/cpu:0"):
      embedding_words = variable_scope.get_variable(
          "embedding_words", [num_encoder_symbols, embedding_size])
    text_encoder_inputs = [
        embedding_ops.embedding_lookup(embedding_words, i)
        for i in text_encoder_inputs
    ]

    # Text encoder.
    with variable_scope.variable_scope(scope or "text_encoder"):
      text_encoder_outputs, text_encoder_state = rnn.rnn(
          text_cell, text_encoder_inputs, sequence_length=text_len,
          dtype=dtype)

    # Convolution over the speech inputs, one filter size at a time.
    pooled_outputs = []
    for i, filter_size in enumerate(filter_sizes):
      print(i, filter_size)
      with variable_scope.variable_scope(
          scope or "conv-maxpool-%s" % filter_size):
        filter_shape = [filter_size, feat_dim, 1, num_filters]
        W = variable_scope.get_variable("W-%d" % i, filter_shape)
        b = variable_scope.get_variable("B-%d" % i, num_filters)
        pooled_words = []
        for j in range(encoder_size):
          feats = speech_encoder_inputs[j]
          feats_conv = tf.expand_dims(feats, -1)
          conv = tf.nn.conv2d(feats_conv, W, strides=[1, 1, 1, 1],
                              padding="VALID", name="conv")
          # Apply nonlinearity.
          h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
          pooled = tf.nn.max_pool(
              h, ksize=[1, fixed_word_length - filter_size + 1, 1, 1],
              strides=[1, 1, 1, 1], padding='VALID', name="pool")
          pooled_words.append(pooled)
        pooled_outputs.append(pooled_words)

    num_filters_total = num_filters * len(filter_sizes)
    out_seq = tf.unpack(tf.concat(1, pooled_outputs))
    speech_conv_outputs = [
        tf.reshape(x, [-1, num_filters_total]) for x in out_seq
    ]

    # Speech encoder over the pooled convolution features.
    with variable_scope.variable_scope(scope or "speech_encoder"):
      speech_encoder_outputs, speech_encoder_state = rnn.rnn(
          speech_cell, speech_conv_outputs, sequence_length=speech_len,
          dtype=dtype)

    # First calculate a concatenation of encoder outputs to put attention on.
    text_top_states = [
        array_ops.reshape(e, [-1, 1, text_cell.output_size])
        for e in text_encoder_outputs
    ]
    # h_states = attention_states in the original code.
    h_states = array_ops.concat(1, text_top_states)
    speech_top_states = [
        array_ops.reshape(e, [-1, 1, speech_cell.output_size])
        for e in speech_encoder_outputs
    ]
    m_states = array_ops.concat(1, speech_top_states)
    attention_states = [h_states, m_states]
    both_encoder_states = [text_encoder_state, speech_encoder_state]

    # Decoder.
    output_size = None
    if output_projection is None:
      parse_cell = rnn_cell.OutputProjectionWrapper(parse_cell,
                                                    num_decoder_symbols)
      output_size = num_decoder_symbols

    if isinstance(feed_previous, bool):
      return many2one_embedding_attention_decoder(
          decoder_inputs, both_encoder_states, attention_states, parse_cell,
          num_decoder_symbols, embedding_size, output_size=output_size,
          output_projection=output_projection, feed_previous=feed_previous,
          initial_state_attention=initial_state_attention,
          attention_vec_size=attention_vec_size)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(), reuse=reuse):
        outputs, state = many2one_embedding_attention_decoder(
            decoder_inputs, both_encoder_states, attention_states, parse_cell,
            num_decoder_symbols, embedding_size, output_size=output_size,
            output_projection=output_projection,
            feed_previous=feed_previous_bool,
            update_embedding_for_previous=False,
            initial_state_attention=initial_state_attention,
            attention_vec_size=attention_vec_size)
        return outputs + [state]

    outputs_and_state = control_flow_ops.cond(feed_previous,
                                              lambda: decoder(True),
                                              lambda: decoder(False))
    return outputs_and_state[:-1], outputs_and_state[-1]
def one2many_rnn_seq2seq(encoder_inputs, decoder_inputs_dict, cell,
                         num_encoder_symbols, num_decoder_symbols_dict,
                         embedding_size, feed_previous=False,
                         dtype=dtypes.float32, scope=None):
  """One-to-many RNN sequence-to-sequence model (multi-task).

  This is a multi-task sequence-to-sequence model with one encoder and
  multiple decoders. Reference to multi-task sequence-to-sequence learning
  can be found here: http://arxiv.org/abs/1511.06114

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs_dict: A dictionary mapping decoder name (string) to the
      corresponding decoder_inputs; each decoder_inputs is a list of 1D
      Tensors of shape [batch_size]; num_decoders is defined as
      len(decoder_inputs_dict).
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols_dict: A dictionary mapping decoder name (string) to
      an integer specifying number of symbols for the corresponding decoder;
      len(num_decoder_symbols_dict) must be equal to num_decoders.
    embedding_size: Integer, the length of the embedding vector for each
      symbol.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in
      embedding_rnn_decoder). If False, decoder_inputs are used as given (the
      standard decoder case).
    dtype: The dtype of the initial state for both the encoder and decoder
      RNN cells (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "one2many_rnn_seq2seq".

  Returns:
    A tuple of the form (outputs_dict, state_dict), where:
      outputs_dict: A mapping from decoder name (string) to a list of the
        same length as decoder_inputs_dict[name]; each element in the list is
        a 2D Tensor with shape [batch_size x num_decoder_symbols_dict[name]]
        containing the generated outputs.
      state_dict: A mapping from decoder name (string) to the final state of
        the corresponding decoder RNN; it is a 2D Tensor of shape
        [batch_size x cell.state_size].
  """
  outputs_dict = {}
  state_dict = {}

  with variable_scope.variable_scope(scope or "one2many_rnn_seq2seq"):
    # Encoder.
    encoder_cell = rnn_cell.EmbeddingWrapper(
        cell, embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    _, encoder_state = rnn.rnn(encoder_cell, encoder_inputs, dtype=dtype)

    # Decoder.
    for name, decoder_inputs in decoder_inputs_dict.items():
      num_decoder_symbols = num_decoder_symbols_dict[name]

      with variable_scope.variable_scope("one2many_decoder_" + str(name)):
        decoder_cell = rnn_cell.OutputProjectionWrapper(cell,
                                                        num_decoder_symbols)
        if isinstance(feed_previous, bool):
          outputs, state = embedding_rnn_decoder(
              decoder_inputs, encoder_state, decoder_cell,
              num_decoder_symbols, embedding_size,
              feed_previous=feed_previous)
        else:
          # If feed_previous is a Tensor, we construct 2 graphs and use cond.
          def filled_embedding_rnn_decoder(feed_previous):
            # pylint: disable=cell-var-from-loop
            reuse = None if feed_previous else True
            vs = variable_scope.get_variable_scope()
            with variable_scope.variable_scope(vs, reuse=reuse):
              outputs, state = embedding_rnn_decoder(
                  decoder_inputs, encoder_state, decoder_cell,
                  num_decoder_symbols, embedding_size,
                  feed_previous=feed_previous)
            # pylint: enable=cell-var-from-loop
            return outputs + [state]

          outputs_and_state = control_flow_ops.cond(
              feed_previous,
              lambda: filled_embedding_rnn_decoder(True),
              lambda: filled_embedding_rnn_decoder(False))
          outputs = outputs_and_state[:-1]
          state = outputs_and_state[-1]

      outputs_dict[name] = outputs
      state_dict[name] = state

  return outputs_dict, state_dict
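# A hypothetical multi-task call sketch for one2many_rnn_seq2seq: one shared
# encoder feeding two task-specific decoders. Task names, sizes, and
# placeholders are illustrative, assuming the TF 0.x-era API used above.
import tensorflow as tf

seq_len = 10
encoder_inputs = [tf.placeholder(tf.int32, [None]) for _ in range(seq_len)]
decoder_inputs_dict = {
    "pos":   [tf.placeholder(tf.int32, [None]) for _ in range(seq_len)],
    "chunk": [tf.placeholder(tf.int32, [None]) for _ in range(seq_len)],
}
cell = tf.nn.rnn_cell.GRUCell(128)
outputs_dict, state_dict = one2many_rnn_seq2seq(
    encoder_inputs, decoder_inputs_dict, cell,
    num_encoder_symbols=20000,
    num_decoder_symbols_dict={"pos": 45, "chunk": 23},
    embedding_size=128, feed_previous=True)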
def embedding_attention_bidirectional_seq2seq(encoder_inputs, decoder_inputs,
                                              encoder_input_length,
                                              list_of_mask, encoder_cell,
                                              decoder_cell,
                                              num_encoder_symbols,
                                              num_decoder_symbols,
                                              embedding_size, num_heads=1,
                                              beam_size=1,
                                              output_projection=None,
                                              dtype=None, scope=None,
                                              initial_state_attention=False):
  """Embedding sequence-to-sequence model with attention.

  This model first embeds encoder_inputs by a newly created embedding (of
  shape [num_encoder_symbols x input_size]). Then it runs a bidirectional RNN
  to encode embedded encoder_inputs into a state vector. It keeps the outputs
  of this RNN at every step to use for attention later. Next, it embeds
  decoder_inputs by another newly created embedding (of shape
  [num_decoder_symbols x input_size]). Then it runs an attention decoder,
  initialized with the last encoder state, on embedded decoder_inputs and
  attending to encoder outputs.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    encoder_input_length: 1D int32 Tensor of shape [batch_size]; the true
      length of each encoder sequence.
    list_of_mask: Decoding masks, forwarded to embedding_attention_decoder.
    encoder_cell: rnn_cell.RNNCell defining the encoder cell function and
      size.
    decoder_cell: rnn_cell.RNNCell defining the decoder cell function and
      size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each
      symbol.
    num_heads: Number of attention heads that read from attention_states.
    beam_size: Integer beam width, forwarded to embedding_attention_decoder.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has shape
      [num_decoder_symbols].
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_bidirectional_seq2seq".
    initial_state_attention: If False (default), initial attentions are zero.
      If True, initialize the attentions from the initial state and attention
      states.

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder_cell at the final time-step. It is a
        2D Tensor of shape [batch_size x decoder_cell.state_size].
  """
  with variable_scope.variable_scope(
      scope or "embedding_attention_bidirectional_seq2seq",
      dtype=dtype) as scope:
    dtype = scope.dtype

    # Encoder.
    embedding = variable_scope.get_variable(
        "encoder_embedding", [num_encoder_symbols, embedding_size])
    encoder_inputs = array_ops.pack([
        embedding_ops.embedding_lookup(embedding, i) for i in encoder_inputs
    ])
    encoder_outputs, encoder_states = rnn.bidirectional_dynamic_rnn(
        cell_fw=encoder_cell, cell_bw=encoder_cell, inputs=encoder_inputs,
        sequence_length=encoder_input_length, time_major=True, dtype=dtype)
    encoder_state_fw, encoder_state_bw = encoder_states

    # Concatenate output_fw and output_bw:
    # [step, batch_size, cell.output_size * 2].
    concat_encoder_outputs = array_ops.concat(2, encoder_outputs)
    # Transpose to [batch_size, step, cell.output_size * 2].
    attention_states = array_ops.transpose(concat_encoder_outputs, [1, 0, 2])

    # Decoder.
    output_size = None
    if output_projection is None:
      decoder_cell = rnn_cell.OutputProjectionWrapper(decoder_cell,
                                                      num_decoder_symbols)
      output_size = num_decoder_symbols

    return embedding_attention_decoder(
        decoder_inputs, list_of_mask, encoder_state_bw, attention_states,
        decoder_cell, num_decoder_symbols, embedding_size,
        num_heads=num_heads, beam_size=beam_size, output_size=output_size,
        output_projection=output_projection,
        update_embedding_for_previous=False,
        initial_state_attention=initial_state_attention)
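# Shape walk-through of the bidirectional concatenation above, as a plain
# NumPy sketch (values are dummies; only the shapes matter).
import numpy as np

steps, batch, units = 5, 2, 3
output_fw = np.zeros((steps, batch, units))
output_bw = np.zeros((steps, batch, units))
# Concatenate along the feature axis: [steps, batch, 2 * units].
concat = np.concatenate([output_fw, output_bw], axis=2)
# Transpose to batch-major [batch, steps, 2 * units] for attention_states.
attention_states = concat.transpose(1, 0, 2)
print(attention_states.shape)  # (2, 5, 6)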
def maxpool_attention_seq2seq(encoder_inputs_list, decoder_inputs, seq_len,
                              feat_dim, encoder_cell, parse_cell,
                              num_encoder_symbols, num_pause_symbols,
                              num_decoder_symbols, embedding_size, pause_size,
                              use_conv, conv_filter_width, conv_num_channels,
                              attention_vec_size, fixed_word_length,
                              filter_sizes, num_filters,
                              output_projection=None, feed_previous=False,
                              dtype=dtypes.float32, scope=None,
                              initial_state_attention=False,
                              use_speech=False, use_pause=False,
                              use_wd=False):
  (text_encoder_inputs, speech_encoder_inputs, pause_bef, pause_aft,
   word_durs) = encoder_inputs_list
  encoder_size = len(text_encoder_inputs)
  # speech_encoder_inputs has shape
  # [seq_len, batch_size, fixed_word_length, feat_dim].
  with variable_scope.variable_scope(scope or "many2one_attention_seq2seq"):
    with ops.device("/cpu:0"):
      embedding_words = variable_scope.get_variable(
          "embedding_words", [num_encoder_symbols, embedding_size])
    if use_pause:
      with ops.device("/cpu:0"):
        embedding_pauses = variable_scope.get_variable(
            "embedding_pauses", [num_pause_symbols, pause_size])

    # We need to do the embedding beforehand so that the rnn infers the input
    # type to be float and doesn't cause trouble in copying state after
    # sequence length. This has been fixed in the 0.10 release; the issue is
    # tracked at https://github.com/tensorflow/tensorflow/issues/3322
    text_encoder_inputs = [
        embedding_ops.embedding_lookup(embedding_words, i)
        for i in text_encoder_inputs
    ]
    if use_pause:
      pause_bef = [embedding_ops.embedding_lookup(embedding_pauses, i)
                   for i in pause_bef]
      pause_aft = [embedding_ops.embedding_lookup(embedding_pauses, i)
                   for i in pause_aft]

    # Append pause and/or word-duration features to each word embedding.
    if use_pause and not use_wd:
      text_encoder_inputs = [
          tf.concat(1, [text_encoder_inputs[i], pause_bef[i], pause_aft[i]])
          for i in range(encoder_size)
      ]
    elif use_wd and not use_pause:
      text_encoder_inputs = [
          tf.concat(1, [text_encoder_inputs[i],
                        tf.expand_dims(word_durs[i], -1)])
          for i in range(encoder_size)
      ]
    elif use_pause and use_wd:
      text_encoder_inputs = [
          tf.concat(1, [text_encoder_inputs[i], pause_bef[i], pause_aft[i],
                        tf.expand_dims(word_durs[i], -1)])
          for i in range(encoder_size)
      ]

    if use_speech:
      # Convolution over the speech inputs, one filter size at a time.
      pooled_outputs = []
      for i, filter_size in enumerate(filter_sizes):
        print(i, filter_size)
        with variable_scope.variable_scope(
            scope or "conv-maxpool-%s" % filter_size):
          filter_shape = [filter_size, feat_dim, 1, num_filters]
          W = variable_scope.get_variable("W-%d" % i, filter_shape)
          b = variable_scope.get_variable("B-%d" % i, num_filters)
          pooled_words = []
          for j in range(encoder_size):
            feats = speech_encoder_inputs[j]
            feats_conv = tf.expand_dims(feats, -1)
            conv = tf.nn.conv2d(feats_conv, W, strides=[1, 1, 1, 1],
                                padding="VALID", name="conv")
            # Apply nonlinearity.
            h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
            pooled = tf.nn.max_pool(
                h, ksize=[1, fixed_word_length - filter_size + 1, 1, 1],
                strides=[1, 1, 1, 1], padding='VALID', name="pool")
            pooled_words.append(pooled)
          pooled_outputs.append(pooled_words)
      num_filters_total = num_filters * len(filter_sizes)
      out_seq = tf.unpack(tf.concat(2, pooled_outputs))
      speech_conv_outputs = [
          tf.reshape(x, [-1, num_filters_total]) for x in out_seq
      ]
      # Concatenate text_encoder_inputs and speech_conv_outputs.
      both_encoder_inputs = [
          tf.concat(1, [text_encoder_inputs[i], speech_conv_outputs[i]])
          for i in range(encoder_size)
      ]
    else:
      both_encoder_inputs = text_encoder_inputs

    # Encoder.
    with variable_scope.variable_scope(scope or "encoder"):
      encoder_outputs, encoder_states = rnn.rnn(
          encoder_cell, both_encoder_inputs, sequence_length=seq_len,
          dtype=dtype)

    # First calculate a concatenation of encoder outputs to put attention on.
    top_states = [
        array_ops.reshape(e, [-1, 1, encoder_cell.output_size])
        for e in encoder_outputs
    ]
    attention_states = array_ops.concat(1, top_states)

    # Decoder.
    output_size = None
    if output_projection is None:
      parse_cell = rnn_cell.OutputProjectionWrapper(parse_cell,
                                                    num_decoder_symbols)
      output_size = num_decoder_symbols

    if isinstance(feed_previous, bool):
      return embedding_attention_decoder(
          decoder_inputs, encoder_states, attention_states, parse_cell,
          seq_len, num_decoder_symbols, embedding_size, use_conv,
          conv_filter_width, conv_num_channels, output_size=output_size,
          output_projection=output_projection, feed_previous=feed_previous,
          initial_state_attention=initial_state_attention,
          attention_vec_size=attention_vec_size)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(), reuse=reuse):
        # Note: the original passed the undefined name both_encoder_states
        # here; encoder_states is the variable actually defined above.
        outputs, state = embedding_attention_decoder(
            decoder_inputs, encoder_states, attention_states, parse_cell,
            seq_len, num_decoder_symbols, embedding_size, use_conv,
            conv_filter_width, conv_num_channels, output_size=output_size,
            output_projection=output_projection,
            feed_previous=feed_previous_bool,
            update_embedding_for_previous=False,
            initial_state_attention=initial_state_attention,
            attention_vec_size=attention_vec_size)
        return outputs + [state]

    outputs_and_state = control_flow_ops.cond(feed_previous,
                                              lambda: decoder(True),
                                              lambda: decoder(False))
    return outputs_and_state[:-1], outputs_and_state[-1]
def embedding_attention_seq2seq(encoder_inputs, decoder_inputs, cell,
                                num_encoder_symbols, num_decoder_symbols,
                                embedding_size, num_heads=1,
                                output_projection=None, feed_previous=False,
                                dtype=dtypes.float32, scope=None,
                                initial_state_attention=False,
                                scheduling_rate=1.0):
  """Embedding sequence-to-sequence model with attention.

  This model first embeds encoder_inputs by a newly created embedding (of
  shape [num_encoder_symbols x input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. It keeps the outputs of this
  RNN at every step to use for attention later. Next, it embeds
  decoder_inputs by another newly created embedding (of shape
  [num_decoder_symbols x input_size]). Then it runs an attention decoder,
  initialized with the last encoder state, on embedded decoder_inputs and
  attending to encoder outputs.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each
      symbol.
    num_heads: Number of attention heads that read from attention_states.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has shape
      [num_decoder_symbols]; if provided and feed_previous=True, each fed
      previous output will first be multiplied by W and have B added.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in
      embedding_rnn_decoder). If False, decoder_inputs are used as given (the
      standard decoder case).
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_seq2seq".
    initial_state_attention: If False (default), initial attentions are zero.
      If True, initialize the attentions from the initial state and attention
      states.
    scheduling_rate: Scheduled-sampling rate, forwarded to
      embedding_attention_decoder.

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell at the final time-step. It is a
        2D Tensor of shape [batch_size x cell.state_size].
  """
  with variable_scope.variable_scope(scope or "embedding_attention_seq2seq"):
    # Encoder.
    encoder_cell = rnn_cell.EmbeddingWrapper(
        cell, embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    encoder_outputs, encoder_state = rnn.rnn(encoder_cell, encoder_inputs,
                                             dtype=dtype)

    # First calculate a concatenation of encoder outputs to put attention on.
    top_states = [array_ops.reshape(e, [-1, 1, cell.output_size])
                  for e in encoder_outputs]
    attention_states = array_ops.concat(1, top_states)

    # Decoder.
    output_size = None
    if output_projection is None:
      cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
      output_size = num_decoder_symbols

    # feed_previous=True means forward propagation only: each previous
    # prediction is fed as the next decoder input (only the GO symbol comes
    # from decoder_inputs). This path is used for validation and testing.
    if isinstance(feed_previous, bool):
      return embedding_attention_decoder(
          decoder_inputs, encoder_state, attention_states, cell,
          num_decoder_symbols, embedding_size, num_heads=num_heads,
          output_size=output_size, output_projection=output_projection,
          feed_previous=feed_previous,
          initial_state_attention=initial_state_attention,
          scheduling_rate=scheduling_rate)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(), reuse=reuse):
        outputs, state = embedding_attention_decoder(
            decoder_inputs, encoder_state, attention_states, cell,
            num_decoder_symbols, embedding_size, num_heads=num_heads,
            output_size=output_size, output_projection=output_projection,
            feed_previous=feed_previous_bool,
            update_embedding_for_previous=False,
            initial_state_attention=initial_state_attention,
            scheduling_rate=scheduling_rate)
        return outputs + [state]

    outputs_and_state = control_flow_ops.cond(feed_previous,
                                              lambda: decoder(True),
                                              lambda: decoder(False))
    return outputs_and_state[:-1], outputs_and_state[-1]
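# What a scheduling_rate typically controls (scheduled sampling, Bengio et
# al. 2015): at each decoder step, feed the ground-truth symbol with
# probability scheduling_rate, otherwise feed the model's own previous
# prediction. A minimal sketch of that coin flip; the decoder above receives
# the rate and is assumed to implement something similar internally.
import numpy as np

def choose_next_input(gold_token, predicted_token, scheduling_rate,
                      rng=np.random):
    """Coin-flip between teacher forcing and feeding back the prediction."""
    if rng.rand() < scheduling_rate:
        return gold_token       # teacher forcing
    return predicted_token      # feed back the model's own output

# With rate 1.0 this is pure teacher forcing; with 0.0 it always feeds back
# predictions, matching feed_previous=True behaviour.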
def embedding_kv_attention_seq2seq(encoder_inputs, decoder_inputs, kb_inputs,
                                   kb_mask_inputs, cell, num_encoder_symbols,
                                   num_decoder_symbols, embedding_size,
                                   output_projection=None,
                                   feed_previous=False, attn_type="linear",
                                   enc_attn=False, use_types=False,
                                   type_to_idx=None, use_bidir=False,
                                   seq_lengths=None, enc_query=False,
                                   dtype=None, scope=None):
  """Embedding sequence-to-sequence model with attention over a KB.

  This model first embeds encoder_inputs by a newly created embedding (of
  shape [num_encoder_symbols x input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. It keeps the outputs of this
  RNN at every step to use for attention later. Next, it embeds
  decoder_inputs by another newly created embedding (of shape
  [num_decoder_symbols x input_size]). Then it runs an attention decoder,
  initialized with the last encoder state, on embedded decoder_inputs,
  attending to encoder outputs as well as an embedded KB.

  Warning: when output_projection is None, the size of the attention vectors
  and variables will be made proportional to num_decoder_symbols, which can
  be large.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    kb_inputs: KBs for the given batch of dialogues.
    kb_mask_inputs: KB masks for the given batch of dialogues.
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each
      symbol.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has shape
      [num_decoder_symbols]; if provided and feed_previous=True, each fed
      previous output will first be multiplied by W and have B added.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in
      embedding_rnn_decoder). If False, decoder_inputs are used as given (the
      standard decoder case).
    attn_type: Attention scoring type (e.g. "linear"), forwarded to
      kv_attention_decoder.
    enc_attn: Boolean; whether the decoder also attends over encoder outputs
      (forwarded to kv_attention_decoder).
    use_types: Boolean; if True, augment encoder inputs with entity-type
      encodings via CustomEmbeddingWrapper.
    type_to_idx: Optional mapping from entity type to index, used to build
      the one-hot type encoding when use_types is True.
    use_bidir: Boolean; if True, encode with a bidirectional RNN and
      concatenate the forward and backward states.
    seq_lengths: Optional 1D Tensor of encoder sequence lengths, used by the
      bidirectional encoder.
    enc_query: Boolean flag forwarded to kv_attention_decoder.
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_kb_attention_seq2seq".

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell at the final time-step. It is a
        2D Tensor of shape [batch_size x cell.state_size].
  """
  if type_to_idx is not None:
    # Mapping from entity type to idx for augmenting encoder input.
    num_entity_types = len(type_to_idx.keys())
    entity_encoding = np.zeros((num_entity_types, num_entity_types - 1),
                               dtype=np.float32)
    for idx in range(num_entity_types - 1):
      entity_encoding[idx, idx] = 1.

  with variable_scope.variable_scope(
      scope or "embedding_kb_attention_seq2seq", dtype=dtype) as scope:
    dtype = scope.dtype
    # Encoder.
    if use_types:
      print("Typed Encoder Inputs...")
      # Augment encoder inputs with entity-type encodings.
      encoder_cell = CustomEmbeddingWrapper(
          cell, embedding_classes=num_encoder_symbols,
          embedding_size=embedding_size, entity_encoding=entity_encoding)
    else:
      print("Regular encoding...")
      # Just regular encoding.
      encoder_cell = rnn_cell.EmbeddingWrapper(
          cell, embedding_classes=num_encoder_symbols,
          embedding_size=embedding_size)

    # Use bidirectional encoding.
    if use_bidir:
      encoder_cell_backward = copy.deepcopy(encoder_cell)
      encoder_outputs, encoder_state_fw, encoder_state_bw = \
          rnn.bidirectional_rnn(encoder_cell, encoder_cell_backward,
                                encoder_inputs, dtype=dtype,
                                sequence_length=seq_lengths)
      combined_c = tf.concat(1, [encoder_state_fw.c, encoder_state_bw.c])
      combined_h = tf.concat(1, [encoder_state_fw.h, encoder_state_bw.h])
      encoder_state = rnn_cell.LSTMStateTuple(c=combined_c, h=combined_h)
    else:
      encoder_outputs, encoder_state = rnn.rnn(encoder_cell, encoder_inputs,
                                               dtype=dtype)

    # First calculate a concatenation of encoder outputs to put attention on.
    if use_bidir:
      top_states = [array_ops.reshape(e, [-1, 1, 2 * cell.output_size])
                    for e in encoder_outputs]
    else:
      top_states = [array_ops.reshape(e, [-1, 1, cell.output_size])
                    for e in encoder_outputs]
    attention_states = array_ops.concat(1, top_states)

    if output_projection is None:
      if use_bidir:
        # Modify dimension of decoder rnn_size to match the doubled state.
        cell = rnn_cell.BasicLSTMCell(2 * cell.output_size,
                                      state_is_tuple=True)
      cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
      output_size = num_decoder_symbols
    else:
      output_size = cell.output_size

    if isinstance(feed_previous, bool):
      return kv_attention_decoder(
          cell, decoder_inputs, kb_inputs, kb_mask_inputs, encoder_state,
          attention_states, num_decoder_symbols,
          embedding_size=embedding_size, output_size=output_size,
          feed_previous=feed_previous, attn_type=attn_type,
          enc_attn=enc_attn, enc_query=enc_query, dtype=dtype)
def embedding_attention_seq2seq(encoder_inputs, context_inputs,
                                decoder_inputs, cell, num_encoder_symbols,
                                num_decoder_symbols, embedding_size,
                                num_heads=1, output_projection=None,
                                feed_previous=False, dtype=None, scope=None,
                                initial_state_attention=False):
  print("output_projection {0}".format(output_projection))
  with variable_scope.variable_scope(scope or "embedding_attention_seq2seq",
                                     dtype=dtype, reuse=None) as scope:
    dtype = scope.dtype

    # Encoder.
    with variable_scope.variable_scope("encoder"):
      encoder_cell = rnn_cell.EmbeddingWrapper(
          cell, embedding_classes=num_encoder_symbols,
          embedding_size=embedding_size)
      encoder_outputs, encoder_state = rnn.rnn(encoder_cell, encoder_inputs,
                                               dtype=dtype)

    # Context encoder.
    with variable_scope.variable_scope("context"):
      context_cell = rnn_cell.EmbeddingWrapper(
          cell, embedding_classes=num_encoder_symbols,
          embedding_size=embedding_size)
      context_outputs, context_state = rnn.rnn(context_cell, context_inputs,
                                               dtype=dtype)

    # Merge the utterance and context encodings by element-wise addition.
    for i in range(len(encoder_outputs)):
      encoder_outputs[i] = tf.add(encoder_outputs[i], context_outputs[i])
    temp = []
    for i in range(len(encoder_state)):
      temp.append(tf.add(encoder_state[i], context_state[i]))
    encoder_state = tuple(temp)

    print(
        "Inside method embedding_attention_seq2seq. Encoder Outputs {0} "
        "Encoder State {1}".format(np.shape(encoder_outputs),
                                   np.shape(encoder_state)))

    # First calculate a concatenation of encoder outputs to put attention on.
    top_states = [array_ops.reshape(e, [-1, 1, cell.output_size])
                  for e in encoder_outputs]
    attention_states = array_ops.concat(1, top_states)
    print("Attention States has been created of size {0}".format(
        np.shape(attention_states)))

    # Decoder.
    output_size = None
    if output_projection is None:
      cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
      output_size = num_decoder_symbols
    print("The output size is {0}".format(output_size))

    if isinstance(feed_previous, bool):
      print("Number of heads {0}".format(num_heads))
      return embedding_attention_decoder(
          decoder_inputs, encoder_state, attention_states, cell,
          num_decoder_symbols, embedding_size, num_heads=num_heads,
          output_size=output_size, output_projection=output_projection,
          feed_previous=feed_previous,
          initial_state_attention=initial_state_attention)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(), reuse=reuse) as scope:
        outputs, state = embedding_attention_decoder(
            decoder_inputs, encoder_state, attention_states, cell,
            num_decoder_symbols, embedding_size, num_heads=num_heads,
            output_size=output_size, output_projection=output_projection,
            feed_previous=feed_previous_bool,
            update_embedding_for_previous=False,
            initial_state_attention=initial_state_attention)
        state_list = [state]
        if nest.is_sequence(state):
          state_list = nest.flatten(state)
        return outputs + state_list

    outputs_and_state = control_flow_ops.cond(feed_previous,
                                              lambda: decoder(True),
                                              lambda: decoder(False))
    outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
    state_list = outputs_and_state[outputs_len:]
    state = state_list[0]
    if nest.is_sequence(encoder_state):
      state = nest.pack_sequence_as(structure=encoder_state,
                                    flat_sequence=state_list)
    return outputs_and_state[:outputs_len], state
def __init__(self, embedding, max_length, initial_state, attention_states, cell,
             num_samples=512, feed_previous=False,
             update_embedding_for_previous=True, dtype=dtypes.float32,
             scope=None, initial_state_attention=False, **kwargs):
    # Account for the _GO and _EOS symbols.
    self.max_length = max_length + 2
    self.lengths = kwargs.get(
        'lengths', tf.placeholder(tf.int32, shape=[None], name="decoder_lengths"))
    self.inputs = kwargs.get('inputs', [
        tf.placeholder(tf.int32, shape=[None], name="decoder_input{0}".format(i))
        for i in xrange(self.max_length)])
    self.weights = kwargs.get('weights', [
        tf.placeholder(tf.float32, shape=[None], name="decoder_weight{0}".format(i))
        for i in xrange(self.max_length)])
    # Targets are the decoder inputs shifted left by one step.
    self.targets = [self.inputs[i + 1] for i in xrange(len(self.inputs) - 1)]
    self.targets.append(tf.zeros_like(self.targets[0]))
    num_symbols = embedding.get_shape()[0].value
    output_projection = None
    loss_function = None
    self.cell = cell
    self.feed_previous = feed_previous
    # Use a sampled softmax only when it is cheaper than the full softmax.
    if num_samples > 0 and num_samples < num_symbols:
        with tf.device('/cpu:0'):
            w = tf.get_variable('proj_w', [cell.output_size, num_symbols])
            w_t = tf.transpose(w)
            b = tf.get_variable('proj_b', [num_symbols])
        output_projection = (w, b)

        def sampled_loss(inputs, labels):
            with tf.device('/cpu:0'):
                labels = tf.reshape(labels, [-1, 1])
                return tf.nn.sampled_softmax_loss(w_t, b, inputs, labels,
                                                  num_samples, num_symbols)

        loss_function = sampled_loss
    output_size = None
    if output_projection is None:
        cell = rnn_cell.OutputProjectionWrapper(cell, num_symbols)
        output_size = num_symbols
    if output_size is None:
        output_size = cell.output_size
    if output_projection is not None:
        proj_weights = ops.convert_to_tensor(output_projection[0], dtype=dtype)
        proj_weights.get_shape().assert_is_compatible_with(
            [cell.output_size, num_symbols])
        proj_biases = ops.convert_to_tensor(output_projection[1], dtype=dtype)
        proj_biases.get_shape().assert_is_compatible_with([num_symbols])
    with variable_scope.variable_scope(scope or "embedding_attention_decoder"):
        loop_function = self._extract_argmax_and_embed(
            embedding, output_projection,
            update_embedding_for_previous) if feed_previous else None
        emb_inp = [embedding_ops.embedding_lookup(embedding, i)
                   for i in self.inputs]
        self.outputs, self.state = attention_decoder(
            emb_inp, self.lengths, initial_state, attention_states, cell,
            output_size=output_size, loop_function=loop_function,
            initial_state_attention=initial_state_attention)
    targets = [self.inputs[i + 1] for i in xrange(len(self.inputs) - 1)]
    targets.append(tf.zeros_like(self.inputs[-1]))
    # Loss for each instance in the batch.
    self.instance_loss = sequence_loss_by_example(
        self.outputs, targets, self.weights, softmax_loss_function=loss_function)
    # Aggregated average loss per instance for the batch.
    self.loss = tf.reduce_sum(self.instance_loss) / math_ops.cast(
        array_ops.shape(targets[0])[0], self.instance_loss.dtype)
    if output_projection is not None:
        self.projected_output = [
            tf.matmul(o, output_projection[0]) + output_projection[1]
            for o in self.outputs]
        self.decoded_outputs = tf.unpack(
            tf.argmax(tf.pack(self.projected_output), 2))
    else:
        self.decoded_outputs = tf.unpack(tf.argmax(tf.pack(self.outputs), 2))
    self.decoded_lengths = tf.reduce_sum(
        tf.sign(tf.transpose(tf.pack(self.decoded_outputs))), 1)
    self.decoded_batch = tf.transpose(tf.pack(self.decoded_outputs))
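# Why the sampled softmax above: with a large vocabulary, the full softmax over
# num_symbols classes dominates training cost, so the loss is estimated from
# num_samples sampled classes instead. A minimal self-contained sketch of the
# same wiring, assuming the TF 0.x-era sampled_softmax_loss signature
# (weights, biases, inputs, labels, num_sampled, num_classes); all sizes here
# are hypothetical.
import tensorflow as tf

NUM_SYMBOLS, HIDDEN, NUM_SAMPLES = 50000, 256, 512

w = tf.get_variable('proj_w', [HIDDEN, NUM_SYMBOLS])     # output projection
b = tf.get_variable('proj_b', [NUM_SYMBOLS])
cell_out = tf.placeholder(tf.float32, [None, HIDDEN])    # decoder cell outputs
labels = tf.placeholder(tf.int32, [None])

# Training loss: sampled softmax over the transposed projection weights.
loss = tf.nn.sampled_softmax_loss(tf.transpose(w), b, cell_out,
                                  tf.reshape(labels, [-1, 1]),
                                  NUM_SAMPLES, NUM_SYMBOLS)
# At decode time the full projection is still applied: logits = x W + b.
logits = tf.matmul(cell_out, w) + b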
def attention_seq2seq(encoder_inputs, en_seq_length, decoder_inputs, cell,
                      num_decoder_symbols, num_heads=1, output_projection=None,
                      feed_previous=False, dtype=None, scope=None,
                      initial_state_attention=False):
    """Sequence-to-sequence model with attention and pre-embedded inputs.

    Unlike embedding_attention_seq2seq, this model does not create embeddings:
    encoder_inputs and decoder_inputs are expected to be already-embedded float
    Tensors. It runs an RNN to encode encoder_inputs into a state vector,
    keeping the outputs of this RNN at every step to use for attention later.
    Then it runs the attention decoder, initialized with the last encoder
    state, on decoder_inputs, attending to encoder outputs.

    Args:
      encoder_inputs: A list of 2D float Tensors of shape
        [batch_size x input_size].
      en_seq_length: An int32 Tensor of shape [batch_size]; the actual length
        of each encoder sequence.
      decoder_inputs: A list of 2D float Tensors of shape
        [batch_size x input_size].
      cell: rnn_cell.RNNCell defining the cell function and size.
      num_decoder_symbols: Integer; number of symbols on the decoder side.
      num_heads: Number of attention heads that read from attention_states.
      output_projection: None or a pair (W, B) of output projection weights and
        biases; W has shape [output_size x num_decoder_symbols] and B has shape
        [num_decoder_symbols].
      feed_previous: Boolean; currently unused, since the loop function that
        would feed previous outputs back in is disabled below.
      dtype: The dtype of the initial RNN state (default: tf.float32).
      scope: VariableScope for the created subgraph; defaults to
        "attention_seq2seq".
      initial_state_attention: If False (default), initial attentions are zero.
        If True, initialize the attentions from the initial state and attention
        states.

    Returns:
      A tuple of the form (outputs, state), where:
        outputs: A list of the same length as decoder_inputs of 2D Tensors with
          shape [batch_size x num_decoder_symbols] containing the generated
          outputs.
        state: The state of each decoder cell at the final time-step. It is a
          2D Tensor of shape [batch_size x cell.state_size].
    """
    with variable_scope.variable_scope(scope or "attention_seq2seq",
                                       dtype=dtype) as scope:
        dtype = scope.dtype
        # Encoder.
        encoder_outputs, encoder_state = rnn.rnn(cell, encoder_inputs,
                                                 sequence_length=en_seq_length,
                                                 scope='attention_rnn_encoder',
                                                 dtype=dtype)
        # First calculate a concatenation of encoder outputs to put attention on.
        top_states = [array_ops.reshape(e, [-1, 1, cell.output_size])
                      for e in encoder_outputs]
        attention_states = array_ops.concat(1, top_states)
        # Decoder.
        output_size = None
        if output_projection is None:
            cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
            output_size = num_decoder_symbols
        # No embedding is created here, so previous outputs cannot be
        # re-embedded and fed back; the loop function is therefore disabled.
        loop_function = None
        return attention_decoder(
            decoder_inputs, encoder_state, attention_states, cell,
            output_size=output_size, num_heads=num_heads,
            loop_function=loop_function,
            initial_state_attention=initial_state_attention)
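# A minimal sketch of driving attention_seq2seq with pre-embedded inputs,
# assuming the TF 0.x list-of-tensors convention; every size below is a
# hypothetical placeholder.
import tensorflow as tf

T_ENC, T_DEC = 15, 15
INPUT_SIZE, HIDDEN, VOCAB = 128, 256, 8000

cell = tf.nn.rnn_cell.GRUCell(HIDDEN)
enc = [tf.placeholder(tf.float32, [None, INPUT_SIZE]) for _ in range(T_ENC)]
dec = [tf.placeholder(tf.float32, [None, INPUT_SIZE]) for _ in range(T_DEC)]
lengths = tf.placeholder(tf.int32, [None])  # true encoder length per example

outputs, state = attention_seq2seq(enc, lengths, dec, cell,
                                   num_decoder_symbols=VOCAB)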
def embedding_attention_seq2seq(encoder_inputs, decoder_inputs, seq_len, cell,
                                num_encoder_symbols, num_decoder_symbols,
                                embedding_size, num_heads=1,
                                output_projection=None, feed_previous=False,
                                dtype=dtypes.float32, scope=None,
                                initial_state_attention=False,
                                use_conv=False, conv_filter_width=5,
                                conv_num_channels=32, attention_vec_size=None):
    # Note: use_conv, conv_filter_width, conv_num_channels and
    # attention_vec_size were free (undefined) names in the original; they are
    # surfaced here as parameters with placeholder defaults.
    with variable_scope.variable_scope(scope or "embedding_attention_seq2seq"):
        with ops.device("/cpu:0"):
            embedding_words = variable_scope.get_variable(
                "embedding_words", [num_encoder_symbols, embedding_size])
        # We need to do the embedding beforehand so that the rnn infers the
        # input type to be float and doesn't cause trouble in copying state
        # after sequence length. This issue was fixed in version 0.10; see
        # https://github.com/tensorflow/tensorflow/issues/3322
        encoder_inputs = [embedding_ops.embedding_lookup(embedding_words, i)
                          for i in encoder_inputs]
        encoder_outputs, encoder_state = rnn.rnn(cell, encoder_inputs,
                                                 sequence_length=seq_len,
                                                 dtype=dtype)
        # First calculate a concatenation of encoder outputs to put attention on.
        top_states = [array_ops.reshape(e, [-1, 1, cell.output_size])
                      for e in encoder_outputs]
        attention_states = array_ops.concat(1, top_states)
        # Decoder.
        output_size = None
        if output_projection is None:
            cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
            output_size = num_decoder_symbols
        if isinstance(feed_previous, bool):
            return embedding_attention_decoder(
                decoder_inputs, encoder_state, attention_states, cell, seq_len,
                num_decoder_symbols, embedding_size, use_conv,
                conv_filter_width, conv_num_channels, attention_vec_size,
                num_heads=num_heads, output_size=output_size,
                output_projection=output_projection,
                feed_previous=feed_previous,
                initial_state_attention=initial_state_attention)

        # If feed_previous is a Tensor, we construct 2 graphs and use cond.
        def decoder(feed_previous_bool):
            reuse = None if feed_previous_bool else True
            with variable_scope.variable_scope(
                    variable_scope.get_variable_scope(), reuse=reuse):
                outputs, state = embedding_attention_decoder(
                    decoder_inputs, encoder_state, attention_states, cell,
                    seq_len, num_decoder_symbols, embedding_size, use_conv,
                    conv_filter_width, conv_num_channels, attention_vec_size,
                    num_heads=num_heads, output_size=output_size,
                    output_projection=output_projection,
                    feed_previous=feed_previous_bool,
                    update_embedding_for_previous=False,
                    initial_state_attention=initial_state_attention)
                return outputs + [state]

        outputs_and_state = control_flow_ops.cond(feed_previous,
                                                  lambda: decoder(True),
                                                  lambda: decoder(False))
        return outputs_and_state[:-1], outputs_and_state[-1]
def embedding_attention_seq2seq_context(encoder_inputs, decoder_inputs, cell,
                                        num_encoder_symbols, num_decoder_symbols,
                                        num_heads=1, output_projection=None,
                                        feed_previous=False, dtype=dtypes.float32,
                                        scope=None):
    """A seq2seq architecture with two encoders, one for context, one for input DA.

    The decoder uses twice the cell size. Code adapted from TensorFlow examples.
    """
    with vs.variable_scope(scope or "embedding_attention_seq2seq_context"):
        # Split context and real inputs into separate vectors; the first half
        # of encoder_inputs is the context.
        context_inputs = encoder_inputs[0:len(encoder_inputs) // 2]
        encoder_inputs = encoder_inputs[len(encoder_inputs) // 2:]
        # Build separate encoders sharing one embedding wrapper.
        encoder_cell = rnn_cell.EmbeddingWrapper(cell, num_encoder_symbols)
        with vs.variable_scope("context_rnn") as scope:
            context_outputs, context_states = rnn.rnn(
                encoder_cell, context_inputs, dtype=dtype, scope=scope)
        with vs.variable_scope("input_rnn") as scope:
            encoder_outputs, encoder_states = rnn.rnn(
                encoder_cell, encoder_inputs, dtype=dtype, scope=scope)
        # Concatenate outputs & states of the two encoders.
        encoder_outputs = [array_ops.concat(1, [co, eo],
                                            name="context-and-encoder-output")
                           for co, eo in zip(context_outputs, encoder_outputs)]
        encoder_states = [array_ops.concat(1, [cs, es],
                                           name="context-and-encoder-state")
                          for cs, es in zip(context_states, encoder_states)]
        # Calculate a concatenation of encoder outputs to put attention on.
        top_states = [array_ops.reshape(e, [-1, 1, cell.output_size * 2])
                      for e in encoder_outputs]
        attention_states = array_ops.concat(1, top_states)
        # Change the decoder cell to accommodate the wider input.
        # TODO this will work for BasicLSTMCell and GRUCell, but not for others.
        cell = type(cell)(num_units=(cell.input_size * 2))
        # Decoder.
        output_size = None
        if output_projection is None:
            cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
            output_size = num_decoder_symbols
        if isinstance(feed_previous, bool):
            return embedding_attention_decoder(
                decoder_inputs, encoder_states[-1], attention_states, cell,
                num_decoder_symbols, num_heads, output_size, output_projection,
                feed_previous)
        else:
            # If feed_previous is a Tensor, we construct 2 graphs and use cond.
            outputs1, states1 = embedding_attention_decoder(
                decoder_inputs, encoder_states[-1], attention_states, cell,
                num_decoder_symbols, num_heads, output_size, output_projection,
                True)
            vs.get_variable_scope().reuse_variables()
            outputs2, states2 = embedding_attention_decoder(
                decoder_inputs, encoder_states[-1], attention_states, cell,
                num_decoder_symbols, num_heads, output_size, output_projection,
                False)
            outputs = control_flow_ops.cond(feed_previous,
                                            lambda: outputs1, lambda: outputs2)
            states = control_flow_ops.cond(feed_previous,
                                           lambda: states1, lambda: states2)
            return outputs, states
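# The context variant above expects a single list whose first half is the
# context and whose second half is the actual input DA. A minimal sketch of
# that packing convention, assuming the TF 0.x API; all sizes are hypothetical.
import tensorflow as tf

CTX_LEN = IN_LEN = 10
ctx = [tf.placeholder(tf.int32, [None]) for _ in range(CTX_LEN)]
inp = [tf.placeholder(tf.int32, [None]) for _ in range(IN_LEN)]
dec = [tf.placeholder(tf.int32, [None]) for _ in range(IN_LEN)]

cell = tf.nn.rnn_cell.GRUCell(128)
outputs, states = embedding_attention_seq2seq_context(
    ctx + inp, dec, cell, num_encoder_symbols=5000, num_decoder_symbols=5000)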
def embedding_attention_pointer_seq2seq_states(encoder_inputs, decoder_inputs,
                                               cell, num_encoder_symbols,
                                               num_decoder_symbols, embedding_size,
                                               num_heads=1, output_projection=None,
                                               feed_previous=False, dtype=tf.float32,
                                               scope=None,
                                               initial_state_attention=False):
    with variable_scope.variable_scope(
            scope or "embedding_attention_pointer_seq2seq_states") as scope:
        # Encoder, started from an externally fed initial state.
        encoder_initial_state = tf.placeholder(dtype, [None, cell.state_size],
                                               "encoder_initial_state")
        encoder_cell = rnn_cell.EmbeddingWrapper(
            cell, embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        encoder_outputs, encoder_state = rnn.rnn(
            encoder_cell, encoder_inputs,
            initial_state=encoder_initial_state, dtype=dtype)
        # First calculate a concatenation of encoder outputs to put attention on.
        top_states = [array_ops.reshape(e, [-1, 1, cell.output_size])
                      for e in encoder_outputs]
        attention_states = array_ops.concat(1, top_states)
        # Decoder.
        output_size = None
        if output_projection is None:
            cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
            output_size = num_decoder_symbols
        if isinstance(feed_previous, bool):
            raise Exception("feed_previous must be a tensor!")

        # feed_previous is a Tensor, so we construct 2 graphs and use cond.
        def decoder(feed_previous_bool):
            reuse = None if feed_previous_bool else True
            with variable_scope.variable_scope(
                    variable_scope.get_variable_scope(), reuse=reuse) as scope:
                outputs, decoder_state = embedding_attention_decoder(
                    decoder_inputs, encoder_state, attention_states, cell,
                    num_decoder_symbols, embedding_size, num_heads=num_heads,
                    output_size=output_size, output_projection=output_projection,
                    feed_previous=feed_previous_bool,
                    update_embedding_for_previous=False,
                    initial_state_attention=initial_state_attention)
                return outputs, decoder_state

        true_outputs, true_decoder_state = decoder(True)
        false_outputs, false_decoder_state = decoder(False)
        outputs = tf.cond(feed_previous,
                          lambda: true_outputs, lambda: false_outputs)
        return outputs, encoder_initial_state, encoder_state, (
            true_decoder_state, false_decoder_state)
def many2one_attention_seq2seq(encoder_inputs_list, decoder_inputs, text_len,
                               text_cell, speech_cell, parse_cell,
                               num_encoder_symbols, num_decoder_symbols,
                               embedding_size, output_projection=None,
                               feed_previous=False, dtype=dtypes.float32,
                               scope=None, initial_state_attention=False,
                               attention_vec_size=None):
    text_encoder_inputs, speech_encoder_inputs = encoder_inputs_list
    with variable_scope.variable_scope(scope or "many2one_attention_seq2seq"):
        with ops.device("/cpu:0"):
            embedding_words = variable_scope.get_variable(
                "embedding_words", [num_encoder_symbols, embedding_size])
        text_encoder_inputs = [embedding_ops.embedding_lookup(embedding_words, i)
                               for i in text_encoder_inputs]
        # Encoders. Fixed inner scope names ensure the two encoders always get
        # distinct variables (the original fell back to the outer `scope` here,
        # which would name both encoders identically when a scope was passed).
        with variable_scope.variable_scope("text_encoder"):
            text_encoder_outputs, text_encoder_state = rnn.rnn(
                text_cell, text_encoder_inputs, sequence_length=text_len,
                dtype=dtype)
        with variable_scope.variable_scope("speech_encoder"):
            speech_encoder_outputs, speech_encoder_state = rnn.rnn(
                speech_cell, speech_encoder_inputs, dtype=dtype)
        # First calculate a concatenation of encoder outputs to put attention on.
        text_top_states = [array_ops.reshape(e, [-1, 1, text_cell.output_size])
                           for e in text_encoder_outputs]
        # h_states = attention_states in the original code.
        h_states = array_ops.concat(1, text_top_states)
        speech_top_states = [array_ops.reshape(e, [-1, 1, speech_cell.output_size])
                             for e in speech_encoder_outputs]
        m_states = array_ops.concat(1, speech_top_states)
        attention_states = [h_states, m_states]
        both_encoder_states = [text_encoder_state, speech_encoder_state]
        # Decoder.
        output_size = None
        if output_projection is None:
            parse_cell = rnn_cell.OutputProjectionWrapper(parse_cell,
                                                          num_decoder_symbols)
            output_size = num_decoder_symbols
        if isinstance(feed_previous, bool):
            return many2one_embedding_attention_decoder(
                decoder_inputs, both_encoder_states, attention_states,
                parse_cell, num_decoder_symbols, embedding_size,
                output_size=output_size, output_projection=output_projection,
                feed_previous=feed_previous,
                initial_state_attention=initial_state_attention,
                attention_vec_size=attention_vec_size)

        # If feed_previous is a Tensor, we construct 2 graphs and use cond.
        def decoder(feed_previous_bool):
            reuse = None if feed_previous_bool else True
            with variable_scope.variable_scope(
                    variable_scope.get_variable_scope(), reuse=reuse):
                outputs, state = many2one_embedding_attention_decoder(
                    decoder_inputs, both_encoder_states, attention_states,
                    parse_cell, num_decoder_symbols, embedding_size,
                    output_size=output_size,
                    output_projection=output_projection,
                    feed_previous=feed_previous_bool,
                    update_embedding_for_previous=False,
                    initial_state_attention=initial_state_attention,
                    attention_vec_size=attention_vec_size)
                return outputs + [state]

        outputs_and_state = control_flow_ops.cond(feed_previous,
                                                  lambda: decoder(True),
                                                  lambda: decoder(False))
        return outputs_and_state[:-1], outputs_and_state[-1]
def embedding_attention_seq2seq_beam(dec_inp, use_initial, supplied_prev,
                                     supplied_state, supplied_attns, cell,
                                     num_decoder_symbols, embedding_size,
                                     encoder_outputs, encoder_state):
    with variable_scope.variable_scope("embedding_attention_seq2seq"):
        # First calculate a concatenation of encoder outputs to put attention on.
        top_states = [array_ops.reshape(e, [-1, 1, cell.output_size])
                      for e in encoder_outputs]
        attention_states = array_ops.concat(1, top_states)
        # Decoder.
        cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
        with variable_scope.variable_scope("embedding_attention_decoder"):
            embedding = variable_scope.get_variable(
                "embedding", [num_decoder_symbols, embedding_size])
            loop_function = _extract_argmax_and_embed(embedding)
            emb_inp = [embedding_ops.embedding_lookup(embedding, i)
                       for i in dec_inp]
            decoder_inputs = emb_inp
            initial_state = encoder_state
            output_size = cell.output_size
            # Attention decoder (a single decode step).
            with variable_scope.variable_scope("attention_decoder"):
                batch_size_ = array_ops.shape(decoder_inputs[0])[0]  # Needed for reshaping.
                attn_length = attention_states.get_shape()[1].value
                attn_size = attention_states.get_shape()[2].value
                # To calculate W1 * h_t we use a 1-by-1 convolution, need to
                # reshape before.
                hidden = array_ops.reshape(attention_states,
                                           [-1, attn_length, 1, attn_size])
                hidden_features = []
                v = []
                attention_vec_size = attn_size  # Size of query vectors for attention.
                for a in xrange(1):
                    k = variable_scope.get_variable(
                        "AttnW_%d" % a, [1, 1, attn_size, attention_vec_size])
                    hidden_features.append(
                        nn_ops.conv2d(hidden, k, [1, 1, 1, 1], "SAME"))
                    v.append(variable_scope.get_variable(
                        "AttnV_%d" % a, [attention_vec_size]))
                # Start from the encoder state on the first step, otherwise
                # from the caller-supplied state.
                state = tf.cond(use_initial > 0,
                                lambda: initial_state,
                                lambda: supplied_state)

                def attention(query):
                    """Put attention masks on hidden using hidden_features and query."""
                    ds = []  # Results of attention reads will be stored here.
                    for a in xrange(1):
                        with variable_scope.variable_scope("Attention_%d" % a):
                            y = linear(query, attention_vec_size, True)
                            y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size])
                            # Attention mask is a softmax of v^T * tanh(...).
                            s = math_ops.reduce_sum(
                                v[a] * math_ops.tanh(hidden_features[a] + y),
                                [2, 3])
                            a = nn_ops.softmax(s)
                            # Now calculate the attention-weighted vector d.
                            d = math_ops.reduce_sum(
                                array_ops.reshape(a, [-1, attn_length, 1, 1]) * hidden,
                                [1, 2])
                            ds.append(array_ops.reshape(d, [-1, attn_size]))
                    return ds

                outputs = []
                prev = None
                batch_attn_size = array_ops.pack([batch_size_, attn_size])
                attns = [tf.cond(use_initial > 0,
                                 lambda: array_ops.zeros(batch_attn_size,
                                                         dtype=dtypes.float32),
                                 lambda: supplied_attns)
                         for _ in xrange(1)]
                for a in attns:
                    # Ensure the second shape of attention vectors is set.
                    a.set_shape([None, attn_size])
                with variable_scope.variable_scope("loop_function", reuse=True):
                    inp = decoder_inputs[0]
                input_size = inp.get_shape().with_rank(2)[1]
                if input_size.value is None:
                    raise ValueError("Could not infer input size from input: %s"
                                     % inp.name)
                x = linear([inp] + attns, input_size, True)
                # Run the RNN.
                cell_output, state = cell(x, state)
                # Run the attention mechanism.
                attns = attention(state)
                with variable_scope.variable_scope("AttnOutputProjection"):
                    output = linear([cell_output] + attns, output_size, True)
                return output, state, attns[0]
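# What the attention() closure above computes, in plain NumPy: additive
# (Bahdanau-style) scores s_i = v^T tanh(W1 h_i + W2 q) over encoder top states
# h_i, a softmax over the scores, and the attention-weighted read d. A minimal
# sketch with hypothetical sizes; W1 corresponds to the 1x1 convolution AttnW,
# W2 to the linear(query) projection, and v to AttnV.
import numpy as np

T, ATTN_SIZE = 7, 16                      # attn_length, attn_size
h = np.random.randn(T, ATTN_SIZE)         # encoder top states ("hidden")
q = np.random.randn(ATTN_SIZE)            # decoder state ("query")
W1 = np.random.randn(ATTN_SIZE, ATTN_SIZE)
W2 = np.random.randn(ATTN_SIZE, ATTN_SIZE)
v = np.random.randn(ATTN_SIZE)

s = np.tanh(h @ W1 + q @ W2) @ v          # unnormalized scores, shape [T]
a = np.exp(s - s.max()); a /= a.sum()     # softmax attention mask
d = a @ h                                 # attention-weighted read, [ATTN_SIZE]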
def embedding_attention_seq2seq(encoder_inputs, decoder_inputs, cell,
                                num_encoder_symbols, num_decoder_symbols,
                                embedding_size, num_heads=1,
                                output_projection=None, feed_previous=False,
                                dtype=None, scope=None,
                                initial_state_attention=False):
    with variable_scope.variable_scope(scope or "embedding_attention_seq2seq",
                                       dtype=dtype) as scope:
        dtype = scope.dtype
        # Encoder.
        encoder_cell = rnn_cell.EmbeddingWrapper(
            cell, embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        encoder_outputs, encoder_state = rnn.rnn(encoder_cell, encoder_inputs,
                                                 dtype=dtype)
        # First calculate a concatenation of encoder outputs to put attention on.
        top_states = [array_ops.reshape(e, [-1, 1, cell.output_size])
                      for e in encoder_outputs]
        attention_states = array_ops.concat(1, top_states)
        # Decoder.
        output_size = None
        if output_projection is None:
            cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
            output_size = num_decoder_symbols
        if isinstance(feed_previous, bool):
            outputs, state = embedding_attention_decoder(
                decoder_inputs, encoder_state, attention_states, cell,
                num_decoder_symbols, embedding_size, num_heads=num_heads,
                output_size=output_size, output_projection=output_projection,
                feed_previous=feed_previous,
                initial_state_attention=initial_state_attention, scope=scope)
            return outputs, state, encoder_state

        # If feed_previous is a Tensor, we construct 2 graphs and use cond.
        def decoder(feed_previous_bool):
            reuse = None if feed_previous_bool else True
            with variable_scope.variable_scope(
                    variable_scope.get_variable_scope(), reuse=reuse) as scope:
                outputs, state = embedding_attention_decoder(
                    decoder_inputs, encoder_state, attention_states, cell,
                    num_decoder_symbols, embedding_size, num_heads=num_heads,
                    output_size=output_size, output_projection=output_projection,
                    feed_previous=feed_previous_bool,
                    update_embedding_for_previous=False,
                    initial_state_attention=initial_state_attention, scope=scope)
                state_list = [state]
                if nest.is_sequence(state):
                    state_list = nest.flatten(state)
                return outputs + state_list

        outputs_and_state = control_flow_ops.cond(feed_previous,
                                                  lambda: decoder(True),
                                                  lambda: decoder(False))
        outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
        state_list = outputs_and_state[outputs_len:]
        state = state_list[0]
        if nest.is_sequence(encoder_state):
            state = nest.pack_sequence_as(structure=encoder_state,
                                          flat_sequence=state_list)
        return outputs_and_state[:outputs_len], state, encoder_state
def embedding_rnn_seq2seq(encoder_inputs, decoder_inputs, cell,
                          num_encoder_symbols, num_decoder_symbols,
                          output_projection=None, feed_previous=False,
                          dtype=dtypes.float32, scope=None):
    """Embedding RNN sequence-to-sequence model (older EmbeddingWrapper API).

    This model first embeds encoder_inputs by a newly created embedding (of
    shape [num_encoder_symbols x cell.input_size]). Then it runs an RNN to
    encode embedded encoder_inputs into a state vector. Next, it embeds
    decoder_inputs by another newly created embedding (of shape
    [num_decoder_symbols x cell.input_size]). Then it runs the RNN decoder,
    initialized with the last encoder state, on embedded decoder_inputs.

    Args:
      encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
      decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
      cell: rnn_cell.RNNCell defining the cell function and size.
      num_encoder_symbols: Integer; number of symbols on the encoder side.
      num_decoder_symbols: Integer; number of symbols on the decoder side.
      output_projection: None or a pair (W, B) of output projection weights and
        biases; W has shape [cell.output_size x num_decoder_symbols] and B has
        shape [num_decoder_symbols]; if provided and feed_previous=True, each
        fed previous output will first be multiplied by W and have B added.
      feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
        of decoder_inputs will be used (the "GO" symbol), and all other decoder
        inputs will be taken from previous outputs (as in
        embedding_rnn_decoder). If False, decoder_inputs are used as given (the
        standard decoder case).
      dtype: The dtype of the initial state for both the encoder and decoder
        rnn cells (default: tf.float32).
      scope: VariableScope for the created subgraph; defaults to
        "embedding_rnn_seq2seq".

    Returns:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell in each time-step. This is a list
        with length len(decoder_inputs) -- one item for each time-step. Each
        item is a 2D Tensor of shape [batch_size x cell.state_size].
    """
    with vs.variable_scope(scope or "embedding_rnn_seq2seq"):
        # Encoder.
        encoder_cell = rnn_cell.EmbeddingWrapper(cell, num_encoder_symbols)
        _, encoder_state = rnn.rnn(encoder_cell, encoder_inputs, dtype=dtype)
        # Decoder.
        if output_projection is None:
            cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
        if isinstance(feed_previous, bool):
            return embedding_rnn_decoder(decoder_inputs, encoder_state, cell,
                                         num_decoder_symbols, output_projection,
                                         feed_previous)
        else:
            # If feed_previous is a Tensor, we construct 2 graphs and use cond.
            outputs1, state1 = embedding_rnn_decoder(
                decoder_inputs, encoder_state, cell, num_decoder_symbols,
                output_projection, True)
            vs.get_variable_scope().reuse_variables()
            outputs2, state2 = embedding_rnn_decoder(
                decoder_inputs, encoder_state, cell, num_decoder_symbols,
                output_projection, False)
            outputs = control_flow_ops.cond(feed_previous,
                                            lambda: outputs1, lambda: outputs2)
            state = control_flow_ops.cond(feed_previous,
                                          lambda: state1, lambda: state2)
            return outputs, state
def embedding_rnn_seq2seq(encoder_inputs, decoder_inputs, cell,
                          num_encoder_symbols, num_decoder_symbols,
                          embedding_size, output_projection=None,
                          feed_previous=False, dtype=None, scope=None):
    """Embedding RNN sequence-to-sequence model.

    This model first embeds encoder_inputs by a newly created embedding (of
    shape [num_encoder_symbols x input_size]). Then it runs an RNN to encode
    embedded encoder_inputs into a state vector. Next, it embeds decoder_inputs
    by another newly created embedding (of shape
    [num_decoder_symbols x input_size]). Then it runs the RNN decoder,
    initialized with the last encoder state, on embedded decoder_inputs.

    Args:
      encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
      decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
      cell: rnn_cell.RNNCell defining the cell function and size.
      num_encoder_symbols: Integer; number of symbols on the encoder side.
      num_decoder_symbols: Integer; number of symbols on the decoder side.
      embedding_size: Integer, the length of the embedding vector for each
        symbol.
      output_projection: None or a pair (W, B) of output projection weights and
        biases; W has shape [output_size x num_decoder_symbols] and B has shape
        [num_decoder_symbols]; if provided and feed_previous=True, each fed
        previous output will first be multiplied by W and have B added.
      feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
        of decoder_inputs will be used (the "GO" symbol), and all other decoder
        inputs will be taken from previous outputs (as in
        embedding_rnn_decoder). If False, decoder_inputs are used as given (the
        standard decoder case).
      dtype: The dtype of the initial state for both the encoder and decoder
        rnn cells (default: tf.float32).
      scope: VariableScope for the created subgraph; defaults to
        "embedding_rnn_seq2seq".

    Returns:
      A tuple of the form (outputs, state), where:
        outputs: A list of the same length as decoder_inputs of 2D Tensors. The
          output is of shape [batch_size x cell.output_size] when
          output_projection is not None (and represents the dense
          representation of predicted tokens). It is of shape
          [batch_size x num_decoder_symbols] when output_projection is None.
        state: The state of each decoder cell in each time-step. This is a list
          with length len(decoder_inputs) -- one item for each time-step. Each
          item is a 2D Tensor of shape [batch_size x cell.state_size].
    """
    with variable_scope.variable_scope(scope or "embedding_rnn_seq2seq") as scope:
        if dtype is not None:
            scope.set_dtype(dtype)
        else:
            dtype = scope.dtype
        # Encoder.
        encoder_cell = rnn_cell.EmbeddingWrapper(
            cell, embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        _, encoder_state = rnn.rnn(encoder_cell, encoder_inputs, dtype=dtype)
        # Decoder.
        if output_projection is None:
            cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
        if isinstance(feed_previous, bool):
            return embedding_rnn_decoder(
                decoder_inputs, encoder_state, cell, num_decoder_symbols,
                embedding_size, output_projection=output_projection,
                feed_previous=feed_previous)

        # If feed_previous is a Tensor, we construct 2 graphs and use cond.
        def decoder(feed_previous_bool):
            reuse = None if feed_previous_bool else True
            with variable_scope.variable_scope(
                    variable_scope.get_variable_scope(), reuse=reuse) as scope:
                outputs, state = embedding_rnn_decoder(
                    decoder_inputs, encoder_state, cell, num_decoder_symbols,
                    embedding_size, output_projection=output_projection,
                    feed_previous=feed_previous_bool,
                    update_embedding_for_previous=False)
                state_list = [state]
                if nest.is_sequence(state):
                    state_list = nest.flatten(state)
                return outputs + state_list

        outputs_and_state = control_flow_ops.cond(feed_previous,
                                                  lambda: decoder(True),
                                                  lambda: decoder(False))
        outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
        state_list = outputs_and_state[outputs_len:]
        state = state_list[0]
        if nest.is_sequence(encoder_state):
            state = nest.pack_sequence_as(structure=encoder_state,
                                          flat_sequence=state_list)
        return outputs_and_state[:outputs_len], state
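# Why the decoder() closure above flattens the state: control_flow_ops.cond
# returns a flat list of Tensors, so structured cell states (e.g. LSTM state
# tuples) are flattened before the cond and re-packed afterwards. A
# self-contained sketch of that round trip, assuming the TF 0.x/1.x-era
# tensorflow.python.util.nest module:
import tensorflow as tf
from tensorflow.python.util import nest

state = (tf.zeros([2, 3]), (tf.ones([2, 3]), tf.zeros([2, 1])))  # nested state
assert nest.is_sequence(state)
flat = nest.flatten(state)          # -> flat list of 3 Tensors
rebuilt = nest.pack_sequence_as(structure=state, flat_sequence=flat)
# `rebuilt` has the same nested structure as `state`.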
def embedding_attention_seq2seq(encoder_inputs, decoder_inputs, cell,
                                num_encoder_symbols, num_decoder_symbols,
                                num_heads=1, output_projection=None,
                                feed_previous=False, dtype=dtypes.float32,
                                scope=None, initial_state_attention=False):
    """Embedding sequence-to-sequence model with attention.

    This model first embeds encoder_inputs by a newly created embedding (of
    shape [num_encoder_symbols x cell.input_size]). Then it runs an RNN to
    encode embedded encoder_inputs into a state vector. It keeps the outputs of
    this RNN at every step to use for attention later. Next, it embeds
    decoder_inputs by another newly created embedding (of shape
    [num_decoder_symbols x cell.input_size]). Then it runs the attention
    decoder, initialized with the last encoder state, on embedded
    decoder_inputs and attending to encoder outputs.

    Args:
      encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
      decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
      cell: rnn_cell.RNNCell defining the cell function and size.
      num_encoder_symbols: Integer; number of symbols on the encoder side.
      num_decoder_symbols: Integer; number of symbols on the decoder side.
      num_heads: Number of attention heads that read from attention_states.
      output_projection: None or a pair (W, B) of output projection weights and
        biases; W has shape [cell.output_size x num_decoder_symbols] and B has
        shape [num_decoder_symbols]; if provided and feed_previous=True, each
        fed previous output will first be multiplied by W and have B added.
      feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
        of decoder_inputs will be used (the "GO" symbol), and all other decoder
        inputs will be taken from previous outputs (as in
        embedding_rnn_decoder). If False, decoder_inputs are used as given (the
        standard decoder case).
      dtype: The dtype of the initial RNN state (default: tf.float32).
      scope: VariableScope for the created subgraph; defaults to
        "embedding_attention_seq2seq".
      initial_state_attention: If False (default), initial attentions are zero.
        If True, initialize the attentions from the initial state and attention
        states.

    Returns:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell at the final time-step. It is a 2D
        Tensor of shape [batch_size x cell.state_size].
    """
    with vs.variable_scope(scope or "embedding_attention_seq2seq"):
        # Encoder.
        encoder_cell = rnn_cell.EmbeddingWrapper(cell, num_encoder_symbols)
        encoder_outputs, encoder_state = rnn.rnn(encoder_cell, encoder_inputs,
                                                 dtype=dtype)
        # First calculate a concatenation of encoder outputs to put attention on.
        top_states = [array_ops.reshape(e, [-1, 1, cell.output_size])
                      for e in encoder_outputs]
        attention_states = array_ops.concat(1, top_states)
        # Decoder.
        output_size = None
        if output_projection is None:
            cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
            output_size = num_decoder_symbols
        if isinstance(feed_previous, bool):
            return embedding_attention_decoder(
                decoder_inputs, encoder_state, attention_states, cell,
                num_decoder_symbols, num_heads, output_size, output_projection,
                feed_previous, initial_state_attention=initial_state_attention)
        else:
            # If feed_previous is a Tensor, we construct 2 graphs and use cond.
            outputs1, state1 = embedding_attention_decoder(
                decoder_inputs, encoder_state, attention_states, cell,
                num_decoder_symbols, num_heads, output_size, output_projection,
                True, initial_state_attention=initial_state_attention)
            vs.get_variable_scope().reuse_variables()
            outputs2, state2 = embedding_attention_decoder(
                decoder_inputs, encoder_state, attention_states, cell,
                num_decoder_symbols, num_heads, output_size, output_projection,
                False, initial_state_attention=initial_state_attention)
            outputs = control_flow_ops.cond(feed_previous,
                                            lambda: outputs1, lambda: outputs2)
            state = control_flow_ops.cond(feed_previous,
                                          lambda: state1, lambda: state2)
            return outputs, state
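# The feed_previous-as-Tensor path above makes teacher forcing a runtime
# switch: both decoder graphs share variables, and a scalar bool placeholder
# picks between them at session time. A hypothetical usage sketch (TF 0.x-era
# API assumed; all sizes made up):
import tensorflow as tf

feed_prev = tf.placeholder(tf.bool, [], name="feed_previous")
cell = tf.nn.rnn_cell.GRUCell(256)
enc = [tf.placeholder(tf.int32, [None]) for _ in range(10)]
dec = [tf.placeholder(tf.int32, [None]) for _ in range(10)]

outputs, state = embedding_attention_seq2seq(
    enc, dec, cell, num_encoder_symbols=4000, num_decoder_symbols=4000,
    feed_previous=feed_prev)
# Training step: feed feed_prev=False (use gold decoder inputs).
# Decoding step: feed feed_prev=True (feed back previous predictions).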
def dialog_attention_seq2seq(encoder_inputs, decoder_inputs, cell, vocab_size,
                             num_heads=1, output_projection=None,
                             feed_previous=False, dtype=dtypes.float32,
                             scope=None, initial_state_attention=False):
    if len(encoder_inputs) != len(decoder_inputs):
        raise ValueError("encoder_inputs and decoder_inputs must contain the "
                         "same number of dialog turns.")
    with variable_scope.variable_scope(scope or "dialog_attention_seq2seq"):
        encoder_cell = rnn_cell.EmbeddingWrapper(cell, vocab_size)
        outputs = []
        # Infer the batch size from the first token of the first turn.
        fixed_batch_size = encoder_inputs[0][0].get_shape().with_rank_at_least(1)[0]
        if fixed_batch_size.value:
            batch_size = fixed_batch_size.value
        else:
            batch_size = array_ops.shape(encoder_inputs[0][0])[0]
        # The dialog-level RNN state threads through all turns.
        drnn_state = cell.zero_state(batch_size, dtype)
        # Wrap the decoder cell with an output projection once, outside the
        # turn loop (the original re-wrapped `cell` on every turn, which would
        # stack projections and break variable reuse across turns).
        output_size = None
        decoder_cell = cell
        if output_projection is None:
            decoder_cell = rnn_cell.OutputProjectionWrapper(cell, vocab_size)
            output_size = vocab_size
        for i in range(0, len(encoder_inputs)):
            if i > 0:
                variable_scope.get_variable_scope().reuse_variables()
            encoder_outputs, encoder_state = rnn.rnn(encoder_cell,
                                                     encoder_inputs[i],
                                                     dtype=dtype)
            # First calculate a concatenation of encoder outputs to put attention on.
            top_states = [array_ops.reshape(e, [-1, 1, cell.output_size])
                          for e in encoder_outputs]
            attention_states = array_ops.concat(1, top_states)
            # Advance the dialog RNN by one turn using the encoder state.
            with variable_scope.variable_scope("DRNN"):
                drnn_out, drnn_state = cell(encoder_state, drnn_state)
            # Decoder for this turn.
            answer_output, answer_state = embedding_attention_decoder(
                decoder_inputs[i], drnn_state, attention_states, decoder_cell,
                vocab_size, num_heads=num_heads, output_size=output_size,
                output_projection=output_projection,
                feed_previous=feed_previous,
                initial_state_attention=initial_state_attention)
            outputs.append(answer_output)
            # Fold the answer state back into the dialog RNN.
            with variable_scope.variable_scope("DRNN", reuse=True):
                drnn_out, drnn_state = cell(answer_state, drnn_state)
        return outputs, drnn_state
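# A hypothetical sketch of the nested input layout dialog_attention_seq2seq
# expects: one list per dialog turn, each turn itself a list of int32 token
# placeholders. All sizes below are made up, and the TF 0.x API is assumed.
import tensorflow as tf

TURNS, TURN_LEN, VOCAB = 3, 12, 20000
cell = tf.nn.rnn_cell.GRUCell(256)
enc = [[tf.placeholder(tf.int32, [None]) for _ in range(TURN_LEN)]
       for _ in range(TURNS)]
dec = [[tf.placeholder(tf.int32, [None]) for _ in range(TURN_LEN)]
       for _ in range(TURNS)]

outputs, final_dialog_state = dialog_attention_seq2seq(enc, dec, cell, VOCAB)
# outputs[i] holds the decoder outputs for turn i; the dialog-level RNN state
# carries information across turns.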