def testTimeReversedFusedRNN(self):
  with self.test_session() as sess:
    initializer = init_ops.random_uniform_initializer(
        -0.01, 0.01, seed=19890213)
    fw_cell = core_rnn_cell_impl.BasicRNNCell(10)
    bw_cell = core_rnn_cell_impl.BasicRNNCell(10)
    batch_size = 5
    input_size = 20
    timelen = 15
    inputs = constant_op.constant(
        np.random.randn(timelen, batch_size, input_size))

    # test bi-directional rnn
    with variable_scope.variable_scope("basic", initializer=initializer):
      unpacked_inputs = array_ops.unstack(inputs)
      outputs, fw_state, bw_state = core_rnn.static_bidirectional_rnn(
          fw_cell, bw_cell, unpacked_inputs, dtype=dtypes.float64)
      packed_outputs = array_ops.stack(outputs)
      basic_vars = [
          v for v in variables.trainable_variables()
          if v.name.startswith("basic/")
      ]
      sess.run([variables.global_variables_initializer()])
      basic_outputs, basic_fw_state, basic_bw_state = sess.run(
          [packed_outputs, fw_state, bw_state])
      basic_grads = sess.run(gradients_impl.gradients(packed_outputs, inputs))
      basic_wgrads = sess.run(
          gradients_impl.gradients(packed_outputs, basic_vars))

    with variable_scope.variable_scope("fused", initializer=initializer):
      fused_cell = fused_rnn_cell.FusedRNNCellAdaptor(
          core_rnn_cell_impl.BasicRNNCell(10))
      fused_bw_cell = fused_rnn_cell.TimeReversedFusedRNN(
          fused_rnn_cell.FusedRNNCellAdaptor(
              core_rnn_cell_impl.BasicRNNCell(10)))
      fw_outputs, fw_state = fused_cell(
          inputs, dtype=dtypes.float64, scope="fw")
      bw_outputs, bw_state = fused_bw_cell(
          inputs, dtype=dtypes.float64, scope="bw")
      outputs = array_ops.concat([fw_outputs, bw_outputs], 2)
      fused_vars = [
          v for v in variables.trainable_variables()
          if v.name.startswith("fused/")
      ]
      sess.run([variables.global_variables_initializer()])
      fused_outputs, fused_fw_state, fused_bw_state = sess.run(
          [outputs, fw_state, bw_state])
      fused_grads = sess.run(gradients_impl.gradients(outputs, inputs))
      fused_wgrads = sess.run(gradients_impl.gradients(outputs, fused_vars))

    self.assertAllClose(basic_outputs, fused_outputs)
    self.assertAllClose(basic_fw_state, fused_fw_state)
    self.assertAllClose(basic_bw_state, fused_bw_state)
    self.assertAllClose(basic_grads, fused_grads)
    for basic, fused in zip(basic_wgrads, fused_wgrads):
      self.assertAllClose(basic, fused, rtol=1e-2, atol=1e-2)
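# A minimal standalone sketch of the fused wrappers exercised by the test
# above, written against the public TF 1.x names in tf.contrib.rnn rather
# than the module-internal fused_rnn_cell/core_rnn_cell_impl references
# used in the test; `_example_time_reversed_fused_rnn` and its sizes are
# illustrative assumptions, not part of the test.
import numpy as np
import tensorflow as tf


def _example_time_reversed_fused_rnn():
  time_len, batch_size, input_size = 15, 5, 20
  # Fused cells consume the whole time-major [T, B, D] batch in one call.
  inputs = tf.constant(np.random.randn(time_len, batch_size, input_size))
  fused_fw = tf.contrib.rnn.FusedRNNCellAdaptor(
      tf.contrib.rnn.BasicRNNCell(10))
  # TimeReversedFusedRNN reverses the input in time, runs the wrapped fused
  # cell, and reverses the output back, yielding the backward direction.
  fused_bw = tf.contrib.rnn.TimeReversedFusedRNN(
      tf.contrib.rnn.FusedRNNCellAdaptor(tf.contrib.rnn.BasicRNNCell(10)))
  fw_out, _ = fused_fw(inputs, dtype=tf.float64, scope="fw")
  bw_out, _ = fused_bw(inputs, dtype=tf.float64, scope="bw")
  # Depth-concatenate the two directions, as the test does.
  return tf.concat([fw_out, bw_out], 2)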
def embedding_attention_bidirectional_seq2seq(self,
                                              encoder_inputs,
                                              decoder_inputs,
                                              input_cell1,
                                              input_cell2,
                                              output_cell,
                                              num_encoder_symbols,
                                              num_decoder_symbols,
                                              embedding_size,
                                              num_heads=4,
                                              output_projection=None,
                                              feed_previous=False,
                                              dtype=None,
                                              scope=None,
                                              initial_state_attention=False):
  with tf.variable_scope(
      scope or "embedding_attention_bidirectional_seq2seq") as scope:
    # Encoder: wrap each directional cell with an embedding lookup.
    encoder_cell1 = core_rnn_cell.EmbeddingWrapper(
        input_cell1,
        embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    encoder_cell2 = core_rnn_cell.EmbeddingWrapper(
        input_cell2,
        embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    encoder_outputs, encoder_state1, encoder_state2 = (
        core_rnn.static_bidirectional_rnn(
            encoder_cell1, encoder_cell2, encoder_inputs, dtype=tf.float32))

    # First calculate a concatenation of encoder outputs to put attention on.
    top_states = [
        array_ops.reshape(
            e, [-1, 1, input_cell1.output_size + input_cell2.output_size])
        for e in encoder_outputs
    ]
    attention_states = array_ops.concat(top_states, 1)

    # Combine the states of both encoders.
    encoder_state = encoder_state1 + encoder_state2

    # Decoder.
    output_size = None
    if output_projection is None:
      output_cell = core_rnn_cell.OutputProjectionWrapper(
          output_cell, num_decoder_symbols)
      output_size = num_decoder_symbols

    assert isinstance(feed_previous, bool)
    return seq2seq.embedding_attention_decoder(
        decoder_inputs,
        encoder_state,
        attention_states,
        output_cell,
        num_decoder_symbols,
        embedding_size,
        num_heads=num_heads,
        output_size=output_size,
        output_projection=output_projection,
        feed_previous=feed_previous,
        initial_state_attention=initial_state_attention)
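# A hedged usage sketch for the method above; `model` stands for whatever
# object carries the method, and `_example_bidirectional_seq2seq` plus all
# sizes and cell choices are illustrative assumptions. Note that the
# `encoder_state1 + encoder_state2` combination above is an elementwise sum
# for plain tensor states (e.g. GRU) but a tuple concatenation for tuple
# states (e.g. LSTM/MultiRNNCell), so the decoder cell must be chosen to
# accept whichever combined state the encoder cells produce.
import tensorflow as tf


def _example_bidirectional_seq2seq(model):
  num_steps_in, num_steps_out = 10, 12
  encoder_inputs = [
      tf.placeholder(tf.int32, [None], name="enc%d" % i)
      for i in range(num_steps_in)
  ]
  decoder_inputs = [
      tf.placeholder(tf.int32, [None], name="dec%d" % i)
      for i in range(num_steps_out)
  ]
  input_cell1 = tf.contrib.rnn.GRUCell(64)  # forward encoder
  input_cell2 = tf.contrib.rnn.GRUCell(64)  # backward encoder
  # With GRU states the combined encoder state is an elementwise sum, so the
  # decoder state size must equal each encoder cell's state size.
  output_cell = tf.contrib.rnn.GRUCell(64)
  return model.embedding_attention_bidirectional_seq2seq(
      encoder_inputs, decoder_inputs, input_cell1, input_cell2, output_cell,
      num_encoder_symbols=1000, num_decoder_symbols=1000, embedding_size=64)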
def stack_bidirectional_rnn(cells_fw,
                            cells_bw,
                            inputs,
                            initial_states_fw=None,
                            initial_states_bw=None,
                            dtype=None,
                            sequence_length=None,
                            scope=None):
  """Creates a bidirectional recurrent neural network.

  Stacks several bidirectional rnn layers. The combined forward and backward
  layer outputs are used as input of the next layer. tf.bidirectional_rnn
  does not allow sharing forward and backward information between layers.
  The input_size of the first forward and backward cells must match.
  The initial state for both directions is zero and no intermediate states
  are returned.

  As described in https://arxiv.org/abs/1303.5778

  Args:
    cells_fw: List of instances of RNNCell, one per layer,
      to be used for forward direction.
    cells_bw: List of instances of RNNCell, one per layer,
      to be used for backward direction.
    inputs: A length T list of inputs, each a tensor of shape
      [batch_size, input_size], or a nested tuple of such elements.
    initial_states_fw: (optional) A list of the initial states (one per layer)
      for the forward RNN.
      Each tensor must have an appropriate type and shape
      `[batch_size, cell_fw.state_size]`.
    initial_states_bw: (optional) Same as for `initial_states_fw`, but using
      the corresponding properties of `cells_bw`.
    dtype: (optional) The data type for the initial state.  Required if
      either of the initial states are not provided.
    sequence_length: (optional) An int32/int64 vector, size `[batch_size]`,
      containing the actual lengths for each of the sequences.
    scope: VariableScope for the created subgraph; defaults to None.

  Returns:
    A tuple (outputs, output_state_fw, output_state_bw) where:
      outputs is a length `T` list of outputs (one for each input), which
        are depth-concatenated forward and backward outputs.
      output_states_fw is the final states, one tensor per layer,
        of the forward rnn.
      output_states_bw is the final states, one tensor per layer,
        of the backward rnn.

  Raises:
    TypeError: If `cell_fw` or `cell_bw` is not an instance of `RNNCell`.
    ValueError: If inputs is None, not a list or an empty list.
  """
  if not cells_fw:
    raise ValueError("Must specify at least one fw cell for BidirectionalRNN.")
  if not cells_bw:
    raise ValueError("Must specify at least one bw cell for BidirectionalRNN.")
  if not isinstance(cells_fw, list):
    raise ValueError("cells_fw must be a list of RNNCells (one per layer).")
  if not isinstance(cells_bw, list):
    raise ValueError("cells_bw must be a list of RNNCells (one per layer).")
  if len(cells_fw) != len(cells_bw):
    raise ValueError("Forward and Backward cells must have the same depth.")
  if initial_states_fw is not None and (
      not isinstance(initial_states_fw, list) or
      len(initial_states_fw) != len(cells_fw)):
    raise ValueError(
        "initial_states_fw must be a list of state tensors (one per layer).")
  if initial_states_bw is not None and (
      not isinstance(initial_states_bw, list) or
      len(initial_states_bw) != len(cells_bw)):
    raise ValueError(
        "initial_states_bw must be a list of state tensors (one per layer).")

  states_fw = []
  states_bw = []
  prev_layer = inputs

  with vs.variable_scope(scope or "stack_bidirectional_rnn"):
    for i, (cell_fw, cell_bw) in enumerate(zip(cells_fw, cells_bw)):
      initial_state_fw = None
      initial_state_bw = None
      if initial_states_fw:
        initial_state_fw = initial_states_fw[i]
      if initial_states_bw:
        initial_state_bw = initial_states_bw[i]

      with vs.variable_scope("cell_%d" % i) as cell_scope:
        prev_layer, state_fw, state_bw = contrib_rnn.static_bidirectional_rnn(
            cell_fw,
            cell_bw,
            prev_layer,
            initial_state_fw=initial_state_fw,
            initial_state_bw=initial_state_bw,
            sequence_length=sequence_length,
            dtype=dtype,
            scope=cell_scope)
      states_fw.append(state_fw)
      states_bw.append(state_bw)

  return prev_layer, tuple(states_fw), tuple(states_bw)
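# A minimal usage sketch for the stacked bidirectional RNN above, written
# against the equivalent public TF 1.x entry point
# tf.contrib.rnn.stack_bidirectional_rnn; `_example_stack_bidirectional_rnn`
# and the sizes below are illustrative assumptions.
import tensorflow as tf


def _example_stack_bidirectional_rnn():
  seq_len, batch_size, depth = 8, 4, 16
  # `inputs` is a length-T list of [batch_size, input_size] tensors.
  inputs = tf.unstack(tf.zeros([seq_len, batch_size, depth]))
  # Two stacked layers; each layer needs its own fw and bw cell instance.
  cells_fw = [tf.contrib.rnn.LSTMCell(32) for _ in range(2)]
  cells_bw = [tf.contrib.rnn.LSTMCell(32) for _ in range(2)]
  outputs, states_fw, states_bw = tf.contrib.rnn.stack_bidirectional_rnn(
      cells_fw, cells_bw, inputs, dtype=tf.float32)
  # Each element of `outputs` has depth 64: a layer's forward and backward
  # outputs are depth-concatenated before feeding the next layer.
  return outputs, states_fw, states_bw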