Example #1
    def testTimeReversedFusedRNN(self):
        with self.test_session() as sess:
            initializer = init_ops.random_uniform_initializer(-0.01,
                                                              0.01,
                                                              seed=19890213)
            cell = core_rnn_cell_impl.BasicRNNCell(10)
            batch_size = 5
            input_size = 20
            timelen = 15
            inputs = constant_op.constant(
                np.random.randn(timelen, batch_size, input_size))

            # test bi-directional rnn
            with variable_scope.variable_scope("basic",
                                               initializer=initializer):
                unpacked_inputs = array_ops.unstack(inputs)
                outputs, fw_state, bw_state = core_rnn.static_bidirectional_rnn(
                    cell, cell, unpacked_inputs, dtype=dtypes.float64)
                packed_outputs = array_ops.stack(outputs)
                basic_vars = [
                    v for v in variables.trainable_variables()
                    if v.name.startswith("basic/")
                ]
                sess.run([variables.global_variables_initializer()])
                basic_outputs, basic_fw_state, basic_bw_state = sess.run(
                    [packed_outputs, fw_state, bw_state])
                basic_grads = sess.run(
                    gradients_impl.gradients(packed_outputs, inputs))
                basic_wgrads = sess.run(
                    gradients_impl.gradients(packed_outputs, basic_vars))

            with variable_scope.variable_scope("fused",
                                               initializer=initializer):
                fused_cell = fused_rnn_cell.FusedRNNCellAdaptor(cell)
                fused_bw_cell = fused_rnn_cell.TimeReversedFusedRNN(fused_cell)
                fw_outputs, fw_state = fused_cell(inputs,
                                                  dtype=dtypes.float64,
                                                  scope="fw")
                bw_outputs, bw_state = fused_bw_cell(inputs,
                                                     dtype=dtypes.float64,
                                                     scope="bw")
                outputs = array_ops.concat([fw_outputs, bw_outputs], 2)
                fused_vars = [
                    v for v in variables.trainable_variables()
                    if v.name.startswith("fused/")
                ]
                sess.run([variables.global_variables_initializer()])
                fused_outputs, fused_fw_state, fused_bw_state = sess.run(
                    [outputs, fw_state, bw_state])
                fused_grads = sess.run(
                    gradients_impl.gradients(outputs, inputs))
                fused_wgrads = sess.run(
                    gradients_impl.gradients(outputs, fused_vars))

            self.assertAllClose(basic_outputs, fused_outputs)
            self.assertAllClose(basic_fw_state, fused_fw_state)
            self.assertAllClose(basic_bw_state, fused_bw_state)
            self.assertAllClose(basic_grads, fused_grads)
            for basic, fused in zip(basic_wgrads, fused_wgrads):
                self.assertAllClose(basic, fused, rtol=1e-2, atol=1e-2)
Example #2
  def testTimeReversedFusedRNN(self):
    with self.test_session() as sess:
      initializer = init_ops.random_uniform_initializer(
          -0.01, 0.01, seed=19890213)
      fw_cell = core_rnn_cell_impl.BasicRNNCell(10)
      bw_cell = core_rnn_cell_impl.BasicRNNCell(10)
      batch_size = 5
      input_size = 20
      timelen = 15
      inputs = constant_op.constant(
          np.random.randn(timelen, batch_size, input_size))

      # test bi-directional rnn
      with variable_scope.variable_scope("basic", initializer=initializer):
        unpacked_inputs = array_ops.unstack(inputs)
        outputs, fw_state, bw_state = core_rnn.static_bidirectional_rnn(
            fw_cell, bw_cell, unpacked_inputs, dtype=dtypes.float64)
        packed_outputs = array_ops.stack(outputs)
        basic_vars = [
            v for v in variables.trainable_variables()
            if v.name.startswith("basic/")
        ]
        sess.run([variables.global_variables_initializer()])
        basic_outputs, basic_fw_state, basic_bw_state = sess.run(
            [packed_outputs, fw_state, bw_state])
        basic_grads = sess.run(gradients_impl.gradients(packed_outputs, inputs))
        basic_wgrads = sess.run(
            gradients_impl.gradients(packed_outputs, basic_vars))

      with variable_scope.variable_scope("fused", initializer=initializer):
        fused_cell = fused_rnn_cell.FusedRNNCellAdaptor(
            core_rnn_cell_impl.BasicRNNCell(10))
        fused_bw_cell = fused_rnn_cell.TimeReversedFusedRNN(
            fused_rnn_cell.FusedRNNCellAdaptor(
                core_rnn_cell_impl.BasicRNNCell(10)))
        fw_outputs, fw_state = fused_cell(
            inputs, dtype=dtypes.float64, scope="fw")
        bw_outputs, bw_state = fused_bw_cell(
            inputs, dtype=dtypes.float64, scope="bw")
        outputs = array_ops.concat([fw_outputs, bw_outputs], 2)
        fused_vars = [
            v for v in variables.trainable_variables()
            if v.name.startswith("fused/")
        ]
        sess.run([variables.global_variables_initializer()])
        fused_outputs, fused_fw_state, fused_bw_state = sess.run(
            [outputs, fw_state, bw_state])
        fused_grads = sess.run(gradients_impl.gradients(outputs, inputs))
        fused_wgrads = sess.run(gradients_impl.gradients(outputs, fused_vars))

      self.assertAllClose(basic_outputs, fused_outputs)
      self.assertAllClose(basic_fw_state, fused_fw_state)
      self.assertAllClose(basic_bw_state, fused_bw_state)
      self.assertAllClose(basic_grads, fused_grads)
      for basic, fused in zip(basic_wgrads, fused_wgrads):
        self.assertAllClose(basic, fused, rtol=1e-2, atol=1e-2)
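
Both tests above exercise the same pattern: wrap a standard cell in FusedRNNCellAdaptor for the forward pass, wrap another fused adaptor in TimeReversedFusedRNN for the backward pass, run both over the time-major input, and depth-concatenate the two outputs. A minimal sketch of that pattern, reusing the modules the tests already import; the shapes and the cell size of 10 are illustrative:

# Minimal sketch of the fused bidirectional pattern exercised above.
# Assumes the same imports as the tests (numpy as np, constant_op, dtypes,
# array_ops, core_rnn_cell_impl, fused_rnn_cell); sizes are illustrative.
time_len, batch_size, input_size = 15, 5, 20
inputs = constant_op.constant(
    np.random.randn(time_len, batch_size, input_size))   # time-major input

fw_cell = fused_rnn_cell.FusedRNNCellAdaptor(
    core_rnn_cell_impl.BasicRNNCell(10))                  # forward fused cell
bw_cell = fused_rnn_cell.TimeReversedFusedRNN(
    fused_rnn_cell.FusedRNNCellAdaptor(
        core_rnn_cell_impl.BasicRNNCell(10)))             # backward: reverse time, run, reverse back

fw_outputs, fw_state = fw_cell(inputs, dtype=dtypes.float64, scope="fw")
bw_outputs, bw_state = bw_cell(inputs, dtype=dtypes.float64, scope="bw")
outputs = array_ops.concat([fw_outputs, bw_outputs], 2)   # depth-concatenate both directions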
Example #3
File: models.py Project: p-singh/ASDL2017
    def embedding_attention_bidirectional_seq2seq(self, encoder_inputs, decoder_inputs, input_cell1, input_cell2,
                                                  output_cell,
                                                  num_encoder_symbols,
                                                  num_decoder_symbols, embedding_size, num_heads=4,
                                                  output_projection=None, feed_previous=False, dtype=None, scope=None,
                                                  initial_state_attention=False):

        with tf.variable_scope(scope or "embedding_attention_bidirectional_seq2seq") as scope:
            # Encoder.
            encoder_cell1 = core_rnn_cell.EmbeddingWrapper(input_cell1, embedding_classes=num_encoder_symbols,
                                                           embedding_size=embedding_size)
            encoder_cell2 = core_rnn_cell.EmbeddingWrapper(input_cell2, embedding_classes=num_encoder_symbols,
                                                           embedding_size=embedding_size)

            encoder_outputs, encoder_state1, encoder_state2 = core_rnn.static_bidirectional_rnn(encoder_cell1,
                                                                                                encoder_cell2,
                                                                                                encoder_inputs,
                                                                                                dtype=tf.float32)

            # First calculate a concatenation of encoder outputs to put attention on.
            top_states = [array_ops.reshape(e, [-1, 1, input_cell1.output_size + input_cell2.output_size]) for e in
                          encoder_outputs]

            attention_states = array_ops.concat(top_states, 1)

            # Concatenate the states of both encoders
            encoder_state = encoder_state1 + encoder_state2

            # Decoder.
            output_size = None
            if output_projection is None:
                output_cell = rnn.OutputProjectionWrapper(output_cell, num_decoder_symbols)
                output_size = num_decoder_symbols

            assert isinstance(feed_previous, bool)
            return seq2seq.embedding_attention_decoder(decoder_inputs, encoder_state, attention_states,
                                                       output_cell,
                                                       num_decoder_symbols, embedding_size, num_heads=num_heads,
                                                       output_size=output_size,
                                                       output_projection=output_projection,
                                                       feed_previous=feed_previous,
                                                       initial_state_attention=initial_state_attention)
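
A hypothetical call site for this builder might look like the sketch below. It reuses the tf import from models.py; the placeholder lengths, vocabulary sizes, GRU cells, and the model object name are illustrative assumptions, not code from the project. With GRU cells, the encoder_state1 + encoder_state2 line adds the two state tensors element-wise, so the decoder cell here is given the same size as a single encoder cell.

# Hypothetical usage sketch; lengths, sizes, and the GRUCell choice are illustrative.
encoder_inputs = [tf.placeholder(tf.int32, [None], name="enc_%d" % i)
                  for i in range(10)]                     # length-10 source sequence
decoder_inputs = [tf.placeholder(tf.int32, [None], name="dec_%d" % i)
                  for i in range(12)]                     # length-12 target sequence

outputs, state = model.embedding_attention_bidirectional_seq2seq(
    encoder_inputs, decoder_inputs,
    input_cell1=tf.contrib.rnn.GRUCell(128),              # forward encoder cell
    input_cell2=tf.contrib.rnn.GRUCell(128),              # backward encoder cell
    output_cell=tf.contrib.rnn.GRUCell(128),              # decoder cell; state size matches one encoder cell
    num_encoder_symbols=40000,
    num_decoder_symbols=40000,
    embedding_size=128,
    feed_previous=False)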
Example #4
def stack_bidirectional_rnn(cells_fw,
                            cells_bw,
                            inputs,
                            initial_states_fw=None,
                            initial_states_bw=None,
                            dtype=None,
                            sequence_length=None,
                            scope=None):
    """Creates a bidirectional recurrent neural network.

  Stacks several bidirectional rnn layers. The combined forward and backward
  layer outputs are used as the input of the next layer. tf.bidirectional_rnn
  does not allow sharing forward and backward information between layers.
  The input_size of the first forward and backward cells must match.
  The initial state for both directions is zero and no intermediate states
  are returned.

  As described in https://arxiv.org/abs/1303.5778

  Args:
    cells_fw: List of instances of RNNCell, one per layer,
      to be used for forward direction.
    cells_bw: List of instances of RNNCell, one per layer,
      to be used for backward direction.
    inputs: A length T list of inputs, each a tensor of shape
      [batch_size, input_size], or a nested tuple of such elements.
    initial_states_fw: (optional) A list of the initial states (one per layer)
      for the forward RNN.
      Each tensor must have an appropriate type and shape
      `[batch_size, cell_fw.state_size]`.
    initial_states_bw: (optional) Same as for `initial_states_fw`, but using
      the corresponding properties of `cells_bw`.
    dtype: (optional) The data type for the initial state.  Required if
      either of the initial states are not provided.
    sequence_length: (optional) An int32/int64 vector, size `[batch_size]`,
      containing the actual lengths for each of the sequences.
    scope: VariableScope for the created subgraph; defaults to None.

  Returns:
    A tuple (outputs, output_state_fw, output_state_bw) where:
      outputs is a length `T` list of outputs (one for each input), which
        are depth-concatenated forward and backward outputs.
      output_states_fw is the final states, one tensor per layer,
        of the forward rnn.
      output_states_bw is the final states, one tensor per layer,
        of the backward rnn.

  Raises:
    TypeError: If `cell_fw` or `cell_bw` is not an instance of `RNNCell`.
    ValueError: If inputs is None, not a list or an empty list.
  """
    if not cells_fw:
        raise ValueError(
            "Must specify at least one fw cell for BidirectionalRNN.")
    if not cells_bw:
        raise ValueError(
            "Must specify at least one bw cell for BidirectionalRNN.")
    if not isinstance(cells_fw, list):
        raise ValueError(
            "cells_fw must be a list of RNNCells (one per layer).")
    if not isinstance(cells_bw, list):
        raise ValueError(
            "cells_bw must be a list of RNNCells (one per layer).")
    if len(cells_fw) != len(cells_bw):
        raise ValueError(
            "Forward and Backward cells must have the same depth.")
    if initial_states_fw is not None and (not isinstance(initial_states_fw, list)
                                          or len(initial_states_fw) != len(cells_fw)):
        raise ValueError(
            "initial_states_fw must be a list of state tensors (one per layer)."
        )
    if initial_states_bw is not None and (not isinstance(initial_states_bw, list)
                                          or len(initial_states_bw) != len(cells_bw)):
        raise ValueError(
            "initial_states_bw must be a list of state tensors (one per layer)."
        )
    states_fw = []
    states_bw = []
    prev_layer = inputs

    with vs.variable_scope(scope or "stack_bidirectional_rnn"):
        for i, (cell_fw, cell_bw) in enumerate(zip(cells_fw, cells_bw)):
            initial_state_fw = None
            initial_state_bw = None
            if initial_states_fw:
                initial_state_fw = initial_states_fw[i]
            if initial_states_bw:
                initial_state_bw = initial_states_bw[i]

            with vs.variable_scope("cell_%d" % i) as cell_scope:
                prev_layer, state_fw, state_bw = contrib_rnn.static_bidirectional_rnn(
                    cell_fw,
                    cell_bw,
                    prev_layer,
                    initial_state_fw=initial_state_fw,
                    initial_state_bw=initial_state_bw,
                    sequence_length=sequence_length,
                    dtype=dtype,
                    scope=cell_scope)
            states_fw.append(state_fw)
            states_bw.append(state_bw)

    return prev_layer, tuple(states_fw), tuple(states_bw)
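
A minimal usage sketch for the function above, assuming tf.contrib.rnn cells and a length-T list of per-step input tensors; every size below is an illustrative assumption.

# Hypothetical usage sketch for stack_bidirectional_rnn; all sizes are illustrative.
import tensorflow as tf

num_steps, batch_size, input_size = 10, 32, 64
inputs = [tf.placeholder(tf.float32, [batch_size, input_size])
          for _ in range(num_steps)]                        # length-T list of step inputs

cells_fw = [tf.contrib.rnn.GRUCell(100) for _ in range(3)]  # one forward cell per layer
cells_bw = [tf.contrib.rnn.GRUCell(100) for _ in range(3)]  # one backward cell per layer

outputs, states_fw, states_bw = stack_bidirectional_rnn(
    cells_fw, cells_bw, inputs, dtype=tf.float32)
# outputs: length-T list, each step [batch_size, 200] (fw and bw outputs depth-concatenated)
# states_fw, states_bw: tuples with one final state per layer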
Example #5
def stack_bidirectional_rnn(cells_fw,
                            cells_bw,
                            inputs,
                            initial_states_fw=None,
                            initial_states_bw=None,
                            dtype=None,
                            sequence_length=None,
                            scope=None):
  """Creates a bidirectional recurrent neural network.

  Stacks several bidirectional rnn layers. The combined forward and backward
  layer outputs are used as the input of the next layer. tf.bidirectional_rnn
  does not allow sharing forward and backward information between layers.
  The input_size of the first forward and backward cells must match.
  The initial state for both directions is zero and no intermediate states
  are returned.

  As described in https://arxiv.org/abs/1303.5778

  Args:
    cells_fw: List of instances of RNNCell, one per layer,
      to be used for forward direction.
    cells_bw: List of instances of RNNCell, one per layer,
      to be used for backward direction.
    inputs: A length T list of inputs, each a tensor of shape
      [batch_size, input_size], or a nested tuple of such elements.
    initial_states_fw: (optional) A list of the initial states (one per layer)
      for the forward RNN.
      Each tensor must have an appropriate type and shape
      `[batch_size, cell_fw.state_size]`.
    initial_states_bw: (optional) Same as for `initial_states_fw`, but using
      the corresponding properties of `cells_bw`.
    dtype: (optional) The data type for the initial state.  Required if
      either of the initial states are not provided.
    sequence_length: (optional) An int32/int64 vector, size `[batch_size]`,
      containing the actual lengths for each of the sequences.
    scope: VariableScope for the created subgraph; defaults to None.

  Returns:
    A tuple (outputs, output_state_fw, output_state_bw) where:
      outputs is a length `T` list of outputs (one for each input), which
        are depth-concatenated forward and backward outputs.
      output_states_fw is the final states, one tensor per layer,
        of the forward rnn.
      output_states_bw is the final states, one tensor per layer,
        of the backward rnn.

  Raises:
    TypeError: If `cell_fw` or `cell_bw` is not an instance of `RNNCell`.
    ValueError: If inputs is None, not a list or an empty list.
  """
  if not cells_fw:
    raise ValueError("Must specify at least one fw cell for BidirectionalRNN.")
  if not cells_bw:
    raise ValueError("Must specify at least one bw cell for BidirectionalRNN.")
  if not isinstance(cells_fw, list):
    raise ValueError("cells_fw must be a list of RNNCells (one per layer).")
  if not isinstance(cells_bw, list):
    raise ValueError("cells_bw must be a list of RNNCells (one per layer).")
  if len(cells_fw) != len(cells_bw):
    raise ValueError("Forward and Backward cells must have the same depth.")
  if initial_states_fw is not None and (not isinstance(initial_states_fw, list) or
                                        len(initial_states_fw) != len(cells_fw)):
    raise ValueError(
        "initial_states_fw must be a list of state tensors (one per layer).")
  if initial_states_bw is not None and (not isinstance(initial_states_bw, list) or
                                        len(initial_states_bw) != len(cells_bw)):
    raise ValueError(
        "initial_states_bw must be a list of state tensors (one per layer).")
  states_fw = []
  states_bw = []
  prev_layer = inputs

  with vs.variable_scope(scope or "stack_bidirectional_rnn"):
    for i, (cell_fw, cell_bw) in enumerate(zip(cells_fw, cells_bw)):
      initial_state_fw = None
      initial_state_bw = None
      if initial_states_fw:
        initial_state_fw = initial_states_fw[i]
      if initial_states_bw:
        initial_state_bw = initial_states_bw[i]

      with vs.variable_scope("cell_%d" % i) as cell_scope:
        prev_layer, state_fw, state_bw = contrib_rnn.static_bidirectional_rnn(
            cell_fw,
            cell_bw,
            prev_layer,
            initial_state_fw=initial_state_fw,
            initial_state_bw=initial_state_bw,
            sequence_length=sequence_length,
            dtype=dtype,
            scope=cell_scope)
      states_fw.append(state_fw)
      states_bw.append(state_bw)

  return prev_layer, tuple(states_fw), tuple(states_bw)