Example #1
 def _reverse(input_, seq_lengths, seq_dim, batch_dim):
   if seq_lengths is not None:
     return array_ops.reverse_sequence(
         input=input_, seq_lengths=seq_lengths,
         seq_dim=seq_dim, batch_dim=batch_dim)
   else:
     return array_ops.reverse(input_, axis=[seq_dim])
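For reference, a minimal sketch of what the wrapped op does, assuming the public TF 2.x tf.reverse_sequence API and a small made-up batch: each row is reversed only up to its own length, and the right-padded tail stays in place.

import tensorflow as tf

# Two right-padded rows; the first has 3 valid steps, the second has 2.
batch = tf.constant([[1, 2, 3, 0],
                     [4, 5, 0, 0]])
lengths = tf.constant([3, 2])

reversed_batch = tf.reverse_sequence(batch, lengths, seq_axis=1, batch_axis=0)
# reversed_batch:
# [[3, 2, 1, 0],
#  [5, 4, 0, 0]]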
Example #2
def _reverse_seq(input_seq, lengths):
    """Reverse a list of Tensors up to specified lengths.

  Args:
    input_seq: Sequence of seq_len tensors of dimension (batch_size, n_features)
    lengths:   A tensor of dimension batch_size, containing lengths for each
               sequence in the batch. If "None" is specified, simply reverses
               the list.

  Returns:
    time-reversed sequence
  """
    if lengths is None:
        return list(reversed(input_seq))

    input_shape = tensor_shape.unknown_shape(ndims=input_seq[0].get_shape().ndims)
    for input_ in input_seq:
        input_shape.merge_with(input_.get_shape())
        input_.set_shape(input_shape)

    # Join into (time, batch_size, depth)
    s_joined = array_ops.pack(input_seq)

    # TODO(schuster, ebrevdo): Remove cast when reverse_sequence takes int32
    if lengths is not None:
        lengths = math_ops.to_int64(lengths)

    # Reverse along dimension 0
    s_reversed = array_ops.reverse_sequence(s_joined, lengths, 0, 1)
    # Split again into list
    result = array_ops.unpack(s_reversed)
    for r in result:
        r.set_shape(input_shape)
    return result
Example #3
  def testFloatReverseSequenceGrad(self):
    x = np.asarray(
        [[[1, 2, 3, 4], [5, 6, 7, 8]], [[9, 10, 11, 12], [13, 14, 15, 16]],
         [[17, 18, 19, 20], [21, 22, 23, 24]]],
        dtype=np.float64)
    x = x.reshape(3, 2, 4, 1, 1)
    x = x.transpose([2, 1, 0, 3, 4])  # transpose axes 0 <=> 2

    # reverse dim 0 up to (0:3, none, 0:4) along dim=2
    seq_axis = 0
    batch_axis = 2
    seq_lengths = np.asarray([3, 0, 4], dtype=np.int64)

    with self.cached_session():
      input_t = constant_op.constant(x, shape=x.shape)
      seq_lengths_t = constant_op.constant(seq_lengths, shape=seq_lengths.shape)
      reverse_sequence_out = array_ops.reverse_sequence(
          input_t,
          batch_axis=batch_axis,
          seq_axis=seq_axis,
          seq_lengths=seq_lengths_t)
      err = gradient_checker.compute_gradient_error(
          input_t, x.shape, reverse_sequence_out, x.shape, x_init_value=x)
    print("ReverseSequence gradient error = %g" % err)
    self.assertLess(err, 1e-8)
Example #4
def _reverse_seq(input_seq, lengths):
  """Reverse a list of Tensors up to specified lengths.

  Args:
    input_seq: Sequence of seq_len tensors of dimension (batch_size, depth)
    lengths:   A tensor of dimension batch_size, containing lengths for each
               sequence in the batch. If "None" is specified, simply
               reverses the list.

  Returns:
    time-reversed sequence
  """
  if lengths is None:
    return list(reversed(input_seq))

  for input_ in input_seq:
    input_.set_shape(input_.get_shape().with_rank(2))

  # Join into (time, batch_size, depth)
  s_joined = array_ops_.pack(input_seq)

  # Reverse along dimension 0
  s_reversed = array_ops_.reverse_sequence(s_joined, lengths, 0, 1)
  # Split again into list
  result = array_ops_.unpack(s_reversed)
  return result
Example #5
def _ReverseSequenceGrad(op, grad):
  seq_lengths = op.inputs[1]
  return [array_ops.reverse_sequence(grad,
                                     batch_dim=op.get_attr("batch_dim"),
                                     seq_dim=op.get_attr("seq_dim"),
                                     seq_lengths=seq_lengths),
          None]
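Because ReverseSequence is just a per-row permutation, its gradient is the incoming gradient pushed through the same reverse_sequence with the same seq_lengths, which is exactly what the registration above does. A small sketch illustrating this, assuming TF 2.x eager mode and tf.GradientTape (the tensors are made up for illustration):

import tensorflow as tf

x = tf.constant([[1., 2., 3., 0.],
                 [4., 5., 0., 0.]])
lengths = tf.constant([3, 2])

with tf.GradientTape() as tape:
  tape.watch(x)
  y = tf.reverse_sequence(x, lengths, seq_axis=1, batch_axis=0)
  loss = y[0, 0]  # depends only on x[0, 2], which reversal moved to position 0

grad = tape.gradient(loss, x)
# grad equals the upstream gradient [[1, 0, 0, 0], [0, 0, 0, 0]] passed through
# reverse_sequence again: a single 1 at position [0, 2], zeros elsewhere.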
Example #6
 def _reverse(input_, seq_lengths, seq_axis, batch_axis):
   if seq_lengths is not None:
     return array_ops.reverse_sequence(
         input=input_,
         seq_lengths=seq_lengths,
         seq_axis=seq_axis,
         batch_axis=batch_axis)
   else:
     return array_ops.reverse(input_, axis=[seq_axis])
Example #7
 def _reverse(input_, seq_lengths, seq_dim, batch_dim):
   if seq_lengths is not None:
     return array_ops.reverse_sequence(
         input=input_, seq_lengths=seq_lengths,
         seq_dim=seq_dim, batch_dim=batch_dim)
   else:
     # See b/69305369.
     assert not use_tpu, (
         'Bidirectional with variable sequence lengths unsupported on TPU')
     return array_ops.reverse(input_, axis=[seq_dim])
Example #8
  def testShapeFunctionEdgeCases(self):
    t = array_ops.reverse_sequence(
        array_ops.placeholder(
            dtypes.float32, shape=None),
        seq_lengths=array_ops.placeholder(
            dtypes.int64, shape=(32,)),
        batch_axis=0,
        seq_axis=1)
    self.assertIs(t.get_shape().ndims, None)

    # Batch size mismatched between input and seq_lengths.
    with self.assertRaises(ValueError):
      array_ops.reverse_sequence(
          array_ops.placeholder(
              dtypes.float32, shape=(32, 2, 3)),
          seq_lengths=array_ops.placeholder(
              dtypes.int64, shape=(33,)),
          seq_axis=3)

    # seq_axis out of bounds.
    with self.assertRaisesRegexp(ValueError, "seq_dim must be < input rank"):
      array_ops.reverse_sequence(
          array_ops.placeholder(
              dtypes.float32, shape=(32, 2, 3)),
          seq_lengths=array_ops.placeholder(
              dtypes.int64, shape=(32,)),
          seq_axis=3)

    # batch_axis out of bounds.
    with self.assertRaisesRegexp(ValueError, "batch_dim must be < input rank"):
      array_ops.reverse_sequence(
          array_ops.placeholder(
              dtypes.float32, shape=(32, 2, 3)),
          seq_lengths=array_ops.placeholder(
              dtypes.int64, shape=(32,)),
          seq_axis=0,
          batch_axis=3)

    with self.cached_session():
      inputs = array_ops.placeholder(dtypes.float32, shape=(32, 2, 3))
      seq_lengths = array_ops.placeholder(dtypes.int64, shape=(32,))
      output = array_ops.reverse_sequence(
          inputs, seq_lengths=seq_lengths,
          seq_axis=0)  # batch_axis default is 0
      with self.assertRaisesOpError("batch_dim == seq_dim"):
        output.eval(feed_dict={
            inputs: np.random.rand(32, 2, 3),
            seq_lengths: xrange(32)
        })
Example #9
def _reverse_seq(input_seq, lengths):
  """Reverse a list of Tensors up to specified lengths.

  Args:
    input_seq: Sequence of seq_len tensors of dimension (batch_size, n_features)
               or nested tuples of tensors.
    lengths:   A tensor of dimension batch_size, containing lengths for each
               sequence in the batch. If "None" is specified, simply reverses
               the list.

  Returns:
    time-reversed sequence
  """
  if lengths is None:
    return list(reversed(input_seq))

  input_is_tuple = nest.is_sequence(input_seq[0])
  flat_input_seq = (nest.flatten(input_) if input_is_tuple else [input_]
                    for input_ in input_seq)

  flat_results = [[] for _ in range(len(input_seq))]
  for sequence in zip(*flat_input_seq):
    input_shape = tensor_shape.unknown_shape(
        ndims=sequence[0].get_shape().ndims)
    for input_ in sequence:
      input_shape.merge_with(input_.get_shape())
      input_.set_shape(input_shape)

    # Join into (time, batch_size, depth)
    s_joined = array_ops.pack(sequence)

    # TODO(schuster, ebrevdo): Remove cast when reverse_sequence takes int32
    if lengths is not None:
      lengths = math_ops.to_int64(lengths)

    # Reverse along dimension 0
    s_reversed = array_ops.reverse_sequence(s_joined, lengths, 0, 1)
    # Split again into list
    result = array_ops.unpack(s_reversed)
    for r, flat_result in zip(result, flat_results):
      r.set_shape(input_shape)
      flat_result.append(r)

  results = [nest.pack_sequence_as(structure=input_, flat_sequence=flat_result)
             if input_is_tuple else flat_result[0]
             for input_, flat_result in zip(input_seq, flat_results)]
  return results
Example #10
def _reverse_seq(input_seq, lengths):
    """Reverse a list of Tensors up to specified lengths.

  Args:
    input_seq: Sequence of seq_len tensors of dimension (batch_size, n_features)
               or nested tuples of tensors.
    lengths:   A `Tensor` of dimension batch_size, containing lengths for each
               sequence in the batch. If "None" is specified, simply reverses
               the list.

  Returns:
    time-reversed sequence
  """
    if lengths is None:
        return list(reversed(input_seq))

    flat_input_seq = tuple(nest.flatten(input_) for input_ in input_seq)

    flat_results = [[] for _ in range(len(input_seq))]
    for sequence in zip(*flat_input_seq):
        input_shape = tensor_shape.unknown_shape(
            ndims=sequence[0].get_shape().ndims)
        for input_ in sequence:
            input_shape.merge_with(input_.get_shape())
            input_.set_shape(input_shape)

        # Join into (time, batch_size, depth)
        s_joined = array_ops.stack(sequence)

        # TODO(schuster, ebrevdo): Remove cast when reverse_sequence takes int32
        if lengths is not None:
            lengths = math_ops.to_int64(lengths)

        # Reverse along dimension 0
        s_reversed = array_ops.reverse_sequence(s_joined, lengths, 0, 1)
        # Split again into list
        result = array_ops.unstack(s_reversed)
        for r, flat_result in zip(result, flat_results):
            r.set_shape(input_shape)
            flat_result.append(r)

    results = [
        nest.pack_sequence_as(structure=input_, flat_sequence=flat_result)
        for input_, flat_result in zip(input_seq, flat_results)
    ]
    return results
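Nested-structure handling aside, the core of the helper above is stack → reverse_sequence → unstack. A minimal sketch of that core with only public TF 2.x ops; the names here (reverse_seq_list, steps) are illustrative, not from the original source:

import tensorflow as tf

def reverse_seq_list(input_seq, lengths):
  """Reverse a list of [batch_size, depth] tensors up to per-batch lengths."""
  if lengths is None:
    return list(reversed(input_seq))
  joined = tf.stack(input_seq)            # [time, batch_size, depth]
  reversed_ = tf.reverse_sequence(joined, lengths, seq_axis=0, batch_axis=1)
  return tf.unstack(reversed_)            # back to a list of [batch_size, depth]

# A batch of two sequences, three steps long; the second is padded after step 2.
steps = [tf.constant([[float(t)], [float(10 + t)]]) for t in range(3)]
out = reverse_seq_list(steps, tf.constant([3, 2]))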
Example #11
 def _testReverseSequence(self,
                          x,
                          batch_axis,
                          seq_axis,
                          seq_lengths,
                          truth,
                          use_gpu=False,
                          expected_err_re=None):
   with self.test_session(use_gpu=use_gpu):
     ans = array_ops.reverse_sequence(
         x, batch_axis=batch_axis, seq_axis=seq_axis, seq_lengths=seq_lengths)
     if expected_err_re is None:
       tf_ans = ans.eval()
       self.assertAllClose(tf_ans, truth, atol=1e-10)
       self.assertShapeEqual(truth, ans)
     else:
       with self.assertRaisesOpError(expected_err_re):
         ans.eval()
Example #12
 def _testReverseSequence(self,
                          x,
                          batch_axis,
                          seq_axis,
                          seq_lengths,
                          truth,
                          use_gpu=False,
                          expected_err_re=None):
   with self.cached_session(use_gpu=use_gpu):
     ans = array_ops.reverse_sequence(
         x, batch_axis=batch_axis, seq_axis=seq_axis, seq_lengths=seq_lengths)
     if expected_err_re is None:
       tf_ans = self.evaluate(ans)
       self.assertAllClose(tf_ans, truth, atol=1e-10)
       self.assertShapeEqual(truth, ans)
     else:
       with self.assertRaisesOpError(expected_err_re):
         self.evaluate(ans)
Example #13
def biLSTM(inputs, params, num_hidden, param_id):
    '''
    :param inputs: the input data, e.g. (seqLen, 1, 40); seqLen is the number of time steps (frames)
    :param params: the list of parameters
    :param num_hidden: the number of hidden units per layer, e.g. 128
    :param param_id: the index into params at which this layer's parameters start
    :return: the element-wise sum of the forward and backward outputs
    '''
    c = array_ops.zeros(
        (1, num_hidden)
    )  # initial cell state zeros tensor shape (1, 128) for the first time
    h = array_ops.zeros(
        (1, num_hidden)
    )  # initial output state zeros tensor shape (1, 128) for the first time
    state = c, h

    forward_inputs = inputs
    forward_state = state
    forward_tmp = []
    for frame in forward_inputs.eval():
        forward_h, forward_state = dynamic_rnn(frame, forward_state, params,
                                               param_id)
        forward_tmp.append(forward_h)
    forward_h = tf.stack(forward_tmp)

    # reverse the data for the backward LSTM pass
    backward_inputs = array_ops.reverse_sequence(
        input=inputs,
        seq_lengths=(inputs.get_shape().as_list()[0], ),
        seq_axis=0,
        batch_axis=1)
    backward_state = state
    backward_tmp = []
    for frame in backward_inputs.eval():
        backward_h, backward_state = dynamic_rnn(frame, backward_state, params,
                                                 param_id + 2)
        backward_tmp.append(backward_h)
    backward_tmp.reverse()
    backward_h = tf.stack(backward_tmp)

    # sum the forward and backward outputs
    hidden = tf.reduce_sum((forward_h, backward_h), axis=0)

    return hidden
Example #14
 def _testReverseSequence(self,
                          x,
                          batch_axis,
                          seq_axis,
                          seq_lengths,
                          truth,
                          expected_err_re=None):
   with self.cached_session():
     p = array_ops.placeholder(dtypes.as_dtype(x.dtype))
     lengths = array_ops.placeholder(dtypes.as_dtype(seq_lengths.dtype))
     with self.test_scope():
       ans = array_ops.reverse_sequence(
           p, batch_axis=batch_axis, seq_axis=seq_axis, seq_lengths=lengths)
     if expected_err_re is None:
       tf_ans = ans.eval(feed_dict={p: x, lengths: seq_lengths})
       self.assertAllClose(tf_ans, truth, atol=1e-10)
     else:
       with self.assertRaisesOpError(expected_err_re):
         ans.eval(feed_dict={p: x, lengths: seq_lengths})
Example #15
  def _reverse(self, t, lengths):
    """Time reverse the provided tensor or list of tensors.

    Assumes the top dimension is the time dimension.

    Args:
      t: 3D tensor or list of 2D tensors to be reversed
      lengths: 1D tensor of lengths, or None

    Returns:
      A reversed tensor or list of tensors
    """
    if isinstance(t, list):
      return list(reversed(t))
    else:
      if lengths is None:
        return array_ops.reverse(t, [True, False, False])
      else:
        return array_ops.reverse_sequence(t, lengths, 0, 1)
Example #16
    def _reverse(self, t, lengths):
        """Time reverse the provided tensor or list of tensors.

    Assumes the top dimension is the time dimension.

    Args:
      t: 3D tensor or list of 2D tensors to be reversed
      lengths: 1D tensor of lengths, or `None`

    Returns:
      A reversed tensor or list of tensors
    """
        if isinstance(t, list):
            return list(reversed(t))
        else:
            if lengths is None:
                return array_ops.reverse_v2(t, [0])
            else:
                return array_ops.reverse_sequence(t, lengths, 0, 1)
Example #17
 def _testReverseSequence(self,
                          x,
                          batch_axis,
                          seq_axis,
                          seq_lengths,
                          truth,
                          expected_err_re=None):
   with self.session():
     p = array_ops.placeholder(dtypes.as_dtype(x.dtype))
     lengths = array_ops.placeholder(dtypes.as_dtype(seq_lengths.dtype))
     with self.test_scope():
       ans = array_ops.reverse_sequence(
           p, batch_axis=batch_axis, seq_axis=seq_axis, seq_lengths=lengths)
     if expected_err_re is None:
       tf_ans = ans.eval(feed_dict={p: x, lengths: seq_lengths})
       self.assertAllClose(tf_ans, truth, atol=1e-10)
     else:
       with self.assertRaisesOpError(expected_err_re):
         ans.eval(feed_dict={p: x, lengths: seq_lengths})
Example #18
def _reverse_seq(input_seq, lengths):
    """Reverse a list of Tensors up to specified lengths.
    Args:
        input_seq: Sequence of seq_len tensors of dimension (batch_size, depth)
        lengths:   A tensor of dimension batch_size, containing lengths for each
                   sequence in the batch. If "None" is specified, simply reverses
                   the list.
    Returns:
        time-reversed sequence
    """
    if lengths is None:
        return list(reversed(input_seq))

    for input_ in input_seq:
        input_.set_shape(input_.get_shape().with_rank(2))

    # Join into (time, batch_size, depth)
    s_joined = array_ops_.pack(input_seq)

    # Reverse along dimension 0
    s_reversed = array_ops_.reverse_sequence(s_joined, lengths, 0, 1)
    # Split again into list
    result = array_ops_.unpack(s_reversed)
    return result
Example #19
def reverse_seq(input_seq, lengths):
    if lengths is None:
        return list(reversed(input_seq))

    input_shape = tensor_shape.matrix(None, None)
    for input_ in input_seq:
        input_shape.merge_with(input_.get_shape())
        input_.set_shape(input_shape)

    # Join into (time, batch_size, depth)
    s_joined = array_ops.pack(input_seq)

    # TODO(schuster, ebrevdo): Remove cast when reverse_sequence takes int32
    if lengths is not None:
        lengths = math_ops.to_int64(lengths)

    # Reverse along dimension 0
    s_reversed = array_ops.reverse_sequence(s_joined, lengths, 0, 1)
    # Split again into list
    result = array_ops.unpack(s_reversed)
    for r in result:
        r.set_shape(input_shape)
    return result
Example #20
def bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=None,
                              initial_state_fw=None, initial_state_bw=None,
                              dtype=None, parallel_iterations=None,
                              swap_memory=False, time_major=False, scope=None):
    """
    Creates a dynamic version of bidirectional recurrent neural network.
    The initial state for both directions is zero by default.
    :param cell_fw: An instance of RNNCell, to be used for forward direction
    :param cell_bw: An instance of RNNCell, to be used for backward direction
    :param inputs: The RNN inputs
    :param sequence_length: An int32/int64 vector
    :param initial_state_fw: An initial state for the forward RNN
    :param initial_state_bw: An initial state for the backward RNN
    :param dtype: The data type for the initial states and expected output
    :param parallel_iterations: The number of iterations in parallel
    :param swap_memory: whether to swap tensors needed for backprop from GPU to CPU to save memory
    :param time_major: if True, inputs/outputs are shaped [max_time, batch_size, depth]; otherwise [batch_size, max_time, depth]
    :param scope: VariableScope for the created subgraph; defaults to "bidirectional_rnn"
    :return: A tuple (outputs, output_states)
    """
    if not isinstance(cell_fw, rnn_cell.RNNCell):
        raise TypeError("cell_fw must be an instance of RNNCell")
    if not isinstance(cell_bw, rnn_cell.RNNCell):
        raise TypeError("cell_bw must be an instance of RNNCell")

    with vs.variable_scope(scope or "bidirectional_rnn"):
        # Forward direction
        with vs.variable_scope("fw") as fw_scope:
            output_fw, output_state_fw = dynamic_rnn(
                cell=cell_fw, inputs=inputs, sequence_length=sequence_length,
                initial_state=initial_state_fw, dtype=dtype,
                parallel_iterations=parallel_iterations,
                swap_memory=swap_memory, time_major=time_major, scope=fw_scope)

        # Backward direction
        if not time_major:
            time_dim = 1
            batch_dim = 0
        else:
            time_dim = 0
            batch_dim = 1

        with vs.variable_scope("bw") as bw_scope:
            inputs_reverse = array_ops.reverse_sequence(
                input=inputs, seq_lengths=sequence_length,
                seq_dim=time_dim, batch_dim=batch_dim)
            tmp_output_bw, tmp_output_state_bw = dynamic_rnn(
                cell=cell_bw, inputs=inputs_reverse,
                sequence_length=sequence_length,
                initial_state=initial_state_bw, dtype=dtype,
                parallel_iterations=parallel_iterations,
                swap_memory=swap_memory, time_major=time_major,
                scope=bw_scope)

    output_bw = array_ops.reverse_sequence(
        input=tmp_output_bw, seq_lengths=sequence_length,
        seq_dim=time_dim, batch_dim=batch_dim)

    if Config.cell_type == "LSTMCell":
        tmp_output_state_bw_ = tmp_output_state_bw.c
    else:
        tmp_output_state_bw_ = tmp_output_state_bw

    output_state_bw = array_ops.reverse_sequence(
        input=tmp_output_state_bw_, seq_lengths=sequence_length,
        seq_dim=time_dim, batch_dim=batch_dim)

    outputs = (output_fw, output_bw)

    if Config.cell_type == "LSTMCell":
        output_states = (output_state_fw.c, output_state_bw)
    else:
        output_states = (output_state_fw, output_state_bw)

    return (outputs, output_states)
Example #21
def bidirectional_dynamic_rnn(cell_fw,
                              cell_bw,
                              inputs,
                              sequence_length=None,
                              initial_state_fw=None,
                              initial_state_bw=None,
                              dtype=None,
                              parallel_iterations=None,
                              swap_memory=False,
                              time_major=False,
                              scope=None):
    """Creates a dynamic version of bidirectional recurrent neural network.

  Similar to the unidirectional case above (rnn) but takes input and builds
  independent forward and backward RNNs. The input_size of forward and
  backward cell must match. The initial state for both directions is zero by
  default (but can be set optionally) and no intermediate states are ever
  returned -- the network is fully unrolled for the given (passed in)
  length(s) of the sequence(s) or completely unrolled if length(s) is not
  given.

  Args:
    cell_fw: An instance of RNNCell, to be used for forward direction.
    cell_bw: An instance of RNNCell, to be used for backward direction.
    inputs: The RNN inputs.
      If time_major == False (default), this must be a tensor of shape:
        `[batch_size, max_time, input_size]`.
      If time_major == True, this must be a tensor of shape:
        `[max_time, batch_size, input_size]`.
    sequence_length: An int32/int64 vector, size `[batch_size]`,
      containing the actual lengths for each of the sequences.
    initial_state_fw: (optional) An initial state for the forward RNN.
      This must be a tensor of appropriate type and shape
      `[batch_size, cell_fw.state_size]`.
      If `cell_fw.state_size` is a tuple, this should be a tuple of
      tensors having shapes `[batch_size, s] for s in cell_fw.state_size`.
    initial_state_bw: (optional) Same as for `initial_state_fw`, but using
      the corresponding properties of `cell_bw`.
    dtype: (optional) The data type for the initial states and expected output.
      Required if initial_states are not provided or RNN states have a
      heterogeneous dtype.
    parallel_iterations: (Default: 32).  The number of iterations to run in
      parallel.  Those operations which do not have any temporal dependency
      and can be run in parallel, will be.  This parameter trades off
      time for space.  Values >> 1 use more memory but take less time,
      while smaller values use less memory but computations take longer.
    swap_memory: Transparently swap the tensors produced in forward inference
      but needed for back prop from GPU to CPU.  This allows training RNNs
      which would typically not fit on a single GPU, with very minimal (or no)
      performance penalty.
    time_major: The shape format of the `inputs` and `outputs` Tensors.
      If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`.
      If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`.
      Using `time_major = True` is a bit more efficient because it avoids
      transposes at the beginning and end of the RNN calculation.  However,
      most TensorFlow data is batch-major, so by default this function
      accepts input and emits output in batch-major form.
    scope: VariableScope for the created subgraph; defaults to "BiRNN"

  Returns:
    A tuple (outputs, output_states) where:
      outputs: A tuple (output_fw, output_bw) containing the forward and
        the backward rnn output `Tensor`.
        If time_major == False (default),
          output_fw will be a `Tensor` shaped:
          `[batch_size, max_time, cell_fw.output_size]`
          and output_bw will be a `Tensor` shaped:
          `[batch_size, max_time, cell_bw.output_size]`.
        If time_major == True,
          output_fw will be a `Tensor` shaped:
          `[max_time, batch_size, cell_fw.output_size]`
          and output_bw will be a `Tensor` shaped:
          `[max_time, batch_size, cell_bw.output_size]`.
        It returns a tuple instead of a single concatenated `Tensor`, unlike
        in the `bidirectional_rnn`. If the concatenated one is preferred,
        the forward and backward outputs can be concatenated as
        `tf.concat(2, outputs)`.
      output_states: A tuple (output_state_fw, output_state_bw) containing
        the forward and the backward final states of bidirectional rnn.

  Raises:
    TypeError: If `cell_fw` or `cell_bw` is not an instance of `RNNCell`.
  """

    if not isinstance(cell_fw, tf.contrib.rnn.RNNCell):
        raise TypeError("cell_fw must be an instance of RNNCell")
    if not isinstance(cell_bw, tf.contrib.rnn.RNNCell):
        raise TypeError("cell_bw must be an instance of RNNCell")

    with vs.variable_scope(scope or "BiRNN"):
        # Forward direction
        with vs.variable_scope("FW") as fw_scope:
            output_fw, output_state_fw = dynamic_rnn(
                cell=cell_fw,
                inputs=inputs,
                sequence_length=sequence_length,
                initial_state=initial_state_fw,
                dtype=dtype,
                parallel_iterations=parallel_iterations,
                swap_memory=swap_memory,
                time_major=time_major,
                scope=fw_scope)

        # Backward direction
        if not time_major:
            time_dim = 1
            batch_dim = 0
        else:
            time_dim = 0
            batch_dim = 1

        with vs.variable_scope("BW") as bw_scope:
            inputs_reverse = array_ops.reverse_sequence(
                input=inputs,
                seq_lengths=sequence_length,
                seq_dim=time_dim,
                batch_dim=batch_dim)
            tmp, output_state_bw = dynamic_rnn(
                cell=cell_bw,
                inputs=inputs_reverse,
                sequence_length=sequence_length,
                initial_state=initial_state_bw,
                dtype=dtype,
                parallel_iterations=parallel_iterations,
                swap_memory=swap_memory,
                time_major=time_major,
                scope=bw_scope)

    output_bw = array_ops.reverse_sequence(input=tmp,
                                           seq_lengths=sequence_length,
                                           seq_dim=time_dim,
                                           batch_dim=batch_dim)

    outputs = (output_fw, output_bw)
    output_states = (output_state_fw, output_state_bw)

    return (outputs, output_states)
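For comparison, a minimal usage sketch of the stock tf.compat.v1.nn.bidirectional_dynamic_rnn that the custom variants in these examples mirror, assuming TF 1.x-style graph mode via the compat API (shapes and cell sizes are made up):

import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

batch_size, max_time, depth, units = 4, 7, 16, 32
inputs = tf.placeholder(tf.float32, [batch_size, max_time, depth])
lengths = tf.placeholder(tf.int32, [batch_size])

cell_fw = tf.nn.rnn_cell.LSTMCell(units)
cell_bw = tf.nn.rnn_cell.LSTMCell(units)

(out_fw, out_bw), (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
    cell_fw, cell_bw, inputs, sequence_length=lengths, dtype=tf.float32)

# The backward output is already re-reversed internally, so forward and
# backward outputs can simply be concatenated along the feature axis.
outputs = tf.concat([out_fw, out_bw], axis=2)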
Example #22
def dynamic_bidirectional_rnn(cell_fw,
                              cell_bw,
                              inputs,
                              sequence_length,
                              initial_state_fw=None,
                              initial_state_bw=None,
                              ff_keep_prob=1.,
                              recur_keep_prob=True,
                              dtype=None,
                              parallel_iterations=None,
                              swap_memory=False,
                              time_major=False,
                              scope=None):
    """Creates a bidirectional recurrent neural network.

  Similar to the unidirectional case above (rnn) but takes input and builds
  independent forward and backward RNNs with the final forward and backward
  outputs depth-concatenated, such that the output will have the format
  [time][batch][cell_fw.output_size + cell_bw.output_size]. The input_size of
  forward and backward cell must match. The initial state for both directions
  is zero by default (but can be set optionally) and no intermediate states are
  ever returned -- the network is fully unrolled for the given (passed in)
  length(s) of the sequence(s) or completely unrolled if length(s) is not given.

  Args:
    cell_fw: An instance of RNNCell, to be used for forward direction.
    cell_bw: An instance of RNNCell, to be used for backward direction.
    inputs: A length T list of inputs, each a tensor of shape
      [batch_size, cell.input_size].
    initial_state_fw: (optional) An initial state for the forward RNN.
      This must be a tensor of appropriate type and shape
      [batch_size x cell.state_size].
    initial_state_bw: (optional) Same as for initial_state_fw.
    dtype: (optional) The data type for the initial state.  Required if either
      of the initial states are not provided.
    sequence_length: (optional) An int32/int64 vector, size [batch_size],
      containing the actual lengths for each of the sequences.
    scope: VariableScope for the created subgraph; defaults to "BiRNN"

  Returns:
    A tuple (outputs, output_state_fw, output_state_bw) where:
      outputs is a length T list of outputs (one for each input), which
      are depth-concatenated forward and backward outputs
      output_state_fw is the final state of the forward rnn
      output_state_bw is the final state of the backward rnn

  Raises:
    TypeError: If "cell_fw" or "cell_bw" is not an instance of RNNCell.
    ValueError: If inputs is None or an empty list.
  """

    if not isinstance(cell_fw, BaseCell):
        raise TypeError("cell_fw must be an instance of RNNCell")
    if not isinstance(cell_bw, BaseCell):
        raise TypeError("cell_bw must be an instance of RNNCell")
    if not isinstance(inputs, ops.Tensor):
        raise TypeError("inputs must be a Tensor")

    name = scope or "BiRNN"
    # Forward direction
    with vs.variable_scope(name + "_FW") as fw_scope:
        output_fw, output_state_fw = dynamic_rnn(cell_fw,
                                                 inputs,
                                                 sequence_length,
                                                 initial_state_fw,
                                                 ff_keep_prob,
                                                 recur_keep_prob,
                                                 dtype,
                                                 parallel_iterations,
                                                 swap_memory,
                                                 time_major,
                                                 scope=fw_scope)

    # Backward direction
    if time_major:
        rev_inputs = array_ops.reverse_sequence(inputs, sequence_length, 0, 1)
    else:
        rev_inputs = array_ops.reverse_sequence(inputs, sequence_length, 1, 0)
    with vs.variable_scope(name + "_BW") as bw_scope:
        tmp, output_state_bw = dynamic_rnn(cell_bw,
                                           rev_inputs,
                                           sequence_length,
                                           initial_state_bw,
                                           ff_keep_prob,
                                           recur_keep_prob,
                                           dtype,
                                           parallel_iterations,
                                           swap_memory,
                                           time_major,
                                           scope=bw_scope)
    if time_major:
        output_bw = array_ops.reverse_sequence(tmp, sequence_length, 0, 1)
    else:
        output_bw = array_ops.reverse_sequence(tmp, sequence_length, 1, 0)
    # Concat each of the forward/backward outputs
    outputs = array_ops.concat(2, [output_fw, output_bw])

    return (outputs, output_state_fw, output_state_bw)
Example #23
 def _reverse(_input, seq_lengths):
     return array_ops.reverse_sequence(input=_input,
                                       seq_lengths=seq_lengths,
                                       seq_dim=1,
                                       batch_dim=0)
Example #24
    def testInvalidArguments(self):
        # Batch size mismatched between input and seq_lengths.
        # seq_length too long
        with self.assertRaisesRegex(
            (ValueError, errors.InvalidArgumentError),
            (r"Dimensions must be equal|"
             r"Length of seq_lengths != input.dims\(0\)")):
            array_ops.reverse_sequence([[1, 2], [3, 4]], [2, 2, 2], seq_axis=1)

        # seq_length too short
        with self.assertRaisesRegex(
            (ValueError, errors.InvalidArgumentError),
            (r"Dimensions must be equal|"
             r"Length of seq_lengths != input.dims\(0\)")):
            array_ops.reverse_sequence([[1, 2], [3, 4]], [2], seq_axis=1)

        # Invalid seq_length shape
        with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError),
                                    ("Shape must be rank 1 but is rank 2|"
                                     "seq_lengths must be 1-dim")):
            array_ops.reverse_sequence([[1, 2], [3, 4]], [[2, 2]], seq_axis=1)

        # seq_axis out of bounds.
        with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError),
                                    "seq_dim must be < input rank"):
            array_ops.reverse_sequence([[1, 2], [3, 4]], [2, 2], seq_axis=2)

        # batch_axis out of bounds.
        with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError),
                                    "batch_dim must be < input rank"):
            array_ops.reverse_sequence([[1, 2], [3, 4]], [2, 2],
                                       seq_axis=1,
                                       batch_axis=3)

        with self.assertRaisesRegex(
            (errors.OpError, errors.InvalidArgumentError),
                "batch_dim == seq_dim == 0"):
            output = array_ops.reverse_sequence([[1, 2], [3, 4]], [2, 2],
                                                seq_axis=0)
            self.evaluate(output)
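For contrast with the invalid-argument cases above, a minimal valid call, assuming TF 2.x eager mode:

import tensorflow as tf

out = tf.reverse_sequence([[1, 2], [3, 4]], [2, 2], seq_axis=1, batch_axis=0)
# out: [[2, 1], [4, 3]]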
Example #25
    def __init__(self,
                 sequence_length,
                 num_classes,
                 vocab_size,
                 embedding_size,
                 hidden_size,
                 filter_sizes,
                 num_filters,
                 l2_reg_lambda=0.0):

        # Placeholders for input, sequence length, output and dropout
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length],
                                      name="input_x")
        self.seqlen = tf.placeholder(tf.int64, [None], name="seqlen")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes],
                                      name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                name="dropout_keep_prob")

        # Keeping track of l2 regularization loss (optional)
        l2_loss = tf.constant(0.0)

        # Embedding layer
        with tf.device('/cpu:0'), tf.name_scope("embedding"):
            self.W = tf.Variable(tf.random_uniform(
                [vocab_size, embedding_size], -1.0, 1.0),
                                 trainable=True,
                                 name="W")
            self.embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x)
            #TODO: Embeddings process ignores commas etc. so seqlens might not be accurate for sentences with commas...

        # Bidirectional LSTM layer
        with tf.name_scope("bidirectional-lstm"):
            lstm_fw_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_size,
                                                        forget_bias=1.0)
            lstm_bw_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_size,
                                                        forget_bias=1.0)

            # self.lstm_outputs, _, _ = tf.nn.bidirectional_dynamic_rnn(
            #     lstm_fw_cell,
            #     lstm_bw_cell,
            #     self.embedded_chars,
            #     sequence_length=self.seqlen,
            #     dtype=tf.float32)
            # lstm_outputs_fw, lstm_outputs_bw = tf.split(value=self.lstm_outputs, split_dim=2, num_split=2)
            # self.lstm_outputs = tf.add(lstm_outputs_fw, lstm_outputs_bw, name="lstm_outputs")

            with tf.variable_scope("lstm-output-fw"):
                self.lstm_outputs_fw, _ = tf.nn.dynamic_rnn(
                    lstm_fw_cell,
                    self.embedded_chars,
                    sequence_length=self.seqlen,
                    dtype=tf.float32)

            with tf.variable_scope("lstm-output-bw"):
                self.embedded_chars_rev = array_ops.reverse_sequence(
                    self.embedded_chars, seq_lengths=self.seqlen, seq_dim=1)
                tmp, _ = tf.nn.dynamic_rnn(lstm_bw_cell,
                                           self.embedded_chars_rev,
                                           sequence_length=self.seqlen,
                                           dtype=tf.float32)
                self.lstm_outputs_bw = array_ops.reverse_sequence(
                    tmp, seq_lengths=self.seqlen, seq_dim=1)

            # Concatenate outputs
            self.lstm_outputs = tf.add(self.lstm_outputs_fw,
                                       self.lstm_outputs_bw,
                                       name="lstm_outputs")

        self.lstm_outputs_expanded = tf.expand_dims(self.lstm_outputs, -1)

        # Convolution + maxpool layer for each filter size
        pooled_outputs = []
        for i, filter_size in enumerate(filter_sizes):
            with tf.name_scope("conv-maxpool-%s" % filter_size):
                # Convolution Layer
                filter_shape = [filter_size, hidden_size, 1, num_filters]
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1),
                                name="W")
                b = tf.Variable(tf.constant(0.1, shape=[num_filters]),
                                name="b")

                conv = tf.nn.conv2d(self.lstm_outputs_expanded,
                                    W,
                                    strides=[1, 1, 1, 1],
                                    padding="VALID",
                                    name="conv")

                # Apply nonlinearity
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")

                # Maxpooling over the outputs
                pooled = tf.nn.max_pool(
                    h,
                    ksize=[1, sequence_length - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="pool")
                pooled_outputs.append(pooled)

        # Combine all the pooled features
        num_filters_total = num_filters * len(filter_sizes)
        self.h_pool = tf.concat(3, pooled_outputs)
        self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

        # Dropout layer
        with tf.name_scope("dropout"):
            self.h_drop = tf.nn.dropout(self.h_pool_flat,
                                        self.dropout_keep_prob)

        # Final (unnormalized) scores and predictions
        with tf.name_scope("output"):
            # Standard output weights initialization
            W = tf.get_variable(
                "W",
                shape=[num_filters_total, num_classes],
                initializer=tf.contrib.layers.xavier_initializer())
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")

            # # Initialized output weights to 0.0, might improve accuracy
            # W = tf.Variable(tf.constant(0.0, shape=[num_filters_total, num_classes]), name="W")
            # b = tf.Variable(tf.constant(0.0, shape=[num_classes]), name="b")

            l2_loss += tf.nn.l2_loss(W)
            l2_loss += tf.nn.l2_loss(b)

            self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        # Calculate mean cross-entropy loss
        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(
                self.scores, self.input_y)
            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

        # Accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions,
                                           tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                   "float"),
                                           name="accuracy")
Example #26
def bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=None,
                              initial_state_fw=None, initial_state_bw=None,
                              dtype=None, parallel_iterations=None,
                              swap_memory=False, time_major=False, scope=None):
  """Creates a dynamic version of bidirectional recurrent neural network.

  Similar to the unidirectional case above (rnn) but takes input and builds
  independent forward and backward RNNs. The input_size of forward and
  backward cell must match. The initial state for both directions is zero by
  default (but can be set optionally) and no intermediate states are ever
  returned -- the network is fully unrolled for the given (passed in)
  length(s) of the sequence(s) or completely unrolled if length(s) is not
  given.

  Args:
    cell_fw: An instance of RNNCell, to be used for forward direction.
    cell_bw: An instance of RNNCell, to be used for backward direction.
    inputs: The RNN inputs.
      If time_major == False (default), this must be a tensor of shape:
        `[batch_size, max_time, input_size]`.
      If time_major == True, this must be a tensor of shape:
        `[max_time, batch_size, input_size]`.
    sequence_length: An int32/int64 vector, size `[batch_size]`,
      containing the actual lengths for each of the sequences.
    initial_state_fw: (optional) An initial state for the forward RNN.
      This must be a tensor of appropriate type and shape
      `[batch_size, cell_fw.state_size]`.
      If `cell_fw.state_size` is a tuple, this should be a tuple of
      tensors having shapes `[batch_size, s] for s in cell_fw.state_size`.
    initial_state_bw: (optional) Same as for `initial_state_fw`, but using
      the corresponding properties of `cell_bw`.
    dtype: (optional) The data type for the initial states and expected output.
      Required if initial_states are not provided or RNN states have a
      heterogeneous dtype.
    parallel_iterations: (Default: 32).  The number of iterations to run in
      parallel.  Those operations which do not have any temporal dependency
      and can be run in parallel, will be.  This parameter trades off
      time for space.  Values >> 1 use more memory but take less time,
      while smaller values use less memory but computations take longer.
    swap_memory: Transparently swap the tensors produced in forward inference
      but needed for back prop from GPU to CPU.  This allows training RNNs
      which would typically not fit on a single GPU, with very minimal (or no)
      performance penalty.
    time_major: The shape format of the `inputs` and `outputs` Tensors.
      If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`.
      If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`.
      Using `time_major = True` is a bit more efficient because it avoids
      transposes at the beginning and end of the RNN calculation.  However,
      most TensorFlow data is batch-major, so by default this function
      accepts input and emits output in batch-major form.
    scope: VariableScope for the created subgraph; defaults to
      "bidirectional_rnn"

  Returns:
    A tuple (outputs, output_states) where:
      outputs: A tuple (output_fw, output_bw) containing the forward and
        the backward rnn output `Tensor`.
        If time_major == False (default),
          output_fw will be a `Tensor` shaped:
          `[batch_size, max_time, cell_fw.output_size]`
          and output_bw will be a `Tensor` shaped:
          `[batch_size, max_time, cell_bw.output_size]`.
        If time_major == True,
          output_fw will be a `Tensor` shaped:
          `[max_time, batch_size, cell_fw.output_size]`
          and output_bw will be a `Tensor` shaped:
          `[max_time, batch_size, cell_bw.output_size]`.
        It returns a tuple instead of a single concatenated `Tensor`, unlike
        in the `bidirectional_rnn`. If the concatenated one is preferred,
        the forward and backward outputs can be concatenated as
        `tf.concat_v2(outputs, 2)`.
      output_states: A tuple (output_state_fw, output_state_bw) containing
        the forward and the backward final states of bidirectional rnn.

  Raises:
    TypeError: If `cell_fw` or `cell_bw` is not an instance of `RNNCell`.
  """

  if not isinstance(cell_fw, rnn_cell.RNNCell):
    raise TypeError("cell_fw must be an instance of RNNCell")
  if not isinstance(cell_bw, rnn_cell.RNNCell):
    raise TypeError("cell_bw must be an instance of RNNCell")

  with vs.variable_scope(scope or "bidirectional_rnn"):
    # Forward direction
    with vs.variable_scope("fw") as fw_scope:
      output_fw, output_state_fw = dynamic_rnn(
          cell=cell_fw, inputs=inputs, sequence_length=sequence_length,
          initial_state=initial_state_fw, dtype=dtype,
          parallel_iterations=parallel_iterations, swap_memory=swap_memory,
          time_major=time_major, scope=fw_scope)

    # Backward direction
    if not time_major:
      time_dim = 1
      batch_dim = 0
    else:
      time_dim = 0
      batch_dim = 1

    with vs.variable_scope("bw") as bw_scope:
      inputs_reverse = array_ops.reverse_sequence(
          input=inputs, seq_lengths=sequence_length,
          seq_dim=time_dim, batch_dim=batch_dim)
      tmp, output_state_bw = dynamic_rnn(
          cell=cell_bw, inputs=inputs_reverse, sequence_length=sequence_length,
          initial_state=initial_state_bw, dtype=dtype,
          parallel_iterations=parallel_iterations, swap_memory=swap_memory,
          time_major=time_major, scope=bw_scope)

  output_bw = array_ops.reverse_sequence(
      input=tmp, seq_lengths=sequence_length,
      seq_dim=time_dim, batch_dim=batch_dim)

  outputs = (output_fw, output_bw)
  output_states = (output_state_fw, output_state_bw)

  return (outputs, output_states)
Example #27
import tensorflow as tf
from tensorflow.python.ops.array_ops import reverse_sequence
from tensorflow.python.util import nest

from tensorflow.python.framework import ops
from tensorflow.python.ops.array_ops import rank
from tensorflow.python.ops import array_ops

i = tf.constant(0)
c = lambda i: tf.less(i, 10)
b = lambda i: tf.add(i, 1)
r = tf.while_loop(c, b, [i])

data = tf.range(20)
data = tf.reshape(data, shape=[2, 2, 5])

sequence_length = tf.constant(value=[5, 5])

reversed_data = reverse_sequence(data,
                                 seq_lengths=sequence_length,
                                 seq_dim=2,
                                 batch_dim=0)

splits = array_ops.split(value=data, num_or_size_splits=2, axis=1)

with tf.Session() as sess:
    print('source data: ', sess.run(data))
    print('reversed: ', sess.run(reversed_data))
    print('split: ', sess.run(splits))
Example #28
 def _reverse(_input):
     return array_ops.reverse_sequence(input=_input,
                                       seq_lengths=sequence_lengths,
                                       seq_axis=1,
                                       batch_axis=0)
Example #29
    def __init__(self,
                 sequence_length,
                 num_classes,
                 vocab_size,
                 embedding_size,
                 hidden_size,
                 filter_sizes,
                 num_filters,
                 l2_reg_lambda=0.0):

        self.input_x = tf.placeholder(tf.int32, [None, sequence_length],
                                      name="input_x")

        self.seqlen = tf.placeholder(tf.int64, [None], name="seqlen")

        self.input_y = tf.placeholder(tf.float32, [None, num_classes],
                                      name="input_y")

        self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                name="dropout_keep_prob")

        # Keeping track of l2 regularization loss (optional)

        l2_loss = tf.constant(0.0)

        # Embedding layer

        with tf.device('/cpu:0'), tf.name_scope("embedding"):
            self.W = tf.Variable(tf.random_uniform(
                [vocab_size, embedding_size], -1.0, 1.0),
                                 trainable=True,
                                 name="W")

            self.embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x)

        # Bidirectional LSTM layer

        with tf.name_scope("bidirectional-lstm"):
            lstm_fw_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_size,
                                                        forget_bias=1.0)

            lstm_bw_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_size,
                                                        forget_bias=1.0)

            with tf.variable_scope("lstm-output-fw"):
                self.lstm_outputs_fw, _ = tf.nn.dynamic_rnn(
                    lstm_fw_cell,
                    self.embedded_chars,
                    sequence_length=self.seqlen,
                    dtype=tf.float32)

            with tf.variable_scope("lstm-output-bw"):
                self.embedded_chars_rev = array_ops.reverse_sequence(
                    self.embedded_chars, seq_lengths=self.seqlen, seq_dim=1)

                tmp, _ = tf.nn.dynamic_rnn(lstm_bw_cell,
                                           self.embedded_chars_rev,
                                           sequence_length=self.seqlen,
                                           dtype=tf.float32)

                self.lstm_outputs_bw = array_ops.reverse_sequence(
                    tmp, seq_lengths=self.seqlen, seq_dim=1)

            self.lstm_outputs = tf.add(self.lstm_outputs_fw,
                                       self.lstm_outputs_bw,
                                       name="lstm_outputs")

        self.lstm_outputs_expanded = tf.expand_dims(self.lstm_outputs, -1)

        pooled_outputs = []

        for i, filter_size in enumerate(filter_sizes):
            with tf.name_scope("conv-maxpool-%s" % filter_size):

                filter_shape = [filter_size, hidden_size, 1, num_filters]

                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1),
                                name="W")

                b = tf.Variable(tf.constant(0.1, shape=[num_filters]),
                                name="b")

                conv = tf.nn.conv2d(self.lstm_outputs_expanded,
                                    W,
                                    strides=[1, 1, 1, 1],
                                    padding="VALID",
                                    name="conv")

                h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")

                pooled = tf.nn.max_pool(
                    h,
                    ksize=[1, sequence_length - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="pool")

                pooled_outputs.append(pooled)

        num_filters_total = num_filters * len(filter_sizes)

        self.h_pool = tf.concat(3, pooled_outputs)

        self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

        with tf.name_scope("dropout"):
            self.h_drop = tf.nn.dropout(self.h_pool_flat,
                                        self.dropout_keep_prob)

        with tf.name_scope("output"):

            W = tf.get_variable(
                "W",
                shape=[num_filters_total, num_classes],
                initializer=tf.contrib.layers.xavier_initializer())

            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")

            l2_loss += tf.nn.l2_loss(W)

            l2_loss += tf.nn.l2_loss(b)

            self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")

            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(
                self.scores, self.input_y)

            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

        # Accuracy

        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions,
                                           tf.argmax(self.input_y, 1))

            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                   "float"),
                                           name="accuracy")
Example #30
def _reverse(input_, seq_lengths, seq_dim,
             batch_dim):  # reverses sequences with right-padding correctly
    return array_ops.reverse_sequence(input=input_,
                                      seq_lengths=seq_lengths,
                                      seq_dim=seq_dim,
                                      batch_dim=batch_dim)
Example #31
def bidirectional_dynamic_rnn(cell_fw,
                              cell_bw,
                              inputs,
                              sequence_length=None,
                              initial_state_fw=None,
                              initial_state_bw=None,
                              dtype=None,
                              parallel_iterations=None,
                              swap_memory=False,
                              time_major=False,
                              scope=None):
    """
    Creates a dynamic version of bidirectional recurrent neural network.
    The initial state for both directions is zero by default.
    :param cell_fw: An instance of RNNCell, to be used for forward direction
    :param cell_bw: An instance of RNNCell, to be used for backward direction
    :param inputs: The RNN inputs
    :param sequence_length: An int32/int64 vector
    :param initial_state_fw: An initial state for the forward RNN
    :param initial_state_bw: An initial state for the backward RNN
    :param dtype: The data type for the initial states and expected output
    :param parallel_iterations: The number of iterations in parallel
    :param swap_memory: whether to swap tensors needed for backprop from GPU to CPU to save memory
    :param time_major: if True, inputs/outputs are shaped [max_time, batch_size, depth]; otherwise [batch_size, max_time, depth]
    :param scope: VariableScope for the created subgraph; defaults to "bidirectional_rnn"
    :return: A tuple (outputs, output_states)
    """
    if not isinstance(cell_fw, rnn_cell.RNNCell):
        raise TypeError("cell_fw must be an instance of RNNCell")
    if not isinstance(cell_bw, rnn_cell.RNNCell):
        raise TypeError("cell_bw must be an instance of RNNCell")

    with vs.variable_scope(scope or "bidirectional_rnn"):
        # Forward direction
        with vs.variable_scope("fw") as fw_scope:
            output_fw, output_state_fw = dynamic_rnn(
                cell=cell_fw,
                inputs=inputs,
                sequence_length=sequence_length,
                initial_state=initial_state_fw,
                dtype=dtype,
                parallel_iterations=parallel_iterations,
                swap_memory=swap_memory,
                time_major=time_major,
                scope=fw_scope)

        # Backward direction
        if not time_major:
            time_dim = 1
            batch_dim = 0
        else:
            time_dim = 0
            batch_dim = 1

        with vs.variable_scope("bw") as bw_scope:
            inputs_reverse = array_ops.reverse_sequence(
                input=inputs,
                seq_lengths=sequence_length,
                seq_dim=time_dim,
                batch_dim=batch_dim)
            tmp_output_bw, tmp_output_state_bw = dynamic_rnn(
                cell=cell_bw,
                inputs=inputs_reverse,
                sequence_length=sequence_length,
                initial_state=initial_state_bw,
                dtype=dtype,
                parallel_iterations=parallel_iterations,
                swap_memory=swap_memory,
                time_major=time_major,
                scope=bw_scope)

    output_bw = array_ops.reverse_sequence(input=tmp_output_bw,
                                           seq_lengths=sequence_length,
                                           seq_dim=time_dim,
                                           batch_dim=batch_dim)

    if Config.cell_type == "LSTMCell":
        tmp_output_state_bw_ = tmp_output_state_bw.c
    else:
        tmp_output_state_bw_ = tmp_output_state_bw

    output_state_bw = array_ops.reverse_sequence(input=tmp_output_state_bw_,
                                                 seq_lengths=sequence_length,
                                                 seq_dim=time_dim,
                                                 batch_dim=batch_dim)

    outputs = (output_fw, output_bw)

    if Config.cell_type == "LSTMCell":
        output_states = (output_state_fw.c, output_state_bw)
    else:
        output_states = (output_state_fw, output_state_bw)

    return (outputs, output_states)
Example #32
 def _reverse(input_, seq_lengths, seq_dim, batch_dim):
     return array_ops.reverse_sequence(input=input_,
                                       seq_lengths=seq_lengths,
                                       seq_dim=seq_dim,
                                       batch_dim=batch_dim)