Example 1
            def BiRNN(x):

                # Current data input shape: (batch_size, n_steps, n_input).
                # Unlike the legacy static `bidirectional_rnn`, the
                # `bidirectional_dynamic_rnn` call below consumes this
                # batch-major 3-D tensor directly, so no unstacking into a
                # list of 'n_steps' tensors is needed.

                # Define LSTM cells with TensorFlow
                # Forward direction cell
                #lstm_fw_cell = rnn.DropoutWrapper(tf.nn.rnn_cell.LSTMCell(n_hidden, forget_bias=1.0), self.keep_prob2) #, use_peepholes=True)
                lstm_fw_cell = rnn.DropoutWrapper(
                    cudnn_rnn.CudnnCompatibleLSTMCell(n_hidden),
                    self.keep_prob2)  #, use_peepholes=True)
                #lstm_fw_cell = rnn.DropoutWrapper(tf.nn.rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0), self.keep_prob2)
                # Backward direction cell
                #lstm_bw_cell = rnn.DropoutWrapper(tf.nn.rnn_cell.LSTMCell(n_hidden, forget_bias=1.0), self.keep_prob2) #, use_peepholes=True)
                lstm_bw_cell = rnn.DropoutWrapper(
                    cudnn_rnn.CudnnCompatibleLSTMCell(n_hidden),
                    self.keep_prob2)
                #lstm_bw_cell = rnn.DropoutWrapper(tf.nn.rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0), self.keep_prob2)
                # Get lstm cell output

                try:
                    outputs, _, _ = tf.nn.bidirectional_dynamic_rnn(
                        lstm_fw_cell, lstm_bw_cell, x, dtype=tf.float32)
                except Exception:  # Old TensorFlow version only returns outputs not states
                    outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                        lstm_fw_cell, lstm_bw_cell, x, dtype=tf.float32)
                #outputs,_ = tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell, lstm_bw_cell,x,
                #dtype=tf.float32)

                # Linear activation, using rnn inner loop last output
                #return tf.matmul(outputs[-1], weights['out']) + biases['out']
                return tf.concat(outputs, 2)
                """
Example 2
def lstm_layer(inputs,
               batch_size,
               num_units,
               lengths=None,
               stack_size=1,
               use_cudnn=False,
               rnn_dropout_drop_amt=0,
               is_training=True,
               bidirectional=True):
  """Create a LSTM layer using the specified backend."""
  if use_cudnn:
    return cudnn_lstm_layer(inputs, batch_size, num_units, lengths, stack_size,
                            rnn_dropout_drop_amt, is_training, bidirectional)
  else:
    assert rnn_dropout_drop_amt == 0
    cells_fw = [
        contrib_cudnn_rnn.CudnnCompatibleLSTMCell(num_units)
        for _ in range(stack_size)
    ]
    cells_bw = [
        contrib_cudnn_rnn.CudnnCompatibleLSTMCell(num_units)
        for _ in range(stack_size)
    ]
    with tf.variable_scope('cudnn_lstm'):
      (outputs, unused_state_f,
       unused_state_b) = contrib_rnn.stack_bidirectional_dynamic_rnn(
           cells_fw,
           cells_bw,
           inputs,
           dtype=tf.float32,
           sequence_length=lengths,
           parallel_iterations=1)

    return outputs
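A hedged usage sketch for the non-cuDNN path of lstm_layer; the contrib_* aliases and the cudnn_lstm_layer helper are assumed to come from the surrounding module:

import tensorflow as tf
from tensorflow.contrib import cudnn_rnn as contrib_cudnn_rnn
from tensorflow.contrib import rnn as contrib_rnn

# Hypothetical call with batch-major inputs of shape [batch, time, features].
inputs = tf.placeholder(tf.float32, [32, None, 64])
lengths = tf.placeholder(tf.int32, [32])
outputs = lstm_layer(inputs, batch_size=32, num_units=128, lengths=lengths,
                     stack_size=2, use_cudnn=False)
# outputs: [32, time, 256] -- forward and backward halves are concatenated.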
Example 3
    def rnn(rnn_in_4d, n_cell=4, num_hidden_shrinkage=1):
        rnn_in_4d = tf.transpose(rnn_in_4d, [0, 2, 1, 3])
        third_dim = rnn_in_4d.shape[2]
        fourth_dim = rnn_in_4d.shape[3]
        rnn_in_3d = tf.reshape(
            rnn_in_4d,
            [-1, rnn_in_4d.shape[1],
             np.prod([third_dim, fourth_dim])])

        # rnn_in_3d = tf.squeeze(rnn_in_4d, axis=[1])

        # basic cells which is used to build RNN
        num_hidden = int((third_dim * fourth_dim).value / num_hidden_shrinkage)
        fourth_dim = int(fourth_dim.value / num_hidden_shrinkage)

        fw_cells = [
            # tf.nn.rnn_cell.LSTMCell(
            #     num_units=num_hidden,
            #     state_is_tuple=True
            cudnn_rnn.CudnnCompatibleLSTMCell(num_units=num_hidden)
            for _ in range(n_cell)
        ]
        bw_cells = [
            # tf.nn.rnn_cell.LSTMCell(
            #     num_units=num_hidden,
            #     state_is_tuple=True
            cudnn_rnn.CudnnCompatibleLSTMCell(num_units=num_hidden)
            for _ in range(n_cell)
        ]

        # stack basic cells
        fw_stacked = tf.nn.rnn_cell.MultiRNNCell(fw_cells, state_is_tuple=True)
        bw_stacked = tf.nn.rnn_cell.MultiRNNCell(bw_cells, state_is_tuple=True)

        # bidirectional RNN
        # BxTxF -> BxTx2H
        ((fw, bw), _) = tf.nn.bidirectional_dynamic_rnn(cell_fw=fw_stacked,
                                                        cell_bw=bw_stacked,
                                                        inputs=rnn_in_3d,
                                                        dtype=rnn_in_3d.dtype)

        # BxTxH + BxTxH -> BxTx2H -> Bx1xTx2H
        # result = tf.expand_dims(tf.concat([fw, bw], -1), 1)
        rnn_out_4d = tf.reshape(tf.concat(
            [fw, bw], -1), [-1, rnn_in_4d.shape[1], third_dim, num_hidden * 2])
        rnn_out_4d = tf.transpose(rnn_out_4d, [0, 2, 1, 3])

        return rnn_out_4d
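A rough shape walk-through under assumed dimensions (purely illustrative). With an input whose second dimension is 1, e.g. (batch, 1, width, channels), the transpose makes width the time axis and the final reshape back to 4-D lines up:

import numpy as np
import tensorflow as tf
from tensorflow.contrib import cudnn_rnn

# Hypothetical feature map: (batch, 1, width, channels); width acts as time.
rnn_in = tf.placeholder(tf.float32, [None, 1, 100, 256])
# rnn_out = rnn(rnn_in, n_cell=2)
# Inside the function rnn_in_3d is (batch, 100, 256); with
# num_hidden_shrinkage=1, num_hidden is 256, so the output comes back
# as (batch, 1, 100, 512).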
Example 4
    def bilstm_with_c(self,
                      x,
                      seq_len,
                      lstm_output_dims=None,
                      lstm_layer_count=1,
                      keep_prob=1.0,
                      name="bilstm"):
        x_shape = x.get_shape()
        input_dims = int(x_shape[-1])
        max_seq_len = int(x_shape[-2])
        u = int(input_dims /
                2) if lstm_output_dims is None else lstm_output_dims

        contexts = []
        with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
            if len(x_shape) >= 4:
                x = tf.reshape(x, [-1, max_seq_len, input_dims])
                seq_len = tf.reshape(seq_len, [-1])

            for i in range(lstm_layer_count):
                with tf.variable_scope("lstm_layer_" + str(i + 1),
                                       reuse=tf.AUTO_REUSE):
                    cell_fw = cudnn_rnn.CudnnCompatibleLSTMCell(num_units=u)
                    cell_bw = cudnn_rnn.CudnnCompatibleLSTMCell(num_units=u)

                    if keep_prob < 1.0 and self.is_training:
                        cell_fw = tf.nn.rnn_cell.DropoutWrapper(
                            cell_fw, output_keep_prob=keep_prob)
                        cell_bw = tf.nn.rnn_cell.DropoutWrapper(
                            cell_bw, output_keep_prob=keep_prob)

                    outputs, state = tf.nn.bidirectional_dynamic_rnn(
                        cell_fw,
                        cell_bw,
                        x,
                        sequence_length=seq_len,
                        dtype=tf.float32)
                    contexts.append(
                        tf.concat([state[0].c, state[1].c], axis=-1))

                    x = tf.concat(outputs, axis=-1)

        if len(x_shape) >= 4:
            outer_dims = [-1 if s is None else s for s in x_shape.as_list()[:-2]]
            return (tf.reshape(x, outer_dims + [max_seq_len, u * 2]),
                    [tf.reshape(c, outer_dims + [u * 2]) for c in contexts])
        else:
            return x, contexts
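A hedged illustration of what this method returns for a plain 3-D input; the shapes are assumptions, and self.is_training is expected to be set on the enclosing model:

import tensorflow as tf
from tensorflow.contrib import cudnn_rnn

# Hypothetical call: token embeddings [batch=32, max_len=50, emb=300].
# x = tf.placeholder(tf.float32, [32, 50, 300])
# seq_len = tf.placeholder(tf.int32, [32])
# outputs, contexts = model.bilstm_with_c(x, seq_len, lstm_output_dims=150,
#                                         lstm_layer_count=2, keep_prob=0.8)
# outputs: [32, 50, 300] (forward and backward halves concatenated);
# contexts: one [32, 300] tensor per layer, built from the final cell states.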
Example 5
def _single_lstm(input_emb, input_len, hidden_size, is_fwd, use_cudnn):
  """Compute the outputs of a single LSTM (subroutine of stacked_bilstm).

  Be careful if used anywhere outside of stacked_bilstm, which converts the
  sequences to the time-major format expected by this function.

  Args:
    input_emb: <float32> [sequence_length, batch_size, emb]
    input_len: <int32> [batch_size]
    hidden_size: Number of units in the LSTM cell.
    is_fwd: Boolean indicating the directionality of the LSTM.
    use_cudnn: Boolean indicating the use of cudnn.

  Returns:
    output_emb: <float32> [sequence_length, batch_size, emb]
  """
  if not is_fwd:
    input_emb = tf.reverse_sequence(
        input_emb,
        input_len,
        seq_axis=0,
        batch_axis=1)
  if use_cudnn:
    lstm = contrib_cudnn_rnn.CudnnLSTM(
        num_layers=1,
        num_units=hidden_size,
        input_mode=cudnn_rnn_ops.CUDNN_INPUT_LINEAR_MODE,
        direction=cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION)
    lstm.build(input_emb.shape)
    output_emb, _ = lstm(input_emb)
  else:
    cell = contrib_cudnn_rnn.CudnnCompatibleLSTMCell(hidden_size)
    cell = contrib_rnn.MultiRNNCell([cell])
    output_emb, _ = tf.nn.dynamic_rnn(
        cell=cell,
        inputs=input_emb,
        sequence_length=input_len,
        dtype=tf.float32,
        time_major=True)
  if not is_fwd:
    output_emb = tf.reverse_sequence(
        output_emb,
        input_len,
        seq_axis=0,
        batch_axis=1)
  return output_emb
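A minimal sketch of calling _single_lstm directly, assuming the contrib imports below; as the docstring notes, the input has to be time-major, which stacked_bilstm normally takes care of:

import tensorflow as tf
from tensorflow.contrib import cudnn_rnn as contrib_cudnn_rnn
from tensorflow.contrib import rnn as contrib_rnn
from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops

# Hypothetical batch-major embeddings, transposed to time-major first.
emb = tf.placeholder(tf.float32, [8, 20, 128])   # [batch, time, emb]
lengths = tf.placeholder(tf.int32, [8])
time_major = tf.transpose(emb, [1, 0, 2])        # [time, batch, emb]
fwd = _single_lstm(time_major, lengths, hidden_size=256, is_fwd=True,
                   use_cudnn=False)
bwd = _single_lstm(time_major, lengths, hidden_size=256, is_fwd=False,
                   use_cudnn=False)
bilstm = tf.concat([fwd, bwd], axis=-1)          # [time, batch, 512]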
Example 6
def make_cudnn(inputs, rnn_layer_sizes, batch_size, mode,
               dropout_keep_prob=1.0, residual_connections=False):
  """Builds a sequence of cuDNN LSTM layers from the given hyperparameters.

  Args:
    inputs: A tensor of RNN inputs.
    rnn_layer_sizes: A list of integer sizes (in units) for each layer of the
        RNN.
    batch_size: The number of examples per batch.
    mode: 'train', 'eval', or 'generate'. For 'generate',
        CudnnCompatibleLSTMCell will be used.
    dropout_keep_prob: The float probability to keep the output of any given
        sub-cell.
    residual_connections: Whether or not to use residual connections.

  Returns:
    outputs: A tensor of RNN outputs, with shape
        `[batch_size, inputs.shape[1], rnn_layer_sizes[-1]]`.
    initial_state: The initial RNN states, a tuple with length
        `len(rnn_layer_sizes)` of LSTMStateTuples.
    final_state: The final RNN states, a tuple with length
        `len(rnn_layer_sizes)` of LSTMStateTuples.
  """
  cudnn_inputs = tf.transpose(inputs, [1, 0, 2])

  if len(set(rnn_layer_sizes)) == 1 and not residual_connections:
    initial_state = tuple(
        contrib_rnn.LSTMStateTuple(
            h=tf.zeros([batch_size, num_units], dtype=tf.float32),
            c=tf.zeros([batch_size, num_units], dtype=tf.float32))
        for num_units in rnn_layer_sizes)

    if mode != 'generate':
      # We can make a single call to CudnnLSTM since all layers are the same
      # size and we aren't using residual connections.
      cudnn_initial_state = state_tuples_to_cudnn_lstm_state(initial_state)
      cell = contrib_cudnn_rnn.CudnnLSTM(
          num_layers=len(rnn_layer_sizes),
          num_units=rnn_layer_sizes[0],
          direction='unidirectional',
          dropout=1.0 - dropout_keep_prob)
      cudnn_outputs, cudnn_final_state = cell(
          cudnn_inputs, initial_state=cudnn_initial_state,
          training=mode == 'train')
      final_state = cudnn_lstm_state_to_state_tuples(cudnn_final_state)

    else:
      # At generation time we use CudnnCompatibleLSTMCell.
      cell = contrib_rnn.MultiRNNCell([
          contrib_cudnn_rnn.CudnnCompatibleLSTMCell(num_units)
          for num_units in rnn_layer_sizes
      ])
      cudnn_outputs, final_state = tf.nn.dynamic_rnn(
          cell, cudnn_inputs, initial_state=initial_state, time_major=True,
          scope='cudnn_lstm/rnn')

  else:
    # We need to make multiple calls to CudnnLSTM, keeping the initial and final
    # states at each layer.
    initial_state = []
    final_state = []

    for i in range(len(rnn_layer_sizes)):
      # If we're using residual connections and this layer is not the same size
      # as the previous layer, we need to project into the new size so the
      # (projected) input can be added to the output.
      if residual_connections:
        if i == 0 or rnn_layer_sizes[i] != rnn_layer_sizes[i - 1]:
          cudnn_inputs = contrib_layers.linear(cudnn_inputs, rnn_layer_sizes[i])

      layer_initial_state = (contrib_rnn.LSTMStateTuple(
          h=tf.zeros([batch_size, rnn_layer_sizes[i]], dtype=tf.float32),
          c=tf.zeros([batch_size, rnn_layer_sizes[i]], dtype=tf.float32)),)

      if mode != 'generate':
        cudnn_initial_state = state_tuples_to_cudnn_lstm_state(
            layer_initial_state)
        cell = contrib_cudnn_rnn.CudnnLSTM(
            num_layers=1,
            num_units=rnn_layer_sizes[i],
            direction='unidirectional',
            dropout=1.0 - dropout_keep_prob)
        cudnn_outputs, cudnn_final_state = cell(
            cudnn_inputs, initial_state=cudnn_initial_state,
            training=mode == 'train')
        layer_final_state = cudnn_lstm_state_to_state_tuples(cudnn_final_state)

      else:
        # At generation time we use CudnnCompatibleLSTMCell.
        cell = contrib_rnn.MultiRNNCell(
            [contrib_cudnn_rnn.CudnnCompatibleLSTMCell(rnn_layer_sizes[i])])
        cudnn_outputs, layer_final_state = tf.nn.dynamic_rnn(
            cell, cudnn_inputs, initial_state=layer_initial_state,
            time_major=True,
            scope='cudnn_lstm/rnn' if i == 0 else 'cudnn_lstm_%d/rnn' % i)

      if residual_connections:
        cudnn_outputs += cudnn_inputs

      cudnn_inputs = cudnn_outputs

      initial_state += layer_initial_state
      final_state += layer_final_state

  outputs = tf.transpose(cudnn_outputs, [1, 0, 2])

  return outputs, tuple(initial_state), tuple(final_state)
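A hedged usage sketch for make_cudnn; the contrib aliases and the state-conversion helpers (state_tuples_to_cudnn_lstm_state, cudnn_lstm_state_to_state_tuples) are assumed to live in the surrounding module:

import tensorflow as tf
from tensorflow.contrib import cudnn_rnn as contrib_cudnn_rnn
from tensorflow.contrib import layers as contrib_layers
from tensorflow.contrib import rnn as contrib_rnn

# Hypothetical training-time call with batch-major inputs [batch, time, depth].
inputs = tf.placeholder(tf.float32, [64, 128, 38])
outputs, initial_state, final_state = make_cudnn(
    inputs, rnn_layer_sizes=[256, 256], batch_size=64, mode='train',
    dropout_keep_prob=0.75)
# outputs: [64, 128, 256]; with mode='generate' the same checkpoint variables
# are read back through CudnnCompatibleLSTMCell under the 'cudnn_lstm/rnn' scope.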
Example 7
 def get_lstm_cell(num_hidden):
     # `reuse_variables` is expected to be defined in the enclosing scope.
     return cudnn_rnn.CudnnCompatibleLSTMCell(
         num_hidden, reuse=reuse_variables)