def test_cudnn_lstm(self):
        num_layers = 4
        num_units = 2
        batch_size = 8
        dir_count = 1

        # Cudnn RNN layers consume time-major input: [time, batch, input_size].
        inputs = tf.random_uniform(
            [num_layers * dir_count, batch_size, num_units],
            dtype=tf.float32)

        lstm = cudnn_rnn.CudnnLSTM(
            num_layers=num_layers,
            num_units=num_units,
            direction='unidirectional',
            kernel_initializer=tf.constant_initializer(0.),
            bias_initializer=tf.constant_initializer(0.),
            name='test_lstm')

        outputs, _ = lstm(inputs)
        total_sum = tf.reduce_sum(outputs)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            # With zero-initialized kernel and bias every LSTM output is
            # exactly zero, so the reduced sum must be 0.
            result = sess.run(total_sum)
            self.assertEqual(0, result)
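
The snippets on this page target the TensorFlow 1.x contrib API and omit their imports. A minimal import sketch they appear to assume is shown below; the alias names are inferred from how each snippet refers to the modules, not taken from the original files.

# Minimal import sketch, assuming TensorFlow 1.x with tf.contrib available.
# Alias names mirror how the snippets on this page refer to the modules.
import tensorflow as tf
from tensorflow.contrib import cudnn_rnn                       # CudnnLSTM, CudnnGRU, ...
from tensorflow.contrib import cudnn_rnn as contrib_cudnn_rnn
from tensorflow.contrib import rnn                              # LSTMStateTuple, BasicLSTMCell
from tensorflow.contrib import rnn as contrib_rnn
from tensorflow.contrib import layers as contrib_layers         # initializers, linear
from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops
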
    def rnn_encoder(self, inputs, scope=None):
        inputs = tf.transpose(inputs, perm=[1, 0, 2])
        with tf.variable_scope(scope or 'rnn_encoder'):

            DIRECTION = "bidirectional"
            if self.num_layers >= 2:
                num_layer = self.num_layers // 2
            else:
                num_layer = self.num_layers

            if self.cell_type.lower() == 'gru':
                cell = cudnn_rnn.CudnnGRU(num_layer,
                                          self.hidden_size,
                                          direction=DIRECTION,
                                          dropout=1 - self.drop)
            else:
                cell = cudnn_rnn.CudnnLSTM(num_layer,
                                           self.hidden_size,
                                           direction=DIRECTION,
                                           dropout=1 - self.drop)

            outputs, _ = cell(inputs)

        outputs = tf.transpose(outputs, perm=[1, 0, 2])
        return outputs
Example #3
 def _build_rnn_graph_cudnn(self, inputs, config, is_training):
     """Build the inference graph using CUDNN cell."""
     inputs = tf.transpose(inputs, [1, 0, 2])
     #    self._cell = tf.contrib.cudnn_rnn.CudnnLSTM(
     self._cell = cudnn_rnn.CudnnLSTM(
         num_layers=config.num_layers,
         num_units=config.hidden_size,
         input_size=config.hidden_size,
         dropout=1 - config.keep_prob if is_training else 0)
     params_size_t = self._cell.params_size()
     self._rnn_params = tf.get_variable(
         "lstm_params",
         initializer=tf.random_uniform([params_size_t], -config.init_scale,
                                       config.init_scale),
         validate_shape=False)
     c = tf.zeros([config.num_layers, self.batch_size, config.hidden_size],
                  tf.float32)
     h = tf.zeros([config.num_layers, self.batch_size, config.hidden_size],
                  tf.float32)
     #    self._initial_state = (tf.contrib.rnn.LSTMStateTuple(h=h, c=c),)
     self._initial_state = (rnn.LSTMStateTuple(h=h, c=c), )
     outputs, h, c = self._cell(inputs, h, c, self._rnn_params, is_training)
     outputs = tf.transpose(outputs, [1, 0, 2])
     outputs = tf.reshape(outputs, [-1, config.hidden_size])
     #    return outputs, (tf.contrib.rnn.LSTMStateTuple(h=h, c=c),)
     return outputs, (rnn.LSTMStateTuple(h=h, c=c), )
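
This variant uses the older op-style CudnnLSTM interface, in which the caller allocates the opaque parameter buffer via params_size() and passes it explicitly on every call. For comparison, a rough sketch of the same graph written against the newer layer-style API (which owns its parameter buffer internally) is given below; config and self.batch_size are the same assumed attributes used above, and the sketch is not claimed to reproduce the original code exactly.

# Hedged sketch: a layer-style CudnnLSTM equivalent (TF 1.x contrib), not the
# original code. The layer creates and initializes its opaque parameter
# buffer internally, so no params_size()/get_variable bookkeeping is needed.
def _build_rnn_graph_cudnn_layer(self, inputs, config, is_training):
    inputs = tf.transpose(inputs, [1, 0, 2])  # to time-major
    cell = cudnn_rnn.CudnnLSTM(
        num_layers=config.num_layers,
        num_units=config.hidden_size,
        dropout=1 - config.keep_prob if is_training else 0)
    c = tf.zeros([config.num_layers, self.batch_size, config.hidden_size],
                 tf.float32)
    h = tf.zeros([config.num_layers, self.batch_size, config.hidden_size],
                 tf.float32)
    outputs, (h, c) = cell(inputs, initial_state=(h, c), training=is_training)
    outputs = tf.transpose(outputs, [1, 0, 2])  # back to batch-major
    outputs = tf.reshape(outputs, [-1, config.hidden_size])
    return outputs, (rnn.LSTMStateTuple(h=h, c=c),)
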
Example #4
def cuda_rnn(inputs,
             num_layers,
             hidden_size,
             seq_len,
             init_states=None,
             cell_type="GRU"):
    """Run the CuDNN RNN.
    Arguments:
        - inputs:   A tensor of shape [batch, length, input_size] of inputs.
        - layers:   Number of RNN layers.
        - hidden_size:     Number of units in each layer.
        - is_training:     tf.bool indicating whether training mode is enabled.
        - init_states:
    Return a tuple of (outputs, init_state, final_state).
    """
    input_size = inputs.get_shape()[-1].value
    if input_size is None:
        raise ValueError("Number of input dimensions to CuDNN RNNs must be "
                         "known, but was None.")

    # CUDNN expects the inputs to be time major
    inputs = tf.transpose(inputs, [1, 0, 2])
    if cell_type.lower() == "gru":
        cudnn_cell = cudnn_rnn.CudnnGRU(num_layers,
                                        hidden_size,
                                        input_mode="linear_input",
                                        direction="bidirectional")
    elif cell_type.lower() == "lstm":
        cudnn_cell = cudnn_rnn.CudnnLSTM(num_layers,
                                         hidden_size,
                                         input_mode="linear_input",
                                         direction="bidirectional")
    else:
        raise ValueError("cell_type must be 'LSTM' or 'GRU', got %r." % cell_type)

    if init_states is None:
        init_state = tf.tile(
            tf.zeros([2 * num_layers, 1, hidden_size], dtype=tf.float32),
            [1, tf.shape(inputs)[1], 1])
        if cell_type.lower() == "gru":
            init_states = (init_state, )
        else:
            init_states = (init_state, init_state)

    output, *_ = cudnn_cell(inputs, initial_state=init_states, training=True)

    # Convert to batch major
    output = tf.transpose(output, [1, 0, 2])
    # Reverse each sequence along time so index 0 holds its last valid output,
    # then slice that step out as the final state.
    final_states = tf.reverse_sequence(output,
                                       seq_lengths=seq_len,
                                       seq_axis=1,
                                       batch_axis=0)[:, 0, :]

    return output, final_states
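
A minimal usage sketch for cuda_rnn follows; the placeholder shapes and hyperparameters are illustrative assumptions, not taken from the original code.

# Illustrative usage of cuda_rnn (shapes and sizes are assumptions).
inputs = tf.placeholder(tf.float32, [None, 50, 128])    # [batch, length, input_size]
seq_len = tf.placeholder(tf.int32, [None])               # valid length per example
outputs, final_states = cuda_rnn(inputs,
                                 num_layers=2,
                                 hidden_size=256,
                                 seq_len=seq_len,
                                 cell_type="LSTM")
# outputs: [batch, 50, 2 * 256] (bidirectional); final_states: [batch, 2 * 256]
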
Example #5
                def gpu_cudnn_lstm_backend(time_major_inputs, hidden_nodes):
                    # Create the Cudnn LSTM factory
                    rnn_lstm = cudnn_rnn.CudnnLSTM(len(lstm_layers), hidden_nodes,
                                                   direction='bidirectional',
                                                   kernel_initializer=tf.initializers.random_uniform(-0.1, 0.1))

                    # TODO: Check if the models are loadable from meta Graph, maybe the next line fixed this
                    rnn_lstm._saveable_cls = cudnn_rnn.CudnnLSTMSaveable

                    # Apply the lstm to the inputs
                    time_major_outputs, (output_h, output_c) = rnn_lstm(time_major_inputs)
                    return time_major_outputs
Example #6
def get_cell(rnn_type, hidden_size, layer_num=1, direction='bidirectional'):
    if rnn_type.endswith('lstm'):
        cudnn_cell = cudnn_rnn.CudnnLSTM(num_layers=layer_num, num_units=hidden_size, direction=direction,
                                         dropout=0)
    elif rnn_type.endswith('gru'):
        cudnn_cell = cudnn_rnn.CudnnGRU(num_layers=layer_num, num_units=hidden_size, direction=direction,
                                        dropout=0)
    elif rnn_type.endswith('rnn'):
        cudnn_cell = cudnn_rnn.CudnnRNNTanh(num_layers=layer_num, num_units=hidden_size, direction=direction,
                                            dropout=0)
    else:
        raise NotImplementedError('Unsupported rnn type: {}'.format(rnn_type))
    return cudnn_cell
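
A short usage sketch for get_cell; as with the other Cudnn layers, the returned cell consumes time-major input, and the shapes here are illustrative.

# Illustrative usage of get_cell; Cudnn cells expect [time, batch, features].
cell = get_cell('bilstm', hidden_size=128, layer_num=2, direction='bidirectional')
time_major_inputs = tf.placeholder(tf.float32, [None, None, 300])
outputs, states = cell(time_major_inputs)   # outputs: [time, batch, 2 * 128]
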
Example #7
 def birnn(self, use_cudnn=False):
     if use_cudnn:
         DIRECTION = "bidirectional"
         cell = cudnn_rnn.CudnnLSTM(1, cfg.hidden_size, direction=DIRECTION)
         outputs, _ = cell(self.time_inputs)
     else:
         cell_1 = rnn.BasicLSTMCell(cfg.hidden_size)
         cell_2 = rnn.BasicLSTMCell(cfg.hidden_size)
         outputs, _ = tf.nn.bidirectional_dynamic_rnn(cell_1,
                                                      cell_2,
                                                      self.time_inputs,
                                                      dtype=tf.float32,
                                                      time_major=True)
         outputs = tf.concat(outputs, 2)
     return outputs
Example #8
def _single_lstm(input_emb, input_len, hidden_size, is_fwd, use_cudnn):
  """Compute the outputs of a single LSTM (subroutine of stacked_bilstm).

  Be careful if used anywhere outside of stacked_bilstm, which converts the
  sequences to the time-major format expected by this function.

  Args:
    input_emb: <float32> [sequence_length, batch_size, emb]
    input_len: <int32> [batch_size]
    hidden_size: Number of units in the LSTM cell.
    is_fwd: Boolean indicating the direction of the LSTM (True for forward).
    use_cudnn: Boolean indicating the use of cudnn.

  Returns:
    output_emb: <float32> [sequence_length, batch_size, emb]
  """
  if not is_fwd:
    input_emb = tf.reverse_sequence(
        input_emb,
        input_len,
        seq_axis=0,
        batch_axis=1)
  if use_cudnn:
    lstm = contrib_cudnn_rnn.CudnnLSTM(
        num_layers=1,
        num_units=hidden_size,
        input_mode=cudnn_rnn_ops.CUDNN_INPUT_LINEAR_MODE,
        direction=cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION)
    lstm.build(input_emb.shape)
    output_emb, _ = lstm(input_emb)
  else:
    cell = contrib_cudnn_rnn.CudnnCompatibleLSTMCell(hidden_size)
    cell = contrib_rnn.MultiRNNCell([cell])
    output_emb, _ = tf.nn.dynamic_rnn(
        cell=cell,
        inputs=input_emb,
        sequence_length=input_len,
        dtype=tf.float32,
        time_major=True)
  if not is_fwd:
    output_emb = tf.reverse_sequence(
        output_emb,
        input_len,
        seq_axis=0,
        batch_axis=1)
  return output_emb
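
The stacked_bilstm caller mentioned in the docstring is not shown on this page; the sketch below is only a guess at how such a wrapper could combine the two directional passes, not the original implementation.

# Hypothetical wrapper around _single_lstm; not the original stacked_bilstm.
def stacked_bilstm_sketch(input_emb, input_len, hidden_size, num_layers, use_cudnn):
  # Convert [batch, time, emb] -> [time, batch, emb], as _single_lstm expects.
  output_emb = tf.transpose(input_emb, [1, 0, 2])
  for i in range(num_layers):
    with tf.variable_scope('bilstm_%d' % i):
      with tf.variable_scope('fw'):
        fw = _single_lstm(output_emb, input_len, hidden_size, True, use_cudnn)
      with tf.variable_scope('bw'):
        bw = _single_lstm(output_emb, input_len, hidden_size, False, use_cudnn)
      output_emb = tf.concat([fw, bw], axis=-1)
  return tf.transpose(output_emb, [1, 0, 2])  # [batch, time, 2 * hidden_size]
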
Example #9
    def rnn_encode(self, inputs, scope=None):
        inputs = tf.transpose(inputs, perm=[1, 0, 2])
        with tf.variable_scope(scope or 'rnn_encoder'):

            if self.cell_type.lower() == 'gru':
                cell = cudnn_rnn.CudnnGRU(self.num_layers,
                                          self.hidden_size,
                                          dropout=1 - self.drop)
            else:
                cell = cudnn_rnn.CudnnLSTM(self.num_layers,
                                           self.hidden_size,
                                           dropout=1 - self.drop)

            outputs, _ = cell(inputs)

        outputs = tf.transpose(outputs, perm=[1, 0, 2])
        return outputs
Example #10
 def _build_rnn_graph_cudnn(self, inputs, config, is_training):
     inputs = tf.transpose(inputs, [1, 0, 2])
     self._cell = tfcudnn_rnn.CudnnLSTM(
         num_layers=config.num_layers,
         num_units=config.hidden_size,
         input_size=config.hidden_size,
         dropout=1 - config.keep_prob if is_training else 0)
     params_size_t = self._cell.params_size()
     self._rnn_params = tf.get_variable(
         'lstm_params',
         # A callable initializer such as xavier_initializer() needs a static
         # shape, but the opaque CuDNN parameter buffer is sized at run time,
         # so initialize it from a flat random_uniform tensor instead.
         initializer=tf.random_uniform([params_size_t], -config.init_scale,
                                       config.init_scale),
         validate_shape=False)
     c = tf.zeros([config.num_layers, self.batch_size, config.hidden_size], tf.float32)
     h = tf.zeros([config.num_layers, self.batch_size, config.hidden_size], tf.float32)
     self._initial_state = (tfrnn.LSTMStateTuple(h=h, c=c),)
     outputs, h, c = self._cell(inputs, h, c, self._rnn_params, is_training)
     outputs = tf.transpose(outputs, [1, 0, 2])
     outputs = tf.reshape(outputs, [-1, config.hidden_size])
     return outputs, (tfrnn.LSTMStateTuple(h=h, c=c),)
Example #11
    def __init__(self, sequence_length, cell_size, vectors):
        self.stock_x = tf.placeholder(tf.float32, shape=[None, sequence_length, 1], name='stock_x')
        self.stock_y = tf.placeholder(tf.int32, shape=[None, 2], name='stock_y')
        self.text_x = tf.placeholder(tf.int32, shape=[None, sequence_length, 220], name='text_x')

        # event embedding
        with tf.name_scope("embedding_vertices"):
            embedding_W = tf.get_variable("embedding_matrix", initializer=tf.constant(vectors, dtype=tf.float32),
                                          trainable=False)
            self.embedding_texts = tf.nn.embedding_lookup(embedding_W, self.text_x, name='embedded_vertices')

        # attention on texts
        self.pre = tf.reshape(self.embedding_texts, shape=[-1, 220, vectors.shape[-1]])
        self.average = tf.reduce_sum(self.pre, axis=1)
        # with tf.name_scope('Con1V'):
        #     filiter = tf.get_variable('kernel', initializer=tf.truncated_normal([3,128,128]))
        #     cnn_bias = tf.get_variable('cnn_bias', initializer=tf.constant(0.1,shape=[128]))
        #     h_conv1 = tf.nn.tanh(tf.nn.conv1d(self.pre, filiter, 1, 'SAME') + cnn_bias)
        #     max_pool = tf.reduce_max(h_conv1,axis=1)

        # self.embedding_texts_att = self.Attention_Layer(self.pre, "attention_part")
        self.embedding_texts_att = tf.reshape(self.average, shape=[-1, sequence_length, 128])

        # combine texts and stock: [batch_size, sequence_length, embedding+stock]
        self.combined_x = tf.concat([self.stock_x, self.embedding_texts_att], axis=-1)

        with tf.name_scope("LSTM"):
            lstm_cell = cudnn_rnn.CudnnLSTM(1, cell_size, dropout=0.2)
            output, state = lstm_cell(inputs=self.embedding_texts_att)

        with tf.name_scope('output'):
            output_w = tf.get_variable("output_weight", shape=[cell_size, 2],
                                       initializer=tf.truncated_normal_initializer(mean=0, stddev=0.01))
            output_b = tf.get_variable("output_bias", initializer=tf.constant([0.01] * 2))

            self.scores = tf.nn.xw_plus_b(output[:, -1, :], output_w, output_b, name="output_layer")
            self.output = tf.argmax(self.scores, axis=1)

        with tf.name_scope("loss_accuracy"):
            # softmax_cross_entropy_with_logits requires float labels.
            losses = tf.nn.softmax_cross_entropy_with_logits(
                logits=self.scores, labels=tf.cast(self.stock_y, tf.float32))
            self.loss = tf.reduce_mean(losses)
            self.accuracy = tf.reduce_mean(tf.cast(tf.equal(self.output, tf.argmax(self.stock_y, axis=1)), 'float'))
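
A minimal training-step sketch for the model above; the class name StockTextModel, the optimizer, and the dummy arrays are all assumptions made for illustration.

import numpy as np

# Hypothetical usage; StockTextModel is an assumed name for the class above.
vectors = np.random.rand(5000, 128).astype(np.float32)   # dummy embedding table
model = StockTextModel(sequence_length=30, cell_size=64, vectors=vectors)
train_op = tf.train.AdamOptimizer(1e-3).minimize(model.loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feed = {model.stock_x: np.random.rand(8, 30, 1),
            model.stock_y: np.eye(2, dtype=np.int32)[np.random.randint(0, 2, size=8)],
            model.text_x: np.random.randint(0, 5000, size=(8, 30, 220))}
    _, loss_val, acc_val = sess.run([train_op, model.loss, model.accuracy],
                                    feed_dict=feed)
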
Example #12
def cudnn_lstm_layer(inputs,
                     batch_size,
                     num_units,
                     lengths=None,
                     stack_size=1,
                     rnn_dropout_drop_amt=0,
                     is_training=True,
                     bidirectional=True):
  """Create a LSTM layer that uses cudnn."""
  inputs_t = tf.transpose(inputs, [1, 0, 2])
  if lengths is not None:
    all_outputs = [inputs_t]
    for i in range(stack_size):
      with tf.variable_scope('stack_' + str(i)):
        with tf.variable_scope('forward'):
          lstm_fw = contrib_cudnn_rnn.CudnnLSTM(
              num_layers=1,
              num_units=num_units,
              direction='unidirectional',
              dropout=rnn_dropout_drop_amt,
              kernel_initializer=contrib_layers.variance_scaling_initializer(),
              bias_initializer=tf.zeros_initializer(),
          )

        c_fw = tf.zeros([1, batch_size, num_units], tf.float32)
        h_fw = tf.zeros([1, batch_size, num_units], tf.float32)

        outputs_fw, _ = lstm_fw(
            all_outputs[-1], (h_fw, c_fw), training=is_training)

        combined_outputs = outputs_fw

        if bidirectional:
          with tf.variable_scope('backward'):
            lstm_bw = contrib_cudnn_rnn.CudnnLSTM(
                num_layers=1,
                num_units=num_units,
                direction='unidirectional',
                dropout=rnn_dropout_drop_amt,
                kernel_initializer=contrib_layers.variance_scaling_initializer(
                ),
                bias_initializer=tf.zeros_initializer(),
            )

          c_bw = tf.zeros([1, batch_size, num_units], tf.float32)
          h_bw = tf.zeros([1, batch_size, num_units], tf.float32)

          inputs_reversed = tf.reverse_sequence(
              all_outputs[-1], lengths, seq_axis=0, batch_axis=1)
          outputs_bw, _ = lstm_bw(
              inputs_reversed, (h_bw, c_bw), training=is_training)

          outputs_bw = tf.reverse_sequence(
              outputs_bw, lengths, seq_axis=0, batch_axis=1)

          combined_outputs = tf.concat([outputs_fw, outputs_bw], axis=2)

        all_outputs.append(combined_outputs)

    # For consistency with cudnn, we just return the top of the stack here,
    # although this could easily be altered to do other things, including
    # being more resnet-like.
    return tf.transpose(all_outputs[-1], [1, 0, 2])
  else:
    lstm = contrib_cudnn_rnn.CudnnLSTM(
        num_layers=stack_size,
        num_units=num_units,
        direction='bidirectional' if bidirectional else 'unidirectional',
        dropout=rnn_dropout_drop_amt,
        kernel_initializer=contrib_layers.variance_scaling_initializer(),
        bias_initializer=tf.zeros_initializer(),
    )
    stack_multiplier = 2 if bidirectional else 1
    c = tf.zeros([stack_multiplier * stack_size, batch_size, num_units],
                 tf.float32)
    h = tf.zeros([stack_multiplier * stack_size, batch_size, num_units],
                 tf.float32)
    outputs, _ = lstm(inputs_t, (h, c), training=is_training)
    outputs = tf.transpose(outputs, [1, 0, 2])

    return outputs
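
A brief usage sketch for cudnn_lstm_layer; the batch size must be static because the initial states are built with tf.zeros, and the shapes below are illustrative.

# Illustrative call; batch size is fixed so the zero initial states can be built.
inputs = tf.placeholder(tf.float32, [16, None, 229])   # [batch, time, features]
lengths = tf.placeholder(tf.int32, [16])                # valid frames per example
outputs = cudnn_lstm_layer(inputs,
                           batch_size=16,
                           num_units=256,
                           lengths=lengths,
                           stack_size=2,
                           rnn_dropout_drop_amt=0.0,
                           is_training=True,
                           bidirectional=True)
# outputs: [16, time, 2 * 256]
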
Example #13
def make_cudnn(inputs, rnn_layer_sizes, batch_size, mode,
               dropout_keep_prob=1.0, residual_connections=False):
  """Builds a sequence of cuDNN LSTM layers from the given hyperparameters.

  Args:
    inputs: A tensor of RNN inputs.
    rnn_layer_sizes: A list of integer sizes (in units) for each layer of the
        RNN.
    batch_size: The number of examples per batch.
    mode: 'train', 'eval', or 'generate'. For 'generate',
        CudnnCompatibleLSTMCell will be used.
    dropout_keep_prob: The float probability to keep the output of any given
        sub-cell.
    residual_connections: Whether or not to use residual connections.

  Returns:
    outputs: A tensor of RNN outputs, with shape
        `[batch_size, inputs.shape[1], rnn_layer_sizes[-1]]`.
    initial_state: The initial RNN states, a tuple with length
        `len(rnn_layer_sizes)` of LSTMStateTuples.
    final_state: The final RNN states, a tuple with length
        `len(rnn_layer_sizes)` of LSTMStateTuples.
  """
  cudnn_inputs = tf.transpose(inputs, [1, 0, 2])

  if len(set(rnn_layer_sizes)) == 1 and not residual_connections:
    initial_state = tuple(
        contrib_rnn.LSTMStateTuple(
            h=tf.zeros([batch_size, num_units], dtype=tf.float32),
            c=tf.zeros([batch_size, num_units], dtype=tf.float32))
        for num_units in rnn_layer_sizes)

    if mode != 'generate':
      # We can make a single call to CudnnLSTM since all layers are the same
      # size and we aren't using residual connections.
      cudnn_initial_state = state_tuples_to_cudnn_lstm_state(initial_state)
      cell = contrib_cudnn_rnn.CudnnLSTM(
          num_layers=len(rnn_layer_sizes),
          num_units=rnn_layer_sizes[0],
          direction='unidirectional',
          dropout=1.0 - dropout_keep_prob)
      cudnn_outputs, cudnn_final_state = cell(
          cudnn_inputs, initial_state=cudnn_initial_state,
          training=mode == 'train')
      final_state = cudnn_lstm_state_to_state_tuples(cudnn_final_state)

    else:
      # At generation time we use CudnnCompatibleLSTMCell.
      cell = contrib_rnn.MultiRNNCell([
          contrib_cudnn_rnn.CudnnCompatibleLSTMCell(num_units)
          for num_units in rnn_layer_sizes
      ])
      cudnn_outputs, final_state = tf.nn.dynamic_rnn(
          cell, cudnn_inputs, initial_state=initial_state, time_major=True,
          scope='cudnn_lstm/rnn')

  else:
    # We need to make multiple calls to CudnnLSTM, keeping the initial and final
    # states at each layer.
    initial_state = []
    final_state = []

    for i in range(len(rnn_layer_sizes)):
      # If we're using residual connections and this layer is not the same size
      # as the previous layer, we need to project into the new size so the
      # (projected) input can be added to the output.
      if residual_connections:
        if i == 0 or rnn_layer_sizes[i] != rnn_layer_sizes[i - 1]:
          cudnn_inputs = contrib_layers.linear(cudnn_inputs, rnn_layer_sizes[i])

      layer_initial_state = (contrib_rnn.LSTMStateTuple(
          h=tf.zeros([batch_size, rnn_layer_sizes[i]], dtype=tf.float32),
          c=tf.zeros([batch_size, rnn_layer_sizes[i]], dtype=tf.float32)),)

      if mode != 'generate':
        cudnn_initial_state = state_tuples_to_cudnn_lstm_state(
            layer_initial_state)
        cell = contrib_cudnn_rnn.CudnnLSTM(
            num_layers=1,
            num_units=rnn_layer_sizes[i],
            direction='unidirectional',
            dropout=1.0 - dropout_keep_prob)
        cudnn_outputs, cudnn_final_state = cell(
            cudnn_inputs, initial_state=cudnn_initial_state,
            training=mode == 'train')
        layer_final_state = cudnn_lstm_state_to_state_tuples(cudnn_final_state)

      else:
        # At generation time we use CudnnCompatibleLSTMCell.
        cell = contrib_rnn.MultiRNNCell(
            [contrib_cudnn_rnn.CudnnCompatibleLSTMCell(rnn_layer_sizes[i])])
        cudnn_outputs, layer_final_state = tf.nn.dynamic_rnn(
            cell, cudnn_inputs, initial_state=layer_initial_state,
            time_major=True,
            scope='cudnn_lstm/rnn' if i == 0 else 'cudnn_lstm_%d/rnn' % i)

      if residual_connections:
        cudnn_outputs += cudnn_inputs

      cudnn_inputs = cudnn_outputs

      initial_state += layer_initial_state
      final_state += layer_final_state

  outputs = tf.transpose(cudnn_outputs, [1, 0, 2])

  return outputs, tuple(initial_state), tuple(final_state)
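
A short usage sketch for make_cudnn; it assumes the helpers state_tuples_to_cudnn_lstm_state and cudnn_lstm_state_to_state_tuples referenced above are defined alongside it, and the shapes are illustrative.

# Illustrative call in training mode with batch-major inputs.
inputs = tf.placeholder(tf.float32, [32, None, 38])   # [batch, time, features]
outputs, initial_state, final_state = make_cudnn(
    inputs,
    rnn_layer_sizes=[256, 256],   # equal sizes, so a single CudnnLSTM call is made
    batch_size=32,
    mode='train',
    dropout_keep_prob=0.75,
    residual_connections=False)
# outputs: [32, time, 256]; initial_state and final_state are tuples of LSTMStateTuples
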
Example #14
def rnn_dnn(X,
            hidden_size,
            rnn_mode,
            num_layers=1,
            parameters=None,
            h0=None,
            c0=None,
            input_mode='linear',
            direction_mode='unidirectional',
            dropout=0.,
            name=None):
    """CuDNN v5 RNN implementation.

    Parameters
    ----------
    X : input variable or placeholder
        shape=(batch_size, timesteps, input_dims)
    hidden_size : int
        the number of units within the RNN model.
    rnn_mode : {'rnn_relu', 'rnn_tanh', 'lstm', 'gru'}
        See cudnn documentation for ``cudnnRNNMode_t``.
    num_layers : int
        the number of layers for the RNN model.
    h0: tensor
        h0 with shape [num_layers, batch_size, hidden_size]
    c0: tensor
        c0 (lstm) with shape [num_layers, batch_size, hidden_size]
    parameters: vector
        vector contain all flatten weights and bias
        check `backend.init.lstm`, `backend.init.gru`, and `backend.init.rnn`
        for more information
    input_mode : {'linear', 'skip'}
        linear: input will be multiplied by a biased matrix
        skip: No operation is performed on the input.  The size must
        match the hidden size.
        (CuDNN docs: cudnnRNNInputMode_t)
    direction_mode : {'unidirectional', 'bidirectional'}
        unidirectional: The network operates recurrently from the
                        first input to the last.
        bidirectional: The network operates from first to last then from last
                       to first and concatenates the results at each layer.
    dropout: float (0.0-1.0)
        dropout probability; when it is 0, dropout is disabled.

    Returns
    -------
    [output, hidden_states, cell_states] for lstm
    [output, hidden_states] for gru and rnn

    output_shape: (batch_size, timesteps, hidden_size)
    hidden_shape: (num_layers, batch_size, hidden_size)
    cell_shape: (num_layers, batch_size, hidden_size)

    Note
    ----
    dropout is turned off if K.set_training(False) has been called or K.is_training() == False

    """
    if CONFIG['device'] == 'cpu':
        raise Exception('This op is not supported on CPU.')
    if name is None: name = uuid()
    # ====== Check arguments ====== #
    if rnn_mode not in ('rnn_relu', 'rnn_tanh', 'lstm', 'gru'):
        raise ValueError(
            "rnn_mode=%s must be: 'rnn_relu', 'rnn_tanh', 'lstm', 'gru'" %
            rnn_mode)
    if input_mode not in ('linear', 'skip'):
        raise ValueError("input_mode=%s must be: 'linear', 'skip'" %
                         input_mode)
    input_mode = 'linear_input' if input_mode == 'linear' else 'skip_input'
    if direction_mode not in ('unidirectional', 'bidirectional'):
        raise ValueError(
            "direction_mode=%s must be: 'unidirectional', 'bidirectional'" %
            direction_mode)
    is_bidirectional = direction_mode == 'bidirectional'

    # ====== helper function ====== #
    def check_init_states(s0, nb_layers, batch_size):
        if s0 is None: return None
        if s0.get_shape().ndims < 3:
            s0 = expand_dims(s0, dim=0)
        s0shape = get_shape(s0)
        if s0shape[0] == 1 and s0shape[0] != nb_layers:
            s0 = repeat(s0, n=nb_layers, axes=0)
        if s0shape[1] == 1:
            s0 = repeat(s0, n=batch_size, axes=1)
        return s0

    # ====== create RNNBlock ====== #
    from tensorflow.contrib import cudnn_rnn
    input_shape = get_shape(X)
    if X.get_shape().ndims != 3:
        raise ValueError('Input must be a 3-D tensor, but X is a %d-D tensor' %
                         X.get_shape().ndims)
    if input_shape[-1] != hidden_size and 'skip' in input_mode:
        raise ValueError(
            'In skip_input mode, input size must be equal to hidden size'
            ', but input_size=%d != hidden_size=%d' %
            (input_shape[-1], hidden_size))
    # If we dimshuffle here, a lot of errors concerning GPUArray
    # and cuDNN will happen.
    batch_size = get_shape(X, native=True)[0]
    if rnn_mode == 'lstm':
        rnn = cudnn_rnn.CudnnLSTM(num_layers=num_layers,
                                  num_units=hidden_size,
                                  input_size=input_shape[-1],
                                  input_mode=input_mode,
                                  direction=direction_mode,
                                  dropout=dropout,
                                  seed=0,
                                  seed2=0)
    else:
        if rnn_mode == 'gru':
            rnn_class = cudnn_rnn.CudnnGRU
        elif rnn_mode == 'rnn_relu':
            rnn_class = cudnn_rnn.CudnnRNNRelu
        elif rnn_mode == 'rnn_tanh':
            rnn_class = cudnn_rnn.CudnnRNNTanh
        rnn = rnn_class(num_layers=num_layers,
                        num_units=hidden_size,
                        input_size=input_shape[-1],
                        input_mode=input_mode,
                        direction=direction_mode,
                        dropout=dropout,
                        seed=0,
                        seed2=0)
    # layer info (note in case of bidirectional, output from previous
    # layers are concatenated).
    layer_info = [input_shape[-1], hidden_size] + \
                 [hidden_size * (2 if is_bidirectional else 1),
                  hidden_size] * (num_layers - 1)
    with tf.device('/cpu:0'):
        nb_params = rnn.params_size().eval(session=get_session())
    # ====== create parameters ====== #
    # check parameters
    if parameters is None:
        if rnn_mode == 'lstm':
            from odin.backend.init import lstm as init_func
        elif rnn_mode == 'gru':
            from odin.backend.init import gru as init_func
        else:
            from odin.backend.init import rnn as init_func
        parameters = np.concatenate([
            init_func(layer_info[i * 2],
                      layer_info[i * 2 + 1],
                      one_vector=True,
                      return_variable=False,
                      bidirectional=True if is_bidirectional else False)
            for i in range(num_layers)
        ]).astype(FLOATX)
        parameters = variable(parameters, name=name)
    assert nb_params == get_shape(parameters)[0], \
        "Require %d parameters but only %d provided" % (nb_params, get_shape(parameters)[0])
    # check initial states
    num_layers = num_layers * 2 if is_bidirectional else num_layers
    h0 = zeros((num_layers, batch_size, hidden_size)) if h0 is None else h0
    h0 = check_init_states(h0, num_layers, batch_size)
    c0 = (zeros((num_layers, batch_size,
                 hidden_size)) if rnn_mode == 'lstm' and c0 is None else c0)
    c0 = check_init_states(c0, num_layers, batch_size)
    # preprocess arguments
    args = {'input_h': h0}
    if rnn_mode == 'lstm':
        args['input_c'] = c0
    # ====== get output ====== #
    output = rnn(input_data=tf.transpose(X, (1, 0, 2)),
                 params=parameters,
                 is_training=bool(is_training()),
                 **args)
    output = [tf.transpose(output[0], (1, 0, 2))] + list(output[1:])
    add_shape(output[0], (input_shape[0], input_shape[1], hidden_size *
                          (2 if is_bidirectional else 1)))
    for o in output[1:]:
        add_shape(o, (num_layers, input_shape[0], hidden_size))
    return output
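
A minimal usage sketch for rnn_dnn, assuming the surrounding backend helpers used above (variable, get_session, and so on) are importable and a GPU is available.

import numpy as np

# Illustrative call; X is batch-major and its feature dimension must be static.
X = variable(np.random.rand(16, 20, 64).astype('float32'), name='X')
output, hidden_states, cell_states = rnn_dnn(X,
                                             hidden_size=128,
                                             rnn_mode='lstm',
                                             num_layers=2,
                                             direction_mode='bidirectional',
                                             dropout=0.2)
# output: (16, 20, 2 * 128); hidden_states and cell_states: (2 * 2, 16, 128)
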
Example #15
 def test_cudnn_rnn(self):
     if get_ngpu() == 0:
         return
     print()
     batch_size = 2
     time_steps = 5
     input_dim = 12
     hidden_dim = 8
     X = K.variable(value=np.random.rand(batch_size, time_steps, input_dim),
                    dtype='float32',
                    name='X')
     for rnn_mode in ('lstm', 'rnn_relu', 'gru'):
         for num_layers in [1, 2]:
             for W_init in [
                     init_ops.glorot_uniform_initializer(seed=1234),
                     init_ops.random_normal_initializer(seed=1234)
             ]:
                 for b_init in [0, 1]:
                     for bidirectional in (True, False):
                         for skip_input in (False, ):
                             print('RNNmode:%s' % rnn_mode,
                                   "#Layers:%d" % num_layers,
                                   'Bidirectional:%s' % bidirectional,
                                   'SkipInput:%s' % skip_input)
                             weights, biases = K.init_rnn(
                                 input_dim=input_dim,
                                 hidden_dim=hidden_dim,
                                 num_gates=rnn_mode,
                                 num_layers=num_layers,
                                 W_init=W_init,
                                 b_init=b_init,
                                 skip_input=skip_input,
                                 cudnn_vector=False,
                                 is_bidirectional=bidirectional,
                                 name=None)
                             # ====== check number of params ====== #
                             params1 = K.params_to_cudnn(weights, biases)
                             n = params1.shape[0].value
                             nb_params = cudnn_rnn_ops.cudnn_rnn_opaque_params_size(
                                 rnn_mode=rnn_mode,
                                 num_layers=num_layers,
                                 num_units=hidden_dim,
                                 input_size=input_dim,
                                 input_mode='skip_input'
                                 if skip_input else 'linear_input',
                                 direction='bidirectional'
                                 if bidirectional else 'unidirectional')
                             nb_params = K.eval(nb_params)
                             assert n == nb_params
                              # ====== check canonical shape match ====== #
                             kwargs = {
                                 'num_layers':
                                 num_layers,
                                 'num_units':
                                 hidden_dim,
                                 'input_mode':
                                 'skip_input'
                                 if skip_input else 'linear_input',
                                 'direction':
                                 'bidirectional'
                                 if bidirectional else 'unidirectional'
                             }
                             if rnn_mode == 'lstm':
                                 rnn = cudnn_rnn.CudnnLSTM(**kwargs)
                             elif rnn_mode == 'gru':
                                 rnn = cudnn_rnn.CudnnGRU(**kwargs)
                             if rnn_mode == 'rnn_relu':
                                 rnn = cudnn_rnn.CudnnRNNRelu(**kwargs)
                             if rnn_mode == 'rnn_tanh':
                                 rnn = cudnn_rnn.CudnnRNNTanh(**kwargs)
                             rnn.build(input_shape=(None, None, input_dim))
                             assert len(weights) == len(
                                 rnn.canonical_weight_shapes)
                             assert len(biases) == len(
                                 rnn.canonical_bias_shapes)
                             for w, s in zip(weights,
                                             rnn.canonical_weight_shapes):
                                 assert tuple(w.shape.as_list()) == s
                             # ====== check params conversion ====== #
                             K.initialize_all_variables()
                             params2 = cudnn_rnn_ops.cudnn_rnn_canonical_to_opaque_params(
                                 rnn_mode=rnn_mode,
                                 num_layers=num_layers,
                                 num_units=hidden_dim,
                                 input_size=input_dim,
                                 input_mode='skip_input'
                                 if skip_input else 'linear_input',
                                 direction='bidirectional'
                                 if bidirectional else 'unidirectional',
                                 weights=weights,
                                 biases=biases)
                             assert np.all(
                                 K.eval(params1) == K.eval(params2))
                             # ====== odin cudnn implementation ====== #
                             name = 'TEST' + uuid(length=25)
                             outputs = K.cudnn_rnn(
                                 X=X,
                                 num_units=hidden_dim,
                                 rnn_mode=rnn_mode,
                                 num_layers=num_layers,
                                 parameters=None,
                                 skip_input=skip_input,
                                 is_bidirectional=bidirectional,
                                 dropout=0.1,
                                 name=name)
                             K.initialize_all_variables()
                             s0 = K.eval(outputs[0]).sum()
                             s1 = K.eval(outputs[1]).sum()
                             all_variables = K.get_all_variables(scope=name)
                             new_weights = [
                                 i for i in all_variables
                                 if K.role.has_roles(i, roles=K.role.Weight)
                             ]
                             new_biases = [
                                 i for i in all_variables
                                 if K.role.has_roles(i, roles=K.role.Bias)
                             ]
                             new_weights, new_biases = K.sort_cudnn_params(
                                 new_weights, new_biases, rnn_mode=rnn_mode)
                              assert len(weights) == len(new_weights)
                              assert len(biases) == len(new_biases)
                             for i, j in zip(weights + biases,
                                             new_weights + new_biases):
                                 assert i.name.split(
                                     '/')[-1] == j.name.split('/')[-1]
                             # ====== CudnnRNN wrapper ====== #
                             rnn = N.CudnnRNN(
                                 num_units=hidden_dim,
                                 W_init=new_weights,
                                 b_init=new_biases,
                                 rnn_mode=rnn_mode,
                                 num_layers=num_layers,
                                 skip_input=skip_input,
                                 is_bidirectional=bidirectional,
                                 return_states=True,
                                 dropout=0.)
                             outputs = rnn(X)
                             K.initialize_all_variables()
                             y0 = K.eval(outputs[0]).sum()
                             y1 = K.eval(outputs[1]).sum()
                             assert y0 == s0
                             assert y1 == s1