def test_cudnn_lstm(self):
    """Smoke-test CudnnLSTM: zero-initialized kernel/bias must produce all-zero outputs."""
    num_layers = 4
    num_units = 2
    batch_size = 8
    dir_count = 1
    # Time-major input: [seq_len, batch, input_size].
    inputs = tf.random_uniform(
        [num_layers * dir_count, batch_size, num_units], dtype=dtypes.float32)
    lstm = cudnn_rnn.CudnnLSTM(
        num_layers=num_layers,
        num_units=num_units,
        direction='unidirectional',
        kernel_initializer=tf.constant_initializer(0.),
        bias_initializer=tf.constant_initializer(0.),
        # Fixed: the scope was misleadingly named 'test_gru' for an LSTM test.
        name='test_lstm')
    outputs, _ = lstm(inputs)
    total_sum = tf.reduce_sum(outputs)
    with tf.Session() as sess:
        sess.run(variables.global_variables_initializer())
        result = sess.run(total_sum)
        # With all weights and biases zero, every activation is zero.
        self.assertEqual(0, result)
def rnn_encoder(self, inputs, scope=None):
    """Encode batch-major `inputs` with a bidirectional cuDNN GRU/LSTM stack.

    Args:
      inputs: batch-major tensor, [batch, time, depth].
      scope: optional variable-scope name (defaults to 'rnn_encoder').

    Returns:
      Batch-major encoded tensor, [batch, time, 2 * hidden_size].
    """
    # cuDNN RNNs consume time-major tensors: [time, batch, depth].
    time_major = tf.transpose(inputs, perm=[1, 0, 2])
    with tf.variable_scope(scope or 'rnn_encoder'):
        # A bidirectional layer counts as two directions, so halve the
        # requested depth (keeping at least one layer).
        depth = self.num_layers // 2 if self.num_layers >= 2 else self.num_layers
        if self.cell_type.lower() == 'gru':
            rnn_cls = cudnn_rnn.CudnnGRU
        else:
            rnn_cls = cudnn_rnn.CudnnLSTM
        cell = rnn_cls(depth, self.hidden_size,
                       direction="bidirectional",
                       dropout=1 - self.drop)
        encoded, _ = cell(time_major)
        # Back to batch-major for downstream consumers.
        return tf.transpose(encoded, perm=[1, 0, 2])
def _build_rnn_graph_cudnn(self, inputs, config, is_training):
    """Build the inference graph using CUDNN cell."""
    # cuDNN expects time-major input: [num_steps, batch, hidden].
    inputs = tf.transpose(inputs, [1, 0, 2])
    self._cell = cudnn_rnn.CudnnLSTM(
        num_layers=config.num_layers,
        num_units=config.hidden_size,
        input_size=config.hidden_size,
        dropout=1 - config.keep_prob if is_training else 0)
    # Old-style opaque-parameter API: one flat variable holds every
    # weight and bias of the stacked LSTM.
    params_size_t = self._cell.params_size()
    self._rnn_params = tf.get_variable(
        "lstm_params",
        initializer=tf.random_uniform(
            [params_size_t], -config.init_scale, config.init_scale),
        validate_shape=False)
    state_shape = [config.num_layers, self.batch_size, config.hidden_size]
    c = tf.zeros(state_shape, tf.float32)
    h = tf.zeros(state_shape, tf.float32)
    self._initial_state = (rnn.LSTMStateTuple(h=h, c=c),)
    outputs, h, c = self._cell(inputs, h, c, self._rnn_params, is_training)
    # Back to batch-major, then flatten [batch, steps] for the softmax layer.
    outputs = tf.transpose(outputs, [1, 0, 2])
    outputs = tf.reshape(outputs, [-1, config.hidden_size])
    return outputs, (rnn.LSTMStateTuple(h=h, c=c),)
def cuda_rnn(inputs, num_layers, hidden_size, seq_len, init_states=None, cell_type="GRU"):
    """Run a bidirectional CuDNN RNN over a batch of sequences.

    Args:
      inputs: <float32> [batch, length, input_size] batch-major inputs.
      num_layers: Number of stacked RNN layers.
      hidden_size: Number of units in each layer (per direction).
      seq_len: <int32> [batch] true length of each sequence.
      init_states: Optional tuple of initial states; zero states are built
        when None (one tensor for GRU, an (h, c) pair for LSTM).
      cell_type: "GRU" or "LSTM" (case-insensitive).

    Returns:
      A tuple (output, final_states): output is
      <float32> [batch, length, 2 * hidden_size]; final_states is
      <float32> [batch, 2 * hidden_size], the output at each sequence's
      last valid timestep.

    Raises:
      ValueError: If the input depth is statically unknown, or cell_type
        is neither GRU nor LSTM.
    """
    input_size = inputs.get_shape()[-1].value
    if input_size is None:
        raise ValueError("Number of input dimensions to CuDNN RNNs must be "
                         "known, but was None.")
    # CUDNN expects the inputs to be time major.
    inputs = tf.transpose(inputs, [1, 0, 2])
    cell_kind = cell_type.lower()
    if cell_kind == "gru":
        cudnn_cell = cudnn_rnn.CudnnGRU(
            num_layers, hidden_size,
            input_mode="linear_input", direction="bidirectional")
    elif cell_kind == "lstm":
        cudnn_cell = cudnn_rnn.CudnnLSTM(
            num_layers, hidden_size,
            input_mode="linear_input", direction="bidirectional")
    else:
        # Fixed: raise ValueError (subclass of Exception, so existing
        # handlers still catch it) instead of a bare Exception.
        raise ValueError("LSTM or GRU is required.")
    if init_states is None:
        # Zero state broadcast across the batch: [2 * layers, batch, hidden]
        # (the leading 2x accounts for the two directions).
        init_state = tf.tile(
            tf.zeros([2 * num_layers, 1, hidden_size], dtype=tf.float32),
            [1, tf.shape(inputs)[1], 1])
        if cell_kind == "gru":
            init_states = (init_state,)
        else:
            init_states = (init_state, init_state)  # (h, c) for LSTM.
    output, *_ = cudnn_cell(inputs, initial_state=init_states, training=True)
    # Convert to batch major.
    output = tf.transpose(output, [1, 0, 2])
    # Reverse each sequence so index 0 holds its last valid timestep.
    final_states = tf.reverse_sequence(
        output, seq_lengths=seq_len, seq_axis=1, batch_axis=0)[:, 0, :]
    return output, final_states
def gpu_cudnn_lstm_backend(time_major_inputs, hidden_nodes):
    """Apply a bidirectional cuDNN LSTM stack to time-major inputs.

    Args:
      time_major_inputs: RNN input tensor, [time, batch, depth].
      hidden_nodes: number of units per LSTM layer.

    Returns:
      Time-major output tensor of the LSTM stack.
    """
    # Create the Cudnn LSTM factory.
    # NOTE(review): `lstm_layers` is not defined in this function — it is
    # presumably a module-level list whose length sets the stack depth;
    # confirm it exists at the call site.
    rnn_lstm = cudnn_rnn.CudnnLSTM(
        len(lstm_layers), hidden_nodes,
        direction='bidirectional',
        kernel_initializer=tf.initializers.random_uniform(-0.1, 0.1))
    # TODO: Check if the models are loadable from meta Graph, maybe the next line fixed this
    rnn_lstm._saveable_cls = cudnn_rnn.CudnnLSTMSaveable
    # Apply the lstm to the inputs.
    time_major_outputs, (output_h, output_c) = rnn_lstm(time_major_inputs)
    return time_major_outputs
def get_cell(rnn_type, hidden_size, layer_num=1, direction='bidirectional'):
    """Create a cuDNN RNN cell of the requested flavor.

    Args:
      rnn_type: String ending in 'lstm', 'gru', or 'rnn' (vanilla tanh RNN).
      hidden_size: Number of units per layer.
      layer_num: Number of stacked layers.
      direction: 'unidirectional' or 'bidirectional'.

    Returns:
      The constructed cudnn_rnn cell object.

    Raises:
      NotImplementedError: If rnn_type matches none of the known suffixes.
    """
    if rnn_type.endswith('lstm'):
        cudnn_cell = cudnn_rnn.CudnnLSTM(num_layers=layer_num, num_units=hidden_size,
                                         direction=direction, dropout=0)
    elif rnn_type.endswith('gru'):
        cudnn_cell = cudnn_rnn.CudnnGRU(num_layers=layer_num, num_units=hidden_size,
                                        direction=direction, dropout=0)
    elif rnn_type.endswith('rnn'):
        cudnn_cell = cudnn_rnn.CudnnRNNTanh(num_layers=layer_num, num_units=hidden_size,
                                            direction=direction, dropout=0)
    else:
        # Fixed typo in the error message: 'Unsuported' -> 'Unsupported'.
        raise NotImplementedError('Unsupported rnn type: {}'.format(rnn_type))
    return cudnn_cell
def birnn(self, use_cudnn=False):
    """Bidirectional LSTM over self.time_inputs (time-major).

    Returns a [time, batch, 2 * hidden] tensor; the cuDNN path uses a
    single bidirectional CudnnLSTM, otherwise two BasicLSTMCells are run
    via bidirectional_dynamic_rnn.
    """
    if use_cudnn:
        # cuDNN concatenates the two directions internally.
        lstm = cudnn_rnn.CudnnLSTM(1, cfg.hidden_size, direction="bidirectional")
        outputs, _ = lstm(self.time_inputs)
    else:
        forward_cell = rnn.BasicLSTMCell(cfg.hidden_size)
        backward_cell = rnn.BasicLSTMCell(cfg.hidden_size)
        outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            forward_cell, backward_cell, self.time_inputs,
            dtype=tf.float32, time_major=True)
    # The non-cudnn path yields a (fw, bw) pair that concat fuses on the
    # depth axis; tf.concat on a single tensor is a no-op, so the cuDNN
    # path passes through unchanged.
    outputs = tf.concat(outputs, 2)
    return outputs
def _single_lstm(input_emb, input_len, hidden_size, is_fwd, use_cudnn):
    """Compute the outputs of a single LSTM (subroutine of stacked_bilstm).

    Be careful if used anywhere outside of stacked_bilstm, which converts
    the sequences to the time-major format expected by this function.

    Args:
      input_emb: <float32> [sequence_length, batch_size, emb]
      input_len: <int32> [batch_size]
      hidden_size: Number of units in the LSTM cell.
      is_fwd: Boolean indicator the directionality of the LSTM.
      use_cudnn: Boolean indicating the use of cudnn.

    Returns:
      output_emb: <float32> [sequence_length, batch_size, emb]
    """
    def _flip(emb):
        # Reverse each sequence in time so a forward pass acts backward.
        return tf.reverse_sequence(emb, input_len, seq_axis=0, batch_axis=1)

    if not is_fwd:
        input_emb = _flip(input_emb)
    if use_cudnn:
        lstm = contrib_cudnn_rnn.CudnnLSTM(
            num_layers=1,
            num_units=hidden_size,
            input_mode=cudnn_rnn_ops.CUDNN_INPUT_LINEAR_MODE,
            direction=cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION)
        lstm.build(input_emb.shape)
        output_emb, _ = lstm(input_emb)
    else:
        # Checkpoint-compatible fallback for machines without cuDNN.
        compatible_cell = contrib_rnn.MultiRNNCell(
            [contrib_cudnn_rnn.CudnnCompatibleLSTMCell(hidden_size)])
        output_emb, _ = tf.nn.dynamic_rnn(
            cell=compatible_cell,
            inputs=input_emb,
            sequence_length=input_len,
            dtype=tf.float32,
            time_major=True)
    if not is_fwd:
        output_emb = _flip(output_emb)
    return output_emb
def rnn_encode(self, inputs, scope=None):
    """Encode batch-major `inputs` with a unidirectional cuDNN GRU/LSTM stack.

    Args:
      inputs: batch-major tensor, [batch, time, depth].
      scope: optional variable-scope name (defaults to 'rnn_encoder').

    Returns:
      Batch-major encoded tensor, [batch, time, hidden_size].
    """
    # cuDNN layers expect time-major tensors: [time, batch, depth].
    time_major = tf.transpose(inputs, perm=[1, 0, 2])
    with tf.variable_scope(scope or 'rnn_encoder'):
        if self.cell_type.lower() == 'gru':
            encoder = cudnn_rnn.CudnnGRU(self.num_layers, self.hidden_size,
                                         dropout=1 - self.drop)
        else:
            encoder = cudnn_rnn.CudnnLSTM(self.num_layers, self.hidden_size,
                                          dropout=1 - self.drop)
        encoded, _ = encoder(time_major)
        # Restore batch-major layout before returning.
        return tf.transpose(encoded, perm=[1, 0, 2])
def _build_rnn_graph_cudnn(self, inputs, config, is_training):
    """Build the inference graph with a CUDNN LSTM (opaque-params API).

    Args:
      inputs: <float32> [batch, num_steps, hidden] embedded inputs.
      config: hyperparameter object (num_layers, hidden_size, keep_prob).
      is_training: Python bool enabling dropout.

    Returns:
      (outputs, final_state): outputs is [batch * num_steps, hidden];
      final_state is a 1-tuple of LSTMStateTuple.
    """
    # cuDNN consumes time-major input: [num_steps, batch, hidden].
    inputs = tf.transpose(inputs, [1, 0, 2])
    self._cell = tfcudnn_rnn.CudnnLSTM(
        num_layers=config.num_layers,
        num_units=config.hidden_size,
        input_size=config.hidden_size,
        dropout=1 - config.keep_prob if is_training else 0)
    params_size_t = self._cell.params_size()
    # NOTE(review): passing a callable initializer without a `shape`
    # argument is suspect — the sibling implementation initializes with a
    # concrete random_uniform tensor of shape [params_size_t]; confirm
    # this variable actually gets the right size.
    self._rnn_params = tf.get_variable(
        'lstm_params',
        initializer=tflayers.xavier_initializer(),
        validate_shape=False)
    # Fixed: the zero states used self.hidden_size while the cell and the
    # reshape below use config.hidden_size; use config consistently so the
    # state shapes always match the cell.
    c = tf.zeros([config.num_layers, self.batch_size, config.hidden_size],
                 tf.float32)
    h = tf.zeros([config.num_layers, self.batch_size, config.hidden_size],
                 tf.float32)
    self._initial_state = (tfrnn.LSTMStateTuple(h=h, c=c),)
    outputs, h, c = self._cell(inputs, h, c, self._rnn_params, is_training)
    # Back to batch-major, then flatten for the softmax projection.
    outputs = tf.transpose(outputs, [1, 0, 2])
    outputs = tf.reshape(outputs, [-1, config.hidden_size])
    return outputs, (tfrnn.LSTMStateTuple(h=h, c=c),)
def __init__(self, sequence_length, cell_size, vectors):
    """Build a stock-movement classification graph from text + price inputs.

    Args:
      sequence_length: number of timesteps per example.
      cell_size: LSTM hidden size.
      vectors: pretrained embedding matrix (array-like), kept frozen.
    """
    # Placeholders: one price feature per step, a 2-way one-hot label, and
    # 220 text token ids per step.
    self.stock_x = tf.placeholder(tf.float32, shape=[None, sequence_length, 1], name='stock_x')
    self.stock_y = tf.placeholder(tf.int32, shape=[None, 2], name='stock_y')
    self.text_x = tf.placeholder(tf.int32, shape=[None, sequence_length, 220], name='text_x')
    # event embedding: frozen lookup table initialized from `vectors`.
    with tf.name_scope("embedding_vertices"):
        embedding_W = tf.get_variable("embedding_matrix",
                                      initializer=tf.constant(vectors, dtype=tf.float32),
                                      trainable=False)
        self.embedding_texts = tf.nn.embedding_lookup(embedding_W, self.text_x,
                                                      name='embedded_vertices')
    # attention on texts: collapse (batch, step) so each row is one step's
    # 220 token embeddings, then sum them (bag-of-words representation).
    self.pre = tf.reshape(self.embedding_texts, shape=[-1, 220, vectors.shape[-1]])
    self.average = tf.reduce_sum(self.pre, axis=1)
    # Commented-out conv1d / attention experiments, kept for reference:
    # with tf.name_scope('Con1V'):
    #     filiter = tf.get_variable('kernel', initializer=tf.truncated_normal([3,128,128]))
    #     cnn_bias = tf.get_variable('cnn_bias', initializer=tf.constant(0.1,shape=[128]))
    #     h_conv1 = tf.nn.tanh(tf.nn.conv1d(self.pre, filiter, 1, 'SAME') + cnn_bias)
    #     max_pool = tf.reduce_max(h_conv1,axis=1)
    # self.embedding_texts_att = self.Attention_Layer(self.pre, "attention_part")
    # NOTE(review): the reshape hard-codes an embedding dim of 128 — assumes
    # vectors.shape[-1] == 128; confirm.
    self.embedding_texts_att = tf.reshape(self.average, shape=[-1, sequence_length, 128])
    # combine texts and stock: [batch_size, sequence_length, embedding+stock]
    # NOTE(review): combined_x is built but the LSTM below consumes
    # embedding_texts_att instead — confirm which input was intended.
    self.combined_x = tf.concat([self.stock_x, self.embedding_texts_att], axis=-1)
    with tf.name_scope("LSTM"):
        lstm_cell = cudnn_rnn.CudnnLSTM(1, cell_size, dropout=0.2)
        output, state = lstm_cell(inputs=self.embedding_texts_att)
    with tf.name_scope('output'):
        output_w = tf.get_variable("output_weight", shape=[cell_size, 2],
                                   initializer=tf.truncated_normal_initializer(mean=0, stddev=0.01))
        output_b = tf.get_variable("output_bias", initializer=tf.constant([0.01] * 2))
        # NOTE(review): CudnnLSTM output is time-major by default, so
        # output[:, -1, :] indexes the last *batch* entry per timestep rather
        # than the last timestep per example — verify the intended layout.
        self.scores = tf.nn.xw_plus_b(output[:, -1, :], output_w, output_b,
                                      name="ouput_layer")
        self.output = tf.argmax(self.scores, axis=1)
    with tf.name_scope("loss_accuracy"):
        losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores,
                                                         labels=self.stock_y)
        self.loss = tf.reduce_mean(losses)
        self.accuracy = tf.reduce_mean(
            tf.cast(tf.equal(self.output, tf.argmax(self.stock_y, axis=1)), 'float'))
def cudnn_lstm_layer(inputs,
                     batch_size,
                     num_units,
                     lengths=None,
                     stack_size=1,
                     rnn_dropout_drop_amt=0,
                     is_training=True,
                     bidirectional=True):
    """Create a LSTM layer that uses cudnn.

    Args:
      inputs: batch-major input tensor, [batch, time, depth].
      batch_size: static batch size, used to build the zero states.
      lengths: optional per-example sequence lengths; when given, each
        stack level is built from two unidirectional CudnnLSTMs with
        explicit sequence reversal (cuDNN's fused bidirectional mode
        cannot honor lengths).
      stack_size: number of stacked LSTM levels.
      rnn_dropout_drop_amt: cuDNN dropout rate between layers.
      is_training: enables dropout inside the cuDNN cells.
      bidirectional: whether to run a backward direction as well.

    Returns:
      Batch-major output tensor of the top of the stack.
    """
    # cuDNN consumes time-major tensors: [time, batch, depth].
    inputs_t = tf.transpose(inputs, [1, 0, 2])
    if lengths is not None:
        # Length-aware path: build each direction separately per stack level.
        all_outputs = [inputs_t]
        for i in range(stack_size):
            with tf.variable_scope('stack_' + str(i)):
                with tf.variable_scope('forward'):
                    lstm_fw = contrib_cudnn_rnn.CudnnLSTM(
                        num_layers=1,
                        num_units=num_units,
                        direction='unidirectional',
                        dropout=rnn_dropout_drop_amt,
                        kernel_initializer=contrib_layers.variance_scaling_initializer(),
                        bias_initializer=tf.zeros_initializer(),
                    )
                # Zero (h, c) state: [1 layer, batch, units].
                c_fw = tf.zeros([1, batch_size, num_units], tf.float32)
                h_fw = tf.zeros([1, batch_size, num_units], tf.float32)
                outputs_fw, _ = lstm_fw(
                    all_outputs[-1], (h_fw, c_fw), training=is_training)
                combined_outputs = outputs_fw
                if bidirectional:
                    with tf.variable_scope('backward'):
                        lstm_bw = contrib_cudnn_rnn.CudnnLSTM(
                            num_layers=1,
                            num_units=num_units,
                            direction='unidirectional',
                            dropout=rnn_dropout_drop_amt,
                            kernel_initializer=contrib_layers.variance_scaling_initializer(),
                            bias_initializer=tf.zeros_initializer(),
                        )
                    c_bw = tf.zeros([1, batch_size, num_units], tf.float32)
                    h_bw = tf.zeros([1, batch_size, num_units], tf.float32)
                    # Emulate a backward direction: reverse each sequence up
                    # to its true length, run forward, then reverse back.
                    inputs_reversed = tf.reverse_sequence(
                        all_outputs[-1], lengths, seq_axis=0, batch_axis=1)
                    outputs_bw, _ = lstm_bw(
                        inputs_reversed, (h_bw, c_bw), training=is_training)
                    outputs_bw = tf.reverse_sequence(
                        outputs_bw, lengths, seq_axis=0, batch_axis=1)
                    combined_outputs = tf.concat([outputs_fw, outputs_bw], axis=2)
                all_outputs.append(combined_outputs)
        # for consistency with cudnn, here we just return the top of the stack,
        # although this can easily be altered to do other things, including be
        # more resnet like
        return tf.transpose(all_outputs[-1], [1, 0, 2])
    else:
        # No lengths: let cuDNN fuse the whole (possibly bidirectional) stack.
        lstm = contrib_cudnn_rnn.CudnnLSTM(
            num_layers=stack_size,
            num_units=num_units,
            direction='bidirectional' if bidirectional else 'unidirectional',
            dropout=rnn_dropout_drop_amt,
            kernel_initializer=contrib_layers.variance_scaling_initializer(),
            bias_initializer=tf.zeros_initializer(),
        )
        # State leading dim covers every layer of every direction.
        stack_multiplier = 2 if bidirectional else 1
        c = tf.zeros([stack_multiplier * stack_size, batch_size, num_units],
                     tf.float32)
        h = tf.zeros([stack_multiplier * stack_size, batch_size, num_units],
                     tf.float32)
        outputs, _ = lstm(inputs_t, (h, c), training=is_training)
        outputs = tf.transpose(outputs, [1, 0, 2])
        return outputs
def make_cudnn(inputs,
               rnn_layer_sizes,
               batch_size,
               mode,
               dropout_keep_prob=1.0,
               residual_connections=False):
    """Builds a sequence of cuDNN LSTM layers from the given hyperparameters.

    Args:
      inputs: A tensor of RNN inputs.
      rnn_layer_sizes: A list of integer sizes (in units) for each layer of
          the RNN.
      batch_size: The number of examples per batch.
      mode: 'train', 'eval', or 'generate'. For 'generate',
          CudnnCompatibleLSTMCell will be used.
      dropout_keep_prob: The float probability to keep the output of any given
          sub-cell.
      residual_connections: Whether or not to use residual connections.

    Returns:
      outputs: A tensor of RNN outputs, with shape
          `[batch_size, inputs.shape[1], rnn_layer_sizes[-1]]`.
      initial_state: The initial RNN states, a tuple with length
          `len(rnn_layer_sizes)` of LSTMStateTuples.
      final_state: The final RNN states, a tuple with length
          `len(rnn_layer_sizes)` of LSTMStateTuples.
    """
    # cuDNN consumes time-major input: [time, batch, depth].
    cudnn_inputs = tf.transpose(inputs, [1, 0, 2])

    if len(set(rnn_layer_sizes)) == 1 and not residual_connections:
        # Uniform layer sizes and no residuals: one zero LSTMStateTuple per layer.
        initial_state = tuple(
            contrib_rnn.LSTMStateTuple(
                h=tf.zeros([batch_size, num_units], dtype=tf.float32),
                c=tf.zeros([batch_size, num_units], dtype=tf.float32))
            for num_units in rnn_layer_sizes)

        if mode != 'generate':
            # We can make a single call to CudnnLSTM since all layers are the same
            # size and we aren't using residual connections.
            cudnn_initial_state = state_tuples_to_cudnn_lstm_state(initial_state)
            cell = contrib_cudnn_rnn.CudnnLSTM(
                num_layers=len(rnn_layer_sizes),
                num_units=rnn_layer_sizes[0],
                direction='unidirectional',
                dropout=1.0 - dropout_keep_prob)
            cudnn_outputs, cudnn_final_state = cell(
                cudnn_inputs, initial_state=cudnn_initial_state,
                training=mode == 'train')
            final_state = cudnn_lstm_state_to_state_tuples(cudnn_final_state)
        else:
            # At generation time we use CudnnCompatibleLSTMCell.
            cell = contrib_rnn.MultiRNNCell([
                contrib_cudnn_rnn.CudnnCompatibleLSTMCell(num_units)
                for num_units in rnn_layer_sizes
            ])
            # Scope must mirror the one CudnnLSTM creates so checkpoints load.
            cudnn_outputs, final_state = tf.nn.dynamic_rnn(
                cell, cudnn_inputs, initial_state=initial_state, time_major=True,
                scope='cudnn_lstm/rnn')
    else:
        # We need to make multiple calls to CudnnLSTM, keeping the initial and final
        # states at each layer.
        initial_state = []
        final_state = []

        for i in range(len(rnn_layer_sizes)):
            # If we're using residual connections and this layer is not the same size
            # as the previous layer, we need to project into the new size so the
            # (projected) input can be added to the output.
            if residual_connections:
                if i == 0 or rnn_layer_sizes[i] != rnn_layer_sizes[i - 1]:
                    cudnn_inputs = contrib_layers.linear(cudnn_inputs,
                                                         rnn_layer_sizes[i])

            layer_initial_state = (contrib_rnn.LSTMStateTuple(
                h=tf.zeros([batch_size, rnn_layer_sizes[i]], dtype=tf.float32),
                c=tf.zeros([batch_size, rnn_layer_sizes[i]], dtype=tf.float32)),)

            if mode != 'generate':
                cudnn_initial_state = state_tuples_to_cudnn_lstm_state(
                    layer_initial_state)
                cell = contrib_cudnn_rnn.CudnnLSTM(
                    num_layers=1,
                    num_units=rnn_layer_sizes[i],
                    direction='unidirectional',
                    dropout=1.0 - dropout_keep_prob)
                cudnn_outputs, cudnn_final_state = cell(
                    cudnn_inputs, initial_state=cudnn_initial_state,
                    training=mode == 'train')
                layer_final_state = cudnn_lstm_state_to_state_tuples(
                    cudnn_final_state)
            else:
                # At generation time we use CudnnCompatibleLSTMCell.
                cell = contrib_rnn.MultiRNNCell(
                    [contrib_cudnn_rnn.CudnnCompatibleLSTMCell(rnn_layer_sizes[i])])
                # Layer scopes must match the names CudnnLSTM used in training.
                cudnn_outputs, layer_final_state = tf.nn.dynamic_rnn(
                    cell, cudnn_inputs, initial_state=layer_initial_state,
                    time_major=True,
                    scope='cudnn_lstm/rnn' if i == 0 else 'cudnn_lstm_%d/rnn' % i)

            if residual_connections:
                cudnn_outputs += cudnn_inputs

            cudnn_inputs = cudnn_outputs

            initial_state += layer_initial_state
            final_state += layer_final_state

    # Back to batch-major for the caller.
    outputs = tf.transpose(cudnn_outputs, [1, 0, 2])
    return outputs, tuple(initial_state), tuple(final_state)
def rnn_dnn(X, hidden_size, rnn_mode,
            num_layers=1,
            parameters=None,
            h0=None, c0=None,
            input_mode='linear',
            direction_mode='unidirectional',
            dropout=0., name=None):
    """CuDNN v5 RNN implementation.

    Parameters
    ----------
    X : input variable or placeholder
        shape=(batch_size, timesteps, input_dims)
    hidden_size : int
        the number of units within the RNN model.
    rnn_mode : {'rnn_relu', 'rnn_tanh', 'lstm', 'gru'}
        See cudnn documentation for ``cudnnRNNMode_t``.
    num_layers : int
        the number of layers for the RNN model.
    h0: tensor
        h0 with shape [num_layers, batch_size, hidden_size]
    c0: tensor
        c0 (lstm) with shape [num_layers, batch_size, hidden_size]
    parameters: vector
        vector contain all flatten weights and bias
        check `backend.init.lstm`, `backend.init.gru`, and `backend.init.rnn`
        for more information
    input_mode : {'linear', 'skip'}
        linear: input will be multiplied by a biased matrix
        skip: No operation is performed on the input.  The size must match
        the hidden size. (CuDNN docs: cudnnRNNInputMode_t)
    direction_mode : {'unidirectional', 'bidirectional'}
        unidirectional: The network operates recurrently from the first
        input to the last.
        bidirectional: The network operates from first to last then from last
        to first and concatenates the results at each layer.
    dropout: float (0.0-1.0)
        whether to enable dropout. With it is 0, dropout is disabled.

    Returns
    -------
    [output, hidden_states, cell_states] for lstm
    [output, hidden_states] for gru and rnn

    output_shape: (batch_size, timesteps, hidden_size)
    hidden_shape: (num_layers, batch_size, hidden_size)
    cell_shape: (num_layers, batch_size, hidden_size)

    Note
    ----
    dropout is turn off if K.set_training(False) or K.is_training() == False
    """
    if CONFIG['device'] == 'cpu':
        raise Exception('This opt is not supported with CPU.')
    if name is None:
        name = uuid()
    # ====== Check arguments ====== #
    if rnn_mode not in ('rnn_relu', 'rnn_tanh', 'lstm', 'gru'):
        raise ValueError(
            "rnn_mode=%s must be: 'rnn_relu', 'rnn_tanh', 'lstm', 'gru'"
            % rnn_mode)
    if input_mode not in ('linear', 'skip'):
        raise ValueError("input_mode=%s must be: 'linear', 'skip'" % input_mode)
    # Map the public names onto cuDNN's enum strings.
    input_mode = 'linear_input' if input_mode == 'linear' else 'skip_input'
    if direction_mode not in ('unidirectional', 'bidirectional'):
        raise ValueError(
            "direction_mode=%s must be: 'unidirectional', 'bidirectional'"
            % direction_mode)
    is_bidirectional = direction_mode == 'bidirectional'

    # ====== helper function ====== #
    def check_init_states(s0, nb_layers, batch_size):
        # Broadcast a given initial state to [nb_layers, batch, hidden]:
        # adds a leading layer axis and tiles singleton layer/batch dims.
        if s0 is None:
            return None
        if s0.get_shape().ndims < 3:
            s0 = expand_dims(s0, dim=0)
        s0shape = get_shape(s0)
        if s0shape[0] == 1 and s0shape[0] != nb_layers:
            s0 = repeat(s0, n=nb_layers, axes=0)
        if s0shape[1] == 1:
            s0 = repeat(s0, n=batch_size, axes=1)
        return s0
    # ====== create RNNBlock ====== #
    from tensorflow.contrib import cudnn_rnn
    input_shape = get_shape(X)
    # NOTE(review): `X.ndim` in the message below looks like a NumPy-ism —
    # TF tensors expose get_shape().ndims; confirm this error path works.
    if X.get_shape().ndims != 3:
        raise ValueError('Input must be 3-D tensor, but X is %d-D tensor' % X.ndim)
    if input_shape[-1] != hidden_size and 'skip' in input_mode:
        raise ValueError(
            'In skip_input mode, input size must be equal to hidden size'
            ', but input_size=%d != hidden_size=%d' %
            (input_shape[-1], hidden_size))
    # IF we dimshuffle here, a lot of error concern GPUarray,
    # and cudnn will happen
    batch_size = get_shape(X, native=True)[0]
    if rnn_mode == 'lstm':
        rnn = cudnn_rnn.CudnnLSTM(num_layers=num_layers,
                                  num_units=hidden_size,
                                  input_size=input_shape[-1],
                                  input_mode=input_mode,
                                  direction=direction_mode,
                                  dropout=dropout,
                                  seed=0, seed2=0)
    else:
        if rnn_mode == 'gru':
            rnn_class = cudnn_rnn.CudnnGRU
        elif rnn_mode == 'rnn_relu':
            rnn_class = cudnn_rnn.CudnnRNNRelu
        elif rnn_mode == 'rnn_tanh':
            rnn_class = cudnn_rnn.CudnnRNNTanh
        rnn = rnn_class(num_layers=num_layers,
                        num_units=hidden_size,
                        input_size=input_shape[-1],
                        input_mode=input_mode,
                        direction=direction_mode,
                        dropout=dropout,
                        seed=0, seed2=0)
    # layer info (note in case of bidirectional, output from previous
    # layers are concatenated).
    layer_info = [input_shape[-1], hidden_size] + \
        [hidden_size * (2 if is_bidirectional else 1),
         hidden_size] * (num_layers - 1)
    # params_size is a GPU-incompatible op in this API; evaluate on CPU.
    with tf.device('/cpu:0'):
        nb_params = rnn.params_size().eval(session=get_session())
    # ====== create parameters ====== #
    # check parameters
    if parameters is None:
        # Build the flat opaque-parameter vector layer by layer with odin's
        # canonical initializers.
        if rnn_mode == 'lstm':
            from odin.backend.init import lstm as init_func
        elif rnn_mode == 'gru':
            from odin.backend.init import gru as init_func
        else:
            from odin.backend.init import rnn as init_func
        parameters = np.concatenate([
            init_func(layer_info[i * 2], layer_info[i * 2 + 1],
                      one_vector=True, return_variable=False,
                      bidirectional=True if is_bidirectional else False)
            for i in range(num_layers)
        ]).astype(FLOATX)
    parameters = variable(parameters, name=name)
    assert nb_params == get_shape(parameters)[0], \
        "Require %d parameters but only %d provided" % \
        (nb_params, get_shape(parameters)[0])
    # check initial states
    # Bidirectional runs double the effective layer count for state shapes.
    num_layers = num_layers * 2 if is_bidirectional else num_layers
    h0 = zeros((num_layers, batch_size, hidden_size)) if h0 is None else h0
    h0 = check_init_states(h0, num_layers, batch_size)
    c0 = (zeros((num_layers, batch_size, hidden_size))
          if rnn_mode == 'lstm' and c0 is None else c0)
    c0 = check_init_states(c0, num_layers, batch_size)
    # preprocess arguments
    args = {'input_h': h0}
    if rnn_mode == 'lstm':
        args['input_c'] = c0
    # ====== get output ====== #
    # cuDNN runs time-major, so transpose in and back out.
    output = rnn(input_data=tf.transpose(X, (1, 0, 2)),
                 params=parameters,
                 is_training=bool(is_training()),
                 **args)
    output = [tf.transpose(output[0], (1, 0, 2))] + list(output[1:])
    # Attach static shape hints for downstream odin ops.
    add_shape(output[0], (input_shape[0], input_shape[1],
                          hidden_size * (2 if is_bidirectional else 1)))
    for o in output[1:]:
        add_shape(o, (num_layers, input_shape[0], hidden_size))
    return output
def test_cudnn_rnn(self):
    """Cross-check K.init_rnn / K.cudnn_rnn against raw cudnn_rnn_ops.

    Sweeps cell type, depth, initializers, and directionality; verifies
    parameter counts, canonical shapes, opaque-param conversion, and that
    the odin wrapper reproduces the backend outputs exactly.
    """
    if get_ngpu() == 0:
        # cuDNN is GPU-only; skip silently on CPU machines.
        return
    print()
    batch_size = 2
    time_steps = 5
    input_dim = 12
    hidden_dim = 8
    X = K.variable(value=np.random.rand(batch_size, time_steps, input_dim),
                   dtype='float32', name='X')
    for rnn_mode in ('lstm', 'rnn_relu', 'gru'):
        for num_layers in [1, 2]:
            for W_init in [init_ops.glorot_uniform_initializer(seed=1234),
                           init_ops.random_normal_initializer(seed=1234)]:
                for b_init in [0, 1]:
                    for bidirectional in (True, False):
                        for skip_input in (False,):
                            print('RNNmode:%s' % rnn_mode,
                                  "#Layers:%d" % num_layers,
                                  'Bidirectional:%s' % bidirectional,
                                  'SkipInput:%s' % skip_input)
                            weights, biases = K.init_rnn(
                                input_dim=input_dim, hidden_dim=hidden_dim,
                                num_gates=rnn_mode, num_layers=num_layers,
                                W_init=W_init, b_init=b_init,
                                skip_input=skip_input, cudnn_vector=False,
                                is_bidirectional=bidirectional, name=None)
                            # ====== check number of params ====== #
                            params1 = K.params_to_cudnn(weights, biases)
                            n = params1.shape[0].value
                            nb_params = cudnn_rnn_ops.cudnn_rnn_opaque_params_size(
                                rnn_mode=rnn_mode, num_layers=num_layers,
                                num_units=hidden_dim, input_size=input_dim,
                                input_mode='skip_input' if skip_input else 'linear_input',
                                direction='bidirectional' if bidirectional else 'unidirectional')
                            nb_params = K.eval(nb_params)
                            assert n == nb_params
                            # ====== check canonical shape match ====== #
                            kwargs = {
                                'num_layers': num_layers,
                                'num_units': hidden_dim,
                                'input_mode': 'skip_input' if skip_input else 'linear_input',
                                'direction': 'bidirectional' if bidirectional else 'unidirectional'
                            }
                            if rnn_mode == 'lstm':
                                rnn = cudnn_rnn.CudnnLSTM(**kwargs)
                            elif rnn_mode == 'gru':
                                rnn = cudnn_rnn.CudnnGRU(**kwargs)
                            if rnn_mode == 'rnn_relu':
                                rnn = cudnn_rnn.CudnnRNNRelu(**kwargs)
                            if rnn_mode == 'rnn_tanh':
                                rnn = cudnn_rnn.CudnnRNNTanh(**kwargs)
                            rnn.build(input_shape=(None, None, input_dim))
                            assert len(weights) == len(rnn.canonical_weight_shapes)
                            assert len(biases) == len(rnn.canonical_bias_shapes)
                            for w, s in zip(weights, rnn.canonical_weight_shapes):
                                assert tuple(w.shape.as_list()) == s
                            # ====== check params conversion ====== #
                            K.initialize_all_variables()
                            params2 = cudnn_rnn_ops.cudnn_rnn_canonical_to_opaque_params(
                                rnn_mode=rnn_mode, num_layers=num_layers,
                                num_units=hidden_dim, input_size=input_dim,
                                input_mode='skip_input' if skip_input else 'linear_input',
                                direction='bidirectional' if bidirectional else 'unidirectional',
                                weights=weights, biases=biases)
                            assert np.all(K.eval(params1) == K.eval(params2))
                            # ====== odin cudnn implementation ====== #
                            name = 'TEST' + uuid(length=25)
                            outputs = K.cudnn_rnn(
                                X=X, num_units=hidden_dim, rnn_mode=rnn_mode,
                                num_layers=num_layers, parameters=None,
                                skip_input=skip_input,
                                is_bidirectional=bidirectional,
                                dropout=0.1, name=name)
                            K.initialize_all_variables()
                            s0 = K.eval(outputs[0]).sum()
                            s1 = K.eval(outputs[1]).sum()
                            all_variables = K.get_all_variables(scope=name)
                            new_weights = [
                                i for i in all_variables
                                if K.role.has_roles(i, roles=K.role.Weight)
                            ]
                            new_biases = [
                                i for i in all_variables
                                if K.role.has_roles(i, roles=K.role.Bias)
                            ]
                            new_weights, new_biases = K.sort_cudnn_params(
                                new_weights, new_biases, rnn_mode=rnn_mode)
                            # Fixed: these assertions compared a list's length
                            # to itself; compare the recovered variables
                            # against the originals instead.
                            assert len(new_weights) == len(weights)
                            assert len(new_biases) == len(biases)
                            for i, j in zip(weights + biases,
                                            new_weights + new_biases):
                                assert i.name.split('/')[-1] == \
                                    j.name.split('/')[-1]
                            # ====== CudnnRNN wrapper ====== #
                            rnn = N.CudnnRNN(
                                num_units=hidden_dim,
                                W_init=new_weights, b_init=new_biases,
                                rnn_mode=rnn_mode, num_layers=num_layers,
                                skip_input=skip_input,
                                is_bidirectional=bidirectional,
                                return_states=True, dropout=0.)
                            outputs = rnn(X)
                            K.initialize_all_variables()
                            y0 = K.eval(outputs[0]).sum()
                            y1 = K.eval(outputs[1]).sum()
                            # Wrapper must reproduce the backend exactly.
                            assert y0 == s0
                            assert y1 == s1