def testWrapperWeights(self, wrapper):
    """Tests that wrapper weights contain the wrapped cell's weights."""
    base_cell = layers.SimpleRNNCell(1, name="basic_rnn_cell")
    rnn_cell = wrapper(base_cell)
    rnn_layer = layers.RNN(rnn_cell)
    inputs = tf.convert_to_tensor([[[1]]], dtype=tf.float32)
    rnn_layer(inputs)

    wrapper_name = generic_utils.to_snake_case(wrapper.__name__)
    expected_weights = [
        "rnn/" + wrapper_name + "/" + var
        for var in ("kernel:0", "recurrent_kernel:0", "bias:0")
    ]
    self.assertLen(rnn_cell.weights, 3)
    self.assertCountEqual([v.name for v in rnn_cell.weights], expected_weights)
    self.assertCountEqual(
        [v.name for v in rnn_cell.trainable_variables], expected_weights
    )
    self.assertCountEqual(
        [v.name for v in rnn_cell.non_trainable_variables], []
    )
    self.assertCountEqual(
        [v.name for v in rnn_cell.cell.weights], expected_weights
    )
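# For intuition, here is a minimal standalone sketch of the naming behavior the
# test above asserts, assuming TensorFlow 2.x (`tf.keras`). It leaves out the
# wrapper and the test harness and just shows the three weights a SimpleRNNCell
# exposes once built inside an RNN layer.
import tensorflow as tf

cell = tf.keras.layers.SimpleRNNCell(1, name="basic_rnn_cell")
layer = tf.keras.layers.RNN(cell)
layer(tf.convert_to_tensor([[[1.0]]]))  # run once so the layer builds its weights

print([v.name for v in cell.weights])
# e.g. ['rnn/basic_rnn_cell/kernel:0',
#       'rnn/basic_rnn_cell/recurrent_kernel:0',
#       'rnn/basic_rnn_cell/bias:0']
# A wrapper is expected to surface the same three variables, scoped under its
# own snake_case name instead of 'basic_rnn_cell'.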
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers


def build_model_gru_cell():
    inputs = keras.Input(shape=(None, ModelConfig.L_FRAME // 2 + 1))
    rnn_cells = [
        layers.GRUCell(ModelConfig.HID_SIZE)
        for _ in range(ModelConfig.NUM_LAYERS)
    ]
    stacked_gru = layers.StackedRNNCells(rnn_cells)
    output_rnn = layers.RNN(stacked_gru)(inputs)

    input_size = inputs.shape[2]
    src1 = layers.Dense(input_size, activation="relu")(output_rnn)
    # src2_pre = layers.Dense(input_size, activation="relu")(output_rnn)

    # time-freq masking layer
    # src1 = src1_pre / (src1_pre + src2_pre + np.finfo(float).eps) * inputs
    # src2 = src2_pre / (src1_pre + src2_pre + np.finfo(float).eps) * inputs

    model = keras.Model(inputs=inputs, outputs=src1, name="GRUCell_model")
    model.summary()
    return model
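# A quick smoke test of the builder. The `ModelConfig` values below are
# illustrative assumptions; the project defines its own configuration class.
import numpy as np

class ModelConfig:
    # Hypothetical values for illustration only.
    L_FRAME = 1024    # STFT frame length -> L_FRAME // 2 + 1 = 513 frequency bins
    HID_SIZE = 256    # hidden units per GRU cell
    NUM_LAYERS = 3    # number of stacked GRU cells

model = build_model_gru_cell()
batch = np.random.uniform(size=(2, 10, ModelConfig.L_FRAME // 2 + 1)).astype("float32")
print(model.predict_on_batch(batch).shape)  # (2, 513): one frame-sized output per sequence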
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
# BidirectionalRNN is assumed to live in rnn_layers alongside RNNCell.
from rnn_layers import RNNCell, BidirectionalRNN
from utils.tools import rel_error

N, T, D, H = 2, 3, 4, 5
x = np.random.uniform(size=(N, T, D))
# Mark the padded tail of each sequence with NaN so it can be masked out.
x[0, -1:, :] = np.nan
x[1, -2:, :] = np.nan
h0 = np.random.uniform(size=(N, H))
hr = np.random.uniform(size=(N, H))

rnn_cell = RNNCell(in_features=D, units=H)
brnn = BidirectionalRNN(rnn_cell, h0=h0, hr=hr)
out = brnn.forward(x)

keras_x = layers.Input(shape=(T, D), name='x')
keras_h0 = layers.Input(shape=(H,), name='h0')
keras_hr = layers.Input(shape=(H,), name='hr')
keras_x_masked = layers.Masking(mask_value=0.)(keras_x)
keras_rnn = layers.RNN(layers.SimpleRNNCell(H), return_sequences=True)
keras_brnn = layers.Bidirectional(keras_rnn, merge_mode='concat', name='brnn')(
    keras_x_masked, initial_state=[keras_h0, keras_hr])
keras_model = keras.Model(inputs=[keras_x, keras_h0, keras_hr],
                          outputs=keras_brnn)
keras_model.get_layer('brnn').set_weights([
    brnn.forward_rnn.kernel, brnn.forward_rnn.recurrent_kernel,
    brnn.forward_rnn.bias, brnn.backward_rnn.kernel,
    brnn.backward_rnn.recurrent_kernel, brnn.backward_rnn.bias
])
keras_out = keras_model.predict_on_batch([np.nan_to_num(x), h0, hr])

# Restore NaNs at the masked positions so both outputs are compared alike.
nan_indices = np.where(np.any(np.isnan(x), axis=2))
keras_out[nan_indices[0], nan_indices[1], :] = np.nan
print('Relative error (<1e-5 will be fine): {}'.format(
    rel_error(keras_out, out)))
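# `rel_error` is imported from the course's `utils.tools`. If that module is
# unavailable, a common definition looks like the sketch below; this is an
# assumption, not necessarily the course's exact helper, and the NaN handling
# (matching NaNs compare as equal) is added here for the masked comparison.
import numpy as np

def rel_error(x, y):
    """Maximum relative error between two arrays."""
    x, y = np.nan_to_num(x), np.nan_to_num(y)
    return np.max(np.abs(x - y) / np.maximum(1e-8, np.abs(x) + np.abs(y)))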
import importlib
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers

import rnn_layers
importlib.reload(rnn_layers)
from rnn_layers import RNNCell
from utils.tools import rel_error

N, D, H = 3, 10, 4
x = np.random.uniform(size=(N, D))
prev_h = np.random.uniform(size=(N, H))

rnn_cell = RNNCell(in_features=D, units=H)
out = rnn_cell.forward([x, prev_h])

# compare with the Keras implementation
keras_x = layers.Input(shape=(1, D), name='x')
keras_prev_h = layers.Input(shape=(H,), name='prev_h')
keras_rnn = layers.RNN(layers.SimpleRNNCell(H),
                       name='rnn')(keras_x, initial_state=keras_prev_h)
keras_model = keras.Model(inputs=[keras_x, keras_prev_h], outputs=keras_rnn)
keras_model.get_layer('rnn').set_weights(
    [rnn_cell.kernel, rnn_cell.recurrent_kernel, rnn_cell.bias])
keras_out = keras_model.predict_on_batch([x[:, None, :], prev_h])
print('Relative error (<1e-5 will be fine): {}'.format(
    rel_error(keras_out, out)))

# %% [markdown]
# ## Backward
#
# Please implement the function `RNNCell.backward(self, in_grads, inputs)` and
# test the implementation using the following code. We need to compute the
# gradients with respect to both the inputs and the hidden states, as well as
# the trainable weights.

# %%
import numpy as np
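
# The notebook's actual backward test cell is not reproduced above. As a
# rough, illustrative sketch (not the course's code): a centered finite
# difference can estimate the input gradient that
# `RNNCell.backward(in_grads, inputs)` should produce. The helper name
# `numeric_grad_wrt_x` is hypothetical; it assumes `forward` takes
# `[x, prev_h]` exactly as in the forward test.
def numeric_grad_wrt_x(cell, x, prev_h, out_grad, eps=1e-5):
    """Centered-difference estimate of d(sum(forward([x, prev_h]) * out_grad)) / dx."""
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        orig = x[idx]
        x[idx] = orig + eps
        pos = np.sum(cell.forward([x, prev_h]) * out_grad)
        x[idx] = orig - eps
        neg = np.sum(cell.forward([x, prev_h]) * out_grad)
        x[idx] = orig  # restore the perturbed entry
        grad[idx] = (pos - neg) / (2 * eps)
        it.iternext()
    return grad

# Example usage, reusing x, prev_h, rnn_cell and out from the forward test:
# out_grad = np.random.uniform(size=out.shape)
# num_dx = numeric_grad_wrt_x(rnn_cell, x, prev_h, out_grad)
# The input-gradient part of rnn_cell.backward(out_grad, [x, prev_h]) should
# agree with num_dx to roughly 1e-5; the exact return structure depends on
# your implementation.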