def test_lstm(self):
    """Check the odin LSTM against lasagne's ``LSTMLayer`` on shared weights.

    The odin network is a ``N.Merge`` of four linear ``N.Dense`` projections
    (ingate, forgetgate, cell, outgate) concatenated and fed to ``N.LSTM``
    with ``input_mode='skip'``.  The lasagne network is a single
    ``LSTMLayer`` whose gates receive the very same arrays.

    On the ``'tensorflow'`` backend only a reference checksum of the odin
    output is compared and the lasagne comparison is skipped; otherwise the
    two outputs must match element-wise (sum of absolute differences ~ 0).
    """
    # ====== shared parameters ====== #
    # NOTE(review): `random` is a helper defined outside this block.  The
    # call order below is kept exactly as in the original in case it draws
    # from a seeded stream (the TensorFlow checksum would depend on it).
    W_in_to_ingate = random(28, 32) / 12
    W_hid_to_ingate = random(32, 32) / 12
    b_ingate = random(32) / 12

    W_in_to_forgetgate = random(28, 32) / 12
    W_hid_to_forgetgate = random(32, 32) / 12
    b_forgetgate = random(32) / 12

    W_in_to_cell = random(28, 32) / 12
    W_hid_to_cell = random(32, 32) / 12
    b_cell = random(32) / 12

    W_in_to_outgate = random(28, 32) / 12
    W_hid_to_outgate = random(32, 32) / 12
    b_outgate = random(32) / 12

    # Peephole weights; the cell gate has none (see W_cell=None below).
    W_cell_to_ingate = random(32) / 12
    W_cell_to_forgetgate = random(32) / 12
    W_cell_to_outgate = random(32) / 12

    cell_init = random(1, 32) / 12
    hid_init = random(1, 32) / 12

    # ====== input data ====== #
    x = random(12, 28, 28)
    # NOTE(review): the mask comes from NumPy's global RNG; the hard-coded
    # TensorFlow checksum below presumably relies on a seed set elsewhere
    # -- confirm.  An all-ones mask (np.ones(shape=(12, 28))) is a handy
    # substitute when debugging.
    x_mask = np.random.randint(0, 2, size=(12, 28))

    # ====== odin ====== #
    X = K.placeholder(shape=(None, 28, 28), name='X')
    mask = K.placeholder(shape=(None, 28), name='mask', dtype='int32')
    network = N.Sequence([
        # Input projections are computed outside the LSTM and concatenated
        # in the same gate order as W_hid_init below.
        N.Merge([
            N.Dense(32, W_init=W_in_to_ingate, b_init=b_ingate,
                    activation=K.linear),
            N.Dense(32, W_init=W_in_to_forgetgate, b_init=b_forgetgate,
                    activation=K.linear),
            N.Dense(32, W_init=W_in_to_cell, b_init=b_cell,
                    activation=K.linear),
            N.Dense(32, W_init=W_in_to_outgate, b_init=b_outgate,
                    activation=K.linear)
        ], merge_function=K.concatenate),
        N.LSTM(32, activation=K.tanh, gate_activation=K.sigmoid,
               W_hid_init=[
                   W_hid_to_ingate,
                   W_hid_to_forgetgate,
                   W_hid_to_cell,
                   W_hid_to_outgate
               ],
               W_peepholes=[
                   W_cell_to_ingate,
                   W_cell_to_forgetgate,
                   W_cell_to_outgate
               ],
               input_mode='skip',
               name='lstm')
    ])
    y_odin = network(X, h0=hid_init, c0=cell_init, mask=mask)
    f_odin = K.function([X, mask], y_odin)
    out1 = f_odin(x, x_mask)

    # ====== lasagne ====== #
    if get_backend() == 'tensorflow':
        # The lasagne reference is not built on this backend, so only the
        # first four characters of the output sum are compared.  str() is
        # used instead of repr() because NumPy >= 2 renders scalar reprs as
        # "np.float32(...)", which would make the prefix check always fail.
        self.assertEqual(str(np.sum(out1))[:4], str(43.652363)[:4])
        return

    sigmoid = lasagne.nonlinearities.sigmoid
    tanh = lasagne.nonlinearities.tanh

    # Reuse the odin placeholders so one K.function signature feeds both.
    l_in = lasagne.layers.InputLayer(shape=(None, 28, 28))
    l_in.input_var = X
    l_mask = lasagne.layers.InputLayer(shape=(None, 28))
    l_mask.input_var = mask

    l_lstm = lasagne.layers.LSTMLayer(
        l_in, num_units=32,
        ingate=lasagne.layers.Gate(
            nonlinearity=sigmoid,
            W_in=W_in_to_ingate,
            W_hid=W_hid_to_ingate,
            W_cell=W_cell_to_ingate,
            b=b_ingate),
        forgetgate=lasagne.layers.Gate(
            nonlinearity=sigmoid,
            W_in=W_in_to_forgetgate,
            W_hid=W_hid_to_forgetgate,
            W_cell=W_cell_to_forgetgate,
            b=b_forgetgate),
        cell=lasagne.layers.Gate(
            nonlinearity=tanh,
            W_in=W_in_to_cell,
            W_hid=W_hid_to_cell,
            W_cell=None,
            b=b_cell),
        outgate=lasagne.layers.Gate(
            nonlinearity=sigmoid,
            W_in=W_in_to_outgate,
            W_hid=W_hid_to_outgate,
            W_cell=W_cell_to_outgate,
            b=b_outgate),
        nonlinearity=tanh,
        cell_init=cell_init,
        hid_init=hid_init,
        mask_input=l_mask,
        precompute_input=True,
        backwards=False)
    y_lasagne = lasagne.layers.get_output(l_lstm)
    f_lasagne = K.function([X, mask], y_lasagne)
    out2 = f_lasagne(x, x_mask)

    # ====== test ====== #
    self.assertAlmostEqual(np.sum(np.abs(out1 - out2)), 0.)
N.Flatten(outdim=3), N.Merge( [ N.Dense(lstm_output_size, activation=K.linear, name='ingate'), # input-gate N.Dense(lstm_output_size, activation=K.linear, name='forgetgate'), # forget-gate N.Dense(lstm_output_size, activation=K.linear, name='cellupdate'), # cell-update N.Dense(lstm_output_size, activation=K.linear, name='outgate') # output-gate ], merge_function=K.concatenate), N.LSTM(num_units=lstm_output_size, input_mode='skip')[:, -1], N.Dense(1, activation=K.sigmoid) ], debug=True) K.set_training(True) y_pred_train = f(X_train) K.set_training(False) y_pred_score = f(X_score) cost_train = K.mean(K.binary_crossentropy(y_pred_train, y)) cost_score = K.mean(K.binary_accuracy(y_pred_score, y)) parameters = f.parameters print('Params:', [p.name for p in parameters]) updates = K.optimizers.Adam(lr=0.001).get_updates(cost_train, parameters)