def testBasicLSTMCellWithDropout(self): def _is_close(x, y, digits=4): delta = x - y return delta < 10**(-digits) def _is_close_in(x, items, digits=4): for i in items: if _is_close(x, i, digits): return True return False keep_prob = 0.5 c_high = 2.9998924946 c_low = 0.999983298578 h_low = 0.761552567265 h_high = 0.995008519604 num_units = 5 allowed_low = [1, 2, 3] with self.cached_session() as sess: with tf.variable_scope("other", initializer=tf.constant_initializer(1)): x = tf.zeros([1, 5]) c = tf.zeros([1, 5]) h = tf.zeros([1, 5]) state = rnn_cell.LSTMStateTuple(c, h) cell = contrib_rnn.LayerNormBasicLSTMCell( num_units, layer_norm=False, dropout_keep_prob=keep_prob) g, s = cell(x, state) sess.run([tf.global_variables_initializer()]) res = sess.run( [g, s], { x.name: np.ones([1, 5]), c.name: np.ones([1, 5]), h.name: np.ones([1, 5]), }) # Since the returned tensors are of size [1,n] # get the first component right now. actual_h = res[0][0] actual_state_c = res[1].c[0] actual_state_h = res[1].h[0] # For each item in `c` (the cell inner state) check that # it is equal to one of the allowed values `c_high` (not # dropped out) or `c_low` (dropped out) and verify that the # corresponding item in `h` (the cell activation) is coherent. # Count the dropped activations and check that their number is # coherent with the dropout probability. dropped_count = 0 self.assertTrue((actual_h == actual_state_h).all()) for citem, hitem in zip(actual_state_c, actual_state_h): self.assertTrue(_is_close_in(citem, [c_low, c_high])) if _is_close(citem, c_low): self.assertTrue(_is_close(hitem, h_low)) dropped_count += 1 elif _is_close(citem, c_high): self.assertTrue(_is_close(hitem, h_high)) self.assertIn(dropped_count, allowed_low)
def lstm_cell(self, hparams, train): keep_prob = 1.0 - hparams.rec_dropout * tf.to_float(train) recurrent_dropout_cell = contrib_rnn.LayerNormBasicLSTMCell( hparams.hidden_size, layer_norm=hparams.layer_norm, dropout_keep_prob=keep_prob) if hparams.ff_dropout: return rnn.DropoutWrapper(recurrent_dropout_cell, input_keep_prob=keep_prob) return recurrent_dropout_cell
def testBasicLSTMCellWithStateTupleLayerNorm(self): """The results of LSTMCell and LayerNormBasicLSTMCell should be the same.""" with self.cached_session() as sess: with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)): x = tf.zeros([1, 2]) c0 = tf.zeros([1, 2]) h0 = tf.zeros([1, 2]) state0 = rnn_cell.LSTMStateTuple(c0, h0) c1 = tf.zeros([1, 2]) h1 = tf.zeros([1, 2]) state1 = rnn_cell.LSTMStateTuple(c1, h1) cell = rnn_cell.MultiRNNCell([ contrib_rnn.LayerNormBasicLSTMCell(2, layer_norm=True, norm_gain=1.0, norm_shift=0.0) for _ in range(2) ]) h, (s0, s1) = cell(x, (state0, state1)) sess.run([tf.global_variables_initializer()]) res = sess.run( [h, s0, s1], { x.name: np.array([[1., 1.]]), c0.name: 0.1 * np.asarray([[0, 1]]), h0.name: 0.1 * np.asarray([[2, 3]]), c1.name: 0.1 * np.asarray([[4, 5]]), h1.name: 0.1 * np.asarray([[6, 7]]), }) expected_h = np.array([[-0.38079708, 0.38079708]]) expected_h0 = np.array([[-0.38079708, 0.38079708]]) expected_c0 = np.array([[-1.0, 1.0]]) expected_h1 = np.array([[-0.38079708, 0.38079708]]) expected_c1 = np.array([[-1.0, 1.0]]) self.assertEqual(len(res), 3) self.assertAllClose(res[0], expected_h, 1e-5) self.assertAllClose(res[1].c, expected_c0, 1e-5) self.assertAllClose(res[1].h, expected_h0, 1e-5) self.assertAllClose(res[2].c, expected_c1, 1e-5) self.assertAllClose(res[2].h, expected_h1, 1e-5)
def testBasicLSTMCellWithoutNorm(self): """Tests that BasicLSTMCell with layer_norm=False.""" with self.cached_session() as sess: with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)): x = tf.zeros([1, 2]) c0 = tf.zeros([1, 2]) h0 = tf.zeros([1, 2]) state0 = rnn_cell.LSTMStateTuple(c0, h0) c1 = tf.zeros([1, 2]) h1 = tf.zeros([1, 2]) state1 = rnn_cell.LSTMStateTuple(c1, h1) state = (state0, state1) single_cell = lambda: contrib_rnn.LayerNormBasicLSTMCell(2, layer_norm=False) # pylint: disable=line-too-long cell = rnn_cell.MultiRNNCell([single_cell() for _ in range(2)]) g, out_m = cell(x, state) sess.run([tf.global_variables_initializer()]) res = sess.run( [g, out_m], { x.name: np.array([[1., 1.]]), c0.name: 0.1 * np.asarray([[0, 1]]), h0.name: 0.1 * np.asarray([[2, 3]]), c1.name: 0.1 * np.asarray([[4, 5]]), h1.name: 0.1 * np.asarray([[6, 7]]), }) expected_h = np.array([[0.70230919, 0.72581059]]) expected_state0_c = np.array([[0.8020075, 0.89599884]]) expected_state0_h = np.array([[0.56668288, 0.60858738]]) expected_state1_c = np.array([[1.17500675, 1.26892781]]) expected_state1_h = np.array([[0.70230919, 0.72581059]]) actual_h = res[0] actual_state0_c = res[1][0].c actual_state0_h = res[1][0].h actual_state1_c = res[1][1].c actual_state1_h = res[1][1].h self.assertAllClose(actual_h, expected_h, 1e-5) self.assertAllClose(expected_state0_c, actual_state0_c, 1e-5) self.assertAllClose(expected_state0_h, actual_state0_h, 1e-5) self.assertAllClose(expected_state1_c, actual_state1_c, 1e-5) self.assertAllClose(expected_state1_h, actual_state1_h, 1e-5) with tf.variable_scope("other", initializer=tf.constant_initializer(0.5)): x = tf.zeros( [1, 3]) # Test BasicLSTMCell with input_size != num_units. c = tf.zeros([1, 2]) h = tf.zeros([1, 2]) state = rnn_cell.LSTMStateTuple(c, h) cell = contrib_rnn.LayerNormBasicLSTMCell(2, layer_norm=False) g, out_m = cell(x, state) sess.run([tf.global_variables_initializer()]) res = sess.run( [g, out_m], { x.name: np.array([[1., 1., 1.]]), c.name: 0.1 * np.asarray([[0, 1]]), h.name: 0.1 * np.asarray([[2, 3]]), }) expected_h = np.array([[0.64121795, 0.68166804]]) expected_c = np.array([[0.88477188, 0.98103917]]) self.assertEqual(len(res), 2) self.assertAllClose(res[0], expected_h, 1e-5) self.assertAllClose(res[1].c, expected_c, 1e-5) self.assertAllClose(res[1].h, expected_h, 1e-5)
def testBasicLSTMCell(self): with self.cached_session() as sess: with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)): x = tf.zeros([1, 2]) c0 = tf.zeros([1, 2]) h0 = tf.zeros([1, 2]) state0 = rnn_cell.LSTMStateTuple(c0, h0) c1 = tf.zeros([1, 2]) h1 = tf.zeros([1, 2]) state1 = rnn_cell.LSTMStateTuple(c1, h1) state = (state0, state1) single_cell = lambda: contrib_rnn.LayerNormBasicLSTMCell(2) cell = rnn_cell.MultiRNNCell([single_cell() for _ in range(2)]) g, out_m = cell(x, state) sess.run([tf.global_variables_initializer()]) res = sess.run( [g, out_m], { x.name: np.array([[1., 1.]]), c0.name: 0.1 * np.asarray([[0, 1]]), h0.name: 0.1 * np.asarray([[2, 3]]), c1.name: 0.1 * np.asarray([[4, 5]]), h1.name: 0.1 * np.asarray([[6, 7]]), }) expected_h = np.array([[-0.38079708, 0.38079708]]) expected_state0_c = np.array([[-1.0, 1.0]]) expected_state0_h = np.array([[-0.38079708, 0.38079708]]) expected_state1_c = np.array([[-1.0, 1.0]]) expected_state1_h = np.array([[-0.38079708, 0.38079708]]) actual_h = res[0] actual_state0_c = res[1][0].c actual_state0_h = res[1][0].h actual_state1_c = res[1][1].c actual_state1_h = res[1][1].h self.assertAllClose(actual_h, expected_h, 1e-5) self.assertAllClose(expected_state0_c, actual_state0_c, 1e-5) self.assertAllClose(expected_state0_h, actual_state0_h, 1e-5) self.assertAllClose(expected_state1_c, actual_state1_c, 1e-5) self.assertAllClose(expected_state1_h, actual_state1_h, 1e-5) with tf.variable_scope("other", initializer=tf.constant_initializer(0.5)): x = tf.zeros( [1, 3]) # Test BasicLSTMCell with input_size != num_units. c = tf.zeros([1, 2]) h = tf.zeros([1, 2]) state = rnn_cell.LSTMStateTuple(c, h) cell = contrib_rnn.LayerNormBasicLSTMCell(2) g, out_m = cell(x, state) sess.run([tf.global_variables_initializer()]) res = sess.run( [g, out_m], { x.name: np.array([[1., 1., 1.]]), c.name: 0.1 * np.asarray([[0, 1]]), h.name: 0.1 * np.asarray([[2, 3]]), }) expected_h = np.array([[-0.38079708, 0.38079708]]) expected_c = np.array([[-1.0, 1.0]]) self.assertEqual(len(res), 2) self.assertAllClose(res[0], expected_h, 1e-5) self.assertAllClose(res[1].c, expected_c, 1e-5) self.assertAllClose(res[1].h, expected_h, 1e-5)