def testIntermediateLookupGrad(self):
    """Run the lookup gradient op partway through a stack recurrence.

    Builds a hand-written stack / buffer / queue state for a batch of two
    examples, runs ``ts.thin_stack_lookup`` at ``timestep=3``, zeroes the
    stack and buffer variables, and then evaluates
    ``ts._thin_stack_lookup_gradient`` on random incoming gradients.
    """
    batch_size = 2
    model_dim = 5

    with self.test_session(use_gpu=self.use_gpu) as s:
        # Example 1: S S R S
        # Example 2: S S S R
        #                  ^
        # we are running lookup at the above timestep
        stack = Variable([[-1., -1., -1., -1., -1.],
                          [ 1.,  1.,  1.,  1.,  1.],
                          [-2., -2., -2., -2., -2.],
                          [ 2.,  2.,  2.,  2.,  2.],
                          [-3., -3., -3., -3., -3.],
                          [ 3.,  3.,  3.,  3.,  3.],
                          [ 0.,  0.,  0.,  0.,  0.],
                          [ 0.,  0.,  0.,  0.,  0.],
                          [ 0.,  0.,  0.,  0.,  0.],
                          [ 0.,  0.,  0.,  0.,  0.]])
        buffer = Variable([[-1., -1., -1., -1., -1.],
                           [ 1.,  1.,  1.,  1.,  1.],
                           [-2., -2., -2., -2., -2.],
                           [ 2.,  2.,  2.,  2.,  2.],
                           [-3., -3., -3., -3., -3.],
                           [ 3.,  3.,  3.,  3.,  3.]])
        queue = Variable([2., 0.,
                          0., 1.,
                          0., 2.,
                          0., 0.,
                          0., 0.])
        cursors = Variable([0., 2.])
        buffer_cursors = Variable([2., 3.])

        s.run(initialize_variables([stack, buffer, queue, cursors,
                                    buffer_cursors]))

        # Snapshot the variable contents before they are zeroed below.
        stack_val = stack.eval()
        buffer_val = buffer.eval()

        lookup = ts.thin_stack_lookup(stack, buffer, queue, cursors,
                                      buffer_cursors, timestep=3)

        #### GRADIENT

        # Random incoming gradients, one per differentiable lookup output;
        # the fourth output receives no gradient (None).
        stack1_grad = tf.random_uniform((batch_size, model_dim))
        stack2_grad = tf.random_uniform((batch_size, model_dim))
        buf_top_grad = tf.random_uniform((batch_size, model_dim))
        in_grads = (stack1_grad, stack2_grad, buf_top_grad, None)

        # HACK: Zero out stack and buffer before invoking this op.
        # In a real / full bprop, things would have been zeroed out
        # at the start of the bprop algorithm.
        zero_stack = tf.assign(stack, stack * 0.)
        zero_buffer = tf.assign(buffer, buffer * 0.)

        # Enforce computation order: lookup, then zero out, then grad
        with tf.control_dependencies(lookup + (zero_stack, zero_buffer)):
            out_grads = ts._thin_stack_lookup_gradient(lookup[0].op, in_grads)
            # Only the first two returned gradients are inspected here.
            out_grads = out_grads[:2]

        # Fetch the sampled input gradients in the same run() call so the
        # values seen here are the ones the gradient op consumed.
        fetch = out_grads + (stack1_grad, stack2_grad, buf_top_grad)

        ret = s.run(fetch)

        grad_stack, grad_buffer, stack1_grad, stack2_grad, buf_top_grad = ret

        # NOTE(review): no assertion is visible after this point -- the
        # expected stack gradient is only initialised to zeros and is never
        # compared with grad_stack / grad_buffer. TODO: fill in the expected
        # rows and assert, otherwise this test only checks that the ops run.
        grad_stack_expected = np.zeros_like(stack_val)
def testIntermediateUpdate(self):
    """Test a standard update somewhere in the middle of a stack recurrence.

    Starts from a hand-written stack / buffer / queue state for a batch of
    two examples and applies ``ts.thin_stack_update`` once at ``t = 3``,
    where the first example shifts (transition 0) and the second reduces
    (transition 1). The returned stack, queue, cursors and buffer cursors
    are compared with hand-computed expectations.
    """
    with self.test_session(use_gpu=self.use_gpu) as s:
        # Example 1: S S R S
        # Example 2: S S S R
        #                  ^
        # we are running the update at the above timestep
        stack = Variable([[-1., -1., -1., -1., -1.],
                          [ 1.,  1.,  1.,  1.,  1.],
                          [-2., -2., -2., -2., -2.],
                          [ 2.,  2.,  2.,  2.,  2.],
                          [-3., -3., -3., -3., -3.],
                          [ 3.,  3.,  3.,  3.,  3.],
                          [ 0.,  0.,  0.,  0.,  0.],
                          [ 0.,  0.,  0.,  0.,  0.],
                          [ 0.,  0.,  0.,  0.,  0.],
                          [ 0.,  0.,  0.,  0.,  0.]])
        buffer = Variable([[-1., -1., -1., -1., -1.],
                           [ 1.,  1.,  1.,  1.,  1.],
                           [-2., -2., -2., -2., -2.],
                           [ 2.,  2.,  2.,  2.,  2.],
                           [-3., -3., -3., -3., -3.],
                           [ 3.,  3.,  3.,  3.,  3.]])
        queue = Variable([2., 0.,
                          0., 1.,
                          0., 2.,
                          0., 0.,
                          0., 0.])
        cursors = Variable([0., 2.])
        # A constant here, so it is not part of the initializer list below.
        buffer_cursors = constant_op.constant([2., 3.])
        t = 3

        s.run(initialize_variables([stack, buffer, queue, cursors]))

        # Snapshot the starting state; the expectations are derived from it.
        stack_val = stack.eval()
        buffer_val = buffer.eval()

        # Candidate values for each example under either transition.
        shift_in = constant_op.constant(
            np.array([buffer_val[4], buffer_val[5]]))
        reduce_in = constant_op.constant(
            np.array([stack_val[4] + stack_val[0],
                      stack_val[5] + stack_val[3]]))
        # Column vector of transitions: 0 selects shift_in, 1 selects
        # reduce_in in the blend below.
        transitions = tf.expand_dims(constant_op.constant([0., 1.]), 1)
        input_val = transitions * reduce_in + (1. - transitions) * shift_in

        ret = ts.thin_stack_update(input_val, transitions,
                                   stack, queue, cursors, buffer_cursors, t)
        stack_next, queue_next, cursors_next, buffer_cursors_next = s.run(ret)

        # Only stack rows 6 and 7 are expected to change: the shifted buffer
        # row for example 1 and the reduced sum for example 2.
        stack_expected = np.copy(stack_val)
        stack_expected[6] = buffer_val[4]
        stack_expected[7] = stack_val[5] + stack_val[3]

        queue_expected = np.array([2., 0.,
                                   3., 3.,
                                   0., 2.,  # NB: we didn't erase this, but it's okay
                                   0., 0.,
                                   0., 0.])
        cursors_expected = np.array([1., 1.])
        buffer_cursors_expected = np.array([3., 3.])

        self.assertAllEqual(stack_next, stack_expected)
        self.assertAllEqual(queue_next, queue_expected)
        self.assertAllEqual(cursors_next, cursors_expected)
        self.assertAllEqual(buffer_cursors_next, buffer_cursors_expected)