def _testDecoderFPropGradientCheckerHelper(self, func_inline=False):
  """Checks decoder FProp symbolic gradients against numeric gradients.

  Builds the decoder in float64, runs FProp twice to verify the loss is
  deterministic, then compares tf.gradients output with finite-difference
  gradients for every variable that receives a gradient.

  Args:
    func_inline: If True, enables function inlining in the Grappler
      optimizer options of the session config.
  """
  config = tf.ConfigProto(
      graph_options=tf.GraphOptions(
          optimizer_options=tf.OptimizerOptions(
              do_function_inlining=func_inline)))
  with self.session(graph=tf.Graph(), use_gpu=False, config=config) as sess:
    tf.set_random_seed(8372749040)
    np.random.seed(274854)
    vn_config = py_utils.VariationalNoiseParams(None, False, False)
    p = self._DecoderParams(vn_config)
    # float64 keeps the finite-difference comparison numerically tight.
    p.dtype = tf.float64
    dec = p.cls(p)
    src_seq_len = 5
    src_enc = tf.constant(
        np.random.uniform(size=(src_seq_len, 2, 8)), tf.float64)
    src_enc_padding = tf.constant(
        [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 1.0], [1.0, 1.0]],
        dtype=tf.float64)
    encoder_outputs = py_utils.NestedMap(
        encoded=src_enc, padding=src_enc_padding)
    # Targets become time-major after the transpose: [time, batch].
    target_ids = tf.transpose(
        tf.constant([[0, 1, 2, 3], [1, 2, 3, 4], [10, 11, 12, 15],
                     [5, 6, 7, 8], [10, 5, 2, 5]],
                    dtype=tf.int32))
    target_labels = tf.transpose(
        tf.constant([[0, 1, 2, 3], [1, 2, 3, 4], [10, 11, 12, 13],
                     [5, 7, 8, 10], [10, 5, 2, 4]],
                    dtype=tf.int32))
    target_paddings = tf.transpose(
        tf.constant([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 1, 0],
                     [0, 1, 0, 0], [1, 1, 1, 1]],
                    dtype=tf.float64))
    target_transcripts = tf.constant(['abcd', 'bcde', 'klmp', 'fghi', 'kfcf'])
    target_weights = 1.0 - target_paddings
    targets = py_utils.NestedMap({
        'ids': target_ids,
        'labels': target_labels,
        'weights': target_weights,
        'paddings': target_paddings,
        'transcripts': target_transcripts,
    })
    metrics = dec.FPropDefaultTheta(encoder_outputs, targets)
    loss = metrics['loss'][0]
    all_vars = tf.all_variables()
    grads = tf.gradients(loss, all_vars)

    def DenseGrad(var, grad):
      """Densifies IndexedSlices grads; passes Tensors through.

      Returns None (implicitly) for variables with no gradient.
      """
      if isinstance(grad, tf.Tensor):
        return grad
      elif isinstance(grad, tf.IndexedSlices):
        return tf.unsorted_segment_sum(grad.values, grad.indices,
                                       tf.shape(var)[0])

    dense_grads = [DenseGrad(x, y) for (x, y) in zip(all_vars, grads)]
    tf.global_variables_initializer().run()
    test_utils.CompareToGoldenSingleFloat(self, 3.493656, loss.eval())
    # Second run to make sure the function is deterministic.
    test_utils.CompareToGoldenSingleFloat(self, 3.493656, loss.eval())
    symbolic_grads = [x.eval() for x in dense_grads if x is not None]
    numerical_grads = []
    # Fix: only compute numeric gradients for variables that actually have a
    # symbolic gradient. The original computed one numeric gradient per
    # variable, which would misalign the element-wise zip comparison below
    # whenever tf.gradients returned None for any variable.
    for v, g in zip(all_vars, dense_grads):
      if g is not None:
        numerical_grads.append(
            test_utils.ComputeNumericGradient(sess, loss, v))
    for x, y in zip(symbolic_grads, numerical_grads):
      self.assertAllClose(x, y)
def testDecoderFPropFloatInline2Layers(self):
  """2-layer decoder FProp with function inlining matches the golden loss."""
  loss = self._testDecoderFPropFloatHelper(
      func_inline=True, num_decoder_layers=2)
  test_utils.CompareToGoldenSingleFloat(self, 3.512603, loss)
def testDecoderFPropFloat2LayersResidual(self):
  """2-layer decoder FProp with residual connections matches the golden loss."""
  loss = self._testDecoderFPropFloatHelper(
      num_decoder_layers=2, residual_start=2)
  test_utils.CompareToGoldenSingleFloat(self, 3.513235, loss)
def testDecoderFPropFloatInline(self):
  """Decoder FProp with function inlining matches the golden loss."""
  loss = self._testDecoderFPropFloatHelper(func_inline=True)
  test_utils.CompareToGoldenSingleFloat(self, 3.512219, loss)
def testDecoderFPropFloatNoInlinePadTargetsToLongerLength(self):
  """Padding targets to a longer sequence length leaves the loss unchanged."""
  loss = self._testDecoderFPropFloatHelper(
      func_inline=False, target_seq_len=10)
  test_utils.CompareToGoldenSingleFloat(self, 3.512219, loss)
def testDecoderFPropFloatNoInline2Layers(self):
  """2-layer decoder FProp without inlining matches the golden loss."""
  loss = self._testDecoderFPropFloatHelper(
      func_inline=False, num_decoder_layers=2)
  test_utils.CompareToGoldenSingleFloat(self, 3.457761, loss)
def testDecoderFPropFloatNoInline(self):
  """Decoder FProp without function inlining matches the golden loss."""
  loss = self._testDecoderFPropFloatHelper(func_inline=False)
  test_utils.CompareToGoldenSingleFloat(self, 3.458980, loss)
def testFProp(self):
  """FProp reproduces the golden loss and creates exactly the expected vars."""
  with self.session(use_gpu=False):
    tf.set_random_seed(93820985)
    params = self._testParams()
    model = params.Instantiate()
    model.FPropDefaultTheta()
    tf.global_variables_initializer().run()
    test_utils.CompareToGoldenSingleFloat(self, 4.472597, model.loss.eval())
    actual_var_names = [v.name for v in tf.all_variables()]
    print('all vars \n', '\n'.join(actual_var_names))
    # Every variable the model is expected to create, encoder then decoder.
    expected_var_names = [
        'global_step:0',
        'test_mdl/enc/conv_L0/w/var:0',
        'test_mdl/enc/conv_L0/beta/var:0',
        'test_mdl/enc/conv_L0/gamma/var:0',
        'test_mdl/enc/conv_L0/moving_mean/var:0',
        'test_mdl/enc/conv_L0/moving_variance/var:0',
        'test_mdl/enc/conv_L1/w/var:0',
        'test_mdl/enc/conv_L1/beta/var:0',
        'test_mdl/enc/conv_L1/gamma/var:0',
        'test_mdl/enc/conv_L1/moving_mean/var:0',
        'test_mdl/enc/conv_L1/moving_variance/var:0',
        'test_mdl/enc/f_conv_lstm_0/wm/var:0',
        'test_mdl/enc/f_conv_lstm_0/b/var:0',
        'test_mdl/enc/b_conv_lstm_0/wm/var:0',
        'test_mdl/enc/b_conv_lstm_0/b/var:0',
        'test_mdl/enc/conv_lstm_cnn_0/w/var:0',
        'test_mdl/enc/conv_lstm_cnn_0/beta/var:0',
        'test_mdl/enc/conv_lstm_cnn_0/gamma/var:0',
        'test_mdl/enc/conv_lstm_cnn_0/moving_mean/var:0',
        'test_mdl/enc/conv_lstm_cnn_0/moving_variance/var:0',
        'test_mdl/enc/fwd_rnn_L0/wm/var:0',
        'test_mdl/enc/fwd_rnn_L0/b/var:0',
        'test_mdl/enc/bak_rnn_L0/wm/var:0',
        'test_mdl/enc/bak_rnn_L0/b/var:0',
        'test_mdl/enc/proj_L0/w/var:0',
        'test_mdl/enc/proj_L0/beta/var:0',
        'test_mdl/enc/proj_L0/gamma/var:0',
        'test_mdl/enc/proj_L0/moving_mean/var:0',
        'test_mdl/enc/proj_L0/moving_variance/var:0',
        'test_mdl/enc/fwd_rnn_L1/wm/var:0',
        'test_mdl/enc/fwd_rnn_L1/b/var:0',
        'test_mdl/enc/bak_rnn_L1/wm/var:0',
        'test_mdl/enc/bak_rnn_L1/b/var:0',
        'test_mdl/enc/proj_L1/w/var:0',
        'test_mdl/enc/proj_L1/beta/var:0',
        'test_mdl/enc/proj_L1/gamma/var:0',
        'test_mdl/enc/proj_L1/moving_mean/var:0',
        'test_mdl/enc/proj_L1/moving_variance/var:0',
        'test_mdl/enc/fwd_rnn_L2/wm/var:0',
        'test_mdl/enc/fwd_rnn_L2/b/var:0',
        'test_mdl/enc/bak_rnn_L2/wm/var:0',
        'test_mdl/enc/bak_rnn_L2/b/var:0',
        'test_mdl/dec/emb/var_0/var:0',
        'test_mdl/dec/rnn_cell/wm/var:0',
        'test_mdl/dec/rnn_cell/b/var:0',
        'test_mdl/dec/atten/source_var/var:0',
        'test_mdl/dec/atten/query_var/var:0',
        'test_mdl/dec/atten/hidden_var/var:0',
        'test_mdl/dec/softmax/weight_0/var:0',
        'test_mdl/dec/softmax/bias_0/var:0',
    ]
    # Order-insensitive comparison: creation order is not part of the contract.
    self.assertEqual(sorted(expected_var_names), sorted(actual_var_names))