Example #1
    def _TestStackedElmanGradient(self, num, seqlen=7, batch=5):
        """Tests a stacked Elman recurrent network with num layers."""
        g = tf.Graph()
        with g.as_default():
            # Trailing pad length, hidden dimension, number of layers.
            trailing_pad_len, dims, layers = 2, 8, num
            _, _, loss, xs, dxs = self._BuildStackedRecurrentElman(
                seqlen, trailing_pad_len, batch, dims, layers)

        # Fetch all gradients (dxs) in one session run and compare them
        # with their respective numerical gradients.
        with self.session(graph=g) as sess:
            s_dxs = sess.run(dxs)
            for (x, s_dx) in zip(xs, s_dxs):
                n_dx = test_utils.ComputeNumericGradient(sess, loss, x)
                self._LogDiff(n_dx, s_dx)
                self.assertAllClose(n_dx, s_dx)

        # Randomly pick a few (x, dx) pairs, and fetch dx via one sess.run
        # and compare with its numerical gradient.
        xs_dxs = list(zip(xs, dxs))
        np.random.shuffle(xs_dxs)
        with self.session(graph=g) as sess:
            for (x, dx) in xs_dxs[:4]:
                s_dx = sess.run(dx)
                n_dx = test_utils.ComputeNumericGradient(sess, loss, x)
                self._LogDiff(n_dx, s_dx)
                self.assertAllClose(n_dx, s_dx)
Example #2
    def testBasicGrad(self):
        time, batch, dims, vocab = 5, 3, 6, 8
        p = self._testParams(dims, vocab)
        p.dtype = tf.float64

        with self.session(use_gpu=False, graph=tf.Graph()) as sess:
            lm = p.Instantiate()
            np.random.seed(12345)
            inputs = np.random.normal(size=[time, batch, dims])
            inputs = tf.constant(inputs, tf.float64)
            paddings = np.zeros([time, batch])
            paddings[-1] = 1.0
            paddings = tf.constant(paddings, tf.float64)
            targets = tf.constant(np.random.randint(vocab, size=(time, batch)),
                                  tf.int32)

            xent_output, _ = lm.FPropDefaultTheta(
                inputs=inputs,
                paddings=paddings,
                state0=lm.zero_state(lm.theta, batch),
                labels=py_utils.NestedMap(class_weights=1 - paddings,
                                          class_ids=targets))

            lm_vars = lm.vars.Flatten()
            # Now add the backward graph.
            grads = tf.gradients(xent_output.avg_xent, lm_vars)

            tf.global_variables_initializer().run()
            self.assertEqual(len(lm_vars), len(grads))
            for x, grad_x in zip(lm_vars, grads):
                grad_symbolic = sess.run(grad_x)
                grad_numeric = test_utils.ComputeNumericGradient(
                    sess, xent_output.avg_xent, x, delta=1e-6)
                self.assertAllClose(grad_symbolic, grad_numeric, atol=0.005)
Example #3
    def testBasicGrad(self):
        time, batch, dims, vocab, condition_dim = 5, 3, 6, 8, 7

        p = lm_layers.ConditionalRnnLm.Params()
        p.name = 'conditionalrnnlm'
        p.dtype = tf.float64
        p.vocab_size = vocab
        p.emb.vocab_size = vocab
        p.emb.embedding_dim = dims
        model_dim = dims + condition_dim
        p.rnns.cell_tpl.num_output_nodes = model_dim
        p.rnns.cell_tpl.num_input_nodes = model_dim
        p.softmax.input_dim = model_dim
        p.softmax.num_classes = vocab
        p.condition_dim = condition_dim

        with self.session(use_gpu=False, graph=tf.Graph()) as sess:
            lm = p.Instantiate()
            np.random.seed(12345)
            inputs = np.random.randint(vocab, size=[time, batch])
            targets = np.zeros([time, batch])
            targets[:-1] = inputs[1:]
            inputs = tf.constant(inputs, tf.int32)
            paddings = np.zeros([time, batch])
            paddings[-1] = 1.0
            paddings = tf.constant(paddings, tf.float64)
            targets = tf.constant(targets, tf.int32)
            condition = tf.constant(np.ones([batch, condition_dim]),
                                    tf.float64)
            sess.run(tf.global_variables_initializer())
            xent_output, _ = lm.FPropDefaultTheta(
                inputs=inputs,
                paddings=paddings,
                state0=lm.zero_state(lm.theta, batch),
                condition=condition,
                labels=py_utils.NestedMap(class_weights=1 - paddings,
                                          class_ids=targets))

            lm_vars = lm.vars.Flatten()
            # Now add the backward graph.
            grads = tf.gradients(xent_output.avg_xent, lm_vars)

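            # Gradients w.r.t. embedding variables may come back as
            # tf.IndexedSlices; densify them so they can be compared
            # element-wise with the numerical gradients below.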
            for i, x in enumerate(grads):
                if isinstance(x, tf.IndexedSlices):
                    grads[i] = tf.math.unsorted_segment_sum(
                        x.values, x.indices, x.dense_shape[0])

            tf.global_variables_initializer().run()
            self.assertEqual(len(lm_vars), len(grads))
            for x, grad_x in zip(lm_vars, grads):
                grad_symbolic = sess.run(grad_x)
                grad_numeric = test_utils.ComputeNumericGradient(
                    sess, xent_output.avg_xent, x, delta=1e-6)
                self.assertAllClose(grad_symbolic, grad_numeric, atol=0.005)
Example #4
  def testNormalizedDepthwiseConv2DLayerBackProp(self):
    with self.session(use_gpu=True) as sess:
      output = self._testNormalizedDepthwiseConv2DHelper(dropconnect_prob=0.1)
      loss = tf.reduce_sum(output)
      all_vars = tf.trainable_variables()
      grads = tf.gradients(loss, all_vars)
      self.evaluate(tf.global_variables_initializer())
      sym_grads = [sg.eval() for sg in grads]
      num_grads = [
          test_utils.ComputeNumericGradient(sess, loss, v) for v in all_vars
      ]
      for sg, ng in zip(sym_grads, num_grads):
        self.assertAllClose(sg, ng, rtol=1e-02, atol=1e-02)
Example #5
  def testBasicGrad(self):
    time, batch, dims, hidden_dim, vocab = 5, 3, 6, 4, 8
    p = self._testParams(batch, dims, hidden_dim, vocab)
    p.dtype = tf.float64
    xent_output, lm_vars, grads = self._SetupGraph(
        p, time, batch, vocab, return_grad=True)
    with self.session() as sess:
      sess.run(tf.global_variables_initializer())
      for x, grad_x in zip(lm_vars, grads):
        grad_symbolic = sess.run(grad_x)
        grad_numeric = test_utils.ComputeNumericGradient(
            sess, xent_output.avg_xent, x, delta=1e-6)
        self.assertAllClose(grad_symbolic, grad_numeric, atol=0.005)
Example #6
    def testBasicGrad(self):
        time, batch, dims, hidden_dim, vocab = 5, 3, 6, 4, 8

        p = lm_layers.TransformerLm.Params()
        p.dtype = tf.float64
        p.name = 'transformerlm'
        p.vocab_size = vocab
        p.emb.vocab_size = vocab
        p.emb.embedding_dim = dims
        p.model_dim = dims
        p.num_trans_layers = 1
        p.trans_tpl.source_dim = dims
        p.trans_tpl.tr_atten_tpl.num_attention_heads = 2
        p.trans_tpl.tr_fflayer_tpl.hidden_dim = hidden_dim
        p.softmax.input_dim = dims
        p.softmax.num_classes = vocab

        with self.session(use_gpu=False, graph=tf.Graph()) as sess:
            lm = p.Instantiate()
            np.random.seed(12345)
            inputs = np.random.randint(vocab, size=[time, batch])
            targets = np.zeros([time, batch])
            targets[:-1] = inputs[1:]
            inputs = tf.constant(inputs, tf.int32)
            paddings = np.zeros([time, batch])
            paddings[-1] = 1.0
            paddings = tf.constant(paddings, tf.float64)
            targets = tf.constant(targets, tf.int32)
            xent_output, _ = lm.FPropDefaultTheta(
                inputs=inputs,
                paddings=paddings,
                labels=py_utils.NestedMap(class_weights=1 - paddings,
                                          class_ids=targets))

            lm_vars = lm.vars.Flatten()
            grads = tf.gradients(xent_output.avg_xent, lm_vars)

            for i, x in enumerate(grads):
                if isinstance(x, tf.IndexedSlices):
                    grads[i] = tf.math.unsorted_segment_sum(
                        x.values, x.indices, x.dense_shape[0])

            tf.global_variables_initializer().run()
            self.assertEqual(len(lm_vars), len(grads))
            for x, grad_x in zip(lm_vars, grads):
                grad_symbolic = sess.run(grad_x)
                grad_numeric = test_utils.ComputeNumericGradient(
                    sess, xent_output.avg_xent, x, delta=1e-6)
                self.assertAllClose(grad_symbolic, grad_numeric, atol=0.005)
Example #7
    def _DecoderGradientCheckerHelper(self,
                                      decoder_cls,
                                      feed_att_context_to_softmax=False):
        g = tf.Graph()
        with g.as_default():
            tf.set_random_seed(_TF_RANDOM_SEED)
            p = self._DecoderParams(dtype=tf.float64)
            p.feed_attention_context_vec_to_softmax = feed_att_context_to_softmax
            dec = decoder_cls(p)
            src_enc, src_enc_padding, targets = self._testInputs(
                dtype=tf.float64)
            loss, _ = dec.FPropDefaultTheta(src_enc, src_enc_padding, targets,
                                            None)['loss']
            all_vars = tf.trainable_variables()
            grads = tf.gradients(loss, all_vars)
            print('num of vars ', len(all_vars))

            def DenseGrad(var, grad):
                if isinstance(grad, tf.Tensor):
                    return grad
                elif isinstance(grad, tf.IndexedSlices):
                    return tf.unsorted_segment_sum(grad.values, grad.indices,
                                                   tf.shape(var)[0])

            grads = [DenseGrad(x, y) for x, y in zip(all_vars, grads)]

        with self.session(use_gpu=False, graph=g) as sess:
            tf.global_variables_initializer().run()
            symbolic_grads = [gd.eval() for gd in grads]
            numerical_grads = []
            for v in all_vars:
                numerical_grads.append(
                    test_utils.ComputeNumericGradient(sess,
                                                      loss,
                                                      v,
                                                      delta=1e-5))

            rets = {}
            for v, x, y in zip(all_vars, symbolic_grads, numerical_grads):
                print('symbolic_grads, numerical_grads :', v.name)
                print(x)
                print(y)
                self.assertAllClose(x, y)
                rets[v.name] = x

            return rets
Example #8
    def testBasicGrad(self):
        p = self._testParams(dtype=tf.float64)
        with self.session(use_gpu=False, graph=tf.Graph()) as sess:
            lm = p.Instantiate()
            inputs, paddings, targets = self._testInputs(dtype=tf.float64)
            xent_output, _ = lm.FPropDefaultTheta(
                inputs=inputs,
                paddings=paddings,
                labels=py_utils.NestedMap(class_weights=1 - paddings,
                                          class_ids=targets))

            lm_vars = lm.vars.Flatten()
            # Now add the backward graph.
            grads = tf.gradients(xent_output.avg_xent, lm_vars)

            tf.global_variables_initializer().run()
            self.assertEqual(len(lm_vars), len(grads))
            for x, grad_x in zip(lm_vars, grads):
                grad_symbolic = sess.run(grad_x)
                grad_numeric = test_utils.ComputeNumericGradient(
                    sess, xent_output.avg_xent, x, delta=1e-6)
                self.assertAllClose(grad_symbolic, grad_numeric, atol=0.005)
Example #9
    def testBProp(self):
        vocab, time, batch = 7, 4, 3
        p = self._MoeLmParams(vocab, True)
        p.dtype = tf.float64

        with self.session(graph=tf.Graph()) as sess:
            np.random.seed(54321)
            tf.random.set_seed(123456)
            lm = p.Instantiate()
            inputs, paddings, labels = self._GetData(vocab, time, batch)
            sess.run(tf.global_variables_initializer())
            xent_output, _ = lm.FPropDefaultTheta(
                inputs=inputs,
                paddings=tf.cast(paddings, p.dtype),
                state0=lm.zero_state(lm.theta, batch),
                labels=labels)

            lm_vars = lm.vars.Flatten()
            # Now add the backward graph.
            grads = tf.gradients(xent_output.avg_xent, lm_vars)

            for i, x in enumerate(grads):
                if isinstance(x, tf.IndexedSlices):
                    grads[i] = tf.math.unsorted_segment_sum(
                        x.values, x.indices, x.dense_shape[0])

            tf.global_variables_initializer().run()
            self.assertEqual(len(lm_vars), len(grads))
            step = 11  # Speed up the test.
            for x, grad_x in zip(lm_vars, grads):
                grad_symbolic = sess.run(grad_x)
                grad_numeric = test_utils.ComputeNumericGradient(
                    sess, xent_output.avg_xent, x, step=step, delta=1e-6)
                self.assertAllClose(
                    grad_symbolic.reshape([-1])[::step],
                    grad_numeric.reshape([-1])[::step])
Example #10
    def _testDecoderFPropGradientCheckerHelper(self, func_inline=False):
        config = tf.ConfigProto(graph_options=tf.GraphOptions(
            optimizer_options=tf.OptimizerOptions(
                do_function_inlining=func_inline)))
        with self.session(graph=tf.Graph(), use_gpu=False,
                          config=config) as sess:
            tf.set_random_seed(8372749040)
            np.random.seed(274854)
            vn_config = py_utils.VariationalNoiseParams(None, False, False)
            p = self._DecoderParams(vn_config)
            p.dtype = tf.float64

            dec = p.cls(p)
            src_seq_len = 5
            src_enc = tf.constant(np.random.uniform(size=(src_seq_len, 2, 8)),
                                  tf.float64)
            src_enc_padding = tf.constant(
                [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 1.0], [1.0, 1.0]],
                dtype=tf.float64)
            encoder_outputs = py_utils.NestedMap(encoded=src_enc,
                                                 padding=src_enc_padding)
            target_ids = tf.transpose(
                tf.constant([[0, 1, 2, 3], [1, 2, 3, 4], [10, 11, 12, 15],
                             [5, 6, 7, 8], [10, 5, 2, 5]],
                            dtype=tf.int32))
            target_labels = tf.transpose(
                tf.constant([[0, 1, 2, 3], [1, 2, 3, 4], [10, 11, 12, 13],
                             [5, 7, 8, 10], [10, 5, 2, 4]],
                            dtype=tf.int32))
            target_paddings = tf.transpose(
                tf.constant([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 1, 0],
                             [0, 1, 0, 0], [1, 1, 1, 1]],
                            dtype=tf.float64))
            target_transcripts = tf.constant(
                ['abcd', 'bcde', 'klmp', 'fghi', 'kfcf'])
            target_weights = 1.0 - target_paddings

            targets = py_utils.NestedMap({
                'ids': target_ids,
                'labels': target_labels,
                'weights': target_weights,
                'paddings': target_paddings,
                'transcripts': target_transcripts,
            })
            metrics = dec.FPropDefaultTheta(encoder_outputs, targets)
            loss = metrics['loss'][0]
            all_vars = tf.all_variables()
            grads = tf.gradients(loss, all_vars)

            def DenseGrad(var, grad):
                if isinstance(grad, tf.Tensor):
                    return grad
                elif isinstance(grad, tf.IndexedSlices):
                    return tf.unsorted_segment_sum(grad.values, grad.indices,
                                                   tf.shape(var)[0])

            dense_grads = [DenseGrad(x, y) for (x, y) in zip(all_vars, grads)]

            tf.global_variables_initializer().run()

            test_utils.CompareToGoldenSingleFloat(self, 3.493656, loss.eval())
            # Second run to make sure the function is deterministic.
            test_utils.CompareToGoldenSingleFloat(self, 3.493656, loss.eval())

            symbolic_grads = [x.eval() for x in dense_grads if x is not None]
            numerical_grads = []
            for v in all_vars:
                numerical_grads.append(
                    test_utils.ComputeNumericGradient(sess, loss, v))

            for x, y in zip(symbolic_grads, numerical_grads):
                self.assertAllClose(x, y)
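All of the examples above follow the same recipe: build a scalar loss in a graph, take the symbolic gradients with tf.gradients, densify any tf.IndexedSlices, and compare each symbolic gradient against test_utils.ComputeNumericGradient inside a single session. The sketch below distills that recipe into a minimal, self-contained test. The tiny linear model and the class and method names are illustrative only; the sketch assumes the Lingvo-style imports used by the tests above and that ComputeNumericGradient(sess, loss, v, delta=...) returns a numpy array shaped like v, as the examples suggest.

import numpy as np

from lingvo import compat as tf
from lingvo.core import test_utils


class GradientCheckSketch(test_utils.TestCase):

  def testTinyLinearModelGrad(self):
    with self.session(use_gpu=False, graph=tf.Graph()) as sess:
      np.random.seed(12345)
      # A tiny float64 model: loss = sum((x @ w)^2).
      x = tf.constant(np.random.normal(size=[3, 4]), tf.float64)
      w = tf.Variable(np.random.normal(size=[4, 2]), name='w')
      loss = tf.reduce_sum(tf.square(tf.matmul(x, w)))

      # Symbolic gradients from the backward graph.
      all_vars = tf.trainable_variables()
      grads = tf.gradients(loss, all_vars)
      sess.run(tf.global_variables_initializer())

      # Compare each symbolic gradient with its finite-difference estimate.
      for v, grad_v in zip(all_vars, grads):
        grad_symbolic = sess.run(grad_v)
        grad_numeric = test_utils.ComputeNumericGradient(
            sess, loss, v, delta=1e-6)
        self.assertAllClose(grad_symbolic, grad_numeric, atol=0.005)


if __name__ == '__main__':
  tf.test.main()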