Example n. 1
0
    def testZeroGradientNoOpAtFirstStep(self):
        """Test that epsilon handling is unnecessary for a zero gradient.

        With grad == 0 the accumulator stays at zero, and the first
        optimizer step must leave both the variable and the momentum
        slot exactly at their initial values.
        """

        with self.cached_session() as sess:
            # Scalar variable paired with an all-zero gradient.
            var = tf.Variable(0.5)
            grad = tf.Variable(0.0)
            opt = sm3.SM3Optimizer(learning_rate=self._learning_rate,
                                   momentum=self._momentum)

            step = opt.apply_gradients([(grad, var)])
            sess.run(tf.global_variables_initializer())

            # Check that variable and momentum are as expected before starting
            # training.
            var_np = sess.run(var)
            gbar_np = sess.run(opt.get_slot(var, 'momentum'))
            self.assertAllClose(0.5, var_np)
            self.assertAllClose(0.0, gbar_np)

            # Run one step of training.
            step.run()
            # The zero gradient must not move the variable or the momentum.
            var_np = sess.run(var)
            gbar_np = sess.run(opt.get_slot(var, 'momentum'))
            self.assertAllClose(0.5, var_np)
            self.assertAllClose(0.0, gbar_np)
Example n. 2
0
    def testDenseLayerMatrix(self):
        """SM3 update with gbar, and epsilon."""

        with self.cached_session() as sess:
            var = tf.Variable([[0.5, 0.5], [0.5, 0.5]])
            grad = tf.Variable([[0.1, 0.1], [0.01, 0.01]])
            opt = sm3.SM3Optimizer(learning_rate=0.1, momentum=0.9)

            step = opt.apply_gradients([(grad, var)])
            sess.run(tf.global_variables_initializer())

            def _state():
                # Current variable value and its momentum slot.
                return sess.run(var), sess.run(opt.get_slot(var, 'momentum'))

            # Before any training step: initial values, empty momentum.
            var_val, gbar_val = _state()
            self.assertAllClose([[0.5, 0.5], [0.5, 0.5]], var_val)
            self.assertAllClose([[0.0, 0.0], [0.0, 0.0]], gbar_val)

            # After the first step.
            sess.run(step)
            var_val, gbar_val = _state()
            self.assertAllClose([[0.49, 0.49], [0.49, 0.49]], var_val)
            self.assertAllClose([[0.1, 0.1], [0.1, 0.1]], gbar_val)

            # After the second step (looser tolerances for the hand-computed
            # expectations).
            sess.run(step)
            var_val, gbar_val = _state()
            self.assertAllClose([[0.4739, 0.4739], [0.4739, 0.4739]],
                                var_val,
                                atol=1e-4)
            self.assertAllClose([[0.16, 0.16], [0.16, 0.16]],
                                gbar_val,
                                atol=1e-2)
Example n. 3
0
    def testDenseLayerMatrix(self):
        """Test a single dense matrix layer.

        Runs two optimizer steps and checks each one against a NumPy
        reimplementation of the SM3 update for a [2, 2] variable.
        """

        with self.cached_session() as sess:
            var = tf.Variable([[0.5, 0.5], [0.5, 0.5]])
            grad_np = [[0.1, 0.05], [0.03, 0.02]]
            grad = tf.Variable(grad_np)
            opt = sm3.SM3Optimizer(learning_rate=self._learning_rate,
                                   momentum=self._momentum)

            step = opt.apply_gradients([(grad, var)])
            sess.run(tf.global_variables_initializer())

            # Check that variable and momentum are as expected before starting
            # training.
            var_np = sess.run(var)
            gbar_np = sess.run(opt.get_slot(var, 'momentum'))

            self.assertAllClose(var_np, [[0.5, 0.5], [0.5, 0.5]])
            self.assertAllClose([[0.0, 0.0], [0.0, 0.0]], gbar_np)

            # For a matrix, SM3 keeps a row accumulator and a column
            # accumulator; the per-entry accumulator is reconstructed as
            # their elementwise minimum ([2,1] x [1,2] broadcasts to [2,2]).
            row_accumulator = numpy.zeros([2, 1])
            col_accumulator = numpy.zeros([1, 2])
            accumulator = numpy.zeros_like(gbar_np)
            for _ in range(2):
                # Run a step of training.
                step.run()

                # Reconstruct the per-entry accumulator from the row/column
                # covers, then add this step's squared gradient.
                accumulator = numpy.minimum(row_accumulator, col_accumulator)
                accumulator += numpy.square(grad_np)
                # Update SM3 accumulators: each cover takes the max over the
                # other axis.
                row_accumulator = numpy.amax(accumulator,
                                             axis=1,
                                             keepdims=True)
                col_accumulator = numpy.amax(accumulator,
                                             axis=0,
                                             keepdims=True)
                # Expected preconditioned gradient, momentum, and parameter.
                # Note: var_np/gbar_np still hold the values fetched BEFORE
                # this step ran, which is exactly what the expectations need.
                exp_p_grad = grad_np / numpy.sqrt(accumulator)
                exp_gbar_np = (self._momentum * gbar_np +
                               (1 - self._momentum) * exp_p_grad)
                exp_var = var_np - self._learning_rate * exp_gbar_np
                # Check that variable and momentum are as expected after one step of
                # training.
                var_np = sess.run(var)
                gbar_np = sess.run(opt.get_slot(var, 'momentum'))

                self.assertAllClose(exp_var, var_np)
                self.assertAllClose(exp_gbar_np, gbar_np)
Example n. 4
0
    def testSparseUpdates(self):
        """SM3 sparse updates."""

        with self.cached_session() as sess:
            var = tf.Variable([[0.5], [0.5], [0.5], [0.5]])
            # Sparse gradient of 0.1 touching rows 1 and 3 only.
            # NOTE(review): the dense-shape argument is [2, 1] while var is
            # [4, 1]; the test passes, so SM3 apparently does not consult
            # it here -- confirm against the optimizer implementation.
            grad = tf.IndexedSlices(tf.constant([0.1, 0.1], shape=[2, 1]),
                                    tf.constant([1, 3]), tf.constant([2, 1]))
            opt = sm3.SM3Optimizer(learning_rate=0.1, momentum=0.9)
            step = opt.apply_gradients([(grad, var)])
            sess.run(tf.global_variables_initializer())

            # Before training, every row is at its initial value.
            self.assertAllClose([[0.5], [0.5], [0.5], [0.5]], sess.run(var))

            # One step moves only rows 1 and 3 (to 0.4); the untouched rows
            # stay at 0.5.
            sess.run(step)
            self.assertAllClose([[0.5], [0.4], [0.5], [0.4]], sess.run(var))
Example n. 5
0
    def testNoEpsilon(self):
        """SM3 update without epsilon."""

        with self.cached_session() as sess:
            var = tf.Variable(0.5)
            grad = tf.Variable(0.0)
            opt = sm3.SM3Optimizer(learning_rate=0.1, momentum=0.9)

            step = opt.apply_gradients([(grad, var)])
            sess.run(tf.global_variables_initializer())

            def _state():
                # Fetch the variable and its momentum slot.
                return sess.run(var), sess.run(opt.get_slot(var, 'momentum'))

            # Initial state: variable at 0.5, momentum empty.
            var_val, gbar_val = _state()
            self.assertAllClose(0.5, var_val)
            self.assertAllClose(0.0, gbar_val)

            # A zero gradient must leave both values unchanged -- no epsilon
            # is needed to guard the update.
            sess.run(step)
            var_val, gbar_val = _state()
            self.assertAllClose(0.5, var_val)
            self.assertAllClose(0.0, gbar_val)
Example n. 6
0
    def testDenseVectorLayer(self):
        """Test a single dense vector layer."""

        with self.cached_session() as sess:
            var = tf.Variable([0.5, 0.3])
            grad_np = [0.1, 0.1]
            grad = tf.Variable(grad_np)
            opt = sm3.SM3Optimizer(learning_rate=self._learning_rate,
                                   momentum=self._momentum)

            step = opt.apply_gradients([(grad, var)])
            sess.run(tf.global_variables_initializer())

            # State before any training step: initial values, empty momentum.
            var_np = sess.run(var)
            gbar_np = sess.run(opt.get_slot(var, 'momentum'))
            self.assertAllClose([0.5, 0.3], var_np)
            self.assertAllClose([0.0, 0.0], gbar_np)

            # For a vector, the expectation keeps a full per-entry
            # accumulator of squared gradients.
            accumulator = numpy.zeros_like(gbar_np)
            for _ in range(2):
                # Run a step of training.
                step.run()

                # Expected values, computed from the state fetched BEFORE
                # this step (var_np / gbar_np still hold the old values).
                accumulator = accumulator + numpy.square(grad_np)
                expected_p_grad = grad_np / numpy.sqrt(accumulator)
                expected_gbar = (self._momentum * gbar_np +
                                 (1 - self._momentum) * expected_p_grad)
                expected_var = var_np - self._learning_rate * expected_gbar

                # Fetch the post-step state and compare.
                var_np = sess.run(var)
                gbar_np = sess.run(opt.get_slot(var, 'momentum'))
                self.assertAllClose(expected_var, var_np)
                self.assertAllClose(expected_gbar, gbar_np)
Example n. 7
0
    def testSparseUpdates(self):
        """Test that checks sparse updates.

        Applies an IndexedSlices gradient touching rows 1 and 3 of a [4, 2]
        variable, then checks the updated variable and the optimizer's row
        accumulator against a NumPy reimplementation of the update.
        """

        with self.cached_session() as sess:
            var = tf.Variable([[0.5, 0.05], [0.05, 1.0], [0.15, 3.0],
                               [0.35, 2.0]])
            # A sparse gradient that updates index 1, and 3.
            grad_np = [[0.1, 0.05], [0.01, 1.5]]
            indices_np = [1, 3]
            shape = [2, 2]
            grad = tf.IndexedSlices(
                tf.constant(grad_np, shape=shape),
                tf.constant(indices_np),  # indices
                tf.constant(shape))  # shape
            opt = sm3.SM3Optimizer(learning_rate=self._learning_rate,
                                   momentum=self._momentum)
            step = opt.apply_gradients([(grad, var)])
            sess.run(tf.global_variables_initializer())
            # Check that variable and momentum are as expected before starting
            # training.
            var_np = sess.run(var)
            self.assertAllClose(
                [[0.5, 0.05], [0.05, 1.0], [0.15, 3.0], [0.35, 2.0]], var_np)
            # Run one step of training.
            step.run()
            # Reimplement the sparse update: only the touched rows accumulate
            # squared gradients.
            accumulator = numpy.zeros_like(var_np)
            accumulator[indices_np, :] += numpy.square(grad_np)
            # The row accumulator covers each row with its max entry.
            row_accumulator = numpy.amax(accumulator, axis=1, keepdims=True)
            exp_p_grad = grad_np / numpy.sqrt(accumulator[indices_np, :])
            # Fix: the original `exp_var_np = var_np` aliased the array, so
            # the in-place row assignment below silently mutated var_np too
            # (masked only by the rebinding of var_np right after). Copy so
            # the expected values stay independent of later fetches.
            exp_var_np = var_np.copy()
            exp_var_np[indices_np, :] = var_np[
                indices_np, :] - self._learning_rate * exp_p_grad
            var_np = sess.run(var)
            self.assertAllClose(exp_var_np, var_np)
            # The optimizer's stored row accumulator must match ours.
            row_accumulator_var = numpy.reshape(
                sess.run(opt.get_slot(var, 'accumulator_0')), [4, 1])
            self.assertAllClose(row_accumulator_var, row_accumulator)