Example 1
    def testTensorLearningRateAndMomentum(self):
        for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
            with self.test_session():
                var0 = variables.Variable([1.0, 2.0], dtype=dtype)
                var1 = variables.Variable([3.0, 4.0], dtype=dtype)
                grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
                grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
                mom_opt = momentum_lib.MomentumOptimizer(
                    learning_rate=constant_op.constant(2.0),
                    momentum=constant_op.constant(0.9))
                mom_update = mom_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                variables.global_variables_initializer().run()
                # Check we have slots
                self.assertEqual(["momentum"], mom_opt.get_slot_names())
                slot0 = mom_opt.get_slot(var0, "momentum")
                self.assertEqual(slot0.get_shape(), var0.get_shape())
                self.assertFalse(slot0 in variables.trainable_variables())
                slot1 = mom_opt.get_slot(var1, "momentum")
                self.assertEqual(slot1.get_shape(), var1.get_shape())
                self.assertFalse(slot1 in variables.trainable_variables())

                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], var0.eval())
                self.assertAllClose([3.0, 4.0], var1.eval())
                # Step 1: the momentum accumulators were 0. So we should see a normal
                # update: v -= grad * learning_rate
                mom_update.run()
                # Check that the momentum accumulators have been updated.
                self.assertAllCloseAccordingToType(np.array([0.1, 0.1]),
                                                   slot0.eval())
                self.assertAllCloseAccordingToType(np.array([0.01, 0.01]),
                                                   slot1.eval())
                # Check that the parameters have been updated.
                self.assertAllCloseAccordingToType(
                    np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]),
                    var0.eval())
                self.assertAllCloseAccordingToType(
                    np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]),
                    var1.eval())
                # Step 2: the momentum accumulators contain the previous update.
                mom_update.run()
                # Check that the momentum accumulators have been updated.
                self.assertAllCloseAccordingToType(
                    np.array([(0.9 * 0.1 + 0.1), (0.9 * 0.1 + 0.1)]),
                    slot0.eval())
                self.assertAllCloseAccordingToType(
                    np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]),
                    slot1.eval())
                # Check that the parameters have been updated.
                self.assertAllCloseAccordingToType(
                    np.array([
                        1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0),
                        2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0)
                    ]), var0.eval())
                self.assertAllCloseAccordingToType(
                    np.array([
                        2.98 - ((0.9 * 0.01 + 0.01) * 2.0),
                        3.98 - ((0.9 * 0.01 + 0.01) * 2.0)
                    ]), var1.eval())
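
The expected values in the assertions above follow from the classical momentum recurrence that MomentumOptimizer applies on every step: accum = momentum * accum + grad, then var -= learning_rate * accum. A self-contained NumPy check of the two steps, shown only to make the arithmetic behind the assertions explicit:

    import numpy as np

    lr, momentum = 2.0, 0.9
    var0, var1 = np.array([1.0, 2.0]), np.array([3.0, 4.0])
    grad0, grad1 = np.array([0.1, 0.1]), np.array([0.01, 0.01])
    accum0, accum1 = np.zeros(2), np.zeros(2)

    for _ in range(2):
        # Accumulator first, then the parameter update, as apply_gradients does.
        accum0 = momentum * accum0 + grad0
        accum1 = momentum * accum1 + grad1
        var0 = var0 - lr * accum0
        var1 = var1 - lr * accum1

    print(var0)  # [0.42  1.42 ]   i.e. 1.0 - 0.2 - 0.38, 2.0 - 0.2 - 0.38
    print(var1)  # [2.942 3.942]   i.e. 2.98 - 0.038, 3.98 - 0.038
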
Example 2
 def testNesterovMomentum(self):
     for dtype in [dtypes.float32, dtypes.float64]:
         with self.test_session():
             var0 = variables.Variable([1.0, 2.0], dtype=dtype)
             var1 = variables.Variable([3.0, 4.0], dtype=dtype)
             var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
             var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
             accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
             accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
             cost = 5 * var0 * var0 + 3 * var1
             global_step = variables.Variable(array_ops.zeros([],
                                                              dtypes.int64),
                                              name="global_step")
             mom_op = momentum_lib.MomentumOptimizer(learning_rate=2.0,
                                                     momentum=0.9,
                                                     use_nesterov=True)
             opt_op = mom_op.minimize(cost, global_step, [var0, var1])
             variables.global_variables_initializer().run()
             for t in range(1, 5):
                 opt_op.run()
                 var0_np, accum0_np = self._update_nesterov_momentum_numpy(
                     var0_np, accum0_np, var0_np * 10, 2.0, 0.9)
                 var1_np, accum1_np = self._update_nesterov_momentum_numpy(
                     var1_np, accum1_np, 3, 2.0, 0.9)
                 self.assertAllClose(var0_np, var0.eval())
                 self.assertAllClose(var1_np, var1.eval())
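
The helper _update_nesterov_momentum_numpy used here (and again in Example 7) is not shown on this page. A minimal module-level sketch of what it plausibly computes, assuming the standard Nesterov variant that MomentumOptimizer(use_nesterov=True) applies; this is an assumption, not the original helper:

    def _update_nesterov_momentum_numpy(var, accum, g, lr, momentum):
        # accum_t = momentum * accum_{t-1} + g
        accum = accum * momentum + g
        # var_t = var_{t-1} - lr * g - lr * momentum * accum_t  (look-ahead step)
        var = var - g * lr - accum * momentum * lr
        return var, accum
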
Example 3
    def testVariablesAcrossGraphs(self):
        optimizer = momentum_lib.MomentumOptimizer(0.01, 0.5)
        with ops.Graph().as_default():
            var0 = resource_variable_ops.ResourceVariable([1.0, 2.0],
                                                          dtype=dtypes.float32,
                                                          name="var0")
            var1 = resource_variable_ops.ResourceVariable([3.0, 4.0],
                                                          dtype=dtypes.float32,
                                                          name="var1")
            if context.executing_eagerly():
                loss = lambda: math_ops.reduce_sum(var0 + var1)
            else:
                loss = math_ops.reduce_sum(var0 + var1)
            optimizer.minimize(loss)
            optimizer_variables = optimizer.variables()
            self.assertStartsWith(optimizer_variables[0].name, "var0")
            self.assertStartsWith(optimizer_variables[1].name, "var1")
            self.assertEqual(2, len(optimizer_variables))

        with ops.Graph().as_default():
            var2 = resource_variable_ops.ResourceVariable([1.0, 2.0],
                                                          dtype=dtypes.float32,
                                                          name="var2")
            var3 = resource_variable_ops.ResourceVariable([3.0, 4.0],
                                                          dtype=dtypes.float32,
                                                          name="var3")
            if context.executing_eagerly():
                loss = lambda: math_ops.reduce_sum(var2 + var3)
            else:
                loss = math_ops.reduce_sum(var2 + var3)
            optimizer.minimize(loss)
            optimizer_variables = optimizer.variables()
            self.assertStartsWith(optimizer_variables[0].name, "var2")
            self.assertStartsWith(optimizer_variables[1].name, "var3")
            self.assertEqual(2, len(optimizer_variables))
Example 4
    def testMinimizeSparseResourceVariable(self):
        for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
            # This test invokes the ResourceSparseApplyMomentum operation, which
            # did not have a registered GPU kernel as of April 2018. With graph
            # execution, the placement algorithm notices this and automatically
            # places the variable in CPU (host) memory. With eager execution,
            # the variable would be placed in GPU memory if available, which
            # would then conflict with the future invocation of the
            # ResourceSparseApplyMomentum operation.
            # To work around this discrepancy, for now we force the variable
            # to be placed on CPU.
            with ops.device("/cpu:0"):
                var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]],
                                                              dtype=dtype)

            # pylint: disable=cell-var-from-loop
            def loss():
                x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
                pred = math_ops.matmul(
                    embedding_ops.embedding_lookup([var0], [0]), x)
                return pred * pred

            # pylint: enable=cell-var-from-loop

            opt = momentum_lib.MomentumOptimizer(learning_rate=1.0,
                                                 momentum=0.0)
            sgd_op = opt.minimize(loss)
            self.evaluate(variables.global_variables_initializer())
            # Run 1 step of sgd
            self.evaluate(sgd_op)
            # Validate updated params
            self.assertAllCloseAccordingToType([[-111, -138]],
                                               self.evaluate(var0))
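
The expected result [[-111, -138]] is one plain gradient step: the lookup returns row 0 of var0, pred = 1*4 + 2*5 = 14, and d(pred*pred)/dvar0 = 2 * pred * x^T = [112, 140]; with learning_rate=1.0 and momentum=0.0 the row becomes [1 - 112, 2 - 140]. A quick NumPy check of that arithmetic:

    import numpy as np

    var0 = np.array([[1.0, 2.0]])
    x = np.array([[4.0], [5.0]])
    pred = var0 @ x              # [[14.]]
    grad = 2.0 * pred * x.T      # [[112., 140.]]
    print(var0 - 1.0 * grad)     # [[-111. -138.]]
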
Example 5
 def testLikeDistBeliefMom01(self):
     with self.test_session():
         db_grad, db_out = self._dbParamsMom01()
         num_samples = len(db_grad)
         var0 = variables.Variable([0.0] * num_samples)
         grads0 = constant_op.constant([0.0] * num_samples)
         mom_opt = momentum_lib.MomentumOptimizer(learning_rate=0.1,
                                                  momentum=0.1)
         mom_update = mom_opt.apply_gradients(zip([grads0], [var0]))
         variables.global_variables_initializer().run()
         for i in range(num_samples):
             mom_update.run(feed_dict={grads0: db_grad[i]})
             self.assertAllClose(np.array(db_out[i]), var0.eval())
Example 6
    def testMinimizeWith2DIndiciesForEmbeddingLookup(self):
        var0 = resource_variable_ops.ResourceVariable(array_ops.ones([2, 2]))

        def loss():
            return math_ops.reduce_sum(
                embedding_ops.embedding_lookup(var0, [[1]]))

        opt = momentum_lib.MomentumOptimizer(learning_rate=1.0, momentum=0.0)
        sgd_op = opt.minimize(loss)
        self.evaluate(variables.global_variables_initializer())
        self.evaluate(sgd_op)
        self.assertAllCloseAccordingToType([[1, 1], [0, 0]],
                                           self.evaluate(var0))
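
The expected value [[1, 1], [0, 0]] again reduces to a single plain gradient step: the 2-D indices [[1]] select row 1 of var0, the loss is the sum of that row, so the gradient is 1 on row 1 and 0 elsewhere; with learning_rate=1.0 and momentum=0.0 only row 1 is driven to zero. In NumPy terms:

    import numpy as np

    var0 = np.ones((2, 2))
    grad = np.zeros((2, 2))
    grad[1] = 1.0                # d(sum of row 1)/d(var0) is 1 only on row 1
    print(var0 - 1.0 * grad)     # [[1. 1.] [0. 0.]]
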
Example 7
 def testSparseNesterovMomentum(self):
     for dtype in [dtypes.float32, dtypes.float64]:
         with self.test_session():
             var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
             var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
             accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
             accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
             grads = []
             for t in range(1, 5):
                 grads.append(var0_np * 10)
                 var0_np, accum0_np = self._update_nesterov_momentum_numpy(
                     var0_np, accum0_np, var0_np * 10, 2.0, 0.9)
                 var1_np, accum1_np = self._update_nesterov_momentum_numpy(
                     var1_np, accum1_np, 3, 2.0, 0.9)
             var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
             var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
             accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
             accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
             var0 = variables.Variable(var0_np)
             var1 = variables.Variable(var1_np)
             loss = 5 * var0 * var0 + 3 * var1
             mom_op = momentum_lib.MomentumOptimizer(learning_rate=2.0,
                                                     momentum=0.9,
                                                     use_nesterov=True)
             x_feed = array_ops.placeholder(dtype)
             y_feed = ops.IndexedSlices(x_feed,
                                        constant_op.constant([0, 1]),
                                        constant_op.constant([2]))
             grads_and_vars = [(y_feed, var0),
                               (constant_op.constant([3.0, 3.0],
                                                     dtype=dtype), var1)]
             opt_update = mom_op.apply_gradients(grads_and_vars)
             variables.global_variables_initializer().run()
             for t in range(1, 5):
                 opt_update.run(feed_dict={x_feed: grads[t - 1]})
                 var0_np, accum0_np = self._update_nesterov_momentum_numpy(
                     var0_np, accum0_np, var0_np * 10, 2.0, 0.9)
                 var1_np, accum1_np = self._update_nesterov_momentum_numpy(
                     var1_np, accum1_np, 3, 2.0, 0.9)
                 self.assertAllClose(var0_np, var0.eval())
                 self.assertAllClose(var1_np, var1.eval())
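
For context, ops.IndexedSlices(values, indices, dense_shape) is the container apply_gradients accepts for sparse gradients; here it spans both entries of var0 (indices [0, 1], dense_shape [2]), so the fed gradient is effectively dense. A small illustration of how such a slice maps back to a dense array (the helper name densify is hypothetical):

    import numpy as np

    def densify(values, indices, dense_shape):
        # Scatter the sparse rows back into a dense array of the given shape.
        dense = np.zeros(dense_shape)
        for value, idx in zip(values, indices):
            dense[idx] += value
        return dense

    print(densify(np.array([10.0, 20.0]), [0, 1], [2]))  # [10. 20.]
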
Example 8
    def testMinimizeSparseResourceVariable(self):
        for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
            var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]],
                                                          dtype=dtype)

            # pylint: disable=cell-var-from-loop
            def loss():
                x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
                pred = math_ops.matmul(
                    embedding_ops.embedding_lookup([var0], [0]), x)
                return pred * pred

            # pylint: enable=cell-var-from-loop

            opt = momentum_lib.MomentumOptimizer(learning_rate=1.0,
                                                 momentum=0.0)
            sgd_op = opt.minimize(loss)
            self.evaluate(variables.global_variables_initializer())
            # Run 1 step of sgd
            self.evaluate(sgd_op)
            # Validate updated params
            self.assertAllCloseAccordingToType([[-111, -138]],
                                               self.evaluate(var0))
Example 9
    def testMinimizeWith2DIndiciesForEmbeddingLookup(self):
        # This test invokes the ResourceSparseApplyMomentum operation, which
        # did not have a registered GPU kernel as of April 2018. With graph
        # execution, the placement algorithm notices this and automatically
        # places the variable in CPU (host) memory. With eager execution,
        # the variable would be placed in GPU memory if available, which
        # would then conflict with the future invocation of the
        # ResourceSparseApplyMomentum operation.
        # To work around this discrepancy, for now we force the variable
        # to be placed on CPU.
        with ops.device("/cpu:0"):
            var0 = resource_variable_ops.ResourceVariable(
                array_ops.ones([2, 2]))

        def loss():
            return math_ops.reduce_sum(
                embedding_ops.embedding_lookup(var0, [[1]]))

        opt = momentum_lib.MomentumOptimizer(learning_rate=1.0, momentum=0.0)
        sgd_op = opt.minimize(loss)
        self.evaluate(variables.global_variables_initializer())
        self.evaluate(sgd_op)
        self.assertAllCloseAccordingToType([[1, 1], [0, 0]],
                                           self.evaluate(var0))
Example 10
    def testSparse(self):
        for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
            with self.test_session():
                var0 = variables.Variable(array_ops.zeros([4, 2], dtype=dtype))
                var1 = variables.Variable(
                    constant_op.constant(1.0, dtype, [4, 2]))
                grads0 = ops.IndexedSlices(
                    constant_op.constant([[.1, .1]], dtype=dtype),
                    constant_op.constant([1]), constant_op.constant([4, 2]))
                grads1 = ops.IndexedSlices(
                    constant_op.constant([[.01, .01], [.01, .01]],
                                         dtype=dtype),
                    constant_op.constant([2, 3]), constant_op.constant([4, 2]))
                mom_opt = momentum_lib.MomentumOptimizer(learning_rate=2.0,
                                                         momentum=0.9)
                mom_update = mom_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                variables.global_variables_initializer().run()

                # Check we have slots
                self.assertEqual(["momentum"], mom_opt.get_slot_names())
                slot0 = mom_opt.get_slot(var0, "momentum")
                self.assertEqual(slot0.get_shape(), var0.get_shape())
                slot1 = mom_opt.get_slot(var1, "momentum")
                self.assertEqual(slot1.get_shape(), var1.get_shape())

                # Fetch params to validate initial values
                self.assertAllClose([0, 0], var0.eval()[0])
                self.assertAllClose([0, 0], var0.eval()[1])
                self.assertAllClose([1, 1], var1.eval()[2])

                # Step 1: the momentum accumulators are 0. So we should see a normal
                # update: v -= grad * learning_rate
                mom_update.run()
                # Check that the momentum accumulators have been updated.
                self.assertAllCloseAccordingToType(np.array([0, 0]),
                                                   slot0.eval()[0])
                self.assertAllCloseAccordingToType(np.array([.1, .1]),
                                                   slot0.eval()[1])
                self.assertAllCloseAccordingToType(np.array([.01, .01]),
                                                   slot1.eval()[2])
                # Check that the parameters have been updated.
                self.assertAllCloseAccordingToType(np.array([0, 0]),
                                                   var0.eval()[0])
                self.assertAllCloseAccordingToType(
                    np.array([-(0.1 * 2.0), -(0.1 * 2.0)]),
                    var0.eval()[1])
                self.assertAllCloseAccordingToType(
                    np.array([1.0 - (0.01 * 2.0), 1.0 - (0.01 * 2.0)]),
                    var1.eval()[2])
                # Step 2: the momentum accumulators contain the previous update.
                mom_update.run()
                # Check that the momentum accumulators have been updated.
                self.assertAllClose(np.array([0, 0]), slot0.eval()[0])
                self.assertAllCloseAccordingToType(
                    np.array([(0.9 * 0.1 + 0.1), (0.9 * 0.1 + 0.1)]),
                    slot0.eval()[1])
                self.assertAllCloseAccordingToType(
                    np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]),
                    slot1.eval()[2])
                # Check that the parameters have been updated.
                self.assertAllClose(np.array([0, 0]), var0.eval()[0])
                self.assertAllCloseAccordingToType(
                    np.array([
                        -(0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0),
                        -(0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0)
                    ]),
                    var0.eval()[1])
                self.assertAllCloseAccordingToType(
                    np.array([
                        0.98 - ((0.9 * 0.01 + 0.01) * 2.0),
                        0.98 - ((0.9 * 0.01 + 0.01) * 2.0)
                    ]),
                    var1.eval()[2])
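
Only the rows that actually receive an IndexedSlices gradient move: row 0 of var0 never gets a gradient, so its accumulator and its parameters stay at zero, which is what the [0, 0] assertions check. For the rows that are touched, the dense recurrence from Example 1 applies unchanged; for instance, row 1 of var0:

    import numpy as np

    lr, momentum = 2.0, 0.9
    row = np.zeros(2)             # row 1 of var0
    grad = np.array([0.1, 0.1])
    accum = np.zeros(2)
    for _ in range(2):
        accum = momentum * accum + grad
        row = row - lr * accum
    print(row)  # [-0.58 -0.58], matching the step-2 assertion for var0.eval()[1]
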
Example 11
    def doTestBasic(self, use_resource=False, use_callable_params=False):
        for i, dtype in enumerate(
            [dtypes.half, dtypes.float32, dtypes.float64]):
            if use_resource:
                var0 = resource_variable_ops.ResourceVariable([1.0, 2.0],
                                                              dtype=dtype,
                                                              name="var0_%d" %
                                                              i)
                var1 = resource_variable_ops.ResourceVariable([3.0, 4.0],
                                                              dtype=dtype,
                                                              name="var1_%d" %
                                                              i)
            else:
                var0 = variables.Variable([1.0, 2.0], dtype=dtype)
                var1 = variables.Variable([3.0, 4.0], dtype=dtype)
            grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
            grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
            learning_rate = lambda: 2.0
            momentum = lambda: 0.9
            if not use_callable_params:
                learning_rate = learning_rate()
                momentum = momentum()
            mom_opt = momentum_lib.MomentumOptimizer(
                learning_rate=learning_rate, momentum=momentum)
            mom_update = mom_opt.apply_gradients(
                zip([grads0, grads1], [var0, var1]))

            if not context.executing_eagerly():
                self.evaluate(variables.global_variables_initializer())
                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                self.assertAllClose([3.0, 4.0], self.evaluate(var1))

            # Check we have slots
            self.assertEqual(["momentum"], mom_opt.get_slot_names())
            slot0 = mom_opt.get_slot(var0, "momentum")
            self.assertEqual(slot0.get_shape(), var0.get_shape())
            slot1 = mom_opt.get_slot(var1, "momentum")
            self.assertEqual(slot1.get_shape(), var1.get_shape())
            if not context.executing_eagerly():
                self.assertFalse(slot0 in variables.trainable_variables())
                self.assertFalse(slot1 in variables.trainable_variables())

            # Step 1: the momentum accumulators were 0. So we should see a normal
            # update: v -= grad * learning_rate
            if not context.executing_eagerly():
                self.evaluate(mom_update)
            # Check that the momentum accumulators have been updated.
            self.assertAllCloseAccordingToType(np.array([0.1, 0.1]),
                                               self.evaluate(slot0))
            self.assertAllCloseAccordingToType(np.array([0.01, 0.01]),
                                               self.evaluate(slot1))
            # Check that the parameters have been updated.
            self.assertAllCloseAccordingToType(
                np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]),
                self.evaluate(var0))
            self.assertAllCloseAccordingToType(
                np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]),
                self.evaluate(var1))
            # Step 2: the momentum accumulators contain the previous update.
            if context.executing_eagerly():
                mom_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            else:
                self.evaluate(mom_update)
            # Check that the momentum accumulators have been updated.
            self.assertAllCloseAccordingToType(
                np.array([(0.9 * 0.1 + 0.1), (0.9 * 0.1 + 0.1)]),
                self.evaluate(slot0))
            self.assertAllCloseAccordingToType(
                np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]),
                self.evaluate(slot1))
            # Check that the parameters have been updated.
            self.assertAllCloseAccordingToType(
                np.array([
                    1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0),
                    2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0)
                ]), self.evaluate(var0))
            self.assertAllCloseAccordingToType(
                np.array([
                    2.98 - ((0.9 * 0.01 + 0.01) * 2.0),
                    3.98 - ((0.9 * 0.01 + 0.01) * 2.0)
                ]), self.evaluate(var1))
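
doTestBasic is a parameterized helper rather than a test case; the surrounding class presumably exposes thin wrappers that pick the variable kind and execution mode. A plausible sketch of such wrappers (the method names here are assumptions, not taken from this page):

    def testBasic(self):
        with self.test_session():
            self.doTestBasic(use_resource=False)

    def testResourceBasic(self):
        self.doTestBasic(use_resource=True)

    def testBasicCallableParams(self):
        # Callable learning_rate/momentum are exercised under eager execution.
        with context.eager_mode():
            self.doTestBasic(use_resource=True, use_callable_params=True)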