Example 1
    def testConfig(self):
        opt = sgd.SGD(learning_rate=1.0, momentum=2.0, nesterov=True)
        config = opt.get_config()
        opt2 = sgd.SGD.from_config(config)
        self.assertEqual(opt._hyper["learning_rate"][1],
                         opt2._hyper["learning_rate"][1])
        self.assertEqual(opt._hyper["momentum"][1], opt2._hyper["momentum"][1])
        self.assertEqual(opt2._use_nesterov, True)

        opt = sgd.SGD(momentum=None)
        config = opt.get_config()
        opt2 = sgd.SGD.from_config(config)
        self.assertEqual(False, opt2._use_momentum)
 def testMinimizeResourceVariable(self):
     for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
         with self.cached_session():
             var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]],
                                                           dtype=dtype)
             var1 = resource_variable_ops.ResourceVariable([3.0],
                                                           dtype=dtype)
             x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
             pred = math_ops.matmul(var0, x) + var1
             loss = pred * pred
             sgd_op = sgd.SGD(1.0).minimize(loss)
             # TODO(apassos) calling initialize_resources on all resources here
             # doesn't work because the sessions and graph are reused across unit
             # tests and this would mean trying to reinitialize variables. Figure out
             # a long-term solution for this.
             resources.initialize_resources([var0, var1]).run()
             # Fetch params to validate initial values
             self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
             self.assertAllCloseAccordingToType([3.0], var1.eval())
             # Run 1 step of sgd
             sgd_op.run()
             # Validate updated params
             np_pred = 1.0 * 4.0 + 2.0 * 5.0 + 3.0
             np_grad = 2 * np_pred
             self.assertAllCloseAccordingToType(
                 [[1.0 - np_grad * 4.0, 2.0 - np_grad * 5.0]], var0.eval())
             self.assertAllCloseAccordingToType([3.0 - np_grad],
                                                var1.eval())
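
The expected values above come from differentiating the squared prediction by hand: pred = var0 · x + var1, so d(loss)/d(var0) = 2 * pred * x.T and d(loss)/d(var1) = 2 * pred. A minimal NumPy sketch of that arithmetic (learning rate 1.0, as in the test):

    import numpy as np

    var0 = np.array([[1.0, 2.0]])
    var1 = np.array([3.0])
    x = np.array([[4.0], [5.0]])
    pred = (var0 @ x).item() + var1[0]   # 1*4 + 2*5 + 3 = 17
    grad = 2.0 * pred                    # d(pred**2)/d(pred) = 34
    print(var0 - 1.0 * grad * x.T)       # [[-135, -168]]
    print(var1 - 1.0 * grad)             # [-31]
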
Example 3
    def testAggregationMethod(self):
        for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
            with self.cached_session():
                var0 = variables.Variable([1.0, 2.0], dtype=dtype)
                var1 = variables.Variable([3.0, 4.0], dtype=dtype)
                cost = 5 * var0 + 3 * var1
                global_step = variables.Variable(array_ops.zeros([],
                                                                 dtypes.int64),
                                                 name='global_step')
                sgd_op = sgd.SGD(3.0)
                opt_op = sgd_op.minimize(
                    cost,
                    global_step, [var0, var1],
                    aggregation_method=gradients_impl.AggregationMethod.
                    EXPERIMENTAL_ACCUMULATE_N)

                variables.global_variables_initializer().run()
                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], var0.eval())
                self.assertAllClose([3.0, 4.0], var1.eval())
                # Run 1 step of sgd through optimizer
                opt_op.run()
                # Validate updated params
                self.assertAllClose([-14., -13.], var0.eval())
                self.assertAllClose([-6., -5.], var1.eval())
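
The [-14, -13] and [-6, -5] expectations are a single SGD step with learning rate 3.0 on a cost whose per-element gradients are the constants 5 (for var0) and 3 (for var1):

    print([1.0 - 3.0 * 5, 2.0 - 3.0 * 5])   # [-14.0, -13.0]
    print([3.0 - 3.0 * 3, 4.0 - 3.0 * 3])   # [-6.0, -5.0]
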
 def testNesterovMomentum(self):
     for dtype in [dtypes.float32, dtypes.float64]:
         with self.cached_session():
             var0 = variables.Variable([1.0, 2.0], dtype=dtype)
             var1 = variables.Variable([3.0, 4.0], dtype=dtype)
             var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
             var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
             accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
             accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
             cost = 5 * var0 * var0 + 3 * var1
             global_step = variables.Variable(array_ops.zeros([],
                                                              dtypes.int64),
                                              name="global_step")
             mom_op = sgd.SGD(learning_rate=2.0,
                              momentum=0.9,
                              nesterov=True)
             opt_op = mom_op.minimize(cost, global_step, [var0, var1])
             variables.global_variables_initializer().run()
             for t in range(1, 5):
                 opt_op.run()
                 var0_np, accum0_np = self._update_nesterov_momentum_numpy(
                     var0_np, accum0_np, var0_np * 10, 2.0, 0.9)
                 var1_np, accum1_np = self._update_nesterov_momentum_numpy(
                     var1_np, accum1_np, 3, 2.0, 0.9)
                 self.assertAllClose(var0_np, var0.eval())
                 self.assertAllClose(var1_np, var1.eval())
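
The helper _update_nesterov_momentum_numpy is not shown in this excerpt. A plausible NumPy reference for the Nesterov update it models is sketched below; this is an assumption based on the standard formulation (accum = momentum * accum + grad; var -= lr * (grad + momentum * accum)), not the test class's actual helper:

    def _update_nesterov_momentum_numpy(var, accum, g, lr, momentum):
        # Assumed reference implementation; the real helper may be written differently.
        accum = accum * momentum + g
        var = var - lr * (g + accum * momentum)
        return var, accum
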
    def testMinimizeSparseResourceVariable(self):
        for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
            # This test invokes the ResourceSparseApplyMomentum operation, which
            # did not have a registered GPU kernel as of April 2018. With graph
            # execution, the placement algorithm notices this and automatically
            # places the variable in CPU (host) memory. With eager execution,
            # the variable would be placed in GPU memory if available, which
            # would then conflict with the future invocation of the
            # ResourceSparseApplyMomentum operation.
            # To work around this discrepancy, for now we force the variable
            # to be placed on CPU.
            with ops.device("/cpu:0"):
                var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]],
                                                              dtype=dtype)

            # pylint: disable=cell-var-from-loop
            def loss():
                x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
                pred = math_ops.matmul(
                    embedding_ops.embedding_lookup([var0], [0]), x)
                return pred * pred

            # pylint: enable=cell-var-from-loop

            opt = sgd.SGD(learning_rate=1.0, momentum=0.0)
            sgd_op = opt.minimize(loss)
            self.evaluate(variables.global_variables_initializer())
            # Run 1 step of sgd
            self.evaluate(sgd_op)
            # Validate updated params
            self.assertAllCloseAccordingToType([[-111, -138]],
                                               self.evaluate(var0))
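
The [[-111, -138]] expectation is one plain SGD step on the looked-up row: pred = 1*4 + 2*5 = 14, and d(pred**2)/d(row) = 2 * pred * x. The same arithmetic in NumPy:

    import numpy as np

    row = np.array([1.0, 2.0])
    x = np.array([4.0, 5.0])
    grad = 2.0 * (row @ x) * x     # 2 * 14 * [4, 5] = [112, 140]
    print(row - 1.0 * grad)        # [-111, -138]
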
Example 6
    def testPrecomputedGradient(self):
        for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
            with self.cached_session():
                var0 = variables.Variable([1.0, 2.0], dtype=dtype)
                var1 = variables.Variable([3.0, 4.0], dtype=dtype)
                cost = 5 * var0 + 3 * var1
                grad_loss = constant_op.constant([42, -42], dtype=dtype)
                global_step = variables.Variable(array_ops.zeros([],
                                                                 dtypes.int64),
                                                 name='global_step')
                sgd_op = sgd.SGD(3.0)
                opt_op = sgd_op.minimize(cost,
                                         global_step, [var0, var1],
                                         grad_loss=grad_loss)

                variables.global_variables_initializer().run()
                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], var0.eval())
                self.assertAllClose([3.0, 4.0], var1.eval())
                # Run 1 step of sgd through optimizer
                opt_op.run()
                # Validate updated params
                self.assertAllClose(
                    [1.0 - 3 * 5 * 42.0, 2.0 - 3 * 5 * (-42.0)], var0.eval())
                self.assertAllClose(
                    [3.0 - 3 * 3 * 42.0, 4.0 - 3 * 3 * (-42.0)], var1.eval())
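
The expected values encode the chain rule with the precomputed grad_loss: the gradient of cost with respect to each variable element (5 for var0, 3 for var1) is scaled elementwise by grad_loss = [42, -42] and then by the learning rate 3.0. In NumPy terms:

    import numpy as np

    lr = 3.0
    grad_loss = np.array([42.0, -42.0])
    print(np.array([1.0, 2.0]) - lr * 5 * grad_loss)   # [-629, 632]
    print(np.array([3.0, 4.0]) - lr * 3 * grad_loss)   # [-375, 382]
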
    def testVariablesAcrossGraphs(self):
        optimizer = sgd.SGD(0.01, 0.5)
        with ops.Graph().as_default():
            var0 = resource_variable_ops.ResourceVariable([1.0, 2.0],
                                                          dtype=dtypes.float32,
                                                          name="var0")
            var1 = resource_variable_ops.ResourceVariable([3.0, 4.0],
                                                          dtype=dtypes.float32,
                                                          name="var1")
            loss = math_ops.reduce_sum(var0 + var1)
            optimizer.minimize(loss)
            optimizer_variables = optimizer.variables()
            self.assertStartsWith(optimizer_variables[0].name, "var0")
            self.assertStartsWith(optimizer_variables[1].name, "var1")
            self.assertEquals(2, len(optimizer_variables))

        with ops.Graph().as_default():
            var2 = resource_variable_ops.ResourceVariable([1.0, 2.0],
                                                          dtype=dtypes.float32,
                                                          name="var2")
            var3 = resource_variable_ops.ResourceVariable([3.0, 4.0],
                                                          dtype=dtypes.float32,
                                                          name="var3")
            loss = math_ops.reduce_sum(var2 + var3)
            optimizer.minimize(loss)
            optimizer_variables = optimizer.variables()
            self.assertStartsWith(optimizer_variables[0].name, "var2")
            self.assertStartsWith(optimizer_variables[1].name, "var3")
            self.assertEquals(2, len(optimizer_variables))
Example 8
    def testBasic(self):
        for i, dtype in enumerate(
            [dtypes.half, dtypes.float32, dtypes.float64]):
            var0 = resource_variable_ops.ResourceVariable([1.0, 2.0],
                                                          dtype=dtype)
            var1 = resource_variable_ops.ResourceVariable([3.0, 4.0],
                                                          dtype=dtype)

            def loss():
                return 5 * var0 + 3 * var1  # pylint: disable=cell-var-from-loop

            # Note that for eager execution, minimize expects a function instead of a
            # Tensor.
            global_step = resource_variable_ops.ResourceVariable(
                array_ops.zeros([], dtypes.int64), name='global_step_%d' % i)
            sgd_op = sgd.SGD(3.0)

            self.evaluate(variables.global_variables_initializer())
            # Fetch params to validate initial values
            self.assertAllClose([1.0, 2.0], self.evaluate(var0))
            self.assertAllClose([3.0, 4.0], self.evaluate(var1))
            # Run 1 step of sgd through optimizer
            opt_op = sgd_op.minimize(loss, global_step, [var0, var1])
            self.evaluate(opt_op)
            # Validate updated params
            self.assertAllClose([-14., -13.], self.evaluate(var0))
            self.assertAllClose([-6., -5.], self.evaluate(var1))
 def testBasicResourceVariable(self):
     for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
         with self.cached_session():
             var0 = resource_variable_ops.ResourceVariable([1.0, 2.0],
                                                           dtype=dtype)
             var1 = resource_variable_ops.ResourceVariable([3.0, 4.0],
                                                           dtype=dtype)
             grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
             grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
             sgd_op = sgd.SGD(3.0).apply_gradients(
                 zip([grads0, grads1], [var0, var1]))
             # TODO(apassos) calling initialize_resources on all resources here
             # doesn't work because the sessions and graph are reused across unit
             # tests and this would mean trying to reinitialize variables. Figure out
             # a long-term solution for this.
             resources.initialize_resources([var0, var1]).run()
             # Fetch params to validate initial values
             self.assertAllCloseAccordingToType([1.0, 2.0], var0.eval())
             self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval())
             # Run 1 step of sgd
             sgd_op.run()
             # Validate updated params
             self.assertAllCloseAccordingToType(
                 [1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1], var0.eval())
             self.assertAllCloseAccordingToType(
                 [3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01], var1.eval())
    def testTensorLearningRateAndMomentum(self):
        for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
            with self.cached_session():
                var0 = variables.Variable([1.0, 2.0], dtype=dtype)
                var1 = variables.Variable([3.0, 4.0], dtype=dtype)
                grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
                grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
                mom_opt = sgd.SGD(learning_rate=constant_op.constant(2.0),
                                  momentum=constant_op.constant(0.9))
                mom_update = mom_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                variables.global_variables_initializer().run()
                # Check we have slots
                self.assertEqual(["momentum"], mom_opt.get_slot_names())
                slot0 = mom_opt.get_slot(var0, "momentum")
                self.assertEquals(slot0.get_shape(), var0.get_shape())
                self.assertFalse(slot0 in variables.trainable_variables())
                slot1 = mom_opt.get_slot(var1, "momentum")
                self.assertEquals(slot1.get_shape(), var1.get_shape())
                self.assertFalse(slot1 in variables.trainable_variables())

                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], var0.eval())
                self.assertAllClose([3.0, 4.0], var1.eval())
                # Step 1: the momentum accumulators were 0, so we should see a normal
                # update: v -= grad * learning_rate
                mom_update.run()
                # Check that the momentum accumulators have been updated.
                self.assertAllCloseAccordingToType(np.array([0.1, 0.1]),
                                                   slot0.eval())
                self.assertAllCloseAccordingToType(np.array([0.01, 0.01]),
                                                   slot1.eval())
                # Check that the parameters have been updated.
                self.assertAllCloseAccordingToType(
                    np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]),
                    var0.eval())
                self.assertAllCloseAccordingToType(
                    np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]),
                    var1.eval())
                # Step 2: the momentum accumulators contain the previous update.
                mom_update.run()
                # Check that the momentum accumulators have been updated.
                self.assertAllCloseAccordingToType(
                    np.array([(0.9 * 0.1 + 0.1), (0.9 * 0.1 + 0.1)]),
                    slot0.eval())
                self.assertAllCloseAccordingToType(
                    np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]),
                    slot1.eval())
                # Check that the parameters have been updated.
                self.assertAllCloseAccordingToType(
                    np.array([
                        1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0),
                        2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0)
                    ]), var0.eval())
                self.assertAllCloseAccordingToType(
                    np.array([
                        2.98 - ((0.9 * 0.01 + 0.01) * 2.0),
                        3.98 - ((0.9 * 0.01 + 0.01) * 2.0)
                    ]), var1.eval())
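
The assertions above follow the classic (non-Nesterov) momentum recurrence: accum = momentum * accum + grad, then var -= lr * accum. A small NumPy sketch of the two steps for var0:

    import numpy as np

    lr, momentum = 2.0, 0.9
    var0 = np.array([1.0, 2.0])
    accum = np.zeros(2)
    grad = np.array([0.1, 0.1])
    for _ in range(2):
        accum = momentum * accum + grad
        var0 = var0 - lr * accum
    print(accum)   # [0.19, 0.19] == 0.9 * 0.1 + 0.1
    print(var0)    # [0.42, 1.42] == [1, 2] - 0.2 - 0.38
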
Example 11
 def testTrainOp(self):
     with self.cached_session():
         var0 = variables.Variable([1.0, 2.0])
         var1 = variables.Variable([3.0, 4.0])
         cost = 5 * var0 + 3 * var1
         global_step = variables.Variable(array_ops.zeros([], dtypes.int64),
                                          name='global_step')
         sgd_op = sgd.SGD(3.0)
         opt_op = sgd_op.minimize(cost, global_step, [var0, var1])
         self.assertTrue(
             opt_op in ops.get_collection(ops.GraphKeys.TRAIN_OP))
Example 12
 def testNoGradientsForAnyVariables_ApplyGradients(self):
     for i, dtype in enumerate(
         [dtypes.half, dtypes.float32, dtypes.float64]):
         var0 = resource_variable_ops.ResourceVariable([1.0, 2.0],
                                                       dtype=dtype)
         var1 = resource_variable_ops.ResourceVariable([3.0, 4.0],
                                                       dtype=dtype)
         sgd_op = sgd.SGD(3.0)
         with self.assertRaisesRegexp(
                 ValueError, 'No gradients provided for any variable'):
             sgd_op.apply_gradients([(None, var0), (None, var1)])
 def testGradWrtRef(self):
     for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
         with self.cached_session():
             opt = sgd.SGD(3.0)
             values = [1.0, 3.0]
             vars_ = [variables.Variable([v], dtype=dtype) for v in values]
             grads_and_vars = opt.compute_gradients(vars_[0] + vars_[1],
                                                    vars_)
             variables.global_variables_initializer().run()
             for grad, _ in grads_and_vars:
                 self.assertAllCloseAccordingToType([1.0], grad.eval())
Example 14
 def testStopGradients(self):
     with self.cached_session():
         var0 = variables.Variable([1.0, 2.0], name='var0')
         var1 = variables.Variable([3.0, 4.0], name='var1')
         var0_id = array_ops.identity(var0)
         cost = 5 * var0_id + 3 * var1
         sgd_op = sgd.SGD(3.0)
         grads_and_vars = sgd_op.compute_gradients(cost, [var0, var1],
                                                   stop_gradients=[var0_id])
         grad_dict = {var.op.name: grad for grad, var in grads_and_vars}
         self.assertIsNone(grad_dict['var0'])
         self.assertIsNotNone(grad_dict['var1'])
 def testLikeDistBeliefMom01(self):
     with self.cached_session():
         db_grad, db_out = self._dbParamsMom01()
         num_samples = len(db_grad)
         var0 = variables.Variable([0.0] * num_samples)
         grads0 = constant_op.constant([0.0] * num_samples)
         mom_opt = sgd.SGD(learning_rate=0.1, momentum=0.1)
         mom_update = mom_opt.apply_gradients(zip([grads0], [var0]))
         variables.global_variables_initializer().run()
         for i in xrange(num_samples):
             mom_update.run(feed_dict={grads0: db_grad[i]})
             self.assertAllClose(np.array(db_out[i]), var0.eval())
Example 16
    def testNoGradientsForAnyVariables_Minimize(self):
        for i, dtype in enumerate(
            [dtypes.half, dtypes.float32, dtypes.float64]):
            var0 = resource_variable_ops.ResourceVariable([1.0, 2.0],
                                                          dtype=dtype)
            var1 = resource_variable_ops.ResourceVariable([3.0, 4.0],
                                                          dtype=dtype)

            def loss():
                return constant_op.constant(5.0)

            sgd_op = sgd.SGD(3.0)
            with self.assertRaisesRegexp(
                    ValueError, 'No gradients provided for any variable'):
                sgd_op.minimize(loss, var_list=[var0, var1])
Example 17
    def testComputeGradientsWithTensors(self):
        x = ops.convert_to_tensor(1.0)

        def f():
            return x * x

        sgd_op = sgd.SGD(3.0)
        grads_and_vars = sgd_op.compute_gradients(f, [x])
        self.assertEqual(1, len(grads_and_vars))
        grad, x_as_var = grads_and_vars[0]
        self.assertIs(x, x_as_var)
        self.assertEqual(2.0, self.evaluate(grad))

        with self.assertRaises(NotImplementedError):
            sgd_op.apply_gradients(grads_and_vars)
Example 18
    def testNoGradients(self):
        for i, dtype in enumerate(
            [dtypes.half, dtypes.float32, dtypes.float64]):
            var0 = resource_variable_ops.ResourceVariable([1.0, 2.0],
                                                          dtype=dtype)
            var1 = resource_variable_ops.ResourceVariable([3.0, 4.0],
                                                          dtype=dtype)

            # pylint: disable=cell-var-from-loop
            def loss():
                return 5 * var0

            # pylint: enable=cell-var-from-loop
            sgd_op = sgd.SGD(3.0)
            with self.assertRaisesRegexp(ValueError, 'No gradients'):
                # var1 has no gradient
                sgd_op.minimize(loss, var_list=[var1])
Example 19
    def testNoVariables(self):
        for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
            # pylint: disable=cell-var-from-loop
            def loss():
                var0 = resource_variable_ops.ResourceVariable([1.0, 2.0],
                                                              dtype=dtype,
                                                              trainable=False,
                                                              name='a')
                var1 = resource_variable_ops.ResourceVariable([3.0, 4.0],
                                                              dtype=dtype,
                                                              trainable=False,
                                                              name='b')
                return 5 * var0 + var1

            # pylint: enable=cell-var-from-loop
            sgd_op = sgd.SGD(3.0)
            with self.assertRaisesRegexp(ValueError, 'No.*variables'):
                sgd_op.minimize(loss)
 def testSparseNesterovMomentum(self):
     for dtype in [dtypes.float32, dtypes.float64]:
         with self.cached_session():
             var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
             var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
             accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
             accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
             grads = []
             for t in range(1, 5):
                 grads.append(var0_np * 10)
                 var0_np, accum0_np = self._update_nesterov_momentum_numpy(
                     var0_np, accum0_np, var0_np * 10, 2.0, 0.9)
                 var1_np, accum1_np = self._update_nesterov_momentum_numpy(
                     var1_np, accum1_np, 3, 2.0, 0.9)
             var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
             var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
             accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
             accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
             var0 = variables.Variable(var0_np)
             var1 = variables.Variable(var1_np)
             loss = 5 * var0 * var0 + 3 * var1
             mom_op = sgd.SGD(learning_rate=2.0,
                              momentum=0.9,
                              nesterov=True)
             x_feed = array_ops.placeholder(dtype)
             y_feed = ops.IndexedSlices(x_feed, constant_op.constant([0,
                                                                      1]),
                                        constant_op.constant([2]))
             grads_and_vars = [(y_feed, var0),
                               (constant_op.constant([3.0, 3.0],
                                                     dtype=dtype), var1)]
             opt_update = mom_op.apply_gradients(grads_and_vars)
             variables.global_variables_initializer().run()
             for t in range(1, 5):
                 opt_update.run(feed_dict={x_feed: grads[t - 1]})
                 var0_np, accum0_np = self._update_nesterov_momentum_numpy(
                     var0_np, accum0_np, var0_np * 10, 2.0, 0.9)
                 var1_np, accum1_np = self._update_nesterov_momentum_numpy(
                     var1_np, accum1_np, 3, 2.0, 0.9)
                 self.assertAllClose(var0_np, var0.eval())
                 self.assertAllClose(var1_np, var1.eval())
Example 21
    def testConstraint(self):
        constraint_01 = lambda x: clip_ops.clip_by_value(x, -0.1, 0.)
        constraint_0 = lambda x: clip_ops.clip_by_value(x, 0., 1.)
        with self.cached_session():
            var0 = variables.Variable([1.0, 2.0], constraint=constraint_01)
            var1 = variables.Variable([3.0, 4.0], constraint=constraint_0)
            cost = 5 * var0 + 3 * var1
            global_step = variables.Variable(array_ops.zeros([], dtypes.int64),
                                             name='global_step')
            sgd_op = sgd.SGD(3.0)
            opt_op = sgd_op.minimize(cost, global_step, [var0, var1])

            variables.global_variables_initializer().run()
            # Fetch params to validate initial values
            self.assertAllClose([1.0, 2.0], var0.eval())
            self.assertAllClose([3.0, 4.0], var1.eval())
            # Run 1 step of sgd through optimizer
            opt_op.run()
            # Validate updated params
            self.assertAllClose([-0.1, -0.1], var0.eval())
            self.assertAllClose([0., 0.], var1.eval())
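
The expected values reflect that each variable's constraint is applied after the gradient step: the raw SGD step would move var0 to [-14, -13] and var1 to [-6, -5] (as in the earlier examples), and the constraints then clip those results. A quick NumPy check:

    import numpy as np

    raw_var0 = np.array([1.0, 2.0]) - 3.0 * 5   # [-14, -13]
    raw_var1 = np.array([3.0, 4.0]) - 3.0 * 3   # [-6, -5]
    print(np.clip(raw_var0, -0.1, 0.0))         # [-0.1, -0.1]
    print(np.clip(raw_var1, 0.0, 1.0))          # [0., 0.]
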
 def testTensorLearningRate(self):
     for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
         with self.cached_session():
             var0 = variables.Variable([1.0, 2.0], dtype=dtype)
             var1 = variables.Variable([3.0, 4.0], dtype=dtype)
             grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
             grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
             lrate = constant_op.constant(3.0)
             sgd_op = sgd.SGD(lrate).apply_gradients(
                 zip([grads0, grads1], [var0, var1]))
             variables.global_variables_initializer().run()
             # Fetch params to validate initial values
             self.assertAllCloseAccordingToType([1.0, 2.0], var0.eval())
             self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval())
             # Run 1 step of sgd
             sgd_op.run()
             # Validate updated params
             self.assertAllCloseAccordingToType(
                 [1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1], var0.eval())
             self.assertAllCloseAccordingToType(
                 [3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01], var1.eval())
Example 23
    def testGradientsAsVariables(self):
        for i, dtype in enumerate(
            [dtypes.half, dtypes.float32, dtypes.float64]):
            var0 = resource_variable_ops.ResourceVariable([1.0, 2.0],
                                                          dtype=dtype)
            var1 = resource_variable_ops.ResourceVariable([3.0, 4.0],
                                                          dtype=dtype)

            def loss():
                return 5 * var0 + 3 * var1  # pylint: disable=cell-var-from-loop

            sgd_op = sgd.SGD(3.0)
            grads_and_vars = sgd_op.compute_gradients(loss, [var0, var1])
            # Convert gradients to tf.Variables
            converted_grads = [
                resource_variable_ops.ResourceVariable(array_ops.zeros([2],
                                                                       dtype),
                                                       name='c_%d_%d' % (i, j))
                for j, gv in enumerate(grads_and_vars)
            ]
            convert_ops = [
                state_ops.assign(converted_grads[j], gv[0])
                for j, gv in enumerate(grads_and_vars)
            ]

            self.evaluate(variables.global_variables_initializer())
            # Run convert_ops to convert the gradients
            self.evaluate(convert_ops)
            # Fetch params to validate initial values
            self.assertAllClose([1.0, 2.0], self.evaluate(var0))
            self.assertAllClose([3.0, 4.0], self.evaluate(var1))

            # Run 1 step of sgd through optimizer
            converted_grads_and_vars = list(zip(converted_grads, [var0, var1]))
            opt_op = sgd_op.apply_gradients(converted_grads_and_vars)
            self.evaluate(opt_op)

            # Validate updated params
            self.assertAllClose([-14., -13.], self.evaluate(var0))
            self.assertAllClose([-6., -5.], self.evaluate(var1))
 def testWithGlobalStep(self):
     for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
         with self.cached_session():
             global_step = variables.Variable(0, trainable=False)
             var0 = variables.Variable([1.0, 2.0], dtype=dtype)
             var1 = variables.Variable([3.0, 4.0], dtype=dtype)
             grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
             grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
             sgd_op = sgd.SGD(3.0).apply_gradients(zip([grads0, grads1],
                                                       [var0, var1]),
                                                   global_step=global_step)
             variables.global_variables_initializer().run()
             # Fetch params to validate initial values
             self.assertAllCloseAccordingToType([1.0, 2.0], var0.eval())
             self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval())
             # Run 1 step of sgd
             sgd_op.run()
             # Validate updated params and global_step
             self.assertAllCloseAccordingToType(
                 [1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1], var0.eval())
             self.assertAllCloseAccordingToType(
                 [3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01], var1.eval())
             self.assertAllCloseAccordingToType(1, global_step.eval())
    def testMinimizeWith2DIndiciesForEmbeddingLookup(self):
        # This test invokes the ResourceSparseApplyMomentum operation, which
        # did not have a registered GPU kernel as of April 2018. With graph
        # execution, the placement algorithm notices this and automatically
        # places the variable in CPU (host) memory. With eager execution,
        # the variable would be placed in GPU memory if available, which
        # would then conflict with the future invocation of the
        # ResourceSparseApplyMomentum operation.
        # To work around this discrepancy, for now we force the variable
        # to be placed on CPU.
        with ops.device("/cpu:0"):
            var0 = resource_variable_ops.ResourceVariable(
                array_ops.ones([2, 2]))

        def loss():
            return math_ops.reduce_sum(
                embedding_ops.embedding_lookup(var0, [[1]]))

        opt = sgd.SGD(learning_rate=1.0, momentum=0.0)
        sgd_op = opt.minimize(loss)
        self.evaluate(variables.global_variables_initializer())
        self.evaluate(sgd_op)
        self.assertAllCloseAccordingToType([[1, 1], [0, 0]],
                                           self.evaluate(var0))
 def testSparseBasic(self):
     for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
         with self.cached_session():
             var0 = variables.Variable([[1.0], [2.0]], dtype=dtype)
             var1 = variables.Variable([[3.0], [4.0]], dtype=dtype)
             grads0 = ops.IndexedSlices(
                 constant_op.constant([0.1], shape=[1, 1], dtype=dtype),
                 constant_op.constant([0]), constant_op.constant([2, 1]))
             grads1 = ops.IndexedSlices(
                 constant_op.constant([0.01], shape=[1, 1], dtype=dtype),
                 constant_op.constant([1]), constant_op.constant([2, 1]))
             sgd_op = sgd.SGD(3.0).apply_gradients(
                 zip([grads0, grads1], [var0, var1]))
             variables.global_variables_initializer().run()
             # Fetch params to validate initial values
             self.assertAllCloseAccordingToType([[1.0], [2.0]], var0.eval())
             self.assertAllCloseAccordingToType([[3.0], [4.0]], var1.eval())
             # Run 1 step of sgd
             sgd_op.run()
             # Validate updated params
             self.assertAllCloseAccordingToType([[1.0 - 3.0 * 0.1], [2.0]],
                                                var0.eval())
             self.assertAllCloseAccordingToType([[3.0], [4.0 - 3.0 * 0.01]],
                                                var1.eval())
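
With IndexedSlices gradients, only the indexed rows are updated: var0 receives a gradient for row 0 only and var1 for row 1 only, so the remaining rows keep their initial values. The same arithmetic in NumPy:

    import numpy as np

    lr = 3.0
    var0 = np.array([[1.0], [2.0]])
    var1 = np.array([[3.0], [4.0]])
    var0[0] -= lr * 0.1     # only row 0 of var0 changes
    var1[1] -= lr * 0.01    # only row 1 of var1 changes
    print(var0)             # [[0.7], [2.0]]
    print(var1)             # [[3.0], [3.97]]
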
    def testSparse(self):
        for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
            with self.cached_session():
                var0 = variables.Variable(array_ops.zeros([4, 2], dtype=dtype))
                var1 = variables.Variable(
                    constant_op.constant(1.0, dtype, [4, 2]))
                grads0 = ops.IndexedSlices(
                    constant_op.constant([[.1, .1]], dtype=dtype),
                    constant_op.constant([1]), constant_op.constant([4, 2]))
                grads1 = ops.IndexedSlices(
                    constant_op.constant([[.01, .01], [.01, .01]],
                                         dtype=dtype),
                    constant_op.constant([2, 3]), constant_op.constant([4, 2]))
                mom_opt = sgd.SGD(learning_rate=2.0, momentum=0.9)
                mom_update = mom_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                variables.global_variables_initializer().run()

                # Check we have slots
                self.assertEqual(["momentum"], mom_opt.get_slot_names())
                slot0 = mom_opt.get_slot(var0, "momentum")
                self.assertEquals(slot0.get_shape(), var0.get_shape())
                slot1 = mom_opt.get_slot(var1, "momentum")
                self.assertEquals(slot1.get_shape(), var1.get_shape())

                # Fetch params to validate initial values
                self.assertAllClose([0, 0], var0.eval()[0])
                self.assertAllClose([0, 0], var0.eval()[1])
                self.assertAllClose([1, 1], var1.eval()[2])

                # Step 1: the momentum accumulators are 0. So we should see a normal
                # update: v -= grad * learning_rate
                mom_update.run()
                # Check that the momentum accumulators have been updated.
                self.assertAllCloseAccordingToType(np.array([0, 0]),
                                                   slot0.eval()[0])
                self.assertAllCloseAccordingToType(np.array([.1, .1]),
                                                   slot0.eval()[1])
                self.assertAllCloseAccordingToType(np.array([.01, .01]),
                                                   slot1.eval()[2])
                # Check that the parameters have been updated.
                self.assertAllCloseAccordingToType(np.array([0, 0]),
                                                   var0.eval()[0])
                self.assertAllCloseAccordingToType(
                    np.array([-(0.1 * 2.0), -(0.1 * 2.0)]),
                    var0.eval()[1])
                self.assertAllCloseAccordingToType(
                    np.array([1.0 - (0.01 * 2.0), 1.0 - (0.01 * 2.0)]),
                    var1.eval()[2])
                # Step 2: the momentum accumulators contain the previous update.
                mom_update.run()
                # Check that the momentum accumulators have been updated.
                self.assertAllClose(np.array([0, 0]), slot0.eval()[0])
                self.assertAllCloseAccordingToType(
                    np.array([(0.9 * 0.1 + 0.1), (0.9 * 0.1 + 0.1)]),
                    slot0.eval()[1])
                self.assertAllCloseAccordingToType(
                    np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]),
                    slot1.eval()[2])
                # Check that the parameters have been updated.
                self.assertAllClose(np.array([0, 0]), var0.eval()[0])
                self.assertAllCloseAccordingToType(
                    np.array([
                        -(0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0),
                        -(0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0)
                    ]),
                    var0.eval()[1])
                self.assertAllCloseAccordingToType(
                    np.array([
                        0.98 - ((0.9 * 0.01 + 0.01) * 2.0),
                        0.98 - ((0.9 * 0.01 + 0.01) * 2.0)
                    ]),
                    var1.eval()[2])
    def doTestBasic(self, use_resource=False, use_callable_params=False):
        for i, dtype in enumerate(
            [dtypes.half, dtypes.float32, dtypes.float64]):
            if use_resource:
                var0 = resource_variable_ops.ResourceVariable([1.0, 2.0],
                                                              dtype=dtype,
                                                              name="var0_%d" %
                                                              i)
                var1 = resource_variable_ops.ResourceVariable([3.0, 4.0],
                                                              dtype=dtype,
                                                              name="var1_%d" %
                                                              i)
            else:
                var0 = variables.Variable([1.0, 2.0], dtype=dtype)
                var1 = variables.Variable([3.0, 4.0], dtype=dtype)
            grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
            grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
            learning_rate = lambda: 2.0
            momentum = lambda: 0.9
            if not use_callable_params:
                learning_rate = learning_rate()
                momentum = momentum()
            mom_opt = sgd.SGD(learning_rate=learning_rate, momentum=momentum)
            mom_update = mom_opt.apply_gradients(
                zip([grads0, grads1], [var0, var1]))

            if not context.executing_eagerly():
                self.evaluate(variables.global_variables_initializer())
                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                self.assertAllClose([3.0, 4.0], self.evaluate(var1))

            # Check we have slots
            self.assertEqual(["momentum"], mom_opt.get_slot_names())
            slot0 = mom_opt.get_slot(var0, "momentum")
            self.assertEquals(slot0.get_shape(), var0.get_shape())
            slot1 = mom_opt.get_slot(var1, "momentum")
            self.assertEquals(slot1.get_shape(), var1.get_shape())
            if not context.executing_eagerly():
                self.assertFalse(slot0 in variables.trainable_variables())
                self.assertFalse(slot1 in variables.trainable_variables())

            # Step 1: the momentum accumulators were 0, so we should see a normal
            # update: v -= grad * learning_rate
            if not context.executing_eagerly():
                self.evaluate(mom_update)
            # Check that the momentum accumulators have been updated.
            self.assertAllCloseAccordingToType(np.array([0.1, 0.1]),
                                               self.evaluate(slot0))
            self.assertAllCloseAccordingToType(np.array([0.01, 0.01]),
                                               self.evaluate(slot1))
            # Check that the parameters have been updated.
            self.assertAllCloseAccordingToType(
                np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]),
                self.evaluate(var0))
            self.assertAllCloseAccordingToType(
                np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]),
                self.evaluate(var1))
            # Step 2: the momentum accumulators contain the previous update.
            if context.executing_eagerly():
                mom_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            else:
                self.evaluate(mom_update)
            # Check that the momentum accumulators have been updated.
            self.assertAllCloseAccordingToType(
                np.array([(0.9 * 0.1 + 0.1), (0.9 * 0.1 + 0.1)]),
                self.evaluate(slot0))
            self.assertAllCloseAccordingToType(
                np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]),
                self.evaluate(slot1))
            # Check that the parameters have been updated.
            self.assertAllCloseAccordingToType(
                np.array([
                    1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0),
                    2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0)
                ]), self.evaluate(var0))
            self.assertAllCloseAccordingToType(
                np.array([
                    2.98 - ((0.9 * 0.01 + 0.01) * 2.0),
                    3.98 - ((0.9 * 0.01 + 0.01) * 2.0)
                ]), self.evaluate(var1))