Example #1
    def testPrecomputedGradient(self):
        for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
            with self.cached_session():
                var0 = variables.Variable([1.0, 2.0], dtype=dtype)
                var1 = variables.Variable([3.0, 4.0], dtype=dtype)
                cost = 5 * var0 + 3 * var1
                grad_loss = constant_op.constant([42, -42], dtype=dtype)
                global_step = variables.Variable(array_ops.zeros([],
                                                                 dtypes.int64),
                                                 name='global_step')
                sgd_op = gradient_descent.GradientDescentOptimizer(3.0)
                opt_op = sgd_op.minimize(cost,
                                         global_step, [var0, var1],
                                         grad_loss=grad_loss)

                variables.global_variables_initializer().run()
                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], var0.eval())
                self.assertAllClose([3.0, 4.0], var1.eval())
                # Run 1 step of sgd through optimizer
                opt_op.run()
                # Validate updated params
                self.assertAllClose(
                    [1.0 - 3 * 5 * 42.0, 2.0 - 3 * 5 * (-42.0)], var0.eval())
                self.assertAllClose(
                    [3.0 - 3 * 3 * 42.0, 4.0 - 3 * 3 * (-42.0)], var1.eval())
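The expected values follow from the SGD update rule with a precomputed gradient of the loss: each variable moves by learning_rate * grad_loss * d(cost)/d(var). A minimal NumPy sketch of the arithmetic the assertions check (NumPy and the names below are illustrative, not part of the test):

    import numpy as np

    lr = 3.0
    grad_loss = np.array([42.0, -42.0])
    var0 = np.array([1.0, 2.0])
    var1 = np.array([3.0, 4.0])
    # cost = 5 * var0 + 3 * var1, so d(cost)/d(var0) = 5 and d(cost)/d(var1) = 3
    var0 -= lr * 5.0 * grad_loss  # [-629.,  632.]
    var1 -= lr * 3.0 * grad_loss  # [-375.,  382.]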
Example #2
 def testBasicResourceVariable(self):
     for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
         with self.test_session():
             var0 = resource_variable_ops.ResourceVariable([1.0, 2.0],
                                                           dtype=dtype)
             var1 = resource_variable_ops.ResourceVariable([3.0, 4.0],
                                                           dtype=dtype)
             grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
             grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
             sgd_op = gradient_descent.GradientDescentOptimizer(
                 3.0).apply_gradients(zip([grads0, grads1], [var0, var1]))
             # TODO(apassos) calling initialize_resources on all resources here
             # doesn't work because the sessions and graph are reused across unit
             # tests and this would mean trying to reinitialize variables. Figure out
             # a long-term solution for this.
             resources.initialize_resources([var0, var1]).run()
             # Fetch params to validate initial values
             self.assertAllCloseAccordingToType([1.0, 2.0], var0.eval())
             self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval())
             # Run 1 step of sgd
             sgd_op.run()
             # Validate updated params
             self.assertAllCloseAccordingToType(
                 [1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1], var0.eval())
             self.assertAllCloseAccordingToType(
                 [3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01], var1.eval())
Example #3
 def testMinimizeResourceVariable(self):
   for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
     with self.cached_session():
       var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
       var1 = resource_variable_ops.ResourceVariable([3.0], dtype=dtype)
       x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
       pred = math_ops.matmul(var0, x) + var1
       loss = pred * pred
       sgd_op = gradient_descent.GradientDescentOptimizer(1.0).minimize(loss)
       # TODO(apassos) calling initialize_resources on all resources here
       # doesn't work because the sessions and graph are reused across unit
       # tests and this would mean trying to reinitialize variables. Figure out
       # a long-term solution for this.
       resources.initialize_resources([var0, var1]).run()
       # Fetch params to validate initial values
       self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
       self.assertAllCloseAccordingToType([3.0], var1.eval())
       # Run 1 step of sgd
       sgd_op.run()
       # Validate updated params
       np_pred = 1.0 * 4.0 + 2.0 * 5.0 + 3.0
       np_grad = 2 * np_pred
       self.assertAllCloseAccordingToType(
           [[1.0 - np_grad * 4.0, 2.0 - np_grad * 5.0]], var0.eval())
       self.assertAllCloseAccordingToType([3.0 - np_grad], var1.eval())
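np_pred and np_grad mirror the forward and backward pass by hand: pred = var0 · x + var1 = 1*4 + 2*5 + 3 = 17, and d(pred^2)/d(pred) = 2*pred = 34. A minimal NumPy sketch of the same arithmetic (illustrative, not part of the test):

    import numpy as np

    var0 = np.array([[1.0, 2.0]])
    var1 = np.array([3.0])
    x = np.array([[4.0], [5.0]])
    pred = var0 @ x + var1          # [[17.]]
    grad = 2.0 * pred               # d(pred**2)/d(pred) = [[34.]]
    var0 -= 1.0 * grad * x.T        # [[-135., -168.]], matching the assertion
    var1 -= 1.0 * grad.reshape(-1)  # [-31.]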
Example #4
    def testBasic(self):
        for i, dtype in enumerate(
            [dtypes.half, dtypes.float32, dtypes.float64]):
            var0 = resource_variable_ops.ResourceVariable([1.0, 2.0],
                                                          dtype=dtype)
            var1 = resource_variable_ops.ResourceVariable([3.0, 4.0],
                                                          dtype=dtype)

            def loss():
                return 5 * var0 + 3 * var1  # pylint: disable=cell-var-from-loop

            # Note that for eager execution, minimize expects a function instead of a
            # Tensor.
            global_step = resource_variable_ops.ResourceVariable(
                array_ops.zeros([], dtypes.int64), name='global_step_%d' % i)
            sgd_op = gradient_descent.GradientDescentOptimizer(3.0)

            self.evaluate(variables.global_variables_initializer())
            # Fetch params to validate initial values
            self.assertAllClose([1.0, 2.0], self.evaluate(var0))
            self.assertAllClose([3.0, 4.0], self.evaluate(var1))
            # Run 1 step of sgd through optimizer
            opt_op = sgd_op.minimize(loss, global_step, [var0, var1])
            self.evaluate(opt_op)
            # Validate updated params
            self.assertAllClose([-14., -13.], self.evaluate(var0))
            self.assertAllClose([-6., -5.], self.evaluate(var1))
Example #5
 def testSparseBasic(self):
   for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
     with self.cached_session():
       var0 = variables.Variable([[1.0], [2.0]], dtype=dtype)
       var1 = variables.Variable([[3.0], [4.0]], dtype=dtype)
       grads0 = ops.IndexedSlices(
           constant_op.constant(
               [0.1], shape=[1, 1], dtype=dtype),
           constant_op.constant([0]),
           constant_op.constant([2, 1]))
       grads1 = ops.IndexedSlices(
           constant_op.constant(
               [0.01], shape=[1, 1], dtype=dtype),
           constant_op.constant([1]),
           constant_op.constant([2, 1]))
       sgd_op = gradient_descent.GradientDescentOptimizer(3.0).apply_gradients(
           zip([grads0, grads1], [var0, var1]))
       variables.global_variables_initializer().run()
       # Fetch params to validate initial values
       self.assertAllCloseAccordingToType([[1.0], [2.0]], var0.eval())
       self.assertAllCloseAccordingToType([[3.0], [4.0]], var1.eval())
       # Run 1 step of sgd
       sgd_op.run()
       # Validate updated params
       self.assertAllCloseAccordingToType([[1.0 - 3.0 * 0.1], [2.0]],
                                          var0.eval())
       self.assertAllCloseAccordingToType([[3.0], [4.0 - 3.0 * 0.01]],
                                          var1.eval())
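An IndexedSlices gradient stores only the rows that are touched: the values, the row indices, and the dense shape. grads0 therefore stands for the dense gradient [[0.1], [0.0]] and grads1 for [[0.0], [0.01]], so only one row of each variable changes. A minimal NumPy sketch of that equivalence (the densify helper is illustrative, not a TensorFlow API):

    import numpy as np

    def densify(values, indices, dense_shape):
        # Dense tensor represented by an IndexedSlices gradient.
        dense = np.zeros(dense_shape)
        dense[indices] = values
        return dense

    g0 = densify(np.array([[0.1]]), [0], (2, 1))   # [[0.1], [0.0]]
    g1 = densify(np.array([[0.01]]), [1], (2, 1))  # [[0.0], [0.01]]
    var0 = np.array([[1.0], [2.0]]) - 3.0 * g0     # [[0.7], [2.0]]
    var1 = np.array([[3.0], [4.0]]) - 3.0 * g1     # [[3.0], [3.97]]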
Example #6
    def testAggregationMethod(self):
        for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
            with self.cached_session():
                var0 = variables.Variable([1.0, 2.0], dtype=dtype)
                var1 = variables.Variable([3.0, 4.0], dtype=dtype)
                cost = 5 * var0 + 3 * var1
                global_step = variables.Variable(array_ops.zeros([],
                                                                 dtypes.int64),
                                                 name='global_step')
                sgd_op = gradient_descent.GradientDescentOptimizer(3.0)
                opt_op = sgd_op.minimize(
                    cost,
                    global_step, [var0, var1],
                    aggregation_method=gradients_impl.AggregationMethod.
                    EXPERIMENTAL_ACCUMULATE_N)

                variables.global_variables_initializer().run()
                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], var0.eval())
                self.assertAllClose([3.0, 4.0], var1.eval())
                # Run 1 step of sgd through optimizer
                opt_op.run()
                # Validate updated params
                self.assertAllClose([-14., -13.], var0.eval())
                self.assertAllClose([-6., -5.], var1.eval())
Example #7
 def testGradWrtRef(self):
   for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
     with self.cached_session():
       opt = gradient_descent.GradientDescentOptimizer(3.0)
       values = [1.0, 3.0]
       vars_ = [variables.Variable([v], dtype=dtype) for v in values]
       grads_and_vars = opt.compute_gradients(vars_[0] + vars_[1], vars_)
       variables.global_variables_initializer().run()
       for grad, _ in grads_and_vars:
         self.assertAllCloseAccordingToType([1.0], grad.eval())
Example #8
 def testTrainOp(self):
     with self.cached_session():
         var0 = variables.Variable([1.0, 2.0])
         var1 = variables.Variable([3.0, 4.0])
         cost = 5 * var0 + 3 * var1
         global_step = variables.Variable(array_ops.zeros([], dtypes.int64),
                                          name='global_step')
         sgd_op = gradient_descent.GradientDescentOptimizer(3.0)
         opt_op = sgd_op.minimize(cost, global_step, [var0, var1])
         self.assertTrue(
             opt_op in ops.get_collection(ops.GraphKeys.TRAIN_OP))
Example #9
 def testNoGradientsForAnyVariables_ApplyGradients(self):
     for i, dtype in enumerate(
         [dtypes.half, dtypes.float32, dtypes.float64]):
         var0 = resource_variable_ops.ResourceVariable([1.0, 2.0],
                                                       dtype=dtype)
         var1 = resource_variable_ops.ResourceVariable([3.0, 4.0],
                                                       dtype=dtype)
         sgd_op = gradient_descent.GradientDescentOptimizer(3.0)
         with self.assertRaisesRegexp(
                 ValueError, 'No gradients provided for any variable'):
             sgd_op.apply_gradients([(None, var0), (None, var1)])
Example #10
 def testStopGradients(self):
     with self.cached_session():
         var0 = variables.Variable([1.0, 2.0], name='var0')
         var1 = variables.Variable([3.0, 4.0], name='var1')
         var0_id = array_ops.identity(var0)
         cost = 5 * var0_id + 3 * var1
         sgd_op = gradient_descent.GradientDescentOptimizer(3.0)
         grads_and_vars = sgd_op.compute_gradients(cost, [var0, var1],
                                                   stop_gradients=[var0_id])
         grad_dict = {var.op.name: grad for grad, var in grads_and_vars}
         self.assertIsNone(grad_dict['var0'])
         self.assertIsNotNone(grad_dict['var1'])
Example #11
    def testNoGradientsForAnyVariables_Minimize(self):
        for i, dtype in enumerate(
            [dtypes.half, dtypes.float32, dtypes.float64]):
            var0 = resource_variable_ops.ResourceVariable([1.0, 2.0],
                                                          dtype=dtype)
            var1 = resource_variable_ops.ResourceVariable([3.0, 4.0],
                                                          dtype=dtype)

            def loss():
                return constant_op.constant(5.0)

            sgd_op = gradient_descent.GradientDescentOptimizer(3.0)
            with self.assertRaisesRegexp(
                    ValueError, 'No gradients provided for any variable'):
                sgd_op.minimize(loss, var_list=[var0, var1])
Example #12
 def testNoGradientsForAnyVariables_ApplyGradients(self):
     for i, dtype in enumerate(
         [dtypes.half, dtypes.float32, dtypes.float64]):
         # Note that we name the variables uniquely here since the variables don't
         # seem to be getting deleted at the end of the loop.
         var0 = resource_variable_ops.ResourceVariable([1.0, 2.0],
                                                       dtype=dtype,
                                                       name='a_%d' % i)
         var1 = resource_variable_ops.ResourceVariable([3.0, 4.0],
                                                       dtype=dtype,
                                                       name='b_%d' % i)
         sgd_op = gradient_descent.GradientDescentOptimizer(3.0)
         with self.assertRaisesRegexp(
                 ValueError, 'No gradients provided for any variable'):
             sgd_op.apply_gradients([(None, var0), (None, var1)])
Example #13
    def testComputeGradientsWithTensors(self):
        x = ops.convert_to_tensor(1.0)

        def f():
            return x * x

        sgd_op = gradient_descent.GradientDescentOptimizer(3.0)
        grads_and_vars = sgd_op.compute_gradients(f, [x])
        self.assertEqual(1, len(grads_and_vars))
        grad, x_as_var = grads_and_vars[0]
        self.assertIs(x, x_as_var)
        self.assertEqual(2.0, self.evaluate(grad))

        with self.assertRaises(NotImplementedError):
            sgd_op.apply_gradients(grads_and_vars)
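Here compute_gradients differentiates the callable with respect to a plain tensor rather than a variable, so the gradient of x * x at x = 1.0 is 2.0, and apply_gradients is rejected because a tensor cannot be assigned to. The same derivative can be checked with tf.GradientTape, assuming TF 1.x with eager execution enabled or TF 2.x (this check is illustrative, not part of the test):

    import tensorflow as tf

    x = tf.constant(1.0)
    with tf.GradientTape() as tape:
        tape.watch(x)  # plain tensors are not watched automatically
        y = x * x
    print(tape.gradient(y, x))  # tf.Tensor(2.0, shape=(), dtype=float32)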
Example #14
    def testNoGradients(self):
        for i, dtype in enumerate(
            [dtypes.half, dtypes.float32, dtypes.float64]):
            var0 = resource_variable_ops.ResourceVariable([1.0, 2.0],
                                                          dtype=dtype)
            var1 = resource_variable_ops.ResourceVariable([3.0, 4.0],
                                                          dtype=dtype)

            # pylint: disable=cell-var-from-loop
            def loss():
                return 5 * var0

            # pylint: enable=cell-var-from-loop
            sgd_op = gradient_descent.GradientDescentOptimizer(3.0)
            with self.assertRaisesRegexp(ValueError, 'No gradients'):
                # var1 has no gradient
                sgd_op.minimize(loss, var_list=[var1])
Example #15
    def testNoVariables(self):
        for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
            # pylint: disable=cell-var-from-loop
            def loss():
                var0 = resource_variable_ops.ResourceVariable([1.0, 2.0],
                                                              dtype=dtype,
                                                              trainable=False,
                                                              name='a')
                var1 = resource_variable_ops.ResourceVariable([3.0, 4.0],
                                                              dtype=dtype,
                                                              trainable=False,
                                                              name='b')
                return 5 * var0 + var1

            # pylint: enable=cell-var-from-loop
            sgd_op = gradient_descent.GradientDescentOptimizer(3.0)
            with self.assertRaisesRegexp(ValueError, 'No.*variables'):
                sgd_op.minimize(loss)
Example #16
    def testGradientsAsVariables(self):
        for i, dtype in enumerate(
            [dtypes.half, dtypes.float32, dtypes.float64]):
            # Note that we name the variables uniquely here since the variables don't
            # seem to be getting deleted at the end of the loop.
            var0 = resource_variable_ops.ResourceVariable([1.0, 2.0],
                                                          dtype=dtype,
                                                          name='a%d' % i)
            var1 = resource_variable_ops.ResourceVariable([3.0, 4.0],
                                                          dtype=dtype,
                                                          name='b%d' % i)

            def loss():
                return 5 * var0 + 3 * var1  # pylint: disable=cell-var-from-loop

            sgd_op = gradient_descent.GradientDescentOptimizer(3.0)
            grads_and_vars = sgd_op.compute_gradients(loss, [var0, var1])
            # Convert gradients to tf.Variables
            converted_grads = [
                resource_variable_ops.ResourceVariable(array_ops.zeros([2],
                                                                       dtype),
                                                       name='c_%d_%d' % (i, j))
                for j, gv in enumerate(grads_and_vars)
            ]
            convert_ops = [
                state_ops.assign(converted_grads[j], gv[0])
                for j, gv in enumerate(grads_and_vars)
            ]

            self.evaluate(variables.global_variables_initializer())
            # Run convert_ops to copy the computed gradients into the Variables
            self.evaluate(convert_ops)
            # Fetch params to validate initial values
            self.assertAllClose([1.0, 2.0], self.evaluate(var0))
            self.assertAllClose([3.0, 4.0], self.evaluate(var1))

            # Run 1 step of sgd through optimizer
            converted_grads_and_vars = list(zip(converted_grads, [var0, var1]))
            opt_op = sgd_op.apply_gradients(converted_grads_and_vars)
            self.evaluate(opt_op)

            # Validate updated params
            self.assertAllClose([-14., -13.], self.evaluate(var0))
            self.assertAllClose([-6., -5.], self.evaluate(var1))
Example #17
    def testNoGradientsForAnyVariables_Minimize(self):
        for i, dtype in enumerate(
            [dtypes.half, dtypes.float32, dtypes.float64]):
            # Note that we name the variables uniquely here since the variables don't
            # seem to be getting deleted at the end of the loop.
            var0 = resource_variable_ops.ResourceVariable([1.0, 2.0],
                                                          dtype=dtype,
                                                          name='a_%d' % i)
            var1 = resource_variable_ops.ResourceVariable([3.0, 4.0],
                                                          dtype=dtype,
                                                          name='b_%d' % i)

            def loss():
                return constant_op.constant(5.0)

            sgd_op = gradient_descent.GradientDescentOptimizer(3.0)
            with self.assertRaisesRegexp(
                    ValueError, 'No gradients provided for any variable'):
                sgd_op.minimize(loss, var_list=[var0, var1])
Example #18
 def testTensorLearningRate(self):
   for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
     with self.cached_session():
       var0 = variables.Variable([1.0, 2.0], dtype=dtype)
       var1 = variables.Variable([3.0, 4.0], dtype=dtype)
       grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
       grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
       lrate = constant_op.constant(3.0)
       sgd_op = gradient_descent.GradientDescentOptimizer(
           lrate).apply_gradients(zip([grads0, grads1], [var0, var1]))
       variables.global_variables_initializer().run()
       # Fetch params to validate initial values
       self.assertAllCloseAccordingToType([1.0, 2.0], var0.eval())
       self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval())
       # Run 1 step of sgd
       sgd_op.run()
       # Validate updated params
       self.assertAllCloseAccordingToType([1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1],
                                          var0.eval())
       self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01],
                                          var1.eval())
Example #19
    def testConstraint(self):
        constraint_01 = lambda x: clip_ops.clip_by_value(x, -0.1, 0.)
        constraint_0 = lambda x: clip_ops.clip_by_value(x, 0., 1.)
        with self.cached_session():
            var0 = variables.Variable([1.0, 2.0], constraint=constraint_01)
            var1 = variables.Variable([3.0, 4.0], constraint=constraint_0)
            cost = 5 * var0 + 3 * var1
            global_step = variables.Variable(array_ops.zeros([], dtypes.int64),
                                             name='global_step')
            sgd_op = gradient_descent.GradientDescentOptimizer(3.0)
            opt_op = sgd_op.minimize(cost, global_step, [var0, var1])

            variables.global_variables_initializer().run()
            # Fetch params to validate initial values
            self.assertAllClose([1.0, 2.0], var0.eval())
            self.assertAllClose([3.0, 4.0], var1.eval())
            # Run 1 step of sgd through optimizer
            opt_op.run()
            # Validate updated params
            self.assertAllClose([-0.1, -0.1], var0.eval())
            self.assertAllClose([0., 0.], var1.eval())
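The constraint function is applied to each variable after the gradient step: the unconstrained update would give [-14., -13.] and [-6., -5.], which the constraints then clip into [-0.1, 0.] and [0., 1.]. A minimal NumPy sketch of that arithmetic (illustrative, not part of the test):

    import numpy as np

    lr = 3.0
    raw0 = np.array([1.0, 2.0]) - lr * 5.0  # [-14., -13.]
    raw1 = np.array([3.0, 4.0]) - lr * 3.0  # [ -6.,  -5.]
    var0 = np.clip(raw0, -0.1, 0.0)         # [-0.1, -0.1]
    var1 = np.clip(raw1, 0.0, 1.0)          # [ 0.,   0.]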
Example #20
    def testNoGradients(self):
        for i, dtype in enumerate(
            [dtypes.half, dtypes.float32, dtypes.float64]):
            # Note that we name the variables uniquely here since the variables don't
            # seem to be getting deleted at the end of the loop.
            var0 = resource_variable_ops.ResourceVariable([1.0, 2.0],
                                                          dtype=dtype,
                                                          name='a%d' % i)
            var1 = resource_variable_ops.ResourceVariable([3.0, 4.0],
                                                          dtype=dtype,
                                                          name='b%d' % i)

            # pylint: disable=cell-var-from-loop
            def loss():
                return 5 * var0

            # pylint: enable=cell-var-from-loop
            sgd_op = gradient_descent.GradientDescentOptimizer(3.0)
            with self.assertRaisesRegexp(ValueError, 'No gradients'):
                # var1 has no gradient
                sgd_op.minimize(loss, var_list=[var1])
Example #21
 def testWithGlobalStep(self):
   for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
     with self.cached_session():
       global_step = variables.Variable(0, trainable=False)
       var0 = variables.Variable([1.0, 2.0], dtype=dtype)
       var1 = variables.Variable([3.0, 4.0], dtype=dtype)
       grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
       grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
       sgd_op = gradient_descent.GradientDescentOptimizer(3.0).apply_gradients(
           zip([grads0, grads1], [var0, var1]), global_step=global_step)
       variables.global_variables_initializer().run()
       # Fetch params to validate initial values
       self.assertAllCloseAccordingToType([1.0, 2.0], var0.eval())
       self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval())
       # Run 1 step of sgd
       sgd_op.run()
       # Validate updated params and global_step
       self.assertAllCloseAccordingToType([1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1],
                                          var0.eval())
       self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01],
                                          var1.eval())
       self.assertAllCloseAccordingToType(1, global_step.eval())
Example #22

gradient_descent_optimizer_v1_fn = NamedObject(
    "GradientDescentV1", lambda: gradient_descent.GradientDescentOptimizer(0.2))
adagrad_optimizer_v1_fn = NamedObject(
    "AdagradV1", lambda: adagrad.AdagradOptimizer(0.001))
adam_optimizer_v1_fn = NamedObject("AdamV1",
                                   lambda: adam.AdamOptimizer(0.001, epsilon=1))
rmsprop_optimizer_v1_fn = NamedObject(
    "RmsPropV1", lambda: rmsprop.RMSPropOptimizer(0.001))

optimizers_v1 = [gradient_descent_optimizer_v1_fn, adagrad_optimizer_v1_fn]

gradient_descent_optimizer_v2_fn = NamedObject(
    "GradientDescentV2",
    lambda: gradient_descent_v2.GradientDescentOptimizer(0.2))
adagrad_optimizer_v2_fn = NamedObject(
    "AdagradV2", lambda: adagrad_v2.AdagradOptimizer(0.001))
adam_optimizer_v2_fn = NamedObject(
    "AdamV2", lambda: adam_v2.AdamOptimizer(0.001, epsilon=1))

optimizers_v2 = [gradient_descent_optimizer_v2_fn, adagrad_optimizer_v2_fn]

graph_and_eager_modes = ["graph", "eager"]


def distributions_and_v1_optimizers():
  """A common set of combination with DistributionStrategies and Optimizers."""
  return combine(
      distribution=[
          one_device_strategy,