def testNoGlobalStepWithDecay(self):
  optimizers = [
      "SGD", gradient_descent.GradientDescentOptimizer,
      gradient_descent.GradientDescentOptimizer(learning_rate=0.1)
  ]
  for optimizer in optimizers:
    with ops.Graph().as_default() as g, self.session(graph=g):
      x = array_ops.placeholder(dtypes.float32, [])
      var = variable_scope.get_variable(
          "test", [], initializer=init_ops.constant_initializer(10))
      loss = math_ops.abs(var * x)
      update_var = variable_scope.get_variable(
          "update", [], initializer=init_ops.constant_initializer(10))
      update_op = state_ops.assign(update_var, 20)
      with self.assertRaisesRegexp(
          ValueError, "global_step is required for learning_rate_decay_fn"):
        optimizers_lib.optimize_loss(
            loss,
            global_step=None,
            learning_rate=0.1,
            learning_rate_decay_fn=_no_op_learning_rate_decay_fn,
            optimizer=optimizer,
            update_ops=[update_op])
def testBadSummaries(self):
  with ops.Graph().as_default() as g, self.session(graph=g):
    _, _, loss, global_step = _setup_model()
    with self.assertRaises(ValueError):
      optimizers_lib.optimize_loss(
          loss,
          global_step,
          learning_rate=0.1,
          optimizer="SGD",
          summaries=["loss", "bad_summary"])
def testInvalidLearningRate(self):
  with ops.Graph().as_default() as g, self.session(graph=g):
    _, _, loss, global_step = _setup_model()
    with self.assertRaises(ValueError):
      optimizers_lib.optimize_loss(
          loss, global_step, learning_rate=-0.1, optimizer="SGD")
def testWrongOptimizer(self):
  optimizers = ["blah", variables.Variable, object(), lambda x: None]
  for optimizer in optimizers:
    with ops.Graph().as_default() as g:
      with self.session(graph=g):
        _, _, loss, global_step = _setup_model()
        with self.assertRaises(ValueError):
          optimizers_lib.optimize_loss(
              loss, global_step, learning_rate=0.1, optimizer=optimizer)
def testIgnoreVariablesWithNoGradients(self):
  _, _, loss, global_step = _setup_model()
  unused_variable = variable_scope.get_variable("ignore_me", [])
  optimizers_lib.optimize_loss(
      loss,
      global_step,
      learning_rate=0.1,
      optimizer="SGD",
      gradient_noise_scale=10.0,
      gradient_multipliers={unused_variable: 1.},
      clip_gradients=10.0)
def testUpdateOpFromCollection(self):
  optimizers = [
      "SGD", gradient_descent.GradientDescentOptimizer,
      gradient_descent.GradientDescentOptimizer(learning_rate=0.1)
  ]
  for optimizer in optimizers:
    with ops.Graph().as_default() as g, self.session(graph=g) as session:
      x, var, loss, global_step = _setup_model()
      update_var = variable_scope.get_variable(
          "update", [], initializer=init_ops.constant_initializer(10))
      update_op = state_ops.assign(update_var, 20)
      ops.add_to_collection(ops.GraphKeys.UPDATE_OPS, update_op)
      train = optimizers_lib.optimize_loss(
          loss, global_step, learning_rate=0.1, optimizer=optimizer)
      variables.global_variables_initializer().run()
      session.run(train, feed_dict={x: 5})
      var_value, update_var_value, global_step_value = session.run(
          [var, update_var, global_step])
      self.assertEqual(var_value, 9.5)
      self.assertEqual(update_var_value, 20)
      self.assertEqual(global_step_value, 1)
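# How a single session.run updates both variables (a sketch of the mechanics,
# assuming _setup_model builds loss = |var * x| with var initialized to 10):
# optimize_loss collects ops from ops.GraphKeys.UPDATE_OPS and groups them with
# the gradient step, so one run applies both the SGD update
# (10 - 0.1 * 5 = 9.5) and the assign of update_var to 20.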
def testUpdateOpWithNoOpDecay(self):
  optimizers = [
      "SGD", gradient_descent.GradientDescentOptimizer,
      gradient_descent.GradientDescentOptimizer(learning_rate=0.1)
  ]
  for optimizer in optimizers:
    with ops.Graph().as_default() as g, self.session(graph=g) as session:
      x, var, loss, global_step = _setup_model()
      update_var = variable_scope.get_variable(
          "update", [], initializer=init_ops.constant_initializer(10))
      update_op = state_ops.assign(update_var, 20)
      train = optimizers_lib.optimize_loss(
          loss,
          global_step,
          learning_rate=0.1,
          learning_rate_decay_fn=_no_op_learning_rate_decay_fn,
          optimizer=optimizer,
          update_ops=[update_op])
      variables.global_variables_initializer().run()
      session.run(train, feed_dict={x: 5})
      self.assertEqual(9.5, var.eval())
      self.assertEqual(20, update_var.eval())
      self.assertEqual(1, global_step.eval())
def testNoGlobalStep(self):
  optimizers = [
      "SGD", gradient_descent.GradientDescentOptimizer,
      gradient_descent.GradientDescentOptimizer(learning_rate=0.1)
  ]
  for optimizer in optimizers:
    with ops.Graph().as_default() as g, self.session(graph=g) as session:
      x = array_ops.placeholder(dtypes.float32, [])
      var = variable_scope.get_variable(
          "test", [], initializer=init_ops.constant_initializer(10))
      loss = math_ops.abs(var * x)
      update_var = variable_scope.get_variable(
          "update", [], initializer=init_ops.constant_initializer(10))
      update_op = state_ops.assign(update_var, 20)
      train = optimizers_lib.optimize_loss(
          loss,
          global_step=None,
          learning_rate=0.1,
          optimizer=optimizer,
          update_ops=[update_op])
      variables.global_variables_initializer().run()
      session.run(train, feed_dict={x: 5})
      self.assertEqual(9.5, var.eval())
      self.assertEqual(20, update_var.eval())
def testInvalidGlobalStep(self):
  with ops.Graph().as_default() as g, self.session(graph=g):
    x = array_ops.placeholder(dtypes.float32, [])
    var = variable_scope.get_variable(
        "test", [], initializer=init_ops.constant_initializer(10))
    loss = math_ops.abs(var * x)
    with self.assertRaises(AttributeError):
      optimizers_lib.optimize_loss(
          loss,
          global_step=constant_op.constant(43, dtype=dtypes.int64),
          learning_rate=0.1,
          optimizer="SGD")
    with self.assertRaises(TypeError):
      optimizers_lib.optimize_loss(
          loss,
          global_step=variable_scope.get_variable(
              "global_step", [],
              trainable=False,
              dtype=dtypes.float64,
              initializer=init_ops.constant_initializer(
                  0.0, dtype=dtypes.float64)),
          learning_rate=0.1,
          optimizer="SGD")
    with self.assertRaises(ValueError):
      optimizers_lib.optimize_loss(
          loss,
          global_step=variable_scope.get_variable(
              "global_step", [1],
              trainable=False,
              dtype=dtypes.int64,
              initializer=init_ops.constant_initializer(
                  [0], dtype=dtypes.int64)),
          learning_rate=0.1,
          optimizer="SGD")
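# The three assertRaises above correspond to three distinct global_step
# rejections (per the asserted exception types): a plain Tensor is not a
# variable (AttributeError), a float64 step fails the integer-dtype check
# (TypeError), and a shape-[1] step fails the scalar-shape check (ValueError).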
def testGradientClip(self):
  with self.cached_session() as session:
    x, var, loss, global_step = _setup_model()
    train = optimizers_lib.optimize_loss(
        loss,
        global_step,
        learning_rate=0.1,
        optimizer="SGD",
        clip_gradients=0.1)
    variables.global_variables_initializer().run()
    session.run(train, feed_dict={x: 5})
    var_value, global_step_value = session.run([var, global_step])
    self.assertAlmostEqual(var_value, 9.98999, 4)
    self.assertEqual(global_step_value, 1)
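# Hand check for the 9.98999 assertion (a sketch, assuming a scalar
# clip_gradients applies global-norm clipping as in clip_by_global_norm): the
# raw gradient d|var * x|/d(var) = x = 5 has global norm 5 > 0.1, so it is
# rescaled to norm 0.1, and one SGD step gives
# var(1) = 10 - 0.1 * 0.1 = 9.99 (9.98999 within float32 precision).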
def testGradientNoiseWithClipping(self):
  random_seed.set_random_seed(42)
  with self.cached_session() as session:
    x, var, loss, global_step = _setup_model()
    train = optimizers_lib.optimize_loss(
        loss,
        global_step,
        learning_rate=0.1,
        optimizer="SGD",
        gradient_noise_scale=10.0,
        clip_gradients=10.0)
    variables.global_variables_initializer().run()
    session.run(train, feed_dict={x: 5})
    var_value, global_step_value = session.run([var, global_step])
    self.assertAlmostEqual(var_value, 9.801016, 4)
    self.assertEqual(global_step_value, 1)
def testGradientNoise(self):
  random_seed.set_random_seed(42)
  with self.cached_session() as session:
    x, var, loss, global_step = _setup_model()
    train = optimizers_lib.optimize_loss(
        loss,
        global_step,
        learning_rate=0.1,
        optimizer="SGD",
        gradient_noise_scale=10.0)
    variables.global_variables_initializer().run()
    session.run(train, feed_dict={x: 5})
    var_value, global_step_value = session.run([var, global_step])
    # Due to randomness, the expected value may change if the graph changes.
    self.assertAlmostEqual(var_value, 9.801016, 4)
    self.assertEqual(global_step_value, 1)
def testNoLrCallable(self):

  def optimizer_fn():
    return gradient_descent.GradientDescentOptimizer(learning_rate=0.1)

  with ops.Graph().as_default() as g:
    with self.session(graph=g) as session:
      x, var, loss, global_step = _setup_model()
      train = optimizers_lib.optimize_loss(
          loss, global_step, learning_rate=None, optimizer=optimizer_fn)
      variables.global_variables_initializer().run()
      session.run(train, feed_dict={x: 5})
      var_value, global_step_value = session.run([var, global_step])
      self.assertEqual(var_value, 9.5)
      self.assertEqual(global_step_value, 1)
def testGradientMultiplyFloat64Tensor(self):
  # Renamed from the misleading "Int64Tensor": the multiplier fed below is a
  # float64 placeholder, not an integer tensor.
  with self.cached_session() as session:
    x, var, loss, global_step = _setup_model()
    v = array_ops.placeholder(dtypes.float64, [])
    train = optimizers_lib.optimize_loss(
        loss,
        global_step,
        learning_rate=0.1,
        optimizer="SGD",
        gradient_multipliers={var: v})
    variables.global_variables_initializer().run()
    session.run(train, feed_dict={x: 5, v: 7.})
    var_value, global_step_value = session.run([var, global_step])
    # var(0) = 10, x = 5, d(loss)/d(var) = x = 5,
    # var(1) = var(0) - learning_rate * gradient_multiplier * d(loss)/d(var)
    #        = 10 - 0.1 * 7 * 5 = 6.5
    self.assertAlmostEqual(var_value, 6.5, 4)
    self.assertEqual(global_step_value, 1)
def testSGDOptimizer(self):
  optimizers = [
      "SGD", gradient_descent.GradientDescentOptimizer,
      gradient_descent.GradientDescentOptimizer(learning_rate=0.1),
      lambda lr: gradient_descent.GradientDescentOptimizer(learning_rate=lr),
      "Momentum"
  ]
  for optimizer in optimizers:
    with ops.Graph().as_default() as g:
      with self.session(graph=g) as session:
        x, var, loss, global_step = _setup_model()
        train = optimizers_lib.optimize_loss(
            loss, global_step, learning_rate=0.1, optimizer=optimizer)
        variables.global_variables_initializer().run()
        session.run(train, feed_dict={x: 5})
        var_value, global_step_value = session.run([var, global_step])
        self.assertEqual(var_value, 9.5)
        self.assertEqual(global_step_value, 1)
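# Why 9.5 holds for every optimizer in the list, including "Momentum" (a
# sketch, assuming loss = |var * x| from _setup_model): d(loss)/d(var) at
# x = 5 is 5, and the first momentum step equals a plain SGD step because the
# accumulator starts at zero, so var(1) = 10 - 0.1 * 5 = 9.5.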
def testAdaptiveGradientClip(self):
  with self.cached_session() as session:
    x, var, loss, global_step = _setup_model()
    clip_gradients = optimizers_lib.adaptive_clipping_fn()
    train = optimizers_lib.optimize_loss(
        loss,
        global_step,
        learning_rate=0.1,
        optimizer="SGD",
        clip_gradients=clip_gradients)
    variables.global_variables_initializer().run()
    session.run(train, feed_dict={x: 5})
    var_value, global_step_value = session.run([var, global_step])
    self.assertAlmostEqual(var_value, 9.8916, 4)
    self.assertEqual(global_step_value, 1)
    var_count = 0
    for var in variables.global_variables():
      if var.name.startswith("OptimizeLoss/AdaptiveMaxNorm"):
        var_count += 1
    self.assertEqual(2, var_count)
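# Illustrative variant (a sketch, not part of the original suite): the same
# adaptive-clipping setup with a non-default decay for the moving averages
# tracked under "AdaptiveMaxNorm". The decay keyword follows the
# optimizers_lib.adaptive_clipping_fn signature; since the post-step value of
# var depends on the clipping internals, only the step counter is checked.
def testAdaptiveGradientClipCustomDecaySketch(self):
  with self.cached_session() as session:
    x, var, loss, global_step = _setup_model()
    clip_gradients = optimizers_lib.adaptive_clipping_fn(decay=0.9)
    train = optimizers_lib.optimize_loss(
        loss,
        global_step,
        learning_rate=0.1,
        optimizer="SGD",
        clip_gradients=clip_gradients)
    variables.global_variables_initializer().run()
    session.run(train, feed_dict={x: 5})
    self.assertEqual(1, global_step.eval())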