def testWithGlobalStep(self, dtype):
    """Apply one LARSOptimizer update while tracking a global step.

    Checks that both variables start at their initial values, that a single
    run of the update op moves them by the expected amount, and that the
    non-trainable global-step variable evaluates to 1 afterwards.

    Args:
        dtype: dtype used for the variables and the constant gradients.
    """
    with self.cached_session():
        step_counter = tf.Variable(0, trainable=False)
        weights_a = tf.Variable([1.0, 2.0], dtype=dtype)
        weights_b = tf.Variable([3.0, 4.0], dtype=dtype)
        grad_a = tf.constant([0.1, 0.1], dtype=dtype)
        grad_b = tf.constant([0.01, 0.01], dtype=dtype)

        optimizer = utils.LARSOptimizer(3.0)
        update_op = optimizer.apply_gradients(
            [(grad_a, weights_a), (grad_b, weights_b)],
            global_step=step_counter,
        )
        tf.global_variables_initializer().run()

        # Before any update each variable must still hold its initial value.
        initial_checks = (
            ([1.0, 2.0], weights_a),
            ([3.0, 4.0], weights_b),
        )
        for initial, variable in initial_checks:
            self.assertAllCloseAccordingToType(initial, self.evaluate(variable))

        # Execute exactly one optimizer update.
        update_op.run()

        # Expected per-element decrement for each variable. sqrt(5.) and 5.
        # equal the L2 norms of [1, 2] and [3, 4]; the 3.0 factor matches the
        # constructor argument above.
        # NOTE(review): 0.001 is presumably the optimizer's default trust
        # coefficient -- confirm against utils.LARSOptimizer.
        delta_a = 3.0 * (0.001 * (np.sqrt(5.) / np.sqrt(2.)))
        delta_b = 3.0 * (0.001 * (5. / np.sqrt(2.)))

        self.assertAllCloseAccordingToType(
            [1.0 - delta_a, 2.0 - delta_a], self.evaluate(weights_a))
        self.assertAllCloseAccordingToType(
            [3.0 - delta_b, 4.0 - delta_b], self.evaluate(weights_b))

        # The update op must also have advanced the global step once.
        self.assertAllCloseAccordingToType(1, self.evaluate(step_counter))
def testBasic(self, dtype):
    """Apply one LARSOptimizer update to two variables and check the result.

    Checks the initial variable values, the values after a single run of the
    update op, and that the optimizer reports no variables of its own.

    Args:
        dtype: dtype used for the variables and the constant gradients.
    """
    with self.cached_session():
        weights_a = tf.Variable([1.0, 2.0], dtype=dtype)
        weights_b = tf.Variable([3.0, 4.0], dtype=dtype)
        grad_a = tf.constant([0.1, 0.1], dtype=dtype)
        grad_b = tf.constant([0.01, 0.01], dtype=dtype)

        optimizer = utils.LARSOptimizer(3.0)
        grads_and_vars = [(grad_a, weights_a), (grad_b, weights_b)]
        update_op = optimizer.apply_gradients(grads_and_vars)
        tf.global_variables_initializer().run()

        # Before any update each variable must still hold its initial value.
        initial_checks = (
            ([1.0, 2.0], weights_a),
            ([3.0, 4.0], weights_b),
        )
        for initial, variable in initial_checks:
            self.assertAllCloseAccordingToType(initial, self.evaluate(variable))

        # Execute exactly one optimizer update.
        update_op.run()

        # Expected per-element decrement for each variable. sqrt(5.) and 5.
        # equal the L2 norms of [1, 2] and [3, 4]; the 3.0 factor matches the
        # constructor argument above.
        # NOTE(review): 0.001 is presumably the optimizer's default trust
        # coefficient -- confirm against utils.LARSOptimizer.
        delta_a = 3.0 * (0.001 * (np.sqrt(5.) / np.sqrt(2.)))
        delta_b = 3.0 * (0.001 * (5. / np.sqrt(2.)))

        self.assertAllCloseAccordingToType(
            [1.0 - delta_a, 2.0 - delta_a], self.evaluate(weights_a))
        self.assertAllCloseAccordingToType(
            [3.0 - delta_b, 4.0 - delta_b], self.evaluate(weights_b))

        # The optimizer is expected to expose no variables after the update.
        self.assertEmpty(list(optimizer.variables()))
def testGradWrtRef(self, dtype):
    """Check compute_gradients on the sum of two single-element variables.

    Every gradient returned by the optimizer for the loss `a + b` is expected
    to evaluate to [1.0].

    Args:
        dtype: dtype used for the two variables.
    """
    with self.cached_session():
        optimizer = utils.LARSOptimizer(3.0)
        first = tf.Variable([1.0], dtype=dtype)
        second = tf.Variable([3.0], dtype=dtype)
        variables = [first, second]

        loss = first + second
        grads_and_vars = optimizer.compute_gradients(loss, variables)
        tf.global_variables_initializer().run()

        # Only the gradient half of each (gradient, variable) pair is checked.
        for gradient, _unused_var in grads_and_vars:
            self.assertAllCloseAccordingToType([1.0], self.evaluate(gradient))