def testProximalAdagradwithoutRegularization2(self): with self.session(), self.test_scope(): var0 = resource_variable_ops.ResourceVariable([1.0, 2.0]) var1 = resource_variable_ops.ResourceVariable([4.0, 3.0]) grads0 = constant_op.constant([0.1, 0.2]) grads1 = constant_op.constant([0.01, 0.02]) opt = proximal_adagrad.ProximalAdagradOptimizer( 3.0, initial_accumulator_value=0.1, l1_regularization_strength=0.0, l2_regularization_strength=0.0) update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) self.evaluate(variables.global_variables_initializer()) self.assertAllClose([1.0, 2.0], self.evaluate(var0)) self.assertAllClose([4.0, 3.0], self.evaluate(var1)) # Run 3 steps Proximal Adagrad. for _ in range(3): update.run() self.assertAllClose(np.array([-1.60261, -2.296985]), self.evaluate(var0)) self.assertAllClose(np.array([3.715679, 2.433051]), self.evaluate(var1))
def testProximalAdagradWithL1_L2(self): with self.test_session() as sess: var0 = variables.Variable([1.0, 2.0]) var1 = variables.Variable([4.0, 3.0]) grads0 = constant_op.constant([0.1, 0.2]) grads1 = constant_op.constant([0.01, 0.02]) opt = proximal_adagrad.ProximalAdagradOptimizer( 3.0, initial_accumulator_value=0.1, l1_regularization_strength=0.001, l2_regularization_strength=2.0) update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) variables.global_variables_initializer().run() v0_val, v1_val = sess.run([var0, var1]) self.assertAllClose([1.0, 2.0], v0_val) self.assertAllClose([4.0, 3.0], v1_val) # Run 10 steps Proximal Adagrad. for _ in range(10): update.run() v0_val, v1_val = sess.run([var0, var1]) self.assertAllClose(np.array([-0.0495, -0.0995]), v0_val) self.assertAllClose(np.array([-0.0045, -0.0095]), v1_val)
def testResourceProximalAdagradwithoutRegularization(self): with self.session(), self.test_scope(): var0 = resource_variable_ops.ResourceVariable([0.0, 0.0]) var1 = resource_variable_ops.ResourceVariable([0.0, 0.0]) grads0 = constant_op.constant([0.1, 0.2]) grads1 = constant_op.constant([0.01, 0.02]) opt = proximal_adagrad.ProximalAdagradOptimizer( 3.0, initial_accumulator_value=0.1, l1_regularization_strength=0.0, l2_regularization_strength=0.0) update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) self.evaluate(variables.global_variables_initializer()) self.assertAllClose([0.0, 0.0], self.evaluate(var0)) self.assertAllClose([0.0, 0.0], self.evaluate(var1)) # Run 3 steps Proximal Adagrad. for _ in range(3): update.run() self.assertAllClose(np.array([-2.60260963, -4.29698515]), self.evaluate(var0)) self.assertAllClose(np.array([-0.28432083, -0.56694895]), self.evaluate(var1)) opt_vars = opt.variables() self.assertStartsWith(opt_vars[0].name, var0._shared_name) self.assertStartsWith(opt_vars[1].name, var1._shared_name) self.assertEqual(2, len(opt_vars))
def testProximalAdagradWithL1(self): # ProximalAdagradOptimizer is supported only in V1. with ops.Graph().as_default(), self.cached_session(): var0 = variables.Variable([1.0, 2.0]) var1 = variables.Variable([4.0, 3.0]) grads0 = constant_op.constant([0.1, 0.2]) grads1 = constant_op.constant([0.01, 0.02]) opt = proximal_adagrad.ProximalAdagradOptimizer( 3.0, initial_accumulator_value=0.1, l1_regularization_strength=0.001, l2_regularization_strength=0.0) update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) self.evaluate(variables.global_variables_initializer()) v0_val, v1_val = self.evaluate([var0, var1]) self.assertAllClose([1.0, 2.0], v0_val) self.assertAllClose([4.0, 3.0], v1_val) # Run 10 steps Proximal Adagrad for _ in range(10): update.run() v0_val, v1_val = self.evaluate([var0, var1]) self.assertAllClose(np.array([-6.663634, -9.190331]), v0_val) self.assertAllClose(np.array([2.959304, 1.029232]), v1_val)
def testProximalAdagradwithoutRegularization(self): with self.test_session() as sess: var0 = variables.Variable([0.0, 0.0]) var1 = variables.Variable([0.0, 0.0]) grads0 = constant_op.constant([0.1, 0.2]) grads1 = constant_op.constant([0.01, 0.02]) opt = proximal_adagrad.ProximalAdagradOptimizer( 3.0, initial_accumulator_value=0.1, l1_regularization_strength=0.0, l2_regularization_strength=0.0) update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) variables.global_variables_initializer().run() v0_val, v1_val = sess.run([var0, var1]) self.assertAllClose([0.0, 0.0], v0_val) self.assertAllClose([0.0, 0.0], v1_val) # Run 3 steps Proximal Adagrad. for _ in range(3): update.run() v0_val, v1_val = sess.run([var0, var1]) self.assertAllClose(np.array([-2.60260963, -4.29698515]), v0_val) self.assertAllClose(np.array([-0.28432083, -0.56694895]), v1_val)
def doTestProximalAdagradwithoutRegularization(self, use_resource=False): # ProximalAdagradOptimizer is supported only in V1. with ops.Graph().as_default(), self.cached_session(): var0 = variables.Variable([0.0, 0.0]) var1 = variables.Variable([0.0, 0.0]) grads0 = constant_op.constant([0.1, 0.2]) grads1 = constant_op.constant([0.01, 0.02]) opt = proximal_adagrad.ProximalAdagradOptimizer( 3.0, initial_accumulator_value=0.1, l1_regularization_strength=0.0, l2_regularization_strength=0.0) update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) self.evaluate(variables.global_variables_initializer()) v0_val, v1_val = self.evaluate([var0, var1]) self.assertAllClose([0.0, 0.0], v0_val) self.assertAllClose([0.0, 0.0], v1_val) # Run 3 steps Proximal Adagrad. for _ in range(3): update.run() v0_val, v1_val = self.evaluate([var0, var1]) self.assertAllClose(np.array([-2.60260963, -4.29698515]), v0_val) self.assertAllClose(np.array([-0.28432083, -0.56694895]), v1_val) opt_vars = opt.variables() self.assertStartsWith(opt_vars[0].name, var0._shared_name) self.assertStartsWith(opt_vars[1].name, var1._shared_name) self.assertEqual(2, len(opt_vars))
def testProximalAdagradWithL1(self): with self.session(), self.test_scope(): var0 = resource_variable_ops.ResourceVariable([1.0, 2.0]) var1 = resource_variable_ops.ResourceVariable([4.0, 3.0]) grads0 = constant_op.constant([0.1, 0.2]) grads1 = constant_op.constant([0.01, 0.02]) opt = proximal_adagrad.ProximalAdagradOptimizer( 3.0, initial_accumulator_value=0.1, l1_regularization_strength=0.001, l2_regularization_strength=0.0) update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) self.evaluate(variables.global_variables_initializer()) self.assertAllClose([1.0, 2.0], self.evaluate(var0)) self.assertAllClose([4.0, 3.0], self.evaluate(var1)) # Run 10 steps Proximal Adagrad for _ in range(10): update.run() self.assertAllClose(np.array([-6.663634, -9.190331]), self.evaluate(var0)) self.assertAllClose(np.array([2.959304, 1.029232]), self.evaluate(var1))
def testProximalAdagradwithoutRegularization2(self): # ProximalAdagradOptimizer is supported only in V1. with ops.Graph().as_default(), self.cached_session(): var0 = variables.Variable([1.0, 2.0]) var1 = variables.Variable([4.0, 3.0]) grads0 = constant_op.constant([0.1, 0.2]) grads1 = constant_op.constant([0.01, 0.02]) opt = proximal_adagrad.ProximalAdagradOptimizer( 3.0, initial_accumulator_value=0.1, l1_regularization_strength=0.0, l2_regularization_strength=0.0) update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) self.evaluate(variables.global_variables_initializer()) v0_val, v1_val = self.evaluate([var0, var1]) self.assertAllClose([1.0, 2.0], v0_val) self.assertAllClose([4.0, 3.0], v1_val) # Run 3 steps Proximal Adagrad. for _ in range(3): update.run() v0_val, v1_val = self.evaluate([var0, var1]) self.assertAllClose(np.array([-1.60261, -2.296985]), v0_val) self.assertAllClose(np.array([3.715679, 2.433051]), v1_val)
def test_ops_with_var_and_proximal_adagrad(self): var_list = [ deo.get_variable('sp_var', initializer=0.0, dim=2), ] opt_list = [ proximal_adagrad.ProximalAdagradOptimizer(0.1), ] self.common_run_context(var_list, opt_list, name='proximal_adagrad_test')
def testEquivAdagradwithoutRegularization(self): with self.session(), self.test_scope(): val0, val1 = self.applyOptimizer( proximal_adagrad.ProximalAdagradOptimizer( 3.0, initial_accumulator_value=0.1, l1_regularization_strength=0.0, l2_regularization_strength=0.0)) with self.session(), self.test_scope(): val2, val3 = self.applyOptimizer( adagrad.AdagradOptimizer(3.0, initial_accumulator_value=0.1)) self.assertAllClose(val0, val2) self.assertAllClose(val1, val3)
def testEquivAdagradwithoutRegularization(self): # ProximalAdagradOptimizer is supported only in V1. with ops.Graph().as_default(), self.cached_session(): val0, val1 = self.applyOptimizer( proximal_adagrad.ProximalAdagradOptimizer( 3.0, initial_accumulator_value=0.1, l1_regularization_strength=0.0, l2_regularization_strength=0.0)) with ops.Graph().as_default(), self.cached_session(): val2, val3 = self.applyOptimizer( adagrad.AdagradOptimizer(3.0, initial_accumulator_value=0.1)) self.assertAllClose(val0, val2) self.assertAllClose(val1, val3)
def testEquivSparseAdagradwithoutRegularization(self): with self.cached_session(): val0, val1 = self.applyOptimizer( proximal_adagrad.ProximalAdagradOptimizer( 3.0, initial_accumulator_value=0.1, l1_regularization_strength=0.0, l2_regularization_strength=0.0), is_sparse=True) with self.cached_session(): val2, val3 = self.applyOptimizer(adagrad.AdagradOptimizer( 3.0, initial_accumulator_value=0.1), is_sparse=True) self.assertAllClose(val0, val2) self.assertAllClose(val1, val3)
def testMinimizeSparseResourceVariable(self): for dtype in [dtypes.float32, dtypes.float64]: with self.test_session(): var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype) x = constant_op.constant([[4.0], [5.0]], dtype=dtype) pred = math_ops.matmul( embedding_ops.embedding_lookup([var0], [0]), x) loss = pred * pred sgd_op = proximal_adagrad.ProximalAdagradOptimizer( 1.0).minimize(loss) variables.global_variables_initializer().run() # Fetch params to validate initial values self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval()) # Run 1 step of sgd sgd_op.run() # Validate updated params self.assertAllCloseAccordingToType([[0, 1]], var0.eval(), atol=0.01)
def test_proximal_adagrad_minimize_trainable(self): base_opt = proximal_adagrad.ProximalAdagradOptimizer(1.0) test_opt = proximal_adagrad.ProximalAdagradOptimizer(1.0) self.common_minimize_trainable(base_opt, test_opt, name="proximal_adagrad")
def test_proximal_adagrad_apply_restriction(self): opt = proximal_adagrad.ProximalAdagradOptimizer(0.1) self.commonly_apply_restriction_verify(opt)
def test_proximal_adagrad_restrict_on_policy(self): opt = proximal_adagrad.ProximalAdagradOptimizer(0.1) self.common_single_step_restrict_verification(opt)
def test_proximal_adagrad_restrictor_update(self): opt = proximal_adagrad.ProximalAdagradOptimizer(0.1) self.common_single_step_update_verification(opt)