def testSparseStability(self):
  for dtype in [dtypes.half]:
    with self.cached_session():
      shape = [1, 6]
      var0_np = np.array([[
          0.00872496, -0.106952, 0.110467, 0.226505, -0.0147257, -0.0105945
      ]], dtype=dtype.as_numpy_dtype)
      var0 = resource_variable_ops.ResourceVariable(var0_np)
      grads0_np = np.array([[
          -5.91278e-05, 5.31673e-05, -2.5779e-06, 4.29153e-05, -8.4877e-05,
          -9.48906e-05
      ]], dtype=dtype.as_numpy_dtype)
      grads0 = ops.IndexedSlices(
          constant_op.constant(grads0_np), constant_op.constant([0]),
          constant_op.constant(shape))
      ada_opt = adagrad.Adagrad(1.0)
      ada_update = ada_opt.apply_gradients(zip([grads0], [var0]))
      slot0 = ada_opt.get_slot(var0, "accumulator")
      init = variables.global_variables_initializer()
      for _ in range(100):
        init.run()
        ada_update.run()
      self.assertAllCloseAccordingToType(
          np.array([[0.1, 0.1, 0.1, 0.1, 0.1, 0.1]]), slot0.eval())
      self.assertAllCloseAccordingToType(
          np.array([[
              0.00891194, -0.10712013, 0.11047515, 0.22636929, -0.0144573,
              -0.01029443
          ]]), var0.eval())

def testTensorLearningRate(self):
  # TODO(tanzheny, omalleyt): Fix test in eager mode.
  with ops.Graph().as_default():
    for dtype in _DATA_TYPES:
      var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
      var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
      grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
      grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
      var0 = variables.Variable(var0_np)
      var1 = variables.Variable(var1_np)
      grads0 = constant_op.constant(grads0_np)
      grads1 = constant_op.constant(grads1_np)
      learning_rate = constant_op.constant(3.0)
      ada_opt = adagrad.Adagrad(learning_rate)
      ada_update = ada_opt.apply_gradients(
          zip([grads0, grads1], [var0, var1]))
      self.evaluate(variables.global_variables_initializer())
      # Fetch params to validate initial values
      self.assertAllClose([1.0, 2.0], self.evaluate(var0))
      self.assertAllClose([3.0, 4.0], self.evaluate(var1))
      accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
      accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
      # Run 3 steps of adagrad
      for _ in range(3):
        self.evaluate(ada_update)
        var0_np, accum0_np = adagrad_update_numpy(var0_np, accum0_np,
                                                  grads0_np, learning_rate)
        var1_np, accum1_np = adagrad_update_numpy(var1_np, accum1_np,
                                                  grads1_np, learning_rate)
        self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
        self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

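# NOTE: Several tests in this excerpt compare the optimizer against a NumPy
# reference helper named adagrad_update_numpy, whose definition is not shown
# here. The sketch below is only an illustration of what such a helper could
# look like, assuming the standard Adagrad update
# (accum += g**2; var -= lr * g / (sqrt(accum) + epsilon)) with a default
# epsilon of 1e-7; the actual helper in the test file may differ.
def adagrad_update_numpy(param, accum, g_t, lr=0.001, epsilon=1e-7):
  # Accumulate the squared gradient, then scale the step by 1/sqrt(accum).
  accum_t = accum + g_t * g_t
  param_t = param - lr * g_t / (np.sqrt(accum_t) + epsilon)
  return param_t, accum_t
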
def testSparseStability(self):
  # TODO(tanzheny, omalleyt): Fix test in eager mode.
  with ops.Graph().as_default():
    for dtype in [dtypes.half]:
      shape = [1, 6]
      var0_np = np.array([[
          0.00872496, -0.106952, 0.110467, 0.226505, -0.0147257, -0.0105945
      ]], dtype=dtype.as_numpy_dtype)
      var0 = variables.Variable(var0_np)
      grads0_np = np.array([[
          -5.91278e-05, 5.31673e-05, -2.5779e-06, 4.29153e-05, -8.4877e-05,
          -9.48906e-05
      ]], dtype=dtype.as_numpy_dtype)
      grads0 = indexed_slices.IndexedSlices(
          constant_op.constant(grads0_np), constant_op.constant([0]),
          constant_op.constant(shape))
      ada_opt = adagrad.Adagrad(1.0)
      ada_update = ada_opt.apply_gradients(zip([grads0], [var0]))
      slot0 = ada_opt.get_slot(var0, "accumulator")
      init = variables.global_variables_initializer()
      for _ in range(100):
        self.evaluate(init)
        self.evaluate(ada_update)
      self.assertAllCloseAccordingToType(
          np.array([[0.1, 0.1, 0.1, 0.1, 0.1, 0.1]]), self.evaluate(slot0))
      self.assertAllCloseAccordingToType(
          np.array([[
              0.00891194, -0.10712013, 0.11047515, 0.22636929, -0.0144573,
              -0.01029443
          ]]), self.evaluate(var0))

def testTensorLearningRate(self):
  for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
    with self.cached_session():
      var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
      var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
      grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
      grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
      var0 = resource_variable_ops.ResourceVariable(var0_np)
      var1 = resource_variable_ops.ResourceVariable(var1_np)
      grads0 = constant_op.constant(grads0_np)
      grads1 = constant_op.constant(grads1_np)
      learning_rate = constant_op.constant(3.0)
      ada_opt = adagrad.Adagrad(learning_rate)
      ada_update = ada_opt.apply_gradients(
          zip([grads0, grads1], [var0, var1]))
      variables.global_variables_initializer().run()
      # Fetch params to validate initial values
      self.assertAllClose([1.0, 2.0], var0.eval())
      self.assertAllClose([3.0, 4.0], var1.eval())
      accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
      accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
      # Run 3 steps of adagrad
      for _ in range(3):
        ada_update.run()
        var0_np, accum0_np = adagrad_update_numpy(var0_np, accum0_np,
                                                  grads0_np, learning_rate)
        var1_np, accum1_np = adagrad_update_numpy(var1_np, accum1_np,
                                                  grads1_np, learning_rate)
        self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
        self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

def doTestBasic(self, use_resource=False):
  for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
    with self.cached_session():
      if use_resource:
        var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
        var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype)
      else:
        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
      grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
      grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
      ada_opt = adagrad.Adagrad(3.0, initial_accumulator_value=0.1)
      ada_update = ada_opt.apply_gradients(
          zip([grads0, grads1], [var0, var1]))
      variables.global_variables_initializer().run()
      # Fetch params to validate initial values
      self.assertAllClose([1.0, 2.0], var0.eval())
      self.assertAllClose([3.0, 4.0], var1.eval())
      # Run 3 steps of adagrad
      for _ in range(3):
        ada_update.run()
      # Validate updated params
      self.assertAllCloseAccordingToType(
          np.array([-1.6026098728179932, -0.6026098728179932]), var0.eval())
      self.assertAllCloseAccordingToType(
          np.array([2.715679168701172, 3.715679168701172]), var1.eval())

def testSparseSingleVarDim(self):
  for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
    with self.cached_session():
      var0_np = np.array([1.0], dtype=dtype.as_numpy_dtype)
      grads0_np = np.array([0.1], dtype=dtype.as_numpy_dtype)
      var0 = resource_variable_ops.ResourceVariable(var0_np)
      grads0_np_indices = np.array([0], dtype=np.int32)
      grads0 = ops.IndexedSlices(
          constant_op.constant(grads0_np[grads0_np_indices]),
          constant_op.constant(grads0_np_indices), constant_op.constant([3]))
      learning_rate = 3.0
      ada_opt = adagrad.Adagrad(learning_rate, epsilon=1.)
      ada_update = ada_opt.apply_gradients(zip([grads0], [var0]))
      variables.global_variables_initializer().run()
      # Fetch params to validate initial values
      self.assertAllClose([1.0], var0.eval())
      accum0_np = np.array([0.1], dtype=dtype.as_numpy_dtype)
      # Run 3 steps of adagrad
      for _ in range(3):
        ada_update.run()
        var0_np, accum0_np = sparse_adagrad_update_numpy(
            var0_np, accum0_np, grads0_np_indices,
            grads0_np[grads0_np_indices], learning_rate, epsilon=1.)
        self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))

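# NOTE: The sparse tests compare against a helper named
# sparse_adagrad_update_numpy, also not shown in this excerpt. A minimal
# sketch is given below, assuming it applies the same Adagrad update only at
# the given indices and sums repeated indices into the accumulator before
# updating; the actual helper may differ in details.
def sparse_adagrad_update_numpy(param, accum, gindexs, gvalues, lr=0.001,
                                epsilon=1e-7):
  accum_t = accum.copy()
  param_t = param.copy()
  # Accumulate squared gradients first so that repeated indices are summed.
  for gindex, gvalue in zip(gindexs, gvalues):
    accum_t[gindex] += gvalue * gvalue
  # Apply the update at each sparse index using the accumulated value.
  for gindex, gvalue in zip(gindexs, gvalues):
    param_t[gindex] -= lr * gvalue / (np.sqrt(accum_t[gindex]) + epsilon)
  return param_t, accum_t
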
def testSparseBasic(self):
  for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
    with self.cached_session():
      var0 = variables.Variable([[1.0], [2.0]], dtype=dtype)
      var1 = variables.Variable([[3.0], [4.0]], dtype=dtype)
      grads0 = ops.IndexedSlices(
          constant_op.constant([0.1], shape=[1, 1], dtype=dtype),
          constant_op.constant([0]), constant_op.constant([2, 1]))
      grads1 = ops.IndexedSlices(
          constant_op.constant([0.01], shape=[1, 1], dtype=dtype),
          constant_op.constant([1]), constant_op.constant([2, 1]))
      ada_opt = adagrad.Adagrad(3.0, initial_accumulator_value=0.1)
      ada_update = ada_opt.apply_gradients(
          zip([grads0, grads1], [var0, var1]))
      variables.global_variables_initializer().run()
      # Fetch params to validate initial values
      self.assertAllClose([[1.0], [2.0]], var0.eval())
      self.assertAllClose([[3.0], [4.0]], var1.eval())
      # Run 3 steps of adagrad
      for _ in range(3):
        ada_update.run()
      # Validate updated params
      self.assertAllCloseAccordingToType(
          np.array([[-1.6026098728179932], [2.0]]), var0.eval())
      self.assertAllCloseAccordingToType(
          np.array([[3.0], [3.715679168701172]]), var1.eval())

def testSharing(self):
  for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
    with self.cached_session():
      var0 = variables.Variable([1.0, 2.0], dtype=dtype)
      var1 = variables.Variable([3.0, 4.0], dtype=dtype)
      grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
      grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
      ada_opt = adagrad.Adagrad(3.0)
      # Apply the optimizer twice. Both applications will use
      # the same accums.
      ada_update1 = ada_opt.apply_gradients(
          zip([grads0, grads1], [var0, var1]))
      ada_update2 = ada_opt.apply_gradients(
          zip([grads0, grads1], [var0, var1]))
      self.assertEqual(["accumulator"], ada_opt.get_slot_names())
      slot0 = ada_opt.get_slot(var0, "accumulator")
      self.assertEqual(slot0.get_shape(), var0.get_shape())
      slot1 = ada_opt.get_slot(var1, "accumulator")
      self.assertEqual(slot1.get_shape(), var1.get_shape())
      variables.global_variables_initializer().run()
      # Fetch params to validate initial values.
      self.assertAllClose([1.0, 2.0], var0.eval())
      self.assertAllClose([3.0, 4.0], var1.eval())
      # Mix the first and the second adagrad for 3 steps.
      ada_update1.run()
      ada_update2.run()
      ada_update1.run()
      # Validate updated params (the same as with only 1 Adagrad).
      self.assertAllCloseAccordingToType(
          np.array([-1.6026098728179932, -0.6026098728179932]), var0.eval())
      self.assertAllCloseAccordingToType(
          np.array([2.715679168701172, 3.715679168701172]), var1.eval())

def testConfig(self):
  opt = adagrad.Adagrad(
      learning_rate=lambda: ops.convert_to_tensor(1.0),
      initial_accumulator_value=2.0)
  config = opt.get_config()
  opt2 = adagrad.Adagrad.from_config(config)
  self.assertIsInstance(opt2._hyper["learning_rate"][1],
                        python_types.LambdaType)
  self.assertEqual(opt._initial_accumulator_value,
                   opt2._initial_accumulator_value)

def testDynamicShapeVariable_Ok(self):
  with self.cached_session():
    v = variable_scope.get_variable(
        "v", initializer=constant_op.constant(1.), validate_shape=False)
    self.assertFalse(v.shape.is_fully_defined())
    # Creating optimizer should cause no exception.
    adagrad.Adagrad(3.0, initial_accumulator_value=0.1)

def testSparseRepeatedIndicesResourceVariable(self):
  for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
    with self.cached_session():
      var_repeated = resource_variable_ops.ResourceVariable(
          [1.0, 2.0], dtype=dtype)
      loss_repeated = math_ops.reduce_sum(
          embedding_ops.embedding_lookup(var_repeated, [0, 0]))
      var_aggregated = resource_variable_ops.ResourceVariable(
          [1.0, 2.0], dtype=dtype)
      loss_aggregated = 2 * math_ops.reduce_sum(
          embedding_ops.embedding_lookup(var_aggregated, [0]))
      update_op_repeated = adagrad.Adagrad(2.0).minimize(loss_repeated)
      update_op_aggregated = adagrad.Adagrad(2.0).minimize(loss_aggregated)
      variables.global_variables_initializer().run()
      self.assertAllCloseAccordingToType(
          var_repeated.eval(), var_aggregated.eval())
      for _ in range(3):
        update_op_repeated.run()
        update_op_aggregated.run()
        self.assertAllCloseAccordingToType(
            var_repeated.eval(), var_aggregated.eval())

def testSparseRepeatedIndicesByEmbeddingLookUp(self):
  for dtype in _DATA_TYPES:
    var_repeated = resource_variable_ops.ResourceVariable([1.0, 2.0],
                                                          dtype=dtype)
    loss_repeated = lambda: math_ops.reduce_sum(  # pylint: disable=g-long-lambda
        embedding_ops.embedding_lookup(var_repeated, [0, 0]))  # pylint: disable=cell-var-from-loop
    var_aggregated = resource_variable_ops.ResourceVariable(
        [1.0, 2.0], dtype=dtype)
    loss_aggregated = lambda: 2 * math_ops.reduce_sum(  # pylint: disable=g-long-lambda
        embedding_ops.embedding_lookup(var_aggregated, [0]))  # pylint: disable=cell-var-from-loop
    update_op_repeated = adagrad.Adagrad(2.0).minimize(
        loss_repeated, var_list=[var_repeated])
    update_op_aggregated = adagrad.Adagrad(2.0).minimize(
        loss_aggregated, var_list=[var_aggregated])
    self.evaluate(variables.global_variables_initializer())
    self.assertAllCloseAccordingToType(
        self.evaluate(var_repeated), self.evaluate(var_aggregated))
    for _ in range(3):
      self.evaluate(update_op_repeated)
      self.evaluate(update_op_aggregated)
      self.assertAllCloseAccordingToType(
          self.evaluate(var_repeated), self.evaluate(var_aggregated))

def testSparseBasic(self):
  # TODO(tanzheny, omalleyt): Fix test in eager mode.
  with ops.Graph().as_default():
    for dtype in _DATA_TYPES:
      var0_np = np.array([1.0, 1.0, 2.0], dtype=dtype.as_numpy_dtype)
      grads0_np = np.array([0.1, 0, 0.1], dtype=dtype.as_numpy_dtype)
      var1_np = np.array([3.0, 3.0, 4.0], dtype=dtype.as_numpy_dtype)
      grads1_np = np.array([0.01, 0, 0.01], dtype=dtype.as_numpy_dtype)
      var0 = variables.Variable(var0_np)
      var1 = variables.Variable(var1_np)
      grads0_np_indices = np.array([0, 2], dtype=np.int32)
      grads0 = ops.IndexedSlices(
          constant_op.constant(grads0_np[grads0_np_indices]),
          constant_op.constant(grads0_np_indices), constant_op.constant([3]))
      grads1_np_indices = np.array([0, 2], dtype=np.int32)
      grads1 = ops.IndexedSlices(
          constant_op.constant(grads1_np[grads1_np_indices]),
          constant_op.constant(grads1_np_indices), constant_op.constant([3]))
      learning_rate = 3.0
      ada_opt = adagrad.Adagrad(learning_rate)
      ada_update = ada_opt.apply_gradients(
          zip([grads0, grads1], [var0, var1]))
      self.evaluate(variables.global_variables_initializer())
      # Fetch params to validate initial values
      self.assertAllClose([1.0, 1.0, 2.0], self.evaluate(var0))
      self.assertAllClose([3.0, 3.0, 4.0], self.evaluate(var1))
      accum0_np = np.array([0.1, 0.1, 0.1], dtype=dtype.as_numpy_dtype)
      accum1_np = np.array([0.1, 0.1, 0.1], dtype=dtype.as_numpy_dtype)
      # Run 3 steps of adagrad
      for _ in range(3):
        self.evaluate(ada_update)
        var0_np, accum0_np = sparse_adagrad_update_numpy(
            var0_np, accum0_np, grads0_np_indices,
            grads0_np[grads0_np_indices], learning_rate)
        var1_np, accum1_np = sparse_adagrad_update_numpy(
            var1_np, accum1_np, grads1_np_indices,
            grads1_np[grads1_np_indices], learning_rate)
        self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
        self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

def test_numpy_input_fn_with_optimizer_instance(self):
  """Tests complete flow with optimizer_v2 instance."""
  label_dimension = 2
  batch_size = 10
  train_input_fn, eval_input_fn, predict_input_fn = self._create_input_fn(
      label_dimension, batch_size)
  self._test_complete_flow(
      train_input_fn=train_input_fn,
      eval_input_fn=eval_input_fn,
      predict_input_fn=predict_input_fn,
      input_dimension=label_dimension,
      label_dimension=label_dimension,
      batch_size=batch_size,
      optimizer=adagrad_v2.Adagrad(0.01))  # Test with optimizer_v2 instance

def testSparseRepeatedIndicesByEmbeddingLookUp(self):
  for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
    with self.cached_session():
      var_repeated = resource_variable_ops.ResourceVariable(
          [1.0, 2.0], dtype=dtype)
      loss_repeated = lambda: math_ops.reduce_sum(  # pylint: disable=g-long-lambda
          embedding_ops.embedding_lookup(var_repeated, [0, 0]))  # pylint: disable=cell-var-from-loop
      var_aggregated = resource_variable_ops.ResourceVariable(
          [1.0, 2.0], dtype=dtype)
      loss_aggregated = lambda: 2 * math_ops.reduce_sum(  # pylint: disable=g-long-lambda
          embedding_ops.embedding_lookup(var_aggregated, [0]))  # pylint: disable=cell-var-from-loop
      update_op_repeated = adagrad.Adagrad(2.0).minimize(
          loss_repeated, var_list=[var_repeated])
      update_op_aggregated = adagrad.Adagrad(2.0).minimize(
          loss_aggregated, var_list=[var_aggregated])
      variables.global_variables_initializer().run()
      self.assertAllCloseAccordingToType(var_repeated.eval(),
                                         var_aggregated.eval())
      for _ in range(3):
        update_op_repeated.run()
        update_op_aggregated.run()
        self.assertAllCloseAccordingToType(var_repeated.eval(),
                                           var_aggregated.eval())

def test_wide_deep_model(self, distribution, data_fn):
  with distribution.scope():
    linear_model = linear.LinearModel(units=1)
    dnn_model = sequential.Sequential([core.Dense(units=1)])
    wide_deep_model = wide_deep.WideDeepModel(linear_model, dnn_model)
    linear_opt = gradient_descent.SGD(learning_rate=0.05)
    dnn_opt = adagrad.Adagrad(learning_rate=0.1)
    wide_deep_model.compile(optimizer=[linear_opt, dnn_opt], loss='mse')
    if data_fn == 'numpy':
      inputs, output = get_numpy()
      hist = wide_deep_model.fit(inputs, output, epochs=5)
    else:
      hist = wide_deep_model.fit(get_dataset(), epochs=5)
    self.assertLess(hist.history['loss'][4], 0.2)

def testBasicWithLearningRateInverseTimeDecay(self):
  for dtype in [dtypes.float32, dtypes.float64]:
    with self.cached_session():
      var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
      var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
      grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
      grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
      var0 = resource_variable_ops.ResourceVariable(var0_np)
      var1 = resource_variable_ops.ResourceVariable(var1_np)
      grads0 = constant_op.constant(grads0_np)
      grads1 = constant_op.constant(grads1_np)
      learning_rate = 3.0
      decay = 0.5
      lr_schedule = learning_rate_schedule.InverseTimeDecay(
          learning_rate, decay_steps=1.0, decay_rate=decay)
      ada_opt = adagrad.Adagrad(lr_schedule)
      accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
      accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
      if not context.executing_eagerly():
        ada_update = ada_opt.apply_gradients(
            zip([grads0, grads1], [var0, var1]))
        self.evaluate(variables.global_variables_initializer())
      # Fetch params to validate initial values
      v0_val, v1_val = self.evaluate([var0, var1])
      self.assertAllClose([1.0, 2.0], v0_val)
      self.assertAllClose([3.0, 4.0], v1_val)
      # Run 3 steps of adagrad
      for t in range(3):
        if not context.executing_eagerly():
          self.evaluate(ada_update)
        else:
          ada_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        lr_np = learning_rate / (1 + decay * t)
        var0_np, accum0_np = adagrad_update_numpy(var0_np, accum0_np,
                                                  grads0_np, lr_np)
        var1_np, accum1_np = adagrad_update_numpy(var1_np, accum1_np,
                                                  grads1_np, lr_np)
        self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
        self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

def testSharing(self):
  # TODO(tanzheny, omalleyt): Fix test in eager mode.
  with ops.Graph().as_default():
    for dtype in _DATA_TYPES:
      var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
      grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
      var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
      grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
      var0 = variables.Variable(var0_np)
      var1 = variables.Variable(var1_np)
      grads0 = constant_op.constant(grads0_np)
      grads1 = constant_op.constant(grads1_np)
      learning_rate = 3.0
      ada_opt = adagrad.Adagrad(learning_rate)
      # Apply the optimizer twice. Both applications will use
      # the same accums.
      ada_update1 = ada_opt.apply_gradients(
          zip([grads0, grads1], [var0, var1]))
      ada_update2 = ada_opt.apply_gradients(
          zip([grads0, grads1], [var0, var1]))
      slot0 = ada_opt.get_slot(var0, "accumulator")
      self.assertEqual(slot0.shape, var0.shape)
      slot1 = ada_opt.get_slot(var1, "accumulator")
      self.assertEqual(slot1.shape, var1.shape)
      self.evaluate(variables.global_variables_initializer())
      # Fetch params to validate initial values.
      self.assertAllClose([1.0, 2.0], self.evaluate(var0))
      self.assertAllClose([3.0, 4.0], self.evaluate(var1))
      # Mix the first and the second adagrad for 3 steps.
      self.evaluate(ada_update1)
      self.evaluate(ada_update2)
      self.evaluate(ada_update1)
      accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
      accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
      for _ in range(3):
        var0_np, accum0_np = adagrad_update_numpy(var0_np, accum0_np,
                                                  grads0_np, learning_rate)
        var1_np, accum1_np = adagrad_update_numpy(var1_np, accum1_np,
                                                  grads1_np, learning_rate)
      self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
      self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

def doTestBasic(self, use_callable_params=False):
  for dtype in [dtypes.float32, dtypes.float64]:
    with self.cached_session():
      var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
      var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
      grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
      grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
      var0 = resource_variable_ops.ResourceVariable(var0_np)
      var1 = resource_variable_ops.ResourceVariable(var1_np)
      grads0 = constant_op.constant(grads0_np)
      grads1 = constant_op.constant(grads1_np)
      learning_rate = lambda: 3.0
      if not use_callable_params:
        learning_rate = learning_rate()
      ada_opt = adagrad.Adagrad(learning_rate)
      accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
      accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
      if not context.executing_eagerly():
        ada_update = ada_opt.apply_gradients(
            zip([grads0, grads1], [var0, var1]))
        self.evaluate(variables.global_variables_initializer())
      # Fetch params to validate initial values
      v0_val, v1_val = self.evaluate([var0, var1])
      self.assertAllClose([1.0, 2.0], v0_val)
      self.assertAllClose([3.0, 4.0], v1_val)
      # Run 3 steps of adagrad
      for _ in range(3):
        if not context.executing_eagerly():
          self.evaluate(ada_update)
        else:
          ada_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        var0_np, accum0_np = adagrad_update_numpy(var0_np, accum0_np,
                                                  grads0_np, 3.0)
        var1_np, accum1_np = adagrad_update_numpy(var1_np, accum1_np,
                                                  grads1_np, 3.0)
        self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
        self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

def testSharing(self):
  for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
    with self.cached_session():
      var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
      grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
      var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
      grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
      var0 = resource_variable_ops.ResourceVariable(var0_np)
      var1 = resource_variable_ops.ResourceVariable(var1_np)
      grads0 = constant_op.constant(grads0_np)
      grads1 = constant_op.constant(grads1_np)
      learning_rate = 3.0
      ada_opt = adagrad.Adagrad(learning_rate)
      # Apply the optimizer twice. Both applications will use
      # the same accums.
      ada_update1 = ada_opt.apply_gradients(
          zip([grads0, grads1], [var0, var1]))
      ada_update2 = ada_opt.apply_gradients(
          zip([grads0, grads1], [var0, var1]))
      slot0 = ada_opt.get_slot(var0, "accumulator")
      self.assertEqual(slot0.shape, var0.shape)
      slot1 = ada_opt.get_slot(var1, "accumulator")
      self.assertEqual(slot1.shape, var1.shape)
      variables.global_variables_initializer().run()
      # Fetch params to validate initial values.
      self.assertAllClose([1.0, 2.0], var0.eval())
      self.assertAllClose([3.0, 4.0], var1.eval())
      # Mix the first and the second adagrad for 3 steps.
      ada_update1.run()
      ada_update2.run()
      ada_update1.run()
      accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
      accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
      for _ in range(3):
        var0_np, accum0_np = adagrad_update_numpy(var0_np, accum0_np,
                                                  grads0_np, learning_rate)
        var1_np, accum1_np = adagrad_update_numpy(var1_np, accum1_np,
                                                  grads1_np, learning_rate)
      self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
      self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

def testBasicWithLearningRateDecay(self):
  for dtype in _DATA_TYPES:
    var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
    var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
    grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
    grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
    var0 = variables.Variable(var0_np)
    var1 = variables.Variable(var1_np)
    grads0 = constant_op.constant(grads0_np)
    grads1 = constant_op.constant(grads1_np)
    learning_rate = 3.0
    decay = 0.5
    ada_opt = adagrad.Adagrad(learning_rate, decay=decay)
    accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
    accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
    if not context.executing_eagerly():
      ada_update = ada_opt.apply_gradients(
          zip([grads0, grads1], [var0, var1]))
      self.evaluate(variables.global_variables_initializer())
    # Fetch params to validate initial values
    v0_val, v1_val = self.evaluate([var0, var1])
    self.assertAllClose([1.0, 2.0], v0_val)
    self.assertAllClose([3.0, 4.0], v1_val)
    # Run 3 steps of adagrad
    for t in range(3):
      if not context.executing_eagerly():
        self.evaluate(ada_update)
      else:
        ada_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
      lr_np = learning_rate / (1 + decay * t)
      var0_np, accum0_np = adagrad_update_numpy(var0_np, accum0_np,
                                                grads0_np, lr_np)
      var1_np, accum1_np = adagrad_update_numpy(var1_np, accum1_np,
                                                grads1_np, lr_np)
      self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
      self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

def testMinimizeSparseResourceVariable(self):
  for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
    with self.cached_session():
      var0 = resource_variable_ops.ResourceVariable(
          [[1.0, 2.0], [3.0, 4.0]], dtype=dtype)
      x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
      pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
      loss = pred * pred
      sgd_op = adagrad.Adagrad(1.0).minimize(loss, var_list=[var0])
      variables.global_variables_initializer().run()
      # Fetch params to validate initial values
      self.assertAllCloseAccordingToType(
          [[1.0, 2.0], [3.0, 4.0]], var0.eval())
      # Run 1 step of sgd
      sgd_op.run()
      # Validate updated params
      self.assertAllCloseAccordingToType(
          [[0, 1], [3, 4]], var0.eval(), atol=0.01)

def testBasicWithLargeEpsilon(self):
  with self.cached_session():
    var0_np = np.array([1.0, 2.0])
    var1_np = np.array([3.0, 4.0])
    grads0_np = np.array([0.1, 0.1])
    grads1_np = np.array([0.01, 0.01])
    var0 = resource_variable_ops.ResourceVariable(var0_np)
    var1 = resource_variable_ops.ResourceVariable(var1_np)
    grads0 = constant_op.constant(grads0_np)
    grads1 = constant_op.constant(grads1_np)
    learning_rate = 3.0
    ada_opt = adagrad.Adagrad(learning_rate, epsilon=1.0)
    accum0_np = np.array([0.1, 0.1])
    accum1_np = np.array([0.1, 0.1])
    if not context.executing_eagerly():
      ada_update = ada_opt.apply_gradients(
          zip([grads0, grads1], [var0, var1]))
      self.evaluate(variables.global_variables_initializer())
    # Fetch params to validate initial values
    v0_val, v1_val = self.evaluate([var0, var1])
    self.assertAllClose([1.0, 2.0], v0_val)
    self.assertAllClose([3.0, 4.0], v1_val)
    # Run 3 steps of adagrad
    for _ in range(3):
      if not context.executing_eagerly():
        self.evaluate(ada_update)
      else:
        ada_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
      var0_np, accum0_np = adagrad_update_numpy(var0_np, accum0_np,
                                                grads0_np, 3.0, 1.0)
      var1_np, accum1_np = adagrad_update_numpy(var1_np, accum1_np,
                                                grads1_np, 3.0, 1.0)
      self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
      self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

def testMinimizeSparseResourceVariable(self):
  for dtype in _DATA_TYPES:
    var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0], [3.0, 4.0]],
                                                  dtype=dtype)
    x = constant_op.constant([[4.0], [5.0]], dtype=dtype)

    def loss():
      pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)  # pylint: disable=cell-var-from-loop
      return pred * pred

    sgd_op = adagrad.Adagrad(1.0).minimize(loss, var_list=[var0])
    self.evaluate(variables.global_variables_initializer())
    # Fetch params to validate initial values
    self.assertAllCloseAccordingToType([[1.0, 2.0], [3.0, 4.0]],
                                       self.evaluate(var0))
    # Run 1 step of sgd
    self.evaluate(sgd_op)
    # Validate updated params
    self.assertAllCloseAccordingToType([[0, 1], [3, 4]],
                                       self.evaluate(var0),
                                       atol=0.01)

"GradientDescentV1", lambda: gradient_descent.GradientDescentOptimizer(0.2)) adagrad_optimizer_v1_fn = combinations.NamedObject( "AdagradV1", lambda: adagrad.AdagradOptimizer(0.001)) adam_optimizer_v1_fn = combinations.NamedObject( "AdamV1", lambda: adam.AdamOptimizer(0.001, epsilon=1)) rmsprop_optimizer_v1_fn = combinations.NamedObject( "RmsPropV1", lambda: rmsprop.RMSPropOptimizer(0.001)) # TODO(shiningsun): consider adding the other v1 optimizers optimizers_v1 = [gradient_descent_optimizer_v1_fn, adagrad_optimizer_v1_fn] adadelta_optimizer_keras_v2_fn = combinations.NamedObject( "AdadeltaKerasV2", lambda: adadelta_keras_v2.Adadelta(0.001)) adagrad_optimizer_keras_v2_fn = combinations.NamedObject( "AdagradKerasV2", lambda: adagrad_keras_v2.Adagrad(0.001)) adam_optimizer_keras_v2_fn = combinations.NamedObject( "AdamKerasV2", lambda: adam_keras_v2.Adam(0.001, epsilon=1.0)) adamax_optimizer_keras_v2_fn = combinations.NamedObject( "AdamaxKerasV2", lambda: adamax_keras_v2.Adamax(0.001, epsilon=1.0)) nadam_optimizer_keras_v2_fn = combinations.NamedObject( "NadamKerasV2", lambda: nadam_keras_v2.Nadam(0.001, epsilon=1.0)) ftrl_optimizer_keras_v2_fn = combinations.NamedObject( "FtrlKerasV2", lambda: ftrl_keras_v2.Ftrl(0.001)) gradient_descent_optimizer_keras_v2_fn = combinations.NamedObject( "GradientDescentKerasV2", lambda: gradient_descent_keras_v2.SGD(0.2)) rmsprop_optimizer_keras_v2_fn = combinations.NamedObject( "RmsPropKerasV2", lambda: rmsprop_keras_v2.RMSprop(0.001)) # TODO(shiningsun): consider adding the other v2 optimizers optimizers_v2 = [
def testAdagradCompatibility(self):
  opt_v1 = optimizers.Adagrad(lr=0.01)
  opt_v2 = adagrad.Adagrad(learning_rate=0.01)
  self._testOptimizersCompatibility(opt_v1, opt_v2)

# Hyper-parameters of each optimizer in the graph.
HP_IN_GRAPH = {
    'Adam': ['decay', 'learning_rate'],
    'Ftrl': [
        'decay', 'l1_regularization_strength', 'l2_regularization_strength',
        'learning_rate', 'learning_rate_power'
    ],
    'RMSProp': ['decay', 'learning_rate', 'momentum', 'rho'],
    'Adagrad': ['decay', 'learning_rate'],
    'SGD': ['decay', 'learning_rate', 'momentum'],
}

# Optimizer v2 instances.
OPT_V2_INSTANCE = {
    'Adagrad': adagrad.Adagrad(),
    'Adam': adam.Adam(),
    'Ftrl': ftrl.Ftrl(),
    'RMSProp': rmsprop.RMSprop(),
    'SGD': gradient_descent.SGD(),
}


def _add_new_variable(initial_value, var_name_v2, var_name_v1, var_map,
                      var_names_map):
  """Creates a new variable and adds it to the variable maps."""
  var = tf.Variable(initial_value, name=var_name_v2)
  var_map[var_name_v2] = var
  var_names_map[var_name_v2] = var_name_v1
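
# A short usage sketch of _add_new_variable with hypothetical names and
# values: the helper creates the variable under its v2 name and records both
# the variable object and the v2 -> v1 name mapping in the supplied dicts.
# The slot-variable names below are illustrative only, not taken from the
# source.
def _example_add_new_variable_usage():
  var_map = {}
  var_names_map = {}
  _add_new_variable(
      initial_value=[0.0, 0.0],
      var_name_v2='dense/kernel/Adagrad',  # hypothetical v2 variable name
      var_name_v1='dense/kernel/Adagrad',  # hypothetical v1 variable name
      var_map=var_map,
      var_names_map=var_names_map)
  return var_map, var_names_map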