def testSparseRepeatedIndices(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: with self.cached_session(): repeated_index_update_var = variables.Variable( [[1.0], [2.0]], dtype=dtype) aggregated_update_var = variables.Variable( [[1.0], [2.0]], dtype=dtype) grad_repeated_index = ops.IndexedSlices( constant_op.constant( [0.1, 0.1], shape=[2, 1], dtype=dtype), constant_op.constant([1, 1]), constant_op.constant([2, 1])) grad_aggregated = ops.IndexedSlices( constant_op.constant( [0.2], shape=[1, 1], dtype=dtype), constant_op.constant([1]), constant_op.constant([2, 1])) repeated_update = adagrad.AdagradOptimizer(3.0).apply_gradients( [(grad_repeated_index, repeated_index_update_var)]) aggregated_update = adagrad.AdagradOptimizer(3.0).apply_gradients( [(grad_aggregated, aggregated_update_var)]) variables.global_variables_initializer().run() self.assertAllClose(aggregated_update_var.eval(), repeated_index_update_var.eval()) for _ in range(3): repeated_update.run() aggregated_update.run() self.assertAllClose(aggregated_update_var.eval(), repeated_index_update_var.eval())
def _get_estimator(self, train_distribute, eval_distribute, remote_cluster=None): input_dimension = LABEL_DIMENSION linear_feature_columns = [ feature_column.numeric_column("x", shape=(input_dimension,)) ] dnn_feature_columns = [ feature_column.numeric_column("x", shape=(input_dimension,)) ] return dnn_linear_combined.DNNLinearCombinedRegressor( linear_feature_columns=linear_feature_columns, dnn_hidden_units=(2, 2), dnn_feature_columns=dnn_feature_columns, label_dimension=LABEL_DIMENSION, model_dir=self._model_dir, dnn_optimizer=adagrad.AdagradOptimizer(0.001), linear_optimizer=adagrad.AdagradOptimizer(0.001), config=run_config_lib.RunConfig( experimental_distribute=DistributeConfig( train_distribute=train_distribute, eval_distribute=eval_distribute, remote_cluster=remote_cluster)))
def test_complete_flow_with_mode(self, distribution): label_dimension = 2 input_dimension = label_dimension batch_size = 10 data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32) data = data.reshape(batch_size, label_dimension) train_input_fn = self.dataset_input_fn( x={'x': data}, y=data, batch_size=batch_size // len(distribution.worker_devices), shuffle=True) eval_input_fn = numpy_io.numpy_input_fn(x={'x': data}, y=data, batch_size=batch_size, shuffle=False) predict_input_fn = numpy_io.numpy_input_fn(x={'x': data}, batch_size=batch_size, shuffle=False) linear_feature_columns = [ feature_column.numeric_column('x', shape=(input_dimension, )) ] dnn_feature_columns = [ feature_column.numeric_column('x', shape=(input_dimension, )) ] feature_columns = linear_feature_columns + dnn_feature_columns estimator = dnn_linear_combined.DNNLinearCombinedRegressor( linear_feature_columns=linear_feature_columns, dnn_hidden_units=(2, 2), dnn_feature_columns=dnn_feature_columns, label_dimension=label_dimension, model_dir=self._model_dir, # TODO(isaprykin): Work around the colocate_with error. dnn_optimizer=adagrad.AdagradOptimizer(0.001), linear_optimizer=adagrad.AdagradOptimizer(0.001), config=run_config.RunConfig(train_distribute=distribution)) num_steps = 10 estimator.train(train_input_fn, steps=num_steps) scores = estimator.evaluate(eval_input_fn) self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP]) self.assertIn('loss', six.iterkeys(scores)) predictions = np.array([ x[prediction_keys.PredictionKeys.PREDICTIONS] for x in estimator.predict(predict_input_fn) ]) self.assertAllEqual((batch_size, label_dimension), predictions.shape) feature_spec = feature_column.make_parse_example_spec(feature_columns) serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( feature_spec) export_dir = estimator.export_savedmodel(tempfile.mkdtemp(), serving_input_receiver_fn) self.assertTrue(gfile.Exists(export_dir))
def doTestBasic(self, use_locking=False, use_resource=False): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: with self.cached_session(): if use_resource: var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype) var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype) else: var0 = variables.Variable([1.0, 2.0], dtype=dtype) var1 = variables.Variable([3.0, 4.0], dtype=dtype) grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) ada_opt = adagrad.AdagradOptimizer( 3.0, initial_accumulator_value=0.1, use_locking=use_locking) ada_update = ada_opt.apply_gradients( zip([grads0, grads1], [var0, var1])) variables.global_variables_initializer().run() # Fetch params to validate initial values self.assertAllClose([1.0, 2.0], var0.eval()) self.assertAllClose([3.0, 4.0], var1.eval()) # Run 3 steps of adagrad for _ in range(3): ada_update.run() # Validate updated params self.assertAllCloseAccordingToType( np.array([-1.6026098728179932, -0.6026098728179932]), var0.eval()) self.assertAllCloseAccordingToType( np.array([2.715679168701172, 3.715679168701172]), var1.eval())
def testDynamicShapeVariable_Ok(self): with self.cached_session(): v = variable_scope.get_variable("v", initializer=constant_op.constant(1.), validate_shape=False) self.assertFalse(v.shape.is_fully_defined()) # Creating optimizer should cause no exception. adagrad.AdagradOptimizer(3.0, initial_accumulator_value=0.1)
def testSharing(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: with self.cached_session(): var0 = variables.Variable([1.0, 2.0], dtype=dtype) var1 = variables.Variable([3.0, 4.0], dtype=dtype) grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) ada_opt = adagrad.AdagradOptimizer(3.0) # Apply the optimizer twice. Both applications will use # the same accums. ada_update1 = ada_opt.apply_gradients( zip([grads0, grads1], [var0, var1])) ada_update2 = ada_opt.apply_gradients( zip([grads0, grads1], [var0, var1])) self.assertEqual(["accumulator"], ada_opt.get_slot_names()) slot0 = ada_opt.get_slot(var0, "accumulator") self.assertEquals(slot0.get_shape(), var0.get_shape()) slot1 = ada_opt.get_slot(var1, "accumulator") self.assertEquals(slot1.get_shape(), var1.get_shape()) variables.global_variables_initializer().run() # Fetch params to validate initial values. self.assertAllClose([1.0, 2.0], var0.eval()) self.assertAllClose([3.0, 4.0], var1.eval()) # Mix the first and the second adagrad for 3 steps. ada_update1.run() ada_update2.run() ada_update1.run() # Validate updated params (the same as with only 1 Adagrad). self.assertAllCloseAccordingToType( np.array([-1.6026098728179932, -0.6026098728179932]), var0.eval()) self.assertAllCloseAccordingToType( np.array([2.715679168701172, 3.715679168701172]), var1.eval())
def testSparseStability(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: with self.cached_session(): shape = [1, 6] var0 = variables.Variable( [[ 0.00872496, -0.106952, 0.110467, 0.226505, -0.0147257, -0.0105945 ]], dtype=dtype) grads0 = ops.IndexedSlices( constant_op.constant( [[ -5.91278e-05, 5.31673e-05, -2.5779e-06, 4.29153e-05, -8.4877e-05, -9.48906e-05 ]], shape=shape, dtype=dtype), constant_op.constant([0]), constant_op.constant(shape)) ada_opt = adagrad.AdagradOptimizer(1.0, initial_accumulator_value=0.1) ada_update = ada_opt.apply_gradients(zip([grads0], [var0])) self.assertEqual(["accumulator"], ada_opt.get_slot_names()) slot0 = ada_opt.get_slot(var0, "accumulator") init = variables.global_variables_initializer() for _ in range(100): init.run() ada_update.run() self.assertAllCloseAccordingToType( np.array([[0.1, 0.1, 0.1, 0.1, 0.1, 0.1]]), slot0.eval()) self.assertAllCloseAccordingToType( np.array([[ 0.00891194, -0.10712013, 0.11047515, 0.22636929, -0.0144573, -0.01029443 ]]), var0.eval())
def testSparseBasic(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: with self.cached_session(): var0 = variables.Variable([[1.0], [2.0]], dtype=dtype) var1 = variables.Variable([[3.0], [4.0]], dtype=dtype) grads0 = ops.IndexedSlices( constant_op.constant( [0.1], shape=[1, 1], dtype=dtype), constant_op.constant([0]), constant_op.constant([2, 1])) grads1 = ops.IndexedSlices( constant_op.constant( [0.01], shape=[1, 1], dtype=dtype), constant_op.constant([1]), constant_op.constant([2, 1])) ada_opt = adagrad.AdagradOptimizer(3.0, initial_accumulator_value=0.1) ada_update = ada_opt.apply_gradients( zip([grads0, grads1], [var0, var1])) variables.global_variables_initializer().run() # Fetch params to validate initial values self.assertAllClose([[1.0], [2.0]], var0.eval()) self.assertAllClose([[3.0], [4.0]], var1.eval()) # Run 3 step of sgd for _ in range(3): ada_update.run() # Validate updated params self.assertAllCloseAccordingToType( np.array([[-1.6026098728179932], [2.0]]), var0.eval()) self.assertAllCloseAccordingToType( np.array([[3.0], [3.715679168701172]]), var1.eval())
def testSparseRepeatedIndicesResourceVariable(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: with self.cached_session(): var_repeated = resource_variable_ops.ResourceVariable( [1.0, 2.0], dtype=dtype) loss_repeated = math_ops.reduce_sum( embedding_ops.embedding_lookup(var_repeated, [0, 0])) var_aggregated = resource_variable_ops.ResourceVariable( [1.0, 2.0], dtype=dtype) loss_aggregated = 2 * math_ops.reduce_sum( embedding_ops.embedding_lookup(var_aggregated, [0])) update_op_repeated = adagrad.AdagradOptimizer( 2.0).minimize(loss_repeated) update_op_aggregated = adagrad.AdagradOptimizer( 2.0).minimize(loss_aggregated) variables.global_variables_initializer().run() self.assertAllCloseAccordingToType( var_repeated.eval(), var_aggregated.eval()) for _ in range(3): update_op_repeated.run() update_op_aggregated.run() self.assertAllCloseAccordingToType( var_repeated.eval(), var_aggregated.eval())
def testMinimizeSparseResourceVariable(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: with self.cached_session(): var0 = resource_variable_ops.ResourceVariable( [[1.0, 2.0], [3.0, 4.0]], dtype=dtype) x = constant_op.constant([[4.0], [5.0]], dtype=dtype) pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x) loss = pred * pred sgd_op = adagrad.AdagradOptimizer(1.0).minimize(loss) variables.global_variables_initializer().run() # Fetch params to validate initial values self.assertAllCloseAccordingToType( [[1.0, 2.0], [3.0, 4.0]], var0.eval()) # Run 1 step of sgd sgd_op.run() # Validate updated params self.assertAllCloseAccordingToType( [[0, 1], [3, 4]], var0.eval(), atol=0.01)
def testTensorLearningRate(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: with self.test_session(): var0 = variables.Variable([1.0, 2.0], dtype=dtype) var1 = variables.Variable([3.0, 4.0], dtype=dtype) grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) ada_opt = adagrad.AdagradOptimizer( constant_op.constant(3.0), initial_accumulator_value=0.1) ada_update = ada_opt.apply_gradients( zip([grads0, grads1], [var0, var1])) variables.global_variables_initializer().run() # Fetch params to validate initial values self.assertAllClose([1.0, 2.0], var0.eval()) self.assertAllClose([3.0, 4.0], var1.eval()) # Run 3 steps of adagrad for _ in range(3): ada_update.run() # Validate updated params self.assertAllCloseAccordingToType( np.array([-1.6026098728179932, -0.6026098728179932]), var0.eval()) self.assertAllCloseAccordingToType( np.array([2.715679168701172, 3.715679168701172]), var1.eval())
gradient_descent_optimizer_v1_fn = NamedObject( "GradientDescentV1", lambda: gradient_descent.GradientDescentOptimizer(0.2)) adagrad_optimizer_v1_fn = NamedObject( "AdagradV1", lambda: adagrad.AdagradOptimizer(0.001)) adam_optimizer_v1_fn = NamedObject("AdamV1", lambda: adam.AdamOptimizer(0.001, epsilon=1)) rmsprop_optimizer_v1_fn = NamedObject( "RmsPropV1", lambda: rmsprop.RMSPropOptimizer(0.001)) optimizers_v1 = [gradient_descent_optimizer_v1_fn, adagrad_optimizer_v1_fn] gradient_descent_optimizer_v2_fn = NamedObject( "GradientDescentV2", lambda: gradient_descent_v2.GradientDescentOptimizer(0.2)) adagrad_optimizer_v2_fn = NamedObject( "AdagradV2", lambda: adagrad_v2.AdagradOptimizer(0.001)) adam_optimizer_v2_fn = NamedObject( "AdamV2", lambda: adam_v2.AdamOptimizer(0.001, epsilon=1)) optimizers_v2 = [gradient_descent_optimizer_v2_fn, adagrad_optimizer_v2_fn] graph_and_eager_modes = ["graph", "eager"] def distributions_and_v1_optimizers(): """A common set of combination with DistributionStrategies and Optimizers.""" return combine( distribution=[ one_device_strategy, mirrored_strategy_with_gpu_and_cpu, mirrored_strategy_with_two_gpus,