def test_times_variable_arguments(self):
  c1 = combinations.combine(mode=["graph", "eager"])
  c2 = combinations.combine(optimizer=["adam", "gd"])
  c3 = combinations.combine(distribution=["d1", "d2"])
  c4 = combinations.times(c3, c1, c2)
  self.assertEqual([
      OrderedDict([("distribution", "d1"), ("mode", "graph"),
                   ("optimizer", "adam")]),
      OrderedDict([("distribution", "d1"), ("mode", "graph"),
                   ("optimizer", "gd")]),
      OrderedDict([("distribution", "d1"), ("mode", "eager"),
                   ("optimizer", "adam")]),
      OrderedDict([("distribution", "d1"), ("mode", "eager"),
                   ("optimizer", "gd")]),
      OrderedDict([("distribution", "d2"), ("mode", "graph"),
                   ("optimizer", "adam")]),
      OrderedDict([("distribution", "d2"), ("mode", "graph"),
                   ("optimizer", "gd")]),
      OrderedDict([("distribution", "d2"), ("mode", "eager"),
                   ("optimizer", "adam")]),
      OrderedDict([("distribution", "d2"), ("mode", "eager"),
                   ("optimizer", "gd")])
  ], c4)
  self.assertEqual(
      combinations.combine(
          mode=["graph", "eager"],
          optimizer=["adam", "gd"],
          distribution=["d1", "d2"]), c4)

def test_add(self):
  self.assertEqual(
      [{"a": 1}, {"a": 2}, {"b": 2}, {"b": 3}],
      combinations.combine(a=[1, 2]) + combinations.combine(b=[2, 3]))

def strategy_and_input_combinations():
  return (
      combinations.times(
          combinations.combine(distribution=strategies_minus_tpu),
          combinations.combine(mode=['graph'],
                               use_numpy=[True, False],
                               use_validation_data=[True, False])
          + combinations.combine(mode=['eager'],
                                 use_numpy=[False],
                                 use_validation_data=[False]))
      + combinations.times(
          combinations.combine(distribution=tpu_strategies),
          combinations.combine(mode=['graph'],
                               use_numpy=[True, False],
                               use_validation_data=[True, False])))

def test_combinations_for_embedding_model():
  return (
      combinations.times(
          combinations.combine(
              distribution=strategies_for_embedding_models()),
          (graph_mode_test_configuration() +
           eager_mode_test_configuration())))

def test_arguments_sorted(self):
  self.assertEqual([
      OrderedDict([("aa", 1), ("ab", 2)]),
      OrderedDict([("aa", 1), ("ab", 3)]),
      OrderedDict([("aa", 2), ("ab", 2)]),
      OrderedDict([("aa", 2), ("ab", 3)])
  ], combinations.combine(ab=[2, 3], aa=[1, 2]))

def all_combinations():
  return combinations.combine(
      distribution=[combinations.default_strategy,
                    combinations.one_device_strategy,
                    combinations.mirrored_strategy_with_gpu_and_cpu,
                    combinations.mirrored_strategy_with_two_gpus],
      mode=["graph"])

def strategy_and_optimizer_combinations():
  return combinations.combine(
      distribution=strategies,
      optimizer=[combinations.adagrad_optimizer_v1_fn,
                 combinations.adam_optimizer_v1_fn,
                 combinations.gradient_descent_optimizer_v1_fn,
                 combinations.rmsprop_optimizer_v1_fn],
      mode=['graph'])

def test_combine_single_parameter(self):
  self.assertEqual(
      [{"a": 1, "b": 2}, {"a": 2, "b": 2}],
      combinations.combine(a=[1, 2], b=2))

def test_combinations_for_stateful_embedding_model():
  return (
      combinations.combine(
          distribution=strategies_for_stateful_embedding_model(),
          mode='graph',
          use_numpy=False,
          use_validation_data=False))

def test_combinations_with_tpu_strategies():
  tpu_strategies = [combinations.tpu_strategy,
                    combinations.tpu_strategy_one_step]
  return (
      combinations.times(
          combinations.combine(distribution=tpu_strategies),
          graph_mode_test_configuration()))

def strategy_and_optimizer_combinations():
  return combinations.times(
      all_strategy_combinations(),
      combinations.combine(
          optimizer=[combinations.adagrad_optimizer_v1_fn,
                     combinations.adam_optimizer_v1_fn,
                     combinations.gradient_descent_optimizer_v1_fn,
                     combinations.rmsprop_optimizer_v1_fn]))

def strategy_and_input_combinations():

  def cnn_model_with_batch_norm(**kwargs):
    return _create_cnn_model(with_batch_norm=True, **kwargs)

  return (
      combinations.times(
          combinations.combine(distribution=all_strategies),
          combinations.combine(mode=['graph', 'eager'],
                               use_numpy=[True, False],
                               use_validation_data=[True, False]),
          combinations.combine(model_with_data=[
              ModelWithData('dnn', _create_dnn_model, _dnn_training_data),
              ModelWithData('cnn', _create_cnn_model, _cnn_training_data),
              ModelWithData('cnn_batch_norm', cnn_model_with_batch_norm,
                            _cnn_training_data, with_batch_norm=True),
          ])))

def strategy_minus_tpu_combinations():
  return combinations.combine(
      distribution=[combinations.default_strategy,
                    combinations.one_device_strategy,
                    combinations.mirrored_strategy_with_gpu_and_cpu,
                    combinations.mirrored_strategy_with_two_gpus,
                    combinations.core_mirrored_strategy_with_gpu_and_cpu,
                    combinations.core_mirrored_strategy_with_two_gpus],
      mode=['graph'])

def test_times(self):
  c1 = combinations.combine(mode=["graph"], loss=["callable", "tensor"])
  c2 = combinations.combine(mode=["eager"], loss=["callable"])
  c3 = combinations.combine(distribution=["d1", "d2"])
  c4 = combinations.times(c3, c1 + c2)
  self.assertEqual([
      OrderedDict([("distribution", "d1"), ("loss", "callable"),
                   ("mode", "graph")]),
      OrderedDict([("distribution", "d1"), ("loss", "tensor"),
                   ("mode", "graph")]),
      OrderedDict([("distribution", "d1"), ("loss", "callable"),
                   ("mode", "eager")]),
      OrderedDict([("distribution", "d2"), ("loss", "callable"),
                   ("mode", "graph")]),
      OrderedDict([("distribution", "d2"), ("loss", "tensor"),
                   ("mode", "graph")]),
      OrderedDict([("distribution", "d2"), ("loss", "callable"),
                   ("mode", "eager")])
  ], c4)

def all_strategy_combinations_minus_default():
  strategy_minus_default_combinations = combinations.combine(
      distribution=[
          combinations.one_device_strategy,
          combinations.mirrored_strategy_with_gpu_and_cpu,
          combinations.mirrored_strategy_with_two_gpus,
          combinations.core_mirrored_strategy_with_gpu_and_cpu,
          combinations.core_mirrored_strategy_with_two_gpus],
      mode=['graph', 'eager'])
  return strategy_minus_default_combinations + tpu_strategy_combinations()

def test_combine(self):
  self.assertEqual(
      [{"a": 1, "b": 2}, {"a": 1, "b": 3}, {"a": 2, "b": 2}, {"a": 2, "b": 3}],
      combinations.combine(a=[1, 2], b=[2, 3]))

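# The combine() tests above pin down its observable contract: keyword
# arguments form a Cartesian product, a scalar value behaves like a
# single-element list, and keys come out sorted. The sketch below is only an
# illustration of that contract under those assumptions; `combine_sketch` is
# a hypothetical name, not the actual implementation in combinations.py.
from collections import OrderedDict
from itertools import product


def combine_sketch(**kwargs):
  """Reproduces the behavior asserted by test_combine and friends above."""
  keys = sorted(kwargs)
  # Broadcast scalars such as b=2 into single-element lists.
  value_lists = [kwargs[k] if isinstance(kwargs[k], list) else [kwargs[k]]
                 for k in keys]
  return [OrderedDict(zip(keys, values)) for values in product(*value_lists)]


# For example, combine_sketch(a=[1, 2], b=2) returns
# [OrderedDict([("a", 1), ("b", 2)]), OrderedDict([("a", 2), ("b", 2)])],
# matching test_combine_single_parameter above.
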
class DistributedCollectiveAllReduceStrategyTest(
    CollectiveAllReduceStrategyTestBase,
    strategy_test_lib.DistributionTestBase,
    parameterized.TestCase):

  @classmethod
  def setUpClass(cls):
    """Create a local cluster with 3 workers."""
    cls._cluster_spec = multi_worker_test_base.create_in_process_cluster(
        num_workers=3, num_ps=0)

  @combinations.generate(
      combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
  def test_num_replicas_in_sync(self, use_core_strategy):
    distribution, _, _ = create_test_objects(
        cluster_spec=self._cluster_spec,
        task_type='worker',
        task_id=0,
        num_gpus=2,
        use_core_strategy=use_core_strategy)
    num_workers = len(self._cluster_spec.get('chief', []) +
                      self._cluster_spec.get('worker', []))
    self.assertEqual(2 * num_workers, distribution.num_replicas_in_sync)

  @combinations.generate(
      combinations.combine(mode=['graph'], num_gpus=[0, 1, 2], required_gpus=1,
                           use_core_strategy=[True, False]))
  def testMinimizeLossGraph(self, num_gpus, use_core_strategy):
    self._run_between_graph_clients(self._test_minimize_loss_graph,
                                    self._cluster_spec, num_gpus,
                                    use_core_strategy=use_core_strategy)

  @combinations.generate(
      combinations.combine(mode=['graph'], num_gpus=[0, 1, 2], required_gpus=1,
                           use_core_strategy=[True, False]))
  def testVariableInitialization(self, num_gpus, use_core_strategy):
    if context.num_gpus() < num_gpus:
      self.skipTest('Not enough GPUs')
    self._run_between_graph_clients(self._test_variable_initialization,
                                    self._cluster_spec,
                                    num_gpus=num_gpus,
                                    use_core_strategy=use_core_strategy)

  @combinations.generate(
      combinations.combine(mode=['graph'], num_gpus=[0, 1, 2], required_gpus=1,
                           use_core_strategy=[True, False]))
  def testComplexModel(self, num_gpus, use_core_strategy):
    if context.num_gpus() < num_gpus:
      self.skipTest('Not enough GPUs')
    self._run_between_graph_clients(self._test_complex_model,
                                    self._cluster_spec,
                                    num_gpus=num_gpus,
                                    use_core_strategy=use_core_strategy)

  # TODO(b/124344198): Re-enable after fixing this flaky test.
  # TODO(yuefengz): Update how we use num_gpus and required_gpus.
  @combinations.generate(
      combinations.combine(mode=['graph'], num_gpus=[0, 1, 2], required_gpus=1,
                           use_dataset=[True, False],
                           use_core_strategy=[True, False]))
  def DISABLED_testMakeInputFnIterator(self, num_gpus, use_dataset,
                                       use_core_strategy):
    if context.num_gpus() < num_gpus:
      self.skipTest('Not enough GPUs')
    if use_dataset:
      fn = lambda: dataset_ops.Dataset.range(100)
    else:
      def fn():
        dataset = dataset_ops.Dataset.range(100)
        it = dataset.make_one_shot_iterator()
        return it.get_next
    # We use CPU as the device when num_gpus = 0.
    devices_per_worker = max(1, num_gpus)
    expected_values = [[i + j for j in range(devices_per_worker)]
                       for i in range(0, 100, devices_per_worker)]

    input_fn = self._input_fn_to_test_input_context(
        fn,
        expected_num_replicas_in_sync=3 * devices_per_worker,
        expected_num_input_pipelines=3,
        expected_input_pipeline_id=1)  # because task_id = 1
    self._test_input_fn_iterator('worker', 1, num_gpus, input_fn,
                                 expected_values,
                                 test_reinitialize=use_dataset,
                                 use_core_strategy=use_core_strategy)

  @combinations.generate(
      combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
  def testUpdateConfigProto(self, use_core_strategy):
    strategy, _, _ = self._get_test_object(
        task_type='worker',
        task_id=1,
        num_gpus=2,
        use_core_strategy=use_core_strategy)

    config_proto = config_pb2.ConfigProto(device_filters=['to_be_overridden'])
    rewrite_options = config_proto.graph_options.rewrite_options
    rewrite_options.scoped_allocator_opts.enable_op.append('to_be_removed')

    new_config = strategy.update_config_proto(config_proto)

    # Verify group leader.
    self.assertEqual('/job:worker/replica:0/task:0',
                     new_config.experimental.collective_group_leader)

    # Verify device filters.
    self.assertEqual(['/job:worker/task:1'], new_config.device_filters)

    # Verify rewrite options.
    new_rewrite_options = new_config.graph_options.rewrite_options
    self.assertEqual(rewriter_config_pb2.RewriterConfig.ON,
                     new_rewrite_options.scoped_allocator_optimization)
    self.assertEqual(['CollectiveReduce'],
                     new_rewrite_options.scoped_allocator_opts.enable_op)

class TestDistributionStrategyErrorCases(test.TestCase,
                                         parameterized.TestCase):

  @combinations.generate(
      combinations.combine(
          distribution=[
              combinations.mirrored_strategy_with_gpu_and_cpu,
              combinations.core_mirrored_strategy_with_gpu_and_cpu],
          mode=['graph', 'eager']))
  def test_unsupported_features(self, distribution):
    with self.cached_session():
      model = get_model()

      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
      loss = 'mse'
      metrics = ['mae']
      model.compile(optimizer, loss, metrics=metrics, distribute=distribution)

      dataset = get_dataset(distribution)

      # Test with validation split.
      with self.assertRaisesRegexp(
          ValueError, '`validation_split` argument is not '
          'supported when input `x` is a dataset or a '
          'dataset iterator.+'):
        model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
                  validation_split=0.5, validation_steps=2)

      # Test with sample weight.
      sample_weight = np.random.random((10,))
      with self.assertRaisesRegexp(
          ValueError, '`sample_weight` argument is not supported when input '
          '`x` is a dataset or a dataset iterator.'):
        model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
                  sample_weight=sample_weight)

      # Test without specifying the `steps` argument for a dataset with
      # infinite cardinality.
      dataset = dataset.repeat()
      with self.assertRaisesRegexp(
          ValueError, 'When passing an infinitely '
          'repeating dataset, you must specify the '
          '`steps_per_epoch` argument'):
        model.fit(dataset, epochs=1, verbose=0)
      with self.assertRaisesRegexp(
          ValueError, 'When passing an infinitely '
          'repeating dataset, you must specify the '
          '`steps` argument'):
        model.evaluate(dataset, verbose=0)
      with self.assertRaisesRegexp(
          ValueError, 'When passing an infinitely '
          'repeating dataset, you must specify the '
          '`steps` argument'):
        model.predict(dataset, verbose=0)

  @combinations.generate(
      combinations.combine(
          distribution=[
              combinations.mirrored_strategy_with_gpu_and_cpu,
              combinations.core_mirrored_strategy_with_gpu_and_cpu],
          mode=['graph', 'eager']))
  def test_calling_with_unsupported_predefined_callbacks(self, distribution):
    with self.cached_session():
      model = get_model()

      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
      loss = 'mse'
      metrics = ['mae']
      model.compile(optimizer, loss, metrics=metrics, distribute=distribution)

      dataset = get_dataset(distribution)

      def schedule(_):
        return 0.001

      with self.assertRaisesRegexp(
          ValueError, 'You must specify a Keras Optimizer V2 when using'):
        model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
                  callbacks=[keras.callbacks.LearningRateScheduler(schedule)])

      with self.assertRaisesRegexp(
          ValueError, 'You must specify a Keras Optimizer V2 when using'):
        model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
                  callbacks=[keras.callbacks.ReduceLROnPlateau()])

class DistributeCoordinatorIntegrationTest(
    multi_worker_test_base.IndependentWorkerTestBase,
    parameterized.TestCase):

  @classmethod
  def setUpClass(cls):
    """Create a local cluster with 3 workers, 2 ps servers and an evaluator."""
    super(DistributeCoordinatorIntegrationTest, cls).setUpClass()
    cls._cluster_spec = multi_worker_test_base.create_in_process_cluster(
        num_workers=3, num_ps=2, has_eval=True)

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()
    super(DistributeCoordinatorIntegrationTest, self).setUp()

  def dataset_input_fn(self, x, y, batch_size, shuffle):

    def input_fn():
      dataset = dataset_ops.Dataset.from_tensor_slices((x, y))
      if shuffle:
        dataset = dataset.shuffle(batch_size)
      dataset = dataset.repeat(100).batch(batch_size)
      return dataset

    return input_fn

  def _get_exporter(self, name, fc):
    feature_spec = feature_column.make_parse_example_spec(fc)
    serving_input_receiver_fn = (
        export_lib.build_parsing_serving_input_receiver_fn(feature_spec))
    return exporter_lib.LatestExporter(
        name, serving_input_receiver_fn=serving_input_receiver_fn)

  def _extract_loss_and_global_step(self, event_folder):
    """Returns the loss and global step in last event."""
    event_paths = glob.glob(os.path.join(event_folder, "events*"))
    self.assertNotEmpty(
        event_paths, msg="Event file not found in dir %s" % event_folder)

    loss = None
    global_step_count = None

    for e in summary_iterator.summary_iterator(event_paths[-1]):
      current_loss = None
      for v in e.summary.value:
        if v.tag == "loss":
          current_loss = v.simple_value

      # If loss is not found, global step is meaningless.
      if current_loss is None:
        continue

      current_global_step = e.step
      if global_step_count is None or current_global_step > global_step_count:
        global_step_count = current_global_step
        loss = current_loss

    return (loss, global_step_count)

  def _get_estimator(self,
                     train_distribute,
                     eval_distribute,
                     remote_cluster=None):
    input_dimension = LABEL_DIMENSION
    linear_feature_columns = [
        feature_column.numeric_column("x", shape=(input_dimension,))
    ]
    dnn_feature_columns = [
        feature_column.numeric_column("x", shape=(input_dimension,))
    ]

    return dnn_linear_combined.DNNLinearCombinedRegressor(
        linear_feature_columns=linear_feature_columns,
        dnn_hidden_units=(2, 2),
        dnn_feature_columns=dnn_feature_columns,
        label_dimension=LABEL_DIMENSION,
        model_dir=self._model_dir,
        dnn_optimizer=adagrad.AdagradOptimizer(0.001),
        linear_optimizer=adagrad.AdagradOptimizer(0.001),
        config=run_config_lib.RunConfig(
            experimental_distribute=DistributeConfig(
                train_distribute=train_distribute,
                eval_distribute=eval_distribute,
                remote_cluster=remote_cluster)))

  def _complete_flow(self,
                     train_distribute,
                     eval_distribute,
                     remote_cluster=None,
                     use_train_and_evaluate=True):
    estimator = self._get_estimator(train_distribute, eval_distribute,
                                    remote_cluster)

    input_dimension = LABEL_DIMENSION
    train_input_fn = self.dataset_input_fn(
        x={"x": DATA},
        y=DATA,
        batch_size=BATCH_SIZE // train_distribute.num_replicas_in_sync,
        shuffle=True)
    if eval_distribute:
      eval_batch_size = BATCH_SIZE // eval_distribute.num_replicas_in_sync
    else:
      eval_batch_size = BATCH_SIZE
    eval_input_fn = self.dataset_input_fn(
        x={"x": DATA}, y=DATA, batch_size=eval_batch_size, shuffle=False)

    linear_feature_columns = [
        feature_column.numeric_column("x", shape=(input_dimension,))
    ]
    dnn_feature_columns = [
        feature_column.numeric_column("x", shape=(input_dimension,))
    ]
    feature_columns = linear_feature_columns + dnn_feature_columns

    eval_spec = estimator_training.EvalSpec(
        name=EVAL_NAME,
        input_fn=eval_input_fn,
        steps=None,
        exporters=self._get_exporter(EXPORTER_NAME, feature_columns),
        start_delay_secs=0,
        throttle_secs=1)

    if use_train_and_evaluate:
      estimator_training.train_and_evaluate(
          estimator,
          estimator_training.TrainSpec(train_input_fn, max_steps=MAX_STEPS),
          eval_spec)
    else:
      estimator.train(train_input_fn, max_steps=MAX_STEPS)

      latest_ckpt_path = estimator.latest_checkpoint()
      metrics = estimator.evaluate(
          eval_input_fn, checkpoint_path=latest_ckpt_path, name=EVAL_NAME)

      # Export the eval result to files.
      eval_result = estimator_training._EvalResult(
          status=estimator_training._EvalStatus.EVALUATED,
          metrics=metrics,
          checkpoint_path=latest_ckpt_path)
      evaluator = estimator_training._TrainingExecutor._Evaluator(
          estimator, eval_spec, None)
      evaluator._export_eval_result(eval_result, True)

    return estimator

  def _inspect_train_and_eval_events(self, estimator):
    # Make sure nothing is stuck in limbo.
    writer_cache.FileWriterCache.clear()

    # Examine the training events. Use a range to check global step to avoid
    # flakiness due to global step race condition.
    training_loss, _ = self._extract_loss_and_global_step(self._model_dir)
    self.assertIsNotNone(training_loss)

    # Examine the eval events. The global step should be accurate.
    eval_dir = os.path.join(self._model_dir, "eval_" + EVAL_NAME)
    eval_loss, eval_global_step = self._extract_loss_and_global_step(
        event_folder=eval_dir)
    self.assertIsNotNone(eval_loss)
    self.assertGreaterEqual(eval_global_step, MAX_STEPS)

    # Examine the export folder.
    export_dir = os.path.join(
        os.path.join(self._model_dir, "export"), EXPORTER_NAME)
    self.assertTrue(gfile.Exists(export_dir))

    # Examine the ckpt for predict.
    def predict_input_fn():
      return dataset_ops.Dataset.from_tensor_slices({
          "x": DATA
      }).batch(BATCH_SIZE)

    predicted_proba = np.array([
        x[prediction_keys.PredictionKeys.PREDICTIONS]
        for x in estimator.predict(predict_input_fn)
    ])
    self.assertAllEqual((BATCH_SIZE, LABEL_DIMENSION), predicted_proba.shape)

  def _get_strategy_object(self, strategy_cls):
    if strategy_cls == mirrored_strategy.CoreMirroredStrategy:
      return strategy_cls()
    else:
      return strategy_cls(num_gpus_per_worker=context.num_gpus())

  @combinations.generate(
      combinations.combine(
          mode=["graph"],
          train_distribute_cls=[
              collective_all_reduce_strategy.CollectiveAllReduceStrategy,
              mirrored_strategy.MirroredStrategy,
              mirrored_strategy.CoreMirroredStrategy,
              parameter_server_strategy.ParameterServerStrategy
          ],
          eval_distribute_cls=[
              None,
              mirrored_strategy.MirroredStrategy,
              mirrored_strategy.CoreMirroredStrategy,
              parameter_server_strategy.ParameterServerStrategy,
              collective_all_reduce_strategy.CollectiveAllReduceStrategy,
          ],
          required_gpus=[0, 1]))
  def test_complete_flow_standalone_client(self, train_distribute_cls,
                                           eval_distribute_cls):
    train_distribute = self._get_strategy_object(train_distribute_cls)

    if eval_distribute_cls:
      eval_distribute = self._get_strategy_object(eval_distribute_cls)
    else:
      eval_distribute = None

    cluster_spec = copy.deepcopy(self._cluster_spec)
    if (train_distribute_cls !=
        parameter_server_strategy.ParameterServerStrategy):
      cluster_spec.pop("ps", None)
    estimator = self._complete_flow(train_distribute, eval_distribute,
                                    cluster_spec)
    self._inspect_train_and_eval_events(estimator)

  @combinations.generate(
      combinations.combine(
          mode=["graph"],
          eval_distribute_class=[
              None,
              mirrored_strategy.MirroredStrategy,
              mirrored_strategy.CoreMirroredStrategy,
              parameter_server_strategy.ParameterServerStrategy,
          ],
          required_gpus=[0, 1]))
  def test_complete_flow_standalone_client_collective_nccl(
      self, eval_distribute_class):
    train_distribute = (
        collective_all_reduce_strategy.CollectiveAllReduceStrategy(
            num_gpus_per_worker=context.num_gpus(),
            communication=cross_device_ops_lib.CollectiveCommunication.NCCL))

    if eval_distribute_class:
      eval_distribute = self._get_strategy_object(eval_distribute_class)
    else:
      eval_distribute = None

    cluster_spec = copy.deepcopy(self._cluster_spec)
    cluster_spec.pop("ps", None)
    estimator = self._complete_flow(train_distribute, eval_distribute,
                                    cluster_spec)
    self._inspect_train_and_eval_events(estimator)

  @combinations.generate(
      combinations.combine(
          mode=["graph"],
          train_distribute_cls=[
              mirrored_strategy.MirroredStrategy,
              mirrored_strategy.CoreMirroredStrategy,
          ],
          eval_distribute_cls=[
              None,
              mirrored_strategy.MirroredStrategy,
              mirrored_strategy.CoreMirroredStrategy,
          ],
          required_gpus=[0, 1]))
  def test_estimator_standalone_client(self, train_distribute_cls,
                                       eval_distribute_cls):
    train_distribute = self._get_strategy_object(train_distribute_cls)

    if eval_distribute_cls:
      eval_distribute = self._get_strategy_object(eval_distribute_cls)
    else:
      eval_distribute = None

    # We use the whole cluster for evaluation.
    cluster = copy.deepcopy(self._cluster_spec)
    cluster.pop("evaluator", None)

    estimator = self._complete_flow(
        train_distribute,
        eval_distribute,
        remote_cluster=cluster,
        use_train_and_evaluate=False)
    self._inspect_train_and_eval_events(estimator)

  def _mock_run_std_server(self, *args, **kwargs):
    ret = original_run_std_server(*args, **kwargs)
    # Wait for all std servers to be brought up in order to reduce the chance
    # of remote sessions taking local ports that have been assigned to std
    # servers.
    self._barrier.wait()
    return ret

  def _independent_worker_fn(self, train_distribute, eval_distribute):
    with test.mock.patch.object(dc, "_run_std_server",
                                self._mock_run_std_server):
      self._complete_flow(train_distribute, eval_distribute)

  @combinations.generate(
      combinations.combine(
          mode=["graph"],
          train_distribute_cls=[
              collective_all_reduce_strategy.CollectiveAllReduceStrategy,
              parameter_server_strategy.ParameterServerStrategy,
          ],
          eval_distribute_cls=[
              None,
              mirrored_strategy.MirroredStrategy,
              mirrored_strategy.CoreMirroredStrategy,
              parameter_server_strategy.ParameterServerStrategy,
              collective_all_reduce_strategy.CollectiveAllReduceStrategy,
          ],
          required_gpus=[0, 1]))
  def test_complete_flow_independent_worker_between_graph(
      self, train_distribute_cls, eval_distribute_cls):
    if (context.num_gpus() < 2 and eval_distribute_cls ==
        collective_all_reduce_strategy.CollectiveAllReduceStrategy):
      self.skipTest("`CollectiveAllReduceStrategy` needs at least two towers.")

    train_distribute = self._get_strategy_object(train_distribute_cls)

    if eval_distribute_cls:
      eval_distribute = self._get_strategy_object(eval_distribute_cls)
    else:
      eval_distribute = None

    if (train_distribute_cls ==
        parameter_server_strategy.ParameterServerStrategy):
      cluster_spec = multi_worker_test_base.create_cluster_spec(
          num_workers=3, num_ps=2, has_eval=True)
      # 3 workers, 2 ps and 1 evaluator.
      self._barrier = dc._Barrier(6)
    else:
      cluster_spec = multi_worker_test_base.create_cluster_spec(
          num_workers=3, num_ps=0, has_eval=True)
      # 3 workers and 1 evaluator.
      self._barrier = dc._Barrier(4)

    threads = self.run_multiple_tasks_in_threads(self._independent_worker_fn,
                                                 cluster_spec,
                                                 train_distribute,
                                                 eval_distribute)
    threads_to_join = []
    for task_type, ts in threads.items():
      if task_type == PS:
        continue
      for t in ts:
        threads_to_join.append(t)
    self.join_independent_workers(threads_to_join)

    estimator = self._get_estimator(train_distribute, eval_distribute)
    self._inspect_train_and_eval_events(estimator)

  @combinations.generate(
      combinations.combine(
          mode=["graph"],
          train_distribute_cls=[
              mirrored_strategy.MirroredStrategy,
              mirrored_strategy.CoreMirroredStrategy
          ],
          eval_distribute_cls=[
              None,
              mirrored_strategy.MirroredStrategy,
              mirrored_strategy.CoreMirroredStrategy
          ],
          required_gpus=[0, 1]))
  def test_complete_flow_independent_worker_in_graph(self,
                                                     train_distribute_cls,
                                                     eval_distribute_cls):
    train_distribute = self._get_strategy_object(train_distribute_cls)

    if eval_distribute_cls:
      eval_distribute = self._get_strategy_object(eval_distribute_cls)
    else:
      eval_distribute = None

    cluster_spec = multi_worker_test_base.create_cluster_spec(
        num_workers=3, num_ps=0, has_eval=True)
    # 3 workers and 1 evaluator.
    self._barrier = dc._Barrier(4)
    threads = self.run_multiple_tasks_in_threads(self._independent_worker_fn,
                                                 cluster_spec,
                                                 train_distribute,
                                                 eval_distribute)
    self.join_independent_workers([threads[WORKER][0], threads[EVALUATOR][0]])

    estimator = self._get_estimator(train_distribute, eval_distribute)
    self._inspect_train_and_eval_events(estimator)

def eager_mode_test_configuration():
  return combinations.combine(
      mode='eager', use_numpy=False, use_validation_data=False)

def test_overlapping_keys(self):
  c1 = combinations.combine(mode=["graph"], loss=["callable", "tensor"])
  c2 = combinations.combine(mode=["eager"], loss=["callable"])
  with self.assertRaisesRegexp(ValueError, ".*Keys.+overlap.+"):
    _ = combinations.times(c1, c2)

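# The times() tests above (test_times, test_times_variable_arguments,
# test_overlapping_keys) and test_add pin down the contract: `+` on combine()
# results is plain list concatenation, while times() merges one OrderedDict
# from each argument left to right, re-sorts the merged keys, and rejects
# overlapping keys. The sketch below only illustrates that contract;
# `times_sketch` is a hypothetical name and its error message is illustrative,
# not the actual implementation in combinations.py.
from collections import OrderedDict


def times_sketch(*combined):
  """Merges every pick of one dict per argument, left to right."""
  result = list(combined[0])
  for pending in combined[1:]:
    merged = []
    for base in result:
      for update in pending:
        overlap = set(base) & set(update)
        if overlap:
          raise ValueError("Keys %s overlap between arguments" %
                           sorted(overlap))
        merged.append(
            OrderedDict(sorted(list(base.items()) + list(update.items()))))
    result = merged
  return result


# For example, times_sketch([{"distribution": "d1"}], [{"mode": "graph"}])
# yields [OrderedDict([("distribution", "d1"), ("mode", "graph")])], matching
# the key ordering asserted in test_times above.
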
class InputIteratorSingleWorkerTest(InputIteratorTestBase,
                                    parameterized.TestCase):

  @combinations.generate(combinations.combine(
      mode=["graph", "eager"], input_type=["input_fn", "dataset"]))
  def testOneDeviceCPU(self, input_type):
    worker_device_pairs = [("", ["/device:CPU:0"])]
    dataset_fn = lambda: dataset_ops.Dataset.range(10)

    expected_values = [[i] for i in range(10)]

    self._test_iterator(input_type, dataset_fn, worker_device_pairs,
                        expected_values)

  @combinations.generate(combinations.combine(
      mode=["graph", "eager"], input_type=["input_fn", "dataset"],
      required_gpus=1))
  def testTwoDevicesOneGPUOneCPU(self, input_type):
    worker_device_pairs = [("", ["/device:GPU:0", "/device:CPU:0"])]
    dataset_fn = lambda: dataset_ops.Dataset.range(10)

    expected_values = [[i, i + 1] for i in range(0, 10, 2)]

    self._test_iterator(input_type, dataset_fn, worker_device_pairs,
                        expected_values)

  @combinations.generate(combinations.combine(
      mode=["graph", "eager"], input_type=["input_fn", "dataset"],
      required_gpus=1))
  def testTupleDataset(self, input_type):
    worker_device_pairs = [("", ["/device:GPU:0", "/device:CPU:0"])]

    def dataset_fn():
      dataset1 = dataset_ops.Dataset.range(10)
      dataset2 = dataset_ops.Dataset.range(10).map(lambda x: x**2)
      return dataset_ops.Dataset.zip((dataset1, dataset2))

    expected_values = [
        [(i, i**2), (i + 1, (i + 1)**2)] for i in range(0, 10, 2)
    ]

    self._test_iterator(input_type, dataset_fn, worker_device_pairs,
                        expected_values)

  @combinations.generate(combinations.combine(
      mode=["graph", "eager"], input_type=["input_fn", "dataset"],
      required_gpus=1))
  def testUnevenDatasetBatches(self, input_type):
    worker_device_pairs = [("", ["/device:GPU:0", "/device:CPU:0"])]
    dataset_fn = lambda: dataset_ops.Dataset.range(11)

    expected_values = [[i, i + 1] for i in range(0, 10, 2)]
    self._test_iterator(input_type, dataset_fn, worker_device_pairs,
                        expected_values)

  @combinations.generate(combinations.combine(
      mode=["graph", "eager"], input_type=["dataset"],
      split_batch_by=[None, 2], required_gpus=1))
  def testBatchSplitting(self, input_type, split_batch_by):
    worker_device_pairs = [("", ["/device:GPU:0", "/device:CPU:0"])]
    batch_size = 10
    dataset_fn = lambda: dataset_ops.Dataset.range(100).batch(batch_size)

    updated_batch_size = (
        batch_size // split_batch_by if split_batch_by else batch_size)
    expected_values = [
        [range(i, i + updated_batch_size),
         range(i + updated_batch_size, i + 2 * updated_batch_size)]
        for i in range(0, 100, updated_batch_size * 2)
    ]

    self._test_iterator(input_type, dataset_fn, worker_device_pairs,
                        expected_values, sess=None,
                        split_batch_by=split_batch_by)

def all_strategy_combinations_with_eager_and_graph_modes():
  return combinations.combine(
      distribution=all_strategies, mode=['graph', 'eager'])

class DNNLinearCombinedClassifierIntegrationTest(test.TestCase,
                                                 parameterized.TestCase):

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def dataset_input_fn(self, x, y, batch_size, shuffle):

    def input_fn():
      dataset = dataset_ops.Dataset.from_tensor_slices((x, y))
      if shuffle:
        dataset = dataset.shuffle(batch_size)
      dataset = dataset.repeat(10).batch(batch_size)
      return dataset

    return input_fn

  @combinations.generate(
      combinations.combine(
          mode=['graph'],
          distribution=[
              combinations.one_device_strategy,
              combinations.mirrored_strategy_with_gpu_and_cpu,
              combinations.mirrored_strategy_with_two_gpus
          ]))
  def test_complete_flow_with_mode(self, distribution):
    label_dimension = 2
    input_dimension = label_dimension
    batch_size = 10
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)
    train_input_fn = self.dataset_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size // len(distribution.worker_devices),
        shuffle=True)
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, y=data, batch_size=batch_size, shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, batch_size=batch_size, shuffle=False)

    linear_feature_columns = [
        feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    dnn_feature_columns = [
        feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    feature_columns = linear_feature_columns + dnn_feature_columns
    estimator = dnn_linear_combined.DNNLinearCombinedRegressor(
        linear_feature_columns=linear_feature_columns,
        dnn_hidden_units=(2, 2),
        dnn_feature_columns=dnn_feature_columns,
        label_dimension=label_dimension,
        model_dir=self._model_dir,
        # TODO(isaprykin): Work around the colocate_with error.
        dnn_optimizer=adagrad.AdagradOptimizer(0.001),
        linear_optimizer=adagrad.AdagradOptimizer(0.001),
        config=run_config.RunConfig(
            train_distribute=distribution, eval_distribute=distribution))

    num_steps = 10
    estimator.train(train_input_fn, steps=num_steps)

    scores = estimator.evaluate(eval_input_fn)
    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', six.iterkeys(scores))

    predictions = np.array([
        x[prediction_keys.PredictionKeys.PREDICTIONS]
        for x in estimator.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, label_dimension), predictions.shape)

    feature_spec = feature_column.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = estimator.export_savedmodel(tempfile.mkdtemp(),
                                             serving_input_receiver_fn)
    self.assertTrue(gfile.Exists(export_dir))

  def tearDown(self):
    if self._model_dir:
      writer_cache.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

class ParameterServerStrategyWithChiefTest(ParameterServerStrategyTestBase,
                                           parameterized.TestCase):

  @classmethod
  def setUpClass(cls):
    cls._cluster_spec = multi_worker_test_base.create_in_process_cluster(
        num_workers=3, num_ps=2, has_chief=True)
    cls._default_target = 'grpc://' + cls._cluster_spec[CHIEF][0]

  @combinations.generate(
      combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
  def testSimpleBetweenGraph(self, use_core_strategy):
    self._run_between_graph_clients(self._test_simple_increment,
                                    self._cluster_spec,
                                    context.num_gpus(),
                                    use_core_strategy=use_core_strategy)

  @combinations.generate(
      combinations.combine(mode=['graph'], num_gpus=[0, 1, 2],
                           use_core_strategy=[True, False]))
  def testMinimizeLossGraph(self, num_gpus, use_core_strategy):
    self._run_between_graph_clients(self._test_minimize_loss_graph,
                                    self._cluster_spec, num_gpus,
                                    use_core_strategy=use_core_strategy)

  @combinations.generate(
      combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
  def testGlobalStepIsWrappedOnTwoGPUs(self, use_core_strategy):
    strategy, _, _ = create_test_objects(
        num_gpus=2, use_core_strategy=use_core_strategy)
    with ops.Graph().as_default(), strategy.scope():
      created_step = training_util.create_global_step()
      get_step = training_util.get_global_step()
      self.assertEqual(created_step, get_step,
                       msg=('created_step %s type %s vs. get_step %s type %s' %
                            (id(created_step), created_step.__class__.__name__,
                             id(get_step), get_step.__class__.__name__)))
      self.assertIs(values.AggregatingVariable, type(created_step))
      self.assertIs(values.AggregatingVariable, type(get_step))
      self.assertIs(strategy, created_step.distribute_strategy)

  @combinations.generate(
      combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
  def testGlobalStepIsNotWrappedOnOneGPU(self, use_core_strategy):
    strategy, _, _ = create_test_objects(
        num_gpus=1, use_core_strategy=use_core_strategy)
    with ops.Graph().as_default(), strategy.scope():
      created_step = training_util.create_global_step()
      get_step = training_util.get_global_step()
      self.assertEqual(created_step, get_step,
                       msg=('created_step %s type %s vs. get_step %s type %s' %
                            (id(created_step), created_step.__class__.__name__,
                             id(get_step), get_step.__class__.__name__)))
      self.assertIs(resource_variable_ops.ResourceVariable, type(created_step))
      self.assertIs(resource_variable_ops.ResourceVariable, type(get_step))
      # All variables have an _distribute_strategy parameter. Only variable
      # subclasses in distribution strategy expose it publicly.
      self.assertFalse(hasattr(strategy, 'distribute_strategy'))
      self.assertIs(strategy, created_step._distribute_strategy)

  @combinations.generate(
      combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
  def testValueContainer(self, use_core_strategy):
    strategy, _, _ = create_test_objects(
        num_gpus=2, use_core_strategy=use_core_strategy)
    with ops.Graph().as_default(), strategy.scope():

      def f():
        with backprop.GradientTape() as tape:
          v = variable_scope.get_variable('v', initializer=10.0)
          _ = v * v
        v, = tape.watched_variables()
        w = strategy.extended.value_container(v)
        self.assertIs(values.AggregatingVariable, type(w))

      strategy.extended.call_for_each_replica(f)

class ParameterServerStrategyTest(
    ParameterServerStrategyTestBase,
    strategy_test_lib.DistributionTestBase,
    strategy_test_lib.TwoDeviceDistributionTestBase,
    parameterized.TestCase):

  @classmethod
  def setUpClass(cls):
    cls._cluster_spec = multi_worker_test_base.create_in_process_cluster(
        num_workers=3, num_ps=2)
    cls._default_target = 'grpc://' + cls._cluster_spec[WORKER][0]

  @combinations.generate(
      combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
  def test_num_replicas_in_sync(self, use_core_strategy):
    strategy, _, _ = create_test_objects(
        num_gpus=2, use_core_strategy=use_core_strategy)
    # All the devices on a given worker are in sync which in this case is the
    # number of gpus on each worker.
    self.assertEqual(2, strategy.num_replicas_in_sync)

  @combinations.generate(
      combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
  def testDeviceAssignmentLocalCPU(self, use_core_strategy):
    strategy, _, _ = create_test_objects(
        num_gpus=0, use_core_strategy=use_core_strategy)
    self._test_device_assignment_local(
        strategy, compute_device='CPU', variable_device='CPU', num_gpus=0)

  @combinations.generate(
      combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
  def testDeviceAssignmentLocalOneGPU(self, use_core_strategy):
    strategy, _, _ = create_test_objects(
        num_gpus=1, use_core_strategy=use_core_strategy)
    self._test_device_assignment_local(
        strategy, compute_device='GPU', variable_device='GPU', num_gpus=1)

  @combinations.generate(
      combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
  def testDeviceAssignmentLocalTwoGPUs(self, use_core_strategy):
    strategy, _, _ = create_test_objects(
        num_gpus=2, use_core_strategy=use_core_strategy)
    self._test_device_assignment_local(
        strategy, compute_device='GPU', variable_device='CPU', num_gpus=2)

  @combinations.generate(
      combinations.combine(mode=['graph'], num_gpus=[0, 1, 2],
                           use_core_strategy=[True, False]))
  def testDeviceAssignmentDistributed(self, num_gpus, use_core_strategy):
    self._test_device_assignment_distributed(
        'worker', 1, num_gpus, use_core_strategy=use_core_strategy)

  @combinations.generate(
      combinations.combine(mode=['graph'], num_gpus=[0, 1, 2],
                           use_core_strategy=[True, False]))
  def testDeviceAssignmentDistributedEnablePartitioner(self, num_gpus,
                                                       use_core_strategy):
    self._test_device_assignment_distributed_enable_partitioner(
        'worker', 1, num_gpus, use_core_strategy=use_core_strategy)

  @combinations.generate(
      combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
  def testSimpleBetweenGraph(self, use_core_strategy):
    self._run_between_graph_clients(self._test_simple_increment,
                                    self._cluster_spec,
                                    context.num_gpus(),
                                    use_core_strategy=use_core_strategy)

  @combinations.generate(
      combinations.combine(mode=['graph'], num_gpus=[0, 1, 2],
                           use_core_strategy=[True, False]))
  def testLocalSimpleIncrement(self, num_gpus, use_core_strategy):
    self._test_simple_increment(None, 0, num_gpus, use_core_strategy)

  @combinations.generate(
      combinations.combine(mode=['graph'], num_gpus=[0, 1, 2],
                           use_core_strategy=[True, False]))
  def testMinimizeLossGraphDistributed(self, num_gpus, use_core_strategy):
    self._run_between_graph_clients(self._test_minimize_loss_graph,
                                    self._cluster_spec, num_gpus,
                                    use_core_strategy=use_core_strategy)

  @combinations.generate(
      combinations.combine(mode=['graph'], num_gpus=[0, 1, 2],
                           use_core_strategy=[True, False]))
  def testMinimizeLossGraphLocal(self, num_gpus, use_core_strategy):
    self._test_minimize_loss_graph(None, None, num_gpus, use_core_strategy)

  # TODO(b/124344198): Re-enable after fixing this flaky test.
  # TODO(priyag): Refactor this and other multi worker tests.
  @combinations.generate(
      combinations.combine(mode=['graph'], num_gpus=[1, 2], required_gpus=1,
                           use_core_strategy=[True, False],
                           use_dataset=[True, False]))
  def DISABLED_testMakeInputFnIteratorDistributed(self, num_gpus,
                                                  use_core_strategy,
                                                  use_dataset):
    if context.num_gpus() < num_gpus:
      self.skipTest('Not enough GPUs')
    if use_dataset:
      fn = lambda: dataset_ops.Dataset.range(100)
    else:
      def fn():
        dataset = dataset_ops.Dataset.range(100)
        it = dataset.make_one_shot_iterator()
        return it.get_next
    expected_values = [[i + j for j in range(num_gpus)]
                       for i in range(0, 100, num_gpus)]

    input_fn = self._input_fn_to_test_input_context(
        fn,
        expected_num_replicas_in_sync=num_gpus,
        expected_num_input_pipelines=3,
        expected_input_pipeline_id=1)  # because task_id = 1
    self._test_input_fn_iterator('worker', 1, num_gpus, input_fn,
                                 expected_values,
                                 test_reinitialize=use_dataset,
                                 use_core_strategy=use_core_strategy)

  # TODO(b/124344198): Re-enable after fixing this flaky test.
  @combinations.generate(
      combinations.combine(mode=['graph'], num_gpus=[1, 2], required_gpus=1,
                           use_core_strategy=[True, False],
                           use_dataset=[True, False]))
  def DISABLED_testMakeInputFnIteratorLocal(self, num_gpus, use_core_strategy,
                                            use_dataset):
    if context.num_gpus() < num_gpus:
      self.skipTest('Not enough GPUs')
    if use_dataset:
      fn = lambda: dataset_ops.Dataset.range(100)
    else:
      def fn():
        dataset = dataset_ops.Dataset.range(100)
        it = dataset.make_one_shot_iterator()
        return it.get_next
    expected_values = [[i + j for j in range(num_gpus)]
                       for i in range(0, 100, num_gpus)]

    input_fn = self._input_fn_to_test_input_context(
        fn,
        expected_num_replicas_in_sync=num_gpus,
        expected_num_input_pipelines=1,
        expected_input_pipeline_id=0)  # only one worker and pipeline for local.
    self._test_input_fn_iterator(None, None, num_gpus, input_fn,
                                 expected_values,
                                 test_reinitialize=use_dataset,
                                 use_core_strategy=use_core_strategy)

  @combinations.generate(
      combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
  def testGlobalStepUpdate(self, use_core_strategy):
    strategy, _, _ = create_test_objects(use_core_strategy=use_core_strategy)
    self._test_global_step_update(strategy)

  @combinations.generate(
      combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
  def testUpdateConfigProtoMultiWorker(self, use_core_strategy):
    strategy, _, _ = create_test_objects(
        num_gpus=2, use_core_strategy=use_core_strategy)
    strategy.configure(
        cluster_spec=self._cluster_spec, task_type='worker', task_id=1)

    config_proto = config_pb2.ConfigProto(device_filters=['to_be_overridden'])

    new_config = strategy.update_config_proto(config_proto)

    # Verify device filters.
    self.assertEqual(['/job:worker/task:1', '/job:ps'],
                     new_config.device_filters)

    # Verify isolate_session_state.
    self.assertFalse(new_config.isolate_session_state)

  @combinations.generate(
      combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
  def testUpdateConfigProtoLocal(self, use_core_strategy):
    strategy, _, _ = create_test_objects(
        num_gpus=2, use_core_strategy=use_core_strategy)

    config_proto = config_pb2.ConfigProto()
    new_config = strategy.update_config_proto(config_proto)

    # Verify isolate_session_state.
    self.assertTrue(new_config.isolate_session_state)

  def testAllReduceSum(self):
    distribution = parameter_server_strategy.ParameterServerStrategy(
        num_gpus_per_worker=2)
    self._test_all_reduce_sum(distribution)

  def testAllReduceSumGradients(self):
    distribution = parameter_server_strategy.ParameterServerStrategy(
        num_gpus_per_worker=2)
    self._test_all_reduce_sum_gradients(distribution)

  def testAllReduceSumGradientTape(self):
    distribution = parameter_server_strategy.ParameterServerStrategy(
        num_gpus_per_worker=2)
    self._test_all_reduce_sum_gradient_tape(distribution)

  def testAllReduceMean(self):
    distribution = parameter_server_strategy.ParameterServerStrategy(
        num_gpus_per_worker=2)
    self._test_all_reduce_mean(distribution)

  def testAllReduceMeanGradients(self):
    distribution = parameter_server_strategy.ParameterServerStrategy(
        num_gpus_per_worker=2)
    self._test_all_reduce_mean_gradients(distribution)

  def testAllReduceMeanGradientTape(self):
    distribution = parameter_server_strategy.ParameterServerStrategy(
        num_gpus_per_worker=2)
    self._test_all_reduce_mean_gradient_tape(distribution)

def tpu_combinations():
  return combinations.combine(
      distribution=[combinations.tpu_strategy_one_step,
                    combinations.tpu_strategy],
      mode=["graph"])

class ParameterServerStrategyTest(ParameterServerStrategyTestBase,
                                  parameterized.TestCase):

  @classmethod
  def setUpClass(cls):
    cls._cluster_spec = multi_worker_test_base.create_in_process_cluster(
        num_workers=3, num_ps=2)
    cls._default_target = 'grpc://' + cls._cluster_spec[WORKER][0]

  def test_num_replicas_in_sync(self):
    distribution = parameter_server_strategy.ParameterServerStrategy(
        num_gpus_per_worker=2)
    # All the devices on a given worker are in sync which in this case is the
    # number of gpus on each worker.
    self.assertEqual(2, distribution.num_replicas_in_sync)

  def testDeviceAssignmentLocalCPU(self):
    distribution = parameter_server_strategy.ParameterServerStrategy(
        num_gpus_per_worker=0)
    self._test_device_assignment_local(
        distribution, compute_device='CPU', variable_device='CPU', num_gpus=0)

  def testDeviceAssignmentLocalOneGPU(self):
    distribution = parameter_server_strategy.ParameterServerStrategy(
        num_gpus_per_worker=1)
    self._test_device_assignment_local(
        distribution, compute_device='GPU', variable_device='GPU', num_gpus=1)

  def testDeviceAssignmentLocalTwoGPUs(self):
    distribution = parameter_server_strategy.ParameterServerStrategy(
        num_gpus_per_worker=2)
    self._test_device_assignment_local(
        distribution, compute_device='GPU', variable_device='CPU', num_gpus=2)

  @combinations.generate(
      combinations.combine(mode=['graph'], num_gpus=[0, 1, 2]))
  def testDeviceAssignmentDistributed(self, num_gpus):
    self._test_device_assignment_distributed('worker', 1, num_gpus)

  @combinations.generate(
      combinations.combine(mode=['graph'], num_gpus=[0, 1, 2]))
  def testDeviceAssignmentDistributedEnablePartitioner(self, num_gpus):
    self._test_device_assignment_distributed_enable_partitioner(
        'worker', 1, num_gpus)

  def testSimpleBetweenGraph(self):
    self._run_between_graph_clients(self._test_simple_increment,
                                    self._cluster_spec, context.num_gpus())

  @combinations.generate(
      combinations.combine(mode=['graph'], num_gpus=[0, 1, 2]))
  def testLocalSimpleIncrement(self, num_gpus):
    self._test_simple_increment(None, 0, num_gpus)

  @combinations.generate(
      combinations.combine(mode=['graph'], num_gpus=[0, 1, 2]))
  def testMinimizeLossGraphDistributed(self, num_gpus):
    self._run_between_graph_clients(self._test_minimize_loss_graph,
                                    self._cluster_spec, num_gpus)

  @combinations.generate(
      combinations.combine(mode=['graph'], num_gpus=[0, 1, 2]))
  def testMinimizeLossGraphLocal(self, num_gpus):
    self._test_minimize_loss_graph(None, None, num_gpus)

class MirroredVariableTest(test.TestCase, parameterized.TestCase):

  config = config_pb2.ConfigProto()
  config.allow_soft_placement = True

  @test_util.run_in_graph_and_eager_modes(config=config)
  def testProperties(self):
    if context.num_gpus() < 1 and context.executing_eagerly():
      self.skipTest("A GPU is not available for this test in eager mode.")

    v, _, mirrored = _make_mirrored()

    self.assertEqual(v[0].name, mirrored.name)
    self.assertEqual(v[0].dtype, mirrored.dtype)
    self.assertEqual(v[0].shape, mirrored.shape)

  @test_util.run_in_graph_and_eager_modes(config=config)
  def testVariableOnAnotherDevice(self):
    v = variable_scope.get_variable(
        name="v", initializer=[1.], use_resource=True)
    index = {"/job:foo/device:CPU:0": v}
    mirrored = values.MirroredVariable(
        index, v, variable_scope.VariableAggregation.MEAN)

    self.assertEqual(v.name, mirrored.name)
    self.assertEqual(v.dtype, mirrored.dtype)
    self.assertEqual(v.shape, mirrored.shape)

  def _assign_mirrored(self, devices, v, new):
    for d, var, n in zip(devices, v, new):
      with ops.device(d):
        self.evaluate(var.assign(n))

  def _save_return_saver(self, sess, var):
    saver = saver_lib.Saver(var_list=[var])
    test_dir = self.get_temp_dir()
    prefix = os.path.join(test_dir, "ckpt")
    return saver.save(sess, prefix), saver

  def _save(self, sess, var):
    save_path, _ = self._save_return_saver(sess, var)
    return save_path

  @test_util.run_in_graph_and_eager_modes(config=config)
  def testSaveAndRestoreMirroredOneGraph(self):
    if context.num_gpus() < 1 and context.executing_eagerly():
      self.skipTest("A GPU is not available for this test in eager mode.")

    with self.cached_session(config=self.config) as sess:
      v, devices, mirrored = _make_mirrored()

      # Overwrite the initial values.
      self._assign_mirrored(devices, v, [3., 4.])

      # Saves the current value of v[0], 3.
      save_path, saver = self._save_return_saver(sess, mirrored)

      # Change the values between save and restore.
      self._assign_mirrored(devices, v, [5., 6.])

      # Restores the saved value of 3. to both variables.
      saver.restore(sess, save_path)
      self.assertEqual([3., 3.], self.evaluate([v[0], v[1]]))

  def _save_mirrored(self):
    """Save variables with mirroring, returns save_path."""
    with self.session(graph=ops.Graph()) as sess:
      v, devices, mirrored = _make_mirrored()

      # Overwrite the initial values.
      self._assign_mirrored(devices, v, [3., 4.])

      # Saves the current value of v[0], 3.
      save_path = self._save(sess, mirrored)

      # Change the values between save and restore.
      self._assign_mirrored(devices, v, [5., 6.])
    return save_path

  def _save_normal(self):
    """Save variables without mirroring, returns save_path."""
    with self.session(graph=ops.Graph()) as sess:
      var = variable_scope.get_variable(
          name="v", initializer=1., use_resource=True)

      # Overwrite the initial value.
      self.evaluate(var.assign(3.))

      # Saves the current value of var, 3.
      save_path = self._save(sess, var)

      # Change the values between save and restore.
      self.evaluate(var.assign(5.))
    return save_path

  def _restore_normal(self, save_path):
    """Restore to variables without mirroring in a fresh graph."""
    with self.session(graph=ops.Graph()) as sess:
      var = variable_scope.get_variable(
          name="v", initializer=7., use_resource=True)

      # Overwrite the initial value.
      self.evaluate(var.assign(8.))

      # Restores the saved value of 3. to `var`.
      saver = saver_lib.Saver(var_list=[var])
      saver.restore(sess, save_path)
      self.assertEqual(3., self.evaluate(var))

  def _restore_mirrored(self, save_path):
    """Restore to variables with mirroring in a fresh graph."""
    with self.session(graph=ops.Graph()) as sess:
      v, devices, mirrored = _make_mirrored()

      # Overwrite the initial values.
      self._assign_mirrored(devices, v, [7., 8.])

      # Restores the saved value of 3. to both variables.
      saver = saver_lib.Saver(var_list=[mirrored])
      saver.restore(sess, save_path)
      self.assertEqual([3., 3.], self.evaluate([v[0], v[1]]))

  @test_util.run_in_graph_and_eager_modes(config=config)
  def testSaveMirroredRestoreMirrored(self):
    if context.num_gpus() < 1 and context.executing_eagerly():
      self.skipTest("A GPU is not available for this test in eager mode.")

    save_path = self._save_mirrored()
    self._restore_mirrored(save_path)

  @test_util.run_in_graph_and_eager_modes(config=config)
  def testSaveMirroredRestoreNormal(self):
    if context.num_gpus() < 1 and context.executing_eagerly():
      self.skipTest("A GPU is not available for this test in eager mode.")

    save_path = self._save_mirrored()
    self._restore_normal(save_path)

  @test_util.run_in_graph_and_eager_modes(config=config)
  def testSaveNormalRestoreMirrored(self):
    if context.num_gpus() < 1 and context.executing_eagerly():
      self.skipTest("A GPU is not available for this test in eager mode.")

    save_path = self._save_normal()
    self._restore_mirrored(save_path)

  @combinations.generate(combinations.combine(
      distribution=[
          combinations.mirrored_strategy_with_one_gpu,
          combinations.core_mirrored_strategy_with_one_gpu],
      mode=["graph"]))
  def testFetchAMirroredVariable(self, distribution):
    with self.session(graph=ops.Graph()) as sess, distribution.scope():
      with ops.device("/device:GPU:0"):
        v = variable_scope.get_variable(
            name="v", initializer=1., use_resource=True)
      mirrored = values.MirroredVariable(
          {"/device:GPU:0": v}, v, variable_scope.VariableAggregation.MEAN)
      sess.run(variables_lib.global_variables_initializer())
      sess.run({"complicated": mirrored})

class LocalCollectiveAllReduceStrategy(
    CollectiveAllReduceStrategyTestBase,
    strategy_test_lib.DistributionTestBase,
    strategy_test_lib.TwoDeviceDistributionTestBase,
    parameterized.TestCase):

  @combinations.generate(
      combinations.combine(mode=['graph', 'eager'], num_gpus=[2, 4],
                           required_gpus=2, use_core_strategy=[True, False]))
  def testMinimizeLoss(self, num_gpus, use_core_strategy):
    # Collective ops doesn't support strategy with one device.
    if context.num_gpus() < num_gpus:
      self.skipTest('Not enough GPUs')
    if context.executing_eagerly():
      strategy, _, _ = self._get_test_object(
          None, None, num_gpus, use_core_strategy=use_core_strategy)
      self._test_minimize_loss_eager(strategy)
    else:
      self._test_minimize_loss_graph(
          None, None, num_gpus, use_core_strategy=use_core_strategy)

  @combinations.generate(
      combinations.combine(mode=['graph'], num_gpus=[2, 4], required_gpus=2,
                           use_core_strategy=[True, False]))
  def testComplexModel(self, num_gpus, use_core_strategy):
    if context.num_gpus() < num_gpus:
      self.skipTest('Not enough GPUs')
    self._test_complex_model(
        None, None, num_gpus, use_core_strategy=use_core_strategy)

  @combinations.generate(
      combinations.combine(mode=['graph', 'eager'], required_gpus=2,
                           use_dataset=[True, False],
                           use_core_strategy=[True, False]))
  def DISABLED_testMakeInputFnIterator(self, use_dataset, use_core_strategy):
    num_gpus = 2
    if use_dataset:
      fn = lambda: dataset_ops.Dataset.range(5 * num_gpus)
    else:
      def fn():
        dataset = dataset_ops.Dataset.range(5 * num_gpus)
        it = dataset.make_one_shot_iterator()
        return it.get_next
    expected_values = [range(i, i + num_gpus) for i in range(0, 10, num_gpus)]

    input_fn = self._input_fn_to_test_input_context(
        fn,
        expected_num_replicas_in_sync=num_gpus,
        expected_num_input_pipelines=1,
        expected_input_pipeline_id=0)
    self._test_input_fn_iterator(None, None, num_gpus, input_fn,
                                 expected_values,
                                 test_reinitialize=use_dataset,
                                 use_core_strategy=use_core_strategy)

  @combinations.generate(
      combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
  def testAllReduceSum(self, use_core_strategy):
    if context.num_gpus() < 2:
      self.skipTest('Not enough GPUs')
    distribution, target, config = self._get_test_object(
        None, None, num_gpus=2, use_core_strategy=use_core_strategy)
    with self.cached_session(config=config, target=target):
      self._test_all_reduce_sum(distribution)

  @combinations.generate(
      combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
  def testAllReduceSumGradients(self, use_core_strategy):
    if context.num_gpus() < 2:
      self.skipTest('Not enough GPUs')
    distribution, target, config = self._get_test_object(
        None, None, num_gpus=2, use_core_strategy=use_core_strategy)
    with self.cached_session(config=config, target=target):
      self._test_all_reduce_sum_gradients(distribution)

  @combinations.generate(
      combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
  def testAllReduceSumGradientTape(self, use_core_strategy):
    if context.num_gpus() < 2:
      self.skipTest('Not enough GPUs')
    distribution, target, config = self._get_test_object(
        None, None, num_gpus=2, use_core_strategy=use_core_strategy)
    with self.cached_session(config=config, target=target):
      self._test_all_reduce_sum_gradient_tape(distribution)

  @combinations.generate(
      combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
  def testAllReduceMean(self, use_core_strategy):
    if context.num_gpus() < 2:
      self.skipTest('Not enough GPUs')
    distribution, target, config = self._get_test_object(
        None, None, num_gpus=2, use_core_strategy=use_core_strategy)
    with self.cached_session(config=config, target=target):
      self._test_all_reduce_mean(distribution)

  @combinations.generate(
      combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
  def testAllReduceMeanGradients(self, use_core_strategy):
    if context.num_gpus() < 2:
      self.skipTest('Not enough GPUs')
    distribution, target, config = self._get_test_object(
        None, None, num_gpus=2, use_core_strategy=use_core_strategy)
    with self.cached_session(config=config, target=target):
      self._test_all_reduce_mean_gradients(distribution)

  @combinations.generate(
      combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
  def testAllReduceMeanGradientTape(self, use_core_strategy):
    if context.num_gpus() < 2:
      self.skipTest('Not enough GPUs')
    distribution, target, config = self._get_test_object(
        None, None, num_gpus=2, use_core_strategy=use_core_strategy)
    with self.cached_session(config=config, target=target):
      self._test_all_reduce_mean_gradient_tape(distribution)

  @combinations.generate(
      combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
  def testNumpyIterator(self, use_core_strategy):
    num_gpus = 2
    if context.num_gpus() < num_gpus:
      self.skipTest('Not enough GPUs')
    strategy, _, _ = self._get_test_object(
        None, None, num_gpus=num_gpus, use_core_strategy=use_core_strategy)
    self._test_numpy_iterator(strategy)

def strategy_minus_tpu_combinations():
  return combinations.combine(
      distribution=strategies_minus_tpu, mode=['graph', 'eager'])

from __future__ import print_function

from absl.testing import parameterized

from tensorflow.contrib.distribute.python import combinations
from tensorflow.python.eager import test
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import variables
from tensorflow.python.training import moving_averages

all_combinations = combinations.combine(
    distribution=[combinations.default_strategy,
                  combinations.one_device_strategy,
                  combinations.mirrored_strategy_with_gpu_and_cpu],
    mode=["graph"])


class AssignMovingAveragesTest(test.TestCase, parameterized.TestCase):

  @combinations.generate(all_combinations)
  def testTowerModeWithoutZeroDebias(self, distribution):
    tower_id = [0]

    def tower_fn():
      var = variables.Variable([10.0, 11.0])
      val = constant_op.constant([1.0 + tower_id[0], 2.0 - tower_id[0]])
      tower_id[0] += 1
      decay = 0.25

def strategy_for_numpy_input_combinations():
  return combinations.combine(
      distribution=strategies_minus_tpu + tpu_strategies, mode=['graph'])

class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase):

  def test_validating_dataset_input_tensors_with_shape_mismatch(self):
    with self.cached_session():
      strategy = mirrored_strategy.MirroredStrategy(
          ['/device:GPU:0', '/device:CPU:0'])
      a = constant_op.constant([1, 2], shape=(1, 2))
      b = constant_op.constant([[1, 2], [1, 2]], shape=(2, 2))
      x = values.DistributedValues({'/device:CPU:0': a, '/device:GPU:0': b})
      y = values.DistributedValues({'/device:CPU:0': a, '/device:GPU:0': a})
      with strategy.scope():
        # Removed device and input tensor shape details from the error message
        # since the order of the device and the corresponding input tensor
        # shape is not deterministic over different runs.
        with self.assertRaisesRegexp(
            ValueError, 'Input tensor shapes do not match for '
            'distributed tensor inputs '
            'DistributedValues:.+'):
          distributed_training_utils.validate_distributed_dataset_inputs(
              strategy, x, y)

  def test_validating_dataset_input_tensors_with_dtype_mismatch(self):
    with self.cached_session():
      strategy = mirrored_strategy.MirroredStrategy(
          ['/device:GPU:0', '/device:CPU:0'])
      a = constant_op.constant([1, 2], shape=(1, 2), dtype=dtypes.int32)
      b = constant_op.constant([1, 2], shape=(1, 2), dtype=dtypes.float64)
      x = values.DistributedValues({'/device:CPU:0': a, '/device:GPU:0': b})
      y = values.DistributedValues({'/device:CPU:0': a, '/device:GPU:0': a})
      with strategy.scope():
        # Removed device and input tensor dtype details from the error message
        # since the order of the device and the corresponding input tensor
        # dtype is not deterministic over different runs.
        with self.assertRaisesRegexp(
            ValueError, 'Input tensor dtypes do not match for '
            'distributed tensor inputs '
            'DistributedValues:.+'):
          distributed_training_utils.validate_distributed_dataset_inputs(
              strategy, x, y)

  # TODO(anjalisridhar): Move this test along with other numpy related tests
  # to its own class.
  @combinations.generate(strategy_combinations())
  def test_creating_var_with_numpy_arrays(self, distribution):
    with self.cached_session():
      x = np.asarray(np.random.random((64, 3)), dtype=np.float32)
      var_x = distributed_training_utils.get_var_for_numpy(distribution, x)
      val = self.evaluate(var_x.value())
      # Verify that the numpy value is copied to the variable.
      self.assertAllEqual(x, val)

  @combinations.generate(strategy_combinations())
  def test_calling_model_with_numpy_arrays(self, distribution):
    with self.cached_session():
      model = get_model()

      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
      loss = 'mse'
      metrics = ['mae']
      model.compile(optimizer, loss, metrics=metrics, distribute=distribution)

      inputs = np.zeros((64, 3), dtype=np.float32)
      targets = np.zeros((64, 4), dtype=np.float32)

      # Call fit with validation data.
      model.fit(inputs, targets, epochs=1, batch_size=2, verbose=0,
                validation_data=(inputs, targets))

      # TODO(anjalisridhar): We need tests for when the batch size and steps
      # are smaller and results in a 0 batch_size and steps value.
      model.evaluate(inputs, targets)
      # with steps
      model.evaluate(inputs, targets, steps=2)
      # with batch_size
      model.evaluate(inputs, targets, batch_size=8)

      model.predict(inputs)
      # with steps
      model.predict(inputs, steps=2)
      # with batch_size
      model.predict(inputs, batch_size=8)

  @combinations.generate(strategy_combinations())
  def test_calling_model_with_nested_numpy_arrays(self, distribution):
    with self.cached_session():
      a = keras.layers.Input(shape=(3,), name='input_a')
      b = keras.layers.Input(shape=(3,), name='input_b')

      dense = keras.layers.Dense(4, name='dense')
      c = dense(a)
      d = dense(b)
      e = keras.layers.Dropout(0.5, name='dropout')(c)

      model = keras.models.Model([a, b], [d, e])

      optimizer = gradient_descent.GradientDescentOptimizer(
          learning_rate=0.001)
      loss = 'mse'
      model.compile(optimizer, loss, distribute=distribution)

      input_a_np = np.asarray(np.random.random((64, 3)), dtype=np.float32)
      input_b_np = np.asarray(np.random.random((64, 3)), dtype=np.float32)
      inputs = [input_a_np, input_b_np]

      output_d_np = np.asarray(np.random.random((64, 4)), dtype=np.float32)
      output_e_np = np.asarray(np.random.random((64, 4)), dtype=np.float32)
      targets = [output_d_np, output_e_np]

      # Call fit with validation data.
      model.fit(inputs, targets, epochs=1, batch_size=8, verbose=0)

      # TODO(anjalisridhar): We need tests for when the batch size and steps
      # are smaller and results in a 0 batch_size and steps value.
      model.evaluate(inputs, targets)
      # with steps
      model.evaluate(inputs, targets, steps=2)
      # with batch_size
      model.evaluate(inputs, targets, batch_size=8)

      model.predict(inputs)
      # with steps
      model.predict(inputs, steps=2)
      # with batch_size
      model.predict(inputs, batch_size=8)

  @combinations.generate(strategy_combinations())
  def test_calling_model_on_same_dataset(self, distribution):
    with self.cached_session():
      model = get_model()

      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
      loss = 'mse'
      metrics = ['mae', keras.metrics.CategoricalAccuracy()]
      model.compile(optimizer, loss, metrics=metrics, distribute=distribution)

      dataset = get_dataset(distribution)

      # Call fit with validation data.
      model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
                validation_data=dataset, validation_steps=2)
      model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
                validation_data=dataset, validation_steps=2)
      model.predict(get_predict_dataset(distribution), steps=2)

  # TODO(priyag): Enable this test for TPU. Currently tuples/dict don't work
  # as clone_model's input_tensors argument only seems to accept list and not
  # tuples or dict.
  def test_fit_with_tuple_and_dict_dataset_inputs(self):
    with self.cached_session():
      a = keras.layers.Input(shape=(3,), name='input_a')
      b = keras.layers.Input(shape=(3,), name='input_b')

      dense = keras.layers.Dense(4, name='dense')
      c = dense(a)
      d = dense(b)
      e = keras.layers.Dropout(0.5, name='dropout')(c)

      model = keras.models.Model([a, b], [d, e])

      optimizer = gradient_descent.GradientDescentOptimizer(
          learning_rate=0.001)
      loss = 'mse'
      metrics = ['mae', keras.metrics.CategoricalAccuracy()]
      strategy = mirrored_strategy.MirroredStrategy(
          ['/device:GPU:0', '/device:CPU:0'])
      model.compile(optimizer, loss, metrics=metrics, distribute=strategy)

      input_a_np = np.random.random((10, 3))
      input_b_np = np.random.random((10, 3))
      output_d_np = np.random.random((10, 4))
      output_e_np = np.random.random((10, 4))

      # Test with tuples.
      dataset_tuple = dataset_ops.Dataset.from_tensor_slices(
          ((input_a_np, input_b_np), (output_d_np, output_e_np)))
      dataset_tuple = dataset_tuple.repeat(100)
      dataset_tuple = dataset_tuple.batch(10)

      model.fit(dataset_tuple, epochs=1, steps_per_epoch=2, verbose=1)

      # Test with dict.
      dataset_dict = dataset_ops.Dataset.from_tensor_slices(
          ({'input_a': input_a_np, 'input_b': input_b_np},
           (output_d_np, output_e_np)))
      dataset_dict = dataset_dict.repeat(100)
      dataset_dict = dataset_dict.batch(10)

      model.fit(dataset_dict, epochs=1, steps_per_epoch=2, verbose=1)

  @combinations.generate(strategy_combinations())
  def test_fit_eval_and_predict_methods_on_dataset(self, distribution):
    with self.cached_session():
      model = get_model()

      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
      loss = 'mse'
      metrics = ['mae', keras.metrics.CategoricalAccuracy()]
      model.compile(optimizer, loss, metrics=metrics, distribute=distribution)

      dataset = get_dataset(distribution)

      model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1)
      model.evaluate(dataset, steps=2, verbose=1)
      model.predict(get_predict_dataset(distribution), steps=2)

  @combinations.generate(strategy_and_optimizer_combinations())
  def test_fit_eval_and_predict_with_optimizer(self, distribution, optimizer):
    with self.cached_session():
      model = get_model()

      loss = 'mse'
      model.compile(optimizer(), loss, distribute=distribution)

      dataset = get_dataset(distribution)

      model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1)
      model.evaluate(dataset, steps=2, verbose=1)
      model.predict(get_predict_dataset(distribution), steps=2)

  def test_unsupported_features(self):
    with self.cached_session():
      model = get_model()

      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
      loss = 'mse'
      metrics = ['mae']
      strategy = mirrored_strategy.MirroredStrategy(
          ['/device:GPU:1', '/device:GPU:0'])

      model.compile(optimizer, loss, metrics=metrics, distribute=strategy)

      dataset = get_dataset(strategy)

      # Test with validation split.
      with self.assertRaisesRegexp(
          ValueError, '`validation_split` argument is not '
          'supported when input `x` is a dataset or a '
          'dataset iterator.+'):
        model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
                  validation_split=0.5, validation_steps=2)

      # Test with sample weight.
      sample_weight = np.random.random((10,))
      with self.assertRaisesRegexp(
          NotImplementedError, '`sample_weight` is currently not supported '
          'when using DistributionStrategy.'):
        model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
                  sample_weight=sample_weight)

      # Test without specifying the `steps` argument.
with self.assertRaisesRegexp( ValueError, 'you should specify the `steps_per_epoch` argument'): model.fit(dataset, epochs=1, verbose=0) with self.assertRaisesRegexp( ValueError, 'you should specify the `steps` argument'): model.evaluate(dataset, verbose=0) with self.assertRaisesRegexp( ValueError, 'you should specify the `steps` argument'): model.predict(dataset, verbose=0) def test_calling_with_unsupported_predefined_callbacks(self): with self.cached_session(): model = get_model() optimizer = gradient_descent.GradientDescentOptimizer(0.001) loss = 'mse' metrics = ['mae'] strategy = mirrored_strategy.MirroredStrategy( ['/device:GPU:1', '/device:GPU:0']) model.compile(optimizer, loss, metrics=metrics, distribute=strategy) dataset = get_dataset(strategy) def schedule(_): return 0.001 with self.assertRaisesRegexp( ValueError, 'LearningRateScheduler callback is not ' 'supported with DistributionStrategy.'): model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0, callbacks=[ keras.callbacks.LearningRateScheduler(schedule) ]) with self.assertRaisesRegexp( ValueError, 'ReduceLROnPlateau callback is not ' 'supported with DistributionStrategy.'): model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0, callbacks=[keras.callbacks.ReduceLROnPlateau()]) with self.assertRaisesRegexp( ValueError, 'histogram_freq in the TensorBoard callback ' 'is not supported when using ' 'DistributionStrategy.'): model.fit( dataset, epochs=1, steps_per_epoch=2, verbose=0, callbacks=[keras.callbacks.TensorBoard(histogram_freq=10)]) def test_dataset_input_shape_validation(self): with self.cached_session(): model = get_model() optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001) loss = 'mse' strategy = mirrored_strategy.MirroredStrategy( ['/device:GPU:1', '/device:GPU:0']) model.compile(optimizer, loss, distribute=strategy) # User forgets to batch the dataset inputs = np.zeros((10, 3), dtype=np.float32) targets = np.zeros((10, 4), dtype=np.float32) dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) dataset = dataset.repeat(100) with self.assertRaisesRegexp(ValueError, 'expected input to have shape'): model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0) # Wrong input shape inputs = np.zeros((10, 5), dtype=np.float32) targets = np.zeros((10, 4), dtype=np.float32) dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) dataset = dataset.repeat(100) dataset = dataset.batch(10) with self.assertRaisesRegexp(ValueError, 'expected input to have shape'): model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0) @combinations.generate( combinations.combine(distribution=[combinations.tpu_strategy_one_step], mode=['graph'])) def test_dataset_input_shape_fully_defined(self, distribution): with self.cached_session(): model = get_model() optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001) loss = 'mse' model.compile(optimizer, loss, distribute=distribution) dataset = get_dataset(distribution) # Input shapes are not fully known. Batch dimension is unknown as we are # not using the drop_remainder argument. dataset = dataset.repeat(100).batch(10) with self.assertRaisesRegexp(ValueError, 'requires fully defined shapes'): model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0) def test_learning_phase_value(self): # TODO(anjalisridhar): Modify this test to use Lambdas since we can compare # meaningful values. Currently we don't pass the learning phase if the # Lambda layer uses the learning phase. 
with self.cached_session(): x = keras.layers.Input(shape=(16, ), name='input') y = keras.layers.Dense(16)(x) z = keras.layers.Dropout(0.9999)(y) model = keras.Model(x, z) optimizer = gradient_descent.GradientDescentOptimizer(0.005) loss = 'mse' metrics = ['acc'] strategy = mirrored_strategy.MirroredStrategy( ['/device:GPU:0', '/device:CPU:0']) model.compile(optimizer, loss, metrics=metrics, distribute=strategy) inputs = np.random.rand(10, 16) targets = np.ones((10, 16), dtype=np.float32) dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) dataset = dataset.repeat(100) dataset = dataset.batch(8) hist = model.fit(dataset, epochs=5, steps_per_epoch=20, verbose=1) self.assertEqual(hist.history['acc'][0], 1) evaluate_output = model.evaluate(dataset, steps=20) self.assertEqual(evaluate_output[1], 0) predict_output = model.predict(dataset, steps=1) self.assertNotEqual(np.mean(predict_output), 0)
with context.graph_mode(): _, replica_local = _make_replica_local( variable_scope.VariableAggregation.SUM) converted = ops.internal_convert_to_tensor(replica_local, as_ref=False) self.assertIsInstance(converted, ops.Tensor) self.assertEqual(converted.dtype, replica_local.dtype) converted = ops.internal_convert_to_tensor(replica_local, as_ref=True) # Resource variables are also converted to tensors when as_ref is True. self.assertIsInstance(converted, ops.Tensor) self.assertEqual(converted.dtype, replica_local.dtype) @combinations.generate(combinations.combine( distribution=[ combinations.mirrored_strategy_with_gpu_and_cpu, combinations.core_mirrored_strategy_with_gpu_and_cpu], mode=["graph", "eager"])) class ReplicaLocalVariableTest(test.TestCase, parameterized.TestCase): def _assign_replica_local(self, devices, v, new): for d, var, n in zip(devices, v, new): with ops.device(d): self.evaluate(var.assign(n)) def _save_return_saver(self, sess, var): saver = saver_lib.Saver(var_list=[var]) test_dir = self.get_temp_dir() prefix = os.path.join(test_dir, "ckpt") return saver.save(sess, prefix), saver
def strategy_minus_tpu_combinations(): return combinations.combine(distribution=strategies_minus_tpu, mode=['graph', 'eager'])
class SingleWorkerCrossDeviceOpsTest(CrossDeviceOpsTestBase): # TODO(yuefengz): decouple the num_gpus check from distribution in # combinations module so that we can pass in devices instead of a distribution # strategy. reduction_to_one_combinations = combinations.combine( cross_device_ops=[ combinations.NamedObject( "DefaultReductionToOneDeviceCrossDeviceOps", cross_device_ops_lib.ReductionToOneDeviceCrossDeviceOps()), combinations.NamedObject( "ReductionToCPUDeviceCrossDeviceOps", cross_device_ops_lib.ReductionToOneDeviceCrossDeviceOps( reduce_to_device=_cpu_device)), combinations.NamedObject( "AccumulateNCrossDeviceOp", cross_device_ops_lib.ReductionToOneDeviceCrossDeviceOps( accumulation_fn=math_ops.accumulate_n)), ], distribution=[ combinations.one_device_strategy, combinations.mirrored_strategy_with_gpu_and_cpu, combinations.mirrored_strategy_with_two_gpus, combinations.core_mirrored_strategy_with_gpu_and_cpu, combinations.core_mirrored_strategy_with_two_gpus ], mode=["graph", "eager"]) allreduce_combinations = combinations.combine( cross_device_ops=[ combinations.NamedObject( "AllReduce", cross_device_ops_lib.AllReduceCrossDeviceOps("nccl", 1, 0, 0)), combinations.NamedObject( "HierarchicalCopy", cross_device_ops_lib.AllReduceCrossDeviceOps( "hierarchical_copy", 8, 0, 0)), combinations.NamedObject( "AllReduceNoGradientRepacking", cross_device_ops_lib.AllReduceCrossDeviceOps("nccl", 0, 0, 0)), combinations.NamedObject( "HierarchicalCopyAggregateSmallTensors", cross_device_ops_lib.AllReduceCrossDeviceOps( "hierarchical_copy", 0, 100, 10)) ], distribution=[combinations.mirrored_strategy_with_two_gpus, combinations.core_mirrored_strategy_with_two_gpus], mode=["graph", "eager"]) @combinations.generate(reduction_to_one_combinations + allreduce_combinations) def testReductionAndBroadcast(self, cross_device_ops, distribution): with distribution.scope(): self._testReductionAndBroadcast(cross_device_ops, distribution) def testChooseAlgorithm(self): device_links = [[1, 2, 3, 4], [0, 2, 3, 5], [0, 1, 3, 6], [0, 1, 2, 7], [0, 5, 6, 7], [1, 4, 6, 7], [2, 4, 5, 7], [3, 4, 5, 6]] result = cross_device_ops_lib._choose_all_reduce_algorithm(device_links) self.assertIsInstance(result, cross_device_ops_lib.AllReduceCrossDeviceOps) self.assertEqual(result._all_reduce_alg, "hierarchical_copy") self.assertEqual(result._num_packs, 8) # if there are only 4 devices device_links = [[1, 2, 3, 4], [0, 2, 3, 5], [0, 1, 3, 6], [0, 1, 2, 7]] result = cross_device_ops_lib._choose_all_reduce_algorithm(device_links) self.assertIsInstance(result, cross_device_ops_lib.AllReduceCrossDeviceOps) self.assertEqual(result._all_reduce_alg, "nccl") self.assertEqual(result._num_packs, 1) # if devices links contain each device itself device_links = [[0, 1, 2, 3, 4], [0, 1, 2, 3, 5], [0, 1, 2, 3, 6], [0, 1, 2, 3, 7], [0, 4, 5, 6, 7], [1, 4, 5, 6, 7], [2, 4, 5, 6, 7], [3, 4, 5, 6, 7]] result = cross_device_ops_lib._choose_all_reduce_algorithm(device_links) self.assertIsInstance(result, cross_device_ops_lib.AllReduceCrossDeviceOps) self.assertEqual(result._all_reduce_alg, "hierarchical_copy") self.assertEqual(result._num_packs, 8) # if not dgx1-like links device_links = [[0, 2, 3, 5], [0, 1, 3, 6], [0, 1, 2, 7], [0, 5, 6, 7], [1, 4, 6, 7], [2, 4, 5, 7], [3, 4, 5, 6], [1, 2, 3, 4]] result = cross_device_ops_lib._choose_all_reduce_algorithm(device_links) self.assertIsInstance(result, cross_device_ops_lib.AllReduceCrossDeviceOps) self.assertEqual(result._all_reduce_alg, "nccl") self.assertEqual(result._num_packs, 1) 
@combinations.generate(combinations.combine( mode=["graph", "eager"], required_gpus=1)) def testSimpleReduceWithIndexedSlices(self): devices = ["/cpu:0", "/gpu:0"] t0 = _make_indexed_slices([[1., 2.]], [1], [5, 2], devices[0]) t1 = _make_indexed_slices([[3., 4.], [5., 6.]], [1, 3], [5, 2], devices[1]) per_replica = value_lib.PerReplica({devices[0]: t0, devices[1]: t1}) result = cross_device_ops_lib._simple_reduce( per_replica, devices[0], math_ops.add_n, reduce_util.ReduceOp.SUM) # Test that the result is semantically equal to both the concatenated # IndexedSlices with and without duplicate indices. total_with_dups = _make_indexed_slices( [[1., 2.], [3., 4.], [5., 6.]], [1, 1, 3], [5, 2], devices[0]) total_without_dups = _make_indexed_slices( [[4., 6.], [5., 6.]], [1, 3], [5, 2], devices[0]) self._assert_indexed_slices_equal(total_with_dups, result) self._assert_indexed_slices_equal(total_without_dups, result) @combinations.generate( combinations.combine( cross_device_ops_instance=[ combinations.NamedObject( "ReductionToOneDeviceCrossDeviceOps", cross_device_ops_lib.ReductionToOneDeviceCrossDeviceOps()), combinations.NamedObject( "AllReduceCrossDeviceOps", cross_device_ops_lib.AllReduceCrossDeviceOps()) ], reduce_op=[reduce_util.ReduceOp.SUM, reduce_util.ReduceOp.MEAN], batch_reduce=[True, False], mode=["graph", "eager"], required_gpus=1)) def testIndexedSlicesAllReduce(self, cross_device_ops_instance, reduce_op, batch_reduce): devices = ["/cpu:0", "/gpu:0"] dense_shape = [5, 2] t0 = _make_indexed_slices([[1., 2.]], [1], dense_shape, devices[0]) t1 = _make_indexed_slices( [[3., 4.], [5., 6.]], [1, 3], dense_shape, devices[1]) per_replica = value_lib.PerReplica({devices[0]: t0, devices[1]: t1}) if batch_reduce: result = cross_device_ops_instance.batch_reduce( reduce_op, [(per_replica, per_replica)]) else: result = cross_device_ops_instance.reduce( reduce_op, per_replica, per_replica) total_indices_with_dups = [1, 1, 3] total_indices_without_dups = [1, 3] if reduce_op == reduce_util.ReduceOp.SUM: total_values_with_dups = [[1., 2.], [3., 4.], [5., 6.]] total_values_without_dups = [[4., 6.], [5., 6.]] else: assert reduce_op == reduce_util.ReduceOp.MEAN total_values_with_dups = [[0.5, 1.], [1.5, 2.], [2.5, 3.]] total_values_without_dups = [[2., 3.], [2.5, 3.]] total_mirrored_with_dups = _make_mirrored_indexed_slices( devices, total_values_with_dups, total_indices_with_dups, dense_shape) total_mirrored_without_dups = _make_mirrored_indexed_slices( devices, total_values_without_dups, total_indices_without_dups, dense_shape) # Test that the result is semantically equal to both the concatenated # IndexedSlices, as well as when the duplicate indices are summed up. if batch_reduce: total_mirrored_with_dups = [total_mirrored_with_dups] total_mirrored_without_dups = [total_mirrored_without_dups] self._assert_values_equal(total_mirrored_with_dups, result) self._assert_values_equal(total_mirrored_without_dups, result)
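The duplicate-index assertions in the two IndexedSlices tests above rely on the fact that an IndexedSlices value is just a sparse encoding of a dense tensor, so rows listed under the same index accumulate. A small numpy check of the SUM case from testSimpleReduceWithIndexedSlices; the densify helper is mine and purely illustrative.

import numpy as np

def densify(values, indices, dense_shape):
    """Expand an IndexedSlices-style (values, indices) pair into a dense array."""
    dense = np.zeros(dense_shape, dtype=np.float32)
    for row, idx in zip(values, indices):
        dense[idx] += row  # rows under duplicate indices accumulate
    return dense

shape = [5, 2]
t0 = densify([[1., 2.]], [1], shape)                # slices from /cpu:0
t1 = densify([[3., 4.], [5., 6.]], [1, 3], shape)   # slices from /gpu:0

with_dups = densify([[1., 2.], [3., 4.], [5., 6.]], [1, 1, 3], shape)
without_dups = densify([[4., 6.], [5., 6.]], [1, 3], shape)

# All encodings describe the same dense tensor, which is why the test can
# compare the reduced result against either form.
assert np.allclose(t0 + t1, with_dups)
assert np.allclose(t0 + t1, without_dups)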
class CheckpointUtilsWithDistributionStrategyTest( test.TestCase, parameterized.TestCase): def _get_test_object(self): checkpoint_dir = self.get_temp_dir() with self.cached_session() as session: v1, v2 = _create_checkpoints(session, checkpoint_dir) return checkpoint_dir, v1, v2 @combinations.generate(combinations.combine( distribution=[combinations.default_strategy, combinations.one_device_strategy, combinations.mirrored_strategy_with_gpu_and_cpu, combinations.mirrored_strategy_with_two_gpus, combinations.core_mirrored_strategy_with_gpu_and_cpu, combinations.core_mirrored_strategy_with_two_gpus], in_replica_mode=[True, False], mode=["graph"])) def testInitFromCheckpoint(self, distribution, in_replica_mode): checkpoint_dir, v1_value, v2_value = self._get_test_object() def init_and_verify(g): v1 = variable_scope.get_variable("new_var1", [1, 10]) v2 = variable_scope.get_variable( "new_var2", [10, 10], synchronization=variable_scope.VariableSynchronization.ON_READ, aggregation=variable_scope.VariableAggregation.MEAN) checkpoint_utils.init_from_checkpoint(checkpoint_dir, { "var1": "new_var1", "var2": "new_var2" }) with self.session(graph=g) as session: session.run(variables.global_variables_initializer()) self.assertAllEqual(v1_value, self.evaluate(v1)) self.assertAllEqual(v2_value, self.evaluate(v2)) with ops.Graph().as_default() as g, distribution.scope(): if in_replica_mode: distribution.extended.call_for_each_replica(init_and_verify, args=[g]) else: init_and_verify(g) @combinations.generate( combinations.combine( distribution=[ combinations.default_strategy, combinations.one_device_strategy, combinations.mirrored_strategy_with_gpu_and_cpu, combinations.mirrored_strategy_with_two_gpus, combinations.core_mirrored_strategy_with_gpu_and_cpu, combinations.core_mirrored_strategy_with_two_gpus ], in_replica_mode=[True, False], mode=["graph"])) def testInitFromDifferentNameObject(self, distribution, in_replica_mode): checkpoint_dir, v1_value, _ = self._get_test_object() def init_and_verify(g): v1 = variable_scope.get_variable("new_var1", [1, 10]) # Use string add to create new object in each replica prefix = "new_" suffix = "var1" new_var1 = prefix + suffix checkpoint_utils.init_from_checkpoint(checkpoint_dir, { "var1": new_var1, }) with self.test_session(graph=g) as session: session.run(variables.global_variables_initializer()) self.assertAllEqual(v1_value, self.evaluate(v1)) with ops.Graph().as_default() as g, distribution.scope(): if in_replica_mode: distribution.extended.call_for_each_replica(init_and_verify, [g]) else: init_and_verify(g)
class MultiWorkerCrossDeviceOpsTest(multi_worker_test_base.MultiWorkerTestBase, CrossDeviceOpsTestBase): worker_devices = [ "/job:worker/replica:0/task:0", "/job:worker/replica:0/task:1" ] multi_worker_allreduce_combinations = combinations.combine( cross_device_ops=[ combinations.NamedObject( "MultiWorkerAllReduce", cross_device_ops_lib.MultiWorkerAllReduce( worker_devices, 2, ("pscpu/pscpu", 2, -1), 0, 0, 0)), combinations.NamedObject( "MultiWorkerAllReducePack", cross_device_ops_lib.MultiWorkerAllReduce( worker_devices, 2, ("pscpu/pscpu", 2, -1), 1, 0, 0)), combinations.NamedObject( "MultiWorkerAllReduceAggregation", cross_device_ops_lib.MultiWorkerAllReduce( worker_devices, 2, ("pscpu/pscpu", 2, -1), 0, 100, 10)), combinations.NamedObject( "MultiWorkerAllReduceMultipleSpecs", cross_device_ops_lib.MultiWorkerAllReduce( worker_devices, 2, [("pscpu/pscpu", 2, 100), ("xring", 2, -1)], 0, 0, 0)), ], distribution=[ combinations.NamedDistribution( "MirroredCPU", lambda: mirrored_strategy.MirroredStrategy(num_gpus_per_worker=0), required_gpus=0), combinations.NamedDistribution( "Mirrored1GPU", lambda: mirrored_strategy.MirroredStrategy(num_gpus_per_worker=1), required_gpus=1), combinations.NamedDistribution( "Mirrored2GPUs", lambda: mirrored_strategy.MirroredStrategy(num_gpus_per_worker=2), required_gpus=2), # pylint: disable=g-long-lambda combinations.NamedDistribution( "CoreMirroredCPU", lambda: mirrored_strategy.CoreMirroredStrategy(["/device:CPU:0"]), required_gpus=0), combinations.NamedDistribution( "CoreMirrored1GPU", lambda: mirrored_strategy.CoreMirroredStrategy(["/device:GPU:0"]), required_gpus=1), combinations.NamedDistribution( "CoreMirrored2GPUs", lambda: mirrored_strategy.CoreMirroredStrategy( ["/device:GPU:0", "/device:GPU:1"]), required_gpus=2), ], mode=["graph"]) @combinations.generate(multi_worker_allreduce_combinations) def testReductionAndBroadcast(self, cross_device_ops, distribution): distribution.configure(cluster_spec={ "worker": ["/job:worker/replica:0/task:0", "/job:worker/replica:0/task:1"] }) with distribution.scope(): self._testReductionAndBroadcast(cross_device_ops, distribution)
def all_strategy_combinations_with_graph_mode(): return combinations.combine(distribution=all_strategies, mode=['graph'])
class MultiWorkerCollectiveAllReduceTest( multi_worker_test_base.MultiWorkerTestBase, parameterized.TestCase): collective_key_base = 100000 @classmethod def setUpClass(cls): """Create a local cluster with 2 workers.""" cls._cluster_spec = multi_worker_test_base.create_in_process_cluster( num_workers=3, num_ps=0) def setUp(self): super(MultiWorkerCollectiveAllReduceTest, self).setUp() # Reusing keys are not supported well. So we have to give a different # collective key base for different tests. MultiWorkerCollectiveAllReduceTest.collective_key_base += 100000 def _get_test_objects(self, task_type, task_id, num_gpus=0, local_mode=False): collective_keys = cross_device_utils.CollectiveKeys( group_key_start=10 * num_gpus + MultiWorkerCollectiveAllReduceTest.collective_key_base, instance_key_start=num_gpus * 100 + MultiWorkerCollectiveAllReduceTest.collective_key_base, instance_key_with_id_start=num_gpus * 10000 + MultiWorkerCollectiveAllReduceTest.collective_key_base) if local_mode: collective_all_reduce_ops = cross_device_ops_lib.CollectiveAllReduce( 1, num_gpus, collective_keys=collective_keys) if num_gpus: devices = ["/device:GPU:%d" % i for i in range(num_gpus)] else: devices = ["/device:CPU:0"] return collective_all_reduce_ops, devices, "" else: collective_all_reduce_ops = cross_device_ops_lib.CollectiveAllReduce( 3, num_gpus, collective_keys=collective_keys) if num_gpus: devices = [ "/job:%s/task:%d/device:GPU:%d" % (task_type, task_id, i) for i in range(num_gpus) ] else: devices = ["/job:%s/task:%d" % (task_type, task_id)] return (collective_all_reduce_ops, devices, "grpc://" + self._cluster_spec[task_type][task_id]) def _assert_values_equal(self, left, right, sess): if isinstance(left, list): for l, r in zip(left, right): self._assert_values_equal(l, r, sess) else: self.assertEqual(type(left), type(right)) self.assertEqual(set(left.devices), set(right.devices)) run_options = config_pb2.RunOptions() run_options.experimental.collective_graph_key = 6 left_values = np.array( sess.run(list(left._index.values()), options=run_options)).flatten() right_values = np.array(list(right._index.values())).flatten() self.assertEqual(len(left_values), len(right_values)) for l, r in zip(left_values, right_values): self.assertEqual(l, r) def _test_reduction(self, task_type, task_id, num_gpus, local_mode=False): collective_all_reduce, devices, master_target = self._get_test_objects( task_type, task_id, num_gpus, local_mode=local_mode) if local_mode: num_workers = 1 worker_device = None else: num_workers = len(self._cluster_spec.get("chief", [])) + len( self._cluster_spec.get("worker", [])) worker_device = "/job:%s/task:%d" % (task_type, task_id) with ops.Graph().as_default(), \ ops.device(worker_device), \ self.cached_session(target=master_target) as sess: # Collective ops doesn't support scalar tensors, so we have to construct # 1-d tensors. values = [constant_op.constant([float(d)]) for d in range(len(devices))] per_replica = _make_per_replica(values, devices, regroup=True) mean = np.array([(len(devices) - 1.) 
/ 2.]) values_2 = [constant_op.constant([d + 1.0]) for d in range(len(devices))] per_replica_2 = _make_per_replica(values_2, devices) mean_2 = np.array([mean[0] + 1.]) destination_mirrored = _fake_mirrored(1., devices) destination_different = _fake_mirrored(1., _cpu_device) destination_str = _cpu_device all_destinations = [ destination_different, destination_mirrored, destination_str ] # test reduce() for destinations in all_destinations: self._assert_values_equal( collective_all_reduce.reduce( reduce_util.ReduceOp.MEAN, per_replica, destinations=destinations), _fake_mirrored(mean, destinations), sess) self._assert_values_equal( collective_all_reduce.reduce( reduce_util.ReduceOp.MEAN, per_replica_2, destinations=destinations), _fake_mirrored(mean_2, destinations), sess) self._assert_values_equal( collective_all_reduce.reduce( reduce_util.ReduceOp.SUM, per_replica, destinations=destinations), _fake_mirrored(mean * len(devices) * num_workers, destinations), sess) self._assert_values_equal( collective_all_reduce.reduce( reduce_util.ReduceOp.SUM, per_replica_2, destinations=destinations), _fake_mirrored(mean_2 * len(devices) * num_workers, destinations), sess) # test batch_reduce() for d1, d2 in itertools.product(all_destinations, all_destinations): self._assert_values_equal( collective_all_reduce.batch_reduce(reduce_util.ReduceOp.MEAN, [(per_replica, d1), (per_replica_2, d2)]), [ _fake_mirrored(mean, d1), _fake_mirrored(mean_2, d2) ], sess) self._assert_values_equal( collective_all_reduce.batch_reduce(reduce_util.ReduceOp.SUM, [(per_replica, d1), (per_replica_2, d2)]), [ _fake_mirrored(mean * len(devices) * num_workers, d1), _fake_mirrored(mean_2 * len(devices) * num_workers, d2) ], sess) return True @combinations.generate( combinations.combine(mode=["graph"], num_gpus=[0, 1, 2], required_gpus=1)) def testReductionDistributed(self, num_gpus): if context.num_gpus() < num_gpus: return self._run_between_graph_clients(self._test_reduction, self._cluster_spec, num_gpus) # Collective ops doesn't support strategy with one device. def testReductionLocal(self, num_gpus=2): if context.num_gpus() < num_gpus: return self._test_reduction(None, None, num_gpus, local_mode=True)
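The expected values in _test_reduction above follow from simple bookkeeping: every worker feeds the per-device values 0., 1., ..., n-1., so the MEAN over all replicas is (n-1)/2 regardless of the worker count, while the SUM additionally scales with the number of devices and workers. A standalone numpy check of that arithmetic; the three-worker count matches the in-process cluster created in setUpClass.

import numpy as np

num_workers = 3          # the test's in-process cluster has three workers
num_devices = 2          # e.g. two GPUs per worker

# Every worker feeds the same per-device values [0.], [1.], ..., [n-1.].
per_worker = np.arange(num_devices, dtype=np.float32)
all_values = np.tile(per_worker, num_workers)

mean = (num_devices - 1.0) / 2.0
assert np.isclose(all_values.mean(), mean)
assert np.isclose(all_values.sum(), mean * num_devices * num_workers)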
from __future__ import print_function from absl.testing import parameterized from tensorflow.contrib.distribute.python import combinations from tensorflow.python.eager import test from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops from tensorflow.python.ops import variables from tensorflow.python.training import moving_averages all_combinations = combinations.combine( distribution=[combinations.default_strategy, combinations.one_device_strategy, combinations.mirrored_strategy_with_gpu_and_cpu], mode=["graph"]) class AssignMovingAveragesTest(test.TestCase, parameterized.TestCase): @combinations.generate(all_combinations) def testReplicaModeWithoutZeroDebias(self, distribution): replica_id = [0] def replica_fn(): var = variables.Variable([10.0, 11.0]) val = constant_op.constant([1.0 + replica_id[0], 2.0 - replica_id[0]]) replica_id[0] += 1 decay = 0.25
def test_overlapping_keys(self): c1 = combinations.combine(mode=["graph"], loss=["callable", "tensor"]) c2 = combinations.combine(mode=["eager"], loss=["callable"]) with self.assertRaisesRegexp(ValueError, ".*Keys.+overlap.+"): _ = combinations.times(c1, c2)
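test_overlapping_keys pins down the contract that combinations.times must refuse to merge argument sets that define the same key more than once (here both c1 and c2 set mode and loss). The sketch below shows how such a check could be written; it is reconstructed from the test's observable behavior, not taken from the library source, and the function name is mine.

from collections import OrderedDict

def times_sketch(*combined):
    """Illustrative cartesian product with the key-overlap check the test exercises."""
    result = [OrderedDict()]
    for combination in combined:
        merged = []
        for base in result:
            for kwargs in combination:
                overlap = set(base) & set(kwargs)
                if overlap:
                    raise ValueError(
                        "Keys %s overlap between combinations" % sorted(overlap))
                merged.append(
                    OrderedDict(sorted(list(base.items()) + list(kwargs.items()))))
        result = merged
    return result

Calling times_sketch(c1, c2) with the combinations from the test raises a ValueError naming the overlapping keys, which is the behavior the assertRaisesRegexp above expects.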
class ParameterServerStrategyTest(ParameterServerStrategyTestBase, strategy_test_lib.DistributionTestBase, parameterized.TestCase): @classmethod def setUpClass(cls): cls._cluster_spec = multi_worker_test_base.create_in_process_cluster( num_workers=3, num_ps=2) cls._default_target = 'grpc://' + cls._cluster_spec[WORKER][0] def test_num_replicas_in_sync(self): distribution = parameter_server_strategy.ParameterServerStrategy( num_gpus_per_worker=2) # All the devices on a given worker are in sync which in this case is the # number of gpus on each worker. self.assertEqual(2, distribution.num_replicas_in_sync) def testDeviceAssignmentLocalCPU(self): distribution = parameter_server_strategy.ParameterServerStrategy( num_gpus_per_worker=0) self._test_device_assignment_local(distribution, compute_device='CPU', variable_device='CPU', num_gpus=0) def testDeviceAssignmentLocalOneGPU(self): distribution = parameter_server_strategy.ParameterServerStrategy( num_gpus_per_worker=1) self._test_device_assignment_local(distribution, compute_device='GPU', variable_device='GPU', num_gpus=1) def testDeviceAssignmentLocalTwoGPUs(self): distribution = parameter_server_strategy.ParameterServerStrategy( num_gpus_per_worker=2) self._test_device_assignment_local(distribution, compute_device='GPU', variable_device='CPU', num_gpus=2) @combinations.generate( combinations.combine(mode=['graph'], num_gpus=[0, 1, 2])) def testDeviceAssignmentDistributed(self, num_gpus): self._test_device_assignment_distributed('worker', 1, num_gpus) @combinations.generate( combinations.combine(mode=['graph'], num_gpus=[0, 1, 2])) def testDeviceAssignmentDistributedEnablePartitioner(self, num_gpus): self._test_device_assignment_distributed_enable_partitioner( 'worker', 1, num_gpus) def testSimpleBetweenGraph(self): self._run_between_graph_clients(self._test_simple_increment, self._cluster_spec, context.num_gpus()) @combinations.generate( combinations.combine(mode=['graph'], num_gpus=[0, 1, 2])) def testLocalSimpleIncrement(self, num_gpus): self._test_simple_increment(None, 0, num_gpus) @combinations.generate( combinations.combine(mode=['graph'], num_gpus=[0, 1, 2])) def testMinimizeLossGraphDistributed(self, num_gpus): self._run_between_graph_clients(self._test_minimize_loss_graph, self._cluster_spec, num_gpus) @combinations.generate( combinations.combine(mode=['graph'], num_gpus=[0, 1, 2])) def testMinimizeLossGraphLocal(self, num_gpus): self._test_minimize_loss_graph(None, None, num_gpus) # TODO(priyag): Refactor this and other multi worker tests. 
@combinations.generate( combinations.combine(mode=['graph'], num_gpus=[1, 2], required_gpus=1)) def testMakeInputFnIteratorDistributed(self, num_gpus): if context.num_gpus() < num_gpus: self.skipTest('Not enough GPUs') dataset_fn = lambda: dataset_ops.Dataset.range(100) expected_values = [[i + j for j in range(num_gpus)] for i in range(0, 100, num_gpus)] input_fn = self._input_fn_to_test_input_context( dataset_fn, expected_num_replicas_in_sync=num_gpus, expected_num_input_pipelines=3, expected_input_pipeline_id=1) # because task_id = 1 self._test_input_fn_iterator('worker', 1, num_gpus, input_fn, expected_values) @combinations.generate( combinations.combine(mode=['graph'], num_gpus=[1, 2], required_gpus=1)) def testMakeInputFnIteratorLocal(self, num_gpus): if context.num_gpus() < num_gpus: self.skipTest('Not enough GPUs') dataset_fn = lambda: dataset_ops.Dataset.range(100) expected_values = [[i + j for j in range(num_gpus)] for i in range(0, 100, num_gpus)] input_fn = self._input_fn_to_test_input_context( dataset_fn, expected_num_replicas_in_sync=num_gpus, expected_num_input_pipelines=1, expected_input_pipeline_id=0 ) # only one worker and pipeline for local. self._test_input_fn_iterator(None, None, num_gpus, input_fn, expected_values) def testGlobalStepUpdate(self): strategy = parameter_server_strategy.ParameterServerStrategy( num_gpus_per_worker=context.num_gpus()) self._test_global_step_update(strategy) def testUpdateConfigProtoMultiWorker(self): distribution = parameter_server_strategy.ParameterServerStrategy( num_gpus_per_worker=2) distribution.configure(cluster_spec=self._cluster_spec, task_type='worker', task_id=1) config_proto = config_pb2.ConfigProto( device_filters=['to_be_overridden']) new_config = distribution.update_config_proto(config_proto) # Verify device filters. self.assertEqual(['/job:worker/task:1', '/job:ps'], new_config.device_filters) # Verify isolate_session_state self.assertFalse(new_config.isolate_session_state) def testUpdateConfigProtoLocal(self): distribution = parameter_server_strategy.ParameterServerStrategy( num_gpus_per_worker=2) config_proto = config_pb2.ConfigProto() new_config = distribution.update_config_proto(config_proto) # Verify isolate_session_state self.assertTrue(new_config.isolate_session_state)
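The expected_values comprehension in the two iterator tests above encodes the intended per-step split of Dataset.range(100): each of the num_gpus replicas on the worker under test receives the next consecutive element from its pipeline. A tiny standalone illustration for num_gpus=2.

num_gpus = 2
expected_values = [[i + j for j in range(num_gpus)]
                   for i in range(0, 100, num_gpus)]

# Step 0 feeds 0 and 1 to the two replicas, step 1 feeds 2 and 3, and so on.
print(expected_values[:3])   # [[0, 1], [2, 3], [4, 5]]
print(len(expected_values))  # 50 steps to exhaust Dataset.range(100)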
class MinimizeLossStepTest(test.TestCase, parameterized.TestCase): def _get_iterator(self, ds): if context.executing_eagerly(): iterator = ds.make_one_shot_iterator() else: iterator = ds.make_initializable_iterator() self.evaluate(iterator.initializer) return iterator @combinations.generate( combinations.times( combinations.distributions_and_v1_optimizers(), combinations.combine(mode=["graph"], use_callable_loss=[True, False]) + combinations.combine(mode=["eager"], use_callable_loss=[True])) + combinations.combine(distribution=[combinations.tpu_strategy], optimizer_fn=combinations.optimizers_v1, mode=["graph"], use_callable_loss=[True, False])) def testTrainNetwork(self, distribution, optimizer_fn, use_callable_loss): with distribution.scope(): model_fn, dataset_fn, layer = minimize_loss_example( optimizer_fn, use_bias=True, use_callable_loss=use_callable_loss) def step_fn(ctx, inputs): del ctx # Unused return distribution.group( distribution.call_for_each_replica(model_fn, args=inputs)) iterator = self._get_iterator( distribution.distribute_dataset(dataset_fn)) def run_step(): return distribution.run_steps_on_dataset(step_fn, iterator, iterations=2).run_op self.evaluate(distribution.initialize()) if not context.executing_eagerly(): with self.cached_session() as sess: run_step = sess.make_callable(run_step()) self.evaluate(variables_lib.global_variables_initializer()) weights, biases = [], [] for _ in range(5): run_step() weights.append(self.evaluate(layer.kernel)) biases.append(self.evaluate(layer.bias)) self.evaluate(distribution.finalize()) error = abs( numpy.add(numpy.squeeze(weights), numpy.squeeze(biases)) - 1) is_not_increasing = all(y <= x for x, y in zip(error, error[1:])) self.assertTrue(is_not_increasing) @combinations.generate( combinations.times( combinations.distributions_and_v1_optimizers(), combinations.combine(mode=["graph"], use_callable_loss=[True, False]) + combinations.combine(mode=["eager"], use_callable_loss=[True]))) def testTrainNetworkByCallForEachReplica(self, distribution, optimizer_fn, use_callable_loss): with distribution.scope(): model_fn, dataset_fn, layer = minimize_loss_example( optimizer_fn, use_bias=True, use_callable_loss=use_callable_loss) iterator = self._get_iterator( distribution.distribute_dataset(dataset_fn)) def run_step(): return distribution.group( distribution.call_for_each_replica( model_fn, args=(iterator.get_next(), ))) if not context.executing_eagerly(): with self.cached_session() as sess: run_step = sess.make_callable(run_step()) self.evaluate(variables_lib.global_variables_initializer()) weights, biases = [], [] for _ in range(10): run_step() weights.append(self.evaluate(layer.kernel)) biases.append(self.evaluate(layer.bias)) error = abs( numpy.add(numpy.squeeze(weights), numpy.squeeze(biases)) - 1) is_not_increasing = all(y <= x for x, y in zip(error, error[1:])) self.assertTrue(is_not_increasing) @combinations.generate( combinations.times( combinations.distributions_and_v1_optimizers() + combinations.distributions_and_v2_optimizers(), combinations.combine(mode=["graph", "eager"])) + combinations.combine(distribution=[combinations.tpu_strategy], optimizer_fn=combinations.optimizers_v1 + combinations.optimizers_v2, mode=["graph"])) def testOptimizerInsideModelFn(self, distribution, optimizer_fn): created_variables = [] trainable_variables = [] def appending_creator(next_creator, *args, **kwargs): v = next_creator(*args, **kwargs) created_variables.append(v.name) if "trainable" in kwargs and kwargs["trainable"]: 
trainable_variables.append(v.name) return v # Creator scope needs to be set before it's used inside # `distribution.scope`. with variable_scope.variable_creator_scope( appending_creator), distribution.scope(): model_fn, dataset_fn, layer = minimize_loss_example( optimizer_fn, use_bias=True, use_callable_loss=True, create_optimizer_inside_model_fn=True) def step_fn(ctx, inputs): del ctx # Unused return distribution.group( distribution.call_for_each_replica(model_fn, args=inputs)) iterator = self._get_iterator( distribution.distribute_dataset(dataset_fn)) def run_step(): return distribution.run_steps_on_dataset(step_fn, iterator, iterations=1).run_op self.evaluate(distribution.initialize()) if not context.executing_eagerly(): with self.cached_session() as sess: run_step = sess.make_callable(run_step()) self.evaluate(variables_lib.global_variables_initializer()) run_step() self.evaluate(distribution.finalize()) def get_expected_variables(optimizer_fn, num_parameter_devices): variables_map = { "GradientDescent": ["dense/kernel", "dense/bias"], "Adagrad": [ "dense/kernel/Adagrad", "dense/kernel", "dense/bias/Adagrad", "dense/bias" ] } variables = variables_map[optimizer_fn().get_name()] variables.extend([ v + "/replica_{}".format(replica) for v in variables for replica in range(1, num_parameter_devices) ]) return set([v + ":0" for v in variables]) self.assertEqual( get_expected_variables(optimizer_fn, len(distribution.parameter_devices)), set(created_variables)) @combinations.generate( combinations.times( combinations.combine(momentum=[0.8, 0.9, 0.99], renorm=[False, True]), combinations.times( combinations.distributions_and_v1_optimizers(), combinations.combine( mode=["graph", "eager"], # TODO(isaprykin): Allow False here. Currently subsequent # replicas will re-execute UPDATE_OPS of previous replicas. update_ops_in_cross_replica_mode=[True])) + combinations.combine(distribution=[combinations.tpu_strategy], optimizer_fn=combinations.optimizers_v1, mode=["graph"], update_ops_in_cross_replica_mode=[False]))) def testTrainNetworkWithBatchNorm(self, distribution, optimizer_fn, momentum, renorm, update_ops_in_cross_replica_mode): """Verifies that moving mean updates are reduced across replicas.""" with distribution.scope(): num_replicas = distribution.num_replicas_in_sync model_fn, dataset_fn, batchnorm = batchnorm_example( optimizer_fn, batch_per_epoch=num_replicas, momentum=momentum, renorm=renorm, update_ops_in_replica_mode=not update_ops_in_cross_replica_mode ) def step_fn(ctx, inputs): del ctx # Unused fetches = distribution.unwrap( distribution.call_for_each_replica(model_fn, args=inputs)) if update_ops_in_cross_replica_mode: fetches += ops.get_collection(ops.GraphKeys.UPDATE_OPS) return control_flow_ops.group(fetches) iterator = self._get_iterator( distribution.distribute_dataset(dataset_fn)) def run_step(): return distribution.run_steps_on_dataset(step_fn, iterator, iterations=1).run_op self.evaluate(distribution.initialize()) if not context.executing_eagerly(): with self.cached_session() as sess: run_step = sess.make_callable(run_step()) self.evaluate(variables_lib.global_variables_initializer()) expected_moving_means = [0.] * 8 def averaged_batch_mean(i): # Each batch has shape [16, 8] where the ith element in jth list is # (8 * j + i + replica_id * 100). So the batch mean in each replica is # (60 + i + replica_id * 100). So here comes its batch mean over all # replicas: return 60. + i + (num_replicas - 1.) / 2. * 100. 
for _ in range(10): run_step() moving_means = self.evaluate(batchnorm.moving_mean) # We make sure that the moving_mean is updated as if the sample mean is # calculated over all replicas. for i, expected_moving_mean in enumerate( expected_moving_means): expected_moving_means[i] -= ( (expected_moving_mean - averaged_batch_mean(i)) * (1.0 - momentum)) self.assertNear(expected_moving_means[i], moving_means[i], 0.0001) self.evaluate(distribution.finalize()) @combinations.generate( combinations.times( combinations.combine( optimizer_fn=[ combinations.gradient_descent_optimizer_v1_fn, combinations.gradient_descent_optimizer_v2_fn ], loss_reduction=[ losses_impl.Reduction.SUM, losses_impl.Reduction.MEAN, losses_impl.Reduction.SUM_OVER_BATCH_SIZE, losses_impl.Reduction.SUM_OVER_NONZERO_WEIGHTS ]), combinations.times( combinations.combine(distribution=[ combinations.one_device_strategy, combinations.mirrored_strategy_with_gpu_and_cpu, combinations.mirrored_strategy_with_two_gpus, combinations.core_mirrored_strategy_with_gpu_and_cpu, combinations.core_mirrored_strategy_with_two_gpus ]), combinations.combine(mode=["graph"], use_callable_loss=[True, False]) + combinations.combine(mode=["eager"], use_callable_loss=[True])) + combinations.combine(distribution=[combinations.tpu_strategy], mode=["graph"], use_callable_loss=[True, False]))) def testMeanVsSum(self, distribution, optimizer_fn, loss_reduction, use_callable_loss): with distribution.scope(): all_vars = [] def model_fn(x, y): def loss_fn(): # Use fixed initialization to make the steps deterministic. w = variable_scope.get_variable("w", initializer=[[2.]]) all_vars.append(w) predict = math_ops.matmul(x, w) return losses_impl.mean_squared_error( y, predict, reduction=loss_reduction) optimizer = optimizer_fn( ) # GradientDescent with 0.2 learning rate if use_callable_loss: return optimizer.minimize(loss_fn) else: return optimizer.minimize(loss_fn()) def dataset_fn(): features = dataset_ops.Dataset.from_tensors([[2.], [7.]]) labels = dataset_ops.Dataset.from_tensors([[6.], [21.]]) return dataset_ops.Dataset.zip((features, labels)).repeat() def step_fn(ctx, inputs): del ctx # Unused return distribution.group( distribution.call_for_each_replica(model_fn, args=inputs)) iterator = self._get_iterator( distribution.distribute_dataset(dataset_fn)) def run_step(): return distribution.run_steps_on_dataset(step_fn, iterator, iterations=1).run_op self.evaluate(distribution.initialize()) if not context.executing_eagerly(): with self.cached_session() as sess: run_step = sess.make_callable(run_step()) self.evaluate(variables_lib.global_variables_initializer()) run_step() v = all_vars[0] self.assertTrue(all(v is vi for vi in all_vars[1:])) weight = numpy.squeeze(self.evaluate(v)) # Our model is: # predict = x * w # loss = (predict - y)^2 # dloss/dpredict = 2*(predict - y) # dloss/dw = 2 * x^T @ (predict - y) # For our batch size of 2, assuming sum loss reduction: # x = [2, 7] # y = [6, 21] # w_initial = 2 # predict = [4, 14] # predict - y = [-2, -7] # dloss/dw = 2 <[2, 7], [-2, -7]> = - 2(4 + 49) = -106 # So unreplicated the update to w with lr=0.2 is -0.2 * -106 = 21.2 # with sum loss reduction, or 10.6 with mean. if loss_reduction == losses_impl.Reduction.SUM: # Note that the "distribution.num_replicas_in_sync" factor will go away # once we split the input across replicas, instead of pulling a complete # batch of input per replica. self.assertNear(weight, 2 + 21.2 * distribution.num_replicas_in_sync, 0.0001) else: # One of the mean loss reductions. 
self.assertNear(weight, 2 + 10.6, 0.0001) self.evaluate(distribution.finalize()) @combinations.generate( combinations.times(combinations.distributions_and_v1_optimizers(), combinations.combine(mode=["graph", "eager"]), combinations.combine(is_tpu=[False])) + combinations.combine(distribution=[combinations.tpu_strategy], optimizer_fn=combinations.optimizers_v1, mode=["graph"], is_tpu=[True])) def testRunStepsWithOutputContext(self, distribution, optimizer_fn, is_tpu): with distribution.scope(): def dataset_fn(): dataset = dataset_ops.Dataset.from_tensors([[1.]]).repeat() # TODO(priyag): batch with drop_remainder=True causes shapes to be # fully defined for TPU. Remove this when XLA supports dynamic shapes. return dataset.batch(batch_size=1, drop_remainder=True) optimizer = optimizer_fn() layer = core.Dense(1, use_bias=True) key1 = "foo" value1 = "bar" def model_fn(output_context, x): """A very simple model written by the user.""" def loss_fn(): y = array_ops.reshape(layer(x), []) - constant_op.constant(1.) return y * y train_op = optimizer.minimize(loss_fn) loss = loss_fn() output_context.set_last_step_output( name="replica_loss_reduced", output=loss, reduce_op=reduce_util.ReduceOp.MEAN) output_context.set_non_tensor_output(key1, value1) return (train_op, loss) def step_fn(output_context, inputs): (train_op, loss) = distribution.call_for_each_replica( model_fn, args=(output_context, ) + inputs) output_context.set_last_step_output( name="cross_replica_loss_reduced", output=loss, reduce_op=reduce_util.ReduceOp.MEAN) output_context.set_last_step_output( name="cross_replica_loss_not_reduced", output=loss) return distribution.group(train_op) iterator = self._get_iterator( distribution.distribute_dataset(dataset_fn)) def run_step(): initial_loss = lambda: constant_op.constant(1e7) # Initial values corresponding to reduced losses are just single # tensors. But for non reduced losses, we need to have initial # values that are of the same structure as non reduced losses. In # MirroredStrategy, this will be a list of losses, in TPUStrategy # it will be single tensor. Using `broadcast` followed by `unwrap` # gives us the desired initial value structure. initial_loop_values = { "replica_loss_reduced": initial_loss(), "cross_replica_loss_reduced": initial_loss(), "cross_replica_loss_not_reduced": distribution.unwrap(distribution.broadcast(initial_loss())) } ctx = distribution.run_steps_on_dataset( step_fn, iterator, iterations=2, initial_loop_values=initial_loop_values) self.assertEqual({key1: [value1]}, ctx.non_tensor_outputs) self._verify_loss_output( initial_loss(), loss_output=ctx.last_step_outputs["replica_loss_reduced"], reduced=True, distribution=distribution) self._verify_loss_output( initial_loss(), loss_output=ctx. last_step_outputs["cross_replica_loss_reduced"], reduced=True, distribution=distribution) self._verify_loss_output( initial_loss(), loss_output=ctx. 
last_step_outputs["cross_replica_loss_not_reduced"], reduced=False, distribution=distribution) return (ctx.run_op, ctx.last_step_outputs["replica_loss_reduced"]) self.evaluate(distribution.initialize()) if not context.executing_eagerly(): with self.cached_session() as sess: run_step = sess.make_callable(run_step()) self.evaluate(variables_lib.global_variables_initializer()) weights, biases, losses = [], [], [] for _ in range(5): _, loss = run_step() losses.append(loss) weights.append(self.evaluate(layer.kernel)) biases.append(self.evaluate(layer.bias)) self.evaluate(distribution.finalize()) loss_is_not_increasing = all(y <= x for x, y in zip(losses, losses[1:])) self.assertTrue(loss_is_not_increasing) error = abs( numpy.add(numpy.squeeze(weights), numpy.squeeze(biases)) - 1) error_is_not_increasing = all(y <= x for x, y in zip(error, error[1:])) self.assertTrue(error_is_not_increasing) def _verify_loss_output(self, initial_loss, loss_output, reduced, distribution): if not reduced: self.assertLen(distribution.unwrap(loss_output), distribution.num_replicas_in_sync) loss_tensor = distribution.reduce(reduce_util.ReduceOp.MEAN, loss_output) else: unwrapped_output = distribution.unwrap(loss_output) self.assertLen(unwrapped_output, 1) loss_tensor = unwrapped_output[0] self.assertEqual(initial_loss.dtype, loss_tensor.dtype) self.assertEqual(initial_loss.shape, loss_tensor.shape)
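The weights asserted at the end of testMeanVsSum above come directly from the derivation in its comments (dloss/dw = 2 * x^T @ (predict - y) for the fixed batch x=[2, 7], y=[6, 21], w=2, lr=0.2). The standalone numpy check below reproduces that arithmetic for a single replica; the extra num_replicas_in_sync factor in the SUM case is explained in the test's own comment.

import numpy as np

x = np.array([[2.0], [7.0]])   # features, batch of 2
y = np.array([[6.0], [21.0]])  # labels
w = 2.0                        # fixed initial weight
lr = 0.2                       # GradientDescent learning rate noted in the test

predict = x * w                              # [[4.], [14.]]
grad_sum = 2.0 * (x * (predict - y)).sum()   # dloss/dw with SUM reduction: -106
grad_mean = grad_sum / len(x)                # MEAN-style reductions divide by batch: -53

print(w - lr * grad_sum)    # 23.2 == 2 + 21.2, the per-replica SUM update
print(w - lr * grad_mean)   # 12.6 == 2 + 10.6, the MEAN update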
with context.graph_mode(): _, replica_local = _make_replica_local( variable_scope.VariableAggregation.SUM) converted = ops.internal_convert_to_tensor(replica_local, as_ref=False) self.assertIsInstance(converted, ops.Tensor) self.assertEqual(converted.dtype, replica_local.dtype) converted = ops.internal_convert_to_tensor(replica_local, as_ref=True) # Resource variables are also converted to tensors when as_ref is True. self.assertIsInstance(converted, ops.Tensor) self.assertEqual(converted.dtype, replica_local.dtype) @combinations.generate(combinations.combine( distribution=[ combinations.mirrored_strategy_with_gpu_and_cpu, combinations.core_mirrored_strategy_with_gpu_and_cpu], mode=["graph", "eager"])) class SyncOnReadVariableTest(test.TestCase, parameterized.TestCase): def _assign_replica_local(self, devices, v, new): for d, var, n in zip(devices, v, new): with ops.device(d): self.evaluate(var.assign(n)) def _save_return_saver(self, sess, var): saver = saver_lib.Saver(var_list=[var]) test_dir = self.get_temp_dir() prefix = os.path.join(test_dir, "ckpt") return saver.save(sess, prefix), saver
def all_strategy_and_input_config_combinations(): return ( combinations.times( combinations.combine(distribution=all_strategies), eager_mode_test_configuration() + graph_mode_test_configuration()))
"""Tests for class OneDeviceStrategy.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from tensorflow.contrib.distribute.python import combinations from tensorflow.contrib.distribute.python import strategy_test_lib from tensorflow.python.data.ops import dataset_ops from tensorflow.python.eager import context from tensorflow.python.eager import test @combinations.generate(combinations.combine( distribution=[ combinations.one_device_strategy, combinations.one_device_strategy_gpu], mode=["eager", "graph"])) class OneDeviceStrategyTest( strategy_test_lib.DistributionTestBase, strategy_test_lib.OneDeviceDistributionTestBase): def testMinimizeLoss(self, distribution): if context.executing_eagerly(): self._test_minimize_loss_eager(distribution) else: self._test_minimize_loss_graph(distribution) def testReplicaId(self, distribution): self._test_replica_id(distribution)
class TestDistributionStrategyWithNumpyArrays(test.TestCase, parameterized.TestCase): @combinations.generate(strategy_for_numpy_input_combinations()) def test_calling_model_with_numpy_arrays(self, distribution): with self.cached_session(): model = get_model() optimizer = gradient_descent.GradientDescentOptimizer(0.001) loss = 'mse' metrics = ['mae'] model.compile(optimizer, loss, metrics=metrics, distribute=distribution) inputs = np.zeros((64, 3), dtype=np.float32) targets = np.zeros((64, 4), dtype=np.float32) # Call fit with validation data model.fit(inputs, targets, epochs=1, batch_size=2, verbose=0, validation_data=(inputs, targets)) # TODO(anjalisridhar): We need tests for when the batch size and steps are # smaller and results in a 0 batch_size and steps value. model.evaluate(inputs, targets) # with steps model.evaluate(inputs, targets, steps=2) # with batch_size model.evaluate(inputs, targets, batch_size=8) model.predict(inputs) # with steps model.predict(inputs, steps=2) # with batch_size model.predict(inputs, batch_size=8) @combinations.generate(strategy_for_numpy_input_combinations()) def test_calling_model_with_nested_numpy_arrays(self, distribution): with self.cached_session(): model = multi_input_output_model() optimizer = gradient_descent.GradientDescentOptimizer( learning_rate=0.001) loss = 'mse' model.compile(optimizer, loss, distribute=distribution) input_a_np = np.asarray(np.random.random((64, 3)), dtype=np.float32) input_b_np = np.asarray(np.random.random((64, 5)), dtype=np.float32) inputs = [input_a_np, input_b_np] output_d_np = np.asarray(np.random.random((64, 7)), dtype=np.float32) output_e_np = np.asarray(np.random.random((64, 7)), dtype=np.float32) targets = [output_d_np, output_e_np] # Call fit with validation data model.fit(inputs, targets, epochs=1, batch_size=8, verbose=0) # TODO(anjalisridhar): We need tests for when the batch size and steps are # smaller and results in a 0 batch_size and steps value. model.evaluate(inputs, targets) # with steps model.evaluate(inputs, targets, steps=2) # with batch_size model.evaluate(inputs, targets, batch_size=8) model.predict(inputs) # with steps model.predict(inputs, steps=2) # with batch_size model.predict(inputs, batch_size=8) @combinations.generate( combinations.combine(distribution=strategies_minus_tpu, mode=['graph'])) def test_numpy_with_sample_weights(self, distribution): model = get_model() optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001) loss = 'mse' model.compile(optimizer, loss, distribute=distribution) inputs = np.zeros((20, 3), np.float32) targets = np.zeros((20, 4), np.float32) sample_weights = np.ones((20), np.float32) model.fit(inputs, targets, sample_weight=sample_weights, epochs=1, steps_per_epoch=2, verbose=1) @combinations.generate(strategy_for_numpy_input_combinations()) def test_flatten_predict_outputs(self, distribution): with self.cached_session(): model = multi_input_output_model() optimizer = gradient_descent.GradientDescentOptimizer( learning_rate=0.001) loss = 'mse' model.compile(optimizer, loss, distribute=distribution) # We take 6 input samples with each input having a dimension of 3 or 5. input_a_np = np.asarray(np.random.random((6, 3)), dtype=np.float32) input_b_np = np.asarray(np.random.random((6, 5)), dtype=np.float32) inputs = [input_a_np, input_b_np] outs = model.predict(inputs, steps=1) # `predict` a list that is equal in length to the number of model outputs. 
# In this test our model has two outputs and each element of `outs` # corresponds to all the samples of one of the model outputs. self.assertLen(outs, 2) # Each of the output samples has a dimension of 7. We should process all # the available input samples (6). self.assertAllEqual([6, 7], outs[0].shape) self.assertAllEqual([6, 7], outs[1].shape)
def graph_mode_test_configuration(): return combinations.combine(mode='graph', use_numpy=[True, False], use_validation_data=[True, False])
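graph_mode_test_configuration has an eager-mode counterpart that is referenced by all_strategy_and_input_config_combinations but never shown in these excerpts. A plausible sketch, assuming it mirrors the eager-mode combine calls used by the other helpers (narrower numpy and validation settings); the real definition may differ.

# Assumed shape of the companion helper; the real definition is not shown here.
def eager_mode_test_configuration():
    return combinations.combine(mode='eager',
                                use_numpy=False,
                                use_validation_data=False)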
class TestDistributionStrategyWithDatasets(test.TestCase, parameterized.TestCase): @combinations.generate(all_strategy_combinations()) def test_calling_model_on_same_dataset(self, distribution): with self.cached_session(): model = get_model() optimizer = gradient_descent.GradientDescentOptimizer(0.001) loss = 'mse' metrics = ['mae', keras.metrics.CategoricalAccuracy()] model.compile(optimizer, loss, metrics=metrics, distribute=distribution) dataset = get_dataset(distribution) # Call fit with validation data model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0, validation_data=dataset, validation_steps=2) model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0, validation_data=dataset, validation_steps=2) model.predict(get_predict_dataset(distribution), steps=2) @combinations.generate(all_strategy_combinations()) def test_model_interleaved_eval_same_as_direct_eval(self, distribution): with self.cached_session(): user_controlled_model = get_model() user_controlled_model.compile( gradient_descent.GradientDescentOptimizer(0.001), loss='mse', metrics=['mae', keras.metrics.CategoricalAccuracy()], distribute=distribution) interleaved_model = get_model() interleaved_model.set_weights(user_controlled_model.get_weights()) interleaved_model.compile( gradient_descent.GradientDescentOptimizer(0.001), loss='mse', metrics=['mae', keras.metrics.CategoricalAccuracy()], distribute=distribution) dataset = get_dataset(distribution) # Call fit with validation interleaved interleaved_output = interleaved_model.fit(dataset, epochs=2, steps_per_epoch=2, verbose=1, validation_data=dataset, validation_steps=2, shuffle=False) # Manually control the validation running after each epoch. user_controlled_output = [] for _ in range(2): user_controlled_model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1, shuffle=False) user_controlled_output.append( user_controlled_model.evaluate(dataset, steps=2)) self.assertEqual(interleaved_output.history['val_loss'], [x[0] for x in user_controlled_output]) self.assertEqual( interleaved_output.history['val_mean_absolute_error'], [x[1] for x in user_controlled_output]) self.assertEqual( interleaved_output.history['val_categorical_accuracy'], [x[2] for x in user_controlled_output]) # TODO(priyag): Enable this test for TPU. Currently tuples/dict don't work # as clone_model's input_tensors argument only seems to accept list and not # tuples or dict. 
@combinations.generate( combinations.combine(distribution=[ combinations.mirrored_strategy_with_gpu_and_cpu, combinations.core_mirrored_strategy_with_gpu_and_cpu ], mode=['graph', 'eager'])) def test_fit_with_tuple_and_dict_dataset_inputs(self, distribution): with self.cached_session(): model = multi_input_output_model() optimizer = gradient_descent.GradientDescentOptimizer( learning_rate=0.001) loss = 'mse' metrics = ['mae', keras.metrics.CategoricalAccuracy()] model.compile(optimizer, loss, metrics=metrics, distribute=distribution) input_a_np = np.random.random((10, 3)) input_b_np = np.random.random((10, 5)) output_d_np = np.random.random((10, 7)) output_e_np = np.random.random((10, 7)) # Test with tuples dataset_tuple = dataset_ops.Dataset.from_tensor_slices( ((input_a_np, input_b_np), (output_d_np, output_e_np))) dataset_tuple = dataset_tuple.repeat(100) dataset_tuple = dataset_tuple.batch(10) model.fit(dataset_tuple, epochs=1, steps_per_epoch=2, verbose=1) # Test with dict dataset_dict = dataset_ops.Dataset.from_tensor_slices(({ 'input_a': input_a_np, 'input_b': input_b_np }, (output_d_np, output_e_np))) dataset_dict = dataset_dict.repeat(100) dataset_dict = dataset_dict.batch(10) model.fit(dataset_dict, epochs=1, steps_per_epoch=2, verbose=1) @combinations.generate(all_strategy_combinations()) def test_fit_eval_and_predict_methods_on_dataset(self, distribution): with self.cached_session(): model = get_model() optimizer = gradient_descent.GradientDescentOptimizer(0.001) loss = 'mse' metrics = ['mae', keras.metrics.CategoricalAccuracy()] model.compile(optimizer, loss, metrics=metrics, distribute=distribution) dataset = get_dataset(distribution) model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1) model.evaluate(dataset, steps=2, verbose=1) model.predict(get_predict_dataset(distribution), steps=2) @combinations.generate(strategy_and_optimizer_combinations()) def test_fit_eval_and_predict_with_optimizer(self, distribution, optimizer): with self.cached_session(): model = get_model() loss = 'mse' model.compile(optimizer(), loss, distribute=distribution) dataset = get_dataset(distribution) model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1) model.evaluate(dataset, steps=2, verbose=1) model.predict(get_predict_dataset(distribution), steps=2) @combinations.generate(strategy_minus_tpu_combinations()) def test_dataset_with_sample_weights(self, distribution): model = get_model() optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001) loss = 'mse' model.compile(optimizer, loss, distribute=distribution) inputs = np.zeros((10, 3), np.float32) targets = np.zeros((10, 4), np.float32) sample_weights = np.ones((10), np.float32) dataset = dataset_ops.Dataset.from_tensor_slices( (inputs, targets, sample_weights)) dataset = dataset.repeat() dataset = dataset.batch(10) model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1) model.evaluate(dataset, steps=2, verbose=1) model.predict(dataset, steps=2) @combinations.generate( combinations.combine(distribution=[ combinations.mirrored_strategy_with_gpu_and_cpu, combinations.core_mirrored_strategy_with_gpu_and_cpu ], mode=['graph', 'eager'])) # TODO(b/120943676, b/120957836): Re-enable once the validation code is # restored. 
def DISABLED_test_dataset_wrong_input_shape(self, distribution): with self.cached_session(): model = get_model() optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001) loss = 'mse' model.compile(optimizer, loss, distribute=distribution) # Wrong input shape inputs = np.zeros((10, 5), dtype=np.float32) targets = np.zeros((10, 4), dtype=np.float32) dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) dataset = dataset.repeat(100) dataset = dataset.batch(10) with self.assertRaisesRegexp(ValueError, 'expected input to have shape'): model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0) @combinations.generate( combinations.combine( distribution=[combinations.mirrored_strategy_with_gpu_and_cpu], mode=['graph', 'eager'])) # TODO(b/120943676, b/120957836): Re-enable once the validation code is # restored. def DISABLED_test_dataset_no_batch_input_validation(self, distribution): with self.cached_session(): model = get_model() optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001) loss = 'mse' model.compile(optimizer, loss, distribute=distribution) # User forgets to batch the dataset inputs = np.zeros((10, 3), dtype=np.float32) targets = np.zeros((10, 4), dtype=np.float32) dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) dataset = dataset.repeat(100) with self.assertRaisesRegexp(ValueError, 'expected input to have shape'): model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0) @combinations.generate( combinations.combine(distribution=[combinations.tpu_strategy_one_step], mode=['graph'])) def test_dataset_input_shape_fully_defined(self, distribution): with self.cached_session(): model = get_model() optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001) loss = 'mse' model.compile(optimizer, loss, distribute=distribution) dataset = get_dataset(distribution) # Input shapes are not fully known. Batch dimension is unknown as we are # not using the drop_remainder argument. dataset = dataset.repeat(100).batch(10) with self.assertRaisesRegexp(ValueError, 'requires fully defined shapes'): model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0) @combinations.generate( combinations.combine(distribution=[ combinations.mirrored_strategy_with_gpu_and_cpu, combinations.mirrored_strategy_with_two_gpus, combinations.core_mirrored_strategy_with_gpu_and_cpu, combinations.core_mirrored_strategy_with_two_gpus ], mode=['graph', 'eager'])) def test_learning_phase_value(self, distribution): # TODO(anjalisridhar): Modify this test to use Lambdas since we can compare # meaningful values. Currently we don't pass the learning phase if the # Lambda layer uses the learning phase. with self.cached_session(): x = keras.layers.Input(shape=(1, ), name='input') y = keras.layers.Dense(1, kernel_initializer='ones')(x) z = keras.layers.Dropout(0.9999)(y) model = keras.Model(x, z) initial_weights = model.get_weights() optimizer = gradient_descent.GradientDescentOptimizer(0.005) loss = 'mse' metrics = ['acc'] model.compile(optimizer, loss, metrics=metrics, distribute=distribution) batch_size = 8 if isinstance(distribution, mirrored_strategy.CoreMirroredStrategy): # CoreMirroredStrategy uses global batch size. 
batch_size = 8 * distribution.num_replicas_in_sync inputs = np.ones((10, 1), dtype=np.float32) targets = np.ones((10, 1), dtype=np.float32) dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) dataset = dataset.repeat().batch(batch_size) hist = model.fit(dataset, epochs=1, steps_per_epoch=20, verbose=1) self.assertAlmostEqual(hist.history['acc'][0], 0, 0) model.set_weights(initial_weights) # TODO(psv/anjalisridhar): Enable these lines after we fix b/117431185. # evaluate_output = model.evaluate(dataset, steps=20) # self.assertAlmostEqual(evaluate_output[1], 1, 0) inputs = np.ones((10, 1), dtype=np.float32) predict_dataset = dataset_ops.Dataset.from_tensor_slices(inputs) predict_dataset = predict_dataset.repeat().batch(batch_size) output = model.predict(predict_dataset, steps=10) # `predict` runs for 10 steps ref_output = np.ones((160, 1), dtype=np.float32) self.assertArrayNear(output, ref_output, 1e-1) @combinations.generate(strategy_minus_tpu_combinations()) def testOptimizerWithCallbacks(self, distribution): with self.cached_session(): model = get_model() optimizer = gradient_descent_keras.SGD(0.01) loss = 'mse' model.compile(optimizer, loss, distribute=distribution) dataset = get_dataset(distribution) def schedule(_): return 0.001 model.fit( dataset, epochs=1, steps_per_epoch=2, verbose=0, callbacks=[keras.callbacks.LearningRateScheduler(schedule)]) grouped_models = distribution.unwrap( distributed_training_utils.get_distributed_model( model, ModeKeys.TRAIN)) with distribution.scope(): for m in grouped_models: self.assertAllClose(0.001, keras.backend.get_value( m.optimizer.lr), atol=1e-05, rtol=1e-05)
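The 'requires fully defined shapes' failure in test_dataset_input_shape_fully_defined above comes from the unknown batch dimension. A minimal sketch of the difference, assuming the TF 1.x Dataset.output_shapes attribute:

import numpy as np
from tensorflow.python.data.ops import dataset_ops

inputs = np.zeros((10, 3), dtype=np.float32)
targets = np.zeros((10, 4), dtype=np.float32)
dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)).repeat(100)

# Without drop_remainder the last batch may be partial, so the static batch
# dimension is unknown (None) and the TPU strategy rejects the dataset.
print(dataset.batch(10).output_shapes)
# With drop_remainder=True every batch has exactly 10 elements, so the batch
# dimension is statically known and fit() would be accepted.
print(dataset.batch(10, drop_remainder=True).output_shapes)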
class ParameterServerStrategyTest(multi_worker_test_base.MultiWorkerTestBase, parameterized.TestCase): @classmethod def setUpClass(cls): cls._workers, cls._ps = multi_worker_test_base.create_in_process_cluster( num_workers=3, num_ps=2) cls._cluster_spec = { run_config.TaskType.WORKER: ['fake_worker_0', 'fake_worker_1', 'fake_worker_2'], run_config.TaskType.PS: ['fake_ps_0', 'fake_ps_1'] } def setUp(self): self._result = 0 self._lock = threading.Lock() self._init_condition = threading.Condition() self._init_reached = 0 self._finish_condition = threading.Condition() self._finish_reached = 0 super(ParameterServerStrategyTest, self).setUp() def _get_test_objects(self, task_type, task_id, num_gpus): distribution = parameter_server_strategy.ParameterServerStrategy( num_gpus_per_worker=num_gpus) if not task_type: return distribution, '' tf_config = { 'cluster': self._cluster_spec, 'task': { 'type': task_type, 'index': task_id } } with self._lock: # Accessing environment variables should be protected by locks because # environment variables are shared by all threads. with test.mock.patch.dict('os.environ', {'TF_CONFIG': json.dumps(tf_config)}): distribution.configure() return distribution, self._workers[task_id].target def _test_device_assignment_distributed(self, task_type, task_id, num_gpus): worker_device = '/job:%s/replica:0/task:%d' % (task_type, task_id) d, _ = self._get_test_objects(task_type, task_id, num_gpus) with ops.Graph().as_default(), \ self.test_session(target=self._workers[0].target) as sess, \ d.scope(): # Define a variable outside the call_for_each_tower scope. This is not # recommended. n = variable_scope.get_variable('n', initializer=10.0) self.assertEqual(n.device, '/job:ps/task:0') def model_fn(): if num_gpus == 0: last_part_device = 'device:CPU:0' else: last_part_device = ( 'device:GPU:%d' % distribute_lib.get_tower_context().tower_id) a = constant_op.constant(1.0) b = constant_op.constant(2.0) c = a + b self.assertEqual(a.device, worker_device + '/' + last_part_device) self.assertEqual(b.device, worker_device + '/' + last_part_device) self.assertEqual(c.device, worker_device + '/' + last_part_device) # The device scope is ignored for variables but not for normal ops. with ops.device('/job:worker/task:0'): x = variable_scope.get_variable('x', initializer=10.0) x_add = x.assign_add(c) e = a + c # The variable x is on the task 1 since the device_function has been # called once before the model_fn. self.assertEqual(x.device, '/job:ps/task:1') self.assertEqual(x_add.device, x.device) self.assertEqual( e.device, '/job:worker/replica:0/task:0/%s' % last_part_device) # The colocate_vars_with can override the distribution's device. with d.colocate_vars_with(x): y = variable_scope.get_variable('y', initializer=20.0) y_add = y.assign_add(x_add) self.assertEqual(y.device, '/job:ps/task:1') self.assertEqual(y_add.device, y.device) self.assertEqual(y.device, x.device) z = variable_scope.get_variable('z', initializer=10.0) self.assertEqual(z.device, '/job:ps/task:0') self.assertNotEqual(z.device, x.device) with ops.control_dependencies([y_add]): z_add = z.assign_add(y) with ops.control_dependencies([z_add]): f = z + c self.assertEqual(f.device, worker_device + '/' + last_part_device) # The device scope would merge with the default worker device. with ops.device('/CPU:1'): g = e + 1.0 self.assertEqual(g.device, worker_device + '/device:CPU:1') # Ths ops.colocate_with will be ignored when defining a variale but not # for a normal tensor. 
with ops.colocate_with(x): u = variable_scope.get_variable('u', initializer=30.0) v = variable_scope.get_variable('v', initializer=30.0) h = f + 1.0 self.assertIn('/job:ps/', u.device) self.assertIn('/job:ps/', v.device) # u and v are on different parameter servers. self.assertTrue(u.device != x.device or v.device != x.device) self.assertTrue(u.device == x.device or v.device == x.device) # Here h is not on one worker. Note h.device is canonical while x.device # is not but. self.assertIn('/job:ps/', h.device) return y_add, z_add, f y, z, f = d.call_for_each_tower(model_fn) self.assertNotEqual(y, None) self.assertNotEqual(z, None) self.assertNotEqual(f, None) if context.num_gpus() >= 1 and num_gpus <= 1: variables.global_variables_initializer().run() y_val, z_val, f_val = sess.run([y, z, f]) self.assertEqual(y_val, 33.0) self.assertEqual(z_val, 43.0) self.assertEqual(f_val, 46.0) @combinations.generate( combinations.combine(mode=['graph'], num_gpus=[0, 1, 2])) def testDeviceAssignmentDistributed(self, num_gpus): self._test_device_assignment_distributed('worker', 1, num_gpus) def _test_device_assignment_local(self, d, compute_device='CPU', variable_device='CPU', num_gpus=0): with ops.Graph().as_default(), \ self.test_session(target=self._workers[0].target) as sess, \ d.scope(): def model_fn(): if 'CPU' in compute_device: tower_compute_device = '/device:CPU:0' else: tower_compute_device = ( '/device:GPU:%d' % distribute_lib.get_tower_context().tower_id) tower_compute_device = device_util.canonicalize( tower_compute_device) if 'CPU' in variable_device: tower_variable_device = '/device:CPU:0' else: tower_variable_device = ( '/device:GPU:%d' % distribute_lib.get_tower_context().tower_id) tower_variable_device = device_util.canonicalize( tower_variable_device) a = constant_op.constant(1.0) b = constant_op.constant(2.0) c = a + b self.assertEqual(a.device, tower_compute_device) self.assertEqual(b.device, tower_compute_device) self.assertEqual(c.device, tower_compute_device) # The device scope is ignored for variables but not for normal ops. with ops.device('/device:GPU:2'): x = variable_scope.get_variable('x', initializer=10.0) x_add = x.assign_add(c) e = a + c self.assertEqual(device_util.canonicalize(x.device), tower_variable_device) self.assertEqual(x_add.device, x.device) self.assertEqual(e.device, device_util.canonicalize('/device:GPU:2')) # The colocate_vars_with can override the distribution's device. with d.colocate_vars_with(x): y = variable_scope.get_variable('y', initializer=20.0) y_add = y.assign_add(x_add) self.assertEqual(device_util.canonicalize(y.device), tower_variable_device) self.assertEqual(y_add.device, y.device) self.assertEqual(y.device, x.device) z = variable_scope.get_variable('z', initializer=10.0) self.assertEqual(device_util.canonicalize(z.device), tower_variable_device) with ops.control_dependencies([y_add]): z_add = z.assign_add(y) with ops.control_dependencies([z_add]): f = z + c self.assertEqual(f.device, tower_compute_device) # The device scope would merge with the default worker device. with ops.device('/CPU:1'): g = e + 1.0 self.assertEqual(g.device, device_util.canonicalize('/device:CPU:1')) # Ths ops.colocate_with will be ignored when defining a variale but not # for a normal tensor. 
with ops.colocate_with(x): u = variable_scope.get_variable('u', initializer=30.0) h = f + 1.0 self.assertEqual(device_util.canonicalize(u.device), tower_variable_device) self.assertEqual(device_util.canonicalize(x.device), h.device) return y_add, z_add, f y, z, f = d.call_for_each_tower(model_fn) self.assertNotEqual(y, None) self.assertNotEqual(z, None) self.assertNotEqual(f, None) if context.num_gpus() >= 1 and num_gpus <= 1: variables.global_variables_initializer().run() y_val, z_val, f_val = sess.run([y, z, f]) self.assertEqual(y_val, 33.0) self.assertEqual(z_val, 43.0) self.assertEqual(f_val, 46.0) def testDeviceAssignmentLocalCPU(self): distribution = parameter_server_strategy.ParameterServerStrategy( num_gpus_per_worker=0) self._test_device_assignment_local(distribution, compute_device='CPU', variable_device='CPU', num_gpus=0) def testDeviceAssignmentLocalOneGPU(self): distribution = parameter_server_strategy.ParameterServerStrategy( num_gpus_per_worker=1) self._test_device_assignment_local(distribution, compute_device='GPU', variable_device='GPU', num_gpus=1) def testDeviceAssignmentLocalTwoGPUs(self): distribution = parameter_server_strategy.ParameterServerStrategy( num_gpus_per_worker=2) self._test_device_assignment_local(distribution, compute_device='GPU', variable_device='CPU', num_gpus=2) def _test_simple_increment(self, task_type, task_id, num_gpus): d, master_target = self._get_test_objects(task_type, task_id, num_gpus) if hasattr(d, '_cluster_spec') and d._cluster_spec: num_workers = len(d._cluster_spec.as_dict().get( 'worker', ['dummy_worker'])) else: num_workers = 1 with ops.Graph().as_default(), \ self.test_session(target=master_target) as sess, \ d.scope(): def model_fn(): x = variable_scope.get_variable('x', initializer=10.0) y = variable_scope.get_variable('y', initializer=20.0) x_add = x.assign_add(1.0, use_locking=True) y_add = y.assign_add(1.0, use_locking=True) train_op = control_flow_ops.group([x_add, y_add]) return x, y, train_op x, y, train_op = d.call_for_each_tower(model_fn) train_op = d.group(d.unwrap(train_op)) if context.num_gpus() < d._num_gpus_per_worker: return True if task_id == 0: variables.global_variables_initializer().run() # Workers waiting for chief worker's initializing variables. self._init_condition.acquire() self._init_reached += 1 while self._init_reached != num_workers: self._init_condition.wait() self._init_condition.notify_all() self._init_condition.release() sess.run(train_op) # Wait for other workers to finish training. self._finish_condition.acquire() self._finish_reached += 1 while self._finish_reached != num_workers: self._finish_condition.wait() self._finish_condition.notify_all() self._finish_condition.release() x_val, y_val = sess.run([x, y]) self.assertEqual(x_val, 10.0 + 1.0 * num_workers * d.num_towers) self.assertEqual(y_val, 20.0 + 1.0 * num_workers * d.num_towers) return (x_val == 10.0 + 1.0 * num_workers * d.num_towers and y_val == 20.0 + 1.0 * num_workers * d.num_towers) def _test_minimize_loss_graph(self, task_type, task_id, num_gpus): d, master_target = self._get_test_objects(task_type, task_id, num_gpus) with ops.Graph().as_default(), \ self.test_session(target=master_target) as sess, \ d.scope(): l = core.Dense(1, use_bias=False) def loss_fn(x): y = array_ops.reshape(l(x), []) - constant_op.constant(1.) return y * y # TODO(yuefengz, apassos): eager.backprop.implicit_grad is not safe for # multiple graphs (b/111216820). 
def grad_fn(x): loss = loss_fn(x) var_list = (variables.trainable_variables() + ops.get_collection( ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES)) grads = gradients.gradients(loss, var_list) ret = list(zip(grads, var_list)) return ret def update(v, g): return v.assign_sub(0.05 * g, use_locking=True) one = d.broadcast(constant_op.constant([[1.]])) def step(): """Perform one optimization step.""" # Run forward & backward to get gradients, variables list. g_v = d.call_for_each_tower(grad_fn, one) # Update the variables using the gradients and the update() function. before_list = [] after_list = [] for g, v in g_v: fetched = d.read_var(v) before_list.append(fetched) with ops.control_dependencies([fetched]): # TODO(yuefengz): support non-Mirrored variable as destinations. g = d.reduce(variable_scope.VariableAggregation.SUM, g, destinations=v) with ops.control_dependencies( d.unwrap(d.update(v, update, g))): after_list.append(d.read_var(v)) return before_list, after_list before_out, after_out = step() if context.num_gpus() < d._num_gpus_per_worker: return True if task_id == 0: variables.global_variables_initializer().run() # Workers waiting for chief worker's initializing variables. self._init_condition.acquire() self._init_reached += 1 while self._init_reached != 3: self._init_condition.wait() self._init_condition.notify_all() self._init_condition.release() for i in range(10): b, a = sess.run((before_out, after_out)) if i == 0: before, = b after, = a error_before = abs(before - 1) error_after = abs(after - 1) # Error should go down self.assertLess(error_after, error_before) return error_after < error_before def testSimpleBetweenGraph(self): self._run_between_graph_clients(self._test_simple_increment, self._cluster_spec, 0) @combinations.generate( combinations.combine(mode=['graph'], num_gpus=[0, 1, 2])) def testLocalSimpleIncrement(self, num_gpus): self._test_simple_increment(None, 0, num_gpus) @combinations.generate( combinations.combine(mode=['graph'], num_gpus=[0, 1, 2])) def testMinimizeLossGraph(self, num_gpus): self._run_between_graph_clients(self._test_minimize_loss_graph, self._cluster_spec, num_gpus)
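The chief/worker synchronization in _test_simple_increment and _test_minimize_loss_graph is an ad-hoc condition-variable barrier. A standalone sketch of the same pattern (hypothetical class, not part of the test utilities):

import threading

class SimpleBarrier(object):
  """Blocks every caller until `parties` threads have called wait_all()."""

  def __init__(self, parties):
    self._parties = parties
    self._reached = 0
    self._condition = threading.Condition()

  def wait_all(self):
    with self._condition:
      self._reached += 1
      while self._reached < self._parties:
        self._condition.wait()
      # Wake any threads still waiting; redundant notifications are harmless.
      self._condition.notify_all()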
def strategy_for_numpy_input_combinations(): return combinations.combine(distribution=strategies_minus_tpu + tpu_strategies, mode=['graph'])
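Because combine() expands list values in order and `+` concatenates combination lists (see test_add earlier), concatenating the strategy lists first should be equivalent to adding two combine() calls. A small sanity sketch, assuming the ordering behavior demonstrated by the combine() tests above:

merged = combinations.combine(
    distribution=strategies_minus_tpu + tpu_strategies, mode=['graph'])
split = (combinations.combine(distribution=strategies_minus_tpu, mode=['graph']) +
         combinations.combine(distribution=tpu_strategies, mode=['graph']))
assert merged == split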
def strategy_combinations(): return combinations.combine( distribution=strategies, mode=['graph'])
def strategy_and_inputs(): return combinations.combine( distribution=strategies, use_numpy=[True, False], mode=['graph'])
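Each entry produced above becomes one parameterized test case: the mode key is consumed by the @combinations.generate decorator, while the remaining keys arrive as keyword arguments. A hypothetical consumer (names are illustrative only):

class ExampleTest(test.TestCase, parameterized.TestCase):

  @combinations.generate(strategy_and_inputs())
  def test_example(self, distribution, use_numpy):
    # `mode` is handled by the decorator; only distribution and use_numpy
    # are passed through to the test body.
    with distribution.scope():
      self.assertIsInstance(use_numpy, bool)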
class TestDistributionStrategyWithDatasets(test.TestCase, parameterized.TestCase): @combinations.generate(strategy_combinations()) def test_calling_model_on_same_dataset(self, distribution): with self.cached_session(): model = get_model() optimizer = gradient_descent.GradientDescentOptimizer(0.001) loss = 'mse' metrics = ['mae', keras.metrics.CategoricalAccuracy()] model.compile(optimizer, loss, metrics=metrics, distribute=distribution) dataset = get_dataset(distribution) # Call fit with validation data model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0, validation_data=dataset, validation_steps=2) model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0, validation_data=dataset, validation_steps=2) model.predict(get_predict_dataset(distribution), steps=2) @combinations.generate(strategy_combinations()) def test_model_interleaved_eval_same_as_direct_eval(self, distribution): with self.cached_session(): loss = 'mse' user_controlled_model = get_model() user_controlled_optimizer = gradient_descent.GradientDescentOptimizer( 0.001) user_controlled_metrics = ['mae', keras.metrics.CategoricalAccuracy()] user_controlled_model.compile(user_controlled_optimizer, loss, metrics=user_controlled_metrics, distribute=distribution) interleaved_model = get_model() interleaved_optimizer = gradient_descent.GradientDescentOptimizer(0.001) interleaved_metrics = ['mae', keras.metrics.CategoricalAccuracy()] interleaved_model.compile(interleaved_optimizer, loss, metrics=interleaved_metrics, distribute=distribution) dataset = get_dataset(distribution) # Call fit with validation interleaved interleaved_output = interleaved_model.fit(dataset, epochs=2, steps_per_epoch=2, verbose=0, validation_data=dataset, validation_steps=2) # Manually control the validation running after each epoch. user_controlled_output = [] for _ in range(2): user_controlled_model.fit( dataset, epochs=1, steps_per_epoch=2, verbose=0) user_controlled_output.append( user_controlled_model.evaluate(dataset, steps=2)) self.assertEqual(interleaved_output.history['val_loss'], [x[0] for x in user_controlled_output]) self.assertEqual(interleaved_output.history['val_mean_absolute_error'], [x[1] for x in user_controlled_output]) self.assertEqual(interleaved_output.history['val_categorical_accuracy'], [x[2] for x in user_controlled_output]) # TODO(priyag): Enable this test for TPU. Currently tuples/dict don't work # as clone_model's input_tensors argument only seems to accept list and not # tuples or dict. 
def test_fit_with_tuple_and_dict_dataset_inputs(self): with self.cached_session(): a = keras.layers.Input(shape=(3,), name='input_a') b = keras.layers.Input(shape=(3,), name='input_b') dense = keras.layers.Dense(4, name='dense') c = dense(a) d = dense(b) e = keras.layers.Dropout(0.5, name='dropout')(c) model = keras.models.Model([a, b], [d, e]) optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=0.001) loss = 'mse' metrics = ['mae', keras.metrics.CategoricalAccuracy()] strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:0', '/device:CPU:0']) model.compile(optimizer, loss, metrics=metrics, distribute=strategy) input_a_np = np.random.random((10, 3)) input_b_np = np.random.random((10, 3)) output_d_np = np.random.random((10, 4)) output_e_np = np.random.random((10, 4)) # Test with tuples dataset_tuple = dataset_ops.Dataset.from_tensor_slices(( (input_a_np, input_b_np), (output_d_np, output_e_np))) dataset_tuple = dataset_tuple.repeat(100) dataset_tuple = dataset_tuple.batch(10) model.fit(dataset_tuple, epochs=1, steps_per_epoch=2, verbose=1) # Test with dict dataset_dict = dataset_ops.Dataset.from_tensor_slices(( {'input_a': input_a_np, 'input_b': input_b_np}, (output_d_np, output_e_np))) dataset_dict = dataset_dict.repeat(100) dataset_dict = dataset_dict.batch(10) model.fit(dataset_dict, epochs=1, steps_per_epoch=2, verbose=1) @combinations.generate(strategy_combinations()) def test_fit_eval_and_predict_methods_on_dataset(self, distribution): with self.cached_session(): model = get_model() optimizer = gradient_descent.GradientDescentOptimizer(0.001) loss = 'mse' metrics = ['mae', keras.metrics.CategoricalAccuracy()] model.compile(optimizer, loss, metrics=metrics, distribute=distribution) dataset = get_dataset(distribution) model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1) model.evaluate(dataset, steps=2, verbose=1) model.predict(get_predict_dataset(distribution), steps=2) @combinations.generate(strategy_and_optimizer_combinations()) def test_fit_eval_and_predict_with_optimizer(self, distribution, optimizer): with self.cached_session(): model = get_model() loss = 'mse' model.compile(optimizer(), loss, distribute=distribution) dataset = get_dataset(distribution) model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1) model.evaluate(dataset, steps=2, verbose=1) model.predict(get_predict_dataset(distribution), steps=2) def test_dataset_input_shape_validation(self): with self.cached_session(): model = get_model() optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001) loss = 'mse' strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:1', '/device:GPU:0']) model.compile(optimizer, loss, distribute=strategy) # User forgets to batch the dataset inputs = np.zeros((10, 3), dtype=np.float32) targets = np.zeros((10, 4), dtype=np.float32) dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) dataset = dataset.repeat(100) with self.assertRaisesRegexp(ValueError, 'expected input to have shape'): model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0) # Wrong input shape inputs = np.zeros((10, 5), dtype=np.float32) targets = np.zeros((10, 4), dtype=np.float32) dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) dataset = dataset.repeat(100) dataset = dataset.batch(10) with self.assertRaisesRegexp(ValueError, 'expected input to have shape'): model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0) @combinations.generate(combinations.combine( distribution=[combinations.tpu_strategy_one_step], mode=['graph'])) def 
test_dataset_input_shape_fully_defined(self, distribution): with self.cached_session(): model = get_model() optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001) loss = 'mse' model.compile(optimizer, loss, distribute=distribution) dataset = get_dataset(distribution) # Input shapes are not fully known. Batch dimension is unknown as we are # not using the drop_remainder argument. dataset = dataset.repeat(100).batch(10) with self.assertRaisesRegexp(ValueError, 'requires fully defined shapes'): model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0) def test_learning_phase_value(self): # TODO(anjalisridhar): Modify this test to use Lambdas since we can compare # meaningful values. Currently we don't pass the learning phase if the # Lambda layer uses the learning phase. with self.cached_session(): x = keras.layers.Input(shape=(1,), name='input') y = keras.layers.Dense(1, kernel_initializer='ones')(x) z = keras.layers.Dropout(0.9999)(y) model = keras.Model(x, z) initial_weights = model.get_weights() optimizer = gradient_descent.GradientDescentOptimizer(0.005) loss = 'mse' metrics = ['acc'] strategy = mirrored_strategy.MirroredStrategy( ['/device:GPU:0', '/device:GPU:1']) model.compile(optimizer, loss, metrics=metrics, distribute=strategy) inputs = np.ones((10, 1), dtype=np.float32) targets = np.ones((10, 1), dtype=np.float32) dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) dataset = dataset.repeat().batch(8) hist = model.fit(dataset, epochs=1, steps_per_epoch=20, verbose=1) self.assertAlmostEqual(hist.history['acc'][0], 0, 0) model.set_weights(initial_weights) evaluate_output = model.evaluate(dataset, steps=20) self.assertAlmostEqual(evaluate_output[1], 1, 0) inputs = np.ones((10, 1), dtype=np.float32) predict_dataset = dataset_ops.Dataset.from_tensor_slices(inputs) predict_dataset = predict_dataset.repeat().batch(5) output = model.predict(predict_dataset, steps=10) ref_output = np.ones((50, 1), dtype=np.float32) self.assertArrayNear(output[0], ref_output, 1e-1)
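The accuracy assertions in test_learning_phase_value rest on how Dropout behaves across learning phases. A rough numpy illustration of the arithmetic (not the Keras implementation):

import numpy as np

rate = 0.9999
keep_prob = 1.0 - rate
dense_out = np.ones((8, 1), dtype=np.float32)  # Dense(1, ones kernel) on all-ones input

# Training phase: inverted dropout keeps each unit with probability keep_prob
# and rescales by 1/keep_prob, so almost every prediction is 0 and 'acc'
# against all-ones targets is ~0.
mask = np.random.rand(*dense_out.shape) < keep_prob
train_out = dense_out * mask / keep_prob

# Inference phase (evaluate/predict): dropout is the identity, so predictions
# stay at ~1, which is what the reference output of ones checks.
infer_out = dense_out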
def tpu_combinations(): return combinations.combine(distribution=[combinations.tpu_strategy_one_step, combinations.tpu_strategy], mode=["graph"])
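As with the other helpers, tpu_combinations() can be crossed with additional axes via combinations.times(). For example (illustrative, not from the test file), assuming times() behaves as tested earlier:

# Cross the two TPU strategies with a use_numpy axis: 2 strategies x 2 values
# should yield 4 configurations.
tpu_with_numpy = combinations.times(
    tpu_combinations(),
    combinations.combine(use_numpy=[True, False]))
assert len(tpu_with_numpy) == 4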
class IndexedSlicesUtilsTest(test.TestCase, parameterized.TestCase): def _assert_values_equal(self, left, right): self.assertAllEqual(self.evaluate(ops.convert_to_tensor(left)), self.evaluate(ops.convert_to_tensor(right))) @test_util.run_in_graph_and_eager_modes def testAggregateTensors(self): t0 = constant_op.constant([[1., 2.], [0, 0], [3., 4.]]) t1 = constant_op.constant([[0., 0.], [5, 6], [7., 8.]]) total = constant_op.constant([[1., 2.], [5, 6], [10., 12.]]) result = cross_device_utils.aggregate_tensors_or_indexed_slices( [t0, t1]) self._assert_values_equal(total, result) @test_util.run_in_graph_and_eager_modes def testAggregateIndexedSlices(self): t0 = math_ops._as_indexed_slices( constant_op.constant([[1., 2.], [0, 0], [3., 4.]])) t1 = math_ops._as_indexed_slices( constant_op.constant([[0., 0.], [5, 6], [7., 8.]])) total = constant_op.constant([[1., 2.], [5, 6], [10., 12.]]) result = cross_device_utils.aggregate_tensors_or_indexed_slices( [t0, t1]) self.assertIsInstance(result, ops.IndexedSlices) self._assert_values_equal(total, result) @test_util.run_in_graph_and_eager_modes def testDivideTensor(self): t = constant_op.constant([[1., 2.], [0, 0], [3., 4.]]) n = 2 expected = constant_op.constant([[0.5, 1.], [0, 0], [1.5, 2.]]) result = cross_device_utils.divide_by_n_tensors_or_indexed_slices(t, n) self._assert_values_equal(expected, result) @test_util.run_in_graph_and_eager_modes def testDivideIndexedSlices(self): t = math_ops._as_indexed_slices( constant_op.constant([[1., 2.], [0, 0], [3., 4.]])) n = 2 expected = constant_op.constant([[0.5, 1.], [0, 0], [1.5, 2.]]) result = cross_device_utils.divide_by_n_tensors_or_indexed_slices(t, n) self.assertIsInstance(result, ops.IndexedSlices) self._assert_values_equal(expected, result) @test_util.run_in_graph_and_eager_modes def testIsIndexedSlices(self): t = math_ops._as_indexed_slices( constant_op.constant([[1., 2.], [0, 0], [3., 4.]])) self.assertTrue(cross_device_utils.contains_indexed_slices(t)) @test_util.run_in_graph_and_eager_modes def testContainsIndexedSlices_List(self): t0 = math_ops._as_indexed_slices( constant_op.constant([[1., 2.], [0, 0], [3., 4.]])) t1 = math_ops._as_indexed_slices( constant_op.constant([[0., 0.], [5, 6], [7., 8.]])) self.assertTrue(cross_device_utils.contains_indexed_slices([t0, t1])) @test_util.run_in_graph_and_eager_modes def testContainsIndexedSlices_Tuple(self): t0 = math_ops._as_indexed_slices( constant_op.constant([[1., 2.], [0, 0], [3., 4.]])) t1 = math_ops._as_indexed_slices( constant_op.constant([[0., 0.], [5, 6], [7., 8.]])) self.assertTrue(cross_device_utils.contains_indexed_slices((t0, t1))) @test_util.run_in_graph_and_eager_modes def testContainsIndexedSlices_PerReplica(self): t0 = math_ops._as_indexed_slices( constant_op.constant([[1., 2.], [0, 0], [3., 4.]])) t1 = math_ops._as_indexed_slices( constant_op.constant([[0., 0.], [5, 6], [7., 8.]])) per_replica = value_lib.PerReplica({"/gpu:0": t0, "/cpu:0": t1}) self.assertTrue( cross_device_utils.contains_indexed_slices(per_replica)) @combinations.generate( combinations.combine(mode=["graph", "eager"], required_gpus=1)) def testCopyTensor(self): with ops.device("/cpu:0"): t = constant_op.constant([[1., 2.], [0, 0], [3., 4.]]) destination = "/gpu:0" result = cross_device_utils.copy_tensor_or_indexed_slices_to_device( t, destination) self._assert_values_equal(t, result) self.assertEqual(device_util.resolve(destination), device_util.resolve(result.device)) @combinations.generate( combinations.combine(mode=["graph", "eager"], required_gpus=1)) def 
testCopyIndexedSlices(self): with ops.device("/cpu:0"): t = math_ops._as_indexed_slices( constant_op.constant([[1., 2.], [0, 0], [3., 4.]])) destination = "/gpu:0" result = cross_device_utils.copy_tensor_or_indexed_slices_to_device( t, destination) self.assertIsInstance(result, ops.IndexedSlices) self._assert_values_equal(t, result) self.assertEqual(device_util.resolve(destination), device_util.resolve(result.device))
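The utilities exercised above accept either dense tensors or ops.IndexedSlices. For reference, an IndexedSlices can also be built directly and densified with convert_to_tensor; a minimal sketch using the same framework imports as the tests:

from tensorflow.python.framework import constant_op
from tensorflow.python.framework import ops

# IndexedSlices stores rows `values[i]` at positions `indices[i]` of a dense
# tensor with shape `dense_shape`; unmentioned rows are implicitly zero.
values = constant_op.constant([[1., 2.], [3., 4.]])
indices = constant_op.constant([0, 2])
slices = ops.IndexedSlices(values, indices,
                           dense_shape=constant_op.constant([3, 2]))

# Densifying recovers [[1., 2.], [0., 0.], [3., 4.]], the same kind of dense
# value the aggregation and division helpers above compare against.
dense = ops.convert_to_tensor(slices)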