def test_times_variable_arguments(self):
    c1 = combinations.combine(mode=["graph", "eager"])
    c2 = combinations.combine(optimizer=["adam", "gd"])
    c3 = combinations.combine(distribution=["d1", "d2"])
    c4 = combinations.times(c3, c1, c2)
   self.assertEqual([
       OrderedDict([("distribution", "d1"), ("mode", "graph"),
                    ("optimizer", "adam")]),
       OrderedDict([("distribution", "d1"), ("mode", "graph"),
                    ("optimizer", "gd")]),
       OrderedDict([("distribution", "d1"), ("mode", "eager"),
                    ("optimizer", "adam")]),
       OrderedDict([("distribution", "d1"), ("mode", "eager"),
                    ("optimizer", "gd")]),
       OrderedDict([("distribution", "d2"), ("mode", "graph"),
                    ("optimizer", "adam")]),
       OrderedDict([("distribution", "d2"), ("mode", "graph"),
                    ("optimizer", "gd")]),
       OrderedDict([("distribution", "d2"), ("mode", "eager"),
                    ("optimizer", "adam")]),
       OrderedDict([("distribution", "d2"), ("mode", "eager"),
                    ("optimizer", "gd")])
   ], c4)
   self.assertEqual(
       combinations.combine(
           mode=["graph", "eager"],
           optimizer=["adam", "gd"],
           distribution=["d1", "d2"]), c4)
 def test_add(self):
   self.assertEqual(
       [{
           "a": 1
       }, {
           "a": 2
       }, {
           "b": 2
       }, {
           "b": 3
       }],
       combinations.combine(a=[1, 2]) +
       combinations.combine(b=[2, 3]))
def strategy_and_input_combinations():
  return (
      combinations.times(
          combinations.combine(distribution=strategies_minus_tpu),
          combinations.combine(mode=['graph'],
                               use_numpy=[True, False],
                               use_validation_data=[True, False])
          + combinations.combine(mode=['eager'],
                                 use_numpy=[False],
                                 use_validation_data=[False])) +
      combinations.times(
          combinations.combine(distribution=tpu_strategies),
          combinations.combine(mode=['graph'],
                               use_numpy=[True, False],
                               use_validation_data=[True, False])))
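# Note: in the helper above, the `+` concatenation gives the non-TPU
# strategies both graph- and eager-mode cases (eager restricted to
# use_numpy=False, use_validation_data=False), while the second `times`
# pairs the TPU strategies with graph mode only.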
def test_combinations_for_embedding_model():
  return (
      combinations.times(
           combinations.combine(
               distribution=strategies_for_embedding_models()),
          (graph_mode_test_configuration() +
           eager_mode_test_configuration())))
 def test_arguments_sorted(self):
   self.assertEqual([
       OrderedDict([("aa", 1), ("ab", 2)]),
       OrderedDict([("aa", 1), ("ab", 3)]),
       OrderedDict([("aa", 2), ("ab", 2)]),
       OrderedDict([("aa", 2), ("ab", 3)])
   ], combinations.combine(ab=[2, 3], aa=[1, 2]))
def all_combinations():
  return combinations.combine(
      distribution=[combinations.default_strategy,
                    combinations.one_device_strategy,
                    combinations.mirrored_strategy_with_gpu_and_cpu,
                    combinations.mirrored_strategy_with_two_gpus],
      mode=["graph"])
def strategy_and_optimizer_combinations():
  return combinations.combine(
      distribution=strategies,
      optimizer=[combinations.adagrad_optimizer_v1_fn,
                 combinations.adam_optimizer_v1_fn,
                 combinations.gradient_descent_optimizer_v1_fn,
                 combinations.rmsprop_optimizer_v1_fn],
      mode=['graph'])
 def test_combine_single_parameter(self):
   self.assertEqual([{
       "a": 1,
       "b": 2
   }, {
       "a": 2,
       "b": 2
   }], combinations.combine(a=[1, 2], b=2))
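# Likewise, a minimal sketch of the `combine` behavior these tests
# exercise (an illustration, not the TF implementation): keyword
# arguments are sorted by name, scalar values are treated as
# single-element lists, and the result is the Cartesian product.
import itertools
from collections import OrderedDict

def combine_sketch(**kwargs):
  keys = sorted(kwargs)
  value_lists = [kwargs[k] if isinstance(kwargs[k], list) else [kwargs[k]]
                 for k in keys]
  return [OrderedDict(zip(keys, values))
          for values in itertools.product(*value_lists)]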
def test_combinations_for_stateful_embedding_model():
  return (
      combinations.combine(
          distribution=strategies_for_stateful_embedding_model(),
          mode='graph',
          use_numpy=False,
          use_validation_data=False
      ))
def test_combinations_with_tpu_strategies():
  tpu_strategies = [combinations.tpu_strategy,
                    combinations.tpu_strategy_one_step]

  return (
      combinations.times(
          combinations.combine(distribution=tpu_strategies),
          graph_mode_test_configuration()))
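# graph_mode_test_configuration() is not shown among these snippets; by
# symmetry with eager_mode_test_configuration() further down, it
# presumably looks like the following (hypothetical reconstruction):
def graph_mode_test_configuration():
  return combinations.combine(mode='graph',
                              use_numpy=[True, False],
                              use_validation_data=[True, False])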
def strategy_and_optimizer_combinations():
  return combinations.times(
      all_strategy_combinations(),
      combinations.combine(
          optimizer=[combinations.adagrad_optimizer_v1_fn,
                     combinations.adam_optimizer_v1_fn,
                     combinations.gradient_descent_optimizer_v1_fn,
                     combinations.rmsprop_optimizer_v1_fn]))
def strategy_and_input_combinations():
  def cnn_model_with_batch_norm(**kwargs):
    return _create_cnn_model(with_batch_norm=True, **kwargs)

  return (
      combinations.times(
          combinations.combine(distribution=all_strategies),
          combinations.combine(mode=['graph', 'eager'],
                               use_numpy=[True, False],
                               use_validation_data=[True, False]),
          combinations.combine(model_with_data=[
              ModelWithData('dnn', _create_dnn_model, _dnn_training_data),
              ModelWithData('cnn', _create_cnn_model, _cnn_training_data),
              ModelWithData('cnn_batch_norm',
                            cnn_model_with_batch_norm,
                            _cnn_training_data,
                            with_batch_norm=True),
          ])))
def strategy_minus_tpu_combinations():
  return combinations.combine(
      distribution=[combinations.default_strategy,
                    combinations.one_device_strategy,
                    combinations.mirrored_strategy_with_gpu_and_cpu,
                    combinations.mirrored_strategy_with_two_gpus,
                    combinations.core_mirrored_strategy_with_gpu_and_cpu,
                    combinations.core_mirrored_strategy_with_two_gpus],
      mode=['graph'])
 def test_times(self):
    c1 = combinations.combine(mode=["graph"], loss=["callable", "tensor"])
    c2 = combinations.combine(mode=["eager"], loss=["callable"])
    c3 = combinations.combine(distribution=["d1", "d2"])
    c4 = combinations.times(c3, c1 + c2)
   self.assertEqual([
       OrderedDict([("distribution", "d1"), ("loss", "callable"),
                    ("mode", "graph")]),
       OrderedDict([("distribution", "d1"), ("loss", "tensor"),
                    ("mode", "graph")]),
       OrderedDict([("distribution", "d1"), ("loss", "callable"),
                    ("mode", "eager")]),
       OrderedDict([("distribution", "d2"), ("loss", "callable"),
                    ("mode", "graph")]),
       OrderedDict([("distribution", "d2"), ("loss", "tensor"),
                    ("mode", "graph")]),
       OrderedDict([("distribution", "d2"), ("loss", "callable"),
                    ("mode", "eager")])
   ], c4)
def all_strategy_combinations_minus_default():
  strategy_minus_default_combinations = combinations.combine(
      distribution=[
          combinations.one_device_strategy,
          combinations.mirrored_strategy_with_gpu_and_cpu,
          combinations.mirrored_strategy_with_two_gpus,
          combinations.core_mirrored_strategy_with_gpu_and_cpu,
          combinations.core_mirrored_strategy_with_two_gpus],
      mode=['graph', 'eager'])
  return strategy_minus_default_combinations + tpu_strategy_combinations()
 def test_combine(self):
   self.assertEqual([{
       "a": 1,
       "b": 2
   }, {
       "a": 1,
       "b": 3
   }, {
       "a": 2,
       "b": 2
   }, {
       "a": 2,
       "b": 3
   }], combinations.combine(a=[1, 2], b=[2, 3]))
class DistributedCollectiveAllReduceStrategyTest(
        CollectiveAllReduceStrategyTestBase,
        strategy_test_lib.DistributionTestBase, parameterized.TestCase):
    @classmethod
    def setUpClass(cls):
        """Create a local cluster with 3 workers."""
        cls._cluster_spec = multi_worker_test_base.create_in_process_cluster(
            num_workers=3, num_ps=0)

    @combinations.generate(
        combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
    def test_num_replicas_in_sync(self, use_core_strategy):
        distribution, _, _ = create_test_objects(
            cluster_spec=self._cluster_spec,
            task_type='worker',
            task_id=0,
            num_gpus=2,
            use_core_strategy=use_core_strategy)
        num_workers = len(
            self._cluster_spec.get('chief', []) +
            self._cluster_spec.get('worker', []))
        self.assertEqual(2 * num_workers, distribution.num_replicas_in_sync)

    @combinations.generate(
        combinations.combine(mode=['graph'],
                             num_gpus=[0, 1, 2],
                             required_gpus=1,
                             use_core_strategy=[True, False]))
    def testMinimizeLossGraph(self, num_gpus, use_core_strategy):
        self._run_between_graph_clients(self._test_minimize_loss_graph,
                                        self._cluster_spec,
                                        num_gpus,
                                        use_core_strategy=use_core_strategy)

    @combinations.generate(
        combinations.combine(mode=['graph'],
                             num_gpus=[0, 1, 2],
                             required_gpus=1,
                             use_core_strategy=[True, False]))
    def testVariableInitialization(self, num_gpus, use_core_strategy):
        if context.num_gpus() < num_gpus:
            self.skipTest('Not enough GPUs')
        self._run_between_graph_clients(self._test_variable_initialization,
                                        self._cluster_spec,
                                        num_gpus=num_gpus,
                                        use_core_strategy=use_core_strategy)

    @combinations.generate(
        combinations.combine(mode=['graph'],
                             num_gpus=[0, 1, 2],
                             required_gpus=1,
                             use_core_strategy=[True, False]))
    def testComplexModel(self, num_gpus, use_core_strategy):
        if context.num_gpus() < num_gpus:
            self.skipTest('Not enough GPUs')
        self._run_between_graph_clients(self._test_complex_model,
                                        self._cluster_spec,
                                        num_gpus=num_gpus,
                                        use_core_strategy=use_core_strategy)

    # TODO(b/124344198): Re-enable after fixing this flaky test.
    # TODO(yuefengz): Update how we use num_gpus and required_gpus
    @combinations.generate(
        combinations.combine(mode=['graph'],
                             num_gpus=[0, 1, 2],
                             required_gpus=1,
                             use_dataset=[True, False],
                             use_core_strategy=[True, False]))
    def DISABLED_testMakeInputFnIterator(self, num_gpus, use_dataset,
                                         use_core_strategy):
        if context.num_gpus() < num_gpus:
            self.skipTest('Not enough GPUs')
        if use_dataset:
            fn = lambda: dataset_ops.Dataset.range(100)
        else:

            def fn():
                dataset = dataset_ops.Dataset.range(100)
                it = dataset.make_one_shot_iterator()
                return it.get_next

        # We use CPU as the device when num_gpus = 0
        devices_per_worker = max(1, num_gpus)
        expected_values = [[i + j for j in range(devices_per_worker)]
                           for i in range(0, 100, devices_per_worker)]

        input_fn = self._input_fn_to_test_input_context(
            fn,
            expected_num_replicas_in_sync=3 * devices_per_worker,
            expected_num_input_pipelines=3,
            expected_input_pipeline_id=1)  # because task_id = 1
        self._test_input_fn_iterator('worker',
                                     1,
                                     num_gpus,
                                     input_fn,
                                     expected_values,
                                     test_reinitialize=use_dataset,
                                     use_core_strategy=use_core_strategy)

    @combinations.generate(
        combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
    def testUpdateConfigProto(self, use_core_strategy):
        strategy, _, _ = self._get_test_object(
            task_type='worker',
            task_id=1,
            num_gpus=2,
            use_core_strategy=use_core_strategy)

        config_proto = config_pb2.ConfigProto(
            device_filters=['to_be_overridden'])
        rewrite_options = config_proto.graph_options.rewrite_options
        rewrite_options.scoped_allocator_opts.enable_op.append('to_be_removed')

        new_config = strategy.update_config_proto(config_proto)

        # Verify group leader
        self.assertEqual('/job:worker/replica:0/task:0',
                         new_config.experimental.collective_group_leader)

        # Verify device filters.
        self.assertEqual(['/job:worker/task:1'], new_config.device_filters)

        # Verify rewrite options.
        new_rewrite_options = new_config.graph_options.rewrite_options
        self.assertEqual(rewriter_config_pb2.RewriterConfig.ON,
                         new_rewrite_options.scoped_allocator_optimization)
        self.assertEqual(['CollectiveReduce'],
                         new_rewrite_options.scoped_allocator_opts.enable_op)
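# Note on `combinations.generate` (a conceptual description, not the TF
# source): it expands every OrderedDict produced by `combine`/`times`
# into a named parameterized test case, passes the entries as keyword
# arguments to the test method, runs each case under the requested
# `mode`, and skips cases whose `required_gpus` exceeds the GPUs
# available on the machine.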
class TestDistributionStrategyErrorCases(test.TestCase,
                                         parameterized.TestCase):
    @combinations.generate(
        combinations.combine(distribution=[
            combinations.mirrored_strategy_with_gpu_and_cpu,
            combinations.core_mirrored_strategy_with_gpu_and_cpu
        ],
                             mode=['graph', 'eager']))
    def test_unsupported_features(self, distribution):
        with self.cached_session():
            model = get_model()

            optimizer = gradient_descent.GradientDescentOptimizer(0.001)
            loss = 'mse'
            metrics = ['mae']
            model.compile(optimizer,
                          loss,
                          metrics=metrics,
                          distribute=distribution)

            dataset = get_dataset(distribution)

            # Test with validation split
            with self.assertRaisesRegexp(
                    ValueError, '`validation_split` argument is not '
                    'supported when input `x` is a dataset or a '
                    'dataset iterator.+'):
                model.fit(dataset,
                          epochs=1,
                          steps_per_epoch=2,
                          verbose=0,
                          validation_split=0.5,
                          validation_steps=2)

            # Test with sample weight.
            sample_weight = np.random.random((10, ))
            with self.assertRaisesRegexp(
                    ValueError,
                    '`sample_weight` argument is not supported when input '
                    '`x` is a dataset or a dataset iterator.'):
                model.fit(dataset,
                          epochs=1,
                          steps_per_epoch=2,
                          verbose=0,
                          sample_weight=sample_weight)

            # Test without specifying the `steps` argument for a dataset
            # with infinite cardinality.
            dataset = dataset.repeat()
            with self.assertRaisesRegexp(
                    ValueError, 'When passing an infinitely '
                    'repeating dataset, you must specify the '
                    '`steps_per_epoch` argument'):
                model.fit(dataset, epochs=1, verbose=0)
            with self.assertRaisesRegexp(
                    ValueError, 'When passing an infinitely '
                    'repeating dataset, you must specify the '
                    '`steps` argument'):
                model.evaluate(dataset, verbose=0)

            with self.assertRaisesRegexp(
                    ValueError, 'When passing an infinitely '
                    'repeating dataset, you must specify the '
                    '`steps` argument'):
                model.predict(dataset, verbose=0)

    @combinations.generate(
        combinations.combine(distribution=[
            combinations.mirrored_strategy_with_gpu_and_cpu,
            combinations.core_mirrored_strategy_with_gpu_and_cpu
        ],
                             mode=['graph', 'eager']))
    def test_calling_with_unsupported_predefined_callbacks(self, distribution):
        with self.cached_session():
            model = get_model()

            optimizer = gradient_descent.GradientDescentOptimizer(0.001)
            loss = 'mse'
            metrics = ['mae']
            model.compile(optimizer,
                          loss,
                          metrics=metrics,
                          distribute=distribution)

            dataset = get_dataset(distribution)

            def schedule(_):
                return 0.001

            with self.assertRaisesRegexp(
                    ValueError, 'You must specify a Keras Optimizer V2 when '
                    'using'):
                model.fit(dataset,
                          epochs=1,
                          steps_per_epoch=2,
                          verbose=0,
                          callbacks=[
                              keras.callbacks.LearningRateScheduler(schedule)
                          ])

            with self.assertRaisesRegexp(
                    ValueError, 'You must specify a Keras Optimizer V2 when '
                    'using'):
                model.fit(dataset,
                          epochs=1,
                          steps_per_epoch=2,
                          verbose=0,
                          callbacks=[keras.callbacks.ReduceLROnPlateau()])
class DistributeCoordinatorIntegrationTest(
        multi_worker_test_base.IndependentWorkerTestBase,
        parameterized.TestCase):
    @classmethod
    def setUpClass(cls):
        """Create a local cluster with 2 workers."""
        super(DistributeCoordinatorIntegrationTest, cls).setUpClass()
        cls._cluster_spec = multi_worker_test_base.create_in_process_cluster(
            num_workers=3, num_ps=2, has_eval=True)

    def setUp(self):
        self._model_dir = tempfile.mkdtemp()
        super(DistributeCoordinatorIntegrationTest, self).setUp()

    def dataset_input_fn(self, x, y, batch_size, shuffle):
        def input_fn():
            dataset = dataset_ops.Dataset.from_tensor_slices((x, y))
            if shuffle:
                dataset = dataset.shuffle(batch_size)
            dataset = dataset.repeat(100).batch(batch_size)
            return dataset

        return input_fn

    def _get_exporter(self, name, fc):
        feature_spec = feature_column.make_parse_example_spec(fc)
        serving_input_receiver_fn = (
            export_lib.build_parsing_serving_input_receiver_fn(feature_spec))
        return exporter_lib.LatestExporter(
            name, serving_input_receiver_fn=serving_input_receiver_fn)

    def _extract_loss_and_global_step(self, event_folder):
        """Returns the loss and global step in last event."""
        event_paths = glob.glob(os.path.join(event_folder, "events*"))
        self.assertNotEmpty(event_paths,
                            msg="Event file not found in dir %s" %
                            event_folder)

        loss = None
        global_step_count = None

        for e in summary_iterator.summary_iterator(event_paths[-1]):
            current_loss = None
            for v in e.summary.value:
                if v.tag == "loss":
                    current_loss = v.simple_value

            # If loss is not found, global step is meaningless.
            if current_loss is None:
                continue

            current_global_step = e.step
            if global_step_count is None or current_global_step > global_step_count:
                global_step_count = current_global_step
                loss = current_loss

        return (loss, global_step_count)

    def _get_estimator(self,
                       train_distribute,
                       eval_distribute,
                       remote_cluster=None):
        input_dimension = LABEL_DIMENSION
        linear_feature_columns = [
            feature_column.numeric_column("x", shape=(input_dimension, ))
        ]
        dnn_feature_columns = [
            feature_column.numeric_column("x", shape=(input_dimension, ))
        ]

        return dnn_linear_combined.DNNLinearCombinedRegressor(
            linear_feature_columns=linear_feature_columns,
            dnn_hidden_units=(2, 2),
            dnn_feature_columns=dnn_feature_columns,
            label_dimension=LABEL_DIMENSION,
            model_dir=self._model_dir,
            dnn_optimizer=adagrad.AdagradOptimizer(0.001),
            linear_optimizer=adagrad.AdagradOptimizer(0.001),
            config=run_config_lib.RunConfig(
                experimental_distribute=DistributeConfig(
                    train_distribute=train_distribute,
                    eval_distribute=eval_distribute,
                    remote_cluster=remote_cluster)))

    def _complete_flow(self,
                       train_distribute,
                       eval_distribute,
                       remote_cluster=None,
                       use_train_and_evaluate=True):
        estimator = self._get_estimator(train_distribute, eval_distribute,
                                        remote_cluster)

        input_dimension = LABEL_DIMENSION
        train_input_fn = self.dataset_input_fn(
            x={"x": DATA},
            y=DATA,
            batch_size=BATCH_SIZE // train_distribute.num_replicas_in_sync,
            shuffle=True)
        if eval_distribute:
            eval_batch_size = BATCH_SIZE // eval_distribute.num_replicas_in_sync
        else:
            eval_batch_size = BATCH_SIZE
        eval_input_fn = self.dataset_input_fn(x={"x": DATA},
                                              y=DATA,
                                              batch_size=eval_batch_size,
                                              shuffle=False)

        linear_feature_columns = [
            feature_column.numeric_column("x", shape=(input_dimension, ))
        ]
        dnn_feature_columns = [
            feature_column.numeric_column("x", shape=(input_dimension, ))
        ]
        feature_columns = linear_feature_columns + dnn_feature_columns

        eval_spec = estimator_training.EvalSpec(name=EVAL_NAME,
                                                input_fn=eval_input_fn,
                                                steps=None,
                                                exporters=self._get_exporter(
                                                    EXPORTER_NAME,
                                                    feature_columns),
                                                start_delay_secs=0,
                                                throttle_secs=1)

        if use_train_and_evaluate:
            estimator_training.train_and_evaluate(
                estimator,
                estimator_training.TrainSpec(train_input_fn,
                                             max_steps=MAX_STEPS), eval_spec)
        else:
            estimator.train(train_input_fn, max_steps=MAX_STEPS)

            latest_ckpt_path = estimator.latest_checkpoint()
            metrics = estimator.evaluate(eval_input_fn,
                                         checkpoint_path=latest_ckpt_path,
                                         name=EVAL_NAME)

            # Export the eval result to files.
            eval_result = estimator_training._EvalResult(
                status=estimator_training._EvalStatus.EVALUATED,
                metrics=metrics,
                checkpoint_path=latest_ckpt_path)
            evaluator = estimator_training._TrainingExecutor._Evaluator(
                estimator, eval_spec, None)
            evaluator._export_eval_result(eval_result, True)

        return estimator

    def _inspect_train_and_eval_events(self, estimator):
        # Make sure nothing is stuck in limbo.
        writer_cache.FileWriterCache.clear()

        # Examine the training events. Use a range to check the global step
        # to avoid flakiness due to a global step race condition.
        training_loss, _ = self._extract_loss_and_global_step(self._model_dir)
        self.assertIsNotNone(training_loss)

        # Examine the eval events. The global step should be accurate.
        eval_dir = os.path.join(self._model_dir, "eval_" + EVAL_NAME)
        eval_loss, eval_global_step = self._extract_loss_and_global_step(
            event_folder=eval_dir)
        self.assertIsNotNone(eval_loss)
        self.assertGreaterEqual(eval_global_step, MAX_STEPS)

        # Examine the export folder.
        export_dir = os.path.join(os.path.join(self._model_dir, "export"),
                                  EXPORTER_NAME)
        self.assertTrue(gfile.Exists(export_dir))

        # Examine the ckpt for predict.
        def predict_input_fn():
            return dataset_ops.Dataset.from_tensor_slices({
                "x": DATA
            }).batch(BATCH_SIZE)

        predicted_proba = np.array([
            x[prediction_keys.PredictionKeys.PREDICTIONS]
            for x in estimator.predict(predict_input_fn)
        ])
        self.assertAllEqual((BATCH_SIZE, LABEL_DIMENSION),
                            predicted_proba.shape)

    def _get_strategy_object(self, strategy_cls):
        if strategy_cls == mirrored_strategy.CoreMirroredStrategy:
            return strategy_cls()
        else:
            return strategy_cls(num_gpus_per_worker=context.num_gpus())

    @combinations.generate(
        combinations.combine(
            mode=["graph"],
            train_distribute_cls=[
                collective_all_reduce_strategy.CollectiveAllReduceStrategy,
                mirrored_strategy.MirroredStrategy,
                mirrored_strategy.CoreMirroredStrategy,
                parameter_server_strategy.ParameterServerStrategy
            ],
            eval_distribute_cls=[
                None,
                mirrored_strategy.MirroredStrategy,
                mirrored_strategy.CoreMirroredStrategy,
                parameter_server_strategy.ParameterServerStrategy,
                collective_all_reduce_strategy.CollectiveAllReduceStrategy,
            ],
            required_gpus=[0, 1]))
    def test_complete_flow_standalone_client(self, train_distribute_cls,
                                             eval_distribute_cls):
        train_distribute = self._get_strategy_object(train_distribute_cls)

        if eval_distribute_cls:
            eval_distribute = self._get_strategy_object(eval_distribute_cls)
        else:
            eval_distribute = None

        cluster_spec = copy.deepcopy(self._cluster_spec)
        if (train_distribute_cls !=
                parameter_server_strategy.ParameterServerStrategy):
            cluster_spec.pop("ps", None)
        estimator = self._complete_flow(train_distribute, eval_distribute,
                                        cluster_spec)
        self._inspect_train_and_eval_events(estimator)

    @combinations.generate(
        combinations.combine(
            mode=["graph"],
            eval_distribute_class=[
                None,
                mirrored_strategy.MirroredStrategy,
                mirrored_strategy.CoreMirroredStrategy,
                parameter_server_strategy.ParameterServerStrategy,
            ],
            required_gpus=[0, 1]))
    def test_complete_flow_standalone_client_collective_nccl(
            self, eval_distribute_class):
        train_distribute = (
            collective_all_reduce_strategy.CollectiveAllReduceStrategy(
                num_gpus_per_worker=context.num_gpus(),
                communication=cross_device_ops_lib.CollectiveCommunication.NCCL
            ))

        if eval_distribute_class:
            eval_distribute = self._get_strategy_object(eval_distribute_class)
        else:
            eval_distribute = None

        cluster_spec = copy.deepcopy(self._cluster_spec)
        cluster_spec.pop("ps", None)
        estimator = self._complete_flow(train_distribute, eval_distribute,
                                        cluster_spec)
        self._inspect_train_and_eval_events(estimator)

    @combinations.generate(
        combinations.combine(mode=["graph"],
                             train_distribute_cls=[
                                 mirrored_strategy.MirroredStrategy,
                                 mirrored_strategy.CoreMirroredStrategy,
                             ],
                             eval_distribute_cls=[
                                 None,
                                 mirrored_strategy.MirroredStrategy,
                                 mirrored_strategy.CoreMirroredStrategy,
                             ],
                             required_gpus=[0, 1]))
    def test_estimator_standalone_client(self, train_distribute_cls,
                                         eval_distribute_cls):
        train_distribute = self._get_strategy_object(train_distribute_cls)

        if eval_distribute_cls:
            eval_distribute = self._get_strategy_object(eval_distribute_cls)
        else:
            eval_distribute = None

        # We use the whole cluster for evaluation.
        cluster = copy.deepcopy(self._cluster_spec)
        cluster.pop("evaluator", None)

        estimator = self._complete_flow(train_distribute,
                                        eval_distribute,
                                        remote_cluster=cluster,
                                        use_train_and_evaluate=False)
        self._inspect_train_and_eval_events(estimator)

    def _mock_run_std_server(self, *args, **kwargs):
        ret = original_run_std_server(*args, **kwargs)
        # Wait for all std servers to be brought up in order to reduce the chance of
        # remote sessions taking local ports that have been assigned to std servers.
        self._barrier.wait()
        return ret

    def _independent_worker_fn(
        self,
        train_distribute,
        eval_distribute,
    ):
        with test.mock.patch.object(dc, "_run_std_server",
                                    self._mock_run_std_server):
            self._complete_flow(train_distribute, eval_distribute)

    @combinations.generate(
        combinations.combine(
            mode=["graph"],
            train_distribute_cls=[
                collective_all_reduce_strategy.CollectiveAllReduceStrategy,
                parameter_server_strategy.ParameterServerStrategy,
            ],
            eval_distribute_cls=[
                None,
                mirrored_strategy.MirroredStrategy,
                mirrored_strategy.CoreMirroredStrategy,
                parameter_server_strategy.ParameterServerStrategy,
                collective_all_reduce_strategy.CollectiveAllReduceStrategy,
            ],
            required_gpus=[0, 1]))
    def test_complete_flow_independent_worker_between_graph(
            self, train_distribute_cls, eval_distribute_cls):
        if (context.num_gpus() < 2 and eval_distribute_cls
                == collective_all_reduce_strategy.CollectiveAllReduceStrategy):
            self.skipTest(
                "`CollectiveAllReduceStrategy` needs at least two towers.")

        train_distribute = self._get_strategy_object(train_distribute_cls)

        if eval_distribute_cls:
            eval_distribute = self._get_strategy_object(eval_distribute_cls)
        else:
            eval_distribute = None

        if (train_distribute_cls ==
                parameter_server_strategy.ParameterServerStrategy):
            cluster_spec = multi_worker_test_base.create_cluster_spec(
                num_workers=3, num_ps=2, has_eval=True)
            # 3 workers, 2 ps and 1 evaluator.
            self._barrier = dc._Barrier(6)
        else:
            cluster_spec = multi_worker_test_base.create_cluster_spec(
                num_workers=3, num_ps=0, has_eval=True)
            # 3 workers and 1 evaluator.
            self._barrier = dc._Barrier(4)

        threads = self.run_multiple_tasks_in_threads(
            self._independent_worker_fn, cluster_spec, train_distribute,
            eval_distribute)
        threads_to_join = []
        for task_type, ts in threads.items():
            if task_type == PS:
                continue
            for t in ts:
                threads_to_join.append(t)
        self.join_independent_workers(threads_to_join)

        estimator = self._get_estimator(train_distribute, eval_distribute)
        self._inspect_train_and_eval_events(estimator)

    @combinations.generate(
        combinations.combine(mode=["graph"],
                             train_distribute_cls=[
                                 mirrored_strategy.MirroredStrategy,
                                 mirrored_strategy.CoreMirroredStrategy
                             ],
                             eval_distribute_cls=[
                                 None, mirrored_strategy.MirroredStrategy,
                                 mirrored_strategy.CoreMirroredStrategy
                             ],
                             required_gpus=[0, 1]))
    def test_complete_flow_independent_worker_in_graph(self,
                                                       train_distribute_cls,
                                                       eval_distribute_cls):
        train_distribute = self._get_strategy_object(train_distribute_cls)

        if eval_distribute_cls:
            eval_distribute = self._get_strategy_object(eval_distribute_cls)
        else:
            eval_distribute = None

        cluster_spec = multi_worker_test_base.create_cluster_spec(
            num_workers=3, num_ps=0, has_eval=True)
        # 3 workers and 1 evaluator.
        self._barrier = dc._Barrier(4)
        threads = self.run_multiple_tasks_in_threads(
            self._independent_worker_fn, cluster_spec, train_distribute,
            eval_distribute)
        self.join_independent_workers(
            [threads[WORKER][0], threads[EVALUATOR][0]])

        estimator = self._get_estimator(train_distribute, eval_distribute)
        self._inspect_train_and_eval_events(estimator)
def eager_mode_test_configuration():
  return combinations.combine(mode='eager',
                              use_numpy=False,
                              use_validation_data=False)
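# Usage sketch (hypothetical test method, not taken from these
# snippets): the configuration helpers above plug directly into the
# decorator, e.g.
#
#   @combinations.generate(test_combinations_for_embedding_model())
#   def test_embedding_model(self, distribution, mode, use_numpy,
#                            use_validation_data):
#     ...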
 def test_overlapping_keys(self):
   c1 = combinations.combine(mode=["graph"], loss=["callable", "tensor"])
   c2 = combinations.combine(mode=["eager"], loss=["callable"])
   with self.assertRaisesRegexp(ValueError, ".*Keys.+overlap.+"):
     _ = combinations.times(c1, c2)
class InputIteratorSingleWorkerTest(InputIteratorTestBase,
                                    parameterized.TestCase):

  @combinations.generate(combinations.combine(
      mode=["graph", "eager"],
      input_type=["input_fn", "dataset"]))
  def testOneDeviceCPU(self, input_type):
    worker_device_pairs = [("", ["/device:CPU:0"])]
    dataset_fn = lambda: dataset_ops.Dataset.range(10)

    expected_values = [[i] for i in range(10)]

    self._test_iterator(input_type, dataset_fn, worker_device_pairs,
                        expected_values)

  @combinations.generate(combinations.combine(
      mode=["graph", "eager"],
      input_type=["input_fn", "dataset"],
      required_gpus=1))
  def testTwoDevicesOneGPUOneCPU(self, input_type):
    worker_device_pairs = [("", ["/device:GPU:0", "/device:CPU:0"])]
    dataset_fn = lambda: dataset_ops.Dataset.range(10)

    expected_values = [[i, i+1] for i in range(0, 10, 2)]

    self._test_iterator(input_type, dataset_fn, worker_device_pairs,
                        expected_values)

  @combinations.generate(combinations.combine(
      mode=["graph", "eager"],
      input_type=["input_fn", "dataset"],
      required_gpus=1))
  def testTupleDataset(self, input_type):
    worker_device_pairs = [("", ["/device:GPU:0", "/device:CPU:0"])]
    def dataset_fn():
      dataset1 = dataset_ops.Dataset.range(10)
      dataset2 = dataset_ops.Dataset.range(10).map(lambda x: x**2)
      return dataset_ops.Dataset.zip((dataset1, dataset2))

    expected_values = [[(i, i**2), (i+1, (i+1)**2)] for i in range(0, 10, 2)]

    self._test_iterator(input_type, dataset_fn, worker_device_pairs,
                        expected_values)

  @combinations.generate(combinations.combine(
      mode=["graph", "eager"],
      input_type=["input_fn", "dataset"],
      required_gpus=1))
  def testUnevenDatasetBatches(self, input_type):
    worker_device_pairs = [("", ["/device:GPU:0", "/device:CPU:0"])]
    dataset_fn = lambda: dataset_ops.Dataset.range(11)

    expected_values = [[i, i+1] for i in range(0, 10, 2)]
    self._test_iterator(input_type, dataset_fn, worker_device_pairs,
                        expected_values)

  @combinations.generate(combinations.combine(
      mode=["graph", "eager"],
      input_type=["dataset"],
      split_batch_by=[None, 2],
      required_gpus=1))
  def testBatchSplitting(self, input_type, split_batch_by):
    worker_device_pairs = [("", ["/device:GPU:0", "/device:CPU:0"])]
    batch_size = 10
    dataset_fn = lambda: dataset_ops.Dataset.range(100).batch(batch_size)

    updated_batch_size = (
        batch_size // split_batch_by if split_batch_by else batch_size)
    expected_values = [[range(i, i+updated_batch_size),
                        range(i+updated_batch_size, i+2*updated_batch_size)]
                       for i in range(0, 100, updated_batch_size*2)]

    self._test_iterator(input_type, dataset_fn, worker_device_pairs,
                        expected_values, sess=None,
                        split_batch_by=split_batch_by)
def all_strategy_combinations_with_eager_and_graph_modes():
  return combinations.combine(distribution=all_strategies,
                              mode=['graph', 'eager'])
class DNNLinearCombinedClassifierIntegrationTest(test.TestCase,
                                                 parameterized.TestCase):

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def dataset_input_fn(self, x, y, batch_size, shuffle):

    def input_fn():
      dataset = dataset_ops.Dataset.from_tensor_slices((x, y))
      if shuffle:
        dataset = dataset.shuffle(batch_size)
      dataset = dataset.repeat(10).batch(batch_size)
      return dataset

    return input_fn

  @combinations.generate(
      combinations.combine(
          mode=['graph'],
          distribution=[
              combinations.one_device_strategy,
              combinations.mirrored_strategy_with_gpu_and_cpu,
              combinations.mirrored_strategy_with_two_gpus
          ]))
  def test_complete_flow_with_mode(self, distribution):
    label_dimension = 2
    input_dimension = label_dimension
    batch_size = 10
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)
    train_input_fn = self.dataset_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size // len(distribution.worker_devices),
        shuffle=True)
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, y=data, batch_size=batch_size, shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, batch_size=batch_size, shuffle=False)

    linear_feature_columns = [
        feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    dnn_feature_columns = [
        feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    feature_columns = linear_feature_columns + dnn_feature_columns
    estimator = dnn_linear_combined.DNNLinearCombinedRegressor(
        linear_feature_columns=linear_feature_columns,
        dnn_hidden_units=(2, 2),
        dnn_feature_columns=dnn_feature_columns,
        label_dimension=label_dimension,
        model_dir=self._model_dir,
        # TODO(isaprykin): Work around the colocate_with error.
        dnn_optimizer=adagrad.AdagradOptimizer(0.001),
        linear_optimizer=adagrad.AdagradOptimizer(0.001),
        config=run_config.RunConfig(
            train_distribute=distribution, eval_distribute=distribution))

    num_steps = 10
    estimator.train(train_input_fn, steps=num_steps)

    scores = estimator.evaluate(eval_input_fn)
    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', six.iterkeys(scores))

    predictions = np.array([
        x[prediction_keys.PredictionKeys.PREDICTIONS]
        for x in estimator.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, label_dimension), predictions.shape)

    feature_spec = feature_column.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = estimator.export_savedmodel(tempfile.mkdtemp(),
                                             serving_input_receiver_fn)
    self.assertTrue(gfile.Exists(export_dir))

  def tearDown(self):
    if self._model_dir:
      writer_cache.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)
class ParameterServerStrategyWithChiefTest(ParameterServerStrategyTestBase,
                                           parameterized.TestCase):
    @classmethod
    def setUpClass(cls):
        cls._cluster_spec = multi_worker_test_base.create_in_process_cluster(
            num_workers=3, num_ps=2, has_chief=True)
        cls._default_target = 'grpc://' + cls._cluster_spec[CHIEF][0]

    @combinations.generate(
        combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
    def testSimpleBetweenGraph(self, use_core_strategy):
        self._run_between_graph_clients(self._test_simple_increment,
                                        self._cluster_spec,
                                        context.num_gpus(),
                                        use_core_strategy=use_core_strategy)

    @combinations.generate(
        combinations.combine(mode=['graph'],
                             num_gpus=[0, 1, 2],
                             use_core_strategy=[True, False]))
    def testMinimizeLossGraph(self, num_gpus, use_core_strategy):
        self._run_between_graph_clients(self._test_minimize_loss_graph,
                                        self._cluster_spec,
                                        num_gpus,
                                        use_core_strategy=use_core_strategy)

    @combinations.generate(
        combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
    def testGlobalStepIsWrappedOnTwoGPUs(self, use_core_strategy):
        strategy, _, _ = create_test_objects(
            num_gpus=2, use_core_strategy=use_core_strategy)
        with ops.Graph().as_default(), strategy.scope():
            created_step = training_util.create_global_step()
            get_step = training_util.get_global_step()
            self.assertEqual(
                created_step,
                get_step,
                msg=('created_step %s type %s vs. get_step %s type %s' %
                     (id(created_step), created_step.__class__.__name__,
                      id(get_step), get_step.__class__.__name__)))
            self.assertIs(values.AggregatingVariable, type(created_step))
            self.assertIs(values.AggregatingVariable, type(get_step))
            self.assertIs(strategy, created_step.distribute_strategy)

    @combinations.generate(
        combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
    def testGlobalStepIsNotWrappedOnOneGPU(self, use_core_strategy):
        strategy, _, _ = create_test_objects(
            num_gpus=1, use_core_strategy=use_core_strategy)
        with ops.Graph().as_default(), strategy.scope():
            created_step = training_util.create_global_step()
            get_step = training_util.get_global_step()
            self.assertEqual(
                created_step,
                get_step,
                msg=('created_step %s type %s vs. get_step %s type %s' %
                     (id(created_step), created_step.__class__.__name__,
                      id(get_step), get_step.__class__.__name__)))
            self.assertIs(resource_variable_ops.ResourceVariable,
                          type(created_step))
            self.assertIs(resource_variable_ops.ResourceVariable,
                          type(get_step))
            # All variables have a _distribute_strategy attribute. Only
            # variable subclasses in distribution strategy expose it publicly.
            self.assertFalse(hasattr(created_step, 'distribute_strategy'))
            self.assertIs(strategy, created_step._distribute_strategy)

    @combinations.generate(
        combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
    def testValueContainer(self, use_core_strategy):
        strategy, _, _ = create_test_objects(
            num_gpus=2, use_core_strategy=use_core_strategy)
        with ops.Graph().as_default(), strategy.scope():

            def f():
                with backprop.GradientTape() as tape:
                    v = variable_scope.get_variable('v', initializer=10.0)
                    _ = v * v
                v, = tape.watched_variables()
                w = strategy.extended.value_container(v)
                self.assertIs(values.AggregatingVariable, type(w))

            strategy.extended.call_for_each_replica(f)
class ParameterServerStrategyTest(
        ParameterServerStrategyTestBase,
        strategy_test_lib.DistributionTestBase,
        strategy_test_lib.TwoDeviceDistributionTestBase,
        parameterized.TestCase):
    @classmethod
    def setUpClass(cls):
        cls._cluster_spec = multi_worker_test_base.create_in_process_cluster(
            num_workers=3, num_ps=2)
        cls._default_target = 'grpc://' + cls._cluster_spec[WORKER][0]

    @combinations.generate(
        combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
    def test_num_replicas_in_sync(self, use_core_strategy):
        strategy, _, _ = create_test_objects(
            num_gpus=2, use_core_strategy=use_core_strategy)
        # All the devices on a given worker are in sync, which in this case
        # is the number of GPUs on each worker.
        self.assertEqual(2, strategy.num_replicas_in_sync)

    @combinations.generate(
        combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
    def testDeviceAssignmentLocalCPU(self, use_core_strategy):
        strategy, _, _ = create_test_objects(
            num_gpus=0, use_core_strategy=use_core_strategy)
        self._test_device_assignment_local(strategy,
                                           compute_device='CPU',
                                           variable_device='CPU',
                                           num_gpus=0)

    @combinations.generate(
        combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
    def testDeviceAssignmentLocalOneGPU(self, use_core_strategy):
        strategy, _, _ = create_test_objects(
            num_gpus=1, use_core_strategy=use_core_strategy)
        self._test_device_assignment_local(strategy,
                                           compute_device='GPU',
                                           variable_device='GPU',
                                           num_gpus=1)

    @combinations.generate(
        combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
    def testDeviceAssignmentLocalTwoGPUs(self, use_core_strategy):
        strategy, _, _ = create_test_objects(
            num_gpus=2, use_core_strategy=use_core_strategy)
        self._test_device_assignment_local(strategy,
                                           compute_device='GPU',
                                           variable_device='CPU',
                                           num_gpus=2)

    @combinations.generate(
        combinations.combine(mode=['graph'],
                             num_gpus=[0, 1, 2],
                             use_core_strategy=[True, False]))
    def testDeviceAssignmentDistributed(self, num_gpus, use_core_strategy):
        self._test_device_assignment_distributed(
            'worker', 1, num_gpus, use_core_strategy=use_core_strategy)

    @combinations.generate(
        combinations.combine(mode=['graph'],
                             num_gpus=[0, 1, 2],
                             use_core_strategy=[True, False]))
    def testDeviceAssignmentDistributedEnablePartitioner(
            self, num_gpus, use_core_strategy):
        self._test_device_assignment_distributed_enable_partitioner(
            'worker', 1, num_gpus, use_core_strategy=use_core_strategy)

    @combinations.generate(
        combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
    def testSimpleBetweenGraph(self, use_core_strategy):
        self._run_between_graph_clients(self._test_simple_increment,
                                        self._cluster_spec,
                                        context.num_gpus(),
                                        use_core_strategy=use_core_strategy)

    @combinations.generate(
        combinations.combine(mode=['graph'],
                             num_gpus=[0, 1, 2],
                             use_core_strategy=[True, False]))
    def testLocalSimpleIncrement(self, num_gpus, use_core_strategy):
        self._test_simple_increment(None, 0, num_gpus, use_core_strategy)

    @combinations.generate(
        combinations.combine(mode=['graph'],
                             num_gpus=[0, 1, 2],
                             use_core_strategy=[True, False]))
    def testMinimizeLossGraphDistributed(self, num_gpus, use_core_strategy):
        self._run_between_graph_clients(self._test_minimize_loss_graph,
                                        self._cluster_spec,
                                        num_gpus,
                                        use_core_strategy=use_core_strategy)

    @combinations.generate(
        combinations.combine(mode=['graph'],
                             num_gpus=[0, 1, 2],
                             use_core_strategy=[True, False]))
    def testMinimizeLossGraphLocal(self, num_gpus, use_core_strategy):
        self._test_minimize_loss_graph(None, None, num_gpus, use_core_strategy)

    # TODO(b/124344198): Re-enable after fixing this flaky test.
    # TODO(priyag): Refactor this and other multi worker tests.
    @combinations.generate(
        combinations.combine(mode=['graph'],
                             num_gpus=[1, 2],
                             required_gpus=1,
                             use_core_strategy=[True, False],
                             use_dataset=[True, False]))
    def DISABLED_testMakeInputFnIteratorDistributed(self, num_gpus,
                                                    use_core_strategy,
                                                    use_dataset):
        if context.num_gpus() < num_gpus:
            self.skipTest('Not enough GPUs')
        if use_dataset:
            fn = lambda: dataset_ops.Dataset.range(100)
        else:

            def fn():
                dataset = dataset_ops.Dataset.range(100)
                it = dataset.make_one_shot_iterator()
                return it.get_next

        expected_values = [[i + j for j in range(num_gpus)]
                           for i in range(0, 100, num_gpus)]

        input_fn = self._input_fn_to_test_input_context(
            fn,
            expected_num_replicas_in_sync=num_gpus,
            expected_num_input_pipelines=3,
            expected_input_pipeline_id=1)  # because task_id = 1
        self._test_input_fn_iterator('worker',
                                     1,
                                     num_gpus,
                                     input_fn,
                                     expected_values,
                                     test_reinitialize=use_dataset,
                                     use_core_strategy=use_core_strategy)

    # TODO(b/124344198): Re-enable after fixing this flaky test.
    @combinations.generate(
        combinations.combine(mode=['graph'],
                             num_gpus=[1, 2],
                             required_gpus=1,
                             use_core_strategy=[True, False],
                             use_dataset=[True, False]))
    def DISABLED_testMakeInputFnIteratorLocal(self, num_gpus,
                                              use_core_strategy, use_dataset):
        if context.num_gpus() < num_gpus:
            self.skipTest('Not enough GPUs')
        if use_dataset:
            fn = lambda: dataset_ops.Dataset.range(100)
        else:

            def fn():
                dataset = dataset_ops.Dataset.range(100)
                it = dataset.make_one_shot_iterator()
                return it.get_next

        expected_values = [[i + j for j in range(num_gpus)]
                           for i in range(0, 100, num_gpus)]

        input_fn = self._input_fn_to_test_input_context(
            fn,
            expected_num_replicas_in_sync=num_gpus,
            expected_num_input_pipelines=1,
            expected_input_pipeline_id=0
        )  # only one worker and pipeline for local.
        self._test_input_fn_iterator(None,
                                     None,
                                     num_gpus,
                                     input_fn,
                                     expected_values,
                                     test_reinitialize=use_dataset,
                                     use_core_strategy=use_core_strategy)

    @combinations.generate(
        combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
    def testGlobalStepUpdate(self, use_core_strategy):
        strategy, _, _ = create_test_objects(
            use_core_strategy=use_core_strategy)
        self._test_global_step_update(strategy)

    @combinations.generate(
        combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
    def testUpdateConfigProtoMultiWorker(self, use_core_strategy):
        strategy, _, _ = create_test_objects(
            num_gpus=2, use_core_strategy=use_core_strategy)
        strategy.configure(cluster_spec=self._cluster_spec,
                           task_type='worker',
                           task_id=1)

        config_proto = config_pb2.ConfigProto(
            device_filters=['to_be_overridden'])

        new_config = strategy.update_config_proto(config_proto)

        # Verify device filters.
        self.assertEqual(['/job:worker/task:1', '/job:ps'],
                         new_config.device_filters)

        # Verify isolate_session_state
        self.assertFalse(new_config.isolate_session_state)

    @combinations.generate(
        combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
    def testUpdateConfigProtoLocal(self, use_core_strategy):
        strategy, _, _ = create_test_objects(
            num_gpus=2, use_core_strategy=use_core_strategy)

        config_proto = config_pb2.ConfigProto()
        new_config = strategy.update_config_proto(config_proto)

        # Verify isolate_session_state
        self.assertTrue(new_config.isolate_session_state)

    def testAllReduceSum(self):
        distribution = parameter_server_strategy.ParameterServerStrategy(
            num_gpus_per_worker=2)
        self._test_all_reduce_sum(distribution)

    def testAllReduceSumGradients(self):
        distribution = parameter_server_strategy.ParameterServerStrategy(
            num_gpus_per_worker=2)
        self._test_all_reduce_sum_gradients(distribution)

    def testAllReduceSumGradientTape(self):
        distribution = parameter_server_strategy.ParameterServerStrategy(
            num_gpus_per_worker=2)
        self._test_all_reduce_sum_gradient_tape(distribution)

    def testAllReduceMean(self):
        distribution = parameter_server_strategy.ParameterServerStrategy(
            num_gpus_per_worker=2)
        self._test_all_reduce_mean(distribution)

    def testAllReduceMeanGradients(self):
        distribution = parameter_server_strategy.ParameterServerStrategy(
            num_gpus_per_worker=2)
        self._test_all_reduce_mean_gradients(distribution)

    def testAllReduceMeanGradientTape(self):
        distribution = parameter_server_strategy.ParameterServerStrategy(
            num_gpus_per_worker=2)
        self._test_all_reduce_mean_gradient_tape(distribution)
Beispiel #28
def tpu_combinations():
    return combinations.combine(distribution=[
        combinations.tpu_strategy_one_step, combinations.tpu_strategy
    ],
                                mode=["graph"])
Beispiel #29
class ParameterServerStrategyTest(ParameterServerStrategyTestBase,
                                  parameterized.TestCase):
    @classmethod
    def setUpClass(cls):
        cls._cluster_spec = multi_worker_test_base.create_in_process_cluster(
            num_workers=3, num_ps=2)
        cls._default_target = 'grpc://' + cls._cluster_spec[WORKER][0]

    def test_num_replicas_in_sync(self):
        distribution = parameter_server_strategy.ParameterServerStrategy(
            num_gpus_per_worker=2)
        # All the devices on a given worker are in sync, so the number of
        # replicas in sync equals the number of GPUs on each worker.
        self.assertEqual(2, distribution.num_replicas_in_sync)

    def testDeviceAssignmentLocalCPU(self):
        distribution = parameter_server_strategy.ParameterServerStrategy(
            num_gpus_per_worker=0)
        self._test_device_assignment_local(distribution,
                                           compute_device='CPU',
                                           variable_device='CPU',
                                           num_gpus=0)

    def testDeviceAssignmentLocalOneGPU(self):
        distribution = parameter_server_strategy.ParameterServerStrategy(
            num_gpus_per_worker=1)
        self._test_device_assignment_local(distribution,
                                           compute_device='GPU',
                                           variable_device='GPU',
                                           num_gpus=1)

    def testDeviceAssignmentLocalTwoGPUs(self):
        distribution = parameter_server_strategy.ParameterServerStrategy(
            num_gpus_per_worker=2)
        self._test_device_assignment_local(distribution,
                                           compute_device='GPU',
                                           variable_device='CPU',
                                           num_gpus=2)

    @combinations.generate(
        combinations.combine(mode=['graph'], num_gpus=[0, 1, 2]))
    def testDeviceAssignmentDistributed(self, num_gpus):
        self._test_device_assignment_distributed('worker', 1, num_gpus)

    @combinations.generate(
        combinations.combine(mode=['graph'], num_gpus=[0, 1, 2]))
    def testDeviceAssignmentDistributedEnablePartitioner(self, num_gpus):
        self._test_device_assignment_distributed_enable_partitioner(
            'worker', 1, num_gpus)

    def testSimpleBetweenGraph(self):
        self._run_between_graph_clients(self._test_simple_increment,
                                        self._cluster_spec, context.num_gpus())

    @combinations.generate(
        combinations.combine(mode=['graph'], num_gpus=[0, 1, 2]))
    def testLocalSimpleIncrement(self, num_gpus):
        self._test_simple_increment(None, 0, num_gpus)

    @combinations.generate(
        combinations.combine(mode=['graph'], num_gpus=[0, 1, 2]))
    def testMinimizeLossGraphDistributed(self, num_gpus):
        self._run_between_graph_clients(self._test_minimize_loss_graph,
                                        self._cluster_spec, num_gpus)

    @combinations.generate(
        combinations.combine(mode=['graph'], num_gpus=[0, 1, 2]))
    def testMinimizeLossGraphLocal(self, num_gpus):
        self._test_minimize_loss_graph(None, None, num_gpus)
class MirroredVariableTest(test.TestCase, parameterized.TestCase):

  config = config_pb2.ConfigProto()
  config.allow_soft_placement = True

  @test_util.run_in_graph_and_eager_modes(config=config)
  def testProperties(self):
    if context.num_gpus() < 1 and context.executing_eagerly():
      self.skipTest("A GPU is not available for this test in eager mode.")

    v, _, mirrored = _make_mirrored()

    self.assertEqual(v[0].name, mirrored.name)
    self.assertEqual(v[0].dtype, mirrored.dtype)
    self.assertEqual(v[0].shape, mirrored.shape)

  @test_util.run_in_graph_and_eager_modes(config=config)
  def testVariableOnAnotherDevice(self):
    v = variable_scope.get_variable(
        name="v", initializer=[1.], use_resource=True)
    index = {"/job:foo/device:CPU:0": v}
    mirrored = values.MirroredVariable(index, v,
                                       variable_scope.VariableAggregation.MEAN)

    self.assertEqual(v.name, mirrored.name)
    self.assertEqual(v.dtype, mirrored.dtype)
    self.assertEqual(v.shape, mirrored.shape)

  def _assign_mirrored(self, devices, v, new):
    for d, var, n in zip(devices, v, new):
      with ops.device(d):
        self.evaluate(var.assign(n))

  def _save_return_saver(self, sess, var):
    saver = saver_lib.Saver(var_list=[var])
    test_dir = self.get_temp_dir()
    prefix = os.path.join(test_dir, "ckpt")
    return saver.save(sess, prefix), saver

  def _save(self, sess, var):
    save_path, _ = self._save_return_saver(sess, var)
    return save_path

  @test_util.run_in_graph_and_eager_modes(config=config)
  def testSaveAndRestoreMirroredOneGraph(self):
    if context.num_gpus() < 1 and context.executing_eagerly():
      self.skipTest("A GPU is not available for this test in eager mode.")

    with self.cached_session(config=self.config) as sess:
      v, devices, mirrored = _make_mirrored()

      # Overwrite the initial values.
      self._assign_mirrored(devices, v, [3., 4.])

      # Saves the current value of v[0], 3.
      save_path, saver = self._save_return_saver(sess, mirrored)

      # Change the values between save and restore.
      self._assign_mirrored(devices, v, [5., 6.])

      # Restores the saved value of 3. to both variables.
      saver.restore(sess, save_path)
      self.assertEqual([3., 3.], self.evaluate([v[0], v[1]]))

  def _save_mirrored(self):
    """Save variables with mirroring, returns save_path."""
    with self.session(graph=ops.Graph()) as sess:
      v, devices, mirrored = _make_mirrored()

      # Overwrite the initial values.
      self._assign_mirrored(devices, v, [3., 4.])

      # Saves the current value of v[0], 3.
      save_path = self._save(sess, mirrored)

      # Change the values between save and restore.
      self._assign_mirrored(devices, v, [5., 6.])
    return save_path

  def _save_normal(self):
    """Save variables without mirroring, returns save_path."""
    with self.session(graph=ops.Graph()) as sess:
      var = variable_scope.get_variable(
          name="v", initializer=1., use_resource=True)

      # Overwrite the initial value.
      self.evaluate(var.assign(3.))

      # Saves the current value of var, 3.
      save_path = self._save(sess, var)

      # Change the values between save and restore.
      self.evaluate(var.assign(5.))
    return save_path

  def _restore_normal(self, save_path):
    """Restore to variables without mirroring in a fresh graph."""
    with self.session(graph=ops.Graph()) as sess:
      var = variable_scope.get_variable(
          name="v", initializer=7., use_resource=True)

      # Overwrite the initial value.
      self.evaluate(var.assign(8.))

      # Restores the saved value of 3. to `var`.
      saver = saver_lib.Saver(var_list=[var])
      saver.restore(sess, save_path)
      self.assertEqual(3., self.evaluate(var))

  def _restore_mirrored(self, save_path):
    """Restore to variables with mirroring in a fresh graph."""
    with self.session(graph=ops.Graph()) as sess:
      v, devices, mirrored = _make_mirrored()

      # Overwrite the initial values.
      self._assign_mirrored(devices, v, [7., 8.])

      # Restores the saved value of 3. to both variables.
      saver = saver_lib.Saver(var_list=[mirrored])
      saver.restore(sess, save_path)
      self.assertEqual([3., 3.], self.evaluate([v[0], v[1]]))

  @test_util.run_in_graph_and_eager_modes(config=config)
  def testSaveMirroredRestoreMirrored(self):
    if context.num_gpus() < 1 and context.executing_eagerly():
      self.skipTest("A GPU is not available for this test in eager mode.")

    save_path = self._save_mirrored()
    self._restore_mirrored(save_path)

  @test_util.run_in_graph_and_eager_modes(config=config)
  def testSaveMirroredRestoreNormal(self):
    if context.num_gpus() < 1 and context.executing_eagerly():
      self.skipTest("A GPU is not available for this test in eager mode.")

    save_path = self._save_mirrored()
    self._restore_normal(save_path)

  @test_util.run_in_graph_and_eager_modes(config=config)
  def testSaveNormalRestoreMirrored(self):
    if context.num_gpus() < 1 and context.executing_eagerly():
      self.skipTest("A GPU is not available for this test in eager mode.")

    save_path = self._save_normal()
    self._restore_mirrored(save_path)

  @combinations.generate(combinations.combine(
      distribution=[
          combinations.mirrored_strategy_with_one_gpu,
          combinations.core_mirrored_strategy_with_one_gpu],
      mode=["graph"]))
  def testFetchAMirroredVariable(self, distribution):
    with self.session(graph=ops.Graph()) as sess, distribution.scope():
      with ops.device("/device:GPU:0"):
        v = variable_scope.get_variable(
            name="v", initializer=1., use_resource=True)
      mirrored = values.MirroredVariable({
          "/device:GPU:0": v
      }, v, variable_scope.VariableAggregation.MEAN)
      sess.run(variables_lib.global_variables_initializer())
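      # Fetching a structure that contains a MirroredVariable should succeed;
      # the fetch resolves the distributed value to a concrete tensor.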
      sess.run({"complicated": mirrored})
Beispiel #31
class LocalCollectiveAllReduceStrategy(
        CollectiveAllReduceStrategyTestBase,
        strategy_test_lib.DistributionTestBase,
        strategy_test_lib.TwoDeviceDistributionTestBase,
        parameterized.TestCase):
    @combinations.generate(
        combinations.combine(mode=['graph', 'eager'],
                             num_gpus=[2, 4],
                             required_gpus=2,
                             use_core_strategy=[True, False]))
    def testMinimizeLoss(self, num_gpus, use_core_strategy):
        # Collective ops don't support a strategy with only one device.
        if context.num_gpus() < num_gpus:
            self.skipTest('Not enough GPUs')
        if context.executing_eagerly():
            strategy, _, _ = self._get_test_object(
                None, None, num_gpus, use_core_strategy=use_core_strategy)
            self._test_minimize_loss_eager(strategy)
        else:
            self._test_minimize_loss_graph(None,
                                           None,
                                           num_gpus,
                                           use_core_strategy=use_core_strategy)

    @combinations.generate(
        combinations.combine(mode=['graph'],
                             num_gpus=[2, 4],
                             required_gpus=2,
                             use_core_strategy=[True, False]))
    def testComplexModel(self, num_gpus, use_core_strategy):
        if context.num_gpus() < num_gpus:
            self.skipTest('Not enough GPUs')
        self._test_complex_model(None,
                                 None,
                                 num_gpus,
                                 use_core_strategy=use_core_strategy)

    @combinations.generate(
        combinations.combine(mode=['graph', 'eager'],
                             required_gpus=2,
                             use_dataset=[True, False],
                             use_core_strategy=[True, False]))
    def DISABLED_testMakeInputFnIterator(self, use_dataset, use_core_strategy):
        num_gpus = 2
        if use_dataset:
            fn = lambda: dataset_ops.Dataset.range(5 * num_gpus)
        else:

            def fn():
                dataset = dataset_ops.Dataset.range(5 * num_gpus)
                it = dataset.make_one_shot_iterator()
                return it.get_next

        expected_values = [
            range(i, i + num_gpus) for i in range(0, 10, num_gpus)
        ]
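        # With num_gpus=2 this is [range(0, 2), range(2, 4), ..., range(8, 10)]:
        # five steps, each yielding one consecutive element per replica.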

        input_fn = self._input_fn_to_test_input_context(
            fn,
            expected_num_replicas_in_sync=num_gpus,
            expected_num_input_pipelines=1,
            expected_input_pipeline_id=0)
        self._test_input_fn_iterator(None,
                                     None,
                                     num_gpus,
                                     input_fn,
                                     expected_values,
                                     test_reinitialize=use_dataset,
                                     use_core_strategy=use_core_strategy)

    @combinations.generate(
        combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
    def testAllReduceSum(self, use_core_strategy):
        if context.num_gpus() < 2: self.skipTest('Not enough GPUs')
        distribution, target, config = self._get_test_object(
            None, None, num_gpus=2, use_core_strategy=use_core_strategy)
        with self.cached_session(config=config, target=target):
            self._test_all_reduce_sum(distribution)

    @combinations.generate(
        combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
    def testAllReduceSumGradients(self, use_core_strategy):
        if context.num_gpus() < 2: self.skipTest('Not enough GPUs')
        distribution, target, config = self._get_test_object(
            None, None, num_gpus=2, use_core_strategy=use_core_strategy)
        with self.cached_session(config=config, target=target):
            self._test_all_reduce_sum_gradients(distribution)

    @combinations.generate(
        combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
    def testAllReduceSumGradientTape(self, use_core_strategy):
        if context.num_gpus() < 2: self.skipTest('Not enough GPUs')
        distribution, target, config = self._get_test_object(
            None, None, num_gpus=2, use_core_strategy=use_core_strategy)
        with self.cached_session(config=config, target=target):
            self._test_all_reduce_sum_gradient_tape(distribution)

    @combinations.generate(
        combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
    def testAllReduceMean(self, use_core_strategy):
        if context.num_gpus() < 2: self.skipTest('Not enough GPUs')
        distribution, target, config = self._get_test_object(
            None, None, num_gpus=2, use_core_strategy=use_core_strategy)
        with self.cached_session(config=config, target=target):
            self._test_all_reduce_mean(distribution)

    @combinations.generate(
        combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
    def testAllReduceMeanGradients(self, use_core_strategy):
        if context.num_gpus() < 2: self.skipTest('Not enough GPUs')
        distribution, target, config = self._get_test_object(
            None, None, num_gpus=2, use_core_strategy=use_core_strategy)
        with self.cached_session(config=config, target=target):
            self._test_all_reduce_mean_gradients(distribution)

    @combinations.generate(
        combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
    def testAllReduceMeanGradientTape(self, use_core_strategy):
        if context.num_gpus() < 2: self.skipTest('Not enough GPUs')
        distribution, target, config = self._get_test_object(
            None, None, num_gpus=2, use_core_strategy=use_core_strategy)
        with self.cached_session(config=config, target=target):
            self._test_all_reduce_mean_gradient_tape(distribution)

    @combinations.generate(
        combinations.combine(mode=['graph'], use_core_strategy=[True, False]))
    def testNumpyIterator(self, use_core_strategy):
        num_gpus = 2
        if context.num_gpus() < num_gpus:
            self.skipTest('Not enough GPUs')
        strategy, _, _ = self._get_test_object(
            None, None, num_gpus=num_gpus, use_core_strategy=use_core_strategy)
        self._test_numpy_iterator(strategy)
def strategy_minus_tpu_combinations():
  return combinations.combine(
      distribution=strategies_minus_tpu,
      mode=['graph', 'eager'])
Beispiel #33
from __future__ import print_function

from absl.testing import parameterized

from tensorflow.contrib.distribute.python import combinations
from tensorflow.python.eager import test
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import variables
from tensorflow.python.training import moving_averages


all_combinations = combinations.combine(
    distribution=[combinations.default_strategy,
                  combinations.one_device_strategy,
                  combinations.mirrored_strategy_with_gpu_and_cpu],
    mode=["graph"])


class AssignMovingAveragesTest(test.TestCase, parameterized.TestCase):

  @combinations.generate(all_combinations)
  def testTowerModeWithoutZeroDebias(self, distribution):
    tower_id = [0]

    def tower_fn():
      var = variables.Variable([10.0, 11.0])
      val = constant_op.constant([1.0 + tower_id[0], 2.0 - tower_id[0]])
      tower_id[0] += 1
      decay = 0.25
def strategy_for_numpy_input_combinations():
  return combinations.combine(
      distribution=strategies_minus_tpu + tpu_strategies,
      mode=['graph'])
Beispiel #35
class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase):
    def test_validating_dataset_input_tensors_with_shape_mismatch(self):
        with self.cached_session():
            strategy = mirrored_strategy.MirroredStrategy(
                ['/device:GPU:0', '/device:CPU:0'])
            a = constant_op.constant([1, 2], shape=(1, 2))
            b = constant_op.constant([[1, 2], [1, 2]], shape=(2, 2))
            x = values.DistributedValues({
                '/device:CPU:0': a,
                '/device:GPU:0': b
            })
            y = values.DistributedValues({
                '/device:CPU:0': a,
                '/device:GPU:0': a
            })
            with strategy.scope():
                # Removed device and input tensor shape details from the error message
                # since the order of the device and the corresponding input tensor shape
                # is not deterministic over different runs.
                with self.assertRaisesRegexp(
                        ValueError, 'Input tensor shapes do not match for '
                        'distributed tensor inputs '
                        'DistributedValues:.+'):
                    distributed_training_utils.validate_distributed_dataset_inputs(
                        strategy, x, y)

    def test_validating_dataset_input_tensors_with_dtype_mismatch(self):
        with self.cached_session():
            strategy = mirrored_strategy.MirroredStrategy(
                ['/device:GPU:0', '/device:CPU:0'])
            a = constant_op.constant([1, 2], shape=(1, 2), dtype=dtypes.int32)
            b = constant_op.constant([1, 2],
                                     shape=(1, 2),
                                     dtype=dtypes.float64)
            x = values.DistributedValues({
                '/device:CPU:0': a,
                '/device:GPU:0': b
            })
            y = values.DistributedValues({
                '/device:CPU:0': a,
                '/device:GPU:0': a
            })
            with strategy.scope():
                # Removed device and input tensor dtype details from the error message
                # since the order of the device and the corresponding input tensor dtype
                # is not deterministic over different runs.
                with self.assertRaisesRegexp(
                        ValueError, 'Input tensor dtypes do not match for '
                        'distributed tensor inputs '
                        'DistributedValues:.+'):
                    distributed_training_utils.validate_distributed_dataset_inputs(
                        strategy, x, y)

    # TODO(anjalisridhar): Move this test along with other numpy related tests to
    # its own class.
    @combinations.generate(strategy_combinations())
    def test_creating_var_with_numpy_arrays(self, distribution):
        with self.cached_session():
            x = np.asarray(np.random.random((64, 3)), dtype=np.float32)
            var_x = distributed_training_utils.get_var_for_numpy(
                distribution, x)
            val = self.evaluate(var_x.value())
            # Verify that the numpy value is copied to the variable.
            self.assertAllEqual(x, val)

    @combinations.generate(strategy_combinations())
    def test_calling_model_with_numpy_arrays(self, distribution):
        with self.cached_session():
            model = get_model()

            optimizer = gradient_descent.GradientDescentOptimizer(0.001)
            loss = 'mse'
            metrics = ['mae']
            model.compile(optimizer,
                          loss,
                          metrics=metrics,
                          distribute=distribution)

            inputs = np.zeros((64, 3), dtype=np.float32)
            targets = np.zeros((64, 4), dtype=np.float32)

            # Call fit with validation data
            model.fit(inputs,
                      targets,
                      epochs=1,
                      batch_size=2,
                      verbose=0,
                      validation_data=(inputs, targets))

            # TODO(anjalisridhar): We need tests for when the batch size and
            # steps are smaller and result in a batch_size and steps value of 0.
            model.evaluate(inputs, targets)
            # with steps
            model.evaluate(inputs, targets, steps=2)
            # with batch_size
            model.evaluate(inputs, targets, batch_size=8)

            model.predict(inputs)
            # with steps
            model.predict(inputs, steps=2)
            # with batch_size
            model.predict(inputs, batch_size=8)

    @combinations.generate(strategy_combinations())
    def test_calling_model_with_nested_numpy_arrays(self, distribution):
        with self.cached_session():
            a = keras.layers.Input(shape=(3, ), name='input_a')
            b = keras.layers.Input(shape=(3, ), name='input_b')

            dense = keras.layers.Dense(4, name='dense')
            c = dense(a)
            d = dense(b)
            e = keras.layers.Dropout(0.5, name='dropout')(c)

            model = keras.models.Model([a, b], [d, e])

            optimizer = gradient_descent.GradientDescentOptimizer(
                learning_rate=0.001)
            loss = 'mse'
            model.compile(optimizer, loss, distribute=distribution)

            input_a_np = np.asarray(np.random.random((64, 3)),
                                    dtype=np.float32)
            input_b_np = np.asarray(np.random.random((64, 3)),
                                    dtype=np.float32)
            inputs = [input_a_np, input_b_np]

            output_d_np = np.asarray(np.random.random((64, 4)),
                                     dtype=np.float32)
            output_e_np = np.asarray(np.random.random((64, 4)),
                                     dtype=np.float32)
            targets = [output_d_np, output_e_np]

            # Call fit with validation data
            model.fit(inputs, targets, epochs=1, batch_size=8, verbose=0)

            # TODO(anjalisridhar): We need tests for when the batch size and
            # steps are smaller and result in a batch_size and steps value of 0.
            model.evaluate(inputs, targets)
            # with steps
            model.evaluate(inputs, targets, steps=2)
            # with batch_size
            model.evaluate(inputs, targets, batch_size=8)

            model.predict(inputs)
            # with steps
            model.predict(inputs, steps=2)
            # with batch_size
            model.predict(inputs, batch_size=8)

    @combinations.generate(strategy_combinations())
    def test_calling_model_on_same_dataset(self, distribution):
        with self.cached_session():
            model = get_model()

            optimizer = gradient_descent.GradientDescentOptimizer(0.001)
            loss = 'mse'
            metrics = ['mae', keras.metrics.CategoricalAccuracy()]
            model.compile(optimizer,
                          loss,
                          metrics=metrics,
                          distribute=distribution)

            dataset = get_dataset(distribution)

            # Call fit with validation data
            model.fit(dataset,
                      epochs=1,
                      steps_per_epoch=2,
                      verbose=0,
                      validation_data=dataset,
                      validation_steps=2)
            model.fit(dataset,
                      epochs=1,
                      steps_per_epoch=2,
                      verbose=0,
                      validation_data=dataset,
                      validation_steps=2)
            model.predict(get_predict_dataset(distribution), steps=2)

    # TODO(priyag): Enable this test for TPU. Currently tuples/dicts don't work
    # because clone_model's input_tensors argument only seems to accept lists,
    # not tuples or dicts.
    def test_fit_with_tuple_and_dict_dataset_inputs(self):
        with self.cached_session():
            a = keras.layers.Input(shape=(3, ), name='input_a')
            b = keras.layers.Input(shape=(3, ), name='input_b')

            dense = keras.layers.Dense(4, name='dense')
            c = dense(a)
            d = dense(b)
            e = keras.layers.Dropout(0.5, name='dropout')(c)

            model = keras.models.Model([a, b], [d, e])

            optimizer = gradient_descent.GradientDescentOptimizer(
                learning_rate=0.001)
            loss = 'mse'
            metrics = ['mae', keras.metrics.CategoricalAccuracy()]
            strategy = mirrored_strategy.MirroredStrategy(
                ['/device:GPU:0', '/device:CPU:0'])
            model.compile(optimizer,
                          loss,
                          metrics=metrics,
                          distribute=strategy)

            input_a_np = np.random.random((10, 3))
            input_b_np = np.random.random((10, 3))
            output_d_np = np.random.random((10, 4))
            output_e_np = np.random.random((10, 4))

            # Test with tuples
            dataset_tuple = dataset_ops.Dataset.from_tensor_slices(
                ((input_a_np, input_b_np), (output_d_np, output_e_np)))
            dataset_tuple = dataset_tuple.repeat(100)
            dataset_tuple = dataset_tuple.batch(10)

            model.fit(dataset_tuple, epochs=1, steps_per_epoch=2, verbose=1)

            # Test with dict
            dataset_dict = dataset_ops.Dataset.from_tensor_slices(({
                'input_a':
                input_a_np,
                'input_b':
                input_b_np
            }, (output_d_np, output_e_np)))
            dataset_dict = dataset_dict.repeat(100)
            dataset_dict = dataset_dict.batch(10)

            model.fit(dataset_dict, epochs=1, steps_per_epoch=2, verbose=1)

    @combinations.generate(strategy_combinations())
    def test_fit_eval_and_predict_methods_on_dataset(self, distribution):
        with self.cached_session():
            model = get_model()

            optimizer = gradient_descent.GradientDescentOptimizer(0.001)
            loss = 'mse'
            metrics = ['mae', keras.metrics.CategoricalAccuracy()]
            model.compile(optimizer,
                          loss,
                          metrics=metrics,
                          distribute=distribution)

            dataset = get_dataset(distribution)

            model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1)
            model.evaluate(dataset, steps=2, verbose=1)
            model.predict(get_predict_dataset(distribution), steps=2)

    @combinations.generate(strategy_and_optimizer_combinations())
    def test_fit_eval_and_predict_with_optimizer(self, distribution,
                                                 optimizer):
        with self.cached_session():
            model = get_model()

            loss = 'mse'
            model.compile(optimizer(), loss, distribute=distribution)

            dataset = get_dataset(distribution)

            model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1)
            model.evaluate(dataset, steps=2, verbose=1)
            model.predict(get_predict_dataset(distribution), steps=2)

    def test_unsupported_features(self):
        with self.cached_session():
            model = get_model()

            optimizer = gradient_descent.GradientDescentOptimizer(0.001)
            loss = 'mse'
            metrics = ['mae']
            strategy = mirrored_strategy.MirroredStrategy(
                ['/device:GPU:1', '/device:GPU:0'])

            model.compile(optimizer,
                          loss,
                          metrics=metrics,
                          distribute=strategy)

            dataset = get_dataset(strategy)

            # Test with validation split
            with self.assertRaisesRegexp(
                    ValueError, '`validation_split` argument is not '
                    'supported when input `x` is a dataset or a '
                    'dataset iterator.+'):
                model.fit(dataset,
                          epochs=1,
                          steps_per_epoch=2,
                          verbose=0,
                          validation_split=0.5,
                          validation_steps=2)

            # Test with sample weight.
            sample_weight = np.random.random((10, ))
            with self.assertRaisesRegexp(
                    NotImplementedError,
                    '`sample_weight` is currently not supported '
                    'when using DistributionStrategy.'):
                model.fit(dataset,
                          epochs=1,
                          steps_per_epoch=2,
                          verbose=0,
                          sample_weight=sample_weight)

            # Test with not specifying the `steps` argument.
            with self.assertRaisesRegexp(
                    ValueError,
                    'you should specify the `steps_per_epoch` argument'):
                model.fit(dataset, epochs=1, verbose=0)
            with self.assertRaisesRegexp(
                    ValueError, 'you should specify the `steps` argument'):
                model.evaluate(dataset, verbose=0)

            with self.assertRaisesRegexp(
                    ValueError, 'you should specify the `steps` argument'):
                model.predict(dataset, verbose=0)

    def test_calling_with_unsupported_predefined_callbacks(self):
        with self.cached_session():
            model = get_model()

            optimizer = gradient_descent.GradientDescentOptimizer(0.001)
            loss = 'mse'
            metrics = ['mae']
            strategy = mirrored_strategy.MirroredStrategy(
                ['/device:GPU:1', '/device:GPU:0'])
            model.compile(optimizer,
                          loss,
                          metrics=metrics,
                          distribute=strategy)

            dataset = get_dataset(strategy)

            def schedule(_):
                return 0.001

            with self.assertRaisesRegexp(
                    ValueError, 'LearningRateScheduler callback is not '
                    'supported with DistributionStrategy.'):
                model.fit(dataset,
                          epochs=1,
                          steps_per_epoch=2,
                          verbose=0,
                          callbacks=[
                              keras.callbacks.LearningRateScheduler(schedule)
                          ])

            with self.assertRaisesRegexp(
                    ValueError, 'ReduceLROnPlateau callback is not '
                    'supported with DistributionStrategy.'):
                model.fit(dataset,
                          epochs=1,
                          steps_per_epoch=2,
                          verbose=0,
                          callbacks=[keras.callbacks.ReduceLROnPlateau()])
            with self.assertRaisesRegexp(
                    ValueError, 'histogram_freq in the TensorBoard callback '
                    'is not supported when using '
                    'DistributionStrategy.'):
                model.fit(
                    dataset,
                    epochs=1,
                    steps_per_epoch=2,
                    verbose=0,
                    callbacks=[keras.callbacks.TensorBoard(histogram_freq=10)])

    def test_dataset_input_shape_validation(self):
        with self.cached_session():
            model = get_model()

            optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001)
            loss = 'mse'
            strategy = mirrored_strategy.MirroredStrategy(
                ['/device:GPU:1', '/device:GPU:0'])

            model.compile(optimizer, loss, distribute=strategy)

            # User forgets to batch the dataset
            inputs = np.zeros((10, 3), dtype=np.float32)
            targets = np.zeros((10, 4), dtype=np.float32)
            dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets))
            dataset = dataset.repeat(100)

            with self.assertRaisesRegexp(ValueError,
                                         'expected input to have shape'):
                model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0)

            # Wrong input shape
            inputs = np.zeros((10, 5), dtype=np.float32)
            targets = np.zeros((10, 4), dtype=np.float32)
            dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets))
            dataset = dataset.repeat(100)
            dataset = dataset.batch(10)

            with self.assertRaisesRegexp(ValueError,
                                         'expected input to have shape'):
                model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0)

    @combinations.generate(
        combinations.combine(distribution=[combinations.tpu_strategy_one_step],
                             mode=['graph']))
    def test_dataset_input_shape_fully_defined(self, distribution):
        with self.cached_session():
            model = get_model()

            optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001)
            loss = 'mse'
            model.compile(optimizer, loss, distribute=distribution)

            dataset = get_dataset(distribution)
            # Input shapes are not fully known: the batch dimension is unknown
            # because we are not using the drop_remainder argument.
            dataset = dataset.repeat(100).batch(10)

            with self.assertRaisesRegexp(ValueError,
                                         'requires fully defined shapes'):
                model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0)

    def test_learning_phase_value(self):
        # TODO(anjalisridhar): Modify this test to use Lambdas so that we can
        # compare meaningful values. Currently we don't pass the learning phase
        # if the Lambda layer uses the learning phase.
        with self.cached_session():
            x = keras.layers.Input(shape=(16, ), name='input')
            y = keras.layers.Dense(16)(x)
            z = keras.layers.Dropout(0.9999)(y)
            model = keras.Model(x, z)

            optimizer = gradient_descent.GradientDescentOptimizer(0.005)
            loss = 'mse'
            metrics = ['acc']
            strategy = mirrored_strategy.MirroredStrategy(
                ['/device:GPU:0', '/device:CPU:0'])

            model.compile(optimizer,
                          loss,
                          metrics=metrics,
                          distribute=strategy)

            inputs = np.random.rand(10, 16)
            targets = np.ones((10, 16), dtype=np.float32)
            dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets))
            dataset = dataset.repeat(100)
            dataset = dataset.batch(8)

            hist = model.fit(dataset, epochs=5, steps_per_epoch=20, verbose=1)
            self.assertEqual(hist.history['acc'][0], 1)

            evaluate_output = model.evaluate(dataset, steps=20)
            self.assertEqual(evaluate_output[1], 0)

            predict_output = model.predict(dataset, steps=1)
            self.assertNotEqual(np.mean(predict_output), 0)

  def testTensorConversion(self):
    with context.graph_mode():
      _, replica_local = _make_replica_local(
          variable_scope.VariableAggregation.SUM)
      converted = ops.internal_convert_to_tensor(replica_local, as_ref=False)
      self.assertIsInstance(converted, ops.Tensor)
      self.assertEqual(converted.dtype, replica_local.dtype)

      converted = ops.internal_convert_to_tensor(replica_local, as_ref=True)
      # Resource variables are converted to tensors as well when as_ref is True.
      self.assertIsInstance(converted, ops.Tensor)
      self.assertEqual(converted.dtype, replica_local.dtype)


@combinations.generate(combinations.combine(
    distribution=[
        combinations.mirrored_strategy_with_gpu_and_cpu,
        combinations.core_mirrored_strategy_with_gpu_and_cpu],
    mode=["graph", "eager"]))
class ReplicaLocalVariableTest(test.TestCase, parameterized.TestCase):

  def _assign_replica_local(self, devices, v, new):
    for d, var, n in zip(devices, v, new):
      with ops.device(d):
        self.evaluate(var.assign(n))

  def _save_return_saver(self, sess, var):
    saver = saver_lib.Saver(var_list=[var])
    test_dir = self.get_temp_dir()
    prefix = os.path.join(test_dir, "ckpt")
    return saver.save(sess, prefix), saver
Beispiel #37
def strategy_minus_tpu_combinations():
    return combinations.combine(distribution=strategies_minus_tpu,
                                mode=['graph', 'eager'])
class SingleWorkerCrossDeviceOpsTest(CrossDeviceOpsTestBase):
  # TODO(yuefengz): decouple the num_gpus check from distribution in
  # combinations module so that we can pass in devices instead of a distribution
  # strategy.
  reduction_to_one_combinations = combinations.combine(
      cross_device_ops=[
          combinations.NamedObject(
              "DefaultReductionToOneDeviceCrossDeviceOps",
              cross_device_ops_lib.ReductionToOneDeviceCrossDeviceOps()),
          combinations.NamedObject(
              "ReductionToCPUDeviceCrossDeviceOps",
              cross_device_ops_lib.ReductionToOneDeviceCrossDeviceOps(
                  reduce_to_device=_cpu_device)),
          combinations.NamedObject(
              "AccumulateNCrossDeviceOp",
              cross_device_ops_lib.ReductionToOneDeviceCrossDeviceOps(
                  accumulation_fn=math_ops.accumulate_n)),
      ],
      distribution=[
          combinations.one_device_strategy,
          combinations.mirrored_strategy_with_gpu_and_cpu,
          combinations.mirrored_strategy_with_two_gpus,
          combinations.core_mirrored_strategy_with_gpu_and_cpu,
          combinations.core_mirrored_strategy_with_two_gpus
      ],
      mode=["graph", "eager"])
  allreduce_combinations = combinations.combine(
      cross_device_ops=[
          combinations.NamedObject(
              "AllReduce",
              cross_device_ops_lib.AllReduceCrossDeviceOps("nccl", 1, 0, 0)),
          combinations.NamedObject(
              "HierarchicalCopy",
              cross_device_ops_lib.AllReduceCrossDeviceOps(
                  "hierarchical_copy", 8, 0, 0)),
          combinations.NamedObject(
              "AllReduceNoGradientRepacking",
              cross_device_ops_lib.AllReduceCrossDeviceOps("nccl", 0, 0, 0)),
          combinations.NamedObject(
              "HierarchicalCopyAggregateSmallTensors",
              cross_device_ops_lib.AllReduceCrossDeviceOps(
                  "hierarchical_copy", 0, 100, 10))
      ],
      distribution=[combinations.mirrored_strategy_with_two_gpus,
                    combinations.core_mirrored_strategy_with_two_gpus],
      mode=["graph", "eager"])

  @combinations.generate(reduction_to_one_combinations + allreduce_combinations)
  def testReductionAndBroadcast(self, cross_device_ops, distribution):
    with distribution.scope():
      self._testReductionAndBroadcast(cross_device_ops, distribution)

  def testChooseAlgorithm(self):
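    # Each device_links[i] below is interpreted as the set of peer GPUs that
    # GPU i can reach directly (a DGX-1-like NVLink topology in the first
    # case), which is what _choose_all_reduce_algorithm inspects.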
    device_links = [[1, 2, 3, 4], [0, 2, 3, 5], [0, 1, 3, 6], [0, 1, 2, 7],
                    [0, 5, 6, 7], [1, 4, 6, 7], [2, 4, 5, 7], [3, 4, 5, 6]]
    result = cross_device_ops_lib._choose_all_reduce_algorithm(device_links)
    self.assertIsInstance(result, cross_device_ops_lib.AllReduceCrossDeviceOps)
    self.assertEqual(result._all_reduce_alg, "hierarchical_copy")
    self.assertEqual(result._num_packs, 8)

    # if there are only 4 devices
    device_links = [[1, 2, 3, 4], [0, 2, 3, 5], [0, 1, 3, 6], [0, 1, 2, 7]]
    result = cross_device_ops_lib._choose_all_reduce_algorithm(device_links)
    self.assertIsInstance(result, cross_device_ops_lib.AllReduceCrossDeviceOps)
    self.assertEqual(result._all_reduce_alg, "nccl")
    self.assertEqual(result._num_packs, 1)

    # if device links contain each device itself
    device_links = [[0, 1, 2, 3, 4], [0, 1, 2, 3, 5], [0, 1, 2, 3, 6],
                    [0, 1, 2, 3, 7], [0, 4, 5, 6, 7], [1, 4, 5, 6, 7],
                    [2, 4, 5, 6, 7], [3, 4, 5, 6, 7]]
    result = cross_device_ops_lib._choose_all_reduce_algorithm(device_links)
    self.assertIsInstance(result, cross_device_ops_lib.AllReduceCrossDeviceOps)
    self.assertEqual(result._all_reduce_alg, "hierarchical_copy")
    self.assertEqual(result._num_packs, 8)

    # if the links are not DGX-1-like
    device_links = [[0, 2, 3, 5], [0, 1, 3, 6], [0, 1, 2, 7], [0, 5, 6, 7],
                    [1, 4, 6, 7], [2, 4, 5, 7], [3, 4, 5, 6], [1, 2, 3, 4]]
    result = cross_device_ops_lib._choose_all_reduce_algorithm(device_links)
    self.assertIsInstance(result, cross_device_ops_lib.AllReduceCrossDeviceOps)
    self.assertEqual(result._all_reduce_alg, "nccl")
    self.assertEqual(result._num_packs, 1)

  @combinations.generate(combinations.combine(
      mode=["graph", "eager"],
      required_gpus=1))
  def testSimpleReduceWithIndexedSlices(self):
    devices = ["/cpu:0", "/gpu:0"]
    t0 = _make_indexed_slices([[1., 2.]], [1], [5, 2], devices[0])
    t1 = _make_indexed_slices([[3., 4.], [5., 6.]], [1, 3], [5, 2], devices[1])
    per_replica = value_lib.PerReplica({devices[0]: t0, devices[1]: t1})
    result = cross_device_ops_lib._simple_reduce(
        per_replica, devices[0], math_ops.add_n, reduce_util.ReduceOp.SUM)

    # Test that the result is semantically equal to both the concatenated
    # IndexedSlices with and without duplicate indices.
    total_with_dups = _make_indexed_slices(
        [[1., 2.], [3., 4.], [5., 6.]], [1, 1, 3], [5, 2], devices[0])
    total_without_dups = _make_indexed_slices(
        [[4., 6.], [5., 6.]], [1, 3], [5, 2], devices[0])
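    # Summing the duplicate index 1 gives [1. + 3., 2. + 4.] = [4., 6.], so
    # both representations describe the same logical tensor.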
    self._assert_indexed_slices_equal(total_with_dups, result)
    self._assert_indexed_slices_equal(total_without_dups, result)

  @combinations.generate(
      combinations.combine(
          cross_device_ops_instance=[
              combinations.NamedObject(
                  "ReductionToOneDeviceCrossDeviceOps",
                  cross_device_ops_lib.ReductionToOneDeviceCrossDeviceOps()),
              combinations.NamedObject(
                  "AllReduceCrossDeviceOps",
                  cross_device_ops_lib.AllReduceCrossDeviceOps())
          ],
          reduce_op=[reduce_util.ReduceOp.SUM, reduce_util.ReduceOp.MEAN],
          batch_reduce=[True, False],
          mode=["graph", "eager"],
          required_gpus=1))
  def testIndexedSlicesAllReduce(self, cross_device_ops_instance, reduce_op,
                                 batch_reduce):
    devices = ["/cpu:0", "/gpu:0"]
    dense_shape = [5, 2]
    t0 = _make_indexed_slices([[1., 2.]], [1], dense_shape, devices[0])
    t1 = _make_indexed_slices(
        [[3., 4.], [5., 6.]], [1, 3], dense_shape, devices[1])
    per_replica = value_lib.PerReplica({devices[0]: t0, devices[1]: t1})

    if batch_reduce:
      result = cross_device_ops_instance.batch_reduce(
          reduce_op, [(per_replica, per_replica)])
    else:
      result = cross_device_ops_instance.reduce(
          reduce_op, per_replica, per_replica)

    total_indices_with_dups = [1, 1, 3]
    total_indices_without_dups = [1, 3]

    if reduce_op == reduce_util.ReduceOp.SUM:
      total_values_with_dups = [[1., 2.], [3., 4.], [5., 6.]]
      total_values_without_dups = [[4., 6.], [5., 6.]]
    else:
      assert reduce_op == reduce_util.ReduceOp.MEAN
      total_values_with_dups = [[0.5, 1.], [1.5, 2.], [2.5, 3.]]
      total_values_without_dups = [[2., 3.], [2.5, 3.]]
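      # MEAN divides each per-index sum by the number of replicas (2 here),
      # e.g. index 1 with duplicates combined: [4., 6.] / 2 = [2., 3.].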

    total_mirrored_with_dups = _make_mirrored_indexed_slices(
        devices, total_values_with_dups, total_indices_with_dups, dense_shape)
    total_mirrored_without_dups = _make_mirrored_indexed_slices(
        devices, total_values_without_dups, total_indices_without_dups,
        dense_shape)

    # Test that the result is semantically equal to both the concatenated
    # IndexedSlices, as well as when the duplicate indices are summed up.
    if batch_reduce:
      total_mirrored_with_dups = [total_mirrored_with_dups]
      total_mirrored_without_dups = [total_mirrored_without_dups]

    self._assert_values_equal(total_mirrored_with_dups, result)
    self._assert_values_equal(total_mirrored_without_dups, result)
Beispiel #39
class CheckpointUtilsWithDistributionStrategyTest(
    test.TestCase, parameterized.TestCase):

  def _get_test_object(self):
    checkpoint_dir = self.get_temp_dir()
    with self.cached_session() as session:
      v1, v2 = _create_checkpoints(session, checkpoint_dir)
    return checkpoint_dir, v1, v2

  @combinations.generate(combinations.combine(
      distribution=[combinations.default_strategy,
                    combinations.one_device_strategy,
                    combinations.mirrored_strategy_with_gpu_and_cpu,
                    combinations.mirrored_strategy_with_two_gpus,
                    combinations.core_mirrored_strategy_with_gpu_and_cpu,
                    combinations.core_mirrored_strategy_with_two_gpus],
      in_replica_mode=[True, False],
      mode=["graph"]))
  def testInitFromCheckpoint(self, distribution, in_replica_mode):
    checkpoint_dir, v1_value, v2_value = self._get_test_object()

    def init_and_verify(g):
      v1 = variable_scope.get_variable("new_var1", [1, 10])
      v2 = variable_scope.get_variable(
          "new_var2", [10, 10],
          synchronization=variable_scope.VariableSynchronization.ON_READ,
          aggregation=variable_scope.VariableAggregation.MEAN)
      checkpoint_utils.init_from_checkpoint(checkpoint_dir, {
          "var1": "new_var1",
          "var2": "new_var2"
      })
      with self.session(graph=g) as session:
        session.run(variables.global_variables_initializer())
        self.assertAllEqual(v1_value, self.evaluate(v1))
        self.assertAllEqual(v2_value, self.evaluate(v2))

    with ops.Graph().as_default() as g, distribution.scope():
      if in_replica_mode:
        distribution.extended.call_for_each_replica(init_and_verify, args=[g])
      else:
        init_and_verify(g)

  @combinations.generate(
      combinations.combine(
          distribution=[
              combinations.default_strategy, combinations.one_device_strategy,
              combinations.mirrored_strategy_with_gpu_and_cpu,
              combinations.mirrored_strategy_with_two_gpus,
              combinations.core_mirrored_strategy_with_gpu_and_cpu,
              combinations.core_mirrored_strategy_with_two_gpus
          ],
          in_replica_mode=[True, False],
          mode=["graph"]))
  def testInitFromDifferentNameObject(self, distribution, in_replica_mode):
    checkpoint_dir, v1_value, _ = self._get_test_object()

    def init_and_verify(g):
      v1 = variable_scope.get_variable("new_var1", [1, 10])
      # Use string concatenation to create a new string object in each replica.
      prefix = "new_"
      suffix = "var1"
      new_var1 = prefix + suffix
      checkpoint_utils.init_from_checkpoint(checkpoint_dir, {
          "var1": new_var1,
      })
      with self.test_session(graph=g) as session:
        session.run(variables.global_variables_initializer())
        self.assertAllEqual(v1_value, self.evaluate(v1))

    with ops.Graph().as_default() as g, distribution.scope():
      if in_replica_mode:
        distribution.extended.call_for_each_replica(init_and_verify, [g])
      else:
        init_and_verify(g)
class MultiWorkerCrossDeviceOpsTest(multi_worker_test_base.MultiWorkerTestBase,
                                    CrossDeviceOpsTestBase):

  worker_devices = [
      "/job:worker/replica:0/task:0", "/job:worker/replica:0/task:1"
  ]
  multi_worker_allreduce_combinations = combinations.combine(
      cross_device_ops=[
          combinations.NamedObject(
              "MultiWorkerAllReduce",
              cross_device_ops_lib.MultiWorkerAllReduce(
                  worker_devices, 2, ("pscpu/pscpu", 2, -1), 0, 0, 0)),
          combinations.NamedObject(
              "MultiWorkerAllReducePack",
              cross_device_ops_lib.MultiWorkerAllReduce(
                  worker_devices, 2, ("pscpu/pscpu", 2, -1), 1, 0, 0)),
          combinations.NamedObject(
              "MultiWorkerAllReduceAggregation",
              cross_device_ops_lib.MultiWorkerAllReduce(
                  worker_devices, 2, ("pscpu/pscpu", 2, -1), 0, 100, 10)),
          combinations.NamedObject(
              "MultiWorkerAllReduceMultipleSpecs",
              cross_device_ops_lib.MultiWorkerAllReduce(
                  worker_devices, 2, [("pscpu/pscpu", 2, 100),
                                      ("xring", 2, -1)], 0, 0, 0)),
      ],
      distribution=[
          combinations.NamedDistribution(
              "MirroredCPU",
              lambda: mirrored_strategy.MirroredStrategy(num_gpus_per_worker=0),
              required_gpus=0),
          combinations.NamedDistribution(
              "Mirrored1GPU",
              lambda: mirrored_strategy.MirroredStrategy(num_gpus_per_worker=1),
              required_gpus=1),
          combinations.NamedDistribution(
              "Mirrored2GPUs",
              lambda: mirrored_strategy.MirroredStrategy(num_gpus_per_worker=2),
              required_gpus=2),
          # pylint: disable=g-long-lambda
          combinations.NamedDistribution(
              "CoreMirroredCPU",
              lambda: mirrored_strategy.CoreMirroredStrategy(["/device:CPU:0"]),
              required_gpus=0),
          combinations.NamedDistribution(
              "CoreMirrored1GPU",
              lambda: mirrored_strategy.CoreMirroredStrategy(["/device:GPU:0"]),
              required_gpus=1),
          combinations.NamedDistribution(
              "CoreMirrored2GPUs",
              lambda: mirrored_strategy.CoreMirroredStrategy(
                  ["/device:GPU:0", "/device:GPU:1"]),
              required_gpus=2),
      ],
      mode=["graph"])

  @combinations.generate(multi_worker_allreduce_combinations)
  def testReductionAndBroadcast(self, cross_device_ops, distribution):
    distribution.configure(cluster_spec={
        "worker":
            ["/job:worker/replica:0/task:0", "/job:worker/replica:0/task:1"]
    })
    with distribution.scope():
      self._testReductionAndBroadcast(cross_device_ops, distribution)
def all_strategy_combinations_with_graph_mode():
  return combinations.combine(distribution=all_strategies, mode=['graph'])
class MultiWorkerCollectiveAllReduceTest(
    multi_worker_test_base.MultiWorkerTestBase, parameterized.TestCase):

  collective_key_base = 100000

  @classmethod
  def setUpClass(cls):
    """Create a local cluster with 2 workers."""
    cls._cluster_spec = multi_worker_test_base.create_in_process_cluster(
        num_workers=3, num_ps=0)

  def setUp(self):
    super(MultiWorkerCollectiveAllReduceTest, self).setUp()
    # Reusing collective keys is not well supported, so give each test a
    # different collective key base.
    MultiWorkerCollectiveAllReduceTest.collective_key_base += 100000

  def _get_test_objects(self, task_type, task_id, num_gpus=0, local_mode=False):
    collective_keys = cross_device_utils.CollectiveKeys(
        group_key_start=10 * num_gpus +
        MultiWorkerCollectiveAllReduceTest.collective_key_base,
        instance_key_start=num_gpus * 100 +
        MultiWorkerCollectiveAllReduceTest.collective_key_base,
        instance_key_with_id_start=num_gpus * 10000 +
        MultiWorkerCollectiveAllReduceTest.collective_key_base)
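    # The num_gpus-scaled offsets keep the group/instance key ranges of tests
    # parameterized with different GPU counts from colliding, on top of the
    # per-test collective_key_base bump done in setUp().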
    if local_mode:
      collective_all_reduce_ops = cross_device_ops_lib.CollectiveAllReduce(
          1, num_gpus, collective_keys=collective_keys)
      if num_gpus:
        devices = ["/device:GPU:%d" % i for i in range(num_gpus)]
      else:
        devices = ["/device:CPU:0"]
      return collective_all_reduce_ops, devices, ""
    else:
      collective_all_reduce_ops = cross_device_ops_lib.CollectiveAllReduce(
          3, num_gpus, collective_keys=collective_keys)
      if num_gpus:
        devices = [
            "/job:%s/task:%d/device:GPU:%d" % (task_type, task_id, i)
            for i in range(num_gpus)
        ]
      else:
        devices = ["/job:%s/task:%d" % (task_type, task_id)]
      return (collective_all_reduce_ops, devices,
              "grpc://" + self._cluster_spec[task_type][task_id])

  def _assert_values_equal(self, left, right, sess):
    if isinstance(left, list):
      for l, r in zip(left, right):
        self._assert_values_equal(l, r, sess)
    else:
      self.assertEqual(type(left), type(right))
      self.assertEqual(set(left.devices), set(right.devices))

      run_options = config_pb2.RunOptions()
      run_options.experimental.collective_graph_key = 6

      left_values = np.array(
          sess.run(list(left._index.values()), options=run_options)).flatten()
      right_values = np.array(list(right._index.values())).flatten()
      self.assertEqual(len(left_values), len(right_values))
      for l, r in zip(left_values, right_values):
        self.assertEqual(l, r)

  def _test_reduction(self, task_type, task_id, num_gpus, local_mode=False):
    collective_all_reduce, devices, master_target = self._get_test_objects(
        task_type, task_id, num_gpus, local_mode=local_mode)
    if local_mode:
      num_workers = 1
      worker_device = None
    else:
      num_workers = len(self._cluster_spec.get("chief", [])) + len(
          self._cluster_spec.get("worker", []))
      worker_device = "/job:%s/task:%d" % (task_type, task_id)
    with ops.Graph().as_default(), \
         ops.device(worker_device), \
         self.cached_session(target=master_target) as sess:
      # Collective ops don't support scalar tensors, so we have to construct
      # 1-d tensors.
      values = [constant_op.constant([float(d)]) for d in range(len(devices))]
      per_replica = _make_per_replica(values, devices, regroup=True)
      mean = np.array([(len(devices) - 1.) / 2.])
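      # With two devices, for example, the values are [0.] and [1.], so the
      # expected mean is [0.5].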

      values_2 = [constant_op.constant([d + 1.0]) for d in range(len(devices))]
      per_replica_2 = _make_per_replica(values_2, devices)
      mean_2 = np.array([mean[0] + 1.])

      destination_mirrored = _fake_mirrored(1., devices)
      destination_different = _fake_mirrored(1., _cpu_device)
      destination_str = _cpu_device

      all_destinations = [
          destination_different, destination_mirrored, destination_str
      ]

      # test reduce()
      for destinations in all_destinations:
        self._assert_values_equal(
            collective_all_reduce.reduce(
                reduce_util.ReduceOp.MEAN,
                per_replica,
                destinations=destinations),
            _fake_mirrored(mean, destinations), sess)
        self._assert_values_equal(
            collective_all_reduce.reduce(
                reduce_util.ReduceOp.MEAN,
                per_replica_2,
                destinations=destinations),
            _fake_mirrored(mean_2, destinations), sess)
        self._assert_values_equal(
            collective_all_reduce.reduce(
                reduce_util.ReduceOp.SUM,
                per_replica,
                destinations=destinations),
            _fake_mirrored(mean * len(devices) * num_workers, destinations),
            sess)
        self._assert_values_equal(
            collective_all_reduce.reduce(
                reduce_util.ReduceOp.SUM,
                per_replica_2,
                destinations=destinations),
            _fake_mirrored(mean_2 * len(devices) * num_workers, destinations),
            sess)

      # test batch_reduce()
      for d1, d2 in itertools.product(all_destinations, all_destinations):
        self._assert_values_equal(
            collective_all_reduce.batch_reduce(reduce_util.ReduceOp.MEAN,
                                               [(per_replica, d1),
                                                (per_replica_2, d2)]),
            [
                _fake_mirrored(mean, d1),
                _fake_mirrored(mean_2, d2)
            ], sess)
        self._assert_values_equal(
            collective_all_reduce.batch_reduce(reduce_util.ReduceOp.SUM,
                                               [(per_replica, d1),
                                                (per_replica_2, d2)]),
            [
                _fake_mirrored(mean * len(devices) * num_workers, d1),
                _fake_mirrored(mean_2 * len(devices) * num_workers, d2)
            ], sess)

    return True

  @combinations.generate(
      combinations.combine(mode=["graph"], num_gpus=[0, 1, 2], required_gpus=1))
  def testReductionDistributed(self, num_gpus):
    if context.num_gpus() < num_gpus:
      return
    self._run_between_graph_clients(self._test_reduction, self._cluster_spec,
                                    num_gpus)

  # Collective ops don't support a strategy with only one device.
  def testReductionLocal(self, num_gpus=2):
    if context.num_gpus() < num_gpus:
      return
    self._test_reduction(None, None, num_gpus, local_mode=True)
from __future__ import print_function

from absl.testing import parameterized

from tensorflow.contrib.distribute.python import combinations
from tensorflow.python.eager import test
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import variables
from tensorflow.python.training import moving_averages


all_combinations = combinations.combine(
    distribution=[combinations.default_strategy,
                  combinations.one_device_strategy,
                  combinations.mirrored_strategy_with_gpu_and_cpu],
    mode=["graph"])


class AssignMovingAveragesTest(test.TestCase, parameterized.TestCase):

  @combinations.generate(all_combinations)
  def testReplicaModeWithoutZeroDebias(self, distribution):
    replica_id = [0]

    def replica_fn():
      var = variables.Variable([10.0, 11.0])
      val = constant_op.constant([1.0 + replica_id[0], 2.0 - replica_id[0]])
      replica_id[0] += 1
      decay = 0.25
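      # The snippet is cut off here; a plausible continuation, given the
      # moving_averages import above, would apply the moving average without
      # zero-debiasing, e.g.:
      #   assign = moving_averages.assign_moving_average(
      #       var, val, decay, zero_debias=False)
      #   return var, assign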
  def test_overlapping_keys(self):
    c1 = combinations.combine(mode=["graph"], loss=["callable", "tensor"])
    c2 = combinations.combine(mode=["eager"], loss=["callable"])
    with self.assertRaisesRegexp(ValueError, ".*Keys.+overlap.+"):
      _ = combinations.times(c1, c2)
class ParameterServerStrategyTest(ParameterServerStrategyTestBase,
                                  strategy_test_lib.DistributionTestBase,
                                  parameterized.TestCase):
    @classmethod
    def setUpClass(cls):
        cls._cluster_spec = multi_worker_test_base.create_in_process_cluster(
            num_workers=3, num_ps=2)
        cls._default_target = 'grpc://' + cls._cluster_spec[WORKER][0]

    def test_num_replicas_in_sync(self):
        distribution = parameter_server_strategy.ParameterServerStrategy(
            num_gpus_per_worker=2)
        # All the devices on a given worker are in sync, so the number of
        # replicas in sync equals the number of GPUs on each worker.
        self.assertEqual(2, distribution.num_replicas_in_sync)

    def testDeviceAssignmentLocalCPU(self):
        distribution = parameter_server_strategy.ParameterServerStrategy(
            num_gpus_per_worker=0)
        self._test_device_assignment_local(distribution,
                                           compute_device='CPU',
                                           variable_device='CPU',
                                           num_gpus=0)

    def testDeviceAssignmentLocalOneGPU(self):
        distribution = parameter_server_strategy.ParameterServerStrategy(
            num_gpus_per_worker=1)
        self._test_device_assignment_local(distribution,
                                           compute_device='GPU',
                                           variable_device='GPU',
                                           num_gpus=1)

    def testDeviceAssignmentLocalTwoGPUs(self):
        distribution = parameter_server_strategy.ParameterServerStrategy(
            num_gpus_per_worker=2)
        self._test_device_assignment_local(distribution,
                                           compute_device='GPU',
                                           variable_device='CPU',
                                           num_gpus=2)

    @combinations.generate(
        combinations.combine(mode=['graph'], num_gpus=[0, 1, 2]))
    def testDeviceAssignmentDistributed(self, num_gpus):
        self._test_device_assignment_distributed('worker', 1, num_gpus)

    @combinations.generate(
        combinations.combine(mode=['graph'], num_gpus=[0, 1, 2]))
    def testDeviceAssignmentDistributedEnablePartitioner(self, num_gpus):
        self._test_device_assignment_distributed_enable_partitioner(
            'worker', 1, num_gpus)

    def testSimpleBetweenGraph(self):
        self._run_between_graph_clients(self._test_simple_increment,
                                        self._cluster_spec, context.num_gpus())

    @combinations.generate(
        combinations.combine(mode=['graph'], num_gpus=[0, 1, 2]))
    def testLocalSimpleIncrement(self, num_gpus):
        self._test_simple_increment(None, 0, num_gpus)

    @combinations.generate(
        combinations.combine(mode=['graph'], num_gpus=[0, 1, 2]))
    def testMinimizeLossGraphDistributed(self, num_gpus):
        self._run_between_graph_clients(self._test_minimize_loss_graph,
                                        self._cluster_spec, num_gpus)

    @combinations.generate(
        combinations.combine(mode=['graph'], num_gpus=[0, 1, 2]))
    def testMinimizeLossGraphLocal(self, num_gpus):
        self._test_minimize_loss_graph(None, None, num_gpus)

    # TODO(priyag): Refactor this and other multi worker tests.
    @combinations.generate(
        combinations.combine(mode=['graph'], num_gpus=[1, 2], required_gpus=1))
    def testMakeInputFnIteratorDistributed(self, num_gpus):
        if context.num_gpus() < num_gpus:
            self.skipTest('Not enough GPUs')
        dataset_fn = lambda: dataset_ops.Dataset.range(100)
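        # Each step consumes num_gpus consecutive range elements, one per
        # replica, e.g. [[0, 1], [2, 3], ...] when num_gpus=2.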
        expected_values = [[i + j for j in range(num_gpus)]
                           for i in range(0, 100, num_gpus)]

        input_fn = self._input_fn_to_test_input_context(
            dataset_fn,
            expected_num_replicas_in_sync=num_gpus,
            expected_num_input_pipelines=3,
            expected_input_pipeline_id=1)  # because task_id = 1
        self._test_input_fn_iterator('worker', 1, num_gpus, input_fn,
                                     expected_values)

    @combinations.generate(
        combinations.combine(mode=['graph'], num_gpus=[1, 2], required_gpus=1))
    def testMakeInputFnIteratorLocal(self, num_gpus):
        if context.num_gpus() < num_gpus:
            self.skipTest('Not enough GPUs')
        dataset_fn = lambda: dataset_ops.Dataset.range(100)
        expected_values = [[i + j for j in range(num_gpus)]
                           for i in range(0, 100, num_gpus)]

        input_fn = self._input_fn_to_test_input_context(
            dataset_fn,
            expected_num_replicas_in_sync=num_gpus,
            expected_num_input_pipelines=1,
            expected_input_pipeline_id=0
        )  # only one worker and pipeline for local.
        self._test_input_fn_iterator(None, None, num_gpus, input_fn,
                                     expected_values)

    def testGlobalStepUpdate(self):
        strategy = parameter_server_strategy.ParameterServerStrategy(
            num_gpus_per_worker=context.num_gpus())
        self._test_global_step_update(strategy)

    def testUpdateConfigProtoMultiWorker(self):
        distribution = parameter_server_strategy.ParameterServerStrategy(
            num_gpus_per_worker=2)
        distribution.configure(cluster_spec=self._cluster_spec,
                               task_type='worker',
                               task_id=1)

        config_proto = config_pb2.ConfigProto(
            device_filters=['to_be_overridden'])

        new_config = distribution.update_config_proto(config_proto)

        # Verify device filters.
        self.assertEqual(['/job:worker/task:1', '/job:ps'],
                         new_config.device_filters)

        # Verify isolate_session_state
        self.assertFalse(new_config.isolate_session_state)

    def testUpdateConfigProtoLocal(self):
        distribution = parameter_server_strategy.ParameterServerStrategy(
            num_gpus_per_worker=2)

        config_proto = config_pb2.ConfigProto()
        new_config = distribution.update_config_proto(config_proto)

        # Verify isolate_session_state
        self.assertTrue(new_config.isolate_session_state)
Example #46
class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
    def _get_iterator(self, ds):
        if context.executing_eagerly():
            iterator = ds.make_one_shot_iterator()
        else:
            iterator = ds.make_initializable_iterator()
            self.evaluate(iterator.initializer)
        return iterator

    @combinations.generate(
        combinations.times(
            combinations.distributions_and_v1_optimizers(),
            combinations.combine(mode=["graph"],
                                 use_callable_loss=[True, False]) +
            combinations.combine(mode=["eager"], use_callable_loss=[True])) +
        combinations.combine(distribution=[combinations.tpu_strategy],
                             optimizer_fn=combinations.optimizers_v1,
                             mode=["graph"],
                             use_callable_loss=[True, False]))
    def testTrainNetwork(self, distribution, optimizer_fn, use_callable_loss):
        with distribution.scope():
            model_fn, dataset_fn, layer = minimize_loss_example(
                optimizer_fn,
                use_bias=True,
                use_callable_loss=use_callable_loss)

            def step_fn(ctx, inputs):
                del ctx  # Unused
                return distribution.group(
                    distribution.call_for_each_replica(model_fn, args=inputs))

            iterator = self._get_iterator(
                distribution.distribute_dataset(dataset_fn))

            def run_step():
                return distribution.run_steps_on_dataset(step_fn,
                                                         iterator,
                                                         iterations=2).run_op

            self.evaluate(distribution.initialize())
            if not context.executing_eagerly():
                with self.cached_session() as sess:
                    run_step = sess.make_callable(run_step())
            self.evaluate(variables_lib.global_variables_initializer())

            weights, biases = [], []
            for _ in range(5):
                run_step()

                weights.append(self.evaluate(layer.kernel))
                biases.append(self.evaluate(layer.bias))

            self.evaluate(distribution.finalize())

            error = abs(
                numpy.add(numpy.squeeze(weights), numpy.squeeze(biases)) - 1)
            is_not_increasing = all(y <= x for x, y in zip(error, error[1:]))
            self.assertTrue(is_not_increasing)

    @combinations.generate(
        combinations.times(
            combinations.distributions_and_v1_optimizers(),
            combinations.combine(mode=["graph"],
                                 use_callable_loss=[True, False]) +
            combinations.combine(mode=["eager"], use_callable_loss=[True])))
    def testTrainNetworkByCallForEachReplica(self, distribution, optimizer_fn,
                                             use_callable_loss):
        with distribution.scope():
            model_fn, dataset_fn, layer = minimize_loss_example(
                optimizer_fn,
                use_bias=True,
                use_callable_loss=use_callable_loss)

            iterator = self._get_iterator(
                distribution.distribute_dataset(dataset_fn))

            def run_step():
                return distribution.group(
                    distribution.call_for_each_replica(
                        model_fn, args=(iterator.get_next(), )))

            if not context.executing_eagerly():
                with self.cached_session() as sess:
                    run_step = sess.make_callable(run_step())
            self.evaluate(variables_lib.global_variables_initializer())

            weights, biases = [], []
            for _ in range(10):
                run_step()

                weights.append(self.evaluate(layer.kernel))
                biases.append(self.evaluate(layer.bias))

            error = abs(
                numpy.add(numpy.squeeze(weights), numpy.squeeze(biases)) - 1)
            is_not_increasing = all(y <= x for x, y in zip(error, error[1:]))
            self.assertTrue(is_not_increasing)

    @combinations.generate(
        combinations.times(
            combinations.distributions_and_v1_optimizers() +
            combinations.distributions_and_v2_optimizers(),
            combinations.combine(mode=["graph", "eager"])) +
        combinations.combine(distribution=[combinations.tpu_strategy],
                             optimizer_fn=combinations.optimizers_v1 +
                             combinations.optimizers_v2,
                             mode=["graph"]))
    def testOptimizerInsideModelFn(self, distribution, optimizer_fn):
        created_variables = []
        trainable_variables = []

        def appending_creator(next_creator, *args, **kwargs):
            v = next_creator(*args, **kwargs)
            created_variables.append(v.name)
            if "trainable" in kwargs and kwargs["trainable"]:
                trainable_variables.append(v.name)
            return v

        # Creator scope needs to be set before it's used inside
        # `distribution.scope`.
        with variable_scope.variable_creator_scope(
                appending_creator), distribution.scope():
            model_fn, dataset_fn, layer = minimize_loss_example(
                optimizer_fn,
                use_bias=True,
                use_callable_loss=True,
                create_optimizer_inside_model_fn=True)

            def step_fn(ctx, inputs):
                del ctx  # Unused
                return distribution.group(
                    distribution.call_for_each_replica(model_fn, args=inputs))

            iterator = self._get_iterator(
                distribution.distribute_dataset(dataset_fn))

            def run_step():
                return distribution.run_steps_on_dataset(step_fn,
                                                         iterator,
                                                         iterations=1).run_op

            self.evaluate(distribution.initialize())
            if not context.executing_eagerly():
                with self.cached_session() as sess:
                    run_step = sess.make_callable(run_step())
            self.evaluate(variables_lib.global_variables_initializer())

            run_step()

            self.evaluate(distribution.finalize())

            def get_expected_variables(optimizer_fn, num_parameter_devices):
                variables_map = {
                    "GradientDescent": ["dense/kernel", "dense/bias"],
                    "Adagrad": [
                        "dense/kernel/Adagrad", "dense/kernel",
                        "dense/bias/Adagrad", "dense/bias"
                    ]
                }
                variables = variables_map[optimizer_fn().get_name()]
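                # MirroredStrategy keeps a copy of each variable per parameter
                # device; copies beyond device 0 carry a "/replica_<i>" suffix.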
                variables.extend([
                    v + "/replica_{}".format(replica) for v in variables
                    for replica in range(1, num_parameter_devices)
                ])
                return set([v + ":0" for v in variables])

            self.assertEqual(
                get_expected_variables(optimizer_fn,
                                       len(distribution.parameter_devices)),
                set(created_variables))

    @combinations.generate(
        combinations.times(
            combinations.combine(momentum=[0.8, 0.9, 0.99],
                                 renorm=[False, True]),
            combinations.times(
                combinations.distributions_and_v1_optimizers(),
                combinations.combine(
                    mode=["graph", "eager"],
                    # TODO(isaprykin):  Allow False here.  Currently subsequent
                    # replicas will re-execute UPDATE_OPS of previous replicas.
                    update_ops_in_cross_replica_mode=[True])) +
            combinations.combine(distribution=[combinations.tpu_strategy],
                                 optimizer_fn=combinations.optimizers_v1,
                                 mode=["graph"],
                                 update_ops_in_cross_replica_mode=[False])))
    def testTrainNetworkWithBatchNorm(self, distribution, optimizer_fn,
                                      momentum, renorm,
                                      update_ops_in_cross_replica_mode):
        """Verifies that moving mean updates are reduced across replicas."""
        with distribution.scope():
            num_replicas = distribution.num_replicas_in_sync
            model_fn, dataset_fn, batchnorm = batchnorm_example(
                optimizer_fn,
                batch_per_epoch=num_replicas,
                momentum=momentum,
                renorm=renorm,
                update_ops_in_replica_mode=not update_ops_in_cross_replica_mode
            )

            def step_fn(ctx, inputs):
                del ctx  # Unused
                fetches = distribution.unwrap(
                    distribution.call_for_each_replica(model_fn, args=inputs))
                if update_ops_in_cross_replica_mode:
                    fetches += ops.get_collection(ops.GraphKeys.UPDATE_OPS)
                return control_flow_ops.group(fetches)

            iterator = self._get_iterator(
                distribution.distribute_dataset(dataset_fn))

            def run_step():
                return distribution.run_steps_on_dataset(step_fn,
                                                         iterator,
                                                         iterations=1).run_op

            self.evaluate(distribution.initialize())
            if not context.executing_eagerly():
                with self.cached_session() as sess:
                    run_step = sess.make_callable(run_step())
            self.evaluate(variables_lib.global_variables_initializer())

            expected_moving_means = [0.] * 8

            def averaged_batch_mean(i):
                # Each batch has shape [16, 8], where the ith element of the
                # jth row is (8 * j + i + replica_id * 100). The batch mean in
                # each replica is therefore (60 + i + replica_id * 100), and
                # its average over all replicas is:
                return 60. + i + (num_replicas - 1.) / 2. * 100.

            for _ in range(10):
                run_step()
                moving_means = self.evaluate(batchnorm.moving_mean)

                # Check that moving_mean is updated as if the sample mean were
                # computed over all replicas.
                for i, expected_moving_mean in enumerate(
                        expected_moving_means):
                    expected_moving_means[i] -= (
                        (expected_moving_mean - averaged_batch_mean(i)) *
                        (1.0 - momentum))
                    self.assertNear(expected_moving_means[i], moving_means[i],
                                    0.0001)

            self.evaluate(distribution.finalize())

    @combinations.generate(
        combinations.times(
            combinations.combine(
                optimizer_fn=[
                    combinations.gradient_descent_optimizer_v1_fn,
                    combinations.gradient_descent_optimizer_v2_fn
                ],
                loss_reduction=[
                    losses_impl.Reduction.SUM, losses_impl.Reduction.MEAN,
                    losses_impl.Reduction.SUM_OVER_BATCH_SIZE,
                    losses_impl.Reduction.SUM_OVER_NONZERO_WEIGHTS
                ]),
            combinations.times(
                combinations.combine(distribution=[
                    combinations.one_device_strategy,
                    combinations.mirrored_strategy_with_gpu_and_cpu,
                    combinations.mirrored_strategy_with_two_gpus,
                    combinations.core_mirrored_strategy_with_gpu_and_cpu,
                    combinations.core_mirrored_strategy_with_two_gpus
                ]),
                combinations.combine(mode=["graph"],
                                     use_callable_loss=[True, False]) +
                combinations.combine(mode=["eager"], use_callable_loss=[True]))
            + combinations.combine(distribution=[combinations.tpu_strategy],
                                   mode=["graph"],
                                   use_callable_loss=[True, False])))
    def testMeanVsSum(self, distribution, optimizer_fn, loss_reduction,
                      use_callable_loss):
        with distribution.scope():
            all_vars = []

            def model_fn(x, y):
                def loss_fn():
                    # Use fixed initialization to make the steps deterministic.
                    w = variable_scope.get_variable("w", initializer=[[2.]])
                    all_vars.append(w)
                    predict = math_ops.matmul(x, w)
                    return losses_impl.mean_squared_error(
                        y, predict, reduction=loss_reduction)

                optimizer = optimizer_fn()  # GradientDescent with 0.2 learning rate

                if use_callable_loss:
                    return optimizer.minimize(loss_fn)
                else:
                    return optimizer.minimize(loss_fn())

            def dataset_fn():
                features = dataset_ops.Dataset.from_tensors([[2.], [7.]])
                labels = dataset_ops.Dataset.from_tensors([[6.], [21.]])
                return dataset_ops.Dataset.zip((features, labels)).repeat()

            def step_fn(ctx, inputs):
                del ctx  # Unused
                return distribution.group(
                    distribution.call_for_each_replica(model_fn, args=inputs))

            iterator = self._get_iterator(
                distribution.distribute_dataset(dataset_fn))

            def run_step():
                return distribution.run_steps_on_dataset(step_fn,
                                                         iterator,
                                                         iterations=1).run_op

            self.evaluate(distribution.initialize())
            if not context.executing_eagerly():
                with self.cached_session() as sess:
                    run_step = sess.make_callable(run_step())
            self.evaluate(variables_lib.global_variables_initializer())

            run_step()

            v = all_vars[0]
            self.assertTrue(all(v is vi for vi in all_vars[1:]))
            weight = numpy.squeeze(self.evaluate(v))
            # Our model is:
            #   predict = x * w
            #   loss = (predict - y)^2
            #   dloss/dpredict = 2*(predict - y)
            #   dloss/dw = 2 * x^T @ (predict - y)
            # For our batch size of 2, assuming sum loss reduction:
            #   x = [2, 7]
            #   y = [6, 21]
            #   w_initial = 2
            #   predict = [4, 14]
            #   predict - y = [-2, -7]
            #   dloss/dw = 2 * <[2, 7], [-2, -7]> = 2 * (-4 - 49) = -106
            # So, unreplicated, the update to w with lr=0.2 is -0.2 * -106 = 21.2
            # with sum loss reduction, or 10.6 with mean.
            if loss_reduction == losses_impl.Reduction.SUM:
                # Note that the "distribution.num_replicas_in_sync" factor will go away
                # once we split the input across replicas, instead of pulling a complete
                # batch of input per replica.
                self.assertNear(weight,
                                2 + 21.2 * distribution.num_replicas_in_sync,
                                0.0001)
            else:
                # One of the mean loss reductions.
                self.assertNear(weight, 2 + 10.6, 0.0001)

            self.evaluate(distribution.finalize())

    @combinations.generate(
        combinations.times(combinations.distributions_and_v1_optimizers(),
                           combinations.combine(mode=["graph", "eager"]),
                           combinations.combine(is_tpu=[False])) +
        combinations.combine(distribution=[combinations.tpu_strategy],
                             optimizer_fn=combinations.optimizers_v1,
                             mode=["graph"],
                             is_tpu=[True]))
    def testRunStepsWithOutputContext(self, distribution, optimizer_fn,
                                      is_tpu):
        with distribution.scope():

            def dataset_fn():
                dataset = dataset_ops.Dataset.from_tensors([[1.]]).repeat()
                # TODO(priyag): batch with drop_remainder=True causes shapes to be
                # fully defined for TPU. Remove this when XLA supports dynamic shapes.
                return dataset.batch(batch_size=1, drop_remainder=True)

            optimizer = optimizer_fn()
            layer = core.Dense(1, use_bias=True)

            key1 = "foo"
            value1 = "bar"

            def model_fn(output_context, x):
                """A very simple model written by the user."""
                def loss_fn():
                    y = array_ops.reshape(layer(x),
                                          []) - constant_op.constant(1.)
                    return y * y

                train_op = optimizer.minimize(loss_fn)
                loss = loss_fn()
                output_context.set_last_step_output(
                    name="replica_loss_reduced",
                    output=loss,
                    reduce_op=reduce_util.ReduceOp.MEAN)
                output_context.set_non_tensor_output(key1, value1)
                return (train_op, loss)

            def step_fn(output_context, inputs):
                (train_op, loss) = distribution.call_for_each_replica(
                    model_fn, args=(output_context, ) + inputs)
                output_context.set_last_step_output(
                    name="cross_replica_loss_reduced",
                    output=loss,
                    reduce_op=reduce_util.ReduceOp.MEAN)
                output_context.set_last_step_output(
                    name="cross_replica_loss_not_reduced", output=loss)
                return distribution.group(train_op)

            iterator = self._get_iterator(
                distribution.distribute_dataset(dataset_fn))

            def run_step():
                initial_loss = lambda: constant_op.constant(1e7)
                # Initial values corresponding to reduced losses are just
                # single tensors. But for non-reduced losses, we need initial
                # values that have the same structure as the non-reduced
                # losses: in MirroredStrategy this is a list of losses, in
                # TPUStrategy a single tensor. Using `broadcast` followed by
                # `unwrap` gives us the desired initial value structure.
                initial_loop_values = {
                    "replica_loss_reduced": initial_loss(),
                    "cross_replica_loss_reduced": initial_loss(),
                    "cross_replica_loss_not_reduced":
                        distribution.unwrap(
                            distribution.broadcast(initial_loss()))
                }
                ctx = distribution.run_steps_on_dataset(
                    step_fn,
                    iterator,
                    iterations=2,
                    initial_loop_values=initial_loop_values)

                self.assertEqual({key1: [value1]}, ctx.non_tensor_outputs)
                self._verify_loss_output(
                    initial_loss(),
                    loss_output=ctx.last_step_outputs["replica_loss_reduced"],
                    reduced=True,
                    distribution=distribution)
                self._verify_loss_output(
                    initial_loss(),
                    loss_output=ctx.last_step_outputs[
                        "cross_replica_loss_reduced"],
                    reduced=True,
                    distribution=distribution)
                self._verify_loss_output(
                    initial_loss(),
                    loss_output=ctx.last_step_outputs[
                        "cross_replica_loss_not_reduced"],
                    reduced=False,
                    distribution=distribution)
                return (ctx.run_op,
                        ctx.last_step_outputs["replica_loss_reduced"])

            self.evaluate(distribution.initialize())
            if not context.executing_eagerly():
                with self.cached_session() as sess:
                    run_step = sess.make_callable(run_step())
            self.evaluate(variables_lib.global_variables_initializer())

            weights, biases, losses = [], [], []
            for _ in range(5):
                _, loss = run_step()
                losses.append(loss)
                weights.append(self.evaluate(layer.kernel))
                biases.append(self.evaluate(layer.bias))

            self.evaluate(distribution.finalize())

            loss_is_not_increasing = all(y <= x
                                         for x, y in zip(losses, losses[1:]))
            self.assertTrue(loss_is_not_increasing)

            error = abs(
                numpy.add(numpy.squeeze(weights), numpy.squeeze(biases)) - 1)
            error_is_not_increasing = all(y <= x
                                          for x, y in zip(error, error[1:]))
            self.assertTrue(error_is_not_increasing)

    def _verify_loss_output(self, initial_loss, loss_output, reduced,
                            distribution):
        if not reduced:
            self.assertLen(distribution.unwrap(loss_output),
                           distribution.num_replicas_in_sync)
            loss_tensor = distribution.reduce(reduce_util.ReduceOp.MEAN,
                                              loss_output)
        else:
            unwrapped_output = distribution.unwrap(loss_output)
            self.assertLen(unwrapped_output, 1)
            loss_tensor = unwrapped_output[0]
        self.assertEqual(initial_loss.dtype, loss_tensor.dtype)
        self.assertEqual(initial_loss.shape, loss_tensor.shape)
Example #47
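# The fragment below begins mid-method; a plausible (hypothetical) enclosing
# signature would be `def testTensorConversion(self):` on a replica-local
# values test case.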
    with context.graph_mode():
      _, replica_local = _make_replica_local(
          variable_scope.VariableAggregation.SUM)
      converted = ops.internal_convert_to_tensor(replica_local, as_ref=False)
      self.assertIsInstance(converted, ops.Tensor)
      self.assertEqual(converted.dtype, replica_local.dtype)

      converted = ops.internal_convert_to_tensor(replica_local, as_ref=True)
      # Resource variables are converted to tensors as well when as_ref is True.
      self.assertIsInstance(converted, ops.Tensor)
      self.assertEqual(converted.dtype, replica_local.dtype)


@combinations.generate(combinations.combine(
    distribution=[
        combinations.mirrored_strategy_with_gpu_and_cpu,
        combinations.core_mirrored_strategy_with_gpu_and_cpu],
    mode=["graph", "eager"]))
class SyncOnReadVariableTest(test.TestCase, parameterized.TestCase):

  def _assign_replica_local(self, devices, v, new):
    for d, var, n in zip(devices, v, new):
      with ops.device(d):
        self.evaluate(var.assign(n))

  def _save_return_saver(self, sess, var):
    saver = saver_lib.Saver(var_list=[var])
    test_dir = self.get_temp_dir()
    prefix = os.path.join(test_dir, "ckpt")
    return saver.save(sess, prefix), saver
def all_strategy_and_input_config_combinations():
  return (
      combinations.times(
          combinations.combine(distribution=all_strategies),
          eager_mode_test_configuration() + graph_mode_test_configuration()))
"""Tests for class OneDeviceStrategy."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.contrib.distribute.python import combinations
from tensorflow.contrib.distribute.python import strategy_test_lib
from tensorflow.python.data.ops import dataset_ops
from tensorflow.python.eager import context
from tensorflow.python.eager import test


@combinations.generate(combinations.combine(
    distribution=[
        combinations.one_device_strategy,
        combinations.one_device_strategy_gpu],
    mode=["eager", "graph"]))
class OneDeviceStrategyTest(
    strategy_test_lib.DistributionTestBase,
    strategy_test_lib.OneDeviceDistributionTestBase):

  def testMinimizeLoss(self, distribution):
    if context.executing_eagerly():
      self._test_minimize_loss_eager(distribution)
    else:
      self._test_minimize_loss_graph(distribution)

  def testReplicaId(self, distribution):
    self._test_replica_id(distribution)
Example #50
class TestDistributionStrategyWithNumpyArrays(test.TestCase,
                                              parameterized.TestCase):
    @combinations.generate(strategy_for_numpy_input_combinations())
    def test_calling_model_with_numpy_arrays(self, distribution):
        with self.cached_session():
            model = get_model()

            optimizer = gradient_descent.GradientDescentOptimizer(0.001)
            loss = 'mse'
            metrics = ['mae']
            model.compile(optimizer,
                          loss,
                          metrics=metrics,
                          distribute=distribution)

            inputs = np.zeros((64, 3), dtype=np.float32)
            targets = np.zeros((64, 4), dtype=np.float32)

            # Call fit with validation data
            model.fit(inputs,
                      targets,
                      epochs=1,
                      batch_size=2,
                      verbose=0,
                      validation_data=(inputs, targets))

            # TODO(anjalisridhar): We need tests for when the batch size and steps are
            # smaller and result in a 0 batch_size and steps value.
            model.evaluate(inputs, targets)
            # with steps
            model.evaluate(inputs, targets, steps=2)
            # with batch_size
            model.evaluate(inputs, targets, batch_size=8)

            model.predict(inputs)
            # with steps
            model.predict(inputs, steps=2)
            # with batch_size
            model.predict(inputs, batch_size=8)

    @combinations.generate(strategy_for_numpy_input_combinations())
    def test_calling_model_with_nested_numpy_arrays(self, distribution):
        with self.cached_session():
            model = multi_input_output_model()

            optimizer = gradient_descent.GradientDescentOptimizer(
                learning_rate=0.001)
            loss = 'mse'
            model.compile(optimizer, loss, distribute=distribution)

            input_a_np = np.asarray(np.random.random((64, 3)),
                                    dtype=np.float32)
            input_b_np = np.asarray(np.random.random((64, 5)),
                                    dtype=np.float32)
            inputs = [input_a_np, input_b_np]

            output_d_np = np.asarray(np.random.random((64, 7)),
                                     dtype=np.float32)
            output_e_np = np.asarray(np.random.random((64, 7)),
                                     dtype=np.float32)
            targets = [output_d_np, output_e_np]

            # Call fit with validation data
            model.fit(inputs, targets, epochs=1, batch_size=8, verbose=0)

            # TODO(anjalisridhar): We need tests for when the batch size and steps are
            # smaller and result in a 0 batch_size and steps value.
            model.evaluate(inputs, targets)
            # with steps
            model.evaluate(inputs, targets, steps=2)
            # with batch_size
            model.evaluate(inputs, targets, batch_size=8)

            model.predict(inputs)
            # with steps
            model.predict(inputs, steps=2)
            # with batch_size
            model.predict(inputs, batch_size=8)

    @combinations.generate(
        combinations.combine(distribution=strategies_minus_tpu,
                             mode=['graph']))
    def test_numpy_with_sample_weights(self, distribution):
        model = get_model()
        optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001)
        loss = 'mse'
        model.compile(optimizer, loss, distribute=distribution)

        inputs = np.zeros((20, 3), np.float32)
        targets = np.zeros((20, 4), np.float32)
        sample_weights = np.ones((20), np.float32)

        model.fit(inputs,
                  targets,
                  sample_weight=sample_weights,
                  epochs=1,
                  steps_per_epoch=2,
                  verbose=1)

    @combinations.generate(strategy_for_numpy_input_combinations())
    def test_flatten_predict_outputs(self, distribution):
        with self.cached_session():
            model = multi_input_output_model()

            optimizer = gradient_descent.GradientDescentOptimizer(
                learning_rate=0.001)
            loss = 'mse'
            model.compile(optimizer, loss, distribute=distribution)

            # We take 6 input samples with each input having a dimension of 3 or 5.
            input_a_np = np.asarray(np.random.random((6, 3)), dtype=np.float32)
            input_b_np = np.asarray(np.random.random((6, 5)), dtype=np.float32)
            inputs = [input_a_np, input_b_np]

            outs = model.predict(inputs, steps=1)
            # `predict` returns a list that is equal in length to the number of
            # model outputs. In this test our model has two outputs, and each
            # element of `outs` corresponds to all the samples of one output.
            self.assertLen(outs, 2)
            # Each of the output samples has a dimension of 7. We should
            # process all of the available input samples (6).
            self.assertAllEqual([6, 7], outs[0].shape)
            self.assertAllEqual([6, 7], outs[1].shape)
def graph_mode_test_configuration():
  return combinations.combine(mode='graph',
                              use_numpy=[True, False],
                              use_validation_data=[True, False])
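# For reference, the helper above expands to four OrderedDicts, e.g.
# OrderedDict([("mode", "graph"), ("use_numpy", True),
#              ("use_validation_data", True)]).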
Example #52
class TestDistributionStrategyWithDatasets(test.TestCase,
                                           parameterized.TestCase):
    @combinations.generate(all_strategy_combinations())
    def test_calling_model_on_same_dataset(self, distribution):
        with self.cached_session():
            model = get_model()

            optimizer = gradient_descent.GradientDescentOptimizer(0.001)
            loss = 'mse'
            metrics = ['mae', keras.metrics.CategoricalAccuracy()]
            model.compile(optimizer,
                          loss,
                          metrics=metrics,
                          distribute=distribution)

            dataset = get_dataset(distribution)

            # Call fit with validation data
            model.fit(dataset,
                      epochs=1,
                      steps_per_epoch=2,
                      verbose=0,
                      validation_data=dataset,
                      validation_steps=2)
            model.fit(dataset,
                      epochs=1,
                      steps_per_epoch=2,
                      verbose=0,
                      validation_data=dataset,
                      validation_steps=2)
            model.predict(get_predict_dataset(distribution), steps=2)

    @combinations.generate(all_strategy_combinations())
    def test_model_interleaved_eval_same_as_direct_eval(self, distribution):
        with self.cached_session():
            user_controlled_model = get_model()
            user_controlled_model.compile(
                gradient_descent.GradientDescentOptimizer(0.001),
                loss='mse',
                metrics=['mae', keras.metrics.CategoricalAccuracy()],
                distribute=distribution)

            interleaved_model = get_model()
            interleaved_model.set_weights(user_controlled_model.get_weights())
            interleaved_model.compile(
                gradient_descent.GradientDescentOptimizer(0.001),
                loss='mse',
                metrics=['mae', keras.metrics.CategoricalAccuracy()],
                distribute=distribution)

            dataset = get_dataset(distribution)

            # Call fit with validation interleaved
            interleaved_output = interleaved_model.fit(dataset,
                                                       epochs=2,
                                                       steps_per_epoch=2,
                                                       verbose=1,
                                                       validation_data=dataset,
                                                       validation_steps=2,
                                                       shuffle=False)

            # Manually control the validation running after each epoch.
            user_controlled_output = []
            for _ in range(2):
                user_controlled_model.fit(dataset,
                                          epochs=1,
                                          steps_per_epoch=2,
                                          verbose=1,
                                          shuffle=False)
                user_controlled_output.append(
                    user_controlled_model.evaluate(dataset, steps=2))

            self.assertEqual(interleaved_output.history['val_loss'],
                             [x[0] for x in user_controlled_output])
            self.assertEqual(
                interleaved_output.history['val_mean_absolute_error'],
                [x[1] for x in user_controlled_output])
            self.assertEqual(
                interleaved_output.history['val_categorical_accuracy'],
                [x[2] for x in user_controlled_output])

    # TODO(priyag): Enable this test for TPU. Currently tuples/dicts don't work
    # as clone_model's input_tensors argument only seems to accept lists, not
    # tuples or dicts.

    @combinations.generate(
        combinations.combine(distribution=[
            combinations.mirrored_strategy_with_gpu_and_cpu,
            combinations.core_mirrored_strategy_with_gpu_and_cpu
        ],
                             mode=['graph', 'eager']))
    def test_fit_with_tuple_and_dict_dataset_inputs(self, distribution):
        with self.cached_session():
            model = multi_input_output_model()

            optimizer = gradient_descent.GradientDescentOptimizer(
                learning_rate=0.001)
            loss = 'mse'
            metrics = ['mae', keras.metrics.CategoricalAccuracy()]
            model.compile(optimizer,
                          loss,
                          metrics=metrics,
                          distribute=distribution)

            input_a_np = np.random.random((10, 3))
            input_b_np = np.random.random((10, 5))
            output_d_np = np.random.random((10, 7))
            output_e_np = np.random.random((10, 7))

            # Test with tuples
            dataset_tuple = dataset_ops.Dataset.from_tensor_slices(
                ((input_a_np, input_b_np), (output_d_np, output_e_np)))
            dataset_tuple = dataset_tuple.repeat(100)
            dataset_tuple = dataset_tuple.batch(10)

            model.fit(dataset_tuple, epochs=1, steps_per_epoch=2, verbose=1)

            # Test with dict
            dataset_dict = dataset_ops.Dataset.from_tensor_slices(
                ({'input_a': input_a_np, 'input_b': input_b_np},
                 (output_d_np, output_e_np)))
            dataset_dict = dataset_dict.repeat(100)
            dataset_dict = dataset_dict.batch(10)

            model.fit(dataset_dict, epochs=1, steps_per_epoch=2, verbose=1)

    @combinations.generate(all_strategy_combinations())
    def test_fit_eval_and_predict_methods_on_dataset(self, distribution):
        with self.cached_session():
            model = get_model()

            optimizer = gradient_descent.GradientDescentOptimizer(0.001)
            loss = 'mse'
            metrics = ['mae', keras.metrics.CategoricalAccuracy()]
            model.compile(optimizer,
                          loss,
                          metrics=metrics,
                          distribute=distribution)

            dataset = get_dataset(distribution)

            model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1)
            model.evaluate(dataset, steps=2, verbose=1)
            model.predict(get_predict_dataset(distribution), steps=2)

    @combinations.generate(strategy_and_optimizer_combinations())
    def test_fit_eval_and_predict_with_optimizer(self, distribution,
                                                 optimizer):
        with self.cached_session():
            model = get_model()

            loss = 'mse'
            model.compile(optimizer(), loss, distribute=distribution)

            dataset = get_dataset(distribution)

            model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1)
            model.evaluate(dataset, steps=2, verbose=1)
            model.predict(get_predict_dataset(distribution), steps=2)

    @combinations.generate(strategy_minus_tpu_combinations())
    def test_dataset_with_sample_weights(self, distribution):
        model = get_model()
        optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001)
        loss = 'mse'
        model.compile(optimizer, loss, distribute=distribution)

        inputs = np.zeros((10, 3), np.float32)
        targets = np.zeros((10, 4), np.float32)
        sample_weights = np.ones((10), np.float32)
        dataset = dataset_ops.Dataset.from_tensor_slices(
            (inputs, targets, sample_weights))
        dataset = dataset.repeat()
        dataset = dataset.batch(10)

        model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1)
        model.evaluate(dataset, steps=2, verbose=1)
        model.predict(dataset, steps=2)

    @combinations.generate(
        combinations.combine(distribution=[
            combinations.mirrored_strategy_with_gpu_and_cpu,
            combinations.core_mirrored_strategy_with_gpu_and_cpu
        ],
                             mode=['graph', 'eager']))
    # TODO(b/120943676, b/120957836): Re-enable once the validation code is
    # restored.
    def DISABLED_test_dataset_wrong_input_shape(self, distribution):
        with self.cached_session():
            model = get_model()

            optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001)
            loss = 'mse'
            model.compile(optimizer, loss, distribute=distribution)

            # Wrong input shape
            inputs = np.zeros((10, 5), dtype=np.float32)
            targets = np.zeros((10, 4), dtype=np.float32)
            dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets))
            dataset = dataset.repeat(100)
            dataset = dataset.batch(10)

            with self.assertRaisesRegexp(ValueError,
                                         'expected input to have shape'):
                model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0)

    @combinations.generate(
        combinations.combine(
            distribution=[combinations.mirrored_strategy_with_gpu_and_cpu],
            mode=['graph', 'eager']))
    # TODO(b/120943676, b/120957836): Re-enable once the validation code is
    # restored.
    def DISABLED_test_dataset_no_batch_input_validation(self, distribution):
        with self.cached_session():
            model = get_model()

            optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001)
            loss = 'mse'
            model.compile(optimizer, loss, distribute=distribution)

            # User forgets to batch the dataset
            inputs = np.zeros((10, 3), dtype=np.float32)
            targets = np.zeros((10, 4), dtype=np.float32)
            dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets))
            dataset = dataset.repeat(100)

            with self.assertRaisesRegexp(ValueError,
                                         'expected input to have shape'):
                model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0)

    @combinations.generate(
        combinations.combine(distribution=[combinations.tpu_strategy_one_step],
                             mode=['graph']))
    def test_dataset_input_shape_fully_defined(self, distribution):
        with self.cached_session():
            model = get_model()

            optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001)
            loss = 'mse'
            model.compile(optimizer, loss, distribute=distribution)

            dataset = get_dataset(distribution)
            # Input shapes are not fully defined: the batch dimension is
            # unknown because we are not using the drop_remainder argument.
            dataset = dataset.repeat(100).batch(10)

            with self.assertRaisesRegexp(ValueError,
                                         'requires fully defined shapes'):
                model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0)

    @combinations.generate(
        combinations.combine(distribution=[
            combinations.mirrored_strategy_with_gpu_and_cpu,
            combinations.mirrored_strategy_with_two_gpus,
            combinations.core_mirrored_strategy_with_gpu_and_cpu,
            combinations.core_mirrored_strategy_with_two_gpus
        ],
                             mode=['graph', 'eager']))
    def test_learning_phase_value(self, distribution):
        # TODO(anjalisridhar): Modify this test to use Lambdas since we can compare
        # meaningful values. Currently we don't pass the learning phase if the
        # Lambda layer uses the learning phase.
        with self.cached_session():
            x = keras.layers.Input(shape=(1, ), name='input')
            y = keras.layers.Dense(1, kernel_initializer='ones')(x)
            z = keras.layers.Dropout(0.9999)(y)
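            # A dropout rate this close to 1 zeroes out the activations during
            # training, so training accuracy stays near 0; at inference time
            # dropout is disabled and the activations pass through unchanged.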
            model = keras.Model(x, z)
            initial_weights = model.get_weights()

            optimizer = gradient_descent.GradientDescentOptimizer(0.005)
            loss = 'mse'
            metrics = ['acc']
            model.compile(optimizer,
                          loss,
                          metrics=metrics,
                          distribute=distribution)

            batch_size = 8
            if isinstance(distribution,
                          mirrored_strategy.CoreMirroredStrategy):
                # CoreMirroredStrategy uses global batch size.
                batch_size = 8 * distribution.num_replicas_in_sync

            inputs = np.ones((10, 1), dtype=np.float32)
            targets = np.ones((10, 1), dtype=np.float32)
            dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets))
            dataset = dataset.repeat().batch(batch_size)
            hist = model.fit(dataset, epochs=1, steps_per_epoch=20, verbose=1)
            self.assertAlmostEqual(hist.history['acc'][0], 0, 0)

            model.set_weights(initial_weights)
            # TODO(psv/anjalisridhar): Enable these lines after we fix b/117431185.
            # evaluate_output = model.evaluate(dataset, steps=20)
            # self.assertAlmostEqual(evaluate_output[1], 1, 0)

            inputs = np.ones((10, 1), dtype=np.float32)
            predict_dataset = dataset_ops.Dataset.from_tensor_slices(inputs)

            predict_dataset = predict_dataset.repeat().batch(batch_size)
            output = model.predict(predict_dataset, steps=10)
            # `predict` runs for 10 steps
            ref_output = np.ones((160, 1), dtype=np.float32)
            self.assertArrayNear(output, ref_output, 1e-1)

    @combinations.generate(strategy_minus_tpu_combinations())
    def testOptimizerWithCallbacks(self, distribution):
        with self.cached_session():
            model = get_model()

            optimizer = gradient_descent_keras.SGD(0.01)
            loss = 'mse'
            model.compile(optimizer, loss, distribute=distribution)

            dataset = get_dataset(distribution)

            def schedule(_):
                return 0.001

            model.fit(
                dataset,
                epochs=1,
                steps_per_epoch=2,
                verbose=0,
                callbacks=[keras.callbacks.LearningRateScheduler(schedule)])
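            # After one epoch the scheduler should have replaced the SGD
            # optimizer's initial 0.01 learning rate with 0.001 on every
            # replica-local model; the loop below verifies this.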
            grouped_models = distribution.unwrap(
                distributed_training_utils.get_distributed_model(
                    model, ModeKeys.TRAIN))
            with distribution.scope():
                for m in grouped_models:
                    self.assertAllClose(0.001,
                                        keras.backend.get_value(
                                            m.optimizer.lr),
                                        atol=1e-05,
                                        rtol=1e-05)
class ParameterServerStrategyTest(multi_worker_test_base.MultiWorkerTestBase,
                                  parameterized.TestCase):
    @classmethod
    def setUpClass(cls):
        cls._workers, cls._ps = multi_worker_test_base.create_in_process_cluster(
            num_workers=3, num_ps=2)
        cls._cluster_spec = {
            run_config.TaskType.WORKER:
            ['fake_worker_0', 'fake_worker_1', 'fake_worker_2'],
            run_config.TaskType.PS: ['fake_ps_0', 'fake_ps_1']
        }

    def setUp(self):
        self._result = 0
        self._lock = threading.Lock()
        self._init_condition = threading.Condition()
        self._init_reached = 0
        self._finish_condition = threading.Condition()
        self._finish_reached = 0
        super(ParameterServerStrategyTest, self).setUp()

    def _get_test_objects(self, task_type, task_id, num_gpus):
        distribution = parameter_server_strategy.ParameterServerStrategy(
            num_gpus_per_worker=num_gpus)
        if not task_type:
            return distribution, ''

        tf_config = {
            'cluster': self._cluster_spec,
            'task': {
                'type': task_type,
                'index': task_id
            }
        }
        with self._lock:
            # Accessing environment variables should be protected by locks because
            # environment variables are shared by all threads.
            with test.mock.patch.dict('os.environ',
                                      {'TF_CONFIG': json.dumps(tf_config)}):
                distribution.configure()
        return distribution, self._workers[task_id].target

    def _test_device_assignment_distributed(self, task_type, task_id,
                                            num_gpus):
        worker_device = '/job:%s/replica:0/task:%d' % (task_type, task_id)
        d, _ = self._get_test_objects(task_type, task_id, num_gpus)
        with ops.Graph().as_default(), \
             self.test_session(target=self._workers[0].target) as sess, \
             d.scope():

            # Define a variable outside the call_for_each_tower scope. This is not
            # recommended.
            n = variable_scope.get_variable('n', initializer=10.0)
            self.assertEqual(n.device, '/job:ps/task:0')

            def model_fn():
                if num_gpus == 0:
                    last_part_device = 'device:CPU:0'
                else:
                    last_part_device = (
                        'device:GPU:%d' %
                        distribute_lib.get_tower_context().tower_id)

                a = constant_op.constant(1.0)
                b = constant_op.constant(2.0)
                c = a + b
                self.assertEqual(a.device,
                                 worker_device + '/' + last_part_device)
                self.assertEqual(b.device,
                                 worker_device + '/' + last_part_device)
                self.assertEqual(c.device,
                                 worker_device + '/' + last_part_device)

                # The device scope is ignored for variables but not for normal ops.
                with ops.device('/job:worker/task:0'):
                    x = variable_scope.get_variable('x', initializer=10.0)
                    x_add = x.assign_add(c)
                    e = a + c
                # The variable x is on task 1 since the device function has
                # already been called once before the model_fn runs.
                self.assertEqual(x.device, '/job:ps/task:1')
                self.assertEqual(x_add.device, x.device)
                self.assertEqual(
                    e.device,
                    '/job:worker/replica:0/task:0/%s' % last_part_device)

                # The colocate_vars_with can override the distribution's device.
                with d.colocate_vars_with(x):
                    y = variable_scope.get_variable('y', initializer=20.0)
                y_add = y.assign_add(x_add)
                self.assertEqual(y.device, '/job:ps/task:1')
                self.assertEqual(y_add.device, y.device)
                self.assertEqual(y.device, x.device)

                z = variable_scope.get_variable('z', initializer=10.0)
                self.assertEqual(z.device, '/job:ps/task:0')
                self.assertNotEqual(z.device, x.device)

                with ops.control_dependencies([y_add]):
                    z_add = z.assign_add(y)
                with ops.control_dependencies([z_add]):
                    f = z + c
                self.assertEqual(f.device,
                                 worker_device + '/' + last_part_device)

                # The device scope is merged with the default worker device.
                with ops.device('/CPU:1'):
                    g = e + 1.0
                self.assertEqual(g.device, worker_device + '/device:CPU:1')

                # The ops.colocate_with is ignored when defining a variable
                # but not for a normal tensor.
                with ops.colocate_with(x):
                    u = variable_scope.get_variable('u', initializer=30.0)
                    v = variable_scope.get_variable('v', initializer=30.0)
                    h = f + 1.0
                self.assertIn('/job:ps/', u.device)
                self.assertIn('/job:ps/', v.device)
                # u and v are on different parameter servers.
                self.assertTrue(u.device != x.device or v.device != x.device)
                self.assertTrue(u.device == x.device or v.device == x.device)
                # Here h is on a ps device, not on the worker. Note that
                # h.device is canonical while x.device is not.
                self.assertIn('/job:ps/', h.device)
                return y_add, z_add, f

            y, z, f = d.call_for_each_tower(model_fn)
            self.assertNotEqual(y, None)
            self.assertNotEqual(z, None)
            self.assertNotEqual(f, None)

            if context.num_gpus() >= 1 and num_gpus <= 1:
                variables.global_variables_initializer().run()
                y_val, z_val, f_val = sess.run([y, z, f])
                self.assertEqual(y_val, 33.0)
                self.assertEqual(z_val, 43.0)
                self.assertEqual(f_val, 46.0)

    @combinations.generate(
        combinations.combine(mode=['graph'], num_gpus=[0, 1, 2]))
    def testDeviceAssignmentDistributed(self, num_gpus):
        self._test_device_assignment_distributed('worker', 1, num_gpus)

    def _test_device_assignment_local(self,
                                      d,
                                      compute_device='CPU',
                                      variable_device='CPU',
                                      num_gpus=0):
        with ops.Graph().as_default(), \
             self.test_session(target=self._workers[0].target) as sess, \
             d.scope():

            def model_fn():
                if 'CPU' in compute_device:
                    tower_compute_device = '/device:CPU:0'
                else:
                    tower_compute_device = (
                        '/device:GPU:%d' %
                        distribute_lib.get_tower_context().tower_id)
                tower_compute_device = device_util.canonicalize(
                    tower_compute_device)

                if 'CPU' in variable_device:
                    tower_variable_device = '/device:CPU:0'
                else:
                    tower_variable_device = (
                        '/device:GPU:%d' %
                        distribute_lib.get_tower_context().tower_id)
                tower_variable_device = device_util.canonicalize(
                    tower_variable_device)

                a = constant_op.constant(1.0)
                b = constant_op.constant(2.0)
                c = a + b
                self.assertEqual(a.device, tower_compute_device)
                self.assertEqual(b.device, tower_compute_device)
                self.assertEqual(c.device, tower_compute_device)

                # The device scope is ignored for variables but not for normal ops.
                with ops.device('/device:GPU:2'):
                    x = variable_scope.get_variable('x', initializer=10.0)
                    x_add = x.assign_add(c)
                    e = a + c
                self.assertEqual(device_util.canonicalize(x.device),
                                 tower_variable_device)
                self.assertEqual(x_add.device, x.device)
                self.assertEqual(e.device,
                                 device_util.canonicalize('/device:GPU:2'))

                # The colocate_vars_with can override the distribution's device.
                with d.colocate_vars_with(x):
                    y = variable_scope.get_variable('y', initializer=20.0)
                y_add = y.assign_add(x_add)
                self.assertEqual(device_util.canonicalize(y.device),
                                 tower_variable_device)
                self.assertEqual(y_add.device, y.device)
                self.assertEqual(y.device, x.device)

                z = variable_scope.get_variable('z', initializer=10.0)
                self.assertEqual(device_util.canonicalize(z.device),
                                 tower_variable_device)

                with ops.control_dependencies([y_add]):
                    z_add = z.assign_add(y)
                with ops.control_dependencies([z_add]):
                    f = z + c
                self.assertEqual(f.device, tower_compute_device)

                # The device scope is merged with the default worker device.
                with ops.device('/CPU:1'):
                    g = e + 1.0
                self.assertEqual(g.device,
                                 device_util.canonicalize('/device:CPU:1'))

                # The ops.colocate_with is ignored when defining a variable
                # but not for a normal tensor.
                with ops.colocate_with(x):
                    u = variable_scope.get_variable('u', initializer=30.0)
                    h = f + 1.0
                self.assertEqual(device_util.canonicalize(u.device),
                                 tower_variable_device)
                self.assertEqual(device_util.canonicalize(x.device), h.device)
                return y_add, z_add, f

            y, z, f = d.call_for_each_tower(model_fn)
            self.assertNotEqual(y, None)
            self.assertNotEqual(z, None)
            self.assertNotEqual(f, None)

            if context.num_gpus() >= 1 and num_gpus <= 1:
                variables.global_variables_initializer().run()
                y_val, z_val, f_val = sess.run([y, z, f])
                self.assertEqual(y_val, 33.0)
                self.assertEqual(z_val, 43.0)
                self.assertEqual(f_val, 46.0)

    def testDeviceAssignmentLocalCPU(self):
        distribution = parameter_server_strategy.ParameterServerStrategy(
            num_gpus_per_worker=0)
        self._test_device_assignment_local(distribution,
                                           compute_device='CPU',
                                           variable_device='CPU',
                                           num_gpus=0)

    def testDeviceAssignmentLocalOneGPU(self):
        distribution = parameter_server_strategy.ParameterServerStrategy(
            num_gpus_per_worker=1)
        self._test_device_assignment_local(distribution,
                                           compute_device='GPU',
                                           variable_device='GPU',
                                           num_gpus=1)

    def testDeviceAssignmentLocalTwoGPUs(self):
        distribution = parameter_server_strategy.ParameterServerStrategy(
            num_gpus_per_worker=2)
        self._test_device_assignment_local(distribution,
                                           compute_device='GPU',
                                           variable_device='CPU',
                                           num_gpus=2)
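
    # Taken together, the three local cases above pin down the placement
    # policy: with no GPUs both compute and variables stay on the CPU; with
    # one GPU both land on that GPU; with two GPUs each tower computes on
    # its own GPU while variables fall back to the CPU, presumably so the
    # towers share a single copy (our reading, not asserted by the tests).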

    def _test_simple_increment(self, task_type, task_id, num_gpus):
        d, master_target = self._get_test_objects(task_type, task_id, num_gpus)
        if hasattr(d, '_cluster_spec') and d._cluster_spec:
            num_workers = len(d._cluster_spec.as_dict().get(
                'worker', ['dummy_worker']))
        else:
            num_workers = 1
        with ops.Graph().as_default(), \
             self.test_session(target=master_target) as sess, \
             d.scope():

            def model_fn():
                x = variable_scope.get_variable('x', initializer=10.0)
                y = variable_scope.get_variable('y', initializer=20.0)

                x_add = x.assign_add(1.0, use_locking=True)
                y_add = y.assign_add(1.0, use_locking=True)

                train_op = control_flow_ops.group([x_add, y_add])
                return x, y, train_op

            x, y, train_op = d.call_for_each_tower(model_fn)
            train_op = d.group(d.unwrap(train_op))

            if context.num_gpus() < d._num_gpus_per_worker:
                return True

            if task_id == 0:
                variables.global_variables_initializer().run()

            # Workers wait for the chief worker to finish initializing
            # variables.
            self._init_condition.acquire()
            self._init_reached += 1
            while self._init_reached != num_workers:
                self._init_condition.wait()
            self._init_condition.notify_all()
            self._init_condition.release()

            sess.run(train_op)

            # Wait for other workers to finish training.
            self._finish_condition.acquire()
            self._finish_reached += 1
            while self._finish_reached != num_workers:
                self._finish_condition.wait()
            self._finish_condition.notify_all()
            self._finish_condition.release()

            x_val, y_val = sess.run([x, y])
            self.assertEqual(x_val, 10.0 + 1.0 * num_workers * d.num_towers)
            self.assertEqual(y_val, 20.0 + 1.0 * num_workers * d.num_towers)
            return (x_val == 10.0 + 1.0 * num_workers * d.num_towers
                    and y_val == 20.0 + 1.0 * num_workers * d.num_towers)

    def _test_minimize_loss_graph(self, task_type, task_id, num_gpus):
        d, master_target = self._get_test_objects(task_type, task_id, num_gpus)
        with ops.Graph().as_default(), \
             self.test_session(target=master_target) as sess, \
             d.scope():
            l = core.Dense(1, use_bias=False)

            def loss_fn(x):
                y = array_ops.reshape(l(x), []) - constant_op.constant(1.)
                return y * y

            # TODO(yuefengz, apassos): eager.backprop.implicit_grad is not safe for
            # multiple graphs (b/111216820).
            def grad_fn(x):
                loss = loss_fn(x)
                var_list = (variables.trainable_variables() +
                            ops.get_collection(
                                ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))
                grads = gradients.gradients(loss, var_list)
                ret = list(zip(grads, var_list))
                return ret

            def update(v, g):
                return v.assign_sub(0.05 * g, use_locking=True)

            one = d.broadcast(constant_op.constant([[1.]]))

            def step():
                """Perform one optimization step."""
                # Run forward & backward to get gradients, variables list.
                g_v = d.call_for_each_tower(grad_fn, one)
                # Update the variables using the gradients and the update() function.
                before_list = []
                after_list = []
                for g, v in g_v:
                    fetched = d.read_var(v)
                    before_list.append(fetched)
                    with ops.control_dependencies([fetched]):
                        # TODO(yuefengz): support non-Mirrored variable as destinations.
                        g = d.reduce(variable_scope.VariableAggregation.SUM,
                                     g,
                                     destinations=v)
                        with ops.control_dependencies(
                                d.unwrap(d.update(v, update, g))):
                            after_list.append(d.read_var(v))
                return before_list, after_list

            before_out, after_out = step()

            if context.num_gpus() < d._num_gpus_per_worker:
                return True

            if task_id == 0:
                variables.global_variables_initializer().run()

            # Workers wait for the chief worker to finish initializing
            # variables.
            self._init_condition.acquire()
            self._init_reached += 1
            while self._init_reached != 3:
                self._init_condition.wait()
            self._init_condition.notify_all()
            self._init_condition.release()

            for i in range(10):
                b, a = sess.run((before_out, after_out))
                if i == 0:
                    before, = b
                after, = a

            error_before = abs(before - 1)
            error_after = abs(after - 1)
            # Error should go down
            self.assertLess(error_after, error_before)
            return error_after < error_before

    def testSimpleBetweenGraph(self):
        self._run_between_graph_clients(self._test_simple_increment,
                                        self._cluster_spec, 0)

    @combinations.generate(
        combinations.combine(mode=['graph'], num_gpus=[0, 1, 2]))
    def testLocalSimpleIncrement(self, num_gpus):
        self._test_simple_increment(None, 0, num_gpus)

    @combinations.generate(
        combinations.combine(mode=['graph'], num_gpus=[0, 1, 2]))
    def testMinimizeLossGraph(self, num_gpus):
        self._run_between_graph_clients(self._test_minimize_loss_graph,
                                        self._cluster_spec, num_gpus)
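
# The init/finish handshakes in _test_simple_increment and
# _test_minimize_loss_graph above are ad-hoc barriers built from a
# threading.Condition. A minimal standalone sketch of the same pattern,
# for illustration only (this helper is not part of the test base class):
import threading

class _SimpleBarrier(object):
    """Blocks each caller of wait() until `parties` threads have arrived."""

    def __init__(self, parties):
        self._parties = parties
        self._arrived = 0
        self._cond = threading.Condition()

    def wait(self):
        # The last thread to arrive wakes everyone; earlier arrivals block.
        with self._cond:
            self._arrived += 1
            while self._arrived < self._parties:
                self._cond.wait()
            self._cond.notify_all()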
Example #54
def strategy_for_numpy_input_combinations():
    return combinations.combine(distribution=strategies_minus_tpu +
                                tpu_strategies,
                                mode=['graph'])
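
# A dependency-free sketch of what `combinations.combine` computes for
# list-valued arguments, matching the behavior the tests above rely on
# (Cartesian product with keys sorted alphabetically). Illustrative only:
# the real helper also accepts scalar values and composes via `times`
# and `+`.
from collections import OrderedDict
from itertools import product

def combine_sketch(**kwargs):
    keys = sorted(kwargs)
    return [OrderedDict(zip(keys, values))
            for values in product(*(kwargs[k] for k in keys))]

assert combine_sketch(mode=['graph'], num_gpus=[0, 1]) == [
    OrderedDict([('mode', 'graph'), ('num_gpus', 0)]),
    OrderedDict([('mode', 'graph'), ('num_gpus', 1)]),
]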
Example #55
def strategy_combinations():
  return combinations.combine(
      distribution=strategies,
      mode=['graph'])
Example #57
def strategy_and_inputs():
  return combinations.combine(
      distribution=strategies,
      use_numpy=[True, False],
      mode=['graph'])
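
# Hypothetical consumption sketch (the test name and signature are ours,
# not from this file): each axis becomes a keyword argument of the
# decorated test, except `mode`, which the framework consumes to select
# graph or eager execution:
#
#   @combinations.generate(strategy_and_inputs())
#   def test_with_inputs(self, distribution, use_numpy):
#       ...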
Example #58
class TestDistributionStrategyWithDatasets(test.TestCase,
                                           parameterized.TestCase):

  @combinations.generate(strategy_combinations())
  def test_calling_model_on_same_dataset(self, distribution):
    with self.cached_session():
      model = get_model()

      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
      loss = 'mse'
      metrics = ['mae', keras.metrics.CategoricalAccuracy()]
      model.compile(optimizer, loss, metrics=metrics, distribute=distribution)

      dataset = get_dataset(distribution)

      # Call fit with validation data
      model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
                validation_data=dataset, validation_steps=2)
      model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
                validation_data=dataset, validation_steps=2)
      model.predict(get_predict_dataset(distribution), steps=2)

  @combinations.generate(strategy_combinations())
  def test_model_interleaved_eval_same_as_direct_eval(self, distribution):
    with self.cached_session():
      loss = 'mse'

      user_controlled_model = get_model()
      user_controlled_optimizer = gradient_descent.GradientDescentOptimizer(
          0.001)
      user_controlled_metrics = ['mae', keras.metrics.CategoricalAccuracy()]
      user_controlled_model.compile(user_controlled_optimizer, loss,
                                    metrics=user_controlled_metrics,
                                    distribute=distribution)

      interleaved_model = get_model()
      interleaved_optimizer = gradient_descent.GradientDescentOptimizer(0.001)
      interleaved_metrics = ['mae', keras.metrics.CategoricalAccuracy()]
      interleaved_model.compile(interleaved_optimizer, loss,
                                metrics=interleaved_metrics,
                                distribute=distribution)

      dataset = get_dataset(distribution)

      # Call fit with validation interleaved
      interleaved_output = interleaved_model.fit(dataset, epochs=2,
                                                 steps_per_epoch=2, verbose=0,
                                                 validation_data=dataset,
                                                 validation_steps=2)

      # Manually control the validation running after each epoch.
      user_controlled_output = []
      for _ in range(2):
        user_controlled_model.fit(
            dataset, epochs=1, steps_per_epoch=2, verbose=0)
        user_controlled_output.append(
            user_controlled_model.evaluate(dataset, steps=2))

      self.assertEqual(interleaved_output.history['val_loss'],
                       [x[0] for x in user_controlled_output])
      self.assertEqual(interleaved_output.history['val_mean_absolute_error'],
                       [x[1] for x in user_controlled_output])
      self.assertEqual(interleaved_output.history['val_categorical_accuracy'],
                       [x[2] for x in user_controlled_output])

  # TODO(priyag): Enable this test for TPU. Currently tuples/dicts don't work
  # because clone_model's input_tensors argument only seems to accept lists,
  # not tuples or dicts.
  def test_fit_with_tuple_and_dict_dataset_inputs(self):
    with self.cached_session():
      a = keras.layers.Input(shape=(3,), name='input_a')
      b = keras.layers.Input(shape=(3,), name='input_b')

      dense = keras.layers.Dense(4, name='dense')
      c = dense(a)
      d = dense(b)
      e = keras.layers.Dropout(0.5, name='dropout')(c)

      model = keras.models.Model([a, b], [d, e])

      optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=0.001)
      loss = 'mse'
      metrics = ['mae', keras.metrics.CategoricalAccuracy()]
      strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:0',
                                                     '/device:CPU:0'])
      model.compile(optimizer, loss, metrics=metrics, distribute=strategy)

      input_a_np = np.random.random((10, 3))
      input_b_np = np.random.random((10, 3))
      output_d_np = np.random.random((10, 4))
      output_e_np = np.random.random((10, 4))

      # Test with tuples
      dataset_tuple = dataset_ops.Dataset.from_tensor_slices((
          (input_a_np, input_b_np), (output_d_np, output_e_np)))
      dataset_tuple = dataset_tuple.repeat(100)
      dataset_tuple = dataset_tuple.batch(10)

      model.fit(dataset_tuple, epochs=1, steps_per_epoch=2, verbose=1)

      # Test with dict
      dataset_dict = dataset_ops.Dataset.from_tensor_slices((
          {'input_a': input_a_np, 'input_b': input_b_np},
          (output_d_np, output_e_np)))
      dataset_dict = dataset_dict.repeat(100)
      dataset_dict = dataset_dict.batch(10)

      model.fit(dataset_dict, epochs=1, steps_per_epoch=2, verbose=1)

  @combinations.generate(strategy_combinations())
  def test_fit_eval_and_predict_methods_on_dataset(self, distribution):
    with self.cached_session():
      model = get_model()

      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
      loss = 'mse'
      metrics = ['mae', keras.metrics.CategoricalAccuracy()]
      model.compile(optimizer, loss, metrics=metrics, distribute=distribution)

      dataset = get_dataset(distribution)

      model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1)
      model.evaluate(dataset, steps=2, verbose=1)
      model.predict(get_predict_dataset(distribution), steps=2)

  @combinations.generate(strategy_and_optimizer_combinations())
  def test_fit_eval_and_predict_with_optimizer(self, distribution, optimizer):
    with self.cached_session():
      model = get_model()

      loss = 'mse'
      model.compile(optimizer(), loss, distribute=distribution)

      dataset = get_dataset(distribution)

      model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1)
      model.evaluate(dataset, steps=2, verbose=1)
      model.predict(get_predict_dataset(distribution), steps=2)

  def test_dataset_input_shape_validation(self):
    with self.cached_session():
      model = get_model()

      optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001)
      loss = 'mse'
      strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:1',
                                                     '/device:GPU:0'])

      model.compile(optimizer, loss, distribute=strategy)

      # User forgets to batch the dataset
      inputs = np.zeros((10, 3), dtype=np.float32)
      targets = np.zeros((10, 4), dtype=np.float32)
      dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets))
      dataset = dataset.repeat(100)

      with self.assertRaisesRegexp(ValueError, 'expected input to have shape'):
        model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0)

      # Wrong input shape
      inputs = np.zeros((10, 5), dtype=np.float32)
      targets = np.zeros((10, 4), dtype=np.float32)
      dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets))
      dataset = dataset.repeat(100)
      dataset = dataset.batch(10)

      with self.assertRaisesRegexp(ValueError,
                                   'expected input to have shape'):
        model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0)

  @combinations.generate(combinations.combine(
      distribution=[combinations.tpu_strategy_one_step],
      mode=['graph']))
  def test_dataset_input_shape_fully_defined(self, distribution):
    with self.cached_session():
      model = get_model()

      optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001)
      loss = 'mse'
      model.compile(optimizer, loss, distribute=distribution)

      dataset = get_dataset(distribution)
      # Input shapes are not fully known. Batch dimension is unknown as we are
      # not using the drop_remainder argument.
      dataset = dataset.repeat(100).batch(10)

      with self.assertRaisesRegexp(ValueError, 'requires fully defined shapes'):
        model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0)

  def test_learning_phase_value(self):
    # TODO(anjalisridhar): Modify this test to use Lambdas since we can compare
    # meaningful values. Currently we don't pass the learning phase if the
    # Lambda layer uses the learning phase.
    with self.cached_session():
      x = keras.layers.Input(shape=(1,), name='input')
      y = keras.layers.Dense(1, kernel_initializer='ones')(x)
      z = keras.layers.Dropout(0.9999)(y)
      model = keras.Model(x, z)
      initial_weights = model.get_weights()

      optimizer = gradient_descent.GradientDescentOptimizer(0.005)
      loss = 'mse'
      metrics = ['acc']
      strategy = mirrored_strategy.MirroredStrategy(
          ['/device:GPU:0', '/device:GPU:1'])

      model.compile(optimizer, loss, metrics=metrics, distribute=strategy)

      inputs = np.ones((10, 1), dtype=np.float32)
      targets = np.ones((10, 1), dtype=np.float32)
      dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets))
      dataset = dataset.repeat().batch(8)
      hist = model.fit(dataset, epochs=1, steps_per_epoch=20, verbose=1)
      self.assertAlmostEqual(hist.history['acc'][0], 0, 0)

      model.set_weights(initial_weights)
      evaluate_output = model.evaluate(dataset, steps=20)
      self.assertAlmostEqual(evaluate_output[1], 1, 0)

      inputs = np.ones((10, 1), dtype=np.float32)
      predict_dataset = dataset_ops.Dataset.from_tensor_slices(inputs)
      predict_dataset = predict_dataset.repeat().batch(5)
      output = model.predict(predict_dataset, steps=10)
      ref_output = np.ones((50, 1), dtype=np.float32)
      self.assertArrayNear(output, ref_output, 1e-1)
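
  # What the assertions above check: the Dense kernel is all ones, so the
  # pre-dropout output is 1 for every example. During fit() the learning
  # phase is 1 and Dropout(0.9999) zeroes almost every activation, giving
  # ~0 accuracy against the all-ones targets; during evaluate() and
  # predict() the learning phase is 0, dropout is a no-op, and the output
  # is ~1 everywhere.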
def tpu_combinations():
  return combinations.combine(distribution=[combinations.tpu_strategy_one_step,
                                            combinations.tpu_strategy],
                              mode=["graph"])
class IndexedSlicesUtilsTest(test.TestCase, parameterized.TestCase):
    def _assert_values_equal(self, left, right):
        self.assertAllEqual(self.evaluate(ops.convert_to_tensor(left)),
                            self.evaluate(ops.convert_to_tensor(right)))

    @test_util.run_in_graph_and_eager_modes
    def testAggregateTensors(self):
        t0 = constant_op.constant([[1., 2.], [0, 0], [3., 4.]])
        t1 = constant_op.constant([[0., 0.], [5, 6], [7., 8.]])
        total = constant_op.constant([[1., 2.], [5, 6], [10., 12.]])
        result = cross_device_utils.aggregate_tensors_or_indexed_slices(
            [t0, t1])
        self._assert_values_equal(total, result)

    @test_util.run_in_graph_and_eager_modes
    def testAggregateIndexedSlices(self):
        t0 = math_ops._as_indexed_slices(
            constant_op.constant([[1., 2.], [0, 0], [3., 4.]]))
        t1 = math_ops._as_indexed_slices(
            constant_op.constant([[0., 0.], [5, 6], [7., 8.]]))
        total = constant_op.constant([[1., 2.], [5, 6], [10., 12.]])
        result = cross_device_utils.aggregate_tensors_or_indexed_slices(
            [t0, t1])
        self.assertIsInstance(result, ops.IndexedSlices)
        self._assert_values_equal(total, result)
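
    # An IndexedSlices pairs a `values` tensor with the row `indices` it
    # updates; aggregation combines the per-input rows, so converting the
    # result back to a dense tensor matches the dense sum above (our
    # summary for orientation; the ops live in cross_device_utils).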

    @test_util.run_in_graph_and_eager_modes
    def testDivideTensor(self):
        t = constant_op.constant([[1., 2.], [0, 0], [3., 4.]])
        n = 2
        expected = constant_op.constant([[0.5, 1.], [0, 0], [1.5, 2.]])
        result = cross_device_utils.divide_by_n_tensors_or_indexed_slices(t, n)
        self._assert_values_equal(expected, result)

    @test_util.run_in_graph_and_eager_modes
    def testDivideIndexedSlices(self):
        t = math_ops._as_indexed_slices(
            constant_op.constant([[1., 2.], [0, 0], [3., 4.]]))
        n = 2
        expected = constant_op.constant([[0.5, 1.], [0, 0], [1.5, 2.]])
        result = cross_device_utils.divide_by_n_tensors_or_indexed_slices(t, n)
        self.assertIsInstance(result, ops.IndexedSlices)
        self._assert_values_equal(expected, result)

    @test_util.run_in_graph_and_eager_modes
    def testIsIndexedSlices(self):
        t = math_ops._as_indexed_slices(
            constant_op.constant([[1., 2.], [0, 0], [3., 4.]]))
        self.assertTrue(cross_device_utils.contains_indexed_slices(t))

    @test_util.run_in_graph_and_eager_modes
    def testContainsIndexedSlices_List(self):
        t0 = math_ops._as_indexed_slices(
            constant_op.constant([[1., 2.], [0, 0], [3., 4.]]))
        t1 = math_ops._as_indexed_slices(
            constant_op.constant([[0., 0.], [5, 6], [7., 8.]]))
        self.assertTrue(cross_device_utils.contains_indexed_slices([t0, t1]))

    @test_util.run_in_graph_and_eager_modes
    def testContainsIndexedSlices_Tuple(self):
        t0 = math_ops._as_indexed_slices(
            constant_op.constant([[1., 2.], [0, 0], [3., 4.]]))
        t1 = math_ops._as_indexed_slices(
            constant_op.constant([[0., 0.], [5, 6], [7., 8.]]))
        self.assertTrue(cross_device_utils.contains_indexed_slices((t0, t1)))

    @test_util.run_in_graph_and_eager_modes
    def testContainsIndexedSlices_PerReplica(self):
        t0 = math_ops._as_indexed_slices(
            constant_op.constant([[1., 2.], [0, 0], [3., 4.]]))
        t1 = math_ops._as_indexed_slices(
            constant_op.constant([[0., 0.], [5, 6], [7., 8.]]))
        per_replica = value_lib.PerReplica({"/gpu:0": t0, "/cpu:0": t1})
        self.assertTrue(
            cross_device_utils.contains_indexed_slices(per_replica))

    @combinations.generate(
        combinations.combine(mode=["graph", "eager"], required_gpus=1))
    def testCopyTensor(self):
        with ops.device("/cpu:0"):
            t = constant_op.constant([[1., 2.], [0, 0], [3., 4.]])
        destination = "/gpu:0"
        result = cross_device_utils.copy_tensor_or_indexed_slices_to_device(
            t, destination)

        self._assert_values_equal(t, result)
        self.assertEqual(device_util.resolve(destination),
                         device_util.resolve(result.device))

    @combinations.generate(
        combinations.combine(mode=["graph", "eager"], required_gpus=1))
    def testCopyIndexedSlices(self):
        with ops.device("/cpu:0"):
            t = math_ops._as_indexed_slices(
                constant_op.constant([[1., 2.], [0, 0], [3., 4.]]))
        destination = "/gpu:0"
        result = cross_device_utils.copy_tensor_or_indexed_slices_to_device(
            t, destination)

        self.assertIsInstance(result, ops.IndexedSlices)
        self._assert_values_equal(t, result)
        self.assertEqual(device_util.resolve(destination),
                         device_util.resolve(result.device))