Example #1
0
class SavedModelKerasModelTest(test_base.TestSavedModelBase):

  def setUp(self):
    self._root_dir = 'saved_model_save_load'
    super(SavedModelKerasModelTest, self).setUp()

  def _save_model(self, model, saved_dir):
    saved_model.save(model, saved_dir)

  def _load_and_run_model(self,
                          distribution,
                          saved_dir,
                          predict_dataset,
                          output_name='output_1'):
    return test_base.load_and_run_with_saved_model_api(distribution, saved_dir,
                                                       predict_dataset,
                                                       output_name)

  @ds_combinations.generate(test_base.simple_models_with_strategies())
  def test_save_no_strategy_restore_strategy(self, model_and_input,
                                             distribution):
    self.run_test_save_no_strategy_restore_strategy(
        model_and_input, distribution)

  @ds_combinations.generate(
      combinations.times(test_base.simple_models_with_strategies(),
                         combinations.combine(save_in_scope=[True, False])))
  def test_save_strategy_restore_no_strategy(self, model_and_input,
                                             distribution, save_in_scope):
    self.run_test_save_strategy_restore_no_strategy(
        model_and_input, distribution, save_in_scope)

  @ds_combinations.generate(
      combinations.times(test_base.simple_models_with_strategy_pairs(),
                         combinations.combine(save_in_scope=[True, False])))
  def test_save_strategy_restore_strategy(self, model_and_input,
                                          distribution_for_saving,
                                          distribution_for_restoring,
                                          save_in_scope):
    self.run_test_save_strategy_restore_strategy(model_and_input,
                                                 distribution_for_saving,
                                                 distribution_for_restoring,
                                                 save_in_scope)

  @ds_combinations.generate(
      combinations.times(test_base.simple_models_with_strategies(),
                         combinations.combine(save_in_scope=[True, False])))
  def test_no_variable_device_placement(self, model_and_input, distribution,
                                        save_in_scope):
    saved_dir = self.run_test_save_strategy(model_and_input, distribution,
                                            save_in_scope)
    func = saved_model.load(saved_dir)
    concrete_function = func.signatures[test_base._DEFAULT_FUNCTION_KEY]
    for f in concrete_function.graph.as_graph_def().library.function:
      for n in f.node_def:
        if n.op == 'ReadVariableOp':
          self.assertEmpty(n.device)
Example #2
0
def test_combinations_for_embedding_model():
    # TODO(sourabhbajaj): Enable tests for eager mode
    eager_mode_strategies = [
        s for s in strategies_for_embedding_models() if not s.required_tpu
    ]

    return (combinations.times(
        combinations.combine(distribution=strategies_for_embedding_models()),
        (graph_mode_test_configuration())) + combinations.times(
            combinations.combine(distribution=eager_mode_strategies),
            (eager_mode_test_configuration())))
Example #3
0
class TestDistributionStrategySaveLoadWeights(test.TestCase,
                                              parameterized.TestCase):
    @ds_combinations.generate(
        combinations.times(
            keras_test_lib.all_strategy_combinations_minus_default(),
            combinations.combine(optimizer=optimizer_combinations.
                                 rmsprop_optimizer_keras_v2_fn)))
    def test_save_load_h5(self, distribution, optimizer):
        with self.cached_session():
            dataset = keras_test_lib.get_dataset(distribution)
            with distribution.scope():
                model = keras_test_lib.get_model()
                model.compile(optimizer(), 'mse')
                model.fit(dataset, epochs=1, steps_per_epoch=1)

                weights_file = tempfile.mktemp('.h5')
                model.save_weights(weights_file)

                model_2 = keras_test_lib.get_model()
                model_2.compile(optimizer(), 'mse')
                model_2.load_weights(weights_file)
                model_2.predict(
                    keras_test_lib.get_predict_dataset(distribution), steps=2)
                model_2.fit(dataset, epochs=1, steps_per_epoch=1)

    @ds_combinations.generate(
        combinations.times(
            keras_test_lib.all_strategy_combinations_minus_default(),
            combinations.combine(optimizer=optimizer_combinations.
                                 rmsprop_optimizer_keras_v2_fn)))
    def test_save_load_trackable(self, distribution, optimizer):
        # TODO(b/123533246): Enable the test for TPU once bug is fixed
        if (isinstance(distribution,
                       (tpu_strategy.TPUStrategy, tpu_strategy.TPUStrategyV1))
                and distribution.extended.steps_per_run > 1):
            self.skipTest(
                'MultiStep TPU Strategy deadlocks with optimizer restore.')
        with self.cached_session():
            dataset = keras_test_lib.get_dataset(distribution)
            with distribution.scope():
                model = keras_test_lib.get_model()
                model.compile(optimizer(), 'mse')
                model.fit(dataset, epochs=1, steps_per_epoch=1)

                weights_file = tempfile.mktemp()
                model.save_weights(weights_file)

                model_2 = keras_test_lib.get_model()
                model_2.compile(optimizer(), 'mse')
                model_2.load_weights(weights_file)
                model_2.predict(
                    keras_test_lib.get_predict_dataset(distribution), steps=2)
                model_2.fit(dataset, epochs=1, steps_per_epoch=1)
Example #4
0
class TestDistributionStrategyValidation(test.TestCase, parameterized.TestCase):

  @ds_combinations.generate(
      combinations.times(
          keras_test_lib.all_strategy_combinations_minus_default()))
  def test_layer_outside_scope(self, distribution):
    with self.cached_session():
      with self.assertRaisesRegex(
          ValueError, 'was not created in the distribution strategy'):
        x = keras.layers.Input(shape=(3,), name='input')
        y = keras.layers.Dense(4, name='dense')(x)
        with distribution.scope():
          model = keras.Model(x, y)
          optimizer = gradient_descent.GradientDescentOptimizer(0.001)
          loss = 'mse'
          metrics = ['mae', keras.metrics.CategoricalAccuracy()]
          model.compile(
              optimizer,
              loss,
              metrics=metrics)

  @ds_combinations.generate(
      keras_test_lib.all_strategy_combinations_minus_default())
  def test_model_outside_scope(self, distribution):
    with self.cached_session():
      with self.assertRaisesRegex(
          ValueError, 'was not created in the distribution strategy'):
        x = keras.layers.Input(shape=(3,), name='input')
        y = keras.layers.Dense(4, name='dense')(x)
        model = keras.Model(x, y)
        with distribution.scope():
          optimizer = gradient_descent.GradientDescentOptimizer(0.001)
          loss = 'mse'
          metrics = ['mae', keras.metrics.CategoricalAccuracy()]
          model.compile(optimizer, loss, metrics=metrics)
Example #5
0
 def test_times_variable_arguments(self):
     c1 = combinations.combine(mode=["graph", "eager"])
     c2 = combinations.combine(optimizer=["adam", "gd"])
     c3 = combinations.combine(distribution=["d1", "d2"])
     c4 = combinations.times(c3, c1, c2)
     self.assertEqual([
         OrderedDict([("distribution", "d1"), ("mode", "graph"),
                      ("optimizer", "adam")]),
         OrderedDict([("distribution", "d1"), ("mode", "graph"),
                      ("optimizer", "gd")]),
         OrderedDict([("distribution", "d1"), ("mode", "eager"),
                      ("optimizer", "adam")]),
         OrderedDict([("distribution", "d1"), ("mode", "eager"),
                      ("optimizer", "gd")]),
         OrderedDict([("distribution", "d2"), ("mode", "graph"),
                      ("optimizer", "adam")]),
         OrderedDict([("distribution", "d2"), ("mode", "graph"),
                      ("optimizer", "gd")]),
         OrderedDict([("distribution", "d2"), ("mode", "eager"),
                      ("optimizer", "adam")]),
         OrderedDict([("distribution", "d2"), ("mode", "eager"),
                      ("optimizer", "gd")])
     ], c4)
     self.assertEqual(
         combinations.combine(mode=["graph", "eager"],
                              optimizer=["adam", "gd"],
                              distribution=["d1", "d2"]), c4)
Example #6
0
def test_combinations_with_tpu_strategies_graph():
    tpu_strategies = [
        strategy_combinations.tpu_strategy,
    ]

    return (combinations.times(
        combinations.combine(distribution=tpu_strategies),
        graph_mode_test_configuration()))
Example #7
0
class DistributionStrategyStatefulLstmModelCorrectnessTest(
        keras_correctness_test_base.
        TestDistributionStrategyEmbeddingModelCorrectnessBase):
    def get_model(self,
                  max_words=10,
                  initial_weights=None,
                  distribution=None,
                  input_shapes=None):
        del input_shapes
        batch_size = keras_correctness_test_base._GLOBAL_BATCH_SIZE

        with keras_correctness_test_base.MaybeDistributionScope(distribution):
            word_ids = keras.layers.Input(shape=(max_words, ),
                                          batch_size=batch_size,
                                          dtype=np.int32,
                                          name='words')
            word_embed = keras.layers.Embedding(input_dim=20,
                                                output_dim=10)(word_ids)
            lstm_embed = keras.layers.LSTM(units=4,
                                           return_sequences=False,
                                           stateful=True)(word_embed)

            preds = keras.layers.Dense(2, activation='softmax')(lstm_embed)
            model = keras.Model(inputs=[word_ids], outputs=[preds])

            if initial_weights:
                model.set_weights(initial_weights)

            optimizer_fn = gradient_descent_keras.SGD

            model.compile(optimizer=optimizer_fn(learning_rate=0.1),
                          loss='sparse_categorical_crossentropy',
                          metrics=['sparse_categorical_accuracy'])
        return model

    # TODO(jhseu): Disabled to fix b/130808953. Need to investigate why it
    # doesn't work and enable for DistributionStrategy more generally.
    @ds_combinations.generate(test_combinations_for_stateful_embedding_model())
    def disabled_test_stateful_lstm_model_correctness(self, distribution,
                                                      use_numpy,
                                                      use_validation_data):
        self.run_correctness_test(distribution,
                                  use_numpy,
                                  use_validation_data,
                                  is_stateful_model=True)

    @ds_combinations.generate(
        combinations.times(keras_correctness_test_base.
                           test_combinations_with_tpu_strategies_graph()))
    def test_incorrectly_use_multiple_cores_for_stateful_lstm_model(
            self, distribution, use_numpy, use_validation_data):
        with self.assertRaisesRegex(
                ValueError, 'RNNs with stateful=True not yet supported with '
                'tf.distribute.Strategy.'):
            self.run_correctness_test(distribution,
                                      use_numpy,
                                      use_validation_data,
                                      is_stateful_model=True)
Example #8
0
class KerasSaveLoadTest(test_base.TestSavedModelBase):
    def setUp(self):
        self._root_dir = 'keras_save_load'
        super(KerasSaveLoadTest, self).setUp()

    def _save_model(self, model, saved_dir):
        model.save(saved_dir, save_format='tf')

    def _load_and_run_model(self,
                            distribution,
                            saved_dir,
                            predict_dataset,
                            output_name='output_1'):
        restored_keras_model = save.load_model(saved_dir)
        return restored_keras_model.predict(predict_dataset,
                                            steps=test_base.PREDICT_STEPS)

    @ds_combinations.generate(test_base.simple_models_with_strategies())
    def test_save_no_strategy_restore_strategy(self, model_and_input,
                                               distribution):
        self.run_test_save_no_strategy_restore_strategy(
            model_and_input, distribution)

    @ds_combinations.generate(
        combinations.times(test_base.simple_models_with_strategies(),
                           combinations.combine(save_in_scope=[True, False])))
    def test_save_strategy_restore_no_strategy(self, model_and_input,
                                               distribution, save_in_scope):
        self.run_test_save_strategy_restore_no_strategy(
            model_and_input, distribution, save_in_scope)

    @ds_combinations.generate(
        combinations.times(test_base.simple_models_with_strategy_pairs(),
                           combinations.combine(save_in_scope=[True, False])))
    def test_save_strategy_restore_strategy(self, model_and_input,
                                            distribution_for_saving,
                                            distribution_for_restoring,
                                            save_in_scope):
        self.run_test_save_strategy_restore_strategy(
            model_and_input, distribution_for_saving,
            distribution_for_restoring, save_in_scope)
Example #9
0
class TestDistributionStrategyWithNormalizationLayer(test.TestCase,
                                                     parameterized.TestCase):

  @ds_combinations.generate(
      combinations.times(
          keras_test_lib.all_strategy_combinations(),
          combinations.combine(
              fused=[True, False],
              optimizer=optimizer_combinations
              .gradient_descent_optimizer_keras_v2_fn)))
  def test_batchnorm_correctness(self, distribution, fused, optimizer):
    with self.cached_session():
      with distribution.scope():
        model = keras.models.Sequential()
        norm = keras.layers.BatchNormalization(
            input_shape=(
                10,
                20,
                30,
            ), momentum=0.8, fused=fused)
        model.add(norm)
        model.compile(
            loss='mse',
            optimizer=optimizer())

      # centered on 5.0, variance 10.0
      x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 10, 20, 30))
      x = x.astype('float32')
      dataset = dataset_ops.Dataset.from_tensor_slices((x, x))
      dataset = dataset.repeat(100)
      dataset = keras_test_lib.batch_wrapper(dataset, 32, distribution)

      predict_dataset = dataset_ops.Dataset.from_tensor_slices(x)
      predict_dataset = predict_dataset.repeat(100)
      predict_dataset = keras_test_lib.batch_wrapper(predict_dataset, 32,
                                                     distribution)

      model.fit(dataset, epochs=4, verbose=0, steps_per_epoch=10)
      out = model.predict(predict_dataset, steps=2)
      out -= keras.backend.eval(norm.beta)
      out /= keras.backend.eval(norm.gamma)
      np.testing.assert_allclose(out.mean(), 0.0, atol=1e-1)
      np.testing.assert_allclose(out.std(), 1.0, atol=1e-1)
Example #10
0
 def test_times(self):
     c1 = combinations.combine(mode=["graph"], loss=["callable", "tensor"])
     c2 = combinations.combine(mode=["eager"], loss=["callable"])
     c3 = combinations.combine(distribution=["d1", "d2"])
     c4 = combinations.times(c3, c1 + c2)
     self.assertEqual([
         OrderedDict([("distribution", "d1"), ("loss", "callable"),
                      ("mode", "graph")]),
         OrderedDict([("distribution", "d1"), ("loss", "tensor"),
                      ("mode", "graph")]),
         OrderedDict([("distribution", "d1"), ("loss", "callable"),
                      ("mode", "eager")]),
         OrderedDict([("distribution", "d2"), ("loss", "callable"),
                      ("mode", "graph")]),
         OrderedDict([("distribution", "d2"), ("loss", "tensor"),
                      ("mode", "graph")]),
         OrderedDict([("distribution", "d2"), ("loss", "callable"),
                      ("mode", "eager")])
     ], c4)
class SingleLossStepTest(test.TestCase, parameterized.TestCase):
    @ds_combinations.generate(
        combinations.times(
            optimizer_combinations.distributions_and_v1_optimizers(),
            combinations.combine(
                mode=strategy_combinations.graph_and_eager_modes),
            combinations.combine(is_tpu=[False])) +
        combinations.combine(distribution=[strategy_combinations.tpu_strategy],
                             optimizer_fn=optimizer_combinations.optimizers_v1,
                             mode=["graph"],
                             is_tpu=[True]))
    def testTrainNetwork(self, distribution, optimizer_fn, is_tpu):
        with distribution.scope():
            single_loss_step, layer = single_loss_example(
                optimizer_fn,
                distribution,
                use_bias=True,
                iterations_per_step=2)

            if context.executing_eagerly():
                single_loss_step.initialize()
                run_step = single_loss_step
            else:
                with self.cached_session() as sess:
                    sess.run(single_loss_step.initialize())
                    run_step = sess.make_callable(single_loss_step())
            self.evaluate(variables.global_variables_initializer())

            weights, biases = [], []
            for _ in range(5):
                run_step()
                weights.append(self.evaluate(layer.kernel))
                biases.append(self.evaluate(layer.bias))

            error = abs(
                numpy.add(numpy.squeeze(weights), numpy.squeeze(biases)) - 1)
            is_not_increasing = all(y <= x for x, y in zip(error, error[1:]))
            self.assertTrue(is_not_increasing)
Example #12
0
class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
    def _get_iterator(self, strategy, input_fn):
        iterator = strategy.make_input_fn_iterator(lambda _: input_fn())
        self.evaluate(iterator.initializer)
        return iterator

    @ds_combinations.generate(
        combinations.times(
            optimizer_combinations.distributions_and_v1_optimizers(),
            combinations.combine(mode=["graph"],
                                 use_callable_loss=[True, False]) +
            combinations.combine(mode=["eager"], use_callable_loss=[True])) +
        combinations.times(
            optimizer_combinations.distributions_and_v2_optimizers(),
            combinations.combine(mode=["graph", "eager"],
                                 use_callable_loss=[True])) +
        combinations.combine(distribution=[strategy_combinations.tpu_strategy],
                             optimizer_fn=optimizer_combinations.optimizers_v2,
                             mode=["graph"],
                             use_callable_loss=[True]) +
        combinations.combine(distribution=[strategy_combinations.tpu_strategy],
                             optimizer_fn=optimizer_combinations.optimizers_v1,
                             mode=["graph"],
                             use_callable_loss=[True, False]))
    def testTrainNetwork(self, distribution, optimizer_fn, use_callable_loss):
        with distribution.scope():
            optimizer = optimizer_fn()
            model_fn, dataset_fn, layer = minimize_loss_example(
                optimizer, use_bias=True, use_callable_loss=use_callable_loss)

            def step_fn(ctx, inputs):
                del ctx  # Unused
                return distribution.group(
                    distribution.extended.call_for_each_replica(
                        model_fn, args=(inputs, )))

            iterator = self._get_iterator(distribution, dataset_fn)

            def run_step():
                return distribution.extended.experimental_run_steps_on_iterator(
                    step_fn, iterator, iterations=2).run_op

            if not context.executing_eagerly():
                with self.cached_session() as sess:
                    run_step = sess.make_callable(run_step())
            self.evaluate(variables_lib.global_variables_initializer())

            weights, biases = [], []
            for _ in range(5):
                run_step()
                weights.append(self.evaluate(layer.kernel))
                biases.append(self.evaluate(layer.bias))

            error = abs(
                numpy.add(numpy.squeeze(weights), numpy.squeeze(biases)) - 1)
            is_not_increasing = all(y <= x for x, y in zip(error, error[1:]))
            self.assertTrue(is_not_increasing)

    @ds_combinations.generate(
        combinations.times(
            optimizer_combinations.distributions_and_v1_optimizers(),
            combinations.combine(mode=["graph"],
                                 use_callable_loss=[True, False]) +
            combinations.combine(mode=["eager"], use_callable_loss=[True])) +
        combinations.times(
            optimizer_combinations.distributions_and_v2_optimizers(),
            combinations.combine(mode=["graph", "eager"],
                                 use_callable_loss=[True])))
    def testTrainNetworkByCallForEachReplica(self, distribution, optimizer_fn,
                                             use_callable_loss):
        with distribution.scope():
            optimizer = optimizer_fn()
            model_fn, dataset_fn, layer = minimize_loss_example(
                optimizer, use_bias=True, use_callable_loss=use_callable_loss)

            iterator = self._get_iterator(distribution, dataset_fn)

            def run_step():
                return distribution.group(
                    distribution.extended.call_for_each_replica(
                        model_fn, args=(iterator.get_next(), )))

            if not context.executing_eagerly():
                with self.cached_session() as sess:
                    run_step = sess.make_callable(run_step())
                self.evaluate(variables_lib.global_variables_initializer())

            weights, biases = [], []
            for _ in range(10):
                run_step()

                weights.append(self.evaluate(layer.kernel))
                biases.append(self.evaluate(layer.bias))

            error = abs(
                numpy.add(numpy.squeeze(weights), numpy.squeeze(biases)) - 1)
            is_not_increasing = all(y <= x for x, y in zip(error, error[1:]))
            self.assertTrue(is_not_increasing)

    @ds_combinations.generate(
        combinations.times(
            optimizer_combinations.distributions_and_v1_and_v2_optimizers(),
            combinations.combine(mode=["graph", "eager"])) +
        combinations.combine(
            distribution=[strategy_combinations.tpu_strategy],
            optimizer_fn=optimizer_combinations.optimizers_v1_and_v2,
            mode=["graph"]))
    def testOptimizerInsideModelFn(self, distribution, optimizer_fn):
        if (not context.executing_eagerly()
                and control_flow_v2_toggles.control_flow_v2_enabled()):
            self.skipTest("b/138751864")
        created_variables = []
        trainable_variables = []

        def appending_creator(next_creator, **kwargs):
            v = next_creator(**kwargs)
            created_variables.append(v.name)
            if "trainable" in kwargs and kwargs["trainable"]:
                trainable_variables.append(v.name)
            return v

        # Creator scope needs to be set before it's used inside
        # `distribution.scope`.
        with variable_scope.variable_creator_scope(
                appending_creator), distribution.scope():
            optimizer = optimizer_fn()
            model_fn, dataset_fn, _ = minimize_loss_example(
                optimizer, use_bias=True, use_callable_loss=True)

            def step_fn(ctx, inputs):
                del ctx  # Unused
                return distribution.group(
                    distribution.extended.call_for_each_replica(
                        model_fn, args=(inputs, )))

            iterator = self._get_iterator(distribution, dataset_fn)

            def run_step():
                return distribution.extended.experimental_run_steps_on_iterator(
                    step_fn, iterator, iterations=1).run_op

            if not context.executing_eagerly():
                with self.cached_session() as sess:
                    run_step = sess.make_callable(run_step())
            self.evaluate(variables_lib.global_variables_initializer())
            run_step()

            def get_expected_variables(num_parameter_devices):
                name = optimizer._name

                if isinstance(optimizer, optimizer_v2.OptimizerV2):
                    variables = VAR_MAP_V2[name]
                else:
                    variables = VAR_MAP_V1[name]

                extended_variables = [
                    v + "/replica_{}".format(replica) for v in variables
                    for replica in range(1, num_parameter_devices)
                ]
                variables = list(variables) + extended_variables
                return set(v + ":0" for v in variables)

            self.assertEqual(
                get_expected_variables(
                    len(distribution.extended.parameter_devices)),
                set(created_variables))

    @ds_combinations.generate(
        combinations.times(
            combinations.combine(momentum=[0.8, 0.9, 0.99],
                                 renorm=[False, True]),
            combinations.times(
                optimizer_combinations.distributions_and_v1_and_v2_optimizers(
                ),
                combinations.combine(
                    mode=["graph", "eager"],
                    # TODO(isaprykin):  Allow False here.  Currently subsequent
                    # replicas will re-execute UPDATE_OPS of previous replicas.
                    update_ops_in_cross_replica_mode=[True])) +
            combinations.combine(
                distribution=[strategy_combinations.tpu_strategy],
                optimizer_fn=optimizer_combinations.optimizers_v1_and_v2,
                mode=["graph"],
                update_ops_in_cross_replica_mode=[False])))
    def testTrainNetworkWithBatchNorm(self, distribution, optimizer_fn,
                                      momentum, renorm,
                                      update_ops_in_cross_replica_mode):
        """Verifies that moving mean updates are reduced across replicas."""
        with distribution.scope():
            num_replicas = distribution.num_replicas_in_sync
            model_fn, dataset_fn, batchnorm = batchnorm_example(
                optimizer_fn,
                batch_per_epoch=num_replicas,
                momentum=momentum,
                renorm=renorm,
                update_ops_in_replica_mode=not update_ops_in_cross_replica_mode
            )

            def step_fn(ctx, inputs):
                del ctx  # Unused
                fetches = distribution.experimental_local_results(
                    distribution.extended.call_for_each_replica(
                        model_fn, args=(inputs, )))
                if update_ops_in_cross_replica_mode:
                    fetches += tuple(
                        ops.get_collection(ops.GraphKeys.UPDATE_OPS))
                return control_flow_ops.group(fetches)

            iterator = self._get_iterator(distribution, dataset_fn)

            def run_step():
                return distribution.extended.experimental_run_steps_on_iterator(
                    step_fn, iterator, iterations=1).run_op

            if not context.executing_eagerly():
                with self.cached_session() as sess:
                    run_step = sess.make_callable(run_step())
            self.evaluate(variables_lib.global_variables_initializer())

            expected_moving_means = [0.] * 8

            def averaged_batch_mean(i):
                # Each batch has shape [16, 8] where the ith element in jth list is
                # (8 * j + i + replica_id * 100). So the batch mean in each replica is
                # (60 + i + replica_id * 100). So here comes its batch mean over all
                # replicas:
                return 60. + i + (num_replicas - 1.) / 2. * 100.

            for _ in range(10):
                run_step()
                moving_means = self.evaluate(batchnorm.moving_mean)

                # We make sure that the moving_mean is updated as if the sample mean is
                # calculated over all replicas.
                for i, expected_moving_mean in enumerate(
                        expected_moving_means):
                    expected_moving_means[i] -= (
                        (expected_moving_mean - averaged_batch_mean(i)) *
                        (1.0 - momentum))
                    self.assertNear(expected_moving_means[i], moving_means[i],
                                    0.0001)

    @ds_combinations.generate(
        combinations.times(
            combinations.combine(loss_reduction=[
                losses_impl.Reduction.SUM, losses_impl.Reduction.MEAN,
                losses_impl.Reduction.SUM_OVER_BATCH_SIZE,
                losses_impl.Reduction.SUM_OVER_NONZERO_WEIGHTS
            ]),
            combinations.times(
                combinations.combine(distribution=[
                    strategy_combinations.one_device_strategy,
                    strategy_combinations.mirrored_strategy_with_gpu_and_cpu,
                    strategy_combinations.mirrored_strategy_with_two_gpus
                ]),
                combinations.times(
                    combinations.combine(optimizer_fn=optimizer_combinations.
                                         gradient_descent_optimizer_v1_fn),
                    combinations.combine(mode=["graph"],
                                         use_callable_loss=[True, False]) +
                    combinations.combine(mode=["eager"],
                                         use_callable_loss=[True])) +
                combinations.times(
                    combinations.combine(
                        optimizer_fn=optimizer_combinations.
                        gradient_descent_optimizer_keras_v2_fn),
                    combinations.combine(mode=["graph", "eager"],
                                         use_callable_loss=[True]))) +
            combinations.combine(
                distribution=[strategy_combinations.tpu_strategy],
                optimizer_fn=optimizer_combinations.
                gradient_descent_optimizer_v1_fn,
                mode=["graph"],
                use_callable_loss=[True, False]) + combinations.combine(
                    distribution=[strategy_combinations.tpu_strategy],
                    optimizer_fn=optimizer_combinations.
                    gradient_descent_optimizer_keras_v2_fn,
                    mode=["graph"],
                    use_callable_loss=[True])))
    def testMeanVsSum(self, distribution, optimizer_fn, loss_reduction,
                      use_callable_loss):
        with distribution.scope():
            all_vars = []

            def model_fn(inputs):
                x, y = inputs
                w = variable_scope.get_variable("w", initializer=[[2.]])
                all_vars.append(w)

                def loss_fn():
                    # Use fixed initialization to make the steps deterministic.
                    predict = math_ops.matmul(x, w)
                    loss = losses_impl.mean_squared_error(
                        y, predict, reduction=loss_reduction)
                    if loss_reduction == losses_impl.Reduction.SUM:
                        return loss
                    return loss / distribution.num_replicas_in_sync

                optimizer = optimizer_fn(
                )  # GradientDescent with 0.2 learning rate

                if isinstance(optimizer, optimizer_v2.OptimizerV2):
                    return optimizer.minimize(loss_fn, [w])
                else:
                    if use_callable_loss:
                        return optimizer.minimize(loss_fn)
                    else:
                        return optimizer.minimize(loss_fn())

            def dataset_fn():
                features = dataset_ops.Dataset.from_tensors([[2.], [7.]])
                labels = dataset_ops.Dataset.from_tensors([[6.], [21.]])
                return dataset_ops.Dataset.zip((features, labels)).repeat()

            def step_fn(ctx, inputs):
                del ctx  # Unused
                return distribution.group(
                    distribution.extended.call_for_each_replica(
                        model_fn, args=(inputs, )))

            iterator = self._get_iterator(distribution, dataset_fn)

            def run_step():
                return distribution.extended.experimental_run_steps_on_iterator(
                    step_fn, iterator, iterations=1).run_op

            if not context.executing_eagerly():
                with self.cached_session() as sess:
                    run_step = sess.make_callable(run_step())
            self.evaluate(variables_lib.global_variables_initializer())

            run_step()

            v = all_vars[0]
            self.assertTrue(all(v is vi for vi in all_vars[1:]))
            weight = numpy.squeeze(self.evaluate(v))
            # Our model is:
            #   predict = x * w
            #   loss = (predict - y)^2
            #   dloss/dpredict = 2*(predict - y)
            #   dloss/dw = 2 * x^T @ (predict - y)
            # For our batch size of 2, assuming sum loss reduction:
            #   x = [2, 7]
            #   y = [6, 21]
            #   w_initial = 2
            #   predict = [4, 14]
            #   predict - y = [-2, -7]
            #   dloss/dw = 2 <[2, 7], [-2, -7]> = - 2(4 + 49) = -106
            # So unreplicated the update to w with lr=0.001 is -0.2 * -106 = 0.106
            # with sum loss reduction, or 0.053 with mean.
            if loss_reduction == losses_impl.Reduction.SUM:
                # Note that the "distribution.num_replicas_in_sync" factor will go away
                # once we split the input across replicas, instead of pulling a complete
                # batch of input per replica.
                self.assertNear(weight,
                                2 + 0.106 * distribution.num_replicas_in_sync,
                                0.0001)
            else:
                # One of the mean loss reductions.
                self.assertNear(weight, 2 + 0.053, 0.0001)

    @ds_combinations.generate(
        combinations.times(
            optimizer_combinations.distributions_and_v1_and_v2_optimizers(),
            combinations.combine(mode=["graph", "eager"]),
            combinations.combine(is_tpu=[False])) + combinations.combine(
                distribution=[strategy_combinations.tpu_strategy],
                optimizer_fn=optimizer_combinations.optimizers_v1_and_v2,
                mode=["graph"],
                is_tpu=[True]))
    def testRunStepsWithOutputContext(self, distribution, optimizer_fn,
                                      is_tpu):
        with distribution.scope():

            def dataset_fn():
                dataset = dataset_ops.Dataset.from_tensors([[1.]]).repeat()
                # TODO(priyag): batch with drop_remainder=True causes shapes to be
                # fully defined for TPU. Remove this when XLA supports dynamic shapes.
                return dataset.batch(batch_size=1, drop_remainder=True)

            optimizer = optimizer_fn()
            layer = core.Dense(1, use_bias=True)

            key1 = "foo"
            value1 = "bar"

            def model_fn(output_context, x):
                """A very simple model written by the user."""
                def loss_fn():
                    y = array_ops.reshape(layer(x),
                                          []) - constant_op.constant(1.)
                    return y * y

                if isinstance(optimizer, optimizer_v2.OptimizerV2):
                    train_op = optimizer.minimize(
                        loss_fn, lambda: layer.trainable_variables)
                else:
                    train_op = optimizer.minimize(loss_fn)
                loss = loss_fn()
                output_context.set_last_step_output(
                    name="replica_loss_reduced",
                    output=loss,
                    reduce_op=reduce_util.ReduceOp.MEAN)
                output_context.set_non_tensor_output(key1, value1)
                return (train_op, loss)

            def step_fn(output_context, inputs):
                (train_op, loss) = distribution.extended.call_for_each_replica(
                    model_fn, args=(output_context, inputs))
                output_context.set_last_step_output(
                    name="cross_replica_loss_reduced",
                    output=loss,
                    reduce_op=reduce_util.ReduceOp.MEAN)
                output_context.set_last_step_output(
                    name="cross_replica_loss_not_reduced", output=loss)
                return distribution.group(train_op)

            iterator = self._get_iterator(distribution, dataset_fn)

            def run_step():
                initial_loss = lambda: constant_op.constant(1e7)
                # Initial values corresponding to reduced losses are just single
                # tensors. But for non reduced losses, we need to have initial
                # values that are of the same structure as non reduced losses. In
                # MirroredStrategy, this will be a list of losses, in TPUStrategy
                # it will be single tensor. Using `call_for_each_replica` followed
                # by `experimental_local_results` gives us the desired initial
                # value structure.
                not_reduced = distribution.experimental_local_results(
                    distribution.extended.call_for_each_replica(initial_loss))
                initial_loop_values = {
                    "replica_loss_reduced": initial_loss(),
                    "cross_replica_loss_reduced": initial_loss(),
                    "cross_replica_loss_not_reduced": not_reduced,
                }
                ctx = distribution.extended.experimental_run_steps_on_iterator(
                    step_fn,
                    iterator,
                    iterations=2,
                    initial_loop_values=initial_loop_values)

                self.assertEqual({key1: (value1, )}, ctx.non_tensor_outputs)
                self._verify_loss_output(
                    initial_loss(),
                    loss_output=ctx.last_step_outputs["replica_loss_reduced"],
                    reduced=True,
                    distribution=distribution)
                self._verify_loss_output(
                    initial_loss(),
                    loss_output=ctx.
                    last_step_outputs["cross_replica_loss_reduced"],
                    reduced=True,
                    distribution=distribution)
                self._verify_loss_output(
                    initial_loss(),
                    loss_output=ctx.
                    last_step_outputs["cross_replica_loss_not_reduced"],
                    reduced=False,
                    distribution=distribution)
                return (ctx.run_op,
                        ctx.last_step_outputs["replica_loss_reduced"])

            if not context.executing_eagerly():
                with self.cached_session() as sess:
                    run_step = sess.make_callable(run_step())
            self.evaluate(variables_lib.global_variables_initializer())

            weights, biases, losses = [], [], []
            for _ in range(5):
                _, loss = run_step()
                losses.append(loss)
                weights.append(self.evaluate(layer.kernel))
                biases.append(self.evaluate(layer.bias))

            loss_is_not_increasing = all(y <= x
                                         for x, y in zip(losses, losses[1:]))
            self.assertTrue(loss_is_not_increasing)

            error = abs(
                numpy.add(numpy.squeeze(weights), numpy.squeeze(biases)) - 1)
            error_is_not_increasing = all(y <= x
                                          for x, y in zip(error, error[1:]))
            self.assertTrue(error_is_not_increasing)

    def _verify_loss_output(self, initial_loss, loss_output, reduced,
                            distribution):
        if not reduced:
            self.assertLen(
                distribution.experimental_local_results(loss_output),
                distribution.num_replicas_in_sync)
            loss_tensor = distribution.reduce(reduce_util.ReduceOp.MEAN,
                                              loss_output,
                                              axis=None)
        else:
            unwrapped_output = distribution.experimental_local_results(
                loss_output)
            self.assertLen(unwrapped_output, 1)
            loss_tensor = unwrapped_output[0]
        self.assertEqual(initial_loss.dtype, loss_tensor.dtype)
        self.assertEqual(initial_loss.shape, loss_tensor.shape)

    @ds_combinations.generate(
        optimizer_combinations.distributions_and_v2_optimizers())
    def test_empty_var_list(self, distribution, optimizer_fn):
        opt = optimizer_fn()
        with distribution.scope():

            def run_fn():
                opt.minimize(lambda: constant_op.constant(1.), [])
                opt.apply_gradients([])

            distribution.run(run_fn)
Example #13
0
def strategy_minus_tpu_and_input_config_combinations_eager():
    return (combinations.times(
        combinations.combine(distribution=strategies_minus_tpu),
        eager_mode_test_configuration()))
Example #14
0
def all_strategy_and_input_config_combinations_eager():
    return (combinations.times(
        combinations.combine(distribution=all_strategies),
        eager_mode_test_configuration()))
Example #15
0
def multi_worker_mirrored_eager_and_graph():
    return combinations.times(
        combinations.combine(distribution=multi_worker_mirrored_strategies),
        eager_mode_test_configuration() + graph_mode_test_configuration())
class OptimizerTest(test.TestCase, parameterized.TestCase):

  @ds_combinations.generate(
      combinations.times(
          combinations.combine(
              distribution=strategy_combinations.multidevice_strategies,
              mode=["eager"],
          ),
          ds_combinations.concat(
              combinations.combine(
                  experimental_aggregate_gradients=True,
                  expected=[[[-0.3, -0.3], [-0.3, -0.3]]]),
              combinations.combine(
                  experimental_aggregate_gradients=False,
                  expected=[[[-0.1, -0.1], [-0.2, -0.2]]]),
          )))
  def test_custom_aggregation(self, distribution,
                              experimental_aggregate_gradients, expected):

    with distribution.scope():
      v = variables.Variable([0., 0.])
      optimizer = gradient_descent.SGD(0.1)

    @def_function.function
    def optimize():
      grads = values.PerReplica([
          ops.convert_to_tensor_v2_with_dispatch([1., 1.]),
          ops.convert_to_tensor_v2_with_dispatch([2., 2.]),
      ])

      def step_fn(grads):
        optimizer.apply_gradients(
            [(grads, v)],
            experimental_aggregate_gradients=experimental_aggregate_gradients)
        return v.read_value()

      return distribution.experimental_local_results(
          distribution.run(step_fn, args=(grads,)))

    self.assertAllClose(optimize(), expected)

  @ds_combinations.generate(
      combinations.combine(
          distribution=strategy_combinations.one_device_strategy,
          mode=["eager"],
          experimental_aggregate_gradients=[True, False]))
  def test_custom_aggregation_one_device(self, distribution,
                                         experimental_aggregate_gradients):

    with distribution.scope():
      v = variables.Variable([0., 0.])
      optimizer = gradient_descent.SGD(0.1)

    @def_function.function
    def optimize():
      grads = ops.convert_to_tensor_v2_with_dispatch([1., 1.])

      def step_fn(grads):
        optimizer.apply_gradients(
            [(grads, v)],
            experimental_aggregate_gradients=experimental_aggregate_gradients)
        return v.read_value()

      return distribution.experimental_local_results(
          distribution.run(step_fn, args=(grads,)))

    self.assertAllClose(optimize(), [[-0.1, -0.1]])

  @ds_combinations.generate(
      combinations.combine(distribution=[
          strategy_combinations.central_storage_strategy_with_gpu_and_cpu
      ]))
  def test_custom_aggregation_central_storage(self, distribution):
    with distribution.scope():
      v = variables.Variable([0., 0.])
      optimizer = gradient_descent.SGD(0.1)

    grads = ops.convert_to_tensor_v2_with_dispatch([1., 1.])

    def step_fn(grads):
      with self.assertRaises(NotImplementedError):
        optimizer.apply_gradients([(grads, v)],
                                  experimental_aggregate_gradients=False)

    return distribution.run(step_fn, args=(grads,))
Example #17
0
class TestDistributionStrategyWithCallbacks(test.TestCase,
                                            parameterized.TestCase):

  @ds_combinations.generate(
      combinations.times(
          keras_test_lib.all_strategy_combinations()))
  def test_callbacks_in_fit(self, distribution):
    with distribution.scope():
      model = keras_test_lib.get_model()
      model.compile(
          optimizer='sgd',
          loss='mse',
          metrics=['mae'])

    dataset = keras_test_lib.get_dataset(distribution)
    counter = Counter()

    epochs = 2
    steps_per_epoch = 5
    validation_steps = 3

    model.fit(
        dataset,
        epochs=epochs,
        steps_per_epoch=steps_per_epoch,
        verbose=0,
        validation_data=dataset,
        validation_steps=validation_steps,
        callbacks=[counter])

    if (isinstance(distribution, tpu_strategy.TPUStrategyV1) and
        not context.executing_eagerly()):
      # TPU Strategy can have multi step training, from extended.steps_per_run
      # if steps_per_run = 1, then num_batch_call_per_epoch = steps_per_epoch
      steps_per_run = distribution.extended.steps_per_run
      num_batch_call_per_epoch = steps_per_epoch // steps_per_run
      if steps_per_epoch % steps_per_run:
        num_batch_call_per_epoch += 1
    else:
      num_batch_call_per_epoch = steps_per_epoch

    self.assertDictEqual(
        counter.method_counts, {
            'on_batch_begin': epochs * num_batch_call_per_epoch,
            'on_batch_end': epochs * num_batch_call_per_epoch,
            'on_epoch_begin': epochs,
            'on_epoch_end': epochs,
            'on_test_batch_begin': epochs * validation_steps,
            'on_test_batch_end': epochs * validation_steps,
            'on_test_begin': epochs,
            'on_test_end': epochs,
            'on_train_batch_begin': epochs * num_batch_call_per_epoch,
            'on_train_batch_end': epochs * num_batch_call_per_epoch,
            'on_train_begin': 1,
            'on_train_end': 1
        })

  @ds_combinations.generate(
      combinations.times(
          keras_test_lib.all_strategy_combinations()))
  def test_callbacks_in_eval(self, distribution):
    with distribution.scope():
      model = keras_test_lib.get_model()
      model.compile(
          optimizer='sgd',
          loss='mse',
          metrics=['mae'])

    dataset = keras_test_lib.get_dataset(distribution)
    counter = Counter()

    model.evaluate(dataset, steps=5, callbacks=[counter])

    self.assertDictEqual(
        counter.method_counts, {
            'on_test_batch_begin': 5,
            'on_test_batch_end': 5,
            'on_test_begin': 1,
            'on_test_end': 1
        })

  @ds_combinations.generate(
      combinations.times(
          keras_test_lib.all_strategy_combinations()))
  def test_callbacks_in_predict(self, distribution):
    with distribution.scope():
      model = keras_test_lib.get_model()
      model.compile(
          optimizer='sgd',
          loss='mse',
          metrics=['mae'])

    dataset = keras_test_lib.get_dataset(distribution)
    counter = Counter()

    model.predict(
        keras_test_lib.get_predict_dataset(dataset),
        steps=5,
        callbacks=[counter])

    self.assertDictEqual(
        counter.method_counts, {
            'on_predict_batch_begin': 5,
            'on_predict_batch_end': 5,
            'on_predict_begin': 1,
            'on_predict_end': 1
        })
    # Cross above test cases with use_dataset in (True, False)
    for use_dataset in (True, False):
        for case in test_cases:
            case = case.copy()
            if use_dataset:
                case["testcase_name"] = case["testcase_name"] + "_with_dataset"
            case["use_dataset"] = use_dataset
            crossed_test_cases.append(case)

    return crossed_test_cases


@ds_combinations.generate(
    combinations.times(
        combinations.combine(
            strategy=strategy_combinations.all_strategies +
            strategy_combinations.multi_worker_mirrored_strategies,
            mode=["eager"]), _get_layer_computation_test_cases()))
class NormalizationTest(keras_parameterized.TestCase,
                        preprocessing_test_utils.PreprocessingLayerTest):
    def test_layer_computation(self, strategy, adapt_data, axis, test_data,
                               use_dataset, expected):
        input_shape = tuple([None for _ in range(test_data.ndim - 1)])
        if use_dataset:
            # Keras APIs expect batched datasets
            adapt_data = dataset_ops.Dataset.from_tensor_slices(
                adapt_data).batch(test_data.shape[0] // 2)
            test_data = dataset_ops.Dataset.from_tensor_slices(
                test_data).batch(test_data.shape[0] // 2)

        with strategy.scope():
Example #19
0
class SavedModelTFModuleTest(test_base.TestSavedModelBase):
    def setUp(self):
        self._root_dir = 'saved_model_save_load'
        super(SavedModelTFModuleTest, self).setUp()

    def _train_model(self, model, x_train, y_train, batch_size):
        pass

    def _predict_with_model(self, distribution, model, predict_dataset):
        if distribution:
            dist_predict_dataset = distribution.experimental_distribute_dataset(
                predict_dataset)
            per_replica_predict_data = next(iter(dist_predict_dataset))
            result = distribution.run(model, args=(per_replica_predict_data, ))
            # Convert the per_replica value to a list, then concatenate them
            reduced = distribution.experimental_local_results(result)
            concat = array_ops.concat(reduced, 0)
            return concat
        else:
            return model(next(iter(predict_dataset)))

    def _save_model(self, model, saved_dir):
        call = model.__call__.get_concrete_function(
            tensor_spec.TensorSpec(None))
        saved_model.save(model, saved_dir, signatures=call)

    def _load_and_run_model(self,
                            distribution,
                            saved_dir,
                            predict_dataset,
                            output_name='output_1'):
        del output_name
        model = saved_model.load(saved_dir)
        return self._predict_with_model(distribution, model, predict_dataset)

    @ds_combinations.generate(test_base.tfmodule_models_with_strategies())
    def test_save_no_strategy_restore_strategy(self, model_and_input,
                                               distribution):
        self.run_test_save_no_strategy_restore_strategy(
            model_and_input, distribution)

    @ds_combinations.generate(
        combinations.times(test_base.tfmodule_models_with_strategies(),
                           combinations.combine(save_in_scope=[True, False])))
    def test_save_strategy_restore_no_strategy(self, model_and_input,
                                               distribution, save_in_scope):
        self.run_test_save_strategy_restore_no_strategy(
            model_and_input, distribution, save_in_scope)

    @ds_combinations.generate(
        combinations.times(test_base.tfmodule_models_with_strategy_pairs(),
                           combinations.combine(save_in_scope=[True, False])))
    def test_save_strategy_restore_strategy(self, model_and_input,
                                            distribution_for_saving,
                                            distribution_for_restoring,
                                            save_in_scope):
        self.run_test_save_strategy_restore_strategy(
            model_and_input, distribution_for_saving,
            distribution_for_restoring, save_in_scope)

    @ds_combinations.generate(
        combinations.combine(
            model_and_input=[model_combinations.simple_tfmodule_model],
            distribution=test_base.strategies +
            [strategy_combinations.cloud_tpu_strategy]))
    def test_save_load_io_device(self, model_and_input, distribution):
        saved_dir = os.path.join(self.get_temp_dir(), 'io_device')
        with distribution.scope():
            model = model_and_input.get_model()
            x_train, y_train, _ = model_and_input.get_data()
            batch_size = model_and_input.get_batch_size()
            self._train_model(model, x_train, y_train, batch_size)
        call = model.__call__.get_concrete_function(
            tensor_spec.TensorSpec(None))
        save_options = save_options_lib.SaveOptions(
            experimental_io_device='/job:localhost')
        saved_model.save(model,
                         saved_dir,
                         signatures=call,
                         options=save_options)
        load_options = load_options_lib.LoadOptions(
            experimental_io_device='/job:localhost')
        # Check that the model can be loaded and training continued without error.
        with distribution.scope():
            loaded_model = saved_model.load(saved_dir, options=load_options)
            self._train_model(loaded_model, x_train, y_train, batch_size)
Example #20
0
class OptimizerTest(test.TestCase, parameterized.TestCase):
    @ds_combinations.generate(
        combinations.times(
            combinations.combine(
                distribution=keras_strategy_combinations.
                multidevice_strategies,
                mode=["eager"],
            ),
            combinations.combine(experimental_aggregate_gradients=True,
                                 expected=[[[-0.3, -0.3], [-0.3, -0.3]]]) +
            combinations.combine(experimental_aggregate_gradients=False,
                                 expected=[[[-0.1, -0.1], [-0.2, -0.2]]])))
    def test_custom_aggregation(self, distribution,
                                experimental_aggregate_gradients, expected):

        with distribution.scope():
            v = variables.Variable([0., 0.])
            optimizer = gradient_descent.SGD(0.1)

        class PerReplica(values.DistributedValues):
            """Holds a map from replica to unsynchronized values."""
            @property
            def values(self):
                """Returns the per replica values."""
                return self._values

        @def_function.function
        def optimize():
            with ops.device(distribution.extended.worker_devices[0]):
                v1 = ops.convert_to_tensor_v2_with_dispatch([1., 1.])
            with ops.device(distribution.extended.worker_devices[1]):
                v2 = ops.convert_to_tensor_v2_with_dispatch([2., 2.])
            grads = PerReplica([v1, v2])

            def step_fn(grads):
                optimizer.apply_gradients([(grads, v)],
                                          experimental_aggregate_gradients=
                                          experimental_aggregate_gradients)
                return v.read_value()

            return distribution.experimental_local_results(
                distribution.run(step_fn, args=(grads, )))

        self.assertAllClose(optimize(), expected)

    @ds_combinations.generate(
        combinations.combine(
            distribution=strategy_combinations.one_device_strategy,
            mode=["eager"],
            experimental_aggregate_gradients=[True, False]))
    def test_custom_aggregation_one_device(self, distribution,
                                           experimental_aggregate_gradients):

        with distribution.scope():
            v = variables.Variable([0., 0.])
            optimizer = gradient_descent.SGD(0.1)

        @def_function.function
        def optimize():
            grads = ops.convert_to_tensor_v2_with_dispatch([1., 1.])

            def step_fn(grads):
                optimizer.apply_gradients([(grads, v)],
                                          experimental_aggregate_gradients=
                                          experimental_aggregate_gradients)
                return v.read_value()

            return distribution.experimental_local_results(
                distribution.run(step_fn, args=(grads, )))

        self.assertAllClose(optimize(), [[-0.1, -0.1]])

    @ds_combinations.generate(
        combinations.combine(distribution=[
            strategy_combinations.central_storage_strategy_with_gpu_and_cpu
        ]))
    def test_custom_aggregation_central_storage(self, distribution):
        with distribution.scope():
            v = variables.Variable([0., 0.])
            optimizer = gradient_descent.SGD(0.1)

        grads = ops.convert_to_tensor_v2_with_dispatch([1., 1.])

        def step_fn(grads):
            with self.assertRaises(NotImplementedError):
                optimizer.apply_gradients(
                    [(grads, v)], experimental_aggregate_gradients=False)

        return distribution.run(step_fn, args=(grads, ))
Example #21
0
 def test_overlapping_keys(self):
     c1 = combinations.combine(mode=["graph"], loss=["callable", "tensor"])
     c2 = combinations.combine(mode=["eager"], loss=["callable"])
     with self.assertRaisesRegex(ValueError, ".*Keys.+overlap.+"):
         _ = combinations.times(c1, c2)
    crossed_test_cases = []
    # Cross above test cases with use_dataset in (True, False)
    for use_dataset in (True, False):
        for case in test_cases:
            case = case.copy()
            if use_dataset:
                case["testcase_name"] = case["testcase_name"] + "_with_dataset"
            case["use_dataset"] = use_dataset
            crossed_test_cases.append(case)

    return crossed_test_cases


@ds_combinations.generate(
    combinations.times(
        combinations.combine(distribution=all_strategies,
                             mode=["eager", "graph"]),
        _get_layer_computation_test_cases()))
class NormalizationTest(keras_parameterized.TestCase,
                        preprocessing_test_utils.PreprocessingLayerTest):
    def test_layer_computation(self, distribution, adapt_data, axis, test_data,
                               use_dataset, expected):
        input_shape = tuple([None for _ in range(test_data.ndim - 1)])
        if use_dataset:
            # Keras APIs expect batched datasets
            adapt_data = dataset_ops.Dataset.from_tensor_slices(
                adapt_data).batch(test_data.shape[0] // 2)
            test_data = dataset_ops.Dataset.from_tensor_slices(
                test_data).batch(test_data.shape[0] // 2)

        with distribution.scope():
            input_data = keras.Input(shape=input_shape)
def multi_worker_mirrored_eager():
    return combinations.times(
        combinations.combine(distribution=multi_worker_mirrored),
        eager_mode_test_configuration())