def get_model(self,
                cloning,
                initial_weights=None,
                distribution=None,
                input_shapes=None):
    with keras_correctness_test_base.MaybeDistributionScope(distribution):
      # We add a few non-linear layers to make it non-trivial.
      model = keras.Sequential()
      model.add(keras.layers.Dense(10, activation='relu', input_shape=(1,)))
      model.add(keras.layers.Dense(
          10, activation='relu',
          kernel_regularizer=keras.regularizers.l2(1e-4)))
      model.add(keras.layers.Dense(10, activation='relu'))
      model.add(keras.layers.Dense(1))

      if initial_weights:
        model.set_weights(initial_weights)

      model.compile(
          loss=keras.losses.mean_squared_error,
          optimizer=gradient_descent_keras.SGD(0.05),
          metrics=['mse'],
          cloning=cloning)
      return model
Example No. 2
    def testOptimizerWithKerasModelAndNumpyArrays(self, distribution, cloning):
        self.skipTest('b/130309197')
        with self.cached_session():
            with distribution.scope():
                model = get_model()
                optimizer = gradient_descent.SGD(0.001)
                loss = 'mse'
                metrics = ['mae']
                model.compile(optimizer,
                              loss,
                              metrics=metrics,
                              cloning=cloning)

            inputs = np.zeros((64, 3), dtype=np.float32)
            targets = np.zeros((64, 4), dtype=np.float32)

            model.fit(inputs,
                      targets,
                      epochs=1,
                      batch_size=2,
                      verbose=0,
                      validation_data=(inputs, targets))
            model.evaluate(inputs, targets)
            model.predict(inputs)
Example No. 3
 def testNesterovMomentum(self):
     # TODO(tanzheny, omalleyt): Fix test in eager mode.
     with ops.Graph().as_default():
         for dtype in [dtypes.float32, dtypes.float64]:
             var0 = variables.Variable([1.0, 2.0], dtype=dtype, name="var0")
             var1 = variables.Variable([3.0, 4.0], dtype=dtype, name="var1")
             var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
             var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
             accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
             accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
             loss = lambda: 5 * var0 * var0 + 3 * var1  # pylint: disable=cell-var-from-loop
             mom_op = gradient_descent.SGD(learning_rate=2.0,
                                           momentum=0.9,
                                           nesterov=True)
             opt_op = mom_op.minimize(loss, [var0, var1])
             self.evaluate(variables.global_variables_initializer())
             for _ in range(1, 5):
                 self.evaluate(opt_op)
                 var0_np, accum0_np = self._update_nesterov_momentum_numpy(
                     var0_np, accum0_np, var0_np * 10, 2.0, 0.9)
                 var1_np, accum1_np = self._update_nesterov_momentum_numpy(
                     var1_np, accum1_np, 3, 2.0, 0.9)
                 self.assertAllClose(var0_np, self.evaluate(var0))
                 self.assertAllClose(var1_np, self.evaluate(var1))
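The reference helper _update_nesterov_momentum_numpy is not shown in this listing; below is a minimal numpy-style sketch of the update it is assumed to compute (accumulator update followed by the Nesterov look-ahead step), for reading the assertions above.

def _update_nesterov_momentum_numpy(var, accum, g, lr, momentum):
    # accum <- momentum * accum - lr * g
    accum = accum * momentum - g * lr
    # Nesterov look-ahead: the variable moves by the momentum-adjusted step.
    var = var + (accum * momentum - g * lr)
    return var, accum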
Example No. 4
    def testAdaptiveLearningRate(self):
        for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
            var0 = resource_variable_ops.ResourceVariable([1.0, 2.0],
                                                          dtype=dtype)
            var1 = resource_variable_ops.ResourceVariable([3.0, 4.0],
                                                          dtype=dtype)

            def loss():
                return 5 * var0 + 3 * var1  # pylint: disable=cell-var-from-loop

            sgd = gradient_descent.SGD(1.0)

            self.evaluate(variables.global_variables_initializer())
            # Fetch params to validate initial values
            self.assertAllClose([1.0, 2.0], self.evaluate(var0))
            self.assertAllClose([3.0, 4.0], self.evaluate(var1))
            # Run 1 step of sgd through optimizer
            opt_op = sgd.minimize(loss, [var0, var1])
            self.evaluate(variables.global_variables_initializer())
            self.evaluate(opt_op)
            # Validate updated params
            # var0 = [1., 2.] - 1.0 * [5, 5]
            self.assertAllClose([-4., -3.], self.evaluate(var0))
            # var1 = [3., 4.] - 1.0 * [3, 3]
            self.assertAllClose([0., 1.], self.evaluate(var1))

            sgd.learning_rate = 0.5
            if context.executing_eagerly():
                sgd.minimize(loss, [var0, var1])
            else:
                self.evaluate(opt_op)
            # Validate updated params
            # var0 = [-4., -3.] - 0.5 * [5, 5]
            self.assertAllClose([-6.5, -5.5], self.evaluate(var0))
            # var1 = [0., 1.] - 0.5 * [3, 3]
            self.assertAllClose([-1.5, -0.5], self.evaluate(var1))
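The asserted values follow the plain SGD rule var <- var - learning_rate * grad, with gradients [5, 5] and [3, 3] for the loss 5 * var0 + 3 * var1; a quick standalone numpy check (an illustration, not part of the test):

import numpy as np

var0, var1 = np.array([1., 2.]), np.array([3., 4.])
g0, g1 = np.array([5., 5.]), np.array([3., 3.])  # gradients of 5 * var0 + 3 * var1
var0, var1 = var0 - 1.0 * g0, var1 - 1.0 * g1    # lr = 1.0 -> [-4., -3.], [0., 1.]
var0, var1 = var0 - 0.5 * g0, var1 - 0.5 * g1    # lr = 0.5 -> [-6.5, -5.5], [-1.5, -0.5]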
Example No. 5
            def fit_eval_and_predict(with_distribution=None):
                model = _create_model()
                # We have initialized the model to the same weights for the
                # distribution and non-distribution runs.
                model.set_weights(initial_weights)
                model.compile(loss=keras.losses.mean_squared_error,
                              optimizer=gradient_descent_keras.SGD(0.5),
                              metrics=['mse'],
                              distribute=with_distribution)

                training_inputs, eval_inputs, predict_inputs = (
                    get_correctness_test_inputs(use_numpy, use_validation_data,
                                                with_distribution, x_train,
                                                y_train, x_predict))

                result = {}
                result['training_history_1'] = model.fit(
                    **training_inputs).history

                if eval_inputs is not None:
                    result['eval_result_1'] = model.evaluate(**eval_inputs)

                result['weights_1'] = model.get_weights()
                result['predict_result_1'] = model.predict(**predict_inputs)

                # Train and eval again to mimic user's flow.

                result['training_history_2'] = model.fit(
                    **training_inputs).history

                if eval_inputs is not None:
                    result['eval_result_2'] = model.evaluate(**eval_inputs)

                result['weights_2'] = model.get_weights()

                return result
    def test_custom_aggregation(self, distribution,
                                experimental_aggregate_gradients, expected):

        with distribution.scope():
            v = variables.Variable([0., 0.])
            optimizer = gradient_descent.SGD(0.1)

        @def_function.function
        def optimize():
            grads = values.PerReplica([
                ops.convert_to_tensor_v2_with_dispatch([1., 1.]),
                ops.convert_to_tensor_v2_with_dispatch([2., 2.]),
            ])

            def step_fn(grads):
                optimizer.apply_gradients([(grads, v)],
                                          experimental_aggregate_gradients=
                                          experimental_aggregate_gradients)
                return v.read_value()

            return distribution.experimental_local_results(
                distribution.run(step_fn, args=(grads, )))

        self.assertAllClose(optimize(), expected)
Example No. 7
def build_federated_sgd_process(
        model_fn,
        server_optimizer_fn=lambda: gradient_descent.SGD(learning_rate=0.1),
        client_weight_fn=None):
    """Builds the TFF computations for optimization using federated SGD.

  Args:
    model_fn: A no-arg function that returns a `tff.learning.TrainableModel`.
    server_optimizer_fn: A no-arg function that returns a `tf.Optimizer`. The
      `apply_gradients` method of this optimizer is used to apply client updates
      to the server model.
    client_weight_fn: Optional function that takes the output of
      `model.report_local_outputs` and returns a tensor that provides the weight
      in the federated average of model deltas. If not provided, the default is
      the total number of examples processed on device.

  Returns:
    A `tff.utils.IterativeProcess`.
  """
    def client_sgd_avg(model_fn):
        return ClientSgd(model_fn(), client_weight_fn)

    return optimizer_utils.build_model_delta_optimizer_process(
        model_fn, client_sgd_avg, server_optimizer_fn)
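The process returned above follows the standard initialize/next interface of tff.utils.IterativeProcess; a minimal driver sketch under that assumption (the function and argument names here are illustrative, not from the source):

def run_federated_sgd(iterative_process, federated_train_data, num_rounds=10):
    # Initialize the server state, then apply one round of federated SGD per
    # iteration, feeding the per-client datasets to `next`.
    state = iterative_process.initialize()
    metrics = None
    for _ in range(num_rounds):
        state, metrics = iterative_process.next(state, federated_train_data)
    return state, metrics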
Example No. 8
    def testMinimizeWith2DIndicesForEmbeddingLookup(self):
        # This test invokes the ResourceSparseApplyMomentum operation, which
        # did not have a registered GPU kernel as of April 2018. With graph
        # execution, the placement algorithm notices this and automatically
        # places the variable in CPU (host) memory. With eager execution,
        # the variable would be placed in GPU memory if available, which
        # would then conflict with the future invocation of the
        # ResourceSparseApplyMomentum operation.
        # To work around this discrepancy, for now we force the variable
        # to be placed on CPU.
        with ops.device("/cpu:0"):
            var0 = resource_variable_ops.ResourceVariable(
                array_ops.ones([2, 2]))

        def loss():
            return math_ops.reduce_sum(
                embedding_ops.embedding_lookup(var0, [[1]]))

        opt = gradient_descent.SGD(learning_rate=1.0, momentum=0.0)
        sgd_op = opt.minimize(loss, [var0])
        self.evaluate(variables.global_variables_initializer())
        self.evaluate(sgd_op)
        self.assertAllCloseAccordingToType([[1, 1], [0, 0]],
                                           self.evaluate(var0))
  def testPassingV1LossScaleErrors(self):
    opt = gradient_descent.SGD()
    loss_scale = tf_loss_scale_module.DynamicLossScale(multiplier=4)
    with self.assertRaisesRegex(
        ValueError, 'When passing a DynamicLossScale to "loss_scale", '
                    'DynamicLossScale.multiplier must be 2. Got: '
                    'DynamicLossScale'):
      loss_scale_optimizer.LossScaleOptimizerV1(opt, loss_scale)

    class MyLossScale(tf_loss_scale_module.LossScale):

      def __call__(self):
        return 1.

      def update(self, grads):
        return None, True

      def get_config(self):
        return {}

    with self.assertRaisesRegex(
        TypeError, 'Passing a LossScale that is not a FixedLossScale or a '
                   'DynamicLossScale is no longer supported. Got:'):
      loss_scale_optimizer.LossScaleOptimizerV1(opt, MyLossScale())
Example No. 10
    def testIterationsIncremented(self, strategy_fn):
        with strategy_fn().scope() as strategy:
            # Test that iterations is incremented in opt.minimize.
            opt = gradient_descent.SGD(1.0)
            opt = loss_scale_optimizer.LossScaleOptimizer(opt,
                                                          loss_scale='dynamic')
            var = variables.Variable([5.0])
            loss = lambda: var * 2.0 / strategy.num_replicas_in_sync
            run_fn = lambda: opt.minimize(loss, [var])
            run_op = strategy.experimental_run(run_fn)
            self.evaluate(variables.global_variables_initializer())
            self._run_if_in_graph_mode(run_op)
            self.assertEqual(self.evaluate(var),
                             3.0)  # Grad is 2, so var is 5 - 2
            self.assertEqual(self.evaluate(opt.iterations), 1)

            # Test that iterations is incremented in opt.minimize even if gradients
            # aren't applied to variables due to NaN gradients.
            loss = lambda: var * float('NaN')
            run_fn = lambda: opt.minimize(loss, [var])
            run_op = strategy.experimental_run(run_fn)
            self._run_if_in_graph_mode(run_op)
            self.assertEqual(self.evaluate(var), 3.0)
            self.assertEqual(self.evaluate(opt.iterations), 2)
    def testOptimizerWithKerasModelAndNumpyArrays(self):
        if context.num_gpus() < 1:
            self.skipTest('Not enough GPUs.')

        with self.cached_session():
            model = get_model()
            optimizer = gradient_descent.SGD(0.001)
            loss = 'mse'
            metrics = ['mae']
            devices = ['/device:GPU:0', '/device:CPU:0']
            dist = mirrored_strategy.MirroredStrategy(devices)
            model.compile(optimizer, loss, metrics=metrics, distribute=dist)

            inputs = np.zeros((64, 3), dtype=np.float32)
            targets = np.zeros((64, 4), dtype=np.float32)

            model.fit(inputs,
                      targets,
                      epochs=1,
                      batch_size=2,
                      verbose=0,
                      validation_data=(inputs, targets))
            model.evaluate(inputs, targets)
            model.predict(inputs)
Example No. 12
  def testPrecomputedGradient(self):
    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
      with self.cached_session():
        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
        cost = 5 * var0 + 3 * var1
        grad_loss = constant_op.constant([42, -42], dtype=dtype)
        global_step = variables.Variable(
            array_ops.zeros([], dtypes.int64), name='global_step')
        sgd_op = gradient_descent.SGD(3.0)
        opt_op = sgd_op.minimize(
            cost, global_step, [var0, var1], grad_loss=grad_loss)

        variables.global_variables_initializer().run()
        # Fetch params to validate initial values
        self.assertAllClose([1.0, 2.0], var0.eval())
        self.assertAllClose([3.0, 4.0], var1.eval())
        # Run 1 step of sgd through optimizer
        opt_op.run()
        # Validate updated params
        self.assertAllClose([1.0 - 3 * 5 * 42.0, 2.0 - 3 * 5 * (-42.0)],
                            var0.eval())
        self.assertAllClose([3.0 - 3 * 3 * 42.0, 4.0 - 3 * 3 * (-42.0)],
                            var1.eval())
Example No. 13
  def testAggregationMethod(self):
    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
      with self.cached_session():
        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
        cost = 5 * var0 + 3 * var1
        global_step = variables.Variable(
            array_ops.zeros([], dtypes.int64), name='global_step')
        sgd_op = gradient_descent.SGD(3.0)
        opt_op = sgd_op.minimize(
            cost,
            global_step, [var0, var1],
            aggregation_method=gradients_impl.AggregationMethod.
            EXPERIMENTAL_ACCUMULATE_N)

        variables.global_variables_initializer().run()
        # Fetch params to validate initial values
        self.assertAllClose([1.0, 2.0], var0.eval())
        self.assertAllClose([3.0, 4.0], var1.eval())
        # Run 1 step of sgd through optimizer
        opt_op.run()
        # Validate updated params
        self.assertAllClose([-14., -13.], var0.eval())
        self.assertAllClose([-6., -5.], var1.eval())
Example No. 14
  def testDynamicLossScaleWithSlots(self, strategy_fn):
    strategy_obj = strategy_fn()
    if (isinstance(strategy_obj, mirrored_strategy.MirroredStrategy) and
        control_flow_v2_toggles.control_flow_v2_enabled() and
        not context.executing_eagerly()):
      self.skipTest('b/138667997')
    with strategy_obj.scope() as strategy:
      var = variables.Variable([1.0, 2.0])
      # An SGD optimizer with momentum has slot variables.
      opt = gradient_descent.SGD(1.0, momentum=1.)
      initial_loss_scale = 2.
      loss_scale = loss_scale_module.DynamicLossScale(
          initial_loss_scale=initial_loss_scale, increment_period=1,
          multiplier=4)
      opt = loss_scale_optimizer.LossScaleOptimizer(opt, loss_scale)
      loss = lambda: var / strategy.num_replicas_in_sync
      run_fn = lambda: opt.minimize(loss, var_list=[var])
      run_op = strategy.experimental_run(run_fn)
      self.evaluate(variables.global_variables_initializer())
      self._run_if_in_graph_mode(run_op)
      # The momentum accumulator starts at 0 and the gradient is 1. The
      # accumulator is incremented by the gradient, so it is now 1. Then the
      # accumulator is subtracted from the variable, so the variable decreases
      # by 1.
      self.assertAllClose([0.0, 1.0], self.evaluate(var))
      self.assertEqual(self.evaluate(opt.loss_scale()), initial_loss_scale * 4)

      run_op = strategy.experimental_run(run_fn)
      self._run_if_in_graph_mode(run_op)
      # The momentum accumulator was 1 before this step and the gradient is 1.
      # The accumulator is incremented by the gradient, so it is now 2. Then the
      # accumulator is subtracted from the variable, so the variable decreases
      # by 2.
      self.assertAllClose([-2., -1.], self.evaluate(var))
      self.assertEqual(self.evaluate(opt.loss_scale()),
                       initial_loss_scale * 16)
def multi_inputs_multi_outputs_model():
  input_a = keras.layers.Input(shape=(16,), name='input_a')
  input_b = keras.layers.Input(shape=(16,), name='input_b')
  input_m = keras.layers.Input(shape=(8,), dtype='string', name='input_m')
  dense = keras.layers.Dense(8, name='dense_1')

  interm_a = dense(input_a)
  # Read m
  interm_m = keras.layers.Lambda(gen_parsing_ops.string_to_number)(input_m)
  interm_s = keras.layers.Lambda(lambda k: k[0] * k[1])([interm_m, interm_a])
  interm_b = dense(input_b)
  merged = keras.layers.concatenate([interm_s, interm_b], name='merge')
  output_c = keras.layers.Dense(3, activation='softmax', name='dense_2')(merged)
  output_d = keras.layers.Dense(2, activation='softmax', name='dense_3')(merged)
  model = keras.models.Model(
      inputs=[input_a, input_b, input_m], outputs=[output_c, output_d])
  model.compile(
      loss='categorical_crossentropy',
      optimizer=gradient_descent_keras.SGD(learning_rate=0.001),
      metrics={
          'dense_2': 'categorical_accuracy',
          'dense_3': 'categorical_accuracy'
      })
  return model
Example No. 16
def get_mnist_model(input_shape):
  """Define a deterministically-initialized CNN model for MNIST testing."""
  inputs = keras.Input(shape=input_shape)
  x = keras.layers.Conv2D(
      32,
      kernel_size=(3, 3),
      activation="relu",
      kernel_initializer=keras.initializers.TruncatedNormal(seed=99))(inputs)
  x = keras.layers.BatchNormalization()(x)
  x = keras.layers.Flatten()(x) + keras.layers.Flatten()(x)
  x = keras.layers.Dense(
      10,
      activation="softmax",
      kernel_initializer=keras.initializers.TruncatedNormal(seed=99))(x)
  model = keras.Model(inputs=inputs, outputs=x)

  # TODO(yuefengz): optimizer with slot variables doesn't work because of
  # optimizer's bug.
  # TODO(yuefengz): we should not allow non-v2 optimizer.
  model.compile(
      loss=keras.losses.sparse_categorical_crossentropy,
      optimizer=gradient_descent.SGD(learning_rate=0.001),
      metrics=["accuracy"])
  return model
Example No. 17
 def testSparseBasic(self):
   for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
     with self.cached_session():
       var0 = variables.Variable([[1.0], [2.0]], dtype=dtype)
       var1 = variables.Variable([[3.0], [4.0]], dtype=dtype)
       grads0 = ops.IndexedSlices(
           constant_op.constant([0.1], shape=[1, 1], dtype=dtype),
           constant_op.constant([0]), constant_op.constant([2, 1]))
       grads1 = ops.IndexedSlices(
           constant_op.constant([0.01], shape=[1, 1], dtype=dtype),
           constant_op.constant([1]), constant_op.constant([2, 1]))
       sgd_op = gradient_descent.SGD(3.0).apply_gradients(
           zip([grads0, grads1], [var0, var1]))
       variables.global_variables_initializer().run()
       # Fetch params to validate initial values
       self.assertAllCloseAccordingToType([[1.0], [2.0]], var0.eval())
       self.assertAllCloseAccordingToType([[3.0], [4.0]], var1.eval())
       # Run 1 step of sgd
       sgd_op.run()
       # Validate updated params
       self.assertAllCloseAccordingToType([[1.0 - 3.0 * 0.1], [2.0]],
                                          var0.eval())
       self.assertAllCloseAccordingToType([[3.0], [4.0 - 3.0 * 0.01]],
                                          var1.eval())
Example No. 18
    def testCapturingInFunctionWhileExecutingEagerly(self):
        optimizer = gradient_descent.SGD(1.0)

        var_holder = {}

        def step():
            if not var_holder:
                var_holder["var"] = variables.Variable(1.0)
            else:
                var_holder["var"].assign(1.0)

            with backprop.GradientTape() as tape:
                loss = var_holder["var"]**2
            grad = tape.gradient(loss, var_holder["var"])
            optimizer.apply_gradients([(grad, var_holder["var"])])
            return var_holder["var"].read_value()

        compiled_step = def_function.function(step)

        self.assertEqual(float(step()), -1.0)
        self.assertEqual(float(compiled_step()), -1.0)
        # This shouldn't fail; in particular, the learning rate tensor should
        # be an EagerTensor once again, not a graph Tensor.
        self.assertEqual(float(step()), -1.0)
Example No. 19
    def testMinimizeSparseResourceVariable(self):
        for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
            var0 = variables.Variable([[1.0, 2.0]], dtype=dtype)
            var1 = variables.Variable([3.0], dtype=dtype)
            x = constant_op.constant([[4.0], [5.0]], dtype=dtype)

            def loss():
                pred = math_ops.matmul(
                    embedding_ops.embedding_lookup([var0], [0]), x)  # pylint: disable=cell-var-from-loop
                pred += var1  # pylint: disable=cell-var-from-loop
                return pred * pred

            sgd_op = gradient_descent.SGD(1.0).minimize(loss, [var0, var1])
            self.evaluate(variables.global_variables_initializer())
            # Run 1 step of sgd
            self.evaluate(sgd_op)
            # Validate updated params
            np_pred = 1.0 * 4.0 + 2.0 * 5.0 + 3.0
            np_grad = 2 * np_pred
            self.assertAllCloseAccordingToType(
                [[1.0 - np_grad * 4.0, 2.0 - np_grad * 5.0]],
                self.evaluate(var0))
            self.assertAllCloseAccordingToType([3.0 - np_grad],
                                               self.evaluate(var1))
Example No. 20
    def test_fixed_loss_scaling(self, strategy_fn, cloning=True):
        # Note: We do not test mixed precision in this method, only loss scaling.
        loss_scale = 8.
        batch_size = 4
        with strategy_fn().scope():
            x = layers.Input(shape=(1, ), batch_size=batch_size)
            layer = AddLayer()
            y = layer(x)

            # The gradient of 'y' at this point is 1. With loss scaling, the gradient
            # is 'loss_scale'. We divide by the batch size since the loss is averaged
            # across batch elements.
            expected_gradient = loss_scale / batch_size
            identity_with_grad_check_fn = (
                mp_test_util.create_identity_with_grad_check_fn(
                    [expected_gradient]))
            y = core.Lambda(identity_with_grad_check_fn)(y)
            model = models.Model(inputs=x, outputs=y)

            def loss_fn(y_true, y_pred):
                del y_true
                return math_ops.reduce_mean(y_pred)

            opt = gradient_descent.SGD(1.)
            opt = loss_scale_optimizer.LossScaleOptimizer(opt, loss_scale)
            model.compile(opt, loss=loss_fn, cloning=cloning)

        self.assertEqual(backend.eval(layer.v), 1)
        x = np.ones((batch_size, 1))
        y = np.ones((batch_size, 1))
        dataset = dataset_ops.Dataset.from_tensor_slices(
            (x, y)).batch(batch_size)
        model.fit(dataset)
        # The variable starts at 1 and should have a gradient of 1 subtracted from it.
        expected = 0
        self.assertEqual(backend.eval(layer.v), expected)
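The loss-scaling mechanics the comments above describe can be seen in isolation: the loss is multiplied by the scale before differentiation, and the gradient is divided by it again before being applied. A hypothetical standalone illustration using the public tf API (not the test's AddLayer model):

import tensorflow as tf

loss_scale = 8.
v = tf.Variable([1.0])
with tf.GradientTape() as tape:
    scaled_loss = tf.reduce_mean(v) * loss_scale   # scale the loss up
(grad,) = tape.gradient(scaled_loss, [v])
grad = grad / loss_scale                           # unscale the gradient before applying
tf.keras.optimizers.SGD(1.0).apply_gradients([(grad, v)])
# v goes from 1.0 to 0.0, matching the expectation in the test above.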
Example No. 21
 def testMinimizeSparseResourceVariable(self):
     for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
         with self.cached_session():
             var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]],
                                                           dtype=dtype)
             var1 = resource_variable_ops.ResourceVariable([3.0],
                                                           dtype=dtype)
             x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
             pred = math_ops.matmul(
                 embedding_ops.embedding_lookup([var0], [0]), x)
             pred += var1
             loss = pred * pred
             sgd_op = gradient_descent.SGD(1.0).minimize(loss, [var0, var1])
             self.evaluate(variables.global_variables_initializer())
             # Run 1 step of sgd
             self.evaluate(sgd_op)
             # Validate updated params
             np_pred = 1.0 * 4.0 + 2.0 * 5.0 + 3.0
             np_grad = 2 * np_pred
             self.assertAllCloseAccordingToType(
                 [[1.0 - np_grad * 4.0, 2.0 - np_grad * 5.0]],
                 self.evaluate(var0))
             self.assertAllCloseAccordingToType([3.0 - np_grad],
                                                self.evaluate(var1))
Example No. 22
    def testBasic(self):
        for _, dtype in enumerate(
            [dtypes.half, dtypes.float32, dtypes.float64]):
            with self.cached_session():
                var0 = resource_variable_ops.ResourceVariable([1.0, 2.0],
                                                              dtype=dtype)
                var1 = resource_variable_ops.ResourceVariable([3.0, 4.0],
                                                              dtype=dtype)
                loss = lambda: 5 * var0 + 3 * var1  # pylint: disable=cell-var-from-loop
                if not context.executing_eagerly():
                    loss = loss()
                sgd = gradient_descent.SGD(3.0)

                self.evaluate(variables.global_variables_initializer())
                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                self.assertAllClose([3.0, 4.0], self.evaluate(var1))
                # Run 1 step of sgd through optimizer
                opt_op = sgd.minimize(loss, var_list=[var0, var1])
                self.evaluate(variables.global_variables_initializer())
                self.evaluate(opt_op)
                # Validate updated params
                self.assertAllClose([-14., -13.], self.evaluate(var0))
                self.assertAllClose([-6., -5.], self.evaluate(var1))
Example No. 23
 def test_wide_deep_model_with_single_feature_column(self):
     vocab_list = ['alpha', 'beta', 'gamma']
     vocab_val = [0.4, 0.6, 0.9]
     data = np.random.choice(vocab_list, size=256)
     y = np.zeros_like(data, dtype=np.float32)
     for vocab, val in zip(vocab_list, vocab_val):
         indices = np.where(data == vocab)
         y[indices] = val + np.random.uniform(
             low=-0.01, high=0.01, size=indices[0].shape)
     cat_column = fc.categorical_column_with_vocabulary_list(
         key='symbol', vocabulary_list=vocab_list)
     ind_column = fc.indicator_column(cat_column)
     dense_feature_layer = dense_features_v2.DenseFeatures([ind_column])
     linear_model = linear.LinearModel(use_bias=False,
                                       kernel_initializer='zeros')
     dnn_model = sequential.Sequential([core.Dense(units=1)])
     wide_deep_model = wide_deep.WideDeepModel(linear_model, dnn_model)
     combined = sequential.Sequential(
         [dense_feature_layer, wide_deep_model])
     opt = gradient_descent.SGD(learning_rate=0.1)
     combined.compile(opt,
                      'mse', [],
                      run_eagerly=testing_utils.should_run_eagerly())
     combined.fit(x={'symbol': data}, y=y, batch_size=32, epochs=10)
Example No. 24
    def testOptimizerWithKerasModel(self):
        a = input_layer.Input(shape=(3, ), name='input_a')
        b = input_layer.Input(shape=(3, ), name='input_b')

        dense = core.Dense(4, name='dense')
        c = dense(a)
        d = dense(b)
        e = core.Dropout(0.5, name='dropout')(c)

        model = training.Model([a, b], [d, e])

        optimizer = gradient_descent.SGD(learning_rate=0.001)
        loss = 'mse'
        model.compile(optimizer, loss, metrics=['mae'])

        input_a_np = np.random.random((10, 3))
        input_b_np = np.random.random((10, 3))

        output_d_np = np.random.random((10, 4))
        output_e_np = np.random.random((10, 4))

        model.fit([input_a_np, input_b_np], [output_d_np, output_e_np],
                  epochs=1,
                  batch_size=5)
Example No. 25
    def get_model(self, initial_weights=None, distribution=None):
        with keras_correctness_test_base.MaybeDistributionScope(distribution):
            image = keras.layers.Input(shape=(28, 28, 3), name='image')
            c1 = keras.layers.Conv2D(
                name='conv1',
                filters=16,
                kernel_size=(3, 3),
                strides=(4, 4),
                kernel_regularizer=keras.regularizers.l2(1e-4))(image)
            if self.with_batch_norm:
                c1 = keras.layers.BatchNormalization(name='bn1')(c1)
            c1 = keras.layers.MaxPooling2D(pool_size=(2, 2))(c1)
            logits = keras.layers.Dense(10, activation='softmax', name='pred')(
                keras.layers.Flatten()(c1))
            model = keras.Model(inputs=[image], outputs=[logits])

            if initial_weights:
                model.set_weights(initial_weights)

            model.compile(optimizer=gradient_descent.SGD(learning_rate=0.1),
                          loss='sparse_categorical_crossentropy',
                          metrics=['sparse_categorical_accuracy'])

        return model
Example No. 26
    def testCustomAggregater(self):
        def gradient_aggregator(grads_and_vars):
            # Simulate an all-reduce where a replica has a NaN gradient by setting
            # the last gradient to NaN
            grads_and_vars = list(grads_and_vars)
            last_grad, last_var = grads_and_vars[-1]
            grads_and_vars[-1] = (last_grad * float('NaN'), last_var)
            return grads_and_vars

        var = variables.Variable([1.0, 2.0])
        opt = gradient_descent.SGD(1.0,
                                   gradient_aggregator=gradient_aggregator)
        opt = loss_scale_optimizer.LossScaleOptimizer(opt,
                                                      initial_scale=2,
                                                      dynamic_growth_steps=2)

        loss = lambda: var * 2
        run_op = opt.minimize(loss, var_list=[var])
        self.evaluate(variables.global_variables_initializer())
        self._run_if_in_graph_mode(run_op)
        # Variable should not change from before, due to NaN gradients.
        self.assertAllClose(self.evaluate(var), [1.0, 2.0])
        # The loss scale should be halved due to the NaN gradients.
        self.assertEqual(1., self.evaluate(opt.loss_scale))
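Both this test and the dynamic-loss-scale test in Example No. 14 above rely on the same rule: the scale is multiplied after a run of steps with finite gradients and divided when a non-finite gradient appears. A simplified sketch of that rule (an illustration only, not the library implementation):

def update_loss_scale(scale, grads_are_finite, good_steps, growth_steps=1, multiplier=2.):
    # Count consecutive finite-gradient steps; grow the scale once enough accumulate.
    if grads_are_finite:
        good_steps += 1
        if good_steps >= growth_steps:
            return scale * multiplier, 0
        return scale, good_steps
    # Non-finite gradients: the update is skipped and the scale shrinks.
    return max(1., scale / multiplier), 0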
Example No. 27
 def testNesterovWithoutMomentum(self):
     with self.assertRaisesRegexp(ValueError, "must be between"):
         gradient_descent.SGD(learning_rate=1.0, momentum=2.0)
Example No. 28
    def testBasic(self):
        for _, dtype in enumerate(
            [dtypes.half, dtypes.float32, dtypes.float64]):
            var0 = resource_variable_ops.ResourceVariable([1.0, 2.0],
                                                          dtype=dtype,
                                                          name="var0")
            var1 = resource_variable_ops.ResourceVariable([3.0, 4.0],
                                                          dtype=dtype,
                                                          name="var1")
            grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
            grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
            learning_rate = 2.0
            momentum = 0.9
            mom_opt = gradient_descent.SGD(learning_rate=learning_rate,
                                           momentum=momentum)
            # self.assertFalse(mom_opt._initial_decay)
            mom_update = mom_opt.apply_gradients(
                zip([grads0, grads1], [var0, var1]))

            # Check we have slots
            slot0 = mom_opt.get_slot(var0, "momentum")
            self.assertEqual(slot0.shape, var0.shape)
            slot1 = mom_opt.get_slot(var1, "momentum")
            self.assertEqual(slot1.shape, var1.shape)

            # Step 1: the momentum accumulators were 0. So we should see a normal
            # update: v -= grad * learning_rate
            self.evaluate(variables.global_variables_initializer())
            self.evaluate(mom_update)
            # Check that the momentum accumulators have been updated.
            self.assertAllCloseAccordingToType(np.array([-0.2, -0.2]),
                                               self.evaluate(slot0))
            self.assertAllCloseAccordingToType(np.array([-0.02, -0.02]),
                                               self.evaluate(slot1))
            # Check that the parameters have been updated.
            self.assertAllCloseAccordingToType(
                np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]),
                self.evaluate(var0))
            self.assertAllCloseAccordingToType(
                np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]),
                self.evaluate(var1))
            # Step 2: the momentum accumulators contain the previous update.
            self.evaluate(mom_update)
            if context.executing_eagerly():
                mom_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            # Check that the momentum accumulators have been updated.
            self.assertAllCloseAccordingToType(
                np.array([(0.9 * (-0.2) - 2.0 * 0.1),
                          (0.9 * (-0.2) - 2.0 * 0.1)]), self.evaluate(slot0))
            self.assertAllCloseAccordingToType(
                np.array([(0.9 * (-0.02) - 2.0 * 0.01),
                          (0.9 * (-0.02) - 2.0 * 0.01)]), self.evaluate(slot1))
            # Check that the parameters have been updated.
            self.assertAllCloseAccordingToType(
                np.array([
                    1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0),
                    2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0)
                ]), self.evaluate(var0))
            self.assertAllCloseAccordingToType(
                np.array([
                    2.98 - ((0.9 * 0.01 + 0.01) * 2.0),
                    3.98 - ((0.9 * 0.01 + 0.01) * 2.0)
                ]), self.evaluate(var1))
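The slot and variable values asserted above follow the standard momentum recursion accum <- momentum * accum - lr * grad, var <- var + accum; a quick numpy check for var0 (illustration only, not part of the test):

import numpy as np

lr, momentum = 2.0, 0.9
var0, slot0 = np.array([1.0, 2.0]), np.zeros(2)
g0 = np.array([0.1, 0.1])
for _ in range(2):
    slot0 = momentum * slot0 - lr * g0   # step 1: [-0.2, -0.2]; step 2: [-0.38, -0.38]
    var0 = var0 + slot0                  # step 1: [0.8, 1.8];   step 2: [0.42, 1.42]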
Example No. 29
    def testSparse(self):
        for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
            var0 = variables.Variable(array_ops.zeros([4, 2], dtype=dtype))
            var1 = variables.Variable(constant_op.constant(1.0, dtype, [4, 2]))
            grads0 = ops.IndexedSlices(
                constant_op.constant([[.1, .1]], dtype=dtype),
                constant_op.constant([1]), constant_op.constant([4, 2]))
            grads1 = ops.IndexedSlices(
                constant_op.constant([[.01, .01], [.01, .01]], dtype=dtype),
                constant_op.constant([2, 3]), constant_op.constant([4, 2]))
            mom_opt = gradient_descent.SGD(learning_rate=2.0, momentum=0.9)
            mom_update = mom_opt.apply_gradients(
                zip([grads0, grads1], [var0, var1]))
            self.evaluate(variables.global_variables_initializer())

            # Check we have slots
            slot0 = mom_opt.get_slot(var0, "momentum")
            self.assertEqual(slot0.shape, var0.shape)
            slot1 = mom_opt.get_slot(var1, "momentum")
            self.assertEqual(slot1.shape, var1.shape)

            # Fetch params to validate initial values
            self.assertAllClose([0, 0], self.evaluate(var0)[0])
            self.assertAllClose([0, 0], self.evaluate(var0)[1])
            self.assertAllClose([1, 1], self.evaluate(var1)[2])

            # Step 1: the momentum accumulators are 0. So we should see a normal
            # update: v -= grad * learning_rate
            self.evaluate(mom_update)
            # Check that the momentum accumulators have been updated.
            self.assertAllCloseAccordingToType(np.array([0, 0]),
                                               self.evaluate(slot0)[0])
            self.assertAllCloseAccordingToType(
                np.array([-2.0 * .1, -2.0 * .1]),
                self.evaluate(slot0)[1])
            self.assertAllCloseAccordingToType(
                np.array([-2.0 * .01, -2.0 * .01]),
                self.evaluate(slot1)[2])
            # Check that the parameters have been updated.
            self.assertAllCloseAccordingToType(np.array([0, 0]),
                                               self.evaluate(var0)[0])
            self.assertAllCloseAccordingToType(
                np.array([-(0.1 * 2.0), -(0.1 * 2.0)]),
                self.evaluate(var0)[1])
            self.assertAllCloseAccordingToType(
                np.array([1.0 - (0.01 * 2.0), 1.0 - (0.01 * 2.0)]),
                self.evaluate(var1)[2])
            # Step 2: the momentum accumulators contain the previous update.
            self.evaluate(mom_update)
            # Check that the momentum accumulators have been updated.
            self.assertAllClose(np.array([0, 0]), self.evaluate(slot0)[0])
            self.assertAllCloseAccordingToType(
                np.array([(0.9 * (-0.2) - 2.0 * 0.1),
                          (0.9 * (-0.2) - 2.0 * 0.1)]),
                self.evaluate(slot0)[1])
            self.assertAllCloseAccordingToType(
                np.array([(0.9 * (-0.02) - 2.0 * 0.01),
                          (0.9 * (-0.02) - 2.0 * 0.01)]),
                self.evaluate(slot1)[2])
            # Check that the parameters have been updated.
            self.assertAllClose(np.array([0, 0]), self.evaluate(var0)[0])
            self.assertAllCloseAccordingToType(
                np.array([
                    -(0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0),
                    -(0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0)
                ]),
                self.evaluate(var0)[1])
            self.assertAllCloseAccordingToType(
                np.array([
                    0.98 - ((0.9 * 0.01 + 0.01) * 2.0),
                    0.98 - ((0.9 * 0.01 + 0.01) * 2.0)
                ]),
                self.evaluate(var1)[2])
Example No. 30
 def testBasicWithLearningRateInverseTimeDecay(self):
     for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
         learning_rate = learning_rate_schedule.InverseTimeDecay(
             3.0, decay_steps=1.0, decay_rate=0.5)
         sgd = gradient_descent.SGD(learning_rate=learning_rate)
         self._test_basic_sgd_with_learning_rate_decay(sgd, dtype)