def _rewrite_output_as_tensor(body_grad_graph, grad_output_slices):
  """Rewrites grad_output_slices to be a Tensor output.

  Args:
    body_grad_graph: _WhileBodyGradFuncGraph.
    grad_output_slices: IndexedSlices output of body_grad_graph.
  """
  with body_grad_graph.as_default():
    new_output = ops.convert_to_tensor_v2(grad_output_slices)

  idx = body_grad_graph.structured_outputs.index(grad_output_slices)
  body_grad_graph.structured_outputs[idx] = new_output
  body_grad_graph.outputs = func_graph.flatten(
      body_grad_graph.structured_outputs)
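A minimal standalone sketch of what the conversion above does, using the public `tf.convert_to_tensor` (the exported name of `convert_to_tensor_v2` in TF 2.x): it densifies an `IndexedSlices` gradient into a regular Tensor.

import tensorflow as tf

# IndexedSlices is the sparse gradient format produced by ops like gather.
slices = tf.IndexedSlices(
    values=tf.constant([[1.0, 2.0]]),
    indices=tf.constant([0]),
    dense_shape=tf.constant([3, 2]))

# Converting it to a Tensor materializes the dense [3, 2] gradient.
dense = tf.convert_to_tensor(slices)
print(dense.shape)  # (3, 2)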
Example #2
    def _prepare_local(self, var_device, var_dtype, apply_state):
        super(AdamMultilr, self)._prepare_local(var_device, var_dtype,
                                                apply_state)
        if self.pattern_lrs:
            for i, pair in enumerate(self.pattern_lrs):
                lr_t = array_ops.identity(
                    self._decayed_multi_lr(pair["lr"], var_dtype))
                apply_state[(var_device, var_dtype)][f"lr-{i}_t"] = lr_t

        local_step = math_ops.cast(self.iterations + 1, var_dtype)
        beta_1_t = array_ops.identity(self._get_hyper("beta_1", var_dtype))
        beta_2_t = array_ops.identity(self._get_hyper("beta_2", var_dtype))
        beta_1_power = math_ops.pow(beta_1_t, local_step)
        beta_2_power = math_ops.pow(beta_2_t, local_step)

        updated_lrs = {
            lr_name.replace("_t", ""):
            apply_state[(var_device, var_dtype)][lr_name] *
            (math_ops.sqrt(1 - beta_2_power) / (1 - beta_1_power))
            for lr_name in apply_state[(var_device, var_dtype)]
            if "lr" in lr_name
        }
        # lr = (apply_state[(var_device, var_dtype)]['lr_t'] *
        #       (math_ops.sqrt(1 - beta_2_power) / (1 - beta_1_power)))
        apply_state[(var_device, var_dtype)].update(
            dict(
                # lr=lr,
                epsilon=ops.convert_to_tensor_v2(self.epsilon, var_dtype),
                beta_1_t=beta_1_t,
                beta_1_power=beta_1_power,
                one_minus_beta_1_t=1 - beta_1_t,
                beta_2_t=beta_2_t,
                beta_2_power=beta_2_power,
                one_minus_beta_2_t=1 - beta_2_t,
                **updated_lrs,
            ))
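For reference, the bias-corrected step size computed by `updated_lrs` above is the standard Adam correction; a plain-Python sketch for a single step t:

import math

lr, beta_1, beta_2, t = 1e-3, 0.9, 0.999, 1
# Same formula as above: lr_t * sqrt(1 - beta_2^t) / (1 - beta_1^t)
lr_corrected = lr * math.sqrt(1 - beta_2 ** t) / (1 - beta_1 ** t)
print(lr_corrected)  # ~0.000316 at t=1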
Example #3
    def test_metric_dict(self):
        metric_container = compile_utils.MetricsContainer(metrics={
            'out1': 'mse',
            'out2': 'mae'
        },
                                                          weighted_metrics={
                                                              'out1': 'mse',
                                                              'out2': 'mae'
                                                          })

        y_t = {
            'out1': array_ops.ones((10, 1)),
            'out2': array_ops.zeros((10, 1))
        }
        y_p = {
            'out1': array_ops.ones((10, 1)),
            'out2': 2 * array_ops.ones((10, 1))
        }
        sw = ops.convert_to_tensor_v2([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
        metric_container.update_state(y_t, y_p, sample_weight=sw)

        mse_metric = metric_container.metrics[0]
        self.assertEqual(mse_metric.name, 'out1_mse')
        self.assertEqual(mse_metric.result().numpy(), 0.)

        mae_metric = metric_container.metrics[1]
        self.assertEqual(mae_metric.name, 'out2_mae')
        self.assertEqual(mae_metric.result().numpy(), 2.)

        weighted_mse_metric = metric_container.metrics[2]
        self.assertEqual(weighted_mse_metric.name, 'weighted_out1_mse')
        self.assertEqual(weighted_mse_metric.result().numpy(), 0.)

        weighted_mae_metric = metric_container.metrics[3]
        self.assertEqual(weighted_mae_metric.name, 'weighted_out2_mae')
        self.assertEqual(weighted_mae_metric.result().numpy(), 2.)
    def test_add_update_in_model(self):
        class MyModel(keras.Model):
            def __init__(self):
                super(MyModel, self).__init__()
                self.b = self.add_weight('bias', (10, ))
                self.c = self.add_weight('bias2', (10, ))

            def call(self, inputs):
                # Unconditional
                self.add_update(self.b.assign(self.b * 2))
                # Conditional
                self.add_update(self.c.assign(inputs[1, :]), inputs)
                return inputs + self.b + self.c

        x = ops.convert_to_tensor_v2(np.ones((10, 10), 'float32'))
        model = MyModel()
        model(x)

        if context.executing_eagerly():
            self.assertEqual(0, len(model.updates))
        else:
            self.assertEqual(2, len(model.updates))
            self.assertEqual(1, len(model.get_updates_for(None)))
            self.assertEqual(1, len(model.get_updates_for(x)))
  def test_list_of_metrics_list_of_outputs(self):
    metric_container = compile_utils.MetricsContainer(
        metrics=['mse', 'mae'],  # Should broadcast to both outputs.
        weighted_metrics=['accuracy'])  # Should broadcast to both outputs.

    y_t = [array_ops.ones((10, 1)), array_ops.zeros((10, 1))]
    y_p = [array_ops.ones((10, 1)), 2 * array_ops.ones((10, 1))]
    sw = ops.convert_to_tensor_v2([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
    metric_container.update_state(y_t, y_p, sample_weight=sw)
    self.assertLen(metric_container.metrics, 6)

    mse_metric = metric_container.metrics[0]
    self.assertEqual(mse_metric.name, 'output_1_mse')
    self.assertEqual(mse_metric.result().numpy(), 0.)

    mae_metric = metric_container.metrics[1]
    self.assertEqual(mae_metric.name, 'output_1_mae')
    self.assertEqual(mae_metric.result().numpy(), 0.)

    acc_metric_1 = metric_container.metrics[2]
    self.assertEqual(acc_metric_1.name, 'output_1_accuracy')
    self.assertEqual(acc_metric_1.result().numpy(), 1.)
    self.assertEqual(acc_metric_1._fn, metrics_mod.binary_accuracy)

    mse_metric = metric_container.metrics[3]
    self.assertEqual(mse_metric.name, 'output_2_mse')
    self.assertEqual(mse_metric.result().numpy(), 4.)

    mae_metric = metric_container.metrics[4]
    self.assertEqual(mae_metric.name, 'output_2_mae')
    self.assertEqual(mae_metric.result().numpy(), 2.)

    acc_metric_2 = metric_container.metrics[5]
    self.assertEqual(acc_metric_2.name, 'output_2_accuracy')
    self.assertEqual(acc_metric_2.result().numpy(), 0.)
    self.assertEqual(acc_metric_2._fn, metrics_mod.binary_accuracy)
Example #6
    def __call__(self, step):
        with ops.name_scope_v2(self.name or "LinearCosineDecay") as name:
            initial_learning_rate = ops.convert_to_tensor_v2(
                self.initial_learning_rate, name="initial_learning_rate")
            dtype = initial_learning_rate.dtype
            decay_steps = math_ops.cast(self.decay_steps, dtype)
            num_periods = math_ops.cast(self.num_periods, dtype)
            alpha = math_ops.cast(self.alpha, dtype)
            beta = math_ops.cast(self.beta, dtype)

            global_step_recomp = math_ops.cast(step, dtype)
            global_step_recomp = math_ops.minimum(global_step_recomp,
                                                  decay_steps)
            linear_decayed = (decay_steps - global_step_recomp) / decay_steps
            completed_fraction = global_step_recomp / decay_steps
            fraction = 2.0 * num_periods * completed_fraction
            cosine_decayed = 0.5 * (
                1.0 + math_ops.cos(constant_op.constant(math.pi) * fraction))

            linear_cosine_decayed = (alpha +
                                     linear_decayed) * cosine_decayed + beta
            return math_ops.multiply(initial_learning_rate,
                                     linear_cosine_decayed,
                                     name=name)
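A hedged usage sketch of this schedule, assuming the public `tf.keras.experimental.LinearCosineDecay` wrapper around the `__call__` above is available:

import tensorflow as tf

schedule = tf.keras.experimental.LinearCosineDecay(
    initial_learning_rate=0.1, decay_steps=1000)
for step in (0, 500, 1000):
    # The schedule is callable on a step index and returns the decayed rate.
    print(step, float(schedule(step)))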
  def test_validation_split_shuffled(self, use_numpy):
    if use_numpy:
      x = np.array([0, 1, 2, 3, 4])
      y = np.array([0, 2, 4, 6, 8])
      sw = np.array([0, 4, 8, 12, 16])
    else:
      x = ops.convert_to_tensor_v2([0, 1, 2, 3, 4])
      y = ops.convert_to_tensor_v2([0, 2, 4, 6, 8])
      sw = ops.convert_to_tensor_v2([0, 4, 8, 12, 16])

    (train_x, train_y, train_sw), (val_x, val_y, val_sw) = (
        data_adapter.train_validation_split((x, y, sw), validation_split=0.2))

    self.assertEqual(int(train_x.shape[0]), 4)
    self.assertEqual(int(train_y.shape[0]), 4)
    self.assertEqual(int(train_sw.shape[0]), 4)
    for i in range(4):
      # Check that all arrays were shuffled in identical order.
      self.assertEqual(2 * train_x[i].numpy(), train_y[i].numpy())
      self.assertEqual(2 * train_y[i].numpy(), train_sw[i].numpy())

    self.assertEqual(int(val_x.shape[0]), 1)
    self.assertEqual(int(val_y.shape[0]), 1)
    self.assertEqual(int(val_sw.shape[0]), 1)
    for i in range(1):
      # Check that all arrays were shuffled in identical order.
      self.assertEqual(2 * val_x[i].numpy(), val_y[i].numpy())
      self.assertEqual(2 * val_y[i].numpy(), val_sw[i].numpy())

    # Check that arrays contain expected values.
    self.assertEqual(
        sorted(array_ops.concat([train_x, val_x], axis=0).numpy().tolist()),
        sorted(ops.convert_to_tensor_v2(x).numpy().tolist()))
    self.assertEqual(
        sorted(array_ops.concat([train_y, val_y], axis=0).numpy().tolist()),
        sorted(ops.convert_to_tensor_v2(y).numpy().tolist()))
    self.assertEqual(
        sorted(array_ops.concat([train_sw, val_sw], axis=0).numpy().tolist()),
        sorted(ops.convert_to_tensor_v2(sw).numpy().tolist()))
 def build(self, input_shape):
     self.b = ops.convert_to_tensor_v2(2.0)
Example #9
def transform(images,
              transforms,
              fill_mode='reflect',
              interpolation='bilinear',
              output_shape=None,
              name=None):
  """Applies the given transform(s) to the image(s).

  Args:
    images: A tensor of shape (num_images, num_rows, num_columns, num_channels)
      (NHWC), (num_rows, num_columns, num_channels) (HWC), or (num_rows,
      num_columns) (HW). The rank must be statically known (the shape is not
      `TensorShape(None)`).
    transforms: Projective transform matrix/matrices. A vector of length 8 or
      tensor of size N x 8. If one row of transforms is [a0, a1, a2, b0, b1, b2,
      c0, c1], then it maps the *output* point `(x, y)` to a transformed *input*
      point `(x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k)`, where
      `k = c0 x + c1 y + 1`. The transforms are *inverted* compared to the
      transform mapping input points to output points. Note that gradients are
      not backpropagated into transformation parameters.
    fill_mode: Points outside the boundaries of the input are filled according
      to the given mode (one of `{'constant', 'reflect', 'wrap'}`).
    interpolation: Interpolation mode. Supported values: "nearest", "bilinear".
    output_shape: Output dimension after the transform, [height, width]. If None,
      the output is the same size as the input image.
    name: The name of the op.

  ## Fill mode.
  Behavior for each valid value is as follows:

  reflect (d c b a | a b c d | d c b a)
  The input is extended by reflecting about the edge of the last pixel.

  constant (k k k k | a b c d | k k k k)
  The input is extended by filling all values beyond the edge with the same
  constant value k = 0.

  wrap (a b c d | a b c d | a b c d)
  The input is extended by wrapping around to the opposite edge.

  Input shape:
    4D tensor with shape: `(samples, height, width, channels)`,
      data_format='channels_last'.
  Output shape:
    4D tensor with shape: `(samples, height, width, channels)`,
      data_format='channels_last'.

  Returns:
    Image(s) with the same type and shape as `images`, with the given
    transform(s) applied. Transformed coordinates outside of the input image
    will be filled with zeros.

  Raises:
    TypeError: If `image` is an invalid type.
    ValueError: If output shape is not 1-D int32 Tensor.
  """
  with ops.name_scope(name, 'transform'):
    if output_shape is None:
      output_shape = array_ops.shape(images)[1:3]
      if not context.executing_eagerly():
        output_shape_value = tensor_util.constant_value(output_shape)
        if output_shape_value is not None:
          output_shape = output_shape_value

    output_shape = ops.convert_to_tensor_v2(
        output_shape, dtypes.int32, name='output_shape')

    if not output_shape.get_shape().is_compatible_with([2]):
      raise ValueError('output_shape must be a 1-D Tensor of 2 elements: '
                       'new_height, new_width, instead got '
                       '{}'.format(output_shape))

    return image_ops.image_projective_transform_v2(
        images,
        output_shape=output_shape,
        transforms=transforms,
        fill_mode=fill_mode.upper(),
        interpolation=interpolation.upper())
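A hedged usage sketch, assuming the `transform` function above is importable as-is: per the docstring, the 8-vector `[1, 0, 1, 0, 1, 2, 0, 0]` maps each output pixel `(x, y)` to the input pixel `(x + 1, y + 2)`.

import numpy as np

# One 4x4 single-channel image (NHWC).
images = np.arange(16, dtype=np.float32).reshape((1, 4, 4, 1))
# [a0, a1, a2, b0, b1, b2, c0, c1]
transforms = [1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0]
shifted = transform(images, transforms,
                    fill_mode='constant', interpolation='nearest')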
Example #10
  def _fused_batch_norm(self, inputs, training):
    """Returns the output of fused batch norm."""
    beta = self.beta if self.center else self._beta_const
    gamma = self.gamma if self.scale else self._gamma_const

    # TODO(b/129279393): Support zero batch input in non DistributionStrategy
    # code as well.
    if self._support_zero_size_input():
      inputs_size = array_ops.size(inputs)
    else:
      inputs_size = None

    def _fused_batch_norm_training():
      return nn.fused_batch_norm(
          inputs,
          gamma,
          beta,
          epsilon=self.epsilon,
          data_format=self._data_format)

    def _fused_batch_norm_inference():
      return nn.fused_batch_norm(
          inputs,
          gamma,
          beta,
          mean=self.moving_mean,
          variance=self.moving_variance,
          epsilon=self.epsilon,
          is_training=False,
          data_format=self._data_format)

    output, mean, variance = tf_utils.smart_cond(
        training, _fused_batch_norm_training, _fused_batch_norm_inference)
    if not self._bessels_correction_test_only:
      # Remove Bessel's correction to be consistent with non-fused batch norm.
      # Note that the variance computed by fused batch norm is
      # with Bessel's correction.
      sample_size = math_ops.cast(
          array_ops.size(inputs) / array_ops.size(variance), variance.dtype)
      factor = (sample_size - math_ops.cast(1.0, variance.dtype)) / sample_size
      variance *= factor

    training_value = tf_utils.constant_value(training)
    if training_value is None:
      momentum = tf_utils.smart_cond(training,
                                     lambda: self.momentum,
                                     lambda: 1.0)
    else:
      momentum = ops.convert_to_tensor_v2(self.momentum)
    if training_value or training_value is None:
      def mean_update():
        return self._assign_moving_average(self.moving_mean, mean, momentum,
                                           inputs_size)

      def variance_update():
        """Update self.moving_variance with the most recent data point."""
        if self.renorm:
          # We apply epsilon as part of the moving_stddev to mirror the training
          # code path.
          moving_stddev = self._assign_moving_average(
              self.moving_stddev, math_ops.sqrt(variance + self.epsilon),
              momentum, inputs_size)
          return self._assign_new_value(
              self.moving_variance,
              # Apply relu in case floating point rounding causes it to go
              # negative.
              K.relu(moving_stddev * moving_stddev - self.epsilon))
        else:
          return self._assign_moving_average(self.moving_variance, variance,
                                             momentum, inputs_size)

      self.add_update(mean_update)
      self.add_update(variance_update)

    return output
 def _fn(*fargs, **fkwargs):
     d = fn(*fargs, **fkwargs)
     x = ops.convert_to_tensor_v2(d)
     d.shape = x.shape
     d.get_shape = x.get_shape
     return d, x
Example #12
 def _get_noise_shape(self, inputs):
     return ops.convert_to_tensor_v2([1, array_ops.shape(inputs)[-1]])
Example #13
  def _fused_batch_norm(self, inputs, training):
    """Returns the output of fused batch norm."""
    beta = self.beta if self.center else self._beta_const
    gamma = self.gamma if self.scale else self._gamma_const

    # TODO(b/129279393): Support zero batch input in non DistributionStrategy
    # code as well.
    if self._support_zero_size_input():
      inputs_size = array_ops.size(inputs)
    else:
      inputs_size = None

    # TODO(rmlarsen): Support using fused avg updates for non-eager execution
    # after fixing graph pattern matching and enabling fused_batch_norm to
    # take exponential_avg_factor as a tensor input.
    use_fused_avg_updates = (
        compat.forward_compatible(2020, 3, 6) and
        ops.executing_eagerly_outside_functions())
    if use_fused_avg_updates:
      exponential_avg_factor = 1.0 - self.momentum
    else:
      exponential_avg_factor = None

    def _maybe_add_or_remove_bessels_correction(variance, remove=True):
      r"""Add or remove Bessel's correction."""
      # Removes Bessel's correction if remove == True, adds it otherwise.
      # This is to be consistent with non-fused batch norm. Note that the
      # variance computed by fused batch norm is with Bessel's correction.
      # This is only used in legacy V1 batch norm tests.
      if self._bessels_correction_test_only:
        return variance
      sample_size = math_ops.cast(
          array_ops.size(inputs) / array_ops.size(variance), variance.dtype)
      if remove:
        factor = (sample_size -
                  math_ops.cast(1.0, variance.dtype)) / sample_size
      else:
        factor = sample_size / (
            sample_size - math_ops.cast(1.0, variance.dtype))
      return variance * factor

    def _fused_batch_norm_training():
      return nn.fused_batch_norm(
          inputs,
          gamma,
          beta,
          mean=self.moving_mean,
          variance=_maybe_add_or_remove_bessels_correction(
              self.moving_variance, remove=False),
          epsilon=self.epsilon,
          is_training=True,
          data_format=self._data_format,
          exponential_avg_factor=exponential_avg_factor)

    def _fused_batch_norm_training_empty():
      return inputs, self.moving_mean, self.moving_variance

    def _fused_batch_norm_inference():
      return nn.fused_batch_norm(
          inputs,
          gamma,
          beta,
          mean=self.moving_mean,
          variance=self.moving_variance,
          epsilon=self.epsilon,
          is_training=False,
          data_format=self._data_format)

    train_op = _fused_batch_norm_training
    if use_fused_avg_updates and inputs_size is not None:
      train_op = lambda: tf_utils.smart_cond(inputs_size > 0,
                                             _fused_batch_norm_training,
                                             _fused_batch_norm_training_empty)

    output, mean, variance = tf_utils.smart_cond(training, train_op,
                                                 _fused_batch_norm_inference)
    variance = _maybe_add_or_remove_bessels_correction(variance, remove=True)

    training_value = tf_utils.constant_value(training)
    if training_value or training_value is None:
      if not use_fused_avg_updates:
        if training_value is None:
          momentum = tf_utils.smart_cond(training, lambda: self.momentum,
                                         lambda: 1.0)
        else:
          momentum = ops.convert_to_tensor_v2(self.momentum)

      def mean_update():
        """Update self.moving_mean with the most recent data point."""
        if use_fused_avg_updates:
          return self._assign_new_value(self.moving_mean, mean)
        else:
          return self._assign_moving_average(self.moving_mean, mean, momentum,
                                             inputs_size)

      def variance_update():
        """Update self.moving_variance with the most recent data point."""
        if use_fused_avg_updates:
          return self._assign_new_value(self.moving_variance, variance)
        else:
          return self._assign_moving_average(self.moving_variance, variance,
                                             momentum, inputs_size)

      self.add_update(mean_update)
      self.add_update(variance_update)

    return output
Example #14
 def gamma(self):
     """Returns strongly convex parameter, gamma."""
     return _ops.convert_to_tensor_v2(1, dtype=tf.float32)
 def __init__(self, input_):
     self._input = input_
     self.value = ops.convert_to_tensor_v2([[42.]])
    def test_enables_nontensor_plumbing(self):
        if context.executing_eagerly():
            self.skipTest('`compile` functionality changed.')
        # Setup.

        class Foo(object):
            def __init__(self, input_):
                self._input = input_
                self.value = ops.convert_to_tensor_v2([[42.]])

            @property
            def dtype(self):
                return self.value.dtype

        ops.register_tensor_conversion_function(
            Foo, lambda x, *args, **kwargs: x.value)
        tf_utils.register_symbolic_tensor_type(Foo)

        class PlumbingLayer(keras.layers.Lambda):
            def __init__(self, fn, **kwargs):
                def _fn(*fargs, **fkwargs):
                    d = fn(*fargs, **fkwargs)
                    x = ops.convert_to_tensor_v2(d)
                    d.shape = x.shape
                    d.get_shape = x.get_shape
                    return d, x

                super(PlumbingLayer, self).__init__(_fn, **kwargs)
                self._enter_dunder_call = False

            def __call__(self, inputs, *args, **kwargs):
                self._enter_dunder_call = True
                d, _ = super(PlumbingLayer,
                             self).__call__(inputs, *args, **kwargs)
                self._enter_dunder_call = False
                return d

            def call(self, inputs, *args, **kwargs):
                d, v = super(PlumbingLayer, self).call(inputs, *args, **kwargs)
                if self._enter_dunder_call:
                    return d, v
                return d

        # User-land.
        model = keras.Sequential([
            keras.layers.InputLayer((1, )),
            PlumbingLayer(Foo),  # Makes a `Foo` object.
        ])
        # Let's ensure Keras graph history is preserved by composing the models.
        model = keras.Model(model.inputs, model(model.outputs))
        # Now we instantiate the model and verify we have a `Foo` object, not a
        # `Tensor`.
        y = model(ops.convert_to_tensor_v2([[7.]]))
        self.assertIsInstance(y, Foo)
        # Confirm that (custom) loss sees `Foo` instance, not Tensor.
        obtained_prediction_box = [None]

        def custom_loss(y_obs, y_pred):
            del y_obs
            obtained_prediction_box[0] = y_pred
            return y_pred

        # Apparently `compile` calls the loss function enough to trigger the
        # side-effect.
        model.compile('SGD', loss=custom_loss)
        self.assertIsInstance(obtained_prediction_box[0], Foo)
 def value(self):
     return ops.convert_to_tensor_v2(42.)
 def build(self, input_shape):
     a = ops.convert_to_tensor_v2(1.0)
     b = 2.0 * a
     self.variable = variables.Variable(b)
     self.constant = ops.convert_to_tensor_v2(self.variable)
def _model_loss(model,
                inputs,
                targets,
                output_loss_metrics=None,
                sample_weights=None,
                training=False):
    """Calculates the loss for a given model.

  Arguments:
      model: The model on which metrics are being calculated.
      inputs: Either a dictionary of inputs to the model or a list of input
        arrays.
      targets: List of target arrays.
      output_loss_metrics: List of metrics that are used to aggregate output
        loss values.
      sample_weights: Optional list of sample weight arrays.
      training: Whether the model should be run in inference or training mode.

  Returns:
     Returns the model output, total loss, loss value calculated using the
     specified loss function and masks for each output. The total loss includes
     regularization losses and applies masking and sample weighting
     to the loss value.
  """
    # TODO(psv): Dedup code here with graph mode prepare_total_loss() fn.
    # Used to keep track of the total loss value (stateless).
    # eg., total_loss = loss_weight_1 * output_1_loss_fn(...) +
    #                   loss_weight_2 * output_2_loss_fn(...) +
    #                   layer losses.
    total_loss = 0
    kwargs = {}
    if model._expects_training_arg:
        kwargs['training'] = training
    if len(inputs) == 1 and not isinstance(inputs, dict):
        inputs = inputs[0]

    # Allow mixed `NumPy` and `EagerTensor` input here.
    if any(
            isinstance(input_t, (np.ndarray, float, int))
            for input_t in nest.flatten(inputs)):
        inputs = nest.map_structure(ops.convert_to_tensor_v2, inputs)

    outs = model(inputs, **kwargs)
    outs = nest.flatten(outs)

    if targets:
        targets = training_utils.cast_if_floating_dtype_and_mismatch(
            targets, outs)
    # TODO(sallymatson/psv): check if we should do same mismatch fix for weights
    if sample_weights:
        sample_weights = [
            training_utils.cast_if_floating_dtype(
                ops.convert_to_tensor_v2(val)) if val is not None else None
            for val in sample_weights
        ]

    masks = [getattr(t, '_keras_mask', None) for t in outs]
    targets = nest.flatten(targets)

    # Used to keep track of individual output losses.
    output_losses = []

    with backend.name_scope('loss'):
        loss_fns = [
            loss_fn for loss_fn in model.loss_functions if loss_fn is not None
        ]
        custom_losses = model.losses  # Regularization losses

        if not loss_fns and not custom_losses:
            if training:
                raise ValueError('The model cannot be trained '
                                 'because it has no loss to optimize.')
            else:
                raise ValueError('The model cannot be evaluated '
                                 'because it has no loss to compute.')

        for i, loss_fn in enumerate(loss_fns):
            weights = sample_weights[i] if sample_weights else None
            mask = masks[i]
            with backend.name_scope(model.output_names[i] + '_loss'):
                if mask is not None:
                    mask = math_ops.cast(mask, outs[i].dtype)
                    # Update weights with mask.
                    if weights is None:
                        weights = mask
                    else:
                        # Update dimensions of weights to match with mask if possible.
                        weights = math_ops.cast(weights, outs[i].dtype)
                        mask, _, weights = (
                            losses_utils.squeeze_or_expand_dimensions(
                                mask, sample_weight=weights))
                        weights *= mask

                if hasattr(loss_fn, 'reduction'):
                    per_sample_losses = loss_fn.call(targets[i], outs[i])
                    weighted_losses = losses_utils.compute_weighted_loss(
                        per_sample_losses,
                        sample_weight=weights,
                        reduction=losses_utils.ReductionV2.NONE)
                    loss_reduction = loss_fn.reduction

                    # `AUTO` loss reduction defaults to `SUM_OVER_BATCH_SIZE` for all
                    # compile use cases.
                    if loss_reduction == losses_utils.ReductionV2.AUTO:
                        loss_reduction = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE

                    # Compute the stateless loss value.
                    output_loss = losses_utils.reduce_weighted_loss(
                        weighted_losses, reduction=loss_reduction)
                else:
                    # Compute the stateless loss value for a custom loss class.
                    # Here we assume that the class takes care of loss reduction
                    # because if this class returns a vector value we cannot
                    # differentiate between use case where a custom optimizer
                    # expects a vector loss value vs unreduced per-sample loss value.
                    output_loss = loss_fn(targets[i],
                                          outs[i],
                                          sample_weight=weights)
                    loss_reduction = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE

            # If the number of outputs is 1 then we don't append the loss metric
            # associated with each model output. When there are multiple outputs
            # associated with a model, each output's loss is calculated and returned
            # as part of the loss_metrics.
            if len(model.outputs) > 1:
                # Keep track of the stateful output loss result.
                output_losses.append(output_loss_metrics[i](output_loss))

            # Scale output loss for distribution. For custom losses we assume
            # reduction was mean.
            if loss_reduction == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE:
                output_loss = losses_utils.scale_loss_for_distribution(
                    output_loss)
            total_loss += model._loss_weights_list[i] * output_loss

        # Add regularization losses
        if custom_losses:
            total_loss += losses_utils.scale_loss_for_distribution(
                math_ops.add_n(custom_losses))
    return outs, total_loss, output_losses, masks
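A hedged sketch of the total-loss bookkeeping described in the docstring above (stateless per-output losses weighted by the model's loss weights, plus layer regularization losses), using public ops and made-up numbers:

import tensorflow as tf

output_losses = [tf.constant(0.30), tf.constant(0.12)]   # per-output losses
loss_weights = [1.0, 0.5]                                 # model loss weights
regularization_losses = [tf.constant(0.01)]               # model.losses

total_loss = tf.add_n(
    [w * l for w, l in zip(loss_weights, output_losses)])
total_loss += tf.add_n(regularization_losses)             # 0.30 + 0.06 + 0.01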
Example #20
  def adapt(self, data, reset_state=True):
    """Fits the state of the preprocessing layer to the data being passed.

    Arguments:
      data: The data to train on. It can be passed either as a tf.data Dataset,
        or as a numpy array.
      reset_state: Optional argument specifying whether to clear the state of
        the layer at the start of the call to `adapt`, or whether to start from
        the existing state. Subclasses may choose to throw if reset_state is set
        to 'False'.
    """
    if reset_state:
      accumulator = None
    else:
      accumulator = self._combiner.restore(self._restore_updates())
    if isinstance(data, (list, tuple)):
      data = ops.convert_to_tensor_v2(data)
    if not isinstance(data,
                      (dataset_ops.DatasetV2,
                       np.ndarray,
                       ops.Tensor,
                       ragged_tensor.RaggedTensor)):
      raise ValueError(
          '`adapt()` requires a batched Dataset, a Tensor, '
          'or a Numpy array as input, '
          'got {}'.format(type(data)))

    if isinstance(data, dataset_ops.DatasetV2):
      # Validate the datasets to try and ensure we haven't been passed one with
      # infinite size. That would cause an infinite loop here.
      if tf_utils.dataset_is_infinite(data):
        raise ValueError(
            'The dataset passed to `adapt()` has an infinite number of '
            'elements. Please use `dataset.take(...)` to make the number '
            'of elements finite.')
      next_data = self._get_dataset_iterator(data)
      # TODO(fchollet): consider checking if the dataset is already batched
      # and otherwise batching it.
    elif isinstance(data, (ops.Tensor, ragged_tensor.RaggedTensor)):
      next_data = self._get_dataset_iterator(
          dataset_ops.Dataset.from_tensor_slices(data).batch(512))
    else:
      generator, _ = training_generator.convert_to_generator_like(
          data, batch_size=512)
      # If the data is not a dataset, we can iterate over it using next(foo);
      # here, we wrap that into a callable.
      next_data = lambda: next(generator)

    # TODO(momernick): Some sort of status bar?
    # TODO(momernick): Implement parallel processing here?
    try:
      data_element = next_data()

      # First, see if the layer is built or not. If it is not, then we must
      # build it.
      if not self.built:
        try:
          # If this is a Numpy array or tensor, we can get shape from .shape.
          # If not, an attribute error will be thrown.
          data_shape = data_element.shape
          data_shape_nones = tuple([None]*len(data_element.shape))
        except AttributeError:
          # The input has an unknown number of dimensions.
          data_shape = None
          data_shape_nones = None

        # TODO (b/159261555): move this to base layer build.
        batch_input_shape = getattr(self, '_batch_input_shape', None)
        if batch_input_shape is None:
          # Set the number of dimensions.
          self._batch_input_shape = data_shape_nones

        self.build(data_shape)

      # Once we have built the Layer, we can process the input data. We do so
      # until we've gotten an exception indicating that we have no more data.
      while True:
        accumulator = self._combiner.compute(data_element, accumulator)
        data_element = next_data()
    # Note that this belongs to the outer indentation of 'try' - we need to
    # catch exceptions resulting from the first 'next_data()' invocation as
    # well.
    except (StopIteration, errors.OutOfRangeError):
      pass

    updates = self._combiner.extract(accumulator)
    self._set_state_variables(updates)
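A hedged usage sketch of `adapt()` on a concrete preprocessing layer, assuming a TF 2.x build where `Normalization` lives under `tf.keras.layers.experimental.preprocessing`:

import numpy as np
import tensorflow as tf

layer = tf.keras.layers.experimental.preprocessing.Normalization()
# adapt() accepts a NumPy array (or a batched Dataset) and fits the layer state.
layer.adapt(np.array([[1.0], [2.0], [3.0]]))
print(layer(np.array([[2.0]])))  # roughly zero, since 2.0 is the adapted mean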
 def test_dense_dtype(self):
   inputs = ops.convert_to_tensor_v2(
       np.random.randint(low=0, high=7, size=(2, 2)))
   layer = keras.layers.Dense(5, dtype='float32')
   outputs = layer(inputs)
   self.assertEqual(outputs.dtype, 'float32')
Example #22
  def call(self, inputs, training=None):
    training = self._get_training_value(training)

    if self.virtual_batch_size is not None:
      # Virtual batches (aka ghost batches) can be simulated by reshaping the
      # Tensor and reusing the existing batch norm implementation
      original_shape = [-1] + inputs.shape.as_list()[1:]
      expanded_shape = [self.virtual_batch_size, -1] + original_shape[1:]

      # Will cause errors if virtual_batch_size does not divide the batch size
      inputs = array_ops.reshape(inputs, expanded_shape)

      def undo_virtual_batching(outputs):
        outputs = array_ops.reshape(outputs, original_shape)
        return outputs

    if self.fused:
      outputs = self._fused_batch_norm(inputs, training=training)
      if self.virtual_batch_size is not None:
        # Currently never reaches here since fused_batch_norm does not support
        # virtual batching
        outputs = undo_virtual_batching(outputs)
      return outputs

    # Compute the axes along which to reduce the mean / variance
    input_shape = inputs.shape
    ndims = len(input_shape)
    reduction_axes = [i for i in range(ndims) if i not in self.axis]
    if self.virtual_batch_size is not None:
      del reduction_axes[1]     # Do not reduce along virtual batch dim

    # Broadcasting only necessary for single-axis batch norm where the axis is
    # not the last dimension
    broadcast_shape = [1] * ndims
    broadcast_shape[self.axis[0]] = input_shape.dims[self.axis[0]].value
    def _broadcast(v):
      if (v is not None and len(v.shape) != ndims and
          reduction_axes != list(range(ndims - 1))):
        return array_ops.reshape(v, broadcast_shape)
      return v

    scale, offset = _broadcast(self.gamma), _broadcast(self.beta)

    def _compose_transforms(scale, offset, then_scale, then_offset):
      if then_scale is not None:
        scale *= then_scale
        offset *= then_scale
      if then_offset is not None:
        offset += then_offset
      return (scale, offset)

    # Determine a boolean value for `training`: could be True, False, or None.
    training_value = tf_utils.constant_value(training)
    if training_value == False:  # pylint: disable=singleton-comparison,g-explicit-bool-comparison
      mean, variance = self.moving_mean, self.moving_variance
    else:
      if self.adjustment:
        adj_scale, adj_bias = self.adjustment(array_ops.shape(inputs))
        # Adjust only during training.
        adj_scale = tf_utils.smart_cond(training,
                                        lambda: adj_scale,
                                        lambda: array_ops.ones_like(adj_scale))
        adj_bias = tf_utils.smart_cond(training,
                                       lambda: adj_bias,
                                       lambda: array_ops.zeros_like(adj_bias))
        scale, offset = _compose_transforms(adj_scale, adj_bias, scale, offset)

      # Some of the computations here are not necessary when training==False
      # but not a constant. However, this makes the code simpler.
      keep_dims = self.virtual_batch_size is not None or len(self.axis) > 1
      mean, variance = self._moments(
          math_ops.cast(inputs, self._param_dtype),
          reduction_axes,
          keep_dims=keep_dims)

      moving_mean = self.moving_mean
      moving_variance = self.moving_variance

      mean = tf_utils.smart_cond(training, lambda: mean,
                                 lambda: ops.convert_to_tensor_v2(moving_mean))
      variance = tf_utils.smart_cond(
          training, lambda: variance,
          lambda: ops.convert_to_tensor_v2(moving_variance))

      if self.virtual_batch_size is not None:
        # This isn't strictly correct since in ghost batch norm, you are
        # supposed to sequentially update the moving_mean and moving_variance
        # with each sub-batch. However, since the moving statistics are only
        # used during evaluation, it is more efficient to just update in one
        # step and should not make a significant difference in the result.
        new_mean = math_ops.reduce_mean(mean, axis=1, keepdims=True)
        new_variance = math_ops.reduce_mean(variance, axis=1, keepdims=True)
      else:
        new_mean, new_variance = mean, variance

      if self._support_zero_size_input():
        inputs_size = array_ops.size(inputs)
      else:
        inputs_size = None
      if self.renorm:
        r, d, new_mean, new_variance = self._renorm_correction_and_moments(
            new_mean, new_variance, training, inputs_size)
        # When training, the normalized values (say, x) will be transformed as
        # x * gamma + beta without renorm, and (x * r + d) * gamma + beta
        # = x * (r * gamma) + (d * gamma + beta) with renorm.
        r = _broadcast(array_ops.stop_gradient(r, name='renorm_r'))
        d = _broadcast(array_ops.stop_gradient(d, name='renorm_d'))
        scale, offset = _compose_transforms(r, d, scale, offset)

      def _do_update(var, value):
        """Compute the updates for mean and variance."""
        return self._assign_moving_average(var, value, self.momentum,
                                           inputs_size)

      def mean_update():
        true_branch = lambda: _do_update(self.moving_mean, new_mean)
        false_branch = lambda: self.moving_mean
        return tf_utils.smart_cond(training, true_branch, false_branch)

      def variance_update():
        """Update the moving variance."""

        def true_branch_renorm():
          # We apply epsilon as part of the moving_stddev to mirror the training
          # code path.
          moving_stddev = _do_update(self.moving_stddev,
                                     math_ops.sqrt(new_variance + self.epsilon))
          return self._assign_new_value(
              self.moving_variance,
              # Apply relu in case floating point rounding causes it to go
              # negative.
              K.relu(moving_stddev * moving_stddev - self.epsilon))

        if self.renorm:
          true_branch = true_branch_renorm
        else:
          true_branch = lambda: _do_update(self.moving_variance, new_variance)

        false_branch = lambda: self.moving_variance
        return tf_utils.smart_cond(training, true_branch, false_branch)

      self.add_update(mean_update)
      self.add_update(variance_update)

    mean = math_ops.cast(mean, inputs.dtype)
    variance = math_ops.cast(variance, inputs.dtype)
    if offset is not None:
      offset = math_ops.cast(offset, inputs.dtype)
    if scale is not None:
      scale = math_ops.cast(scale, inputs.dtype)
    # TODO(reedwm): Maybe do math in float32 if given float16 inputs, if doing
    # math in float16 hurts validation accuracy of popular models like resnet.
    outputs = nn.batch_normalization(inputs,
                                     _broadcast(mean),
                                     _broadcast(variance),
                                     offset,
                                     scale,
                                     self.epsilon)
    # If some components of the shape got lost due to adjustments, fix that.
    outputs.set_shape(input_shape)

    if self.virtual_batch_size is not None:
      outputs = undo_virtual_batching(outputs)
    return outputs
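A hedged standalone sketch of the ghost-batch reshape used when `virtual_batch_size` is set: fold a batch of 8 into virtual batches of size 2, then undo it, mirroring `expanded_shape` and `undo_virtual_batching` above.

import tensorflow as tf

inputs = tf.random.normal([8, 4])
original_shape = [-1] + inputs.shape.as_list()[1:]        # [-1, 4]
expanded_shape = [2, -1] + original_shape[1:]             # [2, -1, 4]

virtual = tf.reshape(inputs, expanded_shape)              # shape [2, 4, 4]
restored = tf.reshape(virtual, original_shape)            # back to [8, 4]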
Example #23
 def _prepare_local(self, var_device, var_dtype, apply_state):
     super(Adagrad, self)._prepare_local(var_device, var_dtype, apply_state)
     apply_state[(var_device, var_dtype)].update(
         dict(epsilon=ops.convert_to_tensor_v2(self.epsilon, var_dtype),
              neg_lr_t=-apply_state[(var_device, var_dtype)]['lr_t'],
              zero=array_ops.zeros((), dtype=dtypes.int64)))
Example #24
    def call(self, y_true, y_pred):
        """Invokes the `Loss` instance.

        Args:
            y_true: Ground truth values.
            y_pred: The predicted values.

        Returns:
            Loss values in the form of a Tensor
        """
        gamma = self.gamma
        from_logits = self.from_logits
        axis = -1

        y_true = tf.cast(y_true, y_pred.dtype)
        y_true = ops.convert_to_tensor_v2(y_true)
        y_pred = ops.convert_to_tensor_v2(y_pred)

        probs = y_pred

        # Reformat y_pred shapes
        if (not from_logits and
                not isinstance(y_pred,
                               (ops.EagerTensor, variables_module.Variable))
                and y_pred.op.type == 'Softmax') and not hasattr(
                    y_pred, '_keras_history'):
            assert len(y_pred.op.inputs) == 1
            y_pred = y_pred.op.inputs[0]
            from_logits = True

        # Clip y_pred to a minimum and maximum value
        if not from_logits:
            epsilon_ = constant_op.constant(K.epsilon(),
                                            y_pred.dtype.base_dtype)
            y_pred = clip_ops.clip_by_value(y_pred, epsilon_, 1 - epsilon_)
            y_pred = math_ops.log(y_pred)

        # Get dimensions of predictions tensor
        if isinstance(y_pred.shape, (tuple, list)):
            output_rank = len(y_pred.shape)
        else:
            output_rank = y_pred.shape.ndims
        if output_rank is not None:
            axis %= output_rank
            if axis != output_rank - 1:
                permutation = list(
                    itertools.chain(range(axis), range(axis + 1, output_rank),
                                    [axis]))
                y_pred = array_ops.transpose(y_pred, perm=permutation)
        elif axis != -1:
            raise ValueError(
                'Cannot compute sparse categorical crossentropy with `axis={}` on an '
                'output tensor with unknown rank'.format(axis))

        # Reformat y_true shape and data type.
        y_true = cast(y_true, 'int64')

        output_shape = array_ops.shape_v2(y_pred)
        target_rank = y_true.shape.ndims

        update_shape = (target_rank is not None and output_rank is not None
                        and target_rank != output_rank - 1)
        if update_shape:
            y_true = flatten(y_true)
            y_pred = array_ops.reshape(y_pred, [-1, output_shape[-1]])

        # Calculate cross-entropy loss
        if py_any(_is_symbolic_tensor(v) for v in [y_true, y_pred]):
            with get_graph().as_default():
                loss = nn.sparse_softmax_cross_entropy_with_logits_v2(
                    labels=y_true, logits=y_pred)
        else:
            loss = nn.sparse_softmax_cross_entropy_with_logits_v2(
                labels=y_true, logits=y_pred)

        if update_shape and output_rank >= 3:
            loss = array_ops.reshape(loss, output_shape[:-1])

        # Calculate focal modulation to be applied
        gamma = tf.convert_to_tensor(gamma, dtype=tf.dtypes.float32)
        scalar_gamma = gamma.shape.rank == 0

        y_true_rank = y_true.shape.rank
        if not scalar_gamma:
            gamma = tf.gather(gamma, y_true, axis=0, batch_dims=y_true_rank)

        focal_modulation = K.pow(1 - tf.math.reduce_mean(probs, axis=1), gamma)
        focal_modulation = tf.gather(focal_modulation,
                                     y_true,
                                     axis=0,
                                     batch_dims=y_true_rank)

        loss = focal_modulation * loss

        return loss
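For comparison, a hedged sketch of the textbook focal modulation `(1 - p_t)^gamma` gathered at the true class, written with public ops; note that the `call` above uses a `reduce_mean`-based variant of the modulation rather than this exact form.

import tensorflow as tf

probs = tf.constant([[0.7, 0.2, 0.1],
                     [0.1, 0.8, 0.1]])
y_true = tf.constant([0, 1], dtype=tf.int64)
gamma = 2.0

# Probability assigned to the true class of each example.
p_true = tf.gather(probs, y_true, batch_dims=1)
focal_modulation = tf.pow(1.0 - p_true, gamma)  # [0.09, 0.04]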
 def _convert_non_tensor(x):
     # Don't call `ops.convert_to_tensor_v2` on all `inputs` because
     # `SparseTensors` can't be converted to `Tensor`.
     if isinstance(x, (np.ndarray, float, int)):
         return ops.convert_to_tensor_v2(x)
     return x
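The same idea as `_convert_non_tensor`, sketched with public APIs: convert plain NumPy/Python values eagerly while leaving `SparseTensor` inputs untouched.

import numpy as np
import tensorflow as tf

def convert_non_tensor(x):
    # SparseTensors have no dense Tensor equivalent here, so pass them through.
    if isinstance(x, (np.ndarray, float, int)):
        return tf.convert_to_tensor(x)
    return x

mixed_inputs = [
    np.ones((2, 2)), 3.0,
    tf.sparse.SparseTensor(indices=[[0, 0]], values=[1.0], dense_shape=[2, 2]),
]
converted = [convert_non_tensor(x) for x in mixed_inputs]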
Example #26
 def _prepare_local(self, var_device, var_dtype, apply_state):
     super(Adadelta, self)._prepare_local(var_device, var_dtype,
                                          apply_state)
     apply_state[(var_device, var_dtype)].update(
         dict(epsilon=ops.convert_to_tensor_v2(self.epsilon, var_dtype),
              rho=array_ops.identity(self._get_hyper('rho', var_dtype))))
Example #27
 def call(self, inputs):
     inputs = ops.convert_to_tensor_v2(inputs, dtype=self.dtype)
     inputs = gen_math_ops.cast(inputs, dtypes.float32)
     outputs = gen_math_ops.mat_mul(inputs, self.kernel)
     outputs = nn.bias_add(outputs, self.bias)
     return gen_math_ops.cos(outputs)
Example #28
 def generator():
     for _ in range(2):
         for step in range(3):
             yield (ops.convert_to_tensor_v2([step]), )
 def _split(t, indices):
   if t is None:
     return t
   t = ops.convert_to_tensor_v2(t)
   return array_ops.gather_v2(t, indices)
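A quick sketch of the gather-based split above with public ops (the index lists here are just a hypothetical train/validation partition):

import tensorflow as tf

t = tf.constant([10, 20, 30, 40, 50])
train = tf.gather(t, [0, 2, 4])  # rows kept for training
val = tf.gather(t, [1, 3])       # rows held out for validation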
    def testSaveWithSignatures(self):
        model = keras.models.Sequential()
        model.add(
            keras.layers.Dense(5,
                               input_shape=(3, ),
                               kernel_regularizer=regularizers.get('l2')))
        model.add(keras.layers.Dropout(0.5))
        model.add(
            keras.layers.Dense(4, kernel_regularizer=regularizers.get('l2')))

        input_arr = np.random.random((2, 3))
        target_arr = np.random.random((2, 4))

        model.compile(loss='mse', optimizer='rmsprop')
        model.train_on_batch(input_arr, target_arr)

        @def_function.function(
            input_signature=[tensor_spec.TensorSpec((None, 3))])
        def predict(inputs):
            return {'predictions': model(inputs)}

        feature_configs = {
            'inputs':
            parsing_ops.FixedLenFeature(shape=[2, 3], dtype=dtypes.float32)
        }

        @def_function.function(
            input_signature=[tensor_spec.TensorSpec([None], dtypes.string)])
        def parse_and_predict(examples):
            features = parsing_ops.parse_single_example(
                examples[0], feature_configs)
            return {
                'predictions': model(features['inputs']),
                'layer_1_outputs': model.layers[0](features['inputs'])
            }

        saved_model_dir = self._save_model_dir()
        model.save(saved_model_dir,
                   save_format='tf',
                   signatures={
                       'predict': predict,
                       'parse_and_predict': parse_and_predict
                   })
        model.save('/tmp/saved',
                   save_format='tf',
                   signatures={
                       'predict': predict,
                       'parse_and_predict': parse_and_predict
                   })

        loaded = keras_load.load(saved_model_dir)

        self.assertAllClose(
            model.predict(input_arr),
            loaded.signatures['predict'](ops.convert_to_tensor_v2(
                input_arr.astype('float32')))['predictions'])

        feature = {
            'inputs':
            feature_pb2.Feature(float_list=feature_pb2.FloatList(
                value=input_arr.astype('float32').flatten()))
        }
        example = example_pb2.Example(features=feature_pb2.Features(
            feature=feature))
        outputs = loaded.signatures['parse_and_predict'](
            ops.convert_to_tensor_v2([example.SerializeToString()]))
        self.assertAllClose(model.predict(input_arr), outputs['predictions'])
        self.assertAllClose(model.layers[0](input_arr),
                            outputs['layer_1_outputs'])
Example #31
 def autocast_to_tensor(v):
   if isinstance(
       v, (int, float, bool, str, list, tuple, np.ndarray, np.generic)):
     init_val = ops.convert_to_tensor_v2(v)
     return array_ops.placeholder(init_val.dtype, init_val.shape)
   return v
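A hedged TF1-style sketch of the same idea as `autocast_to_tensor`: derive a placeholder's dtype and shape from a Python/NumPy value by converting it to a tensor first (placeholders only exist in graph mode):

import numpy as np
import tensorflow.compat.v1 as tf1

tf1.disable_eager_execution()

init_val = tf1.convert_to_tensor(np.zeros((2, 3), dtype=np.float32))
placeholder = tf1.placeholder(init_val.dtype, init_val.shape)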