Ejemplo n.º 1
0
def compute_weighted_loss(
    losses, weights=1.0, scope=None, loss_collection=ops.GraphKeys.LOSSES):
  """Reduces `losses`, scaled by `weights`, to a single weighted mean.

  Both inputs are cast to float for the computation. The result is cast back
  to the dtype `losses` had on entry and registered through `util.add_loss`.

  Args:
    losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `losses`, and which must be broadcastable to `losses` (i.e., every
      dimension is either `1` or equal to the matching `losses` dimension).
    scope: Name scope for the ops created while computing the loss.
    loss_collection: Collection(s) the resulting loss is added to.

  Returns:
    A scalar `Tensor` holding the weighted loss, in the dtype of `losses`.

  Raises:
    ValueError: If `weights` is `None` or its shape is not compatible with
      `losses`, or if the rank of either `losses` or `weights` is unknown.
  """
  with ops.name_scope(scope, "weighted_loss", (losses, weights)):
    # The broadcast assertion is created inside the name scope and gates every
    # op built below.
    broadcast_check = weights_broadcast_ops.assert_broadcastable(
        weights, losses)
    with ops.control_dependencies((broadcast_check,)):
      loss_tensor = ops.convert_to_tensor(losses)
      original_dtype = loss_tensor.dtype
      float_losses = math_ops.to_float(loss_tensor)
      float_weights = math_ops.to_float(weights)
      weighted_total = _scale_losses(float_losses, float_weights)
      present_count = _num_present(float_losses, float_weights)
      # Restore the caller's dtype before publishing the loss.
      result = math_ops.cast(
          _safe_mean(weighted_total, present_count), original_dtype)
      util.add_loss(result, loss_collection)
      return result
Ejemplo n.º 2
0
def mean_squared_error(labels, predictions, weights=1.0, scope=None,
                       loss_collection=ops.GraphKeys.LOSSES):
  """Adds a Sum-of-Squares loss to the training procedure.

  The element-wise squared difference between `predictions` and `labels` is
  handed to `compute_weighted_loss` together with `weights`. A scalar
  `weights` simply scales the loss; a `[batch_size]` tensor rescales each
  sample's loss; a tensor shaped like `predictions` scales each element.

  Args:
    labels: The ground truth output tensor, same dimensions as `predictions`.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and which must be broadcastable to `labels` (i.e., every
      dimension is either `1` or equal to the matching `losses` dimension).
    scope: Name scope for the ops created while computing the loss.
    loss_collection: Collection to which the loss will be added.

  Returns:
    A scalar `Tensor` representing the loss value.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels`
      or if the shape of `weights` is invalid.
  """
  with ops.name_scope(scope, "mean_squared_error",
                      (predictions, labels, weights)) as scope:
    float_predictions = math_ops.to_float(predictions)
    float_labels = math_ops.to_float(labels)
    # Shapes are checked on the converted tensors, so array-likes are accepted.
    float_predictions.get_shape().assert_is_compatible_with(
        float_labels.get_shape())
    residuals = math_ops.subtract(float_predictions, float_labels)
    squared_errors = math_ops.square(residuals)
    return compute_weighted_loss(
        squared_errors, weights, scope, loss_collection)
Ejemplo n.º 3
0
def contrastive_loss(labels, embeddings_anchor, embeddings_positive,
                     margin=1.0):
  """Computes the contrastive loss over pairs of embeddings.

  Pairs labelled 1 are pulled together (penalised by squared distance); pairs
  labelled 0 are pushed apart until their distance is at least `margin`.
  See: http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf

  Args:
    labels: 1-D tf.int32 `Tensor` with shape [batch_size] of binary labels
      indicating positive vs negative pair.
    embeddings_anchor: 2-D float `Tensor` of embedding vectors for the anchor
      images. Embeddings should be l2 normalized.
    embeddings_positive: 2-D float `Tensor` of embedding vectors for the
      positive images. Embeddings should be l2 normalized.
    margin: Margin term in the loss definition.

  Returns:
    contrastive_loss: tf.float32 scalar.
  """
  # Euclidean distance of every pair, reduced over axis 1.
  squared_diffs = math_ops.square(embeddings_anchor - embeddings_positive)
  distances = math_ops.sqrt(math_ops.reduce_sum(squared_diffs, 1))

  # Positive pairs (label 1) pay the squared distance.
  positive_term = math_ops.to_float(labels) * math_ops.square(distances)

  # Negative pairs (label 0) pay only while they sit inside the margin.
  negative_weight = 1. - math_ops.to_float(labels)
  margin_violation = math_ops.maximum(margin - distances, 0.)
  negative_term = negative_weight * math_ops.square(margin_violation)

  return math_ops.reduce_mean(
      positive_term + negative_term, name='contrastive_loss')
Ejemplo n.º 4
0
def compute_weighted_loss(losses, weight=1.0):
  """Reduces `losses`, scaled by `weight`, to a single weighted mean.

  The result is cast back to the dtype of `losses` and registered with
  `add_loss` before being returned.

  Args:
    losses: A tensor of size [batch_size, d1, ... dN].
    weight: A tensor of size [1] or [batch_size, d1, ... dK] where K < N.

  Returns:
    A scalar `Tensor` that returns the weighted loss.

  Raises:
    ValueError: If `weight` is None, or if the static rank of either `losses`
      or `weight` is unknown.
  """
  if weight is None:
    raise ValueError("`weight` cannot be None")
  original_dtype = losses.dtype
  float_losses = math_ops.to_float(losses)
  float_weight = math_ops.to_float(ops.convert_to_tensor(weight))

  # Both operands need a statically known rank.
  rank_checks = (
      (float_losses, "losses.get_shape().ndims cannot be None"),
      (float_weight, "weight.get_shape().ndims cannot be None"),
  )
  for tensor, message in rank_checks:
    if tensor.get_shape().ndims is None:
      raise ValueError(message)

  weighted_total = _scale_losses(float_losses, float_weight)
  present_count = _num_present(float_losses, float_weight)
  # Restore the caller's dtype before publishing the loss.
  result = math_ops.cast(
      _safe_mean(weighted_total, present_count), original_dtype)
  add_loss(result)
  return result
Ejemplo n.º 5
0
def ParseLabelTensorOrDict(labels):
  """Return a float tensor to use for input labels to tensor_forest.

  `labels` may be a single tensor or a dict mapping column names to tensors.
  Dict values are taken in sorted key order and concatenated along axis 1.
  Any `SparseTensor` (top-level or inside the dict) is first converted to a
  dense tensor, with missing entries filled with -1.

  Args:
    labels: `Tensor` or `dict` of `Tensor` objects.

  Returns:
    A float tensor of labels/outputs.
  """

  def _to_dense(tensor):
    """Densifies `tensor` if it is sparse; otherwise returns it unchanged."""
    if isinstance(tensor, sparse_tensor.SparseTensor):
      return sparse_ops.sparse_tensor_to_dense(tensor, default_value=-1)
    return tensor

  if isinstance(labels, dict):
    # The sparse check must look at each column, not at the dict itself;
    # testing the dict is always False and leaves sparse columns undensified.
    columns = [_to_dense(labels[k]) for k in sorted(labels.keys())]
    return math_ops.to_float(array_ops.concat(columns, 1))
  return math_ops.to_float(_to_dense(labels))
Ejemplo n.º 6
0
def hinge_loss(labels, logits, weights=1.0, scope=None,
               loss_collection=ops.GraphKeys.LOSSES):
  """Adds a hinge loss to the training procedure.

  Binary {0, 1} labels are mapped to {-1, +1} and the element-wise loss
  `relu(1 - labels * logits)` is passed to `compute_weighted_loss`.

  WARNING: `weights` also supports dimensions of 1, but the broadcasting does
  not work as advertised: you'll wind up with a weighted sum instead of a
  weighted mean for any but the last dimension. Do not rely on the current
  behavior for anything but the shapes documented for `weights` below.

  Args:
    labels: The ground truth output tensor. Its shape should match the shape
      of `logits`. The values of the tensor are expected to be 0.0 or 1.0.
    logits: The logits, a float tensor.
    weights: Coefficients for the loss: a scalar, a tensor of shape
      `[batch_size]`, or a tensor whose shape matches `logits`.
    scope: Name scope for the ops created while computing the loss.
    loss_collection: Collection to which the loss will be added.

  Returns:
    A scalar `Tensor` of the loss value.

  Raises:
    ValueError: If the shapes of `logits` and `labels` don't match.
  """
  with ops.name_scope(scope, "hinge_loss", (logits, labels)) as scope:
    float_logits = math_ops.to_float(logits)
    binary_labels = math_ops.to_float(labels)
    float_logits.get_shape().assert_is_compatible_with(
        binary_labels.get_shape())
    # Map binary labels {0, 1} onto {-1, +1} (as floats).
    ones = array_ops.ones_like(binary_labels)
    signed_labels = math_ops.subtract(2 * binary_labels, ones)
    agreement = math_ops.multiply(signed_labels, float_logits)
    per_element = nn_ops.relu(math_ops.subtract(ones, agreement))
    return compute_weighted_loss(
        per_element, weights, scope, loss_collection)
Ejemplo n.º 7
0
def huber_loss(labels, predictions, weight=1.0, k=1.0, scope=None):
    """Computes a weighted Huber loss between `labels` and `predictions`.

    See https://en.wikipedia.org/wiki/Huber_loss. With
    `x = predictions - labels`, each element contributes:

        0.5 * x^2              if |x| < k
        k * |x| - 0.5 * k^2    otherwise

    (the two branches agree at |x| == k). The element-wise losses are then
    reduced by `tf.losses.compute_weighted_loss`.

    Adapted from:
    http://concise-bio.readthedocs.io/en/latest/_modules/concise/tf_helper.html

    Args:
      labels: The ground truth output tensor, same dimensions as
        `predictions`.
      predictions: The predicted outputs. Must already be a `Tensor`, since
        its static shape is checked before any conversion.
      weight: Coefficients for the loss, forwarded to
        `tf.losses.compute_weighted_loss`.
      k: Threshold at which the loss switches from quadratic to linear.
      scope: Optional name scope for the created ops; defaults to
        "huber_loss".

    Returns:
      The weighted Huber loss returned by
      `tf.losses.compute_weighted_loss`.

    Raises:
      ValueError: If `weight` is None or the shapes of `predictions` and
        `labels` are incompatible.
    """
    # The default scope name used to be the copy-pasted "absolute_difference",
    # which mislabelled these ops in the graph.
    with ops.name_scope(scope, "huber_loss", [predictions, labels]):
        predictions.get_shape().assert_is_compatible_with(labels.get_shape())
        if weight is None:
            raise ValueError("`weight` cannot be None")
        predictions = math_ops.to_float(predictions)
        labels = math_ops.to_float(labels)
        diff = math_ops.subtract(predictions, labels)
        abs_diff = tf.abs(diff)
        losses = tf.where(abs_diff < k,
                          0.5 * tf.square(diff),
                          k * abs_diff - 0.5 * k ** 2)
        return tf.losses.compute_weighted_loss(losses, weight)
Ejemplo n.º 8
0
def hinge_loss(labels, logits, weights=1.0, scope=None,
               loss_collection=ops.GraphKeys.LOSSES,
               reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds a hinge loss to the training procedure.

  Binary {0, 1} labels are mapped to {-1, +1} and the element-wise loss
  `relu(1 - labels * logits)` is passed to `compute_weighted_loss`.

  Args:
    labels: The ground truth output tensor. Its shape should match the shape
      of `logits`. The values of the tensor are expected to be 0.0 or 1.0.
    logits: The logits, a float tensor.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and which must be broadcastable to `labels` (i.e., every
      dimension is either `1` or equal to the matching `losses` dimension).
    scope: Name scope for the ops created while computing the loss.
    loss_collection: Collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shapes of `logits` and `labels` don't match.
  """
  with ops.name_scope(scope, "hinge_loss", (logits, labels, weights)) as scope:
    float_logits = math_ops.to_float(logits)
    binary_labels = math_ops.to_float(labels)
    float_logits.get_shape().assert_is_compatible_with(
        binary_labels.get_shape())
    # Map binary labels {0, 1} onto {-1, +1} (as floats).
    ones = array_ops.ones_like(binary_labels)
    signed_labels = math_ops.subtract(2 * binary_labels, ones)
    agreement = math_ops.multiply(signed_labels, float_logits)
    per_element = nn_ops.relu(math_ops.subtract(ones, agreement))
    return compute_weighted_loss(
        per_element, weights, scope, loss_collection, reduction=reduction)
Ejemplo n.º 9
0
def cosine_distance(predictions, targets, dim, weight=1.0, scope=None):
  """Adds a cosine-distance loss to the training procedure.

  Note that the function assumes that the predictions and targets are already
  unit-normalized. The per-element loss is `1 - sum(predictions * targets)`
  reduced along `dim`.

  Args:
    predictions: An arbitrary matrix. Must already be a `Tensor`, since its
      static shape is checked before any conversion.
    targets: A `Tensor` whose shape matches `predictions`.
    dim: The dimension along which the cosine distance is computed.
    weight: Coefficients for the loss: a scalar, a tensor of shape
      [batch_size], or a tensor whose shape matches `predictions`.
    scope: Name scope for the ops created while computing the loss.

  Returns:
    A scalar `Tensor` representing the loss value.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `targets`,
      or if `weight` is None.
  """
  with ops.name_scope(scope, "cosine_distance_loss",
                      [predictions, targets]) as scope:
    predictions.get_shape().assert_is_compatible_with(targets.get_shape())
    if weight is None:
      raise ValueError("`weight` cannot be None")

    predictions = math_ops.to_float(predictions)
    targets = math_ops.to_float(targets)

    # `math_ops.mul` was renamed to `multiply` in TensorFlow 1.0; use the
    # spelling the rest of this file relies on.
    radial_diffs = math_ops.multiply(predictions, targets)
    losses = 1 - math_ops.reduce_sum(radial_diffs, reduction_indices=[dim,])
    return compute_weighted_loss(losses, weight)
Ejemplo n.º 10
0
def absolute_difference(
    labels, predictions, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES):
  """Adds an Absolute Difference loss to the training procedure.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of
  size [batch_size], then the total loss for each sample of the batch is
  rescaled by the corresponding element in the `weights` vector. If the shape
  of `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as `predictions`.
    predictions: The predicted outputs. Must already be a `Tensor`, since its
      static shape is checked before any conversion.
    weights: Coefficients for the loss: a scalar, a tensor of shape
      [batch_size], or a tensor whose shape matches `predictions`.
    scope: Name scope for the ops created while computing the loss.
    loss_collection: Collection to which this loss will be added.

  Returns:
    A scalar `Tensor` representing the loss value.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels`
      or if the shape of `weights` is invalid.
  """
  with ops.name_scope(scope, "absolute_difference",
                      [predictions, labels, weights]) as scope:
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    predictions = math_ops.to_float(predictions)
    labels = math_ops.to_float(labels)
    # `math_ops.sub` was renamed to `subtract` in TensorFlow 1.0; use the
    # spelling the rest of this file relies on.
    losses = math_ops.abs(math_ops.subtract(predictions, labels))
    return compute_weighted_loss(losses, weights, scope, loss_collection)
Ejemplo n.º 11
0
def mean_squared_error(predictions, labels=None, weights=1.0, scope=None):
  """Adds a Sum-of-Squares loss to the training procedure.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of
  size [batch_size], then the total loss for each sample of the batch is
  rescaled by the corresponding element in the `weights` vector. If the shape
  of `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    predictions: The predicted outputs. Must already be a `Tensor`, since its
      static shape is checked before any conversion.
    labels: The ground truth output tensor, same dimensions as `predictions`.
      Required despite the `None` default.
    weights: Coefficients for the loss: a scalar, a tensor of shape
      [batch_size], or a tensor whose shape matches `predictions`.
    scope: Name scope for the ops created while computing the loss.

  Returns:
    A scalar `Tensor` representing the loss value.

  Raises:
    ValueError: If `labels` is None, if the shape of `predictions` doesn't
      match that of `labels`, or if the shape of `weights` is invalid.
  """
  # Fail with the documented exception type instead of an AttributeError on
  # `None.get_shape()` when the caller relies on the default.
  if labels is None:
    raise ValueError("`labels` must not be None.")
  with ops.name_scope(scope, "mean_squared_error",
                      [predictions, labels, weights]) as scope:
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    predictions = math_ops.to_float(predictions)
    labels = math_ops.to_float(labels)
    losses = math_ops.square(math_ops.subtract(predictions, labels))
    return compute_weighted_loss(losses, weights, scope=scope)
Ejemplo n.º 12
0
def _compute_weighted_loss(losses, weight):
    """Reduces `losses`, scaled by `weight`, to a single weighted mean.

    The result is also added to the `ops.GraphKeys.LOSSES` collection.

    Args:
      losses: A tensor of size [batch_size, d1, ... dN].
      weight: A tensor of size [1] or [batch_size, d1, ... dK] where K < N.

    Returns:
      A scalar `Tensor` that returns the weighted loss.

    Raises:
      ValueError: If the static rank of either `losses` or `weight` is
        unknown.
    """
    float_losses = math_ops.to_float(losses)
    float_weight = math_ops.to_float(ops.convert_to_tensor(weight))

    # Both operands need a statically known rank.
    rank_checks = (
        (float_losses, "losses.get_shape().ndims cannot be None"),
        (float_weight, "weight.get_shape().ndims cannot be None"),
    )
    for tensor, message in rank_checks:
        if tensor.get_shape().ndims is None:
            raise ValueError(message)

    weighted_total = _scale_losses(float_losses, float_weight)
    present_count = _num_present(float_losses, float_weight)
    result = _safe_mean(weighted_total, present_count)
    ops.add_to_collection(ops.GraphKeys.LOSSES, result)
    return result
Ejemplo n.º 13
0
def huber_loss(labels, predictions, weights=1.0, delta=1.0, scope=None,
               loss_collection=ops.GraphKeys.LOSSES,
               reduction=Reduction.WEIGHTED_SUM_BY_NONZERO_WEIGHTS):
  """Adds a Huber Loss term to the training procedure.

  For each value x in `error = predictions - labels` (the loss is symmetric
  in the sign of x), the following is calculated:

  ```
    0.5 * x^2                  if |x| <= d
    0.5 * d^2 + d * (|x| - d)  if |x| > d
  ```

  where d is `delta`.

  See: https://en.wikipedia.org/wiki/Huber_loss

  `weights` acts as a coefficient for the loss. A scalar simply scales the
  loss; a `[batch_size]` tensor rescales each sample's total loss; a tensor
  shaped like `predictions` scales each measurable element individually.

  Args:
    labels: The ground truth output tensor, same dimensions as `predictions`.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and which must be broadcastable to `labels` (i.e., every
      dimension is either `1` or equal to the matching `losses` dimension).
    delta: `float`, the point where the huber loss function changes from
      quadratic to linear.
    scope: Name scope for the ops created while computing the loss.
    loss_collection: Collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    The weighted loss `Tensor` produced by `compute_weighted_loss`.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels`
      or if the shape of `weights` is invalid.
  """
  with ops.name_scope(scope, "huber_loss",
                      (predictions, labels, weights)) as scope:
    float_predictions = math_ops.to_float(predictions)
    float_labels = math_ops.to_float(labels)
    float_predictions.get_shape().assert_is_compatible_with(
        float_labels.get_shape())
    residual = math_ops.subtract(float_predictions, float_labels)
    magnitude = math_ops.abs(residual)
    # Portion of |x| that falls in the quadratic regime (capped at delta).
    quadratic_part = math_ops.minimum(magnitude, delta)
    # Equal in value to tf.maximum(magnitude - delta, 0), but with gradient 0
    # (not 1) at magnitude == delta. That avoids doubling the gradient there,
    # because the quadratic term already contributes a nonzero gradient.
    linear_part = (magnitude - quadratic_part)
    per_element = 0.5 * quadratic_part**2 + delta * linear_part
    return compute_weighted_loss(
        per_element, weights, scope, loss_collection, reduction=reduction)
Ejemplo n.º 14
0
def cosine_distance(
    predictions, labels=None, dim=None, weights=1.0, scope=None):
  """Adds a cosine-distance loss to the training procedure.

  Note that the function assumes that `predictions` and `labels` are already
  unit-normalized. The per-element loss is `1 - sum(predictions * labels)`
  reduced along `dim`.

  Args:
    predictions: An arbitrary matrix. Must already be a `Tensor`, since its
      static shape is checked before any conversion.
    labels: A `Tensor` whose shape matches `predictions`. Required despite
      the `None` default.
    dim: The dimension along which the cosine distance is computed. Required
      despite the `None` default.
    weights: Coefficients for the loss: a scalar, a tensor of shape
      [batch_size], or a tensor whose shape matches `predictions`.
    scope: Name scope for the ops created while computing the loss.

  Returns:
    A scalar `Tensor` representing the loss value.

  Raises:
    ValueError: If `dim` or `labels` is None, or if the shape of
      `predictions` doesn't match that of `labels`.
  """
  if dim is None:
    raise ValueError("`dim` cannot be None.")
  # Guard `labels` the same way as `dim`, instead of failing later with an
  # AttributeError on `None.get_shape()`.
  if labels is None:
    raise ValueError("`labels` cannot be None.")
  with ops.name_scope(scope, "cosine_distance_loss",
                      [predictions, labels, weights]) as scope:
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())

    predictions = math_ops.to_float(predictions)
    labels = math_ops.to_float(labels)

    radial_diffs = math_ops.multiply(predictions, labels)
    losses = 1 - math_ops.reduce_sum(radial_diffs, reduction_indices=[dim,])
    return compute_weighted_loss(losses, weights, scope=scope)
Ejemplo n.º 15
0
def _mask_probs(probs, eos_token, finished):
  """Masks log probabilities.

  Finished beams end up with all probability mass on EOS (log-prob 0 for EOS,
  the most negative float32 for every other token); unfinished beams keep
  their original values.

  Args:
    probs: Log probabilities of shape `[batch_size, beam_width, vocab_size]`.
    eos_token: An int32 id corresponding to the EOS token to allocate
      probability to.
    finished: A boolean tensor of shape `[batch_size, beam_width]` that
      specifies which elements in the beam are finished already.

  Returns:
    A tensor of shape `[batch_size, beam_width, vocab_size]`, where unfinished
    beams stay unchanged and finished beams are replaced with a tensor with
    all probability on the EOS token.
  """
  vocab_size = array_ops.shape(probs)[2]
  # 1.0 for beams still running, 0.0 for finished ones; the trailing axis is
  # added so the mask broadcasts over the vocabulary.
  still_running = math_ops.to_float(1. - math_ops.to_float(finished))
  keep_mask = array_ops.expand_dims(still_running, 2)
  # Unfinished beams pass through untouched.
  unfinished_part = keep_mask * probs
  # Row that replaces a finished beam: everything on EOS.
  eos_only_row = array_ops.one_hot(
      eos_token,
      vocab_size,
      dtype=dtypes.float32,
      on_value=0.,
      off_value=dtypes.float32.min)
  finished_part = (1. - keep_mask) * eos_only_row
  return finished_part + unfinished_part
Ejemplo n.º 16
0
def absolute_difference(predictions, targets, weight=1.0, scope=None):
  """Adds an Absolute Difference loss to the training procedure.

  `weight` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weight` is a tensor of
  size [batch_size], then the total loss for each sample of the batch is
  rescaled by the corresponding element in the `weight` vector. If the shape
  of `weight` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weight`.

  Args:
    predictions: The predicted outputs. Must already be a `Tensor`, since its
      static shape is checked before any conversion.
    targets: The ground truth output tensor, same dimensions as
      `predictions`.
    weight: Coefficients for the loss: a scalar, a tensor of shape
      [batch_size], or a tensor whose shape matches `predictions`.
    scope: Name scope for the ops created while computing the loss.

  Returns:
    A scalar `Tensor` representing the loss value.

  Raises:
    ValueError: If `weight` is None, if the shape of `predictions` doesn't
      match that of `targets`, or if the shape of `weight` is invalid.
  """
  with ops.name_scope(scope, "absolute_difference",
                      [predictions, targets]) as scope:
    predictions.get_shape().assert_is_compatible_with(targets.get_shape())
    if weight is None:
      raise ValueError("`weight` cannot be None")
    predictions = math_ops.to_float(predictions)
    targets = math_ops.to_float(targets)
    # `math_ops.sub` was renamed to `subtract` in TensorFlow 1.0; use the
    # spelling the rest of this file relies on.
    losses = math_ops.abs(math_ops.subtract(predictions, targets))
    return compute_weighted_loss(losses, weight)
Ejemplo n.º 17
0
def log_loss(predictions, labels=None, weights=1.0, epsilon=1e-7, scope=None):
  """Adds a Log Loss term to the training procedure.

  The per-element loss is
  `-labels * log(predictions + epsilon)
   - (1 - labels) * log(1 - predictions + epsilon)`.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of
  size [batch_size], then the total loss for each sample of the batch is
  rescaled by the corresponding element in the `weights` vector. If the shape
  of `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    predictions: The predicted outputs. Must already be a `Tensor`, since its
      static shape is checked before any conversion.
    labels: The ground truth output tensor, same dimensions as `predictions`.
      Required despite the `None` default.
    weights: Coefficients for the loss: a scalar, a tensor of shape
      [batch_size], or a tensor whose shape matches `predictions`.
    epsilon: A small increment to add to avoid taking a log of zero.
    scope: Name scope for the ops created while computing the loss.

  Returns:
    A scalar `Tensor` representing the loss value.

  Raises:
    ValueError: If `labels` is None, if the shape of `predictions` doesn't
      match that of `labels`, or if the shape of `weights` is invalid.
  """
  # Fail with the documented exception type instead of an AttributeError on
  # `None.get_shape()` when the caller relies on the default.
  if labels is None:
    raise ValueError("`labels` must not be None.")
  with ops.name_scope(scope, "log_loss",
                      [predictions, labels, weights]) as scope:
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    predictions = math_ops.to_float(predictions)
    labels = math_ops.to_float(labels)
    losses = -math_ops.multiply(
        labels, math_ops.log(predictions + epsilon)) - math_ops.multiply(
            (1 - labels), math_ops.log(1 - predictions + epsilon))
    return compute_weighted_loss(losses, weights, scope=scope)
Ejemplo n.º 18
0
def add_image_comparison_summaries(gan_model, num_comparisons=2,
                                   display_diffs=False):
  """Adds image summaries to compare triplets of images.

  The first image is the generator input, the second is the generator output,
  and the third is the real data. This style of comparison is useful for
  image translation problems, where the generator input is a corrupted image,
  the generator output is the reconstruction, and the real data is the target.

  For a `CycleGANModel`, the summaries are added once per direction (x2y and
  y2x), each under its own name scope.

  Args:
    gan_model: A GANModel tuple.
    num_comparisons: The number of image triplets to display.
    display_diffs: Also display the difference between generated and target.

  Raises:
    ValueError: If real data, generated data, and generator inputs aren't
      images.
    ValueError: If the generator input, real, and generated data aren't all
      the same size.
  """
  if isinstance(gan_model, namedtuples.CycleGANModel):
    # Recurse into both sub-models with the caller's display options.
    with ops.name_scope('cyclegan_x2y_image_comparison_summaries'):
      add_image_comparison_summaries(
          gan_model.model_x2y,
          num_comparisons=num_comparisons,
          display_diffs=display_diffs)
    with ops.name_scope('cyclegan_y2x_image_comparison_summaries'):
      add_image_comparison_summaries(
          gan_model.model_y2x,
          num_comparisons=num_comparisons,
          display_diffs=display_diffs)
    return

  for candidate in (gan_model.generator_inputs,
                    gan_model.generated_data,
                    gan_model.real_data):
    _assert_is_image(candidate)

  gan_model.generated_data.shape.assert_is_compatible_with(
      gan_model.generator_inputs.shape)
  gan_model.real_data.shape.assert_is_compatible_with(
      gan_model.generated_data.shape)

  # Inputs first, then generated outputs, then real data.
  image_list = []
  for batch in (gan_model.generator_inputs,
                gan_model.generated_data,
                gan_model.real_data):
    image_list.extend(array_ops.unstack(batch[:num_comparisons]))

  if display_diffs:
    generated_list = array_ops.unstack(
        gan_model.generated_data[:num_comparisons])
    real_list = array_ops.unstack(gan_model.real_data[:num_comparisons])
    for generated, real in zip(generated_list, real_list):
      image_list.append(
          math_ops.abs(
              math_ops.to_float(generated) - math_ops.to_float(real)))

  # Reshape image and display.
  summary.image(
      'image_comparison',
      eval_utils.image_reshaper(image_list, num_cols=num_comparisons),
      max_outputs=1)
Ejemplo n.º 19
0
 def linear_decay_fn(global_step):
   """Returns the fraction of `decay_steps` still remaining, floored at 0.

   `decay_steps` is taken from the enclosing scope.

   Raises:
     ValueError: If `global_step` is None.
   """
   if global_step is None:
     raise ValueError("global_step is required for linear_decay.")
   # Cap the step at `decay_steps`.
   capped_step = math_ops.minimum(global_step, decay_steps)
   total_steps = math_ops.to_int32(decay_steps)
   steps_left = total_steps - math_ops.to_int32(capped_step)
   fraction_left = (
       math_ops.to_float(steps_left) / math_ops.to_float(decay_steps))
   return math_ops.maximum(0.0, fraction_left)
Ejemplo n.º 20
0
def compute_weighted_loss(
    losses, weights=1.0, scope=None, loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Computes the weighted loss under the requested reduction.

  `losses` and `weights` are cast to float and multiplied element-wise. The
  product is then returned as is (`NONE`), summed, or summed and divided by a
  reduction-specific denominator. The result is cast back to the dtype of
  `losses` and registered through `util.add_loss`.

  Args:
    losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `losses`, and which must be broadcastable to `losses` (i.e., every
      dimension is either `1` or equal to the matching `losses` dimension).
    scope: Name scope for the ops created while computing the loss.
    loss_collection: Collection(s) the resulting loss is added to.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `losses`. If `reduction` is
    `NONE`, this has the same shape as `losses`; otherwise, it is scalar.

  Raises:
    ValueError: If `weights` is `None` or its shape is not compatible with
      `losses`, or if the rank of either `losses` or `weights` is unknown.

  Note:
    When calculating the gradient of a weighted loss, contributions from both
    `losses` and `weights` are considered. If your `weights` depend on some
    model parameters but you do not want this to affect the loss gradient,
    apply `tf.stop_gradient` to `weights` before passing them in.
  """
  Reduction.validate(reduction)
  with ops.name_scope(scope, "weighted_loss", (losses, weights)):
    # The broadcast assertion is created inside the name scope and gates every
    # op built below.
    broadcast_check = weights_broadcast_ops.assert_broadcastable(
        weights, losses)
    with ops.control_dependencies((broadcast_check,)):
      loss_tensor = ops.convert_to_tensor(losses)
      original_dtype = loss_tensor.dtype
      float_losses = math_ops.to_float(loss_tensor)
      float_weights = math_ops.to_float(weights)
      weighted = math_ops.multiply(float_losses, float_weights)
      if reduction == Reduction.NONE:
        result = weighted
      else:
        # Every other reduction starts from the plain sum; reductions without
        # a branch below keep that sum unchanged.
        result = math_ops.reduce_sum(weighted)
        if reduction == Reduction.MEAN:
          # Denominator: sum of the weights broadcast to the loss shape.
          total_weight = math_ops.reduce_sum(
              array_ops.ones_like(float_losses) * float_weights)
          result = _safe_mean(result, total_weight)
        elif (reduction == Reduction.SUM_BY_NONZERO_WEIGHTS or
              reduction == Reduction.SUM_OVER_NONZERO_WEIGHTS):
          result = _safe_mean(
              result, _num_present(float_losses, float_weights))
        elif reduction == Reduction.SUM_OVER_BATCH_SIZE:
          result = _safe_mean(result, _num_elements(float_losses))

      # Restore the caller's dtype before publishing the loss.
      result = math_ops.cast(result, original_dtype)
      util.add_loss(result, loss_collection)
      return result
Ejemplo n.º 21
0
def check_loss_data(y_true, y_pred, logits=False):
    """Normalises a (target, prediction) pair and checks their shapes agree.

    Args:
      y_true: Ground-truth values.
      y_pred: Predicted values.
      logits: When truthy, `y_pred` is converted to a tensor keeping its own
        dtype and `y_true` is cast to that dtype. Otherwise both are cast to
        float.

    Returns:
      The converted `(y_true, y_pred)` pair.

    Raises:
      ValueError: If the two shapes are incompatible.
    """
    if not logits:
        y_pred = math_ops.to_float(y_pred)
        y_true = math_ops.to_float(y_true)
    else:
        y_pred = tf.convert_to_tensor(y_pred)
        y_true = math_ops.cast(y_true, y_pred.dtype)

    pred_shape = y_pred.get_shape()
    pred_shape.assert_is_compatible_with(y_true.get_shape())
    return y_true, y_pred
Ejemplo n.º 22
0
 def _testPad(self, inputs, paddings, block_size, outputs):
   """Checks that space_to_batch and batch_to_space invert each other.

   `inputs` must map to `outputs` under `self.space_to_batch`, and `outputs`
   must map back to `inputs` under `self.batch_to_space`, using the same
   `paddings` and `block_size`.
   """
   with self.test_session(use_gpu=True):
     # Forward direction: outputs == space_to_batch(inputs).
     forward = self.space_to_batch(
         math_ops.to_float(inputs), paddings, block_size=block_size)
     self.assertAllEqual(forward.eval(), outputs)
     # Reverse direction: inputs == batch_to_space(outputs).
     backward = self.batch_to_space(
         math_ops.to_float(outputs), paddings, block_size=block_size)
     self.assertAllEqual(backward.eval(), inputs)
Ejemplo n.º 23
0
def compute_weighted_loss(losses,
                          sample_weight=None,
                          reduction=losses_impl.ReductionV2.SUM_OVER_BATCH_SIZE,
                          name=None):
  """Computes the weighted loss.

  Args:
    losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
    sample_weight: Optional `Tensor` whose rank is either 0 or the same as
      that of `losses`, or which is broadcastable to `losses`. `None` is
      treated as a weight of `1.0`.
    reduction: Type of `tf.losses.Reduction` to apply to loss. Default value is
      `SUM_OVER_BATCH_SIZE`.
    name: Optional name for the op.

  Returns:
    Weighted loss `Tensor` of the same type as `losses`. If `reduction` is
    `NONE`, this has the same shape as `losses`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `sample_weight` is not compatible with `losses`.
  """
  losses_impl.ReductionV2.validate(reduction)
  sample_weight = 1.0 if sample_weight is None else sample_weight
  with ops.name_scope(name, 'weighted_loss', (losses, sample_weight)):
    # Record the `reduction` argument on the graph so that loss normalization
    # can use it when distributing to multiple replicas.
    # TODO(josh11b): Associate it with the returned op for more precision.
    graph = ops.get_default_graph()
    graph._last_loss_reduction = reduction  # pylint: disable=protected-access

    # Align the dimensions of `sample_weight` with `losses` where possible.
    losses, _, sample_weight = squeeze_or_expand_dimensions(
        losses, None, sample_weight)
    losses = ops.convert_to_tensor(losses)
    original_dtype = losses.dtype
    float_losses = math_ops.to_float(losses)
    float_weights = math_ops.to_float(sample_weight)

    try:
      # Preferred path: broadcast the weights up to the shape of the losses.
      float_weights = weights_broadcast_ops.broadcast_weights(
          float_weights, float_losses)
    except ValueError:
      # Fallback: average the losses over their trailing axes until they have
      # the same number of dimensions as the weights.
      losses_rank = K.ndim(float_losses)
      weights_rank = K.ndim(float_weights)
      trailing_axes = list(range(weights_rank, losses_rank))
      float_losses = K.mean(float_losses, axis=trailing_axes)

    float_weights.get_shape().assert_is_compatible_with(
        float_losses.get_shape())
    per_element = math_ops.multiply(float_losses, float_weights)
    # Reduce the individual weighted losses, then restore the input dtype.
    reduced = _reduce_weighted_loss(per_element, reduction)
    return math_ops.cast(reduced, original_dtype)
Ejemplo n.º 24
0
def focal_loss(predictions, labels, gamma=2, alpha=1, weights=1.0,
               epsilon=1e-7, scope=None):
  """Adds a Focal Loss term to the training procedure.

  For each value x in `predictions`, and the corresponding l in `labels`,
  the following is calculated:

  ```
    pt = x                      if l == 1
    pt = 1 - x                  otherwise

    focal_loss = - a * (1 - pt)**g * log(pt + epsilon)
  ```

  where g is `gamma` and a is `alpha`.

  See: https://arxiv.org/pdf/1708.02002.pdf

  The per-element losses are then handed to `compute_weighted_loss` together
  with `weights` and the name scope; no `loss_collection` or `reduction` is
  passed, so that function's defaults apply.

  Args:
    predictions: The predicted outputs.
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    gamma: Exponent applied to `(1 - pt)`.
    alpha: Scalar multiplier applied to every element's loss.
    weights: Coefficients for the loss, forwarded to `compute_weighted_loss`.
    epsilon: A small increment to add to avoid taking a log of zero.
    scope: The scope for the operations performed in computing the loss.

  Returns:
    The value returned by `compute_weighted_loss` for the focal losses.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels`.
  """
  with ops.name_scope(scope, "focal_loss",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.to_float(predictions)
    labels = math_ops.to_float(labels)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())

    # Probability assigned to the true class of each element.
    is_positive = math_ops.equal(labels, 1)
    pt = array_ops.where(is_positive, predictions, 1. - predictions)

    # Down-weight well-classified elements (pt close to 1).
    modulating_factor = (1. - pt)**gamma
    losses = -alpha * modulating_factor * math_ops.log(pt + epsilon)
    return compute_weighted_loss(losses, weights, scope=scope)
Ejemplo n.º 25
0
  def create_estimator_spec(
      self, features, mode, logits, labels=None, train_op_fn=None):
    """See `Head`.

    Builds the `EstimatorSpec` for the given `mode`. In PREDICT mode only
    predictions and a regression export output are produced. In EVAL and
    TRAIN modes a weighted mean-squared-error loss (SUM reduction) is used as
    the loss.

    Raises:
      ValueError: If `mode` is TRAIN and `train_op_fn` is `None`.
    """
    scope_values = tuple(six.itervalues(features)) + (labels, logits)
    with variable_scope.variable_scope(
        None, default_name='regression_head', values=scope_values):

      # Predict.
      logits = _check_logits(logits, self._logits_dimension)
      predictions = {prediction_keys.PredictionKeys.PREDICTIONS: logits}
      if mode == model_fn.ModeKeys.PREDICT:
        regression_output = export_output.RegressionOutput(value=logits)
        return model_fn.EstimatorSpec(
            mode=model_fn.ModeKeys.PREDICT,
            predictions=predictions,
            export_outputs={'': regression_output})

      # Eval.
      float_labels = _maybe_expand_dim(math_ops.to_float(labels))
      labels = _check_labels(float_labels, self._logits_dimension)
      unweighted_loss = losses.mean_squared_error(
          labels=labels, predictions=logits, reduction=losses.Reduction.NONE)
      # Without a weight feature every example gets weight 1.
      if self._weight_feature_key is None:
        weights = 1.
      else:
        weights = features[self._weight_feature_key]
      weights = _maybe_expand_dim(math_ops.to_float(weights, name='weights'))
      training_loss = losses.compute_weighted_loss(
          unweighted_loss, weights=weights, reduction=losses.Reduction.SUM)
      if mode == model_fn.ModeKeys.EVAL:
        # Estimator already adds a metric for loss.
        mean_loss_metric = metrics_lib.mean(unweighted_loss, weights=weights)
        return model_fn.EstimatorSpec(
            mode=model_fn.ModeKeys.EVAL,
            predictions=predictions,
            loss=training_loss,
            eval_metric_ops={
                metric_keys.MetricKeys.LOSS_MEAN: mean_loss_metric,
            })

      # Train.
      if train_op_fn is None:
        raise ValueError('train_op_fn can not be None.')
      logging_ops.scalar_summary(metric_keys.MetricKeys.LOSS, training_loss)
      mean_loss = losses.compute_weighted_loss(
          unweighted_loss, weights=weights, reduction=losses.Reduction.MEAN)
      logging_ops.scalar_summary(metric_keys.MetricKeys.LOSS_MEAN, mean_loss)
      train_op = train_op_fn(training_loss)
      return model_fn.EstimatorSpec(
          mode=model_fn.ModeKeys.TRAIN,
          predictions=predictions,
          loss=training_loss,
          train_op=train_op)
Ejemplo n.º 26
0
def wasserstein_discriminator_loss(
    discriminator_real_outputs,
    discriminator_gen_outputs,
    real_weights=1.0,
    generated_weights=1.0,
    scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS,
    add_summaries=False):
  """Wasserstein discriminator loss for GANs.

  The loss is the weighted, reduced discriminator output on generated data
  minus the weighted, reduced discriminator output on real data.

  See `Wasserstein GAN` (https://arxiv.org/abs/1701.07875) for more details.

  Args:
    discriminator_real_outputs: Discriminator output on real data.
    discriminator_gen_outputs: Discriminator output on generated data. Expected
      to be in the range of (-inf, inf).
    real_weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `discriminator_real_outputs`, and must be broadcastable to
      `discriminator_real_outputs` (i.e., all dimensions must be either `1`, or
      the same as the corresponding dimension).
    generated_weights: Same as `real_weights`, but for
      `discriminator_gen_outputs`.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which this loss will be added.
    reduction: A `tf.losses.Reduction` to apply to loss.
    add_summaries: Whether or not to add summaries for the loss.

  Returns:
    A loss Tensor. The shape depends on `reduction`.
  """
  scope_values = (
      discriminator_real_outputs, discriminator_gen_outputs, real_weights,
      generated_weights)
  with ops.name_scope(
      scope, 'discriminator_wasserstein_loss', scope_values) as scope:
    real_outputs = math_ops.to_float(discriminator_real_outputs)
    gen_outputs = math_ops.to_float(discriminator_gen_outputs)
    real_outputs.shape.assert_is_compatible_with(gen_outputs.shape)

    def _reduce(outputs, weights):
      # The partial losses are not registered in any collection; only the
      # combined loss below is.
      return losses.compute_weighted_loss(
          outputs, weights, scope, loss_collection=None, reduction=reduction)

    loss_on_generated = _reduce(gen_outputs, generated_weights)
    loss_on_real = _reduce(real_outputs, real_weights)
    loss = loss_on_generated - loss_on_real
    util.add_loss(loss, loss_collection)

    if add_summaries:
      scalars = (
          ('discriminator_gen_wass_loss', loss_on_generated),
          ('discriminator_real_wass_loss', loss_on_real),
          ('discriminator_wass_loss', loss),
      )
      for tag, value in scalars:
        summary.scalar(tag, value)

  return loss
Ejemplo n.º 27
0
def log_loss(labels, predictions, weights=1.0, epsilon=1e-7, scope=None,
             loss_collection=ops.GraphKeys.LOSSES,
             reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds a Log Loss term to the training procedure.

  The per-element loss is
  `-labels * log(predictions + epsilon)
   - (1 - labels) * log(1 - predictions + epsilon)`.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of size
  `[batch_size]`, then the total loss for each sample of the batch is rescaled
  by the corresponding element in the `weights` vector. If the shape of
  `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    epsilon: A small increment to add to avoid taking a log of zero.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid.  Also if `labels` or `predictions`
      is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("labels must not be None.")
  if predictions is None:
    raise ValueError("predictions must not be None.")
  with ops.name_scope(scope, "log_loss",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.to_float(predictions)
    labels = math_ops.to_float(labels)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())

    # Contribution of the positive class: -l * log(p + eps).
    label_term = -math_ops.multiply(
        labels, math_ops.log(predictions + epsilon))
    # Contribution of the negative class: (1 - l) * log(1 - p + eps).
    complement_term = math_ops.multiply(
        (1 - labels), math_ops.log(1 - predictions + epsilon))
    losses = label_term - complement_term
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)
Ejemplo n.º 28
0
def cosine_distance(
    labels, predictions, axis=None, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS,
    dim=None):
  """Adds a cosine-distance loss to the training procedure.

  The per-slice loss is `1 - sum(predictions * labels)` along `axis`, with the
  reduced dimension kept. Note that the function assumes that `predictions`
  and `labels` are already unit-normalized.

  Args:
    labels: `Tensor` whose shape matches 'predictions'
    predictions: An arbitrary matrix.
    axis: The dimension along which the cosine distance is computed.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which this loss will be added.
    reduction: Type of reduction to apply to loss.
    dim: The old (deprecated) name for `axis`.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If `predictions` shape doesn't match `labels` shape, or
      `axis`, `labels` or `predictions` is `None`. Validation of `weights` is
      left to `compute_weighted_loss`.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  # Resolve the deprecated `dim` alias before validating.
  axis = deprecated_argument_lookup("axis", axis, "dim", dim)
  if axis is None:
    raise ValueError("You must specify 'axis'.")
  if labels is None:
    raise ValueError("labels must not be None.")
  if predictions is None:
    raise ValueError("predictions must not be None.")
  with ops.name_scope(scope, "cosine_distance_loss",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.to_float(predictions)
    labels = math_ops.to_float(labels)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())

    elementwise_products = math_ops.multiply(predictions, labels)
    similarity = math_ops.reduce_sum(
        elementwise_products, axis=(axis,), keepdims=True)
    losses = 1 - similarity
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)
Ejemplo n.º 29
0
 def cosine_decay_fn(global_step):
   """Returns the cosine decay multiplier for `global_step`.

   Uses `decay_steps`, `num_periods` and `zero_after` from the enclosing
   scope. The step is capped at `decay_steps`; the result is
   `0.5 * (1 + cos(pi * fraction))` with
   `fraction = 2 * num_periods * step / decay_steps`. When `zero_after` is
   not None, the result is 0.0 wherever `fraction >= 2 * zero_after`.

   Raises:
     ValueError: If `global_step` is None.
   """
   if global_step is None:
     raise ValueError("global_step is required for cosine_decay.")
   capped_step = math_ops.minimum(global_step, decay_steps)
   completed_fraction = (
       math_ops.to_float(capped_step) / math_ops.to_float(decay_steps))
   fraction = 2.0 * num_periods * completed_fraction
   cosine = math_ops.cos(constant_op.constant(math.pi) * fraction)
   decayed = 0.5 * (1.0 + cosine)
   if zero_after is None:
     return decayed
   past_cutoff = math_ops.greater_equal(fraction, 2 * zero_after)
   return array_ops.where(past_cutoff, 0.0, decayed)
Ejemplo n.º 30
0
 def _testPad(self, inputs, block_shape, paddings, outputs):
   """Checks space_to_batch_nd and batch_to_space_nd against each other.

   Runs once without and once with GPU. `block_shape` is converted to a numpy
   array and `paddings` is reshaped to `(len(block_shape), 2)` before use.

   Args:
     inputs: Fed to `space_to_batch_nd`; also the expected result of
       `batch_to_space_nd` applied to `outputs`.
     block_shape: Block shape passed to both ops.
     paddings: Paddings passed to both ops.
     outputs: Expected result of `space_to_batch_nd` applied to `inputs`.
   """
   block_shape = np.array(block_shape)
   paddings = np.array(paddings).reshape((len(block_shape), 2))
   # (op, source, expected): first outputs = space_to_batch(inputs), then
   # inputs = batch_to_space(outputs).
   directions = (
       (array_ops.space_to_batch_nd, inputs, outputs),
       (array_ops.batch_to_space_nd, outputs, inputs),
   )
   for use_gpu in (False, True):
     with self.test_session(use_gpu=use_gpu):
       for op, source, expected in directions:
         result = op(math_ops.to_float(source), block_shape, paddings)
         self.assertAllEqual(result.eval(), expected)
Ejemplo n.º 31
0
    def _Grad(op, grad):
        """A gradient function for RFFT with the provided `rank` and `irfft_fn`.

        `rank` and `irfft_fn` are taken from the enclosing scope.

        Args:
          op: The forward op; `op.inputs[0]` is the input tensor and
            `op.inputs[1]` is `fft_length`.
          grad: The incoming gradient.

        Returns:
          A 2-tuple: the gradient with respect to `op.inputs[0]`, and `None`
          for the second input.
        """
        fft_length = op.inputs[1]
        input_shape = array_ops.shape(op.inputs[0])
        # 1 (as complex64) when the innermost FFT length is even, else 0.
        is_even = math_ops.cast(1 - (fft_length[-1] % 2), dtypes.complex64)

        def _TileForBroadcasting(matrix, t):
            """Tiles `matrix` across the leading (all but last two) dims of `t`."""
            # Prepend rank(t) - 2 singleton dimensions to `matrix`...
            expanded = array_ops.reshape(
                matrix,
                array_ops.concat([
                    array_ops.ones([array_ops.rank(t) - 2], dtypes.int32),
                    array_ops.shape(matrix)
                ], 0))
            # ...then repeat it shape(t)[:-2] times along those dimensions.
            return array_ops.tile(
                expanded, array_ops.concat([array_ops.shape(t)[:-2], [1, 1]],
                                           0))

        def _MaskMatrix(length):
            """Returns a `length` x `length` matrix of exp(-2j * pi * i * j / length)."""
            # TODO(rjryan): Speed up computation of twiddle factors using the
            # following recurrence relation and cache them across invocations of RFFT.
            #
            # t_n = exp(sqrt(-1) * pi * n^2 / line_len)
            # for n = 0, 1,..., line_len-1.
            # For n > 2, use t_n = t_{n-1}^2 / t_{n-2} * t_1^2
            # `a` holds the column index in every row; `b` is its transpose and
            # therefore holds the row index.
            a = array_ops.tile(
                array_ops.expand_dims(math_ops.range(length), 0), (length, 1))
            b = array_ops.transpose(a, [1, 0])
            return math_ops.exp(-2j * np.pi *
                                math_ops.cast(a * b, dtypes.complex64) /
                                math_ops.cast(length, dtypes.complex64))

        def _YMMask(length):
            """A sequence of [1+0j, -1+0j, 1+0j, -1+0j, ...] with length `length`."""
            return math_ops.cast(1 - 2 * (math_ops.range(length) % 2),
                                 dtypes.complex64)

        # First component of the incoming gradient along the last axis.
        y0 = grad[..., 0:1]
        # NOTE(review): `extra_terms` is only assigned for rank 1 and rank 2;
        # any other `rank` would fail at the return statement. Confirm callers
        # only ever pass 1 or 2.
        if rank == 1:
            # Last component along the last axis; only contributes when the
            # FFT length is even (see `is_even`).
            ym = grad[..., -1:]
            extra_terms = y0 + is_even * ym * _YMMask(input_shape[-1])
        elif rank == 2:
            # Create a mask matrix for y0 and ym.
            base_mask = _MaskMatrix(input_shape[-2])

            # Tile base_mask to match y0 in shape so that we can batch-matmul the
            # inner 2 dimensions.
            tiled_mask = _TileForBroadcasting(base_mask, y0)

            y0_term = math_ops.matmul(tiled_mask, math_ops.conj(y0))
            extra_terms = y0_term

            ym = grad[..., -1:]
            ym_term = math_ops.matmul(tiled_mask, math_ops.conj(ym))

            # Repeat ym_term along the innermost axis and apply the
            # alternating-sign mask.
            inner_dim = input_shape[-1]
            ym_term = array_ops.tile(
                ym_term,
                array_ops.concat([
                    array_ops.ones([array_ops.rank(grad) - 1], dtypes.int32),
                    [inner_dim]
                ], 0)) * _YMMask(inner_dim)

            extra_terms += is_even * ym_term

        # The gradient of RFFT is the IRFFT of the incoming gradient times a scaling
        # factor, plus some additional terms to make up for the components dropped
        # due to Hermitian symmetry.
        input_size = math_ops.to_float(_FFTSizeForGrad(op.inputs[0], rank))
        irfft = irfft_fn(grad, fft_length)
        return 0.5 * (irfft * input_size + math_ops.real(extra_terms)), None
Ejemplo n.º 32
0
def histogram_fixed_width(values,
                          value_range,
                          nbins=100,
                          dtype=dtypes.int32,
                          name=None):
    """Return histogram of values.

    Given the tensor `values`, this operation returns a rank 1 histogram
    counting the number of entries in `values` that fell into every bin.  The
    bins are equal width and determined by the arguments `value_range` and
    `nbins`.

    Args:
      values:  Numeric `Tensor`.
      value_range:  Shape [2] `Tensor`.  values <= value_range[0] will be
        mapped to hist[0], values >= value_range[1] will be mapped to hist[-1].
        Must be same dtype as `values`.
      nbins:  Scalar `int32 Tensor`.  Number of histogram bins.
      dtype:  dtype for returned histogram.
      name:  A name for this operation (defaults to 'histogram_fixed_width').

    Returns:
      A 1-D `Tensor` holding histogram of values.

    Examples:

    ```python
    # Bins will be:  (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)
    nbins = 5
    value_range = [0.0, 5.0]
    new_values = [-1.0, 0.0, 1.5, 2.0, 5.0, 15]

    with tf.Session() as sess:
      hist = tf.histogram_fixed_width(new_values, value_range, nbins=5)
      sess.run(hist) => [2, 1, 1, 0, 2]
    ```
    """
    with ops.name_scope(name, 'histogram_fixed_width',
                        [values, value_range, nbins]) as scope:
        values = ops.convert_to_tensor(values, name='values')
        values = array_ops.reshape(values, [-1])
        value_range = ops.convert_to_tensor(value_range, name='value_range')
        nbins = ops.convert_to_tensor(nbins, dtype=dtypes.int32, name='nbins')

        # Map tensor values that fall within value_range to [0, 1].
        scaled_values = math_ops.truediv(values - value_range[0],
                                         value_range[1] - value_range[0],
                                         name='scaled_values')

        # Use the dtype that truediv produced rather than a fixed float32:
        # for float64 (or integer) `values` the quotient is not float32, and
        # multiplying tensors of different dtypes is an error.
        nbins_float = math_ops.cast(nbins, scaled_values.dtype)

        # map tensor values within the open interval value_range to {0,.., nbins-1},
        # values outside the open interval will be zero or less, or nbins or more.
        indices = math_ops.floor(nbins_float * scaled_values, name='indices')

        # Clip edge cases (e.g. value = value_range[1]) or "outliers."
        indices = math_ops.cast(
            clip_ops.clip_by_value(indices, 0, nbins_float - 1), dtypes.int32)

        # TODO(langmore) This creates an array of ones to add up and place in the
        # bins.  This is inefficient, so replace when a better Op is available.
        return math_ops.unsorted_segment_sum(array_ops.ones_like(indices,
                                                                 dtype=dtype),
                                             indices,
                                             nbins,
                                             name=scope)
Ejemplo n.º 33
0
    def _training_examples_and_variables():
      """Returns dictionaries for training examples and variables.

      Reads `targets`, `features`, `columns_to_variables`, `weight_column_name`
      and `self` from the enclosing scope.

      Returns:
        A tuple `(examples, sdca_variables)`. `examples` has the keys
        `sparse_features`, `dense_features`, `example_labels`,
        `example_weights` and `example_ids`. `sdca_variables` has the keys
        `sparse_features_weights` and `dense_features_weights`.

      Raises:
        ValueError: If a column is of a type not handled below.
      """
      batch_size = targets.get_shape()[0]

      # Iterate over all feature columns and create appropriate lists for dense
      # and sparse features as well as dense and sparse weights (variables) for
      # SDCA.
      # TODO(sibyl-vie3Poto): Reshape variables stored as values in column_to_variables
      # dict as 1-dimensional tensors.
      dense_features, sparse_features, sparse_feature_with_values = [], [], []
      dense_feature_weights = []
      sparse_feature_weights, sparse_feature_with_values_weights = [], []
      # pylint: disable=protected-access
      # Columns are visited in key order so that each feature list stays
      # aligned with its weights list.
      for column in sorted(columns_to_variables.keys(), key=lambda x: x.key):
        transformed_tensor = features[column]
        if isinstance(column, layers.feature_column._RealValuedColumn):
          # A real-valued column corresponds to a dense feature in SDCA. A
          # transformed tensor corresponding to a RealValuedColumn has rank 2
          # (its shape is typically [batch_size, column.dimension]) and so it
          # can be passed to SDCA as is.
          dense_features.append(transformed_tensor)
          # For real valued columns, the variables list contains exactly one
          # element.
          dense_feature_weights.append(columns_to_variables[column][0])
        elif isinstance(column, layers.feature_column._BucketizedColumn):
          # A bucketized column corresponds to a sparse feature in SDCA. The
          # bucketized feature is "sparsified" for SDCA by converting it to a
          # SparseFeatureColumn representing the one-hot encoding of the
          # bucketized feature.
          dense_bucket_tensor = column.to_dnn_input_layer(transformed_tensor)
          sparse_feature_column = _tensor_to_sparse_feature_column(
              dense_bucket_tensor)
          sparse_feature_with_values.append(sparse_feature_column)
          # For bucketized columns, the variables list contains exactly one
          # element.
          sparse_feature_with_values_weights.append(
              columns_to_variables[column][0])
        elif isinstance(column, (layers.feature_column._CrossedColumn,
                                 layers.feature_column._SparseColumn)):
          # Crossed and sparse columns carry no per-feature values, so the
          # third SparseFeatureColumn argument is None.
          # NOTE(review): split(1, 2, tensor) looks like the legacy
          # (split_dim, num_split, value) argument order; confirm against the
          # TensorFlow version in use.
          sparse_features.append(sdca_ops.SparseFeatureColumn(
              array_ops.reshape(
                  array_ops.split(1, 2, transformed_tensor.indices)[0], [-1]),
              array_ops.reshape(transformed_tensor.values, [-1]), None))
          sparse_feature_weights.append(columns_to_variables[column][0])
        elif isinstance(column, layers.feature_column._WeightedSparseColumn):
          # Weighted sparse columns supply ids and weights as two tensors; the
          # weight tensor's values become the feature values.
          id_tensor = column.id_tensor(transformed_tensor)
          weight_tensor = column.weight_tensor(transformed_tensor)
          sparse_feature_with_values.append(sdca_ops.SparseFeatureColumn(
              array_ops.reshape(
                  array_ops.split(1, 2, id_tensor.indices)[0], [-1]),
              array_ops.reshape(id_tensor.values, [-1]), array_ops.reshape(
                  weight_tensor.values, [-1])))
          sparse_feature_with_values_weights.append(
            columns_to_variables[column][0])
        else:
          raise ValueError('SDCAOptimizer does not support column type %s.' %
                           type(column).__name__)
      # pylint: enable=protected-access

      # Per-example weights: the flattened weight column when one is named,
      # otherwise a vector of ones with `batch_size` entries.
      example_weights = array_ops.reshape(
          features[weight_column_name],
          shape=[-1]) if weight_column_name else array_ops.ones([batch_size])
      example_ids = features[self._example_id_column]
      # Value-less sparse features are appended after the ones with values, and
      # their weights are appended in the same order to keep both lists aligned.
      sparse_feature_with_values.extend(sparse_features)
      sparse_feature_with_values_weights.extend(sparse_feature_weights)
      examples = dict(sparse_features=sparse_feature_with_values,
                      dense_features=dense_features,
                      example_labels=math_ops.to_float(array_ops.reshape(
                          targets, shape=[-1])),
                      example_weights=example_weights,
                      example_ids=example_ids)
      sdca_variables = dict(
          sparse_features_weights=sparse_feature_with_values_weights,
          dense_features_weights=dense_feature_weights)
      return examples, sdca_variables
Ejemplo n.º 34
0
    def _get_eval_ops(self, features, targets, metrics=None):
        """See base class.

        Builds a dict of evaluation ops. It always contains a streaming mean
        of the loss under "loss". When the target column has a single label
        column, the default logistic-regression metrics are added. `metrics`
        maps a name to a metric function; the name is either a string or a
        `(name, "classes" | "probabilities")` tuple selecting which
        predictions the metric receives. String names are evaluated on class
        predictions.

        Raises:
            ValueError: If a metric name is a tuple of the wrong length, has
                an unknown second element, or is neither a tuple nor a string.
        """
        features = self._get_feature_dict(features)
        logits = self._logits(features)
        loss = self._target_column.loss(logits, targets, features)
        result = {"loss": metrics_lib.streaming_mean(loss)}

        # Adds default metrics.
        if metrics is None:
            # TODO(b/29366811): This currently results in both an "accuracy" and an
            # "accuracy/threshold_0.500000_mean" metric for binary classification.
            metrics = {("accuracy", "classes"): metrics_lib.streaming_accuracy}

        # Adds additional useful metrics for the special case of binary
        # classification.
        # TODO(zakaria): Move LogisticRegressor.get_default_metrics to metrics
        #   and handle eval metric from targetcolumn.
        if self._target_column.num_label_columns == 1:
            predictions = math_ops.sigmoid(logits)
            targets_float = math_ops.to_float(targets)
            default_metrics = (
                logistic_regressor.LogisticRegressor.get_default_metrics())
            for metric_name, metric_op in default_metrics.items():
                result[metric_name] = metric_op(predictions, targets_float)

        if metrics:
            class_metrics = {}
            proba_metrics = {}
            for name, metric_op in six.iteritems(metrics):
                if not isinstance(name, tuple):
                    # Plain string names are treated as class metrics.
                    if not isinstance(name, str):
                        raise ValueError(
                            "Ignoring metric {}. Its name is not in the correct "
                            "form.".format(name))
                    class_metrics[name] = metric_op
                    continue

                if len(name) != 2:
                    raise ValueError(
                        "Ignoring metric {}. It returned a tuple with "
                        "len {}, expected 2.".format(name, len(name)))
                if name[1] not in ["classes", "probabilities"]:
                    raise ValueError(
                        "Ignoring metric {}. The 2nd element of its "
                        "name should be either 'classes' or "
                        "'probabilities'.".format(name))
                bucket = class_metrics if name[1] == "classes" else proba_metrics
                bucket[name[0]] = metric_op

            # Class metrics run first on class predictions, then probability
            # metrics on probability predictions.
            for use_proba, selected in ((False, class_metrics),
                                        (True, proba_metrics)):
                if not selected:
                    continue
                predictions = self._target_column.logits_to_predictions(
                    logits, proba=use_proba)
                result.update(
                    self._run_metrics(
                        predictions, targets, selected,
                        self._target_column.get_weight_tensor(features)))

        return result
Ejemplo n.º 35
0
def npairs_loss(labels,
                embeddings_anchor,
                embeddings_positive,
                reg_lambda=3e-3,
                print_losses=False):
    """Computes the npairs loss.

    Npairs loss expects paired data where a pair is composed of samples from
    the same label and each pair in the minibatch has a different label. The
    loss has two components: an L2 regularizer on the embedding vectors, and
    the mean softmax cross entropy that takes each row of the pair-wise
    similarity matrix as logits and the remapped (row-normalized) label
    equality matrix as labels.

    See:
    http://www.nec-labs.com/uploads/images/Department-Images/MediaAnalytics/papers/nips16_npairmetriclearning.pdf

    Args:
      labels: 1-D tf.int32 `Tensor` of shape [batch_size/2].
      embeddings_anchor: 2-D Tensor of shape [batch_size/2, embedding_dim] for
        the embedding vectors for the anchor images. Embeddings should not be
        l2 normalized.
      embeddings_positive: 2-D Tensor of shape [batch_size/2, embedding_dim]
        for the embedding vectors for the positive images. Embeddings should
        not be l2 normalized.
      reg_lambda: Float. L2 regularization term on the embedding vectors.
      print_losses: Boolean. Option to print the xent and l2loss.

    Returns:
      npairs_loss: tf.float32 scalar.
    """
    # pylint: enable=line-too-long

    def _mean_squared_norm(embeddings):
        # Squared L2 norm of every row, averaged over the rows.
        return math_ops.reduce_mean(
            math_ops.reduce_sum(math_ops.square(embeddings), 1))

    # Regularizer on both sets of embeddings.
    anchor_penalty = _mean_squared_norm(embeddings_anchor)
    positive_penalty = _mean_squared_norm(embeddings_positive)
    l2loss = math_ops.multiply(
        0.25 * reg_lambda, anchor_penalty + positive_penalty, name='l2loss')

    # Entry (i, j) is the dot product of anchor i with positive j.
    pairwise_similarity = math_ops.matmul(
        embeddings_anchor, embeddings_positive, transpose_b=True)

    # Turn the label vector into a column so that comparing it with its own
    # transpose yields the full label-equality matrix.
    label_shape = array_ops.shape(labels)
    assert label_shape.shape == 1
    column_labels = array_ops.reshape(labels, [label_shape[0], 1])
    same_label = math_ops.equal(column_labels,
                                array_ops.transpose(column_labels))

    # Normalize every row so it sums to one and can act as a soft target.
    soft_targets = math_ops.to_float(same_label)
    row_totals = math_ops.reduce_sum(soft_targets, 1, keep_dims=True)
    soft_targets = soft_targets / row_totals

    # Softmax cross entropy, one value per row, then averaged.
    per_row_xent = nn.softmax_cross_entropy_with_logits(
        labels=soft_targets, logits=pairwise_similarity)
    xent_loss = math_ops.reduce_mean(per_row_xent, name='xentropy')

    if print_losses:
        printed_values = ['cross entropy:', xent_loss, 'l2loss:', l2loss]
        xent_loss = logging_ops.Print(xent_loss, printed_values)

    return l2loss + xent_loss
Ejemplo n.º 36
0
        def _training_examples_and_variables():
            """Returns dictionaries for training examples and variables.

            Visits every column of `columns_to_variables` in order of
            `column.key` and sorts it into a dense or a sparse SDCA input:

            * real-valued columns become dense features, reshaped to rank 2;
            * bucketized columns are converted to a sparse feature column via
              `_dense_tensor_to_sparse_feature_column`;
            * sparse, weighted-sparse and crossed columns become a
              `SparseFeatureColumn` after negative ids are dropped and ids
              repeated within one example are merged (their weights summed).

            Returns:
              A tuple `(examples, sdca_variables)`. `examples` has the keys
              `sparse_features`, `dense_features`, `example_labels`,
              `example_weights` and `example_ids`; `sdca_variables` has the
              keys `sparse_features_weights` and `dense_features_weights`.

            Raises:
              ValueError: If a column has a type other than those listed above.
            """
            batch_size = targets.get_shape()[0]

            def _sparse_column_variables(column):
                """Returns the variable, or list of variables, for `column`."""
                # If a partitioner was used during variable creation there is
                # more than one Variable and the whole list is passed along;
                # otherwise only the single Variable is.
                column_vars = columns_to_variables[column]
                return column_vars if len(column_vars) > 1 else column_vars[0]

            # Dense and sparse features, each with a parallel list holding the
            # matching weights (variables) for SDCA.
            # TODO(sibyl-vie3Poto): Reshape variables stored as values in column_to_variables
            # dict as 1-dimensional tensors.
            dense_features = []
            dense_feature_weights = []
            sparse_feature_with_values = []
            sparse_feature_with_values_weights = []

            ordered_columns = sorted(columns_to_variables,
                                     key=lambda col: col.key)
            for column in ordered_columns:
                column_tensor = features[column]
                if isinstance(column, layers.feature_column._RealValuedColumn):  # pylint: disable=protected-access
                    # Real-valued column -> dense SDCA feature. Its tensor may
                    # have rank at most 2 and is passed to SDCA with rank
                    # exactly 2, i.e. shape [batch_size, column.dim].
                    rank_check = control_flow_ops.Assert(
                        math_ops.less_equal(array_ops.rank(column_tensor), 2),
                        ['transformed_tensor should have rank at most 2.'])
                    with ops.control_dependencies([rank_check]):
                        leading_dim = array_ops.shape(column_tensor)[0]
                        column_tensor = array_ops.reshape(
                            column_tensor, [leading_dim, -1])

                    dense_features.append(column_tensor)
                    # For real valued columns, the variables list contains
                    # exactly one element.
                    dense_feature_weights.append(
                        columns_to_variables[column][0])
                elif isinstance(column,
                                layers.feature_column._BucketizedColumn):  # pylint: disable=protected-access
                    # Bucketized column -> sparse SDCA feature: the bucketized
                    # value is "sparsified" into a SparseFeatureColumn that
                    # represents its one-hot encoding.
                    #
                    # TODO(sibyl-vie3Poto): Explore whether it is more efficient to translate a
                    # bucketized feature column to a dense feature in SDCA. This will
                    # likely depend on the number of buckets.
                    one_hot_buckets = column._to_dnn_input_layer(
                        column_tensor)  # pylint: disable=protected-access
                    sparse_feature_with_values.append(
                        _dense_tensor_to_sparse_feature_column(
                            one_hot_buckets))
                    sparse_feature_with_values_weights.append(
                        _sparse_column_variables(column))
                elif isinstance(
                        column,
                    (
                        layers.feature_column._WeightedSparseColumn,  # pylint: disable=protected-access
                        layers.feature_column._CrossedColumn,  # pylint: disable=protected-access
                        layers.feature_column._SparseColumn)):  # pylint: disable=protected-access

                    if isinstance(column,
                                  layers.feature_column._WeightedSparseColumn):  # pylint: disable=protected-access
                        id_tensor = column.id_tensor(column_tensor)
                        value_weights = array_ops.reshape(
                            column.weight_tensor(column_tensor).values, [-1])
                    else:
                        # Unweighted ids: every entry gets weight 1.
                        id_tensor = column_tensor
                        value_weights = array_ops.ones(
                            [array_ops.shape(id_tensor.indices)[0]],
                            dtypes.float32)

                    row_ids = array_ops.reshape(id_tensor.indices[:, 0], [-1])
                    feature_ids = array_ops.reshape(id_tensor.values, [-1])

                    # Prune invalid IDs (< 0) together with their example ids
                    # and weights. These can come from looking up an OOV entry
                    # in the vocabulary (default value being -1).
                    is_valid = math_ops.greater_equal(feature_ids, 0)
                    feature_ids = array_ops.boolean_mask(feature_ids, is_valid)
                    row_ids = array_ops.boolean_mask(row_ids, is_valid)
                    value_weights = array_ops.boolean_mask(
                        value_weights, is_valid)

                    # Fold (example id, feature id) into a single integer so
                    # that an id occurring several times within one example
                    # can be deduplicated.
                    stride = math_ops.reduce_max(feature_ids) + 1
                    combined_ids = stride * row_ids + feature_ids

                    # Remove any redundant ids.
                    unique_ids, segment_idx = array_ops.unique(combined_ids)
                    # Keep only one example id per group of duplicates.
                    unique_row_ids = math_ops.unsorted_segment_min(
                        row_ids, segment_idx,
                        array_ops.shape(unique_ids)[0])

                    # Map the combined ids back to the feature id space.
                    unique_feature_ids = unique_ids - stride * unique_row_ids

                    # Weights of merged duplicates are summed.
                    merged_weights = array_ops.reshape(
                        math_ops.unsorted_segment_sum(
                            value_weights, segment_idx,
                            array_ops.shape(unique_ids)[0]),
                        [-1])
                    sparse_feature_with_values.append(
                        SparseFeatureColumn(unique_row_ids,
                                            unique_feature_ids,
                                            merged_weights))
                    sparse_feature_with_values_weights.append(
                        _sparse_column_variables(column))
                else:
                    raise ValueError(
                        'SDCAOptimizer does not support column type %s.' %
                        type(column).__name__)

            # Per-example weights default to all ones when no weight column
            # name is configured.
            if weight_column_name:
                example_weights = array_ops.reshape(
                    features[weight_column_name], shape=[-1])
            else:
                example_weights = array_ops.ones([batch_size])
            example_ids = features[self._example_id_column]
            example_labels = math_ops.to_float(
                array_ops.reshape(targets, shape=[-1]))

            examples = {
                'sparse_features': sparse_feature_with_values,
                'dense_features': dense_features,
                'example_labels': example_labels,
                'example_weights': example_weights,
                'example_ids': example_ids,
            }
            sdca_variables = {
                'sparse_features_weights': sparse_feature_with_values_weights,
                'dense_features_weights': dense_feature_weights,
            }
            return examples, sdca_variables
Ejemplo n.º 37
0
def per_example_exp_loss(labels, weights, predictions, name=None, eps=0.1):
    """Exponential loss given labels, example weights and predictions.

    Note that this is only for binary classification.
    If logistic loss tries to make sure that the classifier is certain of its
    predictions, exp loss says: "as long as it got it correct, even barely, i
    don't care". Can be used on noisy data, or when you don't care about
    getting the actual probabilities from the model, just the correct label.

    The loss is exp(-targets * modified_predictions), where

      * targets is +1 where `labels` > 0 and -1 elsewhere, and
      * modified_predictions is min(1, max(-1, a * sigmoid(predictions) + b))
        with a = 1 / eps and b = -1 / (2 * eps); i.e. +1 once the sigmoid
        reaches 0.5 + eps, -1 once it drops to 0.5 - eps, and linear between.

    Args:
      labels: Rank 2 (N, D) tensor of per-example labels.
      weights: Rank 2 (N, 1) tensor of per-example weights.
      predictions: Rank 2 (N, D) tensor of per-example predictions (logits).
      name: A name for the operation (optional).
      eps: For the range (0.5-eps, 0.5+eps) we set the predictions to be ax+b.

    Returns:
      loss: The componentwise exponential loss multiplied by `weights`.
      update_op: A no-op, returned in the position of an update operation.

    Raises:
      ValueError: If `predictions` and `labels` do not have the same shape.
    """
    float_labels = math_ops.to_float(labels)

    # Only the tensor conversion and the shape check live inside the name
    # scope; the loss ops below are created outside of it.
    with ops.name_scope(name, "exp_loss", [predictions, float_labels]):
        logits = ops.convert_to_tensor(predictions, name="logits")
        float_labels = ops.convert_to_tensor(float_labels, name="labels")
        try:
            float_labels.get_shape().merge_with(logits.get_shape())
        except ValueError:
            raise ValueError(
                "logits and labels must have the same shape (%s vs %s)" %
                (logits.get_shape(), float_labels.get_shape()))

    # Constant tensors shaped like the logits.
    zeros = array_ops.zeros_like(logits, dtype=logits.dtype)
    plus_one = array_ops.ones_like(logits, dtype=logits.dtype)
    minus_one = -array_ops.ones_like(logits, dtype=logits.dtype)

    # Labels become +1 (positive) or -1 (everything else).
    is_positive = (float_labels > zeros)
    signed_labels = array_ops.where(is_positive, plus_one, minus_one)

    # Predictions become min(1, max(-1, slope * p + intercept)).
    slope = 1.0 / eps
    intercept = -0.5 / eps
    linear = slope * math_ops.sigmoid(logits) + intercept
    # Lower clamp: max(-1, linear).
    below_floor = (linear < -1)
    floored = array_ops.where(below_floor, minus_one, linear)
    # Upper clamp: min(1, floored).
    above_ceiling = (floored > 1)
    clamped = array_ops.where(above_ceiling, plus_one, floored)

    unweighted_loss = math_ops.exp(-clamped * signed_labels)
    return unweighted_loss * weights, control_flow_ops.no_op()
Ejemplo n.º 38
0
def embedding_lookup_sparse(params,
                            sp_ids,
                            sp_weights,
                            partition_strategy="mod",
                            name=None,
                            combiner=None,
                            max_norm=None):
    """Looks up embeddings for sparse ids and combines them per row.

    This op assumes that there is at least one id for each row in the dense
    tensor represented by sp_ids (i.e. there are no rows with empty features),
    and that all the indices of sp_ids are in canonical row-major order.

    It also assumes that all id values lie in the range [0, p0), where p0
    is the sum of the size of params along dimension 0.

    Args:
      params: A single tensor representing the complete embedding tensor,
        or a list of P tensors all of same shape except for the first
        dimension, representing sharded embedding tensors. Alternatively, a
        `PartitionedVariable`, created by partitioning along dimension 0. Each
        element must be appropriately sized for the given
        `partition_strategy`.
      sp_ids: N x M `SparseTensor` of int64 ids where N is typically batch
        size and M is arbitrary.
      sp_weights: either a `SparseTensor` of float / double weights, or `None`
        to indicate all weights should be taken to be 1. If specified,
        `sp_weights` must have exactly the same shape and indices as `sp_ids`.
      partition_strategy: A string specifying the partitioning strategy,
        relevant if `len(params) > 1`. Currently `"div"` and `"mod"` are
        supported. Default is `"mod"`. See `tf.nn.embedding_lookup` for more
        details.
      name: Optional name for the op.
      combiner: A string specifying the reduction op. Currently "mean",
        "sqrtn" and "sum" are supported. `None` logs a warning and falls back
        to "mean".
        "sum" computes the weighted sum of the embedding results for each row.
        "mean" is the weighted sum divided by the total weight.
        "sqrtn" is the weighted sum divided by the square root of the sum of
        the squares of the weights.
      max_norm: If not `None`, each embedding is clipped if its l2-norm is
        larger than this value, before combining.

    Returns:
      A dense tensor representing the combined embeddings for the sparse ids.
      For each row in the dense tensor represented by `sp_ids`, the op looks
      up the embeddings for all ids in that row, multiplies them by the
      corresponding weight, and combines these embeddings as specified.

      In other words, if

        `shape(combined params) = [p0, p1, ..., pm]`

      and

        `shape(sp_ids) = shape(sp_weights) = [d0, d1, ..., dn]`

      then

        `shape(output) = [d0, d1, ..., dn-1, p1, ..., pm]`.

      For instance, if params is a 10x20 matrix, and sp_ids / sp_weights are

        ```python
        [0, 0]: id 1, weight 2.0
        [0, 1]: id 3, weight 0.5
        [1, 0]: id 0, weight 1.0
        [2, 3]: id 1, weight 3.0
        ```

      with `combiner`="mean", then the output will be a 3x20 matrix where

        ```python
        output[0, :] = (params[1, :] * 2.0 + params[3, :] * 0.5) / (2.0 + 0.5)
        output[1, :] = (params[0, :] * 1.0) / 1.0
        output[2, :] = (params[1, :] * 3.0) / 3.0
        ```

    Raises:
      TypeError: If `sp_ids` is not a `SparseTensor`, or if `sp_weights` is
        neither `None` nor `SparseTensor`.
      ValueError: If `combiner` is not one of {"mean", "sqrtn", "sum"}.
    """
    # --- Argument validation and normalization -----------------------------
    if combiner is None:
        logging.warn('The default value of combiner will change from "mean" '
                     'to "sqrtn" after 2016/11/01.')
        combiner = "mean"
    if combiner not in ("mean", "sqrtn", "sum"):
        raise ValueError("combiner must be one of 'mean', 'sqrtn' or 'sum'")

    if isinstance(params, variables.PartitionedVariable):
        # Iterating a PartitionedVariable yields the underlying Variables.
        params = list(params)
    if not isinstance(params, list):
        params = [params]

    if not isinstance(sp_ids, sparse_tensor.SparseTensor):
        raise TypeError("sp_ids must be SparseTensor")

    has_weights = sp_weights is not None
    if has_weights:
        if not isinstance(sp_weights, sparse_tensor.SparseTensor):
            raise TypeError("sp_weights must be either None or SparseTensor")
        # Static shapes of the three components must be compatible.
        # TODO(yleon): Add enhanced node assertions to verify that sp_ids and
        # sp_weights have equal indices and shapes.
        for component in ("values", "indices", "dense_shape"):
            ids_shape = getattr(sp_ids, component).get_shape()
            weights_shape = getattr(sp_weights, component).get_shape()
            ids_shape.assert_is_compatible_with(weights_shape)

    # --- Graph construction --------------------------------------------------
    with ops.name_scope(name, "embedding_lookup_sparse",
                        params + [sp_ids]) as name:
        # The first index column identifies the output row of every id.
        row_ids = sp_ids.indices[:, 0]
        if row_ids.dtype != dtypes.int32:
            row_ids = math_ops.cast(row_ids, dtypes.int32)

        # Each distinct id is looked up only once; `inverse_idx` maps every
        # original position to its row in the deduplicated lookup result.
        unique_ids, inverse_idx = array_ops.unique(sp_ids.values)
        embeddings = embedding_lookup(params,
                                      unique_ids,
                                      partition_strategy=partition_strategy,
                                      max_norm=max_norm)
        if embeddings.dtype in (dtypes.float16, dtypes.bfloat16):
            embeddings = math_ops.to_float(embeddings)

        if not has_weights:
            # Unweighted case: the sparse segment ops gather by `inverse_idx`
            # and reduce by row. `combiner` was validated above, so the
            # lookup below always succeeds.
            sparse_reducers = {
                "sum": math_ops.sparse_segment_sum,
                "mean": math_ops.sparse_segment_mean,
                "sqrtn": math_ops.sparse_segment_sqrt_n,
            }
            reduce_rows = sparse_reducers[combiner]
            return reduce_rows(embeddings, inverse_idx, row_ids, name=name)

        weights = sp_weights.values
        if weights.dtype != embeddings.dtype:
            weights = math_ops.cast(weights, embeddings.dtype)

        # Expand the deduplicated lookup back to one embedding per id.
        embeddings = array_ops.gather(embeddings, inverse_idx)

        # Append one size-1 dimension per trailing embedding dimension so the
        # weights broadcast against the embeddings.
        trailing_ones = array_ops.fill(
            array_ops.expand_dims(array_ops.rank(embeddings) - 1, 0), 1)
        broadcast_shape = array_ops.concat(
            [array_ops.shape(weights), trailing_ones], 0)
        static_weights_shape = weights.get_shape()
        weights = array_ops.reshape(weights, broadcast_shape)

        # Reshaping to a dynamic shape loses the static shape; restore it
        # when the embedding rank is statically known.
        embedding_rank = embeddings.get_shape().ndims
        if embedding_rank is not None:
            weights.set_shape(
                static_weights_shape.concatenate(
                    [1] * (embedding_rank - 1)))

        embeddings *= weights

        if combiner == "sum":
            return math_ops.segment_sum(embeddings, row_ids, name=name)

        # "mean" and "sqrtn" both divide the weighted sum by a per-row
        # normalizer derived from the weights.
        embeddings = math_ops.segment_sum(embeddings, row_ids)
        if combiner == "mean":
            normalizer = math_ops.segment_sum(weights, row_ids)
        else:  # combiner == "sqrtn"
            squared_weights = math_ops.pow(weights, 2)
            normalizer = math_ops.sqrt(
                math_ops.segment_sum(squared_weights, row_ids))
        return math_ops.div(embeddings, normalizer, name=name)
Ejemplo n.º 39
0
    def __init__(self,
                 global_step=0,
                 max_matrix_size=768,
                 gbar_decay=0.0,
                 gbar_weight=1.0,
                 mat_gbar_decay=1.0,
                 mat_gbar_weight=1.0,
                 learning_rate=1.0,
                 svd_interval=1,
                 precond_update_interval=1,
                 epsilon=1e-4,
                 alpha=0.5,
                 use_iterative_root=False,
                 use_locking=False,
                 name="Shampoo"):
        """Stores the hyper-parameters of the optimizer.

        gbar_decay, gbar_weight etc. can be a float or a time varying
        parameter. For time-varying parameters use e.g.
        "lambda T: T / (T + 1.0)", where the expression in the lambda is a
        tensorflow expression.

        Args:
          global_step: tensorflow variable indicating the step. It is
            converted to float before being stored.
          max_matrix_size: We do not perform SVD for matrices larger than
            this.
          gbar_decay: See `gbar_weight`.
          gbar_weight: Together with `gbar_decay`, used to update gbar:
                gbar[t] = gbar_decay[t] * gbar[t-1] + gbar_weight[t] * g[t]
          mat_gbar_decay: See `mat_gbar_weight`.
          mat_gbar_weight: Together with `mat_gbar_decay`, used to update
            mat_gbar:
                mat_gbar_j[t] = mat_gbar_decay[t] * mat_gbar_j[t-1]
                                + mat_gbar_weight[t] * gg_j[t]
          learning_rate: Similar to SGD.
          svd_interval: We should do SVD after this many steps. Default = 1,
            i.e. every step. Usually 20 leads to no loss of accuracy, and 50
            or 100 is also OK. May also want more often early, and less often
            later - set in caller as for example:
                "svd_interval = lambda T: tf.cond(
                    T < 2000, lambda: 20.0, lambda: 1000.0)"
          precond_update_interval: We should update the preconditioners after
            this many steps. Default = 1. Usually less than svd_interval.
          epsilon: epsilon * I_n is added to each mat_gbar_j for stability for
            non-diagonal version of shampoo.
          alpha: total power of the preconditioners.
          use_iterative_root: should the optimizer use SVD (faster) or the
            iterative root method (for TPU) for finding the roots of PSD
            matrices.
          use_locking: forwarded unchanged to the base class constructor.
          name: name of optimizer.
        """
        super(ShampooOptimizer, self).__init__(use_locking, name)
        self._name = name

        # The step is kept as a float tensor.
        self._global_step = math_ops.to_float(global_step)

        # Step size and total preconditioner power.
        self._learning_rate = learning_rate
        self._alpha = alpha

        # Coefficients of the gbar and mat_gbar updates.
        self._gbar_decay = gbar_decay
        self._gbar_weight = gbar_weight
        self._mat_gbar_decay = mat_gbar_decay
        self._mat_gbar_weight = mat_gbar_weight

        # Preconditioner settings.
        self._max_matrix_size = max_matrix_size
        self._svd_interval = svd_interval
        self._precond_update_interval = precond_update_interval
        self._epsilon = epsilon
        self._use_iterative_root = use_iterative_root
Ejemplo n.º 40
0
def streaming_tp_fp_arrays(num_gbboxes,
                           tp,
                           fp,
                           scores,
                           remove_zero_scores=True,
                           metrics_collections=None,
                           updates_collections=None,
                           name=None):
    """Streaming computation of True and False Positive arrays.

    This metric also keeps track of the scores and of the number of
    groundtruth objects.

    Args:
      num_gbboxes: Tensor (summed into the running object count), or a dict
        of such tensors; its keys drive the per-key recursion.
      tp: Tensor cast to bool and flattened, or a dict of such tensors.
      fp: Tensor cast to bool and flattened, or a dict of such tensors.
      scores: Tensor cast to float and flattened, or a dict of such tensors.
      remove_zero_scores: If true, only entries that are flagged in `tp` or
        `fp` and whose score is greater than 1e-4 are accumulated.
      metrics_collections: Optional collections the value tensors are added
        to.
      updates_collections: Optional collections the update ops are added to.
      name: Optional variable scope name.

    Returns:
      When `scores` or `fp` is a dict: two dicts, keyed like `num_gbboxes`,
      holding the per-key values and update ops.
      Otherwise a pair `(val, update_op)`, where `val` is the list of tensors
      (number of objects, number of detections, tp, fp, scores) and
      `update_op` is the tuple of ops (objects, detections, tp, fp, scores).
    """
    # Dictionary inputs: recurse per key and return dictionaries.
    if any(isinstance(arg, dict) for arg in (scores, fp)):
        values_by_key, updates_by_key = {}, {}
        for key in num_gbboxes.keys():
            values_by_key[key], updates_by_key[key] = streaming_tp_fp_arrays(
                num_gbboxes[key],
                tp[key],
                fp[key],
                scores[key],
                remove_zero_scores,
                metrics_collections,
                updates_collections,
                name='streaming_tp_fp_%s' % key)
        return values_by_key, updates_by_key

    # Tensor inputs.
    with variable_scope.variable_scope(name, 'streaming_tp_fp',
                                       [num_gbboxes, tp, fp, scores]):
        flag_dtype = tf.bool
        flat = [-1]

        num_gbboxes = math_ops.to_int64(num_gbboxes)
        scores = math_ops.to_float(scores)
        tp = tf.cast(tp, flag_dtype)
        fp = tf.cast(fp, flag_dtype)
        # Flatten everything to rank 1.
        scores = tf.reshape(scores, flat)
        tp = tf.reshape(tp, flat)
        fp = tf.reshape(fp, flat)

        # Entries flagged as either TP or FP.
        # NOTE(review): this mask is only applied when `remove_zero_scores`
        # is true; otherwise it is computed but unused. Confirm whether that
        # is intended.
        keep = tf.logical_or(tp, fp)
        if remove_zero_scores:
            min_score = 1e-4
            keep = tf.logical_and(keep, tf.greater(scores, min_score))
            scores = tf.boolean_mask(scores, keep)
            tp = tf.boolean_mask(tp, keep)
            fp = tf.boolean_mask(fp, keep)

        # Local variables accumulating information over batches. The three
        # arrays start empty and grow with every update.
        v_nobjects = _create_local('v_num_gbboxes', shape=[], dtype=tf.int64)
        v_ndetections = _create_local(
            'v_num_detections', shape=[], dtype=tf.int32)
        v_scores = _create_local('v_scores', shape=[0])
        v_tp = _create_local('v_tp', shape=[0], dtype=flag_dtype)
        v_fp = _create_local('v_fp', shape=[0], dtype=flag_dtype)

        # Update operations: counters are incremented, arrays are replaced by
        # their concatenation with the new batch (hence no shape validation).
        nobjects_op = state_ops.assign_add(
            v_nobjects, tf.reduce_sum(num_gbboxes))
        ndetections_op = state_ops.assign_add(
            v_ndetections, tf.size(scores, out_type=tf.int32))
        scores_op = state_ops.assign(
            v_scores, tf.concat([v_scores, scores], axis=0),
            validate_shape=False)
        tp_op = state_ops.assign(
            v_tp, tf.concat([v_tp, tp], axis=0), validate_shape=False)
        fp_op = state_ops.assign(
            v_fp, tf.concat([v_fp, fp], axis=0), validate_shape=False)

        # Value and update ops.
        all_updates = [nobjects_op, ndetections_op, scores_op, tp_op, fp_op]
        with ops.control_dependencies(all_updates):
            update_op = (nobjects_op, ndetections_op, tp_op, fp_op, scores_op)

        accumulators = (v_nobjects, v_ndetections, v_tp, v_fp, v_scores)
        val = [tf.convert_to_tensor(variable) for variable in accumulators]

        if metrics_collections:
            ops.add_to_collections(metrics_collections, val)
        if updates_collections:
            ops.add_to_collections(updates_collections, update_op)
        return val, update_op
 def _testOne(self, inputs, block_size, outputs):
     """Asserts that space_to_depth of `inputs` equals `outputs`.

     `inputs` is cast to float before the op is applied with `block_size`;
     the evaluated result is compared with `outputs` via `assertAllEqual`.
     """
     with self.test_session(use_gpu=True):
         float_inputs = math_ops.to_float(inputs)
         rearranged = array_ops.space_to_depth(float_inputs, block_size)
         actual = rearranged.eval()
         self.assertAllEqual(actual, outputs)
Ejemplo n.º 42
0
def mean_pairwise_squared_error(predictions,
                                labels=None,
                                weights=1.0,
                                scope=None):
    """Adds a pairwise-errors-squared loss to the training procedure.

    Unlike `mean_squared_error`, which is a measure of the differences between
    corresponding elements of `predictions` and `labels`,
    `mean_pairwise_squared_error` is a measure of the differences between pairs
    of corresponding elements of `predictions` and `labels`.

    For example, if `labels`=[a, b, c] and `predictions`=[x, y, z], three pairs
    of differences are summed to compute the loss:
      loss = [ ((a-b) - (x-y)).^2 + ((a-c) - (x-z)).^2 + ((b-c) - (y-z)).^2 ] / 3

    Note that since the inputs are of size [batch_size, d0, ... dN], the
    corresponding pairs are computed within each batch sample but not across
    samples within a batch. For example, if `predictions` represents a batch of
    16 grayscale images of dimension [batch_size, 100, 200], then the set of
    pairs is drawn from each image, but not across images.

    `weights` acts as a coefficient for the loss. If a scalar is provided, then
    the loss is simply scaled by the given value. If `weights` is a tensor of
    size [batch_size], then the total loss for each sample of the batch is
    rescaled by the corresponding element in the `weights` vector.

    Args:
      predictions: The predicted outputs, a tensor of size
        [batch_size, d0, .. dN] where N+1 is the total number of dimensions in
        `predictions`.
      labels: The ground truth output tensor, whose shape must match the shape
        of the `predictions` tensor. Must not be None even though the
        signature gives it a None default.
      weights: Coefficients for the loss: a scalar, a tensor of shape
        [batch_size] or a tensor whose shape matches `predictions`.
      scope: The scope for the operations performed in computing the loss.

    Returns:
      A scalar `Tensor` representing the loss value.

    Raises:
      ValueError: If `labels` or `predictions` is None, if the shape of
        `predictions` doesn't match that of `labels`, or if the rank of the
        differences or of `weights` is unknown.
    """
    # `labels` has a None default in the signature, but the computation below
    # needs a real tensor. Fail early with a clear ValueError instead of an
    # AttributeError on `labels.get_shape()`.
    if labels is None:
        raise ValueError("labels must not be None.")
    if predictions is None:
        raise ValueError("predictions must not be None.")
    with ops.name_scope(scope, "mean_pairwise_squared_error",
                        [predictions, labels, weights]) as scope:
        predictions.get_shape().assert_is_compatible_with(labels.get_shape())
        predictions = math_ops.to_float(predictions)
        labels = math_ops.to_float(labels)
        weights = math_ops.to_float(ops.convert_to_tensor(weights))

        diffs = math_ops.subtract(predictions, labels)

        # Need to verify here since the function doesn't use compute_weighted_loss
        if diffs.get_shape().ndims is None:
            raise ValueError("diffs.get_shape().ndims cannot be None")
        if weights.get_shape().ndims is None:
            raise ValueError("weights.get_shape().ndims cannot be None")

        # Reduce over every dimension except the leading (batch) one.
        axis = list(range(1, diffs.get_shape().ndims))

        sum_squares_diff_per_batch = math_ops.reduce_sum(
            math_ops.square(diffs), axis=axis)
        num_present_per_batch = _num_present(diffs, weights, per_batch=True)

        # div_no_nan yields 0 where the denominator is 0, so samples with no
        # present elements contribute nothing instead of NaN.
        term1 = 2.0 * math_ops.div_no_nan(
            sum_squares_diff_per_batch, num_present_per_batch, name="value")

        sum_diff = math_ops.reduce_sum(diffs, axis=axis)
        term2 = 2.0 * math_ops.div_no_nan(
            math_ops.square(sum_diff),
            math_ops.square(num_present_per_batch),
            name="value")

        loss = _scale_losses(term1 - term2, weights)

        # Report zero loss when no element is present anywhere in the batch.
        mean_loss = array_ops.where(
            math_ops.reduce_sum(num_present_per_batch) > 0,
            loss,
            array_ops.zeros_like(loss),
            name="value")
        add_loss(mean_loss)
        return mean_loss
Ejemplo n.º 43
0
    def _groupwise_dnn_v2(features, labels, mode, params, config):
        """Defines the dnn for groupwise scoring functions.

        Splits `features` into context and per-example features with
        `_call_transform_fn`, forms groups of examples with
        `_form_group_indices_nd`, scores each group with `group_score_fn` and
        scatters the group scores back into a per-example score tensor.
        `_call_transform_fn`, `group_score_fn` and `group_size` are not
        defined here; they come from the enclosing scope.

        Args:
          features: Input features handed to `_call_transform_fn`. Outside of
            PREDICT mode this object is updated in place with the transformed
            context and per-example features.
          labels: Labels tensor, or None. When given, its shape is unstacked
            into `(batch_size, list_size)` (so it is assumed to be rank 2 --
            TODO confirm) and `utils.is_label_valid` provides the validity
            mask. When None, the sizes are read from the first two dimensions
            of one per-example feature and every entry is treated as valid.
          mode: Mode key; forwarded to the transform and scoring functions and
            compared against `model_fn.ModeKeys.PREDICT`.
          params: Forwarded unchanged to `group_score_fn`.
          config: Forwarded unchanged to `group_score_fn`.

        Returns:
          `list_scores`, built by `scatter_nd` with shape
          `[batch_size, list_size]` and divided by `group_size`.

        Raises:
          ValueError: If `batch_size` or `list_size` is None.
        """
        with ops.name_scope('transform'):
            context_features, per_example_features = _call_transform_fn(
                features, mode)

        def _score_fn(context_features, group_features, reuse):
            # Thin wrapper that runs `group_score_fn` inside the 'group_score'
            # variable scope.
            with variable_scope.variable_scope('group_score', reuse=reuse):
                return group_score_fn(context_features, group_features, mode,
                                      params, config)

        # Scatter/Gather per-example scores through groupwise comparison. Each
        # instance in a mini-batch will form a number of groups. Each group of
        # examples is scored by 'score_fn' and scores for individual examples
        # are accumulated over groups.
        with ops.name_scope('groupwise_dnn_v2'):
            with ops.name_scope('infer_sizes'):
                if labels is not None:
                    batch_size, list_size = array_ops.unstack(
                        array_ops.shape(labels))
                    is_valid = utils.is_label_valid(labels)
                else:
                    # Infer batch_size and list_size from a feature.
                    example_tensor_shape = array_ops.shape(
                        next(six.itervalues(per_example_features)))
                    batch_size = example_tensor_shape[0]
                    list_size = example_tensor_shape[1]
                    # No labels: build an all-ones stand-in so that
                    # is_label_valid produces the validity mask.
                    is_valid = utils.is_label_valid(
                        array_ops.ones([batch_size, list_size]))
            if batch_size is None or list_size is None:
                raise ValueError('Invalid batch_size=%s or list_size=%s' %
                                 (batch_size, list_size))

            # For each example feature, assume the shape is [batch_size, list_size,
            # feature_size], the groups are formed along the 2nd dim. Each group has a
            # 'group_size' number of indices in [0, list_size). Based on these
            # indices, we can gather the example feature into a sub-tensor for each
            # group. The total number of groups we have for a mini-batch is batch_size
            # * num_groups. Inside each group, we have a 'group_size' number of
            # examples.
            indices, mask = _form_group_indices_nd(is_valid, group_size)
            num_groups = array_ops.shape(mask)[1]

            with ops.name_scope('group_features'):
                # For context features, We have shape [batch_size * num_groups, ...].
                large_batch_context_features = {}
                for name, value in six.iteritems(context_features):
                    # [batch_size, 1, ...].
                    value = array_ops.expand_dims(value, axis=1)
                    # [batch_size, num_groups, ...].
                    # Gathering index 0 `num_groups` times repeats the single
                    # entry along axis 1.
                    value = array_ops.gather(value,
                                             array_ops.zeros([num_groups],
                                                             dtypes.int32),
                                             axis=1)
                    # [batch_size * num_groups, ...]
                    large_batch_context_features[
                        name] = utils.reshape_first_ndims(
                            value, 2, [batch_size * num_groups])

                # For example feature, we have shape [batch_size * num_groups,
                # group_size, ...].
                large_batch_group_features = {}
                for name, value in six.iteritems(per_example_features):
                    # [batch_size, num_groups, group_size, ...].
                    value = array_ops.gather_nd(value, indices)
                    # [batch_size * num_groups, group_size, ...].
                    large_batch_group_features[
                        name] = utils.reshape_first_ndims(
                            value, 3, [batch_size * num_groups, group_size])

            # Do the inference and get scores for the large batch.
            # [batch_size * num_groups, group_size].
            scores = _score_fn(large_batch_context_features,
                               large_batch_group_features,
                               reuse=False)

            with ops.name_scope('accumulate_scores'):
                # [batch_size, num_groups, group_size].
                scores = array_ops.reshape(
                    scores, [batch_size, num_groups, group_size])
                # Reset invalid scores to 0 based on mask. The mask is expanded
                # and repeated `group_size` times along axis 2 so that it lines
                # up with `scores`.
                scores = array_ops.where(
                    array_ops.gather(array_ops.expand_dims(mask, 2),
                                     array_ops.zeros([group_size],
                                                     dtypes.int32),
                                     axis=2), scores,
                    array_ops.zeros_like(scores))
                # Scatter the group scores back to per-example positions; the
                # result has shape [batch_size, list_size].
                list_scores = array_ops.scatter_nd(indices, scores,
                                                   [batch_size, list_size])
                # Use average.
                list_scores /= math_ops.to_float(group_size)

        # Both branches return the same tensor; the only difference is that
        # outside of PREDICT mode `features` is updated in place with the
        # transformed features.
        if mode == model_fn.ModeKeys.PREDICT:
            return list_scores
        else:
            features.update(context_features)
            features.update(per_example_features)
            return list_scores
Ejemplo n.º 44
0
def get_weights_and_check_match_logits(features,
                                       weight_column,
                                       logits,
                                       allow_per_logit_weights=False):
    """Fetches weights from features and checks that the shape matches logits.

    Consider logits of shape [D0, D1, ... DN, logits_dimension]. Weights shape
    can be either:
    * [D0, D1, ... DN, logits_dimension] if `allow_per_logit_weights=True`.
    * [D0, D1, ... DN, 1]
    * [D0, D1, ... DN]: In this case, weights is reshaped into
      [D0, D1, ... DN, 1] to work with weight broadcasting rules.

    In eager mode the shapes are compared in Python and a mismatch raises
    immediately. In graph mode the comparison is expressed as an assertion op
    that the returned tensor depends on.

    Args:
      features: The features dict that contains weights.
      weight_column: The weight column: None, a string key, or a numeric
        column. If not given, this method returns 1.
      logits: logits Tensor.
      allow_per_logit_weights: Boolean. Whether we allow weights along the
        logits dimension, namely shape `[D0, D1, ... DN, logits_dimension]`.

    Returns:
      The float `1.` when `weight_column` is None; otherwise the validated
      weights Tensor cast to float, with a trailing dimension of size 1 added
      when its rank is one less than that of `logits`.

    Raises:
      TypeError: If `weight_column` is neither a string nor a numeric column.
      ValueError: If the weights `Tensor` cannot be cast into float, or (eager
        mode only) if the weights shape is not one of the supported shapes.
    """
    if allow_per_logit_weights:
        err_msg = (
            'weights shape must be [D0, D1, ... DN], [D0, D1, ... DN, 1] or '
            '[D0, D1, ... DN, logits_dimension]')
    else:
        err_msg = (
            'weights shape must be [D0, D1, ... DN] or [D0, D1, ... DN, 1]')
    with ops.name_scope('weights',
                        values=tuple(six.itervalues(features)) +
                        (logits, )) as scope:
        # Fetch the weights.
        if weight_column is None:
            return 1.
        # TODO(b/117839674): update feature_column
        # A string is treated as the key of a numeric column of shape (1,).
        if isinstance(weight_column, six.string_types):
            weight_column = feature_column_lib.numeric_column(
                key=weight_column, shape=(1, ))
        if not isinstance(weight_column,
                          (feature_column_lib.NumericColumn, _NumericColumn)):
            raise TypeError(
                'Weight column must be either a string or NumericColumn.'
                ' Given type: {}.'.format(type(weight_column)))
        weights = weight_column._get_dense_tensor(  # pylint: disable=protected-access
            _LazyBuilder(features))
        if not (weights.dtype.is_floating or weights.dtype.is_integer):
            raise ValueError('Weight column should be castable to float. '
                             'Given dtype: {}'.format(weights.dtype))
        weights = math_ops.to_float(weights, name='weights')
        # Validate the weights shape.
        # Eager mode.
        if context.executing_eagerly():
            weights_shape = weights._shape_tuple()  # pylint: disable=protected-access
            logits_shape = logits._shape_tuple()  # pylint: disable=protected-access
            weights_rank = weights._rank()  # pylint: disable=protected-access
            logits_rank = logits._rank()  # pylint: disable=protected-access
            # Weights of shape [D0, ... DN]: must match the logits shape
            # without its last dimension, then get a trailing size-1 dim.
            if (weights_rank is not None and logits_rank is not None
                    and weights_rank == logits_rank - 1):
                if logits_shape[:-1] != weights_shape:
                    raise ValueError(
                        '{}, logits_shape: {}. weights_shape: {}.'.format(
                            err_msg, logits_shape, weights_shape))
                return array_ops.expand_dims(weights, -1, name=scope)
            # Otherwise the weights must be [D0, ... DN, 1], or the full
            # logits shape when per-logit weights are allowed.
            supported_weights_shape = logits_shape[:-1] + (1, )
            if allow_per_logit_weights:
                if (logits_shape != weights_shape
                        and supported_weights_shape != weights_shape):
                    raise ValueError(
                        '{}, logits_shape: {}. weights_shape: {}.'.format(
                            err_msg, logits_shape, weights_shape))
            else:
                if supported_weights_shape != weights_shape:
                    raise ValueError(
                        '{}, logits_shape: {}. weights_shape: {}.'.format(
                            err_msg, logits_shape, weights_shape))
            return weights

        # Graph mode.
        # Shapes are tensors here, so mismatches are checked by assertion ops
        # attached through control dependencies rather than raised in Python.
        weights_shape = array_ops.shape(weights, name='weights_shape')
        logits_shape = array_ops.shape(logits, name='logits_shape')
        # The rank comparison uses the static shapes and is skipped when
        # either rank is unknown.
        if (weights.shape.ndims is not None and logits.shape.ndims is not None
                and weights.shape.ndims == logits.shape.ndims - 1):
            assert_dimension = check_ops.assert_equal(logits_shape[:-1],
                                                      weights_shape,
                                                      message=err_msg,
                                                      data=[
                                                          'logits_shape: ',
                                                          logits_shape,
                                                          'weights_shape: ',
                                                          weights_shape
                                                      ])
            with ops.control_dependencies([assert_dimension]):
                return array_ops.expand_dims(weights, -1, name=scope)
        supported_weights_shape = array_ops.concat([logits_shape[:-1], [1]],
                                                   axis=0)
        if allow_per_logit_weights:
            # Accept either the full logits shape or [D0, ... DN, 1].
            condition = math_ops.reduce_any([
                math_ops.reduce_all(math_ops.equal(logits_shape,
                                                   weights_shape)),
                math_ops.reduce_all(
                    math_ops.equal(supported_weights_shape, weights_shape))
            ])
            assert_dimension = control_flow_ops.Assert(condition=condition,
                                                       data=[
                                                           err_msg,
                                                           'logits_shape: ',
                                                           logits_shape,
                                                           'weights_shape: ',
                                                           weights_shape
                                                       ])
        else:
            assert_dimension = check_ops.assert_equal(supported_weights_shape,
                                                      weights_shape,
                                                      message=err_msg,
                                                      data=[
                                                          'logits_shape: ',
                                                          logits_shape,
                                                          'weights_shape: ',
                                                          weights_shape
                                                      ])
        with ops.control_dependencies([assert_dimension]):
            return array_ops.identity(weights, name=scope)
Ejemplo n.º 45
0
    def evaluate(self, X, Y, metric, batch_size=None):
        """ evaluate.

        Evaluate the forest model with the given data and metric.

        The evaluation op is rebuilt whenever `X`, `Y`, `batch_size` or
        `metric` differ from the values recorded in `self._eval`, or when
        `self._eval` is not ready. The returned value is the mean of the
        per-batch metric values.

        Arguments:
            X: `2-D Array` of shape (n_samples, n_features).
                The input data to evaluate on.
            Y: `1-D Array` of shape (n_samples). The labels/targets data.
            metric: `func` returning a `Tensor`. The metric function; called
                as `metric(pred, Y)`.
            batch_size: `int`. If specified, process the data by batch.
                If None, the whole data set is used as a single batch.

        Return:
            The metric value.

        """

        with self.graph.as_default():
            # Verify data dimension
            validate_dim(X, max_dim=2, min_dim=2, var_name='X')
            # NOTE(review): this reads `self.regression` while the one-hot
            # branch below reads `self.params.regression` -- confirm both
            # refer to the same setting.
            if not self.regression:
                validate_dim(Y, max_dim=1, min_dim=1, var_name='Y')
            else:
                validate_dim(Y, min_dim=1, var_name='Y')

            # Get data size
            num_samples = get_num_sample(X)
            capacity = None
            if batch_size is None:
                # No batching requested: one batch holding every sample.
                batch_size = num_samples
                capacity = 1

            # Build Tree Graph
            self._build_estimator(X, Y)

            # Generate Data Tensors. Be aware that every eval with different
            # data will re-create a data tensor.
            if self._eval.get_params('X') != hex(id(X)) or \
                self._eval.get_params('Y') != hex(id(Y)) or \
                self._eval.get_params('batch_size') != batch_size or \
                self._eval.get_params('metric') != metric or \
                not self._eval.is_ready:

                X, Y, cr = generate_data_tensor(X,
                                                Y,
                                                batch_size=batch_size,
                                                shuffle=False,
                                                num_threads=8,
                                                capacity=capacity)
                X, _, spec = data_ops.ParseDataTensorOrDict(X)
                Y = data_ops.ParseLabelTensorOrDict(Y)

                if not self.params.regression:
                    # Classification: one-hot encode the labels as floats.
                    Y = math_ops.to_float(
                        array_ops.one_hot(
                            math_ops.to_int64(array_ops.squeeze(Y)),
                            self.params.n_classes, 1, 0))
                    # NOTE(review): uses `self.n_classes` here but
                    # `self.params.n_classes` just above -- confirm they match.
                    Y = tf.reshape(Y, [-1, self.n_classes])

                pred, _, _ = self.forest_graph.inference_graph(X)
                self._eval_op = metric(pred, Y)
                self._build_eval(X, Y, metric, batch_size)

                # Start QueueRunners
                tf.train.start_queue_runners(sess=self.session)
                if cr: cr.launch_threads(self.session)

            # Round up so that a final partial batch is still evaluated.
            n_batches = int(math.ceil(float(num_samples) / batch_size))

            # Accumulate the unweighted mean of the per-batch metric values.
            m = 0.
            for i in range(n_batches):
                m += self.session.run(self._eval_op) / n_batches
            return m
Ejemplo n.º 46
0
def huber_loss(labels,
               predictions,
               weights=1.0,
               delta=1.0,
               scope=None,
               loss_collection=ops.GraphKeys.LOSSES,
               reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
    """Adds a Huber Loss term to the training procedure.

    For each value x in `error=labels-predictions`, the following is
    calculated:

    ```
      0.5 * x^2                  if |x| <= d
      0.5 * d^2 + d * (|x| - d)  if |x| > d
    ```

    where d is `delta`.

    See: https://en.wikipedia.org/wiki/Huber_loss

    `weights` acts as a coefficient for the loss. A scalar simply scales the
    loss. A tensor of size [batch_size] rescales the total loss of each sample
    by the matching element of `weights`. If the shape of `weights` matches the
    shape of `predictions`, each measurable element of `predictions` is scaled
    by the corresponding value of `weights`.

    Args:
      labels: The ground truth output tensor, same dimensions as
        'predictions'.
      predictions: The predicted outputs.
      weights: Optional `Tensor` whose rank is either 0, or the same rank as
        `labels`, and must be broadcastable to `labels` (i.e., all dimensions
        must be either `1`, or the same as the corresponding `losses`
        dimension).
      delta: `float`, the point where the huber loss function changes from
        quadratic to linear.
      scope: The scope for the operations performed in computing the loss.
      loss_collection: collection to which the loss will be added.
      reduction: Type of reduction to apply to loss.

    Returns:
      Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the
      same shape as `labels`; otherwise, it is scalar.

    Raises:
      ValueError: If the shape of `predictions` doesn't match that of `labels`
        or if the shape of `weights` is invalid.  Also if `labels` or
        `predictions` is None.
    """
    if labels is None:
        raise ValueError("labels must not be None.")
    if predictions is None:
        raise ValueError("predictions must not be None.")

    scope_values = (predictions, labels, weights)
    with ops.name_scope(scope, "huber_loss", scope_values) as scope:
        predictions = math_ops.to_float(predictions)
        labels = math_ops.to_float(labels)
        predictions.get_shape().assert_is_compatible_with(labels.get_shape())

        residual = math_ops.subtract(predictions, labels)
        magnitude = math_ops.abs(residual)
        # Part of |error| that falls in the quadratic region (capped at delta).
        clipped = math_ops.minimum(magnitude, delta)
        # Equal in value to tf.maximum(magnitude - delta, 0), but its gradient
        # at magnitude == delta is 0 rather than 1. That avoids doubling the
        # gradient there, because the quadratic term already contributes a
        # nonzero gradient.
        overshoot = (magnitude - clipped)

        quadratic_term = 0.5 * clipped**2
        linear_term = delta * overshoot
        losses = quadratic_term + linear_term
        return compute_weighted_loss(losses,
                                     weights,
                                     scope,
                                     loss_collection,
                                     reduction=reduction)
Ejemplo n.º 47
0
def _sigmoid_cross_entropy_loss(logits, labels):
    """Returns the sigmoid cross entropy of `logits` against float `labels`."""
    # sigmoid_cross_entropy_with_logits requires [batch_size, n_classes] labels.
    float_labels = math_ops.to_float(labels)
    return nn.sigmoid_cross_entropy_with_logits(logits, float_labels)
Ejemplo n.º 48
0
def mean_pairwise_squared_error(labels,
                                predictions,
                                weights=1.0,
                                scope=None,
                                loss_collection=ops.GraphKeys.LOSSES):
    """Adds a pairwise-errors-squared loss to the training procedure.

    Where `mean_squared_error` measures the differences between corresponding
    elements of `predictions` and `labels`, this loss measures the differences
    between pairs of corresponding elements of `predictions` and `labels`.

    For example, if `labels`=[a, b, c] and `predictions`=[x, y, z], three pairs
    of differences are summed to compute the loss:
      loss = [ ((a-b) - (x-y)).^2 + ((a-c) - (x-z)).^2 + ((b-c) - (y-z)).^2 ] / 3

    The inputs have shape `[batch_size, d0, ... dN]`, and pairs are formed
    within each batch sample but never across samples. For example, if
    `predictions` represents a batch of 16 grayscale images of dimension
    [batch_size, 100, 200], the pairs are drawn from each image, not across
    images.

    `weights` acts as a coefficient for the loss. A scalar simply scales the
    loss. A tensor of size [batch_size] rescales the total loss of each sample
    by the matching element of `weights`.

    Args:
      labels: The ground truth output tensor, whose shape must match the shape
        of `predictions`.
      predictions: The predicted outputs, a tensor of size
        `[batch_size, d0, .. dN]` where N+1 is the total number of dimensions
        in `predictions`.
      weights: Coefficients for the loss: a scalar, a tensor of shape
        `[batch_size]` or a tensor whose shape matches `predictions`.
      scope: The scope for the operations performed in computing the loss.
      loss_collection: collection to which the loss will be added.

    Returns:
      A scalar `Tensor` that returns the weighted loss.

    Raises:
      ValueError: If the shape of `predictions` doesn't match that of `labels`
        or if the shape of `weights` is invalid.  Also if `labels` or
        `predictions` is None.
    """
    if labels is None:
        raise ValueError("labels must not be None.")
    if predictions is None:
        raise ValueError("predictions must not be None.")

    scope_values = (predictions, labels, weights)
    with ops.name_scope(scope, "mean_pairwise_squared_error",
                        scope_values) as scope:
        weights = math_ops.to_float(weights)
        labels = math_ops.to_float(labels)
        broadcast_check = weights_broadcast_ops.assert_broadcastable(
            weights, labels)
        with ops.control_dependencies((broadcast_check, )):
            predictions = math_ops.to_float(predictions)
            predictions.get_shape().assert_is_compatible_with(
                labels.get_shape())

            diffs = math_ops.subtract(predictions, labels)

            # Every axis except the leading (batch) one.
            non_batch_axes = math_ops.range(1, array_ops.rank(diffs))

            squared_diffs = math_ops.square(diffs)
            sum_sq_per_sample = math_ops.reduce_sum(
                squared_diffs,
                reduction_indices=non_batch_axes,
                keep_dims=True)
            present_per_sample = _num_present(diffs, weights, per_batch=True)

            mean_sq_term = 2.0 * _safe_div(sum_sq_per_sample,
                                           present_per_sample)

            sum_per_sample = math_ops.reduce_sum(
                diffs, reduction_indices=non_batch_axes, keep_dims=True)
            sq_mean_term = 2.0 * _safe_div(
                math_ops.square(sum_per_sample),
                math_ops.square(present_per_sample))

            weighted_losses = math_ops.multiply(mean_sq_term - sq_mean_term,
                                                weights)
            loss = math_ops.reduce_sum(weighted_losses)

            # Report zero when no element is present anywhere in the batch.
            any_present = math_ops.reduce_sum(present_per_sample) > 0
            mean_loss = array_ops.where(any_present,
                                        loss,
                                        array_ops.zeros_like(loss),
                                        name="value")
            util.add_loss(mean_loss, loss_collection)
            return mean_loss
Ejemplo n.º 49
0
def _sigmoid_cross_entropy_loss(logits, target):
    """Returns the sigmoid cross entropy of `logits` against float `target`."""
    # sigmoid_cross_entropy_with_logits requires [batch_size, n_classes] target.
    float_target = math_ops.to_float(target)
    return nn.sigmoid_cross_entropy_with_logits(logits, float_target)
Ejemplo n.º 50
0
 def _accuracy_metric(predictions, labels, weights=None):
     """Streaming accuracy of `predictions` binarized at `threshold`.

     `threshold` is taken from the enclosing scope. Predictions greater than
     or equal to it become 1.0 and all others 0.0 before being compared with
     `labels`.
     """
     meets_threshold = math_ops.greater_equal(predictions, threshold)
     threshold_predictions = math_ops.to_float(meets_threshold)
     return metrics_lib.streaming_accuracy(
         predictions=threshold_predictions, labels=labels, weights=weights)
Ejemplo n.º 51
0
def _beam_search_step(time, logits, next_cell_state, beam_state, batch_size,
                      beam_width, end_token, length_penalty_weight,
                      coverage_penalty_weight):
  """Performs a single step of Beam Search Decoding.

  Args:
    time: Beam search time step, should start at 0. At time 0 we assume
      that all beams are equal and consider only the first beam for
      continuations.
    logits: Logits at the current time step. A tensor of shape
      `[batch_size, beam_width, vocab_size]`
    next_cell_state: The next state from the cell, e.g. an instance of
      AttentionWrapperState if the cell is attentional.
    beam_state: Current state of the beam search.
      An instance of `BeamSearchDecoderState`.
    batch_size: The batch size for this input.
    beam_width: Python int.  The size of the beams.
    end_token: The int32 end token.
    length_penalty_weight: Float weight to penalize length. Disabled with 0.0.
    coverage_penalty_weight: Float weight to penalize the coverage of source
      sentence. Disabled with 0.0.

  Returns:
    A tuple `(output, next_state)`: a `BeamSearchDecoderOutput` holding the
    selected scores, predicted ids and parent beam ids, and the new
    `BeamSearchDecoderState`.
  """
  static_batch_size = tensor_util.constant_value(batch_size)

  # Calculate the current lengths of the predictions
  prediction_lengths = beam_state.lengths
  previously_finished = beam_state.finished
  not_finished = math_ops.logical_not(previously_finished)

  # Calculate the total log probs for the new hypotheses
  # Final Shape: [batch_size, beam_width, vocab_size]
  step_log_probs = nn_ops.log_softmax(logits)
  step_log_probs = _mask_probs(step_log_probs, end_token, previously_finished)
  total_probs = array_ops.expand_dims(beam_state.log_probs, 2) + step_log_probs

  # Calculate the continuation lengths by adding to all continuing beams.
  # Use the static vocab size when known, otherwise the dynamic one.
  vocab_size = logits.shape.dims[-1].value or array_ops.shape(logits)[-1]
  # One-hot with inverted values: 0 at the end_token position and 1 at every
  # other vocabulary position.
  lengths_to_add = array_ops.one_hot(
      indices=array_ops.fill([batch_size, beam_width], end_token),
      depth=vocab_size,
      on_value=np.int64(0),
      off_value=np.int64(1),
      dtype=dtypes.int64)
  # Zero the increment for beams that were already finished.
  add_mask = math_ops.to_int64(not_finished)
  lengths_to_add *= array_ops.expand_dims(add_mask, 2)
  new_prediction_lengths = (
      lengths_to_add + array_ops.expand_dims(prediction_lengths, 2))

  # Calculate the accumulated attention probabilities if coverage penalty is
  # enabled.
  accumulated_attention_probs = None
  attention_probs = get_attention_probs(
      next_cell_state, coverage_penalty_weight)
  if attention_probs is not None:
    # Finished beams do not accumulate further attention.
    attention_probs *= array_ops.expand_dims(math_ops.to_float(not_finished), 2)
    accumulated_attention_probs = (
        beam_state.accumulated_attention_probs + attention_probs)

  # Calculate the scores for each beam
  scores = _get_scores(
      log_probs=total_probs,
      sequence_lengths=new_prediction_lengths,
      length_penalty_weight=length_penalty_weight,
      coverage_penalty_weight=coverage_penalty_weight,
      finished=previously_finished,
      accumulated_attention_probs=accumulated_attention_probs)

  # NOTE(review): `time` is converted to a tensor here but not referenced
  # again in this function -- confirm whether it is still needed.
  time = ops.convert_to_tensor(time, name="time")
  # During the first time step we only consider the initial beam
  # Flatten the beam and vocabulary dimensions so that top_k ranks every
  # (beam, word) candidate of a batch entry together.
  scores_flat = array_ops.reshape(scores, [batch_size, -1])

  # Pick the next beams according to the specified successors function
  next_beam_size = ops.convert_to_tensor(
      beam_width, dtype=dtypes.int32, name="beam_width")
  next_beam_scores, word_indices = nn_ops.top_k(scores_flat, k=next_beam_size)

  next_beam_scores.set_shape([static_batch_size, beam_width])
  word_indices.set_shape([static_batch_size, beam_width])

  # Pick out the probs, beam_ids, and states according to the chosen predictions
  next_beam_probs = _tensor_gather_helper(
      gather_indices=word_indices,
      gather_from=total_probs,
      batch_size=batch_size,
      range_size=beam_width * vocab_size,
      gather_shape=[-1],
      name="next_beam_probs")
  # Note: just doing the following
  #   math_ops.to_int32(word_indices % vocab_size,
  #       name="next_beam_word_ids")
  # would be a lot cleaner but for reasons unclear, that hides the results of
  # the op which prevents capturing it with tfdbg debug ops.
  # A flat index encodes (parent beam, word): the remainder modulo vocab_size
  # is the word id and the quotient is the parent beam id.
  raw_next_word_ids = math_ops.mod(
      word_indices, vocab_size, name="next_beam_word_ids")
  next_word_ids = math_ops.to_int32(raw_next_word_ids)
  next_beam_ids = math_ops.to_int32(
      word_indices / vocab_size, name="next_beam_parent_ids")

  # Append new ids to current predictions
  previously_finished = _tensor_gather_helper(
      gather_indices=next_beam_ids,
      gather_from=previously_finished,
      batch_size=batch_size,
      range_size=beam_width,
      gather_shape=[-1])
  next_finished = math_ops.logical_or(
      previously_finished,
      math_ops.equal(next_word_ids, end_token),
      name="next_beam_finished")

  # Calculate the length of the next predictions.
  # 1. Finished beams remain unchanged.
  # 2. Beams that are now finished (EOS predicted) have their length
  #    increased by 1.
  # 3. Beams that are not yet finished have their length increased by 1.
  lengths_to_add = math_ops.to_int64(math_ops.logical_not(previously_finished))
  next_prediction_len = _tensor_gather_helper(
      gather_indices=next_beam_ids,
      gather_from=beam_state.lengths,
      batch_size=batch_size,
      range_size=beam_width,
      gather_shape=[-1])
  next_prediction_len += lengths_to_add
  # Stays an empty tuple when no attention probabilities are being
  # accumulated.
  next_accumulated_attention_probs = ()
  if accumulated_attention_probs is not None:
    next_accumulated_attention_probs = _tensor_gather_helper(
        gather_indices=next_beam_ids,
        gather_from=accumulated_attention_probs,
        batch_size=batch_size,
        range_size=beam_width,
        gather_shape=[batch_size * beam_width, -1],
        name="next_accumulated_attention_probs")

  # Pick out the cell_states according to the next_beam_ids. We use a
  # different gather_shape here because the cell_state tensors, i.e.
  # the tensors that would be gathered from, all have dimension
  # greater than two and we need to preserve those dimensions.
  # pylint: disable=g-long-lambda
  next_cell_state = nest.map_structure(
      lambda gather_from: _maybe_tensor_gather_helper(
          gather_indices=next_beam_ids,
          gather_from=gather_from,
          batch_size=batch_size,
          range_size=beam_width,
          gather_shape=[batch_size * beam_width, -1]),
      next_cell_state)
  # pylint: enable=g-long-lambda

  next_state = BeamSearchDecoderState(
      cell_state=next_cell_state,
      log_probs=next_beam_probs,
      lengths=next_prediction_len,
      finished=next_finished,
      accumulated_attention_probs=next_accumulated_attention_probs)

  output = BeamSearchDecoderOutput(
      scores=next_beam_scores,
      predicted_ids=next_word_ids,
      parent_ids=next_beam_ids)

  return output, next_state
Ejemplo n.º 52
0
 def _average_loss():
   """Sums `self.loss_fn` over the batch and divides by the batch size.

   `self`, `features` and `labels` are taken from the enclosing scope; the
   batch size is the first dimension of `labels`.
   """
   probs = self.inference_graph(features)
   summed_loss = math_ops.reduce_sum(self.loss_fn(probs, labels))
   num_examples = math_ops.to_float(array_ops.shape(labels)[0])
   return summed_loss / num_examples
def frechet_classifier_distance(real_images,
                                generated_images,
                                classifier_fn,
                                num_batches=1):
  """Frechet distance between classifier activations of two image sets.

  This generalizes the Frechet Inception distance to an arbitrary classifier.
  The technique is described in detail in https://arxiv.org/abs/1706.08500.

  Given two Gaussian distributions with means m and m_w and covariance
  matrices C and C_w, this function calculates

  |m - m_w|^2 + Tr(C + C_w - 2(C * C_w)^(1/2))

  which captures how different the distributions of real images and generated
  images (or more accurately, their visual features) are. Unlike the Inception
  score, this is a true distance and utilizes information about real world
  images.

  When computed from sample means and sample covariance matrices, the Frechet
  distance is biased, and more so for small sample sizes (e.g. even if the two
  distributions are the same, the expected distance for a small sample is
  large). Use the same sample size whenever two generative models are compared
  with this metric.

  Args:
    real_images: Real images to use to compute Frechet Inception distance.
    generated_images: Generated images to use to compute Frechet Inception
      distance.
    classifier_fn: A function that takes images and produces activations
      based on a classifier.
    num_batches: Number of batches to split images in to in order to
      efficiently run them through the classifier network.

  Returns:
    The Frechet Inception distance. A floating-point scalar.
  """
  real_batches = array_ops.split(real_images, num_or_size_splits=num_batches)
  generated_batches = array_ops.split(
      generated_images, num_or_size_splits=num_batches)

  # Real batches first, generated batches second, along a new leading axis.
  stacked_batches = array_ops.stack(real_batches + generated_batches)

  # Run the classifier one batch at a time through the memory-efficient
  # `map_fn`.
  activations = functional_ops.map_fn(
      fn=classifier_fn,
      elems=stacked_batches,
      parallel_iterations=1,
      back_prop=False,
      swap_memory=True,
      name='RunClassifier')

  # The first `num_batches` entries belong to the real images, the rest to the
  # generated ones.
  real_acts, gen_acts = array_ops.split(
      activations, [num_batches, num_batches], 0)

  # Merge the per-batch activations back into one matrix per image set and
  # check that each result is rank 2.
  real_acts = array_ops.concat(array_ops.unstack(real_acts), 0)
  gen_acts = array_ops.concat(array_ops.unstack(gen_acts), 0)
  real_acts.shape.assert_has_rank(2)
  gen_acts.shape.assert_has_rank(2)

  # Sample means of the activations.
  mean_real = math_ops.reduce_mean(real_acts, 0)
  mean_gen = math_ops.reduce_mean(gen_acts, 0)
  # The row count of the real activations normalizes both covariances below.
  # NOTE(review): this presumes both image sets hold the same number of
  # examples -- confirm against callers.
  num_examples = math_ops.to_float(array_ops.shape(real_acts)[0])

  # sigma = (1 / (n - 1)) * (X - mu)^T (X - mu)
  real_centered = real_acts - mean_real
  sigma_real = math_ops.matmul(
      real_centered, real_centered, transpose_a=True) / (num_examples - 1)

  gen_centered = gen_acts - mean_gen
  sigma_gen = math_ops.matmul(
      gen_centered, gen_centered, transpose_a=True) / (num_examples - 1)

  # Tr(sqrt(sigma_real sigma_gen)) component of the distance.
  sqrt_trace_component = trace_sqrt_product(sigma_real, sigma_gen)

  # Covariance term; trace(A + B) = trace(A) + trace(B).
  covariance_term = (
      math_ops.trace(sigma_real + sigma_gen) - 2.0 * sqrt_trace_component)

  # Squared L2 distance between the two means.
  mean_term = math_ops.square(linalg_ops.norm(mean_real - mean_gen))

  return covariance_term + mean_term
Ejemplo n.º 54
0
def update_metric_with_broadcast_weights(eval_metric, values, weights):
    """Updates `eval_metric` with float `values` and broadcast `weights`.

    Args:
      eval_metric: Object whose `update_state(values=..., sample_weight=...)`
        method is called.
      values: Values to record; cast to float before the update.
      weights: Optional weights. When not `None` they are broadcast against
        the float `values`; `None` is passed through unchanged.
    """
    float_values = math_ops.to_float(values)
    sample_weight = weights
    if sample_weight is not None:
        sample_weight = weights_broadcast_ops.broadcast_weights(
            sample_weight, float_values)
    eval_metric.update_state(values=float_values, sample_weight=sample_weight)
Ejemplo n.º 55
0
def parallel_read(data_sources,
                  reader_class,
                  num_epochs=None,
                  num_readers=4,
                  reader_kwargs=None,
                  shuffle=True,
                  dtypes=None,
                  capacity=256,
                  min_after_dequeue=128,
                  seed=None,
                  scope=None):
  """Reads multiple records in parallel from data_sources using n readers.

  It uses a ParallelReader to read from multiple files in parallel using
  multiple readers created using `reader_class` with `reader_kwargs`.

  If shuffle is True the common_queue would be a RandomShuffleQueue otherwise
  it would be a FIFOQueue.

  Usage:
      data_sources = ['path_to/train*']
      key, value = parallel_read(data_sources, tf.CSVReader, num_readers=4)

  Args:
    data_sources: a list/tuple of files or the location of the data, i.e.
      /path/to/train@128, /path/to/train* or /tmp/.../train*
    reader_class: one of the io_ops.ReaderBase subclasses ex: TFRecordReader
    num_epochs: The number of times each data source is read. If left as None,
        the data will be cycled through indefinitely.
    num_readers: an integer, number of Readers to create.
    reader_kwargs: an optional dict, of kwargs for the reader.
    shuffle: boolean, whether to shuffle the files and the records by using
      RandomShuffleQueue as common_queue.
    dtypes:  A list of types.  The length of dtypes must equal the number
        of elements in each record. If it is None it will default to
        [tf.string, tf.string] for (key, value).
    capacity: integer, capacity of the common_queue.
    min_after_dequeue: integer, minimum number of records in the common_queue
      after dequeue. Needed for a good shuffle.
    seed: A seed used both for shuffling the file names and for the
      RandomShuffleQueue, so that a seeded run is reproducible end to end.
    scope: Optional name scope for the ops.

  Returns:
    key, value: a tuple of keys and values from the data_source.
  """
  data_files = get_data_files(data_sources)
  with ops.name_scope(scope, 'parallel_read'):
    # Forward `seed` here as well: otherwise the file order stays randomly
    # seeded when `shuffle` is True, even though the record queue is seeded.
    filename_queue = tf_input.string_input_producer(
        data_files,
        num_epochs=num_epochs,
        shuffle=shuffle,
        seed=seed,
        name='filenames')
    dtypes = dtypes or [tf_dtypes.string, tf_dtypes.string]
    if shuffle:
      common_queue = data_flow_ops.RandomShuffleQueue(
          capacity=capacity,
          min_after_dequeue=min_after_dequeue,
          dtypes=dtypes,
          seed=seed,
          name='common_queue')
    else:
      common_queue = data_flow_ops.FIFOQueue(capacity=capacity, dtypes=dtypes,
                                             name='common_queue')

    # Export the queue's fill ratio (current size / capacity) as a summary.
    summary.scalar('fraction_of_%d_full' % capacity,
                   math_ops.to_float(common_queue.size()) * (1. / capacity))

    return ParallelReader(
        reader_class,
        common_queue,
        num_readers=num_readers,
        reader_kwargs=reader_kwargs).read(filename_queue)
Ejemplo n.º 56
0
def _float_weights_or_none(weights):
    if weights is None:
        return None
    return math_ops.to_float(weights)
Ejemplo n.º 57
0
def _indicator_labels_mean(labels, weights=None, name=None):
    """Returns `metrics_lib.mean` of the float-cast `labels`.

    Args:
      labels: Labels to average; cast to float under the op name 'labels'.
      weights: Optional weights, broadcast against the float labels when given.
      name: Optional name for the enclosing name scope (default 'labels_mean').

    Returns:
      Whatever `metrics_lib.mean` returns for the (optionally weighted) labels.
    """
    with ops.name_scope(name, 'labels_mean', (labels, weights)) as scope:
        float_labels = math_ops.to_float(labels, name='labels')
        broadcast = (
            None if weights is None else
            weights_broadcast_ops.broadcast_weights(weights, float_labels))
        return metrics_lib.mean(float_labels, weights=broadcast, name=scope)
Ejemplo n.º 58
0
def streaming_precision_recall_arrays(n_gbboxes,
                                      rclasses,
                                      rscores,
                                      tp_tensor,
                                      fp_tensor,
                                      remove_zero_labels=True,
                                      metrics_collections=None,
                                      updates_collections=None,
                                      name=None):
    """Streaming computation of precision / recall arrays.

    Accumulates, across batches, a running object count, a running detection
    count, and ever-growing arrays of scores and true/false-positive flags,
    then hands them to `_precision_recall`.

    Args:
      n_gbboxes: Per-batch counts that are cast to int64 and summed into the
        running object counter (presumably ground-truth box counts -- confirm
        against callers).
      rclasses: Class ids of the detections; cast to int64 and flattened.
      rscores: Scores of the detections; cast to float and flattened.
      tp_tensor: True-positive flags; cast to int32 and flattened.
      fp_tensor: False-positive flags; cast to int32 and flattened.
      remove_zero_labels: If True, keep only the entries whose class id is
        strictly greater than 0.
      metrics_collections: Optional collections the value result is added to.
      updates_collections: Optional collections the update result is added to.
      name: Optional variable scope name (default 'stream_precision_recall').

    Returns:
      A pair `(value, update_op)`: the result of `_precision_recall` on the
      accumulator variables, and its result on the freshly updated
      accumulators.
    """
    with variable_scope.variable_scope(
            name, 'stream_precision_recall',
            [n_gbboxes, rclasses, tp_tensor, fp_tensor]):
        gt_counts = math_ops.to_int64(n_gbboxes)
        classes = math_ops.to_int64(rclasses)
        scores = math_ops.to_float(rscores)

        stype = tf.int32
        tp_flags = tf.cast(tp_tensor, stype)
        fp_flags = tf.cast(fp_tensor, stype)

        # Flatten everything to rank 1 so the entries line up one-to-one.
        flat = [-1]
        classes = tf.reshape(classes, flat)
        scores = tf.reshape(scores, flat)
        tp_flags = tf.reshape(tp_flags, flat)
        fp_flags = tf.reshape(fp_flags, flat)
        if remove_zero_labels:
            # Keeps only class ids > 0, so negative ids are dropped as well.
            keep = tf.greater(classes, 0)
            classes = tf.boolean_mask(classes, keep)
            scores = tf.boolean_mask(scores, keep)
            tp_flags = tf.boolean_mask(tp_flags, keep)
            fp_flags = tf.boolean_mask(fp_flags, keep)

        # Accumulators that carry information from one batch to the next.
        v_nobjects = _create_local('v_nobjects', shape=[], dtype=tf.int64)
        v_ndetections = _create_local(
            'v_ndetections', shape=[], dtype=tf.int32)
        v_scores = _create_local('v_scores', shape=[0])
        v_tp = _create_local('v_tp', shape=[0], dtype=stype)
        v_fp = _create_local('v_fp', shape=[0], dtype=stype)

        def _append_to(accumulator, batch_values):
            # The accumulator grows with every batch, hence no shape
            # validation on the assignment.
            return state_ops.assign(
                accumulator,
                tf.concat([accumulator, batch_values], axis=0),
                validate_shape=False)

        # Update operations.
        nobjects_op = state_ops.assign_add(v_nobjects,
                                           tf.reduce_sum(gt_counts))
        ndetections_op = state_ops.assign_add(
            v_ndetections, tf.size(scores, out_type=tf.int32))
        scores_op = _append_to(v_scores, scores)
        tp_op = _append_to(v_tp, tp_flags)
        fp_op = _append_to(v_fp, fp_flags)

        # Value: computed from the accumulators as they currently stand.
        value = _precision_recall(
            v_nobjects, v_ndetections, v_scores, v_tp, v_fp, 'value')

        # Update: computed from the outputs of the update ops, and forced to
        # run after all of them.
        pending_updates = [
            nobjects_op, ndetections_op, scores_op, tp_op, fp_op]
        with ops.control_dependencies(pending_updates):
            update_op = _precision_recall(
                nobjects_op, ndetections_op, scores_op, tp_op, fp_op,
                'update_op')

        if metrics_collections:
            ops.add_to_collections(metrics_collections, value)
        if updates_collections:
            ops.add_to_collections(updates_collections, update_op)
        return value, update_op
Ejemplo n.º 59
0
 def _streaming_metrics(predictions, targets):
   """Applies `streaming_metrics_fn` to predictions binarized at `threshold`."""
   # 1.0 where predictions >= threshold, 0.0 elsewhere.
   above_threshold = math_ops.greater_equal(predictions, threshold)
   binarized = math_ops.to_float(above_threshold)
   return streaming_metrics_fn(predictions=binarized, labels=targets)
Ejemplo n.º 60
0
def compute_weighted_loss(losses,
                          weights=1.0,
                          scope=None,
                          loss_collection=ops.GraphKeys.LOSSES,
                          reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
    """Computes the weighted loss.

    Args:
      losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
      weights: Optional `Tensor` whose rank is either 0, or the same rank as
        `losses`, and must be broadcastable to `losses` (i.e., all dimensions
        must be either `1`, or the same as the corresponding `losses`
        dimension).
      scope: the scope for the operations performed in computing the loss.
      loss_collection: the loss will be added to these collections.
      reduction: Type of reduction to apply to loss.

    Returns:
      Weighted loss `Tensor` of the same type as `losses`. If `reduction` is
      `NONE`, this has the same shape as `losses`; otherwise, it is scalar.

    Raises:
      ValueError: If `weights` is `None` or the shape is not compatible with
        `losses`, or if the number of dimensions (rank) of either `losses` or
        `weights` is missing.

    Note:
      When calculating the gradient of a weighted loss contributions from
      both `losses` and `weights` are considered. If your `weights` depend
      on some model parameters but you do not want this to affect the loss
      gradient, you need to apply `tf.stop_gradient` to `weights` before
      passing them to `compute_weighted_loss`.

    @compatibility(eager)
    The `loss_collection` argument is ignored when executing eagerly. Consider
    holding on to the return value or collecting losses via a `tf.keras.Model`.
    @end_compatibility
    """
    Reduction.validate(reduction)
    with ops.name_scope(scope, "weighted_loss", (losses, weights)):
        # Remember the requested reduction on the graph so the loss can be
        # normalized when it is distributed to multiple towers.
        # TODO(josh11b): Associate it with the returned op for more precision.
        ops.get_default_graph()._last_loss_reduction = reduction  # pylint: disable=protected-access

        broadcast_check = weights_broadcast_ops.assert_broadcastable(
            weights, losses)
        with ops.control_dependencies((broadcast_check,)):
            losses = ops.convert_to_tensor(losses)
            input_dtype = losses.dtype
            # All arithmetic is done in float; the result is cast back below.
            losses = math_ops.to_float(losses)
            weights = math_ops.to_float(weights)
            weighted_losses = math_ops.multiply(losses, weights)

            if reduction == Reduction.NONE:
                loss = weighted_losses
            else:
                loss = math_ops.reduce_sum(weighted_losses)
                if reduction == Reduction.MEAN:
                    # Divide by the sum of the weights broadcast to the shape
                    # of `losses`.
                    total_weight = math_ops.reduce_sum(
                        array_ops.ones_like(losses) * weights)
                    loss = _safe_mean(loss, total_weight)
                elif reduction in (Reduction.SUM_BY_NONZERO_WEIGHTS,
                                   Reduction.SUM_OVER_NONZERO_WEIGHTS):
                    loss = _safe_mean(loss, _num_present(losses, weights))
                elif reduction == Reduction.SUM_OVER_BATCH_SIZE:
                    loss = _safe_mean(loss, _num_elements(losses))

            # Convert the result back to the input type.
            loss = math_ops.cast(loss, input_dtype)
            util.add_loss(loss, loss_collection)
            return loss