Example #1
def add_sigmoid_cross_entropy_loss_for_each_scale(scales_to_logits,
                                                  labels,
                                                  ignore_label,
                                                  loss_weight=1.0,
                                                  upsample_logits=True,
                                                  scope=None):
    """Adds sigmoid cross entropy loss for logits of each scale.

  Implemented based on deeplab's add_softmax_cross_entropy_loss_for_each_scale
  in deeplab/utils/train_utils.py.

  Args:
    scales_to_logits: A map from logits names for different scales to logits.
      The logits have shape [batch, logits_height, logits_width, num_classes].
    labels: Groundtruth labels with shape [batch, image_height, image_width, 1].
    ignore_label: Integer, label to ignore.
    loss_weight: Float, loss weight.
    upsample_logits: Boolean, upsample logits or not.
    scope: String, the scope for the loss.

  Raises:
    ValueError: Label or logits is None.
  """
    if labels is None:
        raise ValueError('No label for sigmoid cross entropy loss.')

    for scale, logits in six.iteritems(scales_to_logits):
        loss_scope = None
        if scope:
            loss_scope = '%s_%s' % (scope, scale)

        if upsample_logits:
            # Label is not downsampled, and instead we upsample logits.
            logits = tf.image.resize_bilinear(logits,
                                              preprocess_utils.resolve_shape(
                                                  labels, 4)[1:3],
                                              align_corners=True)
            scaled_labels = labels
        else:
            # Label is downsampled to the same size as logits.
            scaled_labels = tf.image.resize_nearest_neighbor(
                labels,
                preprocess_utils.resolve_shape(logits, 4)[1:3],
                align_corners=True)

        logits = logits[:, :, :, 1]
        scaled_labels = tf.to_float(scaled_labels)
        scaled_labels = tf.squeeze(scaled_labels)
        not_ignore_mask = tf.to_float(tf.not_equal(scaled_labels,
                                                   ignore_label)) * loss_weight
        losses = tf.nn.weighted_cross_entropy_with_logits(
            scaled_labels, logits, FLAGS.sigmoid_recall_weight)

        # Loss added later in model_fn by tf.losses.get_total_loss()
        tf.losses.compute_weighted_loss(losses,
                                        weights=not_ignore_mask,
                                        scope=loss_scope)
Example #2
def add_softmax_cross_entropy_loss_for_each_scale(scales_to_logits,
                                                  labels,
                                                  num_classes,
                                                  ignore_label,
                                                  loss_weight=1.0,
                                                  upsample_logits=True,
                                                  scope=None):
    """Adds softmax cross entropy loss for logits of each scale.

  Args:
    scales_to_logits: A map from logits names for different scales to logits.
      The logits have shape [batch, logits_height, logits_width, num_classes].
    labels: Groundtruth labels with shape [batch, image_height, image_width, 1].
    num_classes: Integer, number of target classes.
    ignore_label: Integer, label to ignore.
    loss_weight: Float, loss weight.
    upsample_logits: Boolean, upsample logits or not.
    scope: String, the scope for the loss.

  Raises:
    ValueError: Label or logits is None.
  """
    if labels is None:
        raise ValueError('No label for softmax cross entropy loss.')

    for scale, logits in six.iteritems(scales_to_logits):
        loss_scope = None
        if scope:
            loss_scope = '%s_%s' % (scope, scale)

        if upsample_logits:
            # Label is not downsampled, and instead we upsample logits.
            logits = tf.image.resize_bilinear(logits,
                                              preprocess_utils.resolve_shape(
                                                  labels, 4)[1:3],
                                              align_corners=True)
            scaled_labels = labels
        else:
            # Label is downsampled to the same size as logits.
            scaled_labels = tf.image.resize_nearest_neighbor(
                labels,
                preprocess_utils.resolve_shape(logits, 4)[1:3],
                align_corners=True)

        scaled_labels = tf.reshape(scaled_labels, shape=[-1])
        not_ignore_mask = tf.to_float(tf.not_equal(scaled_labels,
                                                   ignore_label)) * loss_weight
        one_hot_labels = slim.one_hot_encoding(scaled_labels,
                                               num_classes,
                                               on_value=1.0,
                                               off_value=0.0)
        tf.losses.softmax_cross_entropy(one_hot_labels,
                                        tf.reshape(logits,
                                                   shape=[-1, num_classes]),
                                        weights=not_ignore_mask,
                                        scope=loss_scope)
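
The softmax variant follows the same pattern; a sketch under the same TF 1.x / slim / `preprocess_utils` assumptions, again with illustrative shapes and scale key.

import tensorflow as tf  # TF 1.x API assumed

num_classes = 21
scales_to_logits = {'merged_logits': tf.zeros([2, 33, 33, num_classes])}
labels = tf.zeros([2, 129, 129, 1], dtype=tf.int32)

# Registers one weighted softmax loss per scale in the tf.losses collection.
add_softmax_cross_entropy_loss_for_each_scale(
    scales_to_logits, labels, num_classes=num_classes, ignore_label=255,
    loss_weight=1.0, upsample_logits=True, scope='softmax_loss')

total_loss = tf.losses.get_total_loss()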
Example #3
def resize_im(image, image_size, pad_val, channels, elements_boxes=None):
  """Decodes and resizes the image.

  Args:
    image: Image to resize.
    image_size: The desired max image size.
    pad_val: The value to pad with.
    channels: The number of channels in the image.
    elements_boxes: The boxes from elements to resize.

  Returns:
    Resized image with possible padded regions,
    and possibly the resized elements boxes.
  """
  [height, width, got_channels] = preprocess_utils.resolve_shape(image, rank=3)

  new_height, new_width = get_resize_dim(height, width, image_size)

  image = tf.reshape(image, [height, width, -1])
  image = tf.cond(
      tf.logical_and(channels == 3, tf.equal(got_channels, 1)),
      true_fn=lambda: tf.image.grayscale_to_rgb(image),
      false_fn=lambda: image,
  )

  image = tf.image.resize_images(image, [new_height, new_width])

  image = preprocess_utils.pad_to_bounding_box(image, 0, 0, image_size,
                                               image_size, pad_val)
  if elements_boxes is not None:
    return image, elements_boxes / tf.to_float(tf.maximum(width, height))

  return image
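
A usage sketch for this resize helper, assuming `get_resize_dim` and `preprocess_utils` come from the same module as the function above; the placeholder image mimics a decoded image with unknown static shape, and the box values and `image_size=513` are illustrative.

import tensorflow as tf  # TF 1.x API assumed

# A decoded image whose static shape is unknown, as it would be after
# tf.image.decode_image on a raw byte string.
image = tf.placeholder(tf.float32, shape=[None, None, None])
elements_boxes = tf.constant([[10.0, 20.0, 200.0, 300.0]])  # pixel-space boxes

# Returns the padded square image plus the boxes rescaled by the longest side.
resized_image, scaled_boxes = resize_im(
    image, image_size=513, pad_val=127, channels=3,
    elements_boxes=elements_boxes)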
Example #4
def resize_im(image, image_size, pad_val, channels, features=None):
    """Decodes and resizes the image.

  Args:
    image: Image to resize.
    image_size: The desired max image size.
    pad_val: The value to pad with.
    channels: The number of channels in the image.
    features: Other features to resize.

  Returns:
    Resized image with possible padded regions,
    and possibly the resized elements boxes.
  """
    [height, width, got_channels] = preprocess_utils.resolve_shape(image,
                                                                   rank=3)

    new_height, new_width = get_resize_dim(height, width, image_size)

    image = tf.reshape(image, [height, width, -1])
    image = tf.cond(
        tf.logical_and(channels == 3, tf.equal(got_channels, 1)),
        true_fn=lambda: tf.image.grayscale_to_rgb(image),
        false_fn=lambda: image,
    )

    image = tf.image.resize_images(image, [new_height, new_width])

    image = preprocess_utils.pad_to_bounding_box(image, 0, 0, image_size,
                                                 image_size, pad_val)
    if features is not None:
        width, height = tf.to_float(width), tf.to_float(height)
        max_dim = tf.to_float(tf.maximum(width, height))
        features[ELEMENTS_BOX_ID] = features[ELEMENTS_BOX_ID] / max_dim
        if GROUNDTRUTH_XMIN_ID in features:
            features[GROUNDTRUTH_XMIN_ID] *= width / max_dim
            features[GROUNDTRUTH_XMAX_ID] *= width / max_dim
            features[GROUNDTRUTH_YMIN_ID] *= height / max_dim
            features[GROUNDTRUTH_YMAX_ID] *= height / max_dim
    return image
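
This variant rescales box-style entries of a `features` dict in place and returns only the image; a sketch of a call, where the dict keys (ELEMENTS_BOX_ID, GROUNDTRUTH_XMIN_ID, and so on) are module-level constants assumed to be defined next to the function, and all values are illustrative.

import tensorflow as tf  # TF 1.x API assumed

image = tf.placeholder(tf.float32, shape=[None, None, None])  # decoded image
features = {
    ELEMENTS_BOX_ID: tf.constant([[10.0, 20.0, 200.0, 300.0]]),
    GROUNDTRUTH_XMIN_ID: tf.constant([10.0]),
    GROUNDTRUTH_XMAX_ID: tf.constant([200.0]),
    GROUNDTRUTH_YMIN_ID: tf.constant([20.0]),
    GROUNDTRUTH_YMAX_ID: tf.constant([300.0]),
}

# The box features are normalized in place; only the padded image is returned.
resized_image = resize_im(
    image, image_size=513, pad_val=127, channels=3, features=features)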