Example 1
    def set_training_random_scale_factors(self, scale_min, scale_max):
        """Set the parameters for multiscale training."""
        # Select a random scale factor.
        random_scale_factor = tf.random_uniform([], scale_min, scale_max)
        scaled_size = tf.to_int32(random_scale_factor * self._output_size)

        # Recompute the accurate scale_factor using rounded scaled image size.
        height = tf.shape(self._image)[0]
        width = tf.shape(self._image)[1]
        max_image_size = tf.to_float(tf.maximum(height, width))
        image_scale = tf.to_float(scaled_size) / max_image_size

        # Select non-zero random offset (x, y) if scaled image is larger than
        # self._output_size.
        scaled_height = tf.to_int32(tf.to_float(height) * image_scale)
        scaled_width = tf.to_int32(tf.to_float(width) * image_scale)
        offset_y = tf.to_float(scaled_height - self._output_size)
        offset_x = tf.to_float(scaled_width - self._output_size)
        offset_y = tf.maximum(0.0, offset_y) * tf.random_uniform([], 0, 1)
        offset_x = tf.maximum(0.0, offset_x) * tf.random_uniform([], 0, 1)
        offset_y = tf.to_int32(offset_y)
        offset_x = tf.to_int32(offset_x)
        self._image_scale = image_scale
        self._scaled_height = scaled_height
        self._scaled_width = scaled_width
        self._crop_offset_x = offset_x
        self._crop_offset_y = offset_y
    def __init__(self, image, output_size, short_side_image_size,
                 long_side_max_image_size):
        """Initializes a new `InputProcessor`.

        This InputProcessor is tailored for MLPerf. The reference implementation
        resizes images as follows:
          1. Resize the short side to 800 pixels while keeping the aspect ratio.
          2. Clip the long side at a maximum of 1333 pixels.

        Args:
          image: The input image before processing.
          output_size: An integer tuple of the output image size in the form of
            (short_side, long_side) after calling the resize_and_crop_image
            function.
          short_side_image_size: The image size for the short side. This is
            analogous to cfg.TRAIN.scales in the MLPerf reference model.
          long_side_max_image_size: The maximum image size for the long side.
            This is analogous to cfg.TRAIN.max_size in the MLPerf reference
            model.
        """
        self._image = image
        self._output_size = output_size
        self._short_side_image_size = short_side_image_size
        self._long_side_max_image_size = long_side_max_image_size
        # Parameters to control rescaling and shifting during preprocessing.
        # Image scale defines scale from original image to scaled image.
        self._image_scale = tf.constant(1.0)
        # The integer height and width of scaled image.
        self._scaled_height = tf.shape(image)[0]
        self._scaled_width = tf.shape(image)[1]
        self._ori_height = tf.shape(image)[0]
        self._ori_width = tf.shape(image)[1]
    def crop_gt_masks(self, gt_mask_size):
        """Crops the ground truth binary masks and resize to fixed-size masks."""
        num_boxes = tf.shape(self._boxes)[0]
        num_masks = tf.shape(self._masks)[0]
        assert_length = tf.Assert(tf.equal(num_boxes, num_masks), [num_masks])

        def padded_bounding_box_fn():
            return tf.reshape(self._masks,
                              [-1, self._ori_height, self._ori_width, 1])

        def zeroed_box_fn():
            return tf.zeros([0, self._ori_height, self._ori_width, 1])

        # Check if there is any instance in this image or not.
        scaled_masks = tf.cond(num_masks > 0, padded_bounding_box_fn,
                               zeroed_box_fn)
        with tf.control_dependencies([assert_length]):
            cropped_gt_masks = tf.image.crop_and_resize(
                image=scaled_masks,
                boxes=self._boxes,
                box_ind=tf.range(num_masks, dtype=tf.int32),
                crop_size=[gt_mask_size, gt_mask_size],
                method='bilinear')[:, :, :, 0]
        cropped_gt_masks = tf.pad(
            cropped_gt_masks,
            paddings=tf.constant([[0, 0], [2, 2], [2, 2]]),
            mode='CONSTANT',
            constant_values=0.)
        return cropped_gt_masks
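
The random-scale logic in set_training_random_scale_factors is easier to see outside the graph. Below is a minimal NumPy sketch of the same arithmetic; the height, width, output_size, scale_min, and scale_max values are hypothetical and chosen purely for illustration:

import numpy as np

# Hypothetical values for illustration only.
height, width, output_size = 480, 640, 1024
scale_min, scale_max = 0.8, 1.2

# Pick a random scale, then recompute the accurate scale from the rounded
# target size, mirroring the TF ops in set_training_random_scale_factors.
random_scale_factor = np.random.uniform(scale_min, scale_max)
scaled_size = int(random_scale_factor * output_size)
image_scale = scaled_size / max(height, width)
scaled_height = int(height * image_scale)
scaled_width = int(width * image_scale)

# Crop offsets are non-zero only when the scaled image exceeds output_size.
offset_y = int(max(0.0, scaled_height - output_size) * np.random.uniform())
offset_x = int(max(0.0, scaled_width - output_size) * np.random.uniform())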
Example 4
    def set_scale_factors_to_output_size(self):
        """Set the parameters to resize input image to self._output_size."""
        # Compute the scale_factor using rounded scaled image size.
        height = tf.shape(self._image)[0]
        width = tf.shape(self._image)[1]
        max_image_size = tf.to_float(tf.maximum(height, width))
        image_scale = tf.to_float(self._output_size) / max_image_size
        scaled_height = tf.to_int32(tf.to_float(height) * image_scale)
        scaled_width = tf.to_int32(tf.to_float(width) * image_scale)
        self._image_scale = image_scale
        self._scaled_height = scaled_height
        self._scaled_width = scaled_width
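
For intuition, a tiny worked example of the arithmetic above (the numbers are hypothetical): the longest side is mapped exactly onto self._output_size and the short side is scaled by the same factor.

height, width, output_size = 480, 640, 512
image_scale = output_size / max(height, width)  # 512 / 640 = 0.8
scaled_height = int(height * image_scale)       # 384
scaled_width = int(width * image_scale)         # 512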
Example 5
def upsampling_tpu_compatible(data, scale):
  """Nearest neighbor upsampling TPU-compatible implementation.

  This implementation is TPU compatible as opposed to
  tf.image.resize_nearest_neighbor().

  Args:
    data: A 4D float32 tensor of shape [batch, height, width, channels].
    scale: An integer multiple to scale resolution of input data.

  Returns:
    A 4D float32 tensor of shape [batch, height*scale, width*scale, channels].
  """
  with tf.name_scope('upsampling_tpu_compatible'):
    if data.get_shape().is_fully_defined():
      bs, height, width, _ = [s.value for s in data.get_shape()]
    else:
      shape = tf.shape(data)
      bs, height, width = shape[0], shape[1], shape[2]
    channels = data.get_shape().as_list()[3]
    # Use reshape to quickly upsample the input. The nearest pixel is selected
    # implicitly via broadcasting.
    data = tf.reshape(data, [bs, height, 1, width, 1, channels]) * tf.ones(
        [1, 1, scale, 1, scale, 1], dtype=data.dtype)
    return tf.reshape(data, [bs, height * scale, width * scale, channels])
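
The reshape-and-broadcast trick can be verified with a small NumPy sketch (hypothetical 1x2x2x1 input upsampled by a factor of 2):

import numpy as np

data = np.arange(4, dtype=np.float32).reshape(1, 2, 2, 1)
scale = 2
# Insert singleton axes after height and width, then broadcast against ones,
# exactly as upsampling_tpu_compatible does with tf.reshape and tf.ones.
up = data.reshape(1, 2, 1, 2, 1, 1) * np.ones(
    (1, 1, scale, 1, scale, 1), dtype=data.dtype)
up = up.reshape(1, 2 * scale, 2 * scale, 1)
# Each input pixel now covers a scale x scale block; e.g. up[0, :2, :2, 0]
# is all 0.0 and up[0, :2, 2:, 0] is all 1.0.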
Example 6
    def __init__(self, image, output_size):
        """Initializes a new `InputProcessor`.

        Args:
          image: The input image before processing.
          output_size: The output image size after calling the
            resize_and_crop_image function.
        """
        self._image = image
        self._output_size = output_size
        # Parameters to control rescaling and shifting during preprocessing.
        # Image scale defines scale from original image to scaled image.
        self._image_scale = tf.constant(1.0)
        # The integer height and width of scaled image.
        self._scaled_height = tf.shape(image)[0]
        self._scaled_width = tf.shape(image)[1]
        # The x and y translation offset to crop scaled image to the output size.
        self._crop_offset_y = tf.constant(0)
        self._crop_offset_x = tf.constant(0)
    def set_scale_factors_to_mlperf_reference_size(self):
        """Set the parameters to resize the image according to MLPerf reference."""
        height = tf.shape(self._image)[0]
        width = tf.shape(self._image)[1]
        # Recompute the accurate scale_factor using rounded scaled image size.
        # https://github.com/ddkang/Detectron/blob/80f329530843e66d07ca39e19901d5f3e5daf009/lib/utils/blob.py#L70  # pylint: disable=line-too-long
        min_image_size = tf.to_float(tf.minimum(height, width))
        max_image_size = tf.to_float(tf.maximum(height, width))
        short_side_scale = tf.to_float(
            self._short_side_image_size) / min_image_size
        long_side_scale = (tf.to_float(self._long_side_max_image_size) /
                           max_image_size)
        image_scale = tf.minimum(short_side_scale, long_side_scale)
        scaled_height = tf.to_int32(tf.to_float(height) * image_scale)
        scaled_width = tf.to_int32(tf.to_float(width) * image_scale)
        self._image_scale = image_scale
        self._scaled_height = scaled_height
        self._scaled_width = scaled_width
        return image_scale
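
A worked example of the MLPerf scaling rule (hypothetical 500x1000 image, with short_side_image_size=800 and long_side_max_image_size=1333):

height, width = 500, 1000
short_side_scale = 800 / min(height, width)            # 1.6
long_side_scale = 1333 / max(height, width)            # 1.333
image_scale = min(short_side_scale, long_side_scale)   # 1.333
# The long side is clipped at 1333, so the short side lands at 666 rather
# than the requested 800.
scaled_height = int(height * image_scale)              # 666
scaled_width = int(width * image_scale)                # 1333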
Example 8
        def _dataset_parser(value):
            """Parse data to a fixed dimension input image and learning targets."""
            with tf.name_scope('parser'):
                data = example_decoder.decode(value)
                source_id = data['source_id']
                image = data['image']
                boxes = data['groundtruth_boxes']
                classes = data['groundtruth_classes']
                classes = tf.reshape(tf.cast(classes, dtype=tf.float32),
                                     [-1, 1])

                # The image normalization is identical to Cloud TPU ResNet-50.
                image = tf.image.convert_image_dtype(image, dtype=tf.float32)
                image = _normalize_image(image)

                if params['input_rand_hflip']:
                    image, boxes = preprocessor.random_horizontal_flip(
                        image, boxes=boxes)
                image_original_shape = tf.shape(image)
                image, _ = preprocessor.resize_to_range(
                    image,
                    min_dimension=params['image_size'],
                    max_dimension=params['image_size'])
                image_scale = tf.to_float(
                    image_original_shape[0]) / tf.to_float(tf.shape(image)[0])
                image, boxes = preprocessor.scale_boxes_to_pixel_coordinates(
                    image, boxes, keypoints=None)

                image = tf.image.pad_to_bounding_box(image, 0, 0,
                                                     params['image_size'],
                                                     params['image_size'])
                (cls_targets, box_targets,
                 num_positives) = anchor_labeler.label_anchors(boxes, classes)

                source_id = tf.string_to_number(source_id, out_type=tf.float32)
                row = (image, cls_targets, box_targets, num_positives,
                       source_id, image_scale)
                return row
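
A parser like _dataset_parser is typically mapped over a TFRecord dataset. The sketch below is hypothetical wiring (the file pattern, cycle length, and batch size are invented) and assumes TF 1.x with _dataset_parser already defined as above:

filenames = tf.data.Dataset.list_files('train-*.tfrecord')  # hypothetical
dataset = filenames.interleave(tf.data.TFRecordDataset, cycle_length=4)
dataset = dataset.map(_dataset_parser, num_parallel_calls=64)
dataset = dataset.batch(8, drop_remainder=True)
dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)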
Example 9
def upsampling(input_,
               kernel_size,
               stride,
               num_outputs,
               scope,
               activation_fn=tf.nn.relu,
               tpu_compatible=False):
  """A smooth replacement of a same-padded transposed convolution.

  This function first computes a nearest-neighbor upsampling of the input by a
  factor of `stride`, then applies a mirror-padded, same-padded convolution.

  It expects `kernel_size` to be odd.

  Args:
    input_: 4-D Tensor input.
    kernel_size: int (odd-valued) representing the kernel size.
    stride: int representing the strides.
    num_outputs: int. Number of output feature maps.
    scope: str. Scope under which to operate.
    activation_fn: activation function.
    tpu_compatible: bool. Whether to use a nearest neighbor upsampling
      compatible with TPU or the default tf.image implementation.

  Returns:
    4-D Tensor output.

  Raises:
    ValueError: if `kernel_size` is even.
  """
  if kernel_size % 2 == 0:
    raise ValueError('kernel_size is expected to be odd.')
  with tf.variable_scope(scope):
    if tpu_compatible:
      upsampled_input = upsampling_tpu_compatible(input_, stride)
    else:
      if input_.get_shape().is_fully_defined():
        _, height, width, _ = [s.value for s in input_.get_shape()]
      else:
        shape = tf.shape(input_)
        height, width = shape[1], shape[2]
      upsampled_input = tf.image.resize_nearest_neighbor(
          input_, [stride * height, stride * width])
    return conv2d(upsampled_input, kernel_size, 1, num_outputs, 'conv',
                  activation_fn=activation_fn)
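
A hypothetical call for illustration (the scope name and shapes are invented; this assumes the conv2d helper used inside upsampling is in scope):

features = tf.placeholder(tf.float32, [8, 32, 32, 64])
out = upsampling(features, kernel_size=3, stride=2, num_outputs=32,
                 scope='decoder_up1', tpu_compatible=True)
# out has shape [8, 64, 64, 32]: spatial resolution doubled, then a 3x3
# mirror-padded convolution applied.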
Example 10
def pad_to_fixed_size(data, pad_value, output_shape):
    """Pad data to a fixed length at the first dimension.

  Args:
    data: Tensor to be padded to output_shape.
    pad_value: A constant value assigned to the paddings.
    output_shape: The output shape of a 2D tensor.

  Returns:
    The Padded tensor with output_shape [max_num_instances, dimension].
  """
    max_num_instances = output_shape[0]
    dimension = output_shape[1]
    data = tf.reshape(data, [-1, dimension])
    num_instances = tf.shape(data)[0]
    assert_length = tf.Assert(tf.less_equal(num_instances, max_num_instances),
                              [num_instances])
    with tf.control_dependencies([assert_length]):
        pad_length = max_num_instances - num_instances
    paddings = pad_value * tf.ones([pad_length, dimension])
    padded_data = tf.concat([data, paddings], axis=0)
    padded_data = tf.reshape(padded_data, output_shape)
    return padded_data
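
Hypothetical usage, padding a variable-length box tensor to a static [100, 4] shape so that batched tensors have fixed shapes (the sizes and pad value are invented):

boxes = tf.random_uniform([7, 4])  # 7 boxes for this image
padded = pad_to_fixed_size(boxes, pad_value=-1.0, output_shape=[100, 4])
# padded has static shape [100, 4]; rows 7..99 are filled with -1.0.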
Example 11
  def parser(record):
    """function used to parse tfrecord."""

    record_spec = {
        "input": tf.FixedLenFeature([seq_len], tf.int64),
        "target": tf.FixedLenFeature([seq_len], tf.int64),
        "seg_id": tf.FixedLenFeature([seq_len], tf.int64),
        "label": tf.FixedLenFeature([1], tf.int64),
        "is_masked": tf.FixedLenFeature([seq_len], tf.int64),
    }

    # retrieve serialized example
    example = tf.parse_single_example(
        serialized=record,
        features=record_spec)

    inputs = example.pop("input")
    target = example.pop("target")
    is_masked = tf.cast(example.pop("is_masked"), tf.bool)

    non_reuse_len = seq_len - reuse_len
    assert perm_size <= reuse_len and perm_size <= non_reuse_len

    perm_mask_0, target_0, target_mask_0, input_k_0, input_q_0 = _local_perm(
        inputs[:reuse_len],
        target[:reuse_len],
        is_masked[:reuse_len],
        perm_size,
        reuse_len)

    perm_mask_1, target_1, target_mask_1, input_k_1, input_q_1 = _local_perm(
        inputs[reuse_len:],
        target[reuse_len:],
        is_masked[reuse_len:],
        perm_size,
        non_reuse_len)

    perm_mask_0 = tf.concat([perm_mask_0, tf.ones([reuse_len, non_reuse_len])],
                            axis=1)
    perm_mask_1 = tf.concat([tf.zeros([non_reuse_len, reuse_len]), perm_mask_1],
                            axis=1)
    perm_mask = tf.concat([perm_mask_0, perm_mask_1], axis=0)
    target = tf.concat([target_0, target_1], axis=0)
    target_mask = tf.concat([target_mask_0, target_mask_1], axis=0)
    input_k = tf.concat([input_k_0, input_k_1], axis=0)
    input_q = tf.concat([input_q_0, input_q_1], axis=0)

    if num_predict is not None:
      indices = tf.range(seq_len, dtype=tf.int64)
      bool_target_mask = tf.cast(target_mask, tf.bool)
      indices = tf.boolean_mask(indices, bool_target_mask)

      ##### extra padding due to CLS/SEP introduced after prepro
      actual_num_predict = tf.shape(indices)[0]
      pad_len = num_predict - actual_num_predict

      ##### target_mapping
      target_mapping = tf.one_hot(indices, seq_len, dtype=tf.float32)
      paddings = tf.zeros([pad_len, seq_len], dtype=target_mapping.dtype)
      target_mapping = tf.concat([target_mapping, paddings], axis=0)
      example["target_mapping"] = tf.reshape(target_mapping,
                                             [num_predict, seq_len])

      ##### target
      target = tf.boolean_mask(target, bool_target_mask)
      paddings = tf.zeros([pad_len], dtype=target.dtype)
      target = tf.concat([target, paddings], axis=0)
      example["target"] = tf.reshape(target, [num_predict])

      ##### target mask
      target_mask = tf.concat(
          [tf.ones([actual_num_predict], dtype=tf.float32),
           tf.zeros([pad_len], dtype=tf.float32)],
          axis=0)
      example["target_mask"] = tf.reshape(target_mask, [num_predict])
    else:
      example["target"] = tf.reshape(target, [seq_len])
      example["target_mask"] = tf.reshape(target_mask, [seq_len])

    # reshape back to fixed shape
    example["perm_mask"] = tf.reshape(perm_mask, [seq_len, seq_len])
    example["input_k"] = tf.reshape(input_k, [seq_len])
    example["input_q"] = tf.reshape(input_q, [seq_len])

    _convert_example(example, use_bfloat16)

    for k, v in example.items():
      tf.logging.info("%s: %s", k, v)

    return example
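
The block structure of the assembled permutation mask is easier to see in a small NumPy sketch (hypothetical seq_len=6, reuse_len=3; this assumes the XLNet convention that a 1 at [i, j] blocks position i from attending to position j):

import numpy as np

seq_len, reuse_len = 6, 3
non_reuse_len = seq_len - reuse_len

# Stand-ins for the per-half masks returned by _local_perm.
perm_mask_0 = np.zeros((reuse_len, reuse_len))
perm_mask_1 = np.zeros((non_reuse_len, non_reuse_len))

top = np.concatenate(
    [perm_mask_0, np.ones((reuse_len, non_reuse_len))], axis=1)
bottom = np.concatenate(
    [np.zeros((non_reuse_len, reuse_len)), perm_mask_1], axis=1)
perm_mask = np.concatenate([top, bottom], axis=0)  # [seq_len, seq_len]
# Reused positions can never attend to the non-reused half (ones block),
# while non-reused positions can always see the reused half (zeros block).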