예제 #1
0
    def set_training_random_scale_factors(self, scale_min, scale_max):
        """Pick random scale and crop-offset parameters for multiscale training.

        A scale factor is drawn uniformly between `scale_min` and `scale_max`
        and applied to `self._output_size` to obtain an integer target size
        for the longer image side. The resulting image scale, scaled height
        and width, and a random crop offset are stored on the instance as
        `_image_scale`, `_scaled_height`, `_scaled_width`, `_crop_offset_x`
        and `_crop_offset_y`.

        Args:
          scale_min: Lower bound of the random scale factor.
          scale_max: Upper bound of the random scale factor.
        """
        # Draw the scale factor and truncate the target size to an integer.
        scale_sample = tf.random_uniform([], scale_min, scale_max)
        target_size = tf.cast(scale_sample * self._output_size, tf.int32)

        # Derive the scale actually applied to the image from the integer
        # target size and the longer side of the input image.
        height = tf.shape(self._image)[0]
        width = tf.shape(self._image)[1]
        longer_side = tf.cast(tf.maximum(height, width), tf.float32)
        image_scale = tf.cast(target_size, tf.float32) / longer_side

        scaled_height = tf.cast(
            tf.cast(height, tf.float32) * image_scale, tf.int32)
        scaled_width = tf.cast(
            tf.cast(width, tf.float32) * image_scale, tf.int32)

        # The crop offset is non-zero only when the scaled image exceeds
        # self._output_size along that axis; it is then a random fraction of
        # the excess. The y draw is created before the x draw on purpose so
        # that the order of random ops in the graph stays the same.
        excess_y = tf.cast(scaled_height - self._output_size, tf.float32)
        excess_x = tf.cast(scaled_width - self._output_size, tf.float32)
        crop_y = tf.maximum(0.0, excess_y) * tf.random_uniform([], 0, 1)
        crop_x = tf.maximum(0.0, excess_x) * tf.random_uniform([], 0, 1)
        crop_y = tf.cast(crop_y, tf.int32)
        crop_x = tf.cast(crop_x, tf.int32)

        self._image_scale = image_scale
        self._scaled_height = scaled_height
        self._scaled_width = scaled_width
        self._crop_offset_x = crop_x
        self._crop_offset_y = crop_y
예제 #2
0
 def set_scale_factors_to_output_size(self):
     """Compute the scale that fits the longer image side to self._output_size.

     Stores the scale in `self._image_scale` and the scaled dimensions
     (truncated to integers) in `self._scaled_height` and
     `self._scaled_width`.
     """
     rows = tf.shape(self._image)[0]
     cols = tf.shape(self._image)[1]

     # The longer side determines the scale so that both sides fit.
     longer_side = tf.cast(tf.maximum(rows, cols), tf.float32)
     scale = tf.cast(self._output_size, tf.float32) / longer_side

     self._image_scale = scale
     self._scaled_height = tf.cast(tf.cast(rows, tf.float32) * scale, tf.int32)
     self._scaled_width = tf.cast(tf.cast(cols, tf.float32) * scale, tf.int32)
예제 #3
0
 def set_scale_factors_to_mlperf_reference_size(self):
     """Compute resize parameters following the MLPerf reference sizing.

     The scale is the smaller of two candidates: the one that brings the
     shorter side to `self._short_side_image_size` and the one that brings
     the longer side to `self._long_side_max_image_size`. The scale and the
     scaled dimensions (truncated to integers) are stored in
     `self._image_scale`, `self._scaled_height` and `self._scaled_width`.

     Returns:
       The image scale tensor that was stored in `self._image_scale`.
     """
     rows = tf.shape(self._image)[0]
     cols = tf.shape(self._image)[1]

     # Sizing rule taken from the Detectron reference implementation:
     # https://github.com/ddkang/Detectron/blob/80f329530843e66d07ca39e19901d5f3e5daf009/lib/utils/blob.py#L70  # pylint: disable=line-too-long
     shorter_side = tf.cast(tf.minimum(rows, cols), tf.float32)
     longer_side = tf.cast(tf.maximum(rows, cols), tf.float32)
     scale_for_short = (
         tf.cast(self._short_side_image_size, tf.float32) / shorter_side)
     scale_for_long = (
         tf.cast(self._long_side_max_image_size, tf.float32) / longer_side)

     # Taking the minimum keeps both side constraints satisfied.
     scale = tf.minimum(scale_for_short, scale_for_long)

     self._image_scale = scale
     self._scaled_height = tf.cast(tf.cast(rows, tf.float32) * scale, tf.int32)
     self._scaled_width = tf.cast(tf.cast(cols, tf.float32) * scale, tf.int32)
     return scale
예제 #4
0
        def _dataset_parser(value):
            """Decode one record into a preprocessed image and its label map.

            Args:
              value: Serialized record passed to `example_decoder.decode`; the
                decoded result must provide the 'image' and 'labels_class'
                entries.

            Returns:
              A tuple `(image, label)`:
              image: Normalized, resized and cropped image tensor, cast to
                bfloat16 when params['use_bfloat16'] is set. Presumably of
                fixed dimension [image_size, image_size, 3]; confirm against
                SegmentationInputProcessor.
              label: Resized and cropped label tensor produced by the same
                input processor.
            """
            with tf.name_scope('parser'):
                decoded = example_decoder.decode(value)
                raw_image = decoded['image']
                raw_label = tf.cast(decoded['labels_class'], tf.int32)
                processor = SegmentationInputProcessor(
                    raw_image, params['image_size'], raw_label)

                # The image normalization is identical to Cloud TPU ResNet.
                processor.normalize_image()

                training = self._is_training
                if training:
                    # Augmentations are applied in training mode only.
                    if params['input_rand_hflip']:
                        processor.random_horizontal_flip()
                    processor.set_training_random_scale_factors(
                        params['train_scale_min'], params['train_scale_max'])
                image = processor.resize_and_crop_image()

                # Padding is labeled as background (class=0) during training
                # and as the ignore label otherwise.
                padding_label = 0 if training else params['ignore_label']
                label = processor.resize_and_crop_label(padding_label)

                if params['use_bfloat16']:
                    image = tf.cast(image, dtype=tf.bfloat16)
                return image, label