예제 #1
0
 def get_image_info(self):
     """Pack the resize metadata of the current image into one tensor.

     Returns:
       A float tensor stacking, in this order: the scaled height, the
       scaled width, the reciprocal of the image scale, the original
       height and the original width.
     """
     scaled_h = tf.to_float(self._scaled_height)
     scaled_w = tf.to_float(self._scaled_width)
     # Reciprocal of the resize factor (assumes the factor maps original
     # to scaled size, so this maps scaled back to original -- TODO confirm).
     inverse_scale = 1.0 / self._image_scale
     original_h = tf.to_float(self._ori_height)
     original_w = tf.to_float(self._ori_width)
     return tf.stack(
         [scaled_h, scaled_w, inverse_scale, original_h, original_w])
예제 #2
0
 def set_scale_factors_to_output_size(self):
     """Configure resizing so the longer image side equals self._output_size.

     Stores the scale factor in self._image_scale and the resulting
     int32-cast dimensions in self._scaled_height / self._scaled_width.
     """
     img_h = tf.shape(self._image)[0]
     img_w = tf.shape(self._image)[1]
     # The scale is driven by the longer side, preserving the aspect ratio.
     longest_side = tf.to_float(tf.maximum(img_h, img_w))
     scale = tf.to_float(self._output_size) / longest_side
     self._image_scale = scale
     self._scaled_height = tf.to_int32(tf.to_float(img_h) * scale)
     self._scaled_width = tf.to_int32(tf.to_float(img_w) * scale)
예제 #3
0
    def set_training_random_scale_factors(self, scale_min, scale_max):
        """Configure a randomly jittered resize and crop offset for training.

        Args:
          scale_min: lower bound of the random factor applied to
            self._output_size.
          scale_max: upper bound of that random factor.

        Stores the results in self._image_scale, self._scaled_height,
        self._scaled_width, self._crop_offset_x and self._crop_offset_y.
        """
        # Draw the jitter factor and derive the target size of the long side.
        jitter = tf.random_uniform([], scale_min, scale_max)
        target_size = tf.to_int32(jitter * self._output_size)

        # Derive the actual scale from the int32-cast target size.
        img_h = tf.shape(self._image)[0]
        img_w = tf.shape(self._image)[1]
        longest_side = tf.to_float(tf.maximum(img_h, img_w))
        scale = tf.to_float(target_size) / longest_side

        # Where the scaled image exceeds self._output_size, pick a random
        # crop offset within the excess; otherwise the offset is zero.
        new_h = tf.to_int32(tf.to_float(img_h) * scale)
        new_w = tf.to_int32(tf.to_float(img_w) * scale)
        excess_y = tf.to_float(new_h - self._output_size)
        excess_x = tf.to_float(new_w - self._output_size)
        rand_y = tf.maximum(0.0, excess_y) * tf.random_uniform([], 0, 1)
        rand_x = tf.maximum(0.0, excess_x) * tf.random_uniform([], 0, 1)
        crop_y = tf.to_int32(rand_y)
        crop_x = tf.to_int32(rand_x)

        self._image_scale = scale
        self._scaled_height = new_h
        self._scaled_width = new_w
        self._crop_offset_x = crop_x
        self._crop_offset_y = crop_y
예제 #4
0
파일: dataloader.py 프로젝트: jhseu/tpu
        def _dataset_parser(value):
            """Turn one serialized example into a fixed-size image and targets.

            Args:
              value: a serialized example accepted by example_decoder.decode.

            Returns:
              A tuple (image, cls_targets, box_targets, num_positives,
              source_id, image_scale).
            """
            with tf.name_scope('parser'):
                decoded = example_decoder.decode(value)
                source_id = decoded['source_id']
                image = decoded['image']
                boxes = decoded['groundtruth_boxes']
                # Class ids become a float column vector for the labeler.
                labels = tf.reshape(
                    tf.cast(decoded['groundtruth_classes'], dtype=tf.float32),
                    [-1, 1])

                # Same image normalization as Cloud TPU ResNet-50.
                image = tf.image.convert_image_dtype(image, dtype=tf.float32)
                image = _normalize_image(image)

                if params['input_rand_hflip']:
                    image, boxes = preprocessor.random_horizontal_flip(
                        image, boxes=boxes)

                # Remember the pre-resize shape to report the scale ratio.
                shape_before_resize = tf.shape(image)
                target_size = params['image_size']
                image, _ = preprocessor.resize_to_range(
                    image,
                    min_dimension=target_size,
                    max_dimension=target_size)
                # Ratio of original height to resized height.
                original_h = tf.to_float(shape_before_resize[0])
                image_scale = original_h / tf.to_float(tf.shape(image)[0])
                image, boxes = preprocessor.scale_boxes_to_pixel_coordinates(
                    image, boxes, keypoints=None)

                # Pad on the bottom/right up to a square of target_size.
                image = tf.image.pad_to_bounding_box(
                    image, 0, 0, target_size, target_size)
                cls_targets, box_targets, num_positives = (
                    anchor_labeler.label_anchors(boxes, labels))

                source_id = tf.string_to_number(source_id, out_type=tf.float32)
                return (image, cls_targets, box_targets, num_positives,
                        source_id, image_scale)
예제 #5
0
 def resize_and_crop_boxes(self):
   """Scale the ground-truth boxes, shift them by the crop offset and clip.

   Returns:
     A (boxes, classes) pair restricted to the boxes whose coordinates do
     not sum to zero after clipping.
   """
   scaled = preprocessor.box_list_scale(
       preprocessor.box_list.BoxList(self._boxes),
       self._scaled_height, self._scaled_width).get()
   # Translate into the cropped frame; the offset order (y, x, y, x)
   # presumably matches a [ymin, xmin, ymax, xmax] layout -- TODO confirm.
   shift = tf.stack([self._crop_offset_y, self._crop_offset_x,
                     self._crop_offset_y, self._crop_offset_x])
   shifted = scaled - tf.to_float(tf.reshape(shift, [1, 4]))
   clipped = self.clip_boxes(shifted)
   # Drop boxes whose coordinates sum to zero, together with their classes.
   keep = tf.where(tf.not_equal(tf.reduce_sum(clipped, axis=1), 0))
   return tf.gather_nd(clipped, keep), tf.gather_nd(self._classes, keep)
예제 #6
0
 def set_scale_factors_to_mlperf_reference_size(self):
     """Configure resizing according to the MLPerf reference rule.

     The scale is the smaller of the factor that brings the short side to
     self._short_side_image_size and the factor that brings the long side
     to self._long_side_max_image_size. It is stored in self._image_scale,
     and the int32-cast dimensions go to self._scaled_height /
     self._scaled_width.

     Returns:
       The chosen image scale.
     """
     img_h = tf.shape(self._image)[0]
     img_w = tf.shape(self._image)[1]
     # Reference implementation:
     # https://github.com/ddkang/Detectron/blob/80f329530843e66d07ca39e19901d5f3e5daf009/lib/utils/blob.py#L70  # pylint: disable=line-too-long
     shorter = tf.to_float(tf.minimum(img_h, img_w))
     longer = tf.to_float(tf.maximum(img_h, img_w))
     scale_for_short = tf.to_float(self._short_side_image_size) / shorter
     scale_for_long = tf.to_float(self._long_side_max_image_size) / longer
     # Taking the minimum keeps the long side within its maximum.
     scale = tf.minimum(scale_for_short, scale_for_long)
     self._image_scale = scale
     self._scaled_height = tf.to_int32(tf.to_float(img_h) * scale)
     self._scaled_width = tf.to_int32(tf.to_float(img_w) * scale)
     return scale