    def clip_boxes(self, boxes):
        """Clip boxes to fit in an image."""
        boxes = tf.where(tf.less(boxes, 0), tf.zeros_like(boxes), boxes)
        is_height_short_side = tf.less(self._scaled_height, self._scaled_width)
        bound = tf.where(
            is_height_short_side,
            tf.convert_to_tensor(
                [self._output_size[0] - 1, self._output_size[1] - 1] * 2,
                dtype=tf.float32),
            tf.convert_to_tensor(
                [self._output_size[1] - 1, self._output_size[0] - 1] * 2,
                dtype=tf.float32))
        boxes = tf.where(tf.greater(boxes, bound), bound * tf.ones_like(boxes),
                         boxes)
        return boxes
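# A minimal numeric sketch of the clipping above, assuming the height is the
# short side. `output_size` stands in for self._output_size here, and the
# tf.maximum/tf.minimum pair is equivalent to the two tf.where calls.
import tensorflow as tf

output_size = (800, 1333)  # (short side, long side), illustrative values
boxes = tf.constant([[-5.0, 10.0, 900.0, 1400.0]])  # [y1, x1, y2, x2]
bound = tf.constant(
    [output_size[0] - 1, output_size[1] - 1] * 2, dtype=tf.float32)
clipped = tf.minimum(tf.maximum(boxes, 0.0), bound)
with tf.Session() as sess:
    print(sess.run(clipped))  # [[0. 10. 799. 1332.]]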
    def resize_and_crop_image(self, method=tf.image.ResizeMethod.BILINEAR):
        """Resize input image and crop it to the self._output dimension."""
        scaled_image = tf.image.resize_images(
            self._image, [self._scaled_height, self._scaled_width],
            method=method)

        is_height_short_side = tf.less(self._scaled_height, self._scaled_width)
        output_image = tf.cond(
            is_height_short_side,
            lambda: tf.image.pad_to_bounding_box(
                scaled_image, 0, 0, self._output_size[0], self._output_size[1]),  # pylint: disable=line-too-long
            lambda: tf.image.pad_to_bounding_box(
                scaled_image, 0, 0, self._output_size[1], self._output_size[0])  # pylint: disable=line-too-long
        )

        return output_image
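# For reference, tf.image.pad_to_bounding_box zero-pads from the given offset
# (here (0, 0)) out to the target size, which is how the short side gets
# padded up to the output dimension. A small sketch with illustrative sizes:
import tensorflow as tf

scaled_image = tf.zeros([600, 1000, 3])  # height is the short side
padded = tf.image.pad_to_bounding_box(scaled_image, 0, 0, 800, 1333)
print(padded.shape)  # (800, 1333, 3)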
    def _transform_images(self, params, features, labels=None):
        """Transforms images."""

        images = features['images']
        batch_size, _, _, c = images.get_shape().as_list()
        if params['conv0_space_to_depth_block_size'] != 0:
            # Transforms (space-to-depth) images for TPU performance.

            def _fused_transform(images, image_size):
                return spatial_transform.fused_transpose_and_space_to_depth(
                    images, image_size,
                    params['conv0_space_to_depth_block_size'],
                    params['transpose_input'])

            images = tf.cond(
                tf.less(features['image_info'][0, 3],
                        features['image_info'][0, 4]),
                lambda: _fused_transform(images, params['image_size']),
                lambda: _fused_transform(images, params['image_size'][::-1]))

        else:
            # Transposes images for TPU performance.
            image_area = params['image_size'][0] * params['image_size'][1]
            if params['transpose_input']:
                images = tf.transpose(images, [1, 2, 0, 3])
                # Flattens spatial dimensions so that the image tensor has a static
                # shape.
                images = tf.reshape(images, [image_area, batch_size, c])
            else:
                images = tf.reshape(images, [batch_size, image_area, c])

        if params['use_bfloat16']:
            images = tf.cast(images, dtype=tf.bfloat16)

        features['images'] = images

        if labels is not None:
            return features, labels
        else:
            return features, tf.zeros([batch_size])
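# spatial_transform.fused_transpose_and_space_to_depth is project-specific,
# but the space-to-depth half of it corresponds to tf.nn.space_to_depth,
# which trades spatial resolution for channels. A rough sketch of that part
# alone (the fused helper additionally handles the TPU input transpose):
import tensorflow as tf

images = tf.zeros([8, 1024, 1024, 3])  # [batch, height, width, channels]
s2d = tf.nn.space_to_depth(images, block_size=2)
print(s2d.shape)  # (8, 512, 512, 12)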
        def horizontal_image(*args):
            # image_info[3] and image_info[4] hold the scaled height and width;
            # the image is "horizontal" when it is wider than it is tall.
            image_info = args[0]['image_info']
            return tf.less(image_info[3], image_info[4])
        def _dataset_parser(value):
            """Parse data to a fixed dimension input image and learning targets.

            Args:
              value: A dictionary containing an image and groundtruth
                annotations.

            Returns:
              features: A dictionary that contains the image and auxiliary
                information. The following describes {key: value} pairs in the
                dictionary.
                image: An image tensor that is preprocessed to have normalized
                  values and a fixed dimension [image_size, image_size, 3].
                image_info: Image information that includes the original height
                  and width, the scale of the processed image relative to the
                  original image, and the scaled height and width.
                source_ids: Source image id. Defaults to -1 if the source id is
                  empty in the groundtruth annotation.
              labels: (only for training) A dictionary that contains groundtruth
                labels. The following describes {key: value} pairs in the
                dictionary.
                score_targets_dict: An ordered dictionary with keys
                  [min_level, min_level+1, ..., max_level]. The values are
                  tensors with shape [height_l, width_l, num_anchors], where
                  height_l and width_l are the dimensions of the objectness
                  score at the l-th level.
                box_targets_dict: An ordered dictionary with keys
                  [min_level, min_level+1, ..., max_level]. The values are
                  tensors with shape [height_l, width_l, num_anchors * 4],
                  where height_l and width_l are the dimensions of the bounding
                  box regression output at the l-th level.
                gt_boxes: Groundtruth bounding box annotations. Each box is in
                  [y1, x1, y2, x2] format. The tensor is padded with -1 to the
                  fixed dimension [self._max_num_instances, 4].
                gt_classes: Groundtruth class annotations. The tensor is padded
                  with -1 to the fixed dimension [self._max_num_instances].
                cropped_gt_masks: Groundtruth masks cropped by the bounding box
                  and resized to a fixed size determined by
                  params['gt_mask_size'].
            """
            with tf.name_scope('parser'):
                data = example_decoder.decode(value)

                image = data['image']
                source_id = data['source_id']
                source_id = tf.where(tf.equal(source_id, tf.constant('')),
                                     '-1', source_id)
                source_id = tf.string_to_number(source_id)

                if self._mode == tf.estimator.ModeKeys.PREDICT:
                    input_processor = InstanceSegmentationInputProcessor(
                        image, image_size, params['short_side_image_size'],
                        params['long_side_max_image_size'])
                    input_processor.normalize_image()
                    input_processor.set_scale_factors_to_mlperf_reference_size(
                    )
                    image = input_processor.resize_and_crop_image()
                    if params['use_bfloat16']:
                        image = tf.cast(image, dtype=tf.bfloat16)

                    image_info = input_processor.get_image_info()
                    return {
                        'images': image,
                        'image_info': image_info,
                        'source_ids': source_id
                    }

                # The following part is for training.
                instance_masks = data['groundtruth_instance_masks']
                boxes = data['groundtruth_boxes']
                classes = data['groundtruth_classes']
                classes = tf.reshape(tf.cast(classes, dtype=tf.float32),
                                     [-1, 1])
                if not params['use_category']:
                    classes = tf.cast(tf.greater(classes, 0), dtype=tf.float32)

                if (params['skip_crowd_during_training']
                        and self._mode == tf.estimator.ModeKeys.TRAIN):
                    indices = tf.where(
                        tf.logical_not(data['groundtruth_is_crowd']))
                    classes = tf.gather_nd(classes, indices)
                    boxes = tf.gather_nd(boxes, indices)
                    instance_masks = tf.gather_nd(instance_masks, indices)

                input_processor = InstanceSegmentationInputProcessor(
                    image, image_size, params['short_side_image_size'],
                    params['long_side_max_image_size'], boxes, classes,
                    instance_masks)
                input_processor.normalize_image()
                if params['input_rand_hflip']:
                    input_processor.random_horizontal_flip()

                input_processor.set_scale_factors_to_mlperf_reference_size()
                image = input_processor.resize_and_crop_image()

                boxes, classes = input_processor.resize_and_crop_boxes()
                cropped_gt_masks = input_processor.crop_gt_masks(
                    params['gt_mask_size'])

                image_info = input_processor.get_image_info()
                # Assign anchors.
                is_height_short_side = tf.less(image_info[3], image_info[4])
                score_targets, box_targets = tf.cond(
                    is_height_short_side,
                    lambda: anchor_labeler.label_anchors(boxes, classes),
                    lambda: height_long_side_anchor_labeler.label_anchors(boxes, classes))  # pylint: disable=line-too-long

                # Pad groundtruth data.
                boxes *= image_info[2]
                boxes = pad_to_fixed_size(boxes, -1,
                                          [self._max_num_instances, 4])
                classes = pad_to_fixed_size(classes, -1,
                                            [self._max_num_instances, 1])
                # Pads cropped_gt_masks.
                cropped_gt_masks = tf.reshape(
                    cropped_gt_masks, [-1, (params['gt_mask_size'] + 4)**2])
                cropped_gt_masks = pad_to_fixed_size(
                    cropped_gt_masks, -1,
                    [self._max_num_instances, (params['gt_mask_size'] + 4)**2])
                cropped_gt_masks = tf.reshape(cropped_gt_masks, [
                    self._max_num_instances, params['gt_mask_size'] + 4,
                    params['gt_mask_size'] + 4
                ])
                if params['use_bfloat16']:
                    image = tf.cast(image, dtype=tf.bfloat16)

                features = {}
                features['images'] = image
                features['image_info'] = image_info
                features['source_ids'] = source_id

                labels = {}
                for level in range(params['min_level'],
                                   params['max_level'] + 1):
                    labels['score_targets_%d' % level] = score_targets[level]
                    labels['box_targets_%d' % level] = box_targets[level]
                labels['gt_boxes'] = boxes
                labels['gt_classes'] = classes
                labels['cropped_gt_masks'] = cropped_gt_masks
                return features, labels
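# pad_to_fixed_size is an external helper; a minimal sketch of the behavior
# assumed above (pad the leading dimension with a constant value up to a
# fixed number of rows), not necessarily the project's implementation:
import tensorflow as tf

def pad_to_fixed_size(data, pad_value, output_shape):
    """Pads `data` with rows of `pad_value` up to output_shape[0] rows."""
    dimension = output_shape[1]
    data = tf.reshape(data, [-1, dimension])
    pad_length = output_shape[0] - tf.shape(data)[0]
    paddings = pad_value * tf.ones([pad_length, dimension], dtype=data.dtype)
    return tf.reshape(tf.concat([data, paddings], axis=0), output_shape)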
Example #6
    def clip_boxes(self, boxes):
        """Clip boxes to fit in an image."""
        boxes = tf.where(tf.less(boxes, 0), tf.zeros_like(boxes), boxes)
        boxes = tf.where(tf.greater(boxes, self._output_size - 1),
                         (self._output_size - 1) * tf.ones_like(boxes), boxes)
        return boxes
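# This variant assumes a square output (self._output_size is a scalar), so a
# single bound clips every coordinate. tf.clip_by_value expresses the same
# thing in one call:
import tensorflow as tf

output_size = 1024.0  # illustrative square output size
boxes = tf.constant([[-3.0, 5.0, 1030.0, 500.0]])
clipped = tf.clip_by_value(boxes, 0.0, output_size - 1)
with tf.Session() as sess:
    print(sess.run(clipped))  # [[0. 5. 1023. 500.]]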