def _process_mask(mask, ignore_label, image_info):
      """Resizes or pads a segmentation mask and flags the filled-in region.

      `self` and `data` are not parameters; both are read from the enclosing
      scope.

      Args:
        mask: mask tensor that can be reshaped to
          [1, data['height'], data['width'], 1].
        ignore_label: value written wherever the processed mask equals -1.
        image_info: 2D tensor; row 2 is used as the image scale and row 3 as
          the offset when the mask is resized.

      Returns:
        The processed float32 mask with its leading axis squeezed away.
      """
      batched_shape = [1, data['height'], data['width'], 1]
      shifted = tf.reshape(tf.cast(mask, dtype=tf.float32), shape=batched_shape)
      # Shift labels up by one so that zero-valued fill can be told apart from
      # a genuine label 0 once the shift is undone below.
      shifted = shifted + 1

      if self._segmentation_resize_eval_groundtruth:
        # Bring the mask to the input image size, so mean IoU is computed on
        # output_size rather than on the original image size.
        scale = image_info[2, :]
        shift = image_info[3, :]
        shifted = preprocess_ops.resize_and_crop_masks(
            shifted, scale, self._output_size, shift)
      else:
        padded_size = self._segmentation_groundtruth_padded_size
        shifted = tf.image.pad_to_bounding_box(
            shifted, 0, 0, padded_size[0], padded_size[1])

      restored = shifted - 1
      # Anything that is now -1 came from fill, so give it the ignore label.
      is_fill = tf.equal(restored, -1)
      ignore_fill = ignore_label * tf.ones_like(restored)
      restored = tf.where(is_fill, ignore_fill, restored)
      return tf.squeeze(restored, axis=0)
Ejemplo n.º 2
0
    def _parse_train_data(self, data):
        """Builds the training image and segmentation labels for one example.

        Optionally takes a joint random crop of image and label, optionally
        applies a random horizontal flip, then resizes/crops both to
        `self._output_size`.

        Args:
          data: decoded example dictionary. 'image/height' and 'image/width'
            are read when cropping is enabled.

        Returns:
          A tuple (image, labels). `image` is cast to `self._dtype`; `labels`
          is a dict with keys 'masks', 'valid_masks' and 'image_info'.

        Raises:
          ValueError: if cropping is enabled and the image is smaller than
            `self._output_size` in either dimension.
        """
        image, label = self._prepare_image_and_label(data)

        if self._train_on_crops:
            too_short = data['image/height'] < self._output_size[0]
            if too_short or data['image/width'] < self._output_size[1]:
                raise ValueError(
                    'Image size has to be larger than crop size (output_size)')
            label_hwc = tf.reshape(
                label, [data['image/height'], data['image/width'], 1])
            # Stack label behind the image channels so a single random crop
            # keeps the two aligned.
            stacked = tf.concat([image, label_hwc], axis=2)
            cropped = tf.image.random_crop(stacked, self._output_size + [4])
            image = cropped[:, :, :-1]
            label = tf.reshape(cropped[:, :, -1], [1] + self._output_size)

        if self._aug_rand_hflip:
            # No boxes are passed, so the middle return value is discarded.
            image, _, label = preprocess_ops.random_horizontal_flip(
                image, masks=label)

        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            self._output_size,
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max)

        # Scale and offset that were applied to the image; reused for the label.
        scale = image_info[2, :]
        shift = image_info[3, :]

        # Shift labels up by one before resizing so that zero-valued fill
        # becomes -1 after shifting back, and can be mapped to the ignore label.
        shifted = tf.expand_dims(label + 1, axis=3)
        shifted = preprocess_ops.resize_and_crop_masks(
            shifted, scale, self._output_size, shift)
        label = shifted - 1
        ignore_fill = self._ignore_label * tf.ones_like(label)
        label = tf.where(tf.equal(label, -1), ignore_fill, label)
        label = tf.squeeze(label, axis=0)

        labels = {
            'masks': label,
            'valid_masks': tf.not_equal(label, self._ignore_label),
            'image_info': image_info,
        }

        return tf.cast(image, dtype=self._dtype), labels
Ejemplo n.º 3
0
    def _parse_eval_data(self, data):
        """Builds the evaluation image and segmentation labels for one example.

        The image is resized/cropped to `self._output_size` and normalized
        with MEAN_RGB / STDDEV_RGB. The label is either resized the same way
        or padded to `self._groundtruth_padded_size`, depending on
        `self._resize_eval_groundtruth`.

        Args:
          data: decoded example dictionary, forwarded to
            `self._prepare_image_and_label`.

        Returns:
          A tuple (image, labels). `image` is cast to `self._dtype`; `labels`
          is a dict with keys 'masks', 'valid_masks' and 'image_info'.
        """
        image, label = self._prepare_image_and_label(data)

        # Shift labels up by one so that zero-valued fill becomes -1 after
        # shifting back, and can be mapped to the ignore label.
        shifted = tf.expand_dims(label + 1, axis=3)

        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            self._output_size,
            preserve_aspect_ratio=self._preserve_aspect_ratio)

        if self._resize_eval_groundtruth:
            # Match the label to the input image size, so mean IoU is
            # computed on output_size rather than the original image size.
            scale = image_info[2, :]
            shift = image_info[3, :]
            shifted = preprocess_ops.resize_and_crop_masks(
                shifted, scale, self._output_size, shift)
        else:
            padded_size = self._groundtruth_padded_size
            shifted = tf.image.pad_to_bounding_box(
                shifted, 0, 0, padded_size[0], padded_size[1])

        label = shifted - 1
        ignore_fill = self._ignore_label * tf.ones_like(label)
        label = tf.where(tf.equal(label, -1), ignore_fill, label)
        label = tf.squeeze(label, axis=0)

        labels = {
            'masks': label,
            'valid_masks': tf.not_equal(label, self._ignore_label),
            'image_info': image_info
        }

        # Normalize with the mean and stddev pixel values, then cast.
        normalized = preprocess_ops.normalize_image(
            image, offset=MEAN_RGB, scale=STDDEV_RGB)

        return tf.cast(normalized, dtype=self._dtype), labels
Ejemplo n.º 4
0
    def _parse_train_data(self, data):
        """Parses one decoded example into a training image and label dict.

        Optionally applies a random horizontal flip to image and label, then
        resizes/crops both to `self._output_size` with optional scale jitter.

        Args:
          data: decoded example dictionary, forwarded to
            `self._prepare_image_and_label`.

        Returns:
          A tuple (image, labels). `image` is cast to `self._dtype`; `labels`
          is a dict with keys 'masks', 'valid_masks' and 'image_info'.
        """
        image, label = self._prepare_image_and_label(data)

        # Flips image randomly during training.
        if self._aug_rand_hflip:
            # random_horizontal_flip returns (image, boxes, masks). No boxes
            # are passed here, so the middle element is discarded; unpacking
            # into only two names would fail.
            image, _, label = preprocess_ops.random_horizontal_flip(
                image, masks=label)

        # Resizes and crops image.
        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            self._output_size,
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max)

        # Scale and offset that were applied to the image; reused for the label.
        image_scale = image_info[2, :]
        offset = image_info[3, :]

        # Pad label and make sure the padded region assigned to the ignore label.
        # The label is first offset by +1 and then padded with 0.
        label += 1
        label = tf.expand_dims(label, axis=3)
        label = preprocess_ops.resize_and_crop_masks(label, image_scale,
                                                     self._output_size, offset)
        label -= 1
        label = tf.where(tf.equal(label, -1),
                         self._ignore_label * tf.ones_like(label), label)
        label = tf.squeeze(label, axis=0)
        valid_mask = tf.not_equal(label, self._ignore_label)
        labels = {
            'masks': label,
            'valid_masks': valid_mask,
            'image_info': image_info,
        }

        # Cast image as self._dtype
        image = tf.cast(image, dtype=self._dtype)

        return image, labels
    def _parse_eval_data(self, data):
        """Builds the evaluation image and segmentation labels for one example.

        When `self._resize_eval` is set, image and label are resized/cropped
        to `self._output_size`; otherwise both are padded to that size.

        Args:
          data: decoded example dictionary, forwarded to
            `self._prepare_image_and_label`.

        Returns:
          A tuple (image, labels). `image` is cast to `self._dtype`; `labels`
          is a dict with keys 'masks' and 'valid_masks'.
        """
        image, label = self._prepare_image_and_label(data)

        # Shift labels up by one so that zero-valued fill becomes -1 after
        # shifting back, and can be mapped to the ignore label.
        shifted = tf.expand_dims(label + 1, axis=3)

        if self._resize_eval:
            image, image_info = preprocess_ops.resize_and_crop_image(
                image, self._output_size, self._output_size)

            # Reuse the scale and offset applied to the image for the label.
            scale = image_info[2, :]
            shift = image_info[3, :]
            shifted = preprocess_ops.resize_and_crop_masks(
                shifted, scale, self._output_size, shift)
        else:
            # Pad both image and label out to the output size.
            image = tf.image.pad_to_bounding_box(
                image, 0, 0, self._output_size[0], self._output_size[1])
            shifted = tf.image.pad_to_bounding_box(
                shifted, 0, 0, self._output_size[0], self._output_size[1])

        label = shifted - 1
        ignore_fill = self._ignore_label * tf.ones_like(label)
        label = tf.where(tf.equal(label, -1), ignore_fill, label)
        label = tf.squeeze(label, axis=0)

        labels = {
            'masks': label,
            'valid_masks': tf.not_equal(label, self._ignore_label),
        }

        return tf.cast(image, dtype=self._dtype), labels
Ejemplo n.º 6
0
    def _parse_eval_data(self, data):
        """Parses data for evaluation and attaches the segmentation groundtruth.

        The image and base labels come from the parent class's
        `_parse_eval_data`; this method adds the processed segmentation mask
        and its validity mask to `labels['groundtruths']`.

        Args:
          data: the decoded tensor dictionary. 'groundtruth_segmentation_mask',
            'height' and 'width' are read here.

        Returns:
          A tuple (image, labels) as returned by the parent parser, with
          `labels['groundtruths']` extended by:
            gt_segmentation_mask: the mask, either resized/cropped to
              `self._output_size` or padded to
              `self._segmentation_groundtruth_padded_size`, with the
              filled-in region set to `self._segmentation_ignore_label`.
            gt_segmentation_valid_mask: boolean tensor that is True where the
              mask differs from `self._segmentation_ignore_label`.
        """
        batched_shape = [1, data['height'], data['width'], 1]
        seg_mask = tf.reshape(
            tf.cast(data['groundtruth_segmentation_mask'], tf.float32),
            shape=batched_shape)
        # Shift labels up by one so that zero-valued fill becomes -1 after
        # shifting back, and can be mapped to the ignore label.
        seg_mask = seg_mask + 1

        image, labels = super(Parser, self)._parse_eval_data(data)

        if self._segmentation_resize_eval_groundtruth:
            # Match the mask to the input image size, so mean IoU is computed
            # on output_size rather than the original image size.
            info = labels['image_info']
            scale = info[2, :]
            shift = info[3, :]
            seg_mask = preprocess_ops.resize_and_crop_masks(
                seg_mask, scale, self._output_size, shift)
        else:
            padded_size = self._segmentation_groundtruth_padded_size
            seg_mask = tf.image.pad_to_bounding_box(
                seg_mask, 0, 0, padded_size[0], padded_size[1])

        seg_mask = seg_mask - 1
        # Anything that is now -1 came from fill; give it the ignore label.
        ignore_fill = self._segmentation_ignore_label * tf.ones_like(seg_mask)
        seg_mask = tf.where(tf.equal(seg_mask, -1), ignore_fill, seg_mask)
        seg_mask = tf.squeeze(seg_mask, axis=0)
        seg_valid = tf.not_equal(seg_mask, self._segmentation_ignore_label)

        labels['groundtruths'].update({
            'gt_segmentation_mask': seg_mask,
            'gt_segmentation_valid_mask': seg_valid,
        })
        return image, labels
Ejemplo n.º 7
0
    def _parse_train_data(self, data):
        """Parses data for training.

        The image, boxes, instance masks and segmentation mask are optionally
        flipped together, the parent parser builds the base labels, and the
        segmentation mask is then resized/cropped with the same scale and
        offset the parent applied to the image.

        Args:
          data: the decoded tensor dictionary from TfExampleDecoder. When
            flipping is enabled, the flipped image, boxes and instance masks
            are written back into this dictionary before it is handed to the
            parent parser.

        Returns:
          image: image tensor that is preprocessed to have normalized value and
            dimension [output_size[0], output_size[1], 3]
          labels: a dictionary of tensors used for training. The following
            describes {key: value} pairs in the dictionary.
            image_info: a 2D `Tensor` that encodes the information of the image
              and the applied preprocessing. Rows 2 and 3 are read here as the
              image scale and the offset.
            anchor_boxes: ordered dictionary with keys
              [min_level, min_level+1, ..., max_level]. The values are tensor
              with shape [height_l, width_l, 4] representing anchor boxes at
              each level.
            rpn_score_targets: ordered dictionary with keys
              [min_level, min_level+1, ..., max_level]. The values are tensor
              with shape [height_l, width_l, anchors_per_location]. The
              height_l and width_l represent the dimension of class logits at
              l-th level.
            rpn_box_targets: ordered dictionary with keys
              [min_level, min_level+1, ..., max_level]. The values are tensor
              with shape [height_l, width_l, anchors_per_location * 4]. The
              height_l and width_l represent the dimension of bounding box
              regression output at l-th level.
            gt_boxes: Groundtruth bounding box annotations. The box is
              represented in [y1, x1, y2, x2] format. The coordinates are
              w.r.t the scaled image that is fed to the network. The tensor is
              padded with -1 to the fixed dimension
              [self._max_num_instances, 4].
            gt_classes: Groundtruth classes annotations. The tensor is padded
              with -1 to the fixed dimension [self._max_num_instances].
            gt_masks: Groundtruth masks cropped by the bounding box and
              resized to a fixed size determined by mask_crop_size.
            gt_segmentation_mask: Groundtruth mask for segmentation head, this
              is resized to a fixed size determined by output_size.
            gt_segmentation_valid_mask: Binary mask that marks the pixels that
              are supposed to be used in computing the segmentation loss while
              training.
        """
        segmentation_mask = data['groundtruth_segmentation_mask']

        # Flips image randomly during training.
        if self.aug_rand_hflip:
            masks = data['groundtruth_instance_masks']
            # Stack the segmentation mask behind the image channels so both
            # receive the same flip decision.
            image_mask = tf.concat([data['image'], segmentation_mask], axis=2)

            image_mask, boxes, masks = preprocess_ops.random_horizontal_flip(
                image_mask, data['groundtruth_boxes'], masks)

            segmentation_mask = image_mask[:, :, -1:]
            image = image_mask[:, :, :-1]

            data['image'] = image
            # Write the flipped annotations back under the keys they were read
            # from, so a parser that consumes the groundtruth_* entries sees
            # boxes and masks that match the flipped image.
            data['groundtruth_boxes'] = boxes
            data['groundtruth_instance_masks'] = masks
            # Keep the previously written keys for backward compatibility.
            data['boxes'] = boxes
            data['masks'] = masks

        image, labels = super(Parser, self)._parse_train_data(data)

        # Scale and offset the parent applied to the image; reused for the
        # segmentation mask so the two stay aligned.
        image_info = labels['image_info']
        image_scale = image_info[2, :]
        offset = image_info[3, :]

        segmentation_mask = tf.reshape(
            segmentation_mask, shape=[1, data['height'], data['width']])
        segmentation_mask = tf.cast(segmentation_mask, tf.float32)

        # Pad label and make sure the padded region assigned to the ignore label.
        # The label is first offset by +1 and then padded with 0.
        segmentation_mask += 1
        segmentation_mask = tf.expand_dims(segmentation_mask, axis=3)
        segmentation_mask = preprocess_ops.resize_and_crop_masks(
            segmentation_mask, image_scale, self._output_size, offset)
        segmentation_mask -= 1
        segmentation_mask = tf.where(
            tf.equal(segmentation_mask, -1),
            self._segmentation_ignore_label * tf.ones_like(segmentation_mask),
            segmentation_mask)
        segmentation_mask = tf.squeeze(segmentation_mask, axis=0)
        segmentation_valid_mask = tf.not_equal(segmentation_mask,
                                               self._segmentation_ignore_label)

        labels.update({
            'gt_segmentation_mask': segmentation_mask,
            'gt_segmentation_valid_mask': segmentation_valid_mask
        })

        return image, labels
Ejemplo n.º 8
0
    def _parse_train_data(self, data):
        """Builds the augmented training image and segmentation labels.

        Steps, in order: optional resize plus joint random crop, optional
        random horizontal flip, optional random rotation, resize/crop with
        scale jitter, optional random brightness, label resize with
        ignore-label fill, optional augmenter, then normalization and cast.

        Args:
          data: decoded example dictionary. 'image/height' and 'image/width'
            are read when `self._crop_size` is set.

        Returns:
          A tuple (image, labels). `image` is normalized with MEAN_RGB /
          STDDEV_RGB and cast to `self._dtype`; `labels` is a dict with keys
          'masks', 'valid_masks' and 'image_info'.
        """
        image, label = self._prepare_image_and_label(data)

        if self._crop_size:
            label = tf.reshape(
                label, [data['image/height'], data['image/width'], 1])
            # With an explicit output_size, bring image and label to that
            # size before cropping.
            if self._output_size:
                image = tf.image.resize(
                    image, self._output_size, method='bilinear')
                label = tf.image.resize(
                    label, self._output_size, method='nearest')

            # Stack label behind the image channels so a single random crop
            # keeps the two aligned.
            stacked = tf.concat([image, label], axis=2)
            cropped = tf.image.random_crop(stacked, self._crop_size + [4])
            image = cropped[:, :, :-1]
            label = tf.reshape(cropped[:, :, -1], [1] + self._crop_size)

        if self._aug_rand_hflip:
            # No boxes are passed, so the middle return value is discarded.
            image, _, label = preprocess_ops.random_horizontal_flip(
                image, masks=label)

        if self._crop_size:
            train_image_size = self._crop_size
        else:
            train_image_size = self._output_size

        # Rotation is applied only when both bounds are non-zero and form a
        # non-empty range.
        rotation_enabled = (self._rotate_min != 0.0
                            and self._rotate_max != 0.0
                            and self._rotate_min < self._rotate_max)
        if rotation_enabled:
            image, label = preprocess_ops.random_rotation(
                image,
                masks=label,
                rotate_max=self._rotate_max,
                rotate_min=self._rotate_min,
                ignore_label=self._ignore_label)

        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            train_image_size,
            train_image_size,
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max,
            preserve_aspect_ratio=self._preserve_aspect_ratio)

        # Brightness is changed only when both bounds differ from 1.0 and
        # form a non-empty range.
        brightness_enabled = (self._bright_min != 1.0
                              and self._bright_max != 1.0
                              and self._bright_min < self._bright_max)
        if brightness_enabled:
            image = preprocess_ops.random_brightness(
                image,
                bright_min=self._bright_min,
                bright_max=self._bright_max)

        # Scale and offset that were applied to the image; reused for the label.
        scale = image_info[2, :]
        shift = image_info[3, :]

        # Shift labels up by one before resizing so that zero-valued fill
        # becomes -1 after shifting back, and can be mapped to the ignore label.
        shifted = tf.expand_dims(label + 1, axis=3)
        shifted = preprocess_ops.resize_and_crop_masks(
            shifted, scale, train_image_size, shift)
        label = shifted - 1
        ignore_fill = self._ignore_label * tf.ones_like(label)
        label = tf.where(tf.equal(label, -1), ignore_fill, label)
        label = tf.squeeze(label, axis=0)

        # Apply the configured augmenter to image and label together.
        if self._augmenter is not None:
            image, label = self._augmenter.distort_image_and_mask(
                image, label, self._ignore_label)

        labels = {
            'masks': label,
            'valid_masks': tf.not_equal(label, self._ignore_label),
            'image_info': image_info,
        }

        # Normalize only after the augmenter, since certain ops rely on uint8.
        normalized = preprocess_ops.normalize_image(
            image, offset=MEAN_RGB, scale=STDDEV_RGB)

        return tf.cast(normalized, dtype=self._dtype), labels