Example #1
0
    def prepare(self, img):
        """Resize an image and subtract the mean before feature extraction.

        The image is scaled so that its shorter edge equals
        :obj:`self.min_size`. If the longer edge would then exceed
        :obj:`self.max_size`, the scale is chosen so that the longer edge
        equals :obj:`self.max_size` instead.

        After resizing, :obj:`self.mean` is subtracted from the image.

        Args:
            img (~numpy.ndarray): An image. This is in CHW and RGB format.
                The range of its value is :math:`[0, 255]`.

        Returns:
            ~numpy.ndarray:
            The resized, mean-subtracted image cast to
            :obj:`numpy.float32`.

        """
        height, width = img.shape[1:]
        short_edge = min(height, width)
        long_edge = max(height, width)

        scale = self.min_size / short_edge
        if scale * long_edge > self.max_size:
            # Scaling the short edge would overshoot; fit the long edge.
            scale = self.max_size / long_edge

        out_size = (int(height * scale), int(width * scale))
        resized = resize(img, out_size)
        return (resized - self.mean).astype(np.float32, copy=False)
def _mask_aggregation(bbox, cmask_prob, cmask_weight, size, binary_thresh):
    """Merge weighted per-box binary masks into a single mask and box.

    Each mask probability map is resized to its rounded bounding box,
    binarized with ``binary_thresh``, multiplied by its weight and
    accumulated on a canvas of shape ``size``. The tight box around the
    canvas pixels whose accumulated value is at least ``binary_thresh``
    is then cropped out.

    Args:
        bbox (array): Boxes in ``(y_min, x_min, y_max, x_max)`` order,
            one row per mask.
        cmask_prob (sequence of arrays): Mask probability maps, one per box.
        cmask_weight (array): One weight per box.
        size (tuple of int): Shape of the accumulation canvas.
        binary_thresh (float): Threshold used both to binarize each
            resized mask and to select pixels of the accumulated canvas.

    Returns:
        tuple: ``(mask, box)`` where ``mask`` is the cropped canvas with a
        leading axis of length one and ``box`` is a ``(1, 4)`` float32
        array, or ``(None, None)`` when no canvas pixel reaches the
        threshold.

    """
    assert bbox.shape[0] == len(cmask_prob)
    assert bbox.shape[0] == cmask_weight.shape[0]

    canvas = np.zeros(size, dtype=np.float32)
    for box, prob, weight in zip(bbox, cmask_prob, cmask_weight):
        top, left, bottom, right = np.round(box).astype(np.int32)
        box_h = bottom - top
        box_w = right - left
        if box_h <= 0 or box_w <= 0:
            # Boxes with no area after rounding contribute nothing.
            continue
        resized = resize(prob.astype(np.float32)[None], (box_h, box_w))
        binary = (resized >= binary_thresh).astype(np.float32)[0]
        canvas[top:bottom, left:right] += binary * weight

    rows, cols = np.where(canvas >= binary_thresh)
    if len(rows) == 0 or len(cols) == 0:
        return None, None

    # Upper bounds are exclusive, hence the +1.
    top, bottom = rows.min(), rows.max() + 1
    left, right = cols.min(), cols.max() + 1
    merged_box = np.array([top, left, bottom, right], dtype=np.float32)
    merged_mask = canvas[top:bottom, left:right]
    return merged_mask[None], merged_box[None]
Example #3
0
    def prepare(self, img):
        """Preprocess an image for feature extraction.

        The scale factor is chosen so that the shorter edge becomes
        :obj:`self.min_size`, unless that makes the longer edge larger
        than :obj:`self.max_size`; in that case the longer edge is fitted
        to :obj:`self.max_size`.

        The mean value :obj:`self.mean` is subtracted after resizing.

        Args:
            img (~numpy.ndarray): An image. This is in CHW and RGB format.
                The range of its value is :math:`[0, 255]`.

        Returns:
            ~numpy.ndarray:
            A preprocessed image of dtype :obj:`numpy.float32`.

        """
        in_h, in_w = img.shape[1:]

        ratio = self.min_size / min(in_h, in_w)
        if ratio * max(in_h, in_w) > self.max_size:
            ratio = self.max_size / max(in_h, in_w)

        target_hw = (int(in_h * ratio), int(in_w * ratio))
        img = resize(img, target_hw)

        centered = img - self.mean
        return centered.astype(np.float32, copy=False)
    def prepare_img(self, img, infer=False):
        """Resize an image for feature extraction and subtract the mean.

        A target length for the shorter edge is taken from
        :obj:`self.min_sizes`: the last entry when :obj:`infer` is true,
        otherwise an entry picked with :func:`random.choice`. The image is
        scaled so that its shorter edge matches that length; if the longer
        edge would then exceed :obj:`self.max_size`, the scale is chosen so
        that the longer edge equals :obj:`self.max_size`.

        Args:
            img (np.array): RGB img [3,H,W]
            infer (bool): Use the last entry of :obj:`self.min_sizes`
                instead of a random one.

        Returns:
            tuple: The preprocessed image (mean-subtracted, float32) and
            the resize scale that was applied.
        """
        height, width = img.shape[1], img.shape[2]
        target = self.min_sizes[-1] if infer else random.choice(self.min_sizes)

        short_edge = min(height, width)
        long_edge = max(height, width)
        scale = target / short_edge
        if scale * long_edge > self.max_size:
            scale = self.max_size / long_edge

        resized = resize(img, (int(height * scale), int(width * scale)))
        resized = (resized - self.mean).astype(np.float32, copy=False)
        return resized, scale
 def prepare(self, img):
     """Resize a CHW image and subtract ``self.mean``.

     The shorter edge is scaled to ``self.min_size`` unless the longer
     edge would then exceed ``self.max_size``, in which case the longer
     edge is scaled to ``self.max_size``. The result is cast to float32.
     """
     height, width = img.shape[1:]
     short_edge, long_edge = min(height, width), max(height, width)
     factor = self.min_size / short_edge
     if factor * long_edge > self.max_size:
         factor = self.max_size / long_edge
     new_hw = (int(height * factor), int(width * factor))
     resized = resize(img, new_hw)
     return (resized - self.mean).astype(np.float32, copy=False)
Example #6
0
    def prepare(self, orig_img, min_size=600, max_size=1000):
        """Return a resized, mean-subtracted float32 copy of an image.

        The input is copied, so ``orig_img`` itself is not modified. The
        shorter edge is scaled to ``min_size`` unless the longer edge
        would then exceed ``max_size``, in which case the longer edge is
        scaled to ``max_size``.

        Args:
            orig_img (~numpy.ndarray): An image with three axes; the last
                two are treated as height and width (CHW layout).
            min_size (int): Target length of the shorter edge.
            max_size (int): Upper bound for the longer edge.

        Returns:
            ~numpy.ndarray: The preprocessed image in CHW layout.
        """
        img = orig_img.copy().astype(np.float32)
        height, width = img.shape[1:]

        short_edge, long_edge = min(height, width), max(height, width)
        scale = min_size / short_edge
        if scale * long_edge > max_size:
            scale = max_size / long_edge

        img = resize(img, (int(height * scale), int(width * scale)))

        # Subtract in HWC layout so the mean lines up with the trailing
        # channel axis. The mean is stored as ``mean_bgr`` and reversed
        # here, presumably to match RGB channel order.
        hwc = img.transpose((1, 2, 0))  # C, H, W -> H, W, C
        hwc -= self.mean_bgr[::-1]
        return hwc.transpose((2, 0, 1))  # H, W, C -> C, H, W
Example #7
0
    def prepare(self, img):
        """Resize a CHW image and divide its values by 255.

        The shorter edge is scaled to ``self.min_size`` unless the longer
        edge would then exceed ``self.max_size``, in which case the longer
        edge is scaled to ``self.max_size``. No mean is subtracted.

        Args:
            img (~numpy.ndarray): An image with three axes; the last two
                are treated as height and width.

        Returns:
            ~numpy.ndarray: The resized image as float32, divided by 255.
        """
        height, width = img.shape[1:]

        short_edge = min(height, width)
        long_edge = max(height, width)
        scale = self.min_size / short_edge
        if scale * long_edge > self.max_size:
            scale = self.max_size / long_edge

        resized = resize(img, (int(height * scale), int(width * scale)))

        # The original code only subtracted the mean; this variant tries
        # simply rescaling to [0, 1] instead to see whether that works.
        return resized.astype(np.float32) / 255
Example #8
0
 def __call__(self, in_data):
     """Resize and randomly flip one sample.

     Args:
         in_data (tuple): Either ``(img, label, bbox, mask, i)`` or
             ``(img, bbox, label, i)``. Note that the position of
             ``label`` and ``bbox`` differs between the two layouts.

     Returns:
         tuple: ``(img, bbox, label, scale, mask)``. ``mask`` is ``None``
         when the input had no mask. When ``bbox`` is empty, the shorter
         tuple ``(img, [], [], 1)`` is returned instead.

     Raises:
         ValueError: If ``in_data`` has neither 4 nor 5 elements.
     """
     if len(in_data) == 5:
         img, label, bbox, mask, i = in_data
     elif len(in_data) == 4:
         img, bbox, label, i = in_data
         # No mask was supplied; skip every mask operation below.
         mask = None
     else:
         raise ValueError(
             'in_data must have 4 or 5 elements, got {}'.format(
                 len(in_data)))
     _, H, W = img.shape
     img = self.net.prepare(img)
     _, o_H, o_W = img.shape
     scale = o_H / H
     if len(bbox) == 0:
         return img, [], [], 1
     bbox = resize_bbox(bbox, (H, W), (o_H, o_W))
     if mask is not None:
         mask = resize(mask, (o_H, o_W))
     # horizontal flip
     img, params = transforms.random_flip(img,
                                          x_random=True,
                                          return_param=True)
     bbox = transforms.flip_bbox(bbox, (o_H, o_W), x_flip=params['x_flip'])
     if mask is not None:
         mask = transforms.flip(mask, x_flip=params['x_flip'])
         # NOTE(review): this writes a file on every call and looks like a
         # leftover debugging dump -- confirm whether it is still wanted.
         cv2.imwrite("gt_roi.png", mask[0] * 255)
     return img, bbox, label, scale, mask
def prepare_img(conf, img, resolution=-1):
    """Resize an image for feature extraction and subtract the mean.

    The shorter edge is scaled to :obj:`conf.min_sizes[resolution]`.
    If the longer edge would then exceed :obj:`conf.max_size`, the scale
    is chosen so that the longer edge equals :obj:`conf.max_size`.
    :obj:`conf.mean` is subtracted from the resized image.

    Args:
        conf: Object providing ``min_sizes``, ``max_size`` and ``mean``.
        img (np.array): RGB img [3,H,W]
        resolution (int): Index into :obj:`conf.min_sizes`.

    Returns:
        tuple: The preprocessed image (float32) and the resize scale.
    """
    height, width = img.shape[1], img.shape[2]
    target = conf.min_sizes[resolution]

    short_edge = min(height, width)
    long_edge = max(height, width)
    scale = target / short_edge
    if scale * long_edge > conf.max_size:
        scale = conf.max_size / long_edge

    resized = resize(img, (int(height * scale), int(width * scale)))
    resized = (resized - conf.mean).astype(np.float32, copy=False)
    return resized, scale
Example #10
0
 def __call__(self, in_data):
     """Map labels to ids, then resize and (in training) flip one sample.

     Labels are converted to one-based positions in ``self.labelids``.
     The image is passed through ``self.net.prepare`` and randomly
     flipped horizontally only when ``chainer.config.train`` is true.

     Args:
         in_data (tuple): Either ``(img, label, bbox, mask, i)`` or
             ``(img, bbox, label, i)``. Note that the position of
             ``label`` and ``bbox`` differs between the two layouts.

     Returns:
         tuple: ``(img, bbox, label, scale, mask, i)``. ``mask`` is
         ``None`` when the input had no mask. When ``bbox`` is empty, the
         shorter tuple ``(img, [], [], 1)`` is returned instead.

     Raises:
         ValueError: If ``in_data`` has neither 4 nor 5 elements.
     """
     if len(in_data) == 5:
         img, label, bbox, mask, i = in_data
     elif len(in_data) == 4:
         img, bbox, label, i = in_data
         # No mask was supplied; skip every mask operation below.
         mask = None
     else:
         raise ValueError(
             'in_data must have 4 or 5 elements, got {}'.format(
                 len(in_data)))
     label = [self.labelids.index(name) + 1 for name in label]
     _, H, W = img.shape
     if chainer.config.train:
         img = self.net.prepare(img)
     _, o_H, o_W = img.shape
     scale = o_H / H
     if len(bbox) == 0:
         return img, [], [], 1
     bbox = resize_bbox(bbox, (H, W), (o_H, o_W))
     if mask is not None:
         mask = resize(mask, (o_H, o_W))
     if chainer.config.train:
         # horizontal flip
         img, params = transforms.random_flip(
             img, x_random=True, return_param=True)
         bbox = transforms.flip_bbox(
             bbox, (o_H, o_W), x_flip=params['x_flip'])
         if mask is not None:
             mask = transforms.flip(mask, x_flip=params['x_flip'])
     return img, bbox, label, scale, mask, i
    def __call__(
            self,
            roi,
            mask,
            label,
            bbox,
            loc_normalize_mean=(0., 0., 0., 0.),
            loc_normalize_std=(0.2, 0.2, 0.5, 0.5),
            mask_size=(21, 21),
    ):
        """Assigns ground truth to sampled proposals.

        This function samples RoIs from the combination of :obj:`roi`
        and :obj:`bbox` (the ground truth boxes are appended to the
        candidates). At most :obj:`self.n_sample` RoIs are sampled; when
        :obj:`self.n_sample` is :obj:`None`, the number of candidates is
        used as the budget. The RoIs are assigned with the ground truth
        class labels as well as bounding box offsets and scales to match
        the ground truth bounding boxes. As many as
        :obj:`pos_ratio * n_sample` RoIs are sampled as foregrounds.

        Offsets and scales of bounding boxes are calculated using
        :func:`chainercv.links.model.faster_rcnn.bbox2loc`.
        Also, types of input arrays and output arrays are same.

        Here are notations.

        * :math:`S` is the total number of sampled RoIs.
        * :math:`L` is number of object classes possibly including the \
            background.
        * :math:`H` is the image height.
        * :math:`W` is the image width.
        * :math:`RH` is the mask height.
        * :math:`RW` is the mask width.

        Args:
            roi (array): Region of Interests (RoIs) from which we sample.
                Its shape is :math:`(R, 4)`
            mask (array): Ground truth masks.
                Its shape is :math:`(R', H, W)`.
            label (array): Ground truth bounding box labels. Its shape
                is :math:`(R',)`. Its range is :math:`[0, L - 1]`, where
                :math:`L` is the number of foreground classes.
            bbox (array): The coordinates of ground truth bounding boxes.
                Its shape is :math:`(R', 4)`.
            loc_normalize_mean (tuple of four floats): Mean values to normalize
                coordinates of bounding boxes.
            loc_normalize_std (tuple of four floats): Standard deviation of
                the coordinates of bounding boxes.
            mask_size (tuple of int or int): Generated mask size, which is
                equal to :math:`(RH, RW)`.

        Returns:
            (array, array, array, array):

            * **sample_roi**: Regions of interests that are sampled. \
                Its shape is :math:`(S, 4)`.
            * **gt_roi_mask**: Masks assigned to sampled RoIs. Its shape is \
                :math:`(S, RH, RW)`. Entries of background RoIs are \
                filled with -1.
            * **gt_roi_label**: Labels assigned to sampled RoIs. Its shape is \
                :math:`(S,)`. Its range is :math:`[0, L]`. The label with \
                value 0 is the background.
            * **gt_roi_loc**: Offsets and scales to match \
                the sampled RoIs to the ground truth bounding boxes. \
                Its shape is :math:`(S, 4)`.

        """

        # All sampling is done on the host; results are moved back to the
        # input's device at the end.
        xp = cuda.get_array_module(roi)
        roi = cuda.to_cpu(roi)
        mask = cuda.to_cpu(mask)
        label = cuda.to_cpu(label)
        bbox = cuda.to_cpu(bbox)

        if not isinstance(mask_size, tuple):
            mask_size = (mask_size, mask_size)

        n_bbox, _ = bbox.shape

        roi = np.concatenate((roi, bbox), axis=0)

        if self.n_sample is None:
            n_sample = roi.shape[0]
        else:
            n_sample = self.n_sample

        pos_roi_per_image = np.round(n_sample * self.pos_ratio)
        iou = bbox_iou(roi, bbox)
        gt_assignment = iou.argmax(axis=1)
        max_iou = iou.max(axis=1)

        # Offset range of classes from [0, n_fg_class - 1] to [1, n_fg_class].
        # The label with value 0 is the background.
        gt_roi_label = label[gt_assignment] + 1

        # Select foreground RoIs as those with >= pos_iou_thresh IoU.
        pos_index = np.where(max_iou >= self.pos_iou_thresh)[0]
        pos_roi_per_this_image = int(min(pos_roi_per_image, pos_index.size))
        if pos_index.size > 0:
            pos_index = np.random.choice(pos_index,
                                         size=pos_roi_per_this_image,
                                         replace=False)

        # Select background RoIs as those within
        # [neg_iou_thresh_lo, neg_iou_thresh_hi).
        neg_index = np.where((max_iou < self.neg_iou_thresh_hi)
                             & (max_iou >= self.neg_iou_thresh_lo))[0]
        # Use the resolved budget: self.n_sample may be None.
        neg_roi_per_this_image = n_sample - pos_roi_per_this_image
        neg_roi_per_this_image = int(
            min(neg_roi_per_this_image, neg_index.size))
        if neg_index.size > 0:
            neg_index = np.random.choice(neg_index,
                                         size=neg_roi_per_this_image,
                                         replace=False)

        # The indices that we're selecting (both foreground and background).
        keep_index = np.append(pos_index, neg_index)
        gt_roi_label = gt_roi_label[keep_index]
        gt_roi_label[pos_roi_per_this_image:] = 0  # negative labels --> 0
        sample_roi = roi[keep_index]

        # locs
        # Compute offsets and scales to match sampled RoIs to the GTs.
        loc_normalize_mean = np.array(loc_normalize_mean, np.float32)
        loc_normalize_std = np.array(loc_normalize_std, np.float32)
        gt_roi_loc = bbox2loc(sample_roi, bbox[gt_assignment[keep_index]])
        gt_roi_loc = gt_roi_loc - loc_normalize_mean
        gt_roi_loc = gt_roi_loc / loc_normalize_std

        # masks
        # Background entries keep the fill value -1.
        gt_roi_mask = -1 * np.ones(
            (len(keep_index), mask_size[0], mask_size[1]), dtype=np.int32)

        # Foreground RoIs occupy the first len(pos_index) rows of
        # sample_roi, so row i corresponds to pos_index[i].
        for i, pos_ind in enumerate(pos_index):
            # np.int was removed from NumPy; use an explicit width.
            bb = np.round(sample_roi[i]).astype(np.int32)
            gt_msk = mask[gt_assignment[pos_ind]]
            gt_roi_msk = gt_msk[bb[0]:bb[2], bb[1]:bb[3]]
            gt_roi_msk = resize(
                gt_roi_msk.astype(np.float32)[None], mask_size)[0]
            gt_roi_msk = (gt_roi_msk >= self.binary_thresh).astype(np.int32)
            gt_roi_mask[i] = gt_roi_msk

        if xp != np:
            sample_roi = cuda.to_gpu(sample_roi)
            gt_roi_mask = cuda.to_gpu(gt_roi_mask)
            gt_roi_label = cuda.to_gpu(gt_roi_label)
            gt_roi_loc = cuda.to_gpu(gt_roi_loc)

        return sample_roi, gt_roi_mask, gt_roi_label, gt_roi_loc
Example #12
0
    def predict(self, imgs):
        """Segment object instances from images.

        This method predicts instance-aware object regions for each image.
        Each image is preprocessed with :meth:`prepare`, passed through
        the network one at a time, and the resulting RoI masks are merged
        by :func:`mask_voting` and pasted into full-size masks.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images of shape
                :math:`(B, C, H, W)`.  All images are in CHW and RGB format
                and the range of their value is :math:`[0, 255]`.

        Returns:
           tuple of lists:
           This method returns a tuple of three lists,
           :obj:`(masks, labels, scores)`.

           * **masks**: A list of boolean arrays of shape :math:`(R, H, W)`, \
               where :math:`R` is the number of masks in a image. \
               Each pixel holds value if it is inside the object inside or not.
           * **labels** : A list of integer arrays of shape :math:`(R,)`. \
               Each value indicates the class of the masks. \
               Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
               number of the foreground classes.
           * **scores** : A list of float arrays of shape :math:`(R,)`. \
               Each value indicates how confident the prediction is.

        """

        prepared_imgs = []
        sizes = []
        for img in imgs:
            size = img.shape[1:]
            img = self.prepare(img.astype(np.float32))
            prepared_imgs.append(img)
            sizes.append(size)

        masks = []
        labels = []
        scores = []

        for img, size in zip(prepared_imgs, sizes):
            with chainer.using_config('train', False), \
                    chainer.function.no_backprop_mode():
                # inference
                img_var = chainer.Variable(self.xp.array(img[None]))
                scale = img_var.shape[3] / size[1]
                roi_ag_seg_scores, _, roi_cls_scores, bboxes, _ = \
                    self.__call__(img_var, scale)

            # We are assuming that batch size is 1.
            roi_ag_seg_score = chainer.cuda.to_cpu(roi_ag_seg_scores.array)
            roi_cls_score = chainer.cuda.to_cpu(roi_cls_scores.array)
            bbox = chainer.cuda.to_cpu(bboxes)

            # filter bounding boxes with min_size
            height = bbox[:, 2] - bbox[:, 0]
            width = bbox[:, 3] - bbox[:, 1]
            keep_indices = np.where(
                (height >= self.min_drop_size) &
                (width >= self.min_drop_size))[0]
            roi_ag_seg_score = roi_ag_seg_score[keep_indices, :, :]
            roi_cls_score = roi_cls_score[keep_indices]
            bbox = bbox[keep_indices, :]

            # scale bbox
            bbox = bbox / scale

            # shape: (n_rois, 4)
            # bbox already lives on the host, so clip with NumPy rather
            # than self.xp (which may be CuPy).
            bbox[:, 0::2] = np.clip(bbox[:, 0::2], 0, size[0])
            bbox[:, 1::2] = np.clip(bbox[:, 1::2], 0, size[1])

            # shape: (n_roi, roi_size, roi_size)
            roi_seg_prob = F.softmax(roi_ag_seg_score).array[:, 1]
            roi_cls_prob = F.softmax(roi_cls_score).array

            roi_seg_prob, bbox, label, roi_cls_prob = mask_voting(
                roi_seg_prob, bbox, roi_cls_prob, size,
                self.score_thresh, self.nms_thresh,
                self.mask_merge_thresh, self.binary_thresh,
                limit=self.limit, bg_label=0)

            # np.bool was removed from NumPy; the builtin bool is the
            # equivalent dtype.
            mask = np.zeros(
                (len(roi_seg_prob), size[0], size[1]), dtype=bool)
            for i, (roi_seg_pb, bb) in enumerate(zip(roi_seg_prob, bbox)):
                bb = np.round(bb).astype(np.int32)
                y_min, x_min, y_max, x_max = bb
                # Resize the RoI-sized probability map to the box and
                # paste its thresholded version into the full-size mask.
                roi_msk_pb = resize(
                    roi_seg_pb.astype(np.float32)[None],
                    (y_max - y_min, x_max - x_min))
                roi_msk = (roi_msk_pb > self.binary_thresh)[0]
                mask[i, y_min:y_max, x_min:x_max] = roi_msk

            masks.append(mask)
            labels.append(label)
            scores.append(roi_cls_prob)

        return masks, labels, scores
def mask_voting(roi_cmask_prob,
                bbox,
                roi_cls_prob,
                size,
                score_thresh,
                nms_thresh,
                mask_merge_thresh,
                binary_thresh,
                limit=100,
                bg_label=0):
    """Refine mask probabilities by merging multiple masks.

    First, this function discard invalid masks with non maximum suppression.
    Then, it merges masks with weight calculated from class probabilities and
    iou.
    This function improves the mask qualities by merging overlapped masks
    predicted as the same object class.

    Here are notations used.
    * :math:`R` is the total number of RoIs.
    * :math:`L` is the number of classes excluding the background.
    * :math:`RH` is the height of pooled image.
    * :math:`RW` is the width of pooled image.
    * :math:`N` is the number of merged masks that are returned.

    Args:
        roi_cmask_prob (array): A mask probability array whose shape is
            :math:`(R, RH, RW)`.
        bbox (array): A bounding box array whose shape is
            :math:`(R, 4)`.
        roi_cls_prob (array): A class probability array whose shape is
            :math:`(R, L + 1)`.
        size (tuple of int): Original image size.
        score_thresh (float): A threshold value of the class score.
        nms_thresh (float): A threshold value of non maximum suppression.
        mask_merge_thresh (float): A threshold value of the bounding box iou
            for mask merging.
        binary_thresh (float): A threshold value of mask score
            for mask merging.
        limit (int): The maximum number of outputs. The score threshold is
            raised to the :obj:`limit`-th highest score when that is
            larger than :obj:`score_thresh`.
        bg_label (int): The id of the background label.

    Returns:
        array, array, array, array:
        * **v_cmask_prob**: Merged mask probability. Its shapes is \
            :math:`(N, RH, RW)`.
        * **v_bbox**: Bounding boxes for the merged masks. Its shape is \
            :math:`(N, 4)`.
        * **v_label**: Class labels for the merged masks, given as the \
            index of the class among the foreground classes. Its shape is \
            :math:`(N, )`.
        * **v_score**: Class probabilities for the merged masks. Its shape \
            is :math:`(N, )`.

        All four arrays are empty (:math:`N = 0`) when there is no
        foreground class, no RoI, or no mask survives the thresholds.

    """

    roi_cmask_size = roi_cmask_prob.shape[1:]
    n_class = roi_cls_prob.shape[1]

    # Foreground classes in ascending id order. The position in this list
    # is the output label; with bg_label == 0 it equals ``label - 1``.
    fg_labels = [lb for lb in range(n_class) if lb != bg_label]

    cls_score = []
    cls_bbox = []
    for label in fg_labels:
        # non maximum suppression
        score_l = roi_cls_prob[:, label]
        keep_indices = non_maximum_suppression(bbox, nms_thresh, score_l)
        cls_bbox.append(bbox[keep_indices])
        cls_score.append(score_l[keep_indices])

    # Raise the threshold to the limit-th best score. Skipped when there
    # is nothing to rank (no foreground class or no surviving RoI), which
    # would otherwise fail on an empty concatenate / empty index.
    if cls_score:
        sorted_score = np.sort(np.concatenate(cls_score))[::-1]
        n_keep = min(len(sorted_score), limit)
        if n_keep > 0:
            score_thresh = max(sorted_score[n_keep - 1], score_thresh)

    v_cmask_prob = []
    v_bbox = []
    v_label = []
    v_cls_prob = []

    for fg_index, label in enumerate(fg_labels):
        bbox_l = cls_bbox[fg_index]
        score_l = cls_score[fg_index]
        keep_indices = np.where(score_l >= score_thresh)
        bbox_l = bbox_l[keep_indices]
        score_l = score_l[keep_indices]

        v_cmask_prob_l = []
        v_bbox_l = []
        v_score_l = []

        for i, bb in enumerate(bbox_l):
            # Merge every RoI that overlaps this box enough, weighted by
            # its normalized probability for the current class.
            iou = bbox_iou(bbox, bb[np.newaxis, :])
            keep_indices = np.where(iou >= mask_merge_thresh)[0]
            cmask_weight = roi_cls_prob[keep_indices, label]
            cmask_weight = cmask_weight / cmask_weight.sum()
            cmask_prob_i = roi_cmask_prob[keep_indices]
            bbox_i = bbox[keep_indices]
            m_cmask, m_bbox = _mask_aggregation(bbox_i, cmask_prob_i,
                                                cmask_weight, size,
                                                binary_thresh)
            if m_cmask is not None and m_bbox is not None:
                m_cmask = resize(m_cmask.astype(np.float32), roi_cmask_size)
                v_cmask_prob_l.append(m_cmask)
                v_bbox_l.append(m_bbox)
                v_score_l.append(score_l[i])

        if len(v_cmask_prob_l) > 0:
            v_cmask_prob_l = np.concatenate(v_cmask_prob_l)
            v_bbox_l = np.concatenate(v_bbox_l)
            v_score_l = np.array(v_score_l)

            v_label_l = np.repeat(fg_index, v_bbox_l.shape[0])
            v_label_l = v_label_l.astype(np.int32)
            v_cmask_prob.append(v_cmask_prob_l)
            v_bbox.append(v_bbox_l)
            v_label.append(v_label_l)
            v_cls_prob.append(v_score_l)

    if len(v_cmask_prob) > 0:
        v_cmask_prob = np.concatenate(v_cmask_prob)
        v_bbox = np.concatenate(v_bbox)
        v_label = np.concatenate(v_label)
        v_cls_prob = np.concatenate(v_cls_prob)
    else:
        v_cmask_prob = np.empty((0, roi_cmask_size[0], roi_cmask_size[1]))
        v_bbox = np.empty((0, 4))
        # Same integer dtype as the non-empty case.
        v_label = np.empty((0, ), dtype=np.int32)
        v_cls_prob = np.empty((0, ))
    return v_cmask_prob, v_bbox, v_label, v_cls_prob