Ejemplo n.º 1
0
    def _suppress(self, raw_cls_bbox, raw_prob, raw_depth, raw_y_rot):
        """Apply per-class score thresholding and NMS to raw detections.

        For every class index from 1 to ``self.n_class - 1`` (index 0 is the
        background class and is skipped), boxes whose probability exceeds
        ``self.score_thresh`` are handed to ``non_maximum_suppression`` and
        the surviving boxes, labels and scores are collected.

        Args:
            raw_cls_bbox: Class-wise boxes; reshaped to
                ``(-1, self.n_class, 4)`` before use.
            raw_prob: Per-class probabilities, indexed as
                ``raw_prob[:, class_id]``.
            raw_depth: Returned unchanged.
            raw_y_rot: Returned unchanged.

        Returns:
            tuple: ``(bbox, label, score, raw_depth, raw_y_rot)``. ``bbox``
            and ``score`` are float32, ``label`` is int32 with values in
            ``[0, self.n_class - 2]``.

        NOTE(review): ``raw_depth`` and ``raw_y_rot`` are neither masked by
        the score threshold nor filtered by NMS, so their rows presumably do
        not line up with the returned boxes -- confirm callers expect this.
        """
        bbox = []
        label = []
        score = []
        # The reshape does not depend on the class, so do it once up front.
        cls_bbox = raw_cls_bbox.reshape((-1, self.n_class, 4))
        # Skip class id 0 because it is the background class.
        for cls_id in range(1, self.n_class):
            cls_bbox_l = cls_bbox[:, cls_id, :]
            prob_l = raw_prob[:, cls_id]
            mask = prob_l > self.score_thresh
            cls_bbox_l = cls_bbox_l[mask]
            prob_l = prob_l[mask]
            # Boxes are converted with ``cp.array`` for the NMS call and the
            # kept indices are converted back with ``cp.asnumpy`` before
            # they are used to index the masked arrays.
            keep = non_maximum_suppression(cp.array(cls_bbox_l),
                                           self.nms_thresh, prob_l)
            keep = cp.asnumpy(keep)
            bbox.append(cls_bbox_l[keep])
            # The labels are in [0, self.n_class - 2].
            label.append((cls_id - 1) * np.ones((len(keep), )))
            score.append(prob_l[keep])
        bbox = np.concatenate(bbox, axis=0).astype(np.float32)
        label = np.concatenate(label, axis=0).astype(np.int32)
        score = np.concatenate(score, axis=0).astype(np.float32)
        return bbox, label, score, raw_depth, raw_y_rot
Ejemplo n.º 2
0
    def __call__(
            self,
            loc,
            score,
            anchor,
            img_size,
            scale=1.):
        """Turn anchor offsets and scores into a pruned set of RoIs.

        The original author notes that ``loc`` and ``score`` come from the
        1x1 convolution regression / classification heads of the region
        proposal network.

        Args:
            loc: Offsets passed to ``loc2bbox`` together with ``anchor``.
            score: Scores aligned with the rows of ``loc``.
            anchor: Anchor boxes decoded by ``loc2bbox``.
            img_size: Indexable pair; ``img_size[0]`` bounds columns 0 and 2
                of the boxes (height) and ``img_size[1]`` bounds columns 1
                and 3 (width).
            scale: Multiplier applied to ``self.min_size``.

        Returns:
            The boxes that survive clipping, the minimum-size filter, the
            pre-NMS top-k cut, NMS, and the post-NMS cut.
        """
        # Training and evaluation use different pre-/post-NMS budgets.
        is_training = self.parent_model.training
        n_pre_nms = (self.n_train_pre_nms if is_training
                     else self.n_test_pre_nms)
        n_post_nms = (self.n_train_post_nms if is_training
                      else self.n_test_post_nms)

        roi = loc2bbox(anchor, loc)

        # Clip the y coordinates (columns 0, 2) to [0, H] and the
        # x coordinates (columns 1, 3) to [0, W].
        roi[:, 0:4:2] = np.clip(roi[:, 0:4:2], 0, img_size[0])
        roi[:, 1:4:2] = np.clip(roi[:, 1:4:2], 0, img_size[1])

        # Drop boxes whose height or width is below the scaled minimum size.
        min_size = self.min_size * scale
        heights = roi[:, 2] - roi[:, 0]
        widths = roi[:, 3] - roi[:, 1]
        large_enough = np.where(
            (heights >= min_size) & (widths >= min_size))[0]
        roi = roi[large_enough, :]
        score = score[large_enough]

        # Rank by score, highest first, and keep at most n_pre_nms boxes
        # (a non-positive n_pre_nms disables the cut).
        ranking = score.ravel().argsort()[::-1]
        top_ranked = ranking[:n_pre_nms] if n_pre_nms > 0 else ranking
        roi = roi[top_ranked, :]

        # Run NMS over the score-sorted boxes, then apply the post-NMS cut
        # (again disabled when n_post_nms is non-positive).
        nms_input = cp.ascontiguousarray(cp.asarray(roi))
        selected = non_maximum_suppression(nms_input,
                                           thresh=self.nms_thresh)
        if n_post_nms > 0:
            selected = selected[:n_post_nms]
        return roi[selected]
 def nms_suppress(self, raw_cls_bbox, raw_prob):
     """Threshold scores and run NMS separately for each non-zero class.

     Args:
         raw_cls_bbox: Class-wise boxes; reshaped to
             ``(-1, self.n_class, 4)`` before use.
         raw_prob: Per-class probabilities, indexed as
             ``raw_prob[:, class_id]``.

     Returns:
         tuple: ``(bbox, label, score)`` with ``bbox`` and ``score`` as
         float32 and ``label`` as int32 in ``[0, self.n_class - 2]``.
     """
     boxes_per_class = []
     labels_per_class = []
     scores_per_class = []
     # Class index 0 is skipped (presumably the background class).
     for cls_id in range(1, self.n_class):
         candidates = raw_cls_bbox.reshape(
             (-1, self.n_class, 4))[:, cls_id, :]
         confidences = raw_prob[:, cls_id]
         # Keep only detections scoring above the threshold.
         confident = confidences > self.score_thresh
         candidates = candidates[confident]
         confidences = confidences[confident]
         survivors = non_maximum_suppression(
             np.array(candidates), self.nms_thresh, confidences)
         boxes_per_class.append(candidates[survivors])
         # Labels are shifted down by one, so they lie in
         # [0, self.n_class - 2].
         labels_per_class.append(
             (cls_id - 1) * np.ones((len(survivors), )))
         scores_per_class.append(confidences[survivors])
     return (
         np.concatenate(boxes_per_class, axis=0).astype(np.float32),
         np.concatenate(labels_per_class, axis=0).astype(np.int32),
         np.concatenate(scores_per_class, axis=0).astype(np.float32),
     )
Ejemplo n.º 4
0
    def __call__(self, loc, score, anchor, img_size, scale=1.):
        """Propose RoIs from anchor offsets and foreground scores.

        Inputs are expected to be ndarrays. ``loc``, ``score`` and
        ``anchor`` describe the same anchor at the same index. With
        :math:`R` the total number of anchors (image height times image
        width times the number of anchor bases per pixel):

        Args:
            loc (array): Predicted offsets and scaling to anchors.
                Its shape is :math:`(R, 4)`.
            score (array): Predicted foreground probability for anchors.
                Its shape is :math:`(R,)`.
            anchor (array): Coordinates of anchors. Its shape is
                :math:`(R, 4)`.
            img_size (tuple of ints): A tuple :obj:`height, width`,
                which contains image size after scaling.
            scale (float): The scaling factor used to scale an image after
                reading it from a file; it multiplies :obj:`self.min_size`.

        Returns:
            array:
            Proposal box coordinates of shape :math:`(S, 4)`. When the
            post-NMS limit is positive, :math:`S` is at most
            :obj:`self.n_test_post_nms` at test time and at most
            :obj:`self.n_train_post_nms` at train time. :math:`S` also
            depends on how many boxes the size filter and NMS discard.
        """
        # NOTE: the budgets below follow ``self.parent_model.training``, so
        # remember to call ``faster_rcnn.eval()`` before testing.
        if self.parent_model.training:
            pre_nms_limit, post_nms_limit = (self.n_train_pre_nms,
                                             self.n_train_post_nms)
        else:
            pre_nms_limit, post_nms_limit = (self.n_test_pre_nms,
                                             self.n_test_post_nms)

        # Decode anchors plus offsets into proposal boxes.
        roi = loc2bbox(anchor, loc)

        # Clip boxes to the image: columns 0 and 2 against the height,
        # columns 1 and 3 against the width.
        roi[:, 0:4:2] = np.clip(roi[:, 0:4:2], 0, img_size[0])
        roi[:, 1:4:2] = np.clip(roi[:, 1:4:2], 0, img_size[1])

        # Discard boxes whose height or width is below the scaled threshold.
        min_size = self.min_size * scale
        tall_enough = (roi[:, 2] - roi[:, 0]) >= min_size
        wide_enough = (roi[:, 3] - roi[:, 1]) >= min_size
        valid = np.where(tall_enough & wide_enough)[0]
        roi = roi[valid, :]
        score = score[valid]

        # Sort the remaining proposals by score, highest first, and keep the
        # top ``pre_nms_limit`` of them (no cut when the limit is <= 0).
        by_score = score.ravel().argsort()[::-1]
        if pre_nms_limit > 0:
            by_score = by_score[:pre_nms_limit]
        roi = roi[by_score, :]

        # Apply NMS to the sorted boxes, then keep the first
        # ``post_nms_limit`` survivors (no cut when the limit is <= 0).
        # TODO: remove the round trip through ``cp`` arrays.
        contiguous_roi = cp.ascontiguousarray(cp.asarray(roi))
        kept = non_maximum_suppression(contiguous_roi,
                                       thresh=self.nms_thresh)
        if post_nms_limit > 0:
            kept = kept[:post_nms_limit]
        return roi[kept]