def clip_bboxs_on_image(rois, roi_locs):
    """Decode two-group box offsets against their RoIs and clip the result.

    ``roi_locs`` is de-normalised with a fixed mean of 0 and a std of
    (0.1, 0.1, 0.2, 0.2), regrouped as ``(-1, 2, 4)``, decoded through
    ``loc2bbox`` and finally clamped to ``[0, 800]`` on every coordinate.

    :param rois: RoIs accepted by ``at.totensor``; viewed as ``(-1, 1, 4)``
    :param roi_locs: Tensor of normalised offsets, 8 values per RoI
        (presumably a CUDA tensor, since it is combined with CUDA tensors
        below -- TODO confirm against callers)
    :return: Tensor of shape ``(R, 4)`` with the decoded boxes of group 1
    """
    n_groups = 2  # each RoI carries two sets of 4 offsets

    # Normalisation constants tiled once per group -> shape (1, 8).
    offset = torch.Tensor((0., 0., 0., 0.)).cuda().repeat(n_groups)[None]
    spread = torch.Tensor((0.1, 0.1, 0.2, 0.2)).cuda().repeat(n_groups)[None]

    deltas = (roi_locs * spread + offset).view(-1, n_groups, 4)
    # Pair every RoI with each of its offset groups.
    anchors = at.totensor(rois).view(-1, 1, 4).expand_as(deltas)

    decoded = loc2bbox(at.tonumpy(anchors).reshape((-1, 4)),
                       at.tonumpy(deltas).reshape((-1, 4)))
    flat = at.totensor(decoded).view(-1, 4 * n_groups)

    # Even and odd columns are clamped separately, both to the fixed 800
    # limit (NOTE(review): hard-coded bound; presumably the image side).
    flat[:, 0::2] = flat[:, 0::2].clamp(min=0, max=800)
    flat[:, 1::2] = flat[:, 1::2].clamp(min=0, max=800)

    # Only the second group (index 1) is returned.
    return flat.reshape((-1, n_groups, 4))[:, 1, :]
Example #2
0
 def __call__(self, loc, score, anchor, img_size, scale=1.):
     """Turn anchors plus predicted offsets into a filtered set of RoIs.

     Steps: decode boxes with ``loc2bbox``, clip them to the image, drop
     boxes smaller than ``self.min_size * scale``, keep the top-scoring
     ``n_pre_nms`` boxes, run NMS and keep at most ``n_post_nms`` of them.
     The train/test limits are chosen from ``self.parent_model.training``.

     Args:
         loc: Predicted offsets passed to ``loc2bbox`` together with
             ``anchor``.
         score: Per-anchor scores; indexable by the kept-box indices.
         anchor: Anchor boxes.
         img_size: Pair whose element 0 bounds columns 0 and 2 of the boxes
             and whose element 1 bounds columns 1 and 3.
         scale: Factor applied to ``self.min_size`` so the size threshold
             is expressed in the coordinates of the rescaled image.

     Returns:
         Array of the surviving boxes, one row of 4 values per box.
     """
     if self.parent_model.training:
         n_pre_nms = self.n_train_pre_nms
         n_post_nms = self.n_train_post_nms
     else:
         n_pre_nms = self.n_test_pre_nms
         n_post_nms = self.n_test_post_nms

     roi = loc2bbox(anchor, loc)

     # Clip columns 0/2 to [0, img_size[0]] and columns 1/3 to
     # [0, img_size[1]].
     roi[:, slice(0, 4, 2)] = np.clip(roi[:, slice(0, 4, 2)],
                                      a_min=0,
                                      a_max=img_size[0])
     roi[:, slice(1, 4, 2)] = np.clip(roi[:, slice(1, 4, 2)],
                                      a_min=0,
                                      a_max=img_size[1])

     # The threshold is converted to the coordinates of the processed image.
     min_size = self.min_size * scale
     roi_h = roi[:, 2] - roi[:, 0]
     roi_w = roi[:, 3] - roi[:, 1]
     keep = np.where((roi_h >= min_size) & (roi_w >= min_size))[0]
     roi = roi[keep, :]
     score = score[keep]

     # Pre-NMS selection: rank by score (highest first).
     order = score.ravel().argsort()[::-1]
     if n_pre_nms > 0:
         order = order[:n_pre_nms]
     roi = roi[order, :]

     keep = non_maximum_suppression(cp.ascontiguousarray(cp.asarray(roi)),
                                    thresh=self.nms_thresh)
     if n_post_nms > 0:
         keep = keep[:n_post_nms]

     # Bring the indices to host memory in one bulk conversion instead of
     # copying them element by element; works for CuPy and NumPy inputs.
     keep = cp.asnumpy(keep).astype(np.int32)
     roi = roi[keep]
     return roi
Example #3
0
    def __call__(self, loc, score, anchor, img_size, scale=1.):
        """Build region proposals from anchors and predicted offsets.

        The pipeline is: decode with ``loc2bbox``, clip to the image, discard
        boxes whose height or width is below ``self.min_size * scale``, keep
        the highest-scoring ``n_pre_nms`` boxes, apply NMS and truncate to
        ``n_post_nms``.  Train or test limits are selected from
        ``self.parent_model.training``.

        Args:
            loc: Predicted offsets (presumably the RPN regression output).
            score: Per-anchor scores (presumably foreground probabilities).
            anchor: Anchor boxes matching ``loc``.
            img_size: Pair; element 0 bounds box columns 0 and 2, element 1
                bounds columns 1 and 3.
            scale: Multiplier applied to ``self.min_size``.

        Returns:
            Array with one 4-value row per retained proposal.
        """
        in_training = self.parent_model.training
        pre_nms_limit = (self.n_train_pre_nms if in_training
                         else self.n_test_pre_nms)
        post_nms_limit = (self.n_train_post_nms if in_training
                          else self.n_test_post_nms)

        # Decode anchors + offsets into candidate boxes.
        proposals = loc2bbox(anchor, loc)

        # Clamp columns 0/2 with img_size[0] and columns 1/3 with img_size[1].
        proposals[:, 0:4:2] = np.clip(proposals[:, 0:4:2], 0, img_size[0])
        proposals[:, 1:4:2] = np.clip(proposals[:, 1:4:2], 0, img_size[1])

        # Discard boxes that are too small in either dimension.
        size_floor = self.min_size * scale
        heights = proposals[:, 2] - proposals[:, 0]
        widths = proposals[:, 3] - proposals[:, 1]
        large_enough = np.where(
            (heights >= size_floor) & (widths >= size_floor))[0]
        proposals = proposals[large_enough, :]
        score = score[large_enough]

        # Rank the remaining boxes by score, best first, and cap the count.
        ranking = score.ravel().argsort()[::-1]
        if pre_nms_limit > 0:
            ranking = ranking[:pre_nms_limit]
        proposals = proposals[ranking, :]

        # Suppress overlapping boxes, then cap the number of survivors.
        survivors = non_maximum_suppression(
            cp.ascontiguousarray(cp.asarray(proposals)),
            thresh=self.nms_thresh)
        if post_nms_limit > 0:
            survivors = survivors[:post_nms_limit]
        return proposals[survivors]
    def predict(self, imgs, sizes=None, visualize=False):
        """Detect objects in each image and return per-image results.

        The model is switched to eval mode for the call; before returning,
        the 'evaluate' preset is selected and the model is put back into
        train mode.

        Args:
            imgs: Iterable of CHW images.  With ``visualize=True`` they are
                passed through ``preprocess``; otherwise they are used as
                given.
            sizes: Per-image ``(H, W)`` used to map boxes back to the
                original image.  Recomputed from ``imgs`` when ``visualize``
                is true.  When omitted with ``visualize=False``, each image's
                own ``shape[1:]`` is used, i.e. no rescaling is assumed.
            visualize: Use the 'visualize' preset and preprocess ``imgs``.

        Returns:
            Tuple of lists ``(bboxes, labels, scores)`` with one entry per
            image, each produced by ``self._suppress``.
        """
        self.eval()
        if visualize:
            self.use_preset('visualize')
            prepared_imgs = list()
            sizes = list()
            for img in imgs:
                size = img.shape[1:]
                img = preprocess(array_tool.tonumpy(img))
                prepared_imgs.append(img)
                sizes.append(size)
        else:
            prepared_imgs = imgs
            if sizes is None:
                # Previously this path crashed in zip() with a TypeError.
                # Materialise once so a one-shot iterable is not consumed
                # twice, and treat the images as already at original size.
                prepared_imgs = list(imgs)
                sizes = [img.shape[1:] for img in prepared_imgs]
        bboxes = list()
        labels = list()
        scores = list()
        for img, size in zip(prepared_imgs, sizes):
            # Add a leading batch axis: (1, C, H, W).
            img = array_tool.totensor(img[None]).float()
            # W' / W: ratio between the processed and the original image.
            scale = img.shape[3] / size[1]
            roi_cls_locs, roi_scores, rois, roi_indices = self(img,
                                                               scale=scale)

            # Batch size is assumed to be 1.
            roi_score = roi_scores.data
            roi_cls_loc = roi_cls_locs.data
            roi = array_tool.totensor(rois) / scale

            # Tile the normalisation constants once per class: (1, 4*n_class).
            mean = torch.Tensor(self.loc_normalize_mean).cuda().repeat(
                self.n_class)[None]
            std = torch.Tensor(self.loc_normalize_std).cuda().repeat(
                self.n_class)[None]

            roi_cls_loc = (roi_cls_loc * std + mean)
            roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)

            # Pair every RoI with each class-specific offset: (R, n_class, 4).
            roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
            cls_bbox = loc2bbox(
                array_tool.tonumpy(roi).reshape(-1, 4),
                array_tool.tonumpy(roi_cls_loc).reshape(-1, 4))
            cls_bbox = array_tool.totensor(cls_bbox)
            cls_bbox = cls_bbox.view(-1, self.n_class * 4)
            # Clip the predicted boxes to the original image size.
            cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
            cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(
                min=0, max=size[1])

            prob = array_tool.tonumpy(
                functional.softmax(array_tool.totensor(roi_score), dim=1))

            raw_cls_bbox = array_tool.tonumpy(cls_bbox)
            raw_prob = array_tool.tonumpy(prob)

            bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
            # Collect the results image by image.
            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)

        self.use_preset('evaluate')
        self.train()
        return bboxes, labels, scores
Example #5
0
    def __call__(self, loc, score, anchor, img_size, scale=1.):
        """Propose RoIs from ndarray inputs, using ``py_cpu_nms`` for NMS.

        Boxes are decoded with ``loc2bbox``, clipped to the image, filtered
        by a minimum side length of ``self.min_size * scale``, ranked by
        score, truncated to ``n_pre_nms``, suppressed with ``py_cpu_nms`` and
        truncated again to ``n_post_nms``.  The limits depend on
        ``self.parent_model.training``.

        Args:
            loc: Predicted offsets for the anchors.
            score: Per-anchor scores.
            anchor: Anchor boxes.
            img_size: Pair; element 0 bounds box columns 0 and 2, element 1
                bounds columns 1 and 3.
            scale: Multiplier applied to ``self.min_size``.

        Returns:
            Array with one 4-value row per retained proposal.
        """
        training = self.parent_model.training
        n_pre_nms = self.n_train_pre_nms if training else self.n_test_pre_nms
        n_post_nms = (self.n_train_post_nms if training
                      else self.n_test_post_nms)

        # Anchors + offsets -> candidate boxes.
        boxes = loc2bbox(anchor, loc)

        # Keep every coordinate inside the image.
        boxes[:, 0:4:2] = np.clip(boxes[:, 0:4:2], 0, img_size[0])
        boxes[:, 1:4:2] = np.clip(boxes[:, 1:4:2], 0, img_size[1])

        # Drop boxes whose height or width is below the threshold.
        threshold = self.min_size * scale
        tall_enough = (boxes[:, 2] - boxes[:, 0]) >= threshold
        wide_enough = (boxes[:, 3] - boxes[:, 1]) >= threshold
        valid = np.where(tall_enough & wide_enough)[0]
        boxes = boxes[valid, :]
        score = score[valid]

        # Highest score first; optionally keep only the top n_pre_nms.
        ranking = score.ravel().argsort()[::-1]
        if n_pre_nms > 0:
            ranking = ranking[:n_pre_nms]
        boxes = boxes[ranking, :]

        # py_cpu_nms takes boxes and scores stacked side by side, so the
        # scores are reordered like the boxes and appended as a last column.
        score_column = score[ranking][:, np.newaxis]
        dets = np.hstack((boxes, score_column))
        kept = py_cpu_nms(dets, self.nms_thresh)

        if n_post_nms > 0:
            kept = kept[:n_post_nms]
        return boxes[kept]
Example #6
0
    def __call__(self, loc, score, anchor, img_size, scale=1.):
        """Propose RoIs, running NMS through the tensor-based ``nms`` op.

        ``loc`` and ``score`` are presumably the regression and
        classification outputs of the region proposal network's 1x1
        convolutions.  Boxes are decoded, clipped, size-filtered, ranked,
        truncated to ``n_pre_nms``, suppressed on the GPU and truncated to
        ``n_post_nms``.

        Args:
            loc: Predicted offsets for the anchors.
            score: Per-anchor scores (NumPy array; it is wrapped with
                ``torch.from_numpy``).
            anchor: Anchor boxes.
            img_size: Pair; element 0 bounds box columns 0 and 2, element 1
                bounds columns 1 and 3.
            scale: Multiplier applied to ``self.min_size``.

        Returns:
            NumPy array with one 4-value row per retained proposal.
        """
        if self.parent_model.training:
            pre_limit, post_limit = (self.n_train_pre_nms,
                                     self.n_train_post_nms)
        else:
            pre_limit, post_limit = (self.n_test_pre_nms,
                                     self.n_test_post_nms)

        # Decode the anchors into boxes that approximate the ground truth.
        candidates = loc2bbox(anchor, loc)

        candidates[:, 0:4:2] = np.clip(candidates[:, 0:4:2], 0, img_size[0])
        candidates[:, 1:4:2] = np.clip(candidates[:, 1:4:2], 0, img_size[1])

        # Require both sides of a box to reach the minimum size.
        min_side = self.min_size * scale
        box_h = candidates[:, 2] - candidates[:, 0]
        box_w = candidates[:, 3] - candidates[:, 1]
        passing = np.where((box_h >= min_side) & (box_w >= min_side))[0]
        candidates = candidates[passing, :]
        score = score[passing]

        # Sort by descending score and cap the number of candidates.
        by_score = score.ravel().argsort()[::-1]
        if pre_limit > 0:
            by_score = by_score[:pre_limit]
        candidates = candidates[by_score, :]
        score = score[by_score]

        # NMS runs on CUDA tensors; the result is an index tensor.
        boxes_gpu = torch.from_numpy(candidates).cuda()
        scores_gpu = torch.from_numpy(score).cuda()
        kept = nms(boxes_gpu, scores_gpu, self.nms_thresh)
        if post_limit > 0:
            kept = kept[:post_limit]

        # Index the NumPy boxes with a NumPy copy of the kept indices.
        return candidates[kept.cpu().numpy()]
Example #7
0
    def __call__(self, loc, score, anchor, img_size, scale=1.):
        """Generate region proposals from anchors and predicted offsets.

        ``loc`` and ``score`` are presumably the outputs of the region
        proposal network's 1x1 regression and classification convolutions.
        Processing order: decode, clip to the image, drop small boxes, rank
        by score, keep ``n_pre_nms``, apply NMS, keep ``n_post_nms``.

        Args:
            loc: Predicted offsets for the anchors.
            score: Per-anchor scores.
            anchor: Anchor boxes.
            img_size: Pair; element 0 bounds box columns 0 and 2, element 1
                bounds columns 1 and 3.
            scale: Multiplier applied to ``self.min_size``.

        Returns:
            Array with one 4-value row per retained proposal.
        """
        use_train_limits = self.parent_model.training
        if use_train_limits:
            max_before_nms = self.n_train_pre_nms
            max_after_nms = self.n_train_post_nms
        else:
            max_before_nms = self.n_test_pre_nms
            max_after_nms = self.n_test_post_nms

        rois = loc2bbox(anchor, loc)

        # Columns 0 and 2 are limited by img_size[0], columns 1 and 3 by
        # img_size[1]; the strided slices pick those column pairs.
        rows_sel = slice(0, 4, 2)
        cols_sel = slice(1, 4, 2)
        rois[:, rows_sel] = np.clip(rois[:, rows_sel], 0, img_size[0])
        rois[:, cols_sel] = np.clip(rois[:, cols_sel], 0, img_size[1])

        # Minimum allowed side length, expressed at the current scale.
        smallest_side = self.min_size * scale
        roi_heights = rois[:, 2] - rois[:, 0]
        roi_widths = rois[:, 3] - rois[:, 1]
        size_ok = np.where((roi_heights >= smallest_side)
                           & (roi_widths >= smallest_side))[0]
        rois = rois[size_ok, :]
        score = score[size_ok]

        # Descending score order, optionally truncated before NMS.
        descending = score.ravel().argsort()[::-1]
        if max_before_nms > 0:
            descending = descending[:max_before_nms]
        rois = rois[descending, :]

        # Run NMS over the score-sorted boxes.
        selected = non_maximum_suppression(
            cp.ascontiguousarray(cp.asarray(rois)), thresh=self.nms_thresh)
        if max_after_nms > 0:
            selected = selected[:max_after_nms]
        return rois[selected]
Example #8
0
    def __call__(self, loc, score, anchor, img_size, scale=1.):
        """Propose RoIs from anchors and offsets, with NMS on CUDA tensors.

        Boxes are decoded with ``loc2bbox``, clipped to the image, filtered
        by a minimum side of ``self.min_size * scale``, ranked by score,
        truncated to ``n_pre_nms``, suppressed with ``nms`` and truncated to
        ``n_post_nms``.  Train or test limits are chosen from
        ``self.parent_model.training``.

        Args:
            loc: Predicted offsets for the anchors.
            score: Per-anchor scores (NumPy array; it is wrapped with
                ``torch.from_numpy``).
            anchor: Anchor boxes.
            img_size: Pair; element 0 bounds box columns 0 and 2, element 1
                bounds columns 1 and 3.
            scale: Multiplier applied to ``self.min_size``.

        Returns:
            NumPy array with one 4-value row per retained proposal; the
            result is always 2-D, also when a single box survives.
        """
        if self.parent_model.training:
            n_pre_nms = self.n_train_pre_nms
            n_post_nms = self.n_train_post_nms
        else:
            n_pre_nms = self.n_test_pre_nms
            n_post_nms = self.n_test_post_nms

        # Apply the predicted offsets to the anchors.
        roi = loc2bbox(anchor, loc)

        # Box layout per the original notes: [ymin, xmin, ymax, xmax].
        # Columns 0/2 are clipped with img_size[0], columns 1/3 with
        # img_size[1].
        roi[:, slice(0, 4, 2)] = np.clip(roi[:, slice(0, 4, 2)], 0,
                                         img_size[0])
        roi[:, slice(1, 4, 2)] = np.clip(roi[:, slice(1, 4, 2)], 0,
                                         img_size[1])

        min_size = self.min_size * scale
        hs = roi[:, 2] - roi[:, 0]  # heights
        ws = roi[:, 3] - roi[:, 1]  # widths
        # Keep only boxes whose height and width both reach min_size.
        keep = np.where((hs >= min_size) & (ws >= min_size))[0]
        roi = roi[keep, :]
        score = score[keep]

        # Sort by descending score and cap the count before NMS.
        order = score.ravel().argsort()[::-1]
        if n_pre_nms > 0:
            order = order[:n_pre_nms]
        roi = roi[order, :]
        score = score[order]

        keep = nms(
            torch.from_numpy(roi).cuda(),
            torch.from_numpy(score).cuda(), self.nms_thresh)

        if n_post_nms > 0:
            keep = keep[:n_post_nms]
        # Convert the index tensor to NumPy before indexing: NumPy treats a
        # one-element torch tensor as a scalar index, which would collapse
        # the result to shape (4,) instead of (1, 4).
        roi = roi[keep.cpu().numpy()]
        return roi
Example #9
0
    def predict(self, imgs, sizes=None, visualize=False):
        """Detect objects from images.

        This method predicts objects for each image.  The model is switched
        to eval mode for the call; before returning, the 'evaluate' preset
        is selected and the model is put back into train mode.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images.
                All images are in CHW and RGB format
                and the range of their value is :math:`[0, 255]`.
            sizes (iterable of tuples, optional): Per-image ``(H, W)`` used
                to map boxes back to the original image.  Recomputed from
                ``imgs`` when ``visualize`` is true.  When omitted with
                ``visualize=False``, each image's own ``shape[1:]`` is used,
                i.e. no rescaling is assumed.
            visualize (bool): Use the 'visualize' preset and run
                ``preprocess`` on every image.

        Returns:
           tuple of lists:
           This method returns a tuple of three lists,
           :obj:`(bboxes, labels, scores)`.

           * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
               where :math:`R` is the number of bounding boxes in a image. \
               Each bounding box is organized by \
               :math:`(y_{min}, x_{min}, y_{max}, x_{max})` \
               in the second axis.
           * **labels** : A list of integer arrays of shape :math:`(R,)`. \
               Each value indicates the class of the bounding box. \
               Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
               number of the foreground classes.
           * **scores** : A list of float arrays of shape :math:`(R,)`. \
               Each value indicates how confident the prediction is.

        """
        self.eval()
        if visualize:
            self.use_preset('visualize')
            prepared_imgs = list()
            sizes = list()
            for img in imgs:
                size = img.shape[1:]
                img = preprocess(at.tonumpy(img))
                prepared_imgs.append(img)
                sizes.append(size)
        else:
            prepared_imgs = imgs
            if sizes is None:
                # Previously this path crashed in zip() with a TypeError.
                # Materialise once so a one-shot iterable is not consumed
                # twice, and treat the images as already at original size.
                prepared_imgs = list(imgs)
                sizes = [img.shape[1:] for img in prepared_imgs]
        bboxes = list()
        labels = list()
        scores = list()
        for img, size in zip(prepared_imgs, sizes):
            img = at.totensor(img[None]).float()
            # Ratio between the processed width and the original width.
            scale = img.shape[3] / size[1]
            roi_cls_loc, roi_scores, rois, _ = self(img, scale=scale)
            # We are assuming that batch size is 1.
            roi_score = roi_scores.data
            roi_cls_loc = roi_cls_loc.data
            roi = at.totensor(rois) / scale

            # Convert predictions to bounding boxes in image coordinates.
            # Bounding boxes are scaled to the scale of the input images.
            mean = t.Tensor(self.loc_normalize_mean).cuda(). \
                repeat(self.n_class)[None]
            std = t.Tensor(self.loc_normalize_std).cuda(). \
                repeat(self.n_class)[None]

            roi_cls_loc = (roi_cls_loc * std + mean)
            roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
            roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
            cls_bbox = loc2bbox(
                at.tonumpy(roi).reshape((-1, 4)),
                at.tonumpy(roi_cls_loc).reshape((-1, 4)))
            cls_bbox = at.totensor(cls_bbox)
            cls_bbox = cls_bbox.view(-1, self.n_class * 4)
            # clip bounding box
            cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
            cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

            prob = (F.softmax(at.totensor(roi_score), dim=1))

            # In this variant boxes and probabilities are handed to
            # _suppress as tensors, not NumPy arrays.
            bbox, label, score = self._suppress(cls_bbox, prob)
            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)

        self.use_preset('evaluate')
        self.train()
        return bboxes, labels, scores
Example #10
0
class FasterRCNN(nn.Module):
    """Base class for Faster R-CNN.

    This is a base class for Faster R-CNN links supporting object detection
    API [#]_. The following three stages constitute Faster R-CNN.

    1. **Feature extraction**: Images are taken and their \
        feature maps are calculated.
    2. **Region Proposal Networks**: Given the feature maps calculated in \
        the previous stage, produce set of RoIs around objects.
    3. **Localization and Classification Heads**: Using feature maps that \
        belong to the proposed RoIs, classify the categories of the objects \
        in the RoIs and improve localizations.

    Each stage is carried out by one of the callable
    :class:`torch.nn.Module` objects :obj:`feature`, :obj:`rpn` and :obj:`head`.

    There are two functions :meth:`predict` and :meth:`__call__` to conduct
    object detection.
    :meth:`predict` takes images and returns bounding boxes that are converted
    to image coordinates. This will be useful for a scenario when
    Faster R-CNN is treated as a black box function, for instance.
    :meth:`__call__` is provided for a scenario when intermediate outputs
    are needed, for instance, for training and debugging.

    Links that support object detection API have method :meth:`predict` with
    the same interface. Please refer to :meth:`predict` for
    further details.

    .. [#] Shaoqing Ren, Kaiming He, Ross Girshick, Jian Sun. \
    Faster R-CNN: Towards Real-Time Object Detection with \
    Region Proposal Networks. NIPS 2015.

    Args:
        extractor (nn.Module): A module that takes a BCHW image
            array and returns feature maps.
        rpn (nn.Module): A module that has the same interface as
            :class:`model.region_proposal_network.RegionProposalNetwork`.
            Please refer to the documentation found there.
        head (nn.Module): A module that takes
            a BCHW variable, RoIs and batch indices for RoIs. This returns class
            dependent localization parameters and class scores.
        loc_normalize_mean (tuple of four floats): Mean values of
            localization estimates.
        loc_normalize_std (tuple of four floats): Standard deviation
            of localization estimates.

    """

    def __init__(self, extractor, rpn, head,
                 loc_normalize_mean=(0., 0., 0., 0.),
                 loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
        super(FasterRCNN, self).__init__()
        self.extractor = extractor
        self.rpn = rpn
        self.head = head

        # Mean and std used to de-normalise the predicted offsets.
        self.loc_normalize_mean = loc_normalize_mean
        self.loc_normalize_std = loc_normalize_std
        self.use_preset('evaluate')

    @property
    def n_class(self):
        """Total number of classes including the background."""
        return self.head.n_class

    def forward(self, x, scale=1.):
        """Forward Faster R-CNN.

        Scaling parameter :obj:`scale` is used by RPN to determine the
        threshold to select small objects, which are going to be
        rejected irrespective of their confidence scores.

        Here are notations used.

        * :math:`N` is the number of batch size
        * :math:`R'` is the total number of RoIs produced across batches. \
            Given :math:`R_i` proposed RoIs from the :math:`i` th image, \
            :math:`R' = \\sum _{i=1} ^ N R_i`.
        * :math:`L` is the number of classes excluding the background.

        Classes are ordered by the background, the first class, ..., and
        the :math:`L` th class.

        Args:
            x (autograd.Variable): 4D image variable.
            scale (float): Amount of scaling applied to the raw image
                during preprocessing.

        Returns:
            Variable, Variable, array, array:
            Returns tuple of four values listed below.

            * **roi_cls_locs**: Offsets and scalings for the proposed RoIs. \
                Its shape is :math:`(R', (L + 1) \\times 4)`.
            * **roi_scores**: Class predictions for the proposed RoIs. \
                Its shape is :math:`(R', L + 1)`.
            * **rois**: RoIs proposed by RPN. Its shape is \
                :math:`(R', 4)`.
            * **roi_indices**: Batch indices of RoIs. Its shape is \
                :math:`(R',)`.

        """
        img_size = x.shape[2:]  # (H, W)

        h = self.extractor(x)  # feature map
        # Feed the feature map to the RPN.
        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.rpn(h, img_size, scale)
        # Feed the features and RoIs to the head for the final predictions.
        roi_cls_locs, roi_scores = self.head(
            h, rois, roi_indices)
        return roi_cls_locs, roi_scores, rois, roi_indices

    def use_preset(self, preset):
        """Use the given preset during prediction.

        This method changes values of :obj:`self.nms_thresh` and
        :obj:`self.score_thresh`. These values are a threshold value
        used for non maximum suppression and a threshold value
        to discard low confidence proposals in :meth:`predict`,
        respectively.

        If the attributes need to be changed to something
        other than the values provided in the presets, please modify
        them by directly accessing the public attributes.

        Args:
            preset ({'visualize', 'evaluate'}): A string to determine the
                preset to use.

        Raises:
            ValueError: If ``preset`` is neither 'visualize' nor 'evaluate'.

        """
        if preset == 'visualize':
            # A higher score threshold gives cleaner visualisations.
            self.nms_thresh = 0.3
            self.score_thresh = 0.7
        elif preset == 'evaluate':
            # For evaluation the score threshold is set very low.
            self.nms_thresh = 0.3
            self.score_thresh = 0.05
        else:
            raise ValueError('preset must be visualize or evaluate')

    def _suppress(self, raw_cls_bbox, raw_prob):
        """Apply the score threshold and per-class NMS to raw predictions.

        Called by :meth:`predict`.

        Args:
            raw_cls_bbox: Boxes for every class; reshaped here to
                ``(-1, n_class, 4)``.
            raw_prob: Class probabilities, indexed as ``raw_prob[:, cls]``.

        Returns:
            Tuple ``(bbox, label, score)`` of float32, int32 and float32
            arrays concatenated over all foreground classes.
        """
        bbox = list()
        label = list()
        score = list()
        # skip cls_id = 0 because it is the background class
        for cls_id in range(1, self.n_class):
            # Boxes and probabilities predicted for this class.
            cls_bbox_l = raw_cls_bbox.reshape(
                (-1, self.n_class, 4))[:, cls_id, :]
            prob_l = raw_prob[:, cls_id]
            # Keep only predictions above the score threshold.
            mask = prob_l > self.score_thresh
            cls_bbox_l = cls_bbox_l[mask]
            prob_l = prob_l[mask]
            keep = non_maximum_suppression(
                cp.array(cls_bbox_l), self.nms_thresh, prob_l)
            keep = cp.asnumpy(keep)  # indices that survived NMS
            bbox.append(cls_bbox_l[keep])
            # The labels are in [0, self.n_class - 2].
            label.append((cls_id - 1) * np.ones((len(keep),)))
            score.append(prob_l[keep])
        bbox = np.concatenate(bbox, axis=0).astype(np.float32)
        label = np.concatenate(label, axis=0).astype(np.int32)
        score = np.concatenate(score, axis=0).astype(np.float32)
        return bbox, label, score

    # Prediction entry point: returns boxes, labels and scores per image.
    # The decorator disables gradient tracking to speed up the computation.
    @nograd
    def predict(self, imgs, sizes=None, visualize=False):
        """Detect objects from images.

        This method predicts objects for each image.  The model is switched
        to eval mode for the call; before returning, the 'evaluate' preset
        is selected and the model is put back into train mode.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images.
                All images are in CHW and RGB format
                and the range of their value is :math:`[0, 255]`.
            sizes (iterable of tuples, optional): Per-image ``(H, W)`` used
                to map boxes back to the original image.  Recomputed from
                ``imgs`` when ``visualize`` is true.  When omitted with
                ``visualize=False``, each image's own ``shape[1:]`` is used,
                i.e. no rescaling is assumed.
            visualize (bool): Use the 'visualize' preset and run
                ``preprocess`` on every image.

        Returns:
           tuple of lists:
           This method returns a tuple of three lists,
           :obj:`(bboxes, labels, scores)`.

           * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
               where :math:`R` is the number of bounding boxes in a image. \
               Each bounding box is organized by \
               :math:`(y_{min}, x_{min}, y_{max}, x_{max})` \
               in the second axis.
           * **labels** : A list of integer arrays of shape :math:`(R,)`. \
               Each value indicates the class of the bounding box. \
               Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
               number of the foreground classes.
           * **scores** : A list of float arrays of shape :math:`(R,)`. \
               Each value indicates how confident the prediction is.

        """
        self.eval()  # eval mode (affects layers such as BN and dropout)
        if visualize:
            self.use_preset('visualize')
            prepared_imgs = list()
            sizes = list()
            for img in imgs:
                size = img.shape[1:]
                img = preprocess(at.tonumpy(img))
                prepared_imgs.append(img)
                sizes.append(size)
        else:
            prepared_imgs = imgs
            if sizes is None:
                # Previously this path crashed in zip() with a TypeError.
                # Materialise once so a one-shot iterable is not consumed
                # twice, and treat the images as already at original size.
                prepared_imgs = list(imgs)
                sizes = [img.shape[1:] for img in prepared_imgs]
        bboxes = list()
        labels = list()
        scores = list()
        for img, size in zip(prepared_imgs, sizes):
            img = at.totensor(img[None]).float()  # (1, C, H, W)
            scale = img.shape[3] / size[1]
            # Runs forward().
            roi_cls_loc, roi_scores, rois, _ = self(img, scale=scale)
            # We are assuming that batch size is 1.
            roi_score = roi_scores.data
            roi_cls_loc = roi_cls_loc.data
            # RoIs expressed in the coordinates of the un-resized image.
            roi = at.totensor(rois) / scale

            # Convert predictions to bounding boxes in image coordinates.
            # Bounding boxes are scaled to the scale of the input images.
            # Mean and std are tiled once per class.
            mean = t.Tensor(self.loc_normalize_mean).cuda(). \
                repeat(self.n_class)[None]
            std = t.Tensor(self.loc_normalize_std).cuda(). \
                repeat(self.n_class)[None]

            # De-normalise the offsets, then regroup them per class:
            # (R, 4 * n_class) -> (R, n_class, 4).
            roi_cls_loc = (roi_cls_loc * std + mean)
            roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
            # Pair every RoI with each class: (R, 4) -> (R, n_class, 4).
            roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
            # Refine the RoIs with the class-specific offsets.
            cls_bbox = loc2bbox(at.tonumpy(roi).reshape((-1, 4)),
                                at.tonumpy(roi_cls_loc).reshape((-1, 4)))
            cls_bbox = at.totensor(cls_bbox)
            cls_bbox = cls_bbox.view(-1, self.n_class * 4)
            # Clip bounding boxes so they do not exceed the original image.
            cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
            cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

            prob = at.tonumpy(F.softmax(at.totensor(roi_score), dim=1))

            raw_cls_bbox = at.tonumpy(cls_bbox)
            raw_prob = at.tonumpy(prob)

            bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)

        # Restore the default preset and training mode, then return the
        # documented result tuple.
        self.use_preset('evaluate')
        self.train()
        return bboxes, labels, scores
Example #11
0
    def predict(self, imgs, sizes=None, visualize=False):
        """Detect objects from images.
        This method predicts objects for each image.  The model is switched
        to eval mode for the call; before returning, the 'evaluate' preset
        is selected and the model is put back into train mode.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images.
                and the range of their value is :math:`[0, 255]`.
            sizes (iterable of tuples, optional): Per-image ``(H, W)`` used
                to map boxes back to the original image.  Recomputed from
                ``imgs`` when ``visualize`` is true.  When omitted with
                ``visualize=False``, each image's own ``shape[1:]`` is used,
                i.e. no rescaling is assumed.
            visualize (bool): Use the 'visualize' preset and run
                ``preprocess`` on every image.

        Returns:
           tuple of lists:
           This method returns a tuple of three lists,
           :obj:`(bboxes, labels, scores)`.

           * **bboxes**: (R, 4)
               :math:`(y_{min}, x_{min}, y_{max}, x_{max})` \
           * **labels** : 
               Each value indicates the class of the bounding box. \
               Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
               number of the foreground classes.
           * **scores** : 
               Each value indicates how confident the prediction is.

        """
        self.eval()
        if visualize:
            self.use_preset('visualize')
            prepared_imgs = list()
            sizes = list()
            for img in imgs:
                size = img.shape[1:]
                img = preprocess(at.tonumpy(img))
                prepared_imgs.append(img)
                sizes.append(size)
        else:
            prepared_imgs = imgs
            if sizes is None:
                # Previously this path crashed in zip() with a TypeError.
                # Materialise once so a one-shot iterable is not consumed
                # twice, and treat the images as already at original size.
                prepared_imgs = list(imgs)
                sizes = [img.shape[1:] for img in prepared_imgs]
        bboxes = list()
        labels = list()
        scores = list()
        for img, size in zip(prepared_imgs, sizes):
            # NOTE(review): `volatile=True` is the pre-0.4 PyTorch way of
            # disabling autograd; newer releases ignore it -- confirm the
            # targeted PyTorch version.
            img = t.autograd.Variable(at.totensor(img).float()[None],
                                      volatile=True)
            scale = img.shape[3] / size[1]
            # Calling the module runs forward().
            roi_cls_loc, roi_scores, rois, _ = self(
                img, scale=scale)

            # We are assuming that batch size is 1.
            roi_score = roi_scores.data
            roi_cls_loc = roi_cls_loc.data
            roi = at.totensor(rois) / scale

            # Convert predictions to bounding boxes in image coordinates.
            # Bounding boxes are scaled to the scale of the input images.
            mean = t.Tensor(self.loc_normalize_mean). \
                repeat(self.n_class)[None]
            std = t.Tensor(self.loc_normalize_std). \
                repeat(self.n_class)[None]

            roi_cls_loc = (roi_cls_loc * std + mean)
            roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
            roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
            cls_bbox = loc2bbox(
                at.tonumpy(roi).reshape((-1, 4)),
                at.tonumpy(roi_cls_loc).reshape((-1, 4)))

            cls_bbox = at.totensor(cls_bbox)
            cls_bbox = cls_bbox.view(-1, self.n_class * 4)
            # clip bounding box
            cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
            cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

            # Softmax turns the raw class scores into probabilities.
            # This would be a natural place to inspect the maximum
            # probability and bail out early when it is too small.
            prob = at.tonumpy(F.softmax(at.tovariable(roi_score),
                                        dim=1))

            raw_cls_bbox = at.tonumpy(cls_bbox)
            raw_prob = at.tonumpy(prob)

            bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)

        self.use_preset('evaluate')
        self.train()
        return bboxes, labels, scores
Example #12
0
    def __call__(self, loc, score, anchor, img_size, scale=1.):
        """Turn RPN predictions into a reduced set of proposal boxes (RoIs).

        ``loc``, ``score`` and ``anchor`` are index-aligned: row ``i`` of each
        refers to the same anchor. :math:`R` below is the total number of
        anchors, i.e. feature-map height * width * anchors per location.
        Inputs are expected to be ndarrays, and the output has the same type
        as the inputs.

        Args:
            loc (array): Predicted offsets and scaling relative to the
                anchors. Its shape is :math:`(R, 4)`.
            score (array): Predicted foreground probability per anchor.
                Its shape is :math:`(R,)`.
            anchor (array): Coordinates of the anchors. Its shape is
                :math:`(R, 4)`.
            img_size (tuple of ints): ``(height, width)`` of the image after
                scaling.
            scale (float): Factor by which the image was scaled after being
                read from file.

        Example of inputs observed by the original author::

            loc.shape, score.shape, anchor.shape, img_size, scale =
            ((16650, 4),
             (16650,),
             (16650, 4),
             (600, 800),
             tensor([ 1.6000], dtype=torch.float64))
            # 16650 = 37 (hh) * 50 (ww) * 9

        Returns:
            array:
            Coordinates of the proposal boxes, shape :math:`(S, 4)`.
            :math:`S` is bounded by :obj:`self.n_test_post_nms` at test time
            and by :obj:`self.n_train_post_nms` at train time, and depends on
            how many boxes survive the size filter and NMS.

        """
        # The budgets follow the parent model's mode, so remember to call
        # ``faster_rcnn.eval()`` before testing.
        if self.parent_model.training:
            n_pre_nms, n_post_nms = self.n_train_pre_nms, self.n_train_post_nms
        else:
            n_pre_nms, n_post_nms = self.n_test_pre_nms, self.n_test_post_nms

        # Decode the hh*ww*9 predicted offsets against their anchors; the
        # decoded boxes are what this method calls RoIs from here on.
        roi = loc2bbox(anchor, loc)

        # Clip the boxes to the (scaled) network input: columns 0 and 2 are
        # bounded by the image height, columns 1 and 3 by the image width.
        y_cols, x_cols = slice(0, 4, 2), slice(1, 4, 2)
        roi[:, y_cols] = np.clip(roi[:, y_cols], 0, img_size[0])
        roi[:, x_cols] = np.clip(roi[:, x_cols], 0, img_size[1])

        # Discard boxes whose height or width is below the threshold. The
        # threshold is expressed in scaled-image pixels, e.g. a min_size of
        # 16 with scale 1.6 gives 25.6.
        size_floor = self.min_size * scale
        heights = roi[:, 2] - roi[:, 0]
        widths = roi[:, 3] - roi[:, 1]
        large_enough = np.where(
            (heights >= size_floor) & (widths >= size_floor))[0]
        roi = roi[large_enough, :]
        score = score[large_enough]

        # Rank the remaining boxes by foreground score, best first, and keep
        # at most n_pre_nms of them (e.g. 6000) for NMS.
        ranking = score.ravel().argsort()[::-1]
        if n_pre_nms > 0:
            ranking = ranking[:n_pre_nms]
        roi = roi[ranking, :]

        # Apply NMS (e.g. threshold = 0.7) with the CuPy-based routine, then
        # keep at most n_post_nms survivors (e.g. 300).
        # unNOTE: something is wrong here!
        # TODO: remove cuda.to_gpu
        survivors = non_maximum_suppression(
            cp.ascontiguousarray(cp.asarray(roi)), thresh=self.nms_thresh)
        if n_post_nms > 0:
            survivors = survivors[:n_post_nms]
        return roi[survivors]
Example #13
0
    def predict(self, imgs, sizes=None, visualize=False):
        """Detect objects from images.

        This method predicts objects for each image.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images.
                All images are in CHW and RGB format
                and the range of their value is :math:`[0, 255]`.
            sizes (iterable, optional): One size per image. ``size[0]`` and
                ``size[1]`` bound the even- and odd-indexed box coordinates
                when clipping, and ``size[1]`` is the divisor used to compute
                the scale factor. Required when ``visualize`` is False; when
                ``visualize`` is True it is rebuilt from ``img.shape[1:]``.
            visualize (bool): If True, switch to the ``'visualize'`` preset
                and run ``preprocess`` on every image before prediction.

        Returns:
           tuple of lists:
           This method returns a tuple of three lists,
           :obj:`(bboxes, labels, scores)`.

           * **bboxes**: A list of float arrays of shape :math:`(R, 4)`,
               where :math:`R` is the number of bounding boxes in an image.
               Each bounding box is organized by
               :math:`(y_{min}, x_{min}, y_{max}, x_{max})`
               in the second axis.
           * **labels** : A list of integer arrays of shape :math:`(R,)`.
               Each value indicates the class of the bounding box.
               Values are in range :math:`[0, L - 1]`, where :math:`L` is the
               number of the foreground classes.
           * **scores** : A list of float arrays of shape :math:`(R,)`.
               Each value indicates how confident the prediction is.

        Raises:
            ValueError: If ``visualize`` is False and ``sizes`` is None.

        """
        # Fail early, before the model's mode is touched, instead of hitting
        # an opaque "NoneType is not iterable" in the zip() below.
        if not visualize and sizes is None:
            raise ValueError(
                'sizes must be provided when visualize is False')

        # Evaluation mode only affects modules such as Dropout or BatchNorm.
        self.eval()
        if visualize:
            # Visualization uses its own NMS / score thresholds.
            self.use_preset('visualize')
            prepared_imgs = list()
            sizes = list()
            for img in imgs:
                # Record the size before preprocess() changes the image.
                size = img.shape[1:]
                img = preprocess(at.tonumpy(img))
                prepared_imgs.append(img)
                sizes.append(size)
        else:
            prepared_imgs = imgs
        bboxes = list()
        labels = list()
        scores = list()
        for img, size in zip(prepared_imgs, sizes):
            # Add a leading batch axis, e.g. [3, 600, 800] -> [1, 3, 600, 800].
            # NOTE(review): ``volatile`` has no effect on PyTorch >= 0.4;
            # consider ``torch.no_grad()`` if the project targets it.
            img = t.autograd.Variable(at.totensor(img).float()[None], volatile=True)
            # Ratio of the prepared image width to the recorded width.
            scale = img.shape[3] / size[1]
            roi_cls_loc, roi_scores, rois, _ = self(img, scale=scale)
            # We are assuming that batch size is 1.
            roi_score = roi_scores.data
            roi_cls_loc = roi_cls_loc.data
            roi = at.totensor(rois) / scale

            # Convert predictions to bounding boxes in image coordinates.
            # Bounding boxes are scaled to the scale of the input images.
            mean = t.Tensor(self.loc_normalize_mean).cuda(). \
                repeat(self.n_class)[None]
            std = t.Tensor(self.loc_normalize_std).cuda(). \
                repeat(self.n_class)[None]

            # Undo the offset normalisation, then pair every RoI with the
            # offsets predicted for each class.
            roi_cls_loc = (roi_cls_loc * std + mean)
            roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
            roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
            cls_bbox = loc2bbox(at.tonumpy(roi).reshape((-1, 4)),
                                at.tonumpy(roi_cls_loc).reshape((-1, 4)))
            cls_bbox = at.totensor(cls_bbox)
            cls_bbox = cls_bbox.view(-1, self.n_class * 4)
            # clip bounding box
            cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
            cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

            prob = at.tonumpy(F.softmax(at.tovariable(roi_score), dim=1))

            raw_cls_bbox = at.tonumpy(cls_bbox)
            raw_prob = at.tonumpy(prob)

            bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)

        # Leave the model in the 'evaluate' preset and in training mode.
        self.use_preset('evaluate')
        self.train()
        return bboxes, labels, scores
    def __call__(self, loc, score,
                 anchor, img_size, scale=1.):
        """Propose RoIs from the RPN's per-anchor predictions.

        Row ``i`` of ``loc``, ``score`` and ``anchor`` all describe the same
        anchor. :math:`R` is the total number of anchors: the product of the
        feature-map height, its width and the number of anchor bases per
        pixel. Inputs should be ndarrays; the output has the same type as
        the inputs.

        Args:
            loc (array): Predicted offsets and scaling to anchors.
                Its shape is :math:`(R, 4)`.
            score (array): Predicted foreground probability for anchors.
                Its shape is :math:`(R,)`.
            anchor (array): Coordinates of anchors. Its shape is
                :math:`(R, 4)`.
            img_size (tuple of ints): A tuple :obj:`height, width`,
                which contains image size after scaling.
            scale (float): The scaling factor used to scale an image after
                reading it from a file.

        Returns:
            array:
            Proposal box coordinates of shape :math:`(S, 4)`. :math:`S` is
            bounded by :obj:`self.n_test_post_nms` at test time and by
            :obj:`self.n_train_post_nms` at train time; its exact value
            depends on the predicted box sizes and on how many boxes NMS
            discards.

        """
        # NOTE: the limits are chosen from the parent model's mode, so call
        # faster_rcnn.eval() before testing.
        training = self.parent_model.training
        n_pre_nms = self.n_train_pre_nms if training else self.n_test_pre_nms
        n_post_nms = (self.n_train_post_nms if training
                      else self.n_test_post_nms)

        # Convert anchors into proposals via bbox transformations.
        roi = loc2bbox(anchor, loc)

        # Clip predicted boxes to the image: columns 0/2 against the height,
        # columns 1/3 against the width.
        for first_col, limit in ((0, img_size[0]), (1, img_size[1])):
            cols = slice(first_col, 4, 2)
            roi[:, cols] = np.clip(roi[:, cols], 0, limit)

        # Remove predicted boxes with either height or width < threshold.
        threshold = self.min_size * scale
        box_h = roi[:, 2] - roi[:, 0]
        box_w = roi[:, 3] - roi[:, 1]
        valid = np.where((box_h >= threshold) & (box_w >= threshold))[0]
        roi = roi[valid, :]
        score = score[valid]

        # Sort all (proposal, score) pairs by score from highest to lowest
        # and take the top pre_nms_topN (e.g. 6000).
        by_score = score.ravel().argsort()[::-1]
        if n_pre_nms > 0:
            by_score = by_score[:n_pre_nms]
        roi = roi[by_score, :]

        # Apply nms (e.g. threshold = 0.7), then take after_nms_topN
        # (e.g. 300).
        # unNOTE: something is wrong here!
        # TODO: remove cuda.to_gpu
        gpu_roi = cp.ascontiguousarray(cp.asarray(roi))
        selected = non_maximum_suppression(gpu_roi, thresh=self.nms_thresh)
        if n_post_nms > 0:
            selected = selected[:n_post_nms]
        return roi[selected]
    def predict(self, imgs, sizes=None, visualize=False):
        """Detect objects from images.

        This method predicts objects for each image.

        Args:
            imgs: Iterable of images. With ``visualize=True`` each image's
                ``shape[1:]`` is recorded as its size and the image is run
                through ``preprocess``; otherwise the images are used as-is.
            sizes (iterable, optional): One size per image; ``size[0]`` and
                ``size[1]`` are the clipping bounds for the even- and
                odd-indexed box coordinates, and ``size[1]`` is the divisor
                used to compute the scale factor. Required when
                ``visualize`` is False.
            visualize (bool): If True, use the ``'visualize'`` preset and
                preprocess the images here.

        Returns:
            tuple of lists: ``(bboxes, labels, scores)``, each with one entry
            per image as returned by ``self._suppress``.

        Raises:
            ValueError: If ``visualize`` is False and ``sizes`` is None.

        """
        # Validate before touching the model state; otherwise the zip()
        # below would fail with an opaque TypeError.
        if not visualize and sizes is None:
            raise ValueError(
                'sizes must be provided when visualize is False')

        self.eval()
        self.use_preset('evaluate')
        if visualize:
            self.use_preset('visualize')
            prepared_imgs = list()
            sizes = list()
            for img in imgs:
                # Record the size before preprocess() changes the image.
                size = img.shape[1:]
                img = preprocess(at.tonumpy(img))
                prepared_imgs.append(img)
                sizes.append(size)
        else:
            prepared_imgs = imgs
        bboxes = list()
        labels = list()
        scores = list()
        for img, size in zip(prepared_imgs, sizes):
            # Add a leading batch axis.
            # NOTE(review): ``volatile`` has no effect on PyTorch >= 0.4;
            # consider ``torch.no_grad()`` if the project targets it.
            img = t.autograd.Variable(at.totensor(img).float()[None],
                                      volatile=True)
            # Ratio of the prepared image width to the recorded width.
            scale = img.shape[3] / size[1]
            roi_cls_loc, roi_scores, rois, _ = self(img, scale=scale)
            # We are assuming that batch size is 1.
            roi_score = roi_scores.data
            roi_cls_loc = roi_cls_loc.data
            roi = at.totensor(rois) / scale

            # Convert predictions to bounding boxes in image coordinates.
            # Bounding boxes are scaled to the scale of the input images.
            mean = t.Tensor(self.loc_normalize_mean).cuda(). \
                repeat(self.n_class)[None]
            std = t.Tensor(self.loc_normalize_std).cuda(). \
                repeat(self.n_class)[None]

            # Undo the offset normalisation, then pair every RoI with the
            # offsets predicted for each class.
            roi_cls_loc = (roi_cls_loc * std + mean)
            roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
            roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
            cls_bbox = loc2bbox(
                at.tonumpy(roi).reshape((-1, 4)),
                at.tonumpy(roi_cls_loc).reshape((-1, 4)))
            cls_bbox = at.totensor(cls_bbox)
            cls_bbox = cls_bbox.view(-1, self.n_class * 4)
            # clip bounding box
            cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
            cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

            prob = at.tonumpy(F.softmax(at.tovariable(roi_score), dim=1))

            raw_cls_bbox = at.tonumpy(cls_bbox)
            raw_prob = at.tonumpy(prob)

            bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)

        # Leave the model in the 'evaluate' preset and in training mode.
        self.use_preset('evaluate')
        self.train()
        return bboxes, labels, scores
    def predict(self, imgs, sizes=None, visualize=False):
        """Detect objects from images.

        This method predicts objects for each image.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images.
                All images are in CHW and RGB format
                and the range of their value is :math:`[0, 255]`.
            sizes (iterable, optional): One size per image. ``size[0]`` and
                ``size[1]`` bound the even- and odd-indexed box coordinates
                when clipping, and ``size[1]`` is the divisor used to compute
                the scale factor. Required when ``visualize`` is False; when
                ``visualize`` is True it is rebuilt from ``img.shape[1:]``.
            visualize (bool): If True, switch to the ``'visualize'`` preset
                and run ``preprocess`` on every image before prediction.

        Returns:
           tuple of lists:
           This method returns a tuple of three lists,
           :obj:`(bboxes, labels, scores)`.

           * **bboxes**: A list of float arrays of shape :math:`(R, 4)`,
               where :math:`R` is the number of bounding boxes in an image.
               Each bounding box is organized by
               :math:`(y_{min}, x_{min}, y_{max}, x_{max})`
               in the second axis.
           * **labels** : A list of integer arrays of shape :math:`(R,)`.
               Each value indicates the class of the bounding box.
               Values are in range :math:`[0, L - 1]`, where :math:`L` is the
               number of the foreground classes.
           * **scores** : A list of float arrays of shape :math:`(R,)`.
               Each value indicates how confident the prediction is.

        Raises:
            ValueError: If ``visualize`` is False and ``sizes`` is None.

        """
        # Reject the unusable argument combination before changing the
        # model's mode; zip() below cannot iterate over None.
        if not visualize and sizes is None:
            raise ValueError(
                'sizes must be provided when visualize is False')

        self.eval()
        if visualize:
            self.use_preset('visualize')
            prepared_imgs = list()
            sizes = list()
            for img in imgs:
                # Record the size before preprocess() changes the image.
                size = img.shape[1:]
                img = preprocess(at.tonumpy(img))
                prepared_imgs.append(img)
                sizes.append(size)
        else:
            prepared_imgs = imgs
        bboxes = list()
        labels = list()
        scores = list()
        for img, size in zip(prepared_imgs, sizes):
            # Add a leading batch axis.
            # NOTE(review): ``volatile`` has no effect on PyTorch >= 0.4;
            # consider ``torch.no_grad()`` if the project targets it.
            img = t.autograd.Variable(at.totensor(img).float()[None], volatile=True)
            # Ratio of the prepared image width to the recorded width.
            scale = img.shape[3] / size[1]
            roi_cls_loc, roi_scores, rois, _ = self(img, scale=scale)
            # We are assuming that batch size is 1.
            roi_score = roi_scores.data
            roi_cls_loc = roi_cls_loc.data
            roi = at.totensor(rois) / scale

            # Convert predictions to bounding boxes in image coordinates.
            # Bounding boxes are scaled to the scale of the input images.
            mean = t.Tensor(self.loc_normalize_mean).cuda(). \
                repeat(self.n_class)[None]
            std = t.Tensor(self.loc_normalize_std).cuda(). \
                repeat(self.n_class)[None]

            # Undo the offset normalisation, then pair every RoI with the
            # offsets predicted for each class.
            roi_cls_loc = (roi_cls_loc * std + mean)
            roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
            roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
            cls_bbox = loc2bbox(at.tonumpy(roi).reshape((-1, 4)),
                                at.tonumpy(roi_cls_loc).reshape((-1, 4)))
            cls_bbox = at.totensor(cls_bbox)
            cls_bbox = cls_bbox.view(-1, self.n_class * 4)
            # clip bounding box
            cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
            cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

            prob = at.tonumpy(F.softmax(at.tovariable(roi_score), dim=1))

            raw_cls_bbox = at.tonumpy(cls_bbox)
            raw_prob = at.tonumpy(prob)

            bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)

        # Leave the model in the 'evaluate' preset and in training mode.
        self.use_preset('evaluate')
        self.train()
        return bboxes, labels, scores
    def predict(self, imgs, sizes=None, visualize=False):
        """Detect objects in each image and return boxes, labels and scores.

        Args:
            imgs: Iterable of images. With ``visualize=True`` each image's
                ``shape[1:]`` is recorded as its size and the image is run
                through ``preprocess``; otherwise the images are used as-is.
            sizes (iterable, optional): One size per image; ``size[0]`` and
                ``size[1]`` are the clipping bounds for the even- and
                odd-indexed box coordinates, and ``size[1]`` is the divisor
                used to compute the scale factor. Required when
                ``visualize`` is False.
            visualize (bool): If True, use the ``'visualize'`` preset and
                preprocess the images here.

        Returns:
            tuple of lists: ``(bboxes, labels, scores)``, each with one entry
            per image as returned by ``self._suppress``.

        Raises:
            ValueError: If ``visualize`` is False and ``sizes`` is None.
        """
        # Validate before touching the model state; otherwise the zip()
        # below would fail with an opaque TypeError.
        if not visualize and sizes is None:
            raise ValueError(
                'sizes must be provided when visualize is False')

        # Switch to eval mode.
        self.eval()
        # Visualization path: apply its preset and preprocess the images.
        if visualize:
            self.use_preset('visualize')
            prepared_imgs = list()
            sizes = list()
            for img in imgs:
                # Record the size before preprocess() changes the image.
                size = img.shape[1:]
                img = preprocess(at.tonumpy(img))
                prepared_imgs.append(img)
                sizes.append(size)
        else:
            prepared_imgs = imgs
        bboxes = list()
        labels = list()
        scores = list()
        for img, size in zip(prepared_imgs, sizes):
            img = at.totensor(img[None]).float()
            # Preprocessing may have resized the image, so record the ratio
            # of the prepared width to the recorded width. It is passed to
            # the forward pass and later used to map the RoIs back to the
            # recorded image size.
            scale = img.shape[3] / size[1]
            # Run the forward pass.
            roi_cls_loc, roi_scores, rois, _ = self(img, scale=scale)
            # We are assuming that batch size is 1.

            roi_score = roi_scores.data
            roi_cls_loc = roi_cls_loc.data
            roi = at.totensor(rois) / scale

            # Convert predictions to bounding boxes in image coordinates.
            # Bounding boxes are scaled to the scale of the input images.
            # The predicted offsets are de-normalised with (* std + mean),
            # presumably undoing the normalisation applied to the regression
            # targets during training, and are then used to refine the RoIs
            # into class-specific boxes.
            mean = t.Tensor(self.loc_normalize_mean).cuda(). \
                repeat(self.n_class)[None]
            std = t.Tensor(self.loc_normalize_std).cuda(). \
                repeat(self.n_class)[None]

            roi_cls_loc = (roi_cls_loc * std + mean)
            roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
            roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
            # Decode offsets into box coordinates.
            cls_bbox = loc2bbox(
                at.tonumpy(roi).reshape((-1, 4)),
                at.tonumpy(roi_cls_loc).reshape((-1, 4)))
            cls_bbox = at.totensor(cls_bbox)
            cls_bbox = cls_bbox.view(-1, self.n_class * 4)
            # clip bounding box
            cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
            cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])
            # Turn the classification scores into probabilities with a
            # softmax over the class axis. Everything so far is per-RoI
            # pre-processing; the final detections are selected below.
            prob = at.tonumpy(F.softmax(at.totensor(roi_score), dim=1))

            raw_cls_bbox = at.tonumpy(cls_bbox)
            raw_prob = at.tonumpy(prob)

            bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)

        # Leave the model in the 'evaluate' preset and in training mode.
        self.use_preset('evaluate')
        self.train()
        return bboxes, labels, scores
Example #18
0
    def predict(self, imgs, sizes=None, visualize=False):
        '''Run detection on every image.

        Args:
            imgs: Iterable of images; they must be CHW-format RGB
                ``np.ndarray`` objects.
            sizes (iterable, optional): One size per image; ``size[0]`` and
                ``size[1]`` are the clipping bounds for the even- and
                odd-indexed box coordinates, and ``size[1]`` is the divisor
                used to compute the scale factor. Required when
                ``visualize`` is False; rebuilt from ``img.shape[1:]`` when
                ``visualize`` is True.
            visualize (bool): If True, use the ``'visualize'`` preset and
                preprocess the images here.

        Returns:
            tuple: ``(bboxes, labels, scores)`` - box coordinates, labels and
            scores, one list entry per image.

        Raises:
            ValueError: If ``visualize`` is False and ``sizes`` is None.
        '''
        # Validate before touching the model state; otherwise the zip()
        # below would fail with an opaque TypeError.
        if not visualize and sizes is None:
            raise ValueError(
                'sizes must be provided when visualize is False')

        self.eval()
        if visualize:  # visualization path
            self.use_preset('visualize')
            prepared_imgs = list()
            sizes = list()
            for img in imgs:
                # Everything after the channel axis, recorded before
                # preprocess() changes the image.
                size = img.shape[1:]
                img = preprocess(at.tonumpy(img))
                prepared_imgs.append(img)
                sizes.append(size)
        else:
            prepared_imgs = imgs
        bboxes = list()
        labels = []
        scores = []
        for img, size in zip(prepared_imgs, sizes):
            img = at.totensor(img[None]).float()
            scale = img.shape[3] / size[1]
            # Calling the model instance runs its forward pass.
            roi_cls_loc, roi_scores, rois, _ = self(img, scale=scale)
            # ``.data`` yields the underlying tensors without autograd
            # history.
            roi_score = roi_scores.data
            roi_cls_loc = roi_cls_loc.data
            roi = at.totensor(rois) / scale

            mean = t.Tensor(self.loc_normalize_mean).cuda().repeat(
                self.n_class)[None]
            std = t.Tensor(self.loc_normalize_std).cuda().repeat(
                self.n_class)[None]

            roi_cls_loc = (roi_cls_loc * std + mean)
            # Reshape to one 4-vector of offsets per (RoI, class) pair, and
            # broadcast each RoI across the class axis to match.
            roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
            roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)

            cls_bbox = loc2bbox(
                at.tonumpy(roi).reshape((-1, 4)),
                at.tonumpy(roi_cls_loc).reshape((-1, 4)))
            cls_bbox = at.totensor((cls_bbox))
            cls_bbox = cls_bbox.view(-1, self.n_class * 4)
            # clamp() limits the tensor to the given range so that the boxes
            # do not extend beyond the image.
            cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
            cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

            prob = at.tonumpy(F.softmax(at.totensor(roi_score), dim=1))

            raw_cls_bbox = at.tonumpy(cls_bbox)
            raw_prob = at.tonumpy(prob)

            bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)
        # Leave the model in the 'evaluate' preset and in training mode.
        self.use_preset('evaluate')
        self.train()
        return bboxes, labels, scores
Example #19
0
    def extract(self, x, num_box):
        """Run the detector on each image and collect per-box features.

        Every image in ``x`` is squeezed, preprocessed and passed through
        ``self.extractor``, ``self.rpn`` and ``self.head``. The class-specific
        boxes are decoded and clipped to the recorded image size, and
        ``self._suppress_by_num`` then returns the features stored for that
        image.

        Args:
            x: Sized iterable of images. After ``squeeze()`` each image's
                ``shape[1:]`` is taken as its size (presumably CHW - confirm
                with callers).
            num_box (int): Size of the second axis of the returned feature
                tensor; also forwarded to ``self._suppress_by_num``.

        Returns:
            tuple: ``(features, roi_cls_locs, roi_scores, rpn_locs,
            rpn_scores, rois, anchors, scales, hiddens)``. ``features`` is a
            tensor of shape ``(len(x), num_box, self.hidden_size)``. The
            remaining items are lists with one entry per image: the raw head
            outputs, the RPN outputs, the rescaled RoIs (expanded to one copy
            per class), the anchors, the scale factors and the extractor
            feature maps.
        """
        num_batch = len(x)
        self.use_preset('visualize')
        prepared_imgs = list()
        sizes = list()
        for img in x:
            img = img.squeeze()
            # Record the size before preprocess() changes the image.
            size = img.shape[1:]
            img = preprocess(at.tonumpy(img))
            prepared_imgs.append(img)
            sizes.append(size)

        roi_cls_locs = list()
        roi_scores = list()
        rpn_locs = list()
        rpn_scores = list()
        rois = list()
        anchors = list()
        features = t.zeros((num_batch, num_box, self.hidden_size))
        scales = list()
        hiddens = list()
        for i, (img, size) in enumerate(zip(prepared_imgs, sizes)):
            img = at.totensor(img[None]).float()
            # Ratio of the prepared image width to the recorded width.
            scale = img.shape[3] / size[1]
            h = self.extractor(img)
            rpn_loc, rpn_score, roi, roi_indices, anchor = \
                self.rpn(h, size, scale)
            roi_cls_loc_, roi_score_, feature = self.head(h, roi, roi_indices)
            # Class probabilities: softmax over the class axis, applied
            # exactly once to the raw head scores.
            roi_score = F.softmax(at.totensor(roi_score_), dim=1).data
            # We are assuming that batch size is 1.
            roi_cls_loc = roi_cls_loc_.data
            roi = at.totensor(roi) / scale

            # Convert predictions to bounding boxes in image coordinates.
            # Bounding boxes are scaled to the scale of the input images.
            mean = t.Tensor(self.loc_normalize_mean).cuda(). \
                repeat(self.n_class)[None]
            std = t.Tensor(self.loc_normalize_std).cuda(). \
                repeat(self.n_class)[None]

            # Undo the offset normalisation, then pair every RoI with the
            # offsets predicted for each class.
            roi_cls_loc = (roi_cls_loc * std + mean)
            roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
            roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
            cls_bbox = loc2bbox(
                at.tonumpy(roi).reshape((-1, 4)),
                at.tonumpy(roi_cls_loc).reshape((-1, 4)))
            cls_bbox = at.totensor(cls_bbox)
            cls_bbox = cls_bbox.view(-1, self.n_class * 4)
            # clip bounding box
            cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
            cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

            # ``roi_score`` already holds probabilities. The previous code
            # ran a second softmax here, which flattened the distribution
            # that the box selection below relies on.
            prob = at.tonumpy(roi_score)

            raw_cls_bbox = at.tonumpy(cls_bbox)
            raw_prob = at.tonumpy(prob)
            bbox, label, score, feat = self._suppress_by_num(
                raw_cls_bbox, raw_prob, feature, num_box)
            features[i, :, :] = feat
            roi_cls_locs.append(roi_cls_loc_)
            roi_scores.append(roi_score_)
            rpn_scores.append(rpn_score)
            rpn_locs.append(rpn_loc)
            rois.append(roi)
            anchors.append(anchor)
            scales.append(scale)
            hiddens.append(h)

        return features, roi_cls_locs, roi_scores, rpn_locs, rpn_scores, rois, anchors, scales, hiddens
Example #20
0
    def predict(self, imgs, sizes=None, visualize=False):
        """Run prediction on a set of images.

        Args:
            imgs: Iterable of images. With ``visualize=True`` each image's
                ``shape[1:]`` is recorded as its size and the image is run
                through ``preprocess``; otherwise the images are used as-is.
            sizes (iterable, optional): One size per image; ``size[0]`` and
                ``size[1]`` are the clipping bounds for the even- and
                odd-indexed box coordinates, and ``size[1]`` is the divisor
                used to compute the scale factor. Required when
                ``visualize`` is False.
            visualize (bool): Selects the ``"visualize"`` preset (and
                preprocessing) instead of the ``"evaluate"`` preset.

        Returns:
            tuple of lists: ``(bboxes, labels, scores)`` - box coordinates,
            predicted class labels and class scores, one entry per image.

        Raises:
            ValueError: If ``visualize`` is False and ``sizes`` is None.
        """
        # Validate before touching the model state; otherwise the zip()
        # below would fail with an opaque TypeError.
        if not visualize and sizes is None:
            raise ValueError(
                "sizes must be provided when visualize is False")

        self.eval()  # prediction runs in eval mode
        if visualize:
            # Use the nms_thresh / score_thresh meant for visualization.
            self.use_preset("visualize")
            prepared_imgs = list()  # images fed to the network
            sizes = list()  # sizes of those images before preprocessing
            for img in imgs:
                size = img.shape[1:]
                img = preprocess(img=tonumpy(img))  # resizes the image
                prepared_imgs.append(img)
                sizes.append(size)  # size of the unprocessed image
        else:
            self.use_preset("evaluate")
            prepared_imgs = imgs  # use the images as given

        bboxes = list()  # predicted box coordinates
        labels = list()  # predicted class labels
        scores = list()  # predicted class scores
        for img, size in zip(prepared_imgs, sizes):
            # Add a batch axis to get an [N, C, H, W] float tensor.
            img = totensor(img[None]).float()
            # Width of the prepared image over the recorded width.
            scale = img.shape[3] / size[1]
            # The forward pass yields predicted offsets, scores and RoIs.
            roi_cls_locs, roi_scores, rois, _ = self(x=img, scale=scale)
            # Assuming a batch size of 1:
            roi_score = roi_scores.data  # scores for this image
            roi_cls_loc = roi_cls_locs.data  # offsets for this image
            # The RoIs refer to the prepared image; dividing by scale maps
            # them back to the recorded image size.
            roi = totensor(rois) / scale

            # De-normalise the predicted offsets with mean and std before
            # decoding them into coordinates.
            mean = torch.Tensor(self.loc_normalize_mean).cuda().repeat(
                self.n_class)[None]
            std = torch.Tensor(self.loc_normalize_std).cuda().repeat(
                self.n_class)[None]
            roi_cls_loc = roi_cls_loc * std + mean
            # Reshape the offsets per class so that the RoIs can be expanded
            # to the same shape.
            roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
            roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
            # Decode the offsets relative to the RoIs into coordinates; both
            # inputs are flattened back to 4 columns first.
            cls_bbox = loc2bbox(base_box=tonumpy(roi).reshape(-1, 4),
                                locs=tonumpy(roi_cls_loc).reshape(-1, 4))
            cls_bbox = totensor(cls_bbox)  # 4 columns at this point
            # One row per RoI with 4 columns per class (y1, x1, y2, x2),
            # e.g. 84 columns for 21 classes.
            cls_bbox = cls_bbox.view(-1, self.n_class * 4)
            # Clip the parts of the boxes that fall outside the image.
            cls_bbox[:, 0::2] = cls_bbox[:, 0::2].clamp(min=0, max=size[0])
            cls_bbox[:, 1::2] = cls_bbox[:, 1::2].clamp(min=0, max=size[1])
            # Each row of scores holds one value per class, hence dim=1.
            prob = tonumpy(data=F.softmax(totensor(data=roi_score), dim=1))

            raw_cls_bbox = tonumpy(data=cls_bbox)  # (N, n_class * 4)
            raw_prob = tonumpy(data=prob)  # (N, n_class)

            # Select the final bbox, label and score for this image.
            bbox, label, score = self._supress(raw_cls_bbox=raw_cls_bbox,
                                               raw_prob=raw_prob)
            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)
        # Leave the model in the "evaluate" preset and in training mode.
        self.use_preset("evaluate")
        self.train()
        return bboxes, labels, scores
Example #21
0
            # We are assuming that batch size is 1.
            roi_score = roi_scores.data
            roi_cls_loc = roi_cls_loc.data
            roi = at.totensor(rois) / scale

            # Convert predictions to bounding boxes in image coordinates.
            # Bounding boxes are scaled to the scale of the input images.
            mean = t.Tensor(self.loc_normalize_mean).cuda(). \
                repeat(self.n_class)[None]
            std = t.Tensor(self.loc_normalize_std).cuda(). \
                repeat(self.n_class)[None]

            roi_cls_loc = (roi_cls_loc * std + mean)
            roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
            roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
            cls_bbox = loc2bbox(at.tonumpy(roi).reshape((-1, 4)),
                                at.tonumpy(roi_cls_loc).reshape((-1, 4)))
            cls_bbox = at.totensor(cls_bbox)
            cls_bbox = cls_bbox.view(-1, self.n_class * 4)
            # clip bounding box
            cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
            cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

            prob = at.tonumpy(F.softmax(at.tovariable(roi_score), dim=1))

            raw_cls_bbox = at.tonumpy(cls_bbox)
            raw_prob = at.tonumpy(prob)

            bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)
Example #22
0
    def __call__(self, loc, score, anchor, img_size, scale=1.):
        """Propose RoIs from predicted offsets, scores and anchors.

        The caller passes the predicted ``loc``, the foreground ``score`` and
        all anchors of the feature map; row ``i`` of each refers to the same
        anchor. :math:`R` is the total number of anchors, equal to the
        product of the feature-map height, its width and the number of
        anchor bases per pixel. Inputs should be ndarrays, and the output
        has the same type as the inputs.

        Args:
            loc (array): Predicted offsets and scaling to anchors.
                Its shape is :math:`(R, 4)`.
            score (array): Predicted foreground probability for anchors.
                Its shape is :math:`(R,)`.
            anchor (array): Coordinates of anchors. Its shape is
                :math:`(R, 4)`.
            img_size (tuple of ints): A tuple :obj:`height, width`,
                which contains image size after scaling.
            scale (float): The scaling factor used to scale an image after
                reading it from a file.

        Returns:
            array:
            An array of coordinates of proposal boxes.
            Its shape is :math:`(S, 4)`. :math:`S` is bounded by
            :obj:`self.n_test_post_nms` in test time and by
            :obj:`self.n_train_post_nms` in train time. :math:`S` depends on
            the size of the predicted bounding boxes and the number of
            bounding boxes discarded by NMS.

        """
        # NOTE: call faster_rcnn.eval() before testing so that the test-time
        # limits are used. The original author's notes give 12000 / 2000 for
        # training and 6000 / 300 for testing; those values are configured
        # elsewhere.
        if self.parent_model.training:
            n_pre_nms, n_post_nms = self.n_train_pre_nms, self.n_train_post_nms
        else:
            n_pre_nms, n_post_nms = self.n_test_pre_nms, self.n_test_post_nms

        # Convert anchors into proposals via bbox transformations; one box
        # per anchor.
        roi = loc2bbox(anchor, loc)

        # Clip predicted boxes to the image: ymin/ymax to [0, H] and
        # xmin/xmax to [0, W].
        y_idx = slice(0, 4, 2)
        x_idx = slice(1, 4, 2)
        roi[:, y_idx] = np.clip(roi[:, y_idx], 0, img_size[0])
        roi[:, x_idx] = np.clip(roi[:, x_idx], 0, img_size[1])

        # Remove predicted boxes with either height or width < threshold.
        min_side = self.min_size * scale
        roi_heights = roi[:, 2] - roi[:, 0]
        roi_widths = roi[:, 3] - roi[:, 1]
        size_ok = np.where(
            (roi_heights >= min_side) & (roi_widths >= min_side))[0]
        roi = roi[size_ok, :]
        # Keep the foreground scores of the surviving boxes aligned with roi.
        score = score[size_ok]

        # Sort all (proposal, score) pairs by score from highest to lowest
        # and take the top pre_nms_topN (e.g. 6000).
        descending = score.ravel().argsort()[::-1]
        if n_pre_nms > 0:
            descending = descending[:n_pre_nms]
        roi = roi[descending, :]

        # Apply nms (e.g. threshold = 0.7) to suppress duplicates, then take
        # after_nms_topN (e.g. 300) of the remaining boxes.
        # unNOTE: something is wrong here!
        # TODO: remove cuda.to_gpu
        kept = non_maximum_suppression(
            cp.ascontiguousarray(cp.asarray(roi)), thresh=self.nms_thresh)
        if n_post_nms > 0:
            kept = kept[:n_post_nms]
        return roi[kept]
Example #23
0
    def predict(self, imgs, sizes=None, visualize=False):
        """Detect objects from images.

        This method predicts objects for each image.  The model is switched
        to eval mode for the duration of the call and is switched back to
        train mode (with the ``'evaluate'`` preset) before returning.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images.
                All images are in CHW and RGB format.  When ``visualize``
                is true they are raw images whose values are in
                :math:`[0, 255]` and they are passed through ``preprocess``;
                otherwise they are fed to the network as given (the caller
                is assumed to have preprocessed them already).
            sizes (iterable of (height, width), optional): Reference size of
                each image; predicted boxes are rescaled to and clipped
                against it.  Ignored when ``visualize`` is true, because the
                sizes are then read from the raw images.  When omitted with
                ``visualize=False``, the height and width of each supplied
                image are used, so boxes are expressed in the coordinates of
                the supplied images.
            visualize (bool): If true, preprocess the images here and use
                the ``'visualize'`` preset.

        Returns:
           tuple of lists:
           This method returns a tuple of three lists,
           :obj:`(bboxes, labels, scores)`.

           * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
               where :math:`R` is the number of bounding boxes in an image. \
               Each bounding box is organized by \
               :math:`(y_{min}, x_{min}, y_{max}, x_{max})` \
               in the second axis.
           * **labels** : A list of integer arrays of shape :math:`(R,)`. \
               Each value indicates the class of the bounding box. \
               Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
               number of the foreground classes.
           * **scores** : A list of float arrays of shape :math:`(R,)`. \
               Each value indicates how confident the prediction is.

        """
        self.eval()  # inference mode for the whole call
        if visualize:
            self.use_preset('visualize')
            prepared_imgs = list()
            sizes = list()
            for img in imgs:
                # Remember the raw (H, W) before preprocessing changes it.
                size = img.shape[1:]
                img = preprocess(at.tonumpy(img))
                prepared_imgs.append(img)
                sizes.append(size)
        else:
            prepared_imgs = imgs
            if sizes is None:
                # Without this fallback zip() below fails with an opaque
                # TypeError.  Materialise first so a one-shot iterable is
                # not exhausted while reading the shapes.
                prepared_imgs = list(imgs)
                sizes = [img.shape[1:] for img in prepared_imgs]

        bboxes = list()  # final boxes, one entry per image
        labels = list()  # final labels, one entry per image
        scores = list()  # final scores, one entry per image
        for img, size in zip(prepared_imgs, sizes):
            img = at.totensor(img[None]).float()  # add the batch axis
            # Ratio of the network-input width to the reference width.
            scale = img.shape[3] / size[1]
            roi_cls_loc, roi_scores, rois, _ = self(img, scale=scale)
            # We are assuming that batch size is 1.
            roi_score = roi_scores.data
            roi_cls_loc = roi_cls_loc.data
            # Bring the RoIs back to the reference image scale.
            roi = at.totensor(rois) / scale

            # Convert predictions to bounding boxes in image coordinates.
            # Bounding boxes are scaled to the scale of the input images.
            # NOTE: the normalization constants are created on the GPU
            # unconditionally (.cuda()).
            mean = t.Tensor(self.loc_normalize_mean).cuda(). \
                repeat(self.n_class)[None]
            std = t.Tensor(self.loc_normalize_std).cuda(). \
                repeat(self.n_class)[None]
            # Undo the mean/std normalization of the predicted offsets.
            # Per the original author's note, this is needed because the
            # regression targets are normalized at training time (in
            # ProposalTargetCreator), not because ProposalCreator
            # normalizes anything -- TODO confirm against that class.
            roi_cls_loc = (roi_cls_loc * std + mean)
            roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
            # Each RoI has one set of offsets per class, so replicate the
            # RoI along the class axis before decoding.
            roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)

            # Apply the per-class offsets to the RoIs to get the final boxes.
            cls_bbox = loc2bbox(at.tonumpy(roi).reshape((-1, 4)),
                                at.tonumpy(roi_cls_loc).reshape((-1, 4)))
            cls_bbox = at.totensor(cls_bbox)
            cls_bbox = cls_bbox.view(-1, self.n_class * 4)
            # Clip boxes to the reference image: even columns against
            # size[0] (height), odd columns against size[1] (width).
            cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
            cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])
            # Softmax over the class axis gives per-class probabilities.
            prob = at.tonumpy(F.softmax(at.totensor(roi_score), dim=1))

            raw_cls_bbox = at.tonumpy(cls_bbox)
            raw_prob = at.tonumpy(prob)
            # Reduce the per-class boxes and probabilities to the final
            # detections (self._suppress is defined elsewhere).
            bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)

            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)

        self.use_preset('evaluate')
        self.train()  # always leaves the model in train mode
        return bboxes, labels, scores
    def __call__(self, loc, score, anchor, img_size, scale=1.):
        """Turn anchor offsets and scores into a set of proposal boxes.

        Inputs are expected to be ndarrays.  :obj:`loc`, :obj:`score` and
        :obj:`anchor` describe the same anchor when indexed by the same
        index.  :math:`R` below is the total number of anchors.

        The steps are: decode boxes with ``loc2bbox``, clip them to the
        image, drop boxes smaller than ``self.min_size * scale`` on either
        side, keep the best-scoring boxes, run non-maximum suppression and
        finally truncate the survivors.  The pre-/post-NMS limits are the
        ``n_train_*`` attributes when ``self.parent_model.training`` is
        true and the ``n_test_*`` attributes otherwise, so call
        ``faster_rcnn.eval()`` before testing.

        Args:
            loc (array): Predicted offsets and scaling to anchors.
                Its shape is :math:`(R, 4)`.
            score (array): Predicted foreground probability for anchors.
                Its shape is :math:`(R,)`.
            anchor (array): Coordinates of anchors. Its shape is
                :math:`(R, 4)`.
            img_size (tuple of ints): A tuple :obj:`height, width`,
                which contains image size after scaling.
            scale (float): The scaling factor used to scale an image after
                reading it from a file.

        Returns:
            array:
            Coordinates of the proposal boxes, of shape :math:`(S, 4)`.
            When the applicable post-NMS limit is positive, :math:`S` is at
            most that limit; it further depends on how many boxes are
            removed by the size filter and by NMS.

        """
        if self.parent_model.training:
            pre_nms_limit, post_nms_limit = (self.n_train_pre_nms,
                                             self.n_train_post_nms)
        else:
            pre_nms_limit, post_nms_limit = (self.n_test_pre_nms,
                                             self.n_test_post_nms)

        # Decode the anchors into candidate boxes.
        roi = loc2bbox(anchor, loc)

        # Clip to the image: columns 0 and 2 against the height,
        # columns 1 and 3 against the width.
        roi[:, 0:4:2] = np.clip(roi[:, 0:4:2], 0, img_size[0])
        roi[:, 1:4:2] = np.clip(roi[:, 1:4:2], 0, img_size[1])

        # Discard boxes whose height or width is below the threshold.
        size_floor = self.min_size * scale
        heights = roi[:, 2] - roi[:, 0]
        widths = roi[:, 3] - roi[:, 1]
        large_enough = np.where(
            (heights >= size_floor) & (widths >= size_floor))[0]
        roi, score = roi[large_enough, :], score[large_enough]

        # Rank by score, best first, and keep the top pre-NMS candidates
        # (a non-positive limit means "keep all").
        ranking = score.ravel().argsort()[::-1]
        if pre_nms_limit > 0:
            ranking = ranking[:pre_nms_limit]
        roi = roi[ranking, :]

        # Apply NMS, then keep the top post-NMS survivors.
        # TODO (from the original author): remove cuda.to_gpu, i.e. the
        # conversion through `cp` before suppression.
        nms_input = cp.ascontiguousarray(cp.asarray(roi))
        survivors = non_maximum_suppression(nms_input,
                                            thresh=self.nms_thresh)
        if post_nms_limit > 0:
            survivors = survivors[:post_nms_limit]
        return roi[survivors]
    def __call__(self, loc, score, anchor, img_size, scale=1.):
        """Propose RoIs.

        Boxes are decoded with ``roi = loc2bbox(anchor, loc)``, clipped to
        the image and filtered by minimum size.  The scores are then sorted
        and the top ``pre_nms`` boxes are passed to NMS (threshold
        ``self.nms_thresh``); the first ``post_nms`` boxes that survive are
        returned.

        Args:
            loc (array): Predicted offsets and scaling to anchors,
                shape (R, 4), where R is the total number of anchors
                (w * h * 9 according to the original notes).
            score (array): Predicted foreground probability of the anchors,
                shape (R,).
            anchor (array): Anchor coordinates, shape (R, 4).
            img_size (tuple of ints: H, W): Image size after scaling.
            scale (float): Image scaling factor.

        Returns:
            array:
                Proposal box coordinates, shape (S, 4).  When the applicable
                post-NMS limit is positive, S is at most
                ``n_test_post_nms`` at test time and ``n_train_post_nms`` at
                train time; it also depends on the sizes of the predicted
                boxes and on how many boxes NMS discards.

        """
        # NOTE: the train/test limits are chosen from the parent model's
        # `training` flag, so call faster_rcnn.eval() before testing.
        in_training = self.parent_model.training
        top_n_before = (self.n_train_pre_nms if in_training
                        else self.n_test_pre_nms)
        top_n_after = (self.n_train_post_nms if in_training
                       else self.n_test_post_nms)

        # Decode anchors plus offsets into boxes laid out as
        # [y_min, x_min, y_max, x_max].
        roi = loc2bbox(anchor, loc)

        # Clip the predicted boxes to the image.  slice(0, 4, 2) selects
        # columns [0, 2] and slice(1, 4, 2) selects columns [1, 3];
        # np.clip(a, lo, hi) bounds every element of `a` to [lo, hi].
        y_cols = slice(0, 4, 2)
        x_cols = slice(1, 4, 2)
        roi[:, y_cols] = np.clip(roi[:, y_cols], 0, img_size[0])
        roi[:, x_cols] = np.clip(roi[:, x_cols], 0, img_size[1])

        # Drop boxes whose height or width is below min_size * scale.
        threshold = self.min_size * scale
        box_h = roi[:, 2] - roi[:, 0]
        box_w = roi[:, 3] - roi[:, 1]
        tall_enough = box_h >= threshold
        wide_enough = box_w >= threshold
        valid = np.where(tall_enough & wide_enough)[0]
        roi = roi[valid, :]
        score = score[valid]

        # Flatten the scores, sort them from high to low and keep the
        # first `top_n_before` entries.
        by_score = score.ravel().argsort()[::-1]
        if top_n_before > 0:
            by_score = by_score[:top_n_before]
        # After this, row 0 of `roi` is the box with the highest score.
        roi = roi[by_score, :]

        # TODO (from the original author): remove cuda.to_gpu.
        # Suppress overlapping boxes with NMS and return the survivors;
        # per the original notes the limits are 2000 boxes for training
        # and 300 for testing.
        selected = non_maximum_suppression(
            cp.ascontiguousarray(cp.asarray(roi)),
            thresh=self.nms_thresh)
        if top_n_after > 0:
            selected = selected[:top_n_after]
        roi = roi[selected]
        return roi