def _suppress(self, raw_cls_bbox, raw_prob):
     """Apply score thresholding and per-class NMS to raw detections.

     Class 0 is treated as background and skipped. For every other class,
     the boxes whose probability exceeds ``self.score_thresh`` are handed
     to ``non_maximum_suppression`` together with ``self.nms_thresh``.

     Args:
         raw_cls_bbox: Box coordinates; reshaped here to
             ``(-1, self.n_class, 4)``.
         raw_prob: Class probabilities, indexed as ``raw_prob[:, cls]``.

     Returns:
         tuple: ``(bbox, label, score)`` as float32, int32 and float32
         arrays. Labels are shifted down by one, so they lie in
         ``[0, self.n_class - 2]``.
     """
     # The reshape is independent of the class, so it is done only once.
     per_class_boxes = raw_cls_bbox.reshape((-1, self.n_class, 4))
     kept_boxes, kept_labels, kept_scores = [], [], []

     # Start at 1: class id 0 is the background class.
     for cls_id in range(1, self.n_class):
         cls_scores = raw_prob[:, cls_id]
         confident = cls_scores > self.score_thresh
         cand_boxes = per_class_boxes[:, cls_id, :][confident]
         cand_scores = cls_scores[confident]

         survivors = cp.asnumpy(
             non_maximum_suppression(
                 cp.array(cand_boxes), self.nms_thresh, cand_scores))

         kept_boxes.append(cand_boxes[survivors])
         kept_labels.append(np.ones((len(survivors),)) * (cls_id - 1))
         kept_scores.append(cand_scores[survivors])

     return (
         np.concatenate(kept_boxes, axis=0).astype(np.float32),
         np.concatenate(kept_labels, axis=0).astype(np.int32),
         np.concatenate(kept_scores, axis=0).astype(np.float32),
     )
 def _suppress(self, raw_cls_bbox, raw_prob):
     """Filter raw per-class detections by score and then by NMS.

     The background class (id 0) is ignored. Each remaining class keeps
     only the boxes scoring above ``self.score_thresh`` and then only
     those selected by ``non_maximum_suppression`` at ``self.nms_thresh``.

     Args:
         raw_cls_bbox: Box coordinates, viewed as
             ``(-1, self.n_class, 4)`` inside this method.
         raw_prob: Class probabilities; column ``l`` belongs to class ``l``.

     Returns:
         tuple: ``(bbox, label, score)`` with dtypes float32, int32 and
         float32. Returned labels are ``class id - 1``.
     """
     boxes_by_class = raw_cls_bbox.reshape((-1, self.n_class, 4))

     # One (boxes, labels, scores) triple per foreground class.
     per_class = []
     for fg_cls in range(1, self.n_class):
         probs = raw_prob[:, fg_cls]
         above_thresh = probs > self.score_thresh
         boxes = boxes_by_class[:, fg_cls, :][above_thresh]
         probs = probs[above_thresh]

         picked = non_maximum_suppression(cp.array(boxes),
                                          self.nms_thresh, probs)
         picked = cp.asnumpy(picked)

         # The labels are in [0, self.n_class - 2].
         class_labels = np.ones((len(picked), )) * (fg_cls - 1)
         per_class.append((boxes[picked], class_labels, probs[picked]))

     bbox = np.concatenate([entry[0] for entry in per_class], axis=0)
     label = np.concatenate([entry[1] for entry in per_class], axis=0)
     score = np.concatenate([entry[2] for entry in per_class], axis=0)
     return (bbox.astype(np.float32), label.astype(np.int32),
             score.astype(np.float32))
Exemple #3
0
 def _suppress(self, raw_cls_bbox, raw_prob):
     """Select final detections: score threshold first, then NMS per class.

     Iterates over every foreground class (ids ``1 .. self.n_class - 1``;
     id 0 is background) and keeps the boxes that both exceed
     ``self.score_thresh`` and survive ``non_maximum_suppression``.

     Args:
         raw_cls_bbox: Box coordinates; reshaped to
             ``(-1, self.n_class, 4)`` so one box per class can be picked.
         raw_prob: Class probabilities, one column per class.

     Returns:
         tuple: ``(bbox, label, score)`` as float32 / int32 / float32
         arrays; labels are in ``[0, self.n_class - 2]``.
     """
     all_boxes = raw_cls_bbox.reshape((-1, self.n_class, 4))
     out_boxes = []
     out_labels = []
     out_scores = []

     for class_idx in range(1, self.n_class):
         # Boxes and confidences predicted for this class only.
         class_boxes = all_boxes[:, class_idx, :]
         class_conf = raw_prob[:, class_idx]

         # Drop everything at or below the confidence threshold.
         passing = class_conf > self.score_thresh
         class_boxes = class_boxes[passing]
         class_conf = class_conf[passing]

         # Indices of the boxes that survive NMS, moved back to NumPy.
         nms_idx = cp.asnumpy(
             non_maximum_suppression(
                 cp.array(class_boxes), self.nms_thresh, class_conf))

         out_boxes.append(class_boxes[nms_idx])
         out_labels.append(np.ones((len(nms_idx),)) * (class_idx - 1))
         out_scores.append(class_conf[nms_idx])

     final_boxes = np.concatenate(out_boxes, axis=0).astype(np.float32)
     final_labels = np.concatenate(out_labels, axis=0).astype(np.int32)
     final_scores = np.concatenate(out_scores, axis=0).astype(np.float32)
     return final_boxes, final_labels, final_scores
 def _suppress(self, raw_cls_bbox, raw_prob):
     """Produce the final predictions from raw per-class boxes and scores.

     For each non-background class (class id 0 is background), boxes whose
     score is not above ``self.score_thresh`` are discarded and the rest
     are de-duplicated with ``non_maximum_suppression`` using
     ``self.nms_thresh``.

     Args:
         raw_cls_bbox: Box coordinates; reshaped to
             ``(-1, self.n_class, 4)``.
         raw_prob: Class scores, indexed as ``raw_prob[:, l]``.

     Returns:
         tuple: ``(bbox, label, score)`` merged over all classes, cast to
         float32, int32 and float32. Labels are in
         ``[0, self.n_class - 2]``.
     """
     grid = raw_cls_bbox.reshape((-1, self.n_class, 4))
     collected = {"bbox": [], "label": [], "score": []}

     # Per-class loop; the background class (0) is skipped.
     for cid in range(1, self.n_class):
         scores_c = raw_prob[:, cid]
         valid = scores_c > self.score_thresh

         # Keep only the boxes / scores that pass the threshold.
         boxes_c = grid[:, cid, :][valid]
         scores_c = scores_c[valid]

         # Non-maximum suppression removes overlapping duplicates.
         chosen = non_maximum_suppression(
             cp.array(boxes_c), self.nms_thresh, scores_c)
         chosen = cp.asnumpy(chosen)

         collected["bbox"].append(boxes_c[chosen])
         collected["label"].append(np.ones((len(chosen),)) * (cid - 1))
         collected["score"].append(scores_c[chosen])

     # Merge the per-class results into single arrays.
     bbox = np.concatenate(collected["bbox"], axis=0).astype(np.float32)
     label = np.concatenate(collected["label"], axis=0).astype(np.int32)
     score = np.concatenate(collected["score"], axis=0).astype(np.float32)
     return bbox, label, score
    def __call__(self, loc, score, anchor, img_size, scale=1.):
        """Propose RoIs from RPN outputs (inputs should be ndarrays).

        ``loc``, ``score`` and ``anchor`` are index-aligned: the same index
        refers to the same anchor in all three. :math:`R` denotes the total
        number of anchors, i.e. feature-map height times width times the
        number of anchor bases per pixel.

        Args:
            loc (array): Predicted offsets and scaling to anchors,
                shape :math:`(R, 4)`.
            score (array): Predicted foreground probability for anchors,
                shape :math:`(R,)`.
            anchor (array): Coordinates of anchors, shape :math:`(R, 4)`.
            img_size (tuple of ints): ``(height, width)`` of the image
                after scaling.
            scale (float): Scaling factor applied to the image after it was
                read from a file.

        Returns:
            array: Proposal box coordinates of shape :math:`(S, 4)`.
            :math:`S` is capped by ``self.n_test_post_nms`` at test time
            and by ``self.n_train_post_nms`` at train time (when those
            limits are positive); it further depends on how many boxes are
            removed by the size filter and by NMS.
        """
        # The proposal budgets follow the parent model's train/eval flag,
        # so call ``eval()`` on the model before testing.
        if self.parent_model.training:
            pre_nms_limit = self.n_train_pre_nms
            post_nms_limit = self.n_train_post_nms
        else:
            pre_nms_limit = self.n_test_pre_nms
            post_nms_limit = self.n_test_post_nms

        # Decode the anchors plus predicted offsets into proposal boxes.
        proposals = loc2bbox(anchor, loc)

        # Clip to the image: columns 0 and 2 against img_size[0],
        # columns 1 and 3 against img_size[1].
        proposals[:, 0:4:2] = np.clip(proposals[:, 0:4:2], 0, img_size[0])
        proposals[:, 1:4:2] = np.clip(proposals[:, 1:4:2], 0, img_size[1])

        # Discard boxes whose height or width is below the scaled minimum.
        size_floor = self.min_size * scale
        heights = proposals[:, 2] - proposals[:, 0]
        widths = proposals[:, 3] - proposals[:, 1]
        large_enough = np.where(
            (heights >= size_floor) & (widths >= size_floor))[0]
        proposals = proposals[large_enough, :]
        fg_score = score[large_enough]

        # Rank by score, best first, and keep the top candidates
        # (e.g. 6000) before NMS.
        ranking = fg_score.ravel().argsort()[::-1]
        if pre_nms_limit > 0:
            ranking = ranking[:pre_nms_limit]
        proposals = proposals[ranking, :]

        # NMS (e.g. threshold 0.7) runs on a contiguous ``cp`` copy of the
        # boxes; afterwards keep at most the post-NMS budget (e.g. 300).
        survivors = non_maximum_suppression(
            cp.ascontiguousarray(cp.asarray(proposals)),
            thresh=self.nms_thresh)
        if post_nms_limit > 0:
            survivors = survivors[:post_nms_limit]
        return proposals[survivors]
Exemple #6
0
    def __call__(self, loc, score, anchor, img_size, scale=1.):
        """Propose RoIs from predicted locs, scores and anchors (ndarrays).

        The three array inputs describe the same anchors at the same
        indices. :math:`R` is the total anchor count: feature-map height
        times width times the number of anchor bases per pixel.

        Args:
            loc (array): Predicted offsets and scaling to anchors,
                shape :math:`(R, 4)`.
            score (array): Predicted foreground probability for anchors,
                shape :math:`(R,)`.
            anchor (array): Coordinates of all feature-map anchors,
                shape :math:`(R, 4)`.
            img_size (tuple of ints): ``(height, width)`` of the image
                after scaling.
            scale (float): Scaling factor applied to the image after it was
                read from a file.

        Returns:
            array: Proposal boxes of shape :math:`(S, 4)`. :math:`S` is
            bounded by the post-NMS budget of the current mode
            (``self.n_train_post_nms`` or ``self.n_test_post_nms``) when
            that budget is positive, and depends on how many boxes the
            size filter and NMS discard.
        """
        # Budgets depend on the parent model's mode; remember to switch the
        # model to eval mode for testing.
        in_training = self.parent_model.training
        if in_training:
            top_before, top_after = (self.n_train_pre_nms,
                                     self.n_train_post_nms)
        else:
            top_before, top_after = (self.n_test_pre_nms,
                                     self.n_test_post_nms)

        # Convert anchors into proposals via the bbox transformation.
        boxes = loc2bbox(anchor, loc)

        # Clip the boxes to the image: columns 0/2 to [0, img_size[0]]
        # (height) and columns 1/3 to [0, img_size[1]] (width).
        limit_h, limit_w = img_size[0], img_size[1]
        boxes[:, 0:4:2] = np.clip(boxes[:, 0:4:2], 0, limit_h)
        boxes[:, 1:4:2] = np.clip(boxes[:, 1:4:2], 0, limit_w)

        # Both box sides must reach the scaled minimum size.
        threshold = self.min_size * scale
        side_a = boxes[:, 2] - boxes[:, 0]
        side_b = boxes[:, 3] - boxes[:, 1]
        ok_idx = np.where((side_a >= threshold) & (side_b >= threshold))[0]
        boxes = boxes[ok_idx, :]
        # Foreground scores of the boxes that remain.
        remaining_score = score[ok_idx]

        # Descending sort by score; optionally keep only the best
        # ``top_before`` boxes ahead of NMS.
        descending = remaining_score.ravel().argsort()[::-1]
        if top_before > 0:
            descending = descending[:top_before]
        boxes = boxes[descending, :]

        # Suppress overlapping boxes, then keep at most ``top_after``.
        nms_input = cp.ascontiguousarray(cp.asarray(boxes))
        selected = non_maximum_suppression(nms_input, thresh=self.nms_thresh)
        if top_after > 0:
            selected = selected[:top_after]
        return boxes[selected]
    def _suppression(self, rois, roi_indices, final_locs, final_scores,
                     img_size, img_number, high_thresh=True):
        """Decode head outputs into per-image detections with NMS.

        Args:
            rois: numpy.ndarray(N, 4)
            roi_indices: numpy.ndarray(N), image index of every RoI
            final_locs: torch.tensor(M, 4*n_class)
            final_scores: torch.tensor(M, n_class)
            img_size: numpy.ndarray(2,)
            img_number: int, number of images to split the results into
            high_thresh: only the literal ``True`` selects the score
                threshold 0.7; any other value selects 0.05. The NMS
                threshold is 0.3 in both cases.

        Returns:
            tuple: ``(bboxes, labels, scores)``; each is a list with one
            array per image (float32 boxes, int32 labels, float32 scores).
            Labels are ``class id - 1`` because class 0 (background) is
            skipped.
        """
        nms_thresh = 0.3
        # Identity test kept on purpose: truthy non-``True`` values use the
        # low score threshold.
        score_thresh = 0.7 if high_thresh is True else 0.05

        n_class = self.n_class
        locs_np = final_locs.view(-1, n_class, 4).cpu().data.numpy()
        # Repeat every RoI once per class so it lines up with its offsets.
        tiled_rois = np.repeat(rois[:, np.newaxis, :], n_class, axis=1)

        decoded = loc2bbox(tiled_rois.reshape(-1, 4), locs_np.reshape(-1, 4))
        # Columns 0/2 are clipped to img_size[1], columns 1/3 to img_size[0].
        decoded[:, 0:4:2] = np.clip(decoded[:, 0:4:2], 0, img_size[1])
        decoded[:, 1:4:2] = np.clip(decoded[:, 1:4:2], 0, img_size[0])
        decoded = decoded.reshape(-1, n_class, 4)

        probs_np = F.softmax(final_scores, dim=1).cpu().data.numpy()

        bboxes, labels, scores = [], [], []

        # Handle one image at a time.
        for img_idx in range(img_number):
            in_image = np.where(roi_indices == img_idx)
            # Gather this image's rows once instead of once per class.
            img_boxes = decoded[in_image]
            img_probs = probs_np[in_image]

            img_bbox, img_label, img_score = [], [], []
            # Class id 0 is background, so start from 1.
            for cls_id in range(1, n_class):
                cls_boxes = img_boxes[:, cls_id, :]
                cls_probs = img_probs[:, cls_id]

                passing = cls_probs > score_thresh
                cls_boxes = cls_boxes[passing]
                cls_probs = cls_probs[passing]

                picked = cp.asnumpy(
                    non_maximum_suppression(cp.array(cls_boxes), nms_thresh,
                                            cls_probs))
                img_bbox.append(cls_boxes[picked])
                img_label.append(np.ones((len(picked), )) * (cls_id - 1))
                img_score.append(cls_probs[picked])

            # Final boxes, labels and scores for this single image.
            bboxes.append(
                np.concatenate(img_bbox, axis=0).astype(np.float32))
            labels.append(
                np.concatenate(img_label, axis=0).astype(np.int32))
            scores.append(
                np.concatenate(img_score, axis=0).astype(np.float32))

        return bboxes, labels, scores
Exemple #8
0
    def __call__(self, loc, score, anchor, img_size, scale=1.):
        """Propose RoIs from RPN outputs (inputs should be ndarrays).

        ``loc``, ``score`` and ``anchor`` refer to the same anchor when
        indexed by the same index. :math:`R` is the total number of
        anchors: image (feature-map) height times width times the number
        of anchor bases per pixel.

        Args:
            loc (array): Predicted offsets and scaling to anchors,
                shape :math:`(R, 4)`.
            score (array): Predicted foreground probability for anchors,
                shape :math:`(R,)`.
            anchor (array): Coordinates of anchors, shape :math:`(R, 4)`.
            img_size (tuple of ints): ``(height, width)`` of the image
                after scaling.
            scale (float): Scaling factor applied to the image after it was
                read from a file.

        Example input shapes noted by a previous author:
            ``loc (16650, 4)``, ``score (16650,)``, ``anchor (16650, 4)``,
            ``img_size (600, 800)``, ``scale tensor([1.6])``, where
            ``16650 = 37 * 50 * 9``.

        Returns:
            array: Proposal boxes of shape :math:`(S, 4)`. :math:`S` is
            bounded by ``self.n_test_post_nms`` at test time and by
            ``self.n_train_post_nms`` at train time (when positive), and
            depends on how many boxes the size filter and NMS discard.
        """
        # The budgets follow the parent model's training flag; switch the
        # model to eval mode before testing.
        if self.parent_model.training:
            budget_pre, budget_post = (self.n_train_pre_nms,
                                       self.n_train_post_nms)
        else:
            budget_pre, budget_post = (self.n_test_pre_nms,
                                       self.n_test_post_nms)

        # loc2bbox combines each anchor with its predicted offsets to give
        # one predicted box per anchor; these boxes are the RoIs from here.
        rois = loc2bbox(anchor, loc)

        # Clip the boxes so no part sticks out of the network input image.
        # Columns 0 and 2 are bounded by img_size[0], columns 1 and 3 by
        # img_size[1].
        rois[:, 0:4:2] = np.clip(rois[:, 0:4:2], 0, img_size[0])
        rois[:, 1:4:2] = np.clip(rois[:, 1:4:2], 0, img_size[1])

        # The minimum size is expressed on the original image, so it is
        # multiplied by the scale (e.g. 16 px * 1.6 = 25.6 px).
        smallest = self.min_size * scale
        box_h = rois[:, 2] - rois[:, 0]
        box_w = rois[:, 3] - rois[:, 1]
        survivors = np.where((box_h >= smallest) & (box_w >= smallest))[0]
        rois = rois[survivors, :]
        # Foreground probability of each remaining RoI.
        roi_score = score[survivors]

        # Indices sorted by score, highest first; keep the best
        # ``budget_pre`` (e.g. 6000) when that budget is positive.
        by_score = roi_score.ravel().argsort()[::-1]
        if budget_pre > 0:
            by_score = by_score[:budget_pre]
        rois = rois[by_score, :]

        # NMS (e.g. threshold 0.7) on a contiguous ``cp`` array, then keep
        # at most ``budget_post`` (e.g. 300) proposals.
        nms_keep = non_maximum_suppression(
            cp.ascontiguousarray(cp.asarray(rois)), thresh=self.nms_thresh)
        if budget_post > 0:
            nms_keep = nms_keep[:budget_post]
        return rois[nms_keep]
    def __call__(self, loc, score, anchor, img_size, scale=1.):
        """Propose RoIs from RPN outputs (inputs should be ndarrays).

        Entries of ``loc``, ``score`` and ``anchor`` at the same index
        belong to the same anchor. :math:`R` is the total number of
        anchors (feature-map height x width x anchor bases per pixel).

        Args:
            loc (array): Predicted offsets and scaling to anchors,
                shape :math:`(R, 4)`.
            score (array): Predicted foreground probability for anchors,
                shape :math:`(R,)`.
            anchor (array): Coordinates of anchors, shape :math:`(R, 4)`.
            img_size (tuple of ints): ``(height, width)`` of the image
                after scaling.
            scale (float): Scaling factor applied to the image after it was
                read from a file.

        Returns:
            array: Proposal boxes of shape :math:`(S, 4)`; :math:`S` is
            limited by the post-NMS budget of the active mode when that
            budget is positive, and shrinks with every box removed by the
            size filter or by NMS.
        """
        # Train and test use different budgets; the parent model must be in
        # eval mode for the test budgets to apply.
        model = self.parent_model
        if model.training:
            keep_before_nms = self.n_train_pre_nms
            keep_after_nms = self.n_train_post_nms
        else:
            keep_before_nms = self.n_test_pre_nms
            keep_after_nms = self.n_test_post_nms

        # Anchors + predicted offsets -> candidate boxes.
        candidates = loc2bbox(anchor, loc)

        # Clip columns (0, 2) to img_size[0] and columns (1, 3) to
        # img_size[1].
        for first_col, upper in ((0, img_size[0]), (1, img_size[1])):
            cols = slice(first_col, 4, 2)
            candidates[:, cols] = np.clip(candidates[:, cols], 0, upper)

        # Reject boxes that are too small in either dimension.
        min_side = self.min_size * scale
        extent_0 = candidates[:, 2] - candidates[:, 0]
        extent_1 = candidates[:, 3] - candidates[:, 1]
        valid_idx = np.where(
            (extent_0 >= min_side) & (extent_1 >= min_side))[0]
        candidates = candidates[valid_idx, :]
        valid_score = score[valid_idx]

        # Highest scores first; truncate to the pre-NMS budget (e.g. 6000).
        best_first = valid_score.ravel().argsort()[::-1]
        if keep_before_nms > 0:
            best_first = best_first[:keep_before_nms]
        candidates = candidates[best_first, :]

        # NMS (e.g. threshold 0.7), then truncate to the post-NMS budget
        # (e.g. 300).
        chosen = non_maximum_suppression(
            cp.ascontiguousarray(cp.asarray(candidates)),
            thresh=self.nms_thresh)
        if keep_after_nms > 0:
            chosen = chosen[:keep_after_nms]
        return candidates[chosen]
    def __call__(self, loc, score, anchor, img_size, scale=1.):
        """Propose RoIs.

        The boxes ``loc2bbox(anchor, loc)`` are clipped and size-filtered,
        sorted by ``score``, and the top pre-NMS candidates are passed to
        NMS with threshold ``self.nms_thresh``. The first post-NMS entries
        of the NMS result are returned.

        Args:
            loc (array): Predicted offsets and scales for the anchors,
                shape (R, 4), where R = w * h * 9.
            score (array): Predicted foreground probability of the anchors,
                shape (R,).
            anchor (array): Anchor coordinates, shape (R, 4).
            img_size (tuple of ints): ``(H, W)`` image size after scaling.
            scale (float): Image scaling factor.

        Returns:
            array: Proposal box coordinates of shape (S, 4). S is bounded
            by ``n_test_post_nms`` at test time and ``n_train_post_nms``
            at train time (when positive); it depends on the sizes of the
            predicted boxes and on how many boxes NMS discards.
        """
        # NOTE: for testing, put the model in eval mode so that
        # ``parent_model.training`` is False and the test budgets are used.
        if self.parent_model.training:
            n_before, n_after = self.n_train_pre_nms, self.n_train_post_nms
        else:
            n_before, n_after = self.n_test_pre_nms, self.n_test_post_nms

        # Decode anchors and offsets into boxes
        # (presumably [y_min, x_min, y_max, x_max]).
        decoded = loc2bbox(anchor, loc)

        # np.clip(a, lo, hi) clamps the values of ``a`` into [lo, hi].
        # ``0:4:2`` selects columns 0 and 2; ``1:4:2`` selects 1 and 3.
        decoded[:, 0:4:2] = np.clip(decoded[:, 0:4:2], 0, img_size[0])
        decoded[:, 1:4:2] = np.clip(decoded[:, 1:4:2], 0, img_size[1])

        # Remove boxes whose height or width is below min_size * scale.
        lower_bound = self.min_size * scale
        h_extent = decoded[:, 2] - decoded[:, 0]
        w_extent = decoded[:, 3] - decoded[:, 1]
        retained = np.where(
            (h_extent >= lower_bound) & (w_extent >= lower_bound))[0]
        decoded = decoded[retained, :]
        retained_score = score[retained]

        # Flatten the scores and sort them from high to low; after the
        # reordering, row 0 is the box with the highest score.
        high_to_low = retained_score.ravel().argsort()[::-1]
        if n_before > 0:
            high_to_low = high_to_low[:n_before]
        decoded = decoded[high_to_low, :]

        # Non-maximum suppression removes overlapping duplicates; the
        # filtered RoIs are then truncated to the post-NMS budget.
        gpu_boxes = cp.ascontiguousarray(cp.asarray(decoded))
        kept = non_maximum_suppression(gpu_boxes, thresh=self.nms_thresh)
        if n_after > 0:
            kept = kept[:n_after]
        return decoded[kept]
Exemple #11
0
    def _suppress(self, raw_cls_bbox, raw_prob, gt_labels, attack=False):
        """Threshold and suppress raw detections, with an optional attack mode.

        Normal mode (``attack`` falsy): for every non-background class, keep
        the boxes scoring above ``self.score_thresh``, run
        ``non_maximum_suppression`` with ``self.nms_thresh`` and return the
        concatenated results.

        Attack mode (``attack`` truthy): only the classes listed in
        ``gt_labels`` are examined, no NMS is run, and the rows of
        ``raw_prob`` selected by the accumulated mask are returned instead
        of boxes.

        Args:
            raw_cls_bbox: Box coordinates, reshaped to
                ``(-1, self.n_class, 4)``; may be ``None`` (see notes).
            raw_prob: Class probabilities indexed as ``raw_prob[:, l]``.
                Attack mode calls ``raw_prob.size()``, so it is presumably
                a torch tensor there -- TODO confirm.
            gt_labels: Iterable of class indices; used only in attack mode.
            attack: Selects attack mode when truthy.

        Returns:
            Normal mode: ``(bbox, label, score)`` as float32 / int32 /
            float32 arrays, labels in ``[0, self.n_class - 2]``.
            Attack mode: ``(label, probs, masks)`` where ``label`` is a
            list of per-class arrays filled with ``l - 1``, ``probs`` is
            ``raw_prob[masks]`` and ``masks`` is the accumulated mask.

        Notes:
            NOTE(review): attack mode overwrites ``self.score_thresh`` with
            0.7 and never restores it, so later calls see the new value.
            NOTE(review): in normal mode, if ``raw_cls_bbox`` is ``None``
            then ``cls_bbox_l`` is never assigned and the NMS call raises
            ``NameError``.
            NOTE(review): attack mode calls ``.cuda()``, so it requires a
            CUDA device.
        """
        bbox = list()
        label = list()
        score = list()
        # skip cls_id = 0 because it is the background class
        # The commented-out variant below only works for two-class problems.
        # for l in range(1, self.n_class):
        #     prob_l = raw_prob[:, l]
        #     prob_l_np = raw_prob.cpu().detach().numpy()
        #     mask = prob_l > self.score_thresh
        #     if raw_cls_bbox is not None:
        #         cls_bbox_l = raw_cls_bbox.reshape((-1, self.n_class, 4))[:, l, :]
        #         cls_bbox_l = cls_bbox_l[mask]
        #     prob_l = prob_l[mask]
        #     if attack:
        #         label.append((l - 1) * np.ones((len(prob_l),)))
        #         probs = raw_prob[mask]
        #         probs_np = probs.cpu().detach().numpy()
        #         mask_np = mask.cpu().detach().numpy()
        #         return label, probs, mask
        # The code below works for multi-class problems.
        if attack:
            # NOTE(review): persistent side effect on the instance.
            self.score_thresh = 0.7
            # Zero byte mask with one entry per row of raw_prob, on the GPU.
            masks = t.zeros(raw_prob.size()[0]).byte().cuda()
            for l in gt_labels:
                prob_l = raw_prob[:, l]
                # m = prob_l.cpu().detach().numpy()
                mask = prob_l > self.score_thresh
                # x = mask.cpu().detach().numpy()
                if raw_cls_bbox is not None:
                    # NOTE(review): cls_bbox_l is computed but not used
                    # anywhere in the attack branch.
                    cls_bbox_l = raw_cls_bbox.reshape(
                        (-1, self.n_class, 4))[:, l, :]
                    cls_bbox_l = cls_bbox_l[mask]
                prob_l = prob_l[mask]
                # Accumulate by addition: a row passing for several labels
                # ends up with a value greater than 1.
                # NOTE(review): confirm raw_prob[masks] behaves as intended
                # for such values.
                masks = masks + mask
                # Always true here; this is already inside `if attack:`.
                if attack:
                    label.append((l - 1) * np.ones((len(prob_l), )))
            probs = raw_prob[masks]
            # y = masks.cpu().detach().numpy()
            return label, probs, masks

        for l in range(1, self.n_class):
            prob_l = raw_prob[:, l]
            mask = prob_l > self.score_thresh
            if raw_cls_bbox is not None:
                cls_bbox_l = raw_cls_bbox.reshape((-1, self.n_class, 4))[:,
                                                                         l, :]
                cls_bbox_l = cls_bbox_l[mask]
            prob_l = prob_l[mask]

            # NOTE(review): cls_bbox_l is undefined here when raw_cls_bbox
            # is None.
            keep = non_maximum_suppression(cp.array(cls_bbox_l),
                                           self.nms_thresh, prob_l)
            keep = cp.asnumpy(keep)
            bbox.append(cls_bbox_l[keep])
            # The labels are in [0, self.n_class - 2].
            label.append((l - 1) * np.ones((len(keep), )))
            score.append(prob_l[keep])
        # Merge the per-class results and fix the output dtypes.
        bbox = np.concatenate(bbox, axis=0).astype(np.float32)
        label = np.concatenate(label, axis=0).astype(np.int32)
        score = np.concatenate(score, axis=0).astype(np.float32)
        return bbox, label, score