コード例 #1
0
    def _calc_ious(self, anchor, bbox, inside_index):
        """Match anchors and ground-truth boxes by IoU.

        The result of ``bbox_iou(anchor, bbox)`` is indexed below as a 2-D
        matrix whose rows follow ``anchor`` and whose columns follow ``bbox``.

        :param anchor: candidate anchor boxes (rows of the IoU matrix).
        :param bbox: ground-truth boxes (columns of the IoU matrix).
        :param inside_index: only its length is used, to enumerate the rows;
            presumably len(inside_index) == len(anchor) -- confirm in caller.
        :return:
            argmax_ious: per anchor, the column index of its highest IoU.
            max_ious: per anchor, that highest IoU value.
            gt_argmax_ious: row index of every matrix entry that equals the
                maximum of its column (ties included, rows may repeat).
        """
        overlap = bbox_iou(anchor, bbox)

        # Row-wise: best ground-truth column for each anchor and its value.
        argmax_ious = overlap.argmax(axis=1)
        row_ids = np.arange(len(inside_index))
        max_ious = overlap[row_ids, argmax_ious]

        # Column-wise: the maximum of each ground-truth column.
        col_ids = np.arange(overlap.shape[1])
        col_peak = overlap[overlap.argmax(axis=0), col_ids]

        # argmax reports only the first maximum per column, so compare the
        # whole matrix with the column maxima to collect tied anchors too.
        tied = np.where(overlap == col_peak)
        gt_argmax_ious = tied[0]

        return argmax_ious, max_ious, gt_argmax_ious
コード例 #2
0
    def _calc_ious(self, anchor, bbox, inside_index):
        """Compute anchor / ground-truth IoU matches.

        Returns three arrays:

        * ``argmax_ious`` -- for each anchor, the index of the ground-truth
          box it overlaps most.
        * ``max_ious`` -- for each anchor, that largest IoU value (in anchor
          order, not sorted).
        * ``gt_argmax_ious`` -- anchor indices of every IoU entry equal to
          the best IoU of its ground-truth box. Ties are all kept, so this
          can be longer than the number of ground-truth boxes.

        Only anchor indices are reported in the last array; which
        ground-truth box each one matched is not returned.
        """
        # Shapes recorded by the original author in one debugging session:
        # anchor (5834, 4), bbox (3, 4) -> IoU matrix (5834, 3).
        iou_matrix = bbox_iou(anchor, bbox)

        argmax_ious = iou_matrix.argmax(axis=1)
        anchor_ids = np.arange(len(inside_index))
        max_ious = iou_matrix[anchor_ids, argmax_ious]

        # First anchor reaching each ground-truth box's maximum, and the
        # maximum itself.
        first_best_anchor = iou_matrix.argmax(axis=0)
        gt_ids = np.arange(iou_matrix.shape[1])
        gt_max_ious = iou_matrix[first_best_anchor, gt_ids]

        # Re-derive the indices by value so anchors tied at the maximum
        # are included as well.
        is_gt_best = iou_matrix == gt_max_ious
        gt_argmax_ious = np.where(is_gt_best)[0]

        return argmax_ious, max_ious, gt_argmax_ious
コード例 #3
0
    def __call__(self,
                 roi,
                 bbox,
                 label,
                 loc_normalize_mean=(0., 0., 0., 0.),
                 loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
        """Sample RoIs and build their label / localisation targets.

        The ground-truth boxes are appended to ``roi``; every candidate is
        matched to the ground-truth box it overlaps most, and a random
        subset of foreground and background candidates is kept.

        :param roi: candidate boxes, one per row.
        :param bbox: ground-truth boxes, one per row (must be 2-D).
        :param label: class id of each ground-truth box.
        :param loc_normalize_mean: subtracted from the raw offsets.
        :param loc_normalize_std: divides the mean-shifted offsets.
        :return: ``(sample_roi, gt_roi_loc, gt_roi_label)``; foreground
            samples come first and carry ``label + 1``, the remaining
            samples carry label 0.
        """
        _n_gt, _ = bbox.shape  # value unused; the unpack requires a 2-D bbox

        # Ground-truth boxes join the candidate pool.
        roi = np.concatenate((roi, bbox), axis=0)

        fg_quota = np.round(self.n_sample * self.pos_ratio)
        overlap = bbox_iou(roi, bbox)
        gt_assignment = overlap.argmax(axis=1)
        best_overlap = overlap.max(axis=1)
        # Shift class ids up by one so that 0 can denote background.
        shifted_label = label[gt_assignment] + 1

        # Foreground: best IoU at or above pos_iou_thresh.
        fg_index = np.where(best_overlap >= self.pos_iou_thresh)[0]
        n_fg = int(min(fg_quota, fg_index.size))
        if fg_index.size > 0:
            fg_index = np.random.choice(fg_index, size=n_fg, replace=False)

        # Background: best IoU in [neg_iou_thresh_lo, neg_iou_thresh_hi).
        below_hi = best_overlap < self.neg_iou_thresh_hi
        at_least_lo = best_overlap >= self.neg_iou_thresh_lo
        bg_index = np.where(below_hi & at_least_lo)[0]
        n_bg = int(min(self.n_sample - n_fg, bg_index.size))
        if bg_index.size > 0:
            bg_index = np.random.choice(bg_index, size=n_bg, replace=False)

        keep_index = np.append(fg_index, bg_index)
        gt_roi_label = shifted_label[keep_index]
        gt_roi_label[n_fg:] = 0  # everything after the foreground is background
        sample_roi = roi[keep_index]

        # Offsets from each sampled RoI to its assigned ground-truth box,
        # then normalised with the supplied mean / std.
        raw_loc = bbox2loc(sample_roi, bbox[gt_assignment[keep_index]])
        mean = np.array(loc_normalize_mean, np.float32)
        std = np.array(loc_normalize_std, np.float32)
        gt_roi_loc = (raw_loc - mean) / std
        return sample_roi, gt_roi_loc, gt_roi_label
コード例 #4
0
 def _calc_ious(self, anchor, bbox, inside_index):
     """Return per-anchor best matches and the anchors that best fit a box.

     Returns ``(argmax_ious, max_ious, gt_argmax_ious)``: the best
     ground-truth index per anchor, the matching IoU per anchor, and the
     anchor index of every IoU entry equal to its column maximum (ties
     are kept, so the last array may hold more entries than there are
     ground-truth boxes). Only ``len(inside_index)`` is read from
     ``inside_index``.
     """
     pairwise = bbox_iou(anchor, bbox)

     # Per anchor (row): index and value of the largest IoU.
     best_gt = pairwise.argmax(axis=1)
     best_val = pairwise[np.arange(len(inside_index)), best_gt]

     # Per ground-truth box (column): the largest IoU.
     gt_columns = np.arange(pairwise.shape[1])
     column_max = pairwise[pairwise.argmax(axis=0), gt_columns]

     # Every row holding a column maximum, ties included.
     rows_at_max = np.where(pairwise == column_max)[0]
     return best_gt, best_val, rows_at_max
コード例 #5
0
    def _calc_ious(self, anchor, bbox, inside_index):
        """Compute IoU-based matches between anchors and ground-truth boxes.

        The IoU matrix is indexed with anchors on rows and ground-truth
        boxes on columns.

        Returns:
            argmax_ious: column of the largest IoU in each row.
            max_iou: the largest IoU of each row.
            gt_argmax_iou: row index of every entry equal to the maximum
                of its column (ties included).
        """
        iou_table = bbox_iou(anchor, bbox)

        argmax_ious = iou_table.argmax(axis=1)
        rows = np.arange(len(inside_index))  # only the length is used
        max_iou = iou_table[rows, argmax_ious]

        first_row_at_max = iou_table.argmax(axis=0)
        cols = np.arange(iou_table.shape[1])
        column_best = iou_table[first_row_at_max, cols]

        # Comparing by value picks up all rows tied at a column maximum.
        match = np.where(iou_table == column_best)
        gt_argmax_iou = match[0]
        return argmax_ious, max_iou, gt_argmax_iou
コード例 #6
0
def eval(seq_loader,
         faster_rcnn,
         signal_type,
         scale=1.,
         test_num=10000,
         stop=False):
    """Run the detector over the sequences and score it with the VOC metric.

    NOTE: the name shadows the builtin ``eval``; it is kept so existing
    callers keep working. ``scale`` and ``test_num`` are accepted but not
    used in this body.

    As written, every sequence is reduced to its frame at index 115
    (single-image overfitting setup), so each sequence must hold at least
    116 frames.

    Args:
        seq_loader: iterable yielding ``(seq_name, seq)`` pairs.
        faster_rcnn: model exposing ``predict(imgs, signal_type, sizes)``.
        signal_type: forwarded to the dataset and to ``predict``.
        stop: when true, drop into an ``ipdb`` session before returning.

    Returns:
        ``(result, best_iou)`` where ``result`` comes from
        ``eval_detection_voc`` and ``best_iou`` is the largest IoU between
        the first ground-truth entry and the first prediction entry
        (0 when ``max`` raises ``ValueError``).
    """
    dataset_root = download('Carrada')
    pred_bboxes, pred_labels, pred_scores = [], [], []
    gt_bboxes, gt_labels, gt_difficults = [], [], []

    for n_seq, sequence_data in tqdm(enumerate(seq_loader)):
        seq_name, seq = sequence_data
        # Overfit on a single frame of the sequence.
        seq = [seq[115]]
        frames_dir = os.path.join(dataset_root, seq_name[0])
        frame_set = TestCarradaDataset(opt, seq, 'box', signal_type,
                                       frames_dir)
        frame_loader = data_.DataLoader(frame_set,
                                        batch_size=1,
                                        shuffle=False,
                                        num_workers=opt.num_workers)

        for ii, batch in tqdm(enumerate(frame_loader)):
            imgs, sizes, gt_bboxes_, gt_labels_, gt_difficults_ = batch
            frame_size = [sizes[axis][0].item() for axis in (0, 1)]
            imgs = normalize(imgs)
            predictions = faster_rcnn.predict(imgs, signal_type, [frame_size])
            pred_bboxes_, pred_labels_, pred_scores_ = predictions

            gt_bboxes.extend(gt_bboxes_.numpy())
            gt_labels.extend(gt_labels_.numpy())
            gt_difficults.extend(gt_difficults_.numpy())
            pred_bboxes.extend(pred_bboxes_)
            pred_labels.extend(pred_labels_)
            pred_scores.extend(pred_scores_)

    result = eval_detection_voc(pred_bboxes,
                                pred_labels,
                                pred_scores,
                                gt_bboxes,
                                gt_labels,
                                gt_difficults,
                                iou_thresh=0.5,
                                use_07_metric=True)

    first_frame_ious = bbox_iou(gt_bboxes[0], pred_bboxes[0])
    try:
        best_iou = first_frame_ious.max()
    except ValueError:
        # max() of an empty IoU matrix: report no overlap.
        best_iou = 0

    if stop:
        import ipdb
        ipdb.set_trace()
    return result, best_iou
コード例 #7
0
ファイル: creator_tool.py プロジェクト: WJ1214/My-Faster-RCNN
    def _calc_ious(self, anchor, bbox, inside_index):
        """IoU matching between anchors (rows) and ground-truth boxes (columns).

        Returns a tuple of:
          * for each anchor, the index of the ground-truth box with the
            largest IoU;
          * for each anchor, that largest IoU;
          * the anchor index of every IoU entry equal to the largest IoU
            of its ground-truth box (tied anchors are all listed).
        """
        iou_grid = bbox_iou(anchor, bbox)

        gt_of_anchor = iou_grid.argmax(axis=1)
        anchor_range = np.arange(len(inside_index))
        iou_of_anchor = iou_grid[anchor_range, gt_of_anchor]

        gt_range = np.arange(iou_grid.shape[1])
        peak_of_gt = iou_grid[iou_grid.argmax(axis=0), gt_range]

        # Value comparison instead of argmax so that ties are not dropped.
        anchors_at_peak = np.where(iou_grid == peak_of_gt)[0]

        return gt_of_anchor, iou_of_anchor, anchors_at_peak
コード例 #8
0
    def _calc_ious(self, anchor, bbox, inside_index):
        """Compute IoU matches for anchors against ground-truth boxes.

        ``axis=1`` reduces along each row (one anchor), ``axis=0`` along
        each column (one ground-truth box).

        Returns ``(argmax_ious, max_ious, gt_argmax_ious)``: best box index
        per anchor, best IoU per anchor, and the anchor indices of all
        entries equal to their column's maximum. The last array has at
        least one entry per box and more when anchors tie.
        """
        table = bbox_iou(anchor, bbox)

        argmax_ious = table.argmax(axis=1)
        row_selector = np.arange(len(inside_index))
        max_ious = table[row_selector, argmax_ious]

        col_argmax = table.argmax(axis=0)
        col_selector = np.arange(table.shape[1])
        col_max = table[col_argmax, col_selector]

        equals_col_max = table == col_max
        gt_argmax_ious = np.where(equals_col_max)[0]

        return argmax_ious, max_ious, gt_argmax_ious
コード例 #9
0
    def _anchor_bbox_ious(self, anchor, gt_bbox):
        """Match anchors and ground-truth boxes by IoU.

        The IoU matrix from ``bbox_iou(anchor, gt_bbox)`` is reduced along
        axis 1 (per anchor) and axis 0 (per ground-truth box).

        Returns:
            bbox_index_for_anchor: per anchor, index of the ground-truth
                box with the largest IoU.
            max_iou_for_anchor: per anchor, that largest IoU.
            anchor_index_for_bbox: per ground-truth box, index of the
                first anchor reaching its largest IoU.
        """
        iou = bbox_iou(anchor, gt_bbox)

        bbox_index_for_anchor = iou.argmax(axis=1)   # (anchor.shape[0],)
        max_iou_for_anchor = iou.max(axis=1)

        # The per-box maximum value was computed here before but never
        # used or returned, so only the index is kept.
        anchor_index_for_bbox = iou.argmax(axis=0)   # (bbox.shape[0],)

        return bbox_index_for_anchor, max_iou_for_anchor, anchor_index_for_bbox
コード例 #10
0
    def _calc_ious(self, anchor, bbox, inside_index):
        """Compute IoUs between the anchors and the ground-truth boxes.

        Returns the best ground-truth index per anchor, the best IoU per
        anchor, and the anchor indices of every IoU entry equal to the
        maximum of its ground-truth column (ties included).
        ``inside_index`` contributes only its length.
        """
        iou_mat = bbox_iou(anchor, bbox)

        # Best ground-truth box for each anchor.
        nearest_gt = iou_mat.argmax(axis=1)
        nearest_iou = iou_mat[np.arange(len(inside_index)), nearest_gt]

        # Largest IoU achieved for each ground-truth box.
        n_gt = iou_mat.shape[1]
        top_per_gt = iou_mat[iou_mat.argmax(axis=0), np.arange(n_gt)]

        # All anchors that reach one of those maxima.
        hits = np.where(iou_mat == top_per_gt)
        return nearest_gt, nearest_iou, hits[0]
コード例 #11
0
    def _calc_ious(self, anchor, bbox, inside_index):
        """Pair anchors with ground-truth boxes through their IoU matrix.

        Output tuple:
            1. index of the highest-IoU ground-truth box for every anchor;
            2. the corresponding IoU for every anchor;
            3. anchor indices whose IoU equals the highest IoU of some
               ground-truth box (an anchor is listed once per such box).
        """
        scores = bbox_iou(anchor, bbox)

        anchor_axis = np.arange(len(inside_index))  # length only
        per_anchor_arg = scores.argmax(axis=1)
        per_anchor_max = scores[anchor_axis, per_anchor_arg]

        per_gt_arg = scores.argmax(axis=0)
        gt_axis = np.arange(scores.shape[1])
        per_gt_max = scores[per_gt_arg, gt_axis]

        # argmax keeps one anchor per box; matching by value keeps ties.
        anchors_matching_gt = np.where(scores == per_gt_max)[0]

        return per_anchor_arg, per_anchor_max, anchors_matching_gt
コード例 #12
0
    def __call__(self,
                 roi,
                 bbox,
                 label,
                 loc_normalize_mean=(0., 0., 0., 0.),
                 loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
        """Pick training RoIs and assign them ground-truth targets.

        Args:
            roi: proposal boxes, one per row.
            bbox: ground-truth boxes, one per row (must be 2-D).
            label: class id for each ground-truth box.
            loc_normalize_mean: mean removed from the raw offsets.
            loc_normalize_std: scale dividing the mean-shifted offsets.

        Returns:
            ``(sample_roi, gt_roi_loc, gt_roi_label)``. Positive samples
            are listed first with label ``label + 1``; all later samples
            get label 0 (background).
        """
        _rows, _ = bbox.shape  # unused value; enforces a 2-D bbox array

        # Ground-truth boxes are added to the proposals themselves.
        proposals = np.concatenate((roi, bbox), axis=0)
        max_positive = np.round(self.n_sample * self.pos_ratio)

        iou = bbox_iou(proposals, bbox)
        gt_assignment = iou.argmax(axis=1)  # best ground-truth box per row
        max_iou = iou.max(axis=1)           # and the IoU reached with it
        class_plus_one = label[gt_assignment] + 1  # 0 is kept for background

        # Positives: best IoU >= pos_iou_thresh, capped at max_positive.
        pos_index = np.where(max_iou >= self.pos_iou_thresh)[0]
        n_pos = int(min(max_positive, pos_index.size))
        if pos_index.size > 0:
            pos_index = np.random.choice(pos_index, size=n_pos,
                                         replace=False)

        # Negatives: best IoU in [neg_iou_thresh_low, neg_iou_thresh_high),
        # filling whatever remains of n_sample.
        under_high = max_iou < self.neg_iou_thresh_high
        over_low = max_iou >= self.neg_iou_thresh_low
        neg_index = np.where(under_high & over_low)[0]
        n_neg = int(min(self.n_sample - n_pos, neg_index.size))
        if neg_index.size > 0:
            neg_index = np.random.choice(neg_index, size=n_neg,
                                         replace=False)

        keep_index = np.append(pos_index, neg_index)
        gt_roi_label = class_plus_one[keep_index]
        gt_roi_label[n_pos:] = 0
        sample_roi = proposals[keep_index]

        # Regression targets towards each sample's assigned box, normalised.
        offsets = bbox2loc(sample_roi, bbox[gt_assignment[keep_index]])
        shift = np.array(loc_normalize_mean, np.float32)
        scale = np.array(loc_normalize_std, np.float32)
        gt_roi_loc = (offsets - shift) / scale

        return sample_roi, gt_roi_loc, gt_roi_label
コード例 #13
0
    def __call__(self,
                 roi,
                 bbox,
                 label,
                 loc_normalize_mean=(0., 0., 0., 0.),
                 loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
        """Assign ground truth to a random sample of proposals.

        ``bbox`` is appended to ``roi`` before sampling. Each candidate is
        matched to the ground-truth box of highest IoU; foreground and
        background candidates are then drawn without replacement.

        Returns ``(sample_roi, gt_roi_loc, gt_roi_label)`` with foreground
        samples first. Foreground labels are ``label + 1``; background
        labels are 0.
        """
        _count, _ = bbox.shape  # not used; the unpack requires 2-D input

        pool = np.concatenate((roi, bbox), axis=0)
        fg_cap = np.round(self.n_sample * self.pos_ratio)

        ious = bbox_iou(pool, bbox)
        gt_assignment = ious.argmax(axis=1)
        top_iou = ious.max(axis=1)
        # Classes move from [0, n_fg_class - 1] to [1, n_fg_class];
        # 0 is the background label.
        fg_label = label[gt_assignment] + 1

        # Foreground candidates: IoU >= pos_iou_thresh.
        fg = np.where(top_iou >= self.pos_iou_thresh)[0]
        fg_count = int(min(fg_cap, fg.size))
        if fg.size > 0:
            fg = np.random.choice(fg, size=fg_count, replace=False)

        # Background candidates: IoU in
        # [neg_iou_thresh_lo, neg_iou_thresh_hi).
        in_bg_range = ((top_iou < self.neg_iou_thresh_hi)
                       & (top_iou >= self.neg_iou_thresh_lo))
        bg = np.where(in_bg_range)[0]
        bg_count = int(min(self.n_sample - fg_count, bg.size))
        if bg.size > 0:
            bg = np.random.choice(bg, size=bg_count, replace=False)

        # Foreground indices first, background after.
        keep_index = np.append(fg, bg)
        gt_roi_label = fg_label[keep_index]
        gt_roi_label[fg_count:] = 0
        sample_roi = pool[keep_index]

        # Offsets and scales from sampled RoIs to their ground truth.
        gt_roi_loc = bbox2loc(sample_roi, bbox[gt_assignment[keep_index]])
        gt_roi_loc = gt_roi_loc - np.array(loc_normalize_mean, np.float32)
        gt_roi_loc = gt_roi_loc / np.array(loc_normalize_std, np.float32)

        return sample_roi, gt_roi_loc, gt_roi_label
コード例 #14
0
    def __call__(self,
                 roi,
                 bbox,
                 label,
                 loc_normalize_mean=(0., 0., 0., 0.),
                 loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
        """Sample proposals and compute their training targets.

        :param roi: proposal boxes, one per row.
        :param bbox: ground-truth boxes, one per row (must be 2-D).
        :param label: class id of each ground-truth box.
        :param loc_normalize_mean: mean removed from the offsets.
        :param loc_normalize_std: divisor applied after the mean shift.
        :return: ``(sample_roi, gt_roi_loc, gt_roi_label)``; positives come
            first with ``label + 1``, the rest are labelled 0.
        """
        _n, _ = bbox.shape  # value unused; fails early if bbox is not 2-D

        # Appending the ground truth puts boxes with full overlap into the
        # candidate set.
        candidates = np.concatenate((roi, bbox), axis=0)

        pos_limit = np.round(self.n_sample * self.pos_ratio)
        iou = bbox_iou(candidates, bbox)
        # For every candidate: which ground-truth box it overlaps most,
        # and by how much.
        gt_assignment = iou.argmax(axis=1)
        max_iou = iou.max(axis=1)
        # Offset classes from [0, n_fg_class - 1] to [1, n_fg_class];
        # label 0 is the background.
        candidate_label = label[gt_assignment] + 1

        # Foreground: IoU >= pos_iou_thresh. There may be fewer of these
        # than pos_limit.
        positives = np.where(max_iou >= self.pos_iou_thresh)[0]
        n_positive = int(min(pos_limit, positives.size))
        if positives.size > 0:
            positives = np.random.choice(positives,
                                         size=n_positive,
                                         replace=False)

        # Background: IoU within [neg_iou_thresh_lo, neg_iou_thresh_hi).
        lt_hi = max_iou < self.neg_iou_thresh_hi
        ge_lo = max_iou >= self.neg_iou_thresh_lo
        negatives = np.where(lt_hi & ge_lo)[0]
        n_negative = int(min(self.n_sample - n_positive, negatives.size))
        if negatives.size > 0:
            negatives = np.random.choice(negatives,
                                         size=n_negative,
                                         replace=False)

        keep_index = np.append(positives, negatives)
        gt_roi_label = candidate_label[keep_index]
        gt_roi_label[n_positive:] = 0  # negative labels --> 0
        sample_roi = candidates[keep_index]

        # Offsets between the sampled RoIs and their ground-truth boxes.
        matched_boxes = bbox[gt_assignment[keep_index]]
        unnormalised = bbox2loc(sample_roi, matched_boxes)
        gt_roi_loc = ((unnormalised - np.array(loc_normalize_mean, np.float32))
                      / np.array(loc_normalize_std, np.float32))

        return sample_roi, gt_roi_loc, gt_roi_label
コード例 #15
0
    def _anchor_bbox_ious(self, anchor, gt_bbox):
        """Compute the IoU-based anchor / ground-truth correspondences.

        The returned arrays support two matching criteria used by callers:
        thresholding each anchor's best IoU, and picking the best anchor
        for each ground-truth box.

        Returns:
            bbox_index_for_anchor: per anchor, index of its best
                ground-truth box.
            max_iou_for_anchor: per anchor, the IoU with that box.
            anchor_index_for_bbox: per ground-truth box, index of the
                first anchor with the largest IoU.
        """
        iou = bbox_iou(anchor, gt_bbox)

        bbox_index_for_anchor = iou.argmax(axis=1)  # (anchor.shape[0],)
        max_iou_for_anchor = iou.max(axis=1)

        # Only the index is needed per ground-truth box; the maximum value
        # itself was previously computed but never used.
        anchor_index_for_bbox = iou.argmax(axis=0)  # (bbox.shape[0],)

        return bbox_index_for_anchor, max_iou_for_anchor, anchor_index_for_bbox
コード例 #16
0
    def _calc_ious(self, anchor, bbox, inside_index):
        """Compute IoUs between the anchors and the ground-truth boxes.

        The matrix returned by ``bbox_iou`` is indexed as (anchor, bbox).

        Returns ``(argmax_ious, max_ious, gt_argmax_ious)``:
        per-anchor index of the best box, per-anchor best IoU, and the
        anchor indices of all entries equal to their box's best IoU.
        """
        iou_nk = bbox_iou(anchor, bbox)

        # axis=1: position of the row-wise maximum.
        argmax_ious = iou_nk.argmax(axis=1)
        n_index = np.arange(len(inside_index))
        max_ious = iou_nk[n_index, argmax_ious]

        # axis=0: value of the column-wise maximum.
        k_index = np.arange(iou_nk.shape[1])
        best_for_box = iou_nk[iou_nk.argmax(axis=0), k_index]

        # Keep every anchor tied at a column maximum.
        found = np.where(iou_nk == best_for_box)
        gt_argmax_ious = found[0]

        return argmax_ious, max_ious, gt_argmax_ious
コード例 #17
0
    def _calc_ious(self, anchor, bbox, inside_index):
        """Compute IoUs between the anchors and the ground-truth boxes.

        "Closest" below means "largest IoU". The IoU matrix is indexed as
        [anchor, bbox].

        Returns:
            argmax_ious: for each anchor, index of the closest box.
            max_ious: for each anchor, IoU with the closest box.
            gt_argmax_ious: anchor indices of every entry equal to the
                best IoU of its box.
        """
        iou_ab = bbox_iou(anchor, bbox)

        argmax_ious = iou_ab.argmax(axis=1)
        max_ious = iou_ab[np.arange(len(inside_index)), argmax_ious]

        # For each box: the IoU of its closest anchor.
        closest_anchor = iou_ab.argmax(axis=0)
        box_ids = np.arange(iou_ab.shape[1])
        closest_iou = iou_ab[closest_anchor, box_ids]

        # This is not the same as ``closest_anchor``: argmax yields a
        # single anchor per box, while matching by value also returns
        # anchors tied at the maximum (sorted by anchor index).
        gt_argmax_ious = np.where(iou_ab == closest_iou)[0]

        return argmax_ious, max_ious, gt_argmax_ious
コード例 #18
0
    def _anchor_bbox_ious(self, anchor, gt_bbox):
        """Match anchors and ground-truth boxes through their IoU matrix.

        NOTE(review): there is no special case for an image without
        ground-truth boxes; if ``bbox_iou`` then returns a matrix with zero
        columns, the reductions along axis 1 raise ``ValueError`` --
        confirm whether callers can pass an empty ``gt_bbox``.

        Returns:
            bbox_index_for_anchor: per anchor, index of the ground-truth
                box with the largest IoU.
            max_iou_for_anchor: per anchor, that largest IoU.
            anchor_index_for_bbox: per ground-truth box, index of the
                first anchor with the largest IoU.
        """
        iou = bbox_iou(anchor, gt_bbox)

        bbox_index_for_anchor = iou.argmax(axis=1)  # (anchor.shape[0],)
        max_iou_for_anchor = iou.max(axis=1)
        # The per-box maximum value was never used or returned, so only
        # the index is computed.
        anchor_index_for_bbox = iou.argmax(axis=0)  # (bbox.shape[0],)

        return bbox_index_for_anchor, max_iou_for_anchor, anchor_index_for_bbox
コード例 #19
0
    def _calc_ious(self, anchor, bbox, inside_index):
        """Compute IoUs between the anchors and the ground-truth boxes.

        Two directions are evaluated on the (n, k) IoU matrix:

        * anchor -> ground truth: for every anchor, the index of the
          ground-truth box with the largest IoU and that IoU value;
        * ground truth -> anchor: the anchor indices of all entries equal
          to the largest IoU of their ground-truth box.

        Returns ``(argmax_ious, max_ious, gt_argmax_ious)``.
        """
        iou_matrix = bbox_iou(anchor, bbox)  # (n, k)

        # anchor -> gt: index of the maximum within each row, shape (n,).
        argmax_ious = iou_matrix.argmax(axis=1)
        # The value stored at that index.
        pick_rows = np.arange(len(inside_index))
        max_ious = iou_matrix[pick_rows, argmax_ious]

        # gt -> anchor: index of the maximum within each column, shape
        # (k,), used only to read the maximum value.
        top_anchor = iou_matrix.argmax(axis=0)
        pick_cols = np.arange(iou_matrix.shape[1])
        top_value = iou_matrix[top_anchor, pick_cols]

        # All anchors whose IoU equals a column maximum (ties included).
        gt_argmax_ious = np.where(iou_matrix == top_value)[0]

        return argmax_ious, max_ious, gt_argmax_ious
コード例 #20
0
    def make_proposal_target(self, roi, gt_bbox, gt_bbox_label):
        """Sample RoIs and build their class / box-delta training targets.

        The ground-truth boxes are appended to ``roi`` so they can be
        chosen as samples. Each candidate is matched to the ground-truth
        box of highest IoU; up to ``int(n_sample * pos_ratio)`` foreground
        candidates and enough background candidates to reach ``n_sample``
        are drawn without replacement.

        Args:
            roi: ``np.ndarray`` of shape (n_roi, 4).
            gt_bbox: ``np.ndarray`` of shape (n_gt, 4).
            gt_bbox_label: ``np.ndarray`` of shape (n_gt,).

        Returns:
            ``(sample_roi, target_delta_for_sample_roi,
            bbox_bg_label_for_sample_roi)``. Foreground samples come first
            with label ``gt_bbox_label + 1``; background samples follow
            with label 0. Deltas are normalised with mean 0 and
            std (0.1, 0.1, 0.2, 0.2).

        Raises:
            AssertionError: if the inputs do not have the shapes / types
                listed above (debug checks).
        """
        # Debug-time sanity checks on the inputs.
        assert isinstance(roi, np.ndarray)
        assert isinstance(gt_bbox, np.ndarray)
        assert isinstance(gt_bbox_label, np.ndarray)
        assert len(roi.shape) == len(gt_bbox.shape) == 2
        assert len(gt_bbox_label.shape) == 1
        assert roi.shape[1] == gt_bbox.shape[1] == 4
        assert gt_bbox.shape[0] == gt_bbox_label.shape[0]

        # Ground-truth boxes become part of the candidate RoIs.
        roi = np.concatenate((roi, gt_bbox), axis=0)

        n_pos = int(self.n_sample * self.pos_ratio)

        iou = bbox_iou(roi, gt_bbox)
        bbox_index_for_roi = iou.argmax(axis=1)
        max_iou_for_roi = iou.max(axis=1)

        # Object classes move from 0 ~ n_class-1 to 1 ~ n_class so that
        # class 0 can stand for background.
        bbox_bg_label_for_roi = gt_bbox_label[bbox_index_for_roi] + 1

        # Foreground (positive) RoIs: best IoU >= pos_iou_thresh.
        pos_index = np.where(max_iou_for_roi >= self.pos_iou_thresh)[0]
        n_pos_real = int(min(n_pos, len(pos_index)))
        # Subsample whenever candidates exist, even when the quota is 0.
        # Guarding on the quota instead would keep every candidate and
        # later relabel them all as background.
        if pos_index.size > 0:
            pos_index = np.random.choice(pos_index, size=n_pos_real, replace=False)

        # Background (negative) RoIs: best IoU within
        # [neg_iou_thresh_low, neg_iou_thresh_high).
        neg_index = np.where((max_iou_for_roi >= self.neg_iou_thresh_low) & (max_iou_for_roi < self.neg_iou_thresh_high))[0]
        n_neg = self.n_sample - n_pos_real
        n_neg_real = int(min(n_neg, len(neg_index)))
        # Same rule as above: a zero quota must yield zero negatives
        # rather than all of them.
        if neg_index.size > 0:
            neg_index = np.random.choice(neg_index, size=n_neg_real, replace=False)

        keep_index = np.append(pos_index, neg_index)
        sample_roi = roi[keep_index]
        bbox_bg_label_for_sample_roi = bbox_bg_label_for_roi[keep_index]
        bbox_bg_label_for_sample_roi[n_pos_real:] = 0   # negatives -> background 0

        # Deltas from each sampled RoI to its assigned ground-truth box.
        target_delta_for_sample_roi = bbox2delta(sample_roi, gt_bbox[bbox_index_for_roi[keep_index]])

        target_delta_for_sample_roi = (target_delta_for_sample_roi - np.array([0., 0., 0., 0.])) / np.array([0.1, 0.1, 0.2, 0.2])
        return sample_roi, target_delta_for_sample_roi, bbox_bg_label_for_sample_roi
コード例 #21
0
    def __call__(self,
                 roi,
                 bbox,
                 label,
                 loc_normalize_mean=(0., 0., 0., 0.),
                 loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
        """Sample RoIs and give them ground-truth offsets and labels.

        Returns ``(sample_roi, gt_roi_loc, gt_roi_label)``. Positive
        samples come first and carry ``label + 1``; the remaining samples
        are background with label 0.
        """
        all_roi = np.concatenate((roi, bbox), axis=0)
        # Upper bound on the number of positive boxes to draw.
        pos_budget = np.round(self.n_sample * self.pos_ratio)

        # IoU against the ground truth: best box per RoI and its IoU.
        iou = bbox_iou(all_roi, bbox)
        gt_assignment = iou.argmax(axis=1)
        max_iou = iou.max(axis=1)
        # Shift class ids by one; 0 is used for background below.
        label_plus_one = label[gt_assignment] + 1

        # Draw the positive RoIs.
        pos_index = np.where(max_iou >= self.pos_iou_thresh)[0]
        pos_taken = int(min(pos_budget, pos_index.size))
        if pos_index.size > 0:
            pos_index = np.random.choice(pos_index,
                                         size=pos_taken,
                                         replace=False)

        # Draw the background RoIs for the rest of the sample.
        ge_lo = max_iou >= self.neg_iou_thresh_lo
        lt_hi = max_iou < self.neg_iou_thresh_hi
        neg_index = np.where(ge_lo & lt_hi)[0]
        neg_taken = int(min(self.n_sample - pos_taken, neg_index.size))
        if neg_index.size > 0:
            neg_index = np.random.choice(neg_index,
                                         size=neg_taken,
                                         replace=False)

        keep_index = np.append(pos_index, neg_index)
        gt_roi_label = label_plus_one[keep_index]
        gt_roi_label[pos_taken:] = 0  # background label
        sample_roi = all_roi[keep_index]

        # Offsets towards each sample's assigned ground-truth box.
        assigned_boxes = bbox[gt_assignment[keep_index]]
        gt_roi_loc = bbox2loc(src_bbox=sample_roi, dst_bbox=assigned_boxes)
        mean = np.array(loc_normalize_mean, np.float32)
        std = np.array(loc_normalize_std, np.float32)
        gt_roi_loc = (gt_roi_loc - mean) / std
        return sample_roi, gt_roi_loc, gt_roi_label
コード例 #22
0
    def _calc_ious(self, anchor, bbox, inside_index):
        """Compute IoU matches between anchors and ground-truth boxes.

        The IoU matrix is indexed as (anchor, box); axis 1 therefore runs
        over boxes and axis 0 over anchors.

        Returns ``(argmax_ious, max_ious, gt_argmax_ious)``: best box per
        anchor, best IoU per anchor, and the anchor indices of every entry
        equal to its box's best IoU. Each box contributes at least one
        anchor, and more when anchors tie.
        """
        iou_all = bbox_iou(anchor, bbox)

        argmax_ious = iou_all.argmax(axis=1)
        anchor_pos = np.arange(len(inside_index))
        max_ious = iou_all[anchor_pos, argmax_ious]

        box_pos = np.arange(iou_all.shape[1])
        box_best_iou = iou_all[iou_all.argmax(axis=0), box_pos]

        reaches_box_best = iou_all == box_best_iou
        gt_argmax_ious = np.where(reaches_box_best)[0]

        return argmax_ious, max_ious, gt_argmax_ious
コード例 #23
0
    def __call__(self,
                 roi,
                 bbox,
                 loc_normalize_mean=(0., 0., 0., 0.),
                 loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
        """Sample foreground / background RoIs and build binary targets.

        Returns ``(sample_roi, gt_roi_loc, gt_roi_label,
        pos_roi_per_this_image)``. Labels are 1 for the first
        ``pos_roi_per_this_image`` samples and 0 for the rest.

        NOTE(review): the thresholds are applied to the whole IoU matrix,
        not to a per-RoI maximum. If the matrix has several columns, a RoI
        index can appear more than once and in both the foreground and
        background sets. Likewise ``bbox2loc`` receives all of ``bbox``
        rather than one assigned box per sample. Presumably a single
        ground-truth box is expected -- confirm against the caller.
        """
        _n_bbox, _ = bbox.shape  # value unused; the unpack requires 2-D bbox
        fg_quota = np.round(self.n_filtered * self.pos_ratio)
        candidates = np.concatenate((roi, bbox), axis=0)
        iou = bbox_iou(candidates, bbox)

        # Foreground: row indices of entries with IoU >= pos_iou_thresh.
        pos_index = np.where(iou >= self.pos_iou_thresh)[0]
        pos_roi_per_this_image = int(min(fg_quota, pos_index.size))
        if pos_index.size > 0:
            pos_index = np.random.choice(pos_index,
                                         size=pos_roi_per_this_image,
                                         replace=False)

        # Background: row indices of entries with IoU in
        # [neg_iou_thresh_l, neg_iou_thresh_h).
        lt_high = iou < self.neg_iou_thresh_h
        ge_low = iou >= self.neg_iou_thresh_l
        neg_index = np.where(lt_high & ge_low)[0]
        bg_quota = self.n_filtered - pos_roi_per_this_image
        bg_count = int(min(bg_quota, neg_index.size))
        if neg_index.size > 0:
            neg_index = np.random.choice(neg_index,
                                         size=bg_count,
                                         replace=False)

        keep_index = np.append(pos_index, neg_index)
        sample_roi = candidates[keep_index]

        # Binary objectness labels: ones first, then zeros.
        gt_roi_label = np.ones((len(sample_roi), ), dtype=np.int32)
        gt_roi_label[pos_roi_per_this_image:] = 0

        raw_loc = bbox2loc(sample_roi, bbox)
        mean = np.array(loc_normalize_mean, np.float32)
        std = np.array(loc_normalize_std, np.float32)
        gt_roi_loc = (raw_loc - mean) / std

        return sample_roi, gt_roi_loc, gt_roi_label, pos_roi_per_this_image
コード例 #24
0
 def _calc_ious(self, anchor, bbox, inside_index):
     """Select anchor / ground-truth matches from the IoU matrix.

     Returns:
         1. for each anchor, the index of the ground-truth box with the
            largest IoU;
         2. for each anchor, that largest IoU (``len(inside_index)``
            values);
         3. the anchor indices of every entry equal to the largest IoU of
            its ground-truth box (ties included).
     """
     overlaps = bbox_iou(anchor, bbox)

     # Best ground-truth box for every anchor, and the IoU reached.
     argmax_iou = overlaps.argmax(axis=1)
     anchor_slots = np.arange(len(inside_index))
     max_ious = overlaps[anchor_slots, argmax_iou]

     # Largest IoU obtained for every ground-truth box.
     gt_slots = np.arange(overlaps.shape[1])
     gt_peak = overlaps[overlaps.argmax(axis=0), gt_slots]

     # Anchors whose IoU equals one of those peaks.
     at_peak = np.where(overlaps == gt_peak)
     return argmax_iou, max_ious, at_peak[0]
コード例 #25
0
    def _calc_ious(self, anchor, bbox, inside_index):
        """Compute IoU matches, tolerating an empty set of ground-truth boxes.

        With at least one ground-truth box the result is
        ``(argmax_ious, max_ious, gt_argmax_ious)``: best box index per
        anchor, best IoU per anchor, and the anchor indices of all entries
        equal to their box's best IoU.

        Without ground-truth boxes the index results are empty lists and
        ``max_ious`` is an array of ``len(anchor)`` zeros.
        """
        bbox_shape = bbox.shape
        # A 0-d ``bbox`` leaves the empty shape tuple in ``n_bbox``.
        n_bbox = bbox_shape[0] if len(bbox_shape) > 0 else bbox_shape

        if not n_bbox > 0:
            # No ground truth: nothing to match, every IoU is zero.
            zero_ious = np.asarray([0] * len(anchor))
            return [], zero_ious, []

        iou_matrix = bbox_iou(anchor, bbox)

        argmax_ious = iou_matrix.argmax(axis=1)
        anchor_ids = np.arange(len(inside_index))
        max_ious = iou_matrix[anchor_ids, argmax_ious]

        box_ids = np.arange(iou_matrix.shape[1])
        box_max = iou_matrix[iou_matrix.argmax(axis=0), box_ids]
        # Matching by value keeps anchors tied at a box's maximum.
        gt_argmax_ious = np.where(iou_matrix == box_max)[0]

        return argmax_ious, max_ious, gt_argmax_ious
コード例 #26
0
    def _calc_ious(self, anchor, bbox, inside_index):
        """Compute IoUs between the anchors and the ground-truth boxes.

        Inputs:
            anchor: anchor boxes, presumably (len(inside_index), 4).
            bbox: ground-truth boxes, presumably (R, 4).
            inside_index: only its length is used.

        Returns:
            argmax_ious: for each anchor, the index (0 .. R-1) of the
                ground-truth box giving its largest IoU.
            max_ious: for each anchor, that largest IoU.
            gt_argmax_ious: anchor indices of every entry equal to the
                largest IoU of its ground-truth box. This is not simply
                the per-box argmax: tied anchors are all listed, so the
                array can hold more than R entries.
        """
        iou_kr = bbox_iou(anchor, bbox)

        # Anchor side: best box and the IoU obtained with it.
        argmax_ious = iou_kr.argmax(axis=1)
        anchor_idx = np.arange(len(inside_index))
        max_ious = iou_kr[anchor_idx, argmax_ious]

        # Box side: largest IoU each ground-truth box obtains.
        first_anchor = iou_kr.argmax(axis=0)
        box_idx = np.arange(iou_kr.shape[1])
        box_max_iou = iou_kr[first_anchor, box_idx]

        # Every anchor achieving one of those maxima.
        gt_argmax_ious = np.where(iou_kr == box_max_iou)[0]

        return argmax_ious, max_ious, gt_argmax_ious
コード例 #27
0
 def _calc_ious(self, anchor, bbox, inside_index):
     """Compute IoUs between the anchors and the ground-truth boxes.

     Returns:
         argmax_ious: ``argmax_ious[i] = j`` means anchor ``i`` has its
             largest IoU with box ``j``.
         max_ious: ``max_ious[i]`` is that largest IoU of anchor ``i``.
         gt_argmax_ious: non-decreasing anchor indices; each listed anchor
             reaches the largest IoU of some box. Which box it is does not
             matter to the result.
     """
     iou = bbox_iou(anchor, bbox)

     argmax_ious = iou.argmax(axis=1)
     rows = np.arange(len(inside_index))
     max_ious = iou[rows, argmax_ious]

     # column_best[i] is the largest IoU that box i has with any anchor.
     cols = np.arange(iou.shape[1])
     column_best = iou[iou.argmax(axis=0), cols]

     # The plain ``iou.argmax(axis=0)`` is not returned because it drops
     # ties: if anchors 20 and 300 share a box's maximum of 0.9, argmax
     # yields only anchor 20, while the comparison yields both.
     gt_argmax_ious = np.where(iou == column_best)[0]
     return argmax_ious, max_ious, gt_argmax_ious
コード例 #28
0
    def _calc_ious(self, anchor, bbox, inside_index):
        """Compute IoUs between the anchors and the ground-truth boxes.

        The IoU matrix is indexed as (anchor, box).

        Returns:
            argmax_ious: for every anchor, the index of its closest box
                (largest IoU).
            max_ious: for every anchor, the IoU with that closest box.
            gt_argmax_ious: anchor indices of every entry equal to the
                largest IoU of its box. There is at least one per box,
                and more when several anchors tie.
        """
        iou_nk = bbox_iou(anchor, bbox)

        # axis=1 scans along a row, i.e. over the boxes of one anchor.
        argmax_ious = iou_nk.argmax(axis=1)
        row_index = np.arange(len(inside_index))
        max_ious = iou_nk[row_index, argmax_ious]

        # axis=0 scans along a column, i.e. over the anchors of one box.
        col_index = np.arange(iou_nk.shape[1])
        best_per_box = iou_nk[iou_nk.argmax(axis=0), col_index]

        # Anchors with the highest overlap for some ground-truth box.
        located = np.where(iou_nk == best_per_box)
        gt_argmax_ious = located[0]

        return argmax_ious, max_ious, gt_argmax_ious
コード例 #29
0
def eval(seq_loader, faster_rcnn, signal_type, test_num=10000):
    """Run the detector over every sequence and score it with the VOC metric.

    For each ``(seq_name, seq)`` item of ``seq_loader`` a
    ``TestCarradaDataset`` is built and iterated frame by frame with a
    batch size of 1.  Predictions and ground truth of all frames are
    accumulated and handed to ``eval_detection_voc`` (with
    ``use_07_metric=True``).

    Args:
        seq_loader: Iterable yielding ``(seq_name, seq)`` pairs;
            ``seq_name[0]`` is joined to the dataset root to locate the
            frames.
        faster_rcnn: Model exposing ``predict(imgs, sizes)``.
        signal_type: Forwarded unchanged to ``TestCarradaDataset``.
        test_num: Currently unused by this function.

    Returns:
        tuple: ``(result, best_iou)`` where ``result`` is whatever
        ``eval_detection_voc`` returns and ``best_iou`` is the largest IoU
        between the ground-truth and predicted boxes of the *first*
        accumulated frame only (``0`` when taking the maximum raises
        ``ValueError``, e.g. for an empty IoU matrix).
    """
    carrada = download('Carrada')
    pred_bboxes, pred_labels, pred_scores = [], [], []
    gt_bboxes, gt_labels, gt_difficults = [], [], []

    for _, (seq_name, seq) in tqdm(enumerate(seq_loader)):
        path_to_frames = os.path.join(carrada, seq_name[0])
        frame_set = TestCarradaDataset(
            opt, seq, 'box', signal_type, path_to_frames)
        frame_loader = data_.DataLoader(
            frame_set,
            batch_size=1,
            shuffle=False,
            num_workers=opt.num_workers)

        for _, batch in tqdm(enumerate(frame_loader)):
            imgs, sizes, frame_bboxes, frame_labels, frame_difficults = batch
            # Batch size is 1, so pull the scalar pair out of the batch.
            frame_size = [sizes[0][0].item(), sizes[1][0].item()]
            boxes, labels, scores = faster_rcnn.predict(
                normalize(imgs), [frame_size])

            gt_bboxes.extend(frame_bboxes.numpy())
            gt_labels.extend(frame_labels.numpy())
            gt_difficults.extend(frame_difficults.numpy())
            pred_bboxes.extend(boxes)
            pred_labels.extend(labels)
            pred_scores.extend(scores)

    result = eval_detection_voc(
        pred_bboxes, pred_labels, pred_scores,
        gt_bboxes, gt_labels, gt_difficults,
        use_07_metric=True)

    first_frame_ious = bbox_iou(gt_bboxes[0], pred_bboxes[0])
    try:
        best_iou = first_frame_ious.max()
    except ValueError:
        best_iou = 0

    return result, best_iou
コード例 #30
0
    def __call__(self,
                 roi,
                 bbox,
                 label,
                 loc_normalize_mean=(0., 0., 0., 0.),
                 loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
        """Sample RoIs and attach ground-truth labels and box targets.

        The ground-truth boxes are appended to ``roi`` and the combined
        pool is split by IoU with the best-matching ground-truth box:
        foreground candidates have IoU ``>= self.pos_iou_thresh``,
        background candidates have IoU in
        ``[self.neg_iou_thresh_lo, self.neg_iou_thresh_hi)``.  Up to
        ``round(self.n_sample * self.pos_ratio)`` foregrounds are drawn
        without replacement, and backgrounds fill the remainder up to
        ``self.n_sample`` (fewer if not enough candidates exist).

        Args:
            roi (array): Candidate regions, shape :math:`(R, 4)`.
            bbox (array): Ground-truth boxes, shape :math:`(R', 4)`.
            label (array): Ground-truth class indices, shape
                :math:`(R',)`, starting at 0 for the first foreground
                class.
            loc_normalize_mean (tuple of four floats): Mean subtracted
                from the box offsets.
            loc_normalize_std (tuple of four floats): Standard deviation
                the box offsets are divided by.

        Returns:
            (array, array, array):

            * **sample_roi**: The sampled regions, shape :math:`(S, 4)`.
            * **gt_roi_loc**: Normalised offsets/scales from
              ``bbox2loc`` between each sampled region and its
              best-matching ground-truth box, shape :math:`(S, 4)`.
            * **gt_roi_label**: Labels of the sampled regions, shape
              :math:`(S,)`; ground-truth labels shifted by one, with 0
              meaning background.
        """
        # Unpacking also rejects a ``bbox`` that is not two-dimensional.
        _num_gt, _ = bbox.shape

        # Ground-truth boxes take part in the sampling as candidates too.
        roi = np.concatenate((roi, bbox), axis=0)

        fg_quota = np.round(self.n_sample * self.pos_ratio)
        overlaps = bbox_iou(roi, bbox)
        gt_assignment = overlaps.argmax(axis=1)
        best_overlap = overlaps.max(axis=1)

        # Shift class ids by one so that 0 is free for "background".
        shifted_label = label[gt_assignment] + 1

        # Foreground: draw (without replacement) among high-IoU candidates.
        fg_index = np.where(best_overlap >= self.pos_iou_thresh)[0]
        n_fg = int(min(fg_quota, fg_index.size))
        if fg_index.size > 0:
            fg_index = np.random.choice(fg_index, size=n_fg, replace=False)

        # Background: candidates inside the half-open IoU band fill what
        # is left of the sample budget.
        in_bg_band = ((best_overlap >= self.neg_iou_thresh_lo)
                      & (best_overlap < self.neg_iou_thresh_hi))
        bg_index = np.where(in_bg_band)[0]
        n_bg = int(min(self.n_sample - n_fg, bg_index.size))
        if bg_index.size > 0:
            bg_index = np.random.choice(bg_index, size=n_bg, replace=False)

        # Foregrounds first, then backgrounds; everything after the
        # first ``n_fg`` entries is relabelled as background.
        keep_index = np.concatenate((fg_index, bg_index))
        gt_roi_label = shifted_label[keep_index]
        gt_roi_label[n_fg:] = 0
        sample_roi = roi[keep_index]

        # Regression targets towards the matched ground-truth boxes,
        # normalised with the supplied mean / std.
        matched_bbox = bbox[gt_assignment[keep_index]]
        mean = np.array(loc_normalize_mean, np.float32)
        std = np.array(loc_normalize_std, np.float32)
        gt_roi_loc = (bbox2loc(sample_roi, matched_bbox) - mean) / std

        return sample_roi, gt_roi_loc, gt_roi_label
コード例 #31
0
def calc_detection_voc_prec_rec(pred_bboxes,
                                pred_labels,
                                pred_scores,
                                gt_bboxes,
                                gt_labels,
                                gt_difficults=None,
                                iou_thresh=0.5):
    """Calculate per-class precision and recall, PASCAL VOC style.

    All iterables are consumed in lockstep, one element per image.

    Args:
        pred_bboxes (iterable of numpy.ndarray): Predicted boxes per image,
            each of shape :math:`(R, 4)` ordered as
            :math:`y_{min}, x_{min}, y_{max}, x_{max}`.
        pred_labels (iterable of numpy.ndarray): Predicted class index of
            every box, per image.
        pred_scores (iterable of numpy.ndarray): Confidence of every
            predicted box, per image.
        gt_bboxes (iterable of numpy.ndarray): Ground-truth boxes per
            image, each of shape :math:`(R', 4)`.  The number of boxes
            need not match the number of predictions.
        gt_labels (iterable of numpy.ndarray): Class index of every
            ground-truth box, per image.
        gt_difficults (iterable of numpy.ndarray): Boolean arrays marking
            ground-truth boxes as "difficult".  With :obj:`None` (the
            default) no box is treated as difficult.
        iou_thresh (float): A prediction whose best IoU with the ground
            truth is below this value is counted as unmatched.

    Returns:
        tuple of two lists: ``(prec, rec)``, both indexed by class.

        * ``prec[l]`` is the cumulative precision array of class ``l``,
          or :obj:`None` when the class appears in neither predictions
          nor ground truth.
        * ``rec[l]`` is the cumulative recall array of class ``l``, or
          :obj:`None` when the class has no non-difficult ground truth.

        When no class label is seen at all (no images, or only images
        without predictions and ground truth) both lists are empty.

    Raises:
        ValueError: If the input iterables have different lengths.
    """
    pred_bboxes = iter(pred_bboxes)
    pred_labels = iter(pred_labels)
    pred_scores = iter(pred_scores)
    gt_bboxes = iter(gt_bboxes)
    gt_labels = iter(gt_labels)
    if gt_difficults is None:
        gt_difficults = itertools.repeat(None)
    else:
        gt_difficults = iter(gt_difficults)

    n_pos = defaultdict(int)    # class -> number of non-difficult GT boxes
    score = defaultdict(list)   # class -> scores of all its predictions
    match = defaultdict(list)   # class -> 1 (TP), 0 (FP) or -1 (ignored)

    # zip() stops at the shortest iterator; leftovers are detected below.
    per_image = zip(pred_bboxes, pred_labels, pred_scores,
                    gt_bboxes, gt_labels, gt_difficults)
    for (pred_bbox, pred_label, pred_score,
         gt_bbox, gt_label, gt_difficult) in per_image:

        if gt_difficult is None:
            gt_difficult = np.zeros(gt_bbox.shape[0], dtype=bool)

        classes = np.unique(
            np.concatenate((pred_label, gt_label)).astype(int))
        for cls_id in classes:
            pred_mask = pred_label == cls_id
            pred_bbox_l = pred_bbox[pred_mask]
            pred_score_l = pred_score[pred_mask]
            # Visit predictions from most to least confident so that the
            # highest-scoring one claims a ground-truth box first.
            order = pred_score_l.argsort()[::-1]
            pred_bbox_l = pred_bbox_l[order]
            pred_score_l = pred_score_l[order]

            gt_mask = gt_label == cls_id
            gt_bbox_l = gt_bbox[gt_mask]
            gt_difficult_l = gt_difficult[gt_mask]

            n_pos[cls_id] += np.logical_not(gt_difficult_l).sum()
            score[cls_id].extend(pred_score_l)

            if len(pred_bbox_l) == 0:
                continue
            if len(gt_bbox_l) == 0:
                # Nothing to match against: every prediction is a FP.
                match[cls_id].extend([0] * pred_bbox_l.shape[0])
                continue

            # VOC evaluation follows integer typed bounding boxes.
            pred_bbox_l = pred_bbox_l.copy()
            pred_bbox_l[:, 2:] += 1
            gt_bbox_l = gt_bbox_l.copy()
            gt_bbox_l[:, 2:] += 1

            iou = bbox_iou(pred_bbox_l, gt_bbox_l)
            gt_index = iou.argmax(axis=1)
            # set -1 if there is no matching ground truth
            gt_index[iou.max(axis=1) < iou_thresh] = -1
            del iou

            claimed = np.zeros(gt_bbox_l.shape[0], dtype=bool)
            for gt_idx in gt_index:
                if gt_idx < 0:
                    match[cls_id].append(0)
                    continue
                if gt_difficult_l[gt_idx]:
                    # Difficult boxes count neither as TP nor as FP.
                    match[cls_id].append(-1)
                elif not claimed[gt_idx]:
                    match[cls_id].append(1)
                else:
                    # Ground truth already taken by a better prediction.
                    match[cls_id].append(0)
                claimed[gt_idx] = True

    for iter_ in (pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels,
                  gt_difficults):
        if next(iter_, None) is not None:
            raise ValueError('Length of input iterables need to be same.')

    if not n_pos:
        # No class was seen at all; max() below would fail on an empty
        # sequence, so report "no classes" instead.
        return [], []

    n_fg_class = max(n_pos) + 1
    prec = [None] * n_fg_class
    rec = [None] * n_fg_class

    for cls_id in n_pos:
        score_l = np.array(score[cls_id])
        match_l = np.array(match[cls_id], dtype=np.int8)

        order = score_l.argsort()[::-1]
        match_l = match_l[order]

        tp = np.cumsum(match_l == 1)
        fp = np.cumsum(match_l == 0)

        # If an element of fp + tp is 0,
        # the corresponding element of prec[cls_id] is nan.
        prec[cls_id] = tp / (fp + tp)
        # If n_pos[cls_id] is 0, rec[cls_id] stays None.
        if n_pos[cls_id] > 0:
            rec[cls_id] = tp / n_pos[cls_id]

    return prec, rec
コード例 #32
0
    def __call__(self, roi, bbox, label,
                 loc_normalize_mean=(0., 0., 0., 0.),
                 loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
        """Sample RoIs and attach ground-truth labels and box targets.

        The ground-truth boxes are appended to ``roi`` and the combined
        pool is split by IoU with the best-matching ground-truth box:
        foreground candidates have IoU ``>= self.pos_iou_thresh``,
        background candidates have IoU in
        ``[self.neg_iou_thresh_lo, self.neg_iou_thresh_hi)``.  Up to
        ``round(self.n_sample * self.pos_ratio)`` foregrounds are drawn
        without replacement, and backgrounds fill the remainder up to
        ``self.n_sample`` (fewer if not enough candidates exist).

        Args:
            roi (array): Candidate regions, shape :math:`(R, 4)`.
            bbox (array): Ground-truth boxes, shape :math:`(R', 4)`.
            label (array): Ground-truth class indices, shape
                :math:`(R',)`, starting at 0 for the first foreground
                class.
            loc_normalize_mean (tuple of four floats): Mean subtracted
                from the box offsets.
            loc_normalize_std (tuple of four floats): Standard deviation
                the box offsets are divided by.

        Returns:
            (array, array, array):

            * **sample_roi**: The sampled regions, shape :math:`(S, 4)`.
            * **gt_roi_loc**: Normalised offsets/scales from
              ``bbox2loc`` between each sampled region and its
              best-matching ground-truth box, shape :math:`(S, 4)`.
            * **gt_roi_label**: Labels of the sampled regions, shape
              :math:`(S,)`; ground-truth labels shifted by one, with 0
              meaning background.
        """
        # Unpacking also rejects a ``bbox`` that is not two-dimensional.
        _num_gt, _ = bbox.shape

        # Ground-truth boxes take part in the sampling as candidates too.
        roi = np.concatenate((roi, bbox), axis=0)

        fg_quota = np.round(self.n_sample * self.pos_ratio)
        overlaps = bbox_iou(roi, bbox)
        gt_assignment = overlaps.argmax(axis=1)
        best_overlap = overlaps.max(axis=1)

        # Shift class ids by one so that 0 is free for "background".
        shifted_label = label[gt_assignment] + 1

        # Foreground: draw (without replacement) among high-IoU candidates.
        fg_index = np.where(best_overlap >= self.pos_iou_thresh)[0]
        n_fg = int(min(fg_quota, fg_index.size))
        if fg_index.size > 0:
            fg_index = np.random.choice(fg_index, size=n_fg, replace=False)

        # Background: candidates inside the half-open IoU band fill what
        # is left of the sample budget.
        in_bg_band = ((best_overlap >= self.neg_iou_thresh_lo)
                      & (best_overlap < self.neg_iou_thresh_hi))
        bg_index = np.where(in_bg_band)[0]
        n_bg = int(min(self.n_sample - n_fg, bg_index.size))
        if bg_index.size > 0:
            bg_index = np.random.choice(bg_index, size=n_bg, replace=False)

        # Foregrounds first, then backgrounds; everything after the
        # first ``n_fg`` entries is relabelled as background.
        keep_index = np.concatenate((fg_index, bg_index))
        gt_roi_label = shifted_label[keep_index]
        gt_roi_label[n_fg:] = 0
        sample_roi = roi[keep_index]

        # Regression targets towards the matched ground-truth boxes,
        # normalised with the supplied mean / std.
        matched_bbox = bbox[gt_assignment[keep_index]]
        mean = np.array(loc_normalize_mean, np.float32)
        std = np.array(loc_normalize_std, np.float32)
        gt_roi_loc = (bbox2loc(sample_roi, matched_bbox) - mean) / std

        return sample_roi, gt_roi_loc, gt_roi_label
コード例 #33
0
def calc_detection_voc_prec_rec(
        pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels,
        gt_difficults=None,
        iou_thresh=0.5):
    """Calculate per-class precision and recall, PASCAL VOC style.

    All iterables are consumed in lockstep, one element per image.

    Args:
        pred_bboxes (iterable of numpy.ndarray): Predicted boxes per image,
            each of shape :math:`(R, 4)` ordered as
            :math:`y_{min}, x_{min}, y_{max}, x_{max}`.
        pred_labels (iterable of numpy.ndarray): Predicted class index of
            every box, per image.
        pred_scores (iterable of numpy.ndarray): Confidence of every
            predicted box, per image.
        gt_bboxes (iterable of numpy.ndarray): Ground-truth boxes per
            image, each of shape :math:`(R', 4)`.  The number of boxes
            need not match the number of predictions.
        gt_labels (iterable of numpy.ndarray): Class index of every
            ground-truth box, per image.
        gt_difficults (iterable of numpy.ndarray): Boolean arrays marking
            ground-truth boxes as "difficult".  With :obj:`None` (the
            default) no box is treated as difficult.
        iou_thresh (float): A prediction whose best IoU with the ground
            truth is below this value is counted as unmatched.

    Returns:
        tuple of two lists: ``(prec, rec)``, both indexed by class.

        * ``prec[l]`` is the cumulative precision array of class ``l``,
          or :obj:`None` when the class appears in neither predictions
          nor ground truth.
        * ``rec[l]`` is the cumulative recall array of class ``l``, or
          :obj:`None` when the class has no non-difficult ground truth.

        When no class label is seen at all (no images, or only images
        without predictions and ground truth) both lists are empty.

    Raises:
        ValueError: If the input iterables have different lengths.
    """
    pred_bboxes = iter(pred_bboxes)
    pred_labels = iter(pred_labels)
    pred_scores = iter(pred_scores)
    gt_bboxes = iter(gt_bboxes)
    gt_labels = iter(gt_labels)
    if gt_difficults is None:
        gt_difficults = itertools.repeat(None)
    else:
        gt_difficults = iter(gt_difficults)

    n_pos = defaultdict(int)    # class -> number of non-difficult GT boxes
    score = defaultdict(list)   # class -> scores of all its predictions
    match = defaultdict(list)   # class -> 1 (TP), 0 (FP) or -1 (ignored)

    # zip() stops at the shortest iterator; leftovers are detected below.
    per_image = zip(pred_bboxes, pred_labels, pred_scores,
                    gt_bboxes, gt_labels, gt_difficults)
    for (pred_bbox, pred_label, pred_score,
         gt_bbox, gt_label, gt_difficult) in per_image:

        if gt_difficult is None:
            gt_difficult = np.zeros(gt_bbox.shape[0], dtype=bool)

        classes = np.unique(
            np.concatenate((pred_label, gt_label)).astype(int))
        for cls_id in classes:
            pred_mask = pred_label == cls_id
            pred_bbox_l = pred_bbox[pred_mask]
            pred_score_l = pred_score[pred_mask]
            # Visit predictions from most to least confident so that the
            # highest-scoring one claims a ground-truth box first.
            order = pred_score_l.argsort()[::-1]
            pred_bbox_l = pred_bbox_l[order]
            pred_score_l = pred_score_l[order]

            gt_mask = gt_label == cls_id
            gt_bbox_l = gt_bbox[gt_mask]
            gt_difficult_l = gt_difficult[gt_mask]

            n_pos[cls_id] += np.logical_not(gt_difficult_l).sum()
            score[cls_id].extend(pred_score_l)

            if len(pred_bbox_l) == 0:
                continue
            if len(gt_bbox_l) == 0:
                # Nothing to match against: every prediction is a FP.
                match[cls_id].extend([0] * pred_bbox_l.shape[0])
                continue

            # VOC evaluation follows integer typed bounding boxes.
            pred_bbox_l = pred_bbox_l.copy()
            pred_bbox_l[:, 2:] += 1
            gt_bbox_l = gt_bbox_l.copy()
            gt_bbox_l[:, 2:] += 1

            iou = bbox_iou(pred_bbox_l, gt_bbox_l)
            gt_index = iou.argmax(axis=1)
            # set -1 if there is no matching ground truth
            gt_index[iou.max(axis=1) < iou_thresh] = -1
            del iou

            claimed = np.zeros(gt_bbox_l.shape[0], dtype=bool)
            for gt_idx in gt_index:
                if gt_idx < 0:
                    match[cls_id].append(0)
                    continue
                if gt_difficult_l[gt_idx]:
                    # Difficult boxes count neither as TP nor as FP.
                    match[cls_id].append(-1)
                elif not claimed[gt_idx]:
                    match[cls_id].append(1)
                else:
                    # Ground truth already taken by a better prediction.
                    match[cls_id].append(0)
                claimed[gt_idx] = True

    for iter_ in (
            pred_bboxes, pred_labels, pred_scores,
            gt_bboxes, gt_labels, gt_difficults):
        if next(iter_, None) is not None:
            raise ValueError('Length of input iterables need to be same.')

    if not n_pos:
        # No class was seen at all; max() below would fail on an empty
        # sequence, so report "no classes" instead.
        return [], []

    n_fg_class = max(n_pos) + 1
    prec = [None] * n_fg_class
    rec = [None] * n_fg_class

    for cls_id in n_pos:
        score_l = np.array(score[cls_id])
        match_l = np.array(match[cls_id], dtype=np.int8)

        order = score_l.argsort()[::-1]
        match_l = match_l[order]

        tp = np.cumsum(match_l == 1)
        fp = np.cumsum(match_l == 0)

        # If an element of fp + tp is 0,
        # the corresponding element of prec[cls_id] is nan.
        prec[cls_id] = tp / (fp + tp)
        # If n_pos[cls_id] is 0, rec[cls_id] stays None.
        if n_pos[cls_id] > 0:
            rec[cls_id] = tp / n_pos[cls_id]

    return prec, rec