def _calc_ious(self, anchor, bbox, inside_index):
    """Match anchors and ground-truth boxes by IoU.

    Args:
        anchor: Anchor boxes; they index the rows of the IoU matrix.
        bbox: Ground-truth boxes; they index the columns of the IoU matrix.
        inside_index: Only its length is used, to build the row index for
            the per-anchor lookup. It is expected to match the number of
            anchors.

    Returns:
        tuple: ``(argmax_ious, max_ious, gt_argmax_ious)`` -- for each
        anchor the index of its highest-IoU box, that IoU value, and the
        row indices of every anchor that attains some box's column maximum.
    """
    overlaps = bbox_iou(anchor, bbox)

    # Row-wise maximum: the best ground-truth box for each anchor.
    best_gt = overlaps.argmax(axis=1)
    anchor_rows = np.arange(len(inside_index))
    best_gt_iou = overlaps[anchor_rows, best_gt]

    # Column-wise maximum: the highest IoU each ground-truth box reaches.
    first_best_anchor = overlaps.argmax(axis=0)
    gt_cols = np.arange(overlaps.shape[1])
    column_peak = overlaps[first_best_anchor, gt_cols]

    # An anchor has exactly one best box, but a box may share its best IoU
    # with several anchors. argmax reports only the first maximum, so the
    # rows are recovered with an equality search instead.
    tied_rows = np.where(overlaps == column_peak)[0]
    return best_gt, best_gt_iou, tied_rows
def _calc_ious(self, anchor, bbox, inside_index):
    """Compute anchor/ground-truth IoU matches.

    Returns, in order: the index of the best-overlapping ground-truth
    object for every anchor, the IoU of that pairing, and the rows of all
    anchors whose IoU equals the best IoU of some ground-truth object.
    Ties are kept, so the last array may hold more entries than there are
    objects. It records only which anchors matched, not which object each
    one matched.
    """
    # One recorded debugging session had anchor.shape == (5834, 4) and
    # bbox.shape == (3, 4), giving an IoU matrix of shape (5834, 3).
    iou_matrix = bbox_iou(anchor, bbox)

    gt_for_anchor = iou_matrix.argmax(axis=1)
    row_ids = np.arange(len(inside_index))
    iou_for_anchor = iou_matrix[row_ids, gt_for_anchor]

    # First anchor row that reaches each object's maximum, then the value.
    first_best_anchor = iou_matrix.argmax(axis=0)
    col_ids = np.arange(iou_matrix.shape[1])
    best_iou_per_gt = iou_matrix[first_best_anchor, col_ids]

    # Two anchors can share the same IoU with one box, so the rows are
    # re-derived by equality (the same session saw 14 rows for 3 objects).
    matched_anchor_rows = np.where(iou_matrix == best_iou_per_gt)[0]
    return gt_for_anchor, iou_for_anchor, matched_anchor_rows
def __call__(self, roi, bbox, label,
             loc_normalize_mean=(0., 0., 0., 0.),
             loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
    """Sample RoIs and attach class labels and regression targets.

    Args:
        roi: Candidate regions; the ground-truth boxes are appended to them.
        bbox: Ground-truth boxes (must unpack as a 2-D shape).
        label: Ground-truth class ids, one per box.
        loc_normalize_mean: Mean subtracted from the regression targets.
        loc_normalize_std: Divisor applied to the regression targets.

    Returns:
        tuple: ``(sample_roi, gt_roi_loc, gt_roi_label)`` with positives
        first and negatives after them; negatives carry label 0.
    """
    n_bbox, _ = bbox.shape  # unused; fails early if bbox is not 2-D

    # Ground-truth boxes join the candidate pool.
    roi = np.concatenate((roi, bbox), axis=0)
    pos_quota = np.round(self.n_sample * self.pos_ratio)

    iou = bbox_iou(roi, bbox)
    gt_assignment = iou.argmax(axis=1)  # best box per RoI
    max_iou = iou.max(axis=1)           # IoU of that pairing
    # Shift class ids by one so that 0 is free for the background.
    gt_roi_label = label[gt_assignment] + 1

    # Foreground candidates, randomly thinned down to the quota.
    pos_index = np.where(max_iou >= self.pos_iou_thresh)[0]
    n_pos = int(min(pos_quota, pos_index.size))
    if pos_index.size > 0:
        pos_index = np.random.choice(pos_index, size=n_pos, replace=False)

    # Background candidates fill whatever remains of the sample budget.
    under_hi = max_iou < self.neg_iou_thresh_hi
    over_lo = max_iou >= self.neg_iou_thresh_lo
    neg_index = np.where(under_hi & over_lo)[0]
    n_neg = int(min(self.n_sample - n_pos, neg_index.size))
    if neg_index.size > 0:
        neg_index = np.random.choice(neg_index, size=n_neg, replace=False)

    keep_index = np.append(pos_index, neg_index)
    sample_roi = roi[keep_index]
    gt_roi_label = gt_roi_label[keep_index]
    gt_roi_label[n_pos:] = 0  # everything after the positives is background

    # Regression targets toward each RoI's assigned box, then normalised.
    gt_roi_loc = bbox2loc(sample_roi, bbox[gt_assignment[keep_index]])
    mean = np.array(loc_normalize_mean, np.float32)
    std = np.array(loc_normalize_std, np.float32)
    gt_roi_loc = (gt_roi_loc - mean) / std
    return sample_roi, gt_roi_loc, gt_roi_label
def _calc_ious(self, anchor, bbox, inside_index):
    """Return per-anchor best boxes and the anchors that best cover a box.

    Returns:
        tuple: index of the highest-IoU box for each anchor, the matching
        IoU values, and the row indices of anchors whose IoU equals a
        box's maximum. Ties are all kept, so this last array can hold
        more entries than there are boxes.
    """
    ious = bbox_iou(anchor, bbox)

    # For each anchor: which box overlaps most, and by how much.
    nearest_box = ious.argmax(axis=1)
    nearest_box_iou = ious[np.arange(len(inside_index)), nearest_box]

    # For each box: the largest IoU any anchor achieves.
    top_anchor = ious.argmax(axis=0)
    top_iou = ious[top_anchor, np.arange(ious.shape[1])]

    hits = np.where(ious == top_iou)
    return nearest_box, nearest_box_iou, hits[0]
def _calc_ious(self, anchor, bbox, inside_index):
    """Summarise the anchor-by-box IoU matrix along both axes.

    The matrix from ``bbox_iou`` is reduced with anchors along axis 0 and
    ground-truth boxes along axis 1.

    Returns:
        tuple: per-anchor index of the best box, per-anchor best IoU, and
        the rows of every anchor that ties for the best IoU of some box.
    """
    iou_table = bbox_iou(anchor, bbox)

    # Row direction: best box index for each anchor, then its value.
    box_of_anchor = iou_table.argmax(axis=1)
    anchor_ids = np.arange(len(inside_index))
    peak_of_anchor = iou_table[anchor_ids, box_of_anchor]

    # Column direction: best anchor index for each box, then its value.
    anchor_of_box = iou_table.argmax(axis=0)
    box_ids = np.arange(iou_table.shape[1])
    peak_of_box = iou_table[anchor_of_box, box_ids]

    # Keep every anchor equal to a column peak, not just the first one.
    anchor_of_box = np.where(iou_table == peak_of_box)[0]
    return box_of_anchor, peak_of_anchor, anchor_of_box
def eval(seq_loader, faster_rcnn, signal_type, scale=1., test_num=10000, stop=False):
    """Run the detector over the loaded sequences and score the predictions.

    Predictions and ground truth are accumulated over every frame and
    passed to ``eval_detection_voc`` (IoU threshold 0.5, VOC07 metric).

    Args:
        seq_loader: Iterable yielding ``(seq_name, seq)`` pairs.
        faster_rcnn: Model exposing ``predict(imgs, signal_type, sizes)``.
        signal_type: Forwarded to the dataset and to ``predict``.
        scale: Not used by the visible code.
        test_num: Only referenced by a commented-out early exit.
        stop: When true, drop into ``ipdb`` before returning.

    Returns:
        tuple: ``(result, best_iou)`` where ``best_iou`` is the largest IoU
        between the first ground-truth entry and the first prediction
        entry, or 0 when taking that maximum raises ``ValueError``.
    """
    carrada = download('Carrada')
    pred_bboxes, pred_labels, pred_scores = list(), list(), list()
    gt_bboxes, gt_labels, gt_difficults = list(), list(), list()
    # print('*** Evaluation ***')
    for n_seq, sequence_data in tqdm(enumerate(seq_loader)):
        seq_name, seq = sequence_data
        # Overfit an image
        # NOTE(review): each sequence is cut down to one frame (index 115)
        # for an overfitting check; the other candidates are disabled.
        # This reading is reconstructed from a whitespace-collapsed line --
        # confirm which assignment is really live.
        # seq = [seq[155]] # large
        seq = [seq[115]]  # medium
        # seq = [seq[28]] # small
        path_to_frames = os.path.join(carrada, seq_name[0])
        frame_set = TestCarradaDataset(opt, seq, 'box', signal_type, path_to_frames)
        frame_loader = data_.DataLoader(frame_set, batch_size=1, shuffle=False, num_workers=opt.num_workers)
        for ii, (imgs, sizes, gt_bboxes_, gt_labels_, gt_difficults_) in tqdm(enumerate(frame_loader)):
            # Reduce the batched size entries to two plain Python numbers.
            sizes = [sizes[0][0].item(), sizes[1][0].item()]
            imgs = normalize(imgs)
            pred_bboxes_, pred_labels_, pred_scores_ = faster_rcnn.predict(
                imgs, signal_type, [sizes])
            gt_bboxes += list(gt_bboxes_.numpy())
            gt_labels += list(gt_labels_.numpy())
            gt_difficults += list(gt_difficults_.numpy())
            pred_bboxes += pred_bboxes_
            pred_labels += pred_labels_
            pred_scores += pred_scores_
            # if ii == test_num: break
    result = eval_detection_voc(pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels, gt_difficults, iou_thresh=0.5, use_07_metric=True)
    # Only the first accumulated frame is inspected here.
    ious = bbox_iou(gt_bboxes[0], pred_bboxes[0])
    try:
        best_iou = ious.max()
    except ValueError:
        # presumably raised for an empty IoU matrix -- TODO confirm
        best_iou = 0
    if stop:
        # Debugging hook: requires ipdb to be installed.
        import ipdb
        ipdb.set_trace()
    # print('Best IoU in validation: {}'.format(ious.max()))
    return result, best_iou
def _calc_ious(self, anchor, bbox, inside_index):
    """IoU bookkeeping between anchors and ground-truth (GT) boxes.

    Returns:
        tuple: for every anchor the index of the GT box with the largest
        IoU, that largest IoU, and the indices of anchors that hold the
        largest IoU of some GT box (ties included).
    """
    iou = bbox_iou(anchor, bbox)

    # GT index with the highest IoU, per anchor.
    gt_idx_per_anchor = iou.argmax(axis=1)
    # The IoU value at that index, per anchor.
    rows = np.arange(len(inside_index))
    max_iou_per_anchor = iou[rows, gt_idx_per_anchor]

    # Anchor index with the highest IoU, per GT box, and that IoU value.
    anchor_idx_per_gt = iou.argmax(axis=0)
    cols = np.arange(iou.shape[1])
    max_iou_per_gt = iou[anchor_idx_per_gt, cols]

    # Every anchor whose IoU equals a GT box's maximum.
    is_gt_max = iou == max_iou_per_gt
    anchors_at_gt_max = np.where(is_gt_max)[0]
    return gt_idx_per_anchor, max_iou_per_anchor, anchors_at_gt_max
def _calc_ious(self, anchor, bbox, inside_index):
    """Compute IoU between the anchors and the boxes and summarise it.

    Axis 1 of the IoU matrix is reduced to get one result per anchor, and
    axis 0 to get one result per box.

    Returns:
        tuple: best box index per anchor, best IoU per anchor, and the row
        indices of anchors matching a box's best IoU (all ties kept).
    """
    pairwise = bbox_iou(anchor, bbox)

    per_anchor_arg = pairwise.argmax(axis=1)
    per_anchor_max = pairwise[np.arange(len(inside_index)), per_anchor_arg]

    per_box_arg = pairwise.argmax(axis=0)
    per_box_max = pairwise[per_box_arg, np.arange(pairwise.shape[1])]

    # Equality search over the whole matrix; the first index array holds
    # the anchor rows.
    anchor_rows = np.where(pairwise == per_box_max)[0]
    return per_anchor_arg, per_anchor_max, anchor_rows
def _anchor_bbox_ious(self, anchor, gt_bbox):
    """Reduce the anchor/ground-truth IoU matrix along both axes.

    Args:
        anchor: Anchor boxes (rows of the IoU matrix).
        gt_bbox: Ground-truth boxes (columns of the IoU matrix).

    Returns:
        tuple: ``(bbox_index_for_anchor, max_iou_for_anchor,
        anchor_index_for_bbox)`` -- per anchor the index and value of its
        best box, and per box the index of its first best anchor.
    """
    iou = bbox_iou(anchor, gt_bbox)

    # One entry per anchor.
    bbox_index_for_anchor = iou.argmax(axis=1)
    max_iou_for_anchor = iou.max(axis=1)

    # One entry per ground-truth box. The per-box maximum value was
    # computed here before but never used, so it is no longer calculated.
    anchor_index_for_bbox = iou.argmax(axis=0)
    return bbox_index_for_anchor, max_iou_for_anchor, anchor_index_for_bbox
def _calc_ious(self, anchor, bbox, inside_index):
    """Compute IoU statistics between the anchors and the gt boxes.

    Returns:
        tuple: ``(argmax_ious, max_ious, gt_argmax_ious)`` -- best box
        index per anchor, the corresponding IoU, and the rows of all
        anchors whose IoU equals the maximum of some box column.
    """
    table = bbox_iou(anchor, bbox)

    col_of_row_max = table.argmax(axis=1)
    row_max = table[np.arange(len(inside_index)), col_of_row_max]

    row_of_col_max = table.argmax(axis=0)
    col_max = table[row_of_col_max, np.arange(table.shape[1])]

    # Widen the single argmax row per column to every row that ties.
    rows_at_col_max = np.where(table == col_max)[0]
    return col_of_row_max, row_max, rows_at_col_max
def _calc_ious(self, anchor, bbox, inside_index):
    """Find mutual best matches between anchors and ground-truth boxes.

    Args:
        anchor: Anchor boxes.
        bbox: Ground-truth boxes.
        inside_index: Supplies the number of rows to look up via ``len``.

    Returns:
        tuple: index of the best box for each anchor, the IoU with that
        box, and indices of anchors that reach a box's highest IoU.
    """
    ious = bbox_iou(anchor, bbox)

    # Anchor -> box direction.
    anchor_to_gt = ious.argmax(axis=1)
    lookup_rows = np.arange(len(inside_index))
    anchor_to_gt_iou = ious[lookup_rows, anchor_to_gt]

    # Box -> anchor direction; only the maximum value is needed below.
    gt_to_anchor = ious.argmax(axis=0)
    lookup_cols = np.arange(ious.shape[1])
    gt_peak = ious[gt_to_anchor, lookup_cols]

    tie_mask = ious == gt_peak
    return anchor_to_gt, anchor_to_gt_iou, np.where(tie_mask)[0]
def __call__(self, roi, bbox, label,
             loc_normalize_mean=(0., 0., 0., 0.),
             loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
    """Pick training RoIs and build their labels and box targets.

    Args:
        roi: Proposed regions; the ground-truth boxes are appended so they
            can be sampled too.
        bbox: Ground-truth box coordinates (2-D).
        label: Ground-truth class ids aligned with ``bbox``.
        loc_normalize_mean: Offset removed from the box targets.
        loc_normalize_std: Scale the box targets are divided by.

    Returns:
        tuple: ``(sample_roi, gt_roi_loc, gt_roi_label)``; label 0 marks
        background and foreground classes start at 1.
    """
    n_bbox, _ = bbox.shape  # kept as an implicit 2-D shape check

    roi = np.concatenate((roi, bbox), axis=0)
    max_pos = np.round(self.n_sample * self.pos_ratio)

    # IoU of every RoI against every ground-truth box.
    iou = bbox_iou(roi, bbox)
    gt_assignment = iou.argmax(axis=1)
    max_iou = iou.max(axis=1)
    gt_roi_label = label[gt_assignment] + 1

    # Positives: IoU at or above the threshold, capped at the quota.
    is_pos = max_iou >= self.pos_iou_thresh
    pos_index = np.where(is_pos)[0]
    pos_count = int(min(max_pos, pos_index.size))
    if pos_index.size > 0:
        pos_index = np.random.choice(
            pos_index, size=pos_count, replace=False)

    # Negatives: IoU inside [neg_iou_thresh_low, neg_iou_thresh_high).
    is_neg = ((max_iou < self.neg_iou_thresh_high)
              & (max_iou >= self.neg_iou_thresh_low))
    neg_index = np.where(is_neg)[0]
    neg_count = int(min(self.n_sample - pos_count, neg_index.size))
    if neg_index.size > 0:
        neg_index = np.random.choice(
            neg_index, size=neg_count, replace=False)

    keep_index = np.append(pos_index, neg_index)
    gt_roi_label = gt_roi_label[keep_index]
    gt_roi_label[pos_count:] = 0  # negatives become background
    sample_roi = roi[keep_index]

    matched_bbox = bbox[gt_assignment[keep_index]]
    gt_roi_loc = bbox2loc(sample_roi, matched_bbox)
    gt_roi_loc = ((gt_roi_loc - np.array(loc_normalize_mean, np.float32))
                  / np.array(loc_normalize_std, np.float32))
    return sample_roi, gt_roi_loc, gt_roi_label
def __call__(self, roi, bbox, label,
             loc_normalize_mean=(0., 0., 0., 0.),
             loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
    """Assign ground truth to a random sample of proposals.

    Proposals and ground-truth boxes are pooled, split into foreground and
    background by their best IoU, and sampled without replacement up to
    ``self.n_sample`` entries.

    Returns:
        tuple: sampled RoIs, their normalised regression targets, and
        their labels (0 for background, class id + 1 otherwise).
    """
    n_bbox, _ = bbox.shape  # unused beyond asserting a 2-D shape

    roi = np.concatenate((roi, bbox), axis=0)
    fg_budget = np.round(self.n_sample * self.pos_ratio)

    iou = bbox_iou(roi, bbox)
    gt_assignment = iou.argmax(axis=1)
    max_iou = iou.max(axis=1)
    # Classes move from [0, n_fg_class - 1] to [1, n_fg_class].
    gt_roi_label = label[gt_assignment] + 1

    # Foreground: best IoU >= pos_iou_thresh.
    pos_index = np.where(max_iou >= self.pos_iou_thresh)[0]
    n_fg = int(min(fg_budget, pos_index.size))
    if pos_index.size > 0:
        pos_index = np.random.choice(pos_index, size=n_fg, replace=False)

    # Background: best IoU in [neg_iou_thresh_lo, neg_iou_thresh_hi).
    bg_mask = max_iou < self.neg_iou_thresh_hi
    bg_mask = bg_mask & (max_iou >= self.neg_iou_thresh_lo)
    neg_index = np.where(bg_mask)[0]
    n_bg = int(min(self.n_sample - n_fg, neg_index.size))
    if neg_index.size > 0:
        neg_index = np.random.choice(neg_index, size=n_bg, replace=False)

    # Foreground first, background after.
    keep_index = np.append(pos_index, neg_index)
    sample_roi = roi[keep_index]
    gt_roi_label = gt_roi_label[keep_index]
    gt_roi_label[n_fg:] = 0

    # Offsets and scales that map each sampled RoI onto its assigned box.
    raw_loc = bbox2loc(sample_roi, bbox[gt_assignment[keep_index]])
    shift = np.array(loc_normalize_mean, np.float32)
    spread = np.array(loc_normalize_std, np.float32)
    gt_roi_loc = (raw_loc - shift) / spread
    return sample_roi, gt_roi_loc, gt_roi_label
def __call__(self, roi, bbox, label,
             loc_normalize_mean=(0., 0., 0., 0.),
             loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
    """Sample positive and negative RoIs and compute their targets.

    Args:
        roi: Candidate regions.
        bbox: Ground-truth boxes; also appended to ``roi``, so every box
            is itself a candidate.
        label: Ground-truth labels for ``bbox``.
        loc_normalize_mean: Subtracted from the raw offsets.
        loc_normalize_std: Divides the shifted offsets.

    Returns:
        tuple: ``(sample_roi, gt_roi_loc, gt_roi_label)``.
    """
    n_bbox, _ = bbox.shape  # not used afterwards; requires a 2-D bbox

    roi = np.concatenate((roi, bbox), axis=0)
    target_pos = np.round(self.n_sample * self.pos_ratio)

    iou = bbox_iou(roi, bbox)
    # Per RoI: index of the ground-truth box with the highest IoU ...
    gt_assignment = iou.argmax(axis=1)
    # ... and that highest IoU itself.
    max_iou = iou.max(axis=1)

    # Ground-truth labels are shifted to start at 1; 0 means background.
    gt_roi_label = label[gt_assignment] + 1

    # Foreground RoIs: IoU >= pos_iou_thresh. Fewer than the target may
    # be available.
    pos_index = np.where(max_iou >= self.pos_iou_thresh)[0]
    kept_pos = int(min(target_pos, pos_index.size))
    if pos_index.size > 0:
        pos_index = np.random.choice(pos_index, size=kept_pos,
                                     replace=False)

    # Background RoIs: IoU bounded below and above.
    in_neg_band = ((max_iou < self.neg_iou_thresh_hi)
                   & (max_iou >= self.neg_iou_thresh_lo))
    neg_index = np.where(in_neg_band)[0]
    kept_neg = int(min(self.n_sample - kept_pos, neg_index.size))
    if neg_index.size > 0:
        neg_index = np.random.choice(neg_index, size=kept_neg,
                                     replace=False)

    # Only the chosen positives and negatives survive.
    keep_index = np.append(pos_index, neg_index)
    gt_roi_label = gt_roi_label[keep_index]
    gt_roi_label[kept_pos:] = 0
    sample_roi = roi[keep_index]

    # Offsets between each sampled RoI and its assigned box.
    assigned_bbox = bbox[gt_assignment[keep_index]]
    gt_roi_loc = bbox2loc(sample_roi, assigned_bbox)
    gt_roi_loc = gt_roi_loc - np.array(loc_normalize_mean, np.float32)
    gt_roi_loc = gt_roi_loc / np.array(loc_normalize_std, np.float32)
    return sample_roi, gt_roi_loc, gt_roi_label
def _anchor_bbox_ious(self, anchor, gt_bbox):
    """Compute the IoU summaries used to label anchors.

    The two summaries feed the two labelling criteria noted by the
    original author: (1) an anchor whose IoU with a ground-truth box is
    above a threshold, and (2) the anchor with the highest IoU for each
    ground-truth box. The thresholding itself is not done here.

    Returns:
        tuple: ``(bbox_index_for_anchor, max_iou_for_anchor,
        anchor_index_for_bbox)``.
    """
    iou = bbox_iou(anchor, gt_bbox)

    # Criterion 1 inputs: best box and best IoU for every anchor.
    bbox_index_for_anchor = iou.argmax(axis=1)
    max_iou_for_anchor = iou.max(axis=1)

    # Criterion 2 input: first best anchor for every ground-truth box.
    # The per-box maximum value was computed here before but never read.
    anchor_index_for_bbox = iou.argmax(axis=0)
    return bbox_index_for_anchor, max_iou_for_anchor, anchor_index_for_bbox
def _calc_ious(self, anchor, bbox, inside_index):
    """Compute ious between the anchors and the gt boxes and reduce them.

    ``bbox_iou`` yields a matrix indexed ``[anchor, box]``.

    Returns:
        tuple: best box index per anchor, best IoU per anchor, and the
        row indices of anchors that equal a box's best IoU.
    """
    ious = bbox_iou(anchor, bbox)

    # Index of the maximum along each row.
    row_argmax = ious.argmax(axis=1)
    row_max = ious[np.arange(len(inside_index)), row_argmax]

    # Value of the maximum along each column.
    col_argmax = ious.argmax(axis=0)
    col_max = ious[col_argmax, np.arange(ious.shape[1])]

    rows, *_ = np.where(ious == col_max)
    return row_argmax, row_max, rows
def _calc_ious(self, anchor, bbox, inside_index):
    """Find closest anchor/box pairs, where "closest" means highest IoU.

    Returns:
        tuple: per anchor the index of its closest box, per anchor the IoU
        with that box, and the anchor rows that are closest to some box.

    Note:
        The third value is not simply the column-wise argmax. The argmax
        keeps one anchor per box, while the equality search used here
        keeps every anchor that ties for a box's best IoU.
    """
    ious = bbox_iou(anchor, bbox)

    closest_box = ious.argmax(axis=1)
    anchor_idx = np.arange(len(inside_index))
    closest_box_iou = ious[anchor_idx, closest_box]

    closest_anchor = ious.argmax(axis=0)
    box_idx = np.arange(ious.shape[1])
    closest_anchor_iou = ious[closest_anchor, box_idx]

    closest_anchor = np.where(ious == closest_anchor_iou)[0]
    return closest_box, closest_box_iou, closest_anchor
def _anchor_bbox_ious(self, anchor, gt_bbox):
    """Return the best-match indices between anchors and ground truth.

    Args:
        anchor: Anchor boxes.
        gt_bbox: Ground-truth boxes.

    Returns:
        tuple: ``(bbox_index_for_anchor, max_iou_for_anchor,
        anchor_index_for_bbox)``.
    """
    iou = bbox_iou(anchor, gt_bbox)

    # NOTE(review): a disabled branch here once tried to special-case an
    # IoU matrix with zero columns (no ground-truth boxes). It was
    # commented out and sized its fallback arrays inconsistently, so it is
    # dropped. Zero-box input is therefore not handled here; presumably
    # the reductions below raise for it -- confirm against the caller.
    bbox_index_for_anchor = iou.argmax(axis=1)
    max_iou_for_anchor = iou.max(axis=1)

    # The per-box maximum value was computed here before but never used.
    anchor_index_for_bbox = iou.argmax(axis=0)
    return bbox_index_for_anchor, max_iou_for_anchor, anchor_index_for_bbox
def _calc_ious(self, anchor, bbox, inside_index):
    """Map anchors to ground-truth boxes and back using IoU.

    Returns:
        tuple:
        * anchor -> gt: index of the ground-truth box with the largest
          IoU for each anchor.
        * the IoU value at that index.
        * gt -> anchor: row indices of all anchors that reach the largest
          IoU of a ground-truth box.
    """
    ious = bbox_iou(anchor, bbox)

    # anchor -> gt (values are ground-truth indices)
    anchor2gt = ious.argmax(axis=1)
    # value found at each of those indices
    anchor2gt_iou = ious[np.arange(len(inside_index)), anchor2gt]

    # gt -> anchor (values are anchor indices)
    gt2anchor = ious.argmax(axis=0)
    # value found at each of those indices
    gt2anchor_iou = ious[gt2anchor, np.arange(ious.shape[1])]

    # Replace the single index per box with every row equal to the peak.
    gt2anchor = np.where(ious == gt2anchor_iou)[0]
    return anchor2gt, anchor2gt_iou, gt2anchor
def make_proposal_target(self, roi, gt_bbox, gt_bbox_label,
                         loc_normalize_mean=(0., 0., 0., 0.),
                         loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
    """Sample RoIs and build classification and regression targets.

    Ground-truth boxes are appended to ``roi`` so they can be chosen as
    samples. At most ``int(self.n_sample * self.pos_ratio)`` positives
    are drawn, and negatives fill the rest of ``self.n_sample``.

    Args:
        roi: ``(R, 4)`` array of candidate regions.
        gt_bbox: ``(G, 4)`` array of ground-truth boxes.
        gt_bbox_label: ``(G,)`` array of class ids starting at 0.
        loc_normalize_mean: Subtracted from the raw box deltas.
        loc_normalize_std: Divides the shifted box deltas.

    Returns:
        tuple: ``(sample_roi, target_delta_for_sample_roi,
        bbox_bg_label_for_sample_roi)``; label 0 is background and object
        classes are shifted to ``1 .. n_class``.

    Raises:
        AssertionError: If the inputs are not arrays of the shapes above.
    """
    assert isinstance(roi, np.ndarray)
    assert isinstance(gt_bbox, np.ndarray)
    assert isinstance(gt_bbox_label, np.ndarray)
    assert len(roi.shape) == len(gt_bbox.shape) == 2
    assert len(gt_bbox_label.shape) == 1
    assert roi.shape[1] == gt_bbox.shape[1] == 4
    assert gt_bbox.shape[0] == gt_bbox_label.shape[0]

    # Ground-truth boxes are candidates as well.
    roi = np.concatenate((roi, gt_bbox), axis=0)

    n_pos = int(self.n_sample * self.pos_ratio)
    iou = bbox_iou(roi, gt_bbox)
    bbox_index_for_roi = iou.argmax(axis=1)
    max_iou_for_roi = iou.max(axis=1)

    # Class 0 is reserved for background, so object classes move up by 1.
    bbox_bg_label_for_roi = gt_bbox_label[bbox_index_for_roi] + 1

    # Foreground RoIs: best IoU >= pos_iou_thresh.
    pos_index = np.where(max_iou_for_roi >= self.pos_iou_thresh)[0]
    n_pos_real = int(min(n_pos, len(pos_index)))
    if n_pos_real > 0:
        pos_index = np.random.choice(pos_index, size=n_pos_real,
                                     replace=False)
    else:
        # A zero quota must keep no candidates. Skipping the subsample
        # used to keep all of them, and they were then relabelled as
        # background below.
        pos_index = pos_index[:0]

    # Background RoIs: best IoU in [neg_iou_thresh_low, neg_iou_thresh_high).
    neg_index = np.where((max_iou_for_roi >= self.neg_iou_thresh_low)
                         & (max_iou_for_roi < self.neg_iou_thresh_high))[0]
    n_neg = self.n_sample - n_pos_real
    n_neg_real = int(min(n_neg, len(neg_index)))
    if n_neg_real > 0:
        neg_index = np.random.choice(neg_index, size=n_neg_real,
                                     replace=False)
    else:
        # Same rule: once positives fill the budget, no negative is kept.
        # Previously every negative was kept, overflowing n_sample.
        neg_index = neg_index[:0]

    keep_index = np.append(pos_index, neg_index)
    sample_roi = roi[keep_index]
    bbox_bg_label_for_sample_roi = bbox_bg_label_for_roi[keep_index]
    # Positives come first; everything after them is background.
    bbox_bg_label_for_sample_roi[n_pos_real:] = 0

    # Deltas from each sampled RoI to its assigned ground-truth box.
    target_delta_for_sample_roi = bbox2delta(
        sample_roi, gt_bbox[bbox_index_for_roi[keep_index]])
    target_delta_for_sample_roi = (
        (target_delta_for_sample_roi - np.array(loc_normalize_mean))
        / np.array(loc_normalize_std))
    return sample_roi, target_delta_for_sample_roi, bbox_bg_label_for_sample_roi
def __call__(self, roi, bbox, label,
             loc_normalize_mean=(0., 0., 0., 0.),
             loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
    """Give sampled candidate boxes their true offsets and labels.

    Args:
        roi: Candidate boxes; ground-truth boxes are appended to them.
        bbox: Ground-truth boxes.
        label: Class id of each ground-truth box.
        loc_normalize_mean: Subtracted from the offsets.
        loc_normalize_std: Divides the shifted offsets.

    Returns:
        tuple: ``(sample_roi, gt_roi_loc, gt_roi_label)``.
    """
    roi = np.concatenate((roi, bbox), axis=0)

    # Number of positive boxes to draw from the image.
    wanted_pos = np.round(self.n_sample * self.pos_ratio)

    # IoU against the ground-truth boxes, reduced per candidate.
    iou = bbox_iou(roi, bbox)
    gt_assignment = iou.argmax(axis=1)  # which box overlaps most
    max_iou = iou.max(axis=1)           # how large that overlap is

    # Shift the class ids so that 0 can stand for background.
    gt_roi_label = label[gt_assignment] + 1

    # Draw the non-background candidates.
    pos_index = np.where(max_iou >= self.pos_iou_thresh)[0]
    drawn_pos = int(min(wanted_pos, pos_index.size))
    if pos_index.size > 0:
        pos_index = np.random.choice(pos_index, size=drawn_pos,
                                     replace=False)

    # Draw the background candidates.
    at_least_lo = max_iou >= self.neg_iou_thresh_lo
    below_hi = max_iou < self.neg_iou_thresh_hi
    neg_index = np.where(at_least_lo & below_hi)[0]
    drawn_neg = int(min(self.n_sample - drawn_pos, neg_index.size))
    if neg_index.size > 0:
        neg_index = np.random.choice(neg_index, size=drawn_neg,
                                     replace=False)

    # Combine the selection: positives, then background.
    keep_index = np.append(pos_index, neg_index)
    sample_roi = roi[keep_index]
    gt_roi_label = gt_roi_label[keep_index]
    gt_roi_label[drawn_pos:] = 0  # background label

    gt_roi_loc = bbox2loc(src_bbox=sample_roi,
                          dst_bbox=bbox[gt_assignment[keep_index]])
    centre = np.array(loc_normalize_mean, np.float32)
    scale = np.array(loc_normalize_std, np.float32)
    return sample_roi, (gt_roi_loc - centre) / scale, gt_roi_label
def _calc_ious(self, anchor, bbox, inside_index):
    """Reduce the anchor/box IoU matrix to per-anchor and per-box bests.

    Returns:
        tuple: best box index for each anchor, the IoU with it, and the
        anchor rows equal to a box's best IoU. The last array holds at
        least one row per box, and more when anchors tie.
    """
    iou_nk = bbox_iou(anchor, bbox)

    # Axis 1 runs over boxes, so this is one result per anchor.
    box_choice = iou_nk.argmax(axis=1)
    n_index = np.arange(len(inside_index))
    box_choice_iou = iou_nk[n_index, box_choice]

    # Axis 0 runs over anchors, so this is one result per box.
    anchor_choice = iou_nk.argmax(axis=0)
    k_index = np.arange(iou_nk.shape[1])
    anchor_choice_iou = iou_nk[anchor_choice, k_index]

    matches = np.where(iou_nk == anchor_choice_iou)
    anchor_choice = matches[0]
    return box_choice, box_choice_iou, anchor_choice
def __call__(self, roi, bbox,
             loc_normalize_mean=(0., 0., 0., 0.),
             loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
    """Sample foreground/background RoIs with binary labels.

    Args:
        roi: Candidate regions; ``bbox`` is appended to them.
        bbox: Ground-truth box array (2-D).
        loc_normalize_mean: Subtracted from the offsets.
        loc_normalize_std: Divides the shifted offsets.

    Returns:
        tuple: ``(sample_roi, gt_roi_loc, gt_roi_label,
        pos_roi_per_this_image)`` where labels are 1 for the leading
        foreground samples and 0 for the rest.

    NOTE(review): the thresholds are applied to the full 2-D IoU matrix
    and ``bbox`` is passed to ``bbox2loc`` without per-RoI assignment.
    This looks like it assumes a single ground-truth box -- confirm.
    """
    n_bbox, _ = bbox.shape  # unused; enforces a 2-D bbox
    fg_target = np.round(self.n_filtered * self.pos_ratio)

    roi = np.concatenate((roi, bbox), axis=0)
    iou = bbox_iou(roi, bbox)

    # Foreground rows.
    fg_rows = np.where(iou >= self.pos_iou_thresh)[0]
    n_fg = int(min(fg_target, fg_rows.size))
    if fg_rows.size > 0:
        fg_rows = np.random.choice(fg_rows, size=n_fg, replace=False)

    # Background rows.
    bg_mask = (iou < self.neg_iou_thresh_h) & (iou >= self.neg_iou_thresh_l)
    bg_rows = np.where(bg_mask)[0]
    n_bg = int(min(self.n_filtered - n_fg, bg_rows.size))
    if bg_rows.size > 0:
        bg_rows = np.random.choice(bg_rows, size=n_bg, replace=False)

    keep_index = np.append(fg_rows, bg_rows)
    sample_roi = roi[keep_index]

    # Start from all ones, then zero out everything after the foreground.
    gt_roi_label = np.ones((len(sample_roi), ), dtype=np.int32)
    gt_roi_label[n_fg:] = 0

    gt_roi_loc = bbox2loc(sample_roi, bbox)
    mean = np.array(loc_normalize_mean, np.float32)
    std = np.array(loc_normalize_std, np.float32)
    gt_roi_loc = (gt_roi_loc - mean) / std
    return sample_roi, gt_roi_loc, gt_roi_label, n_fg
def _calc_ious(self, anchor, bbox, inside_index):
    """Select anchor and box indices by IoU.

    Returns:
        tuple:
        1. index of the ground-truth box with the largest IoU per anchor,
        2. those IoU values,
        3. indices of the anchors with the largest IoU per ground-truth
           box (all ties included).
    """
    iou = bbox_iou(anchor, bbox)

    # Ground-truth box with the largest IoU for each anchor.
    gt_pick = iou.argmax(axis=1)
    # One IoU per entry of inside_index.
    pick_rows = np.arange(len(inside_index))
    gt_pick_iou = iou[pick_rows, gt_pick]

    # Anchor with the largest IoU for each ground-truth box, and its IoU.
    anchor_pick = iou.argmax(axis=0)
    pick_cols = np.arange(iou.shape[1])
    anchor_pick_iou = iou[anchor_pick, pick_cols]

    anchor_pick = np.where(iou == anchor_pick_iou)[0]
    return gt_pick, gt_pick_iou, anchor_pick
def _calc_ious(self, anchor, bbox, inside_index):
    """Compute IoU matches, tolerating an empty set of gt boxes.

    Returns:
        tuple: ``(argmax_ious, max_ious, gt_argmax_ious)``. With no
        ground-truth boxes the index results are empty lists and
        ``max_ious`` holds one zero per anchor.
    """
    dims = bbox.shape
    # The leading dimension is the box count; a shape with no dimensions
    # is compared as-is, exactly as before.
    n_bbox = dims[0] if len(dims) > 0 else dims

    if not n_bbox > 0:
        # No boxes: nothing to match against.
        zero_ious = np.asarray([0] * len(anchor))
        return [], zero_ious, []

    ious = bbox_iou(anchor, bbox)

    best_box = ious.argmax(axis=1)
    best_box_iou = ious[np.arange(len(inside_index)), best_box]

    best_anchor = ious.argmax(axis=0)
    best_anchor_iou = ious[best_anchor, np.arange(ious.shape[1])]

    # Keep every anchor that ties for a box's best IoU.
    best_anchor = np.where(ious == best_anchor_iou)[0]
    return best_box, best_box_iou, best_anchor
def _calc_ious(self, anchor, bbox, inside_index):
    """Compute ious between the anchors and the gt boxes.

    Args:
        anchor: Anchor boxes, one row per entry of ``inside_index``.
        bbox: Ground-truth boxes, ``R`` of them.
        inside_index: Its length sets the number of anchor rows looked up.

    Returns:
        tuple:
        * ``argmax_ious``: per anchor, index (``0 .. R-1``) of the
          ground-truth box with the largest IoU.
        * ``max_ious``: per anchor, that largest IoU.
        * ``gt_argmax_ious``: anchor indices whose IoU equals the largest
          IoU of some ground-truth box. Unlike a plain column argmax this
          keeps ties, so it may hold more than ``R`` entries.
    """
    ious = bbox_iou(anchor, bbox)

    # Best box for every anchor.
    arg_by_anchor = ious.argmax(axis=1)
    max_by_anchor = ious[np.arange(len(inside_index)), arg_by_anchor]

    # Best anchor for every box, reduced to the IoU value it reaches.
    arg_by_box = ious.argmax(axis=0)
    max_by_box = ious[arg_by_box, np.arange(ious.shape[1])]

    reaches_box_max = ious == max_by_box
    arg_by_box = np.where(reaches_box_max)[0]
    return arg_by_anchor, max_by_anchor, arg_by_box
def _calc_ious(self, anchor, bbox, inside_index):
    """Compute ious between the anchors and the gt boxes.

    Returns:
        tuple:
        * ``argmax_ious[i] = j``: anchor ``i`` overlaps box ``j`` most.
        * ``max_ious[i] = v``: that largest IoU of anchor ``i`` is ``v``.
        * ``gt_argmax_ious``: indices of the anchors to select, i.e. each
          one has the largest IoU with some box. Which box it is does not
          matter to the caller, so only the anchor index is reported.
    """
    ious = bbox_iou(anchor, bbox)

    argmax_per_anchor = ious.argmax(axis=1)
    anchor_range = np.arange(len(inside_index))
    max_per_anchor = ious[anchor_range, argmax_per_anchor]

    # first_anchor[i] = j: box i overlaps anchor j most (first such j).
    first_anchor = ious.argmax(axis=0)
    box_range = np.arange(ious.shape[1])
    # max_per_box[i] = v: the largest IoU of box i is v.
    max_per_box = ious[first_anchor, box_range]

    # The plain column argmax is not returned because it drops equal
    # maxima. If anchors 20 and 300 both reach 0.9 with the same box, the
    # argmax keeps only anchor 20; the equality search keeps both.
    selected_anchors = np.where(ious == max_per_box)[0]
    return argmax_per_anchor, max_per_anchor, selected_anchors
def _calc_ious(self, anchor, bbox, inside_index):
    """Compute ious between the anchors and the gt boxes.

    Returns:
        tuple:
        * argmax ious: for each anchor, the index of the closest box.
        * max ious: for each anchor, the IoU with that closest box.
        * gt argmax ious: the anchors with the highest overlap for some
          box. Every box contributes at least one anchor, and ties
          contribute more.
    """
    ious = bbox_iou(anchor, bbox)

    # Axis 1 is the box axis, so the reduction is per anchor.
    box_for_anchor = ious.argmax(axis=1)
    iou_for_anchor = ious[np.arange(len(inside_index)), box_for_anchor]

    # Axis 0 is the anchor axis, so the reduction is per box.
    anchor_for_box = ious.argmax(axis=0)
    iou_for_box = ious[anchor_for_box, np.arange(ious.shape[1])]

    overlap_hits = np.where(ious == iou_for_box)
    return box_for_anchor, iou_for_anchor, overlap_hits[0]
def eval(seq_loader, faster_rcnn, signal_type, test_num=10000):
    """Evaluate the detector on every frame of every loaded sequence.

    Args:
        seq_loader: Iterable yielding ``(seq_name, seq)`` pairs.
        faster_rcnn: Model exposing ``predict(imgs, sizes)``.
        signal_type: Forwarded to the frame dataset.
        test_num: Not used by the active code.

    Returns:
        tuple: ``(result, best_iou)`` -- the VOC07-metric result over all
        frames, and the largest IoU between the first ground-truth entry
        and the first prediction entry (0 if that maximum raises
        ``ValueError``).
    """
    carrada = download('Carrada')
    pred_bboxes, pred_labels, pred_scores = [], [], []
    gt_bboxes, gt_labels, gt_difficults = [], [], []

    for _seq_no, (seq_name, seq) in tqdm(enumerate(seq_loader)):
        frames_dir = os.path.join(carrada, seq_name[0])
        frame_set = TestCarradaDataset(opt, seq, 'box', signal_type,
                                       frames_dir)
        frame_loader = data_.DataLoader(frame_set,
                                        batch_size=1,
                                        shuffle=False,
                                        num_workers=opt.num_workers)
        for _frame_no, batch in tqdm(enumerate(frame_loader)):
            imgs, sizes, gt_bboxes_, gt_labels_, gt_difficults_ = batch
            # Reduce the batched size entries to two plain numbers.
            sizes = [sizes[0][0].item(), sizes[1][0].item()]
            imgs = normalize(imgs)
            pred_bboxes_, pred_labels_, pred_scores_ = faster_rcnn.predict(
                imgs, [sizes])

            gt_bboxes.extend(gt_bboxes_.numpy())
            gt_labels.extend(gt_labels_.numpy())
            gt_difficults.extend(gt_difficults_.numpy())
            pred_bboxes.extend(pred_bboxes_)
            pred_labels.extend(pred_labels_)
            pred_scores.extend(pred_scores_)

    result = eval_detection_voc(pred_bboxes, pred_labels, pred_scores,
                                gt_bboxes, gt_labels, gt_difficults,
                                use_07_metric=True)

    # Only the first accumulated frame is inspected for the best IoU.
    ious = bbox_iou(gt_bboxes[0], pred_bboxes[0])
    try:
        best_iou = ious.max()
    except ValueError:
        best_iou = 0
    return result, best_iou
def __call__(self, roi, bbox, label,
             loc_normalize_mean=(0., 0., 0., 0.),
             loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
    """Assign ground truth to sampled proposals.

    Up to ``self.n_sample`` RoIs are drawn from the union of ``roi`` and
    ``bbox``. At most ``round(self.pos_ratio * self.n_sample)`` of them
    are foreground. Each sampled RoI gets a class label and the offsets
    and scales (from ``bbox2loc``) that map it onto its assigned
    ground-truth box.

    Args:
        roi (array): Regions of interest to sample from, one box per row.
        bbox (array): Ground-truth bounding boxes, one box per row.
        label (array): Ground-truth labels, one per row of ``bbox``,
            counted from 0 over the foreground classes.
        loc_normalize_mean (tuple of four floats): Mean used to normalise
            the box offsets.
        loc_normalize_std (tuple of four floats): Standard deviation used
            to normalise the box offsets.

    Returns:
        (array, array, array):

        * **sample_roi**: The sampled regions of interest.
        * **gt_roi_loc**: Normalised offsets and scales matching each
          sampled RoI to its ground-truth box.
        * **gt_roi_label**: Labels of the sampled RoIs. 0 is background
          and foreground class ``c`` is stored as ``c + 1``.
    """
    n_bbox, _ = bbox.shape  # unused afterwards; requires a 2-D bbox

    roi = np.concatenate((roi, bbox), axis=0)
    pos_cap = np.round(self.n_sample * self.pos_ratio)

    iou = bbox_iou(roi, bbox)
    gt_assignment = iou.argmax(axis=1)
    max_iou = iou.max(axis=1)

    # Move classes from [0, n_fg_class - 1] to [1, n_fg_class] so that
    # the value 0 can represent the background.
    gt_roi_label = label[gt_assignment] + 1

    # Foreground RoIs have IoU >= pos_iou_thresh.
    fg_mask = max_iou >= self.pos_iou_thresh
    pos_index = np.where(fg_mask)[0]
    n_pos_kept = int(min(pos_cap, pos_index.size))
    if pos_index.size > 0:
        pos_index = np.random.choice(
            pos_index, size=n_pos_kept, replace=False)

    # Background RoIs have IoU in [neg_iou_thresh_lo, neg_iou_thresh_hi).
    bg_mask = ((max_iou < self.neg_iou_thresh_hi)
               & (max_iou >= self.neg_iou_thresh_lo))
    neg_index = np.where(bg_mask)[0]
    n_neg_kept = int(min(self.n_sample - n_pos_kept, neg_index.size))
    if neg_index.size > 0:
        neg_index = np.random.choice(
            neg_index, size=n_neg_kept, replace=False)

    # Selected indices: positives followed by negatives.
    keep_index = np.append(pos_index, neg_index)
    gt_roi_label = gt_roi_label[keep_index]
    gt_roi_label[n_pos_kept:] = 0  # negative labels --> 0
    sample_roi = roi[keep_index]

    # Offsets and scales from the sampled RoIs to their ground truth.
    target_bbox = bbox[gt_assignment[keep_index]]
    gt_roi_loc = bbox2loc(sample_roi, target_bbox)
    norm_mean = np.array(loc_normalize_mean, np.float32)
    norm_std = np.array(loc_normalize_std, np.float32)
    gt_roi_loc = (gt_roi_loc - norm_mean) / norm_std
    return sample_roi, gt_roi_loc, gt_roi_label
def calc_detection_voc_prec_rec(pred_bboxes, pred_labels, pred_scores,
                                gt_bboxes, gt_labels, gt_difficults=None,
                                iou_thresh=0.5):
    """Calculate precision and recall based on evaluation code of PASCAL VOC.

    This function calculates precision and recall of predicted bounding
    boxes obtained from a dataset which has :math:`N` images.
    The code is based on the evaluation code used in PASCAL VOC Challenge.

    Args:
        pred_bboxes (iterable of numpy.ndarray): An iterable of :math:`N`
            sets of bounding boxes. Each element is an array of shape
            :math:`(R, 4)`, where :math:`R` is the number of boxes in that
            image and may vary among images. The second axis corresponds
            to :math:`y_{min}, x_{min}, y_{max}, x_{max}`.
        pred_labels (iterable of numpy.ndarray): An iterable of predicted
            labels, organized like :obj:`pred_bboxes`. Its length is
            :math:`N`.
        pred_scores (iterable of numpy.ndarray): An iterable of confidence
            scores, organized like :obj:`pred_bboxes`. Its length is
            :math:`N`.
        gt_bboxes (iterable of numpy.ndarray): An iterable of ground truth
            bounding boxes whose length is :math:`N`. Each element has
            shape :math:`(R, 4)`; the number of boxes does not need to
            match the number of predicted boxes.
        gt_labels (iterable of numpy.ndarray): An iterable of ground truth
            labels, organized like :obj:`gt_bboxes`.
        gt_difficults (iterable of numpy.ndarray): An iterable of boolean
            arrays, organized like :obj:`gt_bboxes`, telling whether each
            ground truth box is difficult. If this is :obj:`None`, or an
            element of it is :obj:`None`, the corresponding boxes are all
            treated as not difficult.
        iou_thresh (float): A prediction can match a ground truth box only
            if their Intersection over Union is at least this value.

    Returns:
        tuple of two lists:
        This function returns two lists: :obj:`prec` and :obj:`rec`.
        Both are empty when the inputs contain no labels at all.

        * :obj:`prec`: A list of arrays. :obj:`prec[l]` is precision
          for class :math:`l`. If class :math:`l` does not exist in
          either :obj:`pred_labels` or :obj:`gt_labels`, :obj:`prec[l]`
          is :obj:`None`.
        * :obj:`rec`: A list of arrays. :obj:`rec[l]` is recall
          for class :math:`l`. If :obj:`gt_labels` has no box of class
          :math:`l` that is not marked as difficult, :obj:`rec[l]` is
          :obj:`None`.

    Raises:
        ValueError: If the input iterables do not all have the same length.

    """
    iterators = [iter(pred_bboxes), iter(pred_labels), iter(pred_scores),
                 iter(gt_bboxes), iter(gt_labels)]
    if gt_difficults is not None:
        iterators.append(iter(gt_difficults))

    n_pos = defaultdict(int)
    score = defaultdict(list)
    match = defaultdict(list)

    # Advance every iterable in lock step. Plain ``zip`` cannot be used to
    # detect a length mismatch afterwards: it already consumes one element
    # from the earlier iterables before it notices that a later one has
    # run out, so an input that is one element too long goes unnoticed.
    exhausted = object()
    while True:
        items = [next(it, exhausted) for it in iterators]
        n_exhausted = sum(1 for item in items if item is exhausted)
        if n_exhausted == len(items):
            break
        if n_exhausted:
            raise ValueError('Length of input iterables need to be same.')

        pred_bbox, pred_label, pred_score, gt_bbox, gt_label = items[:5]
        gt_difficult = items[5] if len(items) > 5 else None
        if gt_difficult is None:
            gt_difficult = np.zeros(gt_bbox.shape[0], dtype=bool)

        labels = np.unique(
            np.concatenate((pred_label, gt_label)).astype(int))
        for cls in labels:
            pred_mask_l = pred_label == cls
            pred_bbox_l = pred_bbox[pred_mask_l]
            pred_score_l = pred_score[pred_mask_l]
            # Visit predictions from the most to the least confident one,
            # so the best-scored detection claims a ground truth box first.
            order = pred_score_l.argsort()[::-1]
            pred_bbox_l = pred_bbox_l[order]
            pred_score_l = pred_score_l[order]

            gt_mask_l = gt_label == cls
            gt_bbox_l = gt_bbox[gt_mask_l]
            gt_difficult_l = gt_difficult[gt_mask_l]

            n_pos[cls] += np.logical_not(gt_difficult_l).sum()
            score[cls].extend(pred_score_l)

            if len(pred_bbox_l) == 0:
                continue
            if len(gt_bbox_l) == 0:
                # Nothing to match against: every prediction is a miss.
                match[cls].extend((0,) * pred_bbox_l.shape[0])
                continue

            # VOC evaluation follows integer typed bounding boxes.
            pred_bbox_l = pred_bbox_l.copy()
            pred_bbox_l[:, 2:] += 1
            gt_bbox_l = gt_bbox_l.copy()
            gt_bbox_l[:, 2:] += 1

            iou = bbox_iou(pred_bbox_l, gt_bbox_l)
            gt_index = iou.argmax(axis=1)
            # set -1 if there is no matching ground truth
            gt_index[iou.max(axis=1) < iou_thresh] = -1
            del iou

            claimed = np.zeros(gt_bbox_l.shape[0], dtype=bool)
            for gt_idx in gt_index:
                if gt_idx < 0:
                    match[cls].append(0)
                    continue
                if gt_difficult_l[gt_idx]:
                    # -1 entries count neither as true nor false positive.
                    match[cls].append(-1)
                elif claimed[gt_idx]:
                    # Duplicate detection of an already matched box.
                    match[cls].append(0)
                else:
                    match[cls].append(1)
                claimed[gt_idx] = True

    if not n_pos:
        # No image or no label was seen; max() below would fail.
        return [], []

    n_fg_class = int(max(n_pos)) + 1
    prec = [None] * n_fg_class
    rec = [None] * n_fg_class

    for cls in n_pos:
        score_l = np.array(score[cls])
        match_l = np.array(match[cls], dtype=np.int8)

        order = score_l.argsort()[::-1]
        match_l = match_l[order]

        tp = np.cumsum(match_l == 1)
        fp = np.cumsum(match_l == 0)

        # If an element of fp + tp is 0,
        # the corresponding element of prec[cls] is nan.
        prec[cls] = tp / (fp + tp)
        # If n_pos[cls] is 0, rec[cls] is None.
        if n_pos[cls] > 0:
            rec[cls] = tp / n_pos[cls]

    return prec, rec
def __call__(self, roi, bbox, label,
             loc_normalize_mean=(0., 0., 0., 0.),
             loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
    """Sample RoIs and attach ground-truth labels and regression targets.

    The ground-truth boxes are appended to :obj:`roi`, so they are
    candidates for sampling as well. Each candidate is matched to the
    ground-truth box with which :func:`bbox_iou` reports the largest
    overlap. Foreground and background candidates are then drawn at random
    without replacement, and localization targets for the drawn RoIs are
    computed with :func:`bbox2loc` and normalized.

    Here are notations.

    * :math:`S` is the number of sampled RoIs. It is bounded by
      :obj:`self.n_sample` and is smaller when too few candidates
      satisfy the IoU thresholds.
    * :math:`L` is the number of foreground classes.

    Args:
        roi (array): Region of Interests (RoIs) from which we sample.
            Its shape is :math:`(R, 4)`.
        bbox (array): The coordinates of ground truth bounding boxes.
            Its shape is :math:`(R', 4)`.
        label (array): Ground truth bounding box labels.
            Its shape is :math:`(R',)` and its range is
            :math:`[0, L - 1]`.
        loc_normalize_mean (tuple of four floats): Mean values subtracted
            from the localization targets.
        loc_normalize_std (tuple of four floats): Standard deviations by
            which the centered localization targets are divided.

    Returns:
        (array, array, array):

        * **sample_roi**: The sampled RoIs, foreground samples first.
          Its shape is :math:`(S, 4)`.
        * **gt_roi_loc**: Normalized offsets and scales that map the
          sampled RoIs onto their matched ground truth boxes.
          Its shape is :math:`(S, 4)`.
        * **gt_roi_label**: Labels assigned to the sampled RoIs.
          Its shape is :math:`(S,)` and its range is :math:`[0, L]`;
          the value 0 marks background.

    """
    # The count itself is not used afterwards; the unpacking still fails
    # early when ``bbox`` is not two-dimensional.
    n_bbox, _ = bbox.shape

    candidates = np.concatenate((roi, bbox), axis=0)
    fg_quota = np.round(self.n_sample * self.pos_ratio)

    overlaps = bbox_iou(candidates, bbox)
    best_gt = overlaps.argmax(axis=1)
    best_iou = overlaps.max(axis=1)
    # Shift class ids from [0, L - 1] to [1, L] so that 0 can stand for
    # the background.
    shifted_label = label[best_gt] + 1

    # Foreground: best IoU of at least pos_iou_thresh, capped at the quota.
    fg_index = np.where(best_iou >= self.pos_iou_thresh)[0]
    n_fg = int(min(fg_quota, fg_index.size))
    if fg_index.size > 0:
        fg_index = np.random.choice(fg_index, size=n_fg, replace=False)

    # Background: best IoU inside [neg_iou_thresh_lo, neg_iou_thresh_hi);
    # it fills whatever part of the sample budget the foreground left.
    is_background = ((best_iou < self.neg_iou_thresh_hi) &
                     (best_iou >= self.neg_iou_thresh_lo))
    bg_index = np.where(is_background)[0]
    n_bg = int(min(self.n_sample - n_fg, bg_index.size))
    if bg_index.size > 0:
        bg_index = np.random.choice(bg_index, size=n_bg, replace=False)

    # Foreground indices come first, so everything from position n_fg
    # onwards is a background sample.
    keep_index = np.append(fg_index, bg_index)
    gt_roi_label = shifted_label[keep_index]
    gt_roi_label[n_fg:] = 0
    sample_roi = candidates[keep_index]

    # Regression targets towards the matched ground truth, then normalized.
    raw_loc = bbox2loc(sample_roi, bbox[best_gt[keep_index]])
    centered_loc = raw_loc - np.array(loc_normalize_mean, np.float32)
    gt_roi_loc = centered_loc / np.array(loc_normalize_std, np.float32)

    return sample_roi, gt_roi_loc, gt_roi_label
def calc_detection_voc_prec_rec(
        pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels,
        gt_difficults=None,
        iou_thresh=0.5):
    """Calculate precision and recall based on evaluation code of PASCAL VOC.

    This function calculates precision and recall of predicted bounding
    boxes obtained from a dataset which has :math:`N` images.
    The code is based on the evaluation code used in PASCAL VOC Challenge.

    Args:
        pred_bboxes (iterable of numpy.ndarray): An iterable of :math:`N`
            sets of bounding boxes. Each element is an array of shape
            :math:`(R, 4)`, where :math:`R` is the number of boxes in that
            image and may vary among images. The second axis corresponds
            to :math:`y_{min}, x_{min}, y_{max}, x_{max}`.
        pred_labels (iterable of numpy.ndarray): An iterable of predicted
            labels, organized like :obj:`pred_bboxes`. Its length is
            :math:`N`.
        pred_scores (iterable of numpy.ndarray): An iterable of confidence
            scores, organized like :obj:`pred_bboxes`. Its length is
            :math:`N`.
        gt_bboxes (iterable of numpy.ndarray): An iterable of ground truth
            bounding boxes whose length is :math:`N`. Each element has
            shape :math:`(R, 4)`; the number of boxes does not need to
            match the number of predicted boxes.
        gt_labels (iterable of numpy.ndarray): An iterable of ground truth
            labels, organized like :obj:`gt_bboxes`.
        gt_difficults (iterable of numpy.ndarray): An iterable of boolean
            arrays, organized like :obj:`gt_bboxes`, telling whether each
            ground truth box is difficult. If this is :obj:`None`, or an
            element of it is :obj:`None`, the corresponding boxes are all
            treated as not difficult.
        iou_thresh (float): A prediction can match a ground truth box only
            if their Intersection over Union is at least this value.

    Returns:
        tuple of two lists:
        This function returns two lists: :obj:`prec` and :obj:`rec`.
        Both are empty when the inputs contain no labels at all.

        * :obj:`prec`: A list of arrays. :obj:`prec[l]` is precision
          for class :math:`l`. If class :math:`l` does not exist in
          either :obj:`pred_labels` or :obj:`gt_labels`, :obj:`prec[l]`
          is :obj:`None`.
        * :obj:`rec`: A list of arrays. :obj:`rec[l]` is recall
          for class :math:`l`. If :obj:`gt_labels` has no box of class
          :math:`l` that is not marked as difficult, :obj:`rec[l]` is
          :obj:`None`.

    Raises:
        ValueError: If the input iterables do not all have the same length.

    """
    iterators = [iter(pred_bboxes), iter(pred_labels), iter(pred_scores),
                 iter(gt_bboxes), iter(gt_labels)]
    if gt_difficults is not None:
        iterators.append(iter(gt_difficults))

    n_pos = defaultdict(int)
    score = defaultdict(list)
    match = defaultdict(list)

    # Advance every iterable in lock step. Plain ``zip`` cannot be used to
    # detect a length mismatch afterwards: it already consumes one element
    # from the earlier iterables before it notices that a later one has
    # run out, so an input that is one element too long goes unnoticed.
    exhausted = object()
    while True:
        items = [next(it, exhausted) for it in iterators]
        n_exhausted = sum(1 for item in items if item is exhausted)
        if n_exhausted == len(items):
            break
        if n_exhausted:
            raise ValueError('Length of input iterables need to be same.')

        pred_bbox, pred_label, pred_score, gt_bbox, gt_label = items[:5]
        gt_difficult = items[5] if len(items) > 5 else None
        if gt_difficult is None:
            gt_difficult = np.zeros(gt_bbox.shape[0], dtype=bool)

        labels = np.unique(
            np.concatenate((pred_label, gt_label)).astype(int))
        for cls in labels:
            pred_mask_l = pred_label == cls
            pred_bbox_l = pred_bbox[pred_mask_l]
            pred_score_l = pred_score[pred_mask_l]
            # Visit predictions from the most to the least confident one,
            # so the best-scored detection claims a ground truth box first.
            order = pred_score_l.argsort()[::-1]
            pred_bbox_l = pred_bbox_l[order]
            pred_score_l = pred_score_l[order]

            gt_mask_l = gt_label == cls
            gt_bbox_l = gt_bbox[gt_mask_l]
            gt_difficult_l = gt_difficult[gt_mask_l]

            n_pos[cls] += np.logical_not(gt_difficult_l).sum()
            score[cls].extend(pred_score_l)

            if len(pred_bbox_l) == 0:
                continue
            if len(gt_bbox_l) == 0:
                # Nothing to match against: every prediction is a miss.
                match[cls].extend((0,) * pred_bbox_l.shape[0])
                continue

            # VOC evaluation follows integer typed bounding boxes.
            pred_bbox_l = pred_bbox_l.copy()
            pred_bbox_l[:, 2:] += 1
            gt_bbox_l = gt_bbox_l.copy()
            gt_bbox_l[:, 2:] += 1

            iou = bbox_iou(pred_bbox_l, gt_bbox_l)
            gt_index = iou.argmax(axis=1)
            # set -1 if there is no matching ground truth
            gt_index[iou.max(axis=1) < iou_thresh] = -1
            del iou

            claimed = np.zeros(gt_bbox_l.shape[0], dtype=bool)
            for gt_idx in gt_index:
                if gt_idx < 0:
                    match[cls].append(0)
                    continue
                if gt_difficult_l[gt_idx]:
                    # -1 entries count neither as true nor false positive.
                    match[cls].append(-1)
                elif claimed[gt_idx]:
                    # Duplicate detection of an already matched box.
                    match[cls].append(0)
                else:
                    match[cls].append(1)
                claimed[gt_idx] = True

    if not n_pos:
        # No image or no label was seen; max() below would fail.
        return [], []

    n_fg_class = int(max(n_pos)) + 1
    prec = [None] * n_fg_class
    rec = [None] * n_fg_class

    for cls in n_pos:
        score_l = np.array(score[cls])
        match_l = np.array(match[cls], dtype=np.int8)

        order = score_l.argsort()[::-1]
        match_l = match_l[order]

        tp = np.cumsum(match_l == 1)
        fp = np.cumsum(match_l == 0)

        # If an element of fp + tp is 0,
        # the corresponding element of prec[cls] is nan.
        prec[cls] = tp / (fp + tp)
        # If n_pos[cls] is 0, rec[cls] is None.
        if n_pos[cls] > 0:
            rec[cls] = tp / n_pos[cls]

    return prec, rec