def get_bboxes_single(
        self,
        cls_scores,
        bbox_preds,
        mlvl_anchors,
        img_shape,
        scale_factor,
        cfg,
        rescale=False,
):
    """Generate RPN proposals for a single image from multi-level outputs.

    Per level: score the anchors, keep the top ``cfg.nms_pre``, decode the
    regression deltas, drop tiny boxes, run NMS, keep ``cfg.nms_post``.
    Finally merge all levels and keep at most ``cfg.max_num`` proposals.

    Returns:
        Tensor: (num_proposals, 5) as (x1, y1, x2, y2, score).
    """
    mlvl_proposals = []
    for idx in range(len(cls_scores)):
        rpn_cls_score = cls_scores[idx]
        rpn_bbox_pred = bbox_preds[idx]
        # cls and reg maps must share the same spatial size (H, W)
        assert (rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:])
        anchors = mlvl_anchors[idx]
        # (C, H, W) -> (H, W, C) so a flat reshape groups values per location
        rpn_cls_score = rpn_cls_score.permute(1, 2, 0)
        if self.use_sigmoid_cls:
            # single-logit objectness
            rpn_cls_score = rpn_cls_score.reshape(-1)
            scores = rpn_cls_score.sigmoid()
        else:
            # two-class softmax; column 1 is the foreground probability
            rpn_cls_score = rpn_cls_score.reshape(-1, 2)
            scores = rpn_cls_score.softmax(dim=1)[:, 1]
        rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4)
        # keep only the nms_pre highest-scoring anchors before decoding
        if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
            _, topk_inds = scores.topk(cfg.nms_pre)
            rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
            anchors = anchors[topk_inds, :]
            scores = scores[topk_inds]
        # decode (dx, dy, dw, dh) deltas into absolute boxes clipped to image
        proposals = delta2bbox(
            anchors,
            rpn_bbox_pred,
            self.target_means,
            self.target_stds,
            img_shape,
        )
        if cfg.min_bbox_size > 0:
            # filter out degenerate boxes (inclusive-pixel width/height)
            w = proposals[:, 2] - proposals[:, 0] + 1
            h = proposals[:, 3] - proposals[:, 1] + 1
            valid_inds = torch.nonzero((w >= cfg.min_bbox_size)
                                       & (h >= cfg.min_bbox_size)).squeeze()
            proposals = proposals[valid_inds, :]
            scores = scores[valid_inds]
        # append the score column so nms() sees (x1, y1, x2, y2, score)
        proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1)
        proposals, _ = nms(proposals, cfg.nms_thr)
        proposals = proposals[:cfg.nms_post, :]
        mlvl_proposals.append(proposals)
    proposals = torch.cat(mlvl_proposals, 0)
    if cfg.nms_across_levels:
        # one more NMS across all levels, then truncate
        proposals, _ = nms(proposals, cfg.nms_thr)
        proposals = proposals[:cfg.max_num, :]
    else:
        # no cross-level NMS: just keep the max_num best by score
        scores = proposals[:, 4]
        num = min(cfg.max_num, proposals.shape[0])
        _, topk_inds = scores.topk(num)
        proposals = proposals[topk_inds, :]
    return proposals
def get_bboxes_single(self, cls_scores, bbox_preds, mlvl_anchors, img_shape, scale_factor, cfg, rescale=False): mlvl_proposals = [] # 遍历每一张特征图得到的anchors进行坐标还原 for idx in range(len(cls_scores)): rpn_cls_score = cls_scores[idx] rpn_bbox_pred = bbox_preds[idx] assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:] anchors = mlvl_anchors[idx] rpn_cls_score = rpn_cls_score.permute(1, 2, 0) if self.use_sigmoid_cls: rpn_cls_score = rpn_cls_score.reshape(-1) scores = rpn_cls_score.sigmoid() else: rpn_cls_score = rpn_cls_score.reshape(-1, 2) scores = rpn_cls_score.softmax(dim=1)[:, 1] rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4) # 按照得分从上万个里面选取2000个最高得分的box if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre: _, topk_inds = scores.topk(cfg.nms_pre) rpn_bbox_pred = rpn_bbox_pred[topk_inds, :] anchors = anchors[topk_inds, :] scores = scores[topk_inds] # 偏移转换为真实的bbox作标 proposals = delta2bbox(anchors, rpn_bbox_pred, self.target_means, self.target_stds, img_shape) if cfg.min_bbox_size > 0: w = proposals[:, 2] - proposals[:, 0] + 1 h = proposals[:, 3] - proposals[:, 1] + 1 valid_inds = torch.nonzero((w >= cfg.min_bbox_size) & (h >= cfg.min_bbox_size)).squeeze() proposals = proposals[valid_inds, :] scores = scores[valid_inds] # NMS筛选 proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1) #将得加入xywh中成为xywhc,便于NMS proposals, _ = nms(proposals, cfg.nms_thr) proposals = proposals[:cfg.nms_post, :] mlvl_proposals.append(proposals) #每张特征图选完得到的box都加到这里 # 全部的proposal box连接到一起 proposals = torch.cat(mlvl_proposals, 0) if cfg.nms_across_levels: proposals, _ = nms(proposals, cfg.nms_thr) proposals = proposals[:cfg.max_num, :] else: # proposal再按照分类得分压缩到2000(如果本身就小于2000就只排序) scores = proposals[:, 4] num = min(cfg.max_num, proposals.shape[0]) # proposal的数目 _, topk_inds = scores.topk(num) # 按照score排序 proposals = proposals[topk_inds, :] return proposals
def get_bboxes_single(
        self,
        cls_scores,     # per-level results so FPN outputs can be consumed
        bbox_preds,     # first dim of cls_scores/bbox_preds/mlvl_anchors
        mlvl_anchors,   # indexes the feature level: with 3 levels,
        img_shape,      # cls_scores[i]-bbox_preds[i]-mlvl_anchors[i] align
        scale_factor,
        cfg,
        rescale=False):
    """Compute single-image RPN proposals level by level, then merge.

    Returns:
        Tensor: (num_proposals, 5) rows of (x1, y1, x2, y2, score).
    """
    mlvl_proposals = []
    for idx in range(len(cls_scores)):
        rpn_cls_score = cls_scores[idx]
        rpn_bbox_pred = bbox_preds[idx]
        assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
        anchors = mlvl_anchors[idx]
        rpn_cls_score = rpn_cls_score.permute(1, 2, 0)
        if self.use_sigmoid_cls:
            rpn_cls_score = rpn_cls_score.reshape(-1)
            scores = rpn_cls_score.sigmoid()
        else:
            rpn_cls_score = rpn_cls_score.reshape(-1, 2)
            scores = rpn_cls_score.softmax(dim=1)[:, 1]
        rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4)
        # nms_pre: before NMS, keep the nms_pre most confident anchors
        if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
            _, topk_inds = scores.topk(
                cfg.nms_pre)  # indices of the top-nms_pre anchors
            rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
            anchors = anchors[topk_inds, :]
            scores = scores[topk_inds]
        # decode selected anchors + deltas into proposals (x1, y1, x2, y2)
        proposals = delta2bbox(anchors, rpn_bbox_pred, self.target_means,
                               self.target_stds, img_shape)
        if cfg.min_bbox_size > 0:
            w = proposals[:, 2] - proposals[:, 0] + 1
            h = proposals[:, 3] - proposals[:, 1] + 1
            valid_inds = torch.nonzero((w >= cfg.min_bbox_size)
                                       & (h >= cfg.min_bbox_size)).squeeze()
            proposals = proposals[valid_inds, :]
            scores = scores[valid_inds]
        proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1)
        proposals, _ = nms(proposals, cfg.nms_thr)  # NMS at nms_thr IoU
        proposals = proposals[:cfg.nms_post, :]  # keep top nms_post survivors
        mlvl_proposals.append(proposals)
    proposals = torch.cat(mlvl_proposals, 0)
    if cfg.nms_across_levels:
        proposals, _ = nms(proposals, cfg.nms_thr)
        proposals = proposals[:cfg.max_num, :]
    else:
        scores = proposals[:, 4]
        num = min(cfg.max_num, proposals.shape[0])
        _, topk_inds = scores.topk(num)
        proposals = proposals[topk_inds, :]
    return proposals
def get_bboxes_single(self, cls_scores, bbox_preds, centernesses, mlvl_points,
                      img_shape, scale_factor, cfg, rescale=False):
    """Anchor-free (FCOS-style) proposal generation for one image.

    Scores are multiplied by centerness only for the top-k *selection*;
    the raw classification score is what ends up in the output column.

    Returns:
        Tensor: (num_proposals, 5) rows of (x1, y1, x2, y2, score).
    """
    # TODO: change output to proposals without labels
    assert len(cls_scores) == len(bbox_preds) == len(mlvl_points)
    # mlvl_bboxes = []
    # mlvl_scores = []
    # mlvl_centerness = []
    mlvl_proposals = []
    for cls_score, bbox_pred, centerness, points in zip(
            cls_scores, bbox_preds, centernesses, mlvl_points):
        # iteration by levels
        assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
        scores = cls_score.permute(1, 2, 0).reshape(
            -1, self.cls_out_channels).sigmoid()
        centerness = centerness.permute(1, 2, 0).reshape(-1).sigmoid()
        bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
        nms_pre = cfg.get('nms_pre', -1)
        if nms_pre > 0 and scores.shape[0] > nms_pre:
            # rank by centerness-weighted max class score for pre-selection
            max_scores, _ = (scores * centerness[:, None]).max(dim=1)
            _, topk_inds = max_scores.topk(nms_pre)
            points = points[topk_inds, :]
            bbox_pred = bbox_pred[topk_inds, :]
            scores = scores[topk_inds, :]
            centerness = centerness[topk_inds]
        # NOTE(review): squeeze assumes cls_out_channels == 1 here — if it is
        # >1 the cat below would fail; confirm against the head config.
        scores = scores.squeeze()
        # scores *= centerness
        # decode per-point (l, t, r, b) distances into boxes clipped to image
        proposals = distance2bbox(points, bbox_pred, max_shape=img_shape)
        proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1)
        proposals, _ = nms(proposals, cfg.nms_thr)
        proposals = proposals[:cfg.nms_post, :]
        mlvl_proposals.append(proposals)
        # mlvl_bboxes.append(bboxes)
        # mlvl_scores.append(scores)
        # mlvl_centerness.append(centerness)
    proposals = torch.cat(mlvl_proposals, 0)
    if cfg.nms_across_levels:
        proposals, _ = nms(proposals, cfg.nms_thr)
        proposals = proposals[:cfg.max_num, :]
    else:
        scores = proposals[:, 4]
        num = min(cfg.max_num, proposals.shape[0])
        _, topk_inds = scores.topk(num)
        proposals = proposals[topk_inds, :]
    return proposals
def _get_proposals_single(self, rpn_cls_scores, rpn_bbox_preds, mlvl_anchors,
                          img_shape, coo_num, cfg):
    """Generate proposals for one image, supporting 4- or 8-value box codes.

    ``coo_num`` selects the delta decoder: 4 -> ``delta2bbox`` (axis-aligned),
    8 -> ``delta2bbox_8_coo``. NOTE(review): any other value leaves
    ``proposals`` unbound and raises NameError — confirm callers only pass
    4 or 8.

    Returns:
        Tensor: (num_proposals, 5) rows of (x1, y1, x2, y2, score).
    """
    mlvl_proposals = []
    for idx in range(len(rpn_cls_scores)):
        rpn_cls_score = rpn_cls_scores[idx]
        rpn_bbox_pred = rpn_bbox_preds[idx]
        assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
        anchors = mlvl_anchors[idx]
        if self.use_sigmoid_cls:
            rpn_cls_score = rpn_cls_score.permute(1, 2, 0).contiguous().view(-1)
            rpn_cls_prob = rpn_cls_score.sigmoid()
            scores = rpn_cls_prob
        else:
            rpn_cls_score = rpn_cls_score.permute(1, 2, 0).contiguous().view(
                -1, 2)
            rpn_cls_prob = F.softmax(rpn_cls_score, dim=1)
            scores = rpn_cls_prob[:, 1]
        rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).contiguous().view(
            -1, coo_num)
        # full descending sort, then truncate to nms_pre (same effect as topk)
        _, order = scores.sort(0, descending=True)
        if cfg.nms_pre > 0:
            order = order[:cfg.nms_pre]
        rpn_bbox_pred = rpn_bbox_pred[order, :]
        anchors = anchors[order, :]
        scores = scores[order]
        if coo_num == 4:
            proposals = delta2bbox(anchors, rpn_bbox_pred, self.target_means,
                                   self.target_stds, img_shape)
        elif coo_num == 8:
            proposals = delta2bbox_8_coo(anchors, rpn_bbox_pred, img_shape)
        # size filter is unconditional here (unlike sibling methods which
        # guard on cfg.min_bbox_size > 0); harmless when min_bbox_size == 0
        w = proposals[:, 2] - proposals[:, 0] + 1
        h = proposals[:, 3] - proposals[:, 1] + 1
        valid_inds = torch.nonzero((w >= cfg.min_bbox_size)
                                   & (h >= cfg.min_bbox_size)).squeeze()
        proposals = proposals[valid_inds, :]
        scores = scores[valid_inds]
        proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1)
        proposals, _ = nms(proposals, cfg.nms_thr)
        proposals = proposals[:cfg.nms_post, :]
        mlvl_proposals.append(proposals)
    proposals = torch.cat(mlvl_proposals, 0)
    if cfg.nms_across_levels:
        proposals, _ = nms(proposals, cfg.nms_thr)
        proposals = proposals[:cfg.max_num, :]
    else:
        scores = proposals[:, 4]
        _, order = scores.sort(0, descending=True)
        num = min(cfg.max_num, proposals.shape[0])
        order = order[:num]
        proposals = proposals[order, :]
    return proposals
def get_bboxes_single(self, cls_scores, bbox_preds, mlvl_anchors, img_shape,
                      scale_factor, cfg, rescale=False):
    """Single-image RPN proposal generation over multiple feature levels.

    Returns:
        Tensor: (num_proposals, 5) rows of (x1, y1, x2, y2, score).
    """
    mlvl_proposals = []
    for idx in range(len(cls_scores)):
        rpn_cls_score = cls_scores[idx]
        rpn_bbox_pred = bbox_preds[idx]
        assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
        anchors = mlvl_anchors[idx]
        # permute: change dimension order CHW -> HWC
        rpn_cls_score = rpn_cls_score.permute(1, 2, 0)
        if self.use_sigmoid_cls:
            rpn_cls_score = rpn_cls_score.reshape(-1)
            scores = rpn_cls_score.sigmoid()
        else:
            # 2 classes: object / non-object
            rpn_cls_score = rpn_cls_score.reshape(-1, 2)
            scores = rpn_cls_score.softmax(dim=1)[:, 1]
        # CHW -> HWC, then reshape to (H*W*A, 4)
        rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4)
        # keep the top nms_pre bbox_preds, anchors and scores
        if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
            # topk returns (values, indices); only indices are needed
            _, topk_inds = scores.topk(cfg.nms_pre)
            rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]  # top-k deltas
            anchors = anchors[topk_inds, :]  # matching anchors
            scores = scores[topk_inds]
        # translate predicted deltas into proposals w.r.t. their anchors
        proposals = delta2bbox(anchors, rpn_bbox_pred, self.target_means,
                               self.target_stds, img_shape)
        if cfg.min_bbox_size > 0:
            # keep boxes at least min_bbox_size in each dimension
            w = proposals[:, 2] - proposals[:, 0] + 1
            h = proposals[:, 3] - proposals[:, 1] + 1
            valid_inds = torch.nonzero((w >= cfg.min_bbox_size)
                                       & (h >= cfg.min_bbox_size)).squeeze()
            proposals = proposals[valid_inds, :]
            scores = scores[valid_inds]
        # concatenate score column (analogous to tf.concat)
        proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1)
        proposals, _ = nms(proposals, cfg.nms_thr)
        proposals = proposals[:cfg.nms_post, :]
        mlvl_proposals.append(proposals)
    proposals = torch.cat(mlvl_proposals, 0)
    if cfg.nms_across_levels:
        proposals, _ = nms(proposals, cfg.nms_thr)
        proposals = proposals[:cfg.max_num, :]
    else:
        scores = proposals[:, 4]
        num = min(cfg.max_num, proposals.shape[0])
        _, topk_inds = scores.topk(num)
        proposals = proposals[topk_inds, :]
    return proposals
def get_bboxes_single(self,
                      cls_scores,   # with sigmoid the channel is 1 per anchor;
                                    # otherwise 2-class logits (see AnchorHead)
                      bbox_preds,   # 4 columns: the regression deltas
                      mlvl_anchors, # multi-level anchors from the FPN
                      img_shape,
                      scale_factor,
                      cfg,
                      rescale=False):
    """Build RPN proposals for one image from multi-level head outputs.

    Returns:
        Tensor: (num_proposals, 5) rows of (x1, y1, x2, y2, score).
    """
    mlvl_proposals = []  # proposals collected per level
    # NOTE: this loops over feature levels, not over images in the batch
    for idx in range(len(cls_scores)):
        rpn_cls_score = cls_scores[idx]  # cls map of level idx
        rpn_bbox_pred = bbox_preds[idx]
        # the cls and reg maps must have the same spatial size
        assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
        anchors = mlvl_anchors[idx]
        # move channels last so flattening groups values by location;
        # dims are (C, H, W) where C covers the anchors at each cell
        rpn_cls_score = rpn_cls_score.permute(1, 2, 0)
        if self.use_sigmoid_cls:
            rpn_cls_score = rpn_cls_score.reshape(-1)  # size [H*W*A]
            scores = rpn_cls_score.sigmoid()
        else:
            rpn_cls_score = rpn_cls_score.reshape(-1, 2)
            scores = rpn_cls_score.softmax(dim=1)[:, 1]
        rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4)  # as above
        if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
            # pre-NMS selection (not NMS itself): keep the nms_pre
            # highest-scoring anchors, e.g. 1000
            _, topk_inds = scores.topk(cfg.nms_pre)
            rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
            anchors = anchors[topk_inds, :]
            scores = scores[topk_inds]
        # apply regression deltas to anchors to obtain proposal boxes
        proposals = delta2bbox(anchors, rpn_bbox_pred, self.target_means,
                               self.target_stds, img_shape)
        if cfg.min_bbox_size > 0:
            w = proposals[:, 2] - proposals[:, 0] + 1
            h = proposals[:, 3] - proposals[:, 1] + 1
            # nonzero builds an index of boxes meeting the size threshold
            valid_inds = torch.nonzero((w >= cfg.min_bbox_size)
                                       & (h >= cfg.min_bbox_size)).squeeze()
            proposals = proposals[valid_inds, :]
            scores = scores[valid_inds]
        proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1)
        proposals, _ = nms(proposals, cfg.nms_thr)  # the actual NMS
        proposals = proposals[:cfg.nms_post, :]
        mlvl_proposals.append(proposals)
    proposals = torch.cat(mlvl_proposals, 0)
    if cfg.nms_across_levels:
        proposals, _ = nms(proposals, cfg.nms_thr)  # FPN cross-level NMS
        proposals = proposals[:cfg.max_num, :]
    else:
        scores = proposals[:, 4]
        # cap the number of kept proposals at max_num
        num = min(cfg.max_num, proposals.shape[0])
        _, topk_inds = scores.topk(num)
        proposals = proposals[topk_inds, :]
    return proposals
def nms_crossclass(bbox_results: list, iou_thr=0.5) -> (list, list):
    """Run NMS jointly across classes, then regroup survivors per class.

    Args:
        bbox_results: per-class arrays of (n_i, 5) detections.
        iou_thr: IoU threshold passed to ``nms``.

    Returns:
        tuple: (per-class surviving detections,
                per-class indices into the original class arrays).
    """
    class_num = len(bbox_results)
    # record, for every detection, its class id and its position in that class
    labels = np.array([cls
                       for cls, dets in enumerate(bbox_results)
                       for _ in dets])
    indexs = np.array([pos
                       for dets in bbox_results
                       for pos in range(len(dets))])
    # suppress across all classes at once
    bbox_all = np.concatenate(bbox_results)
    bbox_all, inds = nms(bbox_all, iou_thr)
    labels = labels[inds]
    indexs = indexs[inds]
    # split survivors back into per-class lists
    new_bbox_results = [bbox_all[labels == cls] for cls in range(class_num)]
    new_indexs = [indexs[labels == cls] for cls in range(class_num)]
    return new_bbox_results, new_indexs
def get_result(result, score_thr=0.5):
    """Flatten per-class detections, filter by score, and apply NMS.

    Bug fix: previously ``scores`` was assigned only inside the
    ``score_thr > 0`` branch, so calling with ``score_thr <= 0`` raised
    NameError when building ``new_scores``. Scores are now extracted
    unconditionally (the 5th column is required either way, since scores
    are part of the return value).

    Args:
        result: list of per-class (n_i, 5) arrays (x1, y1, x2, y2, score).
        score_thr: minimum score; detections at or below it are dropped
            when the threshold is positive.

    Returns:
        tuple: (list of kept bboxes, list of kept labels, list of kept scores)
        after NMS at IoU 0.5.
    """
    bbox_result = result
    bboxes = np.vstack(bbox_result)
    # label each row with the index of the class list it came from
    labels = [
        np.full(bbox.shape[0], i, dtype=np.int32)
        for i, bbox in enumerate(bbox_result)
    ]
    labels = np.concatenate(labels)
    assert bboxes.ndim == 2
    assert labels.ndim == 1
    assert bboxes.shape[0] == labels.shape[0]
    # the score column must be present: it is returned to the caller
    assert bboxes.shape[1] == 5
    scores = bboxes[:, -1]
    if score_thr > 0:
        inds = scores > score_thr
        bboxes = bboxes[inds, :]
        labels = labels[inds]
        scores = scores[inds]
    # class-agnostic NMS; nms() returns (dets, keep_indices)
    test_bboxes = nms(bboxes, 0.5)
    new_bboxes = [bboxes[i] for i in test_bboxes[1]]
    new_labels = [labels[i] for i in test_bboxes[1]]
    new_scores = [scores[i] for i in test_bboxes[1]]
    return new_bboxes, new_labels, new_scores
def verify_images():
    """Run the (module-level) detection model over a list of images and
    pickle the thresholded, NMS-filtered detections to ``detections.pkl``.

    NOTE(review): relies on module-level names ``model``, ``preprocess``,
    ``thresh``, ``class_names`` — confirm they are defined at import time.
    """
    im_list, out_dir = get_imlist2()
    detections = {}
    for imf in tqdm(im_list, total=len(im_list)):
        im = cv2.imread(imf)
        reiszed_im = cv2.resize(im, (1280, 800))
        inputs = preprocess(reiszed_im)
        result = model(inputs.cuda(), return_loss=False, try_dummy=False)
        # class labels are 1-based (enumerate starts at 1), one column vector
        # per class, matched to the stacked bboxes
        labels = [i * np.ones((len(r), 1)) for i, r in enumerate(result, 1)]
        bboxes = np.vstack(result)
        labels = np.vstack(labels)
        # nms for all classes
        bboxes, idx = nms(bboxes, 0.7)
        labels = labels[idx]
        # detection above score threshold
        idx = bboxes[:, -1] > thresh
        bboxes = bboxes[idx]
        labels = labels[idx]
        detections[imf] = np.hstack([bboxes, labels])
        # NOTE: this `continue` makes the visualization below unreachable;
        # it appears to be a deliberate switch to skip writing images
        continue
        out_file = os.path.join(out_dir, os.path.basename(imf))
        show_result(reiszed_im, result, class_names, score_thr=thresh,
                    out_file=out_file)
    # persist all collected detections
    out_file = os.path.join(out_dir, 'detections.pkl')
    with open(out_file, 'wb') as f:
        pkl.dump(detections, f)
def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg):
    """Merge augmented proposals (multiscale, flip, etc.)

    Args:
        aug_proposals (list[Tensor]): proposals from different testing
            schemes, shape (n, 5). Note that they are not rescaled to the
            original image size.
        img_metas (list[dict]): list of image info dict where each dict
            has: 'img_shape', 'scale_factor', 'flip', and my also contain
            'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. For
            details on the values of these keys see
            `mmdet/datasets/pipelines/formatting.py:Collect`.
        rpn_test_cfg (dict): rpn test config.

    Returns:
        Tensor: shape (n, 4), proposals corresponding to original image scale.
    """
    recovered_proposals = []
    for proposals, img_info in zip(aug_proposals, img_metas):
        img_shape = img_info['img_shape']
        scale_factor = img_info['scale_factor']
        flip = img_info['flip']
        # clone: bbox_mapping_back writes into the slice, keep input intact
        _proposals = proposals.clone()
        # undo resize/flip so all augmentations share one coordinate frame
        _proposals[:, :4] = bbox_mapping_back(_proposals[:, :4], img_shape,
                                              scale_factor, flip)
        recovered_proposals.append(_proposals)
    aug_proposals = torch.cat(recovered_proposals, dim=0)
    # deduplicate overlapping proposals from different augmentations
    merged_proposals, _ = nms(aug_proposals, rpn_test_cfg.nms_thr)
    scores = merged_proposals[:, 4]
    _, order = scores.sort(0, descending=True)
    # keep at most max_num best-scoring proposals
    num = min(rpn_test_cfg.max_num, merged_proposals.shape[0])
    order = order[:num]
    merged_proposals = merged_proposals[order, :]
    return merged_proposals
def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg):
    """Merge augmented proposals (multiscale, flip, etc.)

    Args:
        aug_proposals (list[Tensor]): proposals from different testing
            schemes, shape (n, 5). Note that they are not rescaled to the
            original image size.
        img_metas (list[dict]): image info including "shape_scale" and "flip".
        rpn_test_cfg (dict): rpn test config.

    Returns:
        Tensor: shape (n, 4), proposals corresponding to original image scale.
    """
    restored = []
    for props, meta in zip(aug_proposals, img_metas):
        mapped = props.clone()
        # map the proposals back to the original image scale.
        mapped[:, :4] = bbox_mapping_back(mapped[:, :4], meta['img_shape'],
                                          meta['scale_factor'], meta['flip'])
        restored.append(mapped)
    # suppress duplicates coming from different augmentations
    merged_proposals, _ = nms(torch.cat(restored, dim=0),
                              rpn_test_cfg.nms_thr)
    # keep only the max_num highest-scoring survivors
    keep = min(rpn_test_cfg.max_num, merged_proposals.shape[0])
    _, order = merged_proposals[:, 4].sort(0, descending=True)
    merged_proposals = merged_proposals[order[:keep], :]
    return merged_proposals
def corners_nms(det_corners, det_bboxes, iou_thr=0.5):
    """Apply NMS to each image's boxes, keeping corners aligned with the
    surviving boxes.

    Args:
        det_corners: per-image corner arrays, row-aligned with det_bboxes.
        det_bboxes: per-image (n, 5) detection arrays.
        iou_thr: IoU threshold for suppression.

    Returns:
        tuple: (filtered corners list, filtered bboxes list).
    """
    kept_corners = []
    kept_bboxes = []
    for corners, boxes in zip(det_corners, det_bboxes):
        survivors, keep_idx = nms(boxes, iou_thr)
        kept_corners.append(corners[keep_idx])
        kept_bboxes.append(survivors)
    return kept_corners, kept_bboxes
def show_result_in_Chinese(img, result, class_names, score_thr=0.3,
                           out_file=None, thickness=1, bbox_color='green',
                           text_color='green'):
    """Draw detection (and optional segmentation) results on an image, with
    labels rendered via ``write_text_to_image`` (supports non-ASCII text).

    Args:
        img: image path or array accepted by ``mmcv.imread``.
        result: either ``bbox_result`` or ``(bbox_result, segm_result)``.
        class_names (tuple|list): names indexed by label id.
        score_thr (float): detections at or below this score are dropped.
        out_file: if given, the rendered image is written there.
        thickness (int): rectangle line thickness.
        bbox_color / text_color: colors accepted by ``color_val``.
    """
    assert isinstance(class_names, (tuple, list))
    img = mmcv.imread(img)
    if isinstance(result, tuple):
        bbox_result, segm_result = result
    else:
        bbox_result, segm_result = result, None
    bboxes = np.vstack(bbox_result)
    # draw segmentation masks
    if segm_result is not None:
        segms = mmcv.concat_list(segm_result)
        inds = np.where(bboxes[:, -1] > score_thr)[0]
        for i in inds:
            # random color per instance, alpha-blended onto the image
            color_mask = np.random.randint(0, 256, (1, 3), dtype=np.uint8)
            mask = maskUtils.decode(segms[i]).astype(np.bool)
            img[mask] = img[mask] * 0.5 + color_mask * 0.5
    # draw bounding boxes
    labels = [
        np.full(bbox.shape[0], i, dtype=np.int32)
        for i, bbox in enumerate(bbox_result)
    ]
    labels = np.concatenate(labels)
    assert bboxes.ndim == 2
    assert labels.ndim == 1
    assert bboxes.shape[0] == labels.shape[0]
    assert bboxes.shape[1] == 4 or bboxes.shape[1] == 5
    if score_thr > 0:
        assert bboxes.shape[1] == 5
        scores = bboxes[:, -1]
        inds = scores > score_thr
        bboxes = bboxes[inds, :]
        labels = labels[inds]
    bbox_color = color_val(bbox_color)
    text_color = color_val(text_color)
    # extra class-agnostic NMS at IoU 0.5; nms() returns (dets, keep_inds)
    test_bboxes = nms(bboxes, 0.5)
    new_bboxes = [bboxes[i] for i in test_bboxes[1]]
    new_labels = [labels[i] for i in test_bboxes[1]]
    for bbox, label in zip(new_bboxes, new_labels):
        bbox_int = bbox.astype(np.int32)
        left_top = (bbox_int[0], bbox_int[1])
        right_bottom = (bbox_int[2], bbox_int[3])
        cv2.rectangle(
            img, left_top, right_bottom, bbox_color, thickness=thickness)
        label_text = class_names[
            label] if class_names is not None else 'cls {}'.format(label)
        if len(bbox) > 4:
            # append the score when the box carries one
            label_text += '|{:.02f}'.format(bbox[-1])
        img = write_text_to_image(img, label_text,
                                  (bbox_int[0], bbox_int[1] - 2), text_color)
    if out_file is not None:
        imwrite(img, out_file)
def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg):
    """Merge augmented proposals (multiscale, flip, etc.)

    This variant additionally filters proposals by scale before merging:
    small-scale test images contribute only large boxes, large-scale test
    images contribute only small boxes (the 64/400 pixel thresholds and the
    0.05 scale-factor tolerance are tuning constants; see the commented-out
    128/256 alternative below).

    Args:
        aug_proposals (list[Tensor]): proposals from different testing
            schemes, shape (n, 5). Note that they are not rescaled to the
            original image size.
        img_metas (list[dict]): list of image info dict where each dict
            has: 'img_shape', 'scale_factor', 'flip', and my also contain
            'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. For
            details on the values of these keys see
            `mmdet/datasets/pipelines/formatting.py:Collect`.
        rpn_test_cfg (dict): rpn test config.

    Returns:
        Tensor: shape (n, 4), proposals corresponding to original image scale.
    """
    recovered_proposals = []
    for proposals, img_info in zip(aug_proposals, img_metas):
        img_shape = img_info['img_shape']
        scale_factor = img_info['scale_factor']
        flip = img_info['flip']
        flip_direction = img_info.get('flip_direction', 'horizontal')
        _proposals = proposals.clone()
        # undo resize/flip so all augmentations share one coordinate frame
        _proposals[:, :4] = bbox_mapping_back(_proposals[:, :4], img_shape,
                                              scale_factor, flip,
                                              flip_direction)
        # recovered_proposals.append(_proposals)
        # geometric-mean box side length, used as the scale measure
        w = _proposals[:, 2] - _proposals[:, 0]
        h = _proposals[:, 3] - _proposals[:, 1]
        scale = (w * h).sqrt()
        # if scale_factor <= 1.0: # detetion large object on small scale
        #     inds = (w * h).sqrt() >= 128.
        # else: # detection small object on large scale
        #     inds = (w * h).sqrt() <= 256.
        # recovered_proposals.append(_proposals[inds])
        if scale_factor < 1.0 - 0.05:
            # down-scaled test image: trust it only for large objects
            inds = scale >= 64
            recovered_proposals.append(_proposals[inds])
        elif scale_factor > 1.0 + 0.05:
            # up-scaled test image: trust it only for small/medium objects
            inds = scale <= 400
            recovered_proposals.append(_proposals[inds])
        else:
            # near-original scale: keep everything
            recovered_proposals.append(_proposals)
    aug_proposals = torch.cat(recovered_proposals, dim=0)
    merged_proposals, _ = nms(aug_proposals, rpn_test_cfg.nms_thr)
    scores = merged_proposals[:, 4]
    _, order = scores.sort(0, descending=True)
    num = min(rpn_test_cfg.max_num, merged_proposals.shape[0])
    order = order[:num]
    merged_proposals = merged_proposals[order, :]
    return merged_proposals
def nms(self, result, nms_th=0.3):
    """Class-agnostic NMS over per-class detections, regrouped per class.

    All classes are stacked and suppressed jointly, then the surviving
    indices are routed back to the class whose contiguous range of rows
    they fell into.

    Bug fix: the per-class window previously tested ``ids < num`` instead
    of ``ids < total_num + num``, so every class after the first matched
    the wrong (usually empty) index range.

    Args:
        result: list of per-class (n_i, 5) detection arrays.
        nms_th (float): IoU threshold.

    Returns:
        list: per-class arrays of detections surviving joint NMS.
    """
    dets_num = [len(det_cls) for det_cls in result]
    result = np.vstack(result)
    # this resolves to the module-level nms(); the method name lives in the
    # class namespace and does not shadow it inside the body
    _, ids = nms(result, nms_th)
    total_num = 0
    nms_result = []
    for num in dets_num:
        # class i occupies stacked rows [total_num, total_num + num)
        in_cls = (total_num <= ids) & (ids < total_num + num)
        ids_cls = ids[np.where(in_cls)[0]]
        nms_result.append(result[ids_cls])
        total_num += num
    return nms_result
def multiclass_nms(multi_bboxes, multi_scores, score_thr, nms_thr, max_num=-1):
    """NMS for multi-class bboxes.

    Args:
        multi_bboxes (Tensor): shape (n, #class*4) or (n, 4)
        multi_scores (Tensor): shape (n, #class)
        score_thr (float): bbox threshold, bboxes with scores lower than it
            will not be considered.
        nms_thr (float): NMS IoU threshold
        max_num (int): if there are more than max_num bboxes after NMS,
            only top max_num will be kept; non-positive means no limit.

    Returns:
        tuple: (bboxes, labels), tensors of shape (k, 5) and (k, 1). Labels
            are 0-based.

    Bug fix: with the default ``max_num=-1`` the old guard
    ``bboxes.shape[0] > max_num`` was always true, so ``inds[:-1]`` silently
    dropped the lowest-scoring detection. The truncation now only runs when
    ``max_num`` is positive.
    """
    num_classes = multi_scores.shape[1]
    bboxes, labels = [], []
    # column 0 is background; iterate foreground classes only
    for i in range(1, num_classes):
        cls_inds = multi_scores[:, i] > score_thr
        if not cls_inds.any():
            continue
        # get bboxes and scores of this class
        if multi_bboxes.shape[1] == 4:
            _bboxes = multi_bboxes[cls_inds, :]
        else:
            _bboxes = multi_bboxes[cls_inds, i * 4:(i + 1) * 4]
        _scores = multi_scores[cls_inds, i]
        cls_dets = torch.cat([_bboxes, _scores[:, None]], dim=1)
        # perform nms
        # NOTE(review): the return of nms() is used directly as keep indices
        # here, while sibling functions unpack `dets, inds = nms(...)` —
        # confirm which nms variant this module imports.
        nms_keep = nms(cls_dets, nms_thr)
        cls_dets = cls_dets[nms_keep, :]
        cls_labels = multi_bboxes.new_full((len(nms_keep), ),
                                           i - 1,
                                           dtype=torch.long)
        bboxes.append(cls_dets)
        labels.append(cls_labels)
    if bboxes:
        bboxes = torch.cat(bboxes)
        labels = torch.cat(labels)
        # truncate to the top max_num by score only when a limit is set
        if max_num > 0 and bboxes.shape[0] > max_num:
            _, inds = bboxes[:, -1].sort(descending=True)
            inds = inds[:max_num]
            bboxes = bboxes[inds]
            labels = labels[inds]
    else:
        bboxes = multi_bboxes.new_zeros((0, 5))
        labels = multi_bboxes.new_zeros((0, ), dtype=torch.long)
    return bboxes, labels
def post_process(preds, num_classes=4787, iou_thr=0.3, score_thr=0.3):
    """Post-process raw per-image predictions: joint NMS across classes,
    score filtering, then regrouping by class.

    Args:
        preds: iterable of per-image predictions, each a list of per-class
            (n_i, 5) arrays.
        num_classes (int): number of classes used for regrouping.
        iou_thr (float): IoU threshold for NMS.
        score_thr (float): minimum score (column 4) to keep a detection.

    Returns:
        list: one entry per image, each a per-class list of kept detections.
    """
    ret = []
    for pred in tqdm(preds):
        bboxes = np.vstack(pred)
        # label every stacked row with the class list it came from
        labels = np.concatenate([[i] * len(bb) for i, bb in enumerate(pred)])
        # nms
        _, inds = nms(bboxes, iou_thr)
        bboxes, labels = bboxes[inds], labels[inds]
        # score filtering
        inds = bboxes[:, 4] > score_thr
        bboxes, labels = bboxes[inds], labels[inds]
        #
        ret.append([bboxes[labels == i] for i in range(num_classes)])
    return ret
def filter_results(results, score_thr=0.2, iou_thr=0.65):
    """NMS followed by score thresholding, keeping corners and poses
    row-aligned with the surviving boxes.

    Args:
        results: iterable of tuples, either
            (corners, _, scores, poses) or
            (corners, _, scores, poses, pose_scores).
        score_thr (float): minimum detection score to keep.
        iou_thr (float): IoU threshold for NMS.

    Returns:
        tuple: (det_corners, det_bboxes, det_poses), one entry per result.
    """
    det_corners = []
    det_bboxes = []
    det_poses = []
    for entry in results:
        if len(entry) == 5:
            corners, _, scores, poses, _pose_scores = entry
        else:
            corners, _, scores, poses = entry
        # build (x1, y1, x2, y2, score) rows for the nms call
        boxes = corners2bboxes(corners)
        boxes = np.hstack([boxes, scores[:, np.newaxis]])
        boxes, keep = nms(boxes, iou_thr)
        # threshold on the surviving boxes' scores
        confident = boxes[:, -1] > score_thr
        det_bboxes.append(boxes[confident])
        det_corners.append(corners[keep][confident])
        det_poses.append(poses[keep][confident])
    return det_corners, det_bboxes, det_poses
def max_class_per_position(dets):
    """Keep, across classes, only the best-scoring detection per position
    via class-agnostic NMS, then regroup survivors by class.

    NOTE(review): relies on module-level ``cls_th``, ``nms_all_th`` and
    ``num_cls`` — confirm they are defined where this is called.

    Args:
        dets: per-class list of (n_i, 5) detections (x1, y1, x2, y2, score).

    Returns:
        list: per-class arrays of surviving (n_i, 6) rows, where column 5
        is the class id.
    """
    # boxes_all=[]
    boxes_all = np.zeros((0, 6))
    for cls, det in enumerate(dets):
        if len(det) > 0:
            # widen each row to 6 columns and stamp the class id in column 5
            det_out = np.zeros((len(det), 6))
            det_out[:, :5] = det
            det_out[:, 5] = cls
            #boxes_all.append(det_out)
            boxes_all = np.vstack((boxes_all, det_out))
    # boxes_all=np.array(boxes_all)
    # drop low-score rows before the joint NMS
    boxes_all = boxes_all[boxes_all[:, 4] > cls_th]
    # class-agnostic NMS over the 5-column view (class id excluded)
    _, idx = nms(boxes_all[:, :5], nms_all_th)
    boxes_max = boxes_all[idx]
    boxes_max_cls = [[] for cls in range(num_cls)]
    for cls in range(num_cls):
        idx_cls = boxes_max[:, 5] == cls
        boxes_max_cls[cls] = boxes_max[idx_cls]
    return boxes_max_cls
def get_det_bboxes(self,
                   rois,
                   cls_score,
                   bbox_pred,
                   img_shape,
                   scale_factor,
                   rescale=False,
                   cfg=None):
    """Turn RoI head outputs into final detections for one image.

    Args:
        rois: (n, 5) rows of (batch_ind, x1, y1, x2, y2).
        cls_score: (n, #classes) logits, or a list of such tensors to be
            averaged (e.g. from multiple stages).
        bbox_pred: (n, 4) regression deltas, or None to keep the RoI boxes.
        img_shape: used to clip boxes; may be None.
        scale_factor: divisor to map boxes back to the original image scale.
        rescale (bool): whether to apply ``scale_factor``.
        cfg: test config with ``nms.iou_thr`` and ``score_thr``; if None,
            raw boxes and labels are returned without NMS/thresholding.

    Returns:
        (bboxes, labels) when ``cfg`` is None, otherwise
        (det_bboxes, det_labels, nms_inds).
    """
    if isinstance(cls_score, list):
        # ensemble case: average the stage scores
        cls_score = sum(cls_score) / float(len(cls_score))
    scores = F.softmax(cls_score, dim=1) if cls_score is not None else None
    if bbox_pred is not None:
        bboxes = delta2bbox(rois[:, 1:], bbox_pred, self.target_means,
                            self.target_stds, img_shape)
    else:
        bboxes = rois[:, 1:].clone()
        if img_shape is not None:
            # clip to the (padded) image in place
            bboxes[:, [0, 2]].clamp_(min=0, max=img_shape[1] - 1)
            bboxes[:, [1, 3]].clamp_(min=0, max=img_shape[0] - 1)
    if rescale:
        if isinstance(scale_factor, float):
            bboxes /= scale_factor
        else:
            bboxes /= torch.from_numpy(scale_factor).to(bboxes.device)
    # best foreground class per RoI (column 0 is background, hence [:, 1:])
    scores, labels = torch.max(scores[:, 1:], dim=1)
    if cfg is None:
        return bboxes, labels
    else:
        proposals = torch.cat([bboxes, scores.unsqueeze(-1)], dim=-1)
        det_bboxes, nms_inds = nms(proposals, cfg.nms.iou_thr)
        det_labels = labels[nms_inds]
        # score threshold applied after NMS
        valid = det_bboxes[:, -1] > cfg.score_thr
        det_bboxes = det_bboxes[valid]
        det_labels = det_labels[valid]
        nms_inds = nms_inds[valid]
        return det_bboxes, det_labels, nms_inds
def get_bboxes_single(
        self,
        cls_scores,
        bbox_preds,
        mlvl_anchors,
        img_shape,
        scale_factor,
        gt_bboxes,  # added by WSK (unused in this method body)
        gt_labels,  # added by WSK (unused in this method body)
        cfg,
        rescale=False):
    """
    :param cls_scores: list[Tensor]. len(cls_score) equals to the number of
        feature map levels. and cls_scores[i].size() is (A*C, width_i,
        height_i). width_i and height_i is the size of the i-th level
        feature map.
    :param bbox_preds: list[Tensor]. len(bbox_preds) equals to the number of
        feature map levels. and bbox_preds[i].size() is (A*4, width_i,
        height_i). width_i and height_i is the size of the i-th level
        feature map. if use_iou_branch is used, (A*5, width_i, height_i),
        the additional dim represents the predicted IoU.
    :param mlvl_anchors: list[Tensor]. len(bbox_preds) equals to the number
        of feature map levels. and mlvl_anchors[i].size() is (A*4, width_i,
        height_i). width_i and height_i is the size of the i-th level
        feature map.
    :param img_shape: image shape used to clip decoded boxes.
    :param scale_factor: unused here (no rescale of proposals).
    :param gt_bboxes: unused; kept for interface compatibility.
    :param gt_labels: unused; kept for interface compatibility.
    :param cfg: test config (nms_pre, nms_post, nms_thr, max_num, ...).
    :param rescale: unused here.
    :return: proposals: tensor of shape (num_proposal, 5)
    """
    mlvl_proposals = []
    # postprocess the detection results of RPN for each feature level
    # respectively.
    for idx in range(len(cls_scores)):
        rpn_cls_score = cls_scores[idx]
        rpn_bbox_pred = bbox_preds[idx]
        assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
        anchors = mlvl_anchors[idx]
        rpn_cls_score = rpn_cls_score.permute(1, 2, 0)
        if self.use_sigmoid_cls:
            rpn_cls_score = rpn_cls_score.reshape(-1)
            scores = rpn_cls_score.sigmoid()  # (width_i*height_i*A)
        else:
            rpn_cls_score = rpn_cls_score.reshape(-1, 2)
            scores = rpn_cls_score.softmax(
                dim=1)[:, 1]  # (width_i*height_i*A, 2)
        rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4)
        # select the top-k proposals based on the scores for each feature
        # level respectively.
        if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
            _, topk_inds = scores.topk(cfg.nms_pre)
            rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
            anchors = anchors[topk_inds, :]
            scores = scores[topk_inds]
        # decode deltas into absolute boxes clipped to the image
        proposals = delta2bbox(anchors, rpn_bbox_pred, self.target_means,
                               self.target_stds, img_shape)
        if cfg.min_bbox_size > 0:
            w = proposals[:, 2] - proposals[:, 0] + 1
            h = proposals[:, 3] - proposals[:, 1] + 1
            valid_inds = torch.nonzero((w >= cfg.min_bbox_size)
                                       & (h >= cfg.min_bbox_size)).squeeze()
            proposals = proposals[valid_inds, :]
            scores = scores[valid_inds]
        # apply nms to the selected proposals for each feature level
        # respectively.
        proposals = torch.cat([proposals, scores.unsqueeze(-1)],
                              dim=-1)  # (width_i*height_i*A, 5)
        proposals, _ = nms(proposals, cfg.nms_thr)
        proposals = proposals[:cfg.nms_post, :]
        mlvl_proposals.append(proposals)
    proposals = torch.cat(mlvl_proposals, 0)  # (num_proposal, 5)
    if cfg.nms_across_levels:
        proposals, _ = nms(proposals, cfg.nms_thr)
        proposals = proposals[:cfg.max_num, :]
    else:
        scores = proposals[:, 4]
        # cfg.max_num proposals after nms for rpn
        num = min(cfg.max_num, proposals.shape[0])
        # select the top-k proposals for all the feature levels.
        _, topk_inds = scores.topk(num)
        proposals = proposals[topk_inds, :]
    return proposals
def get_bboxes_second_single(self,
                             cls_scores,
                             bbox_preds,
                             mlvl_anchors,
                             mlvl_masks,
                             img_shape,
                             scale_factor,
                             cfg,
                             rescale=False):
    """Guided-anchoring style proposal generation: only locations enabled
    by ``mlvl_masks`` contribute anchors.

    Bug fix: the min-size filter referenced ``heatmap``, a name never
    defined in this function (leftover from the commented-out heatmap
    path), which raised NameError whenever ``cfg.min_bbox_size > 0``. The
    dangling reference and the dead commented-out heatmap code are removed.

    Returns:
        Tensor: (num_proposals, 5) rows of (x1, y1, x2, y2, score).
    """
    mlvl_proposals = []
    for idx in range(len(cls_scores)):
        rpn_cls_score = cls_scores[idx]
        rpn_bbox_pred = bbox_preds[idx]
        anchors = mlvl_anchors[idx]
        mask = mlvl_masks[idx]
        # if no location is kept, end.
        if mask.sum() == 0:
            continue
        rpn_cls_score = rpn_cls_score.permute(1, 2, 0)
        if self.use_sigmoid_cls:
            rpn_cls_score = rpn_cls_score.reshape(-1)
            scores = rpn_cls_score.sigmoid()
        else:
            rpn_cls_score = rpn_cls_score.reshape(-1, 2)
            scores = rpn_cls_score.softmax(dim=1)[:, 1]
        # filter scores, bbox_pred w.r.t. mask.
        # anchors are filtered in get_anchors() beforehand.
        anchors = anchors[mask]
        scores = scores[mask]
        rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4)[mask, :]
        if scores.dim() == 0:
            # a single surviving location collapses to a scalar; restore
            # the leading dimension so downstream indexing works
            rpn_bbox_pred = rpn_bbox_pred.unsqueeze(0)
            anchors = anchors.unsqueeze(0)
            scores = scores.unsqueeze(0)
        # filter anchors, bbox_pred, scores w.r.t. scores
        if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
            _, topk_inds = scores.topk(cfg.nms_pre)
            rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
            anchors = anchors[topk_inds, :]
            scores = scores[topk_inds]
        # get proposals w.r.t. anchors and rpn_bbox_pred
        proposals = delta2bbox(anchors, rpn_bbox_pred, self.target_means,
                               self.target_stds, img_shape)
        # filter out too small bboxes
        if cfg.min_bbox_size > 0:
            w = proposals[:, 2] - proposals[:, 0] + 1
            h = proposals[:, 3] - proposals[:, 1] + 1
            valid_inds = torch.nonzero((w >= cfg.min_bbox_size)
                                       & (h >= cfg.min_bbox_size)).squeeze()
            proposals = proposals[valid_inds, :]
            scores = scores[valid_inds]
        proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1)
        # NMS in current level
        proposals, _ = nms(proposals, cfg.nms_thr)
        proposals = proposals[:cfg.nms_post, :]
        mlvl_proposals.append(proposals)
    proposals = torch.cat(mlvl_proposals, 0)
    if cfg.nms_across_levels:
        # NMS across multi levels
        proposals, _ = nms(proposals, cfg.nms_thr)
        proposals = proposals[:cfg.max_num, :]
    else:
        scores = proposals[:, 4]
        num = min(cfg.max_num, proposals.shape[0])
        _, topk_inds = scores.topk(num)
        proposals = proposals[topk_inds, :]
    return proposals
"r") as f: detecions = json.load(f) print("load results json.") # merge results = [] iter = 1 for img_id in tqdm(img_list): iter += 1 img_name = img_id + '.jpg' det_nms = [] if img_name in detecions: det = np.array(detecions[img_name]) det = det[det[:, -2] > args.score_thr] for i in range(args.nclass): det_nms.append(nms(det[det[:, -1] == i, :5], iou_thr=0.5)[0]) else: det_nms = [np.array([]).reshape(0, 5) for i in range(args.nclass)] results.append(det_nms) # ground truth xml_file = osp.join(source_anno, img_id + '.xml') bboxes, labels = getGtFromXml(xml_file) annotations.append({"bboxes": bboxes, "labels": labels}) # show img_file = osp.join(source_img, img_name) # model.show_result(img_file, det_nms, out_file='source_result.jpg') # voc metric eval_results = eval_map(results, annotations, iou_thr=0.5,
def _get_bboxes_single(self,
                       cls_scores,
                       bbox_preds,
                       mlvl_anchors,
                       mlvl_masks,
                       img_shape,
                       scale_factor,
                       cfg,
                       rescale=False):
    """Guided-anchor RPN (mmdet v2 style): decode the predictions of one
    image into scored proposals, using per-level location masks.

    Args:
        cls_scores (list[Tensor]): per-level objectness maps (C, H, W).
        bbox_preds (list[Tensor]): per-level box deltas (4*A, H, W).
        mlvl_anchors (list[Tensor]): per-level anchors, already filtered
            to the mask in get_anchors().
        mlvl_masks (list[Tensor]): per-level boolean location masks.
        img_shape (tuple): image shape used to clip decoded boxes.
        scale_factor: unused here (kept for the common interface).
        cfg: test config; falls back to self.test_cfg when None.
        rescale (bool): unused here (kept for the common interface).

    Returns:
        Tensor: (N, 5) proposals [x1, y1, x2, y2, score].
    """
    # Fall back to the head's test config when none is supplied.
    cfg = self.test_cfg if cfg is None else cfg
    mlvl_proposals = []
    for idx in range(len(cls_scores)):
        rpn_cls_score = cls_scores[idx]
        rpn_bbox_pred = bbox_preds[idx]
        anchors = mlvl_anchors[idx]
        mask = mlvl_masks[idx]
        assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
        # if no location is kept, end.
        if mask.sum() == 0:
            continue
        # (C, H, W) -> (H, W, C) so reshape(-1, ...) enumerates locations.
        rpn_cls_score = rpn_cls_score.permute(1, 2, 0)
        if self.use_sigmoid_cls:
            rpn_cls_score = rpn_cls_score.reshape(-1)
            scores = rpn_cls_score.sigmoid()
        else:
            rpn_cls_score = rpn_cls_score.reshape(-1, 2)
            # remind that we set FG labels to [0, num_class-1]
            # since mmdet v2.0
            # BG cat_id: num_class
            # NOTE(review): `[:, :-1]` keeps a trailing class dim of size 1,
            # so `scores` is 2-D on this path; the topk below would then
            # operate on the last (size-1) dim. Presumably
            # use_sigmoid_cls=True in practice -- confirm.
            scores = rpn_cls_score.softmax(dim=1)[:, :-1]
        # filter scores, bbox_pred w.r.t. mask.
        # anchors are filtered in get_anchors() beforehand.
        scores = scores[mask]
        rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4)[mask, :]
        if scores.dim() == 0:
            # A single kept location collapses to 0-d; restore batch dim.
            rpn_bbox_pred = rpn_bbox_pred.unsqueeze(0)
            anchors = anchors.unsqueeze(0)
            scores = scores.unsqueeze(0)
        # filter anchors, bbox_pred, scores w.r.t. scores
        if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
            _, topk_inds = scores.topk(cfg.nms_pre)
            rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
            anchors = anchors[topk_inds, :]
            scores = scores[topk_inds]
        # get proposals w.r.t. anchors and rpn_bbox_pred
        proposals = self.bbox_coder.decode(anchors, rpn_bbox_pred,
                                           max_shape=img_shape)
        # filter out too small bboxes
        if cfg.min_bbox_size > 0:
            w = proposals[:, 2] - proposals[:, 0]
            h = proposals[:, 3] - proposals[:, 1]
            valid_inds = torch.nonzero(
                (w >= cfg.min_bbox_size) & (h >= cfg.min_bbox_size),
                as_tuple=False).squeeze()
            proposals = proposals[valid_inds, :]
            scores = scores[valid_inds]
        # Append the score column so NMS sees (x1, y1, x2, y2, score).
        proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1)
        # NMS in current level
        proposals, _ = nms(proposals, cfg.nms_thr)
        proposals = proposals[:cfg.nms_post, :]
        mlvl_proposals.append(proposals)
    proposals = torch.cat(mlvl_proposals, 0)
    if cfg.nms_across_levels:
        # NMS across multi levels
        proposals, _ = nms(proposals, cfg.nms_thr)
        proposals = proposals[:cfg.max_num, :]
    else:
        # Keep at most max_num proposals, ranked by score (column 4).
        scores = proposals[:, 4]
        num = min(cfg.max_num, proposals.shape[0])
        _, topk_inds = scores.topk(num)
        proposals = proposals[topk_inds, :]
    return proposals
def get_bboxes_single(self,
                      cls_scores,
                      bbox_preds,
                      mlvl_anchors,
                      img_shape,
                      scale_factor,
                      cfg,
                      rescale=False):
    """Decode the per-level RPN outputs of a single image into proposals.

    Each feature level contributes at most ``cfg.nms_post`` boxes after a
    per-level NMS; the levels are then merged and reduced to at most
    ``cfg.max_num`` proposals, all in original-image coordinates.

    Returns:
        Tensor: (N, 5) proposals ``[x1, y1, x2, y2, score]``.
    """
    mlvl_proposals = []
    for cls_score, bbox_pred, anchors in zip(cls_scores, bbox_preds,
                                             mlvl_anchors):
        assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
        cls_score = cls_score.permute(1, 2, 0)
        if self.use_sigmoid_cls:
            # Foreground-only head: sigmoid over the flattened map.
            scores = cls_score.reshape(-1).sigmoid()
        else:
            # Two-class head: softmax, foreground probability in column 1.
            scores = cls_score.reshape(-1, 2).softmax(dim=1)[:, 1]
        deltas = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
        # Pre-NMS cut: keep only the cfg.nms_pre best-scoring predictions.
        if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
            _, keep = scores.topk(cfg.nms_pre)
            deltas = deltas[keep, :]
            anchors = anchors[keep, :]
            scores = scores[keep]
        # Regression deltas -> absolute boxes in image coordinates.
        proposals = delta2bbox(anchors, deltas, self.target_means,
                               self.target_stds, img_shape)
        # Discard degenerate boxes below the minimum size.
        if cfg.min_bbox_size > 0:
            widths = proposals[:, 2] - proposals[:, 0] + 1
            heights = proposals[:, 3] - proposals[:, 1] + 1
            big_enough = torch.nonzero(
                (widths >= cfg.min_bbox_size) &
                (heights >= cfg.min_bbox_size)).squeeze()
            proposals = proposals[big_enough, :]
            scores = scores[big_enough]
        # Append the score column so NMS sees (x1, y1, x2, y2, score).
        proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1)
        proposals, _ = nms(proposals, cfg.nms_thr)
        mlvl_proposals.append(proposals[:cfg.nms_post, :])
    proposals = torch.cat(mlvl_proposals, 0)
    if cfg.nms_across_levels:
        # One more NMS over the merged levels, then a hard cap.
        proposals, _ = nms(proposals, cfg.nms_thr)
        proposals = proposals[:cfg.max_num, :]
    else:
        # No cross-level NMS: just keep the max_num best by score.
        num = min(cfg.max_num, proposals.shape[0])
        _, keep = proposals[:, 4].topk(num)
        proposals = proposals[keep, :]
    return proposals
def get_bboxes_single(self, cls_scores, bbox_preds, mlvl_anchors, img_shape,
                      scale_factor, cfg, gt_bboxes, gt_labels, rescale=False):
    """Turn per-level RPN outputs of one image into proposals, optionally
    applying class-frequency-aware NMS resampling during training.

    Args:
        cls_scores (list[Tensor]): per-level objectness maps (C, H, W).
        bbox_preds (list[Tensor]): per-level box deltas (4*A, H, W).
        mlvl_anchors (list[Tensor]): per-level anchors, (H*W*A, 4).
        img_shape (tuple): image shape used to clip decoded boxes.
        scale_factor: unused here (kept for the common interface).
        cfg: config with nms_pre, min_bbox_size, nms_thr, nms_post,
            nms_across_levels, max_num and the optional nms_resampling
            spec ('discrete', a_r, a_c, a_f) or ('linear', thresh).
        gt_bboxes (Tensor): gt boxes, consumed by NMS resampling.
        gt_labels (Tensor): gt labels, consumed by NMS resampling.
        rescale (bool): unused here (kept for the common interface).

    Returns:
        Tensor: (N, 5) proposals [x1, y1, x2, y2, score].
    """
    mlvl_proposals = []
    for idx in range(len(cls_scores)):
        rpn_cls_score = cls_scores[idx]
        rpn_bbox_pred = bbox_preds[idx]
        assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
        # (C, H, W) -> (H, W, C) so reshape(-1, ...) enumerates locations.
        rpn_cls_score = rpn_cls_score.permute(1, 2, 0)
        if self.use_sigmoid_cls:
            rpn_cls_score = rpn_cls_score.reshape(-1)
            scores = rpn_cls_score.sigmoid()
        else:
            rpn_cls_score = rpn_cls_score.reshape(-1, 2)
            scores = rpn_cls_score.softmax(dim=1)[:, 1]
        rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4)
        anchors = mlvl_anchors[idx]
        # Keep only the nms_pre highest-scoring predictions per level.
        if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
            _, topk_inds = scores.topk(cfg.nms_pre)
            rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
            anchors = anchors[topk_inds, :]
            scores = scores[topk_inds]
        # Decode deltas into absolute boxes, clipped to img_shape.
        proposals = delta2bbox(anchors, rpn_bbox_pred, self.target_means,
                               self.target_stds, img_shape)
        # Filter out boxes smaller than min_bbox_size.
        if cfg.min_bbox_size > 0:
            w = proposals[:, 2] - proposals[:, 0] + 1
            h = proposals[:, 3] - proposals[:, 1] + 1
            valid_inds = torch.nonzero((w >= cfg.min_bbox_size) &
                                       (h >= cfg.min_bbox_size)).squeeze()
            proposals = proposals[valid_inds, :]
            scores = scores[valid_inds]
        # Append the score column so NMS sees (x1, y1, x2, y2, score).
        proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1)
        if cfg.nms_resampling is not None:
            # only used in training
            if cfg.nms_resampling[0] == 'discrete':
                a_r = cfg.nms_resampling[1]
                a_c = cfg.nms_resampling[2]
                a_f = cfg.nms_resampling[3]
                proposals = self.nms_resampling_discrete(
                    proposals, gt_bboxes, gt_labels, a_r, a_c, a_f)
            elif cfg.nms_resampling[0] == 'linear':
                thresh = cfg.nms_resampling[1]
                proposals = self.nms_resampling_linear(
                    proposals, gt_bboxes, gt_labels, thresh)
        else:
            proposals, _ = nms(proposals, cfg.nms_thr)
            # NOTE(review): indentation reconstructed from a collapsed
            # source -- the nms_post truncation is assumed to apply only
            # on this plain-NMS path; confirm against the original file.
            proposals = proposals[:cfg.nms_post, :]
        mlvl_proposals.append(proposals)
    proposals = torch.cat(mlvl_proposals, 0)
    if cfg.nms_across_levels:
        proposals, _ = nms(proposals, cfg.nms_thr)
        proposals = proposals[:cfg.max_num, :]
    else:
        # Keep at most max_num proposals, ranked by score (column 4).
        scores = proposals[:, 4]
        num = min(cfg.max_num, proposals.shape[0])
        _, topk_inds = scores.topk(num)
        proposals = proposals[topk_inds, :]
    return proposals
def nms_resampling_discrete(self, proposals, gt_bboxes, gt_labels, a_r, a_c,
                            a_f):
    """Class-frequency-aware NMS with discrete thresholds.

    Runs NMS at a looser threshold (a_r) and keeps proposals matching
    rare-category gt boxes, likewise at a_c for common categories, and at
    the strictest threshold (a_f) for everything else; the frequent-NMS
    output drops proposals already covered by a rare/common gt before the
    three sets are merged.

    Args:
        proposals (Tensor): (N, 5) scored boxes, pre-NMS.
        gt_bboxes (Tensor): (G, 4) ground-truth boxes.
        gt_labels (Tensor): (G,) ground-truth labels (> 0).
        a_r, a_c, a_f (float): NMS IoU thresholds for rare / common /
            frequent categories.

    Returns:
        Tensor: (M, 5) resampled proposals.
    """
    assert any(gt_labels > 0)
    # A proposal is considered background w.r.t. a gt when IoU < 0.3.
    select_thresh = 0.3
    out = []
    rare, common, frequent = self.get_category_frequency(gt_labels.device)
    # Partition gt boxes by the frequency group of their label.
    # ROBUSTNESS: dynamic lists replace the previous fixed 2000-slot
    # zero buffers, which overflowed on images with more than 2000 gts
    # (and the frequent-group buffer was collected but never used).
    rare_boxes = []
    common_boxes = []
    for gt_bbox, gt_label in zip(gt_bboxes, gt_labels):
        if gt_label in rare:
            rare_boxes.append(gt_bbox)
        elif gt_label in common:
            common_boxes.append(gt_bbox)
        # frequent-category gt boxes are not needed below.
    rare_gtbox = (torch.stack(rare_boxes)
                  if rare_boxes else gt_bboxes.new_zeros((0, 4)))
    common_gtbox = (torch.stack(common_boxes)
                    if common_boxes else gt_bboxes.new_zeros((0, 4)))
    # Baseline: strict NMS applied to every proposal.
    frequent_proposals, _ = nms(proposals, a_f)
    if len(rare_gtbox) > 0:
        # Loose NMS, then keep only proposals overlapping a rare gt.
        rare_proposals, _ = nms(proposals, a_r)
        rare_overlaps = bbox_overlaps(rare_gtbox, rare_proposals[:, :4])
        rare_max_overlaps, _ = rare_overlaps.max(dim=0)
        rare_pos_inds = rare_max_overlaps >= select_thresh
        out.append(rare_proposals[rare_pos_inds, :])
        # Drop frequent-NMS proposals already covered by a rare gt.
        frequent_rare_overlaps = bbox_overlaps(rare_gtbox,
                                               frequent_proposals[:, :4])
        frequent_rare_max_overlaps, _ = frequent_rare_overlaps.max(dim=0)
        valid_inds = frequent_rare_max_overlaps < select_thresh
        frequent_proposals = frequent_proposals[valid_inds, :]
    if len(common_gtbox) > 0:
        # Intermediate NMS, keep proposals overlapping a common gt.
        common_proposals, _ = nms(proposals, a_c)
        common_overlaps = bbox_overlaps(common_gtbox,
                                        common_proposals[:, :4])
        common_max_overlaps, _ = common_overlaps.max(dim=0)
        common_pos_inds = common_max_overlaps >= select_thresh
        out.append(common_proposals[common_pos_inds, :])
        # Drop frequent-NMS proposals already covered by a common gt.
        frequent_common_overlaps = bbox_overlaps(common_gtbox,
                                                 frequent_proposals[:, :4])
        frequent_common_max_overlaps, _ = frequent_common_overlaps.max(dim=0)
        valid_inds = frequent_common_max_overlaps < select_thresh
        frequent_proposals = frequent_proposals[valid_inds, :]
    out.append(frequent_proposals)
    if len(out) > 1:
        out_proposals = torch.cat(out, 0)
    else:
        out_proposals = frequent_proposals
    return out_proposals
def get_bboxes_single(self,
                      cls_scores,
                      bbox_preds,
                      mlvl_anchors,
                      img_shape,
                      scale_factor,
                      cfg,
                      rescale=False):
    """3-D variant: decode per-level RPN predictions into proposals.

    Boxes carry 6 coordinates plus a score column (7 values total) and
    the positive-anchor index sets (self.pos_indices /
    self.pos_indices_test) can pre-filter each level before the usual
    top-k / decode / NMS pipeline.

    Args:
        cls_scores (list[Tensor]): per-level objectness maps.
        bbox_preds (list[Tensor]): per-level 6-coordinate box deltas.
        mlvl_anchors (list[Tensor]): per-level anchors.
        img_shape (tuple): image shape used to clip decoded boxes.
        scale_factor: unused here (kept for the common interface).
        cfg: config with nms_pre, min_bbox_size, nms_thr, nms_post,
            nms_across_levels, max_num.
        rescale (bool): unused here (kept for the common interface).

    Returns:
        tuple(Tensor, Tensor): (N, 7) proposals and the surviving
        pre-NMS anchors concatenated over levels.
    """
    mlvl_proposals = []
    anchors_levels = []
    for idx in range(len(cls_scores)):
        rpn_cls_score = cls_scores[idx]
        rpn_bbox_pred = bbox_preds[idx]
        assert rpn_cls_score.size()[-3:] == rpn_bbox_pred.size()[-3:]
        anchors = mlvl_anchors[idx]
        rpn_cls_score = rpn_cls_score.permute(2, 3, 1, 0)
        if self.use_sigmoid_cls:
            rpn_cls_score = rpn_cls_score.reshape(-1)
            scores = rpn_cls_score.sigmoid()
        else:
            rpn_cls_score = rpn_cls_score.reshape(-1, 2)
            scores = rpn_cls_score.softmax(dim=1)[:, 1]
        rpn_bbox_pred = rpn_bbox_pred.permute(2, 3, 1, 0).reshape(-1, 6)
        if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
            # First filter out negative anchors, if a positive-index set
            # matching this level's score shape is available.
            if self.pos_indices is not None and self.pos_indices[
                    idx].shape == scores.shape:
                pos_indices = self.pos_indices[idx]
                scores = scores[pos_indices]
                rpn_bbox_pred = rpn_bbox_pred[pos_indices]
                anchors = anchors[pos_indices]
            elif self.pos_indices_test is not None and self.pos_indices_test[
                    idx].shape == scores.shape:
                pos_indices = self.pos_indices_test[idx]
                scores = scores[pos_indices]
                rpn_bbox_pred = rpn_bbox_pred[pos_indices]
                anchors = anchors[pos_indices]
            # Then keep only the nms_pre highest-scoring predictions.
            if scores.shape[0] > cfg.nms_pre:
                _, topk_inds = scores.topk(cfg.nms_pre)
                rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
                anchors = anchors[topk_inds, :]
                scores = scores[topk_inds]
        # Decode 6-coordinate deltas into absolute boxes.
        proposals = delta2bbox3D(anchors, rpn_bbox_pred, self.target_means,
                                 self.target_stds, img_shape)
        if cfg.min_bbox_size > 0:
            # BUGFIX: removed a stray `breakpoint()` debug call that froze
            # execution whenever cfg.min_bbox_size > 0.
            # NOTE(review): only columns 0..3 are size-checked even though
            # boxes carry 6 coordinates -- confirm the intended layout.
            w = proposals[:, 2] - proposals[:, 0] + 1
            h = proposals[:, 3] - proposals[:, 1] + 1
            valid_inds = torch.nonzero((w >= cfg.min_bbox_size) &
                                       (h >= cfg.min_bbox_size)).squeeze()
            proposals = proposals[valid_inds, :]
            scores = scores[valid_inds]
        # Append the score column before NMS.
        proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1)
        proposals, _ = nms(proposals, cfg.nms_thr)
        proposals = proposals[:cfg.nms_post, :]
        mlvl_proposals.append(proposals)
        anchors_levels.append(anchors)
    anchors_levels = torch.cat(anchors_levels, 0)
    proposals = torch.cat(mlvl_proposals, 0)
    if cfg.nms_across_levels:
        proposals, _ = nms(proposals, cfg.nms_thr)
        proposals = proposals[:cfg.max_num, :]
    else:
        # Score column lives at index 6 for 6-coordinate boxes.
        scores = proposals[:, 6]
        num = min(cfg.max_num, proposals.shape[0])
        _, topk_inds = scores.topk(num)
        proposals = proposals[topk_inds, :]
    return proposals, anchors_levels
def merge_detects_all(data_all,
                      img_dir=None,
                      is_soft=False,
                      is_vote=False,
                      is_small_extend=False,
                      beta=5):
    """Merge per-image detections with NMS, optional box voting/soft-NMS,
    and optional border down-weighting.

    Args:
        data_all (dict): image filename -> per-class list of (n, 5)
            detection arrays [x1, y1, x2, y2, score].
        img_dir (str | None): image directory; read only when the
            module-level `is_border` flag is set (image size needed).
        is_soft (bool): additionally apply soft-NMS after voting.
        is_vote (bool): apply IoU-weighted box voting on the NMS output.
        is_small_extend (bool): unused; kept for interface compatibility.
        beta (int): box-voting temperature passed to box_utils.

    Returns:
        dict: image filename -> max_class_per_position(...) of the final
        per-class detections.

    NOTE(review): relies on module-level globals `start0`, `is_border`,
    `num_cls`, `nms_th`, `cls_th_v2` -- confirm they are defined before
    this function is called.
    """
    data_vote = {}
    max_dets = {}
    start = time.time()
    print('==> data load', start - start0)
    count = 0
    for key, bxs in data_all.items():
        if is_border:
            # Image size is needed to detect boxes hugging the border.
            img = cv2.imread(img_dir + key)
            sz = img.shape
        count += 1
        if count % 5 == 0:
            end = time.time()
            print(count, 'time=%0.2f' % (end - start))
        if key not in data_vote:
            data_vote[key] = [
                np.empty((0, 5), np.float32) for cls in range(num_cls)
            ]
        # CLEANUP: removed a dead `cv2.imread`/`im_sz` pair whose only
        # consumer was the commented-out get_merged_box call, plus the
        # unused `result` and `zz` locals.
        # bxs = get_merged_box(bxs, im_sz)
        for cls in range(num_cls):
            if len(bxs[cls]) == 0:
                continue
            dets_all = np.array(bxs[cls], np.float32)
            dets_nms, _ = nms(dets_all, nms_th)
            group_coef = 0.75
            if is_vote:
                vote_dets = box_utils.box_voting(dets_nms,
                                                 dets_all,
                                                 group_coef,
                                                 scoring_method='IOU_WAVG',
                                                 beta=beta)
            else:
                vote_dets = dets_nms
            if is_soft:
                # Cap at the 4000 highest-score boxes before soft-NMS.
                idx = np.argsort(-vote_dets[:, 4])
                dim = min(4000, len(idx))
                vote_dets = vote_dets[idx[:dim]].copy()
                vote_dets, _ = soft_nms(vote_dets, 0.5, min_score=1e-9)
            if is_border:
                # Down-weight boxes within 10 px of the image border.
                vote_dets[vote_dets[:, 0] < 10, 4] *= 0.7
                vote_dets[vote_dets[:, 1] < 10, 4] *= 0.7
                vote_dets[vote_dets[:, 3] > sz[0] - 10, 4] *= 0.7
                vote_dets[vote_dets[:, 2] > sz[1] - 10, 4] *= 0.7
            # Final per-class score threshold.
            vote_dets = vote_dets[vote_dets[:, 4] > cls_th_v2]
            data_vote[key][cls] = vote_dets.copy()
        max_dets[key] = max_class_per_position(data_vote[key])
    return max_dets