# Shared imports assumed by the snippets below (maskrcnn-benchmark project layout):
import os
import json
import time

import numpy as np
import torch
from PIL import Image
from tqdm import tqdm

from maskrcnn_benchmark.structures.bounding_box import BoxList
from maskrcnn_benchmark.structures.boxlist_ops import (
    boxlist_iou,
    boxlist_nms,
    cat_boxlist,
    remove_small_boxes,
)
from maskrcnn_benchmark.modeling.rpn.utils import permute_and_flatten


def filter_results(self, boxlist, num_classes, return_idxs=False):
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS).
    """
    # unwrap the boxlist to avoid additional overhead.
    # if we had multi-class NMS, we could perform this directly on the boxlist
    boxes = boxlist.bbox.reshape(-1, num_classes * 4)
    scores = boxlist.get_field("scores").reshape(-1, num_classes)
    device = scores.device
    result = []
    idxs = []
    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    inds_all = scores > self.score_thresh
    for j in range(1, num_classes):
        inds = inds_all[:, j].nonzero().squeeze(1)
        scores_j = scores[inds, j]
        boxes_j = boxes[inds, j * 4:(j + 1) * 4]
        boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
        boxlist_for_class.add_field("scores", scores_j)
        if return_idxs:
            boxlist_for_class, boxlist_idxs = boxlist_nms(
                boxlist_for_class, self.nms, return_idxs=True)
            inds_j = inds[boxlist_idxs]
        else:
            boxlist_for_class = boxlist_nms(
                boxlist_for_class, self.nms, return_idxs=False)
        num_labels = len(boxlist_for_class)
        boxlist_for_class.add_field(
            "labels",
            torch.full((num_labels,), j, dtype=torch.int64, device=device))
        result.append(boxlist_for_class)
        if return_idxs:
            idxs.append(inds_j)
    result = cat_boxlist(result)
    number_of_detections = len(result)
    if return_idxs:
        idxs = torch.cat(idxs, dim=0)
    # Limit to max_per_image detections **over all classes**
    if number_of_detections > self.detections_per_img > 0:
        cls_scores = result.get_field("scores")
        image_thresh, _ = torch.kthvalue(
            cls_scores.cpu(),
            number_of_detections - self.detections_per_img + 1)
        keep = cls_scores >= image_thresh.item()
        keep = torch.nonzero(keep).squeeze(1)
        result = result[keep]
        if return_idxs:
            idxs = idxs[keep]
    if return_idxs:
        return result, idxs
    return result
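
# Note (editor's sketch): the torch.kthvalue call above keeps the top
# `detections_per_img` scores without a full sort. The k-th smallest of n
# values, with k = n - top_n + 1, is exactly the lowest score that still
# belongs to the top top_n (ties may keep a few extra boxes). A minimal,
# self-contained illustration with made-up numbers:
def _demo_kthvalue_topk():
    scores = torch.tensor([0.9, 0.1, 0.75, 0.4, 0.6])
    top_n = 3
    n = scores.numel()
    # the k-th smallest value (k = n - top_n + 1) is the cutoff for the top_n scores
    thresh, _ = torch.kthvalue(scores, n - top_n + 1)
    keep = torch.nonzero(scores >= thresh).squeeze(1)
    return keep  # tensor([0, 2, 4]) -> the three highest scores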
def select_over_all_levels(self, boxlists):
    num_images = len(boxlists)
    results = []
    for i in range(num_images):
        scores = boxlists[i].get_field("scores")
        scores_all = boxlists[i].get_field("scores_all")
        labels = boxlists[i].get_field("labels")
        boxes = boxlists[i].bbox
        boxlist = boxlists[i]
        result = []
        # skip the background
        for j in range(1, self.num_classes):
            inds = (labels == j).nonzero().view(-1)
            scores_j = scores[inds]
            scores_j_all = scores_all[inds]
            boxes_j = boxes[inds, :].view(-1, 4)
            boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
            boxlist_for_class.add_field("scores", scores_j)
            boxlist_for_class.add_field("scores_all", scores_j_all)
            boxlist_for_class = boxlist_nms(
                boxlist_for_class, self.nms_thresh,
                score_field="scores", iou_flag=True
            )
            if self.use_nms_iom:
                boxlist_for_class = boxlist_nms(
                    boxlist_for_class, self.nms_iom,
                    score_field="scores", iou_flag=False
                )
            num_labels = len(boxlist_for_class)
            boxlist_for_class.add_field(
                "labels",
                torch.full((num_labels,), j, dtype=torch.int64,
                           device=scores.device)
            )
            result.append(boxlist_for_class)
        result = cat_boxlist(result)
        if self.use_nms_inter_class:
            result = boxlist_nms(
                result, self.nms_inter_class,
                score_field="scores", iou_flag=True
            )
        number_of_detections = len(result)
        # Limit to max_per_image detections **over all classes**
        if number_of_detections > self.fpn_post_nms_top_n > 0:
            cls_scores = result.get_field("scores")
            image_thresh, _ = torch.kthvalue(
                cls_scores.cpu(),
                number_of_detections - self.fpn_post_nms_top_n + 1
            )
            keep = cls_scores >= image_thresh.item()
            keep = torch.nonzero(keep).squeeze(1)
            result = result[keep]
        results.append(result)
    return results
def forward_for_single_feature_map(self, anchors, objectness, box_regression):
    """
    Arguments:
        anchors: list[BoxList]
        objectness: tensor of size N, A, H, W
        box_regression: tensor of size N, A * 4, H, W
    """
    device = objectness.device
    N, A, H, W = objectness.shape

    # put in the same format as anchors
    objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
    objectness = objectness.sigmoid()  # objectness: (N, H*W*A)
    box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)
    # box_regression: (N, H*W*A, 4)

    num_anchors = A * H * W
    pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
    # select the pre_nms_top_n highest-scoring anchors
    objectness, topk_idx = objectness.topk(pre_nms_top_n, dim=1, sorted=True)

    batch_idx = torch.arange(N, device=device)[:, None]
    # select the corresponding box_regression entries
    box_regression = box_regression[batch_idx, topk_idx]

    # anchors: one BoxList per image in the batch
    image_shapes = [box.size for box in anchors]
    concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
    # concat_anchors: (N * H*W*A, 4) -> (N, pre_nms_top_n, 4)
    concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

    proposals = self.box_coder.decode(
        box_regression.view(-1, 4), concat_anchors.view(-1, 4))
    proposals = proposals.view(N, -1, 4)  # proposals: (N, pre_nms_top_n, 4)

    result = []
    for proposal, score, im_shape in zip(proposals, objectness, image_shapes):
        boxlist = BoxList(proposal, im_shape, mode="xyxy")
        boxlist.add_field("objectness", score)
        boxlist = boxlist.clip_to_image(remove_empty=False)
        boxlist = remove_small_boxes(boxlist, self.min_size)
        boxlist = boxlist_nms(
            boxlist,
            self.nms_thresh,
            max_proposals=self.post_nms_top_n,
            score_field="objectness",
        )
        result.append(boxlist)
    # result: [BoxList (with "objectness" field)] * N
    return result
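
# Note (editor's sketch): for reference, this is what permute_and_flatten does
# in maskrcnn-benchmark: it reorders a head output from (N, A*C, H, W) to
# (N, H*W*A, C) so scores and regressions line up with the anchor ordering.
# A standalone version written from its documented behavior:
def _permute_and_flatten_sketch(layer, N, A, C, H, W):
    layer = layer.view(N, A, C, H, W)     # split the fused A*C channel dim
    layer = layer.permute(0, 3, 4, 1, 2)  # (N, H, W, A, C)
    return layer.reshape(N, -1, C)        # (N, H*W*A, C)

# e.g. _permute_and_flatten_sketch(torch.randn(2, 3 * 4, 5, 5), 2, 3, 4, 5, 5)
# has shape (2, 75, 4).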
def _class_independ_nms(self, boxlist, nms_thresh):
    """Class-wise (per-class) non-maximum suppression (NMS).

    :param boxlist (BoxList): input detection boxes
    :param nms_thresh (float): NMS threshold
    :return: BoxList after per-class NMS
    """
    scores = boxlist.get_field("scores")
    labels = boxlist.get_field("labels")
    boxes = boxlist.bbox
    num_classes = self.spire_anno.num_classes
    result = []
    for i in range(1, num_classes + 1):  # boxlist includes background class 0
        inds = (labels == i).nonzero().view(-1)
        scores_i = scores[inds]
        boxes_i = boxes[inds, :].view(-1, 4)
        boxlist_for_class = BoxList(boxes_i, boxlist.size, mode="xyxy")
        boxlist_for_class.add_field("scores", scores_i)
        boxlist_for_class = boxlist_nms(
            boxlist_for_class, nms_thresh, score_field="scores")
        num_labels = len(boxlist_for_class)
        boxlist_for_class.add_field(
            "labels",
            torch.full((num_labels,), i, dtype=torch.int64,
                       device=scores.device))
        result.append(boxlist_for_class)
    result = cat_boxlist(result)
    return result
def select_over_all_levels(self, boxlists):
    num_images = len(boxlists)
    results = []
    for i in range(num_images):
        scores = boxlists[i].get_field("scores")
        boxes = boxlists[i].bbox
        boxlist = boxlists[i]
        result = []
        # single foreground class: no per-label split is needed
        scores_j = scores
        boxes_j = boxes
        boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
        boxlist_for_class.add_field("scores", scores_j)
        boxlist_for_class = boxlist_nms(
            boxlist_for_class, self.nms_thresh, score_field="scores")
        result.append(boxlist_for_class)
        result = cat_boxlist(result)
        number_of_detections = len(result)
        # Limit to max_per_image detections **over all classes**
        if number_of_detections > self.fpn_post_nms_top_n > 0:
            cls_scores = result.get_field("scores")
            sorted_cls_scores, sorted_cls_indices = torch.sort(
                cls_scores, descending=True)
            keep = sorted_cls_indices[:self.fpn_post_nms_top_n]
            result = result[keep]
        results.append(result)
    return results
def select_over_all_levels(self, boxlists):
    num_images = len(boxlists)
    results = []
    for i in range(num_images):
        scores = boxlists[i].get_field("scores")
        coeffs = boxlists[i].get_field("coeffs")
        boxes = boxlists[i].bbox
        boxlist = boxlists[i]
        if cfg.MODEL.YOLACT.USE_FAST_NMS:
            scores, boxes, coeffs, labels = self.fast_nms(scores, boxes, coeffs)
            result = BoxList(boxes, boxlist.size, mode="xyxy")
            result.add_field("scores", scores)
            result.add_field("coeffs", coeffs)
            result.add_field("labels", labels)
        else:
            labels = boxlists[i].get_field("labels")
            result = []
            # skip the background
            for j in range(1, self.num_classes):
                inds = (labels == j).nonzero().squeeze(1)
                # if inds.numel() == 0:
                #     continue
                scores_j = scores[inds]
                coeffs_j = coeffs[inds, :]
                boxes_j = boxes[inds, :]
                boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
                boxlist_for_class.add_field("scores", scores_j)
                boxlist_for_class.add_field("coeffs", coeffs_j)
                # per-class NMS
                boxlist_for_class = boxlist_nms(
                    boxlist_for_class, self.nms_thresh, score_field="scores"
                )
                num_labels = len(boxlist_for_class)
                boxlist_for_class.add_field(
                    "labels",
                    torch.full((num_labels,), j, dtype=torch.int64,
                               device=scores.device)
                )
                result.append(boxlist_for_class)
            result = cat_boxlist(result)
        # Limit to max_per_image detections **over all classes**
        number_of_detections = len(result)
        if number_of_detections > self.fpn_post_nms_top_n > 0:
            cls_scores = result.get_field("scores")
            image_thresh, _ = torch.kthvalue(
                cls_scores.cpu(),
                number_of_detections - self.fpn_post_nms_top_n + 1
            )
            keep = cls_scores >= image_thresh.item()
            keep = torch.nonzero(keep).squeeze(1)
            result = result[keep]
        results.append(result)
    return results
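
# Note (editor's sketch): self.fast_nms above refers to YOLACT's batched
# approximation of NMS: per class, IoU is computed once against higher-scoring
# boxes (upper triangle of the IoU matrix), and a box survives if its max IoU
# with any higher-scoring box stays below the threshold. A single-class sketch
# of the idea, with a hypothetical _pairwise_iou helper (assumes >= 1 box):
def _pairwise_iou(boxes):
    # boxes: (n, 4) in xyxy order; returns an (n, n) IoU matrix
    area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    lt = torch.max(boxes[:, None, :2], boxes[None, :, :2])
    rb = torch.min(boxes[:, None, 2:], boxes[None, :, 2:])
    wh = (rb - lt).clamp(min=0)
    inter = wh[..., 0] * wh[..., 1]
    return inter / (area[:, None] + area[None, :] - inter)

def _fast_nms_single_class(boxes, scores, iou_thresh=0.5):
    # sort by score so row i can only suppress lower-scoring columns j > i
    scores, order = scores.sort(descending=True)
    boxes = boxes[order]
    iou = _pairwise_iou(boxes).triu(diagonal=1)  # keep the upper triangle only
    # a box is kept if no higher-scoring box overlaps it above the threshold
    keep = iou.max(dim=0).values <= iou_thresh
    return boxes[keep], scores[keep]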
def forward_for_single_feature_map(self, anchors, objectness,
                                   box_regression, box_orien):
    """
    Arguments:
        anchors: list[BoxList]
        objectness: tensor of size N, A, H, W
        box_regression: tensor of size N, A * 4, H, W
        box_orien: tensor of size N, A * 2, H, W
    """
    device = objectness.device
    N, A, H, W = objectness.shape

    # put in the same format as anchors
    objectness = objectness.permute(0, 2, 3, 1).reshape(N, -1)
    objectness = objectness.sigmoid()

    box_orien = box_orien.view(N, -1, 2, H, W).permute(0, 3, 4, 1, 2)
    box_orien = box_orien.reshape(N, -1, 2)

    box_regression = box_regression.view(N, -1, 4, H, W).permute(0, 3, 4, 1, 2)
    box_regression = box_regression.reshape(N, -1, 4)

    num_anchors = A * H * W
    pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
    objectness, topk_idx = objectness.topk(pre_nms_top_n, dim=1, sorted=True)

    batch_idx = torch.arange(N, device=device)[:, None]
    box_regression = box_regression[batch_idx, topk_idx]
    box_orien = box_orien[batch_idx, topk_idx]

    image_shapes = [box.size for box in anchors]
    concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
    concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

    proposals = self.box_coder.decode(
        box_regression.view(-1, 4), concat_anchors.view(-1, 4))
    proposals = proposals.view(N, -1, 4)

    result = []
    for proposal, score, im_shape, orien in zip(
            proposals, objectness, image_shapes, box_orien):
        boxlist = BoxList(proposal, im_shape, mode="xyxy")
        boxlist.add_field("objectness", score)
        boxlist.add_field("rotations", orien)
        boxlist = boxlist.clip_to_image(remove_empty=False)
        boxlist = remove_small_boxes(boxlist, self.min_size, self.max_size)
        boxlist = boxlist_nms(
            boxlist,
            self.nms_thresh,
            max_proposals=self.post_nms_top_n,
            score_field="objectness",
        )
        result.append(boxlist)
    return result
def track_per_video(video_name):
    print(video_name)
    json_path = os.path.join(root, video_name + '.json')
    with open(json_path, 'r') as f:
        proposal_dict = json.load(f)
    gt = None
    frame_num = len(proposal_dict)
    boxes = np.zeros((frame_num, 4))  # xywh
    times = np.zeros(frame_num)
    i = 0
    for img_name, proposals_ in proposal_dict.items():
        start_time = time.time()
        if img_name == '00000001.jpg':
            gt = proposals_[0][:-1]
            boxes[0] = gt
            times[0] = time.time() - start_time
            gt = torch.Tensor(gt).reshape(1, 4)
            gt = BoxList(gt, (-1, -1), mode="xywh").convert("xyxy")
            i += 1
            continue
        proposals = [proposal[:-1] for proposal in proposals_]
        scores = [proposal[-1] for proposal in proposals_]
        scores = torch.Tensor(scores)
        proposals = torch.Tensor(proposals)
        proposals = BoxList(proposals, (-1, -1), mode="xywh").convert("xyxy")
        proposals.add_field('objectness', scores)
        # Run NMS on the proposals and keep the top_n candidates.
        proposals_nms = boxlist_nms(
            proposals,
            0.1,
            max_proposals=10,
            score_field="objectness",
        )
        last_box = torch.Tensor(boxes[i - 1]).reshape(1, 4)
        last_box = BoxList(last_box, (-1, -1), mode="xywh").convert("xyxy")
        overlaps = boxlist_iou(proposals_nms, last_box).squeeze(0)
        selected_id = torch.argmax(overlaps)
        if overlaps[selected_id] == 0:
            # target lost: no candidate overlaps the previous box, so fall
            # back to the highest-objectness proposal
            print('target lost')
            selected_id = torch.argmax(proposals_nms.extra_fields['objectness'])
        proposals_nms = proposals_nms.convert("xywh")
        res_box = proposals_nms.bbox[selected_id].cpu().numpy()
        boxes[i] = res_box
        # visualization(video_name, img_name, proposals_nms.bbox, res_box, boxes[i - 1])
        times[i] = time.time() - start_time
        i += 1
    # Save the per-frame tracking results.
    record_file = os.path.join(cfg.OUTPUT_DIR, 'result', video_name,
                               '%s_%03d.txt' % (video_name, 1))
    record_dir = os.path.dirname(record_file)
    if not os.path.isdir(record_dir):
        os.makedirs(record_dir)
    np.savetxt(record_file, boxes, fmt='%.3f', delimiter=',')
    # Save the per-frame timing file.
    time_file = record_file[:record_file.rfind('_')] + '_time.txt'
    times = times[:, np.newaxis]
    np.savetxt(time_file, times, fmt='%.8f', delimiter=',')
def filter_results(self, boxlist, num_classes, feature=None):
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS).
    """
    # unwrap the boxlist to avoid additional overhead.
    # if we had multi-class NMS, we could perform this directly on the boxlist
    boxes = boxlist.bbox.reshape(-1, num_classes * 4)
    scores = boxlist.get_field("scores").reshape(-1, num_classes)
    device = scores.device
    result = []
    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    inds_all = scores > self.score_thresh
    boxlist_empty = self.prepare_empty_boxlist(boxlist)
    for j in range(1, num_classes):
        inds = inds_all[:, j].nonzero().squeeze(1)
        if len(inds) > 0:
            scores_j = scores[inds, j]
            boxes_j = boxes[inds, j * 4:(j + 1) * 4]
            boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
            boxlist_for_class.add_field("scores", scores_j)
            if self.output_feature:
                feature_j = feature[inds]
                boxlist_for_class.add_field("box_features", feature_j)
                scores_all = scores[inds]
                boxlist_for_class.add_field("scores_all", scores_all)
                boxlist_for_class.add_field(
                    "boxes_all", boxes[inds].view(-1, num_classes, 4))
            boxlist_for_class = boxlist_nms(boxlist_for_class, self.nms)
            num_labels = len(boxlist_for_class)
            boxlist_for_class.add_field(
                "labels",
                torch.full((num_labels,), j, dtype=torch.int64, device=device))
            result.append(boxlist_for_class)
        else:
            result.append(boxlist_empty)
    result = cat_boxlist(result)
    number_of_detections = len(result)
    # Limit to max_per_image detections **over all classes**
    if number_of_detections > self.detections_per_img > 0:
        cls_scores = result.get_field("scores")
        image_thresh, _ = torch.kthvalue(
            cls_scores.cpu(),
            number_of_detections - self.detections_per_img + 1)
        keep = cls_scores >= image_thresh.item()
        keep = torch.nonzero(keep).squeeze(1)
        result = result[keep]
    return result
def select_over_all_levels(self, boxlists):
    num_images = len(boxlists)
    results = []
    for i in range(num_images):
        scores = boxlists[i].get_field("scores")
        labels = boxlists[i].get_field("labels")
        boxes = boxlists[i].bbox
        boxlist = boxlists[i]
        result = []
        # skip the background; 81 = 80 foreground classes + background
        for j in range(1, 81):
            inds = (labels == j).nonzero().view(-1)
            if len(inds) == 0:
                continue
            scores_j = scores[inds]
            boxes_j = boxes[inds, :].view(-1, 4)
            boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
            boxlist_for_class.add_field("scores", scores_j)
            boxlist_for_class = boxlist_nms(
                boxlist_for_class, self.nms_thresh, score_field="scores")
            num_labels = len(boxlist_for_class)
            boxlist_for_class.add_field(
                "labels",
                torch.full((num_labels,), j, dtype=torch.int64,
                           device=scores.device))
            result.append(boxlist_for_class)
        if len(result):
            result = cat_boxlist(result)
            number_of_detections = len(result)
            # Limit to max_per_image detections **over all classes**
            if number_of_detections > self.fpn_post_nms_top_n > 0:
                cls_scores = result.get_field("scores")
                image_thresh, _ = torch.kthvalue(
                    cls_scores.cpu(),
                    number_of_detections - self.fpn_post_nms_top_n + 1)
                keep = cls_scores >= image_thresh.item()
                keep = torch.nonzero(keep).squeeze(1)
                result = result[keep]
            results.append(result)
        else:
            # no detections: emit a single dummy box so downstream code
            # always receives a non-empty BoxList
            device = boxlist.bbox.device
            empty_boxlist = BoxList(torch.zeros(1, 4).to(device), boxlist.size)
            empty_boxlist.add_field("labels", torch.LongTensor([1]).to(device))
            empty_boxlist.add_field("scores", torch.Tensor([0.01]).to(device))
            results.append(empty_boxlist)
    return results
def filter_results(self, boxlist, num_classes):
    """First drop boxes with score <= score_thresh, then run NMS per class,
    and finally sort all surviving boxes by score, keeping only the
    detections_per_img highest-scoring ones.
    """
    # [roi_per_img*81, 4] --> [roi_per_img, 81*4]
    boxes = boxlist.bbox.reshape(-1, num_classes * 4)
    # [roi_per_img*81] --> [roi_per_img, 81]
    scores = boxlist.get_field("scores").reshape(-1, num_classes)
    device = scores.device
    # result holds one BoxList per class
    result = []
    # [roi_per_img, 81]; True where the score exceeds the threshold
    inds_all = scores > self.score_thresh  # 0.05
    for j in range(1, num_classes):  # 0 is the background class
        # indices of the RoIs whose score for class j exceeds the threshold
        inds = inds_all[:, j].nonzero().squeeze(1)
        # all above-threshold scores for class j
        scores_j = scores[inds, j]
        # all above-threshold boxes for class j
        boxes_j = boxes[inds, j * 4:(j + 1) * 4]
        # build the BoxList for the current class
        boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
        boxlist_for_class.add_field("scores", scores_j)
        boxlist_for_class = boxlist_nms(boxlist_for_class, self.nms)
        num_labels = len(boxlist_for_class)
        boxlist_for_class.add_field(
            "labels",
            torch.full((num_labels,), j, dtype=torch.int64, device=device))
        result.append(boxlist_for_class)
    # merge the per-class BoxLists into a single one
    result = cat_boxlist(result)
    # there were roi_per_img*81 boxes before filtering; this is how many
    # survive the score threshold and NMS
    number_of_detections = len(result)
    # detections_per_img defaults to 100, i.e. at most 100 boxes per image
    # across all classes
    if number_of_detections > self.detections_per_img > 0:
        cls_scores = result.get_field("scores")
        # kthvalue returns the k-th smallest element and its index
        image_thresh, _ = torch.kthvalue(
            cls_scores.cpu(),
            number_of_detections - self.detections_per_img + 1)
        keep = cls_scores >= image_thresh.item()
        keep = torch.nonzero(keep).squeeze(1)
        result = result[keep]
    return result
def my_filter_results(self, boxlist, num_classes):
    boxes = boxlist.bbox.reshape(-1, num_classes * 4)
    scores = boxlist.get_field("scores").reshape(-1, num_classes)
    num_boxes = boxes.size(0)
    features = boxlist.get_field('features').reshape(num_boxes, -1)
    device = scores.device
    result = []
    # Apply threshold on detection probabilities and apply NMS.
    # Skip j = 0, because it's the background class.
    # Start with a low threshold; if at most 100 detections survive, retry
    # once with the threshold dropped to 0, then stop either way (the
    # original retry loop could spin forever when even thresh=0 produced
    # <= 100 detections).
    thresh = 0.005
    while True:
        result = []
        inds_all = scores > thresh
        for j in range(1, num_classes):
            inds = inds_all[:, j].nonzero().squeeze(1)
            scores_j = scores[inds, j]
            boxes_j = boxes[inds, j * 4:(j + 1) * 4]
            features_j = features[inds, :]
            boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
            boxlist_for_class.add_field("scores", scores_j)
            boxlist_for_class.add_field('features', features_j)
            boxlist_for_class, keep = boxlist_nms(
                boxlist_for_class, self.nms, return_idxs=True)
            num_labels = len(boxlist_for_class)
            boxlist_for_class.add_field(
                "labels",
                torch.full((num_labels,), j, dtype=torch.int64, device=device))
            result.append(boxlist_for_class)
        result = cat_boxlist(result)
        number_of_detections = len(result)
        # Limit to max_per_image detections **over all classes**
        if number_of_detections > 100:
            cls_scores = result.get_field("scores")
            image_thresh, _ = torch.kthvalue(
                cls_scores.cpu(),
                number_of_detections - self.detections_per_img + 1)
            keep = cls_scores >= image_thresh.item()
            keep = torch.nonzero(keep).squeeze(1)
            result = result[keep]
            break
        if thresh == 0:
            break  # already retried with thresh=0; accept what we have
        thresh = 0
    return result
def sliding_window_wsi(self, pil_image, nms_thresh=0.3, model_size=600,
                       overlap=200):
    step = model_size - overlap
    w, h = pil_image.size
    l_bboxes = []
    l_scores = []
    l_labels = []
    for i in range(0, w, step):
        for j in range(0, h, step):
            # clamp windows that would run past the image border
            if i + model_size > w:
                i = w - model_size
            if j + model_size > h:
                j = h - model_size
            image1 = pil_image.crop((i, j, i + model_size, j + model_size))
            pil_image_rgb = image1.convert("RGB")
            image = np.array(pil_image_rgb)[:, :, [2, 1, 0]]  # RGB -> BGR
            boxlist = self.compute_prediction(image)
            bbox = boxlist.bbox
            # shift the window-local boxes back to whole-slide coordinates
            bbox_delta = torch.Tensor([i, j, i, j]).expand_as(bbox)
            l_bboxes.append(bbox + bbox_delta)
            if boxlist.has_field('scores'):
                l_scores.append(boxlist.get_field('scores'))
            if boxlist.has_field('objectness'):
                l_scores.append(boxlist.get_field('objectness'))
                # l_labels.append(boxlist.get_field('objectness') > 0.5)
            if boxlist.has_field('labels'):
                l_labels.append(boxlist.get_field('labels'))
    if l_scores:
        bboxes = torch.cat(l_bboxes, 0)
        scores = torch.cat(l_scores, 0)
        if l_labels:
            labels = torch.cat(l_labels, 0)
        else:
            labels = scores > 0.5
        boxlist = BoxList(bboxes, pil_image.size, mode="xyxy")
        boxlist.add_field("scores", scores)
        boxlist.add_field("labels", labels)
        boxlist = boxlist_nms(
            boxlist,
            nms_thresh=nms_thresh,
            max_proposals=-1,
            score_field="scores",
        )
        return boxlist
    else:
        return None
def forward_for_single_feature_map(self, anchors, objectness, box_regression):
    """
    Arguments:
        anchors: list[BoxList]
        objectness: tensor of size N, A, H, W
        box_regression: tensor of size N, A * 4, H, W
    """
    device = objectness.device
    # N = number of images in the batch, A = number of anchor ratios,
    # H / W = height / width of this feature map
    N, A, H, W = objectness.shape

    # put in the same format as anchors: give the objectness map a channel
    # dim of 1 (the per-anchor "is there an object" score), then collapse
    # height, width and anchor index into a single dimension per image
    objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
    objectness = objectness.sigmoid()

    box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

    num_anchors = A * H * W
    pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
    # Top K: scores and anchor-list indices of the pre_nms_top_n anchors
    # with the highest objectness
    objectness, topk_idx = objectness.topk(pre_nms_top_n, dim=1, sorted=True)

    batch_idx = torch.arange(N, device=device)[:, None]
    box_regression = box_regression[batch_idx, topk_idx]

    image_shapes = [box.size for box in anchors]
    concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
    concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

    proposals = self.box_coder.decode(
        box_regression.view(-1, 4), concat_anchors.view(-1, 4))
    proposals = proposals.view(N, -1, 4)

    result = []
    for proposal, score, im_shape in zip(proposals, objectness, image_shapes):
        boxlist = BoxList(proposal, im_shape, mode="xyxy")
        boxlist.add_field("objectness", score)
        boxlist = boxlist.clip_to_image(remove_empty=False)
        boxlist = remove_small_boxes(boxlist, self.min_size)
        boxlist = boxlist_nms(
            boxlist,
            self.nms_thresh,
            max_proposals=self.post_nms_top_n,
            score_field="objectness",
        )
        result.append(boxlist)
    return result
def forward_for_single_feature_map(self, anchors, objectness, box_regression):
    """
    Arguments:
        anchors: list[BoxList], each of size (H*W*ratios, 4); the list has N
            entries, where N is the batch size, H and W are the size of the
            current feature level, and 4 is x1y1x2y2
        objectness: tensor of size N, A, H, W, reshaped to (N, H*W*ratios)
        box_regression: tensor of size N, A * 4, H, W, reshaped to
            (N, H*W*ratios, 4)

    Selects the pre_nms_top_n anchors with the highest objectness, decodes
    predicted boxes (xyxy) from those anchors and box_regression (the learned
    dx dy dw dh mapping), then filters further with NMS and size constraints
    to obtain the final boxlist (the objectness score is kept in
    extra_fields).
    """
    device = objectness.device
    N, A, H, W = objectness.shape

    # put in the same format as anchors
    objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
    objectness = objectness.sigmoid()  # squash to 0-1; top-k selection follows

    box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

    num_anchors = A * H * W  # anchors per image on this feature level
    pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
    objectness, topk_idx = objectness.topk(pre_nms_top_n, dim=1, sorted=True)

    batch_idx = torch.arange(N, device=device)[:, None]
    box_regression = box_regression[batch_idx, topk_idx]

    image_shapes = [box.size for box in anchors]
    concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
    concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

    # decode the predicted proposals from the anchors and box_regression
    # (the learned dx dy dw dh mapping)
    proposals = self.box_coder.decode(
        box_regression.view(-1, 4), concat_anchors.view(-1, 4))
    proposals = proposals.view(N, -1, 4)

    result = []
    for proposal, score, im_shape in zip(proposals, objectness, image_shapes):
        boxlist = BoxList(proposal, im_shape, mode="xyxy")
        boxlist.add_field("objectness", score)
        boxlist = boxlist.clip_to_image(remove_empty=False)
        # make sure every proposal has w & h > min_size
        boxlist = remove_small_boxes(boxlist, self.min_size)
        boxlist = boxlist_nms(
            boxlist,
            self.nms_thresh,
            max_proposals=self.post_nms_top_n,
            score_field="objectness",
        )
        result.append(boxlist)
    return result  # one BoxList per image in the batch (N entries)
def filter_results(self, boxlist, num_classes):
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS).
    """
    # unwrap the boxlist to avoid additional overhead.
    # if we had multi-class NMS, we could perform this directly on the boxlist
    boxes = boxlist.bbox.reshape(-1, num_classes * 4)
    scores = boxlist.get_field("scores").reshape(-1, num_classes)
    device = scores.device
    if 'head_id' in boxlist.fields():
        head_id = boxlist.get_field('head_id')
    else:
        # keep the fallback on the same device as the scores, so indexing
        # with GPU tensors below does not fail
        head_id = torch.zeros(scores.shape[0], device=device)
    result = []
    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    inds_all = scores > self.score_thresh
    for j in range(1, num_classes):
        inds = inds_all[:, j].nonzero().squeeze(1)
        scores_j = scores[inds, j]
        head_id_j = head_id[inds]
        boxes_j = boxes[inds, j * 4:(j + 1) * 4]
        boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
        boxlist_for_class.add_field("scores", scores_j)
        boxlist_for_class.add_field("objectness", scores_j)
        boxlist_for_class.add_field('head_id', head_id_j)
        if self.cfg.MODEL.ROI_BOX_HEAD.K_HEAD > 1 and \
                self.cfg.MODEL.ROI_HEADS.NMS_TYPE == 'set':
            boxlist_for_class = set_cpu_nms(
                boxlist_for_class, self.nms, score_field="scores")
        else:
            boxlist_for_class = boxlist_nms(
                boxlist_for_class, self.nms, score_field="scores")
        num_labels = len(boxlist_for_class)
        boxlist_for_class.add_field(
            "labels",
            torch.full((num_labels,), j, dtype=torch.int64, device=device))
        result.append(boxlist_for_class)
    result = cat_boxlist(result)
    number_of_detections = len(result)
    # Limit to max_per_image detections **over all classes**
    if number_of_detections > self.detections_per_img > 0:
        cls_scores = result.get_field("scores")
        image_thresh, _ = torch.kthvalue(
            cls_scores.cpu(),
            number_of_detections - self.detections_per_img + 1)
        keep = cls_scores >= image_thresh.item()
        keep = torch.nonzero(keep).squeeze(1)
        result = result[keep]
    return result
def forward_for_single_feature_map(self, anchors, objectness, box_regression):
    """
    Arguments:
        anchors: list[BoxList]
        objectness: tensor of size N, A, H, W
        box_regression: tensor of size N, A * 4, H, W
    """
    device = objectness.device
    N, A, H, W = objectness.shape

    # put in the same format as anchors
    objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
    objectness = objectness.sigmoid()
    box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

    num_anchors = A * H * W
    pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
    objectness, topk_idx = objectness.topk(pre_nms_top_n, dim=1, sorted=True)

    batch_idx = torch.arange(N, device=device)[:, None]
    box_regression = box_regression[batch_idx, topk_idx]

    image_shapes = [box.size for box in anchors]
    concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
    concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

    proposals = self.box_coder.decode(
        box_regression.view(-1, 4), concat_anchors.view(-1, 4))
    proposals = proposals.view(N, -1, 4)

    result = []
    for proposal, score, im_shape in zip(proposals, objectness, image_shapes):
        boxlist = BoxList(proposal, im_shape, mode="xyxy")
        boxlist.add_field("objectness", score)
        # uncomment this when rpn only !!!!
        # boxlist.add_field("labels", score > 0.5)
        boxlist = boxlist.clip_to_image(remove_empty=False)
        boxlist = remove_small_boxes(boxlist, self.min_size)
        boxlist = boxlist_nms(
            boxlist,
            self.nms_thresh,
            max_proposals=self.post_nms_top_n,
            score_field="objectness",
        )
        result.append(boxlist)
    return result
def detect_masked(img, coco_demo, preds, args, masking_color=128):
    masking_thresh = args.masking_thresh
    if args.all_at_once:
        if args.masking_type == 'bbox':
            mask = mask_from_boxes(img, preds, args)
            # mask = get_blocked_image(img, preds.bbox,
            #                          inflation=masking_args.inflation_factor,
            #                          is_abs=False, block_color=masking_color)
        else:
            # mask = mask_from_masks(img, preds)  # inflation factor not used here.
            if len(preds) == 0:
                mask = img * 0
            else:
                mask = mask_from_masks(img, preds, args)
        if args.negative_masking:
            mask = 1 - mask
        masked_img = img * mask + (1 - mask) * masking_color
        preds_masked = coco_demo.compute_prediction(masked_img[:, :, ::-1])
        return preds_masked
    else:
        all_preds = []
        ff = preds.extra_fields
        # if 'mask' in preds.extra_fields:
        #     del preds.extra_fields['mask']
        n_above = sum(ff['scores'].numpy() >= args.masking_thresh)
        for bb, score, mask in tqdm(zip(preds.bbox, ff['scores'], ff['mask']),
                                    desc='masking one by one...',
                                    total=n_above):
            if score < args.masking_thresh:
                continue
            if args.masking_type == 'bbox':
                cur_mask = img * 0
                mask_from_a_box(cur_mask, bb, args.inflation_factor)
            else:
                cur_mask = np.repeat(mask.numpy().transpose(1, 2, 0), 3, 2)
            if args.negative_masking:
                cur_mask = 1 - cur_mask
            masked_img = img * cur_mask + (1 - cur_mask) * masking_color
            preds_masked = coco_demo.compute_prediction(masked_img[:, :, ::-1])
            all_preds.append(preds_masked)
            if 'mask' in preds_masked.extra_fields:
                del preds_masked.extra_fields['mask']
        if len(all_preds) > 0:
            all_preds = cat_boxlist(all_preds)
            all_preds = boxlist_nms(all_preds, .5, max_proposals=100,
                                    score_field="scores")
        else:
            all_preds = BoxList(torch.Tensor(0, 4), img.shape[:2])
        return all_preds
def select_over_all_levels(self, boxlists):
    num_images = len(boxlists)
    # different behavior during training and during testing:
    # during training, post_nms_top_n is over *all* the proposals combined,
    # while during testing, it is over the proposals for each image
    # NOTE: it should be per image, and not per batch. However, to be
    # consistent with Detectron, the default is per batch (see Issue #672)
    for i in range(num_images):
        boxlist = boxlists[i]
        objectness = boxlist.get_field("objectness")
        # objectness = objectness[:, 1]
        pre_nms_top_n = min(self.pre_nms_top_n, len(objectness))
        _, inds_sorted = torch.topk(objectness, pre_nms_top_n, dim=0,
                                    sorted=True)
        boxlist = boxlist[inds_sorted]
        boxlist = boxlist_nms(
            boxlist,
            self.nms_thresh,
            max_proposals=self.post_nms_top_n,
            score_field="objectness",
        )
        # boxlist.add_field('objectness', boxlist.get_field('objectness_ori'))
        boxlists[i] = boxlist
    if self.training and self.fpn_post_nms_per_batch:
        objectness = torch.cat(
            [boxlist.get_field("objectness") for boxlist in boxlists], dim=0)
        box_sizes = [len(boxlist) for boxlist in boxlists]
        post_nms_top_n = min(self.fpn_post_nms_top_n, len(objectness))
        _, inds_sorted = torch.topk(objectness, post_nms_top_n, dim=0,
                                    sorted=True)
        inds_mask = torch.zeros_like(objectness, dtype=torch.bool)
        inds_mask[inds_sorted] = 1
        inds_mask = inds_mask.split(box_sizes)
        for i in range(num_images):
            boxlists[i] = boxlists[i][inds_mask[i]]
    else:
        for i in range(num_images):
            objectness = boxlists[i].get_field("objectness")
            post_nms_top_n = min(self.fpn_post_nms_top_n, len(objectness))
            _, inds_sorted = torch.topk(objectness, post_nms_top_n, dim=0,
                                        sorted=True)
            boxlists[i] = boxlists[i][inds_sorted]
    return boxlists
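
# Note (editor's sketch): the per-batch branch above picks the global top
# fpn_post_nms_top_n over the whole batch, then splits the boolean keep-mask
# back into per-image chunks. The mask/split pattern in isolation, with
# made-up sizes:
def _demo_per_batch_topk():
    objectness = torch.tensor([0.9, 0.2, 0.8, 0.1, 0.7, 0.6])  # 2 images x 3 boxes
    box_sizes = [3, 3]
    post_nms_top_n = 4
    _, inds_sorted = torch.topk(objectness, post_nms_top_n, dim=0, sorted=True)
    inds_mask = torch.zeros_like(objectness, dtype=torch.bool)
    inds_mask[inds_sorted] = True
    # -> (tensor([True, False, True]), tensor([False, True, True]))
    return inds_mask.split(box_sizes)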
def forward_for_single_feature_map(self, anchors, objectness, box_regression):
    """
    Arguments:
        anchors: list[BoxList] (assume list length = batch size N)
        objectness: tensor of size N, A, H, W
        box_regression: tensor of size N, A * 4, H, W
    """
    device = objectness.device
    N, A, H, W = objectness.shape

    # modify tensor shape [N, A*1, H, W] => [N, H*W*A, 1] => [N, H*W*A]
    objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
    objectness = objectness.sigmoid()
    # modify tensor shape [N, A*4, H, W] => [N, H*W*A, 4]
    box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

    num_anchors = A * H * W
    pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
    # [N, top_k_elems] scores over the H*W*A anchors
    objectness, topk_idx = objectness.topk(pre_nms_top_n, dim=1, sorted=True)

    batch_idx = torch.arange(N, device=device)[:, None]
    box_regression = box_regression[batch_idx, topk_idx]  # [N, top_k_elems, 4]

    image_shapes = [box.size for box in anchors]  # list(tuple)
    # list of (A*H*W, 4) tensors, list length N => (N*A*H*W, 4)
    concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
    concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

    # box offsets + original anchor boxes => proposals (N*top_k_elems, 4)
    proposals = self.box_coder.decode(
        box_regression.view(-1, 4), concat_anchors.view(-1, 4)
    )
    proposals = proposals.view(N, -1, 4)  # => (N, top_k_elems, 4)

    result = []
    # for each image in the batch (N); image_shapes => input image sizes
    for proposal, score, im_shape in zip(proposals, objectness, image_shapes):
        boxlist = BoxList(proposal, im_shape, mode="xyxy")
        boxlist.add_field("objectness", score)
        # clip proposals to the image bounds
        boxlist = boxlist.clip_to_image(remove_empty=False)
        boxlist = remove_small_boxes(boxlist, self.min_size)
        boxlist = boxlist_nms(
            boxlist,
            self.nms_thresh,
            max_proposals=self.post_nms_top_n,
            score_field="objectness",
        )
        result.append(boxlist)
    return result  # N * BoxList
def filter_results(self, boxlist, num_classes, unique_labels):
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS).
    """
    # unwrap the boxlist to avoid additional overhead.
    # if we had multi-class NMS, we could perform this directly on the boxlist
    boxes = boxlist.bbox  # .reshape(-1, num_classes * 4)
    scores = boxlist.get_field("scores")  # .reshape(-1, num_classes)
    result = []
    # Apply threshold on detection probabilities and apply NMS.
    # Here the boxes arrive grouped per class in contiguous chunks of size k,
    # so we slice per chunk instead of masking a (rois, classes) score matrix.
    k = scores.numel() // len(unique_labels)
    for j in range(0, num_classes):
        score_thresh = self.score_thresh
        scores_j = scores[j * k:(j + 1) * k]
        boxes_j = boxes[j * k:(j + 1) * k]
        inds = scores_j > score_thresh
        scores_j = scores_j[inds]
        boxes_j = boxes_j[inds]
        boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
        boxlist_for_class.add_field("scores", scores_j)
        if scores_j.numel():
            box_len = len(boxlist_for_class)
            labels = unique_labels[j].repeat(box_len)
            # 1-based prototype index, kept on the same device as the boxes
            proto_idx = torch.full((box_len,), j + 1, dtype=torch.int64,
                                   device=boxes_j.device)
        else:
            # assign empty tensors
            labels = scores_j.long()
            proto_idx = scores_j.long()
        boxlist_for_class.add_field("labels", labels)
        boxlist_for_class.add_field("proto_labels", proto_idx)
        boxlist_for_class = boxlist_nms(
            boxlist_for_class, self.nms, score_field="scores")
        result.append(boxlist_for_class)
    result = cat_boxlist(result)
    number_of_detections = len(result)
    # Limit to max_per_image detections **over all classes**
    if number_of_detections > self.detections_per_img > 0:
        cls_scores = result.get_field("scores")
        image_thresh, _ = torch.kthvalue(
            cls_scores.cpu(),
            number_of_detections - self.detections_per_img + 1)
        keep = cls_scores >= image_thresh.item()
        keep = torch.nonzero(keep).squeeze(1)
        result = result[keep]
    return result
def get_nms_boxes(self, detection):
    detection = boxlist_nms(detection, nms_thresh=0.5)
    _ids = detection.get_field('ids')
    _scores = detection.get_field('scores')
    # adjust the scores back to the right range
    # _scores -= torch.floor(_scores) * (_ids >= 0) * (torch.floor(_scores) != _scores)
    # _scores[_scores >= 1.] = 1.
    _scores[_scores >= 2.] = _scores[_scores >= 2.] - 2.
    _scores[_scores >= 1.] = _scores[_scores >= 1.] - 1.
    return detection, _ids, _scores
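
# Note (editor's sketch): the two in-place subtractions above decode scores
# that were apparently shifted by an integer offset (+1 or +2, encoding a
# box's status upstream) back into [0, 1). The order matters: a raw score of
# 2.3 must first drop to 0.3 before the >= 1 test runs, otherwise it would be
# decoded as 1.3. With made-up values:
def _demo_score_decode():
    scores = torch.tensor([0.40, 1.70, 2.30])  # raw, offset-encoded scores
    scores[scores >= 2.] = scores[scores >= 2.] - 2.
    scores[scores >= 1.] = scores[scores >= 1.] - 1.
    return scores  # tensor([0.4000, 0.7000, 0.3000])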
def filter_results(self, boxlist, num_classes):
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS).
    """
    # unwrap the boxlist to avoid additional overhead.
    # if we had multi-class NMS, we could perform this directly on the boxlist
    boxes = boxlist.bbox.reshape(-1, num_classes * 4)
    scores = boxlist.get_field("scores").reshape(-1, num_classes)
    device = scores.device

    assert boxlist.has_field('ids')
    ids = boxlist.get_field('ids')

    result = [self.create_empty_boxlist(device=device)
              for _ in range(1, num_classes)]
    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    inds_all = scores > self.score_thresh
    for j in range(1, num_classes):
        inds = inds_all[:, j].nonzero().squeeze(1)
        scores_j = scores[inds, j]
        boxes_j = boxes[inds, j * 4:(j + 1) * 4]
        ids_j = ids[inds]

        # detection boxes (ids < 0) go through NMS
        det_idx = ids_j < 0
        det_boxlist = BoxList(boxes_j[det_idx, :], boxlist.size, mode="xyxy")
        det_boxlist.add_field("scores", scores_j[det_idx])
        det_boxlist.add_field("ids", ids_j[det_idx])
        det_boxlist = boxlist_nms(det_boxlist, self.nms)

        # track boxes (ids >= 0) are kept as-is, if any are available
        track_idx = ids_j >= 0
        if torch.any(track_idx > 0):
            track_boxlist = BoxList(boxes_j[track_idx, :], boxlist.size,
                                    mode="xyxy")
            track_boxlist.add_field("scores", scores_j[track_idx])
            track_boxlist.add_field("ids", ids_j[track_idx])
            det_boxlist = cat_boxlist([det_boxlist, track_boxlist])

        num_labels = len(det_boxlist)
        det_boxlist.add_field(
            "labels",
            torch.full((num_labels,), j, dtype=torch.int64, device=device))
        result[j - 1] = det_boxlist
    result = cat_boxlist(result)
    return result
def filter_results(self, boxlist, num_classes):
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS).
    """
    # unwrap the boxlist to avoid additional overhead.
    # if we had multi-class NMS, we could perform this directly on the boxlist
    boxes = boxlist.bbox.reshape(-1, num_classes * 4)
    scores = boxlist.get_field("scores").reshape(-1, num_classes)
    device = scores.device
    result = []
    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    inds_all = scores > self.score_thresh
    for j in range(1, num_classes):
        # indices of this image's proposals for class j above the threshold
        inds = inds_all[:, j].nonzero().squeeze(1)
        scores_j = scores[inds, j]  # class-j scores of those proposals
        boxes_j = boxes[inds, j * 4:(j + 1) * 4]  # class-j box coordinates
        boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
        boxlist_for_class.add_field("scores", scores_j)
        # NMS is applied within a single class only
        boxlist_for_class = boxlist_nms(boxlist_for_class, self.nms)
        num_labels = len(boxlist_for_class)
        boxlist_for_class.add_field(
            "labels",
            torch.full((num_labels,), j, dtype=torch.int64, device=device))
        result.append(boxlist_for_class)
    # all surviving proposals of this image, across classes, after NMS
    result = cat_boxlist(result)
    number_of_detections = len(result)
    # Limit to max_per_image detections **over all classes**
    # (one more filtering pass, by score)
    if number_of_detections > self.detections_per_img > 0:
        cls_scores = result.get_field("scores")
        image_thresh, _ = torch.kthvalue(
            cls_scores.cpu(),
            number_of_detections - self.detections_per_img + 1)
        keep = cls_scores >= image_thresh.item()
        keep = torch.nonzero(keep).squeeze(1)
        result = result[keep]
    return result
def __getitem__(self, index):
    """
    Args:
        index (int): Index

    Returns:
        tuple: Tuple (image, target). target is a list of captions for the
        image.
    """
    img_id, sent_id = (self.ids[index].split('\t')[0],
                       self.ids[index].split('\t')[1])
    topN_box = self.topN_box_anno[img_id][int(sent_id)]
    filename = os.path.join(self.img_root, img_id + '.jpg')
    img = Image.open(filename).convert('RGB')
    sent_sg = self.sg_anno[img_id]['relations'][int(sent_id)]

    _, feature_map, precompute_bbox, img_scale, precompute_score, cls_label = \
        self.get_precompute_img_feat(img_id)
    precompute_bbox = BoxList(precompute_bbox, img.size, mode='xyxy')
    if cfg.MODEL.VG.USE_BOTTOMUP_NMS:
        precompute_bbox.add_field("scores",
                                  torch.FloatTensor(precompute_score))
        precompute_bbox, keep_inds = boxlist_nms(
            precompute_bbox,
            cfg.MODEL.VG.BOTTOMUP_NMS_THRESH,
            require_keep_idx=True)
        precompute_score = precompute_score[keep_inds.numpy()]

    sentence = self.get_sentence(img_id, int(sent_id))
    phrase_ids, gt_boxes = self.get_gt_boxes(img_id)
    target = BoxList(gt_boxes, img.size, mode="xyxy")
    vocab_label_elmo = self.vocab_embed[cls_label]

    if self.transforms is not None:
        img, target, precompute_bbox, img_scale = self.transforms(
            img, target, precompute_bbox, img_scale)

    return (None, target, img_id, phrase_ids, sent_id, sentence,
            precompute_bbox, precompute_score, feature_map, vocab_label_elmo,
            sent_sg, topN_box)
def forward_for_single_feature_map(self, anchors, objectness, box_regression):
    """
    Arguments:
        anchors: list[BoxList]
        objectness: tensor of size N, A, H, W
        box_regression: tensor of size N, A * 4, H, W
    """
    device = objectness.device
    N, A, H, W = objectness.shape

    objectness, topk_idx, box_regression = self.objectness_top_k(
        objectness, box_regression)

    batch_idx = torch.arange(N, device=device)[:, None]
    box_regression = box_regression[batch_idx, topk_idx]

    image_shapes = [box.size for box in anchors]
    concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
    concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

    proposals = self.box_coder.decode(
        box_regression.view(-1, 4), concat_anchors.view(-1, 4))
    proposals = proposals.view(N, -1, 4)

    result = []
    for proposal, score, im_shape in zip(proposals, objectness, image_shapes):
        boxlist = BoxList(proposal, im_shape, mode="xyxy")
        boxlist.add_field("objectness", score)
        boxlist = boxlist.clip_to_image(remove_empty=False)
        boxlist = remove_small_boxes(boxlist, self.min_size)
        boxlist = boxlist_nms(
            boxlist,
            self.nms_thresh,
            max_proposals=self.post_nms_top_n,
            score_field="objectness",
        )
        result.append(boxlist)
    return result
def filter_results(self, boxlist, num_classes, target_id=None):
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS).
    """
    # unwrap the boxlist to avoid additional overhead.
    # if we had multi-class NMS, we could perform this directly on the boxlist
    boxes = boxlist.bbox.reshape(-1, num_classes * 4)
    scores = boxlist.get_field("scores").reshape(-1, num_classes)
    device = scores.device
    result = []
    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    inds_all = scores > self.score_thresh
    for j in range(1, num_classes):
        inds = inds_all[:, j].nonzero().squeeze(1)
        scores_j = scores[inds, j]
        boxes_j = boxes[inds, j * 4:(j + 1) * 4]
        boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
        boxlist_for_class.add_field("scores", scores_j)
        boxlist_for_class = boxlist_nms(boxlist_for_class, self.nms)
        num_labels = len(boxlist_for_class)
        # label every kept box with the externally supplied target_id
        boxlist_for_class.add_field(
            "labels",
            torch.full((num_labels,), target_id, dtype=torch.int64,
                       device=device))
        result.append(boxlist_for_class)
    result = cat_boxlist(result)
    number_of_detections = len(result)
    # Limit to max_per_image detections **over all classes**
    if number_of_detections > self.detections_per_img > 0:
        cls_scores = result.get_field("scores")
        sorted_cls_scores, sorted_cls_indices = torch.sort(
            cls_scores, descending=True)
        keep = sorted_cls_indices[:self.detections_per_img]
        result = result[keep]
    return result
def select_over_all_levels(self, boxlists):
    num_images = len(boxlists)
    # different behavior during training and during testing:
    # during training, post_nms_top_n is over *all* the proposals combined,
    # while during testing, it is over the proposals for each image
    # TODO resolve this difference and make it consistent. It should be per
    # image, and not per batch
    if self.training:
        objectness = torch.cat(
            [boxlist.get_field("objectness") for boxlist in boxlists], dim=0)
        box_sizes = [len(boxlist) for boxlist in boxlists]
        post_nms_top_n = min(self.fpn_post_nms_top_n, len(objectness))
        _, inds_sorted = torch.topk(objectness, post_nms_top_n, dim=0,
                                    sorted=True)
        # use a bool mask (uint8 masks are deprecated for indexing)
        inds_mask = torch.zeros_like(objectness, dtype=torch.bool)
        inds_mask[inds_sorted] = 1
        inds_mask = inds_mask.split(box_sizes)
        for i in range(num_images):
            boxlists[i] = boxlists[i][inds_mask[i]]
    else:
        for i in range(num_images):
            objectness = boxlists[i].get_field("objectness")
            post_nms_top_n = min(self.fpn_post_nms_top_n, len(objectness))
            _, inds_sorted = torch.topk(objectness, post_nms_top_n, dim=0,
                                        sorted=True)
            boxlist = boxlists[i][inds_sorted]
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            boxlists[i] = boxlist
    return boxlists
def filter_results(self, boxlist, num_classes):
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS).
    """
    # unwrap the boxlist to avoid additional overhead.
    # if we had multi-class NMS, we could perform this directly on the boxlist
    boxes = boxlist.bbox.reshape(-1, num_classes * 4)
    scores = boxlist.get_field("scores").reshape(-1, num_classes)
    result = []
    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    inds_all = scores > self.score_thresh
    for j in range(1, num_classes):
        inds = inds_all[:, j].nonzero().squeeze(1)
        scores_j = scores[inds, j]
        boxes_j = boxes[inds, j * 4:(j + 1) * 4]
        boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
        boxlist_for_class.add_field("scores", scores_j)
        boxlist_for_class = boxlist_nms(boxlist_for_class, self.nms)
        boxlist_for_class.add_field(
            # we use full_like to allow tracing with flexible shape
            "labels",
            torch.full_like(boxlist_for_class.bbox[:, 0], j,
                            dtype=torch.int64))
        result.append(boxlist_for_class)
    result = cat_boxlist(result)
    scores = result.get_field("scores")
    if self.onnx_export:
        keep = self.detections_to_keep_onnx(scores)
    else:
        keep = self.detections_to_keep(scores)
    result = result[keep]
    return result
def forward_for_single_feature_map(self, anchors, objectness, box_regression):
    """
    Apply the RPN outputs to the anchors generated from a single feature
    level for one batch (which may contain multiple images).

    Arguments:
        anchors: list[BoxList]
        objectness: tensor of size N, A, H, W
        box_regression: tensor of size N, A * 4, H, W
    """
    device = objectness.device
    N, A, H, W = objectness.shape

    # put in the same format as anchors
    objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
    objectness = objectness.sigmoid()
    box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

    num_anchors = A * H * W
    # reduce the number of proposal anchors before the NMS
    pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
    # filter the proposal boxes by objectness score, keeping only the
    # high-objectness proposals for the following operations
    objectness, topk_idx = objectness.topk(pre_nms_top_n, dim=1, sorted=True)

    batch_idx = torch.arange(N, device=device)[:, None]
    # take out the regression results for the high-objectness boxes
    box_regression = box_regression[batch_idx, topk_idx]

    # preprocess the anchors for easier handling
    image_shapes = [box.size for box in anchors]
    concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
    concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

    # apply the regression to the anchor boxes
    proposals = self.box_coder.decode(
        box_regression.view(-1, 4), concat_anchors.view(-1, 4))
    proposals = proposals.view(N, -1, 4)

    result = []
    # collect the processed anchor boxes into BoxList form and apply NMS
    # to generate the final proposals
    for proposal, score, im_shape in zip(proposals, objectness, image_shapes):
        boxlist = BoxList(proposal, im_shape, mode="xyxy")
        boxlist.add_field("objectness", score)
        boxlist = boxlist.clip_to_image(remove_empty=False)
        boxlist = remove_small_boxes(boxlist, self.min_size)
        boxlist = boxlist_nms(
            boxlist,
            self.nms_thresh,
            max_proposals=self.post_nms_top_n,
            score_field="objectness",
        )
        result.append(boxlist)
    return result