def postprocess(self, outputs, images, image_ids, to_cpu):
    """Convert raw detector outputs into per-image Frame objects.

    For each image: rescales instances to the original resolution, keeps
    only classes present in TYPE_MAPPING (remapping their ids), applies
    group-aware NMS driven by NMS_MAPPING, and L2-normalizes the pooled
    ROI features.

    Args:
        outputs: per-image Instances produced by the model.
        images: original images; only `image.shape[:2]` (H, W) is used.
        image_ids: identifiers paired one-to-one with `images`.
        to_cpu: if True, move each Instances object to CPU before wrapping.

    Returns:
        list[Frame]: one Frame(image_id, image, instances) per input image.
    """
    frames = []
    for instances, image, image_id in zip(outputs, images, image_ids):
        height, width = image.shape[:2]
        # Rescale predictions from network input size to original image size.
        instances = detector_postprocess(instances, height, width)
        # Keep only detections whose class name has an entry in TYPE_MAPPING.
        type_valid = [
            self.model_meta.thing_classes[pred_class] in TYPE_MAPPING
            for pred_class in instances.pred_classes]
        instances = instances[type_valid]
        # Remap model class ids into the TYPE_MAPPING id space.
        instances.pred_classes = torch.as_tensor([
            TYPE_MAPPING[self.model_meta.thing_classes[pred_class]]
            for pred_class in instances.pred_classes])
        if len(instances) > 0:
            # NMS_MAPPING[class] yields a pair used below as
            # (nms_group, score_offset): boxes sharing a group suppress each
            # other, and the offset biases which class wins within a group.
            nms_mapping = torch.as_tensor([
                NMS_MAPPING[pred_class.item()]
                for pred_class in instances.pred_classes],
                dtype=torch.int, device=self.device)
            nms_types = nms_mapping[:, 0]
            nms_scores = instances.scores + nms_mapping[:, 1]
            keep_indices = batched_nms(
                instances.pred_boxes.tensor, nms_scores, nms_types,
                self.nms_threshold)
            instances = instances[keep_indices]
            # Global-average-pool ROI features over the spatial dims,
            # then L2-normalize each feature vector.
            features = instances.roi_features.mean(dim=(2, 3))
            features = features / features.norm(dim=1, keepdim=True)
            instances.roi_features = features
        if to_cpu:
            instances = instances.to('cpu')
        frame = Frame(image_id, image, instances)
        frames.append(frame)
    return frames
def ml_nms(boxlist, nms_thresh, max_proposals=-1, score_field="scores", label_field="labels"):
    """Apply multi-label (class-aware) non-maximum suppression to a boxlist.

    Args:
        boxlist (detectron2.structures.Instances): detections carrying
            `pred_boxes`, `scores` and `pred_classes` fields.
        nms_thresh (float): IoU threshold; non-positive disables NMS.
        max_proposals (int): if > 0, keep at most this many detections
            after suppression.
        score_field (str): unused; kept for interface compatibility.
        label_field (str): unused; kept for interface compatibility.

    Returns:
        The suppressed boxlist.
    """
    if nms_thresh <= 0:
        return boxlist
    keep = batched_nms(
        boxlist.pred_boxes.tensor,
        boxlist.scores,
        boxlist.pred_classes,
        nms_thresh,
    )
    if max_proposals > 0:
        keep = keep[:max_proposals]
    return boxlist[keep]
def inference_single_image(self, anchors, box_cls, box_delta, image_size):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Arguments:
        anchors (list[Boxes]): list of #feature levels. Each entry contains
            a Boxes object, which contains all the anchors in that feature level.
        box_cls (list[Tensor]): list of #feature levels. Each entry contains
            tensor of size (H x W x A, K)
        box_delta (list[Tensor]): Same shape as 'box_cls' except that K becomes 4.
        image_size (tuple(H, W)): a tuple of the image height and width.

    Returns:
        Same as `inference`, but for only one image.
    """
    boxes_all = []
    scores_all = []
    class_idxs_all = []

    # Iterate over every feature level
    for box_cls_i, box_reg_i, anchors_i in zip(box_cls, box_delta, anchors):
        # (HxWxAxK,)
        box_cls_i = box_cls_i.flatten().sigmoid_()

        # Keep top k top scoring indices only.
        num_topk = min(self.topk_candidates, box_reg_i.size(0))
        # torch.sort is actually faster than .topk (at least on GPUs)
        predicted_prob, topk_idxs = box_cls_i.sort(descending=True)
        predicted_prob = predicted_prob[:num_topk]
        topk_idxs = topk_idxs[:num_topk]

        # filter out the proposals with low confidence score
        keep_idxs = predicted_prob > self.score_threshold
        predicted_prob = predicted_prob[keep_idxs]
        topk_idxs = topk_idxs[keep_idxs]

        # The flat index encodes (anchor, class); recover both components.
        anchor_idxs = topk_idxs // self.num_classes
        classes_idxs = topk_idxs % self.num_classes

        box_reg_i = box_reg_i[anchor_idxs]
        anchors_i = anchors_i[anchor_idxs]
        # predict boxes
        predicted_boxes = self.box2box_transform.apply_deltas(box_reg_i, anchors_i.tensor)

        boxes_all.append(predicted_boxes)
        scores_all.append(predicted_prob)
        class_idxs_all.append(classes_idxs)

    boxes_all, scores_all, class_idxs_all = [
        cat(x) for x in [boxes_all, scores_all, class_idxs_all]
    ]
    # Class-aware NMS over all levels, then cap the detections per image.
    keep = batched_nms(boxes_all, scores_all, class_idxs_all, self.nms_threshold)
    keep = keep[: self.max_detections_per_image]

    result = Instances(image_size)
    result.pred_boxes = Boxes(boxes_all[keep])
    result.scores = scores_all[keep]
    result.pred_classes = class_idxs_all[keep]
    return result
def _get_class_predictions(self, boxes, scores, image_shape): num_bbox_reg_classes = boxes.shape[1] // 4 # Convert to Boxes to use the `clip` function ... boxes = Boxes(boxes.reshape(-1, 4)) boxes.clip(image_shape) boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4) # R x C x 4 # Filter results based on detection scores filter_mask = scores > self.class_score_thresh_test # R' x 2. First column contains indices of the R predictions; # Second column contains indices of classes. class_inds = filter_mask.nonzero() if num_bbox_reg_classes == 1: boxes = boxes[class_inds[:, 0], 0] else: boxes = boxes[filter_mask] scores = scores[filter_mask] # Apply per-class NMS keep_class = batched_nms(boxes, scores, class_inds[:, 1], self.class_nms_thresh_test) if self.topk_per_image_test >= 0: keep_class = keep_class[:self.topk_per_image_test] boxes, scores, class_inds = boxes[keep_class], scores[ keep_class], class_inds[keep_class] return boxes, scores, class_inds
def test_batched_nms_rotated_0_degree_cuda(self):
    """Rotated NMS at angle 0 should agree with axis-aligned NMS on CUDA.

    Builds N random horizontal boxes, converts them to (cx, cy, w, h)
    rotated form with angle 0, and checks that batched_nms_rotated keeps
    (almost) the same indices as batched_nms and that neither op mutates
    its input tensors.
    """
    N = 2000
    num_classes = 50
    boxes, scores = self._create_tensors(N)
    idxs = torch.randint(0, num_classes, (N, ))
    rotated_boxes = torch.zeros(N, 5)
    # Convert (x0, y0, x1, y1) to (cx, cy, w, h); the angle column stays 0.
    rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0
    rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0
    rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
    rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
    err_msg = "Rotated NMS with 0 degree is incompatible with horizontal NMS for IoU={}"
    for iou in [0.2, 0.5, 0.8]:
        backup = boxes.clone()
        keep_ref = batched_nms(boxes.cuda(), scores.cuda(), idxs, iou)
        self.assertTrue(torch.allclose(boxes, backup), "boxes modified by batched_nms")
        backup = rotated_boxes.clone()
        keep = batched_nms_rotated(rotated_boxes.cuda(), scores.cuda(), idxs, iou)
        self.assertTrue(
            torch.allclose(rotated_boxes, backup),
            "rotated_boxes modified by batched_nms_rotated",
        )
        # Floating-point differences may reorder borderline boxes; tolerate
        # a small edit distance between the two keep lists.
        self.assertLessEqual(nms_edit_distance(keep, keep_ref), 2, err_msg.format(iou))
def inference_single_image(
    self,
    anchors: List[Boxes],
    box_cls: List[Tensor],
    box_delta: List[Tensor],
    image_size: Tuple[int, int],
):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Arguments:
        anchors (list[Boxes]): list of #feature levels. Each entry contains
            a Boxes object, which contains all the anchors in that feature level.
        box_cls (list[Tensor]): list of #feature levels. Each entry contains
            tensor of size (H x W x A, K)
        box_delta (list[Tensor]): Same shape as 'box_cls' except that K becomes 4.
        image_size (tuple(H, W)): a tuple of the image height and width.

    Returns:
        Same as `inference`, but for only one image.
    """
    decoded = self._decode_multi_level_predictions(
        anchors,
        box_cls,
        box_delta,
        self.test_score_thresh,
        self.test_topk_candidates,
        image_size,
    )
    # Suppress overlaps independently within each predicted class, then
    # truncate to the per-image detection budget.
    kept = batched_nms(
        decoded.pred_boxes.tensor,
        decoded.scores,
        decoded.pred_classes,
        self.test_nms_thresh,
    )
    return decoded[kept[:self.max_detections_per_image]]
def ml_nms(boxlist, nms_thresh, max_proposals=-1, score_field="scores", label_field="labels"):
    """Apply multi-label NMS to either final detections or raw proposals.

    Arguments:
        boxlist: Instances with either `pred_boxes`/`pred_classes` (final
            detections, suppressed per class) or `proposal_boxes` (proposals,
            suppressed class-agnostically), plus a `scores` field.
        nms_thresh (float): IoU threshold; non-positive disables NMS.
        max_proposals (int): if > 0, keep at most this many entries after
            suppression.
        score_field (str): unused; kept for interface compatibility.
        label_field (str): unused; kept for interface compatibility.

    Returns:
        The suppressed boxlist.
    """
    if nms_thresh <= 0:
        return boxlist

    if boxlist.has('pred_boxes'):
        # Final detections carry class ids; NMS runs within each class.
        boxes = boxlist.pred_boxes.tensor
        labels = boxlist.pred_classes
    else:
        # Proposals have no classes: assign everything to one group.
        boxes = boxlist.proposal_boxes.tensor
        labels = boxes.new_zeros(len(boxes))

    keep = batched_nms(boxes, boxlist.scores, labels, nms_thresh)
    if max_proposals > 0:
        keep = keep[:max_proposals]
    return boxlist[keep]
def fast_rcnn_inference_single_image(boxes, scores, image_shape, score_thresh, nms_thresh, topk_per_image, light=None):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.
        light: optional tensor laid out like `boxes`; filtered and kept in
            lockstep with the main boxes and attached as `pred_light`.
            Presumably auxiliary (e.g. light-source) boxes — TODO confirm
            with callers.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    # Drop the last score column (background class, by detectron2 convention).
    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4
    # NOTE(review): `type(light) == type(boxes)` is an exact-type check used
    # throughout as "light is a tensor like boxes"; `light is not None` plus
    # isinstance would be clearer, but the check is kept as-is here.
    if type(light) == type(boxes):
        # print(light)
        light = Boxes(light.reshape(-1, 4))
        # light.clip(image_shape)
        light = light.tensor.view(-1, num_bbox_reg_classes, 4)

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
        if type(light) == type(boxes):
            light = light[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
        if type(light) == type(boxes):
            light = light[filter_mask]
    scores = scores[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    # NOTE(review): unlike the sibling variants there is no
    # `if topk_per_image >= 0` guard, so topk_per_image == -1 would silently
    # drop the last detection via keep[:-1] — confirm callers always pass a
    # positive value.
    keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]
    if type(light) == type(boxes):
        light = light[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    if type(light) == type(boxes):
        result.pred_light = Boxes(light)
    return result, filter_inds[:, 0]
def fast_rcnn_inference_single_image(boxes, scores, image_shape, score_thresh, nms_thresh, topk_per_image):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying class-agnostic non-maximum suppression (NMS).

    Unlike the standard detectron2 implementation, NMS here is deliberately
    class-agnostic (all detections share one NMS group), and the reported
    scores are the full background-stripped score rows of the kept
    predictions (`Tscores`), not the per-class filtered scalars.

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    # Drop rows containing NaN/Inf coordinates or scores.
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(
        dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]

    scores = scores[:, :-1]  # drop the background column
    Tscores = scores  # keep the full per-class score rows for the output
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # Class-agnostic NMS: put every detection in the same suppression group.
    # (Fix: was `torch.zeros(len(filter_inds[:, 1].tolist())).cuda()`, which
    # round-tripped through a Python list and crashed on CPU-only inputs;
    # allocate directly on the input device instead.)
    uniclass = torch.zeros(filter_inds.shape[0], device=boxes.device)
    keep = batched_nms(boxes, scores, uniclass, nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    # Report the full score row of each kept prediction, not the scalar
    # per-class score that survived filtering.
    result.scores = Tscores[filter_inds[:, 0]]
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
def fast_rcnn_inference_single_image(boxes, scores, attr_scores, image_shape, score_thresh, nms_thresh, topk_per_image): """ Single-image inference. Return bounding-box detection results by thresholding on scores and applying non-maximum suppression (NMS). Args: Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes per image. Returns: Same as `fast_rcnn_inference`, but for only one image. """ # Make sure boxes and scores don't contain infinite or Nan valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1) \ & torch.isfinite(attr_scores).all(dim=1) # Get scores from finite boxes and scores if not valid_mask.all(): boxes = boxes[valid_mask] scores = scores[valid_mask] attr_scores = attr_scores[valid_mask] scores = scores[:, :-1] # Remove background class? num_bbox_reg_classes = boxes.shape[1] // 4 # Convert to Boxes to use the `clip` function ... boxes = Boxes(boxes.reshape(-1, 4)) boxes.clip(image_shape) boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4) # R x C x 4 # If using Attributes class: # attributes = Attributes(attributes.reshape(-1, 295)) # attributes = attributes.tensor.view(-1, num_bbox_reg_classes, 295) # Filter results based on detection scores filter_mask = scores > score_thresh # R x K # R' x 2. First column contains indices of the R predictions; # Second column contains indices of classes. filter_inds = filter_mask.nonzero() if num_bbox_reg_classes == 1: boxes = boxes[filter_inds[:, 0], 0] else: boxes = boxes[filter_mask] scores = scores[filter_mask] # Apply per-class NMS keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh) if topk_per_image >= 0: keep = keep[:topk_per_image] boxes, scores, attr_scores, filter_inds, = boxes[keep], scores[ keep], attr_scores[keep], filter_inds[keep] result = Instances(image_shape) result.pred_boxes = Boxes(boxes) result.scores = scores result.attr_scores = attr_scores result.pred_classes = filter_inds[:, 1] return result, filter_inds[:, 0]
def inference_single_image(self, conf_pred_per_image, loc_pred_per_image, default_boxes, image_size):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        conf_pred_per_image (list[Tensor]): list of #feature levels. Each entry
            contains tensor of size [Hi x Wi x D, C].
        loc_pred_per_image (list[Tensor]): same shape as 'conf_pred_per_image'
            except that C becomes 4.
        default_boxes (list['Boxes']): a list of 'Boxes' elements. The Boxes
            contains default boxes of one image on the specific feature level.
        image_size (tuple(H, W)): a tuple of the image height and width.

    Returns:
        Same as `inference`, but for only one image.
    """
    # predict confidence
    conf_pred = torch.cat(conf_pred_per_image, dim=0)  # [R, C]
    conf_pred = conf_pred.softmax(dim=1)

    # predict boxes
    loc_pred = torch.cat(loc_pred_per_image, dim=0)  # [R, 4]
    default_boxes = Boxes.cat(default_boxes)  # [R, 4]
    boxes_pred = self.box2box_transform.apply_deltas(
        loc_pred, default_boxes.tensor)

    num_boxes, num_classes = conf_pred.shape
    # Replicate each box once per class so every (box, class) pair can be
    # scored and suppressed independently.
    boxes_pred = boxes_pred.view(num_boxes, 1, 4).expand(num_boxes, num_classes, 4)  # [R, C, 4]
    labels = torch.arange(num_classes, device=self.device)  # [0, ..., C]
    labels = labels.view(1, num_classes).expand_as(conf_pred)  # [R, C]

    # remove predictions with the background label
    boxes_pred = boxes_pred[:, :-1]
    conf_pred = conf_pred[:, :-1]
    labels = labels[:, :-1]

    # batch everything, by making every class prediction be a separate instance
    boxes_pred = boxes_pred.reshape(-1, 4)
    conf_pred = conf_pred.reshape(-1)
    labels = labels.reshape(-1)

    # remove low scoring boxes
    indices = torch.nonzero(conf_pred > self.score_threshold).squeeze(1)
    boxes_pred, conf_pred, labels = boxes_pred[indices], conf_pred[
        indices], labels[indices]

    # Class-aware NMS, then cap detections per image.
    keep = batched_nms(boxes_pred, conf_pred, labels, self.nms_threshold)
    keep = keep[:self.max_detections_per_image]

    result = Instances(image_size)
    result.pred_boxes = Boxes(boxes_pred[keep])
    result.scores = conf_pred[keep]
    result.pred_classes = labels[keep]
    return result
def fast_rcnn_inference_single_image(
    boxes,
    scores,
    image_shape: Tuple[int, int],
    score_thresh: float,
    nms_thresh: float,
    topk_per_image: int,
):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    # Drop rows containing NaN/Inf coordinates or scores.
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]

    scores = scores[:, :-1]
    # Optional global category masking: zero the scores of disabled classes.
    # NOTE(review): reads the module-level category_disabler.global_cat_mask,
    # hard-codes .cuda(), and prints on every image while the mask is active
    # — confirm this debugging aid is intended for production inference.
    if len(category_disabler.global_cat_mask) > 0:
        print('<<<<<< category disabler activated >>>>>>')
        scores *= torch.tensor(category_disabler.global_cat_mask).cuda()
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # 1. Filter results based on detection scores. It can make NMS more efficient
    #    by filtering out low-confidence detections.
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # 2. Apply NMS for each class independently.
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
def inference_single_image(self, locations, box_cls, box_reg, image_size):
    """Single-image inference for a location-based (FCOS-style) head.

    Args:
        locations (list[Tensor]): per-level point locations; indexed as
            (HW, 2) below — assumed (x, y) per location, TODO confirm.
        box_cls (list[Tensor]): per-level (HxW, C) classification logits.
        box_reg (list[Tensor]): per-level (HW, 4) distances to the four box
            sides (left, top, right, bottom), per the stacking below.
        image_size (tuple(H, W)): image height and width.

    Returns:
        Instances: detections with pred_boxes, scores, pred_classes.
    """
    boxes_all = []
    scores_all = []
    class_idxs_all = []

    # Iterate over every feature level
    for box_cls_i, box_reg_i, locs_i in zip(box_cls, box_reg, locations):
        # (HxW, C)
        box_cls_i = box_cls_i.sigmoid_()
        keep_idxs = box_cls_i > self.pre_nms_thresh

        box_cls_i = box_cls_i[keep_idxs]
        keep_idxs_nonzero_i = keep_idxs.nonzero()

        box_loc_i = keep_idxs_nonzero_i[:, 0]
        class_i = keep_idxs_nonzero_i[:, 1]

        box_reg_i = box_reg_i[box_loc_i]
        locs_i = locs_i[box_loc_i]

        # Cap the number of candidates entering NMS from this level.
        per_pre_nms_top_n = keep_idxs.sum().clamp(max=self.pre_nms_top_n)
        if keep_idxs.sum().item() > per_pre_nms_top_n.item():
            box_cls_i, topk_idxs = box_cls_i.topk(per_pre_nms_top_n, sorted=False)
            class_i = class_i[topk_idxs]
            box_reg_i = box_reg_i[topk_idxs]
            locs_i = locs_i[topk_idxs]

        # predict boxes: location minus/plus regressed distances to each side
        predicted_boxes = torch.stack([
            locs_i[:, 0] - box_reg_i[:, 0],
            locs_i[:, 1] - box_reg_i[:, 1],
            locs_i[:, 0] + box_reg_i[:, 2],
            locs_i[:, 1] + box_reg_i[:, 3],
        ], dim=1)

        # sqrt rescoring — presumably box_cls_i already carries a centerness
        # factor, making this a geometric mean; confirm upstream.
        box_cls_i = torch.sqrt(box_cls_i)

        boxes_all.append(predicted_boxes)
        scores_all.append(box_cls_i)
        class_idxs_all.append(class_i)

    boxes_all, scores_all, class_idxs_all = [
        cat(x) for x in [boxes_all, scores_all, class_idxs_all]
    ]

    # Apply per-class nms for each image
    keep = batched_nms(boxes_all, scores_all, class_idxs_all, self.nms_thresh)
    keep = keep[:self.max_detections_per_image]

    result = Instances(image_size)
    result.pred_boxes = Boxes(boxes_all[keep])
    result.scores = scores_all[keep]
    result.pred_classes = class_idxs_all[keep]
    return result
def fast_rcnn_inference_single_image(
    boxes, scores, image_shape, score_thresh, nms_thresh, topk_per_image
):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    # Drop rows containing NaN/Inf coordinates or scores.
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]

    scores = scores[:, :-1]  # drop the background column
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # 1. Filter results based on detection scores. It can make NMS more efficient
    #    by filtering out low-confidence detections.
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # 2. Apply NMS for each class independently.
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    # DIOU NMS commented for now
    # keep = batched_diou_nms(boxes, scores, filter_inds[:, 1], nms_thresh) \
    #     if global_cfg.MODEL.ROI_BOX_HEAD.NMS_TYPE == "diou_nms" \
    #     else \
    #     batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
def fast_rcnn_inference_single_image(
    boxes,
    scores,
    image_shape,
    score_thresh,
    nms_thresh,
    topk_per_image,
    fc_box_features=None,
):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.
        fc_box_features: optional list of per-FC-layer box feature tensors
            (one entry per layer); the rows of the kept predictions are
            concatenated along dim=1 and attached as `result.fc_box_features`.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image, plus the kept
        prediction row indices; the full score rows (including background)
        are attached as `result.probs`.
    """
    # Keep the full, detached score rows (incl. background) for `probs`.
    probs = scores.clone().detach()
    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    # Compact all fc layers into a single tensor to work nicely with Instance class for now
    if fc_box_features is not None:
        fc_box_features = [fc_layer_box_features[filter_inds[:, 0]]
                           for fc_layer_box_features in fc_box_features]
        # will need to know number of layers and dimensions to unpack
        fc_box_features = torch.cat(fc_box_features, dim=1)
        result.fc_box_features = fc_box_features
    probs = probs[filter_inds[:, 0]]
    result.probs = probs
    return result, filter_inds[:, 0]
def softmax_fast_rcnn_inference_single_image( boxes, scores, image_shape, score_thresh, nms_thresh, topk_per_image ): """ Single-image inference. Return bounding-box detection results by thresholding on scores and applying non-maximum suppression (NMS). Args: Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes per image. Returns: Same as `fast_rcnn_inference`, but for only one image. """ # print("boxes:", boxes.shape, boxes[0, :4]) # print("scores:", scores.shape, torch.sum(scores[0,:])) valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1) if not valid_mask.all(): boxes = boxes[valid_mask] scores = scores[valid_mask] #remove bg from scores scores = scores[:, :-1] num_bbox_reg_classes = boxes.shape[1] // 4 # Convert to Boxes to use the `clip` function ... boxes = Boxes(boxes.reshape(-1, 4)) boxes.clip(image_shape) boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4) # R x C x 4 # Filter results based on detection scores filter_mask = scores > score_thresh # R x K # R' x 2. First column contains indices of the R predictions; # Second column contains indices of classes. filter_inds = filter_mask.nonzero() if num_bbox_reg_classes == 1: boxes = boxes[filter_inds[:, 0], 0] else: boxes = boxes[filter_mask] old_scores = scores[filter_inds[:, 0], :] scores = scores[filter_mask] # Apply per-class NMS keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh) if topk_per_image >= 0: keep = keep[:topk_per_image] boxes, old_scores, scores, filter_inds = boxes[keep], old_scores[keep], scores[keep], filter_inds[keep] result = Instances(image_shape) result.pred_boxes = Boxes(boxes) result.scores = scores result.pred_classes = filter_inds[:, 1] result.softmax_scores = old_scores return result, filter_inds[:, 0]
def get_pgt_mist(self, prev_pred_boxes, prev_pred_scores, proposals, top_pro=0.15, suffix=""):
    """Build MIST pseudo-ground-truth targets from previous predictions.

    Selects the top `top_pro` fraction of predictions per image, applies
    class-agnostic NMS (IoU 0.2) to de-duplicate them, and wraps the
    survivors into per-image Instances usable as training targets.

    Args:
        prev_pred_boxes / prev_pred_scores: predictions from the previous
            refinement step, forwarded to `get_pgt_top_k`.
        proposals (list[Instances]): per-image proposals; only `image_size`
            is read here.
        top_pro (float): fraction of top-scoring predictions to keep.
        suffix (str): tag forwarded to `get_pgt_top_k` and visualization.

    Returns:
        list[Instances]: one pseudo-GT Instances per image with gt_boxes,
        gt_classes, gt_scores and gt_weights fields.
    """
    pgt_scores, pgt_boxes, pgt_classes, pgt_weights = self.get_pgt_top_k(
        prev_pred_boxes,
        prev_pred_scores,
        proposals,
        top_k=top_pro,
        # thres=0.05,
        thres=0.0,
        need_instance=False,
        need_weight=True,
        suffix=suffix,
    )

    # NMS: class-agnostic (all-zero group ids), so overlapping boxes of
    # different classes suppress each other.
    pgt_idxs = [torch.zeros_like(pgt_class) for pgt_class in pgt_classes]
    keeps = [
        batched_nms(pgt_box, pgt_score, pgt_class, 0.2)
        for pgt_box, pgt_score, pgt_class in zip(
            pgt_boxes, pgt_scores, pgt_idxs)
    ]
    pgt_scores = [
        pgt_score[keep] for pgt_score, keep in zip(pgt_scores, keeps)
    ]
    pgt_boxes = [pgt_box[keep] for pgt_box, keep in zip(pgt_boxes, keeps)]
    pgt_classes = [
        pgt_class[keep] for pgt_class, keep in zip(pgt_classes, keeps)
    ]
    pgt_weights = [
        pgt_weight[keep] for pgt_weight, keep in zip(pgt_weights, keeps)
    ]

    pgt_boxes = [Boxes(pgt_box) for pgt_box in pgt_boxes]

    # Bug fix: the last zip argument was `pgt_scores`, so gt_weights silently
    # duplicated gt_scores and the NMS-filtered pgt_weights computed above
    # were never used.
    targets = [
        Instances(
            proposals[i].image_size,
            gt_boxes=pgt_box,
            gt_classes=pgt_class,
            gt_scores=pgt_score,
            gt_weights=pgt_weight,
        ) for i, (pgt_box, pgt_class, pgt_score, pgt_weight) in enumerate(
            zip(pgt_boxes, pgt_classes, pgt_scores, pgt_weights))
    ]

    self._vis_pgt(targets, "pgt_mist", suffix)

    return targets
def measure_nms_perf(boxes_shape, scores_shape, levels_shape, threshold):
    """Benchmark the forward latency of batched_nms on random CUDA inputs.

    Args:
        boxes_shape (tuple): (N, 4) shape for the random box tensor.
        scores_shape (tuple): (N,) shape for the random score tensor.
        levels_shape (tuple): (N,) shape for the level-index tensor.
        threshold (float): NMS IoU threshold.

    Returns:
        float: mean forward time per call in milliseconds, averaged over
        ITERATIONS runs after a 2-iteration warmup.
    """
    assert len(boxes_shape) == 2
    assert len(scores_shape) == 1
    assert len(levels_shape) == 1
    assert boxes_shape[0] == scores_shape[0]
    assert boxes_shape[0] == levels_shape[0]

    # Preparing Inputs
    # (0,1100) range chosen based on boxes observed in runs of detectron.
    boxes = torch.FloatTensor(boxes_shape[0], boxes_shape[1]).uniform_(0, 1100)
    # creating a random distribution between [-0.8, 0.8)
    scores_per_img = 1.6 * torch.rand(scores_shape, dtype=torch.float) - 0.8

    num_levels = levels_shape[0]
    if num_levels > 8000:
        # Max lvl value = 4: first 2000 entries level 0, next 2000 level 1,
        # ..., everything from entry 8000 on stays at level 4.
        # (Fix: the original used float division `i / 2000` — which only
        # matched this intent via truncation in the long cast — and assigned
        # levels > 4 past entry 10000, contradicting the stated layout.)
        lvl = torch.clamp(
            torch.arange(num_levels, dtype=torch.long) // 2000, max=4)
    else:
        # Split entries into 5 levels: four random-sized chunks for levels
        # 0..3 and the remainder assigned to level 4.
        lower_bound = num_levels // 5
        upper_bound = num_levels // 4
        np_lvl = []
        count = 0
        for level in range(4):  # renamed: the old `lvl` shadowed the result var
            chunk_len = random.randint(lower_bound, upper_bound)
            np_lvl.append(np.full(chunk_len, level, dtype=int))
            count += chunk_len
        np_lvl.append(np.full(num_levels - count, 4, dtype=int))
        lvl = torch.tensor(np.concatenate(np_lvl), dtype=torch.long)
    assert lvl.shape == levels_shape, "ensure lvl shape is correct"

    boxes = boxes.cuda()
    scores_per_img = scores_per_img.cuda()
    lvl = lvl.cuda()

    # Forward Pass
    # warmup - 2 iters
    batched_nms(boxes, scores_per_img, lvl, threshold)
    batched_nms(boxes, scores_per_img, lvl, threshold)
    torch.cuda.synchronize()
    start = time.time()
    for _ in range(ITERATIONS):
        batched_nms(boxes, scores_per_img, lvl, threshold)
    torch.cuda.synchronize()
    end = time.time()
    fwd_time = (end - start) * 1000 / ITERATIONS
    return fwd_time
def test_nms_scriptability(self):
    """batched_nms must be torch.jit-scriptable and agree with eager mode."""
    N = 2000
    num_classes = 50
    boxes, scores = self._create_tensors(N)
    idxs = torch.randint(0, num_classes, (N,))
    scripted_batched_nms = torch.jit.script(batched_nms)
    err_msg = "NMS is incompatible with jit-scripted NMS for IoU={}"

    for iou in [0.2, 0.5, 0.8]:
        keep_ref = batched_nms(boxes, scores, idxs, iou)
        backup = boxes.clone()
        scripted_keep = scripted_batched_nms(boxes, scores, idxs, iou)
        # Fix: use unittest assertions instead of a bare `assert`, which is
        # stripped under `python -O` and reports failures poorly; this also
        # matches the style of the sibling NMS tests.
        self.assertTrue(
            torch.allclose(boxes, backup),
            "boxes modified by jit-scripted batched_nms",
        )
        self.assertTrue(torch.equal(keep_ref, scripted_keep), err_msg.format(iou))
def fast_rcnn_inference_single_image(image_shape, boxes, scores, classes=None, score_thresh=0.05, nms_thresh=0.5, topk_per_image=1000):
    """Single-image Fast R-CNN inference with an optional class override.

    Args:
        image_shape (tuple): (H, W); used to clip boxes and size the output.
        boxes: (R, C*4) or (R, 4) predicted boxes.
        scores: (R, K) scores; the last (background) column is dropped.
        classes: optional per-(row, class) label tensor; when provided, the
            output `pred_classes` comes from it instead of the filter-derived
            class indices — presumably externally assigned ids, confirm with
            callers. NOTE(review): `classes` is not filtered by `valid_mask`
            below, so rows may misalign if non-finite entries are dropped.
        score_thresh (float): minimum score to keep a (row, class) pair.
        nms_thresh (float): per-class NMS IoU threshold.
        topk_per_image (int): max detections; negative keeps all.

    Returns:
        (Instances, Tensor): detections and the kept prediction row indices.
    """
    # Drop rows containing NaN/Inf coordinates or scores.
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(
        dim=1)
    replace_cls = classes is not None
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]

    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]
    if replace_cls:
        classes = classes[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    if replace_cls:
        result.pred_classes = classes[keep]
    else:
        result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
def inference(self, _box_cls, _box_pred, image_sizes):
    """
    Arguments:
        _box_cls (Tensor): tensor of shape (batch_size, K, H, W).
        _box_pred (Tensor): tensors of shape (batch_size, 4, H, W).
        image_sizes (List[torch.Size]): the input image sizes

    Returns:
        results (List[Instances]): a list of #images elements.
    """
    # Flatten spatial dims: (B, K, H*W) and (B, 4, H*W).
    box_cls = _box_cls.flatten(2)
    box_pred = _box_pred.flatten(2)
    assert len(box_cls) == len(image_sizes)
    results = []

    scores = torch.sigmoid(box_cls)

    for i, (scores_per_image, box_pred_per_image, image_size) in enumerate(zip(
        scores, box_pred, image_sizes
    )):
        result = Instances(image_size)

        # refer to https://github.com/FateScript/CenterNet-better
        # Two-stage top-k: first the best num_boxes locations per class,
        # then the best num_boxes (class, location) pairs overall.
        topk_score_cat, topk_inds_cat = torch.topk(scores_per_image, k=self.num_boxes)
        topk_score, topk_inds = torch.topk(topk_score_cat.reshape(-1), k=self.num_boxes)
        # Row index in the flattened (K, num_boxes) table is the class id.
        topk_clses = topk_inds // self.num_boxes

        scores_per_image = topk_score
        labels_per_image = topk_clses

        # Gather the boxes of the selected per-class candidates, then pick
        # the globally selected ones among them.
        topk_box_cat = box_pred_per_image[:, topk_inds_cat.reshape(-1)]
        topk_box = topk_box_cat[:, topk_inds]
        box_pred_per_image = topk_box.transpose(0, 1)

        if self.nms:
            # Class-aware NMS with a fixed 0.5 IoU threshold.
            keep = batched_nms(box_pred_per_image, scores_per_image, labels_per_image, 0.5)
            box_pred_per_image = box_pred_per_image[keep]
            scores_per_image = scores_per_image[keep]
            labels_per_image = labels_per_image[keep]

        result.pred_boxes = Boxes(box_pred_per_image)
        result.scores = scores_per_image
        result.pred_classes = labels_per_image
        results.append(result)
    return results
def inference_single_image(self, logits, init_boxes, refine_boxes, image_size):
    """Single-image inference for a two-stage (init/refine) box head.

    Args:
        logits (list[Tensor]): per-level classification logits; reduced with
            max over dim 0, so presumably shaped (C, H, W) — TODO confirm.
        init_boxes (list[Tensor]): per-level initial boxes, reshaped from
            (4, ...) to (N, 4) below.
        refine_boxes (list[Tensor]): per-level refined boxes, same layout;
            these become the final pred_boxes.
        image_size (tuple(H, W)): image height and width.

    Returns:
        Instances: detections with pred_boxes (refined), scores,
        pred_classes, and the matching init_boxes.
    """
    boxes_all = []
    init_boxes_all = []
    class_idxs_all = []
    scores_all = []
    for logit, init_box, refine_box in zip(logits, init_boxes, refine_boxes):
        # Best class and its score at every location.
        scores, cls = logit.sigmoid().max(0)
        cls = cls.view(-1)
        scores = scores.view(-1)
        init_box = init_box.view(4, -1).permute(1, 0)
        refine_box = refine_box.view(4, -1).permute(1, 0)

        # Keep only the top-k highest scoring candidates on this level.
        predicted_prob, topk_idxs = scores.sort(descending=True)
        num_topk = min(self.topk_candidates, cls.size(0))
        predicted_prob = predicted_prob[:num_topk]
        topk_idxs = topk_idxs[:num_topk]

        # filter out the proposals with low confidence score
        keep_idxs = predicted_prob > self.score_threshold
        predicted_prob = predicted_prob[keep_idxs]
        topk_idxs = topk_idxs[keep_idxs]

        init_box_topk = init_box[topk_idxs]
        refine_box_topk = refine_box[topk_idxs]
        cls_topk = cls[topk_idxs]
        score_topk = scores[topk_idxs]

        boxes_all.append(refine_box_topk)
        init_boxes_all.append(init_box_topk)
        class_idxs_all.append(cls_topk)
        scores_all.append(score_topk)

    boxes_all, scores_all, class_idxs_all, init_boxes_all = [
        cat(x) for x in [boxes_all, scores_all, class_idxs_all, init_boxes_all]
    ]
    # Class-aware NMS on the refined boxes, then cap detections per image.
    keep = batched_nms(boxes_all, scores_all, class_idxs_all, self.nms_threshold)
    keep = keep[:self.max_detections_per_image]

    result = Instances(image_size)
    result.pred_boxes = Boxes(boxes_all[keep])
    result.scores = scores_all[keep]
    result.pred_classes = class_idxs_all[keep]
    result.init_boxes = init_boxes_all[keep]
    return result
def postprocess_single_image(self, box_cls, box_delta):
    """
    Decode detections for one image from per-level classification scores and
    box regression deltas (against self.anchors), then apply class-aware NMS.
    """
    level_boxes = []
    level_scores = []
    level_classes = []

    # Iterate over every feature level.
    for cls_per_level, reg_per_level, anchors_per_level in zip(
            box_cls, box_delta, self.anchors):
        flat_scores = cls_per_level.flatten()  # (HxWxAxK,)

        # Keep only the top-k highest-scoring candidates at this level.
        k = min(self.topk_candidates, reg_per_level.size(0))
        # torch.sort is actually faster than .topk (at least on GPUs)
        sorted_scores, sorted_idxs = flat_scores.sort(descending=True)
        sorted_scores = sorted_scores[:k]
        sorted_idxs = sorted_idxs[:k]

        # Drop candidates at or below the confidence threshold.
        confident = sorted_scores > self.score_threshold
        sorted_scores = sorted_scores[confident]
        sorted_idxs = sorted_idxs[confident]

        # The flat index interleaves anchor and class.
        anchor_idxs = sorted_idxs // self.num_classes
        cls_idxs = sorted_idxs % self.num_classes

        decoded = self.box2box_transform.apply_deltas(
            reg_per_level[anchor_idxs], anchors_per_level[anchor_idxs])

        level_boxes.append(decoded)
        level_scores.append(sorted_scores)
        level_classes.append(cls_idxs)

    boxes_all = cat(level_boxes)
    scores_all = cat(level_scores)
    class_idxs_all = cat(level_classes)

    # Class-aware NMS across all levels, capped per image.
    keep = batched_nms(boxes_all, scores_all, class_idxs_all, self.nms_threshold)
    keep = keep[:self.max_detections_per_image]

    result = Instances(self.image_size)
    result.pred_boxes = Boxes(boxes_all[keep])
    result.scores = scores_all[keep]
    result.pred_classes = class_idxs_all[keep]
    return result
def inference_single_image(
    self,
    anchors: List[Boxes],
    box_cls: List[torch.Tensor],
    box_delta: List[torch.Tensor],
    image_size: Tuple[int, int],
):
    """
    Identical to :meth:`RetinaNet.inference_single_image`: decode the
    multi-level predictions, run class-aware NMS, and cap the number of
    detections per image.
    """
    decoded = self._decode_multi_level_predictions(
        anchors,
        box_cls,
        box_delta,
        self.test_score_thresh,
        self.test_topk_candidates,
        image_size,
    )
    keep_idx = batched_nms(
        decoded.pred_boxes.tensor,
        decoded.scores,
        decoded.pred_classes,
        self.test_nms_thresh,
    )
    keep_idx = keep_idx[:self.max_detections_per_image]
    return decoded[keep_idx]
def select_over_all_levels(self, instances, image_sizes):
    """
    Run class-aware NMS on each image's proposals, then cap the number of
    surviving detections at ``self.fpn_post_nms_top_n``.
    """
    processed = []
    for per_image in instances:
        # multiclass nms
        nms_keep = batched_nms(
            per_image.proposal_boxes.tensor,
            per_image.objectness_logits,
            per_image.labels.float(),
            self.nms_thresh,
        )
        per_image = per_image[nms_keep]

        cls_scores = per_image.objectness_logits
        n_det = len(cls_scores)
        # Limit to max_per_image detections **over all classes** by
        # thresholding at the k-th highest remaining score.
        if n_det > self.fpn_post_nms_top_n > 0:
            kth = n_det - self.fpn_post_nms_top_n + 1
            image_thresh, _ = torch.kthvalue(cls_scores.cpu(), kth)
            survivors = cls_scores >= image_thresh.item()
            survivors = torch.nonzero(survivors).squeeze(1)
            per_image = per_image[survivors]

        per_image.remove("labels")
        processed.append(per_image)
    return processed
def general_standard_nms_postprocessing(input_im, outputs,
                                        nms_threshold=0.5,
                                        max_detections_per_image=100):
    """
    Standard class-aware NMS post-processing.

    Args:
        input_im (list): an input im list generated from dataset handler.
        outputs (list): output list from a model-specific inference function,
            unpacked as (predicted_boxes, predicted_boxes_covariance,
            predicted_prob, classes_idxs, predicted_prob_vectors).
        nms_threshold (float): non-maximum suppression threshold.
        max_detections_per_image (int): maximum allowed number of detections
            per image.

    Returns:
        result (Instances): final results after nms
    """
    (predicted_boxes, predicted_boxes_covariance, predicted_prob,
     classes_idxs, predicted_prob_vectors) = outputs

    # Perform class-aware NMS and keep the top-scoring results.
    keep = batched_nms(predicted_boxes, predicted_prob,
                       classes_idxs, nms_threshold)
    keep = keep[:max_detections_per_image]

    result = Instances(
        (input_im[0]['image'].shape[1], input_im[0]['image'].shape[2]))
    result.pred_boxes = Boxes(predicted_boxes[keep])
    result.scores = predicted_prob[keep]
    result.pred_classes = classes_idxs[keep]
    result.pred_cls_probs = predicted_prob_vectors[keep]

    # Handle case where there is no covariance matrix, such as classical
    # inference: substitute an all-zero (N, 4, 4) covariance.
    if isinstance(predicted_boxes_covariance, torch.Tensor):
        result.pred_boxes_covariance = predicted_boxes_covariance[keep]
    else:
        # Fix: this previously used a module-level `device` global that is
        # not defined in this function's scope; place the zeros on the same
        # device as the predicted boxes so all output tensors agree.
        result.pred_boxes_covariance = torch.zeros(
            predicted_boxes[keep].shape + (4,),
            device=predicted_boxes.device)
    return result
def test_batched_nms_rotated_0_degree_cpu(self, device="cpu"):
    """Rotated NMS at angle 0 must agree (up to a small edit distance) with
    horizontal NMS on the same boxes, and neither may mutate its input."""
    N = 2000
    num_classes = 50
    boxes, scores = self._create_tensors(N, device=device)
    idxs = torch.randint(0, num_classes, (N,))

    # Re-encode the axis-aligned boxes as rotated (cx, cy, w, h, angle)
    # boxes with the angle column left at 0 degrees.
    rotated_boxes = torch.zeros(N, 5, device=device)
    x1, y1, x2, y2 = boxes.unbind(dim=1)
    rotated_boxes[:, 0] = (x1 + x2) / 2.0
    rotated_boxes[:, 1] = (y1 + y2) / 2.0
    rotated_boxes[:, 2] = x2 - x1
    rotated_boxes[:, 3] = y2 - y1

    err_msg = "Rotated NMS with 0 degree is incompatible with horizontal NMS for IoU={}"
    for iou in [0.2, 0.5, 0.8]:
        snapshot = boxes.clone()
        keep_ref = batched_nms(boxes, scores, idxs, iou)
        assert torch.allclose(boxes, snapshot), "boxes modified by batched_nms"

        snapshot = rotated_boxes.clone()
        keep = batched_nms_rotated(rotated_boxes, scores, idxs, iou)
        assert torch.allclose(
            rotated_boxes, snapshot
        ), "rotated_boxes modified by batched_nms_rotated"

        # Occasionally the gap can be large if there are many IOU on the threshold boundary
        self.assertLessEqual(nms_edit_distance(keep, keep_ref), 5, err_msg.format(iou))
def postprocess(self, instances, batched_inputs, image_sizes):
    """
    Rescale the output instances to the target size.
    """
    # note: private function; subject to changes
    processed_results = []
    for per_image, inp, size in zip(instances, batched_inputs, image_sizes):
        # Apply per-class NMS for this image, then cap the detections.
        keep = batched_nms(
            per_image.pred_boxes.tensor,
            per_image.scores,
            per_image.pred_classes,
            self.nms_thresh,
        )
        keep = keep[:self.max_detections_per_image]
        per_image = per_image[keep]

        # Rescale to the caller-requested output resolution, falling back
        # to the network input size when none was given.
        out_height = inp.get("height", size[0])
        out_width = inp.get("width", size[1])
        rescaled = detector_postprocess(per_image, out_height, out_width)
        processed_results.append({"instances": rescaled})
    return processed_results
def test_batched_nms_rotated_0_degree_cpu(self):
    """Rotated NMS at angle 0 must select exactly the same boxes as
    horizontal NMS, and neither implementation may mutate its input."""
    N = 2000
    num_classes = 50
    boxes, scores = self._create_tensors(N)
    idxs = torch.randint(0, num_classes, (N, ))

    # Encode the same axis-aligned boxes in rotated format
    # (cx, cy, w, h, angle) with the angle fixed at 0 degrees.
    rotated_boxes = torch.zeros(N, 5)
    rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0
    rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0
    rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
    rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1]

    err_msg = "Rotated NMS with 0 degree is incompatible with horizontal NMS for IoU={}"
    for iou in [0.2, 0.5, 0.8]:
        horizontal_snapshot = boxes.clone()
        keep_ref = batched_nms(boxes, scores, idxs, iou)
        assert torch.allclose(
            boxes, horizontal_snapshot), "boxes modified by batched_nms"

        rotated_snapshot = rotated_boxes.clone()
        keep = batched_nms_rotated(rotated_boxes, scores, idxs, iou)
        assert torch.allclose(
            rotated_boxes,
            rotated_snapshot), "rotated_boxes modified by batched_nms_rotated"

        # Strict agreement: identical kept indices in identical order.
        assert torch.equal(keep, keep_ref), err_msg.format(iou)
def merge_branch_instances(instances, num_branch, nms_thrsh, topk_per_image):
    """
    Merge detection results from different branches of TridentNet.
    Return detection results by applying non-maximum suppression (NMS) on bounding boxes
    and keep the unsuppressed boxes and other instances (e.g mask) if any.

    Args:
        instances (list[Instances]): A list of N * num_branch instances that store detection
            results. Contain N images and each image has num_branch instances.
        num_branch (int): Number of branches used for merging detection results for each image.
        nms_thrsh (float): The threshold to use for box non-maximum suppression. Value in [0, 1].
        topk_per_image (int): The number of top scoring detections to return. Set < 0 to return
            all detections.

    Returns:
        results: (list[Instances]): A list of N instances, one for each image in the batch,
            that stores the topk most confidence detections after merging results from multiple
            branches.
    """
    # With a single branch there is nothing to merge.
    if num_branch == 1:
        return instances

    batch_size = len(instances) // num_branch
    results = []
    for i in range(batch_size):
        # Instances are laid out branch-major: image i of branch j lives at
        # index i + batch_size * j, so gather all branches for image i.
        instance = Instances.cat(
            [instances[i + batch_size * j] for j in range(num_branch)])

        # Apply per-class NMS
        keep = batched_nms(instance.pred_boxes.tensor, instance.scores,
                           instance.pred_classes, nms_thrsh)
        keep = keep[:topk_per_image]
        result = instance[keep]
        results.append(result)

    return results