def fast_rcnn_inference_single_image(boxes, scores, image_shape, score_thresh,
                                     nms_thresh, topk_per_image, box_features):
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        box_features = box_features[valid_mask]

    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    # R x 4 (this variant assumes class-agnostic box regression: one box per
    # proposal, so the row count matches `scores`)
    boxes = boxes.tensor.view(-1, 4)

    # For each box, keep the best score over all classes that survives a
    # fixed-threshold per-class NMS.
    max_conf = torch.zeros(boxes.shape[0], device=boxes.device)
    for cls_ind in range(0, scores.shape[1]):
        cls_scores = scores[:, cls_ind]
        keep = nms(boxes, cls_scores, 0.3)
        max_conf[keep] = torch.where(cls_scores[keep] > max_conf[keep],
                                     cls_scores[keep], max_conf[keep])

    # Keep at most 36 boxes, padding back up to 36 if too few clear the
    # confidence threshold (as in bottom-up-attention feature extraction).
    keep_boxes = torch.where(max_conf >= 0.2)[0]
    if len(keep_boxes) < 36:
        keep_boxes = torch.argsort(max_conf, descending=True)[:36]
    elif len(keep_boxes) > 36:
        keep_boxes = keep_boxes[:36]

    boxes, scores = boxes[keep_boxes], scores[keep_boxes]
    box_features = box_features[keep_boxes]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = keep_boxes
    return result, keep_boxes, box_features
def _get_class_predictions(self, boxes, scores, image_shape):
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Filter results based on detection scores
    filter_mask = scores > self.class_score_thresh_test
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    class_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[class_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # Apply per-class NMS
    keep_class = batched_nms(boxes, scores, class_inds[:, 1],
                             self.class_nms_thresh_test)
    if self.topk_per_image_test >= 0:
        keep_class = keep_class[:self.topk_per_image_test]
    boxes, scores, class_inds = (boxes[keep_class], scores[keep_class],
                                 class_inds[keep_class])
    return boxes, scores, class_inds
def fast_rcnn_inference_single_image(boxes, scores, image_shape, score_thresh,
                                     nms_thresh, topk_per_image):
    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Select max scores
    max_scores, max_classes = scores.max(1)  # R x C --> R
    num_objs = boxes.size(0)
    boxes = boxes.view(-1, 4)
    idxs = torch.arange(num_objs).cuda() * num_bbox_reg_classes + max_classes
    max_boxes = boxes[idxs]  # Select max boxes according to the max scores.

    # Apply NMS
    keep = nms(max_boxes, max_scores, nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores = max_boxes[keep], max_scores[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = max_classes[keep]
    return result, keep
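# --- Illustration (not from the source): a self-contained sketch of the
# flattened-index gather used above. `torch.arange(R) * C + max_classes`
# converts (proposal, class) pairs into row indices of the flattened
# (R*C) x 4 box tensor, picking the box of each proposal's best class.
import torch

R, C = 4, 3
boxes = torch.arange(R * C * 4, dtype=torch.float32).view(R, C, 4)
scores = torch.rand(R, C)

max_scores, max_classes = scores.max(dim=1)   # best class per proposal
flat = boxes.view(-1, 4)                      # (R*C) x 4
idxs = torch.arange(R) * C + max_classes      # row offset + class offset
max_boxes = flat[idxs]                        # R x 4

# Same result via direct fancy indexing:
assert torch.equal(max_boxes, boxes[torch.arange(R), max_classes])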
def relationnet_inference(boxes, scores, targets, image_shapes):
    """
    Args:
        boxes (Tensor): (batch_images, first_n, num_classes, 4)
        scores (Tensor): (batch_images, first_n, num_classes, num_thresh)
        targets (Tensor): (batch_images, first_n, num_classes, num_thresh)
        image_shapes (List[Tuple]): A list of (height, width) tuples for each
            image in the batch.

    Returns:
        results (List[Instances]):
            - pred_boxes (Boxes): (num_pred, 4)
            - scores (Tensor): (num_pred, num_classes)
            - pred_classes (Tensor): (num_pred,)
        filter_indices (List[Tensor])
    """
    # NOTE: the `self` references assume this function is bound as a method of
    # an object exposing `iou_thresh` and `nms_thresh_test`.
    thresh_idx = int(torch.where(self.iou_thresh == self.nms_thresh_test)[0][0])
    batch_images, first_n, num_classes = boxes.shape[:3]
    scores = scores[..., thresh_idx]

    results, filter_indices = [], []
    for batch_idx in range(batch_images):
        # (K, 2) pairs of (box index, class index) for the kept detections
        filter_idx = targets[batch_idx, :, :, thresh_idx].nonzero()
        result = Instances(image_shapes[batch_idx])
        mask_idx = filter_idx.split(1, dim=1)
        pred_boxes = Boxes(boxes[batch_idx, ...][mask_idx].view(-1, 4))
        pred_boxes.clip(image_shapes[batch_idx])
        result.pred_boxes = pred_boxes
        result.scores = scores[batch_idx, ...][mask_idx]
        result.pred_classes = filter_idx[:, 1]
        results.append(result)
        filter_indices.append(filter_idx)
    return results, filter_indices
def fast_rcnn_inference_single_image(boxes, scores, image_shape, score_thresh,
                                     nms_thresh, topk_per_image):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]

    scores = scores[:, :-1]
    # Keep the full per-class score matrix so it can be attached to the output.
    Tscores = scores
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # Class-agnostic NMS: all detections share a single dummy class id,
    # instead of the usual per-class NMS.
    # keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    uniclass = torch.zeros(len(filter_inds), device=boxes.device)
    keep = batched_nms(boxes, scores, uniclass, nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    # Attach each kept prediction's full score distribution rather than only
    # its per-class score.
    result.scores = Tscores[filter_inds[:, 0]]
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
def fast_rcnn_inference_single_image(boxes, scores, attr_scores, image_shape,
                                     score_thresh, nms_thresh, topk_per_image):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    # Make sure boxes and scores don't contain infinite or NaN values
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1) \
        & torch.isfinite(attr_scores).all(dim=1)
    # Keep only the finite boxes and scores
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        attr_scores = attr_scores[valid_mask]

    scores = scores[:, :-1]  # Remove the background class
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4
    # If using an Attributes class:
    # attributes = Attributes(attributes.reshape(-1, 295))
    # attributes = attributes.tensor.view(-1, num_bbox_reg_classes, 295)

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, attr_scores, filter_inds = (
        boxes[keep], scores[keep], attr_scores[keep], filter_inds[keep])

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.attr_scores = attr_scores
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
def fast_rcnn_inference_single_image(boxes, scores, image_shape, score_thresh,
                                     nms_thresh, topk_per_image, light=None):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # `light` is an optional second set of boxes filtered alongside the
    # detections.
    if isinstance(light, torch.Tensor):
        light = Boxes(light.reshape(-1, 4))
        # light.clip(image_shape)
        light = light.tensor.view(-1, num_bbox_reg_classes, 4)

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
        if isinstance(light, torch.Tensor):
            light = light[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
        if isinstance(light, torch.Tensor):
            light = light[filter_mask]
    scores = scores[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]
    if isinstance(light, torch.Tensor):
        light = light[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    if isinstance(light, torch.Tensor):
        result.pred_light = Boxes(light)
    return result, filter_inds[:, 0]
def fast_rcnn_inference_single_image(boxes, scores, image_shape, score_thresh,
                                     nms_thresh, topk_per_image, allow_oob=False):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]

    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    if not allow_oob:
        # Convert to Boxes to use the `clip` function ...
        boxes = Boxes(boxes.reshape(-1, 4))
        boxes.clip(image_shape)
        boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4
    else:
        # Keep out-of-bounds boxes unclipped.
        boxes = boxes.view(-1, num_bbox_reg_classes, 4)

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # Apply class-agnostic NMS (not per-class as in the base implementation)
    from torchvision.ops import nms
    keep = nms(boxes, scores, nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
def fast_rcnn_inference_single_image(
    boxes,
    scores,
    image_shape: Tuple[int, int],
    score_thresh: float,
    nms_thresh: float,
    topk_per_image: int,
):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]

    scores = scores[:, :-1]
    # Optionally zero out the scores of disabled categories via a global mask.
    if len(category_disabler.global_cat_mask) > 0:
        print('<<<<<< category disabler activated >>>>>>')
        scores *= torch.tensor(category_disabler.global_cat_mask).cuda()

    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # 1. Filter results based on detection scores. It can make NMS more efficient
    #    by filtering out low-confidence detections.
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # 2. Apply NMS for each class independently.
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
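# --- Illustration (not from the source): why `batched_nms` performs
# *per-class* NMS. torchvision offsets boxes of different class ids so they
# can never overlap, then runs plain NMS once over everything.
import torch
from torchvision.ops import batched_nms

boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],    # IoU ~0.68 with box 0
                      [0., 0., 10., 10.]])   # identical to box 0
scores = torch.tensor([0.9, 0.8, 0.7])
classes = torch.tensor([0, 0, 1])

keep = batched_nms(boxes, scores, classes, iou_threshold=0.5)
print(keep)  # tensor([0, 2]): box 1 is suppressed by box 0 (same class);
             # box 2 survives because NMS never compares across classes.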
def fast_rcnn_inference_single_image(
    boxes, scores, image_shape, score_thresh, nms_thresh, topk_per_image
):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]

    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # 1. Filter results based on detection scores. It can make NMS more efficient
    #    by filtering out low-confidence detections.
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # 2. Apply NMS for each class independently.
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    # DIoU NMS, commented out for now:
    # keep = batched_diou_nms(boxes, scores, filter_inds[:, 1], nms_thresh) \
    #     if global_cfg.MODEL.ROI_BOX_HEAD.NMS_TYPE == "diou_nms" \
    #     else batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
def transform_proposals_seg(dataset_dict, image_shape, transforms, *,
                            proposal_topk, min_box_size=0):
    """
    Apply transformations to the proposals and superpixels in dataset_dict,
    if any.

    Args:
        dataset_dict (dict): a dict read from the dataset, containing an
            "Instances" object under "proposals" and a superpixel map under
            "superpixels"
        image_shape (tuple): height, width
        transforms (TransformList):
        proposal_topk (int): only keep top-K scoring proposals
        min_box_size (int): proposals with either side smaller than this threshold
            are removed

    The input dict is modified in-place: "proposals" is replaced by an
    `Instances` object containing the transformed proposals in its fields
    "proposal_boxes", "objectness_logits" and "oh_labels", and "superpixels"
    is replaced by its transformed version.
    """
    boxes = dataset_dict["proposals"].proposal_boxes.tensor.cpu().numpy()
    boxes = transforms.apply_box(boxes)
    boxes = Boxes(boxes)
    objectness_logits = dataset_dict["proposals"].objectness_logits
    oh_labels = dataset_dict["proposals"].oh_labels
    superpixels = dataset_dict["superpixels"].cpu().numpy()
    boxes.clip(image_shape)
    # keep = boxes.unique_boxes()
    # boxes = boxes[keep]
    # objectness_logits = objectness_logits[keep]
    keep = boxes.nonempty(threshold=min_box_size)
    boxes = boxes[keep]
    objectness_logits = objectness_logits[keep]
    oh_labels = oh_labels[keep]

    proposals = Instances(image_shape)
    proposals.proposal_boxes = boxes[:proposal_topk]
    proposals.objectness_logits = objectness_logits[:proposal_topk]
    proposals.oh_labels = oh_labels[:proposal_topk]
    dataset_dict["proposals"] = proposals

    # for tfm in transforms:
    #     if isinstance(tfm, HFlipTransform):
    #         superpixels = tfm.apply_segmentation(superpixels)
    superpixels = transforms.apply_segmentation(superpixels.astype("float32"))
    dataset_dict["superpixels"] = torch.as_tensor(
        np.ascontiguousarray(superpixels.astype("int32")))
def fast_rcnn_inference_single_image(
    boxes,
    scores,
    image_shape,
    score_thresh,
    nms_thresh,
    topk_per_image,
    fc_box_features=None,
):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    probs = scores.clone().detach()
    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    # Compact all fc layers into a single tensor to work nicely with the
    # Instances class for now.
    if fc_box_features is not None:
        fc_box_features = [fc_layer_box_features[filter_inds[:, 0]]
                           for fc_layer_box_features in fc_box_features]
        # Will need to know the number of layers and dimensions to unpack.
        fc_box_features = torch.cat(fc_box_features, dim=1)
        result.fc_box_features = fc_box_features
    probs = probs[filter_inds[:, 0]]
    result.probs = probs
    return result, filter_inds[:, 0]
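# --- Illustration (not from the source): the (proposal, class) index pairs
# produced by `filter_mask.nonzero()`, which the functions above use to pull
# per-proposal data (probs, features) for the surviving detections.
import torch

scores = torch.tensor([[0.9, 0.1],
                       [0.2, 0.6]])
filter_mask = scores > 0.5           # R x K boolean mask
filter_inds = filter_mask.nonzero()  # R' x 2: (proposal index, class index)
print(filter_inds)                   # tensor([[0, 0], [1, 1]])
print(scores[filter_mask])           # tensor([0.9000, 0.6000])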
def regress_and_classify(self, image: np.ndarray,
                         tracklets: List[Tracklet]) -> Tuple[np.ndarray, np.ndarray]:
    # Convert tracklet boxes to proposals
    height, width = image.shape[:2]
    image = self.transform_gen.get_transform(image).apply_image(image)
    image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
    # Size of the resized image that is fed to the detector
    feat_height, feat_width = image.shape[1:3]
    scale_x = feat_width / width
    scale_y = feat_height / height
    proposal_boxes = Boxes(
        torch.tensor([tracklet.last_detection.box for tracklet in tracklets]))
    # Scale proposals to the same size as the resized image
    proposal_boxes.scale(scale_x, scale_y)
    proposals = Instances((feat_height, feat_width), proposal_boxes=proposal_boxes)
    inputs = {"image": image, "height": height, "width": width,
              "proposals": proposals}

    images = self.model.preprocess_image([inputs])
    features = self.model.backbone(images.tensor)
    proposals = [inputs["proposals"].to(self.model.device)]

    # Extract features, perform RoI pooling and perform regression/classification
    # for each RoI
    features_list = [features[f] for f in self.model.roi_heads.in_features]
    box_features = self.model.roi_heads.box_pooler(
        features_list, [x.proposal_boxes for x in proposals])
    box_features = self.model.roi_heads.box_head(box_features)
    pred_class_logits, pred_proposal_deltas = self.model.roi_heads.box_predictor(
        box_features)
    del box_features

    raw_outputs = FastRCNNOutputs(
        self.model.roi_heads.box_predictor.box2box_transform,
        pred_class_logits,
        pred_proposal_deltas,
        proposals,
        self.model.roi_heads.box_predictor.smooth_l1_beta,
    )

    # Convert raw outputs to predicted boxes and scores
    boxes = raw_outputs.predict_boxes()[0]
    scores = raw_outputs.predict_probs()[0]

    num_bbox_reg_classes = boxes.shape[1] // 4
    boxes = Boxes(boxes.reshape(-1, 4))
    # Clip to the resized image, then scale regressed boxes back to the
    # original image size
    boxes.clip((feat_height, feat_width))
    boxes.scale(1 / scale_x, 1 / scale_y)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)

    boxes = boxes[:, 0, :]
    scores = scores[:, 0]

    pred_boxes = boxes.detach().cpu().numpy()
    scores = scores.detach().cpu().numpy()
    return pred_boxes, scores
def transform_proposals(dataset_dict, image_shape, transforms, *,
                        proposal_topk, min_box_size=0):
    """
    Apply transformations to the proposals in dataset_dict, if any.

    Args:
        dataset_dict (dict): a dict read from the dataset, possibly contains
            fields "proposal_boxes", "proposal_objectness_logits", "proposal_bbox_mode"
        image_shape (tuple): height, width
        transforms (TransformList):
        proposal_topk (int): only keep top-K scoring proposals
        min_box_size (int): proposals with either side smaller than this threshold
            are removed

    The input dict is modified in-place, with the above-mentioned keys removed.
    A new key "proposals" will be added. Its value is an `Instances` object which
    contains the transformed proposals in its fields "proposal_boxes" and
    "objectness_logits".
    """
    if "proposal_boxes" in dataset_dict:
        # Transform proposal boxes
        boxes = transforms.apply_box(
            BoxMode.convert(
                dataset_dict.pop("proposal_boxes"),
                dataset_dict.pop("proposal_bbox_mode"),
                BoxMode.XYXY_ABS,
            ))
        boxes = Boxes(boxes)
        objectness_logits = torch.as_tensor(
            dataset_dict.pop("proposal_objectness_logits").astype("float32"))

        boxes.clip(image_shape)
        keep = boxes.unique_boxes()
        boxes = boxes[keep]
        objectness_logits = objectness_logits[keep]
        keep = boxes.nonempty(threshold=min_box_size)
        boxes = boxes[keep]
        objectness_logits = objectness_logits[keep]

        proposals = Instances(image_shape)
        proposals.proposal_boxes = boxes[:proposal_topk]
        proposals.objectness_logits = objectness_logits[:proposal_topk]
        dataset_dict["proposals"] = proposals
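# --- Illustration (not from the source): the clip + nonempty filtering used
# by the proposal transforms above, assuming detectron2 is installed.
import torch
from detectron2.structures import Boxes

boxes = Boxes(torch.tensor([[-5., -5., 20., 20.],     # pokes out of the image
                            [30., 30., 30., 30.]]))   # degenerate, zero area
boxes.clip((100, 100))              # image_shape is (height, width)
keep = boxes.nonempty(threshold=0)  # sides must be strictly larger than 0
print(boxes.tensor[keep])           # only the clipped first box survives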
def transform_proposals(dataset_dict, image_shape, transforms, *,
                        proposal_topk, min_box_size=0):
    """
    Apply transformations to the proposals in dataset_dict, if any.

    Args:
        dataset_dict (dict): a dict read from the dataset, possibly contains
            fields "proposal_boxes", "proposal_objectness_logits", "proposal_bbox_mode"
        image_shape (tuple): height, width
        transforms (TransformList):
        proposal_topk (int): only keep top-K scoring proposals
        min_box_size (int): proposals with either side smaller than this threshold
            are removed

    The input dict is modified in-place, with the above-mentioned keys removed.
    A new key "proposals" will be added. Its value is an `Instances` object which
    contains the transformed proposals in its fields "proposal_boxes" and
    "objectness_logits".
    """
    if "proposal_file" in dataset_dict:
        return transform_proposals_seg(dataset_dict, image_shape, transforms,
                                       proposal_topk=proposal_topk)

    boxes = dataset_dict["proposals"].proposal_boxes.tensor.cpu().numpy()
    boxes = transforms.apply_box(boxes)
    boxes = Boxes(boxes)
    objectness_logits = dataset_dict["proposals"].objectness_logits
    boxes.clip(image_shape)
    # keep = boxes.unique_boxes()
    # boxes = boxes[keep]
    # objectness_logits = objectness_logits[keep]
    keep = boxes.nonempty(threshold=min_box_size)
    boxes = boxes[keep]
    objectness_logits = objectness_logits[keep]

    proposals = Instances(image_shape)
    proposals.proposal_boxes = boxes[:proposal_topk]
    proposals.objectness_logits = objectness_logits[:proposal_topk]
    dataset_dict["proposals"] = proposals
def fast_rcnn_inference_single_image(image_shape, boxes, scores, classes=None,
                                     score_thresh=0.05, nms_thresh=0.5,
                                     topk_per_image=1000):
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1)
    replace_cls = classes is not None
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]

    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]
    if replace_cls:
        classes = classes[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    if replace_cls:
        result.pred_classes = classes[keep]
    else:
        result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
def fast_rcnn_inference_single_image(
    boxes, scores, image_shape, nms_thresh, topk_per_image
):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    class_distr_scores = scores.clone()
    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Select max scores
    max_scores, max_classes = scores.max(1)  # R x C --> R
    num_objs = boxes.size(0)
    boxes = boxes.view(-1, 4)
    num_objs = torch.arange(num_objs)
    if torch.cuda.is_available():
        num_objs = num_objs.cuda()
    idxs = num_objs * num_bbox_reg_classes + max_classes
    max_boxes = boxes[idxs]  # Select max boxes according to the max scores.

    # Apply NMS
    keep = nms(max_boxes, max_scores, nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores = max_boxes[keep], max_scores[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    class_distr_scores = class_distr_scores[keep]
    # We set the background probability to 0
    class_distr_scores[:, -1] = 0.0
    result.scores = class_distr_scores
    return result, keep
def trend_rcnn_inference_single_image(boxes, scores, attributes, image_shape,
                                      score_thresh, nms_thresh, topk_per_image,
                                      attr_score_thresh, num_attr_classes,
                                      max_attr_pred):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        attributes = attributes[valid_mask]

    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Number of object classes for which attributes are regressed
    num_attr_reg_classes = attributes.shape[1] // num_attr_classes
    # Reshape to (proposals, object classes, attribute classes)
    attributes = attributes.view(-1, num_attr_reg_classes, num_attr_classes)

    # Filter results based on detection scores; filter_mask has the same shape
    # as scores: (proposals, object classes)
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    # Note that R' can be much larger than R, since every (proposal, class)
    # pair that clears the threshold is kept (e.g. R = 1000 proposals can give
    # R' = 45806 pairs); class-agnostic attribute classification might fail in
    # that regime.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    # scores goes from (R, num_classes) to (R',) after the filter mask
    scores = scores[filter_mask]
    if num_attr_reg_classes == 1:
        attributes = attributes[filter_inds[:, 0], 0]
    else:
        attributes = attributes[filter_mask]
    # Both branches produce attributes of shape (R', attr_classes)

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds, attributes = (
        boxes[keep], scores[keep], filter_inds[keep], attributes[keep])

    attributes[attributes < attr_score_thresh] = 0
    attr_scores_sorted, attr_indices = torch.sort(attributes, 1, descending=True)
    # 294 marks the "no attribute" index for low-scoring slots
    attr_indices[attr_scores_sorted < attr_score_thresh] = 294
    attributes_inds = attr_indices[:, 0:max_attr_pred]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.attr_scores = attributes
    result.attr_classes = attributes_inds
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
def fast_rcnn_inference_single_image(boxes, scores, image_shape, score_thresh,
                                     nms_thresh, topk_per_image, class_logits=None,
                                     estimate_uncertainty=False,
                                     variance=torch.Tensor([])):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    import numpy as np

    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]

    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    # Get box ID with predicted class label: [box id, class label]
    filter_inds = filter_mask.nonzero()

    # One box per proposal, taken at its argmax class (used for uncertainty
    # estimation). The original hardcoded np.arange(1000); use the actual
    # number of proposals instead.
    class_id = np.argmax(scores.cpu().numpy(), axis=1)
    class_id = np.array([np.arange(scores.shape[0]), class_id])
    class_id = np.swapaxes(class_id, 1, 0)
    boxes_one_class = boxes[class_id[:, 0], class_id[:, 1], :].cpu().numpy()
    scores_one_class = np.max(scores.cpu().numpy(), axis=1)

    if class_logits is not None:
        class_logits = class_logits[filter_inds[:, 0]]
        predicted_probs = scores[filter_inds[:, 0]]

    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores_filtered = scores[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores_filtered, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes_final, scores_final, filter_inds_final = (
        boxes[keep], scores_filtered[keep], filter_inds[keep])

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes_final)
    result.scores = scores_final
    result.pred_classes = filter_inds_final[:, 1]

    # Save out logits
    if class_logits is not None:
        result.class_logits = class_logits[keep]
        result.prob_score = predicted_probs[keep]
    if estimate_uncertainty:
        # std over boxes whose class confidence exceeds the threshold
        # (an alternative is to compute it over all 1000 proposals via
        # boxes_one_class / scores_one_class above)
        stds = nms_calc_uncertainty(boxes_final.cpu().numpy(),
                                    scores_final.cpu().numpy(),
                                    boxes.cpu().numpy(),
                                    scores_filtered.cpu().numpy(), 0.9)
        result.stds = torch.Tensor(stds).cuda()
    if len(variance) > 0:
        result.vars = variance[keep]
    return result, filter_inds_final[:, 0]
def fast_rcnn_inference_single_image(boxes, scores, image_shape, objness_scores,
                                     score_thresh, nms_thresh, topk_per_image,
                                     use_unknown=False, num_classes=80,
                                     reverse_label_converter=None):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1)
    if reverse_label_converter is not None:
        ignore_void = reverse_label_converter[-1] == -1
    else:
        ignore_void = scores.shape[1] == num_classes + 1
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        objness_scores = objness_scores[valid_mask]

    original_scores = scores.clone()
    if ignore_void:
        scores = scores[:, :-1]
    else:
        scores = scores[:, :-2]

    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Filter results based on detection scores
    if scores.shape[1] > num_classes:
        filter_mask = scores[:, :-1] > score_thresh
    else:
        filter_mask = scores > score_thresh  # R x K

    if use_unknown:
        # Proposals that no known class claims but whose objectness is high
        # are kept as "unknown" detections.
        new_filter_mask = filter_mask.sum(-1) < 1
        if original_scores.shape[1] > num_classes + 1 or not ignore_void:
            new_filter_mask = torch.logical_and(
                new_filter_mask, original_scores.argmax(-1) == num_classes)
        objness_scores = objness_scores.sigmoid()
        obj_th = 0.500
        unknown_filter_mask = torch.logical_and(new_filter_mask,
                                                objness_scores > obj_th)
        unknown_filter_inds = unknown_filter_mask.nonzero()
        unknown_boxes = boxes[unknown_filter_inds[:, 0], 0]
        unknown_scores = objness_scores[unknown_filter_inds[:, 0]]
        keep = nms(unknown_boxes, unknown_scores, nms_thresh)
        keep = keep[:int(topk_per_image * 0.5)]
        unknown_boxes = unknown_boxes[keep]
        unknown_scores = unknown_scores[keep]
        unknown_filter_inds = unknown_filter_inds[keep]

    if scores.shape[1] > num_classes:
        scores = scores[:, :-1]

    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    if use_unknown:
        boxes = torch.cat((boxes, unknown_boxes), dim=0)
        scores = torch.cat((scores, unknown_scores), dim=0)
        if ignore_void:
            classes = torch.cat(
                (filter_inds[:, 1],
                 -torch.ones(len(unknown_scores),
                             device=filter_inds.device).long()), dim=0)
        else:
            classes = torch.cat(
                (filter_inds[:, 1],
                 -2 * torch.ones(len(unknown_scores),
                                 device=filter_inds.device).long()), dim=0)
    else:
        classes = filter_inds[:, -1]

    if reverse_label_converter is not None:
        classes = reverse_label_converter.to(classes.device)[classes]

    boxes = boxes[:topk_per_image]
    scores = scores[:topk_per_image]
    classes = classes[:topk_per_image]

    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = classes
    inds = filter_inds[:, 0]
    if use_unknown:
        inds = torch.cat((inds, unknown_filter_inds[:, 0]))
    inds = inds[:topk_per_image]
    return result, inds
def eopsn_inference_single_image(
    boxes, scores, image_shape, objness_scores, score_thresh, nms_thresh,
    topk_per_image, use_unknown=False, num_classes=80,
    reverse_label_converter=None
):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        objness_scores = objness_scores[valid_mask]

    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # Map class ids beyond the label converter's range to the unknown label
    # (-1), then apply per-class NMS
    classes = filter_inds[:, -1]
    classes[classes > len(reverse_label_converter) - 1] = -1
    filter_inds[:, -1] = classes
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    classes = filter_inds[:, -1]
    if reverse_label_converter is not None:
        classes = reverse_label_converter.to(classes.device)[classes]
    boxes = boxes[:topk_per_image]
    scores = scores[:topk_per_image]
    classes = classes[:topk_per_image]
    inds = filter_inds[:, 0]
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = classes
    inds = inds[:topk_per_image]
    return result, inds
def find_top_rpn_proposals(
    proposals,
    pred_objectness_logits,
    images,
    nms_thresh,
    pre_nms_topk,
    post_nms_topk,
    min_box_side_len,
    training,
):
    """
    For each feature map, select the `pre_nms_topk` highest scoring proposals,
    apply NMS, clip proposals, and remove small boxes. Return the `post_nms_topk`
    highest scoring proposals among all the feature maps if `training` is True,
    otherwise, returns the highest `post_nms_topk` scoring proposals for each
    feature map.

    Args:
        proposals (list[Tensor]): A list of L tensors. Tensor i has shape
            (N, Hi*Wi*A, 4). All proposal predictions on the feature maps.
        pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i has
            shape (N, Hi*Wi*A).
        images (ImageList): Input images as an :class:`ImageList`.
        nms_thresh (float): IoU threshold to use for NMS.
        pre_nms_topk (int): number of top k scoring proposals to keep before
            applying NMS. When RPN is run on multiple feature maps (as in FPN)
            this number is per feature map.
        post_nms_topk (int): number of top k scoring proposals to keep after
            applying NMS. When RPN is run on multiple feature maps (as in FPN)
            this number is total, over all feature maps.
        min_box_side_len (float): minimum proposal box side length in pixels
            (absolute units wrt input images).
        training (bool): True if proposals are to be used in training, otherwise
            False. This arg exists only to support a legacy bug; look for the
            "NB: Legacy bug ..." comment.

    Returns:
        proposals (list[Instances]): list of N Instances. The i-th Instances
            stores post_nms_topk object proposals for image i.
    """
    image_sizes = images.image_sizes  # in (h, w) order
    num_images = len(image_sizes)
    device = proposals[0].device

    # 1. Select top-k anchors for every level and every image
    topk_scores = []  # #lvl Tensor, each of shape N x topk
    topk_proposals = []
    level_ids = []  # #lvl Tensor, each of shape (topk,)
    batch_idx = torch.arange(num_images, device=device)
    for level_id, proposals_i, logits_i in zip(itertools.count(), proposals,
                                               pred_objectness_logits):
        Hi_Wi_A = logits_i.shape[1]
        num_proposals_i = min(pre_nms_topk, Hi_Wi_A)

        # sort is faster than topk (https://github.com/pytorch/pytorch/issues/22812)
        # topk_scores_i, topk_idx = logits_i.topk(num_proposals_i, dim=1)
        logits_i, idx = logits_i.sort(descending=True, dim=1)
        topk_scores_i = logits_i[batch_idx, :num_proposals_i]
        topk_idx = idx[batch_idx, :num_proposals_i]

        # each is N x topk
        topk_proposals_i = proposals_i[batch_idx[:, None], topk_idx]  # N x topk x 4

        topk_proposals.append(topk_proposals_i)
        topk_scores.append(topk_scores_i)
        level_ids.append(
            torch.full((num_proposals_i,), level_id, dtype=torch.int64,
                       device=device))

    # 2. Concat all levels together
    topk_scores = cat(topk_scores, dim=1)
    topk_proposals = cat(topk_proposals, dim=1)
    level_ids = cat(level_ids, dim=0)

    # 3. For each image, run a per-level NMS, and choose topk results.
    results = []
    for n, image_size in enumerate(image_sizes):
        boxes = Boxes(topk_proposals[n])
        scores_per_img = topk_scores[n]
        valid_mask = torch.isfinite(boxes.tensor).all(dim=1) & torch.isfinite(
            scores_per_img)
        if not valid_mask.all():
            boxes = boxes[valid_mask]
            scores_per_img = scores_per_img[valid_mask]
        boxes.clip(image_size)

        # filter empty boxes
        keep = boxes.nonempty(threshold=min_box_side_len)
        lvl = level_ids
        if keep.sum().item() != len(boxes):
            boxes, scores_per_img, lvl = (boxes[keep], scores_per_img[keep],
                                          level_ids[keep])

        keep = batched_nms(boxes.tensor, scores_per_img, lvl, nms_thresh)
        # In Detectron1, there was a different behavior during training vs. testing
        # (https://github.com/facebookresearch/Detectron/issues/459).
        # During training, topk is over the proposals from *all* images in the
        # training batch. During testing, it is over the proposals for each
        # image separately. As a result, the training behavior becomes
        # batch-dependent, and the configuration "POST_NMS_TOPK_TRAIN" ends up
        # relying on the batch size. This bug is addressed in Detectron2 to
        # make the behavior independent of batch size.
        keep = keep[:post_nms_topk]

        res = Instances(image_size)
        res.proposal_boxes = boxes[keep]
        res.objectness_logits = scores_per_img[keep]
        results.append(res)
    return results
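# --- Illustration (not from the source): the "sort instead of topk" trick in
# step 1 above. A full descending sort was faster than topk on CUDA at the
# time (see the linked PyTorch issue), and slicing the sorted result is
# equivalent.
import torch

logits = torch.randn(2, 1000)   # N images x (Hi*Wi*A) anchors at one level
k = 100
vals, idx = logits.sort(descending=True, dim=1)
topk_scores, topk_idx = vals[:, :k], idx[:, :k]

ref_scores, ref_idx = logits.topk(k, dim=1)
assert torch.equal(topk_scores, ref_scores)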
def forward_for_single_feature_map(self, locations, box_cls, reg_pred, image_sizes):
    N, C, H, W = box_cls.shape

    # put in the same format as locations
    box_cls = box_cls.view(N, C, H, W).permute(0, 2, 3, 1)
    box_cls = box_cls.reshape(N, -1, C).sigmoid()
    box_regression = reg_pred.view(N, 4, H, W).permute(0, 2, 3, 1)
    box_regression = box_regression.reshape(N, -1, 4)
    # ctrness = ctrness.view(N, 1, H, W).permute(0, 2, 3, 1)
    # ctrness = ctrness.reshape(N, -1).sigmoid()

    # if self.thresh_with_ctr is True, we multiply the classification
    # scores with centerness scores before applying the threshold.
    # if self.thresh_with_ctr:
    #     box_cls = box_cls * ctrness[:, :, None]

    candidate_inds = box_cls > self.pre_nms_thresh
    pre_nms_top_n = candidate_inds.view(N, -1).sum(1)
    pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n)

    # if not self.thresh_with_ctr:
    #     box_cls = box_cls * ctrness[:, :, None]

    results = []
    for i in range(N):
        per_box_cls = box_cls[i]
        per_candidate_inds = candidate_inds[i]
        per_box_cls = per_box_cls[per_candidate_inds]

        per_candidate_nonzeros = per_candidate_inds.nonzero()
        per_box_loc = per_candidate_nonzeros[:, 0]
        per_class = per_candidate_nonzeros[:, 1]

        per_box_regression = box_regression[i]
        per_box_regression = per_box_regression[per_box_loc]
        per_locations = locations[per_box_loc]

        per_pre_nms_top_n = pre_nms_top_n[i]
        if per_candidate_inds.sum().item() > per_pre_nms_top_n.item():
            per_box_cls, top_k_indices = \
                per_box_cls.topk(per_pre_nms_top_n, sorted=False)
            per_class = per_class[top_k_indices]
            per_box_regression = per_box_regression[top_k_indices]
            per_locations = per_locations[top_k_indices]

        detections = torch.stack([
            per_locations[:, 0] - per_box_regression[:, 0],
            per_locations[:, 1] - per_box_regression[:, 1],
            per_locations[:, 0] + per_box_regression[:, 2],
            per_locations[:, 1] + per_box_regression[:, 3],
        ], dim=1)

        boxlist = Instances(image_sizes[i])
        boxes = Boxes(detections)
        boxes.clip(image_sizes[i])
        boxlist.pred_boxes = boxes
        boxlist.scores = torch.sqrt(per_box_cls)
        boxlist.pred_classes = per_class
        boxlist.locations = per_locations
        results.append(boxlist)

    return results
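# --- Illustration (not from the source): the (l, t, r, b) decoding performed
# in `detections` above. Each location (x, y) plus the four predicted
# distances gives x1 = x - l, y1 = y - t, x2 = x + r, y2 = y + b.
import torch

locations = torch.tensor([[50., 60.]])
ltrb = torch.tensor([[10., 20., 30., 40.]])
boxes = torch.stack([
    locations[:, 0] - ltrb[:, 0],
    locations[:, 1] - ltrb[:, 1],
    locations[:, 0] + ltrb[:, 2],
    locations[:, 1] + ltrb[:, 3],
], dim=1)
print(boxes)  # tensor([[40., 40., 80., 100.]])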
def fast_rcnn_inference_single_image(boxes, scores, image_shape, score_thresh,
                                     nms_thresh, topk_per_image, vp_bins=None,
                                     vp=None, vp_res=None,
                                     rotated_box_training=False, h=None):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # Apply per-class NMS
    if not rotated_box_training or len(boxes) == 0:
        keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    else:
        # BBox with encoding ctr_x, ctr_y, w, l
        if vp is not None and vp_bins is not None:
            _vp = vp.view(-1, num_bbox_reg_classes, vp_bins)  # R x C x bins
            _vp = _vp[filter_mask]
            if len(_vp) > 0:
                _, vp_max = torch.max(_vp, 1)
                vp_filtered = vp_max
                if vp_res is not None:
                    _vp_res = vp_res.view(-1, num_bbox_reg_classes, vp_bins)
                    _vp_res = _vp_res[filter_mask]
                    vp_res_filtered = list()
                    for i, k in enumerate(vp_max):
                        vp_res_filtered.append(_vp_res[i, k])
            else:
                vp_filtered = _vp
            rboxes = []
            for i in range(boxes.shape[0]):
                box = boxes[i]
                angle = anglecorrection(vp_res_filtered[i] * 180 / math.pi).to(
                    box.device) if vp_res is not None else bin2ang(
                        vp_filtered[i], vp_bins).to(box.device)
                box = torch.cat((box, angle))
                rboxes.append(box)
            rboxes = torch.cat(rboxes).reshape(-1, 5).to(vp_filtered.device)
            # keep = nms_rotated(rboxes, scores, nms_thresh)
            keep = batched_nms_rotated(rboxes, scores, filter_inds[:, 1],
                                       nms_thresh)
        else:
            # boxes is R' x 4 at this point (the original indexed it as 3-D,
            # which would fail here)
            boxes[:, 2] = boxes[:, 2] + boxes[:, 0]  # x2
            boxes[:, 3] = boxes[:, 3] + boxes[:, 1]  # y2
            keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)

    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    if vp is not None and vp_bins is not None:
        vp = vp.view(-1, num_bbox_reg_classes, vp_bins)  # R x C x bins
        vp = vp[filter_mask]
        vp = vp[keep]
        if vp_res is not None:
            vp_res = vp_res.view(-1, num_bbox_reg_classes, vp_bins)
            vp_res = vp_res[filter_mask]
            vp_res = vp_res[keep]
        if len(vp) > 0:
            _, vp_max = torch.max(vp, 1)
            result.viewpoint = vp_max
            if vp_res is not None:
                vp_res_filtered = list()
                for i, k in enumerate(vp_max):
                    vp_res_filtered.append(vp_res[i, k])
                # This is directly the predicted yaw orientation
                result.viewpoint_residual = torch.tensor(vp_res_filtered).to(
                    vp_max.device)
        else:
            result.viewpoint = vp
            result.viewpoint_residual = vp_res
    if h is not None:
        h = h.view(-1, num_bbox_reg_classes, 2)  # R x C x 2
        h = h[filter_mask]
        h = h[keep]
        result.height = h
    return result, filter_inds[:, 0]
def fast_rcnn_inference_single_image(boxes, scores, image_shape, score_thresh,
                                     nms_thresh, topk_per_image):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    all_scores = scores.clone()
    all_scores = torch.unsqueeze(all_scores, 0)
    all_boxes = boxes.clone()
    all_boxes = torch.unsqueeze(all_boxes, 0)

    # Row index of every (proposal, class) score, so the original proposal of
    # each detection can be recovered after filtering.
    pred_inds = torch.unsqueeze(
        torch.arange(scores.size(0), device=scores.device, dtype=torch.long),
        dim=1).repeat(1, scores.size(1))

    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        pred_inds = pred_inds[valid_mask]

    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4
    pred_inds = pred_inds[:, :-1]

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]
    pred_inds = pred_inds[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]
    pred_inds = pred_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    result.pred_inds = pred_inds
    return result, filter_inds[:, 0], all_scores, all_boxes
def fast_rcnn_inference_single_image_with_overlap(
    boxes,
    scores,
    overlap_boxes,
    overlap_probs,
    image_shape,
    score_thresh,
    nms_thresh,
    topk_per_image,
    allow_oob=False,
):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        overlap_boxes = overlap_boxes[valid_mask]
        overlap_probs = overlap_probs[valid_mask]

    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    if not allow_oob:
        boxes = Boxes(boxes.reshape(-1, 4))
        boxes.clip(image_shape)
        boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4
        assert overlap_boxes.size(1) == 4, \
            "overlap boxes prediction has no category, but: {}".format(
                overlap_boxes.size())
        overlap_boxes = Boxes(overlap_boxes)
        overlap_boxes.clip(image_shape)
        overlap_boxes = overlap_boxes.tensor
    else:
        boxes = boxes.view(-1, num_bbox_reg_classes, 4)

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
        overlap_boxes = overlap_boxes[filter_inds[:, 0]]
    else:
        boxes = boxes[filter_mask]
        overlap_boxes = overlap_boxes[filter_inds[:, 0]]
    scores = scores[filter_mask]
    overlap_probs = overlap_probs[filter_mask]

    # Apply NMS that is aware of the overlap predictions (NOH-NMS); set to
    # False to fall back to plain class-agnostic NMS.
    self_defined_nms_on = True
    if self_defined_nms_on:
        boxes = np.ascontiguousarray(boxes.cpu())
        scores = np.ascontiguousarray(scores.cpu())
        overlap_probs = np.ascontiguousarray(overlap_probs.cpu())
        overlap_boxes = np.ascontiguousarray(overlap_boxes.cpu())
        keep = batched_noh_nms(boxes, scores, overlap_probs, overlap_boxes,
                               Nt=nms_thresh, thresh=0.01, method=3)
        boxes = torch.from_numpy(boxes).cuda()
        scores = torch.from_numpy(scores).cuda()
        overlap_probs = torch.from_numpy(overlap_probs).cuda()
        overlap_boxes = torch.from_numpy(overlap_boxes).cuda()
        keep = keep[scores[keep].argsort(descending=True)]
    else:
        from torchvision.ops import nms
        keep = nms(boxes, scores, nms_thresh)

    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, overlap_boxes, overlap_probs, filter_inds = (
        boxes[keep],
        scores[keep],
        overlap_boxes[keep],
        overlap_probs[keep],
        filter_inds[keep],
    )

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.overlap_boxes = Boxes(overlap_boxes)
    result.overlap_probs = overlap_probs
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
def test_caffe2_pytorch_eq(self):
    ims_per_batch = 8
    post_nms_topk = 100
    detections_per_im = 10
    num_class = 80
    score_thresh = 0.05
    nms_thresh = 0.5

    image_shapes = [torch.Size([800, 800])] * ims_per_batch
    batch_splits = [post_nms_topk] * ims_per_batch

    # NOTE: There are still some unresolved minor implementation differences
    # (e.g. ordering when scores are equal across classes) causing some seeds
    # to fail the test. Thus set a fixed seed to make sure this test passes
    # consistently.
    rng = torch.Generator()
    rng.manual_seed(42)
    boxes = []
    for n in batch_splits:
        box = 1000.0 * 0.5 * torch.rand(n, num_class, 4, generator=rng) + 0.001
        box[:, :, -2:] += box[:, :, :2]
        box = box.view(n, num_class * 4)
        boxes.append(box)
    scores = [torch.rand(n, num_class + 1, generator=rng) for n in batch_splits]

    ref_results, ref_kept_indices = fast_rcnn_inference(
        boxes,
        scores,
        image_shapes,
        score_thresh=score_thresh,
        nms_thresh=nms_thresh,
        topk_per_image=detections_per_im)
    for result, kept_index, score in zip(ref_results, ref_kept_indices, scores):
        torch.testing.assert_allclose(
            score[kept_index, result.pred_classes],
            result.scores,
        )

    # clip is done in BBoxTransformOp
    c2_boxes = []
    for box, image_shape in zip(boxes, image_shapes):
        num_bbox_reg_classes = box.shape[1] // 4
        clipped_box = Boxes(box.reshape(-1, 4))
        clipped_box.clip(image_shape)
        clipped_box = clipped_box.tensor.view(-1, num_bbox_reg_classes * 4)
        c2_boxes.append(clipped_box)

    c2_boxes = cat(c2_boxes)
    c2_scores = cat(scores)
    c2_batch_splits = torch.Tensor(batch_splits)

    nms_outputs = torch.ops._caffe2.BoxWithNMSLimit(
        c2_scores,
        c2_boxes,
        c2_batch_splits,
        score_thresh=float(score_thresh),
        nms=float(nms_thresh),
        detections_per_im=int(detections_per_im),
        soft_nms_enabled=False,
        soft_nms_method="linear",
        soft_nms_sigma=0.5,
        soft_nms_min_score_thres=0.001,
        rotated=False,
        cls_agnostic_bbox_reg=False,
        input_boxes_include_bg_cls=False,
        output_classes_include_bg_cls=False,
        legacy_plus_one=False,
    )
    (roi_score_nms, roi_bbox_nms, roi_class_nms, roi_batch_splits_nms,
     roi_keeps_nms, roi_keeps_size_nms) = nms_outputs  # noqa

    roi_score_nms = roi_score_nms.split(roi_batch_splits_nms.int().tolist())
    roi_bbox_nms = roi_bbox_nms.split(roi_batch_splits_nms.int().tolist())
    roi_class_nms = roi_class_nms.split(roi_batch_splits_nms.int().tolist())
    roi_keeps_nms = roi_keeps_nms.split(roi_batch_splits_nms.int().tolist())

    for _score_nms, _class_nms, _keeps_nms, _score in zip(
            roi_score_nms, roi_class_nms, roi_keeps_nms, scores):
        torch.testing.assert_allclose(
            _score[_keeps_nms.to(torch.int64), _class_nms.to(torch.int64)],
            _score_nms,
        )

    for ref, s, b, c in zip(ref_results, roi_score_nms, roi_bbox_nms,
                            roi_class_nms):
        s1, i1 = s.sort()
        s2, i2 = ref.scores.sort()
        torch.testing.assert_allclose(s1, s2)
        torch.testing.assert_allclose(b[i1], ref.pred_boxes.tensor[i2])
        torch.testing.assert_allclose(c.to(torch.int64)[i1], ref.pred_classes[i2])

    for ref, k in zip(ref_kept_indices, roi_keeps_nms):
        # NOTE: order might be different due to implementation
        ref_set = set(ref.tolist())
        k_set = set(k.tolist())
        self.assertEqual(ref_set, k_set)
def fsod_fast_rcnn_inference_single_image(pred_cls, boxes, scores, image_shape,
                                          score_thresh, nms_thresh, topk_per_image):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fsod_fast_rcnn_inference`, but with boxes, scores, and image
        shapes per image.

    Returns:
        Same as `fsod_fast_rcnn_inference`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        pred_cls = pred_cls[valid_mask]

    scores = scores[:, :-1]

    # Predictions arrive grouped by class; regroup them by box so each row
    # holds all class scores of one box.
    cls_num = pred_cls.unique().shape[0]
    box_num = int(scores.shape[0] / cls_num)
    scores = scores.reshape(cls_num, box_num).permute(1, 0)
    boxes = boxes.reshape(cls_num, box_num, 4).permute(1, 0, 2).reshape(box_num, -1)
    pred_cls = pred_cls.reshape(cls_num, box_num).permute(1, 0)

    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]
    pred_cls = pred_cls[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds, pred_cls = (boxes[keep], scores[keep],
                                            filter_inds[keep], pred_cls[keep])

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    # Use the episode's actual class ids rather than the column indices
    # (filter_inds[:, 1]) of the regrouped score matrix.
    result.pred_classes = pred_cls
    return result, filter_inds[:, 0]
def fast_rcnn_inference_single_image_recon_recls(boxes, scores, image_shape,
                                                 score_thresh, nms_thresh,
                                                 topk_per_image, features,
                                                 mask_pooler, mask_head,
                                                 recon_net=None, alpha=2,
                                                 recls=None):
    """
    Single-image inference. Return bounding-box detection results by
    thresholding on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image
        shapes per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # Run the mask head (and optionally the reconstruction/reclassification
    # nets) on the surviving boxes so their outputs can rescore detections.
    mask_features = mask_pooler(features, [Boxes(boxes)])
    if mask_head.cfg.MODEL.ROI_HEADS.NAME == "StandardROIHeads":
        pred_mask_logits = mask_head(mask_features)
    else:
        results = Instances(image_shape)
        results.pred_classes = filter_inds[:, 1]
        pred_mask_logits, _ = mask_head(mask_features, [results])

    n = 1
    if recls and pred_mask_logits[0][1].size(0) != 0:
        if recls.rescoring:
            # Rescore with the reclassification head, attending over the
            # predicted visible mask.
            pred_visible_mask_logits = (pred_mask_logits[1][1]
                                        if len(pred_mask_logits) > 1
                                        else pred_mask_logits[0][1])
            pred_visible_mask_logits = get_pred_masks_logits_by_cls(
                pred_visible_mask_logits, filter_inds[:, 1])
            if recls.attention_mode == "mask":
                recls_logits = recls(mask_features * F.avg_pool2d(
                    (pred_visible_mask_logits > 0).float(), 2))
            else:
                recls_logits = recls(mask_features *
                                     F.avg_pool2d(pred_visible_mask_logits, 2))
            recls_prob = torch.softmax(recls_logits, dim=1)
            indices = torch.arange(recls_prob.size(0), device=recls_prob.device)
            scores = scores * (recls_prob[indices, filter_inds[:, 1]] * 0.4 + 0.6)
            n += 1

    if recon_net and pred_mask_logits[0][0].size(0):
        if recon_net.rescoring:
            # Rescore with the reconstruction similarity.
            mode = "normal"
            select = 1 if len(pred_mask_logits) == 2 else 0
            indices = torch.arange(pred_mask_logits[select][0].size(0),
                                   device=pred_mask_logits[select][0].device)
            pred_masks = (pred_mask_logits[select][0][indices, filter_inds[:, 1]]
                          > 0).unsqueeze(1).float()
            similarity, recon_logits = get_similarity(pred_masks, recon_net,
                                                      filter_inds,
                                                      post_process=mode)
            alpha_t = torch.FloatTensor([alpha]).to(similarity.device)
            scores = scores * torch.relu(
                torch.log(alpha_t - similarity) / torch.log(alpha_t))
            n += 1

    # Take the n-th root to restore the score scale after multiplying in
    # the rescoring factors (a geometric mean).
    scores = scores**(1 / n)

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    results = Instances(image_shape)
    results.pred_boxes = Boxes(boxes)
    results.scores = scores
    results.pred_classes = filter_inds[:, 1]
    return results, filter_inds[:, 0]
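# A standalone sketch of the reconstruction-based rescoring factor used
# above (pure torch; the similarity values are made up for illustration):
# relu(log(alpha - s) / log(alpha)) equals 1 at s = 0 and decays to 0 as s
# approaches alpha - 1, i.e. with the default alpha = 2 the factor falls
# from 1 to 0 as similarity goes from 0 to 1.
import torch

alpha = torch.tensor(2.0)
similarity = torch.tensor([0.0, 0.25, 0.5, 0.75, 0.99])
factor = torch.relu(torch.log(alpha - similarity) / torch.log(alpha))
print(factor)  # tensor([1.0000, 0.8074, 0.5850, 0.3219, 0.0145])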
def find_top_rpn_proposals(
    proposals: List[torch.Tensor],
    pred_objectness_logits: List[torch.Tensor],
    image_sizes: List[Tuple[int, int]],
    nms_thresh: float,
    pre_nms_topk: int,
    post_nms_topk: int,
    min_box_size: float,
    training: bool,
):
    """
    For each feature map, select the `pre_nms_topk` highest scoring proposals,
    apply NMS, clip proposals, and remove small boxes. Return the
    `post_nms_topk` highest scoring proposals among all the feature maps for
    each image.

    Args:
        proposals (list[Tensor]): A list of L tensors. Tensor i has shape
            (N, Hi*Wi*A, 4). All proposal predictions on the feature maps.
        pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i
            has shape (N, Hi*Wi*A).
        image_sizes (list[tuple]): sizes (h, w) for each image.
        nms_thresh (float): IoU threshold to use for NMS.
        pre_nms_topk (int): number of top k scoring proposals to keep before
            applying NMS. When RPN is run on multiple feature maps (as in
            FPN) this number is per feature map.
        post_nms_topk (int): number of top k scoring proposals to keep after
            applying NMS. When RPN is run on multiple feature maps (as in
            FPN) this number is total, over all feature maps.
        min_box_size (float): minimum proposal box side length in pixels
            (absolute units wrt input images).
        training (bool): True if proposals are to be used in training,
            otherwise False. This arg exists only to support a legacy bug;
            look for the "NB: Legacy bug ..." comment.

    Returns:
        list[Instances]: list of N Instances. The i-th Instances stores
        post_nms_topk object proposals for image i, sorted by their
        objectness score in descending order.
    """
    num_images = len(image_sizes)
    device = proposals[0].device

    # 1. Select top-k anchors for every level and every image
    topk_scores = []  # #lvl Tensor, each of shape N x topk
    topk_proposals = []
    level_ids = []  # #lvl Tensor, each of shape (topk,)
    batch_idx = torch.arange(num_images, device=device)
    for level_id, (proposals_i, logits_i) in enumerate(
            zip(proposals, pred_objectness_logits)):
        Hi_Wi_A = logits_i.shape[1]
        if isinstance(Hi_Wi_A, torch.Tensor):  # it's a tensor in tracing
            num_proposals_i = torch.clamp(Hi_Wi_A, max=pre_nms_topk)
        else:
            num_proposals_i = min(Hi_Wi_A, pre_nms_topk)

        # sort is faster than topk: https://github.com/pytorch/pytorch/issues/22812
        # topk_scores_i, topk_idx = logits_i.topk(num_proposals_i, dim=1)
        logits_i, idx = logits_i.sort(descending=True, dim=1)
        topk_scores_i = logits_i.narrow(1, 0, num_proposals_i)
        topk_idx = idx.narrow(1, 0, num_proposals_i)

        # each is N x topk
        topk_proposals_i = proposals_i[batch_idx[:, None], topk_idx]  # N x topk x 4

        topk_proposals.append(topk_proposals_i)
        topk_scores.append(topk_scores_i)
        level_ids.append(
            torch.full((num_proposals_i, ), level_id, dtype=torch.int64,
                       device=device))

    # 2. Concat all levels together
    topk_scores = cat(topk_scores, dim=1)
    topk_proposals = cat(topk_proposals, dim=1)
    level_ids = cat(level_ids, dim=0)

    # 3. For each image, run a per-level NMS, and choose topk results.
    results: List[Instances] = []
    for n, image_size in enumerate(image_sizes):
        boxes = Boxes(topk_proposals[n])
        scores_per_img = topk_scores[n]
        lvl = level_ids

        valid_mask = torch.isfinite(boxes.tensor).all(dim=1) & torch.isfinite(
            scores_per_img)
        if not valid_mask.all():
            if training:
                raise FloatingPointError(
                    "Predicted boxes or scores contain Inf/NaN. Training has diverged."
                )
            boxes = boxes[valid_mask]
            scores_per_img = scores_per_img[valid_mask]
            lvl = lvl[valid_mask]
        boxes.clip(image_size)

        # filter empty boxes
        keep = boxes.nonempty(threshold=min_box_size)
        if _is_tracing() or keep.sum().item() != len(boxes):
            boxes, scores_per_img, lvl = boxes[keep], scores_per_img[keep], lvl[keep]

        keep = batched_nms(boxes.tensor, scores_per_img, lvl, nms_thresh)
        # In Detectron1, there was a difference between training and testing
        # (https://github.com/facebookresearch/Detectron/issues/459): during
        # training, topk was over the proposals from *all* images in the
        # training batch; during testing, it was over the proposals for each
        # image separately. As a result, the training behavior was
        # batch-dependent, and the configuration "POST_NMS_TOPK_TRAIN" ended
        # up relying on the batch size. This bug is addressed in Detectron2
        # to make the behavior independent of batch size.
        keep = keep[:post_nms_topk]  # keep is already sorted

        res = Instances(image_size)
        res.proposal_boxes = boxes[keep]
        res.objectness_logits = scores_per_img[keep]
        results.append(res)
    return results
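# A minimal sketch of the per-level NMS trick used in step 3 above, using
# torchvision's batched_nms directly (the boxes and scores are made up for
# illustration): batched_nms treats the integer `idxs` argument as a
# category, so passing feature-level ids suppresses overlaps only within
# the same FPN level, never across levels.
import torch
from torchvision.ops import batched_nms

boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],    # overlaps box 0 (IoU ~ 0.68)
                      [0., 0., 10., 10.]])   # identical to box 0, other level
scores = torch.tensor([0.9, 0.8, 0.7])
level_ids = torch.tensor([0, 0, 1])
keep = batched_nms(boxes, scores, level_ids, iou_threshold=0.5)
print(keep)  # tensor([0, 2]): box 1 is suppressed; box 2 survives (different level)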