# Shared imports assumed by the inference variants below.
import torch
import numpy as np
from torchvision.ops import nms
from detectron2.structures import Boxes, Instances


def fast_rcnn_inference_single_image(boxes, scores, image_shape, score_thresh,
                                      nms_thresh, topk_per_image):
    """
    Single-image inference that keeps, for every proposal, only the box of its
    top-scoring class, then applies class-agnostic NMS.
    Note: `score_thresh` is accepted for interface compatibility but is not used
    in this variant.
    """
    scores = scores[:, :-1]  # drop the background column
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Select max scores
    max_scores, max_classes = scores.max(1)  # R x C --> R
    num_objs = boxes.size(0)
    boxes = boxes.view(-1, 4)
    idxs = torch.arange(num_objs, device=boxes.device) * num_bbox_reg_classes + max_classes
    max_boxes = boxes[idxs]  # Select max boxes according to the max scores.

    # Apply NMS
    keep = nms(max_boxes, max_scores, nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores = max_boxes[keep], max_scores[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = max_classes[keep]
    return result, keep
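
# Illustrative sketch (not part of the source): the flat-index gather above picks,
# for each of the R proposals, the box of its top-scoring class from the flattened
# (R*C, 4) view, because proposal r's box for class c sits at row r * C + c.
# Toy example with R = 2 proposals and C = 3 classes:
def _demo_flat_index_gather():
    boxes = torch.arange(2 * 3 * 4, dtype=torch.float32).view(6, 4)  # (R*C, 4)
    max_classes = torch.tensor([2, 0])                               # best class per proposal
    idxs = torch.arange(2) * 3 + max_classes                         # -> tensor([2, 3])
    return boxes[idxs]                                               # one box per proposal
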
def fast_rcnn_inference_single_image(boxes, scores, image_shape, score_thresh,
                                      nms_thresh, topk_per_image, allow_oob=False):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image. If `allow_oob` is True, boxes are kept as predicted instead of
        being clipped to the image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    # Drop predictions with non-finite (NaN/Inf) boxes or scores.
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]

    scores = scores[:, :-1]  # drop the background column
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    if not allow_oob:
        boxes = Boxes(boxes.reshape(-1, 4))
        boxes.clip(image_shape)
        boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4
    else:
        boxes = boxes.view(-1, num_bbox_reg_classes, 4)

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # Apply NMS across all surviving boxes (class-agnostic).
    from torchvision.ops import nms
    keep = nms(boxes, scores, nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
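
# Minimal usage sketch for the variant above (assumed shapes, random inputs; not
# from the source). With K score columns (background last), the box tensor is
# expected to hold class-specific regressions of width 4 * (K - 1). The default
# argument binds to the `allow_oob` variant defined directly above.
def _demo_single_image_inference(inference_fn=fast_rcnn_inference_single_image):
    num_proposals, num_classes_with_bg = 100, 81
    boxes = torch.rand(num_proposals, 4 * (num_classes_with_bg - 1)) * 800
    scores = torch.softmax(torch.randn(num_proposals, num_classes_with_bg), dim=1)
    instances, kept_rows = inference_fn(
        boxes, scores, image_shape=(800, 800),
        score_thresh=0.05, nms_thresh=0.5, topk_per_image=100,
    )
    return instances, kept_rows
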
def fast_rcnn_inference_single_image(
    boxes, scores, image_shape, nms_thresh, topk_per_image
):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image. Instead of scalar
        scores, `result.scores` holds the full class distribution of each kept
        box, with the background probability zeroed out.
    """
    class_distr_scores = scores.clone()
    scores = scores[:, :-1]  # drop the background column
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Select max scores
    max_scores, max_classes = scores.max(1)  # R x C --> R
    num_objs = boxes.size(0)
    boxes = boxes.view(-1, 4)
    idxs = torch.arange(num_objs, device=boxes.device) * num_bbox_reg_classes + max_classes
    max_boxes = boxes[idxs]  # Select max boxes according to the max scores.

    # Apply NMS
    keep = nms(max_boxes, max_scores, nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores = max_boxes[keep], max_scores[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    class_distr_scores = class_distr_scores[keep]
    # we set the background probability to 0
    class_distr_scores[:, -1] = 0.0
    result.scores = class_distr_scores
    return result, keep
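
# Hypothetical downstream use of the variant above (not shown in the source):
# `result.scores` carries the full per-class distribution with the background
# column zeroed, so a scalar label and confidence can still be recovered per box.
def _demo_recover_labels(result):
    probs = result.scores                        # (N, K); last column (background) is 0
    pred_classes = probs[:, :-1].argmax(dim=1)   # best foreground class per box
    pred_conf = probs[:, :-1].max(dim=1).values  # its probability
    return pred_classes, pred_conf
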
def fast_rcnn_inference_single_image_with_overlap(
    boxes,
    scores,
    overlap_boxes,
    overlap_probs,
    image_shape,
    score_thresh,
    nms_thresh,
    topk_per_image,
    allow_oob=False,
):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS), carrying the predicted
    overlap boxes and overlap probabilities through to the output.

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    # Drop predictions with non-finite (NaN/Inf) boxes or scores.
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        overlap_boxes = overlap_boxes[valid_mask]
        overlap_probs = overlap_probs[valid_mask]

    scores = scores[:, :-1]  # drop the background column
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    if not allow_oob:
        boxes = Boxes(boxes.reshape(-1, 4))
        boxes.clip(image_shape)
        boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4
        assert overlap_boxes.size(1) == 4, \
            "overlap boxes prediction has no category, but: {}".format(overlap_boxes.size())
        overlap_boxes = Boxes(overlap_boxes)
        overlap_boxes.clip(image_shape)
        overlap_boxes = overlap_boxes.tensor
    else:
        boxes = boxes.view(-1, num_bbox_reg_classes, 4)

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
        overlap_boxes = overlap_boxes[filter_inds[:, 0]]
    else:
        boxes = boxes[filter_mask]
        overlap_boxes = overlap_boxes[filter_inds[:, 0]]
    scores = scores[filter_mask]
    overlap_probs = overlap_probs[filter_mask]

    # Apply NMS: either the project's custom overlap-aware NMS (`batched_noh_nms`,
    # a NumPy routine defined elsewhere) or plain torchvision NMS.
    self_defined_nms_on = True  # set to False to fall back to plain torchvision NMS
    if self_defined_nms_on:
        device = boxes.device
        boxes = np.ascontiguousarray(boxes.cpu())
        scores = np.ascontiguousarray(scores.cpu())
        overlap_probs = np.ascontiguousarray(overlap_probs.cpu())
        overlap_boxes = np.ascontiguousarray(overlap_boxes.cpu())
        keep = batched_noh_nms(boxes, scores, overlap_probs, overlap_boxes,
                               Nt=nms_thresh, thresh=0.01, method=3)
        boxes = torch.from_numpy(boxes).to(device)
        scores = torch.from_numpy(scores).to(device)
        overlap_probs = torch.from_numpy(overlap_probs).to(device)
        overlap_boxes = torch.from_numpy(overlap_boxes).to(device)
        # Re-order the kept indices by descending score.
        keep = keep[scores[keep].argsort(descending=True)]
    else:
        from torchvision.ops import nms
        keep = nms(boxes, scores, nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, overlap_boxes, overlap_probs, filter_inds = (
        boxes[keep],
        scores[keep],
        overlap_boxes[keep],
        overlap_probs[keep],
        filter_inds[keep],
    )

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.overlap_boxes = Boxes(overlap_boxes)
    result.overlap_probs = overlap_probs
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
def fast_rcnn_inference_single_image(boxes, scores, image_shape, score_thresh,
                                      nms_thresh, topk_per_image, device,
                                      preferred_labels=[], tokens=[], tokenizer=None):
    """
    Variant that, after NMS, re-ranks the kept detections: classes whose names
    match the given `tokens` (looked up through `tokenizer`) come first, followed
    by the remaining detections, before truncating to `topk_per_image` and
    shuffling. `get_image_labels2` is a helper defined elsewhere in the project.
    Note: `score_thresh` is accepted for interface compatibility but is not used
    in this variant.
    """
    scores = scores[:, :-1]  # drop the background column
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Select max scores
    max_scores, max_classes = scores.max(1)  # R x C --> R
    num_objs = boxes.size(0)
    boxes = boxes.view(-1, 4)
    idxs = torch.arange(num_objs, device=device) * num_bbox_reg_classes + max_classes
    max_boxes = boxes[idxs]  # Select max boxes according to the max scores.

    # Apply NMS
    keep = nms(max_boxes, max_scores, nms_thresh)

    # Calculate the closest tokens for the kept detections.
    words = get_image_labels2(preferred_labels, max_classes[keep].tolist(), keep.tolist())
    relevant = []
    others = []
    class_list = []
    for word, c, i in words:
        tok = tokenizer.vocab.get(word, tokenizer.vocab["[UNK]"])
        if tok in tokens:
            # Detections whose class name matches a token go first.
            relevant.append(i)
        elif c in class_list:
            # Repeated predictions of an already-seen class go last.
            class_list.append(c)
            others.append(i)
        else:
            # Previously unseen classes are inserted at the front of the rest.
            class_list.append(c)
            others.insert(0, i)
    keep = torch.tensor(relevant + others, device=device)

    # Duplicate-class predictions were ordered last, so truncating to the top-k
    # drops them first; finally, shuffle the kept detections.
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    keep = keep[torch.randperm(keep.size(0))]
    boxes, scores = max_boxes[keep], max_scores[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = max_classes[keep]
    return result, keep