def fast_rcnn_inference_single_image(boxes, scores, image_shape, score_thresh, nms_thresh, topk_per_image):
    """Single-image Fast R-CNN inference.

    Picks the top-scoring class per proposal, clips boxes to the image,
    applies class-agnostic NMS, and keeps at most `topk_per_image` results.

    Args:
        boxes (Tensor): R x (C*4) per-class box regression outputs.
        scores (Tensor): R x (C+1) class scores; the last column is background.
        image_shape (tuple): (height, width) used to clip the boxes.
        score_thresh (float): unused here; kept so the signature stays
            compatible with the multi-image caller.
        nms_thresh (float): IoU threshold for non-maximum suppression.
        topk_per_image (int): keep at most this many detections; a negative
            value keeps everything NMS retained.

    Returns:
        (Instances, Tensor): detection results and the kept proposal indices.
    """
    # Drop the background column.
    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Highest-scoring class per proposal.
    max_scores, max_classes = scores.max(1)  # R x C --> R
    num_objs = boxes.size(0)
    boxes = boxes.view(-1, 4)
    # BUG FIX: the original called `.cuda()` unconditionally, which crashes on
    # CPU-only machines and mismatches devices whenever the inputs are not on
    # the default GPU. Build the index tensor on the boxes' own device.
    idxs = torch.arange(num_objs, device=boxes.device) * num_bbox_reg_classes + max_classes
    max_boxes = boxes[idxs]  # Select max boxes according to the max scores.

    # Apply NMS
    keep = nms(max_boxes, max_scores, nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores = max_boxes[keep], max_scores[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = max_classes[keep]
    return result, keep
def fast_rcnn_inference_single_image(
    boxes, scores, image_shape, nms_thresh, topk_per_image
):
    """
    Single-image inference. Return bounding-box detection results by
    thresholding on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image
        shapes per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image. Note that
        `result.scores` holds the full (C+1)-way class distribution for each
        kept box, with the background probability zeroed out.
    """
    # Keep a copy of the full class distribution; it is returned per kept box.
    class_distr_scores = scores.clone()
    # Drop the background column for max-class selection.
    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Highest-scoring class per proposal.
    max_scores, max_classes = scores.max(1)  # R x C --> R
    num_objs = boxes.size(0)
    boxes = boxes.view(-1, 4)
    # BUG FIX: the original moved the index tensor to CUDA whenever CUDA was
    # *available*, even if `boxes` lives on the CPU, which makes `boxes[idxs]`
    # fail with a device mismatch. Allocate it on the boxes' device instead.
    idxs = torch.arange(num_objs, device=boxes.device) * num_bbox_reg_classes + max_classes
    max_boxes = boxes[idxs]  # Select max boxes according to the max scores.

    # Apply NMS
    keep = nms(max_boxes, max_scores, nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores = max_boxes[keep], max_scores[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    class_distr_scores = class_distr_scores[keep]
    # we set the background probability to 0
    class_distr_scores[:, -1] = 0.0
    result.scores = class_distr_scores
    return result, keep
def fast_rcnn_inference_single_image(boxes, scores, image_shape, score_thresh, nms_thresh, topk_per_image, device, preferred_labels=None, tokens=None, tokenizer=None):
    """Single-image inference with token-aware reordering of detections.

    After the usual max-class selection and NMS, detections whose label maps
    to a tokenizer token present in `tokens` are moved to the front; among
    the remainder, first occurrences of a class precede repeats. The final
    kept set is truncated to `topk_per_image` and then randomly shuffled.

    Args:
        boxes (Tensor): R x (C*4) per-class box regression outputs.
        scores (Tensor): R x (C+1) class scores; the last column is background.
        image_shape (tuple): (height, width) used to clip the boxes.
        score_thresh (float): unused here; kept for signature compatibility.
        nms_thresh (float): IoU threshold for NMS.
        topk_per_image (int): keep at most this many detections; <0 keeps all.
        device: torch device for the index tensors.
        preferred_labels (list | None): candidate label strings passed to
            `get_image_labels2`; defaults to an empty list.
        tokens (list | None): token ids considered "relevant"; defaults to [].
        tokenizer: tokenizer with a `vocab` dict (must contain "[UNK]").

    Returns:
        (Instances, Tensor): detection results and the kept (reordered) indices.
    """
    # BUG FIX: mutable default arguments ([] shared across calls) replaced
    # with None sentinels; behavior for callers is unchanged.
    if preferred_labels is None:
        preferred_labels = []
    if tokens is None:
        tokens = []

    # Drop the background column.
    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Highest-scoring class per proposal.
    max_scores, max_classes = scores.max(1)  # R x C --> R
    num_objs = boxes.size(0)
    boxes = boxes.view(-1, 4)
    # BUG FIX: `torch.arange(num_objs).cuda(device)` crashes on CPU-only
    # machines; allocate directly on the requested device instead.
    idxs = torch.arange(num_objs, device=device) * num_bbox_reg_classes + max_classes
    max_boxes = boxes[idxs]  # Select max boxes according to the max scores.

    # Apply NMS
    keep = nms(max_boxes, max_scores, nms_thresh)

    # calculate the closest tokens
    words = get_image_labels2(preferred_labels, max_classes[keep].tolist(), keep.tolist())
    relevant = []
    others = []
    class_list = []
    for word, c, i in words:
        tok = tokenizer.vocab.get(word, tokenizer.vocab["[UNK]"])
        if tok in tokens:
            # inserting the relevant first
            relevant.append(i)
        elif c in class_list:
            # repeated predictions go last.
            others.append(i)
        else:
            # Inserting varied predictions first.
            # BUG FIX: the original called `others.insert(i, 0)`, which
            # inserts the literal value 0 at position i instead of putting
            # index i at the front of the list.
            class_list.append(c)
            others.insert(0, i)
    keep = torch.tensor(relevant + others, device=device)

    # remove duplicate classes......
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    keep = keep[torch.randperm(keep.size()[0])]
    boxes, scores = max_boxes[keep], max_scores[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = max_classes[keep]
    return result, keep