def fast_rcnn_inference_single_image(
    boxes, scores, image_shape, score_thresh, nms_thresh, topk_per_image
):
    """Threshold, NMS-filter and package the detections of a single image.

    Args:
        boxes: 2-D tensor whose second dimension is a multiple of 4; every
            group of four columns is one box for that row.
        scores: 2-D tensor of per-row scores. Its last column is dropped
            before thresholding (presumably the background class -- confirm
            against the caller).
        image_shape: image size handed to ``Boxes.clip`` and ``Instances``.
        score_thresh: only scores strictly greater than this value survive.
        nms_thresh: threshold forwarded to ``batched_nms``.
        topk_per_image: maximum number of detections kept after NMS; a
            negative value disables the cap.

    Returns:
        A tuple ``(result, row_indices)``. ``result`` is an ``Instances``
        with ``pred_boxes``, ``scores`` and ``pred_classes``. ``row_indices``
        is a 1-D tensor giving, for each kept detection, the row it came
        from -- counted after rows with non-finite values were removed.
    """
    # Discard rows that contain NaN/Inf in either input.
    finite_rows = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1)
    if not finite_rows.all():
        boxes, scores = boxes[finite_rows], scores[finite_rows]

    scores = scores[:, :-1]
    boxes_per_row = boxes.shape[1] // 4

    # Go through the Boxes wrapper to reuse its clip(), then restore the
    # (row, box-group, 4) layout.
    clipped = Boxes(boxes.reshape(-1, 4))
    clipped.clip(image_shape)
    boxes = clipped.tensor.view(-1, boxes_per_row, 4)

    above_thresh = scores > score_thresh
    # One (row index, class index) pair per score that passed the threshold.
    row_class = above_thresh.nonzero()
    if boxes_per_row == 1:
        # A single box per row is shared by all of that row's classes.
        boxes = boxes[row_class[:, 0], 0]
    else:
        boxes = boxes[above_thresh]
    scores = scores[above_thresh]

    # The class indices are passed along so NMS can tell classes apart.
    keep = batched_nms(boxes, scores, row_class[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes = boxes[keep]
    scores = scores[keep]
    row_class = row_class[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = row_class[:, 1]
    return result, row_class[:, 0]
def select_over_all_levels(self, instances):
    """Run NMS on every image's detections and cap how many are kept.

    Args:
        instances: sequence with one entry per image. Each entry must
            provide ``get("pred_boxes")``, ``get("scores")``,
            ``get("pred_classes")`` and an ``image_size`` attribute.

    Returns:
        A list with one new ``Instances`` per input entry, holding the
        ``pred_boxes``, ``scores`` and ``pred_classes`` selected by
        ``batched_nms`` (threshold ``self.nms_threshold``), truncated to the
        first ``self.max_detections_per_image`` kept indices.
    """
    results = []
    for per_image in instances:
        boxes = per_image.get("pred_boxes")
        scores = per_image.get("scores")
        pred_classes = per_image.get("pred_classes")

        kept = batched_nms(boxes.tensor, scores, pred_classes, self.nms_threshold)
        kept = kept[: self.max_detections_per_image]

        selected = Instances(per_image.image_size)
        selected.pred_boxes = boxes[kept]
        selected.scores = scores[kept]
        selected.pred_classes = pred_classes[kept]
        results.append(selected)
    return results
def inference_single_image(self, box_cls, box_delta, anchors, image_size):
    """Decode, threshold and NMS-filter the predictions for one image.

    Args:
        box_cls: per-level class logits; each element is indexed as a 2-D
            tensor and soft-maxed over its last axis.
        box_delta: per-level box regression outputs, indexed by row.
        anchors: per-level anchors; each element is indexed by row and then
            read through its ``.tensor`` attribute.
        image_size: image size stored on the returned ``Instances``.

    Returns:
        An ``Instances`` with ``pred_boxes``, ``scores`` and
        ``pred_classes`` holding at most ``self.max_detections_per_image``
        detections that survived ``batched_nms``.
    """
    level_boxes = []
    level_scores = []
    level_classes = []

    for logits, deltas, level_anchors in zip(box_cls, box_delta, anchors):
        # Softmax over the last axis, drop the final column, then flatten so
        # every remaining (row, class) score has a single flat index.
        probs = F.softmax(logits, dim=-1)[:, :-1].flatten()

        # Rank every score and keep at most as many as `deltas` has rows.
        limit = deltas.size(0)
        ranked_probs, ranked_idxs = probs.sort(descending=True)
        ranked_probs = ranked_probs[:limit]
        ranked_idxs = ranked_idxs[:limit]

        confident = ranked_probs > self.score_threshold
        ranked_probs = ranked_probs[confident]
        ranked_idxs = ranked_idxs[confident]

        # Split the flat index back into (row, class).
        # NOTE(review): assumes each flattened row has self.num_classes
        # entries -- confirm against the head's output width.
        anchor_idxs = ranked_idxs // self.num_classes
        class_idxs = ranked_idxs % self.num_classes

        decoded = self.box2box_transform.apply_deltas(
            deltas[anchor_idxs], level_anchors[anchor_idxs].tensor
        )

        level_boxes.append(decoded)
        level_scores.append(ranked_probs)
        level_classes.append(class_idxs)

    # Merge all levels before the NMS pass.
    boxes_all = cat(level_boxes)
    scores_all = cat(level_scores)
    class_idxs_all = cat(level_classes)

    keep = batched_nms(boxes_all, scores_all, class_idxs_all, self.nms_threshold)
    keep = keep[: self.max_detections_per_image]

    result = Instances(image_size)
    result.pred_boxes = Boxes(boxes_all[keep])
    result.scores = scores_all[keep]
    result.pred_classes = class_idxs_all[keep]
    return result
def create_instances(predictions, image_size, conf_threshold=0.5):
    """Build an ``Instances`` from the predictions scoring above a threshold.

    Args:
        predictions: sequence of dicts with ``"score"``, ``"bbox"`` and
            ``"category_id"`` keys, and optionally ``"segmentation"``.
        image_size: image size passed to the ``Instances`` constructor.
        conf_threshold: only predictions whose score is strictly greater
            than this value are kept.

    Returns:
        An ``Instances`` with ``scores``, ``pred_boxes`` (converted from
        ``BoxMode.XYWH_ABS`` to ``BoxMode.XYXY_ABS``) and ``pred_classes``
        (category ids passed through ``dataset_id_map``). ``pred_masks`` is
        set only when every kept prediction has a ``"segmentation"`` entry.
    """
    ret = Instances(image_size)

    all_scores = np.asarray([pred["score"] for pred in predictions])
    chosen = (all_scores > conf_threshold).nonzero()[0]
    kept = [predictions[i] for i in chosen]

    # reshape keeps the (0, 4) shape when nothing passes the threshold.
    xywh = np.asarray([pred["bbox"] for pred in kept]).reshape(-1, 4)
    labels = np.asarray([dataset_id_map(pred["category_id"]) for pred in kept])

    ret.scores = all_scores[chosen]
    ret.pred_boxes = Boxes(BoxMode.convert(xywh, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS))
    ret.pred_classes = labels

    # Masks are optional: leave the field unset if any entry lacks one.
    try:
        ret.pred_masks = [pred["segmentation"] for pred in kept]
    except KeyError:
        pass
    return ret
def inference_on_single_feature_map(
    self, locations, box_cls, box_regression, centerness, image_sizes
):
    """Turn one feature map's raw outputs into per-image detections.

    Args:
        locations: tensor indexed by flattened spatial position; columns 0
            and 1 are used as the x and y reference of each position.
        box_cls: tensor of shape (N, C, H, W) with class logits.
        box_regression: tensor viewable as (N, 4, H, W).
        centerness: tensor viewable as (N, 1, H, W) with centerness logits.
        image_sizes: per-image sizes, indexed by batch position.

    Returns:
        A list of N ``Instances``, each with ``pred_boxes`` (clipped to the
        image), ``scores`` and ``pred_classes``.
    """
    N, C, H, W = box_cls.shape

    # Channels last, spatial grid flattened: (N, H*W, C) / (N, H*W, 4) / (N, H*W).
    cls_prob = box_cls.view(N, C, H, W).permute(0, 2, 3, 1).reshape(N, -1, C).sigmoid()
    reg = box_regression.view(N, 4, H, W).permute(0, 2, 3, 1).reshape(N, -1, 4)
    ctr_prob = centerness.view(N, 1, H, W).permute(0, 2, 3, 1).reshape(N, -1).sigmoid()

    # Candidates are selected on the plain class probability, i.e. before
    # the centerness weighting applied below.
    candidate_mask = cls_prob > self.score_threshold
    keep_counts = candidate_mask.view(N, -1).sum(1).clamp(max=self.topk_candidates)

    weighted = cls_prob * ctr_prob[:, :, None]

    results = []
    for img in range(N):
        img_mask = candidate_mask[img]
        img_scores = weighted[img][img_mask]

        # Each row is a (position index, class index) pair of one candidate.
        hits = img_mask.nonzero()
        pos_idx = hits[:, 0]
        cls_idx = hits[:, 1]

        img_reg = reg[img][pos_idx]
        img_locs = locations[pos_idx]

        k = keep_counts[img]
        if img_mask.sum().item() > k.item():
            # Too many candidates: keep the k best (returned unsorted).
            img_scores, best = img_scores.topk(k, sorted=False)
            cls_idx = cls_idx[best]
            img_reg = img_reg[best]
            img_locs = img_locs[best]

        # Columns 0/1 are subtracted from and columns 2/3 added to the
        # reference point to form (x1, y1, x2, y2).
        d0, d1, d2, d3 = img_reg.unbind(dim=1)
        xs = img_locs[:, 0]
        ys = img_locs[:, 1]
        corners = torch.stack([xs - d0, ys - d1, xs + d2, ys + d3], dim=1)

        clipped = Boxes(corners)
        clipped.clip(image_sizes[img])

        result = Instances(image_sizes[img])
        result.pred_boxes = clipped
        # Square root of (class probability * centerness).
        result.scores = torch.sqrt(img_scores)
        result.pred_classes = cls_idx
        results.append(result)
    return results