def nms_1(info_1, info_2, info_3='', iou_threshold=0.5):
    """Merge detections from two or three sources and run class-wise NMS.

    Each ``info_*`` argument is a mapping with the keys ``'bbox'``,
    ``'score'`` and ``'class'``; the three values are sequences holding one
    entry per detection. The detections of all sources are concatenated (in
    argument order) and passed to ``batched_nms``, so boxes only suppress
    other boxes that carry the same class value.

    Args:
        info_1: First detection mapping (always used).
        info_2: Second detection mapping (always used).
        info_3: Optional third detection mapping; any falsy value (the
            default ``''``) means "not provided".
        iou_threshold: IoU threshold forwarded to ``batched_nms``.
            Defaults to 0.5, the value that used to be hard-coded.

    Returns:
        Tuple ``(boxes, scores, classes)`` of float tensors containing only
        the detections kept by NMS, in the order ``batched_nms`` returns
        them.
    """
    sources = [info_1, info_2]
    if info_3:
        sources.append(info_3)

    boxes, scores, classes = [], [], []
    for info in sources:
        # extend() copies the entries, so the caller's lists are never mutated.
        boxes.extend(info['bbox'])
        scores.extend(info['score'])
        classes.extend(info['class'])

    # torch.Tensor(...) always yields float32, whatever the input numbers are.
    # The reshape keeps an empty input as shape (0, 4) instead of (0,), so
    # callers can index box columns even when there are no detections.
    boxes = torch.Tensor(boxes).reshape(-1, 4)
    scores = torch.Tensor(scores)
    classes = torch.Tensor(classes)

    keep_id = batched_nms(boxes, scores, classes, iou_threshold)

    # Indexing with an index tensor already produces fresh tensors; no
    # further wrapping is required.
    return boxes[keep_id], scores[keep_id], classes[keep_id]
def fast_rcnn_inference_single_image(
    self,
    img,
    boxes,
    scores,
    features,
    dataset_dict,
    score_thresh: float,
    nms_thresh: float,
    topk_per_image: int,
):
    """Run NMS on one image's detections and assemble region features.

    Rows of ``boxes``, ``scores`` and ``features`` describe the same
    detections (all three are indexed with the same ``keep`` indices).
    Rows with non-finite box or score values are discarded, the boxes are
    divided by ``dataset_dict['im_scale']``, and class-wise NMS is run using
    each row's maximum score and its arg-max column as the class id.

    Args:
        img: Image array; ``np.shape(img)`` must unpack to
            ``(height, width, channels)``.
        boxes: 2-D tensor of box coordinates, one row per detection.
        scores: 2-D tensor of per-class scores, one row per detection.
        features: Tensor of per-detection features, row-aligned with
            ``boxes``.
        dataset_dict: Mapping providing ``'im_scale'``.
        score_thresh: Accepted for interface compatibility; this
            implementation does not threshold on scores.
        nms_thresh: IoU threshold passed to ``batched_nms``.
        topk_per_image: Keep at most this many detections after NMS; a
            negative value keeps all of them.

    Returns:
        Tuple ``(info, meta)``. ``info`` holds ``'objects'`` (space-joined
        labels) and ``'img_feat'`` (features concatenated with the output of
        ``normalize_box_feats``). ``meta`` holds ``'obj_label'``, ``'bbox'``,
        ``'image_h'`` and ``'image_w'``.
    """
    valid_mask = (
        torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1)
    )
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        # The NMS indices computed below are relative to the filtered rows,
        # so features must be filtered too or features[keep] would select
        # rows belonging to different detections.
        features = features[valid_mask]

    boxes = boxes / dataset_dict['im_scale']

    # Apply NMS for each class independently, using every row's best class.
    max_scores, max_classes = scores.max(1)
    keep = batched_nms(boxes, max_scores, max_classes, nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, features = boxes[keep], scores[keep], features[keep]

    # Column 0 is excluded from the arg-max, so the resulting indices are
    # offset by one relative to the score columns.
    # NOTE(review): presumably column 0 is a background class and
    # self.vg_objects has no entry for it -- confirm.
    image_objects = np.argmax(scores.numpy()[:, 1:], axis=1)

    image_h, image_w, _ = np.shape(img)
    loc_feat = normalize_box_feats(boxes, image_h, image_w)
    feat = torch.cat((features, loc_feat), dim=1)

    obj_label = [self.vg_objects[i] for i in image_objects]
    info = {
        'objects': ' '.join(obj_label),
        'img_feat': feat,
    }
    meta = {
        'obj_label': obj_label,
        'bbox': boxes.tolist(),
        'image_h': image_h,
        'image_w': image_w,
    }
    return info, meta
def fast_rcnn_inference_single_image_with_logits(
        boxes, scores, image_shape, score_thresh, nms_thresh, topk_per_image):
    """Threshold, NMS-filter and package the detections of one image.

    In addition to the usual per-detection score, every returned detection
    also carries the full row of class scores it originated from
    (``probabs``).

    Args:
        boxes: 2-D tensor whose second dimension is a multiple of 4
            (one box, or one box per class, for every prediction row).
        scores: 2-D tensor of class scores; its last column is dropped
            before any further processing.
        image_shape: Image size handed to ``Boxes.clip`` and ``Instances``.
        score_thresh: Only (row, class) pairs scoring strictly above this
            value are kept.
        nms_thresh: IoU threshold passed to ``batched_nms``.
        topk_per_image: Keep at most this many detections after NMS; a
            negative value keeps all of them.

    Returns:
        Tuple ``(result, row_indices)``: an ``Instances`` object with
        ``pred_boxes``, ``scores``, ``probabs`` and ``pred_classes`` set,
        and the index of the input row each kept detection came from.
    """
    # Discard the last score column.
    # NOTE(review): presumably the background class -- confirm.
    class_scores = scores[:, :-1]
    row_probs = class_scores.clone()
    boxes_per_row = boxes.shape[1] // 4

    # Clip every box to the image, then restore the (rows, boxes, 4) layout.
    clipped = Boxes(boxes.reshape(-1, 4))
    clipped.clip(image_shape)
    box_grid = clipped.tensor.view(-1, boxes_per_row, 4)

    # Boolean (rows x classes) mask of confident pairs, plus its index form:
    # column 0 holds the row index, column 1 the class index.
    confident = class_scores > score_thresh
    hit_inds = confident.nonzero()

    if boxes_per_row == 1:
        # A single class-agnostic box per row is shared by all its classes.
        picked_boxes = box_grid[hit_inds[:, 0], 0]
    else:
        picked_boxes = box_grid[confident]
    picked_scores = class_scores[confident]
    picked_probs = row_probs[hit_inds[:, 0], :]

    # Per-class NMS: the class index acts as the grouping id.
    survivors = batched_nms(
        picked_boxes, picked_scores, hit_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        survivors = survivors[:topk_per_image]

    picked_boxes = picked_boxes[survivors]
    picked_scores = picked_scores[survivors]
    hit_inds = hit_inds[survivors]
    picked_probs = picked_probs[survivors]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(picked_boxes)
    result.scores = picked_scores
    result.probabs = picked_probs
    result.pred_classes = hit_inds[:, 1]
    return result, hit_inds[:, 0]
def __call__(self, instances: Instances, select=None):
    """Run the wrapped extractor on the detections that survive NMS.

    Scores and boxes are pulled from ``instances``; NMS is run with every
    detection assigned to the same category (id 0), so suppression is
    class-agnostic. The surviving detections form a boolean mask that is
    AND-ed with ``select`` when one is supplied.

    Args:
        instances: Detections to filter.
        select: Optional boolean mask over the detections; only entries
            that are true here AND survive NMS are forwarded.

    Returns:
        ``None`` when no boxes could be extracted, otherwise whatever
        ``self.extractor(instances, select=...)`` returns.
    """
    scores = extract_scores_from_instances(instances)
    boxes_xywh = extract_boxes_xywh_from_instances(instances)
    if boxes_xywh is None:
        return None

    # Allocate the category ids on the same device as the boxes so that
    # batched_nms never receives tensors living on different devices.
    category_ids = torch.zeros(
        len(scores), dtype=torch.int32, device=boxes_xywh.device)

    # NOTE(review): torchvision documents the ``boxes`` argument of
    # batched_nms as (x1, y1, x2, y2); the helper name suggests these boxes
    # are (x, y, w, h) -- confirm the layout and convert if needed.
    select_local_idx = batched_nms(
        boxes_xywh,
        scores,
        category_ids,
        iou_threshold=self.iou_threshold,
    ).squeeze()

    select_local = torch.zeros(
        len(boxes_xywh), dtype=torch.bool, device=boxes_xywh.device)
    select_local[select_local_idx] = True

    select = select_local if select is None else (select & select_local)
    return self.extractor(instances, select=select)
def _eval_routine(
        cf, cf_add_d2, out, cls_names,
        TEST_DATASET_NAME, datalist: Datalist, model_to_eval):
    """Predict on every datalist record, optionally apply NMS, report AP.

    Args:
        cf: Config mapping; reads ``'seed'``, ``'conf_thresh'``,
            ``'nms.enable'`` and, when NMS is enabled, ``'nms.thresh'`` and
            ``'nms.batched'``.
        cf_add_d2: Extra configuration forwarded to
            ``set_detectron_cfg_test``.
        out: Output directory; ``d2_output`` and ``isaver`` sub-directories
            are created beneath it.
        cls_names: Class names; their count configures the detector and
            they are forwarded to the AP computation.
        TEST_DATASET_NAME: Dataset name forwarded to
            ``set_detectron_cfg_test``.
        datalist: Records providing ``'video_path'``,
            ``'video_frame_number'`` and ``'video_frame_time'``.
        model_to_eval: Model reference forwarded to
            ``set_detectron_cfg_test``.
    """
    n_classes = len(cls_names)
    detectron_out = str(small.mkdir(out/'d2_output'))
    d_cfg = set_detectron_cfg_base(detectron_out, n_classes, cf['seed'])
    d_cfg = set_detectron_cfg_test(
        d_cfg, TEST_DATASET_NAME, model_to_eval,
        cf['conf_thresh'], cf_add_d2)
    simple_d2_setup(d_cfg)

    predictor = DefaultPredictor(d_cfg)
    cpu_device = torch.device("cpu")

    def eval_func(dl_item: Dl_record):
        # Decode the requested frame and return its predictions moved to CPU.
        frame = get_frame_without_crashing(
            dl_item['video_path'],
            dl_item['video_frame_number'],
            dl_item['video_frame_time'])
        return predictor(frame)["instances"].to(cpu_device)

    # NOTE(review): '::50' is passed through to Isaver_simple unchanged;
    # presumably a save-interval spec -- confirm against that helper.
    saver = snippets.Isaver_simple(
        small.mkdir(out/'isaver'), datalist, eval_func, '::50')
    predicted_datalist = saver.run()

    if cf['nms.enable']:
        iou_thresh = cf['nms.thresh']

        def _suppress(item):
            # Class-aware NMS when 'nms.batched' is set, class-agnostic
            # NMS otherwise.
            if cf['nms.batched']:
                kept = batched_nms(
                    item.pred_boxes.tensor, item.scores,
                    item.pred_classes, iou_thresh)
            else:
                kept = nms(item.pred_boxes.tensor, item.scores, iou_thresh)
            return item[kept]

        predicted_datalist = [_suppress(item) for item in predicted_datalist]

    log.info('AP v2:')
    computeprint_ap_for_video_datalist(
        cls_names, datalist, predicted_datalist)