def compute_features_from_bbox(self, original_image, gt_boxes):
    """
    Extracts features for the given ground-truth boxes.
    Assumes the ground-truth boxes are given in xyxy format.

    Arguments:
        original_image (np.ndarray): an image as returned by OpenCV
        gt_boxes (list): ground-truth boxes in xyxy format

    Returns:
        np.ndarray: the feature vector of the first ground-truth box
    """
    # Wrap the gt boxes in a BoxList sized to the original image.
    gt_box_list = BoxList(
        gt_boxes, (original_image.shape[1], original_image.shape[0]),
        mode='xyxy').to(self.device)
    # Preprocess the image as in `run_on_opencv_image`.
    image = self.transforms(original_image)
    # Resize the boxes to the transformed image size and wrap them in a
    # list, since the feature extractor expects one BoxList per image.
    gt_box_list = [gt_box_list.resize((image.size(2), image.size(1)))]
    image_list = to_image_list(image, self.cfg.DATALOADER.SIZE_DIVISIBILITY)
    image_list = image_list.to(self.device)
    with torch.no_grad():
        features = self.feat_extractor(image_list, gt_box_list)
    return features[0].cpu().detach().numpy()[0]
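# A minimal usage sketch (not from the repo): assumes this method sits on a
# predictor-style wrapper exposing `transforms`, `feat_extractor`, `cfg`, and
# `device` as used above. `FeatureExtractor` and the file path are
# illustrative placeholders, not the repo's exact API.
#
#   import cv2
#   extractor = FeatureExtractor(cfg)                  # hypothetical wrapper
#   image = cv2.imread('example.jpg')                  # HxWxC BGR array
#   gt_boxes = [[10., 20., 110., 220.]]                # one box, xyxy
#   feats = extractor.compute_features_from_bbox(image, gt_boxes)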
def get_batch_proposals(self, images, im_scales, im_infos, proposals):
    proposals_batch = []
    for idx, img_info in enumerate(im_infos):
        # Keep only the valid rows of the (padded) proposal array.
        boxes_tensor = torch.from_numpy(
            proposals[idx]["bbox"][:int(proposals[idx]["num_box"]), 0:]
        ).to("cuda")
        orig_image_size = (img_info["width"], img_info["height"])
        boxes = BoxList(boxes_tensor, orig_image_size)
        # Rescale from the original image size to the transformed input size.
        image_size = (images.image_sizes[idx][1], images.image_sizes[idx][0])
        boxes = boxes.resize(image_size)
        proposals_batch.append(boxes)
    return proposals_batch
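# A sketch of the proposal format `get_batch_proposals` expects, inferred from
# the indexing above (not documented in the repo): each entry carries a padded
# box array plus the number of valid rows. Values are illustrative.
#
#   proposals = [{
#       'bbox': np.zeros((100, 4), dtype=np.float32),  # padded box array
#       'num_box': 37,                                 # only first 37 rows valid
#   }]
#   im_infos = [{'width': 1024, 'height': 768}]
#   batch = self.get_batch_proposals(images, im_scales, im_infos, proposals)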
def normalize_output(self, frame, results: BoxList):
    if self._vis_height is not None:
        boxlist_height = results.size[1]
        frame_height, frame_width = frame.shape[:2]
        assert boxlist_height == frame_height
        rescale_ratio = float(self._vis_height) / float(frame_height)
        new_height = int(round(frame_height * rescale_ratio))
        new_width = int(round(frame_width * rescale_ratio))
        frame = cv2.resize(frame, (new_width, new_height))
        results = results.resize((new_width, new_height))
    return frame, results
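# Hedged example: `normalize_output` rescales a frame and its detections to a
# common visualization height, preserving the aspect ratio. Assuming
# `self._vis_height = 540` and a 1920x1080 frame, both outputs come back at
# 960x540:
#
#   frame = np.zeros((1080, 1920, 3), dtype=np.uint8)
#   frame, results = self.normalize_output(frame, results)
#   assert frame.shape[:2] == (540, 960) and results.size == (960, 540)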
def prepare_for_vrd_detection(predictions, dataset):
    vrd_results = []
    for image_id, prediction in enumerate(predictions):
        original_id = dataset.ann_file[image_id]['filename']

        # TODO replace with get_img_info?
        image_width = dataset.ann_file[image_id]["width"]
        image_height = dataset.ann_file[image_id]["height"]

        # Build separate BoxLists for the subject and object boxes so they
        # can be resized and converted alongside the main prediction.
        subject_boundingboxes = prediction.get_field("subject_boundingboxes")
        object_boundingboxes = prediction.get_field("object_boundingboxes")
        prediction_size = prediction.size
        prediction_sub = BoxList(subject_boundingboxes, prediction_size, mode="xyxy")
        prediction_ob = BoxList(object_boundingboxes, prediction_size, mode="xyxy")

        # Rescale to the original image size and convert to xywh.
        prediction = prediction.resize((image_width, image_height))
        prediction_sub = prediction_sub.resize((image_width, image_height))
        prediction_ob = prediction_ob.resize((image_width, image_height))
        prediction = prediction.convert("xywh")
        prediction_sub = prediction_sub.convert("xywh")
        prediction_ob = prediction_ob.convert("xywh")

        boxes = prediction.bbox.tolist()
        subject_boundingboxes = prediction_sub.bbox.tolist()
        object_boundingboxes = prediction_ob.bbox.tolist()
        subject_category = prediction.get_field("subject_category").tolist()
        object_category = prediction.get_field("object_category").tolist()
        subject_scores = prediction.get_field("subject_scores").tolist()
        object_scores = prediction.get_field("object_scores").tolist()
        objectpairs_scores = prediction.get_field("objectpairs_scores").tolist()
        predicate_scores = prediction.get_field("predicate_scores").tolist()
        ids = prediction.get_field("ids").tolist()

        objects = [{
            "subject_boundingboxes": subject_boundingboxes[k],
            "object_boundingboxes": object_boundingboxes[k],
            "subject_category": subject_category[k],
            "object_category": object_category[k],
            "subject_scores": subject_scores[k],
            "object_scores": object_scores[k],
            "objectpairs_scores": objectpairs_scores[k],
            "predicate_scores": predicate_scores[k],
            "ids": ids[k],
        } for k in range(len(boxes))]

        vrd_results.append({
            "filename": original_id,
            "height": image_height,
            "width": image_width,
            "objects_num": len(prediction),
            "objects": objects,
        })
    return vrd_results
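# A hedged usage sketch: dumping VRD-style results to JSON. Assumes
# `predictions` is a list of BoxLists carrying the subject/object fields read
# above and that `dataset.ann_file` is the loaded annotation list; the output
# path is illustrative.
#
#   vrd_results = prepare_for_vrd_detection(predictions, dataset)
#   with open('vrd_results.json', 'w') as f:
#       json.dump(vrd_results, f)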
def depth_evaluation(
    dataset,
    predictions,
    output_folder,
    box_only,
    iou_types,
    expected_results,
    expected_results_sigma_tol,
    score_threshold=0.05,
    bbox_iou_threshold=0.5,
    height_to_depth=False,
):
    logger = logging.getLogger("maskrcnn_benchmark.inference")
    logger.info("Preparing results for Depth Evaluation")
    # Result table: file_name -> list of matched detections.
    depth_results = {}
    gt_box_num = 0
    for image_id, prediction in enumerate(predictions):
        original_id = dataset.id_to_img_map[image_id]
        if len(prediction) == 0:
            continue

        img_info = dataset.get_img_info(image_id)
        image_width = img_info["width"]
        image_height = img_info["height"]
        file_name = img_info["file_name"]

        # Load the ground-truth annotations for this image.
        # TODO: load gt annotations only
        ann_ids = dataset.coco.getAnnIds(imgIds=original_id)
        anno = dataset.coco.loadAnns(ann_ids)

        # Filter truncated and out-of-class annotations.
        # TODO might be better to add an extra field
        if hasattr(dataset, 'remove_truncated') and dataset.remove_truncated:
            anno = [obj for obj in anno if obj["truncated"] == 0]
        if hasattr(dataset, 'class_filter_list') and len(dataset.class_filter_list) > 0:
            anno = [
                obj for obj in anno
                if obj["category_id"] in dataset.class_filter_list
            ]

        depth_key = dataset.depth_key if hasattr(dataset, 'depth_key') else "depth"
        input_depth_mode = dataset.input_depth_mode if hasattr(
            dataset, 'input_depth_mode') else depth_key
        output_depth_mode = dataset.output_depth_mode if hasattr(
            dataset, 'output_depth_mode') else "depth"
        min_value = dataset.min_value if hasattr(dataset, 'min_value') else 0.1
        max_value = dataset.max_value if hasattr(dataset, 'max_value') else 100

        boxes = [obj["bbox"] for obj in anno]
        boxes = torch.as_tensor(boxes).reshape(-1, 4)  # guard against no boxes
        target = BoxList(boxes, (image_width, image_height), mode="xywh").convert("xyxy")

        classes = [obj["category_id"] for obj in anno]
        classes = [dataset.json_category_id_to_contiguous_id[c] for c in classes]
        classes = torch.tensor(classes)
        target.add_field("labels", classes)

        if height_to_depth:
            # Derive gt depth from the annotated real-world height.
            height = [obj["height_rw"] for obj in anno]
            height = torch.tensor(height)
            target.add_field("depths", height)
            target = _height_to_depth(target, img_info)
        elif anno and depth_key in anno[0]:
            depth = [obj[depth_key] for obj in anno]
            depth = PointDepth(
                depth, (image_width, image_height),
                focal_length=img_info["camera_params"]["intrinsic"]["fx"],
                baseline=img_info["camera_params"]["extrinsic"]["baseline"],
                min_value=min_value,
                max_value=max_value,
                mode=input_depth_mode)
            target.add_field("depths", depth)

        gt = target.resize((image_width, image_height))
        gt_boxes = gt.bbox.tolist()
        if len(gt_boxes) == 0:
            continue
        gt_box_num += len(gt_boxes)
        gt_labels = gt.get_field("labels").tolist()
        gt_depths = gt.get_field('depths').convert("depth").depths.tolist()
        gt_mapped_labels = [
            dataset.contiguous_category_id_to_json_id[i] for i in gt_labels
        ]

        prediction = prediction.resize((image_width, image_height))
        prediction = prediction.convert("xyxy")

        # Keep only detections above the score threshold.
        scores = prediction.get_field("scores")
        positive_indices = scores > score_threshold
        scores = scores.tolist()
        boxes = prediction.bbox[positive_indices].tolist()
        if len(boxes) == 0:
            continue
        labels = prediction.get_field("labels")[positive_indices].tolist()

        if height_to_depth:
            prediction = _height_to_depth(prediction, img_info)
        depths = prediction.get_field('depths')[positive_indices]
        if not isinstance(depths, PointDepth):
            depths = PointDepth(
                depths, (image_width, image_height),
                focal_length=img_info["camera_params"]["intrinsic"]["fx"],
                baseline=img_info["camera_params"]["extrinsic"]["baseline"],
                min_value=min_value,
                max_value=max_value,
                mode="depth")
        depths = depths.convert("depth")
        depths = depths.depths.tolist()
        mapped_labels = [
            dataset.contiguous_category_id_to_json_id[i] for i in labels
        ]

        # Greedily match each detection to a ground-truth box by IoU.
        overlaps = boxlist_iou(prediction[positive_indices], gt)
        gt_overlaps = torch.zeros(len(gt_boxes))
        dt_matches = [-1] * len(boxes)
        for j in range(min(len(prediction), len(gt_boxes))):
            # find which proposal box maximally covers each gt box
            # and get the iou amount of coverage for each gt box
            max_overlaps, argmax_overlaps = overlaps.max(dim=0)
            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ovr, gt_ind = max_overlaps.max(dim=0)
            if gt_ovr < bbox_iou_threshold:
                continue
            assert gt_ovr >= 0
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            dt_matches[box_ind] = gt_ind
            # record the iou coverage of this gt box
            gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert gt_overlaps[j] == gt_ovr
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1

        depth_results[file_name] = []
        for k in range(len(boxes)):
            depth_results[file_name].append({
                'image_id': original_id,
                'category_id': mapped_labels[k],
                'bbox': boxes[k],
                'depth': depths[k][0],
                'gt_category_id': gt_mapped_labels[dt_matches[k]] if dt_matches[k] >= 0 else None,
                'gt_bbox': gt_boxes[dt_matches[k]] if dt_matches[k] >= 0 else None,
                'gt_depth': gt_depths[dt_matches[k]] if dt_matches[k] >= 0 else None,
                'score': scores[k],
            })

    logger.info("Evaluating predictions")
    logger.info("Ground Truth boxes %d" % gt_box_num)
    results = evaluate_results(depth_results)
    import json
    logger.info(json.dumps(results, sort_keys=True, indent=4))
    return results
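# A hedged invocation sketch for `depth_evaluation` (argument values are
# illustrative; `output_folder`, `box_only`, `iou_types`, and the
# `expected_results*` arguments appear unused in the body above and are
# presumably kept for interface compatibility with the other evaluators):
#
#   results = depth_evaluation(
#       dataset, predictions, output_folder='eval/',
#       box_only=False, iou_types=('bbox',),
#       expected_results=(), expected_results_sigma_tol=4,
#       score_threshold=0.05, bbox_iou_threshold=0.5)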
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('config_file')
    parser.add_argument('ckpt_file')
    parser.add_argument('image_dir')
    parser.add_argument('name_file')
    parser.add_argument('bbox_file')
    parser.add_argument('output_dir')
    parser.add_argument('--layer_name', default='fc7')
    parser.add_argument('--start_id', type=int, default=0)
    parser.add_argument('--end_id', type=int, default=None)
    opts = parser.parse_args()

    bbox_data = json.load(open(opts.bbox_file))

    if not os.path.exists(opts.output_dir):
        os.makedirs(opts.output_dir)

    ########### build model #############
    # update the config options with the config file
    cfg.merge_from_file(opts.config_file)
    # manually override some options
    cfg.merge_from_list(['MODEL.DEVICE', 'cuda:0'])
    cfg.freeze()

    device = torch.device(cfg.MODEL.DEVICE)
    cpu_device = torch.device("cpu")

    model = build_detection_model(cfg)
    model.to(device)
    model.eval()

    checkpointer = DetectronCheckpointer(cfg, model)
    _ = checkpointer.load(f=opts.ckpt_file, use_latest=False)

    transform_fn = build_transform(cfg)

    ########### extract features #############
    names = np.load(opts.name_file)
    if opts.end_id is None:
        opts.end_id = len(names)
    total_images = opts.end_id - opts.start_id

    for i, name in enumerate(names):
        if i < opts.start_id or i >= opts.end_id:
            continue
        outname = name.replace('/', '_')
        outfile = os.path.join(opts.output_dir, '%s.hdf5' % outname)
        if os.path.exists(outfile):
            continue

        img_file = os.path.join(opts.image_dir, name)

        # apply pre-processing to the image
        original_image = cv2.imread(img_file)
        height, width = original_image.shape[:-1]
        image = transform_fn(original_image)
        nheight, nwidth = image.size(1), image.size(2)

        # convert to an ImageList, padded so that it is divisible by
        # cfg.DATALOADER.SIZE_DIVISIBILITY
        image_list = to_image_list(image, cfg.DATALOADER.SIZE_DIVISIBILITY)
        image_list = image_list.to(device)

        # compute predictions: one image per mini-batch
        with torch.no_grad():
            # features: FPN tuple of (batch, dim_ft: 256, h, w) tensors
            features = model.backbone(image_list.tensors)
            if name in bbox_data:
                cpu_boxes = bbox_data[name]
                boxes = torch.FloatTensor(cpu_boxes).to(device)
                cand_proposals = BoxList(boxes, (width, height), mode='xyxy')
                cand_proposals = cand_proposals.resize((nwidth, nheight))
                bbox_fts, _, _ = model.roi_heads.extract_features(
                    features, [cand_proposals])
                bbox_fts = bbox_fts[opts.layer_name].cpu()

                # save to file
                with h5py.File(outfile, 'w') as outf:
                    outf.create_dataset(outname, bbox_fts.size(),
                                        dtype='float', compression='gzip')
                    outf[outname][...] = bbox_fts.data.numpy()
                    outf[outname].attrs['image_w'] = width
                    outf[outname].attrs['image_h'] = height
                    outf[outname].attrs['boxes'] = np.array(cpu_boxes).astype(np.int32)

                if i % 1000 == 0:
                    print('name %s shape %s, processing %d/%d (%.2f%% done)' % (
                        name, bbox_fts.shape, i - opts.start_id, total_images,
                        (i - opts.start_id) * 100 / total_images))
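# Example command line (script name and paths are illustrative):
#   python extract_features.py configs/model.yaml model_final.pth \
#       images/ names.npy boxes.json output_feats/ --layer_name fc7

if __name__ == '__main__':
    main()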