Code Example #1
    def compute_features_from_bbox(self, original_image, gt_boxes):
        """
        Extract features for the given ground-truth boxes.

        Arguments:
            original_image (np.ndarray): an image as returned by OpenCV
            gt_boxes: ground-truth boxes in xyxy format, given in the
                original image's pixel coordinates
        Returns:
            np.ndarray: features computed for the ground-truth boxes of the
            single input image, detached and moved to the CPU
        """
        # Wrap the gt boxes in a BoxList; BoxList takes the image size as a
        # (width, height) tuple
        gt_box_list = BoxList(
            gt_boxes, (original_image.shape[1], original_image.shape[0]),
            mode='xyxy').to(self.device)
        # Apply the same preprocessing as in `run_on_opencv_image`
        image = self.transforms(original_image)
        # Rescale the boxes to the transformed image size and wrap them in a
        # per-image list, as expected by the feature extractor
        gt_box_list = [gt_box_list.resize((image.size(2), image.size(1)))]
        image_list = to_image_list(image,
                                   self.cfg.DATALOADER.SIZE_DIVISIBILITY)
        image_list = image_list.to(self.device)
        with torch.no_grad():
            features = self.feat_extractor(image_list, gt_box_list)
        return features[0].cpu().detach().numpy()[0]
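
A minimal usage sketch for the method above, assuming `extractor` is an instance of the (not shown) wrapper class that provides `transforms`, `device`, `cfg` and `feat_extractor`; the image path and box coordinates are placeholders:

import cv2
import numpy as np

image = cv2.imread("example.jpg")                            # BGR image as returned by OpenCV
gt_boxes = np.array([[50, 60, 200, 300]], dtype=np.float32)  # one box in xyxy pixel coordinates
features = extractor.compute_features_from_bbox(image, gt_boxes)
print(features.shape)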
Code Example #2
    def get_batch_proposals(self, images, im_scales, im_infos, proposals):
        proposals_batch = []
        for idx, img_info in enumerate(im_infos):
            num_box = int(proposals[idx]["num_box"])
            boxes_tensor = torch.from_numpy(
                proposals[idx]["bbox"][:num_box]).to("cuda")
            # BoxList takes the image size as (width, height)
            orig_image_size = (img_info["width"], img_info["height"])
            boxes = BoxList(boxes_tensor, orig_image_size)
            # ImageList.image_sizes stores (height, width), so swap the order
            # before rescaling the boxes to the resized input image
            image_size = (images.image_sizes[idx][1],
                          images.image_sizes[idx][0])
            boxes = boxes.resize(image_size)
            proposals_batch.append(boxes)
        return proposals_batch
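
Both examples above lean on BoxList.resize to map box coordinates from one image size to another. A minimal sketch of that behaviour with made-up values:

import torch
from maskrcnn_benchmark.structures.bounding_box import BoxList

boxes = torch.tensor([[10.0, 20.0, 110.0, 220.0]])  # one box in xyxy format
box_list = BoxList(boxes, (640, 480), mode="xyxy")   # image size is (width, height)
resized = box_list.resize((320, 240))                # halves both dimensions
print(resized.bbox)                                  # tensor([[  5.,  10.,  55., 110.]])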
Code Example #3
    def normalize_output(self, frame, results: BoxList):
        if self._vis_height is not None:
            # the detections must already be in the frame's coordinate system
            boxlist_height = results.size[1]
            frame_height, frame_width = frame.shape[:2]
            assert boxlist_height == frame_height

            # rescale both the frame and the detections to the target height,
            # preserving the aspect ratio
            rescale_ratio = float(self._vis_height) / float(frame_height)
            new_height = int(round(frame_height * rescale_ratio))
            new_width = int(round(frame_width * rescale_ratio))

            frame = cv2.resize(frame, (new_width, new_height))
            results = results.resize((new_width, new_height))

        return frame, results
Code Example #4
def prepare_for_vrd_detection(predictions, dataset):
    # assert isinstance(dataset, COCODataset)
    vrd_results = []
    for image_id, prediction in enumerate(predictions):
        original_id = dataset.ann_file[image_id]['filename']
        # if len(prediction) == 0:
        #     continue

        # TODO replace with get_img_info?
        image_width = dataset.ann_file[image_id]["width"]
        image_height = dataset.ann_file[image_id]["height"]

        subject_boundingboxes = prediction.get_field("subject_boundingboxes")
        object_boundingboxes = prediction.get_field("object_boundingboxes")
        prediction_size = prediction.size

        prediction_sub = BoxList(subject_boundingboxes,
                                 prediction_size,
                                 mode="xyxy")
        prediction_ob = BoxList(object_boundingboxes,
                                prediction_size,
                                mode="xyxy")

        prediction = prediction.resize((image_width, image_height))
        prediction_sub = prediction_sub.resize((image_width, image_height))
        prediction_ob = prediction_ob.resize((image_width, image_height))
        prediction_sub = prediction_sub.convert("xywh")
        prediction = prediction.convert("xywh")
        prediction_ob = prediction_ob.convert("xywh")

        boxes = prediction.bbox.tolist()
        subject_boundingboxes = prediction_sub.bbox.tolist()
        object_boundingboxes = prediction_ob.bbox.tolist()
        subject_category = prediction.get_field("subject_category").tolist()
        object_category = prediction.get_field("object_category").tolist()
        subject_scores = prediction.get_field("subject_scores").tolist()
        object_scores = prediction.get_field("object_scores").tolist()
        objectpairs_scores = prediction.get_field(
            "objectpairs_scores").tolist()
        predicate_scores = prediction.get_field("predicate_scores").tolist()

        ids = prediction.get_field("ids").tolist()

        objects = [{
            "subject_boundingboxes": subject_boundingboxes[k],
            "object_boundingboxes": object_boundingboxes[k],
            "subject_category": subject_category[k],
            "object_category": object_category[k],
            "subject_scores": subject_scores[k],
            "object_scores": object_scores[k],
            "objectpairs_scores": objectpairs_scores[k],
            "predicate_scores": predicate_scores[k],
            "ids": ids[k],
        } for k in range(len(boxes))]

        vrd_results.append({
            "filename": original_id,
            "height": image_height,
            "width": image_width,
            "objects_num": len(prediction),
            "objects": objects,
        })
    return vrd_results
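
The example above exports boxes in COCO-style xywh. A minimal sketch of the resize-then-convert pattern it uses, with placeholder sizes and coordinates:

import torch
from maskrcnn_benchmark.structures.bounding_box import BoxList

pred = BoxList(torch.tensor([[10.0, 20.0, 110.0, 220.0]]),
               (640, 480), mode="xyxy")
pred = pred.resize((1280, 960))   # map back to the original image resolution
pred = pred.convert("xywh")       # (x, y, width, height) for COCO-style output
print(pred.bbox.tolist())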
Code Example #5
def depth_evaluation(
    dataset,
    predictions,
    output_folder,
    box_only,
    iou_types,
    expected_results,
    expected_results_sigma_tol,
    score_threshold=0.05,
    bbox_iou_threshold=0.5,
    height_to_depth=False,
):
    logger = logging.getLogger("maskrcnn_benchmark.inference")

    logger.info("Preparing results for Depth Evaluation")
    # result table "file_name" : result
    depth_results = {}
    gt_box_num = 0
    for image_id, prediction in enumerate(predictions):
        original_id = dataset.id_to_img_map[image_id]
        if len(prediction) == 0:
            continue

        img_info = dataset.get_img_info(image_id)
        image_width = img_info["width"]
        image_height = img_info["height"]
        file_name = img_info["file_name"]

        # ground truth
        # img, gt, idx = dataset[original_id] # TODO: load gt only
        ann_ids = dataset.coco.getAnnIds(imgIds=original_id)
        anno = dataset.coco.loadAnns(ann_ids)

        # filter truncated annotations
        # TODO might be better to add an extra field
        if getattr(dataset, 'remove_truncated', False):
            anno = [obj for obj in anno if obj["truncated"] == 0]

        if getattr(dataset, 'class_filter_list', None):
            anno = [
                obj for obj in anno
                if obj["category_id"] in dataset.class_filter_list
            ]

        depth_key = getattr(dataset, 'depth_key', "depth")
        input_depth_mode = getattr(dataset, 'input_depth_mode', depth_key)
        output_depth_mode = getattr(dataset, 'output_depth_mode', "depth")
        min_value = getattr(dataset, 'min_value', 0.1)
        max_value = getattr(dataset, 'max_value', 100)

        boxes = [obj["bbox"] for obj in anno]
        boxes = torch.as_tensor(boxes).reshape(-1, 4)  # guard against no boxes
        target = BoxList(boxes, (image_width, image_height),
                         mode="xywh").convert("xyxy")

        classes = [obj["category_id"] for obj in anno]
        classes = [
            dataset.json_category_id_to_contiguous_id[c] for c in classes
        ]
        classes = torch.tensor(classes)
        target.add_field("labels", classes)

        if height_to_depth:
            height = [obj["height_rw"] for obj in anno]
            height = torch.tensor(height)
            target.add_field("depths", height)
            target = _height_to_depth(target, img_info)
        elif anno and depth_key in anno[0]:
            depth = [obj[depth_key] for obj in anno]
            # depth = torch.tensor(depth)
            depth = PointDepth(
                depth, (image_width, image_height),
                focal_length=img_info["camera_params"]["intrinsic"]["fx"],
                baseline=img_info["camera_params"]["extrinsic"]["baseline"],
                min_value=min_value,
                max_value=max_value,
                mode=input_depth_mode)
            target.add_field("depths", depth)

        gt = target.resize((image_width, image_height))

        gt_boxes = gt.bbox.tolist()
        if len(gt_boxes) == 0: continue
        gt_box_num += len(gt_boxes)
        gt_labels = gt.get_field("labels").tolist()
        gt_depths = gt.get_field('depths').convert("depth").depths.tolist()
        # print(gt_depths)
        gt_mapped_labels = [
            dataset.contiguous_category_id_to_json_id[i] for i in gt_labels
        ]

        prediction = prediction.resize((image_width, image_height))
        prediction = prediction.convert("xyxy")
        # print(prediction)

        scores = prediction.get_field("scores")
        positive_indices = scores > score_threshold
        scores = scores.tolist()

        boxes = prediction.bbox[positive_indices].tolist()
        if len(boxes) == 0: continue
        labels = prediction.get_field("labels")[positive_indices].tolist()

        if height_to_depth:
            prediction = _height_to_depth(prediction, img_info)
        depths = prediction.get_field('depths')[positive_indices]
        if not isinstance(depths, PointDepth):
            # wrap raw depth predictions so they can be converted uniformly below
            depths = PointDepth(
                depths, (image_width, image_height),
                focal_length=img_info["camera_params"]["intrinsic"]["fx"],
                baseline=img_info["camera_params"]["extrinsic"]["baseline"],
                min_value=min_value,
                max_value=max_value,
                mode="depth")
        depths = depths.convert("depth")
        depths = depths.depths.tolist()
        # print(depths, gt_depths)

        mapped_labels = [
            dataset.contiguous_category_id_to_json_id[i] for i in labels
        ]

        # find corresponding box
        overlaps = boxlist_iou(prediction[positive_indices], gt)
        gt_overlaps = torch.zeros(len(gt_boxes))
        dt_matches = [-1] * len(boxes)
        for j in range(min(len(prediction), len(gt_boxes))):
            # find which proposal box maximally covers each gt box
            # and get the iou amount of coverage for each gt box
            max_overlaps, argmax_overlaps = overlaps.max(dim=0)

            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ovr, gt_ind = max_overlaps.max(dim=0)
            if gt_ovr < bbox_iou_threshold: continue
            assert gt_ovr >= 0
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            dt_matches[box_ind] = gt_ind
            # record the iou coverage of this gt box
            gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert gt_overlaps[j] == gt_ovr
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1

        # locations, rotation_y = ddd2locrot(
        #   center, alpha, dimensions, depth, calibs[0])

        depth_results[file_name] = []
        # gt[file_name] = {}

        for k in range(len(boxes)):
            matched = dt_matches[k] >= 0
            depth_results[file_name].append({
                'image_id': original_id,
                # 'calib': img_info['calib'],
                'category_id': mapped_labels[k],
                'bbox': boxes[k],
                'depth': depths[k][0],
                'gt_category_id': gt_mapped_labels[dt_matches[k]] if matched else None,
                'gt_bbox': gt_boxes[dt_matches[k]] if matched else None,
                'gt_depth': gt_depths[dt_matches[k]] if matched else None,
                'score': scores[k],
            })

        # for k in range(len(gt_boxes)):
        #     gt[file_name].append({
        #         'image_id': original_id,
        #         'calib': img_info['calib'],
        #         'category_id': gt_mapped_labels[k],
        #         'bbox': gt_boxes[k],
        #         'depth': gt_depths[k],
        #     })

    logger.info("Evaluating predictions")
    logger.info("Ground Truth boxes %d" % gt_box_num)
    results = evaluate_results(depth_results)
    import json
    logger.info(json.dumps(results, sort_keys=True, indent=4))

    return results
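
The matching loop above relies on boxlist_iou, which returns an N x M IoU matrix between two BoxLists defined over the same image size. A minimal sketch with placeholder boxes:

import torch
from maskrcnn_benchmark.structures.bounding_box import BoxList
from maskrcnn_benchmark.structures.boxlist_ops import boxlist_iou

preds = BoxList(torch.tensor([[0.0, 0.0, 10.0, 10.0],
                              [20.0, 20.0, 30.0, 30.0]]), (100, 100), mode="xyxy")
gts = BoxList(torch.tensor([[0.0, 0.0, 10.0, 10.0]]), (100, 100), mode="xyxy")
overlaps = boxlist_iou(preds, gts)                   # shape (2, 1): IoU of every prediction with every gt
max_overlaps, argmax_overlaps = overlaps.max(dim=0)  # best prediction for each gt, as in the loop above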
Code Example #6
def main():
  parser = argparse.ArgumentParser()
  parser.add_argument('config_file')
  parser.add_argument('ckpt_file')
  parser.add_argument('image_dir')
  parser.add_argument('name_file')
  parser.add_argument('bbox_file')
  parser.add_argument('output_dir')
  parser.add_argument('--layer_name', default='fc7')
  parser.add_argument('--start_id', type=int, default=0)
  parser.add_argument('--end_id', type=int, default=None)
  opts = parser.parse_args()

  with open(opts.bbox_file) as f:
    bbox_data = json.load(f)

  if not os.path.exists(opts.output_dir):
    os.makedirs(opts.output_dir)

  ########### build model #############
  # update the config options with the config file
  cfg.merge_from_file(opts.config_file)
  # manual override some options
  cfg.merge_from_list(['MODEL.DEVICE', 'cuda:0'])
  cfg.freeze()

  device = torch.device(cfg.MODEL.DEVICE)
  cpu_device = torch.device("cpu")

  model = build_detection_model(cfg)
  model.to(device)
  model.eval()

  checkpointer = DetectronCheckpointer(cfg, model)
  _ = checkpointer.load(f=opts.ckpt_file, use_latest=False)

  transform_fn = build_transform(cfg)

  ########### extract feature #############
  names = np.load(opts.name_file)
  if opts.end_id is None:
    opts.end_id = len(names)
  total_images = opts.end_id - opts.start_id

  for i, name in enumerate(names):
    if i < opts.start_id or i >= opts.end_id:
      continue
    outname = name.replace('/', '_')
    outfile = os.path.join(opts.output_dir, '%s.hdf5'%outname)

    if os.path.exists(outfile):
      continue

    img_file = os.path.join(opts.image_dir, name)

    # apply pre-processing to image
    original_image = cv2.imread(img_file)
    height, width = original_image.shape[:2]
    image = transform_fn(original_image)
    nheight, nwidth = image.size(1), image.size(2)

    # convert to an ImageList, padded so that it is divisible by
    # cfg.DATALOADER.SIZE_DIVISIBILITY
    image_list = to_image_list(image, cfg.DATALOADER.SIZE_DIVISIBILITY)
    image_list = image_list.to(device)

    # compute predictions: one image one mini-batch
    with torch.no_grad():
      # features: tuples in FPN (batch, dim_ft: 256, h, w)
      features = model.backbone(image_list.tensors)

      if name in bbox_data:
        cpu_boxes = bbox_data[name]
        boxes = torch.FloatTensor(cpu_boxes).to(device)
        cand_proposals = BoxList(boxes, (width, height), mode='xyxy')
        cand_proposals = cand_proposals.resize((nwidth, nheight))

        bbox_fts, _, _ = model.roi_heads.extract_features(features, [cand_proposals])
        bbox_fts = bbox_fts[opts.layer_name].cpu()

        # save to file
        with h5py.File(outfile, 'w') as outf:
          outf.create_dataset(outname, bbox_fts.size(), dtype='float', compression='gzip')
          outf[outname][...] = bbox_fts.data.numpy()
          outf[outname].attrs['image_w'] = width
          outf[outname].attrs['image_h'] = height
          outf[outname].attrs['boxes'] = np.array(cpu_boxes).astype(np.int32)

    # periodic progress report; bbox_fts is only defined when the image has
    # entries in bbox_data, so guard the shape lookup
    if i % 1000 == 0:
      shape = tuple(bbox_fts.shape) if name in bbox_data else None
      print('name %s shape %s, processing %d/%d (%.2f%% done)' % (
        name, shape, i - opts.start_id, total_images,
        (i - opts.start_id) * 100 / total_images))
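
A minimal sketch of reading one of the saved feature files back, assuming the HDF5 layout written above (the file name is a placeholder):

import h5py
import numpy as np

with h5py.File("some_image.jpg.hdf5", "r") as f:
  key = list(f.keys())[0]          # dataset is named after the image
  feats = np.asarray(f[key])       # per-box features from the chosen layer
  boxes = f[key].attrs["boxes"]    # the proposal boxes that were fed in
  width = f[key].attrs["image_w"]
  height = f[key].attrs["image_h"]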