Code example #1
    def resize(self, size, *args, **kwargs):
        """
        Returns a resized copy of this bounding box

        :param size: The requested size in pixels, as a 2-tuple:
            (width, height).
        """

        ratios = tuple(
            float(s) / float(s_orig) for s, s_orig in zip(size, self.size))
        if ratios[0] == ratios[1]:
            ratio = ratios[0]
            scaled_box = self.bbox_3d * ratio
            bbox_3d = Box3dList(scaled_box, size, mode=self.mode)
            # bbox_3d._copy_extra_fields(self)
            return bbox_3d

        ratio_width, ratio_height = ratios
        xmin, ymin, xmax, ymax = self._split_into_xyxy()
        scaled_xmin = xmin * ratio_width
        scaled_xmax = xmax * ratio_width
        scaled_ymin = ymin * ratio_height
        scaled_ymax = ymax * ratio_height
        scaled_box = torch.cat(
            (scaled_xmin, scaled_ymin, scaled_xmax, scaled_ymax), dim=-1)
        bbox_3d = BoxList(scaled_box, size, mode="xyxy")
        # bbox_3d._copy_extra_fields(self)
        for k, v in self.extra_fields.items():
            if not isinstance(v, torch.Tensor):
                v = v.resize(size, *args, **kwargs)
            bbox_3d.add_field(k, v)

        return bbox_3d.convert(self.mode)
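
For orientation, a minimal usage sketch of the plain BoxList.resize behaviour that the method above extends (assuming maskrcnn_benchmark's BoxList API; the box values are made up):

import torch
from maskrcnn_benchmark.structures.bounding_box import BoxList

# A single toy box on a 100x100 image.
boxes = BoxList(torch.tensor([[10., 10., 50., 50.]]), (100, 100), mode="xyxy")
# Doubling both image dimensions scales every coordinate by the same ratio.
resized = boxes.resize((200, 200))
print(resized.bbox)  # tensor([[ 20.,  20., 100., 100.]])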
Code example #2
    def boxlist_crop(self, box: BoxList, x1, y1, x2, y2):
        """
         Adjust the coordinates of the bounding boxes to the image crop
         specified by (x1, y1, x2, y2).
        """

        w, h = (x2 - x1), (y2 - y1)
        xmin, ymin, xmax, ymax = box._split_into_xyxy()
        cropped_xmin = (xmin - x1)
        cropped_ymin = (ymin - y1)
        cropped_xmax = (xmax - x1)
        cropped_ymax = (ymax - y1)
        cropped_bbox = torch.cat(
            (cropped_xmin, cropped_ymin, cropped_xmax, cropped_ymax), dim=-1)
        cropped_box = BoxList(cropped_bbox, (w, h), mode="xyxy")
        for k, v in box.extra_fields.items():
            cropped_box.add_field(k, v)

        if self.amodal:
            # amodal training allows bbox corners to extend beyond the image boundary
            cropped_box = self.remove_invisible_box(cropped_box)
        else:
            # for non-amodal training, bbox corners must stay within the image boundary
            cropped_box = cropped_box.clip_to_image(remove_empty=True)
        return cropped_box.convert(box.mode)
Code example #3
    def crop(self, box):
        """
        Crops a rectangular region from this bounding box. The box is a
        4-tuple defining the left, upper, right, and lower pixel
        coordinate.
        """
        xmin, ymin, xmax, ymax = self._split_into_xyxy()
        w, h = box[2] - box[0], box[3] - box[1]
        cropped_xmin = (xmin - box[0]).clamp(min=0, max=w)
        cropped_ymin = (ymin - box[1]).clamp(min=0, max=h)
        cropped_xmax = (xmax - box[0]).clamp(min=0, max=w)
        cropped_ymax = (ymax - box[1]).clamp(min=0, max=h)

        # TODO should I filter empty boxes here?
        if False:
            is_empty = (cropped_xmin == cropped_xmax) | (cropped_ymin
                                                         == cropped_ymax)

        cropped_box = torch.cat(
            (cropped_xmin, cropped_ymin, cropped_xmax, cropped_ymax), dim=-1)
        bbox_3d = BoxList(cropped_box, (w, h), mode="xyxy")
        # bbox_3d._copy_extra_fields(self)
        for k, v in self.extra_fields.items():
            if not isinstance(v, torch.Tensor):
                v = v.crop(box)
            bbox_3d.add_field(k, v)
        return bbox_3d.convert(self.mode)
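
A minimal sketch of the crop semantics above (assuming the stock maskrcnn_benchmark BoxList; toy values): coordinates are shifted into the crop window and clamped to its extent.

import torch
from maskrcnn_benchmark.structures.bounding_box import BoxList

boxes = BoxList(torch.tensor([[10., 10., 50., 50.]]), (100, 100), mode="xyxy")
# Crop window given as (left, upper, right, lower).
cropped = boxes.crop((20, 20, 80, 80))
print(cropped.size)  # (60, 60)
print(cropped.bbox)  # tensor([[ 0.,  0., 30., 30.]])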
Code example #4
File: model_utils.py  Project: anhle-uet/ScaleNet
def oneLargeBboxList(W_batch_array, H_batch_array):
    bbox_list_list = []
    for W, H in zip(W_batch_array, H_batch_array):
        bbox_list = BoxList(np.asarray([[0, 0, W, H]]), (W, H), "xywh") # Following COCO annotations: (box coordinates are measured from the top left image corner and are 0-indexed) # http://cocodataset.org/#format-data
        bbox_list = bbox_list.convert('xyxy')
        bbox_list_list.append(bbox_list)
    return bbox_list_list
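
A small usage sketch for the helper above (the image sizes are illustrative): it returns one xyxy BoxList per image, each holding a single box that covers the whole image.

W_batch_array = [640, 1024]
H_batch_array = [480, 768]
full_image_boxes = oneLargeBboxList(W_batch_array, H_batch_array)
print(full_image_boxes[0].size)  # (640, 480)
print(len(full_image_boxes[0]))  # 1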
Code example #5
    def frame_vis_generator(self, frame, results: BoxList):
        frame, results = self.normalize_output(frame, results)
        ids = results.get_field('ids')
        results = results[ids >= 0]
        results = results.convert('xyxy')
        bbox = results.bbox.detach().cpu().numpy()
        ids = results.get_field('ids').tolist()
        labels = results.get_field('labels').tolist()

        for i, entity_id in enumerate(ids):
            color = self.colors[entity_id % self.num_colors]
            class_name = self.class_names[labels[i] - 1]
            text_width = len(class_name) * 20
            x1, y1, x2, y2 = np.round(bbox[i, :]).astype(int)
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness=3)
            cv2.putText(frame,
                        str(entity_id), (x1 + 5, y1 + 40),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        1.5,
                        color,
                        thickness=3)
            # Draw a filled background rectangle behind the class-name text
            cv2.rectangle(frame, (x1 - 5, y1 - 25), (x1 + text_width, y1),
                          color, -1)
            cv2.putText(frame,
                        '{}'.format(class_name), (x1 + 5, y1 - 5),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        1, (0, 0, 0),
                        thickness=2)
        return frame
Code example #6
File: model_utils.py  Project: anhle-uet/ScaleNet
def bboxArray_to_bboxList(bboxes_batch_array, bboxes_length_batch_array, W_batch_array, H_batch_array):
    bbox_list_list = []
    for bboxes_array, bboxes_length, W, H in zip(bboxes_batch_array, bboxes_length_batch_array, W_batch_array, H_batch_array):
        bbox_list = BoxList(bboxes_array[:bboxes_length, :], (W, H), "xywh")
        bbox_list = bbox_list.convert('xyxy')
        bbox_list_list.append(bbox_list)
    return bbox_list_list
Code example #7
    def forward_for_single_feature_map(self, anchors, objectness,
                                       box_regression):
        """
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        device = objectness.device
        N, A, H, W = objectness.shape

        # put in the same format as anchors
        objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
        objectness = objectness.sigmoid()

        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

        num_anchors = A * H * W

        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        objectness, topk_idx = objectness.topk(pre_nms_top_n,
                                               dim=1,
                                               sorted=True)

        batch_idx = torch.arange(N, device=device)[:, None]
        box_regression = box_regression[batch_idx, topk_idx]

        image_shapes = [box.size for box in anchors]
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
        concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

        #decode2cxywh = self.nms_func.input_mode == 'cxywh'
        decode2cxywh = False
        if decode2cxywh:
            proposals = self.box_coder.decode2cxywh(box_regression.view(-1, 4),
                                                    concat_anchors.view(-1, 4))
            mode = 'cxywh'
        else:
            proposals = self.box_coder.decode(box_regression.view(-1, 4),
                                              concat_anchors.view(-1, 4))
            mode = 'xyxy'

        proposals = proposals.view(N, -1, 4)

        result = []
        for proposal, score, im_shape in zip(proposals, objectness,
                                             image_shapes):
            boxlist = BoxList(proposal, im_shape, mode=mode)
            boxlist.add_field("objectness", score)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            boxlist = self.nms_func(boxlist)
            boxlist = boxlist.convert('xyxy')
            result.append(boxlist)
        return result
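
For reference, permute_and_flatten (maskrcnn_benchmark.modeling.rpn.utils) reorders a (N, A*C, H, W) head output into (N, H*W*A, C) so that scores and regression deltas line up with the flattened anchor order; roughly, it does the following:

def permute_and_flatten(layer, N, A, C, H, W):
    # (N, A*C, H, W) -> (N, A, C, H, W) -> (N, H, W, A, C) -> (N, H*W*A, C)
    layer = layer.view(N, -1, C, H, W)
    layer = layer.permute(0, 3, 4, 1, 2)
    return layer.reshape(N, -1, C)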
Code example #8
def sample_pos_neg_idxs(
        gt,
        rois,
        fg_thres=pretrain_opts['overlap_pos'][0],
        bg_thres=pretrain_opts['overlap_neg'][1],
        fg_num=pretrain_opts['batch_pos'] * pretrain_opts['batch_frames'],
        bg_num=pretrain_opts['batch_neg'] * pretrain_opts['batch_frames']):
    assert len(gt) == len(rois), \
        "gt size {} does not match rois size {}".format(len(gt), len(rois))
    gt = torch.from_numpy(gt).cuda()
    proposal_matcher = Matcher(fg_thres, bg_thres)

    total_matched_idxs = torch.LongTensor([]).cuda()
    for i_gt, i_roi in zip(gt, rois):
        i_gt = BoxList(i_gt[None, :], i_roi.size, mode="xywh")
        i_gt = i_gt.convert("xyxy")
        match_quality_matrix = boxlist_iou(i_gt, i_roi)
        matched_idxs = proposal_matcher(match_quality_matrix)
        total_matched_idxs = torch.cat(
            [total_matched_idxs,
             matched_idxs])  # >= 0: fg match, -1: background, -2: between fg/bg thresholds

    pos_idx = torch.nonzero(total_matched_idxs == 0).squeeze(1)
    neg_idx = torch.nonzero(total_matched_idxs == -1).squeeze(1)
    # randomly select positive and negative examples
    num_pos = min(pos_idx.numel(), fg_num)
    num_neg = min(neg_idx.numel(), bg_num)
    if len(pos_idx) >= fg_num:
        perm1 = torch.randperm(pos_idx.numel(), device=pos_idx.device)[:fg_num]
        perm2 = torch.randperm(neg_idx.numel(), device=neg_idx.device)[:bg_num]
    elif len(pos_idx) > 0:
        perm1 = torch.randint(0, pos_idx.size(0),
                              (fg_num, )).type(torch.LongTensor)
        perm2 = torch.randint(0, neg_idx.size(0),
                              (bg_num, )).type(torch.LongTensor)
    else:
        return None, None
    pos_idx = pos_idx[perm1]
    neg_idx = neg_idx[perm2]

    return pos_idx, neg_idx
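
A minimal sketch of the per-frame matching step above (assuming maskrcnn_benchmark's BoxList, boxlist_iou, and Matcher APIs; the coordinates and thresholds are made up):

import torch
from maskrcnn_benchmark.structures.bounding_box import BoxList
from maskrcnn_benchmark.structures.boxlist_ops import boxlist_iou
from maskrcnn_benchmark.modeling.matcher import Matcher

gt = BoxList(torch.tensor([[10., 10., 50., 50.]]), (100, 100), mode="xyxy")
rois = BoxList(torch.tensor([[12., 12., 48., 48.],
                             [60., 60., 90., 90.]]), (100, 100), mode="xyxy")
iou = boxlist_iou(gt, rois)   # IoU matrix of shape (num_gt, num_rois) = (1, 2)
matcher = Matcher(0.7, 0.5)   # fg IoU threshold, bg IoU threshold
matched_idxs = matcher(iou)   # per-roi gt index, e.g. tensor([0, -1])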
Code example #9
def convert_kitti_instance_only(root, ann_file, out_dir, dataset):
    image_index, label_list, boxes_list, boxes_3d_list, \
    alphas_list = get_pkl_element(ann_file)
    number_image = len(image_index)
    image_lists = []
    calib_lists = []
    depth_list = []
    for i in range(number_image):
        image_lists.append(root + '/training' + '/image_2/' + image_index[i] +
                           ".png")
        calib_lists.append(root + '/training' + '/calib/' + image_index[i] +
                           ".txt")
        depth_list.append(root + '/training' + '/depth/' + image_index[i] +
                          "_01.png.npz")

    # img_id = 0
    # ann_id = 0
    img_id = 3712
    ann_id = 11855

    # cat_id = 1
    category_dict = {'car': 1}

    category_instancesonly = [
        'person',
        'rider',
        'car',
        'truck',
        'bus',
        'train',
        'motorcycle',
        'bicycle',
    ]

    ann_dict = {}
    images = []
    annotations = []

    for i, id in image_index.items():
        if len(images) % 50 == 0:
            print("Processed %s images, %s annotations" %
                  (len(images), len(annotations)))
        image = {}
        image['id'] = img_id
        img_id += 1

        img = Image.open(image_lists[i]).convert("RGB")
        width, height = img.size
        image['width'] = width
        image['height'] = height
        image['file_name'] = image_lists[i].split('/')[-1]
        image['seg_file_name'] = image['file_name']

        images.append(image)

        num_instances = label_list[i].shape[0]
        boxes = boxes_list[i]
        boxes = torch.as_tensor(boxes).reshape(-1, 4)
        box2d = BoxList(boxes, img.size, mode="xyxy")
        area = box2d.area().tolist()
        boxes = box2d.convert('xywh')
        boxes = boxes.bbox.tolist()

        for j in range(num_instances):
            ann = {}
            ann['id'] = ann_id
            ann_id += 1
            ann['image_id'] = image['id']
            ann['segmentation'] = []

            ann['category_id'] = category_dict['car']
            ann['iscrowd'] = 0
            ann['area'] = area[j]
            ann['bbox'] = boxes[j]

            annotations.append(ann)

    ann_dict['images'] = images
    categories = [{
        "id": category_dict[name],
        "name": name
    } for name in category_dict]
    ann_dict['categories'] = categories
    ann_dict['annotations'] = annotations
    print("Num categories: %s" % len(categories))
    print("Num images: %s" % len(images))
    print("Num annotations: %s" % len(annotations))

    with open(
            os.path.join(out_dir,
                         'instancesonly_filtered_gtFine_' + dataset + '.json'),
            'w') as outfile:
        outfile.write(json.dumps(ann_dict))
Code example #10
    def inference(self,
                  colors_pred,
                  add_class_names=None,
                  save_path=None,
                  save_independently=None,
                  show_ground_truth=True):
        """
        Run inference and visualize either the boxes or the masks.
        """

        # load the config
        paths_catalog = import_file("maskrcnn_benchmark.config.paths_catalog",
                                    cfg.PATHS_CATALOG, True)
        DatasetCatalog = paths_catalog.DatasetCatalog
        test_datasets = DatasetCatalog.get(cfg.DATASETS.TEST[0])
        img_dir = test_datasets['args']['root']
        anno_file = test_datasets['args']['ann_file']
        data = json.load(open(anno_file))
        coco = COCO(anno_file)
        predis = []
        filenames = []

        # iterate through data
        for i, image in enumerate(data['images']):

            pil_img = Image.open(img_dir + '/' + image['file_name'])
            filenames.append(image['file_name'])
            img = np.array(pil_img)[:, :, [0, 1, 2]]

            # get ground truth boxes or masks
            anno = [
                obj for obj in data['annotations']
                if obj['image_id'] == image['id']
            ]
            classes = [
                obj['category_id'] for obj in data['annotations']
                if obj['image_id'] == image['id']
            ]
            json_category_id_to_contiguous_id = {
                v: i + 1
                for i, v in enumerate(coco.getCatIds())
            }
            classes = [json_category_id_to_contiguous_id[c] for c in classes]
            classes = torch.tensor(classes)
            boxes = [obj['bbox'] for obj in anno]
            boxes = torch.as_tensor(boxes).reshape(-1, 4)
            target = BoxList(boxes, pil_img.size, mode='xywh').convert('xyxy')
            target.add_field('labels', classes)
            masks = [obj["segmentation"] for obj in anno]
            masks = SegmentationMask(masks, img.size)
            target.add_field("masks", masks)
            target = target.clip_to_image(remove_empty=True)

            # these are the ground truth polygons
            polygons = []
            color_rgb = [[255, 101, 80], [255, 55, 55], [255, 255, 61],
                         [255, 128, 0]]
            colors = {
                i: [s / 255 for s in color]
                for i, color in enumerate(color_rgb)
            }
            color = [colors[i.item()] for i in classes]

            # ground truth boxes
            boxes = []

            polys = vars(target)['extra_fields']['masks']
            for polygon in polys:
                try:
                    tenso = vars(polygon)['polygons'][0]
                except KeyError:
                    continue

                poly1 = tenso.numpy()
                poly = poly1.reshape((int(len(poly1) / 2), 2))
                polygons.append(Polygon(poly))

            xywh_tar = target.convert("xywh")
            for box in vars(xywh_tar)['bbox'].numpy():

                rect = Rectangle((box[0], box[1]), box[2], box[3])
                boxes.append(rect)

            # compute predictions
            predictions = self.compute_prediction(img)
            predis.append(predictions)
            top_predictions = self.select_top_predictions(predictions)

            polygons_predicted, colors_prediction = self.overlay_mask(
                img, top_predictions, colors_pred, inference=True)
            #print(colors_prediction)

            fig = plt.figure()
            ax = fig.add_subplot(1, 1, 1)

            ax.imshow(Image.fromarray(img))
            ax.axis('off')

            # overlay the ground-truth polygons
            if show_ground_truth:
                p = PatchCollection(polygons,
                                    facecolor='none',
                                    linewidths=0,
                                    alpha=0.4)
                ax.add_collection(p)
                p = PatchCollection(polygons,
                                    facecolor='none',
                                    edgecolors=color,
                                    linewidths=2)
                ax.add_collection(p)

            # this is for prediction
            ppd = PatchCollection(polygons_predicted,
                                  facecolor='none',
                                  linewidths=0,
                                  alpha=0.4)
            ax.add_collection(ppd)
            ppd = PatchCollection(polygons_predicted,
                                  facecolor='none',
                                  edgecolors=colors_prediction,
                                  linewidths=2)
            ax.add_collection(ppd)

            plt.savefig(save_path + image['file_name'],
                        dpi=200,
                        bbox_inches='tight',
                        pad_inches=0)

            plt.show()

        dic = {}
        for i in range(len(filenames)):
            dic[filenames[i]] = predis[i]
        return dic
Code example #11
def prepare_for_coco_detection_mstest(predictions, dataset):

    # pdb.set_trace()

    predictions_s = predictions[0]
    predictions_m = predictions[1]
    predictions_l = predictions[2]

    dataset_s = dataset[0]
    dataset_m = dataset[1]
    dataset_l = dataset[2]

    coco_results = []
    # one image.
    for image_id, predictions in enumerate(
            zip(predictions_s, predictions_m, predictions_l)):

        prediction_s = predictions[0]
        prediction_m = predictions[1]
        prediction_l = predictions[2]

        original_id = dataset_l.id_to_img_map[image_id]

        # skip images with no detections at the large scale
        if len(prediction_l) == 0:
            continue

        img_info = dataset_l.get_img_info(image_id)
        image_width = img_info["width"]
        image_height = img_info["height"]
        img_id_json = img_info['id']

        # rescale predict bbox to original images size.
        prediction_s = prediction_s.resize((image_width, image_height))
        prediction_m = prediction_m.resize((image_width, image_height))
        prediction_l = prediction_l.resize((image_width, image_height))

        # get single-scale results from type BoxList.
        bbox_s = prediction_s.bbox
        score_s = prediction_s.get_field('scores').unsqueeze(1)
        label_s = prediction_s.get_field('labels').unsqueeze(1)

        bbox_m = prediction_m.bbox
        score_m = prediction_m.get_field('scores').unsqueeze(1)
        label_m = prediction_m.get_field('labels').unsqueeze(1)

        bbox_l = prediction_l.bbox
        score_l = prediction_l.get_field('scores').unsqueeze(1)
        label_l = prediction_l.get_field('labels').unsqueeze(1)

        # concat single-scale result and convert to type BoxList. (small, medium, large)
        min_size = 0
        w = prediction_l.size[0]
        h = prediction_l.size[1]

        detections = torch.from_numpy(np.row_stack(
            (bbox_s, bbox_m, bbox_l))).cuda()
        per_class = torch.from_numpy(np.row_stack(
            (label_s, label_m, label_l))).cuda()
        per_class = torch.squeeze(per_class, dim=1)
        per_box_cls = torch.from_numpy(
            np.row_stack((score_s, score_m, score_l))).cuda()
        per_box_cls = torch.squeeze(per_box_cls, dim=1)

        boxlist = BoxList(detections, (int(w), int(h)), mode="xyxy")
        boxlist.add_field("labels", per_class)
        boxlist.add_field("scores", per_box_cls)
        boxlist = boxlist.clip_to_image(remove_empty=False)
        boxlist = remove_small_boxes(boxlist, min_size)

        # multi-scale results apply NMS. (small, medium, large)
        nms_method = cfg.TEST.MS_TEST_NMS
        nms_thresh = cfg.TEST.MS_TEST_NMS_THR

        num_classes = 81
        scores = boxlist.get_field("scores")
        labels = boxlist.get_field("labels")
        boxes = boxlist.bbox
        result = []

        # multi-scale test + NMS
        for j in range(1, num_classes):
            inds = (labels == j).nonzero().view(-1)
            scores_j = scores[inds]
            boxes_j = boxes[inds, :].view(-1, 4)
            boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
            boxlist_for_class.add_field("scores", scores_j)

            if nms_method == "nms":
                boxlist_for_class = boxlist_nms(boxlist_for_class,
                                                nms_thresh,
                                                score_field="scores")
            elif nms_method == "soft_nms":
                boxlist_for_class = boxlist_soft_nms(boxlist_for_class,
                                                     nms_thresh,
                                                     score_field="scores")
            else:
                print('Unknown NMS method: {}'.format(nms_method))

            num_labels = len(boxlist_for_class)

            boxlist_for_class.add_field(
                "labels",
                torch.full((num_labels, ),
                           j,
                           dtype=torch.int64,
                           device=scores.device))

            result.append(boxlist_for_class)

        result = cat_boxlist(result)
        boxlist = result

        boxlist = boxlist.convert("xywh")
        boxes = boxlist.bbox.tolist()
        scores = boxlist.get_field("scores").tolist()
        labels = boxlist.get_field("labels").tolist()

        mapped_labels = [
            dataset_l.contiguous_category_id_to_json_id[int(i)] for i in labels
        ]

        coco_results.extend([{
            "image_id": original_id,
            "category_id": mapped_labels[k],
            "bbox": box,
            "score": scores[k],
        } for k, box in enumerate(boxes)])

    return coco_results
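
The per-class filtering above relies on boxlist_nms from maskrcnn_benchmark.structures.boxlist_ops; a minimal usage sketch (box and score values are made up):

import torch
from maskrcnn_benchmark.structures.bounding_box import BoxList
from maskrcnn_benchmark.structures.boxlist_ops import boxlist_nms

candidates = BoxList(torch.tensor([[10., 10., 50., 50.],
                                   [12., 12., 52., 52.],
                                   [60., 60., 90., 90.]]), (100, 100), mode="xyxy")
candidates.add_field("scores", torch.tensor([0.9, 0.8, 0.7]))
# Suppress heavily overlapping boxes (IoU above 0.5), keeping the higher-scoring one.
kept = boxlist_nms(candidates, 0.5, score_field="scores")
print(len(kept))  # 2: the lower-scoring box of the overlapping pair is dropped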
Code example #12
def prepare_for_vrd_detection(predictions, dataset):
    # assert isinstance(dataset, COCODataset)
    vrd_results = []
    for image_id, prediction in enumerate(predictions):
        original_id = dataset.ann_file[image_id]['filename']
        # if len(prediction) == 0:
        #     continue

        # TODO replace with get_img_info?
        image_width = dataset.ann_file[image_id]["width"]
        image_height = dataset.ann_file[image_id]["height"]

        subject_boundingboxes = prediction.get_field("subject_boundingboxes")
        object_boundingboxes = prediction.get_field("object_boundingboxes")
        prediction_size = prediction.size

        prediction_sub = BoxList(subject_boundingboxes,
                                 prediction_size,
                                 mode="xyxy")
        prediction_ob = BoxList(object_boundingboxes,
                                prediction_size,
                                mode="xyxy")

        prediction = prediction.resize((image_width, image_height))
        prediction_sub = prediction_sub.resize((image_width, image_height))
        prediction_ob = prediction_ob.resize((image_width, image_height))
        prediction_sub = prediction_sub.convert("xywh")
        prediction = prediction.convert("xywh")
        prediction_ob = prediction_ob.convert("xywh")

        boxes = prediction.bbox.tolist()
        subject_boundingboxes = prediction_sub.bbox.tolist()
        object_boundingboxes = prediction_ob.bbox.tolist()
        subject_category = prediction.get_field("subject_category").tolist()
        object_category = prediction.get_field("object_category").tolist()
        subject_scores = prediction.get_field("subject_scores").tolist()
        object_scores = prediction.get_field("object_scores").tolist()
        objectpairs_scores = prediction.get_field(
            "objectpairs_scores").tolist()
        predicate_scores = prediction.get_field("predicate_scores").tolist()

        ids = prediction.get_field("ids").tolist()

        a = {}
        a.update(filename=original_id)
        a.update(height=image_height)
        a.update(width=image_width)
        a.update(objects_num=len(prediction))
        objects = [{
            "subject_boundingboxes": subject_boundingboxes[k],
            "object_boundingboxes": object_boundingboxes[k],
            "subject_category": subject_category[k],
            "object_category": object_category[k],
            "subject_scores": subject_scores[k],
            "object_scores": object_scores[k],
            "objectpairs_scores": objectpairs_scores[k],
            "predicate_scores": predicate_scores[k],
            "ids": ids[k],
        } for k, box in enumerate(boxes)]
        a.update(objects=objects)
        vrd_results.append(a)
    return vrd_results