예제 #1
0
 def test_bbox_dataset_to_prediction_roundtrip(self):
     """Round-trip a ground-truth box through the prediction pipeline.

     A dataset-format (x1, y1, w, h) box is converted to the internal
     (x1, y1, x2, y2) format, regression deltas from nearby proposals to
     that box are computed and applied back to the proposals, and the
     resulting predictions are converted to the dataset format again.
     The COCO API is then asked for the IoU between every prediction and
     the original box; each IoU is expected to be 1.
     """
     delta_weights = (5, 5, 10, 10)

     # Ground-truth box exactly as a dataset would store it: (x1, y1, w, h).
     dataset_box = [10, 20, 100, 150]
     # The same box in the internal (x1, y1, x2, y2) representation.
     internal_box = box_utils.xywh_to_xyxy(dataset_box)

     # Proposal boxes generated around the ground-truth box.
     proposals = random_boxes(internal_box, 10, 10)

     # Proposal-to-gt deltas, then the same deltas applied to the proposals
     # to obtain the predicted boxes in xyxy format.
     regression_deltas = box_utils.bbox_transform_inv(
         proposals, np.array([internal_box]), weights=delta_weights
     )
     decoded_xyxy = box_utils.bbox_transform(
         proposals, regression_deltas, weights=delta_weights
     )

     # Back to the dataset's xywh format for the COCO API.
     decoded_xywh = box_utils.xyxy_to_xywh(decoded_xyxy)

     # One "not a crowd" flag (0) per predicted box.
     crowd_flags = [0] * decoded_xywh.shape[0]
     overlaps = COCOmask.iou(
         decoded_xywh, np.array([dataset_box]), crowd_flags
     )
     np.testing.assert_array_almost_equal(overlaps, np.ones(overlaps.shape))
def load_detections(detection_file, dataset, thresholds):
    """Load detections, apply per-class score thresholds and group them by image.

    Args:
        detection_file: path of a JSON file holding a list of detection
            dicts; each dict is read for its 'score', 'category_id',
            'image_id' and 'bbox' keys.
        dataset: object providing `get_roidb()`, `classes` and `name`.
        thresholds: container indexed by `dataset.classes[category_id]`
            giving the minimum score (exclusive) for a detection to be kept.

    Returns:
        A tuple `(detections_per_image, res_file)`:
        - `detections_per_image` has one list per roidb entry (in roidb
          order) with the kept detections whose 'image_id' equals the
          entry's 'id'; their 'bbox' has been passed through
          `box_utils.xywh_to_xyxy`.
        - `res_file` is the path of the JSON file to which the thresholded
          detections (still in their original bbox format) were written.
    """
    # Close the input file deterministically instead of leaking the handle.
    with open(detection_file) as det_file:
        detections = json.load(det_file)
    roidb = dataset.get_roidb()
    classes = dataset.classes

    # Apply class-specific score thresholds.
    thres_dets = [
        det for det in detections
        if det['score'] > thresholds[classes[det['category_id']]]
    ]

    # Save the thresholded detections before any bbox is rewritten below.
    output_dir = os.path.abspath(get_output_dir(dataset.name, training=False))
    res_file = os.path.join(
        output_dir, 'bbox_' + dataset.name + '_results_thresh.json'
    )
    with open(res_file, 'w') as out_file:
        json.dump(thres_dets, out_file)

    # Bucket detections by image id in a single pass so that the per-image
    # lookup below does not rescan every detection for every roidb entry.
    dets_by_image = {}
    for det in thres_dets:
        dets_by_image.setdefault(det['image_id'], []).append(det)

    detections_per_image = []
    converted_ids = set()
    for entry in roidb:
        image_id = entry['id']
        im_dets = dets_by_image.get(image_id, [])

        # The conversion mutates the detection dicts in place, so run it
        # only once per image id; converting again for a repeated roidb
        # entry would corrupt boxes that are already in xyxy format.
        if image_id not in converted_ids:
            converted_ids.add(image_id)
            for det in im_dets:
                det["bbox"] = box_utils.xywh_to_xyxy(det["bbox"])

        # Hand out a fresh list per entry, as callers may modify it.
        detections_per_image.append(list(im_dets))

    return detections_per_image, res_file
예제 #3
0
        def _toPersonMask(anns, coco):
            """Attach a full-image person mask to each non-ignored annotation.

            For every annotation whose 'ignore' value is falsy, the mask
            obtained from `segm_utils.GetDensePoseMask(ann['dp_masks'])` is
            binarised, resized to the annotation's box, pasted into an
            image-sized canvas and RLE-encoded. The annotation is modified
            in place: 'segmentation' receives the encoded mask and
            'person_mask' receives `coco.annToRLE(ann)`.

            Args:
                anns: iterable of annotation dicts with 'ignore', 'dp_masks',
                    'bbox' and 'image_id' keys.
                coco: COCO-style API object providing `loadImgs` and
                    `annToRLE`.
            """
            for ann in anns:
                if ann['ignore']:
                    continue

                # Binary foreground mask: any non-zero value counts as person.
                dense_mask = segm_utils.GetDensePoseMask(ann['dp_masks'])
                fg_mask = (dense_mask > 0).astype(np.uint8)

                # Integer box corners; the +1 below treats them as inclusive.
                box = np.array(boxes.xywh_to_xyxy(ann['bbox'])).astype(np.int32)
                box_x0 = box[0]
                box_y0 = box[1]
                box_x1 = box[2]
                box_y1 = box[3]
                box_w = np.maximum(box_x1 - box_x0 + 1, 1)
                box_h = np.maximum(box_y1 - box_y0 + 1, 1)
                fg_mask = cv2.resize(fg_mask, (box_w, box_h))

                img_info = coco.loadImgs(ann['image_id'])[0]
                img_h = img_info['height']
                img_w = img_info['width']
                canvas = np.zeros((img_h, img_w), dtype=np.uint8)

                # Part of the box that actually lies inside the image.
                left = max(box_x0, 0)
                right = min(box_x1 + 1, img_w)
                top = max(box_y0, 0)
                bottom = min(box_y1 + 1, img_h)

                # Copy the matching window of the resized mask; offsets are
                # taken relative to the box's top-left corner.
                canvas[top:bottom, left:right] = fg_mask[
                    (top - box_y0):(bottom - box_y0),
                    (left - box_x0):(right - box_x0)
                ]

                encoded = maskUtils.encode(
                    np.array(canvas[:, :, np.newaxis], order='F'))[0]
                ann['segmentation'] = encoded
                ann['person_mask'] = coco.annToRLE(ann)
예제 #4
0
    def _add_gt_annotations(self, entry):
        """Append the image's usable ground-truth annotations to `entry`.

        Annotations for `entry['id']` are loaded through `self.COCO` and
        filtered: an annotation is dropped when its area is below
        `cfg.TRAIN.GT_MIN_AREA`, when it carries `ignore == 1`, when its
        area is not positive, or when its box, clipped to the image, has
        no positive width and height. The survivors are appended to the
        arrays and lists already stored in `entry`, which is modified in
        place; nothing is returned.
        """
        gt_ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
        annotations = self.COCO.loadAnns(gt_ann_ids)

        # Pass 1: sanitize annotations -- some boxes are invalid.
        kept = []
        kept_segms = []
        img_w = entry['width']
        img_h = entry['height']
        for ann in annotations:
            # Polygon segmentations are lists; crowd regions are RLE dicts.
            if isinstance(ann['segmentation'], list):
                # A polygon needs >= 3 points, i.e. >= 6 coordinates.
                ann['segmentation'] = [
                    poly for poly in ann['segmentation'] if len(poly) >= 6
                ]
            if (ann['area'] < cfg.TRAIN.GT_MIN_AREA
                    or ann.get('ignore', 0) == 1):
                continue
            # (x1, y1, w, h) -> (x1, y1, x2, y2), then clip to the image.
            left, top, right, bottom = box_utils.xywh_to_xyxy(ann['bbox'])
            left, top, right, bottom = box_utils.clip_xyxy_to_image(
                left, top, right, bottom, img_h, img_w)
            # Require non-zero seg area and more than a 1x1 box.
            if not (ann['area'] > 0 and right > left and bottom > top):
                continue
            ann['clean_bbox'] = [left, top, right, bottom]
            kept.append(ann)
            kept_segms.append(ann['segmentation'])

        # Pass 2: fill per-annotation buffers typed like the entry's arrays.
        n_kept = len(kept)
        new_boxes = np.zeros((n_kept, 4), dtype=entry['boxes'].dtype)
        new_classes = np.zeros(n_kept, dtype=entry['gt_classes'].dtype)
        new_overlaps = np.zeros(
            (n_kept, self.num_classes), dtype=entry['gt_overlaps'].dtype)
        new_areas = np.zeros(n_kept, dtype=entry['seg_areas'].dtype)
        new_crowd = np.zeros(n_kept, dtype=entry['is_crowd'].dtype)
        new_gt_inds = np.zeros(
            n_kept, dtype=entry['box_to_gt_ind_map'].dtype)
        if self.keypoints is not None:
            new_keypoints = np.zeros(
                (n_kept, 3, self.num_keypoints),
                dtype=entry['gt_keypoints'].dtype)

        any_visible_kps = False
        for idx, ann in enumerate(kept):
            class_id = self.json_category_id_to_contiguous_id[
                ann['category_id']]
            new_boxes[idx, :] = ann['clean_bbox']
            new_classes[idx] = class_id
            new_areas[idx] = ann['area']
            new_crowd[idx] = ann['iscrowd']
            new_gt_inds[idx] = idx
            if self.keypoints is not None:
                new_keypoints[idx, :, :] = self._get_gt_keypoints(ann)
                if np.sum(new_keypoints[idx, 2, :]) > 0:
                    any_visible_kps = True
            if ann['iscrowd']:
                # Overlap -1 for every class so that crowd objects are
                # excluded during training.
                new_overlaps[idx, :] = -1.0
            else:
                new_overlaps[idx, class_id] = 1.0

        # Pass 3: append everything to the entry.
        entry['boxes'] = np.append(entry['boxes'], new_boxes, axis=0)
        entry['segms'].extend(kept_segms)
        # To match the original implementation:
        # entry['boxes'] = np.append(
        #     entry['boxes'], boxes.astype(np.int).astype(np.float), axis=0)
        entry['gt_classes'] = np.append(entry['gt_classes'], new_classes)
        entry['seg_areas'] = np.append(entry['seg_areas'], new_areas)
        # gt_overlaps is kept sparse: densify, append, sparsify again.
        dense_overlaps = np.append(
            entry['gt_overlaps'].toarray(), new_overlaps, axis=0)
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(dense_overlaps)
        entry['is_crowd'] = np.append(entry['is_crowd'], new_crowd)
        entry['box_to_gt_ind_map'] = np.append(
            entry['box_to_gt_ind_map'], new_gt_inds)
        if self.keypoints is not None:
            entry['gt_keypoints'] = np.append(
                entry['gt_keypoints'], new_keypoints, axis=0)
            entry['has_visible_keypoints'] = any_visible_kps
예제 #5
0
    def _add_gt_annotations(self, entry):
        """Add ground truth annotation metadata to an roidb entry.

        Annotations for `entry['id']` are loaded through `self.COCO` and
        filtered: an annotation is dropped when its area is below
        `cfg.TRAIN.GT_MIN_AREA`, when it carries `ignore == 1`, when its
        area is not positive, or when its box, clipped to the image, has
        no positive width and height. For every kept annotation the box,
        class, area, crowd flag, overlaps, segmentation and the six `dp_*`
        fields (empty lists when the annotation has no 'dp_x') are appended
        to `entry`, which is modified in place; nothing is returned.

        Keypoints are added only when `self.keypoints` is not None, and the
        `ignore_UV_body` / `has_body_uv` fields only when
        `cfg.MODEL.BODY_UV_ON` is set.
        """
        # The per-annotation fields copied verbatim into the entry.
        dp_fields = ('dp_x', 'dp_y', 'dp_I', 'dp_U', 'dp_V', 'dp_masks')

        ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
        objs = self.COCO.loadAnns(ann_ids)
        # Sanitize bboxes -- some are invalid
        valid_objs = []
        valid_segms = []
        valid_dp = {field: [] for field in dp_fields}
        width = entry['width']
        height = entry['height']
        for obj in objs:
            # crowd regions are RLE encoded and stored as dicts
            if isinstance(obj['segmentation'], list):
                # Valid polygons have >= 3 points, so require >= 6 coordinates
                obj['segmentation'] = [
                    p for p in obj['segmentation'] if len(p) >= 6
                ]
            if obj['area'] < cfg.TRAIN.GT_MIN_AREA:
                continue
            if 'ignore' in obj and obj['ignore'] == 1:
                continue
            # Convert form (x1, y1, w, h) to (x1, y1, x2, y2)
            x1, y1, x2, y2 = box_utils.xywh_to_xyxy(obj['bbox'])
            x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
                x1, y1, x2, y2, height, width
            )
            # Require non-zero seg area and more than 1x1 box size
            if obj['area'] > 0 and x2 > x1 and y2 > y1:
                obj['clean_bbox'] = [x1, y1, x2, y2]
                valid_objs.append(obj)
                valid_segms.append(obj['segmentation'])
                # Keep the dp_* lists aligned with valid_objs: annotations
                # without 'dp_x' contribute an empty list for every field.
                has_dp = 'dp_x' in obj
                for field in dp_fields:
                    valid_dp[field].append(obj[field] if has_dp else [])
        num_valid_objs = len(valid_objs)

        boxes = np.zeros((num_valid_objs, 4), dtype=entry['boxes'].dtype)
        gt_classes = np.zeros(
            (num_valid_objs), dtype=entry['gt_classes'].dtype
        )
        gt_overlaps = np.zeros(
            (num_valid_objs, self.num_classes),
            dtype=entry['gt_overlaps'].dtype
        )
        seg_areas = np.zeros((num_valid_objs), dtype=entry['seg_areas'].dtype)
        is_crowd = np.zeros((num_valid_objs), dtype=entry['is_crowd'].dtype)
        box_to_gt_ind_map = np.zeros(
            (num_valid_objs), dtype=entry['box_to_gt_ind_map'].dtype
        )
        if self.keypoints is not None:
            gt_keypoints = np.zeros(
                (num_valid_objs, 3, self.num_keypoints),
                dtype=entry['gt_keypoints'].dtype
            )
        if cfg.MODEL.BODY_UV_ON:
            # Allocate with the entry's dtype, like every other buffer here;
            # a default float64 buffer would make np.append below silently
            # change the dtype of entry['ignore_UV_body'].
            ignore_UV_body = np.zeros(
                (num_valid_objs), dtype=entry['ignore_UV_body'].dtype
            )

        im_has_visible_keypoints = False
        im_has_any_body_uv = False

        for ix, obj in enumerate(valid_objs):
            cls = self.json_category_id_to_contiguous_id[obj['category_id']]
            boxes[ix, :] = obj['clean_bbox']
            gt_classes[ix] = cls
            seg_areas[ix] = obj['area']
            is_crowd[ix] = obj['iscrowd']
            box_to_gt_ind_map[ix] = ix
            if self.keypoints is not None:
                gt_keypoints[ix, :, :] = self._get_gt_keypoints(obj)
                if np.sum(gt_keypoints[ix, 2, :]) > 0:
                    im_has_visible_keypoints = True
            if cfg.MODEL.BODY_UV_ON:
                # An annotation without 'dp_x' is flagged to be ignored.
                if 'dp_x' in obj:
                    ignore_UV_body[ix] = False
                    im_has_any_body_uv = True
                else:
                    ignore_UV_body[ix] = True

            if obj['iscrowd']:
                # Set overlap to -1 for all classes for crowd objects
                # so they will be excluded during training
                gt_overlaps[ix, :] = -1.0
            else:
                gt_overlaps[ix, cls] = 1.0
        entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
        entry['segms'].extend(valid_segms)
        for field in dp_fields:
            entry[field].extend(valid_dp[field])
        entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes)
        entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas)
        # gt_overlaps is stored sparse: densify, append, sparsify again.
        entry['gt_overlaps'] = np.append(
            entry['gt_overlaps'].toarray(), gt_overlaps, axis=0
        )
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
        entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
        entry['box_to_gt_ind_map'] = np.append(
            entry['box_to_gt_ind_map'], box_to_gt_ind_map
        )
        if self.keypoints is not None:
            entry['gt_keypoints'] = np.append(
                entry['gt_keypoints'], gt_keypoints, axis=0
            )
            entry['has_visible_keypoints'] = im_has_visible_keypoints
        if cfg.MODEL.BODY_UV_ON:
            entry['ignore_UV_body'] = np.append(
                entry['ignore_UV_body'], ignore_UV_body
            )
            entry['has_body_uv'] = im_has_any_body_uv
예제 #6
0
    def _add_gt_annotations(self, entry):
        """Append the image's usable ground-truth annotations to `entry`.

        Annotations for `entry['id']` are loaded through `self.COCO` and
        filtered: an annotation is dropped when its area is below
        `cfg.TRAIN.GT_MIN_AREA`, when it carries `ignore == 1`, when its
        area is not positive, or when its box, clipped to the image, has
        no positive width and height. Boxes, classes, areas, crowd flags,
        overlaps, segmentations and the six `dp_*` fields of the survivors
        are appended to `entry`, which is modified in place; nothing is
        returned. Keypoint data is added only when `self.keypoints` is not
        None, body-UV flags only when `cfg.MODEL.BODY_UV_ON` is set.
        """
        dp_fields = ('dp_x', 'dp_y', 'dp_I', 'dp_U', 'dp_V', 'dp_masks')

        gt_ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
        annotations = self.COCO.loadAnns(gt_ann_ids)

        # Pass 1: sanitize annotations -- some boxes are invalid.
        kept = []
        kept_segms = []
        kept_dp = {field: [] for field in dp_fields}
        img_w = entry['width']
        img_h = entry['height']
        for ann in annotations:
            # Only polygon segmentations are filtered; crowd regions are
            # RLE encoded.
            if segm_utils.is_poly(ann['segmentation']):
                # A polygon needs >= 3 points, i.e. >= 6 coordinates.
                ann['segmentation'] = [
                    poly for poly in ann['segmentation'] if len(poly) >= 6
                ]
            if (ann['area'] < cfg.TRAIN.GT_MIN_AREA
                    or ann.get('ignore', 0) == 1):
                continue
            # (x1, y1, w, h) -> (x1, y1, x2, y2), then clip to the image.
            left, top, right, bottom = box_utils.xywh_to_xyxy(ann['bbox'])
            left, top, right, bottom = box_utils.clip_xyxy_to_image(
                left, top, right, bottom, img_h, img_w)
            # Require non-zero seg area and more than a 1x1 box.
            if not (ann['area'] > 0 and right > left and bottom > top):
                continue
            ann['clean_bbox'] = [left, top, right, bottom]
            kept.append(ann)
            kept_segms.append(ann['segmentation'])
            # Annotations without 'dp_x' contribute an empty list for every
            # dp field so that the lists stay aligned with `kept`.
            has_dp = 'dp_x' in ann
            for field in dp_fields:
                kept_dp[field].append(ann[field] if has_dp else [])

        # Pass 2: fill per-annotation buffers typed like the entry's arrays.
        n_kept = len(kept)
        new_boxes = np.zeros((n_kept, 4), dtype=entry['boxes'].dtype)
        new_classes = np.zeros(n_kept, dtype=entry['gt_classes'].dtype)
        new_overlaps = np.zeros(
            (n_kept, self.num_classes), dtype=entry['gt_overlaps'].dtype)
        new_areas = np.zeros(n_kept, dtype=entry['seg_areas'].dtype)
        new_crowd = np.zeros(n_kept, dtype=entry['is_crowd'].dtype)
        new_gt_inds = np.zeros(
            n_kept, dtype=entry['box_to_gt_ind_map'].dtype)
        if self.keypoints is not None:
            new_keypoints = np.zeros(
                (n_kept, 3, self.num_keypoints),
                dtype=entry['gt_keypoints'].dtype)
        if cfg.MODEL.BODY_UV_ON:
            new_ignore_uv = np.zeros(
                n_kept, dtype=entry['ignore_UV_body'].dtype)

        any_visible_kps = False
        any_body_uv = False
        for idx, ann in enumerate(kept):
            class_id = self.json_category_id_to_contiguous_id[
                ann['category_id']]
            new_boxes[idx, :] = ann['clean_bbox']
            new_classes[idx] = class_id
            new_areas[idx] = ann['area']
            new_crowd[idx] = ann['iscrowd']
            new_gt_inds[idx] = idx
            if self.keypoints is not None:
                new_keypoints[idx, :, :] = self._get_gt_keypoints(ann)
                if np.sum(new_keypoints[idx, 2, :]) > 0:
                    any_visible_kps = True
            if cfg.MODEL.BODY_UV_ON:
                # An annotation without 'dp_x' is flagged to be ignored.
                has_dp = 'dp_x' in ann
                new_ignore_uv[idx] = not has_dp
                if has_dp:
                    any_body_uv = True
            if ann['iscrowd']:
                # Overlap -1 for every class so that crowd objects are
                # excluded during training.
                new_overlaps[idx, :] = -1.0
            else:
                new_overlaps[idx, class_id] = 1.0

        # Pass 3: append everything to the entry.
        entry['boxes'] = np.append(entry['boxes'], new_boxes, axis=0)
        entry['segms'].extend(kept_segms)
        for field in dp_fields:
            entry[field].extend(kept_dp[field])
        entry['gt_classes'] = np.append(entry['gt_classes'], new_classes)
        entry['seg_areas'] = np.append(entry['seg_areas'], new_areas)
        # gt_overlaps is kept sparse: densify, append, sparsify again.
        dense_overlaps = np.append(
            entry['gt_overlaps'].toarray(), new_overlaps, axis=0)
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(dense_overlaps)
        entry['is_crowd'] = np.append(entry['is_crowd'], new_crowd)
        entry['box_to_gt_ind_map'] = np.append(
            entry['box_to_gt_ind_map'], new_gt_inds)
        if self.keypoints is not None:
            entry['gt_keypoints'] = np.append(
                entry['gt_keypoints'], new_keypoints, axis=0)
            entry['has_visible_keypoints'] = any_visible_kps
        if cfg.MODEL.BODY_UV_ON:
            entry['ignore_UV_body'] = np.append(
                entry['ignore_UV_body'], new_ignore_uv)
            entry['has_body_uv'] = any_body_uv
예제 #7
0
    def _add_gt_annotations(self, entry):
        """Extend an roidb entry with the image's valid ground-truth boxes.

        The annotations of image `entry['id']` are fetched from `self.COCO`.
        One is skipped when its area is below `cfg.TRAIN.GT_MIN_AREA`, when
        it has `ignore == 1`, when its area is not positive, or when its
        box has no positive extent after clipping to the image. Everything
        else is appended to the arrays and lists held by `entry` (modified
        in place); the method returns nothing.
        """
        gt_ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
        annotations = self.COCO.loadAnns(gt_ann_ids)

        # Step 1: sanitize annotations -- some boxes are invalid.
        usable = []
        usable_segms = []
        img_w = entry['width']
        img_h = entry['height']
        for ann in annotations:
            # Only polygon segmentations are filtered; crowd regions are
            # RLE encoded.
            if segm_utils.is_poly(ann['segmentation']):
                # Valid polygons have >= 3 points, i.e. >= 6 coordinates.
                ann['segmentation'] = [
                    poly for poly in ann['segmentation'] if len(poly) >= 6
                ]
            too_small = ann['area'] < cfg.TRAIN.GT_MIN_AREA
            if too_small or ann.get('ignore', 0) == 1:
                continue
            # Dataset (x1, y1, w, h) -> (x1, y1, x2, y2), clipped to image.
            xmin, ymin, xmax, ymax = box_utils.xywh_to_xyxy(ann['bbox'])
            xmin, ymin, xmax, ymax = box_utils.clip_xyxy_to_image(
                xmin, ymin, xmax, ymax, img_h, img_w
            )
            # Require non-zero seg area and more than a 1x1 box.
            if not (ann['area'] > 0 and xmax > xmin and ymax > ymin):
                continue
            ann['clean_bbox'] = [xmin, ymin, xmax, ymax]
            usable.append(ann)
            usable_segms.append(ann['segmentation'])

        # Step 2: allocate buffers with the dtypes of the entry's arrays.
        count = len(usable)
        box_buf = np.zeros((count, 4), dtype=entry['boxes'].dtype)
        class_buf = np.zeros(count, dtype=entry['gt_classes'].dtype)
        overlap_buf = np.zeros(
            (count, self.num_classes), dtype=entry['gt_overlaps'].dtype
        )
        area_buf = np.zeros(count, dtype=entry['seg_areas'].dtype)
        crowd_buf = np.zeros(count, dtype=entry['is_crowd'].dtype)
        gt_ind_buf = np.zeros(count, dtype=entry['box_to_gt_ind_map'].dtype)
        if self.keypoints is not None:
            kps_buf = np.zeros(
                (count, 3, self.num_keypoints),
                dtype=entry['gt_keypoints'].dtype
            )

        # Step 3: fill the buffers, one row per usable annotation.
        saw_visible_kps = False
        for row, ann in enumerate(usable):
            class_id = self.json_category_id_to_contiguous_id[
                ann['category_id']
            ]
            box_buf[row, :] = ann['clean_bbox']
            class_buf[row] = class_id
            area_buf[row] = ann['area']
            crowd_buf[row] = ann['iscrowd']
            gt_ind_buf[row] = row
            if self.keypoints is not None:
                kps_buf[row, :, :] = self._get_gt_keypoints(ann)
                if np.sum(kps_buf[row, 2, :]) > 0:
                    saw_visible_kps = True
            if ann['iscrowd']:
                # -1 for all classes so that crowd objects are excluded
                # during training.
                overlap_buf[row, :] = -1.0
            else:
                overlap_buf[row, class_id] = 1.0

        # Step 4: append the buffers to the entry.
        entry['boxes'] = np.append(entry['boxes'], box_buf, axis=0)
        entry['segms'].extend(usable_segms)
        # To match the original implementation:
        # entry['boxes'] = np.append(
        #     entry['boxes'], boxes.astype(np.int).astype(np.float), axis=0)
        entry['gt_classes'] = np.append(entry['gt_classes'], class_buf)
        entry['seg_areas'] = np.append(entry['seg_areas'], area_buf)
        # gt_overlaps is stored sparse: densify, append, sparsify again.
        merged_overlaps = np.append(
            entry['gt_overlaps'].toarray(), overlap_buf, axis=0
        )
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(merged_overlaps)
        entry['is_crowd'] = np.append(entry['is_crowd'], crowd_buf)
        entry['box_to_gt_ind_map'] = np.append(
            entry['box_to_gt_ind_map'], gt_ind_buf
        )
        if self.keypoints is not None:
            entry['gt_keypoints'] = np.append(
                entry['gt_keypoints'], kps_buf, axis=0
            )
            entry['has_visible_keypoints'] = saw_visible_kps