def test_bbox_dataset_to_prediction_roundtrip(self):
     """Simulate the process of reading a ground-truth box from a dataset,
     make predictions from proposals, convert the predictions back to the
     dataset format, and then use the COCO API to compute IoU overlap between
     the gt box and the predictions. These should have IoU of 1.
     """
     weights = (5, 5, 10, 10)
     # 1/ "read" a box from a dataset in the default (x1, y1, w, h) format
     gt_xywh_box = [10, 20, 100, 150]
     # 2/ convert it to our internal (x1, y1, x2, y2) format
     gt_xyxy_box = box_utils.xywh_to_xyxy(gt_xywh_box)
     # 3/ consider nearby proposal boxes
     prop_xyxy_boxes = random_boxes(gt_xyxy_box, 10, 10)
     # 4/ compute proposal-to-gt transformation deltas
     deltas = box_utils.bbox_transform_inv(
         prop_xyxy_boxes, np.array([gt_xyxy_box]), weights=weights
     )
     # 5/ use deltas to transform proposals to xyxy predicted box
     pred_xyxy_boxes = box_utils.bbox_transform(
         prop_xyxy_boxes, deltas, weights=weights
     )
     # 6/ convert xyxy predicted box to xywh predicted box
     pred_xywh_boxes = box_utils.xyxy_to_xywh(pred_xyxy_boxes)
     # 7/ use COCO API to compute IoU
     not_crowd = [int(False)] * pred_xywh_boxes.shape[0]
     ious = COCOmask.iou(pred_xywh_boxes, np.array([gt_xywh_box]), not_crowd)
     np.testing.assert_array_almost_equal(ious, np.ones(ious.shape))
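For reference, a minimal sketch of the two conversions this test round-trips, assuming Detectron's inclusive-coordinate convention (x2 = x1 + w - 1); the actual box_utils implementation may differ in details:

import numpy as np

def xywh_to_xyxy(xywh):
    # Convert (x1, y1, w, h) to (x1, y1, x2, y2) under the inclusive pixel
    # convention: a box of width w covers columns x1 .. x1 + w - 1.
    x1, y1, w, h = xywh
    return [x1, y1, x1 + max(0.0, w - 1.0), y1 + max(0.0, h - 1.0)]

def xyxy_to_xywh(xyxy):
    # Inverse conversion for an (N, 4) array of (x1, y1, x2, y2) boxes.
    xyxy = np.atleast_2d(xyxy)
    wh = xyxy[:, 2:4] - xyxy[:, 0:2] + 1
    return np.hstack((xyxy[:, 0:2], wh))

assert xywh_to_xyxy([10, 20, 100, 150]) == [10, 20, 109.0, 169.0]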
Example #3
    def _add_gt_annotations(self, entry):
        """Add ground truth annotation metadata to an roidb entry."""
        ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
        objs = self.COCO.loadAnns(ann_ids)
        # Sanitize bboxes -- some are invalid
        valid_objs = []
        valid_segms = []
        width = entry['width']
        height = entry['height']
        for obj in objs:
            if obj['area'] < cfg.TRAIN.GT_MIN_AREA:
                continue
            if 'ignore' in obj and obj['ignore'] == 1:
                continue
            # Convert from (x1, y1, w, h) to (x1, y1, x2, y2)
            x1, y1, x2, y2 = box_utils.xywh_to_xyxy(obj['bbox'])
            x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
                x1, y1, x2, y2, height, width
            )
            # Require non-zero seg area and more than 1x1 box size
            if obj['area'] > 0 and x2 > x1 and y2 > y1:
                obj['clean_bbox'] = [x1, y1, x2, y2]
                valid_objs.append(obj)
        num_valid_objs = len(valid_objs)

        gt_classes = np.zeros((num_valid_objs), dtype=entry['gt_classes'].dtype)
        for ix, obj in enumerate(valid_objs):
            cls = self.json_category_id_to_contiguous_id[obj['category_id']]
            gt_classes[ix] = cls

        for cls in gt_classes:
            entry['gt_classes'][0, cls] = 1
Example #4
    def _add_gt_boxes(self, entry):
        ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
        objs = self.COCO.loadAnns(ann_ids)
        valid_objs = []
        width = entry['width']
        height = entry['height']
        for obj in objs:
            # Convert from (x1, y1, w, h) to (x_center/img_w, y_center/img_h, w/img_w, h/img_h)
            x1, y1, x2, y2 = box_utils.xywh_to_xyxy(obj['bbox'])
            x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
                x1, y1, x2, y2, height, width)
            x_yolo, y_yolo, w_yolo, h_yolo = box_utils.xyxy_to_xywh_yolo(
                [x1, y1, x2, y2], height, width)

            if obj['area'] > 0 and x2 > x1 and y2 > y1:
                obj['clean_bbox'] = [x_yolo, y_yolo, w_yolo, h_yolo]
                valid_objs.append(obj)
        num_valid_objs = len(valid_objs)
        gt_boxes = np.zeros((num_valid_objs, 4), dtype=entry['boxes'].dtype)
        gt_classes = np.zeros((num_valid_objs),
                              dtype=entry['gt_classes'].dtype)
        for ix, obj in enumerate(valid_objs):
            cls = self.json_category_id_to_contiguous_id[obj['category_id']]
            gt_boxes[ix, :] = obj['clean_bbox']
            gt_classes[ix] = cls
        entry['boxes'] = np.append(entry['boxes'], gt_boxes, axis=0)
        entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes)
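The xyxy_to_xywh_yolo helper used above is not shown on this page; a plausible implementation of the normalization described in the comment, matching the (box, height, width) call signature, might look like this:

def xyxy_to_xywh_yolo(xyxy, height, width):
    # Convert an absolute (x1, y1, x2, y2) box into YOLO's normalized
    # (x_center / img_w, y_center / img_h, w / img_w, h / img_h) format.
    x1, y1, x2, y2 = xyxy
    x_c = (x1 + x2) / 2.0 / width
    y_c = (y1 + y2) / 2.0 / height
    return x_c, y_c, (x2 - x1) / float(width), (y2 - y1) / float(height)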
	def load_annotation(self, index):
		"""
		Load image and bounding boxes info from XML file in the PASCAL VOC
		format.
		"""
		boxes = []
		labels = []
		preds = []
		for spo in self.data[index]['relationships']:
			try:
				gt_sbj_label = spo['subject']['name']
			except KeyError:
				gt_sbj_label = ''.join(spo['subject']['names'][0])
			gt_sbj_bbox = spo['subject']['x'], spo['subject']['y'], spo['subject']['w'], spo['subject']['h']

			try:
				gt_obj_label = spo['object']['name']
			except KeyError:
				gt_obj_label = ''.join(spo['object']['names'][0])
			gt_obj_bbox = spo['object']['x'], spo['object']['y'], spo['object']['w'], spo['object']['h']

			predicate = spo['predicate']

			if (gt_sbj_label not in self.all_objects_list or gt_obj_label not in self.all_objects_list or predicate not in self.predicates_list):
				continue

			# prepare bboxes for subject and object
			gt_sbj_bbox = xywh_to_xyxy(gt_sbj_bbox)
			gt_obj_bbox = xywh_to_xyxy(gt_obj_bbox)
			boxes.append([gt_sbj_bbox, gt_obj_bbox])

			# prepare labels for subject and object
			# map to index
			labels.append([self._class_to_ind[gt_sbj_label],
						   self._class_to_ind[gt_obj_label]])
			preds.append(self._preds_to_ind[predicate])
		return boxes, labels, preds
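A hypothetical call site, based only on the return signature above:

# `dataset` is an instance of the class defining load_annotation.
boxes, labels, preds = dataset.load_annotation(0)
for (sbj_box, obj_box), (sbj_cls, obj_cls), prd in zip(boxes, labels, preds):
    print(sbj_cls, prd, obj_cls, sbj_box, obj_box)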
def save_im_masks(im, M, id, dir):
    from utils.boxes import xywh_to_xyxy
    import os
    os.makedirs(os.path.join('vis', dir), exist_ok=True)
    M[M > 0] = 1
    aug_rles = mask_util.encode(np.asarray(M, order='F'))
    boxes = xywh_to_xyxy(np.asarray(mask_util.toBbox(aug_rles)))
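    # Pad the boxes with dummy columns of ones so each carries a score of
    # 1.0 and clears the thresh=0.9 filter in vis_one_image below.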
    boxes = np.append(boxes, np.ones((len(boxes), 2)), 1)

    from utils.vis import vis_one_image
    vis_one_image(
        im,
        str(id),
        os.path.join('vis', dir),
        boxes,
        segms=aug_rles,
        keypoints=None,
        thresh=0.9,
        box_alpha=0.8,
        show_class=False,
    )
Example #7
    def _add_gt_annotations(self, entry):
        """Add ground truth annotation metadata to an roidb entry."""
        ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
        objs = self.COCO.loadAnns(ann_ids)
        # Sanitize bboxes -- some are invalid
        valid_objs = []
        valid_segms = []
        width = entry['width']
        height = entry['height']
        for obj in objs:
            # crowd regions are RLE encoded and stored as dicts
            if isinstance(obj['segmentation'], list):
                # Valid polygons have >= 3 points, so require >= 6 coordinates
                obj['segmentation'] = [
                    p for p in obj['segmentation'] if len(p) >= 6
                ]
            if obj['area'] < cfg.TRAIN.GT_MIN_AREA:
                continue
            if 'ignore' in obj and obj['ignore'] == 1:
                continue
            # Convert from (x1, y1, w, h) to (x1, y1, x2, y2)
            x1, y1, x2, y2 = box_utils.xywh_to_xyxy(obj['bbox'])
            x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
                x1, y1, x2, y2, height, width)
            # Require non-zero seg area and more than 1x1 box size
            if obj['area'] > 0 and x2 > x1 and y2 > y1:
                obj['clean_bbox'] = [x1, y1, x2, y2]
                valid_objs.append(obj)
                valid_segms.append(obj['segmentation'])
        num_valid_objs = len(valid_objs)

        boxes = np.zeros((num_valid_objs, 4), dtype=entry['boxes'].dtype)
        gt_classes = np.zeros((num_valid_objs),
                              dtype=entry['gt_classes'].dtype)
        gt_overlaps = np.zeros((num_valid_objs, self.num_classes),
                               dtype=entry['gt_overlaps'].dtype)
        seg_areas = np.zeros((num_valid_objs), dtype=entry['seg_areas'].dtype)
        is_crowd = np.zeros((num_valid_objs), dtype=entry['is_crowd'].dtype)
        box_to_gt_ind_map = np.zeros((num_valid_objs),
                                     dtype=entry['box_to_gt_ind_map'].dtype)
        if self.keypoints is not None:
            gt_keypoints = np.zeros((num_valid_objs, 3, self.num_keypoints),
                                    dtype=entry['gt_keypoints'].dtype)

        im_has_visible_keypoints = False
        for ix, obj in enumerate(valid_objs):
            cls = self.json_category_id_to_contiguous_id[obj['category_id']]
            boxes[ix, :] = obj['clean_bbox']
            gt_classes[ix] = cls
            seg_areas[ix] = obj['area']
            is_crowd[ix] = obj['iscrowd']
            box_to_gt_ind_map[ix] = ix
            if self.keypoints is not None:
                gt_keypoints[ix, :, :] = self._get_gt_keypoints(obj)
                if np.sum(gt_keypoints[ix, 2, :]) > 0:
                    im_has_visible_keypoints = True
            if obj['iscrowd']:
                # Set overlap to -1 for all classes for crowd objects
                # so they will be excluded during training
                gt_overlaps[ix, :] = -1.0
            else:
                gt_overlaps[ix, cls] = 1.0
        entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
        entry['segms'].extend(valid_segms)
        # To match the original implementation:
        # entry['boxes'] = np.append(
        #     entry['boxes'], boxes.astype(np.int).astype(np.float), axis=0)
        entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes)
        entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas)
        entry['gt_overlaps'] = np.append(entry['gt_overlaps'].toarray(),
                                         gt_overlaps,
                                         axis=0)
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
        entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
        entry['box_to_gt_ind_map'] = np.append(entry['box_to_gt_ind_map'],
                                               box_to_gt_ind_map)
        if self.keypoints is not None:
            entry['gt_keypoints'] = np.append(entry['gt_keypoints'],
                                              gt_keypoints,
                                              axis=0)
            entry['has_visible_keypoints'] = im_has_visible_keypoints
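For context on the -1 overlaps: downstream ROI sampling can then exclude crowd boxes by filtering on the per-box maximum overlap, along these lines (a sketch only; the actual sampling code is more involved):

import numpy as np

# Rows of gt_overlaps for crowd objects were set to -1 above, so any box
# whose best overlap is negative can be dropped before sampling ROIs.
max_overlaps = entry['gt_overlaps'].toarray().max(axis=1)
keep = np.where(max_overlaps >= 0)[0]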
Example #8
    def _add_gt_annotations(self, entry):
        """Add ground truth annotation metadata to an roidb entry."""
        ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
        objs = self.COCO.loadAnns(ann_ids)
        # Sanitize bboxes -- some are invalid
        valid_objs = []
        valid_segms = []
        width = entry['width']
        height = entry['height']
        for obj in objs:
            # crowd regions are RLE encoded and stored as dicts
            if isinstance(obj['segmentation'], list):
                # Valid polygons have >= 3 points, so require >= 6 coordinates
                obj['segmentation'] = [
                    p for p in obj['segmentation'] if len(p) >= 6
                ]
            if obj['area'] < cfg.TRAIN.GT_MIN_AREA:
                continue
            if 'ignore' in obj and obj['ignore'] == 1:
                continue
            # Convert from (x1, y1, w, h) to (x1, y1, x2, y2)
            x1, y1, x2, y2 = box_utils.xywh_to_xyxy(obj['bbox'])
            x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
                x1, y1, x2, y2, height, width
            )
            # Require non-zero seg area and more than 1x1 box size
            if obj['area'] > 0 and x2 > x1 and y2 > y1:
                obj['clean_bbox'] = [x1, y1, x2, y2]
                valid_objs.append(obj)
                valid_segms.append(obj['segmentation'])
        num_valid_objs = len(valid_objs)

        boxes = np.zeros((num_valid_objs, 4), dtype=entry['boxes'].dtype)
        gt_classes = np.zeros((num_valid_objs), dtype=entry['gt_classes'].dtype)
        gt_overlaps = np.zeros(
            (num_valid_objs, self.num_classes),
            dtype=entry['gt_overlaps'].dtype
        )
        seg_areas = np.zeros((num_valid_objs), dtype=entry['seg_areas'].dtype)
        is_crowd = np.zeros((num_valid_objs), dtype=entry['is_crowd'].dtype)
        box_to_gt_ind_map = np.zeros(
            (num_valid_objs), dtype=entry['box_to_gt_ind_map'].dtype
        )
        if self.keypoints is not None:
            gt_keypoints = np.zeros(
                (num_valid_objs, 3, self.num_keypoints),
                dtype=entry['gt_keypoints'].dtype
            )

        im_has_visible_keypoints = False
        for ix, obj in enumerate(valid_objs):
            cls = self.json_category_id_to_contiguous_id[obj['category_id']]
            boxes[ix, :] = obj['clean_bbox']
            gt_classes[ix] = cls
            seg_areas[ix] = obj['area']
            is_crowd[ix] = obj['iscrowd']
            box_to_gt_ind_map[ix] = ix
            if self.keypoints is not None:
                gt_keypoints[ix, :, :] = self._get_gt_keypoints(obj)
                if np.sum(gt_keypoints[ix, 2, :]) > 0:
                    im_has_visible_keypoints = True
            if obj['iscrowd']:
                # Set overlap to -1 for all classes for crowd objects
                # so they will be excluded during training
                gt_overlaps[ix, :] = -1.0
            else:
                gt_overlaps[ix, cls] = 1.0
        entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
        entry['segms'].extend(valid_segms)
        # To match the original implementation:
        # entry['boxes'] = np.append(
        #     entry['boxes'], boxes.astype(np.int).astype(np.float), axis=0)
        entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes)
        entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas)
        entry['gt_overlaps'] = np.append(
            entry['gt_overlaps'].toarray(), gt_overlaps, axis=0
        )
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
        entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
        entry['box_to_gt_ind_map'] = np.append(
            entry['box_to_gt_ind_map'], box_to_gt_ind_map
        )
        if self.keypoints is not None:
            entry['gt_keypoints'] = np.append(
                entry['gt_keypoints'], gt_keypoints, axis=0
            )
            entry['has_visible_keypoints'] = im_has_visible_keypoints
Example #9
    def _add_gt_annotations(self, entry):
        """Add ground truth annotation metadata to an roidb entry."""
        ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
        objs = self.COCO.loadAnns(ann_ids)
        # Sanitize bboxes -- some are invalid
        valid_objs = []
        valid_segms = []
        width = entry['width']
        height = entry['height']
        for obj in objs:
            if obj['area'] < cfg.TRAIN.GT_MIN_AREA:
                continue
            if 'ignore' in obj and obj['ignore'] == 1:
                continue
            # Convert from (x1, y1, w, h) to (x1, y1, x2, y2)
            x1, y1, x2, y2 = box_utils.xywh_to_xyxy(obj['bbox'])
            x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
                x1, y1, x2, y2, height, width
            )
            # Require non-zero seg area and more than 1x1 box size
            if obj['area'] > 0 and x2 > x1 and y2 > y1:
                obj['clean_bbox'] = [x1, y1, x2, y2]
                valid_objs.append(obj)
                # valid_segms.append(obj['segmentation'])
        num_valid_objs = len(valid_objs)

        boxes = np.zeros((num_valid_objs, 4), dtype=entry['boxes'].dtype)
        gt_classes = np.zeros((num_valid_objs), dtype=entry['gt_classes'].dtype)
        gt_overlaps = np.zeros(
            (num_valid_objs, self.num_classes),
            dtype=entry['gt_overlaps'].dtype
        )
        seg_areas = np.zeros((num_valid_objs), dtype=entry['seg_areas'].dtype)
        is_crowd = np.zeros((num_valid_objs), dtype=entry['is_crowd'].dtype)
        box_to_gt_ind_map = np.zeros(
            (num_valid_objs), dtype=entry['box_to_gt_ind_map'].dtype
        )
        if self.keypoints is not None:
            gt_keypoints = np.zeros(
                (num_valid_objs, 3, self.num_keypoints),
                dtype=entry['gt_keypoints'].dtype
            )

        im_has_visible_keypoints = False
        for ix, obj in enumerate(valid_objs):
            cls = self.json_category_id_to_contiguous_id[obj['category_id']]
            boxes[ix, :] = obj['clean_bbox']
            gt_classes[ix] = cls
            seg_areas[ix] = obj['area']
            is_crowd[ix] = obj['iscrowd']
            box_to_gt_ind_map[ix] = ix
            if self.keypoints is not None:
                gt_keypoints[ix, :, :] = self._get_gt_keypoints(obj)
                if np.sum(gt_keypoints[ix, 2, :]) > 0:
                    im_has_visible_keypoints = True
            if obj['iscrowd']:
                # Set overlap to -1 for all classes for crowd objects
                # so they will be excluded during training
                gt_overlaps[ix, :] = -1.0
            else:
                gt_overlaps[ix, cls] = 1.0
        entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
        entry['segms'].extend(valid_segms)
        entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes)
        entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas)
        entry['gt_overlaps'] = np.append(
            entry['gt_overlaps'].toarray(), gt_overlaps, axis=0
        )
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
        entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
        entry['box_to_gt_ind_map'] = np.append(
            entry['box_to_gt_ind_map'], box_to_gt_ind_map
        )
        if self.keypoints is not None:
            entry['gt_keypoints'] = np.append(
                entry['gt_keypoints'], gt_keypoints, axis=0
            )
            entry['has_visible_keypoints'] = im_has_visible_keypoints
            
        entry['dataset_name'] = self.name

        # add relationship annotations
        im_rels = self.rel_anns[entry['file_name']]
        sbj_gt_boxes = np.zeros((len(im_rels), 4), dtype=entry['sbj_gt_boxes'].dtype)
        obj_gt_boxes = np.zeros((len(im_rels), 4), dtype=entry['obj_gt_boxes'].dtype)
        sbj_gt_classes = np.zeros(len(im_rels), dtype=entry['sbj_gt_classes'].dtype)
        obj_gt_classes = np.zeros(len(im_rels), dtype=entry['obj_gt_classes'].dtype)
        prd_gt_classes = np.zeros(len(im_rels), dtype=entry['prd_gt_classes'].dtype)
        for ix, rel in enumerate(im_rels):
            # sbj
            sbj_gt_box = box_utils_rel.y1y2x1x2_to_x1y1x2y2(rel['subject']['bbox'])
            sbj_gt_boxes[ix] = sbj_gt_box
            sbj_gt_classes[ix] = rel['subject']['category']  # excludes background
            # obj
            obj_gt_box = box_utils_rel.y1y2x1x2_to_x1y1x2y2(rel['object']['bbox'])
            obj_gt_boxes[ix] = obj_gt_box
            obj_gt_classes[ix] = rel['object']['category']  # excludes background
            # prd
            prd_gt_classes[ix] = rel['predicate']  # exclude background
        entry['sbj_gt_boxes'] = np.append(entry['sbj_gt_boxes'], sbj_gt_boxes, axis=0)
        entry['obj_gt_boxes'] = np.append(entry['obj_gt_boxes'], obj_gt_boxes, axis=0)
        entry['sbj_gt_classes'] = np.append(entry['sbj_gt_classes'], sbj_gt_classes)
        entry['obj_gt_classes'] = np.append(entry['obj_gt_classes'], obj_gt_classes)
        entry['prd_gt_classes'] = np.append(entry['prd_gt_classes'], prd_gt_classes)
        # misc
        sbj_gt_overlaps = np.zeros(
            (len(im_rels), self.num_obj_classes), dtype=entry['sbj_gt_overlaps'].dtype)
        for ix in range(len(im_rels)):
            sbj_cls = sbj_gt_classes[ix]
            sbj_gt_overlaps[ix, sbj_cls] = 1.0
        entry['sbj_gt_overlaps'] = np.append(
            entry['sbj_gt_overlaps'].toarray(), sbj_gt_overlaps, axis=0)
        entry['sbj_gt_overlaps'] = scipy.sparse.csr_matrix(entry['sbj_gt_overlaps'])

        obj_gt_overlaps = np.zeros(
            (len(im_rels), self.num_obj_classes), dtype=entry['obj_gt_overlaps'].dtype)
        for ix in range(len(im_rels)):
            obj_cls = obj_gt_classes[ix]
            obj_gt_overlaps[ix, obj_cls] = 1.0
        entry['obj_gt_overlaps'] = np.append(
            entry['obj_gt_overlaps'].toarray(), obj_gt_overlaps, axis=0)
        entry['obj_gt_overlaps'] = scipy.sparse.csr_matrix(entry['obj_gt_overlaps'])

        prd_gt_overlaps = np.zeros(
            (len(im_rels), self.num_prd_classes), dtype=entry['prd_gt_overlaps'].dtype)
        pair_to_gt_ind_map = np.zeros(
            (len(im_rels)), dtype=entry['pair_to_gt_ind_map'].dtype)
        for ix in range(len(im_rels)):
            prd_cls = prd_gt_classes[ix]
            prd_gt_overlaps[ix, prd_cls] = 1.0
            pair_to_gt_ind_map[ix] = ix
        entry['prd_gt_overlaps'] = np.append(
            entry['prd_gt_overlaps'].toarray(), prd_gt_overlaps, axis=0)
        entry['prd_gt_overlaps'] = scipy.sparse.csr_matrix(entry['prd_gt_overlaps'])
        entry['pair_to_gt_ind_map'] = np.append(
            entry['pair_to_gt_ind_map'], pair_to_gt_ind_map)
        
        entry.pop('file_name', None)
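For reference, a single rel_anns record is assumed to look like the following, inferred from the key accesses in the loop above (the y1y2x1x2_to_x1y1x2y2 call implies boxes are stored in (y1, y2, x1, x2) order):

# One relationship record with hypothetical values.
rel = {
    'subject':   {'category': 5,  'bbox': [20, 120, 10, 110]},  # (y1, y2, x1, x2)
    'object':    {'category': 12, 'bbox': [40, 200, 60, 180]},
    'predicate': 3,
}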
Example #10
    def _add_gt_annotations(self, entry):
        """Add ground truth annotation metadata to an roidb entry.
    """
        seq_idx = entry['seq_idx']
        idx = entry['idx']
        self.set_to_sequence(seq_idx)
        # Get the ground-truth segmentation image for this frame.
        gt = self.get_gt(idx)
        vals = np.unique(gt)

        objs = []
        for val in vals:
            # val == 0 is background
            if val != 0:
                obj = {}
                mask = np.array(gt == val, dtype=np.uint8)
                # make sure the gt == val comparison produced only 0/1 values
                assert len(set(mask.reshape(-1)) - {0, 1}) == 0
                x, y, w, h = cv2.boundingRect(mask)
                x, y, w, h = self._expand_box(x, y, w, h, rate=0.05)
                #obj['segmentation'] = binary_mask_to_rle(mask)
                obj['segmentation'] = mask_util.encode(
                    np.array(mask, order='F', dtype=np.uint8))
                obj['area'] = np.sum(mask)
                obj['iscrowd'] = 0
                obj['bbox'] = x, y, w, h
                if self.cls_mapper is not None:
                    #set category id by cls_mapper.
                    obj['category_id'] = self.cls_mapper[val]
                else:
                    if not self.use_local_id:
                        obj['category_id'] = self.global_instance_id_start_of_seq[
                            seq_idx] + val - 1
                    else:
                        obj['category_id'] = val
                obj['instance_id'] = val
                assert (self.global_instance_id_start_of_seq[seq_idx] != 0)
                # val-1 to remove background.
                obj['global_instance_id'] = self.global_instance_id_start_of_seq[
                    seq_idx] + val - 1
                objs.append(obj)

        # Sanitize bboxes -- some are invalid
        valid_objs = []
        valid_segms = []
        width = entry['width']
        height = entry['height']
        for obj in objs:
            # Masks were RLE-encoded above, so each segmentation is a dict.
            assert isinstance(obj['segmentation'], dict)

            # Convert from (x1, y1, w, h) to (x1, y1, x2, y2)
            x1, y1, x2, y2 = box_utils.xywh_to_xyxy(obj['bbox'])
            x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
                x1, y1, x2, y2, height, width)
            # Require non-zero seg area and more than 1x1 box size
            if obj['area'] > 0 and x2 > x1 and y2 > y1:
                obj['clean_bbox'] = [x1, y1, x2, y2]
                valid_objs.append(obj)
                valid_segms.append(obj['segmentation'])
        num_valid_objs = len(valid_objs)

        boxes = np.zeros((num_valid_objs, 4), dtype=entry['boxes'].dtype)
        gt_classes = np.zeros((num_valid_objs),
                              dtype=entry['gt_classes'].dtype)
        instance_id = np.zeros((num_valid_objs),
                               dtype=entry['instance_id'].dtype)
        global_instance_id = np.zeros((num_valid_objs),
                                      dtype=entry['global_instance_id'].dtype)
        gt_overlaps = np.zeros((num_valid_objs, self.num_classes),
                               dtype=entry['gt_overlaps'].dtype)
        gt_overlaps_id = np.zeros(
            (num_valid_objs, self.number_of_instance_ids),
            dtype=entry['gt_overlaps_id'].dtype)
        seg_areas = np.zeros((num_valid_objs), dtype=entry['seg_areas'].dtype)
        is_crowd = np.zeros((num_valid_objs), dtype=entry['is_crowd'].dtype)
        box_to_gt_ind_map = np.zeros((num_valid_objs),
                                     dtype=entry['box_to_gt_ind_map'].dtype)

        im_has_visible_keypoints = False
        for ix, obj in enumerate(valid_objs):
            if obj['category_id'] is not None:
                #cls = self.json_category_id_to_contiguous_id[obj['category_id']]
                cls = obj['category_id']
            else:
                # If no category_id is given, fall back to the background
                # class, whose index is self.num_classes - 1.
                cls = self.num_classes - 1
            boxes[ix, :] = obj['clean_bbox']
            gt_classes[ix] = cls
            instance_id[ix] = obj['instance_id']
            global_instance_id[ix] = obj['global_instance_id']
            seg_areas[ix] = obj['area']
            is_crowd[ix] = obj['iscrowd']
            box_to_gt_ind_map[ix] = ix
            if obj['iscrowd']:
                # Set overlap to -1 for all classes for crowd objects
                # so they will be excluded during training
                gt_overlaps[ix, :] = -1.0
                gt_overlaps_id[ix, :] = -1.0
            else:
                gt_overlaps[ix, cls] = 1.0
                gt_overlaps_id[ix, global_instance_id[ix]] = 1.0
        entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
        entry['segms'].extend(valid_segms)
        # To match the original implementation:
        # entry['boxes'] = np.append(
        #     entry['boxes'], boxes.astype(np.int).astype(np.float), axis=0)
        entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes)
        entry['instance_id'] = np.append(entry['instance_id'], instance_id)
        entry['global_instance_id'] = np.append(entry['global_instance_id'],
                                                global_instance_id)
        entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas)
        entry['gt_overlaps'] = np.append(entry['gt_overlaps'].toarray(),
                                         gt_overlaps,
                                         axis=0)
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])

        entry['gt_overlaps_id'] = np.append(entry['gt_overlaps_id'].toarray(),
                                            gt_overlaps_id,
                                            axis=0)
        entry['gt_overlaps_id'] = scipy.sparse.csr_matrix(
            entry['gt_overlaps_id'])
        entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
        entry['box_to_gt_ind_map'] = np.append(entry['box_to_gt_ind_map'],
                                               box_to_gt_ind_map)
        assert (
            entry['gt_overlaps_id'].shape[0] == entry['gt_overlaps'].shape[0])
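The _expand_box helper called above is not shown; a plausible implementation that grows an (x, y, w, h) box by rate on each side while keeping its center fixed might be:

def _expand_box(self, x, y, w, h, rate=0.05):
    # Hypothetical helper matching the call site above: pad the box by
    # rate * w horizontally and rate * h vertically on each side.
    dw, dh = w * rate, h * rate
    return x - dw, y - dh, w + 2 * dw, h + 2 * dh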
def augment_images(roidb, augment_masks=False, augment_boxes=False):
    from pycocotools import mask as mask_util
    import numpy as np
    from scipy import ndimage

    if cfg.TRAIN.AUGMENTATION_MODE == 'OPTIMISTIC':
        seq, hooks_masks = seq_optimistic, hooks_optimistic_masks
    else:
        seq, hooks_masks = seq_old, hooks_masks_old

    import cv2
    ims = [cv2.imread(roi['image'])[:, :, [2, 1, 0]] for roi in roidb]
    # Call this for each batch again, NOT only once at the start.
    seq_det = seq.to_deterministic()

    aug_ims_augs = seq_det.augment_images(ims)

    orig_masks = [mask_util.decode(roi['segms']) for roi in roidb]

    if augment_masks and np.random.random() > 0.5:
        orig_masks = masks_augmentation(orig_masks)

    mask_augs = seq_det.augment_images(orig_masks, hooks=hooks_masks)

    # Fill holes in each augmented binary mask, for every image in the batch.
    for masks in mask_augs:
        for idx in range(masks.shape[2]):
            masks[:, :, idx] = ndimage.morphology.binary_fill_holes(
                masks[:, :, idx]).astype(np.uint8)

    im_augs, roi_augs = [], []

    for im, im_aug, roi, M in zip(ims, aug_ims_augs, roidb, mask_augs):
        M[M > 0] = 1
        aug_rles = mask_util.encode(np.asarray(M, order='F'))

        valid = np.sum(M, axis=(0, 1)) > 15
        aug_rles = [rle for idx, rle in enumerate(aug_rles) if valid[idx]]

        n_masks = len(aug_rles)
        if aug_rles:
            for rle_aug in aug_rles:
                rle_aug['size'] = [int(i) for i in rle_aug['size']]
            new_boxes = np.float32(mask_util.toBbox(aug_rles))

            if augment_boxes and np.random.random() > 0.5:
                new_boxes = scale_boxes(new_boxes)

            new_boxes = box_utils.xywh_to_xyxy(new_boxes)
            roi_aug = roi.copy()
            roi_aug['box_to_gt_ind_map'] = np.arange(n_masks, dtype=np.int32)
            roi_aug['boxes'] = new_boxes
            roi_aug['gt_classes'] = roi['gt_classes'][:n_masks]
            roi_aug['gt_overlaps'] = roi['gt_overlaps'][:n_masks]
            roi_aug['is_crowd'] = roi['is_crowd'][:n_masks]
            roi_aug['max_classes'] = roi['max_classes'][:n_masks]
            roi_aug['max_overlaps'] = roi['max_overlaps'][:n_masks]
            roi_aug['seg_areas'] = np.float32(mask_util.area(aug_rles))
            roi_aug['segms'] = aug_rles

            roi_augs.append(roi_aug)
            im_augs.append(im_aug)
        else:
            roi_augs.append(roi)
            im_augs.append(im)

    im_augs = [im[:, :, [2, 1, 0]] for im in im_augs]

    return roi_augs, im_augs
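A hypothetical call site, assuming a Detectron-style roidb list:

# Augment the roidb entries and images together so that boxes, masks and
# pixels stay aligned after the random transforms.
roidb_aug, ims_aug = augment_images(roidb, augment_masks=True, augment_boxes=True)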
Example #12
    def _add_gt_annotations(self, entry):
        """Add ground truth annotation metadata to an roidb entry."""
        ann_ids = self.COCO.getAnnIds(imgIds=entry['id'])
        objs = self.COCO.loadAnns(ann_ids)
        # Sanitize bboxes -- some are invalid
        valid_objs = []
        valid_segms = []
        width = entry['width']
        height = entry['height']
        for obj in objs:
            im_path = self.image_path_from_index(obj['image_id'])
            # crowd regions are RLE encoded and stored as dicts
            if isinstance(obj['segmentation'], list):
                # Valid polygons have >= 3 points, so require >= 6 coordinates
                obj['segmentation'] = [
                    p for p in obj['segmentation'] if len(p) >= 6
                ]
            if obj['area'] < cfg.TRAIN.GT_MIN_AREA:
                continue
            if 'ignore' in obj and obj['ignore'] == 1:
                continue
            # Convert from (x1, y1, w, h) to (x1, y1, x2, y2)
            x1, y1, x2, y2 = box_utils.xywh_to_xyxy(obj['bbox'])
            x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
                x1, y1, x2, y2, height, width
            )
            # Require non-zero seg area and more than 1x1 box size
            if obj['area'] > 0 and x2 > x1 and y2 > y1:
                obj['clean_bbox'] = [x1, y1, x2, y2]
                # Compress the rle
                if ('counts' in obj['segmentation']
                        and isinstance(obj['segmentation']['counts'], list)):
                    # Magic RLE format handling painfully discovered by looking
                    # at the COCO API showAnns function.
                    rle = mask_util.frPyObjects(obj['segmentation'], height, width)
                    valid_segms.append(rle)
                else:
                    binary_mask = segm_utils.polys_to_mask(
                        obj['segmentation'], height, width)
                    if len(np.unique(binary_mask)) == 1:
                        continue
                    valid_segms.append(obj['segmentation'])
                valid_objs.append(obj)
                query = {
                        'boxes': obj['clean_bbox'],
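                        # Note: this stores a reference to the running
                        # valid_segms list, so every query for this image
                        # ends up sharing the same final list of segms.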
                        'segms': valid_segms,
                        'image_path': im_path,
                        'area': obj['area'],
                        }

                self.cat_data[obj['category_id']].append(query)

        num_valid_objs = len(valid_objs)

        boxes = np.zeros((num_valid_objs, 4), dtype=entry['boxes'].dtype)
        gt_classes = np.zeros((num_valid_objs), dtype=entry['gt_classes'].dtype)
        gt_cats = np.zeros((num_valid_objs), dtype=entry['gt_cats'].dtype)
        #gt_overlaps = np.zeros(
        #    (num_valid_objs, self.num_classes),
        #    dtype=entry['gt_overlaps'].dtype
        #)
        gt_overlaps = np.zeros(
            (num_valid_objs, cfg.MODEL.NUM_CLASSES),
            dtype=entry['gt_overlaps'].dtype
        )
        seg_areas = np.zeros((num_valid_objs), dtype=entry['seg_areas'].dtype)
        is_crowd = np.zeros((num_valid_objs), dtype=entry['is_crowd'].dtype)
        box_to_gt_ind_map = np.zeros(
            (num_valid_objs), dtype=entry['box_to_gt_ind_map'].dtype
        )
        if self.keypoints is not None:
            gt_keypoints = np.zeros(
                (num_valid_objs, 3, self.num_keypoints),
                dtype=entry['gt_keypoints'].dtype
            )

        im_has_visible_keypoints = False
        for ix, obj in enumerate(valid_objs):
            cls = obj['category_id']
            boxes[ix, :] = obj['clean_bbox']
            #gt_classes[ix] = cls
            gt_classes[ix] = 1
            gt_cats[ix] = cls
            seg_areas[ix] = obj['area']
            is_crowd[ix] = obj['iscrowd']
            box_to_gt_ind_map[ix] = ix
            if self.keypoints is not None:
                gt_keypoints[ix, :, :] = self._get_gt_keypoints(obj)
                if np.sum(gt_keypoints[ix, 2, :]) > 0:
                    im_has_visible_keypoints = True
            #gt_overlaps[ix, cls] = 1.0
            gt_overlaps[ix, 1] = 1.0
        box_utils.validate_boxes(boxes, width=width, height=height)
        entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
        entry['segms'].extend(valid_segms)
        # To match the original implementation:
        # entry['boxes'] = np.append(
        #     entry['boxes'], boxes.astype(np.int).astype(np.float), axis=0)
        entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes)
        entry['gt_cats'] = np.append(entry['gt_cats'], gt_cats)
        entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas)
        entry['gt_overlaps'] = np.append(
            entry['gt_overlaps'].toarray(), gt_overlaps, axis=0
        )
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
        entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
        entry['box_to_gt_ind_map'] = np.append(
            entry['box_to_gt_ind_map'], box_to_gt_ind_map
        )
        if self.keypoints is not None:
            entry['gt_keypoints'] = np.append(
                entry['gt_keypoints'], gt_keypoints, axis=0
            )
            entry['has_visible_keypoints'] = im_has_visible_keypoints
    def _add_gt_annotations(self, entry):
        """Add ground truth annotation metadata to an roidb entry."""
        ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
        objs = self.COCO.loadAnns(ann_ids)

        has_scores = len(objs) > 0 and 'score' in objs[0]
        has_source = len(objs) > 0 and 'source' in objs[0]

        # EDIT: detection "soft"-scores
        # if has_scores:
        if 'gt_scores' not in entry:
            entry['gt_scores'] = np.empty((0), dtype=np.float32)
        # EDIT: dataset name
        if len(objs) > 0:
            has_dataset = ('dataset' in objs[0].keys())
        else:
            has_dataset = True  # TODO: Check. This assumes that the "dataset" field is given
        # if has_dataset:
        if 'dataset_id' not in entry:
            entry['dataset_id'] = np.empty((0), dtype=np.int32)
        if 'gt_source' not in entry:
            entry['gt_source'] = np.empty((0), dtype=np.int32)

        # Sanitize bboxes -- some are invalid
        valid_objs = []
        valid_segms = []
        width = entry['width']
        height = entry['height']
        """input('HEREEEE1111  '+str(has_dataset)+': '+str(entry['dataset'].name)+' __ '+str(entry))
        if not has_dataset:
            print(entry['dataset'].name)
            input(str(entry))
        """
        for obj in objs:
            # crowd regions are RLE encoded and stored as dicts
            if isinstance(obj['segmentation'], list):
                # Valid polygons have >= 3 points, so require >= 6 coordinates
                obj['segmentation'] = [
                    p for p in obj['segmentation'] if len(p) >= 6
                ]
            if obj['area'] < cfg.TRAIN.GT_MIN_AREA:
                continue
            if 'ignore' in obj and obj['ignore'] == 1:
                continue
            # Convert from (x1, y1, w, h) to (x1, y1, x2, y2)
            x1, y1, x2, y2 = box_utils.xywh_to_xyxy(obj['bbox'])
            x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
                x1, y1, x2, y2, height, width)
            # Require non-zero seg area and more than 1x1 box size
            if obj['area'] > 0 and x2 > x1 and y2 > y1:
                obj['clean_bbox'] = [x1, y1, x2, y2]
                valid_objs.append(obj)
                valid_segms.append(obj['segmentation'])
        num_valid_objs = len(valid_objs)


        boxes = np.zeros((num_valid_objs, 4), dtype=entry['boxes'].dtype)
        gt_classes = np.zeros((num_valid_objs),
                              dtype=entry['gt_classes'].dtype)
        gt_overlaps = np.zeros((num_valid_objs, self.num_classes),
                               dtype=entry['gt_overlaps'].dtype)
        # if has_scores:
        gt_scores = np.zeros((num_valid_objs),
                             dtype=entry['gt_scores'].dtype)  # EDIT: scores
        gt_source = np.zeros((num_valid_objs), dtype=entry['gt_source'].dtype)
        # if has_dataset:
        dataset_annot = np.zeros(
            (num_valid_objs), dtype=entry['dataset_id'].dtype)  # EDIT: dataset

        seg_areas = np.zeros((num_valid_objs), dtype=entry['seg_areas'].dtype)
        is_crowd = np.zeros((num_valid_objs), dtype=entry['is_crowd'].dtype)
        box_to_gt_ind_map = np.zeros((num_valid_objs),
                                     dtype=entry['box_to_gt_ind_map'].dtype)
        if self.keypoints is not None:
            gt_keypoints = np.zeros((num_valid_objs, 3, self.num_keypoints),
                                    dtype=entry['gt_keypoints'].dtype)

        im_has_visible_keypoints = False
        for ix, obj in enumerate(valid_objs):
            cls = self.json_category_id_to_contiguous_id[obj['category_id']]
            boxes[ix, :] = obj['clean_bbox']
            gt_classes[ix] = cls
            if has_scores:
                gt_scores[ix] = obj['score']  # EDIT: scores
            if has_source:
                gt_source[ix] = obj[
                    'source']  # EDIT: annot source (det or track)
            if has_dataset:
                dataset_annot[ix] = cfg.TRAIN.DATASETS.index(
                    obj['dataset'])  # EDIT: dataset id
            seg_areas[ix] = obj['area']
            is_crowd[ix] = obj['iscrowd']
            box_to_gt_ind_map[ix] = ix
            if self.keypoints is not None:
                gt_keypoints[ix, :, :] = self._get_gt_keypoints(obj)
                if np.sum(gt_keypoints[ix, 2, :]) > 0:
                    im_has_visible_keypoints = True
            if obj['iscrowd']:
                # Set overlap to -1 for all classes for crowd objects
                # so they will be excluded during training
                gt_overlaps[ix, :] = -1.0
            else:
                gt_overlaps[ix, cls] = 1.0
        entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
        entry['segms'].extend(valid_segms)
        # To match the original implementation:
        # entry['boxes'] = np.append(
        #     entry['boxes'], boxes.astype(np.int).astype(np.float), axis=0)
        entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes)

        if has_scores:
            entry['gt_scores'] = np.append(entry['gt_scores'],
                                           gt_scores)  # EDIT: scores
        if has_source:
            entry['gt_source'] = np.append(entry['gt_source'], gt_source)
        if has_dataset:
            entry['dataset_id'] = np.append(entry['dataset_id'],
                                            dataset_annot)  # EDIT: dataset

        entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas)
        entry['gt_overlaps'] = np.append(entry['gt_overlaps'].toarray(),
                                         gt_overlaps,
                                         axis=0)
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
        entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
        entry['box_to_gt_ind_map'] = np.append(entry['box_to_gt_ind_map'],
                                               box_to_gt_ind_map)
        if self.keypoints is not None:
            entry['gt_keypoints'] = np.append(entry['gt_keypoints'],
                                              gt_keypoints,
                                              axis=0)
            entry['has_visible_keypoints'] = im_has_visible_keypoints