def test_bbox_dataset_to_prediction_roundtrip(self):
    """Round-trip a dataset box through the box-regression pipeline.

    A ground-truth box in dataset (x1, y1, w, h) form is converted to
    (x1, y1, x2, y2), regression deltas from nearby proposals to that box
    are computed and applied back to the proposals, and the resulting
    predictions are converted to (x1, y1, w, h) again.  The COCO API is then
    used to measure IoU against the original box; every IoU should be 1.
    """
    reg_weights = (5, 5, 10, 10)

    # Ground truth as stored in the dataset, and in the internal xyxy format.
    gt_xywh = [10, 20, 100, 150]
    gt_xyxy = box_utils.xywh_to_xyxy(gt_xywh)

    # Proposal boxes near the ground truth.
    proposals = random_boxes(gt_xyxy, 10, 10)

    # Proposal-to-gt deltas, then the deltas applied back to the proposals.
    regression = box_utils.bbox_transform_inv(
        proposals, np.array([gt_xyxy]), weights=reg_weights
    )
    decoded_xyxy = box_utils.bbox_transform(
        proposals, regression, weights=reg_weights
    )

    # Back to the dataset format expected by the COCO API.
    decoded_xywh = box_utils.xyxy_to_xywh(decoded_xyxy)
    crowd_flags = [int(False) for _ in range(decoded_xywh.shape[0])]
    overlaps = COCOmask.iou(decoded_xywh, np.array([gt_xywh]), crowd_flags)
    np.testing.assert_array_almost_equal(overlaps, np.ones(overlaps.shape))
def test_bbox_dataset_to_prediction_roundtrip(self):
    """Verify that dataset -> prediction -> dataset box conversion is lossless.

    Steps: take a ground-truth box in (x1, y1, w, h) form, convert it to
    (x1, y1, x2, y2), compute the deltas that map nearby proposals onto it,
    apply those deltas, convert the predictions back to (x1, y1, w, h) and
    ask the COCO API for their IoU with the ground truth.  The IoU is
    expected to be 1 for every prediction.
    """
    weights = (5, 5, 10, 10)
    dataset_box = [10, 20, 100, 150]

    # Internal (x1, y1, x2, y2) representation of the dataset box.
    target_box = box_utils.xywh_to_xyxy(dataset_box)
    # Proposals in the neighbourhood of the target.
    nearby_boxes = random_boxes(target_box, 10, 10)

    # Deltas from each proposal to the target ...
    target_rows = np.array([target_box])
    deltas = box_utils.bbox_transform_inv(
        nearby_boxes, target_rows, weights=weights)
    # ... and the predictions obtained by applying them.
    predicted = box_utils.bbox_transform(nearby_boxes, deltas, weights=weights)
    predicted = box_utils.xyxy_to_xywh(predicted)

    # IoU through the COCO API; none of the predictions is a crowd region.
    num_predictions = predicted.shape[0]
    not_crowd = [int(False)] * num_predictions
    ious = COCOmask.iou(predicted, np.array([dataset_box]), not_crowd)
    expected = np.ones(ious.shape)
    np.testing.assert_array_almost_equal(ious, expected)
def _add_gt_annotations(self, entry):
    """Mark the ground-truth classes that occur in an roidb entry.

    Every annotation of image ``entry['id']`` is loaded from ``self.COCO``.
    Annotations smaller than ``cfg.TRAIN.GT_MIN_AREA``, flagged with
    ``ignore == 1``, with zero area, or whose clipped box has no positive
    width/height are dropped.  For each remaining annotation the contiguous
    class id ``cls`` is looked up and ``entry['gt_classes'][0, cls]`` is set
    to 1.

    Side effects: kept annotation dicts gain a ``'clean_bbox'`` key with the
    clipped ``[x1, y1, x2, y2]`` box, and ``entry['gt_classes']`` is written
    in place.
    """
    ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
    objs = self.COCO.loadAnns(ann_ids)
    # Sanitize bboxes -- some are invalid
    valid_objs = []
    width = entry['width']
    height = entry['height']
    for obj in objs:
        if obj['area'] < cfg.TRAIN.GT_MIN_AREA:
            continue
        if 'ignore' in obj and obj['ignore'] == 1:
            continue
        # Convert from (x1, y1, w, h) to (x1, y1, x2, y2)
        x1, y1, x2, y2 = box_utils.xywh_to_xyxy(obj['bbox'])
        x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
            x1, y1, x2, y2, height, width
        )
        # Require non-zero seg area and a box with positive width and height
        if obj['area'] > 0 and x2 > x1 and y2 > y1:
            obj['clean_bbox'] = [x1, y1, x2, y2]
            valid_objs.append(obj)

    # The ids go through an array of the entry's own dtype so the column
    # indices used below have the same type as the stored labels.
    gt_classes = np.zeros(len(valid_objs), dtype=entry['gt_classes'].dtype)
    for ix, obj in enumerate(valid_objs):
        gt_classes[ix] = self.json_category_id_to_contiguous_id[
            obj['category_id']]
    for cls in gt_classes:
        entry['gt_classes'][0, cls] = 1
def _add_gt_boxes(self, entry):
    """Append ground-truth boxes and class ids to an roidb entry.

    Each annotation box is converted from (x1, y1, w, h) to xyxy, clipped to
    the image and then passed through ``box_utils.xyxy_to_xywh_yolo``
    (per the original comment: centre/size normalised by the image size).
    Annotations with zero area or a degenerate clipped box are dropped.
    """
    annotations = self.COCO.loadAnns(
        self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
    )
    im_w = entry['width']
    im_h = entry['height']

    kept = []
    for ann in annotations:
        x1, y1, x2, y2 = box_utils.xywh_to_xyxy(ann['bbox'])
        x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
            x1, y1, x2, y2, im_h, im_w
        )
        # The conversion runs for every annotation, valid or not.
        cx, cy, bw, bh = box_utils.xyxy_to_xywh_yolo(
            [x1, y1, x2, y2], im_h, im_w
        )
        if not (ann['area'] > 0 and x2 > x1 and y2 > y1):
            continue
        ann['clean_bbox'] = [cx, cy, bw, bh]
        kept.append(ann)

    n_kept = len(kept)
    new_boxes = np.zeros((n_kept, 4), dtype=entry['boxes'].dtype)
    new_classes = np.zeros(n_kept, dtype=entry['gt_classes'].dtype)
    for row, ann in enumerate(kept):
        class_id = self.json_category_id_to_contiguous_id[ann['category_id']]
        new_boxes[row, :] = ann['clean_bbox']
        new_classes[row] = class_id

    entry['boxes'] = np.append(entry['boxes'], new_boxes, axis=0)
    entry['gt_classes'] = np.append(entry['gt_classes'], new_classes)
def load_annotation(self, index):
    """Collect subject/object boxes, labels and predicates for one sample.

    Iterates over ``self.data[index]['relationships']``.  For the subject and
    the object of each relationship the label is read from ``'name'``,
    falling back to the first entry of ``'names'`` when ``'name'`` is absent.
    Relationships whose subject label, object label or predicate is not in
    ``self.all_objects_list`` / ``self.predicates_list`` are skipped.

    Returns:
        A tuple ``(boxes, labels, preds)`` of parallel lists: ``boxes`` holds
        ``[subject_box, object_box]`` pairs as returned by ``xywh_to_xyxy``,
        ``labels`` holds ``[subject_index, object_index]`` pairs from
        ``self._class_to_ind`` and ``preds`` holds predicate indices from
        ``self._preds_to_ind``.
    """
    boxes = []
    labels = []
    preds = []
    for spo in self.data[index]['relationships']:
        subject = spo['subject']
        try:
            gt_sbj_label = subject['name']
        except KeyError:
            # Some records only carry a list under 'names'.
            gt_sbj_label = ''.join(subject['names'][0])
        gt_sbj_bbox = subject['x'], subject['y'], subject['w'], subject['h']

        target = spo['object']
        try:
            gt_obj_label = target['name']
        except KeyError:
            gt_obj_label = ''.join(target['names'][0])
        gt_obj_bbox = target['x'], target['y'], target['w'], target['h']

        predicate = spo['predicate']
        if (gt_sbj_label not in self.all_objects_list
                or gt_obj_label not in self.all_objects_list
                or predicate not in self.predicates_list):
            continue

        # Boxes for subject and object, converted from xywh to xyxy.
        boxes.append([xywh_to_xyxy(gt_sbj_bbox), xywh_to_xyxy(gt_obj_bbox)])
        # Labels mapped to their class indices.
        labels.append([self._class_to_ind[gt_sbj_label],
                       self._class_to_ind[gt_obj_label]])
        preds.append(self._preds_to_ind[predicate])
    return boxes, labels, preds
def save_im_masks(im, M, id, dir):
    """Render an image together with its masks into ``vis/<dir>``.

    ``M`` is binarised in place (every positive value becomes 1) and
    RLE-encoded with ``mask_util.encode``.  Boxes are derived from the RLEs
    with ``mask_util.toBbox``, converted to xyxy and extended with two
    columns of ones before everything is handed to ``vis_one_image``.

    Args:
        im: image passed through to ``vis_one_image``.
        M: mask array; modified in place.
        id: identifier, stringified and used as the output name.
        dir: sub-directory of ``vis`` that receives the output.
    """
    from utils.boxes import xywh_to_xyxy
    import os

    out_dir = os.path.join('vis', dir)
    try:
        os.mkdir(out_dir)
    except OSError:
        # Best effort: typically the directory already exists.
        pass

    M[M > 0] = 1
    aug_rles = mask_util.encode(np.asarray(M, order='F'))
    boxes = xywh_to_xyxy(np.asarray(mask_util.toBbox(aug_rles)))
    # Two extra columns of ones per box -- presumably score and class for
    # the visualiser; TODO confirm against vis_one_image.
    boxes = np.append(boxes, np.ones((len(boxes), 2)), 1)

    from utils.vis import vis_one_image
    vis_one_image(
        im,
        str(id),
        out_dir,
        boxes,
        segms=aug_rles,
        keypoints=None,
        thresh=0.9,
        box_alpha=0.8,
        show_class=False,
    )
def _add_gt_annotations(self, entry):
    """Append this image's ground-truth annotations to an roidb entry.

    Annotations for ``entry['id']`` are loaded from ``self.COCO`` and
    filtered: those below ``cfg.TRAIN.GT_MIN_AREA``, flagged ``ignore == 1``,
    with zero area, or with a degenerate clipped box are dropped.  The
    survivors are appended to the per-box fields of ``entry`` (boxes, segms,
    classes, areas, overlaps, crowd flags, index map and, when
    ``self.keypoints`` is set, keypoints).
    """
    coco = self.COCO
    annotations = coco.loadAnns(
        coco.getAnnIds(imgIds=entry['id'], iscrowd=None))
    im_w = entry['width']
    im_h = entry['height']

    # Sanitize the annotations -- some boxes are invalid.
    kept_anns = []
    kept_segms = []
    for ann in annotations:
        # Polygon segmentations are lists; crowd regions are RLE encoded and
        # stored as dicts, which are left untouched.
        if isinstance(ann['segmentation'], list):
            # A valid polygon has >= 3 points, i.e. >= 6 coordinates.
            ann['segmentation'] = [
                poly for poly in ann['segmentation'] if len(poly) >= 6
            ]
        if ann['area'] < cfg.TRAIN.GT_MIN_AREA:
            continue
        if 'ignore' in ann and ann['ignore'] == 1:
            continue
        # (x1, y1, w, h) -> (x1, y1, x2, y2), then clip to the image.
        x1, y1, x2, y2 = box_utils.xywh_to_xyxy(ann['bbox'])
        x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
            x1, y1, x2, y2, im_h, im_w)
        # Keep only non-zero seg area and boxes with positive extent.
        if not (ann['area'] > 0 and x2 > x1 and y2 > y1):
            continue
        ann['clean_bbox'] = [x1, y1, x2, y2]
        kept_anns.append(ann)
        kept_segms.append(ann['segmentation'])

    n_kept = len(kept_anns)
    with_keypoints = self.keypoints is not None

    new_boxes = np.zeros((n_kept, 4), dtype=entry['boxes'].dtype)
    new_classes = np.zeros(n_kept, dtype=entry['gt_classes'].dtype)
    new_overlaps = np.zeros((n_kept, self.num_classes),
                            dtype=entry['gt_overlaps'].dtype)
    new_areas = np.zeros(n_kept, dtype=entry['seg_areas'].dtype)
    new_crowd = np.zeros(n_kept, dtype=entry['is_crowd'].dtype)
    new_ind_map = np.zeros(n_kept, dtype=entry['box_to_gt_ind_map'].dtype)
    if with_keypoints:
        new_keypoints = np.zeros((n_kept, 3, self.num_keypoints),
                                 dtype=entry['gt_keypoints'].dtype)

    any_visible_keypoints = False
    for row, ann in enumerate(kept_anns):
        class_id = self.json_category_id_to_contiguous_id[ann['category_id']]
        new_boxes[row, :] = ann['clean_bbox']
        new_classes[row] = class_id
        new_areas[row] = ann['area']
        new_crowd[row] = ann['iscrowd']
        new_ind_map[row] = row
        if with_keypoints:
            new_keypoints[row, :, :] = self._get_gt_keypoints(ann)
            if np.sum(new_keypoints[row, 2, :]) > 0:
                any_visible_keypoints = True
        if ann['iscrowd']:
            # Overlap -1 for all classes so crowd objects are excluded
            # during training.
            new_overlaps[row, :] = -1.0
        else:
            new_overlaps[row, class_id] = 1.0

    entry['boxes'] = np.append(entry['boxes'], new_boxes, axis=0)
    entry['segms'].extend(kept_segms)
    entry['gt_classes'] = np.append(entry['gt_classes'], new_classes)
    entry['seg_areas'] = np.append(entry['seg_areas'], new_areas)
    dense_overlaps = np.append(
        entry['gt_overlaps'].toarray(), new_overlaps, axis=0)
    entry['gt_overlaps'] = scipy.sparse.csr_matrix(dense_overlaps)
    entry['is_crowd'] = np.append(entry['is_crowd'], new_crowd)
    entry['box_to_gt_ind_map'] = np.append(
        entry['box_to_gt_ind_map'], new_ind_map)
    if with_keypoints:
        entry['gt_keypoints'] = np.append(
            entry['gt_keypoints'], new_keypoints, axis=0)
        entry['has_visible_keypoints'] = any_visible_keypoints
def _add_gt_annotations(self, entry):
    """Extend an roidb entry with the image's ground-truth annotations.

    Loads the annotations of ``entry['id']`` from ``self.COCO``, discards
    the unusable ones (area below ``cfg.TRAIN.GT_MIN_AREA``, ``ignore == 1``,
    zero area, or a clipped box without positive width and height) and
    appends the rest to the arrays stored in ``entry``.
    """
    ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
    image_w, image_h = entry['width'], entry['height']

    good_objs, good_segms = [], []
    for obj in self.COCO.loadAnns(ann_ids):
        segm = obj['segmentation']
        if isinstance(segm, list):
            # Polygons need >= 3 points (>= 6 coordinates); crowd regions
            # are RLE dicts and skip this filter.
            obj['segmentation'] = [p for p in segm if len(p) >= 6]
        if obj['area'] < cfg.TRAIN.GT_MIN_AREA or obj.get('ignore') == 1:
            continue
        # Convert (x1, y1, w, h) to (x1, y1, x2, y2) and clip to the image.
        x1, y1, x2, y2 = box_utils.xywh_to_xyxy(obj['bbox'])
        x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
            x1, y1, x2, y2, image_h, image_w
        )
        # Require non-zero seg area and a box with positive extent.
        if obj['area'] > 0 and x2 > x1 and y2 > y1:
            obj['clean_bbox'] = [x1, y1, x2, y2]
            good_objs.append(obj)
            good_segms.append(obj['segmentation'])

    count = len(good_objs)
    use_keypoints = self.keypoints is not None

    boxes = np.zeros((count, 4), dtype=entry['boxes'].dtype)
    gt_classes = np.zeros(count, dtype=entry['gt_classes'].dtype)
    gt_overlaps = np.zeros(
        (count, self.num_classes), dtype=entry['gt_overlaps'].dtype
    )
    seg_areas = np.zeros(count, dtype=entry['seg_areas'].dtype)
    is_crowd = np.zeros(count, dtype=entry['is_crowd'].dtype)
    box_to_gt_ind_map = np.zeros(
        count, dtype=entry['box_to_gt_ind_map'].dtype
    )
    if use_keypoints:
        gt_keypoints = np.zeros(
            (count, 3, self.num_keypoints),
            dtype=entry['gt_keypoints'].dtype
        )

    saw_visible_keypoint = False
    for i, obj in enumerate(good_objs):
        label = self.json_category_id_to_contiguous_id[obj['category_id']]
        boxes[i, :] = obj['clean_bbox']
        gt_classes[i] = label
        seg_areas[i] = obj['area']
        is_crowd[i] = obj['iscrowd']
        box_to_gt_ind_map[i] = i
        if use_keypoints:
            gt_keypoints[i, :, :] = self._get_gt_keypoints(obj)
            if np.sum(gt_keypoints[i, 2, :]) > 0:
                saw_visible_keypoint = True
        if obj['iscrowd']:
            # -1 overlap with every class keeps crowd objects out of
            # training.
            gt_overlaps[i, :] = -1.0
        else:
            gt_overlaps[i, label] = 1.0

    entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
    entry['segms'].extend(good_segms)
    entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes)
    entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas)
    entry['gt_overlaps'] = scipy.sparse.csr_matrix(
        np.append(entry['gt_overlaps'].toarray(), gt_overlaps, axis=0)
    )
    entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
    entry['box_to_gt_ind_map'] = np.append(
        entry['box_to_gt_ind_map'], box_to_gt_ind_map
    )
    if use_keypoints:
        entry['gt_keypoints'] = np.append(
            entry['gt_keypoints'], gt_keypoints, axis=0
        )
        entry['has_visible_keypoints'] = saw_visible_keypoint
def _add_gt_annotations(self, entry):
    """Append object and relationship ground truth to an roidb entry.

    First the image's COCO annotations are filtered (minimum area,
    ``ignore`` flag, degenerate clipped boxes) and appended to the per-box
    fields of ``entry``.  Then the relationships stored in
    ``self.rel_anns[entry['file_name']]`` are appended as subject / object /
    predicate boxes, classes and one-hot overlap matrices.  Finally the
    ``'file_name'`` key is removed from ``entry``.
    """
    annotations = self.COCO.loadAnns(
        self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None))
    im_w = entry['width']
    im_h = entry['height']

    # Sanitize bboxes -- some are invalid.
    kept_anns = []
    # Segmentations are not collected in this variant, so this stays empty.
    kept_segms = []
    for ann in annotations:
        if ann['area'] < cfg.TRAIN.GT_MIN_AREA:
            continue
        if 'ignore' in ann and ann['ignore'] == 1:
            continue
        # (x1, y1, w, h) -> (x1, y1, x2, y2), clipped to the image.
        x1, y1, x2, y2 = box_utils.xywh_to_xyxy(ann['bbox'])
        x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
            x1, y1, x2, y2, im_h, im_w)
        # Require non-zero seg area and a box with positive extent.
        if not (ann['area'] > 0 and x2 > x1 and y2 > y1):
            continue
        ann['clean_bbox'] = [x1, y1, x2, y2]
        kept_anns.append(ann)

    n_kept = len(kept_anns)
    with_keypoints = self.keypoints is not None
    new_boxes = np.zeros((n_kept, 4), dtype=entry['boxes'].dtype)
    new_classes = np.zeros(n_kept, dtype=entry['gt_classes'].dtype)
    new_overlaps = np.zeros((n_kept, self.num_classes),
                            dtype=entry['gt_overlaps'].dtype)
    new_areas = np.zeros(n_kept, dtype=entry['seg_areas'].dtype)
    new_crowd = np.zeros(n_kept, dtype=entry['is_crowd'].dtype)
    new_ind_map = np.zeros(n_kept, dtype=entry['box_to_gt_ind_map'].dtype)
    if with_keypoints:
        new_keypoints = np.zeros((n_kept, 3, self.num_keypoints),
                                 dtype=entry['gt_keypoints'].dtype)

    any_visible_keypoints = False
    for row, ann in enumerate(kept_anns):
        class_id = self.json_category_id_to_contiguous_id[ann['category_id']]
        new_boxes[row, :] = ann['clean_bbox']
        new_classes[row] = class_id
        new_areas[row] = ann['area']
        new_crowd[row] = ann['iscrowd']
        new_ind_map[row] = row
        if with_keypoints:
            new_keypoints[row, :, :] = self._get_gt_keypoints(ann)
            if np.sum(new_keypoints[row, 2, :]) > 0:
                any_visible_keypoints = True
        if ann['iscrowd']:
            # Overlap -1 for all classes so crowd objects are excluded
            # during training.
            new_overlaps[row, :] = -1.0
        else:
            new_overlaps[row, class_id] = 1.0

    entry['boxes'] = np.append(entry['boxes'], new_boxes, axis=0)
    entry['segms'].extend(kept_segms)
    entry['gt_classes'] = np.append(entry['gt_classes'], new_classes)
    entry['seg_areas'] = np.append(entry['seg_areas'], new_areas)
    entry['gt_overlaps'] = scipy.sparse.csr_matrix(
        np.append(entry['gt_overlaps'].toarray(), new_overlaps, axis=0))
    entry['is_crowd'] = np.append(entry['is_crowd'], new_crowd)
    entry['box_to_gt_ind_map'] = np.append(
        entry['box_to_gt_ind_map'], new_ind_map)
    if with_keypoints:
        entry['gt_keypoints'] = np.append(
            entry['gt_keypoints'], new_keypoints, axis=0)
        entry['has_visible_keypoints'] = any_visible_keypoints
    entry['dataset_name'] = self.name

    # Relationship annotations.
    relations = self.rel_anns[entry['file_name']]
    n_rel = len(relations)
    sbj_boxes = np.zeros((n_rel, 4), dtype=entry['sbj_gt_boxes'].dtype)
    obj_boxes = np.zeros((n_rel, 4), dtype=entry['obj_gt_boxes'].dtype)
    sbj_classes = np.zeros(n_rel, dtype=entry['sbj_gt_classes'].dtype)
    obj_classes = np.zeros(n_rel, dtype=entry['obj_gt_classes'].dtype)
    prd_classes = np.zeros(n_rel, dtype=entry['prd_gt_classes'].dtype)
    for row, rel in enumerate(relations):
        subject, target = rel['subject'], rel['object']
        # Subject (class ids exclude background).
        sbj_boxes[row] = box_utils_rel.y1y2x1x2_to_x1y1x2y2(subject['bbox'])
        sbj_classes[row] = subject['category']
        # Object (class ids exclude background).
        obj_boxes[row] = box_utils_rel.y1y2x1x2_to_x1y1x2y2(target['bbox'])
        obj_classes[row] = target['category']
        # Predicate (class ids exclude background).
        prd_classes[row] = rel['predicate']

    entry['sbj_gt_boxes'] = np.append(
        entry['sbj_gt_boxes'], sbj_boxes, axis=0)
    entry['obj_gt_boxes'] = np.append(
        entry['obj_gt_boxes'], obj_boxes, axis=0)
    entry['sbj_gt_classes'] = np.append(entry['sbj_gt_classes'], sbj_classes)
    entry['obj_gt_classes'] = np.append(entry['obj_gt_classes'], obj_classes)
    entry['prd_gt_classes'] = np.append(entry['prd_gt_classes'], prd_classes)

    # One-hot overlap matrices for subjects, objects and predicates.
    sbj_overlaps = np.zeros((n_rel, self.num_obj_classes),
                            dtype=entry['sbj_gt_overlaps'].dtype)
    for row, sbj_cls in enumerate(sbj_classes):
        sbj_overlaps[row, sbj_cls] = 1.0
    entry['sbj_gt_overlaps'] = scipy.sparse.csr_matrix(
        np.append(entry['sbj_gt_overlaps'].toarray(), sbj_overlaps, axis=0))

    obj_overlaps = np.zeros((n_rel, self.num_obj_classes),
                            dtype=entry['obj_gt_overlaps'].dtype)
    for row, obj_cls in enumerate(obj_classes):
        obj_overlaps[row, obj_cls] = 1.0
    entry['obj_gt_overlaps'] = scipy.sparse.csr_matrix(
        np.append(entry['obj_gt_overlaps'].toarray(), obj_overlaps, axis=0))

    prd_overlaps = np.zeros((n_rel, self.num_prd_classes),
                            dtype=entry['prd_gt_overlaps'].dtype)
    pair_ind_map = np.zeros(n_rel, dtype=entry['pair_to_gt_ind_map'].dtype)
    for row, prd_cls in enumerate(prd_classes):
        prd_overlaps[row, prd_cls] = 1.0
        pair_ind_map[row] = row
    entry['prd_gt_overlaps'] = scipy.sparse.csr_matrix(
        np.append(entry['prd_gt_overlaps'].toarray(), prd_overlaps, axis=0))
    entry['pair_to_gt_ind_map'] = np.append(
        entry['pair_to_gt_ind_map'], pair_ind_map)

    # The file name is no longer needed once relationships are attached.
    if 'file_name' in entry:
        del entry['file_name']
def _add_gt_annotations(self, entry):
    """Add ground truth annotation metadata to an roidb entry.

    The ground-truth label image of frame ``entry['idx']`` in sequence
    ``entry['seq_idx']`` is split into one binary mask per non-zero label.
    Each mask becomes an annotation (RLE segmentation, area, expanded
    bounding box, category and instance ids).  Annotations with zero area or
    a degenerate clipped box are dropped; the rest are appended to the
    per-box fields of ``entry``, including the per-instance overlap matrix
    ``entry['gt_overlaps_id']``.
    """
    seq_idx = entry['seq_idx']
    idx = entry['idx']
    self.set_to_sequence(seq_idx)
    # Ground-truth label image for this frame.
    gt = self.get_gt(idx)

    objs = []
    for val in np.unique(gt):
        # Label 0 is background.
        if val == 0:
            continue
        mask = np.array(gt == val, dtype=np.uint8)
        # A boolean comparison cast to uint8 only holds 0 and 1.
        assert mask.max() <= 1
        x, y, w, h = cv2.boundingRect(mask)
        x, y, w, h = self._expand_box(x, y, w, h, rate=0.05)

        obj = {}
        obj['segmentation'] = mask_util.encode(
            np.array(mask, order='F', dtype=np.uint8))
        obj['area'] = np.sum(mask)
        obj['iscrowd'] = 0
        obj['bbox'] = x, y, w, h
        if self.cls_mapper is not None:
            # Category id comes from cls_mapper when one is configured.
            obj['category_id'] = self.cls_mapper[val]
        elif not self.use_local_id:
            obj['category_id'] = (
                self.global_instance_id_start_of_seq[seq_idx] + val - 1)
        else:
            obj['category_id'] = val
        obj['instance_id'] = val
        assert (self.global_instance_id_start_of_seq[seq_idx] != 0)
        # val - 1 to remove background.
        obj['global_instance_id'] = (
            self.global_instance_id_start_of_seq[seq_idx] + val - 1)
        objs.append(obj)

    # Sanitize bboxes -- some are invalid
    valid_objs = []
    valid_segms = []
    width = entry['width']
    height = entry['height']
    for obj in objs:
        # Segmentations built above are RLE dicts.
        assert (isinstance(obj['segmentation'], dict))
        # Convert from (x1, y1, w, h) to (x1, y1, x2, y2)
        x1, y1, x2, y2 = box_utils.xywh_to_xyxy(obj['bbox'])
        x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
            x1, y1, x2, y2, height, width)
        # Require non-zero seg area and a box with positive extent
        if obj['area'] > 0 and x2 > x1 and y2 > y1:
            obj['clean_bbox'] = [x1, y1, x2, y2]
            valid_objs.append(obj)
            valid_segms.append(obj['segmentation'])

    num_valid_objs = len(valid_objs)
    boxes = np.zeros((num_valid_objs, 4), dtype=entry['boxes'].dtype)
    gt_classes = np.zeros((num_valid_objs), dtype=entry['gt_classes'].dtype)
    instance_id = np.zeros((num_valid_objs),
                           dtype=entry['instance_id'].dtype)
    global_instance_id = np.zeros((num_valid_objs),
                                  dtype=entry['global_instance_id'].dtype)
    gt_overlaps = np.zeros((num_valid_objs, self.num_classes),
                           dtype=entry['gt_overlaps'].dtype)
    gt_overlaps_id = np.zeros(
        (num_valid_objs, self.number_of_instance_ids),
        dtype=entry['gt_overlaps_id'].dtype)
    seg_areas = np.zeros((num_valid_objs), dtype=entry['seg_areas'].dtype)
    is_crowd = np.zeros((num_valid_objs), dtype=entry['is_crowd'].dtype)
    box_to_gt_ind_map = np.zeros((num_valid_objs),
                                 dtype=entry['box_to_gt_ind_map'].dtype)

    for ix, obj in enumerate(valid_objs):
        if obj['category_id'] is not None:
            cls = obj['category_id']
        else:
            # No category id specified: use background instead, whose
            # index is 'self.num_classes - 1'.
            cls = self.num_classes - 1
        boxes[ix, :] = obj['clean_bbox']
        gt_classes[ix] = cls
        instance_id[ix] = obj['instance_id']
        global_instance_id[ix] = obj['global_instance_id']
        seg_areas[ix] = obj['area']
        is_crowd[ix] = obj['iscrowd']
        box_to_gt_ind_map[ix] = ix
        if obj['iscrowd']:
            # Set overlap to -1 for all classes for crowd objects
            # so they will be excluded during training
            gt_overlaps[ix, :] = -1.0
            gt_overlaps_id[ix, :] = -1.0
        else:
            gt_overlaps[ix, cls] = 1.0
            gt_overlaps_id[ix, global_instance_id[ix]] = 1.0

    entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
    entry['segms'].extend(valid_segms)
    entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes)
    entry['instance_id'] = np.append(entry['instance_id'], instance_id)
    entry['global_instance_id'] = np.append(entry['global_instance_id'],
                                            global_instance_id)
    entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas)
    entry['gt_overlaps'] = np.append(entry['gt_overlaps'].toarray(),
                                     gt_overlaps,
                                     axis=0)
    entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
    entry['gt_overlaps_id'] = np.append(entry['gt_overlaps_id'].toarray(),
                                        gt_overlaps_id,
                                        axis=0)
    entry['gt_overlaps_id'] = scipy.sparse.csr_matrix(
        entry['gt_overlaps_id'])
    entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
    entry['box_to_gt_ind_map'] = np.append(entry['box_to_gt_ind_map'],
                                           box_to_gt_ind_map)
    # Both overlap matrices must describe the same set of boxes.
    assert (
        entry['gt_overlaps_id'].shape[0] == entry['gt_overlaps'].shape[0])
def augment_images(roidb, augment_masks=False, augment_boxes=False):
    """Apply the configured augmentation sequence to images and their masks.

    Args:
        roidb: list of roidb entries; each provides an ``'image'`` path and
            RLE ``'segms'`` plus the per-box fields sliced below.
        augment_masks: when true, ``masks_augmentation`` is applied to the
            decoded masks with probability 0.5 before the shared transform.
        augment_boxes: when true, ``scale_boxes`` is applied to the boxes
            derived from the augmented masks with probability 0.5.

    Returns:
        ``(roi_augs, im_augs)``: augmented roidb entries and images, in the
        order of ``roidb``.  An entry whose augmented masks all have 15 or
        fewer foreground pixels is returned unchanged together with its
        original image.
    """
    from pycocotools import mask as mask_util
    import numpy as np
    from scipy import ndimage

    if cfg.TRAIN.AUGMENTATION_MODE == 'OPTIMISTIC':
        seq, hooks_masks = seq_optimistic, hooks_optimistic_masks
    else:
        seq, hooks_masks = seq_old, hooks_masks_old

    import cv2
    # Reverse the channel order of every loaded image (OpenCV loads BGR).
    ims = [cv2.imread(roi['image'])[:, :, [2, 1, 0]] for roi in roidb]
    # Call this for each batch again, NOT only once at the start.
    seq_det = seq.to_deterministic()
    aug_ims_augs = seq_det.augment_images(ims)

    orig_masks = [mask_util.decode(roi['segms']) for roi in roidb]
    if augment_masks and np.random.random() > 0.5:
        orig_masks = masks_augmentation(orig_masks)
    mask_augs = seq_det.augment_images(orig_masks, hooks=hooks_masks)

    # NOTE(review): holes are filled only for the first entry's masks --
    # looks like a batch size of one is assumed; confirm with callers.
    for idx in range(mask_augs[0].shape[2]):
        mask_augs[0][:, :, idx] = ndimage.binary_fill_holes(
            mask_augs[0][:, :, idx]).astype(np.uint8)

    im_augs, roi_augs = [], []
    for im, im_aug, roi, M in zip(ims, aug_ims_augs, roidb, mask_augs):
        M[M > 0] = 1
        aug_rles = mask_util.encode(np.asarray(M, order='F'))
        # Keep only masks with more than 15 foreground pixels.
        valid = np.sum(M, axis=(0, 1)) > 15
        aug_rles = [rle for rle, keep in zip(aug_rles, valid) if keep]
        n_masks = len(aug_rles)
        if aug_rles:
            for rle_aug in aug_rles:
                rle_aug['size'] = [int(i) for i in rle_aug['size']]
            new_boxes = np.float32(mask_util.toBbox(aug_rles))
            if augment_boxes and np.random.random() > 0.5:
                new_boxes = scale_boxes(new_boxes)
            new_boxes = box_utils.xywh_to_xyxy(new_boxes)

            roi_aug = roi.copy()
            roi_aug['box_to_gt_ind_map'] = np.asarray(
                range(n_masks), np.int32)
            # NOTE(review): assigned so the boxes agree with the other
            # fields, which are all trimmed to n_masks -- confirm this
            # assignment is live in the source file.
            roi_aug['boxes'] = new_boxes
            roi_aug['gt_classes'] = roi['gt_classes'][:n_masks]
            roi_aug['gt_overlaps'] = roi['gt_overlaps'][:n_masks]
            roi_aug['is_crowd'] = roi['is_crowd'][:n_masks]
            roi_aug['max_classes'] = roi['max_classes'][:n_masks]
            roi_aug['max_overlaps'] = roi['max_overlaps'][:n_masks]
            roi_aug['seg_areas'] = np.float32(mask_util.area(aug_rles))
            roi_aug['segms'] = aug_rles
            roi_augs.append(roi_aug)
            im_augs.append(im_aug)
        else:
            # Nothing usable survived augmentation: fall back to the
            # original entry and image.
            roi_augs.append(roi)
            im_augs.append(im)

    # Restore the channel order the images were loaded with.
    im_augs = [im[:, :, [2, 1, 0]] for im in im_augs]
    return roi_augs, im_augs
def _add_gt_annotations(self, entry):
    """Add ground truth annotation metadata to an roidb entry.

    Besides filling the per-box fields of ``entry``, every kept annotation
    is registered in ``self.cat_data[category_id]`` as a query dict with its
    box, segmentations, image path and area.  All kept boxes are labelled
    with class 1 in ``gt_classes`` / ``gt_overlaps``; the original category
    id is stored in ``gt_cats``.
    """
    ann_ids = self.COCO.getAnnIds(imgIds=entry['id'])
    objs = self.COCO.loadAnns(ann_ids)
    # Sanitize bboxes -- some are invalid
    valid_objs = []
    valid_segms = []
    width = entry['width']
    height = entry['height']
    for obj in objs:
        im_path = self.image_path_from_index(obj['image_id'])
        segm = obj['segmentation']
        # Polygons are lists; crowd regions are RLE encoded and stored as
        # dicts.
        if isinstance(segm, list):
            # Valid polygons have >= 3 points, so require >= 6 coordinates
            segm = [p for p in segm if len(p) >= 6]
            obj['segmentation'] = segm
        if obj['area'] < cfg.TRAIN.GT_MIN_AREA:
            continue
        if 'ignore' in obj and obj['ignore'] == 1:
            continue
        # Convert from (x1, y1, w, h) to (x1, y1, x2, y2)
        x1, y1, x2, y2 = box_utils.xywh_to_xyxy(obj['bbox'])
        x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
            x1, y1, x2, y2, height, width
        )
        # Require non-zero seg area and a box with positive extent
        if not (obj['area'] > 0 and x2 > x1 and y2 > y1):
            continue
        obj['clean_bbox'] = [x1, y1, x2, y2]

        if 'counts' in segm and isinstance(segm['counts'], list):
            # Uncompressed RLE: compress it.  Magic RLE format handling
            # painfully discovered by looking at the COCO API showAnns
            # function.
            rle = mask_util.frPyObjects(segm, height, width)
            valid_segms.append(rle)
        else:
            binary_mask = segm_utils.polys_to_mask(segm, height, width)
            # Skip masks with a single distinct value (all one value).
            if len(np.unique(binary_mask)) == 1:
                continue
            valid_segms.append(segm)
        valid_objs.append(obj)

        # NOTE(review): 'segms' references the shared, still-growing list
        # of all segmentations for this image rather than this object's
        # own segmentation -- confirm that is intended.
        query = {
            'boxes': obj['clean_bbox'],
            'segms': valid_segms,
            'image_path': im_path,
            'area': obj['area'],
        }
        self.cat_data[obj['category_id']].append(query)

    num_valid_objs = len(valid_objs)
    boxes = np.zeros((num_valid_objs, 4), dtype=entry['boxes'].dtype)
    gt_classes = np.zeros((num_valid_objs), dtype=entry['gt_classes'].dtype)
    gt_cats = np.zeros((num_valid_objs), dtype=entry['gt_cats'].dtype)
    gt_overlaps = np.zeros(
        (num_valid_objs, cfg.MODEL.NUM_CLASSES),
        dtype=entry['gt_overlaps'].dtype
    )
    seg_areas = np.zeros((num_valid_objs), dtype=entry['seg_areas'].dtype)
    is_crowd = np.zeros((num_valid_objs), dtype=entry['is_crowd'].dtype)
    box_to_gt_ind_map = np.zeros(
        (num_valid_objs), dtype=entry['box_to_gt_ind_map'].dtype
    )
    if self.keypoints is not None:
        gt_keypoints = np.zeros(
            (num_valid_objs, 3, self.num_keypoints),
            dtype=entry['gt_keypoints'].dtype
        )

    im_has_visible_keypoints = False
    for ix, obj in enumerate(valid_objs):
        cls = obj['category_id']
        boxes[ix, :] = obj['clean_bbox']
        # Every box gets class 1; the real category goes to gt_cats.
        gt_classes[ix] = 1
        gt_cats[ix] = cls
        seg_areas[ix] = obj['area']
        is_crowd[ix] = obj['iscrowd']
        box_to_gt_ind_map[ix] = ix
        if self.keypoints is not None:
            gt_keypoints[ix, :, :] = self._get_gt_keypoints(obj)
            if np.sum(gt_keypoints[ix, 2, :]) > 0:
                im_has_visible_keypoints = True
        gt_overlaps[ix, 1] = 1.0

    box_utils.validate_boxes(boxes, width=width, height=height)
    entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
    entry['segms'].extend(valid_segms)
    entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes)
    entry['gt_cats'] = np.append(entry['gt_cats'], gt_cats)
    entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas)
    entry['gt_overlaps'] = np.append(
        entry['gt_overlaps'].toarray(), gt_overlaps, axis=0
    )
    entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
    entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
    entry['box_to_gt_ind_map'] = np.append(
        entry['box_to_gt_ind_map'], box_to_gt_ind_map
    )
    if self.keypoints is not None:
        entry['gt_keypoints'] = np.append(
            entry['gt_keypoints'], gt_keypoints, axis=0
        )
        entry['has_visible_keypoints'] = im_has_visible_keypoints
def _add_gt_annotations(self, entry):
    """Add ground truth annotation metadata to an roidb entry.

    In addition to the usual per-box fields, this variant records three
    optional per-annotation values when the first loaded annotation carries
    them: ``'score'`` (appended to ``entry['gt_scores']``), ``'source'``
    (appended to ``entry['gt_source']``) and ``'dataset'`` (its index in
    ``cfg.TRAIN.DATASETS`` is appended to ``entry['dataset_id']``).  The
    three entry fields are created empty if they are missing.
    """
    ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
    objs = self.COCO.loadAnns(ann_ids)

    # Optional fields are detected on the first annotation only.
    has_scores = bool(objs) and 'score' in objs[0]
    has_source = bool(objs) and 'source' in objs[0]
    # TODO: Check. With no annotations this assumes that the "dataset"
    # field is given.
    has_dataset = 'dataset' in objs[0] if objs else True

    if 'gt_scores' not in entry:
        entry['gt_scores'] = np.empty((0), dtype=np.float32)
    if 'dataset_id' not in entry:
        entry['dataset_id'] = np.empty((0), dtype=np.int32)
    if 'gt_source' not in entry:
        entry['gt_source'] = np.empty((0), dtype=np.int32)

    # Sanitize bboxes -- some are invalid
    valid_objs = []
    valid_segms = []
    width = entry['width']
    height = entry['height']
    for obj in objs:
        # Polygons are lists; crowd regions are RLE encoded and stored as
        # dicts.
        if isinstance(obj['segmentation'], list):
            # Valid polygons have >= 3 points, so require >= 6 coordinates
            obj['segmentation'] = [
                p for p in obj['segmentation'] if len(p) >= 6
            ]
        if obj['area'] < cfg.TRAIN.GT_MIN_AREA:
            continue
        if 'ignore' in obj and obj['ignore'] == 1:
            continue
        # Convert from (x1, y1, w, h) to (x1, y1, x2, y2)
        x1, y1, x2, y2 = box_utils.xywh_to_xyxy(obj['bbox'])
        x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
            x1, y1, x2, y2, height, width)
        # Require non-zero seg area and a box with positive extent
        if obj['area'] > 0 and x2 > x1 and y2 > y1:
            obj['clean_bbox'] = [x1, y1, x2, y2]
            valid_objs.append(obj)
            valid_segms.append(obj['segmentation'])

    num_valid_objs = len(valid_objs)
    boxes = np.zeros((num_valid_objs, 4), dtype=entry['boxes'].dtype)
    gt_classes = np.zeros((num_valid_objs), dtype=entry['gt_classes'].dtype)
    gt_overlaps = np.zeros((num_valid_objs, self.num_classes),
                           dtype=entry['gt_overlaps'].dtype)
    gt_scores = np.zeros((num_valid_objs), dtype=entry['gt_scores'].dtype)
    gt_source = np.zeros((num_valid_objs), dtype=entry['gt_source'].dtype)
    dataset_annot = np.zeros((num_valid_objs),
                             dtype=entry['dataset_id'].dtype)
    seg_areas = np.zeros((num_valid_objs), dtype=entry['seg_areas'].dtype)
    is_crowd = np.zeros((num_valid_objs), dtype=entry['is_crowd'].dtype)
    box_to_gt_ind_map = np.zeros((num_valid_objs),
                                 dtype=entry['box_to_gt_ind_map'].dtype)
    if self.keypoints is not None:
        gt_keypoints = np.zeros((num_valid_objs, 3, self.num_keypoints),
                                dtype=entry['gt_keypoints'].dtype)

    im_has_visible_keypoints = False
    for ix, obj in enumerate(valid_objs):
        cls = self.json_category_id_to_contiguous_id[obj['category_id']]
        boxes[ix, :] = obj['clean_bbox']
        gt_classes[ix] = cls
        if has_scores:
            # Detection "soft" score.
            gt_scores[ix] = obj['score']
        if has_source:
            # Annotation source (det or track).
            gt_source[ix] = obj['source']
        if has_dataset:
            # Dataset id: position of the name in cfg.TRAIN.DATASETS.
            dataset_annot[ix] = cfg.TRAIN.DATASETS.index(obj['dataset'])
        seg_areas[ix] = obj['area']
        is_crowd[ix] = obj['iscrowd']
        box_to_gt_ind_map[ix] = ix
        if self.keypoints is not None:
            gt_keypoints[ix, :, :] = self._get_gt_keypoints(obj)
            if np.sum(gt_keypoints[ix, 2, :]) > 0:
                im_has_visible_keypoints = True
        if obj['iscrowd']:
            # Set overlap to -1 for all classes for crowd objects
            # so they will be excluded during training
            gt_overlaps[ix, :] = -1.0
        else:
            gt_overlaps[ix, cls] = 1.0

    entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
    entry['segms'].extend(valid_segms)
    entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes)
    if has_scores:
        entry['gt_scores'] = np.append(entry['gt_scores'], gt_scores)
    if has_source:
        entry['gt_source'] = np.append(entry['gt_source'], gt_source)
    if has_dataset:
        entry['dataset_id'] = np.append(entry['dataset_id'], dataset_annot)
    entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas)
    entry['gt_overlaps'] = np.append(entry['gt_overlaps'].toarray(),
                                     gt_overlaps,
                                     axis=0)
    entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
    entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
    entry['box_to_gt_ind_map'] = np.append(entry['box_to_gt_ind_map'],
                                           box_to_gt_ind_map)
    if self.keypoints is not None:
        entry['gt_keypoints'] = np.append(entry['gt_keypoints'],
                                          gt_keypoints,
                                          axis=0)
        entry['has_visible_keypoints'] = im_has_visible_keypoints