def coco(self):
    """
    :return: a COCO-like object that we can use to evaluate detection!
    """
    anns = []
    for i, (cls_array, box_array) in enumerate(zip(self.gt_classes, self.gt_boxes)):
        for cls, box in zip(cls_array.tolist(), box_array.tolist()):
            anns.append({
                'area': (box[3] - box[1] + 1) * (box[2] - box[0] + 1),
                'bbox': [box[0], box[1], box[2] - box[0] + 1, box[3] - box[1] + 1],
                'category_id': cls,
                'id': len(anns),
                'image_id': i,
                'iscrowd': 0,
            })
    fauxcoco = COCO()
    fauxcoco.dataset = {
        'info': {'description': 'ayy lmao'},
        'images': [{'id': i} for i in range(self.__len__())],
        'categories': [{'supercategory': 'person', 'id': i, 'name': name}
                       for i, name in enumerate(self.ind_to_classes)
                       if name != '__background__'],
        'annotations': anns,
    }
    fauxcoco.createIndex()
    return fauxcoco
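# Hedged usage sketch (not from the original source): plugging the faux COCO
# object above into pycocotools evaluation. `dataset` stands for an instance of
# the surrounding class and 'detections.json' for a results file in the
# standard COCO format; both names are illustrative assumptions. Note that the
# results must reference image ids 0..len(dataset)-1, matching the faux index
# built above.
from pycocotools.cocoeval import COCOeval

def evaluate_with_fauxcoco(dataset, res_file='detections.json'):
    gt = dataset.coco()                   # build the COCO-like ground truth
    dt = gt.loadRes(res_file)             # load detections against it
    coco_eval = COCOeval(gt, dt, 'bbox')  # bounding-box evaluation
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()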
class coco(imdb):
    def __init__(self, image_set, year):
        imdb.__init__(self, 'coco_' + year + '_' + image_set)
        # COCO specific config options
        self.config = {'use_salt': True, 'cleanup': True}
        # name, paths
        self._year = year
        self._image_set = image_set
        self._data_path = osp.join(cfg.DATA_DIR, 'coco')
        # load COCO API, classes, class <-> id mappings
        self._COCO = COCO(self._get_ann_file())
        cats = self._COCO.loadCats(self._COCO.getCatIds())
        self._classes = tuple(['__background__'] + [c['name'] for c in cats])
        self._class_to_ind = dict(
            list(zip(self.classes, list(range(self.num_classes)))))
        self._class_to_coco_cat_id = dict(
            list(zip([c['name'] for c in cats], self._COCO.getCatIds())))
        self._image_index = self._load_image_set_index()
        # Default to roidb handler
        self.set_proposal_method('gt')
        self.competition_mode(False)

        # Some image sets are "views" (i.e. subsets) into others.
        # For example, minival2014 is a random 5000 image subset of val2014.
        # This mapping tells us where the view's images and proposals come from.
        self._view_map = {
            'minival2014': 'val2014',  # 5k val2014 subset
            'valminusminival2014': 'val2014',  # val2014 \setminus minival2014
            'test-dev2015': 'test2015',
            'valminuscapval2014': 'val2014',
            'capval2014': 'val2014',
            'captest2014': 'val2014'
        }
        coco_name = image_set + year  # e.g., "val2014"
        self._data_name = (self._view_map[coco_name]
                           if coco_name in self._view_map else coco_name)
        # Dataset splits that have ground-truth annotations (test splits
        # do not have gt annotations)
        self._gt_splits = ('train', 'val', 'minival')

    def _get_ann_file(self):
        prefix = 'instances' if self._image_set.find('test') == -1 \
            else 'image_info'
        return osp.join(self._data_path, 'annotations',
                        prefix + '_' + self._image_set + self._year + '.json')

    def _load_image_set_index(self):
        """
        Load image ids.
        """
        image_ids = self._COCO.getImgIds()
        return image_ids

    def image_path_at(self, i):
        """
        Return the absolute path to image i in the image sequence.
        """
        return self.image_path_from_index(self._image_index[i])

    def image_id_at(self, i):
        """
        Return the COCO image id of image i in the image sequence.
        """
        return self._image_index[i]

    def image_path_from_index(self, index):
        """
        Construct an image path from the image's "index" identifier.
        """
        # Example image path for index=119993:
        #   images/train2014/COCO_train2014_000000119993.jpg
        file_name = ('COCO_' + self._data_name + '_' +
                     str(index).zfill(12) + '.jpg')
        image_path = osp.join(self._data_path, 'images', self._data_name,
                              file_name)
        assert osp.exists(image_path), \
            'Path does not exist: {}'.format(image_path)
        return image_path

    def gt_roidb(self):
        """
        Return the database of ground-truth regions of interest.

        This function loads/saves from/to a cache file to speed up future calls.
        """
        cache_file = osp.join(self.cache_path, self.name + '_gt_roidb.pkl')
        if osp.exists(cache_file):
            with open(cache_file, 'rb') as fid:
                roidb = pickle.load(fid)
            print('{} gt roidb loaded from {}'.format(self.name, cache_file))
            return roidb

        gt_roidb = [
            self._load_coco_annotation(index) for index in self._image_index
        ]

        with open(cache_file, 'wb') as fid:
            pickle.dump(gt_roidb, fid, pickle.HIGHEST_PROTOCOL)
        print('wrote gt roidb to {}'.format(cache_file))
        return gt_roidb

    def _load_coco_annotation(self, index):
        """
        Loads COCO bounding-box instance annotations. Crowd instances are
        handled by marking their overlaps (with all categories) to -1. This
        overlap value means that crowd "instances" are excluded from training.
        """
        im_ann = self._COCO.loadImgs(index)[0]
        width = im_ann['width']
        height = im_ann['height']

        annIds = self._COCO.getAnnIds(imgIds=index, iscrowd=None)
        objs = self._COCO.loadAnns(annIds)

        # Sanitize bboxes -- some are invalid
        valid_objs = []
        for obj in objs:
            x1 = np.max((0, obj['bbox'][0]))
            y1 = np.max((0, obj['bbox'][1]))
            x2 = np.min((width - 1, x1 + np.max((0, obj['bbox'][2] - 1))))
            y2 = np.min((height - 1, y1 + np.max((0, obj['bbox'][3] - 1))))
            if obj['area'] > 0 and x2 >= x1 and y2 >= y1:
                obj['clean_bbox'] = [x1, y1, x2, y2]
                valid_objs.append(obj)
        objs = valid_objs
        num_objs = len(objs)

        boxes = np.zeros((num_objs, 4), dtype=np.uint16)
        gt_classes = np.zeros((num_objs), dtype=np.int32)
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
        seg_areas = np.zeros((num_objs), dtype=np.float32)

        # Lookup table to map from COCO category ids to our internal class
        # indices
        coco_cat_id_to_class_ind = dict([(self._class_to_coco_cat_id[cls],
                                          self._class_to_ind[cls])
                                         for cls in self._classes[1:]])

        for ix, obj in enumerate(objs):
            cls = coco_cat_id_to_class_ind[obj['category_id']]
            boxes[ix, :] = obj['clean_bbox']
            gt_classes[ix] = cls
            seg_areas[ix] = obj['area']
            if obj['iscrowd']:
                # Set overlap to -1 for all classes for crowd objects
                # so they will be excluded during training
                overlaps[ix, :] = -1.0
            else:
                overlaps[ix, cls] = 1.0

        ds_utils.validate_boxes(boxes, width=width, height=height)
        overlaps = scipy.sparse.csr_matrix(overlaps)
        return {
            'width': width,
            'height': height,
            'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_overlaps': overlaps,
            'flipped': False,
            'seg_areas': seg_areas
        }

    def _get_widths(self):
        return [r['width'] for r in self.roidb]

    def append_flipped_images(self):
        num_images = self.num_images
        widths = self._get_widths()
        for i in range(num_images):
            boxes = self.roidb[i]['boxes'].copy()
            oldx1 = boxes[:, 0].copy()
            oldx2 = boxes[:, 2].copy()
            boxes[:, 0] = widths[i] - oldx2 - 1
            boxes[:, 2] = widths[i] - oldx1 - 1
            assert (boxes[:, 2] >= boxes[:, 0]).all()
            entry = {
                'width': widths[i],
                'height': self.roidb[i]['height'],
                'boxes': boxes,
                'gt_classes': self.roidb[i]['gt_classes'],
                'gt_overlaps': self.roidb[i]['gt_overlaps'],
                'flipped': True,
                'seg_areas': self.roidb[i]['seg_areas']
            }
            self.roidb.append(entry)
        self._image_index = self._image_index * 2

    def _get_box_file(self, index):
        # first 14 chars / first 22 chars / all chars + .mat
        # COCO_val2014_0/COCO_val2014_000000447/COCO_val2014_000000447991.mat
        file_name = ('COCO_' + self._data_name + '_' +
                     str(index).zfill(12) + '.mat')
        return osp.join(file_name[:14], file_name[:22], file_name)

    def _print_detection_eval_metrics(self, coco_eval):
        IoU_lo_thresh = 0.5
        IoU_hi_thresh = 0.95

        def _get_thr_ind(coco_eval, thr):
            ind = np.where((coco_eval.params.iouThrs > thr - 1e-5) &
                           (coco_eval.params.iouThrs < thr + 1e-5))[0][0]
            iou_thr = coco_eval.params.iouThrs[ind]
            assert np.isclose(iou_thr, thr)
            return ind

        ind_lo = _get_thr_ind(coco_eval, IoU_lo_thresh)
        ind_hi = _get_thr_ind(coco_eval, IoU_hi_thresh)
        # precision has dims (iou, recall, cls, area range, max dets)
        # area range index 0: all area ranges
        # max dets index 2: 100 per image
        precision = \
            coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, :, 0, 2]
        ap_default = np.mean(precision[precision > -1])
        print(('~~~~ Mean and per-category AP @ IoU=[{:.2f},{:.2f}] '
               '~~~~').format(IoU_lo_thresh, IoU_hi_thresh))
        print('{:.1f}'.format(100 * ap_default))
        for cls_ind, cls in enumerate(self.classes):
            if cls == '__background__':
                continue
            # minus 1 because of __background__
            precision = coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :,
                                                    cls_ind - 1, 0, 2]
            ap = np.mean(precision[precision > -1])
            print('{:.1f}'.format(100 * ap))

        print('~~~~ Summary metrics ~~~~')
        coco_eval.summarize()

    def _do_detection_eval(self, res_file, output_dir):
        ann_type = 'bbox'
        coco_dt = self._COCO.loadRes(res_file)
        coco_eval = COCOeval(self._COCO, coco_dt)
        coco_eval.params.useSegm = (ann_type == 'segm')
        coco_eval.evaluate()
        coco_eval.accumulate()
        self._print_detection_eval_metrics(coco_eval)
        eval_file = osp.join(output_dir, 'detection_results.pkl')
        with open(eval_file, 'wb') as fid:
            pickle.dump(coco_eval, fid, pickle.HIGHEST_PROTOCOL)
        print('Wrote COCO eval results to: {}'.format(eval_file))

    def _coco_results_one_category(self, boxes, cat_id):
        results = []
        for im_ind, index in enumerate(self.image_index):
            dets = boxes[im_ind].astype(float)
            if len(dets) == 0:
                continue
            scores = dets[:, -1]
            xs = dets[:, 0]
            ys = dets[:, 1]
            ws = dets[:, 2] - xs + 1
            hs = dets[:, 3] - ys + 1
            results.extend([{
                'image_id': index,
                'category_id': cat_id,
                'bbox': [xs[k], ys[k], ws[k], hs[k]],
                'score': scores[k]
            } for k in range(dets.shape[0])])
        return results

    def _write_coco_results_file(self, all_boxes, res_file):
        # [{"image_id": 42,
        #   "category_id": 18,
        #   "bbox": [258.15,41.29,348.26,243.78],
        #   "score": 0.236}, ...]
        results = []
        for cls_ind, cls in enumerate(self.classes):
            if cls == '__background__':
                continue
            print('Collecting {} results ({:d}/{:d})'.format(
                cls, cls_ind, self.num_classes - 1))
            coco_cat_id = self._class_to_coco_cat_id[cls]
            results.extend(
                self._coco_results_one_category(all_boxes[cls_ind],
                                                coco_cat_id))
        print('Writing results json to {}'.format(res_file))
        with open(res_file, 'w') as fid:
            json.dump(results, fid)

    def evaluate_detections(self, all_boxes, output_dir):
        res_file = osp.join(
            output_dir,
            ('detections_' + self._image_set + self._year + '_results'))
        if self.config['use_salt']:
            res_file += '_{}'.format(str(uuid.uuid4()))
        res_file += '.json'
        self._write_coco_results_file(all_boxes, res_file)
        # Only do evaluation on non-test sets
        if self._image_set.find('test') == -1:
            self._do_detection_eval(res_file, output_dir)
        # Optionally cleanup results json file
        if self.config['cleanup']:
            os.remove(res_file)

    def competition_mode(self, on):
        if on:
            self.config['use_salt'] = False
            self.config['cleanup'] = False
        else:
            self.config['use_salt'] = True
            self.config['cleanup'] = True
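# Hedged usage sketch (illustrative, not part of the original file): the
# typical lifecycle of this imdb during evaluation. `all_boxes` follows the
# usual Faster R-CNN layout, all_boxes[cls][img] = an N x 5 array of
# (x1, y1, x2, y2, score); the detector that fills it is assumed.
if __name__ == '__main__':
    d = coco('minival', '2014')
    roidb = d.gt_roidb()           # ground-truth ROIs, cached to disk
    # ... run a detector to fill all_boxes ...
    # d.competition_mode(True)     # keep the (unsalted) results json on disk
    # d.evaluate_detections(all_boxes, output_dir='output')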
def load_coco(self, dataset_dir, subsets, class_ids=None,
              class_map=None, return_coco=False):
    """Load one or more subsets of the COCO dataset.
    dataset_dir: The root directory of the COCO dataset.
    subsets: A list of subsets to load (train, val, minival, val35k)
    class_ids: If provided, only loads images that have the given classes.
    class_map: TODO: Not implemented yet. Supports mapping classes from
        different datasets to the same class ID.
    return_coco: If True, returns the COCO object.
    """
    # Path
    self._view_map = {
        'minival2014': 'val2014',  # 5k val2014 subset
        'valminusminival2014': 'val2014',  # val2014 \setminus minival2014
        'test-dev2015': 'test2015',
    }
    self.image_ids = []
    for subset in subsets:
        image_dir = os.path.join(
            dataset_dir, 'images',
            "train2014" if subset == "train" else "val2014")

        # Create COCO object
        json_path_dict = {
            "train": "annotations/instances_train2014.json",
            "val": "annotations/instances_val2014.json",
            "minival": "annotations/instances_minival2014.json",
            "val35k": "annotations/instances_valminusminival2014.json",
        }
        coco = COCO(os.path.join(dataset_dir, json_path_dict[subset]))

        image_ids = []
        # Load all classes or a subset?
        if not class_ids:
            # All classes
            class_ids = sorted(coco.getCatIds())

        # All images or a subset?
        if class_ids:
            for id in class_ids:
                image_ids.extend(list(coco.getImgIds(catIds=[id])))
            # Remove duplicates
            image_ids = list(set(image_ids))
        else:
            # All images
            image_ids = list(coco.imgs.keys())

        # Add classes
        for i in class_ids:
            self.add_class("coco", i, coco.loadCats(i)[0]["name"])

        # Add images
        self.image_ids += image_ids
        for i in image_ids:
            self.add_image(
                "coco", image_id=i,
                path=os.path.join(image_dir, coco.imgs[i]['file_name']),
                width=coco.imgs[i]["width"],
                height=coco.imgs[i]["height"],
                annotations=coco.loadAnns(
                    coco.getAnnIds(imgIds=[i], catIds=class_ids,
                                   iscrowd=None)))
        print('{}: {} images loaded so far'.format(subset,
                                                   len(self.image_info)))
    if return_coco:
        return coco
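# Hedged usage sketch: loading train2014 together with the val35k split via
# the multi-subset variant above. `CocoDataset` (the enclosing Dataset
# subclass) and its prepare() method are assumptions modeled on the Mask R-CNN
# Dataset API; the dataset path is illustrative.
dataset = CocoDataset()
dataset.load_coco('/path/to/coco', subsets=['train', 'val35k'])
dataset.prepare()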
class CocoDetection(Dataset):
    """
    Adapted from the torchvision code
    """

    def __init__(self, mode):
        """
        :param mode: train2014 or val2014
        """
        self.mode = mode
        self.root = os.path.join(COCO_PATH, mode)
        self.ann_file = os.path.join(COCO_PATH, 'annotations',
                                     'instances_{}.json'.format(mode))
        self.coco = COCO(self.ann_file)
        self.ids = [
            k for k in self.coco.imgs.keys()
            if len(self.coco.imgToAnns[k]) > 0
        ]

        tform = []
        if self.is_train:
            tform.append(
                RandomOrder([
                    Grayscale(),
                    Brightness(),
                    Contrast(),
                    Sharpness(),
                    Hue(),
                ]))
        tform += [
            SquarePad(),
            Resize(IM_SCALE),
            ToTensor(),
            Normalize(mean=[0.485, 0.456, 0.406],
                      std=[0.229, 0.224, 0.225]),
        ]
        self.transform_pipeline = Compose(tform)

        self.ind_to_classes = ['__background__'] + [
            v['name'] for k, v in self.coco.cats.items()
        ]
        # COCO category ids are weird (80 categories, but the ids run up to 90
        # with gaps), so remap them to contiguous indices.
        self.id_to_ind = {
            coco_id: (ind + 1)
            for ind, coco_id in enumerate(self.coco.cats.keys())
        }
        self.id_to_ind[0] = 0
        self.ind_to_id = {x: y for y, x in self.id_to_ind.items()}

    @property
    def is_train(self):
        return self.mode.startswith('train')

    def __getitem__(self, index):
        """
        Args:
            index (int): Index
        Returns:
            entry dict
        """
        img_id = self.ids[index]
        path = self.coco.loadImgs(img_id)[0]['file_name']
        image_unpadded = Image.open(os.path.join(self.root, path)).convert('RGB')

        ann_ids = self.coco.getAnnIds(imgIds=img_id)
        anns = self.coco.loadAnns(ann_ids)
        gt_classes = np.array([self.id_to_ind[x['category_id']] for x in anns],
                              dtype=np.int64)
        if np.any(gt_classes >= len(self.ind_to_classes)):
            raise ValueError(
                "Ground-truth class index out of range at index {}".format(index))
        if len(anns) == 0:
            raise ValueError("Annotations should not be empty")
        #     gt_boxes = np.array((0, 4), dtype=np.float32)
        # else:
        gt_boxes = np.array([x['bbox'] for x in anns], dtype=np.float32)
        if np.any(gt_boxes[:, [0, 1]] < 0):
            raise ValueError("GT boxes have negative x/y coordinates")
        if np.any(gt_boxes[:, [2, 3]] < 0):
            raise ValueError("GT boxes have negative width/height")

        # Convert from (x, y, w, h) to (x1, y1, x2, y2)
        gt_boxes[:, [2, 3]] += gt_boxes[:, [0, 1]]

        # Rescale so that the boxes are at BOX_SCALE
        if self.is_train:
            image_unpadded, gt_boxes = random_crop(
                image_unpadded,
                gt_boxes * BOX_SCALE / max(image_unpadded.size),
                BOX_SCALE,
                round_boxes=False,
            )
        else:
            # Seems a bit silly because we won't be using GT boxes then but whatever
            gt_boxes = gt_boxes * BOX_SCALE / max(image_unpadded.size)
        w, h = image_unpadded.size
        box_scale_factor = BOX_SCALE / max(w, h)

        # Optionally flip the image if we're doing training
        flipped = self.is_train and np.random.random() > 0.5
        if flipped:
            scaled_w = int(box_scale_factor * float(w))
            image_unpadded = image_unpadded.transpose(Image.FLIP_LEFT_RIGHT)
            gt_boxes[:, [0, 2]] = scaled_w - gt_boxes[:, [2, 0]]

        img_scale_factor = IM_SCALE / max(w, h)
        if h > w:
            im_size = (IM_SCALE, int(w * img_scale_factor), img_scale_factor)
        elif h < w:
            im_size = (int(h * img_scale_factor), IM_SCALE, img_scale_factor)
        else:
            im_size = (IM_SCALE, IM_SCALE, img_scale_factor)

        entry = {
            'img': self.transform_pipeline(image_unpadded),
            'img_size': im_size,
            'gt_boxes': gt_boxes,
            'gt_classes': gt_classes,
            'scale': IM_SCALE / BOX_SCALE,
            'index': index,
            'image_id': img_id,
            'flipped': flipped,
            'fn': path,
        }
        return entry

    @classmethod
    def splits(cls, *args, **kwargs):
        """ Helper method to generate splits of the dataset"""
        train = cls('train2014', *args, **kwargs)
        val = cls('val2014', *args, **kwargs)
        return train, val

    def __len__(self):
        return len(self.ids)
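# Hedged usage sketch: building both splits defined above and inspecting one
# training entry. Batching is left out on purpose: entries carry variable-size
# gt_boxes/gt_classes arrays, so a torch DataLoader would need a custom
# collate_fn (not shown here).
train, val = CocoDetection.splits()
entry = train[0]
print(entry['img'].shape, entry['img_size'], entry['gt_boxes'].shape)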
def load_coco(self, dataset_dir, subset, year=DEFAULT_DATASET_YEAR,
              class_ids=None, class_map=None, return_coco=False,
              auto_download=False):
    """Load a subset of the COCO dataset.
    dataset_dir: The root directory of the COCO dataset.
    subset: What to load (train, val, minival, valminusminival)
    year: What dataset year to load (2014, 2017) as a string, not an integer
    class_ids: If provided, only loads images that have the given classes.
    class_map: TODO: Not implemented yet. Supports mapping classes from
        different datasets to the same class ID.
    return_coco: If True, returns the COCO object.
    auto_download: Automatically download and unzip MS-COCO images and annotations
    """

    if auto_download is True:
        self.auto_download(dataset_dir, subset, year)

    coco = COCO("{}/annotations/instances_{}{}.json".format(
        dataset_dir, subset, year))
    if subset == "minival" or subset == "valminusminival":
        subset = "val"
    image_dir = "{}/{}{}".format(dataset_dir, subset, year)

    # Load all classes or a subset?
    if not class_ids:
        # All classes
        class_ids = sorted(coco.getCatIds())

    # All images or a subset?
    if class_ids:
        image_ids = []
        for id in class_ids:
            image_ids.extend(list(coco.getImgIds(catIds=[id])))
        # Remove duplicates
        image_ids = list(set(image_ids))
    else:
        # All images
        image_ids = list(coco.imgs.keys())

    # Add classes
    for i in class_ids:
        self.add_class("coco", i, coco.loadCats(i)[0]["name"])

    # Add images
    for i in image_ids:
        self.add_image(
            "coco", image_id=i,
            path=os.path.join(image_dir, coco.imgs[i]['file_name']),
            width=coco.imgs[i]["width"],
            height=coco.imgs[i]["height"],
            annotations=coco.loadAnns(
                coco.getAnnIds(imgIds=[i], catIds=class_ids, iscrowd=None)))
    if return_coco:
        return coco
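# Hedged usage sketch: the common train + valminusminival recipe for this
# loader. `CocoDataset` (the enclosing class) and its prepare() method are
# assumptions taken from the Mask R-CNN Dataset API; the path is illustrative.
dataset_train = CocoDataset()
dataset_train.load_coco('/path/to/coco', 'train', year='2014')
dataset_train.load_coco('/path/to/coco', 'valminusminival', year='2014')
dataset_train.prepare()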
class coco(imdb):
    def __init__(self, image_set, year):
        imdb.__init__(self, 'coco_' + year + '_' + image_set)
        # COCO specific config options
        self.config = {
            'top_k': 2000,
            'use_salt': True,
            'cleanup': True,
            'crowd_thresh': 0.7,
            'min_size': 2
        }
        # name, paths
        self._year = year
        self._image_set = image_set
        self._data_path = osp.join(cfg.DATA_DIR, 'coco')
        # load COCO API, classes, class <-> id mappings
        self._COCO = COCO(self._get_ann_file())
        cats = self._COCO.loadCats(self._COCO.getCatIds())
        self._classes = tuple(['__background__'] + [c['name'] for c in cats])
        self._class_to_ind = dict(zip(self.classes, xrange(self.num_classes)))
        self._class_to_coco_cat_id = dict(
            zip([c['name'] for c in cats], self._COCO.getCatIds()))
        self._image_index = self._load_image_set_index()
        # Default to roidb handler
        self.set_proposal_method('selective_search')
        self.competition_mode(False)

        # Some image sets are "views" (i.e. subsets) into others.
        # For example, minival2014 is a random 5000 image subset of val2014.
        # This mapping tells us where the view's images and proposals come from.
        self._view_map = {
            'minival2014': 'val2014',  # 5k val2014 subset
            'valminusminival2014': 'val2014',  # val2014 \setminus minival2014
        }
        coco_name = image_set + year  # e.g., "val2014"
        self._data_name = (self._view_map[coco_name]
                           if coco_name in self._view_map
                           else coco_name)
        # Dataset splits that have ground-truth annotations (test splits
        # do not have gt annotations)
        self._gt_splits = ('train', 'val', 'minival')

    def _get_ann_file(self):
        prefix = 'instances' if self._image_set.find('test') == -1 \
            else 'image_info'
        return osp.join(self._data_path, 'annotations',
                        prefix + '_' + self._image_set + self._year + '.json')

    def _load_image_set_index(self):
        """
        Load image ids.
        """
        image_ids = self._COCO.getImgIds()
        return image_ids

    def _get_widths(self):
        anns = self._COCO.loadImgs(self._image_index)
        widths = [ann['width'] for ann in anns]
        return widths

    def image_path_at(self, i):
        """
        Return the absolute path to image i in the image sequence.
        """
        return self.image_path_from_index(self._image_index[i])

    def image_path_from_index(self, index):
        """
        Construct an image path from the image's "index" identifier.
        """
        # Example image path for index=119993:
        #   images/train2014/COCO_train2014_000000119993.jpg
        file_name = ('COCO_' + self._data_name + '_' +
                     str(index).zfill(12) + '.jpg')
        image_path = osp.join(self._data_path, 'images', self._data_name,
                              file_name)
        assert osp.exists(image_path), \
            'Path does not exist: {}'.format(image_path)
        return image_path

    def selective_search_roidb(self):
        return self._roidb_from_proposals('selective_search')

    def edge_boxes_roidb(self):
        return self._roidb_from_proposals('edge_boxes_AR')

    def mcg_roidb(self):
        return self._roidb_from_proposals('MCG')

    def _roidb_from_proposals(self, method):
        """
        Creates a roidb from pre-computed proposals of a particular method.
        """
        top_k = self.config['top_k']
        cache_file = osp.join(
            self.cache_path,
            self.name + '_{:s}_top{:d}'.format(method, top_k) + '_roidb.pkl')

        if osp.exists(cache_file):
            with open(cache_file, 'rb') as fid:
                roidb = cPickle.load(fid)
            print '{:s} {:s} roidb loaded from {:s}'.format(
                self.name, method, cache_file)
            return roidb

        if self._image_set in self._gt_splits:
            gt_roidb = self.gt_roidb()
            method_roidb = self._load_proposals(method, gt_roidb)
            roidb = imdb.merge_roidbs(gt_roidb, method_roidb)
            # Make sure we don't use proposals that are contained in crowds
            roidb = _filter_crowd_proposals(roidb, self.config['crowd_thresh'])
        else:
            roidb = self._load_proposals(method, None)
        with open(cache_file, 'wb') as fid:
            cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL)
        print 'wrote {:s} roidb to {:s}'.format(method, cache_file)
        return roidb

    def _load_proposals(self, method, gt_roidb):
        """
        Load pre-computed proposals in the format provided by Jan Hosang:
        http://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal-
        computing/research/object-recognition-and-scene-understanding/how-
        good-are-detection-proposals-really/
        For MCG, use boxes from http://www.eecs.berkeley.edu/Research/Projects/
        CS/vision/grouping/mcg/ and convert the file layout using
        lib/datasets/tools/mcg_munge.py.
        """
        box_list = []
        top_k = self.config['top_k']
        valid_methods = [
            'MCG', 'selective_search', 'edge_boxes_AR', 'edge_boxes_70'
        ]
        assert method in valid_methods

        print 'Loading {} boxes'.format(method)
        for i, index in enumerate(self._image_index):
            if i % 1000 == 0:
                print '{:d} / {:d}'.format(i + 1, len(self._image_index))

            box_file = osp.join(cfg.DATA_DIR, 'coco_proposals', method, 'mat',
                                self._get_box_file(index))

            raw_data = sio.loadmat(box_file)['boxes']
            boxes = np.maximum(raw_data - 1, 0).astype(np.uint16)
            if method == 'MCG':
                # Boxes from the MCG website are in (y1, x1, y2, x2) order
                boxes = boxes[:, (1, 0, 3, 2)]
            # Remove duplicate boxes and very small boxes and then take top k
            keep = ds_utils.unique_boxes(boxes)
            boxes = boxes[keep, :]
            keep = ds_utils.filter_small_boxes(boxes, self.config['min_size'])
            boxes = boxes[keep, :]
            boxes = boxes[:top_k, :]
            box_list.append(boxes)
            # Sanity check
            im_ann = self._COCO.loadImgs(index)[0]
            width = im_ann['width']
            height = im_ann['height']
            ds_utils.validate_boxes(boxes, width=width, height=height)
        return self.create_roidb_from_box_list(box_list, gt_roidb)

    def gt_roidb(self):
        """
        Return the database of ground-truth regions of interest.

        This function loads/saves from/to a cache file to speed up future calls.
        """
        cache_file = osp.join(self.cache_path, self.name + '_gt_roidb.pkl')
        if osp.exists(cache_file):
            with open(cache_file, 'rb') as fid:
                roidb = cPickle.load(fid)
            print '{} gt roidb loaded from {}'.format(self.name, cache_file)
            return roidb

        gt_roidb = [
            self._load_coco_annotation(index) for index in self._image_index
        ]

        with open(cache_file, 'wb') as fid:
            cPickle.dump(gt_roidb, fid, cPickle.HIGHEST_PROTOCOL)
        print 'wrote gt roidb to {}'.format(cache_file)
        return gt_roidb

    def _load_coco_annotation(self, index):
        """
        Loads COCO bounding-box instance annotations. Crowd instances are
        handled by marking their overlaps (with all categories) to -1. This
        overlap value means that crowd "instances" are excluded from training.
        """
        im_ann = self._COCO.loadImgs(index)[0]
        width = im_ann['width']
        height = im_ann['height']

        annIds = self._COCO.getAnnIds(imgIds=index, iscrowd=None)
        objs = self._COCO.loadAnns(annIds)
        # Sanitize bboxes -- some are invalid
        valid_objs = []
        for obj in objs:
            x1 = np.max((0, obj['bbox'][0]))
            y1 = np.max((0, obj['bbox'][1]))
            x2 = np.min((width - 1, x1 + np.max((0, obj['bbox'][2] - 1))))
            y2 = np.min((height - 1, y1 + np.max((0, obj['bbox'][3] - 1))))
            if obj['area'] > 0 and x2 >= x1 and y2 >= y1:
                obj['clean_bbox'] = [x1, y1, x2, y2]
                valid_objs.append(obj)
        objs = valid_objs
        num_objs = len(objs)

        boxes = np.zeros((num_objs, 4), dtype=np.uint16)
        gt_classes = np.zeros((num_objs), dtype=np.int32)
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
        seg_areas = np.zeros((num_objs), dtype=np.float32)

        # Lookup table to map from COCO category ids to our internal class
        # indices
        coco_cat_id_to_class_ind = dict([(self._class_to_coco_cat_id[cls],
                                          self._class_to_ind[cls])
                                         for cls in self._classes[1:]])

        for ix, obj in enumerate(objs):
            cls = coco_cat_id_to_class_ind[obj['category_id']]
            boxes[ix, :] = obj['clean_bbox']
            gt_classes[ix] = cls
            seg_areas[ix] = obj['area']
            if obj['iscrowd']:
                # Set overlap to -1 for all classes for crowd objects
                # so they will be excluded during training
                overlaps[ix, :] = -1.0
            else:
                overlaps[ix, cls] = 1.0

        ds_utils.validate_boxes(boxes, width=width, height=height)
        overlaps = scipy.sparse.csr_matrix(overlaps)
        return {
            'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_overlaps': overlaps,
            'flipped': False,
            'seg_areas': seg_areas
        }

    def _get_box_file(self, index):
        # first 14 chars / first 22 chars / all chars + .mat
        # COCO_val2014_0/COCO_val2014_000000447/COCO_val2014_000000447991.mat
        file_name = ('COCO_' + self._data_name + '_' +
                     str(index).zfill(12) + '.mat')
        return osp.join(file_name[:14], file_name[:22], file_name)

    def _print_detection_eval_metrics(self, coco_eval):
        IoU_lo_thresh = 0.5
        IoU_hi_thresh = 0.95

        def _get_thr_ind(coco_eval, thr):
            ind = np.where((coco_eval.params.iouThrs > thr - 1e-5) &
                           (coco_eval.params.iouThrs < thr + 1e-5))[0][0]
            iou_thr = coco_eval.params.iouThrs[ind]
            assert np.isclose(iou_thr, thr)
            return ind

        ind_lo = _get_thr_ind(coco_eval, IoU_lo_thresh)
        ind_hi = _get_thr_ind(coco_eval, IoU_hi_thresh)
        # precision has dims (iou, recall, cls, area range, max dets)
        # area range index 0: all area ranges
        # max dets index 2: 100 per image
        precision = \
            coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, :, 0, 2]
        ap_default = np.mean(precision[precision > -1])
        print ('~~~~ Mean and per-category AP @ IoU=[{:.2f},{:.2f}] '
               '~~~~').format(IoU_lo_thresh, IoU_hi_thresh)
        print '{:.1f}'.format(100 * ap_default)
        for cls_ind, cls in enumerate(self.classes):
            if cls == '__background__':
                continue
            # minus 1 because of __background__
            precision = coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :,
                                                    cls_ind - 1, 0, 2]
            ap = np.mean(precision[precision > -1])
            print '{:.1f}'.format(100 * ap)

        print '~~~~ Summary metrics ~~~~'
        coco_eval.summarize()

    def _do_detection_eval(self, res_file, output_dir):
        ann_type = 'bbox'
        coco_dt = self._COCO.loadRes(res_file)
        coco_eval = COCOeval(self._COCO, coco_dt)
        coco_eval.params.useSegm = (ann_type == 'segm')
        coco_eval.evaluate()
        coco_eval.accumulate()
        self._print_detection_eval_metrics(coco_eval)
        eval_file = osp.join(output_dir, 'detection_results.pkl')
        with open(eval_file, 'wb') as fid:
            cPickle.dump(coco_eval, fid, cPickle.HIGHEST_PROTOCOL)
        print 'Wrote COCO eval results to: {}'.format(eval_file)

    def _coco_results_one_category(self, boxes, cat_id):
        results = []
        for im_ind, index in enumerate(self.image_index):
            dets = boxes[im_ind].astype(np.float)
            if len(dets) == 0:
                continue
            scores = dets[:, -1]
            xs = dets[:, 0]
            ys = dets[:, 1]
            ws = dets[:, 2] - xs + 1
            hs = dets[:, 3] - ys + 1
            results.extend([{
                'image_id': index,
                'category_id': cat_id,
                'bbox': [xs[k], ys[k], ws[k], hs[k]],
                'score': scores[k]
            } for k in xrange(dets.shape[0])])
        return results

    def _write_coco_results_file(self, all_boxes, res_file):
        # [{"image_id": 42,
        #   "category_id": 18,
        #   "bbox": [258.15,41.29,348.26,243.78],
        #   "score": 0.236}, ...]
        results = []
        for cls_ind, cls in enumerate(self.classes):
            if cls == '__background__':
                continue
            print 'Collecting {} results ({:d}/{:d})'.format(
                cls, cls_ind, self.num_classes - 1)
            coco_cat_id = self._class_to_coco_cat_id[cls]
            results.extend(
                self._coco_results_one_category(all_boxes[cls_ind],
                                                coco_cat_id))
        print 'Writing results json to {}'.format(res_file)
        with open(res_file, 'w') as fid:
            json.dump(results, fid)

    def evaluate_detections(self, all_boxes, output_dir):
        res_file = osp.join(
            output_dir,
            ('detections_' + self._image_set + self._year + '_results'))
        if self.config['use_salt']:
            res_file += '_{}'.format(str(uuid.uuid4()))
        res_file += '.json'
        self._write_coco_results_file(all_boxes, res_file)
        # Only do evaluation on non-test sets
        if self._image_set.find('test') == -1:
            self._do_detection_eval(res_file, output_dir)
        # Optionally cleanup results json file
        if self.config['cleanup']:
            os.remove(res_file)

    def competition_mode(self, on):
        if on:
            self.config['use_salt'] = False
            self.config['cleanup'] = False
        else:
            self.config['use_salt'] = True
            self.config['cleanup'] = True
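# Hedged reference sketch (not from the original file): the two box layouts
# used above. Internal roidb boxes are inclusive (x1, y1, x2, y2) pixel
# coordinates, while COCO annotations and results use (x, y, w, h); the
# +1/-1 terms mirror _load_coco_annotation and _coco_results_one_category.
def xyxy_to_xywh(box):
    x1, y1, x2, y2 = box
    return [x1, y1, x2 - x1 + 1, y2 - y1 + 1]

def xywh_to_xyxy(box):
    x, y, w, h = box
    return [x, y, x + w - 1, y + h - 1]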