def __init__(self, epoch_iters, cfg):
    """
    Set up the timer, smoothed meters and running counters used in training.

    Args:
        epoch_iters (int): the overall number of iterations of one epoch.
        cfg (CfgNode): configs.
    """
    self._cfg = cfg
    self.epoch_iters = epoch_iters
    # Despite its name this is an iteration count: SOLVER.MAX_EPOCH epochs
    # multiplied by the number of iterations per epoch.
    self.MAX_EPOCH = cfg.SOLVER.MAX_EPOCH * epoch_iters
    self.iter_timer = Timer()

    # Loss: a meter built with LOG_PERIOD plus a running total.
    self.loss = ScalarMeter(cfg.LOG_PERIOD)
    self.loss_total = 0.0
    self.lr = None

    # Current minibatch errors (smoothed over a window).
    self.mb_top1_err = ScalarMeter(cfg.LOG_PERIOD)
    self.mb_top5_err = ScalarMeter(cfg.LOG_PERIOD)

    # Number of misclassified examples, and samples seen so far.
    self.num_top1_mis = self.num_top5_mis = self.num_samples = 0
def _load_lvis_annotations(json_file: str):
    """
    Load LVIS annotations from a JSON file

    Args:
        json_file: str
            Path to the file to load annotations from. It is resolved to a
            local path through `PathManager.get_local_path` first.
    Returns:
        Instance of `lvis.LVIS` that provides access to annotations data
    """
    # Imported lazily so the `lvis` package is only needed when this loader runs.
    from lvis import LVIS

    json_file = PathManager.get_local_path(json_file)
    logger = logging.getLogger(__name__)
    timer = Timer()
    lvis_api = LVIS(json_file)
    # Read the elapsed time once so the threshold check and the logged value agree.
    elapsed = timer.seconds()
    if elapsed > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(json_file, elapsed))
    return lvis_api
def __init__(self, max_iter, cfg):
    """
    Set up the timer, smoothed error meters and counters used in validation.

    Args:
        max_iter (int): the max number of iteration of the current epoch.
        cfg (CfgNode): configs.
    """
    self._cfg = cfg
    self.max_iter = max_iter
    self.iter_timer = Timer()

    # Current minibatch errors (smoothed over a window).
    self.mb_top1_err, self.mb_top5_err = (
        ScalarMeter(cfg.LOG_PERIOD),
        ScalarMeter(cfg.LOG_PERIOD),
    )

    # Min errors (over the full val set); start from the worst value, 100%.
    self.min_top1_err = self.min_top5_err = 100.0

    # Number of misclassified examples, and samples seen so far.
    self.num_top1_mis = self.num_top5_mis = self.num_samples = 0

    # Accumulated predictions / labels (two distinct lists).
    self.all_preds = []
    self.all_labels = []
def _load_coco_annotations(json_file: str):
    """
    Build a COCO API object from an annotation JSON file.

    Anything the COCO constructor prints to stdout is captured and discarded.

    Args:
        json_file: str
            Path to the file to load annotations from
    Returns:
        Instance of `pycocotools.coco.COCO` that provides access to annotations data
    """
    from pycocotools.coco import COCO

    logger = logging.getLogger(__name__)
    timer = Timer()

    discarded_output = io.StringIO()
    with contextlib.redirect_stdout(discarded_output):
        coco_api = COCO(json_file)

    # Only report loads that took longer than one second.
    if timer.seconds() > 1:
        message = "Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds())
        logger.info(message)
    return coco_api
def __init__(
    self,
    num_videos,
    num_clips,
    num_cls,
    overall_iters,
    multi_label=False,
    ensemble_method="sum",
):
    """
    Allocate the per-video prediction / label buffers and reset the meter.

    The meter expects num_clips predictions for each of the num_videos videos
    and computes its metrics on those num_videos videos.

    Args:
        num_videos (int): number of videos to test.
        num_clips (int): number of clips sampled from each video for
            aggregating the final prediction for the video.
        num_cls (int): number of classes for each prediction.
        overall_iters (int): overall iterations for testing.
        multi_label (bool): if True, use map as the metric.
        ensemble_method (str): method to perform the ensemble, options
            include "sum", and "max".
    """
    self.iter_timer = Timer()
    self.num_clips = num_clips
    self.overall_iters = overall_iters
    self.multi_label = multi_label
    self.ensemble_method = ensemble_method

    # Initialize tensors.
    self.video_preds = torch.zeros((num_videos, num_cls))
    if multi_label:
        # Multi-label: predictions start at a very negative value and the
        # labels are stored as a (num_videos, num_cls) float tensor.
        self.video_preds -= 1e10
        self.video_labels = torch.zeros((num_videos, num_cls))
    else:
        # Single-label: one integer class id per video.
        self.video_labels = torch.zeros((num_videos)).long()
    self.clip_count = torch.zeros((num_videos)).long()

    # Reset metric.
    self.reset()
def __init__(self, num_videos, num_clips, num_cls, overall_iters):
    """
    Allocate the per-video prediction / label buffers and reset the meter.

    The meter expects num_clips predictions for each of the num_videos videos
    and computes its metrics on those num_videos videos.

    Args:
        num_videos (int): number of videos to test.
        num_clips (int): number of clips sampled from each video for
            aggregating the final prediction for the video.
        num_cls (int): number of classes for each prediction.
        overall_iters (int): overall iterations for testing.
    """
    self.iter_timer = Timer()
    self.num_clips = num_clips
    self.overall_iters = overall_iters

    # Initialize tensors: float scores per (video, class); integer label and
    # clip counter per video.
    score_shape = (num_videos, num_cls)
    self.video_preds = torch.zeros(score_shape)
    self.video_labels = torch.zeros((num_videos)).long()
    self.clip_count = torch.zeros((num_videos)).long()

    # Reset metric.
    self.reset()
def benchmark_eval(args):
    """
    Time repeated forward passes of a model in eval mode.

    The model and test data loader are built from a yacs config when
    `args.config_file` ends with ".yaml", otherwise from a lazy config. The
    first 100 batches of the loader are cached, 5 of them are used for
    warmup, and then 300 timed iterations cycle through the cached batches.

    Args:
        args: parsed command-line arguments; `config_file` is read here and
            the whole object is passed to `setup`.
    """
    cfg = setup(args)
    uses_yacs_config = args.config_file.endswith(".yaml")
    if uses_yacs_config:
        model = build_model(cfg)
        DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS)

        cfg.defrost()
        cfg.DATALOADER.NUM_WORKERS = 0
        data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0])
    else:
        model = instantiate(cfg.model)
        model.to(cfg.train.device)
        DetectionCheckpointer(model).load(cfg.train.init_checkpoint)

        cfg.dataloader.num_workers = 0
        data_loader = instantiate(cfg.dataloader.test)

    model.eval()
    logger.info("Model:\n{}".format(model))

    # Cache a fixed set of batches so data loading is not part of the timing.
    cached_batches = list(itertools.islice(data_loader, 100))
    dummy_data = DatasetFromList(cached_batches, copy=False)

    def endless_batches():
        # Cycle over the cached batches forever.
        while True:
            yield from dummy_data

    # warmup
    for warmup_idx in range(5):
        model(dummy_data[warmup_idx])

    max_iter = 300
    timer = Timer()
    with tqdm.tqdm(total=max_iter) as pbar:
        for batch in itertools.islice(endless_batches(), max_iter):
            model(batch)
            pbar.update()
    logger.info("{} iters in {} seconds.".format(max_iter, timer.seconds()))
def benchmark_data(cfg):
    """
    Measure how fast the "train" data loader yields batches.

    For BENCHMARK.NUM_EPOCHS epochs the loader is iterated without doing any
    work on the batches. Every BENCHMARK.LOG_PERIOD iterations the elapsed
    time and system RAM usage are logged; per-epoch and overall summaries
    (mean / std) are logged as well.

    Args:
        cfg (CfgNode): configs.
    """
    # Set up environment.
    setup_environment()
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Benchmark data loading with config:")
    logger.info(pprint.pformat(cfg))

    timer = Timer()
    dataloader = loader.construct_loader(cfg, "train")
    logger.info("Initialize loader using {:.2f} seconds.".format(timer.seconds()))

    batch_size = cfg.TRAIN.BATCH_SIZE
    log_period = cfg.BENCHMARK.LOG_PERIOD
    epoch_times = []

    # Test for a few epochs.
    for cur_epoch in range(cfg.BENCHMARK.NUM_EPOCHS):
        timer = Timer()
        timer_epoch = Timer()
        iter_times = []
        for cur_iter, _ in enumerate(tqdm.tqdm(dataloader)):
            # Only report on every `log_period`-th iteration (never on the first).
            if cur_iter == 0 or cur_iter % log_period != 0:
                continue
            iter_times.append(timer.seconds())
            vram = psutil.virtual_memory()
            period_msg = (
                "Epoch {}: {} iters ({} videos) in {:.2f} seconds. "
                "RAM Usage: {:.2f}/{:.2f} GB."
            )
            logger.info(
                period_msg.format(
                    cur_epoch,
                    log_period,
                    log_period * batch_size,
                    iter_times[-1],
                    (vram.total - vram.available) / 1024**3,
                    vram.total / 1024**3,
                )
            )
            timer.reset()

        # Whole-epoch summary.
        epoch_times.append(timer_epoch.seconds())
        vram = psutil.virtual_memory()
        epoch_msg = (
            "Epoch {}: in total {} iters ({} videos) in {:.2f} seconds. "
            "RAM Usage: {:.2f}/{:.2f} GB."
        )
        logger.info(
            epoch_msg.format(
                cur_epoch,
                len(dataloader),
                len(dataloader) * batch_size,
                epoch_times[-1],
                (vram.total - vram.available) / 1024**3,
                vram.total / 1024**3,
            )
        )

        # Statistics over the logging periods of this epoch.
        period_stats_msg = (
            "Epoch {}: on average every {} iters ({} videos) take {:.2f}/{:.2f} "
            "(avg/std) seconds."
        )
        logger.info(
            period_stats_msg.format(
                cur_epoch,
                log_period,
                log_period * batch_size,
                np.mean(iter_times),
                np.std(iter_times),
            )
        )

    # Statistics over all benchmarked epochs.
    overall_msg = (
        "On average every epoch ({} videos) takes {:.2f}/{:.2f} "
        "(avg/std) seconds."
    )
    logger.info(
        overall_msg.format(
            len(dataloader) * batch_size,
            np.mean(epoch_times),
            np.std(epoch_times),
        )
    )
def load_ade_instances(json_file, image_root, dataset_type):
    """
    Load ADE instance annotations into a list of Detectron2-style dicts.

    Each item of the JSON file becomes one record holding the image path and
    size, one annotation per entry of `item['anchors']`, the anchors as
    precomputed proposal boxes (with all-zero objectness logits), and the
    scene label.

    Args:
        json_file (str): path to the json instance annotation file.
        image_root (str or path-like): directory which contains all the images.
        dataset_type (str): type of this dataset. One of
            base_train/base_val/novel/novel_test.

    Returns:
        list[dict]: a list of dicts in Detectron2 standard format.
    """
    timer = Timer()
    # Use a context manager so the file handle is closed right after parsing
    # (the previous `json.load(open(...))` left it to the garbage collector).
    with open(json_file, 'r') as f:
        ade_file = json.load(f)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(
            json_file, timer.seconds()))

    dataset_dicts = []
    for item in ade_file:
        record = {}
        record['file_name'] = os.path.join(image_root, item['fpath_img'])
        record['height'] = item['height']
        record['width'] = item['width']
        record['image_id'] = item['index']
        record['annotations'] = []

        K = len(item['anchors'])
        proposal_boxes = np.zeros((K, 4))
        record['proposal_bbox_mode'] = BoxMode.XYXY_ABS
        proposal_objectness_logits = np.zeros((K, ))

        for i, anchor in enumerate(item['anchors']):
            anno = {}
            anno['category_id'] = anchor['label']
            # The anchor is unpacked in (x1, x2, y1, y2) order and re-ordered
            # to (x1, y1, x2, y2) for the XYXY box mode.
            x1, x2, y1, y2 = anchor['anchor']
            proposal_boxes[i] = [x1, y1, x2, y2]
            if dataset_type == 'base_train':
                # NOTE: In standard detection task, x1, x2, y1, y2 in the
                # next line should be anchor['bbox'], which is the ground
                # truth position for instances. But in our task, we will
                # conduct classification directly on proposals, so we set
                # the anno['bbox'] the same as proposal. We will use the
                # information of ground truth bbox as a supervision
                x1, x2, y1, y2 = anchor['anchor']
            anno['bbox'] = [float(p) for p in [x1, y1, x2, y2]]
            anno['bbox_mode'] = BoxMode.XYXY_ABS
            anno['attr'] = anchor['attr']
            anno['hierarchy'] = anchor['hierarchy']
            anno['part'] = anchor['part']
            record['annotations'].append(anno)

        record['proposal_boxes'] = proposal_boxes
        record['proposal_objectness_logits'] = proposal_objectness_logits
        ## NOTE: if you want to use segmentation, remove _debug in the next line
        if dataset_type == 'base_train_debug':
            record['sem_seg_file_name'] = os.path.join(image_root, item['seg'])
        record['scene'] = item['scene']
        dataset_dicts.append(record)
    return dataset_dicts
def load_coco_json_mem_efficient(json_file,
                                 image_root,
                                 dataset_name=None,
                                 extra_annotation_keys=None):
    """
    Load a COCO-format json file into a list of Detectron2-style dicts.

    Actually not mem efficient: the whole file is still loaded through the
    COCO API, which is only released right before returning.

    Args:
        json_file (str): path to the annotation file; resolved through
            `PathManager.get_local_path`.
        image_root (str): directory joined in front of every image file name.
        dataset_name (str or None): if given, "thing_classes" and
            "thing_dataset_id_to_contiguous_id" are written to the metadata
            of this dataset and category ids are remapped to contiguous ids.
        extra_annotation_keys (list[str] or None): per-annotation keys copied
            as-is in addition to "iscrowd", "bbox" and "category_id".

    Returns:
        list[dict]: one record per image with "file_name", "height", "width",
        "image_id", "annotations" and, when present in the image entry,
        "neg_category_ids".
    """
    from pycocotools.coco import COCO

    timer = Timer()
    json_file = PathManager.get_local_path(json_file)
    # The COCO constructor prints progress to stdout; keep it out of the logs.
    with contextlib.redirect_stdout(io.StringIO()):
        coco_api = COCO(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(
            json_file, timer.seconds()))

    id_map = None
    if dataset_name is not None:
        meta = MetadataCatalog.get(dataset_name)
        cat_ids = sorted(coco_api.getCatIds())
        cats = coco_api.loadCats(cat_ids)
        # The categories in a custom json file may not be sorted.
        thing_classes = [
            c["name"] for c in sorted(cats, key=lambda x: x["id"])
        ]
        meta.thing_classes = thing_classes

        if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)):
            if "coco" not in dataset_name:
                logger.warning(""" Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you. """)
        id_map = {v: i for i, v in enumerate(cat_ids)}
        meta.thing_dataset_id_to_contiguous_id = id_map

    # sort indices for reproducible results
    img_ids = sorted(coco_api.imgs.keys())
    # imgs is a list of dicts with keys such as 'file_name', 'height',
    # 'width' and 'id'.
    imgs = coco_api.loadImgs(img_ids)
    logger.info("Loaded {} images in COCO format from {}".format(
        len(imgs), json_file))

    dataset_dicts = []
    ann_keys = ["iscrowd", "bbox", "category_id"
                ] + (extra_annotation_keys or [])
    # Was previously incremented without ever being initialised, which raised
    # UnboundLocalError on the first instance with only invalid polygons.
    num_instances_without_valid_segmentation = 0

    for img_dict in imgs:
        record = {}
        record["file_name"] = os.path.join(image_root, img_dict["file_name"])
        record["height"] = img_dict["height"]
        record["width"] = img_dict["width"]
        image_id = record["image_id"] = img_dict["id"]
        anno_dict_list = coco_api.imgToAnns[image_id]

        if 'neg_category_ids' in img_dict:
            neg_ids = img_dict['neg_category_ids']
            # Without a dataset name there is no id mapping; keep the raw ids
            # instead of failing on `None[x]`.
            if id_map is not None:
                neg_ids = [id_map[x] for x in neg_ids]
            else:
                neg_ids = list(neg_ids)
            record['neg_category_ids'] = neg_ids

        objs = []
        for anno in anno_dict_list:
            assert anno["image_id"] == image_id
            assert anno.get("ignore", 0) == 0

            obj = {key: anno[key] for key in ann_keys if key in anno}

            segm = anno.get("segmentation", None)
            if segm:  # either list[list[float]] or dict(RLE)
                if not isinstance(segm, dict):
                    # filter out invalid polygons (< 3 points)
                    segm = [
                        poly for poly in segm
                        if len(poly) % 2 == 0 and len(poly) >= 6
                    ]
                    if len(segm) == 0:
                        num_instances_without_valid_segmentation += 1
                        continue  # ignore this instance
                obj["segmentation"] = segm

            obj["bbox_mode"] = BoxMode.XYWH_ABS
            if id_map:
                obj["category_id"] = id_map[obj["category_id"]]
            objs.append(obj)
        record["annotations"] = objs
        dataset_dicts.append(record)

    if num_instances_without_valid_segmentation > 0:
        logger.warning(
            "Filtered out {} instances without valid segmentation. "
            "There might be issues in your dataset generation process.".format(
                num_instances_without_valid_segmentation))

    # Drop the API object (and the parsed json it holds) before returning.
    del coco_api
    return dataset_dicts
def before_train(self):
    """Record the start time and create the total-time timer in a paused state."""
    self._start_time = time.perf_counter()
    # Build and pause the timer first, then publish it on the instance.
    total_timer = Timer()
    total_timer.pause()
    self._total_timer = total_timer
def load_cocoa_cls_json(json_file, image_root, dataset_name=None):
    """
    Load a COCO-style json file whose annotations also carry a
    "visible_mask" and an "occlude_rate".

    Args:
        json_file (str): full path to the json annotation file.
        image_root (str): the directory where the images in this json file exists.
        dataset_name (str): the name of the dataset (e.g., coco_2017_train).
            If provided, this function will also put "thing_classes" and
            "thing_dataset_id_to_contiguous_id" into the metadata associated
            with this dataset. If the name ends with "visible", the
            "visible_mask" of each annotation is used as its "segmentation".

    Returns:
        list[dict]: a list of dicts in Detectron2 standard format. (See
        `Using Custom Datasets </tutorials/datasets.html>`_ )

    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
        2. Every annotation is marked with iscrowd = 0.
    """
    from pycocotools.coco import COCO

    def _valid_polygons(polygons):
        # Keep polygons with an even number of coordinates and >= 3 points.
        return [
            poly for poly in polygons
            if len(poly) % 2 == 0 and len(poly) >= 6
        ]

    timer = Timer()
    json_file = PathManager.get_local_path(json_file)
    # The COCO constructor prints progress to stdout; keep it out of the logs.
    with contextlib.redirect_stdout(io.StringIO()):
        cocoa_cls_api = COCO(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(
            json_file, timer.seconds()))

    id_map = None
    if dataset_name is not None:
        meta = MetadataCatalog.get(dataset_name)
        cat_ids = sorted(cocoa_cls_api.getCatIds())
        cats = cocoa_cls_api.loadCats(cat_ids)
        # The categories in a custom json file may not be sorted.
        thing_classes = [
            c["name"] for c in sorted(cats, key=lambda x: x["id"])
        ]
        meta.thing_classes = thing_classes

        if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)):
            if "coco" not in dataset_name:
                logger.warning(
                    " Category ids in annotations are not in [1, #categories]! "
                    "We'll apply a mapping for you. \n")
        id_map = {v: i for i, v in enumerate(cat_ids)}
        meta.thing_dataset_id_to_contiguous_id = id_map

    # `dataset_name` defaults to None; calling `.endswith` on it used to raise
    # AttributeError on the first annotation.
    use_visible_as_segm = (dataset_name is not None
                           and dataset_name.endswith("visible"))

    # sort indices for reproducible results
    img_ids = sorted(cocoa_cls_api.imgs.keys())
    # imgs is a list of dicts with keys such as 'file_name', 'height',
    # 'width' and 'id'.
    imgs = cocoa_cls_api.loadImgs(img_ids)
    # anns is a list[list[dict]]: the inner list enumerates the objects in an
    # image and the outer list enumerates over images.
    anns = [cocoa_cls_api.imgToAnns[img_id] for img_id in img_ids]

    if "minival" not in json_file:
        # The popular valminusminival & minival annotations for COCO2014 contain this bug.
        # However the ratio of buggy annotations there is tiny and does not affect accuracy.
        # Therefore we explicitly white-list them.
        ann_ids = [
            ann["id"] for anns_per_image in anns for ann in anns_per_image
        ]
        assert len(set(ann_ids)) == len(
            ann_ids), "Annotation ids in '{}' are not unique!".format(
                json_file)

    imgs_anns = list(zip(imgs, anns))

    logger.info("Loaded {} images in COCO format from {}".format(
        len(imgs_anns), json_file))

    dataset_dicts = []
    ann_keys = ["iscrowd", "bbox", "category_id"]

    num_instances_without_valid_segmentation = 0
    num_instances_without_valid_visible_segmentation = 0

    for (img_dict, anno_dict_list) in imgs_anns:
        record = {}
        record["file_name"] = os.path.join(image_root, img_dict["file_name"])
        record["height"] = img_dict["height"]
        record["width"] = img_dict["width"]
        image_id = record["image_id"] = img_dict["id"]

        objs = []
        for anno in anno_dict_list:
            # Force every instance to be a non-crowd instance.
            anno['iscrowd'] = 0
            # Check that the image_id in this annotation is the same as
            # the image_id we're looking at.
            # This fails only when the data parsing logic or the annotation file is buggy.
            assert anno["image_id"] == image_id
            assert anno.get("ignore", 0) == 0

            obj = {key: anno[key] for key in ann_keys if key in anno}

            if use_visible_as_segm:
                segm = anno.get("visible_mask", None)
            else:
                segm = anno.get("segmentation", None)
            if segm:  # either list[list[float]] or dict(RLE)
                if not isinstance(segm, dict):
                    # filter out invalid polygons (< 3 points)
                    segm = _valid_polygons(segm)
                    if len(segm) == 0:
                        num_instances_without_valid_segmentation += 1
                        continue  # ignore this instance
                obj["segmentation"] = segm

            vis_segm = anno.get("visible_mask", None)
            if vis_segm is None:
                # No visible mask on this annotation: fall back to the mask
                # selected above, which is what the previous code ended up
                # storing in this case.
                vis_segm = segm
            if not isinstance(vis_segm, dict):
                # NOTE(review): the previous code filtered the polygons of
                # `segm` here instead of `vis_segm` (copy-paste slip), which
                # replaced any polygon-style visible mask by the full mask
                # and crashed when `segm` was None. Confirm against the
                # dataset that polygon visible masks should be kept as such.
                vis_segm = _valid_polygons(vis_segm or [])
                if len(vis_segm) == 0:
                    num_instances_without_valid_visible_segmentation += 1
                    continue  # ignore this instance
            obj["visible_mask"] = vis_segm
            obj["occlude_rate"] = anno.get("occlude_rate", 0)

            obj["bbox_mode"] = BoxMode.XYWH_ABS
            if id_map:
                obj["category_id"] = id_map[obj["category_id"]]
            objs.append(obj)
        record["annotations"] = objs
        dataset_dicts.append(record)

    # `Logger.warn` is deprecated; use `Logger.warning`.
    if num_instances_without_valid_segmentation > 0:
        logger.warning(
            "Filtered out {} instances without valid segmentation. "
            "There might be issues in your dataset generation process.".format(
                num_instances_without_valid_segmentation))
    if num_instances_without_valid_visible_segmentation > 0:
        logger.warning(
            "Filtered out {} instances without valid visible segmentation. "
            "There might be issues in your dataset generation process.".format(
                num_instances_without_valid_visible_segmentation))
    return dataset_dicts
def benchmark_data_loading(cfg):
    """
    Benchmark the speed of data loading in PySlowFast.

    The "train" loader is iterated for BENCHMARK.NUM_EPOCHS epochs without
    touching the batches. Timing and CPU memory usage are logged every
    BENCHMARK.LOG_PERIOD iterations, at the end of every epoch, and once more
    as an overall summary.

    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    setup_environment()
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Benchmark data loading with config:")
    logger.info(pprint.pformat(cfg))

    timer = Timer()
    dataloader = loader.construct_loader(cfg, "train")
    logger.info("Initialize loader using {:.2f} seconds.".format(timer.seconds()))

    # Total batch size across different machines.
    batch_size = cfg.TRAIN.BATCH_SIZE * cfg.NUM_SHARDS
    log_period = cfg.BENCHMARK.LOG_PERIOD
    epoch_times = []

    # Test for a few epochs.
    for cur_epoch in range(cfg.BENCHMARK.NUM_EPOCHS):
        timer = Timer()
        timer_epoch = Timer()
        iter_times = []
        for cur_iter, _ in enumerate(tqdm.tqdm(dataloader)):
            # Only report on every `log_period`-th iteration (never on the first).
            if cur_iter == 0 or cur_iter % log_period != 0:
                continue
            iter_times.append(timer.seconds())
            ram_usage, ram_total = misc.cpu_mem_usage()
            period_msg = (
                "Epoch {}: {} iters ({} videos) in {:.2f} seconds. "
                "RAM Usage: {:.2f}/{:.2f} GB."
            )
            logger.info(
                period_msg.format(
                    cur_epoch,
                    log_period,
                    log_period * batch_size,
                    iter_times[-1],
                    ram_usage,
                    ram_total,
                )
            )
            timer.reset()

        # Whole-epoch summary.
        epoch_times.append(timer_epoch.seconds())
        ram_usage, ram_total = misc.cpu_mem_usage()
        epoch_msg = (
            "Epoch {}: in total {} iters ({} videos) in {:.2f} seconds. "
            "RAM Usage: {:.2f}/{:.2f} GB."
        )
        logger.info(
            epoch_msg.format(
                cur_epoch,
                len(dataloader),
                len(dataloader) * batch_size,
                epoch_times[-1],
                ram_usage,
                ram_total,
            )
        )

        # Statistics over the logging periods of this epoch.
        period_stats_msg = (
            "Epoch {}: on average every {} iters ({} videos) take {:.2f}/{:.2f} "
            "(avg/std) seconds."
        )
        logger.info(
            period_stats_msg.format(
                cur_epoch,
                log_period,
                log_period * batch_size,
                np.mean(iter_times),
                np.std(iter_times),
            )
        )

    # Statistics over all benchmarked epochs.
    overall_msg = (
        "On average every epoch ({} videos) takes {:.2f}/{:.2f} "
        "(avg/std) seconds."
    )
    logger.info(
        overall_msg.format(
            len(dataloader) * batch_size,
            np.mean(epoch_times),
            np.std(epoch_times),
        )
    )
def __init__(self, warmup_iter=3):
    """
    Create the step timer and the total timer and record the start time.

    Args:
        warmup_iter (int): stored as `_warmup_iter`; how it is used is up to
            the other methods of the class.
    """
    self._warmup_iter = warmup_iter
    # One timer for individual steps ...
    self._step_timer = Timer()
    # ... and a wall-clock start plus a second timer for the whole run.
    self._start_time = time.perf_counter()
    self._total_timer = Timer()
def load_dacon_rotated_train_json(json_file,
                                  image_root,
                                  dataset_name=None,
                                  extra_annotation_keys=None):
    """
    Load a json file with DACON's annotation format as rotated-box
    detection records.

    Args:
        json_file (str): full path to the json file in dacon annotation
            format; resolved through `PathManager.get_local_path`.
        image_root (str): the directory where the images in this json file exists.
        dataset_name (str): the name of the dataset. If provided, this
            function will also put "thing_classes" into the metadata
            associated with this dataset.
        extra_annotation_keys (list[str]): accepted for signature
            compatibility with the other loaders; not used by this function.

    Returns:
        list[dict]: one record per entry of the API's `features`, with
        "file_name", "height", "width" and "annotations". Each annotation
        holds a "bbox" in `BoxMode.XYWHA_ABS`, its "bbox_mode" and a
        zero-based "category_id".

    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
        2. The records do not contain an "image_id" field.
    """
    timer = Timer()
    json_file = PathManager.get_local_path(json_file)
    # Keep anything the API prints while parsing out of stdout.
    with contextlib.redirect_stdout(io.StringIO()):
        dacon_api = DaconAPI(json_file)
    anns = dacon_api.features
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(
            json_file, timer.seconds()))

    # `dataset_name` is optional; only touch the metadata catalog when a name
    # was actually given (previously the default None was passed straight to
    # `MetadataCatalog.get`).
    if dataset_name is not None:
        meta = MetadataCatalog.get(dataset_name)
        meta.thing_classes = dacon_api.thing_classes

    logger.info("Loaded {} images in dacon format from {}".format(
        len(anns), json_file))

    dataset_dicts = []
    for ann in anns:
        record = {}
        record["file_name"] = os.path.join(image_root, ann['image_id'])
        record["height"] = ann['height']
        record["width"] = ann['width']
        patch_size = (ann['width'], ann['height'])

        objs = []
        for p in ann['properties']:
            obj = {}
            # "bounds_imcoords" is a comma-separated string; the conversion
            # to a rotated box is delegated to the API helper.
            obj["bbox"] = dacon_api.cvt_dacon_to_detectron_rotated(
                p["bounds_imcoords"].split(","), patch_size)
            obj["bbox_mode"] = BoxMode.XYWHA_ABS
            # Shift "type_id" down by one to get a zero-based category id.
            obj["category_id"] = int(p["type_id"]) - 1
            objs.append(obj)
        record["annotations"] = objs
        dataset_dicts.append(record)

    return dataset_dicts
def load_filtered_lvis_json(json_file,
                            image_root,
                            metadata,
                            dataset_name=None):
    """
    Read an LVIS annotation file and convert it to Detectron2-style records.

    When the configured number of classes is 454, only annotations of novel
    categories are kept and their ids are remapped through
    `metadata["class_mapping"]`; otherwise the 1-indexed category ids are
    simply shifted to 0-indexed ones.

    Args:
        json_file (str): full path to the LVIS json annotation file.
        image_root (str): the directory where the images in this json file exists.
        metadata: meta data associated with dataset_name
        dataset_name (str): the name of the dataset (e.g., "lvis_v0.5_train").
            For names containing "train" the number of "thing_classes" in
            `metadata` is checked against the configured NUM_CLASSES.

    Returns:
        list[dict]: a list of dicts in Detectron2 standard format. (See
        `Using Custom Datasets </tutorials/datasets.html>`_ )

    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
    """
    from lvis import LVIS

    json_file = PathManager.get_local_path(json_file)

    timer = Timer()
    lvis_api = LVIS(json_file)
    if timer.seconds() > 1:
        load_msg = "Loading {} takes {:.2f} seconds.".format(
            json_file, timer.seconds())
        logger.info(load_msg)

    is_train_split = dataset_name is not None and "train" in dataset_name
    if is_train_split:
        assert global_cfg.MODEL.ROI_HEADS.NUM_CLASSES == len(
            metadata["thing_classes"]
        ), "NUM_CLASSES should match number of categories: ALL=1230, NOVEL=454"

    # sort indices for reproducible results
    img_ids = sorted(lvis_api.imgs.keys())
    imgs = lvis_api.load_imgs(img_ids)
    anns = [lvis_api.img_ann_map[one_id] for one_id in img_ids]

    # Sanity check that each annotation has a unique id
    ann_ids = []
    for anns_per_image in anns:
        for ann in anns_per_image:
            ann_ids.append(ann["id"])
    assert len(set(ann_ids)) == len(
        ann_ids), "Annotation ids in '{}' are not unique".format(json_file)

    imgs_anns = list(zip(imgs, anns))

    logger.info("Loaded {} images in the LVIS format from {}".format(
        len(imgs_anns), json_file))

    dataset_dicts = []
    for img_dict, anno_dict_list in imgs_anns:
        file_name = img_dict["file_name"]
        if file_name.startswith("COCO"):
            # Keep only the last 16 characters of COCO-prefixed names.
            file_name = file_name[-16:]

        record = {
            "file_name": os.path.join(image_root, file_name),
            "height": img_dict["height"],
            "width": img_dict["width"],
            "not_exhaustive_category_ids": img_dict.get(
                "not_exhaustive_category_ids", []),
            "neg_category_ids": img_dict.get("neg_category_ids", []),
        }
        image_id = record["image_id"] = img_dict["id"]

        objs = []
        for anno in anno_dict_list:
            # Check that the image_id in this annotation is the same as
            # the image_id we're looking at.
            assert anno["image_id"] == image_id
            obj = {"bbox": anno["bbox"], "bbox_mode": BoxMode.XYWH_ABS}
            if global_cfg.MODEL.ROI_HEADS.NUM_CLASSES != 454:
                # Convert 1-indexed to 0-indexed
                obj["category_id"] = anno["category_id"] - 1
            elif anno["category_id"] - 1 in LVIS_CATEGORIES_NOVEL_IDS:
                # Novel classes only
                obj["category_id"] = metadata["class_mapping"][
                    anno["category_id"] - 1]
            else:
                # Not a novel category: drop the annotation.
                continue
            objs.append(obj)
        record["annotations"] = objs
        dataset_dicts.append(record)

    return dataset_dicts
def main(cfg: DictConfig) -> None:
    """
    Train a CNN-LSTM video classifier and evaluate it after every epoch.

    The config is optionally merged with its "experiments" and "debug"
    sections, datasets and loaders are built for the train and test splits,
    the model is optionally restored from a checkpoint, and then for every
    epoch the model is trained, evaluated on the test set, logged to
    TensorBoard and periodically checkpointed.

    Args:
        cfg (DictConfig): configuration; the `train` and `dataset` sections
            are read here.
    """
    # A logger for this file. It must exist before the "debug" branch below
    # logs: `logger` is a local name, so using it before this assignment
    # raised UnboundLocalError.
    logger = logging.getLogger(__name__)

    if "experiments" in cfg.keys():
        cfg = OmegaConf.merge(cfg, cfg.experiments)

    if "debug" in cfg.keys():
        logger.info("Run script in debug")
        cfg = OmegaConf.merge(cfg, cfg.debug)

    # NOTE: hydra causes the python file to run in hydra.run.dir by default
    logger.info(f"Run script in {HydraConfig.get().run.dir}")

    writer = SummaryWriter(log_dir=cfg.train.tensorboard_dir)

    # `exist_ok` avoids the check-then-create race of a separate exists() test.
    checkpoints_dir = Path(cfg.train.checkpoints_dir)
    checkpoints_dir.mkdir(parents=True, exist_ok=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    image_shape = (cfg.train.channels, cfg.train.image_height,
                   cfg.train.image_width)

    # NOTE: With hydra, the python file runs in hydra.run.dir by default, so
    # set the dataset path to a full path or an appropriate relative path
    dataset_path = Path(cfg.dataset.root) / cfg.dataset.frames
    split_path = Path(cfg.dataset.root) / cfg.dataset.split_file

    assert dataset_path.exists(), "Video image folder not found"
    assert (split_path.exists()
            ), "The file that describes the split of train/test not found."

    # Define training set
    train_dataset = Dataset(
        dataset_path=dataset_path,
        split_path=split_path,
        split_number=cfg.dataset.split_number,
        input_shape=image_shape,
        sequence_length=cfg.train.sequence_length,
        training=True,
    )

    # Define train dataloader
    train_dataloader = DataLoader(
        train_dataset,
        batch_size=cfg.train.batch_size,
        shuffle=True,
        num_workers=cfg.train.num_workers,
    )

    # Define test set
    test_dataset = Dataset(
        dataset_path=dataset_path,
        split_path=split_path,
        split_number=cfg.dataset.split_number,
        input_shape=image_shape,
        sequence_length=cfg.train.sequence_length,
        training=False,
    )

    # Define test dataloader
    test_dataloader = DataLoader(
        test_dataset,
        batch_size=cfg.train.batch_size,
        shuffle=False,
        num_workers=cfg.train.num_workers,
    )

    # Classification criterion
    criterion = nn.CrossEntropyLoss().to(device)

    # Define network
    model = CNNLSTM(
        num_classes=train_dataset.num_classes,
        latent_dim=cfg.train.latent_dim,
        lstm_layers=cfg.train.lstm_layers,
        hidden_dim=cfg.train.hidden_dim,
        bidirectional=cfg.train.bidirectional,
        attention=cfg.train.attention,
    )
    model = model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

    checkpointer = Checkpointer(
        model,
        optimizer=optimizer,
        # scheduler=scheduler,
        save_dir=cfg.train.checkpoints_dir,
        save_to_disk=True,
    )

    # Decide where to start: resume the latest checkpoint, initialise the
    # weights from a given model file, or start from scratch.
    start_epoch = 0
    if cfg.train.resume:
        if checkpointer.has_checkpoint():
            ckpt = checkpointer.resume_or_load("", resume=True)
            start_epoch = ckpt["epoch"]
            model.to(device)
            # Move the restored optimizer state to the training device too.
            for state in optimizer.state.values():
                for k, v in state.items():
                    if isinstance(v, torch.Tensor):
                        state[k] = v.to(device)
    elif cfg.train.checkpoint_model != "":
        ckpt = torch.load(cfg.train.checkpoint_model, map_location="cpu")
        model.load_state_dict(ckpt["model"])
        model.to(device)

    def test_model(epoch):
        """ Evaluate the model on the test set """
        model.eval()
        test_metrics = {"loss": [], "acc": []}
        timer = Timer()
        for batch_i, (X, y) in enumerate(test_dataloader):
            batch_i += 1
            image_sequences = Variable(X.to(device), requires_grad=False)
            labels = Variable(y, requires_grad=False).to(device)

            with torch.no_grad():
                # Reset LSTM hidden state
                model.lstm.reset_hidden_state()
                # Get sequence predictions
                predictions = model(image_sequences)

            # Compute metrics
            loss = criterion(predictions, labels)
            acc = (predictions.detach().argmax(1) == labels
                   ).cpu().numpy().mean()

            # Keep track of loss and accuracy
            test_metrics["loss"].append(loss.item())
            test_metrics["acc"].append(acc)

            # Determine approximate time left
            batches_done = batch_i - 1
            batches_left = len(test_dataloader) - batches_done
            time_left = datetime.timedelta(seconds=batches_left *
                                           timer.seconds())
            time_iter = round(timer.seconds(), 3)
            timer.reset()

            # Log test performance
            logger.info(
                f'Testing - [Epoch: {epoch}/{cfg.train.num_epochs}] [Batch: {batch_i}/{len(test_dataloader)}] [Loss: {np.mean(test_metrics["loss"]):.3f}] [Acc: {np.mean(test_metrics["acc"]):.3f}] [ETA: {time_left}] [Iter time: {time_iter}s/it]'
            )

        writer.add_scalar("test/loss", np.mean(test_metrics["loss"]), epoch)
        writer.add_scalar("test/acc", np.mean(test_metrics["acc"]), epoch)

        model.train()

    for epoch in range(start_epoch, cfg.train.num_epochs):
        # Epochs are reported and checkpointed 1-based.
        epoch += 1
        epoch_metrics = {"loss": [], "acc": []}
        timer = Timer()
        for batch_i, (X, y) in enumerate(train_dataloader):
            batch_i += 1
            # Batches holding a single sample are skipped.
            if X.size(0) == 1:
                continue

            image_sequences = Variable(X.to(device), requires_grad=True)
            labels = Variable(y.to(device), requires_grad=False)

            optimizer.zero_grad()

            # Reset LSTM hidden state
            model.lstm.reset_hidden_state()

            # Get sequence predictions
            predictions = model(image_sequences)

            # Compute metrics
            loss = criterion(predictions, labels)
            acc = (
                predictions.detach().argmax(1) == labels).cpu().numpy().mean()

            loss.backward()
            optimizer.step()

            # Keep track of epoch metrics
            epoch_metrics["loss"].append(loss.item())
            epoch_metrics["acc"].append(acc)

            # Determine approximate time left
            batches_done = (epoch - 1) * len(train_dataloader) + (batch_i - 1)
            batches_left = cfg.train.num_epochs * len(
                train_dataloader) - batches_done
            time_left = datetime.timedelta(seconds=batches_left *
                                           timer.seconds())
            time_iter = round(timer.seconds(), 3)
            timer.reset()

            logger.info(
                f'Training - [Epoch: {epoch}/{cfg.train.num_epochs}] [Batch: {batch_i}/{len(train_dataloader)}] [Loss: {np.mean(epoch_metrics["loss"]):.3f}] [Acc: {np.mean(epoch_metrics["acc"]):.3f}] [ETA: {time_left}] [Iter time: {time_iter}s/it]'
            )

            # Empty cache
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

        writer.add_scalar("train/loss", np.mean(epoch_metrics["loss"]), epoch)
        writer.add_scalar("train/acc", np.mean(epoch_metrics["acc"]), epoch)

        # Evaluate the model on the test set
        test_model(epoch)

        # Save model checkpoint
        if epoch % cfg.train.checkpoint_interval == 0:
            checkpointer.save(f"checkpoint_{epoch:04}", epoch=epoch)

    writer.close()
def load_viroi_json(dataset_name,
                    image_path,
                    stuff_path,
                    panoptic_path,
                    class_json_file,
                    relation_json_file,
                    instance_json_file,
                    triplet_json_file,
                    extra_annotation_keys=None):
    """
    Load the VIROI annotations and convert them into a list of per-image
    dataset dicts (Detectron2-style records extended with stuff annotations
    and relation triplets).

    Args:
        dataset_name (str): name used to look the dataset up in
            `MetadataCatalog`. Its metadata is read for the mappings
            "thing_dataset_id_to_contiguous_id",
            "stuff_dataset_id_to_contiguous_id" and
            "relation_dataset_id_to_contiguous_id"; this function does not
            create them.
        image_path (str): directory joined with every image name to build
            the record's "file_name".
        stuff_path (str): directory joined with every image name to build
            the record's "sem_seg_file_name".
        panoptic_path, class_json_file, relation_json_file,
        instance_json_file, triplet_json_file: forwarded unchanged (together
            with `image_path` and `stuff_path`) to the `VIROI` constructor.
        extra_annotation_keys (list[str] or None): accepted for signature
            compatibility with the COCO loaders; currently unused.

    Returns:
        list[dict]: one record per image with the keys "file_name",
        "height", "width", "image_id", "annotations" (thing instances),
        "stuff_annotations", "instance_ids", "stuff_instance_ids",
        "sem_seg_file_name" and "triplets". In each triplet, "subject_id"
        and "object_id" are positions in the concatenation
        `instance_ids + stuff_instance_ids`.

    Raises:
        ValueError: if a triplet references an instance id that is not among
            the instances of its image.

    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
    """
    timer = Timer()
    viroi_api = VIROI(image_path, stuff_path, panoptic_path, class_json_file,
                      relation_json_file, instance_json_file,
                      triplet_json_file)
    if timer.seconds() > 1:
        logger.info("Loading viroi takes {:.2f} seconds.".format(
            timer.seconds()))

    # The dataset-id -> contiguous-id mappings are read from the metadata as
    # they are; nothing in this function populates them.
    meta = MetadataCatalog.get(dataset_name)
    stuff_dataset_id_to_contiguous_id = meta.get(
        "stuff_dataset_id_to_contiguous_id")
    thing_dataset_id_to_contiguous_id = meta.get(
        "thing_dataset_id_to_contiguous_id")
    relation_dataset_id_to_contiguous_id = meta.get(
        "relation_dataset_id_to_contiguous_id")

    logger.info("Loaded {} images in VIROI".format(
        len(viroi_api.image_instance_dict)))

    # Fetched once instead of twice per instance.
    # NOTE(review): assumes loadClassdict() is a plain accessor that returns
    # the same mapping on every call -- confirm against the VIROI API.
    class_dict = viroi_api.loadClassdict()

    def _to_annotation(instance, dataset_id_to_contiguous_id):
        # Build one annotation dict; shared by thing and stuff instances.
        class_info = class_dict[str(instance['class_id'])]
        box = instance['box']
        return {
            'iscrowd': instance['iscrowd'],
            'labeled': 1 if instance['labeled'] else 0,
            # Reordered to match XYWH_ABS; presumably `box` is stored as
            # (y0, x0, y1, x1) -- TODO confirm against the annotation files.
            'bbox': [box[1], box[0], box[3] - box[1], box[2] - box[0]],
            'category_id':
            dataset_id_to_contiguous_id[class_info['category_id']],
            'category_name': class_info['name'],
            'class_id': instance['class_id'],
            'segmentation': instance['segmentation'],
            'bbox_mode': BoxMode.XYWH_ABS,
        }

    dataset_dict = []
    for loaded_id in viroi_api.loadIds():
        img_dict = viroi_api.loadImgs(loaded_id)[0]
        # From here on the id stored in the image dict itself is used.
        image_id = img_dict["image_id"]

        record = {}
        record["file_name"] = os.path.join(image_path, img_dict["image_name"])
        record["height"] = img_dict["height"]
        record["width"] = img_dict["width"]
        record["image_id"] = image_id

        objs = []
        stfs = []
        object_id_list = []
        stuff_id_list = []
        instance_dict = img_dict['instances']
        for instance_id in instance_dict:
            instance = instance_dict[instance_id]
            if viroi_api.class_dict[str(instance['class_id'])]['isthing']:
                object_id_list.append(instance_id)
                objs.append(
                    _to_annotation(instance,
                                   thing_dataset_id_to_contiguous_id))
            else:
                stuff_id_list.append(instance_id)
                stfs.append(
                    _to_annotation(instance,
                                   stuff_dataset_id_to_contiguous_id))

        record["annotations"] = objs
        record["stuff_annotations"] = stfs
        record["instance_ids"] = object_id_list
        record["stuff_instance_ids"] = stuff_id_list
        # NOTE: replaces every occurrence of "jpg" in the name, not only the
        # file extension.
        record["sem_seg_file_name"] = os.path.join(
            stuff_path, img_dict["image_name"].replace("jpg", "png"))

        # Triplets address instances by position in "things followed by
        # stuff"; a dict gives O(1) lookups instead of a list scan each time.
        position_of = {
            inst_id: pos
            for pos, inst_id in enumerate(object_id_list + stuff_id_list)
        }

        def _position(raw_instance_id, image_id=image_id,
                      position_of=position_of):
            # Keep raising ValueError, as list.index() did, but say which
            # instance and image are affected.
            key = str(raw_instance_id)
            try:
                return position_of[key]
            except KeyError:
                raise ValueError(
                    "Triplet references instance id {!r} which is not among "
                    "the instances of image {!r}".format(key,
                                                         image_id)) from None

        triplets = viroi_api.loadTriplets(image_id)[0]
        triplet_records = []
        for triplet_id in triplets:
            triplet = triplets[triplet_id]
            triplet_records.append({
                'subject_id':
                _position(triplet['subject_instance_id']),
                'object_id':
                _position(triplet['object_instance_id']),
                'relation_id':
                relation_dataset_id_to_contiguous_id[triplet['relation_id']],
            })
        record["triplets"] = triplet_records
        dataset_dict.append(record)
    return dataset_dict
def load_coco_with_attributes_json(json_file,
                                   image_root,
                                   dataset_name=None,
                                   extra_annotation_keys=None):
    """
    COCO-format loader that additionally copies each annotation's
    "attribute_ids" into the resulting dataset dicts.

    Args:
        json_file (str): path to the COCO-format annotation file; resolved
            through `PathManager.get_local_path`.
        image_root (str): directory joined with each image's "file_name".
        dataset_name (str or None): when given, "thing_classes" is stored in
            the dataset's metadata and, if the category ids are not exactly
            1..#categories, a contiguous id mapping is stored and applied.
        extra_annotation_keys (list[str] or None): additional per-annotation
            keys copied as-is when present.

    Returns:
        list[dict]: one record per image with "file_name", "height",
        "width", "image_id" and "annotations".
    """
    from pycocotools.coco import COCO

    timer = Timer()
    json_file = PathManager.get_local_path(json_file)
    # pycocotools prints while loading; keep that off stdout.
    with contextlib.redirect_stdout(io.StringIO()):
        coco_api = COCO(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(
            json_file, timer.seconds()))

    id_map = None
    if dataset_name is not None:
        meta = MetadataCatalog.get(dataset_name)
        cat_ids = sorted(coco_api.getCatIds())
        categories = coco_api.loadCats(cat_ids)
        ordered = sorted(categories, key=lambda x: x["id"])
        meta.thing_classes = [cat["name"] for cat in ordered]

        ids_are_contiguous = min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)
        if not ids_are_contiguous:
            if "coco" not in dataset_name:
                logger.warning(""" Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you. """)
        id_map = {v: i for i, v in enumerate(cat_ids)}
        meta.thing_dataset_id_to_contiguous_id = id_map

    # Sorted image ids keep the output order reproducible.
    img_ids = sorted(coco_api.imgs.keys())
    imgs = coco_api.loadImgs(img_ids)
    anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]

    if "minival" not in json_file:
        ann_ids = []
        for anns_per_image in anns:
            for ann in anns_per_image:
                ann_ids.append(ann["id"])
        assert len(set(ann_ids)) == len(
            ann_ids), "Annotation ids in '{}' are not unique!".format(
                json_file)

    imgs_anns = list(zip(imgs, anns))
    logger.info("Loaded {} images in COCO format from {}".format(
        len(imgs_anns), json_file))

    ann_keys = ["iscrowd", "bbox", "keypoints", "category_id"]
    ann_keys = ann_keys + (extra_annotation_keys or [])

    dataset_dicts = []
    num_instances_without_valid_segmentation = 0
    for img_dict, anno_dict_list in imgs_anns:
        image_id = img_dict["id"]
        record = {
            "file_name": os.path.join(image_root, img_dict["file_name"]),
            "height": img_dict["height"],
            "width": img_dict["width"],
            "image_id": image_id,
        }

        objs = []
        for anno in anno_dict_list:
            assert anno["image_id"] == image_id
            assert anno.get(
                "ignore",
                0) == 0, '"ignore" in COCO json file is not supported.'

            obj = {}
            for key in ann_keys:
                if key in anno:
                    obj[key] = anno[key]

            segm = anno.get("segmentation", None)
            if segm:
                if not isinstance(segm, dict):
                    # Polygon list: keep polygons with an even number of
                    # coordinates and at least three points.
                    segm = [
                        poly for poly in segm
                        if len(poly) % 2 == 0 and len(poly) >= 6
                    ]
                    if not segm:
                        num_instances_without_valid_segmentation += 1
                        continue
                obj["segmentation"] = segm

            keypts = anno.get("keypoints", None)
            if keypts:
                # Every third entry is left untouched; the other two of each
                # triple are shifted by 0.5. The list is modified in place.
                for idx, v in enumerate(keypts):
                    if idx % 3 == 2:
                        continue
                    keypts[idx] = v + 0.5
                obj["keypoints"] = keypts

            attrs = anno.get("attribute_ids", None)
            if attrs:
                obj["attribute_ids"] = attrs

            obj["bbox_mode"] = BoxMode.XYWH_ABS
            if id_map:
                obj["category_id"] = id_map[obj["category_id"]]
            objs.append(obj)

        record["annotations"] = objs
        dataset_dicts.append(record)

    if num_instances_without_valid_segmentation > 0:
        logger.warning(
            "Filtered out {} instances without valid segmentation. "
            "There might be issues in your dataset generation process.".format(
                num_instances_without_valid_segmentation))
    return dataset_dicts
def __init__(self) -> None:
    """Create a freshly reset timer and an empty list of epoch times."""
    timer = Timer()
    timer.reset()
    self.timer = timer
    # Starts empty; nothing in this method appends to it.
    self.epoch_times = []
def load_coco_json(json_file,
                   image_root,
                   dataset_name=None,
                   extra_annotation_keys=None):
    """
    Load a json file with COCO's instances annotation format.
    Currently supports instance detection.

    Args:
        json_file (str): full path to the json file in COCO instances
            annotation format.
        image_root (str): the directory where the images in this json file
            exists.
        dataset_name (str): the name of the dataset (e.g., coco_2017_train).
            If provided, this function will also put "thing_classes" into
            the metadata associated with this dataset and, when the category
            ids are not exactly 1..#categories, store and apply a mapping to
            contiguous ids.
        extra_annotation_keys (list[str]): list of per-annotation keys that
            should also be loaded into the dataset dict (besides "iscrowd",
            "bbox", "category_id"). The values for these keys will be
            returned as-is.

    Returns:
        list[dict]: a list of dicts in Detectron2 standard format (see the
        "Using Custom Datasets" tutorial).

    Raises:
        KeyError: if an annotation uses a category id that is missing from
            the "categories" of the json file while an id mapping is active.

    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
    """
    from pycocotools.coco import COCO

    timer = Timer()
    json_file = PathManager.get_local_path(json_file)
    # pycocotools prints while loading; keep that off stdout.
    with contextlib.redirect_stdout(io.StringIO()):
        coco_api = COCO(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(
            json_file, timer.seconds()))

    id_map = None
    if dataset_name is not None:
        meta = MetadataCatalog.get(dataset_name)
        cat_ids = sorted(coco_api.getCatIds())
        cats = coco_api.loadCats(cat_ids)
        # The categories in a custom json file may not be sorted.
        thing_classes = [
            c["name"] for c in sorted(cats, key=lambda x: x["id"])
        ]
        meta.thing_classes = thing_classes

        # In COCO, certain category ids are artificially removed,
        # and by convention they are always ignored.
        # We deal with COCO's id issue and translate
        # the category ids to contiguous ids in [0, 80).
        # It works by looking at the "categories" field in the json, therefore
        # if users' own json also have incontiguous ids, we'll
        # apply this mapping as well but print a warning.
        if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)):
            if "coco" not in dataset_name:
                logger.warning(""" Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you. """)
        id_map = {v: i for i, v in enumerate(cat_ids)}
        meta.thing_dataset_id_to_contiguous_id = id_map

    # sort indices for reproducible results
    img_ids = sorted(coco_api.imgs.keys())
    # imgs is a list of dicts, one per image (keys such as 'file_name',
    # 'height', 'width', 'id').
    imgs = coco_api.loadImgs(img_ids)
    # anns is a list[list[dict]], where each dict is an annotation
    # record for an object. The inner list enumerates the objects in an image
    # and the outer list enumerates over images.
    anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]

    if "minival" not in json_file:
        # The popular valminusminival & minival annotations for COCO2014
        # contain this bug. However the ratio of buggy annotations there is
        # tiny and does not affect accuracy.
        # Therefore we explicitly white-list them.
        ann_ids = [
            ann["id"] for anns_per_image in anns for ann in anns_per_image
        ]
        assert len(set(ann_ids)) == len(
            ann_ids), "Annotation ids in '{}' are not unique!".format(
                json_file)

    imgs_anns = list(zip(imgs, anns))
    logger.info("Loaded {} images in COCO format from {}".format(
        len(imgs_anns), json_file))

    dataset_dicts = []
    ann_keys = ["iscrowd", "bbox", "category_id"
                ] + (extra_annotation_keys or [])

    for (img_dict, anno_dict_list) in imgs_anns:
        record = {}
        record["file_name"] = os.path.join(image_root, img_dict["file_name"])
        record["height"] = img_dict["height"]
        record["width"] = img_dict["width"]
        image_id = record["image_id"] = img_dict["id"]

        objs = []
        for anno in anno_dict_list:
            # Check that the image_id in this annotation is the same as
            # the image_id we're looking at.
            # This fails only when the data parsing logic or the annotation
            # file is buggy.
            assert anno["image_id"] == image_id
            assert anno.get(
                "ignore",
                0) == 0, '"ignore" in COCO json file is not supported.'

            obj = {key: anno[key] for key in ann_keys if key in anno}
            obj["bbox_mode"] = BoxMode.XYWH_ABS
            if id_map:
                annotation_category_id = obj["category_id"]
                try:
                    obj["category_id"] = id_map[annotation_category_id]
                except KeyError as e:
                    # Same exception type as before, now with context.
                    raise KeyError(
                        f"Encountered category_id={annotation_category_id} "
                        "but this id does not exist in 'categories' of the json file."
                    ) from e
            objs.append(obj)
        record["annotations"] = objs
        dataset_dicts.append(record)

    return dataset_dicts
def benchmark_data(cfg: AttrDict, split: str = "train"):
    """
    Benchmark the data loading throughput of one dataset split.

    Builds the dataset and dataloader for `split`, consumes `WARMUP_ITERS`
    batches as warm-up (logging how long they took), then times `MAX_ITERS`
    batches once and `BENCHMARK_ROUNDS` more times, logging images/sec and
    ms/img for each round.

    Args:
        cfg (AttrDict): configuration. Read here: `DATA[split]`
            (including "BATCHSIZE_PER_REPLICA"), `DATA.NUM_DATALOADER_WORKERS`,
            `MACHINE.DEVICE` (optional), `SEED_VALUE` and
            `MULTI_PROCESSING_METHOD`.
        split (str): dataset split name; upper-cased before use.

    Note:
        All rounds draw from the same iterator, so the dataloader must yield
        at least `WARMUP_ITERS + (1 + BENCHMARK_ROUNDS) * MAX_ITERS` batches,
        otherwise `next()` raises `StopIteration`.
    """
    split = split.upper()
    total_images = MAX_ITERS * cfg["DATA"][split]["BATCHSIZE_PER_REPLICA"]
    dataset = build_dataset(cfg, split)

    try:
        device = torch.device("cuda" if cfg.MACHINE.DEVICE == "gpu" else "cpu")
    except AttributeError:
        # cfg has no MACHINE.DEVICE entry: fall back to cuda.
        device = torch.device("cuda")

    # Gives sampler same seed for entire distributed group as per pytorch documentation.
    sampler_seed = cfg.SEED_VALUE

    dataloader = get_loader(
        dataset=dataset,
        dataset_config=cfg["DATA"][split],
        num_dataloader_workers=cfg.DATA.NUM_DATALOADER_WORKERS,
        pin_memory=False,
        multi_processing_method=cfg.MULTI_PROCESSING_METHOD,
        device=device,
        sampler_seed=sampler_seed,
    )

    # Fairstore data sampler would require setting the start iter before it can start.
    if hasattr(dataloader.sampler, "set_start_iter"):
        dataloader.sampler.set_start_iter(0)

    def _timed_round(iterator, prefix=""):
        # Consume MAX_ITERS batches and log the throughput of this round.
        round_timer = Timer()
        for _ in tqdm.trange(MAX_ITERS):
            next(iterator)
        time_elapsed = round_timer.seconds()
        logging.info(
            f"{prefix}iters: {MAX_ITERS}; images: {total_images}; "
            f"time: {time_elapsed} seconds; "
            f"images/sec: {round(float(total_images / time_elapsed), 4)}; "
            f"ms/img: {round(float(1000 * time_elapsed / total_images), 4)} "
        )

    # Warm-up: the timer covers iterator creation plus all WARMUP_ITERS
    # batches, so the logged figure matches what the message reports.
    timer = Timer()
    data_iterator = iter(dataloader)
    for _ in range(WARMUP_ITERS):
        next(data_iterator)
    warmup_time = timer.seconds()
    logging.info(f"Warmup time {WARMUP_ITERS} batches: {warmup_time} seconds")

    # measure the number of images per sec over MAX_ITERS iterations.
    _timed_round(data_iterator)

    # run benchmark for a few more rounds to catch fluctuations
    for round_idx in range(BENCHMARK_ROUNDS):
        _timed_round(data_iterator, prefix=f"round: {round_idx}: ")

    del data_iterator
    del dataloader
def load_coco_json(json_file, image_root, dataset_name=None, extra_annotation_keys=None):
    """
    Read a COCO "instances" json file and turn it into dataset dicts.

    Detection boxes, instance segmentation (polygons or RLE) and person
    keypoints are handled.

    Args:
        json_file (str): full path to the COCO-format annotation file.
        image_root (str or path-like): directory that holds the images
            referenced by the json file.
        dataset_name (str or None): dataset name (e.g., coco_2017_train).
            When given, this function also:

            * stores "thing_classes" in the dataset's metadata;
            * maps category ids to a contiguous range and stores the mapping
              as "thing_dataset_id_to_contiguous_id" in the metadata.

            Leave it as None only to obtain the raw category ids for further
            manual processing.
        extra_annotation_keys (list[str]): extra per-annotation keys to copy
            as-is into each annotation (in addition to "iscrowd", "bbox",
            "keypoints", "category_id", "segmentation").

    Returns:
        list[dict]: dataset dicts in Detectron2 standard format when
        `dataset_name` is not None. With `dataset_name=None` the category
        ids are left untouched and may be non-contiguous.

    Notes:
        1. The image files are not read; the records have no "image" field.
    """
    from pycocotools.coco import COCO

    timer = Timer()
    json_file = PathManager.get_local_path(json_file)
    # pycocotools prints while loading; keep that off stdout.
    with contextlib.redirect_stdout(io.StringIO()):
        coco_api = COCO(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()))

    id_map = None
    if dataset_name is not None:
        meta = MetadataCatalog.get(dataset_name)
        cat_ids = sorted(coco_api.getCatIds())
        # Custom json files may list categories in any order; order by id.
        ordered_cats = sorted(coco_api.loadCats(cat_ids), key=lambda x: x["id"])
        meta.thing_classes = [cat["name"] for cat in ordered_cats]

        # COCO leaves gaps in its category ids, so ids are translated to a
        # contiguous 0-based range built from the json's category ids.
        # Non-COCO datasets with such ids get the same mapping plus a warning.
        ids_are_contiguous = min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)
        if not ids_are_contiguous and "coco" not in dataset_name:
            logger.warning(
                """ Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you. """
            )
        id_map = {v: i for i, v in enumerate(cat_ids)}
        meta.thing_dataset_id_to_contiguous_id = id_map

    # Sorted ids make the output order reproducible.
    img_ids = sorted(coco_api.imgs.keys())
    # One dict per image ('file_name', 'height', 'width', 'id', ...).
    imgs = coco_api.loadImgs(img_ids)
    # list[list[dict]]: outer list over images, inner list over the
    # annotation records ('bbox', 'category_id', 'segmentation', ...) of
    # that image.
    anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]

    total_num_valid_anns = sum(len(per_image) for per_image in anns)
    total_num_anns = len(coco_api.anns)
    if total_num_valid_anns < total_num_anns:
        logger.warning(
            f"{json_file} contains {total_num_anns} annotations, but only "
            f"{total_num_valid_anns} of them match to images in the file."
        )

    if "minival" not in json_file:
        # The COCO2014 valminusminival & minival files are known to contain
        # a few duplicated ids; they are deliberately exempt from this check.
        ann_ids = []
        for anns_per_image in anns:
            ann_ids.extend(ann["id"] for ann in anns_per_image)
        assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format(
            json_file
        )

    imgs_anns = list(zip(imgs, anns))
    logger.info("Loaded {} images in COCO format from {}".format(len(imgs_anns), json_file))

    ann_keys = ["iscrowd", "bbox", "keypoints", "category_id"] + (extra_annotation_keys or [])

    dataset_dicts = []
    num_instances_without_valid_segmentation = 0
    for img_dict, anno_dict_list in imgs_anns:
        image_id = img_dict["id"]
        record = {
            "file_name": os.path.join(image_root, img_dict["file_name"]),
            "height": img_dict["height"],
            "width": img_dict["width"],
            "image_id": image_id,
        }

        objs = []
        for anno in anno_dict_list:
            # An annotation must belong to the image it is listed under;
            # a mismatch means buggy parsing or a buggy annotation file.
            assert anno["image_id"] == image_id
            assert anno.get("ignore", 0) == 0, '"ignore" in COCO json file is not supported.'

            obj = {}
            for key in ann_keys:
                if key in anno:
                    obj[key] = anno[key]

            segm = anno.get("segmentation", None)
            if segm:
                if isinstance(segm, dict):
                    # RLE dict: a list of counts is converted through
                    # mask_util.frPyObjects; anything else is kept as is.
                    if isinstance(segm["counts"], list):
                        segm = mask_util.frPyObjects(segm, *segm["size"])
                else:
                    # Polygon list: drop polygons with fewer than 3 points
                    # or an odd number of coordinates.
                    segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6]
                    if not segm:
                        num_instances_without_valid_segmentation += 1
                        continue  # nothing usable left: skip this instance
                obj["segmentation"] = segm

            keypts = anno.get("keypoints", None)
            if keypts:
                # Every third entry is left untouched. The other two of each
                # triple are treated as integer pixel indices and shifted by
                # 0.5 to become floating point coordinates. The list is
                # modified in place.
                for idx, v in enumerate(keypts):
                    if idx % 3 == 2:
                        continue
                    keypts[idx] = v + 0.5
                obj["keypoints"] = keypts

            obj["bbox_mode"] = BoxMode.XYWH_ABS
            if id_map:
                annotation_category_id = obj["category_id"]
                try:
                    obj["category_id"] = id_map[annotation_category_id]
                except KeyError as e:
                    raise KeyError(
                        f"Encountered category_id={annotation_category_id} "
                        "but this id does not exist in 'categories' of the json file."
                    ) from e
            objs.append(obj)

        record["annotations"] = objs
        dataset_dicts.append(record)

    if num_instances_without_valid_segmentation > 0:
        logger.warning(
            "Filtered out {} instances without valid segmentation. ".format(
                num_instances_without_valid_segmentation
            )
            + "There might be issues in your dataset generation process. Please "
            "check https://detectron2.readthedocs.io/en/latest/tutorials/datasets.html carefully"
        )
    return dataset_dicts
def load_lvis_json(json_file,
                   image_root,
                   dataset_name=None,
                   extra_annotation_keys=None):
    """
    Load a json file in LVIS's annotation format.

    Args:
        json_file (str): full path to the LVIS json annotation file.
        image_root (str): the directory where the images in this json file
            exists. The split folder taken from each image's "coco_url"
            (e.g. "train2017") is appended to it as a path component, so a
            trailing separator is optional.
        dataset_name (str): the name of the dataset (e.g.,
            "lvis_v0.5_train"). If provided, the metadata returned by
            `get_lvis_instances_meta` is set on the dataset's
            `MetadataCatalog` entry.
        extra_annotation_keys (list[str]): list of per-annotation keys that
            should also be loaded into the dataset dict (besides "bbox",
            "bbox_mode", "category_id", "segmentation"). The values for
            these keys will be returned as-is.

    Returns:
        list[dict]: a list of dicts in Detectron2 standard format (see the
        "Using Custom Datasets" tutorial).

    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
    """
    from lvis import LVIS

    json_file = PathManager.get_local_path(json_file)

    timer = Timer()
    lvis_api = LVIS(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(
            json_file, timer.seconds()))

    if dataset_name is not None:
        meta = get_lvis_instances_meta(dataset_name)
        MetadataCatalog.get(dataset_name).set(**meta)

    # sort indices for reproducible results
    img_ids = sorted(lvis_api.imgs.keys())
    # imgs is a list of dicts, one per image (keys such as 'coco_url',
    # 'height', 'width', 'id').
    imgs = lvis_api.load_imgs(img_ids)
    # anns is a list[list[dict]], where each dict is an annotation
    # record for an object. The inner list enumerates the objects in an image
    # and the outer list enumerates over images.
    anns = [lvis_api.img_ann_map[img_id] for img_id in img_ids]

    # Sanity check that each annotation has a unique id
    ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image]
    assert len(set(ann_ids)) == len(
        ann_ids), "Annotation ids in '{}' are not unique".format(json_file)

    imgs_anns = list(zip(imgs, anns))

    logger.info("Loaded {} images in the LVIS format from {}".format(
        len(imgs_anns), json_file))

    if extra_annotation_keys:
        logger.info(
            "The following extra annotation keys will be loaded: {} ".format(
                extra_annotation_keys))
    else:
        extra_annotation_keys = []

    def get_file_name(img_root, img_dict):
        # Determine the path including the split folder ("train2017",
        # "val2017", "test2017") from the coco_url field. Example:
        #   'coco_url': 'http://images.cocodataset.org/train2017/000000155379.jpg'
        split_folder, file_name = img_dict["coco_url"].split("/")[-2:]
        # Join the split folder as its own component: concatenating it to
        # img_root gave a wrong directory (".../cocotrain2017") whenever
        # img_root had no trailing separator.
        return os.path.join(img_root, split_folder, file_name)

    dataset_dicts = []

    for (img_dict, anno_dict_list) in imgs_anns:
        record = {}
        record["file_name"] = get_file_name(image_root, img_dict)
        record["height"] = img_dict["height"]
        record["width"] = img_dict["width"]
        record["not_exhaustive_category_ids"] = img_dict.get(
            "not_exhaustive_category_ids", [])
        record["neg_category_ids"] = img_dict.get("neg_category_ids", [])
        image_id = record["image_id"] = img_dict["id"]

        objs = []
        for anno in anno_dict_list:
            # Check that the image_id in this annotation is the same as
            # the image_id we're looking at.
            # This fails only when the data parsing logic or the annotation
            # file is buggy.
            assert anno["image_id"] == image_id
            obj = {"bbox": anno["bbox"], "bbox_mode": BoxMode.XYWH_ABS}
            # LVIS data loader can be used to load COCO dataset categories.
            # In this case `meta` variable will have a field with
            # COCO-specific category mapping.
            if dataset_name is not None and "thing_dataset_id_to_contiguous_id" in meta:
                obj["category_id"] = meta["thing_dataset_id_to_contiguous_id"][
                    anno["category_id"]]
            else:
                obj["category_id"] = anno[
                    "category_id"] - 1  # Convert 1-indexed to 0-indexed
            segm = anno["segmentation"]  # list[list[float]]
            # Polygons with fewer than 3 points (or an odd number of
            # coordinates) are invalid; LVIS data must not contain any.
            valid_segm = [
                poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6
            ]
            assert len(segm) == len(
                valid_segm
            ), "Annotation contains an invalid polygon with < 3 points"
            assert len(segm) > 0
            obj["segmentation"] = segm
            for extra_ann_key in extra_annotation_keys:
                obj[extra_ann_key] = anno[extra_ann_key]
            objs.append(obj)
        record["annotations"] = objs
        dataset_dicts.append(record)

    return dataset_dicts
def load_lvis_json(json_file, image_root, dataset_name=None):
    """
    Read an LVIS annotation json file and turn it into dataset dicts.

    Args:
        json_file (str): full path to the LVIS json annotation file.
        image_root (str): directory that holds the images referenced by the
            json file.
        dataset_name (str): dataset name (e.g., "lvis_v0.5_train"). When
            given, the metadata returned by `get_lvis_instances_meta` is set
            on the dataset's `MetadataCatalog` entry.

    Returns:
        list[dict]: dataset dicts in Detectron2 standard format (see the
        "Using Custom Datasets" tutorial).

    Notes:
        1. The image files are not read; the records have no "image" field.
    """
    from lvis import LVIS

    json_file = PathManager.get_local_path(json_file)

    timer = Timer()
    lvis_api = LVIS(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(
            json_file, timer.seconds()))

    if dataset_name is not None:
        meta = get_lvis_instances_meta(dataset_name)
        MetadataCatalog.get(dataset_name).set(**meta)

    # Sorted ids make the output order reproducible.
    img_ids = sorted(lvis_api.imgs.keys())
    # One dict per image ('file_name', 'height', 'width', 'id', ...).
    imgs = lvis_api.load_imgs(img_ids)
    # list[list[dict]]: outer list over images, inner list over the
    # annotation records of that image.
    anns = [lvis_api.img_ann_map[img_id] for img_id in img_ids]

    # Every annotation id must be unique across the whole file.
    ann_ids = []
    for anns_per_image in anns:
        ann_ids.extend(ann["id"] for ann in anns_per_image)
    assert len(set(ann_ids)) == len(
        ann_ids), "Annotation ids in '{}' are not unique".format(json_file)

    imgs_anns = list(zip(imgs, anns))
    logger.info("Loaded {} images in the LVIS format from {}".format(
        len(imgs_anns), json_file))

    dataset_dicts = []
    for img_dict, anno_dict_list in imgs_anns:
        file_name = img_dict["file_name"]
        if file_name.startswith("COCO"):
            # Convert from the COCO 2014 naming convention
            # COCO_[train/val/test]2014_000000000000.jpg to the 2017
            # convention 000000000000.jpg by keeping the last 16 characters.
            file_name = file_name[-16:]

        image_id = img_dict["id"]
        record = {
            "file_name": os.path.join(image_root, file_name),
            "height": img_dict["height"],
            "width": img_dict["width"],
            "not_exhaustive_category_ids": img_dict.get(
                "not_exhaustive_category_ids", []),
            "neg_category_ids": img_dict.get("neg_category_ids", []),
            "image_id": image_id,
        }

        objs = []
        for anno in anno_dict_list:
            # An annotation must belong to the image it is listed under;
            # a mismatch means buggy parsing or a buggy annotation file.
            assert anno["image_id"] == image_id

            segm = anno["segmentation"]  # list[list[float]]
            # Count polygons with an even number of coordinates and at
            # least 3 points; every polygon must qualify.
            num_valid = sum(
                1 for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6)
            assert len(segm) == num_valid, \
                "Annotation contains an invalid polygon with < 3 points"
            assert len(segm) > 0

            objs.append({
                "bbox": anno["bbox"],
                "bbox_mode": BoxMode.XYWH_ABS,
                # Convert 1-indexed to 0-indexed
                "category_id": anno["category_id"] - 1,
                "segmentation": segm,
            })

        record["annotations"] = objs
        dataset_dicts.append(record)

    return dataset_dicts
def __init__(self, pidfile):
    """Initialise the parent class with ``pidfile`` and load this event's config.

    Args:
        pidfile: forwarded unchanged to the parent class constructor.
    """
    super(temporal_restrict, self).__init__(pidfile=pidfile)
    self.tt = Timer()
    # The concrete class name is used as the attribute name that is looked up
    # on the object returned by get_cfg().
    event_name = self.__class__.__name__
    self.event_type = str(event_name)
    self.cfgs = getattr(get_cfg(), self.event_type)
def load_coco_json(json_file,
                   image_root,
                   dataset_name=None,
                   extra_annotation_keys=None):
    """
    Load a json file with COCO's instances annotation format.
    Currently supports instance detection, instance segmentation,
    and person keypoints annotations.

    Args:
        json_file (str): full path to the json file in COCO instances annotation format.
        image_root (str or path-like): the directory where the images in this json file exists.
        dataset_name (str): the name of the dataset (e.g., coco_2017_train).
            If provided, this function will also put "thing_classes" and
            "thing_dataset_id_to_contiguous_id" into the metadata associated
            with this dataset, and category ids in the result are remapped to
            contiguous 0-based ids.
        extra_annotation_keys (list[str]): list of per-annotation keys that should also be
            loaded into the dataset dict (besides "iscrowd", "bbox", "keypoints",
            "category_id", "segmentation"). The values for these keys will be returned as-is.
            For example, the densepose annotations are loaded in this way.

    Returns:
        list[dict]: a list of dicts in Detectron2 standard dataset dicts format. (See
        `Using Custom Datasets </tutorials/datasets.html>`_ )

    Raises:
        AssertionError: if annotation ids are not unique (files whose path
            contains "minival" are exempt), if an annotation's "image_id" does
            not match its image, or if an annotation has a non-zero "ignore".
        KeyError: if an annotation uses a category id that is not listed in
            the "categories" field of the json file (only when ``dataset_name``
            is given, i.e. when ids are remapped).

    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
    """
    from pycocotools.coco import COCO  # Python API class for COCO annotations

    timer = Timer()  # measures how long loading the json takes
    json_file = PathManager.get_local_path(json_file)
    # Silence anything the COCO constructor writes to stdout.
    with contextlib.redirect_stdout(io.StringIO()):
        coco_api = COCO(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(
            json_file, timer.seconds()))

    id_map = None
    if dataset_name is not None:
        meta = MetadataCatalog.get(dataset_name)
        # getCatIds() returns the ids of all categories.
        cat_ids = sorted(coco_api.getCatIds())
        # loadCats() returns one dict per category id, e.g.
        # {"supercategory": "person", "id": 1, "name": "person"}
        cats = coco_api.loadCats(cat_ids)
        # The categories in a custom json file may not be sorted,
        # so collect the category names in ascending id order.
        thing_classes = [
            c["name"] for c in sorted(cats, key=lambda x: x["id"])
        ]
        meta.thing_classes = thing_classes

        # In COCO, certain category ids are artificially removed,
        # and by convention they are always ignored.
        # We deal with COCO's id issue and translate
        # the category ids to contiguous ids in [0, 80).

        # It works by looking at the "categories" field in the json, therefore
        # if users' own json also have incontiguous ids, we'll
        # apply this mapping as well but print a warning.
        if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)):
            # The ids are not contiguous.
            if "coco" not in dataset_name:
                logger.warning(""" Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you. """)
        id_map = {v: i for i, v in enumerate(cat_ids)}
        meta.thing_dataset_id_to_contiguous_id = id_map
        # NOTE(review): open question left by the original author - is this
        # mapping already performed once in
        # data.datasets.builtin_meta._get_builtin_metadata?

    # sort indices for reproducible results
    img_ids = sorted(coco_api.imgs.keys())
    # imgs is a list of dicts, each looks something like:
    # {'license': 4,
    #  'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
    #  'file_name': 'COCO_val2014_000000001268.jpg',
    #  'height': 427,
    #  'width': 640,
    #  'date_captured': '2013-11-17 05:57:24',
    #  'id': 1268}
    imgs = coco_api.loadImgs(img_ids)
    # anns is a list[list[dict]], where each dict is an annotation
    # record for an object. The inner list enumerates the objects in an image
    # and the outer list enumerates over images. Example of anns[0]:
    # [{'segmentation': [[192.81,
    #     247.09,
    #     ...
    #     219.03,
    #     249.06]],
    #   'area': 1035.749,
    #   'iscrowd': 0,
    #   'image_id': 1268,
    #   'bbox': [192.81, 224.8, 74.73, 33.43],
    #   'category_id': 16,
    #   'id': 42986},
    #  ...]
    # imgToAnns maps an image id to the list of all annotations of that image.
    anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]
    # Number of annotations that are attached to an image listed in the file ...
    total_num_valid_anns = sum(len(x) for x in anns)
    # ... versus the number of annotations present in the file.
    total_num_anns = len(coco_api.anns)
    if total_num_valid_anns < total_num_anns:
        logger.warning(
            f"{json_file} contains {total_num_anns} annotations, but only "
            f"{total_num_valid_anns} of them match to images in the file.")

    if "minival" not in json_file:
        # The popular valminusminival & minival annotations for COCO2014 contain this bug.
        # However the ratio of buggy annotations there is tiny and does not affect accuracy.
        # Therefore we explicitly white-list them.
        # (Nested comprehension: the outer loop over images runs first, then
        # the inner loop over the annotations of each image.)
        ann_ids = [
            ann["id"] for anns_per_image in anns for ann in anns_per_image
        ]
        # Annotation ids may turn out not to be unique.
        assert len(set(ann_ids)) == len(
            ann_ids), "Annotation ids in '{}' are not unique!".format(
                json_file)

    # Pairs of (image info, annotations of that image).
    imgs_anns = list(zip(imgs, anns))
    logger.info("Loaded {} images in COCO format from {}".format(
        len(imgs_anns), json_file))

    dataset_dicts = []

    # Note from the original author on "iscrowd" (useful for segmentation):
    # iscrowd=0 marks a single object and iscrowd=1 a group of objects.
    # The segmentation format depends on it: a single object (iscrowd=0) uses
    # the polygon format, and may need several polygons, e.g. when the object
    # is partially occluded; a group of objects (iscrowd=1, e.g. a crowd of
    # people) uses the RLE format.
    ann_keys = ["iscrowd", "bbox", "keypoints", "category_id"
                ] + (extra_annotation_keys or [])

    num_instances_without_valid_segmentation = 0

    for (img_dict, anno_dict_list) in imgs_anns:
        # One record gathers all information about one image.
        record = {}
        # change to full path
        record["file_name"] = os.path.join(image_root, img_dict["file_name"])
        record["height"] = img_dict["height"]
        record["width"] = img_dict["width"]
        image_id = record["image_id"] = img_dict["id"]

        objs = []  # the object instances of this image
        # anno_dict_list: all annotations of one image
        for anno in anno_dict_list:
            # Check that the image_id in this annotation is the same as
            # the image_id we're looking at.
            # This fails only when the data parsing logic or the annotation file is buggy.

            # The original COCO valminusminival2014 & minival2014 annotation files
            # actually contains bugs that, together with certain ways of using COCO API,
            # can trigger this assertion.
            assert anno["image_id"] == image_id

            assert anno.get(
                "ignore",
                0) == 0, '"ignore" in COCO json file is not supported.'
            # Copy over the fields named in ann_keys that this annotation has.
            obj = {key: anno[key] for key in ann_keys if key in anno}

            segm = anno.get("segmentation", None)
            if segm:  # either list[list[float]] or dict(RLE)
                if isinstance(segm, dict):
                    if isinstance(segm["counts"], list):
                        # convert to compressed RLE
                        segm = mask_util.frPyObjects(segm, *segm["size"])
                else:
                    # filter out invalid polygons (< 3 points)
                    segm = [
                        poly for poly in segm
                        if len(poly) % 2 == 0 and len(poly) >= 6
                    ]
                    if len(segm) == 0:
                        num_instances_without_valid_segmentation += 1
                        continue  # ignore this instance
                obj["segmentation"] = segm

            keypts = anno.get("keypoints", None)
            if keypts:  # list[int]
                # COCO's segmentation coordinates are floating points in [0, H or W],
                # but keypoint coordinates are integers in [0, H-1 or W-1]
                # Therefore we assume the coordinates are "pixel indices" and
                # add 0.5 to convert to floating point coordinates.
                # Every third value (idx % 3 == 2) is left untouched. A new
                # list is built so the annotation held by the COCO API object
                # is not modified in place.
                obj["keypoints"] = [
                    v + 0.5 if idx % 3 != 2 else v
                    for idx, v in enumerate(keypts)
                ]

            obj["bbox_mode"] = BoxMode.XYWH_ABS
            # Remap the dataset category id to its contiguous id.
            if id_map:
                annotation_category_id = obj["category_id"]
                try:
                    obj["category_id"] = id_map[annotation_category_id]
                except KeyError as e:
                    raise KeyError(
                        f"Encountered category_id={annotation_category_id} "
                        "but this id does not exist in 'categories' of the json file."
                    ) from e
            objs.append(obj)
        record["annotations"] = objs
        dataset_dicts.append(record)

    if num_instances_without_valid_segmentation > 0:
        logger.warning(
            "Filtered out {} instances without valid segmentation. ".format(
                num_instances_without_valid_segmentation) +
            "There might be issues in your dataset generation process. "
            "A valid polygon should be a list[float] with even length >= 6.")
    return dataset_dicts
def load_coco_json(json_file, image_root, dataset_name=None):
    """
    Load a json file with COCO's instances annotation format.
    Currently supports instance detection, instance segmentation,
    person keypoints and densepose annotations.

    Args:
        json_file (str): full path to the json file in COCO instances annotation format.
        image_root (str): the directory where the images in this json file exists.
        dataset_name (str): the name of the dataset (e.g., coco_2017_train).
            If provided, this function will also put "thing_classes" and
            "thing_dataset_id_to_contiguous_id" into the metadata associated
            with this dataset, and category ids in the result are remapped to
            contiguous 0-based ids.

    Returns:
        list[dict]: a list of dicts in "Detectron2 Dataset" format. (See DATASETS.md)

    Raises:
        AssertionError: if annotation ids are not unique (files whose path
            contains "minival" are exempt), if an annotation's "image_id" does
            not match its image, or if an annotation has a non-zero "ignore".

    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
        2. When ``dataset_name`` is "coco_2014_minival", only images whose file
           name contains "COCO_val2014_00000050" are returned.
    """
    from pycocotools.coco import COCO

    timer = Timer()
    json_file = PathManager.get_local_path(json_file)
    # Silence anything the COCO constructor writes to stdout.
    with contextlib.redirect_stdout(io.StringIO()):
        coco_api = COCO(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(
            json_file, timer.seconds()))

    id_map = None
    if dataset_name is not None:
        meta = MetadataCatalog.get(dataset_name)
        cat_ids = sorted(coco_api.getCatIds())
        cats = coco_api.loadCats(cat_ids)
        # The categories in a custom json file may not be sorted.
        thing_classes = [
            c["name"] for c in sorted(cats, key=lambda x: x["id"])
        ]
        meta.thing_classes = thing_classes

        # In COCO, certain category ids are artificially removed,
        # and by convention they are always ignored.
        # We deal with COCO's id issue and translate
        # the category ids to contiguous ids in [0, 80).

        # It works by looking at the "categories" field in the json, therefore
        # if users' own json also have incontiguous ids, we'll
        # apply this mapping as well but print a warning.
        if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)):
            if "coco" not in dataset_name:
                logger.warning(
                    " Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you. \n"
                )
        id_map = {v: i for i, v in enumerate(cat_ids)}
        meta.thing_dataset_id_to_contiguous_id = id_map

    # sort indices for reproducible results
    img_ids = sorted(coco_api.imgs.keys())
    # imgs is a list of dicts, each looks something like:
    # {'license': 4,
    #  'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
    #  'file_name': 'COCO_val2014_000000001268.jpg',
    #  'height': 427,
    #  'width': 640,
    #  'date_captured': '2013-11-17 05:57:24',
    #  'id': 1268}
    imgs = coco_api.loadImgs(img_ids)
    # anns is a list[list[dict]], where each dict is an annotation
    # record for an object. The inner list enumerates the objects in an image
    # and the outer list enumerates over images. Example of anns[0]:
    # [{'segmentation': [[192.81,
    #     247.09,
    #     ...
    #     219.03,
    #     249.06]],
    #   'area': 1035.749,
    #   'iscrowd': 0,
    #   'image_id': 1268,
    #   'bbox': [192.81, 224.8, 74.73, 33.43],
    #   'category_id': 16,
    #   'id': 42986},
    #  ...]
    anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]

    if "minival" not in json_file:
        # The popular valminusminival & minival annotations for COCO2014 contain this bug.
        # However the ratio of buggy annotations there is tiny and does not affect accuracy.
        # Therefore we explicitly white-list them.
        ann_ids = [
            ann["id"] for anns_per_image in anns for ann in anns_per_image
        ]
        assert len(set(ann_ids)) == len(
            ann_ids), "Annotation ids in '{}' are not unique!".format(
                json_file)

    imgs_anns = list(zip(imgs, anns))

    logger.info("Loaded {} images in COCO format from {}".format(
        len(imgs_anns), json_file))

    dataset_dicts = []

    # TODO: refactoring candidate, one should not have to alter DB reader
    # every time new data type is added
    DENSEPOSE_KEYS = ["dp_x", "dp_y", "dp_I", "dp_U", "dp_V", "dp_masks"]
    # Per-annotation fields copied verbatim into the output when present.
    # Built once here instead of once per annotation.
    copied_fields = ["iscrowd", "bbox", "keypoints", "category_id"
                     ] + DENSEPOSE_KEYS

    num_instances_without_valid_segmentation = 0

    for (img_dict, anno_dict_list) in imgs_anns:
        img_name = img_dict["file_name"]
        # For "coco_2014_minival" keep only the images whose file name
        # contains this fixed prefix; every other image is skipped.
        # NOTE(review): looks like a hard-coded subset selection - confirm
        # that it is intentional.
        if (dataset_name == "coco_2014_minival"
                and 'COCO_val2014_00000050' not in img_name):
            continue

        record = {}
        record["file_name"] = os.path.join(image_root, img_name)
        record["height"] = img_dict["height"]
        record["width"] = img_dict["width"]
        image_id = record["image_id"] = img_dict["id"]

        objs = []
        for anno in anno_dict_list:
            # Check that the image_id in this annotation is the same as
            # the image_id we're looking at.
            # This fails only when the data parsing logic or the annotation file is buggy.

            # The original COCO valminusminival2014 & minival2014 annotation files
            # actually contains bugs that, together with certain ways of using COCO API,
            # can trigger this assertion.
            assert anno["image_id"] == image_id

            assert anno.get("ignore", 0) == 0

            obj = {
                field: anno[field]
                for field in copied_fields if field in anno
            }

            segm = anno.get("segmentation", None)
            if segm:  # either list[list[float]] or dict(RLE)
                if not isinstance(segm, dict):
                    # filter out invalid polygons (< 3 points)
                    segm = [
                        poly for poly in segm
                        if len(poly) % 2 == 0 and len(poly) >= 6
                    ]
                    if len(segm) == 0:
                        num_instances_without_valid_segmentation += 1
                        continue  # ignore this instance
                obj["segmentation"] = segm

            keypts = anno.get("keypoints", None)
            if keypts:  # list[int]
                for idx, v in enumerate(keypts):
                    if idx % 3 != 2:
                        # COCO's segmentation coordinates are floating points in [0, H or W],
                        # but keypoint coordinates are integers in [0, H-1 or W-1]
                        # Therefore we assume the coordinates are "pixel indices" and
                        # add 0.5 to convert to floating point coordinates.
                        keypts[idx] = v + 0.5
                obj["keypoints"] = keypts

            obj["bbox_mode"] = BoxMode.XYWH_ABS
            if id_map:
                obj["category_id"] = id_map[obj["category_id"]]
            objs.append(obj)
        record["annotations"] = objs
        dataset_dicts.append(record)

    if num_instances_without_valid_segmentation > 0:
        # Logger.warn is a deprecated alias (removed in Python 3.13);
        # Logger.warning is the supported spelling.
        logger.warning(
            "Filtered out {} instances without valid segmentation. "
            "There might be issues in your dataset generation process.".format(
                num_instances_without_valid_segmentation))
    return dataset_dicts
def load_hico_json(json_file,
                   image_root,
                   dataset_name=None,
                   extra_annotation_keys=None):
    """
    Load a json file with HOI's instances annotation.

    Args:
        json_file (str): full path to the json file in HOI instances annotation format.
        image_root (str or path-like): the directory where the images in this json file exists.
        dataset_name (str): the name of the dataset (e.g., `hico-det_train`).
            Required in practice: the metadata registered under this name must
            provide "person_cls_id" and "action_classes", which are read here.
            The parameter keeps its ``None`` default for signature
            compatibility only.
        extra_annotation_keys (list[str]): list of per-annotation keys that should also be
            loaded into the dataset dict (besides "iscrowd", "bbox", "category_id").
            The values for these keys will be returned as-is.

    Returns:
        list[dict]: a list of dicts in Detectron2 standard dataset dicts format. (See
        `Using Custom Datasets </tutorials/datasets.html>`_ )
        In addition to the copied keys, every annotation carries "actions"
        (see the comment in the loop below), "isactive" and "bbox_mode".

    Raises:
        ValueError: if ``dataset_name`` is None.
        AssertionError: if an annotation's "image_id" does not match its image.

    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
    """
    from pycocotools.coco import COCO

    if dataset_name is None:
        # Without a dataset name there is no metadata to read the person class
        # id and the action classes from; fail with a clear message instead of
        # an unbound-name error further down.
        raise ValueError(
            "load_hico_json requires `dataset_name`: its metadata must provide "
            "'person_cls_id' and 'action_classes'.")

    timer = Timer()
    json_file = PathManager.get_local_path(json_file)
    # Silence anything the COCO constructor writes to stdout.
    with contextlib.redirect_stdout(io.StringIO()):
        coco_api = COCO(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(
            json_file, timer.seconds()))

    logger.debug("Loading HOI annotations for dataset %s", dataset_name)
    meta = MetadataCatalog.get(dataset_name)
    cat_ids = sorted(coco_api.getCatIds())

    # In COCO, certain category ids are artificially removed,
    # and by convention they are always ignored.
    # We deal with COCO's id issue and translate
    # the category ids to contiguous ids in [0, 80).

    # It works by looking at the "categories" field in the json, therefore
    # if users' own json also have incontiguous ids, we'll
    # apply this mapping as well but print a warning.
    if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)):
        if "coco" not in dataset_name:
            logger.warning(
                " Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you. \n"
            )
    id_map = {v: i for i, v in enumerate(cat_ids)}
    # This loader only reads the metadata entry; it does not write
    # "thing_classes" or "thing_dataset_id_to_contiguous_id" to it.

    # Get metadata "person_cls_id" and "action classes"
    person_cls_id = meta.person_cls_id
    action_classes = meta.action_classes
    num_actions = len(action_classes)

    # sort indices for reproducible results
    img_ids = sorted(coco_api.imgs.keys())
    # imgs is a list of dicts, each looks something like:
    # {'license': 4,
    #  'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
    #  'file_name': 'COCO_val2014_000000001268.jpg',
    #  'height': 427,
    #  'width': 640,
    #  'date_captured': '2013-11-17 05:57:24',
    #  'id': 1268}
    imgs = coco_api.loadImgs(img_ids)
    # anns is a list[list[dict]], where each dict is an annotation
    # record for an object. The inner list enumerates the objects in an image
    # and the outer list enumerates over images. Example of anns[0]:
    # [{'segmentation': [[192.81,
    #     247.09,
    #     ...
    #     219.03,
    #     249.06]],
    #   'area': 1035.749,
    #   'iscrowd': 0,
    #   'image_id': 1268,
    #   'bbox': [192.81, 224.8, 74.73, 33.43],
    #   'category_id': 16,
    #   'id': 42986,
    #   'isactive': 1,
    #   'isknown': 1,
    #   'hoi_triplets': [{person_id: 42984, object_id: 42986, action_id: 4}, ...],
    #   },
    #  ...]
    anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]

    imgs_anns = list(zip(imgs, anns))

    logger.info("Loaded {} images in HOI format from {}".format(
        len(imgs_anns), json_file))

    dataset_dicts = []

    ann_keys = ["iscrowd", "bbox", "category_id"]
    ann_keys += (extra_annotation_keys or [])

    num_instances_without_hoi_annotations = 0

    for (img_dict, anno_dict_list) in imgs_anns:
        record = {}
        record["file_name"] = os.path.join(image_root, img_dict["file_name"])
        record["height"] = img_dict["height"]
        record["width"] = img_dict["width"]
        image_id = record["image_id"] = img_dict["id"]

        objs = []
        num_instances = len(anno_dict_list)
        for anno in anno_dict_list:
            # Check that the image_id in this annotation is the same as
            # the image_id we're looking at.
            # This fails only when the data parsing logic or the annotation file is buggy.

            # The original COCO valminusminival2014 & minival2014 annotation files
            # actually contains bugs that, together with certain ways of using COCO API,
            # can trigger this assertion.
            assert anno["image_id"] == image_id

            obj = {key: anno[key] for key in ann_keys if key in anno}

            # "hoi_triplets" in the annotation is a list[dict], where each dict is an
            # annotation record for an interaction. Example of anno["hoi_triplets"]:
            # [{
            #    person_id: 42984,
            #    object_id: 42986,
            #    action_id: 4
            #   },
            #   ... ]
            # Here "person_id" ("object_id") is the *anno id* of the person (object) instance.
            # For each instance, we record its interactions with other instances in the given
            # image in a binary matrix named `actions` with shape (N, K), where N is the number
            # of instances in the image and K is the number of actions. If this instance is
            # interacting with the j-th instance through the k-th action, then the (j, k)
            # entry of `actions` is 1.
            actions = np.zeros((num_instances, num_actions))
            hoi_triplets = anno["hoi_triplets"]
            if len(hoi_triplets) > 0:
                # Mapping *anno id* of instances to contiguous indices in this image.
                # NOTE(review): the return value is discarded, so the helper
                # presumably rewrites "person_id"/"object_id" in place - confirm.
                map_to_contiguous_id_within_image(hoi_triplets, anno_dict_list)

                # A person points at the object of each triplet; any other
                # instance points back at the person.
                is_person = (anno["category_id"] == person_cls_id)
                for triplet in hoi_triplets:
                    action_id = triplet["action_id"]
                    target_id = triplet["object_id"] if is_person else triplet[
                        "person_id"]
                    actions[target_id, action_id] = 1
            else:
                num_instances_without_hoi_annotations += 1

            obj["actions"] = actions
            obj["isactive"] = 1 if len(hoi_triplets) > 0 else 0
            obj["bbox_mode"] = BoxMode.XYWH_ABS
            if id_map:
                obj["category_id"] = id_map[obj["category_id"]]
            objs.append(obj)
        record["annotations"] = objs
        dataset_dicts.append(record)

    if num_instances_without_hoi_annotations > 0:
        logger.warning("There are {} instances without hoi annotation.".format(
            num_instances_without_hoi_annotations))
    return dataset_dicts
def load_coco_json(json_file, image_root, dataset_name=None, extra_annotation_keys=None): """ Load a json file with COCO's instances annotation format. Currently supports instance detection, instance segmentation, and person keypoints annotations. Args: json_file (str): full path to the json file in COCO instances annotation format. image_root (str): the directory where the images in this json file exists. dataset_name (str): the name of the dataset (e.g., coco_2017_train). If provided, this function will also put "thing_classes" into the metadata associated with this dataset. extra_annotation_keys (list[str]): list of per-annotation keys that should also be loaded into the dataset dict (besides "iscrowd", "bbox", "keypoints", "category_id", "segmentation"). The values for these keys will be returned as-is. For example, the densepose annotations are loaded in this way. Returns: list[dict]: a list of dicts in Detectron2 standard format. (See `Using Custom Datasets </tutorials/datasets.html>`_ ) Notes: 1. This function does not read the image files. The results do not have the "image" field. """ from pycocotools.coco import COCO timer = Timer() json_file = PathManager.get_local_path(json_file) with contextlib.redirect_stdout(io.StringIO()): coco_api = COCO(json_file) if timer.seconds() > 1: logger.info("Loading {} takes {:.2f} seconds.".format( json_file, timer.seconds())) id_map = None if dataset_name is not None: meta = MetadataCatalog.get(dataset_name) cat_ids = sorted(coco_api.getCatIds()) cats = coco_api.loadCats(cat_ids) # The categories in a custom json file may not be sorted. 
thing_classes = [ c["name"] for c in sorted(cats, key=lambda x: x["id"]) ] meta.thing_classes = thing_classes # Add keypoint names from categories, all categories have the same 294 keypoint names, which are string numbers from 1-294 meta.keypoint_names = [ keypoint_name for keypoint_name in coco_api.dataset["categories"][0]["keypoints"] ] # For keypoints, the same flips don't need to add since that is automatically added later, e.g (1,1) # Also don't need to add flipped indices, e.g (1,6) and (6,1), only need to add (1,6), # since the create_keypoint_hflip_indices adds the flipped counterparts meta.keypoint_flip_map = [ # Short sleeve top +0 ("2", "6"), ("3", "5"), ("7", "25"), ("8", "24"), ("9", "23"), ("10", "22"), ("11", "21"), ("12", "20"), ("13", "19"), ("14", "18"), ("15", "17"), # Long sleeve top +25 ("27", "31"), ("28", "30"), ("32", "58"), ("33", "57"), ("34", "56"), ("35", "55"), ("36", "54"), ("37", "53"), ("38", "52"), ("39", "51"), ("40", "50"), ("41", "49"), ("42", "48"), ("43", "47"), ("44", "46"), # Short sleeve outwear +58 ("60", "84"), ("61", "63"), ("62", "64"), ("65", "83"), ("66", "82"), ("67", "81"), ("68", "80"), ("69", "79"), ("70", "78"), ("71", "77"), ("72", "76"), ("73", "75"), ("74", "87"), ("89", "86"), ("88", "85"), # Long sleeve outwear +89 ("91", "95"), ("92", "94"), ("93", "123"), ("96", "122"), ("97", "121"), ("98", "120"), ("99", "119"), ("100", "118"), ("101", "117"), ("102", "116"), ("103", "115"), ("104", "114"), ("105", "113"), ("106", "112"), ("107", "111"), ("108", "110"), ("109", "126"), ("128", "125"), ("127", "124"), # Vest +128 ("130", "134"), ("131", "133"), ("135", "143"), ("136", "142"), ("137", "141"), ("138", "140"), # Sling +143 ("145", "149"), ("146", "148"), ("150", "158"), ("151", "157"), ("152", "156"), ("153", "155"), # Shorts +158 ("159", "161"), ("162", "168"), ("163", "167"), ("164", "166"), # Trousers +168 ("169", "171"), ("172", "182"), ("173", "181"), ("174", "180"), ("175", "179"), ("176", "178"), # 
Skirt +182 ("183", "185"), ("186", "190"), ("187", "189"), # Short sleeve dress +190 ("192", "196"), ("193", "195"), ("197", "219"), ("198", "218"), ("199", "217"), ("200", "216"), ("201", "215"), ("202", "214"), ("203", "213"), ("204", "212"), ("205", "211"), ("206", "210"), ("207", "209"), # Long sleeve dress +219 ("221", "225"), ("222", "224"), ("226", "256"), ("227", "255"), ("228", "254"), ("229", "253"), ("230", "252"), ("231", "251"), ("232", "250"), ("233", "249"), ("234", "248"), ("235", "247"), ("236", "246"), ("237", "245"), ("238", "244"), ("239", "243"), ("240", "242"), # Vest dress +256 ("258", "262"), ("259", "261"), ("263", "275"), ("264", "274"), ("265", "273"), ("266", "272"), ("267", "271"), ("268", "270"), # Sling dress +275 ("277", "281"), ("278", "280"), ("282", "294"), ("283", "293"), ("284", "292"), ("285", "291"), ("286", "290"), ("287", "289") ] meta.keypoint_connection_rules = [ # Short sleeve top +0 Lightblue (0,191,255) ("1", "2", (0, 191, 255)), ("2", "3", (0, 191, 255)), ("3", "4", (0, 191, 255)), ("4", "5", (0, 191, 255)), ("5", "6", (0, 191, 255)), ("6", "1", (0, 191, 255)), ("2", "7", (0, 191, 255)), ("7", "8", (0, 191, 255)), ("8", "9", (0, 191, 255)), ("9", "10", (0, 191, 255)), ("10", "11", (0, 191, 255)), ("11", "12", (0, 191, 255)), ("12", "13", (0, 191, 255)), ("13", "14", (0, 191, 255)), ("14", "15", (0, 191, 255)), ("15", "16", (0, 191, 255)), ("16", "17", (0, 191, 255)), ("17", "18", (0, 191, 255)), ("18", "19", (0, 191, 255)), ("19", "20", (0, 191, 255)), ("20", "21", (0, 191, 255)), ("21", "22", (0, 191, 255)), ("22", "23", (0, 191, 255)), ("23", "24", (0, 191, 255)), ("24", "25", (0, 191, 255)), ("25", "6", (0, 191, 255)), # Long sleeve top +25 Green rgb(0,128,0) ("26", "27", (0, 128, 0)), ("27", "28", (0, 128, 0)), ("28", "29", (0, 128, 0)), ("29", "30", (0, 128, 0)), ("30", "31", (0, 128, 0)), ("31", "26", (0, 128, 0)), ("27", "32", (0, 128, 0)), ("32", "33", (0, 128, 0)), ("33", "34", (0, 128, 0)), ("34", "35", (0, 
128, 0)), ("35", "36", (0, 128, 0)), ("36", "37", (0, 128, 0)), ("37", "38", (0, 128, 0)), ("38", "39", (0, 128, 0)), ("39", "40", (0, 128, 0)), ("40", "41", (0, 128, 0)), ("41", "42", (0, 128, 0)), ("42", "43", (0, 128, 0)), ("43", "44", (0, 128, 0)), ("44", "45", (0, 128, 0)), ("45", "46", (0, 128, 0)), ("46", "47", (0, 128, 0)), ("47", "48", (0, 128, 0)), ("48", "49", (0, 128, 0)), ("49", "50", (0, 128, 0)), ("50", "51", (0, 128, 0)), ("51", "52", (0, 128, 0)), ("52", "53", (0, 128, 0)), ("53", "54", (0, 128, 0)), ("54", "55", (0, 128, 0)), ("55", "56", (0, 128, 0)), ("56", "57", (0, 128, 0)), ("57", "58", (0, 128, 0)), ("58", "31", (0, 128, 0)), # Short sleeve outwear +58 Yellow rgb(255,255,0) ("59", "62", (255, 255, 0)), ("62", "61", (255, 255, 0)), ("61", "60", (255, 255, 0)), ("62", "65", (255, 255, 0)), ("65", "66", (255, 255, 0)), ("66", "67", (255, 255, 0)), ("67", "68", (255, 255, 0)), ("68", "69", (255, 255, 0)), ("69", "70", (255, 255, 0)), ("70", "71", (255, 255, 0)), ("71", "72", (255, 255, 0)), ("72", "73", (255, 255, 0)), ("73", "74", (255, 255, 0)), ("74", "89", (255, 255, 0)), ("89", "88", (255, 255, 0)), ("88", "60", (255, 255, 0)), ("64", "59", (255, 255, 0)), ("64", "63", (255, 255, 0)), ("63", "84", (255, 255, 0)), ("84", "85", (255, 255, 0)), ("85", "86", (255, 255, 0)), ("86", "87", (255, 255, 0)), ("87", "75", (255, 255, 0)), ("75", "76", (255, 255, 0)), ("76", "77", (255, 255, 0)), ("77", "78", (255, 255, 0)), ("78", "79", (255, 255, 0)), ("79", "80", (255, 255, 0)), ("80", "81", (255, 255, 0)), ("81", "82", (255, 255, 0)), ("82", "83", (255, 255, 0)), ("83", "64", (255, 255, 0)), # Long sleeve outwear +89 Red rgb(255,0,0) ("90", "91", (255, 0, 0)), ("91", "92", (255, 0, 0)), ("92", "93", (255, 0, 0)), ("91", "96", (255, 0, 0)), ("96", "97", (255, 0, 0)), ("97", "98", (255, 0, 0)), ("98", "99", (255, 0, 0)), ("99", "100", (255, 0, 0)), ("100", "101", (255, 0, 0)), ("101", "102", (255, 0, 0)), ("102", "103", (255, 0, 0)), ("103", "104", 
(255, 0, 0)), ("104", "105", (255, 0, 0)), ("105", "106", (255, 0, 0)), ("106", "107", (255, 0, 0)), ("107", "108", (255, 0, 0)), ("108", "109", (255, 0, 0)), ("109", "128", (255, 0, 0)), ("128", "127", (255, 0, 0)), ("127", "93", (255, 0, 0)), ("95", "90", (255, 0, 0)), ("95", "94", (255, 0, 0)), ("94", "123", (255, 0, 0)), ("123", "124", (255, 0, 0)), ("124", "125", (255, 0, 0)), ("125", "126", (255, 0, 0)), ("126", "110", (255, 0, 0)), ("110", "111", (255, 0, 0)), ("111", "112", (255, 0, 0)), ("112", "113", (255, 0, 0)), ("113", "114", (255, 0, 0)), ("114", "115", (255, 0, 0)), ("115", "116", (255, 0, 0)), ("116", "117", (255, 0, 0)), ("117", "118", (255, 0, 0)), ("118", "119", (255, 0, 0)), ("119", "120", (255, 0, 0)), ("120", "121", (255, 0, 0)), ("121", "122", (255, 0, 0)), ("122", "95", (255, 0, 0)), # Vest +128 DarkOrange rgb(255,140,0) ("129", "130", (255, 140, 0)), ("130", "131", (255, 140, 0)), ("131", "132", (255, 140, 0)), ("132", "133", (255, 140, 0)), ("133", "134", (255, 140, 0)), ("134", "129", (255, 140, 0)), ("130", "135", (255, 140, 0)), ("135", "136", (255, 140, 0)), ("136", "137", (255, 140, 0)), ("137", "138", (255, 140, 0)), ("138", "139", (255, 140, 0)), ("139", "140", (255, 140, 0)), ("140", "141", (255, 140, 0)), ("141", "142", (255, 140, 0)), ("142", "143", (255, 140, 0)), ("143", "134", (255, 140, 0)), # Sling +143 DeepPink rgb(255,20,147) ("144", "145", (255, 20, 147)), ("145", "146", (255, 20, 147)), ("146", "147", (255, 20, 147)), ("147", "148", (255, 20, 147)), ("148", "149", (255, 20, 147)), ("149", "144", (255, 20, 147)), ("145", "150", (255, 20, 147)), ("145", "151", (255, 20, 147)), ("151", "152", (255, 20, 147)), ("152", "153", (255, 20, 147)), ("153", "154", (255, 20, 147)), ("154", "155", (255, 20, 147)), ("155", "156", (255, 20, 147)), ("156", "157", (255, 20, 147)), ("157", "149", (255, 20, 147)), ("149", "158", (255, 20, 147)), # Shorts +158 SaddleBrown rgb(139,69,19) ("159", "160", (139, 69, 19)), ("160", "161", (139, 69, 
19)), ("159", "162", (139, 69, 19)), ("162", "163", (139, 69, 19)), ("163", "164", (139, 69, 19)), ("164", "165", (139, 69, 19)), ("165", "166", (139, 69, 19)), ("166", "167", (139, 69, 19)), ("167", "168", (139, 69, 19)), ("168", "161", (139, 69, 19)), # Trousers +168 Magenta rgb(255,0,255) ("169", "170", (255, 0, 255)), ("170", "171", (255, 0, 255)), ("169", "172", (255, 0, 255)), ("172", "173", (255, 0, 255)), ("173", "174", (255, 0, 255)), ("174", "175", (255, 0, 255)), ("175", "176", (255, 0, 255)), ("176", "177", (255, 0, 255)), ("177", "178", (255, 0, 255)), ("178", "179", (255, 0, 255)), ("179", "180", (255, 0, 255)), ("180", "181", (255, 0, 255)), ("181", "182", (255, 0, 255)), ("182", "171", (255, 0, 255)), # Skirt +182 GoldenRod rgb(218,165,32) ("183", "184", (218, 165, 32)), ("184", "185", (218, 165, 32)), ("183", "186", (218, 165, 32)), ("186", "187", (218, 165, 32)), ("187", "188", (218, 165, 32)), ("188", "189", (218, 165, 32)), ("189", "190", (218, 165, 32)), ("190", "185", (218, 165, 32)), # Short sleeve dress +190 Gray rgb(128,128,128) ("191", "192", (128, 128, 128)), ("192", "193", (128, 128, 128)), ("193", "194", (128, 128, 128)), ("194", "195", (128, 128, 128)), ("195", "196", (128, 128, 128)), ("196", "191", (128, 128, 128)), ("192", "197", (128, 128, 128)), ("197", "198", (128, 128, 128)), ("198", "199", (128, 128, 128)), ("199", "200", (128, 128, 128)), ("200", "201", (128, 128, 128)), ("201", "202", (128, 128, 128)), ("202", "203", (128, 128, 128)), ("203", "204", (128, 128, 128)), ("204", "205", (128, 128, 128)), ("205", "206", (128, 128, 128)), ("206", "207", (128, 128, 128)), ("207", "208", (128, 128, 128)), ("208", "209", (128, 128, 128)), ("209", "210", (128, 128, 128)), ("210", "211", (128, 128, 128)), ("211", "212", (128, 128, 128)), ("212", "213", (128, 128, 128)), ("213", "214", (128, 128, 128)), ("214", "215", (128, 128, 128)), ("215", "216", (128, 128, 128)), ("216", "217", (128, 128, 128)), ("217", "218", (128, 128, 128)), 
("218", "219", (128, 128, 128)), ("219", "196", (128, 128, 128)), # Long sleeve dress +219 Darkblue rgb(0,0,139) ("220", "221", (0, 0, 139)), ("221", "222", (0, 0, 139)), ("222", "223", (0, 0, 139)), ("223", "224", (0, 0, 139)), ("224", "225", (0, 0, 139)), ("225", "220", (0, 0, 139)), ("221", "226", (0, 0, 139)), ("226", "227", (0, 0, 139)), ("227", "228", (0, 0, 139)), ("228", "229", (0, 0, 139)), ("229", "230", (0, 0, 139)), ("230", "231", (0, 0, 139)), ("231", "232", (0, 0, 139)), ("232", "233", (0, 0, 139)), ("233", "234", (0, 0, 139)), ("234", "235", (0, 0, 139)), ("235", "236", (0, 0, 139)), ("236", "237", (0, 0, 139)), ("237", "238", (0, 0, 139)), ("238", "239", (0, 0, 139)), ("239", "240", (0, 0, 139)), ("240", "241", (0, 0, 139)), ("241", "242", (0, 0, 139)), ("242", "243", (0, 0, 139)), ("243", "244", (0, 0, 139)), ("244", "245", (0, 0, 139)), ("245", "246", (0, 0, 139)), ("246", "247", (0, 0, 139)), ("247", "248", (0, 0, 139)), ("248", "249", (0, 0, 139)), ("249", "250", (0, 0, 139)), ("250", "251", (0, 0, 139)), ("251", "252", (0, 0, 139)), ("252", "253", (0, 0, 139)), ("253", "254", (0, 0, 139)), ("254", "255", (0, 0, 139)), ("255", "256", (0, 0, 139)), ("256", "225", (0, 0, 139)), # Vest dress +256 Palevioletred rgb(219,112,147) ("257", "258", (219, 112, 147)), ("258", "259", (219, 112, 147)), ("259", "260", (219, 112, 147)), ("260", "261", (219, 112, 147)), ("261", "262", (219, 112, 147)), ("262", "257", (219, 112, 147)), ("258", "263", (219, 112, 147)), ("263", "264", (219, 112, 147)), ("264", "265", (219, 112, 147)), ("265", "266", (219, 112, 147)), ("266", "267", (219, 112, 147)), ("267", "268", (219, 112, 147)), ("268", "269", (219, 112, 147)), ("269", "270", (219, 112, 147)), ("270", "271", (219, 112, 147)), ("271", "272", (219, 112, 147)), ("272", "273", (219, 112, 147)), ("273", "274", (219, 112, 147)), ("274", "275", (219, 112, 147)), ("275", "262", (219, 112, 147)), # Sling dress +275 Bisque rgb(255, 228, 196) ("276", "277", (255, 228, 
196)), ("277", "278", (255, 228, 196)), ("278", "279", (255, 228, 196)), ("279", "280", (255, 228, 196)), ("280", "281", (255, 228, 196)), ("281", "276", (255, 228, 196)), ("277", "282", (255, 228, 196)), ("277", "283", (255, 228, 196)), ("283", "284", (255, 228, 196)), ("284", "285", (255, 228, 196)), ("285", "286", (255, 228, 196)), ("286", "287", (255, 228, 196)), ("287", "288", (255, 228, 196)), ("288", "289", (255, 228, 196)), ("289", "290", (255, 228, 196)), ("290", "291", (255, 228, 196)), ("291", "292", (255, 228, 196)), ("292", "293", (255, 228, 196)), ("293", "281", (255, 228, 196)), ("281", "294", (255, 228, 196)) ] # In COCO, certain category ids are artificially removed, # and by convention they are always ignored. # We deal with COCO's id issue and translate # the category ids to contiguous ids in [0, 80). # It works by looking at the "categories" field in the json, therefore # if users' own json also have incontiguous ids, we'll # apply this mapping as well but print a warning. if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)): if "coco" not in dataset_name: logger.warning(""" Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you. """) id_map = {v: i for i, v in enumerate(cat_ids)} meta.thing_dataset_id_to_contiguous_id = id_map # sort indices for reproducible results img_ids = sorted(list(coco_api.imgs.keys())) # imgs is a list of dicts, each looks something like: # {'license': 4, # 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg', # 'file_name': 'COCO_val2014_000000001268.jpg', # 'height': 427, # 'width': 640, # 'date_captured': '2013-11-17 05:57:24', # 'id': 1268} imgs = coco_api.loadImgs(img_ids) # anns is a list[list[dict]], where each dict is an annotation # record for an object. The inner list enumerates the objects in an image # and the outer list enumerates over images. Example of anns[0]: # [{'segmentation': [[192.81, # 247.09, # ... 
# 219.03, # 249.06]], # 'area': 1035.749, # 'iscrowd': 0, # 'image_id': 1268, # 'bbox': [192.81, 224.8, 74.73, 33.43], # 'category_id': 16, # 'id': 42986}, # ...] anns = [coco_api.imgToAnns[img_id] for img_id in img_ids] if "minival" not in json_file: # The popular valminusminival & minival annotations for COCO2014 contain this bug. # However the ratio of buggy annotations there is tiny and does not affect accuracy. # Therefore we explicitly white-list them. ann_ids = [ ann["id"] for anns_per_image in anns for ann in anns_per_image ] assert len(set(ann_ids)) == len( ann_ids), "Annotation ids in '{}' are not unique!".format( json_file) imgs_anns = list(zip(imgs, anns)) logger.info("Loaded {} images in COCO format from {}".format( len(imgs_anns), json_file)) dataset_dicts = [] ann_keys = ["iscrowd", "bbox", "keypoints", "category_id" ] + (extra_annotation_keys or []) num_instances_without_valid_segmentation = 0 for (img_dict, anno_dict_list) in imgs_anns: record = {} record["file_name"] = os.path.join(image_root, img_dict["file_name"]) record["height"] = img_dict["height"] record["width"] = img_dict["width"] image_id = record["image_id"] = img_dict["id"] objs = [] for anno in anno_dict_list: # Check that the image_id in this annotation is the same as # the image_id we're looking at. # This fails only when the data parsing logic or the annotation file is buggy. # The original COCO valminusminival2014 & minival2014 annotation files # actually contains bugs that, together with certain ways of using COCO API, # can trigger this assertion. 
assert anno["image_id"] == image_id assert anno.get("ignore", 0) == 0 obj = {key: anno[key] for key in ann_keys if key in anno} segm = anno.get("segmentation", None) if segm: # either list[list[float]] or dict(RLE) if not isinstance(segm, dict): # filter out invalid polygons (< 3 points) segm = [ poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6 ] if len(segm) == 0: num_instances_without_valid_segmentation += 1 continue # ignore this instance obj["segmentation"] = segm keypts = anno.get("keypoints", None) if keypts: # list[int] for idx, v in enumerate(keypts): if idx % 3 != 2: # COCO's segmentation coordinates are floating points in [0, H or W], # but keypoint coordinates are integers in [0, H-1 or W-1] # Therefore we assume the coordinates are "pixel indices" and # add 0.5 to convert to floating point coordinates. keypts[idx] = v + 0.5 obj["keypoints"] = keypts obj["bbox_mode"] = BoxMode.XYWH_ABS if id_map: obj["category_id"] = id_map[obj["category_id"]] objs.append(obj) record["annotations"] = objs dataset_dicts.append(record) if num_instances_without_valid_segmentation > 0: logger.warn( "Filtered out {} instances without valid segmentation. " "There might be issues in your dataset generation process.".format( num_instances_without_valid_segmentation)) return dataset_dicts