Example #1
 def __init__(self, epoch_iters, cfg):
     """
     Args:
         epoch_iters (int): the overall number of iterations of one epoch.
         cfg (CfgNode): configs.
     """
     self._cfg = cfg
     self.epoch_iters = epoch_iters
     self.MAX_EPOCH = cfg.SOLVER.MAX_EPOCH * epoch_iters
     self.iter_timer = Timer()
     self.loss = ScalarMeter(cfg.LOG_PERIOD)
     self.loss_total = 0.0
     self.lr = None
     # Current minibatch errors (smoothed over a window).
     self.mb_top1_err = ScalarMeter(cfg.LOG_PERIOD)
     self.mb_top5_err = ScalarMeter(cfg.LOG_PERIOD)
     # Number of misclassified examples.
     self.num_top1_mis = 0
     self.num_top5_mis = 0
     self.num_samples = 0
Example #2
def _load_lvis_annotations(json_file: str):
    """
    Load COCO annotations from a JSON file

    Args:
        json_file: str
            Path to the file to load annotations from
    Returns:
        Instance of `pycocotools.coco.COCO` that provides access to annotations
        data
    """
    from lvis import LVIS

    json_file = PathManager.get_local_path(json_file)
    logger = logging.getLogger(__name__)
    timer = Timer()
    lvis_api = LVIS(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()))
    return lvis_api
Example #3
 def __init__(self, max_iter, cfg):
     """
     Args:
         max_iter (int): the maximum number of iterations in the current epoch.
         cfg (CfgNode): configs.
     """
     self._cfg = cfg
     self.max_iter = max_iter
     self.iter_timer = Timer()
     # Current minibatch errors (smoothed over a window).
     self.mb_top1_err = ScalarMeter(cfg.LOG_PERIOD)
     self.mb_top5_err = ScalarMeter(cfg.LOG_PERIOD)
     # Min errors (over the full val set).
     self.min_top1_err = 100.0
     self.min_top5_err = 100.0
     # Number of misclassified examples.
     self.num_top1_mis = 0
     self.num_top5_mis = 0
     self.num_samples = 0
     self.all_preds = []
     self.all_labels = []
Example #4
def _load_coco_annotations(json_file: str):
    """
    Load COCO annotations from a JSON file

    Args:
        json_file: str
            Path to the file to load annotations from
    Returns:
        Instance of `pycocotools.coco.COCO` that provides access to annotations
        data
    """
    from pycocotools.coco import COCO

    logger = logging.getLogger(__name__)
    timer = Timer()
    with contextlib.redirect_stdout(io.StringIO()):
        coco_api = COCO(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(
            json_file, timer.seconds()))
    return coco_api
Example #5
    def __init__(
        self,
        num_videos,
        num_clips,
        num_cls,
        overall_iters,
        multi_label=False,
        ensemble_method="sum",
    ):
        """
        Construct tensors to store the predictions and labels. Expect to get
        num_clips predictions from each video, and calculate the metrics on
        num_videos videos.
        Args:
            num_videos (int): number of videos to test.
            num_clips (int): number of clips sampled from each video for
                aggregating the final prediction for the video.
            num_cls (int): number of classes for each prediction.
            overall_iters (int): overall iterations for testing.
            multi_label (bool): if True, use mAP as the metric.
            ensemble_method (str): method to perform the ensemble, options
                include "sum", and "max".
        """

        self.iter_timer = Timer()
        self.num_clips = num_clips
        self.overall_iters = overall_iters
        self.multi_label = multi_label
        self.ensemble_method = ensemble_method
        # Initialize tensors.
        self.video_preds = torch.zeros((num_videos, num_cls))
        if multi_label:
            self.video_preds -= 1e10

        self.video_labels = (
            torch.zeros((num_videos, num_cls))
            if multi_label
            else torch.zeros((num_videos,)).long()
        )
        self.clip_count = torch.zeros((num_videos,)).long()
        # Reset metric.
        self.reset()
Example #6
    def __init__(self, num_videos, num_clips, num_cls, overall_iters):
        """
        Construct tensors to store the predictions and labels. Expect to get
        num_clips predictions from each video, and calculate the metrics on
        num_videos videos.
        Args:
            num_videos (int): number of videos to test.
            num_clips (int): number of clips sampled from each video for
                aggregating the final prediction for the video.
            num_cls (int): number of classes for each prediction.
            overall_iters (int): overall iterations for testing.
        """

        self.iter_timer = Timer()
        self.num_clips = num_clips
        self.overall_iters = overall_iters
        # Initialize tensors.
        self.video_preds = torch.zeros((num_videos, num_cls))
        self.video_labels = torch.zeros((num_videos)).long()
        self.clip_count = torch.zeros((num_videos)).long()
        # Reset metric.
        self.reset()
Example #7
def benchmark_eval(args):
    cfg = setup(args)
    if args.config_file.endswith(".yaml"):
        model = build_model(cfg)
        DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS)

        cfg.defrost()
        cfg.DATALOADER.NUM_WORKERS = 0
        data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0])
    else:
        model = instantiate(cfg.model)
        model.to(cfg.train.device)
        DetectionCheckpointer(model).load(cfg.train.init_checkpoint)

        cfg.dataloader.num_workers = 0
        data_loader = instantiate(cfg.dataloader.test)

    model.eval()
    logger.info("Model:\n{}".format(model))
    dummy_data = DatasetFromList(list(itertools.islice(data_loader, 100)),
                                 copy=False)

    def f():
        while True:
            yield from dummy_data

    for k in range(5):  # warmup
        model(dummy_data[k])

    max_iter = 300
    timer = Timer()
    with tqdm.tqdm(total=max_iter) as pbar:
        for idx, d in enumerate(f()):
            if idx == max_iter:
                break
            model(d)
            pbar.update()
    logger.info("{} iters in {} seconds.".format(max_iter, timer.seconds()))
Example #8
def benchmark_data(cfg):
    # Set up environment.
    setup_environment()
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Benchmark data loading with config:")
    logger.info(pprint.pformat(cfg))

    timer = Timer()
    dataloader = loader.construct_loader(cfg, "train")
    logger.info("Initialize loader using {:.2f} seconds.".format(
        timer.seconds()))
    batch_size = cfg.TRAIN.BATCH_SIZE
    log_period = cfg.BENCHMARK.LOG_PERIOD
    epoch_times = []
    # Test for a few epochs.
    for cur_epoch in range(cfg.BENCHMARK.NUM_EPOCHS):
        timer = Timer()
        timer_epoch = Timer()
        iter_times = []
        for cur_iter, _ in enumerate(tqdm.tqdm(dataloader)):
            if cur_iter > 0 and cur_iter % log_period == 0:
                iter_times.append(timer.seconds())
                ram = psutil.virtual_memory()  # system RAM (not GPU memory)
                logger.info(
                    "Epoch {}: {} iters ({} videos) in {:.2f} seconds. "
                    "RAM Usage: {:.2f}/{:.2f} GB.".format(
                        cur_epoch,
                        log_period,
                        log_period * batch_size,
                        iter_times[-1],
                        (ram.total - ram.available) / 1024**3,
                        ram.total / 1024**3,
                    ))
                timer.reset()
        epoch_times.append(timer_epoch.seconds())
        ram = psutil.virtual_memory()
        logger.info(
            "Epoch {}: in total {} iters ({} videos) in {:.2f} seconds. "
            "RAM Usage: {:.2f}/{:.2f} GB.".format(
                cur_epoch,
                len(dataloader),
                len(dataloader) * batch_size,
                epoch_times[-1],
                (ram.total - ram.available) / 1024**3,
                ram.total / 1024**3,
            ))
        logger.info(
            "Epoch {}: on average every {} iters ({} videos) take {:.2f}/{:.2f} "
            "(avg/std) seconds.".format(
                cur_epoch,
                log_period,
                log_period * batch_size,
                np.mean(iter_times),
                np.std(iter_times),
            ))
    logger.info("On average every epoch ({} videos) takes {:.2f}/{:.2f} "
                "(avg/std) seconds.".format(
                    len(dataloader) * batch_size,
                    np.mean(epoch_times),
                    np.std(epoch_times),
                ))
Example #9
def load_ade_instances(json_file, image_root, dataset_type):
    """
    Args:
        json_file (str): path to the json instance annotation file.
        image_root (str or path-like): directory which contains all the images.
        dataset_type (str): type of this dataset. One of base_train/base_val/novel/novel_test.
    Returns:
        list[dict]: a list of dicts in Detectron2 standard format.
    """
    timer = Timer()
    with open(json_file, "r") as f:
        ade_file = json.load(f)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(
            json_file, timer.seconds()))

    dataset_dicts = []

    for item in ade_file:
        record = {}
        record['file_name'] = os.path.join(image_root, item['fpath_img'])
        record['height'] = item['height']
        record['width'] = item['width']
        record['image_id'] = item['index']

        record['annotations'] = []
        K = len(item['anchors'])
        proposal_boxes = np.zeros((K, 4))
        record['proposal_bbox_mode'] = BoxMode.XYXY_ABS
        proposal_objectness_logits = np.zeros((K, ))

        for i, anchor in enumerate(item['anchors']):
            anno = {}
            anno['category_id'] = anchor['label']
            x1, x2, y1, y2 = anchor['anchor']
            proposal_boxes[i] = [x1, y1, x2, y2]

            if dataset_type == 'base_train':
                # NOTE: In a standard detection task, the next line would read
                # anchor['bbox'], the ground-truth box for the instance. In our
                # task we classify the proposals directly, so anno['bbox'] is
                # set to the proposal box and the ground-truth box is used only
                # as supervision.
                x1, x2, y1, y2 = anchor['anchor']
                anno['bbox'] = [float(p) for p in [x1, y1, x2, y2]]
                anno['bbox_mode'] = BoxMode.XYXY_ABS
                anno['attr'] = anchor['attr']
                anno['hierarchy'] = anchor['hierarchy']
                anno['part'] = anchor['part']

            record['annotations'].append(anno)

        record['proposal_boxes'] = proposal_boxes
        record['proposal_objectness_logits'] = proposal_objectness_logits

        ## NOTE: if you want to use segmentation, remove _debug in the next line
        if dataset_type == 'base_train_debug':
            record['sem_seg_file_name'] = os.path.join(image_root, item['seg'])
            record['scene'] = item['scene']

        dataset_dicts.append(record)

    return dataset_dicts
Example #10
def load_coco_json_mem_efficient(json_file,
                                 image_root,
                                 dataset_name=None,
                                 extra_annotation_keys=None):
    """
    Actually not mem efficient
    """
    from pycocotools.coco import COCO

    timer = Timer()
    json_file = PathManager.get_local_path(json_file)
    with contextlib.redirect_stdout(io.StringIO()):
        coco_api = COCO(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(
            json_file, timer.seconds()))

    id_map = None
    if dataset_name is not None:
        meta = MetadataCatalog.get(dataset_name)
        cat_ids = sorted(coco_api.getCatIds())
        cats = coco_api.loadCats(cat_ids)
        # The categories in a custom json file may not be sorted.
        thing_classes = [
            c["name"] for c in sorted(cats, key=lambda x: x["id"])
        ]
        meta.thing_classes = thing_classes

        if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)):
            if "coco" not in dataset_name:
                logger.warning("""
                    Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.
                    """)
        id_map = {v: i for i, v in enumerate(cat_ids)}
        meta.thing_dataset_id_to_contiguous_id = id_map

    # sort indices for reproducible results
    img_ids = sorted(coco_api.imgs.keys())
    # imgs is a list of dicts, each looks something like:
    # {'license': 4,
    #  'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
    #  'file_name': 'COCO_val2014_000000001268.jpg',
    #  'height': 427,
    #  'width': 640,
    #  'date_captured': '2013-11-17 05:57:24',
    #  'id': 1268}
    imgs = coco_api.loadImgs(img_ids)
    # anns is a list[list[dict]], where each dict is an annotation
    # record for an object. The inner list enumerates the objects in an image
    # and the outer list enumerates over images. Example of anns[0]:
    # [{'segmentation': [[192.81,
    #     247.09,
    #     ...
    #     219.03,
    #     249.06]],
    #   'area': 1035.749,
    #   'iscrowd': 0,
    #   'image_id': 1268,
    #   'bbox': [192.81, 224.8, 74.73, 33.43],
    #   'category_id': 16,
    #   'id': 42986},
    #  ...]
    logger.info("Loaded {} images in COCO format from {}".format(
        len(imgs), json_file))

    dataset_dicts = []

    ann_keys = ["iscrowd", "bbox", "category_id"] + (extra_annotation_keys
                                                     or [])

    for img_dict in imgs:
        record = {}
        record["file_name"] = os.path.join(image_root, img_dict["file_name"])
        record["height"] = img_dict["height"]
        record["width"] = img_dict["width"]
        image_id = record["image_id"] = img_dict["id"]
        anno_dict_list = coco_api.imgToAnns[image_id]
        if 'neg_category_ids' in img_dict:
            record['neg_category_ids'] = \
                [id_map[x] for x in img_dict['neg_category_ids']]

        objs = []
        for anno in anno_dict_list:
            assert anno["image_id"] == image_id

            assert anno.get("ignore", 0) == 0

            obj = {key: anno[key] for key in ann_keys if key in anno}

            segm = anno.get("segmentation", None)
            if segm:  # either list[list[float]] or dict(RLE)
                if not isinstance(segm, dict):
                    # filter out invalid polygons (< 3 points)
                    segm = [
                        poly for poly in segm
                        if len(poly) % 2 == 0 and len(poly) >= 6
                    ]
                    if len(segm) == 0:
                        num_instances_without_valid_segmentation += 1
                        continue  # ignore this instance
                obj["segmentation"] = segm

            obj["bbox_mode"] = BoxMode.XYWH_ABS

            if id_map:
                obj["category_id"] = id_map[obj["category_id"]]
            objs.append(obj)
        record["annotations"] = objs
        dataset_dicts.append(record)

    if num_instances_without_valid_segmentation > 0:
        logger.warning(
            "Filtered out {} instances without valid segmentation. "
            "There might be issues in your dataset generation process.".format(
                num_instances_without_valid_segmentation))

    del coco_api
    return dataset_dicts
Example #11
 def before_train(self):
     self._start_time = time.perf_counter()
     self._total_timer = Timer()
     self._total_timer.pause()
Example #12
def load_cocoa_cls_json(json_file, image_root, dataset_name=None):
    """
    Load a json file with D2SA's instances annotation format.
    Currently supports instance detection, instance segmentation,
    and person keypoints annotations.

    Args:
        json_file (str): full path to the json file in D2SA instances annotation format.
        image_root (str): the directory where the images in this json file exist.
        dataset_name (str): the name of the dataset (e.g., coco_2017_train).
            If provided, this function will also put "thing_classes" into
            the metadata associated with this dataset.

    Returns:
        list[dict]: a list of dicts in Detectron2 standard format. (See
        `Using Custom Datasets </tutorials/datasets.html>`_ )

    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
    """
    from pycocotools.coco import COCO

    timer = Timer()
    json_file = PathManager.get_local_path(json_file)
    with contextlib.redirect_stdout(io.StringIO()):
        cocoa_cls_api = COCO(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(
            json_file, timer.seconds()))
    id_map = None
    if dataset_name is not None:
        meta = MetadataCatalog.get(dataset_name)
        cat_ids = sorted(cocoa_cls_api.getCatIds())
        cats = cocoa_cls_api.loadCats(cat_ids)
        # The categories in a custom json file may not be sorted.
        thing_classes = [
            c["name"] for c in sorted(cats, key=lambda x: x["id"])
        ]
        meta.thing_classes = thing_classes

        if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)):
            if "coco" not in dataset_name:
                logger.warning("""
Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.
""")
        id_map = {v: i for i, v in enumerate(cat_ids)}
        meta.thing_dataset_id_to_contiguous_id = id_map

    # sort indices for reproducible results
    img_ids = sorted(list(cocoa_cls_api.imgs.keys()))
    # imgs is a list of dicts, each looks something like:
    # {'license': 4,
    #  'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
    #  'file_name': 'COCO_val2014_000000001268.jpg',
    #  'height': 427,
    #  'width': 640,
    #  'date_captured': '2013-11-17 05:57:24',
    #  'id': 1268}
    imgs = cocoa_cls_api.loadImgs(img_ids)
    # anns is a list[list[dict]], where each dict is an annotation
    # record for an object. The inner list enumerates the objects in an image
    # and the outer list enumerates over images. Example of anns[0]:
    # [{'segmentation': [[192.81,
    #     247.09,
    #     ...
    #     219.03,
    #     249.06]],
    #   'area': 1035.749,
    #   'iscrowd': 0,
    #   'image_id': 1268,
    #   'bbox': [192.81, 224.8, 74.73, 33.43],
    #   'category_id': 16,
    #   'id': 42986},
    #  ...]
    anns = [cocoa_cls_api.imgToAnns[img_id] for img_id in img_ids]

    if "minival" not in json_file:
        # The popular valminusminival & minival annotations for COCO2014 contain this bug.
        # However the ratio of buggy annotations there is tiny and does not affect accuracy.
        # Therefore we explicitly white-list them.
        ann_ids = [
            ann["id"] for anns_per_image in anns for ann in anns_per_image
        ]
        assert len(set(ann_ids)) == len(
            ann_ids), "Annotation ids in '{}' are not unique!".format(
                json_file)

    imgs_anns = list(zip(imgs, anns))

    logger.info("Loaded {} images in COCO format from {}".format(
        len(imgs_anns), json_file))

    dataset_dicts = []

    ann_keys = ["iscrowd", "bbox", "category_id"]

    num_instances_without_valid_segmentation = 0
    num_instances_without_valid_visible_segmentation = 0

    for (img_dict, anno_dict_list) in imgs_anns:
        record = {}
        record["file_name"] = os.path.join(image_root, img_dict["file_name"])
        record["height"] = img_dict["height"]
        record["width"] = img_dict["width"]
        image_id = record["image_id"] = img_dict["id"]
        objs = []
        for anno in anno_dict_list:
            # Check that the image_id in this annotation is the same as
            # the image_id we're looking at.
            # This fails only when the data parsing logic or the annotation file is buggy.

            # The original COCO valminusminival2014 & minival2014 annotation files
            # actually contains bugs that, together with certain ways of using COCO API,
            # can trigger this assertion.
            anno['iscrowd'] = 0
            assert anno["image_id"] == image_id

            assert anno.get("ignore", 0) == 0

            obj = {key: anno[key] for key in ann_keys if key in anno}
            if dataset_name is not None and dataset_name.endswith("visible"):
                segm = anno.get("visible_mask", None)
            else:
                segm = anno.get("segmentation", None)
            if segm:  # either list[list[float]] or dict(RLE)
                if not isinstance(segm, dict):
                    # filter out invalid polygons (< 3 points)
                    segm = [
                        poly for poly in segm
                        if len(poly) % 2 == 0 and len(poly) >= 6
                    ]
                    if len(segm) == 0:
                        num_instances_without_valid_segmentation += 1
                        continue  # ignore this instance
                obj["segmentation"] = segm

                vis_segm = anno.get("visible_mask", None)
                if not isinstance(vis_segm, dict):
                    # filter out invalid polygons (< 3 points)
                    vis_segm = [
                        poly for poly in segm
                        if len(poly) % 2 == 0 and len(poly) >= 6
                    ]
                    if len(vis_segm) == 0:
                        num_instances_without_valid_visible_segmentation += 1
                        continue  # ignore this instance
                obj["visible_mask"] = vis_segm
                obj["occlude_rate"] = anno.get("occlude_rate", 0)

            obj["bbox_mode"] = BoxMode.XYWH_ABS
            if id_map:
                obj["category_id"] = id_map[obj["category_id"]]
            objs.append(obj)
        record["annotations"] = objs
        dataset_dicts.append(record)

    if num_instances_without_valid_segmentation > 0:
        logger.warning(
            "Filtered out {} instances without valid segmentation. "
            "There might be issues in your dataset generation process.".format(
                num_instances_without_valid_segmentation))

    if num_instances_without_valid_visible_segmentation > 0:
        logger.warning(
            "Filtered out {} instances without valid visible segmentation. "
            "There might be issues in your dataset generation process.".format(
                num_instances_without_valid_visible_segmentation))
    return dataset_dicts
Example #13
def benchmark_data_loading(cfg):
    """
    Benchmark the speed of data loading in PySlowFast.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    setup_environment()
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Benchmark data loading with config:")
    logger.info(pprint.pformat(cfg))

    timer = Timer()
    dataloader = loader.construct_loader(cfg, "train")
    logger.info("Initialize loader using {:.2f} seconds.".format(
        timer.seconds()))
    # Total batch size across different machines.
    batch_size = cfg.TRAIN.BATCH_SIZE * cfg.NUM_SHARDS
    log_period = cfg.BENCHMARK.LOG_PERIOD
    epoch_times = []
    # Test for a few epochs.
    for cur_epoch in range(cfg.BENCHMARK.NUM_EPOCHS):
        timer = Timer()
        timer_epoch = Timer()
        iter_times = []
        for cur_iter, _ in enumerate(tqdm.tqdm(dataloader)):
            if cur_iter > 0 and cur_iter % log_period == 0:
                iter_times.append(timer.seconds())
                ram_usage, ram_total = misc.cpu_mem_usage()
                logger.info(
                    "Epoch {}: {} iters ({} videos) in {:.2f} seconds. "
                    "RAM Usage: {:.2f}/{:.2f} GB.".format(
                        cur_epoch,
                        log_period,
                        log_period * batch_size,
                        iter_times[-1],
                        ram_usage,
                        ram_total,
                    ))
                timer.reset()
        epoch_times.append(timer_epoch.seconds())
        ram_usage, ram_total = misc.cpu_mem_usage()
        logger.info(
            "Epoch {}: in total {} iters ({} videos) in {:.2f} seconds. "
            "RAM Usage: {:.2f}/{:.2f} GB.".format(
                cur_epoch,
                len(dataloader),
                len(dataloader) * batch_size,
                epoch_times[-1],
                ram_usage,
                ram_total,
            ))
        logger.info(
            "Epoch {}: on average every {} iters ({} videos) take {:.2f}/{:.2f} "
            "(avg/std) seconds.".format(
                cur_epoch,
                log_period,
                log_period * batch_size,
                np.mean(iter_times),
                np.std(iter_times),
            ))
    logger.info("On average every epoch ({} videos) takes {:.2f}/{:.2f} "
                "(avg/std) seconds.".format(
                    len(dataloader) * batch_size,
                    np.mean(epoch_times),
                    np.std(epoch_times),
                ))
Example #14
File: hooks.py Project: tkhe/tkdetection
 def __init__(self, warmup_iter=3):
     self._warmup_iter = warmup_iter
     self._step_timer = Timer()
     self._start_time = time.perf_counter()
     self._total_timer = Timer()
Example #15
File: dacon.py Project: cpark90/rrrcnn
def load_dacon_rotated_train_json(json_file,
                                  image_root,
                                  dataset_name=None,
                                  extra_annotation_keys=None):
    """
    Load a json file with DACON's instances annotation format.
    Currently supports instance detection, instance segmentation,
    and person keypoints annotations.

    Args:
        json_file (str): full path to the json file in dacon instances annotation format.
        image_root (str): the directory where the images in this json file exist.
        dataset_name (str): the name of the dataset (e.g., coco_2017_train).
            If provided, this function will also put "thing_classes" into
            the metadata associated with this dataset.
        extra_annotation_keys (list[str]): list of per-annotation keys that should also be
            loaded into the dataset dict (besides "iscrowd", "bbox", "keypoints",
            "category_id", "segmentation"). The values for these keys will be returned as-is.
            For example, the densepose annotations are loaded in this way.

    Returns:
        list[dict]: a list of dicts in Detectron2 standard format. (See
        `Using Custom Datasets </tutorials/datasets.html>`_ )

    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
    """

    timer = Timer()
    json_file = PathManager.get_local_path(json_file)
    with contextlib.redirect_stdout(io.StringIO()):
        dacon_api = DaconAPI(json_file)
        anns = dacon_api.features
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(
            json_file, timer.seconds()))

    meta = MetadataCatalog.get(dataset_name)
    meta.thing_classes = dacon_api.thing_classes

    logger.info("Loaded {} images in dacon format from {}".format(
        len(anns), json_file))

    dataset_dicts = []

    for ann in anns:
        record = {}
        record["file_name"] = os.path.join(image_root, ann['image_id'])
        record["height"] = ann['height']
        record["width"] = ann['width']
        patch_size = (ann['width'], ann['height'])

        objs = []
        properties = ann['properties']
        for p in properties:
            # Check that the image_id in this annotation is the same as
            # the image_id we're looking at.
            # This fails only when the data parsing logic or the annotation file is buggy.

            # The original COCO valminusminival2014 & minival2014 annotation files
            # actually contains bugs that, together with certain ways of using COCO API,
            # can trigger this assertion.
            obj = {}
            obj["bbox"] = dacon_api.cvt_dacon_to_detectron_rotated(
                p["bounds_imcoords"].split(","), patch_size)
            obj["bbox_mode"] = BoxMode.XYWHA_ABS
            obj["category_id"] = int(p["type_id"]) - 1
            objs.append(obj)
        record["annotations"] = objs
        dataset_dicts.append(record)

    return dataset_dicts
Example #16
def load_filtered_lvis_json(json_file,
                            image_root,
                            metadata,
                            dataset_name=None):
    """
    Load a json file in LVIS's annotation format.
    Args:
        json_file (str): full path to the LVIS json annotation file.
        image_root (str): the directory where the images in this json file exist.
        metadata: meta data associated with dataset_name
        dataset_name (str): the name of the dataset (e.g., "lvis_v0.5_train").
            If provided, this function will put "thing_classes" into the metadata
            associated with this dataset.
    Returns:
        list[dict]: a list of dicts in Detectron2 standard format. (See
        `Using Custom Datasets </tutorials/datasets.html>`_ )
    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
    """
    from lvis import LVIS

    json_file = PathManager.get_local_path(json_file)

    timer = Timer()
    lvis_api = LVIS(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(
            json_file, timer.seconds()))

    if dataset_name is not None and "train" in dataset_name:
        assert global_cfg.MODEL.ROI_HEADS.NUM_CLASSES == len(
            metadata["thing_classes"]
        ), "NUM_CLASSES should match number of categories: ALL=1230, NOVEL=454"

    # sort indices for reproducible results
    img_ids = sorted(list(lvis_api.imgs.keys()))
    imgs = lvis_api.load_imgs(img_ids)
    anns = [lvis_api.img_ann_map[img_id] for img_id in img_ids]

    # Sanity check that each annotation has a unique id
    ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image]
    assert len(set(ann_ids)) == len(
        ann_ids), "Annotation ids in '{}' are not unique".format(json_file)

    imgs_anns = list(zip(imgs, anns))

    logger.info("Loaded {} images in the LVIS format from {}".format(
        len(imgs_anns), json_file))

    dataset_dicts = []

    for (img_dict, anno_dict_list) in imgs_anns:
        record = {}
        file_name = img_dict["file_name"]
        if img_dict["file_name"].startswith("COCO"):
            file_name = file_name[-16:]
        record["file_name"] = os.path.join(image_root, file_name)
        record["height"] = img_dict["height"]
        record["width"] = img_dict["width"]
        record["not_exhaustive_category_ids"] = img_dict.get(
            "not_exhaustive_category_ids", [])
        record["neg_category_ids"] = img_dict.get("neg_category_ids", [])
        image_id = record["image_id"] = img_dict["id"]

        objs = []
        for anno in anno_dict_list:
            # Check that the image_id in this annotation is the same as
            # the image_id we're looking at.
            assert anno["image_id"] == image_id
            obj = {"bbox": anno["bbox"], "bbox_mode": BoxMode.XYWH_ABS}
            if global_cfg.MODEL.ROI_HEADS.NUM_CLASSES == 454:
                # Novel classes only
                if anno["category_id"] - 1 not in LVIS_CATEGORIES_NOVEL_IDS:
                    continue
                obj["category_id"] = metadata["class_mapping"][
                    anno["category_id"] - 1]
            else:
                # Convert 1-indexed to 0-indexed
                obj["category_id"] = anno["category_id"] - 1
            objs.append(obj)
        record["annotations"] = objs
        dataset_dicts.append(record)

    return dataset_dicts
Example #17
def main(cfg: DictConfig) -> None:
    # A logger for this file. Define it before its first use below.
    logger = logging.getLogger(__name__)

    if "experiments" in cfg.keys():
        cfg = OmegaConf.merge(cfg, cfg.experiments)

    if "debug" in cfg.keys():
        logger.info("Run script in debug mode")
        cfg = OmegaConf.merge(cfg, cfg.debug)

    # NOTE: hydra causes the python file to run in hydra.run.dir by default
    logger.info(f"Run script in {HydraConfig.get().run.dir}")

    writer = SummaryWriter(log_dir=cfg.train.tensorboard_dir)

    checkpoints_dir = Path(cfg.train.checkpoints_dir)
    if not checkpoints_dir.exists():
        checkpoints_dir.mkdir(parents=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    image_shape = (cfg.train.channels, cfg.train.image_height,
                   cfg.train.image_width)

    # NOTE: With hydra, the python file runs in hydra.run.dir by default, so set the dataset path to a full path or an appropriate relative path
    dataset_path = Path(cfg.dataset.root) / cfg.dataset.frames
    split_path = Path(cfg.dataset.root) / cfg.dataset.split_file
    assert dataset_path.exists(), "Video image folder not found."
    assert split_path.exists(), (
        "The file that describes the train/test split was not found.")

    # Define training set
    train_dataset = Dataset(
        dataset_path=dataset_path,
        split_path=split_path,
        split_number=cfg.dataset.split_number,
        input_shape=image_shape,
        sequence_length=cfg.train.sequence_length,
        training=True,
    )

    # Define train dataloader
    train_dataloader = DataLoader(
        train_dataset,
        batch_size=cfg.train.batch_size,
        shuffle=True,
        num_workers=cfg.train.num_workers,
    )

    # Define test set
    test_dataset = Dataset(
        dataset_path=dataset_path,
        split_path=split_path,
        split_number=cfg.dataset.split_number,
        input_shape=image_shape,
        sequence_length=cfg.train.sequence_length,
        training=False,
    )

    # Define test dataloader
    test_dataloader = DataLoader(
        test_dataset,
        batch_size=cfg.train.batch_size,
        shuffle=False,
        num_workers=cfg.train.num_workers,
    )

    # Classification criterion
    criterion = nn.CrossEntropyLoss().to(device)

    # Define network
    model = CNNLSTM(
        num_classes=train_dataset.num_classes,
        latent_dim=cfg.train.latent_dim,
        lstm_layers=cfg.train.lstm_layers,
        hidden_dim=cfg.train.hidden_dim,
        bidirectional=cfg.train.bidirectional,
        attention=cfg.train.attention,
    )
    model = model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

    checkpointer = Checkpointer(
        model,
        optimizer=optimizer,
        # scheduler=scheduler,
        save_dir=cfg.train.checkpoints_dir,
        save_to_disk=True,
    )

    if cfg.train.resume:
        if not checkpointer.has_checkpoint():
            start_epoch = 0
        else:
            ckpt = checkpointer.resume_or_load("", resume=True)
            start_epoch = ckpt["epoch"]
            model.to(device)
            for state in optimizer.state.values():
                for k, v in state.items():
                    if isinstance(v, torch.Tensor):
                        state[k] = v.to(device)
    elif cfg.train.checkpoint_model != "":
        ckpt = torch.load(cfg.train.checkpoint_model, map_location="cpu")
        model.load_state_dict(ckpt["model"])
        model.to(device)
        start_epoch = 0
    else:
        start_epoch = 0

    for epoch in range(start_epoch, cfg.train.num_epochs):
        epoch += 1
        epoch_metrics = {"loss": [], "acc": []}
        timer = Timer()
        for batch_i, (X, y) in enumerate(train_dataloader):
            batch_i += 1
            if X.size(0) == 1:
                continue

            image_sequences = Variable(X.to(device), requires_grad=True)
            labels = Variable(y.to(device), requires_grad=False)

            optimizer.zero_grad()

            # Reset LSTM hidden state
            model.lstm.reset_hidden_state()

            # Get sequence predictions
            predictions = model(image_sequences)

            # Compute metrics
            loss = criterion(predictions, labels)
            acc = (
                predictions.detach().argmax(1) == labels).cpu().numpy().mean()

            loss.backward()
            optimizer.step()

            # Keep track of epoch metrics
            epoch_metrics["loss"].append(loss.item())
            epoch_metrics["acc"].append(acc)

            # Determine approximate time left
            batches_done = (epoch - 1) * len(train_dataloader) + (batch_i - 1)
            batches_left = cfg.train.num_epochs * len(
                train_dataloader) - batches_done
            time_left = datetime.timedelta(seconds=batches_left *
                                           timer.seconds())
            time_iter = round(timer.seconds(), 3)
            timer.reset()

            logger.info(
                f'Training - [Epoch: {epoch}/{cfg.train.num_epochs}] [Batch: {batch_i}/{len(train_dataloader)}] [Loss: {np.mean(epoch_metrics["loss"]):.3f}] [Acc: {np.mean(epoch_metrics["acc"]):.3f}] [ETA: {time_left}] [Iter time: {time_iter}s/it]'
            )

            # Empty cache
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

        writer.add_scalar("train/loss", np.mean(epoch_metrics["loss"]), epoch)
        writer.add_scalar("train/acc", np.mean(epoch_metrics["acc"]), epoch)

        def test_model(epoch):
            """ Evaluate the model on the test set """
            model.eval()
            test_metrics = {"loss": [], "acc": []}
            timer = Timer()
            for batch_i, (X, y) in enumerate(test_dataloader):
                batch_i += 1
                image_sequences = Variable(X.to(device), requires_grad=False)
                labels = Variable(y, requires_grad=False).to(device)

                with torch.no_grad():
                    # Reset LSTM hidden state
                    model.lstm.reset_hidden_state()
                    # Get sequence predictions
                    predictions = model(image_sequences)

                # Compute metrics
                loss = criterion(predictions, labels)
                acc = (predictions.detach().argmax(1) == labels
                       ).cpu().numpy().mean()

                # Keep track of loss and accuracy
                test_metrics["loss"].append(loss.item())
                test_metrics["acc"].append(acc)

                # Determine approximate time left
                batches_done = batch_i - 1
                batches_left = len(test_dataloader) - batches_done
                time_left = datetime.timedelta(seconds=batches_left *
                                               timer.seconds())
                time_iter = round(timer.seconds(), 3)
                timer.reset()

                # Log test performance
                logger.info(
                    f'Testing - [Epoch: {epoch}/{cfg.train.num_epochs}] [Batch: {batch_i}/{len(test_dataloader)}] [Loss: {np.mean(test_metrics["loss"]):.3f}] [Acc: {np.mean(test_metrics["acc"]):.3f}] [ETA: {time_left}] [Iter time: {time_iter}s/it]'
                )

            writer.add_scalar("test/loss", np.mean(test_metrics["loss"]),
                              epoch)
            writer.add_scalar("test/acc", np.mean(test_metrics["acc"]), epoch)

            model.train()

        # Evaluate the model on the test set
        test_model(epoch)

        # Save model checkpoint
        if epoch % cfg.train.checkpoint_interval == 0:
            checkpointer.save(f"checkpoint_{epoch:04}", epoch=epoch)

    writer.close()
Example #18
def load_viroi_json(dataset_name,
                    image_path,
                    stuff_path,
                    panoptic_path,
                    class_json_file,
                    relation_json_file,
                    instance_json_file,
                    triplet_json_file,
                    extra_annotation_keys=None):
    """
    Load a json file with COCO's instances annotation format.
    Currently supports instance detection, instance segmentation annotations.

    Args:
        dataset_name (str): the name of the dataset (e.g., coco_2017_train).
            If provided, this function will also put "thing_classes" into
            the metadata associated with this dataset.
        extra_annotation_keys (list[str]): list of per-annotation keys that should also be
            loaded into the dataset dict (besides "iscrowd", "bbox",
            "category_id", "segmentation"). The values for these keys will be returned as-is.
            For example, the densepose annotations are loaded in this way.

    Returns:
        list[dict]: a list of dicts in Detectron2 standard format. (See
        `Using Custom Datasets </tutorials/datasets.html>`_ )

    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
    """

    timer = Timer()
    viroi_api = VIROI(image_path, stuff_path, panoptic_path, class_json_file,
                      relation_json_file, instance_json_file,
                      triplet_json_file)
    if timer.seconds() > 1:
        logger.info("Loading viroi takes {:.2f} seconds.".format(
            timer.seconds()))

    meta = MetadataCatalog.get(dataset_name)
    stuff_dataset_id_to_contiguous_id = meta.get(
        "stuff_dataset_id_to_contiguous_id")
    thing_dataset_id_to_contiguous_id = meta.get(
        "thing_dataset_id_to_contiguous_id")
    relation_dataset_id_to_contiguous_id = meta.get(
        "relation_dataset_id_to_contiguous_id")
    # The categories in a custom json file may not be sorted.
    # thing_classes = [c["name"] for c in viroi_api.loadThings()]
    # meta.thing_classes = thing_classes
    # stuff_classes = [c["name"] for c in viroi_api.loadStuffs()]
    # meta.stuff_classes = stuff_classes

    # In COCO, certain category ids are artificially removed,
    # and by convention they are always ignored.
    # We deal with COCO's id issue and translate
    # the category ids to contiguous ids in [0, 80).

    # It works by looking at the "categories" field in the json, therefore
    # if users' own json also have incontiguous ids, we'll
    # apply this mapping as well but print a warning.

    # meta.thing_dataset_id_to_contiguous_id = {v['category_id']: i for i, v in enumerate(viroi_api.loadThings())} # category_id => from 0 to 79
    # meta.contiguous_id_to_thing_class_id = {i:v['class_id'] for i, v in enumerate(viroi_api.loadThings())} # from 0 to 79 => from 1 to 80
    # meta.stuff_dataset_id_to_contiguous_id = {v['category_id']: i+1 for i, v in enumerate(viroi_api.loadStuffs())} # category => from 1 to 53
    # meta.contiguous_id_to_stuff_class_id = {i+1: v['class_id'] for i, v in enumerate(viroi_api.loadStuffs())}  # from 1 to 53 => from 81 to 133

    logger.info("Loaded {} images in VIROI".format(
        len(viroi_api.image_instance_dict)))

    # ann_keys = ["iscrowd", "bbox", "category_id"] + (extra_annotation_keys or [])
    dataset_dict = []
    image_ids = viroi_api.loadIds()
    for image_id in image_ids:
        img_dict = viroi_api.loadImgs(image_id)[0]
        record = {}
        record["file_name"] = os.path.join(image_path, img_dict["image_name"])
        record["height"] = img_dict["height"]
        record["width"] = img_dict["width"]
        image_id = record["image_id"] = img_dict["image_id"]

        instance_dict = img_dict['instances']
        objs = []
        stfs = []
        object_id_list = []
        stuff_id_list = []
        thing_count = 0
        stuff_count = 0
        # interest_map=np.zeros((img_dict["height"],img_dict["width"]))
        for instance_id in instance_dict:
            instance = instance_dict[instance_id]

            if viroi_api.class_dict[str(instance['class_id'])]['isthing']:
                object_id_list.append(instance_id)
                obj = {}
                obj['iscrowd'] = instance['iscrowd']
                obj['labeled'] = 1 if instance['labeled'] else 0
                obj['bbox'] = [
                    instance['box'][1], instance['box'][0],
                    instance['box'][3] - instance['box'][1],
                    instance['box'][2] - instance['box'][0]
                ]
                obj['category_id'] = thing_dataset_id_to_contiguous_id[
                    viroi_api.loadClassdict()[str(
                        instance['class_id'])]['category_id']]
                obj['category_name'] = viroi_api.loadClassdict()[str(
                    instance['class_id'])]['name']
                obj['class_id'] = instance['class_id']
                obj['segmentation'] = instance['segmentation']
                obj["bbox_mode"] = BoxMode.XYWH_ABS
                # if obj['labeled']==1:
                #     mask=mask_utils.decode(instance['segmentation'])
                #     interest_map[mask==1]=255
                objs.append(obj)
                thing_count += 1
            else:
                stuff_id_list.append(instance_id)
                stf = {}
                stf['iscrowd'] = instance['iscrowd']
                stf['labeled'] = 1 if instance['labeled'] else 0
                stf['bbox'] = [
                    instance['box'][1], instance['box'][0],
                    instance['box'][3] - instance['box'][1],
                    instance['box'][2] - instance['box'][0]
                ]
                stf['category_id'] = stuff_dataset_id_to_contiguous_id[
                    viroi_api.loadClassdict()[str(
                        instance['class_id'])]['category_id']]
                stf['category_name'] = viroi_api.loadClassdict()[str(
                    instance['class_id'])]['name']
                stf['class_id'] = instance['class_id']
                stf['segmentation'] = instance['segmentation']
                stf["bbox_mode"] = BoxMode.XYWH_ABS
                # if stf['labeled']==1:
                #     mask=mask_utils.decode(instance['segmentation'])
                #     interest_map[mask==1]=1
                stfs.append(stf)
                stuff_count += 1
        record["annotations"] = objs
        # record['interest_map'] = interest_map
        # Image.fromarray(interest_map).convert('L').save("interest_map.png")
        record["stuff_annotations"] = stfs
        record["instance_ids"] = object_id_list
        record["stuff_instance_ids"] = stuff_id_list
        record["sem_seg_file_name"] = os.path.join(
            stuff_path, img_dict["image_name"].replace("jpg", "png"))
        instance_ids = object_id_list + stuff_id_list
        triplets = viroi_api.loadTriplets(image_id)[0]
        triplet_records = []
        for triplet_id in triplets:
            triplet = triplets[triplet_id]
            tri = {}
            tri['subject_id'] = instance_ids.index(
                str(triplet['subject_instance_id']))
            tri['object_id'] = instance_ids.index(
                str(triplet['object_instance_id']))
            tri['relation_id'] = relation_dataset_id_to_contiguous_id[
                triplet['relation_id']]
            triplet_records.append(tri)
        record["triplets"] = triplet_records
        dataset_dict.append(record)
    return dataset_dict
Example #19
def load_coco_with_attributes_json(json_file,
                                   image_root,
                                   dataset_name=None,
                                   extra_annotation_keys=None):
    """
    Extend load_coco_json() with additional support for attributes
    """
    from pycocotools.coco import COCO

    timer = Timer()
    json_file = PathManager.get_local_path(json_file)
    with contextlib.redirect_stdout(io.StringIO()):
        coco_api = COCO(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(
            json_file, timer.seconds()))

    id_map = None
    if dataset_name is not None:
        meta = MetadataCatalog.get(dataset_name)
        cat_ids = sorted(coco_api.getCatIds())
        cats = coco_api.loadCats(cat_ids)
        thing_classes = [
            c["name"] for c in sorted(cats, key=lambda x: x["id"])
        ]
        meta.thing_classes = thing_classes
        if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)):
            if "coco" not in dataset_name:
                logger.warning("""
Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.
""")
        id_map = {v: i for i, v in enumerate(cat_ids)}
        meta.thing_dataset_id_to_contiguous_id = id_map

    img_ids = sorted(coco_api.imgs.keys())
    imgs = coco_api.loadImgs(img_ids)
    anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]

    if "minival" not in json_file:
        ann_ids = [
            ann["id"] for anns_per_image in anns for ann in anns_per_image
        ]
        assert len(set(ann_ids)) == len(
            ann_ids), "Annotation ids in '{}' are not unique!".format(
                json_file)

    imgs_anns = list(zip(imgs, anns))

    logger.info("Loaded {} images in COCO format from {}".format(
        len(imgs_anns), json_file))

    dataset_dicts = []

    ann_keys = ["iscrowd", "bbox", "keypoints", "category_id"
                ] + (extra_annotation_keys or [])

    num_instances_without_valid_segmentation = 0

    for (img_dict, anno_dict_list) in imgs_anns:
        record = {}
        record["file_name"] = os.path.join(image_root, img_dict["file_name"])
        record["height"] = img_dict["height"]
        record["width"] = img_dict["width"]
        image_id = record["image_id"] = img_dict["id"]

        objs = []
        for anno in anno_dict_list:
            assert anno["image_id"] == image_id

            assert anno.get(
                "ignore",
                0) == 0, '"ignore" in COCO json file is not supported.'

            obj = {key: anno[key] for key in ann_keys if key in anno}

            segm = anno.get("segmentation", None)
            if segm:
                if not isinstance(segm, dict):
                    segm = [
                        poly for poly in segm
                        if len(poly) % 2 == 0 and len(poly) >= 6
                    ]
                    if len(segm) == 0:
                        num_instances_without_valid_segmentation += 1
                        continue
                obj["segmentation"] = segm

            keypts = anno.get("keypoints", None)
            if keypts:
                for idx, v in enumerate(keypts):
                    if idx % 3 != 2:
                        keypts[idx] = v + 0.5
                obj["keypoints"] = keypts

            attrs = anno.get("attribute_ids", None)
            if attrs:  # list[int]
                obj["attribute_ids"] = attrs

            obj["bbox_mode"] = BoxMode.XYWH_ABS
            if id_map:
                obj["category_id"] = id_map[obj["category_id"]]
            objs.append(obj)
        record["annotations"] = objs
        dataset_dicts.append(record)

    if num_instances_without_valid_segmentation > 0:
        logger.warning(
            "Filtered out {} instances without valid segmentation. "
            "There might be issues in your dataset generation process.".format(
                num_instances_without_valid_segmentation))
    return dataset_dicts
Example #20
 def __init__(self) -> None:
     self.timer = Timer()
     self.timer.reset()
     self.epoch_times = []
Example #21
def load_coco_json(json_file,
                   image_root,
                   dataset_name=None,
                   extra_annotation_keys=None):
    """
    Load a json file with COCO's instances annotation format.
    Currently supports instance detection.

    Args:
        json_file (str): full path to the json file in COCO instances annotation format.
        image_root (str): the directory where the images in this json file exist.
        dataset_name (str): the name of the dataset (e.g., coco_2017_train).
            If provided, this function will also put "thing_classes" into
            the metadata associated with this dataset.
        extra_annotation_keys (list[str]): list of per-annotation keys that should also be
            loaded into the dataset dict (besides "iscrowd", "bbox",
            "category_id"). The values for these keys will be returned as-is.
            For example, the densepose annotations are loaded in this way.

    Returns:
        list[dict]: a list of dicts in Detectron2 standard format. (See
        `Using Custom Datasets </tutorials/datasets.html>`_ )

    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
    """
    from pycocotools.coco import COCO

    timer = Timer()
    json_file = PathManager.get_local_path(json_file)
    with contextlib.redirect_stdout(io.StringIO()):
        coco_api = COCO(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(
            json_file, timer.seconds()))

    id_map = None
    if dataset_name is not None:
        meta = MetadataCatalog.get(dataset_name)
        cat_ids = sorted(coco_api.getCatIds())
        cats = coco_api.loadCats(cat_ids)
        # The categories in a custom json file may not be sorted.
        thing_classes = [
            c["name"] for c in sorted(cats, key=lambda x: x["id"])
        ]
        meta.thing_classes = thing_classes

        # In COCO, certain category ids are artificially removed,
        # and by convention they are always ignored.
        # We deal with COCO's id issue and translate
        # the category ids to contiguous ids in [0, 80).

        # It works by looking at the "categories" field in the json, therefore
        # if users' own json also have incontiguous ids, we'll
        # apply this mapping as well but print a warning.
        if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)):
            if "coco" not in dataset_name:
                logger.warning("""
Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.
""")
        id_map = {v: i for i, v in enumerate(cat_ids)}
        meta.thing_dataset_id_to_contiguous_id = id_map

    # sort indices for reproducible results
    img_ids = sorted(list(coco_api.imgs.keys()))
    # imgs is a list of dicts, each looks something like:
    # {'license': 4,
    #  'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
    #  'file_name': 'COCO_val2014_000000001268.jpg',
    #  'height': 427,
    #  'width': 640,
    #  'date_captured': '2013-11-17 05:57:24',
    #  'id': 1268}
    imgs = coco_api.loadImgs(img_ids)
    # anns is a list[list[dict]], where each dict is an annotation
    # record for an object. The inner list enumerates the objects in an image
    # and the outer list enumerates over images. Example of anns[0]:
    # [{'segmentation': [[192.81,
    #     247.09,
    #     ...
    #     219.03,
    #     249.06]],
    #   'area': 1035.749,
    #   'iscrowd': 0,
    #   'image_id': 1268,
    #   'bbox': [192.81, 224.8, 74.73, 33.43],
    #   'category_id': 16,
    #   'id': 42986},
    #  ...]
    anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]

    if "minival" not in json_file:
        # The popular valminusminival & minival annotations for COCO2014 contain this bug.
        # However the ratio of buggy annotations there is tiny and does not affect accuracy.
        # Therefore we explicitly white-list them.
        ann_ids = [
            ann["id"] for anns_per_image in anns for ann in anns_per_image
        ]
        assert len(set(ann_ids)) == len(
            ann_ids), "Annotation ids in '{}' are not unique!".format(
                json_file)

    imgs_anns = list(zip(imgs, anns))

    logger.info("Loaded {} images in COCO format from {}".format(
        len(imgs_anns), json_file))

    dataset_dicts = []

    ann_keys = ["iscrowd", "bbox", "category_id"] + (extra_annotation_keys
                                                     or [])

    for (img_dict, anno_dict_list) in imgs_anns:
        record = {}
        record["file_name"] = os.path.join(image_root, img_dict["file_name"])
        record["height"] = img_dict["height"]
        record["width"] = img_dict["width"]
        image_id = record["image_id"] = img_dict["id"]

        objs = []
        for anno in anno_dict_list:
            # Check that the image_id in this annotation is the same as
            # the image_id we're looking at.
            # This fails only when the data parsing logic or the annotation file is buggy.

            # The original COCO valminusminival2014 & minival2014 annotation files
            # actually contains bugs that, together with certain ways of using COCO API,
            # can trigger this assertion.
            assert anno["image_id"] == image_id

            assert anno.get("ignore", 0) == 0

            obj = {key: anno[key] for key in ann_keys if key in anno}
            obj["bbox_mode"] = BoxMode.XYWH_ABS
            if id_map:
                obj["category_id"] = id_map[obj["category_id"]]
            objs.append(obj)
        record["annotations"] = objs
        dataset_dicts.append(record)

    return dataset_dicts
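For context, a minimal sketch of how a loader like this is usually wired into Detectron2's catalogs so the returned dicts can be consumed downstream; the dataset name and paths below are hypothetical placeholders, not values from this document:

from detectron2.data import DatasetCatalog, MetadataCatalog

# Hypothetical name and paths, for illustration only.
_NAME = "my_coco_train"
_JSON = "datasets/my_coco/annotations/instances_train.json"
_ROOT = "datasets/my_coco/images"

DatasetCatalog.register(_NAME, lambda: load_coco_json(_JSON, _ROOT, dataset_name=_NAME))
MetadataCatalog.get(_NAME).set(json_file=_JSON, image_root=_ROOT)

dataset_dicts = DatasetCatalog.get(_NAME)  # list[dict] in the standard format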
예제 #22
0
def benchmark_data(cfg: AttrDict, split: str = "train"):
    split = split.upper()
    total_images = MAX_ITERS * cfg["DATA"][split]["BATCHSIZE_PER_REPLICA"]
    timer = Timer()
    dataset = build_dataset(cfg, split)

    try:
        device = torch.device("cuda" if cfg.MACHINE.DEVICE == "gpu" else "cpu")
    except AttributeError:
        device = torch.device("cuda")

    # Give the sampler the same seed for the entire distributed group, per the PyTorch documentation.
    sampler_seed = cfg.SEED_VALUE
    dataloader = get_loader(
        dataset=dataset,
        dataset_config=cfg["DATA"][split],
        num_dataloader_workers=cfg.DATA.NUM_DATALOADER_WORKERS,
        pin_memory=False,
        multi_processing_method=cfg.MULTI_PROCESSING_METHOD,
        device=device,
        sampler_seed=sampler_seed,
    )

    # Fairstore data sampler would require setting the start iter before it can start.
    if hasattr(dataloader.sampler, "set_start_iter"):
        dataloader.sampler.set_start_iter(0)

    # initial warmup measured as warmup time
    timer.reset()
    data_iterator = iter(dataloader)
    for _ in range(WARMUP_ITERS):  # warmup batches, excluded from the timed runs
        next(data_iterator)
    # the total number of seconds since the start/reset of the timer
    warmup_time = timer.seconds()
    logging.info(f"Warmup time {WARMUP_ITERS} batches: {warmup_time} seconds")

    # measure the number of images per second over MAX_ITERS iterations.
    timer = Timer()
    for _ in tqdm.trange(MAX_ITERS):
        next(data_iterator)
    time_elapsed = timer.seconds()
    logging.info(
        f"iters: {MAX_ITERS}; images: {total_images}; time: {time_elapsed} seconds; "
        f"images/sec: {round(float(total_images / time_elapsed), 4)}; "
        f"ms/img: {round(float(1000 * time_elapsed / total_images), 4)} "
    )

    # run benchmark for a few more rounds to catch fluctuations
    for round_idx in range(BENCHMARK_ROUNDS):
        timer = Timer()
        for _ in tqdm.trange(MAX_ITERS):
            next(data_iterator)
        time_elapsed = timer.seconds()
        logging.info(
            f"round: {round_idx}: iters: {MAX_ITERS}; images: {total_images}; "
            f"time: {time_elapsed} seconds; "
            f"images/sec: {round(float(total_images / time_elapsed), 4)}; "
            f"ms/img: {round(float(1000 * time_elapsed / total_images), 4)} "
        )
    del data_iterator
    del dataloader
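Note that benchmark_data reads several module-level constants defined elsewhere in the source. A plausible set of definitions, with illustrative values that are assumptions rather than the originals:

# Assumed values for the module-level constants used above.
MAX_ITERS = 1000         # timed iterations per round
WARMUP_ITERS = 10        # batches drawn before timing starts
BENCHMARK_ROUNDS = 3     # extra timed rounds to observe fluctuations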
예제 #23
0
def load_coco_json(json_file, image_root, dataset_name=None, extra_annotation_keys=None):
    """
    Load a json file with COCO's instances annotation format.
    Currently supports instance detection, instance segmentation,
    and person keypoints annotations.

    Args:
        json_file (str): full path to the json file in COCO instances annotation format.
        image_root (str or path-like): the directory where the images in this json file exist.
        dataset_name (str or None): the name of the dataset (e.g., coco_2017_train).
            When provided, this function will also do the following:

            * Put "thing_classes" into the metadata associated with this dataset.
            * Map the category ids into a contiguous range (needed by standard dataset format),
              and add "thing_dataset_id_to_contiguous_id" to the metadata associated
              with this dataset.

            This option should usually be provided, unless users need to load
            the original json content and apply more processing manually.
        extra_annotation_keys (list[str]): list of per-annotation keys that should also be
            loaded into the dataset dict (besides "iscrowd", "bbox", "keypoints",
            "category_id", "segmentation"). The values for these keys will be returned as-is.
            For example, the densepose annotations are loaded in this way.

    Returns:
        list[dict]: a list of dicts in Detectron2 standard dataset dicts format (See
        `Using Custom Datasets </tutorials/datasets.html>`_ ) when `dataset_name` is not None.
        If `dataset_name` is None, the returned `category_ids` may be
        incontiguous and may not conform to the Detectron2 standard format.

    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
    """
    from pycocotools.coco import COCO

    timer = Timer()
    json_file = PathManager.get_local_path(json_file)
    with contextlib.redirect_stdout(io.StringIO()):
        coco_api = COCO(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()))

    id_map = None
    if dataset_name is not None:
        meta = MetadataCatalog.get(dataset_name)
        cat_ids = sorted(coco_api.getCatIds())
        cats = coco_api.loadCats(cat_ids)
        # The categories in a custom json file may not be sorted.
        thing_classes = [c["name"] for c in sorted(cats, key=lambda x: x["id"])]
        meta.thing_classes = thing_classes

        # In COCO, certain category ids are artificially removed,
        # and by convention they are always ignored.
        # We deal with COCO's id issue and translate
        # the category ids to contiguous ids in [0, 80).

        # It works by looking at the "categories" field in the json, therefore
        # if users' own json also has incontiguous ids, we'll
        # apply this mapping as well but print a warning.
        if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)):
            if "coco" not in dataset_name:
                logger.warning(
                    """
Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.
"""
                )
        id_map = {v: i for i, v in enumerate(cat_ids)}
        meta.thing_dataset_id_to_contiguous_id = id_map

    # sort indices for reproducible results
    img_ids = sorted(coco_api.imgs.keys())
    # imgs is a list of dicts, each looks something like:
    # {'license': 4,
    #  'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
    #  'file_name': 'COCO_val2014_000000001268.jpg',
    #  'height': 427,
    #  'width': 640,
    #  'date_captured': '2013-11-17 05:57:24',
    #  'id': 1268}
    imgs = coco_api.loadImgs(img_ids)
    # anns is a list[list[dict]], where each dict is an annotation
    # record for an object. The inner list enumerates the objects in an image
    # and the outer list enumerates over images. Example of anns[0]:
    # [{'segmentation': [[192.81,
    #     247.09,
    #     ...
    #     219.03,
    #     249.06]],
    #   'area': 1035.749,
    #   'iscrowd': 0,
    #   'image_id': 1268,
    #   'bbox': [192.81, 224.8, 74.73, 33.43],
    #   'category_id': 16,
    #   'id': 42986},
    #  ...]
    anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]
    total_num_valid_anns = sum([len(x) for x in anns])
    total_num_anns = len(coco_api.anns)
    if total_num_valid_anns < total_num_anns:
        logger.warning(
            f"{json_file} contains {total_num_anns} annotations, but only "
            f"{total_num_valid_anns} of them match to images in the file."
        )

    if "minival" not in json_file:
        # The popular valminusminival & minival annotations for COCO2014 contain this bug.
        # However the ratio of buggy annotations there is tiny and does not affect accuracy.
        # Therefore we explicitly white-list them.
        ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image]
        assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format(
            json_file
        )

    imgs_anns = list(zip(imgs, anns))
    logger.info("Loaded {} images in COCO format from {}".format(len(imgs_anns), json_file))

    dataset_dicts = []

    ann_keys = ["iscrowd", "bbox", "keypoints", "category_id"] + (extra_annotation_keys or [])

    num_instances_without_valid_segmentation = 0

    for (img_dict, anno_dict_list) in imgs_anns:
        record = {}
        record["file_name"] = os.path.join(image_root, img_dict["file_name"])
        record["height"] = img_dict["height"]
        record["width"] = img_dict["width"]
        image_id = record["image_id"] = img_dict["id"]

        objs = []
        for anno in anno_dict_list:
            # Check that the image_id in this annotation is the same as
            # the image_id we're looking at.
            # This fails only when the data parsing logic or the annotation file is buggy.

            # The original COCO valminusminival2014 & minival2014 annotation files
            # actually contains bugs that, together with certain ways of using COCO API,
            # can trigger this assertion.
            assert anno["image_id"] == image_id

            assert anno.get("ignore", 0) == 0, '"ignore" in COCO json file is not supported.'

            obj = {key: anno[key] for key in ann_keys if key in anno}

            segm = anno.get("segmentation", None)
            if segm:  # either list[list[float]] or dict(RLE)
                if isinstance(segm, dict):
                    if isinstance(segm["counts"], list):
                        # convert to compressed RLE
                        segm = mask_util.frPyObjects(segm, *segm["size"])
                else:
                    # filter out invalid polygons (< 3 points)
                    segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6]
                    if len(segm) == 0:
                        num_instances_without_valid_segmentation += 1
                        continue  # ignore this instance
                obj["segmentation"] = segm

            keypts = anno.get("keypoints", None)
            if keypts:  # list[int]
                for idx, v in enumerate(keypts):
                    if idx % 3 != 2:
                        # COCO's segmentation coordinates are floating points in [0, H or W],
                        # but keypoint coordinates are integers in [0, H-1 or W-1]
                        # Therefore we assume the coordinates are "pixel indices" and
                        # add 0.5 to convert to floating point coordinates.
                        keypts[idx] = v + 0.5
                obj["keypoints"] = keypts

            obj["bbox_mode"] = BoxMode.XYWH_ABS
            if id_map:
                annotation_category_id = obj["category_id"]
                try:
                    obj["category_id"] = id_map[annotation_category_id]
                except KeyError as e:
                    raise KeyError(
                        f"Encountered category_id={annotation_category_id} "
                        "but this id does not exist in 'categories' of the json file."
                    ) from e
            objs.append(obj)
        record["annotations"] = objs
        dataset_dicts.append(record)

    if num_instances_without_valid_segmentation > 0:
        logger.warning(
            "Filtered out {} instances without valid segmentation. ".format(
                num_instances_without_valid_segmentation
            )
            + "There might be issues in your dataset generation process.  Please "
            "check https://detectron2.readthedocs.io/en/latest/tutorials/datasets.html carefully"
        )
    return dataset_dicts
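To make the contiguous-id translation concrete, here is a tiny self-contained illustration of the {dataset_id: contiguous_id} map the loader builds; the category ids are invented for the example:

# COCO-style category ids with gaps: some ids were removed from the dataset.
cat_ids = sorted([1, 2, 3, 5, 7, 13])              # hypothetical, non-contiguous
id_map = {v: i for i, v in enumerate(cat_ids)}
assert id_map == {1: 0, 2: 1, 3: 2, 5: 3, 7: 4, 13: 5}

# Each annotation's category_id is translated through the map before use.
anno = {"category_id": 7}
anno["category_id"] = id_map[anno["category_id"]]  # -> 4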
예제 #24
0
def load_lvis_json(json_file,
                   image_root,
                   dataset_name=None,
                   extra_annotation_keys=None):
    """
    Load a json file in LVIS's annotation format.

    Args:
        json_file (str): full path to the LVIS json annotation file.
        image_root (str): the directory where the images in this json file exist.
        dataset_name (str): the name of the dataset (e.g., "lvis_v0.5_train").
            If provided, this function will put "thing_classes" into the metadata
            associated with this dataset.
        extra_annotation_keys (list[str]): list of per-annotation keys that should also be
            loaded into the dataset dict (besides "bbox", "bbox_mode", "category_id",
            "segmentation"). The values for these keys will be returned as-is.

    Returns:
        list[dict]: a list of dicts in Detectron2 standard format. (See
        `Using Custom Datasets </tutorials/datasets.html>`_ )

    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
    """
    from lvis import LVIS

    json_file = PathManager.get_local_path(json_file)

    timer = Timer()
    lvis_api = LVIS(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(
            json_file, timer.seconds()))

    if dataset_name is not None:
        meta = get_lvis_instances_meta(dataset_name)
        MetadataCatalog.get(dataset_name).set(**meta)

    # sort indices for reproducible results
    img_ids = sorted(lvis_api.imgs.keys())
    # imgs is a list of dicts, each looks something like:
    # {'license': 4,
    #  'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
    #  'file_name': 'COCO_val2014_000000001268.jpg',
    #  'height': 427,
    #  'width': 640,
    #  'date_captured': '2013-11-17 05:57:24',
    #  'id': 1268}
    imgs = lvis_api.load_imgs(img_ids)
    # anns is a list[list[dict]], where each dict is an annotation
    # record for an object. The inner list enumerates the objects in an image
    # and the outer list enumerates over images. Example of anns[0]:
    # [{'segmentation': [[192.81,
    #     247.09,
    #     ...
    #     219.03,
    #     249.06]],
    #   'area': 1035.749,
    #   'image_id': 1268,
    #   'bbox': [192.81, 224.8, 74.73, 33.43],
    #   'category_id': 16,
    #   'id': 42986},
    #  ...]
    anns = [lvis_api.img_ann_map[img_id] for img_id in img_ids]

    # Sanity check that each annotation has a unique id
    ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image]
    assert len(set(ann_ids)) == len(
        ann_ids), "Annotation ids in '{}' are not unique".format(json_file)

    imgs_anns = list(zip(imgs, anns))

    logger.info("Loaded {} images in the LVIS format from {}".format(
        len(imgs_anns), json_file))

    if extra_annotation_keys:
        logger.info(
            "The following extra annotation keys will be loaded: {} ".format(
                extra_annotation_keys))
    else:
        extra_annotation_keys = []

    def get_file_name(img_root, img_dict):
        # Determine the path including the split folder ("train2017", "val2017", "test2017") from
        # the coco_url field. Example:
        #   'coco_url': 'http://images.cocodataset.org/train2017/000000155379.jpg'
        split_folder, file_name = img_dict["coco_url"].split("/")[-2:]
        return os.path.join(img_root + split_folder, file_name)

    dataset_dicts = []

    for (img_dict, anno_dict_list) in imgs_anns:
        record = {}
        record["file_name"] = get_file_name(image_root, img_dict)
        record["height"] = img_dict["height"]
        record["width"] = img_dict["width"]
        record["not_exhaustive_category_ids"] = img_dict.get(
            "not_exhaustive_category_ids", [])
        record["neg_category_ids"] = img_dict.get("neg_category_ids", [])
        image_id = record["image_id"] = img_dict["id"]

        objs = []
        for anno in anno_dict_list:
            # Check that the image_id in this annotation is the same as
            # the image_id we're looking at.
            # This fails only when the data parsing logic or the annotation file is buggy.
            assert anno["image_id"] == image_id
            obj = {"bbox": anno["bbox"], "bbox_mode": BoxMode.XYWH_ABS}
            # LVIS data loader can be used to load COCO dataset categories. In this case `meta`
            # variable will have a field with COCO-specific category mapping.
            if dataset_name is not None and "thing_dataset_id_to_contiguous_id" in meta:
                obj["category_id"] = meta["thing_dataset_id_to_contiguous_id"][
                    anno["category_id"]]
            else:
                obj["category_id"] = anno[
                    "category_id"] - 1  # Convert 1-indexed to 0-indexed
            segm = anno["segmentation"]  # list[list[float]]
            # filter out invalid polygons (< 3 points)
            valid_segm = [
                poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6
            ]
            assert len(segm) == len(
                valid_segm
            ), "Annotation contains an invalid polygon with < 3 points"
            assert len(segm) > 0
            obj["segmentation"] = segm
            for extra_ann_key in extra_annotation_keys:
                obj[extra_ann_key] = anno[extra_ann_key]
            objs.append(obj)
        record["annotations"] = objs
        dataset_dicts.append(record)

    return dataset_dicts
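The coco_url-based path resolution in get_file_name can be checked in isolation. A small sketch with a made-up record; note that the img_root + split_folder concatenation expects img_root to end with a path separator:

import os

img_dict = {"coco_url": "http://images.cocodataset.org/train2017/000000155379.jpg"}
split_folder, file_name = img_dict["coco_url"].split("/")[-2:]
path = os.path.join("datasets/coco/" + split_folder, file_name)
assert path == "datasets/coco/train2017/000000155379.jpg"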
예제 #25
0
def load_lvis_json(json_file, image_root, dataset_name=None):
    """
    Load a json file in LVIS's annotation format.

    Args:
        json_file (str): full path to the LVIS json annotation file.
        image_root (str): the directory where the images in this json file exist.
        dataset_name (str): the name of the dataset (e.g., "lvis_v0.5_train").
            If provided, this function will put "thing_classes" into the metadata
            associated with this dataset.

    Returns:
        list[dict]: a list of dicts in Detectron2 standard format. (See
        `Using Custom Datasets </tutorials/datasets.html>`_ )

    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
    """
    from lvis import LVIS

    json_file = PathManager.get_local_path(json_file)

    timer = Timer()
    lvis_api = LVIS(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(
            json_file, timer.seconds()))

    if dataset_name is not None:
        meta = get_lvis_instances_meta(dataset_name)
        MetadataCatalog.get(dataset_name).set(**meta)

    # sort indices for reproducible results
    img_ids = sorted(list(lvis_api.imgs.keys()))
    # imgs is a list of dicts, each looks something like:
    # {'license': 4,
    #  'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
    #  'file_name': 'COCO_val2014_000000001268.jpg',
    #  'height': 427,
    #  'width': 640,
    #  'date_captured': '2013-11-17 05:57:24',
    #  'id': 1268}
    imgs = lvis_api.load_imgs(img_ids)
    # anns is a list[list[dict]], where each dict is an annotation
    # record for an object. The inner list enumerates the objects in an image
    # and the outer list enumerates over images. Example of anns[0]:
    # [{'segmentation': [[192.81,
    #     247.09,
    #     ...
    #     219.03,
    #     249.06]],
    #   'area': 1035.749,
    #   'image_id': 1268,
    #   'bbox': [192.81, 224.8, 74.73, 33.43],
    #   'category_id': 16,
    #   'id': 42986},
    #  ...]
    anns = [lvis_api.img_ann_map[img_id] for img_id in img_ids]

    # Sanity check that each annotation has a unique id
    ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image]
    assert len(set(ann_ids)) == len(
        ann_ids), "Annotation ids in '{}' are not unique".format(json_file)

    imgs_anns = list(zip(imgs, anns))

    logger.info("Loaded {} images in the LVIS format from {}".format(
        len(imgs_anns), json_file))

    dataset_dicts = []

    for (img_dict, anno_dict_list) in imgs_anns:
        record = {}
        file_name = img_dict["file_name"]
        if img_dict["file_name"].startswith("COCO"):
            # Convert form the COCO 2014 file naming convention of
            # COCO_[train/val/test]2014_000000000000.jpg to the 2017 naming convention of
            # 000000000000.jpg (LVIS v1 will fix this naming issue)
            file_name = file_name[-16:]
        record["file_name"] = os.path.join(image_root, file_name)
        record["height"] = img_dict["height"]
        record["width"] = img_dict["width"]
        record["not_exhaustive_category_ids"] = img_dict.get(
            "not_exhaustive_category_ids", [])
        record["neg_category_ids"] = img_dict.get("neg_category_ids", [])
        image_id = record["image_id"] = img_dict["id"]

        objs = []
        for anno in anno_dict_list:
            # Check that the image_id in this annotation is the same as
            # the image_id we're looking at.
            # This fails only when the data parsing logic or the annotation file is buggy.
            assert anno["image_id"] == image_id
            obj = {"bbox": anno["bbox"], "bbox_mode": BoxMode.XYWH_ABS}
            obj["category_id"] = anno[
                "category_id"] - 1  # Convert 1-indexed to 0-indexed
            segm = anno["segmentation"]  # list[list[float]]
            # filter out invalid polygons (< 3 points)
            valid_segm = [
                poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6
            ]
            assert len(segm) == len(
                valid_segm
            ), "Annotation contains an invalid polygon with < 3 points"
            assert len(segm) > 0
            obj["segmentation"] = segm
            objs.append(obj)
        record["annotations"] = objs
        dataset_dicts.append(record)

    return dataset_dicts
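The COCO-2014-to-2017 renaming above relies on the 2017-style name being exactly the last 16 characters of the 2014 name (12 zero-padded digits plus ".jpg"); a quick check:

file_name = "COCO_val2014_000000001268.jpg"
if file_name.startswith("COCO"):
    file_name = file_name[-16:]  # strip the "COCO_<split>2014_" prefix
assert file_name == "000000001268.jpg"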
예제 #26
0
 def __init__(self, pidfile):
     super(temporal_restrict, self).__init__(pidfile=pidfile)
     self.tt = Timer()
     self.event_type = str(self.__class__.__name__)
     self.cfgs = getattr(get_cfg(), self.event_type)
예제 #27
0
def load_coco_json(json_file,
                   image_root,
                   dataset_name=None,
                   extra_annotation_keys=None):
    """
    Load a json file with COCO's instances annotation format.
    Currently supports instance detection, instance segmentation,
    and person keypoints annotations.

    Args:
        json_file (str): full path to the json file in COCO instances annotation format.
        image_root (str or path-like): the directory where the images in this json file exist.
        dataset_name (str): the name of the dataset (e.g., coco_2017_train).
            If provided, this function will also put "thing_classes" into
            the metadata associated with this dataset.
        extra_annotation_keys (list[str]): list of per-annotation keys that should also be
            loaded into the dataset dict (besides "iscrowd", "bbox", "keypoints",
            "category_id", "segmentation"). The values for these keys will be returned as-is.
            For example, the densepose annotations are loaded in this way.

    Returns:
        list[dict]: a list of dicts in Detectron2 standard dataset dicts format. (See
        `Using Custom Datasets </tutorials/datasets.html>`_ )

    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
    """
    from pycocotools.coco import COCO  # a Python API class for COCO

    timer = Timer()  # time the loading
    json_file = PathManager.get_local_path(json_file)
    with contextlib.redirect_stdout(io.StringIO()):
        coco_api = COCO(json_file)  # initialize from the annotation json file
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(
            json_file, timer.seconds()))

    id_map = None
    if dataset_name is not None:
        # 创建一个新的meta
        meta = MetadataCatalog.get(dataset_name)
        # getCatIds() 获取所有类别的id号
        cat_ids = sorted(coco_api.getCatIds())
        # loadCats() 根据id号,获取所有类别信息,每个类别信息是一个字典
        # {"supercategory": "person", "id": 1, "name": "person"},
        cats = coco_api.loadCats(cat_ids)
        # The categories in a custom json file may not be sorted.
        # 获取类别名称,按id 顺序
        thing_classes = [
            c["name"] for c in sorted(cats, key=lambda x: x["id"])
        ]
        meta.thing_classes = thing_classes  # 设置元数据类别

        # In COCO, certain category ids are artificially removed,
        # and by convention they are always ignored.
        # We deal with COCO's id issue and translate
        # the category ids to contiguous ids in [0, 80).

        # It works by looking at the "categories" field in the json, therefore
        # if users' own json also has incontiguous ids, we'll
        # apply this mapping as well but print a warning.
        if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)):
            # the ids are not contiguous
            if "coco" not in dataset_name:
                logger.warning("""
Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.
""")
        id_map = {v: i for i, v in enumerate(cat_ids)}
        meta.thing_dataset_id_to_contiguous_id = id_map  # remap to contiguous ids
        # (isn't this already mapped once in data.datasets.builtin_meta._get_builtin_metadata?)

    # sort indices for reproducible results
    img_ids = sorted(coco_api.imgs.keys())  # ids of all images

    # imgs is a list of dicts, each looks something like:
    # {'license': 4,
    #  'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
    #  'file_name': 'COCO_val2014_000000001268.jpg',
    #  'height': 427,
    #  'width': 640,
    #  'date_captured': '2013-11-17 05:57:24',
    #  'id': 1268}
    #  获取所有图片的信息,元素如上描述
    imgs = coco_api.loadImgs(img_ids)

    # anns is a list[list[dict]], where each dict is an annotation
    # record for an object. The inner list enumerates the objects in an image
    # and the outer list enumerates over images. Example of anns[0]:
    # [{'segmentation': [[192.81,
    #     247.09,
    #     ...
    #     219.03,
    #     249.06]],
    #   'area': 1035.749,
    #   'iscrowd': 0,
    #   'image_id': 1268,
    #   'bbox': [192.81, 224.8, 74.73, 33.43],
    #   'category_id': 16,
    #   'id': 42986},
    #  ...]
    # 内部的list[dict] 是一张图片的所有标注
    # imgToAnns 返回的就是一个img_id
    anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]
    # 有效标注数
    total_num_valid_anns = sum([len(x) for x in anns])
    # 实际标注数
    total_num_anns = len(coco_api.anns)
    if total_num_valid_anns < total_num_anns:
        logger.warning(
            f"{json_file} contains {total_num_anns} annotations, but only "
            f"{total_num_valid_anns} of them match to images in the file.")

    if "minival" not in json_file:
        # The popular valminusminival & minival annotations for COCO2014 contain this bug.
        # However the ratio of buggy annotations there is tiny and does not affect accuracy.
        # Therefore we explicitly white-list them.
        # [先执行第一句后执行第二句]
        ann_ids = [
            ann["id"] for anns_per_image in anns for ann in anns_per_image
        ]
        # 可能存在annotation id 不唯一
        assert len(set(ann_ids)) == len(
            ann_ids), "Annotation ids in '{}' are not unique!".format(
                json_file)
    # pair(img, anno)
    imgs_anns = list(zip(imgs, anns))
    logger.info("Loaded {} images in COCO format from {}".format(
        len(imgs_anns), json_file))

    dataset_dicts = []
    # iscrowd = 0 means a single object; iscrowd = 1 means a group of objects.
    # This matters for the segmentation format:
    """
    The segmentation format depends on whether the instance is a single object
    (iscrowd=0, polygons are used) or a collection of objects (iscrowd=1, RLE
    is used). Note that a single object (iscrowd=0) may require multiple
    polygons, e.g. when it is occluded in the image. A crowd annotation
    (iscrowd=1, e.g. a group of people) uses the RLE format instead.
    """
    ann_keys = ["iscrowd", "bbox", "keypoints", "category_id"
                ] + (extra_annotation_keys or [])

    num_instances_without_valid_segmentation = 0

    for (img_dict, anno_dict_list) in imgs_anns:
        # 用一个记录集成一个图片的信息
        record = {}
        # change to full path
        record["file_name"] = os.path.join(image_root, img_dict["file_name"])
        record["height"] = img_dict["height"]
        record["width"] = img_dict["width"]
        image_id = record["image_id"] = img_dict["id"]

        objs = []  # one dict per object instance
        # anno_dict_list: all annotations of one image
        for anno in anno_dict_list:
            # Check that the image_id in this annotation is the same as
            # the image_id we're looking at.
            # This fails only when the data parsing logic or the annotation file is buggy.

            # The original COCO valminusminival2014 & minival2014 annotation files
            # actually contains bugs that, together with certain ways of using COCO API,
            # can trigger this assertion.
            assert anno["image_id"] == image_id

            assert anno.get(
                "ignore",
                0) == 0, '"ignore" in COCO json file is not supported.'
            # 根据ann_keys:["iscrowd", "bbox", "keypoints", "category_id"] 获取需要的信息
            obj = {key: anno[key] for key in ann_keys if key in anno}

            segm = anno.get("segmentation", None)  # 如果有seg信息
            if segm:  # either list[list[float]] or dict(RLE)
                if isinstance(segm, dict):
                    if isinstance(segm["counts"], list):
                        # convert to compressed RLE
                        segm = mask_util.frPyObjects(segm, *segm["size"])
                else:
                    # filter out invalid polygons (< 3 points)
                    segm = [
                        poly for poly in segm
                        if len(poly) % 2 == 0 and len(poly) >= 6
                    ]
                    if len(segm) == 0:
                        num_instances_without_valid_segmentation += 1
                        continue  # ignore this instance
                obj["segmentation"] = segm

            keypts = anno.get("keypoints", None)
            if keypts:  # list[int]
                for idx, v in enumerate(keypts):
                    if idx % 3 != 2:
                        # COCO's segmentation coordinates are floating points in [0, H or W],
                        # but keypoint coordinates are integers in [0, H-1 or W-1]
                        # Therefore we assume the coordinates are "pixel indices" and
                        # add 0.5 to convert to floating point coordinates.
                        keypts[idx] = v + 0.5
                obj["keypoints"] = keypts

            obj["bbox_mode"] = BoxMode.XYWH_ABS  # 1
            # 重新映射id
            if id_map:
                obj["category_id"] = id_map[obj["category_id"]]
            objs.append(obj)

        record["annotations"] = objs
        dataset_dicts.append(record)

    if num_instances_without_valid_segmentation > 0:
        logger.warning(
            "Filtered out {} instances without valid segmentation. ".format(
                num_instances_without_valid_segmentation) +
            "There might be issues in your dataset generation process. "
            "A valid polygon should be a list[float] with even length >= 6."
        )
    return dataset_dicts
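A compact illustration of the two segmentation encodings this loader accepts, using toy values rather than real COCO data (assumes pycocotools is installed):

import pycocotools.mask as mask_util

# Polygon form: list[list[float]], each inner list a flat x, y sequence.
polygons = [[10.0, 10.0, 50.0, 10.0, 50.0, 40.0]]  # one valid triangle
valid = [p for p in polygons if len(p) % 2 == 0 and len(p) >= 6]
assert valid == polygons

# Uncompressed RLE form: a dict whose "counts" is a list; the loader converts
# it to compressed RLE. The counts must sum to height * width (here 5 * 5).
rle = {"counts": [0, 9, 16], "size": [5, 5]}
compressed = mask_util.frPyObjects(rle, *rle["size"])
assert isinstance(compressed["counts"], bytes)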
예제 #28
0
def load_coco_json(json_file, image_root, dataset_name=None):
    """
    Load a json file with COCO's instances annotation format.
    Currently supports instance detection, instance segmentation,
    person keypoints and densepose annotations.

    Args:
        json_file (str): full path to the json file in COCO instances annotation format.
        image_root (str): the directory where the images in this json file exist.
        dataset_name (str): the name of the dataset (e.g., coco_2017_train).
            If provided, this function will also put "thing_classes" into
            the metadata associated with this dataset.

    Returns:
        list[dict]: a list of dicts in "Detectron2 Dataset" format. (See DATASETS.md)

    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
    """
    from pycocotools.coco import COCO

    timer = Timer()
    json_file = PathManager.get_local_path(json_file)
    with contextlib.redirect_stdout(io.StringIO()):
        coco_api = COCO(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(
            json_file, timer.seconds()))

    id_map = None
    if dataset_name is not None:
        meta = MetadataCatalog.get(dataset_name)
        cat_ids = sorted(coco_api.getCatIds())
        cats = coco_api.loadCats(cat_ids)
        # The categories in a custom json file may not be sorted.
        thing_classes = [
            c["name"] for c in sorted(cats, key=lambda x: x["id"])
        ]
        meta.thing_classes = thing_classes

        # In COCO, certain category ids are artificially removed,
        # and by convention they are always ignored.
        # We deal with COCO's id issue and translate
        # the category ids to contiguous ids in [0, 80).

        # It works by looking at the "categories" field in the json, therefore
    # if users' own json also has incontiguous ids, we'll
        # apply this mapping as well but print a warning.
        if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)):
            if "coco" not in dataset_name:
                logger.warning("""
                    Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.
                    """)
        id_map = {v: i for i, v in enumerate(cat_ids)}
        meta.thing_dataset_id_to_contiguous_id = id_map

    # sort indices for reproducible results
    img_ids = sorted(list(coco_api.imgs.keys()))
    # imgs is a list of dicts, each looks something like:
    # {'license': 4,
    #  'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
    #  'file_name': 'COCO_val2014_000000001268.jpg',
    #  'height': 427,
    #  'width': 640,
    #  'date_captured': '2013-11-17 05:57:24',
    #  'id': 1268}

    imgs = coco_api.loadImgs(img_ids)
    # anns is a list[list[dict]], where each dict is an annotation
    # record for an object. The inner list enumerates the objects in an image
    # and the outer list enumerates over images. Example of anns[0]:
    # [{'segmentation': [[192.81,
    #     247.09,
    #     ...
    #     219.03,
    #     249.06]],
    #   'area': 1035.749,
    #   'iscrowd': 0,
    #   'image_id': 1268,
    #   'bbox': [192.81, 224.8, 74.73, 33.43],
    #   'category_id': 16,
    #   'id': 42986},
    #  ...]
    anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]

    if "minival" not in json_file:
        # The popular valminusminival & minival annotations for COCO2014 contain this bug.
        # However the ratio of buggy annotations there is tiny and does not affect accuracy.
        # Therefore we explicitly white-list them.
        ann_ids = [
            ann["id"] for anns_per_image in anns for ann in anns_per_image
        ]
        assert len(set(ann_ids)) == len(
            ann_ids), "Annotation ids in '{}' are not unique!".format(
                json_file)

    imgs_anns = list(zip(imgs, anns))

    logger.info("Loaded {} images in COCO format from {}".format(
        len(imgs_anns), json_file))

    dataset_dicts = []

    # TODO: refactoring candidate, one should not have to alter DB reader
    # every time new data type is added
    DENSEPOSE_KEYS = ["dp_x", "dp_y", "dp_I", "dp_U", "dp_V", "dp_masks"]

    num_instances_without_valid_segmentation = 0

    for (img_dict, anno_dict_list) in imgs_anns:
        record = {}
        record["file_name"] = os.path.join(image_root, img_dict["file_name"])

        img_name = img_dict["file_name"]
        if 'COCO_val2014_00000050' in img_name:
            continue_flat = 0
        else:
            continue_flat = 1

        if dataset_name == "coco_2014_minival" and continue_flat == 1:
            continue

        record["height"] = img_dict["height"]
        record["width"] = img_dict["width"]
        image_id = record["image_id"] = img_dict["id"]

        objs = []
        for anno in anno_dict_list:
            # Check that the image_id in this annotation is the same as
            # the image_id we're looking at.
            # This fails only when the data parsing logic or the annotation file is buggy.

            # The original COCO valminusminival2014 & minival2014 annotation files
            # actually contains bugs that, together with certain ways of using COCO API,
            # can trigger this assertion.
            assert anno["image_id"] == image_id

            assert anno.get("ignore", 0) == 0

            obj = {
                field: anno[field]
                for field in ["iscrowd", "bbox", "keypoints", "category_id"] +
                DENSEPOSE_KEYS if field in anno
            }

            segm = anno.get("segmentation", None)
            if segm:  # either list[list[float]] or dict(RLE)
                if not isinstance(segm, dict):
                    # filter out invalid polygons (< 3 points)
                    segm = [
                        poly for poly in segm
                        if len(poly) % 2 == 0 and len(poly) >= 6
                    ]
                    if len(segm) == 0:
                        num_instances_without_valid_segmentation += 1
                        continue  # ignore this instance
                obj["segmentation"] = segm

            keypts = anno.get("keypoints", None)
            if keypts:  # list[int]
                for idx, v in enumerate(keypts):
                    if idx % 3 != 2:
                        # COCO's segmentation coordinates are floating points in [0, H or W],
                        # but keypoint coordinates are integers in [0, H-1 or W-1]
                        # Therefore we assume the coordinates are "pixel indices" and
                        # add 0.5 to convert to floating point coordinates.
                        keypts[idx] = v + 0.5
                obj["keypoints"] = keypts

            obj["bbox_mode"] = BoxMode.XYWH_ABS
            if id_map:
                obj["category_id"] = id_map[obj["category_id"]]
            objs.append(obj)
        record["annotations"] = objs
        dataset_dicts.append(record)

    if num_instances_without_valid_segmentation > 0:
        logger.warning(
            "Filtered out {} instances without valid segmentation. "
            "There might be issues in your dataset generation process.".format(
                num_instances_without_valid_segmentation))
    return dataset_dicts
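The keypoint half-pixel shift above only touches coordinates, never the visibility flags; a toy check of the idx % 3 logic:

# Flat COCO keypoints: [x1, y1, v1, x2, y2, v2, ...]; every third entry is a
# visibility flag and stays an integer.
keypts = [12, 30, 2, 45, 61, 1]
for idx, v in enumerate(keypts):
    if idx % 3 != 2:
        keypts[idx] = v + 0.5
assert keypts == [12.5, 30.5, 2, 45.5, 61.5, 1]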
예제 #29
0
def load_hico_json(json_file,
                   image_root,
                   dataset_name=None,
                   extra_annotation_keys=None):
    """
    Load a json file with HOI's instances annotation.

    Args:
        json_file (str): full path to the json file in HOI instances annotation format.
        image_root (str or path-like): the directory where the images in this json file exist.
        dataset_name (str): the name of the dataset (e.g., `hico-det_train`).
            If provided, this function will also put "thing_classes" into
            the metadata associated with this dataset.
        extra_annotation_keys (list[str]): list of per-annotation keys that should also be
            loaded into the dataset dict (besides "iscrowd", "bbox", "category_id"). The values
            for these keys will be returned as-is. For example, the densepose annotations are
            loaded in this way.

    Returns:
        list[dict]: a list of dicts in Detectron2 standard dataset dicts format. (See
        `Using Custom Datasets </tutorials/datasets.html>`_ )

    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
    """
    from pycocotools.coco import COCO

    timer = Timer()
    json_file = PathManager.get_local_path(json_file)
    with contextlib.redirect_stdout(io.StringIO()):
        coco_api = COCO(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(
            json_file, timer.seconds()))

    id_map = None
    if dataset_name is not None:
        meta = MetadataCatalog.get(dataset_name)
        cat_ids = sorted(coco_api.getCatIds())
        cats = coco_api.loadCats(cat_ids)
        # The categories in a custom json file may not be sorted.
        thing_classes = [
            c["name"] for c in sorted(cats, key=lambda x: x["id"])
        ]
        # meta.thing_classes = thing_classes

        # In COCO, certain category ids are artificially removed,
        # and by convention they are always ignored.
        # We deal with COCO's id issue and translate
        # the category ids to contiguous ids in [0, 80).

        # It works by looking at the "categories" field in the json, therefore
        # if users' own json also has incontiguous ids, we'll
        # apply this mapping as well but print a warning.
        if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)):
            if "coco" not in dataset_name:
                logger.warning("""
Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.
""")
        id_map = {v: i for i, v in enumerate(cat_ids)}
        # meta.thing_dataset_id_to_contiguous_id = id_map
        # Get metadata "person_cls_id" and "action classes"
        person_cls_id = meta.person_cls_id
        action_classes = meta.action_classes

    # sort indices for reproducible results
    img_ids = sorted(coco_api.imgs.keys())
    # imgs is a list of dicts, each looks something like:
    # {'license': 4,
    #  'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
    #  'file_name': 'COCO_val2014_000000001268.jpg',
    #  'height': 427,
    #  'width': 640,
    #  'date_captured': '2013-11-17 05:57:24',
    #  'id': 1268}
    imgs = coco_api.loadImgs(img_ids)
    # anns is a list[list[dict]], where each dict is an annotation
    # record for an object. The inner list enumerates the objects in an image
    # and the outer list enumerates over images. Example of anns[0]:
    # [{'segmentation': [[192.81,
    #     247.09,
    #     ...
    #     219.03,
    #     249.06]],
    #   'area': 1035.749,
    #   'iscrowd': 0,
    #   'image_id': 1268,
    #   'bbox': [192.81, 224.8, 74.73, 33.43],
    #   'category_id': 16,
    #   'id': 42986,
    #   'isactive': 1,
    #   'isknown': 1,
    #   'hoi_triplets': [{person_id: 42984, object_id: 42986, action_id: 4}, ...],
    #  },
    #  ...]
    anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]

    imgs_anns = list(zip(imgs, anns))

    logger.info("Loaded {} images in HOI format from {}".format(
        len(imgs_anns), json_file))

    dataset_dicts = []

    ann_keys = ["iscrowd", "bbox", "category_id"]

    ann_keys += (extra_annotation_keys or [])

    num_instances_without_hoi_annotations = 0

    for (img_dict, anno_dict_list) in imgs_anns:
        record = {}
        record["file_name"] = os.path.join(image_root, img_dict["file_name"])
        record["height"] = img_dict["height"]
        record["width"] = img_dict["width"]
        image_id = record["image_id"] = img_dict["id"]

        objs = []
        num_instances = len(anno_dict_list)
        for anno in anno_dict_list:
            # Check that the image_id in this annotation is the same as
            # the image_id we're looking at.
            # This fails only when the data parsing logic or the annotation file is buggy.

            # The original COCO valminusminival2014 & minival2014 annotation files
            # actually contains bugs that, together with certain ways of using COCO API,
            # can trigger this assertion.
            assert anno["image_id"] == image_id

            obj = {key: anno[key] for key in ann_keys if key in anno}

            # "hoi_triplets" in the annotation is a list[dict], where each dict is an
            # annotation record for an interaction. Example of anno["hoi_triplet"][0]:
            # [{
            #       person_id: 42984,
            #       object_id: 42986,
            #       action_id: 4
            #   },
            # ... ]
            # Here "person_id" ("object_id") is the *anno id* of the person (object) instance.
            # For each instance, we record its interactions with other instances in the given
            # image in an binary matrix named `actions` with shape (N, K), where N is the number
            # of instances and K is the number of actions. If this instance is interacting with
            # j-th instance with k-th action, then (i, j) entry of `actions` will be 1.
            actions = np.zeros((num_instances, len(action_classes)))
            hoi_triplets = anno["hoi_triplets"]
            if len(hoi_triplets) > 0:
                # Mapping *anno id* of instances to contiguous indices in this image
                map_to_contiguous_id_within_image(hoi_triplets, anno_dict_list)

                for triplet in hoi_triplets:
                    action_id = triplet["action_id"]
                    is_person = (anno["category_id"] == person_cls_id)
                    target_id = triplet["object_id"] if is_person else triplet[
                        "person_id"]
                    actions[target_id, action_id] = 1
            else:
                num_instances_without_hoi_annotations += 1

            obj["actions"] = actions
            obj["isactive"] = 1 if len(hoi_triplets) > 0 else 0

            obj["bbox_mode"] = BoxMode.XYWH_ABS

            if id_map:
                obj["category_id"] = id_map[obj["category_id"]]

            objs.append(obj)

        record["annotations"] = objs
        dataset_dicts.append(record)

    if num_instances_without_hoi_annotations > 0:
        logger.warning("There are {} instances without HOI annotations.".format(
            num_instances_without_hoi_annotations))
    return dataset_dicts
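The (N, K) actions matrix can be illustrated with toy data: two instances, three action classes, and ids already remapped to per-image contiguous indices. All values here are invented:

import numpy as np

num_instances, num_actions = 2, 3
actions = np.zeros((num_instances, num_actions))

# Instance 0 (a person) performs action 1 on instance 1 (an object); the row
# indexed is the interaction target, matching `actions[target_id, action_id]`.
hoi_triplets = [{"person_id": 0, "object_id": 1, "action_id": 1}]
for t in hoi_triplets:
    actions[t["object_id"], t["action_id"]] = 1

assert actions[1, 1] == 1 and actions.sum() == 1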
예제 #30
0
def load_coco_json(json_file,
                   image_root,
                   dataset_name=None,
                   extra_annotation_keys=None):
    """
    Load a json file with COCO's instances annotation format.
    Currently supports instance detection, instance segmentation,
    and person keypoints annotations.

    Args:
        json_file (str): full path to the json file in COCO instances annotation format.
        image_root (str): the directory where the images in this json file exist.
        dataset_name (str): the name of the dataset (e.g., coco_2017_train).
            If provided, this function will also put "thing_classes" into
            the metadata associated with this dataset.
        extra_annotation_keys (list[str]): list of per-annotation keys that should also be
            loaded into the dataset dict (besides "iscrowd", "bbox", "keypoints",
            "category_id", "segmentation"). The values for these keys will be returned as-is.
            For example, the densepose annotations are loaded in this way.

    Returns:
        list[dict]: a list of dicts in Detectron2 standard format. (See
        `Using Custom Datasets </tutorials/datasets.html>`_ )

    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
    """
    from pycocotools.coco import COCO

    timer = Timer()
    json_file = PathManager.get_local_path(json_file)
    with contextlib.redirect_stdout(io.StringIO()):
        coco_api = COCO(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(
            json_file, timer.seconds()))

    id_map = None
    if dataset_name is not None:
        meta = MetadataCatalog.get(dataset_name)
        cat_ids = sorted(coco_api.getCatIds())
        cats = coco_api.loadCats(cat_ids)
        # The categories in a custom json file may not be sorted.
        thing_classes = [
            c["name"] for c in sorted(cats, key=lambda x: x["id"])
        ]
        meta.thing_classes = thing_classes

        # Add keypoint names from the categories; every category shares the same
        # 294 keypoint names, which are the string numbers "1"-"294".
        meta.keypoint_names = [
            keypoint_name
            for keypoint_name in coco_api.dataset["categories"][0]["keypoints"]
        ]

        # Identity flips such as ("1", "1") do not need to be listed; they are added
        # automatically later. Mirrored duplicates are also unnecessary: listing ("1", "6")
        # suffices, since create_keypoint_hflip_indices adds the flipped counterpart ("6", "1").
        meta.keypoint_flip_map = [
            # Short sleeve top +0
            ("2", "6"),
            ("3", "5"),
            ("7", "25"),
            ("8", "24"),
            ("9", "23"),
            ("10", "22"),
            ("11", "21"),
            ("12", "20"),
            ("13", "19"),
            ("14", "18"),
            ("15", "17"),
            # Long sleeve top +25
            ("27", "31"),
            ("28", "30"),
            ("32", "58"),
            ("33", "57"),
            ("34", "56"),
            ("35", "55"),
            ("36", "54"),
            ("37", "53"),
            ("38", "52"),
            ("39", "51"),
            ("40", "50"),
            ("41", "49"),
            ("42", "48"),
            ("43", "47"),
            ("44", "46"),
            # Short sleeve outwear +58
            ("60", "84"),
            ("61", "63"),
            ("62", "64"),
            ("65", "83"),
            ("66", "82"),
            ("67", "81"),
            ("68", "80"),
            ("69", "79"),
            ("70", "78"),
            ("71", "77"),
            ("72", "76"),
            ("73", "75"),
            ("74", "87"),
            ("89", "86"),
            ("88", "85"),
            # Long sleeve outwear +89
            ("91", "95"),
            ("92", "94"),
            ("93", "123"),
            ("96", "122"),
            ("97", "121"),
            ("98", "120"),
            ("99", "119"),
            ("100", "118"),
            ("101", "117"),
            ("102", "116"),
            ("103", "115"),
            ("104", "114"),
            ("105", "113"),
            ("106", "112"),
            ("107", "111"),
            ("108", "110"),
            ("109", "126"),
            ("128", "125"),
            ("127", "124"),
            # Vest +128
            ("130", "134"),
            ("131", "133"),
            ("135", "143"),
            ("136", "142"),
            ("137", "141"),
            ("138", "140"),
            # Sling +143
            ("145", "149"),
            ("146", "148"),
            ("150", "158"),
            ("151", "157"),
            ("152", "156"),
            ("153", "155"),
            # Shorts +158
            ("159", "161"),
            ("162", "168"),
            ("163", "167"),
            ("164", "166"),
            # Trousers +168
            ("169", "171"),
            ("172", "182"),
            ("173", "181"),
            ("174", "180"),
            ("175", "179"),
            ("176", "178"),
            # Skirt +182
            ("183", "185"),
            ("186", "190"),
            ("187", "189"),
            # Short sleeve dress +190
            ("192", "196"),
            ("193", "195"),
            ("197", "219"),
            ("198", "218"),
            ("199", "217"),
            ("200", "216"),
            ("201", "215"),
            ("202", "214"),
            ("203", "213"),
            ("204", "212"),
            ("205", "211"),
            ("206", "210"),
            ("207", "209"),
            # Long sleeve dress +219
            ("221", "225"),
            ("222", "224"),
            ("226", "256"),
            ("227", "255"),
            ("228", "254"),
            ("229", "253"),
            ("230", "252"),
            ("231", "251"),
            ("232", "250"),
            ("233", "249"),
            ("234", "248"),
            ("235", "247"),
            ("236", "246"),
            ("237", "245"),
            ("238", "244"),
            ("239", "243"),
            ("240", "242"),
            # Vest dress +256
            ("258", "262"),
            ("259", "261"),
            ("263", "275"),
            ("264", "274"),
            ("265", "273"),
            ("266", "272"),
            ("267", "271"),
            ("268", "270"),
            # Sling dress +275
            ("277", "281"),
            ("278", "280"),
            ("282", "294"),
            ("283", "293"),
            ("284", "292"),
            ("285", "291"),
            ("286", "290"),
            ("287", "289")
        ]

        meta.keypoint_connection_rules = [
            # Short sleeve top +0 Lightblue (0,191,255)
            ("1", "2", (0, 191, 255)),
            ("2", "3", (0, 191, 255)),
            ("3", "4", (0, 191, 255)),
            ("4", "5", (0, 191, 255)),
            ("5", "6", (0, 191, 255)),
            ("6", "1", (0, 191, 255)),
            ("2", "7", (0, 191, 255)),
            ("7", "8", (0, 191, 255)),
            ("8", "9", (0, 191, 255)),
            ("9", "10", (0, 191, 255)),
            ("10", "11", (0, 191, 255)),
            ("11", "12", (0, 191, 255)),
            ("12", "13", (0, 191, 255)),
            ("13", "14", (0, 191, 255)),
            ("14", "15", (0, 191, 255)),
            ("15", "16", (0, 191, 255)),
            ("16", "17", (0, 191, 255)),
            ("17", "18", (0, 191, 255)),
            ("18", "19", (0, 191, 255)),
            ("19", "20", (0, 191, 255)),
            ("20", "21", (0, 191, 255)),
            ("21", "22", (0, 191, 255)),
            ("22", "23", (0, 191, 255)),
            ("23", "24", (0, 191, 255)),
            ("24", "25", (0, 191, 255)),
            ("25", "6", (0, 191, 255)),
            # Long sleeve top +25 Green rgb(0,128,0)
            ("26", "27", (0, 128, 0)),
            ("27", "28", (0, 128, 0)),
            ("28", "29", (0, 128, 0)),
            ("29", "30", (0, 128, 0)),
            ("30", "31", (0, 128, 0)),
            ("31", "26", (0, 128, 0)),
            ("27", "32", (0, 128, 0)),
            ("32", "33", (0, 128, 0)),
            ("33", "34", (0, 128, 0)),
            ("34", "35", (0, 128, 0)),
            ("35", "36", (0, 128, 0)),
            ("36", "37", (0, 128, 0)),
            ("37", "38", (0, 128, 0)),
            ("38", "39", (0, 128, 0)),
            ("39", "40", (0, 128, 0)),
            ("40", "41", (0, 128, 0)),
            ("41", "42", (0, 128, 0)),
            ("42", "43", (0, 128, 0)),
            ("43", "44", (0, 128, 0)),
            ("44", "45", (0, 128, 0)),
            ("45", "46", (0, 128, 0)),
            ("46", "47", (0, 128, 0)),
            ("47", "48", (0, 128, 0)),
            ("48", "49", (0, 128, 0)),
            ("49", "50", (0, 128, 0)),
            ("50", "51", (0, 128, 0)),
            ("51", "52", (0, 128, 0)),
            ("52", "53", (0, 128, 0)),
            ("53", "54", (0, 128, 0)),
            ("54", "55", (0, 128, 0)),
            ("55", "56", (0, 128, 0)),
            ("56", "57", (0, 128, 0)),
            ("57", "58", (0, 128, 0)),
            ("58", "31", (0, 128, 0)),
            # Short sleeve outwear +58 Yellow rgb(255,255,0)
            ("59", "62", (255, 255, 0)),
            ("62", "61", (255, 255, 0)),
            ("61", "60", (255, 255, 0)),
            ("62", "65", (255, 255, 0)),
            ("65", "66", (255, 255, 0)),
            ("66", "67", (255, 255, 0)),
            ("67", "68", (255, 255, 0)),
            ("68", "69", (255, 255, 0)),
            ("69", "70", (255, 255, 0)),
            ("70", "71", (255, 255, 0)),
            ("71", "72", (255, 255, 0)),
            ("72", "73", (255, 255, 0)),
            ("73", "74", (255, 255, 0)),
            ("74", "89", (255, 255, 0)),
            ("89", "88", (255, 255, 0)),
            ("88", "60", (255, 255, 0)),
            ("64", "59", (255, 255, 0)),
            ("64", "63", (255, 255, 0)),
            ("63", "84", (255, 255, 0)),
            ("84", "85", (255, 255, 0)),
            ("85", "86", (255, 255, 0)),
            ("86", "87", (255, 255, 0)),
            ("87", "75", (255, 255, 0)),
            ("75", "76", (255, 255, 0)),
            ("76", "77", (255, 255, 0)),
            ("77", "78", (255, 255, 0)),
            ("78", "79", (255, 255, 0)),
            ("79", "80", (255, 255, 0)),
            ("80", "81", (255, 255, 0)),
            ("81", "82", (255, 255, 0)),
            ("82", "83", (255, 255, 0)),
            ("83", "64", (255, 255, 0)),
            # Long sleeve outwear +89 Red rgb(255,0,0)
            ("90", "91", (255, 0, 0)),
            ("91", "92", (255, 0, 0)),
            ("92", "93", (255, 0, 0)),
            ("91", "96", (255, 0, 0)),
            ("96", "97", (255, 0, 0)),
            ("97", "98", (255, 0, 0)),
            ("98", "99", (255, 0, 0)),
            ("99", "100", (255, 0, 0)),
            ("100", "101", (255, 0, 0)),
            ("101", "102", (255, 0, 0)),
            ("102", "103", (255, 0, 0)),
            ("103", "104", (255, 0, 0)),
            ("104", "105", (255, 0, 0)),
            ("105", "106", (255, 0, 0)),
            ("106", "107", (255, 0, 0)),
            ("107", "108", (255, 0, 0)),
            ("108", "109", (255, 0, 0)),
            ("109", "128", (255, 0, 0)),
            ("128", "127", (255, 0, 0)),
            ("127", "93", (255, 0, 0)),
            ("95", "90", (255, 0, 0)),
            ("95", "94", (255, 0, 0)),
            ("94", "123", (255, 0, 0)),
            ("123", "124", (255, 0, 0)),
            ("124", "125", (255, 0, 0)),
            ("125", "126", (255, 0, 0)),
            ("126", "110", (255, 0, 0)),
            ("110", "111", (255, 0, 0)),
            ("111", "112", (255, 0, 0)),
            ("112", "113", (255, 0, 0)),
            ("113", "114", (255, 0, 0)),
            ("114", "115", (255, 0, 0)),
            ("115", "116", (255, 0, 0)),
            ("116", "117", (255, 0, 0)),
            ("117", "118", (255, 0, 0)),
            ("118", "119", (255, 0, 0)),
            ("119", "120", (255, 0, 0)),
            ("120", "121", (255, 0, 0)),
            ("121", "122", (255, 0, 0)),
            ("122", "95", (255, 0, 0)),
            # Vest +128 DarkOrange rgb(255,140,0)
            ("129", "130", (255, 140, 0)),
            ("130", "131", (255, 140, 0)),
            ("131", "132", (255, 140, 0)),
            ("132", "133", (255, 140, 0)),
            ("133", "134", (255, 140, 0)),
            ("134", "129", (255, 140, 0)),
            ("130", "135", (255, 140, 0)),
            ("135", "136", (255, 140, 0)),
            ("136", "137", (255, 140, 0)),
            ("137", "138", (255, 140, 0)),
            ("138", "139", (255, 140, 0)),
            ("139", "140", (255, 140, 0)),
            ("140", "141", (255, 140, 0)),
            ("141", "142", (255, 140, 0)),
            ("142", "143", (255, 140, 0)),
            ("143", "134", (255, 140, 0)),
            # Sling +143 DeepPink rgb(255,20,147)
            ("144", "145", (255, 20, 147)),
            ("145", "146", (255, 20, 147)),
            ("146", "147", (255, 20, 147)),
            ("147", "148", (255, 20, 147)),
            ("148", "149", (255, 20, 147)),
            ("149", "144", (255, 20, 147)),
            ("145", "150", (255, 20, 147)),
            ("145", "151", (255, 20, 147)),
            ("151", "152", (255, 20, 147)),
            ("152", "153", (255, 20, 147)),
            ("153", "154", (255, 20, 147)),
            ("154", "155", (255, 20, 147)),
            ("155", "156", (255, 20, 147)),
            ("156", "157", (255, 20, 147)),
            ("157", "149", (255, 20, 147)),
            ("149", "158", (255, 20, 147)),
            # Shorts +158 SaddleBrown rgb(139,69,19)
            ("159", "160", (139, 69, 19)),
            ("160", "161", (139, 69, 19)),
            ("159", "162", (139, 69, 19)),
            ("162", "163", (139, 69, 19)),
            ("163", "164", (139, 69, 19)),
            ("164", "165", (139, 69, 19)),
            ("165", "166", (139, 69, 19)),
            ("166", "167", (139, 69, 19)),
            ("167", "168", (139, 69, 19)),
            ("168", "161", (139, 69, 19)),
            # Trousers +168 Magenta rgb(255,0,255)
            ("169", "170", (255, 0, 255)),
            ("170", "171", (255, 0, 255)),
            ("169", "172", (255, 0, 255)),
            ("172", "173", (255, 0, 255)),
            ("173", "174", (255, 0, 255)),
            ("174", "175", (255, 0, 255)),
            ("175", "176", (255, 0, 255)),
            ("176", "177", (255, 0, 255)),
            ("177", "178", (255, 0, 255)),
            ("178", "179", (255, 0, 255)),
            ("179", "180", (255, 0, 255)),
            ("180", "181", (255, 0, 255)),
            ("181", "182", (255, 0, 255)),
            ("182", "171", (255, 0, 255)),
            # Skirt +182 GoldenRod rgb(218,165,32)
            ("183", "184", (218, 165, 32)),
            ("184", "185", (218, 165, 32)),
            ("183", "186", (218, 165, 32)),
            ("186", "187", (218, 165, 32)),
            ("187", "188", (218, 165, 32)),
            ("188", "189", (218, 165, 32)),
            ("189", "190", (218, 165, 32)),
            ("190", "185", (218, 165, 32)),
            # Short sleeve dress +190 Gray rgb(128,128,128)
            ("191", "192", (128, 128, 128)),
            ("192", "193", (128, 128, 128)),
            ("193", "194", (128, 128, 128)),
            ("194", "195", (128, 128, 128)),
            ("195", "196", (128, 128, 128)),
            ("196", "191", (128, 128, 128)),
            ("192", "197", (128, 128, 128)),
            ("197", "198", (128, 128, 128)),
            ("198", "199", (128, 128, 128)),
            ("199", "200", (128, 128, 128)),
            ("200", "201", (128, 128, 128)),
            ("201", "202", (128, 128, 128)),
            ("202", "203", (128, 128, 128)),
            ("203", "204", (128, 128, 128)),
            ("204", "205", (128, 128, 128)),
            ("205", "206", (128, 128, 128)),
            ("206", "207", (128, 128, 128)),
            ("207", "208", (128, 128, 128)),
            ("208", "209", (128, 128, 128)),
            ("209", "210", (128, 128, 128)),
            ("210", "211", (128, 128, 128)),
            ("211", "212", (128, 128, 128)),
            ("212", "213", (128, 128, 128)),
            ("213", "214", (128, 128, 128)),
            ("214", "215", (128, 128, 128)),
            ("215", "216", (128, 128, 128)),
            ("216", "217", (128, 128, 128)),
            ("217", "218", (128, 128, 128)),
            ("218", "219", (128, 128, 128)),
            ("219", "196", (128, 128, 128)),
            # Long sleeve dress +219 Darkblue rgb(0,0,139)
            ("220", "221", (0, 0, 139)),
            ("221", "222", (0, 0, 139)),
            ("222", "223", (0, 0, 139)),
            ("223", "224", (0, 0, 139)),
            ("224", "225", (0, 0, 139)),
            ("225", "220", (0, 0, 139)),
            ("221", "226", (0, 0, 139)),
            ("226", "227", (0, 0, 139)),
            ("227", "228", (0, 0, 139)),
            ("228", "229", (0, 0, 139)),
            ("229", "230", (0, 0, 139)),
            ("230", "231", (0, 0, 139)),
            ("231", "232", (0, 0, 139)),
            ("232", "233", (0, 0, 139)),
            ("233", "234", (0, 0, 139)),
            ("234", "235", (0, 0, 139)),
            ("235", "236", (0, 0, 139)),
            ("236", "237", (0, 0, 139)),
            ("237", "238", (0, 0, 139)),
            ("238", "239", (0, 0, 139)),
            ("239", "240", (0, 0, 139)),
            ("240", "241", (0, 0, 139)),
            ("241", "242", (0, 0, 139)),
            ("242", "243", (0, 0, 139)),
            ("243", "244", (0, 0, 139)),
            ("244", "245", (0, 0, 139)),
            ("245", "246", (0, 0, 139)),
            ("246", "247", (0, 0, 139)),
            ("247", "248", (0, 0, 139)),
            ("248", "249", (0, 0, 139)),
            ("249", "250", (0, 0, 139)),
            ("250", "251", (0, 0, 139)),
            ("251", "252", (0, 0, 139)),
            ("252", "253", (0, 0, 139)),
            ("253", "254", (0, 0, 139)),
            ("254", "255", (0, 0, 139)),
            ("255", "256", (0, 0, 139)),
            ("256", "225", (0, 0, 139)),
            # Vest dress +256 Palevioletred rgb(219,112,147)
            ("257", "258", (219, 112, 147)),
            ("258", "259", (219, 112, 147)),
            ("259", "260", (219, 112, 147)),
            ("260", "261", (219, 112, 147)),
            ("261", "262", (219, 112, 147)),
            ("262", "257", (219, 112, 147)),
            ("258", "263", (219, 112, 147)),
            ("263", "264", (219, 112, 147)),
            ("264", "265", (219, 112, 147)),
            ("265", "266", (219, 112, 147)),
            ("266", "267", (219, 112, 147)),
            ("267", "268", (219, 112, 147)),
            ("268", "269", (219, 112, 147)),
            ("269", "270", (219, 112, 147)),
            ("270", "271", (219, 112, 147)),
            ("271", "272", (219, 112, 147)),
            ("272", "273", (219, 112, 147)),
            ("273", "274", (219, 112, 147)),
            ("274", "275", (219, 112, 147)),
            ("275", "262", (219, 112, 147)),
            # Sling dress +275 Bisque rgb(255, 228, 196)
            ("276", "277", (255, 228, 196)),
            ("277", "278", (255, 228, 196)),
            ("278", "279", (255, 228, 196)),
            ("279", "280", (255, 228, 196)),
            ("280", "281", (255, 228, 196)),
            ("281", "276", (255, 228, 196)),
            ("277", "282", (255, 228, 196)),
            ("277", "283", (255, 228, 196)),
            ("283", "284", (255, 228, 196)),
            ("284", "285", (255, 228, 196)),
            ("285", "286", (255, 228, 196)),
            ("286", "287", (255, 228, 196)),
            ("287", "288", (255, 228, 196)),
            ("288", "289", (255, 228, 196)),
            ("289", "290", (255, 228, 196)),
            ("290", "291", (255, 228, 196)),
            ("291", "292", (255, 228, 196)),
            ("292", "293", (255, 228, 196)),
            ("293", "281", (255, 228, 196)),
            ("281", "294", (255, 228, 196))
        ]
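
        # These fields follow detectron2's metadata conventions:
        # keypoint_flip_map is consumed by horizontal-flip augmentation,
        # and keypoint_connection_rules by the Visualizer when drawing
        # per-category skeletons in the colors given above.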

        # In COCO, certain category ids are artificially removed,
        # and by convention they are always ignored.
        # We deal with COCO's id issue and translate
        # the category ids to contiguous ids in [0, 80).

        # It works by looking at the "categories" field in the json; therefore,
        # if a user's own json also has non-contiguous ids, we apply this
        # mapping as well, but print a warning.
        if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)):
            if "coco" not in dataset_name:
                logger.warning("""
Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.
""")
        id_map = {v: i for i, v in enumerate(cat_ids)}
        meta.thing_dataset_id_to_contiguous_id = id_map
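        # Illustrative sketch (hypothetical ids): cat_ids = [1, 2, 5] yields
        # id_map = {1: 0, 2: 1, 5: 2}, so a raw category_id of 5 in the json
        # is remapped to the contiguous training id 2 further below.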

    # sort indices for reproducible results
    img_ids = sorted(list(coco_api.imgs.keys()))
    # imgs is a list of dicts, each looks something like:
    # {'license': 4,
    #  'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
    #  'file_name': 'COCO_val2014_000000001268.jpg',
    #  'height': 427,
    #  'width': 640,
    #  'date_captured': '2013-11-17 05:57:24',
    #  'id': 1268}
    imgs = coco_api.loadImgs(img_ids)
    # anns is a list[list[dict]], where each dict is an annotation
    # record for an object. The inner list enumerates the objects in an image
    # and the outer list enumerates over images. Example of anns[0]:
    # [{'segmentation': [[192.81,
    #     247.09,
    #     ...
    #     219.03,
    #     249.06]],
    #   'area': 1035.749,
    #   'iscrowd': 0,
    #   'image_id': 1268,
    #   'bbox': [192.81, 224.8, 74.73, 33.43],
    #   'category_id': 16,
    #   'id': 42986},
    #  ...]
    anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]

    if "minival" not in json_file:
        # The popular valminusminival & minival annotations for COCO2014
        # contain duplicate annotation ids. The ratio of buggy annotations
        # there is tiny and does not affect accuracy, so we explicitly
        # whitelist those files and skip the uniqueness check for them.
        ann_ids = [
            ann["id"] for anns_per_image in anns for ann in anns_per_image
        ]
        assert len(set(ann_ids)) == len(ann_ids), \
            "Annotation ids in '{}' are not unique!".format(json_file)

    imgs_anns = list(zip(imgs, anns))

    logger.info("Loaded {} images in COCO format from {}".format(
        len(imgs_anns), json_file))

    dataset_dicts = []

    ann_keys = ["iscrowd", "bbox", "keypoints", "category_id"
                ] + (extra_annotation_keys or [])

    num_instances_without_valid_segmentation = 0

    for (img_dict, anno_dict_list) in imgs_anns:
        record = {}
        record["file_name"] = os.path.join(image_root, img_dict["file_name"])
        record["height"] = img_dict["height"]
        record["width"] = img_dict["width"]
        image_id = record["image_id"] = img_dict["id"]

        objs = []
        for anno in anno_dict_list:
            # Check that the image_id in this annotation is the same as
            # the image_id we're looking at.
            # This fails only when the data parsing logic or the annotation file is buggy.

            # The original COCO valminusminival2014 & minival2014 annotation
            # files actually contain bugs that, together with certain ways of
            # using the COCO API, can trigger this assertion.
            assert anno["image_id"] == image_id

            assert anno.get("ignore", 0) == 0

            obj = {key: anno[key] for key in ann_keys if key in anno}

            segm = anno.get("segmentation", None)
            if segm:  # either list[list[float]] or dict(RLE)
                if not isinstance(segm, dict):
                    # filter out invalid polygons (< 3 points)
                    segm = [
                        poly for poly in segm
                        if len(poly) % 2 == 0 and len(poly) >= 6
                    ]
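                    # Illustrative sketch (hypothetical values): a 2-point
                    # "polygon" such as [10.0, 10.0, 20.0, 20.0] is dropped;
                    # the smallest valid polygon is a triangle (6 floats).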
                    if len(segm) == 0:
                        num_instances_without_valid_segmentation += 1
                        continue  # ignore this instance
                obj["segmentation"] = segm

            keypts = anno.get("keypoints", None)
            if keypts:  # list[int]
                for idx, v in enumerate(keypts):
                    if idx % 3 != 2:
                        # COCO's segmentation coordinates are floating points in [0, H or W],
                        # but keypoint coordinates are integers in [0, H-1 or W-1].
                        # Therefore we assume the coordinates are "pixel indices" and
                        # add 0.5 to convert to floating point coordinates.
                        keypts[idx] = v + 0.5
                obj["keypoints"] = keypts

            obj["bbox_mode"] = BoxMode.XYWH_ABS
            if id_map:
                obj["category_id"] = id_map[obj["category_id"]]
            objs.append(obj)
        record["annotations"] = objs
        dataset_dicts.append(record)

    if num_instances_without_valid_segmentation > 0:
        logger.warning(
            "Filtered out {} instances without valid segmentation. "
            "There might be issues in your dataset generation process.".format(
                num_instances_without_valid_segmentation))
    return dataset_dicts
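
A minimal usage sketch (assumptions: the enclosing loader follows detectron2's
load_coco_json(json_file, image_root, dataset_name) signature, and the dataset
name and paths below are hypothetical):

from detectron2.data import DatasetCatalog

# Register the hypothetical split so detectron2 can build it lazily.
DatasetCatalog.register(
    "deepfashion2_train",
    lambda: load_coco_json(
        "datasets/deepfashion2/train.json",    # hypothetical annotation file
        "datasets/deepfashion2/train/images",  # hypothetical image root
        "deepfashion2_train",
    ),
)
dataset_dicts = DatasetCatalog.get("deepfashion2_train")
print(len(dataset_dicts), dataset_dicts[0]["file_name"])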