def evaluate(self):
    """
    Score the accumulated panoptic predictions with ``panopticapi``.

    Predictions from all processes are gathered and flattened; only the
    main process continues past that point. The PNG payload of every
    prediction is written into a temporary folder, a prediction json is
    written to ``self._predictions_json`` and ``pq_compute`` is run on both.

    Returns:
        OrderedDict: ``{"panoptic_seg": {...}}`` holding PQ / SQ / RQ for
        all classes plus the ``_th`` (Things) and ``_st`` (Stuff) variants,
        each multiplied by 100. Non-main processes return None.
    """
    comm.synchronize()
    gathered = comm.gather(self._predictions)
    self._predictions = list(itertools.chain.from_iterable(gathered))
    if not comm.is_main_process():
        return

    gt_json = PathManager.get_local_path(self._metadata.panoptic_json)
    gt_folder = self._metadata.panoptic_root

    with tempfile.TemporaryDirectory(prefix="panoptic_eval") as pred_dir:
        logger.info(
            "Writing all panoptic predictions to {} ...".format(pred_dir))
        for prediction in self._predictions:
            png_path = os.path.join(pred_dir, prediction["file_name"])
            with open(png_path, "wb") as png_file:
                # Popping removes the PNG payload from the record, so only
                # the remaining fields end up in the json annotations below.
                png_file.write(prediction.pop("png_string"))

        # Reuse the ground-truth json as a template and swap in our
        # predictions as its annotations.
        with open(gt_json, "r") as gt_file:
            json_data = json.load(gt_file)
        json_data["annotations"] = self._predictions
        with PathManager.open(self._predictions_json, "w") as out_file:
            out_file.write(json.dumps(json_data))

        from panopticapi.evaluation import pq_compute

        # pq_compute must run while the temporary prediction folder exists;
        # whatever it prints to stdout is discarded.
        with contextlib.redirect_stdout(io.StringIO()):
            pq_res = pq_compute(
                gt_json,
                PathManager.get_local_path(self._predictions_json),
                gt_folder=gt_folder,
                pred_folder=pred_dir,
            )

    # Result keys are PQ/SQ/RQ, PQ_th/SQ_th/RQ_th, PQ_st/SQ_st/RQ_st.
    res = {}
    for group, suffix in (("All", ""), ("Things", "_th"), ("Stuff", "_st")):
        for metric in ("pq", "sq", "rq"):
            res[metric.upper() + suffix] = 100 * pq_res[group][metric]

    results = OrderedDict(panoptic_seg=res)
    table = _print_panoptic_results(pq_res)

    if self._dump:
        _dump_to_markdown([{"task": "panoptic_seg", "tables": [table]}])
    return results
def __init__(self, dataset_name, meta, cfg, distributed, output_dir=None,
             dump=False):
    """
    Set up the evaluator state and load the COCO ground-truth API.

    Args:
        dataset_name (str): name of the dataset to be evaluated. It is used
            in the warning message and for the COCO-format conversion when
            the metadata carries no "json_file".
        meta (SimpleNamespace): dataset metadata. If it has no "json_file"
            attribute, one is generated under ``output_dir`` and stored on
            the metadata.
        cfg (config dict): cvpods Config instance; the tasks and
            ``TEST.KEYPOINT_OKS_SIGMAS`` are read from it.
        distributed (bool): stored as ``self._distributed``.
        output_dir (str): optional output directory. It is also where the
            converted "<dataset_name>_coco_format.json" cache is written
            when a conversion is needed.
        dump (bool): stored as ``self._dump``.
    """
    self._dump = dump
    self.cfg = cfg
    self._tasks = self._tasks_from_config(cfg)
    self._distributed = distributed
    self._output_dir = output_dir

    self._cpu_device = torch.device("cpu")
    self._logger = logging.getLogger(__name__)
    self._metadata = meta

    if not hasattr(meta, "json_file"):
        self._logger.warning(
            f"json_file was not found in MetaDataCatalog for '{dataset_name}'."
            " Trying to convert it to COCO format ...")

        converted_json = os.path.join(
            output_dir, f"{dataset_name}_coco_format.json")
        meta.json_file = converted_json
        convert_to_coco_json(dataset_name, converted_json)

    local_json = PathManager.get_local_path(meta.json_file)
    # Anything COCO() prints while loading is discarded.
    with contextlib.redirect_stdout(io.StringIO()):
        self._coco_api = COCO(local_json)

    self._kpt_oks_sigmas = cfg.TEST.KEYPOINT_OKS_SIGMAS

    # Test set json files do not contain annotations (evaluation must be
    # performed using the COCO evaluation server).
    self._do_evaluation = "annotations" in self._coco_api.dataset
def __init__(self, dataset_name, meta, cfg, distributed, output_dir=None,
             dump=False, use_fast_impl=True):
    """
    Set up the evaluator state and load the LVIS ground-truth API.

    Args:
        dataset_name (str): name of the dataset to be evaluated; currently
            only stored on the instance.
        meta (SimpleNamespace): dataset metadata. Its "json_file" must be
            the path to the LVIS format annotation.
        cfg (config dict): cvpods Config instance; the tasks and
            ``TEST.DETECTIONS_PER_IMAGE`` are read from it.
        distributed (bool): stored as ``self._distributed``.
        output_dir (str): optional, an output directory to dump results.
        dump (bool): stored as ``self._dump``.
        use_fast_impl (bool): stored as ``self._use_fast_impl``.
    """
    from lvis import LVIS

    # TODO: really use dataset_name
    self.dataset_name = dataset_name
    self._dump = dump
    self._tasks = self._tasks_from_config(cfg)

    self._distributed = distributed
    self._output_dir = output_dir
    self._use_fast_impl = use_fast_impl
    self._max_dets = cfg.TEST.DETECTIONS_PER_IMAGE

    self._cpu_device = torch.device("cpu")
    self._logger = logging.getLogger(__name__)
    self._metadata = meta

    local_json = PathManager.get_local_path(meta.json_file)
    self._lvis_api = LVIS(local_json)

    # Test set json files do not contain annotations (evaluation must be
    # performed using the LVIS evaluation server).
    annotation_ids = self._lvis_api.get_ann_ids()
    self._do_evaluation = len(annotation_ids) > 0
def load(self, path: str):
    """
    Load a checkpoint from ``path`` into the model.

    When path points to a network file, this function has to be called on
    all ranks.

    Args:
        path (str): path or url to the checkpoint. If empty, nothing is
            loaded.

    Returns:
        dict: when ``self.resume`` is set, whatever is left of the loaded
        checkpoint after the entries matching ``self.checkpointables`` have
        been consumed. An empty dict when ``path`` is empty or when not
        resuming.
    """
    if not path:
        # no checkpoint provided
        self.logger.info(
            "No checkpoint found. Initializing model from scratch")
        return {}

    self.logger.info("Loading checkpoint from {}".format(path))
    if not os.path.isfile(path):
        # Not a plain local file: let PathManager resolve it first.
        path = PathManager.get_local_path(path)
        assert PathManager.isfile(path), \
            "Checkpoint {} not found!".format(path)

    checkpoint = self._load_file(path)
    self._load_model(checkpoint)

    if not self.resume:
        return {}

    for name, target in self.checkpointables.items():
        if name not in checkpoint:
            continue
        self.logger.info("Loading {} from {}".format(name, path))
        target.load_state_dict(checkpoint.pop(name))

    # return any further checkpoint data
    return checkpoint
def _load_annotations(self, json_file, image_root):
    """
    Load a json file in LVIS's annotation format.

    Args:
        json_file (str): full path to the LVIS json annotation file.
        image_root (str): the directory where the images in this json file
            exists.

    Returns:
        list[dict]: one record per image with "file_name", "height",
        "width", "not_exhaustive_category_ids", "neg_category_ids",
        "image_id" and "annotations". Each annotation holds "bbox",
        "bbox_mode", "category_id" and "segmentation".

    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
    """
    from lvis import LVIS

    json_file = PathManager.get_local_path(json_file)

    timer = Timer()
    lvis_api = LVIS(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(
            json_file, timer.seconds()))

    # sort indices for reproducible results
    img_ids = sorted(lvis_api.imgs.keys())
    # One image-info dict per id (file_name, height, width, id, ...).
    imgs = lvis_api.load_imgs(img_ids)
    # list[list[dict]]: the outer list follows img_ids, the inner list
    # holds the annotation dicts of that image.
    anns = [lvis_api.img_ann_map[img_id] for img_id in img_ids]

    # Sanity check that each annotation has a unique id
    ann_ids = []
    for anns_per_image in anns:
        ann_ids.extend(ann["id"] for ann in anns_per_image)
    assert len(set(ann_ids)) == len(ann_ids), \
        "Annotation ids in '{}' are not unique".format(json_file)

    imgs_anns = list(zip(imgs, anns))
    logger.info("Loaded {} images in the LVIS format from {}".format(
        len(imgs_anns), json_file))

    dataset_dicts = []
    for img_dict, anno_dict_list in imgs_anns:
        file_name = img_dict["file_name"]
        if file_name.startswith("COCO"):
            # Convert from the COCO 2014 file naming convention of
            # COCO_[train/val/test]2014_000000000000.jpg to the 2017
            # naming convention of 000000000000.jpg
            # (LVIS v1 will fix this naming issue)
            file_name = file_name[-16:]

        image_id = img_dict["id"]
        record = {
            "file_name": os.path.join(image_root, file_name),
            "height": img_dict["height"],
            "width": img_dict["width"],
            "not_exhaustive_category_ids": img_dict.get(
                "not_exhaustive_category_ids", []),
            "neg_category_ids": img_dict.get("neg_category_ids", []),
            "image_id": image_id,
        }

        objs = []
        for anno in anno_dict_list:
            # The annotation must belong to the image being processed.
            # This fails only when the data parsing logic or the
            # annotation file is buggy.
            assert anno["image_id"] == image_id

            obj = {
                "bbox": anno["bbox"],
                "bbox_mode": BoxMode.XYWH_ABS,
                # Convert 1-indexed to 0-indexed
                "category_id": anno["category_id"] - 1,
            }

            segm = anno["segmentation"]  # list[list[float]]
            # A polygon is invalid if it has an odd number of coordinates
            # or fewer than 3 points.
            num_invalid = sum(
                1 for poly in segm if len(poly) % 2 != 0 or len(poly) < 6)
            assert num_invalid == 0, \
                "Annotation contains an invalid polygon with < 3 points"
            assert len(segm) > 0

            obj["segmentation"] = segm
            objs.append(obj)

        record["annotations"] = objs
        dataset_dicts.append(record)

    return dataset_dicts
def _load_annotations(self, json_file, image_root):
    """
    Load a CrowdHuman annotation file (one json document per line).

    Each non-blank line is parsed as a json object with an "ID" and a list
    of "gtboxes". Only box-level information is produced.

    Args:
        json_file (str): full path to the annotation file in CrowdHuman
            format.
        image_root (str): the directory where the images in this json file
            exists. Image paths are built as "<image_root>/<ID>.jpg".

    Returns:
        list[dict]: one record per image with "file_name", "image_id" and
        "annotations". Each annotation carries "bbox" (taken from "fbox"),
        "bbox_mode", "category_id" and whichever of "tag", "hbox", "vbox",
        "head_attr", "extra" the box provides. "category_id" is 0, or -1
        for boxes whose ``extra["ignore"]`` is non-zero.

    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
    """
    timer = Timer()
    json_file = PathManager.get_local_path(json_file)
    with open(json_file, 'r') as file:
        # Skip blank lines (e.g. a trailing empty line): json.loads would
        # raise on them and they would inflate the logged image count.
        gt_records = [line for line in file if line.strip()]

    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(
            json_file, timer.seconds()))

    logger.info("Loaded {} images in CrowdHuman format from {}".format(
        len(gt_records), json_file))

    dataset_dicts = []
    # Optional per-box fields that are copied over verbatim when present.
    ann_keys = ["tag", "hbox", "vbox", "head_attr", "extra"]

    for anno_str in gt_records:
        anno_dict = json.loads(anno_str)

        record = {}
        record["file_name"] = os.path.join(
            image_root, "{}.jpg".format(anno_dict["ID"]))
        record["image_id"] = anno_dict["ID"]

        objs = []
        for anno in anno_dict['gtboxes']:
            obj = {key: anno[key] for key in ann_keys if key in anno}
            obj["bbox"] = anno["fbox"]

            # Boxes flagged as ignored are given category -1.
            is_ignored = ('extra' in anno and 'ignore' in anno['extra']
                          and anno['extra']['ignore'] != 0)
            obj["category_id"] = -1 if is_ignored else 0

            obj["bbox_mode"] = BoxMode.XYWH_ABS
            objs.append(obj)

        record["annotations"] = objs
        dataset_dicts.append(record)

    return dataset_dicts
def _get_local_path(self, path):
    """
    Resolve ``path`` through PathManager after swapping its prefix.

    The first ``len(self.PREFIX)`` characters of ``path`` are dropped and
    the remainder is appended to ``self.S3_DETECTRON2_PREFIX``.

    Args:
        path (str): a path expected to begin with ``self.PREFIX``.

    Returns:
        whatever ``PathManager.get_local_path`` returns for the rewritten
        path.
    """
    remainder = path[len(self.PREFIX):]
    rewritten = self.S3_DETECTRON2_PREFIX + remainder
    return PathManager.get_local_path(rewritten)
def _get_local_path(self, path):
    """
    Resolve a catalog-style ``path`` through ModelCatalog and PathManager.

    The first ``len(self.PREFIX)`` characters of ``path`` are dropped, the
    remainder is looked up with ``ModelCatalog.get`` and the result is
    handed to ``PathManager.get_local_path``.

    Args:
        path (str): a path expected to begin with ``self.PREFIX``.

    Returns:
        whatever ``PathManager.get_local_path`` returns for the catalog
        entry.
    """
    log = logging.getLogger(__name__)
    entry_name = path[len(self.PREFIX):]
    catalog_path = ModelCatalog.get(entry_name)
    log.info("Catalog entry {} points to {}".format(path, catalog_path))
    return PathManager.get_local_path(catalog_path)
def _load_annotations(self, json_file, image_root, dataset_name=None,
                      extra_annotation_keys=None):
    """
    Load a json file with COCO's instances annotation format.

    Boxes, polygon / RLE segmentations and keypoints are carried over.

    Args:
        json_file (str): full path to the json file in COCO instances
            annotation format.
        image_root (str): the directory where the images in this json file
            exists.
        dataset_name (str): the name of the dataset (e.g., coco_2017_train).
            If provided, "thing_classes" and
            "thing_dataset_id_to_contiguous_id" are written to ``self.meta``
            and category ids are remapped to contiguous ids.
        extra_annotation_keys (list[str]): list of per-annotation keys that
            should also be loaded into the dataset dict (besides "iscrowd",
            "bbox", "keypoints", "category_id", "segmentation"). The values
            for these keys will be returned as-is.

    Returns:
        list[dict]: one record per image with "file_name", "height",
        "width", "image_id" and "annotations".

    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
    """
    from pycocotools.coco import COCO

    timer = Timer()
    json_file = PathManager.get_local_path(json_file)
    # Anything COCO() prints while loading is discarded.
    with contextlib.redirect_stdout(io.StringIO()):
        coco_api = COCO(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(
            json_file, timer.seconds()))

    id_map = None
    if dataset_name is not None:
        cat_ids = sorted(coco_api.getCatIds())
        cats = coco_api.loadCats(cat_ids)
        # The categories in a custom json file may not be sorted.
        self.meta["thing_classes"] = [
            cat["name"] for cat in sorted(cats, key=lambda x: x["id"])
        ]

        # In COCO, certain category ids are artificially removed, and by
        # convention they are always ignored. The ids are translated to
        # contiguous ids starting at 0, based on the "categories" field of
        # the json. Non-COCO datasets with incontiguous ids get the same
        # mapping, plus a warning.
        ids_are_contiguous = (min(cat_ids) == 1
                              and max(cat_ids) == len(cat_ids))
        if not ids_are_contiguous and "coco" not in dataset_name:
            logger.warning("""
Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.
""")
        id_map = {cat_id: idx for idx, cat_id in enumerate(cat_ids)}
        self.meta["thing_dataset_id_to_contiguous_id"] = id_map

    # sort indices for reproducible results
    img_ids = sorted(coco_api.imgs.keys())
    # One image-info dict per id (file_name, height, width, id, ...).
    imgs = coco_api.loadImgs(img_ids)
    # list[list[dict]]: the outer list follows img_ids, the inner list
    # holds the annotation dicts (segmentation, bbox, category_id, id, ...)
    # of that image.
    anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]

    if "minival" not in json_file:
        # The popular valminusminival & minival annotations for COCO2014
        # contain duplicated annotation ids. The ratio of buggy annotations
        # there is tiny and does not affect accuracy, so those files are
        # explicitly white-listed.
        ann_ids = []
        for anns_per_image in anns:
            ann_ids.extend(ann["id"] for ann in anns_per_image)
        assert len(set(ann_ids)) == len(ann_ids), \
            "Annotation ids in '{}' are not unique!".format(json_file)

    imgs_anns = list(zip(imgs, anns))
    logger.info("Loaded {} images in COCO format from {}".format(
        len(imgs_anns), json_file))

    dataset_dicts = []
    ann_keys = ["iscrowd", "bbox", "keypoints", "category_id"]
    ann_keys = ann_keys + (extra_annotation_keys or [])
    num_instances_without_valid_segmentation = 0

    for img_dict, anno_dict_list in imgs_anns:
        image_id = img_dict["id"]
        record = {
            "file_name": os.path.join(image_root, img_dict["file_name"]),
            "height": img_dict["height"],
            "width": img_dict["width"],
            "image_id": image_id,
        }

        objs = []
        for anno in anno_dict_list:
            # The annotation must belong to the image being processed.
            # This fails only when the data parsing logic or the annotation
            # file is buggy. The original COCO valminusminival2014 &
            # minival2014 annotation files actually contain bugs that,
            # together with certain ways of using COCO API, can trigger
            # this assertion.
            assert anno["image_id"] == image_id
            assert anno.get("ignore", 0) == 0

            obj = {key: anno[key] for key in ann_keys if key in anno}

            segm = anno.get("segmentation", None)
            if segm:  # either list[list[float]] or dict(RLE)
                if not isinstance(segm, dict):
                    # filter out invalid polygons (< 3 points)
                    segm = [
                        poly for poly in segm
                        if len(poly) % 2 == 0 and len(poly) >= 6
                    ]
                    if len(segm) == 0:
                        num_instances_without_valid_segmentation += 1
                        continue  # ignore this instance
                obj["segmentation"] = segm

            keypts = anno.get("keypoints", None)
            if keypts:  # list[int]
                for idx, value in enumerate(keypts):
                    if idx % 3 == 2:
                        # Every third entry is left untouched.
                        continue
                    # COCO's segmentation coordinates are floating points
                    # in [0, H or W], but keypoint coordinates are integers
                    # in [0, H-1 or W-1]. Therefore we assume the
                    # coordinates are "pixel indices" and add 0.5 to
                    # convert to floating point coordinates.
                    keypts[idx] = value + 0.5
                obj["keypoints"] = keypts

            obj["bbox_mode"] = BoxMode.XYWH_ABS
            if id_map:
                obj["category_id"] = id_map[obj["category_id"]]
            objs.append(obj)

        record["annotations"] = objs
        dataset_dicts.append(record)

    if num_instances_without_valid_segmentation > 0:
        logger.warning(
            "Filtered out {} instances without valid segmentation. "
            "There might be issues in your dataset generation process.".
            format(num_instances_without_valid_segmentation))
    return dataset_dicts
def _load_annotations(self, json_file, image_root, dataset_name=None,
                      extra_annotation_keys=None):
    """
    Load a WiderFace annotation file stored in COCO-style json.

    The file is read through the pycocotools ``COCO`` API. Annotations whose
    box width or height is not positive, or whose "ignore" field is
    non-zero, are dropped.

    Args:
        json_file (str): full path to the json annotation file.
        image_root (str): the directory where the images in this json file
            exists.
        dataset_name (str): the name of the dataset. If provided,
            "thing_dataset_id_to_contiguous_id" is written to ``self.meta``
            and category ids are remapped to contiguous ids.
        extra_annotation_keys (list[str]): list of per-annotation keys that
            should also be loaded into the dataset dict (besides "iscrowd",
            "bbox", "keypoints", "category_id"). The values for these keys
            will be returned as-is.

    Returns:
        list[dict]: one record per image with "file_name", "height",
        "width", "image_id" and "annotations".

    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
    """
    from pycocotools.coco import COCO

    timer = Timer()
    json_file = PathManager.get_local_path(json_file)
    # Anything COCO() prints while loading is discarded.
    with contextlib.redirect_stdout(io.StringIO()):
        coco_api = COCO(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(
            json_file, timer.seconds()))

    id_map = None
    if dataset_name is not None:
        meta = self.meta
        # Map the sorted dataset category ids to 0..N-1.
        cat_ids = sorted(coco_api.getCatIds())
        id_map = {cat_id: idx for idx, cat_id in enumerate(cat_ids)}
        meta["thing_dataset_id_to_contiguous_id"] = id_map

    # sort indices for reproducible results
    img_ids = sorted(coco_api.imgs.keys())
    # One image-info dict per id (file_name, height, width, id, ...).
    imgs = coco_api.loadImgs(img_ids)
    # list[list[dict]]: the outer list follows img_ids, the inner list
    # holds the annotation dicts of that image.
    anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]

    if "minival" not in json_file:
        # The popular valminusminival & minival annotations for COCO2014
        # contain duplicated annotation ids. The ratio of buggy annotations
        # there is tiny and does not affect accuracy, so those files are
        # explicitly white-listed.
        ann_ids = []
        for anns_per_image in anns:
            ann_ids.extend(ann["id"] for ann in anns_per_image)
        assert len(set(ann_ids)) == len(ann_ids), \
            "Annotation ids in '{}' are not unique!".format(json_file)

    imgs_anns = list(zip(imgs, anns))
    logger.info("Loaded {} images in COCO format from {}".format(
        len(imgs_anns), json_file))

    dataset_dicts = []
    ann_keys = ["iscrowd", "bbox", "keypoints", "category_id"]
    ann_keys = ann_keys + (extra_annotation_keys or [])

    for img_dict, anno_dict_list in imgs_anns:
        image_id = img_dict["id"]
        record = {
            "file_name": os.path.join(image_root, img_dict["file_name"]),
            "height": img_dict["height"],
            "width": img_dict["width"],
            "image_id": image_id,
        }

        objs = []
        for anno in anno_dict_list:
            # The annotation must belong to the image being processed.
            # This fails only when the data parsing logic or the annotation
            # file is buggy.
            assert anno["image_id"] == image_id

            # Skip boxes whose width or height is not greater than 0, and
            # annotations marked as ignored.
            if (anno["bbox"][2] <= 0 or anno["bbox"][3] <= 0
                    or anno.get("ignore", 0) != 0):
                continue

            obj = {key: anno[key] for key in ann_keys if key in anno}
            obj["bbox_mode"] = BoxMode.XYWH_ABS
            if id_map:
                obj["category_id"] = id_map[obj["category_id"]]
            objs.append(obj)

        record["annotations"] = objs
        dataset_dicts.append(record)

    return dataset_dicts