def evaluate(self):
    if self._distributed:
        comm.synchronize()
        self._predictions = comm.gather(self._predictions, dst=0)
        self._predictions = list(itertools.chain(*self._predictions))

        if not comm.is_main_process():
            return {}

    if len(self._predictions) == 0:
        self._logger.warning(
            "[COCOEvaluator] Did not receive valid predictions.")
        return {}

    if self._output_dir:
        PathManager.mkdirs(self._output_dir)
        file_path = os.path.join(self._output_dir, "instances_predictions.pth")
        with PathManager.open(file_path, "wb") as f:
            torch.save(self._predictions, f)

    self._results = OrderedDict()
    if "proposals" in self._predictions[0]:
        self._eval_box_proposals()
    if "instances" in self._predictions[0]:
        self._eval_predictions(set(self._tasks))
    # Copy so the caller can do whatever with results
    return copy.deepcopy(self._results)
def load(self, path: str):
    """
    Load from the given checkpoint. When the path points to a network file,
    this function has to be called on all ranks.

    Args:
        path (str): path or url to the checkpoint. If empty, will not load
            anything.
    Returns:
        dict: extra data loaded from the checkpoint that has not been
        processed. For example, those saved with :meth:`.save(**extra_data)`.
    """
    if not path:
        # no checkpoint provided
        self.logger.info("No checkpoint found. Initializing model from scratch")
        return {}
    self.logger.info("Loading checkpoint from {}".format(path))
    if not os.path.isfile(path):
        path = PathManager.get_local_path(path)
        assert PathManager.isfile(path), "Checkpoint {} not found!".format(path)

    checkpoint = self._load_file(path)
    self._load_model(checkpoint)
    for key, obj in self.checkpointables.items():
        if key in checkpoint:
            self.logger.info("Loading {} from {}".format(key, path))
            obj.load_state_dict(checkpoint.pop(key))

    # return any further checkpoint data
    return checkpoint
def convert_to_coco_json(dataset_name, output_folder="", allow_cached=True):
    """
    Converts dataset into COCO format and saves it to a json file.
    dataset_name must be registered in DatasetCatalog and in detectron2's
    standard format.

    Args:
        dataset_name: reference from the config file to the catalogs
            must be registered in DatasetCatalog and in detectron2's standard format
        output_folder: where json file will be saved and loaded from
        allow_cached: if json file is already present then skip conversion
    Returns:
        cache_path: path to the COCO-format json file
    """
    # TODO: The dataset or the conversion script *may* change,
    # a checksum would be useful for validating the cached data
    cache_path = os.path.join(output_folder, f"{dataset_name}_coco_format.json")
    PathManager.mkdirs(output_folder)
    if os.path.exists(cache_path) and allow_cached:
        logger.info(
            f"Reading cached annotations in COCO format from: {cache_path} ...")
    else:
        logger.info(
            f"Converting dataset annotations in '{dataset_name}' to COCO format ...")
        coco_dict = convert_to_coco_dict(dataset_name)

        with PathManager.open(cache_path, "w") as json_file:
            logger.info(f"Caching annotations in COCO format: {cache_path}")
            json.dump(coco_dict, json_file)

    return cache_path
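
# Example usage (a hedged sketch, not part of the library; "my_dataset" is a
# hypothetical name that would first have to be registered in DatasetCatalog,
# and the import path is an assumption):
#
#     from dl_lib.data import DatasetCatalog
#     DatasetCatalog.register("my_dataset", lambda: build_my_dataset_dicts())
#     cache_path = convert_to_coco_json("my_dataset", output_folder="./output")
#     # cache_path -> "./output/my_dataset_coco_format.json"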
def get_all_checkpoint_files(self):
    """
    Returns:
        list: All available checkpoint files (.pth files) in target directory.
    """
    all_model_checkpoints = [
        os.path.join(self.save_dir, file)
        for file in PathManager.ls(self.save_dir)
        if PathManager.isfile(os.path.join(self.save_dir, file))
        and file.endswith(".pth")
    ]
    return all_model_checkpoints
def default_setup(cfg, args):
    """
    Perform some basic common setups at the beginning of a job, including:

    1. Set up the dl_lib logger
    2. Log basic information about environment, cmdline arguments, and config
    3. Backup the config to the output directory

    Args:
        cfg (CfgNode): the full config to be used
        args (argparse.Namespace): the command line arguments to be logged
    """
    output_dir = cfg.OUTPUT_DIR
    if comm.is_main_process() and output_dir:
        PathManager.mkdirs(output_dir)

    rank = comm.get_rank()
    # setup_logger(output_dir, distributed_rank=rank, name="dl_lib")
    logger = setup_logger(output_dir, distributed_rank=rank)

    logger.info("Rank of current process: {}. World size: {}".format(
        rank, comm.get_world_size()))
    logger.info("Environment info:\n" + collect_env_info())

    logger.info("Command line arguments: " + str(args))
    if hasattr(args, "config_file") and args.config_file != "":
        logger.info("Contents of args.config_file={}:\n{}".format(
            args.config_file,
            PathManager.open(args.config_file, "r").read()))

    logger.info("Running with full config:\n{}".format(cfg))
    base_config = cfg.__class__.__base__()
    logger.info("Config differences from the base class:\n{}".format(
        cfg.show_diff(base_config)))

    # if comm.is_main_process() and output_dir:
    #     # Note: some of our scripts may expect the existence of
    #     # config.yaml in output directory
    #     path = os.path.join(output_dir, "config.yaml")
    #     with PathManager.open(path, "w") as f:
    #         f.write(cfg.dump())
    #     logger.info("Full config saved to {}".format(os.path.abspath(path)))

    # make sure each worker has a different, yet deterministic seed if specified
    seed_all_rng(None if cfg.SEED < 0 else cfg.SEED + rank)

    # cudnn benchmark has large overhead. It shouldn't be used considering the
    # small size of typical validation set.
    if not (hasattr(args, "eval_only") and args.eval_only):
        torch.backends.cudnn.benchmark = cfg.CUDNN_BENCHMARK

    return cfg, logger
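
# A minimal sketch of how this is typically called from a training script.
# The argument parser and the config-building helper below are assumptions for
# illustration, not functions defined in this file:
#
#     args = default_argument_parser().parse_args()  # hypothetical helper
#     cfg = build_config(args.config_file)           # hypothetical helper returning a CfgNode
#     cfg, logger = default_setup(cfg, args)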
def _load_file(self, filename):
    if filename.endswith(".pkl"):
        with PathManager.open(filename, "rb") as f:
            data = pickle.load(f, encoding="latin1")
        if "model" in data and "__author__" in data:
            # file is in dl_lib model zoo format
            self.logger.info("Reading a file from '{}'".format(data["__author__"]))
            return data
        else:
            # assume file is from Caffe2 / Detectron1 model zoo
            if "blobs" in data:
                # Detection models have "blobs", but ImageNet models don't
                data = data["blobs"]
            data = {
                k: v
                for k, v in data.items() if not k.endswith("_momentum")
            }
            return {
                "model": data,
                "__author__": "Caffe2",
                "matching_heuristics": True,
            }

    loaded = super()._load_file(filename)  # load native pth checkpoint
    if "model" not in loaded:
        loaded = {"model": loaded}
    return loaded
def has_checkpoint(self):
    """
    Returns:
        bool: whether a checkpoint exists in the target directory.
    """
    save_file = os.path.join(self.save_dir, "last_checkpoint")
    return PathManager.exists(save_file)
def load_cityscapes_semantic(image_dir, gt_dir):
    """
    Args:
        image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train".
        gt_dir (str): path to the raw annotations. e.g., "~/cityscapes/gtFine/train".

    Returns:
        list[dict]: a list of dict, each has "file_name" and "sem_seg_file_name".
    """
    ret = []
    for image_file in glob.glob(os.path.join(image_dir, "**/*.png")):
        suffix = "leftImg8bit.png"
        assert image_file.endswith(suffix)
        prefix = image_dir

        label_file = gt_dir + image_file[
            len(prefix):-len(suffix)] + "gtFine_labelTrainIds.png"
        assert os.path.isfile(
            label_file
        ), "Please generate labelTrainIds.png with cityscapesscripts/preparation/createTrainIdLabelImgs.py"  # noqa

        json_file = gt_dir + image_file[
            len(prefix):-len(suffix)] + "gtFine_polygons.json"

        with PathManager.open(json_file, "r") as f:
            jsonobj = json.load(f)
        ret.append({
            "file_name": image_file,
            "sem_seg_file_name": label_file,
            "height": jsonobj["imgHeight"],
            "width": jsonobj["imgWidth"],
        })
    return ret
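
# Example usage (a sketch; the paths are hypothetical and must point at an
# extracted Cityscapes tree with labelTrainIds.png already generated):
#
#     dicts = load_cityscapes_semantic(
#         "datasets/cityscapes/leftImg8bit/train",
#         "datasets/cityscapes/gtFine/train")
#     print(dicts[0]["file_name"], dicts[0]["sem_seg_file_name"])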
def __init__(self, json_file, window_size=20):
    """
    Args:
        json_file (str): path to the json file. New data will be appended
            if the file exists.
        window_size (int): the window size of median smoothing for the
            scalars whose `smoothing_hint` are True.
    """
    self._file_handle = PathManager.open(json_file, "a")
    self._window_size = window_size
def process(self, inputs, outputs):
    """
    Args:
        inputs: the inputs to a model.
            It is a list of dicts. Each dict corresponds to an image and
            contains keys like "height", "width", "file_name".
        outputs: the outputs of a model. It is either list of semantic
            segmentation predictions (Tensor [H, W]) or list of dicts with
            key "sem_seg" that contains semantic segmentation prediction in
            the same format.
    """
    for input, output in zip(inputs, outputs):
        if "flops" in output:
            flops = output["flops"]
            self._real_flops.append(flops["real_flops"])
            self._expt_flops.append(flops["expt_flops"])
        output = output["sem_seg"].argmax(dim=0).to(self._cpu_device)
        # np.int was removed in NumPy 1.24; plain int is equivalent here
        pred = np.array(output, dtype=int)
        # Cityscapes test output
        if "cityscapes" in self._dataset_name and "test" in self._dataset_name:
            pred_converg = pred.copy()
            f_name = input["file_name"].split("/")[-1]
            pred_unique = list(np.unique(pred_converg))
            # map trainIds back to label ids, in descending order
            pred_unique.sort(reverse=True)
            for i in range(len(pred_unique)):
                pred_converg[pred_converg == int(pred_unique[i])] \
                    = trainId2label[int(pred_unique[i])].id
            save_dir = os.path.join(self._output_dir, "test_dir")
            if not os.path.exists(save_dir):
                PathManager.mkdirs(save_dir)
            cv2.imwrite(os.path.join(save_dir, f_name), pred_converg)

        with PathManager.open(
                self.input_file_to_gt_file[input["file_name"]], "rb") as f:
            gt = np.array(Image.open(f), dtype=int)

        gt[gt == self._ignore_label] = self._num_classes

        self._conf_matrix += np.bincount(
            self._N * pred.reshape(-1) + gt.reshape(-1),
            minlength=self._N**2).reshape(self._N, self._N)

        self._predictions.extend(
            self.encode_json_sem_seg(pred, input["file_name"]))
def load_proposals_into_dataset(dataset_dicts, proposal_file):
    r"""
    Load precomputed object proposals into the dataset.

    The proposal file should be a pickled dict with the following keys:

    - "ids": list[int] or list[str], the image ids
    - "boxes": list[np.ndarray], each is an Nx4 array of boxes corresponding
      to the image id
    - "objectness_logits": list[np.ndarray], each is an N sized array of
      objectness scores corresponding to the boxes.
    - "bbox_mode": the BoxMode of the boxes array. Defaults to ``BoxMode.XYXY_ABS``.

    Args:
        dataset_dicts (list[dict]): annotations in dl_lib Dataset format.
        proposal_file (str): file path of pre-computed proposals, in pkl format.

    Returns:
        list[dict]: the same format as dataset_dicts, but with the proposal
        fields added.
    """
    logger = logging.getLogger(__name__)
    logger.info("Loading proposals from: {}".format(proposal_file))

    with PathManager.open(proposal_file, "rb") as f:
        proposals = pickle.load(f, encoding="latin1")

    # Rename the key names in D1 proposal files
    rename_keys = {"indexes": "ids", "scores": "objectness_logits"}
    for key in rename_keys:
        if key in proposals:
            proposals[rename_keys[key]] = proposals.pop(key)

    # Fetch the indexes of all proposals that are in the dataset
    # Convert image_id to str since they could be int.
    img_ids = set({str(record["image_id"]) for record in dataset_dicts})
    id_to_index = {
        str(id): i
        for i, id in enumerate(proposals["ids"]) if str(id) in img_ids
    }

    # Assuming default bbox_mode of precomputed proposals are 'XYXY_ABS'
    bbox_mode = BoxMode(proposals["bbox_mode"]) \
        if "bbox_mode" in proposals else BoxMode.XYXY_ABS

    for record in dataset_dicts:
        # Get the index of the proposal
        i = id_to_index[str(record["image_id"])]

        boxes = proposals["boxes"][i]
        objectness_logits = proposals["objectness_logits"][i]
        # Sort the proposals in descending order of the scores
        inds = objectness_logits.argsort()[::-1]
        record["proposal_boxes"] = boxes[inds]
        record["proposal_objectness_logits"] = objectness_logits[inds]
        record["proposal_bbox_mode"] = bbox_mode

    return dataset_dicts
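
# A minimal sketch of a proposal file matching the schema documented above.
# The file name and the toy numbers are made up for illustration:
#
#     import pickle
#     import numpy as np
#     proposals = {
#         "ids": [1268],
#         "boxes": [np.array([[0.0, 0.0, 100.0, 100.0]], dtype=np.float32)],
#         "objectness_logits": [np.array([0.9], dtype=np.float32)],
#         "bbox_mode": BoxMode.XYXY_ABS,
#     }
#     with open("proposals.pkl", "wb") as f:
#         pickle.dump(proposals, f)
#     dataset_dicts = load_proposals_into_dataset(dataset_dicts, "proposals.pkl")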
def tag_last_checkpoint(self, last_filename_basename: str):
    """
    Tag the last checkpoint.

    Args:
        last_filename_basename (str): the basename of the last filename.
    """
    save_file = os.path.join(self.save_dir, "last_checkpoint")
    with PathManager.open(save_file, "w") as f:
        f.write(last_filename_basename)
def _eval_predictions(self, tasks):
    """
    Evaluate self._predictions on the given tasks.
    Fill self._results with the metrics of the tasks.
    """
    self._logger.info("Preparing results for COCO format ...")
    self._coco_results = list(
        itertools.chain(*[x["instances"] for x in self._predictions]))

    # unmap the category ids for COCO
    if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"):
        reverse_id_mapping = {
            v: k
            for k, v in self._metadata.thing_dataset_id_to_contiguous_id.items()
        }
        for result in self._coco_results:
            category_id = result["category_id"]
            assert (
                category_id in reverse_id_mapping
            ), "A prediction has category_id={}, which is not available in the dataset.".format(
                category_id)
            result["category_id"] = reverse_id_mapping[category_id]

    if self._output_dir:
        file_path = os.path.join(self._output_dir, "coco_instances_results.json")
        self._logger.info("Saving results to {}".format(file_path))
        with PathManager.open(file_path, "w") as f:
            f.write(json.dumps(self._coco_results))
            f.flush()

    if not self._do_evaluation:
        self._logger.info("Annotations are not available for evaluation.")
        return

    self._logger.info("Evaluating predictions ...")
    for task in sorted(tasks):
        coco_eval = (
            _evaluate_predictions_on_coco(
                self._coco_api,
                self._coco_results,
                task,
                kpt_oks_sigmas=self._kpt_oks_sigmas)
            if len(self._coco_results) > 0 else None
            # cocoapi does not handle empty results very well
        )

        res = self._derive_coco_results(
            coco_eval, task, class_names=self._metadata.get("thing_classes"))
        self._results[task] = res
def load_voc_instances(dirname: str, split: str):
    """
    Load Pascal VOC detection annotations to dl_lib format.

    Args:
        dirname: Contains "Annotations", "ImageSets", "JPEGImages"
        split (str): one of "train", "test", "val", "trainval"
    """
    with PathManager.open(
            os.path.join(dirname, "ImageSets", "Main", split + ".txt")) as f:
        # np.str was removed in NumPy 1.24; plain str is equivalent here
        fileids = np.loadtxt(f, dtype=str)

    dicts = []
    for fileid in fileids:
        anno_file = os.path.join(dirname, "Annotations", fileid + ".xml")
        jpeg_file = os.path.join(dirname, "JPEGImages", fileid + ".jpg")

        tree = ET.parse(anno_file)

        r = {
            "file_name": jpeg_file,
            "image_id": fileid,
            "height": int(tree.findall("./size/height")[0].text),
            "width": int(tree.findall("./size/width")[0].text),
        }
        instances = []

        for obj in tree.findall("object"):
            cls = obj.find("name").text
            # We include "difficult" samples in training.
            # Based on limited experiments, they don't hurt accuracy.
            # difficult = int(obj.find("difficult").text)
            # if difficult == 1:
            #     continue
            bbox = obj.find("bndbox")
            bbox = [
                float(bbox.find(x).text)
                for x in ["xmin", "ymin", "xmax", "ymax"]
            ]
            # Original annotations are integers in the range [1, W or H]
            # Assuming they mean 1-based pixel indices (inclusive),
            # a box with annotation (xmin=1, xmax=W) covers the whole image.
            # In coordinate space this is represented by (xmin=0, xmax=W)
            bbox[0] -= 1.0
            bbox[1] -= 1.0
            instances.append({
                "category_id": CLASS_NAMES.index(cls),
                "bbox": bbox,
                "bbox_mode": BoxMode.XYXY_ABS,
            })
        r["annotations"] = instances
        dicts.append(r)
    return dicts
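
# Example usage (a sketch; the directory is hypothetical and follows the
# standard VOC layout with Annotations/, ImageSets/, JPEGImages/):
#
#     dicts = load_voc_instances("datasets/VOC2007", "trainval")
#     print(len(dicts), dicts[0]["annotations"][0]["bbox"])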
def get_checkpoint_file(self):
    """
    Returns:
        str: The latest checkpoint file in target directory.
    """
    save_file = os.path.join(self.save_dir, "last_checkpoint")
    try:
        with PathManager.open(save_file, "r") as f:
            last_saved = f.read().strip()
    except IOError:
        # if file doesn't exist, maybe because it has just been
        # deleted by a separate process
        return ""
    return os.path.join(self.save_dir, last_saved)
def _eval_box_proposals(self):
    """
    Evaluate the box proposals in self._predictions.
    Fill self._results with the metrics for "box_proposals" task.
    """
    if self._output_dir:
        # Saving generated box proposals to file.
        # Predicted box_proposals are in XYXY_ABS mode.
        bbox_mode = BoxMode.XYXY_ABS.value
        ids, boxes, objectness_logits = [], [], []
        for prediction in self._predictions:
            ids.append(prediction["image_id"])
            boxes.append(prediction["proposals"].proposal_boxes.tensor.numpy())
            objectness_logits.append(prediction["proposals"].objectness_logits.numpy())

        proposal_data = {
            "boxes": boxes,
            "objectness_logits": objectness_logits,
            "ids": ids,
            "bbox_mode": bbox_mode,
        }
        with PathManager.open(
                os.path.join(self._output_dir, "box_proposals.pkl"), "wb") as f:
            pickle.dump(proposal_data, f)

    if not self._do_evaluation:
        self._logger.info("Annotations are not available for evaluation.")
        return

    self._logger.info("Evaluating bbox proposals ...")
    res = {}
    areas = {"all": "", "small": "s", "medium": "m", "large": "l"}
    for limit in [100, 1000]:
        for area, suffix in areas.items():
            stats = _evaluate_box_proposals(self._predictions,
                                            self._coco_api,
                                            area=area,
                                            limit=limit)
            key = "AR{}@{:d}".format(suffix, limit)
            res[key] = float(stats["ar"].item() * 100)
    self._logger.info("Proposal metrics: \n" + create_small_table(res))
    self._results["box_proposals"] = res
def __init__(self, dataset_name, cfg, distributed, output_dir=None, dump=False):
    """
    Args:
        dataset_name (str): name of the dataset to be evaluated.
            It must have the following corresponding metadata:
            "json_file": the path to the COCO format annotation.
            Or it must be in detectron2's standard dataset format
            so it can be converted to COCO format automatically.
        cfg (CfgNode): config instance
        distributed (bool): if True, will collect results from all ranks
            for evaluation. Otherwise, will evaluate the results in the
            current process.
        output_dir (str): optional, an output directory to dump results.
    """
    self._dump = dump
    self._tasks = self._tasks_from_config(cfg)
    self._distributed = distributed
    self._output_dir = output_dir

    self._cpu_device = torch.device("cpu")
    self._logger = logging.getLogger(__name__)

    self._metadata = MetadataCatalog.get(dataset_name)
    if not hasattr(self._metadata, "json_file"):
        self._logger.warning(
            f"json_file was not found in MetadataCatalog for '{dataset_name}'")

        cache_path = convert_to_coco_json(dataset_name, output_dir)
        self._metadata.json_file = cache_path

    json_file = PathManager.get_local_path(self._metadata.json_file)
    with contextlib.redirect_stdout(io.StringIO()):
        self._coco_api = COCO(json_file)

    self._kpt_oks_sigmas = cfg.TEST.KEYPOINT_OKS_SIGMAS
    # Test set json files do not contain annotations (evaluation must be
    # performed using the COCO evaluation server).
    self._do_evaluation = "annotations" in self._coco_api.dataset
def after_step(self):
    if self._profiler is None:
        return
    self._profiler.__exit__(None, None, None)
    out_file = os.path.join(
        self._output_dir,
        "profiler-trace-iter{}.json".format(self.trainer.iter))
    if "://" not in out_file:
        self._profiler.export_chrome_trace(out_file)
    else:
        # Support non-posix filesystems
        with tempfile.TemporaryDirectory(prefix="dl_lib_profiler") as d:
            tmp_file = os.path.join(d, "tmp.json")
            self._profiler.export_chrome_trace(tmp_file)
            with open(tmp_file) as f:
                content = f.read()
        with PathManager.open(out_file, "w") as f:
            f.write(content)
def read_image(file_name, format=None):
    """
    Read an image into the given format.
    Will apply rotation and flipping if the image has such exif information.

    Args:
        file_name (str): image file path
        format (str): one of the supported image modes in PIL, or "BGR"

    Returns:
        image (np.ndarray): an HWC image
    """
    with PathManager.open(file_name, "rb") as f:
        image = Image.open(f)

        # capture and ignore this bug:
        # https://github.com/python-pillow/Pillow/issues/3973
        try:
            image = ImageOps.exif_transpose(image)
        except Exception:
            pass

        if format is not None:
            # PIL only supports RGB, so convert to RGB and flip channels over below
            conversion_format = format
            if format == "BGR":
                conversion_format = "RGB"
            image = image.convert(conversion_format)
        image = np.asarray(image)
        if format == "BGR":
            # flip channels if needed
            image = image[:, :, ::-1]
        # PIL squeezes out the channel dimension for "L", so make it HWC
        if format == "L":
            image = np.expand_dims(image, -1)
        return image
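
# Example usage (a sketch with a hypothetical file path):
#
#     img = read_image("input.jpg", format="BGR")
#     assert img.ndim == 3 and img.shape[2] == 3  # HWC, BGR channel order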
def save(self, name: str, **kwargs: dict):
    """
    Dump model and checkpointables to a file.

    Args:
        name (str): name of the file.
        kwargs (dict): extra arbitrary data to save.
    """
    if not self.save_dir or not self.save_to_disk:
        return

    data = {}
    data["model"] = self.model.state_dict()
    for key, obj in self.checkpointables.items():
        data[key] = obj.state_dict()
    data.update(kwargs)

    basename = "{}.pth".format(name)
    save_file = os.path.join(self.save_dir, basename)
    assert os.path.basename(save_file) == basename, basename
    self.logger.info("Saving checkpoint to {}".format(save_file))
    with PathManager.open(save_file, "wb") as f:
        torch.save(data, f)
    self.tag_last_checkpoint(basename)
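
# A minimal save/load round-trip sketch. It assumes this method lives on a
# Checkpointer-style class whose constructor takes the model, a save
# directory, and extra checkpointables as keyword arguments (the exact
# signature is an assumption; model/optimizer are placeholders):
#
#     checkpointer = Checkpointer(model, save_dir="./output", optimizer=optimizer)
#     checkpointer.save("model_0001", iteration=1)  # extra kwargs are stored too
#     extra = checkpointer.load(checkpointer.get_checkpoint_file())
#     # extra should contain roughly {"iteration": 1} after the model and
#     # checkpointables have been consumed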
def _get_local_path(self, path):
    logger = logging.getLogger(__name__)
    catalog_path = ModelCatalog.get(path[len(self.PREFIX):])
    logger.info("Catalog entry {} points to {}".format(path, catalog_path))
    return PathManager.get_local_path(catalog_path)
def setup_logger(output=None,
                 distributed_rank=0,
                 *,
                 color=True,
                 name="dl_lib",
                 abbrev_name=None):
    """
    Initialize a logger with the given settings.

    Args:
        output (str): a file name or a directory to save log. If None, will
            not save log file. If ends with ".txt" or ".log", assumed to be a
            file name. Otherwise, logs will be saved to `output/log.txt`.
        name (str): the root module name of this logger
        abbrev_name (str): an abbreviation of the module, to avoid long names
            in logs. Set to "" to not log the root module in logs.
            By default, will abbreviate "dl_lib" to "c2" and leave other
            modules unchanged.
    """
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)
    logger.propagate = False

    if abbrev_name is None:
        abbrev_name = "c2" if name == "dl_lib" else name

    plain_formatter = logging.Formatter(
        "[%(asctime)s] %(name)s %(levelname)s: %(message)s",
        datefmt="%m/%d %H:%M:%S")
    # stdout logging: master only
    if distributed_rank == 0:
        ch = logging.StreamHandler(stream=sys.stdout)
        ch.setLevel(logging.DEBUG)
        if color:
            formatter = _ColorfulFormatter(
                colored("[%(asctime)s %(name)s]: ", "green") + "%(message)s",
                datefmt="%m/%d %H:%M:%S",
                root_name=name,
                abbrev_name=str(abbrev_name),
            )
        else:
            formatter = plain_formatter
        ch.setFormatter(formatter)
        logger.addHandler(ch)

    # file logging: all workers
    if output is not None:
        if output.endswith(".txt") or output.endswith(".log"):
            filename = output
        else:
            filename = os.path.join(output, "log.txt")
        if distributed_rank > 0:
            filename = filename + ".rank{}".format(distributed_rank)
        PathManager.mkdirs(os.path.dirname(filename))

        fh = logging.StreamHandler(_cached_log_stream(filename))
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(plain_formatter)
        logger.addHandler(fh)

    return logger
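
# Example usage (a sketch; the output directory is hypothetical):
#
#     logger = setup_logger(output="./output", distributed_rank=0)
#     logger.info("hello")  # printed to stdout and appended to ./output/log.txt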
def _cached_log_stream(filename):
    return PathManager.open(filename, "a")
def evaluate(self):
    """
    Evaluates standard semantic segmentation metrics (http://cocodataset.org/#stuff-eval):

    * Mean intersection-over-union averaged across classes (mIoU)
    * Frequency Weighted IoU (fwIoU)
    * Mean pixel accuracy averaged across classes (mACC)
    * Pixel Accuracy (pACC)
    """
    if self._distributed:
        synchronize()
        conf_matrix_list = all_gather(self._conf_matrix)
        self._predictions = all_gather(self._predictions)
        self._predictions = list(itertools.chain(*self._predictions))
        self._real_flops = all_gather(self._real_flops)
        self._real_flops = list(itertools.chain(*self._real_flops))
        self._expt_flops = all_gather(self._expt_flops)
        self._expt_flops = list(itertools.chain(*self._expt_flops))
        if not is_main_process():
            return

        self._conf_matrix = np.zeros_like(self._conf_matrix)
        for conf_matrix in conf_matrix_list:
            self._conf_matrix += conf_matrix

    if self._output_dir:
        PathManager.mkdirs(self._output_dir)
        file_path = os.path.join(self._output_dir, "sem_seg_predictions.json")
        with PathManager.open(file_path, "w") as f:
            f.write(json.dumps(self._predictions))

    # np.float was removed in NumPy 1.24; plain float is equivalent here
    acc = np.zeros(self._num_classes, dtype=float)
    iou = np.zeros(self._num_classes, dtype=float)
    tp = self._conf_matrix.diagonal()[:-1].astype(float)
    pos_gt = np.sum(self._conf_matrix[:-1, :-1], axis=0).astype(float)
    class_weights = pos_gt / np.sum(pos_gt)
    pos_pred = np.sum(self._conf_matrix[:-1, :-1], axis=1).astype(float)
    acc_valid = pos_gt > 0
    acc[acc_valid] = tp[acc_valid] / pos_gt[acc_valid]
    iou_valid = (pos_gt + pos_pred) > 0
    union = pos_gt + pos_pred - tp
    iou[acc_valid] = tp[acc_valid] / union[acc_valid]
    macc = np.sum(acc) / np.sum(acc_valid)
    miou = np.sum(iou) / np.sum(iou_valid)
    fiou = np.sum(iou * class_weights)
    pacc = np.sum(tp) / np.sum(pos_gt)

    res = {}
    res["mIoU"] = 100 * miou
    res["fwIoU"] = 100 * fiou
    res["mACC"] = 100 * macc
    res["pACC"] = 100 * pacc

    # add flops calculation
    if len(self._real_flops) > 0 and len(self._expt_flops) > 0:
        res["mean_real_flops"] = (sum(self._real_flops) / len(self._real_flops)) / 1e3
        res["max_real_flops"] = max(self._real_flops) / 1e3
        res["min_real_flops"] = min(self._real_flops) / 1e3
        res["mean_expt_flops"] = (sum(self._expt_flops) / len(self._expt_flops)) / 1e3
        res["max_expt_flops"] = max(self._expt_flops) / 1e3
        res["min_expt_flops"] = min(self._expt_flops) / 1e3

    if self._output_dir:
        file_path = os.path.join(self._output_dir, "sem_seg_evaluation.pth")
        with PathManager.open(file_path, "wb") as f:
            torch.save(res, f)
    results = OrderedDict({"sem_seg": res})
    self._logger.info(results)
    return results
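
# A worked toy example of the metrics above, with 2 classes plus the trailing
# ignore row/column (the numbers are made up). The confusion matrix is indexed
# as conf[pred, gt], matching the bincount indexing in `process`:
#
#     conf = np.array([[3., 1., 0.],
#                      [1., 5., 0.],
#                      [0., 0., 0.]])
#     tp = conf.diagonal()[:-1]              # [3, 5]
#     pos_gt = conf[:-1, :-1].sum(axis=0)    # [4, 6]
#     pos_pred = conf[:-1, :-1].sum(axis=1)  # [4, 6]
#     iou = tp / (pos_gt + pos_pred - tp)    # [3/5, 5/7]
#     miou = iou.mean()                      # ~0.657, reported as mIoU ~65.7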
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in dl_lib Dataset format.

    Returns:
        dict: a format that builtin models in dl_lib accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    if "annotations" not in dataset_dict:
        image, transforms = T.apply_transform_gens(
            ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image)
    else:
        # Crop around an instance if there are instances in the image.
        # USER: Remove if you don't use cropping
        if self.crop_gen:
            crop_tfm = utils.gen_crop_transform_with_instance(
                self.crop_gen.get_crop_size(image.shape[:2]),
                image.shape[:2],
                np.random.choice(dataset_dict["annotations"]),
            )
            image = crop_tfm.apply_image(image)
        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        if self.crop_gen:
            transforms = crop_tfm + transforms

    image_shape = image.shape[:2]  # h, w

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of
    # pickle & mp.Queue. Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(
        image.transpose(2, 0, 1).astype("float32"))
    # Can use uint8 if it turns out to be slow some day

    # USER: Remove if you don't use pre-computed proposals.
    if self.load_proposals:
        utils.transform_proposals(dataset_dict, image_shape, transforms,
                                  self.min_box_side_len, self.proposal_topk)

    if not self.is_train and not self.eval_with_gt:
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            if not self.keypoint_on:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        annos = [
            utils.transform_instance_annotations(
                obj,
                transforms,
                image_shape,
                keypoint_hflip_indices=self.keypoint_hflip_indices,
                dot_number=self.dot_number)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.mask_format)
        # Create a tight bounding box from masks, useful when image is cropped
        if self.crop_gen and instances.has("gt_masks"):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

    # USER: Remove if you don't do semantic/panoptic segmentation.
    if "sem_seg_file_name" in dataset_dict:
        with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f:
            sem_seg_gt = Image.open(f)
            sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
        sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
        sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
        dataset_dict["sem_seg"] = sem_seg_gt
    return dataset_dict
def cityscapes_files_to_dict(files, from_json, to_polygons):
    """
    Parse cityscapes annotation files to a dict.

    Args:
        files (tuple): consists of (image_file, instance_id_file, label_id_file, json_file)
        from_json (bool): whether to read annotations from the raw json file or
            the png files.
        to_polygons (bool): whether to represent the segmentation as polygons
            (COCO's format) instead of masks (cityscapes's format).

    Returns:
        A dict in dl_lib Dataset format.
    """
    from cityscapesscripts.helpers.labels import id2label, name2label

    image_file, instance_id_file, _, json_file = files

    annos = []

    if from_json:
        from shapely.geometry import MultiPolygon, Polygon

        with PathManager.open(json_file, "r") as f:
            jsonobj = json.load(f)
        ret = {
            "file_name": image_file,
            "image_id": os.path.basename(image_file),
            "height": jsonobj["imgHeight"],
            "width": jsonobj["imgWidth"],
        }

        # `polygons_union` contains the union of all valid polygons.
        polygons_union = Polygon()

        # CityscapesScripts draw the polygons in sequential order
        # and each polygon *overwrites* existing ones. See
        # (https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/preparation/json2instanceImg.py)  # noqa
        # We use reverse order, and each polygon *avoids* early ones.
        # This will resolve the polygon overlaps in the same way as CityscapesScripts.
        for obj in jsonobj["objects"][::-1]:
            if "deleted" in obj:  # cityscapes data format specific
                continue
            label_name = obj["label"]

            try:
                label = name2label[label_name]
            except KeyError:
                if label_name.endswith("group"):  # crowd area
                    label = name2label[label_name[:-len("group")]]
                else:
                    raise
            if label.id < 0:  # cityscapes data format
                continue

            # Cityscapes's raw annotations uses integer coordinates
            # Therefore +0.5 here
            poly_coord = np.asarray(obj["polygon"], dtype="f4") + 0.5
            # CityscapesScript uses PIL.ImageDraw.polygon to rasterize
            # polygons for evaluation. This function operates in integer space
            # and draws each pixel whose center falls into the polygon.
            # Therefore it draws a polygon which is 0.5 "fatter" in expectation.
            # We therefore dilate the input polygon by 0.5 as our input.
            poly = Polygon(poly_coord).buffer(0.5, resolution=4)

            if not label.hasInstances or label.ignoreInEval:
                # even if we won't store the polygon it still contributes to
                # overlaps resolution
                polygons_union = polygons_union.union(poly)
                continue

            # Take non-overlapping part of the polygon
            poly_wo_overlaps = poly.difference(polygons_union)
            if poly_wo_overlaps.is_empty:
                continue
            polygons_union = polygons_union.union(poly)

            anno = {}
            anno["iscrowd"] = label_name.endswith("group")
            anno["category_id"] = label.id

            if isinstance(poly_wo_overlaps, Polygon):
                poly_list = [poly_wo_overlaps]
            elif isinstance(poly_wo_overlaps, MultiPolygon):
                poly_list = poly_wo_overlaps.geoms
            else:
                raise NotImplementedError(
                    "Unknown geometric structure {}".format(poly_wo_overlaps))

            poly_coord = []
            for poly_el in poly_list:
                # COCO API can work only with exterior boundaries now, hence
                # we store only them.
                # TODO: store both exterior and interior boundaries once other
                # parts of the codebase support holes in polygons.
                poly_coord.append(list(chain(*poly_el.exterior.coords)))
            anno["segmentation"] = poly_coord
            (xmin, ymin, xmax, ymax) = poly_wo_overlaps.bounds

            anno["bbox"] = (xmin, ymin, xmax, ymax)
            anno["bbox_mode"] = BoxMode.XYXY_ABS

            annos.append(anno)
    else:
        # See also the official annotation parsing scripts at
        # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/instances2dict.py  # noqa
        with PathManager.open(instance_id_file, "rb") as f:
            inst_image = np.asarray(Image.open(f), order="F")
        # ids < 24 are stuff labels (filtering them first is about 5% faster)
        flattened_ids = np.unique(inst_image[inst_image >= 24])

        ret = {
            "file_name": image_file,
            "image_id": os.path.basename(image_file),
            "height": inst_image.shape[0],
            "width": inst_image.shape[1],
        }

        for instance_id in flattened_ids:
            # For non-crowd annotations, instance_id // 1000 is the label_id
            # Crowd annotations have <1000 instance ids
            label_id = instance_id // 1000 if instance_id >= 1000 else instance_id
            label = id2label[label_id]
            if not label.hasInstances or label.ignoreInEval:
                continue

            anno = {}
            anno["iscrowd"] = instance_id < 1000
            anno["category_id"] = label.id

            mask = np.asarray(inst_image == instance_id, dtype=np.uint8, order="F")

            inds = np.nonzero(mask)
            ymin, ymax = inds[0].min(), inds[0].max()
            xmin, xmax = inds[1].min(), inds[1].max()
            anno["bbox"] = (xmin, ymin, xmax, ymax)
            if xmax <= xmin or ymax <= ymin:
                continue
            anno["bbox_mode"] = BoxMode.XYXY_ABS
            if to_polygons:
                # This conversion comes from D4809743 and D5171122,
                # when Mask-RCNN was first developed.
                contours = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL,
                                            cv2.CHAIN_APPROX_NONE)[-2]
                polygons = [
                    c.reshape(-1).tolist() for c in contours if len(c) >= 3
                ]
                # opencv can produce invalid polygons
                if len(polygons) == 0:
                    continue
                anno["segmentation"] = polygons
            else:
                anno["segmentation"] = mask_util.encode(mask[:, :, None])[0]
            annos.append(anno)
    ret["annotations"] = annos
    return ret
def _open(self, path, mode="r", **kwargs):
    return PathManager.open(self._get_local_path(path), mode, **kwargs)
def load_coco_json(json_file,
                   image_root,
                   dataset_name=None,
                   extra_annotation_keys=None):
    """
    Load a json file with COCO's instances annotation format.
    Currently supports instance detection, instance segmentation,
    and person keypoints annotations.

    Args:
        json_file (str): full path to the json file in COCO instances
            annotation format.
        image_root (str): the directory where the images in this json file exists.
        dataset_name (str): the name of the dataset (e.g., coco_2017_train).
            If provided, this function will also put "thing_classes" into
            the metadata associated with this dataset.
        extra_annotation_keys (list[str]): list of per-annotation keys that
            should also be loaded into the dataset dict (besides "iscrowd",
            "bbox", "keypoints", "category_id", "segmentation"). The values
            for these keys will be returned as-is. For example, the densepose
            annotations are loaded in this way.

    Returns:
        list[dict]: a list of dicts in dl_lib standard format. (See
        `Using Custom Datasets </tutorials/datasets.html>`_ )

    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
    """
    from pycocotools.coco import COCO

    timer = Timer()
    json_file = PathManager.get_local_path(json_file)
    with contextlib.redirect_stdout(io.StringIO()):
        coco_api = COCO(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(
            json_file, timer.seconds()))

    id_map = None
    if dataset_name is not None:
        meta = MetadataCatalog.get(dataset_name)
        cat_ids = sorted(coco_api.getCatIds())
        cats = coco_api.loadCats(cat_ids)
        # The categories in a custom json file may not be sorted.
        thing_classes = [
            c["name"] for c in sorted(cats, key=lambda x: x["id"])
        ]
        meta.thing_classes = thing_classes

        # In COCO, certain category ids are artificially removed,
        # and by convention they are always ignored.
        # We deal with COCO's id issue and translate
        # the category ids to contiguous ids in [0, 80).

        # It works by looking at the "categories" field in the json, therefore
        # if users' own json also have incontiguous ids, we'll
        # apply this mapping as well but print a warning.
        if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)):
            if "coco" not in dataset_name:
                logger.warning("""
Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.
""")
        id_map = {v: i for i, v in enumerate(cat_ids)}
        meta.thing_dataset_id_to_contiguous_id = id_map

    # sort indices for reproducible results
    img_ids = sorted(list(coco_api.imgs.keys()))
    # imgs is a list of dicts, each looks something like:
    # {'license': 4,
    #  'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
    #  'file_name': 'COCO_val2014_000000001268.jpg',
    #  'height': 427,
    #  'width': 640,
    #  'date_captured': '2013-11-17 05:57:24',
    #  'id': 1268}
    imgs = coco_api.loadImgs(img_ids)
    # anns is a list[list[dict]], where each dict is an annotation
    # record for an object. The inner list enumerates the objects in an image
    # and the outer list enumerates over images. Example of anns[0]:
    # [{'segmentation': [[192.81,
    #     247.09,
    #     ...
    #     219.03,
    #     249.06]],
    #   'area': 1035.749,
    #   'iscrowd': 0,
    #   'image_id': 1268,
    #   'bbox': [192.81, 224.8, 74.73, 33.43],
    #   'category_id': 16,
    #   'id': 42986},
    #  ...]
    anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]

    if "minival" not in json_file:
        # The popular valminusminival & minival annotations for COCO2014
        # contain this bug. However the ratio of buggy annotations there is
        # tiny and does not affect accuracy.
        # Therefore we explicitly white-list them.
        ann_ids = [
            ann["id"] for anns_per_image in anns for ann in anns_per_image
        ]
        assert len(set(ann_ids)) == len(
            ann_ids), "Annotation ids in '{}' are not unique!".format(json_file)

    imgs_anns = list(zip(imgs, anns))

    logger.info("Loaded {} images in COCO format from {}".format(
        len(imgs_anns), json_file))

    dataset_dicts = []

    ann_keys = ["iscrowd", "bbox", "keypoints", "category_id"
                ] + (extra_annotation_keys or [])

    num_instances_without_valid_segmentation = 0

    for (img_dict, anno_dict_list) in imgs_anns:
        record = {}
        record["file_name"] = os.path.join(image_root, img_dict["file_name"])
        record["height"] = img_dict["height"]
        record["width"] = img_dict["width"]
        image_id = record["image_id"] = img_dict["id"]

        objs = []
        for anno in anno_dict_list:
            # Check that the image_id in this annotation is the same as
            # the image_id we're looking at.
            # This fails only when the data parsing logic or the annotation
            # file is buggy.

            # The original COCO valminusminival2014 & minival2014 annotation
            # files actually contains bugs that, together with certain ways
            # of using COCO API, can trigger this assertion.
            assert anno["image_id"] == image_id

            assert anno.get("ignore", 0) == 0

            obj = {key: anno[key] for key in ann_keys if key in anno}

            segm = anno.get("segmentation", None)
            if segm:  # either list[list[float]] or dict(RLE)
                if not isinstance(segm, dict):
                    # filter out invalid polygons (< 3 points)
                    segm = [
                        poly for poly in segm
                        if len(poly) % 2 == 0 and len(poly) >= 6
                    ]
                    if len(segm) == 0:
                        num_instances_without_valid_segmentation += 1
                        continue  # ignore this instance
                obj["segmentation"] = segm

            keypts = anno.get("keypoints", None)
            if keypts:  # list[int]
                for idx, v in enumerate(keypts):
                    if idx % 3 != 2:
                        # COCO's segmentation coordinates are floating points
                        # in [0, H or W], but keypoint coordinates are
                        # integers in [0, H-1 or W-1]. Therefore we assume the
                        # coordinates are "pixel indices" and add 0.5 to
                        # convert to floating point coordinates.
                        keypts[idx] = v + 0.5
                obj["keypoints"] = keypts

            obj["bbox_mode"] = BoxMode.XYWH_ABS
            if id_map:
                obj["category_id"] = id_map[obj["category_id"]]
            objs.append(obj)
        record["annotations"] = objs
        dataset_dicts.append(record)

    if num_instances_without_valid_segmentation > 0:
        logger.warning(
            "Filtered out {} instances without valid segmentation. "
            "There might be issues in your dataset generation process.".format(
                num_instances_without_valid_segmentation))

    return dataset_dicts
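
# Example usage (a sketch; paths are hypothetical and assume a local COCO copy):
#
#     dicts = load_coco_json(
#         "datasets/coco/annotations/instances_val2017.json",
#         "datasets/coco/val2017",
#         dataset_name="coco_2017_val")
#     print(len(dicts), dicts[0]["annotations"][0]["bbox_mode"])  # BoxMode.XYWH_ABS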
def load_sem_seg(gt_root, image_root, gt_ext="png", image_ext="jpg"):
    """
    Load semantic segmentation datasets. All files under "gt_root" with
    "gt_ext" extension are treated as ground truth annotations and all files
    under "image_root" with "image_ext" extension as input images. Ground
    truth and input images are matched using file paths relative to "gt_root"
    and "image_root" respectively without taking into account file extensions.
    This works for COCO as well as some other datasets.

    Args:
        gt_root (str): full path to ground truth semantic segmentation files.
            Semantic segmentation annotations are stored as images with
            integer values in pixels that represent corresponding semantic
            labels.
        image_root (str): the directory where the input images are.
        gt_ext (str): file extension for ground truth annotations.
        image_ext (str): file extension for input images.

    Returns:
        list[dict]: a list of dicts in dl_lib standard format without
        instance-level annotation.

    Notes:
        1. This function does not read the image and ground truth files.
           The results do not have the "image" and "sem_seg" fields.
    """

    # We match input images with ground truth based on their relative
    # filepaths (without file extensions) starting from 'image_root' and
    # 'gt_root' respectively.
    def file2id(folder_path, file_path):
        # extract relative path starting from `folder_path`
        image_id = os.path.normpath(os.path.relpath(file_path, start=folder_path))
        # remove file extension
        image_id = os.path.splitext(image_id)[0]
        return image_id

    input_files = sorted(
        (os.path.join(image_root, f)
         for f in PathManager.ls(image_root) if f.endswith(image_ext)),
        key=lambda file_path: file2id(image_root, file_path),
    )
    gt_files = sorted(
        (os.path.join(gt_root, f)
         for f in PathManager.ls(gt_root) if f.endswith(gt_ext)),
        key=lambda file_path: file2id(gt_root, file_path),
    )

    assert len(gt_files) > 0, "No annotations found in {}.".format(gt_root)

    # Use the intersection, so that val2017_100 annotations can run smoothly
    # with val2017 images
    if len(input_files) != len(gt_files):
        logger.warning(
            "Directory {} and {} has {} and {} files, respectively.".format(
                image_root, gt_root, len(input_files), len(gt_files)))
        input_basenames = [
            os.path.basename(f)[:-len(image_ext)] for f in input_files
        ]
        gt_basenames = [os.path.basename(f)[:-len(gt_ext)] for f in gt_files]
        intersect = list(set(input_basenames) & set(gt_basenames))
        # sort, otherwise each worker may obtain a list[dict] in different order
        intersect = sorted(intersect)
        logger.warning("Will use their intersection of {} files.".format(
            len(intersect)))
        input_files = [
            os.path.join(image_root, f + image_ext) for f in intersect
        ]
        gt_files = [os.path.join(gt_root, f + gt_ext) for f in intersect]

    logger.info("Loaded {} images with semantic segmentation from {}".format(
        len(input_files), image_root))

    dataset_dicts = []
    for (img_path, gt_path) in zip(input_files, gt_files):
        local_path = PathManager.get_local_path(gt_path)
        w, h = imagesize.get(local_path)
        record = {}
        record["file_name"] = img_path
        record["sem_seg_file_name"] = gt_path
        record["height"] = h
        record["width"] = w
        dataset_dicts.append(record)

    return dataset_dicts
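
# Example usage (a sketch; paths follow the COCO stuff layout and are hypothetical):
#
#     dicts = load_sem_seg(
#         gt_root="datasets/coco/stuffthingmaps/train2017",
#         image_root="datasets/coco/train2017",
#         gt_ext="png", image_ext="jpg")
#     print(dicts[0]["sem_seg_file_name"])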
def _get_local_path(self, path):
    name = path[len(self.PREFIX):]
    return PathManager.get_local_path(self.S3_DETECTRON2_PREFIX + name)