def convert_to_herb_json(dataset_name, output_file, allow_cached=True):
    """
    Converts a dataset into COCO format and saves it to a json file.
    dataset_name must be registered in DatasetCatalog and in herbarium's
    standard format.

    Args:
        dataset_name: reference from the config file to the catalogs;
            must be registered in DatasetCatalog and in herbarium's standard format
        output_file: path of the json file to save to
        allow_cached: if the json file is already present, skip the conversion
    """
    # TODO: The dataset or the conversion script *may* change,
    # a checksum would be useful for validating the cached data
    PathManager.mkdirs(os.path.dirname(output_file))
    with file_lock(output_file):
        if PathManager.exists(output_file) and allow_cached:
            logger.warning(
                f"Using previously cached COCO format annotations at '{output_file}'. "
                "You need to clear the cache file if your dataset has been modified."
            )
        else:
            logger.info(f"Converting annotations of dataset '{dataset_name}' to HERB format ...")
            coco_dict = convert_to_herb_dict(dataset_name)

            logger.info(f"Caching COCO format annotations at '{output_file}' ...")
            # write atomically: dump to a tmp file, then move it into place
            tmp_file = output_file + ".tmp"
            with PathManager.open(tmp_file, "w") as f:
                json.dump(coco_dict, f)
            shutil.move(tmp_file, output_file)
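
# Usage sketch (not from the original source; the dataset name and path are
# hypothetical): a one-time conversion of a registered dataset, reusing the
# cached json on subsequent runs.
def _demo_convert_to_herb_json():
    convert_to_herb_json(
        dataset_name="herb_2021_val",  # assumed to be registered in DatasetCatalog
        output_file="./output/herb_2021_val_herb_format.json",
        allow_cached=True,  # skip conversion if the json already exists
    )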
def evaluate(self, img_ids=None):
    """
    Args:
        img_ids: a list of image IDs to evaluate on. Default to None for the whole dataset
    """
    if self._distributed:
        comm.synchronize()
        predictions = comm.gather(self._predictions, dst=0)
        predictions = list(itertools.chain(*predictions))

        if not comm.is_main_process():
            return {}
    else:
        predictions = self._predictions

    if len(predictions) == 0:
        self._logger.warning("[HERBEvaluator] Did not receive valid predictions.")
        return {}

    if self._output_dir:
        PathManager.mkdirs(self._output_dir)
        file_path = os.path.join(self._output_dir, "instances_predictions.pth")
        with PathManager.open(file_path, "wb") as f:
            torch.save(predictions, f)

    self._results = OrderedDict()
    self._eval_predictions(predictions, img_ids=img_ids)
    # Copy so the caller can do whatever with results
    # return copy.deepcopy(self._results)
    return None
def default_setup(cfg, args):
    """
    Perform some basic common setups at the beginning of a job, including:

    1. Set up the herbarium logger
    2. Log basic information about environment, cmdline arguments, and config
    3. Backup the config to the output directory

    Args:
        cfg (CfgNode or omegaconf.DictConfig): the full config to be used
        args (argparse.Namespace): the command line arguments to be logged
    """
    output_dir = _try_get_key(cfg, "OUTPUT_DIR", "output_dir", "train.output_dir")
    if comm.is_main_process() and output_dir:
        PathManager.mkdirs(output_dir)

    rank = comm.get_rank()
    setup_logger(output_dir, distributed_rank=rank, name="fvcore")
    logger = setup_logger(output_dir, distributed_rank=rank)

    logger.info("Rank of current process: {}. World size: {}".format(rank, comm.get_world_size()))
    logger.info("Environment info:\n" + collect_env_info())

    logger.info("Command line arguments: " + str(args))
    if hasattr(args, "config_file") and args.config_file != "":
        logger.info(
            "Contents of args.config_file={}:\n{}".format(
                args.config_file,
                _highlight(PathManager.open(args.config_file, "r").read(), args.config_file),
            )
        )

    if comm.is_main_process() and output_dir:
        # Note: some of our scripts may expect the existence of
        # config.yaml in output directory
        path = os.path.join(output_dir, "config.yaml")
        if isinstance(cfg, CfgNode):
            logger.info("Running with full config:\n{}".format(_highlight(cfg.dump(), ".yaml")))
            with PathManager.open(path, "w") as f:
                f.write(cfg.dump())
        else:
            LazyConfig.save(cfg, path)
        logger.info("Full config saved to {}".format(path))

    # make sure each worker has a different, yet deterministic seed if specified
    seed = _try_get_key(cfg, "SEED", "train.seed", default=-1)
    seed_all_rng(None if seed < 0 else seed + rank)

    # cudnn benchmark has large overhead. It shouldn't be used considering the small size of
    # typical validation set.
    if not (hasattr(args, "eval_only") and args.eval_only):
        torch.backends.cudnn.benchmark = _try_get_key(
            cfg, "CUDNN_BENCHMARK", "train.cudnn_benchmark", default=False
        )
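
# Usage sketch (illustrative, not part of the original module): a typical
# training entry point calls default_setup right after parsing arguments and
# building the config, before constructing the model. `args` is an
# argparse.Namespace carrying at least `config_file` and `eval_only`.
def _demo_entry_point(cfg, args):
    default_setup(cfg, args)  # logger, env info, config backup, rng seeding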
def load(filename: str, keys: Union[None, str, Tuple[str, ...]] = None):
    """
    Load a config file.

    Args:
        filename: absolute path or relative path w.r.t. the current working directory
        keys: keys to load and return. If not given, return all keys
            (whose values are config objects) in a dict.
    """
    has_keys = keys is not None
    filename = filename.replace("/./", "/")  # redundant
    if os.path.splitext(filename)[1] not in [".py", ".yaml", ".yml"]:
        raise ValueError(f"Config file {filename} has to be a python or yaml file.")
    if filename.endswith(".py"):
        _validate_py_syntax(filename)

        with _patch_import():
            # Record the filename
            module_namespace = {
                "__file__": filename,
                "__package__": _random_package_name(filename),
            }
            with PathManager.open(filename) as f:
                content = f.read()
            # Compile first with filename to:
            # 1. make the filename appear in stack traces
            # 2. make load_rel able to find its parent's (possibly remote) location
            exec(compile(content, filename, "exec"), module_namespace)

        ret = module_namespace
    else:
        with PathManager.open(filename) as f:
            obj = yaml.unsafe_load(f)
        ret = OmegaConf.create(obj, flags={"allow_objects": True})

    if has_keys:
        if isinstance(keys, str):
            return _cast_to_config(ret[keys])
        else:
            return tuple(_cast_to_config(ret[a]) for a in keys)
    else:
        if filename.endswith(".py"):
            # when not specified, only load those that are config objects
            ret = DictConfig(
                {
                    name: _cast_to_config(value)
                    for name, value in ret.items()
                    if isinstance(value, (DictConfig, ListConfig, dict))
                    and not name.startswith("_")
                },
                flags={"allow_objects": True},
            )
        return ret
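
# Usage sketch (hypothetical paths and keys): loading an entire config file
# versus cherry-picking keys. A ".py" config returns its config objects as a
# DictConfig; a ".yaml" file is parsed through OmegaConf.
def _demo_load():
    full_cfg = load("configs/base.yaml")  # everything, as a DictConfig
    model, train = load("configs/base.py", keys=("model", "train"))
    return full_cfg, model, train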
def _eval_predictions(self, predictions, img_ids=None):
    """
    Evaluate predictions. Fill self._results with the metrics of the tasks.
    """
    self._logger.info("Preparing results for HERB format ...")

    if self._output_dir:
        file_path = os.path.join(self._output_dir, "coco_instances_results.json")
        self._logger.info("Saving results to {}".format(file_path))
        with PathManager.open(file_path, "w") as f:
            f.write(json.dumps(predictions))
            f.flush()

    if not self._do_evaluation:
        self._logger.info("Annotations are not available for evaluation.")
        return

    self._logger.info("Evaluating predictions with official HERB API...")
    herb_eval = (
        _evaluate_predictions_on_herb(
            self._herb_api,
            predictions,
            img_ids=img_ids,
        )
        if len(predictions) > 0
        else None
    )
def _load_file(self, filename):
    if filename.endswith(".pkl"):
        with PathManager.open(filename, "rb") as f:
            data = pickle.load(f, encoding="latin1")
        if "model" in data and "__author__" in data:
            # file is in Detectron2 model zoo format
            self.logger.info("Reading a file from '{}'".format(data["__author__"]))
            return data
        else:
            # assume file is from Caffe2 / Detectron1 model zoo
            if "blobs" in data:
                # Detection models have "blobs", but ImageNet models don't
                data = data["blobs"]
            data = {k: v for k, v in data.items() if not k.endswith("_momentum")}
            return {"model": data, "__author__": "Caffe2", "matching_heuristics": True}

    loaded = super()._load_file(filename)  # load native pth checkpoint
    if "model" not in loaded:
        loaded = {"model": loaded}
    return loaded
def _cached_log_stream(filename):
    # use 1K buffer if writing to cloud storage
    io = PathManager.open(filename, "a", buffering=1024 if "://" in filename else -1)
    atexit.register(io.close)
    return io
def _validate_py_syntax(filename):
    # see also https://github.com/open-mmlab/mmcv/blob/master/mmcv/utils/config.py
    with PathManager.open(filename, "r") as f:
        content = f.read()
    try:
        ast.parse(content)
    except SyntaxError as e:
        raise SyntaxError(f"Config file {filename} has syntax error!") from e
def __init__(self, json_file, window_size=20):
    """
    Args:
        json_file (str): path to the json file. New data will be appended if the file exists.
        window_size (int): the window size of median smoothing for the scalars whose
            `smoothing_hint` are True.
    """
    self._file_handle = PathManager.open(json_file, "a")
    self._window_size = window_size
    self._last_write = -1
def after_step(self):
    if self._profiler is None:
        return
    self._profiler.__exit__(None, None, None)
    PathManager.mkdirs(self._output_dir)
    out_file = os.path.join(
        self._output_dir, "profiler-trace-iter{}.json".format(self.trainer.iter)
    )
    if "://" not in out_file:
        self._profiler.export_chrome_trace(out_file)
    else:
        # Support non-posix filesystems
        with tempfile.TemporaryDirectory(prefix="detectron2_profiler") as d:
            tmp_file = os.path.join(d, "tmp.json")
            self._profiler.export_chrome_trace(tmp_file)
            with open(tmp_file) as f:
                content = f.read()
        with PathManager.open(out_file, "w") as f:
            f.write(content)
def update_meta(json_file, dataset_name=None):
    from pyherbtools.herb import HERB

    if dataset_name is not None and "test" not in dataset_name:
        logger.info("Updating metadata of {} dataset".format(dataset_name))
        timer = Timer()
        json_file = PathManager.get_local_path(json_file)
        with contextlib.redirect_stdout(io.StringIO()):
            herb_api = HERB(json_file)
        if timer.seconds() > 1:
            logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()))

        meta = MetadataCatalog.get(dataset_name)
        cat_ids = sorted(herb_api.getCatIds())
        cats = herb_api.loadCats(cat_ids)
        # The categories in a custom json file may not be sorted.
        thing_classes = [c["name"] for c in sorted(cats, key=lambda x: x["id"])]
        meta.thing_classes = thing_classes

        logger.info("Creating hierarchy target from given annotation")
        order_family_hierarchy = torch.zeros(len(meta.family_map), len(meta.order_map))
        family_species_hierarchy = torch.zeros(len(meta.species_map), len(meta.family_map))

        for cat in cats:
            order_id = meta.order_map[cat["order"]]
            family_id = meta.family_map[cat["family"]]
            species_id = meta.species_map[cat["name"]]
            order_family_hierarchy[family_id][order_id] = 1
            family_species_hierarchy[species_id][family_id] = 1

        from torch import nn

        order_family_hierarchy = nn.Softmax(dim=1)(order_family_hierarchy)
        family_species_hierarchy = nn.Softmax(dim=1)(family_species_hierarchy)

        meta.hierarchy_prior = {
            "order|family": order_family_hierarchy,
            "family|species": family_species_hierarchy,
        }
        meta.cats = cats
        meta.num_classes = {
            "family": len(meta.family_map),
            "order": len(meta.order_map),
            "species": len(meta.species_map),
        }
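
# Worked sketch (standalone, hypothetical sizes): the hierarchy prior above is
# a row-softmaxed 0/1 parent-indicator matrix. Note that softmax over a 0/1 row
# also puts some mass on non-parent entries; this mirrors the construction in
# update_meta exactly.
def _demo_hierarchy_prior():
    import torch
    from torch import nn

    indicator = torch.zeros(3, 2)  # 3 families x 2 orders
    indicator[0][1] = 1  # family 0 belongs to order 1
    indicator[1][0] = 1  # family 1 belongs to order 0
    indicator[2][1] = 1  # family 2 belongs to order 1
    return nn.Softmax(dim=1)(indicator)  # each row now sums to 1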
def find_relative_file(original_file, relative_import_path, level):
    cur_file = os.path.dirname(original_file)
    for _ in range(level - 1):
        cur_file = os.path.dirname(cur_file)
    cur_name = relative_import_path.lstrip(".")
    for part in cur_name.split("."):
        cur_file = os.path.join(cur_file, part)
    # NOTE: directory import is not handled. Because then it's unclear
    # if such import should produce python module or DictConfig. This can
    # be discussed further if needed.
    if not cur_file.endswith(".py"):
        cur_file += ".py"
    if not PathManager.isfile(cur_file):
        raise ImportError(
            f"Cannot import name {relative_import_path} from "
            f"{original_file}: {cur_file} has to exist."
        )
    return cur_file
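
# Worked example (hypothetical paths): with
#   original_file="/cfgs/exp/a.py", relative_import_path=".base", level=1
# the dirname loop runs zero times, cur_name == "base", and the function
# returns "/cfgs/exp/base.py" (raising ImportError if that file is missing).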
def merge_from_file(self, cfg_filename: str, allow_unsafe: bool = True) -> None:
    assert PathManager.isfile(cfg_filename), f"Config file '{cfg_filename}' does not exist!"
    loaded_cfg = self.load_yaml_with_base(cfg_filename, allow_unsafe=allow_unsafe)
    loaded_cfg = type(self)(loaded_cfg)

    # defaults.py needs to import CfgNode
    from .defaults import _C

    latest_ver = _C.VERSION
    assert (
        latest_ver == self.VERSION
    ), "CfgNode.merge_from_file is only allowed on a config object of latest version!"

    logger = logging.getLogger(__name__)

    loaded_ver = loaded_cfg.get("VERSION", None)
    if loaded_ver is None:
        from .compat import guess_version

        loaded_ver = guess_version(loaded_cfg, cfg_filename)
    assert loaded_ver <= self.VERSION, "Cannot merge a v{} config into a v{} config.".format(
        loaded_ver, self.VERSION
    )

    if loaded_ver == self.VERSION:
        self.merge_from_other_cfg(loaded_cfg)
    else:
        # compat.py needs to import CfgNode
        from .compat import upgrade_config, downgrade_config

        logger.warning(
            "Loading an old v{} config file '{}' by automatically upgrading to v{}. "
            "See docs/CHANGELOG.md for instructions to update your files.".format(
                loaded_ver, cfg_filename, self.VERSION
            )
        )
        # To convert, first obtain a full config at an old version
        old_self = downgrade_config(self, to_version=loaded_ver)
        old_self.merge_from_other_cfg(loaded_cfg)
        new_config = upgrade_config(old_self)
        self.clear()
        self.update(new_config)
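
# Usage sketch (hypothetical filename): merging a yaml file into a clone of
# the default config; old config versions are upgraded automatically.
def _demo_merge_from_file():
    from .defaults import _C  # the codebase's default config, as used above

    cfg = _C.clone()
    cfg.merge_from_file("configs/my_experiment.yaml")
    return cfg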
def read_image(file_name, format=None):
    """
    Read an image into the given format.
    Will apply rotation and flipping if the image has such exif information.

    Args:
        file_name (str): image file path
        format (str): one of the supported image modes in PIL, or "BGR" or "YUV-BT.601".

    Returns:
        image (np.ndarray):
            an HWC image in the given format, which is 0-255, uint8 for
            supported image modes in PIL or "BGR"; float (0-1 for Y) for YUV-BT.601.
    """
    with PathManager.open(file_name, "rb") as f:
        image = Image.open(f)

        # work around this bug: https://github.com/python-pillow/Pillow/issues/3973
        image = _apply_exif_orientation(image)
        return convert_PIL_to_numpy(image, format)
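
# Usage sketch (hypothetical path): reading an image as a BGR uint8 HWC array,
# the layout most detectron-style pipelines expect.
def _demo_read_image():
    img = read_image("datasets/herb/images/000001.jpg", format="BGR")
    assert img.dtype == "uint8" and img.ndim == 3  # HWC, 0-255
    return img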
def new_import(name, globals=None, locals=None, fromlist=(), level=0):
    if (
        # Only deal with relative imports inside config files
        level != 0
        and globals is not None
        and globals.get("__package__", "").startswith(_CFG_PACKAGE_NAME)
    ):
        cur_file = find_relative_file(globals["__file__"], name, level)
        _validate_py_syntax(cur_file)
        spec = importlib.machinery.ModuleSpec(
            _random_package_name(cur_file), None, origin=cur_file
        )
        module = importlib.util.module_from_spec(spec)
        module.__file__ = cur_file
        with PathManager.open(cur_file) as f:
            content = f.read()
        exec(compile(content, cur_file, "exec"), module.__dict__)
        for name in fromlist:  # turn imported dict into DictConfig automatically
            val = _cast_to_config(module.__dict__[name])
            module.__dict__[name] = val
        return module
    return old_import(name, globals, locals, fromlist=fromlist, level=level)
def save(cfg, filename: str):
    """
    Args:
        cfg: an omegaconf config object
        filename: yaml file name to save the config file
    """
    logger = logging.getLogger(__name__)
    try:
        cfg = deepcopy(cfg)
    except Exception:
        pass
    else:
        # if it's deep-copyable, then...
        def _replace_type_by_name(x):
            if "_target_" in x and callable(x._target_):
                try:
                    x._target_ = _convert_target_to_string(x._target_)
                except AttributeError:
                    pass

        # not necessary, but makes yaml look nicer
        _visit_dict_config(cfg, _replace_type_by_name)

    try:
        OmegaConf.save(cfg, filename)
    except Exception:
        logger.exception("Unable to serialize the config to yaml. Error:")
        new_filename = filename + ".pkl"
        try:
            # retry by pickle
            with PathManager.open(new_filename, "wb") as f:
                cloudpickle.dump(cfg, f)
            logger.warning(f"Config saved using cloudpickle at {new_filename} ...")
        except Exception:
            pass
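
# Usage sketch: persisting a config next to the other job outputs. If yaml
# serialization fails, the function above falls back to cloudpickle and
# writes "<filename>.pkl" instead.
def _demo_save(cfg):
    save(cfg, "./output/config.yaml")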
def load_herb_json(json_file, image_root, dataset_name=None, extra_annotation_keys=None):
    """
    Load a json file with Herbarium's instances annotation format.
    Currently supports Family, Order, class annotations.

    Args:
        json_file (str): full path to the json file in Herb instances annotation format.
        image_root (str or path-like): the directory where the images in this json file exist.
        dataset_name (str or None): the name of the dataset (e.g., herb_2021_train).
            When provided, this function will also do the following:

            * Put "family", "order", "name" into the metadata associated with this dataset.
            * Build the class hierarchy in the metadata.
            * Map the category ids into a hierarchy id and continuous id (needed by
              standard dataset format), and add "hierarchy_id_to_contiguous_id" to the
              metadata associated with this dataset.

            This option should usually be provided, unless users need to load
            the original json content and apply more processing manually.
        extra_annotation_keys (list[str]): list of per-annotation keys that should also be
            loaded into the dataset dict. The values for these keys will be returned as-is.
            For example, the region_id annotations are loaded in this way.

            * Currently region_id is not provided in the dataset.

    Returns:
        list[dict]: a list of dicts in Herbarium standard dataset dicts format
        when `dataset_name` is not None. If `dataset_name` is None, the returned
        `category_ids` may be incontiguous and may not conform to the Herbarium
        standard format.

    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
    """
    from pyherbtools.herb import HERB

    timer = Timer()
    json_file = PathManager.get_local_path(json_file)
    with contextlib.redirect_stdout(io.StringIO()):
        herb_api = HERB(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()))

    # sort indices for reproducible results
    img_ids = sorted(herb_api.imgs.keys())
    imgs = herb_api.loadImgs(img_ids)
    anns = [herb_api.imgToAnns[img_id] for img_id in img_ids]
    total_num_valid_anns = sum([len(x) for x in anns])
    total_num_anns = len(herb_api.anns)
    if total_num_valid_anns < total_num_anns:
        logger.warning(
            f"{json_file} contains {total_num_anns} annotations, but only "
            f"{total_num_valid_anns} of them match to images in the file."
        )

    imgs_anns = list(zip(imgs, anns))
    logger.info("Loaded {} images in HERB format from {}".format(len(imgs_anns), json_file))

    dataset_dicts = []
    ann_keys = ["category_id", "hierarchy_id"] + (extra_annotation_keys or [])

    logger.info("Converting HERB format into herbarium format")
    timer = Timer()
    # guard against dataset_name=None, which the docstring allows
    if dataset_name is not None and "test" not in dataset_name:
        meta = MetadataCatalog.get(dataset_name)
        dataset_dicts = [
            process_per_record(anns, image_root, ann_keys, meta) for anns in imgs_anns
        ]
    logger.info("Processing records takes {:.2f} seconds.".format(timer.seconds()))

    return dataset_dicts
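
# Usage sketch (hypothetical names and paths): the loader is usually wrapped in
# a closure and registered, so DatasetCatalog can materialize the dicts lazily.
def _demo_register_herb_dataset():
    DatasetCatalog.register(
        "herb_2021_train",
        lambda: load_herb_json(
            "datasets/herb/train_metadata.json",
            "datasets/herb/train_images",
            dataset_name="herb_2021_train",
        ),
    )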
def __init__(
    self,
    dataset_name,
    tasks=None,
    distributed=True,
    output_dir=None,
):
    """
    Args:
        dataset_name (str): name of the dataset to be evaluated.
            It must have either the following corresponding metadata:
            "json_file": the path to the COCO format annotation.
            Or it must be in herbarium's standard dataset format
            so it can be converted to COCO format automatically.
        tasks (tuple[str]): tasks that can be evaluated under the given
            configuration. A task is one of "bbox", "segm", "keypoints".
            By default, will infer this automatically from predictions.
        distributed (bool): if True, will collect results from all ranks and run evaluation
            in the main process.
            Otherwise, will only evaluate the results in the current process.
        output_dir (str): optional, an output directory to dump all
            results predicted on the dataset. The dump contains two files:

            1. "instances_predictions.pth" a file that can be loaded with `torch.load` and
               contains all the results in the format they are produced by the model.
            2. "coco_instances_results.json" a json file in COCO's result format.
    """
    self._logger = logging.getLogger(__name__)
    self._distributed = distributed
    self._output_dir = output_dir

    self._tasks = tasks

    self._cpu_device = torch.device("cpu")

    self._metadata = MetadataCatalog.get(dataset_name)
    if not hasattr(self._metadata, "json_file"):
        self._logger.info(
            f"'{dataset_name}' is not registered by `register_herb_instances`."
            " Therefore trying to convert it to HERB format ..."
        )
        cache_path = os.path.join(output_dir, f"{dataset_name}_herb_format.json")
        self._metadata.json_file = cache_path
        convert_to_herb_json(dataset_name, cache_path)

    json_file = PathManager.get_local_path(self._metadata.json_file)
    with contextlib.redirect_stdout(io.StringIO()):
        self._herb_api = HERB(json_file)

    # Test set json files do not contain annotations (evaluation must be
    # performed using the COCO evaluation server).
    self._do_evaluation = "annotations" in self._herb_api.dataset
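
# Usage sketch (hypothetical dataset name and output dir): constructing the
# evaluator. HERBEvaluator is the class this __init__ belongs to, per the log
# messages in evaluate() above.
def _demo_build_evaluator():
    return HERBEvaluator(
        "herb_2021_val",
        distributed=True,
        output_dir="./output/eval",
    )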
def _open_cfg(cls, filename):
    return PathManager.open(filename, "r")
def setup_logger(
    output=None, distributed_rank=0, *, color=True, name="herbarium", abbrev_name=None
):
    """
    Initialize the herbarium logger and set its verbosity level to "DEBUG".

    Args:
        output (str): a file name or a directory to save log. If None, will not save log file.
            If ends with ".txt" or ".log", assumed to be a file name.
            Otherwise, logs will be saved to `output/log.txt`.
        distributed_rank (int): rank of the current process; only rank 0 logs to stdout.
        color (bool): whether to colorize stdout logs.
        name (str): the root module name of this logger
        abbrev_name (str): an abbreviation of the module, to avoid long names in logs.
            Set to "" to not log the root module in logs.
            By default, will abbreviate "herbarium" to "hb" and leave other
            modules unchanged.

    Returns:
        logging.Logger: a logger
    """
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)
    logger.propagate = False

    if abbrev_name is None:
        abbrev_name = "hb" if name == "herbarium" else name

    plain_formatter = logging.Formatter(
        "[%(asctime)s] %(name)s %(levelname)s: %(message)s", datefmt="%m/%d %H:%M:%S"
    )
    # stdout logging: master only
    if distributed_rank == 0:
        ch = logging.StreamHandler(stream=sys.stdout)
        ch.setLevel(logging.DEBUG)
        if color:
            formatter = _ColorfulFormatter(
                colored("[%(asctime)s %(name)s]: ", "green") + "%(message)s",
                datefmt="%m/%d %H:%M:%S",
                root_name=name,
                abbrev_name=str(abbrev_name),
            )
        else:
            formatter = plain_formatter
        ch.setFormatter(formatter)
        logger.addHandler(ch)

    # file logging: all workers
    if output is not None:
        if output.endswith(".txt") or output.endswith(".log"):
            filename = output
        else:
            filename = os.path.join(output, "log.txt")
        if distributed_rank > 0:
            filename = filename + ".rank{}".format(distributed_rank)
        PathManager.mkdirs(os.path.dirname(filename))

        fh = logging.StreamHandler(_cached_log_stream(filename))
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(plain_formatter)
        logger.addHandler(fh)

    return logger
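
# Usage sketch: per-rank logging. Rank 0 logs to stdout and ./output/log.txt;
# other ranks write only to ./output/log.txt.rank{n}.
def _demo_setup_logger(rank=0):
    logger = setup_logger(output="./output", distributed_rank=rank)
    logger.info("logger initialized")
    return logger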
def _open(self, path, mode="r", **kwargs):
    return PathManager.open(self._get_local_path(path), mode, **kwargs)
def _get_local_path(self, path, **kwargs):
    logger = logging.getLogger(__name__)
    catalog_path = ModelCatalog.get(path[len(self.PREFIX):])
    logger.info("Catalog entry {} points to {}".format(path, catalog_path))
    return PathManager.get_local_path(catalog_path, **kwargs)
    # Detectron C2 models are stored in the structure defined in `C2_DETECTRON_PATH_FORMAT`.
    url = ModelCatalog.C2_DETECTRON_PATH_FORMAT.format(
        prefix=ModelCatalog.S3_C2_DETECTRON_PREFIX, url=url, type=type, dataset=dataset
    )
    return url


class ModelCatalogHandler(PathHandler):
    """
    Resolve URL like catalog://.
    """

    PREFIX = "catalog://"

    def _get_supported_prefixes(self):
        return [self.PREFIX]

    def _get_local_path(self, path, **kwargs):
        logger = logging.getLogger(__name__)
        catalog_path = ModelCatalog.get(path[len(self.PREFIX):])
        logger.info("Catalog entry {} points to {}".format(path, catalog_path))
        return PathManager.get_local_path(catalog_path, **kwargs)

    def _open(self, path, mode="r", **kwargs):
        return PathManager.open(self._get_local_path(path), mode, **kwargs)


PathManager.register_handler(ModelCatalogHandler())
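
# Usage sketch (the catalog entry below is illustrative, not a verified key):
# once the handler is registered, "catalog://" URLs resolve through
# ModelCatalog like any other path.
def _demo_catalog_url():
    return PathManager.get_local_path("catalog://SomeModel/weights")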