Example 1
def convert_to_herb_json(dataset_name, output_file, allow_cached=True):
    """
    Converts a dataset into HERB (COCO-style) format and saves it to a json file.
    dataset_name must be registered in DatasetCatalog and be in herbarium's standard format.

    Args:
        dataset_name:
            the dataset key referenced from the config file;
            must be registered in DatasetCatalog and be in herbarium's standard format
        output_file: path of the json file to write
        allow_cached: if True, skip the conversion when the json file already exists
    """

    # TODO: The dataset or the conversion script *may* change,
    # a checksum would be useful for validating the cached data

    PathManager.mkdirs(os.path.dirname(output_file))
    with file_lock(output_file):
        if PathManager.exists(output_file) and allow_cached:
            logger.warning(
                f"Using previously cached COCO format annotations at '{output_file}'. "
                "You need to clear the cache file if your dataset has been modified."
            )
        else:
            logger.info(f"Converting annotations of dataset '{dataset_name}' to HERB format ...)")
            coco_dict = convert_to_herb_dict(dataset_name)

            logger.info(f"Caching COCO format annotations at '{output_file}' ...")
            tmp_file = output_file + ".tmp"
            with PathManager.open(tmp_file, "w") as f:
                json.dump(coco_dict, f)
            shutil.move(tmp_file, output_file)
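A minimal standard-library sketch of the same write-then-move caching idea (no PathManager or file_lock here; the helper name and paths are illustrative):

import json
import os
import tempfile

def cache_json_atomically(record, output_file):
    out_dir = os.path.dirname(output_file) or "."
    os.makedirs(out_dir, exist_ok=True)
    # Write to a temporary file first, then rename, so readers never see a partial file.
    fd, tmp_path = tempfile.mkstemp(dir=out_dir, suffix=".tmp")
    with os.fdopen(fd, "w") as f:
        json.dump(record, f)
    os.replace(tmp_path, output_file)  # atomic when both paths are on the same filesystem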
Example 2
    def evaluate(self, img_ids=None):
        """
        Args:
            img_ids: a list of image IDs to evaluate on. Default to None for the whole dataset
        """
        if self._distributed:
            comm.synchronize()
            predictions = comm.gather(self._predictions, dst=0)
            predictions = list(itertools.chain(*predictions))

            if not comm.is_main_process():
                return {}
        else:
            predictions = self._predictions

        if len(predictions) == 0:
            self._logger.warning(
                "[HERBEvaluator] Did not receive valid predictions.")
            return {}

        if self._output_dir:
            PathManager.mkdirs(self._output_dir)
            file_path = os.path.join(self._output_dir,
                                     "instances_predictions.pth")
            with PathManager.open(file_path, "wb") as f:
                torch.save(predictions, f)

        self._results = OrderedDict()
        self._eval_predictions(predictions, img_ids=img_ids)
        # Results are kept in self._results; returning a deep copy to the caller
        # is currently disabled.
        # return copy.deepcopy(self._results)
        return None
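A stand-alone sketch of the gather-to-the-main-process step above, using plain torch.distributed instead of the project's comm helpers (all_gather_object stands in for the gather-to-rank-0 call):

import itertools
import torch.distributed as dist

def gather_predictions(local_predictions):
    if not (dist.is_available() and dist.is_initialized()):
        return local_predictions  # single-process case
    gathered = [None] * dist.get_world_size()
    dist.all_gather_object(gathered, local_predictions)  # every rank receives every list
    if dist.get_rank() != 0:
        return []  # only the main process keeps the merged predictions
    return list(itertools.chain(*gathered))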
Example 3
def default_setup(cfg, args):
    """
    Perform some basic common setups at the beginning of a job, including:

    1. Set up the herbarium logger
    2. Log basic information about environment, cmdline arguments, and config
    3. Backup the config to the output directory

    Args:
        cfg (CfgNode or omegaconf.DictConfig): the full config to be used
        args (argparse.Namespace): the command line arguments to be logged
    """
    output_dir = _try_get_key(cfg, "OUTPUT_DIR", "output_dir",
                              "train.output_dir")
    if comm.is_main_process() and output_dir:
        PathManager.mkdirs(output_dir)

    rank = comm.get_rank()
    setup_logger(output_dir, distributed_rank=rank, name="fvcore")
    logger = setup_logger(output_dir, distributed_rank=rank)

    logger.info("Rank of current process: {}. World size: {}".format(
        rank, comm.get_world_size()))
    logger.info("Environment info:\n" + collect_env_info())

    logger.info("Command line arguments: " + str(args))
    if hasattr(args, "config_file") and args.config_file != "":
        logger.info("Contents of args.config_file={}:\n{}".format(
            args.config_file,
            _highlight(
                PathManager.open(args.config_file, "r").read(),
                args.config_file),
        ))

    if comm.is_main_process() and output_dir:
        # Note: some of our scripts may expect the existence of
        # config.yaml in output directory
        path = os.path.join(output_dir, "config.yaml")
        if isinstance(cfg, CfgNode):
            logger.info("Running with full config:\n{}".format(
                _highlight(cfg.dump(), ".yaml")))
            with PathManager.open(path, "w") as f:
                f.write(cfg.dump())
        else:
            LazyConfig.save(cfg, path)
        logger.info("Full config saved to {}".format(path))

    # make sure each worker has a different, yet deterministic seed if specified
    seed = _try_get_key(cfg, "SEED", "train.seed", default=-1)
    seed_all_rng(None if seed < 0 else seed + rank)

    # cudnn benchmark has a large warm-up overhead, so it shouldn't be used on the
    # typically small validation set.
    if not (hasattr(args, "eval_only") and args.eval_only):
        torch.backends.cudnn.benchmark = _try_get_key(cfg,
                                                      "CUDNN_BENCHMARK",
                                                      "train.cudnn_benchmark",
                                                      default=False)
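The seeding line above gives each worker a different but reproducible seed; a minimal sketch of that rule (the project's seed_all_rng presumably seeds more generators than shown here):

import random

import numpy as np
import torch

def seed_worker(base_seed, rank):
    seed = base_seed + rank  # distinct per rank, deterministic across runs
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)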
Example 4
    def load(filename: str, keys: Union[None, str, Tuple[str, ...]] = None):
        """
        Load a config file.

        Args:
            filename: absolute path or relative path w.r.t. the current working directory
            keys: keys to load and return. If not given, return all keys
                (whose values are config objects) in a dict.
        """
        has_keys = keys is not None
        filename = filename.replace("/./", "/")  # strip redundant "/./" segments
        if os.path.splitext(filename)[1] not in [".py", ".yaml", ".yml"]:
            raise ValueError(
                f"Config file {filename} has to be a python or yaml file.")
        if filename.endswith(".py"):
            _validate_py_syntax(filename)

            with _patch_import():
                # Record the filename
                module_namespace = {
                    "__file__": filename,
                    "__package__": _random_package_name(filename),
                }
                with PathManager.open(filename) as f:
                    content = f.read()
                # Compile first with filename to:
                # 1. make filename appear in the stack trace
                # 2. make load_rel able to find its parent's (possibly remote) location
                exec(compile(content, filename, "exec"), module_namespace)

            ret = module_namespace
        else:
            with PathManager.open(filename) as f:
                obj = yaml.unsafe_load(f)
            ret = OmegaConf.create(obj, flags={"allow_objects": True})

        if has_keys:
            if isinstance(keys, str):
                return _cast_to_config(ret[keys])
            else:
                return tuple(_cast_to_config(ret[a]) for a in keys)
        else:
            if filename.endswith(".py"):
                # when not specified, only load those that are config objects
                ret = DictConfig(
                    {
                        name: _cast_to_config(value)
                        for name, value in ret.items()
                        if isinstance(value, (DictConfig, ListConfig, dict))
                        and not name.startswith("_")
                    },
                    flags={"allow_objects": True},
                )
            return ret
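For yaml configs the branch above reduces to yaml loading plus OmegaConf.create; a small self-contained sketch (the config contents are made up, and safe_load suffices for this toy input):

import yaml
from omegaconf import OmegaConf

text = """
train:
  output_dir: ./output
  seed: 42
"""
cfg = OmegaConf.create(yaml.safe_load(text), flags={"allow_objects": True})
assert cfg.train.seed == 42
print(OmegaConf.to_yaml(cfg))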
Example 5
    def _eval_predictions(self, predictions, img_ids=None):
        """
        Evaluate predictions. Fill self._results with the metrics of the tasks.
        """
        self._logger.info("Preparing results for HERB format ...")

        if self._output_dir:
            file_path = os.path.join(self._output_dir,
                                     "coco_instances_results.json")
            self._logger.info("Saving results to {}".format(file_path))
            with PathManager.open(file_path, "w") as f:
                f.write(json.dumps(predictions))
                f.flush()

        if not self._do_evaluation:
            self._logger.info("Annotations are not available for evaluation.")
            return

        self._logger.info("Evaluating predictions with official HERB API...")

        herb_eval = (_evaluate_predictions_on_herb(
            self._herb_api,
            predictions,
            img_ids=img_ids,
        ) if len(predictions) > 0 else None)
Example 6
    def _load_file(self, filename):
        if filename.endswith(".pkl"):
            with PathManager.open(filename, "rb") as f:
                data = pickle.load(f, encoding="latin1")
            if "model" in data and "__author__" in data:
                # file is in Detectron2 model zoo format
                self.logger.info("Reading a file from '{}'".format(
                    data["__author__"]))
                return data
            else:
                # assume file is from Caffe2 / Detectron1 model zoo
                if "blobs" in data:
                    # Detection models have "blobs", but ImageNet models don't
                    data = data["blobs"]
                data = {
                    k: v
                    for k, v in data.items() if not k.endswith("_momentum")
                }
                return {
                    "model": data,
                    "__author__": "Caffe2",
                    "matching_heuristics": True
                }

        loaded = super()._load_file(filename)  # load native pth checkpoint
        if "model" not in loaded:
            loaded = {"model": loaded}
        return loaded
Example 7
def _cached_log_stream(filename):
    # use 1K buffer if writing to cloud storage
    io = PathManager.open(filename,
                          "a",
                          buffering=1024 if "://" in filename else -1)
    atexit.register(io.close)
    return io
Example 8
def _validate_py_syntax(filename):
    # see also https://github.com/open-mmlab/mmcv/blob/master/mmcv/utils/config.py
    with PathManager.open(filename, "r") as f:
        content = f.read()
    try:
        ast.parse(content)
    except SyntaxError as e:
        raise SyntaxError(f"Config file {filename} has syntax error!") from e
Example 9
 def __init__(self, json_file, window_size=20):
     """
     Args:
         json_file (str): path to the json file. New data will be appended if the file exists.
         window_size (int): the window size of median smoothing for the scalars whose
             `smoothing_hint` is True.
     """
     self._file_handle = PathManager.open(json_file, "a")
     self._window_size = window_size
     self._last_write = -1
Example 10
 def after_step(self):
     if self._profiler is None:
         return
     self._profiler.__exit__(None, None, None)
     PathManager.mkdirs(self._output_dir)
     out_file = os.path.join(
         self._output_dir,
         "profiler-trace-iter{}.json".format(self.trainer.iter))
     if "://" not in out_file:
         self._profiler.export_chrome_trace(out_file)
     else:
         # Support non-posix filesystems
         with tempfile.TemporaryDirectory(
                 prefix="detectron2_profiler") as d:
             tmp_file = os.path.join(d, "tmp.json")
             self._profiler.export_chrome_trace(tmp_file)
             with open(tmp_file) as f:
                 content = f.read()
         with PathManager.open(out_file, "w") as f:
             f.write(content)
Example 11
def update_meta(json_file, dataset_name=None):

    from pyherbtools.herb import HERB

    if dataset_name is not None and "test" not in dataset_name:

        logger.info("Update Metadat of {} dataset".format(dataset_name))
        timer = Timer()
        json_file = PathManager.get_local_path(json_file)
        with contextlib.redirect_stdout(io.StringIO()):
            herb_api = HERB(json_file)
        if timer.seconds() > 1:
            logger.info("Loading {} takes {:.2f} seconds.".format(
                json_file, timer.seconds()))

        meta = MetadataCatalog.get(dataset_name)
        cat_ids = sorted(herb_api.getCatIds())
        cats = herb_api.loadCats(cat_ids)
        # The categories in a custom json file may not be sorted.
        thing_classes = [
            c["name"] for c in sorted(cats, key=lambda x: x["id"])
        ]
        meta.thing_classes = thing_classes

        logger.info("Creating hierarchy target from given annotation")

        order_family_hierarchy = torch.zeros(len(meta.family_map),
                                             len(meta.order_map))
        family_species_hierarchy = torch.zeros(len(meta.species_map),
                                               len(meta.family_map))

        for cat in cats:
            order_id = meta.order_map[cat["order"]]
            family_id = meta.family_map[cat["family"]]
            species_id = meta.species_map[cat["name"]]

            order_family_hierarchy[family_id][order_id] = 1
            family_species_hierarchy[species_id][family_id] = 1

        from torch import nn
        order_family_hierarchy = nn.Softmax(dim=1)(order_family_hierarchy)
        family_species_hierarchy = nn.Softmax(dim=1)(family_species_hierarchy)

        meta.hierarchy_prior = {
            "order|family": order_family_hierarchy,
            "family|species": family_species_hierarchy
        }
        meta.cats = cats

        meta.num_classes = {
            "family": len(meta.family_map),
            "order": len(meta.order_map),
            "species": len(meta.species_map),
        }
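A self-contained toy version of the hierarchy-prior construction above (the taxon names and maps are made up; in update_meta they come from the dataset metadata):

import torch
from torch import nn

order_map = {"Asterales": 0, "Rosales": 1}
family_map = {"Asteraceae": 0, "Rosaceae": 1}
cats = [
    {"order": "Asterales", "family": "Asteraceae"},
    {"order": "Rosales", "family": "Rosaceae"},
]

# Rows index families, columns index orders; a 1 marks "this family belongs to this order".
order_family = torch.zeros(len(family_map), len(order_map))
for cat in cats:
    order_family[family_map[cat["family"]], order_map[cat["order"]]] = 1

# Row-wise softmax, as in update_meta, turns each row into a distribution over orders.
order_family = nn.Softmax(dim=1)(order_family)
print(order_family)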
Example 12
 def find_relative_file(original_file, relative_import_path, level):
     cur_file = os.path.dirname(original_file)
     for _ in range(level - 1):
         cur_file = os.path.dirname(cur_file)
     cur_name = relative_import_path.lstrip(".")
     for part in cur_name.split("."):
         cur_file = os.path.join(cur_file, part)
     # NOTE: directory import is not handled. Because then it's unclear
     # if such import should produce python module or DictConfig. This can
     # be discussed further if needed.
     if not cur_file.endswith(".py"):
         cur_file += ".py"
     if not PathManager.isfile(cur_file):
         raise ImportError(
             f"Cannot import name {relative_import_path} from "
             f"{original_file}: {cur_file} has to exist.")
     return cur_file
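A runnable trace of the path arithmetic above, with hypothetical paths:

import os

original_file = "/cfgs/exp/base.py"
cur_file = os.path.dirname(original_file)    # "/cfgs/exp"
for _ in range(2 - 1):                       # level = 2 climbs one more directory
    cur_file = os.path.dirname(cur_file)     # "/cfgs"
for part in "common.optim".split("."):
    cur_file = os.path.join(cur_file, part)  # "/cfgs/common/optim"
print(cur_file + ".py")                      # "/cfgs/common/optim.py"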
Example 13
    def merge_from_file(self,
                        cfg_filename: str,
                        allow_unsafe: bool = True) -> None:
        assert PathManager.isfile(
            cfg_filename), f"Config file '{cfg_filename}' does not exist!"
        loaded_cfg = self.load_yaml_with_base(cfg_filename,
                                              allow_unsafe=allow_unsafe)
        loaded_cfg = type(self)(loaded_cfg)

        # defaults.py needs to import CfgNode
        from .defaults import _C

        latest_ver = _C.VERSION
        assert (
            latest_ver == self.VERSION
        ), "CfgNode.merge_from_file is only allowed on a config object of latest version!"

        logger = logging.getLogger(__name__)

        loaded_ver = loaded_cfg.get("VERSION", None)
        if loaded_ver is None:
            from .compat import guess_version

            loaded_ver = guess_version(loaded_cfg, cfg_filename)
        assert loaded_ver <= self.VERSION, "Cannot merge a v{} config into a v{} config.".format(
            loaded_ver, self.VERSION)

        if loaded_ver == self.VERSION:
            self.merge_from_other_cfg(loaded_cfg)
        else:
            # compat.py needs to import CfgNode
            from .compat import upgrade_config, downgrade_config

            logger.warning(
                "Loading an old v{} config file '{}' by automatically upgrading to v{}. "
                "See docs/CHANGELOG.md for instructions to update your files.".
                format(loaded_ver, cfg_filename, self.VERSION))
            # To convert, first obtain a full config at an old version
            old_self = downgrade_config(self, to_version=loaded_ver)
            old_self.merge_from_other_cfg(loaded_cfg)
            new_config = upgrade_config(old_self)
            self.clear()
            self.update(new_config)
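The core call here is yacs' merge_from_other_cfg; a stand-alone sketch of that merge on a toy config (field names are made up, and the VERSION handling above is omitted):

from yacs.config import CfgNode as CN

base = CN()
base.MODEL = CN()
base.MODEL.DEPTH = 50

override = CN()
override.MODEL = CN()
override.MODEL.DEPTH = 101

base.merge_from_other_cfg(override)  # the override wins for keys present in both
assert base.MODEL.DEPTH == 101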
Example 14
def read_image(file_name, format=None):
    """
    Read an image into the given format.
    Will apply rotation and flipping if the image has such exif information.

    Args:
        file_name (str): image file path
        format (str): one of the supported image modes in PIL, or "BGR" or "YUV-BT.601".

    Returns:
        image (np.ndarray):
            an HWC image in the given format, which is 0-255, uint8 for
            supported image modes in PIL or "BGR"; float (0-1 for Y) for YUV-BT.601.
    """
    with PathManager.open(file_name, "rb") as f:
        image = Image.open(f)

        # work around this bug: https://github.com/python-pillow/Pillow/issues/3973
        image = _apply_exif_orientation(image)
        return convert_PIL_to_numpy(image, format)
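A minimal PIL-only sketch of the same EXIF-aware read (the original routes through PathManager and a project-specific format converter):

import numpy as np
from PIL import Image, ImageOps

def read_image_rgb(file_name):
    with Image.open(file_name) as image:
        image = ImageOps.exif_transpose(image)    # apply EXIF rotation/flip
        return np.asarray(image.convert("RGB"))   # HWC uint8 array, values 0-255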
Example 15
 def new_import(name, globals=None, locals=None, fromlist=(), level=0):
     if (
             # Only deal with relative imports inside config files
             level != 0 and globals is not None and globals.get(
                 "__package__", "").startswith(_CFG_PACKAGE_NAME)):
         cur_file = find_relative_file(globals["__file__"], name, level)
         _validate_py_syntax(cur_file)
         spec = importlib.machinery.ModuleSpec(
             _random_package_name(cur_file), None, origin=cur_file)
         module = importlib.util.module_from_spec(spec)
         module.__file__ = cur_file
         with PathManager.open(cur_file) as f:
             content = f.read()
         exec(compile(content, cur_file, "exec"), module.__dict__)
         for name in fromlist:  # turn imported dict into DictConfig automatically
             val = _cast_to_config(module.__dict__[name])
             module.__dict__[name] = val
         return module
     return old_import(name,
                       globals,
                       locals,
                       fromlist=fromlist,
                       level=level)
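new_import only takes effect once it is installed as the import hook; a bare-bones sketch of how a context manager like the project's _patch_import might swap builtins.__import__ (the actual helper may do more):

import builtins
from contextlib import contextmanager

@contextmanager
def patch_import(new_import):
    old_import = builtins.__import__
    builtins.__import__ = new_import
    try:
        yield old_import
    finally:
        builtins.__import__ = old_import  # always restore the original hook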
Example 16
    def save(cfg, filename: str):
        """
        Args:
            cfg: an omegaconf config object
            filename: yaml file name to save the config file
        """
        logger = logging.getLogger(__name__)
        try:
            cfg = deepcopy(cfg)
        except Exception:
            pass
        else:
            # if it's deep-copyable, tidy the copy before dumping
            def _replace_type_by_name(x):
                if "_target_" in x and callable(x._target_):
                    try:
                        x._target_ = _convert_target_to_string(x._target_)
                    except AttributeError:
                        pass

            # not necessary, but makes the yaml look nicer
            _visit_dict_config(cfg, _replace_type_by_name)

        try:
            OmegaConf.save(cfg, filename)
        except Exception:
            logger.exception("Unable to serialize the config to yaml. Error:")
            new_filename = filename + ".pkl"
            try:
                # retry by pickle
                with PathManager.open(new_filename, "wb") as f:
                    cloudpickle.dump(cfg, f)
                logger.warning(
                    f"Config saved using cloudpickle at {new_filename} ...")
            except Exception:
                pass
Example 17
def load_herb_json(json_file, image_root, dataset_name=None, extra_annotation_keys=None):
    """
    Load a json file with Herbarium's instances annotation format.
    Currently supports family, order, and class annotations.

    Args:
        json_file (str): full path to the json file in Herb instances annotation format.
        image_root (str or path-like): the directory where the images in this json file exists.
        dataset_name (str or None): the name of the dataset (e.g., herb_2021_train).
            When provided, this function will also do the following:

            * Put "family", "order", "name" into the metadata associated with this dataset.
            * Build the class hierarchy in the metadata.
            * Map the category ids into a hierarchy id and a contiguous id (needed by the standard dataset format),
              and add "hierarchy_id_to_contiguous_id" to the metadata associated
              with this dataset.

            This option should usually be provided, unless users need to load
            the original json content and apply more processing manually.
        extra_annotation_keys (list[str]): list of per-annotation keys that should also be
            loaded into the dataset dict. The values for these keys will be returned as-is.
            For example, the region_id annotations are loaded in this way.
            
            * Currently region_id is not provided in the dataset.

    Returns:
        list[dict]: a list of dicts in Herbarium's standard dataset dict format
        when `dataset_name` is not None.
        If `dataset_name` is None, the returned `category_ids` may be
        non-contiguous and may not conform to the Herbarium standard format.

    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
    """
    from pyherbtools.herb import HERB

    timer = Timer()
    json_file = PathManager.get_local_path(json_file)
    with contextlib.redirect_stdout(io.StringIO()):
        herb_api = HERB(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()))

    # sort indices for reproducible results
    img_ids = sorted(herb_api.imgs.keys())
    imgs = herb_api.loadImgs(img_ids)
    anns = [herb_api.imgToAnns[img_id] for img_id in img_ids]
    total_num_valid_anns = sum([len(x) for x in anns])
    total_num_anns = len(herb_api.anns)
    if total_num_valid_anns < total_num_anns:
        logger.warning(
            f"{json_file} contains {total_num_anns} annotations, but only "
            f"{total_num_valid_anns} of them match to images in the file."
        )

    imgs_anns = list(zip(imgs, anns))
    logger.info("Loaded {} images in HERB format from {}".format(len(imgs_anns), json_file))

    dataset_dicts = []

    ann_keys = ["category_id", "hierarchy_id"] + (extra_annotation_keys or [])

    logger.info("Convert HERB format into herbarium format")

    timer = Timer()

    if dataset_name is not None and "test" not in dataset_name:
        meta = MetadataCatalog.get(dataset_name)
        dataset_dicts = [process_per_record(anns, image_root, ann_keys, meta) for anns in imgs_anns]

    logger.info("Processing Record takes {:.2f} seconds.".format(timer.seconds()))

    return dataset_dicts
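A hypothetical registration sketch mirroring the DatasetCatalog pattern the docstring refers to (the import path, dataset name, and file paths below are made up):

from herbarium.data import DatasetCatalog  # assumed import path

DatasetCatalog.register(
    "herb_2021_train",  # made-up dataset name
    lambda: load_herb_json(
        "datasets/herb/train_annotations.json",  # made-up paths
        "datasets/herb/images",
        dataset_name="herb_2021_train",
    ),
)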
Example 18
    def __init__(
        self,
        dataset_name,
        tasks=None,
        distributed=True,
        output_dir=None,
    ):
        """
        Args:
            dataset_name (str): name of the dataset to be evaluated.
                It must have either the following corresponding metadata:

                    "json_file": the path to the COCO format annotation

                Or it must be in herbarium's standard dataset format
                so it can be converted to COCO format automatically.
            tasks (tuple[str]): tasks that can be evaluated under the given
                configuration. A task is one of "bbox", "segm", "keypoints".
                By default, will infer this automatically from predictions.
            distributed (bool): if True, will collect results from all ranks and run evaluation
                in the main process.
                Otherwise, will only evaluate the results in the current process.
            output_dir (str): optional, an output directory to dump all
                results predicted on the dataset. The dump contains two files:

                1. "instances_predictions.pth" a file that can be loaded with `torch.load` and
                   contains all the results in the format they are produced by the model.
                2. "coco_instances_results.json" a json file in COCO's result format.
        """
        self._logger = logging.getLogger(__name__)
        self._distributed = distributed
        self._output_dir = output_dir
        self._tasks = tasks

        self._cpu_device = torch.device("cpu")

        self._metadata = MetadataCatalog.get(dataset_name)
        if not hasattr(self._metadata, "json_file"):
            self._logger.info(
                f"'{dataset_name}' is not registered by `register_herb_instances`."
                " Therefore trying to convert it to HERB format ...")

            cache_path = os.path.join(output_dir,
                                      f"{dataset_name}_herb_format.json")
            self._metadata.json_file = cache_path
            convert_to_herb_json(dataset_name, cache_path)

        json_file = PathManager.get_local_path(self._metadata.json_file)
        with contextlib.redirect_stdout(io.StringIO()):
            self._herb_api = HERB(json_file)

        # Test set json files do not contain annotations (evaluation must be
        # performed using the COCO evaluation server).
        self._do_evaluation = "annotations" in self._herb_api.dataset
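A hypothetical end-to-end usage sketch, assuming the evaluator follows the usual detectron2-style reset/process/evaluate protocol; the dataset name, model, and data loader are placeholders:

evaluator = HERBEvaluator("herb_2021_validation", distributed=False, output_dir="./eval_output")
evaluator.reset()                       # assumed: clears self._predictions
for inputs in val_loader:               # placeholder data loader
    outputs = model(inputs)             # placeholder model
    evaluator.process(inputs, outputs)  # assumed: accumulates per-image predictions
results = evaluator.evaluate()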
Example 19
 def _open_cfg(cls, filename):
     return PathManager.open(filename, "r")
Example 20
def setup_logger(output=None,
                 distributed_rank=0,
                 *,
                 color=True,
                 name="herbarium",
                 abbrev_name=None):
    """
    Initialize the herbarium logger and set its verbosity level to "DEBUG".

    Args:
        output (str): a file name or a directory to save log. If None, will not save log file.
            If ends with ".txt" or ".log", assumed to be a file name.
            Otherwise, logs will be saved to `output/log.txt`.
        name (str): the root module name of this logger
        abbrev_name (str): an abbreviation of the module, to avoid long names in logs.
            Set to "" to not log the root module in logs.
            By default, will abbreviate "herbarium" to "hb" and leave other
            modules unchanged.

    Returns:
        logging.Logger: a logger
    """
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)
    logger.propagate = False

    if abbrev_name is None:
        abbrev_name = "hb" if name == "herbarium" else name

    plain_formatter = logging.Formatter(
        "[%(asctime)s] %(name)s %(levelname)s: %(message)s",
        datefmt="%m/%d %H:%M:%S")
    # stdout logging: master only
    if distributed_rank == 0:
        ch = logging.StreamHandler(stream=sys.stdout)
        ch.setLevel(logging.DEBUG)
        if color:
            formatter = _ColorfulFormatter(
                colored("[%(asctime)s %(name)s]: ", "green") + "%(message)s",
                datefmt="%m/%d %H:%M:%S",
                root_name=name,
                abbrev_name=str(abbrev_name),
            )
        else:
            formatter = plain_formatter
        ch.setFormatter(formatter)
        logger.addHandler(ch)

    # file logging: all workers
    if output is not None:
        if output.endswith(".txt") or output.endswith(".log"):
            filename = output
        else:
            filename = os.path.join(output, "log.txt")
        if distributed_rank > 0:
            filename = filename + ".rank{}".format(distributed_rank)
        PathManager.mkdirs(os.path.dirname(filename))

        fh = logging.StreamHandler(_cached_log_stream(filename))
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(plain_formatter)
        logger.addHandler(fh)

    return logger
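For example, a typical call that writes ./output/log.txt and abbreviates the root module name on the console:

logger = setup_logger("./output", distributed_rank=0, name="herbarium", abbrev_name="hb")
logger.info("logger ready")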
Example 21
 def _open(self, path, mode="r", **kwargs):
     return PathManager.open(self._get_local_path(path), mode, **kwargs)
Example 22
 def _get_local_path(self, path, **kwargs):
     logger = logging.getLogger(__name__)
     catalog_path = ModelCatalog.get(path[len(self.PREFIX):])
     logger.info("Catalog entry {} points to {}".format(path, catalog_path))
     return PathManager.get_local_path(catalog_path, **kwargs)
Example 23
        # Detectron C2 models are stored in the structure defined in `C2_DETECTRON_PATH_FORMAT`.
        url = ModelCatalog.C2_DETECTRON_PATH_FORMAT.format(
            prefix=ModelCatalog.S3_C2_DETECTRON_PREFIX,
            url=url,
            type=type,
            dataset=dataset)
        return url


class ModelCatalogHandler(PathHandler):
    """
    Resolve URL like catalog://.
    """

    PREFIX = "catalog://"

    def _get_supported_prefixes(self):
        return [self.PREFIX]

    def _get_local_path(self, path, **kwargs):
        logger = logging.getLogger(__name__)
        catalog_path = ModelCatalog.get(path[len(self.PREFIX):])
        logger.info("Catalog entry {} points to {}".format(path, catalog_path))
        return PathManager.get_local_path(catalog_path, **kwargs)

    def _open(self, path, mode="r", **kwargs):
        return PathManager.open(self._get_local_path(path), mode, **kwargs)


PathManager.register_handler(ModelCatalogHandler())
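Once the handler is registered, catalog:// paths resolve through ModelCatalog like any other path; a hypothetical usage sketch (the catalog entry name is made up):

local_path = PathManager.get_local_path("catalog://SomeModel/weights")  # made-up entry
with PathManager.open("catalog://SomeModel/weights", "rb") as f:
    header = f.read(16)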