def warm_cache(self, tasks: Iterable[Task]):
    """Build the cached path list for every distinct collection in ``tasks``.

    Doing this ahead of time means PBS jobs don't waste time building the
    path list themselves.

    :param tasks: tasks whose collections should have their path set built.
        Each collection is processed only once, using the first task that
        refers to it.
    """
    click.echo("Checking path list, this may take a few minutes...")

    seen = set()  # type: Set[collections.Collection]
    for task in tasks:
        collection = task.collection
        if collection not in seen:
            # The cache location is resolved per task, relative to our cache folder.
            target = Path(task.resolve_path(self.cache_folder))
            scan.build_pathset(collection, cache_path=target)
            seen.add(collection)
def _check_pathset_loading(cache_path: Path,
                           expected_paths: Iterable[str],
                           log: logging.Logger,
                           collection: Collection):
    """Check that the right mix of paths (index and filesystem) are loaded.

    Builds the path set for ``collection`` using ``cache_path`` and asserts
    that its ``file://`` keys are exactly ``expected_paths``.

    :param cache_path: cache location handed to ``scan.build_pathset``; also
        used as the parent of a made-up path for the negative check.
    :param expected_paths: the URIs the built path set is expected to contain.
    :param log: logger passed through to ``scan.build_pathset``.
    :param collection: the collection whose path set is built.
    :raises AssertionError: if the loaded paths differ from ``expected_paths``
        or if a made-up path is reported as present.
    """
    pathset = scan.build_pathset(collection, cache_path, log=log)

    found = set(pathset.iterkeys('file://'))
    wanted = set(expected_paths)
    assert found == wanted

    # Sanity check: a path that was never part of the collection must not match.
    bogus = cache_path / 'dummy_dataset' / 'ga-metadata.yaml'
    bogus_uri = bogus.absolute().as_uri()
    assert bogus_uri not in pathset