Example #1
0
class _CachedRepo(object):
    """Custom wrapper around a Repo instance

    It provides a few customized methods that also cache their return
    values. For all other attribute access the underlying repo instance
    is used.

    A repository is assumed to be static. Cache invalidation must
    be done manually, if that assumption no longer holds.

    Note that the ``lru_cache``-decorated methods below key their cache
    on ``self`` (plus the remaining arguments), so a cache entry keeps a
    reference to the instance it was computed for.
    """
    def __init__(self, path):
        # keep the path as given; it is the base for computing
        # repository-relative paths in get_file_annexinfo()
        self._unresolved_path = path
        self._repo = Dataset(path).repo
        # lazily populated by get_tmpdir()
        self._tmpdir = None
        # lazily populated by is_managed_branch()
        self._ismanagedbranch = None

    def __getattr__(self, name):
        """Fall back on the actual repo instance, if we have nothing"""
        # __getattr__ is only called when regular lookup failed. If the
        # missing attribute is `_repo` itself (e.g. instance created
        # without running __init__), delegating to `self._repo` would
        # re-enter this method forever and end in a RecursionError.
        if name == '_repo':
            raise AttributeError(name)
        return getattr(self._repo, name)

    # more than 20 special remotes may be rare
    @lru_cache(maxsize=20)
    def get_special_remotes_wo_timestamp(self):
        """Return the repo's special remote records without 'timestamp'

        Returns
        -------
        dict
          Same keys as reported by ``get_special_remotes()`` of the
          underlying repo; each value is a copy of the respective record
          with the 'timestamp' property removed.
        """
        return {
            k: {pk: pv
                for pk, pv in v.items() if pk != 'timestamp'}
            for k, v in self._repo.get_special_remotes().items()
        }

    # there can be many files, and the records per file are smallish
    @lru_cache(maxsize=10000)
    def get_file_annexinfo(self, fpath):
        """Return the annex info record for a single file

        Parameters
        ----------
        fpath
          Path object that must be located underneath the path this
          instance was created with (``relative_to()`` is used).

        Returns
        -------
        dict
          The record reported by ``get_content_annexinfo()`` for the
          file, or an empty dict if nothing was reported.
        """
        rpath = str(fpath.relative_to(self._unresolved_path))
        finfo = self._repo.get_content_annexinfo(
            paths=[rpath],
            # a simple `exists()` will not be enough (pointer files, etc...)
            eval_availability=True,
        )
        # a single path was queried, hence we only look at one record
        return finfo.popitem()[1] if finfo else {}

    # n-keys and n-files should have the same order of magnitude
    @lru_cache(maxsize=10000)
    def get_whereis_key_by_specialremote(self, key):
        """Returns whereis() information for a single key

        Returns
        -------
        dict
          Keys are special remote IDs, values are dicts with all relevant
          whereis properties, currently ('urls' (list), 'here' (bool)).
          Properties that are missing, `None`, or an empty list are
          left out.
        """
        whereis = self._repo.whereis(key, key=True, output='full')
        return {
            k: {
                prop: v[prop]
                for prop in ('urls', 'here') if v.get(prop) not in (None, [])
            }
            for k, v in whereis.items()
        }

    def is_managed_branch(self):
        """Return (and cache) `is_managed_branch()` of the underlying repo"""
        if self._ismanagedbranch is None:
            self._ismanagedbranch = self._repo.is_managed_branch()
        return self._ismanagedbranch

    def get_tmpdir(self):
        """Return the path of a temporary directory inside the repo

        The directory `.git/tmp/datalad-copy` underneath the repo's
        `pathobj` is created on first use (including parents), and
        is remembered for subsequent calls and for cleanup.
        """
        if not self._tmpdir:
            tmploc = self._repo.pathobj / '.git' / 'tmp' / 'datalad-copy'
            tmploc.mkdir(exist_ok=True, parents=True)
            # put in cache for later clean/lookup
            self._tmpdir = tmploc
        return self._tmpdir

    def cleanup_cachedrepo(self):
        """Remove the temporary directory, if one was created

        Removal is best-effort: a failure (e.g. directory not empty) is
        logged as a warning, and not raised.
        """
        # TODO this could also be the place to stop lingering batch processes
        if not self._tmpdir:
            return

        try:
            self._tmpdir.rmdir()
        except OSError as e:
            ce = CapturedException(e)
            lgr.warning('Failed to clean up temporary directory: %s', ce)
        else:
            # forget the removed directory, such that a subsequent
            # get_tmpdir() recreates it instead of handing out a
            # path that no longer exists
            self._tmpdir = None

    def get_repotype(self):
        """Return the class of the underlying repo instance"""
        return type(self._repo)