Example #1
def test_publish_file_handle(origin, src_path, dst_path):

    # prepare src
    source = install(path=src_path, source=origin, recursive=True)
    # TODO: For now, circumvent the detached-head issue.
    # Figure out what to do.
    for subds in source.get_dataset_handles(recursive=True):
        AnnexRepo(opj(src_path, subds), init=True, create=True).git_checkout("master")
    source.repo.get('test-annex.dat')

    # create plain git at target:
    target = AnnexRepo(dst_path, create=True)
    # actually not needed for this test, but provide same setup as
    # everywhere else:
    target.git_checkout("TMP", "-b")
    source.repo.git_remote_add("target", dst_path)

    # directly publish a file handle, not the dataset itself:
    res = publish(dataset=source, dest="target", path="test-annex.dat")
    eq_(res, opj(source.path, 'test-annex.dat'))

    # only file was published, not the dataset itself:
    assert_not_in("master", target.git_get_branches())
    eq_(Dataset(dst_path).get_dataset_handles(), [])
    assert_not_in("test.dat", target.git_get_files())

    # content is now available from 'target':
    assert_in("target",
              source.repo.annex_whereis('test-annex.dat',
                                        output="descriptions"))
    source.repo.annex_drop('test-annex.dat')
    eq_(source.repo.file_has_content(['test-annex.dat']), [False])
    source.repo._run_annex_command('get', annex_options=['test-annex.dat',
                                                         '--from=target'])
    eq_(source.repo.file_has_content(['test-annex.dat']), [True])
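The snippet above is an excerpt and omits its imports and test fixtures. Below is a minimal sketch of the scaffolding it appears to assume; the module paths and decorator arguments are assumptions (they vary across DataLad versions), not part of the original example.

# Hedged sketch of the assumed test scaffolding; module paths and decorator
# arguments are guesses and may differ between DataLad versions.
from os.path import join as opj

from datalad.api import install, publish
from datalad.support.annexrepo import AnnexRepo
from datalad.distribution.dataset import Dataset
from datalad.tests.utils import (
    assert_in, assert_not_in, eq_, with_tempfile, with_testrepos)


@with_testrepos(flavors=['local'])  # hypothetical fixture selection -> origin
@with_tempfile(mkdir=True)          # provides src_path
@with_tempfile(mkdir=True)          # provides dst_path
def test_publish_file_handle(origin, src_path, dst_path):
    ...  # body as in the example above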
Example #2
class Dataset(object):
    __slots__ = ['_path', '_repo']

    def __init__(self, path):
        self._path = abspath(path)
        self._repo = None

    def __repr__(self):
        return "<Dataset path=%s>" % self.path

    @property
    def path(self):
        """path to the dataset"""
        return self._path

    @property
    def repo(self):
        """Get an instance of the version control system/repo for this dataset,
        or None if there is none yet.

        If creating an instance of GitRepo is guaranteed to be really cheap,
        this could also serve as a test of whether a repo is present.

        Returns
        -------
        GitRepo or AnnexRepo, or None if no repository exists at this path
        """
        if self._repo is None:
            with swallow_logs():
                try:
                    self._repo = AnnexRepo(self._path, create=False, init=False)
                except (InvalidGitRepositoryError, NoSuchPathError, RuntimeError):
                    try:
                        self._repo = GitRepo(self._path, create=False)
                    except (InvalidGitRepositoryError, NoSuchPathError):
                        pass
        elif not isinstance(self._repo, AnnexRepo):
            # the repo was initially detected as a plain GitRepo, but it may
            # have become an AnnexRepo since then, so check whether a
            # git-annex branch has appeared in the meantime
            if 'git-annex' in self._repo.git_get_branches():
                # we acquired git-annex branch
                self._repo = AnnexRepo(self._repo.path, create=False)
        return self._repo

    def register_sibling(self, name, url, publish_url=None, verify=None):
        """Register the location of a sibling dataset under a given name.

        Optionally, different URLs can be given for retrieving information from
        the sibling and for publishing information to it.
        This is a cheap operation that does not confirm that an actual sibling
        dataset is available at the given location, unless `verify` is set.
        The value "dataset" verifies that an accessible dataset is available
        at the given URL, and the value "sibling" additionally verifies that
        this dataset shares at least one commit with self.

        Parameters
        ----------
        name
        url
        publish_url
        verify
          None | "dataset" | "sibling"
        """
        repo = self.repo

        if verify is not None:
            raise NotImplementedError("TODO: verify not implemented yet")

        if name not in repo.git_get_remotes():
            # Add remote
            repo.git_remote_add(name, url)
            if publish_url is not None:
                # set push url:
                repo._git_custom_command('', ["git", "remote",
                                              "set-url",
                                              "--push", name,
                                              publish_url])
            lgr.info("Added remote '%s':\n %s (pull)\n%s (push)." %
                     (name, url, publish_url if publish_url else url))
        else:
            lgr.warning("Remote '%s' already exists. Ignoring." % name)
            raise ValueError("'%s' already exists. Couldn't register sibling."
                             % name)

    def get_dataset_handles(self, pattern=None, fulfilled=None, absolute=False,
                            recursive=False):
        """Get names/paths of all known dataset_handles (subdatasets),
        optionally matching a specific name pattern.


        Parameters
        ----------
        pattern : None
          Not implemented
        fulfilled : None or bool
          If not None, return either only present or absent datasets.
        absolute : bool
          If True, absolute paths will be returned.
        recursive : bool
          If True, recurse into all subdatasets and report their dataset
          handles too.

        Returns
        -------
        list(Dataset paths) or None
          None is returned if there is no repository instance yet. For an
          existing repository with no subdatasets an empty list is returned.
        """
        if pattern is not None:
            raise NotImplementedError

        repo = self.repo
        if repo is None:
            return

        # check whether we have anything in the repo; if not, return early
        if not repo.repo.head.is_valid():
            return []

        try:
            submodules = repo.get_submodules()
        except InvalidGitRepositoryError:
            # this happens when we access a repository with a submodule that
            # has no commits, hence doesn't appear in the index and
            # 'git submodule status' also doesn't list it
            return []

        # filter if desired
        if fulfilled is None:
            submodules = [sm.path for sm in submodules]
        else:
            submodules = [sm.path for sm in submodules
                          if sm.module_exists() == fulfilled]

        # expand list with child submodules. keep all paths relative to parent
        # and convert jointly at the end
        if recursive:
            rsm = []
            for sm in submodules:
                rsm.append(sm)
                sdspath = opj(self._path, sm)
                rsm.extend(
                    [opj(sm, sdsh)
                     for sdsh in Dataset(sdspath).get_dataset_handles(
                         pattern=pattern, fulfilled=fulfilled, absolute=False,
                         recursive=recursive)])
            submodules = rsm

        if absolute:
            return [opj(self._path, sm) for sm in submodules]
        else:
            return submodules

#    def get_file_handles(self, pattern=None, fulfilled=None):
#        """Get paths to all known file_handles, optionally matching a specific
#        name pattern.
#
#        If fulfilled is True, only paths to fulfilled handles are returned,
#        if False, only paths to unfulfilled handles are returned.
#
#        Parameters
#        ----------
#        pattern: str
#        fulfilled: bool
#
#        Returns
#        -------
#        list of str
#          (paths)
#        """
#        raise NotImplementedError("TODO")

    # TODO maybe needs to get its own interface
    def remember_state(self, message, auto_add_changes=True, version=None):
        """
        Parameters
        ----------
        auto_add_changes: bool
        message: str
        update_superdataset: bool
        version: str
        """
        if not self.is_installed():
            raise RuntimeError(
                "cannot remember a state when a dataset is not yet installed")
        repo = self.repo
        if auto_add_changes:
            repo.annex_add('.')
        repo.commit(message)
        if version:
            repo._git_custom_command('', 'git tag "{0}"'.format(version))

    def recall_state(self, whereto):
        """Something that can be used to checkout a particular state
        (tag, commit) to "undo" a change or switch to a otherwise desired
        previous state.

        Parameters
        ----------
        whereto: str
        """
        if not self.is_installed():
            raise RuntimeError(
                "cannot remember a state when a dataset is not yet installed")
        self.repo.git_checkout(whereto)

    def is_installed(self):
        """Returns whether a dataset is installed.

        A dataset is installed when a repository for it exists on the filesystem.

        Returns
        -------
        bool
        """
        return self.path is not None and self.repo is not None
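To make the intended call sequence concrete, here is a short usage sketch for the class above; the path, sibling name, URLs, and tag are made up for illustration and assume an already installed dataset at that location.

# Hedged usage sketch for the Dataset class above; path, sibling name, URLs,
# and tag are hypothetical and only illustrate the call sequence.
ds = Dataset('/data/myds')

if ds.is_installed():
    # register a sibling with separate pull and push URLs
    ds.register_sibling('public', 'http://example.com/myds',
                        publish_url='ssh://example.com/srv/myds')

    # relative paths of all present subdatasets, recursively
    present = ds.get_dataset_handles(fulfilled=True, recursive=True)

    # stage and commit everything under the dataset, then tag the commit
    ds.remember_state("checkpoint before reprocessing", version="v0.1")

    # later: return to the tagged state
    ds.recall_state("v0.1")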