Example #1
0
def test_expandpath():
    """Compare `expandpath` against the stdlib expansion helpers."""
    name = "some"
    # with the second argument set to False a plain name must come back
    # exactly as expanduser()/expandvars() would leave it
    eq_(expandpath(name, False), expanduser(name))
    eq_(expandpath(name, False), expandvars(name))
    # called with defaults, the result has to be an absolute path
    assert_true(isabs(expandpath(name)))
    # this may have to go because of platform issues
    if on_windows:
        # expanduser is not influenced by our HOME setting adjustments
        # for the tests on windows, so the comparison below is skipped
        return
    eq_(expandpath("$HOME"), expanduser('~'))
Example #2
0
def resolve_path(path, ds=None):
    """Resolve a path specification (against a Dataset location)

    Explicit paths (as judged by `is_explicit_path` after expansion) are
    made absolute and normalized via `dlabspath`. Any other path is joined
    onto a reference location and normalized. The reference is the current
    working directory (as reported by `getpwd`) when `ds` is None,
    `ds.path` when `ds` is a `Dataset` instance, and `ds` itself otherwise.

    Parameters
    ----------
    path
      Path specification to resolve.
    ds : Dataset or path or None, optional
      Reference location for non-explicit paths.

    Returns
    -------
    Absolute path (for non-explicit paths this presumes the reference
    location itself is absolute)

    Raises
    ------
    ValueError
      If `RI(path)` is not a `PathRI` instance.
    """
    from datalad.support.network import PathRI

    # first make sure it's actually a valid path
    if not isinstance(RI(path), PathRI):
        raise ValueError("%s is not a valid path" % path)

    expanded = expandpath(path, force_absolute=False)
    if is_explicit_path(expanded):
        # normalize path consistently between two (explicit and implicit) cases
        return dlabspath(expanded, norm=True)

    # pick the reference location
    # note: abspath would disregard symlink in CWD, hence getpwd()
    if ds is None:
        # no dataset given, use CWD as reference
        reference = getpwd()
    elif isinstance(ds, Dataset):
        reference = ds.path
    else:
        reference = ds
    return normpath(opj(reference, expanded))
Example #3
0
def resolve_path(path, ds=None):
    """Resolve a path specification (against a Dataset location)

    An explicit path (the decision is made by `is_explicit_path` on the
    expanded path) is returned absolute and normalized through `dlabspath`.
    A non-explicit path is considered relative to a base location: the
    current working directory (from `getpwd`) if `ds` is None, the `path`
    attribute of `ds` if it is a `Dataset`, or `ds` as given otherwise.

    Returns
    -------
    Absolute path (in the non-explicit case only as absolute as the base
    location is)

    Raises
    ------
    ValueError
      If `path` is not recognized as a `PathRI` by `RI`.
    """
    from datalad.support.network import PathRI

    # reject anything that is not a valid path up front
    path_is_valid = isinstance(RI(path), PathRI)
    if not path_is_valid:
        raise ValueError("%s is not a valid path" % path)

    candidate = expandpath(path, force_absolute=False)
    if is_explicit_path(candidate):
        # normalize path consistently between two (explicit and implicit) cases
        return dlabspath(candidate, norm=True)

    # note: abspath would disregard symlink in CWD
    if ds is None:
        # no dataset given, use CWD as reference
        base = getpwd()
    elif isinstance(ds, Dataset):
        base = ds.path
    else:
        base = ds
    joined = opj(base, candidate)
    return normpath(joined)
Example #4
0
def resolve_path(path, ds=None):
    """Resolve a path specification (against a Dataset location)

    Explicit paths (per `is_explicit_path`, evaluated on the expanded
    path) are returned through `abspath`. All other paths are joined onto
    a reference location and normalized. The reference is the current
    working directory (from `getpwd`) when `ds` is None, `ds.path` when
    `ds` is a `Dataset` instance, and `ds` itself otherwise.

    Returns
    -------
    Absolute path (for non-explicit paths this presumes the reference
    location itself is absolute)
    """
    expanded = expandpath(path, force_absolute=False)
    # TODO: normpath?!
    if is_explicit_path(expanded):
        return abspath(expanded)

    # note: abspath would disregard symlink in CWD
    if ds is None:
        # no dataset given, use CWD as reference
        reference = getpwd()
    elif isinstance(ds, Dataset):
        reference = ds.path
    else:
        reference = ds
    return normpath(opj(reference, expanded))
Example #5
0
def resolve_path(path, ds=None):
    """Resolve a path specification (against a Dataset location)

    After expansion, a path that `is_explicit_path` accepts is handed to
    `abspath` and returned. Anything else is treated as relative to a base
    location: the current working directory (from `getpwd`) if `ds` is
    None, the `path` attribute of `ds` if it is a `Dataset`, or `ds` as
    given otherwise.

    Returns
    -------
    Absolute path (in the non-explicit case only as absolute as the base
    location is)
    """
    candidate = expandpath(path, force_absolute=False)
    # TODO: normpath?!
    if is_explicit_path(candidate):
        return abspath(candidate)

    # note: abspath would disregard symlink in CWD
    if ds is None:
        # no dataset given, use CWD as reference
        base = getpwd()
    elif isinstance(ds, Dataset):
        base = ds.path
    else:
        base = ds
    joined = opj(base, candidate)
    return normpath(joined)
Example #6
0
def resolve_path(path, ds=None):
    """Resolve a path specification (against a Dataset location)

    Any explicit path (absolute or relative) is returned as an absolute
    path, with the current working directory as the reference for an
    explicit relative path. A non-explicit relative path is resolved
    against the dataset location (`ds.path`). If no dataset is provided,
    the current working directory is used instead.

    Returns
    -------
    Absolute path
    """
    expanded = expandpath(path, force_absolute=False)
    # explicit paths and the no-dataset case both resolve against CWD
    # TODO: Check whether we should use PWD instead of CWD here. Is it done
    # by abspath?
    if is_explicit_path(expanded) or ds is None:
        return abspath(expanded)
    return normpath(opj(ds.path, expanded))
Example #7
0
def resolve_path(path, ds=None):
    """Resolve a path specification (against a Dataset location)

    An explicit path (absolute or relative) comes back as an absolute
    path; an explicit relative path uses the current working directory as
    its reference. A non-explicit relative path is taken relative to the
    dataset location (`ds.path`), or to the current working directory when
    no dataset is given.

    Returns
    -------
    Absolute path
    """
    candidate = expandpath(path, force_absolute=False)
    if not is_explicit_path(candidate) and ds is not None:
        # non-explicit path with a dataset: resolve against its location
        joined = opj(ds.path, candidate)
        return normpath(joined)
    # explicit path, or no dataset given: use CWD as reference
    # TODO: Check whether we should use PWD instead of CWD here. Is it done
    # by abspath?
    return abspath(candidate)
Example #8
0
def test_is_explicit_path():
    """Check `is_explicit_path` on an expanded and on a bare relative path."""
    # by default expanded paths are absolute, hence explicit
    expanded_home = expandpath('~')
    assert_true(is_explicit_path(expanded_home))
    # a bare name is not an explicit path
    bare_name = "here"
    assert_false(is_explicit_path(bare_name))
Example #9
0
def test_expandpath():
    """Compare `expandpath` against the stdlib expansion helpers."""
    name = "some"
    # with the second argument set to False a plain name must come back
    # exactly as expanduser()/expandvars() would leave it
    eq_(expandpath(name, False), expanduser(name))
    eq_(expandpath(name, False), expandvars(name))
    # called with defaults, the result has to be an absolute path
    assert_true(isabs(expandpath(name)))
    # this may have to go because of platform issues
    eq_(expandpath("$HOME"), expanduser('~'))
Example #10
0
    def __call__(dataset=None, path=None, source=None, recursive=False,
                 add_data_to_git=False):
        # Install a dataset, or content into a dataset.
        #
        # NOTE(review): documented with comments rather than a docstring,
        # because the enclosing class is not visible here and may derive
        # generated documentation from `__call__.__doc__` -- confirm before
        # converting this into a docstring.
        #
        # Parameters
        # ----------
        # dataset : Dataset or None
        #   Dataset to operate on; anything that is not a `Dataset` instance
        #   is passed to `Dataset()`.
        # path : str or list of str or None
        #   Installation target(s). A list with several entries triggers one
        #   installation per entry. A URL that points to something local is
        #   converted into a local path.
        # source : str or None
        #   Location to install from.
        # recursive : bool
        #   Passed on to subdataset installation; also required to add a
        #   directory.
        # add_data_to_git : bool
        #   Add untracked content via `git_add` instead of `annex_add`.
        #
        # Returns
        # -------
        # Depends on the case: a list of results for multiple targets, the
        # `Dataset` when the dataset itself was the target, the absolute
        # path of a file, the result of a subdataset installation helper,
        # or the path(s) of added files (None if nothing was added).
        #
        # Raises
        # ------
        # InsufficientArgumentsError, ValueError, FileInGitError,
        # RuntimeError -- see the individual `raise` statements below.
        lgr.debug("Installation attempt started")
        # shortcut
        ds = dataset

        if ds is not None and not isinstance(ds, Dataset):
            ds = Dataset(ds)

        if isinstance(path, list):
            if not path:
                # normalize value to expected state when nothing was provided
                path = None
            elif len(path) == 1:
                # we can simply continue with the function as called with a
                # single argument
                path = path[0]
            else:
                lgr.debug("Installation of multiple targets was requested: {0}".format(path))
                # every per-target call must see the same options as this
                # call, including `add_data_to_git`
                return [Install.__call__(
                        dataset=ds,
                        path=p,
                        source=source,
                        recursive=recursive,
                        add_data_to_git=add_data_to_git) for p in path]

        # resolve the target location against the provided dataset
        if path is not None:
            # make sure it is not a URL, `resolve_path` cannot handle that
            if is_url(path):
                try:
                    path = get_local_path_from_url(path)
                    path = resolve_path(path, ds)
                except ValueError:
                    # URL doesn't point to a local something
                    pass
            else:
                path = resolve_path(path, ds)

        # any `path` argument that point to something local now resolved and
        # is no longer a URL

        # if we have no dataset given, figure out which one we need to operate
        # on, based on the resolved target location (that is now guaranteed to
        # be specified, but only if path isn't a URL (anymore) -> special case,
        # handles below
        if ds is None and path is not None and not is_url(path):
            # try to find a dataset at or above the installation target
            dspath = GitRepo.get_toppath(abspath(path))
            if dspath is None:
                # no top-level dataset found, use path as such
                dspath = path
            ds = Dataset(dspath)

        if ds is None and source is None and path is not None:
            # no dataset, no source
            # this could be a shortcut install call, where the first
            # arg identifies the source
            if is_url(path) or os.path.exists(path):
                # we have an actual URL -> this should be the source
                # OR
                # it is not a URL, but it exists locally
                lgr.debug(
                    "Single argument given to install and no dataset found. "
                    "Assuming the argument identifies a source location.")
                source = path
                path = None

        lgr.debug("Resolved installation target: {0}".format(path))

        if ds is None and path is None and source is not None:
            # we got nothing but a source. do something similar to git clone
            # and derive the path from the source and continue
            lgr.debug(
                "Neither dataset not target installation path provided. "
                "Assuming installation of a remote dataset. "
                "Deriving destination path from given source {0}".format(
                    source))
            ds = Dataset(_installationpath_from_url(source))

        if not path and ds is None:
            # no dataset, no target location, nothing to do
            raise InsufficientArgumentsError(
                "insufficient information for installation (needs at "
                "least a dataset or an installation path")

        assert(ds is not None)

        lgr.debug("Resolved target dataset for installation: {0}".format(ds))

        vcs = ds.repo
        if vcs is None:
            # TODO check that a "ds.path" actually points to a TOPDIR
            # should be the case already, but maybe nevertheless check
            try:
                with swallow_logs():
                    vcs = Install._get_new_vcs(ds, source, vcs)
            except GitCommandError:
                lgr.debug("Cannot retrieve from URL: {0}".format(source))
                # maybe source URL was missing a '/.git'
                if source and not source.rstrip('/').endswith('/.git'):
                    source = '{0}/.git'.format(source.rstrip('/'))
                    lgr.debug("Attempt to retrieve from URL: {0}".format(source))
                    vcs = Install._get_new_vcs(ds, source, vcs)
                else:
                    lgr.debug("Unable to establish repository instance at: {0}".format(ds.path))
                    raise

        assert(ds.repo)  # is automagically re-evaluated in the .repo property

        runner = Runner()

        if path is None or path == ds.path:
            # if the goal was to install this dataset, we are done,
            # except for 'recursive'.

            # TODO: For now 'recursive' means just submodules.
            # See --with-data vs. -- recursive and figure it out
            if recursive:
                for sm in ds.repo.get_submodules():
                    _install_subds_from_flexible_source(
                        ds, sm.path, sm.url, recursive=recursive)
            return ds

        # at this point this dataset is "installed", now we can test whether to
        # install something into the dataset

        # needed by the logic below
        assert(isabs(path))

        # express the destination path relative to the root of this dataset
        relativepath = relpath(path, start=ds.path)
        # the check has to look at the *relative* path: `path` is absolute
        # (asserted above) and hence can never start with `pardir`.
        # Compare whole path components, so that a name like '..foo' inside
        # the dataset is not mistaken for a location outside of it.
        if relativepath == pardir or \
                relativepath.startswith(pardir + os.sep):
            raise ValueError("installation path outside dataset")

        lgr.debug(
            "Resolved installation target relative to dataset {0}: {1}".format(
                ds, relativepath))

        # this dataset must already know everything necessary
        ###################################################
        # FLOW GUIDE
        #
        # at this point we know nothing about the
        # installation target
        ###################################################
        try:
            # it is simplest to let annex tell us what we are dealing with
            lgr.debug("Trying to fetch file %s using annex", relativepath)
            if not isinstance(vcs, AnnexRepo):
                assert(isinstance(vcs, GitRepo))
                # FLOW GUIDE
                # this is not an annex repo, but we raise exceptions
                # to be able to treat them alike in the special case handling
                # below
                if not exists(path):
                    raise IOError("path doesn't exist yet, might need special handling")
                elif relativepath in vcs.get_indexed_files():
                    # relativepath is in git
                    raise FileInGitError("We need to handle it as known to git")
                else:
                    raise FileNotInAnnexError("We don't have yet annex repo here")
            if vcs.get_file_key(relativepath):
                # FLOW GUIDE EXIT POINT
                # this is an annex'ed file -> get it
                # TODO implement `copy --from` using `source`
                # TODO fail if `source` is something strange
                vcs.annex_get(relativepath)
                # return the absolute path to the installed file
                return path

        except FileInGitError:
            ###################################################
            # FLOW GUIDE
            #
            # `path` is either
            # - a  file already checked into Git
            # - known submodule
            ###################################################
            lgr.log(5, "FileInGitError logic")
            if source is not None:
                raise FileInGitError("File %s is already in git. Specifying source (%s) makes no sense"
                                     % (path, source))
            # file is checked into git directly -> nothing to do
            # OR this is a submodule of this dataset
            submodule = [sm for sm in ds.repo.get_submodules()
                         if sm.path == relativepath]
            if not submodule:
                # FLOW GUIDE EXIT POINT
                # this is a file in Git and no submodule, just return its path
                lgr.debug("Don't act, data already present in Git")
                return path
            elif len(submodule) > 1:
                raise RuntimeError(
                    "more than one submodule registered at the same path?")
            submodule = submodule[0]

            # FLOW GUIDE EXIT POINT
            # we are dealing with a known submodule (i.e. `source`
            # doesn't matter) -> check it out
            lgr.debug("Install subdataset at: {0}".format(submodule.path))
            subds = _install_subds_from_flexible_source(
                ds, submodule.path, submodule.url, recursive=recursive)
            return subds

        except FileNotInAnnexError:
            ###################################################
            # FLOW GUIDE
            #
            # `path` is either
            # - content of a subdataset
            # - an untracked file in this dataset
            # - an entire untracked/unknown existing subdataset
            ###################################################
            lgr.log(5, "FileNotInAnnexError logic")
            subds = get_containing_subdataset(ds, relativepath)
            if ds.path != subds.path:
                # FLOW GUIDE EXIT POINT
                # target path belongs to a known subdataset, hand
                # installation over to it
                return subds.install(
                    path=relpath(path, start=subds.path),
                    source=source,
                    recursive=recursive,
                    add_data_to_git=add_data_to_git)

            # FLOW GUIDE
            # this must be an untracked/existing something, so either
            # - a file
            # - a directory
            # - an entire repository
            if exists(opj(path, '.git')):
                # FLOW GUIDE EXIT POINT
                # this is an existing repo and must be in-place turned into
                # a submodule of this dataset
                return _install_subds_inplace(
                    ds, path, relativepath, source, runner)

            # FLOW GUIDE EXIT POINT
            # - untracked file or directory in this dataset
            if isdir(path) and not recursive:
                # this is a directory and we want --recursive for it
                raise ValueError(
                    "installation of a directory requires the `recursive` flag")

            # few sanity checks
            if source and abspath(source) != path:
                # report target and source in the order the message names them
                raise ValueError(
                    "installation target already exists, but `source` points to "
                    "another location (target: '{0}', source: '{1}')".format(
                        path, source))

            if not add_data_to_git and not (isinstance(vcs, AnnexRepo)):
                raise RuntimeError(
                    "Trying to install file(s) into a dataset "
                    "with a plain Git repository. First initialize annex, or "
                    "provide override flag.")

            # switch `add` procedure between Git and Git-annex according to flag
            if add_data_to_git:
                vcs.git_add(relativepath)
                added_files = resolve_path(relativepath, ds)
            else:
                # do a blunt `annex add`
                added_files = vcs.annex_add(relativepath)
                # return just the paths of the installed components
                if isinstance(added_files, list):
                    added_files = [resolve_path(i['file'], ds) for i in added_files]
                else:
                    added_files = resolve_path(added_files['file'], ds)
            # report "nothing added" uniformly as None
            return added_files or None

        except IOError:
            ###################################################
            # FLOW GUIDE
            #
            # more complicated special cases -- `path` is either
            # - a file/subdataset in a not yet initialized but known
            #   submodule
            # - an entire untracked/unknown existing subdataset
            # - non-existing content that should be installed from `source`
            ###################################################
            lgr.log(5, "IOError logic")
            # we can end up here in two cases ATM
            if (exists(path) or islink(path)) or source is None:
                # FLOW GUIDE
                # - target exists but this dataset's VCS rejects it,
                #   so it should be part of a subdataset
                # or
                # - target doesn't exist, but no source is given, so
                #   it could be a handle that is actually contained in
                #   a not yet installed subdataset
                subds = get_containing_subdataset(ds, relativepath)
                if ds.path != subds.path:
                    # FLOW GUIDE
                    # target path belongs to a subdataset, hand installation
                    # over to it
                    if not subds.is_installed():
                        # FLOW GUIDE
                        # we are dealing with a target in a not yet
                        # available but known subdataset -> install it first
                        ds.install(subds.path, recursive=recursive)
                    return subds.install(
                        path=relpath(path, start=subds.path),
                        source=source,
                        recursive=recursive,
                        add_data_to_git=add_data_to_git)

                # FLOW GUIDE EXIT POINT
                raise InsufficientArgumentsError(
                    "insufficient information for installation: the "
                    "installation target {0} doesn't exists, isn't a "
                    "known handle of dataset {1}, and no `source` "
                    "information was provided.".format(path, ds))

            # `source` is not None here, but may still be empty
            if not source:
                # FLOW GUIDE EXIT POINT
                raise InsufficientArgumentsError(
                    "insufficient information for installation: the "
                    "installation target {0} doesn't exists, isn't a "
                    "known handle of dataset {1}, and no `source` "
                    "information was provided.".format(path, ds))

            source_path = expandpath(source)
            if exists(source_path):
                # FLOW GUIDE EXIT POINT
                # this could be
                # - local file
                # - local directory
                # - repository outside the dataset
                # we only want to support the last case of locally cloning
                # a repo -- fail otherwise
                if exists(opj(source_path, '.git')):
                    return _install_subds_from_flexible_source(
                        ds, relativepath, source_path, recursive)

                raise ValueError(
                    "installing individual local files or directories is not "
                    "supported, copy/move them into the dataset first")

            # FLOW GUIDE
            # `source` is non-local, it could be:
            #   - repository
            #   - file
            # we have no further evidence, hence we need to try
            try:
                # FLOW GUIDE EXIT POINT
                # assume it is a dataset
                return _install_subds_from_flexible_source(
                    ds, relativepath, source, recursive)
            except CommandError:
                # FLOW GUIDE EXIT POINT
                # apparently not a repo, assume it is a file url
                vcs.annex_addurl_to_file(relativepath, source)
                return path