def test_expandpath():
    """Exercise `expandpath` with and without forcing an absolute result."""
    # without forcing absolute paths the result has to agree with both
    # plain expansion helpers
    for plain_expand in (expanduser, expandvars):
        eq_(expandpath("some", False), plain_expand('some'))

    # by default the outcome is an absolute path
    assert_true(isabs(expandpath('some')))

    # this may have to go because of platform issues
    if on_windows:
        # expanduser is not influenced by our HOME setting adjustments
        # for the tests on windows
        return
    eq_(expandpath("$HOME"), expanduser('~'))
def resolve_path(path, ds=None):
    """Resolve a path specification (against a Dataset location)

    Any explicit path (absolute or relative) is returned as an absolute path.
    In case of an explicit relative path, the current working directory is
    used as a reference. Any non-explicit relative path is resolved against
    a dataset location, i.e. considered relative to the location of the
    dataset. If no dataset is provided, the current working directory is
    used.

    Parameters
    ----------
    path
      Path specification to resolve.
    ds
      Either a `Dataset` instance (its `.path` is used as reference), any
      other value that is used as the reference location as-is, or None.

    Returns
    -------
    Absolute path

    Raises
    ------
    ValueError
      If `RI(path)` does not yield a `PathRI` instance.
    """
    from datalad.support.network import PathRI

    # first make sure it's actually a valid path
    if not isinstance(RI(path), PathRI):
        raise ValueError("%s is not a valid path" % path)

    expanded = expandpath(path, force_absolute=False)
    if is_explicit_path(expanded):
        # normalize path consistently between two (explicit and implicit)
        # cases
        return dlabspath(expanded, norm=True)

    # pick the reference location for a non-explicit path
    if ds is None:
        # no dataset given, use CWD as reference
        # note: abspath would disregard symlink in CWD
        reference = getpwd()
    elif isinstance(ds, Dataset):
        reference = ds.path
    else:
        reference = ds
    return normpath(opj(reference, expanded))
def resolve_path(path, ds=None):
    """Resolve a path specification (against a Dataset location)

    Any explicit path (absolute or relative) is returned as an absolute path.
    In case of an explicit relative path, the current working directory is
    used as a reference. Any non-explicit relative path is resolved against
    a dataset location, i.e. considered relative to the location of the
    dataset. If no dataset is provided, the current working directory is
    used.

    Parameters
    ----------
    path
      Path specification to resolve.
    ds
      Either a `Dataset` instance (its `.path` is used as reference), any
      other value that is used as the reference location as-is, or None.

    Returns
    -------
    Absolute path
    """
    expanded = expandpath(path, force_absolute=False)

    # TODO: normpath?!
    if is_explicit_path(expanded):
        return abspath(expanded)

    # pick the reference location for a non-explicit path
    if ds is None:
        # no dataset given, use CWD as reference
        # note: abspath would disregard symlink in CWD
        reference = getpwd()
    elif isinstance(ds, Dataset):
        reference = ds.path
    else:
        reference = ds
    return normpath(opj(reference, expanded))
def resolve_path(path, ds=None):
    """Resolve a path specification (against a Dataset location)

    Any explicit path (absolute or relative) is returned as an absolute path.
    In case of an explicit relative path, the current working directory is
    used as a reference. Any non-explicit relative path is resolved against
    a dataset location, i.e. considered relative to the location of the
    dataset. If no dataset is provided, the current working directory is
    used.

    Parameters
    ----------
    path
      Path specification to resolve.
    ds
      Object whose `.path` attribute is the reference location for
      non-explicit paths, or None.

    Returns
    -------
    Absolute path
    """
    expanded = expandpath(path, force_absolute=False)

    # Explicit paths and the "no dataset given" case are handled alike:
    # the CWD serves as reference.
    # TODO: Check whether we should use PWD instead of CWD here. Is it done
    # by abspath?
    if is_explicit_path(expanded) or ds is None:
        return abspath(expanded)

    return normpath(opj(ds.path, expanded))
def test_is_explicit_path():
    """Check `is_explicit_path` on an expanded home path and a bare name."""
    # by default expanded paths are absolute, hence explicit
    expanded_home = expandpath('~')
    assert_true(is_explicit_path(expanded_home))

    # a bare name is not considered explicit
    bare_name = "here"
    assert_false(is_explicit_path(bare_name))
def test_expandpath():
    """Exercise `expandpath` with and without forcing an absolute result."""
    # without forcing absolute paths the result must match user expansion ...
    not_forced = expandpath("some", False)
    eq_(not_forced, expanduser('some'))

    # ... as well as variable expansion
    not_forced = expandpath("some", False)
    eq_(not_forced, expandvars('some'))

    # by default the outcome is an absolute path
    default_result = expandpath('some')
    assert_true(isabs(default_result))

    # this may have to go because of platform issues
    home_via_variable = expandpath("$HOME")
    eq_(home_via_variable, expanduser('~'))
def __call__(dataset=None, path=None, source=None, recursive=False,
             add_data_to_git=False):
    # Install a dataset, or a component (file, directory, subdataset) of a
    # dataset.
    #
    # NOTE: documented as a comment block rather than a docstring so that
    # this function's `__doc__` is left exactly as it was.
    #
    # Parameters
    # ----------
    # dataset
    #   `Dataset` instance, or any value accepted by the `Dataset`
    #   constructor, or None to have the dataset derived from `path` or
    #   `source`.
    # path
    #   Installation target: a single path/URL, a list of them, or None.
    #   An empty list is treated like None; a list with several entries
    #   results in one call per entry.
    # source
    #   Where to obtain the target from (URL or local path), or None.
    # recursive
    #   Forwarded to subdataset installation; also required to add an
    #   untracked directory.
    # add_data_to_git
    #   If true, untracked content is added via `git_add` instead of
    #   `annex_add`.
    #
    # Returns
    # -------
    # Depending on the branch taken: the `Dataset` itself, the absolute path
    # of the installed file, the result of a subdataset installation, a path
    # or list of paths of added files, a list with one result per target
    # (multiple targets), or None.
    #
    # Raises
    # ------
    # InsufficientArgumentsError
    #   Not enough information to determine what to install, or from where.
    # ValueError
    #   Target outside the dataset, directory without `recursive`,
    #   conflicting `source`, or an unsupported local source.
    # RuntimeError
    #   Ambiguous submodule registration, or adding files to a plain Git
    #   repository without `add_data_to_git`.
    # FileInGitError
    #   A `source` was given for a file already tracked in Git.
    # GitCommandError
    #   Re-raised when no repository could be established.
    lgr.debug("Installation attempt started")
    # shortcut
    ds = dataset

    if ds is not None and not isinstance(ds, Dataset):
        ds = Dataset(ds)

    if isinstance(path, list):
        if not len(path):
            # normalize value to expected state when nothing was provided
            path = None
        elif len(path) == 1:
            # we can simply continue with the function as called with a
            # single argument
            path = path[0]
        else:
            lgr.debug(
                "Installation of multiple targets was requested: {0}".format(
                    path))
            # forward every option, including `add_data_to_git`, so that
            # each target is handled exactly like a single-target call
            return [Install.__call__(
                dataset=ds,
                path=p,
                source=source,
                recursive=recursive,
                add_data_to_git=add_data_to_git) for p in path]

    # resolve the target location against the provided dataset
    if path is not None:
        # make sure it is not a URL, `resolve_path` cannot handle that
        if is_url(path):
            try:
                path = get_local_path_from_url(path)
                path = resolve_path(path, ds)
            except ValueError:
                # URL doesn't point to a local something
                pass
        else:
            path = resolve_path(path, ds)

    # any `path` argument that point to something local now resolved and
    # is no longer a URL

    # if we have no dataset given, figure out which one we need to operate
    # on, based on the resolved target location (that is now guaranteed to
    # be specified, but only if path isn't a URL (anymore) -> special case,
    # handles below
    if ds is None and path is not None and not is_url(path):
        # try to find a dataset at or above the installation target
        dspath = GitRepo.get_toppath(abspath(path))
        if dspath is None:
            # no top-level dataset found, use path as such
            dspath = path
        ds = Dataset(dspath)

    if ds is None and source is None and path is not None:
        # no dataset, no source
        # this could be a shortcut install call, where the first
        # arg identifies the source
        if is_url(path) or os.path.exists(path):
            # we have an actual URL -> this should be the source
            # OR
            # it is not a URL, but it exists locally
            lgr.debug(
                "Single argument given to install and no dataset found. "
                "Assuming the argument identifies a source location.")
            source = path
            path = None

    lgr.debug("Resolved installation target: {0}".format(path))

    if ds is None and path is None and source is not None:
        # we got nothing but a source. do something similar to git clone
        # and derive the path from the source and continue
        lgr.debug(
            "Neither dataset not target installation path provided. "
            "Assuming installation of a remote dataset. "
            "Deriving destination path from given source {0}".format(
                source))
        ds = Dataset(_installationpath_from_url(source))

    if not path and ds is None:
        # no dataset, no target location, nothing to do
        raise InsufficientArgumentsError(
            "insufficient information for installation (needs at "
            "least a dataset or an installation path")

    assert(ds is not None)

    lgr.debug("Resolved target dataset for installation: {0}".format(ds))

    vcs = ds.repo
    if vcs is None:
        # TODO check that a "ds.path" actually points to a TOPDIR
        # should be the case already, but maybe nevertheless check
        try:
            with swallow_logs():
                vcs = Install._get_new_vcs(ds, source, vcs)
        except GitCommandError:
            lgr.debug("Cannot retrieve from URL: {0}".format(source))
            # maybe source URL was missing a '/.git'
            if source and not source.rstrip('/').endswith('/.git'):
                source = '{0}/.git'.format(source.rstrip('/'))
                lgr.debug("Attempt to retrieve from URL: {0}".format(source))
                vcs = Install._get_new_vcs(ds, source, vcs)
            else:
                lgr.debug(
                    "Unable to establish repository instance at: {0}".format(
                        ds.path))
                raise

    assert(ds.repo)  # is automagically re-evaluated in the .repo property

    runner = Runner()

    if path is None or path == ds.path:
        # if the goal was to install this dataset, we are done,
        # except for 'recursive'.

        # TODO: For now 'recursive' means just submodules.
        # See --with-data vs. -- recursive and figure it out
        if recursive:
            for sm in ds.repo.get_submodules():
                _install_subds_from_flexible_source(
                    ds, sm.path, sm.url, recursive=recursive)
        return ds

    # at this point this dataset is "installed", now we can test whether to
    # install something into the dataset

    # needed by the logic below
    assert(isabs(path))

    # express the destination path relative to the root of this dataset
    relativepath = relpath(path, start=ds.path)
    # `path` is absolute at this point, so only the relative form can tell
    # whether the target escapes the dataset. Match a leading `..` path
    # component only, so that names such as "..foo" are not rejected.
    if relativepath == pardir or relativepath.startswith(pardir + os.sep):
        raise ValueError("installation path outside dataset")

    lgr.debug(
        "Resolved installation target relative to dataset {0}: {1}".format(
            ds, relativepath))

    # this dataset must already know everything necessary
    ###################################################
    # FLOW GUIDE
    #
    # at this point we know nothing about the
    # installation targether
    ###################################################
    try:
        # it is simplest to let annex tell us what we are dealing with
        lgr.debug("Trying to fetch file %s using annex", relativepath)
        if not isinstance(vcs, AnnexRepo):
            assert(isinstance(vcs, GitRepo))
            # FLOW GUIDE
            # this is not an annex repo, but we raise exceptions
            # to be able to treat them alike in the special case handling
            # below
            if not exists(path):
                raise IOError(
                    "path doesn't exist yet, might need special handling")
            elif relativepath in vcs.get_indexed_files():
                # relativepath is in git
                raise FileInGitError("We need to handle it as known to git")
            else:
                raise FileNotInAnnexError("We don't have yet annex repo here")
        if vcs.get_file_key(relativepath):
            # FLOW GUIDE EXIT POINT
            # this is an annex'ed file -> get it
            # TODO implement `copy --from` using `source`
            # TODO fail if `source` is something strange
            vcs.annex_get(relativepath)
            # return the absolute path to the installed file
            return path
        # NOTE(review): when `get_file_key` yields a false value, no
        # handler below is triggered and the function returns None.

    except FileInGitError:
        ###################################################
        # FLOW GUIDE
        #
        # `path` is either
        # - a file already checked into Git
        # - known submodule
        ###################################################
        lgr.log(5, "FileInGitError logic")
        if source is not None:
            raise FileInGitError("File %s is already in git. Specifying source (%s) makes no sense"
                                 % (path, source))
        # file is checked into git directly -> nothing to do
        # OR this is a submodule of this dataset
        submodule = [sm for sm in ds.repo.get_submodules()
                     if sm.path == relativepath]
        if not len(submodule):
            # FLOW GUIDE EXIT POINT
            # this is a file in Git and no submodule, just return its path
            lgr.debug("Don't act, data already present in Git")
            return path
        elif len(submodule) > 1:
            raise RuntimeError(
                "more than one submodule registered at the same path?")
        submodule = submodule[0]

        # FLOW GUIDE EXIT POINT
        # we are dealing with a known submodule (i.e. `source`
        # doesn't matter) -> check it out
        lgr.debug("Install subdataset at: {0}".format(submodule.path))
        subds = _install_subds_from_flexible_source(
            ds, submodule.path, submodule.url, recursive=recursive)
        return subds

    except FileNotInAnnexError:
        ###################################################
        # FLOW GUIDE
        #
        # `path` is either
        # - content of a subdataset
        # - an untracked file in this dataset
        # - an entire untracked/unknown existing subdataset
        ###################################################
        lgr.log(5, "FileNotInAnnexError logic")
        subds = get_containing_subdataset(ds, relativepath)
        if ds.path != subds.path:
            # FLOW GUIDE EXIT POINT
            # target path belongs to a known subdataset, hand
            # installation over to it
            return subds.install(
                path=relpath(path, start=subds.path),
                source=source,
                recursive=recursive,
                add_data_to_git=add_data_to_git)

        # FLOW GUIDE
        # this must be an untracked/existing something, so either
        # - a file
        # - a directory
        # - an entire repository
        if exists(opj(path, '.git')):
            # FLOW GUIDE EXIT POINT
            # this is an existing repo and must be in-place turned into
            # a submodule of this dataset
            return _install_subds_inplace(
                ds, path, relativepath, source, runner)

        # FLOW GUIDE EXIT POINT
        # - untracked file or directory in this dataset
        if isdir(path) and not recursive:
            # this is a directory and we want --recursive for it
            raise ValueError(
                "installation of a directory requires the `recursive` flag")

        # few sanity checks
        if source and abspath(source) != path:
            # report target and source in the order the message names them
            raise ValueError(
                "installation target already exists, but `source` points to "
                "another location (target: '{0}', source: '{1}')".format(
                    path, source))

        if not add_data_to_git and not (isinstance(vcs, AnnexRepo)):
            raise RuntimeError(
                "Trying to install file(s) into a dataset "
                "with a plain Git repository. First initialize annex, or "
                "provide override flag.")

        # switch `add` procedure between Git and Git-annex according to flag
        if add_data_to_git:
            vcs.git_add(relativepath)
            added_files = resolve_path(relativepath, ds)
        else:
            # do a blunt `annex add`
            added_files = vcs.annex_add(relativepath)
            # return just the paths of the installed components
            if isinstance(added_files, list):
                added_files = [resolve_path(i['file'], ds)
                               for i in added_files]
            else:
                added_files = resolve_path(added_files['file'], ds)
        if added_files:
            return added_files
        else:
            return None

    except IOError:
        ###################################################
        # FLOW GUIDE
        #
        # more complicated special cases -- `path` is either
        # - a file/subdataset in a not yet initialized but known
        #   submodule
        # - an entire untracked/unknown existing subdataset
        # - non-existing content that should be installed from `source`
        ###################################################
        lgr.log(5, "IOError logic")
        # we can end up here in two cases ATM
        if (exists(path) or islink(path)) or source is None:
            # FLOW GUIDE
            # - target exists but this dataset's VCS rejects it,
            #   so it should be part of a subdataset
            # or
            # - target doesn't exist, but no source is given, so
            #   it could be a handle that is actually contained in
            #   a not yet installed subdataset
            subds = get_containing_subdataset(ds, relativepath)
            if ds.path != subds.path:
                # FLOW GUIDE
                # target path belongs to a subdataset, hand installation
                # over to it
                if not subds.is_installed():
                    # FLOW GUIDE
                    # we are dealing with a target in a not yet
                    # available but known subdataset -> install it first
                    ds.install(subds.path, recursive=recursive)
                return subds.install(
                    path=relpath(path, start=subds.path),
                    source=source,
                    recursive=recursive,
                    add_data_to_git=add_data_to_git)

            # FLOW GUIDE EXIT POINT
            raise InsufficientArgumentsError(
                "insufficient information for installation: the "
                "installation target {0} doesn't exists, isn't a "
                "known handle of dataset {1}, and no `source` "
                "information was provided.".format(path, ds))

        # still reachable for a false-but-not-None `source` (e.g. '')
        if not source:
            # FLOW GUIDE EXIT POINT
            raise InsufficientArgumentsError(
                "insufficient information for installation: the "
                "installation target {0} doesn't exists, isn't a "
                "known handle of dataset {1}, and no `source` "
                "information was provided.".format(path, ds))

        source_path = expandpath(source)
        if exists(source_path):
            # FLOW GUIDE EXIT POINT
            # this could be
            # - local file
            # - local directory
            # - repository outside the dataset
            # we only want to support the last case of locally cloning
            # a repo -- fail otherwise
            if exists(opj(source_path, '.git')):
                return _install_subds_from_flexible_source(
                    ds, relativepath, source_path, recursive)

            raise ValueError(
                "installing individual local files or directories is not "
                "supported, copy/move them into the dataset first")

        # FLOW GUIDE
        # `source` is non-local, it could be:
        #   - repository
        #   - file
        # we have no further evidence, hence we need to try
        try:
            # FLOW GUIDE EXIT POINT
            # assume it is a dataset
            return _install_subds_from_flexible_source(
                ds, relativepath, source, recursive)
        except CommandError:
            # FLOW GUIDE EXIT POINT
            # apparently not a repo, assume it is a file url
            vcs.annex_addurl_to_file(relativepath, source)
            return path