def test_install_recursive_with_data(src, path):
    """Recursive install with get_data=True must also fetch annexed content."""

    def _all_content_present(repo):
        # True when every annexed file in the repo has its content locally
        return all(repo.file_has_content(repo.get_annexed_files()))

    # repeat the installation, this time requesting the data as well
    res = install(path, source=src, recursive=True, get_data=True,
                  result_filter=None, result_xfm=None)
    assert_status('ok', res)
    # expected: 3 dataset installs + 3 content files + 3 per-dataset reports
    # that all content below the respective root was obtained
    eq_(len(res), 9)
    assert_result_count(res, 3, type='dataset')
    # installation recurses top-down, hence the superdataset is reported first
    eq_(res[0]['path'], path)
    root = YieldDatasets()(res[0])
    ok_(root.is_installed())
    if isinstance(root.repo, AnnexRepo):
        ok_(_all_content_present(root.repo))
    for sub in root.subdatasets(recursive=True, result_xfm='datasets'):
        ok_(sub.is_installed(), "Not installed: %s" % (sub,))
        if isinstance(sub.repo, AnnexRepo):
            ok_(_all_content_present(sub.repo))
def test_install_recursive_with_data(src=None, path=None):
    """Recursive install with get_data=True must also fetch annexed content."""
    _make_dataset_hierarchy(src)

    def _content_complete(repo):
        # query availability-evaluated annex info; every record must report
        # its content as locally present
        info = repo.get_content_annexinfo(init=None, eval_availability=True)
        return all(rec["has_content"] for rec in info.values())

    # repeat the installation, this time requesting the data as well
    res = install(path, source=src, recursive=True, get_data=True,
                  result_filter=None, result_xfm=None)
    assert_status('ok', res)
    # one 'install' result per dataset in the hierarchy, plus the two
    # fetched content files
    assert_result_count(res, 5, type='dataset', action='install')
    assert_result_count(res, 2, type='file', action='get')
    # installation recurses top-down, hence the superdataset is reported first
    eq_(res[0]['path'], path)
    root = YieldDatasets()(res[0])
    ok_(root.is_installed())
    if isinstance(root.repo, AnnexRepo):
        ok_(_content_complete(root.repo))
    for sub in root.subdatasets(recursive=True, result_xfm='datasets'):
        ok_(sub.is_installed(), "Not installed: %s" % (sub,))
        if isinstance(sub.repo, AnnexRepo):
            ok_(_content_complete(sub.repo))
def test_install_recursive_with_data(src, path):
    """Re-install recursively, this time also retrieving file content."""
    res = install(path, source=src, recursive=True, get_data=True,
                  result_filter=None, result_xfm=None)
    assert_status('ok', res)
    # three datasets, one content file each, plus one report per dataset
    # that everything under its root was obtained: 3 + 3 + 3
    eq_(len(res), 9)
    assert_result_count(res, 3, type='dataset')
    # top-down recursion puts the toplevel dataset first in the results
    eq_(res[0]['path'], path)
    top = YieldDatasets()(res[0])
    ok_(top.is_installed())
    if isinstance(top.repo, AnnexRepo):
        present = top.repo.file_has_content(top.repo.get_annexed_files())
        ok_(all(present))
    for sub in top.subdatasets(recursive=True, result_xfm='datasets'):
        ok_(sub.is_installed(), "Not installed: %s" % (sub,))
        if isinstance(sub.repo, AnnexRepo):
            present = sub.repo.file_has_content(sub.repo.get_annexed_files())
            ok_(all(present))
def test_install_datasets_root(tdir):
    """Install the DataLad registry superdataset ('///') into a temp dir.

    Verifies that a repeated install is reported as 'notneeded', and that
    installing into an existing, non-empty, non-dataset directory fails
    with a telling error message.
    """
    # function-scope import: only needed to prepare the fixture directory
    import os

    with chpwd(tdir):
        ds = install("///")
        ok_(ds.is_installed())
        eq_(ds.path, opj(tdir, get_datasets_topdir()))

        # do it a second time: must be a no-op on the same dataset
        result = install("///", result_xfm=None, return_type='list')
        assert_status('notneeded', result)
        eq_(YieldDatasets()(result[0]), ds)

        # and a third time into an existing something, that is not a dataset.
        # BUGFIX: the target directory must exist before the file can be
        # written; otherwise open() raises FileNotFoundError and the test
        # never reaches the error-path assertion it is meant to exercise.
        os.makedirs(opj(tdir, 'sub'), exist_ok=True)
        with open(opj(tdir, 'sub', 'a_file.txt'), 'w') as f:
            f.write("something")
        with assert_raises(IncompleteResultsError) as cme:
            install("sub", source='///')
        assert_in("already exists and not empty", str(cme.exception))
def __call__(
        path=None,
        source=None,
        dataset=None,
        get_data=False,
        description=None,
        recursive=False,
        recursion_limit=None,
        reckless=None,
        jobs="auto"):
    """Install dataset(s) and yield annotated result records (generator).

    Two modes of operation:

    * without `source`: each element of `path` is classified by its RI
      type -- non-path items (URLs) are fed back into ``Install.__call__``
      one at a time, while plain-path items are handed to ``Get.__call__``
      in a single batch
    * with `source`: at most one `path` is permitted; the installation is
      delegated to ``Clone.__call__``, followed by a recursive ``get`` on
      the freshly cloned dataset when `recursive` or `get_data` is set

    Every result record produced by the delegated commands is annotated
    with the reference dataset path (``r['refds']``) before being
    re-yielded; error handling is disabled in all delegated calls
    (``on_failure='ignore'``) so it can happen once at the very top.

    Parameters
    ----------
    path : str or list, optional
      Target path(s) and/or URL(s); normalized to a list below.
    source : str, optional
      URL or path of the dataset to install.
    dataset : optional
      Dataset to install into; when given, `path` is resolved against it.
    get_data : bool
      Also obtain file content of the installed dataset(s).
    description : str, optional
      Location description, passed through to the delegated calls.
    recursive : bool
      Recurse into subdatasets.
    recursion_limit : int, optional
      Maximum recursion depth, passed through via `common_kwargs`.
    reckless : optional
      Reckless-mode setting, passed through to clone/get.
    jobs : optional
      Parallelization setting, passed through to the delegated calls.

    Raises
    ------
    InsufficientArgumentsError
      If neither `source` nor `path` is given, or `source` is missing in
      the single-target mode.
    ValueError
      If multiple paths are combined with a `source`, or `path` cannot be
      resolved to a local path.
    """
    # normalize path argument to be equal when called from cmdline and
    # python and nothing was passed into `path`
    path = ensure_list(path)
    if not source and not path:
        raise InsufficientArgumentsError(
            "Please provide at least a source or a path")

    # Common kwargs to pass to underlying git/install calls.
    # They might need adjustments (e.g. for recursion_limit), but
    # otherwise would be applicable throughout
    #
    # There should have been more of common options!
    # since underneath get could do similar installs
    common_kwargs = dict(
        get_data=get_data,
        recursive=recursive,
        recursion_limit=recursion_limit,
        # options that currently cannot be passed through to the
        # underlying calls:
        # git_opts=git_opts,
        # annex_opts=annex_opts,
        reckless=reckless,
        jobs=jobs,
    )

    # did we explicitly get a dataset to install into?
    # if we got a dataset, path will be resolved against it.
    # Otherwise path will be resolved first.
    ds = None
    if dataset is not None:
        ds = require_dataset(dataset, check_installed=True,
                             purpose='installation')
        common_kwargs['dataset'] = dataset
    # pre-compute the reference dataset path for annotating results below
    refds_path = Interface.get_refds_path(ds)

    # switch into the two scenarios without --source:
    # 1. list of URLs
    # 2. list of (sub)dataset content
    if source is None:
        # we need to collect URLs and paths
        to_install = []
        to_get = []
        # TODO: this approach is problematic, it disrupts the order of
        # input args. consequently results will be returned in an
        # unexpected order when a mixture of source URL and paths is
        # given. Reordering is only possible when everything in here is
        # fully processed before any results can be yielded.
        # moreover, I think the semantics of the status quo implementation
        # are a bit complicated: in a mixture list a source URL will lead
        # to a new dataset at a generated default location, but a path
        # will lead to a subdataset at that exact location
        for urlpath in path:
            ri = RI(urlpath)
            # anything that parses as a plain path is content to `get`,
            # everything else (a URL) is a dataset to `install`
            (to_get if isinstance(ri, PathRI) else to_install).append(urlpath)

        # 1. multiple source URLs
        for s in to_install:
            lgr.debug("Install passes into install source=%s", s)
            for r in Install.__call__(
                    source=s,
                    description=description,
                    # we need to disable error handling in order to have it
                    # done at the very top, otherwise we are not able to
                    # order a global "ignore-and-keep-going"
                    on_failure='ignore',
                    return_type='generator',
                    result_xfm=None,
                    result_filter=None,
                    **common_kwargs):
                # no post-processing of the installed content on disk
                # should be necessary here, all done by code further
                # down that deals with an install from an actual `source`
                # any necessary fixes should go there too!
                r['refds'] = refds_path
                yield r

        # 2. one or more dataset content paths
        if to_get:
            lgr.debug("Install passes into get %d items", len(to_get))
            # all commented out hint on inability to pass those options
            # into underlying install-related calls.
            # Also need to pass from get:
            #  annex_get_opts
            for r in Get.__call__(
                    to_get,
                    # TODO should pass-through description, not sure why
                    # disabled
                    # description=description,
                    # we need to disable error handling in order to have it
                    # done at the very top, otherwise we are not able to
                    # order a global "ignore-and-keep-going"
                    on_failure='ignore',
                    return_type='generator',
                    result_xfm=None,
                    result_filter=None,
                    **common_kwargs):
                # no post-processing of get'ed content on disk should be
                # necessary here, this is the responsibility of `get`
                # (incl. adjusting parent's gitmodules when submodules end
                # up in an "updated" state (done in get helpers)
                # any required fixes should go there!
                r['refds'] = refds_path
                yield r

        # we are done here
        # the rest is about install from a `source`
        return

    # an actual `source` was given
    if source and path and len(path) > 1:
        # exception is ok here, if this fails it is either direct user error
        # or we f****d up one of our internal calls
        # NOTE(review): "mutliple" is a typo in this user-facing message;
        # left untouched in this documentation-only pass
        raise ValueError(
            "install needs a single PATH when source is provided. "
            "Was given mutliple PATHs: %s" % str(path))

    # parameter constraints:
    if not source:
        # exception is ok here, if this fails it is either direct user error
        # or we f****d up one of our internal calls
        raise InsufficientArgumentsError(
            "a `source` is required for installation")

    # code below deals with a single path only
    path = path[0] if path else None

    if source == path:
        # even if they turn out to be identical after resolving symlinks
        # and more sophisticated witchcraft, it would still happily say
        # "it appears to be already installed", so we just catch an
        # obviously pointless input combination
        yield get_status_dict(
            'install', path=path, status='impossible', logger=lgr,
            source_url=source, refds=refds_path,
            message="installation `source` and destination `path` are identical. "
                    "If you are trying to add a subdataset simply use the `save` command")
        return

    # resolve the target location (if local) against the provided dataset
    # or CWD:
    if path is not None:
        # MIH everything in here is highly similar to what common
        # interface helpers do (or should/could do), but at the same
        # is very much tailored to just apply to `install` -- I guess
        # it has to stay special

        # Should work out just fine for regular paths, so no additional
        # conditioning is necessary
        try:
            path_ri = RI(path)
        except Exception as e:
            raise ValueError("invalid path argument {}: ({})".format(
                path, exc_str(e)))
        try:
            # Wouldn't work for SSHRI ATM, see TODO within SSHRI
            # yoh: path should be a local path, and mapping note within
            #      SSHRI about mapping localhost:path to path is kinda
            #      a peculiar use-case IMHO
            # TODO Stringification can be removed once PY35 is no longer
            # supported
            path = str(resolve_path(path_ri.localpath, dataset))
            # any `path` argument that point to something local now
            # resolved and is no longer a URL
        except ValueError:
            # `path` is neither a valid source nor a local path.
            # TODO: The only thing left is a known subdataset with a
            # name, that is not a path; Once we correctly distinguish
            # between path and name of a submodule, we need to consider
            # this.
            # For now: Just raise
            raise ValueError("Invalid path argument {0}".format(path))
    # `path` resolved, if there was any.

    # clone dataset, will also take care of adding to superdataset, if one
    # is given
    res = Clone.__call__(
        source, path, dataset=ds,
        description=description,
        reckless=reckless,
        # we need to disable error handling in order to have it done at
        # the very top, otherwise we are not able to order a global
        # "ignore-and-keep-going"
        result_xfm=None,
        return_type='generator',
        result_filter=None,
        on_failure='ignore')

    # helper to turn an 'install' result record into a Dataset instance
    as_ds = YieldDatasets()
    destination_dataset = None
    for r in res:
        if r['action'] == 'install' and r['type'] == 'dataset':
            # make sure logic below is valid, only one dataset result is
            # coming back
            assert (destination_dataset is None)
            destination_dataset = as_ds(r)
        r['refds'] = refds_path
        yield r
    # clone must have reported exactly one installed dataset
    assert (destination_dataset)

    # Now, recursive calls:
    if recursive or get_data:
        # dataset argument must not be passed inside since we use bound .get
        # It is ok to do "inplace" as long as we still return right
        # after the loop ends
        common_kwargs.pop('dataset', '')
        for r in destination_dataset.get(
                curdir,
                description=description,
                # we need to disable error handling in order to have it
                # done at the very top, otherwise we are not able to order
                # a global "ignore-and-keep-going"
                on_failure='ignore',
                return_type='generator',
                result_xfm=None,
                **common_kwargs):
            r['refds'] = refds_path
            yield r

    # at this point no further post-processing should be necessary,
    # `clone` and `get` must have done that (incl. parent handling)
    # if not, bugs should be fixed in those commands
    return