def test_gh1811(srcpath, clonepath):
    # pushing from a detached HEAD must not crash, but yield a single
    # 'impossible' result
    orig = Dataset(srcpath).create()
    (orig.pathobj / 'some').write_text('some')
    orig.save()
    clone = Clone.__call__(source=orig.path, path=clonepath)
    (clone.pathobj / 'somemore').write_text('somemore')
    clone.save()
    clone.repo.call_git(['checkout', 'HEAD~1'])
    res = clone.push(to='origin', on_failure='ignore')
    assert_result_count(res, 1)
    assert_result_count(
        res, 1,
        path=clone.path, type='dataset', action='publish',
        status='impossible',
        message='There is no active branch, cannot determine remote '
                'branch',
    )
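
# A minimal sketch (not part of the original test module) of how the same
# detached-HEAD failure could be inspected outside the test helpers: with
# on_failure='ignore' the error surfaces as a result record instead of an
# exception. `clone_path` is a hypothetical placeholder path.
def _sketch_inspect_push_failure(clone_path):
    from datalad.api import Dataset
    res = Dataset(clone_path).push(to='origin', on_failure='ignore')
    # each result record is a dict carrying at least 'action', 'status',
    # 'path', and (for failures) a 'message'
    return [r for r in res if r['status'] not in ('ok', 'notneeded')]
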
def test_push_recursive(
        origin_path, src_path, dst_top, dst_sub, dst_subnoannex, dst_subsub):
    # dataset with two submodules and one subsubmodule
    origin = Dataset(origin_path).create()
    origin_subm1 = origin.create('sub m')
    origin_subm1.create('subsub m')
    origin.create('subm noannex', annex=False)
    origin.save()
    assert_repo_status(origin.path)
    # prepare src as a fresh clone with all subdatasets checked out
    # recursively. running on a clone should make the test scenario differ
    # more from test_push(), even for the pieces that should be identical
    top = Clone.__call__(source=origin.path, path=src_path)
    sub, subsub, subnoannex = top.get(
        '.', recursive=True, get_data=False, result_xfm='datasets')
    target_top = mk_push_target(top, 'target', dst_top, annex=True)
    # subdatasets have no remote yet, so recursive publishing should fail:
    res = top.push(to="target", recursive=True, on_failure='ignore')
    assert_in_results(
        res, path=top.path, type='dataset',
        refspec='refs/heads/master:refs/heads/master',
        operations=['new-branch'], action='publish', status='ok',
        target='target')
    for d in (sub, subsub, subnoannex):
        assert_in_results(
            res, status='error', type='dataset', path=d.path,
            message=("Unknown target sibling '%s'.", 'target'))

    # now fix that and set up targets for the submodules
    target_sub = mk_push_target(sub, 'target', dst_sub, annex=True)
    target_subnoannex = mk_push_target(
        subnoannex, 'target', dst_subnoannex, annex=False)
    target_subsub = mk_push_target(subsub, 'target', dst_subsub, annex=True)
    # and same push call as above
    res = top.push(to="target", recursive=True)
    # topds skipped
    assert_in_results(
        res, path=top.path, type='dataset', action='publish',
        status='notneeded', target='target')
    # the rest pushed
    for d in (sub, subsub, subnoannex):
        assert_in_results(
            res, status='ok', type='dataset', path=d.path,
            refspec='refs/heads/master:refs/heads/master')
    # all corresponding branches match across all datasets
    for s, d in zip(
            (top, sub, subnoannex, subsub),
            (target_top, target_sub, target_subnoannex, target_subsub)):
        eq_(list(s.repo.get_branch_commits_("master")),
            list(d.get_branch_commits_("master")))
        if s != subnoannex:
            eq_(list(s.repo.get_branch_commits_("git-annex")),
                list(d.get_branch_commits_("git-annex")))

    # rerun should not result in further pushes of master
    res = top.push(to="target", recursive=True)
    assert_not_in_results(
        res, status='ok', refspec="refs/heads/master:refs/heads/master")
    assert_in_results(
        res, status='notneeded',
        refspec="refs/heads/master:refs/heads/master")

    if top.repo.is_managed_branch():
        raise SkipTest(
            'Save/status of subdataset with managed branches is still an '
            'unresolved issue')

    # now annex a file in subsub
    test_copy_file = subsub.pathobj / 'test_mod_annex_file'
    test_copy_file.write_text("Heavy stuff.")
    # save all the way up
    assert_status(
        ('ok', 'notneeded'),
        top.save(message='subsub got something', recursive=True))
    assert_repo_status(top.path)
    # publish straight up, should be smart by default
    res = top.push(to="target", recursive=True)
    # we see 3 out of 4 datasets pushed (sub noannex was left unchanged)
    for d in (top, sub, subsub):
        assert_in_results(
            res, status='ok', type='dataset', path=d.path,
            refspec='refs/heads/master:refs/heads/master')
    # file content copied too
    assert_in_results(
        res, action='copy', status='ok', path=str(test_copy_file))
    # verify it is accessible, drop and bring back
    assert_status('ok', top.drop(str(test_copy_file)))
    ok_(not subsub.repo.file_has_content('test_mod_annex_file'))
    top.get(test_copy_file)
    ok_file_has_content(test_copy_file, 'Heavy stuff.')

    # make two modifications
    (sub.pathobj / 'test_mod_annex_file').write_text('annex')
    (subnoannex.pathobj / 'test_mod_file').write_text('git')
    # save separately
    top.save(sub.pathobj, message='annexadd', recursive=True)
    top.save(subnoannex.pathobj, message='gitadd', recursive=True)
    # now only publish the latter one
    res = top.push(to="target", since='HEAD~1', recursive=True)
    # nothing copied, no reports on the other modification
    assert_not_in_results(res, action='copy')
    assert_not_in_results(res, path=sub.path)
    for d in (top, subnoannex):
        assert_in_results(
            res, status='ok', type='dataset', path=d.path,
            refspec='refs/heads/master:refs/heads/master')
    # an unconditional push should now pick up the remaining changes
    res = top.push(to="target", recursive=True)
    assert_in_results(
        res, action='copy', status='ok',
        path=str(sub.pathobj / 'test_mod_annex_file'))
    assert_in_results(
        res, status='ok', type='dataset', path=sub.path,
        refspec='refs/heads/master:refs/heads/master')
    for d in (top, subnoannex, subsub):
        assert_in_results(
            res, status='notneeded', type='dataset', path=d.path,
            refspec='refs/heads/master:refs/heads/master')
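
# A minimal sketch (not part of the original test module) of the selective
# push pattern exercised above, from a user script's perspective. It only
# relies on the `push` parameters used in the test (`to`, `since`,
# `recursive`); `ds_path` and the sibling name 'target' are placeholders.
def _sketch_selective_push(ds_path):
    from datalad.api import Dataset
    ds = Dataset(ds_path)
    # publish only what changed since the previous commit, descending into
    # subdatasets; an unconditional `ds.push(to='target', recursive=True)`
    # afterwards would pick up anything this call skipped
    return ds.push(to='target', since='HEAD~1', recursive=True)
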
def __call__(
        path=None,
        source=None,
        dataset=None,
        get_data=False,
        description=None,
        recursive=False,
        recursion_limit=None,
        reckless=None,
        jobs="auto"):
    # normalize path argument to be equal when called from cmdline and
    # python and nothing was passed into `path`
    path = ensure_list(path)

    if not source and not path:
        raise InsufficientArgumentsError(
            "Please provide at least a source or a path")

    # Common kwargs to pass to underlying git/install calls.
    # They might need adjustments (e.g. for recursion_limit), but
    # otherwise would be applicable throughout
    #
    # There should have been more common options,
    # since underneath `get` could do similar installs
    common_kwargs = dict(
        get_data=get_data,
        recursive=recursive,
        recursion_limit=recursion_limit,
        # git_opts=git_opts,
        # annex_opts=annex_opts,
        reckless=reckless,
        jobs=jobs,
    )

    # did we explicitly get a dataset to install into?
    # if we got a dataset, path will be resolved against it.
    # Otherwise path will be resolved first.
    ds = None
    if dataset is not None:
        ds = require_dataset(
            dataset, check_installed=True, purpose='installation')
        common_kwargs['dataset'] = dataset
    # pre-compute for results below
    refds_path = Interface.get_refds_path(ds)

    # switch into the two scenarios without --source:
    # 1. list of URLs
    # 2. list of (sub)dataset content
    if source is None:
        # we need to collect URLs and paths
        to_install = []
        to_get = []
        # TODO: this approach is problematic, it disrupts the order of
        # input args. Consequently results will be returned in an
        # unexpected order when a mixture of source URLs and paths is
        # given. Reordering is only possible when everything in here is
        # fully processed before any results can be yielded.
        # Moreover, the semantics of the status quo implementation are a
        # bit complicated: in a mixed list a source URL will lead to a new
        # dataset at a generated default location, but a path will lead to
        # a subdataset at that exact location
        for urlpath in path:
            ri = RI(urlpath)
            (to_get if isinstance(ri, PathRI) else to_install).append(urlpath)

        # 1. multiple source URLs
        for s in to_install:
            lgr.debug("Install passes into install source=%s", s)
            for r in Install.__call__(
                    source=s,
                    description=description,
                    # we need to disable error handling in order to have it
                    # done at the very top, otherwise we are not able to
                    # order a global "ignore-and-keep-going"
                    on_failure='ignore',
                    return_type='generator',
                    result_xfm=None,
                    result_filter=None,
                    **common_kwargs):
                # no post-processing of the installed content on disk
                # should be necessary here, all done by code further
                # down that deals with an install from an actual `source`
                # any necessary fixes should go there too!
                r['refds'] = refds_path
                yield r

        # 2. one or more dataset content paths
        if to_get:
            lgr.debug("Install passes into get %d items", len(to_get))
            # the commented-out options hint at the inability to pass them
            # into underlying install-related calls.
            # Also need to pass from get:
            #   annex_get_opts
            for r in Get.__call__(
                    to_get,
                    # TODO should pass-through description, not sure why
                    # disabled
                    # description=description,
                    # we need to disable error handling in order to have it
                    # done at the very top, otherwise we are not able to
                    # order a global "ignore-and-keep-going"
                    on_failure='ignore',
                    return_type='generator',
                    result_xfm=None,
                    result_filter=None,
                    **common_kwargs):
                # no post-processing of get'ed content on disk should be
                # necessary here, this is the responsibility of `get`
                # (incl. adjusting parent's gitmodules when submodules end
                # up in an "updated" state, done in get helpers)
                # any required fixes should go there!
                r['refds'] = refds_path
                yield r

        # we are done here
        # the rest is about install from a `source`
        return

    # an actual `source` was given
    if source and path and len(path) > 1:
        # exception is ok here, if this fails it is either direct user error
        # or we f****d up one of our internal calls
        raise ValueError(
            "install needs a single PATH when source is provided. "
            "Was given multiple PATHs: %s" % str(path))

    # parameter constraints:
    if not source:
        # exception is ok here, if this fails it is either direct user error
        # or we f****d up one of our internal calls
        raise InsufficientArgumentsError(
            "a `source` is required for installation")

    # code below deals with a single path only
    path = path[0] if path else None

    if source == path:
        # even if they turn out to be identical after resolving symlinks
        # and more sophisticated witchcraft, it would still happily say
        # "it appears to be already installed", so we just catch an
        # obviously pointless input combination
        yield get_status_dict(
            'install', path=path, status='impossible', logger=lgr,
            source_url=source, refds=refds_path,
            message="installation `source` and destination `path` are "
                    "identical. If you are trying to add a subdataset "
                    "simply use the `save` command")
        return

    # resolve the target location (if local) against the provided dataset
    # or CWD:
    if path is not None:
        # MIH everything in here is highly similar to what common
        # interface helpers do (or should/could do), but at the same time
        # is very much tailored to just apply to `install` -- I guess
        # it has to stay special

        # Should work out just fine for regular paths, so no additional
        # conditioning is necessary
        try:
            path_ri = RI(path)
        except Exception as e:
            raise ValueError(
                "invalid path argument {}: ({})".format(path, exc_str(e)))
        try:
            # Wouldn't work for SSHRI ATM, see TODO within SSHRI
            # yoh: path should be a local path, and mapping note within
            #      SSHRI about mapping localhost:path to path is kinda
            #      a peculiar use-case IMHO
            # TODO Stringification can be removed once PY35 is no longer
            # supported
            path = str(resolve_path(path_ri.localpath, dataset))
            # any `path` argument that points to something local is now
            # resolved and is no longer a URL
        except ValueError:
            # `path` is neither a valid source nor a local path.
            # TODO: The only thing left is a known subdataset with a
            # name, that is not a path; Once we correctly distinguish
            # between path and name of a submodule, we need to consider
            # this.
            # For now: Just raise
            raise ValueError("Invalid path argument {0}".format(path))
    # `path` resolved, if there was any.

    # clone dataset, will also take care of adding to superdataset, if one
    # is given
    res = Clone.__call__(
        source, path, dataset=ds,
        description=description,
        reckless=reckless,
        # we need to disable error handling in order to have it done at
        # the very top, otherwise we are not able to order a global
        # "ignore-and-keep-going"
        result_xfm=None,
        return_type='generator',
        result_filter=None,
        on_failure='ignore')
    # helper
    as_ds = YieldDatasets()
    destination_dataset = None
    for r in res:
        if r['action'] == 'install' and r['type'] == 'dataset':
            # make sure logic below is valid, only one dataset result is
            # coming back
            assert(destination_dataset is None)
            destination_dataset = as_ds(r)
        r['refds'] = refds_path
        yield r
    assert(destination_dataset)

    # Now, recursive calls:
    if recursive or get_data:
        # dataset argument must not be passed inside since we use bound .get
        # It is ok to do "inplace" as long as we still return right
        # after the loop ends
        common_kwargs.pop('dataset', '')
        for r in destination_dataset.get(
                curdir,
                description=description,
                # we need to disable error handling in order to have it
                # done at the very top, otherwise we are not able to order
                # a global "ignore-and-keep-going"
                on_failure='ignore',
                return_type='generator',
                result_xfm=None,
                **common_kwargs):
            r['refds'] = refds_path
            yield r
    # at this point no further post-processing should be necessary,
    # `clone` and `get` must have done that (incl. parent handling)
    # if not, bugs should be fixed in those commands
    return
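
# A minimal sketch (not part of the original module) of the call mode the
# implementation above dispatches on when an explicit `source` is given,
# as opposed to fetching already-known (sub)dataset content by path via
# `Get`. `source_url` and `target_dir` are hypothetical placeholders; only
# parameters present in the signature above are used.
def _sketch_install_usage(source_url, target_dir):
    from datalad.api import install
    # explicit source: clone into `target_dir`, recurse into subdatasets,
    # and also fetch annexed file content
    ds = install(source=source_url, path=target_dir,
                 recursive=True, get_data=True)
    return ds
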