Esempio n. 1
0
def test_gh1811(srcpath, clonepath):
    orig = Dataset(srcpath).create()
    (orig.pathobj / 'some').write_text('some')
    orig.save()
    clone = Clone.__call__(source=orig.path, path=clonepath)
    (clone.pathobj / 'somemore').write_text('somemore')
    clone.save()
    clone.repo.call_git(['checkout', 'HEAD~1'])
    res = clone.push(to='origin', on_failure='ignore')
    assert_result_count(res, 1)
    assert_result_count(
        res, 1,
        path=clone.path, type='dataset', action='publish',
        status='impossible',
        message='There is no active branch, cannot determine remote '
                'branch',
    )
Esempio n. 2
0
def test_push_recursive(origin_path, src_path, dst_top, dst_sub,
                        dst_subnoannex, dst_subsub):
    # dataset with two submodules and one subsubmodule
    origin = Dataset(origin_path).create()
    origin_subm1 = origin.create('sub m')
    origin_subm1.create('subsub m')
    origin.create('subm noannex', annex=False)
    origin.save()
    assert_repo_status(origin.path)
    # prepare src as a fresh clone with all subdatasets checkout out recursively
    # running on a clone should make the test scenario more different than
    # test_push(), even for the pieces that should be identical
    top = Clone.__call__(source=origin.path, path=src_path)
    sub, subsub, subnoannex = top.get('.',
                                      recursive=True,
                                      get_data=False,
                                      result_xfm='datasets')

    target_top = mk_push_target(top, 'target', dst_top, annex=True)
    # subdatasets have no remote yet, so recursive publishing should fail:
    res = top.push(to="target", recursive=True, on_failure='ignore')
    assert_in_results(res,
                      path=top.path,
                      type='dataset',
                      refspec='refs/heads/master:refs/heads/master',
                      operations=['new-branch'],
                      action='publish',
                      status='ok',
                      target='target')
    for d in (sub, subsub, subnoannex):
        assert_in_results(res,
                          status='error',
                          type='dataset',
                          path=d.path,
                          message=("Unknown target sibling '%s'.", 'target'))
    # now fix that and set up targets for the submodules
    target_sub = mk_push_target(sub, 'target', dst_sub, annex=True)
    target_subnoannex = mk_push_target(subnoannex,
                                       'target',
                                       dst_subnoannex,
                                       annex=False)
    target_subsub = mk_push_target(subsub, 'target', dst_subsub, annex=True)

    # and same push call as above
    res = top.push(to="target", recursive=True)
    # topds skipped
    assert_in_results(res,
                      path=top.path,
                      type='dataset',
                      action='publish',
                      status='notneeded',
                      target='target')
    # the rest pushed
    for d in (sub, subsub, subnoannex):
        assert_in_results(res,
                          status='ok',
                          type='dataset',
                          path=d.path,
                          refspec='refs/heads/master:refs/heads/master')
    # all correspondig branches match across all datasets
    for s, d in zip(
        (top, sub, subnoannex, subsub),
        (target_top, target_sub, target_subnoannex, target_subsub)):
        eq_(list(s.repo.get_branch_commits_("master")),
            list(d.get_branch_commits_("master")))
        if s != subnoannex:
            eq_(list(s.repo.get_branch_commits_("git-annex")),
                list(d.get_branch_commits_("git-annex")))

    # rerun should not result in further pushes of master
    res = top.push(to="target", recursive=True)
    assert_not_in_results(res,
                          status='ok',
                          refspec="refs/heads/master:refs/heads/master")
    assert_in_results(res,
                      status='notneeded',
                      refspec="refs/heads/master:refs/heads/master")

    if top.repo.is_managed_branch():
        raise SkipTest(
            'Save/status of subdataset with managed branches is an still '
            'unresolved issue')

    # now annex a file in subsub
    test_copy_file = subsub.pathobj / 'test_mod_annex_file'
    test_copy_file.write_text("Heavy stuff.")
    # save all the way up
    assert_status(('ok', 'notneeded'),
                  top.save(message='subsub got something', recursive=True))
    assert_repo_status(top.path)
    # publish straight up, should be smart by default
    res = top.push(to="target", recursive=True)
    # we see 3 out of 4 datasets pushed (sub noannex was left unchanged)
    for d in (top, sub, subsub):
        assert_in_results(res,
                          status='ok',
                          type='dataset',
                          path=d.path,
                          refspec='refs/heads/master:refs/heads/master')
    # file content copied too
    assert_in_results(res,
                      action='copy',
                      status='ok',
                      path=str(test_copy_file))
    # verify it is accessible, drop and bring back
    assert_status('ok', top.drop(str(test_copy_file)))
    ok_(not subsub.repo.file_has_content('test_mod_annex_file'))
    top.get(test_copy_file)
    ok_file_has_content(test_copy_file, 'Heavy stuff.')

    # make two modification
    (sub.pathobj / 'test_mod_annex_file').write_text('annex')
    (subnoannex.pathobj / 'test_mod_file').write_text('git')
    # save separately
    top.save(sub.pathobj, message='annexadd', recursive=True)
    top.save(subnoannex.pathobj, message='gitadd', recursive=True)
    # now only publish the latter one
    res = top.push(to="target", since='HEAD~1', recursive=True)
    # nothing copied, no reports on the other modification
    assert_not_in_results(res, action='copy')
    assert_not_in_results(res, path=sub.path)
    for d in (top, subnoannex):
        assert_in_results(res,
                          status='ok',
                          type='dataset',
                          path=d.path,
                          refspec='refs/heads/master:refs/heads/master')
    # an unconditional push should now pick up the remaining changes
    res = top.push(to="target", recursive=True)
    assert_in_results(res,
                      action='copy',
                      status='ok',
                      path=str(sub.pathobj / 'test_mod_annex_file'))
    assert_in_results(res,
                      status='ok',
                      type='dataset',
                      path=sub.path,
                      refspec='refs/heads/master:refs/heads/master')
    for d in (top, subnoannex, subsub):
        assert_in_results(res,
                          status='notneeded',
                          type='dataset',
                          path=d.path,
                          refspec='refs/heads/master:refs/heads/master')
Esempio n. 3
0
    def __call__(path=None,
                 source=None,
                 dataset=None,
                 get_data=False,
                 description=None,
                 recursive=False,
                 recursion_limit=None,
                 reckless=None,
                 jobs="auto"):

        # normalize path argument to be equal when called from cmdline and
        # python and nothing was passed into `path`
        path = ensure_list(path)

        if not source and not path:
            raise InsufficientArgumentsError(
                "Please provide at least a source or a path")

        #  Common kwargs to pass to underlying git/install calls.
        #  They might need adjustments (e.g. for recursion_limit, but
        #  otherwise would be applicable throughout
        #
        # There should have been more of common options!
        # since underneath get could do similar installs
        common_kwargs = dict(
            get_data=get_data,
            recursive=recursive,
            recursion_limit=recursion_limit,
            # git_opts=git_opts,
            # annex_opts=annex_opts,
            reckless=reckless,
            jobs=jobs,
        )

        # did we explicitly get a dataset to install into?
        # if we got a dataset, path will be resolved against it.
        # Otherwise path will be resolved first.
        ds = None
        if dataset is not None:
            ds = require_dataset(dataset,
                                 check_installed=True,
                                 purpose='installation')
            common_kwargs['dataset'] = dataset
        # pre-compute for results below
        refds_path = Interface.get_refds_path(ds)

        # switch into the two scenarios without --source:
        # 1. list of URLs
        # 2. list of (sub)dataset content
        if source is None:
            # we need to collect URLs and paths
            to_install = []
            to_get = []
            # TODO: this approach is problematic, it disrupts the order of input args.
            # consequently results will be returned in an unexpected order when a
            # mixture of source URL and paths is given. Reordering is only possible when
            # everything in here is fully processed before any results can be yielded.
            # moreover, I think the semantics of the status quo implementation are a
            # bit complicated: in a mixture list a source URL will lead to a new dataset
            # at a generated default location, but a path will lead to a subdataset
            # at that exact location
            for urlpath in path:
                ri = RI(urlpath)
                (to_get
                 if isinstance(ri, PathRI) else to_install).append(urlpath)

            # 1. multiple source URLs
            for s in to_install:
                lgr.debug("Install passes into install source=%s", s)
                for r in Install.__call__(
                        source=s,
                        description=description,
                        # we need to disable error handling in order to have it done at
                        # the very top, otherwise we are not able to order a global
                        # "ignore-and-keep-going"
                        on_failure='ignore',
                        return_type='generator',
                        result_xfm=None,
                        result_filter=None,
                        **common_kwargs):
                    # no post-processing of the installed content on disk
                    # should be necessary here, all done by code further
                    # down that deals with an install from an actuall `source`
                    # any necessary fixes should go there too!
                    r['refds'] = refds_path
                    yield r

            # 2. one or more dataset content paths
            if to_get:
                lgr.debug("Install passes into get %d items", len(to_get))
                # all commented out hint on inability to pass those options
                # into underlying install-related calls.
                # Also need to pass from get:
                #  annex_get_opts

                for r in Get.__call__(
                        to_get,
                        # TODO should pass-through description, not sure why disabled
                        # description=description,
                        # we need to disable error handling in order to have it done at
                        # the very top, otherwise we are not able to order a global
                        # "ignore-and-keep-going"
                        on_failure='ignore',
                        return_type='generator',
                        result_xfm=None,
                        result_filter=None,
                        **common_kwargs):
                    # no post-processing of get'ed content on disk should be
                    # necessary here, this is the responsibility of `get`
                    # (incl. adjusting parent's gitmodules when submodules end
                    # up in an "updated" state (done in get helpers)
                    # any required fixes should go there!
                    r['refds'] = refds_path
                    yield r

            # we are done here
            # the rest is about install from a `source`
            return

        # an actual `source` was given
        if source and path and len(path) > 1:
            # exception is ok here, if this fails it is either direct user error
            # or we f****d up one of our internal calls
            raise ValueError(
                "install needs a single PATH when source is provided.  "
                "Was given mutliple PATHs: %s" % str(path))

        # parameter constraints:
        if not source:
            # exception is ok here, if this fails it is either direct user error
            # or we f****d up one of our internal calls
            raise InsufficientArgumentsError(
                "a `source` is required for installation")

        # code below deals with a single path only
        path = path[0] if path else None

        if source == path:
            # even if they turn out to be identical after resolving symlinks
            # and more sophisticated witchcraft, it would still happily say
            # "it appears to be already installed", so we just catch an
            # obviously pointless input combination
            yield get_status_dict(
                'install',
                path=path,
                status='impossible',
                logger=lgr,
                source_url=source,
                refds=refds_path,
                message=
                "installation `source` and destination `path` are identical. "
                "If you are trying to add a subdataset simply use the `save` command"
            )
            return

        # resolve the target location (if local) against the provided dataset
        # or CWD:
        if path is not None:
            # MIH everything in here is highly similar to what common
            # interface helpers do (or should/could do), but at the same
            # is very much tailored to just apply to `install` -- I guess
            # it has to stay special

            # Should work out just fine for regular paths, so no additional
            # conditioning is necessary
            try:
                path_ri = RI(path)
            except Exception as e:
                raise ValueError("invalid path argument {}: ({})".format(
                    path, exc_str(e)))
            try:
                # Wouldn't work for SSHRI ATM, see TODO within SSHRI
                # yoh: path should be a local path, and mapping note within
                #      SSHRI about mapping localhost:path to path is kinda
                #      a peculiar use-case IMHO
                # TODO Stringification can be removed once PY35 is no longer
                # supported
                path = str(resolve_path(path_ri.localpath, dataset))
                # any `path` argument that point to something local now
                # resolved and is no longer a URL
            except ValueError:
                # `path` is neither a valid source nor a local path.
                # TODO: The only thing left is a known subdataset with a
                # name, that is not a path; Once we correctly distinguish
                # between path and name of a submodule, we need to consider
                # this.
                # For now: Just raise
                raise ValueError("Invalid path argument {0}".format(path))
        # `path` resolved, if there was any.

        # clone dataset, will also take care of adding to superdataset, if one
        # is given
        res = Clone.__call__(
            source,
            path,
            dataset=ds,
            description=description,
            reckless=reckless,
            # we need to disable error handling in order to have it done at
            # the very top, otherwise we are not able to order a global
            # "ignore-and-keep-going"
            result_xfm=None,
            return_type='generator',
            result_filter=None,
            on_failure='ignore')
        # helper
        as_ds = YieldDatasets()
        destination_dataset = None
        for r in res:
            if r['action'] == 'install' and r['type'] == 'dataset':
                # make sure logic below is valid, only one dataset result is
                # coming back
                assert (destination_dataset is None)
                destination_dataset = as_ds(r)
            r['refds'] = refds_path
            yield r
        assert (destination_dataset)

        # Now, recursive calls:
        if recursive or get_data:
            # dataset argument must not be passed inside since we use bound .get
            # It is ok to do "inplace" as long as we still return right
            # after the loop ends
            common_kwargs.pop('dataset', '')
            for r in destination_dataset.get(
                    curdir,
                    description=description,
                    # we need to disable error handling in order to have it done at
                    # the very top, otherwise we are not able to order a global
                    # "ignore-and-keep-going"
                    on_failure='ignore',
                    return_type='generator',
                    result_xfm=None,
                    **common_kwargs):
                r['refds'] = refds_path
                yield r
        # at this point no futher post-processing should be necessary,
        # `clone` and `get` must have done that (incl. parent handling)
        # if not, bugs should be fixed in those commands
        return