Example #1
def test_push_subds_no_recursion(src_path=None,
                                 dst_top=None,
                                 dst_sub=None,
                                 dst_subsub=None):
    # dataset with one submodule and one subsubmodule
    top = Dataset(src_path).create()
    sub = top.create('sub m')
    test_file = sub.pathobj / 'subdir' / 'test_file'
    test_file.parent.mkdir()
    test_file.write_text('some')
    subsub = sub.create(sub.pathobj / 'subdir' / 'subsub m')
    top.save(recursive=True)
    assert_repo_status(top.path)
    target_top = mk_push_target(top, 'target', dst_top, annex=True)
    target_sub = mk_push_target(sub, 'target', dst_sub, annex=True)
    target_subsub = mk_push_target(subsub, 'target', dst_subsub, annex=True)
    # now publish, but NO recursion, instead give the parent dir of
    # both a subdataset and a file in the middle subdataset
    res = top.push(
        to='target',
        # give relative to top dataset to elevate the difficulty a little
        path=str(test_file.relative_to(top.pathobj).parent))
    assert_status('ok', res)
    assert_in_results(res, action='publish', type='dataset', path=top.path)
    assert_in_results(res, action='publish', type='dataset', path=sub.path)
    assert_in_results(res, action='copy', type='file', path=str(test_file))
    # the lowest-level subdataset isn't touched
    assert_not_in_results(res,
                          action='publish',
                          type='dataset',
                          path=subsub.path)
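
In brief: a path= argument scopes push() to part of the dataset tree without enabling recursion. Below is a minimal sketch of the same pattern; all paths and the sibling name 'target' are made up, and create_sibling() stands in for the mk_push_target() test helper.

# Hedged sketch of path-constrained publishing; assumes datalad is installed.
import tempfile
from datalad.api import Dataset

work = tempfile.mkdtemp()
top = Dataset(work + '/top').create()
sub = top.create('sub')
(sub.pathobj / 'payload.dat').write_text('data')
top.save(recursive=True)
# one sibling per dataset in the hierarchy
top.create_sibling(work + '/mirror', name='target', recursive=True)
# pushing a path inside 'sub' publishes top and sub, but nothing deeper
top.push(to='target', path='sub')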
Example #2
def test_recurse_existing(src=None, path=None):
    origin_ds = _make_dataset_hierarchy(src)

    # make sure recursion_limit works as expected across a range of depths
    for depth in range(len(origin_ds)):
        res = install(path,
                      source=src,
                      recursive=True,
                      recursion_limit=depth,
                      result_xfm=None,
                      return_type='list',
                      result_filter=None)
        # we expect one dataset per level
        assert_result_count(res, depth + 1, type='dataset', status='ok')
        rmtree(path)

    # now install all but the last two levels, no data
    root, sub1, sub2 = install(path,
                               source=src,
                               recursive=True,
                               recursion_limit=2,
                               result_xfm='datasets',
                               result_filter=None)
    ok_(sub2.repo.file_has_content('file_in_annex.txt') is False)
    sub3 = Dataset(opj(sub2.path, 'sub3'))
    ok_(not sub3.is_installed())
    # now get all content in all existing datasets, no new datasets installed
    # in the process
    files = root.get(curdir, recursive=True, recursion_limit='existing')
    assert_not_in_results(files, type='dataset', status='ok')
    assert_result_count(files, 1, type='file', status='ok')
    ok_(sub2.repo.file_has_content('file_in_annex.txt') is True)
    ok_(not sub3.is_installed())
    # now pull down all remaining datasets, no data
    sub3, sub4 = root.get(curdir,
                          recursive=True,
                          get_data=False,
                          result_xfm='datasets',
                          result_filter=lambda x: x['status'] == 'ok')
    ok_(sub4.is_installed())
    ok_(sub3.repo.file_has_content('file_in_annex.txt') is False)
    # aaannd all data
    files = root.get(
        curdir,
        recursive=True,
        result_filter=lambda x: x['status'] == 'ok' and x['type'] == 'file')
    eq_(len(files), 1)
    ok_(sub3.repo.file_has_content('file_in_annex.txt') is True)
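
A hedged sketch of the two recursion controls this test exercises (all paths invented): install() takes an integer recursion_limit, and get() additionally accepts the special value 'existing' to restrict itself to already-installed subdatasets.

import tempfile
from datalad.api import Dataset, install

src = tempfile.mkdtemp() + '/src'
origin = Dataset(src).create()
origin.create('sub1').create('sub2')
origin.save(recursive=True)

# install the top dataset plus one level of subdatasets
ds = install(tempfile.mkdtemp() + '/clone', source=src,
             recursive=True, recursion_limit=1)
# fetch content in installed datasets only; never installs new ones
ds.get('.', recursive=True, recursion_limit='existing')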
Example #3
def test_auto_data_transfer(path=None):
    path = Path(path)
    ds_a = Dataset(path / "a").create()
    (ds_a.pathobj / "foo.dat").write_text("foo")
    ds_a.save()

    # Should be the default, but just in case.
    ds_a.repo.config.set("annex.numcopies", "1", scope="local")
    ds_a.create_sibling(str(path / "b"), name="b")

    # With numcopies=1, no data is copied with data="auto".
    res = ds_a.push(to="b", data="auto", since=None)
    assert_not_in_results(res, action="copy")

    # Even when a file is explicitly given.
    res = ds_a.push(to="b", path="foo.dat", data="auto", since=None)
    assert_not_in_results(res, action="copy")

    # numcopies=2 changes that.
    ds_a.repo.config.set("annex.numcopies", "2", scope="local")
    res = ds_a.push(to="b", data="auto", since=None)
    assert_in_results(res,
                      action="copy",
                      target="b",
                      status="ok",
                      path=str(ds_a.pathobj / "foo.dat"))

    # --since= limits the files considered by --auto.
    (ds_a.pathobj / "bar.dat").write_text("bar")
    ds_a.save()
    (ds_a.pathobj / "baz.dat").write_text("baz")
    ds_a.save()
    res = ds_a.push(to="b", data="auto", since="HEAD~1")
    assert_not_in_results(res,
                          action="copy",
                          path=str(ds_a.pathobj / "bar.dat"))
    assert_in_results(res,
                      action="copy",
                      target="b",
                      status="ok",
                      path=str(ds_a.pathobj / "baz.dat"))

    # --auto also considers preferred content.
    ds_a.repo.config.unset("annex.numcopies", scope="local")
    ds_a.repo.set_preferred_content("wanted", "nothing", remote="b")
    res = ds_a.push(to="b", data="auto", since=None)
    assert_not_in_results(res,
                          action="copy",
                          path=str(ds_a.pathobj / "bar.dat"))

    ds_a.repo.set_preferred_content("wanted", "anything", remote="b")
    res = ds_a.push(to="b", data="auto", since=None)
    assert_in_results(res,
                      action="copy",
                      target="b",
                      status="ok",
                      path=str(ds_a.pathobj / "bar.dat"))
Example #4
def test_push_wanted(srcpath=None, dstpath=None):
    src = Dataset(srcpath).create()
    (src.pathobj / 'data.0').write_text('0')
    (src.pathobj / 'secure.1').write_text('1')
    (src.pathobj / 'secure.2').write_text('2')
    src.save()

    # Drop a file to mimic the case of simply not having it locally (thus
    # nothing to be "pushed")
    src.drop('secure.2', reckless='kill')

    # Annotate sensitive content, actual value "verysecure" does not matter in
    # this example
    src.repo.set_metadata(add={'distribution-restrictions': 'verysecure'},
                          files=['secure.1', 'secure.2'])

    src.create_sibling(
        dstpath,
        annex_wanted="not metadata=distribution-restrictions=*",
        name='target',
    )
    # check that wanted is obeyed, since set in sibling configuration
    res = src.push(to='target')
    assert_in_results(res,
                      action='copy',
                      path=str(src.pathobj / 'data.0'),
                      status='ok')
    for p in ('secure.1', 'secure.2'):
        assert_not_in_results(res, path=str(src.pathobj / p))
    assert_status('notneeded', src.push(to='target'))

    # check the target to really make sure
    dst = Dataset(dstpath)
    # normal file, yes
    eq_((dst.pathobj / 'data.0').read_text(), '0')
    # secure file, no
    if dst.repo.is_managed_branch():
        neq_((dst.pathobj / 'secure.1').read_text(), '1')
    else:
        assert_raises(FileNotFoundError, (dst.pathobj / 'secure.1').read_text)

    # reset wanted config, which must enable push of secure file
    src.repo.set_preferred_content('wanted', '', remote='target')
    res = src.push(to='target')
    assert_in_results(res, path=str(src.pathobj / 'secure.1'))
    eq_((dst.pathobj / 'secure.1').read_text(), '1')
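
A hedged sketch of the wanted-expression mechanism from this test (paths, file names, and the metadata value are illustrative): a sibling created with annex_wanted filters what push() will copy to it.

import tempfile
from datalad.api import Dataset

root = tempfile.mkdtemp()
ds = Dataset(root + '/ds').create()
(ds.pathobj / 'public.dat').write_text('shareable')
(ds.pathobj / 'private.dat').write_text('sensitive')
ds.save()
# tag the sensitive file in git-annex metadata
ds.repo.set_metadata(add={'distribution-restrictions': 'sensitive'},
                     files=['private.dat'])
ds.create_sibling(root + '/mirror', name='mirror',
                  annex_wanted='not metadata=distribution-restrictions=*')
ds.push(to='mirror')   # copies public.dat only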
Example #5
def test_unlock_directory(path=None):
    ds = Dataset(path).create(force=True)
    ds.save()
    ds.unlock(path="dir")
    dirpath = Path("dir")
    dirpath_abs = Path(ds.pathobj / "dir")

    # On adjusted branches (for the purposes of this test, crippled
    # filesystems), the files were already unlocked and the committed state is
    # the unlocked pointer file.
    is_managed_branch = ds.repo.is_managed_branch()
    if is_managed_branch:
        assert_repo_status(ds.path)
    else:
        assert_repo_status(ds.path, modified=[dirpath / "a", dirpath / "b"])
    ds.save()
    ds.drop(str(dirpath / "a"), reckless='kill')
    assert_false(ds.repo.file_has_content(str(dirpath / "a")))

    # Unlocking without an explicit non-directory path doesn't fail if one of
    # the directory's files doesn't have content.
    res = ds.unlock(path="dir")
    assert_not_in_results(res, action="unlock",
                          path=str(dirpath_abs / "a"))
    if is_managed_branch:
        assert_not_in_results(res, action="unlock",
                              path=str(dirpath_abs / "b"))
    else:
        assert_in_results(res, action="unlock", status="ok",
                          path=str(dirpath_abs / "b"))
        assert_repo_status(ds.path, modified=[dirpath / "b"])

    # If we explicitly provide a path that lacks content, we get a result
    # for it.
    assert_in_results(ds.unlock(path=dirpath / "a", on_failure="ignore"),
                      action="unlock", status="impossible",
                      path=str(dirpath_abs / "a"))
Example #6
def test_rerun_assume_ready(path=None):
    ds = Dataset(path).create()
    repo = ds.repo
    (repo.pathobj / "f1").write_text("f1\n")
    ds.save()

    def double_in_both_cmd(src, dest1, dest2):
        return [
            sys.executable, "-c",
            "import sys; import os; import os.path as op; "
            "content = open(sys.argv[-3]).read() * 2; "
            "d1 = sys.argv[-2]; d2 = sys.argv[-1]; "
            "op.lexists(d1) and os.unlink(d1); "
            "op.lexists(d2) and os.unlink(d2); "
            "open(d1, 'w').write(content); open(d2, 'w').write(content)",
            src, dest1, dest2]

    ds.run(double_in_both_cmd("f1", "out1", "out2"), outputs=["out1"])
    # Drop the content so that we remove instead of unlock, making the test
    # more meaningful on an adjusted branch.
    ds.drop(["out1", "out2"], reckless='kill')
    # --assume-ready affects both explicitly specified and automatic outputs.
    res = ds.rerun(assume_ready="outputs")
    assert_not_in_results(res, action="remove")
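
A hedged sketch of the same assume_ready idea in isolation (command and paths invented): rerun() re-executes the command recorded by the preceding run(), and assume_ready='outputs' skips the usual unlock/remove preparation of outputs.

import sys
import tempfile
from datalad.api import Dataset

ds = Dataset(tempfile.mkdtemp() + '/ds').create()
# the command removes its output first, so no unlock is actually needed
cmd = [sys.executable, '-c',
       "import os, os.path as op; "
       "op.lexists('out.txt') and os.unlink('out.txt'); "
       "open('out.txt', 'w').write('v1')"]
ds.run(cmd, outputs=['out.txt'])
ds.rerun(assume_ready='outputs')   # skips unlock/remove of out.txt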
Example #7
def test_procedure_discovery(path=None, super_path=None):
    with chpwd(path):
        # ^ Change directory so that we don't fail with an
        # InvalidGitRepositoryError if the test is executed from a git
        # worktree.
        ps = run_procedure(discover=True, result_renderer='disabled')
        # a few procedures ship with datalad; discovery needs to find them
        assert_true(len(ps) > 2)
        # we get essential properties
        _check_procedure_properties(ps)

    # set up dataset with registered procedure (c&p from test_basics):
    ds = Dataset(path).create(force=True)
    # extra check: must not pick up cfg_yoda.sh in top directory
    ds.run_procedure('cfg_yoda')

    # path to a procedure which is not under any "standard" location but
    # present in the dataset
    code_dir_procedure_path = op.join(ds.path, 'code', 'datalad_test_proc.py')
    top_dir_procedure_path = op.join(ds.path, 'cfg_yoda.sh')

    # run discovery on the dataset:
    ps = ds.run_procedure(discover=True, result_renderer='disabled')
    # it should not be found magically by default
    assert_not_in_results(ps, path=code_dir_procedure_path)
    assert_not_in_results(ps, path=top_dir_procedure_path)

    with patch_config(
        {'datalad.locations.extra-procedures': op.join(ds.path, 'code')}):
        # run discovery on the dataset:
        ps = ds.run_procedure(discover=True, result_renderer='disabled')
        # still needs to find procedures coming with datalad
        assert_true(len(ps) > 3)
        # and procedure under the path we specified
        assert_result_count(ps, 1, path=code_dir_procedure_path)
        assert_not_in_results(ps, path=top_dir_procedure_path)

    # multiple extra locations
    with patch_config({
            'datalad.locations.extra-procedures':
        [op.join(ds.path, 'code'), ds.path]
    }):
        # run discovery on the dataset:
        ps = ds.run_procedure(discover=True, result_renderer='disabled')
        # still needs to find procedures coming with datalad
        assert_true(len(ps) > 4)
        # and procedure under the path we specified
        assert_result_count(ps, 1, path=code_dir_procedure_path)
        assert_result_count(ps, 1, path=top_dir_procedure_path)

    # configure dataset to look for procedures in its code folder
    ds.config.add('datalad.locations.dataset-procedures',
                  'code',
                  scope='branch')
    ds.save(op.join('.datalad', 'config'))

    # run discovery on the dataset:
    ps = ds.run_procedure(discover=True, result_renderer='disabled')

    # still needs to find procedures coming with datalad
    assert_true(len(ps) > 2)
    # we get three essential properties
    _check_procedure_properties(ps)
    # dataset's procedure needs to be in the results
    # and only a single one
    assert_result_count(ps, 1, path=code_dir_procedure_path)
    # a subdir shouldn't be considered a procedure just because it's "executable"
    assert_not_in_results(ps, path=op.join(ds.path, 'code', 'testdir'))

    # make it a subdataset and try again:
    # first we need to save the beast to make install work
    ds.save()
    super = Dataset(super_path).create()
    super.install('sub', source=ds.path)

    ps = super.run_procedure(discover=True, result_renderer='disabled')
    # still needs to find procedures coming with datalad
    assert_true(len(ps) > 2)
    _check_procedure_properties(ps)
    # dataset's procedure needs to be in the results
    assert_in_results(ps,
                      path=op.join(super.path, 'sub', 'code',
                                   'datalad_test_proc.py'))

    if not ds.repo.is_managed_branch():  # no symlinks
        import os

        # create a procedure which is a broken symlink, but recognizable as a
        # python script:
        os.symlink(op.join(super.path, 'sub', 'not_existent'),
                   op.join(super.path, 'sub', 'code', 'broken_link_proc.py'))
        # broken symlink at a procedure location, but we can't tell whether it
        # is an actual procedure without a guess on how to execute it:
        os.symlink(op.join(super.path, 'sub', 'not_existent'),
                   op.join(super.path, 'sub', 'code', 'unknown_broken_link'))

        ps = super.run_procedure(discover=True, result_renderer='disabled')
        # still needs to find procedures coming with datalad and the dataset
        # procedure registered before
        assert_true(len(ps) > 3)
        assert_in_results(ps,
                          path=op.join(super.path, 'sub', 'code',
                                       'broken_link_proc.py'),
                          state='absent')
        assert_in_results(ps,
                          path=op.join(super.path, 'sub', 'code',
                                       'unknown_broken_link'),
                          state='absent')
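
A minimal sketch of registering dataset-local procedures (procedure name and paths invented): pointing datalad.locations.dataset-procedures at a directory makes its scripts discoverable.

import tempfile
from datalad.api import Dataset

ds = Dataset(tempfile.mkdtemp() + '/ds').create()
(ds.pathobj / 'code').mkdir()
(ds.pathobj / 'code' / 'cfg_demo.py').write_text('print("configured")\n')
# record the procedure location in the branch (committed) configuration
ds.config.add('datalad.locations.dataset-procedures', 'code', scope='branch')
ds.save()
procs = ds.run_procedure(discover=True, result_renderer='disabled')
assert any(p['path'].endswith('cfg_demo.py') for p in procs)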
Example #8
def test_nested_pushclone_cycle_allplatforms(origpath=None,
                                             storepath=None,
                                             clonepath=None):
    if 'DATALAD_SEED' in os.environ:
        # we are using create-sibling-ria via the cmdline here
        # this will create random UUIDs for datasets
        # however, given a fixed seed each call to this command will start
        # with the same RNG seed, hence yield the same UUID on the same
        # machine -- leading to a collision
        raise SkipTest(
            'Test incompatible with fixed random number generator seed')
    # the aim here is to test, at a high level, a standard create-push-clone
    # cycle for a dataset with a subdataset, with the goal of ensuring that
    # correct branches and commits are tracked, regardless of platform
    # behavior and the condition of individual clones. Nothing fancy, just
    # that the defaults behave in sensible ways
    from datalad.cmd import WitlessRunner as Runner
    run = Runner().run

    # create original nested dataset
    with chpwd(origpath):
        run(['datalad', 'create', 'super'])
        run(['datalad', 'create', '-d', 'super', str(Path('super', 'sub'))])

    # verify essential linkage properties
    orig_super = Dataset(Path(origpath, 'super'))
    orig_sub = Dataset(orig_super.pathobj / 'sub')

    (orig_super.pathobj / 'file1.txt').write_text('some1')
    (orig_sub.pathobj / 'file2.txt').write_text('some1')
    with chpwd(orig_super.path):
        run(['datalad', 'save', '--recursive'])

    # TODO not yet reported clean with adjusted branches
    #assert_repo_status(orig_super.path)

    # the "true" branch that sub is on, and the gitsha of the HEAD commit of it
    orig_sub_corr_branch = \
        orig_sub.repo.get_corresponding_branch() or orig_sub.repo.get_active_branch()
    orig_sub_corr_commit = orig_sub.repo.get_hexsha(orig_sub_corr_branch)

    # make sure the super tracks this commit
    assert_in_results(
        orig_super.subdatasets(),
        path=orig_sub.path,
        gitshasum=orig_sub_corr_commit,
        # TODO it should also track the branch name
        # Attempted: https://github.com/datalad/datalad/pull/3817
        # But reverted: https://github.com/datalad/datalad/pull/4375
    )

    # publish to a store, to get into a platform-agnostic state
    # (i.e. no impact of an annex-init of any kind)
    store_url = 'ria+' + get_local_file_url(storepath)
    with chpwd(orig_super.path):
        run([
            'datalad', 'create-sibling-ria', '--recursive', '-s', 'store',
            store_url, '--new-store-ok'
        ])
        run(['datalad', 'push', '--recursive', '--to', 'store'])

    # we are using the 'store' sibling's URL, which should be a plain path
    store_super = AnnexRepo(orig_super.siblings(name='store')[0]['url'],
                            init=False)
    store_sub = AnnexRepo(orig_sub.siblings(name='store')[0]['url'],
                          init=False)

    # both datasets in the store only carry the real branches, and nothing
    # adjusted
    for r in (store_super, store_sub):
        eq_(set(r.get_branches()), set([orig_sub_corr_branch, 'git-annex']))

    # and reobtain from a store
    cloneurl = 'ria+' + get_local_file_url(str(storepath), compatibility='git')
    with chpwd(clonepath):
        run(['datalad', 'clone', cloneurl + '#' + orig_super.id, 'super'])
        run(['datalad', '-C', 'super', 'get', '--recursive', '.'])

    # verify that nothing has changed as a result of a push/clone cycle
    clone_super = Dataset(Path(clonepath, 'super'))
    clone_sub = Dataset(clone_super.pathobj / 'sub')
    assert_in_results(
        clone_super.subdatasets(),
        path=clone_sub.path,
        gitshasum=orig_sub_corr_commit,
    )

    for ds1, ds2, f in ((orig_super, clone_super, 'file1.txt'),
                        (orig_sub, clone_sub, 'file2.txt')):
        eq_((ds1.pathobj / f).read_text(), (ds2.pathobj / f).read_text())

    # get status info that does not recurse into subdatasets, i.e. not
    # looking for uncommitted changes
    # we should see no modification reported
    assert_not_in_results(clone_super.status(eval_subdataset_state='commit'),
                          state='modified')
    # and now the same for a more expensive full status
    assert_not_in_results(clone_super.status(recursive=True), state='modified')
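
A hedged sketch of the RIA round-trip at the core of this test, using the Python API instead of the command line (paths and sibling name invented):

import tempfile
from datalad.api import Dataset, clone

root = tempfile.mkdtemp()
ds = Dataset(root + '/ds').create()
(ds.pathobj / 'f.txt').write_text('content')
ds.save()
store_url = 'ria+file://' + root + '/store'
ds.create_sibling_ria(store_url, name='store', new_store_ok=True)
ds.push(to='store')
# RIA clone URLs address a dataset within the store by its ID
copy = clone(store_url + '#' + ds.id, root + '/copy')
copy.get('f.txt')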
Example #9
def test_auto_if_wanted_data_transfer_path_restriction(path=None):
    path = Path(path)
    ds_a = Dataset(path / "a").create()
    ds_a_sub0 = ds_a.create("sub0")
    ds_a_sub1 = ds_a.create("sub1")

    for ds in [ds_a, ds_a_sub0, ds_a_sub1]:
        (ds.pathobj / "sec.dat").write_text("sec")
        (ds.pathobj / "reg.dat").write_text("reg")
    ds_a.save(recursive=True)

    ds_a.create_sibling(
        str(path / "b"),
        name="b",
        annex_wanted="not metadata=distribution-restrictions=*",
        recursive=True)
    for ds in [ds_a, ds_a_sub0, ds_a_sub1]:
        ds.repo.set_metadata(add={"distribution-restrictions": "doesntmatter"},
                             files=["sec.dat"])

    # wanted-triggered --auto can be restricted to a subdataset...
    res = ds_a.push(to="b", path="sub0", data="auto-if-wanted", recursive=True)
    assert_not_in_results(res,
                          action="copy",
                          target="b",
                          status="ok",
                          path=str(ds_a.pathobj / "reg.dat"))
    assert_in_results(res,
                      action="copy",
                      target="b",
                      status="ok",
                      path=str(ds_a_sub0.pathobj / "reg.dat"))
    assert_not_in_results(res,
                          action="copy",
                          target="b",
                          status="ok",
                          path=str(ds_a_sub0.pathobj / "sec.dat"))
    assert_not_in_results(res,
                          action="copy",
                          target="b",
                          status="ok",
                          path=str(ds_a_sub1.pathobj / "reg.dat"))

    # ... and to a wanted file.
    res = ds_a.push(to="b",
                    path="reg.dat",
                    data="auto-if-wanted",
                    recursive=True)
    assert_in_results(res,
                      action="copy",
                      target="b",
                      status="ok",
                      path=str(ds_a.pathobj / "reg.dat"))
    assert_not_in_results(res,
                          action="copy",
                          target="b",
                          status="ok",
                          path=str(ds_a_sub1.pathobj / "reg.dat"))

    # But asking to transfer a file does not do it if the remote has a
    # wanted setting and doesn't want it.
    res = ds_a.push(to="b",
                    path="sec.dat",
                    data="auto-if-wanted",
                    recursive=True)
    assert_not_in_results(res,
                          action="copy",
                          target="b",
                          status="ok",
                          path=str(ds_a.pathobj / "sec.dat"))

    res = ds_a.push(to="b", path="sec.dat", data="anything", recursive=True)
    assert_in_results(res,
                      action="copy",
                      target="b",
                      status="ok",
                      path=str(ds_a.pathobj / "sec.dat"))
Example #10
def test_force_checkdatapresent(srcpath=None, dstpath=None):
    src = Dataset(srcpath).create()
    target = mk_push_target(src, 'target', dstpath, annex=True, bare=True)
    (src.pathobj / 'test_mod_annex_file').write_text("Heavy stuff.")
    src.save(to_git=False, message="New annex file")
    assert_repo_status(src.path, annex=True)
    whereis_prior = src.repo.whereis(files=['test_mod_annex_file'])[0]

    res = src.push(to='target', data='nothing')
    # nothing reported to be copied
    assert_not_in_results(res, action='copy')
    # we got the git-push nevertheless
    eq_(src.repo.get_hexsha(DEFAULT_BRANCH), target.get_hexsha(DEFAULT_BRANCH))
    # nothing moved
    eq_(whereis_prior, src.repo.whereis(files=['test_mod_annex_file'])[0])

    # now a push without the forced no-transfer
    # we do not give since, so the non-transferred file is picked up
    # and transferred
    res = src.push(to='target', force=None)
    # no branch change, done before
    assert_in_results(res,
                      action='publish',
                      status='notneeded',
                      refspec=DEFAULT_REFSPEC)
    # but availability update
    assert_in_results(res,
                      action='publish',
                      status='ok',
                      refspec='refs/heads/git-annex:refs/heads/git-annex')
    assert_in_results(res,
                      status='ok',
                      path=str(src.pathobj / 'test_mod_annex_file'),
                      action='copy')
    # whereis info reflects the change
    ok_(
        len(whereis_prior) < len(
            src.repo.whereis(files=['test_mod_annex_file'])[0]))

    # doing it yet again will do nothing, because all is up-to-date
    assert_status('notneeded', src.push(to='target', force=None))
    # an explicit reference point doesn't change that
    assert_status('notneeded', src.push(to='target',
                                        force=None,
                                        since='HEAD~1'))

    # now force data transfer
    res = src.push(to='target', force='checkdatapresent')
    # no branch change, done before
    assert_in_results(res,
                      action='publish',
                      status='notneeded',
                      refspec=DEFAULT_REFSPEC)
    # no availability update
    assert_in_results(res,
                      action='publish',
                      status='notneeded',
                      refspec='refs/heads/git-annex:refs/heads/git-annex')
    # but data transfer
    assert_in_results(res,
                      status='ok',
                      path=str(src.pathobj / 'test_mod_annex_file'),
                      action='copy')

    # force data transfer, but data isn't available
    src.repo.drop('test_mod_annex_file')
    res = src.push(to='target',
                   path='.',
                   force='checkdatapresent',
                   on_failure='ignore')
    assert_in_results(res,
                      status='impossible',
                      path=str(src.pathobj / 'test_mod_annex_file'),
                      action='copy',
                      message='Slated for transport, but no content present')
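
A hedged sketch of the force mode this test covers (paths and sibling name invented): force='checkdatapresent' re-verifies and re-transfers content that the remote's whereis information claims is already present.

import tempfile
from datalad.api import Dataset

root = tempfile.mkdtemp()
ds = Dataset(root + '/ds').create()
(ds.pathobj / 'blob.dat').write_text('payload')
ds.save()
ds.create_sibling(root + '/mirror', name='mirror')
ds.push(to='mirror')                             # regular transfer
ds.push(to='mirror', force='checkdatapresent')   # re-send despite whereis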
Example #11
def test_push_recursive(origin_path=None,
                        src_path=None,
                        dst_top=None,
                        dst_sub=None,
                        dst_subnoannex=None,
                        dst_subsub=None):
    # dataset with two submodules and one subsubmodule
    origin = Dataset(origin_path).create()
    origin_subm1 = origin.create('sub m')
    origin_subm1.create('subsub m')
    origin.create('subm noannex', annex=False)
    origin.save()
    assert_repo_status(origin.path)
    # prepare src as a fresh clone with all subdatasets checked out recursively
    # running on a clone should make the test scenario differ more from
    # test_push(), even for the pieces that should be identical
    top = Clone.__call__(source=origin.path, path=src_path)
    subs = top.get('.', recursive=True, get_data=False, result_xfm='datasets')
    # order for '.' should not be relied upon, so sort by path
    sub, subsub, subnoannex = sorted(subs, key=lambda ds: ds.path)

    target_top = mk_push_target(top, 'target', dst_top, annex=True)
    # subdatasets have no remote yet, so recursive publishing should fail:
    res = top.push(to="target", recursive=True, on_failure='ignore')
    check_datasets_order(res)
    assert_in_results(res,
                      path=top.path,
                      type='dataset',
                      refspec=DEFAULT_REFSPEC,
                      operations=['new-branch'],
                      action='publish',
                      status='ok',
                      target='target')
    for d in (sub, subsub, subnoannex):
        assert_in_results(res,
                          status='error',
                          type='dataset',
                          path=d.path,
                          message=("Unknown target sibling '%s'.", 'target'))
    # now fix that and set up targets for the submodules
    target_sub = mk_push_target(sub, 'target', dst_sub, annex=True)
    target_subnoannex = mk_push_target(subnoannex,
                                       'target',
                                       dst_subnoannex,
                                       annex=False)
    target_subsub = mk_push_target(subsub, 'target', dst_subsub, annex=True)

    # and same push call as above
    res = top.push(to="target", recursive=True)
    check_datasets_order(res)
    # topds skipped
    assert_in_results(res,
                      path=top.path,
                      type='dataset',
                      action='publish',
                      status='notneeded',
                      target='target')
    # the rest pushed
    for d in (sub, subsub, subnoannex):
        assert_in_results(res,
                          status='ok',
                          type='dataset',
                          path=d.path,
                          refspec=DEFAULT_REFSPEC)
    # all corresponding branches match across all datasets
    for s, d in zip(
        (top, sub, subnoannex, subsub),
        (target_top, target_sub, target_subnoannex, target_subsub)):
        eq_(list(s.repo.get_branch_commits_(DEFAULT_BRANCH)),
            list(d.get_branch_commits_(DEFAULT_BRANCH)))
        if s != subnoannex:
            eq_(list(s.repo.get_branch_commits_("git-annex")),
                list(d.get_branch_commits_("git-annex")))

    # rerun should not result in further pushes of the default branch
    res = top.push(to="target", recursive=True)
    check_datasets_order(res)
    assert_not_in_results(res, status='ok', refspec=DEFAULT_REFSPEC)
    assert_in_results(res, status='notneeded', refspec=DEFAULT_REFSPEC)

    # now annex a file in subsub
    test_copy_file = subsub.pathobj / 'test_mod_annex_file'
    test_copy_file.write_text("Heavy stuff.")
    # save all the way up
    assert_status(('ok', 'notneeded'),
                  top.save(message='subsub got something', recursive=True))
    assert_repo_status(top.path)
    # publish straight up, should be smart by default
    res = top.push(to="target", recursive=True)
    check_datasets_order(res)
    # we see 3 out of 4 datasets pushed (sub noannex was left unchanged)
    for d in (top, sub, subsub):
        assert_in_results(res,
                          status='ok',
                          type='dataset',
                          path=d.path,
                          refspec=DEFAULT_REFSPEC)
    # file content copied too
    assert_in_results(res,
                      action='copy',
                      status='ok',
                      path=str(test_copy_file))
    # verify it is accessible, drop and bring back
    assert_status('ok', top.drop(str(test_copy_file)))
    ok_(not subsub.repo.file_has_content('test_mod_annex_file'))
    top.get(test_copy_file)
    ok_file_has_content(test_copy_file, 'Heavy stuff.')

    # make two modifications
    (sub.pathobj / 'test_mod_annex_file').write_text('annex')
    (subnoannex.pathobj / 'test_mod_file').write_text('git')
    # save separately
    top.save(sub.pathobj, message='annexadd', recursive=True)
    top.save(subnoannex.pathobj, message='gitadd', recursive=True)
    # now only publish the latter one
    res = top.push(to="target", since=DEFAULT_BRANCH + '~1', recursive=True)
    # nothing copied, no reports on the other modification
    assert_not_in_results(res, action='copy')
    assert_not_in_results(res, path=sub.path)
    for d in (top, subnoannex):
        assert_in_results(res,
                          status='ok',
                          type='dataset',
                          path=d.path,
                          refspec=DEFAULT_REFSPEC)
    # an unconditional push should now pick up the remaining changes
    res = top.push(to="target", recursive=True)
    assert_in_results(res,
                      action='copy',
                      status='ok',
                      path=str(sub.pathobj / 'test_mod_annex_file'))
    assert_in_results(res,
                      status='ok',
                      type='dataset',
                      path=sub.path,
                      refspec=DEFAULT_REFSPEC)
    for d in (top, subnoannex, subsub):
        assert_in_results(res,
                          status='notneeded',
                          type='dataset',
                          path=d.path,
                          refspec=DEFAULT_REFSPEC)

    # if noannex target gets some annex, we still should not fail to push
    target_subnoannex.call_git(['annex', 'init'])
    # just to ensure that we do need something to push
    (subnoannex.pathobj / "newfile").write_text("content")
    subnoannex.save()
    res = subnoannex.push(to="target")
    assert_in_results(res, status='ok', type='dataset')
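
A minimal sketch of since-limited recursive publishing (paths and sibling name invented): since='HEAD~1' restricts a recursive push to datasets touched by changes after that reference point.

import tempfile
from datalad.api import Dataset

root = tempfile.mkdtemp()
top = Dataset(root + '/top').create()
sub = top.create('sub')
top.create_sibling(root + '/mirror', name='mirror', recursive=True)
(sub.pathobj / 'f.txt').write_text('change')
top.save(recursive=True)
# only publish what the most recent commit touched
top.push(to='mirror', recursive=True, since='HEAD~1')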
Example #12
def test_remove(path=None):
    # see docstring for test data structure
    ds = get_deeply_nested_structure(path)
    gitfile = op.join("subdir", "git_file.txt")

    ok_((ds.pathobj / gitfile).exists())
    res = ds.remove(gitfile, drop='all')
    assert_result_count(res, 3)
    # git file needs no dropping
    assert_in_results(
        res,
        action='drop',
        path=str(ds.pathobj / gitfile),
        status='notneeded',
        type='file',
    )
    # removed from working tree
    assert_in_results(
        res,
        action='remove',
        path=str(ds.pathobj / gitfile),
        status='ok',
        type='file',
    )
    # saved removal in dataset
    assert_in_results(
        res,
        action='save',
        path=ds.path,
        type='dataset',
        status='ok',
    )
    nok_((ds.pathobj / gitfile).exists())

    # now the same for an annexed file
    annexedfile = op.join("subdir", "annexed_file.txt")
    # drop failure prevents removal
    res = ds.remove(annexedfile, drop='all', on_failure='ignore')
    assert_result_count(res, 1)
    assert_in_results(res,
                      status='error',
                      action='drop',
                      path=str(ds.pathobj / annexedfile))
    ok_((ds.pathobj / annexedfile).exists())

    # now remove the file, but actually not drop the underlying
    # key -- hence no availability loss -- default mode of operation
    # remember the key
    key = ds.repo.get_file_annexinfo(annexedfile)['key']
    res = ds.remove(annexedfile,
                    drop='datasets',
                    message="custom msg",
                    on_failure='ignore')
    # removal and dataset save
    assert_result_count(res, 2)
    eq_(
        ds.repo.format_commit("%B",
                              ds.repo.get_corresponding_branch()).rstrip(),
        "custom msg")
    assert_in_results(res,
                      action='remove',
                      status='ok',
                      path=str(ds.pathobj / annexedfile))
    assert_not_in_results(res, action='drop')
    nok_((ds.pathobj / annexedfile).exists())
    res = ds.repo.call_annex_records(['whereis', '--key', key, '--json'])
    assert_in_results(res, key=key, success=True)

    # now remove entire directory
    res = ds.remove('subdir', on_failure='ignore')
    assert_in_results(res, status='impossible', state='untracked')
    ok_((ds.pathobj / 'subdir').exists())

    ds.save('subdir')
    res = ds.remove('subdir', on_failure='ignore')
    assert_in_results(res, status='ok', action='remove')
    assert_in_results(res, status='ok', action='save', type='dataset')
    nok_((ds.pathobj / 'subdir').exists())

    # now remove an entire subdataset
    # prep: make clean
    rmdspath = ds.pathobj / 'subds_modified' / 'subds_lvl1_modified'
    ds.save(rmdspath, recursive=True)
    res = ds.remove(rmdspath, on_failure='ignore')
    # unique dataset, with unique keys -- must fail
    assert_in_results(res,
                      status='error',
                      action='uninstall',
                      path=str(rmdspath))

    # go reckless
    assert_in(
        str(rmdspath),
        ds.subdatasets(path='subds_modified',
                       recursive=True,
                       result_xfm='paths',
                       result_renderer='disabled'))
    res = ds.remove(rmdspath, reckless='availability', on_failure='ignore')
    assert_status('ok', res)
    assert_in_results(res, action='uninstall', path=str(rmdspath))
    assert_in_results(res, action='remove', path=str(rmdspath))
    nok_(rmdspath.exists())
    # properly unlinked
    assert_not_in(
        str(rmdspath),
        ds.subdatasets(path='subds_modified',
                       recursive=True,
                       result_xfm='paths',
                       result_renderer='disabled'))

    # lastly, remove an uninstalled subdataset
    # we save all to be able to check whether removal was committed and
    # the ds is clean at the end
    ds.save()
    # uninstall, we don't care about the existing modifications here
    res = ds.drop('subds_modified',
                  what='all',
                  reckless='kill',
                  recursive=True)
    # even remove the empty mount-point, such that it is invisible on the
    # file system
    (ds.pathobj / 'subds_modified').rmdir()
    res = ds.remove('subds_modified', on_failure='ignore')
    assert_in_results(res,
                      action='remove',
                      path=str(ds.pathobj / 'subds_modified'))
    # removal was committed
    assert_repo_status(ds.path)

    # and really finally, removing top-level is just a drop
    res = ds.remove(reckless='kill')
    assert_in_results(res, action='uninstall', path=ds.path, status='ok')
    nok_(ds.is_installed())
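
A hedged sketch of the drop/reckless interplay in remove() (paths invented): by default, removal refuses to discard the last copy of annexed content, and reckless='availability' waives that check.

import tempfile
from datalad.api import Dataset

ds = Dataset(tempfile.mkdtemp() + '/ds').create()
(ds.pathobj / 'f.dat').write_text('data')
ds.save()
# refuses: dropping would discard the only copy of the content
res = ds.remove('f.dat', drop='all', on_failure='ignore')
# succeeds once availability checks are explicitly waived
ds.remove('f.dat', drop='all', reckless='availability')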
Example #13
def test_run_assume_ready(path=None):
    ds = Dataset(path).create()
    repo = ds.repo
    adjusted = repo.is_managed_branch()

    # --assume-ready=inputs

    (repo.pathobj / "f1").write_text("f1")
    ds.save()

    def cat_cmd(fname):
        return [
            sys.executable, "-c",
            "import sys; print(open(sys.argv[-1]).read())", fname
        ]

    assert_in_results(ds.run(cat_cmd("f1"), inputs=["f1"]),
                      action="get",
                      type="file")
    # Same thing, but without the get() call.
    assert_not_in_results(ds.run(cat_cmd("f1"),
                                 inputs=["f1"],
                                 assume_ready="inputs"),
                          action="get",
                          type="file")

    ds.drop("f1", reckless='kill')
    if not adjusted:
        # If the input is not actually ready, the command will fail.
        assert_in_results(ds.run(cat_cmd("f1"),
                                 inputs=["f1"],
                                 assume_ready="inputs",
                                 on_failure="ignore",
                                 result_renderer=None),
                          action="run",
                          status="error")

    # --assume-ready=outputs

    def unlink_and_write_cmd(fname):
        # This command doesn't care whether the output file is unlocked because
        # it removes it ahead of time anyway.
        return [
            sys.executable, "-c",
            "import sys; import os; import os.path as op; "
            "f = sys.argv[-1]; op.lexists(f) and os.unlink(f); "
            "open(f, mode='w').write(str(sys.argv))", fname
        ]

    (repo.pathobj / "f2").write_text("f2")
    ds.save()

    res = ds.run(unlink_and_write_cmd("f2"), outputs=["f2"])
    if not adjusted:
        assert_in_results(res, action="unlock", type="file")
    # Same thing, but without the unlock() call.
    res = ds.run(unlink_and_write_cmd("f2"),
                 outputs=["f2"],
                 assume_ready="outputs")
    assert_not_in_results(res, action="unlock", type="file")

    # --assume-ready=both

    res = ds.run(unlink_and_write_cmd("f2"), outputs=["f2"], inputs=["f2"])
    assert_in_results(res, action="get", type="file")
    if not adjusted:
        assert_in_results(res, action="unlock", type="file")

    res = ds.run(unlink_and_write_cmd("f2"),
                 outputs=["f2"],
                 inputs=["f2"],
                 assume_ready="both")
    assert_not_in_results(res, action="get", type="file")
    assert_not_in_results(res, action="unlock", type="file")