Example #1
def test_rerun_assume_ready(path):
    ds = Dataset(path).create()
    repo = ds.repo
    (repo.pathobj / "f1").write_text("f1\n")
    ds.save()

    def double_in_both_cmd(src, dest1, dest2):
        return [
            sys.executable, "-c",
            "import sys; import os; import os.path as op; "
            "content = open(sys.argv[-3]).read() * 2; "
            "d1 = sys.argv[-2]; d2 = sys.argv[-1]; "
            "op.lexists(d1) and os.unlink(d1); "
            "op.lexists(d2) and os.unlink(d2); "
            "open(d1, 'w').write(content); open(d2, 'w').write(content)", src,
            dest1, dest2
        ]

    ds.run(double_in_both_cmd("f1", "out1", "out2"), outputs=["out1"])
    # Drop the content so that we remove instead of unlock, making the test
    # more meaningful on an adjusted branch.
    ds.drop(["out1", "out2"], check=False)
    # --assume-ready affects both explicitly specified and automatic outputs.
    res = ds.rerun(assume_ready="outputs")
    assert_not_in_results(res, action="remove")
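
# assert_in_results/assert_not_in_results come from datalad's test utilities
# and are not defined in this excerpt. A minimal sketch of the assumed
# semantics (each result is a dict; a match means every given keyword equals
# the corresponding dict entry):
def assert_not_in_results_sketch(results, **kwargs):
    # fail if any result record carries all of the given key/value pairs
    for r in results:
        assert not all(r.get(k) == v for k, v in kwargs.items()), \
            "unexpected result matching %r: %r" % (kwargs, r)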
Example #2
def test_push_subds_no_recursion(src_path, dst_top, dst_sub, dst_subsub):
    # dataset with one submodule and one subsubmodule
    top = Dataset(src_path).create()
    sub = top.create('sub m')
    test_file = sub.pathobj / 'subdir' / 'test_file'
    test_file.parent.mkdir()
    test_file.write_text('some')
    subsub = sub.create(sub.pathobj / 'subdir' / 'subsub m')
    top.save(recursive=True)
    assert_repo_status(top.path)
    target_top = mk_push_target(top, 'target', dst_top, annex=True)
    target_sub = mk_push_target(sub, 'target', dst_sub, annex=True)
    target_subsub = mk_push_target(subsub, 'target', dst_subsub, annex=True)
    # now publish, but with NO recursion; instead give the directory in the
    # middle subdataset that holds both a subdataset and a file
    res = top.push(
        to='target',
        # give relative to top dataset to elevate the difficulty a little
        path=str(test_file.relative_to(top.pathobj).parent))
    assert_status('ok', res)
    assert_in_results(res, action='publish', type='dataset', path=top.path)
    assert_in_results(res, action='publish', type='dataset', path=sub.path)
    assert_in_results(res, action='copy', type='file', path=str(test_file))
    # the lowest-level subdataset isn't touched
    assert_not_in_results(res,
                          action='publish',
                          type='dataset',
                          path=subsub.path)
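
# mk_push_target() is a helper from the surrounding test module and is not
# shown here. A hedged sketch of its likely shape: create a repository at the
# given location and register it as a sibling of `ds` under `name`. The real
# helper may additionally handle bare repos and hook setup.
def mk_push_target_sketch(ds, name, path, annex=True):
    from datalad.support.annexrepo import AnnexRepo
    from datalad.support.gitrepo import GitRepo
    cls = AnnexRepo if annex else GitRepo
    target = cls(path, create=True)
    ds.siblings('add', name=name, url=path)
    return target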
Example #3
def test_force_checkdatapresent(srcpath, dstpath):
    src = Dataset(srcpath).create()
    target = mk_push_target(src, 'target', dstpath, annex=True, bare=True)
    (src.pathobj / 'test_mod_annex_file').write_text("Heavy stuff.")
    src.save(to_git=False, message="New annex file")
    assert_repo_status(src.path, annex=True)
    whereis_prior = src.repo.whereis(files=['test_mod_annex_file'])[0]

    res = src.push(to='target', data='nothing')
    # nothing reported to be copied
    assert_not_in_results(res, action='copy')
    # we got the git-push nevertheless
    eq_(src.repo.get_hexsha(DEFAULT_BRANCH), target.get_hexsha(DEFAULT_BRANCH))
    # nothing moved
    eq_(whereis_prior, src.repo.whereis(files=['test_mod_annex_file'])[0])

    # now a push without forced no-transfer
    # we do not give since, so the non-transferred file is picked up
    # and transferred
    res = src.push(to='target', force=None)
    # no branch change, done before
    assert_in_results(res, action='publish', status='notneeded',
                      refspec=DEFAULT_REFSPEC)
    # but availability update
    assert_in_results(res, action='publish', status='ok',
                      refspec='refs/heads/git-annex:refs/heads/git-annex')
    assert_in_results(res, status='ok',
                      path=str(src.pathobj / 'test_mod_annex_file'),
                      action='copy')
    # whereis info reflects the change
    ok_(len(whereis_prior) < len(
        src.repo.whereis(files=['test_mod_annex_file'])[0]))

    # doing it yet again will do nothing, because all is up to date
    assert_status('notneeded', src.push(to='target', force=None))
    # an explicit reference point doesn't change that
    assert_status('notneeded',
                  src.push(to='target', force=None, since='HEAD~1'))

    # now force data transfer
    res = src.push(to='target', force='checkdatapresent')
    # no branch change, done before
    assert_in_results(res, action='publish', status='notneeded',
                      refspec=DEFAULT_REFSPEC)
    # no availability update
    assert_in_results(res, action='publish', status='notneeded',
                      refspec='refs/heads/git-annex:refs/heads/git-annex')
    # but data transfer
    assert_in_results(res, status='ok',
                      path=str(src.pathobj / 'test_mod_annex_file'),
                      action='copy')

    # force data transfer, but data isn't available
    src.repo.drop('test_mod_annex_file')
    res = src.push(to='target', path='.', force='checkdatapresent', on_failure='ignore')
    assert_in_results(res, status='impossible',
                      path=str(src.pathobj / 'test_mod_annex_file'),
                      action='copy',
                      message='Slated for transport, but no content present')
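
# For reference, a command-line rendering of the forced-transfer push above,
# using the Runner pattern that Example #14 relies on; the --force value is
# assumed to mirror the Python force= parameter one-to-one.
def push_force_checkdatapresent_cli(srcpath):
    from datalad.cmd import WitlessRunner as Runner
    from datalad.utils import chpwd
    with chpwd(srcpath):
        Runner().run(['datalad', 'push', '--to', 'target',
                      '--force', 'checkdatapresent'])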
Example #4
def test_push_wanted(srcpath, dstpath):
    src = Dataset(srcpath).create()

    if src.repo.is_managed_branch():
        # on a crippled FS, enabling the post-update hook via create-sibling
        # doesn't work ATM
        raise SkipTest("no create-sibling on crippled FS")
    (src.pathobj / 'data.0').write_text('0')
    (src.pathobj / 'secure.1').write_text('1')
    (src.pathobj / 'secure.2').write_text('2')
    src.save()

    # Drop a file to mimic the case of simply not having it locally (thus
    # nothing to be "pushed")
    src.drop('secure.2', check=False)

    # Annotate sensitive content, actual value "verysecure" does not matter in
    # this example
    src.repo.set_metadata(add={'distribution-restrictions': 'verysecure'},
                          files=['secure.1', 'secure.2'])

    src.create_sibling(
        dstpath,
        annex_wanted="not metadata=distribution-restrictions=*",
        name='target',
    )
    # check that wanted is obeyed, if instructed by configuration
    src.config.set('datalad.push.copy-auto-if-wanted', 'true', where='local')
    res = src.push(to='target')
    assert_in_results(res,
                      action='copy',
                      path=str(src.pathobj / 'data.0'),
                      status='ok')
    for p in ('secure.1', 'secure.2'):
        assert_not_in_results(res, path=str(src.pathobj / p))
    assert_status('notneeded', src.push(to='target'))

    # check that dataset-config cannot overrule this
    src.config.set('datalad.push.copy-auto-if-wanted',
                   'false',
                   where='dataset')
    res = src.push(to='target')
    assert_status('notneeded', res)

    # check the target to really make sure
    dst = Dataset(dstpath)
    # normal file, yes
    eq_((dst.pathobj / 'data.0').read_text(), '0')
    # secure file, no
    if dst.repo.is_managed_branch():
        neq_((dst.pathobj / 'secure.1').read_text(), '1')
    else:
        assert_raises(FileNotFoundError, (dst.pathobj / 'secure.1').read_text)

    # remove local config, must enable push of secure file
    src.config.unset('datalad.push.copy-auto-if-wanted', where='local')
    res = src.push(to='target')
    assert_in_results(res, path=str(src.pathobj / 'secure.1'))
    eq_((dst.pathobj / 'secure.1').read_text(), '1')
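
# The three config calls above exercise scope precedence: a value set with
# where='local' lives in .git/config and overrides one committed to the
# dataset scope (.datalad/config). A minimal restatement of that, with the
# same ConfigManager API:
def copy_auto_if_wanted_scope_demo(srcpath):
    ds = Dataset(srcpath)
    ds.config.set('datalad.push.copy-auto-if-wanted', 'true', where='local')
    ds.config.set('datalad.push.copy-auto-if-wanted', 'false', where='dataset')
    # local scope wins over dataset scope
    eq_(ds.config.get('datalad.push.copy-auto-if-wanted'), 'true')
    ds.config.unset('datalad.push.copy-auto-if-wanted', where='local')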
Example #5
def test_recurse_existing(src, path):
    origin_ds = _make_dataset_hierarchy(src)

    # make sure recursion_limit works as expected across a range of depths
    for depth in range(len(origin_ds)):
        res = install(path,
                      source=src,
                      recursive=True,
                      recursion_limit=depth,
                      result_xfm=None,
                      return_type='list',
                      result_filter=None)
        # we expect one dataset per level
        assert_result_count(res, depth + 1, type='dataset', status='ok')
        rmtree(path)

    # now install all but the last two levels, no data
    root, sub1, sub2 = install(path,
                               source=src,
                               recursive=True,
                               recursion_limit=2,
                               result_xfm='datasets',
                               result_filter=None)
    ok_(sub2.repo.file_has_content('file_in_annex.txt') is False)
    sub3 = Dataset(opj(sub2.path, 'sub3'))
    ok_(not sub3.is_installed())
    # now get all content in all existing datasets, no new datasets installed
    # in the process
    files = root.get(curdir, recursive=True, recursion_limit='existing')
    assert_not_in_results(files, type='dataset', status='ok')
    assert_result_count(files, 1, type='file', status='ok')
    ok_(sub2.repo.file_has_content('file_in_annex.txt') is True)
    ok_(not sub3.is_installed())
    # now pull down all remaining datasets, no data
    sub3, sub4 = root.get(curdir,
                          recursive=True,
                          get_data=False,
                          result_xfm='datasets',
                          result_filter=lambda x: x['status'] == 'ok')
    ok_(sub4.is_installed())
    ok_(sub3.repo.file_has_content('file_in_annex.txt') is False)
    # aaannd all data
    files = root.get(
        curdir,
        recursive=True,
        result_filter=lambda x: x['status'] == 'ok' and x['type'] == 'file')
    eq_(len(files), 1)
    ok_(sub3.repo.file_has_content('file_in_annex.txt') is True)
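
# _make_dataset_hierarchy() is defined elsewhere in the test module. A hedged
# sketch consistent with the assertions above: a linear chain of subdatasets
# (hence "one dataset per level"), with an annexed file_in_annex.txt in the
# two middle levels; the sub* names are assumptions.
def _make_dataset_hierarchy_sketch(path):
    origin = Dataset(path).create()
    ds, subs = origin, []
    for name in ('sub1', 'sub2', 'sub3', 'sub4'):
        ds = ds.create(name)
        subs.append(ds)
    (subs[1].pathobj / 'file_in_annex.txt').write_text('content')
    (subs[2].pathobj / 'file_in_annex.txt').write_text('content')
    origin.save(recursive=True)
    return (origin, *subs)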
Example #6
def test_auto_data_transfer(path):
    path = Path(path)
    ds_a = Dataset(path / "a").create()
    (ds_a.pathobj / "foo.dat").write_text("foo")
    ds_a.save()

    # Should be the default, but just in case.
    ds_a.repo.config.set("annex.numcopies", "1", where="local")
    ds_a.create_sibling(str(path / "b"), name="b")

    # With numcopies=1, no data is copied with data="auto".
    res = ds_a.push(to="b", data="auto", since=None)
    assert_not_in_results(res, action="copy")

    # Even when a file is explicitly given.
    res = ds_a.push(to="b", path="foo.dat", data="auto", since=None)
    assert_not_in_results(res, action="copy")

    # numcopies=2 changes that.
    ds_a.repo.config.set("annex.numcopies", "2", where="local")
    res = ds_a.push(to="b", data="auto", since=None)
    assert_in_results(res,
                      action="copy",
                      target="b",
                      status="ok",
                      path=str(ds_a.pathobj / "foo.dat"))

    # --since= limits the files considered by --auto.
    (ds_a.pathobj / "bar.dat").write_text("bar")
    ds_a.save()
    (ds_a.pathobj / "baz.dat").write_text("baz")
    ds_a.save()
    res = ds_a.push(to="b", data="auto", since="HEAD~1")
    assert_not_in_results(res,
                          action="copy",
                          path=str(ds_a.pathobj / "bar.dat"))
    assert_in_results(res,
                      action="copy",
                      target="b",
                      status="ok",
                      path=str(ds_a.pathobj / "baz.dat"))

    # --auto also considers preferred content.
    ds_a.repo.config.unset("annex.numcopies", where="local")
    ds_a.repo.set_preferred_content("wanted", "nothing", remote="b")
    res = ds_a.push(to="b", data="auto", since=None)
    assert_not_in_results(res,
                          action="copy",
                          path=str(ds_a.pathobj / "bar.dat"))

    ds_a.repo.set_preferred_content("wanted", "anything", remote="b")
    res = ds_a.push(to="b", data="auto", since=None)
    assert_in_results(res,
                      action="copy",
                      target="b",
                      status="ok",
                      path=str(ds_a.pathobj / "bar.dat"))
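
# set_preferred_content('wanted', ...) drives git-annex's preferred-content
# machinery; the equivalent shell call (Runner pattern as in Example #14) is
# assumed to be `git annex wanted <remote> <expression>`:
def set_wanted_via_cli(path):
    from datalad.cmd import WitlessRunner as Runner
    from datalad.utils import chpwd
    with chpwd(str(path / "a")):
        Runner().run(['git', 'annex', 'wanted', 'b', 'nothing'])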
Example #7
def test_push_wanted(srcpath, dstpath):
    src = Dataset(srcpath).create()
    (src.pathobj / 'data.0').write_text('0')
    (src.pathobj / 'secure.1').write_text('1')
    (src.pathobj / 'secure.2').write_text('2')
    src.save()

    # Drop a file to mimic the case of simply not having it locally (thus
    # nothing to be "pushed")
    src.drop('secure.2', check=False)

    # Annotate sensitive content, actual value "verysecure" does not matter in
    # this example
    src.repo.set_metadata(add={'distribution-restrictions': 'verysecure'},
                          files=['secure.1', 'secure.2'])

    src.create_sibling(
        dstpath,
        annex_wanted="not metadata=distribution-restrictions=*",
        name='target',
    )
    # check that wanted is obeyed, since it is set in the sibling configuration
    res = src.push(to='target')
    assert_in_results(res,
                      action='copy',
                      path=str(src.pathobj / 'data.0'),
                      status='ok')
    for p in ('secure.1', 'secure.2'):
        assert_not_in_results(res, path=str(src.pathobj / p))
    assert_status('notneeded', src.push(to='target'))

    # check the target to really make sure
    dst = Dataset(dstpath)
    # normal file, yes
    eq_((dst.pathobj / 'data.0').read_text(), '0')
    # secure file, no
    if dst.repo.is_managed_branch():
        neq_((dst.pathobj / 'secure.1').read_text(), '1')
    else:
        assert_raises(FileNotFoundError, (dst.pathobj / 'secure.1').read_text)

    # reset wanted config, which must enable push of secure file
    src.repo.set_preferred_content('wanted', '', remote='target')
    res = src.push(to='target')
    assert_in_results(res, path=str(src.pathobj / 'secure.1'))
    eq_((dst.pathobj / 'secure.1').read_text(), '1')
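
# set_metadata(add=...) manipulates git-annex file metadata; the matching
# command-line form (Runner pattern as in Example #14) is assumed to be:
def set_restrictions_via_cli(srcpath):
    from datalad.cmd import WitlessRunner as Runner
    from datalad.utils import chpwd
    with chpwd(srcpath):
        Runner().run(['git', 'annex', 'metadata',
                      '--set', 'distribution-restrictions=verysecure',
                      'secure.1', 'secure.2'])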
Example #8
def test_unlock_directory(path):
    ds = Dataset(path).create(force=True)
    ds.save()
    ds.unlock(path="dir")
    dirpath = Path("dir")
    dirpath_abs = Path(ds.pathobj / "dir")

    # On adjusted branches (for the purposes of this test, crippled
    # filesystems), the files were already unlocked and the committed state is
    # the unlocked pointer file.
    is_managed_branch = ds.repo.is_managed_branch()
    if is_managed_branch:
        assert_repo_status(ds.path)
    else:
        assert_repo_status(ds.path, modified=[dirpath / "a", dirpath / "b"])
    ds.save()
    ds.drop(text_type(dirpath / "a"), check=False)
    assert_false(ds.repo.file_has_content(text_type(dirpath / "a")))

    # Unlocking without an explicit non-directory path doesn't fail if one of
    # the directory's files doesn't have content.
    res = ds.unlock(path="dir")
    assert_not_in_results(res,
                          action="unlock",
                          path=text_type(dirpath_abs / "a"))
    if is_managed_branch:
        assert_not_in_results(res,
                              action="unlock",
                              path=text_type(dirpath_abs / "b"))
    else:
        assert_in_results(res,
                          action="unlock",
                          status="ok",
                          path=text_type(dirpath_abs / "b"))
        assert_repo_status(ds.path, modified=[dirpath / "b"])

    # If we explicitly provide a path that lacks content, we get a result
    # for it.
    assert_in_results(ds.unlock(path=dirpath / "a", on_failure="ignore"),
                      action="unlock",
                      status="impossible",
                      path=text_type(dirpath_abs / "a"))
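
# Note on text_type: this test predates str-only code; text_type is the six
# compatibility alias that is simply str on Python 3, so these calls reduce
# to str(dirpath / "a") etc.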
Example #9
def test_recurse_existing(src, path):
    origin_ds = _make_dataset_hierarchy(src)

    # make sure recursion_limit works as expected across a range of depths
    for depth in range(len(origin_ds)):
        res = install(
            path, source=src, recursive=True, recursion_limit=depth,
            result_xfm=None, return_type='list', result_filter=None)
        # we expect one dataset per level
        assert_result_count(
            res, depth + 1, type='dataset', status='ok')
        rmtree(path)

    # now install all but the last two levels, no data
    root, sub1, sub2 = install(
        path, source=src, recursive=True, recursion_limit=2,
        result_xfm='datasets', result_filter=None)
    ok_(sub2.repo.file_has_content('file_in_annex.txt') is False)
    sub3 = Dataset(opj(sub2.path, 'sub3'))
    ok_(not sub3.is_installed())
    # now get all content in all existing datasets, no new datasets installed
    # in the process
    files = root.get(curdir, recursive=True, recursion_limit='existing')
    assert_not_in_results(files, type='dataset', status='ok')
    assert_result_count(files, 1, type='file', status='ok')
    ok_(sub2.repo.file_has_content('file_in_annex.txt') is True)
    ok_(not sub3.is_installed())
    # now pull down all remaining datasets, no data
    sub3, sub4 = root.get(
        curdir, recursive=True, get_data=False,
        result_xfm='datasets', result_filter=lambda x: x['status'] == 'ok')
    ok_(sub4.is_installed())
    ok_(sub3.repo.file_has_content('file_in_annex.txt') is False)
    # aaannd all data
    files = root.get(curdir, recursive=True, result_filter=lambda x: x['status'] == 'ok' and x['type'] == 'file')
    eq_(len(files), 1)
    ok_(sub3.repo.file_has_content('file_in_annex.txt') is True)
Example #10
def test_procedure_discovery(path, super_path):
    with chpwd(path):
        # ^ Change directory so that we don't fail with an
        # InvalidGitRepositoryError if the test is executed from a git
        # worktree.
        ps = run_procedure(discover=True)
        # a few procedures come with datalad; discovery needs to find them
        assert_true(len(ps) > 2)
        # we get three essential properties
        eq_(
            sum([
                'procedure_type' in p and 'procedure_callfmt' in p
                and 'path' in p for p in ps
            ]), len(ps))

    # set up dataset with registered procedure (c&p from test_basics):
    ds = Dataset(path).create(force=True)
    ds.run_procedure('setup_yoda_dataset')
    ok_clean_git(ds.path)
    # configure dataset to look for procedures in its code folder
    ds.config.add('datalad.locations.dataset-procedures',
                  'code',
                  where='dataset')
    # configure dataset to run the demo procedure prior to the clean command
    ds.config.add('datalad.clean.proc-pre',
                  'datalad_test_proc',
                  where='dataset')
    ds.add(op.join('.datalad', 'config'))

    # run discovery on the dataset:
    ps = ds.run_procedure(discover=True)

    # still needs to find procedures coming with datalad
    assert_true(len(ps) > 2)
    # we get three essential properties
    eq_(
        sum([
            'procedure_type' in p and 'procedure_callfmt' in p and 'path' in p
            for p in ps
        ]), len(ps))
    # dataset's procedure needs to be in the results
    assert_in_results(ps,
                      path=op.join(ds.path, 'code', 'datalad_test_proc.py'))

    # make it a subdataset and try again:
    super = Dataset(super_path).create()
    super.install('sub', source=ds.path)

    ps = super.run_procedure(discover=True)
    # still needs to find procedures coming with datalad
    assert_true(len(ps) > 2)
    # we get three essential properties
    eq_(
        sum([
            'procedure_type' in p and 'procedure_callfmt' in p and 'path' in p
            for p in ps
        ]), len(ps))
    # dataset's procedure needs to be in the results
    assert_in_results(ps,
                      path=op.join(super.path, 'sub', 'code',
                                   'datalad_test_proc.py'))

    if not on_windows:  # no symlinks
        import os
        # create a procedure which is a broken symlink, but recognizable as a
        # python script:
        os.symlink(op.join(super.path, 'sub', 'not_existent'),
                   op.join(super.path, 'sub', 'code', 'broken_link_proc.py'))
        # broken symlink at a procedure location, but we can't tell whether it
        # is an actual procedure without any hint on how to execute it:
        os.symlink(op.join(super.path, 'sub', 'not_existent'),
                   op.join(super.path, 'sub', 'code', 'unknwon_broken_link'))

        ps = super.run_procedure(discover=True)
        # still needs to find procedures coming with datalad and the dataset
        # procedure registered before
        assert_true(len(ps) > 3)
        assert_in_results(ps,
                          path=op.join(super.path, 'sub', 'code',
                                       'broken_link_proc.py'),
                          state='absent')
        assert_not_in_results(ps,
                              path=op.join(super.path, 'sub', 'code',
                                           'unknwon_broken_link'))
Example #11
def test_procedure_discovery(path, super_path):
    with chpwd(path):
        # ^ Change directory so that we don't fail with an
        # InvalidGitRepositoryError if the test is executed from a git
        # worktree.
        ps = run_procedure(discover=True)
        # a few procedures come with datalad; discovery needs to find them
        assert_true(len(ps) > 2)
        # we get essential properties
        _check_procedure_properties(ps)

    # set up dataset with registered procedure (c&p from test_basics):
    ds = Dataset(path).create(force=True)
    # extra check: must not pick up cfg_yoda.sh in top directory
    ds.run_procedure('cfg_yoda')

    # path to a procedure which is not under any "standard" location but
    # present in the dataset
    code_dir_procedure_path = op.join(ds.path, 'code', 'datalad_test_proc.py')
    top_dir_procedure_path = op.join(ds.path, 'cfg_yoda.sh')

    # run discovery on the dataset:
    ps = ds.run_procedure(discover=True)
    # it should not be found magically by default
    assert_not_in_results(ps, path=code_dir_procedure_path)
    assert_not_in_results(ps, path=top_dir_procedure_path)

    with patch_config({'datalad.locations.extra-procedures': op.join(ds.path, 'code')}):
        # run discovery on the dataset:
        ps = ds.run_procedure(discover=True)
        # still needs to find procedures coming with datalad
        assert_true(len(ps) > 3)
        # and procedure under the path we specified
        assert_result_count(ps, 1, path=code_dir_procedure_path)
        assert_not_in_results(ps, path=top_dir_procedure_path)

    # multiple extra locations
    with patch_config({'datalad.locations.extra-procedures': [op.join(ds.path, 'code'), ds.path]}):
        # run discovery on the dataset:
        ps = ds.run_procedure(discover=True)
        # still needs to find procedures coming with datalad
        assert_true(len(ps) > 4)
        # and procedure under the path we specified
        assert_result_count(ps, 1, path=code_dir_procedure_path)
        assert_result_count(ps, 1, path=top_dir_procedure_path)

    # configure dataset to look for procedures in its code folder
    ds.config.add(
        'datalad.locations.dataset-procedures',
        'code',
        where='dataset')
    ds.save(op.join('.datalad', 'config'))

    # run discovery on the dataset:
    ps = ds.run_procedure(discover=True)

    # still needs to find procedures coming with datalad
    assert_true(len(ps) > 2)
    # we get three essential properties
    _check_procedure_properties(ps)
    # dataset's procedure needs to be in the results
    # and only a single one
    assert_result_count(ps, 1, path=code_dir_procedure_path)
    # a subdir shouldn't be considered a procedure just because it's "executable"
    assert_not_in_results(ps, path=op.join(ds.path, 'code', 'testdir'))

    # make it a subdataset and try again:
    # first we need to save the beast to make install work
    ds.save()
    super = Dataset(super_path).create()
    super.install('sub', source=ds.path)

    ps = super.run_procedure(discover=True)
    # still needs to find procedures coming with datalad
    assert_true(len(ps) > 2)
    _check_procedure_properties(ps)
    # dataset's procedure needs to be in the results
    assert_in_results(ps, path=op.join(super.path, 'sub', 'code',
                                       'datalad_test_proc.py'))

    if not on_windows:  # no symlinks
        import os
        # create a procedure which is a broken symlink, but recognizable as a
        # python script:
        os.symlink(op.join(super.path, 'sub', 'not_existent'),
                   op.join(super.path, 'sub', 'code', 'broken_link_proc.py'))
        # broken symlink at a procedure location, but we can't tell whether it
        # is an actual procedure without any hint on how to execute it:
        os.symlink(op.join(super.path, 'sub', 'not_existent'),
                   op.join(super.path, 'sub', 'code', 'unknwon_broken_link'))

        ps = super.run_procedure(discover=True)
        # still needs to find procedures coming with datalad and the dataset
        # procedure registered before
        assert_true(len(ps) > 3)
        assert_in_results(ps, path=op.join(super.path, 'sub', 'code',
                                           'broken_link_proc.py'),
                          state='absent')
        assert_in_results(
            ps,
            path=op.join(super.path, 'sub', 'code',
                         'unknwon_broken_link'),
            state='absent')
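
# _check_procedure_properties() is not shown in this excerpt; judging from the
# inline variant used in Examples #10 and #16, a faithful sketch is:
def _check_procedure_properties_sketch(ps):
    # every discovered procedure record must carry the three essential keys
    eq_(
        sum('procedure_type' in p and 'procedure_callfmt' in p and 'path' in p
            for p in ps),
        len(ps))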
Example #12
def test_run_subdataset_install(path):
    path = Path(path)
    ds_src = Dataset(path / "src").create()
    # Repository setup
    #
    # .
    # |-- a/
    # |   |-- a2/
    # |   |   `-- img
    # |   `-- img
    # |-- b/
    # |   `-- b2/
    # |       `-- img
    # |-- c/
    # |   `-- c2/
    # |       `-- img
    # `-- d/
    #     `-- d2/
    #         `-- img
    ds_src_a = ds_src.create("a")
    ds_src_a2 = ds_src_a.create("a2")
    ds_src_b = Dataset(ds_src.pathobj / "b").create()
    ds_src_b2 = ds_src_b.create("b2")
    ds_src_c = ds_src.create("c")
    ds_src_c2 = ds_src_c.create("c2")
    ds_src_d = Dataset(ds_src.pathobj / "d").create()
    ds_src_d2 = ds_src_d.create("d2")

    ds_src.save()

    add_pyscript_image(ds_src_a, "in-a", "img")
    add_pyscript_image(ds_src_a2, "in-a2", "img")
    add_pyscript_image(ds_src_b2, "in-b2", "img")
    add_pyscript_image(ds_src_c2, "in-c2", "img")
    add_pyscript_image(ds_src_d2, "in-d2", "img")

    ds_src.save(recursive=True)

    ds_dest = clone(ds_src.path, str(path / "dest"))
    ds_dest_a2 = Dataset(ds_dest.pathobj / "a" / "a2")
    ds_dest_b2 = Dataset(ds_dest.pathobj / "b" / "b2")
    ds_dest_c2 = Dataset(ds_dest.pathobj / "c" / "c2")
    ds_dest_d2 = Dataset(ds_dest.pathobj / "d" / "d2")
    assert_false(ds_dest_a2.is_installed())
    assert_false(ds_dest_b2.is_installed())
    assert_false(ds_dest_c2.is_installed())
    assert_false(ds_dest_d2.is_installed())

    # Needed subdatasets are installed if container name is given...
    res = ds_dest.containers_run(["arg"], container_name="a/a2/in-a2")
    assert_result_count(res,
                        1,
                        action="install",
                        status="ok",
                        path=ds_dest_a2.path)
    assert_result_count(res,
                        1,
                        action="get",
                        status="ok",
                        path=str(ds_dest_a2.pathobj / "img"))
    ok_(ds_dest_a2.is_installed())
    # ... even if the name and path do not match.
    res = ds_dest.containers_run(["arg"], container_name="b/b2/in-b2")
    assert_result_count(res,
                        1,
                        action="install",
                        status="ok",
                        path=ds_dest_b2.path)
    assert_result_count(res,
                        1,
                        action="get",
                        status="ok",
                        path=str(ds_dest_b2.pathobj / "img"))
    ok_(ds_dest_b2.is_installed())
    # Subdatasets will also be installed if given an image path...
    res = ds_dest.containers_run(["arg"], container_name=str(Path("c/c2/img")))
    assert_result_count(res,
                        1,
                        action="install",
                        status="ok",
                        path=ds_dest_c2.path)
    assert_result_count(res,
                        1,
                        action="get",
                        status="ok",
                        path=str(ds_dest_c2.pathobj / "img"))
    ok_(ds_dest_c2.is_installed())
    ds_dest.containers_run(["arg"], container_name=str(Path("d/d2/img")))

    # There's no install record if subdataset is already present.
    res = ds_dest.containers_run(["arg"], container_name="a/a2/in-a2")
    assert_not_in_results(res, action="install")
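
# add_pyscript_image() comes from the datalad-container test utilities and is
# not shown here. A hedged sketch of its likely shape: commit a tiny Python
# script and register it as a "container image" so containers_run() has
# something to execute; the call_fmt placeholders follow datalad-container's
# {img}/{cmd} convention.
def add_pyscript_image_sketch(ds, container_name, file_name):
    img = ds.pathobj / file_name
    img.write_text('#!/usr/bin/env python\nprint("image ran")\n')
    ds.save(str(img))
    ds.containers_add(container_name, image=file_name,
                      call_fmt='python {img} {cmd}')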
Example #13
def test_push_recursive(origin_path, src_path, dst_top, dst_sub,
                        dst_subnoannex, dst_subsub):
    # dataset with two submodules and one subsubmodule
    origin = Dataset(origin_path).create()
    origin_subm1 = origin.create('sub m')
    origin_subm1.create('subsub m')
    origin.create('subm noannex', annex=False)
    origin.save()
    assert_repo_status(origin.path)
    # prepare src as a fresh clone with all subdatasets checked out recursively
    # running on a clone should make the test scenario differ more from
    # test_push(), even for the pieces that should be identical
    top = Clone.__call__(source=origin.path, path=src_path)
    sub, subsub, subnoannex = top.get('.',
                                      recursive=True,
                                      get_data=False,
                                      result_xfm='datasets')

    target_top = mk_push_target(top, 'target', dst_top, annex=True)
    # subdatasets have no remote yet, so recursive publishing should fail:
    res = top.push(to="target", recursive=True, on_failure='ignore')
    assert_in_results(res,
                      path=top.path,
                      type='dataset',
                      refspec='refs/heads/master:refs/heads/master',
                      operations=['new-branch'],
                      action='publish',
                      status='ok',
                      target='target')
    for d in (sub, subsub, subnoannex):
        assert_in_results(res,
                          status='error',
                          type='dataset',
                          path=d.path,
                          message=("Unknown target sibling '%s'.", 'target'))
    # now fix that and set up targets for the submodules
    target_sub = mk_push_target(sub, 'target', dst_sub, annex=True)
    target_subnoannex = mk_push_target(subnoannex,
                                       'target',
                                       dst_subnoannex,
                                       annex=False)
    target_subsub = mk_push_target(subsub, 'target', dst_subsub, annex=True)

    # and same push call as above
    res = top.push(to="target", recursive=True)
    # topds skipped
    assert_in_results(res,
                      path=top.path,
                      type='dataset',
                      action='publish',
                      status='notneeded',
                      target='target')
    # the rest pushed
    for d in (sub, subsub, subnoannex):
        assert_in_results(res,
                          status='ok',
                          type='dataset',
                          path=d.path,
                          refspec='refs/heads/master:refs/heads/master')
    # all corresponding branches match across all datasets
    for s, d in zip(
        (top, sub, subnoannex, subsub),
        (target_top, target_sub, target_subnoannex, target_subsub)):
        eq_(list(s.repo.get_branch_commits_("master")),
            list(d.get_branch_commits_("master")))
        if s != subnoannex:
            eq_(list(s.repo.get_branch_commits_("git-annex")),
                list(d.get_branch_commits_("git-annex")))

    # rerun should not result in further pushes of master
    res = top.push(to="target", recursive=True)
    assert_not_in_results(res,
                          status='ok',
                          refspec="refs/heads/master:refs/heads/master")
    assert_in_results(res,
                      status='notneeded',
                      refspec="refs/heads/master:refs/heads/master")

    if top.repo.is_managed_branch():
        raise SkipTest(
            'Save/status of subdataset with managed branches is a still '
            'unresolved issue')

    # now annex a file in subsub
    test_copy_file = subsub.pathobj / 'test_mod_annex_file'
    test_copy_file.write_text("Heavy stuff.")
    # save all the way up
    assert_status(('ok', 'notneeded'),
                  top.save(message='subsub got something', recursive=True))
    assert_repo_status(top.path)
    # publish straight up, should be smart by default
    res = top.push(to="target", recursive=True)
    # we see 3 out of 4 datasets pushed (sub noannex was left unchanged)
    for d in (top, sub, subsub):
        assert_in_results(res,
                          status='ok',
                          type='dataset',
                          path=d.path,
                          refspec='refs/heads/master:refs/heads/master')
    # file content copied too
    assert_in_results(res,
                      action='copy',
                      status='ok',
                      path=str(test_copy_file))
    # verify it is accessible, drop and bring back
    assert_status('ok', top.drop(str(test_copy_file)))
    ok_(not subsub.repo.file_has_content('test_mod_annex_file'))
    top.get(test_copy_file)
    ok_file_has_content(test_copy_file, 'Heavy stuff.')

    # make two modifications
    (sub.pathobj / 'test_mod_annex_file').write_text('annex')
    (subnoannex.pathobj / 'test_mod_file').write_text('git')
    # save separately
    top.save(sub.pathobj, message='annexadd', recursive=True)
    top.save(subnoannex.pathobj, message='gitadd', recursive=True)
    # now only publish the latter one
    res = top.push(to="target", since='HEAD~1', recursive=True)
    # nothing copied, no reports on the other modification
    assert_not_in_results(res, action='copy')
    assert_not_in_results(res, path=sub.path)
    for d in (top, subnoannex):
        assert_in_results(res,
                          status='ok',
                          type='dataset',
                          path=d.path,
                          refspec='refs/heads/master:refs/heads/master')
    # an unconditional push should now pick up the remaining changes
    res = top.push(to="target", recursive=True)
    assert_in_results(res,
                      action='copy',
                      status='ok',
                      path=str(sub.pathobj / 'test_mod_annex_file'))
    assert_in_results(res,
                      status='ok',
                      type='dataset',
                      path=sub.path,
                      refspec='refs/heads/master:refs/heads/master')
    for d in (top, subnoannex, subsub):
        assert_in_results(res,
                          status='notneeded',
                          type='dataset',
                          path=d.path,
                          refspec='refs/heads/master:refs/heads/master')
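
# For reference, the since-restricted recursive push above rendered on the
# command line (Runner pattern as in Example #14); flag spellings are assumed
# to mirror the Python parameters:
def push_since_via_cli(src_path):
    from datalad.cmd import WitlessRunner as Runner
    from datalad.utils import chpwd
    with chpwd(src_path):
        Runner().run(['datalad', 'push', '--recursive',
                      '--to', 'target', '--since', 'HEAD~1'])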
Example #14
def test_nested_pushclone_cycle_allplatforms(origpath, storepath, clonepath):
    if 'DATALAD_SEED' in os.environ:
        # we are using create-sibling-ria via the cmdline in here
        # this will create random UUIDs for datasets
        # however, given a fixed seed each call to this command will start
        # with the same RNG seed, hence yield the same UUID on the same
        # machine -- leading to a collision
        raise SkipTest(
            'Test incompatible with fixed random number generator seed')
    # the aim here is to test, at a high level, a standard create-push-clone
    # cycle for a dataset with a subdataset, with the goal of ensuring that
    # correct branches and commits are tracked, regardless of platform
    # behavior and the condition of individual clones. Nothing fancy, just
    # that the defaults behave in sensible ways
    from datalad.cmd import WitlessRunner as Runner
    run = Runner().run

    # create original nested dataset
    with chpwd(origpath):
        run(['datalad', 'create', 'super'])
        run(['datalad', 'create', '-d', 'super', str(Path('super', 'sub'))])

    # verify essential linkage properties
    orig_super = Dataset(Path(origpath, 'super'))
    orig_sub = Dataset(orig_super.pathobj / 'sub')

    (orig_super.pathobj / 'file1.txt').write_text('some1')
    (orig_sub.pathobj / 'file2.txt').write_text('some1')
    with chpwd(orig_super.path):
        run(['datalad', 'save', '--recursive'])

    # TODO not yet reported clean with adjusted branches
    #assert_repo_status(orig_super.path)

    # the "true" branch that sub is on, and the gitsha of the HEAD commit of it
    orig_sub_corr_branch = \
        orig_sub.repo.get_corresponding_branch() or orig_sub.repo.get_active_branch()
    orig_sub_corr_commit = orig_sub.repo.get_hexsha(orig_sub_corr_branch)

    # make sure the super tracks this commit
    assert_in_results(
        orig_super.subdatasets(),
        path=orig_sub.path,
        gitshasum=orig_sub_corr_commit,
        # TODO it should also track the branch name
        # Attempted: https://github.com/datalad/datalad/pull/3817
        # But reverted: https://github.com/datalad/datalad/pull/4375
    )

    # publish to a store, to get into a platform-agnostic state
    # (i.e. no impact of an annex-init of any kind)
    store_url = 'ria+' + get_local_file_url(storepath)
    with chpwd(orig_super.path):
        run([
            'datalad', 'create-sibling-ria', '--recursive', '-s', 'store',
            store_url
        ])
        run(['datalad', 'push', '--recursive', '--to', 'store'])

    # we are using the 'store' sibling's URL, which should be a plain path
    store_super = AnnexRepo(orig_super.siblings(name='store')[0]['url'],
                            init=False)
    store_sub = AnnexRepo(orig_sub.siblings(name='store')[0]['url'],
                          init=False)

    # both datasets in the store only carry the real branches, and nothing
    # adjusted
    for r in (store_super, store_sub):
        eq_(set(r.get_branches()), set([orig_sub_corr_branch, 'git-annex']))

    # and reobtain from a store
    cloneurl = 'ria+' + get_local_file_url(str(storepath), compatibility='git')
    with chpwd(clonepath):
        run(['datalad', 'clone', cloneurl + '#' + orig_super.id, 'super'])
        run(['datalad', '-C', 'super', 'get', '--recursive', '.'])

    # verify that nothing has changed as a result of a push/clone cycle
    clone_super = Dataset(Path(clonepath, 'super'))
    clone_sub = Dataset(clone_super.pathobj / 'sub')
    assert_in_results(
        clone_super.subdatasets(),
        path=clone_sub.path,
        gitshasum=orig_sub_corr_commit,
    )

    for ds1, ds2, f in ((orig_super, clone_super, 'file1.txt'),
                        (orig_sub, clone_sub, 'file2.txt')):
        eq_((ds1.pathobj / f).read_text(), (ds2.pathobj / f).read_text())

    # get status info that does not recurse into subdatasets, i.e. does not
    # look for uncommitted changes
    # we should see no modification reported
    assert_not_in_results(clone_super.status(eval_subdataset_state='commit'),
                          state='modified')
    # and now the same for a more expensive full status
    assert_not_in_results(clone_super.status(recursive=True), state='modified')
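
# The clone URL used above is assembled from three parts: the 'ria+' scheme
# prefix, a file:// URL pointing at the store, and the dataset UUID after '#'.
# A compact restatement; the import location of get_local_file_url is assumed:
def make_ria_clone_url(storepath, ds):
    from datalad.support.network import get_local_file_url
    return 'ria+{}#{}'.format(
        get_local_file_url(str(storepath), compatibility='git'),
        ds.id)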
Example #15
def test_auto_if_wanted_data_transfer_path_restriction(path):
    path = Path(path)
    ds_a = Dataset(path / "a").create()
    ds_a_sub0 = ds_a.create("sub0")
    ds_a_sub1 = ds_a.create("sub1")

    for ds in [ds_a, ds_a_sub0, ds_a_sub1]:
        (ds.pathobj / "sec.dat").write_text("sec")
        (ds.pathobj / "reg.dat").write_text("reg")
    ds_a.save(recursive=True)

    ds_a.create_sibling(
        str(path / "b"),
        name="b",
        annex_wanted="not metadata=distribution-restrictions=*",
        recursive=True)
    for ds in [ds_a, ds_a_sub0, ds_a_sub1]:
        ds.repo.set_metadata(add={"distribution-restrictions": "doesntmatter"},
                             files=["sec.dat"])

    # wanted-triggered --auto can be restricted to subdataset...
    res = ds_a.push(to="b", path="sub0", data="auto-if-wanted", recursive=True)
    assert_not_in_results(res,
                          action="copy",
                          target="b",
                          status="ok",
                          path=str(ds_a.pathobj / "reg.dat"))
    assert_in_results(res,
                      action="copy",
                      target="b",
                      status="ok",
                      path=str(ds_a_sub0.pathobj / "reg.dat"))
    assert_not_in_results(res,
                          action="copy",
                          target="b",
                          status="ok",
                          path=str(ds_a_sub0.pathobj / "sec.dat"))
    assert_not_in_results(res,
                          action="copy",
                          target="b",
                          status="ok",
                          path=str(ds_a_sub1.pathobj / "reg.dat"))

    # ... and to a wanted file.
    res = ds_a.push(to="b",
                    path="reg.dat",
                    data="auto-if-wanted",
                    recursive=True)
    assert_in_results(res,
                      action="copy",
                      target="b",
                      status="ok",
                      path=str(ds_a.pathobj / "reg.dat"))
    assert_not_in_results(res,
                          action="copy",
                          target="b",
                          status="ok",
                          path=str(ds_a_sub1.pathobj / "reg.dat"))

    # But asking to transfer a file does not do it if the remote has a
    # wanted setting and doesn't want it.
    res = ds_a.push(to="b",
                    path="sec.dat",
                    data="auto-if-wanted",
                    recursive=True)
    assert_not_in_results(res,
                          action="copy",
                          target="b",
                          status="ok",
                          path=str(ds_a.pathobj / "sec.dat"))

    res = ds_a.push(to="b", path="sec.dat", data="anything", recursive=True)
    assert_in_results(res,
                      action="copy",
                      target="b",
                      status="ok",
                      path=str(ds_a.pathobj / "sec.dat"))
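
# Command-line counterpart of the final, unconditional transfer above (Runner
# pattern as in Example #14); '--data anything' is assumed to mirror
# data="anything":
def push_data_anything_via_cli(path):
    from datalad.cmd import WitlessRunner as Runner
    from datalad.utils import chpwd
    with chpwd(str(path / "a")):
        Runner().run(['datalad', 'push', '--recursive', '--to', 'b',
                      '--data', 'anything', 'sec.dat'])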
Example #16
def test_procedure_discovery(path, super_path):
    ps = run_procedure(discover=True)
    # a few procedures come with datalad; discovery needs to find them
    assert_true(len(ps) > 2)
    # we get three essential properties
    eq_(
        sum(['procedure_type' in p and
             'procedure_callfmt' in p and
             'path' in p
             for p in ps]),
        len(ps))

    # set up dataset with registered procedure (c&p from test_basics):
    ds = Dataset(path).create(force=True)
    ds.run_procedure('setup_yoda_dataset')
    ok_clean_git(ds.path)
    # configure dataset to look for procedures in its code folder
    ds.config.add(
        'datalad.locations.dataset-procedures',
        'code',
        where='dataset')
    # configure dataset to run the demo procedure prior to the clean command
    ds.config.add(
        'datalad.clean.proc-pre',
        'datalad_test_proc',
        where='dataset')
    ds.add(op.join('.datalad', 'config'))

    # run discovery on the dataset:
    ps = ds.run_procedure(discover=True)

    # still needs to find procedures coming with datalad
    assert_true(len(ps) > 2)
    # we get three essential properties
    eq_(
        sum(['procedure_type' in p and
             'procedure_callfmt' in p and
             'path' in p
             for p in ps]),
        len(ps))
    # dataset's procedure needs to be in the results
    assert_in_results(ps, path=op.join(ds.path, 'code', 'datalad_test_proc.py'))

    # make it a subdataset and try again:
    super = Dataset(super_path).create()
    super.install('sub', source=ds.path)

    ps = super.run_procedure(discover=True)
    # still needs to find procedures coming with datalad
    assert_true(len(ps) > 2)
    # we get three essential properties
    eq_(
        sum(['procedure_type' in p and
             'procedure_callfmt' in p and
             'path' in p
             for p in ps]),
        len(ps))
    # dataset's procedure needs to be in the results
    assert_in_results(ps, path=op.join(super.path, 'sub', 'code',
                                       'datalad_test_proc.py'))

    if not on_windows:  # no symlinks
        import os
        # create a procedure which is a broken symlink, but recognizable as a
        # python script:
        os.symlink(op.join(super.path, 'sub', 'not_existent'),
                   op.join(super.path, 'sub', 'code', 'broken_link_proc.py'))
        # broken symlink at a procedure location, but we can't tell whether it
        # is an actual procedure without any hint on how to execute it:
        os.symlink(op.join(super.path, 'sub', 'not_existent'),
                   op.join(super.path, 'sub', 'code', 'unknwon_broken_link'))

        ps = super.run_procedure(discover=True)
        # still needs to find procedures coming with datalad and the dataset
        # procedure registered before
        assert_true(len(ps) > 3)
        assert_in_results(ps, path=op.join(super.path, 'sub', 'code',
                                           'broken_link_proc.py'),
                          state='absent')
        assert_not_in_results(ps, path=op.join(super.path, 'sub', 'code',
                                               'unknwon_broken_link'))
Example #17
def test_run_subdataset_install(path):
    path = Path(path)
    ds_src = Dataset(path / "src").create()
    # Repository setup
    #
    # .
    # |-- a/
    # |   |-- a2/
    # |   |   `-- img
    # |   `-- img
    # |-- b/               / module name: b-name /
    # |   `-- b2/
    # |       `-- img
    # |-- c/
    # |   `-- c2/
    # |       `-- img
    # `-- d/               / module name: d-name /
    #     `-- d2/
    #         `-- img
    ds_src_a = ds_src.create("a")
    ds_src_a2 = ds_src_a.create("a2")
    ds_src_b = Dataset(ds_src.pathobj / "b").create()
    ds_src_b2 = ds_src_b.create("b2")
    ds_src_c = ds_src.create("c")
    ds_src_c2 = ds_src_c.create("c2")
    ds_src_d = Dataset(ds_src.pathobj / "d").create()
    ds_src_d2 = ds_src_d.create("d2")

    ds_src.repo.add_submodule("b", name="b-name")
    ds_src.repo.add_submodule("d", name="d-name")
    ds_src.save()

    add_pyscript_image(ds_src_a, "in-a", "img")
    add_pyscript_image(ds_src_a2, "in-a2", "img")
    add_pyscript_image(ds_src_b2, "in-b2", "img")
    add_pyscript_image(ds_src_c2, "in-c2", "img")
    add_pyscript_image(ds_src_d2, "in-d2", "img")

    ds_src.save(recursive=True)

    ds_dest = clone(ds_src.path, str(path / "dest"))
    ds_dest_a2 = Dataset(ds_dest.pathobj / "a" / "a2")
    ds_dest_b2 = Dataset(ds_dest.pathobj / "b" / "b2")
    ds_dest_c2 = Dataset(ds_dest.pathobj / "c" / "c2")
    ds_dest_d2 = Dataset(ds_dest.pathobj / "d" / "d2")
    assert_false(ds_dest_a2.is_installed())
    assert_false(ds_dest_b2.is_installed())
    assert_false(ds_dest_c2.is_installed())
    assert_false(ds_dest_d2.is_installed())

    # Needed subdatasets are installed if container name is given...
    res = ds_dest.containers_run(["arg"], container_name="a/a2/in-a2")
    assert_result_count(res,
                        1,
                        action="install",
                        status="ok",
                        path=ds_dest_a2.path)
    assert_result_count(res,
                        1,
                        action="get",
                        status="ok",
                        path=str(ds_dest_a2.pathobj / "img"))
    ok_(ds_dest_a2.is_installed())
    # ... even if the name and path do not match.
    res = ds_dest.containers_run(["arg"], container_name="b-name/b2/in-b2")
    assert_result_count(res,
                        1,
                        action="install",
                        status="ok",
                        path=ds_dest_b2.path)
    assert_result_count(res,
                        1,
                        action="get",
                        status="ok",
                        path=str(ds_dest_b2.pathobj / "img"))
    ok_(ds_dest_b2.is_installed())
    # Subdatasets will also be installed if given an image path...
    res = ds_dest.containers_run(["arg"], container_name=str(Path("c/c2/img")))
    assert_result_count(res,
                        1,
                        action="install",
                        status="ok",
                        path=ds_dest_c2.path)
    assert_result_count(res,
                        1,
                        action="get",
                        status="ok",
                        path=str(ds_dest_c2.pathobj / "img"))
    ok_(ds_dest_c2.is_installed())
    # ... unless the module name chain doesn't match the subdataset path. In
    # that case, the caller needs to install the subdatasets beforehand.
    with assert_raises(ValueError):
        ds_dest.containers_run(["arg"], container_name=str(Path("d/d2/img")))
    ds_dest.get(ds_dest_d2.path, recursive=True, get_data=False)
    ds_dest.containers_run(["arg"], container_name=str(Path("d/d2/img")))

    # There's no install record if subdataset is already present.
    res = ds_dest.containers_run(["arg"], container_name="a/a2/in-a2")
    assert_not_in_results(res, action="install")
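
# The add_submodule(..., name=...) calls above register submodules whose
# module name differs from their path; in plain git the corresponding
# registration (for the repository already present at b/) would plausibly be:
def register_named_submodule_via_cli(path):
    from datalad.cmd import WitlessRunner as Runner
    from datalad.utils import chpwd
    with chpwd(str(path / "src")):
        Runner().run(['git', 'submodule', 'add', '--name', 'b-name',
                      './b', 'b'])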
Example #18
def test_run_assume_ready(path):
    ds = Dataset(path).create()
    repo = ds.repo
    adjusted = repo.is_managed_branch()

    # --assume-ready=inputs

    (repo.pathobj / "f1").write_text("f1")
    ds.save()

    def cat_cmd(fname):
        return [
            sys.executable, "-c",
            "import sys; print(open(sys.argv[-1]).read())", fname
        ]

    assert_in_results(ds.run(cat_cmd("f1"), inputs=["f1"]),
                      action="get",
                      type="file")
    # Same thing, but without the get() call.
    assert_not_in_results(ds.run(cat_cmd("f1"),
                                 inputs=["f1"],
                                 assume_ready="inputs"),
                          action="get",
                          type="file")

    ds.drop("f1", check=False)
    if not adjusted:
        # If the input is not actually ready, the command will fail.
        with assert_raises(CommandError):
            ds.run(cat_cmd("f1"), inputs=["f1"], assume_ready="inputs")

    # --assume-ready=outputs

    def unlink_and_write_cmd(fname):
        # This command doesn't care whether the output file is unlocked because
        # it removes it ahead of time anyway.
        return [
            sys.executable, "-c",
            "import sys; import os; import os.path as op; "
            "f = sys.argv[-1]; op.lexists(f) and os.unlink(f); "
            "open(f, mode='w').write(str(sys.argv))", fname
        ]

    (repo.pathobj / "f2").write_text("f2")
    ds.save()

    res = ds.run(unlink_and_write_cmd("f2"), outputs=["f2"])
    if not adjusted:
        assert_in_results(res, action="unlock", type="file")
    # Same thing, but without the unlock() call.
    res = ds.run(unlink_and_write_cmd("f2"),
                 outputs=["f2"],
                 assume_ready="outputs")
    assert_not_in_results(res, action="unlock", type="file")

    # --assume-ready=both

    res = ds.run(unlink_and_write_cmd("f2"), outputs=["f2"], inputs=["f2"])
    assert_in_results(res, action="get", type="file")
    if not adjusted:
        assert_in_results(res, action="unlock", type="file")

    res = ds.run(unlink_and_write_cmd("f2"),
                 outputs=["f2"],
                 inputs=["f2"],
                 assume_ready="both")
    assert_not_in_results(res, action="get", type="file")
    assert_not_in_results(res, action="unlock", type="file")
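
# Finally, the assume_ready= keyword corresponds to a CLI option; a hedged
# command-line rendering of the last call above (Runner pattern as in
# Example #14), with the unlink-and-write trick inlined:
def run_assume_ready_via_cli(path):
    from datalad.cmd import WitlessRunner as Runner
    from datalad.utils import chpwd
    with chpwd(path):
        Runner().run(['datalad', 'run', '--assume-ready', 'both',
                      '--input', 'f2', '--output', 'f2', '--',
                      'python', '-c',
                      'import os, os.path as op; '
                      'op.lexists("f2") and os.unlink("f2"); '
                      'open("f2", "w").write("done")'])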