Example #1
0
def test_run_from_subds_gh3551(path):
    ds = Dataset(path).create(force=True)
    ds.save()
    ds.create("output")
    with chpwd(op.join(ds.path, "sub")):
        assert_in_results(
            run("echo",
                inputs=[op.join(op.pardir, "sub", "input")],
                outputs=[op.join(op.pardir, "output")],
                return_type="list", result_filter=None, result_xfm=None),
            action="get",
            status="notneeded")
    assert_repo_status(ds.path)

    subds_path = op.join("output", "subds")
    ds.create(subds_path)
    with chpwd(op.join(ds.path, "sub")):
        output_dir = op.join(op.pardir, "output", "subds")
        # The command below tries to be Windows-compatible. It could be made
        # better (e.g., by actually using the input file) by someone who
        # knows something about Windows.
        assert_in_results(
            run("cd .> {}".format(op.join(output_dir, "f")),
                inputs=[op.join(op.pardir, "sub", "input")],
                outputs=[output_dir],
                return_type="list", result_filter=None, result_xfm=None),
            action="save",
            status="ok")
    assert_repo_status(ds.path)
    subds = Dataset(op.join(ds.path, subds_path))
    ok_exists(op.join(subds.path, "f"))
    if not ds.repo.is_managed_branch():  # FIXME
        # This check fails on Windows:
        # https://github.com/datalad/datalad/pull/3747/checks?check_run_id=248506560#step:8:254
        ok_(subds.repo.file_has_content("f"))
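# An aside on the `cd .> {file}` idiom used above: redirecting the (empty)
# output of `cd .` into a file is a portable way to create an empty file,
# since it works both in POSIX shells and in Windows cmd.exe, where `touch`
# is unavailable. A minimal standalone sketch (no DataLad involved):
import subprocess

subprocess.run("cd .> f", shell=True, check=True)  # leaves an empty file "f"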
Example #2
0
    def test_addurls_url_on_collision_error_if_different(self, path):
        ds = Dataset(path).create(force=True)

        data = [self.data[0].copy(), self.data[0].copy()]
        data[0]["some_metadata"] = "1"
        data[1]["some_metadata"] = "2"

        with patch("sys.stdin", new=StringIO(json.dumps(data))):
            assert_in_results(ds.addurls("-",
                                         "{url}",
                                         "{name}",
                                         on_failure="ignore"),
                              action="addurls",
                              status="error")

        with patch("sys.stdin", new=StringIO(json.dumps(data))):
            assert_in_results(ds.addurls("-",
                                         "{url}",
                                         "{name}",
                                         on_collision="error-if-different",
                                         on_failure="ignore"),
                              action="addurls",
                              status="error")

        with patch("sys.stdin", new=StringIO(json.dumps(data))):
            ds.addurls("-",
                       "{url}",
                       "{name}",
                       exclude_autometa="*",
                       on_collision="error-if-different")
        ok_exists(op.join(ds.path, "a"))
Example #3
0
def test_merge_follow_parentds_subdataset_other_branch(path):
    path = Path(path)
    ds_src = Dataset(path / "source").create()
    on_adjusted = ds_src.repo.is_managed_branch()
    ds_src_subds = ds_src.create("subds")
    ds_clone = install(source=ds_src.path, path=path / "clone",
                       recursive=True, result_xfm="datasets")
    ds_clone_subds = Dataset(ds_clone.pathobj / "subds")

    ds_src_subds.repo.call_git(["checkout", "-b", "other"])
    (ds_src_subds.pathobj / "foo").write_text("foo content")
    ds_src.save(recursive=True)
    assert_repo_status(ds_src.path)

    res = ds_clone.update(merge=True, follow="parentds", recursive=True,
                          on_failure="ignore")
    if on_adjusted:
        # Our git-annex-sync-based approach on adjusted branches is
        # incompatible with follow='parentds'.
        assert_in_results(res, action="update", status="impossible")
        return
    assert_in_results(res, action="update", status="ok")
    eq_(ds_clone.repo.get_hexsha(), ds_src.repo.get_hexsha())
    ok_(ds_clone_subds.repo.is_under_annex("foo"))

    (ds_src_subds.pathobj / "bar").write_text("bar content")
    ds_src.save(recursive=True)
    ds_clone_subds.repo.checkout("master", options=["-bnew"])
    ds_clone.update(merge=True, follow="parentds", recursive=True)
    if not on_adjusted:
        eq_(ds_clone.repo.get_hexsha(), ds_src.repo.get_hexsha())
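# Background for the on_adjusted guard above: on crippled filesystems
# (e.g. FAT, typical Windows setups) git-annex works on an "adjusted"
# branch, where updates go through `git annex sync` rather than a plain
# `git merge`, which is why follow='parentds' cannot be honored there.
# A hedged sketch of an equivalent guard, mirroring the SkipTest pattern
# other tests in this collection use (`ds` is any Dataset instance):
from unittest import SkipTest


def skip_if_adjusted(ds):
    # is_managed_branch() reports whether the repo is on an adjusted branch
    if ds.repo.is_managed_branch():
        raise SkipTest("adjusted branch: follow='parentds' unsupported")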
Example #4
0
def test_run_remove_keeps_leading_directory(path):
    ds = Dataset(op.join(path, "ds")).create()
    repo = ds.repo

    (ds.pathobj / "d").mkdir()
    output = (ds.pathobj / "d" / "foo")
    output.write_text("foo")
    ds.save()

    output_rel = str(output.relative_to(ds.pathobj))
    repo.drop(output_rel, options=["--force"])

    assert_in_results(ds.run("cd .> {}".format(output_rel),
                             outputs=[output_rel],
                             result_renderer=None),
                      action="run.remove",
                      status="ok")

    assert_repo_status(ds.path)

    # The removal still gets saved if the command doesn't generate the output
    # (just as it would if git-rm were used instead of unlink).
    repo.drop(output_rel, options=["--force"])
    assert_in_results(ds.run("cd .> something-else",
                             outputs=[output_rel],
                             result_renderer=None),
                      action="run.remove",
                      status="ok")
    assert_repo_status(ds.path)
Example #5
0
    def test_addurls_url_on_collision_choose(self, path):
        ds = Dataset(path).create(force=True)
        data = deepcopy(self.data)
        for row in data:
            row["name"] = "a"

        with patch("sys.stdin", new=StringIO(json.dumps(data))):
            assert_in_results(ds.addurls("-",
                                         "{url}",
                                         "{name}",
                                         on_failure="ignore"),
                              action="addurls",
                              status="error")
        with patch("sys.stdin", new=StringIO(json.dumps(data))):
            assert_in_results(ds.addurls("-",
                                         "{url}",
                                         "{name}",
                                         on_collision="error-if-different",
                                         on_failure="ignore"),
                              action="addurls",
                              status="error")

        with patch("sys.stdin", new=StringIO(json.dumps(data))):
            ds.addurls("-", "{url}", "{name}-first", on_collision="take-first")
        ok_file_has_content(op.join(ds.path, "a-first"),
                            "a content",
                            strip=True)

        with patch("sys.stdin", new=StringIO(json.dumps(data))):
            ds.addurls("-", "{url}", "{name}-last", on_collision="take-last")
        ok_file_has_content(op.join(ds.path, "a-last"),
                            "c content",
                            strip=True)
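# For reference: together with the earlier collision test, the on_collision
# modes exercised above appear to cover the documented choices for handling
# duplicate filenames among the input rows: 'error' (the default),
# 'error-if-different', 'take-first', and 'take-last'.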
Example #6
0
def test_unlock_raises(path, path2, path3):

    # make sure, we are not within a dataset:
    _cwd = getpwd()
    chpwd(path)

    # no dataset and no path:
    assert_raises(InsufficientArgumentsError, unlock, dataset=None, path=None)
    # no dataset and path not within a dataset:
    assert_raises(NoDatasetArgumentFound, unlock, dataset=None, path=path2)

    create(path=path, no_annex=True)
    ds = Dataset(path)
    # no complaints
    ds.unlock()

    # make it annex, but call unlock with invalid path:
    (ds.pathobj / ".noannex").unlink()
    AnnexRepo(path, create=True)

    # One that doesn't exist.
    res = ds.unlock(path="notexistent.txt",
                    result_xfm=None,
                    on_failure='ignore',
                    return_type='item-or-list')
    eq_(res['message'], "path does not exist")

    # And one that isn't associated with a dataset.
    assert_in_results(ds.unlock(path=path2, on_failure="ignore"),
                      status="error",
                      message="path not underneath this dataset")

    chpwd(_cwd)
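# A note on the result-record pattern used throughout these tests: DataLad
# commands yield dictionaries with keys such as 'action', 'status', 'path',
# and 'message', and assert_in_results() passes when at least one record
# matches every given key/value pair. A rough, hypothetical
# re-implementation (not DataLad's actual helper) to make the semantics
# concrete:
def _assert_in_results_sketch(results, **kwargs):
    for res in results:
        if all(res.get(k) == v for k, v in kwargs.items()):
            return  # at least one result record matched all criteria
    raise AssertionError("no result matched %r" % (kwargs,))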
Example #7
0
def test_add_subdataset(path, other):
    subds = create(op.join(path, 'dir'), force=True)
    ds = create(path, force=True)
    ok_(subds.repo.dirty)
    ok_(ds.repo.dirty)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # "add everything in subds to subds"
    save(dataset=subds.path)
    assert_repo_status(subds.path)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # but with a base directory we add the dataset subds as a subdataset
    # to ds
    res = ds.save(subds.path)
    assert_in_results(res, action="add", path=subds.path, refds=ds.path)
    assert_in('dir', ds.subdatasets(result_xfm='relpaths'))
    #  create another one
    other = create(other)
    # install into superdataset, but don't add
    other_clone = install(source=other.path, path=op.join(ds.path, 'other'))
    # little dance to get the revolution-type dataset
    other_clone = Dataset(other_clone.path)
    ok_(other_clone.is_installed())
    assert_not_in('other', ds.subdatasets(result_xfm='relpaths'))
    # now add, it should pick up the source URL
    ds.save('other')
    # and that is why we can reobtain it from origin
    ds.uninstall('other')
    ok_(not other_clone.is_installed())
    ds.get('other')
    ok_(other_clone.is_installed())
Example #8
0
def test_install_known_subdataset(src, path):

    # get the superdataset:
    ds = install(path, source=src)
    # subdataset not installed:
    subds = Dataset(opj(path, 'subm 1'))
    assert_false(subds.is_installed())
    assert_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_not_in('subm 1',
                  ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
    # install it:
    ds.install('subm 1')
    ok_(subds.is_installed())
    ok_(AnnexRepo.is_valid_repo(subds.path, allow_noninitialized=False))
    # Verify that the correct submodule was installed and that no new
    # repository was initiated in its place
    eq_(set(subds.repo.get_indexed_files()),
        {'test.dat', 'INFO.txt', 'test-annex.dat'})
    assert_not_in('subm 1',
                  ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))

    # now, get the data by reinstalling with -g:
    ok_(subds.repo.file_has_content('test-annex.dat') is False)
    with chpwd(ds.path):
        result = get(path='subm 1', dataset=os.curdir)
        assert_in_results(result, path=opj(subds.path, 'test-annex.dat'))
        ok_(subds.repo.file_has_content('test-annex.dat') is True)
        ok_(subds.is_installed())
Example #9
0
def test_install_skip_failed_recursive(src, path):

    # install top level:
    ds = install(path, source=src)
    sub1 = Dataset(opj(path, 'subm 1'))
    sub2 = Dataset(opj(path, '2'))
    # sabotage recursive installation of 'subm 1' by polluting the target:
    with open(opj(path, 'subm 1', 'blocking.txt'), "w") as f:
        f.write("sdfdsf")

    with swallow_logs(new_level=logging.WARNING) as cml:
        result = ds.get(os.curdir,
                        recursive=True,
                        on_failure='ignore',
                        result_xfm=None)
        # toplevel dataset was in the house already
        assert_result_count(result, 0, path=ds.path, type='dataset')
        # 'subm 1' should fail to install; its result comes after the one for
        # the '2' submodule
        assert_in_results(result, status='error', path=sub1.path)
        assert_in_results(result, status='ok', path=sub2.path)

        cml.assert_logged(
            msg="target path already exists and not empty",
            regex=False,
            level='ERROR')
Example #10
0
def test_merge_follow_parentds_subdataset_adjusted_warning(path):
    path = Path(path)

    ds_src = Dataset(path / "source").create()
    if ds_src.repo.is_managed_branch():
        raise SkipTest("This test depends on the source repo being "
                       "an un-adjusted branch")

    ds_src_subds = ds_src.create("subds")

    ds_clone = install(source=ds_src.path,
                       path=path / "clone",
                       recursive=True,
                       result_xfm="datasets")
    ds_clone_subds = Dataset(ds_clone.pathobj / "subds")
    _adjust(ds_clone_subds.repo)
    # Note: Were we to save ds_clone here, we would get a merge conflict in the
    # top repo for the submodule (even if using 'git annex sync' rather than
    # 'git merge').

    ds_src_subds.repo.call_git(["checkout", DEFAULT_BRANCH + "^0"])
    (ds_src_subds.pathobj / "foo").write_text("foo content")
    ds_src.save(recursive=True)
    assert_repo_status(ds_src.path)

    assert_in_results(ds_clone.update(merge=True,
                                      recursive=True,
                                      follow="parentds",
                                      on_failure="ignore"),
                      status="impossible",
                      path=ds_clone_subds.path,
                      action="update")
    eq_(ds_clone.repo.get_hexsha(), ds_src.repo.get_hexsha())
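# An aside on the `DEFAULT_BRANCH + "^0"` checkout above: the "^0" revision
# suffix resolves a branch name to its commit object, so the checkout lands
# on a detached HEAD at the branch tip rather than on the branch itself.
# The plain-git equivalent (assuming the default branch is named "master"):
#
#     git checkout master^0    # detached HEAD at the tip of master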
Example #11
0
def test_install_known_subdataset(src, path):

    # get the superdataset:
    ds = install(path, source=src)
    # subdataset not installed:
    subds = Dataset(opj(path, 'subm 1'))
    assert_false(subds.is_installed())
    assert_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_not_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
    # install it:
    ds.install('subm 1')
    ok_(subds.is_installed())
    ok_(AnnexRepo.is_valid_repo(subds.path, allow_noninitialized=False))
    # Verify that the correct submodule was installed and that no new
    # repository was initiated in its place
    eq_(set(subds.repo.get_indexed_files()),
        {'test.dat', 'INFO.txt', 'test-annex.dat'})
    assert_not_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))

    # now, get the data by reinstalling with -g:
    ok_(subds.repo.file_has_content('test-annex.dat') is False)
    with chpwd(ds.path):
        result = get(path='subm 1', dataset=os.curdir)
        assert_in_results(result, path=opj(subds.path, 'test-annex.dat'))
        ok_(subds.repo.file_has_content('test-annex.dat') is True)
        ok_(subds.is_installed())
Example #12
0
def test_add_subdataset(path, other):
    subds = create(op.join(path, 'dir'), force=True)
    ds = create(path, force=True)
    ok_(subds.repo.dirty)
    ok_(ds.repo.dirty)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # "add everything in subds to subds"
    save(dataset=subds.path)
    assert_repo_status(subds.path)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # but with a base directory we add the dataset subds as a subdataset
    # to ds
    res = ds.save(subds.path)
    assert_in_results(res, action="add", path=subds.path, refds=ds.path)
    assert_in('dir', ds.subdatasets(result_xfm='relpaths'))
    #  create another one
    other = create(other)
    # install into superdataset, but don't add
    other_clone = install(source=other.path, path=op.join(ds.path, 'other'))
    # little dance to get the revolution-type dataset
    other_clone = Dataset(other_clone.path)
    ok_(other_clone.is_installed())
    assert_not_in('other', ds.subdatasets(result_xfm='relpaths'))
    # now add, it should pick up the source URL
    ds.save('other')
    # and that is why we can reobtain it from origin
    ds.uninstall('other')
    ok_(not other_clone.is_installed())
    ds.get('other')
    ok_(other_clone.is_installed())
Example #13
0
def test_push_wanted(srcpath, dstpath):
    src = Dataset(srcpath).create()

    if src.repo.is_managed_branch():
        # on crippled FS post-update hook enabling via create-sibling doesn't
        # work ATM
        raise SkipTest("no create-sibling on crippled FS")
    (src.pathobj / 'data.0').write_text('0')
    (src.pathobj / 'secure.1').write_text('1')
    (src.pathobj / 'secure.2').write_text('2')
    src.save()

    # Dropping a file to mimic a case of simply not having it locally (thus not
    # to be "pushed")
    src.drop('secure.2', check=False)

    # Annotate sensitive content, actual value "verysecure" does not matter in
    # this example
    src.repo.set_metadata(add={'distribution-restrictions': 'verysecure'},
                          files=['secure.1', 'secure.2'])

    src.create_sibling(
        dstpath,
        annex_wanted="not metadata=distribution-restrictions=*",
        name='target',
    )
    # check that wanted is obeyed, if instructed by configuration
    src.config.set('datalad.push.copy-auto-if-wanted', 'true', where='local')
    res = src.push(to='target')
    assert_in_results(res,
                      action='copy',
                      path=str(src.pathobj / 'data.0'),
                      status='ok')
    for p in ('secure.1', 'secure.2'):
        assert_not_in_results(res, path=str(src.pathobj / p))
    assert_status('notneeded', src.push(to='target'))

    # check that dataset-config cannot overrule this
    src.config.set('datalad.push.copy-auto-if-wanted',
                   'false',
                   where='dataset')
    res = src.push(to='target')
    assert_status('notneeded', res)

    # check the target to really make sure
    dst = Dataset(dstpath)
    # normal file, yes
    eq_((dst.pathobj / 'data.0').read_text(), '0')
    # secure file, no
    if dst.repo.is_managed_branch():
        neq_((dst.pathobj / 'secure.1').read_text(), '1')
    else:
        assert_raises(FileNotFoundError, (dst.pathobj / 'secure.1').read_text)

    # remove local config, must enable push of secure file
    src.config.unset('datalad.push.copy-auto-if-wanted', where='local')
    res = src.push(to='target')
    assert_in_results(res, path=str(src.pathobj / 'secure.1'))
    eq_((dst.pathobj / 'secure.1').read_text(), '1')
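# The annex_wanted expression passed to create_sibling above is a git-annex
# "preferred content" expression. A sketch of equivalent plain git-annex
# configuration for a remote named "target" (an assumption; create_sibling
# may wire this up differently under the hood):
#
#     git annex wanted target "not metadata=distribution-restrictions=*"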
Example #14
0
def test_configs(path):

    # set up dataset with registered procedure (c&p from test_basics):
    ds = Dataset(path).create(force=True)
    ds.run_procedure('cfg_yoda')
    # configure dataset to look for procedures in its code folder
    ds.config.add('datalad.locations.dataset-procedures',
                  'code',
                  where='dataset')

    # 1. run procedure based on execution guessing by run_procedure:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'some_arg\n')

    # 2. now configure specific call format including usage of substitution config
    # for run:
    ds.config.add('datalad.procedures.datalad_test_proc.call-format',
                  u'%s {script} {ds} {{mysub}} {args}' %
                  quote_cmdlinearg(sys.executable),
                  where='dataset')
    ds.config.add('datalad.run.substitutions.mysub',
                  'dataset-call-config',
                  where='dataset')
    # TODO: Should we allow for --inputs/--outputs arguments for run_procedure
    #       (to be passed into run)?
    ds.unlock("fromproc.txt")
    # run again:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'),
                        'dataset-call-config\n')

    # 3. have a conflicting config at user-level, which should override the
    # config on dataset level:
    ds.config.add('datalad.procedures.datalad_test_proc.call-format',
                  u'%s {script} {ds} local {args}' %
                  quote_cmdlinearg(sys.executable),
                  where='local')
    ds.unlock("fromproc.txt")
    # run again:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'local\n')

    # 4. get configured help message:
    r = ds.run_procedure('datalad_test_proc',
                         help_proc=True,
                         on_failure='ignore')
    assert_true(len(r) == 1)
    assert_in_results(r, status="impossible")

    ds.config.add('datalad.procedures.datalad_test_proc.help',
                  "This is a help message",
                  where='dataset')

    r = ds.run_procedure('datalad_test_proc', help_proc=True)
    assert_true(len(r) == 1)
    assert_in_results(r, message="This is a help message", status='ok')
Example #15
0
    def test_addurls_no_rows(self, path):
        ds = Dataset(path).create(force=True)
        for fname in ["in.csv", "in.json"]:
            with swallow_logs(new_level=logging.WARNING) as cml:
                assert_in_results(ds.addurls(fname, "{url}", "{name}"),
                                  action="addurls",
                                  status="notneeded")
                cml.assert_logged("No rows", regex=False)
Example #16
0
def test_base(dspath, records):
    # make fake UKB datarecord downloads
    make_datarecord_zips('12345', records)

    # init dataset
    ds = create(dspath)
    ds.ukb_init('12345', ['20227_2_0', '25747_2_0', '25748_2_0', '25748_3_0'])
    # dummy key file, only there to bypass the check in tests
    ds.config.add('datalad.ukbiobank.keyfile', 'dummy', where='local')

    # fake ukbfetch
    bin_dir = make_ukbfetch(ds, records)

    # refuse to operate on dirty datasets
    (ds.pathobj / 'dirt').write_text('dust')
    assert_status('error', ds.ukb_update(on_failure='ignore'))
    (ds.pathobj / 'dirt').unlink()

    # meaningful crash with no ukbfetch
    assert_raises(RuntimeError, ds.ukb_update)

    # put fake ukbfetch in the path and run
    with patch.dict(
            'os.environ',
            {'PATH': '{}:{}'.format(str(bin_dir), os.environ['PATH'])}):
        ds.ukb_update(merge=True)

    # get expected file layout
    incoming = ds.repo.get_files('incoming')
    incoming_p = ds.repo.get_files('incoming-native')
    for i in [
            '12345_25748_2_0.txt', '12345_25748_3_0.txt', '12345_20227_2_0.zip'
    ]:
        assert_in(i, incoming)
    for i in ['25748_2_0.txt', '25748_3_0.txt', '20227_2_0/fMRI/rfMRI.nii.gz']:
        assert_in(i, incoming_p)
    # no ZIPs after processing
    assert_not_in('12345_20227_2_0.zip', incoming_p)
    assert_not_in('20227_2_0.zip', incoming_p)

    # rerun works
    with patch.dict(
            'os.environ',
            {'PATH': '{}:{}'.format(str(bin_dir), os.environ['PATH'])}):
        ds.ukb_update(merge=True)

    # rightfully refuse to merge when active branch is an incoming* one
    ds.repo.checkout('incoming')
    with patch.dict(
            'os.environ',
            {'PATH': '{}:{}'.format(str(bin_dir), os.environ['PATH'])}):
        assert_in_results(
            ds.ukb_update(merge=True, force=True, on_failure='ignore'),
            status='impossible',
            message='Refuse to merge into incoming* branch',
        )
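# The patch.dict() blocks above prepend the directory holding the fake
# ukbfetch to PATH only for the duration of each with-block, so the stub
# shadows any real ukbfetch. A minimal standalone sketch of the technique
# (the stub directory name is a hypothetical placeholder):
import os
from unittest.mock import patch

with patch.dict('os.environ',
                {'PATH': '/tmp/stub-bin:' + os.environ['PATH']}):
    pass  # executables are now resolved against the stubbed PATH first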
Example #17
0
    def test_addurls(self, path):
        ds = Dataset(path).create(force=True)

        def get_annex_commit_counts():
            return int(
                ds.repo.repo.git.rev_list("--count", "git-annex").strip())

        n_annex_commits = get_annex_commit_counts()

        with chpwd(path):
            ds.addurls(self.json_file, "{url}", "{name}")

            filenames = ["a", "b", "c"]
            for fname in filenames:
                ok_exists(fname)

            for (fname, meta), subdir in zip(ds.repo.get_metadata(filenames),
                                             ["foo", "bar", "foo"]):
                assert_dict_equal(meta, {"subdir": [subdir], "name": [fname]})

            # Ignore this check if we're faking dates because that disables
            # batch mode.
            if not os.environ.get('DATALAD_FAKE__DATES'):
                # We should have two new commits on the git-annex branch: one
                # for the added urls and one for the added metadata.
                eq_(n_annex_commits + 2, get_annex_commit_counts())

            # Add to already existing links, overwriting.
            with swallow_logs(new_level=logging.DEBUG) as cml:
                ds.addurls(self.json_file,
                           "{url}",
                           "{name}",
                           ifexists="overwrite")
                for fname in filenames:
                    assert_in("Removing {}".format(os.path.join(path, fname)),
                              cml.out)

            # Add to already existing links, skipping.
            assert_in_results(ds.addurls(self.json_file,
                                         "{url}",
                                         "{name}",
                                         ifexists="skip"),
                              action="addurls",
                              status="notneeded")

            # Adding to already existing links works, as long as the content
            # is the same.
            ds.addurls(self.json_file, "{url}", "{name}")

            # But it fails if something has changed.
            ds.unlock("a")
            with open("a", "w") as ofh:
                ofh.write("changed")
            ds.save("a")

            assert_raises(IncompleteResultsError, ds.addurls, self.json_file,
                          "{url}", "{name}")
Example #18
0
def test_install_recursive(src, path_nr, path_r):
    # first install non-recursive:
    ds = install(path_nr, source=src, recursive=False)
    ok_(ds.is_installed())
    for sub in ds.subdatasets(recursive=True, result_xfm='datasets'):
        ok_(not sub.is_installed(), "Unintentionally installed: %s" % (sub, ))
    # this also means, subdatasets to be listed as not fulfilled:
    eq_(
        set(
            ds.subdatasets(recursive=True,
                           fulfilled=False,
                           result_xfm='relpaths')), {'subm 1', '2'})

    # now recursively:
    # don't filter implicit results so we can inspect them
    res = install(path_r,
                  source=src,
                  recursive=True,
                  result_xfm=None,
                  result_filter=None)
    # installed a dataset and two subdatasets
    assert_result_count(res, 3, action='install', type='dataset')
    # we recurse top down during installation, so toplevel should appear at
    # first position in returned list
    eq_(res[0]['path'], path_r)
    top_ds = Dataset(res[0]['path'])
    ok_(top_ds.is_installed())

    # the subdatasets are contained in returned list:
    # (Note: Until we provide proper (singleton) instances for Datasets,
    # need to check for their paths)
    assert_in_results(res, path=opj(top_ds.path, 'subm 1'), type='dataset')
    assert_in_results(res, path=opj(top_ds.path, '2'), type='dataset')

    eq_(len(top_ds.subdatasets(recursive=True)), 2)

    for subds in top_ds.subdatasets(recursive=True, result_xfm='datasets'):
        ok_(subds.is_installed(), "Not installed: %s" % (subds, ))
        # no content was installed:
        ainfo = subds.repo.get_content_annexinfo(init=None,
                                                 eval_availability=True)
        assert_false(any(st["has_content"] for st in ainfo.values()))
    # no unfulfilled subdatasets:
    ok_(top_ds.subdatasets(recursive=True, fulfilled=False) == [])

    # check if we can install recursively into a dataset
    # https://github.com/datalad/datalad/issues/2982
    subds = ds.install('recursive-in-ds', source=src, recursive=True)
    ok_(subds.is_installed())
    for subsub in subds.subdatasets(recursive=True, result_xfm='datasets'):
        ok_(subsub.is_installed())

    # check that we get subdataset instances manufactured from notneeded
    # results to install existing subdatasets again
    eq_(subds, ds.install('recursive-in-ds'))
Example #19
0
def test_invalid_call(path):
    with chpwd(path):
        # ^ Change directory so that we don't fail with an
        # InvalidGitRepositoryError if the test is executed from a git
        # worktree.

        # needs spec or discover
        assert_raises(InsufficientArgumentsError, run_procedure)
        res = run_procedure('unknown', on_failure='ignore')
        assert_true(len(res) == 1)
        assert_in_results(res, status="impossible")
Example #20
0
    def test_addurls(self, path):
        ds = Dataset(path).create(force=True)

        def get_annex_commit_counts():
            return int(
                ds.repo.repo.git.rev_list("--count", "git-annex").strip())

        n_annex_commits = get_annex_commit_counts()

        with chpwd(path):
            ds.addurls(self.json_file, "{url}", "{name}")

            filenames = ["a", "b", "c"]
            for fname in filenames:
                ok_exists(fname)

            for (fname, meta), subdir in zip(ds.repo.get_metadata(filenames),
                                             ["foo", "bar", "foo"]):
                assert_dict_equal(meta,
                                  {"subdir": [subdir], "name": [fname]})

            # Ignore this check if we're faking dates because that disables
            # batch mode.
            if not os.environ.get('DATALAD_FAKE__DATES'):
                # We should have two new commits on the git-annex branch: one
                # for the added urls and one for the added metadata.
                eq_(n_annex_commits + 2, get_annex_commit_counts())

            # Add to already existing links, overwriting.
            with swallow_logs(new_level=logging.DEBUG) as cml:
                ds.addurls(self.json_file, "{url}", "{name}",
                           ifexists="overwrite")
                for fname in filenames:
                    assert_in("Removing {}".format(os.path.join(path, fname)),
                              cml.out)

            # Add to already existing links, skipping.
            assert_in_results(
                ds.addurls(self.json_file, "{url}", "{name}", ifexists="skip"),
                action="addurls",
                status="notneeded")

            # Adding to already existing links works, as long as the content
            # is the same.
            ds.addurls(self.json_file, "{url}", "{name}")

            # But it fails if something has changed.
            ds.unlock("a")
            with open("a", "w") as ofh:
                ofh.write("changed")
            ds.save("a")

            assert_raises(IncompleteResultsError,
                          ds.addurls,
                          self.json_file, "{url}", "{name}")
Example #21
0
def test_nested_create(path):
    # to document some more organic usage pattern
    ds = Dataset(path).create()
    ok_clean_git(ds.path)
    lvl2relpath = opj('lvl1', 'lvl2')
    lvl2path = opj(ds.path, lvl2relpath)
    os.makedirs(lvl2path)
    os.makedirs(opj(ds.path, 'lvl1', 'empty'))
    with open(opj(lvl2path, 'file'), 'w') as f:
        f.write('some')
    ok_(ds.add('.'))
    # later create subdataset in a fresh dir
    subds1 = ds.create(opj('lvl1', 'subds'))
    ok_clean_git(ds.path)
    eq_(ds.subdatasets(result_xfm='relpaths'), [opj('lvl1', 'subds')])
    # later create subdataset in an existing empty dir
    subds2 = ds.create(opj('lvl1', 'empty'))
    ok_clean_git(ds.path)
    # later try to wrap existing content into a new subdataset
    # but that won't work
    assert_in_results(
        ds.create(lvl2relpath, **raw),
        status='error',
        message='will not create a dataset in a non-empty directory, '
                'use `force` option to ignore')
    # even with force, as doing this properly would require complicated
    # surgery to take place
    # MIH disabled this shaky test till a proper dedicated upfront check is
    # in place in `create`
    # gh-1725
    #assert_in_results(
    #    ds.create(lvl2relpath, force=True,
    #              on_failure='ignore', result_xfm=None, result_filter=None),
    #    status='error', action='add')
    # only way to make it work is to unannex the content upfront
    ds.repo._run_annex_command('unannex',
                               annex_options=[opj(lvl2relpath, 'file')])
    # nothing to save, git-annex commits the unannex itself
    assert_status(
        'ok'
        if ds.repo.config.getint("annex", "version") == 6 else 'notneeded',
        ds.save())
    # still nothing without force
    # "err='lvl1/lvl2' already exists in the index"
    assert_in_results(
        ds.create(lvl2relpath, **raw),
        status='error',
        message='will not create a dataset in a non-empty directory, '
                'use `force` option to ignore')
    # XXX even force doesn't help, because (I assume) GitPython doesn't update
    # its representation of the Git index properly
    ds.create(lvl2relpath, force=True)
    assert_in(lvl2relpath, ds.subdatasets(result_xfm='relpaths'))
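# A note on the recurring `**raw` kwargs in the create() calls above: its
# definition is not shown in this excerpt, but it is presumably something
# along the lines of
#
#     raw = dict(return_type='list', result_filter=None, result_xfm=None)
#
# i.e. it switches off result filtering and transformation so that error
# records come back as plain dicts for inspection instead of being rendered
# or raised.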
Example #22
0
def test_on_failure_continue(path):
    ds = Dataset(op.join(path, "ds")).create(force=True)
    # save() calls status() in a way that respects on_failure.
    assert_in_results(ds.save(
        path=[op.join(path, "outside"),
              op.join(path, "ds", "within")],
        on_failure="ignore"),
                      action="status",
                      status="error")
    # save() continued despite the failure and saved ds/within.
    assert_repo_status(ds.path)
Example #23
0
def test_merge_no_merge_target(path):
    path = Path(path)
    ds_src = Dataset(path / "source").create()
    ds_clone = install(source=ds_src.path,
                       path=path / "clone",
                       recursive=True,
                       result_xfm="datasets")
    assert_repo_status(ds_src.path)
    ds_clone.repo.checkout(DEFAULT_BRANCH, options=["-bnew"])
    res = ds_clone.update(merge=True, on_failure="ignore")
    assert_in_results(res, status="impossible", action="update")
Example #24
0
def test_rerun_cherry_pick(path):
    ds = Dataset(path).create()

    ds.repo.tag("prerun")
    ds.run('echo abc > runfile')
    with open(opj(path, "nonrun-file"), "w") as f:
        f.write("foo")
    ds.add("nonrun-file")

    for onto, action in [("HEAD", "skip"), ("prerun", "pick")]:
        results = ds.rerun(since="prerun", onto=onto)
        assert_in_results(results, status='ok', rerun_action=action)
Example #25
0
def test_rerun_cherry_pick(path):
    ds = Dataset(path).create()

    ds.repo.tag("prerun")
    ds.run('echo abc > runfile')
    with open(opj(path, "nonrun-file"), "w") as f:
        f.write("foo")
    ds.add("nonrun-file")

    for onto, action in [("HEAD", "skip"), ("prerun", "pick")]:
        results = ds.rerun(since="prerun", onto=onto)
        assert_in_results(results, status='ok', rerun_action=action)
Example #26
0
def test_nested_create(path):
    # to document some more organic usage pattern
    ds = Dataset(path).create()
    assert_repo_status(ds.path)
    lvl2relpath = op.join('lvl1', 'lvl2')
    lvl2path = op.join(ds.path, lvl2relpath)
    os.makedirs(lvl2path)
    os.makedirs(op.join(ds.path, 'lvl1', 'empty'))
    with open(op.join(lvl2path, 'file'), 'w') as f:
        f.write('some')
    ok_(ds.save())
    # Empty directories are filtered out.
    assert_repo_status(ds.path, untracked=[])
    # later create subdataset in a fresh dir
    # WINDOWS FAILURE IS NEXT LINE
    subds1 = ds.create(op.join('lvl1', 'subds'))
    assert_repo_status(ds.path, untracked=[])
    eq_(ds.subdatasets(result_xfm='relpaths'), [op.join('lvl1', 'subds')])
    # later create subdataset in an existing empty dir
    subds2 = ds.create(op.join('lvl1', 'empty'))
    assert_repo_status(ds.path)
    # later try to wrap existing content into a new subdataset
    # but that won't work
    assert_in_results(
        ds.create(lvl2relpath, **raw),
        status='error',
        message=('collision with content in parent dataset at %s: %s', ds.path,
                 [op.join(lvl2path, 'file')]))
    # even with force, as doing this properly would require complicated
    # surgery to take place
    # MIH disabled this shaky test till a proper dedicated upfront check is
    # in place in `create`
    # gh-1725
    #assert_in_results(
    #    ds.create(lvl2relpath, force=True,
    #              on_failure='ignore', result_xfm=None, result_filter=None),
    #    status='error', action='add')
    # only way to make it work is to unannex the content upfront
    ds.repo.call_annex(['unannex', op.join(lvl2relpath, 'file')])
    # nothing to save, git-annex commits the unannex itself, but only on v5
    ds.repo.commit()
    # still nothing without force
    # "err='lvl1/lvl2' already exists in the index"
    assert_in_results(
        ds.create(lvl2relpath, **raw),
        status='error',
        message='will not create a dataset in a non-empty directory, '
                'use `force` option to ignore')
    # XXX even force doesn't help, because (I assume) GitPython doesn't update
    # its representation of the Git index properly
    ds.create(lvl2relpath, force=True)
    assert_in(lvl2relpath, ds.subdatasets(result_xfm='relpaths'))
Example #27
0
def test_empty_git_upstairs(topdir):
    # create() doesn't get confused by an empty .git/ upstairs (gh-3473)
    assert_in_results(
        create(op.join(topdir, "empty", "ds"), **raw),
        status="ok", type="dataset", action="create")
    # ... and it will ignore non-meaningful content in .git
    assert_in_results(
        create(op.join(topdir, "nonempty", "ds"), **raw),
        status="ok", type="dataset", action="create")
    # ... but it will raise if it detects a valid repo
    # (by existence of .git/HEAD as defined in GitRepo._valid_git_test_path)
    with assert_raises(CommandError):
        create(op.join(topdir, "git_with_head", "ds"), **raw)
Example #28
0
def test_rerun_cherry_pick(path):
    ds = Dataset(path).create()

    ds.repo.tag("prerun")
    ds.run('echo abc > runfile')
    with open(opj(path, "nonrun-file"), "w") as f:
        f.write("foo")
    ds.add("nonrun-file")

    for onto, text in [("HEAD", "skipping"), ("prerun", "cherry picking")]:
        results = ds.rerun(since="prerun", onto=onto)
        assert_in_results(results, status='ok', path=ds.path)
        assert any(r.get("message", "").endswith(text) for r in results)
Example #29
0
def test_nested_create(path):
    # to document some more organic usage pattern
    ds = Dataset(path).create()
    assert_repo_status(ds.path)
    lvl2relpath = op.join('lvl1', 'lvl2')
    lvl2path = op.join(ds.path, lvl2relpath)
    os.makedirs(lvl2path)
    os.makedirs(op.join(ds.path, 'lvl1', 'empty'))
    with open(op.join(lvl2path, 'file'), 'w') as f:
        f.write('some')
    ok_(ds.save())
    # Empty directories are filtered out.
    assert_repo_status(ds.path, untracked=[])
    # later create subdataset in a fresh dir
    # WINDOWS FAILURE IS NEXT LINE
    subds1 = ds.create(op.join('lvl1', 'subds'))
    assert_repo_status(ds.path, untracked=[])
    eq_(ds.subdatasets(result_xfm='relpaths'), [op.join('lvl1', 'subds')])
    # later create subdataset in an existing empty dir
    subds2 = ds.create(op.join('lvl1', 'empty'))
    assert_repo_status(ds.path)
    # later try to wrap existing content into a new subdataset
    # but that won't work
    assert_in_results(
        ds.create(lvl2relpath, **raw),
        status='error',
        message=(
            'collision with content in parent dataset at %s: %s',
            ds.path, [op.join(lvl2path, 'file')]))
    # even with force, as doing this properly would require complicated
    # surgery to take place
    # MIH disabled this shaky test till a proper dedicated upfront check is
    # in place in `create`
    # gh-1725
    #assert_in_results(
    #    ds.create(lvl2relpath, force=True,
    #              on_failure='ignore', result_xfm=None, result_filter=None),
    #    status='error', action='add')
    # only way to make it work is to unannex the content upfront
    ds.repo._run_annex_command('unannex', annex_options=[op.join(lvl2relpath, 'file')])
    # nothing to save, git-annex commits the unannex itself, but only on v5
    ds.repo.commit()
    # still nothing without force
    # "err='lvl1/lvl2' already exists in the index"
    assert_in_results(
        ds.create(lvl2relpath, **raw),
        status='error',
        message='will not create a dataset in a non-empty directory, use `force` option to ignore')
    # XXX even force doesn't help, because (I assume) GitPython doesn't update
    # its representation of the Git index properly
    ds.create(lvl2relpath, force=True)
    assert_in(lvl2relpath, ds.subdatasets(result_xfm='relpaths'))
Example #30
0
def test_merge_no_merge_target(path):
    path = Path(path)
    ds_src = Dataset(path / "source").create()
    if ds_src.repo.is_managed_branch():
        # `git annex sync REMOTE` rather than `git merge TARGET` is used on an
        # adjusted branch, so we don't give an error if TARGET can't be
        # determined.
        raise SkipTest("Test depends on non-adjusted branch")
    ds_clone = install(source=ds_src.path, path=path / "clone",
                       recursive=True, result_xfm="datasets")
    assert_repo_status(ds_src.path)
    ds_clone.repo.checkout("master", options=["-bnew"])
    res = ds_clone.update(merge=True, on_failure="ignore")
    assert_in_results(res, status="impossible", action="update")
Example #31
0
def test_surprise_subds(path):
    # https://github.com/datalad/datalad/issues/3139
    ds = create(path, force=True)
    # a lonely repo without any commit
    somerepo = AnnexRepo(path=op.join(path, 'd1', 'subrepo'), create=True)
    # a proper subdataset
    subds = create(op.join(path, 'd2', 'subds'), force=True)

    # If subrepo is on an adjusted branch, it would have a commit, making most
    # of this test irrelevant because it is about the unborn-branch edge case.
    adjusted = somerepo.is_managed_branch()
    # This edge case goes away with Git v2.22.0.
    fixed_git = external_versions['cmd:git'] >= '2.22.0'

    # save non-recursive
    res = ds.save(recursive=False, on_failure='ignore')
    if not adjusted and fixed_git:
        # We get an appropriate error about no commit being checked out.
        assert_in_results(res, action='add_submodule', status='error')

    # the content of both subds and subrepo are not added to their
    # respective parent as no --recursive was given
    assert_repo_status(subds.path, untracked=['subfile'])
    assert_repo_status(somerepo.path, untracked=['subfile'])

    if adjusted or fixed_git:
        if adjusted:
            # adjusted branch: #datalad/3178 (that would have a commit)
            modified = [subds.repo.pathobj, somerepo.pathobj]
            untracked = []
        else:
            # Newer Git versions refuse to add a sub-repository with no commits
            # checked out.
            modified = [subds.repo.pathobj]
            untracked = ['d1']
        assert_repo_status(ds.path, modified=modified, untracked=untracked)
        assert_not_in(ds.repo.pathobj / 'd1' / 'subrepo' / 'subfile',
                      ds.repo.get_content_info())
    else:
        # however, while the subdataset is added (and reported as modified
        # because its content is still untracked), the subrepo
        # cannot be added (it has no commit)
        # worse: its untracked file has been added to the superdataset
        assert_repo_status(ds.path, modified=['d2/subds'])
        assert_in(ds.repo.pathobj / 'd1' / 'subrepo' / 'subfile',
                  ds.repo.get_content_info())
    # with proper subdatasets, all evil is gone
    assert_not_in(ds.repo.pathobj / 'd2' / 'subds' / 'subfile',
                  ds.repo.get_content_info())
Example #32
0
def test_update_adjusted_incompatible_with_ff_only(path):
    path = Path(path)
    ds_src = Dataset(path / "source").create()

    ds_clone = install(source=ds_src.path,
                       path=path / "clone",
                       recursive=True,
                       result_xfm="datasets")
    maybe_adjust_repo(ds_clone.repo)

    assert_in_results(ds_clone.update(merge="ff-only", on_failure="ignore"),
                      action="update",
                      status="impossible")
    assert_in_results(ds_clone.update(on_failure="ignore"),
                      action="update",
                      status="ok")
Example #33
0
def test_copy_file_prevent_dotgit_placement(srcpath, destpath):
    src = Dataset(srcpath).create()
    sub = src.create('sub')
    dest = Dataset(destpath).create()

    # recursion doesn't capture .git/
    dest.copy_file(sub.path, recursive=True)
    nok_((dest.pathobj / 'sub' / '.git').exists())

    # explicit instruction results in failure
    assert_status(
        'impossible',
        dest.copy_file(sub.pathobj / '.git',
                       recursive=True,
                       on_failure='ignore'))

    # same when the source has an OK name, but the destination now has the
    # forbidden one
    assert_in_results(dest.copy_file(
        [sub.pathobj / '.git' / 'config', dest.pathobj / 'some' / '.git'],
        on_failure='ignore'),
                      status='impossible',
                      action='copy_file')

    # The last path above wasn't treated as a target directory because it
    # wasn't an existing directory. We also guard against a '.git' in the
    # target directory code path, though the handling is different.
    with assert_raises(ValueError):
        dest.copy_file(
            [sub.pathobj / '.git' / 'config', dest.pathobj / '.git'])

    # A source path can have a leading .git/ if the destination is outside of
    # .git/.
    nok_((dest.pathobj / "config").exists())
    dest.copy_file(sub.pathobj / '.git' / 'config')
    ok_((dest.pathobj / "config").exists())

    target = dest.pathobj / 'some'
    nok_(target.exists())
    dest.copy_file([sub.pathobj / '.git' / 'config', target])
    ok_(target.exists())

    # But we only waste so many cycles trying to prevent foot shooting. This
    # next one sneaks by because only .name, not all upstream parts, is checked
    # for each destination that comes out of _yield_specs().
    badobj = dest.pathobj / '.git' / 'objects' / 'i-do-not-exist'
    dest.copy_file([sub.pathobj / '.git' / 'config', badobj])
    ok_(badobj.exists())
Example #34
0
def test_push_wanted(srcpath, dstpath):
    src = Dataset(srcpath).create()
    (src.pathobj / 'data.0').write_text('0')
    (src.pathobj / 'secure.1').write_text('1')
    (src.pathobj / 'secure.2').write_text('2')
    src.save()

    # Dropping a file to mimic a case of simply not having it locally (thus not
    # to be "pushed")
    src.drop('secure.2', check=False)

    # Annotate sensitive content, actual value "verysecure" does not matter in
    # this example
    src.repo.set_metadata(add={'distribution-restrictions': 'verysecure'},
                          files=['secure.1', 'secure.2'])

    src.create_sibling(
        dstpath,
        annex_wanted="not metadata=distribution-restrictions=*",
        name='target',
    )
    # check that wanted is obeyed, since set in sibling configuration
    res = src.push(to='target')
    assert_in_results(res,
                      action='copy',
                      path=str(src.pathobj / 'data.0'),
                      status='ok')
    for p in ('secure.1', 'secure.2'):
        assert_not_in_results(res, path=str(src.pathobj / p))
    assert_status('notneeded', src.push(to='target'))

    # check the target to really make sure
    dst = Dataset(dstpath)
    # normal file, yes
    eq_((dst.pathobj / 'data.0').read_text(), '0')
    # secure file, no
    if dst.repo.is_managed_branch():
        neq_((dst.pathobj / 'secure.1').read_text(), '1')
    else:
        assert_raises(FileNotFoundError, (dst.pathobj / 'secure.1').read_text)

    # reset wanted config, which must enable push of secure file
    src.repo.set_preferred_content('wanted', '', remote='target')
    res = src.push(to='target')
    assert_in_results(res, path=str(src.pathobj / 'secure.1'))
    eq_((dst.pathobj / 'secure.1').read_text(), '1')
Example #35
0
def test_reobtain_data(originpath, destpath):
    origin = Dataset(originpath).create()
    ds = install(source=originpath,
                 path=destpath,
                 result_xfm='datasets',
                 return_type='item-or-list')
    # no harm
    assert_result_count(ds.update(merge=True, reobtain_data=True),
                        1,
                        action="update",
                        status="ok")
    # content
    create_tree(origin.path, {'load.dat': 'heavy'})
    origin.save(opj(origin.path, 'load.dat'))
    # update does not bring data automatically
    assert_result_count(ds.update(merge=True, reobtain_data=True),
                        1,
                        action="update",
                        status="ok")
    assert_in('load.dat', ds.repo.get_annexed_files())
    assert_false(ds.repo.file_has_content('load.dat'))
    # now get data
    ds.get('load.dat')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    # new content at origin
    create_tree(origin.path, {'novel': 'but boring'})
    origin.save()
    # update must not bring in data for new file
    result = ds.update(merge=True, reobtain_data=True)
    assert_in_results(result, action='get', status='notneeded')

    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    assert_in('novel', ds.repo.get_annexed_files())
    assert_false(ds.repo.file_has_content('novel'))
    # modify content at origin
    os.remove(opj(origin.path, 'load.dat'))
    create_tree(origin.path, {'load.dat': 'light'})
    origin.save()
    # update must update file with existing data, but leave empty one alone
    res = ds.update(merge=True, reobtain_data=True)
    assert_result_count(res, 1, status='ok', type='dataset', action='update')
    assert_result_count(res, 1, status='ok', type='file', action='get')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'light')
    assert_false(ds.repo.file_has_content('novel'))
Example #36
0
def test_get_invalid_call(path, file_outside):

    # no argument at all:
    assert_raises(InsufficientArgumentsError, get, None)
    assert_raises(InsufficientArgumentsError, get, [])
    # invalid dataset:
    assert_status('impossible', get(None, dataset=path, on_failure='ignore'))

    # have a plain git:
    ds = Dataset(path)
    ds.create(no_annex=True)
    with open(opj(path, "some.txt"), "w") as f:
        f.write("whatever")
    ds.save("some.txt", to_git=True, message="Initial commit.")

    # make it an annex (remove indicator file that create has placed
    # in the dataset to make it possible):
    (ds.pathobj / '.noannex').unlink()
    AnnexRepo(path, init=True, create=True)
    # call get again on a file in git:
    result = ds.get("some.txt")
    assert_status('notneeded', result)

    # invalid source:
    # yoh: but now we would need to add it to annex, since the clever code
    # first checks what needs to be fetched at all
    create_tree(path, {'annexed.dat': 'some'})
    ds.save("annexed.dat")
    ds.repo.drop("annexed.dat", options=['--force'])
    with assert_raises(RemoteNotAvailableError) as ce:
        ds.get("annexed.dat", source='MysteriousRemote')
    eq_("MysteriousRemote", ce.exception.remote)

    res = ds.get("NotExistingFile.txt", on_failure='ignore')
    assert_status('impossible', res)
    assert_message("path does not exist", res)

    # path outside repo errors as with most other commands:
    res = ds.get(file_outside, on_failure='ignore')
    assert_in_results(
        res, status='impossible', message='path not associated with any dataset')
Example #37
0
def test_reobtain_data(originpath, destpath):
    origin = Dataset(originpath).create()
    ds = install(
        source=originpath, path=destpath,
        result_xfm='datasets', return_type='item-or-list')
    # no harm
    assert_result_count(ds.update(merge=True, reobtain_data=True), 1)
    # content
    create_tree(origin.path, {'load.dat': 'heavy'})
    origin.save(opj(origin.path, 'load.dat'))
    # update does not bring data automatically
    assert_result_count(ds.update(merge=True, reobtain_data=True), 1)
    assert_in('load.dat', ds.repo.get_annexed_files())
    assert_false(ds.repo.file_has_content('load.dat'))
    # now get data
    ds.get('load.dat')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    # new content at origin
    create_tree(origin.path, {'novel': 'but boring'})
    origin.save()
    # update must not bring in data for new file
    result = ds.update(merge=True, reobtain_data=True)
    assert_in_results(result, action='get', status='notneeded')

    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    assert_in('novel', ds.repo.get_annexed_files())
    assert_false(ds.repo.file_has_content('novel'))
    # modify content at origin
    os.remove(opj(origin.path, 'load.dat'))
    create_tree(origin.path, {'load.dat': 'light'})
    origin.save()
    # update must update file with existing data, but leave empty one alone
    res = ds.update(merge=True, reobtain_data=True)
    assert_result_count(res, 2)
    assert_result_count(res, 1, status='ok', type='dataset', action='update')
    assert_result_count(res, 1, status='ok', type='file', action='get')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'light')
    assert_false(ds.repo.file_has_content('novel'))
Example #38
0
def test_install_skip_failed_recursive(src, path):

    # install top level:
    ds = install(path, source=src)
    sub1 = Dataset(opj(path, 'subm 1'))
    sub2 = Dataset(opj(path, '2'))
    # sabotage recursive installation of 'subm 1' by polluting the target:
    with open(opj(path, 'subm 1', 'blocking.txt'), "w") as f:
        f.write("sdfdsf")

    with swallow_logs(new_level=logging.WARNING) as cml:
        result = ds.get(
            os.curdir, recursive=True,
            on_failure='ignore', result_xfm=None)
        # toplevel dataset was in the house already
        assert_result_count(
            result, 0, path=ds.path, type='dataset')
        # 'subm 1' should fail to install; its result comes after the one for
        # the '2' submodule
        assert_in_results(result, status='error', path=sub1.path)
        assert_in_results(result, status='ok', path=sub2.path)

        cml.assert_logged(
            msg="target path already exists and not empty",
            regex=False, level='ERROR')
Example #39
0
def test_create_raises(path, outside_path):
    ds = Dataset(path)
    # incompatible arguments (annex only):
    assert_raises(ValueError, ds.create, no_annex=True, description='some')

    with open(op.join(path, "somefile.tst"), 'w') as f:
        f.write("some")
    # non-empty without `force`:
    assert_in_results(
        ds.create(force=False, **raw),
        status='error',
        message='will not create a dataset in a non-empty directory, use `force` option to ignore')
    # non-empty with `force`:
    ds.create(force=True)
    # create sub outside of super:
    assert_in_results(
        ds.create(outside_path, **raw),
        status='error',
        message=(
            'dataset containing given paths is not underneath the reference '
            'dataset %s: %s', ds, outside_path))
    obscure_ds = u"ds-" + OBSCURE_FILENAME
    # create a sub:
    ds.create(obscure_ds)
    # fail when doing it again
    assert_in_results(
        ds.create(obscure_ds, **raw),
        status='error',
        message=('collision with content in parent dataset at %s: %s',
                 ds.path,
                 [text_type(ds.pathobj / obscure_ds)]),
    )

    # now deinstall the sub and fail trying to create a new one at the
    # same location
    ds.uninstall(obscure_ds, check=False)
    assert_in(obscure_ds, ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    # and now should fail to also create inplace or under
    assert_in_results(
        ds.create(obscure_ds, **raw),
        status='error',
        message=('collision with content in parent dataset at %s: %s',
                 ds.path,
                 [text_type(ds.pathobj / obscure_ds)]),
    )
    assert_in_results(
        ds.create(op.join(obscure_ds, 'subsub'), **raw),
        status='error',
        message=('collision with %s (dataset) in dataset %s',
                 text_type(ds.pathobj / obscure_ds),
                 ds.path)
    )
    os.makedirs(op.join(ds.path, 'down'))
    with open(op.join(ds.path, 'down', "someotherfile.tst"), 'w') as f:
        f.write("someother")
    ds.save()
    assert_in_results(
        ds.create('down', **raw),
        status='error',
        message=('collision with content in parent dataset at %s: %s',
                 ds.path,
                 [text_type(ds.pathobj / 'down' / 'someotherfile.tst')]),
    )
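# A note on the tuple-valued message fields above: as these assertions
# suggest, result records may carry their message as a
# ('format %s string', args...) tuple rather than a pre-rendered string,
# and assert_in_results() compares against that tuple form verbatim.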
Example #40
0
def test_get_recurse_subdatasets(src, path):

    ds = install(
        path, source=src,
        result_xfm='datasets', return_type='item-or-list')

    # ask for the two subdatasets specifically. This will obtain them,
    # but not any content of any files in them
    subds1, subds2 = ds.get(['subm 1', '2'], get_data=False,
                            description="youcouldnotmakethisup",
                            result_xfm='datasets')
    for d in (subds1, subds2):
        eq_(d.repo.get_description(), 'youcouldnotmakethisup')

    # there are 3 files to get: test-annex.dat within each dataset:
    rel_path_sub1 = opj(basename(subds1.path), 'test-annex.dat')
    rel_path_sub2 = opj(basename(subds2.path), 'test-annex.dat')
    annexed_files = {'test-annex.dat',
                     rel_path_sub1,
                     rel_path_sub2}

    # None of them is currently present:
    ok_(ds.repo.file_has_content('test-annex.dat') is False)
    ok_(subds1.repo.file_has_content('test-annex.dat') is False)
    ok_(subds2.repo.file_has_content('test-annex.dat') is False)

    ok_clean_git(subds1.path)
    # explicitly given path in subdataset => implicit recursion:
    # MIH: Nope, we fulfill the dataset handle, but that doesn't
    #      imply fulfilling all file handles
    result = ds.get(rel_path_sub1, recursive=True)
    # all good actions
    assert_status('ok', result)

    assert_in_results(result, path=opj(ds.path, rel_path_sub1), status='ok')
    ok_(subds1.repo.file_has_content('test-annex.dat') is True)

    # drop it:
    subds1.repo.drop('test-annex.dat')
    ok_(subds1.repo.file_has_content('test-annex.dat') is False)

    # now, with a path not explicitly pointing within a
    # subdataset, but recursive option:
    # get everything:
    result = ds.get(recursive=True, result_filter=lambda x: x.get('type') != 'dataset')
    assert_status('ok', result)

    eq_({item['path'][len(ds.path) + 1:] for item in result
         if item['type'] == 'file'},
        annexed_files)
    ok_(ds.repo.file_has_content('test-annex.dat') is True)
    ok_(subds1.repo.file_has_content('test-annex.dat') is True)
    ok_(subds2.repo.file_has_content('test-annex.dat') is True)

    # drop them:
    ds.repo.drop('test-annex.dat')
    subds1.repo.drop('test-annex.dat')
    subds2.repo.drop('test-annex.dat')
    ok_(ds.repo.file_has_content('test-annex.dat') is False)
    ok_(subds1.repo.file_has_content('test-annex.dat') is False)
    ok_(subds2.repo.file_has_content('test-annex.dat') is False)

    # now, the very same call, but without recursive:
    result = ds.get('.', recursive=False)
    assert_status('ok', result)
    # two reports: one on the requested dir, one on the single file retrieved
    eq_(len(result), 2)
    assert_result_count(
        result, 1, path=opj(ds.path, 'test-annex.dat'), status='ok')
    ok_(ds.repo.file_has_content('test-annex.dat') is True)
    ok_(subds1.repo.file_has_content('test-annex.dat') is False)
    ok_(subds2.repo.file_has_content('test-annex.dat') is False)
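
The path trimming used above, item['path'][len(ds.path) + 1:], is equivalent
to os.path.relpath for paths under ds.path on platforms with a one-character
separator; a quick self-contained check with hypothetical paths:

import os.path as op

ds_path = '/tmp/ds'   # hypothetical dataset root (POSIX)
p = op.join(ds_path, 'subm 1', 'test-annex.dat')
assert p[len(ds_path) + 1:] == op.relpath(p, ds_path)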
Example #41
def test_configs(path):

    # set up dataset with registered procedure (c&p from test_basics):
    ds = Dataset(path).create(force=True)
    ds.run_procedure('setup_yoda_dataset')
    ok_clean_git(ds.path)
    # configure dataset to look for procedures in its code folder
    ds.config.add(
        'datalad.locations.dataset-procedures',
        'code',
        where='dataset')

    # 1. run procedure based on execution guessing by run_procedure:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'some_arg\n')

    # 2. now configure specific call format including usage of substitution config
    # for run:
    ds.config.add(
        'datalad.procedures.datalad_test_proc.call-format',
        'python "{script}" "{ds}" {{mysub}} {args}',
        where='dataset'
    )
    ds.config.add(
        'datalad.run.substitutions.mysub',
        'dataset-call-config',
        where='dataset'
    )
    # TODO: Should we allow for --inputs/--outputs arguments for run_procedure
    #       (to be passed into run)?
    ds.unlock("fromproc.txt")
    # run again:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'dataset-call-config\n')

    # 3. have a conflicting config at the local (repository) level, which
    # should override the dataset-level config:
    ds.config.add(
        'datalad.procedures.datalad_test_proc.call-format',
        'python "{script}" "{ds}" local {args}',
        where='local'
    )
    ds.unlock("fromproc.txt")
    # run again:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'local\n')

    # 4. get configured help message:
    r = ds.run_procedure('datalad_test_proc', help_proc=True,
                         on_failure='ignore')
    assert_true(len(r) == 1)
    assert_in_results(r, status="impossible")

    ds.config.add(
        'datalad.procedures.datalad_test_proc.help',
        "This is a help message",
        where='dataset'
    )

    r = ds.run_procedure('datalad_test_proc', help_proc=True)
    assert_true(len(r) == 1)
    assert_in_results(r, message="This is a help message", status='ok')
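
The doubled braces in the call-format above are the key detail: '{{mysub}}'
survives the first str.format pass as '{mysub}', which the run-level
substitution then fills from datalad.run.substitutions.mysub. A standalone
sketch of just the string mechanics (paths are made up):

call_fmt = 'python "{script}" "{ds}" {{mysub}} {args}'
# first pass: call-format expansion performed for the procedure
stage1 = call_fmt.format(script='/ds/code/datalad_test_proc.py',
                         ds='/ds', args='some_arg')
assert stage1 == 'python "/ds/code/datalad_test_proc.py" "/ds" {mysub} some_arg'
# second pass: run's substitution config fills the remaining placeholder
print(stage1.format(mysub='dataset-call-config'))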
Example #42
def test_invalid_call():
    # needs spec or discover
    assert_raises(InsufficientArgumentsError, run_procedure)
    res = run_procedure('unknown', on_failure='ignore')
    assert_true(len(res) == 1)
    assert_in_results(res, status="impossible")
Example #43
def test_procedure_discovery(path, super_path):
    ps = run_procedure(discover=True)
    # a few procedures ship with datalad; discovery needs to find them
    assert_true(len(ps) > 2)
    # we get three essential properties
    eq_(
        sum(['procedure_type' in p and
             'procedure_callfmt' in p and
             'path' in p
             for p in ps]),
        len(ps))

    # set up dataset with registered procedure (c&p from test_basics):
    ds = Dataset(path).create(force=True)
    ds.run_procedure('setup_yoda_dataset')
    ok_clean_git(ds.path)
    # configure dataset to look for procedures in its code folder
    ds.config.add(
        'datalad.locations.dataset-procedures',
        'code',
        where='dataset')
    # configure dataset to run the demo procedure prior to the clean command
    ds.config.add(
        'datalad.clean.proc-pre',
        'datalad_test_proc',
        where='dataset')
    ds.add(op.join('.datalad', 'config'))

    # run discovery on the dataset:
    ps = ds.run_procedure(discover=True)

    # still needs to find procedures coming with datalad
    assert_true(len(ps) > 2)
    # we get three essential properties
    eq_(
        sum(['procedure_type' in p and
             'procedure_callfmt' in p and
             'path' in p
             for p in ps]),
        len(ps))
    # dataset's procedure needs to be in the results
    assert_in_results(ps, path=op.join(ds.path, 'code', 'datalad_test_proc.py'))

    # make it a subdataset and try again:
    super = Dataset(super_path).create()
    super.install('sub', source=ds.path)

    ps = super.run_procedure(discover=True)
    # still needs to find procedures coming with datalad
    assert_true(len(ps) > 2)
    # we get three essential properties
    eq_(
        sum(['procedure_type' in p and
             'procedure_callfmt' in p and
             'path' in p
             for p in ps]),
        len(ps))
    # dataset's procedure needs to be in the results
    assert_in_results(ps, path=op.join(super.path, 'sub', 'code',
                                       'datalad_test_proc.py'))

    if not on_windows:  # no symlinks
        import os
        # create a procedure which is a broken symlink, but recognizable as a
        # python script:
        os.symlink(op.join(super.path, 'sub', 'not_existent'),
                   op.join(super.path, 'sub', 'code', 'broken_link_proc.py'))
        # broken symlink at a procedure location, but we can't tell whether it
        # is an actual procedure without any hint on how to execute it:
        os.symlink(op.join(super.path, 'sub', 'not_existent'),
                   op.join(super.path, 'sub', 'code', 'unknown_broken_link'))

        ps = super.run_procedure(discover=True)
        # still needs to find procedures coming with datalad and the dataset
        # procedure registered before
        assert_true(len(ps) > 3)
        assert_in_results(ps, path=op.join(super.path, 'sub', 'code',
                                           'broken_link_proc.py'),
                          state='absent')
        assert_not_in_results(ps, path=op.join(super.path, 'sub', 'code',
                                               'unknown_broken_link'))
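
The broken-symlink branch relies on a dangling link being visible to islink
but not to exists; a self-contained illustration (POSIX only, names made up):

import os
import tempfile

d = tempfile.mkdtemp()
link = os.path.join(d, 'broken_link_proc.py')
os.symlink(os.path.join(d, 'not_existent'), link)
assert os.path.islink(link)        # the link itself is there ...
assert not os.path.exists(link)    # ... but its target is absent
os.unlink(link)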
Example #44
def test_unlock(path):

    ds = Dataset(path)

    # file is currently locked:
    # TODO: use get_annexed_files instead of hardcoded filename
    assert_raises(IOError, open, opj(path, 'test-annex.dat'), "w")

    # in direct mode there is no unlock:
    if ds.repo.is_direct_mode():
        res = ds.unlock()
        assert_result_count(res, 1)
        assert_status('notneeded', res)

    # in V6+ we can unlock even if the file's content isn't present:
    elif ds.repo.supports_unlocked_pointers:
        res = ds.unlock()
        assert_result_count(res, 1)
        assert_status('ok', res)
        # TODO: RF: make 'lock' a command as well
        # re-lock to further on have a consistent situation with V5:
        ds.repo._git_custom_command('test-annex.dat', ['git', 'annex', 'lock'])
    else:
        # cannot unlock without content (annex get wasn't called)
        assert_raises(CommandError, ds.unlock)  # FIXME

    ds.repo.get('test-annex.dat')
    result = ds.unlock()
    assert_result_count(result, 1)
    if ds.repo.is_direct_mode():
        assert_status('notneeded', result)
    else:
        assert_in_results(result, path=opj(ds.path, 'test-annex.dat'), status='ok')

    with open(opj(path, 'test-annex.dat'), "w") as f:
        f.write("change content")

    ds.repo.add('test-annex.dat')
    # in V6+ we need to explicitly re-lock it:
    if ds.repo.supports_unlocked_pointers:
        # TODO: RF: make 'lock' a command as well
        # re-lock to further on have a consistent situation with V5:
        ds.repo._git_custom_command('test-annex.dat', ['git', 'annex', 'lock'])
    ds.repo.commit("edit 'test-annex.dat' via unlock and lock it again")

    if not ds.repo.is_direct_mode():
        # after commit, file is locked again:
        assert_raises(IOError, open, opj(path, 'test-annex.dat'), "w")

    # content was changed:
    with open(opj(path, 'test-annex.dat'), "r") as f:
        eq_("change content", f.read())

    # unlock again, this time more specific:
    result = ds.unlock(path='test-annex.dat')
    assert_result_count(result, 1)

    if ds.repo.is_direct_mode():
        assert_in_results(result, path=opj(ds.path, 'test-annex.dat'), status='notneeded')
    else:
        assert_in_results(result, path=opj(ds.path, 'test-annex.dat'), status='ok')

    with open(opj(path, 'test-annex.dat'), "w") as f:
        f.write("change content again")

    ds.repo.add('test-annex.dat')
    # in V6+ we need to explicitly re-lock it:
    if ds.repo.supports_unlocked_pointers:
        # TODO: RF: make 'lock' a command as well
        # re-lock to further on have a consistent situation with V5:
        ds.repo._git_custom_command('test-annex.dat', ['git', 'annex', 'lock'])
    ds.repo.commit("edit 'test-annex.dat' via unlock and lock it again")

    # TODO:
    # BOOOM: test-annex.dat is writable in V6!
    # Why the hell is this different from the first time we wrote to the file
    # and locked it again?
    # Also: after opening, the file is empty.

    if not ds.repo.is_direct_mode():
        # after commit, file is locked again:
        assert_raises(IOError, open, opj(path, 'test-annex.dat'), "w")

    # content was changed:
    with open(opj(path, 'test-annex.dat'), "r") as f:
        eq_("change content again", f.read())