Example #1
def test_assert_cwd_unchanged_not_masking_exceptions():
    # Test that we are not masking out other "more important" exceptions

    orig_cwd = os.getcwd()

    @assert_cwd_unchanged
    def do_chdir_value_error():
        os.chdir(os.pardir)
        raise ValueError("error exception")

    with swallow_logs(new_level=logging.WARN) as cml:
        with assert_raises(ValueError) as cm:
            do_chdir_value_error()
        # check state retrospectively, after the exception was raised
        eq_(orig_cwd, os.getcwd(),
            "assert_cwd_unchanged didn't return us back to %s" % orig_cwd)
        assert_in("Mitigating and changing back", cml.out)

    # and again but allowing to chdir
    @assert_cwd_unchanged(ok_to_chdir=True)
    def do_chdir_value_error():
        os.chdir(os.pardir)
        raise ValueError("error exception")

    with swallow_logs(new_level=logging.WARN) as cml:
        assert_raises(ValueError, do_chdir_value_error)
        eq_(orig_cwd, os.getcwd(),
            "assert_cwd_unchanged didn't return us back to %s" % orig_cwd)
        assert_not_in("Mitigating and changing back", cml.out)
Example #2
def test_gitrepo_call_git_methods(path=None):
    gr = GitRepo(path).init()
    gr.call_git(['add', "foo", "bar"])
    gr.call_git(['commit', '-m', "foobar"])
    gr.call_git(["mv"], files=["foo", "foo.txt"])
    ok_((gr.pathobj / 'foo.txt').exists())

    for expect_fail, check in [(False, assert_in),
                               (True, assert_not_in)]:
        with swallow_logs(new_level=logging.DEBUG) as cml:
            with assert_raises(CommandError):
                gr.call_git(["mv"], files=["notthere", "dest"],
                            expect_fail=expect_fail)
            check("fatal: bad source", cml.out)

    eq_(list(gr.call_git_items_(["ls-files"], read_only=True)),
        ["bar", "foo.txt"])
    eq_(list(gr.call_git_items_(["ls-files", "-z"], sep="\0", read_only=True)),
        # Note: with the custom separator there is a trailing empty item, but
        # since this is an arbitrary command with unknown output, it isn't
        # safe to trim it.
        ["bar", "foo.txt", ""])

    with assert_raises(AssertionError):
        gr.call_git_oneline(["ls-files"], read_only=True)

    eq_(gr.call_git_oneline(["ls-files"], files=["bar"], read_only=True),
        "bar")

    ok_(gr.call_git_success(["rev-parse", "HEAD^{commit}"], read_only=True))
    with swallow_logs(new_level=logging.DEBUG) as cml:
        assert_false(gr.call_git_success(["rev-parse", "HEAD^{blob}"],
                                         read_only=True))
        assert_not_in("expected blob type", cml.out)
Example #3
def test_windows_incompatible_names(path=None):
    ds = Dataset(path).create()
    create_tree(
        path, {
            'imgood': 'Look what a nice name I have',
            'illegal:character.txt': 'strange choice of name',
            'spaceending ': 'who does these things?',
            'lookmumadot.': 'why would you do this?',
            'COM1.txt': 'I am a serial port',
            'dirs with spaces': {
                'seriously?': 'you are stupid',
                'why somuch?wrongstuff.': "I gave up"
            },
        })
    ds.repo.config.set('datalad.save.windows-compat-warning', 'error')
    ds.save('.datalad/config')
    res = ds.save(on_failure='ignore')
    # check that none of the 6 problematic files was saved, but the good one was
    assert_result_count(res, 6, status='impossible', action='save')
    assert_result_count(res, 1, status='ok', action='save')

    # check that the warning is emitted
    ds.repo.config.set('datalad.save.windows-compat-warning', 'warning')
    ds.save('.datalad/config')
    with swallow_logs(new_level=logging.WARN) as cml:
        ds.save()
        cml.assert_logged(
            "Some elements of your dataset are not compatible with Windows "
            "systems. Disable this check by changing "
            "datalad.save.windows-compat-warning or consider renaming the "
            "following elements:")
        assert_in("Elements using a reserved filename:", cml.out)
        assert_in("Elements with illegal characters:", cml.out)
        assert_in("Elements ending with a dot:", cml.out)
        assert_in("Elements ending with a space:", cml.out)

    # check that a setting of 'none' really does nothing
    ds.repo.config.set('datalad.save.windows-compat-warning', 'none')
    ds.save('.datalad/config')
    create_tree(
        path, {
            'more illegal:characters?.py':
            'My arch nemesis uses Windows and I will'
            'destroy them! Muahahaha'
        })
    with swallow_logs(new_level=logging.WARN) as cml:
        res = ds.save()
        # we shouldn't see warnings
        assert_not_in(
            "Some elements of your dataset are not compatible with Windows "
            "systems. Disable this check by changing "
            "datalad.save.windows-compat-warning or consider renaming the "
            "following elements:", cml.out)
        # make sure the file is saved successfully
        assert_result_count(res, 1, status='ok', action='save')
Example #4
def test_addurls_dry_run(path=None):
    ds = Dataset(path).create(force=True)

    json_file = "links.json"
    with open(op.join(ds.path, json_file), "w") as jfh:
        json.dump([{
            "url": "URL/a.dat",
            "name": "a",
            "subdir": "foo"
        }, {
            "url": "URL/b.dat",
            "name": "b",
            "subdir": "bar"
        }, {
            "url": "URL/c.dat",
            "name": "c",
            "subdir": "foo"
        }], jfh)

    ds.save(message="setup")

    with swallow_logs(new_level=logging.INFO) as cml:
        ds.addurls(json_file,
                   "{url}",
                   "{subdir}//{_url_filename_root}",
                   dry_run=True,
                   result_renderer='disabled')

        for dir_ in ["foo", "bar"]:
            assert_in("Would create a subdataset at {}".format(dir_), cml.out)
        assert_in(
            "Would download URL/a.dat to {}".format(
                os.path.join(path, "foo", "BASE")), cml.out)

        assert_in("Metadata: {}".format([u"name=a", u"subdir=foo"]), cml.out)
Example #5
def test_url_base():
    # Basic checks
    assert_raises(ValueError,
                  URL,
                  "http://example.com",
                  hostname='example.com')
    url = URL("http://example.com")
    eq_(url.hostname, 'example.com')
    eq_(url.scheme, 'http')
    eq_(url.port, '')  # not specified -- empty strings
    eq_(url.username, '')  # not specified -- empty strings
    eq_(repr(url), "URL(hostname='example.com', scheme='http')")
    eq_(url, "http://example.com")  # automagic coercion in __eq__

    neq_(URL(), URL(hostname='x'))

    smth = URL('smth')
    eq_(smth.hostname, '')
    ok_(bool(smth))
    nok_(bool(URL()))

    assert_raises(ValueError, url._set_from_fields, unknown='1')

    with swallow_logs(new_level=logging.WARNING) as cml:
        # we don't "care" about params ATM so there is a warning if there are any
        purl = URL("http://example.com/;param")
        eq_(str(purl),
            'http://example.com/;param')  # but we do maintain the original string
        assert_in('ParseResults contains params', cml.out)
        eq_(purl.as_str(), 'http://example.com/')
Example #6
    def test_addurls_version(self=None, path=None):
        ds = Dataset(path).create(force=True)

        def version_fn(url):
            if url.endswith("b.dat"):
                raise ValueError("Scheme error")
            return url + ".v1"

        with patch("datalad.local.addurls.get_versioned_url", version_fn):
            with swallow_logs(new_level=logging.WARNING) as cml:
                ds.addurls(self.json_file,
                           "{url}",
                           "{name}",
                           version_urls=True,
                           result_renderer='disabled')
                assert_in("b.dat", str(cml.out))

        names = ["a", "c"]
        for fname in names:
            ok_exists(os.path.join(path, fname))

        whereis = ds.repo.whereis(names, output="full")
        for fname, info in whereis.items():
            eq_(info[WEB_SPECIAL_REMOTE_UUID]['urls'],
                ["{}udir/{}.dat.v1".format(self.url, fname)])
Example #7
def test_install_skip_failed_recursive(src=None, path=None):
    _mk_submodule_annex(src, fname="test-annex.dat", fcontent="whatever")

    # install top level:
    ds = install(path, source=src)
    sub1 = Dataset(opj(path, 'subm 1'))
    sub2 = Dataset(opj(path, '2'))
    # sabotage recursive installation of 'subm 1' by polluting the target:
    with open(opj(path, 'subm 1', 'blocking.txt'), "w") as f:
        f.write("sdfdsf")

    with swallow_logs(new_level=logging.WARNING) as cml:
        result = ds.get(os.curdir,
                        recursive=True,
                        on_failure='ignore',
                        result_xfm=None)
        # toplevel dataset was in the house already
        assert_result_count(result, 0, path=ds.path, type='dataset')
        # 'subm 1' should fail to install; it is [1] in the results since it
        # comes after the '2' submodule
        assert_in_results(
            result,
            status='error',
            path=sub1.path,
            type='dataset',
            message='target path already exists and not empty, refuse to '
            'clone into target path')
        assert_in_results(result, status='ok', path=sub2.path)
Example #8
def test_too_long():
    with swallow_logs(new_level=logging.ERROR) as cml:
        with assert_raises(OSError):  # we still raise an exception if the limit is exceeded by too much
            Runner().run(
                [sys.executable, '-c', 'import sys; print(len(sys.argv))'] + [str(i) for i in range(CMD_MAX_ARG)],
                protocol=StdOutCapture
            )
        cml.assert_logged('.*use.*ulimit.*')
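Taken together, the example above (which passes the pattern '.*use.*ulimit.*') and Example #13 further down (which passes regex=False) suggest that cml.assert_logged() interprets its message argument as a regular expression by default and as a literal substring when regex=False is given. A minimal sketch of both call styles under that assumption; the logger name and messages are made up:

import logging

from datalad.utils import swallow_logs

lgr = logging.getLogger('datalad.demo')  # hypothetical logger

with swallow_logs(new_level=logging.ERROR) as cml:
    lgr.error("failed after 3 retries")
    # pattern match (default behaviour, as assumed from the example above)
    cml.assert_logged('.*failed after [0-9]+ retries.*')
    # literal match, as in Example #13
    cml.assert_logged("failed after 3 retries", regex=False)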
Example #9
    def test_addurls_dropped_urls(self=None, path=None):
        ds = Dataset(path).create(force=True)
        with swallow_logs(new_level=logging.WARNING) as cml:
            ds.addurls(self.json_file,
                       "",
                       "{subdir}//{name}",
                       result_renderer='disabled')
            assert_re_in(r".*Dropped [0-9]+ row\(s\) that had an empty URL",
                         str(cml.out))
Example #10
def test_guess_dot_git(path=None, url=None, tdir=None, *, annex):
    repo = (AnnexRepo if annex else GitRepo)(path, create=True)
    repo.add('file.txt', git=not annex)
    repo.commit()

    # we need to prepare the repo to be served via http, otherwise install must fail
    with swallow_logs() as cml:
        assert_raises(IncompleteResultsError, install, path=tdir, source=url)
    ok_(not exists(tdir))

    Runner(cwd=path).run(['git', 'update-server-info'])

    with swallow_logs() as cml:
        installed = install(tdir, source=url)
        assert_not_in("Failed to get annex.uuid", cml.out)
    eq_(installed.pathobj.resolve(), Path(tdir).resolve())
    ok_(exists(tdir))
    assert_repo_status(tdir, annex=annex)
Example #11
def test_external_versions_rogue_module(topd=None):
    ev = ExternalVersions()
    # if a module throws some other non-ImportError exception upon import,
    # we must not crash but issue a warning
    modname = 'verycustomrogue__'
    create_tree(topd, {modname + '.py': 'raise Exception("pickaboo")'})
    with patch('sys.path', [topd]), \
        swallow_logs(new_level=logging.WARNING) as cml:
        assert ev[modname] is None
        assert_true(ev.dumps(indent=True).endswith(linesep))
        assert_in('pickaboo', cml.out)
Example #12
    def test_addurls_subdataset(self=None, path=None):
        ds = Dataset(path).create(force=True)

        for save in True, False:
            label = "save" if save else "nosave"
            with swallow_outputs() as cmo:
                ds.addurls(self.json_file,
                           "{url}",
                           "{subdir}-" + label + "//{name}",
                           save=save,
                           cfg_proc=["yoda"])
                # The custom result renderer transforms the subdataset
                # action=create results into something more informative than
                # "create(ok): . (dataset)"...
                assert_in("create(ok): foo-{} (dataset)".format(label),
                          cmo.out)
                # ... and that doesn't lose the standard summary.
                assert_in("create (ok: 2)", cmo.out)

            subdirs = [
                op.join(ds.path, "{}-{}".format(d, label))
                for d in ["foo", "bar"]
            ]
            subdir_files = dict(zip(subdirs, [["a", "c"], ["b"]]))

            for subds, fnames in subdir_files.items():
                for fname in fnames:
                    ok_exists(op.join(subds, fname))
                # cfg_proc was applied to the generated subdatasets.
                ok_exists(op.join(subds, "code"))
            if save:
                assert_repo_status(path)
            else:
                # The datasets are created but not saved (since asked not to)
                assert_repo_status(path, untracked=subdirs)
                # but the downloaded files aren't saved either.
                for subds, fnames in subdir_files.items():
                    assert_repo_status(subds, added=fnames)

        # Now save the "--nosave" changes and check that we have
        # all the subdatasets.
        ds.save()
        eq_(
            set(subdatasets(dataset=ds, recursive=True,
                            result_xfm="relpaths")),
            {"foo-save", "bar-save", "foo-nosave", "bar-nosave"})

        # We don't try to recreate existing subdatasets.
        with swallow_logs(new_level=logging.DEBUG) as cml:
            ds.addurls(self.json_file,
                       "{url}",
                       "{subdir}-nosave//{name}",
                       result_renderer='disabled')
            assert_in("Not creating subdataset at existing path", cml.out)
Example #13
    def test_addurls_no_rows(self=None, path=None):
        ds = Dataset(path).create(force=True)
        for fname in ["in.csv", "in.tsv", "in.json"]:
            with swallow_logs(new_level=logging.WARNING) as cml:
                assert_in_results(ds.addurls(fname,
                                             "{url}",
                                             "{name}",
                                             result_renderer='disabled'),
                                  action="addurls",
                                  status="notneeded")
                cml.assert_logged("No rows", regex=False)
Example #14
def check_filters(name):
    with swallow_logs(new_level=logging.DEBUG, name=name) as cml:
        lgr1 = logging.getLogger(name + '.goodone')
        lgr2 = logging.getLogger(name + '.anotherone')
        lgr3 = logging.getLogger(name + '.bad')
        lgr1.debug('log1')
        lgr2.info('log2')
        lgr3.info('log3')
        assert_in('log1', cml.out)
        assert_in('log2', cml.out)
        assert_not_in('log3', cml.out)
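The example above exercises the name= parameter: swallow_logs appears to attach its capturing handler to the logger given by name, so only records propagating through that logger's hierarchy end up in cml.out (the '.bad' child being excluded above presumably relies on a log-name filter configured by the calling test, which is not shown here). A minimal sketch of the scoping itself, with made-up logger names:

import logging

from datalad.utils import swallow_logs

with swallow_logs(new_level=logging.INFO, name='myapp') as cml:
    # propagates to the 'myapp' logger, so it is captured
    logging.getLogger('myapp.component').info('inside the captured hierarchy')
    # unrelated logger, never reaches the capturing handler
    logging.getLogger('otherlib').info('outside the captured hierarchy')
    assert 'inside the captured hierarchy' in cml.out
    assert 'outside the captured hierarchy' not in cml.out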
Example #15
def test_install_dataladri(src=None, topurl=None, path=None):
    # make plain git repo
    ds_path = opj(src, 'ds')
    gr = GitRepo(ds_path, create=True)
    gr.add('test.txt')
    gr.commit('demo')
    Runner(cwd=gr.path).run(['git', 'update-server-info'])
    # now install it somewhere else
    with patch('datalad.consts.DATASETS_TOPURL', topurl), \
            swallow_logs():
        ds = install(path, source='///ds')
    eq_(ds.path, path)
    assert_repo_status(path, annex=False)
    ok_file_has_content(opj(path, 'test.txt'), 'some')
Example #16
def test_log_progress_noninteractive_filter():
    name = "dl-test"
    lgr = LoggerHelper(name).get_initialized_logger()
    pbar_id = "lp_test"
    with swallow_logs(new_level=logging.INFO, name=name) as cml:
        log_progress(lgr.info, pbar_id, "Start", label="testing", total=3)
        log_progress(lgr.info, pbar_id, "THERE0", update=1)
        log_progress(lgr.info, pbar_id, "NOT", update=1,
                     noninteractive_level=logging.DEBUG)
        log_progress(lgr.info, pbar_id, "THERE1", update=1,
                     noninteractive_level=logging.INFO)
        log_progress(lgr.info, pbar_id, "Done")
        for present in ["Start", "THERE0", "THERE1", "Done"]:
            assert_in(present, cml.out)
        assert_not_in("NOT", cml.out)
Example #17
def test_push_git_annex_branch_many_paths_same_data(path=None):
    path = Path(path)
    ds = Dataset(path / "ds").create(force=True)
    ds.save()
    mk_push_target(ds, "target", str(path / "target"), annex=True, bare=False)
    nbytes = sum(
        ds.repo.get_content_annexinfo(paths=[f])[f]["bytesize"]
        for f in [ds.repo.pathobj / "f0",
                  ds.repo.pathobj / "f3",
                  ds.repo.pathobj / "f4"])
    with swallow_logs(new_level=logging.DEBUG) as cml:
        res = ds.push(to="target")
    assert_in("{} bytes of annex data".format(nbytes), cml.out)
    # 3 files point to content already covered by another file.
    assert_result_count(res, 3, action="copy", type="file", status="notneeded")
Example #18
def check_basic_scenario(url, d=None):
    ds = Dataset(d).create()
    annex = ds.repo

    # TODO skip if no boto or no credentials
    get_test_providers(url)  # so to skip if unknown creds

    # Let's try to add some file which we should have access to
    ds.download_url(url)
    ds.save()

    # git-annex got a fix where it stopped replacing '-' in the middle of the
    # filename. Let's cater to developers who might have some intermediate
    # version that is not easy to compare -- we will just check that only one
    # file is there and that it matches what we expect when outside of the
    # development versions range:
    filenames = glob.glob(op.join(d, '3versions[-_]allversioned.txt'))
    eq_(len(filenames), 1)
    filename = op.basename(filenames[0])
    if external_versions['cmd:annex'] < '8.20200501':
        assert_in('_', filename)
    # Date after the fix in 8.20200501-53-gcabbc91b1
    elif external_versions['cmd:annex'] >= '8.20200512':
        assert_in('-', filename)
    else:
        pass  # either of those is ok

    whereis1 = annex.whereis(filename, output='full')
    eq_(len(whereis1), 2)  # here and datalad
    annex.drop(filename)

    whereis2 = annex.whereis(filename, output='full')
    eq_(len(whereis2), 1)  # datalad

    # make sure that there are no "hidden" error messages, despite the
    # whereis command succeeding
    # https://github.com/datalad/datalad/issues/6453#issuecomment-1047533276
    from datalad.runner import StdOutErrCapture

    # we need to swallow logs since if DATALAD_LOG_LEVEL is set low, we
    # would get all the git-annex debug output in stderr
    with swallow_logs(new_level=logging.INFO) as cml:
        out = annex._call_annex(['whereis'], protocol=StdOutErrCapture)
        eq_(out['stderr'].strip(), '')

    # if we provide some bogus address which we can't access, we shouldn't pollute output
    with assert_raises(CommandError) as cme:
        annex.add_url_to_file('bogus', url + '_bogus')
    assert_in('addurl: 1 failed', cme.value.stderr)
Example #19
def test_assert_Xwd_unchanged_ok_chdir(func):
    # Test that with ok_to_chdir=True a directory change is allowed and no warning is issued

    orig_cwd = os.getcwd()
    orig_pwd = getpwd()

    @assert_cwd_unchanged(ok_to_chdir=True)
    def do_chdir_value_error():
        func(os.pardir)
        return "a value"

    with swallow_logs() as cml:
        eq_(do_chdir_value_error(), "a value")
        eq_(orig_cwd, os.getcwd(),
            "assert_cwd_unchanged didn't return us back to cwd %s" % orig_cwd)
        eq_(orig_pwd, getpwd(),
            "assert_cwd_unchanged didn't return us back to cwd %s" % orig_pwd)
        assert_not_in("Mitigating and changing back", cml.out)
Example #20
def test_is_url():
    ok_(is_url('file://localhost/some'))
    ok_(is_url('http://localhost'))
    ok_(is_url('ssh://me@localhost'))
    # in the current understanding it is indeed a URL, but an 'ssh' one with
    # implicit=True, not just a useless scheme=weird hoping to point to a netloc
    with swallow_logs():
        ok_(is_url('weird://'))
    nok_(is_url('relative'))
    nok_(is_url('/absolute'))
    ok_(is_url('like@sshlogin'))  # actually we do allow ssh:implicit urls ATM
    nok_(is_url(''))
    nok_(is_url(' '))
    nok_(is_url(123))  # stuff of other types wouldn't be considered a URL

    # we can pass RI instance directly
    ok_(is_url(RI('file://localhost/some')))
    nok_(is_url(RI('relative')))
Example #21
def test_ssh_custom_identity_file():
    ifile = "/tmp/dl-test-ssh-id"  # Travis
    if not op.exists(ifile):
        raise SkipTest(
            "Travis-specific '{}' identity file does not exist".format(ifile))

    with patch_config({"datalad.ssh.identityfile": ifile}):
        with swallow_logs(new_level=logging.DEBUG) as cml:
            manager = SSHManager()
            ssh = manager.get_connection('ssh://datalad-test')
            cmd_out, _ = ssh("echo blah")
            if _ssh_manager_is_multiplex:
                expected_socket = op.join(
                    str(manager.socket_dir),
                    get_connection_hash("datalad-test", identity_file=ifile))
                ok_(exists(expected_socket))
            manager.close()
            assert_in("-i", cml.out)
            assert_in(ifile, cml.out)
Example #22
def test_create_alias(ds_path=None, ria_path=None, clone_path=None):
    ds_path = Path(ds_path)
    clone_path = Path(clone_path)

    ds_path.mkdir()
    dsa = Dataset(ds_path / "a").create()

    res = dsa.create_sibling_ria(url="ria+file://{}".format(ria_path),
                                 name="origin",
                                 alias="ds-a",
                                 new_store_ok=True)
    assert_result_count(res, 1, status='ok', action='create-sibling-ria')

    ds_clone = clone(source="ria+file://{}#~ds-a".format(ria_path),
                     path=clone_path / "a")
    assert_repo_status(ds_clone.path)

    # multiple datasets in a RIA store with different aliases work
    dsb = Dataset(ds_path / "b").create()

    res = dsb.create_sibling_ria(url="ria+file://{}".format(ria_path),
                                 name="origin",
                                 alias="ds-b",
                                 new_store_ok=True)
    assert_result_count(res, 1, status='ok', action='create-sibling-ria')

    ds_clone = clone(source="ria+file://{}#~ds-b".format(ria_path),
                     path=clone_path / "b")
    assert_repo_status(ds_clone.path)

    # second dataset in a RIA store with the same alias emits a warning
    dsc = Dataset(ds_path / "c").create()

    with swallow_logs(logging.WARNING) as cml:
        res = dsc.create_sibling_ria(url="ria+file://{}".format(ria_path),
                                     name="origin",
                                     alias="ds-a",
                                     new_store_ok=True)
        assert_in(
            "Alias 'ds-a' already exists in the RIA store, not adding an alias",
            cml.out)
    assert_result_count(res, 1, status='ok', action='create-sibling-ria')
Example #23
def _check_ri(ri, cls, exact_str=True, localpath=None, **fields):
    """just a helper to carry out few checks on urls"""
    with swallow_logs(new_level=logging.DEBUG) as cml:
        ri_ = cls(**fields)
        murl = RI(ri)
        eq_(murl.__class__, cls)  # not just a subclass
        eq_(murl, ri_)
        if isinstance(ri, str):
            eq_(str(RI(ri)), ri)
        eq_(eval(repr(ri_)), ri)  # repr leads back to identical ri_
        eq_(ri,
            ri_)  # just in case ;)  above should fail first if smth is wrong
        if not exact_str:
            assert_in('Parsed version of', cml.out)
    (eq_ if exact_str else neq_)(
        str(ri),
        str(ri_))  # that we can reconstruct it EXACTLY on our examples
    # and that we have access to all those fields
    nok_(set(fields).difference(set(cls._FIELDS)))
    for f, v in fields.items():
        eq_(getattr(ri_, f), v)

    if localpath:
        eq_(ri_.localpath, localpath)
        old_localpath = ri_.localpath  # for a test below
    else:
        # if not given -- must be a remote url, should raise exception
        with assert_raises(ValueError):
            ri_.localpath

    # This one does not have a path. TODO: either proxy path from its .RI or adjust
    # hierarchy of classes to make it more explicit
    if cls == GitTransportRI:
        return
    # do changes in the path persist?
    old_str = str(ri_)
    ri_.path = newpath = opj(ri_.path, 'sub')
    eq_(ri_.path, newpath)
    neq_(str(ri_), old_str)
    if localpath:
        eq_(ri_.localpath, opj(old_localpath, 'sub'))
Example #24
def test_rerun_invalid_merge_run_commit(path=None):
    ds = Dataset(path).create()
    ds.run("echo foo >>foo")
    ds.run("echo invalid >>invalid")
    run_msg = last_commit_msg(ds.repo)
    run_hexsha = ds.repo.get_hexsha()
    ds.repo.call_git(["reset", "--hard", DEFAULT_BRANCH + "~"])
    with open(op.join(ds.path, "non-run"), "w") as nrfh:
        nrfh.write("non-run")
    ds.save()
    # Assign two parents to the invalid run commit.
    commit = ds.repo.call_git_oneline(
        ["commit-tree", run_hexsha + "^{tree}", "-m", run_msg,
         "-p", run_hexsha + "^",
         "-p", ds.repo.get_hexsha()])
    ds.repo.call_git(["reset", "--hard", commit])
    hexsha_orig = ds.repo.get_hexsha()
    with swallow_logs(new_level=logging.WARN) as cml:
        ds.rerun(since="")
        assert_in("has run information but is a merge commit", cml.out)
    eq_(len(ds.repo.get_revisions(hexsha_orig + ".." + DEFAULT_BRANCH)), 1)
Example #25
def test_ssh_manager_close_no_throw(bogus_socket=None):
    manager = MultiplexSSHManager()

    class bogus:
        def close(self):
            raise Exception("oh I am so bad")

        @property
        def ctrl_path(self):
            with open(bogus_socket, "w") as f:
                f.write("whatever")
            return Path(bogus_socket)

    # since we are digging into protected area - should also set _prev_connections
    manager._prev_connections = {}
    manager._connections['bogus'] = bogus()
    assert_raises(Exception, manager.close)
    assert_raises(Exception, manager.close)

    # but should proceed just fine if allow_fail=False
    with swallow_logs(new_level=logging.DEBUG) as cml:
        manager.close(allow_fail=False)
        assert_in('Failed to close a connection: oh I am so bad', cml.out)
Example #26
def test_git_config_warning(path=None):
    if 'GIT_AUTHOR_NAME' in os.environ:
        raise SkipTest("Found existing explicit identity config")

    # Note: An easier way to test this, would be to just set GIT_CONFIG_GLOBAL
    # to point somewhere else. However, this is not supported by git before
    # 2.32. Hence, stick with changed HOME in this test, but be sure to unset a
    # possible GIT_CONFIG_GLOBAL in addition.

    patched_env = os.environ.copy()
    patched_env.pop('GIT_CONFIG_GLOBAL', None)
    patched_env.update(get_home_envvars(path))
    with chpwd(path), \
            patch.dict('os.environ', patched_env, clear=True), \
            swallow_logs(new_level=30) as cml:
        # no configs in that empty HOME
        from datalad.api import Dataset
        from datalad.config import ConfigManager

        # reach into the class and disable the "checked" flag that
        # has already been tripped before we get here
        ConfigManager._checked_git_identity = False
        Dataset(path).config.reload()
        assert_in("configure Git before", cml.out)
Example #27
def test_globbedpaths(path=None):
    dotdir = op.curdir + op.sep

    for patterns, expected in [
        (["1.txt", "2.dat"], {"1.txt", "2.dat"}),
        ([dotdir + "1.txt", "2.dat"], {dotdir + "1.txt", "2.dat"}),
        (["*.txt", "*.dat"], {"1.txt", "2.dat", bOBSCURE_FILENAME, "3.txt"}),
        ([dotdir + "*.txt", "*.dat"],
         {dotdir + "1.txt", "2.dat", bOBSCURE_FILENAME, dotdir + "3.txt"}),
        ([op.join("subdir", "*.txt")],
         {op.join("subdir", "1.txt"), op.join("subdir", "2.txt")}),
        (["subdir" + op.sep], {"subdir" + op.sep}),
        ([dotdir + op.join("subdir", "*.txt")],
         {dotdir + op.join(*ps)
          for ps in [("subdir", "1.txt"), ("subdir", "2.txt")]}),
        (["*.txt"], {"1.txt", "3.txt"}),
        ([op.join("subdir", "**")],
         {op.join(*ps)
          for ps in [("subdir" + op.sep,),
                     ("subdir", "subsub"),
                     ("subdir", "1.txt"),
                     ("subdir", "2.txt"),
                     ("subdir", "subsub", "3.dat")]}),
        ([dotdir + op.join("**", "*.dat")],
         {dotdir + op.join("2.dat"),
          dotdir + bOBSCURE_FILENAME,
          dotdir + op.join("subdir", "subsub", "3.dat")}),
    ]:
        gp = GlobbedPaths(patterns, pwd=path)
        eq_(set(gp.expand()), expected)
        eq_(set(gp.expand(full=True)), {op.join(path, p) for p in expected})

    pardir = op.pardir + op.sep
    subdir_path = op.join(path, "subdir")
    for patterns, expected in [
        (["*.txt"], {"1.txt", "2.txt"}),
        ([dotdir + "*.txt"], {dotdir + p for p in ["1.txt", "2.txt"]}),
        ([pardir + "*.txt"], {pardir + p for p in ["1.txt", "3.txt"]}),
        ([dotdir + pardir + "*.txt"],
         {dotdir + pardir + p for p in ["1.txt", "3.txt"]}),
        # Patterns that don't match are retained by default.
        (["amiss"], {"amiss"}),
    ]:
        gp = GlobbedPaths(patterns, pwd=subdir_path)
        eq_(set(gp.expand()), expected)
        eq_(set(gp.expand(full=True)),
            {op.join(subdir_path, p)
             for p in expected})

    # Full patterns still get returned as relative to pwd.
    gp = GlobbedPaths([op.join(path, "*.dat")], pwd=path)
    eq_(gp.expand(), ["2.dat", bOBSCURE_FILENAME])

    # "." gets special treatment.
    gp = GlobbedPaths([".", "*.dat"], pwd=path)
    eq_(set(gp.expand()), {"2.dat", bOBSCURE_FILENAME, "."})
    eq_(gp.expand(dot=False), ["2.dat", bOBSCURE_FILENAME])
    gp = GlobbedPaths(["."], pwd=path, expand=False)
    eq_(gp.expand(), ["."])
    eq_(gp.paths, ["."])

    # The glob outputs for each pattern are sorted.
    glob_results = {"z": "z", "a": ["x", "d", "b"]}
    with patch('glob.glob', lambda k, **kwargs: glob_results[k]):
        gp = GlobbedPaths(["z", "a"])
        eq_(gp.expand(), ["z", "b", "d", "x"])

    # glob expansion for paths property is determined by expand argument.
    for expand, expected in [(True, ["2.dat", bOBSCURE_FILENAME]),
                             (False, ["*.dat"])]:
        gp = GlobbedPaths(["*.dat"], pwd=path, expand=expand)
        eq_(gp.paths, expected)

    with swallow_logs(new_level=logging.DEBUG) as cml:
        GlobbedPaths(["not here"], pwd=path).expand()
        assert_in("No matching files found for 'not here'", cml.out)
Example #28
def test_Dataset_flyweight(path1=None, path2=None):

    import gc
    import sys

    ds1 = Dataset(path1)
    assert_is_instance(ds1, Dataset)
    # Don't create circular references or anything similar
    assert_equal(1, sys.getrefcount(ds1) - 1)

    ds1.create()

    # Due to issue 4862, we currently still require gc.collect() under unclear
    # circumstances to get rid of an exception traceback when creating in an
    # existing directory. That traceback references the respective function
    # frames which in turn reference the repo instance (they are methods).
    # Doesn't happen on all systems, though. Eventually we need to figure that
    # out.
    # However, still test for the refcount after gc.collect() to ensure we don't
    # introduce new circular references and make the issue worse!
    gc.collect()

    # refcount still fine after repo creation:
    assert_equal(1, sys.getrefcount(ds1) - 1)

    # instantiate again:
    ds2 = Dataset(path1)
    assert_is_instance(ds2, Dataset)
    # the very same object:
    ok_(ds1 is ds2)

    # reference the same via relative path:
    with chpwd(path1):
        ds3 = Dataset(relpath(path1, start=path2))
        ok_(ds1 == ds3)
        ok_(ds1 is ds3)

    # gc knows one such object only:
    eq_(
        1,
        len([
            o for o in gc.get_objects()
            if isinstance(o, Dataset) and o.path == path1
        ]))

    # on windows a symlink is not what you think it is
    if not on_windows:
        # reference the same via symlink:
        with chpwd(path2):
            os.symlink(path1, 'linked')
            ds4 = Dataset('linked')
            ds4_id = id(ds4)
            ok_(ds4 == ds1)
            ok_(ds4 is not ds1)

        # underlying repo, however, IS the same:
        ok_(ds4.repo is ds1.repo)

    # deleting one reference has no effect on the other:
    del ds1
    gc.collect()  # TODO: see first comment above
    ok_(ds2 is not None)
    ok_(ds2.repo is ds3.repo)
    if not on_windows:
        ok_(ds2.repo is ds4.repo)

    # deleting remaining references should lead to garbage collection
    del ds2

    with swallow_logs(new_level=1) as cml:
        del ds3
        gc.collect()  # TODO: see first comment above
        # flyweight vanished:
        assert_not_in(path1, Dataset._unique_instances.keys())
        # no such instance known to gc anymore:
        eq_([], [
            o for o in gc.get_objects()
            if isinstance(o, Dataset) and o.path == path1
        ])
        # underlying repo should only be cleaned up, if ds3 was the last
        # reference to it. Otherwise the repo instance should live on
        # (via symlinked ds4):
        finalizer_log = "Finalizer called on: AnnexRepo(%s)" % path1
        if on_windows:
            cml.assert_logged(msg=finalizer_log, level="Level 1", regex=False)
        else:
            assert_not_in(finalizer_log, cml.out)
            # symlinked is still there:
            ok_(ds4 is not None)
            eq_(ds4_id, id(ds4))
Example #29
def test_aggregation(path=None):
    with chpwd(path):
        assert_raises(InsufficientArgumentsError, aggregate_metadata, None)
    # a hierarchy of three (super/sub)datasets, each with some native metadata
    ds = Dataset(opj(path, 'origin')).create(force=True)
    # before anything is aggregated we would get nothing and only a log warning
    with swallow_logs(new_level=logging.WARNING) as cml:
        assert_equal(list(query_aggregated_metadata('all', ds, [])), [])
    assert_re_in('.*Found no aggregated metadata.*update', cml.out)
    ds.config.add('datalad.metadata.nativetype', 'frictionless_datapackage',
                  scope='branch')
    subds = ds.create('sub', force=True)
    subds.config.add('datalad.metadata.nativetype', 'frictionless_datapackage',
                     scope='branch')
    subsubds = subds.create('subsub', force=True)
    subsubds.config.add('datalad.metadata.nativetype', 'frictionless_datapackage',
                        scope='branch')
    ds.save(recursive=True)
    assert_repo_status(ds.path)
    # aggregate metadata from all subdatasets into any superdataset, including
    # intermediate ones
    res = ds.aggregate_metadata(recursive=True, update_mode='all')
    # we get success report for both subdatasets and the superdataset,
    # and they get saved
    assert_result_count(res, 3, status='ok', action='aggregate_metadata')
    assert_in_results(res, action='save', status="ok")
    # nice and tidy
    assert_repo_status(ds.path)

    # quick test of aggregate report
    aggs = ds.metadata(get_aggregates=True)
    # one for each dataset
    assert_result_count(aggs, 3)
    # mother also reports the layout version
    assert_result_count(aggs, 1, path=ds.path, layout_version=1)

    # store clean direct result
    origres = ds.metadata(recursive=True)
    # basic sanity check
    assert_result_count(origres, 6)
    assert_result_count(origres, 3, type='dataset')
    assert_result_count(origres, 3, type='file')  # Now that we have annex.key
    # three different IDs
    assert_equal(3, len(set([s['dsid'] for s in origres if s['type'] == 'dataset'])))
    # and we know about all three datasets
    for name in ('MOTHER_äöü東', 'child_äöü東', 'grandchild_äöü東'):
        assert_true(
            sum([s['metadata']['frictionless_datapackage']['name'] \
                    == ensure_unicode(name) for s in origres
                 if s['type'] == 'dataset']))

    # now clone the beast to simulate a new user installing an empty dataset
    clone = install(
        opj(path, 'clone'), source=ds.path,
        result_xfm='datasets', return_type='item-or-list')
    # ID mechanism works
    assert_equal(ds.id, clone.id)

    # get fresh metadata
    cloneres = clone.metadata()
    # basic sanity check
    assert_result_count(cloneres, 2)
    assert_result_count(cloneres, 1, type='dataset')
    assert_result_count(cloneres, 1, type='file')

    # now loop over the previous results from the direct metadata query of
    # origin and make sure we get the exact same stuff from the clone
    _compare_metadata_helper(origres, clone)

    # now obtain a subdataset in the clone, should make no difference
    assert_status('ok', clone.install('sub', result_xfm=None, return_type='list'))
    _compare_metadata_helper(origres, clone)

    # test search in search tests, not all over the place
    ## query smoke test
    assert_result_count(clone.search('mother', mode='egrep'), 1)
    assert_result_count(clone.search('(?i)MoTHER', mode='egrep'), 1)

    child_res = clone.search('child', mode='egrep')
    assert_result_count(child_res, 2)
    for r in child_res:
        if r['type'] == 'dataset':
            assert_in(
                r['query_matched']['frictionless_datapackage.name'],
                r['metadata']['frictionless_datapackage']['name'])
Example #30
    def test_addurls(self=None, path=None):
        ds = Dataset(path).create(force=True)

        def get_annex_commit_counts():
            return len(ds.repo.get_revisions("git-annex"))

        n_annex_commits = get_annex_commit_counts()

        # Meanwhile also test that we can specify the path relative to the top
        # of the dataset, as paths are generally treated in the Python API,
        # and that it is the one saved in the commit message record
        json_file = op.relpath(self.json_file, ds.path)

        ds.addurls(json_file,
                   "{url}",
                   "{name}",
                   exclude_autometa="(md5sum|size)",
                   result_renderer='disabled')
        ok_startswith(ds.repo.format_commit('%b', DEFAULT_BRANCH),
                      f"url_file='{json_file}'")

        filenames = ["a", "b", "c"]
        for fname in filenames:
            ok_exists(op.join(ds.path, fname))

        for (fname, meta), subdir in zip(ds.repo.get_metadata(filenames),
                                         ["foo", "bar", "foo"]):
            assert_dict_equal(meta, {"subdir": [subdir], "name": [fname]})

        # Ignore this check if we're faking dates because that disables
        # batch mode.
        # Also ignore if on Windows as it seems as if a git-annex bug
        # leads to separate meta data commits:
        # https://github.com/datalad/datalad/pull/5202#discussion_r535429704
        if not (dl_cfg.get('datalad.fake-dates') or on_windows):
            # We should have two new commits on the git-annex: one for the
            # added urls and one for the added metadata.
            eq_(n_annex_commits + 2, get_annex_commit_counts())

        # Add to already existing links, overwriting.
        with swallow_logs(new_level=logging.DEBUG) as cml:
            ds.addurls(self.json_file,
                       "{url}",
                       "{name}",
                       ifexists="overwrite",
                       result_renderer='disabled')
            for fname in filenames:
                assert_in("Removing {}".format(os.path.join(path, fname)),
                          cml.out)

        # Add to already existing links, skipping.
        assert_in_results(ds.addurls(self.json_file,
                                     "{url}",
                                     "{name}",
                                     ifexists="skip",
                                     result_renderer='disabled'),
                          action="addurls",
                          status="notneeded")

        # Adding to already existing links works, as long as the content is the same.
        ds.addurls(self.json_file,
                   "{url}",
                   "{name}",
                   result_renderer='disabled')

        # But it fails if something has changed.
        ds.unlock("a")
        with open(op.join(ds.path, "a"), "w") as ofh:
            ofh.write("changed")
        ds.save("a")

        assert_raises(IncompleteResultsError,
                      ds.addurls,
                      self.json_file,
                      "{url}",
                      "{name}",
                      result_renderer='disabled')