def test_get_most_obscure_supported_name():
    n = get_most_obscure_supported_name()
    ok_startswith(n, OBSCURE_PREFIX)
    ok_(len(OBSCURE_FILENAMES) > 1)
    # from more complex to simpler ones
    ok_(len(OBSCURE_FILENAMES[0]) > len(OBSCURE_FILENAMES[-1]))
    print(repr(n))
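
All of the examples on this page exercise the same small assertion helper. As a point of reference, here is a minimal sketch of its behavior, assuming plain str.startswith semantics (datalad's actual helper may format its failure message differently):

def ok_startswith_sketch(s, prefix):
    # assert that string `s` begins with `prefix`
    assert s.startswith(prefix), \
        "%r does not start with %r" % (s, prefix)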
Example #2
def test_install_dataset_from_just_source(src_repo=None, path=None):

    src_ds = Dataset(src_repo).create(result_renderer='disabled', force=True)
    src_ds.save(['INFO.txt', 'test.dat'], to_git=True)
    src_ds.save('test-annex.dat', to_git=False)
    # equivalent repo on github:
    src_url = "https://github.com/datalad/testrepo--basic--r1.git"
    sources = [
        src_ds.path,
        get_local_file_url(src_ds.path, compatibility='git')
    ]
    if not dl_cfg.get('datalad.tests.nonetwork'):
        sources.append(src_url)

    for url in sources:

        with chpwd(path, mkdir=True):
            ds = install(source=url)

        ok_startswith(ds.path, path)
        ok_(ds.is_installed())
        ok_(GitRepo.is_valid_repo(ds.path))
        assert_repo_status(ds.path, annex=None)
        assert_in('INFO.txt', ds.repo.get_indexed_files())

        # cleanup before next iteration
        rmtree(path)
def test_with_tempfile_dir_via_env_variable():
    target = os.path.join(os.path.expanduser("~"), "dataladtesttmpdir")
    assert_false(os.path.exists(target),
                 "directory %s already exists." % target)

    with patch_config({'datalad.tests.temp.dir': target}):
        filename = _with_tempfile_decorated_dummy()
        ok_startswith(filename, target)
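
patch_config above temporarily overrides datalad configuration for the duration of the block. A toy dict-based overlay illustrating the idea (a hypothetical demo, not datalad's implementation, which patches its global dl_cfg object):

import contextlib

_UNSET = object()

@contextlib.contextmanager
def patch_config_demo(overrides, cfg):
    # toy overlay: set the keys, then restore (or remove) them on exit
    saved = {k: cfg.get(k, _UNSET) for k in overrides}
    cfg.update(overrides)
    try:
        yield cfg
    finally:
        for k, v in saved.items():
            if v is _UNSET:
                cfg.pop(k, None)
            else:
                cfg[k] = v

cfg = {}
with patch_config_demo({'datalad.tests.temp.dir': '/tmp/x'}, cfg):
    assert cfg['datalad.tests.temp.dir'] == '/tmp/x'
assert 'datalad.tests.temp.dir' not in cfg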
def test_ok_startswith():
    ok_startswith('abc', 'abc')
    ok_startswith('abc', 'a')
    ok_startswith('abc', '')
    ok_startswith(' abc', ' ')
    ok_startswith('abc\r\n', 'a')  # no effect from \r\n etc
    assert_raises(AssertionError, ok_startswith, 'abc', 'b')
    assert_raises(AssertionError, ok_startswith, 'abc', 'abcd')
Example #5
def test_run_datalad_help():
    out, err = check_run_and_get_output("datalad --help")
    ok_startswith(out, "Usage: ")
    # There could be a warning from coverage that no data was collected; it should be benign
    lines = [
        l for l in err.split(os.linesep)
        if ('no-data-collected' not in l) and l
    ]
    eq_(lines, [])
def test_with_tempfile_default_prefix(d1=None):
    d = basename(d1)
    short = 'datalad_temp_'
    full = short + \
           'test_with_tempfile_default_prefix'
    if on_windows:
        ok_startswith(d, short)
        nok_startswith(d, full)
    else:
        ok_startswith(d, full)
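
The d1=None parameters in these tests are filled in by datalad's @with_tempfile decorator. A rough sketch of the mechanism, assuming the prefix is built from 'datalad_temp_' plus the test function's name (the real decorator also handles Windows path-length limits, suffixes, and more):

import functools
import os
import shutil
import tempfile

def with_tempfile_sketch(f):
    # hypothetical stand-in: pass a generated temp path as the test's
    # positional argument and clean it up afterwards
    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        path = tempfile.mktemp(prefix='datalad_temp_' + f.__name__)
        try:
            return f(*args, path, **kwargs)
        finally:
            if os.path.isdir(path):
                shutil.rmtree(path, ignore_errors=True)
            elif os.path.lexists(path):
                os.unlink(path)
    return wrapper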
Example #7
def test_install_dataset_from_instance(src=None, dst=None):
    origin = Dataset(src).create(result_renderer='disabled', force=True)
    origin.save(['INFO.txt', 'test.dat'], to_git=True)
    origin.save('test-annex.dat', to_git=False)

    clone = install(source=origin, path=dst)

    assert_is_instance(clone, Dataset)
    ok_startswith(clone.path, dst)
    ok_(clone.is_installed())
    ok_(GitRepo.is_valid_repo(clone.path))
    assert_repo_status(clone.path, annex=None)
    assert_in('INFO.txt', clone.repo.get_indexed_files())
Example #8
def test__version__():
    # in released stage, version in the last CHANGELOG entry
    # should correspond to the one in datalad
    CHANGELOG_filename = op.join(
        op.dirname(__file__), op.pardir, op.pardir, 'CHANGELOG.md')
    if not op.exists(CHANGELOG_filename):
        raise SkipTest("no %s found" % CHANGELOG_filename)
    regex = re.compile(r'^# '
                       r'(?P<version>[0-9]+\.[0-9.abcrc~]+)\s+'
                       r'\((?P<date>.*)\)'
                       )
    with open(CHANGELOG_filename, 'rb') as f:
        for line in f:
            line = line.rstrip()
            if not line.startswith(b'# '):
                # The first section header we hit must be our changelog entry
                continue
            reg = regex.match(ensure_unicode(line))
            if not reg:  # first one at that level is the one
                raise AssertionError(
                    "Following line must have matched our regex: %r" % line)
            regd = reg.groupdict()
            changelog_version = regd['version']
            lv_changelog_version = Version(changelog_version)
            # we might have a ".dirty" suffix - sanitize
            # (note: rstrip('.dirty') would strip a character set, not the suffix)
            san__version__ = __version__.split('.dirty')[0]
            lv__version__ = Version(san__version__)
            # the regex defines no 'codename' group, so guard the lookup
            if ('???' in regd['date']
                    and 'will be better than ever' in (regd.get('codename') or '')):
                # we only have our template
                # we can only assert that its version should be higher than
                # the one we have now
                assert_greater(lv_changelog_version, lv__version__)
            else:
                # should be a "release" record
                assert_not_in('???', regd['date'])
                ok_startswith(__version__, changelog_version)
                if lv__version__ != lv_changelog_version:
                    # It was not tagged yet and Changelog has no new records
                    # (they are composed by auto upon release)
                    assert_greater(lv__version__, lv_changelog_version)
                    assert_in('+', san__version__)  # we have build suffix
                else:
                    # all is good, tagged etc
                    assert_equal(lv_changelog_version, lv__version__)
                    assert_equal(changelog_version, san__version__)
            return

    raise AssertionError(
        "No log line matching our regex found in %s" % CHANGELOG_filename
    )
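
For reference, this is the shape of CHANGELOG.md heading that the regex above is meant to match; both sample lines here are made up:

import re

regex = re.compile(r'^# '
                   r'(?P<version>[0-9]+\.[0-9.abcrc~]+)\s+'
                   r'\((?P<date>.*)\)')
# a hypothetical released entry and a not-yet-released template
assert regex.match('# 0.15.1 (Apr 28, 2021)')
assert regex.match('# 0.16.0 (??? not yet)')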
Example #9
def test_install_dataset_from_just_source_via_path(path=None):
    # for remote urls only, the source could be given to `path`
    # to allow for simplistic cmdline calls

    url = "https://github.com/datalad/testrepo--basic--r1.git"

    with chpwd(path, mkdir=True):
        ds = install(url)

    ok_startswith(ds.path, path)
    ok_(ds.is_installed())
    ok_(GitRepo.is_valid_repo(ds.path))
    assert_repo_status(ds.path, annex=None)
    assert_in('INFO.txt', ds.repo.get_indexed_files())
Example #10
def check_contents(outname, prefix):
    with tarfile.open(outname) as tf:
        nfiles = 0
        for ti in tf:
            # any annex links resolved
            assert_false(ti.issym())
            ok_startswith(ti.name, prefix + '/')
            assert_equal(ti.mtime, committed_date)
            if '.datalad' not in ti.name:
                # ignore any files in .datalad for this test to not be
                # susceptible to changes in how much meta info we generate
                nfiles += 1
        # we have exactly four files (includes .gitattributes for default
        # MD5E backend), and expect no content for any directory
        assert_equal(nfiles, 4)
Example #11
def test_cmdline_example_to_rst():
    # don't puke on nothing
    out = fmt.cmdline_example_to_rst(SIO(''))
    out.seek(0)
    ok_startswith(out.read(), '.. AUTO-GENERATED')
    out = fmt.cmdline_example_to_rst(SIO(''), ref='dummy')
    out.seek(0)
    assert_in('.. dummy:', out.read())
    # full scale test
    out = fmt.cmdline_example_to_rst(SIO(demo_example), ref='mydemo')
    out.seek(0)
    out_text = out.read()
    assert_in('.. code-block:: sh', out_text)
    assert_not_in('shame', out_text)  # no SKIP'ed
    assert_not_in('#', out_text)  # no comments
Example #12
def test_help_np():
    stdout, stderr = run_main(['--help-np'])

    # Let's extract section titles:
    # enough of bin/datalad and .tox/py27/bin/datalad -- guarantee consistency! ;)
    ok_startswith(stdout, 'Usage: datalad')
    # Sections start/end with * if run under DATALAD_HELP2MAN mode
    sections = [
        l[1:-1]
        for l in filter(re.compile(r'^\*.*\*$').match, stdout.split('\n'))
    ]
    for s in {
            'Essential commands',
            'Commands for metadata handling',
            'Miscellaneous commands',
            'General information',
            'Global options',
            'Plumbing commands',
    }:
        assert_in(s, sections)
        # should be present only one time!
        eq_(stdout.count(s), 1)

    assert_all_commands_present(stdout)

    if not (get_terminal_size()[0] or 0):
        raise SkipTest(
            "Could not determine terminal size, skipping the rest of the test")

    # none of the lines must be longer than 80 chars
    # TODO: decide on   create-sibling and possibly
    # rewrite-urls
    accepted_width = get_console_width()

    long_lines = [
        "%d %s" % (len(l), l) for l in stdout.split('\n')
        if len(l) > accepted_width
        and '{' not in l  # on nd70 summary line is unsplit
    ]
    if long_lines:
        raise AssertionError(
            "Following lines in --help output were longer than %s chars:\n%s" %
            (accepted_width, '\n'.join(long_lines)))
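
The list comprehension above collects lines that are fully wrapped in asterisks. A toy demonstration of that extraction on made-up help output:

import re

help_text = ('Usage: datalad ...\n'
             '*Essential commands*\n'
             '  create\n'
             '*Global options*\n'
             '  --help')
sections = [
    l[1:-1]
    for l in filter(re.compile(r'^\*.*\*$').match, help_text.split('\n'))
]
assert sections == ['Essential commands', 'Global options']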
Example #13
def test_version():
    # we just get a version if not asking for a version of some command
    stdout, stderr = run_main(['--version'], expect_stderr=True)
    eq_(stdout.rstrip(), "datalad %s" % datalad.__version__)

    stdout, stderr = run_main(['clone', '--version'], expect_stderr=True)
    ok_startswith(stdout, 'datalad %s\n' % datalad.__version__)
    # since https://github.com/datalad/datalad/pull/2733 no license in --version
    assert_not_in("Copyright", stdout)
    assert_not_in("Permission is hereby granted", stdout)

    try:
        import datalad_container
    except ImportError:
        pass  # not installed, cannot test with extension
    else:
        stdout, stderr = run_main(['containers-list', '--version'],
                                  expect_stderr=True)
        eq_(stdout, 'datalad_container %s\n' % datalad_container.__version__)
Example #14
def test_get_versioned_url():
    get_test_providers('s3://openfmri/tarballs')  # to verify having credentials to access openfmri via S3
    for url_pref in ('http://openfmri.s3.amazonaws.com', 'https://s3.amazonaws.com/openfmri'):
        eq_(get_versioned_url(url_pref + "/tarballs/ds001_raw.tgz"),
            url_pref + "/tarballs/ds001_raw.tgz?versionId=null")

        eq_(get_versioned_url(url_pref + "/tarballs/ds001_raw.tgz?param=1"),
            url_pref + "/tarballs/ds001_raw.tgz?param=1&versionId=null")

        # We don't duplicate the version if it already exists.
        eq_(get_versioned_url(url_pref + "/tarballs/ds001_raw.tgz?versionId=null"),
            url_pref + "/tarballs/ds001_raw.tgz?versionId=null")

    # something is wrong there
    #print(get_versioned_url("http://openfmri.s3.amazonaws.com/ds001/demographics.txt"))

    eq_(get_versioned_url("someurl"), "someurl")  # should just return original one
    assert_raises(RuntimeError, get_versioned_url, "someurl", guarantee_versioned=True)

    # TODO: on a bucket without versioning
    url = "http://datalad-test0-nonversioned.s3.amazonaws.com/2versions-removed-recreated.txt"
    eq_(get_versioned_url(url), url)
    eq_(get_versioned_url(url, return_all=True), [url])

    assert_raises(NotImplementedError, get_versioned_url, "s3://buga")

    urls = get_versioned_url("http://datalad-test0-versioned.s3.amazonaws.com/2versions-removed-recreated.txt",
                             return_all=True, verify=True)
    eq_(len(set(urls)), len(urls))  # all unique
    for url in urls:
        # so we didn't grab other files along with the same prefix
        ok_startswith(url, 'http://datalad-test0-versioned.s3.amazonaws.com/2versions-removed-recreated.txt?versionId=')

    # Update a versioned URL with a newer version tag.
    url_3ver = "http://datalad-test0-versioned.s3.amazonaws.com/3versions-allversioned.txt"
    url_3ver_input = url_3ver + "?versionId=b.qCuh7Sg58VIYj8TVHzbRS97EvejzEl"
    eq_(get_versioned_url(url_3ver_input), url_3ver_input)
    eq_(get_versioned_url(url_3ver_input, update=True),
        url_3ver + "?versionId=Kvuind11HZh._dCPaDAb0OY9dRrQoTMn")
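
The query-string behavior these assertions pin down amounts to appending versionId with the correct separator and not duplicating one that is already present. A minimal sketch with a hypothetical helper (not datalad's get_versioned_url, which also talks to S3):

def append_version_id(url, version_id):
    # add a versionId unless the URL already carries one
    if 'versionId=' in url:
        return url
    sep = '&' if '?' in url else '?'
    return url + sep + 'versionId=' + version_id

assert append_version_id('http://x/t.tgz', 'null') == \
    'http://x/t.tgz?versionId=null'
assert append_version_id('http://x/t.tgz?param=1', 'null') == \
    'http://x/t.tgz?param=1&versionId=null'
assert append_version_id('http://x/t.tgz?versionId=null', 'null') == \
    'http://x/t.tgz?versionId=null'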
Example #15
def test_add_readme(path=None):
    ds = Dataset(path).create(force=True)
    ds.save()
    ds.aggregate_metadata()
    assert_repo_status(ds.path)
    assert_status('ok', ds.add_readme())
    # should use default name
    with open(opj(path, 'README.md')) as f:
        content = f.read()
    ok_startswith(
        content, """\
# Dataset "demo_ds"

this is for play

### Authors

- Betty
- Tom

### License

PDDL

## General information

This is a DataLad dataset (id: {id}).
""".format(id=ds.id))
    # make sure that central README references are present
    assert_in(
        """More information on how to install DataLad and [how to install](http://handbook.datalad.org/en/latest/intro/installation.html)
it can be found in the [DataLad Handbook](https://handbook.datalad.org/en/latest/index.html).
""", content)
    # no unexpectedly long lines
    assert all([len(l) < 160 for l in content.splitlines()])

    # should skip on re-run
    assert_status('notneeded', ds.add_readme())
Example #16
    def test_addurls(self=None, path=None):
        ds = Dataset(path).create(force=True)

        def get_annex_commit_counts():
            return len(ds.repo.get_revisions("git-annex"))

        n_annex_commits = get_annex_commit_counts()

        # Meanwhile also test that we can specify the path relative
        # to the top of the dataset, as we generally treat paths in
        # the Python API, and that it will be the one saved in the
        # commit message record
        json_file = op.relpath(self.json_file, ds.path)

        ds.addurls(json_file,
                   "{url}",
                   "{name}",
                   exclude_autometa="(md5sum|size)",
                   result_renderer='disabled')
        ok_startswith(ds.repo.format_commit('%b', DEFAULT_BRANCH),
                      f"url_file='{json_file}'")

        filenames = ["a", "b", "c"]
        for fname in filenames:
            ok_exists(op.join(ds.path, fname))

        for (fname, meta), subdir in zip(ds.repo.get_metadata(filenames),
                                         ["foo", "bar", "foo"]):
            assert_dict_equal(meta, {"subdir": [subdir], "name": [fname]})

        # Ignore this check if we're faking dates because that disables
        # batch mode.
        # Also ignore if on Windows as it seems as if a git-annex bug
        # leads to separate meta data commits:
        # https://github.com/datalad/datalad/pull/5202#discussion_r535429704
        if not (dl_cfg.get('datalad.fake-dates') or on_windows):
            # We should have two new commits on the git-annex: one for the
            # added urls and one for the added metadata.
            eq_(n_annex_commits + 2, get_annex_commit_counts())

        # Add to already existing links, overwriting.
        with swallow_logs(new_level=logging.DEBUG) as cml:
            ds.addurls(self.json_file,
                       "{url}",
                       "{name}",
                       ifexists="overwrite",
                       result_renderer='disabled')
            for fname in filenames:
                assert_in("Removing {}".format(os.path.join(path, fname)),
                          cml.out)

        # Add to already existing links, skipping.
        assert_in_results(ds.addurls(self.json_file,
                                     "{url}",
                                     "{name}",
                                     ifexists="skip",
                                     result_renderer='disabled'),
                          action="addurls",
                          status="notneeded")

        # Adding to already existing links works, as long as the content is the same.
        ds.addurls(self.json_file,
                   "{url}",
                   "{name}",
                   result_renderer='disabled')

        # But it fails if something has changed.
        ds.unlock("a")
        with open(op.join(ds.path, "a"), "w") as ofh:
            ofh.write("changed")
        ds.save("a")

        assert_raises(IncompleteResultsError,
                      ds.addurls,
                      self.json_file,
                      "{url}",
                      "{name}",
                      result_renderer='disabled')
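
The "{url}" and "{name}" arguments to addurls are str.format-style templates filled from each record of the input file. A toy illustration with a made-up record:

record = {'url': 'http://example.com/a.dat', 'name': 'a', 'subdir': 'foo'}
# each record yields a download URL and a target filename
assert '{url}'.format(**record) == 'http://example.com/a.dat'
assert '{name}'.format(**record) == 'a'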
Example #17
def test_usage_on_insufficient_args():
    stdout, stderr = run_main(['install'], exit_code=2, expect_stderr=True)
    ok_startswith(stderr, 'usage:')
Example #18
def test_wtf(topdir=None):
    path = opj(topdir, OBSCURE_FILENAME)
    # smoke test for now
    with swallow_outputs() as cmo:
        wtf(dataset=path, on_failure="ignore")
        assert_not_in('## dataset', cmo.out)
        assert_in('## configuration', cmo.out)
        # Those sections get censored out by default now
        assert_not_in('user.name: ', cmo.out)
    with chpwd(path):
        with swallow_outputs() as cmo:
            wtf()
            assert_not_in('## dataset', cmo.out)
            assert_in('## configuration', cmo.out)
    # now with a dataset
    ds = create(path)
    with swallow_outputs() as cmo:
        wtf(dataset=ds.path)
        assert_in('## configuration', cmo.out)
        assert_in('## dataset', cmo.out)
        assert_in(u'path: {}'.format(ds.path), ensure_unicode(cmo.out))
        assert_in('branches', cmo.out)
        assert_in(DEFAULT_BRANCH + '@', cmo.out)
        assert_in('git-annex@', cmo.out)

    # and if we run with all sensitive
    for sensitive in ('some', True):
        with swallow_outputs() as cmo:
            wtf(dataset=ds.path, sensitive=sensitive)
            # we fake those for tests anyway; in this mode the config is
            # shown, but sensitive values are explicitly hidden
            assert_in('user.name: %s' % _HIDDEN, cmo.out)

    with swallow_outputs() as cmo:
        wtf(dataset=ds.path, sensitive='all')
        assert_not_in(_HIDDEN, cmo.out)  # all is shown
        assert_in('user.name: ', cmo.out)

    # Sections selection
    #
    # If we ask for no sections and there is no dataset
    with chpwd(path):
        with swallow_outputs() as cmo:
            wtf(sections=[])
            assert_not_in('## dataset', cmo.out)
            for s in SECTION_CALLABLES:
                assert_not_in('## %s' % s.lower(), cmo.out.lower())

    # ask for a selected set
    secs = ['git-annex', 'configuration']
    with chpwd(path):
        with swallow_outputs() as cmo:
            wtf(sections=secs)
            for s in SECTION_CALLABLES:
                (assert_in if s in secs else assert_not_in)('## %s' %
                                                            s.lower(),
                                                            cmo.out.lower())
            # order should match our desired one, not alphabetical
            # but because of https://github.com/datalad/datalad/issues/3915
            # alphanum is now desired
            assert cmo.out.index('## git-annex') > cmo.out.index(
                '## configuration')

    # passing an empty list of sections is not achievable from the cmdline.
    with chpwd(path):
        with swallow_outputs() as cmo:
            wtf(sections=[])
            eq_(cmo.out.rstrip(), '# WTF')

    # and we could decorate it nicely for embedding e.g. into github issues
    with swallow_outputs() as cmo:
        wtf(sections=['dependencies'], decor='html_details')
        ok_startswith(cmo.out,
                      '<details><summary>DataLad %s WTF' % __version__)
        assert_in('## dependencies', cmo.out)

    # short flavor
    with swallow_outputs() as cmo:
        wtf(flavor='short')
        assert_in("- datalad: version=%s" % __version__, cmo.out)
        assert_in("- dependencies: ", cmo.out)
        eq_(len(cmo.out.splitlines()),
            4)  # #WTF, datalad, dependencies, trailing new line

    with swallow_outputs() as cmo:
        wtf(flavor='short', sections='*')
        assert_greater(len(cmo.out.splitlines()), 10)  #  many more

    # check that wtf of an unavailable section yields impossible result (#6712)
    res = wtf(sections=['murkie'], on_failure='ignore')
    eq_(res[0]["status"], "impossible")

    # clipboard mode needs pyperclip
    skip_if_no_module('pyperclip')

    # verify that it works correctly in the env/platform
    import pyperclip
    with swallow_outputs() as cmo:
        try:
            pyperclip.copy("xxx")
            pyperclip_works = pyperclip.paste().strip() == "xxx"
            wtf(dataset=ds.path, clipboard=True)
        except (AttributeError, pyperclip.PyperclipException) as exc:
            # AttributeError could come from pyperclip if no DISPLAY
            raise SkipTest(str(exc))
        assert_in("WTF information of length", cmo.out)
        assert_not_in('user.name', cmo.out)
        if not pyperclip_works:
            # Sometimes it does not throw but just fails to work
            raise SkipTest(
                "Pyperclip seems to be not functioning here correctly")
        assert_not_in('user.name', pyperclip.paste())
        assert_in(_HIDDEN, pyperclip.paste())  # by default no sensitive info
        assert_in("cmd:annex:", pyperclip.paste())  # but the content is there
Example #19
def test_get_versioned_url_anon():
    # The one without any authenticator was crashing.
    # It also triggered another bug about having '.' in the bucket name.
    url_on = "http://openneuro.org.s3.amazonaws.com/ds000001/dataset_description.json"
    url_on_versioned = get_versioned_url(url_on)
    ok_startswith(url_on_versioned, url_on + "?versionId=")
def test_with_tempfile_specified_prefix(d1=None):
    ok_startswith(basename(d1), 'nodatalad_')
    ok_('test_with_tempfile_specified_prefix' not in d1)