Code Example #1
def test_our_metadataset_search(tdir):
    # smoke test for basic search operations on our super-megadataset
    # expensive operation but ok
    ds = install(path=tdir,
                 source=DATASETS_TOPURL,
                 result_xfm='datasets',
                 return_type='item-or-list')
    res_haxby = list(ds.search('haxby'))
    assert_greater(len(res_haxby), 10)
    # default search should be case insensitive
    # but somehow it is not fully -- we get 12 here
    #res_Haxby = list(ds.search('Haxby'))
    #eq_(len(res_haxby), len(res_Haxby))

    assert_result_count(ds.search('id:873a6eae-7ae6-11e6-a6c8-002590f97d84',
                                  mode='textblob'),
                        1,
                        type='dataset',
                        path=op.join(ds.path, 'crcns', 'pfc-2'))

    # there is a problem with argparse not decoding into utf8 in PY2
    # TODO: make it into an independent lean test
    from datalad.cmd import Runner
    out, err = Runner(cwd=ds.path)('datalad search Buzsáki')
    assert_in('crcns/pfc-2 ', out)  # has it in description
    # and then another aspect: this entry is among multiple authors; we need to
    # check if aggregating them into a searchable entity was done correctly
    assert_in('crcns/hc-1 ', out)
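For reference, the CLI round-trip above mainly exercises unicode handling end to end; through the Python API the same query would look roughly like this (a sketch, not part of the original test):

res_buzsaki = list(ds.search('Buzsáki'))
# both datasets asserted on above should be among the results
assert len(res_buzsaki) >= 2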
Code Example #2
def _test_external(ev, modname):
    try:
        exec("import %s" % modname, globals(), locals())
    except ImportError:
        raise SkipTest("External %s not present" % modname)
    except Exception as e:
        raise SkipTest("External %s fails to import: %s" % (modname, exc_str(e)))
    assert (ev[modname] is not ev.UNKNOWN)
    assert_greater(ev[modname], '0.0.1')
    assert_greater('1000000.0', ev[modname])  # unlikely in our lifetimes
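A helper like this is presumably driven by thin per-module tests; a minimal sketch of such an invocation (the module under test and the `external_versions` singleton import are assumptions):

from datalad.support.external_versions import external_versions

def test_external_numpy():
    _test_external(external_versions, 'numpy')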
Code Example #3
File: test_version.py Project: vsoch/datalad
def test__version__():
    # in the released stage, the version in the last CHANGELOG entry
    # should correspond to the one in datalad
    CHANGELOG_filename = op.join(
        op.dirname(__file__), op.pardir, op.pardir, 'CHANGELOG.md')
    if not op.exists(CHANGELOG_filename):
        raise SkipTest("no %s found" % CHANGELOG_filename)
    regex = re.compile(r'^## '
                       r'(?P<version>[0-9]+\.[0-9.abcrc~]+)\s+'
                       r'\((?P<date>.*)\)'
                       r'\s+--\s+'
                       r'(?P<codename>.+)'
                       )
    with open(CHANGELOG_filename, 'rb') as f:
        for line in f:
            line = line.rstrip()
            if not line.startswith(b'## '):
                # skip until the first section header -- it must be our
                # most recent changelog entry
                continue
            reg = regex.match(assure_unicode(line))
            if not reg:  # the first header at this level must be parseable
                raise AssertionError(
                    "Following line must have matched our regex: %r" % line)
            regd = reg.groupdict()
            changelog_version = regd['version']
            lv_changelog_version = LooseVersion(changelog_version)
            # we might have a suffix - sanitize (note: rstrip removes any
            # trailing characters from the set '.devdirty', not the literal
            # suffix string)
            san__version__ = __version__.rstrip('.devdirty')
            lv__version__ = LooseVersion(san__version__)
            if '???' in regd['date'] and 'will be better than ever' in regd['codename']:
                # we only have our template
                # we can only assert that its version should be higher than
                # the one we have now
                assert_greater(lv_changelog_version, lv__version__)
            else:
                # should be a "release" record
                assert_not_in('???', regd['date'])
                assert_not_in('will be better than ever', regd['codename'])
                assert_equal(__hardcoded_version__, changelog_version)
                if __hardcoded_version__ != san__version__:
                    # It was not tagged yet and Changelog should have its
                    # template record for the next release
                    assert_greater(lv_changelog_version, lv__version__)
                    assert_in('.dev', san__version__)
                else:
                    # all is good, tagged etc
                    assert_equal(lv_changelog_version, lv__version__)
                    assert_equal(changelog_version, san__version__)
                    assert_equal(__hardcoded_version__, san__version__)
            return

    raise AssertionError(
        "No log line matching our regex found in %s" % CHANGELOG_filename
    )
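To make the expected CHANGELOG format concrete, here is what the regex extracts from a typical release header (the codename is made up for illustration):

import re

regex = re.compile(r'^## '
                   r'(?P<version>[0-9]+\.[0-9.abcrc~]+)\s+'
                   r'\((?P<date>.*)\)'
                   r'\s+--\s+'
                   r'(?P<codename>.+)')
m = regex.match('## 0.14.0 (2021-02-02) -- a better datalad')
print(m.groupdict())
# -> {'version': '0.14.0', 'date': '2021-02-02', 'codename': 'a better datalad'}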
Code Example #4
def test__gen_github_entity_organization():
    # to test effectiveness of the fix, we need to provide some
    # token which would not work
    with patch_config(
        {CONFIG_HUB_TOKEN_FIELD: "ed51111111111111111111111111111111111111"}):
        org_cred = next(_gen_github_entity(None, 'datalad-collection-1'))
    assert len(org_cred) == 2, "we return organization and credential"
    org, _ = org_cred
    assert org
    repos = list(org.get_repos())
    repos_names = [r.name for r in repos]
    assert_greater(len(repos), 3)  # we have a number of those
    assert_in('datasets.datalad.org', repos_names)
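patch_config overlays the given settings on datalad's configuration only for the duration of the with block, which is what lets the test inject a deliberately non-working token; a minimal sketch of the pattern (the import path and config key are assumptions):

from datalad.tests.utils import patch_config

with patch_config({'hub.oauthtoken': 'not-a-real-token'}):
    pass  # code under test sees the overridden value here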
Code Example #5
def check_runner_heavy_output(log_online):
    # TODO: again, no automatic detection yet of this resulting in
    # getting stuck

    runner = Runner()
    cmd = '%s %s' % (sys.executable,
                     op.join(op.dirname(__file__), "heavyoutput.py"))

    with swallow_outputs() as cm, swallow_logs():
        ret = runner.run(cmd,
                         log_online=log_online,
                         log_stderr=False,
                         log_stdout=False,
                         expect_stderr=True)
        eq_(cm.err, cm.out)  # they are identical in that script
        eq_(cm.out[:10], "0 [0, 1, 2")
        eq_(cm.out[-15:], "997, 998, 999]\n")

    # for some reason swallow_logs is not effective, so we just skip altogether
    # if too heavy debug output
    if lgr.getEffectiveLevel() <= logging.DEBUG:
        raise SkipTest(
            "Skipping due to too heavy impact on logs complicating debugging")

    # do it again with capturing:
    with swallow_logs():
        ret = runner.run(cmd,
                         log_online=log_online,
                         log_stderr=True,
                         log_stdout=True,
                         expect_stderr=True)

    if log_online:
        # halting case of datalad add and other batch commands #2116
        logged = []
        with swallow_logs():

            def process_stdout(l):
                assert l
                logged.append(l)

            ret = runner.run(cmd,
                             log_online=log_online,
                             log_stdout=process_stdout,
                             log_stderr='offline',
                             expect_stderr=True)
        assert_equal(len(logged), 100)
        assert_greater(len(ret[1]), 1000)  # stderr all here
        assert not ret[0], "all messages went into `logged`"
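The assertions pin down what the companion script must emit; a plausible reconstruction of heavyoutput.py, inferred purely from the checks above (not the actual file):

import sys

# 100 lines, each "<i> [0, 1, ..., 999]", mirrored to stdout and stderr
for i in range(100):
    line = "%d %s\n" % (i, list(range(1000)))
    sys.stdout.write(line)
    sys.stderr.write(line)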
Code Example #6
def test_install_top(tdir):
    # installs one level of subdatasets only
    ds = install(
        path=tdir,
        source=DATASETS_TOPURL,
        recursive=DATASETS_FULL_INSTALL,
        recursion_limit=1,
    )
    subdss = ds.subdatasets(fulfilled=True, result_xfm='datasets')
    if DATASETS_FULL_INSTALL:
        assert_greater(len(subdss), 25)  # we have a good number on top
        assert_equal(ds.subdatasets(fulfilled=False),
                     [])  # and none is left behind

    detached = [s.path for s in subdss if not s.repo.get_active_branch()]
    assert_equal(detached, [])
Code Example #7
File: test_gh.py Project: datalad/datalad-crawler
def test_crawl(tempd):
    if not _get_github_cred().is_known:
        raise SkipTest("no github credential")
    ds = create(tempd)
    with chpwd(tempd):
        crawl_init(template='gh',
                   save=True,
                   args={
                       'org': 'datalad-collection-1',
                       'include': 'kaggle'
                   })
        crawl()
    subdss = ds.subdatasets(fulfilled=True, result_xfm='datasets')
    assert all('kaggle' in d.path for d in subdss)
    assert_greater(len(subdss), 1)
    assert_false(ds.repo.dirty)
Code Example #8
def test_external_versions_basic():
    ev = ExternalVersions()
    our_module = 'datalad'
    assert_equal(ev.versions, {})
    assert_equal(ev[our_module], __version__)
    # and it could be compared
    assert_greater_equal(ev[our_module], __version__)
    assert_greater(ev[our_module], '0.1')
    assert_equal(list(ev.keys()), [our_module])
    assert_true(our_module in ev)
    assert_false('unknown' in ev)

    # all are LooseVersions now
    assert_true(isinstance(ev[our_module], LooseVersion))
    version_str = __version__
    assert_equal(ev.dumps(), "Versions: %s=%s" % (our_module, version_str))

    # For non-existing one we get None
    assert_equal(ev['custom__nonexisting'], None)
    # and nothing gets added to _versions for nonexisting
    assert_equal(set(ev.versions.keys()), {our_module})

    # but if it is a module without version, we get it set to UNKNOWN
    assert_equal(ev['os'], ev.UNKNOWN)
    # And get a record on that inside
    assert_equal(ev.versions.get('os'), ev.UNKNOWN)
    # And that thing is "True", i.e. present
    assert (ev['os'])
    # but not comparable with anything besides itself (was above)
    # (note: `cmp` is a PY2-only builtin; this test predates PY3-only datalad)
    assert_raises(TypeError, cmp, ev['os'], '0')
    assert_raises(TypeError, assert_greater, ev['os'], '0')

    return
    # Code below is intentionally unreachable: it came from the original
    # duecredit, and we do not care about testing it
    # And we can get versions based on modules themselves
    from datalad.tests import mod
    assert_equal(ev[mod], mod.__version__)

    # Check that we can get a copy of the versions
    versions_dict = ev.versions
    versions_dict[our_module] = "0.0.1"
    assert_equal(versions_dict[our_module], "0.0.1")
    assert_equal(ev[our_module], __version__)
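In short, ExternalVersions provides lazy, dict-like access to the versions of installed modules; a minimal usage sketch (assuming the `external_versions` singleton exported by datalad):

from datalad.support.external_versions import external_versions

# None if not installed; LooseVersion values compare against plain strings
if external_versions['numpy'] and external_versions['numpy'] >= '1.0':
    print('recent enough numpy:', external_versions['numpy'])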
Code Example #9
def test_wtf(topdir):
    path = opj(topdir, OBSCURE_FILENAME)
    # smoke test for now
    with swallow_outputs() as cmo:
        wtf(dataset=path, on_failure="ignore")
        assert_not_in('## dataset', cmo.out)
        assert_in('## configuration', cmo.out)
        # Those sections get censored out by default now
        assert_not_in('user.name: ', cmo.out)
    with chpwd(path):
        with swallow_outputs() as cmo:
            wtf()
            assert_not_in('## dataset', cmo.out)
            assert_in('## configuration', cmo.out)
    # now with a dataset
    ds = create(path)
    with swallow_outputs() as cmo:
        wtf(dataset=ds.path)
        assert_in('## configuration', cmo.out)
        assert_in('## dataset', cmo.out)
        assert_in(u'path: {}'.format(ds.path),
                  ensure_unicode(cmo.out))

    # and if we run with all sensitive
    for sensitive in ('some', True):
        with swallow_outputs() as cmo:
            wtf(dataset=ds.path, sensitive=sensitive)
            # we fake those for tests anyway; in this mode cfg is shown,
            # but the values are explicitly hidden
            assert_in('user.name: %s' % _HIDDEN, cmo.out)

    with swallow_outputs() as cmo:
        wtf(dataset=ds.path, sensitive='all')
        assert_not_in(_HIDDEN, cmo.out)  # all is shown
        assert_in('user.name: ', cmo.out)

    # Sections selection
    #
    # If we ask for no sections and there is no dataset
    with chpwd(path):
        with swallow_outputs() as cmo:
            wtf(sections=[])
            assert_not_in('## dataset', cmo.out)
            for s in SECTION_CALLABLES:
                assert_not_in('## %s' % s.lower(), cmo.out.lower())

    # ask for a selected set
    secs = ['git-annex', 'configuration']
    with chpwd(path):
        with swallow_outputs() as cmo:
            wtf(sections=secs)
            for s in SECTION_CALLABLES:
                (assert_in if s in secs else assert_not_in)(
                    '## %s' % s.lower(), cmo.out.lower()
                )
            # order should match our desired one, not alphabetical
            # but because of https://github.com/datalad/datalad/issues/3915
            # alphanum is now desired
            assert cmo.out.index('## git-annex') > cmo.out.index('## configuration')

    # not achievable from the cmdline: passing an empty list of sections.
    with chpwd(path):
        with swallow_outputs() as cmo:
            wtf(sections=[])
            eq_(cmo.out.rstrip(), '# WTF')

    # and we could decorate it nicely for embedding e.g. into github issues
    with swallow_outputs() as cmo:
        wtf(sections=['dependencies'], decor='html_details')
        ok_startswith(cmo.out, '<details><summary>DataLad %s WTF' % __version__)
        assert_in('## dependencies', cmo.out)

    # short flavor
    with swallow_outputs() as cmo:
        wtf(flavor='short')
        assert_in("- datalad: version=%s" % __version__, cmo.out)
        assert_in("- dependencies: ", cmo.out)
        eq_(len(cmo.out.splitlines()), 4)  # #WTF, datalad, dependencies, trailing new line

    with swallow_outputs() as cmo:
        wtf(flavor='short', sections='*')
        assert_greater(len(cmo.out.splitlines()), 10)  #  many more

    # clipboard interaction requires pyperclip
    skip_if_no_module('pyperclip')

    # verify that it works correctly in the env/platform
    import pyperclip
    with swallow_outputs() as cmo:
        try:
            pyperclip.copy("xxx")
            pyperclip_works = pyperclip.paste().strip() == "xxx"
            wtf(dataset=ds.path, clipboard=True)
        except (AttributeError, pyperclip.PyperclipException) as exc:
            # AttributeError could come from pyperclip if no DISPLAY
            raise SkipTest(exc_str(exc))
        assert_in("WTF information of length", cmo.out)
        assert_not_in('user.name', cmo.out)
        if not pyperclip_works:
            # Sometimes it does not throw but just fails to work
            raise SkipTest(
                "Pyperclip seems to be not functioning here correctly")
        assert_not_in('user.name', pyperclip.paste())
        assert_in(_HIDDEN, pyperclip.paste())  # by default no sensitive info
        assert_in("cmd:annex:", pyperclip.paste())  # but the content is there
Code Example #10
def _test_list_tuple(thing):
    version = ExternalVersions._deduce_version(thing)
    assert_greater(version, '0.0.1')
    assert_greater('0.2', version)
    assert_equal('0.1', version)
    assert_equal(version, '0.1')
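Given the assertions, the helper is presumably parameterized over list and tuple version specs that deduce to '0.1'; a sketch of how it might be driven (the exact specs are an assumption):

def test_list_tuple():
    for thing in [(0, 1), [0, 1]]:
        _test_list_tuple(thing)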
Code Example #11
File: test_parallel.py Project: seldamat/datalad
def test_gracefull_death():
    def assert_provides_and_raises(pc, exception, target=None):
        """Helper to get all results before exception is raised"""
        results = []
        with assert_raises(exception):
            for r in pc:
                results.append(r)
        # results should be sorted since we do not guarantee order
        results = sorted(results)
        if target is not None:
            assert_equal(results, target)
        return results

    def interrupted_producer():
        yield 1
        raise ValueError()

    def consumer(i):
        sleep(0.001)
        yield i

    assert_provides_and_raises(
        ProducerConsumer(interrupted_producer(), consumer, jobs=3), ValueError,
        [1])

    def faulty_consumer(i):
        sleep(0.001)
        if i == 1:
            raise ValueError()
        return i

    # so we do not just get a failure; the other parallel jobs finish their work
    results = assert_provides_and_raises(
        ProducerConsumer(range(1000), faulty_consumer, jobs=5), ValueError)
    # and analysis of futures to raise an exception can take some time etc, so
    # we could get more, but for sure we should not get all 999, and not even 100
    if info_log_level:
        assert_greater(100, len(results))
    assert_equal(results[:4], [0, 2, 3, 4])

    def producer():
        for i in range(10):
            sleep(0.0001)
            yield i
        raise ValueError()

    # by default we do not stop upon producer failing
    assert_provides_and_raises(ProducerConsumer(producer(), consumer, jobs=2),
                               ValueError, list(range(10)))
    # if the producer produces more than we can consume as quickly, but then
    # fails, ATM we do not proceed to consume the remaining items: we fail
    # once we finish consuming what was produced up to the point of failure
    results = assert_provides_and_raises(
        ProducerConsumer(producer(),
                         consumer,
                         reraise_immediately=True,
                         jobs=2), ValueError)
    # we will get some results (around 4, it seems), and they should be
    # "sequential"
    assert_equal(results, list(range(len(results))))
    assert_greater_equal(len(results), 2)
    if info_log_level:
        assert_greater_equal(6, len(results))

    # Simulate a situation close to what we have when outside code consumes
    # some yielded results and then "loses interest" (on_failure="error").
    # In this case we should still exit gracefully (no GeneratorExit warnings),
    # not over-produce, and also not kill already running consumers
    consumed = []

    def inner():
        def consumer(i):
            sleep(0.01)
            consumed.append(i)
            return i

        pc = iter(ProducerConsumer(range(1000), consumer, jobs=2))
        yield next(pc)
        yield next(pc)

    assert_equal(sorted(inner()), [0, 1])
    consumed = sorted(consumed)
    assert_equal(consumed, list(range(len(consumed))))
    assert_greater_equal(len(consumed),
                         4)  # we should wait for that 2nd batch to finish
    if info_log_level:
        assert_greater_equal(20, len(consumed))
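At its core, ProducerConsumer maps a consumer over a producer's items across parallel jobs and yields results as they complete; a minimal usage sketch (assuming datalad.support.parallel as the import path):

from datalad.support.parallel import ProducerConsumer

def consumer(i):
    return i * 2

# arrival order is not guaranteed across jobs, hence the sort
results = sorted(ProducerConsumer(range(5), consumer, jobs=2))
assert results == [0, 2, 4, 6, 8]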