Example #1
def test_add_archive_use_archive_dir(repo_path):
    repo = AnnexRepo(repo_path, create=True)
    with chpwd(repo_path):
        # Let's add first archive to the repo with default setting
        archive_path = opj('4u', '1.tar.gz')
        # check it gives informative error if archive is not already added
        with assert_raises(RuntimeError) as cmr:
            add_archive_content(archive_path)
        assert_re_in(
            "You should run ['\"]datalad save 4u\\\\1\\.tar\\.gz['\"] first"
            if on_windows else
            "You should run ['\"]datalad save 4u/1\\.tar\\.gz['\"] first",
            str(cmr.exception),
            match=False)
        with swallow_outputs():
            repo.add(archive_path)
        repo.commit("added 1.tar.gz")

        ok_archives_caches(repo.path, 0)
        add_archive_content(archive_path,
                            strip_leading_dirs=True,
                            use_current_dir=True)
        ok_(not exists(opj('4u', '1 f.txt')))
        ok_file_under_git(repo.path, '1 f.txt', annexed=True)
        ok_archives_caches(repo.path, 0)

        # and now let's extract under archive dir
        add_archive_content(archive_path, strip_leading_dirs=True)
        ok_file_under_git(repo.path, opj('4u', '1 f.txt'), annexed=True)
        ok_archives_caches(repo.path, 0)

        add_archive_content(opj('4u', 'sub.tar.gz'))
        ok_file_under_git(repo.path, opj('4u', 'sub', '2 f.txt'), annexed=True)
        ok_archives_caches(repo.path, 0)
Example #2
def test_get_mixed_hierarchy(src, path):

    origin = Dataset(src).create(no_annex=True)
    origin_sub = origin.create('subds')
    with open(opj(origin.path, 'file_in_git.txt'), "w") as f:
        f.write('no idea')
    with open(opj(origin_sub.path, 'file_in_annex.txt'), "w") as f:
        f.write('content')
    origin.add('file_in_git.txt', to_git=True)
    origin_sub.add('file_in_annex.txt')
    origin.save(all_changes=True)

    # now, install that thing:
    ds, subds = install(path, source=src, recursive=True)
    ok_(subds.repo.file_has_content("file_in_annex.txt") is False)

    # and get:
    with swallow_logs(new_level=logging.DEBUG) as cml:
        result = ds.get(curdir, recursive=True)
        assert_re_in('.*Found no annex at {0}. Skipped.'.format(ds),
                     cml.out, flags=re.DOTALL)
        eq_(len(result), 1)
        eq_(result[0]['file'], opj("subds", "file_in_annex.txt"))
        ok_(result[0]['success'] is True)
        ok_(subds.repo.file_has_content("file_in_annex.txt") is True)
Example #3
def test_logging_to_a_file(dst):
    ok_(not exists(dst))

    lgr = LoggerHelper("dataladtest-1").get_initialized_logger(logtarget=dst)
    ok_(exists(dst))  # the log file gets created by the handler right away, even before anything is logged

    msg = "Oh my god, they killed Kenny"
    lgr.error(msg)
    with open(dst) as f:
        lines = f.readlines()
    assert_equal(len(lines), 1, "Read more than a single log line: %s" % lines)
    line = lines[0]
    ok_(msg in line)
    ok_('\033[' not in line,
        msg="There should be no color formatting in log files. Got: %s" % line)
    # verify that time stamp and level are present in the log line
    # do not want to rely on not having race conditions around date/time changes
    # so matching just with regexp
    # (...)? is added to swallow possible traceback logs
    regex = "\[ERROR\]"
    if EnsureBool()(dl_cfg.get('datalad.log.timestamp', False)):
        regex = "\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3} " + regex
    if EnsureBool()(dl_cfg.get('datalad.log.vmem', False)):
        regex += ' RSS/VMS: \S+/\S+( \S+)?\s*'
    regex += "(\s+\S+\s*)? " + msg
    assert_re_in(regex, line, match=True)
    # Close all handlers so windows is happy -- apparently not closed fast enough
    for handler in lgr.handlers:
        handler.close()
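
For illustration only (not part of the test above): with timestamps enabled and vmem reporting disabled, the regex assembled in this test is meant to accept a log line of the following shape. The sample line below is hypothetical, shown just to make the expected format concrete.

import re

sample = "2021-01-01 12:00:00,123 [ERROR] Oh my god, they killed Kenny"
regex = (r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3} \[ERROR\]"
         r"(\s+\S+\s*)? Oh my god, they killed Kenny")
assert re.match(regex, sample)  # the timestamped variant matches such a line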
Example #4
def test_get_mixed_hierarchy(src, path):

    origin = Dataset(src).create(no_annex=True)
    origin_sub = origin.create('subds')
    with open(opj(origin.path, 'file_in_git.txt'), "w") as f:
        f.write('no idea')
    with open(opj(origin_sub.path, 'file_in_annex.txt'), "w") as f:
        f.write('content')
    origin.add('file_in_git.txt', to_git=True)
    origin_sub.add('file_in_annex.txt')
    origin.save(auto_add_changes=True)

    # now, install that thing:
    ds, subds = install(path, source=src, recursive=True)
    ok_(subds.repo.file_has_content("file_in_annex.txt") is False)

    # and get:
    with swallow_logs(new_level=logging.DEBUG) as cml:
        result = ds.get(curdir, recursive=True)
        assert_re_in('.*Found no annex at {0}. Skipped.'.format(ds),
                     cml.out, flags=re.DOTALL)
        eq_(len(result), 1)
        eq_(result[0]['file'], opj("subds", "file_in_annex.txt"))
        ok_(result[0]['success'] is True)
        ok_(subds.repo.file_has_content("file_in_annex.txt") is True)
Example #5
def test_logging_to_a_file(dst):
    ok_(not exists(dst))

    lgr = LoggerHelper("dataladtest-1").get_initialized_logger(logtarget=dst)
    ok_(exists(dst))  # the log file gets created by the handler right away, even before anything is logged

    msg = "Oh my god, they killed Kenny"
    lgr.error(msg)
    with open(dst) as f:
        lines = f.readlines()
    assert_equal(len(lines), 1, "Read more than a single log line: %s" % lines)
    line = lines[0]
    ok_(msg in line)
    ok_('\033[' not in line,
        msg="There should be no color formatting in log files. Got: %s" % line)
    # verify that time stamp and level are present in the log line
    # do not want to rely on not having race conditions around date/time changes
    # so matching just with regexp
    # (...)? is added to swallow possible traceback logs
    regex = "\[ERROR\]"
    if EnsureBool()(cfg.get('datalad.log.timestamp', False)):
        regex = "\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3} " + regex
    if EnsureBool()(cfg.get('datalad.log.vmem', False)):
        regex += ' RSS/VMS: \S+/\S+( \S+)?\s*'
    regex += "(\s+\S+\s*)? " + msg
    assert_re_in(regex, line, match=True)
    # Close all handlers so windows is happy -- apparently not closed fast enough
    for handler in lgr.handlers:
        handler.close()
Example #6
def test_utils_suppress_similar():
    tu = TestUtils()

    # Check suppression boundary for straight chain of similar
    # messages.

    def n_foo(number):
        for i in range(number):
            yield dict(action="foo", status="ok", path="path{}".format(i))

    with _swallow_outputs() as cmo:
        cmo.isatty = lambda: True
        list(tu(9, result_fn=n_foo, result_renderer="default"))
        assert_in("path8", cmo.out)
        assert_not_in("suppressed", cmo.out)

    with _swallow_outputs() as cmo:
        list(tu(10, result_fn=n_foo, result_renderer="default"))
        assert_in("path9", cmo.out)
        assert_not_in("suppressed", cmo.out)

    with _swallow_outputs() as cmo:
        list(tu(11, result_fn=n_foo, result_renderer="default"))
        assert_not_in("path10", cmo.out)
        assert_re_in(r"[^-0-9]1 .* suppressed", cmo.out, match=False)

    with _swallow_outputs() as cmo:
        list(tu(13, result_fn=n_foo, result_renderer="default"))
        assert_not_in("path10", cmo.out)
        # We see an update for each result.
        assert_re_in(r"1 .* suppressed", cmo.out, match=False)
        assert_re_in(r"2 .* suppressed", cmo.out, match=False)
        assert_re_in(r"3 .* suppressed", cmo.out, match=False)

    with _swallow_outputs(isatty=False) as cmo:
        list(tu(11, result_fn=n_foo, result_renderer="default"))
        assert_in("path10", cmo.out)

    # Check a chain of similar messages, split in half by a distinct one.

    def n_foo_split_by_a_bar(number):
        half = number // 2 - 1
        for i in range(number):
            yield dict(action="foo", status="ok", path="path{}".format(i))
            if i == half:
                yield dict(action="bar", status="ok", path="path")

    with _swallow_outputs() as cmo:
        list(tu(20, result_fn=n_foo_split_by_a_bar, result_renderer="default"))
        assert_in("path10", cmo.out)
        assert_in("path19", cmo.out)
        assert_not_in("suppressed", cmo.out)

    with _swallow_outputs() as cmo:
        list(tu(21, result_fn=n_foo_split_by_a_bar, result_renderer="default"))
        assert_in("path10", cmo.out)
        assert_not_in("path20", cmo.out)
        assert_re_in("[^-0-9]1 .* suppressed", cmo.out, match=False)
Example #7
def test_incorrect_msg_interpolation():
    with assert_raises(TypeError) as cme:
        TestUtils2().__call__()
    # this must be our custom exception
    assert_re_in("Failed to render.*kaboom.*not enough arguments",
                 str(cme.exception))

    # there should be no exception if the path reported in the record contains a %
    TestUtils2().__call__("%eatthis")
Example #8
def test_switch_re():
    ran = []

    def n2(data):
        for i in range(2):
            ran.append(len(ran))
            yield updated(data, {'f2': 'x_%d' % i})

    switch_node = switch(
        'f1',
        OrderedDict([
            ('m[13]', sub({'f2': {
                '_': '1'
            }})),
            # should be able to consume nodes and pipelines
            ('m[23]', [n2]),
            ('emp.*', None),  # just return input
        ]),
        re=True)
    out = list(switch_node({'f1': 'm123', 'f2': 'x_'}))
    assert_equal(out, [{'f1': 'm123', 'f2': 'x1'}])
    assert_equal(ran, [])

    # if there is a value that no mapping exists for, it would fail by default
    data_missing = {'f1': 'xxxxx', 'f2': 'x_'}
    with assert_raises(KeyError) as cme:
        list(switch_node(data_missing))
    assert_re_in('Found no matches for f1 == .xxxxx. matching one of',
                 cme.exception.args)

    # but in the 2nd case, the thing is a sub-pipeline so it behaves as such without spitting
    # out its output
    out = list(switch_node({'f1': 'm2', 'f2': 'x_'}))
    assert_equal(out, _out([{'f1': 'm2', 'f2': 'x_'}]))
    assert_equal(ran, [0, 1])  # but does execute just fine

    # and if matches both -- we need to get all outputs
    for i in range(len(ran)):
        ran.remove(i)
    out = list(switch_node({'f1': 'm3', 'f2': 'x_'}))
    assert_equal(out, [{
        'f1': 'm3',
        'f2': 'x1'
    }] + _out([{
        'f1': 'm3',
        'f2': 'x_'
    }]))
    assert_equal(ran, [0, 1])  # and does execute just as fine

    # empty match
    out = list(switch_node({'f1': 'empty', 'f2': 'x_'}))
    assert_equal(out, [{'f1': 'empty', 'f2': 'x_'}])
Example #9
def test_switch_re():
    ran = []

    def n2(data):
        for i in range(2):
            ran.append(len(ran))
            yield updated(data, {'f2': 'x_%d' % i})

    switch_node = switch(
        'f1',
        OrderedDict([
            ('m[13]', sub({'f2': {'_': '1'}})),
            # should be able to consume nodes and pipelines
            ('m[23]', [n2]),
            ('emp.*', None), # just return input
        ]),
        re=True
    )
    out = list(switch_node({'f1': 'm123', 'f2': 'x_'}))
    assert_equal(out, [{'f1': 'm123', 'f2': 'x1'}])
    assert_equal(ran, [])

    # if there is a value that no mapping exists for, it would fail by default
    data_missing = {'f1': 'xxxxx', 'f2': 'x_'}
    with assert_raises(KeyError) as cme:
        list(switch_node(data_missing))
    assert_re_in('Found no matches for f1 == .xxxxx. matching one of',
                 cme.exception.args)

    # but in the 2nd case, the thing is a sub-pipeline so it behaves as such without spitting
    # out its output
    out = list(switch_node({'f1': 'm2', 'f2': 'x_'}))
    assert_equal(out, _out([{'f1': 'm2', 'f2': 'x_'}]))
    assert_equal(ran, [0, 1])  # but does execute just fine

    # and if matches both -- we need to get all outputs
    for i in range(len(ran)):
        ran.remove(i)
    out = list(switch_node({'f1': 'm3', 'f2': 'x_'}))
    assert_equal(out, [{'f1': 'm3', 'f2': 'x1'}] +
                       _out([{'f1': 'm3', 'f2': 'x_'}]))
    assert_equal(ran, [0, 1])  # and does execute just as fine

    # empty match
    out = list(switch_node({'f1': 'empty', 'f2': 'x_'}))
    assert_equal(out, [{'f1': 'empty', 'f2': 'x_'}])
Example #10
def test_assert_re_in():
    assert_re_in(".*", "")
    assert_re_in(".*", ["any"])

    # should do match not search
    assert_re_in("ab", "abc")
    assert_raises(AssertionError, assert_re_in, "ab", "cab")
    assert_raises(AssertionError, assert_re_in, "ab$", "abc")

    # Sufficient to have one entry matching
    assert_re_in("ab", ["", "abc", "laskdjf"])
    assert_raises(AssertionError, assert_re_in, "ab$", ["ddd", ""])

    # Tuples should be ok too
    assert_re_in("ab", ("", "abc", "laskdjf"))
    assert_raises(AssertionError, assert_re_in, "ab$", ("ddd", ""))

    # shouldn't "match" the empty list
    assert_raises(AssertionError, assert_re_in, "", [])
Example #11
def check_incorrect_option(opts, err_str):
    # The first line used to be:
    # stdout, stderr = run_main((sys.argv[0],) + opts, expect_stderr=True, exit_code=2)
    # But: what do we expect to be in sys.argv[0] here?
    # It depends on how we invoke the test.
    # - nosetests -s -v datalad/cmdline/tests/test_main.py would result in:
    #   sys.argv[0]=='nosetests'
    # - python -m nose -s -v datalad/cmdline/tests/test_main.py would result in:
    #   sys.argv[0]=='python -m nose'
    # - python -c "import nose; nose.main()" -s -v datalad/cmdline/tests/test_main.py would result in:
    #   sys.argv[0]=='-c'
    # This led to failures when sys.argv[0] contained an option that was also
    # defined as a datalad option, and therefore was a 'known_arg' that was
    # checked against its constraints.
    # But sys.argv[0] actually isn't used by main at all. It simply doesn't
    # matter what's in there. The only thing important to pass here is `opts`.
    stdout, stderr = run_main(opts, expect_stderr=True, exit_code=2)
    out = stdout + stderr
    assert_in("usage: ", out)
    assert_re_in(err_str, out, match=False)
Example #12
def test_exc_str():
    try:
        raise Exception("my bad")
    except Exception as e:
        estr = exc_str(e)
    assert_re_in("my bad \[test_dochelpers.py:test_exc_str:...\]", estr)

    def f():
        def f2():
            raise Exception("my bad again")

        f2()

    try:
        f()
    except Exception as e:
        # default one:
        estr2 = exc_str(e, 2)
        estr1 = exc_str(e, 1)
        # and we can control it via environ by default
        with patch.dict('os.environ', {'DATALAD_EXC_STR_TBLIMIT': '3'}):
            estr3 = exc_str(e)
        with patch.dict('os.environ', {}, clear=True):
            estr_ = exc_str()

    assert_re_in(
        r"my bad again \[test_dochelpers.py:test_exc_str:...,test_dochelpers.py:f:...,test_dochelpers.py:f2:...\]",
        estr3)
    assert_re_in(
        r"my bad again \[test_dochelpers.py:f:...,test_dochelpers.py:f2:...\]",
        estr2)
    assert_re_in(r"my bad again \[test_dochelpers.py:f2:...\]", estr1)
    assert_equal(estr_, estr1)

    try:
        raise NotImplementedError
    except Exception as e:
        assert_re_in(
            r"NotImplementedError\(\) \[test_dochelpers.py:test_exc_str:...\]",
            exc_str(e))
Example #13
def test_interface():
    di = Demo()

    import argparse
    parser = argparse.ArgumentParser()

    di.setup_parser(parser)
    with swallow_outputs() as cmo:
        assert_equal(parser.print_help(), None)
        assert (cmo.out)
        assert_equal(cmo.err, '')
    args = parser.parse_args(['42', '11', '1', '2', '--demoarg', '23'])
    assert_is(args.demoarg, 23)
    assert_equal(args.demoposarg, [42, 11])
    assert_equal(args.demooptposarg1, 1)
    assert_equal(args.demooptposarg2, 2)

    # wrong type
    with swallow_outputs() as cmo:
        assert_raises(SystemExit, parser.parse_args, ['--demoarg', 'abc'])
        # that is what we dump upon folks atm. TODO: improve reporting of ill-specified options
        assert_re_in(".*invalid constraint:int value:.*", cmo.err, re.DOTALL)

    # missing argument to option
    with swallow_outputs() as cmo:
        assert_raises(SystemExit, parser.parse_args, ['--demoarg'])
        assert_re_in(".*--demoarg: expected one argument", cmo.err, re.DOTALL)

    # missing positional argument
    with swallow_outputs() as cmo:
        assert_raises(SystemExit, parser.parse_args, [''])
        # PY2|PY3
        assert_re_in(
            ".*error: (too few arguments|the following arguments are required: demoposarg)",
            cmo.err, re.DOTALL)
Example #14
def test_within_ds_file_search(path):
    try:
        import mutagen
    except ImportError:
        raise SkipTest
    ds = Dataset(path).create(force=True)
    # override default and search for datasets and files for this test
    for m in ('egrep', 'textblob', 'autofield'):
        ds.config.add('datalad.search.index-{}-documenttype'.format(m),
                      'all',
                      where='dataset')
    ds.config.add('datalad.metadata.nativetype', 'audio', where='dataset')
    makedirs(opj(path, 'stim'))
    for src, dst in (('audio.mp3', opj('stim', 'stim1.mp3')), ):
        copy(opj(dirname(dirname(__file__)), 'tests', 'data', src),
             opj(path, dst))
    ds.save()
    ok_file_under_git(path, opj('stim', 'stim1.mp3'), annexed=True)
    # If the file were not under annex, the metadata addition below would
    # silently do nothing
    ds.repo.set_metadata(opj('stim', 'stim1.mp3'), init={'importance': 'very'})
    ds.aggregate_metadata()
    assert_repo_status(ds.path)
    # basic sanity check on the metadata structure of the dataset
    dsmeta = ds.metadata('.', reporton='datasets')[0]['metadata']
    for src in ('audio', ):
        # something for each one
        assert_in(src, dsmeta)
        # each src declares its own context
        assert_in('@context', dsmeta[src])
        # we have a unique content metadata summary for each src
        assert_in(src, dsmeta['datalad_unique_content_properties'])

    # test default behavior
    with swallow_outputs() as cmo:
        ds.search(show_keys='name', mode='textblob')

        assert_in("""\
id
meta
parentds
path
type
""", cmo.out)

    target_out = """\
annex.importance
annex.key
audio.bitrate
audio.duration(s)
audio.format
audio.music-Genre
audio.music-album
audio.music-artist
audio.music-channels
audio.music-sample_rate
audio.name
audio.tracknumber
datalad_core.id
datalad_core.refcommit
id
parentds
path
type
"""

    # test default behavior while limiting set of keys reported
    with swallow_outputs() as cmo:
        ds.search([r'\.id', 'artist$'], show_keys='short')
        out_lines = [l for l in cmo.out.split(os.linesep) if l]
        # test that only the ones matching were returned
        assert_equal([l for l in out_lines if not l.startswith(' ')],
                     ['audio.music-artist', 'datalad_core.id'])
        # more specific test which would also test formatting
        assert_equal(
            out_lines,
            [
                'audio.music-artist',
                ' in  1 datasets',
                " has 1 unique values: 'dlartist'",
                'datalad_core.id',
                ' in  1 datasets',
                # we have them sorted
                " has 1 unique values: '%s'" % ds.id
            ])

    with assert_raises(ValueError) as cme:
        ds.search('*wrong')
    assert_re_in(
        r"regular expression '\(\?i\)\*wrong' \(original: '\*wrong'\) is incorrect: ",
        str(cme.exception))

    # check generated autofield index keys
    with swallow_outputs() as cmo:
        ds.search(mode='autofield', show_keys='name')
        # it is impossible to assess what is different from that dump
        assert_in(target_out, cmo.out)

    assert_result_count(ds.search('blablob#'), 0)
    # now check that we can discover things from the aggregated metadata
    for mode, query, hitpath, matched in (
        ('egrep', ':mp3', opj('stim', 'stim1.mp3'), {
            'audio.format': 'mp3'
        }),
            # same as above; the leading ':' is stripped, it indicates "ALL FIELDS"
        ('egrep', 'mp3', opj('stim', 'stim1.mp3'), {
            'audio.format': 'mp3'
        }),
            # same as above, but with AND condition
            # get both matches
        ('egrep', ['mp3', 'type:file'], opj('stim', 'stim1.mp3'), {
            'type': 'file',
            'audio.format': 'mp3'
        }),
            # case insensitive search
        ('egrep', 'mp3', opj('stim', 'stim1.mp3'), {
            'audio.format': 'mp3'
        }),
            # field selection by expression
        ('egrep', r'audio\.+:mp3', opj('stim', 'stim1.mp3'), {
            'audio.format': 'mp3'
        }),
            # random keyword query
        ('textblob', 'mp3', opj('stim', 'stim1.mp3'), {
            'meta': 'mp3'
        }),
            # report which field matched with auto-field
        ('autofield', 'mp3', opj('stim', 'stim1.mp3'), {
            'audio.format': 'mp3'
        }),
            # XXX next one is not supported by current text field analyser
            # decomposes the mime type in [mime, audio, mp3]
            # ('autofield',
            # "'mime:audio/mp3'",
            # opj('stim', 'stim1.mp3'),
            # 'audio.format', 'mime:audio/mp3'),
            # but this one works
        ('autofield', "'mime audio mp3'", opj('stim', 'stim1.mp3'), {
            'audio.format': 'mp3'
        }),
            # TODO extend with more complex queries to test whoosh
            # query language configuration
    ):
        res = ds.search(query, mode=mode, full_record=True)
        assert_result_count(
            res,
            1,
            type='file',
            path=opj(ds.path, hitpath),
            # each file must report the ID of the dataset it is from, critical for
            # discovering related content
            dsid=ds.id)
        # in egrep we currently do not search unique values
        # and the queries above aim at files
        assert_result_count(res, 1 if mode == 'egrep' else 2)
        if mode != 'egrep':
            assert_result_count(res,
                                1,
                                type='dataset',
                                path=ds.path,
                                dsid=ds.id)
        # test the key and specific value of the match
        for matched_key, matched_val in matched.items():
            assert_in(matched_key, res[-1]['query_matched'])
            assert_equal(res[-1]['query_matched'][matched_key], matched_val)

    # test a suggestion msg being logged if no hits and key is a bit off
    with swallow_logs(new_level=logging.INFO) as cml:
        res = ds.search('audio.formats:mp3 audio.bitsrate:1', mode='egrep')
        assert not res
        assert_in('Did you mean any of', cml.out)
        assert_in('audio.format', cml.out)
        assert_in('audio.bitrate', cml.out)
Example #15
def test_aggregation(path):
    with chpwd(path):
        assert_raises(InsufficientArgumentsError, aggregate_metadata, None)
    # a hierarchy of three (super/sub)datasets, each with some native metadata
    ds = Dataset(opj(path, 'origin')).create(force=True)
    # before anything is aggregated we would get nothing and only a log warning
    with swallow_logs(new_level=logging.WARNING) as cml:
        assert_equal(list(query_aggregated_metadata('all', ds, [])), [])
    assert_re_in('.*Found no aggregated metadata.*update', cml.out)
    ds.config.add('datalad.metadata.nativetype',
                  'frictionless_datapackage',
                  where='dataset')
    subds = ds.create('sub', force=True)
    subds.config.add('datalad.metadata.nativetype',
                     'frictionless_datapackage',
                     where='dataset')
    subsubds = subds.create('subsub', force=True)
    subsubds.config.add('datalad.metadata.nativetype',
                        'frictionless_datapackage',
                        where='dataset')
    ds.save(recursive=True)
    assert_repo_status(ds.path)
    # aggregate metadata from all subdatasets into any superdataset, including
    # intermediate ones
    res = ds.aggregate_metadata(recursive=True, update_mode='all')
    # we get success report for both subdatasets and the superdataset,
    # and they get saved
    assert_result_count(res, 3, status='ok', action='aggregate_metadata')
    assert_in_results(res, action='save', status="ok")
    # nice and tidy
    assert_repo_status(ds.path)

    # quick test of aggregate report
    aggs = ds.metadata(get_aggregates=True)
    # one for each dataset
    assert_result_count(aggs, 3)
    # the mother dataset also reports the layout version
    assert_result_count(aggs, 1, path=ds.path, layout_version=1)

    # store clean direct result
    origres = ds.metadata(recursive=True)
    # basic sanity check
    assert_result_count(origres, 6)
    assert_result_count(origres, 3, type='dataset')
    assert_result_count(origres, 3, type='file')  # Now that we have annex.key
    # three different IDs
    assert_equal(
        3, len(set([s['dsid'] for s in origres if s['type'] == 'dataset'])))
    # and we know about all three datasets
    for name in ('MOTHER_äöü東', 'child_äöü東', 'grandchild_äöü東'):
        assert_true(
            sum([s['metadata']['frictionless_datapackage']['name'] \
                    == ensure_unicode(name) for s in origres
                 if s['type'] == 'dataset']))

    # now clone the beast to simulate a new user installing an empty dataset
    clone = install(opj(path, 'clone'),
                    source=ds.path,
                    result_xfm='datasets',
                    return_type='item-or-list')
    # ID mechanism works
    assert_equal(ds.id, clone.id)

    # get fresh metadata
    cloneres = clone.metadata()
    # basic sanity check
    assert_result_count(cloneres, 2)
    assert_result_count(cloneres, 1, type='dataset')
    assert_result_count(cloneres, 1, type='file')

    # now loop over the previous results from the direct metadata query of
    # origin and make sure we get the exact same stuff from the clone
    _compare_metadata_helper(origres, clone)

    # now obtain a subdataset in the clone, should make no difference
    assert_status('ok',
                  clone.install('sub', result_xfm=None, return_type='list'))
    _compare_metadata_helper(origres, clone)

    # test search in search tests, not all over the place
    ## query smoke test
    assert_result_count(clone.search('mother', mode='egrep'), 1)
    assert_result_count(clone.search('(?i)MoTHER', mode='egrep'), 1)

    child_res = clone.search('child', mode='egrep')
    assert_result_count(child_res, 2)
    for r in child_res:
        if r['type'] == 'dataset':
            assert_in(r['query_matched']['frictionless_datapackage.name'],
                      r['metadata']['frictionless_datapackage']['name'])
Example #16
def test_addurls_dropped_urls(self, path):
    ds = Dataset(path).create(force=True)
    with swallow_logs(new_level=logging.WARNING) as cml:
        ds.addurls(self.json_file, "", "{subdir}//{name}")
        assert_re_in(r".*Dropped [0-9]+ row\(s\) that had an empty URL",
                     str(cml.out))
Example #17
def test_addurls_dropped_urls(self, path):
    ds = Dataset(path).create(force=True)
    with chpwd(path), swallow_logs(new_level=logging.WARNING) as cml:
        ds.addurls(self.json_file, "", "{subdir}//{name}")
        assert_re_in(r".*Dropped [0-9]+ row\(s\) that had an empty URL",
                     str(cml.out))
Example #18
def test_git_custom_calls(path, path2):
    # we need a GitRepo instance
    repo = GitRepo(path, create=True)
    with open(op.join(path, "cc_test.dat"), 'w') as f:
        f.write("test_git_custom_calls")

    out, err = repo._gitpy_custom_call('add', 'cc_test.dat')

    # actually executed:
    assert_in("cc_test.dat", repo.get_indexed_files())
    ok_(repo.dirty)

    # call using cmd_options:
    out, err = repo._gitpy_custom_call('commit',
                                       cmd_options={'m': 'added file'})
    ok_clean_git(path, annex=False)
    # check output:
    assert_in("1 file changed", out)
    assert_in("cc_test.dat", out)
    eq_('', err)

    # impossible 'add' call should raise ...
    assert_raises(GitCommandError,
                  repo._gitpy_custom_call,
                  'add',
                  'not_existing',
                  expect_fail=False)
    # .. except we expect it to fail:
    repo._gitpy_custom_call('add', 'not_existing', expect_fail=True)

    # log outputs:
    with swallow_logs(new_level=logging.DEBUG) as cm:
        out, err = repo._gitpy_custom_call('status',
                                           log_stdout=True,
                                           log_stderr=True)

        assert_in("On branch master", out)
        assert_in("nothing to commit", out)
        eq_("", err)
        for line in out.splitlines():
            assert_in("stdout| " + line, cm.out)

    # don't log outputs:
    with swallow_logs(new_level=logging.DEBUG) as cm:
        out, err = repo._gitpy_custom_call('status',
                                           log_stdout=False,
                                           log_stderr=False)

        assert_in("On branch master", out)
        assert_in("nothing to commit", out)
        eq_("", err)
        eq_("", cm.out)

    # use git_options:
    # Note: 'path2' doesn't contain a git repository
    with assert_raises(GitCommandError) as cm:
        repo._gitpy_custom_call('status', git_options={'C': path2})
    assert_in("-C %s status" % path2, str(cm.exception))
    assert_re_in("fatal: [Nn]ot a git repository",
                 str(cm.exception),
                 match=False)
Example #19
def check_incorrect_option(opts, err_str):
    stdout, stderr = run_main((sys.argv[0],) + opts, expect_stderr=True, exit_code=2)
    out = stdout + stderr
    assert_in("usage: ", out)
    assert_re_in(err_str, out, match=False)
Example #20
def test_container_from_subdataset(ds_path, src_subds_path, local_file):

    # prepare a to-be subdataset with a registered container
    src_subds = Dataset(src_subds_path).create()
    src_subds.containers_add(name="first",
                             url=get_local_file_url(
                                 op.join(local_file, 'some_container.img')))
    # add it as subdataset to a super ds:
    ds = Dataset(ds_path).create()
    subds = ds.install("sub", source=src_subds_path)
    # add it again one level down to see actual recursion:
    subds.install("subsub", source=src_subds_path)

    # We come up empty without recursive:
    res = ds.containers_list(recursive=False, **RAW_KWDS)
    assert_result_count(res, 0)

    # query available containers from within super:
    res = ds.containers_list(recursive=True, **RAW_KWDS)
    assert_result_count(res, 2)
    assert_in_results(res, action="containers", refds=ds.path)

    # default location within the subdataset:
    target_path = op.join(subds.path, '.datalad', 'environments', 'first',
                          'image')
    assert_result_count(res,
                        1,
                        name='sub/first',
                        type='file',
                        action='containers',
                        status='ok',
                        path=target_path,
                        parentds=subds.path)

    # a subdataset that is not installed doesn't pose an issue:
    sub2 = ds.create("sub2")
    assert_result_count(ds.subdatasets(), 2, type="dataset")
    ds.uninstall("sub2")
    from datalad.tests.utils import assert_false
    assert_false(sub2.is_installed())

    # same results as before, not crashing or somehow confused by a not present
    # subds:
    res = ds.containers_list(recursive=True, **RAW_KWDS)
    assert_result_count(res, 2)
    assert_result_count(res,
                        1,
                        name='sub/first',
                        type='file',
                        action='containers',
                        status='ok',
                        path=target_path,
                        parentds=subds.path)

    # The default renderer includes the image names.
    with swallow_outputs() as out:
        ds.containers_list(recursive=True)
        lines = out.out.splitlines()
    assert_re_in("sub/first", lines)
    assert_re_in("sub/subsub/first", lines)
    # But we are careful not to render partial names from subdataset traversals
    # (i.e. we recurse with containers_list(..., result_renderer=None)).
    with assert_raises(AssertionError):
        assert_re_in("subsub/first", lines)
Example #21
def test_utils_suppress_similar():
    tu = TestUtils()

    # Check suppression boundary for straight chain of similar
    # messages.

    # yield test results immediately to make test run fast
    sleep_dur = 0.0

    def n_foo(number):
        for i in range(number):
            yield dict(action="foo", status="ok", path="path{}".format(i))
            sleep(sleep_dur)

    with _swallow_outputs() as cmo:
        cmo.isatty = lambda: True
        list(tu(9, result_fn=n_foo, result_renderer="default"))
        assert_in("path8", cmo.out)
        assert_not_in("suppressed", cmo.out)

    with _swallow_outputs() as cmo:
        list(tu(10, result_fn=n_foo, result_renderer="default"))
        assert_in("path9", cmo.out)
        assert_not_in("suppressed", cmo.out)

    with _swallow_outputs() as cmo:
        list(tu(11, result_fn=n_foo, result_renderer="default"))
        assert_not_in("path10", cmo.out)
        assert_re_in(r"[^-0-9]1 .* suppressed", cmo.out, match=False)

    with _swallow_outputs() as cmo:
        # for this one test, yield results slightly slower than 2Hz
        # so that we can see each individual suppression message
        # and not get caught by the rate limiter
        sleep_dur = 0.51
        list(tu(13, result_fn=n_foo, result_renderer="default"))
        assert_not_in("path10", cmo.out)
        # We see an update for each result.
        assert_re_in(r"1 .* suppressed", cmo.out, match=False)
        assert_re_in(r"2 .* suppressed", cmo.out, match=False)
        assert_re_in(r"3 .* suppressed", cmo.out, match=False)

    # make tests run fast again
    sleep_dur = 0.0

    with _swallow_outputs(isatty=False) as cmo:
        list(tu(11, result_fn=n_foo, result_renderer="default"))
        assert_in("path10", cmo.out)

    # Check a chain of similar messages, split in half by a distinct one.

    def n_foo_split_by_a_bar(number):
        half = number // 2 - 1
        for i in range(number):
            yield dict(action="foo", status="ok", path="path{}".format(i))
            if i == half:
                yield dict(action="bar", status="ok", path="path")

    with _swallow_outputs() as cmo:
        list(tu(20, result_fn=n_foo_split_by_a_bar, result_renderer="default"))
        assert_in("path10", cmo.out)
        assert_in("path19", cmo.out)
        assert_not_in("suppressed", cmo.out)

    with _swallow_outputs() as cmo:
        list(tu(21, result_fn=n_foo_split_by_a_bar, result_renderer="default"))
        assert_in("path10", cmo.out)
        assert_not_in("path20", cmo.out)
        assert_re_in("[^-0-9]1 .* suppressed", cmo.out, match=False)
Example #22
def test_aggregation(path):
    with chpwd(path):
        assert_raises(InsufficientArgumentsError, aggregate_metadata, None)
    # a hierarchy of three (super/sub)datasets, each with some native metadata
    ds = Dataset(opj(path, 'origin')).create(force=True)
    # before anything is aggregated we would get nothing and only a log warning
    with swallow_logs(new_level=logging.WARNING) as cml:
        assert_equal(list(query_aggregated_metadata('all', ds, [])), [])
    assert_re_in('.*Found no aggregated metadata.*update', cml.out)
    ds.config.add('datalad.metadata.nativetype', 'frictionless_datapackage',
                  where='dataset')
    subds = ds.create('sub', force=True)
    subds.config.add('datalad.metadata.nativetype', 'frictionless_datapackage',
                     where='dataset')
    subsubds = subds.create('subsub', force=True)
    subsubds.config.add('datalad.metadata.nativetype', 'frictionless_datapackage',
                        where='dataset')
    ds.add('.', recursive=True)
    ok_clean_git(ds.path)
    # aggregate metadata from all subdatasets into any superdataset, including
    # intermediate ones
    res = ds.aggregate_metadata(recursive=True, update_mode='all')
    # we get success report for both subdatasets and the superdataset,
    # and they get saved
    assert_result_count(res, 6)
    assert_result_count(res, 3, status='ok', action='aggregate_metadata')
    assert_result_count(res, 3, status='ok', action='save')
    # nice and tidy
    ok_clean_git(ds.path)

    # quick test of aggregate report
    aggs = ds.metadata(get_aggregates=True)
    # one for each dataset
    assert_result_count(aggs, 3)
    # the mother dataset also reports the layout version
    assert_result_count(aggs, 1, path=ds.path, layout_version=1)

    # store clean direct result
    origres = ds.metadata(recursive=True)
    # basic sanity check
    assert_result_count(origres, 6)
    assert_result_count(origres, 3, type='dataset')
    assert_result_count(origres, 3, type='file')  # Now that we have annex.key
    # three different IDs
    assert_equal(3, len(set([s['dsid'] for s in origres if s['type'] == 'dataset'])))
    # and we know about all three datasets
    for name in ('MOTHER_äöü東', 'child_äöü東', 'grandchild_äöü東'):
        assert_true(
            sum([s['metadata']['frictionless_datapackage']['name'] \
                    == assure_unicode(name) for s in origres
                 if s['type'] == 'dataset']))

    # now clone the beast to simulate a new user installing an empty dataset
    clone = install(
        opj(path, 'clone'), source=ds.path,
        result_xfm='datasets', return_type='item-or-list')
    # ID mechanism works
    assert_equal(ds.id, clone.id)

    # get fresh metadata
    cloneres = clone.metadata()
    # basic sanity check
    assert_result_count(cloneres, 2)
    assert_result_count(cloneres, 1, type='dataset')
    assert_result_count(cloneres, 1, type='file')

    # now loop over the previous results from the direct metadata query of
    # origin and make sure we get the exact same stuff from the clone
    _compare_metadata_helper(origres, clone)

    # now obtain a subdataset in the clone, should make no difference
    assert_status('ok', clone.install('sub', result_xfm=None, return_type='list'))
    _compare_metadata_helper(origres, clone)

    # test search in search tests, not all over the place
    ## query smoke test
    assert_result_count(clone.search('mother', mode='egrep'), 1)
    assert_result_count(clone.search('(?i)MoTHER', mode='egrep'), 1)

    child_res = clone.search('child', mode='egrep')
    assert_result_count(child_res, 2)
    for r in child_res:
        if r['type'] == 'dataset':
            assert_in(
                r['query_matched']['frictionless_datapackage.name'],
                r['metadata']['frictionless_datapackage']['name'])
Example #23
def test_CapturedException():

    try:
        raise Exception("BOOM")
    except Exception as e:
        captured_exc = CapturedException(e)

    assert_re_in(
        r"BOOM \[test_captured_exception.py:test_CapturedException:[0-9]+\]",
        captured_exc.format_oneline_tb())
    assert_re_in(
        r"^\[.*\]",
        captured_exc.format_oneline_tb(include_str=False))  # only traceback

    try:
        raise NotImplementedError
    except Exception as e:
        captured_exc = CapturedException(e)

    assert_re_in(
        r"NotImplementedError \[test_captured_exception.py:test_CapturedException:[0-9]+\]",
        captured_exc.format_oneline_tb())

    def f():
        def f2():
            raise Exception("my bad again")

        try:
            f2()
        except Exception as e:
            # exception chain
            raise RuntimeError("new message") from e

    try:
        f()
    except Exception as e:
        captured_exc = CapturedException(e)

    # default limit: one level:
    estr1 = captured_exc.format_oneline_tb(limit=1)
    estr2 = captured_exc.format_oneline_tb(limit=2)
    # and we can control it via environ/config by default
    try:
        with patch.dict('os.environ', {'DATALAD_EXC_STR_TBLIMIT': '3'}):
            cfg.reload()
            estr3 = captured_exc.format_oneline_tb()
        with patch.dict('os.environ', {}, clear=True):
            cfg.reload()
            estr_ = captured_exc.format_oneline_tb()
    finally:
        cfg.reload()  # make sure we don't have a side effect on other tests

    estr_full = captured_exc.format_oneline_tb(10)

    assert_re_in(
        r"new message \[test_captured_exception.py:test_CapturedException:[0-9]+,test_captured_exception.py:f:[0-9]+,test_captured_exception.py:f:[0-9]+,test_captured_exception.py:f2:[0-9]+\]",
        estr_full)
    assert_re_in(
        r"new message \[test_captured_exception.py:f:[0-9]+,test_captured_exception.py:f:[0-9]+,test_captured_exception.py:f2:[0-9]+\]",
        estr3)
    assert_re_in(
        r"new message \[test_captured_exception.py:f:[0-9]+,test_captured_exception.py:f2:[0-9]+\]",
        estr2)
    assert_re_in(r"new message \[test_captured_exception.py:f2:[0-9]+\]", estr1)
    assert_equal(estr_, estr1)

    # standard output
    full_display = captured_exc.format_standard().splitlines()

    assert_equal(full_display[0], "Traceback (most recent call last):")
    # frames in f and f2 for the first exception, with two lines each
    # (where the line is and what the line reads):
    assert_true(full_display[1].lstrip().startswith("File"))
    assert_equal(full_display[2].strip(), "f2()")
    assert_true(full_display[3].lstrip().startswith("File"))
    assert_equal(full_display[4].strip(), "raise Exception(\"my bad again\")")
    assert_equal(full_display[5].strip(), "Exception: my bad again")
    assert_equal(
        full_display[7].strip(),
        "The above exception was the direct cause of the following exception:")
    assert_equal(full_display[9], "Traceback (most recent call last):")
    # ...
    assert_equal(full_display[-1].strip(), "RuntimeError: new message")

    # now logging / __str__:
    try:
        with patch.dict('os.environ', {'DATALAD_LOG_EXC': '1'}):
            cfg.reload()
            assert_re_in(
                r"new message \[test_captured_exception.py:f2:[0-9]+\]",
                str(captured_exc))

        with patch.dict('os.environ', {'DATALAD_LOG_EXC': '0'}):
            cfg.reload()
            assert_equal("", str(captured_exc))
    finally:
        cfg.reload()  # make sure we don't have a side effect on other tests
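
Example #23 exercises CapturedException fairly exhaustively. For context, a minimal usage sketch outside of a test could look like the one below; the import path and the sample output shown in the comment are assumptions based on the behavior asserted above, not verbatim DataLad output.

from datalad.support.exceptions import CapturedException

def risky():
    raise ValueError("boom")

try:
    risky()
except Exception as e:
    ce = CapturedException(e)
    # compact one-line summary with a short traceback, e.g.
    # "boom [myscript.py:<module>:8,myscript.py:risky:4]"
    print(ce.format_oneline_tb())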