def test_add_archive_use_archive_dir(repo_path):
    repo = AnnexRepo(repo_path, create=True)
    with chpwd(repo_path):
        # Let's add first archive to the repo with default setting
        archive_path = opj('4u', '1.tar.gz')
        # check it gives informative error if archive is not already added
        with assert_raises(RuntimeError) as cmr:
            add_archive_content(archive_path)
        assert_re_in(
            "You should run ['\"]datalad save 4u\\\\1\\.tar\\.gz['\"] first"
            if on_windows else
            "You should run ['\"]datalad save 4u/1\\.tar\\.gz['\"] first",
            str(cmr.exception), match=False)
        with swallow_outputs():
            repo.add(archive_path)
        repo.commit("added 1.tar.gz")

        ok_archives_caches(repo.path, 0)
        add_archive_content(archive_path, strip_leading_dirs=True,
                            use_current_dir=True)
        ok_(not exists(opj('4u', '1 f.txt')))
        ok_file_under_git(repo.path, '1 f.txt', annexed=True)
        ok_archives_caches(repo.path, 0)

        # and now let's extract under archive dir
        add_archive_content(archive_path, strip_leading_dirs=True)
        ok_file_under_git(repo.path, opj('4u', '1 f.txt'), annexed=True)
        ok_archives_caches(repo.path, 0)

        add_archive_content(opj('4u', 'sub.tar.gz'))
        ok_file_under_git(repo.path, opj('4u', 'sub', '2 f.txt'), annexed=True)
        ok_archives_caches(repo.path, 0)

def test_get_mixed_hierarchy(src, path):
    origin = Dataset(src).create(no_annex=True)
    origin_sub = origin.create('subds')
    with open(opj(origin.path, 'file_in_git.txt'), "w") as f:
        f.write('no idea')
    with open(opj(origin_sub.path, 'file_in_annex.txt'), "w") as f:
        f.write('content')
    origin.add('file_in_git.txt', to_git=True)
    origin_sub.add('file_in_annex.txt')
    origin.save(all_changes=True)

    # now, install that thing:
    ds, subds = install(path, source=src, recursive=True)
    ok_(subds.repo.file_has_content("file_in_annex.txt") is False)

    # and get:
    with swallow_logs(new_level=logging.DEBUG) as cml:
        result = ds.get(curdir, recursive=True)
        assert_re_in('.*Found no annex at {0}. Skipped.'.format(ds),
                     cml.out, flags=re.DOTALL)
        eq_(len(result), 1)
        eq_(result[0]['file'], opj("subds", "file_in_annex.txt"))
        ok_(result[0]['success'] is True)
        ok_(subds.repo.file_has_content("file_in_annex.txt") is True)

def test_logging_to_a_file(dst):
    ok_(not exists(dst))

    lgr = LoggerHelper("dataladtest-1").get_initialized_logger(logtarget=dst)
    ok_(exists(dst))  # nothing was logged -- no file created

    msg = "Oh my god, they killed Kenny"
    lgr.error(msg)
    with open(dst) as f:
        lines = f.readlines()
    assert_equal(len(lines), 1, "Read more than a single log line: %s" % lines)
    line = lines[0]
    ok_(msg in line)
    ok_('\033[' not in line,
        msg="There should be no color formatting in log files. Got: %s" % line)
    # verify that time stamp and level are present in the log line
    # do not want to rely on not having race conditions around date/time changes
    # so matching just with regexp
    # (...)? is added to swallow possible traceback logs
    regex = "\[ERROR\]"
    if EnsureBool()(dl_cfg.get('datalad.log.timestamp', False)):
        regex = "\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3} " + regex
    if EnsureBool()(dl_cfg.get('datalad.log.vmem', False)):
        regex += ' RSS/VMS: \S+/\S+( \S+)?\s*'
    regex += "(\s+\S+\s*)? " + msg
    assert_re_in(regex, line, match=True)

    # Close all handlers so windows is happy -- apparently not closed fast enough
    for handler in lgr.handlers:
        handler.close()

def test_get_mixed_hierarchy(src, path):
    origin = Dataset(src).create(no_annex=True)
    origin_sub = origin.create('subds')
    with open(opj(origin.path, 'file_in_git.txt'), "w") as f:
        f.write('no idea')
    with open(opj(origin_sub.path, 'file_in_annex.txt'), "w") as f:
        f.write('content')
    origin.add('file_in_git.txt', to_git=True)
    origin_sub.add('file_in_annex.txt')
    origin.save(auto_add_changes=True)

    # now, install that thing:
    ds, subds = install(path, source=src, recursive=True)
    ok_(subds.repo.file_has_content("file_in_annex.txt") is False)

    # and get:
    with swallow_logs(new_level=logging.DEBUG) as cml:
        result = ds.get(curdir, recursive=True)
        assert_re_in('.*Found no annex at {0}. Skipped.'.format(ds),
                     cml.out, flags=re.DOTALL)
        eq_(len(result), 1)
        eq_(result[0]['file'], opj("subds", "file_in_annex.txt"))
        ok_(result[0]['success'] is True)
        ok_(subds.repo.file_has_content("file_in_annex.txt") is True)

def test_logging_to_a_file(dst):
    ok_(not exists(dst))

    lgr = LoggerHelper("dataladtest-1").get_initialized_logger(logtarget=dst)
    ok_(exists(dst))  # nothing was logged -- no file created

    msg = "Oh my god, they killed Kenny"
    lgr.error(msg)
    with open(dst) as f:
        lines = f.readlines()
    assert_equal(len(lines), 1, "Read more than a single log line: %s" % lines)
    line = lines[0]
    ok_(msg in line)
    ok_('\033[' not in line,
        msg="There should be no color formatting in log files. Got: %s" % line)
    # verify that time stamp and level are present in the log line
    # do not want to rely on not having race conditions around date/time changes
    # so matching just with regexp
    # (...)? is added to swallow possible traceback logs
    regex = "\[ERROR\]"
    if EnsureBool()(cfg.get('datalad.log.timestamp', False)):
        regex = "\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3} " + regex
    if EnsureBool()(cfg.get('datalad.log.vmem', False)):
        regex += ' RSS/VMS: \S+/\S+( \S+)?\s*'
    regex += "(\s+\S+\s*)? " + msg
    assert_re_in(regex, line, match=True)

    # Close all handlers so windows is happy -- apparently not closed fast enough
    for handler in lgr.handlers:
        handler.close()

def test_utils_suppress_similar():
    tu = TestUtils()

    # Check suppression boundary for straight chain of similar
    # messages.

    def n_foo(number):
        for i in range(number):
            yield dict(action="foo",
                       status="ok",
                       path="path{}".format(i))

    with _swallow_outputs() as cmo:
        cmo.isatty = lambda: True
        list(tu(9, result_fn=n_foo, result_renderer="default"))
        assert_in("path8", cmo.out)
        assert_not_in("suppressed", cmo.out)

    with _swallow_outputs() as cmo:
        list(tu(10, result_fn=n_foo, result_renderer="default"))
        assert_in("path9", cmo.out)
        assert_not_in("suppressed", cmo.out)

    with _swallow_outputs() as cmo:
        list(tu(11, result_fn=n_foo, result_renderer="default"))
        assert_not_in("path10", cmo.out)
        assert_re_in(r"[^-0-9]1 .* suppressed", cmo.out, match=False)

    with _swallow_outputs() as cmo:
        list(tu(13, result_fn=n_foo, result_renderer="default"))
        assert_not_in("path10", cmo.out)
        # We see an update for each result.
        assert_re_in(r"1 .* suppressed", cmo.out, match=False)
        assert_re_in(r"2 .* suppressed", cmo.out, match=False)
        assert_re_in(r"3 .* suppressed", cmo.out, match=False)

    with _swallow_outputs(isatty=False) as cmo:
        list(tu(11, result_fn=n_foo, result_renderer="default"))
        assert_in("path10", cmo.out)

    # Check a chain of similar messages, split in half by a distinct one.

    def n_foo_split_by_a_bar(number):
        half = number // 2 - 1
        for i in range(number):
            yield dict(action="foo",
                       status="ok",
                       path="path{}".format(i))
            if i == half:
                yield dict(action="bar",
                           status="ok",
                           path="path")

    with _swallow_outputs() as cmo:
        list(tu(20, result_fn=n_foo_split_by_a_bar, result_renderer="default"))
        assert_in("path10", cmo.out)
        assert_in("path19", cmo.out)
        assert_not_in("suppressed", cmo.out)

    with _swallow_outputs() as cmo:
        list(tu(21, result_fn=n_foo_split_by_a_bar, result_renderer="default"))
        assert_in("path10", cmo.out)
        assert_not_in("path20", cmo.out)
        assert_re_in("[^-0-9]1 .* suppressed", cmo.out, match=False)

def test_incorrect_msg_interpolation():
    with assert_raises(TypeError) as cme:
        TestUtils2().__call__()
    # this must be our custom exception
    assert_re_in("Failed to render.*kaboom.*not enough arguments",
                 str(cme.exception))

    # there should be no exception if the path reported in the record contains %
    TestUtils2().__call__("%eatthis")

def test_switch_re():
    ran = []

    def n2(data):
        for i in range(2):
            ran.append(len(ran))
            yield updated(data, {'f2': 'x_%d' % i})

    switch_node = switch(
        'f1',
        OrderedDict([
            ('m[13]', sub({'f2': {'_': '1'}})),
            # should be able to consume nodes and pipelines
            ('m[23]', [n2]),
            ('emp.*', None),  # just return input
        ]),
        re=True)

    out = list(switch_node({'f1': 'm123', 'f2': 'x_'}))
    assert_equal(out, [{'f1': 'm123', 'f2': 'x1'}])
    assert_equal(ran, [])

    # if there is a value for which no mapping exists, by default it would fail
    data_missing = {'f1': 'xxxxx', 'f2': 'x_'}
    with assert_raises(KeyError) as cme:
        list(switch_node(data_missing))
    assert_re_in('Found no matches for f1 == .xxxxx. matching one of',
                 cme.exception.args)

    # but in the 2nd case, the thing is a sub-pipeline, so it behaves as such
    # without spitting out its output
    out = list(switch_node({'f1': 'm2', 'f2': 'x_'}))
    assert_equal(out, _out([{'f1': 'm2', 'f2': 'x_'}]))
    assert_equal(ran, [0, 1])  # but does execute just fine

    # and if it matches both -- we need to get all outputs
    for i in range(len(ran)):
        ran.remove(i)
    out = list(switch_node({'f1': 'm3', 'f2': 'x_'}))
    assert_equal(out,
                 [{'f1': 'm3', 'f2': 'x1'}] + _out([{'f1': 'm3', 'f2': 'x_'}]))
    assert_equal(ran, [0, 1])  # and does execute just as fine

    # empty match
    out = list(switch_node({'f1': 'empty', 'f2': 'x_'}))
    assert_equal(out, [{'f1': 'empty', 'f2': 'x_'}])

def test_switch_re():
    ran = []

    def n2(data):
        for i in range(2):
            ran.append(len(ran))
            yield updated(data, {'f2': 'x_%d' % i})

    switch_node = switch(
        'f1',
        OrderedDict([
            ('m[13]', sub({'f2': {'_': '1'}})),
            # should be able to consume nodes and pipelines
            ('m[23]', [n2]),
            ('emp.*', None),  # just return input
        ]),
        re=True
    )

    out = list(switch_node({'f1': 'm123', 'f2': 'x_'}))
    assert_equal(out, [{'f1': 'm123', 'f2': 'x1'}])
    assert_equal(ran, [])

    # if there is a value for which no mapping exists, by default it would fail
    data_missing = {'f1': 'xxxxx', 'f2': 'x_'}
    with assert_raises(KeyError) as cme:
        list(switch_node(data_missing))
    assert_re_in('Found no matches for f1 == .xxxxx. matching one of',
                 cme.exception.args)

    # but in the 2nd case, the thing is a sub-pipeline, so it behaves as such
    # without spitting out its output
    out = list(switch_node({'f1': 'm2', 'f2': 'x_'}))
    assert_equal(out, _out([{'f1': 'm2', 'f2': 'x_'}]))
    assert_equal(ran, [0, 1])  # but does execute just fine

    # and if it matches both -- we need to get all outputs
    for i in range(len(ran)):
        ran.remove(i)
    out = list(switch_node({'f1': 'm3', 'f2': 'x_'}))
    assert_equal(out,
                 [{'f1': 'm3', 'f2': 'x1'}] + _out([{'f1': 'm3', 'f2': 'x_'}]))
    assert_equal(ran, [0, 1])  # and does execute just as fine

    # empty match
    out = list(switch_node({'f1': 'empty', 'f2': 'x_'}))
    assert_equal(out, [{'f1': 'empty', 'f2': 'x_'}])

def test_assert_re_in():
    assert_re_in(".*", "")
    assert_re_in(".*", ["any"])
    # should do match not search
    assert_re_in("ab", "abc")
    assert_raises(AssertionError, assert_re_in, "ab", "cab")
    assert_raises(AssertionError, assert_re_in, "ab$", "abc")

    # Sufficient to have one entry matching
    assert_re_in("ab", ["", "abc", "laskdjf"])
    assert_raises(AssertionError, assert_re_in, "ab$", ["ddd", ""])

    # Tuples should be ok too
    assert_re_in("ab", ("", "abc", "laskdjf"))
    assert_raises(AssertionError, assert_re_in, "ab$", ("ddd", ""))

    # shouldn't "match" the empty list
    assert_raises(AssertionError, assert_re_in, "", [])

def check_incorrect_option(opts, err_str):
    # The first line used to be:
    # stdout, stderr = run_main((sys.argv[0],) + opts, expect_stderr=True, exit_code=2)
    # But: what do we expect to be in sys.argv[0] here?
    # It depends on how we invoke the test.
    # - nosetests -s -v datalad/cmdline/tests/test_main.py would result in:
    #   sys.argv[0]=='nosetests'
    # - python -m nose -s -v datalad/cmdline/tests/test_main.py would result in:
    #   sys.argv[0]=='python -m nose'
    # - python -c "import nose; nose.main()" -s -v datalad/cmdline/tests/test_main.py would result in:
    #   sys.argv[0]=='-c'
    # This led to failure in case sys.argv[0] contained an option, that was
    # defined to be a datalad option too, therefore was a 'known_arg' and was
    # checked to meet its constraints.
    # But sys.argv[0] actually isn't used by main at all. It simply doesn't
    # matter what's in there. The only thing important to pass here is `opts`.
    stdout, stderr = run_main(opts, expect_stderr=True, exit_code=2)
    out = stdout + stderr
    assert_in("usage: ", out)
    assert_re_in(err_str, out, match=False)

def test_exc_str():
    try:
        raise Exception("my bad")
    except Exception as e:
        estr = exc_str(e)
    assert_re_in("my bad \[test_dochelpers.py:test_exc_str:...\]", estr)

    def f():
        def f2():
            raise Exception("my bad again")
        f2()

    try:
        f()
    except Exception as e:
        # default one:
        estr2 = exc_str(e, 2)
        estr1 = exc_str(e, 1)
        # and we can control it via environ by default
        with patch.dict('os.environ', {'DATALAD_EXC_STR_TBLIMIT': '3'}):
            estr3 = exc_str(e)
        with patch.dict('os.environ', {}, clear=True):
            estr_ = exc_str()

    assert_re_in(
        "my bad again \[test_dochelpers.py:test_exc_str:...,test_dochelpers.py:f:...,test_dochelpers.py:f2:...\]",
        estr3)
    assert_re_in(
        "my bad again \[test_dochelpers.py:f:...,test_dochelpers.py:f2:...\]",
        estr2)
    assert_re_in("my bad again \[test_dochelpers.py:f2:...\]", estr1)
    assert_equal(estr_, estr1)

    try:
        raise NotImplementedError
    except Exception as e:
        assert_re_in(
            "NotImplementedError\(\) \[test_dochelpers.py:test_exc_str:...\]",
            exc_str(e))

def test_interface():
    di = Demo()

    import argparse
    parser = argparse.ArgumentParser()

    di.setup_parser(parser)
    with swallow_outputs() as cmo:
        assert_equal(parser.print_help(), None)
        assert cmo.out
        assert_equal(cmo.err, '')
    args = parser.parse_args(['42', '11', '1', '2', '--demoarg', '23'])
    assert_is(args.demoarg, 23)
    assert_equal(args.demoposarg, [42, 11])
    assert_equal(args.demooptposarg1, 1)
    assert_equal(args.demooptposarg2, 2)

    # wrong type
    with swallow_outputs() as cmo:
        assert_raises(SystemExit, parser.parse_args, ['--demoarg', 'abc'])
        # that is what we dump upon folks atm. TODO: improve reporting of ill-specified options
        assert_re_in(".*invalid constraint:int value:.*", cmo.err, re.DOTALL)

    # missing argument to option
    with swallow_outputs() as cmo:
        assert_raises(SystemExit, parser.parse_args, ['--demoarg'])
        assert_re_in(".*--demoarg: expected one argument", cmo.err, re.DOTALL)

    # missing positional argument
    with swallow_outputs() as cmo:
        assert_raises(SystemExit, parser.parse_args, [''])
        # PY2|PY3
        assert_re_in(
            ".*error: (too few arguments|the following arguments are required: demoposarg)",
            cmo.err, re.DOTALL)

def test_within_ds_file_search(path):
    try:
        import mutagen
    except ImportError:
        raise SkipTest
    ds = Dataset(path).create(force=True)
    # override default and search for datasets and files for this test
    for m in ('egrep', 'textblob', 'autofield'):
        ds.config.add('datalad.search.index-{}-documenttype'.format(m), 'all',
                      where='dataset')
    ds.config.add('datalad.metadata.nativetype', 'audio', where='dataset')
    makedirs(opj(path, 'stim'))
    for src, dst in (('audio.mp3', opj('stim', 'stim1.mp3')),):
        copy(opj(dirname(dirname(__file__)), 'tests', 'data', src),
             opj(path, dst))
    ds.save()
    ok_file_under_git(path, opj('stim', 'stim1.mp3'), annexed=True)
    # If it is not under annex, below addition of metadata silently does
    # not do anything
    ds.repo.set_metadata(opj('stim', 'stim1.mp3'), init={'importance': 'very'})
    ds.aggregate_metadata()
    assert_repo_status(ds.path)
    # basic sanity check on the metadata structure of the dataset
    dsmeta = ds.metadata('.', reporton='datasets')[0]['metadata']
    for src in ('audio',):
        # something for each one
        assert_in(src, dsmeta)
        # each src declares its own context
        assert_in('@context', dsmeta[src])
        # we have a unique content metadata summary for each src
        assert_in(src, dsmeta['datalad_unique_content_properties'])

    # test default behavior
    with swallow_outputs() as cmo:
        ds.search(show_keys='name', mode='textblob')
        assert_in("""\
id
meta
parentds
path
type
""", cmo.out)

    target_out = """\
annex.importance
annex.key
audio.bitrate
audio.duration(s)
audio.format
audio.music-Genre
audio.music-album
audio.music-artist
audio.music-channels
audio.music-sample_rate
audio.name
audio.tracknumber
datalad_core.id
datalad_core.refcommit
id
parentds
path
type
"""

    # test default behavior while limiting set of keys reported
    with swallow_outputs() as cmo:
        ds.search(['\.id', 'artist$'], show_keys='short')
        out_lines = [l for l in cmo.out.split(os.linesep) if l]
        # test that only the ones matching were returned
        assert_equal([l for l in out_lines if not l.startswith(' ')],
                     ['audio.music-artist', 'datalad_core.id'])
        # more specific test which would also test formatting
        assert_equal(
            out_lines,
            ['audio.music-artist',
             ' in 1 datasets',
             " has 1 unique values: 'dlartist'",
             'datalad_core.id',
             ' in 1 datasets',
             # we have them sorted
             " has 1 unique values: '%s'" % ds.id
             ])

    with assert_raises(ValueError) as cme:
        ds.search('*wrong')
    assert_re_in(
        r"regular expression '\(\?i\)\*wrong' \(original: '\*wrong'\) is incorrect: ",
        str(cme.exception))

    # check generated autofield index keys
    with swallow_outputs() as cmo:
        ds.search(mode='autofield', show_keys='name')
        # it is impossible to assess what is different from that dump
        assert_in(target_out, cmo.out)

    assert_result_count(ds.search('blablob#'), 0)
    # now check that we can discover things from the aggregated metadata
    for mode, query, hitpath, matched in (
            ('egrep',
             ':mp3',
             opj('stim', 'stim1.mp3'),
             {'audio.format': 'mp3'}),
            # same as above, leading : is stripped, in indicates "ALL FIELDS"
            ('egrep',
             'mp3',
             opj('stim', 'stim1.mp3'),
             {'audio.format': 'mp3'}),
            # same as above, but with AND condition
            # get both matches
            ('egrep',
             ['mp3', 'type:file'],
             opj('stim', 'stim1.mp3'),
             {'type': 'file', 'audio.format': 'mp3'}),
            # case insensitive search
            ('egrep',
             'mp3',
             opj('stim', 'stim1.mp3'),
             {'audio.format': 'mp3'}),
            # field selection by expression
            ('egrep',
             'audio\.+:mp3',
             opj('stim', 'stim1.mp3'),
             {'audio.format': 'mp3'}),
            # random keyword query
            ('textblob',
             'mp3',
             opj('stim', 'stim1.mp3'),
             {'meta': 'mp3'}),
            # report which field matched with auto-field
            ('autofield',
             'mp3',
             opj('stim', 'stim1.mp3'),
             {'audio.format': 'mp3'}),
            # XXX next one is not supported by current text field analyser
            # decomposes the mime type in [mime, audio, mp3]
            # ('autofield',
            #  "'mime:audio/mp3'",
            #  opj('stim', 'stim1.mp3'),
            #  'audio.format', 'mime:audio/mp3'),
            # but this one works
            ('autofield',
             "'mime audio mp3'",
             opj('stim', 'stim1.mp3'),
             {'audio.format': 'mp3'}),
            # TODO extend with more complex queries to test whoosh
            # query language configuration
    ):
        res = ds.search(query, mode=mode, full_record=True)
        assert_result_count(
            res, 1, type='file', path=opj(ds.path, hitpath),
            # each file must report the ID of the dataset it is from, critical for
            # discovering related content
            dsid=ds.id)
        # in egrep we currently do not search unique values
        # and the queries above aim at files
        assert_result_count(res, 1 if mode == 'egrep' else 2)
        if mode != 'egrep':
            assert_result_count(res, 1, type='dataset', path=ds.path, dsid=ds.id)
        # test the key and specific value of the match
        for matched_key, matched_val in matched.items():
            assert_in(matched_key, res[-1]['query_matched'])
            assert_equal(res[-1]['query_matched'][matched_key], matched_val)

    # test a suggestion msg being logged if no hits and key is a bit off
    with swallow_logs(new_level=logging.INFO) as cml:
        res = ds.search('audio.formats:mp3 audio.bitsrate:1', mode='egrep')
        assert not res
        assert_in('Did you mean any of', cml.out)
        assert_in('audio.format', cml.out)
        assert_in('audio.bitrate', cml.out)

def test_aggregation(path):
    with chpwd(path):
        assert_raises(InsufficientArgumentsError, aggregate_metadata, None)
    # a hierarchy of three (super/sub)datasets, each with some native metadata
    ds = Dataset(opj(path, 'origin')).create(force=True)
    # before anything aggregated we would get nothing and only a log warning
    with swallow_logs(new_level=logging.WARNING) as cml:
        assert_equal(list(query_aggregated_metadata('all', ds, [])), [])
        assert_re_in('.*Found no aggregated metadata.*update', cml.out)
    ds.config.add('datalad.metadata.nativetype', 'frictionless_datapackage',
                  where='dataset')
    subds = ds.create('sub', force=True)
    subds.config.add('datalad.metadata.nativetype', 'frictionless_datapackage',
                     where='dataset')
    subsubds = subds.create('subsub', force=True)
    subsubds.config.add('datalad.metadata.nativetype', 'frictionless_datapackage',
                        where='dataset')
    ds.save(recursive=True)
    assert_repo_status(ds.path)
    # aggregate metadata from all subdatasets into any superdataset, including
    # intermediate ones
    res = ds.aggregate_metadata(recursive=True, update_mode='all')
    # we get success report for both subdatasets and the superdataset,
    # and they get saved
    assert_result_count(res, 3, status='ok', action='aggregate_metadata')
    assert_in_results(res, action='save', status="ok")
    # nice and tidy
    assert_repo_status(ds.path)

    # quick test of aggregate report
    aggs = ds.metadata(get_aggregates=True)
    # one for each dataset
    assert_result_count(aggs, 3)
    # mother also reports layout version
    assert_result_count(aggs, 1, path=ds.path, layout_version=1)

    # store clean direct result
    origres = ds.metadata(recursive=True)
    # basic sanity check
    assert_result_count(origres, 6)
    assert_result_count(origres, 3, type='dataset')
    assert_result_count(origres, 3, type='file')  # Now that we have annex.key
    # three different IDs
    assert_equal(
        3, len(set([s['dsid'] for s in origres if s['type'] == 'dataset'])))
    # and we know about all three datasets
    for name in ('MOTHER_äöü東', 'child_äöü東', 'grandchild_äöü東'):
        assert_true(
            sum([s['metadata']['frictionless_datapackage']['name'] \
                 == ensure_unicode(name)
                 for s in origres
                 if s['type'] == 'dataset']))

    # now clone the beast to simulate a new user installing an empty dataset
    clone = install(opj(path, 'clone'), source=ds.path,
                    result_xfm='datasets', return_type='item-or-list')
    # ID mechanism works
    assert_equal(ds.id, clone.id)

    # get fresh metadata
    cloneres = clone.metadata()
    # basic sanity check
    assert_result_count(cloneres, 2)
    assert_result_count(cloneres, 1, type='dataset')
    assert_result_count(cloneres, 1, type='file')

    # now loop over the previous results from the direct metadata query of
    # origin and make sure we get the exact same stuff from the clone
    _compare_metadata_helper(origres, clone)

    # now obtain a subdataset in the clone, should make no difference
    assert_status('ok', clone.install('sub', result_xfm=None, return_type='list'))
    _compare_metadata_helper(origres, clone)

    # test search in search tests, not all over the place
    ## query smoke test
    assert_result_count(clone.search('mother', mode='egrep'), 1)
    assert_result_count(clone.search('(?i)MoTHER', mode='egrep'), 1)

    child_res = clone.search('child', mode='egrep')
    assert_result_count(child_res, 2)
    for r in child_res:
        if r['type'] == 'dataset':
            assert_in(r['query_matched']['frictionless_datapackage.name'],
                      r['metadata']['frictionless_datapackage']['name'])

def test_addurls_dropped_urls(self, path):
    ds = Dataset(path).create(force=True)
    with swallow_logs(new_level=logging.WARNING) as cml:
        ds.addurls(self.json_file, "", "{subdir}//{name}")
        assert_re_in(r".*Dropped [0-9]+ row\(s\) that had an empty URL",
                     str(cml.out))

def test_addurls_dropped_urls(self, path):
    ds = Dataset(path).create(force=True)
    with chpwd(path), swallow_logs(new_level=logging.WARNING) as cml:
        ds.addurls(self.json_file, "", "{subdir}//{name}")
        assert_re_in(r".*Dropped [0-9]+ row\(s\) that had an empty URL",
                     str(cml.out))

def test_git_custom_calls(path, path2):
    # we need a GitRepo instance
    repo = GitRepo(path, create=True)
    with open(op.join(path, "cc_test.dat"), 'w') as f:
        f.write("test_git_custom_calls")

    out, err = repo._gitpy_custom_call('add', 'cc_test.dat')
    # actually executed:
    assert_in("cc_test.dat", repo.get_indexed_files())
    ok_(repo.dirty)

    # call using cmd_options:
    out, err = repo._gitpy_custom_call('commit',
                                       cmd_options={'m': 'added file'})
    ok_clean_git(path, annex=False)
    # check output:
    assert_in("1 file changed", out)
    assert_in("cc_test.dat", out)
    eq_('', err)

    # impossible 'add' call should raise ...
    assert_raises(GitCommandError, repo._gitpy_custom_call,
                  'add', 'not_existing', expect_fail=False)
    # .. except we expect it to fail:
    repo._gitpy_custom_call('add', 'not_existing', expect_fail=True)

    # log outputs:
    with swallow_logs(new_level=logging.DEBUG) as cm:
        out, err = repo._gitpy_custom_call('status',
                                           log_stdout=True,
                                           log_stderr=True)
        assert_in("On branch master", out)
        assert_in("nothing to commit", out)
        eq_("", err)
        for line in out.splitlines():
            assert_in("stdout| " + line, cm.out)

    # don't log outputs:
    with swallow_logs(new_level=logging.DEBUG) as cm:
        out, err = repo._gitpy_custom_call('status',
                                           log_stdout=False,
                                           log_stderr=False)
        assert_in("On branch master", out)
        assert_in("nothing to commit", out)
        eq_("", err)
        eq_("", cm.out)

    # use git_options:
    # Note: 'path2' doesn't contain a git repository
    with assert_raises(GitCommandError) as cm:
        repo._gitpy_custom_call('status', git_options={'C': path2})
    assert_in("-C %s status" % path2, str(cm.exception))
    assert_re_in("fatal: [Nn]ot a git repository",
                 str(cm.exception), match=False)

def check_incorrect_option(opts, err_str):
    stdout, stderr = run_main((sys.argv[0],) + opts,
                              expect_stderr=True, exit_code=2)
    out = stdout + stderr
    assert_in("usage: ", out)
    assert_re_in(err_str, out, match=False)

def test_container_from_subdataset(ds_path, src_subds_path, local_file):
    # prepare a to-be subdataset with a registered container
    src_subds = Dataset(src_subds_path).create()
    src_subds.containers_add(
        name="first",
        url=get_local_file_url(op.join(local_file, 'some_container.img')))
    # add it as subdataset to a super ds:
    ds = Dataset(ds_path).create()
    subds = ds.install("sub", source=src_subds_path)
    # add it again one level down to see actual recursion:
    subds.install("subsub", source=src_subds_path)

    # We come up empty without recursive:
    res = ds.containers_list(recursive=False, **RAW_KWDS)
    assert_result_count(res, 0)

    # query available containers from within super:
    res = ds.containers_list(recursive=True, **RAW_KWDS)
    assert_result_count(res, 2)
    assert_in_results(res, action="containers", refds=ds.path)

    # default location within the subdataset:
    target_path = op.join(subds.path,
                          '.datalad', 'environments', 'first', 'image')
    assert_result_count(
        res, 1, name='sub/first', type='file', action='containers',
        status='ok', path=target_path, parentds=subds.path)

    # not installed subdataset doesn't pose an issue:
    sub2 = ds.create("sub2")
    assert_result_count(ds.subdatasets(), 2, type="dataset")
    ds.uninstall("sub2")
    from datalad.tests.utils import assert_false
    assert_false(sub2.is_installed())

    # same results as before, not crashing or somehow confused by a not present
    # subds:
    res = ds.containers_list(recursive=True, **RAW_KWDS)
    assert_result_count(res, 2)
    assert_result_count(
        res, 1, name='sub/first', type='file', action='containers',
        status='ok', path=target_path, parentds=subds.path)

    # The default renderer includes the image names.
    with swallow_outputs() as out:
        ds.containers_list(recursive=True)
        lines = out.out.splitlines()
    assert_re_in("sub/first", lines)
    assert_re_in("sub/subsub/first", lines)
    # But we are careful not to render partial names from subdataset traversals
    # (i.e. we recurse with containers_list(..., result_renderer=None)).
    with assert_raises(AssertionError):
        assert_re_in("subsub/first", lines)

def test_utils_suppress_similar():
    tu = TestUtils()

    # Check suppression boundary for straight chain of similar
    # messages.

    # yield test results immediately to make test run fast
    sleep_dur = 0.0

    def n_foo(number):
        for i in range(number):
            yield dict(action="foo",
                       status="ok",
                       path="path{}".format(i))
            sleep(sleep_dur)

    with _swallow_outputs() as cmo:
        cmo.isatty = lambda: True
        list(tu(9, result_fn=n_foo, result_renderer="default"))
        assert_in("path8", cmo.out)
        assert_not_in("suppressed", cmo.out)

    with _swallow_outputs() as cmo:
        list(tu(10, result_fn=n_foo, result_renderer="default"))
        assert_in("path9", cmo.out)
        assert_not_in("suppressed", cmo.out)

    with _swallow_outputs() as cmo:
        list(tu(11, result_fn=n_foo, result_renderer="default"))
        assert_not_in("path10", cmo.out)
        assert_re_in(r"[^-0-9]1 .* suppressed", cmo.out, match=False)

    with _swallow_outputs() as cmo:
        # for this one test yield results slightly slower than 2Hz
        # such that we can see each individual suppression message
        # and not get caught by the rate limiter
        sleep_dur = 0.51
        list(tu(13, result_fn=n_foo, result_renderer="default"))
        assert_not_in("path10", cmo.out)
        # We see an update for each result.
        assert_re_in(r"1 .* suppressed", cmo.out, match=False)
        assert_re_in(r"2 .* suppressed", cmo.out, match=False)
        assert_re_in(r"3 .* suppressed", cmo.out, match=False)

    # make tests run fast again
    sleep_dur = 0.0

    with _swallow_outputs(isatty=False) as cmo:
        list(tu(11, result_fn=n_foo, result_renderer="default"))
        assert_in("path10", cmo.out)

    # Check a chain of similar messages, split in half by a distinct one.

    def n_foo_split_by_a_bar(number):
        half = number // 2 - 1
        for i in range(number):
            yield dict(action="foo",
                       status="ok",
                       path="path{}".format(i))
            if i == half:
                yield dict(action="bar",
                           status="ok",
                           path="path")

    with _swallow_outputs() as cmo:
        list(tu(20, result_fn=n_foo_split_by_a_bar, result_renderer="default"))
        assert_in("path10", cmo.out)
        assert_in("path19", cmo.out)
        assert_not_in("suppressed", cmo.out)

    with _swallow_outputs() as cmo:
        list(tu(21, result_fn=n_foo_split_by_a_bar, result_renderer="default"))
        assert_in("path10", cmo.out)
        assert_not_in("path20", cmo.out)
        assert_re_in("[^-0-9]1 .* suppressed", cmo.out, match=False)

def test_aggregation(path):
    with chpwd(path):
        assert_raises(InsufficientArgumentsError, aggregate_metadata, None)
    # a hierarchy of three (super/sub)datasets, each with some native metadata
    ds = Dataset(opj(path, 'origin')).create(force=True)
    # before anything aggregated we would get nothing and only a log warning
    with swallow_logs(new_level=logging.WARNING) as cml:
        assert_equal(list(query_aggregated_metadata('all', ds, [])), [])
        assert_re_in('.*Found no aggregated metadata.*update', cml.out)
    ds.config.add('datalad.metadata.nativetype', 'frictionless_datapackage',
                  where='dataset')
    subds = ds.create('sub', force=True)
    subds.config.add('datalad.metadata.nativetype', 'frictionless_datapackage',
                     where='dataset')
    subsubds = subds.create('subsub', force=True)
    subsubds.config.add('datalad.metadata.nativetype', 'frictionless_datapackage',
                        where='dataset')
    ds.add('.', recursive=True)
    ok_clean_git(ds.path)
    # aggregate metadata from all subdatasets into any superdataset, including
    # intermediate ones
    res = ds.aggregate_metadata(recursive=True, update_mode='all')
    # we get success report for both subdatasets and the superdataset,
    # and they get saved
    assert_result_count(res, 6)
    assert_result_count(res, 3, status='ok', action='aggregate_metadata')
    assert_result_count(res, 3, status='ok', action='save')
    # nice and tidy
    ok_clean_git(ds.path)

    # quick test of aggregate report
    aggs = ds.metadata(get_aggregates=True)
    # one for each dataset
    assert_result_count(aggs, 3)
    # mother also reports layout version
    assert_result_count(aggs, 1, path=ds.path, layout_version=1)

    # store clean direct result
    origres = ds.metadata(recursive=True)
    # basic sanity check
    assert_result_count(origres, 6)
    assert_result_count(origres, 3, type='dataset')
    assert_result_count(origres, 3, type='file')  # Now that we have annex.key
    # three different IDs
    assert_equal(
        3, len(set([s['dsid'] for s in origres if s['type'] == 'dataset'])))
    # and we know about all three datasets
    for name in ('MOTHER_äöü東', 'child_äöü東', 'grandchild_äöü東'):
        assert_true(
            sum([s['metadata']['frictionless_datapackage']['name'] \
                 == assure_unicode(name)
                 for s in origres
                 if s['type'] == 'dataset']))

    # now clone the beast to simulate a new user installing an empty dataset
    clone = install(opj(path, 'clone'), source=ds.path,
                    result_xfm='datasets', return_type='item-or-list')
    # ID mechanism works
    assert_equal(ds.id, clone.id)

    # get fresh metadata
    cloneres = clone.metadata()
    # basic sanity check
    assert_result_count(cloneres, 2)
    assert_result_count(cloneres, 1, type='dataset')
    assert_result_count(cloneres, 1, type='file')

    # now loop over the previous results from the direct metadata query of
    # origin and make sure we get the exact same stuff from the clone
    _compare_metadata_helper(origres, clone)

    # now obtain a subdataset in the clone, should make no difference
    assert_status('ok', clone.install('sub', result_xfm=None, return_type='list'))
    _compare_metadata_helper(origres, clone)

    # test search in search tests, not all over the place
    ## query smoke test
    assert_result_count(clone.search('mother', mode='egrep'), 1)
    assert_result_count(clone.search('(?i)MoTHER', mode='egrep'), 1)

    child_res = clone.search('child', mode='egrep')
    assert_result_count(child_res, 2)
    for r in child_res:
        if r['type'] == 'dataset':
            assert_in(
                r['query_matched']['frictionless_datapackage.name'],
                r['metadata']['frictionless_datapackage']['name'])

def test_CapturedException():
    try:
        raise Exception("BOOM")
    except Exception as e:
        captured_exc = CapturedException(e)

    assert_re_in(
        "BOOM \[test_captured_exception.py:test_CapturedException:[0-9]+\]",
        captured_exc.format_oneline_tb())
    assert_re_in(
        "^\[.*\]",
        captured_exc.format_oneline_tb(include_str=False))  # only traceback

    try:
        raise NotImplementedError
    except Exception as e:
        captured_exc = CapturedException(e)

    assert_re_in(
        "NotImplementedError \[test_captured_exception.py:test_CapturedException:[0-9]+\]",
        captured_exc.format_oneline_tb())

    def f():
        def f2():
            raise Exception("my bad again")
        try:
            f2()
        except Exception as e:
            # exception chain
            raise RuntimeError("new message") from e

    try:
        f()
    except Exception as e:
        captured_exc = CapturedException(e)

    # default limit: one level:
    estr1 = captured_exc.format_oneline_tb(limit=1)
    estr2 = captured_exc.format_oneline_tb(limit=2)
    # and we can control it via environ/config by default
    try:
        with patch.dict('os.environ', {'DATALAD_EXC_STR_TBLIMIT': '3'}):
            cfg.reload()
            estr3 = captured_exc.format_oneline_tb()
        with patch.dict('os.environ', {}, clear=True):
            cfg.reload()
            estr_ = captured_exc.format_oneline_tb()
    finally:
        cfg.reload()  # make sure we don't have a side effect on other tests

    estr_full = captured_exc.format_oneline_tb(10)

    assert_re_in(
        "new message \[test_captured_exception.py:test_CapturedException:[0-9]+,test_captured_exception.py:f:[0-9]+,test_captured_exception.py:f:[0-9]+,test_captured_exception.py:f2:[0-9]+\]",
        estr_full)
    assert_re_in(
        "new message \[test_captured_exception.py:f:[0-9]+,test_captured_exception.py:f:[0-9]+,test_captured_exception.py:f2:[0-9]+\]",
        estr3)
    assert_re_in(
        "new message \[test_captured_exception.py:f:[0-9]+,test_captured_exception.py:f2:[0-9]+\]",
        estr2)
    assert_re_in("new message \[test_captured_exception.py:f2:[0-9]+\]", estr1)
    assert_equal(estr_, estr1)

    # standard output
    full_display = captured_exc.format_standard().splitlines()

    assert_equal(full_display[0], "Traceback (most recent call last):")
    # points in f and f2 for the first exception, with two lines each
    # (where the line is and what the line reads):
    assert_true(full_display[1].lstrip().startswith("File"))
    assert_equal(full_display[2].strip(), "f2()")
    assert_true(full_display[3].lstrip().startswith("File"))
    assert_equal(full_display[4].strip(), "raise Exception(\"my bad again\")")
    assert_equal(full_display[5].strip(), "Exception: my bad again")
    assert_equal(
        full_display[7].strip(),
        "The above exception was the direct cause of the following exception:")
    assert_equal(full_display[9], "Traceback (most recent call last):")
    # ...
    assert_equal(full_display[-1].strip(), "RuntimeError: new message")

    # now logging / __str__:
    try:
        with patch.dict('os.environ', {'DATALAD_LOG_EXC': '1'}):
            cfg.reload()
            assert_re_in(
                "new message \[test_captured_exception.py:f2:[0-9]+\]",
                str(captured_exc))
        with patch.dict('os.environ', {'DATALAD_LOG_EXC': '0'}):
            cfg.reload()
            assert_equal("", str(captured_exc))
    finally:
        cfg.reload()  # make sure we don't have a side effect on other tests