def test_logging_to_a_file(dst):
    ok_(not exists(dst))

    lgr = LoggerHelper("dataladtest-1").get_initialized_logger(logtarget=dst)
    ok_(exists(dst))  # nothing was logged -- no file created

    msg = "Oh my god, they killed Kenny"
    lgr.error(msg)
    with open(dst) as f:
        lines = f.readlines()
    assert_equal(len(lines), 1, "Read more than a single log line: %s" % lines)
    line = lines[0]
    ok_(msg in line)
    ok_(not '\033[' in line,
        msg="There should be no color formatting in log files. Got: %s" % line)
    # verify that time stamp and level are present in the log line
    # do not want to rely on not having race conditions around date/time changes
    # so matching just with regexp
    # .* is added to swallow possible traceback logs
    if EnsureBool()(cfg.get('datalad.log.timestamp', False)):
        ok_(re.match(
            "\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3} \[ERROR\](\s+\S+\s*)? %s" % msg,
            line))
    else:
        ok_(re.match("\[ERROR\](\s+\S+\s*)? %s" % msg, line))
def test_help():
    stdout, stderr = run_main(['--help'])
    # Let's extract section titles:
    # (wrap in list() so indexing/slicing works under Python 3 as well)
    sections = list(filter(re.compile('[a-zA-Z ]{4,50}:').match, stdout.split('\n')))
    ok_(sections[0].startswith('Usage:'))  # == Usage: nosetests [-h] if running using nose
    assert_equal(sections[1:], ['Positional arguments:', 'Options:'])
def test_color_enabled():
    # In the absence of NO_COLOR, follow ui.color, or ui.is_interactive if 'auto'
    with patch.dict(os.environ), \
            patch('datalad.support.ansi_colors.ui'):
        os.environ.pop('NO_COLOR', None)
        for is_interactive in (True, False):
            colors.ui.is_interactive = is_interactive
            with patch_config({'datalad.ui.color': 'off'}):
                assert_equal(colors.color_enabled(), False)
            with patch_config({'datalad.ui.color': 'on'}):
                assert_equal(colors.color_enabled(), True)
            with patch_config({'datalad.ui.color': 'auto'}):
                assert_equal(colors.color_enabled(), is_interactive)

    # In the presence of NO_COLOR, default to disable, unless ui.color is "on"
    # The value of NO_COLOR should have no effect, so try true-ish and false-ish values
    for NO_COLOR in ("", "1", "0"):
        with patch.dict(os.environ, {'NO_COLOR': NO_COLOR}), \
                patch('datalad.support.ansi_colors.ui'):
            for is_interactive in (True, False):
                colors.ui.is_interactive = is_interactive
                with patch_config({'datalad.ui.color': 'on'}):
                    assert_equal(colors.color_enabled(), True)
                for ui_color in ('off', 'auto'):
                    with patch_config({'datalad.ui.color': ui_color}):
                        assert_equal(colors.color_enabled(), False)
def test_dont_trip_over_missing_subds(path):
    ds1 = Dataset(opj(path, 'ds1')).create()
    ds2 = Dataset(opj(path, 'ds2')).create()
    subds2 = ds1.install(
        source=ds2.path, path='subds2',
        result_xfm='datasets', return_type='item-or-list')
    assert_true(subds2.is_installed())
    assert_in('subds2', ds1.subdatasets(result_xfm='relpaths'))
    subds2.uninstall()
    assert_in('subds2', ds1.subdatasets(result_xfm='relpaths'))
    assert_false(subds2.is_installed())
    # see if it wants to talk to github (and fail), or if it trips over something
    # before
    assert_raises(gh.BadCredentialsException,
                  ds1.create_sibling_github, 'bogus', recursive=True,
                  github_login='******')
    # inject remote config prior run
    assert_not_in('github', ds1.repo.get_remotes())
    # fail on existing
    ds1.repo.add_remote('github', 'http://nothere')
    assert_raises(ValueError,
                  ds1.create_sibling_github, 'bogus', recursive=True,
                  github_login='******')
    # talk to github when existing is OK
    assert_raises(gh.BadCredentialsException,
                  ds1.create_sibling_github, 'bogus', recursive=True,
                  github_login='******', existing='reconfigure')
    # return happy emptiness when all is skipped
    assert_equal(
        ds1.create_sibling_github(
            'bogus', recursive=True, github_login='******', existing='skip'),
        [])
def test_script_shims():
    runner = Runner()
    for script in [
            'datalad',
            'git-annex-remote-datalad-archives',
            'git-annex-remote-datalad']:
        if not on_windows:
            # those must be available for execution, and should not contain
            which, _ = runner(['which', script])
            # test if there is no easy install shim in there
            with open(which.rstrip()) as f:
                content = f.read()
        else:
            from distutils.spawn import find_executable
            content = find_executable(script)
        assert_not_in('EASY', content)  # NOTHING easy should be there
        assert_not_in('pkg_resources', content)
        # and let's check that it is our script
        out, err = runner([script, '--version'])
        version = (out + err).splitlines()[0].split(' ', 1)[1]
        # we can get git and non git .dev version... so for now
        # relax
        get_numeric_portion = lambda v: [x for x in v.split('.') if x.isdigit()]
        # extract numeric portion
        assert get_numeric_portion(version)  # that my lambda is correctish
        assert_equal(get_numeric_portion(__version__),
                     get_numeric_portion(version))
def test_logging_to_a_file(dst):
    ok_(not exists(dst))

    lgr = LoggerHelper("dataladtest-1").get_initialized_logger(logtarget=dst)
    ok_(exists(dst))  # nothing was logged -- no file created

    msg = "Oh my god, they killed Kenny"
    lgr.error(msg)
    with open(dst) as f:
        lines = f.readlines()
    assert_equal(len(lines), 1, "Read more than a single log line: %s" % lines)
    line = lines[0]
    ok_(msg in line)
    ok_('\033[' not in line,
        msg="There should be no color formatting in log files. Got: %s" % line)
    # verify that time stamp and level are present in the log line
    # do not want to rely on not having race conditions around date/time changes
    # so matching just with regexp
    # (...)? is added to swallow possible traceback logs
    regex = "\[ERROR\]"
    if EnsureBool()(cfg.get('datalad.log.timestamp', False)):
        regex = "\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3} " + regex
    if EnsureBool()(cfg.get('datalad.log.vmem', False)):
        regex += ' RSS/VMS: \S+/\S+( \S+)?\s*'
    regex += "(\s+\S+\s*)? " + msg
    assert_re_in(regex, line, match=True)

    # Close all handlers so windows is happy -- apparently not closed fast enough
    for handler in lgr.handlers:
        handler.close()
def test_install_subdataset(src, path):
    # get the superdataset:
    ds = install(path=path, source=src)

    # subdataset not installed:
    subds = Dataset(opj(path, 'sub1'))
    assert_false(subds.is_installed())

    # install it:
    ds.install('sub1')
    ok_(subds.is_installed())

    # Verify that it is the correct submodule installed and not
    # new repository initiated
    assert_equal(set(subds.repo.get_indexed_files()),
                 {'test.dat', 'INFO.txt', 'test-annex.dat'})

    # Now the obnoxious install an annex file within not yet
    # initialized repository!
    with swallow_outputs():  # progress bar
        ds.install(opj('sub2', 'test-annex.dat'))
    subds2 = Dataset(opj(path, 'sub2'))
    assert(subds2.is_installed())
    assert(subds2.repo.file_has_content('test-annex.dat'))
    # we shouldn't be able to silently ignore an attempt to provide a source
    # while "installing" a file under git
    assert_raises(FileInGitError, ds.install, opj('sub2', 'INFO.txt'),
                  source="http://bogusbogus")
def test_save_message_file(path):
    ds = Dataset(path).create()
    with assert_raises(IncompleteResultsError):
        ds.save("blah", message="me", message_file="and me")

    create_tree(path, {"foo": "x", "msg": "add foo"})
    ds.add("foo", save=False)
    ds.save(message_file=opj(ds.path, "msg"))
    assert_equal(ds.repo.repo.git.show("--format=%s", "--no-patch"),
                 "add foo")
def test_global_config():
    # from within tests, global config should be read from faked $HOME (see
    # setup_package)
    glb_cfg_file = Path(os.environ['HOME']) / '.gitconfig'
    assert any(
        glb_cfg_file.samefile(Path(p))
        for p in dl_cfg._stores['git']['files'])
    assert_equal(dl_cfg.get("user.name"), "DataLad Tester")
    assert_equal(dl_cfg.get("user.email"), "*****@*****.**")
def test_read_access(store_path, store_url, ds_path):
    ds = Dataset(ds_path).create()
    populate_dataset(ds)
    ds.save()

    if ds.repo.is_managed_branch():
        # TODO: on crippled FS copytree to populate store doesn't seem to work.
        #       Or may be it's just the serving via HTTP that doesn't work.
        #       Either way, after copytree and fsck, whereis doesn't report
        #       the store as an available source.
        raise SkipTest("Skip on crippled FS")

    files = [Path('one.txt'), Path('subdir') / 'two']
    store_path = Path(store_path)
    url = "ria+" + store_url
    init_opts = common_init_opts + ['url={}'.format(url)]

    io = LocalIO()
    create_store(io, store_path, '1')
    create_ds_in_store(io, store_path, ds.id, '2', '1')
    ds.repo.init_remote('ora-remote', options=init_opts)
    ds.repo.fsck(remote='ora-remote', fast=True)
    store_uuid = ds.siblings(name='ora-remote',
                             return_type='item-or-list')['annex-uuid']
    here_uuid = ds.siblings(name='here',
                            return_type='item-or-list')['annex-uuid']

    # nothing in store yet:
    for f in files:
        known_sources = ds.repo.whereis(str(f))
        assert_in(here_uuid, known_sources)
        assert_not_in(store_uuid, known_sources)

    annex_obj_target = str(store_path / ds.id[:3] / ds.id[3:]
                           / 'annex' / 'objects')
    shutil.rmtree(annex_obj_target)
    shutil.copytree(src=str(ds.repo.dot_git / 'annex' / 'objects'),
                    dst=annex_obj_target)

    ds.repo.fsck(remote='ora-remote', fast=True)
    # all in store now:
    for f in files:
        known_sources = ds.repo.whereis(str(f))
        assert_in(here_uuid, known_sources)
        assert_in(store_uuid, known_sources)

    ds.drop('.')
    res = ds.get('.')
    assert_equal(len(res), 2)
    assert_result_count(res, 2, status='ok', type='file', action='get',
                        message="from ora-remote...")
def test_ancient_annex():

    class _runner(object):
        def run(self, cmd):
            if '--raw' in cmd:
                raise CommandError
            return "git-annex version: 0.1", ""

    ev = ExternalVersions()
    with patch('datalad.support.external_versions._runner', _runner()):
        assert_equal(ev['cmd:annex'], '0.1')
def test_installationpath_from_url():
    for p in ('lastbit',
              'lastbit/',
              '/lastbit',
              'lastbit.git',
              'lastbit.git/',
              'http://example.com/lastbit',
              'http://example.com/lastbit.git',
              ):
        assert_equal(_installationpath_from_url(p), 'lastbit')
def test_no_stdin_swallow(fname):
    # will relay actual exit code on CommandError
    cmd = ['datalad', 'sshrun', 'localhost', 'cat']

    out, err = Runner().run(cmd, stdin=open(fname))
    assert_equal(out.rstrip(), '123magic')

    # test with -n switch now, which we could place even at the end
    out, err = Runner().run(cmd + ['-n'], stdin=open(fname))
    assert_equal(out, '')
def test_zip_archive(path):
    ds = Dataset(opj(path, 'ds')).create(force=True, no_annex=True)
    ds.add('.')
    with chpwd(path):
        ds.plugin('export_archive', filename='my', archivetype='zip')
        assert_true(os.path.exists('my.zip'))
        custom1_md5 = md5sum('my.zip')
        time.sleep(1.1)
        ds.plugin('export_archive', filename='my', archivetype='zip')
        assert_equal(md5sum('my.zip'), custom1_md5)
def test_add_message_file(path):
    ds = Dataset(path).create()
    with assert_raises(ValueError):
        ds.add("blah", message="me", message_file="and me")

    create_tree(path, {"foo": "x", "msg": u"add β"})
    ds.add("foo", message_file=opj(ds.path, "msg"))
    assert_equal(ds.repo.format_commit("%s"), u"add β")
def test_archive(path):
    ds = Dataset(opj(path, 'ds')).create(force=True)
    ds.add('.')
    committed_date = ds.repo.get_commit_date()
    default_outname = opj(path, 'datalad_{}.tar.gz'.format(ds.id))
    with chpwd(path):
        res = list(ds.export_archive())
        assert_status('ok', res)
        assert_result_count(res, 1)
        assert(isabs(res[0]['path']))
    assert_true(os.path.exists(default_outname))
    custom_outname = opj(path, 'myexport.tar.gz')
    # feed in without extension
    ds.export_archive(filename=custom_outname[:-7])
    assert_true(os.path.exists(custom_outname))
    custom1_md5 = md5sum(custom_outname)
    # encodes the original archive filename -> different checksum, despite
    # same content
    assert_not_equal(md5sum(default_outname), custom1_md5)
    # should really sleep so if they stop using time.time - we know
    time.sleep(1.1)
    ds.export_archive(filename=custom_outname)
    # should not encode mtime, so should be identical
    assert_equal(md5sum(custom_outname), custom1_md5)

    def check_contents(outname, prefix):
        with tarfile.open(outname) as tf:
            nfiles = 0
            for ti in tf:
                # any annex links resolved
                assert_false(ti.issym())
                ok_startswith(ti.name, prefix + '/')
                assert_equal(ti.mtime, committed_date)
                if '.datalad' not in ti.name:
                    # ignore any files in .datalad for this test to not be
                    # susceptible to changes in how much we generate a meta info
                    nfiles += 1
            # we have exactly four files (includes .gitattributes for default
            # MD5E backend), and expect no content for any directory
            assert_equal(nfiles, 4)

    check_contents(default_outname, 'datalad_%s' % ds.id)
    check_contents(custom_outname, 'myexport')

    # now lose some content
    if ds.repo.is_direct_mode():
        # in direct mode the add() above committed directly to the
        # annex/direct/master branch, hence drop will have no effect
        # (notneeded); this might be undesired behavior (or not), but this
        # is not the place to test for it
        return
    ds.drop('file_up', check=False)
    assert_raises(IOError, ds.export_archive, filename=opj(path, 'my'))
    ds.export_archive(filename=opj(path, 'partial'), missing_content='ignore')
    assert_true(os.path.exists(opj(path, 'partial.tar.gz')))
def test_ls_noarg(toppath):
    # smoke test pretty much
    AnnexRepo(toppath, create=True)

    # this test is pointless for now and until ls() actually returns
    # something
    with swallow_outputs():
        ls_out = ls(toppath)
        with chpwd(toppath):
            assert_equal(ls_out, ls([]))
            assert_equal(ls_out, ls('.'))
def assert_provides_and_raises(pc, exception, target=None):
    """Helper to get all results before exception is raised"""
    results = []
    with assert_raises(exception):
        for r in pc:
            results.append(r)
    # results should be sorted since we do not guarantee order
    results = sorted(results)
    if target is not None:
        assert_equal(results, target)
    return results
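# A minimal, hypothetical usage sketch for assert_provides_and_raises (not part
# of the original suite): wrap any iterable that is expected to fail part-way
# through and check both the exception type and the results yielded before it.
# It only assumes the same module-level imports the helper itself uses.
def _example_usage_of_assert_provides_and_raises():
    def flaky():
        yield 1
        yield 2
        raise ValueError("boom")
    # expects a ValueError after the first two items were produced
    assert_provides_and_raises(flaky(), ValueError, target=[1, 2])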
def test_ssh_option():
    # This test is hacky in that it depends on systems commonly configuring
    # `AcceptEnv LC_*` in their sshd_config. If it ends up causing problems, we
    # should just scrap it.
    with patch.dict('os.environ', {"LC_DATALAD_HACK": 'hackbert'}):
        with swallow_outputs() as cmo:
            # need to give smth with .fileno ;)
            main(["datalad", "sshrun", "-oSendEnv=LC_DATALAD_HACK",
                  "localhost", "echo $LC_DATALAD_HACK"])
            assert_equal(cmo.out.strip(), "hackbert")
def test_save_message_file(path):
    ds = Dataset(path).create()
    with assert_raises(ValueError):
        ds.save("blah", message="me", message_file="and me")

    create_tree(path, {"foo": "x", "msg": u"add β"})
    ds.add("foo", save=False)
    ds.save(message_file=opj(ds.path, "msg"))
    assert_equal(ds.repo.format_commit("%s"), u"add β")
def test_crawl_api_chdir(run_pipeline_, load_pipeline_from_config_, chpwd_):
    output, stats = crawl('some_path_not_checked', chdir='somedir')
    assert_equal(stats,
                 ActivityStats(datasets_crawled=1))  # nothing was done but we got it
    assert_equal(output, None)

    chpwd_.assert_called_with('somedir')
    load_pipeline_from_config_.assert_called_with('some_path_not_checked')
    run_pipeline_.assert_called_with(['pipeline'],
                                     stats=ActivityStats(datasets_crawled=1))
def test_runner_failure(dir_):
    from ..support.annexrepo import AnnexRepo
    repo = AnnexRepo(dir_, create=True)
    runner = Runner()
    failing_cmd = ['git', 'annex', 'add', 'notexistent.dat']

    with assert_raises(CommandError) as cme, \
            swallow_logs() as cml:
        runner.run(failing_cmd, cwd=dir_)
    assert_in('notexistent.dat not found', cml.out)
    assert_equal(1, cme.exception.code)
def test_no_stdin_swallow(fname):
    # will relay actual exit code on CommandError
    cmd = ['datalad', 'sshrun', 'datalad-test', 'cat']

    out = WitlessRunner().run(cmd, stdin=open(fname), protocol=StdOutCapture)
    assert_equal(out['stdout'].rstrip(), '123magic')

    # test with -n switch now, which we could place even at the end
    out = WitlessRunner().run(cmd + ['-n'], stdin=open(fname),
                              protocol=StdOutCapture)
    assert_equal(out['stdout'], '')
def check_producing_consumer(jobs):
    def producer():
        yield from range(3)

    def consumer(i):
        yield i
        if isinstance(i, int):
            pc.add_to_producer_queue(str(i**2))

    # we auto-detect generator function producer
    pc = ProducerConsumer(producer, consumer, jobs=jobs)
    assert_equal(list(pc), [0, 1, 2, "0", "1", "4"])
def test_ls_s3():
    url = 's3://datalad-test0-versioned/'
    with swallow_outputs():
        # just to skip if no credentials
        get_test_providers(url)

    with swallow_outputs() as cmo:
        res = ls(url)
        assert_equal(len(res), 17)  # all the entries
        counts = Counter(map(lambda x: x.__class__.__name__, res))
        assert_equal(counts, {'Key': 14, 'DeleteMarker': 3})
        assert_in('Bucket info:', cmo.out)
def test_find_files(d):
    assert_equal(
        sorted(list(sorted(x.items())) for x in find_files('.*', topdir=d)({})),
        [[('filename', '1'), ('path', d)], [('filename', '1.txt'), ('path', d)]])
    assert_equal(list(find_files('.*\.txt', topdir=d)({})),
                 [{'path': d, 'filename': '1.txt'}])
    assert_equal(list(find_files('notmatchable', topdir=d)({})), [])
    assert_raises(RuntimeError,
                  list,
                  find_files('notmatchable', topdir=d, fail_if_none=True)({}))

    # and fail_if_none should operate globally i.e. this should be fine
    ff = find_files('.*\.txt', topdir=d, fail_if_none=True)
    assert_equal(list(ff({})), [{'path': d, 'filename': '1.txt'}])
    os.unlink(opj(d, '1.txt'))
    assert_equal(list(ff({})), [])
def test_exit_code():
    # will relay actual exit code on CommandError
    cmd = ['datalad', 'sshrun', 'localhost', 'exit 42']
    with assert_raises(SystemExit) as cme:
        # running nosetests without -s
        if isinstance(sys.stdout, StringIO):  # pragma: no cover
            with swallow_outputs():  # need to give smth with .fileno ;)
                main(cmd)
        else:
            # to test both scenarios
            main(cmd)
    assert_equal(cme.exception.code, 42)
def _test(*args_):
    # print args_
    for args in args_:
        for recursive in [False, True]:
            # in both cases shouldn't fail
            with swallow_outputs() as cmo:
                ls(args, recursive=recursive)
                assert_equal(len(cmo.out.rstrip().split('\n')), len(args))
                assert_in('[annex]', cmo.out)
                assert_in('[git]', cmo.out)
                assert_in(DEFAULT_BRANCH, cmo.out)
                if "bogus" in args:
                    assert_in('unknown', cmo.out)
def test_int():
    c = ct.EnsureInt()
    # this should always work
    assert_equal(c(7), 7)
    assert_equal(c(7.0), 7)
    assert_equal(c('7'), 7)
    assert_equal(c([7, 3]), [7, 3])
    # this should always fail
    assert_raises(ValueError, lambda: c('fail'))
    assert_raises(ValueError, lambda: c([3, 'fail']))
    # this will also fail
    assert_raises(ValueError, lambda: c('17.0'))
    assert_equal(c.short_description(), 'int')
def test_machinesize():
    assert_equal(1.0, machinesize(1))
    for key, value in {
            'Byte': 0, 'Bytes': 0,
            'kB': 1, 'MB': 2, 'GB': 3, 'TB': 4, 'PB': 5}.items():
        assert_equal(1.0 * (1000 ** value), machinesize('1 ' + key))
    assert_raises(ValueError, machinesize, 't byte')
def test_bare(path):
    # can we handle a bare repo?
    gr = GitRepo(path, create=True, bare=True)
    # any sensible (and also our CI) test environment(s) should have this
    assert_in('user.name', gr.config)
    # not set something that wasn't there
    obscure_key = 'sec.reallyobscurename!@@.key'
    assert_not_in(obscure_key, gr.config)
    # to the local config, which is easily accessible
    gr.config.set(obscure_key, 'myvalue', where='local')
    assert_equal(gr.config.get(obscure_key), 'myvalue')
    # now make sure the config is where we think it is
    assert_in(obscure_key.split('.')[1], (gr.pathobj / 'config').read_text())
def check_contents(outname, prefix):
    with tarfile.open(outname) as tf:
        nfiles = 0
        for ti in tf:
            # any annex links resolved
            assert_false(ti.issym())
            ok_startswith(ti.name, prefix + '/')
            assert_equal(ti.mtime, committed_date)
            if '.datalad' not in ti.name:
                # ignore any files in .datalad for this test to not be
                # susceptible to changes in how much we generate a meta info
                nfiles += 1
        # we have exactly three files, and expect no content for any directory
        assert_equal(nfiles, 3)
def check_api(no_annex, path):
    ds = Dataset(path).create(force=True, no_annex=no_annex)
    ds.save()
    assert_repo_status(ds.path)

    processed_extractors, skipped_extractors = [], []
    for extractor_ep in iter_entry_points('datalad.metadata.extractors'):
        # we need to be able to query for metadata, even if there is none
        # from any extractor
        try:
            extractor_cls = extractor_ep.load()
        except Exception as exc:
            exc_ = str(exc)
            skipped_extractors += [exc_]
            continue
        extractor = extractor_cls(ds, paths=['file.dat'])
        meta = extractor.get_metadata(dataset=True, content=True)
        # we also get something for the dataset and something for the content
        # even if any of the two is empty
        assert_equal(len(meta), 2)
        dsmeta, contentmeta = meta
        assert (isinstance(dsmeta, dict))
        assert hasattr(contentmeta, '__len__') or isgenerator(contentmeta)
        # verify that generator does not blow and has an entry for our
        # precious file
        cm = dict(contentmeta)
        # datalad_core does provide some (not really) information about our
        # precious file
        if extractor_ep.name == 'datalad_core':
            assert 'file.dat' in cm
        elif extractor_ep.name == 'annex':
            if not no_annex:
                # verify correct key, which is the same for all files of 0 size
                assert_equal(
                    cm['file.dat']['key'],
                    'MD5E-s0--d41d8cd98f00b204e9800998ecf8427e.dat')
            else:
                # no metadata on that file
                assert not cm
        processed_extractors.append(extractor_ep.name)
    assert "datalad_core" in processed_extractors, \
        "Should have managed to find at least the core extractor"
    if skipped_extractors:
        raise SkipTest(
            "Not fully tested/succeeded since some extractors failed"
            " to load:\n%s" % ("\n".join(skipped_extractors)))
def test_url_keys(dspath, storepath):
    ds = Dataset(dspath).create()
    repo = ds.repo
    filename = 'url_no_size.html'
    # URL-type key without size
    repo.call_annex([
        'addurl', '--relaxed', '--raw', '--file', filename,
        'http://example.com',
    ])
    ds.save()
    # copy target
    ds.create_sibling_ria(
        name='ria',
        url='ria+file://{}'.format(storepath),
        storage_sibling='only',
    )
    ds.get(filename)
    repo.call_annex(['copy', '--to', 'ria', filename])
    ds.drop(filename)
    # in the store and on the web
    assert_equal(len(ds.repo.whereis(filename)), 2)
    # try download, but needs special permissions to even be attempted
    ds.config.set('annex.security.allow-unverified-downloads', 'ACKTHPPT',
                  where='local')
    repo.call_annex(['copy', '--from', 'ria', filename])
    assert_equal(len(ds.repo.whereis(filename)), 3)
    # smoke tests that execute the remaining pieces with the URL key
    repo.call_annex(['fsck', '-f', 'ria'])
    assert_equal(len(ds.repo.whereis(filename)), 3)
    # mapped key in whereis output
    assert_in('%%example', repo.call_annex(['whereis', filename]))

    repo.call_annex(['move', '-f', 'ria', filename])
    # check that it does not magically reappear, because it actually
    # did not drop the file
    repo.call_annex(['fsck', '-f', 'ria'])
    assert_equal(len(ds.repo.whereis(filename)), 2)
def test_color_word():
    s = 'word'
    green_s = '\033[1;32mword\033[0m'
    for enabled in (True, False):
        with patch('datalad.support.ansi_colors.color_enabled',
                   lambda: enabled):
            assert_equal(colors.color_word(s, colors.GREEN, force=True),
                         green_s)

    with patch('datalad.support.ansi_colors.color_enabled', lambda: True):
        assert_equal(colors.color_word(s, colors.GREEN), green_s)
        assert_equal(colors.color_word(s, colors.GREEN, force=False), green_s)

    with patch('datalad.support.ansi_colors.color_enabled', lambda: False):
        assert_equal(colors.color_word(s, colors.GREEN), s)
        assert_equal(colors.color_word(s, colors.GREEN, force=False), s)
def test_format_msg():
    fmt = r'a$BOLDb$RESETc$BOLDd$RESETe'
    for enabled in (True, False):
        with patch('datalad.support.ansi_colors.color_enabled',
                   lambda: enabled):
            assert_equal(colors.format_msg(fmt), 'abcde')
            assert_equal(colors.format_msg(fmt, use_color=False), 'abcde')

    with patch('datalad.support.ansi_colors.color_enabled', lambda: False):
        for use_color in (True, False):
            assert_equal(colors.format_msg(fmt), 'abcde')
            assert_equal(colors.format_msg(fmt, use_color=use_color), 'abcde')

    with patch('datalad.support.ansi_colors.color_enabled', lambda: True):
        assert_equal(colors.format_msg(fmt, use_color=True),
                     'a\033[1mb\033[0mc\033[1md\033[0me')
def test_color_status():
    # status -> (plain, colored)
    statuses = {
        'ok': ('ok', '\033[1;32mok\033[0m'),
        'notneeded': ('notneeded', '\033[1;32mnotneeded\033[0m'),
        'impossible': ('impossible', '\033[1;33mimpossible\033[0m'),
        'error': ('error', '\033[1;31merror\033[0m'),
        'invalid': ('invalid', 'invalid'),
    }

    for enabled in (True, False):
        with patch('datalad.support.ansi_colors.color_enabled',
                   lambda: enabled):
            for status, retopts in statuses.items():
                assert_equal(colors.color_status(status), retopts[enabled])
def test_safe_print():
    """Just to test that we are getting two attempts to print"""
    called = [0]

    def _print(s):
        assert_equal(s, "bua")
        called[0] += 1
        if called[0] == 1:
            raise UnicodeEncodeError('crap', u"", 0, 1, 'whatever')

    with patch.object(builtins, 'print', _print):
        safe_print("bua")
    assert_equal(called[0], 2)
def test_crawl_api_recursive(get_subdatasets_, run_pipeline_,
                             load_pipeline_from_config_,
                             get_repo_pipeline_script_path_,
                             get_lofilename_, chpwd_, tdir):
    pwd = getpwd()
    with chpwd(tdir):
        output, stats = crawl(recursive=True)
    assert_equal(pwd, getpwd())
    if external_versions['mock'] < '1.0.1':
        raise SkipTest(
            "needs a more recent mock which throws exceptions in side_effects")
    assert_equal(output, [[]] * 4 + [None])  # for now output is just a list of outputs
    assert_equal(
        stats,
        ActivityStats(datasets_crawled=5,
                      datasets_crawl_failed=1))  # nothing was done but we got it crawled
    chpwd_.assert_has_calls(
        [
            call(None),
            call('path1'),
            call('path1/path1_1'),
            call('path2'),
        ],
        any_order=True)
    assert_equal(
        list(find_files('.*', tdir, exclude_vcs=False)),
        [_path_(tdir, 'some.log')])  # no files were generated besides the log
def test_credentials_from_env():
    keyring = Keyring()
    cred = AWS_S3("test-s3", keyring=keyring)
    assert_false(cred.is_known)
    assert_equal(cred.get('key_id'), None)
    assert_equal(cred.get('secret_id'), None)

    with patch.dict('os.environ', {'DATALAD_test_s3_key_id': '1'}):
        assert_equal(cred.get('key_id'), '1')
        assert_false(cred.is_known)
        with patch.dict('os.environ', {'DATALAD_test_s3_secret_id': '2'}):
            assert_equal(cred.get('key_id'), '1')
            assert_equal(cred.get('secret_id'), '2')
            assert_true(cred.is_known)
    assert_false(cred.is_known)  # no memory of the past
def test_mutliple_targets(dst1, dst2):
    ok_(not exists(dst1))
    ok_(not exists(dst2))
    lgr = LoggerHelper("dataladtest-3").get_initialized_logger(
        logtarget="%s,%s" % (dst1, dst2))
    ok_(exists(dst1))
    ok_(exists(dst2))

    msg = "Oh my god, they killed Kenny"
    lgr.error(msg)
    for dst in (dst1, dst2):
        with open(dst) as f:
            lines = f.readlines()
        assert_equal(len(lines), 1,
                     "Read more than a single log line: %s" % lines)
        ok_(msg in lines[0])
def test_zip_archive(path):
    ds = Dataset(opj(path, 'ds')).create(force=True, no_annex=True)
    ds.save()
    with chpwd(path):
        ds.export_archive(filename='my', archivetype='zip')
        assert_true(os.path.exists('my.zip'))
        custom1_md5 = md5sum('my.zip')
        time.sleep(1.1)
        ds.export_archive(filename='my', archivetype='zip')
        assert_equal(md5sum('my.zip'), custom1_md5)

    # should be able to export without us cd'ing to that ds directory
    ds.export_archive(filename=ds.path, archivetype='zip')
    default_name = 'datalad_{}.zip'.format(ds.id)
    assert_true(os.path.exists(os.path.join(ds.path, default_name)))
def test_archive(path):
    ds = Dataset(opj(path, 'ds')).create(force=True)
    ds.save()
    committed_date = ds.repo.get_commit_date()
    default_outname = opj(path, 'datalad_{}.tar.gz'.format(ds.id))
    with chpwd(path):
        res = list(ds.export_archive())
        assert_status('ok', res)
        assert_result_count(res, 1)
        assert(isabs(res[0]['path']))
    assert_true(os.path.exists(default_outname))
    custom_outname = opj(path, 'myexport.tar.gz')
    # feed in without extension
    ds.export_archive(filename=custom_outname[:-7])
    assert_true(os.path.exists(custom_outname))
    custom1_md5 = md5sum(custom_outname)
    # encodes the original archive filename -> different checksum, despite
    # same content
    assert_not_equal(md5sum(default_outname), custom1_md5)
    # should really sleep so if they stop using time.time - we know
    time.sleep(1.1)
    ds.export_archive(filename=custom_outname)
    # should not encode mtime, so should be identical
    assert_equal(md5sum(custom_outname), custom1_md5)

    def check_contents(outname, prefix):
        with tarfile.open(outname) as tf:
            nfiles = 0
            for ti in tf:
                # any annex links resolved
                assert_false(ti.issym())
                ok_startswith(ti.name, prefix + '/')
                assert_equal(ti.mtime, committed_date)
                if '.datalad' not in ti.name:
                    # ignore any files in .datalad for this test to not be
                    # susceptible to changes in how much we generate a meta info
                    nfiles += 1
            # we have exactly four files (includes .gitattributes for default
            # MD5E backend), and expect no content for any directory
            assert_equal(nfiles, 4)

    check_contents(default_outname, 'datalad_%s' % ds.id)
    check_contents(custom_outname, 'myexport')

    # now lose some content
    ds.drop('file_up', check=False)
    assert_raises(IOError, ds.export_archive, filename=opj(path, 'my'))
    ds.export_archive(filename=opj(path, 'partial'), missing_content='ignore')
    assert_true(os.path.exists(opj(path, 'partial.tar.gz')))
def test_cred1_call():
    keyring = MemoryKeyring()
    cred = UserPassword("name", keyring=keyring)
    # we will set the name but not the password, expecting UI
    # requesting it
    assert_equal(keyring.set('name', 'user', 'user1'), None)
    assert_equal(keyring.get('name', 'user'), 'user1')
    assert_equal(cred(), {'user': '******', 'password': '******'})
    assert_equal(keyring.get('name', 'password'), 'password1')
def test_subds_path(path):
    # a dataset with a subdataset with a file, all neatly tracked
    ds = Dataset(path).create()
    subds = ds.create('sub')
    assert_repo_status(path)
    with (subds.pathobj / 'some.txt').open('w') as f:
        f.write(u'test')
    ds.save(recursive=True)
    assert_repo_status(path)

    # querying the toplevel dataset repo for a subdspath should
    # report the subdataset record in the dataset
    # (unlike `git status`, which is silent for subdataset paths),
    # but definitely not report the subdataset as deleted
    # https://github.com/datalad/datalad-revolution/issues/17
    stat = ds.repo.status(paths=[op.join('sub', 'some.txt')])
    assert_equal(list(stat.keys()), [subds.repo.pathobj])
    assert_equal(stat[subds.repo.pathobj]['state'], 'clean')
def test_mutliple_targets(dst1, dst2):
    ok_(not exists(dst1))
    ok_(not exists(dst2))
    lgr = LoggerHelper("dataladtest-3").get_initialized_logger(
        logtarget="%s,%s" % (dst1, dst2))
    ok_(exists(dst1))
    ok_(exists(dst2))

    msg = "Oh my god, they killed Kenny"
    lgr.error(msg)
    for dst in (dst1, dst2):
        with open(dst) as f:
            lines = f.readlines()
        assert_equal(len(lines), 1,
                     "Read more than a single log line: %s" % lines)
        ok_(msg in lines[0])

    # Close all handlers so windows is happy -- apparently not closed fast enough
    for handler in lgr.handlers:
        handler.close()
def test_cred1_enter_new():
    keyring = MemoryKeyring()
    cred = UserPassword("name", keyring=keyring)
    assert_false(cred.is_known)
    assert_equal(cred.enter_new(), None)
    assert_true(cred.is_known)
    assert_equal(keyring.get('name', 'user'), 'user1')
    assert_equal(keyring.get('name', 'password'), 'password1')

    keyring.delete('name')
    assert_raises(KeyError, keyring.delete, 'name', 'user')
    assert_raises(KeyError, keyring.delete, 'name')
    assert_equal(keyring.get('name', 'user'), None)
def test_logging_to_a_file(dst): ok_(not exists(dst)) lgr = LoggerHelper("dataladtest").get_initialized_logger(logtarget=dst) ok_(exists(dst)) msg = "Oh my god, they killed Kenny" lgr.error(msg) with open(dst) as f: lines = f.readlines() assert_equal(len(lines), 1, "Read more than a single log line: %s" % lines) line = lines[0] ok_(msg in line) ok_(not '\033[' in line, msg="There should be no color formatting in log files. Got: %s" % line) # verify that time stamp and level are present in the log line # do not want to rely on not having race conditions around date/time changes # so matching just with regexp ok_(re.match("\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3} \[ERROR\] %s" % msg, line))
def test_tarball(path):
    ds = Dataset(opj(path, 'ds')).create(force=True)
    ds.save(all_changes=True)
    committed_date = ds.repo.get_committed_date()
    with chpwd(path):
        _mod, tarball1 = ds.export('tarball')
        assert(not isabs(tarball1))
        tarball1 = opj(path, tarball1)
    default_outname = opj(path, 'datalad_{}.tar.gz'.format(ds.id))
    assert_equal(tarball1, default_outname)
    assert_true(os.path.exists(default_outname))
    custom_outname = opj(path, 'myexport.tar.gz')
    # feed in without extension
    ds.export('tarball', output=custom_outname[:-7])
    assert_true(os.path.exists(custom_outname))
    custom1_md5 = md5sum(custom_outname)
    # encodes the original tarball filename -> different checksum, despite
    # same content
    assert_not_equal(md5sum(default_outname), custom1_md5)
    # should really sleep so if they stop using time.time - we know
    time.sleep(1.1)
    ds.export('tarball', output=custom_outname)
    # should not encode mtime, so should be identical
    assert_equal(md5sum(custom_outname), custom1_md5)

    def check_contents(outname, prefix):
        with tarfile.open(outname) as tf:
            nfiles = 0
            for ti in tf:
                # any annex links resolved
                assert_false(ti.issym())
                ok_startswith(ti.name, prefix + '/')
                assert_equal(ti.mtime, committed_date)
                if '.datalad' not in ti.name:
                    # ignore any files in .datalad for this test to not be
                    # susceptible to changes in how much we generate a meta info
                    nfiles += 1
            # we have exactly three files, and expect no content for any directory
            assert_equal(nfiles, 3)

    check_contents(default_outname, 'datalad_%s' % ds.id)
    check_contents(custom_outname, 'myexport')
def run_main(args, exit_code=0, expect_stderr=False):
    """Run main() of the datalad, do basic checks and provide outputs

    Parameters
    ----------
    args : list
        List of string cmdline arguments to pass
    exit_code : int
        Expected exit code. Would raise AssertionError if differs
    expect_stderr : bool or string
        Whether to expect stderr output. If string -- match

    Returns
    -------
    stdout, stderr : strings
        Output produced
    """
    with patch('sys.stderr', new_callable=StringIO) as cmerr:
        with patch('sys.stdout', new_callable=StringIO) as cmout:
            with assert_raises(SystemExit) as cm:
                main(["datalad"] + list(args))
            assert_equal(cm.exception.code, exit_code)
            stdout = cmout.getvalue()
            stderr = cmerr.getvalue()
            if expect_stderr is False:
                assert_equal(stderr, "")
            elif expect_stderr is True:
                # do nothing -- just return
                pass
            else:
                # must be a string
                assert_equal(stderr, expect_stderr)
    return stdout, stderr
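# A minimal, hypothetical usage sketch for run_main() (not part of the original
# suite); it mirrors how test_help() above drives it and assumes the same
# module-level imports.
def _example_usage_of_run_main():
    # --help exits with code 0 and should not produce stderr output
    stdout, stderr = run_main(['--help'], exit_code=0, expect_stderr=False)
    assert stdout  # something was printed to stdout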
def test_report_absent_keys(path):
    ds = Dataset(path).create()
    # create an annexed file
    testfile = ds.pathobj / 'dummy'
    testfile.write_text(u'nothing')
    ds.save()
    # present in a full report and in a partial report
    # based on worktree of HEAD ref
    for ai in (
            ds.repo.get_content_annexinfo(eval_availability=True),
            ds.repo.get_content_annexinfo(
                paths=['dummy'],
                eval_availability=True),
            ds.repo.get_content_annexinfo(
                ref='HEAD',
                eval_availability=True),
            ds.repo.get_content_annexinfo(
                ref='HEAD',
                paths=['dummy'],
                eval_availability=True)):
        assert_in(testfile, ai)
        assert_equal(ai[testfile]['has_content'], True)
    # drop the key, not available anywhere else
    ds.drop('dummy', check=False)
    # does not change a thing, except the key is gone
    for ai in (
            ds.repo.get_content_annexinfo(eval_availability=True),
            ds.repo.get_content_annexinfo(
                paths=['dummy'],
                eval_availability=True),
            ds.repo.get_content_annexinfo(
                ref='HEAD',
                eval_availability=True),
            ds.repo.get_content_annexinfo(
                ref='HEAD',
                paths=['dummy'],
                eval_availability=True)):
        assert_in(testfile, ai)
        assert_equal(ai[testfile]['has_content'], False)
def test_func_to_node():
    int_node = func_to_node(int)  # node which requires nothing and nothing of output is used
    assert int_node.__doc__
    in_dict = {'in': 1}
    ok_generator(int_node(in_dict))

    # xrange is not considered to be a generator
    def xrange_(n, offset=0):
        for x in range(offset, offset + n):
            yield x

    xrange_node = func_to_node(xrange_, data_args='in', outputs='out')
    assert_in('assigned to out', xrange_node.__doc__)
    assert_false('Additional keyword arguments' in xrange_node.__doc__)
    range_node_gen = xrange_node(in_dict)
    ok_generator(range_node_gen)
    assert_equal(list(range_node_gen), [{'in': 1, 'out': 0}])

    # with additional kwargs
    xrange_node = func_to_node(xrange_, data_args='in', outputs='out',
                               kwargs={'offset': 10})
    assert_in('assigned to out', xrange_node.__doc__)
    assert_in('Additional keyword arguments', xrange_node.__doc__)
    range_node_gen = xrange_node(in_dict)
    ok_generator(range_node_gen)
    assert_equal(list(range_node_gen), [{'in': 1, 'out': 10}])

    # testing func_node
    data = {'offset': 5, 'in': 1}
    xrange_node = func_to_node(xrange_, data_args='in', data_kwargs=['offset'],
                               outputs='out')
    assert_in('assigned to out', xrange_node.__doc__)
    assert_false('Additional keyword arguments' in xrange_node.__doc__)
    gen = xrange_node(data)
    ok_generator(gen)
    assert_equal(list(gen), [{'offset': 5, 'out': 5, 'in': 1}])

    # with multiple outputs
    def split_(s, num):
        yield s.split('/', num)

    data = {'num': 3, 'in': 'datalad/crawler/nodes'}
    split_node = func_to_node(split_, data_args='in', data_kwargs=['num'],
                              outputs=['a', 'b', 'c'])
    assert_in('assigned to a, b, c', split_node.__doc__)
    assert_false('Additional keyword arguments' in split_node.__doc__)
    split_node_gen = split_node(data)
    assert_equal(list(split_node_gen),
                 [{'a': 'datalad', 'c': 'nodes', 'b': 'crawler',
                   'num': 3, 'in': 'datalad/crawler/nodes'}])
def test_ignored(topdir):
    # create annex, git repos
    AnnexRepo(opj(topdir, 'annexdir'), create=True)
    GitRepo(opj(topdir, 'gitdir'), create=True)

    # non-git or annex should not be ignored
    assert_equal(ignored(topdir), False)
    # git, annex and hidden nodes should be ignored
    for subdir in ["annexdir", "gitdir", ".hidden"]:
        assert_equal(ignored(opj(topdir, subdir)), True)
    # ignore only hidden nodes (not git or annex repos) flag should work
    assert_equal(ignored(opj(topdir, "annexdir"), only_hidden=True), False)
def test_sub():
    s = sub({
        'url': {
            '(http)s?(://.*openfmri\.s3\.amazonaws.com/|://s3\.amazonaws\.com/openfmri/)': r'\1\2'
        }
    })
    ex1 = {'url': 'http://example.com'}
    assert_equal(list(s(ex1)), [ex1])
    assert_equal(
        list(s({'url': "https://openfmri.s3.amazonaws.com/tarballs/ds001_raw.tgz?param=1"})),
        [{'url': "http://openfmri.s3.amazonaws.com/tarballs/ds001_raw.tgz?param=1"}])
    assert_equal(
        list(s({'url': "https://s3.amazonaws.com/openfmri/tarballs/ds031_retinotopy.tgz?versionId=HcKd4prWsHup6nEwuIq2Ejdv49zwX5U"})),
        [{'url': "http://s3.amazonaws.com/openfmri/tarballs/ds031_retinotopy.tgz?versionId=HcKd4prWsHup6nEwuIq2Ejdv49zwX5U"}])
def test_fix_datalad_ri():
    assert_equal(_fix_datalad_ri('/'), '/')
    assert_equal(_fix_datalad_ri('/a/b'), '/a/b')
    assert_equal(_fix_datalad_ri('//'), '///')
    assert_equal(_fix_datalad_ri('///'), '///')
    assert_equal(_fix_datalad_ri('//a'), '///a')
    assert_equal(_fix_datalad_ri('///a'), '///a')
    assert_equal(_fix_datalad_ri('//a/b'), '///a/b')
    assert_equal(_fix_datalad_ri('///a/b'), '///a/b')