def _describe_dataset(ds, sensitive):
    """Assemble a description record for a dataset.

    Reports path, repo type, and dataset id; dataset metadata is only
    queried when `sensitive` is true, otherwise the `_HIDDEN` marker is
    reported instead. An invalid git repository is reported as
    ``{"invalid": <error message>}``.
    """
    from datalad.interface.results import success_status_map
    from datalad.api import metadata

    try:
        description = {
            'path': ds.path,
            'repo': ds.repo.__class__.__name__ if ds.repo else None,
            'id': ds.id,
        }
        if not sensitive:
            # metadata may contain identifying information -- hide it
            description['metadata'] = _HIDDEN
        elif not ds.id:
            # without a dataset id there is nothing to query
            description['metadata'] = None
        else:
            records = metadata(
                dataset=ds,
                reporton='datasets',
                return_type='list',
                result_filter=lambda r: r['action'] == 'metadata'
                and success_status_map[r['status']] == 'success',
                result_renderer='disabled',
                on_failure='ignore')
            if records:
                extracted = [r['metadata'] for r in records]
                # unwrap a single-element list for compact reporting
                description['metadata'] = (
                    extracted[0] if len(extracted) == 1 else extracted)
        return description
    except InvalidGitRepositoryError as e:
        return {"invalid": exc_str(e)}
def _describe_dataset(ds, sensitive):
    """Assemble a description record for a dataset (path and repo type).

    Dataset metadata is only queried when `sensitive` is true; otherwise
    the `_HIDDEN` marker is reported. An invalid git repository is
    reported as ``{"invalid": <error message>}``.
    """
    from datalad.interface.results import success_status_map
    from datalad.api import metadata

    try:
        description = {
            'path': ds.path,
            'repo': ds.repo.__class__.__name__ if ds.repo else None,
        }
        if not sensitive:
            # metadata may contain identifying information -- hide it
            description['metadata'] = _HIDDEN
        elif not ds.id:
            # without a dataset id there is nothing to query
            description['metadata'] = None
        else:
            records = metadata(
                dataset=ds,
                reporton='datasets',
                return_type='list',
                result_filter=lambda r: r['action'] == 'metadata'
                and success_status_map[r['status']] == 'success',
                result_renderer='disabled',
                on_failure='ignore')
            if records:
                extracted = [r['metadata'] for r in records]
                # unwrap a single-element list for compact reporting
                description['metadata'] = (
                    extracted[0] if len(extracted) == 1 else extracted)
        return description
    except InvalidGitRepositoryError as e:
        return {"invalid": exc_str(e)}
def _describe_dataset(ds, sensitive):
    """Assemble a description record for a dataset.

    Reports path, repo type, dataset id, and the state of all branches
    (``<name>@<7-char commit>``). Dataset metadata is only queried when
    `sensitive` is true; otherwise the `_HIDDEN` marker is reported. An
    invalid git repository is reported as ``{"invalid": <message>}``.
    """
    from datalad.interface.results import success_status_map
    from datalad.api import metadata

    try:
        description = {
            'path': ds.path,
            'repo': ds.repo.__class__.__name__ if ds.repo else None,
            'id': ds.id,
        }
        # describe available branches and their states
        description['branches'] = [
            '{}@{}'.format(
                name,
                # first commit of the branch iterator is its tip; use the
                # 7-char short form
                next(ds.repo.get_branch_commits_(branch=name))[:7])
            for name in ds.repo.get_branches()
        ]
        if not sensitive:
            # metadata may contain identifying information -- hide it
            description['metadata'] = _HIDDEN
        elif not ds.id:
            # without a dataset id there is nothing to query
            description['metadata'] = None
        else:
            records = metadata(
                dataset=ds,
                reporton='datasets',
                return_type='list',
                result_filter=lambda r: r['action'] == 'metadata'
                and success_status_map[r['status']] == 'success',
                result_renderer='disabled',
                on_failure='ignore')
            if records:
                extracted = [r['metadata'] for r in records]
                # unwrap a single-element list for compact reporting
                description['metadata'] = (
                    extracted[0] if len(extracted) == 1 else extracted)
        return description
    except InvalidGitRepositoryError as e:
        return {"invalid": CapturedException(e).message}
def test_aggregate_query(path): ds = Dataset(path).create(force=True) # no magic change to actual dataset metadata due to presence of # aggregated metadata res = ds.metadata(reporton='datasets', on_failure='ignore') assert_result_count(res, 1) assert_not_in('metadata', res[0]) # but we can now ask for metadata of stuff that is unknown on disk res = ds.metadata(opj('sub', 'deep', 'some'), reporton='datasets') assert_result_count(res, 1) eq_({'homepage': 'http://top.example.com'}, res[0]['metadata']) # when no reference dataset is given the command will report the # aggregated metadata as it is recorded in the dataset that is the # closest parent on disk ds.create('sub', force=True) res = metadata(opj(path, 'sub', 'deep', 'some'), reporton='datasets') assert_result_count(res, 1) eq_({'homepage': 'http://sub.example.com'}, res[0]['metadata']) # when a reference dataset is given, it will be used as the metadata # provider res = ds.metadata(opj('sub', 'deep', 'some'), reporton='datasets') assert_result_count(res, 1) eq_({'homepage': 'http://top.example.com'}, res[0]['metadata'])
def test_get_aggregates_fails(path=None):
    """`get_aggregates` must fail cleanly without a dataset or aggregates."""
    # outside of any dataset the query cannot even start
    with chpwd(path):
        with assert_raises(NoDatasetFound):
            metadata(get_aggregates=True)
    # a fresh dataset has no aggregated metadata -> 'impossible'
    fresh = Dataset(path).create()
    results = fresh.metadata(get_aggregates=True, on_failure='ignore')
    assert_result_count(results, 1, path=fresh.path, status='impossible')
def __call__(dataset=None):
    """Render a plain-text report on the DataLad installation.

    Covers system info, relevant environment variables, external tool
    versions, available metadata extractors, configuration, and -- when a
    dataset is given or found -- basic dataset info plus its metadata.

    Parameters
    ----------
    dataset : optional
      Passed to ``require_dataset``; an absent/uninstalled dataset is
      silently ignored and the report proceeds without a dataset section.

    Yields nothing of substance; this is a generator only to satisfy the
    command interface (the single ``yield`` at the end).
    """
    from datalad.distribution.dataset import require_dataset
    from datalad.support.exceptions import NoDatasetArgumentFound
    ds = None
    try:
        ds = require_dataset(dataset, check_installed=False, purpose='reporting')
    except NoDatasetArgumentFound:
        # failure is already logged
        pass
    if ds and not ds.is_installed():
        # we don't deal with absent datasets
        ds = None
    if ds is None:
        # fall back to the global configuration manager
        from datalad import cfg
    else:
        cfg = ds.config
    from datalad.ui import ui
    from datalad.api import metadata
    from datalad.metadata import extractors as metaextractors
    from datalad.support.external_versions import external_versions
    import os
    import platform as pl
    import json

    # formatting helper: flatten (possibly nested) tuples such as the ones
    # returned by platform.mac_ver()/win32_ver() into a '/'-joined string,
    # dropping empty elements
    def _t2s(t):
        res = []
        for e in t:
            if isinstance(e, tuple):
                es = _t2s(e)
                if es != '':
                    res += ['(%s)' % es]
            elif e != '':
                res += [e]
        return '/'.join(res)

    report_template = """\
System
======
{system}

Environment
===========
{env}

Externals
=========
{externals}

Available metadata extractors
=============================
{metaextractors}

Configuration
=============
{cfg}
{dataset}
"""

    dataset_template = """\
Dataset information
===================
{basic}

Metadata
--------
{meta}
"""
    ds_meta = None
    if ds and ds.is_installed():
        # query dataset-level metadata records only
        ds_meta = metadata(
            dataset=ds, reporton='datasets', return_type='list',
            result_filter=lambda x: x['action'] == 'metadata',
            result_renderer='disabled')
        if ds_meta:
            ds_meta = [dm['metadata'] for dm in ds_meta]
            if len(ds_meta) == 1:
                # unwrap a single-element list for compact JSON output
                ds_meta = ds_meta.pop()
    ui.message(
        report_template.format(
            system='\n'.join(
                '{}: {}'.format(*i) for i in (
                    ('OS ', ' '.join(
                        [os.name,
                         pl.system(),
                         pl.release(),
                         pl.version()]).rstrip()),
                    # NOTE(review): pl.dist() was removed in Python 3.8 --
                    # this line would raise AttributeError there; confirm
                    # target Python versions
                    ('Distribution', ' '.join(
                        [_t2s(pl.dist()),
                         _t2s(pl.mac_ver()),
                         _t2s(pl.win32_ver())]).rstrip()))),
            # only report env vars plausibly relevant to datalad behavior
            env='\n'.join(
                '{}: {}'.format(k, v) for k, v in os.environ.items()
                if k.startswith('PYTHON')
                or k.startswith('GIT')
                or k.startswith('DATALAD')),
            dataset='' if not ds else dataset_template.format(
                basic='\n'.join(
                    '{}: {}'.format(k, v) for k, v in (
                        ('path', ds.path),
                        ('repo', ds.repo.__class__.__name__ if ds.repo else '[NONE]'),
                    )),
                meta=json.dumps(ds_meta, indent=1) if ds_meta else '[no metadata]'),
            externals=external_versions.dumps(preamble=None, indent='', query=True),
            # public attribute names of the extractors package double as the
            # list of available extractors
            metaextractors='\n'.join(
                p for p in dir(metaextractors) if not p.startswith('_')),
            # crude redaction of likely-sensitive configuration values
            cfg='\n'.join(
                '{}: {}'.format(
                    k,
                    '<HIDDEN>' if 'user' in k or 'token' in k or 'passwd' in k else v)
                for k, v in sorted(cfg.items(), key=lambda x: x[0])),
        ))
    yield
def dlplugin(dataset=None):
    """Generate a report about the DataLad installation and configuration

    IMPORTANT: Sharing this report with untrusted parties (e.g. on the
    web) should be done with care, as it may include identifying
    information, and/or credentials or access tokens.

    Parameters
    ----------
    dataset : Dataset, optional
      If a dataset is given or found, information on this dataset is
      provided (if it exists), and its active configuration is reported.
    """
    ds = dataset
    if ds and not ds.is_installed():
        # we don't deal with absent datasets
        ds = None
    if ds is None:
        # fall back to the global configuration manager
        from datalad import cfg
    else:
        cfg = ds.config
    from datalad.ui import ui
    from datalad.api import metadata

    report_template = """\
{dataset}
Configuration
=============
{cfg}
"""

    dataset_template = """\
Dataset information
===================
{basic}

Metadata
--------
{meta}
"""
    ds_meta = None
    if ds and ds.is_installed():
        ds_meta = metadata(dataset=ds, dataset_global=True,
                           return_type='item-or-list',
                           result_filter=lambda x: x['action'] == 'metadata')
        if ds_meta:
            # reduce the result record to the metadata mapping itself
            ds_meta = ds_meta['metadata']
    ui.message(
        report_template.format(
            dataset='' if not ds else dataset_template.format(
                basic='\n'.join(
                    '{}: {}'.format(k, v) for k, v in (
                        ('path', ds.path),
                        ('repo', ds.repo.__class__.__name__ if ds.repo else '[NONE]'),
                    )),
                # BUGFIX: iterate key/value pairs via .items(); iterating the
                # mapping directly yields only keys and the (k, v) unpacking
                # would raise at runtime
                meta='\n'.join(
                    '{}: {}'.format(k, v) for k, v in ds_meta.items())
                if ds_meta else '[no metadata]'),
            # redact likely-sensitive configuration values
            cfg='\n'.join(
                '{}: {}'.format(
                    k,
                    '<HIDDEN>' if k.startswith('user.') or 'token' in k else v)
                for k, v in sorted(cfg.items(), key=lambda x: x[0])),
        ))
    yield
def test_basic_filemeta(path):
    """Exercise the core file-level metadata API.

    Covers: error behavior without a repo, plain git vs. annex repos,
    tag add/init/remove/reset, and key->value mappings with add/reset/
    remove/init semantics.
    """
    with chpwd(path):
        # no repo -> error
        assert_status('error', metadata(on_failure='ignore'))
        # some repo, no error on query of pwd
        GitRepo('.', create=True)
        eq_([], metadata())
        # impossible when making explicit query
        assert_status('impossible', metadata('.', on_failure='ignore'))
        # fine with annex
        AnnexRepo('.', create=True)
        eq_([], metadata())
        eq_([], metadata('.'))

        # create playing field
        create_tree(path, {
            'somefile': 'content',
            'dir': {
                'deepfile': 'othercontent'
            }
        })
        ds = Dataset(path)
        ds.add('.')
        ok_clean_git(path)
        # full query -> 2 files
        res = ds.metadata()
        assert_result_count(res, 2)
        assert_result_count(res, 2, type='file', metadata={})
        #
        # tags: just a special case of a metadata key without a value
        #
        # tag one file
        target_file = opj('dir', 'deepfile')
        # needs a sequence or dict
        assert_raises(ValueError, ds.metadata, target_file, add='mytag')
        # like this
        res = ds.metadata(target_file, add=['mytag'])
        assert_result_count(res, 1)
        assert_result_count(
            res, 1, type='file', path=opj(ds.path, target_file),
            metadata={'tag': ['mytag']})
        # now init tag for all files that don't have one yet
        res = ds.metadata(init=['rest'])
        assert_result_count(res, 2)
        # from before
        assert_result_count(
            res, 1, type='file', path=opj(ds.path, target_file),
            metadata={'tag': ['mytag']})
        # and the other one
        assert_result_count(
            res, 1, type='file', path=opj(ds.path, 'somefile'),
            metadata={'tag': ['rest']})
        # add three more different tags
        res = ds.metadata(add=['other1', 'other2', 'other3'])
        assert_result_count(res, 2)
        for r in res:
            assert_in('other1', r['metadata']['tag'])
            assert_in('other2', r['metadata']['tag'])
            assert_in('other3', r['metadata']['tag'])
        # now remove two specifics tag from all files that exists in all files
        res = ds.metadata(remove=['other1', 'other3'])
        assert_result_count(res, 2)
        for r in res:
            assert_not_in('other1', r['metadata']['tag'])
            assert_in('other2', r['metadata']['tag'])
        # and now one that only exists in one file
        res = ds.metadata(remove=['rest'])
        # we still get 2 results, because we still touch all files
        assert_result_count(res, 2)
        # however there is no modification to files that don't have the tag
        assert_result_count(
            res, 1, type='file', path=opj(ds.path, 'somefile'),
            metadata={'tag': ['other2']})
        assert_result_count(
            res, 1, type='file', path=opj(ds.path, target_file),
            metadata={'tag': ['mytag', 'other2']})
        # and finally kill the tags
        res = ds.metadata(target_file, reset=['tag'])
        assert_result_count(res, 1)
        assert_result_count(
            res, 1, type='file', metadata={},
            path=opj(ds.path, target_file))
        # no change to the other one
        assert_result_count(
            ds.metadata('somefile'), 1, type='file',
            path=opj(ds.path, 'somefile'),
            metadata={'tag': ['other2']})
        # kill all tags everywhere
        res = ds.metadata(reset=['tag'])
        assert_result_count(res, 2)
        assert_result_count(res, 2, type='file', metadata={})
        #
        # key: value mapping
        #
        res = ds.metadata('somefile', add=dict(new=('v1', 'v2')))
        assert_result_count(res, 1, metadata={'new': ['v1', 'v2']})
        # same as this, which exits to support the way things come
        # in from the cmdline
        res = ds.metadata(target_file, add=[['new', 'v1', 'v2']])
        assert_result_count(res, 1, metadata={'new': ['v1', 'v2']})
        # other file got the exact same metadata now
        assert_result_count(ds.metadata(), 2, metadata={'new': ['v1', 'v2']})
        # reset with just a key removes the entire mapping
        res = ds.metadata(target_file, reset=['new'])
        assert_result_count(res, 1, metadata={})
        # reset with a mapping, overrides the old one
        res = ds.metadata('somefile', reset=dict(new='george', more='yeah'))
        assert_result_count(res, 1, metadata=dict(new=['george'], more=['yeah']))
        # remove single value from mapping, last value to go removes the key
        res = ds.metadata('somefile', remove=dict(more='yeah'))
        assert_result_count(res, 1, metadata=dict(new=['george']))
        # and finally init keys
        res = ds.metadata(init=dict(new=['two', 'three'], super='fresh'))
        assert_result_count(res, 2)
        assert_result_count(
            res, 1, path=opj(ds.path, target_file),
            # order of values is not maintained
            metadata=dict(new=['three', 'two'], super=['fresh']))
        assert_result_count(
            res, 1, path=opj(ds.path, 'somefile'),
            # order of values is not maintained
            metadata=dict(new=['george'], super=['fresh']))
def test_get_aggregates_fails(path):
    """`get_aggregates` must fail cleanly without a dataset or aggregates."""
    # outside of any dataset the query cannot even start
    with chpwd(path):
        with assert_raises(NoDatasetArgumentFound):
            metadata(get_aggregates=True)
    # a fresh dataset has no aggregated metadata -> 'impossible'
    fresh = Dataset(path).create()
    results = fresh.metadata(get_aggregates=True, on_failure='ignore')
    assert_result_count(results, 1, path=fresh.path, status='impossible')
def dlplugin(dataset=None):
    """Generate a report about the DataLad installation and configuration

    IMPORTANT: Sharing this report with untrusted parties (e.g. on the
    web) should be done with care, as it may include identifying
    information, and/or credentials or access tokens.

    Parameters
    ----------
    dataset : Dataset, optional
      If a dataset is given or found, information on this dataset is
      provided (if it exists), and its active configuration is reported.
    """
    ds = dataset
    if ds and not ds.is_installed():
        # we don't deal with absent datasets
        ds = None
    if ds is None:
        # fall back to the global configuration manager
        from datalad import cfg
    else:
        cfg = ds.config
    from datalad.ui import ui
    from datalad.api import metadata
    from datalad.support.external_versions import external_versions
    import os
    import platform as pl

    # formatting helper: flatten (possibly nested) tuples such as the ones
    # returned by platform.mac_ver()/win32_ver() into a '/'-joined string,
    # dropping empty elements
    def _t2s(t):
        res = []
        for e in t:
            if isinstance(e, tuple):
                es = _t2s(e)
                if es != '':
                    res += ['(%s)' % es]
            elif e != '':
                res += [e]
        return '/'.join(res)

    report_template = """\
System
======
{system}

Environment
===========
{env}

{dataset}
Externals
=========
{externals}

Configuration
=============
{cfg}
"""

    dataset_template = """\
Dataset information
===================
{basic}

Metadata
--------
{meta}
"""
    ds_meta = None
    if ds and ds.is_installed():
        ds_meta = metadata(dataset=ds, dataset_global=True,
                           return_type='item-or-list',
                           result_filter=lambda x: x['action'] == 'metadata')
        if ds_meta:
            # reduce the result record to the metadata mapping itself
            ds_meta = ds_meta['metadata']
    ui.message(
        report_template.format(
            system='\n'.join(
                '{}: {}'.format(*i) for i in (
                    ('OS ', ' '.join(
                        [os.name,
                         pl.system(),
                         pl.release(),
                         pl.version()]).rstrip()),
                    ('Distribution', ' '.join(
                        [_t2s(pl.dist()),
                         _t2s(pl.mac_ver()),
                         _t2s(pl.win32_ver())]).rstrip()))),
            # only report env vars plausibly relevant to datalad behavior
            env='\n'.join(
                '{}: {}'.format(k, v) for k, v in os.environ.items()
                if k.startswith('PYTHON')
                or k.startswith('GIT')
                or k.startswith('DATALAD')),
            dataset='' if not ds else dataset_template.format(
                basic='\n'.join(
                    '{}: {}'.format(k, v) for k, v in (
                        ('path', ds.path),
                        ('repo', ds.repo.__class__.__name__ if ds.repo else '[NONE]'),
                    )),
                # BUGFIX: iterate key/value pairs via .items(); iterating the
                # mapping directly yields only keys and the (k, v) unpacking
                # would raise at runtime
                meta='\n'.join(
                    '{}: {}'.format(k, v) for k, v in ds_meta.items())
                if ds_meta else '[no metadata]'),
            externals=external_versions.dumps(preamble=None, indent='', query=True),
            # redact likely-sensitive configuration values; the original
            # additionally tested k.startswith('user.'), which is subsumed
            # by the 'user' in k test
            cfg='\n'.join(
                '{}: {}'.format(
                    k,
                    '<HIDDEN>' if 'user' in k or 'token' in k else v)
                for k, v in sorted(cfg.items(), key=lambda x: x[0])),
        ))
    yield
def m(self, path):
    """Query metadata for `path` against this instance's dataset,
    with result rendering disabled."""
    from datalad.api import metadata
    results = metadata(path, dataset=self.ds, result_renderer='disabled')
    return results
def __call__(dataset=None, sensitive=None, clipboard=None):
    """Render (or copy to clipboard) a 'WTF' report on the DataLad setup.

    Parameters
    ----------
    dataset : optional
      Passed to ``require_dataset``; an absent/uninstalled dataset is
      silently ignored and the report proceeds without a dataset section.
    sensitive : optional
      Falsy: configuration and dataset metadata are replaced by the
      ``_HIDDEN`` marker. Truthy but not ``'all'``: configuration keys
      containing 'user', 'token' or 'passwd' are redacted. ``'all'``:
      everything is reported verbatim.
    clipboard : optional
      If truthy, the report is copied to the clipboard via ``pyperclip``
      instead of being printed.

    Yields nothing of substance; this is a generator only to satisfy the
    command interface (the single ``yield`` at the end).
    """
    from datalad import get_encoding_info
    from datalad import get_envvars_info
    from datalad.distribution.dataset import require_dataset
    from datalad.support.exceptions import NoDatasetArgumentFound
    ds = None
    try:
        ds = require_dataset(dataset, check_installed=False, purpose='reporting')
    except NoDatasetArgumentFound:
        # failure is already logged
        pass
    if ds and not ds.is_installed():
        # we don't deal with absent datasets
        ds = None
    if sensitive:
        if ds is None:
            # fall back to the global configuration manager
            from datalad import cfg
        else:
            cfg = ds.config
    else:
        # cfg stays None -> config section reports _HIDDEN below
        cfg = None
    from pkg_resources import iter_entry_points
    from datalad.ui import ui
    from datalad.api import metadata
    from datalad.support.external_versions import external_versions
    from datalad.dochelpers import exc_str
    from datalad.interface.results import success_status_map
    import os
    import platform as pl
    import json
    # probe each registered extractor entry point; a failing load is
    # reported as BROKEN rather than aborting the report
    extractors = {}
    for ep in iter_entry_points('datalad.metadata.extractors'):
        try:
            ep.load()
            status = 'OK'
        except Exception as e:
            status = 'BROKEN ({})'.format(exc_str(e))
        extractors[ep.name] = status

    # formatting helper: flatten (possibly nested) tuples such as the ones
    # returned by platform.mac_ver()/win32_ver() into a '/'-joined string,
    # dropping empty elements
    def _t2s(t):
        res = []
        for e in t:
            if isinstance(e, tuple):
                es = _t2s(e)
                if es != '':
                    res += ['(%s)' % es]
            elif e != '':
                res += [e]
        return '/'.join(res)

    report_template = """\
DataLad
=======
{datalad}

System
======
{system}

Locale/Encoding
===============
{loc}

Environment
===========
{env}

Externals
=========
{externals}

Installed extensions
====================
{extensions}

Known metadata extractors
=========================
{metaextractors}

Configuration
=============
{cfg}
{dataset}
"""

    dataset_template = """\
Dataset information
===================
{basic}

Metadata
--------
{meta}
"""
    ds_meta = None
    if not sensitive:
        ds_meta = _HIDDEN
    elif ds and ds.is_installed() and ds.id:
        # query dataset-level metadata records only; keep only successes
        ds_meta = metadata(
            dataset=ds, reporton='datasets', return_type='list',
            result_filter=lambda x: x['action'] == 'metadata'
            and success_status_map[x['status']] == 'success',
            result_renderer='disabled',
            on_failure='ignore')
        if ds_meta:
            ds_meta = [dm['metadata'] for dm in ds_meta]
            if len(ds_meta) == 1:
                # unwrap a single-element list for compact JSON output
                ds_meta = ds_meta.pop()

    if cfg is not None:
        # make it into a dict to be able to reassign
        cfg = dict(cfg.items())
    if sensitive != 'all' and cfg:
        # filter out some of the entries which known to be highly sensitive
        # (only values are reassigned; no keys are added/removed, so
        # iterating cfg.keys() while assigning is safe)
        for k in cfg.keys():
            if 'user' in k or 'token' in k or 'passwd' in k:
                cfg[k] = _HIDDEN

    from datalad.version import __version__, __full_version__
    text = report_template.format(
        datalad=_format_dict([
            ('Version', __version__),
            ('Full version', __full_version__)
        ], indent=True),
        system=_format_dict([
            ('OS', ' '.join([
                os.name,
                pl.system(),
                pl.release(),
                pl.version()]).rstrip()),
            # NOTE(review): pl.dist() was removed in Python 3.8 -- confirm
            # target Python versions
            ('Distribution',
             ' '.join([_t2s(pl.dist()),
                       _t2s(pl.mac_ver()),
                       _t2s(pl.win32_ver())]).rstrip())
        ], indent=True),
        loc=_format_dict(get_encoding_info(), indent=True),  # , fmt="{}={!r}"),
        env=_format_dict(get_envvars_info(), fmt="{}={!r}"),
        dataset='' if not ds else dataset_template.format(
            basic=_format_dict([
                ('path', ds.path),
                ('repo', ds.repo.__class__.__name__ if ds.repo else '[NONE]'),
            ]),
            meta=_HIDDEN if not sensitive
            else json.dumps(ds_meta, indent=1) if ds_meta
            else '[no metadata]'
        ),
        externals=external_versions.dumps(preamble=None, indent='', query=True),
        extensions='\n'.join(
            ep.name for ep in iter_entry_points('datalad.extensions')),
        metaextractors=_format_dict(extractors),
        cfg=_format_dict(sorted(cfg.items(), key=lambda x: x[0]))
        if cfg else _HIDDEN,
    )
    if clipboard:
        # NOTE(review): external_versions is already imported above; this
        # re-import is redundant but harmless
        from datalad.support.external_versions import external_versions
        external_versions.check(
            'pyperclip', msg="It is needed to be able to use clipboard")
        import pyperclip
        pyperclip.copy(text)
        ui.message("WTF information of length %s copied to clipboard"
                   % len(text))
    else:
        ui.message(text)
    yield