def __init__(self, ds, force_reindex=False, **kwargs):
    super(_WhooshSearch, self).__init__(ds, **kwargs)

    self.idx_obj = None
    # where does the bunny have the eggs?
    self.index_dir = opj(self.ds.path, GitRepo.get_git_dir(ds),
                         SEARCH_INDEX_DOTGITDIR)
    self._mk_search_index(force_reindex)
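# A minimal sketch (not DataLad's code) of where the search index lands:
# GitRepo.get_git_dir() may return a path relative to the dataset root, so
# joining it onto ds.path yields the resolved .git directory. The helper
# name `search_index_dir` and the example value for SEARCH_INDEX_DOTGITDIR
# are assumptions for illustration only.
import os.path as op

def search_index_dir(ds_path, git_dir, index_subdir='datalad/search_index'):
    # mirrors opj(self.ds.path, GitRepo.get_git_dir(ds), SEARCH_INDEX_DOTGITDIR)
    return op.join(ds_path, git_dir, index_subdir)

# e.g. a plain checkout: /tmp/ds/.git/datalad/search_index
print(search_index_dir('/tmp/ds', '.git'))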
def test_get_git_dir(path):
    # minimal, only missing coverage
    assert_raises(RuntimeError, GitRepo.get_git_dir, path)

    srcpath = opj(path, 'src')
    targetpath = opj(path, 'target')
    targetgitpath = opj(targetpath, '.git')
    os.makedirs(srcpath)
    os.makedirs(targetpath)
    if not on_windows:
        # with PY3 would also work with Windows 6+
        os.symlink(srcpath, targetgitpath)
        eq_(srcpath, GitRepo.get_git_dir(targetpath))
        # cleanup for following test
        unlink(targetgitpath)

    with open(targetgitpath, 'w') as f:
        f.write('gitdir: {}'.format(srcpath))
    eq_(srcpath, GitRepo.get_git_dir(targetpath))
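# Hedged re-implementation sketch of the three cases this test exercises
# (missing .git -> RuntimeError, a symlinked .git, and a plain-file .git
# holding a 'gitdir: <path>' pointer as written by submodules/worktrees).
# Illustrative only, not DataLad's actual implementation.
import os
import os.path as op

def get_git_dir_sketch(path):
    dot_git = op.join(path, '.git')
    if op.islink(dot_git):
        # symlinked .git: resolve to the link target
        return os.readlink(dot_git)
    if op.isfile(dot_git):
        # plain-file .git: parse the 'gitdir: <path>' pointer
        with open(dot_git) as f:
            line = f.readline()
        if line.startswith('gitdir: '):
            return line[len('gitdir: '):].strip()
        raise RuntimeError('invalid .git file in %s' % path)
    if not op.isdir(dot_git):
        raise RuntimeError('no .git found in %s' % path)
    return dot_git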
def __call__(dataset=None, what=None, recursive=False, recursion_limit=None):
    ds = require_dataset(dataset, purpose='clean-up')
    res_kwargs = dict(action='clean', logger=lgr, refds=ds.path)
    for ap in AnnotatePaths.__call__(
            dataset=ds.path,
            recursive=recursive,
            recursion_limit=recursion_limit,
            action='clean',
            unavailable_path_status='impossible',
            nondataset_path_status='impossible',
            return_type='generator',
            on_failure='ignore'):
        if ap.get('status', None):
            yield ap
            continue
        if ap.get('type', None) != 'dataset':
            ap.update(status='impossible',
                      message='only datasets can be cleaned')
            yield ap
            continue
        d = ap['path']
        gitdir = GitRepo.get_git_dir(d)
        DIRS_PLURAL = ("directory", "directories")
        FILES_PLURAL = ("file", "files")
        for dirpath, flag, msg, sing_pl in [
                (ARCHIVES_TEMP_DIR, "cached-archives",
                 "temporary archive", DIRS_PLURAL),
                (ANNEX_TEMP_DIR, "annex-tmp",
                 "temporary annex", FILES_PLURAL),
                (ANNEX_TRANSFER_DIR, "annex-transfer",
                 "annex temporary transfer", DIRS_PLURAL),
                (opj(gitdir, SEARCH_INDEX_DOTGITDIR), 'search-index',
                 "metadata search index", FILES_PLURAL),
        ]:
            topdir = opj(d, dirpath)
            lgr.debug("Considering to clean %s:%s", d, dirpath)
            if not ((what is None) or (flag in what)):
                yield get_status_dict(
                    path=topdir, status='notneeded', type='directory',
                    **res_kwargs)
                continue
            paths = glob(opj(topdir, '*'))
            if not paths:
                yield get_status_dict(
                    path=topdir, status='notneeded', type='directory',
                    **res_kwargs)
                continue
            pl = len(paths) > 1
            message = ("Removed %d %s %s: %s",
                       len(paths), msg, sing_pl[int(pl)],
                       ", ".join(sorted([x[len(topdir) + 1:] for x in paths])))
            rmtree(topdir)
            yield get_status_dict(
                path=topdir, status='ok', type='dir', message=message,
                **res_kwargs)
def __call__(dataset=None, what=None, recursive=False, recursion_limit=None):
    ds = require_dataset(dataset, purpose='clean-up')
    res_kwargs = dict(action='clean', logger=lgr, refds=ds.path)
    for wds in itertools.chain(
            [ds],
            ds.subdatasets(
                fulfilled=True,
                recursive=recursive,
                recursion_limit=recursion_limit,
                return_type='generator',
                result_renderer='disabled',
                result_xfm='datasets') if recursive else []):
        d = wds.path
        gitdir = GitRepo.get_git_dir(d)
        DIRS_PLURAL = ("directory", "directories")
        FILES_PLURAL = ("file", "files")
        for dirpath, flag, msg, sing_pl in [
                (ARCHIVES_TEMP_DIR, "cached-archives",
                 "temporary archive", DIRS_PLURAL),
                (ANNEX_TEMP_DIR, "annex-tmp",
                 "temporary annex", FILES_PLURAL),
                (ANNEX_TRANSFER_DIR, "annex-transfer",
                 "annex temporary transfer", DIRS_PLURAL),
                (opj(gitdir, SEARCH_INDEX_DOTGITDIR), 'search-index',
                 "metadata search index", FILES_PLURAL),
        ]:
            topdir = opj(d, dirpath)
            lgr.debug("Considering to clean %s:%s", d, dirpath)
            if not ((what is None) or (flag in what)):
                yield get_status_dict(
                    path=topdir, status='notneeded', type='directory',
                    **res_kwargs)
                continue
            paths = glob(opj(topdir, '*'))
            if not paths:
                yield get_status_dict(
                    path=topdir, status='notneeded', type='directory',
                    **res_kwargs)
                continue
            pl = len(paths) > 1
            message = ("Removed %d %s %s: %s",
                       len(paths), msg, sing_pl[int(pl)],
                       ", ".join(sorted([x[len(topdir) + 1:] for x in paths])))
            rmtree(topdir)
            yield get_status_dict(
                path=topdir, status='ok', type='dir', message=message,
                **res_kwargs)
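# Hedged usage sketch: the `what` argument filters by the flag names defined
# in the loop above ('cached-archives', 'annex-tmp', 'annex-transfer',
# 'search-index'); `what=None` cleans all of them. The message tuple picks
# from a (singular, plural) pair indexed by whether more than one path
# matched; the sample values below are made up for illustration:
paths = ['a.7z', 'b.7z', 'c.7z']
DIRS_PLURAL = ("directory", "directories")
pl = len(paths) > 1
message = ("Removed %d %s %s: %s",
           len(paths), "temporary archive", DIRS_PLURAL[int(pl)],
           ", ".join(sorted(paths)))
print(message[0] % message[1:])
# -> Removed 3 temporary archive directories: a.7z, b.7z, c.7z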
def __call__(target, opts=None, dataset=None):
    # only non-bare repos have hashdirmixed, so require one
    ds = require_dataset(
        dataset, check_installed=True, purpose='ORA archive export')
    ds_repo = ds.repo

    # TODO remove once datalad 0.12rc7 or later is released
    if not hasattr(ds_repo, 'dot_git'):
        from datalad.support.gitrepo import GitRepo
        ds_repo.dot_git = ds_repo.pathobj / GitRepo.get_git_dir(ds_repo)

    annex_objs = ds_repo.dot_git / 'annex' / 'objects'

    archive = resolve_path(target, dataset)
    if archive.is_dir():
        archive = archive / 'archive.7z'
    else:
        archive.parent.mkdir(exist_ok=True, parents=True)

    if not opts:
        # uncompressed by default
        opts = ['-mx0']

    res_kwargs = dict(
        action="export-archive-ora",
        logger=lgr,
    )

    if not annex_objs.is_dir():
        yield get_status_dict(
            ds=ds,
            status='notneeded',
            message='no annex keys present',
            **res_kwargs,
        )
        return

    exportdir = ds_repo.dot_git / 'datalad' / 'tmp' / 'ora_archive'
    if exportdir.exists():
        yield get_status_dict(
            ds=ds,
            status='error',
            message=(
                'export directory already exists, please remove first: %s',
                str(exportdir)),
            **res_kwargs,
        )
        return

    keypaths = [
        k for k in annex_objs.glob(op.join('**', '*'))
        if k.is_file()
    ]

    log_progress(
        lgr.info,
        'oraarchiveexport',
        'Start ORA archive export %s', ds,
        total=len(keypaths),
        label='ORA archive export',
        unit=' Keys',
    )

    link_fx = os.link
    for keypath in keypaths:
        key = keypath.name
        hashdir = op.join(keypath.parts[-4], keypath.parts[-3])
        log_progress(
            lgr.info,
            'oraarchiveexport',
            'Export key %s to %s', key, hashdir,
            update=1,
            increment=True)
        keydir = exportdir / hashdir / key
        keydir.mkdir(parents=True, exist_ok=True)
        try:
            link_fx(str(keypath), str(keydir / key))
        except OSError:
            lgr.warning(
                'No hard links supported at %s, will copy files instead',
                str(keydir))
            # no hard links supported
            # switch function after first error
            link_fx = shutil.copyfile
            link_fx(str(keypath), str(keydir / key))

    log_progress(
        lgr.info,
        'oraarchiveexport',
        'Finished ORA archive export from %s', ds,
    )
    try:
        subprocess.run(
            ['7z', 'u', str(archive), '.'] + opts,
            cwd=str(exportdir),
            # raise on a non-zero exit code, otherwise the except clause
            # below can never report a failed 7z run
            check=True,
        )
        yield get_status_dict(
            path=str(archive),
            type='file',
            status='ok',
            **res_kwargs)
    except Exception as e:
        yield get_status_dict(
            path=str(archive),
            type='file',
            status='error',
            message=('7z failed: %s', exc_str(e)),
            **res_kwargs)
        return
    finally:
        rmtree(str(exportdir))
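# Hedged, self-contained sketch of the link-or-copy fallback used above:
# hard-link each annex key into the export tree, and on the first OSError
# (no hard-link support, e.g. a cross-device link or a FAT filesystem)
# permanently switch to copying. Function and variable names here are
# illustrative, not from the DataLad sources.
import os
import shutil

def export_keys(src_dst_pairs):
    link_fx = os.link
    for src, dst in src_dst_pairs:
        try:
            link_fx(src, dst)
        except OSError:
            # filesystem cannot hard-link here; copy this and all later keys
            link_fx = shutil.copyfile
            link_fx(src, dst)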