Beispiel #1
0
    def __init__(self, ds, force_reindex=False, **kwargs):
        """Set up the search object and build/load its on-disk index.

        Parameters
        ----------
        ds : Dataset
          Dataset whose metadata is to be searched.
        force_reindex : bool
          If True, rebuild the index even if one exists already.
        **kwargs
          Passed through to the base class constructor.
        """
        super(_WhooshSearch, self).__init__(ds, **kwargs)

        # no index handle until _mk_search_index() provides one
        self.idx_obj = None
        # the index is stashed inside the dataset's .git directory
        self.index_dir = opj(
            self.ds.path,
            GitRepo.get_git_dir(ds),
            SEARCH_INDEX_DOTGITDIR)
        self._mk_search_index(force_reindex)
Beispiel #2
0
def test_get_git_dir(path):
    """Exercise GitRepo.get_git_dir for the cases missing from coverage."""
    # a plain directory that is no repository at all must raise
    assert_raises(RuntimeError, GitRepo.get_git_dir, path)

    src = opj(path, 'src')
    target = opj(path, 'target')
    dotgit = opj(target, '.git')
    for d in (src, target):
        os.makedirs(d)
    if not on_windows:
        # symlinked .git (with PY3 this would also work on Windows 6+)
        os.symlink(src, dotgit)
        eq_(src, GitRepo.get_git_dir(target))
        # remove the symlink so the next case can reuse the path
        unlink(dotgit)
    # a plain-file .git holding a 'gitdir:' pointer must resolve as well
    with open(dotgit, 'w') as f:
        f.write('gitdir: {}'.format(src))
    eq_(src, GitRepo.get_git_dir(target))
Beispiel #3
0
def test_get_git_dir(path):
    """Cover GitRepo.get_git_dir edge cases.

    Cases: a non-repository path (raises RuntimeError), a symlinked
    ``.git`` (skipped on Windows), and a ``.git`` plain file containing a
    ``gitdir:`` pointer.

    NOTE(review): this function is a byte-identical duplicate of the
    previous example in this file.
    """
    # minimal, only missing coverage
    assert_raises(RuntimeError, GitRepo.get_git_dir, path)

    srcpath = opj(path, 'src')
    targetpath = opj(path, 'target')
    targetgitpath = opj(targetpath, '.git')
    os.makedirs(srcpath)
    os.makedirs(targetpath)
    if not on_windows:
        # with PY3 would also work with Windows 6+
        os.symlink(srcpath, targetgitpath)
        eq_(srcpath, GitRepo.get_git_dir(targetpath))
        # cleanup for following test
        unlink(targetgitpath)
    # a .git plain file with a 'gitdir:' pointer must resolve too
    with open(targetgitpath, 'w') as f:
        f.write('gitdir: {}'.format(srcpath))
    eq_(srcpath, GitRepo.get_git_dir(targetpath))
Beispiel #4
0
 def __call__(dataset=None, what=None, recursive=False, recursion_limit=None):
     """Remove disposable temporary content from dataset(s), yielding results.

     ``what`` presumably is an iterable of flag names selecting which
     locations to clean; ``None`` cleans all known ones -- TODO confirm
     against the command's parameter declaration.
     """
     ds = require_dataset(dataset, purpose='clean-up')
     res_kwargs = dict(action='clean', logger=lgr, refds=ds.path)
     # annotate the reference dataset (and subdatasets when recursing);
     # failures are not raised but come back as result records
     for ap in AnnotatePaths.__call__(
             dataset=ds.path,
             recursive=recursive,
             recursion_limit=recursion_limit,
             action='clean',
             unavailable_path_status='impossible',
             nondataset_path_status='impossible',
             return_type='generator',
             on_failure='ignore'):
         # anything that already carries a status is passed through untouched
         if ap.get('status', None):
             yield ap
             continue
         # cleaning only operates on whole datasets
         if ap.get('type', None) != 'dataset':
             ap.update(status='impossible',
                       message='only datasets can be cleaned')
             yield ap
             continue
         d = ap['path']
         gitdir = GitRepo.get_git_dir(d)
         # (singular, plural) word pairs for phrasing the result message
         DIRS_PLURAL = ("directory", "directories")
         FILES_PLURAL = ("file", "files")
         # known scratch locations:
         # (relative path, selection flag, human-readable label, word pair)
         for dirpath, flag, msg, sing_pl in [
             (ARCHIVES_TEMP_DIR, "cached-archives",
              "temporary archive", DIRS_PLURAL),
             (ANNEX_TEMP_DIR, "annex-tmp",
              "temporary annex", FILES_PLURAL),
             (ANNEX_TRANSFER_DIR, "annex-transfer",
              "annex temporary transfer", DIRS_PLURAL),
             (opj(gitdir, SEARCH_INDEX_DOTGITDIR), 'search-index',
              "metadata search index", FILES_PLURAL),
         ]:
             topdir = opj(d, dirpath)
             lgr.debug("Considering to clean %s:%s", d, dirpath)
             # location not selected via `what` -> report and move on
             if not ((what is None) or (flag in what)):
                 yield get_status_dict(
                     path=topdir, status='notneeded', type='directory', **res_kwargs)
                 continue
             paths = glob(opj(topdir, '*'))
             # nothing inside -> nothing to remove
             if not paths:
                 yield get_status_dict(
                     path=topdir, status='notneeded', type='directory', **res_kwargs)
                 continue
             # pick singular vs plural wording via bool->int indexing
             pl = len(paths) > 1
             message = ("Removed %d %s %s: %s",
                        len(paths), msg, sing_pl[int(pl)],
                        ", ".join(sorted([x[len(topdir) + 1:] for x in paths])))
             rmtree(topdir)
             # NOTE(review): type='dir' here vs type='directory' above --
             # looks inconsistent, but kept as-is; verify downstream consumers
             yield get_status_dict(
                 path=topdir, status='ok', type='dir', message=message,
                 **res_kwargs)
Beispiel #5
0
 def __call__(dataset=None,
              what=None,
              recursive=False,
              recursion_limit=None):
     """Yield result records while wiping disposable temporary content.

     Visits the reference dataset (and, if requested, its installed
     subdatasets) and removes the known scratch locations: cached archive
     extractions, the annex tmp and transfer areas, and the metadata
     search index.
     """
     ds = require_dataset(dataset, purpose='clean-up')
     res_kwargs = dict(action='clean', logger=lgr, refds=ds.path)
     # (singular, plural) word pairs used when phrasing the result message
     dir_words = ("directory", "directories")
     file_words = ("file", "files")
     # subdatasets are only visited when recursion was asked for
     subdatasets = ds.subdatasets(
         fulfilled=True,
         recursive=recursive,
         recursion_limit=recursion_limit,
         return_type='generator',
         result_renderer='disabled',
         result_xfm='datasets') if recursive else []
     for current_ds in itertools.chain([ds], subdatasets):
         root = current_ds.path
         git_dir = GitRepo.get_git_dir(root)
         # known scratch locations:
         # (relative path, selection flag, human-readable label, word pair)
         cleanup_spec = [
             (ARCHIVES_TEMP_DIR, "cached-archives", "temporary archive",
              dir_words),
             (ANNEX_TEMP_DIR, "annex-tmp", "temporary annex", file_words),
             (ANNEX_TRANSFER_DIR, "annex-transfer",
              "annex temporary transfer", dir_words),
             (opj(git_dir, SEARCH_INDEX_DOTGITDIR), 'search-index',
              "metadata search index", file_words),
         ]
         for subdir, flag, label, words in cleanup_spec:
             full_path = opj(root, subdir)
             lgr.debug("Considering to clean %s:%s", root, subdir)
             if what is not None and flag not in what:
                 # this location was not selected for cleaning
                 yield get_status_dict(path=full_path,
                                       status='notneeded',
                                       type='directory',
                                       **res_kwargs)
                 continue
             content = glob(opj(full_path, '*'))
             if not content:
                 # nothing inside -> nothing to remove
                 yield get_status_dict(path=full_path,
                                       status='notneeded',
                                       type='directory',
                                       **res_kwargs)
                 continue
             message = (
                 "Removed %d %s %s: %s",
                 len(content), label,
                 words[1 if len(content) > 1 else 0],
                 ", ".join(sorted(x[len(full_path) + 1:] for x in content)))
             rmtree(full_path)
             yield get_status_dict(path=full_path,
                                   status='ok',
                                   type='dir',
                                   message=message,
                                   **res_kwargs)
Beispiel #6
0
    def __call__(target, opts=None, dataset=None):
        """Export all annex objects of a dataset into a 7z archive.

        Keys are hard-linked (or copied, if hard links are unsupported)
        into a temporary directory mirroring the repository's hashdir
        layout, which is then packed with ``7z``.

        Parameters
        ----------
        target : str or Path
          Path of the archive to create/update. If it points to an
          existing directory, ``archive.7z`` inside it is used.
        opts : list of str, optional
          Extra options for the ``7z`` call; defaults to ``['-mx0']``
          (no compression).
        dataset : Dataset or path, optional
          Dataset to export; resolved via ``require_dataset``.

        Yields
        ------
        dict
          DataLad result records.
        """
        # only non-bare repos have hashdirmixed, so require one
        ds = require_dataset(dataset,
                             check_installed=True,
                             purpose='ORA archive export')
        ds_repo = ds.repo

        # TODO remove once datalad 0.12rc7 or later is released
        if not hasattr(ds_repo, 'dot_git'):
            from datalad.support.gitrepo import GitRepo
            ds_repo.dot_git = ds_repo.pathobj / GitRepo.get_git_dir(ds_repo)

        annex_objs = ds_repo.dot_git / 'annex' / 'objects'

        archive = resolve_path(target, dataset)
        if archive.is_dir():
            archive = archive / 'archive.7z'
        else:
            archive.parent.mkdir(exist_ok=True, parents=True)

        if not opts:
            # uncompressed by default
            opts = ['-mx0']

        res_kwargs = dict(
            action="export-archive-ora",
            logger=lgr,
        )

        if not annex_objs.is_dir():
            # nothing annexed -> nothing to export
            yield get_status_dict(
                ds=ds,
                status='notneeded',
                message='no annex keys present',
                **res_kwargs,
            )
            return

        exportdir = ds_repo.dot_git / 'datalad' / 'tmp' / 'ora_archive'
        if exportdir.exists():
            # refuse to reuse a stale staging area from a previous run
            yield get_status_dict(
                ds=ds,
                status='error',
                message=(
                    'export directory already exists, please remove first: %s',
                    str(exportdir)),
                **res_kwargs,
            )
            return

        keypaths = [
            k for k in annex_objs.glob(op.join('**', '*')) if k.is_file()
        ]

        log_progress(
            lgr.info,
            'oraarchiveexport',
            'Start ORA archive export %s',
            ds,
            total=len(keypaths),
            label='ORA archive export',
            unit=' Keys',
        )

        link_fx = os.link
        for keypath in keypaths:
            key = keypath.name
            # the two path components above the key directory form the
            # hash-based directory layout to reproduce in the staging area
            hashdir = op.join(keypath.parts[-4], keypath.parts[-3])
            log_progress(lgr.info,
                         'oraarchiveexport',
                         'Export key %s to %s',
                         key,
                         hashdir,
                         update=1,
                         increment=True)
            keydir = exportdir / hashdir / key
            keydir.mkdir(parents=True, exist_ok=True)
            try:
                link_fx(str(keypath), str(keydir / key))
            except OSError:
                lgr.warning(
                    'No hard links supported at %s, will copy files instead',
                    str(keydir))
                # no hard links supported
                # switch function after first error
                link_fx = shutil.copyfile
                link_fx(str(keypath), str(keydir / key))

        log_progress(lgr.info, 'oraarchiveexport',
                     'Finished RIA archive export from %s', ds)
        try:
            # check=True: a non-zero 7z exit must raise CalledProcessError
            # so it is reported as an error result below -- without it a
            # failed archive build was silently reported as 'ok'
            subprocess.run(
                ['7z', 'u', str(archive), '.'] + opts,
                cwd=str(exportdir),
                check=True,
            )
            yield get_status_dict(path=str(archive),
                                  type='file',
                                  status='ok',
                                  **res_kwargs)
        except Exception as e:
            yield get_status_dict(path=str(archive),
                                  type='file',
                                  status='error',
                                  message=('7z failed: %s', exc_str(e)),
                                  **res_kwargs)
            return
        finally:
            # always drop the staging area, success or not
            rmtree(str(exportdir))