Beispiel #1
0
    def __call__(
            path=None,
            dataset=None,
            recursive=False,
            target_dir=None,
            specs_from=None,
            message=None):
        # Concept
        #
        # Loosely model after the POSIX cp command
        #
        # 1. Determine the target of the copy operation, and its associated
        #    dataset
        #
        # 2. for each source: determine source dataset, query for metadata, put
        #    into target dataset
        #
        # Instead of sifting and sorting through input args, process them one
        # by one sequentially. Utilize lookup caching to make things faster,
        # instead of making the procedure itself more complicated.

        if path and specs_from:
            raise ValueError(
                "Path argument(s) AND a specs-from specified, "
                "this is not supported.")

        ds = None
        if dataset:
            ds = require_dataset(dataset, check_installed=True,
                                 purpose='copy into')

        if target_dir:
            target_dir = resolve_path(target_dir, dataset)

        if path:
            # turn into list of absolute paths
            paths = [resolve_path(p, dataset) for p in ensure_list(path)]

            # we already checked that there are no specs_from
            if not target_dir:
                if len(paths) == 1:
                    if not ds:
                        raise ValueError("No target directory was given.")
                    # we can keep target_dir unset and need not manipulate
                    # paths, this is all done in a generic fashion below
                elif len(paths) == 2:
                    # single source+dest combo
                    if paths[-1].is_dir():
                        # check if we need to set target_dir, in case dest
                        # is a dir
                        target_dir = paths.pop(-1)
                    else:
                        specs_from = [paths]
                else:
                    target_dir = paths.pop(-1)

            if not specs_from:
                # in all other cases we have a plain source list
                specs_from = paths

        if not specs_from:
            raise ValueError("Neither `paths` nor `specs_from` given.")

        if target_dir:
            if ".git" in target_dir.parts:
                raise ValueError(
                    "Target directory should not contain a .git directory: {}"
                    .format(target_dir))
        elif ds:
            # no specific target set, but we have to write into a dataset,
            # and one was given. It seems to make sense to use this dataset
            # as a target. it is already to reference for any path resolution.
            # Any explicitly given destination, will take precedence over
            # a general target_dir setting nevertheless.
            target_dir = ds.pathobj

        res_kwargs = dict(
            action='copy_file',
            logger=lgr,
        )

        # lookup cache for dir to repo mappings, and as a DB for cleaning
        # things up
        repo_cache = {}
        # which paths to pass on to save
        to_save = []
        try:
            for src_path, dest_path in _yield_specs(specs_from):
                src_path = Path(src_path)
                dest_path = None \
                    if dest_path is None \
                    else resolve_path(dest_path, dataset)
                lgr.debug('Processing copy specification: %s -> %s',
                          src_path, dest_path)

                # Some checks, first impossibility "wins"
                msg_impossible = None
                if not recursive and src_path.is_dir():
                    msg_impossible = 'recursion not enabled, omitting directory'
                elif (dest_path and dest_path.name == '.git') \
                        or src_path.name == '.git':
                    msg_impossible = \
                        "refuse to place '.git' into destination dataset"
                elif not (dest_path or target_dir):
                    msg_impossible = 'need destination path or target directory'

                if msg_impossible:
                    yield dict(
                        path=str(src_path),
                        status='impossible',
                        message=msg_impossible,
                        **res_kwargs
                    )
                    continue

                for src_file, dest_file in _yield_src_dest_filepaths(
                        src_path, dest_path, target_dir=target_dir):
                    if ds and ds.pathobj not in dest_file.parents:
                        # take time to compose proper error
                        dpath = str(target_dir if target_dir else dest_path)
                        yield dict(
                            path=dpath,
                            status='error',
                            message=(
                                'reference dataset does not contain '
                                'destination path: %s',
                                dpath),
                            **res_kwargs
                        )
                        # only recursion could yield further results, which would
                        # all have the same issue, so call it over right here
                        break
                    for res in _copy_file(src_file, dest_file, cache=repo_cache):
                        yield dict(
                            res,
                            **res_kwargs
                        )
                        if res.get('status', None) == 'ok':
                            to_save.append(res['destination'])
        finally:
            # cleanup time
            # TODO this could also be the place to stop lingering batch processes
            _cleanup_cache(repo_cache)

        if not (ds and to_save):
            # nothing left to do
            return

        yield from ds.save(
            path=to_save,
            # we provide an explicit file list
            recursive=False,
            message=message,
        )