Example #1
    def __call__(
            path=None,
            dataset=None,
            recursive=False,
            recursion_limit=None,
            check=True,
            if_dirty='save-before'):

        if dataset and not path:
            # act on the whole dataset if nothing else was specified
            path = dataset.path if isinstance(dataset, Dataset) else dataset
        content_by_ds, unavailable_paths = Interface._prep(
            path=path,
            dataset=dataset,
            recursive=recursive,
            recursion_limit=recursion_limit)
        handle_dirty_datasets(
            content_by_ds.keys(), mode=if_dirty, base=dataset)

        results = []

        # iterate over all datasets, order doesn't matter
        for ds_path in content_by_ds:
            ds = Dataset(ds_path)
            paths = content_by_ds[ds_path]
            res = _drop_files(ds, paths, check=check)
            results.extend(res)
        # there is nothing to save at the end
        return results
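
The snippet above appears to be the body of DataLad's Drop command. Assuming it is exposed through the Python API as datalad.api.drop (an assumption; the class itself is not shown), a minimal usage sketch with made-up paths could look like this:

    # hypothetical usage sketch -- dataset and file paths are illustrative only
    from datalad.api import drop

    results = drop(
        path='/tmp/myds/data/large_scan.nii.gz',  # annexed content to drop
        dataset='/tmp/myds',                      # dataset to operate on
        recursive=False,
        check=True,                # verify enough other copies exist first
        if_dirty='save-before')    # save uncommitted changes before acting
    for res in results:
        print(res)
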
Example #2
    def __call__(
            path=None,
            dataset=None,
            recursive=False,
            check=True,
            if_dirty='save-before'):

        if dataset and not path:
            # act on the whole dataset if nothing else was specified
            path = dataset.path if isinstance(dataset, Dataset) else dataset
        content_by_ds, unavailable_paths = Interface._prep(
            path=path,
            dataset=dataset,
            recursive=recursive)
        if unavailable_paths:
            lgr.warning('ignored non-installed paths: %s', unavailable_paths)
        # upfront sanity and compliance checks
        if path_is_under(content_by_ds.keys()):
            # behave like `rm` and refuse to remove where we are
            raise ValueError(
                "refusing to uninstall current or parent directory")
        # check that we have no top-level datasets and no files to process
        args_ok = True
        for ds_path in content_by_ds:
            ds = Dataset(ds_path)
            paths = content_by_ds[ds_path]
            if ds_path not in paths:
                lgr.error(
                    "will not act on files at %s (consider the `drop` command)",
                    paths)
                args_ok = False
            if not ds.get_superdataset(
                    datalad_only=False,
                    topmost=False):
                lgr.error(
                    "will not uninstall top-level dataset at %s (consider the `remove` command)",
                    ds.path)
                args_ok = False
        if not args_ok:
            raise ValueError(
                'inappropriate arguments, see previous error message(s)')

        handle_dirty_datasets(
            content_by_ds, mode=if_dirty, base=dataset)

        results = []

        # iterate over all datasets, starting at the bottom
        # to deinit contained submodules first
        for ds_path in sorted(content_by_ds, reverse=True):
            ds = Dataset(ds_path)
            paths = content_by_ds[ds_path]
            results.extend(
                # we confirmed the super dataset presence above
                _uninstall_dataset(ds, check=check, has_super=True))
        # there is nothing to save at the end
        return results
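
This second variant looks like the Uninstall command: it refuses to act on plain files or on top-level datasets. Assuming a datalad.api.uninstall binding with the signature shown above (an assumption), a hypothetical invocation on an installed subdataset might be:

    # hypothetical usage sketch -- the subdataset path is illustrative only
    from datalad.api import uninstall

    results = uninstall(
        path='/tmp/superds/subds',   # an installed subdataset, not the top-level dataset
        dataset='/tmp/superds',
        recursive=True,              # also uninstall contained subdatasets
        check=True,                  # let git-annex verify data safety first
        if_dirty='save-before')
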
Example #3
    def __call__(
            path=None,
            name=None,
            merge=False,
            dataset=None,
            recursive=False,
            recursion_limit=None,
            fetch_all=False,
            reobtain_data=False):
        """
        """
        if reobtain_data:
            # TODO: properly define what to do
            raise NotImplementedError("TODO: Option '--reobtain-data' not "
                                      "implemented yet.")

        if dataset and not path:
            # act on the whole dataset if nothing else was specified
            path = dataset.path if isinstance(dataset, Dataset) else dataset
        content_by_ds, unavailable_paths = Interface._prep(
            path=path,
            dataset=dataset,
            recursive=recursive,
            recursion_limit=recursion_limit)

        # TODO: check parsed inputs if any paths within a dataset were given
        # and issue a message that we will update the associated dataset as a whole
        # or fail -- see #1185 for a potential discussion
        results = []

        for ds_path in content_by_ds:
            ds = Dataset(ds_path)
            repo = ds.repo
            # get all remotes which have references (would exclude
            # special remotes)
            remotes = repo.get_remotes(with_refs_only=True)
            if not remotes:
                lgr.debug("No siblings known to dataset at %s\nSkipping",
                          repo.path)
                continue
            if name and name not in remotes:
                lgr.warning("'%s' not known to dataset %s\nSkipping",
                            name, repo.path)
                continue

            # Currently '--merge' works for a single remote only:
            # TODO: - condition still incomplete
            #       - We can merge if a remote was given or there is a
            #         tracking branch
            #       - we also can fetch all remotes independently of whether
            #         or not we merge a certain remote
            if not name and len(remotes) > 1 and merge:
                lgr.debug("Found multiple remotes:\n%s" % remotes)
                raise NotImplementedError("No merge strategy for multiple "
                                          "remotes implemented yet.")
            lgr.info("Updating dataset '%s' ..." % repo.path)
            _update_repo(repo, name, merge, fetch_all)
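
The Update implementation above fetches from known siblings, optionally merges, and returns nothing. Assuming a datalad.api.update binding (an assumption; the sibling name and dataset path below are illustrative), fetching and merging a single named sibling might look like:

    # hypothetical usage sketch
    from datalad.api import update

    update(
        name='origin',        # sibling to fetch from; must be a known remote
        dataset='/tmp/myds',
        merge=True,           # merging currently works for a single remote only
        recursive=False,
        fetch_all=False)
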
Example #4
    def __call__(path=None,
                 name=None,
                 merge=False,
                 dataset=None,
                 recursive=False,
                 recursion_limit=None,
                 fetch_all=False,
                 reobtain_data=False):
        """
        """
        if reobtain_data:
            # TODO: properly define what to do
            raise NotImplementedError("TODO: Option '--reobtain-data' not "
                                      "implemented yet.")

        if dataset and not path:
            # act on the whole dataset if nothing else was specified
            path = dataset.path if isinstance(dataset, Dataset) else dataset
        content_by_ds, unavailable_paths = Interface._prep(
            path=path,
            dataset=dataset,
            recursive=recursive,
            recursion_limit=recursion_limit)

        # TODO: check parsed inputs if any paths within a dataset were given
        # and issue a message that we will update the associated dataset as a whole
        # or fail -- see #1185 for a potential discussion
        results = []

        for ds_path in content_by_ds:
            ds = Dataset(ds_path)
            repo = ds.repo
            # get all remotes which have references (would exclude
            # special remotes)
            remotes = repo.get_remotes(with_refs_only=True)
            if not remotes:
                lgr.debug("No siblings known to dataset at %s\nSkipping",
                          repo.path)
                continue
            if name and name not in remotes:
                lgr.warning("'%s' not known to dataset %s\nSkipping", name,
                            repo.path)
                continue

            # Currently '--merge' works for a single remote only:
            # TODO: - condition still incomplete
            #       - We can merge if a remote was given or there is a
            #         tracking branch
            #       - we also can fetch all remotes independently of whether
            #         or not we merge a certain remote
            if not name and len(remotes) > 1 and merge:
                lgr.debug("Found multiple remotes:\n%s" % remotes)
                raise NotImplementedError("No merge strategy for multiple "
                                          "remotes implemented yet.")
            lgr.info("Updating dataset '%s' ..." % repo.path)
            _update_repo(repo, name, merge, fetch_all)
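
Example #4 repeats the Update body of Example #3 with different line wrapping. As a complementary, equally hypothetical sketch, the fetch-only recursive form below avoids the NotImplementedError that is raised when merging is requested with multiple remotes and no name:

    # hypothetical usage sketch -- fetch all remotes across a dataset hierarchy
    from datalad.api import update

    update(
        dataset='/tmp/superds',
        recursive=True,       # also update installed subdatasets
        fetch_all=True,       # fetch every remote that has references
        merge=False)          # no merging, so multiple remotes are fine
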
Example #5
    def __call__(path=None,
                 dataset=None,
                 to=None,
                 since=None,
                 missing='fail',
                 force=False,
                 recursive=False,
                 recursion_limit=None,
                 git_opts=None,
                 annex_opts=None,
                 annex_copy_opts=None,
                 jobs=None):

        # if ever we get a mode, for "with-data" we would need this
        #if dataset and not path:
        #    # act on the whole dataset if nothing else was specified
        #    path = dataset.path if isinstance(dataset, Dataset) else dataset

        if not dataset and not path:
            # try to find a dataset in PWD
            dataset = require_dataset(None,
                                      check_installed=True,
                                      purpose='publishing')

        if since and not dataset:
            raise InsufficientArgumentsError(
                'Modification detection (--since) without a base dataset '
                'is not supported')

        content_by_ds, unavailable_paths = Interface._prep(
            path=path,
            dataset=dataset,
            recursive=recursive,
            recursion_limit=recursion_limit,
            # for this command we do not want to state that content should be
            # published by default by automagically assigning paths for each
            # sub-dataset. But if paths were provided -- sorting would
            # happen to point only to the submodules under those paths, and
            # then to stay consistent we want to copy the data for those paths
            sub_paths=bool(path))
        if unavailable_paths:
            raise ValueError(
                'cannot publish content that is not available locally: %s' %
                ', '.join(unavailable_paths))

        # here is the plan
        # 1. figure out remote to publish to
        # 2. figure out which content needs to be published to this remote
        # 3. look for any pre-publication dependencies of that remote
        #    (i.e. remotes that need to be published to before)
        # 4. publish the content needed to go to the primary remote to
        #    the dependencies first, and to the primary afterwards
        ds_remote_info = {}
        lgr.debug("Evaluating %i dataset publication candidate(s)",
                  len(content_by_ds))
        # TODO: fancier sorting, so we still follow somewhat the hierarchy
        #       in sorted order, e.g.
        #  d1/sub1/sub1
        #  d1/sub1
        #  d1
        #  d2/sub1
        #  d2
        content_by_ds = OrderedDict(
            (d, content_by_ds[d]) for d in sorted(content_by_ds, reverse=True))
        for ds_path in content_by_ds:
            ds = Dataset(ds_path)
            if to is None:
                # we need an upstream remote, if there's none given. We could
                # wait for git push to complain, but we need to explicitly
                # figure it out for pushing annex branch anyway and we might as
                # well fail right here.
                track_remote, track_refspec = ds.repo.get_tracking_branch()
                if not track_remote:
                    # no tracking remote configured, but let's try one more
                    # thing: if we only have one remote, and it has a push
                    # target configured that is "good enough" for us
                    cand_remotes = [
                        r for r in ds.repo.get_remotes()
                        if 'remote.{}.push'.format(r) in ds.config
                    ]
                    if len(cand_remotes) > 1:
                        lgr.warning(
                            'Target sibling ambiguous, please specify via --to'
                        )
                    elif len(cand_remotes) == 1:
                        track_remote = cand_remotes[0]
                    else:
                        lgr.warning(
                            'No target sibling configured for default publication, '
                            'please specify via --to')
                if track_remote:
                    ds_remote_info[ds_path] = dict(
                        zip(('remote', 'refspec'),
                            (track_remote, track_refspec)))
                elif missing == 'skip':
                    lgr.warning('Cannot determine target sibling, skipping %s',
                                ds)
                    ds_remote_info[ds_path] = None
                else:
                    # we have no remote given and no upstream => fail
                    raise InsufficientArgumentsError(
                        'Cannot determine target sibling for %s' % (ds, ))
            elif to not in ds.repo.get_remotes():
                # unknown given remote
                if missing == 'skip':
                    lgr.warning("Unknown target sibling '%s', skipping %s", to,
                                ds)
                    ds_remote_info[ds_path] = None
                elif missing == 'inherit':
                    superds = ds.get_superdataset()
                    if not superds:
                        raise RuntimeError(
                            "%s has no super-dataset to inherit settings for the remote %s"
                            % (ds, to))
                    # XXX due to differences between create-sibling and
                    # create-sibling-github it would not be as transparent
                    # to inherit for -github
                    lgr.info(
                        "Will try to create a sibling inheriting settings from %s",
                        superds)
                    # XXX explicit None as sshurl for now
                    ds.create_sibling(None, name=to, inherit=True)
                    ds_remote_info[ds_path] = {'remote': to}
                else:
                    raise ValueError("Unknown target sibling '%s' for %s" %
                                     (to, ds))
            else:
                # all good: remote given and is known
                ds_remote_info[ds_path] = {'remote': to}

        if dataset and since:
            # remove all unmodified components from the spec
            lgr.debug("Testing %i dataset(s) for modifications since '%s'",
                      len(content_by_ds), since)
            content_by_ds = filter_unmodified(content_by_ds, dataset, since)

        lgr.debug("Attempt to publish %i datasets", len(content_by_ds))
        published, skipped = [], []
        for ds_path in content_by_ds:
            remote_info = ds_remote_info[ds_path]
            if not remote_info:
                # in case we are skipping
                lgr.debug("Skipping dataset at '%s'", ds_path)
                continue
            # and publish
            ds = Dataset(ds_path)
            pblsh, skp = _publish_dataset(ds,
                                          remote=remote_info['remote'],
                                          refspec=remote_info.get(
                                              'refspec', None),
                                          paths=content_by_ds[ds_path],
                                          annex_copy_options=annex_copy_opts,
                                          force=force,
                                          jobs=jobs)
            published.extend(pblsh)
            skipped.extend(skp)
        return published, skipped
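
The Publish implementation resolves a target sibling per dataset and returns a (published, skipped) tuple. Assuming a datalad.api.publish binding (an assumption; the sibling name and dataset path are illustrative), publishing a dataset hierarchy to a named sibling might be sketched as:

    # hypothetical usage sketch
    from datalad.api import publish

    published, skipped = publish(
        dataset='/tmp/myds',
        to='myserver',          # must be a configured sibling of each dataset
        recursive=True,
        missing='skip',         # skip subdatasets that lack this sibling
        jobs=2)
    print('published:', published)
    print('skipped:', skipped)
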
Example #6
    def __call__(path=None,
                 dataset=None,
                 to_git=False,
                 save=True,
                 recursive=False,
                 recursion_limit=None,
                 ds2super=False,
                 git_opts=None,
                 annex_opts=None,
                 annex_add_opts=None,
                 jobs=None):

        # parameter constraints:
        if not path:
            raise InsufficientArgumentsError(
                "insufficient information for adding: requires at least a path"
            )
        # never recurse here -- we need to handle recursion manually below
        # to be able to discover untracked content
        content_by_ds, unavailable_paths = Interface._prep(path=path,
                                                           dataset=dataset,
                                                           recursive=False)
        if unavailable_paths:
            lgr.warning("ignoring non-existent path(s): %s", unavailable_paths)
        if recursive:
            # with --recursive for each input path traverse the directory
            # tree, when we find a dataset, add it to the spec, AND add it as
            # a path to the spec of the parent
            # MIH: wrap in list() to avoid exception, because dict size might
            # change, but we want to loop over all that are in at the start
            # only
            for d in list(content_by_ds.keys()):
                for p in content_by_ds[d]:
                    _discover_subdatasets_recursively(p, [d], content_by_ds,
                                                      recursion_limit)

        if not content_by_ds:
            raise InsufficientArgumentsError(
                "no existing content given to add")

        if dataset:
            # remember the datasets associated with actual inputs
            input_ds = list(content_by_ds.keys())
            # forge chain from base dataset to any leaf dataset
            _discover_trace_to_known(dataset.path, [], content_by_ds)
            if ds2super:
                # now check all dataset entries corresponding to the original
                # input to see if they contain their own paths and remove them
                for inpds in input_ds:
                    content_by_ds[inpds] = [
                        p for p in content_by_ds[inpds] if not p == inpds
                    ]
                # and lastly remove all entries that contain no path to avoid
                # saving any staged content in the final step
                content_by_ds = {d: v for d, v in content_by_ds.items() if v}

        results = []
        # simple loop over datasets -- save happens later
        # start deep down
        for ds_path in sorted(content_by_ds, reverse=True):
            ds = Dataset(ds_path)
            toadd = list(set(content_by_ds[ds_path]))
            # handle anything that looks like a wannabe subdataset
            for subds_path in [
                    d for d in toadd if GitRepo.is_valid_repo(d)
                    and d != ds_path and d not in ds.get_subdatasets(
                        recursive=False, absolute=True, fulfilled=True)
            ]:
                # TODO add check that the subds has a commit, and refuse
                # to operate on it otherwise, or we would get a bastard
                # submodule that cripples git operations
                _install_subds_inplace(ds=ds,
                                       path=subds_path,
                                       relativepath=relpath(
                                           subds_path, ds_path))
                # make sure that .gitmodules is added to the list of files
                toadd.append(opj(ds.path, '.gitmodules'))
                # report added subdatasets -- add below won't do it
                results.append({'success': True, 'file': Dataset(subds_path)})
            # make sure any last minute additions make it to the saving stage
            content_by_ds[ds_path] = toadd
            added = ds.repo.add(
                toadd,
                git=to_git if isinstance(ds.repo, AnnexRepo) else True,
                commit=False)
            for a in added:
                a['file'] = opj(ds_path, a['file'])
            results.extend(added)

        if results and save:
            save_dataset_hierarchy(content_by_ds,
                                   base=dataset.path if dataset
                                   and dataset.is_installed() else None,
                                   message='[DATALAD] added content')

        return results
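
This is the Add command: it stages files, registers in-place Git repositories as subdatasets, and optionally saves the dataset hierarchy. Assuming a datalad.api.add binding (an assumption; the paths are illustrative), adding a single file to the annex could be sketched as:

    # hypothetical usage sketch
    from datalad.api import add

    results = add(
        path='/tmp/myds/data/samples.csv',  # goes to the annex by default
        dataset='/tmp/myds',
        to_git=False,     # True would commit the file directly to git instead
        save=True)        # save the dataset hierarchy after adding
    for res in results:
        print(res)
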
Example #7
    def __call__(
            path=None,
            dataset=None,
            recursive=False,
            check=True,
            if_dirty='save-before'):
        if dataset:
            dataset = require_dataset(
                dataset, check_installed=False, purpose='removal')
            if not dataset.is_installed() and not path:
                # all done already
                return []
            if not path:
                # act on the whole dataset if nothing else was specified
                path = dataset.path if isinstance(dataset, Dataset) else dataset
        content_by_ds, unavailable_paths = Interface._prep(
            path=path,
            dataset=dataset,
            recursive=recursive)

        nonexistent_paths = []
        for p in unavailable_paths:
            # we need to check whether any of these correspond
            # to a known subdataset, and add those to the list of
            # things to be removed
            toppath = get_dataset_root(p)
            if not toppath:
                nonexistent_paths.append(p)
                continue
            if p in Dataset(toppath).get_subdatasets(
                    recursive=False, absolute=True):
                # this is a known subdataset that needs to be removed
                pl = content_by_ds.get(p, [])
                pl.append(p)
                content_by_ds[p] = pl
        if nonexistent_paths:
            lgr.warning("ignoring non-existent path(s): %s",
                        nonexistent_paths)

        if path_is_under(content_by_ds):
            # behave like `rm` and refuse to remove where we are
            raise ValueError(
                "refusing to uninstall current or parent directory")

        handle_dirty_datasets(
            content_by_ds, mode=if_dirty, base=dataset)
        ds2save = set()
        results = []
        # iterate over all datasets, starting at the bottom
        # to make the removal of dataset content known upstairs
        for ds_path in sorted(content_by_ds, reverse=True):
            ds = Dataset(ds_path)
            paths = content_by_ds[ds_path]
            if ds_path in paths:
                # entire dataset needs to go
                superds = ds.get_superdataset(
                    datalad_only=False,
                    topmost=False)
                res = _uninstall_dataset(ds, check=check, has_super=False)
                results.extend(res)
                if ds.path in ds2save:
                    # we just uninstalled it, no need to save anything
                    ds2save.discard(ds.path)
                if not superds:
                    continue
                subds_relpath = relpath(ds_path, start=superds.path)
                # remove submodule reference
                submodule = [sm for sm in superds.repo.repo.submodules
                             if sm.path == subds_relpath]
                # there can only be one!
                assert(len(submodule) == 1)
                submodule = submodule[0]
                submodule.remove()
                if exists(ds_path):
                    # could be an empty dir in case an already uninstalled subdataset
                    # got removed
                    os.rmdir(ds_path)
                # need to save changes to .gitmodules later
                content_by_ds[superds.path] = \
                    content_by_ds.get(superds.path, []) \
                    + [opj(superds.path, '.gitmodules'),
                       ds_path]
                ds2save.add(superds.path)
            else:
                if check and hasattr(ds.repo, 'drop'):
                    _drop_files(ds, paths, check=True)
                results.extend(ds.repo.remove(paths, r=True))
                ds2save.add(ds.path)

        if dataset and dataset.is_installed():
            # forge chain from base dataset to any leaf dataset
            # in order to save state changes all the way up
            _discover_trace_to_known(dataset.path, [], content_by_ds)

        save_dataset_hierarchy(
            content_by_ds,
            base=dataset.path if dataset and dataset.is_installed() else None,
            message='[DATALAD] removed content')
        return results
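
The Remove implementation uninstalls whole datasets or deletes content, then saves the change all the way up the hierarchy. Assuming a datalad.api.remove binding (an assumption; the subdataset path is illustrative), removing a subdataset from its superdataset might be sketched as:

    # hypothetical usage sketch
    from datalad.api import remove

    results = remove(
        path='/tmp/superds/obsolete-subds',  # subdataset (or files) to remove
        dataset='/tmp/superds',
        recursive=False,
        check=True,               # drop file content safely before removal
        if_dirty='save-before')
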
Example #8
    def __call__(
            path=None,
            dataset=None,
            to_git=False,
            save=True,
            recursive=False,
            recursion_limit=None,
            ds2super=False,
            git_opts=None,
            annex_opts=None,
            annex_add_opts=None,
            jobs=None):

        # parameter constraints:
        if not path:
            raise InsufficientArgumentsError(
                "insufficient information for adding: requires at least a path")
        # never recurse here -- we need to handle recursion manually below
        # to be able to discover untracked content
        content_by_ds, unavailable_paths = Interface._prep(
            path=path,
            dataset=dataset,
            recursive=False)
        if unavailable_paths:
            lgr.warning("ignoring non-existent path(s): %s",
                        unavailable_paths)
        if recursive:
            # with --recursive for each input path traverse the directory
            # tree, when we find a dataset, add it to the spec, AND add it as
            # a path to the spec of the parent
            # MIH: wrap in list() to avoid exception, because dict size might
            # change, but we want to loop over all that are in at the start
            # only
            for d in list(content_by_ds.keys()):
                for p in content_by_ds[d]:
                    _discover_subdatasets_recursively(
                        p,
                        [d],
                        content_by_ds,
                        recursion_limit)

        if not content_by_ds:
            raise InsufficientArgumentsError(
                "no existing content given to add")

        if dataset:
            # remember the datasets associated with actual inputs
            input_ds = list(content_by_ds.keys())
            # forge chain from base dataset to any leaf dataset
            _discover_trace_to_known(dataset.path, [], content_by_ds)
            if ds2super:
                # now check all dataset entries corresponding to the original
                # input to see if they contain their own paths and remove them
                for inpds in input_ds:
                    content_by_ds[inpds] = [p for p in content_by_ds[inpds]
                                            if not p == inpds]
                # and lastly remove all entries that contain no path to avoid
                # saving any staged content in the final step
                content_by_ds = {d: v for d, v in content_by_ds.items() if v}

        results = []
        # simple loop over datasets -- save happens later
        # start deep down
        for ds_path in sorted(content_by_ds, reverse=True):
            ds = Dataset(ds_path)
            toadd = list(set(content_by_ds[ds_path]))
            # handle anything that looks like a wannabe subdataset
            for subds_path in [d for d in toadd
                               if GitRepo.is_valid_repo(d) and
                               d != ds_path and
                               d not in ds.get_subdatasets(
                                   recursive=False,
                                   absolute=True,
                                   fulfilled=True)]:
                # TODO add check that the subds has a commit, and refuse
                # to operate on it otherwise, or we would get a bastard
                # submodule that cripples git operations
                _install_subds_inplace(
                    ds=ds,
                    path=subds_path,
                    relativepath=relpath(subds_path, ds_path))
                # make sure that .gitmodules is added to the list of files
                toadd.append(opj(ds.path, '.gitmodules'))
                # report added subdatasets -- add below won't do it
                results.append({
                    'success': True,
                    'file': Dataset(subds_path)})
            # make sure any last minute additions make it to the saving stage
            content_by_ds[ds_path] = toadd
            added = ds.repo.add(
                toadd,
                git=to_git if isinstance(ds.repo, AnnexRepo) else True,
                commit=False)
            for a in added:
                a['file'] = opj(ds_path, a['file'])
            results.extend(added)

        if results and save:
            save_dataset_hierarchy(
                content_by_ds,
                base=dataset.path if dataset and dataset.is_installed() else None,
                message='[DATALAD] added content')

        return results
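
Example #8 is the same Add body as Example #6 with different formatting. A complementary, still hypothetical sketch: the recursive form below would also pick up Git repositories found under the given path and register them as subdatasets, as the loop over "wannabe" subdatasets above does:

    # hypothetical usage sketch -- recursive add of a directory tree
    from datalad.api import add

    results = add(
        path='/tmp/myds/code',   # directory tree; contained repos become subdatasets
        dataset='/tmp/myds',
        to_git=True,             # small text files can go straight to git
        recursive=True,
        save=True)
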
Example #9
    def __call__(
            path=None,
            source=None,
            dataset=None,
            recursive=False,
            recursion_limit=None,
            get_data=True,
            reckless=False,
            git_opts=None,
            annex_opts=None,
            annex_get_opts=None,
            jobs=None,
            verbose=False,
            # internal -- instead of returning 'get'ed items, return final
            # content_by_ds, unavailable_paths.  To be used by the call from
            # Install.__call__ and done so to avoid creating another reusable
            # function which would need to duplicate all this heavy list of
            # kwargs
            _return_datasets=False
    ):
        # IMPLEMENTATION CONCEPT:
        #
        # 1. Sort the world into existing handles and the rest
        # 2. Try to locate missing handles (obtain subdatasets along the way)
        # 3. Expand into subdatasets with recursion enabled (potentially
        #    obtaining even more subdatasets)
        # 4. Shoot the info of which handles to get in each subdataset to
        #    git-annex, once at the very end

        dataset_path = dataset.path if isinstance(dataset, Dataset) else dataset
        if not (dataset or path):
            raise InsufficientArgumentsError(
                "Neither dataset nor target path(s) provided")
        if dataset and not path:
            # act on the whole dataset if nothing else was specified
            path = dataset_path
        # use lookup cache -- we need that info further down
        dir_lookup = {}
        content_by_ds, unavailable_paths = Interface._prep(
            path=path,
            dataset=dataset,
            recursive=recursive,
            recursion_limit=recursion_limit,
            dir_lookup=dir_lookup)

        # explore the unknown
        for path in sorted(unavailable_paths):
            # how close can we get?
            dspath = get_dataset_root(path)
            if dspath is None:
                # nothing we can do for this path
                continue
            ds = Dataset(dspath)
            # must always yield a dataset -- we sorted out the ones outside
            # any dataset at the very top
            assert ds.is_installed()
            # now actually obtain whatever is necessary to get to this path
            containing_ds = install_necessary_subdatasets(ds, path, reckless)
            if containing_ds.path != ds.path:
                lgr.debug("Installed %s to fulfill request for content for "
                          "path %s", containing_ds, path)
                # mark resulting dataset as auto-installed
                if containing_ds.path == path:
                    # we had to get the entire dataset, not something within
                    # mark that it just appeared
                    content_by_ds[path] = [curdir]
                else:
                    # we need to get content within
                    content_by_ds[path] = [path]

        if recursive and not recursion_limit == 'existing':
            # obtain any subdatasets underneath the paths given inside the
            # subdatasets that we know already exist
            # unless we do not want recursion into not-yet-installed datasets
            for subdspath in sorted(content_by_ds.keys()):
                for content_path in content_by_ds[subdspath]:
                    if not isdir(content_path):
                        # a non-directory cannot have content underneath
                        continue
                    subds = Dataset(subdspath)
                    lgr.info(
                        "Obtaining %s %s recursively",
                        subds,
                        ("underneath %s" % content_path
                         if subds.path != content_path
                         else ""))
                    cbysubds = _recursive_install_subds_underneath(
                        subds,
                        # `content_path` was explicitly given as input
                        # we count recursions from the input, hence we
                        # can start with the full number
                        recursion_limit,
                        reckless,
                        # protect against magic marker misinterpretation
                        # only relevant for _get, hence replace here
                        start=content_path if content_path != curdir else None)
                    # gets file content for all freshly installed subdatasets
                    content_by_ds.update(cbysubds)

        ## we have now done everything we could to obtain whatever subdatasets
        ## were needed to get something on the file system for previously
        ## unavailable paths; check and sort one last time
        content_by_ds, unavailable_paths, nondataset_paths = \
            get_paths_by_dataset(
                unavailable_paths,
                recursive=recursive,
                recursion_limit=recursion_limit,
                out=content_by_ds,
                dir_lookup=dir_lookup)

        if nondataset_paths:
            # XXX likely can never get here
            lgr.warning(
                "ignored paths that do not belong to any dataset: %s",
                nondataset_paths)

        if unavailable_paths:
            lgr.warning('ignored non-existing paths: %s', unavailable_paths)

        # hand over to git-annex
        results = list(chain.from_iterable(
            _get(content_by_ds, refpath=dataset_path, source=source, jobs=jobs,
                 get_data=get_data)))
        # ??? in the _return_datasets case, should we just return both
        # content_by_ds and unavailable_paths, so that the output is consistent
        # across runs, and then issue a similar IncompleteResultsError outside?
        if unavailable_paths:  # and likely other error flags
            if _return_datasets:
                results = sorted(set(content_by_ds).difference(unavailable_paths))
            raise IncompleteResultsError(results, failed=unavailable_paths)
        else:
            return sorted(content_by_ds) if _return_datasets else results
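
Finally, the Get implementation installs whatever subdatasets are needed to reach the requested paths and then hands the content requests to git-annex. Assuming a datalad.api.get binding without the internal _return_datasets flag (an assumption; paths are illustrative), a usage sketch could be:

    # hypothetical usage sketch
    from datalad.api import get

    results = get(
        path='/tmp/myds/derivatives',  # content to obtain, possibly in subdatasets
        dataset='/tmp/myds',
        recursive=True,                # install and fetch from subdatasets as needed
        get_data=True,                 # retrieve file content, not just datasets
        jobs=2)                        # parallel annex transfers
    print('obtained %d items' % len(results))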