Example #1
0
def _search_from_virgin_install(dataset, query):
    #
    # this is to be nice to newbies
    #
    exc_info = sys.exc_info()
    if dataset is None:
        if not ui.is_interactive:
            raise NoDatasetArgumentFound(
                "No DataLad dataset found. Specify a dataset to be "
                "searched, or run interactively to get assistance "
                "installing a queriable superdataset."
            )
        # none was provided, so we can ask the user whether they want
        # to install our beautiful mega-duper-super-dataset
        # TODO: following logic could possibly benefit other actions.
        DEFAULT_DATASET_PATH = cfg.obtain('datalad.locations.default-dataset')
        if os.path.exists(DEFAULT_DATASET_PATH):
            default_ds = Dataset(DEFAULT_DATASET_PATH)
            if default_ds.is_installed():
                if not ui.yesno(
                        title="No DataLad dataset found at current location",
                        text="Would you like to search the DataLad "
                             "superdataset at %r?"
                             % DEFAULT_DATASET_PATH):
                    raise exc_info[1]
            else:
                raise NoDatasetArgumentFound(
                    "No DataLad dataset found at current location. "
                    "The DataLad superdataset location %r exists, "
                    "but does not contain an dataset."
                    % DEFAULT_DATASET_PATH)
        elif ui.yesno(
                title="No DataLad dataset found at current location",
                text="Would you like to install the DataLad "
                     "superdataset at %r?"
                     % DEFAULT_DATASET_PATH):
            from datalad.api import install
            default_ds = install(DEFAULT_DATASET_PATH, source='///')
            ui.message(
                "From now on you can refer to this dataset using the "
                "label '///'"
            )
        else:
            raise exc_info[1]

        lgr.info(
            "Performing search using DataLad superdataset %r",
            default_ds.path
        )
        for res in default_ds.search(query):
            yield res
        return
    else:
        raise  # this function is called within exception handling block
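A note on the re-raise pattern above: sys.exc_info() is captured while the exception is being handled, so the original exception can be re-raised later from a different branch of the interactive dialog. A minimal, self-contained sketch of the same pattern (names below are illustrative, not DataLad's):

import sys

def risky():
    raise ValueError("no dataset found")

def handle(use_fallback):
    try:
        risky()
    except ValueError:
        # capture while the exception is still being handled
        exc_info = sys.exc_info()
        if use_fallback:
            return "fallback"
        # re-raise the original exception object
        raise exc_info[1]

print(handle(True))   # -> fallback
# handle(False)       # re-raises the original ValueError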
Example #2
0
def _search_from_virgin_install(dataset, query):
    #
    # this is to be nice to newbies
    #
    exc_info = sys.exc_info()
    if dataset is None:
        if not ui.is_interactive:
            raise NoDatasetArgumentFound(
                "No DataLad dataset found. Specify a dataset to be "
                "searched, or run interactively to get assistance "
                "installing a queriable superdataset.")
        # none was provided, so we can ask the user whether they want
        # to install our beautiful mega-duper-super-dataset
        # TODO: following logic could possibly benefit other actions.
        if os.path.exists(LOCAL_CENTRAL_PATH):
            central_ds = Dataset(LOCAL_CENTRAL_PATH)
            if central_ds.is_installed():
                if not ui.yesno(
                        title="No DataLad dataset found at current location",
                        text="Would you like to search the DataLad "
                        "superdataset at %r?" % LOCAL_CENTRAL_PATH):
                    reraise(*exc_info)
            else:
                raise NoDatasetArgumentFound(
                    "No DataLad dataset found at current location. "
                    "The DataLad superdataset location %r exists, "
                    "but does not contain an dataset." % LOCAL_CENTRAL_PATH)
        elif ui.yesno(title="No DataLad dataset found at current location",
                      text="Would you like to install the DataLad "
                      "superdataset at %r?" % LOCAL_CENTRAL_PATH):
            from datalad.api import install
            central_ds = install(LOCAL_CENTRAL_PATH, source='///')
            ui.message("From now on you can refer to this dataset using the "
                       "label '///'")
        else:
            reraise(*exc_info)

        lgr.info("Performing search using DataLad superdataset %r",
                 central_ds.path)
        for res in central_ds.search(query):
            yield res
        return
    else:
        raise
Example #3
0
    def __call__(match,
                 dataset=None,
                 search=None,
                 report=None,
                 report_matched=False,
                 format='custom',
                 regex=False):
        """
        Yields
        ------
        location : str
            (relative) path to the dataset
        report : dict
            fields that were requested via the `report` option
        """

        lgr.debug("Initiating search for match=%r and dataset %r",
                  match, dataset)
        try:
            ds = require_dataset(dataset, check_installed=True, purpose='dataset search')
            if ds.id is None:
                raise NoDatasetArgumentFound(
                    "This does not seem to be a dataset (no DataLad dataset ID "
                    "found). 'datalad create --force %s' can initialize "
                    "this repository as a DataLad dataset" % ds.path)
        except NoDatasetArgumentFound:
            exc_info = sys.exc_info()
            if dataset is None:
                if not ui.is_interactive:
                    raise NoDatasetArgumentFound(
                        "No DataLad dataset found. Specify a dataset to be "
                        "searched, or run interactively to get assistance "
                        "installing a queriable superdataset."
                    )
                # none was provided, so we can ask the user whether they want
                # to install our beautiful mega-duper-super-dataset
                # TODO: following logic could possibly benefit other actions.
                if os.path.exists(LOCAL_CENTRAL_PATH):
                    central_ds = Dataset(LOCAL_CENTRAL_PATH)
                    if central_ds.is_installed():
                        if not ui.yesno(
                                title="No DataLad dataset found at current location",
                                text="Would you like to search the DataLad "
                                     "superdataset at %r?"
                                     % LOCAL_CENTRAL_PATH):
                            reraise(*exc_info)
                    else:
                        raise NoDatasetArgumentFound(
                            "No DataLad dataset found at current location. "
                            "The DataLad superdataset location %r exists, "
                            "but does not contain an dataset."
                            % LOCAL_CENTRAL_PATH)
                elif ui.yesno(
                        title="No DataLad dataset found at current location",
                        text="Would you like to install the DataLad "
                             "superdataset at %r?"
                             % LOCAL_CENTRAL_PATH):
                    from datalad.api import install
                    central_ds = install(LOCAL_CENTRAL_PATH, source='///')
                    ui.message(
                        "From now on you can refer to this dataset using the "
                        "label '///'"
                    )
                else:
                    reraise(*exc_info)

                lgr.info(
                    "Performing search using DataLad superdataset %r",
                    central_ds.path
                )
                for res in central_ds.search(
                        match,
                        search=search, report=report,
                        report_matched=report_matched,
                        format=format, regex=regex):
                    yield res
                return
            else:
                raise

        cache_dir = opj(ds.path, get_git_dir(ds.path), 'datalad', 'cache')
        mcache_fname = opj(cache_dir, 'metadata.p%d' % pickle.HIGHEST_PROTOCOL)

        meta = None
        if os.path.exists(mcache_fname):
            lgr.debug("use cached metadata of '{}' from {}".format(ds, mcache_fname))
            with open(mcache_fname, 'rb') as f:
                meta, checksum = pickle.load(f)
            # TODO add more sophisticated tests to decide when the cache is no longer valid
            if checksum != ds.repo.get_hexsha():
                # errrr, try again below
                meta = None

        # don't put this in an 'else': the yet-to-be-written cache validity
        # tests above might fail and require regenerating the metadata
        if meta is None:
            lgr.info("Loading and caching local meta-data... might take a few seconds")
            if not exists(cache_dir):
                os.makedirs(cache_dir)

            meta = get_metadata(ds, guess_type=False, ignore_subdatasets=False,
                                ignore_cache=False)
            # merge all info on datasets into a single dict per dataset
            meta = flatten_metadata_graph(meta)
            # extract graph, if any
            meta = meta.get('@graph', meta)
            # build simple queriable representation
            if not isinstance(meta, list):
                meta = [meta]

            # sort entries by location (if present)
            sort_keys = ('location', 'description', 'id')
            # note: with str() instead of '%' formatting we were getting encoding issues...
            meta = sorted(meta, key=lambda m: tuple("%s" % (m.get(x, ""),) for x in sort_keys))

            # use pickle to store the optimized graph in the cache,
            # together with the checksum of the state it was built from
            with open(mcache_fname, 'wb') as f:
                pickle.dump((meta, ds.repo.get_hexsha()), f)
            lgr.debug("cached meta data graph of '{}' in {}".format(ds, mcache_fname))

        if report in ('', ['']):
            report = []
        elif report and not isinstance(report, list):
            report = [report]

        match = assure_list(match)
        search = assure_list(search)
        # convert all to lower case for case insensitive matching
        search = {x.lower() for x in search}

        def get_in_matcher(m):
            """Function generator to provide closure for a specific value of m"""
            mlower = m.lower()

            def matcher(s):
                return mlower in s.lower()
            return matcher

        matchers = [
            re.compile(match_).search
            if regex
            else get_in_matcher(match_)
            for match_ in match
        ]

        # location should be reported relative to the current location
        # We assume that no one calls chpwd() while we are yielding
        ds_path_prefix = get_path_prefix(ds.path)

        # So we can provide a useful message whenever not a single dataset
        # matched the specified `--search` properties
        observed_properties = set()

        # for every meta data set
        for mds in meta:
            hit = False
            hits = [False] * len(matchers)
            matched_fields = set()
            if mds.get('type', mds.get('schema:type', None)) != 'Dataset':
                # we are presently only dealing with datasets
                continue
            # TODO consider the possibility of nested and context/graph dicts
            # but so far we were trying to build simple lists of dicts, as much
            # as possible
            if not isinstance(mds, dict):
                raise NotImplementedError("nested meta data is not yet supported")

            # manual loop for now
            for k, v in iteritems(mds):
                if search:
                    k_lower = k.lower()
                    if k_lower not in search:
                        if observed_properties is not None:
                            # record for providing a hint later
                            observed_properties.add(k_lower)
                        continue
                    # so we have a hit, no need to track
                    observed_properties = None
                if isinstance(v, dict) or isinstance(v, list):
                    v = text_type(v)
                for imatcher, matcher in enumerate(matchers):
                    if matcher(v):
                        hits[imatcher] = True
                        matched_fields.add(k)
                if all(hits):
                    hit = True
                    # no need to loop longer than necessary
                    if not report_matched:
                        break

            if hit:
                location = mds.get('location', '.')
                report_ = matched_fields.union(report if report else {}) \
                    if report_matched else report
                if report_ == ['*']:
                    report_dict = mds
                elif report_:
                    report_dict = {k: mds[k] for k in report_ if k in mds}
                    if report_ and not report_dict:
                        lgr.debug(
                            'meta data match for %s, but no to-be-reported '
                            'properties (%s) found. Present properties: %s',
                            location, ", ".join(report_), ", ".join(sorted(mds))
                        )
                else:
                    report_dict = {}  # it was empty but not None -- asked to
                    # not report any specific field
                if isinstance(location, (list, tuple)):
                    # could be that the same dataset installed into multiple
                    # locations. For now report them separately
                    for l in location:
                        yield opj(ds_path_prefix, l), report_dict
                else:
                    yield opj(ds_path_prefix, location), report_dict

        if search and observed_properties is not None:
            import difflib
            suggestions = {
                s: difflib.get_close_matches(s, observed_properties)
                for s in search
            }
            suggestions_str = "\n ".join(
                "%s for %s" % (", ".join(choices), s)
                for s, choices in iteritems(suggestions) if choices
            )
            lgr.warning(
                "Found no properties which matched one of the one you "
                "specified (%s).  May be you meant one among: %s.\n"
                "Suggestions:\n"
                " %s",
                ", ".join(search),
                ", ".join(observed_properties),
                suggestions_str if suggestions_str.strip() else "none"
            )
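The warning above leans on difflib.get_close_matches to suggest likely property names when a --search key matched nothing. A short standalone sketch of that suggestion mechanism (the property and search values here are made up):

import difflib

observed_properties = {"location", "description", "type", "author"}
search = {"discription", "auther"}  # user-provided keys, with typos

suggestions = {
    s: difflib.get_close_matches(s, observed_properties)
    for s in search
}
for s, choices in suggestions.items():
    if choices:
        print("%s: did you mean %s?" % (s, ", ".join(choices)))
# discription: did you mean description?
# auther: did you mean author?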
Example #4
0
 def func2(x):
     assert x == 1
     eq_(ui.yesno("title"), True)
     eq_(ui.question("title2"), "maybe so")
     assert_raises(AssertionError, ui.question, "asking more than we know")
     return x * 2
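func2 above assumes a test UI primed with canned responses (eq_ and assert_raises are nose-style test helpers). One way such a harness could be faked, purely as an illustration and not DataLad's actual test machinery:

class QueuedUI:
    """Toy UI that replays pre-seeded responses and fails when exhausted."""
    def __init__(self, responses):
        self._responses = list(responses)

    def _next(self):
        # mirrors "asking more than we know" -> AssertionError
        assert self._responses, "asking more than we know"
        return self._responses.pop(0)

    def yesno(self, title, **kwargs):
        return self._next()

    def question(self, title, **kwargs):
        return self._next()

ui = QueuedUI([True, "maybe so"])
assert ui.yesno("title") is True
assert ui.question("title2") == "maybe so"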
Example #5
0
    def __call__(dataset, filename=None, missing_content='error', no_annex=False,
                 # TODO: support working with projects and articles within them
                 # project_id=None,
                 article_id=None):
        import os
        import logging
        lgr = logging.getLogger('datalad.plugin.export_to_figshare')

        from datalad.ui import ui
        from datalad.api import add_archive_content
        from datalad.api import export_archive
        from datalad.distribution.dataset import require_dataset
        from datalad.support.annexrepo import AnnexRepo

        dataset = require_dataset(dataset, check_installed=True,
                                  purpose='export to figshare')

        if not isinstance(dataset.repo, AnnexRepo):
            raise ValueError(
                "%s is not an annex repo, so annexification could be done"
                % dataset
            )

        if dataset.repo.is_dirty():
            raise RuntimeError(
                "Paranoid authors of DataLad refuse to proceed in a dirty repository"
            )
        if filename is None:
            filename = dataset.path
        lgr.info(
            "Exporting current tree as an archive under %s since figshare "
            "does not support directories",
            filename
        )
        archive_out = next(
            export_archive(
                dataset,
                filename=filename,
                archivetype='zip',
                missing_content=missing_content,
                return_type="generator"
            )
        )
        assert archive_out['status'] == 'ok'
        fname = archive_out['path']

        lgr.info("Uploading %s to figshare", fname)
        figshare = FigshareRESTLaison()

        if not article_id:
            # TODO: ask if it should be an article within a project
            if ui.is_interactive:
                # or should we just upload to a new article?
                if ui.yesno(
                    "Would you like to create a new article to upload to?  "
                    "If not - we will list existing articles",
                    title="Article"
                ):
                    article = figshare.create_article(
                        title=os.path.basename(dataset.path)
                    )
                    lgr.info(
                        "Created a new (private) article %(id)s at %(url_private_html)s. "
                        "Please visit it, enter additional meta-data and make public",
                        article
                    )
                    article_id = article['id']
                else:
                    article_id = int(ui.question(
                        "Which of the articles should we upload to.",
                        choices=list(map(str, figshare.get_article_ids()))
                    ))
            if not article_id:
                raise ValueError("We need an article to upload to.")

        file_info = figshare.upload_file(
            fname,
            files_url='account/articles/%s/files' % article_id
        )

        if no_annex:
            lgr.info("Removing generated tarball")
            unlink(fname)
        else:
            # leave all the complaining etc. to 'dataset add' if the path
            # is outside the dataset, etc.
            lgr.info("'Registering' %s within annex", fname)
            repo = dataset.repo
            repo.add(fname, git=False)
            key = repo.get_file_key(fname)
            lgr.info("Adding URL %(download_url)s for it", file_info)
            repo._annex_custom_command([],
                [
                    "git", "annex", "registerurl", '-c', 'annex.alwayscommit=false',
                    key, file_info['download_url']
                ]
            )

            lgr.info("Registering links back for the content of the archive")
            add_archive_content(
                fname,
                annex=dataset.repo,
                delete_after=True,  # extract into a temp dir and remove it afterwards
                allow_dirty=True,  # since we have the archive lying around
                commit=False  # we do not want to commit anything we have done here
            )

            lgr.info("Removing generated and now registered in annex archive")
            repo.drop(key, key=True, options=['--force'])
            repo.remove(fname, force=True)  # remove the tarball

            # if annex in {'delete'}:
            #     dataset.repo.remove(fname)
            # else:
            #     # kinda makes little sense I guess.
            #     # Made more sense if export_archive could export an arbitrary treeish
            #     # so we could create a branch where to dump and export to figshare
            #     # (kinda closer to my idea)
            #     dataset.save(fname, message="Added the entire dataset into a zip file")

        # TODO: add to downloader knowledge about figshare token so it could download-url
        # those zipballs before they go public
        yield dict(
            status='ok',
            # TODO: add article url (which needs to be queried if only the ID is known)
            message="Published archive {}".format(
                file_info['download_url']),
            file_info=file_info,
            path=dataset,
            action='export_to_figshare',
            logger=lgr
        )
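The 'registerurl' step above records a URL as an additional content source for an annex key without downloading anything. The equivalent operation outside of DataLad, sketched with subprocess (the repository path, key, and URL are placeholders):

import subprocess

repo_path = "/path/to/annex/repo"        # placeholder
key = "SHA256E-s1024--0123abcd.zip"      # placeholder annex key
url = "https://example.com/archive.zip"  # placeholder URL

subprocess.run(
    ["git", "annex", "registerurl",
     "-c", "annex.alwayscommit=false", key, url],
    cwd=repo_path, check=True)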
Example #6
0
def _create_dataset_sibling(name, ds, hierarchy_basepath, shell,
                            replicate_local_structure, ri, target_dir,
                            target_url, target_pushurl, existing, shared,
                            group, publish_depends, publish_by_default,
                            install_postupdate_hook, as_common_datasrc,
                            annex_wanted, annex_group, annex_groupwanted,
                            inherit):
    """Everyone is very smart here and could figure out the combinatorial
    affluence among provided tiny (just slightly over a dozen) number of options
    and only a few pages of code
    """
    localds_path = ds.path
    ds_name = relpath(localds_path, start=hierarchy_basepath)
    if not replicate_local_structure:
        ds_name = '' if ds_name == curdir \
            else '-{}'.format(ds_name.replace("/", "-"))
        remoteds_path = target_dir.replace("%RELNAME", ds_name)
    else:
        # TODO: opj depends on local platform, not the remote one.
        # check how to deal with it. Does windows ssh server accept
        # posix paths? vice versa? Should planned SSH class provide
        # tools for this issue?
        # see gh-1188
        remoteds_path = normpath(opj(target_dir, ds_name))

    ds_repo = ds.repo
    # construct a would-be ssh url based on the current dataset's path
    ri.path = remoteds_path
    ds_url = ri.as_str()
    # configure dataset's git-access urls
    ds_target_url = target_url.replace('%RELNAME', ds_name) \
        if target_url else ds_url
    # push, configure only if needed
    ds_target_pushurl = None
    if ds_target_url != ds_url:
        # not guaranteed that we can push via the primary URL
        ds_target_pushurl = target_pushurl.replace('%RELNAME', ds_name) \
            if target_pushurl else ds_url

    lgr.info("Considering to create a target dataset {0} at {1} of {2}".format(
        localds_path, remoteds_path,
        "localhost" if isinstance(ri, PathRI) else ri.hostname))
    # Must be set to True only if the target exists and existing='reconfigure';
    # otherwise we might skip actions when existing='reconfigure' was given
    # but the target did not even exist before
    only_reconfigure = False
    if remoteds_path != '.':
        # check if target exists
        # TODO: Is this condition valid for != '.' only?
        path_children = _ls_remote_path(shell, remoteds_path)
        path_exists = path_children is not None

        if path_exists:
            _msg = "Target path %s already exists." % remoteds_path
        if path_exists and not path_children:
            # path should be an empty directory, which should be ok to remove
            try:
                lgr.debug(
                    "Trying to rmdir %s on remote since seems to be an empty dir",
                    remoteds_path)
                # should be safe since should not remove anything unless an empty dir
                shell("rmdir {}".format(sh_quote(remoteds_path)))
                path_exists = False
            except CommandError as e:
                # If rmdir fails, the directory either contains content or we lack permissions
                # TODO: fixup encode/decode dance again :-/ we should have got
                # unicode/str here by now.  I guess it is the same as
                # https://github.com/ReproNim/niceman/issues/83
                # where I have reused this Runner thing
                try:
                    # ds_name is unicode which makes _msg unicode so we must be
                    # unicode-ready
                    err_str = str(e.stderr)
                except UnicodeDecodeError:
                    err_str = e.stderr.decode(errors='replace')
                _msg += " And it fails to rmdir (%s)." % (err_str.strip(), )

        if path_exists:
            if existing == 'error':
                raise RuntimeError(_msg)
            elif existing == 'skip':
                lgr.info(_msg + " Skipping")
                return
            elif existing == 'replace':
                remove = False
                if path_children:
                    has_git = '.git' in path_children
                    _msg_stats = _msg \
                                 + " It is %sa git repository and has %d files/dirs." % (
                                     "" if has_git else "not ", len(path_children)
                                 )
                    if ui.is_interactive:
                        remove = ui.yesno("Do you really want to remove it?",
                                          title=_msg_stats,
                                          default=False)
                    else:
                        raise RuntimeError(
                            _msg_stats +
                            " Remove it manually first or rerun datalad in "
                            "interactive shell to confirm this action.")
                if not remove:
                    raise RuntimeError(_msg)
                # Remote location might already contain a git repository or be
                # just a directory.
                lgr.info(_msg + " Replacing")
                # enable write permissions to allow removing dir
                shell("chmod +r+w -R {}".format(sh_quote(remoteds_path)))
                # remove target at path
                shell("rm -rf {}".format(sh_quote(remoteds_path)))
                # if we succeeded in removing it
                path_exists = False
                # Since it is gone now, git-annex also should forget about it
                remotes = ds_repo.get_remotes()
                if name in remotes:
                    # so we had this remote already; we should announce it dead
                    # XXX what if there was some kind of mismatch and this name
                    # isn't matching the actual remote UUID?  should we have
                    # checked more carefully?
                    lgr.info(
                        "Announcing existing remote %s dead to annex and removing",
                        name)
                    if isinstance(ds_repo, AnnexRepo):
                        ds_repo.set_remote_dead(name)
                    ds_repo.remove_remote(name)
            elif existing == 'reconfigure':
                lgr.info(_msg + " Will only reconfigure")
                only_reconfigure = True
            else:
                raise ValueError(
                    "Do not know how to handle existing={}".format(
                        repr(existing)))

        if not path_exists:
            shell("mkdir -p {}".format(sh_quote(remoteds_path)))

    delayed_super = _DelayedSuper(ds)
    if inherit and delayed_super.super:
        if shared is None:
            # here we must analyze current_ds's super, not the super_ds
            # inherit from the setting on remote end
            shared = CreateSibling._get_ds_remote_shared_setting(
                delayed_super, name, shell)

        if not install_postupdate_hook:
            # Even though the directive from above was False because no UI was
            # explicitly requested, we were asked to inherit the setup, so we
            # might need to install the hook if the super has it on the remote
            install_postupdate_hook = CreateSibling._has_active_postupdate(
                delayed_super, name, shell)

    if group:
        # Whether the repository existed before or a new directory was just
        # created for it, set its group to the desired one, if provided, with
        # the same chgrp
        shell("chgrp -R {} {}".format(sh_quote(str(group)),
                                      sh_quote(remoteds_path)))
    # don't (re-)initialize dataset if existing == reconfigure
    if not only_reconfigure:
        # init git and possibly annex repo
        if not CreateSibling.init_remote_repo(
                remoteds_path, shell, shared, ds, description=target_url):
            return

        if target_url and not is_ssh(target_url):
            # we are not coming in via SSH, hence cannot assume proper
            # setup for webserver access -> fix
            shell('git -C {} update-server-info'.format(
                sh_quote(remoteds_path)))
    else:
        # TODO -- we might still want to reconfigure 'shared' setting!
        pass

    # at this point we have a remote sibling in some shape or form
    # -> add as remote
    lgr.debug("Adding the siblings")
    # TODO generator, yield the now swallowed results
    Siblings.__call__(
        'configure',
        dataset=ds,
        name=name,
        url=ds_target_url,
        pushurl=ds_target_pushurl,
        recursive=False,
        fetch=True,
        as_common_datasrc=as_common_datasrc,
        publish_by_default=publish_by_default,
        publish_depends=publish_depends,
        annex_wanted=annex_wanted,
        annex_group=annex_group,
        annex_groupwanted=annex_groupwanted,
        inherit=inherit,
        result_renderer=None,
    )

    # check git version on remote end
    lgr.info("Adjusting remote git configuration")
    remote_git_version = shell.get_git_version()
    if remote_git_version and remote_git_version >= LooseVersion("2.4"):
        # allow for pushing to checked out branch
        try:
            shell("git -C {} config receive.denyCurrentBranch updateInstead".
                  format(sh_quote(remoteds_path)))
        except CommandError as e:
            lgr.error(
                "git config failed at remote location %s.\n"
                "You will not be able to push to checked out "
                "branch. Error: %s", remoteds_path, exc_str(e))
    else:
        lgr.error(
            "Git version >= 2.4 needed to configure remote."
            " Version detected on server: %s\nSkipping configuration"
            " of receive.denyCurrentBranch - you will not be able to"
            " publish updates to this repository. Upgrade your git"
            " and run with --existing=reconfigure", shell.get_git_version())

    branch = ds_repo.get_active_branch()
    if branch is not None:
        branch = ds_repo.get_corresponding_branch(branch) or branch
        # Setting the HEAD for the created sibling to the original repo's
        # current branch should be unsurprising, and it helps with consumers
        # that don't properly handle the default branch with no commits. See
        # gh-4349.
        shell("git -C {} symbolic-ref HEAD refs/heads/{}".format(
            sh_quote(remoteds_path), branch))

    if install_postupdate_hook:
        # enable metadata refresh on dataset updates to publication server
        lgr.info("Enabling git post-update hook ...")
        try:
            CreateSibling.create_postupdate_hook(remoteds_path, shell, ds)
        except CommandError as e:
            lgr.error("Failed to add json creation command to post update "
                      "hook.\nError: %s" % exc_str(e))

    return remoteds_path
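The helper _ls_remote_path used above is not shown in this excerpt. Judging by how it is called (None means the remote path does not exist, an empty list means an empty directory), a compatible sketch could look like the following; it assumes the same shell callable, sh_quote, and CommandError as the surrounding module:

def _ls_remote_path(shell, path):
    """Hypothetical helper: list the children of `path` on the remote.

    Returns None if the path does not exist, otherwise the list of
    entries (empty for an empty directory).
    """
    try:
        out = shell("ls -A1 {}".format(sh_quote(path)))
    except CommandError:
        # assume a failing ls means the path does not exist
        return None
    # shell() is assumed to return stdout, possibly as (stdout, stderr)
    stdout = out[0] if isinstance(out, tuple) else out
    return [entry for entry in stdout.splitlines() if entry]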
Example #7
0
    def enter_new(self,
                  url=None,
                  auth_types=[],
                  url_re=None,
                  name=None,
                  credential_name=None,
                  credential_type=None):
        # TODO: level/location!
        """Create new provider and credential config

        If interactive, this will ask the user to enter the details (or confirm
        default choices). A dedicated config file is written at
        <user_config_dir>/providers/<name>.cfg

        Parameters
        ----------
        url: str or RI
          URL this config is created for
        auth_types: list
          List of authentication types to choose from. First entry becomes
          default. See datalad.downloaders.providers.AUTHENTICATION_TYPES
        url_re: str
          regular expression; Once created, this config will be used for any
          matching URL; defaults to `url`
        name: str
          name for the provider; needs to be unique per user
        credential_name: str
          name for the credential; defaults to the provider's name
        credential_type: str
          credential type to use (key for datalad.downloaders.CREDENTIAL_TYPES)
        """

        from datalad.ui import ui
        if url and not name:
            ri = RI(url)
            for f in ('hostname', 'name'):
                try:
                    # might need sanitization
                    name = str(getattr(ri, f))
                except AttributeError:
                    pass
        known_providers_by_name = {p.name: p for p in self._providers}
        providers_user_dir = self._get_providers_dirs()['user']
        while True:
            name = ui.question(
                title="New provider name",
                text="Unique name to identify 'provider' for %s" % url,
                default=name)
            filename = pathjoin(providers_user_dir, '%s.cfg' % name)
            if name in known_providers_by_name:
                if ui.yesno(
                        title="Known provider %s" % name,
                        text=
                        "Provider with name %s already known. Do you want to "
                        "use it for this session?" % name,
                        default=True):
                    return known_providers_by_name[name]
            elif path.lexists(filename):
                ui.error("File %s already exists, choose another name" %
                         filename)
            else:
                break

        if not credential_name:
            credential_name = name
        if not url_re:
            url_re = re.escape(url) if url else None
        while True:
            url_re = ui.question(
                title="New provider regular expression",
                text="A (Python) regular expression to specify for which URLs "
                "this provider should be used",
                default=url_re)
            if not re.match(url_re, url):
                ui.error("Provided regular expression doesn't match original "
                         "url.  Please re-enter")
            # TODO: url_re of another provider might match it as well
            #  I am not sure if we have any kind of "priority" setting ATM
            #  to differentiate or to try multiple types :-/
            else:
                break

        authentication_type = None
        if auth_types:
            auth_types = [t for t in auth_types if t in AUTHENTICATION_TYPES]
            if auth_types:
                authentication_type = auth_types[0]

        # Setup credential
        authentication_type = ui.question(
            title="Authentication type",
            text="What authentication type to use",
            default=authentication_type,
            choices=sorted(AUTHENTICATION_TYPES))
        authenticator_class = AUTHENTICATION_TYPES[authentication_type]

        # TODO: need to figure out what fields that authenticator might
        #       need to have setup and ask for them here!

        credential_type = ui.question(
            title="Credential",
            text="What type of credential should be used?",
            choices=sorted(CREDENTIAL_TYPES),
            default=credential_type
            or getattr(authenticator_class, 'DEFAULT_CREDENTIAL_TYPE'))

        cfg = self._CONFIG_TEMPLATE.format(**locals())
        if ui.yesno(title="Save provider configuration file",
                    text="Following configuration will be written to %s:\n%s" %
                    (filename, cfg),
                    default='yes'):
            # Just create a configuration file and reload the thing
            return self._store_new(url=url,
                                   authentication_type=authentication_type,
                                   authenticator_class=authenticator_class,
                                   url_re=url_re,
                                   name=name,
                                   credential_name=credential_name,
                                   credential_type=credential_type,
                                   level='user')
        else:
            return None
Example #8
0
    def enter_new(self, url=None, auth_types=[]):
        from datalad.ui import ui
        name = None
        if url:
            ri = RI(url)
            for f in ('hostname', 'name'):
                try:
                    # might need sanitization
                    name = str(getattr(ri, f))
                except AttributeError:
                    pass
        known_providers_by_name = {p.name: p for p in self._providers}
        providers_user_dir = self._get_providers_dirs()['user']
        while True:
            name = ui.question(
                title="New provider name",
                text="Unique name to identify 'provider' for %s" % url,
                default=name
            )
            filename = pathjoin(providers_user_dir, '%s.cfg' % name)
            if name in known_providers_by_name:
                if ui.yesno(
                    title="Known provider %s" % name,
                    text="Provider with name %s already known. Do you want to "
                         "use it for this session?"
                         % name,
                    default=True
                ):
                    return known_providers_by_name[name]
            elif path.lexists(filename):
                ui.error(
                    "File %s already exists, choose another name" % filename)
            else:
                break

        url_re = re.escape(url) if url else None
        while True:
            url_re = ui.question(
                title="New provider regular expression",
                text="A (Python) regular expression to specify for which URLs "
                     "this provider should be used",
                default=url_re
            )
            if not re.match(url_re, url):
                ui.error("Provided regular expression doesn't match original "
                         "url.  Please re-enter")
            # TODO: url_re of another provider might match it as well
            #  I am not sure if we have any kind of "priority" setting ATM
            #  to differentiate or to try multiple types :-/
            else:
                break

        authentication_type = None
        if auth_types:
            auth_types = [
                t for t in auth_types if t in AUTHENTICATION_TYPES
            ]
            if auth_types:
                authentication_type = auth_types[0]

        # Setup credential
        authentication_type = ui.question(
            title="Authentication type",
            text="What authentication type to use",
            default=authentication_type,
            choices=sorted(AUTHENTICATION_TYPES)
        )
        authenticator_class = AUTHENTICATION_TYPES[authentication_type]

        # TODO: need to figure out what fields that authenticator might
        #       need to have setup and ask for them here!

        credential_type = ui.question(
            title="Credential",
            text="What type of credential should be used?",
            choices=sorted(CREDENTIAL_TYPES),
            default=getattr(authenticator_class, 'DEFAULT_CREDENTIAL_TYPE')
        )

        # Just create a configuration file and reload the thing
        if not path.lexists(providers_user_dir):
            os.makedirs(providers_user_dir)
        cfg = """\
# Provider configuration file created to initially access
# {url}

[provider:{name}]
url_re = {url_re}
authentication_type = {authentication_type}
# Note that you might need to specify additional fields specific to the
# authenticator.  For now "look into the docs/source" of {authenticator_class}
# {authentication_type}_
credential = {name}

[credential:{name}]
# If known, specify a URL or email describing how/where to request credentials
# url = ???
type = {credential_type}
""".format(**locals())
        if ui.yesno(
            title="Save provider configuration file",
            text="Following configuration will be written to %s:\n%s"
                % (filename, cfg),
            default='yes'
        ):
            with open(filename, 'wb') as f:
                f.write(cfg.encode('utf-8'))
        else:
            return None
        self.reload()
        # XXX see above note about possibly multiple matches etc
        return self.get_provider(url)
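The file written above is plain INI, so its structure can be inspected with the standard library. A quick illustration with configparser (the provider name and the values shown are made up):

import configparser

cfg_text = """\
[provider:example]
url_re = https://example\\.com/.*
authentication_type = http_basic_auth
credential = example

[credential:example]
type = user_password
"""

cp = configparser.ConfigParser()
cp.read_string(cfg_text)
print(cp["provider:example"]["url_re"])    # https://example\.com/.*
print(cp["credential:example"]["type"])    # user_password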
Example #9
0
    def create_repo(self, ds, reponame, organization, private, dry_run,
                    existing):
        """Create a repository on the target platform

        Returns
        -------
        dict
          Result record, with status='ok' when all is good, status='error'
          when unrecoverably broken, status='impossible' when recoverably
          broken

        Raises
        ------
        Exception
          Any unhandled condition (in particular unexpected non-success
          HTTP response codes) will raise an exception.
        """
        res = self.repo_create_request(reponame, organization, private,
                                       dry_run)

        if res.get('status') == 'impossible' and res.get('preexisted'):
            # we cannot create, because there is something in the target
            # spot
            orguser = organization or self.authenticated_user['login']

            if existing == 'reconfigure':
                # we want to use the existing one instead
                # query properties, report, and be done
                repo_props = self.repo_get_request(orguser, reponame)
                res.update(
                    status='notneeded',
                    # return in full
                    host_response=repo_props,
                    # perform some normalization
                    **self.normalize_repo_properties(repo_props))
            elif existing == 'replace':
                # only implemented for backward compat with
                # create-sibling-github
                _msg = ('repository "%s" already exists', reponame)
                if ui.is_interactive:
                    remove = ui.yesno("Do you really want to remove it?",
                                      title=_msg[0] % _msg[1],
                                      default=False)
                else:
                    return dict(
                        res,
                        status='impossible',
                        message=(_msg[0] + " Remove it manually first or "
                                 "rerun DataLad in an interactive shell "
                                 "to confirm this action.", _msg[1]),
                    )
                if not remove:
                    return dict(
                        res,
                        status='impossible',
                        message=_msg,
                    )
                # remove the beast in cold blood
                self.repo_delete_request(
                    organization or self.authenticated_user['login'], reponame)
                # try creating now
                return self.create_repo(ds, reponame, organization, private,
                                        dry_run, existing)

        # TODO intermediate error handling?

        return res
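The result records handled above share a common shape (the docstring names the 'ok', 'impossible', and 'error' statuses; 'notneeded' appears in the reconfigure branch). A minimal illustration of how a caller might branch on one such record; the concrete values are made up:

res = {
    'status': 'impossible',   # 'ok' | 'notneeded' | 'impossible' | 'error'
    'preexisted': True,
    'message': 'repository "demo" already exists',
}
if res['status'] in ('ok', 'notneeded'):
    pass  # proceed, e.g. with sibling configuration
elif res['status'] == 'impossible':
    print("recoverable: %s" % res.get('message'))
else:
    raise RuntimeError(res.get('message'))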
Example #10
0
    def enter_new(self, url=None, auth_types=[]):
        from datalad.ui import ui
        name = None
        if url:
            ri = RI(url)
            for f in ('hostname', 'name'):
                try:
                    # might need sanitization
                    name = str(getattr(ri, f))
                except AttributeError:
                    pass
        known_providers_by_name = {p.name: p for p in self._providers}
        providers_user_dir = self._get_providers_dirs()['user']
        while True:
            name = ui.question(
                title="New provider name",
                text="Unique name to identify 'provider' for %s" % url,
                default=name
            )
            filename = pathjoin(providers_user_dir, '%s.cfg' % name)
            if name in known_providers_by_name:
                if ui.yesno(
                    title="Known provider %s" % name,
                    text="Provider with name %s already known. Do you want to "
                         "use it for this session?"
                         % name,
                    default=True
                ):
                    return known_providers_by_name[name]
            elif path.lexists(filename):
                ui.error(
                    "File %s already exists, choose another name" % filename)
            else:
                break

        url_re = re.escape(url) if url else None
        while True:
            url_re = ui.question(
                title="New provider regular expression",
                text="A (Python) regular expression to specify for which URLs "
                     "this provider should be used",
                default=url_re
            )
            if not re.match(url_re, url):
                ui.error("Provided regular expression doesn't match original "
                         "url.  Please re-enter")
            # TODO: url_re of another provider might match it as well
            #  I am not sure if we have any kind of "priority" setting ATM
            #  to differentiate or to try multiple types :-/
            else:
                break

        authentication_type = None
        if auth_types:
            auth_types = [
                t for t in auth_types if t in AUTHENTICATION_TYPES
            ]
            if auth_types:
                authentication_type = auth_types[0]

        # Setup credential
        authentication_type = ui.question(
            title="Authentication type",
            text="What authentication type to use",
            default=authentication_type,
            choices=sorted(AUTHENTICATION_TYPES)
        )
        authenticator_class = AUTHENTICATION_TYPES[authentication_type]

        # TODO: need to figure out what fields that authenticator might
        #       need to have setup and ask for them here!

        credential_type = ui.question(
            title="Credential",
            text="What type of credential should be used?",
            choices=sorted(CREDENTIAL_TYPES),
            default=getattr(authenticator_class, 'DEFAULT_CREDENTIAL_TYPE')
        )

        # Just create a configuration file and reload the thing
        if not path.lexists(providers_user_dir):
            os.makedirs(providers_user_dir)
        cfg = """\
# Provider configuration file created to initially access
# {url}

[provider:{name}]
url_re = {url_re}
authentication_type = {authentication_type}
# Note that you might need to specify additional fields specific to the
# authenticator.  For now "look into the docs/source" of {authenticator_class}
# {authentication_type}_
credential = {name}

[credential:{name}]
# If known, specify a URL or email describing how/where to request credentials
# url = ???
type = {credential_type}
""".format(**locals())
        if ui.yesno(
            title="Save provider configuration file",
            text="Following configuration will be written to %s:\n%s"
                % (filename, cfg),
            default='yes'
        ):
            with open(filename, 'wb') as f:
                f.write(cfg.encode('utf-8'))
        else:
            return None
        self.reload()
        # XXX see above note about possibly multiple matches etc
        return self.get_provider(url)
Example #11
0
 def func2(x):
     assert x == 1
     eq_(ui.yesno("title"), True)
     eq_(ui.question("title2"), "maybe so")
     assert_raises(AssertionError, ui.question, "asking more than we know")
     return x * 2
Example #12
0
    def __call__(match,
                 dataset=None,
                 search=None,
                 report=None,
                 report_matched=False,
                 format='custom',
                 regex=False):

        lgr.debug("Initiating search for match=%r and dataset %r",
                  match, dataset)
        try:
            ds = require_dataset(dataset, check_installed=True, purpose='dataset search')
            if ds.id is None:
                raise NoDatasetArgumentFound(
                    "This does not seem to be a dataset (no DataLad dataset ID "
                    "found). 'datalad create --force %s' can initialize "
                    "this repository as a DataLad dataset" % ds.path)
        except NoDatasetArgumentFound:
            exc_info = sys.exc_info()
            if dataset is None:
                if not ui.is_interactive:
                    raise NoDatasetArgumentFound(
                        "No DataLad dataset found. Specify a dataset to be "
                        "searched, or run interactively to get assistance "
                        "installing a queriable superdataset."
                    )
                # none was provided, so we can ask the user whether they want
                # to install our beautiful mega-duper-super-dataset
                # TODO: following logic could possibly benefit other actions.
                if os.path.exists(LOCAL_CENTRAL_PATH):
                    central_ds = Dataset(LOCAL_CENTRAL_PATH)
                    if central_ds.is_installed():
                        if not ui.yesno(
                                title="No DataLad dataset found at current location",
                                text="Would you like to search the DataLad "
                                     "superdataset at %r?"
                                     % LOCAL_CENTRAL_PATH):
                            reraise(*exc_info)
                    else:
                        raise NoDatasetArgumentFound(
                            "No DataLad dataset found at current location. "
                            "The DataLad superdataset location %r exists, "
                            "but does not contain an dataset."
                            % LOCAL_CENTRAL_PATH)
                elif ui.yesno(
                        title="No DataLad dataset found at current location",
                        text="Would you like to install the DataLad "
                             "superdataset at %r?"
                             % LOCAL_CENTRAL_PATH):
                    from datalad.api import install
                    central_ds = install(LOCAL_CENTRAL_PATH, source='///')
                    ui.message(
                        "From now on you can refer to this dataset using the "
                        "label '///'"
                    )
                else:
                    reraise(*exc_info)

                lgr.info(
                    "Performing search using DataLad superdataset %r",
                    central_ds.path
                )
                for res in central_ds.search(
                        match,
                        search=search, report=report,
                        report_matched=report_matched,
                        format=format, regex=regex):
                    yield res
                return
            else:
                raise

        cache_dir = opj(ds.path, get_git_dir(ds.path), 'datalad', 'cache')
        mcache_fname = opj(cache_dir, 'metadata.p%d' % pickle.HIGHEST_PROTOCOL)

        meta = None
        if os.path.exists(mcache_fname):
            lgr.debug("use cached metadata of '{}' from {}".format(ds, mcache_fname))
            with open(mcache_fname, 'rb') as f:
                meta, checksum = pickle.load(f)
            # TODO add more sophisticated tests to decide when the cache is no longer valid
            if checksum != ds.repo.get_hexsha():
                # errrr, try again below
                meta = None

        # don't put this in an 'else': the yet-to-be-written cache validity
        # tests above might fail and require regenerating the metadata
        if meta is None:
            lgr.info("Loading and caching local meta-data... might take a few seconds")
            if not exists(cache_dir):
                os.makedirs(cache_dir)

            meta = get_metadata(ds, guess_type=False, ignore_subdatasets=False,
                                ignore_cache=False)
            # merge all info on datasets into a single dict per dataset
            meta = flatten_metadata_graph(meta)
            # extract graph, if any
            meta = meta.get('@graph', meta)
            # build simple queriable representation
            if not isinstance(meta, list):
                meta = [meta]

            # sort entries by location (if present)
            sort_keys = ('location', 'description', 'id')
            meta = sorted(meta, key=lambda m: tuple(m.get(x, "") for x in sort_keys))

            # use pickle to store the optimized graph in the cache,
            # together with the checksum of the state it was built from
            with open(mcache_fname, 'wb') as f:
                pickle.dump((meta, ds.repo.get_hexsha()), f)
            lgr.debug("cached meta data graph of '{}' in {}".format(ds, mcache_fname))

        if report in ('', ['']):
            report = []
        elif report and not isinstance(report, list):
            report = [report]

        match = assure_list(match)
        search = assure_list(search)
        # convert all to lower case for case insensitive matching
        search = {x.lower() for x in search}

        def get_in_matcher(m):
            """Function generator to provide closure for a specific value of m"""
            mlower = m.lower()

            def matcher(s):
                return mlower in s.lower()
            return matcher

        matchers = [
            re.compile(match_).search
            if regex
            else get_in_matcher(match_)
            for match_ in match
        ]

        # location should be reported relative to the current location
        # We assume that no one calls chpwd() while we are yielding
        ds_path_prefix = get_path_prefix(ds.path)

        # So we can provide a useful message whenever not a single dataset
        # matched the specified `--search` properties
        observed_properties = set()

        # for every meta data set
        for mds in meta:
            hit = False
            hits = [False] * len(matchers)
            matched_fields = set()
            if mds.get('type', mds.get('schema:type', None)) != 'Dataset':
                # we are presently only dealing with datasets
                continue
            # TODO consider the possibility of nested and context/graph dicts
            # but so far we were trying to build simple lists of dicts, as much
            # as possible
            if not isinstance(mds, dict):
                raise NotImplementedError("nested meta data is not yet supported")

            # manual loop for now
            for k, v in iteritems(mds):
                if search:
                    k_lower = k.lower()
                    if k_lower not in search:
                        if observed_properties is not None:
                            # record for providing a hint later
                            observed_properties.add(k_lower)
                        continue
                    # so we have a hit, no need to track
                    observed_properties = None
                if isinstance(v, dict) or isinstance(v, list):
                    v = text_type(v)
                for imatcher, matcher in enumerate(matchers):
                    if matcher(v):
                        hits[imatcher] = True
                        matched_fields.add(k)
                if all(hits):
                    hit = True
                    # no need to loop longer than necessary
                    if not report_matched:
                        break

            if hit:
                location = mds.get('location', '.')
                report_ = matched_fields.union(report if report else {}) \
                    if report_matched else report
                if report_ == ['*']:
                    report_dict = mds
                elif report_:
                    report_dict = {k: mds[k] for k in report_ if k in mds}
                    if report_ and not report_dict:
                        lgr.debug(
                            'meta data match for %s, but no to-be-reported '
                            'properties (%s) found. Present properties: %s',
                            location, ", ".join(report_), ", ".join(sorted(mds))
                        )
                else:
                    report_dict = {}  # it was empty but not None -- asked to
                    # not report any specific field
                if isinstance(location, (list, tuple)):
                    # could be that the same dataset installed into multiple
                    # locations. For now report them separately
                    for l in location:
                        yield opj(ds_path_prefix, l), report_dict
                else:
                    yield opj(ds_path_prefix, location), report_dict

        if search and observed_properties is not None:
            import difflib
            suggestions = {
                s: difflib.get_close_matches(s, observed_properties)
                for s in search
            }
            suggestions_str = "\n ".join(
                "%s for %s" % (", ".join(choices), s)
                for s, choices in iteritems(suggestions) if choices
            )
            lgr.warning(
                "Found no properties which matched one of the one you "
                "specified (%s).  May be you meant one among: %s.\n"
                "Suggestions:\n"
                " %s",
                ", ".join(search),
                ", ".join(observed_properties),
                suggestions_str if suggestions_str.strip() else "none"
            )
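The metadata cache used in the examples above (a protocol-versioned pickle file, invalidated by comparing a stored checksum against the repository's current hexsha) is a reusable pattern. A self-contained sketch, assuming compute() is expensive and state() cheaply identifies its inputs:

import os
import pickle

CACHE = 'cache.p%d' % pickle.HIGHEST_PROTOCOL  # separate cache per protocol

def cached(compute, state):
    """Return compute()'s result, reusing the cache while state() is unchanged."""
    if os.path.exists(CACHE):
        with open(CACHE, 'rb') as f:
            value, checksum = pickle.load(f)
        if checksum == state():
            return value
    value = compute()
    with open(CACHE, 'wb') as f:
        pickle.dump((value, state()), f)
    return value

# usage sketch: cached(lambda: expensive_scan(), lambda: repo_hexsha())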