Example #1
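`postclonecfg_ria()` from DataLad's clone machinery: it configures a dataset freshly cloned from a RIA store. Helpers such as `lgr`, `download_url`, `SSHRemoteIO`, `LocalIO`, `make_tempfile`, and `GitWitlessRunner` are DataLad internals whose imports are not shown in this excerpt.
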
def postclonecfg_ria(ds, props):
    """Configure a dataset freshly cloned from a RIA store"""
    repo = ds.repo
    # RIA uses the 'hashdirmixed' annex layout; copying data to it via
    # git-annex (if cloned via SSH) would make git-annex see a bare repo and
    # establish a 'hashdirlower' annex object tree.
    # Moreover, we want the ORA remote to receive all data for the store, so its
    # objects could be moved into archives (the main point of a RIA store).
    RIA_REMOTE_NAME = 'origin'  # don't hardcode everywhere
    ds.config.set(
        'remote.{}.annex-ignore'.format(RIA_REMOTE_NAME), 'true',
        where='local')

    # Chances are that if this dataset came from a RIA store, its subdatasets
    # may live there too. Place a subdataset source candidate config that makes
    # `get` probe this RIA store when obtaining subdatasets.
    ds.config.set(
        # we use the label 'origin' for this candidate in order to not have to
        # generate a complicated name from the actual source specification.
        # we pick a cost of 200 to sort it before datalad's default candidates
        # for non-RIA URLs, because they prioritize hierarchical layouts that
        # cannot be found in a RIA store
        'datalad.get.subdataset-source-candidate-200origin',
        # use the entire original URL up to the fragment, plus the dataset ID
        # placeholder; this should make things work with any store setup we
        # support (paths, ports, ...)
        props['source'].split('#', maxsplit=1)[0] + '#{id}',
        where='local')
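    # For example, a source of 'ria+ssh://example.com/store#6d69ca68-...@v1'
    # (an illustrative URL) yields the candidate value
    # 'ria+ssh://example.com/store#{id}'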

    # set up a publication dependency, if a corresponding special remote exists
    # and was enabled (there could be RIA stores that actually only have repos).
    # Note: the `yield from` below is what makes this function a generator.
    ora_remotes = [s for s in ds.siblings('query', result_renderer='disabled')
                   if s.get('annex-externaltype') == 'ora']
    if not ora_remotes and any(
            r.get('externaltype') == 'ora'
            for r in (repo.get_special_remotes().values()
                      if hasattr(repo, 'get_special_remotes')
                      else [])):
        # No ORA remote was autoenabled, but the configuration knows about at
        # least one. Check origin's config for datalad.ora-remote.uuid, as
        # stored by create-sibling-ria, and try enabling that one.
        lgr.debug("Found no autoenabled ORA special remote. Trying to look it "
                  "up in source config ...")

        # First figure out whether we cloned via SSH, HTTP, or a local path,
        # and then fetch origin's config file accordingly:
        config_content = None
        scheme = props['giturl'].split(':', 1)[0]
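        # e.g. 'ssh://store.example.com/store' -> 'ssh'; a plain local path
        # carries no scheme and would end up in the final `else` branch below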
        if scheme in ['http', 'https']:
            try:
                config_content = download_url(
                    "{}{}config".format(
                        props['giturl'],
                        '/' if not props['giturl'].endswith('/') else ''))
            except DownloadError as e:
                lgr.debug("Failed to get config file from source:\n%s",
                          exc_str(e))
        elif scheme == 'ssh':
            # TODO: switch the following to proper command abstraction:
            # SSHRemoteIO ignores the path part ATM. No remote CWD! (To be
            # changed with command abstractions). So we need to get that part to
            # have a valid path to origin's config file:
            cfg_path = PurePosixPath(URL(props['giturl']).path) / 'config'
            op = SSHRemoteIO(props['giturl'])
            try:
                config_content = op.read_file(cfg_path)
            except RIARemoteError as e:
                lgr.debug("Failed to get config file from source: %s",
                          exc_str(e))

        elif scheme == 'file':
            # TODO: switch the following to proper command abstraction:
            op = LocalIO()
            cfg_path = Path(URL(props['giturl']).localpath) / 'config'
            try:
                config_content = op.read_file(cfg_path)
            except (RIARemoteError, OSError) as e:
                lgr.debug("Failed to get config file from source: %s",
                          exc_str(e))
        else:
            lgr.debug("Unknown URL-Scheme %s in %s. Can handle SSH, HTTP or "
                      "FILE scheme URLs.", scheme, props['source'])

        # Now read the retrieved config
        org_uuid = None
        if config_content:
            # TODO: We might be able to spare the saving to a file.
            #       "git config -f -" is not explicitly documented but happens
            #       to work and would read from stdin. Make sure we know this
            #       works for required git versions and on all platforms.
            with make_tempfile(content=config_content) as cfg_file:
                runner = GitWitlessRunner()
                try:
                    result = runner.run(
                        ['git', 'config', '-f', cfg_file,
                         'datalad.ora-remote.uuid'],
                        protocol=StdOutCapture
                    )
                    org_uuid = result['stdout'].strip()
                except CommandError as e:
                    # doesn't contain what we are looking for
                    lgr.debug("Found no UUID for ORA special remote at "
                              "'%s' (%s)", RIA_REMOTE_NAME, exc_str(e))

        # Now enable it. If annex-init had succeeded in enabling it as stored,
        # we wouldn't have ended up here; so enable it with the store URL
        # suggested by the URL we cloned from.
        if org_uuid:
            srs = repo.get_special_remotes()
            if org_uuid in srs.keys():
                # TODO: - Double-check autoenable value and only do this when
                #         true?
                #       - What if still fails? -> Annex shouldn't change config
                #         in that case

                # we only need the store:
                new_url = props['source'].split('#')[0]
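                # e.g. 'ria+ssh://example.com/store#<dsid>' (illustrative)
                # becomes 'ria+ssh://example.com/store'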
                try:
                    repo.enable_remote(srs[org_uuid]['name'],
                                       options=['url={}'.format(new_url)]
                                       )
                    lgr.info("Reconfigured %s for %s",
                             srs[org_uuid]['name'], new_url)
                    # update ora_remotes for considering publication dependency
                    # below
                    ora_remotes = [
                        s for s in ds.siblings(
                            'query', result_renderer='disabled')
                        if s.get('annex-externaltype') == 'ora']
                except CommandError as e:
                    lgr.debug("Failed to reconfigure ORA special remote: %s",
                              exc_str(e))
            else:
                lgr.debug("Unknown ORA special remote uuid at '%s': %s",
                          RIA_REMOTE_NAME, org_uuid)
    if ora_remotes:
        if len(ora_remotes) == 1:
            yield from ds.siblings('configure',
                                   name=RIA_REMOTE_NAME,
                                   publish_depends=ora_remotes[0]['name'],
                                   result_filter=None,
                                   result_renderer='disabled')
        else:
            lgr.warning("Found multiple ORA remotes. Couldn't decide which "
                        "publishing to 'origin' should depend on: %s. Consider "
                        "running 'datalad siblings configure -s origin "
                        "--publish-depends ORAREMOTENAME' to set publication "
                        "dependency manually.",
                        [r['name'] for r in ora_remotes])
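
For orientation, here is a minimal sketch of how such a post-clone hook could be driven. It is illustrative only: in DataLad itself this function is invoked internally by `clone`, and the store URL, dataset ID, and `props` values below are made up:

import datalad.api as dl

# hypothetical RIA source URL; the fragment carries the dataset ID
source = ('ria+ssh://store.example.com/ria-store'
          '#6d69ca68-7e85-11e6-904b-002590f97d84')
ds = dl.clone(source=source, path='myds')
props = {
    'source': source,
    # the resolved git URL of the repository inside the store
    # (RIA stores shard the dataset ID as <first 3 chars>/<rest>)
    'giturl': ('ssh://store.example.com/ria-store'
               '/6d6/9ca68-7e85-11e6-904b-002590f97d84'),
}
# consume the generator to apply the configuration and collect result records
results = list(postclonecfg_ria(ds, props))
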
Example #2
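The `__call__` implementation behind DataLad's `create-sibling-ria` command: it creates a dataset sibling (and, by default, an associated storage sibling) in a RIA store. As above, helpers such as `require_dataset`, `verify_ria_url`, `get_status_dict`, `create_store`, and `_create_sibling_ria` are DataLad internals not shown in this excerpt.
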
    def __call__(
            url,
            name,
            *,  # note that `name` is required but not posarg in CLI
            dataset=None,
            storage_name=None,
            alias=None,
            post_update_hook=False,
            shared=None,
            group=None,
            storage_sibling=True,
            existing='error',
            new_store_ok=False,
            trust_level=None,
            recursive=False,
            recursion_limit=None,
            disable_storage__=None,
            push_url=None):
        if disable_storage__ is not None:
            import warnings
            warnings.warn(
                "datalad-create-sibling-ria --no-storage-sibling "
                "is deprecated, use --storage-sibling off instead.",
                DeprecationWarning)
            # recode to new setup
            disable_storage__ = None
            storage_sibling = False

        if storage_sibling == 'only' and storage_name:
            lgr.warning(
                "Sibling name will be used for storage sibling in "
                "storage-sibling-only mode, but a storage sibling name "
                "was provided")

        ds = require_dataset(dataset,
                             check_installed=True,
                             purpose='create RIA sibling(s)')
        res_kwargs = dict(
            ds=ds,
            action="create-sibling-ria",
            logger=lgr,
        )

        # parse target URL
        # Note: URL parsing is done twice ATM (for the top-level ds). This
        # can't be reduced to a single instance, since rewriting the URL based
        # on config could differ for subdatasets.
        try:
            ssh_host, base_path, rewritten_url = \
                verify_ria_url(push_url if push_url else url, ds.config)
        except ValueError as e:
            yield get_status_dict(status='error', message=str(e), **res_kwargs)
            return

        if ds.repo.get_hexsha() is None or ds.id is None:
            raise RuntimeError("Repository at {} is not a DataLad dataset, "
                               "run 'datalad create [--force]' first.".format(
                                   ds.path))

        if not storage_sibling and storage_name:
            lgr.warning(
                "Storage sibling setup disabled, but a storage sibling name "
                "was provided")

        if storage_sibling and not storage_name:
            storage_name = "{}-storage".format(name)

        if storage_sibling and name == storage_name:
            # leads to unresolvable, circular dependency with publish-depends
            raise ValueError("sibling names must not be equal")

        if not isinstance(url, str):
            raise TypeError("url is not a string, but %s" % type(url))

        # Query existing siblings upfront in order to fail early when
        # existing=='error', since misconfiguring (particularly special
        # remotes) only to have things fail in a subdataset later on can be
        # quite painful.
        # TODO: messages - this is "create-sibling". Don't confuse existence of
        #       local remotes with existence of the actual remote sibling
        #       in wording
        if existing == 'error':
            failed = False
            for dpath, sname in _yield_ds_w_matching_siblings(
                    ds, (name, storage_name),
                    recursive=recursive,
                    recursion_limit=recursion_limit):
                res = get_status_dict(
                    status='error',
                    message=(
                        "a sibling %r is already configured in dataset %r",
                        sname, dpath),
                    type='sibling',
                    name=sname,
                    ds=ds,
                    **res_kwargs,
                )
                failed = True
                yield res
            if failed:
                return
        # TODO: - URL parsing + store creation needs to be RF'ed based on
        #         command abstractions
        #       - more generally consider store creation a dedicated command or
        #         option

        io = SSHRemoteIO(ssh_host) if ssh_host else LocalIO()
        try:
            # Determine the existence of a store by trying to read its layout
            # file. Because this raises FileNotFoundError (or a remote-IO
            # equivalent) for a non-existent store, we need to catch it.
            io.read_file(Path(base_path) / 'ria-layout-version')
        except (FileNotFoundError, RIARemoteError,
                RemoteCommandFailedError) as e:
            if not new_store_ok:
                # we're instructed to only act in case of an existing RIA store
                res = get_status_dict(status='error',
                                      message="No store found at '{}'. Forgot "
                                      "--new-store-ok ?".format(
                                          Path(base_path)),
                                      **res_kwargs)
                yield res
                return

        log_progress(
            lgr.info,
            'create-sibling-ria',
            'Creating a new RIA store at %s',
            Path(base_path),
        )
        create_store(io, Path(base_path), '1')

        yield from _create_sibling_ria(ds, url, push_url, name,
                                       storage_sibling, storage_name, alias,
                                       existing, shared, group,
                                       post_update_hook, trust_level,
                                       res_kwargs)

        if recursive:
            # Note: subdatasets can be treated independently, so go full
            # recursion when querying for them and _no_ recursion with the
            # actual call. Theoretically this can be parallelized.

            for subds in ds.subdatasets(state='present',
                                        recursive=True,
                                        recursion_limit=recursion_limit,
                                        return_type='generator',
                                        result_renderer='disabled',
                                        result_xfm='datasets'):
                yield from _create_sibling_ria(
                    subds,
                    url,
                    push_url,
                    name,
                    storage_sibling,
                    storage_name,
                    None,  # subdatasets can't have the same alias as the parent
                    existing,
                    shared,
                    group,
                    post_update_hook,
                    trust_level,
                    res_kwargs)
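
A minimal sketch of invoking this command through DataLad's Python API; the store URL and sibling name are illustrative:

import datalad.api as dl

ds = dl.Dataset('/path/to/dataset')
# creates the git sibling 'ria-origin' and, by default, the storage sibling
# 'ria-origin-storage'; new_store_ok=True permits initializing a brand-new
# store at this location
ds.create_sibling_ria(
    'ria+ssh://store.example.com/ria-store',
    name='ria-origin',
    new_store_ok=True,
)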