def _get_flexible_source_candidates(src, base_url=None, alternate_suffix=True):
    """Assemble the locations a clone should be attempted from, in order.

    Git does not append '/.git' on its own when cloning a non-bare
    repository over http(s), so an additional '/.git' candidate can be
    generated for such URLs. A relative `src` path is also resolved
    against `base_url`, when one is given.

    Parameters
    ----------
    src : string or RI
      Full location, or a relative path that is interpreted within
      `base_url` (if provided).
    base_url : string or RI, optional
      Location a relative `src` path is joined onto. A trailing '/.git'
      is taken off before joining and re-added (as '.git') behind `src`.
    alternate_suffix : bool
      Whether to also offer a '/.git'-suffixed variant of an http(s)
      URL that does not already end that way.

    Returns
    -------
    candidates : list of str
      Locations (path, url, ssh targets) to try, starting with the
      string form of the resolved source.
    """
    ri = RI(src)

    resolve_against_base = \
        isinstance(ri, PathRI) and not isabs(ri.path) and base_url
    if resolve_against_base:
        # from here on the base location is what gets amended
        ri = RI(base_url)
        has_git_suffix = ri.path.endswith('/.git')
        # keep `src` in front of a '.git' component of the base
        base_path = ri.path[:-5] if has_git_suffix else ri.path
        base_suffix = '.git' if has_git_suffix else ''
        if isinstance(ri, PathRI):
            # a local path: compose it with the native path functions
            ri.path = normpath(opj(base_path, src, base_suffix))
        else:
            # anything else is URL-like: POSIX conventions apply
            ri.path = posixpath.normpath(
                posixpath.join(base_path, src, base_suffix))

    src = str(ri)
    candidates = [src]

    wants_git_variant = (
        alternate_suffix
        and isinstance(ri, URL)
        and ri.scheme in {'http', 'https'}
    )
    if wants_git_variant:
        without_slash = src.rstrip('/')
        if not without_slash.endswith('/.git'):
            # additionally offer the '/.git' flavor
            candidates.append('{0}/.git'.format(without_slash))

    # TODO:
    # An error message should accompany InstallFailedError, since right now
    # everything is swallowed.
    # yoh: not sure if this comment applies here, but it could still be
    # applicable outside

    return candidates
def _get_flexible_source_candidates(src, base_url=None, alternate_suffix=True):
    """Assemble the locations a clone should be attempted from, in order.

    Git does not append '/.git' on its own when cloning a non-bare
    repository over http(s), so an additional '/.git' candidate can be
    generated for such URLs. A relative `src` path is also resolved
    against `base_url`, when one is given.

    Parameters
    ----------
    src : string or RI
      Full location, or a relative path that is interpreted within
      `base_url` (if provided).
    base_url : string or RI, optional
      Location a relative `src` path is joined onto. A trailing '/.git'
      is taken off before joining and re-added (as '.git') behind `src`.
    alternate_suffix : bool
      Whether to also offer a '/.git'-suffixed variant of an http(s)
      URL that does not already end that way.

    Returns
    -------
    candidates : list of str
      Locations (path, url, ssh targets) to try, starting with the
      string form of the resolved source.
    """
    ri = RI(src)

    resolve_against_base = \
        isinstance(ri, PathRI) and not isabs(ri.path) and base_url
    if resolve_against_base:
        # from here on the base location is what gets amended
        ri = RI(base_url)
        has_git_suffix = ri.path.endswith('/.git')
        # keep `src` in front of a '.git' component of the base
        base_path = ri.path[:-5] if has_git_suffix else ri.path
        base_suffix = '.git' if has_git_suffix else ''
        if isinstance(ri, PathRI):
            # a local path: compose it with the native path functions
            ri.path = normpath(opj(base_path, src, base_suffix))
        else:
            # anything else is URL-like: POSIX conventions apply
            ri.path = posixpath.normpath(
                posixpath.join(base_path, src, base_suffix))

    src = str(ri)
    candidates = [src]

    wants_git_variant = (
        alternate_suffix
        and isinstance(ri, URL)
        and ri.scheme in {'http', 'https'}
    )
    if wants_git_variant:
        without_slash = src.rstrip('/')
        if not without_slash.endswith('/.git'):
            # additionally offer the '/.git' flavor
            candidates.append('{0}/.git'.format(without_slash))

    # TODO:
    # An error message should accompany InstallFailedError, since right now
    # everything is swallowed.
    # yoh: not sure if this comment applies here, but it could still be
    # applicable outside

    return candidates
def decode_source_spec(spec, cfg=None):
    """Decode information from a clone source specification

    Parameters
    ----------
    spec : str
      Any supported clone source specification. An RI instance is still
      accepted for backward-compatibility reasons.
    cfg : ConfigManager, optional
      Configuration will be queried from the instance (i.e. from a particular
      dataset). If None is given, the global DataLad configuration will be
      queried.

    Returns
    -------
    dict
      The value of each decoded property is stored under its own key in this
      dict. By default the following keys are returned: 'type', a
      specification type label {'giturl', 'dataladri', 'ria'}; 'source', the
      original source specification; 'giturl', a URL for the source that is a
      suitable source argument for git-clone; 'version', a version identifier,
      if present (None otherwise); 'default_destpath', a relative path that
      can be used as a clone destination.

    Raises
    ------
    ValueError
      If a 'ria+' URL carries a fragment that is neither a dataset UUID nor a
      '~'-prefixed alias.
    """
    if cfg is None:
        from datalad import cfg
    # standard property dict composition
    props = dict(
        source=spec,
        version=None,
    )

    # Git never gets to see these URLs, so let's manually apply any
    # rewrite configuration Git might know about.
    # Note: We need to rewrite before parsing, otherwise parsing might go wrong.
    # This is particularly true for insteadOf labels replacing even the URL
    # scheme.
    spec = cfg.rewrite_url(spec)
    # common starting point is a RI instance, support for accepting an RI
    # instance is kept for backward-compatibility reasons
    source_ri = RI(spec) if not isinstance(spec, RI) else spec

    # scenario switch, each case must set 'giturl' at the very minimum
    if isinstance(source_ri, DataLadRI):
        # we have got our DataLadRI as the source, so expand it
        props['type'] = 'dataladri'
        props['giturl'] = source_ri.as_git_url()
    elif isinstance(source_ri, URL) and source_ri.scheme.startswith('ria+'):
        # parse a RIA URI: the fragment is '<dataset id or ~alias>[@<version>]'
        dsid, version = source_ri.fragment.split('@', maxsplit=1) \
            if '@' in source_ri.fragment else (source_ri.fragment, None)
        uuid_regex = r'[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}'
        # the whole ID must be a UUID; re.match() would only anchor at the
        # start and let IDs with arbitrary trailing content slip through
        if re.fullmatch(uuid_regex, dsid):
            # first three characters form the directory, the rest the entry
            trace = '{}/{}'.format(dsid[:3], dsid[3:])
            default_destpath = dsid
        elif dsid.startswith('~'):
            trace = 'alias/{}'.format(dsid[1:])
            default_destpath = dsid[1:]
        else:
            raise ValueError(
                'RIA URI not recognized, no valid dataset ID or other supported '
                'scheme: {}'.format(spec))
        # now we cancel the fragment in the original URL, but keep everything
        # else in order to be able to support the various combinations of
        # ports, paths, and everything else
        source_ri.fragment = ''
        # strip the custom protocol ('ria+') and go with standard one
        source_ri.scheme = source_ri.scheme[4:]
        # take any existing path, and add trace to dataset within the store
        source_ri.path = '{urlpath}{urldelim}{trace}'.format(
            urlpath=source_ri.path if source_ri.path else '',
            # only insert a separator if the path does not already end in one
            urldelim='' if not source_ri.path or source_ri.path.endswith('/')
            else '/',
            trace=trace,
        )
        props.update(
            type='ria',
            giturl=str(source_ri),
            version=version,
            default_destpath=default_destpath,
        )
    else:
        # let's assume that anything else is a URI that Git can handle
        props['type'] = 'giturl'
        # use the input as-is (after any URL rewriting applied above)
        props['giturl'] = spec

    if 'default_destpath' not in props:
        # if we still have no good idea on where a dataset could be cloned to
        # if no path was given, do something similar to git clone and derive
        # the path from the source
        props['default_destpath'] = _get_installationpath_from_url(
            props['giturl'])

    return props
def __call__(sshurl, name=None, target_dir=None,
             target_url=None, target_pushurl=None,
             dataset=None,
             recursive=False,
             recursion_limit=None,
             existing='error',
             shared=None,
             group=None,
             ui=False,
             as_common_datasrc=None,
             publish_by_default=None,
             publish_depends=None,
             annex_wanted=None, annex_group=None, annex_groupwanted=None,
             inherit=False,
             since=None):
    """Create a sibling for a dataset (and selected subdatasets).

    The sibling location is either reachable via SSH or a local path. All
    argument checks and the per-dataset remote checks run first; only then
    is a shell obtained, the siblings are created top-down, and finally the
    post-update hooks are run in reverse order of creation.

    Parameters
    ----------
    sshurl : str or None
      SSH URL ('ssh://host/path' or 'host:path') or local path of the
      sibling. May only be empty together with `inherit`; it is then taken
      from the URL of the existing remote `name`, or built from the
      superdataset's URL for `name` plus the relative path of this dataset.
    name : str, optional
      Sibling name. Defaults to the URL hostname for an SSH sibling and to
      "local" otherwise.
    target_dir : str, optional
      For a local sibling, joined onto the resolved sibling path. If None,
      the sibling path is used, or '.' if that is empty. If it contains
      "%RELNAME", the local structure is not replicated.
    dataset : Dataset or str, optional
      Reference dataset; must be installed.
    recursive, recursion_limit
      Passed to `subdatasets()` to select the subdatasets considered.
    existing : str
      With 'error' or 'skip', a dataset that already has a remote `name`
      is reported ('error' or 'notneeded' status) and not processed.
    ui
      If true, the web interface is uploaded for the reference dataset.
    publish_depends : str or list, optional
      Sibling names that must already be known to a dataset; otherwise an
      'error' result is yielded for it.
    inherit : bool
      Required when no `sshurl` is given; must not be combined with one.
    since : str, optional
      Passed as `fr` to `diff_dataset()`. An empty string is replaced by
      '<name>/<active branch>'. When set, the reference dataset is only
      processed if it shows up among the modified datasets.
    target_url, target_pushurl, shared, group, as_common_datasrc,
    publish_by_default, annex_wanted, annex_group, annex_groupwanted
      Passed on unchanged to `_create_dataset_sibling()`.

    Yields
    ------
    dict
      One result record per dataset with 'action' ('create_sibling'),
      'path', 'type' ('dataset'), 'status', and possibly 'message'.

    Raises
    ------
    InsufficientArgumentsError
      If neither `sshurl` nor `inherit` is given.
    ValueError
      On inconsistent arguments, an undeterminable superdataset, or an
      unsupported `sshurl`.
    MissingExternalDependency
      If no git-annex version is reported on the target machine.
    """
    #
    # nothing without a base dataset
    #
    ds = require_dataset(dataset, check_installed=True,
                         purpose='creating a sibling')
    refds_path = ds.path

    #
    # all checks that are possible before we start parsing the dataset
    #

    # possibly use sshurl to get the name in case if not specified
    if not sshurl:
        if not inherit:
            raise InsufficientArgumentsError(
                "needs at least an SSH URL, if no inherit option")
        if name is None:
            raise ValueError(
                "Neither SSH URL, nor the name of sibling to inherit from "
                "was specified")
        # It might well be that we already have this remote setup
        try:
            sshurl = CreateSibling._get_remote_url(ds, name)
        except Exception as exc:
            # best effort only: the superdataset is consulted below
            lgr.debug('%s does not know about url for %s: %s',
                      ds, name, exc_str(exc))
    elif inherit:
        raise ValueError(
            "For now, for clarity not allowing specifying a custom sshurl "
            "while inheriting settings")
        # may be could be safely dropped -- still WiP

    if not sshurl:
        # TODO: may be more back up before _prep?
        super_ds = ds.get_superdataset()
        if not super_ds:
            raise ValueError(
                "Could not determine super dataset for %s to inherit URL"
                % ds)
        super_url = CreateSibling._get_remote_url(super_ds, name)
        # for now assuming hierarchical setup
        # (TODO: to be able to distinguish between the two, probably
        # needs storing datalad.*.target_dir to have %RELNAME in there)
        sshurl = slash_join(super_url, relpath(refds_path, super_ds.path))

    # check the login URL
    sibling_ri = RI(sshurl)
    ssh_sibling = is_ssh(sibling_ri)
    if not (ssh_sibling or isinstance(sibling_ri, PathRI)):
        raise ValueError(
            "Unsupported SSH URL or path: '{0}', "
            "use ssh://host/path, host:path or path syntax".format(sshurl))

    if not name:
        name = sibling_ri.hostname if ssh_sibling else "local"
        lgr.debug(
            "No sibling name given. Using %s'%s' as sibling name",
            "URL hostname " if ssh_sibling else "",
            name)
    if since == '':
        # consider creating siblings only since the point of
        # the last update
        # XXX here we assume one to one mapping of names from local branches
        # to the remote
        active_branch = ds.repo.get_active_branch()
        since = '%s/%s' % (name, active_branch)

    #
    # parse the base dataset to find all subdatasets that need processing
    #
    to_process = []
    cand_ds = [
        Dataset(r['path'])
        for r in diff_dataset(
            ds,
            fr=since,
            to=None,
            # make explicit, but doesn't matter, no recursion in diff()
            constant_refs=True,
            # constrain to the paths of all locally existing subdatasets
            path=[
                sds['path']
                for sds in ds.subdatasets(
                    recursive=recursive,
                    recursion_limit=recursion_limit,
                    fulfilled=True,
                    result_renderer=None)
            ],
            # save cycles, we are only looking for datasets
            annex=None,
            untracked='no',
            # recursion was done faster by subdatasets()
            recursive=False,
            # save cycles, we are only looking for datasets
            eval_file_type=False,
        )
        if r.get('type') == 'dataset' and r.get('state', None) != 'clean'
    ]
    # check remotes setup; without `since` the reference dataset itself is
    # always a candidate
    for d in cand_ds if since else ([ds] + cand_ds):
        d_repo = d.repo
        if d_repo is None:
            continue
        # query the repository instance that was just checked
        checkds_remotes = d_repo.get_remotes()
        res = dict(
            action='create_sibling',
            path=d.path,
            type='dataset',
        )

        if publish_depends:
            # make sure dependencies are valid
            # TODO: inherit -- we might want to automagically create
            # those dependents as well???
            unknown_deps = set(
                ensure_list(publish_depends)).difference(checkds_remotes)
            if unknown_deps:
                yield dict(
                    res,
                    status='error',
                    message=('unknown sibling(s) specified as publication '
                             'dependency: %s', unknown_deps),
                )
                continue
        if name in checkds_remotes and existing in ('error', 'skip'):
            # NOTE(review): the parenthesis opened in this message is never
            # closed; text left untouched in case it is matched elsewhere
            yield dict(
                res,
                status='error' if existing == 'error' else 'notneeded',
                message=(
                    "sibling '%s' already configured (specify alternative "
                    "name, or force reconfiguration via --existing",
                    name),
            )
            continue
        to_process.append(res)

    if not to_process:
        # we ruled out all possibilities
        # TODO wait for gh-1218 and make better return values
        lgr.info("No datasets qualify for sibling creation. "
                 "Consider different settings for --existing "
                 "or --since if this is unexpected")
        return

    if ssh_sibling:
        # request ssh connection:
        lgr.info("Connecting ...")
        shell = ssh_manager.get_connection(sshurl)
    else:
        shell = _RunnerAdapter()
        sibling_ri.path = str(resolve_path(sibling_ri.path, dataset))
        if target_dir:
            target_dir = opj(sibling_ri.path, target_dir)

    if target_dir is None:
        if sibling_ri.path:
            target_dir = sibling_ri.path
        else:
            target_dir = '.'

    # TODO: centralize and generalize template symbol handling
    replicate_local_structure = "%RELNAME" not in target_dir

    if not shell.get_annex_version():
        raise MissingExternalDependency(
            'git-annex',
            msg="It's required on the {} machine to create a sibling".format(
                'remote' if ssh_sibling else 'local'))

    #
    # all checks done and we have a connection, now do something
    #

    # loop over all datasets, ordered from top to bottom to make test
    # below valid (existing directories would cause the machinery to halt)
    # But we need to run post-update hook in depth-first fashion, so
    # would only collect first and then run (see gh #790)
    yielded = set()
    remote_repos_to_run_hook_for = []
    for currentds_ap in \
            sorted(to_process, key=lambda x: x['path'].count('/')):
        current_ds = Dataset(currentds_ap['path'])

        path = _create_dataset_sibling(
            name,
            current_ds,
            refds_path,
            shell,
            replicate_local_structure,
            sibling_ri,
            target_dir,
            target_url,
            target_pushurl,
            existing,
            shared,
            group,
            publish_depends,
            publish_by_default,
            ui,
            as_common_datasrc,
            annex_wanted,
            annex_group,
            annex_groupwanted,
            inherit)
        if not path:
            # nothing new was created
            # TODO is 'notneeded' appropriate in this case?
            currentds_ap['status'] = 'notneeded'
            # TODO explain status in 'message'
            yield currentds_ap
            yielded.add(currentds_ap['path'])
            continue
        remote_repos_to_run_hook_for.append((path, currentds_ap))

        # publish web-interface to root dataset on publication server
        if current_ds.path == refds_path and ui:
            lgr.info("Uploading web interface to %s", path)
            try:
                CreateSibling.upload_web_interface(path, shell, shared, ui)
            except CommandError as e:
                currentds_ap['status'] = 'error'
                currentds_ap['message'] = (
                    "failed to push web interface to the remote datalad repository (%s)",
                    exc_str(e))
                yield currentds_ap
                yielded.add(currentds_ap['path'])
                continue

    # in reverse order would be depth first
    lgr.info("Running post-update hooks in all created siblings")
    # TODO: add progressbar
    for path, currentds_ap in remote_repos_to_run_hook_for[::-1]:
        # Trigger the hook
        lgr.debug("Running hook for %s (if exists and executable)", path)
        try:
            shell(
                "cd {} "
                "&& ( [ -x hooks/post-update ] && hooks/post-update || : )"
                "".format(sh_quote(_path_(path, ".git"))))
        except CommandError as e:
            currentds_ap['status'] = 'error'
            currentds_ap['message'] = (
                "failed to run post-update hook under remote path %s (%s)",
                path, exc_str(e))
            yield currentds_ap
            yielded.add(currentds_ap['path'])
            continue
        if currentds_ap['path'] not in yielded:
            # if we were silent until now everything is just splendid
            currentds_ap['status'] = 'ok'
            yield currentds_ap