def _get_git_url_from_source(source):
    """Translate a source specification into a URL usable for cloning

    The specification is wrapped into an RI unless it already is one.
    DataLadRI instances are expanded via their `as_git_url()` method, any
    other RI is returned in its string form.
    """
    # TODO: Probably RF this into RI.as_git_url(), that would be overridden
    # by subclasses or sth. like that
    ri = source if isinstance(source, RI) else RI(source)
    if isinstance(ri, DataLadRI):
        # a DataLadRI needs to be expanded into an actual Git URL
        return ri.as_git_url()
    return str(ri)
def decode_source_spec(spec, cfg=None):
    """Decode information from a clone source specification

    Parameters
    ----------
    spec : str
      Any supported clone source specification
    cfg : ConfigManager, optional
      Configuration will be queried from the instance (i.e. from a particular
      dataset). If None is given, the global DataLad configuration will be
      queried.

    Returns
    -------
    dict
      The value of each decoded property is stored under its own key in this
      dict. By default the following keys are returned: 'type', a
      specification type label {'giturl', 'dataladri', 'ria'}; 'source' the
      original source specification; 'giturl' a URL for the source that is a
      suitable source argument for git-clone; 'version' a version-identifier,
      if present (None else); 'default_destpath' a relative path that can be
      used as a clone destination.

    Raises
    ------
    ValueError
      If a 'ria+' URL carries a fragment that is neither a complete dataset
      UUID nor a '~'-prefixed alias.
    """
    if cfg is None:
        from datalad import cfg

    # standard property dict composition
    props = dict(
        source=spec,
        version=None,
    )

    # Git never gets to see these URLs, so let's manually apply any
    # rewrite configuration Git might know about.
    # Note: We need to rewrite before parsing, otherwise parsing might go
    # wrong. This is particularly true for insteadOf labels replacing even
    # the URL scheme.
    spec = cfg.rewrite_url(spec)

    # common starting point is a RI instance, support for accepting an RI
    # instance is kept for backward-compatibility reasons
    source_ri = RI(spec) if not isinstance(spec, RI) else spec

    # scenario switch, each case must set 'giturl' at the very minimum
    if isinstance(source_ri, DataLadRI):
        # we have got our DataLadRI as the source, so expand it
        props['type'] = 'dataladri'
        props['giturl'] = source_ri.as_git_url()
    elif isinstance(source_ri, URL) and source_ri.scheme.startswith('ria+'):
        # parse a RIA URI: the fragment is '<dsid>' or '<dsid>@<version>'
        dsid, sep, version = source_ri.fragment.partition('@')
        if not sep:
            # no '@' present, hence no version was specified
            version = None
        uuid_regex = \
            r'[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}'
        # the complete ID must be a UUID; a mere prefix match would let
        # IDs with trailing garbage through and yield a bogus store path
        if re.fullmatch(uuid_regex, dsid):
            trace = '{}/{}'.format(dsid[:3], dsid[3:])
            default_destpath = dsid
        elif dsid.startswith('~'):
            trace = 'alias/{}'.format(dsid[1:])
            default_destpath = dsid[1:]
        else:
            raise ValueError(
                'RIA URI not recognized, no valid dataset ID or other supported '
                'scheme: {}'.format(spec))
        # now we cancel the fragment in the original URL, but keep everything
        # else in order to be able to support the various combinations of
        # ports, paths, and everything else
        # NOTE(review): these assignments modify `source_ri` in place; if an
        # RI instance was passed as `spec`, this may be the caller's object
        # -- TODO confirm whether that is intended
        source_ri.fragment = ''
        # strip the custom protocol (the 'ria+' prefix) and go with the
        # standard one
        source_ri.scheme = source_ri.scheme[4:]
        # take any existing path, and add trace to dataset within the store
        source_ri.path = '{urlpath}{urldelim}{trace}'.format(
            urlpath=source_ri.path if source_ri.path else '',
            urldelim='' if not source_ri.path or source_ri.path.endswith('/')
            else '/',
            trace=trace,
        )
        props.update(
            type='ria',
            giturl=str(source_ri),
            version=version,
            default_destpath=default_destpath,
        )
    else:
        # let's assume that anything else is a URI that Git can handle
        props['type'] = 'giturl'
        # use original input verbatim
        props['giturl'] = spec

    if 'default_destpath' not in props:
        # if we still have no good idea on where a dataset could be cloned to
        # if no path was given, do something similar to git clone and derive
        # the path from the source
        props['default_destpath'] = _get_installationpath_from_url(
            props['giturl'])

    return props