def parse_spec(self, spec_str, all_index=False):
    '''Parse a string into a `Spec`.

    The string is split with ``urlparse.urlsplit``. When it carries no
    scheme, the scheme is read from ``CONFIG.get('dpm', 'index.default')``.
    The remaining parts are then normalised per scheme:

    * ``file``: the location is converted to an absolute local path. The
      netloc becomes its directory and the path its last component, unless
      `all_index` is true, in which case the whole path is the netloc.
    * ``ckan``, ``db``, ``egg``: everything after the scheme is split at the
      last ``/`` into netloc and path. A non-empty ckan netloc is prefixed
      with ``http://`` and a non-empty db netloc with ``file://`` when they
      do not already start with ``http`` / ``file``. For egg a lone name
      is used as the netloc.
    * any other scheme: the netloc and path from ``urlsplit`` are passed
      through unchanged.

    :param spec_str: the spec string.
    :param all_index: this spec_str is just an index (useful for file specs)
    :return: a `Spec` built from the resulting scheme, netloc and path.
    '''
    # Only the first three components are needed; query and fragment are
    # not used by any of the schemes handled below.
    scheme, netloc, path = urlparse.urlsplit(spec_str)[:3]
    # case where we just provide a path ...
    if scheme == '':
        # default scheme
        scheme = CONFIG.get('dpm', 'index.default')

    if scheme == 'file':
        if '://' in spec_str:
            # Work from the raw string rather than the urlsplit result.
            # maxsplit=1 keeps any later '://' as part of the path instead
            # of silently truncating the location there.
            path = spec_str.split('://', 1)[1]
        path = urllib.url2pathname(path)
        path = path.replace('/', os.sep)
        path = os.path.abspath(path)
        # for file netloc is everything up to last name
        if all_index:
            netloc = path
            path = ''
        else:
            netloc = os.path.dirname(path)
            path = os.path.basename(path)
    elif scheme in ('ckan', 'db', 'egg'):
        # python >= 2.6.5 changes behaviour of urlsplit for novel url
        # schemes to be rfc compliant
        # http://bugs.python.org/issue7904
        # urlparse.urlsplit(ckan://ckan) gives:
        # python < 2.6.5
        # SplitResult(scheme='ckan', netloc='', path='ckan', query='', fragment='')
        # python >= 2.6.5
        # SplitResult(scheme='ckan', netloc='ckan', path='', query='', fragment='')
        if netloc != '':  # python >= 2.6.5
            # Fold netloc back into path so both urlsplit behaviours are
            # handled by the same code below.
            path = netloc + path
            netloc = ''
        # after urlsplit of ckan://... have path = //... for python < 2.6.5
        path = path.lstrip('/')
        # netloc is everything before the last '/', path is the last name
        # (netloc is '' when there is no '/').
        netloc, _, path = path.rpartition('/')
        if scheme == 'ckan':
            # we have a path but did not put http:// ...
            if netloc and not netloc.startswith('http'):
                netloc = 'http://' + netloc
        elif scheme == 'db':
            if netloc and not netloc.startswith('file'):
                netloc = 'file://' + netloc
        elif scheme == 'egg':
            # A lone name identifies the index, not an item within it.
            if path and not netloc:
                netloc, path = path, ''
            # NOTE(review): this normalisation is applied to egg specs
            # only -- confirm that is the intended scope.
            netloc = netloc.strip("/")
            path = path.lstrip("/")

    return Spec(scheme, netloc, path)
def parse_spec(self, spec_str, all_index=False):
    '''Parse a string into a `Spec`.

    The string is split with ``urlparse.urlsplit``. When it carries no
    scheme, the scheme is read from ``CONFIG.get('dpm', 'index.default')``.
    The remaining parts are then normalised per scheme:

    * ``http``: treated as a CKAN instance. The scheme becomes ``ckan``,
      the netloc becomes ``http://<host>[<offset>]/api`` and the path
      becomes the dataset name taken from a ``.../dataset/<name>`` URL
      path (empty when the URL has no path).
    * ``file``: the location is converted to an absolute local path. The
      netloc becomes its directory and the path its last component, unless
      `all_index` is true, in which case the whole path is the netloc.
    * ``ckan``, ``egg``: everything after the scheme is split at the last
      ``/`` into netloc and path. A non-empty ckan netloc is prefixed with
      ``http://`` when it does not already start with ``http``. For egg a
      lone name is used as the netloc.
    * any other scheme: the netloc and path from ``urlsplit`` are passed
      through unchanged.

    :param spec_str: the spec string.
    :param all_index: this spec_str is just an index (useful for file specs)
    :return: a `Spec` built from the resulting scheme, netloc and path.
    :raises Exception: if an ``http`` URL has a non-empty path that does not
        match ``[/offset]/dataset/<name>``.
    '''
    # Only the first three components are needed; query and fragment are
    # not used by any of the schemes handled below.
    scheme, netloc, path = urlparse.urlsplit(spec_str)[:3]
    # case where we just provide a path ...
    if scheme == '':
        # default scheme
        scheme = CONFIG.get('dpm', 'index.default')

    if scheme == 'http':
        # assume this is a ckan instance ...
        # for ckan netloc = full API url, path = dataset name
        scheme = 'ckan'
        # ckan netloc should be full url
        # TODO: what happens if CKAN instance not at base domain!
        netloc = 'http://' + netloc
        path = path.rstrip('/')
        if path:
            out = re.match(r'(/.+)?/dataset/(.+)$', path)
            if not out:
                msg = 'URL is not a path to a CKAN instance or dataset: %s' % spec_str
                raise Exception(msg)
            (offset, name) = out.groups()
            # offset is whatever precedes '/dataset/' in the URL path; it
            # is kept as part of the instance location.
            if offset:
                netloc += offset
            path = name
        # NOTE(review): '/api' is appended whether or not a dataset path
        # was given -- confirm against callers.
        netloc += '/api'
    elif scheme == 'file':
        if '://' in spec_str:
            # Work from the raw string rather than the urlsplit result.
            # maxsplit=1 keeps any later '://' as part of the path instead
            # of silently truncating the location there.
            path = spec_str.split('://', 1)[1]
        path = urllib.url2pathname(path)
        path = path.replace('/', os.sep)
        path = os.path.abspath(path)
        # for file netloc is everything up to last name
        if all_index:
            netloc = path
            path = ''
        else:
            netloc = os.path.dirname(path)
            path = os.path.basename(path)
    elif scheme in ('ckan', 'egg'):
        # python >= 2.6.5 changes behaviour of urlsplit for novel url
        # schemes to be rfc compliant
        # http://bugs.python.org/issue7904
        # urlparse.urlsplit(ckan://ckan) gives:
        # python < 2.6.5
        # SplitResult(scheme='ckan', netloc='', path='ckan', query='', fragment='')
        # python >= 2.6.5
        # SplitResult(scheme='ckan', netloc='ckan', path='', query='', fragment='')
        if netloc != '':  # python >= 2.6.5
            # Fold netloc back into path so both urlsplit behaviours are
            # handled by the same code below.
            path = netloc + path
            netloc = ''
        # after urlsplit of ckan://... have path = //... for python < 2.6.5
        path = path.lstrip('/')
        # netloc is everything before the last '/', path is the last name
        # (netloc is '' when there is no '/').
        netloc, _, path = path.rpartition('/')
        if scheme == 'ckan':
            # we have a path but did not put http:// ...
            if netloc and not netloc.startswith('http'):
                netloc = 'http://' + netloc
        elif scheme == 'egg':
            # A lone name identifies the index, not an item within it.
            if path and not netloc:
                netloc, path = path, ''
            # NOTE(review): this normalisation is applied to egg specs
            # only -- confirm that is the intended scope.
            netloc = netloc.strip("/")
            path = path.lstrip("/")

    return Spec(scheme, netloc, path)