Beispiel #1
0
    def parse_spec(self, spec_str, all_index=False):
        '''Parse a string into a `Spec`.

        The string is split with `urlparse.urlsplit`. When it carries no
        scheme, the scheme is read from the ``index.default`` option of the
        ``dpm`` config section. How netloc and path are derived then depends
        on the scheme:

        * ``file``: the path is made absolute; netloc becomes its directory
          and path its last component (with `all_index`, netloc is the whole
          path and path is empty).
        * ``ckan``, ``db``, ``egg``: everything after the scheme is split at
          the last ``/`` into netloc and path. A non-empty netloc gets
          ``http://`` (ckan) or ``file://`` (db) prepended unless it already
          starts with ``http`` / ``file``. For ``egg`` a value without any
          ``/`` is used as netloc with an empty path.
        * any other scheme: netloc and path are used as returned by urlsplit.

        :param spec_str: the spec string.
        :param all_index: this spec_str is just an index (useful for file
            specs)
        :return: a `Spec` built from the resulting scheme, netloc and path.
        '''
        # query and fragment are not used by any of the schemes handled here
        scheme, netloc, path = urlparse.urlsplit(spec_str)[:3]
        # case where we just provide a path ...
        if not scheme:
            # default scheme
            scheme = CONFIG.get('dpm', 'index.default')

        if scheme == 'file':
            if '://' in spec_str:
                # split only at the first '://' so that a path which itself
                # contains '://' is not cut short
                path = spec_str.split('://', 1)[1]
            path = urllib.url2pathname(path)
            path = path.replace('/', os.sep)
            path = os.path.abspath(path)
            # for file netloc is everything up to last name
            if all_index:
                netloc, path = path, ''
            else:
                # (dirname, basename) of the absolute path
                netloc, path = os.path.split(path)
        elif scheme in ('ckan', 'db', 'egg'):
            # python >= 2.6.5 changes behaviour of urlsplit for novel url
            # schemes to be rfc compliant
            # http://bugs.python.org/issue7904
            # urlparse.urlsplit(ckan://ckan) gives:
            # python < 2.6.5
            # SplitResult(scheme='ckan', netloc='', path='ckan', query='', fragment='')
            # python >= 2.6.5
            # SplitResult(scheme='ckan', netloc='ckan', path='', query='', fragment='')
            if netloc:  # python >= 2.6.5
                # fold netloc back into path so both behaviours are handled
                # by the same code below
                path = netloc + path
                netloc = ''
            # after urlsplit of ckan://... have path = //... for python < 2.6.5
            path = path.lstrip('/')
            # netloc is everything before the last '/', path the last segment
            netloc, _sep, path = path.rpartition('/')
            if scheme == 'ckan':
                # we have a path but did not put http:// ...
                if netloc and not netloc.startswith('http'):
                    netloc = 'http://' + netloc
            elif scheme == 'db':
                if netloc and not netloc.startswith('file'):
                    netloc = 'file://' + netloc
            elif scheme == 'egg':
                # a single segment is treated as the netloc, not the path
                if path and not netloc:
                    netloc, path = path, ''
                netloc = netloc.strip("/")
                path = path.lstrip("/")
        return Spec(scheme, netloc, path)
Beispiel #2
0
    def parse_spec(self, spec_str, all_index=False):
        '''Parse a string into a `Spec`.

        The string is split with `urlparse.urlsplit`. When it carries no
        scheme, the scheme is read from the ``index.default`` option of the
        ``dpm`` config section. How netloc and path are derived then depends
        on the scheme:

        * ``http``: converted to a ``ckan`` spec. netloc becomes
          ``http://<host>[<offset>]/api`` and path the dataset name, both
          taken from a URL path of the form ``[<offset>]/dataset/<name>``.
          An empty URL path yields an empty spec path.
        * ``file``: the path is made absolute; netloc becomes its directory
          and path its last component (with `all_index`, netloc is the whole
          path and path is empty).
        * ``ckan``, ``egg``: everything after the scheme is split at the last
          ``/`` into netloc and path. For ``ckan`` a non-empty netloc gets
          ``http://`` prepended unless it already starts with ``http``. For
          ``egg`` a value without any ``/`` is used as netloc with an empty
          path.
        * any other scheme: netloc and path are used as returned by urlsplit.

        :param spec_str: the spec string.
        :param all_index: this spec_str is just an index (useful for file
            specs)
        :return: a `Spec` built from the resulting scheme, netloc and path.
        :raises Exception: if an ``http`` URL has a non-empty path that does
            not match ``[<offset>]/dataset/<name>``.
        '''
        # query and fragment are not used by any of the schemes handled here
        scheme, netloc, path = urlparse.urlsplit(spec_str)[:3]
        # case where we just provide a path ...
        if not scheme:
            # default scheme
            scheme = CONFIG.get('dpm', 'index.default')

        # NOTE(review): only 'http' is routed to ckan; 'https' URLs fall
        # through to the generic case -- confirm whether that is intended.
        if scheme == 'http':
            # assume this is a ckan instance ...
            # for ckan netloc = full API url, path = dataset name
            scheme = 'ckan'
            # ckan netloc should be full url
            # TODO: what happens if CKAN instance not at base domain!
            netloc = 'http://' + netloc
            path = path.rstrip('/')
            if path:
                # optional leading offset, then /dataset/<name>
                out = re.match(r'(/.+)?/dataset/(.+)$', path)
                if not out:
                    msg = 'URL is not a path to a CKAN instance or dataset: %s' % spec_str
                    raise Exception(msg)
                offset, name = out.groups()
                if offset:
                    netloc += offset
                path = name
            netloc += '/api'
        elif scheme == 'file':
            if '://' in spec_str:
                # split only at the first '://' so that a path which itself
                # contains '://' is not cut short
                path = spec_str.split('://', 1)[1]
            path = urllib.url2pathname(path)
            path = path.replace('/', os.sep)
            path = os.path.abspath(path)
            # for file netloc is everything up to last name
            if all_index:
                netloc, path = path, ''
            else:
                # (dirname, basename) of the absolute path
                netloc, path = os.path.split(path)
        elif scheme in ('ckan', 'egg'):
            # python >= 2.6.5 changes behaviour of urlsplit for novel url
            # schemes to be rfc compliant
            # http://bugs.python.org/issue7904
            # urlparse.urlsplit(ckan://ckan) gives:
            # python < 2.6.5
            # SplitResult(scheme='ckan', netloc='', path='ckan', query='', fragment='')
            # python >= 2.6.5
            # SplitResult(scheme='ckan', netloc='ckan', path='', query='', fragment='')
            if netloc:  # python >= 2.6.5
                # fold netloc back into path so both behaviours are handled
                # by the same code below
                path = netloc + path
                netloc = ''
            # after urlsplit of ckan://... have path = //... for python < 2.6.5
            path = path.lstrip('/')
            # netloc is everything before the last '/', path the last segment
            netloc, _sep, path = path.rpartition('/')
            if scheme == 'ckan':
                # we have a path but did not put http:// ...
                if netloc and not netloc.startswith('http'):
                    netloc = 'http://' + netloc
            elif scheme == 'egg':
                # a single segment is treated as the netloc, not the path
                if path and not netloc:
                    netloc, path = path, ''
                netloc = netloc.strip("/")
                path = path.lstrip("/")
        return Spec(scheme, netloc, path)