예제 #1
0
파일: util_test.py 프로젝트: robcza/gglsbl3
 def test_int_to_ip(self):
     """Check util.int_to_ip against every (ip, integer) pair in self.ip_pairs."""
     for expected_ip, packed_value in self.ip_pairs:
         converted = util.int_to_ip(packed_value)
         eq_(converted, expected_ip)
예제 #2
0
파일: protocol.py 프로젝트: robcza/gglsbl3
    def canonical(self):
        """Convert URL to its canonical form.

        Reads ``self.url`` and returns the canonicalised URL as a string.
        The steps, in order:

        * strip surrounding whitespace and remove embedded newline,
          carriage-return and tab characters;
        * drop the fragment (everything from the first ``#``);
        * repeatedly percent-decode, then percent-encode once;
        * prepend ``http://`` when no scheme was parsed;
        * normalise the path with ``posixpath.normpath``, keeping a trailing
          slash if the input had one, and default an empty path to ``/``;
        * strip leading/trailing dots from the host, collapse runs of dots
          and lowercase it;
        * convert an all-digit host, or a dot-less host starting with
          ``0x``, to dotted form through ``util.int_to_ip``;
        * re-append the port (when present) and the query string (when the
          URL had one, even an empty one introduced by a bare ``?``).
        """
        def full_unescape(url):
            """
            Undo escaping of special characters in url

            Percent-decodes repeatedly until the string stops changing, so
            multiply-encoded input (e.g. ``%2541`` -> ``%41`` -> ``A``) is
            fully decoded.
            """
            # Iterative on purpose: one pass is needed per layer of encoding,
            # and the URL is caller-supplied, so a recursive version can be
            # driven past the interpreter recursion limit.
            while True:
                unescaped_url = urllib.parse.unquote(url)
                if unescaped_url == url:
                    return unescaped_url
                url = unescaped_url

        def quote(unsafe_string):
            """
            Returns url safe representation of input with special characters escaped
            """
            # Everything listed here is left as-is; note that '#' and '%' are
            # absent from the list and therefore get escaped.
            safe_chars = '!"$&\'()*+,-./:;<=>?@[\\]^_`{|}~'
            return urllib.parse.quote(unsafe_string, safe=safe_chars)

        url = self.url.strip()
        url = url.replace('\n', '').replace('\r', '').replace('\t', '')
        # Discard the fragment before any decoding so an escaped '#' is kept.
        url = url.split('#', 1)[0]
        url = quote(full_unescape(url))
        url_parts = urllib.parse.urlsplit(url)
        log.log(TRACE, 'url parts are %s', url_parts)
        if not url_parts[0]:
            # No scheme was parsed: assume http and parse again so that the
            # host and path land in the right components.
            url = 'http://%s' % url
            url_parts = urllib.parse.urlsplit(url)
        protocol = url_parts.scheme
        # NOTE(review): url_parts.hostname is None when the URL has no network
        # location; full_unescape presumably fails on that input -- confirm
        # whether callers need a clearer error here.
        host = full_unescape(url_parts.hostname)
        path = full_unescape(url_parts.path)
        log.log(TRACE, "url host is '%s' and url path is '%s'", host, path)
        query = url_parts.query
        # Distinguish "no query at all" (None) from an empty query that was
        # introduced by a bare '?', which is preserved in the output.
        if not query and '?' not in url:
            query = None
        if not path:
            path = '/'
        # normpath drops a trailing slash, so remember it and restore it below.
        has_trailing_slash = (path[-1] == '/')
        log.log(TRACE, 'url hash trailing slash: %s', has_trailing_slash)
        # The extra replace collapses any '//' that normpath leaves in place.
        path = posixpath.normpath(path).replace('//', '/')
        if has_trailing_slash and path[-1] != '/':
            path = path + '/'
        port = url_parts.port
        host = host.strip('.')
        host = re.sub(r'\.+', '.', host).lower()
        if host.isdigit():
            # Host given as a single decimal integer.
            log.debug("Host is digit: %s", host)
            host = util.int_to_ip(int(host))
            log.debug("after conversion host is now %s", host)
        if host.startswith('0x') and '.' not in host:
            # Host given as a single hexadecimal integer.
            log.debug("Host is hex: %s", host)
            host = util.int_to_ip(int(host, 16))
            log.debug("after conversion host is now %s", host)
        quoted_path = quote(path)
        quoted_host = quote(host)
        if port is not None:
            quoted_host = '%s:%s' % (quoted_host, port)
        canonical_url = '%s://%s%s' % (protocol, quoted_host, quoted_path)
        if query is not None:
            canonical_url = '%s?%s' % (canonical_url, query)
        log.log(TRACE, 'returning canonical url %s', canonical_url)
        return canonical_url