コード例 #1
ファイル: test.py プロジェクト: djiesamsoe/url-py
    def test_strict_escape(self):
        '''Test strict mode escaping'''
        examples = [
            ('danny%27s pub'                , 'danny%27s%20pub'                  ),
            ('http://*****:*****@foo.com'     , 'http://*****:*****@foo.com'         ),
            (u'http://José:no [email protected]'  , 'http://Jos%C3%A9:no%[email protected]'),
            ('http://oops!:don%[email protected]' , 'http://oops!:don%[email protected]'     ),
            (u'española,nm%2cusa.html?gunk=junk+glunk&foo=bar baz',

        base = 'http://testing.com/'
        for bad, good in examples:
            bad = base + bad
            good = base + good
            self.assertEqual(url.parse(bad).escape(strict=True).utf8(), good)
            # Escaping should also be idempotent
                url.parse(bad).escape(strict=True).escape(strict=True).utf8(), good)

        # Examples with userinfo
        examples = [
            ('http://user%[email protected]/', 'http://user%[email protected]/')
        for bad, good in examples:
            self.assertEqual(url.parse(bad).escape(strict=True).utf8(), good)
            # Escaping should also be idempotent
                url.parse(bad).escape(strict=True).escape(strict=True).utf8(), good)

        # Test Unicode escaping in strict mode
        u = url.URL(u'http', u'foo.com', None, u'española,nm%2cusa.html', u'', u'gunk=junk+glunk&foo=bar baz', u'')
        self.assertTrue(isinstance(u._path, str))
        self.assertEqual(u._path, 'espa%C3%B1ola,nm%2Cusa.html')
コード例 #2
ファイル: test.py プロジェクト: voidlily/url-py
 def test(rules, example, pld, tld):
         """Check the `pld` and `tld` attributes parsed from `example`.

         NOTE(review): `rules` is accepted but never used in the visible body;
         presumably it was meant to be installed as the public-suffix list
         before the assertions -- confirm against the full test file.
         """
         assert_equal(url.parse(example).pld, pld)
         assert_equal(url.parse(example).tld, tld)
         # Hand the 'psl/2016-08-16.psl' package data of `url` to set_psl
         # (presumably restoring the bundled list for later tests -- confirm).
         url.set_psl(pkgutil.get_data('url', 'psl/2016-08-16.psl'))
コード例 #3
ファイル: test.py プロジェクト: seomoz/url-py
 def test(rules, example, pld, tld):
         """Check the `pld` and `tld` attributes parsed from `example`.

         NOTE(review): `rules` is accepted but never used in the visible body;
         presumably it was meant to be installed as the public-suffix list
         before the assertions -- confirm against the full test file.
         """
         assert_equal(url.parse(example).pld, pld)
         assert_equal(url.parse(example).tld, tld)
         # Hand the 'psl/2016-08-16.psl' package data of `url` to set_psl
         # (presumably restoring the bundled list for later tests -- confirm).
         url.set_psl(pkgutil.get_data('url', 'psl/2016-08-16.psl'))
コード例 #4
ファイル: __init__.py プロジェクト: mathieulongtin/nsq-py
 def __init__(self, target, **params):
     """Remember the parsed host URL and any extra keyword parameters.

     `target` is either a URL string, which is parsed as-is, or a
     two-item tuple/list, which is formatted as 'http://<a>:<b>/' and
     then parsed. Any other type raises TypeError.

     The remaining keyword arguments are stored unchanged in `_params`.
     """
     if isinstance(target, basestring):
         self._host = url.parse(target)
     elif isinstance(target, (tuple, list)):
         # `%` treats a list as a single value, so convert to a tuple to
         # fill both placeholders regardless of the sequence type.
         self._host = url.parse('http://%s:%s/' % tuple(target))
     else:
         # Only reject unsupported types; previously this raise sat inside
         # the tuple/list branch and fired for valid input.
         raise TypeError('Host must be a string or tuple')
     self._params = params
コード例 #5
ファイル: test.py プロジェクト: brugeman/url-py
 def test(uni, puny):
     assert_equal(url.parse(uni).escape().punycode().utf8(), puny)
     # Also make sure punycode is idempotent
         url.parse(uni).escape().punycode().punycode().utf8(), puny)
     # Make sure that we can reverse the procedure correctly
         url.parse(uni).escape().punycode().unpunycode().unescape(), uni)
     # And we get what we'd expect going the opposite direction
     assert_equal(url.parse(puny).unescape().unpunycode().unicode(), uni)
コード例 #6
ファイル: test.py プロジェクト: wil/url-py
    def test_str_repr(self):
        '''Make sure str and repr produce reasonable results'''
        examples = [
            ('http://foo.com/', 'http://foo.com/'),
            ('http://FOO.com/', 'http://foo.com/')

        for toparse, strng in examples:
            self.assertEqual(str(url.parse(toparse)), strng)
                '<url.URL object "%s" >' % strng)
コード例 #7
ファイル: test.py プロジェクト: djiesamsoe/url-py
    def test_str_repr(self):
        '''Make sure str and repr produce reasonable results'''
        examples = [
            ('http://foo.com/', 'http://foo.com/'),
            ('http://FOO.com/', 'http://foo.com/')

        for toparse, strng in examples:
            self.assertEqual(str(url.parse(toparse)), strng)
                '<url.URL object "%s" >' % strng)
コード例 #8
ファイル: test.py プロジェクト: seomoz/url-py
 def test(example):
     assert_equal(url.parse(example).escape().punycode().unicode, example)
     # Also make sure punycode is idempotent
         url.parse(example).escape().punycode().punycode().unicode, example)
     # Make sure that we can reverse the procedure correctly
     # And we get what we'd expect going the opposite direction
         url.parse(example).unescape().unpunycode().unicode, example)
コード例 #9
ファイル: test.py プロジェクト: mherdeg/url-py
 def test_empty_hostname(self):
     '''Allow empty hostnames'''
     examples = [
     for example in examples:
         # Equal to itself
         self.assertEqual(url.parse(example), example)
         # String representation equal to the provided example
         self.assertEqual(url.parse(example).utf8(), example)
コード例 #10
ファイル: test.py プロジェクト: djiesamsoe/url-py
 def test_empty_hostname(self):
     '''Allow empty hostnames'''
     examples = [
     for example in examples:
         # Equal to itself
         self.assertEqual(url.parse(example), example)
         # String representation equal to the provided example
         self.assertEqual(url.parse(example).utf8(), example)
コード例 #11
ファイル: test.py プロジェクト: masayuko/url-py
 def test(uni, puny, upuny, epuny):
     assert_equal(url.parse(uni).escape().punycode().utf8(), epuny)
     # Also make sure punycode is idempotent
         url.parse(uni).escape().punycode().punycode().utf8(), epuny)
     # Make sure that we can reverse the procedure correctly
     # And we get what we'd expect going the opposite direction
         url.parse(puny).unescape().unpunycode().unicode(), uni)
コード例 #12
ファイル: spider.py プロジェクト: sunlightlabs/nanospider
    def _initialize_crawl(self):
        if self.crawl_requires_gevent:
            from gevent.monkey import saved
            if 'socket' not in saved:
                # we're not gevent-monkey-patched
                raise RuntimeError(
                    "Spider.crawl() needs gevent monkey patching to have been applied"


        if len(
                        "SELECT * FROM seen LIMIT 1"))
        ) == 0 and self._queue.empty():
            # we're at the beginning, so start with the home page
            # follow any homepage redirects, so we get the right protocol and domain
            tmp_response = requests.get("http://%s/" % self.domain)

            first_url = moz_url.parse(tmp_response.url)
            if first_url._host not in self._allowed_hosts:

コード例 #13
ファイル: page.py プロジェクト: apendleton/mlscrape
def _response_to_features(response):
    """Build a set of string features describing an HTML response.

    Features emitted:
      * "tag-<name>" for every element in the parsed document,
      * "class-<name>" for every non-empty entry of an element's class
        attribute,
      * "id-<value>" for every element carrying an id attribute,
      * "path-<index>-<segment>" for every segment of the URL path,
      * "qse-<key>" and "qsv-<key>-<value>" for every query-string key and
        each of its values.

    `response` must expose `.text` (the markup) and `.url`.
    Returns the set of feature strings.
    """
    features = set()
    tree = etree.HTML(response.text)

    for item in tree.iter(tag=etree.Element):
        features.add("tag-%s" % item.tag)

        if "class" in item.attrib and item.attrib["class"].strip():
            for raw_class in whitespace.split(item.attrib["class"]):
                class_name = raw_class.strip()
                if class_name:
                    features.add("class-%s" % class_name)

        if "id" in item.attrib:
            features.add("id-%s" % item.attrib["id"])

    # path parts: drop the empty string that precedes the leading "/"
    u = moz_url.parse(response.url)
    path = u._path.split("/")[1:]
    for idx, part in enumerate(path):
        # Use the individual segment; formatting the whole `path` list here
        # produced the same list repr for every index.
        features.add("path-%s-%s" % (idx, part))

    if u._query:
        # .items() works on both Python 2 and 3, unlike .iteritems()
        for k, vl in urlparse.parse_qs(u._query).items():
            features.add("qse-%s" % k)
            for v in vl:
                features.add("qsv-%s-%s" % (k, v))

    return features
コード例 #14
def canonical_url(uri):
    Return the canonical representation of a given URI.
    This assumes the `uri` has a scheme.

    * When a default port corresponding for the scheme is explicitly declared
      (such as port 80 for http), the port will be removed from the output.
    * Fragments '#' are not removed.
     * Params and query string arguments are not reordered.
        parsed = urlpy.parse(uri)
        if not parsed:
        if not (getattr(parsed, '_scheme', None)
                and getattr(parsed, '_host', None)):

        if TRACE: logger_debug('canonical_url: parsed:', parsed)
        sanitized = parsed.sanitize()
        if TRACE:
            logger_debug('canonical_url: sanitized:', sanitized)

        punycoded = sanitized.punycode()
        if TRACE:
            logger_debug('canonical_url: punycoded:', punycoded)

        if punycoded._port == urlpy.PORTS.get(punycoded._scheme, None):
            punycoded._port = None
        return punycoded.utf8()
    except Exception as e:
        if TRACE:
            logger_debug('canonical_url: failed for:', uri, 'with:', repr(e))
        # ignore it
コード例 #15
ファイル: page.py プロジェクト: sunlightlabs/mlscrape
def _response_to_features(response):
    """Build a set of string features describing an HTML response.

    Features emitted:
      * 'tag-<name>' for every element in the parsed document,
      * 'class-<name>' for every non-empty entry of an element's class
        attribute,
      * 'id-<value>' for every element carrying an id attribute,
      * 'path-<index>-<segment>' for every segment of the URL path,
      * 'qse-<key>' and 'qsv-<key>-<value>' for every query-string key and
        each of its values.

    `response` must expose `.text` (the markup) and `.url`.
    Returns the set of feature strings.
    """
    features = set()
    tree = etree.HTML(response.text)

    for item in tree.iter(tag=etree.Element):
        features.add("tag-%s" % item.tag)

        if 'class' in item.attrib and item.attrib['class'].strip():
            for raw_class in whitespace.split(item.attrib['class']):
                class_name = raw_class.strip()
                if class_name:
                    features.add("class-%s" % class_name)

        if 'id' in item.attrib:
            features.add("id-%s" % item.attrib['id'])

    # path parts: drop the empty string that precedes the leading "/"
    u = moz_url.parse(response.url)
    path = u._path.split("/")[1:]
    for idx, part in enumerate(path):
        # Use the individual segment; formatting the whole `path` list here
        # produced the same list repr for every index.
        features.add('path-%s-%s' % (idx, part))

    if u._query:
        # .items() works on both Python 2 and 3, unlike .iteritems()
        for k, vl in urlparse.parse_qs(u._query).items():
            features.add('qse-%s' % k)
            for v in vl:
                features.add('qsv-%s-%s' % (k, v))

    return features
コード例 #16
ファイル: test.py プロジェクト: wil/url-py
    def test_abspath(self):
        '''Make sure absolute path checking works correctly'''
        examples = [
            ('howdy'           , 'howdy'        ),
            ('hello//how//are' , 'hello/how/are'),
            ('hello/../how/are', 'how/are'      ),
            ('hello//..//how/' , 'how/'         ),
            ('a/b/../../c'     , 'c'            ),
            ('../../../c'      , 'c'            ),
            ('./hello'         , 'hello'        ),
            ('./././hello'     , 'hello'        ),
            ('a/b/c/'          , 'a/b/c/'       ),
            ('a/b/c/..'        , 'a/b/'         ),
            ('a/b/.'           , 'a/b/'         ),
            ('a/b/./././'      , 'a/b/'         ),
            ('a/b/../'         , 'a/'           ),
            ('.'               , ''             ),
            ('../../..'        , ''             ),
            ('////foo'         , 'foo'          )

        base = 'http://testing.com/'
        for bad, good in examples:
            bad = base + bad
            good = base + good
            self.assertEqual(url.parse(bad).abspath().utf8(), good)
コード例 #17
    def filter_links(self, links):
        """Queue not-yet-indexed video URLs found among `links`.

        For each link whose URL ends with "/" or contains "?dir=", and which
        `isVideoURL` accepts, the URL is normalized and added to the set
        stored under the configured 'key_toIndex' key unless `self.r` already
        has a key equal to the normalized URL.

        Returns `filteredLinks`.

        NOTE(review): in the visible code nothing is ever appended to
        `filteredLinks`, and the video check is nested *inside* the
        directory test even though the comments describe it as the
        alternative ("if not, ...") branch. Lines appear to be missing
        (likely an append and an `else:`) -- confirm against the original.
        """
        filteredLinks = []

        for link in links:
            # if link is a directory, then follow it
            # TODO: "?dir=" is provided to properly recognize as dirs the ones which
            #       are specified as queries (it is an in-place fix and should be removed)
            if link.url.endswith("/") or link.url.find("?dir=")>=0:

            # if not, verify whether it is a video file: if it is then save it, otherwise skip
                if isVideoURL(link.url):
                    # normalize the URL (canonical ordering, escaping, punycode, UTF-8 output)
                    # normLinkURL = link.url
                    normLinkURL = url.parse(link.url).canonical().escape().punycode().utf8()

                    # save the url... but only if it has not been indexed yet
                    # check if the URL exists in redis
                    if not self.r.exists(normLinkURL):
                        # if not, add it to the toIndex queue
                        # (NOTE: it might be already present in toIndex, but we don't mind as it is a set)
                        self._logger.info("sadd %s %s " % (self._conf['key_toIndex'],normLinkURL))
                        self.r.sadd(self._conf['key_toIndex'], normLinkURL)
        return filteredLinks
コード例 #18
ファイル: test.py プロジェクト: djiesamsoe/url-py
    def test_abspath(self):
        '''Make sure absolute path checking works correctly'''
        examples = [
            ('howdy'           , 'howdy'        ),
            ('hello//how//are' , 'hello/how/are'),
            ('hello/../how/are', 'how/are'      ),
            ('hello//..//how/' , 'how/'         ),
            ('a/b/../../c'     , 'c'            ),
            ('../../../c'      , 'c'            ),
            ('./hello'         , 'hello'        ),
            ('./././hello'     , 'hello'        ),
            ('a/b/c/'          , 'a/b/c/'       ),
            ('a/b/c/..'        , 'a/b/'         ),
            ('a/b/.'           , 'a/b/'         ),
            ('a/b/./././'      , 'a/b/'         ),
            ('a/b/../'         , 'a/'           ),
            ('.'               , ''             ),
            ('../../..'        , ''             ),
            ('////foo'         , 'foo'          )

        base = 'http://testing.com/'
        for bad, good in examples:
            bad = base + bad
            good = base + good
            self.assertEqual(url.parse(bad).abspath().utf8(), good)
コード例 #19
def save(url_, path="", wait=60):
    """Download `url_` and write the result to disk, returning the path used.

    `url_` may be a URL string or any object with a `url` attribute.
    `path` is used as the target file when its 4th- or 5th-from-last
    character is a "." (i.e. it ends in a 3- or 4-character extension);
    otherwise it is treated as a directory and the filename parsed from the
    URL is appended to it.
    `wait` is passed through to `download`.
    """
    if hasattr(url_, "url"):
        url_ = url_.url
    if len(path) < 5 or "." not in path[-5:-3]:
        # No recognizable extension: derive the filename from the URL.
        filename = url.parse(str(url_)).filename
        path = os.path.join(path, filename)
    # Download before opening, so a failed download does not leave behind a
    # truncated/empty file, and close the handle deterministically.
    data = download(url_, wait)
    with open(path, "w") as handle:
        handle.write(data)
    return path
コード例 #20
ファイル: __init__.py プロジェクト: imclab/plotdevice-libs
def save(url_, path="", wait=60):
    """Download `url_` and write the result to disk, returning the path used.

    `url_` may be a URL string or any object with a `url` attribute.
    `path` is used as the target file when its 4th- or 5th-from-last
    character is a "." (i.e. it ends in a 3- or 4-character extension);
    otherwise it is treated as a directory and the filename parsed from the
    URL is appended to it.
    `wait` is passed through to `download`.
    """
    if hasattr(url_, "url"):
        url_ = url_.url
    if len(path) < 5 or "." not in path[-5:-3]:
        # No recognizable extension: derive the filename from the URL.
        filename = url.parse(str(url_)).filename
        path = os.path.join(path, filename)
    # Download before opening, so a failed download does not leave behind a
    # truncated/empty file, and close the handle deterministically.
    data = download(url_, wait)
    with open(path, "w") as handle:
        handle.write(data)
    return path
コード例 #21
ファイル: test.py プロジェクト: mherdeg/url-py
 def test_deuserinfo(self):
     '''Correctly removes userinfo'''
     examples = [
         ('http://*****:*****@foo.com/', 'http://foo.com/'),
         ('http://[email protected]/', 'http://foo.com/')
     for bad, good in examples:
         self.assertEqual(url.parse(bad).deuserinfo().utf8(), good)
コード例 #22
ファイル: test.py プロジェクト: djiesamsoe/url-py
 def test_deuserinfo(self):
     '''Correctly removes userinfo'''
     examples = [
         ('http://*****:*****@foo.com/', 'http://foo.com/'),
         ('http://[email protected]/', 'http://foo.com/')
     for bad, good in examples:
         self.assertEqual(url.parse(bad).deuserinfo().utf8(), good)
コード例 #23
ファイル: test.py プロジェクト: djiesamsoe/url-py
 def test_tld(self):
     '''Test the pay-level domain functionality'''
     examples = [
         ('http://foo.com/bar'    , 'com'),
         ('http://bar.foo.com/bar', 'com'),
         ('/foo'                  , '')
     for query, result in examples:
         self.assertEqual(url.parse(query).tld(), result)
コード例 #24
ファイル: test.py プロジェクト: mherdeg/url-py
 def test_tld(self):
     '''Test the pay-level domain functionality'''
     examples = [
         ('http://foo.com/bar'    , 'com'),
         ('http://bar.foo.com/bar', 'com'),
         ('/foo'                  , '')
     for query, result in examples:
         self.assertEqual(url.parse(query).tld(), result)
コード例 #25
ファイル: server.py プロジェクト: eXenon/miaou
def handle(request):

    # URL Parsing
    s = "api."
    if 'project' in request.match_info:
        p = re.sub('[^0-9a-zA-Z]+', '', request.match_info['project'])
        s += p + "."
    if 'module' in request.match_info:
        p = re.sub('[^0-9a-zA-Z]+', '', request.match_info['module'])
        s += p + "."
    if 'action' in request.match_info:
        p = re.sub('[^0-9a-zA-Z]+', '', request.match_info['action'])
        s += p
    if verbose_logging:
        print("Incoming request - " + str(request.raw_path))

    # Session loading
    if "cookie" in request.headers and "AIOHTTP_SESSION" in request.headers[
        cookies = request.headers.getall("cookie")[0]
        aiocookie = re.search("AIOHTTP_SESSION=([0-9a-z]{32})", cookies)
        if aiocookie:
            session = sessions.Session(aiocookie.group(1))
            if verbose_logging:
                print("Session ID - " + session.id)
            session = sessions.Session()
            if verbose_logging:
                print("New Session with ID - " + session.id)
        session = sessions.Session()
        if verbose_logging:
            print("New Session with ID - " + session.id)

    # Response building
        module = importlib.import_module(s)
        arguments = url.parse(request.GET, module.arguments)
        response = yield from module.process(session, arguments)
        headers = response.get('headers', {})
        if session.is_new_session:
            headers.update({"Set-Cookie": "AIOHTTP_SESSION=" + session.id})
        if 'json' in response:
            # Dump to json if the module wants to return json
            return respond(headers=headers,
                           status=response.get('status', 200),
                           text=json.dumps(response.get("json", "")),
            return respond(headers=headers,
                           status=response.get('status', 200),
                           text=response.get("text", ""))
    except ImportError:
        return respond(status=404, text="Page does not exist")
    except Exception as e:
        s = str(e.args) if verbose_errors else "No verbose errors."
        return respond(status=500, text="Error while querying data.\n" + s)
コード例 #26
ファイル: test.py プロジェクト: seomoz/url-py
 def test(first, second):
     """Assert that `first` and `second` are equivalent URLs in every direction."""
     # Each direction is checked against a parsed URL object and then
     # against the plain string form.
     for left, right in ((first, second), (second, first)):
         assert url.parse(left).equiv(url.parse(right))
         assert url.parse(left).equiv(right)
     # Every URL should also be equivalent to itself.
     for each in (first, second):
         assert url.parse(each).equiv(each)
コード例 #27
ファイル: test.py プロジェクト: voidlily/url-py
 def test(first, second):
     """Assert that `first` and `second` are equivalent URLs in every direction."""
     # Each direction is checked against a parsed URL object and then
     # against the plain string form.
     for left, right in ((first, second), (second, first)):
         assert url.parse(left).equiv(url.parse(right))
         assert url.parse(left).equiv(right)
     # Every URL should also be equivalent to itself.
     for each in (first, second):
         assert url.parse(each).equiv(each)
コード例 #28
ファイル: test.py プロジェクト: wil/url-py
    def test_absolute(self):
        '''Can it recognize if it's a relative or absolute url?'''
        examples = [
            ('http://foo.com/bar', True ),
            ('foo/'              , False),
            ('http://foo.com'    , True ),
            ('/foo/bar/../'      , False)

        for query, result in examples:
            self.assertEqual(url.parse(query).absolute(), result)
コード例 #29
ファイル: test.py プロジェクト: wil/url-py
    def test_escape(self):
        '''Make sure we escape paths correctly'''
        examples = [
            ('hello%20and%20how%20are%20you', 'hello%20and%20how%20are%20you'),
            ('danny\'s pub'                 , 'danny%27s%20pub'              ),
            ('danny%27s pub?foo=bar&yo'     , 'danny%27s%20pub?foo=bar&yo'   ),
            # Thanks to @myronmarston for these test cases
            ('foo?bar none=foo bar'         , 'foo?bar%20none=foo%20bar'     ),
            ('foo;a=1;b=2?a=1&b=2'          , 'foo;a=1;b=2?a=1&b=2'          ),
            ('foo?bar=["hello","howdy"]'    ,

        base = 'http://testing.com/'
        for bad, good in examples:
            bad = base + bad
            good = base + good
            self.assertEqual(url.parse(bad).escape().utf8(), good)
            # Escaping should also be idempotent
            self.assertEqual(url.parse(bad).escape().escape().utf8(), good)
コード例 #30
ファイル: test.py プロジェクト: wil/url-py
 def test_lower(self):
     '''Can lowercase the domain name correctly'''
     examples = [
         ('www.TESTING.coM'    , 'www.testing.com/'   ),
         ('WWW.testing.com'    , 'www.testing.com/'   ),
         ('WWW.testing.com/FOO', 'www.testing.com/FOO')
     for bad, good in examples:
         bad = 'http://' + bad
         good = 'http://' + good
         self.assertEqual(url.parse(bad).utf8(), good)
コード例 #31
ファイル: test.py プロジェクト: djiesamsoe/url-py
 def test_userinfo(self):
     '''Allow a userinfo section'''
     examples = [
         ('http://*****:*****@foo.com',   'http://*****:*****@foo.com'),
         ('http://[email protected]', 'http://[email protected]')
     suffix = '/page.html'
     for bad, good in examples:
         bad = bad + suffix
         good = good + suffix
         self.assertEqual(url.parse(bad).utf8(), good)
コード例 #32
ファイル: test.py プロジェクト: djiesamsoe/url-py
 def test_lower(self):
     '''Can lowercase the domain name correctly'''
     examples = [
         ('www.TESTING.coM'    , 'www.testing.com/'   ),
         ('WWW.testing.com'    , 'www.testing.com/'   ),
         ('WWW.testing.com/FOO', 'www.testing.com/FOO')
     for bad, good in examples:
         bad = 'http://' + bad
         good = 'http://' + good
         self.assertEqual(url.parse(bad).utf8(), good)
コード例 #33
ファイル: test.py プロジェクト: djiesamsoe/url-py
    def test_absolute(self):
        '''Can it recognize if it's a relative or absolute url?'''
        examples = [
            ('http://foo.com/bar', True ),
            ('foo/'              , False),
            ('http://foo.com'    , True ),
            ('/foo/bar/../'      , False)

        for query, result in examples:
            self.assertEqual(url.parse(query).absolute(), result)
コード例 #34
ファイル: test.py プロジェクト: djiesamsoe/url-py
    def test_defrag(self):
        '''Correctly defrags urls'''
        examples = [
            ('foo#bar', 'foo')

        base = 'http://testing.com/'
        for bad, good in examples:
            bad = base + bad
            good = base + good
            self.assertEqual(url.parse(bad).defrag().utf8(), good)
コード例 #35
ファイル: test.py プロジェクト: djiesamsoe/url-py
    def test_sanitize(self):
        '''Make sure the sanitize method does all that it should'''
        examples = [
            ('../foo/bar none', 'foo/bar%20none')

        base = 'http://testing.com/'
        for bad, good in examples:
            bad = base + bad
            good = base + good
            self.assertEqual(url.parse(bad).sanitize().utf8(), good)
コード例 #36
ファイル: test.py プロジェクト: wil/url-py
    def test_sanitize(self):
        '''Make sure the sanitize method does all that it should'''
        examples = [
            ('../foo/bar none', 'foo/bar%20none')

        base = 'http://testing.com/'
        for bad, good in examples:
            bad = base + bad
            good = base + good
            self.assertEqual(url.parse(bad).sanitize().utf8(), good)
コード例 #37
ファイル: test.py プロジェクト: mherdeg/url-py
 def test_userinfo(self):
     '''Allow a userinfo section'''
     examples = [
         ('http://*****:*****@foo.com',   'http://*****:*****@foo.com'),
         ('http://[email protected]', 'http://[email protected]')
     suffix = '/page.html'
     for bad, good in examples:
         bad = bad + suffix
         good = good + suffix
         self.assertEqual(url.parse(bad).utf8(), good)
コード例 #38
ファイル: test.py プロジェクト: wil/url-py
    def test_defrag(self):
        '''Correctly defrags urls'''
        examples = [
            ('foo#bar', 'foo')

        base = 'http://testing.com/'
        for bad, good in examples:
            bad = base + bad
            good = base + good
            self.assertEqual(url.parse(bad).defrag().utf8(), good)
コード例 #39
def url_host_domain(url):
    """
    Return a tuple of the (host, domain) of a URL, both lowercased, or
    (None, None) when the parsed URL has no host. Assumes that the URL has a
    scheme.
    """
    parsed = urlpy.parse(url)
    host = parsed._host
    if not host:
        return None, None
    host = host.lower()
    domain = parsed.pld().lower()
    return host, domain
コード例 #40
ファイル: test.py プロジェクト: wil/url-py
    def test_canonical(self):
        '''Correctly canonicalizes urls'''
        examples = [
            ('?b=2&a=1&c=3', '?a=1&b=2&c=3'),
            (';b=2;a=1;c=3', ';a=1;b=2;c=3')

        base = 'http://testing.com/'
        for bad, good in examples:
            bad = base + bad
            good = base + good
            self.assertEqual(url.parse(bad).canonical().utf8(), good)
コード例 #41
ファイル: test.py プロジェクト: djiesamsoe/url-py
    def test_canonical(self):
        '''Correctly canonicalizes urls'''
        examples = [
            ('?b=2&a=1&c=3', '?a=1&b=2&c=3'),
            (';b=2;a=1;c=3', ';a=1;b=2;c=3')

        base = 'http://testing.com/'
        for bad, good in examples:
            bad = base + bad
            good = base + good
            self.assertEqual(url.parse(bad).canonical().utf8(), good)
コード例 #42
ファイル: finder.py プロジェクト: 10imaging/scancode-toolkit
def url_host_domain(url):
    """
    Return a tuple of the (host, domain) of a URL, both lowercased, or
    (None, None) when the parsed URL has no host. Assumes that the URL has a
    scheme.
    """
    parsed = urlpy.parse(url)
    host = parsed._host
    if not host:
        return None, None
    host = host.lower()
    domain = parsed.pld().lower()
    return host, domain
コード例 #43
ファイル: test.py プロジェクト: djiesamsoe/url-py
    def test_punycode(self):
        '''Make sure punycode encoding works correctly'''
        examples = [

        for uni, puny in examples:
            self.assertEqual(url.parse(uni).escape().punycode().utf8(), puny)
            # Also make sure punycode is idempotent
                url.parse(uni).escape().punycode().punycode().utf8(), puny)
            # Make sure that we can reverse the procedure correctly
            # And we get what we'd expect going the opposite direction
                url.parse(puny).unescape().unpunycode().unicode(), uni)

        # Make sure that we can't punycode or unpunycode relative urls
        examples = ['foo', '../foo', '/bar/foo']
        for relative in examples:
            self.assertRaises(TypeError, url.parse(relative).punycode)
            self.assertRaises(TypeError, url.parse(relative).unpunycode)
コード例 #44
ファイル: test.py プロジェクト: wil/url-py
    def test_punycode(self):
        '''Make sure punycode encoding works correctly'''
        examples = [

        for uni, puny in examples:
            self.assertEqual(url.parse(uni).escape().punycode().utf8(), puny)
            # Also make sure punycode is idempotent
                url.parse(uni).escape().punycode().punycode().utf8(), puny)
            # Make sure that we can reverse the procedure correctly
            # And we get what we'd expect going the opposite direction
                url.parse(puny).unescape().unpunycode().unicode(), uni)

        # Make sure that we can't punycode or unpunycode relative urls
        examples = ['foo', '../foo', '/bar/foo']
        for relative in examples:
            self.assertRaises(TypeError, url.parse(relative).punycode)
            self.assertRaises(TypeError, url.parse(relative).unpunycode)
コード例 #45
ファイル: urls.py プロジェクト: DistilledLtd/moneypenny
def normalize(url):
    """ Uses the Moz URL library to normalise and strip the URLs of
        extraneous information, and the urlparse library to ensure it
        is not a blank URL.

        'http://' is prepended when the input does not start with 'http',
        and the whole URL is lowercased before parsing.

        Returns the normalised URL (fragment removed, path made absolute,
        canonicalised, punycoded) as produced by `.utf8()`, or None when
        the URL has no network location.
    """
    if not url.startswith('http'):
        url = 'http://' + url
    url = url.lower()
    url_parts = urlparse(url)
    if not url_parts.netloc:
        # Blank URL: nothing to normalise.
        return None
    url_obj = parse(url).defrag().abspath().canonical().punycode()
    return url_obj.utf8()
コード例 #46
def clean_url(u):
    """Return `u` as a string, unwrapping a single embedded URL parameter.

    When `get_http_param` yields exactly one item for the parsed URL's
    query, that item is popped and cleaned recursively; otherwise the parsed
    URL itself is returned in string form.
    """
    parsed = url.parse(u)
    candidates = get_http_param(parsed.query)
    if len(candidates) != 1:
        return str(parsed)
    return clean_url(candidates.pop())
コード例 #47
ファイル: urls.py プロジェクト: Brainlabs-Digital/moneypenny
def normalize(url):
    """ Uses the Moz URL library to normalise and strip the URLs of
        extraneous information, and the urlparse library to ensure it
        is not a blank URL.

        'http://' is prepended when the input does not start with 'http',
        and the whole URL is lowercased before parsing.

        Returns the normalised URL (fragment removed, path made absolute,
        canonicalised, punycoded) as produced by `.utf8()`, or None when
        the URL has no network location.
    """
    if not url.startswith('http'):
        url = 'http://' + url
    url = url.lower()
    url_parts = urlparse(url)
    if not url_parts.netloc:
        # Blank URL: nothing to normalise.
        return None
    url_obj = parse(url).defrag().abspath().canonical().punycode()
    return url_obj.utf8()
コード例 #48
 def test(bad, good, ugood, egood):
     """Check the str / utf8 / unicode forms of `bad` after escaping."""
     def once():
         return url.parse(bad).escape()

     def twice():
         return url.parse(bad).escape().escape()

     # The second pass verifies that escaping is idempotent. A fresh parse is
     # made for every assertion, exactly one form being checked per parse.
     for escaped in (once, twice):
         assert_equal(str(escaped()), good)
         assert_equal(escaped().utf8(), egood)
         assert_equal(escaped().unicode(), ugood)
コード例 #49
ファイル: test.py プロジェクト: mherdeg/url-py
    def test_strict_escape(self):
        '''Test strict mode escaping'''
        examples = [
            ('danny%27s pub'                , 'danny%27s%20pub'                  ),
            ('this%5Fand%5Fthat'            , 'this_and_that'                    ),
            ('http://*****:*****@foo.com'     , 'http://*****:*****@foo.com'         ),
            (u'http://José:no [email protected]'  , 'http://Jos%C3%A9:no%[email protected]'),
            ('http://oops!:don%[email protected]' , 'http://oops!:don%[email protected]'     ),
            (u'española,nm%2cusa.html?gunk=junk+glunk&foo=bar baz',
            ('http://foo.com/bar\nbaz.html\n', 'http://foo.com/bar%0Abaz.html%0A'),
            ('http://foo.com/bar.jsp?param=\n/value%2F', 'http://foo.com/bar.jsp?param=%0A/value%2F'),

        base = 'http://testing.com/'
        for bad, good in examples:
            bad = base + bad
            good = base + good
            self.assertEqual(url.parse(bad).escape(strict=True).utf8(), good)
            # Escaping should also be idempotent
                url.parse(bad).escape(strict=True).escape(strict=True).utf8(), good)

        # Examples with userinfo
        examples = [
            ('http://user%[email protected]/', 'http://user%[email protected]/')
        for bad, good in examples:
            self.assertEqual(url.parse(bad).escape(strict=True).utf8(), good)
            # Escaping should also be idempotent
                url.parse(bad).escape(strict=True).escape(strict=True).utf8(), good)

        # Test Unicode escaping in strict mode
        u = url.URL(u'http', u'foo.com', None, u'española,nm%2cusa.html', u'', u'gunk=junk+glunk&foo=bar baz', u'')
        self.assertTrue(isinstance(u._path, str))
        self.assertEqual(u._path, 'espa%C3%B1ola,nm%2Cusa.html')
コード例 #50
ファイル: test.py プロジェクト: seomoz/url-py
def test_component_assignment_unicode():
    parsed = url.parse('http://[email protected]:80/path;params?query#fragment')
    parsed.scheme = u'https'
    parsed.userinfo = u'username'
    parsed.host = u'foo.example.com'
    parsed.port = 443
    parsed.path = u'/another/path'
    parsed.params = u'no-params'
    parsed.query = u'no-query'
    parsed.fragment = u'no-fragment'
        'https://[email protected]:443/another/path;no-params?no-query#no-fragment'
コード例 #51
ファイル: finder.py プロジェクト: 10imaging/scancode-toolkit
def canonical_url(uri):
    """
    Return the canonical representation of a given URI.
    This assumes the `uri` has a scheme.

    * When a default port corresponding for the scheme is explicitly declared
      (such as port 80 for http), the port will be removed from the output.
    * Fragments '#' are not removed.
    * Params and query string arguments are not reordered.
    """
    normalized = urlpy.parse(uri).sanitize().punycode()
    # Drop the port when it equals the scheme's entry in urlpy.PORTS.
    if normalized._port == urlpy.PORTS.get(normalized._scheme, None):
        normalized._port = None
    return normalized.utf8()
コード例 #52
ファイル: test.py プロジェクト: brugeman/url-py
def test_relative():
    '''Yield one relative-URL resolution check per example pair.'''
    base = url.parse('http://testing.com/a/b/c')

    def test(rel, absolute):
        # Resolve `rel` against the shared base and compare the utf8() form.
        assert_equal(base.relative(rel).utf8(), absolute)

    cases = (
        ('../foo', 'http://testing.com/a/foo'),
        ('./foo', 'http://testing.com/a/b/foo'),
        ('foo', 'http://testing.com/a/b/foo'),
        ('/foo', 'http://testing.com/foo'),
        ('http://foo.com/bar', 'http://foo.com/bar'),
        (u'/foo', 'http://testing.com/foo'),
    )

    # Each yielded tuple is (callable, rel, absolute).
    for case in cases:
        yield (test,) + case
コード例 #53
ファイル: test.py プロジェクト: wil/url-py
    def test_relative(self):
        '''Test relative url parsing'''
        # All examples are resolved against this one base URL.
        base = url.parse('http://testing.com/a/b/c')
        examples = [
            ('../foo'            , 'http://testing.com/a/foo'  ),
            ('./foo'             , 'http://testing.com/a/b/foo'),
            ('foo'               , 'http://testing.com/a/b/foo'),
            ('/foo'              , 'http://testing.com/foo'    ),
            ('http://foo.com/bar', 'http://foo.com/bar'        ),
            (u'/foo'             , 'http://testing.com/foo'    ),
        ]

        for rel, absolute in examples:
            self.assertEqual(base.relative(rel).utf8(), absolute)
コード例 #54
ファイル: test.py プロジェクト: djiesamsoe/url-py
    def test_relative(self):
        '''Test relative url parsing'''
        # All examples are resolved against this one base URL.
        base = url.parse('http://testing.com/a/b/c')
        examples = [
            ('../foo'            , 'http://testing.com/a/foo'  ),
            ('./foo'             , 'http://testing.com/a/b/foo'),
            ('foo'               , 'http://testing.com/a/b/foo'),
            ('/foo'              , 'http://testing.com/foo'    ),
            ('http://foo.com/bar', 'http://foo.com/bar'        ),
            (u'/foo'             , 'http://testing.com/foo'    ),
        ]

        for rel, absolute in examples:
            self.assertEqual(base.relative(rel).utf8(), absolute)
コード例 #55
ファイル: test.py プロジェクト: voidlily/url-py
def test_component_assignment_unicode():
    '''Assign a unicode value to every URL component and check the result.'''
    # Every component parsed from this URL is overwritten below, so only the
    # assigned values can appear in the final URL.
    parsed = url.parse('http://user@example.com:80/path;params?query#fragment')
    parsed.scheme = u'https'
    parsed.userinfo = u'username'
    parsed.host = u'foo.example.com'
    parsed.port = 443
    parsed.path = u'/another/path'
    parsed.params = u'no-params'
    parsed.query = u'no-query'
    parsed.fragment = u'no-fragment'
    # The expected value is built from the components assigned above.
    # NOTE(review): the accessor being compared was reconstructed -- confirm
    # whether str(parsed) or an explicit utf8/unicode accessor is intended.
    assert_equal(
        str(parsed),
        'https://username@foo.example.com:443/another/path;no-params?no-query#no-fragment')
コード例 #56
def canonical_url(uri):
    """
    Return the canonical representation of a given URI.
    This assumes the `uri` has a scheme.

    * When the default port for the scheme is explicitly declared
      (such as port 80 for http), the port will be removed from the output.
    * Fragments '#' are not removed.
    * Params and query string arguments are not reordered.
    """
    normalized = urlpy.parse(uri).sanitize().punycode()
    # Drop the port when it is just the registered default for the scheme.
    if normalized._port == urlpy.PORTS.get(normalized._scheme, None):
        normalized._port = None
    return normalized.utf8()
コード例 #57
ファイル: test.py プロジェクト: mherdeg/url-py
    def test_escape(self):
        '''Make sure we escape paths correctly'''
        examples = [
            ('hello%20and%20how%20are%20you', 'hello%20and%20how%20are%20you'),
            ('danny\'s pub'                 , 'danny\'s%20pub'               ),
            ('danny%27s pub'                , 'danny\'s%20pub'               ),
            ('danny\'s pub?foo=bar&yo'      , 'danny\'s%20pub?foo=bar&yo'    ),
            ('hello%2c world'               , 'hello,%20world'               ),
            ('%3f%23%5b%5d'                 , '%3F%23%5B%5D'                 ),
            # Thanks to @myronmarston for these test cases
            ('foo?bar none=foo bar'         , 'foo?bar%20none=foo%20bar'     ),
            ('foo;a=1;b=2?a=1&b=2'          , 'foo;a=1;b=2?a=1&b=2'          ),
            # NOTE(review): the expected value below was reconstructed to be
            # consistent with the cases above (brackets stay escaped, commas
            # are left bare) -- confirm against the upstream test.
            ('foo?bar=["hello","howdy"]'    ,
                'foo?bar=%5B%22hello%22,%22howdy%22%5D'),
        ]

        base = 'http://testing.com/'
        for bad, good in examples:
            bad = base + bad
            good = base + good
            self.assertEqual(url.parse(bad).escape().utf8(), good)
            # Escaping should also be idempotent
            self.assertEqual(url.parse(bad).escape().escape().utf8(), good)

        # This example's from the wild:
        example = 'http://www.balset.com/DE3FJ4Yg/p:h=300&m=2011~07~25~2444705.png&ma=cb&or=1&w=400/2011/10/10/2923710.jpg'
        self.assertEqual(
            url.parse(example).unescape().escape().utf8(), example)

        # Examples with userinfo
        # NOTE(review): both literals were reconstructed (an escaped ':' in
        # the userinfo is expected to be unescaped, like %27 above) --
        # confirm against the upstream test.
        examples = [
            ('http://user%3Apass@foo.com/', 'http://user:pass@foo.com/'),
        ]
        for bad, good in examples:
            self.assertEqual(url.parse(bad).escape().utf8(), good)
            # Escaping should also be idempotent
            self.assertEqual(url.parse(bad).escape().escape().utf8(), good)
コード例 #58
ファイル: test.py プロジェクト: seomoz/url-py
 def test(first, second):
     # Neither ordering may compare as strictly equal, whether the right-hand
     # side is a parsed URL or the raw string.
     for left, right in ((first, second), (second, first)):
         assert_not_equal(url.parse(left), url.parse(right),
             'URL(%s) should not equal URL(%s)' % (left, right))
         assert_not_equal(url.parse(left), right,
             'URL(%s) should not equal %s' % (left, right))
     # Each example must still equal its own string form.
     for example in (first, second):
         assert_equal(url.parse(example), example,
             'URL(%s) should equal itself' % example)