Beispiel #1
0
    def test_link_encoding(self):
        import mechanize
        from mechanize._rfc3986 import clean_url
        url = "http://example.com/"
        for encoding in ["UTF-8", "latin-1"]:
            encoding_decl = "; charset=%s" % encoding
            b = TestBrowser()
            r = MockResponse(url, """\
<a href="http://example.com/foo/bar&mdash;&#x2014;.html"
   name="name0&mdash;&#x2014;">blah&mdash;&#x2014;</a>
""", {"content-type": "text/html%s" % encoding_decl})
            b.add_handler(make_mock_handler()([("http_open", r)]))
            r = b.open(url)

            Link = mechanize.Link
            mdashx2 = em_dash.encode('utf-8') * 2
            qmdashx2 = clean_url(mdashx2, encoding)
            # base_url, url, text, tag, attrs
            exp = Link(
                url, "http://example.com/foo/bar%s.html" % qmdashx2,
                unicode_type("blah") + (em_dash * 2), "a",
                [("href", unicode_type("http://example.com/foo/bar%s.html") % (
                    em_dash*2)),
                 ("name", unicode_type("name0") + em_dash + em_dash)])
            # nr
            link = b.find_link()
            #             print
            #             print exp
            #             print link
            self.assertEqual(link, exp)
Beispiel #2
0
    def _test_link_encoding(self, factory):
        import mechanize
        from mechanize._rfc3986 import clean_url
        url = "http://example.com/"
        for encoding in ["UTF-8", "latin-1"]:
            encoding_decl = "; charset=%s" % encoding
            b = TestBrowser(factory=factory)
            r = MockResponse(
                url,
                """\
<a href="http://example.com/foo/bar&mdash;&#x2014;.html"
   name="name0&mdash;&#x2014;">blah&mdash;&#x2014;</a>
""",  #"
                {"content-type": "text/html%s" % encoding_decl})
            b.add_handler(make_mock_handler()([("http_open", r)]))
            r = b.open(url)

            Link = mechanize.Link
            try:
                mdashx2 = u"\u2014".encode(encoding) * 2
            except UnicodeError:
                mdashx2 = '&mdash;&#x2014;'
            qmdashx2 = clean_url(mdashx2, encoding)
            # base_url, url, text, tag, attrs
            exp = Link(
                url, "http://example.com/foo/bar%s.html" % qmdashx2,
                "blah" + mdashx2, "a",
                [("href", "http://example.com/foo/bar%s.html" % mdashx2),
                 ("name", "name0%s" % mdashx2)])
            # nr
            link = b.find_link()
            ##             print
            ##             print exp
            ##             print link
            self.assertEqual(link, exp)
Beispiel #3
0
 def test_link_whitespace(self):
     # An href padded with leading/trailing whitespace: the expected Link has
     # the stripped value as its url while its attrs keep the href exactly as
     # written in the markup.
     from mechanize import Link

     page_url = "http://example.com/"
     raw_href = "  http://example.com/foo.html%20+ "
     markup = '<a href="%s"></a>' % raw_href
     headers = {"content-type": "text/html"}

     browser = TestBrowser()
     response = MockResponse(page_url, markup, headers)
     browser.add_handler(make_mock_handler()([("http_open", response)]))
     browser.open(page_url)

     found = browser.find_link(nr=0)
     # Link arguments: base_url, url, text, tag, attrs
     expected = Link(
         page_url, raw_href.strip(), "", "a", [("href", raw_href)])
     self.assertEqual(found, expected)
Beispiel #4
0
    def test_links(self):
        import mechanize
        from mechanize import Link
        url = "http://example.com/"

        b = TestBrowser()
        r = MockResponse(url, """<html>
<head><title>Title</title></head>
<body>
<a href="http://example.com/foo/bar.html" name="apples"></a>
<a name="pears"></a>
<a href="spam" name="pears"></a>
<area href="blah" name="foo"></area>
<form name="form2">
 <input type="submit" name="two">
</form>
<iframe name="name" href="href" src="src"></iframe>
<iframe name="name2" href="href" src="src"></iframe>
<a name="name3" href="one">yada yada</a>
<a name="pears" href="two" weird="stuff">rhubarb</a>
<a></a>
<iframe src="foo"></iframe>
</body>
</html>
""", {"content-type": "text/html"})
        b.add_handler(make_mock_handler()([("http_open", r)]))
        r = b.open(url)

        exp_links = [
            # base_url, url, text, tag, attrs
            Link(url, "http://example.com/foo/bar.html", "", "a",
                 [("href", "http://example.com/foo/bar.html"),
                  ("name", "apples")]),
            Link(url, "spam", "", "a", [("href", "spam"), ("name", "pears")]),
            Link(url, "blah", '', "area", [("href", "blah"), ("name", "foo")]),
            Link(url, "src", '', "iframe",
                 [("name", "name"), ("href", "href"), ("src", "src")]),
            Link(url, "src", '', "iframe",
                 [("name", "name2"), ("href", "href"), ("src", "src")]),
            Link(url, "one", "yada yada", "a",
                 [("name", "name3"), ("href", "one")]),
            Link(url, "two", "rhubarb", "a",
                 [("name", "pears"), ("href", "two"), ("weird", "stuff")]),
            Link(url, "foo", '', "iframe", [("src", "foo")]),
        ]
        links = list(b.links())
        for got, expect in zip(links, exp_links):
            self.assertEqual(got, expect)
        self.assertEqual(len(links), len(exp_links))
        # nr
        lnk = b.find_link()
        self.assertEqual(lnk.url, "http://example.com/foo/bar.html")
        lnk = b.find_link(nr=1)
        self.assertEqual(lnk.url, "spam")
        # text
        lnk = b.find_link(text="yada yada")
        self.assertEqual(lnk.url, "one")
        self.assertRaises(
            mechanize.LinkNotFoundError, b.find_link, text="da ya")
        lnk = b.find_link(text_regex=re.compile("da ya"))
        self.assertEqual(lnk.url, "one")
        lnk = b.find_link(text_regex="da ya")
        self.assertEqual(lnk.url, "one")
        # name
        lnk = b.find_link(name="name3")
        self.assertEqual(lnk.url, "one")
        lnk = b.find_link(name_regex=re.compile("oo"))
        self.assertEqual(lnk.url, "blah")
        lnk = b.find_link(name_regex="oo")
        self.assertEqual(lnk.url, "blah")
        # url
        lnk = b.find_link(url="spam")
        self.assertEqual(lnk.url, "spam")
        lnk = b.find_link(url_regex=re.compile("pam"))
        self.assertEqual(lnk.url, "spam")
        lnk = b.find_link(url_regex="pam")
        self.assertEqual(lnk.url, "spam")
        # tag
        lnk = b.find_link(tag="area")
        self.assertEqual(lnk.url, "blah")
        # predicate
        lnk = b.find_link(
            predicate=lambda lnk: dict(lnk.attrs).get("weird") == "stuff")
        self.assertEqual(lnk.url, "two")
        # combinations
        lnk = b.find_link(name="pears", nr=1)
        self.assertEqual(lnk.text, "rhubarb")
        lnk = b.find_link(url="src", nr=0, name="name2")
        self.assertEqual(lnk.tag, "iframe")
        self.assertEqual(lnk.url, "src")
        self.assertRaises(
            mechanize.LinkNotFoundError,
            b.find_link,
            url="src",
            nr=1,
            name="name2")
        lnk = b.find_link(
            tag="a", predicate=lambda lnk: dict(lnk.attrs).get(
                "weird") == "stuff")
        self.assertEqual(lnk.url, "two")

        # .links()
        self.assertEqual(
            list(b.links(url="src")), [
                Link(
                    url,
                    url="src",
                    text='',
                    tag="iframe",
                    attrs=[("name", "name"), ("href", "href"),
                           ("src", "src")]),
                Link(
                    url,
                    url="src",
                    text='',
                    tag="iframe",
                    attrs=[("name", "name2"), ("href", "href"),
                           ("src", "src")]),
            ])