def test_link_encoding(self):
    """find_link() yields the expected Link for an anchor with em dashes.

    The same markup is served once per declared charset ("UTF-8" and
    "latin-1"); the first link found must equal a hand-built Link.
    """
    import mechanize
    from mechanize._rfc3986 import clean_url

    url = "http://example.com/"
    markup = """\
<a href="http://example.com/foo/bar——.html" name="name0——">blah——</a>
"""
    for encoding in ["UTF-8", "latin-1"]:
        headers = {"content-type": "text/html%s" % ("; charset=%s" % encoding)}
        browser = TestBrowser()
        canned = MockResponse(url, markup, headers)
        browser.add_handler(make_mock_handler()([("http_open", canned)]))
        response = browser.open(url)

        # The expected URL carries the UTF-8 bytes of two em dashes after
        # they have been run through clean_url() with the page's encoding.
        dash_bytes = em_dash.encode('utf-8') * 2
        quoted_dashes = clean_url(dash_bytes, encoding)
        two_dashes = em_dash * 2

        # Link arguments: base_url, url, text, tag, attrs
        expected = mechanize.Link(
            url,
            "http://example.com/foo/bar%s.html" % quoted_dashes,
            unicode_type("blah") + two_dashes,
            "a",
            [
                ("href",
                 unicode_type("http://example.com/foo/bar%s.html") % two_dashes),
                ("name", unicode_type("name0") + em_dash + em_dash),
            ],
        )

        # With no criteria, find_link() returns the first link on the page.
        found = browser.find_link()
        self.assertEqual(found, expected)
def _test_link_encoding(self, factory):
    """Factory-parameterised check of find_link() on markup with em dashes.

    `factory` is forwarded to TestBrowser(factory=...). The same markup is
    served once per declared charset ("UTF-8" and "latin-1") and the first
    link found is compared with a hand-built Link.
    """
    import mechanize
    from mechanize._rfc3986 import clean_url

    url = "http://example.com/"
    markup = """\
<a href="http://example.com/foo/bar——.html" name="name0——">blah——</a>
"""
    for encoding in ["UTF-8", "latin-1"]:
        headers = {"content-type": "text/html%s" % ("; charset=%s" % encoding)}
        browser = TestBrowser(factory=factory)
        canned = MockResponse(url, markup, headers)
        browser.add_handler(make_mock_handler()([("http_open", canned)]))
        response = browser.open(url)

        # Two em dashes in the declared encoding; when that encoding cannot
        # represent U+2014, fall back to the literal below.
        # NOTE(review): confirm this fallback literal matches the text the
        # markup is expected to yield when the charset cannot encode U+2014.
        try:
            one_dash = u"\u2014".encode(encoding)
        except UnicodeError:
            dashes = '——'
        else:
            dashes = one_dash * 2
        quoted_dashes = clean_url(dashes, encoding)

        # Link arguments: base_url, url, text, tag, attrs
        expected = mechanize.Link(
            url,
            "http://example.com/foo/bar%s.html" % quoted_dashes,
            "blah" + dashes,
            "a",
            [
                ("href", "http://example.com/foo/bar%s.html" % dashes),
                ("name", "name0%s" % dashes),
            ],
        )

        # With no criteria, find_link() returns the first link on the page.
        found = browser.find_link()
        self.assertEqual(found, expected)
def test_link_whitespace(self):
    """An href padded with whitespace gives a Link whose url is stripped.

    The expected Link keeps the raw, unstripped value in its attrs list.
    """
    from mechanize import Link

    base_url = "http://example.com/"
    padded_href = " http://example.com/foo.html%20+ "
    expected = Link(
        base_url,
        padded_href.strip(),
        "",
        "a",
        [("href", padded_href)],
    )

    browser = TestBrowser()
    canned = MockResponse(
        base_url,
        '<a href="%s"></a>' % padded_href,
        {"content-type": "text/html"},
    )
    browser.add_handler(make_mock_handler()([("http_open", canned)]))
    response = browser.open(base_url)

    self.assertEqual(browser.find_link(nr=0), expected)
def test_links(self):
    """Exercise links() and each find_link() selector against one page.

    Covers selection by nr, text, name, url, tag and predicate, the regex
    variants given both as compiled patterns and as plain strings, several
    combinations, and filtering through links(url=...).
    """
    import mechanize
    from mechanize import Link

    url = "http://example.com/"
    page = """<html>
<head><title>Title</title></head>
<body>
<a href="http://example.com/foo/bar.html" name="apples"></a>
<a name="pears"></a>
<a href="spam" name="pears"></a>
<area href="blah" name="foo"></area>
<form name="form2">
 <input type="submit" name="two">
</form>
<iframe name="name" href="href" src="src"></iframe>
<iframe name="name2" href="href" src="src"></iframe>
<a name="name3" href="one">yada yada</a>
<a name="pears" href="two" weird="stuff">rhubarb</a>
<a></a>
<iframe src="foo"></iframe>
</body>
</html>
"""
    browser = TestBrowser()
    canned = MockResponse(url, page, {"content-type": "text/html"})
    browser.add_handler(make_mock_handler()([("http_open", canned)]))
    response = browser.open(url)

    def found_url(**criteria):
        # URL of the single link that find_link() selects for `criteria`.
        return browser.find_link(**criteria).url

    def has_weird_stuff(candidate):
        # True for a link carrying the attribute weird="stuff".
        return dict(candidate.attrs).get("weird") == "stuff"

    # Link arguments: base_url, url, text, tag, attrs
    expected_links = [
        Link(url, "http://example.com/foo/bar.html", "", "a",
             [("href", "http://example.com/foo/bar.html"),
              ("name", "apples")]),
        Link(url, "spam", "", "a",
             [("href", "spam"), ("name", "pears")]),
        Link(url, "blah", '', "area",
             [("href", "blah"), ("name", "foo")]),
        Link(url, "src", '', "iframe",
             [("name", "name"), ("href", "href"), ("src", "src")]),
        Link(url, "src", '', "iframe",
             [("name", "name2"), ("href", "href"), ("src", "src")]),
        Link(url, "one", "yada yada", "a",
             [("name", "name3"), ("href", "one")]),
        Link(url, "two", "rhubarb", "a",
             [("name", "pears"), ("href", "two"), ("weird", "stuff")]),
        Link(url, "foo", '', "iframe",
             [("src", "foo")]),
    ]
    actual_links = list(browser.links())
    # Compare pairwise first, then check the lengths agree.
    for actual, wanted in zip(actual_links, expected_links):
        self.assertEqual(actual, wanted)
    self.assertEqual(len(actual_links), len(expected_links))

    # nr
    self.assertEqual(found_url(), "http://example.com/foo/bar.html")
    self.assertEqual(found_url(nr=1), "spam")

    # text
    self.assertEqual(found_url(text="yada yada"), "one")
    self.assertRaises(
        mechanize.LinkNotFoundError,
        browser.find_link,
        text="da ya",
    )
    self.assertEqual(found_url(text_regex=re.compile("da ya")), "one")
    self.assertEqual(found_url(text_regex="da ya"), "one")

    # name
    self.assertEqual(found_url(name="name3"), "one")
    self.assertEqual(found_url(name_regex=re.compile("oo")), "blah")
    self.assertEqual(found_url(name_regex="oo"), "blah")

    # url
    self.assertEqual(found_url(url="spam"), "spam")
    self.assertEqual(found_url(url_regex=re.compile("pam")), "spam")
    self.assertEqual(found_url(url_regex="pam"), "spam")

    # tag
    self.assertEqual(found_url(tag="area"), "blah")

    # predicate
    self.assertEqual(found_url(predicate=has_weird_stuff), "two")

    # combinations
    second_pears = browser.find_link(name="pears", nr=1)
    self.assertEqual(second_pears.text, "rhubarb")

    named_frame = browser.find_link(url="src", nr=0, name="name2")
    self.assertEqual(named_frame.tag, "iframe")
    self.assertEqual(named_frame.url, "src")

    self.assertRaises(
        mechanize.LinkNotFoundError,
        browser.find_link,
        url="src",
        nr=1,
        name="name2",
    )
    self.assertEqual(found_url(tag="a", predicate=has_weird_stuff), "two")

    # .links()
    src_links = list(browser.links(url="src"))
    expected_src_links = [
        Link(
            url,
            url="src",
            text='',
            tag="iframe",
            attrs=[("name", frame_name), ("href", "href"), ("src", "src")],
        )
        for frame_name in ("name", "name2")
    ]
    self.assertEqual(src_links, expected_src_links)