def http_error_302(self, req, fp, code, msg, headers):
    """Handle a redirect, only following it when the target is on LinkedIn.

    Adapted from mechanize._urllib2_fork.HTTPRedirectHandler.

    The redirect target is read from the 'location' header, falling back to
    'uri'.  It is cleaned, resolved against the request URL and passed to
    ``self.redirect_request`` to build the follow-up request.

    Returns:
        None when neither header is present or ``redirect_request`` declines
        the redirect; an empty "200 OK" response built with
        ``_response.make_response`` when the follow-up URL does not match the
        LinkedIn pattern; otherwise the result of ``self.parent.open(new)``.

    Raises:
        urllib.error.HTTPError: when the target URL has already been visited
            ``self.max_repeats`` times, or ``self.max_redirections`` distinct
            targets have been recorded for this request chain.
    """
    # Code from mechanize._urllib2_fork.HTTPRedirectHandler:
    if 'location' in headers:
        newurl = headers.getheaders('location')[0]
    elif 'uri' in headers:
        newurl = headers.getheaders('uri')[0]
    else:
        return
    newurl = _rfc3986.clean_url(newurl, "latin-1")
    newurl = _rfc3986.urljoin(req.get_full_url(), newurl)

    new = self.redirect_request(req, fp, code, msg, headers, newurl)
    if new is None:
        return

    # Loop detection: the visit counter dict is shared between the original
    # request and every request created while following its redirects.
    if hasattr(req, 'redirect_dict'):
        visited = new.redirect_dict = req.redirect_dict
        if (visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
            raise urllib.error.HTTPError(req.get_full_url(), code,
                                         self.inf_msg + msg, headers, fp)
    else:
        visited = new.redirect_dict = req.redirect_dict = {}
    visited[newurl] = visited.get(newurl, 0) + 1

    # fp is only drained and closed here, after the HTTPError path above
    # (which hands fp to the exception) can no longer be taken.
    fp.read()
    fp.close()

    # If the redirected URL doesn't match the LinkedIn pattern, do not follow
    # it: hand back an empty "200 OK" response for that URL instead.
    # The pattern is a raw string so the backslashes reach the regex engine
    # without triggering invalid-escape warnings in the string literal.
    # NOTE(review): the '.*' lets "www.linkedin.com" appear anywhere after the
    # scheme (including the path or query), not only in the host - confirm
    # this is intended.
    new_url = new.get_full_url()
    if not re.search(r'^http(?:s)?\:\/\/.*www\.linkedin\.com', new_url):
        return _response.make_response('', headers.items(), new_url, 200, 'OK')
    else:
        return self.parent.open(new)
def _test_link_encoding(self, factory):
    """Check find_link() on a page whose link contains em dashes.

    For each declared charset (UTF-8, then latin-1) a mock response serving
    a single anchor is opened in a TestBrowser built with *factory*, and the
    first link found is compared against a hand-built mechanize.Link.
    """
    import mechanize
    from mechanize._rfc3986 import clean_url

    page_url = "http://example.com/"
    markup = """\
<a href="http://example.com/foo/bar——.html" name="name0——">blah——</a>
"""

    for encoding in ["UTF-8", "latin-1"]:
        content_type = "text/html%s" % ("; charset=%s" % encoding)

        browser = TestBrowser(factory=factory)
        response = MockResponse(page_url, markup,
                                {"content-type": content_type})
        handler_class = make_mock_handler()
        browser.add_handler(handler_class([("http_open", response)]))
        response = browser.open(page_url)

        # Two em dashes in the page encoding; fall back to the literal when
        # the encoding cannot represent the character.
        try:
            dashes = u"\u2014".encode(encoding) * 2
        except UnicodeError:
            dashes = '——'
        quoted_dashes = clean_url(dashes, encoding)

        # Link arguments: base_url, url, text, tag, attrs
        expected_attrs = [
            ("href", "http://example.com/foo/bar%s.html" % dashes),
            ("name", "name0%s" % dashes),
        ]
        expected = mechanize.Link(
            page_url,
            "http://example.com/foo/bar%s.html" % quoted_dashes,
            "blah" + dashes,
            "a",
            expected_attrs)

        # No nr given: take the first link on the page.
        found = browser.find_link()
        self.assertEqual(found, expected)
def test_link_encoding(self):
    """Check find_link() on a page whose link contains em dashes.

    For each declared charset (UTF-8, then latin-1) a mock response serving
    a single anchor is opened in a TestBrowser, and the first link found is
    compared against a hand-built mechanize.Link whose text and attributes
    are unicode strings.
    """
    import mechanize
    from mechanize._rfc3986 import clean_url

    page_url = "http://example.com/"
    markup = """\
<a href="http://example.com/foo/bar——.html" name="name0——">blah——</a>
"""
    two_dashes = em_dash * 2

    for encoding in ["UTF-8", "latin-1"]:
        response_headers = {
            "content-type": "text/html; charset=%s" % encoding,
        }

        browser = TestBrowser()
        response = MockResponse(page_url, markup, response_headers)
        handler_class = make_mock_handler()
        browser.add_handler(handler_class([("http_open", response)]))
        response = browser.open(page_url)

        # The expected URL carries the UTF-8 bytes of the dashes, cleaned
        # with the page's declared encoding.
        encoded_dashes = em_dash.encode('utf-8') * 2
        quoted_dashes = clean_url(encoded_dashes, encoding)

        # Link arguments: base_url, url, text, tag, attrs
        expected_attrs = [
            ("href",
             unicode_type("http://example.com/foo/bar%s.html") % two_dashes),
            ("name", unicode_type("name0") + two_dashes),
        ]
        expected = mechanize.Link(
            page_url,
            "http://example.com/foo/bar%s.html" % quoted_dashes,
            unicode_type("blah") + two_dashes,
            "a",
            expected_attrs)

        # No nr given: take the first link on the page.
        found = browser.find_link()
        self.assertEqual(found, expected)
def _test_link_encoding(self, factory):
    """Verify the Link returned by find_link() for an em-dash-laden anchor.

    Runs once per declared charset (UTF-8, latin-1): serves one anchor via a
    mock "http_open" handler to a TestBrowser created with *factory*, then
    asserts that the first link equals the expected mechanize.Link.
    """
    import mechanize
    from mechanize._rfc3986 import clean_url

    base = "http://example.com/"
    anchor_html = """\
<a href="http://example.com/foo/bar——.html" name="name0——">blah——</a>
"""

    for encoding in ["UTF-8", "latin-1"]:
        charset_suffix = "; charset=%s" % encoding
        mock_headers = {"content-type": "text/html%s" % charset_suffix}

        browser = TestBrowser(factory=factory)
        reply = MockResponse(base, anchor_html, mock_headers)
        browser.add_handler(make_mock_handler()([("http_open", reply)]))
        reply = browser.open(base)

        # Encode two em dashes with the page charset where possible;
        # otherwise use the source literal.
        try:
            mdash_pair = u"\u2014".encode(encoding) * 2
        except UnicodeError:
            mdash_pair = '——'
        quoted_pair = clean_url(mdash_pair, encoding)

        # Positional Link fields: base_url, url, text, tag, attrs
        want = mechanize.Link(
            base,
            "http://example.com/foo/bar%s.html" % quoted_pair,
            "blah" + mdash_pair,
            "a",
            [
                ("href", "http://example.com/foo/bar%s.html" % mdash_pair),
                ("name", "name0%s" % mdash_pair),
            ])

        # find_link() without nr yields the first link.
        got = browser.find_link()
        self.assertEqual(got, want)