예제 #1
0
    def testSetResponseData(self):
        from mechanize import response_seek_wrapper
        r = TestUnSeekableResponse(self.text, {'blah': 'yawn'})
        rsw = response_seek_wrapper(r)
        rsw.set_data(b"""\
A Seeming somwhat more than View;
  That doth instruct the Mind
  In Things that ly behind,
""")
        self.assertEqual(rsw.read(9), b"A Seeming")
        self.assertEqual(rsw.read(13), b" somwhat more")
        rsw.seek(0)
        self.assertEqual(rsw.read(9), b"A Seeming")
        self.assertEqual(rsw.readline(), b" somwhat more than View;\n")
        rsw.seek(0)
        self.assertEqual(rsw.readline(),
                         b"A Seeming somwhat more than View;\n")
        rsw.seek(-1, 1)
        self.assertEqual(rsw.read(7), b"\n  That")

        r = TestUnSeekableResponse(self.text, {'blah': 'yawn'})
        rsw = response_seek_wrapper(r)
        rsw.set_data(self.text)
        self._test2(rsw)
        rsw.seek(0)
        self._test4(rsw)
예제 #2
0
    def testSetResponseData(self):
        from mechanize import response_seek_wrapper
        r = TestUnSeekableResponse(self.text, {'blah': 'yawn'})
        rsw = response_seek_wrapper(r)
        rsw.set_data("""\
A Seeming somwhat more than View;
  That doth instruct the Mind
  In Things that ly behind,
""")
        self.assertEqual(rsw.read(9), "A Seeming")
        self.assertEqual(rsw.read(13), " somwhat more")
        rsw.seek(0)
        self.assertEqual(rsw.read(9), "A Seeming")
        self.assertEqual(rsw.readline(), " somwhat more than View;\n")
        rsw.seek(0)
        self.assertEqual(rsw.readline(), "A Seeming somwhat more than View;\n")
        rsw.seek(-1, 1)
        self.assertEqual(rsw.read(7), "\n  That")

        r = TestUnSeekableResponse(self.text, {'blah': 'yawn'})
        rsw = response_seek_wrapper(r)
        rsw.set_data(self.text)
        self._test2(rsw)
        rsw.seek(0)
        self._test4(rsw)
예제 #3
0
    def test_set_response(self):
        """Check how ``set_data()`` and ``set_response()`` affect the browser.

        The test expects that editing a response object (either the one
        returned by ``open()`` or one returned by ``response()``) leaves the
        browser's own response and parsed links untouched, and that only
        ``set_response()`` makes the browser pick up the edited body.
        """
        import copy
        from mechanize import response_seek_wrapper

        browser = TestBrowser()
        page_url = "http://example.com/"
        old_html = """<html><body><a href="spam">click me</a></body></html>"""
        new_html = """<html><body><a href="eggs">click me</a></body></html>"""

        canned = response_seek_wrapper(
            MockResponse(page_url, old_html, {"content-type": "text/html"}))
        handler_class = make_mock_handler()
        browser.add_handler(handler_class([("http_open", canned)]))

        def first_link_url():
            # URL of the first link the browser currently reports.
            return list(browser.links())[0].url

        opened = browser.open(page_url)
        self.assertEqual(opened.read(), old_html)
        opened.seek(0)
        self.assertEqual(copy.copy(opened).read(), old_html)
        self.assertEqual(first_link_url(), "spam")

        # Editing the response we hold is visible on that object only.
        opened.set_data(new_html)
        self.assertEqual(opened.read(), new_html)
        self.assertEqual(browser.response().read(), old_html)

        # Editing what browser.response() hands out does not stick either.
        browser.response().set_data(new_html)
        self.assertEqual(browser.response().read(), old_html)
        self.assertEqual(first_link_url(), "spam")
        opened.seek(0)

        # Installing the edited response explicitly changes body and links.
        browser.set_response(opened)
        self.assertEqual(browser.response().read(), new_html)
        self.assertEqual(first_link_url(), "eggs")
예제 #4
0
    def test_set_response(self):
        """Verify that only ``set_response()`` changes the browser's page.

        Opens a canned bytes page, edits response objects through
        ``set_data()`` and asserts that the browser keeps serving the
        original body and links until ``set_response()`` is called with the
        edited response.
        """
        import copy
        from mechanize import response_seek_wrapper

        br = TestBrowser()
        target = "http://example.com/"
        spam_page = b"""<html><body><a href="spam">click me</a></body></html>"""
        eggs_page = b"""<html><body><a href="eggs">click me</a></body></html>"""

        mock = MockResponse(target, spam_page, {"content-type": "text/html"})
        wrapped = response_seek_wrapper(mock)
        br.add_handler(make_mock_handler()([("http_open", wrapped)]))

        def current_body():
            # Body as served by a fresh call to br.response().
            return br.response().read()

        def first_href():
            return list(br.links())[0].url

        live = br.open(target)
        self.assertEqual(live.read(), spam_page)
        live.seek(0)
        self.assertEqual(copy.copy(live).read(), spam_page)
        self.assertEqual(first_href(), "spam")

        # Change the response returned by open(): the browser is unaffected.
        live.set_data(eggs_page)
        self.assertEqual(live.read(), eggs_page)
        self.assertEqual(current_body(), spam_page)

        # Change an object obtained from br.response(): still unaffected.
        br.response().set_data(eggs_page)
        self.assertEqual(current_body(), spam_page)
        self.assertEqual(first_href(), "spam")
        live.seek(0)

        # Hand the edited response to the browser explicitly.
        br.set_response(live)
        self.assertEqual(current_body(), eggs_page)
        self.assertEqual(first_href(), "eggs")
예제 #5
0
    def http_response(self, request, response):
        """Pipe HTML response bodies through the executable at ``self.tidybin``.

        The response is wrapped with ``mechanize.response_seek_wrapper`` when
        it has no ``seek`` attribute.  If its content-type header contains
        ``html``, the body is written to the process's stdin and replaced by
        whatever the process writes to stdout; stderr is captured and
        discarded.  Other responses are returned untouched.

        :param request: the request that produced ``response`` (unused here).
        :param response: the response object to post-process.
        :returns: the (possibly wrapped and rewritten) response.
        """
        if not hasattr(response, "seek"):
            response = mechanize.response_seek_wrapper(response)
        # ``dict.has_key`` is gone on Python 3; ``get`` with a default handles
        # a missing header on both Python 2 and 3 header objects.
        if 'html' in response.info().get('content-type', ''):
            tidy = Popen([self.tidybin, "-q", "-i"],
                         stdout=PIPE,
                         stdin=PIPE,
                         stderr=PIPE)
            # communicate() feeds stdin, drains both output pipes and waits
            # for the process to exit.
            tidied, _errors = tidy.communicate(input=response.get_data())
            response.set_data(tidied)
        return response
 def http_response(self, request, response):
     """Re-serialise HTML response bodies through BeautifulSoup.

     The response is wrapped with ``mechanize.response_seek_wrapper`` when it
     has no ``seek`` attribute.  If its content-type header contains
     ``html``, the body is parsed with ``BeautifulSoup`` and replaced by the
     result of ``prettify()``.  Other responses are returned untouched.

     :param request: the request that produced ``response`` (unused here).
     :param response: the response object to post-process.
     :returns: the (possibly wrapped and rewritten) response.
     """
     if not hasattr(response, "seek"):
         response = mechanize.response_seek_wrapper(response)
     # only use BeautifulSoup if response is html
     # (``dict.has_key`` is gone on Python 3; ``get`` with a default covers a
     # missing header on both Python 2 and 3 header objects.)
     if 'html' in response.info().get('content-type', ''):
         soup = BeautifulSoup(response.get_data())
         response.set_data(soup.prettify())
     return response
예제 #7
0
 def http_response(self, request, response):
     """Sanitise HTML response bodies by round-tripping them through a soup.

     Responses without a ``seek`` attribute are wrapped with
     ``mechanize.response_seek_wrapper``.  When the content-type header
     contains ``html``, the body is parsed with ``get_soup`` and replaced by
     ``prettify()`` output encoded with the soup's ``original_encoding``.

     :param request: the request that produced ``response`` (unused here).
     :param response: the response object to post-process.
     :returns: the (possibly wrapped and rewritten) response.
     """
     seekable = response
     if not hasattr(seekable, "seek"):
         seekable = mechanize.response_seek_wrapper(seekable)

     content_type = seekable.info().get('content-type', '')
     if 'html' not in content_type:
         # Non-HTML payloads pass through unchanged.
         return seekable

     markup = get_soup(seekable.get_data())
     cleaned = markup.prettify(encoding=markup.original_encoding)
     seekable.set_data(cleaned)
     return seekable
 def http_response(self, request, response):
     """Re-serialise HTML response bodies through MinimalSoup.

     The response is wrapped with ``mechanize.response_seek_wrapper`` when it
     has no ``seek`` attribute.  If its content-type header contains
     ``html``, the body is parsed with ``MinimalSoup`` and replaced by the
     result of ``prettify()``.  Other responses are returned untouched.

     :param request: the request that produced ``response`` (unused here).
     :param response: the response object to post-process.
     :returns: the (possibly wrapped and rewritten) response.
     """
     if not hasattr(response, "seek"):
         response = mechanize.response_seek_wrapper(response)
     # only use BeautifulSoup if response is html
     # (``dict.has_key`` is gone on Python 3; ``get`` with a default covers a
     # missing header on both Python 2 and 3 header objects.)
     if 'html' in response.info().get('content-type', ''):
         soup = MinimalSoup(response.get_data())
         response.set_data(soup.prettify())
     return response
예제 #9
0
 def http_response(self, request, httpResponse):
     """Run HTML response bodies through BeautifulSoup and pretty-print them.

     Responses without a ``seek`` attribute are wrapped with
     ``mechanize.response_seek_wrapper``.  When the ``content-type`` entry of
     the header dict contains ``html``, the body is parsed with
     ``BeautifulSoup`` and replaced by ``prettify()`` output.

     :param request: the request that produced ``httpResponse`` (unused here).
     :param httpResponse: the response object to post-process.
     :returns: the (possibly wrapped and rewritten) response.
     """
     result = httpResponse
     if not hasattr(result, "seek"):
         result = mechanize.response_seek_wrapper(result)

     # A missing header is treated the same as one that does not mention html.
     content_type = result.info().dict.get('content-type', '')
     if 'html' not in content_type:
         return result

     parsed = BeautifulSoup(result.get_data())
     result.set_data(parsed.prettify())
     return result
예제 #10
0
 def http_response(self, request, response):
     """Sanitise HTML bodies by parsing and re-serialising them.

     A response lacking ``seek`` is first wrapped with
     ``mechanize.response_seek_wrapper``.  If the content-type header
     mentions ``html``, the body goes through ``get_soup`` and is replaced by
     the soup's ``prettify()`` output, encoded with ``original_encoding``.

     :param request: the request that produced ``response`` (unused here).
     :param response: the response object to post-process.
     :returns: the (possibly wrapped and rewritten) response.
     """
     needs_wrapping = not hasattr(response, "seek")
     if needs_wrapping:
         response = mechanize.response_seek_wrapper(response)

     is_html = 'html' in response.info().get('content-type', '')
     if is_html:
         # Run HTML through BeautifulSoup for sanitizing
         tree = get_soup(response.get_data())
         pretty = tree.prettify(encoding=tree.original_encoding)
         response.set_data(pretty)
     return response
예제 #11
0
    def testGetResponseData(self):
        """Check ``get_data()`` on a wrapped unseekable response.

        ``get_data()`` must return the fixture text; the wrapper is then
        handed to ``_test2`` and, after rewinding, to ``_test4``.
        """
        from mechanize import response_seek_wrapper

        wrapped = response_seek_wrapper(
            TestUnSeekableResponse(self.text, {'blah': 'yawn'}))

        self.assertEqual(wrapped.get_data(), self.text)
        self._test2(wrapped)
        # Rewind before running the second shared check.
        wrapped.seek(0)
        self._test4(wrapped)
예제 #12
0
    def testGetResponseData(self):
        """Verify that ``get_data()`` returns the whole wrapped body.

        After comparing ``get_data()`` with ``self.text`` the same wrapper is
        passed to the shared ``_test2`` and ``_test4`` checks, with a rewind
        in between.
        """
        from mechanize import response_seek_wrapper

        headers = {'blah': 'yawn'}
        unseekable = TestUnSeekableResponse(self.text, headers)
        seekable = response_seek_wrapper(unseekable)

        body = seekable.get_data()
        self.assertEqual(body, self.text)

        self._test2(seekable)
        seekable.seek(0)
        self._test4(seekable)
예제 #13
0
    def testResponseSeekWrapper(self):
        """Check that copying a seek wrapper yields an independent object.

        The copy produced by ``_testCopy`` must be a different object from
        the original, its ``info()`` must compare equal to the original's
        while being a different object, and closing it twice must not raise.
        """
        from mechanize import response_seek_wrapper
        hdrs = {"Content-type": "text/html"}
        r = TestUnSeekableResponse(self.text, hdrs)
        rsw = response_seek_wrapper(r)
        rsw2 = self._testCopy(rsw)
        # assertIsNot replaces the deprecated ``assert_`` alias (removed in
        # Python 3.12) and reports both operands when the check fails.
        self.assertIsNot(rsw, rsw2)
        self.assertEqual(rsw.info(), rsw2.info())
        self.assertIsNot(rsw.info(), rsw2.info())

        # should be able to close already-closed object
        rsw2.close()
        rsw2.close()
예제 #14
0
    def testResponseSeekWrapper(self):
        """Verify that a copied seek wrapper is independent of its source.

        The object returned by ``_testCopy`` must not be the original, the
        two ``info()`` results must be equal but distinct objects, and a
        second ``close()`` on the copy must be accepted.
        """
        from mechanize import response_seek_wrapper

        original = response_seek_wrapper(
            TestUnSeekableResponse(self.text, {"Content-type": "text/html"}))
        duplicate = self._testCopy(original)

        self.assertTrue(original is not duplicate)
        self.assertEqual(original.info(), duplicate.info())
        self.assertTrue(original.info() is not duplicate.info())

        # Closing twice: the second call hits an already-closed object.
        for _ in range(2):
            duplicate.close()
예제 #15
0
    def processLink(self, link):
        """Process a link.

        Records ``link.absoluteURL`` in ``self.visited`` and opens
        ``link.callableURL`` with ``self.browser``.  If that raises
        ``urllib2.HTTPError``, the error counter is incremented, two messages
        are sent through ``self.sendMessage`` (each with ``2`` as the second
        argument) and the error itself, wrapped to be seekable, is installed
        as the browser's response.

        :param link: object providing ``absoluteURL``, ``callableURL`` and
            ``referenceURL`` attributes.
        """
        url = link.absoluteURL

        # Whatever will happen, we have looked at the URL
        self.visited.append(url)

        # Retrieve the content
        try:
            self.browser.open(link.callableURL)
        # ``except X as name`` is valid on Python 2.6+ and Python 3; the old
        # comma form is a syntax error on Python 3.
        except urllib2.HTTPError as error:
            # Something went wrong with retrieving the page.
            self.linkErrors += 1
            self.sendMessage(
                '%s (%i): %s' % (error.msg, error.code, link.callableURL), 2)
            self.sendMessage('+-> Reference: ' + link.referenceURL, 2)
            # Now set the error page as the response
            # NOTE(review): this assigns the browser's private ``_response``
            # attribute directly -- confirm no public setter is preferable.
            from mechanize import response_seek_wrapper
            self.browser._response = response_seek_wrapper(error)
예제 #16
0
 def http_response(self, request, response):
     """Re-serialise HTML response bodies through lxml.

     The response is wrapped with ``mechanize.response_seek_wrapper`` when it
     has no ``seek`` attribute.  If its content-type header contains
     ``html``, the body is parsed with ``lxml.html.fromstring`` (falling back
     to ``lxml.html.soupparser.fromstring`` on ``UnicodeDecodeError`` or
     ``XMLSyntaxError``), the parsed tree is stored on ``self.element`` and
     the body is replaced by the pretty-printed HTML serialisation.

     :param request: the request that produced ``response`` (unused here).
     :param response: the response object to post-process.
     :returns: the (possibly wrapped and rewritten) response.
     """
     if not hasattr(response, "seek"):
         response = mechanize.response_seek_wrapper(response)
     # only use if response is html
     # (``dict.has_key`` is gone on Python 3; ``get`` with a default covers a
     # missing header on both Python 2 and 3 header objects.)
     if 'html' in response.info().get('content-type', ''):
         tag_soup = response.get_data()
         try:
             self.element = lxml.html.fromstring(tag_soup)
             # Check the unicode entity conversion has worked.  The result is
             # discarded; only a raised exception matters.  The string name
             # "unicode" is used because the ``unicode`` builtin does not
             # exist on Python 3.
             lxml.etree.tostring(self.element, encoding="unicode")
         except (UnicodeDecodeError, lxml.etree.XMLSyntaxError):
             # fall back to beautiful soup if there is an error
             self.element = lxml.html.soupparser.fromstring(tag_soup)
         response.set_data(
             lxml.etree.tostring(self.element,
                                 pretty_print=True,
                                 method="html"))
     return response
예제 #17
0
	def http_response(self, request, response):
		"""Pipe HTML response bodies through the executable at ``self.tidybin``.

		The response is wrapped with ``mechanize.response_seek_wrapper`` when
		it has no ``seek`` attribute.  If its content-type header contains
		``html``, the body is written to the process's stdin and replaced by
		whatever the process writes to stdout; stderr is captured and
		discarded.  Other responses are returned untouched.

		:param request: the request that produced ``response`` (unused here).
		:param response: the response object to post-process.
		:returns: the (possibly wrapped and rewritten) response.
		"""
		if not hasattr(response, "seek"):
			response = mechanize.response_seek_wrapper(response)
		# ``dict.has_key`` is gone on Python 3; ``get`` with a default handles
		# a missing header on both Python 2 and 3 header objects.
		if 'html' in response.info().get('content-type', ''):
			tidy = Popen([self.tidybin, "-q", "-i"], stdout=PIPE, stdin=PIPE, stderr=PIPE)
			# communicate() feeds stdin, drains both output pipes and waits
			# for the process to exit.
			tidied, _errors = tidy.communicate(input=response.get_data())
			response.set_data(tidied)
		return response