def test_body(self):
    """Check that ``Response.body`` is exposed as bytes for each kind of input.

    Covers a missing body, an empty bytes body (also checking the default
    encoding) and text bodies encoded with utf-8 and latin1.
    """
    url = "http://www.example.com/"
    pound_text = u"Price: \xa3100"

    # No body given: expected to compare equal to empty bytes.
    without_body = Response(url=url, request=Request(url))
    assert without_body.body == b''

    # Explicit empty bytes body; the encoding is expected to default to utf-8.
    empty_body = Response(url=url, body=b"", request=Request(url))
    assert isinstance(empty_body.body, bytes)
    self.assertEqual(empty_body.encoding, 'utf-8')

    # Text body with utf-8: the pound sign becomes two bytes.
    utf8_resp = Response(
        url=url,
        body=pound_text,
        encoding='utf-8',
        request=Request(url),
    )
    assert isinstance(utf8_resp.body, bytes)
    self.assertEqual(utf8_resp.body, b"Price: \xc2\xa3100")

    # Text body with latin1: the pound sign stays a single byte.
    latin1_resp = Response(
        url=url,
        request=Request(url),
        body=pound_text,
        encoding='latin1',
    )
    assert isinstance(latin1_resp.body, bytes)
    self.assertEqual(latin1_resp.body, b"Price: \xa3100")
def test_init(self):
    """Exercise ``Response`` construction: rejected arguments first, then valid ones."""
    url = 'http://www.example.com/'

    # Omitting ``url``, ``request`` or both is expected to raise.
    with self.assertRaises(Exception):
        Response()
    with self.assertRaises(Exception):
        Response(url=url)
    request = Request(url)  # built outside the guarded call on purpose
    with self.assertRaises(Exception):
        Response(request=request)

    # An invalid url or status value is expected to raise ValueError.
    request = Request(url)
    with self.assertRaises(ValueError):
        Response(url='foo', request=request)
    request = Request(url)
    with self.assertRaises(ValueError):
        Response(url, status='foo', request=request)

    # A string in place of a Request is expected to raise TypeError.
    with self.assertRaises(TypeError):
        Response(url, request='foo')

    # Minimal valid construction: url is truthy, body is falsy.
    minimal = Response(url, Request(url))
    assert minimal.url
    assert not minimal.body

    # Construction with a headers mapping only has to succeed.
    with_headers = Response(
        url,
        Request(url),
        headers={'Content-Type': 'text/html', 'Content-Length': 1234},
    )
def test_url(self):
    """Check how ``Response.url`` is stored for ASCII, non-ASCII and invalid input."""
    plain_url = 'http://www.example.com/'

    # A plain ASCII url is kept as a ``str`` and left unchanged.
    ascii_resp = Response(plain_url, request=Request(plain_url))
    self.assertIsInstance(ascii_resp.url, str)
    self.assertEqual(ascii_resp.url, plain_url)

    # A url with non-ASCII characters is expected to match safe_url_string's output.
    unicode_resp = Response(
        u'http://www.example.com?content=测试',
        request=Request(plain_url),
    )
    expected = safe_url_string('http://www.example.com?content=测试')
    self.assertEqual(unicode_resp.url, expected)

    # A non-string url is expected to raise TypeError.
    with self.assertRaises(TypeError):
        Response(123)
def test_copy(self):
    """Check that ``Response.copy()`` yields an equal but distinct object.

    The copy must compare equal to the source (including ``headers`` and
    ``request``) while none of those three objects may be shared.
    """
    url = 'http://www.example.com/'
    copied_attrs = ('headers', 'request')

    original = Response(
        url,
        headers={'Content-Type': 'text/html', 'Content-Length': 1234},
        request=Request(url),
    )
    duplicate = original.copy()

    # Equality: instance dicts, then each copied attribute, then the whole object.
    assert original.__dict__ == duplicate.__dict__
    for attr in copied_attrs:
        self.assertEqual(getattr(original, attr), getattr(duplicate, attr))
    self.assertEqual(original, duplicate)

    # Identity: nothing may be the very same object.
    for attr in copied_attrs:
        self.assertIsNot(getattr(original, attr), getattr(duplicate, attr))
    self.assertIsNot(original, duplicate)
def _fetch_static(self, request, url):
    """Download ``url`` over HTTP with ``requests`` and wrap it in a ``Response``.

    The timeout and SSL verification flag come from ``self.settings``; headers,
    optional proxy, cookie jar, HTTP method and body come from ``request``.

    :param request: the request being served; ``headers``, ``meta``,
        ``cookiejar``, ``method`` and ``body`` are read from it.
    :param url: the address to fetch.
    :returns: a ``Response`` built from the final url, status code, cookies
        and raw content of the HTTP response.
    :raises TimeoutException: when ``requests`` reports a ``Timeout``.
    :raises ValueError: when ``request.method`` is neither GET nor POST
        (logged like any other download error before propagating).
    """
    self.logger.info("processing static page %s", url)
    kwargs = {
        "timeout": self.settings["TIMEOUT"],
        "headers": request.headers,
        "verify": self.settings["STATIC_REQUEST_SSL_VERIFY"],
    }
    proxy = request.meta.get("proxy")
    if proxy:
        kwargs["proxies"] = proxy
    try:
        session = requests.Session()
        try:
            if request.cookiejar:
                session.cookies = request.cookiejar
            if request.method == 'GET':
                response = session.get(url, **kwargs)
            elif request.method == 'POST':
                if request.body:
                    kwargs["data"] = request.body
                response = session.post(url, **kwargs)
            else:
                raise ValueError('Unacceptable HTTP verb %s' % request.method)
            return Response(
                response.url,
                request,
                status=response.status_code,
                cookiejar=response.cookies,
                body=response.content,
            )
        finally:
            # Always give the session's pooled connections back, even on error.
            session.close()
    except Timeout as e:
        # ``e.message`` is not available on Python 3 exceptions; str(e) is portable.
        raise TimeoutException(str(e))
    except Exception as e:
        self.logger.error("download error: %s", str(e), exc_info=True)
        # Bare raise keeps the original traceback intact.
        raise
def test_process_response(self):
    """Check ``RetryMiddleware.process_response`` on a status-500 response.

    With ``dont_retry`` set the response is handed back as is. Otherwise the
    test expects three consecutive calls to return a ``Request`` and the
    fourth call to return a ``Response``.
    """
    url = 'http://httpbin.org/'
    request = Request(url)
    failed = Response(url, request, status=500)
    middleware = RetryMiddleware(self.spider.settings, self.spider.logger)

    # Retrying disabled: the very same response comes back.
    request.meta["dont_retry"] = True
    self.assertEqual(middleware.process_response(request, failed), failed)

    # Retrying enabled: the first call yields a request with retry_count == 1.
    request.meta["dont_retry"] = False
    retried = middleware.process_response(request, failed)
    self.assertIsInstance(retried, Request)
    self.assertEqual(retried.meta.get("retry_count"), 1)

    # Second and third calls still yield requests.
    for _ in range(2):
        retried = middleware.process_response(retried, failed)
        self.assertIsInstance(retried, Request)

    # The fourth call is expected to give up and return a response.
    self.assertIsInstance(middleware.process_response(retried, failed), Response)
def _fetch_dynamic(self, request, url):
    """Load ``url`` through ``self.driver`` and wrap the page in a ``Response``.

    Access to the driver is serialised with ``self.driver_sem``. When the
    request carries a cookie jar, its cookies are converted into script
    commands and executed after a first load of ``url``. The page is then
    loaded with the configured timeout, ``request.wait`` is slept, and every
    callable in ``request.browser_actions`` is invoked with the driver.

    :param request: the request being served; ``cookiejar``, ``wait`` and
        ``browser_actions`` are read from it.
    :param url: the address to load.
    :returns: a ``Response`` carrying the driver's current url, the page
        source as body and a cookie jar built from the driver's cookies.
    :raises TimeoutException: when the driver raises ``_TimeoutException``.
    """
    self.logger.info("processing dynamic page %s", url)
    try:
        self.driver_sem.acquire()
        try:
            if request.cookiejar:
                cookies = _get_cookies_from_cookiejar(request.cookiejar)
                cookies = self._covert_cookies_to_dict(cookies)
                #self._removed_first_dot_in_front_of_domain(cookies)
                command_list = self._get_command_list(cookies)
                # Load the page once so the driver is on the cookies' domain.
                # NOTE(review): this first load happens before the page-load
                # timeout below is applied in this call.
                self.driver.get(url)
                # Load the cookies into the page.
                for command in command_list:
                    self.driver.execute_script(command)
            self.driver.set_page_load_timeout(self.settings["TIMEOUT"])
            self.driver.get(url)
            gevent.sleep(request.wait)
            for func in request.browser_actions:
                func(self.driver)
            url = self.driver.current_url
            html = self.driver.page_source
            # Collect the cookies, then clear them from the driver.
            all_cookies = self.driver.get_cookies()
            self.driver.delete_all_cookies()
        finally:
            # Release on every path; otherwise a failed load would keep the
            # semaphore held and block later fetches.
            self.driver_sem.release()
        all_cookies = self._to_byte(all_cookies)
        # Rebuild a cookie jar from the driver's cookie dicts.
        cj = cookielib.CookieJar()
        for cookie_kwargs in all_cookies:
            cj.set_cookie(self._make_cookie(**cookie_kwargs))
        return Response(url, request, cookiejar=cj, body=html)
    except _TimeoutException as e:
        # ``e.message`` is not available on Python 3 exceptions; str(e) is portable.
        raise TimeoutException(str(e))
    except Exception as e:
        self.logger.error("download error: %s", str(e), exc_info=True)
        # Bare raise keeps the original traceback intact.
        raise
def test_request(self):
    """Check that ``Response.request`` exposes the request it was built with."""
    url = 'http://www.example.com/'
    response = Response(url, request=Request(url))

    # The attribute is a Request and compares equal to a fresh one for the same url.
    self.assertIsInstance(response.request, Request)
    expected = Request(url)
    self.assertEqual(response.request, expected)