예제 #1
0
    def test_body(self):
        """Check that ``Response.body`` is exposed as ``bytes``.

        Four cases are exercised: no body at all, an explicit empty bytes
        body (which also pins the default encoding), and a text body
        combined with an explicit ``utf-8`` / ``latin1`` encoding.

        unittest assertion methods are used throughout (rather than bare
        ``assert``) so the checks survive ``python -O`` and report the
        offending values on failure.
        """
        # Body omitted: an empty bytes object is expected.
        r1 = Response(url="http://www.example.com/",
                      request=Request('http://www.example.com/'))
        self.assertEqual(r1.body, b'')

        # Explicit empty body: still bytes, and the encoding falls back to utf-8.
        r2 = Response(url="http://www.example.com/",
                      body=b"",
                      request=Request('http://www.example.com/'))
        self.assertIsInstance(r2.body, bytes)
        self.assertEqual(r2.encoding, 'utf-8')  # default encoding

        # Text body + utf-8: the pound sign becomes the two-byte sequence C2 A3.
        r3 = Response(url="http://www.example.com/",
                      body=u"Price: \xa3100",
                      encoding='utf-8',
                      request=Request('http://www.example.com/'))
        self.assertIsInstance(r3.body, bytes)
        self.assertEqual(r3.body, b"Price: \xc2\xa3100")

        # Text body + latin1: the pound sign stays a single A3 byte.
        r4 = Response(url="http://www.example.com/",
                      request=Request('http://www.example.com/'),
                      body=u"Price: \xa3100",
                      encoding='latin1')
        self.assertIsInstance(r4.body, bytes)
        self.assertEqual(r4.body, b"Price: \xa3100")
예제 #2
0
 def test_init(self):
     """Check argument validation performed when constructing a ``Response``.

     Invalid combinations (missing url or request, a malformed url, a
     non-numeric status, a request that is not a ``Request``) must raise;
     valid combinations must construct successfully.

     unittest assertion methods are used instead of bare ``assert`` so the
     checks are not stripped under ``python -O``.
     """
     # Both url and request are required.
     self.assertRaises(Exception, Response)
     self.assertRaises(Exception, Response, url='http://www.example.com/')
     self.assertRaises(Exception, Response, request=Request('http://www.example.com/'))

     # A url that is not a proper URL is rejected.
     self.assertRaises(ValueError,
                       Response,
                       url='foo',
                       request=Request('http://www.example.com/'))
     # So is a status that cannot be interpreted as a number.
     self.assertRaises(ValueError,
                       Response,
                       'http://www.example.com/',
                       status='foo',
                       request=Request('http://www.example.com/'))
     # The request argument must not be a plain string.
     self.assertRaises(TypeError,
                       Response,
                       'http://www.example.com/',
                       request='foo')

     # Minimal valid construction: url is set and the body is empty.
     response = Response('http://www.example.com/',
                         Request('http://www.example.com/'))
     self.assertTrue(response.url)
     self.assertFalse(response.body)

     # Construction with headers only has to succeed; nothing is asserted
     # on the result, so it is not bound to a name.
     Response('http://www.example.com/',
              Request('http://www.example.com/'),
              headers={'Content-Type': 'text/html',
                       'Content-Length': 1234})
예제 #3
0
 def test_url(self):
     """Check how ``Response`` stores and validates its url.

     A plain ASCII url is kept as a ``str``, a url containing non-ASCII
     characters is compared against ``safe_url_string`` of the same text,
     and a non-string url raises ``TypeError``.
     """
     ascii_response = Response(
         'http://www.example.com/',
         request=Request('http://www.example.com/'),
     )
     self.assertIsInstance(ascii_response.url, str)
     self.assertEqual(ascii_response.url, 'http://www.example.com/')

     unicode_response = Response(
         u'http://www.example.com?content=测试',
         request=Request('http://www.example.com/'),
     )
     escaped = safe_url_string('http://www.example.com?content=测试')
     self.assertEqual(unicode_response.url, escaped)

     # An integer is not an acceptable url.
     with self.assertRaises(TypeError):
         Response(123)
예제 #4
0
    def test_copy(self):
        """Check that ``Response.copy()`` returns an equal but distinct object.

        The copy must compare equal to the original (attribute dict,
        headers, request and the response itself) while the copy, its
        headers and its request are different objects from the originals.

        ``assertEqual`` replaces the former bare ``assert`` so the check is
        kept under ``python -O`` and prints a diff when it fails.
        """
        response1 = Response('http://www.example.com/',
                             headers={'Content-Type': 'text/html',
                                      'Content-Length': 1234},
                             request=Request('http://www.example.com/'))
        response2 = response1.copy()

        # Equality: every compared part of the copy matches the original.
        self.assertEqual(response1.__dict__, response2.__dict__)
        self.assertEqual(response1.headers, response2.headers)
        self.assertEqual(response1.request, response2.request)
        self.assertEqual(response1, response2)

        # Identity: nothing is shared between the two objects.
        self.assertIsNot(response1.headers, response2.headers)
        self.assertIsNot(response1.request, response2.request)
        self.assertIsNot(response1, response2)
예제 #5
0
 def _fetch_static(self, request, url):
     """Download ``url`` through a ``requests`` session and wrap the result.

     The timeout and SSL verification flag come from ``self.settings``;
     headers, proxy (``request.meta["proxy"]``), cookie jar, HTTP method
     and body come from ``request``. Only ``GET`` and ``POST`` are handled.

     :param request: originating request object.
     :param url: address to fetch.
     :return: a ``Response`` carrying the final url, status code, the
         cookies of the HTTP response and the raw content.
     :raises TimeoutException: when ``requests`` reports a ``Timeout``.
     :raises ValueError: for any HTTP verb other than ``GET``/``POST``.
         This and every other non-timeout error is logged and re-raised.
     """
     self.logger.info("processing static page %s", url)
     kwargs = {
         "timeout": self.settings["TIMEOUT"],
         "headers": request.headers,
         "verify": self.settings["STATIC_REQUEST_SSL_VERIFY"],
     }
     if request.meta.get("proxy"):
         kwargs["proxies"] = request.meta["proxy"]
     try:
         # The context manager closes the session (and its connection
         # pool) on every exit path; previously it was never closed.
         with requests.Session() as session:
             if request.cookiejar:
                 session.cookies = request.cookiejar
             if request.method == 'GET':
                 response = session.get(url, **kwargs)
             elif request.method == 'POST':
                 if request.body:
                     kwargs["data"] = request.body
                 response = session.post(url, **kwargs)
             else:
                 raise ValueError('Unacceptable HTTP verb %s' % request.method)
             # ``response.content`` is read here, while the session is
             # still open, so the body is fully available to the caller.
             return Response(response.url,
                             request,
                             status=response.status_code,
                             cookiejar=response.cookies,
                             body=response.content)
     except Timeout as e:
         # ``e.message`` does not exist on Python 3 exceptions; ``str(e)``
         # works on both Python 2 and 3.
         raise TimeoutException(str(e))
     except Exception as e:
         self.logger.error("download error: %s", str(e), exc_info=True)
         # Bare ``raise`` re-raises with the original traceback intact.
         raise
    def test_process_response(self):
        """Exercise ``RetryMiddleware.process_response`` on a 500 response.

        With ``dont_retry`` set the response is handed back unchanged.
        Otherwise the first three calls each return a ``Request`` (the
        first one with ``retry_count`` equal to 1) and the fourth call
        returns a ``Response``.
        """
        req = Request('http://httpbin.org/')
        failed = Response('http://httpbin.org/', req, status=500)
        middleware = RetryMiddleware(self.spider.settings, self.spider.logger)

        # Retrying disabled: the response passes straight through.
        req.meta["dont_retry"] = True
        passthrough = middleware.process_response(req, failed)
        self.assertEqual(passthrough, failed)

        # Retrying enabled: the first failure yields a retry request.
        req.meta["dont_retry"] = False
        req = middleware.process_response(req, failed)
        self.assertIsInstance(req, Request)
        self.assertEqual(req.meta.get("retry_count"), 1)

        # Second and third failures still yield retry requests.
        for _ in range(2):
            req = middleware.process_response(req, failed)
            self.assertIsInstance(req, Request)

        # Fourth failure: a response comes back instead of another request.
        final = middleware.process_response(req, failed)
        self.assertIsInstance(final, Response)
예제 #7
0
    def _fetch_dynamic(self, request, url):
        """Load ``url`` in ``self.driver`` and wrap the rendered page.

        While holding ``self.driver_sem`` the method optionally injects the
        cookies from ``request.cookiejar``, loads the page, sleeps for
        ``request.wait``, runs every callable in ``request.browser_actions``
        against the driver, then collects the current url, page source and
        cookies and clears the driver's cookies.

        :param request: originating request object.
        :param url: address to load.
        :return: a ``Response`` with the driver's final url, a fresh
            ``CookieJar`` built from the driver's cookies, and the page
            source as body.
        :raises TimeoutException: when the driver raises ``_TimeoutException``.
            Any other error is logged and re-raised.
        """
        self.logger.info("processing dynamic page %s", url)
        try:
            self.driver_sem.acquire()
            try:
                if request.cookiejar:
                    cookies = _get_cookies_from_cookiejar(request.cookiejar)
                    cookies = self._covert_cookies_to_dict(cookies)
                    command_list = self._get_command_list(cookies)
                    # Visit the page first so the current document is on the
                    # same domain as the cookies about to be loaded.
                    self.driver.get(url)
                    # Load the cookies by running the generated scripts.
                    for command in command_list:
                        self.driver.execute_script(command)

                self.driver.set_page_load_timeout(self.settings["TIMEOUT"])
                self.driver.get(url)
                gevent.sleep(request.wait)
                for func in request.browser_actions:
                    func(self.driver)
                url = self.driver.current_url
                html = self.driver.page_source

                # Collect the cookies, then clear them from the driver.
                all_cookies = self.driver.get_cookies()
                self.driver.delete_all_cookies()
            finally:
                # Release on every exit path. Previously an exception while
                # driving the browser left the semaphore held forever.
                self.driver_sem.release()

            all_cookies = self._to_byte(all_cookies)
            cookies = [self._make_cookie(**d) for d in all_cookies]

            # Copy the driver cookies into a new cookie jar for the response.
            cj = cookielib.CookieJar()
            for cookie in cookies:
                cj.set_cookie(cookie)
            return Response(url, request, cookiejar=cj, body=html)
        except _TimeoutException as e:
            # ``e.message`` does not exist on Python 3 exceptions; ``str(e)``
            # works on both Python 2 and 3.
            raise TimeoutException(str(e))
        except Exception as e:
            self.logger.error("download error: %s", str(e), exc_info=True)
            # Bare ``raise`` re-raises with the original traceback intact.
            raise
예제 #8
0
 def test_request(self):
     """Check that a ``Response`` exposes the request it was built with.

     ``response.request`` must be a ``Request`` instance and compare equal
     to a freshly built ``Request`` for the same url.
     """
     origin = Request('http://www.example.com/')
     resp = Response('http://www.example.com/', request=origin)

     attached = resp.request
     self.assertIsInstance(attached, Request)
     self.assertEqual(attached, Request('http://www.example.com/'))