def test_retry_dns_error(self):
    # Force every socket.gethostbyname() call to raise a DNS resolution
    # error, crawl a URL under that condition, then hand the verification
    # to self._assert_retried().
    # NOTE(review): a second test_retry_dns_error is defined further down in
    # this view; if both belong to the same class, the later definition
    # replaces this one and this test never runs -- confirm.
    dns_failure = socket.gaierror(-5, 'No address associated with hostname')
    with mock.patch('socket.gethostbyname', side_effect=dns_failure):
        yield docrawl(SimpleSpider("http://example.com/"))
        self._assert_retried()
def test_unbounded_response(self):
    # Completeness of responses without Content-Length or Transfer-Encoding
    # can not be determined, we treat them as valid but flagged as "partial".
    #
    # The raw HTTP response below (no Content-Length, no Transfer-Encoding,
    # "Connection: close") is URL-encoded into the `raw` query parameter of
    # the /raw endpoint on localhost:8998; the test then checks that the
    # test log contains exactly one "Got response 200" entry.

    # urlencode lives in urllib.parse on Python 3 and in urllib on Python 2;
    # try the modern location first so the test imports on both.
    try:
        from urllib.parse import urlencode
    except ImportError:
        from urllib import urlencode

    query = urlencode({'raw': '''\
HTTP/1.1 200 OK
Server: Apache-Coyote/1.1
X-Powered-By: Servlet 2.4; JBoss-4.2.3.GA (build: SVNTag=JBoss_4_2_3_GA date=200807181417)/JBossWeb-2.0
Set-Cookie: JSESSIONID=08515F572832D0E659FD2B0D8031D75F; Path=/
Pragma: no-cache
Expires: Thu, 01 Jan 1970 00:00:00 GMT
Cache-Control: no-cache
Cache-Control: no-store
Content-Type: text/html;charset=UTF-8
Content-Language: en
Date: Tue, 27 Aug 2013 13:05:05 GMT
Connection: close

foo body
with multiples lines
'''})
    spider = SimpleSpider("http://localhost:8998/raw?{0}".format(query))
    yield docrawl(spider)
    log = get_testlog()
    self.assertEqual(log.count("Got response 200"), 1)
def test_https_noconnect(self): os.environ[ 'https_proxy'] = 'http://*****:*****@localhost:8888?noconnect' spider = SimpleSpider("https://*****:*****@localhost:8888'
def test_https_tunnel_auth_error(self): os.environ['https_proxy'] = 'http://*****:*****@localhost:8888' spider = SimpleSpider("https://*****:*****@localhost:8888'
def test_retry_dns_error(self):
    # Crawl a URL on the host name "localhost666" (presumably one that does
    # not resolve -- the test name says DNS error), then delegate the
    # verification to self._assert_retried().
    yield docrawl(SimpleSpider("http://localhost666/status?n=503"))
    self._assert_retried()
def test_retry_conn_failed(self):
    # Crawl a URL on localhost port 65432 (presumably a port nothing listens
    # on, so the connection fails -- confirm against the test setup), then
    # delegate the verification to self._assert_retried().
    yield docrawl(SimpleSpider("http://localhost:65432/status?n=503"))
    self._assert_retried()
def test_retry_503(self):
    # Crawl the /status endpoint on localhost:8998 with n=503 (presumably the
    # test server answers with that status code -- confirm), then delegate
    # the verification to self._assert_retried().
    yield docrawl(SimpleSpider("http://localhost:8998/status?n=503"))
    self._assert_retried()
def test_retry_conn_aborted(self):
    # Connection lost before receiving data: crawl the /drop endpoint with
    # abort=1 on localhost:8998, then delegate the verification to
    # self._assert_retried().
    yield docrawl(SimpleSpider("http://localhost:8998/drop?abort=1"))
    self._assert_retried()
def test_https_noconnect_auth_error(self):
    # Point the https_proxy environment variable at the localhost:8888 proxy
    # with the "noconnect" query flag, crawl an HTTPS URL, and expect the
    # response code 407 (Proxy Authentication Required).
    # NOTE(review): the environment variable is not restored here;
    # presumably the test case's tearDown takes care of it -- confirm.
    proxy_url = 'http://*****:*****@localhost:8888?noconnect'
    os.environ['https_proxy'] = proxy_url
    yield docrawl(SimpleSpider("https://localhost:8999/status?n=200"))
    self._assert_got_response_code(407)
def test_https_connect_tunnel_error(self):
    # Crawl an HTTPS URL whose port (99999) lies outside the valid TCP port
    # range, then delegate the verification to
    # self._assert_got_tunnel_error().
    yield docrawl(SimpleSpider("https://localhost:99999/status?n=200"))
    self._assert_got_tunnel_error()
def test_https_connect_tunnel(self):
    # Crawl the HTTPS /status endpoint on localhost:8999 with n=200 and
    # expect the response code 200.
    yield docrawl(SimpleSpider("https://localhost:8999/status?n=200"))
    self._assert_got_response_code(200)