def test_response_httprepr(self): r1 = Response("http://www.example.com") self.assertEqual(response_httprepr(r1), b'HTTP/1.1 200 OK\r\n\r\n') r1 = Response("http://www.example.com", status=404, headers={"Content-type": "text/html"}, body=b"Some body") self.assertEqual(response_httprepr(r1), b'HTTP/1.1 404 Not Found\r\nContent-Type: text/html\r\n\r\nSome body') r1 = Response("http://www.example.com", status=6666, headers={"Content-type": "text/html"}, body=b"Some body") self.assertEqual(response_httprepr(r1), b'HTTP/1.1 6666 \r\nContent-Type: text/html\r\n\r\nSome body')
def process_response(self, request, response, spider):
    """Record download latency, response count and response size stats,
    split into image vs non-image buckets.

    Requests tagged with ``meta['IMAGES_PIPELINE']`` are accounted under
    ``downloader/images/*``; everything else under ``downloader/non_images/*``.
    Returns the response unchanged so the middleware chain continues.
    """
    # Choose the stat-key prefix once instead of duplicating the three
    # inc_value calls in both branches (the original branches differed
    # only in this prefix).
    if request.meta.get('IMAGES_PIPELINE', False):
        prefix = 'downloader/images'
    else:
        prefix = 'downloader/non_images'

    # Size of the raw HTTP representation, in kilobytes.
    reslen = len(response_httprepr(response)) / 1024

    # NOTE(review): download_latency may be missing (None) for some
    # responses (e.g. cache hits) — confirm inc_value tolerates None.
    self.stats.inc_value('%s/total_latency' % prefix,
                         request.meta.get('download_latency'), spider=spider)
    self.stats.inc_value('%s/response_count' % prefix, 1, spider=spider)
    # round() replaces the redundant float(str-format) round-trip of the
    # original: float("{0:.2f}".format(reslen)).
    self.stats.inc_value('%s/response_kilobytes' % prefix,
                         round(reslen, 2), spider=spider)

    super(CustomDownloaderStats, self).process_response(request, response, spider)
    return response
def process_response(self, request, response, spider):
    """Count the response, its status code and its raw size in the crawl stats."""
    stats = self.stats
    stats.inc_value('downloader/response_count', spider=spider)
    status_key = 'downloader/response_status_count/%s' % response.status
    stats.inc_value(status_key, spider=spider)
    # Size of the full raw HTTP representation (status line + headers + body).
    size = len(response_httprepr(response))
    stats.inc_value('downloader/response_bytes', size, spider=spider)
    return response
def process_response(self, request, response, spider):
    """Update the (module-level) downloader stats for every received response."""
    # One tick for the overall count, one for this particular status code.
    stats.inc_value('downloader/response_count', spider=spider)
    status_key = 'downloader/response_status_count/%s' % response.status
    stats.inc_value(status_key, spider=spider)
    # Account the size of the raw HTTP representation of the response.
    size = len(response_httprepr(response))
    stats.inc_value('downloader/response_bytes', size, spider=spider)
    return response
def process_response(self, request, response, spider):
    """Track per-response count, per-status-code count and raw byte size."""
    for key in ("downloader/response_count",
                f"downloader/response_status_count/{response.status}"):
        self.stats.inc_value(key, spider=spider)
    # Raw HTTP size: status line + headers + body.
    size = len(response_httprepr(response))
    self.stats.inc_value("downloader/response_bytes", size, spider=spider)
    return response
def process_response(self, request, response, spider):
    """Record response count, status-code count and byte size, bucketed per domain."""
    domain = _get_domain_from_url(response.url)
    prefix = 'downloader/%s' % domain
    self.stats.inc_value('%s/response_count' % prefix, spider=spider)
    self.stats.inc_value(
        '%s/response_status_count/%s' % (prefix, response.status),
        spider=spider)
    # Size of the raw HTTP representation of the response.
    size = len(response_httprepr(response))
    self.stats.inc_value('%s/response_bytes' % prefix, size, spider=spider)
    return response
def test_response_httprepr(self):
    """response_httprepr renders raw HTTP bytes; the ScrapyDeprecationWarning
    it emits is silenced for the duration of the test."""
    expectations = [
        # (Response kwargs, expected raw HTTP bytes)
        (
            {},
            b'HTTP/1.1 200 OK\r\n\r\n',
        ),
        (
            {"status": 404,
             "headers": {"Content-type": "text/html"},
             "body": b"Some body"},
            b'HTTP/1.1 404 Not Found\r\nContent-Type: text/html\r\n\r\nSome body',
        ),
        (
            {"status": 6666,
             "headers": {"Content-type": "text/html"},
             "body": b"Some body"},
            # unknown status code: empty reason phrase
            b'HTTP/1.1 6666 \r\nContent-Type: text/html\r\n\r\nSome body',
        ),
    ]
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", ScrapyDeprecationWarning)
        for kwargs, expected in expectations:
            r1 = Response("http://www.example.com", **kwargs)
            self.assertEqual(response_httprepr(r1), expected)
def process_response(self, request, response, spider):
    """Track response count, status-code count and byte size per proxy.

    Responses for requests with no proxy attached are passed through
    without touching the stats.
    """
    proxy = get_request_proxy(request)
    if not proxy:
        # Guard clause: nothing to record for non-proxied requests.
        return response
    prefix = 'downloader/proxy/%s' % proxy
    self.stats.inc_value('%s/response_count' % prefix, spider=spider)
    self.stats.inc_value(
        '%s/response_status_count/%s' % (prefix, response.status),
        spider=spider)
    # Size of the raw HTTP representation of the response.
    size = len(response_httprepr(response))
    self.stats.inc_value('%s/response_bytes' % prefix, size, spider=spider)
    return response
def process_response(self, request, response, spider):
    """Update downloader stats and stash the request URL into its meta."""
    stats = self.stats
    stats.inc_value('downloader/response_count', spider=spider)
    stats.inc_value('downloader/response_status_count/%s' % response.status,
                    spider=spider)
    # Size of the raw HTTP representation of the response.
    stats.inc_value('downloader/response_bytes',
                    len(response_httprepr(response)), spider=spider)
    # Record the request URL in meta for downstream consumers.
    request.meta["url"] = request.url
    # NOTE: per-crawlid failed-download bookkeeping (checking the status
    # against RETRY_HTTP_CODES and flagging the page) was left disabled here.
    return response
def test_response_len(self):
    """The downloader/response_bytes stat must equal len(response_httprepr(...))
    for every combination of empty/non-empty body and 0/1/2 headers."""
    bodies = (b'', b'not_empty')
    header_sets = (
        {},
        {'lang': 'en'},
        {'lang': 'en', 'User-Agent': 'scrapy'},
    )
    # Exercise every body/headers combination.
    for resp_body, resp_headers in product(bodies, header_sets):
        test_response = Response(url='scrapytest.org', status=200,
                                 body=resp_body, headers=resp_headers)
        self.crawler.stats.set_value('downloader/response_bytes', 0)
        self.mw.process_response(self.req, test_response, self.spider)
        self.assertStatsEqual('downloader/response_bytes',
                              len(response_httprepr(test_response)))
def process_spider_input(self, response, spider):
    # Stop feeding responses once enough items have been sampled for this spider.
    if stats.get_value("items_sampled", spider=spider) >= items_per_spider:
        return []
    # NOTE(review): this comparison looks inverted — it returns [] for responses
    # whose raw HTTP size is *below* max_response_size, not above it. If the
    # intent is to skip oversized responses, the condition should be
    # len(response_httprepr(response)) > max_response_size. Confirm the intent.
    elif max_response_size and max_response_size > len(response_httprepr(response)):
        return []
    # Falls through to an implicit None return otherwise. NOTE(review): Scrapy's
    # process_spider_input contract expects None (or an exception); verify that
    # the [] returns above are handled as intended by the caller.