コード例 #1
0
ファイル: test_utils_response.py プロジェクト: zz1512/scrapy
    def test_response_httprepr(self):
        """Check response_httprepr for the default, a known, and an unknown status."""
        cases = [
            # (Response kwargs, expected raw HTTP representation)
            ({}, b'HTTP/1.1 200 OK\r\n\r\n'),
            ({"status": 404,
              "headers": {"Content-type": "text/html"},
              "body": b"Some body"},
             b'HTTP/1.1 404 Not Found\r\nContent-Type: text/html\r\n\r\nSome body'),
            ({"status": 6666,
              "headers": {"Content-type": "text/html"},
              "body": b"Some body"},
             b'HTTP/1.1 6666 \r\nContent-Type: text/html\r\n\r\nSome body'),
        ]
        for kwargs, expected in cases:
            response = Response("http://www.example.com", **kwargs)
            self.assertEqual(response_httprepr(response), expected)
コード例 #2
0
 def process_response(self, request, response, spider):
     """Record latency, count and size stats for each downloaded response.

     Responses for requests tagged with meta['IMAGES_PIPELINE'] are
     accounted under 'downloader/images/*' keys, everything else under
     'downloader/non_images/*'.  The response is always returned unchanged.
     """
     # The two original branches were identical except for the key prefix;
     # compute the prefix once instead of duplicating the three inc_value calls.
     if request.meta.get('IMAGES_PIPELINE', False):
         prefix = 'downloader/images'
     else:
         prefix = 'downloader/non_images'
     # Serialized response size, converted to kilobytes.
     reslen = len(response_httprepr(response)) / 1024
     self.stats.inc_value('%s/total_latency' % prefix,
                          request.meta.get('download_latency'),
                          spider=spider)
     self.stats.inc_value('%s/response_count' % prefix,
                          1,
                          spider=spider)
     self.stats.inc_value('%s/response_kilobytes' % prefix,
                          # rounded to 2 decimal places, as before
                          float("{0:.2f}".format(reslen)),
                          spider=spider)
     # Let the base downloader-stats middleware record its own counters too.
     super(CustomDownloaderStats,
           self).process_response(request, response, spider)
     return response
コード例 #3
0
ファイル: stats.py プロジェクト: pyarnold/scrapy
 def process_response(self, request, response, spider):
     """Count every response, its status code, and its serialized byte size."""
     stats = self.stats
     stats.inc_value('downloader/response_count', spider=spider)
     status_key = 'downloader/response_status_count/%s' % response.status
     stats.inc_value(status_key, spider=spider)
     # Size of the raw HTTP representation of the response.
     body_size = len(response_httprepr(response))
     stats.inc_value('downloader/response_bytes', body_size, spider=spider)
     return response
コード例 #4
0
 def process_response(self, request, response, spider):
     """Track total responses, a per-status breakdown, and bytes downloaded."""
     # NOTE: uses the module-level `stats` object, not self.stats.
     size = len(response_httprepr(response))
     status_key = 'downloader/response_status_count/%s' % response.status
     stats.inc_value('downloader/response_count', spider=spider)
     stats.inc_value(status_key, spider=spider)
     stats.inc_value('downloader/response_bytes', size, spider=spider)
     return response
コード例 #5
0
 def process_response(self, request, response, spider):
     """Bump downloader response counters and accumulate response byte size."""
     status_key = f"downloader/response_status_count/{response.status}"
     size = len(response_httprepr(response))
     self.stats.inc_value("downloader/response_count", spider=spider)
     self.stats.inc_value(status_key, spider=spider)
     self.stats.inc_value("downloader/response_bytes", size, spider=spider)
     return response
コード例 #6
0
ファイル: stats.py プロジェクト: oceancloud82/scraping
 def process_response(self, request, response, spider):
     """Record response count, status breakdown and bytes, keyed by domain."""
     domain = _get_domain_from_url(response.url)
     # Build the per-domain stat keys up front for readability.
     count_key = 'downloader/%s/response_count' % domain
     status_key = ('downloader/%s/response_status_count/%s'
                   % (domain, response.status))
     bytes_key = 'downloader/%s/response_bytes' % domain
     self.stats.inc_value(count_key, spider=spider)
     self.stats.inc_value(status_key, spider=spider)
     self.stats.inc_value(bytes_key,
                          len(response_httprepr(response)),
                          spider=spider)
     return response
コード例 #7
0
    def test_response_httprepr(self):
        """Verify response_httprepr for default, 404 and unknown status codes."""
        cases = [
            # (Response kwargs, expected raw HTTP representation)
            ({}, b'HTTP/1.1 200 OK\r\n\r\n'),
            ({"status": 404,
              "headers": {"Content-type": "text/html"},
              "body": b"Some body"},
             b'HTTP/1.1 404 Not Found\r\nContent-Type: text/html\r\n\r\nSome body'),
            ({"status": 6666,
              "headers": {"Content-type": "text/html"},
              "body": b"Some body"},
             b'HTTP/1.1 6666 \r\nContent-Type: text/html\r\n\r\nSome body'),
        ]
        with warnings.catch_warnings():
            # response_httprepr is deprecated; keep the test output clean.
            warnings.simplefilter("ignore", ScrapyDeprecationWarning)
            for kwargs, expected in cases:
                response = Response("http://www.example.com", **kwargs)
                self.assertEqual(response_httprepr(response), expected)
コード例 #8
0
ファイル: stats.py プロジェクト: oceancloud82/scraping
 def process_response(self, request, response, spider):
     """Accumulate per-proxy response stats; requests without a proxy are skipped."""
     proxy = get_request_proxy(request)
     # Guard clause: nothing to record when the request used no proxy.
     if not proxy:
         return response
     base = 'downloader/proxy/%s' % proxy
     self.stats.inc_value('%s/response_count' % base, spider=spider)
     self.stats.inc_value('%s/response_status_count/%s' % (base, response.status),
                          spider=spider)
     self.stats.inc_value('%s/response_bytes' % base,
                          len(response_httprepr(response)),
                          spider=spider)
     return response
コード例 #9
0
ファイル: download_stats.py プロジェクト: mtaziz/jaycluster
 def process_response(self, request, response, spider):
     """Count the response, its status and serialized size, and stash the
     request URL into meta for downstream consumers.

     Always returns the response unchanged.
     """
     self.stats.inc_value('downloader/response_count', spider=spider)
     self.stats.inc_value('downloader/response_status_count/%s' % response.status,
                          spider=spider)
     # Size of the raw HTTP representation of the response.
     reslen = len(response_httprepr(response))
     self.stats.inc_value('downloader/response_bytes', reslen, spider=spider)
     # Preserve the (possibly redirected) request URL so later middlewares or
     # pipelines can read it from meta.
     request.meta["url"] = request.url
     # Removed a large block of dead commented-out failure-accounting code;
     # it was unreachable and only obscured the active logic.
     return response
コード例 #10
0
 def test_response_len(self):
     """downloader/response_bytes must equal the httprepr length for every
     combination of empty/non-empty body and 0/1/2 headers."""
     bodies = (b'', b'not_empty')
     header_sets = (
         {},
         {'lang': 'en'},
         {'lang': 'en', 'User-Agent': 'scrapy'},
     )
     for body, headers in product(bodies, header_sets):
         test_response = Response(url='scrapytest.org', status=200,
                                  body=body, headers=headers)
         # Reset the counter so each response is measured in isolation.
         self.crawler.stats.set_value('downloader/response_bytes', 0)
         self.mw.process_response(self.req, test_response, self.spider)
         self.assertStatsEqual('downloader/response_bytes',
                               len(response_httprepr(test_response)))
コード例 #11
0
ファイル: itemsampler.py プロジェクト: serkanh/scrapy
 def process_spider_input(self, response, spider):
     # Drop the response (yield no results to the spider) once this spider
     # has already sampled its quota of items.
     if stats.get_value("items_sampled", spider=spider) >= items_per_spider:
         return []
     # NOTE(review): this branch drops responses whose serialized size is
     # BELOW max_response_size, which looks inverted for a size limit —
     # confirm the intended comparison direction.
     elif max_response_size and max_response_size > len(response_httprepr(response)):
         return []