コード例 #1
0
ファイル: sitemap.py プロジェクト: dahuangfeng123/crawlmi
def get_sitemap_body(response):
    '''Extract the sitemap payload from ``response``.

    Checks are tried in this order, and the first match wins:

    1. ``response`` is an ``XmlResponse`` -> its body as-is.
    2. ``is_gzipped(response)`` is true -> its body passed through ``gunzip``.
    3. the URL ends with ``.xml`` -> its body as-is.
    4. the URL ends with ``.xml.gz`` -> its body passed through ``gunzip``.

    Returns None when no check matches, i.e. the response is not treated
    as a sitemap.
    '''
    if isinstance(response, XmlResponse):
        return response.body
    if is_gzipped(response):
        return gunzip(response.body)

    # Fall back to the URL suffix when the response itself gives no hint.
    url = response.url
    if url.endswith('.xml'):
        return response.body
    if url.endswith('.xml.gz'):
        return gunzip(response.body)
    return None
コード例 #2
0
ファイル: http_compression.py プロジェクト: Mimino666/crawlmi
 def process_response(self, response):
     '''Decode the response body using its last ``Content-Encoding`` value.

     The response is returned untouched when it carries no
     ``Content-Encoding`` values or when ``is_gzipped(response)`` is true.
     Otherwise the last listed encoding is popped, the header is deleted
     once no values remain, and a replacement response is built from the
     decoded body with a class chosen by ``factory.from_args``.
     '''
     pending = response.headers.getlist('Content-Encoding')
     if not pending or is_gzipped(response):
         return response

     # NOTE(review): the request is passed to the settings lookup,
     # presumably so the limit can be overridden per request - confirm.
     size_limit = self.settings.get_int(
         'DOWNLOAD_SIZE_LIMIT', 0, response.request)
     codec = pending.pop()
     if not pending:
         del response.headers['Content-Encoding']
     body = self._decode(response.body, codec.lower(), size_limit)
     # The response class is picked after the header update above.
     new_cls = factory.from_args(headers=response.headers, url=response.url)
     return response.replace(cls=new_cls, body=body)
コード例 #3
0
 def process_response(self, response):
     '''Decode the response body using its last ``Content-Encoding`` value.

     The response is returned untouched when it carries no
     ``Content-Encoding`` values or when ``is_gzipped(response)`` is true.
     Otherwise the last listed encoding is popped, the header is deleted
     once no values remain, and a replacement response is built from the
     decoded body with a class chosen by ``factory.from_args``.
     '''
     pending = response.headers.getlist('Content-Encoding')
     if not pending or is_gzipped(response):
         return response

     # NOTE(review): the request is passed to the settings lookup,
     # presumably so the limit can be overridden per request - confirm.
     size_limit = self.settings.get_int(
         'DOWNLOAD_SIZE_LIMIT', 0, response.request)
     codec = pending.pop()
     if not pending:
         del response.headers['Content-Encoding']
     body = self._decode(response.body, codec.lower(), size_limit)
     # The response class is picked after the header update above.
     new_cls = factory.from_args(headers=response.headers, url=response.url)
     return response.replace(cls=new_cls, body=body)