Example #1
0
def get_sitemap_body(response):
    '''Extract the sitemap payload from ``response``.

    The checks run in this order and the first match wins:

    * an ``XmlResponse`` instance -> its body, untouched;
    * a response accepted by ``is_gzipped`` -> its body passed to ``gunzip``;
    * a URL ending in ``.xml`` -> its body, untouched;
    * a URL ending in ``.xml.gz`` -> its body passed to ``gunzip``.

    When none of the checks match the response is not treated as a sitemap
    and None is returned.
    '''
    # Type / gzip detection takes precedence over the URL suffix.
    if isinstance(response, XmlResponse):
        return response.body
    if is_gzipped(response):
        return gunzip(response.body)

    # Fall back to guessing from the URL suffix.
    url = response.url
    if url.endswith('.xml'):
        return response.body
    if url.endswith('.xml.gz'):
        return gunzip(response.body)

    return None
Example #2
0
 def _decode(self, body, encoding, max_length=0):
     if encoding == 'gzip' or encoding == 'x-gzip':
         body = gunzip(body, max_length)
     elif encoding == 'deflate':
         try:
             if max_length:
                 dobj = zlib.decompressobj()
                 body = dobj.decompress(body, max_length)
                 if dobj.unconsumed_tail:
                     raise DecompressSizeError(
                         'Response exceeded %s bytes' % max_length)
             else:
                 body = zlib.decompress(body)
         except zlib.error:
             # ugly hack to work with raw deflate content that may
             # be sent by microsoft servers. For more information, see:
             # http://carsten.codimi.de/gzip.yaws/
             # http://www.port80software.com/200ok/archive/2005/10/31/868.aspx
             # http://www.gzip.org/zlib/zlib_faq.html#faq38
             if max_length:
                 dobj = zlib.decompressobj(-15)
                 body = dobj.decompress(body, max_length)
                 if dobj.unconsumed_tail:
                     raise DecompressSizeError(
                         'Response exceeded %s bytes' % max_length)
             else:
                 body = zlib.decompress(body, -15)
     return body
 def _decode(self, body, encoding, max_length=0):
     if encoding == 'gzip' or encoding == 'x-gzip':
         body = gunzip(body, max_length)
     elif encoding == 'deflate':
         try:
             if max_length:
                 dobj = zlib.decompressobj()
                 body = dobj.decompress(body, max_length)
                 if dobj.unconsumed_tail:
                     raise DecompressSizeError(
                         'Response exceeded %s bytes' % max_length)
             else:
                 body = zlib.decompress(body)
         except zlib.error:
             # ugly hack to work with raw deflate content that may
             # be sent by microsoft servers. For more information, see:
             # http://carsten.codimi.de/gzip.yaws/
             # http://www.port80software.com/200ok/archive/2005/10/31/868.aspx
             # http://www.gzip.org/zlib/zlib_faq.html#faq38
             if max_length:
                 dobj = zlib.decompressobj(-15)
                 body = dobj.decompress(body, max_length)
                 if dobj.unconsumed_tail:
                     raise DecompressSizeError(
                         'Response exceeded %s bytes' % max_length)
             else:
                 body = zlib.decompress(body, -15)
     return body
Example #4
0
 def test_max_length(self):
     # A max_length equal to the expected decompressed size (9950 bytes)
     # must succeed; one byte less must raise DecompressSizeError.
     sample_path = join(SAMPLE_DIR, 'feed-sample1.xml.gz')
     with open(sample_path, 'rb') as sample:
         compressed = sample.read()
     exact_fit = gunzip(compressed, 9950)
     self.assertEqual(len(exact_fit), 9950)
     self.assertRaises(DecompressSizeError, gunzip, compressed, 9949)
Example #5
0
 def test_gunzip_truncated_short(self):
     # gunzip must still return output ending in the closing </html> tag for
     # this sample (presumably a truncated archive with a CRC error, going
     # by its file name -- confirm against the fixture).
     with open(join(SAMPLE_DIR, 'truncated-crc-error-short.gz'), 'rb') as f:
         text = gunzip(f.read())
         # The file is read in binary mode, so gunzip is fed bytes. Compare
         # against a bytes suffix: on Python 3, bytes.endswith() raises
         # TypeError for a str argument, while on Python 2 b'...' is the
         # same str type as before.
         self.assertTrue(text.endswith(b'</html>'))
Example #6
0
 def test_gunzip_basic(self):
     # Decompressing the sample feed without a size limit is expected to
     # yield 9950 bytes.
     sample_path = join(SAMPLE_DIR, 'feed-sample1.xml.gz')
     with open(sample_path, 'rb') as sample:
         compressed = sample.read()
     decompressed = gunzip(compressed)
     self.assertEqual(len(decompressed), 9950)
Example #7
0
 def test_max_length(self):
     # The sample is expected to decompress to exactly `limit` bytes: that
     # limit must be accepted and anything smaller must be rejected.
     limit = 9950
     with open(join(SAMPLE_DIR, 'feed-sample1.xml.gz'), 'rb') as gz_file:
         payload = gz_file.read()
     self.assertEqual(len(gunzip(payload, limit)), limit)
     with self.assertRaises(DecompressSizeError):
         gunzip(payload, limit - 1)
Example #8
0
 def test_gunzip_truncated_short(self):
     # Checks that gunzip output for the 'truncated-crc-error-short.gz'
     # sample still ends with the closing </html> tag (the file name
     # suggests a truncated archive with a bad CRC -- verify the fixture).
     sample_path = join(SAMPLE_DIR, 'truncated-crc-error-short.gz')
     with open(sample_path, 'rb') as f:
         text = gunzip(f.read())
     # The input was read as bytes, so the suffix must be bytes as well:
     # bytes.endswith() rejects a str argument with TypeError on Python 3,
     # and a b'' literal is a plain str on Python 2, so this works on both.
     self.assertTrue(text.endswith(b'</html>'))
Example #9
0
 def test_gunzip_basic(self):
     # Plain round trip: the sample feed should gunzip to 9950 bytes.
     expected_size = 9950
     with open(join(SAMPLE_DIR, 'feed-sample1.xml.gz'), 'rb') as gz_file:
         payload = gz_file.read()
     self.assertEqual(len(gunzip(payload)), expected_size)