예제 #1
0
 def _get_encoding(self, infer=False):
     """Return the resolved name of the encoding to use.

     The declared encoding is used when it is non-empty and
     ``encoding_exists`` accepts it. Otherwise, when ``infer`` is true,
     the value of ``self._body_inferred_encoding()`` is tried. If there
     is still no encoding, ``self._DEFAULT_ENCODING`` is used. The chosen
     name is passed through ``resolve_encoding`` before being returned.
     """
     declared = self._declared_encoding()
     # A declared encoding that is not recognised is treated as absent.
     chosen = declared if declared and encoding_exists(declared) else None
     if infer and not chosen:
         chosen = self._body_inferred_encoding()
     return resolve_encoding(chosen or self._DEFAULT_ENCODING)
예제 #2
0
파일: text.py 프로젝트: pkufranky/scrapy
 def _get_encoding(self, infer=False):
     """Pick an encoding name and return it after ``resolve_encoding``.

     Candidates are tried in this order: the declared encoding (only if
     ``encoding_exists`` accepts it), then the body-inferred encoding
     (only when ``infer`` is true), then ``self._DEFAULT_ENCODING``.
     """
     candidate = self._declared_encoding()
     if candidate and encoding_exists(candidate):
         return resolve_encoding(candidate)

     # No usable declared encoding: optionally fall back to inference.
     candidate = self._body_inferred_encoding() if infer else None
     if candidate:
         return resolve_encoding(candidate)

     return resolve_encoding(self._DEFAULT_ENCODING)
    def test_process_response_force_recalculate_encoding(self):
        """Check the encoding of a gzip-compressed ``HtmlResponse`` after processing.

        The headers carry no charset, while the (compressed) body declares
        ``gb2312`` in a ``<meta>`` tag. After ``self.mw.process_response``
        the result must still be an ``HtmlResponse``, its body must be the
        uncompressed markup, and its encoding must equal
        ``resolve_encoding("gb2312")``.
        """
        headers = {"Content-Type": "text/html", "Content-Encoding": "gzip"}
        plainbody = """<html><head><title>Some page</title><meta http-equiv="Content-Type" content="text/html; charset=gb2312">"""
        f = StringIO()
        # The context manager closes the gzip stream even if the write
        # raises, so the compressed data is always finalised in ``f``.
        with GzipFile(fileobj=f, mode="wb") as zf:
            zf.write(plainbody)
        # URL scheme separator fixed ("http;//" -> "http://").
        response = HtmlResponse("http://www.example.com/page.html", headers=headers, body=f.getvalue())
        request = Request("http://www.example.com/")

        newresponse = self.mw.process_response(request, response, self.spider)
        # assertIsInstance is not stripped under ``python -O`` and reports
        # the actual type on failure, unlike a bare ``assert``.
        self.assertIsInstance(newresponse, HtmlResponse)
        self.assertEqual(newresponse.body, plainbody)
        self.assertEqual(newresponse.encoding, resolve_encoding("gb2312"))
    def test_process_response_encoding_inside_body(self):
        """Check that a gzip-compressed plain ``Response`` comes back as HTML.

        A generic ``Response`` is built with ``text/html`` + ``gzip``
        headers (no charset) and a compressed body whose ``<meta>`` tag
        declares ``gb2312``. After ``self.mw.process_response`` the result
        must be an ``HtmlResponse`` with the uncompressed body and an
        encoding equal to ``resolve_encoding('gb2312')``.
        """
        headers = {
            'Content-Type': 'text/html',
            'Content-Encoding': 'gzip',
        }
        plainbody = """<html><head><title>Some page</title><meta http-equiv="Content-Type" content="text/html; charset=gb2312">"""
        f = StringIO()
        # The context manager closes the gzip stream even if the write
        # raises, so the compressed data is always finalised in ``f``.
        with GzipFile(fileobj=f, mode='wb') as zf:
            zf.write(plainbody)
        # URL scheme separator fixed ("http;//" -> "http://").
        response = Response("http://www.example.com/", headers=headers, body=f.getvalue())
        request = Request("http://www.example.com/")

        newresponse = self.mw.process_response(request, response, self.spider)
        # assertIsInstance is not stripped under ``python -O`` and reports
        # the actual type on failure, unlike a bare ``assert``.
        self.assertIsInstance(newresponse, HtmlResponse)
        self.assertEqual(newresponse.body, plainbody)
        self.assertEqual(newresponse.encoding, resolve_encoding('gb2312'))
    def test_process_response_force_recalculate_encoding(self):
        """Check the encoding of a gzip-compressed ``HtmlResponse`` after processing.

        The headers carry no charset, while the (compressed) body declares
        ``gb2312`` in a ``<meta>`` tag. After ``self.mw.process_response``
        the result must still be an ``HtmlResponse``, its body must be the
        uncompressed markup, and its encoding must equal
        ``resolve_encoding('gb2312')``.
        """
        headers = {
            'Content-Type': 'text/html',
            'Content-Encoding': 'gzip',
        }
        plainbody = """<html><head><title>Some page</title><meta http-equiv="Content-Type" content="text/html; charset=gb2312">"""
        f = StringIO()
        # The context manager closes the gzip stream even if the write
        # raises, so the compressed data is always finalised in ``f``.
        with GzipFile(fileobj=f, mode='wb') as zf:
            zf.write(plainbody)
        # URL scheme separator fixed ("http;//" -> "http://").
        response = HtmlResponse("http://www.example.com/page.html",
                                headers=headers,
                                body=f.getvalue())
        request = Request("http://www.example.com/")

        newresponse = self.mw.process_response(request, response, self.spider)
        # assertIsInstance is not stripped under ``python -O`` and reports
        # the actual type on failure, unlike a bare ``assert``.
        self.assertIsInstance(newresponse, HtmlResponse)
        self.assertEqual(newresponse.body, plainbody)
        self.assertEqual(newresponse.encoding, resolve_encoding('gb2312'))
예제 #6
0
 def _assert_response_encoding(self, response, encoding):
     """Assert that ``response.encoding`` equals ``resolve_encoding(encoding)``."""
     actual = response.encoding
     expected = resolve_encoding(encoding)
     self.assertEqual(actual, expected)
예제 #7
0
 def _assert_response_encoding(self, response, encoding):
     """Check a response's encoding against the resolved form of ``encoding``.

     ``encoding`` is passed through ``resolve_encoding`` before the
     comparison, so callers pass the unresolved name.
     """
     found = response.encoding
     wanted = resolve_encoding(encoding)
     self.assertEqual(found, wanted)
예제 #8
0
 def test_resolve_encoding(self):
     """Verify ``resolve_encoding`` against ``self._ENCODING_ALIASES``.

     With that alias table, 'latin1' must resolve to itself and 'foo'
     must resolve to 'cp1252'.
     """
     aliases = self._ENCODING_ALIASES
     self.assertEqual(resolve_encoding('latin1', aliases), 'latin1')
     self.assertEqual(resolve_encoding('foo', aliases), 'cp1252')
예제 #9
0
 def test_resolve_encoding(self):
     """Run ``resolve_encoding`` over a table of (name, expected) cases.

     Each name is resolved using ``self._ENCODING_ALIASES`` and the
     result is compared with the expected value.
     """
     cases = (
         ('latin1', 'latin1'),
         ('foo', 'cp1252'),
     )
     for name, expected in cases:
         self.assertEqual(resolve_encoding(name, self._ENCODING_ALIASES),
                          expected)