def test_replace_wrong_encoding(self):
    '''Undecodable bytes become U+FFFD without damaging nearby text.'''
    _, decoded = get_unicode_from_response(
        MockResponse('PREFIX\xe3\xabSUFFIX', 'utf-8'))
    # The replacement character must be present, and the valid text on
    # both sides of the bad bytes must be kept.
    for fragment in (u'\ufffd', u'PREFIX', u'SUFFIX'):
        self.assertIn(fragment, decoded)

    # Do not destroy html tags due to encoding bugs
    _, decoded = get_unicode_from_response(
        MockResponse('\xf0<span>value</span>', 'utf-8'))
    self.assertIn(u'<span>value</span>', decoded)
def test_unicode_body(self):
    '''A cp1251-encoded body is decoded back to the original text.

    The Cyrillic sample is encoded to cp1251, wrapped in a MockResponse
    created with 'cp1251' as its encoding argument, and handed to
    get_unicode_from_response. The decoded body must be a unicode object
    equal to the sample.
    '''
    unicode_string = (
        u'\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0447\u0435\u0441\u043a\u0438\u0439'
        u' \u0442\u0435\u043a\u0441\u0442')
    original_string = unicode_string.encode('cp1251')
    # Only the decoded body is checked here; the reported encoding is
    # not part of this test.
    _, body_unicode = get_unicode_from_response(
        MockResponse(original_string, 'cp1251'))

    # assertIsInstance reports the actual type on failure, whereas
    # assertTrue(isinstance(...)) only says "False is not true".
    self.assertIsInstance(body_unicode, unicode)
    self.assertEqual(body_unicode, unicode_string)
def _assert_encoding(self, body, http_encoding, expected_encoding,
                     expected_unicode):
    '''Check both the reported encoding and the decoded text of a body.

    Wraps ``body`` and ``http_encoding`` in a MockResponse, runs
    get_unicode_from_response on it and asserts that:

    * the decoded body is a unicode object,
    * the reported encoding matches ``expected_encoding`` once both
      have been passed through norm_encoding,
    * the decoded body equals ``expected_unicode``.
    '''
    encoding, body_unicode = get_unicode_from_response(
        MockResponse(body, http_encoding))

    # assertIsInstance reports the actual type on failure, whereas
    # assertTrue(isinstance(...)) only says "False is not true".
    self.assertIsInstance(body_unicode, unicode)
    self.assertEqual(norm_encoding(encoding),
                     norm_encoding(expected_encoding))
    self.assertEqual(body_unicode, expected_unicode)
def _assert_encoding_detected(self, header_encoding, expected_encoding,
                              body, **kwargs):
    '''Check which encoding get_unicode_from_response reports for a body.

    Wraps ``body`` and ``header_encoding`` in a MockResponse and calls
    get_unicode_from_response on it, forwarding any extra keyword
    arguments unchanged. Asserts that the decoded body is a unicode
    object and that the reported encoding matches ``expected_encoding``
    once both have been passed through norm_encoding. The decoded text
    itself is not compared.
    '''
    encoding, body_unicode = get_unicode_from_response(
        MockResponse(body, header_encoding), **kwargs)

    # assertIsInstance reports the actual type on failure, whereas
    # assertTrue(isinstance(...)) only says "False is not true".
    self.assertIsInstance(body_unicode, unicode)
    self.assertEqual(norm_encoding(encoding),
                     norm_encoding(expected_encoding))
def _prepare_unicode_body(self):
    '''Fill in ``self._unicode_body`` from ``self.body``.

    If ``self._encoding`` is already set, the body is decoded with it
    and undecodable bytes are replaced rather than raising. Otherwise
    get_unicode_from_response supplies both the encoding and the
    decoded body, and both are stored on the instance.
    '''
    if self._encoding is not None:
        # Encoding already known: decode directly with the 'replace'
        # error handler.
        self._unicode_body = unicode(self.body, self._encoding, 'replace')
        return

    self._encoding, self._unicode_body = get_unicode_from_response(self)
def _prepare_unicode_body(self):
    '''Compute and store the unicode form of ``self.body``.

    With a known ``self._encoding`` the body is decoded using the
    'replace' error handler. With no encoding set, the
    (encoding, unicode body) pair returned by get_unicode_from_response
    is stored in ``self._encoding`` and ``self._unicode_body``.
    '''
    encoding_known = self._encoding is not None
    if encoding_known:
        self._unicode_body = unicode(self.body, self._encoding, 'replace')
    else:
        detected = get_unicode_from_response(self)
        self._encoding, self._unicode_body = detected