def check_in_range(self, from_, to, encode=False):
    """Exercise ``strip_invalid_characters`` on every code point in a range.

    For each integer from ``from_`` through ``to`` (inclusive) the matching
    character is built with ``unicode_chr``, optionally encoded to UTF-8
    bytes, passed through ``strip_invalid_characters`` and then stored as
    both the text and an attribute of a scratch ``etree`` element.

    :param from_: first code point to check.
    :param to: last code point to check (inclusive).
    :param encode: when true, the character is UTF-8 encoded before it is
        handed to ``strip_invalid_characters``.

    A ``UnicodeEncodeError`` whose reason is ``'surrogates not allowed'`` is
    ignored; any other ``UnicodeEncodeError`` propagates unchanged. Every
    other exception is reported through ``self.fail`` together with the
    offending code point.
    """
    node = etree.Element('test')
    failure_template = r'Failed on unicode char \0x{char:x}: {e}'

    for code_point in range(from_, to + 1):
        try:
            candidate = unicode_chr(code_point)
            candidate = candidate.encode('utf-8') if encode else candidate
            cleaned = strip_invalid_characters(candidate)
            # Both assignments are part of the check: the cleaned value has
            # to be accepted as element text and as an attribute value.
            node.text = cleaned
            node.set('some_attr', cleaned)
        except UnicodeEncodeError as err:
            if err.reason == 'surrogates not allowed':
                # Lone surrogates cannot be encoded; skip to the next one.
                continue
            raise
        except Exception as err:
            self.fail(failure_template.format(char=code_point, e=err))
def test_not_basestring(self):
    """A non-string argument (the integer 5) comes back as the text u'5'."""
    result = strip_invalid_characters(5)
    self.assertEqual(result, u'5')
def test_utf8_encoded_str(self):
    """UTF-8 encoded bytes come back as the equivalent text value.

    The input is the UTF-8 encoding of a text containing U+0085 and
    Cyrillic letters; the result must be an instance of ``unicode_type``
    and compare equal to the text before it was encoded.
    """
    expected_text = u'\x85 пример utf-8 строки'
    encoded_input = expected_text.encode('utf-8')

    actual = strip_invalid_characters(encoded_input)

    self.assertIsInstance(actual, unicode_type)
    self.assertEqual(actual, expected_text)
def test_none(self):
    """Passing ``None`` yields an empty text string."""
    result = strip_invalid_characters(None)
    self.assertEqual(result, u'')