Example #1
0
 def check_in_range(self, from_, to, encode=False):
     """Run every code point in ``from_..to`` (inclusive) through the stripper.

     Each code point is turned into a character with ``unicode_chr``,
     optionally encoded to UTF-8 bytes, passed to
     ``strip_invalid_characters`` and the result is assigned both as the
     text and as an attribute value of a scratch ``etree`` element.

     A ``UnicodeEncodeError`` whose reason is ``'surrogates not allowed'``
     is skipped; any other ``UnicodeEncodeError`` propagates. Every other
     exception fails the test with a message naming the code point.
     """
     node = etree.Element('test')
     for code_point in range(from_, to + 1):
         try:
             symbol = unicode_chr(code_point)
             payload = symbol.encode('utf-8') if encode else symbol
             cleaned = strip_invalid_characters(payload)
             # Presumably these assignments raise if ``cleaned`` still holds
             # something the XML library rejects -- confirm against etree.
             node.text = cleaned
             node.set('some_attr', cleaned)
         except UnicodeEncodeError as err:
             # Lone surrogates cannot be encoded; that case is expected.
             if err.reason == 'surrogates not allowed':
                 continue
             raise
         except Exception as err:
             self.fail(r'Failed on unicode char \0x{char:x}: {e}'.format(char=code_point, e=err))
Example #2
0
 def test_not_basestring(self):
     """A non-string argument (the int 5) comes back as the text ``u'5'``."""
     result = strip_invalid_characters(5)
     self.assertEqual(result, u'5')
Example #3
0
 def test_utf8_encoded_str(self):
     """UTF-8 encoded bytes come back as ``unicode_type`` equal to the source text."""
     expected = u'\x85 пример utf-8 строки'
     res = strip_invalid_characters(expected.encode('utf-8'))
     self.assertIsInstance(res, unicode_type)
     # The result must match the original text, including the leading \x85.
     self.assertEqual(res, expected)
Example #4
0
 def test_none(self):
     """``None`` is turned into an empty unicode string."""
     result = strip_invalid_characters(None)
     self.assertEqual(result, u'')