Example #1
0
 def test_decode_to_unicode(self):
     """textutils - decode_to_unicode.

     Exercises decode_to_unicode with three inputs: high-byte octal
     escapes decoded with an explicit ``default_encoding='latin1'``,
     a literal of accented Latin vowels, and a literal of Greek letters.
     The last two rely on the function's default arguments.
     """
     # Bytes 0x82-0x85 written as octal escapes, decoded as latin1.
     from_latin1 = decode_to_unicode('\202\203\204\205',
                                     default_encoding='latin1')
     self.assertEqual(from_latin1, u'\x82\x83\x84\x85')

     # Accented vowels; expected code points U+00E0 .. U+00F9.
     accented = decode_to_unicode('àèéìòù')
     self.assertEqual(accented, u'\xe0\xe8\xe9\xec\xf2\xf9')

     # Greek letters; expected code points U+0399 .. U+03B7.
     greek = decode_to_unicode('Ιθάκη')
     self.assertEqual(greek, u'\u0399\u03b8\u03ac\u03ba\u03b7')
 def _sort_alphanumerically_remove_leading_articles(self, val):
     """
     Build a lower-cased sort key with a leading article removed.

     Convert:
     'The title' => 'title'
     'A title' => 'title'
     'Title' => 'title'
     ' The title' => 'title'

     The value is decoded with decode_to_unicode, lower-cased and
     re-encoded as UTF-8. If its first word is found in
     LEADING_ARTICLES (defined outside this method), only the remainder
     of the phrase is returned. Surrounding whitespace is stripped from
     the result. A falsy ``val`` yields ''.
     """
     if not val:
         return ''
     val = decode_to_unicode(val).lower().encode('UTF-8')
     # Split into leading_word, phrase_without_leading_word. Leading
     # whitespace is dropped first: splitting ' the title' on " " would
     # otherwise yield an empty first token and the article check below
     # could never match.
     val_tokens = val.lstrip().split(" ", 1)
     if len(val_tokens) == 2 and val_tokens[0].strip() in LEADING_ARTICLES:
         return val_tokens[1].strip()
     return val.strip()
Example #3
0
def to_unicode(s):
    """Coerce ``s`` to a ``unicode`` object.

    ``unicode`` instances are returned as they are, other ``basestring``
    instances are passed through decode_to_unicode, and any other
    object is converted with ``unicode(s)``.
    """
    if isinstance(s, unicode):
        converted = s
    elif isinstance(s, basestring):
        converted = decode_to_unicode(s)
    else:
        converted = unicode(s)
    return converted
 def test_decode_to_unicode(self):
     """textutils - decode_to_unicode.

     Exercises decode_to_unicode with three inputs: high-byte octal
     escapes decoded with an explicit ``failover_encoding='latin1'``,
     a literal of accented Latin vowels, and a literal of Greek letters.
     """
     # Bytes 0x82-0x85 written as octal escapes, with latin1 as failover.
     actual = decode_to_unicode('\202\203\204\205',
                                failover_encoding='latin1')
     expected = u'\x82\x83\x84\x85'
     self.assertEqual(actual, expected)

     # Accented vowels; expected code points U+00E0 .. U+00F9.
     actual = decode_to_unicode('àèéìòù')
     expected = u'\xe0\xe8\xe9\xec\xf2\xf9'
     self.assertEqual(actual, expected)

     # Greek letters; expected code points U+0399 .. U+03B7.
     actual = decode_to_unicode('Ιθάκη')
     expected = u'\u0399\u03b8\u03ac\u03ba\u03b7'
     self.assertEqual(actual, expected)
Example #5
0
def to_unicode(s):
    """Return ``s`` as a ``unicode`` object.

    A value that is already ``unicode`` is handed back untouched. Any
    other ``basestring`` goes through decode_to_unicode, and every
    remaining type falls back to ``unicode(s)``.
    """
    if isinstance(s, unicode):
        return s
    # Non-unicode strings need decoding; other objects are converted with unicode().
    return decode_to_unicode(s) if isinstance(s, basestring) else unicode(s)
 def test_decode_to_unicode(self):
     """textutils - decode_to_unicode.

     Table-driven check of decode_to_unicode: each case lists the raw
     input, the keyword arguments for the call, and the expected
     unicode result. Cases run in order and stop at the first failure.
     """
     cases = (
         # Bytes 0x82-0x85 as octal escapes, decoded as latin1.
         ("\202\203\204\205", {"default_encoding": "latin1"}, u"\x82\x83\x84\x85"),
         # Accented vowels, default arguments.
         ("àèéìòù", {}, u"\xe0\xe8\xe9\xec\xf2\xf9"),
         # Greek letters, default arguments.
         ("Ιθάκη", {}, u"\u0399\u03b8\u03ac\u03ba\u03b7"),
     )
     for raw, options, expected in cases:
         self.assertEqual(decode_to_unicode(raw, **options), expected)
 def _sort_case_insensitive(self, val):
     """Build a case-insensitive sort key.

     The value is decoded with decode_to_unicode, lower-cased and
     encoded as UTF-8. A falsy ``val`` yields ''.
     """
     if val:
         lowered = decode_to_unicode(val).lower()
         return lowered.encode('UTF-8')
     return ''