Beispiel #1
0
 def __init__(self, lang="zh", encoding='utf-8'):
     """Store the preferred encoding and choose the decoder backend.

     ``lang`` values "ja", "kr" and "vn" each get their dedicated
     decoder; any other value (including the default "zh") is passed
     on to ``Unidecoder``.
     """
     self.preferred_encoding = encoding
     if lang == "ja":
         backend = Jadecoder()
     elif lang == "kr":
         backend = Krdecoder()
     elif lang == "vn":
         backend = Vndecoder()
     else:
         # "zh" and every other language code
         backend = Unidecoder(lang)
     self.decoder = backend
Beispiel #2
0
 def __init__(self, lang="zh", encoding='utf-8'):
     """Remember the preferred encoding and pick a decoder backend.

     "ja", "kr" and "vn" map to Jadecoder, Krdecoder and Vndecoder;
     any other ``lang`` (including the default "zh") uses Unidecoder.
     """
     self.preferred_encoding = encoding
     if lang == "ja":
         decoder_cls = Jadecoder
     elif lang == "kr":
         decoder_cls = Krdecoder
     elif lang == "vn":
         decoder_cls = Vndecoder
     else:
         # "zh" and every other language code
         decoder_cls = Unidecoder
     self.decoder = decoder_cls()
class TestUnidecoder(unittest.TestCase):
    """Unit tests for a ``Unidecoder`` created with the "zh" language code."""

    def setUp(self):
        # Decoder instance shared by the assertions of each test method.
        self.decoder = Unidecoder("zh")

    def test_code_group(self):
        # U+1234 is expected to fall into the group named "x12".
        group = self.decoder.code_group(u"\u1234")
        self.assertEqual(group, "x12")

    def test_grouped_point(self):
        # U+1234 is expected to map to position 0x34 within its group.
        point = self.decoder.grouped_point(u"\u1234")
        self.assertEqual(point, 0x34)

    def test_decode(self):
        # A plain ASCII letter must come back unchanged from decode().
        decoded = self.decoder.decode("a")
        self.assertEqual(decoded, "a")

    def test_replace_point(self):
        # A plain ASCII letter must come back unchanged from replace_point().
        replaced = self.decoder.replace_point("a")
        self.assertEqual(replaced, "a")
Beispiel #4
0
class TestUnidecoder(unittest.TestCase):
    """Unit tests for a ``Unidecoder`` created with the 'zh' language code."""

    def setUp(self):
        # Decoder instance shared by the assertions of each test method.
        self.decoder = Unidecoder('zh')

    def test_code_group(self):
        # U+1234 is expected to fall into the group named 'x12'.
        group = self.decoder.code_group("\u1234")
        self.assertEqual(group, 'x12')

    def test_grouped_point(self):
        # U+1234 is expected to map to position 0x34 within its group.
        point = self.decoder.grouped_point("\u1234")
        self.assertEqual(point, 0x34)

    def test_decode(self):
        # A plain ASCII letter must come back unchanged from decode().
        decoded = self.decoder.decode("a")
        self.assertEqual(decoded, "a")

    def test_replace_point(self):
        # A plain ASCII letter must come back unchanged from replace_point().
        replaced = self.decoder.replace_point('a')
        self.assertEqual(replaced, 'a')
Beispiel #5
0
class Unihandecoder(object):
    """Front end that routes text to a language-specific decoder backend.

    ``__init__`` chooses the backend from ``lang``.  ``decode`` coerces its
    input to unicode text (Python 2 only), NFC-normalizes it and passes the
    result to the backend's ``decode`` method.
    """

    # Encoding tried first when a Python 2 byte string has to be decoded.
    preferred_encoding = None
    # Backend decoder instance, assigned in ``__init__``.
    decoder = None

    def __init__(self, lang="zh", encoding='utf-8'):
        """Store ``encoding`` and create the backend matching ``lang``.

        "ja", "kr" and "vn" select Jadecoder, Krdecoder and Vndecoder;
        any other value (including the default "zh") is passed on to
        ``Unidecoder``.
        """
        self.preferred_encoding = encoding
        if lang == "ja":
            self.decoder = Jadecoder()
        elif lang == "kr":
            self.decoder = Krdecoder()
        elif lang == "vn":
            self.decoder = Vndecoder()
        else:  # zh and others
            self.decoder = Unidecoder(lang)

    def _text_filter(self, text):
        """Return ``text`` as NFC-normalized unicode.

        Under Python 2, input that is not already ``unicode`` is converted
        with ``unicode()``; if that fails it is decoded using
        ``preferred_encoding``, and as a last resort as UTF-8 with
        undecodable bytes replaced.
        """
        if PY2 and not isinstance(text, unicode):
            # ``except Exception`` keeps the best-effort fallback chain but,
            # unlike a bare ``except``, lets KeyboardInterrupt/SystemExit
            # propagate.
            try:
                text = unicode(text)
            except Exception:  # pragma: no cover
                try:
                    text = text.decode(self.preferred_encoding)
                except Exception:
                    text = text.decode('utf-8', 'replace')
        # at first unicode normalize it. (see Unicode standards)
        return unicodedata.normalize('NFC', text)

    def decode(self, text):
        """Normalize ``text`` and return the backend decoder's result for it."""
        return self.decoder.decode(self._text_filter(text))
Beispiel #6
0
class Unihandecoder(object):
    """Front end that routes text to a language-specific decoder backend.

    ``__init__`` chooses the backend from ``lang``.  ``decode`` coerces its
    input to unicode text (Python 2 only), NFC-normalizes it and passes the
    result to the backend's ``decode`` method.
    """

    # Encoding tried first when a Python 2 byte string has to be decoded.
    preferred_encoding = None
    # Backend decoder instance, assigned in ``__init__``.
    decoder = None

    def __init__(self, lang="zh", encoding='utf-8'):
        """Store ``encoding`` and create the backend matching ``lang``.

        "ja", "kr" and "vn" select Jadecoder, Krdecoder and Vndecoder;
        any other value (including the default "zh") uses ``Unidecoder``.
        """
        self.preferred_encoding = encoding
        if lang == "ja":
            self.decoder = Jadecoder()
        elif lang == "kr":
            self.decoder = Krdecoder()
        elif lang == "vn":
            self.decoder = Vndecoder()
        else:  # zh and others
            self.decoder = Unidecoder()

    def decode(self, text):
        """Return the backend's decoding of the NFC-normalized ``text``.

        On Python 2, input that is not already ``unicode`` is converted
        with ``unicode()``; if that fails it is decoded using
        ``preferred_encoding``, and as a last resort as UTF-8 with
        undecodable bytes replaced.  On Python 3 the text is used as is.
        """
        # Detect Python 2 by probing for the ``unicode`` builtin.  Only the
        # NameError raised by the probe is caught, so errors from the
        # conversion below are no longer silently discarded.
        try:
            text_type = unicode  # python2
        except NameError:  # python3, str is unicode
            text_type = None
        if text_type is not None and not isinstance(text, text_type):
            # ``except Exception`` keeps the best-effort fallback chain but,
            # unlike a bare ``except``, lets KeyboardInterrupt/SystemExit
            # propagate.
            try:
                text = text_type(text)
            except Exception:
                try:
                    text = text.decode(self.preferred_encoding)
                except Exception:
                    text = text.decode('utf-8', 'replace')
        # at first unicode normalize it. (see Unicode standards)
        ntext = unicodedata.normalize('NFC', text)
        return self.decoder.decode(ntext)
Beispiel #7
0
class Unihandecoder(object):
    """Dispatch text decoding to a backend selected by language code.

    The backend is created once in ``__init__``.  ``decode`` converts its
    argument to unicode text where needed (Python 2 only), applies NFC
    normalization and delegates to the backend's ``decode`` method.
    """

    # Encoding tried first when a Python 2 byte string has to be decoded.
    preferred_encoding = None
    # Backend decoder instance, assigned in ``__init__``.
    decoder = None

    def __init__(self, lang="zh", encoding='utf-8'):
        """Store ``encoding`` and create the backend matching ``lang``.

        "ja", "kr" and "vn" select Jadecoder, Krdecoder and Vndecoder;
        any other value (including the default "zh") is passed on to
        ``Unidecoder``.
        """
        self.preferred_encoding = encoding
        if lang == "ja":
            self.decoder = Jadecoder()
        elif lang == "kr":
            self.decoder = Krdecoder()
        elif lang == "vn":
            self.decoder = Vndecoder()
        else:  # zh and others
            self.decoder = Unidecoder(lang)

    def _text_filter(self, text):
        """Return ``text`` as NFC-normalized unicode.

        Under Python 2, input that is not already ``unicode`` is converted
        with ``unicode()``; if that fails it is decoded using
        ``preferred_encoding``, and as a last resort as UTF-8 with
        undecodable bytes replaced.
        """
        if PY2 and not isinstance(text, unicode):
            # Catch ``Exception`` rather than everything: the fallback
            # chain stays best-effort, while KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            try:
                text = unicode(text)
            except Exception:  # pragma: no cover
                try:
                    text = text.decode(self.preferred_encoding)
                except Exception:
                    text = text.decode('utf-8', 'replace')
        # at first unicode normalize it. (see Unicode standards)
        return unicodedata.normalize('NFC', text)

    def decode(self, text):
        """Normalize ``text`` and return the backend decoder's result for it."""
        return self.decoder.decode(self._text_filter(text))
 def setUp(self):
     # Fixture hook: build the decoder under test for the "zh" language code.
     language = "zh"
     self.decoder = Unidecoder(language)
Beispiel #9
0
 def setUp(self):
     # Fixture hook: build the decoder under test for the 'zh' language code.
     language = 'zh'
     self.decoder = Unidecoder(language)
Beispiel #10
0
 def setUp(self):
     # Fixture hook: create the 'zh' decoder that the test methods exercise.
     lang_code = 'zh'
     self.decoder = Unidecoder(lang_code)