Exemple #1
0
 def assert_examples(self, examples, lang=None, logger=None):
     """Check that every example word is hangulized as expected.

     When the test case class has a truthy ``_store_examples`` attribute
     the examples are only merged into ``cls._stored_examples`` and nothing
     is checked.

     :param examples: a mapping of source word to the wanted result.
     :param lang: a language object or a language code string; defaults to
                  ``self.lang``.
     :param logger: passed through to ``lang.hangulize``.
     :raises HangulizeAssertionError: if any result differs from the wanted
                                      one; the message lists every mismatch.
     """
     cls = type(self)
     # store examples
     if getattr(cls, '_store_examples', False):
         cls._stored_examples.update(examples)
         return
     lang = lang or self.lang
     if isinstance(lang, basestring):
         lang = hangulize.get_lang(lang)
     try:
         lang_name = type(lang).__name__
     except AttributeError:
         lang_name = 'AnonymouseLanguage'
     # Compare explicitly rather than through ``assert`` + ``except``:
     # ``assert`` is stripped under ``python -O`` (every example would then
     # pass silently), and an exception raised by ``hangulize`` itself must
     # not be reported with an unbound or stale ``got``.
     errors = []
     for word, want in examples.items():
         got = lang.hangulize(word, logger=logger)
         if want != got:
             errors.append((word, want, got))
     if not errors:
         return

     def form(error):
         # error is a (word, want, got) triple
         return " * '%s' should be '%s', but '%s' was given" % (
             color(error[0], 'cyan'),
             color(error[1], 'green'),
             color(error[2], 'red')
         )
     msg = color(lang_name, 'yellow') + '\n' + '\n'.join(map(form, errors))
     raise HangulizeAssertionError(msg.encode('utf-8'))
Exemple #2
0
 def test_singleton(self):
     # Every language class yields one shared instance: constructing it or
     # looking it up by code gives the very same object, while two different
     # languages are different objects.
     from hangulize import get_lang
     from hangulize.langs.ita import Italian
     from hangulize.langs.jpn import Japanese
     for code, language in (('ita', Italian), ('jpn', Japanese)):
         assert language() is language()
         assert get_lang(code) is language()
         assert get_lang(code) is get_lang(code)
     assert Italian() is not Japanese()
     assert get_lang('ita') is not Japanese()
     assert get_lang('ita') is not get_lang('jpn')
Exemple #3
0
 def test_singleton(self):
     # Constructing a language class and fetching it through get_lang must
     # always hand back the same object; distinct languages must not share
     # one.
     from hangulize import get_lang
     from hangulize.langs.ita import Italian
     from hangulize.langs.jpn import Japanese
     same_language = (('ita', Italian), ('jpn', Japanese))
     for code, language in same_language:
         assert language() is language()
         assert get_lang(code) is language()
         assert get_lang(code) is get_lang(code)
     # cross-language identity checks
     assert Italian() is not Japanese()
     assert get_lang('ita') is not Japanese()
     assert get_lang('ita') is not get_lang('jpn')
Exemple #4
0
 def _repl():
     """Ask for a language until one can be loaded.

     Uses ``self.lang`` when it is set, otherwise prompts on stdin. When
     loading fails the error is logged and ``self.lang`` is cleared, so the
     next round falls back to the prompt.
     """
     while True:
         lang = self.lang or raw_input(color("Lang: ", "magenta"))
         try:
             lang = get_lang(lang)
             logger.info("** " + color(type(lang).__name__, "green") + " is selected")
             break
         # ``except X as e`` is valid on Python 2.6+ and Python 3, unlike
         # the old ``except X, e`` spelling.
         except Exception as e:
             logger.error(color(e, "red"))
             self.lang = None
Exemple #5
0
 def _repl():
     """Ask for a language until one can be loaded.

     Uses ``self.lang`` when it is set, otherwise prompts on stdin. A
     ``HangulizeError`` raised while loading is logged and ``self.lang`` is
     cleared, so the next round falls back to the prompt.
     """
     while True:
         lang = self.lang or raw_input(color('Lang: ', 'magenta'))
         try:
             lang = get_lang(lang)
             logger.info('** ' + color(type(lang).__name__, 'green') +
                         ' is selected')
             break
         # ``except X as e`` is valid on Python 2.6+ and Python 3, unlike
         # the old ``except X, e`` spelling.
         except HangulizeError as e:
             logger.error(color(e, 'red'))
             self.lang = None
Exemple #6
0
 def _repl():
     """Ask for a language until one can be loaded.

     Uses ``self.lang`` when it is set, otherwise prompts on stdin. When
     loading fails the error is logged and ``self.lang`` is cleared, so the
     next round falls back to the prompt.
     """
     while True:
         lang = self.lang or raw_input(color('Lang: ', 'magenta'))
         try:
             lang = get_lang(lang)
             logger.info('** ' + color(type(lang).__name__, 'green') +
                         ' is selected')
             break
         # ``except X as e`` is valid on Python 2.6+ and Python 3, unlike
         # the old ``except X, e`` spelling.
         except Exception as e:
             logger.error(color(e, 'red'))
             self.lang = None
Exemple #7
0
 def _repl():
     """Pick a language, then yield the hangulized form of each typed line.

     The language comes from ``self.lang`` or, failing that, from a prompt;
     the question is repeated until ``get_lang`` succeeds. Afterwards lines
     are read until an empty one is entered.
     """
     # Phase 1: resolve the language.
     while True:
         code = self.lang or raw_input(color('Lang: ', 'magenta'))
         try:
             lang = get_lang(code)
             selected = color(type(lang).__name__, 'green')
             logger.info('** ' + selected + ' is selected')
             break
         except Exception as exc:
             logger.error(color(exc, 'red'))
             # forget the preset so the next round asks interactively
             self.lang = None
     # Phase 2: transcribe input until an empty line arrives.
     line = raw_input(color('==> ', 'cyan'))
     while line:
         yield lang.hangulize(line.decode(encoding), logger=logger)
         line = raw_input(color('==> ', 'cyan'))
     logger.info('** ' + color('End', 'green'))
Exemple #8
0
 def _repl():
     """Generator behind the interactive prompt.

     First keeps asking for a language (unless ``self.lang`` already names
     one) until ``get_lang`` accepts it, then yields
     ``lang.hangulize(...)`` for every non-empty line read from stdin and
     stops at the first empty line.
     """
     while True:
         wanted = self.lang or raw_input(color('Lang: ', 'magenta'))
         try:
             lang = get_lang(wanted)
             banner = '** ' + color(type(lang).__name__, 'green')
             logger.info(banner + ' is selected')
             break
         except Exception as err:
             logger.error(color(err, 'red'))
             # drop the preset language so the prompt is used next time
             self.lang = None
     while True:
         typed = raw_input(color('==> ', 'cyan'))
         if typed:
             yield lang.hangulize(typed.decode(encoding), logger=logger)
         else:
             logger.info('** ' + color('End', 'green'))
             break
Exemple #9
0
 def test_regard_iso639_1(self):
     # Looking a two-letter code up with iso639=1 must give the same
     # language class as looking it up without the iso639 argument.
     for code in ('bg', 'ja'):
         assert type(get_lang(code, iso639=1)) is type(get_lang(code))
Exemple #10
0
        for i in xrange(len(langs)):
            lang = langs.pop(0)
            test = lang.replace('.', '_')
            test = getattr(__import__('tests.%s' % test), test)
            try:
                test_case = getattr(test, [x for x in dir(test) \
                                             if x.endswith('TestCase')][0])
                test_method = [x for x in dir(test_case) \
                               if x.startswith('test')][0]
            except IndexError:
                continue
            assert isinstance(test_case.get_examples(test_method), dict)


try:
    get_lang('it', iso639=1)
    class LanguageCodeTestCase(unittest.TestCase):

        table = [('bg', 'bul', 'bul'),
                 ('ca', 'cat', 'cat'),
                 ('cs', 'cze', 'ces'),
                 ('cy', 'wel', 'cym'),
                 ('de', 'ger', 'deu'),
                 ('el', 'gre', 'ell'),
                 ('et', 'est', 'est'),
                 ('fi', 'fin', 'fin'),
                 (None, 'grc', 'grc'),
                 (None, None, 'hbs'),
                 ('hu', 'hun', 'hun'),
                 ('ja', 'jpn', 'jpn')]
Exemple #11
0
 def test_sub_lang(self):
     # A dotted code must resolve to a truthy language object.
     import hangulize
     narrow = hangulize.get_lang('kat.narrow')
     assert narrow
Exemple #12
0
 def test_sub_lang(self):
     # get_lang has to accept the dotted code 'kat.narrow' and return
     # something truthy for it.
     import hangulize
     sub_lang = hangulize.get_lang('kat.narrow')
     assert sub_lang
Exemple #13
0
        for i in range(len(langs)):
            lang = langs.pop(0)
            test = lang.replace('.', '_')
            test = getattr(__import__('tests.%s' % test), test)
            try:
                test_case = getattr(test, [x for x in dir(test) \
                                             if x.endswith('TestCase')][0])
                test_method = [x for x in dir(test_case) \
                               if x.startswith('test')][0]
            except IndexError:
                continue
            assert isinstance(test_case.get_examples(test_method), dict)


try:
    get_lang('it', iso639=1)

    class LanguageCodeTestCase(unittest.TestCase):

        table = [('bg', 'bul', 'bul'), ('ca', 'cat', 'cat'),
                 ('cs', 'cze', 'ces'), ('cy', 'wel', 'cym'),
                 ('de', 'ger', 'deu'), ('el', 'gre', 'ell'),
                 ('et', 'est', 'est'), ('fi', 'fin', 'fin'),
                 (None, 'grc', 'grc'), (None, None, 'hbs'),
                 ('hu', 'hun', 'hun'), ('ja', 'jpn', 'jpn')]

        def test_regard_iso639_1(self):
            # Looking a two-letter code up with iso639=1 must give the same
            # language class as looking it up without the iso639 argument.
            for code in ('bg', 'ja'):
                assert type(get_lang(code, iso639=1)) is type(get_lang(code))

        def test_iso639_1(self):
Exemple #14
0
def main(argv):
    """Dump one hangulize language as a sectioned text spec on stdout.

    The language named by ``args.lang`` is inspected and printed as the
    sections ``lang``, ``config``, ``macros``, ``vars``, ``normalize``,
    ``rewrite``, ``transcribe`` and ``test``. Problems detecting the locale
    or the script are reported on stderr and replaced by ``'???'``.

    :param argv: not read here; arguments come from ``cli.parse_args()``.
    :raises LookupError: if the language's test module defines no
                         ``*TestCase`` other than ``Hangulize*`` ones.
    """
    args = cli.parse_args()

    lang = hangulize.get_lang(args.lang)
    try:
        locale = babel.Locale(lang.iso639_1)
    except babel.core.UnknownLocaleError:
        locale = None
        print('failed to find locale for lang (%s, %s, %s)'
              '' % (lang.iso639_1, lang.iso639_2, lang.iso639_3),
              file=sys.stderr)

    # detect normalize
    additional_of_normalize_roman = {}
    normalize_roman_called = []

    def hacked_normalize_roman(string, additional=None):
        # Recorder standing in for normalize_roman: remembers the
        # ``additional`` mapping and the fact that it was called.
        if additional:
            additional_of_normalize_roman.update(additional)
        normalize_roman_called.append(1)

    # Replace the ``normalize_roman`` global seen by ``lang.normalize`` with
    # the recorder and run normalize once on an empty string. The
    # replacement is not undone afterwards.
    normalize_f = lang.normalize.__func__
    normalize_f.__globals__['normalize_roman'] = hacked_normalize_roman
    normalize_f(lang, '')

    # Invert the recorded mapping: target -> set of sources, dropping
    # identity entries.
    normalize = defaultdict(set)
    for src, dst in additional_of_normalize_roman.items():
        if src == dst:
            continue
        normalize[dst].add(src)

    # detect script
    if normalize_roman_called:
        script = 'roman'
    else:
        script = '???'
        print('failed to detect script of lang (%s, %s, %s)'
              '' % (lang.iso639_1, lang.iso639_2, lang.iso639_3),
              file=sys.stderr)

    # find vars: public attributes of the language class that its first
    # base class does not have.
    vars_ = []
    for attr in dir(lang.__class__):
        if attr.startswith('_'):
            continue
        if hasattr(lang.__class__.__bases__[0], attr):
            continue
        vars_.append(attr)
    if lang.vowels:
        vars_.append('vowels')

    # group rewrite/transcribe
    rewrite = []
    transcribe = []
    for rule in lang.notation.rules:
        pattern = rule[0]
        rpattern = rule[1:]

        # ZWSP "/" has been changed with "{}".
        pattern = pattern.replace('/', '{}')

        # some rpattern is 2d tuple redundantly.
        if isinstance(rpattern[0], tuple):
            rpattern = rpattern[0]

        if isinstance(rpattern[0], hangulize.Phoneme):
            transcribe.append((pattern, rpattern))
            continue

        if rpattern[0] is None:
            # A None replacement counts as transcribe once the transcribe
            # rules have started; before that it falls through to rewrite.
            if transcribe:
                transcribe.append((pattern, rpattern))
                continue
        else:
            # "/" -> "{}" here too.
            rpattern = rpattern[0].replace('/', '{}')
        rewrite.append((pattern, rpattern))

    # find test
    test_modname = args.lang.replace('.', '_')
    test_module = getattr(__import__('tests.%s' % test_modname), test_modname)
    for attr, val in vars(test_module).items():
        if attr.endswith('TestCase') and not attr.startswith('Hangulize'):
            test_case = val
            break
    else:
        # Without this branch the loop would fall through and silently use
        # whatever module attribute happened to come last.
        raise LookupError('no test case found in tests.%s' % test_modname)
    examples = test_case.get_examples()

    # render

    sec = Section('lang')
    sec.put('id', args.lang)
    sec.put('codes', lang.iso639_1, lang.iso639_3)
    if locale is None:
        sec.put('english', '???')
        sec.put('korean', '???')
    else:
        sec.put('english', locale.get_language_name('en_US'))
        sec.put('korean', locale.get_language_name('ko_KR'))
    sec.put('script', script)
    print(sec.draw('='), end='')

    sec = Section('config')
    sec.put('author', args.author)
    sec.put('stage', 'draft')
    print(sec.draw('='), end='')

    sec = Section('macros')
    if lang.vowels:
        sec.put('@', '<vowels>')
    print(sec.draw('=', quote_keys=True), end='')

    sec = Section('vars')
    for var in vars_:
        sec.put(var, *getattr(lang, var))
    print(sec.draw('=', quote_keys=True), end='')

    sec = Section('normalize')
    for to, froms in normalize.items():
        sec.put(to, *froms)
    print(sec.draw('=', quote_keys=True), end='')

    sec = Section('rewrite')
    for pattern, rpattern in rewrite:
        sec.put(pattern, rpattern)
    print(sec.draw('->', quote_keys=True), end='')

    sec = Section('transcribe')
    for pattern, rpattern in transcribe:
        sec.put(pattern, rpattern)
    print(sec.draw('->', quote_keys=True), end='')

    sec = Section('test')
    for loanword, hangul in examples.items():
        sec.put(loanword, hangul)
    print(sec.draw('->', quote_keys=True), end='')
Exemple #15
0
 def test_iso639_2(self):
     # For every table row that has an ISO 639-2 code, the lookup by that
     # code (iso639=2) must give the same language class as the lookup by
     # the row's ISO 639-3 code.
     for _, code2, code3 in self.table:
         if code2:
             by_code3 = get_lang(code3)
             by_code2 = get_lang(code2, iso639=2)
             assert type(by_code3) is type(by_code2)
Exemple #16
0
 def test_iso639_3(self):
     # Passing iso639=3 explicitly must give the same language class as
     # the plain lookup, for every ISO 639-3 code in the table.
     for row in self.table:
         code3 = row[2]
         plain = get_lang(code3)
         explicit = get_lang(code3, iso639=3)
         assert type(plain) is type(explicit)
Exemple #17
0
 def test_regard_iso639_1(self):
     # Looking a two-letter code up with iso639=1 must give the same
     # language class as looking it up without the iso639 argument.
     for code in ('bg', 'ja'):
         assert type(get_lang(code, iso639=1)) is type(get_lang(code))
Exemple #18
0
 def __init__(self, word, code=None, iso639=None, lang=None):
     """Bind the word to a hangulize language.

     :param word: handed to the parent initializer.
     :param code: language code given to ``hangulize.get_lang`` when
                  ``lang`` is missing or falsy.
     :param iso639: second argument for ``hangulize.get_lang``.
     :param lang: an already resolved language; used as is when truthy.
     """
     hangulize = self._import_hangulize()
     if not lang:
         lang = hangulize.get_lang(code, iso639)
     self.lang = lang
     super(Loanword, self).__init__(word)
Exemple #19
0
 def test_iso639_2(self):
     # For every table row that has an ISO 639-2 code, the lookup by that
     # code (iso639=2) must give the same language class as the lookup by
     # the row's ISO 639-3 code.
     for _, code2, code3 in self.table:
         if code2:
             by_code3 = get_lang(code3)
             by_code2 = get_lang(code2, iso639=2)
             assert type(by_code3) is type(by_code2)
Exemple #20
0
 def __init__(self, word, code=None, iso639=None, lang=None):
     """Set up the loanword together with the language it belongs to.

     :param word: forwarded to the parent initializer.
     :param code: language code for ``hangulize.get_lang``; only consulted
                  when ``lang`` is missing or falsy.
     :param iso639: passed as second argument to ``hangulize.get_lang``.
     :param lang: language to use directly when truthy.
     """
     hangulize = self._import_hangulize()
     if lang:
         self.lang = lang
     else:
         self.lang = hangulize.get_lang(code, iso639)
     super(Loanword, self).__init__(word)
Exemple #21
0
 def test_iso639_3(self):
     # Passing iso639=3 explicitly must give the same language class as
     # the plain lookup, for every ISO 639-3 code in the table.
     for row in self.table:
         code3 = row[2]
         plain = get_lang(code3)
         explicit = get_lang(code3, iso639=3)
         assert type(plain) is type(explicit)