Esempi in Python per split_syllable_char, esempi in Python per hangul_utils.split_syllable_char

Esempio n. 1

0

Mostra file

def get_middle_name(conn, n1, n2, saju, gender, n2_rsc_type, mode):
    """Build candidate names by trying every eligible hanja row as a third character.

    Each row returned by the query is referred to as ``n3`` and is combined
    with the given ``n1`` and ``n2``. A candidate is dropped as soon as one of
    the filters below rejects it.

    NOTE(review): despite the function name, the loop varies the character
    placed after ``n2``; confirm the intended naming against the caller.

    Args:
        conn: Connection object; ``conn.cursor().execute(query)`` must yield rows.
        n1: Row for the first character, indexable by HANJA and READING.
        n2: Row for the second character, indexable by HANJA and READING.
        saju: Mapping providing the keys ``'c1'`` and ``'c2'``.
        gender: Passed through to ``check_last_name_gender``.
        n2_rsc_type: Value compared against each candidate's RSC_TYPE field;
            candidates with the same value are excluded.
        mode: Passed through to ``check_total_stroke``.

    Returns:
        A dict mapping the concatenated hanja of the three characters to the
        concatenated readings, or ``None`` when no candidate survives.
    """
    query = """
    SELECT hanja,reading,strokes,add_strokes,rsc_type,pronunciations
    FROM naming_hanja
    WHERE is_naming_hanja=1 AND reading
    NOT IN ('만', '병', '백', '장', '춘', '최', '충', '창', '치', '참', '천',
    '택', '탁', '태', '외', '사', '매', '읍', '소', '종', '순', '요', '자',
    '경', '옥', '해', '부', '효', '존', '난', '류', '홍', '처', '필')
    """
    candidates = {}
    cursor = conn.cursor()

    for n3 in cursor.execute(query):
        # Skip a character that repeats an earlier one, either by hanja or
        # by reading (examples from the original: 김주김, 김소소).
        repeats_hanja = n1[HANJA] == n3[HANJA] or n2[HANJA] == n3[HANJA]
        if repeats_hanja or n1[READING] == n3[READING] or n2[READING] == n3[READING]:
            continue

        # Reading of the last two characters, validated on its own and
        # together with the gender.
        tail_reading = '%s%s' % (n2[READING], n3[READING])
        if check_name(tail_reading) is False:
            continue
        if check_last_name_gender(n3[READING], tail_reading, gender) is False:
            continue

        # The candidate's RSC_TYPE must equal saju['c1'] or saju['c2'] ...
        if not (saju['c1'] == n3[RSC_TYPE] or saju['c2'] == n3[RSC_TYPE]):
            continue
        # ... and must differ from the type already used by the previous
        # character (the original comment: the resource element used in the
        # middle name is excluded).
        if n2_rsc_type == n3[RSC_TYPE]:
            continue

        if check_positive_negative(conn, n1, n2, n3) is False:
            continue

        # Stroke-based check (the original notes it also covers the
        # positive/negative rule on hanja strokes).
        if check_total_stroke(conn, n1, n2, n3, mode) is False:
            continue

        # Decompose each reading into jamo for the pronunciation checks.
        jamo1, jamo2, jamo3 = [
            split_syllable_char(part[READING]) for part in (n1, n2, n3)
        ]
        if balum_oheng(jamo1, jamo2, jamo3) is False:
            continue
        if check_all_name_hard_pronounce(jamo1, jamo2, jamo3) is False:
            continue

        hanja_key = '%s%s%s' % (n1[HANJA], n2[HANJA], n3[HANJA])
        candidates[hanja_key] = '%s%s%s' % (n1[READING], n2[READING], n3[READING])

    return candidates if candidates else None

Esempio n. 2

0

Mostra file

def check_two_words_hard_pronounce(n1, n2):
    """Tell whether two consecutive syllables avoid the listed hard-to-pronounce pairs.

    Both syllables are decomposed with ``split_syllable_char``. The check
    only applies when each decomposition has exactly three components; the
    second and third components of both syllables are then compared against
    a fixed list of combinations.

    Args:
        n1: First syllable (a single character passed to ``split_syllable_char``).
        n2: Second syllable.

    Returns:
        ``False`` when the pair matches one of the listed combinations,
        ``True`` otherwise.
    """
    first = split_syllable_char(n1)
    second = split_syllable_char(n2)

    # Anything that is not a pair of three-component syllables is accepted.
    if len(first) != 3 or len(second) != 3:
        return True

    # (first[1], first[2], second[1], second[2]) combinations to reject.
    hard_pairs = (
        ('ㅕ', 'ㅇ', 'ㅡ', 'ㅇ'),  # e.g. 경흥원
        ('ㅏ', 'ㅇ', 'ㅏ', 'ㅇ'),  # e.g. 강항준
        ('ㅏ', 'ㅇ', 'ㅑ', 'ㅇ'),  # e.g. 강향준
    )
    observed = (first[1], first[2], second[1], second[2])
    return observed not in hard_pairs

Esempio n. 3

0

Mostra file

File: __init__.py Progetto: peternara/KoreanOCR-tf-flask

def get_label(index_data):
    """Build one-hot label rows for every entry of ``index_data``.

    Four groups of columns are produced and joined side by side: one each
    for the first, second and third component returned by
    ``split_syllable_char`` (indexed through ``ko_chset_cho``,
    ``ko_chset_jung`` and ``ko_chset_jong``), and one for ``en_chset``.
    Every group has one extra trailing column used as the 'invalid' label.

    Args:
        index_data: Sequence of mappings, each providing a ``'target'``
            character.

    Returns:
        A 2-D numpy array with one row per entry of ``index_data``.
    """
    n_rows = len(index_data)
    # Width is len(charset) + 1: the last column is the 'invalid' label.
    cho = np.zeros([n_rows, len(ko_chset_cho) + 1])
    jung = np.zeros([n_rows, len(ko_chset_jung) + 1])
    jong = np.zeros([n_rows, len(ko_chset_jong) + 1])
    eng = np.zeros([n_rows, len(en_chset) + 1])

    for row, member in enumerate(index_data):
        target = member['target']  # character to label

        # Hangul part: only a single character accepted by check_syllable.
        if len(target) == 1 and check_syllable(target):
            parts = split_syllable_char(target)
            cho[row, ko_chset_cho.index(parts[0])] = 1
            jung[row, ko_chset_jung.index(parts[1])] = 1
            # Column 0 is used when the decomposition has no third component.
            jong_col = 0 if len(parts) < 3 else ko_chset_jong.index(parts[2])
            jong[row, jong_col] = 1
        else:
            # Not a Hangul syllable: mark the trailing 'invalid' column.
            cho[row, len(ko_chset_cho)] = 1
            jung[row, len(ko_chset_jung)] = 1
            jong[row, len(ko_chset_jong)] = 1

        # English part: the character's own column, or the 'invalid' one.
        en_col = en_chset.index(target) if target in en_chset else len(en_chset)
        eng[row, en_col] = 1

    # Join all groups column-wise into a single label matrix.
    label = np.concatenate((cho, jung, jong, eng), axis=1)
    print("label loaded")
    return label

Esempio n. 4

0

Mostra file

File: 1.first_naming.py Progetto: imtaehyun/EdenNaming

def set_five_type(conn):
    """Compute and store a five-element type for every usable hanja row.

    Reads ``hanja``, ``is_naming_hanja`` and ``reading`` from the
    ``naming_hanja`` table. For each row whose ``is_naming_hanja`` is not 0,
    the first component of the decomposed reading is passed to
    ``get_five_type``; unless that returns -1, the result is printed and
    handed to ``update_hangul_strokes`` together with the hanja.

    Args:
        conn: Connection object providing ``cursor()``; it is also passed on
            to ``update_hangul_strokes``.
    """
    cursor = conn.cursor()
    rows = cursor.execute('SELECT hanja,is_naming_hanja,reading FROM naming_hanja')
    for row in rows:
        hanja, is_naming_hanja, reading = row[0], row[1], row[2]

        # Rows flagged 0 cannot be used for Korean names.
        if is_naming_hanja == 0:
            continue

        # First component of the reading (the original labels it the
        # initial consonant).
        initial = split_syllable_char(reading)[0]
        f_type = get_five_type(initial)  # five-element type of yin-yang/five-elements
        if f_type == -1:
            continue

        print(f_type, hanja)
        update_hangul_strokes(f_type, hanja, conn)

Esempio n. 5

0

Mostra file

File: semantic.py Progetto: peternara/KoreanOCR-tf-flask

def eval_tail(c, cand):
    """Score the tail of a candidate; the score penalises excessive splitting.

    The base score is ``cand.tail + 1``. Deductions are subtracted from it:
    10 when the candidate is '.' or ',', 2 when ``c`` is an apostrophe, and
    2 when the candidate is very confident and the second component of
    ``c``'s decomposition is 'ㅏ'.

    Args:
        c: Object with a ``value`` attribute and, optionally, ``prob``.
        cand: Candidate object with ``value``, ``tail`` and ``prob``.

    Returns:
        ``cand.tail + 1`` minus the accumulated deductions.
    """
    # '.' and ',' should always be split off, hence the large deduction.
    deduction = 10 if cand.value in ('.', ',') else 0

    # NOTE(review): the guard checks that ``c`` has ``prob`` while the body
    # reads ``cand.prob`` - confirm which object was intended.
    both_usable = (
        hasattr(c, "prob")
        and c.value is not None
        and len(c.value) > 0
        and len(cand.value) > 0
    )
    if both_usable:
        if c.value == '\'':
            deduction += 2
        if cand.prob > 0.99 and check_syllable(c.value) and split_syllable_char(c.value)[1] == 'ㅏ':
            deduction += 2

    return cand.tail + 1 - deduction

Esempio n. 6

0

Mostra file

def get_hangul_list():
    """Return the Hangul syllables listed in the file ``unicode_hangul`` that are not blocked.

    The file is read from the current working directory. Every
    whitespace-separated token for which ``check_syllable`` does not return
    ``False`` is decomposed with ``split_syllable_char`` and kept unless a
    block list rejects it:

    * the token is skipped when ``BLOCK_LIST1`` maps its first component to 1;
    * ``BLOCK_LIST3`` (keyed by the third component) is consulted only when
      the ``BLOCK_LIST1`` lookup fails.

    NOTE(review): because of that nesting, a token whose first component is
    present in ``BLOCK_LIST1`` with a value other than 1 is never checked
    against ``BLOCK_LIST3``. This is preserved as-is; confirm it is intended.

    Returns:
        A list of the accepted tokens, in file order.
    """
    hanguls = []
    # NOTE(review): the file is opened with the platform default encoding -
    # confirm the data file's encoding before pinning one here.
    with open('unicode_hangul') as f:
        for line in f:
            for h in line.split():
                if check_syllable(h) is False:
                    continue
                split_h = split_syllable_char(h)
                try:
                    if BLOCK_LIST1[split_h[0]] == 1:
                        continue
                except LookupError:
                    # Only missing keys / missing components are expected
                    # here; anything else (including KeyboardInterrupt) now
                    # propagates instead of being silently swallowed.
                    try:
                        if BLOCK_LIST3[split_h[2]] == 1:
                            continue
                    except LookupError:
                        # No third component, or it is not in the block list.
                        pass
                hanguls.append(h)
    return hanguls

Esempio n. 7

0

Mostra file

File: generate.py Progetto: dj-shin/ctf2018-writeups

def generate(flag):
    """Encode ``flag`` as a keystroke sequence and write it to ``keystroke.txt``.

    Every character of ``flag`` is decomposed with ``split_syllable_char``
    and each component is translated to keyboard keys through the three
    tables below. A ``ValueError`` raised for a character is treated as
    "not a Hangul syllable": spaces and newlines are then kept verbatim, and
    anything else is rejected.

    Each distinct key character is assigned an index, and the output is the
    sequence of those indices as two-digit hex numbers joined by ``-``. The
    indices follow the iteration order of a ``set``, so the mapping is not
    guaranteed to be the same from one run to the next.

    Args:
        flag: Text to encode.

    Raises:
        AssertionError: If a character is neither decomposable, a space nor
            a newline. It is raised explicitly rather than through an
            ``assert`` statement so the check still runs under ``python -O``.
        KeyError: If a component is missing from the tables.
    """
    cho_sung = {
        'ㄱ': 'k',
        'ㄴ': 'u',
        'ㄷ': 'i',
        'ㄹ': 'm',
        'ㅁ': 'y',
        'ㅂ': 'o',
        'ㅅ': 'n',
        'ㅇ': 'j',
        'ㅈ': 'l',
        'ㅊ': 'hl',
        'ㅋ': 'hk',
        'ㅌ': 'hi',
        'ㅍ': 'ho',
        'ㅎ': 'h',
        'ㅆ': 'nn',
        'ㄸ': 'ii',
        'ㄲ': 'kk',
        'ㅉ': 'll',
    }
    jung_sung = {
        'ㅏ': 'f',
        'ㅑ': '.g',
        'ㅓ': 'r',
        'ㅕ': 't',
        'ㅗ': 'v',
        'ㅛ': '.v',
        'ㅜ': 'b',
        'ㅠ': '.b',
        'ㅡ': 'g',
        'ㅣ': 'd',
        'ㅐ': 'fd',
        'ㅔ': 'c',
        'ㅟ': 'bd',
        'ㅘ': 'vf',
        'ㅚ': 'vd',
        'ㅢ': 'gd',
        'ㅝ': 'vr',
    }
    jong_sung = {
        'ㄱ': 'x',
        'ㄴ': 's',
        'ㄷ': ';z',
        'ㄹ': 'e',
        'ㅁ': 'z',
        'ㅂ': 'w',
        'ㅅ': 'q',
        'ㅇ': 'a',
        'ㅈ': ';e',
        'ㅊ': ';q',
        'ㅋ': ';x',
        'ㅌ': 'sz',
        'ㅍ': ';w',
        'ㅎ': ';s',
        'ㅆ': ';',
        'ㄶ': 'sa',
        'ㅄ': 'wq',
    }
    key_list = []
    for c in flag:
        try:
            jamo = split_syllable_char(c)
            if len(jamo) == 2:
                # Two components: no final consonant to translate.
                keys = [cho_sung[jamo[0]], jung_sung[jamo[1]]]
            else:
                keys = [cho_sung[jamo[0]], jung_sung[jamo[1]], jong_sung[jamo[2]]]
            key_list += keys
        except ValueError:
            if c == ' ' or c == '\n':
                key_list.append(c)
            else:
                # Explicit raise: an `assert` would be stripped under -O and
                # the invalid character silently dropped.
                raise AssertionError('"{}" is invalid'.format(c))

    keystrokes = ''.join(key_list)
    # One index per distinct key character, in set iteration order.
    keyset = list(set(keystrokes))
    sound_map = {k: i for i, k in enumerate(keyset)}
    msg = '-'.join(['{:02x}'.format(sound_map[ch]) for ch in keystrokes])
    with open('keystroke.txt', 'wt') as f:
        f.write(msg)

Esempio n. 8

0

Mostra file

def generate(flag):
    """Translate ``flag`` into indexed keystrokes and save them to ``keystroke.txt``.

    Each character is split into components by ``split_syllable_char``; the
    components are looked up in the key tables defined below. When the
    split raises ``ValueError`` the character is kept as-is if it is a space
    or a newline, and rejected otherwise.

    The distinct key characters are numbered and the result is written as
    two-digit hex indices separated by ``-``. Numbering follows ``set``
    iteration order and therefore may differ between runs.

    Args:
        flag: Text to encode.

    Raises:
        AssertionError: For a character that cannot be split and is not a
            space or newline. Raised explicitly (not via ``assert``) so the
            validation is not removed when Python runs with ``-O``.
        KeyError: When a component has no entry in the key tables.
    """
    cho_sung = {
        'ㄱ': 'k',
        'ㄴ': 'u',
        'ㄷ': 'i',
        'ㄹ': 'm',
        'ㅁ': 'y',
        'ㅂ': 'o',
        'ㅅ': 'n',
        'ㅇ': 'j',
        'ㅈ': 'l',
        'ㅊ': 'hl',
        'ㅋ': 'hk',
        'ㅌ': 'hi',
        'ㅍ': 'ho',
        'ㅎ': 'h',
        'ㅆ': 'nn',
        'ㄸ': 'ii',
        'ㄲ': 'kk',
        'ㅉ': 'll',
    }
    jung_sung = {
        'ㅏ': 'f',
        'ㅑ': '.g',
        'ㅓ': 'r',
        'ㅕ': 't',
        'ㅗ': 'v',
        'ㅛ': '.v',
        'ㅜ': 'b',
        'ㅠ': '.b',
        'ㅡ': 'g',
        'ㅣ': 'd',
        'ㅐ': 'fd',
        'ㅔ': 'c',
        'ㅟ': 'bd',
        'ㅘ': 'vf',
        'ㅚ': 'vd',
        'ㅢ': 'gd',
        'ㅝ': 'vr',
    }
    jong_sung = {
        'ㄱ': 'x',
        'ㄴ': 's',
        'ㄷ': ';z',
        'ㄹ': 'e',
        'ㅁ': 'z',
        'ㅂ': 'w',
        'ㅅ': 'q',
        'ㅇ': 'a',
        'ㅈ': ';e',
        'ㅊ': ';q',
        'ㅋ': ';x',
        'ㅌ': 'sz',
        'ㅍ': ';w',
        'ㅎ': ';s',
        'ㅆ': ';',
        'ㄶ': 'sa',
        'ㅄ': 'wq',
    }
    key_list = []
    for c in flag:
        try:
            jamo = split_syllable_char(c)
            # Initial and medial are always translated; the final only when
            # the decomposition has more than two components.
            keys = [cho_sung[jamo[0]], jung_sung[jamo[1]]]
            if len(jamo) != 2:
                keys.append(jong_sung[jamo[2]])
            key_list.extend(keys)
        except ValueError:
            if c in (' ', '\n'):
                key_list.append(c)
            else:
                # `assert False` would vanish under -O; raise the same
                # exception type explicitly so invalid input is never
                # silently skipped.
                raise AssertionError('"{}" is invalid'.format(c))

    joined = ''.join(key_list)
    # Assign an index to every distinct key character (set order).
    keyset = list(set(joined))
    sound_map = {k: i for i, k in enumerate(keyset)}
    msg = '-'.join(['{:02x}'.format(sound_map[ch]) for ch in joined])
    with open('keystroke.txt', 'wt') as f:
        f.write(msg)

Esempio n. 9

0

Mostra file

File: utils.py Progetto: snowvil/Kiwi

 def split(c):
     """Return the decomposition of ``c`` produced by ``hangul_utils.split_syllable_char``."""
     # The helper is imported at call time, inside the function.
     from hangul_utils import split_syllable_char
     # A variant that dropped a leading 'ㅇ' component is left disabled; the
     # full result is returned unchanged.
     return split_syllable_char(c)