Python split_syllable_charの例、hangul_utils.split_syllable_char Pythonの例

コード例 #1

0

ファイルを表示

def get_middle_name(conn, n1, n2, saju, gender, n2_rsc_type, mode):
    """Collect candidate names made of n1, n2 and a third hanja from the DB.

    Every row selected from ``naming_hanja`` (rows flagged as naming hanja
    whose reading is not in the exclusion list) is tried as the third
    character ``n3``.  A row is discarded as soon as one of the checks
    below rejects it; the survivors are stored as
    ``{concatenated hanja: concatenated reading}``.

    Args:
        conn: DB connection; ``cursor()`` is used here and the connection is
            also handed to the polarity and stroke helpers.
        n1, n2: rows for the first two characters, indexed with the same
            HANJA / READING constants as the query rows.
        saju: mapping that provides the keys ``'c1'`` and ``'c2'``.
        gender: forwarded to ``check_last_name_gender``.
        n2_rsc_type: RSC_TYPE value already used by the second character.
        mode: forwarded to ``check_total_stroke``.

    Returns:
        The candidate dict, or ``None`` when no row survives the checks.
    """
    candidates = {}
    cursor = conn.cursor()
    sql = """
    SELECT hanja,reading,strokes,add_strokes,rsc_type,pronunciations
    FROM naming_hanja
    WHERE is_naming_hanja=1 AND reading
    NOT IN ('만', '병', '백', '장', '춘', '최', '충', '창', '치', '참', '천',
    '택', '탁', '태', '외', '사', '매', '읍', '소', '종', '순', '요', '자',
    '경', '옥', '해', '부', '효', '존', '난', '류', '홍', '처', '필')
    """
    for n3 in cursor.execute(sql):
        # The third character may not repeat the hanja or the reading of
        # either earlier character (e.g. Kim-Ju-Kim, Kim-So-So).
        if (n1[HANJA] == n3[HANJA] or n2[HANJA] == n3[HANJA]
                or n1[READING] == n3[READING] or n2[READING] == n3[READING]):
            continue

        # Reading of the last two characters, validated on its own.
        tail_reading = '%s%s' % (n2[READING], n3[READING])
        if check_name(tail_reading) is False:
            continue
        if check_last_name_gender(n3[READING], tail_reading, gender) is False:
            continue

        # The row's RSC_TYPE has to match one of the two saju values ...
        element = n3[RSC_TYPE]
        if saju['c1'] != element and saju['c2'] != element:
            continue
        # ... and must differ from the one the second character already uses.
        if n2_rsc_type == element:
            continue

        if check_positive_negative(conn, n1, n2, n3) is False:
            continue
        # The stroke totals are validated as well.
        if check_total_stroke(conn, n1, n2, n3, mode) is False:
            continue

        # Pronunciation checks operate on the decomposed readings.
        jamo1 = split_syllable_char(n1[READING])
        jamo2 = split_syllable_char(n2[READING])
        jamo3 = split_syllable_char(n3[READING])
        if balum_oheng(jamo1, jamo2, jamo3) is False:
            continue
        if check_all_name_hard_pronounce(jamo1, jamo2, jamo3) is False:
            continue

        hanja_key = '%s%s%s' % (n1[HANJA], n2[HANJA], n3[HANJA])
        full_reading = '%s%s%s' % (n1[READING], n2[READING], n3[READING])
        candidates[hanja_key] = full_reading

    return candidates or None

コード例 #2

0

ファイルを表示

def check_two_words_hard_pronounce(n1, n2):
    """Return False when two syllables form a listed hard-to-pronounce pair.

    Both arguments are decomposed with ``split_syllable_char``.  Only pairs
    in which each syllable splits into three parts are examined; such a pair
    is rejected when the second and third parts of both syllables match one
    of the combinations below.  Every other input returns True.
    """
    first = split_syllable_char(n1)
    second = split_syllable_char(n2)

    if len(first) != 3 or len(second) != 3:
        return True

    # (first[1], first[2], second[1], second[2]) combinations to reject.
    rejected = (
        ('ㅕ', 'ㅇ', 'ㅡ', 'ㅇ'),  # e.g. Gyeong + Heung
        ('ㅏ', 'ㅇ', 'ㅏ', 'ㅇ'),  # e.g. Gang + Hang
        ('ㅏ', 'ㅇ', 'ㅑ', 'ㅇ'),  # e.g. Gang + Hyang
    )
    return (first[1], first[2], second[1], second[2]) not in rejected

コード例 #3

0

ファイルを表示

ファイル: __init__.py プロジェクト: peternara/KoreanOCR-tf-flask

def get_label(index_data):
    """Build the concatenated one-hot label matrix for ``index_data``.

    Four groups of columns are produced per entry, in this order: Korean
    initial, medial and final jamo (indices taken from ``ko_chset_cho``,
    ``ko_chset_jung`` and ``ko_chset_jong``) and English character (index
    taken from ``en_chset``).  Each group has one extra trailing column that
    is set when the entry does not belong to that group.

    Args:
        index_data: sequence of mappings with a ``'target'`` entry holding
            the character to label.

    Returns:
        2-D numpy array with one row per entry of ``index_data``.
    """
    count = len(index_data)
    n_cho = len(ko_chset_cho)
    n_jung = len(ko_chset_jung)
    n_jong = len(ko_chset_jong)
    n_en = len(en_chset)

    # "+ 1" reserves the trailing 'invalid' column of every group.
    cho = np.zeros([count, n_cho + 1])
    jung = np.zeros([count, n_jung + 1])
    jong = np.zeros([count, n_jong + 1])
    en = np.zeros([count, n_en + 1])

    for row, member in enumerate(index_data):
        target = member['target']

        # Korean part: a single Hangul syllable gets its jamo marked,
        # anything else gets the 'invalid' column in all three groups.
        if len(target) == 1 and check_syllable(target):
            jamo = split_syllable_char(target)
            cho[row, ko_chset_cho.index(jamo[0])] = 1
            jung[row, ko_chset_jung.index(jamo[1])] = 1
            # Column 0 is used when the syllable has no third part.
            jong_col = ko_chset_jong.index(jamo[2]) if len(jamo) >= 3 else 0
            jong[row, jong_col] = 1
        else:
            cho[row, n_cho] = 1
            jung[row, n_jung] = 1
            jong[row, n_jong] = 1

        # English part, evaluated independently of the Korean part.
        en_col = en_chset.index(target) if target in en_chset else n_en
        en[row, en_col] = 1

    label = np.concatenate((cho, jung, jong, en), axis=1)
    print("label loaded")
    return label

コード例 #4

0

ファイルを表示

ファイル: 1.first_naming.py プロジェクト: imtaehyun/EdenNaming

def set_five_type(conn):
    """Derive and store a five-element type for each usable naming hanja.

    Reads ``hanja``, ``is_naming_hanja`` and ``reading`` from the
    ``naming_hanja`` table.  Rows whose flag is 0 are skipped.  For the
    others, the first part of the decomposed reading is passed to
    ``get_five_type``; unless that returns -1, the type and hanja are
    printed and handed to ``update_hangul_strokes`` together with ``conn``.
    """
    cursor = conn.cursor()
    rows = cursor.execute('SELECT hanja,is_naming_hanja,reading FROM naming_hanja')
    for hanja, usable, reading in rows:
        if usable == 0:  # not allowed in a Korean name
            continue
        initial = split_syllable_char(reading)[0]  # initial consonant
        five_type = get_five_type(initial)  # the "five elements" category
        if five_type == -1:
            continue
        print(five_type, hanja)
        update_hangul_strokes(five_type, hanja, conn)

コード例 #5

0

ファイルを表示

ファイル: semantic.py プロジェクト: peternara/KoreanOCR-tf-flask

def eval_tail(c, cand):
    """Compute the tail score of candidate ``cand`` given ``c``.

    The result is ``cand.tail + 1`` minus a bonus:

    * 10 when ``cand.value`` is ``'.'`` or ``','``;
    * when ``c`` has a ``prob`` attribute and both values are non-empty:
      2 when ``c.value`` is an apostrophe, and another 2 when
      ``cand.prob`` exceeds 0.99 and ``c.value`` is a Hangul syllable whose
      second jamo is 'ㅏ'.
    """
    # Periods and commas get a large bonus so they are split off.
    bonus = 10 if cand.value in ('.', ',') else 0

    comparable = (
        hasattr(c, "prob")
        and c.value is not None
        and len(c.value) > 0
        and len(cand.value) > 0
    )
    if comparable:
        if c.value == '\'':
            bonus += 2
        if (cand.prob > 0.99
                and check_syllable(c.value)
                and split_syllable_char(c.value)[1] == 'ㅏ'):
            bonus += 2

    return cand.tail + 1 - bonus

コード例 #6

0

ファイルを表示

def get_hangul_list():
    """Return the Hangul syllables from ``unicode_hangul`` that are not blocked.

    The file is read line by line and split on whitespace.  Tokens rejected
    by ``check_syllable`` are ignored.  A remaining syllable is dropped when
    ``BLOCK_LIST1`` maps its first jamo to 1.  Only when ``BLOCK_LIST1`` has
    no entry for that jamo is ``BLOCK_LIST3`` consulted, with the third jamo
    (if the syllable has one), in the same way.

    Returns:
        list of the accepted syllables in file order.
    """
    hanguls = []
    with open('unicode_hangul') as f:
        for line in f:
            for h in line.split():
                if check_syllable(h) is False:
                    continue
                split_h = split_syllable_char(h)
                try:
                    if BLOCK_LIST1[split_h[0]] == 1:
                        continue
                except KeyError:
                    # No BLOCK_LIST1 entry for the first jamo: fall back to
                    # the third jamo.  Only the expected lookup misses are
                    # tolerated, so unrelated errors are no longer hidden.
                    try:
                        if BLOCK_LIST3[split_h[2]] == 1:
                            continue
                    except (KeyError, IndexError):
                        # Either the syllable has no third jamo or that
                        # jamo is not listed; the syllable is kept.
                        pass
                hanguls.append(h)
    return hanguls

コード例 #7

0

ファイルを表示

ファイル: generate.py プロジェクト: dj-shin/ctf2018-writeups

def generate(flag):
    """Encode ``flag`` as a keystroke sequence and write it to ``keystroke.txt``.

    Each character of ``flag`` is split into jamo with
    ``split_syllable_char`` and every jamo is translated to a key string
    through the tables below.  Spaces and newlines are passed through
    unchanged.  The distinct characters of the resulting key stream are
    then numbered, and the stream is written as two-digit hex indices
    joined by ``-``.

    Args:
        flag: text to encode.

    Raises:
        AssertionError: ``split_syllable_char`` rejects a character with
            ValueError and the character is neither a space nor a newline.
        KeyError: a jamo has no entry in the corresponding table.
    """
    cho_sung = {
        'ㄱ': 'k',
        'ㄴ': 'u',
        'ㄷ': 'i',
        'ㄹ': 'm',
        'ㅁ': 'y',
        'ㅂ': 'o',
        'ㅅ': 'n',
        'ㅇ': 'j',
        'ㅈ': 'l',
        'ㅊ': 'hl',
        'ㅋ': 'hk',
        'ㅌ': 'hi',
        'ㅍ': 'ho',
        'ㅎ': 'h',
        'ㅆ': 'nn',
        'ㄸ': 'ii',
        'ㄲ': 'kk',
        'ㅉ': 'll',
    }
    jung_sung = {
        'ㅏ': 'f',
        'ㅑ': '.g',
        'ㅓ': 'r',
        'ㅕ': 't',
        'ㅗ': 'v',
        'ㅛ': '.v',
        'ㅜ': 'b',
        'ㅠ': '.b',
        'ㅡ': 'g',
        'ㅣ': 'd',
        'ㅐ': 'fd',
        'ㅔ': 'c',
        'ㅟ': 'bd',
        'ㅘ': 'vf',
        'ㅚ': 'vd',
        'ㅢ': 'gd',
        'ㅝ': 'vr',
    }
    jong_sung = {
        'ㄱ': 'x',
        'ㄴ': 's',
        'ㄷ': ';z',
        'ㄹ': 'e',
        'ㅁ': 'z',
        'ㅂ': 'w',
        'ㅅ': 'q',
        'ㅇ': 'a',
        'ㅈ': ';e',
        'ㅊ': ';q',
        'ㅋ': ';x',
        'ㅌ': 'sz',
        'ㅍ': ';w',
        'ㅎ': ';s',
        'ㅆ': ';',
        'ㄶ': 'sa',
        'ㅄ': 'wq',
    }
    key_list = []
    for c in flag:
        # Only the split itself is guarded: it is the one call whose
        # ValueError the fallback below is meant to handle.
        try:
            jamo = split_syllable_char(c)
        except ValueError:
            if c == ' ' or c == '\n':
                key_list.append(c)
                continue
            # Raised explicitly (instead of ``assert False``) so that the
            # check is not stripped when Python runs with -O.
            raise AssertionError('"{}" is invalid'.format(c))

        if len(jamo) == 2:
            keys = [cho_sung[jamo[0]], jung_sung[jamo[1]]]
        else:
            keys = [cho_sung[jamo[0]], jung_sung[jamo[1]], jong_sung[jamo[2]]]
        key_list += keys

    stream = ''.join(key_list)
    # Number the distinct key characters; set iteration order is arbitrary,
    # so which index a given character receives is not fixed.
    keyset = list(set(stream))
    sound_map = {k: i for i, k in enumerate(keyset)}
    msg = '-'.join('{:02x}'.format(sound_map[ch]) for ch in stream)
    with open('keystroke.txt', 'wt') as f:
        f.write(msg)

コード例 #8

0

ファイルを表示

def generate(flag):
    """Encode ``flag`` as a keystroke sequence and write it to ``keystroke.txt``.

    Each character of ``flag`` is split into jamo with
    ``split_syllable_char`` and every jamo is translated to a key string
    through the tables below.  Spaces and newlines are passed through
    unchanged.  The distinct characters of the resulting key stream are
    then numbered, and the stream is written as two-digit hex indices
    joined by ``-``.

    Args:
        flag: text to encode.

    Raises:
        AssertionError: ``split_syllable_char`` rejects a character with
            ValueError and the character is neither a space nor a newline.
        KeyError: a jamo has no entry in the corresponding table.
    """
    cho_sung = {
        'ㄱ': 'k',
        'ㄴ': 'u',
        'ㄷ': 'i',
        'ㄹ': 'm',
        'ㅁ': 'y',
        'ㅂ': 'o',
        'ㅅ': 'n',
        'ㅇ': 'j',
        'ㅈ': 'l',
        'ㅊ': 'hl',
        'ㅋ': 'hk',
        'ㅌ': 'hi',
        'ㅍ': 'ho',
        'ㅎ': 'h',
        'ㅆ': 'nn',
        'ㄸ': 'ii',
        'ㄲ': 'kk',
        'ㅉ': 'll',
    }
    jung_sung = {
        'ㅏ': 'f',
        'ㅑ': '.g',
        'ㅓ': 'r',
        'ㅕ': 't',
        'ㅗ': 'v',
        'ㅛ': '.v',
        'ㅜ': 'b',
        'ㅠ': '.b',
        'ㅡ': 'g',
        'ㅣ': 'd',
        'ㅐ': 'fd',
        'ㅔ': 'c',
        'ㅟ': 'bd',
        'ㅘ': 'vf',
        'ㅚ': 'vd',
        'ㅢ': 'gd',
        'ㅝ': 'vr',
    }
    jong_sung = {
        'ㄱ': 'x',
        'ㄴ': 's',
        'ㄷ': ';z',
        'ㄹ': 'e',
        'ㅁ': 'z',
        'ㅂ': 'w',
        'ㅅ': 'q',
        'ㅇ': 'a',
        'ㅈ': ';e',
        'ㅊ': ';q',
        'ㅋ': ';x',
        'ㅌ': 'sz',
        'ㅍ': ';w',
        'ㅎ': ';s',
        'ㅆ': ';',
        'ㄶ': 'sa',
        'ㅄ': 'wq',
    }
    key_list = []
    for c in flag:
        # Only the split itself is guarded: it is the one call whose
        # ValueError the fallback below is meant to handle.
        try:
            jamo = split_syllable_char(c)
        except ValueError:
            if c == ' ' or c == '\n':
                key_list.append(c)
                continue
            # Raised explicitly (instead of ``assert False``) so that the
            # check is not stripped when Python runs with -O.
            raise AssertionError('"{}" is invalid'.format(c))

        if len(jamo) == 2:
            keys = [cho_sung[jamo[0]], jung_sung[jamo[1]]]
        else:
            keys = [cho_sung[jamo[0]], jung_sung[jamo[1]], jong_sung[jamo[2]]]
        key_list += keys

    stream = ''.join(key_list)
    # Number the distinct key characters; set iteration order is arbitrary,
    # so which index a given character receives is not fixed.
    keyset = list(set(stream))
    sound_map = {k: i for i, k in enumerate(keyset)}
    msg = '-'.join('{:02x}'.format(sound_map[ch]) for ch in stream)
    with open('keystroke.txt', 'wt') as f:
        f.write(msg)

コード例 #9

0

ファイルを表示

ファイル: utils.py プロジェクト: snowvil/Kiwi

 def split(c):
     """Return ``hangul_utils.split_syllable_char(c)`` unchanged.

     The import is done on each call, inside the function.
     """
     from hangul_utils import split_syllable_char as _decompose
     # NOTE: a disabled variant used to drop a leading 'ㅇ' from the result;
     # the full result is returned here.
     return _decompose(c)