def get_middle_name(conn, n1, n2, saju, gender, n2_rsc_type, mode):
    """Build full-name candidates by appending a third character to n1 + n2.

    Every ``naming_hanja`` row flagged ``is_naming_hanja=1`` (minus a fixed
    list of excluded readings) is tried as the third character ``n3`` and
    run through a chain of filters; rows that survive all of them are kept.

    Args:
        conn: Database connection; ``conn.cursor().execute(query)`` must be
            iterable over result rows (sqlite3-style -- TODO confirm).
        n1: Record of the first character, indexed with ``HANJA``/``READING``.
        n2: Record of the second character, indexed the same way.
        saju: Mapping whose ``'c1'`` and ``'c2'`` values are compared with
            the row's ``RSC_TYPE``.
        gender: Forwarded to ``check_last_name_gender``.
        n2_rsc_type: ``RSC_TYPE`` already used by ``n2``; rows with the same
            value are skipped.
        mode: Forwarded to ``check_total_stroke``.

    Returns:
        A dict mapping the concatenated hanja of (n1, n2, n3) to the
        concatenated readings, or ``None`` when no row passes the filters.
    """
    query = """
        SELECT hanja,reading,strokes,add_strokes,rsc_type,pronunciations
        FROM naming_hanja
        WHERE is_naming_hanja=1
          AND reading NOT IN ('만', '병', '백', '장', '춘', '최', '충', '창', '치', '참', '천', '택', '탁', '태', '외', '사', '매', '읍', '소', '종', '순', '요', '자', '경', '옥', '해', '부', '효', '존', '난', '류', '홍', '처', '필')
    """
    candidates = {}
    cursor = conn.cursor()

    for n3 in cursor.execute(query):
        # Never repeat a character of the name, by hanja or by reading
        # (e.g. Kim-Ju-Kim, Kim-So-So).
        if (n1[HANJA] == n3[HANJA] or n2[HANJA] == n3[HANJA]
                or n1[READING] == n3[READING] or n2[READING] == n3[READING]):
            continue

        given_name = '%s%s' % (n2[READING], n3[READING])
        if check_name(given_name) is False:
            continue
        if check_last_name_gender(n3[READING], given_name, gender) is False:
            continue

        # The row's resource type has to match one of the two saju values...
        if saju['c1'] != n3[RSC_TYPE] and saju['c2'] != n3[RSC_TYPE]:
            continue
        # ...but must not repeat the resource type already taken by n2.
        if n2_rsc_type == n3[RSC_TYPE]:
            continue

        if check_positive_negative(conn, n1, n2, n3) is False:
            continue
        # The stroke totals get their own check as well.
        if check_total_stroke(conn, n1, n2, n3, mode) is False:
            continue

        jamo1 = split_syllable_char(n1[READING])
        jamo2 = split_syllable_char(n2[READING])
        jamo3 = split_syllable_char(n3[READING])
        if balum_oheng(jamo1, jamo2, jamo3) is False:
            continue
        if check_all_name_hard_pronounce(jamo1, jamo2, jamo3) is False:
            continue

        hanja_key = '%s%s%s' % (n1[HANJA], n2[HANJA], n3[HANJA])
        reading_value = '%s%s%s' % (n1[READING], n2[READING], n3[READING])
        candidates[hanja_key] = reading_value

    if not candidates:
        return None
    return candidates
def check_two_words_hard_pronounce(n1, n2):
    """Tell whether two adjacent syllables avoid a hard-to-pronounce pairing.

    Both syllables are decomposed with ``split_syllable_char``. The pair is
    rejected only when each syllable decomposes into three parts, both end
    in the final consonant 'ㅇ', and their vowels form one of the listed
    combinations.

    Args:
        n1: First syllable.
        n2: Second syllable.

    Returns:
        ``False`` for a rejected pairing, ``True`` otherwise.
    """
    first = split_syllable_char(n1)
    second = split_syllable_char(n2)

    # Only syllables that carry a final consonant can clash.
    if len(first) != 3 or len(second) != 3:
        return True
    # Every rejected pattern needs 'ㅇ' as the final of both syllables.
    if first[2] != 'ㅇ' or second[2] != 'ㅇ':
        return True

    hard_vowel_pairs = (
        ('ㅕ', 'ㅡ'),  # e.g. Gyeong-Heung (경흥)
        ('ㅏ', 'ㅏ'),  # e.g. Gang-Hang (강항)
        ('ㅏ', 'ㅑ'),  # e.g. Gang-Hyang (강향)
    )
    return (first[1], second[1]) not in hard_vowel_pairs
def get_label(index_data):
    """Build the concatenated one-hot label matrix for ``index_data``.

    Four one-hot tables are filled per sample -- Hangul initial, medial and
    final jamo, plus the English character set -- each with one extra
    trailing column that marks "invalid". The tables are then joined
    column-wise.

    Args:
        index_data: Sequence of mappings; each must provide the target
            character under the ``'target'`` key.

    Returns:
        A 2-D numpy array with one row per sample and the four tables'
        columns side by side (cho, jung, jong, en).
    """
    sample_count = len(index_data)

    # The index just past each character set is that table's 'invalid' column.
    cho_invalid = len(ko_chset_cho)
    jung_invalid = len(ko_chset_jung)
    jong_invalid = len(ko_chset_jong)
    en_invalid = len(en_chset)

    label_ko_cho = np.zeros([sample_count, cho_invalid + 1])
    label_ko_jung = np.zeros([sample_count, jung_invalid + 1])
    label_ko_jong = np.zeros([sample_count, jong_invalid + 1])
    label_en = np.zeros([sample_count, en_invalid + 1])

    for row, member in enumerate(index_data):
        target = member['target']  # the character being labelled

        # Hangul part: a single complete syllable is split into its jamo.
        if len(target) == 1 and check_syllable(target):
            jamo = split_syllable_char(target)
            cho_col = ko_chset_cho.index(jamo[0])
            jung_col = ko_chset_jung.index(jamo[1])
            if len(jamo) < 3:
                # No final consonant: column 0 is used
                # (presumably the "no final" entry -- TODO confirm).
                jong_col = 0
            else:
                jong_col = ko_chset_jong.index(jamo[2])
        else:
            cho_col, jung_col, jong_col = cho_invalid, jung_invalid, jong_invalid

        label_ko_cho[row, cho_col] = 1
        label_ko_jung[row, jung_col] = 1
        label_ko_jong[row, jong_col] = 1

        # English part.
        en_col = en_chset.index(target) if target in en_chset else en_invalid
        label_en[row, en_col] = 1

    # Join all four tables side by side.
    label = np.concatenate(
        (label_ko_cho, label_ko_jung, label_ko_jong, label_en), axis=1)
    print("label loaded")
    return label
def set_five_type(conn):
    """Compute and store the five-element type for every usable hanja row.

    Reads ``hanja``, ``is_naming_hanja`` and ``reading`` from
    ``naming_hanja``. For each row whose flag is not 0, the reading is split
    into jamo, the first jamo (the initial consonant) is passed to
    ``get_five_type``, and any result other than -1 is printed and handed to
    ``update_hangul_strokes``.

    Args:
        conn: Database connection; also forwarded to
            ``update_hangul_strokes``.
    """
    cursor = conn.cursor()
    rows = cursor.execute('SELECT hanja,is_naming_hanja,reading FROM naming_hanja')

    for hanja, is_naming_hanja, reading in rows:
        # Rows flagged 0 cannot be used in a Korean name.
        if is_naming_hanja == 0:
            continue

        jamo = split_syllable_char(reading)
        # The initial consonant decides the "five elements" type
        # (the five-elements part of yin-yang/five-elements).
        five_type = get_five_type(jamo[0])
        if five_type == -1:
            continue

        print(five_type, hanja)
        update_hangul_strokes(five_type, hanja, conn)
def eval_tail(c, cand):
    """Score the tail of a candidate; the tail measures excessive splitting.

    The result is ``cand.tail + 1`` minus a discount:

    * 10 when the candidate is a lone '.' or ',' (these should be split off);
    * 2 when ``c`` is an apostrophe;
    * 2 when ``cand.prob`` exceeds 0.99 and ``c`` is a Hangul syllable whose
      medial vowel is 'ㅏ'.

    The last two discounts are only considered when ``c`` has a ``prob``
    attribute and both ``c.value`` and ``cand.value`` are non-empty.

    NOTE(review): the guard tests ``hasattr(c, "prob")`` while the value
    read afterwards is ``cand.prob`` -- confirm this is intentional.

    Args:
        c: Object with a ``value`` attribute (and possibly ``prob``).
        cand: Candidate with ``value``, ``tail`` and, when the guarded
            branch runs, ``prob``.

    Returns:
        The adjusted tail score.
    """
    # Strongly favour splitting '.' and ',' into their own tokens.
    discount = 10 if (cand.value == '.' or cand.value == ',') else 0

    comparable = (
        hasattr(c, "prob")
        and c.value is not None
        and len(c.value) > 0
        and len(cand.value) > 0
    )
    if comparable:
        if c.value == '\'':
            discount += 2
        if (cand.prob > 0.99
                and check_syllable(c.value)
                and split_syllable_char(c.value)[1] == 'ㅏ'):
            discount += 2

    return cand.tail + 1 - discount
def get_hangul_list():
    """Read the ``unicode_hangul`` file and return its allowed syllables.

    The file is read line by line and split on whitespace. A token is kept
    when ``check_syllable`` does not return ``False`` for it and it is not
    rejected by the block lists:

    * if ``BLOCK_LIST1`` has an entry for the token's first jamo, the token
      is dropped when that entry equals 1;
    * only when that lookup fails is ``BLOCK_LIST3`` consulted with the
      third jamo, dropping the token when that entry equals 1.

    NOTE(review): because the second list is checked only when the first
    lookup fails, a token whose first jamo is present in ``BLOCK_LIST1``
    with a value other than 1 is never tested against ``BLOCK_LIST3`` --
    confirm this is the intended rule.

    Returns:
        List of accepted tokens, in file order.

    Raises:
        OSError: If ``unicode_hangul`` cannot be opened.
    """
    hanguls = []
    with open('unicode_hangul') as f:
        for line in f:
            for h in line.split():
                if check_syllable(h) is False:
                    continue
                split_h = split_syllable_char(h)
                # Only missing-entry errors are expected here (no such key,
                # or no third jamo); anything else should surface instead of
                # being silently swallowed by a bare ``except``.
                try:
                    if BLOCK_LIST1[split_h[0]] == 1:
                        continue
                except LookupError:
                    try:
                        if BLOCK_LIST3[split_h[2]] == 1:
                            continue
                    except LookupError:
                        pass
                hanguls.append(h)
    return hanguls
def generate(flag):
    """Encode ``flag`` as a keystroke sequence and write it to keystroke.txt.

    Each character of ``flag`` is decomposed with ``split_syllable_char``
    and its jamo are translated to key strings through the tables below.
    Spaces and newlines are passed through unchanged. Every distinct
    character of the resulting key stream is then assigned a number, and the
    stream is written as two-digit hex codes joined by '-'.

    The numbering comes from iterating a ``set``, whose order is arbitrary,
    so the code assigned to a given key is not guaranteed to be the same
    from one run to the next.

    Args:
        flag: Text made of Hangul syllables, spaces and newlines.

    Raises:
        AssertionError: If ``flag`` contains a character that
            ``split_syllable_char`` rejects and that is neither a space nor
            a newline.
        KeyError: If a jamo has no entry in the corresponding table.
    """
    cho_sung = {
        'ㄱ': 'k', 'ㄴ': 'u', 'ㄷ': 'i', 'ㄹ': 'm', 'ㅁ': 'y', 'ㅂ': 'o',
        'ㅅ': 'n', 'ㅇ': 'j', 'ㅈ': 'l', 'ㅊ': 'hl', 'ㅋ': 'hk', 'ㅌ': 'hi',
        'ㅍ': 'ho', 'ㅎ': 'h', 'ㅆ': 'nn', 'ㄸ': 'ii', 'ㄲ': 'kk', 'ㅉ': 'll',
    }
    jung_sung = {
        'ㅏ': 'f', 'ㅑ': '.g', 'ㅓ': 'r', 'ㅕ': 't', 'ㅗ': 'v', 'ㅛ': '.v',
        'ㅜ': 'b', 'ㅠ': '.b', 'ㅡ': 'g', 'ㅣ': 'd', 'ㅐ': 'fd', 'ㅔ': 'c',
        'ㅟ': 'bd', 'ㅘ': 'vf', 'ㅚ': 'vd', 'ㅢ': 'gd', 'ㅝ': 'vr',
    }
    jong_sung = {
        'ㄱ': 'x', 'ㄴ': 's', 'ㄷ': ';z', 'ㄹ': 'e', 'ㅁ': 'z', 'ㅂ': 'w',
        'ㅅ': 'q', 'ㅇ': 'a', 'ㅈ': ';e', 'ㅊ': ';q', 'ㅋ': ';x', 'ㅌ': 'sz',
        'ㅍ': ';w', 'ㅎ': ';s', 'ㅆ': ';', 'ㄶ': 'sa', 'ㅄ': 'wq',
    }

    key_list = []
    for c in flag:
        try:
            jamo = split_syllable_char(c)
        except ValueError:
            # Not decomposable: only whitespace separators may pass through.
            if c == ' ' or c == '\n':
                key_list.append(c)
                continue
            # Raised explicitly (not via ``assert``) so the check still
            # fires when Python runs with -O.
            raise AssertionError('"{}" is invalid'.format(c))

        if len(jamo) == 2:
            # Syllable without a final consonant.
            key_list += [cho_sung[jamo[0]], jung_sung[jamo[1]]]
        else:
            key_list += [cho_sung[jamo[0]], jung_sung[jamo[1]],
                         jong_sung[jamo[2]]]

    key_stream = ''.join(key_list)
    keyset = list(set(key_stream))
    sound_map = {k: i for i, k in enumerate(keyset)}
    msg = '-'.join(['{:02x}'.format(sound_map[c]) for c in key_stream])

    with open('keystroke.txt', 'wt') as f:
        f.write(msg)
def generate(flag):
    """Encode ``flag`` as a keystroke sequence and write it to keystroke.txt.

    Each character of ``flag`` is decomposed with ``split_syllable_char``
    and its jamo are translated to key strings through the tables below.
    Spaces and newlines are passed through unchanged. Every distinct
    character of the resulting key stream is then assigned a number, and the
    stream is written as two-digit hex codes joined by '-'.

    The numbering comes from iterating a ``set``, whose order is arbitrary,
    so the code assigned to a given key is not guaranteed to be the same
    from one run to the next.

    Args:
        flag: Text made of Hangul syllables, spaces and newlines.

    Raises:
        AssertionError: If ``flag`` contains a character that
            ``split_syllable_char`` rejects and that is neither a space nor
            a newline.
        KeyError: If a jamo has no entry in the corresponding table.
    """
    cho_sung = {
        'ㄱ': 'k', 'ㄴ': 'u', 'ㄷ': 'i', 'ㄹ': 'm', 'ㅁ': 'y', 'ㅂ': 'o',
        'ㅅ': 'n', 'ㅇ': 'j', 'ㅈ': 'l', 'ㅊ': 'hl', 'ㅋ': 'hk', 'ㅌ': 'hi',
        'ㅍ': 'ho', 'ㅎ': 'h', 'ㅆ': 'nn', 'ㄸ': 'ii', 'ㄲ': 'kk', 'ㅉ': 'll',
    }
    jung_sung = {
        'ㅏ': 'f', 'ㅑ': '.g', 'ㅓ': 'r', 'ㅕ': 't', 'ㅗ': 'v', 'ㅛ': '.v',
        'ㅜ': 'b', 'ㅠ': '.b', 'ㅡ': 'g', 'ㅣ': 'd', 'ㅐ': 'fd', 'ㅔ': 'c',
        'ㅟ': 'bd', 'ㅘ': 'vf', 'ㅚ': 'vd', 'ㅢ': 'gd', 'ㅝ': 'vr',
    }
    jong_sung = {
        'ㄱ': 'x', 'ㄴ': 's', 'ㄷ': ';z', 'ㄹ': 'e', 'ㅁ': 'z', 'ㅂ': 'w',
        'ㅅ': 'q', 'ㅇ': 'a', 'ㅈ': ';e', 'ㅊ': ';q', 'ㅋ': ';x', 'ㅌ': 'sz',
        'ㅍ': ';w', 'ㅎ': ';s', 'ㅆ': ';', 'ㄶ': 'sa', 'ㅄ': 'wq',
    }

    key_list = []
    for c in flag:
        try:
            jamo = split_syllable_char(c)
        except ValueError:
            # Not decomposable: only whitespace separators may pass through.
            if c == ' ' or c == '\n':
                key_list.append(c)
                continue
            # Raised explicitly (not via ``assert``) so the check still
            # fires when Python runs with -O.
            raise AssertionError('"{}" is invalid'.format(c))

        if len(jamo) == 2:
            # Syllable without a final consonant.
            key_list += [cho_sung[jamo[0]], jung_sung[jamo[1]]]
        else:
            key_list += [cho_sung[jamo[0]], jung_sung[jamo[1]],
                         jong_sung[jamo[2]]]

    key_stream = ''.join(key_list)
    keyset = list(set(key_stream))
    sound_map = {k: i for i, k in enumerate(keyset)}
    msg = '-'.join(['{:02x}'.format(sound_map[c]) for c in key_stream])

    with open('keystroke.txt', 'wt') as f:
        f.write(msg)
def split(c):
    """Decompose ``c`` with ``hangul_utils.split_syllable_char``.

    The result is returned unchanged. An earlier variant that dropped a
    leading 'ㅇ' initial is disabled, so the initial is always included.

    Args:
        c: Character to decompose.

    Returns:
        Whatever ``split_syllable_char`` returns for ``c``.
    """
    # Imported at call time, so the dependency is only needed when used.
    import hangul_utils

    return hangul_utils.split_syllable_char(c)