def split_char(ch):
    """Split one character into its non-empty components.

    @param ch unicode
    @return [unicode] or None

    The components come from ``hangul.split_char``; falsy entries are
    dropped (presumably the empty string that stands for a missing final
    consonant -- confirm against the ``hangul`` module).  ``None`` is
    returned when the character cannot be split.
    """
    try:
        # A list comprehension instead of ``filter`` so the result is a real
        # list, as documented, on every interpreter version.
        return [part for part in hangul.split_char(ch) if part]
    except Exception:
        # Deliberately best-effort: an unsplittable character yields None.
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate.
        return None
def pull(text):
    """Shift leading consonants backwards into the preceding syllable.

    Every character for which ``hangul.is_hangul`` is true is split into a
    mutable list with ``hangul.split_char``; other characters are kept as
    they are.  Adjacent pairs of split syllables are then visited from
    left to right:

    * when the left syllable's third component is empty and the right
      syllable does not start with "ㅇ", the right syllable's first
      component (mapped through ``simplify_rule`` when it has an entry)
      becomes the left syllable's third component;
    * otherwise, when ``compound_rule`` has an entry for that pair, the
      left syllable's third component is replaced by the mapped value.

    In both cases the right syllable's first component is reset to "ㅇ".
    The syllables are finally re-assembled with ``hangul.join_char``.
    """
    units = []
    for ch in text:
        if hangul.is_hangul(ch):
            units.append(list(hangul.split_char(ch)))
        else:
            units.append(ch)

    # The slice is shallow, so each pair refers to the same list objects
    # that are joined again below; edits made for one pair are visible to
    # the next pair.
    for prev, cur in zip(units, units[1:]):
        if not (type(prev) is list and type(cur) is list):
            continue
        if not prev[2] and cur[0] != "ㅇ":
            prev[2] = simplify_rule.get(cur[0], cur[0])
            cur[0] = "ㅇ"
        elif prev[2] in compound_rule and cur[0] in compound_rule[prev[2]]:
            prev[2] = compound_rule[prev[2]][cur[0]]
            cur[0] = "ㅇ"

    pieces = []
    for unit in units:
        if type(unit) is list:
            pieces.append(hangul.join_char(unit))
        else:
            pieces.append(unit)
    return "".join(pieces)
def pull(text):
    """Shift leading consonants backwards into the preceding syllable.

    Every character for which ``hangul.is_hangul`` is true is split into a
    mutable ``[initial, vowel, final]`` list with ``hangul.split_char``;
    other characters pass through unchanged.  Adjacent pairs of split
    syllables are visited from left to right:

    * if the left syllable has no final and the right syllable does not
      start with "ㅇ", the right initial becomes the left final;
    * otherwise, if the left final and the right initial form one of the
      supported compound finals, the left final becomes that compound.

    In both cases the right initial is replaced by "ㅇ".  A pair that
    matches neither rule is left untouched.  The syllables are finally
    re-assembled with ``hangul.join_char``.

    :param text: string to transform.
    :return: the transformed string.
    """
    # ㄸ, ㅃ and ㅉ are not valid final consonants, so they are reduced to
    # their plain counterparts when pulled into an empty final slot.
    simplify = {"ㅉ": "ㅈ", "ㅃ": "ㅂ", "ㄸ": "ㄷ"}

    # (current final, next initial) -> compound final.  A single table keeps
    # every ㄹ combination reachable and guarantees the next initial is only
    # consumed when a compound was actually formed.
    compound = {
        ("ㄱ", "ㅅ"): "ㄳ",
        ("ㄴ", "ㅈ"): "ㄵ",
        ("ㄹ", "ㄱ"): "ㄺ",
        ("ㄹ", "ㅁ"): "ㄻ",
        ("ㄹ", "ㅂ"): "ㄼ",
        ("ㄹ", "ㅅ"): "ㄽ",
        ("ㄹ", "ㅌ"): "ㄾ",
        ("ㄹ", "ㅍ"): "ㄿ",
        ("ㅂ", "ㅅ"): "ㅄ",
    }

    exploded = [
        list(hangul.split_char(c)) if hangul.is_hangul(c) else c
        for c in text
    ]

    # The slice is shallow: both sequences hold the same list objects, so a
    # change made to a syllable is seen when it becomes the left-hand side
    # of the next pair.
    for a, b in zip(exploded, exploded[1:]):
        if not (isinstance(a, list) and isinstance(b, list)):
            continue
        if not a[2] and b[0] != "ㅇ":
            a[2] = simplify.get(b[0], b[0])
            b[0] = "ㅇ"
        else:
            merged = compound.get((a[2], b[0]))
            if merged is not None:
                a[2] = merged
                b[0] = "ㅇ"

    return "".join(
        hangul.join_char(c) if isinstance(c, list) else c for c in exploded
    )
def insert(self, word):
    """Insert ``word`` into the trie, keyed by its jamo sequence.

    Each character is decomposed with ``hangul.split_char``.  A character
    that cannot be decomposed is used as a key by itself, which is the
    same fallback ``prefix_search`` applies to its prefix, so that every
    word that can be searched for can also be stored.  Empty components
    are skipped.  The node reached at the end of the path stores the
    whole word in its ``data`` attribute.

    :param word: word to store; converted with ``unicode()`` first.
    :return: always ``0``.
    """
    word = unicode(word)

    # Split the word into jamo (initial, vowel, final) per character.
    jamo_groups = []
    for char in word:
        try:
            jamo_groups.append(hangul.split_char(char))
        except Exception:
            # Not decomposable: keep the character itself.  ``chain`` below
            # iterates a one-character string as that single character.
            jamo_groups.append(char)

    # Drop the empty strings left by syllables without a final consonant.
    keys = [jamo for jamo in chain(*jamo_groups) if jamo != u'']

    # Walk down from the root, creating nodes that do not exist yet.
    node = self.head
    for key in keys:
        if key not in node.children:
            node.children[key] = TrieNode(key)
        node = node.children[key]

    node.data = word
    return 0
# Key(s) on a QWERTY keyboard, in the standard Korean two-set layout, that
# type each jamo.  Shifted jamo (ㄲ, ㅃ, ㅒ, ...) map to the same key as their
# unshifted counterpart; compound jamo map to the keys of their parts in
# typing order.
_JAMO_KEY_POSITIONS = {
    # Consonants.
    u'ㄱ': 'r', u'ㄲ': 'r', u'ㄴ': 's', u'ㄷ': 'e', u'ㄸ': 'e',
    u'ㄹ': 'f', u'ㅁ': 'a', u'ㅂ': 'q', u'ㅃ': 'q', u'ㅅ': 't',
    u'ㅆ': 't', u'ㅇ': 'd', u'ㅈ': 'w', u'ㅉ': 'w', u'ㅊ': 'c',
    u'ㅋ': 'z', u'ㅌ': 'x', u'ㅍ': 'v', u'ㅎ': 'g',
    # Compound final consonants.
    u'ㄳ': 'rt', u'ㄵ': 'sw', u'ㄶ': 'sg', u'ㄺ': 'fr', u'ㄻ': 'fa',
    u'ㄼ': 'fq', u'ㄽ': 'ft', u'ㄾ': 'fx', u'ㄿ': 'fv', u'ㅀ': 'fg',
    u'ㅄ': 'qt',
    # Vowels.
    u'ㅏ': 'k', u'ㅐ': 'o', u'ㅑ': 'i', u'ㅒ': 'o', u'ㅓ': 'j',
    u'ㅔ': 'p', u'ㅕ': 'u', u'ㅖ': 'p', u'ㅗ': 'h', u'ㅘ': 'hk',
    u'ㅙ': 'ho', u'ㅚ': 'hl', u'ㅛ': 'y', u'ㅜ': 'n', u'ㅝ': 'nj',
    u'ㅞ': 'np', u'ㅟ': 'nl', u'ㅠ': 'b', u'ㅡ': 'm', u'ㅢ': 'ml',
    u'ㅣ': 'l',
}


def get_position(korchar):
    """Return the keyboard keys that type ``korchar``.

    A single character is decomposed with ``hangul.split_char`` and each
    non-empty jamo is replaced by its key letter(s).  A longer string is
    converted character by character and the results are concatenated; an
    empty string yields an empty string.

    :param korchar: text or UTF-8 encoded bytes.
    :return: string of lowercase key letters.
    :raises KeyError: if ``hangul.split_char`` returns a component that has
        no entry in the key table.

    Whatever ``hangul.split_char`` raises for a character it cannot split
    is propagated unchanged.
    """
    # ``bytes`` is an alias of ``str`` on Python 2, so byte strings are still
    # decoded there while text strings keep working on Python 3.
    if isinstance(korchar, bytes):
        korchar = korchar.decode('utf-8')

    # Anything other than exactly one character is handled per character;
    # this also covers the empty string without calling ``split_char``.
    if len(korchar) != 1:
        return ''.join(get_position(char) for char in korchar)

    jamo = [part for part in hangul.split_char(korchar) if part]
    return ''.join(_JAMO_KEY_POSITIONS[part] for part in jamo)
def pull(text):
    """Shift leading consonants backwards into the preceding syllable.

    Every character for which ``hangul.is_hangul`` is true is split into a
    mutable ``[initial, vowel, final]`` list with ``hangul.split_char``;
    other characters pass through unchanged.  Adjacent pairs of split
    syllables are visited from left to right:

    * if the left syllable has no final and the right syllable does not
      start with "ㅇ", the right initial becomes the left final;
    * otherwise, if the left final and the right initial form one of the
      supported compound finals, the left final becomes that compound.

    In both cases the right initial is replaced by "ㅇ".  A pair that
    matches neither rule is left untouched.  The syllables are finally
    re-assembled with ``hangul.join_char``.

    :param text: string to transform.
    :return: the transformed string.
    """
    # ㄸ, ㅃ and ㅉ are not valid final consonants, so they are reduced to
    # their plain counterparts when pulled into an empty final slot.
    simplify = {"ㅉ": "ㅈ", "ㅃ": "ㅂ", "ㄸ": "ㄷ"}

    # (current final, next initial) -> compound final.  A single table keeps
    # every ㄹ combination reachable and guarantees the next initial is only
    # consumed when a compound was actually formed.
    compound = {
        ("ㄱ", "ㅅ"): "ㄳ",
        ("ㄴ", "ㅈ"): "ㄵ",
        ("ㄹ", "ㄱ"): "ㄺ",
        ("ㄹ", "ㅁ"): "ㄻ",
        ("ㄹ", "ㅂ"): "ㄼ",
        ("ㄹ", "ㅅ"): "ㄽ",
        ("ㄹ", "ㅌ"): "ㄾ",
        ("ㄹ", "ㅍ"): "ㄿ",
        ("ㅂ", "ㅅ"): "ㅄ",
    }

    exploded = [
        list(hangul.split_char(c)) if hangul.is_hangul(c) else c
        for c in text
    ]

    # The slice is shallow: both sequences hold the same list objects, so a
    # change made to a syllable is seen when it becomes the left-hand side
    # of the next pair.
    for a, b in zip(exploded, exploded[1:]):
        if not (isinstance(a, list) and isinstance(b, list)):
            continue
        if not a[2] and b[0] != "ㅇ":
            a[2] = simplify.get(b[0], b[0])
            b[0] = "ㅇ"
        else:
            merged = compound.get((a[2], b[0]))
            if merged is not None:
                a[2] = merged
                b[0] = "ㅇ"

    return "".join(
        hangul.join_char(c) if isinstance(c, list) else c for c in exploded
    )
def prefix_search(self, prefix):
    """Return the stored words found below the node for ``prefix``.

    The prefix is decomposed into jamo with ``hangul.split_char``.  A
    character that cannot be decomposed (for example a lone jamo such as
    'ㅆ') is used as a key by itself.  Empty components are skipped.

    Only nodes strictly below the prefix node are inspected, so a word
    whose jamo sequence equals the prefix exactly is not part of the
    result.  An empty prefix starts at the root and therefore returns
    every stored word.

    :param prefix: prefix to look up; converted with ``unicode()`` first.
    :return: list of the ``data`` values of matching nodes, or ``[]`` when
        the prefix path does not exist.
    """
    prefix = unicode(prefix)

    # Split the prefix into jamo; keep characters that are not complete
    # syllables as they are.
    jamo_groups = []
    for char in prefix:
        try:
            jamo_groups.append(hangul.split_char(char))
        except Exception:
            # ``chain`` below iterates a one-character string as that
            # single character.
            jamo_groups.append(char)

    # Drop the empty strings left by syllables without a final consonant.
    keys = [jamo for jamo in chain(*jamo_groups) if jamo != u'']

    # Follow the prefix path down from the root.
    node = self.head
    for key in keys:
        if key not in node.children:
            return []
        node = node.children[key]

    # Collect complete words in the subtree.  ``pop()`` takes the most
    # recently added node, so the walk is last-in-first-out; that order is
    # kept because it determines the order of the result.
    result = []
    pending = list(node.children.values())
    while pending:
        current = pending.pop()
        if current.data is not None:
            result.append(current.data)
        pending.extend(current.children.values())
    return result
def decompose(cls, hc):
    """Build an instance of ``cls`` from the components of ``hc``.

    ``hc`` is passed to ``hangul.split_char`` and the sequence it returns
    is handed to ``cls._make``.

    NOTE(review): ``_make`` looks like the ``namedtuple`` alternate
    constructor -- confirm against the class definition.
    """
    components = hangul.split_char(hc)
    return cls._make(components)