Python split_char Exemples, korean.hangul.split_char Python Exemples

Exemple #1

0

Afficher le fichier

def split_char(ch):
  """
  Split ch with hangul.split_char and keep only the non-empty parts.

  @param  ch  unicode
  @return  [unicode] or None  (None when hangul.split_char raises)
  """
  try:
    # list(...) guarantees the documented list return even where filter()
    # is lazy (Python 3) or mirrors the input container type (Python 2).
    return list(filter(bool, hangul.split_char(ch)))
  except Exception:
    # Best effort: a character that cannot be split yields None.
    # KeyboardInterrupt / SystemExit are deliberately not swallowed.
    return None

Exemple #2

0

Afficher le fichier

Fichier : views.py Projet : theeluwin/ammummal

def pull(text):
    """Pull initial consonants back into the preceding Hangul syllable.

    Every Hangul character of ``text`` is split into a mutable list of
    parts; any other character is kept unchanged. For each pair of adjacent
    Hangul characters:

    * if the previous one has an empty part at index 2 and the current one
      does not start with "ㅇ", the current first part (mapped through
      ``simplify_rule`` when it has an entry) is moved into that slot;
    * otherwise, if ``compound_rule`` has an entry for the pair, the
      previous slot is replaced by that entry.

    In both cases the current first part becomes "ㅇ". The parts are then
    joined back with ``hangul.join_char``.

    Args:
        text: iterable of characters.

    Returns:
        The rewritten string.
    """
    pieces = []
    for ch in text:
        if hangul.is_hangul(ch):
            pieces.append(list(hangul.split_char(ch)))
        else:
            pieces.append(ch)

    for idx in range(1, len(pieces)):
        prev = pieces[idx - 1]
        cur = pieces[idx]
        # Only two neighbouring Hangul syllables can exchange a consonant.
        if type(prev) is not list or type(cur) is not list:
            continue
        final = prev[2]
        if not final and cur[0] != "ㅇ":
            prev[2] = simplify_rule.get(cur[0], cur[0])
            cur[0] = "ㅇ"
        elif final in compound_rule and cur[0] in compound_rule[final]:
            prev[2] = compound_rule[final][cur[0]]
            cur[0] = "ㅇ"

    return "".join(
        hangul.join_char(piece) if type(piece) is list else piece
        for piece in pieces
    )

Exemple #3

0

Afficher le fichier

Fichier : views.py Projet : Perlmint/ammummal

def pull(text):
    """Pull initial consonants back into the preceding Hangul syllable.

    Every Hangul character of ``text`` is split with ``hangul.split_char``
    into a mutable 3-part list; other characters are kept unchanged.
    Index 0 is treated as the initial consonant and index 2 as the final
    one (assumed part order of ``hangul.split_char`` -- confirm).

    For each pair of adjacent Hangul characters:

    * if the previous syllable has no final and the current initial is not
      "ㅇ", the initial is moved into the previous final slot (ㅉ/ㅃ/ㄸ are
      first reduced to ㅈ/ㅂ/ㄷ);
    * otherwise, if the previous final and the current initial form one of
      the known compound finals, the previous final becomes that compound.

    In both cases the current initial is replaced by "ㅇ". A pair that
    matches neither rule is left untouched, so no consonant is ever lost.

    Args:
        text: iterable of characters.

    Returns:
        The rewritten string, re-assembled with ``hangul.join_char``.
    """
    # Doubled initials reduced before being stored as a final
    # (presumably because they are not valid final consonants).
    simplified = {"ㅉ": "ㅈ", "ㅃ": "ㅂ", "ㄸ": "ㄷ"}
    # (existing final, following initial) -> compound final.
    compound = {
        ("ㄹ", "ㄱ"): "ㄺ",
        ("ㄹ", "ㅁ"): "ㄻ",
        ("ㄹ", "ㅍ"): "ㄿ",
        ("ㄹ", "ㅂ"): "ㄼ",
        ("ㄹ", "ㅅ"): "ㄽ",
        ("ㄹ", "ㅌ"): "ㄾ",
        ("ㄱ", "ㅅ"): "ㄳ",
        ("ㄴ", "ㅈ"): "ㄵ",
        ("ㅂ", "ㅅ"): "ㅄ",
    }

    exploded = [
        list(hangul.split_char(c)) if hangul.is_hangul(c) else c for c in text
    ]

    # The slice copies references only, so a syllable modified as "cur" in
    # one step is seen with its new value as "prev" in the next step.
    for prev, cur in zip(exploded, exploded[1:]):
        if not (isinstance(prev, list) and isinstance(cur, list)):
            continue
        if not prev[2]:
            if cur[0] != "ㅇ":
                prev[2] = simplified.get(cur[0], cur[0])
                cur[0] = "ㅇ"
            continue
        merged = compound.get((prev[2], cur[0]))
        if merged is not None:
            # Only clear the initial when it was really absorbed.
            prev[2] = merged
            cur[0] = "ㅇ"

    return "".join(
        hangul.join_char(c) if isinstance(c, list) else c for c in exploded
    )

Exemple #4

0

Afficher le fichier

 def insert(self, word):
     """Store ``word`` in the trie, keyed by its sequence of jamo.

     Each character of the word is split with ``hangul.split_char``; the
     empty strings among the resulting parts are discarded and one trie
     node is created (when missing) per remaining part. The node reached
     at the end receives the word in its ``data`` attribute.

     Always returns 0.
     """
     word = unicode(word)
     node = self.head
     # Split the word into jamo (initial, medial, final) per character.
     jamo_groups = [hangul.split_char(ch) for ch in word]
     # Drop the empty entries left by syllables that have no final.
     jamo_seq = [j for j in chain.from_iterable(jamo_groups) if j != u'']
     # Walk down the trie, creating the missing nodes on the way.
     for jamo in jamo_seq:
         children = node.children
         if jamo not in children:
             children[jamo] = TrieNode(jamo)
         node = children[jamo]
     node.data = word
     return 0

Exemple #5

0

Afficher le fichier

Fichier : namechar_crawler.py Projet : heevery/webid

# Jamo -> Latin key letter(s). Built once instead of on every call.
# NOTE(review): u'ㅌ' maps to 'e' (same as u'ㄷ'); on the usual 2-set
# keyboard layout it sits on 'x' -- confirm whether this is intended.
# NOTE(review): some jamo (e.g. u'ㅃ', u'ㅉ', u'ㅒ' and most compound
# finals) have no entry and therefore raise KeyError in get_position.
_JAMO_KEYS = {
    u'ㄱ': 'r', u'ㄲ': 'r', u'ㄴ': 's', u'ㄷ': 'e', u'ㄹ': 'f', u'ㄸ': 'e',
    u'ㄺ': 'fr', u'ㅁ': 'a', u'ㅂ': 'q', u'ㅅ': 't', u'ㅇ': 'd', u'ㅆ': 't',
    u'ㅈ': 'w', u'ㅋ': 'z', u'ㅊ': 'c', u'ㅍ': 'v', u'ㅌ': 'e', u'ㅏ': 'k',
    u'ㅎ': 'g', u'ㅑ': 'i', u'ㅐ': 'o', u'ㅓ': 'j', u'ㅕ': 'u', u'ㅔ': 'p',
    u'ㅗ': 'h', u'ㅖ': 'p', u'ㅙ': 'ho', u'ㅘ': 'hk', u'ㅛ': 'y', u'ㅚ': 'hl',
    u'ㅝ': 'nj', u'ㅜ': 'n', u'ㅟ': 'nl', u'ㅞ': 'np', u'ㅡ': 'm', u'ㅠ': 'b',
    u'ㅣ': 'l', u'ㅢ': 'ml',
}


def get_position(korchar):
    """Return the Latin key letters for the jamo of a Hangul string.

    Byte strings are decoded as UTF-8 first. A single character is split
    with ``hangul.split_char``; its non-empty parts are looked up in
    ``_JAMO_KEYS`` and concatenated. Longer input (byte string or unicode)
    is converted character by character; empty input yields ''.

    Args:
        korchar: byte string (UTF-8) or unicode string.

    Returns:
        The concatenated key letters as a string.

    Raises:
        KeyError: if a jamo has no entry in ``_JAMO_KEYS``.
    """
    if isinstance(korchar, bytes):
        korchar = korchar.decode('utf-8')
    # Recurse for any input that is not exactly one character, whatever
    # its original string type.
    if len(korchar) != 1:
        return ''.join(get_position(char) for char in korchar)
    splits = filter(None, hangul.split_char(korchar))
    return ''.join(_JAMO_KEYS[s] for s in splits)

Exemple #6

0

Afficher le fichier

Fichier : views.py Projet : Perlmint/ammummal

def pull(text):
    """Pull initial consonants back into the preceding Hangul syllable.

    Every Hangul character of ``text`` is split with ``hangul.split_char``
    into a mutable 3-part list; other characters are kept unchanged.
    Index 0 is treated as the initial consonant and index 2 as the final
    one (assumed part order of ``hangul.split_char`` -- confirm).

    For each pair of adjacent Hangul characters:

    * if the previous syllable has no final and the current initial is not
      "ㅇ", the initial is moved into the previous final slot (ㅉ/ㅃ/ㄸ are
      first reduced to ㅈ/ㅂ/ㄷ);
    * otherwise, if the previous final and the current initial form one of
      the known compound finals, the previous final becomes that compound.

    In both cases the current initial is replaced by "ㅇ". A pair that
    matches neither rule is left untouched, so no consonant is ever lost.

    Args:
        text: iterable of characters.

    Returns:
        The rewritten string, re-assembled with ``hangul.join_char``.
    """
    # Doubled initials reduced before being stored as a final
    # (presumably because they are not valid final consonants).
    simplified = {"ㅉ": "ㅈ", "ㅃ": "ㅂ", "ㄸ": "ㄷ"}
    # (existing final, following initial) -> compound final.
    compound = {
        ("ㄹ", "ㄱ"): "ㄺ",
        ("ㄹ", "ㅁ"): "ㄻ",
        ("ㄹ", "ㅍ"): "ㄿ",
        ("ㄹ", "ㅂ"): "ㄼ",
        ("ㄹ", "ㅅ"): "ㄽ",
        ("ㄹ", "ㅌ"): "ㄾ",
        ("ㄱ", "ㅅ"): "ㄳ",
        ("ㄴ", "ㅈ"): "ㄵ",
        ("ㅂ", "ㅅ"): "ㅄ",
    }

    exploded = [list(hangul.split_char(c)) if hangul.is_hangul(c) else c for c in text]

    # The slice copies references only, so a syllable modified as "cur" in
    # one step is seen with its new value as "prev" in the next step.
    for prev, cur in zip(exploded, exploded[1:]):
        if not (isinstance(prev, list) and isinstance(cur, list)):
            continue
        if not prev[2]:
            if cur[0] != "ㅇ":
                prev[2] = simplified.get(cur[0], cur[0])
                cur[0] = "ㅇ"
            continue
        merged = compound.get((prev[2], cur[0]))
        if merged is not None:
            # Only clear the initial when it was really absorbed.
            prev[2] = merged
            cur[0] = "ㅇ"

    return "".join(hangul.join_char(c) if isinstance(c, list) else c for c in exploded)

Exemple #7

0

Afficher le fichier

    def prefix_search(self, prefix):
        """Return the words stored strictly below the node reached by ``prefix``.

        The prefix is split into jamo with ``hangul.split_char``. A
        character that cannot be split (e.g. a lone 'ㅆ') is used as-is.
        The trie is then descended one jamo at a time, starting at
        ``self.head``.

        Args:
            prefix: the prefix to look up; converted with ``unicode``.

        Returns:
            A list of the ``data`` values (those that are not None) of all
            nodes below the node matching the prefix. The matching node's
            own ``data`` is not included. Returns [] when the prefix is not
            in the trie. An empty prefix matches ``self.head``, so every
            stored word below it is returned.
        """
        prefix = unicode(prefix)
        prefix_splitted = []
        # Split the prefix into jamo; characters that are not complete
        # syllables are kept unchanged.
        for char in prefix:
            try:
                prefix_splitted.append(hangul.split_char(char))
            except Exception:
                prefix_splitted.append((char,))
        # Drop the empty entries left by syllables that have no final.
        jamo_seq = [j for j in chain.from_iterable(prefix_splitted) if j != u'']

        # Descend the trie along the prefix. Starting from the head (not
        # None) keeps an empty prefix from crashing further down.
        sub_trie = self.head
        for jamo in jamo_seq:
            if jamo not in sub_trie.children:
                return []
            sub_trie = sub_trie.children[jamo]

        # Collect every word below the matched node. pop() takes from the
        # end of the list, so the traversal is last-in, first-out.
        result = []
        pending = list(sub_trie.children.values())
        while pending:
            node = pending.pop()
            if node.data is not None:
                result.append(node.data)
            pending += list(node.children.values())

        return result

Exemple #8

0

Afficher le fichier

 def decompose(cls, hc):
     """Build a ``cls`` instance from the parts of the Hangul character ``hc``.

     The result of ``hangul.split_char(hc)`` is handed to ``cls._make``.
     """
     build = cls._make
     parts = hangul.split_char(hc)
     return build(parts)

Exemple #9

0

Afficher le fichier

Fichier : KoreanNormalizer.py Projet : cedar101/twitter-korean-py

 def decompose(cls, hc):
     """Return ``cls._make`` applied to the split form of ``hc``.

     ``hc`` is split with ``hangul.split_char`` and the resulting parts
     are passed unchanged to ``cls._make``.
     """
     make = cls._make
     jamo = hangul.split_char(hc)
     return make(jamo)