Ejemplo n.º 1
0
    def jamo(self, sentences):
        """Split each sentence into individual jamo characters.

        Non-Korean symbols are stripped via the self.korean regex before the
        character list is built.

        Args:
            sentences: iterable of sentence strings.

        Returns:
            list of per-sentence character lists.
        """
        return [
            list(self.korean.sub('', jamotools.split_syllables(sentence)))
            for sentence in sentences
        ]
Ejemplo n.º 2
0
def load_volume(PATH, vocabulary):
    """Read a UTF-8 text file, split it into jamos, and index each jamo.

    Jamos unknown to the vocabulary (char2index returns -1) are skipped.

    Args:
        PATH: path of the text file to read.
        vocabulary: object exposing char2index(char) -> int, -1 for unknown.

    Returns:
        np.ndarray of integer indexes for every known jamo, in order.
    """
    # 'with' guarantees the handle is closed (the original leaked it).
    with open(PATH, "r", encoding='UTF-8') as f:
        text = f.read()
    split = jamotools.split_syllables(text)
    # Keep only jamos the vocabulary knows about.
    indexes = [c for c in (vocabulary.char2index(ch) for ch in split)
               if c != -1]
    return np.array(indexes)
Ejemplo n.º 3
0
def wordedits(word):
    """Generate all dictionary-validated single edits of a Korean word.

    Splits the word into jamo letters, builds every single-edit candidate
    (inserts, removals, swaps, replaces), rejoins each candidate's jamos into
    composed syllables, and keeps only candidates accepted by
    dictionarycomparer.

    Args:
        word: the Korean word to edit.

    Returns:
        list of dictionary-validated edit candidates.
    """
    splitword = jamotools.split_syllables(word)
    candidates = list(
        inserts(splitword) + removals(splitword) + swaps(splitword) +
        replaces(splitword))
    # Recompose each jamo-level candidate back into Hangul syllables.
    joined = [jamotools.join_jamos(w) for w in candidates]
    # NOTE(review): removed a leftover debug print of the joined candidates.
    return dictionarycomparer(joined)
Ejemplo n.º 4
0
 def _preprocess_korean(self, sent, null='ⅇ'):
     """Turn a Korean sentence into a padded (1, hp.max_N) index matrix.

     Lowercases, strips everything except Hangul syllables, whitespace and
     basic punctuation, splits syllables into jamos (replacing a leading
     'positionless' nieung with the null marker), maps spaces to '▁', and
     looks each token up in self.char2idx.
     """
     sent = re.sub(r'[^가-힣\s\.\,\?\!]', '', sent.lower())
     seq = []
     for ch in sent:
         if not re.match(r'[가-힣]', ch):
             # Non-Hangul characters pass through; space becomes '▁'.
             seq.append('▁' if ch == ' ' else ch)
             continue
         jamos = list(jamotools.split_syllables(ch))
         # The 'positionless' nieung is replaced by the null marker.
         if jamos[0] == 'ㅇ':
             jamos = [null] + jamos[1:]
         seq += jamos
     texts = np.zeros((1, hp.max_N), np.int32)
     texts[0, :len(seq)] = [self.char2idx[c] for c in seq]
     return texts
Ejemplo n.º 5
0
def txtfile2npyfile(input_path, output_path, vocabulary):
    """Convert every text file under input_path into a .npy index array.

    Each file is split into jamos; jamos with a positive vocabulary index
    are collected and saved as wiki_korean_NNNN.npy under output_path.

    Args:
        input_path: root of a directory tree containing UTF-8 text files.
        output_path: directory where the .npy files are written.
        vocabulary: object exposing char2index(char) -> int.
    """
    out_dir = os.path.abspath(output_path)
    file_counter = 1
    for root, dirs, files in os.walk(input_path):
        # BUG FIX: the original joined abspath(input_path) with `root`, but
        # `root` from os.walk already contains input_path, duplicating the
        # path component for any subdirectory.
        rootpath = os.path.abspath(root)
        for name in files:
            int_array = []
            filepath = os.path.join(rootpath, name)
            # 'with' closes the handle (the original leaked one per file).
            with open(filepath, encoding="UTF-8") as f:
                for line in f:
                    for c in jamotools.split_syllables(line):
                        index = vocabulary.char2index(c)
                        if index > 0:
                            int_array.append(index)
            file_name = "wiki_korean_{0:04}".format(file_counter) + ".npy"
            save_path = os.path.join(out_dir, file_name)
            np.save(save_path, np.array(int_array))
            print("txtfile2npyfile convert to : {0}".format(file_name)+" done ")
            file_counter = file_counter + 1
Ejemplo n.º 6
0
def testmodel2(epoch, logs):
    """Training callback: every 5th epoch (and epoch 99) sample the model.

    Seeds generation with the first 48 characters of train_text split into
    jamos, predicts 300 further jamo tokens one at a time, then prints the
    recomposed Hangul text.
    """
    # Only sample on multiples of 5, or on the final epoch 99.
    if epoch % 5 != 0 and epoch != 99:
        return

    seed = jamotools.split_syllables(train_text[:48])

    for _ in range(300):
        window = seed[-seq_length:]
        # Unknown characters fall back to the 'UNK' index.
        encoded = np.array([
            char2idx[c] if c in char2idx else char2idx['UNK']
            for c in window
        ])
        padded = pad_sequences([encoded],
                               maxlen=seq_length,
                               padding='pre',
                               value=char2idx['UNK'])
        prediction = model.predict_classes(padded)
        seed += idx2char[prediction[0]]

    print()
    print(jamotools.join_jamos(seed))
    print()
Ejemplo n.º 7
0
 def test_split_syllables(self, input, output, jamo_type):
     """Assert split_syllables matches the hex-encoded expected output."""
     expected = ''.join(_hex_string_to_str(h) for h in output)
     self.assertEqual(jamotools.split_syllables(input, jamo_type=jamo_type),
                      expected)
Ejemplo n.º 8
0
 def jamochar(self, char):
     """Split a character into jamos, dropping non-Korean symbols."""
     return self.korean.sub('', jamotools.split_syllables(char))
Ejemplo n.º 9
0
def text2encoding(text):
    """Return the encoding of each JAMO-split character of *text*."""
    jamos = jamotools.split_syllables(text, jamo_type="JAMO")
    result = []
    for ch in jamos:
        result.append(encoding_dict[ch])
    return result
Ejemplo n.º 10
0
def ota_translater(word):
    """Spell-correct *word* at the jamo level and recompose its syllables."""
    corrected = correct(jamotools.split_syllables(word))
    return jamotools.join_jamos(corrected)
Ejemplo n.º 11
0
# Jamo split/join sanity check, then build a jamo-level vocabulary.
import jamotools
import tensorflow as tf
import numpy as np

path_to_file = tf.keras.utils.get_file(
    'toji.txt',
    'https://raw.githubusercontent.com/pykwon/etc/master/rnn_test_toji.txt')
#path_to_file = 'silrok.txt'
# 'with' guarantees the handle is closed (the original leaked it).
with open(path_to_file, 'rb') as f:
    train_text = f.read().decode(encoding='utf-8')
s = train_text[:100]
print(s)

# Split Korean text into jamo units; non-Hangul (e.g. Hanja) is unaffected.
s_split = jamotools.split_syllables(s)  # 100 characters split into jamos
print(s_split)

# Join test: recomposing the jamos must reproduce the original text.
s2 = jamotools.join_jamos(s_split)
print(s2)  # recomposed result
print(s == s2)  # True: text is identical before and after the round trip

# Jamo tokenization: split the whole corpus (this takes a while).
train_text_X = jamotools.split_syllables(train_text)
vocab = sorted(set(train_text_X))
vocab.append('UNK')  # 'UNK' covers symbols not present in the vocabulary
print('{} unique characters'.format(len(vocab)))  # 179 unique characters

# Map vocab entries to integer ids (the reverse map uses the list itself).
char2idx = {u: i for i, u in enumerate(vocab)}
Ejemplo n.º 12
0
import jamotools

region_words = [
    '일월', '이월', '삼월', '사월', '오월', '육월', '칠월', '팔월', '구월', '십월', '십일월', '십이월'
]
# '이일', '삼일', '사일', '오일', '육일', '칠일', '팔일', '구일', '십일', '십일일', '일일', ]

# Write each month word as one jamo-split line. 'with' closes the file even
# on error, and pinning encoding='utf-8' keeps the Korean output independent
# of the platform default encoding (the original used neither).
with open("date_words.txt", 'w', encoding='utf-8') as f:
    for v in region_words:
        f.write(jamotools.split_syllables(v) + '\n')