Beispiel #1
0
def rhymecheck(w1, w2):
    """Score how strongly two Korean words rhyme, in [0, 1].

    Each word is run through KoG2P to obtain its phoneme sequence,
    the phonemes are remapped by the vowel/consonant conversion
    helpers, and the score is the longest-common-subsequence length
    divided by the length of the longer sequence.
    """
    phones_a = g2p.runKoG2P(w1, 'rulebook.txt').split()
    phones_b = g2p.runKoG2P(w2, 'rulebook.txt').split()

    # Remap vowels first, then consonants, for both sequences.
    phones_a = conconvt(vowelconvt(phones_a))
    phones_b = conconvt(vowelconvt(phones_b))

    longer = max(len(phones_a), len(phones_b))
    return lcs_length(phones_a, phones_b) / longer
Beispiel #2
0
        def _normalize(line):
            """Convert one test-transcript line into a list of character ids."""
            _, expanded, text = line.strip().split("|")

            if hp.num_exp == 0:
                # Use the number-expanded text, run grapheme-to-phoneme.
                text = runKoG2P(expanded + u"␃", "rulebook.txt")  # ␃: EOS
            else:
                text += u"␃"
                # Optional jamo decomposition, keyed by the experiment number.
                mapping = {2: j2hcj, 3: j2sj, 4: j2shcj}.get(hp.num_exp)
                if mapping is not None:
                    text = [mapping[char] for char in text]
                text = chain.from_iterable(text)
            return [char2idx[char] for char in text]
Beispiel #3
0
def load_data(mode="train"):
    '''Loads data.

    Args:
        mode: "train" or "synthesize".

    Returns:
        If mode == "train": (fpaths, text_lengths, texts) — audio file
        paths, lengths of the encoded texts, and the texts themselves
        serialized as int32 byte strings.
        Otherwise: an int32 array of shape (num_sents, hp.max_N) with
        zero-padded character-id sequences for the test sentences.
    '''
    # Load vocabulary
    char2idx, idx2char = load_vocab()

    # load conversion dictionaries
    j2hcj, j2sj, j2shcj = load_j2hcj(), load_j2sj(), load_j2shcj()

    def _encode(expanded, text):
        # Map one transcript entry to a list of character ids.  Shared by
        # the train and synthesize branches (the original duplicated it).
        if hp.num_exp == 0:
            text = expanded + u"␃"  # ␃: EOS
            text = runKoG2P(text, "rulebook.txt")
        else:
            text += u"␃"  # ␃: EOS
            if hp.num_exp == 2:
                text = [j2hcj[char] for char in text]
            elif hp.num_exp == 3:
                text = [j2sj[char] for char in text]
            elif hp.num_exp == 4:
                text = [j2shcj[char] for char in text]
            text = chain.from_iterable(text)
        return [char2idx[char] for char in text]

    if mode == "train":
        # Parse
        fpaths, text_lengths, texts = [], [], []
        transcript = os.path.join(hp.data, 'transcript.v.1.1.txt')
        lines = codecs.open(transcript, 'rb', 'utf-8').readlines()
        for line in lines:
            fname, _, expanded, text, _ = line.strip().split("|")

            fpaths.append(os.path.join(hp.data, fname))

            ids = _encode(expanded, text)
            text_lengths.append(len(ids))
            # tobytes() replaces tostring(), which was deprecated and is
            # removed in NumPy 2.0; the produced bytes are identical.
            texts.append(np.array(ids, np.int32).tobytes())

        return fpaths, text_lengths, texts
    else:  # synthesize on unseen test text.
        # Parse; the header line (lines[0]) is skipped.
        lines = codecs.open(hp.test_data, 'rb', 'utf8').read().splitlines()
        sents = []
        for line in lines[1:]:
            _, expanded, text = line.strip().split("|")
            sents.append(_encode(expanded, text))
        texts = np.zeros((len(sents), hp.max_N), np.int32)
        for i, sent in enumerate(sents):
            texts[i, :len(sent)] = sent
        return texts
Beispiel #4
0
        lcs_res = lcs_length(w1, w2)/(max(len(w1), len(w2)))
        print("전체 단어의 최장 겹침 수 : " + str(lcs_res))
        res = lcs_res
        if res>=0.35:
            flag=1
        if flag==r[2].value:
            cnt=cnt+1
        flag=0

print("총 맞힌 횟수 : "+ str(cnt))
print("정확률 : "+ str(cnt/100))
'''

# Interactive check: read two words, convert each to its phoneme
# sequence with KoG2P, remap the phonemes, and print the normalised
# longest-common-subsequence overlap between the two sequences.
w1 = input()
w2 = input()

w1 = g2p.runKoG2P(w1, 'rulebook.txt').split()
w2 = g2p.runKoG2P(w2, 'rulebook.txt').split()

print('변환하기 전의 음운 구성')
print(w1)
print(w2)

# Vowel conversion first, then consonant conversion.
w1 = conconvt(vowelconvt(w1))
w2 = conconvt(vowelconvt(w2))

print('변환 후의 음운 구성')
print(w1)
print(w2)

lcs_res = lcs_length(w1, w2) / max(len(w1), len(w2))
print("전체 단어의 최장 겹침 수 : " + str(lcs_res))
Beispiel #5
0
# Collect the non-blank frames of the first softmax output sequence.
# Frame 0 is skipped, and classes 111 and 0 are dropped (presumably the
# CTC blank / padding labels — confirm against the model config).
without_blank = []
without_blank_index = []
for index, item in enumerate(total_output_prob_sm_[0]):
    if index == 0:
        continue
    if np.argmax(item) != 111 and np.argmax(item) != 0:
        without_blank.append(item)
        without_blank_index.append(index)


from g2p import runKoG2P

estimation = np.asarray(without_blank)
estimation_text = vec_seq_to_kor(estimation)

# Ground-truth lyrics: convert to phonemes, then map each phoneme back
# to its Hangul label via the lookup table.
gt = '베인마음속에난무얼찾아볼수있을까서투른판단에더울적해진채로보낸밤우린선을그어버릴까'
gt_phoneme = runKoG2P(gt, 'rulebook.txt')
gt_list = ''
for item in gt_phoneme.split(' '):
    gt_list += Hangul[np.where(table == item)[0][0]]


gt_vec = kor_seq_to_vec(gt_list)


import numpy as np
from scipy.spatial.distance import euclidean
from fastdtw import fastdtw

# Align the estimated frames against the ground-truth vectors with DTW.
distance, path = fastdtw(estimation, gt_vec, dist=euclidean)

# BUG FIX: np.zeros takes the shape as a single tuple argument; the
# original np.zeros(len(gt_vec), 1) passed 1 as the dtype and raised
# "TypeError: Cannot interpret '1' as a data type".
final_timing = np.zeros((len(gt_vec), 1))