def rhymecheck(w1, w2):
    """Score how strongly two Korean words rhyme.

    Each word is converted to a phoneme sequence via G2P, the vowel and
    consonant classes are collapsed, and the score is the longest common
    subsequence length normalized by the longer sequence (range [0, 1]).
    """
    def _phoneme_seq(word):
        # Grapheme-to-phoneme conversion, then merge vowel/consonant classes.
        seq = g2p.runKoG2P(word, 'rulebook.txt').split()
        return conconvt(vowelconvt(seq))

    seq1 = _phoneme_seq(w1)
    seq2 = _phoneme_seq(w2)
    # LCS length normalized by the longer of the two phoneme sequences.
    return lcs_length(seq1, seq2) / max(len(seq1), len(seq2))
def _normalize(line):
    """Convert one 'id|expanded|text' transcript line to a list of char ids."""
    _, expanded, text = line.strip().split("|")
    if hp.num_exp == 0:
        # Run G2P on the number-expanded text; ␃ marks end-of-sentence.
        text = runKoG2P(expanded + u"␃", "rulebook.txt")
    else:
        text += u"␃"
    # Optional jamo decomposition, selected by the experiment number.
    if hp.num_exp == 2:
        text = [j2hcj[ch] for ch in text]
    elif hp.num_exp == 3:
        text = [j2sj[ch] for ch in text]
    elif hp.num_exp == 4:
        text = [j2shcj[ch] for ch in text]
    # Flatten nested decompositions, then map every symbol to its vocab index.
    flattened = chain.from_iterable(text)
    return [char2idx[ch] for ch in flattened]
def load_data(mode="train"):
    '''Loads data.

    Args:
      mode: "train" or "synthesize".

    Returns:
      For "train": (fpaths, text_lengths, texts) — audio file paths, the
      length of each index sequence, and the sequences serialized as int32
      bytes. Otherwise: a (num_sents, hp.max_N) int32 matrix of zero-padded
      character-index sequences for the test sentences.
    '''
    # Load vocabulary
    char2idx, idx2char = load_vocab()

    # Load jamo conversion dictionaries.
    j2hcj, j2sj, j2shcj = load_j2hcj(), load_j2sj(), load_j2shcj()

    if mode == "train":
        # Parse the training transcript: fname|_|expanded|text|_ per line.
        fpaths, text_lengths, texts = [], [], []
        transcript = os.path.join(hp.data, 'transcript.v.1.1.txt')
        with codecs.open(transcript, 'rb', 'utf-8') as f:
            lines = f.readlines()
        for line in lines:
            fname, _, expanded, text, _ = line.strip().split("|")
            fpath = os.path.join(hp.data, fname)
            fpaths.append(fpath)

            if hp.num_exp == 0:
                text = expanded + u"␃"  # ␃: EOS
                text = runKoG2P(text, "rulebook.txt")
            else:
                text += u"␃"  # ␃: EOS
            # Optional jamo decomposition, selected by the experiment number.
            if hp.num_exp == 2:
                text = [j2hcj[char] for char in text]
            elif hp.num_exp == 3:
                text = [j2sj[char] for char in text]
            elif hp.num_exp == 4:
                text = [j2shcj[char] for char in text]
            # Flatten nested decompositions and map symbols to vocab indices.
            text = chain.from_iterable(text)
            text = [char2idx[char] for char in text]

            text_lengths.append(len(text))
            # tobytes() replaces ndarray.tostring(), a deprecated alias that
            # was removed in NumPy 1.23; the produced bytes are identical.
            texts.append(np.array(text, np.int32).tobytes())

        return fpaths, text_lengths, texts
    else:  # synthesize on unseen test text.
        def _normalize(line):
            # Mirror of the training-time pipeline for 'id|expanded|text' rows.
            _, expanded, text = line.strip().split("|")
            if hp.num_exp == 0:
                text = expanded + u"␃"  # ␃: EOS
                text = runKoG2P(text, "rulebook.txt")
            else:
                text += u"␃"
            if hp.num_exp == 2:
                text = [j2hcj[char] for char in text]
            elif hp.num_exp == 3:
                text = [j2sj[char] for char in text]
            elif hp.num_exp == 4:
                text = [j2shcj[char] for char in text]
            text = chain.from_iterable(text)
            text = [char2idx[char] for char in text]
            return text

        with codecs.open(hp.test_data, 'rb', 'utf8') as f:
            lines = f.read().splitlines()
        sents = [_normalize(line) for line in lines[1:]]  # lines[0]: header row
        # Zero-pad every sentence to the fixed maximum length hp.max_N.
        texts = np.zeros((len(sents), hp.max_N), np.int32)
        for i, sent in enumerate(sents):
            texts[i, :len(sent)] = sent
        return texts
# Evaluation step: LCS length normalized by the longer phoneme sequence.
# NOTE(review): w1/w2, flag, cnt, and r come from earlier, outside this chunk —
# presumably a loop over labeled word pairs; verify against the full script.
lcs_res = lcs_length(w1, w2)/(max(len(w1), len(w2)))
print("전체 단어의 최장 겹침 수 : " + str(lcs_res))
res = lcs_res
# Predict "rhymes" when the normalized overlap reaches the 0.35 threshold.
if res>=0.35:
    flag=1
# Count a hit when the prediction matches the ground-truth label, then reset.
# NOTE(review): statement nesting reconstructed from a collapsed source line —
# confirm the original indentation.
if flag==r[2].value:
    cnt=cnt+1
flag=0
print("총 맞힌 횟수 : "+ str(cnt))
print("정확률 : "+ str(cnt/100))
# Disabled interactive test version below; the closing ''' is outside this chunk.
''' w1 = input() w2 = input() w1 = g2p.runKoG2P(w1, 'rulebook.txt') w2 = g2p.runKoG2P(w2, 'rulebook.txt') w1 = w1.split() w2 = w2.split() print('변환하기 전의 음운 구성') print(w1) print(w2) w1 = vowelconvt(w1) w2 = vowelconvt(w2) w1 = conconvt(w1) w2 = conconvt(w2) print('변환 후의 음운 구성') print(w1) print(w2) lcs_res = lcs_length(w1, w2) / (max(len(w1), len(w2))) print("전체 단어의 최장 겹침 수 : " + str(lcs_res))
# Align the model's per-frame phoneme probabilities with the ground-truth
# lyric line via dynamic time warping.
without_blank = []
without_blank_index = []
# Keep only frames whose argmax is a real symbol: index 111 and index 0 are
# skipped (presumably blank/padding classes — TODO confirm), as is frame 0.
for index, item in enumerate(total_output_prob_sm_[0]):
    if index == 0:
        continue
    if np.argmax(item) != 111 and np.argmax(item) != 0:
        without_blank.append(item)
        without_blank_index.append(index)

from g2p import runKoG2P

estimation = np.asarray(without_blank)
estimation_text = vec_seq_to_kor(estimation)

# Ground-truth lyric line and its phoneme sequence.
gt = '베인마음속에난무얼찾아볼수있을까서투른판단에더울적해진채로보낸밤우린선을그어버릴까'
gt_phoneme = runKoG2P(gt, 'rulebook.txt')
gt_list = ''
for item in gt_phoneme.split(' '):
    # Map each phoneme symbol back to its Hangul jamo via the lookup table.
    gt_list += Hangul[np.where(table == item)[0][0]]
gt_vec = kor_seq_to_vec(gt_list)

import numpy as np
from scipy.spatial.distance import euclidean
from fastdtw import fastdtw

# DTW alignment between estimated frames and the ground-truth vector.
distance, path = fastdtw(estimation, gt_vec, dist=euclidean)
# BUG FIX: np.zeros(len(gt_vec), 1) passed 1 as the dtype argument and raised
# TypeError at runtime; the shape must be given as a single tuple.
final_timing = np.zeros((len(gt_vec), 1))