# ---- Example 1 (score: 0) ----
    def inference(self, sentence, output_path):
        """Synthesize speech for *sentence* using the model and vocoder on self.

        Args:
            sentence: raw input sentence (Korean text) to synthesize.
            output_path: currently unused — NOTE(review): confirm whether
                `synthesize` should be receiving it as an output location.
        """
        # Convert raw text into the model-ready phoneme-id tensor.
        text = kor_preprocess(sentence)
        # Removed an unused local `g2p = G2p()` that was constructed but never read.
        synthesize(self.model,
                   self.vocoder,
                   text,
                   sentence,
                   prefix='step_{}'.format(200000))
# ---- Example 2 (score: 0) ----
def convert_g2pk_scripts_pandas(scripts):
    """Phonemize the 'x' column of a pandas DataFrame with g2pk.

    Args:
        scripts: DataFrame with columns 'x' (raw text) and 'y' (label).

    Returns:
        list of [phonemized_text, label] pairs, in row order.
    """
    # Construct the converter once; G2p construction is not free.
    g2p = G2p()
    g2pk_scripts = []
    # Unused `index` dropped; dead commented-out variants (descriptive=True,
    # group_vowels=True, to_syl=False) removed — only the default call was live.
    for _, row in tqdm(scripts.iterrows()):
        g2pk_scripts.append([g2p(row['x']), row['y']])

    return g2pk_scripts
# ---- Example 3 (score: 0) ----
def convert_g2pk_scripts(scripts):
    """Phonemize a sequence of (text, label) records with g2pk.

    Args:
        scripts: iterable of indexable items where item[0] is the raw text
            and item[1] is its label.

    Returns:
        list of [phonemized_text, label] pairs, in input order.
    """
    # Construct the converter once and reuse it across all items.
    g2p = G2p()
    # Unused `idx`/`enumerate` dropped; dead commented-out variants
    # (descriptive=True, group_vowels=True, to_syl=False) removed.
    return [[g2p(item[0]), item[1]] for item in scripts]
def kor_preprocess(text):
    """Convert a Korean sentence into a (1, T) LongTensor of symbol ids on `device`.

    Pipeline: strip trailing punctuation, grapheme-to-phoneme via G2p,
    decompose Hangul syllables with h2j, drop spaces, brace-wrap each
    phoneme, collapse lone non-word phonemes to '{sp}', then encode with
    text_to_sequence and return a batch-of-one tensor.
    """
    stripped = text.rstrip(punctuation)

    converter = G2p()
    phone = converter(stripped)
    print('after g2p: ', phone)

    phone = h2j(phone)
    print('after h2j: ', phone)

    # Discard bare spaces, then wrap every remaining phoneme in braces.
    phone = '{' + '}{'.join(p for p in phone if p != ' ') + '}'
    print('phone: ', phone)

    # A braced single non-word, non-space character becomes a short pause.
    phone = re.sub(r'\{[^\w\s]?\}', '{sp}', phone)
    print('after re.sub: ', phone)

    phone = phone.replace('}{', ' ')
    print('|' + phone + '|')

    encoded = text_to_sequence(phone, hp.text_cleaners)
    batch = np.stack([np.array(encoded)])
    return torch.from_numpy(batch).long().to(device)
from layers import TacotronSTFT

# Bidirectional maps between TTS symbols and integer ids.
symbol_to_id = {s: i for i, s in enumerate(symbols)}
id_to_symbol = {i: s for i, s in enumerate(symbols)}

csv_file = '/hd0/speech-aligner/metadata/metadata.csv'
root_dir = '/hd0/dataset/VCTK/VCTK-Corpus/wav48'
data_dir = '/hd0/speech-aligner/preprocessed/VCTK20_engspks'

# Output layout: character sequences, phoneme sequences, mel spectrograms.
os.makedirs(data_dir, exist_ok=True)
os.makedirs(os.path.join(data_dir, 'char_seq'), exist_ok=True)
os.makedirs(os.path.join(data_dir, 'phone_seq'), exist_ok=True)
os.makedirs(os.path.join(data_dir, 'melspectrogram'), exist_ok=True)

g2p = G2p()
metadata = {}
with codecs.open(csv_file, 'r', 'utf-8') as fid:
    # Each metadata line is "utterance_id|text|speaker".
    # Iterate the file lazily instead of materializing it with readlines();
    # renamed `id` -> `utt_id` to stop shadowing the builtin.
    for line in fid:
        utt_id, text, spk = line.split("|")
        utt_id = os.path.splitext(utt_id)[0]  # drop any file extension

        clean_char = custom_english_cleaners(text.rstrip())
        clean_phone = []
        for s in g2p(clean_char.lower()):
            # Prefer the '@'-prefixed phoneme variant when the symbol table has it.
            if '@' + s in symbol_to_id:
                clean_phone.append('@' + s)
            else:
                clean_phone.append(s)

        metadata[utt_id] = {'char': clean_char, 'phone': clean_phone}
# ---- Example 6 (score: 0) ----
# Standard library
import os
import shutil
import sys
import threading
from collections import namedtuple
from time import sleep

# Third-party (duplicate `import torch` removed — it appeared twice)
import librosa
import numpy as np
import scipy.io.wavfile as wavfile
import torch
from scipy import signal
from tqdm import tqdm

from g2pk import G2p
g2pk = G2p()  # shared grapheme-to-phoneme converter instance

# Make the project root importable before pulling in local modules.
sys.path.append(os.path.join(os.path.dirname(__file__), '../'))
from settings import configs

from utils import text2encoding, encoding2text

# (audio file path, transcript) record used by the dataset code below.
FTPair = namedtuple('FileTextPair', ['file_path', 'text'])

# Cache of previously computed phoneme conversions.
PHONEME_DICT = dict()

class AudioTextDataset(Dataset):
    def __init__(self, meta_file_path, configs):  # , transform=None):
        self.file_text_pair_list = load_file_text_pair_list(meta_file_path)