import os

import librosa
import numpy as np
from scipy.io import wavfile
from tqdm import tqdm

from text import _clean_text


def prepare_align(config):
    in_dir = config["path"]["corpus_path"]
    out_dir = config["path"]["raw_path"]
    sampling_rate = config["preprocessing"]["audio"]["sampling_rate"]
    max_wav_value = config["preprocessing"]["audio"]["max_wav_value"]
    cleaners = config["preprocessing"]["text"]["text_cleaners"]
    for speaker in tqdm(os.listdir(in_dir)):
        for chapter in os.listdir(os.path.join(in_dir, speaker)):
            for file_name in os.listdir(os.path.join(in_dir, speaker, chapter)):
                if file_name[-4:] != ".wav":
                    continue
                base_name = file_name[:-4]
                text_path = os.path.join(
                    in_dir, speaker, chapter, "{}.normalized.txt".format(base_name)
                )
                wav_path = os.path.join(
                    in_dir, speaker, chapter, "{}.wav".format(base_name)
                )
                with open(text_path) as f:
                    text = f.readline().strip("\n")
                text = _clean_text(text, cleaners)

                os.makedirs(os.path.join(out_dir, speaker), exist_ok=True)
                # Load, peak-normalize, and rescale the waveform to 16-bit range.
                wav, _ = librosa.load(wav_path, sr=sampling_rate)
                wav = wav / max(abs(wav)) * max_wav_value
                wavfile.write(
                    os.path.join(out_dir, speaker, "{}.wav".format(base_name)),
                    sampling_rate,
                    wav.astype(np.int16),
                )
                with open(
                    os.path.join(out_dir, speaker, "{}.lab".format(base_name)), "w"
                ) as f1:
                    f1.write(text)

def prepare_data(config):
    in_dir = config["path"]["corpus_path"]
    out_dir = config["path"]["raw_path"]
    sampling_rate = config["preprocessing"]["audio"]["sampling_rate"]
    max_wav_value = config["preprocessing"]["audio"]["max_wav_value"]
    cleaners = config["preprocessing"]["text"]["text_cleaners"]
    speaker = "LJSpeech"
    with open(os.path.join(in_dir, "metadata.csv"), encoding="utf-8") as f:
        for line in tqdm(f):
            parts = line.strip().split("|")
            base_name = parts[0]
            text = parts[2]  # the normalized-transcription column
            text = _clean_text(text, cleaners)

            wav_path = os.path.join(in_dir, "wavs", "{}.wav".format(base_name))
            if os.path.exists(wav_path):
                os.makedirs(os.path.join(out_dir, speaker), exist_ok=True)
                # Load, peak-normalize, and rescale the waveform to 16-bit range.
                wav, _ = librosa.load(wav_path, sr=sampling_rate)
                wav = wav / max(abs(wav)) * max_wav_value
                wavfile.write(
                    os.path.join(out_dir, speaker, "{}.wav".format(base_name)),
                    sampling_rate,
                    wav.astype(np.int16),
                )
                with open(
                    os.path.join(out_dir, speaker, "{}.lab".format(base_name)), "w"
                ) as f1:
                    f1.write(text)

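# A minimal sketch (assumed, not from the source) of the nested config dict the
# two functions above index into; the key names match their lookups, but the
# paths and values here are illustrative.
example_config = {
    "path": {
        "corpus_path": "/data/LJSpeech-1.1",  # hypothetical corpus location
        "raw_path": "./raw_data/LJSpeech",    # hypothetical output location
    },
    "preprocessing": {
        "audio": {"sampling_rate": 22050, "max_wav_value": 32768.0},
        "text": {"text_cleaners": ["english_cleaners"]},
    },
}
# prepare_data(example_config)  # writes peak-normalized 16-bit wavs plus .lab transcripts
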
def normalize_string(s, labels):
    """
    Normalizes string. For example:
    'call me at 8:00 pm!' -> 'call me at eight zero zero pm'

    Args:
        s: string to normalize
        labels: labels used during model training.
    Returns:
        Normalized string
    """

    def good_token(token, labels):
        s = set(labels)
        for t in token:
            if t not in s:
                return False
        return True

    punctuation = string.punctuation
    punctuation = punctuation.replace("+", "")
    punctuation = punctuation.replace("&", "")
    # Keep any punctuation characters that belong to the label set.
    for l in labels:
        punctuation = punctuation.replace(l, "")
    # Turn all remaining punctuation to whitespace
    table = str.maketrans(punctuation, " " * len(punctuation))
    try:
        text = _clean_text(s, ["english_cleaners"], table).strip()
        return ''.join([t for t in text if good_token(t, labels=labels)])
    except Exception:
        print("WARNING: Normalizing {} failed".format(s))
        return None

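# Hypothetical usage of normalize_string, assuming `labels` is the acoustic
# model's training alphabet (lower-case letters, space, apostrophe):
labels = list("abcdefghijklmnopqrstuvwxyz '")
print(normalize_string("call me at 8:00 pm!", labels))
# expected, per the docstring: 'call me at eight zero zero pm'
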
def get_text(self, text):
    text = _clean_text(text, self.text_cleaners)
    # Split into words, keeping any {ARPAbet} annotations attached to their word.
    words = re.findall(r'\S*\{.*?\}\S*|\S+', text)
    text = ' '.join([
        get_arpabet(word, self.cmudict)
        if random.random() < self.p_arpabet else word
        for word in words
    ])
    text_norm = torch.LongTensor(text_to_sequence(text))
    return text_norm

def prepare_align(in_dir):
    with open(os.path.join(in_dir, 'metadata.csv'), encoding='utf-8') as f:
        for line in f:
            parts = line.strip().split('|')
            basename = parts[0]
            text = parts[2]
            text = _clean_text(text, hp.text_cleaners)

            with open(os.path.join(in_dir, 'wavs', '{}.txt'.format(basename)), 'w') as f1:
                f1.write(text)

def prepare_align(in_dir):
    for dirpath, dirnames, filenames in tqdm(os.walk(in_dir)):
        for file in filenames:
            if file.endswith(".txt"):
                path_in = os.path.join(dirpath, file)
                with open(path_in, 'r', encoding='utf-8') as f:
                    lines = f.readlines()
                assert len(lines) == 1
                text = lines[0]
                text = _clean_text(text, hp.text_cleaners)
                # Overwrite each transcript in place with its cleaned version.
                path_out = os.path.join(dirpath, file)
                with open(path_out, 'w', encoding='utf-8') as f:
                    f.write(text)

def prepare_align(in_dir):
    with open(os.path.join(in_dir, 'prompts.gui'), encoding='utf-8') as f:
        for line in f:
            basename = line.strip('\n')
            wav_path = os.path.join(in_dir, 'wavn', '{}.wav'.format(basename))
            if os.path.exists(wav_path):
                # The transcript is on the line after the basename; strip the
                # markup characters and collapse repeated spaces.
                text = re.sub(' +', ' ', re.sub(r'[#@|]', '', next(f).strip())).strip(' ')
                # Remove the space left in front of trailing punctuation marks.
                text = re.sub(r'\s([?.!":,-;\'\"](?:\s|$))', r'\1', text)
                text = _clean_text(text, hp.text_cleaners)
                with open(os.path.join(in_dir, 'wavn', '{}.txt'.format(basename)), 'w') as f1:
                    f1.write(text)

def prepare_align(in_dir):
    for spker in os.listdir(os.path.join(in_dir, 'txt')):
        for txt_file in os.listdir(os.path.join(in_dir, 'txt', spker)):
            with open(os.path.join(in_dir, 'txt', spker, txt_file), encoding='utf-8') as f:
                for line in f:
                    basename = txt_file.replace(".txt", "")
                    text = line
                    text = _clean_text(text, hp.text_cleaners)
                    with open(
                        os.path.join(in_dir, 'wav48', spker, '{}.txt'.format(basename)), 'w'
                    ) as f1:
                        f1.write(text)

def prepare_align(in_dir):
    for r, d, f in os.walk(in_dir):
        for file in f:
            if file.endswith(".txt"):
                basename = file.replace('.txt', '')
                with open(os.path.join(r, file), 'r') as rf:
                    text = rf.read().strip()
                text = _clean_text(text, hp.text_cleaners)
                with open(os.path.join(in_dir, '{}.txt'.format(basename)), 'w') as f1:
                    f1.write(text)

def get_text(self, text):
    text = _clean_text(text, self.text_cleaners)
    words = re.findall(r'\S*\{.*?\}\S*|\S+', text)
    text = ' '.join([
        get_arpabet(word, self.cmudict)
        if random.random() < self.p_arpabet else word
        for word in words
    ])

    # from hparams import create_hparams_and_paths
    # hparams, path = create_hparams_and_paths()
    with open('config.json') as f:
        data = f.read()
    # Note: the key spelling follows the config file of the source repo.
    embeeding_config = json.loads(data)["embeeding_config"]
    text_embedding = TextEmbedding(embeeding_config)
    text_norm = text_embedding.text_norm(text)

    from ZaG2P.api import load_model
    g2p_model, viet_dict = load_model()  # loaded here but not used below
    text_out = text_embedding.g2s(text_norm)
    sequence = text_embedding.text2seq(text_out)
    text_norm = torch.LongTensor(sequence)
    return text_norm

def encode_text(self, text):
    # Python 2 snippet: `unicode` and str.decode do not exist in Python 3.
    if type(text) is not unicode:
        text = text.decode('utf-8')
    lines = text.splitlines()
    sents = []
    for line in lines:
        sents.extend(self._sent_detector.tokenize(line.strip()))
    norm_sents = [
        text_normalize(
            _clean_text(sent, ['english_cleaners']).decode('utf-8')
        ).strip()
        for sent in sents
    ]
    final_sents = []
    for sent in norm_sents:
        chunks = self.chunk_sentence(sent)
        for chunk in chunks:
            s = chunk
            # Drop a trailing comma/semicolon, then append the EOS marker.
            if s.endswith(',') or s.endswith(';'):
                s = s[:-1]
            final_sents.append(s + 'E')
    texts = np.zeros((len(final_sents), hp.max_N), np.int32)
    for i, sent in enumerate(final_sents):
        texts[i, :len(sent)] = [self._char2idx[char] for char in sent]
    return texts

import argparse

import text
from utils import load_filepaths_and_text

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--out_extension", default="cleaned")
    parser.add_argument("--text_index", default=1, type=int)
    parser.add_argument(
        "--filelists",
        nargs="+",
        default=[
            "filelists/ljs_audio_text_val_filelist.txt",
            "filelists/ljs_audio_text_test_filelist.txt",
        ],
    )
    parser.add_argument("--text_cleaners", nargs="+", default=["english_cleaners2"])
    args = parser.parse_args()

    for filelist in args.filelists:
        print("START:", filelist)
        filepaths_and_text = load_filepaths_and_text(filelist)
        for i in range(len(filepaths_and_text)):
            original_text = filepaths_and_text[i][args.text_index]
            cleaned_text = text._clean_text(original_text, args.text_cleaners)
            filepaths_and_text[i][args.text_index] = cleaned_text
        new_filelist = filelist + "." + args.out_extension
        with open(new_filelist, "w", encoding="utf-8") as f:
            f.writelines(["|".join(x) + "\n" for x in filepaths_and_text])

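# Hypothetical invocation (the script's filename is not given in the source):
#   python preprocess.py --text_index 1 --filelists filelists/ljs_audio_text_val_filelist.txt
# Each input filelist gets a cleaned sibling written with a ".cleaned" suffix.
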
def clean_text(text):
    """
    Uses Tacotron's text cleaners to do some extra cleaning; for example,
    one of the steps is to convert numbers into words.
    """
    return _clean_text(text, ['english_cleaners'])
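
# Illustrative call; the exact output depends on the english_cleaners
# implementation, but lowercasing, abbreviation expansion, and number
# expansion are typical:
clean_text("Dr. Smith paid $16 in 1990.")
# e.g. -> 'doctor smith paid sixteen dollars in nineteen ninety.'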