class PhoneticFeature(Feature):
    """Feature measuring how differently two words are pronounced.

    The replaced headline word and its replacement are transcribed with
    g2p, and the Levenshtein distance between the two transcriptions is
    scaled by the length of the longer transcription.
    """

    # Grapheme-to-phoneme converter, built once and shared by every call.
    g2p = G2p()

    @classmethod
    def compute_feature(cls, HL: Headline) -> np.ndarray:
        """Return the scaled phonetic edit distance for one headline.

        Args:
            HL: headline providing ``sentence``, ``word_index`` and ``edit``.

        Returns:
            A one-element array holding the Levenshtein distance between the
            two space-joined transcriptions divided by the length of the
            longer one, or ``0.0`` when both transcriptions are empty.
        """
        # Replaced word and replacement word.
        words = [HL.sentence[HL.word_index], HL.edit]

        # Transcribe each token to a space-separated phoneme string.
        phones = [" ".join(cls.g2p(w.lower())) for w in words]

        # Levenshtein distance between the two pronunciation strings.
        levenshtein_dist = StringMatcher.distance(*phones)

        # Scale by the longer transcription (character length of the string).
        scale_factor = max(len(p) for p in phones)
        if scale_factor == 0:
            # Both transcriptions are empty, so they are identical; returning
            # 0.0 avoids the ZeroDivisionError the division would raise.
            return np.array([0.0])

        return np.array([levenshtein_dist / scale_factor])
def g2p(x):
    """Run grapheme-to-phoneme conversion on ``x``.

    The ``G2p`` converter is stored in the module-level ``_g2p`` variable and
    is only constructed the first time it is found to be ``None``.
    """
    from g2p_en import G2p

    global _g2p
    converter = _g2p
    if converter is None:
        # First use: build the converter and remember it for later calls.
        converter = _g2p = G2p()
    return converter(x)
def load_phonemes(word, pho_to_words):
    """Register ``word`` under each of its phonemes in ``pho_to_words``.

    Every token produced by g2p for ``word`` has its digit characters
    removed before being passed to ``add_word_to_phoneme``.
    """
    from g2p_en import G2p

    converter = G2p()
    for phoneme in converter(word):
        # Drop digit characters (presumably the stress markers).
        stripped = ''.join(filter(lambda ch: not ch.isdigit(), phoneme))
        add_word_to_phoneme(stripped, word, pho_to_words)
def main():
    """Read whitespace-tokenised lines from stdin and print their phones.

    Each distinct word is converted with g2p once and cached. With
    ``--compact`` a trailing numeric character is stripped from every phone
    before it is cached.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--compact",
        action="store_true",
        help="if set, compacts phones",
    )
    compact = arg_parser.parse_args().compact

    def strip_trailing_digit(phone):
        return phone[:-1] if phone[-1].isnumeric() else phone

    wrd_to_phn = {}
    g2p = G2p()

    for line in sys.stdin:
        words = line.strip().split()
        phones = []
        for word in words:
            if word not in wrd_to_phn:
                pron = g2p(word)
                if compact:
                    pron = [strip_trailing_digit(p) for p in pron]
                wrd_to_phn[word] = pron
            phones.extend(wrd_to_phn[word])
        try:
            print(" ".join(phones))
        except BaseException:
            # Dump the state that led to the failure, then propagate it.
            print(wrd_to_phn, words, phones, file=sys.stderr)
            raise
def make_arpabet(text):
    """Convert ``text`` into a string of curly-braced g2p tokens.

    The text is cleaned with ``english_cleaners`` and converted with g2p;
    every resulting token that is not a listed punctuation mark is wrapped
    in ``{}``, and the space placed before each punctuation mark is removed.
    """
    g2p = G2p()

    # Punctuation that must stay outside curly braces.
    punc = list("!?,.;:#-_'\"()[]\n,.")
    # Maps " <mark>" back to "<mark>" to repair spacing after the join.
    punc_dict = {" " + mark: mark for mark in punc}

    tokens = " ".join(g2p(english_cleaners(text))).split(" ")
    wrapped = []
    for token in tokens:
        token = token.strip()
        wrapped.append(token if token in punc else "{" + token + "}")

    result = " ".join(wrapped)
    for spaced, bare in punc_dict.items():
        result = result.replace(spaced, bare)
    return result
def get_phonemes():
    """Build a sequence of 1-based label indices for a phoneme list.

    NOTE(review): as written the function only ever converts the literal
    list ``['h#']``; it looks like the text to convert was meant to come
    from somewhere else - confirm against the original caller.
    """
    def to_dict():
        # Maps the first space-separated field of each entry to its last
        # field, skipping entries whose first field is 'q'.
        phn_dic = {}
        for tag in phonemes:
            if tag.split(' ')[0] == 'q':
                pass
            else:
                phn_dic[tag.split(' ')[0]] = tag.split(' ')[-1]
        return phn_dic

    def to_list():
        # Unique last fields of all entries. The list is built from a set,
        # so its ordering (and therefore every index taken from it) is not
        # guaranteed to be stable.
        phn_list = [strTag.split(' ')[-1] for strTag in phonemes]
        phn_list = list(set(phn_list))
        return phn_list

    g2p = G2p()
    phonemes = ['h#']  # h# : start token
    # NOTE(review): g2p receives the list `phonemes` itself, not a text
    # string - verify this is the intended argument.
    phonemes.extend(strPhoneme.lower() for strPhoneme in g2p(phonemes))
    phonemes.append('h#')  # h# : end token
    labels = []
    for label in phonemes:
        if label in ['q', ' ', "'"]:
            pass
        else:
            # Strip digit characters before looking the label up.
            label = ''.join([i for i in label if not i.isdigit()])
            # to_list() and to_dict() are recomputed on every iteration.
            labels.append(to_list().index(to_dict()[label]) + 1)
    # NOTE(review): `labels` holds plain ints here; numpy.concatenate
    # expects array-like sequences - confirm this call works as intended.
    return numpy.concatenate(labels)
def process_english(text):
    """Turn English ``text`` into a phoneme string and its id sequence.

    Tokens found in the LibriSpeech lexicon use the lexicon pronunciation;
    all other tokens go through g2p with its space tokens dropped.

    Returns:
        ``(phones, sequence)`` where ``phones`` is the ``{...}``-wrapped
        phoneme string and ``sequence`` is a NumPy array built from
        ``phone_to_sequence``.
    """
    text = text.rstrip(punctuation)
    lexicon = read_lexicon('mytext/lexicon/librispeech-lexicon.txt')
    g2p = G2p()

    phone_list = []
    for token in re.split(r"([,;.\-\?\!\s+])", text):
        key = token.lower()
        if key in lexicon:
            phone_list.extend(lexicon[key])
        else:
            phone_list.extend(p for p in g2p(token) if p != " ")

    phones = "{" + "}{".join(phone_list) + "}"
    # Empty or single-punctuation groups become the "sp" symbol.
    phones = re.sub(r"\{[^\w\s]?\}", "{sp}", phones)
    phones = phones.replace("}{", " ")

    sequence = np.array(phone_to_sequence(phones, ["english_cleaners"]))
    return phones, np.array(sequence)
def phone_synthesize_solution(self, text):
    """
    Synthesize speech from text by concatenating phonemes selected from the
    database.

    Each lower-cased word is looked up in the CMU dictionary; words without
    an entry fall back to g2p. The wav file of every resulting phone is
    appended to one audio segment, which is written to ``DST_DIR + "gen.wav"``
    and then played.
    """
    # Convert all words to lower case
    words = [word.lower() for word in text.split()]

    # Build the pronouncing dictionary once rather than once per word.
    pronunciations = cmudict.dict()
    g2p = None  # created lazily: only needed for out-of-dictionary words

    phones = []
    for word in words:
        try:
            # Use cmudict to get the first phonemic representation.
            phones.extend(pronunciations[word][0])
        except (KeyError, IndexError):
            # A plain dict signals a missing word with KeyError, a
            # defaultdict(list) with IndexError on the empty entry; either
            # way fall back to g2p.
            if g2p is None:
                g2p = G2p()
            phones.extend(g2p(word))
    print(phones)

    # Initialize an empty audio segment
    result = AudioSegment.empty()

    # Concatenate phonemes selected from PHONEMES_DIR
    for phone in phones:
        # Ignore the trailing digit (accent marker)
        phone = phone[0:-1] if phone[-1].isdigit() else phone
        # Look up the phoneme wav file using phone_map
        sound_label = phone_map[phone]
        sound_path = PHONEMES_DIR + str(sound_label) + ".wav"
        result += AudioSegment.from_wav(sound_path)

    # Write the synthesized .wav file to DST_DIR
    result.export(DST_DIR + "gen.wav", format="wav")
    play(result)
def preprocess_english(
    texts: List[str],
    preprocess_config,
) -> List[np.array]:
    """Convert English texts into phoneme id sequences.

    Args:
        texts: raw input sentences.
        preprocess_config: mapping providing ``["path"]["lexicon_path"]`` and
            ``["preprocessing"]["text"]["text_cleaners"]``.

    Returns:
        One NumPy array per input text, built from ``text_to_sequence``.
    """
    # The lexicon and the g2p model do not depend on the individual text,
    # so load them once instead of once per sentence.
    lexicon = read_lexicon(preprocess_config["path"]["lexicon_path"])
    g2p = G2p()

    sequences = []
    for text in texts:
        text = text.rstrip(punctuation)
        phones = []
        words = re.split(r"([,;.\-\?\!\s+])", text)
        for w in filter(lambda x: x != " ", words):
            key = w.lower()
            if key in lexicon:
                phones += lexicon[key]
            else:
                phones += g2p(w)

        phones = "{" + "}{".join(phones) + "}"
        # Empty or single-punctuation groups become the "sp" symbol.
        phones = re.sub(r"\{[^\w\s]?\}", "{sp}", phones)
        phones = phones.replace("}{", " ")

        print("Raw Text Sequence: {}".format(text))
        print("Phoneme Sequence: {}".format(phones))

        sequence = np.array(
            text_to_sequence(
                phones,
                preprocess_config["preprocessing"]["text"]["text_cleaners"],
            )
        )
        sequences.append(sequence)
    return sequences
def main():
    """Read whitespace-tokenised lines from stdin and print their phones.

    Each distinct word is converted with g2p once and cached. With
    ``--insert-silence`` / ``-s`` the ``<s>`` token is added at the start and
    end of every output line.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("root_dirs", nargs="*")
    parser.add_argument("--insert-silence", "-s", action="store_true")
    args = parser.parse_args()
    sil = "<s>"

    wrd_to_phn = {}
    g2p = G2p()

    for line in sys.stdin:
        words = line.strip().split()
        phones = []
        if args.insert_silence:
            phones.append(sil)
        for w in words:
            if w not in wrd_to_phn:
                wrd_to_phn[w] = g2p(w)
            phones.extend(wrd_to_phn[w])
        if args.insert_silence:
            phones.append(sil)
        try:
            print(" ".join(phones))
        except Exception:
            # Report the whole word list: the loop variable `w` is unbound
            # when no line so far contained a word, which would replace the
            # real error with a NameError.
            print(wrd_to_phn, words, phones, file=sys.stderr)
            raise
def run(args):
    """Convert a '|'-separated transcript file into reduced phoneme sequences.

    For every line of ``args.src`` the first field is the utterance id and
    the last field is the text. The text is stripped of ``--`` and of the
    characters ``: " - ( )``, its punctuation is either unified to ``.`` or
    removed (``args.no_punc``), and it is then converted with g2p and mapped
    through ``phoneme_reduce_mapping``. Results are written to ``args.out``.
    """
    punc = '!?,;'  # punctuation that we want to keep when no_punc is False
    g2p = G2p()
    full_set = set()

    with open(args.src, 'r') as f:
        f_lines = f.readlines()

    with open(args.out, 'w') as f_out:
        f_out.write(SEP + 'phn_seq\n')
        for line in tqdm(f_lines):
            fields = line.split('|')
            idx = fields[0]
            line = fields[-1].replace('--', '')
            line_origin = line
            # The hyphen must be escaped: unescaped, `"-(` is a character
            # range that strips # $ % & ' as well and leaves '-' in place.
            line = re.sub(r'[:"\-()]', '', line)
            if not args.no_punc:
                # Unify the kept punctuation marks to '.'
                for token in punc:
                    line = line.replace(token, '.')
            else:
                line = re.sub('[!?,.;]', '', line)
            try:
                phn = [
                    phoneme_reduce_mapping[remove_num(p.lower())]
                    for p in g2p(line)
                ]
            except Exception:
                # Show the offending text, then propagate the real error
                # (the previous handler called `out()`, a name not defined
                # in the visible source).
                print(line_origin)
                print(line)
                raise
            full_set |= set(phn)
            f_out.write(idx + SEP + ' '.join(phn) + '\n')

    print("Total {} phonemes used.".format(len(full_set)))
def build_phonetic_form_and_save_lyrics(
        artist_lyrics_map, author_word_to_phonetic_form_dictionary,
        author_word_list):
    """Build the phonetic form of every lyrics text of an artist.

    Lines containing ``<verse_start>`` or ``<verse_end>`` are copied
    unchanged and the matching marker is appended to ``author_word_list``;
    every other line is converted with ``build_phonetic_form_of_line``.

    Returns:
        A dict mapping each lyrics filename to the result of
        ``make_text_lyrics_of_list_of_lines`` for its converted lines.
    """
    artist_songs_phonetic_form_map = {}
    # One converter serves all files; building it per file is wasted work.
    g2p = G2p()

    for lyrics_filename, lyrics_text in artist_lyrics_map.items():
        phonetic_form_of_lyrics = []
        for line in lyrics_text.split("\n"):
            is_verse_line = "<verse_start>" in line or "<verse_end>" in line
            if is_verse_line:
                phonetic_form_of_lyrics.append(line)
                if "<verse_start>" in line:
                    author_word_list.append("<verse_start>")
                else:
                    author_word_list.append("<verse_end>")
                continue
            phonetic_form_of_line = build_phonetic_form_of_line(
                g2p, line, author_word_to_phonetic_form_dictionary,
                author_word_list)
            phonetic_form_of_lyrics.append(phonetic_form_of_line)
        artist_songs_phonetic_form_map[lyrics_filename] = \
            make_text_lyrics_of_list_of_lines(phonetic_form_of_lyrics)

    return artist_songs_phonetic_form_map
def run(args):
    """Convert a multi-speaker transcript file into phoneme sequences.

    Every line of ``args.src`` is '|'-separated: the first field is the
    utterance id, the second the speaker and the last the text. The text is
    stripped of ``: " - ( )``, its punctuation is either unified to ``.`` or
    removed (``args.no_punc``), and it is converted with g2p and mapped
    through ``phoneme_reduce_mapping``. Phoneme sequences go to ``args.out``
    and the speaker-to-index mapping is dumped as JSON to
    ``args.out_spkr_dict``.
    """
    punc = '!?,;'  # punctuation that we want to keep
    spkr_dict = dict()
    g2p = G2p()
    full_set = set()

    with open(args.src, 'r') as f:
        f_lines = f.readlines()

    with open(args.out, 'w') as f_out:
        f_out.write(SEP + 'phn_seq\n')
        for line in tqdm(f_lines):
            fields = line.split('|')
            idx = fields[0]
            spkr = fields[1]
            if spkr not in spkr_dict:
                # Speakers are numbered in order of first appearance.
                spkr_dict[spkr] = len(spkr_dict)
            line = fields[-1]
            # Raw string: same pattern, without the invalid "\-" escape.
            line = re.sub(r'[:"\-()]', '', line)
            if not args.no_punc:
                for token in punc:
                    line = line.replace(token, '.')
            else:
                line = re.sub('[!?,.;]', '', line)
            phn = [
                phoneme_reduce_mapping[remove_num(p.lower())]
                for p in g2p(line)
            ]
            full_set |= set(phn)
            f_out.write(idx + SEP + ' '.join(phn) + '\n')

    print("Total {} phonemes used, {} speakers used.".format(
        len(full_set), len(spkr_dict)))

    out_dir = dirname(args.out_spkr_dict)
    if out_dir:
        # dirname() is '' for a bare filename and os.makedirs('') raises.
        os.makedirs(out_dir, exist_ok=True)
    # Close the file deterministically instead of leaking the handle.
    with open(args.out_spkr_dict, 'w') as f_spkr:
        json.dump(spkr_dict, f_spkr, indent=2)
def list_words(*phonemes):
    """Print sample words from ``phoneme_samples.csv``.

    With no arguments every word in the file is printed. Otherwise, for each
    requested phoneme, the rows whose ``phoneme`` column matches are printed
    as ``word [g2p transcription]``; when several phonemes are requested each
    group is preceded by the phoneme and its entries are tab-indented.
    """
    from g2p_en import G2p

    with open('phoneme_samples.csv') as f:
        rows = list(csv.DictReader(f, delimiter=','))

    if not phonemes:
        for row in rows:
            print(row['word'])
        return

    # One converter for all phonemes, and only matching rows are transcribed.
    g2p = G2p()
    multiple = len(phonemes) > 1
    for phoneme in phonemes:
        if multiple:
            print(phoneme)
        for row in rows:
            if row['phoneme'] != phoneme:
                continue
            word = row['word']
            doc = f"{word} [{' '.join(g2p(word))}]"
            print(f'\t{doc}' if multiple else doc)
def main():
    """Score a hypothesis transcript by LM perplexity and error rate.

    Loads reference and hypothesis transcripts, computes the error rate
    between them (optionally phonemized through a lexicon or g2p), computes
    the language-model perplexity of the hypotheses, and logs
    ``log(lm_ppl) * max(wer, args.min_vt_uer)`` together with its parts.
    """
    args = get_parser().parse_args()
    logger.debug(f"Args: {args}")

    ref_uid_to_tra = load_tra(args.ref_tra)
    hyp_uid_to_tra = load_tra(args.hyp_tra)
    # Every hypothesis utterance must have a reference.
    unmatched = set(hyp_uid_to_tra.keys()) - set(ref_uid_to_tra.keys())
    assert not unmatched

    lm = kenlm.Model(args.kenlm_path)
    skipwords = set(args.skipwords.split(","))

    def compute_lm_score(s):
        kept = " ".join(w for w in s.split() if w not in skipwords)
        if args.uppercase:
            kept = kept.upper()
        return lm.score(kept)

    # At most one phonemizer is set: a lexicon takes priority over g2p.
    g2p = g2p_dict = None
    if args.phonemize and args.phonemize_lexicon:
        g2p_dict = load_lex(args.phonemize_lexicon)
    elif args.phonemize:
        g2p = G2p()

    wer = compute_wer(ref_uid_to_tra, hyp_uid_to_tra, g2p, g2p_dict)
    lm_ppl = compute_lm_ppl(hyp_uid_to_tra, compute_lm_score)

    if args.gt_tra:
        gt_uid_to_tra = load_tra(args.gt_tra)
        gt_wer = compute_wer(gt_uid_to_tra, hyp_uid_to_tra, None, None)
    else:
        gt_wer = -math.inf

    score = math.log(lm_ppl) * max(wer, args.min_vt_uer)
    logging.info(f"{args.hyp_tra}: score={score:.4f}; wer={wer*100:.2f}%; lm_ppl={lm_ppl:.4f}; gt_wer={gt_wer*100:.2f}%")
def __init__(self, hp, split='train'):
    """Store the configuration and build the helpers used by the dataset.

    Args:
        hp: hyper-parameter object; ``dataset``, ``data_dir`` and
            ``data_file`` are read from it.
        split: name of the data split (default ``'train'``).
    """
    self.hp, self.split = hp, split
    self.data_files = self._get_data_files(
        hp.dataset, hp.data_dir, hp.data_file)
    # Mel filter bank: 22050 Hz sample rate, 1024-point FFT, 80 mel bands.
    mel_basis = librosa.filters.mel(sr=22050, n_fft=1024, n_mels=80)
    self.mel_matrix = mel_basis
    self.g2p_en = G2p()
class Text2machineSeq:
    """Convert raw text into the id sequence consumed by the model.

    Words found in the lexicon use its pronunciation; other words are
    converted with g2p. With ``useG2p=False`` the text is passed to
    ``text_to_sequence`` without phonemization.
    """

    # Imported in the class body so the names are reachable as class
    # attributes (e.g. ``Text2machineSeq.symbols``).
    from g2p_en import G2p
    from text import symbols
    from string import punctuation
    from text import text_to_sequence

    # Single converter shared by all instances.
    g2p = G2p()

    def __init__(self, lexicon_path, text_cleaners, useG2p=True):
        """Load the lexicon and remember the cleaner and g2p settings."""
        self.useG2p = useG2p
        self.lexicon = self.read_lexicon(lexicon_path)
        self.text_cleaners = text_cleaners

    @staticmethod
    def getSymbols():
        """Print the number of symbols and return the symbol collection."""
        print(f"nText2machineSeq.symbols: {len(Text2machineSeq.symbols)}")
        return Text2machineSeq.symbols

    @staticmethod
    def read_lexicon(lex_path):
        """Read a ``word phone phone ...`` lexicon file into a dict.

        Keys are lower-cased words; the first entry seen for a word wins.
        """
        lexicon = {}
        with open(getPath(lex_path)) as f:
            for line in f:
                word, *phones = re.split(r"\s+", line.strip("\n"))
                lexicon.setdefault(word.lower(), phones)
        return lexicon

    def text2seq(self, text, verbose=False):
        """Return the id sequence for ``text`` as a NumPy array.

        With ``verbose=True`` the intermediate representations are printed.
        """
        useG2p = self.useG2p
        text = text.rstrip(Text2machineSeq.punctuation)

        if useG2p:
            pieces = []
            for token in re.split(r"([,;.\-\?\!\s+])", text):
                key = token.lower()
                if key in self.lexicon:
                    pieces += self.lexicon[key]
                else:
                    pieces += [p for p in self.g2p(token) if p != " "]
            phones = "{" + "}{".join(pieces) + "}"
            # Empty or single-punctuation groups become the "sp" symbol.
            phones = re.sub(r"\{[^\w\s]?\}", "{sp}", phones)
            phones = phones.replace("}{", " ")
        else:
            phones = text

        sequence = np.array(
            Text2machineSeq.text_to_sequence(phones, self.text_cleaners))

        if verbose:
            print(f"Raw Text Sequence [{int(useG2p)}]: {text}")
            print(f"Phoneme Sequence [{int(useG2p)}]: {phones}")
            print(f"Machine Language [{int(useG2p)}]: {sequence}")
            if useG2p:
                # Pair every phone with the id it was mapped to.
                phoneList = replaces(phones, '{', '', '}', '').split()
                assert len(phoneList) == len(sequence)
                p2s = [f'{p}-{s}' for p, s in zip(phoneList, sequence)]
                print(f"phone2machine [{int(useG2p)}]: {' '.join(p2s)}")
        return sequence
def processtxtph(self, intxt):
    """Convert input text to an id sequence and its ARPAbet string.

    Returns:
        ``(ids, arpatxt)``: the result of ``_arpabet_to_sequence`` and the
        space-joined phoneme string it was computed from.
    """
    converter = G2p()
    cleaned = self._clean_text(intxt, [self.cleaner_names])
    arpatxt = " ".join(_g2p2synth(converter(cleaned)))
    return self._arpabet_to_sequence(arpatxt), arpatxt
def __init__(self, nltk_data_directory: Path):
    """Download the NLTK resources g2p needs, then create the converter.

    Args:
        nltk_data_directory: directory the NLTK data is downloaded into; it
            is created if missing and added to NLTK's search path.
    """
    # workaround for https://github.com/Kyubyong/g2p/issues/12
    nltk_data_directory.mkdir(exist_ok=True, parents=True)
    for resource in ("averaged_perceptron_tagger", "cmudict", "punkt"):
        nltk.download(resource, download_dir=nltk_data_directory)
    nltk.data.path.append(nltk_data_directory.resolve())

    # Imported only after the data is in place.
    from g2p_en import G2p

    self._g2p = G2p()
def grapheme_to_phoneme(text):
    """Convert graphemes to phonemes, keeping punctuation tokens."""
    g2p = G2p()
    phones = []
    for token in re.split(r"([,:;.\(\)\'\-\?\!\s+])", text):
        if not token:
            # re.split yields empty strings between adjacent separators.
            continue
        if token in punctuation:
            phones.append(token)
        else:
            phones.extend(p for p in g2p(token) if p != " ")
    return phones
def get_ARPABET_phonetic_transcription(word_list):
    """
    :param word_list (list): List of words to encode with ARPABET phonetic transcription
    :return arpabet_word_list (list): List of lists of encoded phonemes
    """
    g2p = G2p()
    return [g2p(word) for word in word_list]
class TextProcessor:
    """Normalise text and encode it as padded integer index arrays.

    The unit inventory is ``specials + units + punctuations``, where
    ``units`` is either the configured graphemes or, when phonemization is
    enabled, the class-level ARPAbet ``phonemes`` list.
    """

    phonemes = [
        'AA0', 'AA1', 'AA2', 'AE0', 'AE1', 'AE2', 'AH0', 'AH1', 'AH2', 'AO0',
        'AO1', 'AO2', 'AW0', 'AW1', 'AW2', 'AY0', 'AY1', 'AY2', 'B', 'CH',
        'D', 'DH', 'EH0', 'EH1', 'EH2', 'ER0', 'ER1', 'ER2', 'EY0', 'EY1',
        'EY2', 'F', 'G', 'HH', 'IH0', 'IH1', 'IH2', 'IY0', 'IY1', 'IY2',
        'JH', 'K', 'L', 'M', 'N', 'NG', 'OW0', 'OW1', 'OW2', 'OY0', 'OY1',
        'OY2', 'P', 'R', 'S', 'SH', 'T', 'TH', 'UH0', 'UH1', 'UH2', 'UW',
        'UW0', 'UW1', 'UW2', 'V', 'W', 'Y', 'Z', 'ZH'
    ]

    # Single converter shared by all instances.
    g2p = G2p()

    def __init__(self, hparams):
        """Build the unit inventory and the lookup tables from ``hparams``."""
        self.units = self.graphemes = hparams.graphemes
        self.phonemize = hparams.use_phonemes
        if self.phonemize:
            self.units = self.phonemes
        self.specials = hparams.specials
        self.punctuations = hparams.punctuations
        self.units = self.specials + self.units + self.punctuations
        self.txt2idx = {txt: idx for idx, txt in enumerate(self.units)}
        self.idx2txt = {idx: txt for idx, txt in enumerate(self.units)}

    def normalize(self, text):
        """Lower-case ``text``, blank out unknown characters, maybe phonemize.

        Returns:
            A list of characters, or the g2p output when phonemization is
            enabled.
        """
        text = text.lower()
        text = re.sub("[ ]+", " ", text)
        # Build the allowed set once instead of once per character.
        allowed = self.graphemes + self.punctuations
        text = [ch if ch in allowed else ' ' for ch in text]
        if self.phonemize:
            text = self.g2p(''.join(text))
        return text

    def __call__(self, texts, max_n=None):
        """Encode one text or a list of texts.

        Args:
            texts: a string or a list of strings.
            max_n: padded width; defaults to the longest normalised text.

        Returns:
            ``(indices, lengths)``: a zero-padded int64 array of shape
            ``(len(texts), max_n)`` and the unpadded length of each text.
            Units missing from the inventory are encoded as index 1.

        Raises:
            TypeError: if ``texts`` is neither ``str`` nor ``list``.
        """
        if not isinstance(texts, (str, list)):
            raise TypeError("Inputs must be str or list(str)")
        if isinstance(texts, str):
            texts = [texts]
        normalized_texts = [self.normalize(line) for line in texts]
        tlens = [len(l) for l in normalized_texts]
        # default=0 keeps an empty input list from raising in max().
        max_n = max_n or max(tlens, default=0)
        # np.long was removed from NumPy (1.24); int64 is the explicit,
        # portable replacement.
        texts = np.zeros((len(normalized_texts), max_n), np.int64)
        for i, text in enumerate(normalized_texts):
            texts[i, :len(text)] = [self.txt2idx.get(ch, 1) for ch in text]
        return texts, tlens
def process_sents(sents, args):
    """Run ``process_sent`` over every sentence and collect the results.

    When ``args.reserve_first_column`` is set, the first whitespace-separated
    column of each sentence is split off before processing and re-attached
    (separated by a space) afterwards.
    """
    g2p = G2p()
    res_wrds = load_reserve_word(args.reserve_word)

    out_sents = []
    for sent in sents:
        col1 = ""
        if args.reserve_first_column:
            # Keep the leading column aside; only the rest is processed.
            col1, sent = sent.split(None, 1)
        sent = process_sent(sent, g2p, res_wrds, args)
        if col1 != "" and args.reserve_first_column:
            sent = f"{col1} {sent}"
        out_sents.append(sent)
    return out_sents
def preprocess(text):
    """Phonemize ``text`` and return it as a batch-of-one long tensor.

    The phoneme string is printed between ``|`` markers before it is
    converted with ``text_to_sequence`` and moved to ``device``.
    """
    g2p = G2p()
    tokens = [p for p in g2p(text) if p != ' ']
    # '{A}{B}{$}{C}', $ represents silent phones
    phone = '{' + '}{'.join(tokens) + '}'
    # Empty or single-punctuation groups are replaced by a space.
    phone = re.sub(r'\{[^\w\s]?\}', ' ', phone)
    phone = phone.replace('}{', ' ')
    print('|' + phone + '|')

    ids = np.array(text_to_sequence(phone, hp.text_cleaners))
    batch = np.stack([ids])
    return torch.from_numpy(batch).long().to(device)
def __init__(self):
    """Create the converter and extend its phoneme vocabulary.

    The phoneme/index tables are copied from the ``G2p`` instance and the
    space symbol is added at the next free index. The start, end, padding
    and unknown token strings are stored as attributes.
    """
    self.g2p = G2p()
    self.ph2id = self.g2p.p2idx.copy()
    self.id2ph = self.g2p.idx2p.copy()

    for sym in (' ',):
        next_id = len(self.id2ph)
        self.ph2id[sym] = next_id
        self.id2ph[next_id] = sym

    self.start_token, self.end_token = '<s>', '</s>'
    self.pad_token, self.unk_token = '<pad>', '<unk>'
def Say(text):
    """Speak ``text`` by concatenating pre-recorded phoneme clips.

    The text is converted to phonemes with g2p. Whitespace and punctuation
    tokens become 300 ms of silence; every other token is looked up in the
    module-level ``phonemes`` mapping, faded in and out, and appended with
    the longest crossfade that works (95 ms, then 25 ms, then none). The
    result is normalised, resampled to 44.1 kHz, played, and the text is
    printed. Passing ``'QUIT'`` exits the program.
    """
    if text == 'QUIT':
        exit()

    # converts text to phonemes
    g2p = G2p()
    out = g2p(text)

    # Tokens rendered as a pause rather than a phoneme clip.
    pause_tokens = ('', "'", "-", '.', ',', '!', '?')

    # identify sounds from phoneme name
    output = AudioSegment.silent(duration=100)
    for token in out:
        pho = token
        if pho == 'HH':
            pho = 'H'
        elif pho == 'NX':
            pho = 'NG'
        elif pho == 'TH':
            pho = 'DH'
        # Drop a trailing non-letter (the stress digit); the emptiness guard
        # keeps an empty token from raising IndexError.
        if pho and not pho[-1].isalpha():
            pho = pho[:-1]

        if token.isspace() or token in pause_tokens:
            # Fading pure silence changes nothing, so the pause is appended
            # as is (the original fade result was discarded anyway).
            output = output.append(
                AudioSegment.silent(duration=300), crossfade=10)
        else:
            # AudioSegment methods return new objects. Fade a local copy so
            # the shared clip is not faded again on every later use.
            clip = phonemes[pho].fade_in(duration=25).fade_out(duration=25)
            try:
                output = output.append(clip, crossfade=95)
            except Exception:
                try:
                    output = output.append(clip, crossfade=25)
                    print(pho)
                    print(str(len(output)) + " | " + str(len(clip)))
                except Exception:
                    output = output.append(clip, crossfade=0)
                    print("last resort: " + pho)
            output = output.append(
                AudioSegment.silent(duration=10), crossfade=0)

    output += AudioSegment.silent(duration=300)
    output = normalize(output)
    # set_frame_rate returns a new segment; keep it so the resampling
    # actually takes effect.
    output = output.set_frame_rate(44100)
    play(output)
    print(text)
def preprocess(text):
    """Phonemize ``text`` and return it as a batch-of-one long tensor.

    Trailing punctuation is stripped first. The phoneme string is printed
    between ``|`` markers before it is converted with ``text_to_sequence``
    and moved to ``device``.
    """
    text = text.rstrip(punctuation)
    g2p = G2p()
    tokens = [p for p in g2p(text) if p != ' ']
    phone = '{' + '}{'.join(tokens) + '}'
    # Empty or single-punctuation groups become the "sp" symbol.
    phone = re.sub(r'\{[^\w\s]?\}', '{sp}', phone)
    phone = phone.replace('}{', ' ')
    print('|' + phone + '|')

    ids = np.array(text_to_sequence(phone, hp.text_cleaners))
    batch = np.stack([ids])
    return torch.from_numpy(batch).long().to(device)
def __init__(self):
    """Load the acoustic model, the vocoder and the text frontend.

    All paths are hard-coded below and everything is placed on the CUDA
    device.
    """
    # TODO Move this into a config File, give option of different models
    self.trans_type = "phn"
    dict_path = "/home/ntrusse2/espnet/downloads/en/fastspeech/data/lang_1phn/phn_train_no_dev_units.txt"
    model_path = "/home/ntrusse2/espnet/downloads/en/fastspeech/exp/phn_train_no_dev_pytorch_train_tacotron2.v3_fastspeech.v4.single/results/model.last1.avg.best"
    vocoder_path = "/home/ntrusse2/espnet/downloads/en/parallel_wavegan/ljspeech.parallel_wavegan.v2/checkpoint-400000steps.pkl"
    vocoder_conf = "/home/ntrusse2/espnet/downloads/en/parallel_wavegan/ljspeech.parallel_wavegan.v2/config.yml"

    # Copied right out of the examples on ESPNETs DEMO
    self.device = torch.device("cuda")

    # --- acoustic model ---
    print("Loading Torch Model...")
    self.idim, odim, train_args = get_model_conf(model_path)
    model_class = dynamic_import(train_args.model_module)
    model = model_class(self.idim, odim, train_args)
    torch_load(model_path, model)
    self.model = model.eval().to(self.device)
    self.inference_args = Namespace(
        threshold=0.5,
        minlenratio=0.0,
        maxlenratio=10.0,
        use_attention_constraint=True,
        backward_window=1,
        forward_window=3,
    )

    # --- vocoder ---
    print("Loading Vocoder...")
    with open(vocoder_conf) as f:
        self.config = yaml.load(f, Loader=yaml.Loader)
    vocoder_class = self.config.get("generator_type",
                                    "ParallelWaveGANGenerator")
    generator_cls = getattr(parallel_wavegan.models, vocoder_class)
    vocoder = generator_cls(**self.config["generator_params"])
    checkpoint = torch.load(vocoder_path, map_location="cpu")
    vocoder.load_state_dict(checkpoint["model"]["generator"])
    vocoder.remove_weight_norm()
    self.vocoder = vocoder.eval().to(self.device)

    # --- text frontend ---
    print("Loading Text Frontend...")
    with open(dict_path) as f:
        # Each line holds a symbol and its integer id separated by a space.
        entries = [line.replace("\n", "").split(" ") for line in f]
    self.char_to_id = {c: int(i) for c, i in entries}
    self.g2p = G2p()

    aux_context_window = self.config["generator_params"].get(
        "aux_context_window", 0)
    self.pad_fn = torch.nn.ReplicationPad1d(aux_context_window)
    self.use_noise_input = vocoder_class == "ParallelWaveGANGenerator"
def phonemes_encoding(sources,
                      add_sos_eos_pad_tokens=True,
                      idx_to_phonemes=None,
                      phonemes_to_idx=None,
                      **kwargs):
    '''
    Encodes given sources into numerical vectors of phoneme indices

    Params:
      * sources : list of str
      * add_sos_eos_pad_tokens (optional) : bool
      * idx_to_phonemes (optional) : list of str
      * phonemes_to_idx (optional) : dict
      * sos_tok (optional, keyword) : str
      * eos_tok (optional, keyword) : str
      * pad_tok (optional, keyword) : str

    Returns:
      sources_encoded, idx_to_phonemes, phonemes_to_idx : list of list of int, list of str, dict
    '''
    if add_sos_eos_pad_tokens:
        sos_tok = kwargs.get('sos_tok', '<sos>')
        eos_tok = kwargs.get('eos_tok', '<eos>')
        pad_tok = kwargs.get('pad_tok', '<pad>')

    g2p = G2p()
    converted_sources = [g2p(s.lower()) for s in tqdm(sources)]

    if idx_to_phonemes is None or phonemes_to_idx is None:
        # No complete vocabulary supplied: derive one from the data.
        phonemes = sorted({p for s in converted_sources for p in s})
        if add_sos_eos_pad_tokens:
            idx_to_phonemes = [sos_tok, eos_tok, pad_tok] + phonemes
        else:
            idx_to_phonemes = phonemes
        phonemes_to_idx = {p: i for i, p in enumerate(idx_to_phonemes)}

    sources_encoded = []
    for source in converted_sources:
        sources_encoded.append([phonemes_to_idx[p] for p in source])

    if add_sos_eos_pad_tokens:
        sources_encoded = Data.add_sos_eos_tokens(sources_encoded,
                                                  phonemes_to_idx,
                                                  sos_tok=sos_tok,
                                                  eos_tok=eos_tok)

    return sources_encoded, idx_to_phonemes, phonemes_to_idx
class TextProcessor:
    """Normalise text and encode it as padded integer index arrays.

    The unit inventory is ``specials + units + punctuations``, where
    ``units`` is either the configured graphemes or, when phonemization is
    enabled, the configured phonemes.
    """

    # Single converter shared by all instances.
    g2p = G2p()

    def __init__(self, hparams):
        """Build the unit inventory and the lookup tables from ``hparams``."""
        self.units = self.graphemes = hparams.graphemes
        self.phonemes = hparams.phonemes
        self.phonemize = hparams.use_phonemes
        if self.phonemize:
            self.units = self.phonemes
        self.specials = hparams.specials
        self.punctuations = hparams.punctuations
        self.units = self.specials + self.units + self.punctuations
        self.txt2idx = {txt: idx for idx, txt in enumerate(self.units)}
        self.idx2txt = {idx: txt for idx, txt in enumerate(self.units)}

    def normalize(self, text):
        """Lower-case ``text``, blank out unknown characters, maybe phonemize.

        Returns:
            A list of characters, or the g2p output when phonemization is
            enabled.
        """
        text = text.lower()
        text = re.sub("[ ]+", " ", text)
        # Build the allowed set once instead of once per character.
        allowed = self.graphemes + self.punctuations
        text = [ch if ch in allowed else ' ' for ch in text]
        if self.phonemize:
            text = self.g2p(''.join(text))
        return text

    def __call__(self, texts, max_n=None):
        """Encode one text or a list of texts.

        Args:
            texts: a string or a list of strings.
            max_n: padded width; defaults to the longest normalised text.

        Returns:
            ``(indices, lengths)``: a zero-padded int64 array of shape
            ``(len(texts), max_n)`` and the unpadded length of each text.
            Units missing from the inventory are encoded as index 1.

        Raises:
            TypeError: if ``texts`` is neither ``str`` nor ``list``.
        """
        if not isinstance(texts, (str, list)):
            raise TypeError("Inputs must be str or list(str)")
        if isinstance(texts, str):
            texts = [texts]
        normalized_texts = [self.normalize(line) for line in texts]
        tlens = [len(l) for l in normalized_texts]
        # default=0 keeps an empty input list from raising in max().
        max_n = max_n or max(tlens, default=0)
        # np.long was removed from NumPy (1.24); int64 is the explicit,
        # portable replacement.
        texts = np.zeros((len(normalized_texts), max_n), np.int64)
        for i, text in enumerate(normalized_texts):
            texts[i, :len(text)] = [self.txt2idx.get(ch, 1) for ch in text]
        return texts, tlens