def g2p(text, trans_type="char"):
    """Convert Japanese text to a kana or phoneme transcription.

    Args:
        text: Raw Japanese text.
        trans_type: ``"char"`` for a kana transcription, ``"phn"`` for a
            space-separated phoneme string.

    Returns:
        The converted transcription string.

    Raises:
        ValueError: If ``trans_type`` is neither ``"char"`` nor ``"phn"``.
    """
    # Normalize width/character variants before grapheme-to-phoneme conversion.
    text = jaconv.normalize(text)
    if trans_type == "char":
        text = pyopenjtalk.g2p(text, kana=True)
    elif trans_type == "phn":
        text = pyopenjtalk.g2p(text, kana=False)
    else:
        # `assert False` would be stripped under `python -O` and the function
        # would silently return the un-converted text; raise explicitly instead.
        raise ValueError(f"Unknown trans_type: {trans_type}")
    return text
def pyopenjtalk_g2p(text) -> List[str]:
    """Return the phoneme symbols of *text* as a list of strings."""
    import pyopenjtalk

    # pyopenjtalk emits the phoneme sequence as one space-separated string.
    return pyopenjtalk.g2p(text, kana=False).split(" ")
def build_vocab(df, vocab_path):
    """Build a phoneme vocabulary file from the ``text`` column of *df*.

    Every utterance is converted to phonemes with pyopenjtalk; pauses are
    discarded.  Special symbols keep fixed ids (``<unk>``=1, ``<eos>``=2,
    ``<pad>``=3) and phonemes are numbered from 4 in alphabetical order.
    The ``"symbol id"`` lines are written to *vocab_path*.

    Args:
        df: DataFrame with a ``text`` column (spaces are stripped first).
        vocab_path: Destination path for the vocabulary file.

    Returns:
        A ``Vocab`` loaded from the freshly written file.
    """
    print("building vocab ...")
    vocab_dict = {"<unk>": 1, "<eos>": 2, "<pad>": 3}
    # Use a set for O(1) membership instead of scanning a list per phoneme.
    seen = set()
    for row in tqdm(df.itertuples()):
        text = row.text.replace(" ", "")  # remove spaces
        phones = pyopenjtalk.g2p(text, join=False)
        # remove pause symbols before collecting
        seen.update(phone for phone in phones if phone != "pau")
    # Assign ids in alphabetical order, continuing after the special symbols.
    for phone in sorted(seen):
        vocab_dict[phone] = len(vocab_dict) + 1
    wlines = [f"{v} {index:d}\n" for v, index in vocab_dict.items()]
    with open(vocab_path, "w", encoding="utf-8") as f:
        f.writelines(wlines)
    print(f"vocabulary saved to {vocab_path}")
    return Vocab(vocab_path)
def test_g2p_phone():
    """pyopenjtalk.g2p(kana=False) must produce the expected phoneme strings."""
    cases = {
        "こんにちは": "k o N n i ch i w a",
        "ななみんです": "n a n a m i N d e s U",
        "ハローユーチューブ": "h a r o o y u u ch u u b u",
    }
    for text, expected in cases.items():
        assert pyopenjtalk.g2p(text, kana=False) == expected
def test_g2p_kana():
    """pyopenjtalk.g2p(kana=True) must produce the expected kana strings."""
    cases = {
        "今日もこんにちは": "キョーモコンニチワ",
        "いやあん": "イヤーン",
        "パソコンのとりあえず知っておきたい使い方": "パソコンノトリアエズシッテオキタイツカイカタ",
    }
    for text, expected in cases.items():
        assert pyopenjtalk.g2p(text, kana=True) == expected
def frontend(text):
    """Clean text and then convert to id sequence."""
    # Convert raw text to a space-separated phoneme string first.
    text = pyopenjtalk.g2p(text, kana=False)
    print(f"Cleaned text: {text}")
    idseq = []
    for token in text.split(" "):
        if token.isspace():
            idseq.append(char_to_id["<space>"])
        elif token in char_to_id:
            idseq.append(char_to_id[token])
        else:
            # unknown symbols map to the <unk> id
            idseq.append(char_to_id["<unk>"])
    idseq.append(idim - 1)  # <eos>
    return torch.LongTensor(idseq).view(-1).to(device)
def text_to_sequence(self, text, inference=False):
    """Convert *text* into a list of symbol ids, terminated by the eos id.

    When *inference* is true, the raw text is first run through g2p and
    unvoiced vowel symbols are folded to their voiced forms.
    """
    if inference:
        text = pyopenjtalk.g2p(text)
        # fold unvoiced vowels (I/U) to voiced (i/u)
        text = text.replace("I", "i").replace("U", "u")
        print(f"phoneme seq: {text}")
    sequence = [self.symbol_to_id[symbol] for symbol in text.split()]
    # add eos tokens
    sequence.append(self.eos_id)
    return sequence
def parse_label(meta_data):
    """Print ``file_id|id-sequence 1`` lines for every entry in *meta_data*.

    Each line of *meta_data* must look like ``file_id|text``.  The text is
    converted to phonemes, pause/blank tokens are dropped, and the resulting
    symbol-id sequence is written to stdout with a trailing ``1``.

    Args:
        meta_data: Path to a ``|``-separated metadata file.
    """
    with open(meta_data) as f:
        for line in f:
            file_id, text = line.strip().split('|', 1)
            # g2p returns a space-separated phoneme string; split() items are
            # already str, so no per-item str() conversion is needed.
            phone = pyopenjtalk.g2p(text).split(' ')
            # Drop pauses plus the ' '/'' artifacts of stray spaces in one pass
            # (the original had a duplicated, partly commented-out filter chain).
            phone = [p for p in phone if p not in ('pau', ' ', '')]
            texts = ' '.join(str(i) for i in text_to_sequence(phone)) + ' 1'
            print(f'{file_id}|{texts}')
    return
def main(args):
    """Add phone-level columns to an utterance TSV and write it back out.

    Reads ``args.tsv_path``, attaches ``phone_text``, ``phone_token_id`` and
    ``plen`` columns derived via pyopenjtalk, optionally restricts the output
    to ``args.cols``, and saves the result to ``args.out`` (or a ``_p2w.tsv``
    sibling of the input when ``args.out`` is None).
    """
    df = pd.read_table(args.tsv_path)
    df = df.dropna(subset=["utt_id", "token_id", "text"])

    # Reuse an existing vocabulary file when present, otherwise build one.
    if os.path.exists(args.vocab):
        vocab = Vocab(args.vocab)
        print(f"load vocab: {args.vocab}")
    else:
        vocab = build_vocab(df, args.vocab)

    phone_texts, phone_token_ids, phone_lens = [], [], []
    for row in tqdm(df.itertuples()):
        text = row.text.replace(" ", "")  # remove spaces
        phones = pyopenjtalk.g2p(text, join=False)
        phone_texts.append(" ".join(phones))
        phone_token_ids.append(ints2str(vocab.tokens2ids(phones)))
        phone_lens.append(len(phones))

    df["phone_text"] = phone_texts
    df["phone_token_id"] = phone_token_ids
    df["plen"] = phone_lens

    if args.cols is not None:
        columns = args.cols.split(",")
        # the downstream pipeline requires these three columns
        assert {"utt_id", "phone_text", "phone_token_id"}.issubset(columns)
        df = df[columns]

    if args.out is None:
        out_path = args.tsv_path.replace(".tsv", "_p2w.tsv")
    else:
        out_path = args.out
    df.to_csv(out_path, sep="\t", index=False)
def pyopenjtalk_g2p_kana(text) -> List[str]:
    """Convert *text* to kana and return the individual kana characters."""
    import pyopenjtalk

    # g2p(kana=True) yields a single kana string; expose it char-by-char.
    kana_string = pyopenjtalk.g2p(text, kana=True)
    return list(kana_string)