def get_trans_array(ori_phone_file, tgt_phone_file):
    """Map the phones of the target table onto positions in the source list.

    The source list is read with ``utils.read_phone_txt(ori_phone_file)`` and
    has ``'spn'`` appended to it. The target list is read with
    ``utils.read_phone_txt(tgt_phone_file, 0)``; entries starting with ``'<'``
    or ``'#'`` are dropped.

    Returns:
        A NumPy array holding, for each kept target phone in order, the index
        of every equal entry in the source list. A target phone with no equal
        entry contributes nothing; a source list with repeated entries
        contributes one index per occurrence.
    """
    import utils

    source_phones = utils.read_phone_txt(ori_phone_file)
    source_phones.append('spn')

    # Skip table symbols whose name begins with '<' or '#'.
    kept_targets = [
        name
        for name in utils.read_phone_txt(tgt_phone_file, 0)
        if not name.startswith(('<', '#'))
    ]

    positions = [
        pos
        for name in kept_targets
        for pos, candidate in enumerate(source_phones)
        if name == candidate
    ]
    return np.array(positions)
# Command-line script (collapsed onto a single line and cut off in this view).
# Arguments: phone_table_path, pdf_table_path, and optional --typ
# ('mono' by default, or 'biphone').
# Reads phones with utils.read_phone_txt(args.phone_table_path, 0) and keeps in
# valid_phone only the entries that start with neither '<' nor '#'.
# --typ mono: writes valid_phone with
#   utils.write_phone_file(valid_phone, args.pdf_table_path, True).
# Otherwise: starts building list L, appending 'start_<phone>' when j == 0 for
# each phone; the j > 0 branch and everything after it is not visible here.
# NOTE(review): utils is a project module; the meaning of the extra arguments
# (0 and True) is not shown in this file - confirm against utils.
import utils import sys import argparse parser = argparse.ArgumentParser() parser.add_argument("phone_table_path", type=str) parser.add_argument("pdf_table_path", type=str) parser.add_argument("--typ", type=str, default='mono', choices=['mono', 'biphone']) args = parser.parse_args() phones = utils.read_phone_txt(args.phone_table_path, 0) valid_phone = [] for x in phones: for s in ['<', '#']: if x.startswith(s): break else: valid_phone.append(x) if args.typ == 'mono': utils.write_phone_file(valid_phone, args.pdf_table_path, True) else: L = [] for i in range(len(valid_phone)): for j in range(len(valid_phone) + 1): if j == 0: L.append('start' + '_' + valid_phone[i]) else:
# Read a phone list, append the 'spn' phone, and write the result out.
#
# argv[1]: phone list to read
# argv[2]: phone list to write
import sys

import utils as utl

source_path, target_path = sys.argv[1], sys.argv[2]

phones = utl.read_phone_txt(source_path)
# 'spn' stands in for out-of-vocabulary words, which Kaldi requires.
phones.append('spn')
utl.write_phone_file(phones, target_path)
# Filter a phone table and write the surviving entries out.
#
# argv[1]: phone table to read
# argv[2]: output table to write
import sys

import utils

phone_table_path, pdf_table_path = sys.argv[1], sys.argv[2]

# Entries whose name starts with '<' or '#' are left out of the output.
kept_phones = [
    name
    for name in utils.read_phone_txt(phone_table_path, 0)
    if not name.startswith(('<', '#'))
]
utils.write_phone_file(kept_phones, pdf_table_path, True)
# Visible part of a script that begins printing a "<Topology>" description.
# It sorts phone indices into two lists:
#   same_phone  - 'sil', 'spn', and every "x_y" entry with x == y
#   trans_phone - every "x_y" entry with x != y
sys.path.append('scripts/')
import utils

parser = argparse.ArgumentParser()
parser.add_argument("phone_list_txt", type=str)
parser.add_argument(
    "--self_loop_prob",
    type=float,
    default=0.5,
    help="Probabilty of staying in the same state. (1-self_loop_prob) is the probability of transition to other state",
)
args = parser.parse_args()

phone_list = utils.read_phone_txt(args.phone_list_txt, 0)

same_phone = []
trans_phone = []
for idx, phone in enumerate(phone_list):
    if phone in ('sil', 'spn'):
        same_phone.append(idx)
    if '_' not in phone:
        continue
    # Exactly one underscore is expected; anything else fails to unpack.
    left, right = phone.split('_')
    bucket = same_phone if left == right else trans_phone
    bucket.append(idx)

print("<Topology>")
print("<TopologyEntry>")
# Command-line script (collapsed onto a single line and cut off in this view).
# argv[1]: monophone list file, argv[2]: list of all phones (entries of the
# form "x_y" are processed, others are ignored), argv[3]: self-loop probability.
# trans_prob is -log(1 - self_loop_prob).
# Prints "0 1 0 0", then for every "x_y" entry at position idx prints
#   "1 <phones.index(y) + 2> <idx> <idx>"
# with trans_prob appended (5 decimals) when x != y.
# A second pass prints "<phones.index(x) + 2> <phones.index(y) + 2> <idx> <idx>"
# when x == y; the x != y branch of that pass is not visible here.
# NOTE(review): the output looks like a text-format FST arc list - confirm
# against whatever consumes it.
# NOTE(review): phone.split('_') is unpacked into two names, so an entry with
# more than one '_' raises ValueError; assumes read_phone_txt returns a list.
import sys sys.path.append('scripts/') import utils import math #48 phones monophones_txt = sys.argv[1] all_phones_txt = sys.argv[2] self_loop_prob = sys.argv[3] trans_prob = -math.log(1 - float(self_loop_prob)) phones = utils.read_phone_txt(monophones_txt) cd_phones = utils.read_phone_txt(all_phones_txt, 0) print("0 1 0 0") for idx, phone in enumerate(cd_phones): if '_' in phone: x, y = phone.split('_') iy = phones.index(y) if x == y: print("1 {} {} {}".format(iy + 2, idx, idx)) else: print("1 {} {} {} {:.5f}".format(iy + 2, idx, idx, trans_prob)) for idx, phone in enumerate(cd_phones): if '_' in phone: x, y = phone.split('_') iy = phones.index(y) ix = phones.index(x) if x == y: print("{} {} {} {}".format(ix + 2, iy + 2, idx, idx)) else:
# Populate a dictionary directory with phone-pair ("x_y") files.
#
# argv[1]: monophone list (does not include spn)
# argv[2]: output directory
#
# Files written: extra_questions.txt (empty), optional_silence.txt,
# silence_phones.txt, nonsilence_phones.txt and lexicon.txt.
import os
import sys

sys.path.append('scripts/')
import utils

monophone_list = sys.argv[1]  # does not include spn
dict_dir = sys.argv[2]
phone_list = utils.read_phone_txt(monophone_list)


def _write_lines(filename, lines):
    # Create/truncate dict_dir/filename and write the given lines to it.
    with open(os.path.join(dict_dir, filename), 'w') as out:
        out.writelines(lines)


def _pair_lines(phones):
    # One "first_second" line for every ordered pair of phones.
    for first in phones:
        for second in phones:
            yield first + '_' + second + '\n'


def _lexicon_lines(phones):
    # For each phone: one three-column entry per following phone, then the
    # phone's own two-column entry; the <UNK> entry closes the file.
    for first in phones:
        for second in phones:
            yield "{} {} {}\n".format(second, first + '_' + second,
                                      second + '_' + second)
        yield "{} {}\n".format(first, first + '_' + first)
    yield "<UNK> spn\n"


_write_lines('extra_questions.txt', [])
_write_lines('optional_silence.txt', ['sil' + '\n'])
_write_lines('silence_phones.txt', ['sil' + '\n', 'spn' + '\n'])
_write_lines('nonsilence_phones.txt', _pair_lines(phone_list))
_write_lines('lexicon.txt', _lexicon_lines(phone_list))