def train(datagenes: list, load_model=None):
    """Train a DCBNN2D (2-D conv) acoustic model on the given datasets.

    :param datagenes: dataset generator objects consumed by
        ``VoiceDatasetList.merge_load``.
    :param load_model: optional path to a previously saved model to resume
        training from; resolved to an absolute path before loading.
    """
    width, height = 1600, 200  # padded feature length / mel-bin count
    dataset = VoiceDatasetList()
    x_set, y_set = dataset.merge_load(datagenes)
    pymap = PinyinMapper(sil_mode=-1)
    # NOTE(review): unlike the sibling train() variants, no pymap= is passed
    # to VoiceLoader here — confirm the loader's default is acceptable.
    vloader = VoiceLoader(
        x_set,
        y_set,
        batch_size=16,
        n_mels=height,
        feature_pad_len=width,
        feature_dim=3,
        cut_sub=32,
    )
    model_helper = DCBNN2D(pymap)
    # NOTE(review): ms_output_size is hard-coded to 1423 here, while the other
    # variants in this file use pymap.max_index + 1 — verify the two agree.
    model_helper.compile(
        feature_shape=(width, height, 1),
        label_max_string_length=32,
        ms_output_size=1423,
    )
    if load_model is not None:
        model_helper.load(os.path.abspath(load_model))
    model_helper.fit(vloader)
def train(datagenes: list, load_model=None):
    """Train a WAVEM acoustic model with CTC on the given datasets.

    :param datagenes: dataset generator objects consumed by
        ``VoiceDatasetList.merge_load``.
    :param load_model: optional path to a saved model to resume from;
        resolved to an absolute path before loading.
    """
    width, height = 1600, 200  # padded feature length / mel-bin count
    max_label_len = 64
    dataset = VoiceDatasetList()
    x_set, y_set = dataset.merge_load(datagenes)
    pymap = PinyinMapper(sil_mode=-1)
    vloader = VoiceLoader(
        x_set,
        y_set,
        batch_size=16,
        feature_pad_len=width,
        n_mels=height,
        max_label_len=max_label_len,
        pymap=pymap,
        melf=MelFeature5(),
        all_train=False,
    )
    model_helper = WAVEM(pymap)
    # +1 so the output layer covers every pinyin index plus the CTC blank.
    model_helper.compile(
        feature_shape=(width, height),
        label_max_string_length=max_label_len,
        ms_output_size=pymap.max_index + 1,
    )
    if load_model is not None:
        model_helper.load(os.path.abspath(load_model))
    # epoch=-1: train indefinitely, checkpointing every 100 steps.
    model_helper.fit(vloader, epoch=-1, save_step=100, use_ctc=True)
def train(datagenes: list, load_model=None):
    """Train a DCNN2D (2-D conv) acoustic model with CTC on the given datasets.

    :param datagenes: dataset generator objects consumed by
        ``VoiceDatasetList.merge_load``.
    :param load_model: optional path to a saved model to resume from;
        resolved to an absolute path before loading.
    """
    width, height = 1600, 200  # padded feature length / mel-bin count
    dataset = VoiceDatasetList()
    x_set, y_set = dataset.merge_load(datagenes)
    pymap = PinyinMapper(sil_mode=-1)
    vloader = VoiceLoader(
        x_set,
        y_set,
        batch_size=16,
        n_mels=height,
        feature_pad_len=width,
        feature_dim=3,
        pymap=pymap,
        melf=MelFeature5(),
        divide_feature_len=8,
    )
    model_helper = DCNN2D(pymap)
    # CTC loss requires: index < num_class - 1, hence max_index + 1 outputs.
    model_helper.compile(
        feature_shape=(width, height, 1),
        ms_output_size=pymap.max_index + 1,
    )
    if load_model is not None:
        model_helper.load(os.path.abspath(load_model))
    # epoch=-1: train indefinitely.
    model_helper.fit(vloader, epoch=-1, use_ctc=True)
def train(datagenes: list, load_model=None, **kwargs):
    """Train a DCBNN1D (1-D conv) acoustic model with CTC on the given datasets.

    :param datagenes: dataset generator objects consumed by
        ``VoiceDatasetList.merge_load``.
    :param load_model: optional path to a saved model to resume from;
        resolved to an absolute path before loading.
    :param kwargs: optional ``epoch`` (default -1, i.e. unbounded) and
        ``save_step`` (default 1000) forwarded to ``fit``.
    """
    width, height = 1600, 200  # padded feature length / mel-bin count
    max_label_len = 64
    epoch = kwargs.get("epoch", -1)
    save_step = kwargs.get("save_step", 1000)

    dataset = VoiceDatasetList()
    x_set, y_set = dataset.merge_load(datagenes)
    pymap = PinyinMapper(sil_mode=-1)
    vloader = VoiceLoader(
        x_set,
        y_set,
        batch_size=16,
        feature_pad_len=width,
        n_mels=height,
        max_label_len=max_label_len,
        pymap=pymap,
        melf=MelFeature4(),
        divide_feature_len=8,
        all_train=False,
    )
    model_helper = DCBNN1D(pymap)
    # +1 so the output layer covers every pinyin index plus the CTC blank.
    model_helper.compile(
        feature_shape=(width, height),
        label_max_string_length=max_label_len,
        ms_output_size=pymap.max_index + 1,
    )
    if load_model is not None:
        model_helper.load(os.path.abspath(load_model))
    model_helper.fit(vloader, epoch=epoch, save_step=save_step, use_ctc=True)
def err_count():
    """Scan all datasets with a trained DCBNN1D model and log predictions.

    Loads the checkpointed DCBNN1D acoustic model, runs it over every
    configured dataset, and writes one line per sample to ``data_err.txt``
    in the form ``<text>\\t<true pinyin>\\t<predicted pinyin>`` so erroneous
    samples can be inspected offline.
    """
    z200 = Z200(config.z200_datapath)
    thchs = Thchs30(config.thu_datapath)
    prime = Primewords(config.prime_datapath)
    stcmd = ST_CMDS(config.stcmd_datapath)
    aishell = AiShell(config.aishell_datapath)
    lst = [z200, thchs, prime, stcmd, aishell]

    # Function-scoped imports kept as in the original (likely to avoid
    # import cycles at module load time).
    from acoustic.ABCDNN import DCBNN1D
    from util.reader import PinyinMapper, VoiceDatasetList, VoiceLoader
    from feature.mel_feature import MelFeature5

    w, h = 1600, 200  # padded feature length / mel-bin count
    max_label_len = 64
    pymap = PinyinMapper(sil_mode=-1)
    model_helper = DCBNN1D(pymap)
    # +1 so the output layer covers every pinyin index plus the CTC blank.
    model_helper.compile(feature_shape=(w, h),
                         label_max_string_length=max_label_len,
                         ms_output_size=pymap.max_index + 1)
    model_helper.load(os.path.join(config.model_dir,
                                   "cur_best_DCBNN1D_epoch_722_step_722000.h5"))

    dataset = VoiceDatasetList()
    x_set, y_set = dataset.merge_load(lst)
    vloader = VoiceLoader(x_set, y_set,
                          batch_size=16,
                          feature_pad_len=w,
                          n_mels=h,
                          max_label_len=max_label_len,
                          pymap=pymap,
                          melf=MelFeature5(),
                          divide_feature_len=8,
                          all_train=True,
                          )
    viter = vloader.create_iter(one_batch=True, return_word=True)
    # FIX: the original bound the output file to ``w``, shadowing the feature
    # width defined above; the file handle now has its own name.
    with open("data_err.txt", "w", encoding="utf-8") as fout:
        for i, batch in enumerate(viter):
            [_, ys, _, label_len], words = batch
            py_true_b = pymap.batch_vector2pylist(ys,
                                                  return_word_list=True,
                                                  return_list=True)
            py_pred_b = model_helper.predict(batch)
            for py_true, py_pred, llen, word in zip(py_true_b, py_pred_b,
                                                    label_len, words):
                # label_len entries appear to be 1-element sequences — TODO confirm.
                llen = llen[0]
                py_true = " ".join(py_true[:llen])  # trim padding before joining
                py_pred = " ".join(py_pred)
                # Progress indicator; 16 matches the batch_size above.
                print(f"\r——{i*16}.", end="\0", flush=True)
                fout.write(f"{word.strip()}\t{py_true}\t{py_pred}\n")
def train(datagene: list, load_model=None):
    """Train the DCNN1D pinyin-to-character language model.

    Only the label side (pinyin/character text) of the datasets is loaded;
    audio features are skipped entirely.

    :param datagene: dataset generator objects consumed by
        ``VoiceDatasetList.merge_load``.
    :param load_model: optional path to a saved model to resume from.
    """
    max_label_len = 64
    dataset = VoiceDatasetList()
    # Text-only load: skip x (audio), keep y (transcripts).
    _, y_set = dataset.merge_load(datagene, choose_x=False, choose_y=True)

    py_mapper = PinyinMapper(sil_mode=0)
    chs_mapper = ChsMapper()
    tloader = TextLoader(
        y_set,
        padding_length=max_label_len,
        pinyin_map=py_mapper,
        cut_sub=16,
        chs_map=chs_mapper,
    )

    model_helper = DCNN1D()
    model_helper.compile(
        feature_shape=(max_label_len, tloader.max_py_size),
        ms_input_size=py_mapper.max_index,
        ms_output_size=chs_mapper.categores,
    )
    if load_model is not None:
        model_helper.load(load_model)
    # -1: train indefinitely.
    model_helper.fit(tloader, -1)
stcmd = ST_CMDS(config.stcmd_datapath) # 据说还可以 thchs = Thchs30( config.thu_datapath) # 同质性太高,不过好拟合,可以用来测试模型的效果,在这个数据上都没法得到比较好的结果的就没啥使用的必要了 prime = Primewords(config.prime_datapath) aishell = AiShell(config.aishell_datapath) # 据说数据集很差,不用该数据训练 z200 = Z200(config.z200_datapath) # datagenes = [thchs,stcmd,prime,aishell,z200] datagenes = [thchs] if __name__ == "__main__": w, h = 1600, 200 max_label_len = 64 batch_size = 16 dataset = VoiceDatasetList() x_set, y_set = dataset.merge_load(datagenes) pymap = PinyinMapper(sil_mode=-1) vloader = VoiceLoader(x_set, y_set, batch_size=16, feature_pad_len=w, n_mels=h, max_label_len=max_label_len, pymap=pymap, divide_feature_len=8, melf=MelFeature5(), all_train=True) viter = vloader.create_iter(one_batch=True) for i, _ in enumerate(viter):