def train(datagenes: list, load_model=None):
    """Train a DCBNN2D acoustic model on the merged voice datasets.

    :param datagenes: list of dataset generators whose samples are merged
                      into one training set
    :param load_model: optional path to a pre-trained weight file to resume from
    """
    feat_w, feat_h = 1600, 200  # padded feature length / mel-band count
    x_set, y_set = VoiceDatasetList().merge_load(datagenes)
    pymap = PinyinMapper(sil_mode=-1)
    loader = VoiceLoader(x_set, y_set,
                         batch_size=16,
                         n_mels=feat_h,
                         feature_pad_len=feat_w,
                         feature_dim=3,
                         cut_sub=32)
    helper = DCBNN2D(pymap)
    # NOTE(review): ms_output_size is hard-coded to 1423 here, unlike the
    # other trainers that derive it from pymap.max_index + 1 — confirm.
    helper.compile(feature_shape=(feat_w, feat_h, 1),
                   label_max_string_length=32,
                   ms_output_size=1423)
    if load_model is not None:
        helper.load(os.path.abspath(load_model))
    helper.fit(loader)
def train(datagene: TextDataGenerator, load_model=None):
    """Train the SOMMalpha language model (pinyin alphabet -> Chinese chars).

    :param datagene: text data generator providing the corpus files
    :param load_model: optional path to a pre-trained weight file to resume from
    """
    txt_files = datagene.load_from_path()
    label_len = 200  # padded label sequence length
    py_map = PinyinMapper(sil_mode=0)
    ch_map = ChsMapper()
    loader = TextLoader2(txt_files,
                         padding_length=label_len,
                         pinyin_map=py_map,
                         chs_map=ch_map,
                         grain=TextLoader2.grain_alpha,
                         cut_sub=175)
    helper = SOMMalpha()
    helper.compile(feature_shape=(label_len,),
                   ms_pinyin_size=py_map.max_index,
                   ms_output_size=ch_map.categores)
    if load_model is not None:
        helper.load(load_model)
    helper.fit(loader, -1)  # -1: train until interrupted
def train(datagenes: list, load_model=None):
    """Train a WAVEM acoustic model with CTC loss on the merged datasets.

    :param datagenes: list of dataset generators merged into one training set
    :param load_model: optional path to a pre-trained weight file to resume from
    """
    feat_w, feat_h = 1600, 200  # padded feature length / mel-band count
    label_cap = 64              # maximum pinyin-label length
    x_set, y_set = VoiceDatasetList().merge_load(datagenes)
    pymap = PinyinMapper(sil_mode=-1)
    loader = VoiceLoader(x_set, y_set,
                         batch_size=16,
                         feature_pad_len=feat_w,
                         n_mels=feat_h,
                         max_label_len=label_cap,
                         pymap=pymap,
                         melf=MelFeature5(),
                         all_train=False)
    helper = WAVEM(pymap)
    helper.compile(feature_shape=(feat_w, feat_h),
                   label_max_string_length=label_cap,
                   ms_output_size=pymap.max_index + 1)
    if load_model is not None:
        helper.load(os.path.abspath(load_model))
    helper.fit(loader, epoch=-1, save_step=100, use_ctc=True)
def train(datagenes: list, load_model=None, **kwargs):
    """Train a DCBNN1D acoustic model with CTC loss on the merged datasets.

    :param datagenes: list of dataset generators merged into one training set
    :param load_model: optional path to a pre-trained weight file to resume from
    :param kwargs: supports ``epoch`` (default -1, i.e. unlimited) and
                   ``save_step`` (default 1000)
    """
    feat_w, feat_h = 1600, 200  # padded feature length / mel-band count
    label_cap = 64              # maximum pinyin-label length
    x_set, y_set = VoiceDatasetList().merge_load(datagenes)
    pymap = PinyinMapper(sil_mode=-1)
    loader = VoiceLoader(x_set, y_set,
                         batch_size=16,
                         feature_pad_len=feat_w,
                         n_mels=feat_h,
                         max_label_len=label_cap,
                         pymap=pymap,
                         melf=MelFeature4(),
                         divide_feature_len=8,
                         all_train=False)
    helper = DCBNN1D(pymap)
    helper.compile(feature_shape=(feat_w, feat_h),
                   label_max_string_length=label_cap,
                   ms_output_size=pymap.max_index + 1)
    if load_model is not None:
        helper.load(os.path.abspath(load_model))
    helper.fit(loader,
               epoch=kwargs.get("epoch", -1),
               save_step=kwargs.get("save_step", 1000),
               use_ctc=True)
def train(datagenes: list, load_model=None):
    """Train a DCNN2D acoustic model with CTC loss on the merged datasets.

    :param datagenes: list of dataset generators merged into one training set
    :param load_model: optional path to a pre-trained weight file to resume from
    """
    feat_w, feat_h = 1600, 200  # padded feature length / mel-band count
    x_set, y_set = VoiceDatasetList().merge_load(datagenes)
    pymap = PinyinMapper(sil_mode=-1)
    loader = VoiceLoader(x_set, y_set,
                         batch_size=16,
                         n_mels=feat_h,
                         feature_pad_len=feat_w,
                         feature_dim=3,
                         pymap=pymap,
                         melf=MelFeature5(),
                         divide_feature_len=8)
    helper = DCNN2D(pymap)
    # CTC loss requires every label index < num_class - 1, hence the +1.
    helper.compile(feature_shape=(feat_w, feat_h, 1),
                   ms_output_size=pymap.max_index + 1)
    if load_model is not None:
        helper.load(os.path.abspath(load_model))
    helper.fit(loader, epoch=-1, use_ctc=True)
def summary(gene):
    """Print audio/label statistics (with plots) for one dataset generator.

    :param gene: a dataset generator exposing ``load_from_path()``
    """
    x_set, y_set = gene.load_from_path()
    loader = VoiceLoader(x_set, y_set,
                         pymap=PinyinMapper(sil_mode=-1),
                         vad_cut=False,
                         check=False)
    name = gene.__class__.__name__
    print(f"start to summary the {name} dataset")
    # NOTE: "summery" is the (misspelled) method name on VoiceLoader itself.
    loader.summery(audio=True, label=True, plot=True, dataset_name=name)
def predict_dchmm(path="./model/DCBNN1D_cur_best.h5"):
    """Record speech from the command line in a loop and print DCHMM predictions.

    :param path: path to the pre-trained DCBNN1D weight file
    """
    model = DCHMM(acmodel_input_shape=(1600, 200),
                  acmodel_output_shape=(200,),
                  lgmodel_input_shape=None,
                  py_map=PinyinMapper(sil_mode=-1),
                  chs_map=ChsMapper())
    model.compile(path)
    while True:
        # record 3 seconds, then print pinyin line, Chinese line and probability
        py_line, ch_line, prob = model.record_from_cmd(3)
        print(py_line, ch_line, prob)
def real_predict(path):
    """Interactive pinyin -> Chinese prediction loop for a trained SOMMalpha model.

    :param path: path to the pre-trained SOMMalpha weight file
    """
    label_len = 200
    py_map = PinyinMapper(sil_mode=0)
    ch_map = ChsMapper()
    helper = SOMMalpha()
    helper.compile(feature_shape=(label_len,),
                   ms_pinyin_size=py_map.max_index,
                   ms_output_size=ch_map.categores)
    helper.load(path)
    while True:
        line = input("请输入拼音:")
        vectors = [py_map.alist2vector(line)]
        print(vectors)
        padded = pad_sequences(vectors,
                               maxlen=label_len,
                               padding="post",
                               truncating="post")
        # the model expects an (inputs, labels) pair; labels are unused here
        result = helper.predict((padded, None))[0]
        print(result.replace("_", ""))  # strip the padding/blank symbol
def predict_dcsom(ac_path="./model/DCBNN1D_cur_best.h5",
                  lg_path="./model/language/SOMMalpha_step_18000.h5"):
    """Record speech from the command line in a loop and print DCSOM predictions.

    :param ac_path: path to the pre-trained acoustic-model (DCBNN1D) weights
    :param lg_path: path to the pre-trained language-model (SOMMalpha) weights
    """
    dcs = DCSOM(acmodel_input_shape=(1600, 200),
                acmodel_output_shape=(200,),
                lgmodel_input_shape=(200,),
                py_map=PinyinMapper(sil_mode=-1),
                chs_map=ChsMapper(),
                divide_feature=8)
    dcs.compile(ac_path, lg_path)
    while True:
        try:
            # record 5 seconds and print the decoded result
            print(dcs.record_from_cmd(5))
        except Exception:
            # FIX: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit and made the loop impossible
            # to quit with Ctrl-C. Best-effort behavior is kept.
            print("[info*]未识别到语音")
def real_predict(path="./model/DCBNN1D_cur_best.h5"):
    """Record speech from the command line in a loop and print DCHMM predictions.

    :param path: path to the pre-trained DCBNN1D weight file
    """
    model = DCHMM(acmodel_input_shape=(1600, 200),
                  acmodel_output_shape=(200,),
                  lgmodel_input_shape=None,
                  py_map=PinyinMapper(sil_mode=-1),
                  chs_map=ChsMapper())
    model.compile(path)
    while True:
        # record 3 seconds, then print pinyin line, Chinese line and probability
        py_line, ch_line, prob = model.record_from_cmd(3)
        print(py_line, ch_line, prob)
def summary_dcbann1d(datagenes: list, load_model=None):
    """Evaluate a DCBNN1D model on the datasets and dump mispredicted pinyin.

    Writes ./error_dict.txt with one ``label,err1 err2 ...`` line per pinyin
    label that was ever mispredicted, and prints the aggregated error dict.

    :param datagenes: list of dataset generators merged into one evaluation set
    :param load_model: optional path to a pre-trained weight file
    """
    feat_w, feat_h = 1600, 200  # padded feature length / mel-band count
    max_label_len = 64
    dataset = VoiceDatasetList()
    x_set, y_set = dataset.merge_load(datagenes)
    pymap = PinyinMapper(sil_mode=-1)
    vloader = VoiceLoader(x_set, y_set,
                          batch_size=16,
                          feature_pad_len=feat_w,
                          n_mels=feat_h,
                          max_label_len=max_label_len,
                          pymap=pymap,
                          melf=MelFeature5(),
                          divide_feature_len=8)
    model_helper = DCBNN1D(pymap)
    model_helper.compile(feature_shape=(feat_w, feat_h),
                         label_max_string_length=max_label_len,
                         ms_output_size=pymap.max_index + 1)
    if load_model is not None:
        model_helper.load(os.path.abspath(load_model))

    # Aggregate the per-batch error lists keyed by the true pinyin label.
    all_err_dict = {}
    for batch in vloader.create_iter(one_batch=True):
        test_res = model_helper.test(batch, use_ctc=True, return_result=True)
        for label, errs in test_res["err_pylist"].items():
            all_err_dict.setdefault(label, []).extend(errs)
    print(all_err_dict)

    # FIX: the original opened the file as `w` (shadowing the feature width),
    # wrote partial per-batch results, then truncated and rewrote the same
    # file afterwards — and omitted newlines, so all entries ran together.
    # A single final pass with one line per label replaces both writes.
    with open("./error_dict.txt", "w", encoding="utf-8") as out:
        for label, errs in all_err_dict.items():
            out.write(f"{label},{' '.join(set(errs))}\n")
def train_dcnn1d(datagene: list, load_model=None):
    """Train a DCNN1D language model on the label (pinyin/Chinese) side only.

    :param datagene: list of dataset generators; only their labels are loaded
    :param load_model: optional path to a pre-trained weight file to resume from
    """
    _, y_set = VoiceDatasetList().merge_load(datagene, choose_x=False, choose_y=True)
    label_cap = 64  # maximum label sequence length
    py_map = PinyinMapper(sil_mode=0)
    ch_map = ChsMapper()
    loader = TextLoader(y_set,
                        padding_length=label_cap,
                        pinyin_map=py_map,
                        cut_sub=16,
                        chs_map=ch_map)
    helper = DCNN1D()
    helper.compile(feature_shape=(label_cap, loader.max_py_size),
                   ms_input_size=py_map.max_index,
                   ms_output_size=ch_map.categores)
    if load_model is not None:
        helper.load(load_model)
    helper.fit(loader, -1)  # -1: train until interrupted
def _check(self):
    """Detect the "symbol" marker file that flags an already-cleaned dataset.

    After cleaning, a "symbol" file is created under the dataset root so the
    cleaning step does not need to run again; `self.check` records whether it
    exists. Also (re)builds the pinyin and Chinese-character mappers.
    """
    marker = os.path.join(self.path, "symbol")
    self.check = os.path.exists(marker)
    self.pymap = PinyinMapper()
    self.chsmap = ChsMapper()
def __init__(self, path):
    """Remember the dataset root and build a pinyin-aware mapper.

    :param path: root directory of the dataset
    """
    self.path = path
    self.pymap = PinyinMapper(use_pinyin=True)
def __init__(self, path, strip_tone=False):
    """Remember the dataset root and build pinyin/Chinese mappers.

    :param path: root directory of the dataset; must exist
    :param strip_tone: whether tone digits should be stripped from pinyin
                       (consumed elsewhere; only stored here)
    :raises FileNotFoundError: if ``path`` does not exist
    """
    # FIX: was `assert os.path.exists(path)`, which is silently stripped
    # under `python -O`; validate input explicitly instead.
    if not os.path.exists(path):
        raise FileNotFoundError(f"path not exists! {path}")
    self.path = path
    self.strip_tone = strip_tone
    self.pymap = PinyinMapper()
    self.chs_map = ChsMapper()
alpha_batch = ["".join(sample) for sample in raw_pylist_batch] alpha_vector_batch = self.py_map.batch_alist2vector(alpha_batch) alpha_vector_batch = TextLoader2.corpus2feature(alpha_vector_batch,self.lgmodel_input_shape[0]) ch_list_batch,prob_batch = self.lg_model.predict([alpha_vector_batch,None],True) pyline = np.concatenate(pylist_batch).tolist() chline = ",".join(ch_list_batch).replace("_","") print(pyline,chline) return pyline,chline,[ctc_prob[0]] if __name__ == "__main__": dcs = DCSOM(acmodel_input_shape=(1600,200), acmodel_output_shape=(200,), lgmodel_input_shape=(200,), py_map=PinyinMapper(sil_mode=-1), chs_map=ChsMapper(), divide_feature=8) # dcs.compile("../model/DCBNN1D_step_326000.h5", # "../model/language/SOMMalpha_step_18000.h5") dcs.compile("../model/DCBNN1D_cur_best.h5", "../model/language/SOMMalpha_step_18000.h5") while True: try: print(dcs.record_from_cmd(5)) except: print("[info*]未识别到语音")
class Dataset:
    """Base class for a speech dataset rooted at a directory.

    Subclasses implement `create_fs_iter()`; this base class drives three
    phases over the files it yields: labeling (write txt labels next to wav
    files), counting (build character/pinyin dictionaries), and cleaning
    (delete sample pairs whose labels are unusable).
    """
    label_mode = "label"  # mode for generating the correct labels
    clean_mode = "clean"  # mode for cleaning after the labels are generated
    train_mode = "train"  # mode for serving the dataset after cleaning
    ''''''

    def __init__(self, path):
        """Remember the dataset root and probe its cleaned/uncleaned state.

        :param path: root directory of the dataset
        """
        self.path = path
        self._check()

    def _check(self):
        """After cleaning, a "symbol" file is created under the root directory
        to mark that no further cleaning is needed; `self.check` records
        whether it exists. Also builds the pinyin and Chinese mappers."""
        symbol = os.path.join(self.path, "symbol")
        self.check = os.path.exists(symbol)
        self.pymap = PinyinMapper()
        self.chsmap = ChsMapper()

    def _pre_process_line(self, line):
        """TODO: process a Chinese string.

        :param line: str
        :return: None if it contains letters/digits; otherwise the string
                 with punctuation and spaces removed. Does not check whether
                 the characters are in the dictionary.
        """

    def _pre_process_pyline(self, pyline):
        """TODO: process a pinyin string.

        :param pyline: pinyin string
        :return: the string with redundant spaces collapsed so that pinyin
                 are separated by exactly one space; None if a pinyin is
                 not in the dictionary.
        """

    def initial(self):
        # Full first-time pass: label the files, then build the dictionaries.
        self.label_dataset()
        self.count_dataset()
        # self.clean_dataset()

    def clean(self):
        self.clean_dataset()

    def label_dataset(self):
        """Generate the labels the datasets need: a txt file per wav file in
        the directory (except Tsinghua, which uses wav.trn). If a sample
        cannot be completed (i.e. audio and text are not both present), the
        corresponding wav or label file is (intended to be) deleted.

        Note: at this point Chinese and pinyin are not guaranteed to align
        one-to-one; the Chinese may still contain digits, letters or
        punctuation that cannot be annotated with pinyin.
        """
        print(f"[info*]Create labels in {self.__class__.__name__}.")
        dataiter = self.create_fs_iter(mode=Dataset.label_mode)
        for i, (wav_fn, txt_fn, [line, pyline]) in enumerate(dataiter):
            # NOTE(review): end="\0" writes a NUL character; end="" was
            # probably intended for the carriage-return progress line.
            print(f"\r[info*]Process {i},fn = {txt_fn}", end="\0", flush=True)
            if not os.path.exists(wav_fn) and os.path.exists(txt_fn):
                # os.remove(txt_fn)  # deletion kept disabled; only warn
                print(f"\n{txt_fn} may not have the wav file {wav_fn}, please check it.")
                continue
            if line is None and os.path.exists(wav_fn):
                # wav file present but no Chinese label
                # os.remove(wav_fn)  # deletion kept disabled; only warn
                print(f"\n{wav_fn} not have the labels, it will be deleted.")
                continue
            if pyline is None or len(pyline) == 0:
                # no pinyin yet: derive it from the Chinese line
                pyline = self.pymap.sent2pylist(line)
            else:
                # currently only the Tsinghua dataset already has everything,
                # so there is nothing to clean or rewrite for it
                continue
            with open(txt_fn, "w", encoding="utf-8") as w:
                w.write(f"{line}\n")
                w.write(f"{pyline}\n")
        print()

    def count_dataset(self):
        """Build the two count dictionaries (Chinese chars and pinyin) under
        the dataset root; how they are merged is left to the user. Iterates
        in train_mode, i.e. over the finished (wav, txt) pairs."""
        print(f"[info*]Create dicts in {self.__class__.__name__}.")
        dataiter = self.create_fs_iter(mode=Dataset.train_mode)
        chs_all_dict = {}
        py_all_dict = {}
        for _, txt_fn in dataiter:
            with open(txt_fn, encoding="utf-8") as f:
                # line 1: Chinese text; line 2: space-separated pinyin
                line = f.readline().strip()
                pyline = f.readline().strip().split(" ")
                # drop trailing neutral-tone "5" markers and newlines
                pyline = [i.strip("5\n") for i in pyline]
            chs_dict = Datautil.count_label(line)
            py_dict = Datautil.count_label(pyline)
            chs_all_dict = Datautil.merge_count_label(chs_dict, chs_all_dict)
            py_all_dict = Datautil.merge_count_label(py_dict, py_all_dict)
        Datautil.write_count_result(path=self.path,
                                    chs_dict=chs_all_dict,
                                    py_dict=py_all_dict)

    def clean_dataset(self):
        """Clean the dataset according to the final dictionary: delete (wav,
        txt) pairs whose text cannot be cleaned, whose text/pinyin lengths
        disagree, or whose pinyin contains out-of-vocabulary items."""
        # cleaning consumes the same data format as train_mode
        dataiter = self.create_fs_iter(mode=Dataset.train_mode)
        count = 0      # pairs removed for bad/mismatched text
        oov_count = 0  # pairs removed for out-of-vocabulary pinyin
        for i, (wav_fn, txt_fn) in enumerate(dataiter):
            print(f"\r{i},err_count = {count},oov_count = {oov_count},fn = {txt_fn[:-20]}", end="\0", flush=True)
            with open(txt_fn, encoding="utf-8") as f:
                line = f.readline().replace(" ", "").replace(" ", "").strip()
                pyline = f.readline().strip().split(" ")
            new_line = StopwordUtil.clean_line(line)
            if new_line is None:
                # text could not be cleaned at all
                Datautil.check_remove(wav_fn)
                Datautil.check_remove(txt_fn)
                count += 1
            elif len(new_line) != len(pyline):
                # character count no longer matches the pinyin count
                Datautil.check_remove(wav_fn)
                Datautil.check_remove(txt_fn)
                count += 1
            # NOTE(review): the OOV check runs even for pairs already removed
            # above — presumably harmless because check_remove tolerates
            # missing files, but verify.
            no_oov, oov_dict = self.pymap.check_line(pyline)
            if not no_oov:
                oov_count += 1
                Datautil.check_remove(wav_fn)
                Datautil.check_remove(txt_fn)
        print()

    def load_dataset(self):
        """Load the train/test dataset (uses train_mode).

        :return: (x_set, y_set) — parallel lists of wav paths and label paths
        """
        dataiter = self.create_fs_iter(mode=Dataset.train_mode)
        x_set = []
        y_set = []
        for x, y in dataiter:
            x_set.append(x)
            y_set.append(y)
        return x_set, y_set

    def create_fs_iter(self, mode="train"):
        # Subclass hook: yield file-system tuples appropriate to `mode`.
        raise NotImplementedError(
            f"create_fs_iter() must be Implemented in {self.__class__.__name__}")