def TrainModel(self, datapath, epoch = 2, save_step = 1000, batch_size = 32, filename = abspath + 'model_speech/m' + ModelName + '/speech_model'+ModelName): ''' 训练模型 参数: datapath: 数据保存的路径 epoch: 迭代轮数 save_step: 每多少步保存一次模型 filename: 默认保存文件名,不含文件后缀名 ''' data=DataSpeech(datapath, 'train') #首先获取的是 train数据集 num_data = data.GetDataNum() # 获取数据的数量 yielddatas = data.data_genetator(batch_size, self.AUDIO_LENGTH) #将所有的数据使用生成器进行batch_size的封装,封装成一个个的对象 for epoch in range(epoch): # 迭代轮数 print('[running] train epoch %d .' % epoch) n_step = 0 # 迭代数据数 while True: try: print('[message] epoch %d . Have train datas %d+'%(epoch, n_step*save_step)) # data_genetator是一个生成器函数 #self._model.fit_generator(yielddatas, save_step, nb_worker=2) # 利用Python的生成器,逐个生成数据的batch并进行训练。生成器与模型将并行执行以提高效率。例如,该函数允许我们在CPU上进行实时的数据提升,同时在GPU上进行模型训练 self._model.fit_generator(yielddatas, save_step) # self._model这个是初始化调用creatmodel返回的模型 # samples_per_epoch:整数,当模型处理的样本达到此数目时计一个epoch结束,执行下一个epoch n_step += 1 except StopIteration: print('[error] generator error. please check data format.') break self.SaveModel(comment='_e_'+str(epoch)+'_step_'+str(n_step * save_step)) #进行模型的保存 self.TestModel(self.datapath, str_dataset='train', data_count = 4) #进行训练集模型的测试 self.TestModel(self.datapath, str_dataset='dev', data_count = 4) # 进行 验证集模型的测试
def TrainModel(self, datapath, epoch = 2, save_step = 1000, batch_size = 32, filename = 'model_speech/speech_model24'): ''' 训练模型 参数: datapath: 数据保存的路径 epoch: 迭代轮数 save_step: 每多少步保存一次模型 filename: 默认保存文件名,不含文件后缀名 ''' data=DataSpeech(datapath, 'train') num_data = data.GetDataNum() # 获取数据的数量 yielddatas = data.data_genetator(batch_size, self.AUDIO_LENGTH) for epoch in range(epoch): # 迭代轮数 print('[running] train epoch %d .' % epoch) n_step = 0 # 迭代数据数 while True: try: print('[message] epoch %d . Have train datas %d+'%(epoch, n_step*save_step)) # data_genetator是一个生成器函数 #self._model.fit_generator(yielddatas, save_step, nb_worker=2) self._model.fit_generator(yielddatas, save_step) n_step += 1 except StopIteration: print('[error] generator error. please check data format.') break self.SaveModel(comment='_e_'+str(epoch)+'_step_'+str(n_step * save_step)) self.TestModel(self.datapath, str_dataset='train', data_count = 4) self.TestModel(self.datapath, str_dataset='dev', data_count = 4)
def TestModel(self, datapath='', str_dataset='dev', data_count=32, out_report=False): ''' 测试检验模型效果 ''' data = DataSpeech(self.datapath, str_dataset) #data.LoadDataList(str_dataset) num_data = data.GetDataNum() # 获取数据的数量 if (data_count <= 0 or data_count > num_data): # 当data_count为小于等于0或者大于测试数据量的值时,则使用全部数据来测试 data_count = num_data try: ran_num = random.randint(0, num_data - 1) # 获取一个随机数 words_num = 0 word_error_num = 0 nowtime = time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time())) if (out_report == True): txt_obj = open('Test_Report_' + str_dataset + '_' + nowtime + '.txt', 'w', encoding='UTF-8') # 打开文件并读入 txt = '' for i in range(data_count): data_input, data_labels = data.GetData( (ran_num + i) % num_data) # 从随机数开始连续向后取一定数量数据 pre = self.Predict(data_input, data_input.shape[0] // 8) words_n = data_labels.shape[0] # 获取每个句子的字数 words_num += words_n # 把句子的总字数加上 edit_distance = GetEditDistance(data_labels, pre) # 获取编辑距离 if (edit_distance <= words_n): # 当编辑距离小于等于句子字数时 word_error_num += edit_distance # 使用编辑距离作为错误字数 else: # 否则肯定是增加了一堆乱七八糟的奇奇怪怪的字 word_error_num += words_n # 就直接加句子本来的总字数就好了 if (out_report == True): txt += str(i) + '\n' txt += 'True:\t' + str(data_labels) + '\n' txt += 'Pred:\t' + str(pre) + '\n' txt += '\n' print('*[测试结果] 语音识别 ' + str_dataset + ' 集语音单字错误率:', word_error_num / words_num * 100, '%') if (out_report == True): txt += '*[测试结果] 语音识别 ' + str_dataset + ' 集语音单字错误率: ' + str( word_error_num / words_num * 100) + ' %' txt_obj.write(txt) txt_obj.close() except StopIteration: print('[Error] Model Test Error. please check data format.')
def RecognizeSpeech(self, wavsignal, fs): ''' 最终做语音识别用的函数,识别一个wav序列的语音 不过这里现在还有bug ''' #data = self.data #data = DataSpeech('E:\\语音数据集') #data.LoadDataList('dev') # 获取输入特征 #data_input = GetMfccFeature(wavsignal, fs) #t0=time.time() data_input = GetFrequencyFeature3(wavsignal, fs) #t1=time.time() #print('time cost:',t1-t0) input_length = len(data_input) input_length = input_length // 8 data_input = np.array(data_input, dtype=np.float) #print(data_input,data_input.shape) data_input = data_input.reshape(data_input.shape[0], data_input.shape[1], 1) #t2=time.time() r1 = self.Predict(data_input, input_length) #t3=time.time() #print('time cost:',t3-t2) list_symbol_dic = DataSpeech.GetSymbolList() # 获取拼音列表 r_str = [] for i in r1: r_str.append(list_symbol_dic[i]) return r_str pass
def TrainModel(self, datapath, epoch=2, save_step=1000, batch_size=32): ''' 训练模型 参数: datapath: 数据保存的路径 epoch: 迭代轮数 save_step: 每多少步保存一次模型 filename: 默认保存文件名,不含文件后缀名 ''' data = DataSpeech(datapath, 'train') # num_data = data.GetDataNum() # 获取数据的数量 txt_loss = open( os.path.join(os.getcwd(), 'speech_log_file', 'Test_Report_loss.txt'), mode='a', encoding='UTF-8') txt_obj = open( os.path.join(os.getcwd(), 'speech_log_file', 'Test_Report_accuracy.txt'), mode='a', encoding='UTF-8') saver = tf.train.Saver() with tf.Session() as sess: # sess.run(tf.global_variables_initializer()) saver.restore(sess,os.path.join(os.getcwd(), 'speech_model_file','speech.module-50')) summary_merge = tf.summary.merge_all() train_writter = tf.summary.FileWriter('summary_file',sess.graph) for i in range(51,epoch): yielddatas = data.data_genetator(batch_size, self.MAX_TIME) pbar = tqdm(yielddatas) train_epoch = 0 train_epoch_size = save_step for input,_ in pbar: feed = {self.input_data: input[0],self.label_data: input[1],self.input_length:input[2],self.label_length:input[3], self.is_train:True} _,loss,train_summary = sess.run([self.optimize,self.loss,summary_merge],feed_dict=feed) train_writter.add_summary(train_summary,train_epoch+i*train_epoch_size) pr = 'epoch:%d/%d,train_epoch: %d/%d ,loss: %s'% (epoch,i,train_epoch_size,train_epoch,loss) pbar.set_description(pr) txt = pr + '\n' txt_loss.write(txt) if train_epoch == train_epoch_size: break train_epoch +=1 if train_epoch%3000==0: self.TestMode(data, sess, i,txt_obj) saver.save(sess, os.path.join(os.getcwd(), 'speech_model_file', 'speech.module'), global_step=i) txt_loss.close()
def TrainModel(self, epoch=2, save_step=1000, batch_size=32, start_nstep=0): ''' 训练模型 参数: datapath: 数据保存的路径 epoch: 迭代轮数 save_step: 每多少步保存一次模型 filename: 默认保存文件名,不含文件后缀名 ''' data = DataSpeech(self.datapath_thchs30, self.datapath_stcmds, 'train') num_data = data.GetDataNum() # 获取数据的数量 yielddatas = data.data_genetator(batch_size, self.AUDIO_LENGTH) for epoch in range(epoch): # 迭代轮数 self.logger.debug("train epoch %s." % epoch) # print('[running] train epoch %d .' % epoch) n_step = start_nstep # 迭代数据数 while True: try: self.logger.debug('epoch %d . Have train datas %d+' % (epoch, n_step * save_step)) # print('[message] epoch %d . Have train datas %d+'%(epoch, n_step*save_step)) # data_genetator是一个生成器函数 #self._model.fit_generator(yielddatas, save_step, nb_worker=2) self._model.fit_generator(yielddatas, save_step) n_step += 1 except StopIteration: self.logger.error( "generator error. please check data format.") # print('[error] generator error. please check data format.') break self.SaveModel(filename='speech_model_%s_e_%s_step_%s' % (ModelName, epoch, n_step * save_step)) self.TestModel(str_dataset='train', data_count=4) self.TestModel(str_dataset='dev', data_count=4)
def TestModel(self, datapath='', str_dataset='dev', data_count=32, out_report=False, show_ratio=True): ''' 测试检验模型效果 ''' data = DataSpeech(self.datapath, str_dataset) #data.LoadDataList(str_dataset) num_data = data.GetDataNum() # 获取数据的数量 if (data_count <= 0 or data_count > num_data): # 当data_count为小于等于0或者大于测试数据量的值时,则使用全部数据来测试 data_count = num_data try: ran_num = random.randint(0, num_data - 1) # 获取一个随机数 words_num = 0 word_error_num = 0 nowtime = time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time())) if (out_report == True): txt_obj = open('Test_Report_' + str_dataset + '_' + nowtime + '.txt', 'w', encoding='UTF-8') # 打开文件并读入 txt = '' for i in range(data_count): data_input, data_labels = data.GetData( (ran_num + i) % num_data) # 从随机数开始连续向后取一定数量数据 # 数据格式出错处理 开始 # 当输入的wav文件长度过长时自动跳过该文件,转而使用下一个wav文件来运行 num_bias = 0 while (data_input.shape[0] > self.AUDIO_LENGTH): print('*[Error]', 'wave data lenghth of num', (ran_num + i) % num_data, 'is too long.', '\n A Exception raise when test Speech Model.') num_bias += 1 data_input, data_labels = data.GetData( (ran_num + i + num_bias) % num_data) # 从随机数开始连续向后取一定数量数据 # 数据格式出错处理 结束 pre = self.Predict(data_input, data_input.shape[0] // 8) words_n = data_labels.shape[0] # 获取每个句子的字数 words_num += words_n # 把句子的总字数加上 edit_distance = GetEditDistance(data_labels, pre) # 获取编辑距离 if (edit_distance <= words_n): # 当编辑距离小于等于句子字数时 word_error_num += edit_distance # 使用编辑距离作为错误字数 else: # 否则肯定是增加了一堆乱七八糟的奇奇怪怪的字 word_error_num += words_n # 就直接加句子本来的总字数就好了 if (i % 10 == 0 and show_ratio == True): print('测试进度:', i, '/', data_count) txt = '' if (out_report == True): txt += str(i) + '\n' txt += 'True:\t' + str(data_labels) + '\n' txt += 'Pred:\t' + str(pre) + '\n' txt += '\n' txt_obj.write(txt) print('*[测试结果] 语音识别 ' + str_dataset + ' 集语音单字错误率:', word_error_num / words_num * 100, '%') if (out_report == True): txt = '*[测试结果] 语音识别 ' + str_dataset + ' 集语音单字错误率: ' + str( word_error_num / words_num * 100) + ' %' txt_obj.write(txt) txt_obj.close() except StopIteration: print('[Error] Model Test Error. please check data format.')
def TestModel(self, datapath='', str_dataset='dev', data_count=64, out_report=True, show_ratio=True, io_step_print=10, io_step_file=10): ''' 测试检验模型效果 io_step_print 为了减少测试时标准输出的io开销,可以通过调整这个参数来实现 io_step_file 为了减少测试时文件读写的io开销,可以通过调整这个参数来实现 ''' data = DataSpeech(self.datapath, str_dataset) #data.LoadDataList(str_dataset) num_data = data.GetDataNum() # 获取数据的数量 if ( data_count <= 0 or data_count > num_data ): # 当data_count为小于等于0或者大于测试数据量的值时,则使用全部数据来测试(正常使用的是32或者自己传递进来的需要测试的个数) data_count = num_data try: ran_num = random.randint(0, num_data - 1) # 获取一个随机数(0-num_data) words_num = 0 #总得单次数量 word_error_num = 0 #错误的单次数量 nowtime = time.strftime('%Y%m%d_%H%M%S', time.localtime( time.time())) # '20190924_103104' 就是时间日期的一个字符串 if (out_report == True): #如果说输出开关打开的话 txt_obj = open('Test_Report_' + str_dataset + '_' + nowtime + '.txt', 'w', encoding='UTF-8') # 打开文件并读入 txt = '测试报告\n模型编号 ' + ModelName + '\n\n' for i in range(data_count): data_input, data_labels = data.GetData( (ran_num + i) % num_data) # 从随机数开始连续向后取一定数量数据 # 数据格式出错处理 开始 # 当输入的wav文件长度过长时自动跳过该文件,转而使用下一个wav文件来运行 num_bias = 0 while (data_input.shape[0] > self.AUDIO_LENGTH): print('*[Error]', 'wave data lenghth of num', (ran_num + i) % num_data, 'is too long.', '\n A Exception raise when test Speech Model.') num_bias += 1 data_input, data_labels = data.GetData( (ran_num + i + num_bias) % num_data) # 从随机数开始连续向后取一定数量数据 # 数据格式出错处理 结束 pre = self.Predict(data_input, data_input.shape[0] // 8) #预测的结果 words_n = data_labels.shape[0] # 获取每个句子的字数 words_num += words_n # 把句子的总字数加上 edit_distance = GetEditDistance( data_labels, pre) # 获取编辑距离(预测的结果跟真实的结果之间的编辑距离(整数)) if (edit_distance <= words_n): # 当编辑距离小于等于句子字数时 word_error_num += edit_distance # 使用编辑距离作为错误字数 else: # 否则肯定是增加了一堆乱七八糟的奇奇怪怪的字 word_error_num += words_n # 就直接加句子本来的总字数就好了(错误率就是100%) if ((i % io_step_print == 0 or i == data_count - 1) and show_ratio == True): #print('测试进度:',i,'/',data_count) print('Test Count: ', i, '/', data_count) if (out_report == True): if (i % io_step_file == 0 or i == data_count - 1): txt_obj.write(txt) txt = '' txt += str(i) + '\n' txt += 'True:\t' + str(data_labels) + '\n' txt += 'Pred:\t' + str(pre) + '\n' txt += '\n' #print('*[测试结果] 语音识别 ' + str_dataset + ' 集语音单字错误率:', word_error_num / words_num * 100, '%') print( '*[Test Result] Speech Recognition ' + str_dataset + ' set word error ratio: ', word_error_num / words_num * 100, '%') if (out_report == True): #将错误率进行写入 txt += '*[测试结果] 语音识别 ' + str_dataset + ' 集语音单字错误率: ' + str( word_error_num / words_num * 100) + ' %' txt_obj.write(txt) txt = '' txt_obj.close() except StopIteration: print('[Error] Model Test Error. please check data format.')
def TestModel(self, str_dataset='dev', data_count=32, out_report=False, show_ratio=True): ''' 测试检验模型效果 io_step_print 为了减少测试时标准输出的io开销,可以通过调整这个参数来实现 io_step_file 为了减少测试时文件读写的io开销,可以通过调整这个参数来实现 ''' self.logger.debug("test model") data = DataSpeech(self.datapath_thchs30, self.datapath_stcmds, str_dataset) #data.LoadDataList(str_dataset) num_data = data.GetDataNum() # 获取数据的数量 if (data_count <= 0 or data_count > num_data): # 当data_count为小于等于0或者大于测试数据量的值时,则使用全部数据来测试 data_count = num_data try: ran_num = random.randint(0, num_data - 1) # 获取一个随机数 words_num = 0 word_error_num = 0 nowtime = time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time())) if (out_report == True): txt_obj = open('Test_Report_' + str_dataset + '_' + nowtime + '.txt', 'w', encoding='UTF-8') # 打开文件并读入 txt = '测试报告\n模型编号 ' + ModelName + '\n\n' for i in range(data_count): data_input, data_labels = data.GetData( (ran_num + i) % num_data) # 从随机数开始连续向后取一定数量数据 # 数据格式出错处理 开始 # 当输入的wav文件长度过长时自动跳过该文件,转而使用下一个wav文件来运行 num_bias = 0 while (data_input.shape[0] > self.AUDIO_LENGTH): self.logger.error( 'wave data lenghth of num %s is too long. \n A Exception raise when test Speech Model.' % ((ran_num + i) % num_data)) # print('*[Error]','wave data lenghth of num',(ran_num + i) % num_data, 'is too long.','\n A Exception raise when test Speech Model.') num_bias += 1 data_input, data_labels = data.GetData( (ran_num + i + num_bias) % num_data) # 从随机数开始连续向后取一定数量数据 # 数据格式出错处理 结束 pre = self.Predict(data_input, data_input.shape[0] // 8) words_n = data_labels.shape[0] # 获取每个句子的字数 words_num += words_n # 把句子的总字数加上 edit_distance = GetEditDistance(data_labels, pre) # 获取编辑距离 if (edit_distance <= words_n): # 当编辑距离小于等于句子字数时 word_error_num += edit_distance # 使用编辑距离作为错误字数 else: # 否则肯定是增加了一堆乱七八糟的奇奇怪怪的字 word_error_num += words_n # 就直接加句子本来的总字数就好了 if (i % 10 == 0 and show_ratio == True): self.logger.debug('Test Count: %s/%s' % (i, data_count)) if (out_report == True): if (i % io_step_file == 0 or i == data_count - 1): txt_obj.write(txt) txt = '' txt += str(i) + '\n' txt += 'True:\t' + str(data_labels) + '\n' txt += 'Pred:\t' + str(pre) + '\n' txt += '\n' #print('*[测试结果] 语音识别 ' + str_dataset + ' 集语音单字错误率:', word_error_num / words_num * 100, '%') self.logger.info( 'Speech Recognition %s set word error ratio: %s%%' % (str_dataset, word_error_num / words_num * 100)) # print('*[Test Result] Speech Recognition ' + str_dataset + ' set word error ratio: ', word_error_num / words_num * 100, '%') if (out_report == True): txt += '*[测试结果] 语音识别 ' + str_dataset + ' 集语音单字错误率: ' + str( word_error_num / words_num * 100) + ' %' txt_obj.write(txt) txt = '' txt_obj.close() except StopIteration: self.logger.error( '[Error] Model Test Error. please check data format.')
def TrainModel(self, datapath, epoch=2, save_step=1000, batch_size=32, filename=abspath + 'model_speech/m' + ModelName + '/speech_model' + ModelName): ''' 训练模型 参数: datapath: 数据保存的路径 epoch: 迭代轮数 save_step: 每多少步保存一次模型 filename: 默认保存文件名,不含文件后缀名 ''' data = DataSpeech(datapath, 'train') num_data = data.GetDataNum() # 获取数据的数量 yielddatas = data.data_genetator(batch_size, self.AUDIO_LENGTH) # 冻结层 for layer in self._model.layers: layerName = str(layer.name) print("layerNAME:" + layerName) if layerName.startswith("conv2d_3") or layerName.startswith( "conv2d_4" ) or layerName.startswith("conv2d_5") or layerName.startswith( "conv2d_6") or layerName.startswith( "conv2d_7") or layerName.startswith( "conv2d_8") or layerName.startswith("conv2d_9"): layer.trainable = False self._model.compile(optimizer='rmsprop', loss='mse') # 可训练层 for x in self._model.trainable_weights: print("可训练层:" + x.name) print('\n') # 不可训练层 for x in self._model.non_trainable_weights: print("冻结层:" + x.name) print('\n') for epoch in range(epoch): # 迭代轮数 print('[running] train epoch %d .' % epoch) n_step = 0 # 迭代数据数 while True: try: print('[message] epoch %d . Have train datas %d+' % (epoch, n_step * save_step)) # data_genetator是一个生成器函数 # self._model.fit_generator(yielddatas, save_step, nb_worker=2) self._model.fit_generator(yielddatas, save_step) n_step += 1 except StopIteration: print('[error] generator error. please check data format.') break self.SaveModel(comment='_e_' + str(epoch) + '_step_' + str(n_step * save_step)) self.TestModel(self.datapath, str_dataset='train', data_count=4) self.TestModel(self.datapath, str_dataset='dev', data_count=4)