def TrainModel(self, datapath, epoch=2, save_step=1000, filename='model_speech/LSTM_CNN_model'):
    '''
    Train the model.

    Parameters:
        datapath: path where the dataset is stored
        epoch: number of training epochs
        save_step: number of generator steps to fit between two model saves
        filename: default file name for saved models, without extension.
            It is not referenced by this method; SaveModel is called with
            only a comment suffix.
    '''
    data = DataSpeech(datapath)
    data.LoadDataList('train')

    # The loop variable has its own name so the `epoch` argument is not overwritten.
    for n_epoch in range(epoch):
        print('[running] train epoch %d .' % n_epoch)
        n_step = 0  # number of save_step-sized chunks fitted in this epoch
        # This loop only ends when fitting raises StopIteration.
        while True:
            try:
                print('[message] epoch %d . Have train datas %d+' % (n_epoch, n_step * save_step))
                # data_genetator is a generator function
                yielddatas = data.data_genetator(self.BATCH_SIZE)
                self._model.fit_generator(yielddatas, save_step, nb_worker=2)
                n_step += 1
            except StopIteration:
                print('[error] generator error. please check data format.')
                break

            # Save a checkpoint after every successfully fitted chunk.
            self.SaveModel(comment='_e_' + str(n_epoch) + '_step_' + str(n_step))
def TestModel(self, datapath, str_dataset='dev'):
    '''
    Test the model and print the result of evaluating one batch.

    Parameters:
        datapath: path where the dataset is stored
        str_dataset: name of the data list to load ('dev' by default)

    Nothing is returned; the value from test_on_batch is printed.
    '''
    data = DataSpeech(datapath)
    data.LoadDataList(str_dataset)
    # Ask the instance that loaded the list for the number of items;
    # calling through the instance works for any kind of method.
    num_data = data.GetDataNum()
    try:
        # num_data is used as the generator's size argument.
        gen = data.data_genetator(num_data)
        # Take exactly one item from the generator. Unpacking the generator
        # object itself would iterate it instead of fetching a single batch.
        X, y = next(gen)
        r = self._model.test_on_batch(X, y)
        print(r)
    except StopIteration:
        print('[Error] Model Test Error. please check data format.')
def RecognizeSpeech(self, wavsignal, fs):
    '''
    Final speech-recognition function: recognise the speech in one wav sequence.
    The author notes that this function still has a bug.

    Parameters:
        wavsignal: wav signal forwarded to DataSpeech.GetFrequencyFeature
        fs: second argument forwarded to GetFrequencyFeature
            (presumably the sampling rate -- confirm against DataSpeech)

    Returns:
        A two-element list: the label list and the model prediction.
    '''
    data = DataSpeech('E:\\语音数据集')
    data.LoadDataList('dev')

    # Extract the input features
    raw_features = data.GetFrequencyFeature(wavsignal, fs)

    # The first entry of the pinyin symbol list is used as the only label
    labels = [data.list_symbol[0]]
    label_ids = [data.SymbolToNum(symbol) for symbol in labels]

    features = np.array(raw_features, dtype=np.int16)
    features = features.reshape(features.shape[0], features.shape[1])

    label_ids = np.array(label_ids, dtype=np.int16)
    label_ids = label_ids.reshape(label_ids.shape[0])

    # Both length arrays are built from one-element lists, so they are
    # already one-dimensional with a single entry.
    input_length = np.array([features.shape[0] // 4 - 3], dtype=np.int16)
    label_length = np.array([label_ids.shape[0]], dtype=np.int16)

    x = [features, label_ids, input_length, label_length]
    print(x)
    # NOTE(review): the four arrays have different shapes; how np.array and
    # predict handle that is probably the bug the author mentions -- confirm.
    x = np.array(x)
    pred = self._model.predict(x=x)
    return [labels, pred]
def RecognizeSpeech(self, wavsignal, fs):
    '''
    Final speech-recognition function: recognise the speech in one wav sequence.
    The author notes that this function still has a bug.

    Parameters:
        wavsignal: wav signal forwarded to DataSpeech.GetFrequencyFeature
        fs: second argument forwarded to GetFrequencyFeature
            (presumably the sampling rate -- confirm against DataSpeech)

    Returns:
        The value of the first decoded path produced by K.ctc_decode.
    '''
    # NOTE(review): the dataset location is hard-coded to a Windows path.
    data = DataSpeech('E:\\语音数据集')
    data.LoadDataList('dev')

    # Extract the input features
    data_input = data.GetFrequencyFeature(wavsignal, fs)
    input_length = len(data_input) // 4

    # np.float64 is the dtype the old np.float alias resolved to; the alias
    # itself no longer exists in current NumPy releases.
    data_input = np.array(data_input, dtype=np.float64)

    in_len = np.zeros((1), dtype=np.int32)
    print(in_len.shape)
    in_len[0] = input_length

    batch_size = 1
    # Zero-filled input buffer of fixed size; the features fill its leading rows.
    x_in = np.zeros((batch_size, 1600, 200), dtype=np.float64)
    for i in range(batch_size):
        x_in[i, 0:len(data_input)] = data_input

    base_pred = self.base_model.predict(x=x_in)
    print('base_pred:\n', base_pred)

    # Drop the first two entries along axis 1 before decoding.
    base_pred = base_pred[:, 2:, :]
    r = K.ctc_decode(base_pred, in_len, greedy=True, beam_width=64, top_paths=1)
    print('r', r)

    r1 = K.get_value(r[0][0])
    print('r1', r1)
    print('r0', r[1])
    r2 = K.get_value(r[1])
    print(r2)
    print('解码完成')

    return r1