def test(self):
    index = 0
    next_idx = 20

    for index in range(10):
        # fetch one utterance at a time, starting from sample 20
        next_idx, self.audio_features, self.audio_features_len, self.sparse_labels, wav_files = utils.next_batch(
            next_idx, 1, n_input, n_context, self.text_labels, self.wav_files, self.word_num_map)

        print('Reading wav file: ', wav_files[0])
        print('Recognizing speech data......')

        d, train_ler = self.sess.run([self.decoded[0], self.label_err],
                                     feed_dict=self.get_feed_dict(dropout=1.0))
        dense_decoded = tf.sparse_tensor_to_dense(d, default_value=-1).eval(session=self.sess)
        dense_labels = utils.trans_tuple_to_texts_ch(self.sparse_labels, self.words)

        for orig, decoded_array in zip(dense_labels, dense_decoded):
            # convert the decoded id array back to a string
            decoded_str = utils.trans_array_to_text_ch(decoded_array, self.words)
            print('Original transcript: {}'.format(orig))
            print('Recognized text: {}'.format(decoded_str))
            break

    self.sess.close()
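# The snippet below is a minimal, self-contained sketch of the dense-decode step used
# above: densifying the decoder's SparseTensor output with default_value=-1 and mapping
# the remaining ids back to characters. The `words` table and the sample data are
# hypothetical; this is not the project's utils.trans_array_to_text_ch implementation.
import tensorflow as tf

words = ['你', '好', '世', '界']  # hypothetical id -> character table (self.words plays this role)

# stand-in for the SparseTensorValue returned by the CTC decoder for two utterances
sparse_decoded = tf.SparseTensor(
    indices=[[0, 0], [0, 1], [1, 0], [1, 1], [1, 2]],
    values=[0, 1, 2, 3, 1],
    dense_shape=[2, 3])

with tf.Session() as sess:
    # default_value=-1 marks padding positions that carry no decoded character
    dense = sess.run(tf.sparse_tensor_to_dense(sparse_decoded, default_value=-1))
    for row in dense:
        text = ''.join(words[i] for i in row if i >= 0)  # drop the -1 padding
        print(text)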
def train(self):
    epochs = 120

    # prepare the training loop
    section = '\n{0:=^40}\n'
    print(section.format('Start training'))

    train_start = time.time()
    for epoch in range(epochs):  # number of passes over the sample set
        epoch_start = time.time()
        if epoch < self.startepo:
            continue

        print('Epoch:', epoch, ' of a total of ', epochs)

        ####################### run batch ####
        n_batches_epoch = int(np.ceil(len(self.text_labels) / batch_size))
        print('Batches in this epoch: ', n_batches_epoch, ', samples per batch:', batch_size)

        train_cost = 0
        train_err = 0
        next_idx = 0

        for batch in range(n_batches_epoch):  # how many batches of batch_size to draw
            # fetch the next batch of data
            # temp_next_idx, temp_audio_features, temp_audio_features_len, temp_sparse_labels
            next_idx, self.audio_features, self.audio_features_len, self.sparse_labels, wav_files = utils.next_batch(
                next_idx, batch_size, n_input, n_context, self.text_labels, self.wav_files, self.word_num_map)

            # run avg_loss and optimizer
            batch_cost, _ = self.sess.run([self.avg_loss, self.optimizer], feed_dict=self.get_feed_dict())
            train_cost += batch_cost

            if (batch + 1) % 70 == 0:
                rs = self.sess.run(self.merged, feed_dict=self.get_feed_dict())
                self.writer.add_summary(rs, batch)

                print('Batch:', batch, ' loss: ', train_cost / (batch + 1))

                d, train_err = self.sess.run([self.decoded[0], self.label_err],
                                             feed_dict=self.get_feed_dict(dropout=1.0))
                dense_decoded = tf.sparse_tensor_to_dense(d, default_value=-1).eval(session=self.sess)
                dense_labels = utils.trans_tuple_to_texts_ch(self.sparse_labels, self.words)

                print('Label error rate: ', train_err)
                for orig, decoded_array in zip(dense_labels, dense_decoded):
                    # convert to strings
                    decoded_str = utils.trans_array_to_text_ch(decoded_array, self.words)
                    print('Original transcript: {}'.format(orig))
                    print('Recognized text: {}'.format(decoded_str))
                    break

        epoch_duration = time.time() - epoch_start
        log = 'Epoch {}/{}, training loss: {:.3f}, label error rate: {:.3f}, time: {:.2f} sec'
        print(log.format(epoch, epochs, train_cost, train_err, epoch_duration))
        self.saver.save(self.sess, self.savedir + self.conf.get("FILE_DATA").savefile, global_step=epoch)

    train_duration = time.time() - train_start
    print('Training complete, total duration: {:.2f} min'.format(train_duration / 60))

    self.sess.close()
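# The training and test loops above repeatedly call self.get_feed_dict(), which is not
# shown in this listing. The sketch below illustrates, under assumptions, what such a
# method typically returns in TF 1.x: a feed dict mapping input placeholders to the batch
# just produced by utils.next_batch. The placeholder attribute names (input_tensor,
# seq_length, targets, keep_dropout_rate) and the default dropout value are illustrative
# only, not the project's actual definitions.
def get_feed_dict(self, dropout=0.95):
    return {
        self.input_tensor: self.audio_features,        # padded MFCC features per batch
        self.seq_length: self.audio_features_len,      # per-utterance frame counts
        self.targets: self.sparse_labels,              # sparse CTC label tuple
        self.keep_dropout_rate: dropout,               # dropout=1.0 disables dropout for evaluation
    }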
def test_target_wav_file(self, wav_files, txt_labels):
    print('Reading wav file: ', wav_files[0])
    print('Recognizing speech data......')

    # extract MFCC features and convert the transcripts to label vectors
    self.audio_features, self.audio_features_len, text_vector, text_vector_len = utils.get_audio_mfcc_features(
        None, wav_files, n_input, n_context, self.word_num_map, txt_labels)
    self.sparse_labels = utils.sparse_tuple_from(text_vector)

    d, train_ler = self.sess.run([self.decoded[0], self.label_err],
                                 feed_dict=self.get_feed_dict(dropout=1.0))
    dense_decoded = tf.sparse_tensor_to_dense(d, default_value=-1).eval(session=self.sess)
    decoded_str = utils.trans_array_to_text_ch(dense_decoded[0], self.words)

    print('Original transcript: {}'.format(txt_labels[0]))
    print('Recognized text: {}'.format(decoded_str))

    self.sess.close()
def train(self):
    epochs = 200

    # prepare the training loop
    section = '\n{0:=^40}\n'
    tf.logging.info(section.format('Start training'))

    train_start = time.time()
    for epoch in range(epochs):  # number of passes over the sample set
        epoch_start = time.time()
        if epoch < self.startepo:
            continue

        tf.logging.info('Epoch ' + str(epoch + 1) + ' of a total of ' + str(epochs))

        ####################### run batch ####
        n_batches_epoch = int(np.ceil(len(self.text_labels) / batch_size))
        tf.logging.info('Batches in this epoch: ' + str(n_batches_epoch) + ', samples per batch: ' + str(batch_size))

        train_cost = 0
        train_err = 0
        next_idx = 0

        for batch in range(n_batches_epoch):  # how many batches of batch_size to draw
            # fetch the next batch of data
            # temp_next_idx, temp_audio_features, temp_audio_features_len, temp_sparse_labels
            tf.logging.info('%d/%d:%s', batch + 1, n_batches_epoch,
                            datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
            next_idx, self.audio_features, self.audio_features_len, self.sparse_labels, wav_files = utils.next_batch(
                next_idx, batch_size, n_input, n_context, self.text_labels, self.wav_files, self.word_num_map,
                pick_deterministically=False)

            # run avg_loss and optimizer
            batch_cost, _ = self.sess.run([self.avg_loss, self.optimizer], feed_dict=self.get_feed_dict())
            train_cost += batch_cost

            if (batch + 1) % 100 == 0:
                # rs = self.sess.run(self.merged, feed_dict=self.get_feed_dict())
                # self.writer.add_summary(rs, batch)
                tf.logging.info('Batch: ' + str(batch + 1) + ', loss: ' + str(train_cost / (batch + 1)))

                d, train_err = self.sess.run([self.decoded[0], self.label_err],
                                             feed_dict=self.get_feed_dict(dropout=1.0))
                dense_decoded = tf.sparse_tensor_to_dense(d, default_value=-1).eval(session=self.sess)
                dense_labels = utils.trans_tuple_to_texts_ch(self.sparse_labels, self.words)

                tf.logging.info('Label error rate: ' + str(train_err))
                for orig, decoded_array in zip(dense_labels, dense_decoded):
                    # convert to strings
                    decoded_str = utils.trans_array_to_text_ch(decoded_array, self.words)
                    tf.logging.info('Original transcript: {}'.format(orig.encode('utf-8')))
                    tf.logging.info('Recognized text: {}'.format(decoded_str.encode('utf-8')))
                    break

        epoch_duration = time.time() - epoch_start
        log = 'Epoch {}/{}, training loss: {:.3f}, label error rate: {:.3f}, time: {:.2f} sec'
        tf.logging.info(log.format(epoch + 1, epochs, train_cost, train_err, epoch_duration))
        self.saver.save(self.sess, self.savedir + self.conf.get("FILE_DATA").savefile, global_step=epoch + 1)

    train_duration = time.time() - train_start
    tf.logging.info('Training complete, total duration: {:.2f} min'.format(train_duration / 60))

    self.sess.close()
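# Both versions of train() rely on graph nodes (self.avg_loss, self.optimizer,
# self.decoded, self.label_err) that are built elsewhere. The following is a minimal
# sketch, under stated assumptions, of how such nodes are typically wired up for a
# CTC model in TF 1.x; it is not the project's actual network code, and Adam is only a
# stand-in for whatever optimizer the configuration selects. `logits` is assumed
# time-major ([max_time, batch, num_classes]).
import tensorflow as tf

def build_ctc_ops(logits, sparse_labels, seq_len, learning_rate=0.001):
    # CTC loss averaged over the batch
    ctc_loss = tf.nn.ctc_loss(labels=sparse_labels, inputs=logits, sequence_length=seq_len)
    avg_loss = tf.reduce_mean(ctc_loss)

    # gradient step on the averaged loss
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(avg_loss)

    # beam-search decoding; decoded[0] is a SparseTensor of predicted label ids
    decoded, _ = tf.nn.ctc_beam_search_decoder(logits, seq_len, merge_repeated=False)

    # label error rate = mean edit distance between the decoded output and the reference
    label_err = tf.reduce_mean(tf.edit_distance(tf.cast(decoded[0], tf.int32), sparse_labels))

    return avg_loss, optimizer, decoded, label_err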