def testModel(num_examples=100):
    """Evaluate the speech-recognition pipeline on the test split.

    Runs the recognizer on the first ``num_examples`` test utterances and
    prints the character error rate (CER): edit-distance errors divided by
    the total number of reference characters.

    Args:
        num_examples: how many test utterances to evaluate (default 100,
            matching the original hard-coded loop bound).
    """
    yysb = utils.SpeechRecognition()

    # 1. Prepare the test data. It need not match the training data;
    #    the split is selected via data_args.data_type.
    data_args = utils.data_hparams()
    data_args.data_type = 'test'
    data_args.shuffle = True
    data_args.batch_size = 1
    test = utils.get_data(data_args)

    # 2. Run the evaluation -------------------------------------------
    word_num = 0
    word_error_num = 0
    for i in range(num_examples):
        print('\n 第 ', i, ' 个例子')
        label = test.han_lst[i]
        pinyin, hanzi = yysb.predict(
            os.path.join(test.data_path, test.wav_lst[i]),
            test.pny_lst[i], label, come_from_file=True)
        # hanzi = yysb.testPinyin(' '.join(test.pny_lst[i]))  # test the language model alone
        # Cap per-utterance errors at the reference length so CER <= 100%.
        word_error_num += min(len(label), utils.GetEditDistance(label, hanzi))
        word_num += len(label)
    # Guard against division by zero when no reference characters were seen.
    if word_num:
        print('词错误率:', word_error_num / word_num)
def __init__(self, path=None, serviceAddress='http://127.0.0.1:20000/', fun_use=False): self.httpService = serviceAddress #音频波形动态显示,实时显示波形,实时进行离散傅里叶变换分析频域 if path is not None and os.path.isfile(path): self.stream = wave.open(path) self.rate = self.stream.getparams()[2] self.chunk = int(self.rate / 1000 * 25) self.read = self.stream.readframes else: self.rate = 16000 self.chunk = 400 #25*16000/1000针对语音识别25ms为一块这里相同设置 p = pyaudio.PyAudio() self.stream = p.open(format=pyaudio.paInt16, channels=1, rate=self.rate, input=True, frames_per_buffer=self.chunk) self.read = self.stream.read self.yysb = utils.SpeechRecognition(test_flag=False) ''' self.data说明: 按时调用时: 用来记录一整段话的数据,当听到明显声音开始填充,每次都把整个的内容送给语音识别,以期达到效果为: 你 你好 你好啊 当一个指定时间内没有明显声音时则清空 自动判断启停时: 从判断开始的数据开始记录,直到判断停止说话准备清空数据前调用一次API,效果: 你好啊 ''' #self.data=np.ndarray(shape=(0), dtype=np.int16) self.data = [] self.resHan = [] #语音识别结果,类型待定 fig = plt.figure(num='Real-time wave') ax1 = fig.add_subplot(2, 1, 1) #两行一列,第一子图 ax2 = fig.add_subplot(2, 1, 2) #两行一列,第二子图 self.t = np.linspace(0, self.chunk - 1, self.chunk) #ax1.set_xlabel('t') #ax1.set_ylabel('x') self.line1, = ax1.plot([], [], lw=2) ax1.set_xlim(0, self.chunk) ax1.set_ylim(-6000, 6000) self.line2, = ax2.plot([], [], lw=2) ax2.set_xlim(0, self.chunk) ax2.set_ylim(-10, 50) interval = int(1000 * self.chunk / self.rate) #更新间隔/ms if not fun_use: animation.TimedAnimation.__init__(self, fig, interval=interval, blit=True)
'''
# Check service status:
# curl http://localhost:8501/v1/models/lm
# Inspect model inputs/outputs:
# saved_model_cli show --dir /media/yangjinming/DATA/GitHub/AboutPython/AboutDL/语音识别/logs_lm/190612/ --all
# Shut down serving (kills ALL docker containers -- adjust as appropriate):
# sudo docker ps | xargs sudo docker kill

# Supported API types; a token not in this list is treated as invalid.
API_Surport_List = ['SR']
# Whether to use TensorFlow Serving; if enabled, this exposed server acts only as a relay.
ues_tf_serving = True
# URL of TensorFlow Serving; normally only the IP needs changing.
tf_serving_url = 'http://localhost:8501/v1/models/{}:predict'
# Only load the recognizer locally when TF Serving is not used.
if not ues_tf_serving:
    yysb = utils.SpeechRecognition(test_flag=False)


class TestHTTPHandle(http.server.BaseHTTPRequestHandler):
    """HTTP request handler exposing the speech-recognition API."""

    def setup(self):
        """Apply a 10 s socket timeout before the default handler setup."""
        self.request.settimeout(10)
        http.server.BaseHTTPRequestHandler.setup(self)

    def _set_response(self):
        """Send a 200 OK with permissive CORS headers for browser clients."""
        self.send_response(200)
        self.send_header('Content-type', 'text/html')
        self.send_header('Access-Control-Allow-Origin', '*')
        self.send_header("Access-Control-Allow-Headers", "Content-Type")
        self.end_headers()

    def do_GET(self):