def asr_mandarin(req_file: "UploadedFile"):
    """Mandarin ASR for an uploaded audio file: acoustic model -> pinyin,
    language model -> Chinese text.

    NOTE(review): load_data() presumably stores the upload at the module-level
    `filePath` that RecognizeSpeech_FromFile reads below — confirm. `asrPath`
    and `modelpath` are module globals not visible in this excerpt.
    """
    load_data(req_file)
    keras.backend.clear_session()  # reset Keras state between requests
    ms = ModelSpeech(asrPath)
    ms.LoadModel(modelpath + 'speech_model251_e_0_step_625000.model')
    r = ms.RecognizeSpeech_FromFile(filePath)  # pinyin sequence
    ml = ModelLanguage('model_language')
    ml.LoadModel()
    str_pinyin = r
    r = ml.SpeechToText(str_pinyin)  # pinyin -> hanzi
    return r
def __init__(self):
    """Build the ASR demo window: load the speech and language models, set
    the window geometry, create the recorder, and start capturing audio.
    """
    super(App, self).__init__()
    self.record_name = 'saved_record.wav'  # wav written before recognition
    # Acoustic model (audio -> pinyin) and language model (pinyin -> hanzi).
    self.ms = ModelSpeech('dataset')
    self.ms.LoadModel('model_speech/speech_model251_e_0_step_12000.model')
    self.ml = ModelLanguage('model_language')
    self.ml.LoadModel()
    # Window metadata/geometry consumed by initUI().
    self.title = 'ASR demo'
    self.left = 10
    self.top = 10
    self.width = 420
    self.height = 400
    self.rec = Recorder()
    #self.rec.start_thread()
    self.initUI()
    # Start capturing immediately on launch.
    self.rec.start()
def speech_recognition(f):
    """Run full speech recognition on audio file *f* and return Chinese text.

    Pipeline: acoustic model (audio -> pinyin), then language model
    (pinyin -> hanzi). Prints both intermediate and final results.
    """
    import os  # local import so the fix needs no file-level change

    datapath = '.'
    modelpath = 'model_speech'

    # Acoustic model: audio -> pinyin sequence.
    ms = ModelSpeech(datapath)
    # BUG FIX: the model path was built with a hard-coded '\\' separator,
    # which breaks on Linux/macOS; os.path.join is portable.
    ms.LoadModel(os.path.join(modelpath, 'speech_model251_e_0_step_625000.model'))
    r = ms.RecognizeSpeech_FromFile(f)
    K.clear_session()  # release the Keras graph after the acoustic pass
    print('*[提示] 语音识别结果:\n', r)

    # Language model: pinyin -> hanzi.
    ml = ModelLanguage('model_language')
    ml.LoadModel()
    r = ml.SpeechToText(r)
    print('语音转文字结果:\n', r)
    return r
def predict(video_file):
    """Recognize speech in *video_file*; return (pinyin sequence, Chinese text)."""
    from SpeechModel251 import ModelSpeech
    from LanguageModel2 import ModelLanguage
    from keras import backend as K

    # Acoustic model: waveform -> pinyin.
    speech_model = ModelSpeech('dataset')
    speech_model.LoadModel('model_speech' + '/m251/speech_model251_e_0_step_60500.model')
    pinyin = speech_model.RecognizeSpeech_FromFile(video_file)
    K.clear_session()

    # Language model: pinyin -> hanzi.
    lang_model = ModelLanguage('model_language')
    lang_model.LoadModel()
    text = lang_model.SpeechToText(pinyin)
    return pinyin, text
def CTC_tf(current_path):
    """Recognize current_path/chunk-00.wav and return the pinyin of the
    recognized Chinese text.

    Pipeline: acoustic model -> pinyin, language model -> hanzi,
    hanzi_pinyin() -> final pinyin result. Prints intermediate results.
    """
    import os  # local import so the portability fix needs no file-level change

    modelpath = 'model_speech'
    # Dataset root differs per operating system.
    system_type = plat.system()
    if system_type == 'Windows':
        datapath = current_path
    elif system_type == 'Linux':
        datapath = 'dataset'
    else:
        print('*[Message] Unknown System\n')
        datapath = 'dataset'

    ms = ModelSpeech(datapath)
    # os.path.join replaces the hand-appended '\\'/'/' separator logic.
    ms.LoadModel(os.path.join(modelpath, 'speech_model251_e_0_step_12000.model'))
    # BUG FIX: the wav path was joined with a literal '\\', which broke the
    # Linux branch above; os.path.join works on every platform.
    rr = ms.RecognizeSpeech_FromFile(os.path.join(current_path, 'chunk-00.wav'))
    print('*[提示] 语音识别结果:\n', rr)

    # Language model: pinyin -> hanzi.
    ml = ModelLanguage('model_language')
    ml.LoadModel()
    r = ml.SpeechToText(rr)
    print('语音转文字结果:\n', r)

    # Convert the hanzi back to pinyin for the caller.
    ctc_result = hanzi_pinyin(r)
    return ctc_result
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: nl8590687

End-to-end smoke test of the speech recognition system
(acoustic model only; the language-model step is not run here).
"""
import platform as plat

from SpeechModel251 import ModelSpeech
from keras import backend as K
import librosa
import soundfile as sf

modelpath = 'model_speech/'
datapath = '.'

ms = ModelSpeech(datapath)
ms.LoadModel(modelpath + 'speech_model251_e_0_step_625000.model')

# Resample the enrollment utterance to 16 kHz and round-trip it through a wav
# file so the recognizer reads exactly what librosa produced.
utt_dir = '/home/bicbrv/project/GREAT_audio_demo_server_new/formal_data/enrollment/#0008_智勇/智勇_0.wav'
wavsignal, sr = librosa.load(utt_dir, sr=16000)
sf.write('testing_asr.wav', wavsignal, sr)

r = ms.RecognizeSpeech_FromFile('testing_asr.wav')
print('*[提示] 语音识别结果:\n', r)
#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ @author: nl8590687 语音识别API的HTTP服务器程序 """ import http.server import urllib import keras from SpeechModel251 import ModelSpeech datapath = 'I:\\python_speech_file' modelpath = 'model_speech\\' ms = ModelSpeech(datapath) ms.LoadModel(modelpath + 'm251\\speech_model251_e_0_step_82500.model') class TestHTTPHandle(http.server.BaseHTTPRequestHandler): def setup(self): self.request.settimeout(10) http.server.BaseHTTPRequestHandler.setup(self) def _set_response(self): self.send_response(200) self.send_header('Content-type', 'text/html') self.end_headers() def do_GET(self):
# Ensure the checkpoint directory exists before anything saves into it.
if not os.path.exists(modelpath):
    os.makedirs(modelpath)

# Dataset location and path separator depend on the host OS.
system_type = plat.system()
if system_type == 'Windows':
    datapath = 'C:\\Users\\农夫三拳\\PycharmProjects\\Test\\ASRT_SpeechRecognition-master\\dataset'
    modelpath += '\\'
else:
    if system_type != 'Linux':
        print('*[Message] Unknown System\n')
    datapath = 'dataset'
    modelpath += '/'

ms = ModelSpeech(datapath)
ms.LoadModel(modelpath + 'm251/speech_model251_e_0_step_4000.model')

# Score 128 test utterances and write an evaluation report.
ms.TestModel(datapath, str_dataset='test', data_count=128, out_report=True)

# Single-file sanity check.
r = ms.RecognizeSpeech_FromFile('/home/ASRT_SpeechRecognition-master/dataset/ST-CMDS-20170001_1-OS/20170001P00001A0002.wav')
print('*[提示] 语音识别结果:\n', r)
# (Fragment: the head of pickModel() — including its earlier branches — is
# above this excerpt; the indentation below is reconstructed.)
        return name
    if (modelType == "2"):
        name = 'show/yq-speech_model251_e_0_step_4800.model'
        return name
    if (modelType == "3"):
        name = 'show/hq1speech_model251_e_0_step_410.model'
        return name
    if (modelType == "4"):
        name = 'show/hq2-speech_model251_e_0_step_500.model'
        return name
    else:
        # Any other input is rejected with a (Chinese) error message that the
        # caller compares against verbatim below.
        name = '类型输入不正确,请核查。'
        return name


ms = ModelSpeech(datapath)
# Interactive model selection: the user types 0-4 to pick a checkpoint.
print(
    '\n*成人语音模型:type=0 \n*直接训练模型:type=1 \n*基于成人模型训练的模型:type=2 \n*迁移学习1.0模型 type=3 \n*迁移学习2.0模型 type=4 \n'
)
print('-----请输入您想调用的模型类型。-----')
modelType = input("**type**:")  # 0 1 2 3 4
name = pickModel(modelType)
if name == "类型输入不正确,请核查。":
    print("模型的类型输入不符合要求,请核查。")
else:
    ms.LoadModel(modelpath + name)
    r = ms.RecognizeSpeech_FromFile("./dataset/data_child/test/A5_75.wav")
    K.clear_session()
# Flask routes for the ASR demo. NOTE(review): `app`, `func_show`, `datapath`,
# `modelpath`, and `tf` are defined above this excerpt.


@app.route('/result/')
def show_result():
    """Render the recognition result computed at startup (global `r`)."""
    rs = func_show(r)
    return render_template('show_result.html', result=rs)


@app.route('/test/')
def show_test():
    """Re-run recognition on a fixed test wav and render the text result."""
    # Must run inside the same TF graph the models were built in.
    with g.as_default():
        r_test = ms.RecognizeSpeech_FromFile('F:\\语音数据集\\test.wav')
        str_pinyin = r_test
        r_test = ml.SpeechToText(str_pinyin)  # pinyin -> hanzi
        print("testtesttest")
    # rs = func_test(r_test)
    return render_template('show_test.html', result=r_test)
    # return 'ge'


if __name__ == '__main__':
    # Build the models once inside an explicit graph so the request handlers
    # can re-enter it via `with g.as_default()`.
    g = tf.Graph()
    with g.as_default():
        ms = ModelSpeech(datapath)
        ms.LoadModel(modelpath + 'speech_model251_e_0_step_12000.model')
        # r = ms.RecognizeSpeech_FromFile('F:\\语音数据集\\data_thchs30\\test\\D4_750.wav')
        r = ms.RecognizeSpeech_FromFile(
            'F:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00136I0088.wav')
        r_test = ms.RecognizeSpeech_FromFile('F:\\语音数据集\\test.wav')
        ml = ModelLanguage('model_language')
        ml.LoadModel()
    app.run(debug=True)
#!/usr/bin/env python3 # encoding: utf-8 import logging import os from flask import request, Blueprint, abort, jsonify from werkzeug import secure_filename from LanguageModel import ModelLanguage from SpeechModel251 import ModelSpeech data_path = 'data/train_data/' ms = ModelSpeech(data_path) ms.LoadModel('data/speech_model/speech_model251_e_0_step_12000.model') ml = ModelLanguage('data/model_language/') ml.LoadModel() detect_speech_api = Blueprint('detect_language_api', __name__, template_folder='templates') ALLOWED_EXTENSIONS = set(['txt', 'pdf', 'png', 'jpg', 'jpeg', 'gif']) def allowed_file(filename): return '.' in filename and \ filename.rsplit('.', 1)[1] in ALLOWED_EXTENSIONS @detect_speech_api.route('/language/recognize/chinese/offline',
# Ensure the checkpoint directory exists before anything saves into it.
if not os.path.exists(modelpath):
    os.makedirs(modelpath)

# Dataset location and path separator depend on the host OS.
system_type = plat.system()
if system_type == 'Windows':
    datapath = 'E:\\语音数据集'
    modelpath += '\\'
else:
    if system_type != 'Linux':
        print('*[Message] Unknown System\n')
    datapath = 'dataset'
    modelpath += '/'

ms = ModelSpeech(datapath)
ms.LoadModel(modelpath + 'speech_model251_e_0_step_12000.model')

# Score 128 test utterances and write an evaluation report.
ms.TestModel(datapath, str_dataset='test', data_count=128, out_report=True)
# Dataset location and path separator depend on the host OS.
datapath = ''
modelpath = 'model_speech'
system_type = plat.system()
if(system_type == 'Windows'):
    datapath = 'dataset'
    modelpath = modelpath + '\\'
elif(system_type == 'Linux'):
    datapath = 'dataset'
    modelpath = modelpath + '/'
else:
    print('*[Message] Unknown System\n')
    datapath = 'dataset'
    modelpath = modelpath + '/'

ms = ModelSpeech(datapath)
ms.LoadModel(modelpath + 'speech_model251_e_0_step_625000.model')
ml = ModelLanguage('model_language')

# Batch-recognize every file in `speech_files` (defined above this excerpt).
for speech_file in sorted(speech_files):
    print(speech_file)
    # NOTE(review): the language model is re-loaded on every iteration;
    # hoisting ml.LoadModel() above the loop looks intended — confirm.
    ml.LoadModel()
    r = ms.RecognizeSpeech_FromFile(speech_file)
from LanguageModel2 import ModelLanguage
from keras import backend as K

datapath = ''
modelpath = 'model_speech'

# Dataset root depends on the host OS; model paths use os.path.join below.
system_type = plat.system()
if system_type == 'Windows':
    datapath = 'D:\\语音数据集'
else:
    if system_type != 'Linux':
        print('*[Message] Unknown System\n')
    datapath = 'dataset'

ms = ModelSpeech(datapath)
ms.LoadModel(os.path.join(modelpath, 'm251', 'speech_model251_e_0_step_12000.model'))

# Recognize one sample utterance, then release the Keras session.
r = ms.RecognizeSpeech_FromFile('D:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00241I0052.wav')
K.clear_session()
datapath = ''
modelpath = 'model_speech'

# Dataset root and path separator depend on the host OS.
system_type = plat.system()
if system_type == 'Windows':
    datapath = 'Z:\\SpeechData'
    modelpath += '\\'
else:
    if system_type != 'Linux':
        print('*[Message] Unknown System\n')
    datapath = 'dataset'
    modelpath += '/'

ms = ModelSpeech(datapath)
ms.LoadModel(modelpath + 'm251\\speech_model251_e_0_step_625000.h5')

# Recognize one sample utterance, then release the Keras session.
r = ms.RecognizeSpeech_FromFile(
    'D:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00241I0052.wav')
K.clear_session()
print('*[提示] 语音识别结果:\n', r)
# -*- coding: utf-8 -*- """ @author: nl8590687 语音识别API的HTTP服务器程序 """ import http.server import urllib import keras import os from SpeechModel251 import ModelSpeech from LanguageModel import ModelLanguage datapath = './' modelpath = 'model_speech/' ms = ModelSpeech(datapath) ms.LoadModel( os.path.join(modelpath, 'm251', 'speech_model251_e_0_step_12000.model')) ml = ModelLanguage('model_language') ml.LoadModel() class TestHTTPHandle(http.server.BaseHTTPRequestHandler): def setup(self): self.request.settimeout(10) http.server.BaseHTTPRequestHandler.setup(self) def _set_response(self): self.send_response(200) self.send_header('Content-type', 'text/html')
datapath = ''
modelpath = 'model_speech'

system_type = plat.system()  # paths differ per OS
if system_type == 'Windows':
    datapath = 'E:\\语音数据集'
    modelpath += '\\'
else:
    if system_type != 'Linux':
        print('*[Message] Unknown System\n')
    datapath = 'dataset'
    modelpath += '/'

ms = ModelSpeech(datapath)
ms.LoadModel(modelpath + 'speech_model26_e_0_step_22500.model')

# Recognize a single recorded utterance.
r = ms.RecognizeSpeech_FromFile(
    '/media/yean/BE6423686423231F/record/通用语料(不好意思给您带来不便,我们查看一下或到时重新给您补卡).wav')
print('*[提示] 语音识别结果:\n', r)

# Language model (pinyin -> hanzi), loaded by code that follows.
ml = ModelLanguage('model_language')
datapath = ''
modelpath = 'model_speech'

system_type = plat.system()  # paths differ per OS
if system_type == 'Windows':
    datapath = 'E:\\语音数据集'
    modelpath += '\\'
else:
    if system_type != 'Linux':
        print('*[Message] Unknown System\n')
    datapath = 'dataset'
    modelpath += '/'

ms = ModelSpeech(datapath)
ms.LoadModel(modelpath + 'speech_model251_e_0_step_12000.model')

# Recognize the locally recorded file.
r = ms.RecognizeSpeech_FromFile('output.wav')
print('*[提示] 语音识别结果:\n', r)

ml = ModelLanguage('model_language')
ml.LoadModel()
#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ @author: nl8590687 语音识别API的HTTP服务器程序 """ import http.server import urllib import tensorflow.keras from SpeechModel251 import ModelSpeech from LanguageModel import ModelLanguage datapath = './' modelpath = 'model_speech/' ms = ModelSpeech(datapath) ms.LoadModel(modelpath + 'm251/speech_model251_e_0_step_12000.model') ml = ModelLanguage('model_language') ml.LoadModel() class TestHTTPHandle(http.server.BaseHTTPRequestHandler): def setup(self): self.request.settimeout(10) http.server.BaseHTTPRequestHandler.setup(self) def _set_response(self): self.send_response(200) self.send_header('Content-type', 'text/html') self.end_headers()
# Configure TensorFlow to use at most 95% of GPU memory.
config = tf.compat.v1.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.95
#config.gpu_options.allow_growth=True  # alternative: grow allocation on demand
sess = tf.compat.v1.Session(config=config)
tf.compat.v1.keras.backend.set_session(sess)

datapath = ''
modelpath = 'model_speech'
# BUG FIX: previously the 'm<ModelName>' subdirectory was only created when
# the base directory was missing, so checkpoint saves could fail on a
# half-existing layout; exist_ok ensures both exist independently.
os.makedirs(modelpath, exist_ok=True)
os.makedirs(modelpath + '/m' + ModelName, exist_ok=True)

# Dataset location and path separator depend on the host OS.
system_type = plat.system()
if system_type == 'Windows':
    datapath = 'D:\\SpeechData'
    modelpath = modelpath + '\\'
elif system_type == 'Linux':
    datapath = 'dataset'
    modelpath = modelpath + '/'
else:
    print('*[Message] Unknown System\n')
    datapath = 'dataset'
    modelpath = modelpath + '/'

ms = ModelSpeech(datapath)
#ms.LoadModel(modelpath + 'speech_model251_e_0_step_327500.h5')  # resume from checkpoint
ms.TrainModel(datapath, epoch=50, batch_size=16, save_step=500)
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Cap TensorFlow at 95% of GPU memory.
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.95
#config.gpu_options.allow_growth=True  # alternative: grow allocation on demand
set_session(tf.Session(config=config))

datapath = ''
modelpath = 'model_speech'

# Make sure the checkpoint directory exists before training saves into it.
if not os.path.exists(modelpath):
    os.makedirs(modelpath)

system_type = plat.system()  # paths differ per OS
if system_type == 'Windows':
    datapath = 'E:\\语音数据集'
    modelpath += '\\'
else:
    if system_type != 'Linux':
        print('*[Message] Unknown System\n')
    datapath = 'dataset'
    modelpath += '/'

ms = ModelSpeech(datapath)
# Warm-start from an existing checkpoint, then continue training.
ms.LoadModel(modelpath + '/m251/' + 'speech_model251_e_0_step_27000.model')
print("Model loaded")
ms.TrainModel(datapath, epoch=50, batch_size=16, save_step=500)
datapath = ''
modelpath = 'model_speech'

system_type = plat.system()  # paths differ per OS
if system_type == 'Windows':
    datapath = '.'
    modelpath += '\\'
elif system_type == 'Linux':
    datapath = '.'
    modelpath += '/'
else:
    print('*[Message] Unknown System\n')
    datapath = 'dataset'
    modelpath += '/'

ms = ModelSpeech(datapath)
ms.LoadModel(modelpath + 'speech_model251_e_0_step_625000.model')

# Recognize one real-speech sample, then release the Keras session.
r = ms.RecognizeSpeech_FromFile(
    'F:\\文件\\Tencent Files\\904292841\\FileRecv\\904292841\\FileRecv\\真实语音数据\\21-30\\22\\22_03.wav'
)
K.clear_session()
print('*[提示] 语音识别结果:\n', r)

ml = ModelLanguage('model_language')
ml.LoadModel()
# Ensure the checkpoint directory exists before anything saves into it.
if not os.path.exists(modelpath):
    os.makedirs(modelpath)

system_type = plat.system()  # paths differ per OS
if system_type == 'Windows':
    datapath = 'I:\\python_speech_file'
    modelpath += '\\'
else:
    if system_type != 'Linux':
        print('*[Message] Unknown System\n')
    datapath = 'dataset'
    modelpath += '/'

ms = ModelSpeech(datapath)
ms.LoadModel(modelpath + 'm251/speech_model251_e_0_step_79500.model')

# Score 256 dev-set utterances and write an evaluation report.
ms.TestModel(datapath, str_dataset='dev', data_count=256, out_report=True)
datapath = ''
modelpath = 'model_speech'

system_type = plat.system()  # paths differ per OS
if system_type == 'Windows':
    datapath = '.'
    modelpath += '\\'
elif system_type == 'Linux':
    datapath = '.'
    modelpath += '/'
else:
    print('*[Message] Unknown System\n')
    datapath = 'dataset'
    modelpath += '/'

ms = ModelSpeech(datapath)
ms.LoadModel(modelpath + 'speech_model251_e_0_step_625000.model')

# Recognize one file, then release the Keras session.
r = ms.RecognizeSpeech_FromFile('C:\\Users\\hezudao\\Desktop\\hello.wav')
K.clear_session()
print('*[提示] 语音识别结果:\n', r)

# Language model converts the pinyin sequence (`str_pinyin`) to hanzi below.
ml = ModelLanguage('model_language')
ml.LoadModel()
str_pinyin = r
#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ @author: nl8590687 语音识别API的HTTP服务器程序 """ import http.server import urllib import keras from SpeechModel251 import ModelSpeech from LanguageModel2 import ModelLanguage datapath = './' modelpath = 'model_speech/' ms = ModelSpeech(datapath) ms.LoadModel(modelpath + 'speech_model251_e_0_step_625000.model') ml = ModelLanguage('model_language') ml.LoadModel() class TestHTTPHandle(http.server.BaseHTTPRequestHandler): def setup(self): self.request.settimeout(10) http.server.BaseHTTPRequestHandler.setup(self) def _set_response(self): self.send_response(200) self.send_header('Content-type', 'text/html') self.end_headers()
datapath = ''
modelpath = 'model_speech'

system_type = plat.system()  # paths differ per OS
if system_type == 'Windows':
    datapath = 'Z:\\dataset'
    modelpath += '\\'
elif system_type == 'Linux':
    datapath = '../dataset'  # adjust to the local dataset location
    modelpath += '/'
else:
    print('*[Message] Unknown System\n')
    datapath = 'dataset'
    modelpath += '/'

ms = ModelSpeech(datapath)
ms.LoadModel(modelpath + 'm251/speech_model251_e_0_step_625000.model')

# Recognize one sample utterance, then release the Keras session.
r = ms.RecognizeSpeech_FromFile(
    '../dataset/ST-CMDS-20170001_1-OS/20170001P00241I0052.wav')
K.clear_session()
print('*[提示] 语音识别结果:\n', r)
#config.gpu_options.allow_growth=True  # alternative: grow GPU memory on demand
set_session(tf.Session(config=config))

datapath = ''
modelpath = 'model_speech'

# Make sure the checkpoint directory exists before evaluation writes reports.
if not os.path.exists(modelpath):
    os.makedirs(modelpath)

system_type = plat.system()  # dataset root differs per OS
if system_type == 'Windows':
    datapath = 'E:\\语音数据集'
else:
    if system_type != 'Linux':
        print('*[Message] Unknown System\n')
    datapath = 'dataset'

ms = ModelSpeech(datapath)
ms.LoadModel(
    os.path.join(modelpath, 'm251', 'speech_model251_e_0_step_42500.model'))

# Score 128 test utterances and write an evaluation report.
ms.TestModel(datapath, str_dataset='test', data_count=128, out_report=True)
class App(QWidget):
    """PyQt window for the ASR demo: record from the microphone, then run the
    speech + language models and append the transcript to a text box."""

    def __init__(self):
        super(App, self).__init__()
        self.record_name = 'saved_record.wav'  # wav written before recognition
        # Acoustic model (audio -> pinyin) and language model (pinyin -> hanzi).
        self.ms = ModelSpeech('dataset')
        self.ms.LoadModel('model_speech/speech_model251_e_0_step_12000.model')
        self.ml = ModelLanguage('model_language')
        self.ml.LoadModel()
        # Window metadata/geometry consumed by initUI().
        self.title = 'ASR demo'
        self.left = 10
        self.top = 10
        self.width = 420
        self.height = 400
        self.rec = Recorder()
        #self.rec.start_thread()
        self.initUI()
        # Start capturing immediately on launch.
        self.rec.start()
        #self.setWindowTitle("ASR demo")

    def initUI(self):
        """Create the three buttons and the read-only transcript box, then show."""
        self.setWindowTitle(self.title)
        self.setGeometry(self.left, self.top, self.width, self.height)
        button = QPushButton('Record', self)
        button.setToolTip('Press to start recording')
        button.move(100, 70)
        button.clicked.connect(self.start_record)
        button = QPushButton('To Transcript', self)
        button.setToolTip('Press to convert to transcript')
        button.move(200, 70)
        button.clicked.connect(self.stop_record)
        button = QPushButton('Clear', self)
        button.setToolTip('Press to clear transcripts')
        button.move(100, 100)
        button.clicked.connect(self.clear)
        self.text_edit = QTextEdit("What you said: ", self)
        self.text_edit.setReadOnly(True)
        self.text_edit.move(100, 140)
        #self.results=QLabel(self)
        #self.results.move(100,140)
        self.show()

    @pyqtSlot()
    def clear(self):
        """Reset the transcript box to its initial prompt."""
        self.text_edit.clear()
        self.text_edit.append("What you said: ")

    @pyqtSlot()
    def start_record(self):
        """(Re)start microphone capture."""
        self.rec.start()
        #print('PyQt5 button click')
        #self.rec.start()

    @pyqtSlot()
    def stop_record(self):
        """Stop capture, save the wav, recognize it, and append the transcript."""
        print(len(self.rec._frames))  # debug: number of captured audio frames
        #print('PyQt5 button click')
        self.rec.stop()
        self.rec.save(self.record_name)
        # Acoustic model -> pinyin, then language model -> hanzi.
        r = self.ms.RecognizeSpeech_FromFile(self.record_name)
        self.w = self.ml.SpeechToText(r)
        print('语音转文字结果:\n', self.w)
        self.text_edit.append(self.w)
datapath = ''
modelpath = 'model_speech'

system_type = plat.system()  # paths differ per OS
if system_type == 'Windows':
    datapath = 'C:\\test'
    modelpath += '\\'
else:
    if system_type != 'Linux':
        print('*[Message] Unknown System\n')
    datapath = 'dataset'
    modelpath += '/'

ms = ModelSpeech(datapath)
ms.LoadModel(modelpath + 'speech_model251_e_0_step_12000.model')

# Recognize a single test file.
r = ms.RecognizeSpeech_FromFile('C:\\test\\online_star.wav')
print('*[提示] 语音识别结果:\n', r)

# Language model (pinyin -> hanzi) used by code that follows.
ml = ModelLanguage('model_language')
ml.LoadModel()
datapath = ''
modelpath = 'model_speech'

system_type = plat.system()  # paths differ per OS
if system_type == 'Windows':
    datapath = 'D:\\AISHELL-2-sample\\iOS\\data'
    modelpath += '\\'
else:
    if system_type != 'Linux':
        print('*[Message] Unknown System\n')
    datapath = 'dataset'
    modelpath += '/'

ms = ModelSpeech(datapath)
ms.LoadModel(modelpath + 'm251\\speech_model251_e_0_step_82500.model')

i = 0  # kept: may be read by code below this excerpt

# Point at one AISHELL-2 sample utterance and recognize it.
datapath = datapath + '\\wav\\C0936\\IC0936W0001.wav'
r = ms.RecognizeSpeech_FromFile(
    'D:\\AISHELL-2-sample\\iOS\\data\\wav\\C0936\\IC0936W0001.wav')
print('*[提示] 语音识别结果:\n', r)
K.clear_session()