def asr_mandarin(req_file: "UploadedFile"):
    """Transcribe an uploaded Mandarin audio file into Chinese text.

    Two-stage pipeline: the acoustic model emits a pinyin sequence from
    the audio, then the language model converts that sequence to hanzi.
    """
    load_data(req_file)
    # Reset the Keras graph so repeated requests do not accumulate state.
    keras.backend.clear_session()
    speech_model = ModelSpeech(asrPath)
    speech_model.LoadModel(modelpath + 'speech_model251_e_0_step_625000.model')
    # NOTE(review): recognition reads the module-level `filePath`,
    # presumably set by load_data() — confirm against that helper.
    pinyin_seq = speech_model.RecognizeSpeech_FromFile(filePath)
    language_model = ModelLanguage('model_language')
    language_model.LoadModel()
    return language_model.SpeechToText(pinyin_seq)
def __init__(self):
    """Set up the demo window: load both ASR models, configure the
    window geometry, build the widgets, and start recording."""
    super(App, self).__init__()
    self.record_name = 'saved_record.wav'
    # Acoustic model (audio -> pinyin) and language model (pinyin -> hanzi).
    self.ms = ModelSpeech('dataset')
    self.ms.LoadModel('model_speech/speech_model251_e_0_step_12000.model')
    self.ml = ModelLanguage('model_language')
    self.ml.LoadModel()
    # Window title and placement.
    self.title = 'ASR demo'
    self.left = 10
    self.top = 10
    self.width = 420
    self.height = 400
    self.rec = Recorder()
    self.initUI()
    # Begin capturing audio as soon as the UI is up.
    self.rec.start()
def speech_recognition(f):
    """Recognize Mandarin speech from audio file *f*; return hanzi text.

    Pipeline: acoustic model -> pinyin sequence -> language model -> text.
    Prints both intermediate and final results, mirroring the CLI demo.
    """
    import os

    datapath = '.'
    modelpath = 'model_speech'
    ms = ModelSpeech(datapath)
    # os.path.join keeps the model path valid on every OS; the original
    # hard-coded the Windows '\\' separator and broke elsewhere.
    ms.LoadModel(os.path.join(modelpath, 'speech_model251_e_0_step_625000.model'))
    r = ms.RecognizeSpeech_FromFile(f)
    # Free the TF/Keras graph once the acoustic pass is done.
    K.clear_session()
    print('*[提示] 语音识别结果:\n', r)
    ml = ModelLanguage('model_language')
    ml.LoadModel()
    r = ml.SpeechToText(r)
    print('语音转文字结果:\n', r)
    return r
def predict(video_file):
    """Run ASR over *video_file*; return ``(pinyin_sequence, hanzi_text)``."""
    from SpeechModel251 import ModelSpeech
    from LanguageModel2 import ModelLanguage
    from keras import backend as K

    acoustic = ModelSpeech('dataset')
    acoustic.LoadModel('model_speech' + '/m251/speech_model251_e_0_step_60500.model')
    pinyin = acoustic.RecognizeSpeech_FromFile(video_file)
    # Drop the acoustic graph before the language model is loaded.
    K.clear_session()

    language = ModelLanguage('model_language')
    language.LoadModel()
    text = language.SpeechToText(pinyin)
    return pinyin, text
def CTC_tf(current_path):
    """Recognize the speech in ``current_path``/chunk-00.wav, return its pinyin.

    The acoustic model yields a pinyin sequence, the language model turns
    it into hanzi, and ``hanzi_pinyin`` maps the text back to pinyin for
    the caller.
    """
    import os

    # Dataset location differs per OS in the original project layout.
    system_type = plat.system()
    if system_type == 'Windows':
        datapath = current_path
    else:
        if system_type != 'Linux':
            print('*[Message] Unknown System\n')
        datapath = 'dataset'

    ms = ModelSpeech(datapath)
    # os.path.join replaces the hand-built separators: the original used a
    # hard-coded '\\' for the wav path, which broke the Linux branch even
    # though the code detected the OS above.
    ms.LoadModel(os.path.join('model_speech', 'speech_model251_e_0_step_12000.model'))
    rr = ms.RecognizeSpeech_FromFile(os.path.join(current_path, 'chunk-00.wav'))
    print('*[提示] 语音识别结果:\n', rr)

    ml = ModelLanguage('model_language')
    ml.LoadModel()
    r = ml.SpeechToText(rr)
    print('语音转文字结果:\n', r)
    return hanzi_pinyin(r)
datapath = 'C:\\test' modelpath = modelpath + '\\' elif (system_type == 'Linux'): datapath = 'dataset' modelpath = modelpath + '/' else: print('*[Message] Unknown System\n') datapath = 'dataset' modelpath = modelpath + '/' ms = ModelSpeech(datapath) ms.LoadModel(modelpath + 'speech_model251_e_0_step_12000.model') #ms.TestModel(datapath, str_dataset='test', data_count = 64, out_report = True) r = ms.RecognizeSpeech_FromFile('C:\\test\\online_star.wav') print('*[提示] 语音识别结果:\n', r) ml = ModelLanguage('model_language') ml.LoadModel() #str_pinyin = ['zhe4','zhen1','shi4','ji2', 'hao3','de5'] #str_pinyin = ['jin1', 'tian1', 'shi4', 'xing1', 'qi1', 'san1'] #str_pinyin = ['ni3', 'hao3','a1'] str_pinyin = r #str_pinyin = ['su1', 'bei3', 'jun1', 'de5', 'yi4','xie1', 'ai4', 'guo2', 'jiang4', 'shi4', 'ma3', 'zhan4', 'shan1', 'ming2', 'yi1', 'dong4', 'ta1', 'ju4', 'su1', 'bi3', 'ai4', 'dan4', 'tian2','mei2', 'bai3', 'ye3', 'fei1', 'qi3', 'kan4', 'zhan4'] r = ml.SpeechToText(str_pinyin) print('语音转文字结果:\n', r)
@app.route('/result/')
def show_result():
    """Render the recognition result computed once at startup."""
    rs = func_show(r)
    return render_template('show_result.html', result=rs)


@app.route('/test/')
def show_test():
    """Recognize the fixed test wav on demand and render its text."""
    with g.as_default():
        r_test = ms.RecognizeSpeech_FromFile('F:\\语音数据集\\test.wav')
        r_test = ml.SpeechToText(r_test)
        print("testtesttest")
        return render_template('show_test.html', result=r_test)


if __name__ == '__main__':
    # Pin both models to one TF graph; the route re-enters it per request.
    g = tf.Graph()
    with g.as_default():
        ms = ModelSpeech(datapath)
        ms.LoadModel(modelpath + 'speech_model251_e_0_step_12000.model')
        r = ms.RecognizeSpeech_FromFile(
            'F:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00136I0088.wav')
        r_test = ms.RecognizeSpeech_FromFile('F:\\语音数据集\\test.wav')
        ml = ModelLanguage('model_language')
        ml.LoadModel()
        app.run(debug=True)
class App(QWidget):
    """Minimal PyQt5 front-end for the ASR demo.

    Records microphone audio, saves it to a wav file, runs the acoustic
    and language models on it, and shows the recognized Chinese text in
    a read-only text box.
    """

    def __init__(self):
        super(App, self).__init__()
        self.record_name = 'saved_record.wav'
        # Acoustic model (audio -> pinyin) and language model (pinyin -> hanzi).
        self.ms = ModelSpeech('dataset')
        self.ms.LoadModel('model_speech/speech_model251_e_0_step_12000.model')
        self.ml = ModelLanguage('model_language')
        self.ml.LoadModel()
        # Window title and geometry.
        self.title = 'ASR demo'
        self.left = 10
        self.top = 10
        self.width = 420
        self.height = 400
        self.rec = Recorder()
        self.initUI()
        # Begin capturing audio immediately on launch.
        self.rec.start()

    def _add_button(self, label, tip, x, y, handler):
        """Create one push button at (x, y) wired to *handler*."""
        btn = QPushButton(label, self)
        btn.setToolTip(tip)
        btn.move(x, y)
        btn.clicked.connect(handler)
        return btn

    def initUI(self):
        """Lay out the window, its three buttons, and the transcript box."""
        self.setWindowTitle(self.title)
        self.setGeometry(self.left, self.top, self.width, self.height)
        self._add_button('Record', 'Press to start recording',
                         100, 70, self.start_record)
        self._add_button('To Transcript', 'Press to convert to transcript',
                         200, 70, self.stop_record)
        self._add_button('Clear', 'Press to clear transcripts',
                         100, 100, self.clear)
        self.text_edit = QTextEdit("What you said: ", self)
        self.text_edit.setReadOnly(True)
        self.text_edit.move(100, 140)
        self.show()

    @pyqtSlot()
    def clear(self):
        """Reset the transcript box to its initial prompt."""
        self.text_edit.clear()
        self.text_edit.append("What you said: ")

    @pyqtSlot()
    def start_record(self):
        """Begin a new recording."""
        self.rec.start()

    @pyqtSlot()
    def stop_record(self):
        """Stop recording, save the wav, run ASR, append the transcript."""
        print(len(self.rec._frames))
        self.rec.stop()
        self.rec.save(self.record_name)
        pinyin = self.ms.RecognizeSpeech_FromFile(self.record_name)
        self.w = self.ml.SpeechToText(pinyin)
        print('语音转文字结果:\n', self.w)
        self.text_edit.append(self.w)