Ejemplo n.º 1
0
def asr_mandarin(req_file: "UploadedFile"):
    """Transcribe an uploaded Mandarin recording.

    The upload is handed to ``load_data``; the audio found at the
    module-level ``filePath`` (presumably written by ``load_data`` -- TODO
    confirm) is decoded by the acoustic model and the result is passed on to
    the language model.

    Args:
        req_file: The uploaded file object, forwarded to ``load_data``.

    Returns:
        The value returned by ``ModelLanguage.SpeechToText`` for the
        recognised sequence.
    """
    load_data(req_file)
    # Clear the Keras session before a fresh acoustic model is built.
    keras.backend.clear_session()

    acoustic_model = ModelSpeech(asrPath)
    acoustic_model.LoadModel(modelpath + 'speech_model251_e_0_step_625000.model')
    recognised = acoustic_model.RecognizeSpeech_FromFile(filePath)

    language_model = ModelLanguage('model_language')
    language_model.LoadModel()
    return language_model.SpeechToText(recognised)
Ejemplo n.º 2
0
 def __init__(self):
     """Set the window properties, load both models, build the UI and start the recorder."""
     super(App, self).__init__()

     # Window caption and geometry.
     self.title = 'ASR demo'
     self.left, self.top = 10, 10
     self.width, self.height = 420, 400

     # File name used for the saved recording.
     self.record_name = 'saved_record.wav'

     # Acoustic model followed by the language model.
     self.ms = ModelSpeech('dataset')
     self.ms.LoadModel('model_speech/speech_model251_e_0_step_12000.model')
     self.ml = ModelLanguage('model_language')
     self.ml.LoadModel()

     # Build the UI first, then start the recorder.
     self.rec = Recorder()
     self.initUI()
     self.rec.start()
Ejemplo n.º 3
0
def speech_recognition(f):
    """Recognise the speech in an audio file and convert the result to text.

    Args:
        f: Path of the audio file, passed to ``RecognizeSpeech_FromFile``.

    Returns:
        The value returned by ``ModelLanguage.SpeechToText`` for the
        recognised sequence.
    """
    import os  # local import: the file's top-level imports are not visible here

    datapath = '.'
    # os.path.join picks the platform separator, so the model file is also
    # found on non-Windows hosts (a literal '\\' is only a separator on Windows).
    model_file = os.path.join('model_speech',
                              'speech_model251_e_0_step_625000.model')

    ms = ModelSpeech(datapath)
    ms.LoadModel(model_file)
    r = ms.RecognizeSpeech_FromFile(f)

    # Clear the Keras session once the acoustic model has produced its result.
    K.clear_session()
    print('*[提示] 语音识别结果:\n', r)

    ml = ModelLanguage('model_language')
    ml.LoadModel()
    r = ml.SpeechToText(r)
    print('语音转文字结果:\n', r)
    return r
Ejemplo n.º 4
0
def predict(video_file):
    """Run the acoustic model and then the language model on one file.

    Args:
        video_file: Path passed to ``ModelSpeech.RecognizeSpeech_FromFile``.

    Returns:
        A ``(pinyin, text)`` tuple: the acoustic-model output and the
        language-model output derived from it.
    """
    # The model modules are imported only when a prediction is requested.
    from SpeechModel251 import ModelSpeech
    from LanguageModel2 import ModelLanguage
    from keras import backend as K

    acoustic_model = ModelSpeech('dataset')
    acoustic_model.LoadModel(
        'model_speech/m251/speech_model251_e_0_step_60500.model')
    pinyin = acoustic_model.RecognizeSpeech_FromFile(video_file)
    K.clear_session()

    language_model = ModelLanguage('model_language')
    language_model.LoadModel()
    return pinyin, language_model.SpeechToText(pinyin)
Ejemplo n.º 5
0
def CTC_tf(current_path):
    """Recognise ``chunk-00.wav`` under *current_path* and post-process the text.

    Args:
        current_path: Directory that contains ``chunk-00.wav``. On Windows it
            is also used as the data path given to ``ModelSpeech``.

    Returns:
        The value returned by ``hanzi_pinyin`` for the language-model output.
    """
    import os  # local import: the file's top-level imports are not visible here

    # The data path depends on the operating system.
    system_type = plat.system()
    if system_type == 'Windows':
        datapath = current_path
    else:
        if system_type != 'Linux':
            print('*[Message] Unknown System\n')
        datapath = 'dataset'

    ms = ModelSpeech(datapath)
    # os.path.join yields the same separator the per-platform branches used to
    # append by hand.
    ms.LoadModel(
        os.path.join('model_speech', 'speech_model251_e_0_step_12000.model'))

    # Join with the platform separator; a literal '\\' is not a path separator
    # on Linux, so the chunk could never be found there.
    rr = ms.RecognizeSpeech_FromFile(os.path.join(current_path, 'chunk-00.wav'))
    print('*[提示] 语音识别结果:\n', rr)

    ml = ModelLanguage('model_language')
    ml.LoadModel()
    r = ml.SpeechToText(rr)
    print('语音转文字结果:\n', r)

    return hanzi_pinyin(r)
Ejemplo n.º 6
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: nl8590687
Program for testing the complete speech recognition system:
speech model + language model.
"""
import platform as plat

from SpeechModel251 import ModelSpeech
from keras import backend as K

import librosa
import soundfile as sf

# Directory holding the model weights and the data path given to ModelSpeech.
modelpath = 'model_speech/'
datapath = '.'

# Build the acoustic model and load the trained weights.
ms = ModelSpeech(datapath)
ms.LoadModel(modelpath + 'speech_model251_e_0_step_625000.model')

# Recording to transcribe.
utt_dir = '/home/bicbrv/project/GREAT_audio_demo_server_new/formal_data/enrollment/#0008_智勇/智勇_0.wav'

# Load the recording at a 16 kHz sample rate and write it to a local WAV file,
# which is what the recogniser is then given.
wavsignal, sr = librosa.load(utt_dir, sr=16000)
sf.write('testing_asr.wav', wavsignal, sr)
r = ms.RecognizeSpeech_FromFile('testing_asr.wav')

#K.clear_session()
# Print the recognition result.
print('*[提示] 语音识别结果:\n', r)
Ejemplo n.º 7
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: nl8590687
HTTP server program for the speech recognition API.

"""
import http.server
import urllib
import keras
from SpeechModel251 import ModelSpeech

# Data path given to ModelSpeech and the directory holding the weights; both
# are written with Windows path separators.
datapath = 'I:\\python_speech_file'
modelpath = 'model_speech\\'
# The acoustic model is built and its weights loaded at import time.
ms = ModelSpeech(datapath)
ms.LoadModel(modelpath + 'm251\\speech_model251_e_0_step_82500.model')



class TestHTTPHandle(http.server.BaseHTTPRequestHandler):
    def setup(self):
        self.request.settimeout(10)
        http.server.BaseHTTPRequestHandler.setup(self)

    def _set_response(self):
        self.send_response(200)
        self.send_header('Content-type', 'text/html')
        self.end_headers()

    def do_GET(self):

# Create the model directory when it is missing so that saving a model later
# does not fail on a non-existent folder.
if not os.path.exists(modelpath):
	os.makedirs(modelpath)

# Path conventions differ between operating systems, so branch on the platform.
system_type = plat.system()
if system_type == 'Windows':
	datapath = 'C:\\Users\\农夫三拳\\PycharmProjects\\Test\\ASRT_SpeechRecognition-master\\dataset'
	modelpath += '\\'
else:
	if system_type != 'Linux':
		print('*[Message] Unknown System\n')
	datapath = 'dataset'
	modelpath += '/'

# Load the acoustic model and evaluate it on 128 items of the test set.
ms = ModelSpeech(datapath)
ms.LoadModel(modelpath + 'm251/speech_model251_e_0_step_4000.model')
ms.TestModel(datapath, str_dataset='test', data_count=128, out_report=True)

# Recognise a single sample recording and print the result.
r = ms.RecognizeSpeech_FromFile(
	'/home/ASRT_SpeechRecognition-master/dataset/ST-CMDS-20170001_1-OS/20170001P00001A0002.wav')
print('*[提示] 语音识别结果:\n', r)


Ejemplo n.º 9
0
        return name
    if (modelType == "2"):
        name = 'show/yq-speech_model251_e_0_step_4800.model'
        return name
    if (modelType == "3"):
        name = 'show/hq1speech_model251_e_0_step_410.model'
        return name
    if (modelType == "4"):
        name = 'show/hq2-speech_model251_e_0_step_500.model'
        return name
    else:
        name = '类型输入不正确,请核查。'
        return name


# Acoustic model; its weights are loaded below once the user has picked a type.
ms = ModelSpeech(datapath)

# Print the menu of selectable model types (0-4).
print(
    '\n*成人语音模型:type=0 \n*直接训练模型:type=1 \n*基于成人模型训练的模型:type=2 \n*迁移学习1.0模型 type=3 \n*迁移学习2.0模型 type=4 \n'
)
print('-----请输入您想调用的模型类型。-----')
# Read the user's choice as a string.
modelType = input("**type**:")  # 0 1 2 3 4

# pickModel returns either a model file name or the error message compared
# against below.
name = pickModel(modelType)
if name == "类型输入不正确,请核查。":
    # Invalid choice: report it and load nothing.
    print("模型的类型输入不符合要求,请核查。")
else:
    ms.LoadModel(modelpath + name)

    # Recognise one sample recording, then clear the Keras session.
    r = ms.RecognizeSpeech_FromFile("./dataset/data_child/test/A5_75.wav")
    K.clear_session()
Ejemplo n.º 10
0
@app.route('/result/')
def show_result():
    """Render ``show_result.html`` with ``func_show`` applied to the module-level ``r``."""
    return render_template('show_result.html', result=func_show(r))


@app.route('/test/')
def show_test():
    """Recognise the fixed test recording and render ``show_test.html`` with the text."""
    # Inference runs inside the graph the models were loaded in.
    with g.as_default():
        pinyin = ms.RecognizeSpeech_FromFile('F:\\语音数据集\\test.wav')
        text = ml.SpeechToText(pinyin)
        print("testtesttest")
    return render_template('show_test.html', result=text)
    # return 'ge'


if __name__ == '__main__':
    # Build and load both models inside one dedicated graph; the /test/ route
    # re-enters the same graph (`g.as_default()`) before running recognition.
    g = tf.Graph()
    with g.as_default():
        ms = ModelSpeech(datapath)
        ms.LoadModel(modelpath + 'speech_model251_e_0_step_12000.model')
        # r = ms.RecognizeSpeech_FromFile('F:\\语音数据集\\data_thchs30\\test\\D4_750.wav')
        # `r` is the module-level result that the /result/ route displays.
        r = ms.RecognizeSpeech_FromFile(
            'F:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00136I0088.wav')
        r_test = ms.RecognizeSpeech_FromFile('F:\\语音数据集\\test.wav')
        ml = ModelLanguage('model_language')
        ml.LoadModel()
    # Start the Flask development server with debug mode enabled.
    app.run(debug=True)
Ejemplo n.º 11
0
#!/usr/bin/env python3
# encoding: utf-8

import logging
import os
from flask import request, Blueprint, abort, jsonify
from werkzeug import secure_filename

from LanguageModel import ModelLanguage
from SpeechModel251 import ModelSpeech

# Data path for the acoustic model; the model is built and its weights are
# loaded at import time.
data_path = 'data/train_data/'
ms = ModelSpeech(data_path)
ms.LoadModel('data/speech_model/speech_model251_e_0_step_12000.model')

# Language model, also loaded at import time.
ml = ModelLanguage('data/model_language/')
ml.LoadModel()

# Blueprint for this module's routes.
# NOTE(review): the blueprint name is 'detect_language_api' while the variable
# is `detect_speech_api` -- confirm the mismatch is intended.
detect_speech_api = Blueprint('detect_language_api',
                              __name__,
                              template_folder='templates')

# File extensions (lower case, without the dot) accepted by allowed_file().
# NOTE(review): no audio extension is listed although this module serves a
# speech endpoint -- confirm the list is intended.
ALLOWED_EXTENSIONS = {'txt', 'pdf', 'png', 'jpg', 'jpeg', 'gif'}


def allowed_file(filename):
    """Return True if *filename* has an extension listed in ALLOWED_EXTENSIONS.

    The extension is the text after the last dot. It is compared
    case-insensitively, so ``REPORT.PDF`` is accepted like ``report.pdf``.

    Args:
        filename: File name to check.

    Returns:
        bool: ``False`` when the name has no dot or the extension is not
        allowed, ``True`` otherwise.
    """
    _, dot, extension = filename.rpartition('.')
    return bool(dot) and extension.lower() in ALLOWED_EXTENSIONS


@detect_speech_api.route('/language/recognize/chinese/offline',
Ejemplo n.º 12
0

# Create the model directory when it is missing so that saving a model later
# does not fail on a non-existent folder.
if not os.path.exists(modelpath):
	os.makedirs(modelpath)

# Path conventions differ between operating systems, so branch on the platform.
system_type = plat.system()
if system_type == 'Windows':
	datapath = 'E:\\语音数据集'
	modelpath += '\\'
else:
	if system_type != 'Linux':
		print('*[Message] Unknown System\n')
	datapath = 'dataset'
	modelpath += '/'

# Load the acoustic model and evaluate it on 128 items of the test set.
ms = ModelSpeech(datapath)
ms.LoadModel(modelpath + 'speech_model251_e_0_step_12000.model')
ms.TestModel(datapath, str_dataset='test', data_count=128, out_report=True)

#r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00241I0053.wav')
#r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00020I0087.wav')
#r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\wav\\train\\A11\\A11_167.WAV')
#r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\wav\\test\\D4\\D4_750.wav')
#print('*[提示] 语音识别结果:\n',r)


Ejemplo n.º 13
0
# The data path is the same on every platform; only the separator appended to
# the model directory depends on the operating system.
datapath = 'dataset'
system_type = plat.system()
if system_type == 'Windows':
	modelpath = 'model_speech\\'
else:
	if system_type != 'Linux':
		print('*[Message] Unknown System\n')
	modelpath = 'model_speech/'

# Acoustic model with its trained weights, plus the language model object.
ms = ModelSpeech(datapath)
ms.LoadModel(modelpath + 'speech_model251_e_0_step_625000.model')
ml = ModelLanguage('model_language')

# Recognise every file in sorted order.
for speech_file in sorted(speech_files):
	print(speech_file)

	# NOTE(review): the language model is reloaded on every iteration --
	# confirm whether loading once before the loop would be sufficient.
	ml.LoadModel()
	r = ms.RecognizeSpeech_FromFile(speech_file)
Ejemplo n.º 14
0
from LanguageModel2 import ModelLanguage
from keras import backend as K

modelpath = 'model_speech'

# The dataset location depends on the operating system.
system_type = plat.system()
if system_type == 'Windows':
    datapath = 'D:\\语音数据集'
else:
    if system_type != 'Linux':
        print('*[Message] Unknown System\n')
    datapath = 'dataset'

# Load the acoustic model from model_speech/m251.
ms = ModelSpeech(datapath)
ms.LoadModel(
    os.path.join(modelpath, 'm251', 'speech_model251_e_0_step_12000.model'))

# Recognise one sample recording, then clear the Keras session.
r = ms.RecognizeSpeech_FromFile(
    'D:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00241I0052.wav')
K.clear_session()
Ejemplo n.º 15
0
# Path conventions differ between operating systems: pick the dataset location
# and the separator once, then reuse that separator for every path component.
system_type = plat.system()
if system_type == 'Windows':
    datapath = 'Z:\\SpeechData'
    path_sep = '\\'
else:
    if system_type != 'Linux':
        print('*[Message] Unknown System\n')
    datapath = 'dataset'
    path_sep = '/'
modelpath = 'model_speech' + path_sep

ms = ModelSpeech(datapath)

# The 'm251' sub-directory is joined with the platform separator; a literal
# '\\' here would give an unusable path on non-Windows systems.
ms.LoadModel(modelpath + 'm251' + path_sep +
             'speech_model251_e_0_step_625000.h5')

# Recognise one sample recording, then clear the Keras session.
r = ms.RecognizeSpeech_FromFile(
    'D:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00241I0052.wav')

K.clear_session()

print('*[提示] 语音识别结果:\n', r)
Ejemplo n.º 16
0
# -*- coding: utf-8 -*-
"""
@author: nl8590687
HTTP server program for the speech recognition API.

"""
import http.server
import urllib
import keras
import os
from SpeechModel251 import ModelSpeech
from LanguageModel import ModelLanguage

# Data path given to ModelSpeech and the directory holding the weights.
datapath = './'
modelpath = 'model_speech/'
# The acoustic model is built and its weights loaded at import time.
ms = ModelSpeech(datapath)
ms.LoadModel(
    os.path.join(modelpath, 'm251', 'speech_model251_e_0_step_12000.model'))

# Language model, also loaded at import time.
ml = ModelLanguage('model_language')
ml.LoadModel()


class TestHTTPHandle(http.server.BaseHTTPRequestHandler):
    def setup(self):
        self.request.settimeout(10)
        http.server.BaseHTTPRequestHandler.setup(self)

    def _set_response(self):
        self.send_response(200)
        self.send_header('Content-type', 'text/html')
Ejemplo n.º 17
0
# Path conventions differ between operating systems, so branch on the platform.
system_type = plat.system()
if system_type == 'Windows':
    datapath, modelpath = 'E:\\语音数据集', 'model_speech\\'
else:
    if system_type != 'Linux':
        print('*[Message] Unknown System\n')
    datapath, modelpath = 'dataset', 'model_speech/'

# Load the acoustic model weights.
ms = ModelSpeech(datapath)
ms.LoadModel(modelpath + 'speech_model26_e_0_step_22500.model')

# Recognise one recording and print the result.
r = ms.RecognizeSpeech_FromFile(
    '/media/yean/BE6423686423231F/record/通用语料(不好意思给您带来不便,我们查看一下或到时重新给您补卡).wav')
print('*[提示] 语音识别结果:\n', r)

ml = ModelLanguage('model_language')
Ejemplo n.º 18
0
# Path conventions differ between operating systems, so branch on the platform.
system_type = plat.system()
if system_type == 'Windows':
	datapath, modelpath = 'E:\\语音数据集', 'model_speech\\'
else:
	if system_type != 'Linux':
		print('*[Message] Unknown System\n')
	datapath, modelpath = 'dataset', 'model_speech/'

# Load the acoustic model weights.
ms = ModelSpeech(datapath)
ms.LoadModel(modelpath + 'speech_model251_e_0_step_12000.model')

# Recognise output.wav and print the result.
r = ms.RecognizeSpeech_FromFile('output.wav')
print('*[提示] 语音识别结果:\n', r)

# Load the language model.
ml = ModelLanguage('model_language')
ml.LoadModel()
Ejemplo n.º 19
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: nl8590687
HTTP server program for the speech recognition API.

"""
import http.server
import urllib
import tensorflow.keras
from SpeechModel251 import ModelSpeech
from LanguageModel import ModelLanguage

# Data path given to ModelSpeech and the directory holding the weights.
datapath = './'
modelpath = 'model_speech/'
# The acoustic model is built and its weights loaded at import time.
ms = ModelSpeech(datapath)
ms.LoadModel(modelpath + 'm251/speech_model251_e_0_step_12000.model')

# Language model, also loaded at import time.
ml = ModelLanguage('model_language')
ml.LoadModel()

class TestHTTPHandle(http.server.BaseHTTPRequestHandler):
	"""Request handler with a socket timeout and a helper for HTML 200 responses."""

	def setup(self):
		"""Set a timeout of 10 on the request socket, then run the base-class setup."""
		self.request.settimeout(10)
		super().setup()

	def _set_response(self):
		"""Send status 200 with a ``text/html`` content type and end the headers."""
		self.send_response(200)
		self.send_header('Content-type', 'text/html')
		self.end_headers()
		
# Configure TensorFlow to use up to 95% of the GPU memory.
config = tf.compat.v1.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.95
#config.gpu_options.allow_growth=True   # allocate GPU memory on demand instead of taking it all
sess = tf.compat.v1.Session(config=config)
tf.compat.v1.keras.backend.set_session(sess)

datapath = ''
modelpath = 'model_speech'

# Make sure the model directory AND its per-model sub-directory exist, so that
# saving a model later does not fail. exist_ok=True also covers the case where
# 'model_speech' already exists but the sub-directory does not.
os.makedirs(modelpath + '/m' + ModelName, exist_ok=True)

# Path conventions differ between operating systems, so branch on the platform.
system_type = plat.system()
if system_type == 'Windows':
    datapath = 'D:\\SpeechData'
    modelpath = modelpath + '\\'
else:
    if system_type != 'Linux':
        print('*[Message] Unknown System\n')
    datapath = 'dataset'
    modelpath = modelpath + '/'

ms = ModelSpeech(datapath)

#ms.LoadModel(modelpath + 'speech_model251_e_0_step_327500.h5')
# Train from the current weights, saving every 500 steps.
ms.TrainModel(datapath, epoch=50, batch_size=16, save_step=500)
# Expose only GPU 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Configure TensorFlow to use up to 95% of the GPU memory.
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.95
set_session(tf.Session(config=config))

# Create the model directory when it is missing so that saving a model later
# does not fail on a non-existent folder.
modelpath = 'model_speech'
if not os.path.exists(modelpath):
    os.makedirs(modelpath)

# Path conventions differ between operating systems, so branch on the platform.
system_type = plat.system()
if system_type == 'Windows':
    datapath = 'E:\\语音数据集'
    modelpath += '\\'
else:
    if system_type != 'Linux':
        print('*[Message] Unknown System\n')
    datapath = 'dataset'
    modelpath += '/'

# Resume from the saved weights, then continue training.
ms = ModelSpeech(datapath)
ms.LoadModel(modelpath + '/m251/speech_model251_e_0_step_27000.model')
print("Model loaded")
ms.TrainModel(datapath, epoch=50, batch_size=16, save_step=500)
Ejemplo n.º 22
0
# Windows and Linux share the data path; only the separator appended to the
# model directory differs. Any other system is reported and uses 'dataset'.
system_type = plat.system()
if system_type in ('Windows', 'Linux'):
    datapath = '.'
else:
    print('*[Message] Unknown System\n')
    datapath = 'dataset'
modelpath = 'model_speech' + ('\\' if system_type == 'Windows' else '/')

# Load the acoustic model weights.
ms = ModelSpeech(datapath)
ms.LoadModel(modelpath + 'speech_model251_e_0_step_625000.model')

# Recognise one recording, clear the Keras session, then print the result.
r = ms.RecognizeSpeech_FromFile(
    'F:\\文件\\Tencent Files\\904292841\\FileRecv\\904292841\\FileRecv\\真实语音数据\\21-30\\22\\22_03.wav'
)
K.clear_session()
print('*[提示] 语音识别结果:\n', r)

# Load the language model.
ml = ModelLanguage('model_language')
ml.LoadModel()
Ejemplo n.º 23
0
# Create the model directory when it is missing so that saving a model later
# does not fail on a non-existent folder.
if not os.path.exists(modelpath):
    os.makedirs(modelpath)

# Path conventions differ between operating systems, so branch on the platform.
system_type = plat.system()
if system_type == 'Windows':
    datapath = 'I:\\python_speech_file'
    modelpath += '\\'
else:
    if system_type != 'Linux':
        print('*[Message] Unknown System\n')
    datapath = 'dataset'
    modelpath += '/'

# Load the acoustic model and evaluate it on 256 items of the dev set.
ms = ModelSpeech(datapath)
ms.LoadModel(modelpath + 'm251/speech_model251_e_0_step_79500.model')
ms.TestModel(datapath, str_dataset='dev', data_count=256, out_report=True)

#for index in range(10):
#ms.TestModel(datapath, str_dataset='test', data_count = 512, out_report = True)

#print(rate/10)
#r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00241I0053.wav')
#r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00020I0087.wav')
#r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\wav\\train\\A11\\A11_167.WAV')
#r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\wav\\test\\D4\\D4_750.wav')
#print('*[提示] 语音识别结果:\n',r)
Ejemplo n.º 24
0
# Windows and Linux share the data path; only the separator appended to the
# model directory differs. Any other system is reported and uses 'dataset'.
system_type = plat.system()
if system_type in ('Windows', 'Linux'):
    datapath = '.'
else:
    print('*[Message] Unknown System\n')
    datapath = 'dataset'
modelpath = 'model_speech' + ('\\' if system_type == 'Windows' else '/')

# Load the acoustic model weights.
ms = ModelSpeech(datapath)
ms.LoadModel(modelpath + 'speech_model251_e_0_step_625000.model')

# Recognise one recording, clear the Keras session, then print the result.
r = ms.RecognizeSpeech_FromFile('C:\\Users\\hezudao\\Desktop\\hello.wav')
K.clear_session()
print('*[提示] 语音识别结果:\n', r)

# Load the language model; the recognised sequence is its input.
ml = ModelLanguage('model_language')
ml.LoadModel()

str_pinyin = r
Ejemplo n.º 25
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: nl8590687
HTTP server program for the speech recognition API.

"""
import http.server
import urllib
import keras
from SpeechModel251 import ModelSpeech
from LanguageModel2 import ModelLanguage

# Data path given to ModelSpeech and the directory holding the weights.
datapath = './'
modelpath = 'model_speech/'
# The acoustic model is built and its weights loaded at import time.
ms = ModelSpeech(datapath)
ms.LoadModel(modelpath + 'speech_model251_e_0_step_625000.model')

# Language model, also loaded at import time.
ml = ModelLanguage('model_language')
ml.LoadModel()


class TestHTTPHandle(http.server.BaseHTTPRequestHandler):
    """Request handler with a socket timeout and a helper for HTML 200 responses."""

    def setup(self):
        """Set a timeout of 10 on the request socket, then run the base-class setup."""
        self.request.settimeout(10)
        super().setup()

    def _set_response(self):
        """Send status 200 with a ``text/html`` content type and end the headers."""
        self.send_response(200)
        self.send_header('Content-type', 'text/html')
        self.end_headers()
Ejemplo n.º 26
0
# Path conventions differ between operating systems, so branch on the platform.
system_type = plat.system()
if system_type == 'Windows':
    datapath, modelpath = 'Z:\\dataset', 'model_speech\\'
elif system_type == 'Linux':
    # This dataset path needs to be adjusted to the local setup.
    datapath, modelpath = '../dataset', 'model_speech/'
else:
    print('*[Message] Unknown System\n')
    datapath, modelpath = 'dataset', 'model_speech/'

# Load the acoustic model weights.
ms = ModelSpeech(datapath)
ms.LoadModel(modelpath + 'm251/speech_model251_e_0_step_625000.model')

# Recognise one sample recording, clear the Keras session, print the result.
r = ms.RecognizeSpeech_FromFile(
    '../dataset/ST-CMDS-20170001_1-OS/20170001P00241I0052.wav')
K.clear_session()
print('*[提示] 语音识别结果:\n', r)
Ejemplo n.º 27
0
#config.gpu_options.allow_growth=True   #不全部占满显存, 按需分配
# Install a TensorFlow session built from the GPU configuration prepared above.
set_session(tf.Session(config=config))

# Create the model directory when it is missing so that saving a model later
# does not fail on a non-existent folder.
modelpath = 'model_speech'
if not os.path.exists(modelpath):
    os.makedirs(modelpath)

# The dataset location depends on the operating system.
system_type = plat.system()
if system_type == 'Windows':
    datapath = 'E:\\语音数据集'
else:
    if system_type != 'Linux':
        print('*[Message] Unknown System\n')
    datapath = 'dataset'

# Load the acoustic model and evaluate it on 128 items of the test set.
ms = ModelSpeech(datapath)
ms.LoadModel(
    os.path.join(modelpath, 'm251', 'speech_model251_e_0_step_42500.model'))
ms.TestModel(datapath, str_dataset='test', data_count=128, out_report=True)

#r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00241I0053.wav')
#r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00020I0087.wav')
#r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\wav\\train\\A11\\A11_167.WAV')
#r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\wav\\test\\D4\\D4_750.wav')
#print('*[提示] 语音识别结果:\n',r)
Ejemplo n.º 28
0
class App(QWidget):
    """Small recording window: record audio, transcribe it and show the text."""

    def __init__(self):
        """Set the window properties, load both models, build the UI and start the recorder."""
        super(App, self).__init__()

        # Window caption and geometry, applied in initUI().
        # NOTE(review): `width` and `height` are also QWidget method names, so
        # these instance attributes shadow them -- confirm nothing relies on
        # calling self.width() / self.height().
        self.title = 'ASR demo'
        self.left, self.top = 10, 10
        self.width, self.height = 420, 400

        # File name used for the saved recording.
        self.record_name = 'saved_record.wav'

        # Acoustic model followed by the language model.
        self.ms = ModelSpeech('dataset')
        self.ms.LoadModel('model_speech/speech_model251_e_0_step_12000.model')
        self.ml = ModelLanguage('model_language')
        self.ml.LoadModel()

        # Build the UI first, then start the recorder.
        self.rec = Recorder()
        self.initUI()
        self.rec.start()

    def _add_button(self, label, tooltip, x, y, handler):
        """Create a push button at (x, y) with *tooltip* and connect *handler* to its click."""
        btn = QPushButton(label, self)
        btn.setToolTip(tooltip)
        btn.move(x, y)
        btn.clicked.connect(handler)

    def initUI(self):
        """Apply title and geometry, create the three buttons and the text box, show the window."""
        self.setWindowTitle(self.title)
        self.setGeometry(self.left, self.top, self.width, self.height)

        self._add_button('Record', 'Press to start recording',
                         100, 70, self.start_record)
        self._add_button('To Transcript', 'Press to convert to transcript',
                         200, 70, self.stop_record)
        self._add_button('Clear', 'Press to clear transcripts',
                         100, 100, self.clear)

        # Read-only box that collects the transcripts.
        self.text_edit = QTextEdit("What you said: ", self)
        self.text_edit.setReadOnly(True)
        self.text_edit.move(100, 140)

        self.show()

    @pyqtSlot()
    def clear(self):
        """Empty the transcript box and restore its heading line."""
        self.text_edit.clear()
        self.text_edit.append("What you said: ")

    @pyqtSlot()
    def start_record(self):
        """Start the recorder."""
        self.rec.start()

    @pyqtSlot()
    def stop_record(self):
        """Stop and save the recording, transcribe it and append the text to the box."""
        print(len(self.rec._frames))
        self.rec.stop()
        self.rec.save(self.record_name)
        recognised = self.ms.RecognizeSpeech_FromFile(self.record_name)
        self.w = self.ml.SpeechToText(recognised)
        print('语音转文字结果:\n', self.w)
        self.text_edit.append(self.w)
Ejemplo n.º 29
0
# Path conventions differ between operating systems, so branch on the platform.
system_type = plat.system()
if system_type == 'Windows':
    datapath, modelpath = 'C:\\test', 'model_speech\\'
else:
    if system_type != 'Linux':
        print('*[Message] Unknown System\n')
    datapath, modelpath = 'dataset', 'model_speech/'

# Load the acoustic model weights.
ms = ModelSpeech(datapath)
ms.LoadModel(modelpath + 'speech_model251_e_0_step_12000.model')

# Recognise one recording and print the result.
r = ms.RecognizeSpeech_FromFile('C:\\test\\online_star.wav')
print('*[提示] 语音识别结果:\n', r)

# Load the language model.
ml = ModelLanguage('model_language')
ml.LoadModel()

#str_pinyin = ['zhe4','zhen1','shi4','ji2', 'hao3','de5']
#str_pinyin = ['jin1', 'tian1', 'shi4', 'xing1', 'qi1', 'san1']
#str_pinyin = ['ni3', 'hao3','a1']
Ejemplo n.º 30
0
# Path conventions differ between operating systems: pick the dataset location
# and the separator once, then reuse that separator for every path component.
system_type = plat.system()
if system_type == 'Windows':
    datapath = 'D:\\AISHELL-2-sample\\iOS\\data'
    path_sep = '\\'
else:
    if system_type != 'Linux':
        print('*[Message] Unknown System\n')
    datapath = 'dataset'
    path_sep = '/'
modelpath = 'model_speech' + path_sep

ms = ModelSpeech(datapath)

# The 'm251' sub-directory is joined with the platform separator; a literal
# '\\' here would give an unusable path on non-Windows systems.
ms.LoadModel(modelpath + 'm251' + path_sep +
             'speech_model251_e_0_step_82500.model')
i = 0
# From here on `datapath` names the sample recording inside the dataset,
# built with the same separator.
datapath = path_sep.join([datapath, 'wav', 'C0936', 'IC0936W0001.wav'])
#r = ms.RecognizeSpeech_FromFile(datapath)

# Recognise the sample recording, print the result, clear the Keras session.
r = ms.RecognizeSpeech_FromFile(
    'D:\\AISHELL-2-sample\\iOS\\data\\wav\\C0936\\IC0936W0001.wav')
print('*[提示] 语音识别结果:\n', r)
K.clear_session()