Ejemplo n.º 1 — puntuación: 0
"""
用于测试语音识别系统语音模型的程序
(Program for testing the acoustic model of the speech-recognition system.)
"""

import os

from speech_model import ModelSpeech
from model_zoo.speech_model.keras_backend import SpeechModel251BN
from data_loader import DataLoader
from speech_features import Spectrogram

# Pin execution to the first GPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Model input geometry: 1600 time frames x 200 spectrogram features, 1 channel.
AUDIO_LENGTH = 1600
AUDIO_FEATURE_LENGTH = 200
CHANNELS = 1
# Output vocabulary size: 1427 pinyin tokens plus one CTC blank -> 1428.
OUTPUT_SIZE = 1428

# Build the acoustic model and its feature front-end, then wrap them in the
# high-level ModelSpeech driver for evaluation on the 'dev' split.
input_shape = (AUDIO_LENGTH, AUDIO_FEATURE_LENGTH, CHANNELS)
sm251bn = SpeechModel251BN(input_shape=input_shape, output_size=OUTPUT_SIZE)
feat = Spectrogram()
evalue_data = DataLoader('dev')
ms = ModelSpeech(sm251bn, feat, max_label_length=64)

model_path = 'save_models/' + sm251bn.get_model_name() + '.model.h5'
ms.load_model(model_path)
# data_count=-1 evaluates the whole split; a report line is printed every 100 steps.
ms.evaluate_model(data_loader=evalue_data, data_count=-1, out_report=True,
                  show_ratio=True, show_per_step=100)
Ejemplo n.º 2 — puntuación: 0
                    type=str,
                    help='the port to listen')
# Parse the CLI options declared above (e.g. the listen port).
args = parser.parse_args()

# Flask application hosting the ASRT HTTP API.
app = Flask("ASRT API Service")

# Acoustic-model input geometry: 1600 frames x 200 features, single channel.
AUDIO_LENGTH = 1600
AUDIO_FEATURE_LENGTH = 200
CHANNELS = 1
# 1427 pinyin symbols + 1 CTC blank = 1428 output classes.
OUTPUT_SIZE = 1428

# Assemble acoustic model + spectrogram front-end and load its saved weights.
sm251bn = SpeechModel251BN(
    input_shape=(AUDIO_LENGTH, AUDIO_FEATURE_LENGTH, CHANNELS),
    output_size=OUTPUT_SIZE)
feat = Spectrogram()
ms = ModelSpeech(sm251bn, feat, max_label_length=64)
ms.load_model('save_models/' + sm251bn.get_model_name() + '.model.h5')

# Language model that converts pinyin sequences into Chinese text.
ml = ModelLanguage('model_language')
ml.load_model()


class AsrtApiResponse:
    '''
    HTTP API response payload for the ASRT speech-recognition service.
    (ASRT语音识别基于HTTP协议的API接口响应类)

    Holds a numeric status code, an optional human-readable message and the
    recognition result (empty string when there is none).
    '''
    def __init__(self, status_code, status_message='', result=''):
        # Numeric status of the request; the exact code convention is defined
        # by the callers of this class (not visible here).
        self.status_code = status_code
        # Human-readable description accompanying the status code.
        self.status_message = status_message
        # Recognition result carried by the response.
        self.result = result

    def __repr__(self):
        # Added for debuggability: unambiguous representation for logs/REPL.
        return (f'{type(self).__name__}(status_code={self.status_code!r}, '
                f'status_message={self.status_message!r}, '
                f'result={self.result!r})')
Ejemplo n.º 3 — puntuación: 0
import os

from speech_model import ModelSpeech
from speech_model_zoo import SpeechModel251BN
from speech_features import Spectrogram
from language_model3 import ModelLanguage

# Run on the first GPU only.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Acoustic-model input geometry: 1600 frames x 200 spectral features, 1 channel.
AUDIO_LENGTH = 1600
AUDIO_FEATURE_LENGTH = 200
CHANNELS = 1
# 1427 pinyin symbols + 1 CTC blank = 1428 output classes.
OUTPUT_SIZE = 1428

# Build the acoustic model, restore its weights and transcribe one file to pinyin.
sm251bn = SpeechModel251BN(
    input_shape=(AUDIO_LENGTH, AUDIO_FEATURE_LENGTH, CHANNELS),
    output_size=OUTPUT_SIZE)
feat = Spectrogram()
ms = ModelSpeech(sm251bn, feat, max_label_length=64)
ms.load_model('save_models/' + sm251bn.get_model_name() + '.model.h5')

res = ms.recognize_speech_from_file('filename.wav')
print('*[提示] 声学模型语音识别结果:\n', res)

# Second stage: the language model turns the pinyin sequence into Chinese text.
ml = ModelLanguage('model_language')
ml.load_model()
str_pinyin = res
res = ml.pinyin_to_text(str_pinyin)
print('语音识别最终结果:\n', res)
Ejemplo n.º 4 — puntuación: 0
import os

from tensorflow.keras.optimizers import Adam

from speech_model import ModelSpeech
from speech_model_zoo import SpeechModel251BN
from data_loader import DataLoader
from speech_features import SpecAugment

# Run training on the first GPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Model input geometry: 1600 time frames x 200 spectrogram features, 1 channel.
AUDIO_LENGTH = 1600
AUDIO_FEATURE_LENGTH = 200
CHANNELS = 1
# Output vocabulary: 1427 pinyin tokens + 1 CTC blank = 1428 classes.
OUTPUT_SIZE = 1428

# Acoustic model trained with SpecAugment feature augmentation on 'train' split.
sm251bn = SpeechModel251BN(input_shape=(AUDIO_LENGTH, AUDIO_FEATURE_LENGTH,
                                        CHANNELS),
                           output_size=OUTPUT_SIZE)
feat = SpecAugment()
train_data = DataLoader('train')
# Fix: `learning_rate` replaces the deprecated `lr` alias (removed in Keras 3).
# NOTE(review): epsilon=10e-8 equals 1e-7 (the tf.keras Adam default); if 1e-8
# was intended, change the literal. `decay` is also a legacy argument — confirm
# the installed TF version still accepts it before upgrading.
opt = Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, decay=0.0,
           epsilon=10e-8)
ms = ModelSpeech(sm251bn, feat, max_label_length=64)

# Uncomment to resume training from a previously saved checkpoint:
#ms.load_model('save_models/' + sm251bn.get_model_name() + '.model.h5')
ms.train_model(optimizer=opt,
               data_loader=train_data,
               epochs=50,
               save_step=1,
               batch_size=16,
               last_epoch=0)
ms.save_model('save_models/' + sm251bn.get_model_name())