def __init__(self, audio_data: AudioData, url='http://127.0.0.1:8085'):
    """Send *audio_data* to the speech-to-text server and store the result.

    The audio is obtained with
    ``audio_data.get_wav_data(convert_rate=16000, convert_width=2)`` and sent
    as the body of a request to ``<url>/stt`` with content type ``audio/wav``.
    The reply must be a JSON object holding both ``code`` (falsy on success)
    and ``text``; on success ``text`` is stored in ``self._text``.

    Args:
        audio_data: Object providing ``get_wav_data(convert_rate, convert_width)``.
        url: Base URL of the server, without the trailing ``/stt``.

    Raises:
        RuntimeError: If the request fails, the connection fails, the reply
            is not valid JSON, the reply is not a JSON object, or the server
            reports an error (missing keys or a truthy ``code``).
    """
    self._text = None
    wav_data = audio_data.get_wav_data(convert_rate=16000, convert_width=2)
    request = Request(
        '{}/stt'.format(url),
        data=wav_data,
        headers={'Content-Type': 'audio/wav'},
    )
    try:
        response = urlopen(request)
    # HTTPError is a subclass of URLError, so it has to be handled first.
    except HTTPError as e:
        raise RuntimeError('Request failed: {}'.format(e.reason)) from e
    except URLError as e:
        raise RuntimeError('Connection failed: {}'.format(e.reason)) from e

    # Close the response (and its underlying socket) once the body is read.
    with response:
        response_text = response.read().decode('utf-8')

    try:
        result = json.loads(response_text)
    except (json.JSONDecodeError, ValueError) as e:
        raise RuntimeError('Json decode error: {}'.format(e)) from e

    # Valid JSON that is not an object (list, number, string, null) has no
    # ``.get`` and may not support ``in``; report it as a server error rather
    # than leaking an AttributeError/TypeError to the caller.
    if not isinstance(result, dict):
        raise RuntimeError(
            'Server error: unexpected response: {!r}'.format(result))
    if 'code' not in result or 'text' not in result or result['code']:
        raise RuntimeError('Server error: {}: {}'.format(
            result.get('code', 'None'), result.get('text', 'None')))
    self._text = result['text']
def _get_audio(self, audio_data: AudioData):
    """Return ``audio_data.get_wav_data(...)`` using this instance's settings.

    The conversion rate and width are taken from ``self._convert_rate`` and
    ``self._convert_width`` and passed positionally, in that order.
    """
    to_wav = audio_data.get_wav_data
    rate = self._convert_rate
    width = self._convert_width
    return to_wav(rate, width)
def _fit_feature_length(x):
    """Zero-pad or trim *x* along axis 0 so that it has exactly ``Tx`` rows."""
    n_frames = x.shape[0]
    if n_frames < Tx:
        # Fill the missing tail with zero rows of width ``n_freq``.
        empty_space = np.zeros((Tx - n_frames, n_freq), dtype=np.float32)
        print(f"emptysp:{empty_space.shape}")
        return np.concatenate((x, empty_space), axis=0)
    if n_frames > Tx:
        eprint(f"trim input. from={n_frames} to={Tx}")
        return x[:Tx]
    return x


def predict_word(audio_data: AudioData, model_map: ModelMap):
    """Run noise reduction and the model prediction for one audio capture.

    Steps:
      1. Return early (after printing a notice) if ``BG_WAV_PATH`` does not
         exist yet.
      2. Write ``audio_data.get_wav_data()`` to ``INPUT_WAV_PATH`` (via a
         ``.tmp`` file) and, while holding ``noisered.SEMAPHORE``, call
         ``noisered.create_noisered_wav``; return early if it reports failure.
      3. Load a model for the current thread into ``model_map.models`` (keyed
         by thread id) if none is stored yet.
      4. Build features from ``NOISERED_WAV_PATH``, pad/trim them to ``Tx``
         rows, run ``model.predict`` on a batch of one and pass the first
         prediction to ``summarize_prediction``.

    Args:
        audio_data: Captured audio; must provide ``get_wav_data()`` and
            ``frame_data``.
        model_map: Holder whose ``models`` mapping caches one model per
            thread id.

    Returns:
        None. Results are reported through ``summarize_prediction``.

    Raises:
        Exception: Any error from the steps above is re-raised after its
            traceback has been printed.
    """
    try:
        if not os.path.exists(BG_WAV_PATH):
            print("bg audio is not ready.")
            return

        # Best-effort removal of a stale input file; a missing file is fine.
        try:
            os.remove(INPUT_WAV_PATH)
        except OSError:
            pass

        # execute noise reduction
        tmp_path = INPUT_WAV_PATH + '.tmp'
        with open(tmp_path, 'wb') as f:
            f.write(audio_data.get_wav_data())

        with noisered.SEMAPHORE:
            # os.replace overwrites an existing destination on every
            # platform, so no separate remove-then-rename step is needed.
            os.replace(tmp_path, INPUT_WAV_PATH)
            if not noisered.create_noisered_wav(
                    INPUT_WAV_PATH, NOISERED_WAV_PATH, BG_WAV_PATH):
                return
        # NOTE(review): NOISERED_WAV_PATH is read below after the semaphore
        # has been released, and the .tmp file is written before it is taken;
        # confirm this is safe when several threads call predict_word.

        # load or get model (one model instance per thread)
        tid = threading.get_ident()
        if tid not in model_map.models:
            print(f"load model. tid:{tid}")
            model_map.models[tid] = load_model()
        model = model_map.models[tid]

        # create input from the noise-reduced wav
        x = create_features(NOISERED_WAV_PATH, FEATURE_TYPE)
        print(f"x:{x.shape},{x.dtype} framedata:{len(audio_data.frame_data)}")

        # complement shortage space / trim overflow, then add a batch axis
        x = _fit_feature_length(x)
        x = np.asarray([x], dtype=np.float32)
        print(f"x:{x.shape},{x.dtype}")

        # do predict
        start = timer()
        predicted = model.predict(x)
        end = timer()
        print(f"predicted:{predicted} time:{end - start}")
        summarize_prediction(predicted[0])
    except Exception:
        # Print the traceback here before propagating the error.
        traceback.print_exc()
        raise