def __init__(self, audio_data: AudioData, url='http://127.0.0.1:8085'):
    """Transcribe *audio_data* by POSTing it to an STT HTTP service.

    Args:
        audio_data: recording to transcribe; converted to 16 kHz, 16-bit WAV
            before upload.
        url: base URL of the STT server; the request is sent to ``{url}/stt``.

    Raises:
        RuntimeError: on HTTP error, connection failure, malformed JSON,
            or a truthy/missing ``code`` in the server's JSON reply.
    """
    self._text = None
    # 16 kHz, 16-bit (width=2) is the format sent to the /stt endpoint.
    wav_data = audio_data.get_wav_data(convert_rate=16000, convert_width=2)
    request = Request('{}/stt'.format(url),
                      data=wav_data,
                      headers={'Content-Type': 'audio/wav'})
    try:
        response = urlopen(request)
    except HTTPError as e:
        # Chain the original error so the HTTP status context is preserved.
        raise RuntimeError('Request failed: {}'.format(e.reason)) from e
    except URLError as e:
        raise RuntimeError('Connection failed: {}'.format(e.reason)) from e
    # Close the HTTP response deterministically (was leaked before).
    with response:
        response_text = response.read().decode('utf-8')
    try:
        result = json.loads(response_text)
    except ValueError as e:
        # json.JSONDecodeError is a subclass of ValueError, so ValueError
        # alone covers both (the old two-entry tuple was redundant).
        raise RuntimeError('Json decode error: {}'.format(e)) from e

    # A truthy 'code' (or a missing field) signals a server-side failure.
    if 'code' not in result or 'text' not in result or result['code']:
        raise RuntimeError('Server error: {}: {}'.format(
            result.get('code', 'None'), result.get('text', 'None')))
    self._text = result['text']
# Example #2
 def _get_audio(self, audio_data: AudioData):
     return audio_data.get_wav_data(self._convert_rate, self._convert_width)
# Example #3
def predict_word(audio_data: AudioData, model_map: ModelMap):
    """Run the word-prediction model on *audio_data* after noise reduction.

    Pipeline: write the recording to ``INPUT_WAV_PATH`` (atomic swap under
    ``noisered.SEMAPHORE``), denoise it against ``BG_WAV_PATH`` into
    ``NOISERED_WAV_PATH``, extract features, pad/trim to exactly ``Tx``
    frames, and feed the batch of one to a per-thread model.  The result is
    reported via ``summarize_prediction``; the function itself returns None.

    Args:
        audio_data: recording to classify.
        model_map: holder of per-thread models keyed by thread id.

    Raises:
        Exception: any failure is logged with a traceback and re-raised.
    """
    try:
        if not os.path.exists(BG_WAV_PATH):
            print("bg audio is not ready.")
            return

        # Write the new recording to a side file, then swap it into place.
        with open(INPUT_WAV_PATH + '.tmp', 'wb') as f:
            f.write(audio_data.get_wav_data())
        with noisered.SEMAPHORE:
            # os.replace overwrites the destination atomically; this
            # replaces the old bare-except remove-then-rename sequence,
            # which could race and silently hid real OS errors.
            os.replace(INPUT_WAV_PATH + '.tmp', INPUT_WAV_PATH)
        if not noisered.create_noisered_wav(INPUT_WAV_PATH, NOISERED_WAV_PATH,
                                            BG_WAV_PATH):
            return

        # Lazily load one model per worker thread; the model object is
        # presumably not thread-safe — TODO confirm.
        tid = threading.get_ident()
        if tid not in model_map.models:
            print(f"load model. tid:{tid}")
            model_map.models[tid] = load_model()
        model = model_map.models[tid]

        # Build the feature matrix from the denoised WAV.
        x = create_features(NOISERED_WAV_PATH, FEATURE_TYPE)

        print(f"x:{x.shape},{x.dtype} framedata:{len(audio_data.frame_data)}")
        # The model expects exactly Tx frames: zero-pad short inputs,
        # trim long ones.  (The branches are mutually exclusive, so elif.)
        if x.shape[0] < Tx:
            empty_space = np.zeros((Tx - x.shape[0], n_freq),
                                   dtype=np.float32)
            print(f"emptysp:{empty_space.shape}")
            x = np.concatenate((x, empty_space), axis=0)
        elif x.shape[0] > Tx:
            eprint(f"trim input. from={x.shape[0]} to={Tx}")
            x = x[:Tx]
        # Add the batch dimension and force float32 for the model.
        x = np.float32(np.array([x]))
        print(f"x:{x.shape},{x.dtype}")

        # Time the prediction for the log line below.
        start = timer()
        predicted = model.predict(x)
        end = timer()
        print(f"predicted:{predicted} time:{end - start}")
        summarize_prediction(predicted[0])
    except Exception:
        # Log the full traceback, then propagate to the caller.  Narrowed
        # from a bare except so KeyboardInterrupt/SystemExit pass through
        # untouched.
        traceback.print_exc()
        raise