async def api_speech_to_text() -> str:
    """Transcribe the audio in the request body and return the recognized text.

    The ``noheader`` query argument (compared case-insensitively against
    ``"true"``, default ``"false"``) selects whether the body is first passed
    through ``buffer_to_wav`` before transcription.

    Returns:
        When ``prefers_json()`` is true, the result of ``jsonify`` over a
        mapping with ``text``, ``likelihood``, ``transcribe_seconds`` and
        ``wav_seconds``; otherwise just the transcription text.
    """
    headerless = request.args.get("noheader", "false").lower() == "true"
    assert core is not None

    # Prefer 16-bit 16Khz mono, but will convert with sox if needed
    audio = await request.data
    if headerless:
        # Wrap in WAV
        audio = buffer_to_wav(audio)

    # Time only the transcription call itself.
    started = time.perf_counter()
    transcription = await core.transcribe_wav(audio)
    elapsed = time.perf_counter() - started

    if not prefers_json():
        return transcription.text

    payload = {
        "text": transcription.text,
        "likelihood": transcription.confidence,
        "transcribe_seconds": elapsed,
        "wav_seconds": get_wav_duration(audio),
    }
    return jsonify(payload)
async def api_speech_to_text() -> str:
    """Transcribe the audio in the request body and return the recognized text.

    The ``noheader`` query argument (compared case-insensitively against
    ``"true"``, default ``"false"``) selects whether the body is first passed
    through ``buffer_to_wav`` before transcription. The audio that is
    transcribed is also stored in the module-level ``last_voice_wav``, and a
    ``"transcription"`` event is published through ``add_ws_event`` once the
    transcription finishes.

    Returns:
        When ``prefers_json()`` is true, the result of ``jsonify`` over a
        mapping with ``text``, ``likelihood``, ``transcribe_seconds`` and
        ``wav_seconds``; otherwise just the transcription text.
    """
    global last_voice_wav

    headerless = request.args.get("noheader", "false").lower() == "true"
    assert core is not None

    # Prefer 16-bit 16Khz mono, but will convert with sox if needed
    audio = await request.data
    if headerless:
        # Wrap in WAV
        audio = buffer_to_wav(audio)

    # Remember the audio that is about to be transcribed.
    last_voice_wav = audio

    # Time only the transcription call itself (the websocket send is excluded).
    started = time.perf_counter()
    transcription = await core.transcribe_wav(audio)
    elapsed = time.perf_counter() - started

    # Send to websocket
    event_body = json.dumps(
        {
            "text": transcription.text,
            "wakewordId": "default",
            "siteId": core.siteId,
        }
    )
    await add_ws_event("transcription", event_body)

    if not prefers_json():
        return transcription.text

    payload = {
        "text": transcription.text,
        "likelihood": transcription.confidence,
        "transcribe_seconds": elapsed,
        "wav_seconds": get_wav_duration(audio),
    }
    return jsonify(payload)