예제 #1
0
async def api_speech_to_text() -> str:
    """Transcribe the audio sent in the request body.

    Query arguments:
        noheader: when equal to "true" (case-insensitive), the request body
            is passed through ``buffer_to_wav`` before transcription.

    Returns:
        The transcription text. When ``prefers_json()`` is true, the result
        of ``jsonify`` is returned instead, carrying the text, the
        likelihood, the time spent transcribing and the WAV duration.
    """
    header_flag = request.args.get("noheader", "false")
    no_header = header_flag.lower() == "true"
    assert core is not None

    # 16-bit 16Khz mono is preferred, but audio will be converted with sox
    # if needed.
    audio = await request.data
    if no_header:
        # Body has no WAV header, so wrap it in one
        audio = buffer_to_wav(audio)

    # Time only the transcription call itself
    started = time.perf_counter()
    transcription = await core.transcribe_wav(audio)
    finished = time.perf_counter()

    if not prefers_json():
        return transcription.text

    payload = {
        "text": transcription.text,
        "likelihood": transcription.confidence,
        "transcribe_seconds": (finished - started),
        "wav_seconds": get_wav_duration(audio),
    }
    return jsonify(payload)
예제 #2
0
async def api_speech_to_text() -> str:
    """Transcribe the audio sent in the request body and broadcast the result.

    The audio that is transcribed is also stored in the module-level
    ``last_voice_wav``, and a "transcription" event is sent through
    ``add_ws_event`` once the transcription is available.

    Query arguments:
        noheader: when equal to "true" (case-insensitive), the request body
            is passed through ``buffer_to_wav`` before transcription.

    Returns:
        The transcription text. When ``prefers_json()`` is true, the result
        of ``jsonify`` is returned instead, carrying the text, the
        likelihood, the time spent transcribing and the WAV duration.
    """
    global last_voice_wav

    header_flag = request.args.get("noheader", "false")
    no_header = header_flag.lower() == "true"
    assert core is not None

    # 16-bit 16Khz mono is preferred, but audio will be converted with sox
    # if needed.
    audio = await request.data
    if no_header:
        # Body has no WAV header, so wrap it in one
        audio = buffer_to_wav(audio)

    # Remember the (possibly wrapped) audio before transcribing it
    last_voice_wav = audio

    # Time only the transcription call itself
    started = time.perf_counter()
    transcription = await core.transcribe_wav(audio)
    finished = time.perf_counter()

    # Notify websocket listeners
    ws_message = json.dumps({
        "text": transcription.text,
        "wakewordId": "default",
        "siteId": core.siteId
    })
    await add_ws_event("transcription", ws_message)

    if not prefers_json():
        return transcription.text

    payload = {
        "text": transcription.text,
        "likelihood": transcription.confidence,
        "transcribe_seconds": (finished - started),
        "wav_seconds": get_wav_duration(audio),
    }
    return jsonify(payload)