Example #1
async def mic2wav(core: RhasspyCore, profile: Profile, args: Any) -> None:
    """Record voice command from microphone"""
    # Listen until silence
    wav_data = buffer_to_wav((await core.record_command(args.timeout)).data)

    # Output WAV data
    sys.stdout.buffer.write(wav_data)
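All of these examples funnel raw microphone audio through buffer_to_wav before handing it to the transcriber, but the helper itself is not shown on this page. A minimal sketch of what it plausibly does, assuming the 16-bit 16 kHz mono format the later examples mention:

import io
import wave


def buffer_to_wav(buffer: bytes) -> bytes:
    """Wrap raw PCM audio in a WAV container (16-bit 16 kHz mono assumed)."""
    with io.BytesIO() as wav_buffer:
        with wave.open(wav_buffer, mode="wb") as wav_file:
            wav_file.setframerate(16000)  # sample rate is an assumption
            wav_file.setsampwidth(2)  # 16-bit samples
            wav_file.setnchannels(1)  # mono
            wav_file.writeframes(buffer)
        return wav_buffer.getvalue()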
Example #2
def api_stop_recording() -> Response:
    """End recording voice command. Transcribe and handle."""
    assert core is not None
    no_hass = request.args.get("nohass", "false").lower() == "true"

    buffer_name = request.args.get("name", "")
    audio_data = core.stop_recording_wav(buffer_name).data

    wav_data = buffer_to_wav(audio_data)
    logger.debug("Recorded %s byte(s) of audio data" % len(wav_data))

    transcription = core.transcribe_wav(wav_data)
    text = transcription.text
    logger.debug(text)

    intent = core.recognize_intent(text).intent
    intent["speech_confidence"] = transcription.confidence

    intent_json = json.dumps(intent)
    logger.debug(intent_json)
    add_ws_event(WS_EVENT_INTENT, intent_json)

    if not no_hass:
        # Send intent to Home Assistant
        intent = core.handle_intent(intent).intent

    return jsonify(intent)
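For reference, a hypothetical client call to this handler. The URL, port, and route are assumptions, not taken from the snippet; only the nohass query parameter comes from the code above:

# Hypothetical client call; the URL, port, and route are assumptions
import requests

response = requests.post("http://localhost:12101/api/stop-recording?nohass=true")
print(response.json())  # the recognized intent, as returned by jsonify(intent)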
Example #3
async def api_speech_to_text() -> str:
    """Transcribe speech from WAV file."""
    no_header = request.args.get("noheader", "false").lower() == "true"
    assert core is not None

    # Prefer 16-bit 16 kHz mono, but will convert with sox if needed
    wav_data = await request.data
    if no_header:
        # Wrap in WAV
        wav_data = buffer_to_wav(wav_data)

    start_time = time.perf_counter()
    result = await core.transcribe_wav(wav_data)
    end_time = time.perf_counter()

    if prefers_json():
        return jsonify(
            {
                "text": result.text,
                "likelihood": result.confidence,
                "transcribe_seconds": (end_time - start_time),
                "wav_seconds": get_wav_duration(wav_data),
            }
        )

    return result.text
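get_wav_duration is used here but not defined on this page. A minimal sketch using only the standard library wave module:

import io
import wave


def get_wav_duration(wav_data: bytes) -> float:
    """Return the duration of a WAV file in seconds."""
    with io.BytesIO(wav_data) as wav_buffer:
        with wave.open(wav_buffer, mode="rb") as wav_file:
            return wav_file.getnframes() / wav_file.getframerate()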
Example #4
    def in_started(self, message: Any, sender: RhasspyActor) -> None:
        """Handle messages in started state."""
        global last_voice_wav

        if isinstance(message, IntentRecognized):
            # Add slots
            intent_slots = {}
            for ev in message.intent.get("entities", []):
                intent_slots[ev["entity"]] = ev["value"]

            message.intent["slots"] = intent_slots

            # Convert to JSON
            intent_json = json.dumps(message.intent)
            self._logger.debug(intent_json)
            asyncio.run_coroutine_threadsafe(add_ws_event("intent", intent_json), loop)
        elif isinstance(message, WakeWordDetected):
            assert core is not None
            wake_json = json.dumps({"wakewordId": message.name, "siteId": core.siteId})
            asyncio.run_coroutine_threadsafe(add_ws_event("wake", wake_json), loop)
        elif isinstance(message, WavTranscription):
            assert core is not None
            transcription_json = json.dumps(
                {
                    "text": message.text,
                    "wakewordId": message.wakewordId,
                    "siteId": core.siteId,
                }
            )
            asyncio.run_coroutine_threadsafe(
                add_ws_event("transcription", transcription_json), loop
            )
        elif isinstance(message, VoiceCommand):
            # Save last voice command
            last_voice_wav = buffer_to_wav(message.data)
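The asyncio.run_coroutine_threadsafe(..., loop) calls appear because actor callbacks like in_started run on the actor system's thread, not on the thread driving the asyncio event loop, so coroutines must be handed over thread-safely. A standalone sketch of the pattern (the loop setup and the add_ws_event body are assumptions):

import asyncio
import threading

loop = asyncio.new_event_loop()
threading.Thread(target=loop.run_forever, daemon=True).start()


async def add_ws_event(event_type: str, payload_json: str) -> None:
    print(event_type, payload_json)  # stand-in for the real websocket broadcast


# Safe to call from any thread; returns a concurrent.futures.Future
future = asyncio.run_coroutine_threadsafe(add_ws_event("wake", "{}"), loop)
future.result(timeout=1.0)  # optionally block until the coroutine finishes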
Example #5
async def mic2text(core: RhasspyCore, profile: Profile, args: Any) -> None:
    """Record voice command from microphone and transcribe to text"""
    # Listen until silence
    wav_data = buffer_to_wav((await core.record_command(args.timeout)).data)

    # Transcribe
    text = (await core.transcribe_wav(wav_data)).text

    # Output text
    print(text)
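mic2wav, mic2text, and mic2intent all receive an args namespace with a timeout attribute, which suggests they are handlers for CLI subcommands. A sketch of how that wiring might look with argparse (all names below are assumptions):

import argparse

parser = argparse.ArgumentParser(prog="rhasspy")
subparsers = parser.add_subparsers(dest="command")

mic2text_parser = subparsers.add_parser("mic2text", help="record and transcribe")
mic2text_parser.add_argument(
    "--timeout", type=float, default=None, help="seconds to wait for a voice command"
)

args = parser.parse_args(["mic2text", "--timeout", "10"])  # args.timeout == 10.0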
Example #6
async def api_speech_to_text() -> str:
    """Transcribe speech from WAV file."""
    no_header = request.args.get("noheader", "false").lower() == "true"
    assert core is not None

    # Prefer 16-bit 16 kHz mono, but will convert with sox if needed
    wav_data = await request.data
    if no_header:
        # Wrap in WAV
        wav_data = buffer_to_wav(wav_data)

    result = await core.transcribe_wav(wav_data)
    return result.text
Example #7
    def in_awake(self, message: Any, sender: RhasspyActor) -> None:
        """Handle messages in awake state."""
        if isinstance(message, VoiceCommand):
            # Recorded beep
            wav_path = self.profile.get("sounds.recorded", None)
            if wav_path is not None:
                # Expand environment variables only after the None check,
                # since os.path.expandvars(None) raises a TypeError
                self.send(self.player, PlayWavFile(os.path.expandvars(wav_path)))

            # speech -> text
            wav_data = buffer_to_wav(message.data)
            self.send(self.decoder, TranscribeWav(wav_data, handle=message.handle))
            self.transition("decoding")
        else:
            self.handle_any(message, sender)
Example #8
async def mic2intent(core: RhasspyCore, profile: Profile, args: Any) -> None:
    """Record voice command from microphone, transcribe, and recognize intent"""
    # Listen until silence
    wav_data = buffer_to_wav((await core.record_command(args.timeout)).data)

    # Transcribe
    sentence = (await core.transcribe_wav(wav_data)).text

    # Parse
    intent = (await core.recognize_intent(sentence)).intent

    if args.handle:
        intent = (await core.handle_intent(intent)).intent

    # Output JSON
    json.dump(intent, sys.stdout, indent=4)
Example #9
async def api_stop_recording() -> Response:
    """End recording voice command. Transcribe and handle."""
    global last_voice_wav
    assert core is not None
    no_hass = request.args.get("nohass", "false").lower() == "true"

    buffer_name = request.args.get("name", "")
    audio_data = (await core.stop_recording_wav(buffer_name)).data

    wav_data = buffer_to_wav(audio_data)
    logger.debug("Recorded %s byte(s) of audio data", len(wav_data))

    transcription = await core.transcribe_wav(wav_data)
    text = transcription.text
    logger.debug(text)

    # Send to websocket
    await add_ws_event(
        "transcription",
        json.dumps({
            "text": text,
            "wakewordId": "default",
            "siteId": core.siteId
        }),
    )

    intent = (await core.recognize_intent(text)).intent
    intent["speech_confidence"] = transcription.confidence

    intent_json = json.dumps(intent)
    logger.debug(intent_json)
    await add_ws_event("intent", intent_json)

    if not no_hass:
        # Send intent to Home Assistant
        intent = (await core.handle_intent(intent)).intent

    # Save last voice command WAV data
    last_voice_wav = wav_data

    return jsonify(intent)
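add_ws_event itself is not shown on this page. A plausible minimal version, assuming each connected websocket client drains its own asyncio.Queue:

import asyncio
from typing import List

# One queue per connected websocket client (an assumption about the design)
ws_queues: List[asyncio.Queue] = []


async def add_ws_event(event_type: str, payload_json: str) -> None:
    """Broadcast an event to every connected websocket client."""
    for queue in ws_queues:
        await queue.put((event_type, payload_json))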
Example #10
def api_stop_recording() -> Response:
    '''End recording voice command. Transcribe and handle.'''
    assert core is not None
    no_hass = request.args.get('nohass', 'false').lower() == 'true'

    buffer_name = request.args.get('name', '')
    audio_data = core.stop_recording_wav(buffer_name).data

    wav_data = buffer_to_wav(audio_data)
    logger.debug('Recorded %s byte(s) of audio data', len(wav_data))

    text = core.transcribe_wav(wav_data).text
    logger.debug(text)

    intent = core.recognize_intent(text).intent
    logger.debug(intent)

    if not no_hass:
        # Send intent to Home Assistant
        intent = core.handle_intent(intent).intent

    return jsonify(intent)
Example #11
async def api_speech_to_text() -> str:
    """Transcribe speech from WAV file."""
    global last_voice_wav
    no_header = request.args.get("noheader", "false").lower() == "true"
    assert core is not None

    # Prefer 16-bit 16 kHz mono, but will convert with sox if needed
    wav_data = await request.data
    if no_header:
        # Wrap in WAV
        wav_data = buffer_to_wav(wav_data)

    last_voice_wav = wav_data

    start_time = time.perf_counter()
    result = await core.transcribe_wav(wav_data)
    end_time = time.perf_counter()

    # Send to websocket
    await add_ws_event(
        "transcription",
        json.dumps({
            "text": result.text,
            "wakewordId": "default",
            "siteId": core.siteId
        }),
    )

    if prefers_json():
        return jsonify({
            "text": result.text,
            "likelihood": result.confidence,
            "transcribe_seconds": (end_time - start_time),
            "wav_seconds": get_wav_duration(wav_data),
        })

    return result.text
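Finally, prefers_json() is also undefined here. The `await request.data` in these handlers implies an async Flask-like framework such as Quart, so a plausible sketch keys off the request's Accept header:

from quart import request  # assumption: these handlers run under Quart


def prefers_json() -> bool:
    """True if the client's Accept header asks for JSON."""
    return "application/json" in request.headers.get("Accept", "")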