async def mic2wav(core: RhasspyCore, profile: Profile, args: Any) -> None:
    """Record a voice command from the microphone and write WAV bytes to stdout."""
    # Listen until silence is detected (bounded by args.timeout)
    command = await core.record_command(args.timeout)
    wav_data = buffer_to_wav(command.data)

    # Emit the WAV container on the binary stdout stream
    sys.stdout.buffer.write(wav_data)
def api_stop_recording() -> Response:
    """End recording voice command. Transcribe and handle.

    Query args:
        nohass: "true" to skip forwarding the intent to Home Assistant.
        name: named audio buffer to stop recording from.

    Returns:
        JSON response with the recognized (and optionally handled) intent.
    """
    assert core is not None
    no_hass = request.args.get("nohass", "false").lower() == "true"

    buffer_name = request.args.get("name", "")
    audio_data = core.stop_recording_wav(buffer_name).data
    wav_data = buffer_to_wav(audio_data)
    # Fix: lazy %-style logger args instead of eager "%" formatting, so the
    # message is only built when DEBUG logging is enabled (matches the async
    # variant of this endpoint elsewhere in the file).
    logger.debug("Recorded %s byte(s) of audio data", len(wav_data))

    transcription = core.transcribe_wav(wav_data)
    text = transcription.text
    logger.debug(text)

    intent = core.recognize_intent(text).intent
    intent["speech_confidence"] = transcription.confidence

    intent_json = json.dumps(intent)
    logger.debug(intent_json)
    add_ws_event(WS_EVENT_INTENT, intent_json)

    if not no_hass:
        # Send intent to Home Assistant
        intent = core.handle_intent(intent).intent

    return jsonify(intent)
async def api_speech_to_text() -> str:
    """Transcribe speech from WAV file."""
    assert core is not None
    wrap_header = request.args.get("noheader", "false").lower() == "true"

    # Prefer 16-bit 16Khz mono, but will convert with sox if needed
    wav_data = await request.data
    if wrap_header:
        # Raw samples were posted; add a WAV header around them
        wav_data = buffer_to_wav(wav_data)

    started = time.perf_counter()
    result = await core.transcribe_wav(wav_data)
    elapsed = time.perf_counter() - started

    if prefers_json():
        # Structured response with timing details
        return jsonify(
            {
                "text": result.text,
                "likelihood": result.confidence,
                "transcribe_seconds": elapsed,
                "wav_seconds": get_wav_duration(wav_data),
            }
        )

    return result.text
def in_started(self, message: Any, sender: RhasspyActor) -> None:
    """Handle messages in started state.

    Forwards recognized intents, wake-word detections, and transcriptions to
    websocket clients; caches the most recent voice command as WAV data.
    """
    global last_voice_wav
    if isinstance(message, IntentRecognized):
        # Add slots: map each recognized entity name to its value
        message.intent["slots"] = {
            ev["entity"]: ev["value"] for ev in message.intent.get("entities", [])
        }

        # Convert to JSON and forward to websocket clients
        intent_json = json.dumps(message.intent)
        self._logger.debug(intent_json)
        asyncio.run_coroutine_threadsafe(add_ws_event("intent", intent_json), loop)
    elif isinstance(message, WakeWordDetected):
        assert core is not None
        wake_json = json.dumps({"wakewordId": message.name, "siteId": core.siteId})
        asyncio.run_coroutine_threadsafe(add_ws_event("wake", wake_json), loop)
    elif isinstance(message, WavTranscription):
        assert core is not None
        transcription_json = json.dumps(
            {
                "text": message.text,
                "wakewordId": message.wakewordId,
                "siteId": core.siteId,
            }
        )
        # Fix: event name was "transcription_json" (the variable name leaked
        # into the event id); the HTTP endpoints publish this event as
        # "transcription", so use the same name for consistency.
        asyncio.run_coroutine_threadsafe(
            add_ws_event("transcription", transcription_json), loop
        )
    elif isinstance(message, VoiceCommand):
        # Save last voice command
        last_voice_wav = buffer_to_wav(message.data)
async def mic2text(core: RhasspyCore, profile: Profile, args: Any) -> None:
    """Record a voice command from the microphone and print its transcription."""
    # Listen until silence (bounded by args.timeout)
    command = await core.record_command(args.timeout)
    wav_data = buffer_to_wav(command.data)

    # Speech -> text
    transcription = await core.transcribe_wav(wav_data)

    # Print the transcription
    print(transcription.text)
async def api_speech_to_text() -> str:
    """Transcribe speech from WAV file."""
    assert core is not None
    raw_audio = request.args.get("noheader", "false").lower() == "true"

    # Prefer 16-bit 16Khz mono, but will convert with sox if needed
    wav_data = await request.data
    if raw_audio:
        # Wrap raw samples in a WAV container
        wav_data = buffer_to_wav(wav_data)

    transcription = await core.transcribe_wav(wav_data)
    return transcription.text
def in_awake(self, message: Any, sender: RhasspyActor) -> None:
    """Handle messages in awake state.

    On a completed voice command: play the "recorded" beep (if configured),
    then hand the audio to the decoder and move to the decoding state.
    """
    if isinstance(message, VoiceCommand):
        # Recorded beep.
        # Fix: check for a missing "sounds.recorded" setting BEFORE calling
        # os.path.expandvars — expandvars(None) raises TypeError.
        wav_path = self.profile.get("sounds.recorded", None)
        if wav_path is not None:
            wav_path = os.path.expandvars(wav_path)
            self.send(self.player, PlayWavFile(wav_path))

        # speech -> text
        wav_data = buffer_to_wav(message.data)
        self.send(self.decoder, TranscribeWav(wav_data, handle=message.handle))
        self.transition("decoding")
    else:
        self.handle_any(message, sender)
async def mic2intent(core: RhasspyCore, profile: Profile, args: Any) -> None:
    """Record a voice command, transcribe it, and print the recognized intent as JSON."""
    # Listen until silence
    command = await core.record_command(args.timeout)
    wav_data = buffer_to_wav(command.data)

    # Speech -> text
    transcription = await core.transcribe_wav(wav_data)
    sentence = transcription.text

    # Text -> intent
    recognition = await core.recognize_intent(sentence)
    intent = recognition.intent

    if args.handle:
        # Optionally forward the intent to the configured handler
        handled = await core.handle_intent(intent)
        intent = handled.intent

    # Pretty-print intent JSON to stdout
    json.dump(intent, sys.stdout, indent=4)
async def api_stop_recording() -> Response:
    """End recording voice command. Transcribe and handle."""
    global last_voice_wav
    assert core is not None
    no_hass = request.args.get("nohass", "false").lower() == "true"
    buffer_name = request.args.get("name", "")

    # Stop the named recording buffer and wrap the audio in a WAV container
    recording = await core.stop_recording_wav(buffer_name)
    wav_data = buffer_to_wav(recording.data)
    logger.debug("Recorded %s byte(s) of audio data", len(wav_data))

    # Speech -> text
    transcription = await core.transcribe_wav(wav_data)
    text = transcription.text
    logger.debug(text)

    # Send to websocket
    transcription_json = json.dumps(
        {"text": text, "wakewordId": "default", "siteId": core.siteId}
    )
    await add_ws_event("transcription", transcription_json)

    # Text -> intent
    recognition = await core.recognize_intent(text)
    intent = recognition.intent
    intent["speech_confidence"] = transcription.confidence

    intent_json = json.dumps(intent)
    logger.debug(intent_json)
    await add_ws_event("intent", intent_json)

    if not no_hass:
        # Send intent to Home Assistant
        handled = await core.handle_intent(intent)
        intent = handled.intent

    # Save last voice command WAV data
    last_voice_wav = wav_data

    return jsonify(intent)
def api_stop_recording() -> Response:
    """End recording voice command. Transcribe and handle.

    Query args:
        nohass: "true" to skip forwarding the intent to Home Assistant.
        name: named audio buffer to stop recording from.

    Returns:
        JSON response with the recognized (and optionally handled) intent.
    """
    assert core is not None
    no_hass = request.args.get("nohass", "false").lower() == "true"

    buffer_name = request.args.get("name", "")
    audio_data = core.stop_recording_wav(buffer_name).data
    wav_data = buffer_to_wav(audio_data)
    # Fix: lazy %-style logger args instead of eager "%" formatting, so the
    # message is only built when DEBUG logging is enabled.
    logger.debug("Recorded %s byte(s) of audio data", len(wav_data))

    text = core.transcribe_wav(wav_data).text
    logger.debug(text)

    intent = core.recognize_intent(text).intent
    logger.debug(intent)

    if not no_hass:
        # Send intent to Home Assistant
        intent = core.handle_intent(intent).intent

    return jsonify(intent)
async def api_speech_to_text() -> str:
    """Transcribe speech from WAV file."""
    global last_voice_wav
    assert core is not None
    wrap_header = request.args.get("noheader", "false").lower() == "true"

    # Prefer 16-bit 16Khz mono, but will convert with sox if needed
    wav_data = await request.data
    if wrap_header:
        # Raw samples were posted; add a WAV header around them
        wav_data = buffer_to_wav(wav_data)

    # Remember the most recent audio so it can be replayed/inspected later
    last_voice_wav = wav_data

    started = time.perf_counter()
    result = await core.transcribe_wav(wav_data)
    finished = time.perf_counter()

    # Send to websocket
    await add_ws_event(
        "transcription",
        json.dumps(
            {"text": result.text, "wakewordId": "default", "siteId": core.siteId}
        ),
    )

    if prefers_json():
        # Structured response with timing details
        return jsonify(
            {
                "text": result.text,
                "likelihood": result.confidence,
                "transcribe_seconds": finished - started,
                "wav_seconds": get_wav_duration(wav_data),
            }
        )

    return result.text