def post_result() -> None:
    """Collect WAV audio from the finished listen process and forward it.

    Best effort: if reading the process output fails, an empty payload is
    logged and forwarded instead of crashing the actor.
    """
    # STDOUT -> WAV data
    try:
        wav_data, _ = self.listen_proc.communicate()
    except Exception:
        self._logger.exception("post_result")
        wav_data = bytes()

    # Actor will forward
    audio_data = convert_wav(wav_data)
    self.send(self.myAddress, VoiceCommand(audio_data, handle=message.handle))
def transcribe_wav(self, wav_data: bytes) -> Tuple[str, float]:
    """Get text from WAV buffer.

    Args:
        wav_data: complete WAV file contents (header + frames).

    Returns:
        (transcription, confidence) tuple. Returns ("", 0.0) when the
        decoder produced no hypothesis or the confidence fell below
        self.min_confidence.
    """
    # Decoder must have been loaded before transcription is requested.
    assert self.decoder is not None

    # Ensure 16-bit 16Khz mono
    with io.BytesIO(wav_data) as wav_io:
        with wave.open(wav_io, "rb") as wav_file:
            rate, width, channels = (
                wav_file.getframerate(),
                wav_file.getsampwidth(),
                wav_file.getnchannels(),
            )
            self._logger.debug(
                "rate=%s, width=%s, channels=%s.", rate, width, channels
            )

            if (rate != 16000) or (width != 2) or (channels != 1):
                self._logger.info("Need to convert to 16-bit 16Khz mono.")
                # Use converted data
                audio_data = convert_wav(wav_data)
            else:
                # Use original data
                audio_data = wav_file.readframes(wav_file.getnframes())

    # Process data as an entire utterance
    start_time = time.time()
    self.decoder.start_utt()
    self.decoder.process_raw(audio_data, False, True)
    self.decoder.end_utt()
    end_time = time.time()

    self._logger.debug("Decoded WAV in %s second(s)", end_time - start_time)

    hyp = self.decoder.hyp()
    if hyp is not None:
        # hyp.prob is in log space; convert to a linear probability.
        confidence = self.decoder.get_logmath().exp(hyp.prob)
        self._logger.debug("Transcription confidence: %s", confidence)
        if confidence >= self.min_confidence:
            # Return best transcription
            self._logger.debug(hyp.hypstr)
            return hyp.hypstr, confidence

        self._logger.warning(
            "Transcription did not meet confidence threshold: %s < %s",
            confidence,
            self.min_confidence,
        )

    # No transcription.
    # Fix: return 0.0 (float) instead of 0 (int) to honor the declared
    # Tuple[str, float] return type.
    return "", 0.0
def in_recording(self, message: Any, sender: RhasspyActor) -> None:
    """Handle messages in recording state.

    Dispatches on message type:
    - MqttMessage on the audio-frame topic: decode/convert the WAV payload,
      forward it to streaming receivers, and append it to active buffers.
    - StartStreaming / StopStreaming: manage the receiver list.
    - StartRecordingToBuffer / StopRecordingToBuffer: manage named buffers;
      stopping a specific buffer replies with its accumulated AudioData.
    """
    if isinstance(message, MqttMessage):
        if message.topic == self.topic_audio_frame:
            # Extract audio data
            with io.BytesIO(message.payload) as wav_buffer:
                with wave.open(wav_buffer, mode="rb") as wav_file:
                    rate, width, channels = (
                        wav_file.getframerate(),
                        wav_file.getsampwidth(),
                        wav_file.getnchannels(),
                    )
                    if (rate != 16000) or (width != 2) or (channels != 1):
                        # Convert to 16-bit 16Khz mono
                        audio_data = convert_wav(message.payload)
                    else:
                        # Use original data
                        audio_data = wav_file.readframes(
                            wav_file.getnframes())

                    data_message = AudioData(audio_data)

                    # Forward to subscribers
                    for receiver in self.receivers:
                        self.send(receiver, data_message)

                    # Append to buffers
                    for buffer_name in self.buffers:
                        self.buffers[buffer_name] += audio_data
    elif isinstance(message, StartStreaming):
        self.receivers.append(message.receiver or sender)
    elif isinstance(message, StartRecordingToBuffer):
        self.buffers[message.buffer_name] = bytes()
    elif isinstance(message, StopStreaming):
        if message.receiver is None:
            # Clear all receivers
            self.receivers.clear()
        elif message.receiver in self.receivers:
            # Fix: guard the removal so stopping an unknown receiver is a
            # no-op instead of raising ValueError from list.remove().
            self.receivers.remove(message.receiver)
    elif isinstance(message, StopRecordingToBuffer):
        if message.buffer_name is None:
            # Clear all buffers
            self.buffers.clear()
        else:
            # Respond with buffer
            buffer = self.buffers.pop(message.buffer_name, bytes())
            self.send(message.receiver or sender, AudioData(buffer))