def post_result() -> None:
                """Read the listener process output and send it on as a VoiceCommand.

                Any exception raised while reading from the process is logged,
                and an empty buffer is used in place of the WAV data.
                """
                # Default used when the process output cannot be read
                wav_data = bytes()
                try:
                    # First element of the result is treated as the WAV data (STDOUT)
                    wav_data, _ = self.listen_proc.communicate()
                except Exception:
                    self._logger.exception("post_result")

                # Message is addressed to this actor itself, which will forward it
                command = VoiceCommand(convert_wav(wav_data), handle=message.handle)
                self.send(self.myAddress, command)
Beispiel #2
0
    def transcribe_wav(self, wav_data: bytes) -> Tuple[str, float]:
        """Get text from a WAV buffer.

        The audio is handed to ``self.decoder`` as a single, complete
        utterance.  When the WAV header does not describe 16-bit, 16 kHz mono
        audio, the whole buffer is passed through ``convert_wav`` first
        (presumably yielding raw samples in that format -- confirm against
        its definition); otherwise the frames are read straight from the
        buffer.

        Args:
            wav_data: Contents of a WAV file, header included.

        Returns:
            ``(text, confidence)`` for the decoder's hypothesis when its
            confidence is at least ``self.min_confidence``; ``("", 0.0)``
            when there is no hypothesis or it falls below the threshold.

        Raises:
            AssertionError: If ``self.decoder`` is ``None``.
        """
        assert self.decoder is not None

        with io.BytesIO(wav_data) as wav_io, wave.open(wav_io, "rb") as wav_file:
            rate = wav_file.getframerate()
            width = wav_file.getsampwidth()
            channels = wav_file.getnchannels()
            self._logger.debug(
                "rate=%s, width=%s, channels=%s.", rate, width, channels
            )

            if (rate, width, channels) == (16000, 2, 1):
                # Already in the expected format: use the original frames
                audio_data = wav_file.readframes(wav_file.getnframes())
            else:
                self._logger.info("Need to convert to 16-bit 16Khz mono.")
                audio_data = convert_wav(wav_data)

        # Process data as an entire utterance.  A monotonic clock is used so
        # the reported duration cannot be skewed by system clock adjustments.
        start_time = time.perf_counter()
        self.decoder.start_utt()
        # NOTE(review): the positional flags look like pocketsphinx's
        # (no_search, full_utt) -- confirm against the decoder in use.
        self.decoder.process_raw(audio_data, False, True)
        self.decoder.end_utt()
        elapsed = time.perf_counter() - start_time

        self._logger.debug("Decoded WAV in %s second(s)", elapsed)

        hyp = self.decoder.hyp()
        if hyp is not None:
            confidence = self.decoder.get_logmath().exp(hyp.prob)
            self._logger.debug("Transcription confidence: %s", confidence)
            if confidence >= self.min_confidence:
                # Return best transcription
                self._logger.debug(hyp.hypstr)
                return hyp.hypstr, confidence

            self._logger.warning(
                "Transcription did not meet confidence threshold: %s < %s",
                confidence,
                self.min_confidence,
            )

        # No transcription; float zero matches the declared return type
        return "", 0.0
Beispiel #3
0
    def in_recording(self, message: Any, sender: RhasspyActor) -> None:
        """Process one message while in the recording state.

        * ``MqttMessage`` on ``self.topic_audio_frame``: the payload is opened
          as WAV data.  Frames that are 16-bit, 16 kHz mono are used directly;
          anything else goes through ``convert_wav``.  The audio is sent to
          every receiver as ``AudioData`` and appended to every buffer.
          Messages on other topics are ignored.
        * ``StartStreaming``: registers ``message.receiver`` (or the sender).
        * ``StartRecordingToBuffer``: resets the named buffer to empty bytes.
        * ``StopStreaming``: removes ``message.receiver``, or all receivers
          when none is given.
        * ``StopRecordingToBuffer``: with a buffer name, removes that buffer
          and sends its contents as ``AudioData`` to ``message.receiver`` (or
          the sender); without one, drops all buffers and sends nothing.
        """
        if isinstance(message, MqttMessage):
            if message.topic != self.topic_audio_frame:
                return

            # Pull the audio out of the WAV payload
            with io.BytesIO(message.payload) as stream:
                with wave.open(stream, mode="rb") as reader:
                    wav_format = (
                        reader.getframerate(),
                        reader.getsampwidth(),
                        reader.getnchannels(),
                    )
                    if wav_format == (16000, 2, 1):
                        # Expected format already: take the frames as they are
                        audio_data = reader.readframes(reader.getnframes())
                    else:
                        audio_data = convert_wav(message.payload)

            data_message = AudioData(audio_data)

            # Stream to everyone who subscribed
            for subscriber in self.receivers:
                self.send(subscriber, data_message)

            # Grow every active buffer
            for name in self.buffers:
                self.buffers[name] += audio_data
            return

        if isinstance(message, StartStreaming):
            self.receivers.append(message.receiver or sender)
            return

        if isinstance(message, StartRecordingToBuffer):
            self.buffers[message.buffer_name] = bytes()
            return

        if isinstance(message, StopStreaming):
            if message.receiver is not None:
                self.receivers.remove(message.receiver)
            else:
                # No receiver given: drop them all
                self.receivers.clear()
            return

        if isinstance(message, StopRecordingToBuffer):
            if message.buffer_name is not None:
                # Hand the recorded audio back (empty if the buffer is unknown)
                recorded = self.buffers.pop(message.buffer_name, bytes())
                self.send(message.receiver or sender, AudioData(recorded))
            else:
                # No name given: drop every buffer
                self.buffers.clear()