コード例 #1
0
    async def async_test_session(self):
        """Run a complete start / audio frame / stop session and check the output.

        The transcriber's ``transcribe_stream`` is replaced with a stub that
        returns a canned transcription. Starting the session and sending one
        audio frame must produce no messages; stopping the session must
        produce a recording-finished message, the captured text, and the
        captured audio (the latter as a ``(message, topic kwargs)`` tuple).
        """
        canned = Transcription(
            text="this is a test", likelihood=1, transcribe_seconds=0, wav_seconds=0
        )

        def fake_transcribe(stream, *args):
            """Return the canned transcription after reading the stream."""
            # Consume chunks up to and including the first empty one
            any(not chunk for chunk in stream)
            return canned

        self.transcriber.transcribe_stream = fake_transcribe

        async def collect(message, **kwargs):
            """Gather every response the service yields for one message."""
            return [
                reply
                async for reply in self.hermes.on_message_blocking(message, **kwargs)
            ]

        # Start session: no response expected
        replies = await collect(
            AsrStartListening(
                site_id=self.site_id,
                session_id=self.session_id,
                stop_on_silence=False,
                send_audio_captured=True,
            )
        )
        self.assertIsNone(replies[-1] if replies else None)

        # Send in "audio": no response expected
        fake_wav_bytes = self.hermes.to_wav_bytes(secrets.token_bytes(100))
        replies = await collect(
            AudioFrame(wav_bytes=fake_wav_bytes), site_id=self.site_id
        )
        self.assertIsNone(replies[-1] if replies else None)

        # Stop session: this is where all the results come out
        results = await collect(
            AsrStopListening(site_id=self.site_id, session_id=self.session_id)
        )

        expected = [
            AsrRecordingFinished(site_id=self.site_id, session_id=self.session_id),
            AsrTextCaptured(
                text=canned.text,
                likelihood=canned.likelihood,
                seconds=canned.transcribe_seconds,
                site_id=self.site_id,
                session_id=self.session_id,
            ),
            (
                AsrAudioCaptured(wav_bytes=fake_wav_bytes),
                {"site_id": self.site_id, "session_id": self.session_id},
            ),
        ]
        self.assertEqual(results, expected)
コード例 #2
0
    def publish_chunks(self):
        """Forward queued audio chunks as WAV data until exit is requested.

        Each non-empty chunk taken from ``self.chunk_queue`` is wrapped in a
        WAV container using the configured sample rate, width and channel
        count. The WAV bytes are sent to the UDP destination when
        ``self.udp_output`` is set, otherwise published as an ``AudioFrame``
        under ``self.output_site_id``. If a dump file is open, the payload
        after the first 44 bytes is appended to it.

        When ``self.enable_summary`` is set, a frame countdown decides which
        chunks are run through webrtcvad; for those, an ``AudioSummary`` is
        published.

        Any exception ends the loop; it is logged and reported as an
        ``AudioRecordError``.
        """
        try:
            destination = (self.udp_audio_host, self.udp_audio_port)

            while not self._exit_requested:
                chunk = self.chunk_queue.get()
                if not chunk:
                    # Nothing to forward for an empty queue item
                    continue

                # Wrap the raw frames in a WAV container
                with io.BytesIO() as buffer:
                    writer: wave.Wave_write = wave.open(buffer, "wb")
                    with writer:
                        writer.setframerate(self.sample_rate)
                        writer.setsampwidth(self.sample_width)
                        writer.setnchannels(self.channels)
                        writer.writeframes(chunk)

                    wav_bytes = buffer.getvalue()

                if self.udp_output:
                    # UDP output
                    self.udp_socket.sendto(wav_bytes, destination)
                else:
                    # MQTT output, published to the output site_id
                    self.publish(
                        AudioFrame(wav_bytes=wav_bytes),
                        site_id=self.output_site_id,
                    )

                if self._dump_file is not None:
                    # Skip the first 44 bytes, presumably the WAV header
                    # written above.
                    # NOTE(review): confirm the header is always 44 bytes.
                    payload = wav_bytes[44:]
                    if USE_SOUNDFILE:
                        # soundfile backend: samples are handed over as int16
                        self._dump_file.write(np.frombuffer(payload, np.int16))
                    else:
                        # wave backend: raw frames are written as-is
                        self._dump_file.writeframesraw(payload)

                if not self.enable_summary:
                    continue

                # Only summarize once the frame countdown runs out
                self.summary_frames_left -= 1
                if self.summary_frames_left > 0:
                    continue
                self.summary_frames_left = self.summary_skip_frames

                if not self.vad:
                    # Create the voice activity detector on first use
                    self.vad = webrtcvad.Vad()
                    self.vad.set_mode(self.vad_mode)

                # webrtcvad needs 16-bit 16Khz mono.
                # TODO: check whether the conversion could be skipped when the
                # audio already has that format; it currently runs for every
                # summarized chunk.
                self.vad_audio_data += self.maybe_convert_wav(
                    wav_bytes, sample_rate=16000, sample_width=2, channels=1
                )

                # Feed the detector fixed-size pieces (30ms for webrtcvad);
                # speech in any piece counts as speech
                speech_found = False
                while len(self.vad_audio_data) >= self.vad_chunk_size:
                    piece = self.vad_audio_data[: self.vad_chunk_size]
                    self.vad_audio_data = self.vad_audio_data[self.vad_chunk_size :]
                    if not speech_found:
                        speech_found = self.vad.is_speech(piece, 16000)

                # Publish audio summary
                summary = AudioSummary(
                    debiased_energy=AudioSummary.get_debiased_energy(chunk),
                    is_speech=speech_found,
                )
                self.publish(summary, site_id=self.output_site_id)

        except Exception as err:
            _LOGGER.exception("publish_chunks")
            # NOTE(review): the error is reported under self.site_id while
            # frames and summaries use self.output_site_id; confirm intended.
            self.publish(
                AudioRecordError(
                    error=str(err), context="publish_chunks", site_id=self.site_id
                )
            )
コード例 #3
0
    def publish_chunks(self):
        """Forward queued audio chunks as WAV data, looping indefinitely.

        Each non-empty chunk taken from ``self.chunk_queue`` is appended to
        the microphone-test buffer when that buffer is non-empty, then
        wrapped in a WAV container using the configured sample rate, width
        and channel count. The WAV bytes are sent to the UDP destination when
        ``self.udp_output`` is set, otherwise published as an ``AudioFrame``
        under ``self.output_site_id``.

        When ``self.enable_summary`` is set, a frame countdown decides which
        chunks are run through webrtcvad; for those, an ``AudioSummary`` is
        published.

        The loop only ends when an exception is raised; the exception is
        logged and reported as an ``AudioRecordError``.
        """
        try:
            destination = (self.udp_audio_host, self.udp_audio_port)

            while True:
                chunk = self.chunk_queue.get()
                if not chunk:
                    # Nothing to forward for an empty queue item
                    continue

                if self.test_audio_buffer:
                    # Add to buffer for microphone test.
                    # NOTE(review): an empty buffer is falsy, so accumulation
                    # only happens once the buffer already holds data;
                    # confirm how the caller seeds it.
                    self.test_audio_buffer += chunk

                # Wrap the raw frames in a WAV container
                with io.BytesIO() as buffer:
                    writer: wave.Wave_write = wave.open(buffer, "wb")
                    with writer:
                        writer.setframerate(self.sample_rate)
                        writer.setsampwidth(self.sample_width)
                        writer.setnchannels(self.channels)
                        writer.writeframes(chunk)

                    wav_bytes = buffer.getvalue()

                if self.udp_output:
                    # UDP output
                    self.udp_socket.sendto(wav_bytes, destination)
                else:
                    # MQTT output, published to the output site_id
                    self.publish(
                        AudioFrame(wav_bytes=wav_bytes),
                        site_id=self.output_site_id,
                    )

                if not self.enable_summary:
                    continue

                # Only summarize once the frame countdown runs out
                self.summary_frames_left -= 1
                if self.summary_frames_left > 0:
                    continue
                self.summary_frames_left = self.summary_skip_frames

                if not self.vad:
                    # Create the voice activity detector on first use
                    self.vad = webrtcvad.Vad()
                    self.vad.set_mode(self.vad_mode)

                # webrtcvad needs 16-bit 16Khz mono
                self.vad_audio_data += self.maybe_convert_wav(
                    wav_bytes, sample_rate=16000, sample_width=2, channels=1
                )

                # Feed the detector fixed-size pieces (30ms for webrtcvad);
                # speech in any piece counts as speech
                speech_found = False
                while len(self.vad_audio_data) >= self.vad_chunk_size:
                    piece = self.vad_audio_data[: self.vad_chunk_size]
                    self.vad_audio_data = self.vad_audio_data[self.vad_chunk_size :]
                    if not speech_found:
                        speech_found = self.vad.is_speech(piece, 16000)

                # Publish audio summary
                summary = AudioSummary(
                    debiased_energy=AudioSummary.get_debiased_energy(chunk),
                    is_speech=speech_found,
                )
                self.publish(summary, site_id=self.output_site_id)
        except Exception as err:
            _LOGGER.exception("publish_chunks")
            self.publish(
                AudioRecordError(
                    error=str(err),
                    context="publish_chunks",
                    site_id=self.output_site_id,
                )
            )