async def async_test_session(self):
    """Check good start/stop session."""
    stub_transcription = Transcription(
        text="this is a test", likelihood=1, transcribe_seconds=0, wav_seconds=0
    )

    def stub_transcribe(stream, *args):
        """Drain the audio stream, then return the canned transcription."""
        for frame in stream:
            if not frame:
                break

        return stub_transcription

    self.transcriber.transcribe_stream = stub_transcribe

    # Begin a listening session; the start message should produce no reply.
    start_msg = AsrStartListening(
        site_id=self.site_id,
        session_id=self.session_id,
        stop_on_silence=False,
        send_audio_captured=True,
    )
    last_response = None
    async for reply in self.hermes.on_message_blocking(start_msg):
        last_response = reply

    # No response expected
    self.assertIsNone(last_response)

    # Feed one frame of random "audio"; again, no reply is expected.
    wav_payload = self.hermes.to_wav_bytes(secrets.token_bytes(100))
    audio_frame = AudioFrame(wav_bytes=wav_payload)
    async for reply in self.hermes.on_message_blocking(
        audio_frame, site_id=self.site_id
    ):
        last_response = reply

    # No response expected
    self.assertIsNone(last_response)

    # Ending the session should emit the recording-finished notice, the
    # captured text, and the captured audio (paired with its publish args).
    stop_msg = AsrStopListening(site_id=self.site_id, session_id=self.session_id)
    replies = [reply async for reply in self.hermes.on_message_blocking(stop_msg)]

    # Check results
    self.assertEqual(
        replies,
        [
            AsrRecordingFinished(site_id=self.site_id, session_id=self.session_id),
            AsrTextCaptured(
                text=stub_transcription.text,
                likelihood=stub_transcription.likelihood,
                seconds=stub_transcription.transcribe_seconds,
                site_id=self.site_id,
                session_id=self.session_id,
            ),
            (
                AsrAudioCaptured(wav_bytes=wav_payload),
                {"site_id": self.site_id, "session_id": self.session_id},
            ),
        ],
    )
def publish_chunks(self):
    """Publish audio chunks to MQTT or UDP.

    Runs until ``self._exit_requested`` is set: pulls raw PCM chunks off
    ``self.chunk_queue``, wraps each one in a WAV container, and either
    sends it over UDP or publishes it as an ``AudioFrame`` to the output
    site.  Optionally dumps samples to a debug file and emits periodic
    ``AudioSummary`` messages carrying a webrtcvad speech flag.

    Any exception is logged and reported as an ``AudioRecordError``.
    """
    try:
        udp_dest = (self.udp_audio_host, self.udp_audio_port)
        while not self._exit_requested:
            # Blocking get; falsy chunks (e.g. empty bytes) are skipped.
            chunk = self.chunk_queue.get()
            if chunk:
                # MQTT output: wrap the raw chunk in a one-shot WAV container.
                with io.BytesIO() as wav_buffer:
                    wav_file: wave.Wave_write = wave.open(wav_buffer, "wb")
                    with wav_file:
                        wav_file.setframerate(self.sample_rate)
                        wav_file.setsampwidth(self.sample_width)
                        wav_file.setnchannels(self.channels)
                        wav_file.writeframes(chunk)

                    wav_bytes = wav_buffer.getvalue()
                    if self.udp_output:
                        # UDP output
                        self.udp_socket.sendto(wav_bytes, udp_dest)
                    else:
                        # Publish to output site_id
                        self.publish(
                            AudioFrame(wav_bytes=wav_bytes),
                            site_id=self.output_site_id,
                        )

                    if self._dump_file is not None:
                        # Debug dump of the samples.  Both branches strip the
                        # WAV header by slicing off the first 44 bytes —
                        # assumes a canonical 44-byte header (TODO confirm).
                        if USE_SOUNDFILE:
                            # soundfile path: write samples as int16
                            self._dump_file.write(
                                np.frombuffer(wav_bytes[44:], np.int16)
                            )  # removing header!
                        else:
                            # wave path: append raw frames to the dump file
                            self._dump_file.writeframesraw(
                                wav_bytes[44:]
                            )  # removing header!

                    if self.enable_summary:
                        # Only summarize once every `summary_skip_frames` chunks.
                        self.summary_frames_left -= 1
                        if self.summary_frames_left > 0:
                            continue

                        self.summary_frames_left = self.summary_skip_frames
                        if not self.vad:
                            # Create voice activity detector lazily
                            self.vad = webrtcvad.Vad()
                            self.vad.set_mode(self.vad_mode)

                        # webrtcvad needs 16-bit 16Khz mono.
                        # TODO: with --demux it might be possible to select a
                        # channel here and avoid this continuous resampling.
                        self.vad_audio_data += self.maybe_convert_wav(
                            wav_bytes, sample_rate=16000, sample_width=2, channels=1
                        )

                        is_speech = False

                        # Process in chunks of 30ms for webrtcvad
                        while len(self.vad_audio_data) >= self.vad_chunk_size:
                            vad_chunk = self.vad_audio_data[: self.vad_chunk_size]
                            self.vad_audio_data = self.vad_audio_data[
                                self.vad_chunk_size :
                            ]

                            # Speech in any chunk counts as speech
                            is_speech = is_speech or self.vad.is_speech(
                                vad_chunk, 16000
                            )

                        # Publish audio summary (energy computed on the raw chunk)
                        self.publish(
                            AudioSummary(
                                debiased_energy=AudioSummary.get_debiased_energy(
                                    chunk
                                ),
                                is_speech=is_speech,
                            ),
                            site_id=self.output_site_id,
                        )
    except Exception as e:
        _LOGGER.exception("publish_chunks")
        # NOTE(review): errors are published to self.site_id here, while the
        # sibling implementation uses self.output_site_id — confirm intent.
        self.publish(
            AudioRecordError(
                error=str(e), context="publish_chunks", site_id=self.site_id
            )
        )
def publish_chunks(self):
    """Publish audio chunks to MQTT or UDP.

    Runs forever: pulls raw PCM chunks off ``self.chunk_queue``, wraps each
    one in a WAV container, and either sends it over UDP or publishes it as
    an ``AudioFrame`` to the output site.  When a microphone test is active,
    the raw chunk is also appended to ``self.test_audio_buffer``.  Optionally
    emits periodic ``AudioSummary`` messages carrying a webrtcvad speech flag.

    Any exception is logged and reported as an ``AudioRecordError``.
    """
    try:
        udp_dest = (self.udp_audio_host, self.udp_audio_port)
        while True:
            # Blocking get; falsy chunks (e.g. empty bytes) are skipped.
            chunk = self.chunk_queue.get()
            if chunk:
                # Use an explicit None check: an *empty* test buffer is falsy,
                # so a plain truthiness test would silently drop the first
                # chunk(s) of a microphone test.
                if self.test_audio_buffer is not None:
                    # Add to buffer for microphone test
                    self.test_audio_buffer += chunk

                # MQTT output: wrap the raw chunk in a one-shot WAV container.
                with io.BytesIO() as wav_buffer:
                    wav_file: wave.Wave_write = wave.open(wav_buffer, "wb")
                    with wav_file:
                        wav_file.setframerate(self.sample_rate)
                        wav_file.setsampwidth(self.sample_width)
                        wav_file.setnchannels(self.channels)
                        wav_file.writeframes(chunk)

                    wav_bytes = wav_buffer.getvalue()
                    if self.udp_output:
                        # UDP output
                        self.udp_socket.sendto(wav_bytes, udp_dest)
                    else:
                        # Publish to output site_id
                        self.publish(
                            AudioFrame(wav_bytes=wav_bytes),
                            site_id=self.output_site_id,
                        )

                    if self.enable_summary:
                        # Only summarize once every `summary_skip_frames` chunks.
                        self.summary_frames_left -= 1
                        if self.summary_frames_left > 0:
                            continue

                        self.summary_frames_left = self.summary_skip_frames
                        if not self.vad:
                            # Create voice activity detector lazily
                            self.vad = webrtcvad.Vad()
                            self.vad.set_mode(self.vad_mode)

                        # webrtcvad needs 16-bit 16Khz mono
                        self.vad_audio_data += self.maybe_convert_wav(
                            wav_bytes, sample_rate=16000, sample_width=2, channels=1
                        )

                        is_speech = False

                        # Process in chunks of 30ms for webrtcvad
                        while len(self.vad_audio_data) >= self.vad_chunk_size:
                            vad_chunk = self.vad_audio_data[: self.vad_chunk_size]
                            self.vad_audio_data = self.vad_audio_data[
                                self.vad_chunk_size :
                            ]

                            # Speech in any chunk counts as speech
                            is_speech = is_speech or self.vad.is_speech(
                                vad_chunk, 16000
                            )

                        # Publish audio summary (energy computed on the raw chunk)
                        self.publish(
                            AudioSummary(
                                debiased_energy=AudioSummary.get_debiased_energy(
                                    chunk
                                ),
                                is_speech=is_speech,
                            ),
                            site_id=self.output_site_id,
                        )
    except Exception as e:
        _LOGGER.exception("publish_chunks")
        self.publish(
            AudioRecordError(
                error=str(e), context="publish_chunks", site_id=self.output_site_id
            )
        )