# External dependencies used by the classes in this section. Project-internal
# names (Packet, ServerAPI, Speaker, CHUNK, FILENAME,
# choose_from_available_devices, ...) are defined elsewhere in the project.
import base64
import logging
import time
import wave

import paho.mqtt.client as mqtt
import pyaudio
import speech_recognition as sr
from pyaudio import PyAudio
from stmpy import Driver, Machine


class Recorder:

    def __init__(self, mqtt_client, device):
        self.recording = False
        self.chunk = 1024
        self.sample_format = pyaudio.paInt16
        self.channels = 2  # you might change this based on your mic
        self.fs = 44100
        self.filename = "output.wav"
        self.p = pyaudio.PyAudio()
        self.mqtt_client = mqtt_client
        self.device = device

        t0 = {'source': 'initial', 'target': 'ready'}
        t1 = {'trigger': 'start', 'source': 'ready', 'target': 'recording'}
        t2 = {
            'trigger': 'done',
            'source': 'recording',
            'target': 'processing',
            'effect': 'stop'
        }
        t3 = {'trigger': 'done', 'source': 'processing', 'target': 'ready'}

        s_recording = {
            'name': 'recording',
            'do': 'record()',
            "stop": "stop()",
            "start_timer": "start_timer('stop', 5000)"
        }
        s_processing = {'name': 'processing', 'do': 'process()'}

        self.stm = Machine(name='recorder',
                           transitions=[t0, t1, t2, t3],
                           states=[s_recording, s_processing],
                           obj=self)

    def record(self):
        stream = self.p.open(format=self.sample_format,
                             channels=self.channels,
                             rate=self.fs,
                             frames_per_buffer=self.chunk,
                             input=True
                             # input_device_index=x to specify mic input
                             )
        self.frames = []  # Initialize array to store frames

        # Record until the 'stop' timer fires (started below with a 5-second timeout)
        self.recording = True
        self.stm.send("start_timer")
        while self.recording:
            data = stream.read(self.chunk)
            self.frames.append(data)

        # Stop and close the stream
        stream.stop_stream()
        stream.close()
        # Terminate the PortAudio interface
        self.p.terminate()

    def stop(self):
        self.recording = False

    def process(self):
        # Save the recorded data as a WAV file
        wf = wave.open(self.filename, 'wb')
        wf.setnchannels(self.channels)
        wf.setsampwidth(self.p.get_sample_size(self.sample_format))
        wf.setframerate(self.fs)
        wf.writeframes(b''.join(self.frames))
        wf.close()

        f = open("output.wav", "rb")
        imagestring = f.read()
        f.close()

        # encoding
        byteArray = bytearray(imagestring)
        self.mqtt_client.publish(
            self.device.make_topic_string("/audio/" + str(self.device.device.id)),
            payload=byteArray,
            qos=2)
        self.device.driver.send("over", "device")
class AudioHelper:

    def __init__(self):
        self.recorder = Recorder()

        t0_r = {'source': 'initial', 'target': 'ready'}
        t1_r = {
            'trigger': 'start_recording',
            'source': 'ready',
            'target': 'recording'
        }
        t2_r = {'trigger': 'done', 'source': 'recording', 'target': 'ready'}

        s_ready = {'name': 'ready'}
        s_recording = {
            'name': 'recording',
            'do': 'record()',
            "stop": "stop_recording()"
        }

        self.stm_recording = Machine(name='stm_recording',
                                     transitions=[t0_r, t1_r, t2_r],
                                     states=[s_ready, s_recording],
                                     obj=self.recorder)
        self.recorder.stm = self.stm_recording

        self.speaker = Speaker()

        t0_s = {'source': 'initial', 'target': 'ready'}
        t1_s = {'trigger': 'speak', 'source': 'ready', 'target': 'speaking'}
        t2_s = {'trigger': 'done', 'source': 'speaking', 'target': 'ready'}

        s1_s = {'name': 'speaking', 'do': 'speak(*)', 'speak': 'defer'}

        self.stm_speaker = Machine(name='stm_speaker',
                                   transitions=[t0_s, t1_s, t2_s],
                                   states=[s1_s],
                                   obj=self.speaker)
        self.speaker.stm = self.stm_speaker

        self.driver = Driver()
        self.driver.add_machine(self.stm_recording)
        self.driver.add_machine(self.stm_speaker)
        self.driver.start()
        print('Audio Module ready')

    def play_audio_noStm(self, filename):
        # Open the sound file
        wf = wave.open(filename, 'rb')

        # Create an interface to PortAudio
        p = pyaudio.PyAudio()

        # Open a Stream object to write the WAV file to
        # 'output=True' indicates that the sound will be played rather than recorded
        stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                        channels=wf.getnchannels(),
                        rate=wf.getframerate(),
                        output=True)

        # Read data in chunks
        data = wf.readframes(CHUNK)
        # self.playing = True

        # Play the sound by writing the audio data to the stream
        while data != b'':  # and self.playing:
            stream.write(data)
            data = wf.readframes(CHUNK)

        # Close and terminate the stream
        stream.close()
        p.terminate()

    def start_recording(self):
        # print("driver started")
        self.last_record = []
        self.stm_recording.send('start_recording')

    def stop_recording(self):
        self.stm_recording.send("stop")

    def get_tmp_filename(self):
        # Returns the filename that is used for temp storage
        return FILENAME

    def get_recorded_samples(self):
        return b''.join(self.stm_recording._obj.getFrames())

    def text_to_speech(self, text):
        self.stm_speaker.send('speak', args=[text])
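# Hedged usage sketch (not part of the original code): one plausible way to
# drive AudioHelper. It assumes the no-argument Recorder and Speaker helpers
# that AudioHelper references (distinct from the Recorder class above) and the
# CHUNK/FILENAME constants are defined in the same module; the three-second
# sleep is an arbitrary example duration.
def _audio_helper_demo():
    helper = AudioHelper()                   # starts its own stmpy Driver internally
    helper.start_recording()                 # dispatch 'start_recording' to the recorder machine
    time.sleep(3)
    helper.stop_recording()                  # internal 'stop' action ends the record() loop
    helper.text_to_speech("over and out")    # deferred if the speaker machine is busy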
class MqttAPI():

    def __init__(self, driver: Driver, serverAPI: ServerAPI, py_audio: PyAudio):
        self.logger = logging.getLogger("WalkieTalkie")
        self.players = {}  # userID -> audio player object
        self.serverAPI = serverAPI
        self.driver = driver
        self.py_audio = py_audio
        self.queue = []
        self.max_current_priority = -1

        MQTT_BROKER = 'mqtt.item.ntnu.no'
        MQTT_PORT = 1883

        # create MQTT client
        self.logger.info(
            f'Connecting to MQTT broker {MQTT_BROKER} at port {MQTT_PORT}')
        self.mqtt_client = mqtt.Client()
        self.mqtt_client.on_connect = self.on_connect
        self.mqtt_client.on_message = self.on_message
        self.mqtt_client.connect(MQTT_BROKER, MQTT_PORT)
        self.mqtt_client.loop_start()
        self.update_subscriptions()

        # Create state machine
        self.state_machine = Machine(name="queue_manager",
                                     transitions=self._get_transitions(),
                                     states=self._get_states(),
                                     obj=self)
        driver.add_machine(self.state_machine)

    def _get_states(self) -> list:
        return [{
            'name': 'queue',
            'entry': 'start_timer("t", 300)',
            'receive': 'add_to_queue(*)'
        }, {
            'name': 'prioritising',
            'do': 'remove_low_priority_items()',
            'receive': 'defer'
        }, {
            'name': 'sending',
            'do': 'send_queue_to_player()',
            'receive': 'defer'
        }]

    def _get_transitions(self) -> list:
        return [
            {
                'source': 'initial',
                'target': 'queue'
            },
            {
                'trigger': 't',
                'source': 'queue',
                'target': 'prioritising'
            },
            {
                'trigger': 'done',
                'source': 'prioritising',
                'target': 'sending'
            },
            {
                'trigger': 'done',
                'source': 'sending',
                'target': 'queue'
            },
        ]

    def add_to_queue(self, packet: Packet) -> None:
        self.max_current_priority = max(self.max_current_priority,
                                        packet.priority)
        self.queue.append(packet)

    def remove_low_priority_items(self) -> None:
        new_queue = []
        for packet in self.queue:
            if packet.priority >= self.max_current_priority:
                new_queue.append(packet)
        self.queue = new_queue

    def send_queue_to_player(self) -> None:
        for packet in self.queue:
            if packet.senderID != self.serverAPI.getUserID():
                if packet.senderID not in self.players:
                    newPlayer = self.getNewPlayer()
                    self.players[packet.senderID] = newPlayer
                decoded_message = packet.get_decoded_message()
                self.players[packet.senderID].play(decoded_message)
        self.queue = []
        self.max_current_priority = -1

    def getNewPlayer(self) -> AudioPlayer:
        return AudioPlayer(self.driver, self.py_audio)

    def on_connect(self, client, userdata, flags, rc) -> None:
        self.logger.info('Successfully connected to MQTT broker')

    def on_message(self, client, userdata, msg) -> None:
        packet = Packet.deserialize(msg.payload)
        self.state_machine.send('receive', args=[packet])
        self.logger.debug(f'Incoming message to topic {packet.channel}')

    def subscribe(self, topic: str) -> None:
        QoS = 0
        # subscribe() returns a (result, mid) tuple; result 0 means success
        returnedMessage = self.mqtt_client.subscribe(topic, QoS)
        if returnedMessage[0] == mqtt.MQTT_ERR_SUCCESS:
            self.logger.info(f"Successfully subscribed to channel {topic}")
        else:
            self.logger.error(f"Could not subscribe to channel {topic}")

    def update_subscriptions(self) -> None:
        """ Sync subscribed topics with channel manager """
        # Unsubscribe from all channels
        self.mqtt_client.unsubscribe("#")

        # Resubscribe to the updated channel list
        for channel in self.serverAPI.get_channels():
            print(channel)
            self.mqtt_client.subscribe(channel)

    def publish(self, packet: Packet) -> None:
        serializedMessage = packet.serialize()
        self.mqtt_client.publish(packet.channel, serializedMessage)
class AudioPlayer():

    def __init__(self, driver: Driver, py_audio: PyAudio):
        self.logger = logging.getLogger("WalkieTalkie")
        self.py_audio = py_audio
        self.state_machine = Machine(
            name="audio_player_" + str(id(self)),  # Unique identifier for each audio player object
            transitions=self._get_transitions(),
            states=self._get_states(),
            obj=self)
        driver.add_machine(self.state_machine)

    def _get_transitions(self) -> list:
        return [
            {
                'source': 'initial',
                'target': 'ready'
            },
            {
                'trigger': 'receive',
                'source': 'ready',
                'target': 'playing',
                'effect': self._start_player.__name__
            },
            {
                'trigger': 'done',
                'source': 'playing',
                'target': 'waiting_for_next_chunk'
            },
            {
                'trigger': 't',
                'source': 'waiting_for_next_chunk',
                'target': 'ready',
                'effect': self._stop_player.__name__
            },
            {
                'trigger': 'receive',
                'source': 'waiting_for_next_chunk',
                'target': 'playing',
                'effect': 'stop_timer("t")'
            },
        ]

    def _get_states(self) -> list:
        return [
            {
                'name': 'ready'
            },
            {
                'name': 'playing',
                'do': '_play_chunk(*)',
                'receive': 'defer'
            },
            {
                'name': 'waiting_for_next_chunk',
                'entry': 'start_timer("t", 10000)'
            },
        ]

    def _play_chunk(self, data) -> None:
        self.audio_stream.write(data)

    def _start_player(self) -> None:
        self.logger.info("Audio player initiated")
        self.audio_stream = self.py_audio.open(format=pyaudio.paInt16,
                                               channels=2,
                                               rate=44100,
                                               output=True)

    def _stop_player(self) -> None:
        self.logger.info("Audio player stopped")
        self.audio_stream.stop_stream()
        self.audio_stream.close()

    def play(self, decoded_message) -> None:
        """
        Play given audio data by sending it to the state machine.
        If the player is already playing something else, the data will be
        queued and played at a later point using defer.
        """
        self.state_machine.send("receive", args=[decoded_message])
class AudioRecorder:

    def __init__(self, mqttAPI: MqttAPI, driver: Driver, py_audio: PyAudio,
                 serverAPI: ServerAPI):
        self.logger = logging.getLogger("WalkieTalkie")
        self.serverAPI = serverAPI

        # Define private variables
        self._recording = False

        # Save references
        self.mqttAPI = mqttAPI
        self.py_audio = py_audio

        # Create state machine
        self.state_machine = Machine(name="audio_recorder",
                                     transitions=self._get_transitions(),
                                     states=self._get_states(),
                                     obj=self)
        driver.add_machine(self.state_machine)

    def _record(self, channel) -> None:
        fs = 44100  # Record at 44100 samples per second
        chunk = 1024  # Record in chunks of 1024 samples
        sample_format = pyaudio.paInt16  # 16 bits per sample
        channels = 2

        stream = self.py_audio.open(format=sample_format,
                                    channels=channels,
                                    rate=fs,
                                    frames_per_buffer=chunk,
                                    input=True)
        self._recording = True
        self.logger.info(f'Audio recording started for channel {channel}')

        while self._recording:
            senderID = self.serverAPI.getUserID()
            priority = self.serverAPI.getChannelPriority(channel)
            message = stream.read(chunk)
            encodedMessage = base64.b64encode(message).decode('ascii')
            packet = Packet(priority, channel, senderID, encodedMessage)
            self.mqttAPI.publish(packet)

        self.logger.info(f"Audio recording stopped for channel {channel}")
        stream.stop_stream()
        stream.close()

    def _get_states(self) -> list:
        return [
            {
                'name': 'ready'
            },
            {
                'name': 'recording',
                'do': '_record(*)',
                "stop": "stop_recording()"
            },
        ]

    def _get_transitions(self) -> list:
        return [
            {
                'source': 'initial',
                'target': 'ready'
            },
            {
                'trigger': 'start_recording',
                'source': 'ready',
                'target': 'recording'
            },
            {
                'trigger': 'done',
                'source': 'recording',
                'target': 'ready'
            },
        ]

    def stop_recording(self) -> None:
        self._recording = False

    def start_recording(self, channel) -> None:
        self.state_machine.send("start_recording", args=[channel])
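# Hedged wiring sketch (not part of the original code): one plausible way to
# assemble the sender side from MqttAPI, AudioPlayer and AudioRecorder above.
# ServerAPI and Packet are project classes defined elsewhere; the no-argument
# ServerAPI() constructor and the five-second recording window are assumptions
# made purely for illustration.
def _walkie_talkie_sender_demo():
    logging.basicConfig(level=logging.INFO)
    driver = Driver()
    py_audio = pyaudio.PyAudio()
    server_api = ServerAPI()                      # assumed constructor
    mqtt_api = MqttAPI(driver, server_api, py_audio)
    recorder = AudioRecorder(mqtt_api, driver, py_audio, server_api)
    driver.start(keep_active=True)

    channel = server_api.get_channels()[0]        # record into the first subscribed channel
    recorder.start_recording(channel)             # one packet is published per audio chunk
    time.sleep(5)
    recorder.stop_recording()                     # ends the _record() loop; machine returns to 'ready'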
class VoiceRecognizer:

    def __init__(self, device_driver, wake_word="delta", end_word="over"):
        self.over = False
        self.r = sr.Recognizer()
        self.mic = sr.Microphone(device_index=choose_from_available_devices())
        self.device_driver = device_driver

        t0 = {'source': 'initial', 'target': 'recognizer_off'}
        t1 = {
            'trigger': 'wake',
            'source': 'recognizer_off',
            'target': 'recognizer_on_wake',
        }
        t2 = {
            'trigger': 'wake',
            'source': 'recognizer_on_end',
            'target': 'recognizer_on_wake',
        }
        t3 = {
            'trigger': 'end',
            'source': 'recognizer_off',
            'target': 'recognizer_on_end',
        }
        t4 = {
            'trigger': 'end',
            'source': 'recognizer_on_wake',
            'target': 'recognizer_on_end',
        }
        t5 = {
            'trigger': 'off',
            'source': 'recognizer_on_end',
            'target': 'recognizer_off'
        }
        t6 = {
            'trigger': 'off',
            'source': 'recognizer_on_wake',
            'target': 'recognizer_off'
        }

        recognizer_off = {
            'name': 'recognizer_off',
            'entry': 'state("recognizer_off"); turn_off',
            'exit': 'turn_on'
        }
        recognizer_on_end = {
            'name': 'recognizer_on_end',
            'entry': 'state("recognizer_on_end"); check_for_word("' + end_word + '", "over")'
        }
        recognizer_on_wake = {
            'name': 'recognizer_on_wake',
            'entry': 'state("recognizer_on_wake"); check_for_word("' + wake_word + '", "wake_word")'
        }

        self.stm = Machine(
            name="voice_recognizer",
            transitions=[t0, t1, t2, t3, t4, t5, t6],
            obj=self,
            states=[recognizer_off, recognizer_on_wake, recognizer_on_end])

    def recognize_speech_from_mic(self, recognizer, mic):
        if not isinstance(recognizer, sr.Recognizer):
            raise TypeError('`recognizer` must be `Recognizer` instance')
        if not isinstance(mic, sr.Microphone):
            raise TypeError('`microphone` must be a `Microphone` instance')

        with mic as source:
            recognizer.adjust_for_ambient_noise(source)
            audio = recognizer.listen(source)
            # r.adjust_for_ambient_noise(source, duration=0.5)

        response = {"success": True, "error": None, "transcription": None}

        try:
            response["transcription"] = recognizer.recognize_google(audio)
        except sr.RequestError:
            # API was unreachable or unresponsive
            response["success"] = False
            response["error"] = "API unavailable"
        except sr.UnknownValueError:
            # speech was unintelligible
            response["error"] = "Unable to recognize speech"

        return response

    def check_for_word(self, word, trigger):
        print('VOICERECOGNIZER', 'Say something')
        time.sleep(1)
        PROMPT_LIMIT = 2
        print('VOICERECOGNIZER', self.over)
        while not self.over:
            for i in range(PROMPT_LIMIT):
                print('VOICERECOGNIZER', 'Now')
                output = self.recognize_speech_from_mic(self.r, self.mic)
                if output['transcription']:
                    break
                if not output['success']:
                    break
                print('VOICERECOGNIZER', "Didn't catch the word. Try again")

            if output['error']:
                print('VOICERECOGNIZER', "ERROR: {}".format(output["error"]))

            print('VOICERECOGNIZER',
                  "You said: {}".format(output["transcription"]))
            transcript = output["transcription"]
            if word.lower() in str(transcript).lower():
                self.over = True
                print('VOICERECOGNIZER', "Ended")
                self.device_driver.send(trigger, "device",
                                        kwargs={"message": word})
                self.stm.send("off")
                break
            else:
                print('VOICERECOGNIZER', "Not ended yet")

    def turn_off(self):
        self.over = True

    def turn_on(self):
        self.over = False

    def state(self, state):
        if not state:
            return
        print("VOICERECOGNIZER state: {}".format(state))
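# Hedged usage sketch (not part of the original code): arms the voice
# recognizer for wake-word detection. It assumes a separate state machine
# named "device" is registered on the same driver (check_for_word() sends its
# trigger there) and that choose_from_available_devices() is defined in this
# module.
def _voice_recognizer_demo(device_stm):
    device_driver = Driver()
    recognizer = VoiceRecognizer(device_driver, wake_word="delta", end_word="over")
    device_driver.add_machine(recognizer.stm)
    device_driver.add_machine(device_stm)              # machine with name "device"
    device_driver.start(keep_active=True)
    device_driver.send('wake', 'voice_recognizer')     # enter recognizer_on_wake and start listening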