async def activate(self, site):
    """Prepare DeepSpeech recognition for ``site``.

    Nothing happens when ``self.model_path`` is not an existing directory.
    Otherwise the per-site audio buffer and flags are reset, the site's
    microphone audio topic is subscribed, and a model (with its external
    scorer) plus a fresh streaming context are created for the site.
    """
    if not os.path.isdir(self.model_path):
        return

    self.audio_stream[site] = BytesLoop()
    self.active[site] = True
    self.started[site] = False

    audio_topic = 'hermod/' + site + '/microphone/audio'
    await self.client.subscribe(audio_topic)

    # Both files live side by side in the model directory.
    model_file = os.path.join(self.model_path, self.modelFile)
    scorer_file = os.path.join(self.model_path, 'deepspeech-0.7.0-models.scorer')
    self.log('START DS ASR ACTIVATE ' + model_file)

    self.models[site] = deepspeech.Model(model_file)
    site_model = self.models[site]
    site_model.enableExternalScorer(scorer_file)
    self.stream_contexts[site] = site_model.createStream()
async def activate(self, site):
    """Reset all per-site state: ``site`` becomes active but not started.

    A new audio buffer and a new queue are created for the site and its
    ``closed`` flag is cleared.
    """
    self.audio_stream[site] = BytesLoop()
    self.active[site], self.started[site] = True, False
    self._buff[site] = queue.Queue()
    self.closed[site] = False
async def activate(self, site):
    """Initialise and activate the service for ``site``.

    Raises:
        Exception: when ``self.model_path`` is not an existing directory.
    """
    if not os.path.isdir(self.model_path):
        raise Exception("Could not load Deepspeech model file")

    self.audio_stream[site] = BytesLoop()
    self.active[site] = True
    self.started[site] = False
    # One stream context per site, created from self.models.
    self.stream_contexts[site] = self.models.createStream()
async def activate(self, site):
    """Activate the hotword service for ``site``.

    Marks the site active (not started), gives it an empty audio buffer,
    subscribes to its microphone audio topic and creates its Porcupine
    instance from the configured keyword files and sensitivities.
    """
    self.active[site] = True
    self.started[site] = False
    self.audio_stream[site] = BytesLoop()

    audio_topic = 'hermod/' + site + '/microphone/audio'
    await self.client.subscribe(audio_topic)

    engine = Porcupine(
        library_path=LIBRARY_PATH,
        model_file_path=MODEL_FILE_PATH,
        keyword_file_paths=self.keyword_file_paths,
        sensitivities=self.sensitivities,
    )
    self.porcupine[site] = engine
async def startASRVAD(self, site=''):
    """Stream audio for ``site`` to the IBM Watson websocket until text is heard.

    While the site is started, no text has been heard and fewer than four
    empty/error responses have been counted, this (re)connects to
    ``self.get_url()``, sends the init parameters, starts ``send_audio`` as a
    background task and reads responses:

    * an ``error`` response counts as empty, closes the connection and
      triggers a reconnect;
    * a ``state == 'listening'`` response sets ``ibmlistening[site]``;
    * a final result with a non-empty transcript is published on
      ``hermod/<site>/asr/text`` and ends the session;
    * any other ``results`` response counts as empty.

    When four empties have been counted, ``hermod/<site>/timeout`` and
    ``hermod/<site>/dialog/end`` are published and the session ends.

    On exit - including when connecting or the handshake raises - the
    ``started`` and ``ibmlistening`` flags are cleared, the sender task is
    cancelled and the connection is closed. Exceptions raised outside the
    receive loop still propagate to the caller after that cleanup.
    """
    max_empty = 4
    self.log('ASRVAD start')
    text = ''
    sender = None
    conn = None  # stays None if we never manage to connect
    self.empty_count[site] = 0
    try:
        # reconnect on error while started and no text heard
        while (site in self.started and self.started[site]
               and not len(text) > 0
               and self.empty_count[site] < max_empty):
            # clear stream buffer
            self.audio_stream[site] = BytesLoop()
            self.log('ASRVAD CONNECT')
            async with websockets.connect(
                    self.get_url(), extra_headers=self.get_headers()) as conn:
                # Send the request and wait for the first reply (its content
                # is not inspected).
                self.connections[site] = conn
                await conn.send(json.dumps(self.get_init_params()))
                await conn.recv()
                self.ibmlistening[site] = True
                # clear task from previous loop
                if sender:
                    self.log('ASRVAD CLEAR TASK')
                    sender.cancel()
                sender = asyncio.create_task(self.send_audio(conn, site))

                # Keep receiving until we have the final transcript
                while True:
                    self.log('ASRVAD RESTART LOOP')
                    if self.empty_count[site] >= max_empty:
                        await self.client.publish(
                            'hermod/'+site+'/timeout', json.dumps({}))
                        await self.client.publish(
                            'hermod/'+site+'/dialog/end',
                            json.dumps({"id": self.last_start_id.get(site, '')}))
                        self.started[site] = False
                        break
                    try:
                        rec = await conn.recv()
                        parsed = json.loads(rec)

                        if parsed.get("error", False):
                            self.log('ASRVAD ERROR FROM IBM')
                            self.log(parsed.get('error'))
                            self.empty_count[site] = self.empty_count[site] + 1
                            self.ibmlistening[site] = False
                            try:
                                await conn.close()
                            except Exception:
                                # best effort: we reconnect on the next pass
                                pass
                            break

                        if parsed.get('state') == 'listening':
                            self.log('ASRVAD SET LISTENING '+site)
                            self.ibmlistening[site] = True

                        if "results" in parsed:
                            results = parsed["results"]
                            self.log('RESULTS')
                            self.log(results)
                            if (len(results) > 0
                                    and "final" in results[0]
                                    and results[0]["final"]
                                    and len(results[0]['alternatives']) > 0):
                                text = str(
                                    results[0]["alternatives"][0].get("transcript", ""))
                                self.log('ASRVAD got text [{}]'.format(text))
                                if len(text) > 0:
                                    self.empty_count[site] = 0
                                    await self.client.publish(
                                        'hermod/'+site+'/asr/text',
                                        json.dumps({
                                            'text': text,
                                            "id": self.last_start_id.get(site, ''),
                                        }))
                                    self.started[site] = False
                                    await conn.close()
                                    break
                            # NOTE(review): the nesting of this "empty result"
                            # bookkeeping was reconstructed from flattened
                            # source; it is applied to every results message
                            # that did not yield text - confirm.
                            self.log('ASRVAD incc emtpy f' + str(self.empty_count[site]))
                            self.empty_count[site] = self.empty_count[site] + 1
                            self.ibmlistening[site] = False
                    except KeyError:
                        self.log('ASRVAD KEYERROR')
                        await conn.close()
                        break
                    except Exception as e:
                        self.log('ASRVAD OTHERR')
                        self.log(e)
                        await conn.close()
                        break
    finally:
        # cleanup, also on connect/handshake failure
        self.started[site] = False
        self.ibmlistening[site] = False
        if sender:
            sender.cancel()
        if conn is not None:
            try:
                await conn.close()
            except Exception as e:
                # closing is best effort; record why it failed
                self.log(e)
async def activate(self, site):
    """Mark ``site`` active (not started) and listen to its microphone.

    A fresh audio buffer is created for the site before subscribing to its
    ``microphone/audio`` topic.
    """
    self.audio_stream[site] = BytesLoop()
    self.active[site], self.started[site] = True, False
    audio_topic = 'hermod/'+site+'/microphone/audio'
    await self.client.subscribe(audio_topic)
async def on_message(self, msg):
    """Handle an MQTT message for the transcoder-backed ASR service.

    The site is the second segment of the topic; topics with fewer than two
    segments are ignored. Handled topics (``hermod/<site>/...``):

    * ``asr/activate`` - no-op.
    * ``asr/deactivate`` - drop the audio buffer, clear active/started.
    * ``asr/start`` - remember the dialog id from the JSON payload (``''``
      when the payload is missing, malformed or not an object), create and
      start a Transcoder, subscribe to microphone audio and (re)arm the
      no-packet and total-time timeout tasks.
    * ``asr/stop`` - cancel both timeouts, stop the transcoder, unsubscribe.
    * ``microphone/audio`` - run VAD on the first two 480-sample frames,
      track consecutive non-speech packets and, while started, forward audio
      to the transcoder until 100 consecutive non-speech packets are seen.

    NOTE(review): the nesting inside the audio branch was reconstructed from
    flattened source - confirm against the original layout.
    """
    topic = "{}".format(msg.topic)
    parts = topic.split("/")
    if len(parts) < 2:
        # no site segment, nothing we can route
        return
    site = parts[1]
    prefix = 'hermod/' + site
    audio_topic = prefix + '/microphone/audio'

    if topic == prefix + '/asr/activate':
        pass

    elif topic == prefix + '/asr/deactivate':
        self.audio_stream.pop(site, '')
        self.active[site] = False
        self.started[site] = False

    elif topic == prefix + '/asr/start':
        payload = {}
        try:
            payload = json.loads(msg.payload)
        except (ValueError, TypeError):
            # JSONDecodeError and UnicodeDecodeError are both ValueError
            pass
        if not isinstance(payload, dict):
            payload = {}
        self.last_dialog_id[site] = payload.get('id', '')
        self.audio_stream[site] = BytesLoop()
        self.active[site] = True
        self.started[site] = True
        self.last_audio[site] = time.time()
        self.transcoders[site] = Transcoder(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            rate=16000,
            language=self.config['services']['GoogleAsrService']['language'],
            mqtt_client=self.client,
            site=site,
            last_dialog_id=self.last_dialog_id[site]
        )
        self.transcoders[site].start()
        await self.client.subscribe(audio_topic)
        # timeout if no packets
        if site in self.no_packet_timeouts:
            self.no_packet_timeouts[site].cancel()
        self.no_packet_timeouts[site] = self.loop.create_task(
            self.no_packet_timeout(site, msg))
        # total time since start
        if site in self.total_time_timeouts:
            self.total_time_timeouts[site].cancel()
        self.total_time_timeouts[site] = self.loop.create_task(
            self.total_time_timeout(site, msg))

    elif topic == prefix + '/asr/stop':
        # clear timeouts
        for pending in (self.no_packet_timeouts, self.total_time_timeouts):
            if site in pending:
                pending[site].cancel()
        self.stop_transcoder(site)
        self.started[site] = False
        await self.client.unsubscribe(audio_topic)

    elif topic == audio_topic:
        self.audio_count = self.audio_count + 1
        buffer = np.frombuffer(msg.payload, np.int16)
        frame_slice1 = self.vad.is_speech(buffer[0:480].tobytes(), self.sample_rate)
        frame_slice2 = self.vad.is_speech(buffer[480:960].tobytes(), self.sample_rate)
        if frame_slice1 or frame_slice2:
            self.non_speech[site] = 0
        else:
            self.non_speech[site] = self.non_speech.get(site, 0) + 1

        # ignore until started
        if site in self.transcoders and self.started.get(site):
            if site in self.no_packet_timeouts:
                self.no_packet_timeouts[site].cancel()
            self.no_packet_timeouts[site] = self.loop.create_task(
                self.no_packet_timeout(site, msg))

            # Hold one reference so the checks below do not re-index
            # self.transcoders after stop_transcoder() has run.
            transcoder = self.transcoders[site]
            # restrict empty packets to transcoder
            silence_cutoff = 100
            if self.non_speech[site] < silence_cutoff:
                transcoder.closed = False
                transcoder.write(msg.payload)
                if transcoder.error and transcoder.error.code == 11:
                    # easy because no text expected so can send bail out
                    # messages directly
                    self.no_packet_timeouts[site].cancel()
                    self.stop_transcoder(site)
                    self.started[site] = False
                    await self.client.publish(
                        prefix + '/asr/timeout',
                        json.dumps({"id": self.last_dialog_id[site]}))
                    await self.client.publish(
                        prefix + '/dialog/end',
                        json.dumps({"id": self.last_dialog_id[site]}))
                if transcoder.transcript:
                    self.stop_transcoder(site)
            elif self.non_speech[site] == silence_cutoff:
                self.no_packet_timeouts[site].cancel()
                transcoder.write(msg.payload)
                self.stop_transcoder(site)
async def on_message(self, msg):
    """Handle an MQTT message for the ASR service.

    The site is the second segment of the topic; topics with fewer than two
    segments are ignored. Handled topics (``hermod/<site>/...``):

    * ``asr/activate`` / ``asr/deactivate`` - delegate to ``activate`` /
      ``deactivate``.
    * ``asr/start`` - when the site is known in ``self.active``: remember the
      dialog id from the JSON payload, mark the site started, create and
      start a Transcoder and subscribe to microphone audio.
    * ``asr/stop`` - clear the started flag and unsubscribe from audio.
    * ``microphone/audio`` - forward the packet to the site's transcoder and,
      once it has a transcript, publish it on ``asr/text`` and reset it.

    The id published with the transcript is the one remembered from
    ``asr/start``; the audio packet itself is binary and carries no id.
    NOTE(review): assumes the ``asr/start`` payload is a JSON object with an
    optional ``id`` key - confirm against the publisher.
    """
    topic = "{}".format(msg.topic)
    parts = topic.split("/")
    if len(parts) < 2:
        # no site segment, nothing we can route
        return
    site = parts[1]
    activateTopic = 'hermod/' + site + '/asr/activate'
    deactivateTopic = 'hermod/' + site + '/asr/deactivate'
    startTopic = 'hermod/' + site + '/asr/start'
    stopTopic = 'hermod/' + site + '/asr/stop'
    audioTopic = 'hermod/' + site + '/microphone/audio'

    if topic == activateTopic:
        await self.activate(site)

    elif topic == deactivateTopic:
        await self.deactivate(site)

    elif topic == startTopic:
        self.log('start ASR ' + site)
        if site in self.active:
            self.log('start ASR active ' + site)
            # Remember which dialog this recognition belongs to.
            start_payload = {}
            try:
                start_payload = json.loads(msg.payload)
            except (ValueError, TypeError):
                # empty, malformed or undecodable payload: no id
                pass
            if not isinstance(start_payload, dict):
                start_payload = {}
            if not hasattr(self, 'last_dialog_id'):
                self.last_dialog_id = {}
            self.last_dialog_id[site] = start_payload.get('id', '')

            self.started[site] = True
            self.last_audio[site] = time.time()
            self.audio_stream[site] = BytesLoop()
            self.transcoders[site] = Transcoder(
                encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
                rate=16000,
                language=self.config['services']['GoogleAsrService']['language'])
            self.transcoders[site].start()
            await self.client.subscribe(audioTopic)

    elif topic == stopTopic:
        self.log('stop ASR ' + site)
        self.started[site] = False
        await self.client.unsubscribe(audioTopic)

    elif topic == audioTopic:
        self.audio_count = self.audio_count + 1
        self.last_audio[site] = time.time()
        if site in self.transcoders:
            transcoder = self.transcoders[site]
            transcoder.write(msg.payload)
            transcoder.closed = False
            if transcoder.transcript:
                self.log("GOT TEXT " + transcoder.transcript)
                dialog_id = getattr(self, 'last_dialog_id', {}).get(site, '')
                await self.client.publish(
                    'hermod/' + site + '/asr/text',
                    json.dumps({
                        "id": dialog_id,
                        'text': transcoder.transcript
                    }))
                transcoder.transcript = None