class KaldiRecognizer():
    """Microphone speech recognizer driven by a Kaldi ``DecoderPipeline``.

    Recognition state (``words`` and ``finished``) is stored on the class
    rather than on the instance, because the decoder callbacks are
    registered as classmethods. All instances therefore share one word
    buffer and one end-of-stream flag.
    """

    def __init__(self):
        """Build the decoder, register its callbacks and start the GObject loop.

        The GObject main loop runs in a background thread (``gi_thread``)
        until ``self.loop.quit()`` is called.
        """
        # voxforge/tri2b_mmi_b0.05 model:
        decoder_conf = {
            "model": ENGLISH_MODEL_PATH + "final.mdl",
            "lda-mat": ENGLISH_MODEL_PATH + "final.mat",
            "word-syms": ENGLISH_MODEL_PATH + "words.txt",
            "fst": ENGLISH_MODEL_PATH + "HCLG.fst",
            "silence-phones": "6"
        }
        self.decoder_pipeline = DecoderPipeline({"decoder": decoder_conf})
        self.__class__.words = []
        self.__class__.finished = False

        self.decoder_pipeline.set_word_handler(self.word_getter)
        # The second argument is forwarded to set_eos_handler as-is; at this
        # point it is simply the current value of ``finished`` (False).
        self.decoder_pipeline.set_eos_handler(self.set_finished, self.finished)

        GObject.threads_init()
        self.loop = GObject.MainLoop()
        self.gi_thread = Thread(target=self.loop.run, args=())
        self.gi_thread.start()

    @classmethod
    def word_getter(cls, word):
        """Decoder word callback: append ``word`` to the shared word buffer."""
        cls.words.append(word)

    @classmethod
    def set_finished(cls, finished):
        """Decoder end-of-stream callback: mark the current request finished.

        ``finished`` is accepted for the handler signature but is not used.
        """
        cls.finished = True

    def reset(self):
        """Clear the shared word buffer and the end-of-stream flag."""
        self.__class__.words = []
        self.__class__.finished = False

    def recognize(self, args):
        """Listen on the microphone and dispatch every recognized utterance.

        Chunks are read from a PyAudio stream. Once a chunk's RMS reaches
        ``THRESHOLD`` a decoder request is opened and audio is fed to it
        until ``SILENCE_DETECTION`` consecutive chunks fall below
        ``THRESHOLD``. The recognized words are then joined and handed to
        ``VirtualAssistant.command(com, args)`` on a new thread.

        The stream and the PyAudio session are released on every exit path,
        not only on ``KeyboardInterrupt``.

        Args:
            args: Passed through unchanged to ``VirtualAssistant.command``.

        Raises:
            KeyboardInterrupt: Re-raised (original traceback intact) after
                the GObject main loop has been asked to quit.
        """
        with noalsaerr():
            audio = pyaudio.PyAudio()  # Create a PyAudio session
        try:
            # Create a stream that can both capture and play back audio.
            stream = audio.open(
                format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                output=True,
                frames_per_buffer=CHUNK)
            try:
                self._listen(stream, args)
            finally:
                stream.stop_stream()
                stream.close()
        except KeyboardInterrupt:
            self.loop.quit()
            raise
        finally:
            audio.terminate()

    def _listen(self, stream, args):
        """Read chunks until an empty one arrives, decoding each loud stretch."""
        data = stream.read(CHUNK)  # Get first data frame from the microphone
        # Truthiness (rather than ``!= ''``) also terminates on empty bytes.
        while data:
            # A chunk at or above THRESHOLD starts a new utterance.
            if audioop.rms(data, 2) >= THRESHOLD:
                self._decode_utterance(stream, data)
                self._dispatch_command(args)

            data = stream.read(CHUNK)  # Read a new chunk from the stream
            if LISTENING:
                stream.write(data, CHUNK)

    def _decode_utterance(self, stream, first_chunk):
        """Feed one utterance to the decoder and wait for its end-of-stream."""
        self.decoder_pipeline.init_request(
            "recognize",
            "audio/x-raw, layout=(string)interleaved, rate=(int)16000, format=(string)S16LE, channels=(int)1"
        )
        self.decoder_pipeline.process_data(first_chunk)

        silent_chunks = 0  # Consecutive chunks below THRESHOLD
        while silent_chunks < SILENCE_DETECTION:
            data = stream.read(CHUNK)  # Read a new chunk from the stream
            if LISTENING:
                stream.write(data, CHUNK)
            self.decoder_pipeline.process_data(data)

            if audioop.rms(data, 2) < THRESHOLD:
                silent_chunks += 1
            else:
                # Any loud chunk restarts the silence count.
                silent_chunks = 0

        # The stream is paused while waiting for the decoder to finish.
        stream.stop_stream()
        self.decoder_pipeline.end_request()
        while not self.finished:
            time.sleep(0.1)
        stream.start_stream()

    def _dispatch_command(self, args):
        """Run ``VirtualAssistant.command`` on the recognized text, then reset."""
        com = self._join_words(self.words)
        worker = Thread(target=VirtualAssistant.command, args=(com, args))
        worker.start()
        self.reset()

    @staticmethod
    def _join_words(words):
        """Return ``words`` joined by spaces, skipping ``'<#s>'`` markers."""
        return ' '.join([w for w in words if w != '<#s>'])
class ServerWebsocket(WebSocketClient):
    """Websocket client that forwards received audio to a ``DecoderPipeline``.

    The connection moves through the ``STATE_*`` values below. Words reported
    by the decoder are sent back over the socket as JSON result events.
    """

    STATE_CREATED = 0
    STATE_CONNECTED = 1
    STATE_INITIALIZED = 2
    STATE_PROCESSING = 3
    STATE_EOS_RECEIVED = 7
    STATE_CANCELLING = 8
    STATE_FINISHED = 100

    # States in which further EOS markers or audio data are ignored.
    _CLOSING_STATES = (STATE_CANCELLING, STATE_EOS_RECEIVED, STATE_FINISHED)

    def __init__(self, uri):
        """Create the decoder pipeline and the websocket client for ``uri``."""
        self.uri = uri
        self.decoder_pipeline = DecoderPipeline(self._on_word, self._on_eos)
        WebSocketClient.__init__(self, url=uri, heartbeat_freq=10)
        self.pipeline_initialized = False
        self.state = self.STATE_CREATED
        self.last_decoder_message = time.time()
        self.request_id = "<undefined>"
        self.timeout_decoder = 5
        self.num_segments = 0
        self.last_partial_result = ""
        self.partial_transcript = ""

    def opened(self):
        """Mark the connection as established."""
        logger.info("Opened websocket connection to server")
        self.state = self.STATE_CONNECTED
        self.last_partial_result = ""

    def guard_timeout(self):
        """Cancel the request once the decoder is silent for too long.

        Polls once per second while the request is active. When more than
        ``SILENCE_TIMEOUT`` seconds have passed since the last decoder
        message, the request is finished, a ``STATUS_NO_SPEECH`` event is
        sent on a best-effort basis and the socket is closed.

        NOTE(review): nothing in the visible code starts this guard; the
        thread start in ``received_message`` is commented out.
        """
        active_states = (
            self.STATE_EOS_RECEIVED, self.STATE_CONNECTED,
            self.STATE_INITIALIZED, self.STATE_PROCESSING
        )
        while self.state in active_states:
            if time.time() - self.last_decoder_message > SILENCE_TIMEOUT:
                logger.warning(
                    "%s: More than %d seconds from last decoder hypothesis update, cancelling"
                    % (self.request_id, SILENCE_TIMEOUT))
                self.finish_request()
                event = dict(status=common.STATUS_NO_SPEECH)
                try:
                    self.send(json.dumps(event))
                except Exception:
                    # Best effort only; ``Exception`` (not a bare except) so
                    # KeyboardInterrupt/SystemExit are not swallowed.
                    logger.warning("%s: Failed to send error event to master" %
                                   (self.request_id))
                self.close()
                return
            logger.debug(
                "%s: Checking that decoder hasn't been silent for more than %d seconds"
                % (self.request_id, SILENCE_TIMEOUT))
            time.sleep(1)

    def received_message(self, m):
        """Handle one websocket message according to the current state.

        * In ``STATE_CONNECTED`` the message is parsed as the JSON request
          header (keys ``content_type`` and ``id``).
        * A message whose data equals ``"EOS"`` ends the decoder request.
        * Any other binary message is passed to the decoder as audio data.

        EOS and data are ignored once the worker is cancelling, has already
        received EOS, or has finished.
        """
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("Received Message")
        logger.debug("%s: Got message from server of type %s" %
                     (self.request_id, str(type(m))))
        if self.state == self.__class__.STATE_CONNECTED:
            props = json.loads(str(m))
            # Currently unused, but the lookup is kept so that a header
            # without 'content_type' still fails exactly as before.
            content_type = props['content_type']
            self.request_id = props['id']
            self.num_segments = 0
            #self.decoder_pipeline.init_request(self.request_id, content_type)
            self.last_decoder_message = time.time()
            #thread.start_new_thread(self.guard_timeout, ())
            # NOTE(review): this is logged although the guard thread start
            # above is commented out.
            logger.info("%s: Started timeout guard" % self.request_id)
            logger.info("%s: Initialized request" % self.request_id)
            self.state = self.STATE_INITIALIZED
        elif m.data == "EOS":
            # End of file indication
            if self.state not in self._CLOSING_STATES:
                self.decoder_pipeline.end_request()
                self.state = self.STATE_EOS_RECEIVED
            else:
                logger.info("%s: Ignoring EOS, worker already in state %d" %
                            (self.request_id, self.state))
        else:
            if self.state not in self._CLOSING_STATES:
                if isinstance(m, ws4py.messaging.BinaryMessage):
                    print("Process_data called, size: {}, type: {}".format(
                        len(m.data), type(m.data)))
                    self.decoder_pipeline.process_data(m.data)
                    self.state = self.STATE_PROCESSING
            else:
                logger.info("%s: Ignoring data, worker already in state %d" %
                            (self.request_id, self.state))

    def finish_request(self):
        """Bring the decoder pipeline to rest and move to ``STATE_FINISHED``.

        If no audio has been sent yet, the pipeline is finished immediately.
        Otherwise the decoder is cancelled and polled once per second, for
        at most 30 tries, until the state leaves ``STATE_CANCELLING``.
        """
        if self.state in (self.STATE_CONNECTED, self.STATE_INITIALIZED):
            # Connection closed while idle, or after the request was
            # initialized but before any data was sent.
            self.decoder_pipeline.finish_request()
            self.state = self.STATE_FINISHED
            return
        if self.state == self.STATE_FINISHED:
            return

        logger.info("%s: Master disconnected before decoder reached EOS?" %
                    self.request_id)
        self.state = self.STATE_CANCELLING
        self.decoder_pipeline.cancel()
        counter = 0
        while self.state == self.STATE_CANCELLING:
            counter += 1
            if counter > 30:
                # lost hope that the decoder will ever finish, likely it has hung
                # FIXME: this might introduce new bugs
                logger.info("%s: Giving up waiting after %d tries" %
                            (self.request_id, counter))
                self.state = self.STATE_FINISHED
            else:
                logger.info("%s: Waiting for EOS from decoder" %
                            self.request_id)
                time.sleep(1)
        self.decoder_pipeline.finish_request()
        logger.info("%s: Finished waiting for EOS" % self.request_id)

    def closed(self, code, reason=None):
        """Websocket close handler: finish whatever request is in flight."""
        logger.debug("%s: Websocket closed() called" % self.request_id)
        self.finish_request()
        logger.debug("%s: Websocket closed() finished" % self.request_id)

    def _send_hypothesis(self, final):
        """Send the current partial transcript as a result event."""
        event = dict(status=common.STATUS_SUCCESS,
                     segment=self.num_segments,
                     result=dict(hypotheses=[
                         dict(transcript=self.partial_transcript)
                     ], final=final))
        self.send(json.dumps(event))

    def _on_word(self, word):
        """Decoder word callback.

        A regular word is appended to the partial transcript and sent as a
        non-final result. The ``"<#s>"`` marker sends the transcript as a
        final result, then starts a new segment.
        """
        self.last_decoder_message = time.time()
        if word != "<#s>":
            if self.partial_transcript:
                self.partial_transcript += " "
            self.partial_transcript += word
            self._send_hypothesis(final=False)
        else:
            # TODO: word == "<#s>" is reportedly never delivered
            self._send_hypothesis(final=True)
            self.partial_transcript = ""
            self.num_segments += 1

    def _on_eos(self, data=None):
        """Decoder end-of-stream callback: mark finished and close the socket."""
        self.last_decoder_message = time.time()
        self.state = self.STATE_FINISHED
        self.close()