class SimluateInput:
    """Replay a sentence to the keyword server one word at a time.

    The sentence is sent as a growing sequence of hypotheses (first word,
    first two words, ...) with a short pause before each step, and is then
    marked as complete.
    """

    def __init__(self):
        """Create the keyword client for the local server and reset it."""
        self.ks = KeywordClient(server_url="http://localhost:5000/")
        # Speaker label passed along with every utterance.
        self.std_spk = "You"
        # Text of the hypothesis most recently sent for the open utterance.
        self.last_hyp = ""
        self.ks.reset()

    def update(self, utterance, delay):
        """Sleep `delay` seconds, then replace the last sent hypothesis with `utterance`."""
        time.sleep(delay)
        self.ks.replaceLastUtterance(self.last_hyp, utterance, self.std_spk)
        self.last_hyp = utterance

    def add_new(self, utterance, delay):
        """Sleep `delay` seconds, then send `utterance` as a new utterance."""
        time.sleep(delay)
        self.ks.addUtterance(utterance, self.std_spk)
        # Remember what was sent so the next update() passes the real previous
        # hypothesis rather than a stale value.
        self.last_hyp = utterance

    def complete(self, utterance):
        """Mark `utterance` as complete on the keyword server."""
        self.ks.completeUtterance(utterance, self.std_spk)

    def get_delay(self, word):
        """Return the pause in seconds for `word`: 0.015 per character."""
        return len(word) * 0.015

    def simulateSentence(self, sentence):
        """Send `sentence` incrementally, split on single spaces, then complete it."""
        words = sentence.split(" ")
        first_word = words[0]
        self.add_new(first_word, self.get_delay(first_word))
        # Each step sends the first `end` words; the pause depends on the word just added.
        for end in range(2, len(words) + 1):
            self.update(' '.join(words[:end]), self.get_delay(words[end - 1]))
        self.complete(sentence)
        # The utterance is closed; start the next sentence from an empty hypothesis.
        self.last_hyp = ""
Esempio n. 2
0
class KaldiClient(WebSocketClient):

    def print_devices(self):
        info = self.paudio.get_host_api_info_by_index(0)
        numdevices = info.get('deviceCount')
        #for each audio device, determine if is an input or an output and add it to the appropriate list and dictionary
        for i in range (0,numdevices):
            if self.paudio.get_device_info_by_host_api_device_index(0,i).get('maxInputChannels')>0:
                print "Input Device id ", i, " - ", self.paudio.get_device_info_by_host_api_device_index(0,i).get('name')

            if self.paudio.get_device_info_by_host_api_device_index(0,i).get('maxOutputChannels')>0:
                print "Output Device id ", i, " - ", self.paudio.get_device_info_by_host_api_device_index(0,i).get('name')

    def getAudioDeviceByString(audioDeviceName):
        info = self.paudio.get_host_api_info_by_index(0)
        numdevices = info.get('deviceCount')
        for i in range (0,numdevices):
            if self.paudio.get_device_info_by_host_api_device_index(0,i).get('maxInputChannels')>0:
                if audioDeviceName in self.paudio.get_device_info_by_host_api_device_index(0,i).get('name'):
                    return i
        print 'No ',audioDeviceName,' microphone found, defaulting to last available input device...'

        for i in reversed(range (0,numdevices)):
            if self.paudio.get_device_info_by_host_api_device_index(0,i).get('maxInputChannels')>0:
                return i

        print 'No input device found! Please connect a microphone or recording device'

        return -1		    

    def __init__(self, filename, url, protocols=None, extensions=None, heartbeat_freq=None, byterate=32000,
                 save_adaptation_state_filename=None, send_adaptation_state_filename=None, keyword_server_url = '', input_microphone_id=-1):
        super(KaldiClient, self).__init__(url, protocols, extensions, heartbeat_freq)
        self.final_hyps = []
        self.fn = filename
        self.byterate = byterate
        self.final_hyp_queue = Queue.Queue()
        self.save_adaptation_state_filename = save_adaptation_state_filename
        self.send_adaptation_state_filename = send_adaptation_state_filename

        self.paudio = pyaudio.PyAudio()
        self.print_devices()
        self.keyword_client = KeywordClient(keyword_server_url)
        self.keyword_client.reset()
        self.send_to_keywordserver = not (keyword_server_url == '')

        #self.keyword_extractor = extract.TermExtractor()
        #self.keyword_extractor.filter = extract.permissiveFilter

        if self.send_to_keywordserver:
            self.keyword_client.addUtterance('','You')
            self.last_hyp = ''

        self.input_microphone_id = input_microphone_id

    #@rate_limited(4)
    def send_data(self, data):
        if data is not None:
            self.send(data, binary=True)

    def opened(self):
        #print "Socket opened!"
        def send_data_to_ws():
            buffer_size = 1024

            if  self.input_microphone_id == -1:
                self.input_microphone_id = self.getAudioDeviceByString("Yamaha")
                if self.input_microphone_id == -1:
                    sys.exit(-1)
                else:
                    print 'Selecting device',self.input_microphone_id,'as input device'

            stream = self.paudio.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=1024, input_device_index = self.input_microphone_id) #buffer   
            #f = open(self.fn, "rb")
            if self.send_adaptation_state_filename is not None:
                print >> sys.stderr, "Sending adaptation state from %s" % self.send_adaptation_state_filename
                try:
                    adaptation_state_props = json.load(open(self.send_adaptation_state_filename, "r"))
                    self.send(json.dumps(dict(adaptation_state=adaptation_state_props)))
                except:
                    e = sys.exc_info()[0]
                    print >> sys.stderr, "Failed to send adaptation state: ",  e
            abort = False
            while not abort:
                block = stream.read(buffer_size)
                self.send_data(block)
            print >> sys.stderr, "Audio sent, now sending EOS"
            self.send("EOS")

        t = threading.Thread(target=send_data_to_ws)
        t.start()

    # received decoding message from upstream Kaldi server
    def received_message(self, m):
        try:
            response = json.loads(str(m))
            #print >> sys.stderr, "RESPONSE:", response
            #print >> sys.stderr, "JSON was:", m
            if response['status'] == 0:
                if 'result' in response:
                    trans = response['result']['hypotheses'][0]['transcript']
                    if response['result']['final']:
                        if trans not in ['a.','I.','i.','the.','but.','one.','it.','she.']:
                            self.final_hyps.append(trans)
                            		    
                            if self.send_to_keywordserver:
                                self.keyword_client.replaceLastUtterance(self.last_hyp, trans, std_speaker)
                                self.keyword_client.completeUtterance(trans, std_speaker)
                                self.keyword_client.addUtterance('',std_speaker)
                                self.last_hyp = ''

                                complete_transcript = '\n'.join(sentence[:-1] for sentence in self.final_hyps)
                                


                            print u'\r\033[K',trans.replace(u'\n', u'\\n')
                    else:
                        if self.send_to_keywordserver:
                            self.keyword_client.replaceLastUtterance(self.last_hyp, trans, std_speaker)
                            self.last_hyp = trans
                        print_trans = trans.replace(u'\n', u'\\n')
                        print u'\r\033[K',print_trans
                if 'adaptation_state' in response:
                    if self.save_adaptation_state_filename:
                        print u'Saving adaptation state to %s' % self.save_adaptation_state_filename
                        with open(self.save_adaptation_state_filename, 'w') as f:
                            f.write(json.dumps(response['adaptation_state']))
            else:
                print  u'Received error from server (status %d)' % response['status']
                if 'message' in response:
                    print 'Error message:',  response['message']
        except Exception:
            print 'Exception in received_message'
            exc_type, exc_value, exc_traceback = sys.exc_info()
            traceback.print_exception(exc_type, exc_value, exc_traceback,
                limit=10, file=sys.stdout)

    def get_full_hyp(self, timeout=60):
        return self.final_hyp_queue.get(timeout)

    def closed(self, code, reason=None):
        #print "Websocket closed() called"
        #print >> sys.stderr
        self.final_hyp_queue.put(' '.join(self.final_hyps))
Esempio n. 3
0
class KaldiClient(WebSocketClient):

    def __init__(self, filename, url, protocols=None, extensions=None, heartbeat_freq=None, byterate=32000,
                 save_adaptation_state_filename=None, send_adaptation_state_filename=None, keyword_server_url='',
                 max_sentences=0):
        super(KaldiClient, self).__init__(url, protocols, extensions, heartbeat_freq)
        self.final_hyps = []
        self.fn = filename
        self.byterate = byterate
        self.final_hyp_queue = Queue.Queue()
        self.save_adaptation_state_filename = save_adaptation_state_filename
        self.send_adaptation_state_filename = send_adaptation_state_filename

        self.keyword_client = KeywordClient(keyword_server_url)
        self.keyword_client.reset()
        self.send_to_keywordserver = not (keyword_server_url == '')

        if self.send_to_keywordserver:
            self.keyword_client.addUtterance('', 'You')
            self.last_hyp = ''

        self.max_sentences = max_sentences

    @rate_limited(4)
    def send_data(self, data):
        self.send(data, binary=True)

    def opened(self):
        # print "Socket opened!"
        def send_data_to_ws():
            f = open(self.fn, "rb")
            if self.send_adaptation_state_filename is not None:
                print >> sys.stderr, "Sending adaptation state from %s" % self.send_adaptation_state_filename
                try:
                    adaptation_state_props = json.load(open(self.send_adaptation_state_filename, "r"))
                    self.send(json.dumps(dict(adaptation_state=adaptation_state_props)))
                except:
                    e = sys.exc_info()[0]
                    print >> sys.stderr, "Failed to send adaptation state: ", e
            for block in iter(lambda: f.read(self.byterate / 4), ""):
                if self.maximum_sentences_reached():
                    break
                self.send_data(block)
            print >> sys.stderr, "Audio sent, now sending EOS"
            self.send("EOS")

        t = threading.Thread(target=send_data_to_ws)
        t.start()

    # received decoding message from upstream Kaldi server
    def received_message(self, m):
        if self.maximum_sentences_reached():
            return

        try:
            response = json.loads(str(m))
            # print >> sys.stderr, "RESPONSE:", response
            # print >> sys.stderr, "JSON was:", m
            if response['status'] == 0:
                if 'result' in response:
                    trans = response['result']['hypotheses'][0]['transcript']
                    if response['result']['final']:
                        if trans not in ['a.', 'I.', 'i.', 'the.', 'but.', 'one.', 'it.', 'she.']:
                            self.final_hyps.append(trans)

                            if self.send_to_keywordserver:
                                self.keyword_client.replaceLastUtterance(self.last_hyp, trans, std_speaker)
                                self.keyword_client.completeUtterance(trans, std_speaker)
                                self.keyword_client.addUtterance('', std_speaker)
                                self.last_hyp = ''

                                complete_transcript = '\n'.join(sentence[:-1] for sentence in self.final_hyps)

                            print u'\r\033[K', trans.replace(u'\n', u'\\n')
                    else:
                        if self.send_to_keywordserver:
                            self.keyword_client.replaceLastUtterance(self.last_hyp, trans, std_speaker)
                            self.last_hyp = trans
                        print_trans = trans.replace(u'\n', u'\\n')
                        print u'\r\033[K', print_trans
                if 'adaptation_state' in response:
                    if self.save_adaptation_state_filename:
                        print u'Saving adaptation state to %s' % self.save_adaptation_state_filename
                        with open(self.save_adaptation_state_filename, 'w') as f:
                            f.write(json.dumps(response['adaptation_state']))
            else:
                print u'Received error from server (status %d)' % response['status']
                if 'message' in response:
                    print 'Error message:', response['message']
        except Exception:
            print 'Exception in received_message'
            exc_type, exc_value, exc_traceback = sys.exc_info()
            traceback.print_exception(exc_type, exc_value, exc_traceback,
                                      limit=10, file=sys.stdout)

    def get_full_hyp(self, timeout=60):
        return self.final_hyp_queue.get(timeout)

    # Returns True if the maximum number of sentences defined by the user have been transcribed.
    def maximum_sentences_reached(self):
        return self.max_sentences != 0 and len(self.final_hyps) >= self.max_sentences

    def closed(self, code, reason=None):
        # print "Websocket closed() called"
        # print >> sys.stderr
        self.final_hyp_queue.put(" ".join(self.final_hyps))
Esempio n. 4
0
class KaldiClient(WebSocketClient):

    def print_devices(self):
        info = self.paudio.get_host_api_info_by_index(0)
        numdevices = info.get('deviceCount')
        #for each audio device, determine if is an input or an output and add it to the appropriate list and dictionary
        for i in range (0,numdevices):
            if self.paudio.get_device_info_by_host_api_device_index(0,i).get('maxInputChannels')>0:
                print "Input Device id ", i, " - ", self.paudio.get_device_info_by_host_api_device_index(0,i).get('name')

            if self.paudio.get_device_info_by_host_api_device_index(0,i).get('maxOutputChannels')>0:
                print "Output Device id ", i, " - ", self.paudio.get_device_info_by_host_api_device_index(0,i).get('name')

    def getYamahaID(self):
        info = self.paudio.get_host_api_info_by_index(0)
        numdevices = info.get('deviceCount')
        for i in range (0,numdevices):
            if self.paudio.get_device_info_by_host_api_device_index(0,i).get('maxInputChannels')>0:
                if 'Yamaha' in self.paudio.get_device_info_by_host_api_device_index(0,i).get('name'):
                    return i
        print 'No yamaha microphone found, defaulting to last available input device...'

        for i in reversed(range (0,numdevices)):
            if self.paudio.get_device_info_by_host_api_device_index(0,i).get('maxInputChannels')>0:
                return i

        print 'No input device found! Please connect a microphone or recording device'

        return -1		    

    def __init__(self, filename, url, protocols=None, extensions=None, heartbeat_freq=None, byterate=32000,
                 save_adaptation_state_filename=None, send_adaptation_state_filename=None, keyword_server_url = ''):
        super(KaldiClient, self).__init__(url, protocols, extensions, heartbeat_freq)
        self.final_hyps = []
        self.fn = filename
        self.byterate = byterate
        self.final_hyp_queue = Queue.Queue()
        self.save_adaptation_state_filename = save_adaptation_state_filename
        self.send_adaptation_state_filename = send_adaptation_state_filename

        self.paudio = pyaudio.PyAudio()
        self.print_devices()
        self.keyword_client = KeywordClient(keyword_server_url)
        self.keyword_client.reset()
        self.send_to_keywordserver = not (keyword_server_url == '')

        #self.keyword_extractor = extract.TermExtractor()
        #self.keyword_extractor.filter = extract.permissiveFilter

        if self.send_to_keywordserver:
            self.keyword_client.addUtterance('','You')
            self.last_hyp = ''

    #@rate_limited(4)
    def send_data(self, data):
        if data is not None:
            self.send(data, binary=True)

    def opened(self):
        #print "Socket opened!"
        def send_data_to_ws():
            buffer_size = 1024

            yamahaID = self.getYamahaID()
            if yamahaID == -1:
                sys.exit(-1)
            else:
                print 'Selecting device',yamahaID,'as input device'

            stream = self.paudio.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=1024, input_device_index = yamahaID) #buffer   
            #f = open(self.fn, "rb")
            if self.send_adaptation_state_filename is not None:
                print >> sys.stderr, "Sending adaptation state from %s" % self.send_adaptation_state_filename
                try:
                    adaptation_state_props = json.load(open(self.send_adaptation_state_filename, "r"))
                    self.send(json.dumps(dict(adaptation_state=adaptation_state_props)))
                except:
                    e = sys.exc_info()[0]
                    print >> sys.stderr, "Failed to send adaptation state: ",  e
            abort = False
            while not abort:
                block = stream.read(buffer_size)
                self.send_data(block)
            print >> sys.stderr, "Audio sent, now sending EOS"
            self.send("EOS")

        t = threading.Thread(target=send_data_to_ws)
        t.start()

    # received decoding message from upstream Kaldi server
    def received_message(self, m):
        try:
            response = json.loads(str(m))
            #print >> sys.stderr, "RESPONSE:", response
            #print >> sys.stderr, "JSON was:", m
            if response['status'] == 0:
                if 'result' in response:
                    trans = response['result']['hypotheses'][0]['transcript']
                    if response['result']['final']:
                        if trans not in ['a.','I.','i.','the.','but.','one.','it.','she.']:
                            self.final_hyps.append(trans)
                            		    
                            if self.send_to_keywordserver:
                                self.keyword_client.replaceLastUtterance(self.last_hyp, trans, std_speaker)
                                self.keyword_client.completeUtterance(trans, std_speaker)
                                self.keyword_client.addUtterance('',std_speaker)
                                self.last_hyp = ''

                                complete_transcript = '\n'.join(sentence[:-1] for sentence in self.final_hyps)
                                


                            print u'\r\033[K',trans.replace(u'\n', u'\\n')
                    else:
                        if self.send_to_keywordserver:
                            self.keyword_client.replaceLastUtterance(self.last_hyp, trans, std_speaker)
                            self.last_hyp = trans
                        print_trans = trans.replace(u'\n', u'\\n')
                        print u'\r\033[K',print_trans
                if 'adaptation_state' in response:
                    if self.save_adaptation_state_filename:
                        print u'Saving adaptation state to %s' % self.save_adaptation_state_filename
                        with open(self.save_adaptation_state_filename, 'w') as f:
                            f.write(json.dumps(response['adaptation_state']))
            else:
                print  u'Received error from server (status %d)' % response['status']
                if 'message' in response:
                    print 'Error message:',  response['message']
        except Exception:
            print 'Exception in received_message'
            exc_type, exc_value, exc_traceback = sys.exc_info()
            traceback.print_exception(exc_type, exc_value, exc_traceback,
                limit=10, file=sys.stdout)

    def get_full_hyp(self, timeout=60):
        return self.final_hyp_queue.get(timeout)

    def closed(self, code, reason=None):
        #print "Websocket closed() called"
        #print >> sys.stderr
        self.final_hyp_queue.put(' '.join(self.final_hyps))