def input_usr_utt_nblist(self):
    """Read an N-best list of user utterances interactively from the console.

    The user is prompted with "User <i>: " once per hypothesis.  Every line
    is handed to ``self.parse_input_utt``; a line rejected with a
    ``TextHubException`` is reported and the same prompt is shown again.
    Reading stops when a line starting with "." is entered or after 99
    hypotheses have been accepted.

    Returns:
        The ``UtteranceNBList`` holding the accepted hypotheses, after
        ``merge()``, ``scale()`` and ``add_other()`` have been applied.
    """
    self.init_readline()

    nblist = UtteranceNBList()
    i = 1
    while i < 100:
        line = raw_input("User %d: " % i)
        try:
            line = line.decode('utf8')
        except UnicodeError:
            # Per the original note, under ipdb the input already arrives
            # converted, so decoding may fail; the line is then used as read.
            # Only Unicode errors are tolerated here, so that e.g. a
            # KeyboardInterrupt is no longer swallowed.
            pass

        if line.startswith("."):
            # End-of-list marker: print an empty line and stop reading.
            print("")
            break

        try:
            prob, da = self.parse_input_utt(line)
        except TextHubException as e:
            # Report the problem and ask again under the same number.
            print(e)
            continue

        nblist.add(prob, da)
        i += 1

    nblist.merge()
    nblist.scale()
    nblist.add_other()

    self.write_readline()

    return nblist
def input_usr_utt_nblist(self):
    """Read an N-best list of user utterances interactively from the console.

    The user is prompted with "User <i>: " once per hypothesis.  Every line
    is handed to ``self.parse_input_utt``; a line rejected with a
    ``TextHubException`` is reported and the same prompt is shown again.
    Reading stops when a line starting with "." is entered or after 99
    hypotheses have been accepted.

    Returns:
        The ``UtteranceNBList`` holding the accepted hypotheses, after
        ``merge()``, ``scale()`` and ``add_other()`` have been applied.
    """
    self.init_readline()

    nblist = UtteranceNBList()
    i = 1
    while i < 100:
        line = raw_input("User %d: " % i)
        try:
            line = line.decode('utf8')
        except UnicodeError:
            # Per the original note, under ipdb the input already arrives
            # converted, so decoding may fail; the line is then used as read.
            # Only Unicode errors are tolerated here, so that e.g. a
            # KeyboardInterrupt is no longer swallowed.
            pass

        if line.startswith("."):
            # End-of-list marker: print an empty line and stop reading.
            print("")
            break

        try:
            prob, da = self.parse_input_utt(line)
        except TextHubException as e:
            # Report the problem and ask again under the same number.
            print(e)
            continue

        nblist.add(prob, da)
        i += 1

    nblist.merge()
    nblist.scale()
    nblist.add_other()

    self.write_readline()

    return nblist
def input_usr_utt_nblist(self):
    """Collect an N-best list of user utterances typed on the console.

    One hypothesis is read per "User <i>: " prompt and decoded from UTF-8.
    Every decoded line is also appended to ``self.f_output_script`` when
    that attribute is set.  A line starting with "." finishes the list.

    Returns:
        The ``UtteranceNBList`` with the accepted hypotheses, after
        ``merge()``, ``scale()`` and ``add_other()`` have been applied.
    """
    self.init_readline()

    hypotheses = UtteranceNBList()
    hyp_no = 1
    while hyp_no < 100:
        line = raw_input("User %d: " % hyp_no).decode('utf8')

        if self.f_output_script:
            # The line is logged before it is inspected, so the terminating
            # "." line ends up in the script as well.
            self.f_output_script.write(line + '\n')

        if line.startswith("."):
            print("")
            break

        try:
            prob, da = self.parse_input_utt(line)
        except TextHubException as e:
            # Rejected input: show the error and prompt again with the
            # same hypothesis number.
            print(e)
        else:
            hypotheses.add(prob, da)
            hyp_no += 1

    hypotheses.merge()
    hypotheses.scale()
    hypotheses.add_other()

    self.write_readline()

    return hypotheses
def input_usr_utt_nblist(self):
    """Collect an N-best list of user utterances typed on the console.

    One hypothesis is read per "User <i>: " prompt and decoded from UTF-8.
    Every decoded line is also appended to ``self.f_output_script`` when
    that attribute is set.  A line starting with "." finishes the list.

    Returns:
        The ``UtteranceNBList`` with the accepted hypotheses, after
        ``merge()``, ``scale()`` and ``add_other()`` have been applied.
    """
    self.init_readline()

    hypotheses = UtteranceNBList()
    hyp_no = 1
    while hyp_no < 100:
        line = raw_input("User %d: " % hyp_no).decode('utf8')

        if self.f_output_script:
            # The line is logged before it is inspected, so the terminating
            # "." line ends up in the script as well.
            self.f_output_script.write(line + '\n')

        if line.startswith("."):
            print("")
            break

        try:
            prob, da = self.parse_input_utt(line)
        except TextHubException as e:
            # Rejected input: show the error and prompt again with the
            # same hypothesis number.
            print(e)
        else:
            hypotheses.add(prob, da)
            hyp_no += 1

    hypotheses.merge()
    hypotheses.scale()
    hypotheses.add_other()

    self.write_readline()

    return hypotheses
def recognize(self, wav):
    """Produce hypotheses for the input audio data.

    Remember that GoogleASR works only with complete wave files.  The audio
    is saved into a temporary FLAC file, which is passed to
    ``self.get_asr_hypotheses`` and removed again afterwards.

    Args:
        wav: the audio data handed over to ``audio.save_flac``.

    Returns:
        An ``UtteranceNBList`` of hypotheses, finished with ``merge()`` and
        ``add_other()``.  When the response cannot be interpreted, the list
        contains no recognised hypotheses.
    """
    # Temporary file used to hand the audio over in the FLAC format.
    handle, flac_file_name = mkstemp('TmpSpeechFile.flac')

    try:
        # Convert wav to flac and ask for the hypotheses.
        audio.save_flac(self.cfg, flac_file_name, wav)
        json_hypotheses = self.get_asr_hypotheses(flac_file_name)
    except (urllib2.HTTPError, urllib2.URLError) as e:
        self.syslog.exception('GoogleASR HTTP/URL error: %s' % unicode(e))
        # NOTE(review): this fallback is a Python list rather than a JSON
        # string, so json.loads() below presumably rejects it and the
        # placeholder hypothesis never reaches the caller -- confirm whether
        # that is intended.
        json_hypotheses = [
            [{'confidence': 1.0,
              'utterance': '__google__ __asr__ __exception__'}, ],
        ]
    finally:
        os.close(handle)
        remove(flac_file_name)

    try:
        hyp = json.loads(json_hypotheses)

        nblist = UtteranceNBList()

        if hyp['status'] == 0:
            hypotheses = hyp['hypotheses']
            n = len(hypotheses)
            for i, h in enumerate(hypotheses):
                if i == 0:
                    # Only the best hypothesis comes with a confidence.
                    conf1 = h['confidence']
                    nblist.add(conf1, Utterance(h['utterance']))
                else:
                    # Guess the score: the remaining mass (1 - conf1) is
                    # spread over the other hypotheses with linearly
                    # decreasing weights that sum to one.
                    nblist.add(
                        (1.0-conf1)*(n-i)/(n-1.0)/(n-0.0)*2.0,
                        Utterance(h['utterance']))
        elif hyp['status'] == 5:
            nblist.add(1.0, Utterance('_other_'))
    except Exception:
        # Best effort: any malformed or unexpected response yields an empty
        # list.  Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        nblist = UtteranceNBList()

    nblist.merge()
    nblist.add_other()

    return nblist
def test_conversion_of_confnet_into_nblist(self):
    """Check the n-best list generated from a three-slot confusion network.

    The generated list is compared, via its unicode rendering, with a
    reference list assembled by hand.
    """
    A1, A2, A3 = 0.90, 0.05, 0.05
    B1, B2, B3 = 0.50, 0.35, 0.15
    C1, C2, C3 = 0.60, 0.30, 0.10

    # Reference paths, listed from the most to the least probable one; all
    # of them go through A1 in the first slot.
    expected_paths = (
        (B1, C1, "A1 B1 C1"),
        (B2, C1, "A1 B2 C1"),
        (B1, C2, "A1 B1 C2"),
        (B2, C2, "A1 B2 C2"),
        (B3, C1, "A1 B3 C1"),
        (B1, C3, "A1 B1 C3"),
        (B3, C2, "A1 B3 C2"),
        (B2, C3, "A1 B2 C3"),
    )

    correct_nblist = UtteranceNBList()
    for prob_b, prob_c, text in expected_paths:
        correct_nblist.add(A1 * prob_b * prob_c, Utterance(text))
    correct_nblist.merge()
    correct_nblist.add_other()

    slots = (
        [[A1, 'A1'], [A2, 'A2'], [A3, 'A3'], ],
        [[B1, 'B1'], [B2, 'B2'], [B3, 'B3'], ],
        [[C1, 'C1'], [C2, 'C2'], [C3, 'C3'], ],
    )
    confnet = UtteranceConfusionNetwork()
    for slot in slots:
        confnet.add(slot)
    confnet.merge().sort()

    gen_nblist = confnet.get_utterance_nblist(10)

    # Human-readable dump of all three structures for the test log.
    report = [
        "",
        "Confusion network:",
        unicode(confnet),
        "",
        "Generated nblist:",
        unicode(gen_nblist),
        "",
        "Correct nblist:",
        unicode(correct_nblist),
        "",
    ]
    print('\n'.join(report))

    self.assertEqual(unicode(gen_nblist), unicode(correct_nblist))
def get_results(self, timeout=0.6):
    """Wait for the complete recognition results from the Julius ASR server.

    Message parts are read with ``self.read_server_message`` and accumulated
    until the text contains a ``<CONFNET>`` tag.  The collected message is
    then parsed as XML.

    Args:
        timeout: the maximum time, in seconds, spent sleeping while no
            message part is available.

    Returns:
        A tuple ``(nblist, cn)``: an ``UtteranceNBList`` built from the
        RECOGOUT section and an ``UtteranceConfusionNetwork`` built from the
        CONFNET section.

    Raises:
        JuliusASRTimeoutException: when the timeout is used up before a
            message containing ``<CONFNET>`` has been received.
    """
    # Imported locally so that the block does not depend on the file-level
    # imports being changed.
    from xml.sax.saxutils import escape

    msg = ""

    # Get results from the server.
    time_slept = 0.0
    while time_slept < timeout:
        msg_part = self.read_server_message(self.msg_timeout)
        if not msg_part:
            # Wait and check whether there is a message.
            sleep_time = self.cfg['Hub']['main_loop_sleep_time']
            time.sleep(sleep_time)
            time_slept += sleep_time
            if self.debug >= 2:
                print("gr.time_slept: %s" % time_slept)
            continue

        msg += msg_part + '\n'

        if self.debug:
            print(msg)

        if '<CONFNET>' in msg:
            break
    else:
        # The loop ran out of time without seeing the confusion network.
        raise JuliusASRTimeoutException(
            "Timeout when waiting for the Julius server results.")

    # Process the results.
    #
    # Typical result returned by the Julius ASR.
    #
    # <STARTPROC/>
    # <INPUT STATUS="LISTEN" TIME="1343896296"/>
    # <INPUT STATUS="STARTREC" TIME="1343896311"/>
    # <STARTRECOG/>
    # <INPUT STATUS="ENDREC" TIME="1343896312"/>
    # <ENDRECOG/>
    # <INPUTPARAM FRAMES="164" MSEC="1640"/>
    # <RECOGOUT>
    #   <SHYPO RANK="1" SCORE="-7250.111328">
    #     <WHYPO WORD="" CLASSID="<s>" PHONE="sil" CM="0.887"/>
    #     <WHYPO WORD="I'M" CLASSID="I'M" PHONE="ah m" CM="0.705"/>
    #     <WHYPO WORD="LOOKING" CLASSID="LOOKING" PHONE="l uh k ih ng" CM="0.992"/>
    #     <WHYPO WORD="FOR" CLASSID="FOR" PHONE="f er" CM="0.757"/>
    #     <WHYPO WORD="A" CLASSID="A" PHONE="ah" CM="0.672"/>
    #     <WHYPO WORD="PUB" CLASSID="PUB" PHONE="p ah b" CM="0.409"/>
    #     <WHYPO WORD="" CLASSID="</s>" PHONE="sil" CM="1.000"/>
    #   </SHYPO>
    # </RECOGOUT>
    # <GRAPHOUT NODENUM="43" ARCNUM="70">
    #     <NODE GID="0" WORD="" CLASSID="<s>" PHONE="sil" BEGIN="0" END="2"/>
    #     <NODE GID="1" WORD="" CLASSID="<s>" PHONE="sil" BEGIN="0" END="3"/>
    #     <NODE GID="2" WORD="" CLASSID="<s>" PHONE="sil" BEGIN="0" END="4"/>
    #     <NODE GID="3" WORD="I" CLASSID="I" PHONE="ay" BEGIN="3" END="5"/>
    #     <NODE GID="4" WORD="NO" CLASSID="NO" PHONE="n ow" BEGIN="3" END="7"/>
    #     <NODE GID="5" WORD="I" CLASSID="I" PHONE="ay" BEGIN="4" END="6"/>
    #     <NODE GID="6" WORD="UH" CLASSID="UH" PHONE="ah" BEGIN="4" END="6"/>
    #     <NODE GID="7" WORD="I'M" CLASSID="I'M" PHONE="ay m" BEGIN="4" END="27"/>
    #     ...
    #     <NODE GID="38" WORD="PUB" CLASSID="PUB" PHONE="p ah b" BEGIN="79" END="104"/>
    #     <NODE GID="39" WORD="AH" CLASSID="AH" PHONE="aa" BEGIN="81" END="110"/>
    #     <NODE GID="40" WORD="LOT" CLASSID="LOT" PHONE="l aa t" BEGIN="81" END="110"/>
    #     <NODE GID="41" WORD="" CLASSID="</s>" PHONE="sil" BEGIN="105" END="163"/>
    #     <NODE GID="42" WORD="" CLASSID="</s>" PHONE="sil" BEGIN="111" END="163"/>
    #     <ARC FROM="0" TO="4"/>
    #     <ARC FROM="0" TO="3"/>
    #     <ARC FROM="1" TO="7"/>
    #     <ARC FROM="1" TO="5"/>
    #     <ARC FROM="1" TO="6"/>
    #     ...
    #     <ARC FROM="38" TO="41"/>
    #     <ARC FROM="39" TO="42"/>
    #     <ARC FROM="40" TO="42"/>
    # </GRAPHOUT>
    # <CONFNET>
    #   <WORD>
    #     <ALTERNATIVE PROB="1.000"></ALTERNATIVE>
    #   </WORD>
    #   <WORD>
    #     <ALTERNATIVE PROB="0.950">I</ALTERNATIVE>
    #     <ALTERNATIVE PROB="0.020">HI</ALTERNATIVE>
    #     <ALTERNATIVE PROB="0.013">NO</ALTERNATIVE>
    #     <ALTERNATIVE PROB="0.010"></ALTERNATIVE>
    #     <ALTERNATIVE PROB="0.006">UH</ALTERNATIVE>
    #   </WORD>
    #   <WORD>
    #     <ALTERNATIVE PROB="0.945">AM</ALTERNATIVE>
    #     <ALTERNATIVE PROB="0.055">I'M</ALTERNATIVE>
    #   </WORD>
    #   <WORD>
    #     <ALTERNATIVE PROB="1.000">LOOKING</ALTERNATIVE>
    #   </WORD>
    #   <WORD>
    #     <ALTERNATIVE PROB="1.000">FOR</ALTERNATIVE>
    #   </WORD>
    #   <WORD>
    #     <ALTERNATIVE PROB="1.000">A</ALTERNATIVE>
    #   </WORD>
    #   <WORD>
    #     <ALTERNATIVE PROB="0.963">PUB</ALTERNATIVE>
    #     <ALTERNATIVE PROB="0.016">AH</ALTERNATIVE>
    #     <ALTERNATIVE PROB="0.012">BAR</ALTERNATIVE>
    #     <ALTERNATIVE PROB="0.008">LOT</ALTERNATIVE>
    #   </WORD>
    #   <WORD>
    #     <ALTERNATIVE PROB="1.000"></ALTERNATIVE>
    #   </WORD>
    # </CONFNET>
    # <INPUT STATUS="LISTEN" TIME="1343896312"/>

    # Wrap the sequence of top-level elements into a single root element.
    msg = "<RESULTS>" + msg + "</RESULTS>"
    # The sentence boundary markers appear verbatim inside attribute values
    # (see CLASSID in the sample above).  A raw "<" is not allowed there, so
    # the markers are turned into XML entities before parsing.
    msg = msg.replace("<s>", escape("<s>")).replace("</s>", escape("</s>"))

    nblist = UtteranceNBList()

    doc = xml.dom.minidom.parseString(msg)
    for recogout in doc.getElementsByTagName("RECOGOUT"):
        for shypo in recogout.getElementsByTagName("SHYPO"):
            utterance = ""
            cm = 1.0
            for whypo in shypo.getElementsByTagName("WHYPO"):
                word = whypo.getAttribute("WORD")
                utterance += " " + word
                # Only non-empty words contribute to the confidence of the
                # whole hypothesis.
                if word:
                    cm *= float(whypo.getAttribute("CM"))
            nblist.add(cm, Utterance(utterance))

    nblist.merge()
    nblist.add_other()

    cn = UtteranceConfusionNetwork()

    for confnet in doc.getElementsByTagName("CONFNET"):
        for word_el in confnet.getElementsByTagName("WORD"):
            word_list = [
                [float(alt.getAttribute("PROB")), get_text_from_xml_node(alt)]
                for alt in word_el.getElementsByTagName("ALTERNATIVE")
            ]

            # Filter out empty hypotheses.
            if len(word_list) == 0:
                continue
            if len(word_list) == 1 and len(word_list[0][1]) == 0:
                continue

            # Add the word into the confusion network.
            cn.add(word_list)

    cn.merge()
    cn.normalise()
    cn.prune()
    cn.normalise()
    cn.sort()

    return nblist, cn
def get_results(self, timeout=0.6):
    """Wait for the complete recognition results from the Julius ASR server.

    Message parts are read with ``self.read_server_message`` and accumulated
    until the text contains a ``<CONFNET>`` tag.  The collected message is
    then parsed as XML.

    Args:
        timeout: the maximum time, in seconds, spent sleeping while no
            message part is available.

    Returns:
        A tuple ``(nblist, cn)``: an ``UtteranceNBList`` built from the
        RECOGOUT section and an ``UtteranceConfusionNetwork`` built from the
        CONFNET section.

    Raises:
        JuliusASRTimeoutException: when the timeout is used up before a
            message containing ``<CONFNET>`` has been received.
    """
    # Imported locally so that the block does not depend on the file-level
    # imports being changed.
    from xml.sax.saxutils import escape

    msg = ""

    # Get results from the server.
    time_slept = 0.0
    while time_slept < timeout:
        msg_part = self.read_server_message(self.msg_timeout)
        if not msg_part:
            # Wait and check whether there is a message.
            sleep_time = self.cfg['Hub']['main_loop_sleep_time']
            time.sleep(sleep_time)
            time_slept += sleep_time
            if self.debug >= 2:
                print("gr.time_slept: %s" % time_slept)
            continue

        msg += msg_part + '\n'

        if self.debug:
            print(msg)

        if '<CONFNET>' in msg:
            break
    else:
        # The loop ran out of time without seeing the confusion network.
        raise JuliusASRTimeoutException(
            "Timeout when waiting for the Julius server results.")

    # Process the results.
    #
    # Typical result returned by the Julius ASR.
    #
    # <STARTPROC/>
    # <INPUT STATUS="LISTEN" TIME="1343896296"/>
    # <INPUT STATUS="STARTREC" TIME="1343896311"/>
    # <STARTRECOG/>
    # <INPUT STATUS="ENDREC" TIME="1343896312"/>
    # <ENDRECOG/>
    # <INPUTPARAM FRAMES="164" MSEC="1640"/>
    # <RECOGOUT>
    #   <SHYPO RANK="1" SCORE="-7250.111328">
    #     <WHYPO WORD="" CLASSID="<s>" PHONE="sil" CM="0.887"/>
    #     <WHYPO WORD="I'M" CLASSID="I'M" PHONE="ah m" CM="0.705"/>
    #     <WHYPO WORD="LOOKING" CLASSID="LOOKING" PHONE="l uh k ih ng" CM="0.992"/>
    #     <WHYPO WORD="FOR" CLASSID="FOR" PHONE="f er" CM="0.757"/>
    #     <WHYPO WORD="A" CLASSID="A" PHONE="ah" CM="0.672"/>
    #     <WHYPO WORD="PUB" CLASSID="PUB" PHONE="p ah b" CM="0.409"/>
    #     <WHYPO WORD="" CLASSID="</s>" PHONE="sil" CM="1.000"/>
    #   </SHYPO>
    # </RECOGOUT>
    # <GRAPHOUT NODENUM="43" ARCNUM="70">
    #     <NODE GID="0" WORD="" CLASSID="<s>" PHONE="sil" BEGIN="0" END="2"/>
    #     <NODE GID="1" WORD="" CLASSID="<s>" PHONE="sil" BEGIN="0" END="3"/>
    #     <NODE GID="2" WORD="" CLASSID="<s>" PHONE="sil" BEGIN="0" END="4"/>
    #     <NODE GID="3" WORD="I" CLASSID="I" PHONE="ay" BEGIN="3" END="5"/>
    #     <NODE GID="4" WORD="NO" CLASSID="NO" PHONE="n ow" BEGIN="3" END="7"/>
    #     <NODE GID="5" WORD="I" CLASSID="I" PHONE="ay" BEGIN="4" END="6"/>
    #     <NODE GID="6" WORD="UH" CLASSID="UH" PHONE="ah" BEGIN="4" END="6"/>
    #     <NODE GID="7" WORD="I'M" CLASSID="I'M" PHONE="ay m" BEGIN="4" END="27"/>
    #     ...
    #     <NODE GID="38" WORD="PUB" CLASSID="PUB" PHONE="p ah b" BEGIN="79" END="104"/>
    #     <NODE GID="39" WORD="AH" CLASSID="AH" PHONE="aa" BEGIN="81" END="110"/>
    #     <NODE GID="40" WORD="LOT" CLASSID="LOT" PHONE="l aa t" BEGIN="81" END="110"/>
    #     <NODE GID="41" WORD="" CLASSID="</s>" PHONE="sil" BEGIN="105" END="163"/>
    #     <NODE GID="42" WORD="" CLASSID="</s>" PHONE="sil" BEGIN="111" END="163"/>
    #     <ARC FROM="0" TO="4"/>
    #     <ARC FROM="0" TO="3"/>
    #     <ARC FROM="1" TO="7"/>
    #     <ARC FROM="1" TO="5"/>
    #     <ARC FROM="1" TO="6"/>
    #     ...
    #     <ARC FROM="38" TO="41"/>
    #     <ARC FROM="39" TO="42"/>
    #     <ARC FROM="40" TO="42"/>
    # </GRAPHOUT>
    # <CONFNET>
    #   <WORD>
    #     <ALTERNATIVE PROB="1.000"></ALTERNATIVE>
    #   </WORD>
    #   <WORD>
    #     <ALTERNATIVE PROB="0.950">I</ALTERNATIVE>
    #     <ALTERNATIVE PROB="0.020">HI</ALTERNATIVE>
    #     <ALTERNATIVE PROB="0.013">NO</ALTERNATIVE>
    #     <ALTERNATIVE PROB="0.010"></ALTERNATIVE>
    #     <ALTERNATIVE PROB="0.006">UH</ALTERNATIVE>
    #   </WORD>
    #   <WORD>
    #     <ALTERNATIVE PROB="0.945">AM</ALTERNATIVE>
    #     <ALTERNATIVE PROB="0.055">I'M</ALTERNATIVE>
    #   </WORD>
    #   <WORD>
    #     <ALTERNATIVE PROB="1.000">LOOKING</ALTERNATIVE>
    #   </WORD>
    #   <WORD>
    #     <ALTERNATIVE PROB="1.000">FOR</ALTERNATIVE>
    #   </WORD>
    #   <WORD>
    #     <ALTERNATIVE PROB="1.000">A</ALTERNATIVE>
    #   </WORD>
    #   <WORD>
    #     <ALTERNATIVE PROB="0.963">PUB</ALTERNATIVE>
    #     <ALTERNATIVE PROB="0.016">AH</ALTERNATIVE>
    #     <ALTERNATIVE PROB="0.012">BAR</ALTERNATIVE>
    #     <ALTERNATIVE PROB="0.008">LOT</ALTERNATIVE>
    #   </WORD>
    #   <WORD>
    #     <ALTERNATIVE PROB="1.000"></ALTERNATIVE>
    #   </WORD>
    # </CONFNET>
    # <INPUT STATUS="LISTEN" TIME="1343896312"/>

    # Wrap the sequence of top-level elements into a single root element.
    msg = "<RESULTS>" + msg + "</RESULTS>"
    # The sentence boundary markers appear verbatim inside attribute values
    # (see CLASSID in the sample above).  A raw "<" is not allowed there, so
    # the markers are turned into XML entities before parsing.
    msg = msg.replace("<s>", escape("<s>")).replace("</s>", escape("</s>"))

    nblist = UtteranceNBList()

    doc = xml.dom.minidom.parseString(msg)
    for recogout in doc.getElementsByTagName("RECOGOUT"):
        for shypo in recogout.getElementsByTagName("SHYPO"):
            utterance = ""
            cm = 1.0
            for whypo in shypo.getElementsByTagName("WHYPO"):
                word = whypo.getAttribute("WORD")
                utterance += " " + word
                # Only non-empty words contribute to the confidence of the
                # whole hypothesis.
                if word:
                    cm *= float(whypo.getAttribute("CM"))
            nblist.add(cm, Utterance(utterance))

    nblist.merge()
    nblist.add_other()

    cn = UtteranceConfusionNetwork()

    for confnet in doc.getElementsByTagName("CONFNET"):
        for word_el in confnet.getElementsByTagName("WORD"):
            word_list = [
                [float(alt.getAttribute("PROB")), get_text_from_xml_node(alt)]
                for alt in word_el.getElementsByTagName("ALTERNATIVE")
            ]

            # Filter out empty hypotheses.
            if len(word_list) == 0:
                continue
            if len(word_list) == 1 and len(word_list[0][1]) == 0:
                continue

            # Add the word into the confusion network.
            cn.add(word_list)

    cn.merge()
    cn.normalise()
    cn.prune()
    cn.normalise()
    cn.sort()

    return nblist, cn