def test_session_logger(self):
    """Drive a SessionLogger through three scripted calls.

    Every call uses its own session directory (``./0``, ``./1``, ``./2``),
    created when missing, and logs the same exchange: session header,
    whole-dialogue recordings, a system greeting, a user turn with ASR and
    SLU hypotheses, a system reply with a barge-in, and a final user turn
    ending in a hangup.

    The method contains no assertions; it passes when none of the logging
    calls raises.
    """
    cfg = Config.load_configs(config=CONFIG_DICT, use_default=False)
    logger = SessionLogger()

    # (speaker, file name) of the recordings spanning the whole dialogue.
    dialogue_recordings = (
        (None, "both_complete_dialogue.wav"),
        ("system", "system_complete_dialogue.wav"),
        ("user", "user_complete_dialogue.wav"),
    )

    # One row per word position; each row lists (probability, word)
    # alternatives for that position.
    asr_alternatives = (
        ((0.90, "want"), (0.05, "has"), (0.05, 'ehm')),
        ((0.70, "Chinese"), (0.20, "English"), (0.10, 'cheap')),
        ((0.80, "restaurant"), (0.10, "pub"), (0.10, 'hotel')),
    )

    # (probability, dialogue act item) hypotheses for the SLU output.
    slu_alternatives = (
        (0.7, 'hello'),
        (0.6, 'thankyou'),
        (0.4, 'restart'),
        (0.1, 'bye'),
    )

    # test 3 calls at once
    for call_index in range(3):
        session_dir = "./%d" % call_index
        if not os.path.isdir(session_dir):
            os.mkdir(session_dir)

        # Session preamble.
        logger.session_start(session_dir)
        logger.config('config = ' + unicode(cfg))
        logging_cfg = cfg['Logging']
        logger.header(logging_cfg["system_name"], logging_cfg["version"])
        logger.input_source("voip")

        # Whole-dialogue recordings: start all of them, then end all of them.
        for speaker, wav_name in dialogue_recordings:
            logger.dialogue_rec_start(speaker, wav_name)
        for _speaker, wav_name in dialogue_recordings:
            logger.dialogue_rec_end(wav_name)

        # First system turn.
        logger.turn("system")
        logger.dialogue_act("system", "hello()")
        logger.text("system", "Hello.")
        logger.rec_start("system", "system1.wav")
        logger.rec_end("system1.wav")

        # First user turn, including ASR and SLU results.
        logger.turn("user")
        logger.rec_start("user", "user1.wav")
        logger.rec_end("user1.wav")

        asr_confnet = UtteranceConfusionNetwork()
        for row in asr_alternatives:
            asr_confnet.add([[prob, word] for prob, word in row])
        asr_confnet.merge()
        asr_confnet.normalise()
        asr_confnet.sort()
        asr_nblist = asr_confnet.get_utterance_nblist()
        logger.asr("user", "user1.wav", asr_nblist, asr_confnet)

        slu_confnet = DialogueActConfusionNetwork()
        for prob, act in slu_alternatives:
            slu_confnet.add(prob, DialogueActItem(act))
        slu_confnet.merge()
        slu_confnet.normalise()
        slu_confnet.sort()
        slu_nblist = slu_confnet.get_da_nblist()
        logger.slu("user", "user1.wav", slu_nblist, slu_confnet)

        # Second system turn, interrupted by a barge-in.
        logger.turn("system")
        logger.dialogue_act("system", "thankyou()")
        logger.text("system", "Thank you.", cost=1.0)
        logger.rec_start("system", "system2.wav")
        logger.rec_end("system2.wav")
        logger.barge_in("system", tts_time=True)

        # Final user turn and hangup.
        logger.turn("user")
        logger.rec_start("user", "user2.wav")
        logger.rec_end("user2.wav")
        logger.hangup("user")
def get_results(self, timeout=0.6):
    """Wait for the complete recognition results from the Julius ASR server.

    Message parts are read from the server and concatenated until the
    accumulated text contains ``<CONFNET>``.  The text is then wrapped in a
    ``<RESULTS>`` root element and parsed as XML: ``RECOGOUT`` hypotheses
    become an n-best list and ``CONFNET`` words become a confusion network.

    Args:
        timeout: Budget, in seconds, for the time spent sleeping while no
            message part is available.  Only the sleeps are counted, not
            the time spent inside ``read_server_message``.

    Returns:
        A tuple ``(nblist, cn)`` holding the ``UtteranceNBList`` and the
        ``UtteranceConfusionNetwork`` built from the server output.

    Raises:
        JuliusASRTimeoutException: If the sleep budget is used up before
            ``<CONFNET>`` shows up in the received text.
    """
    msg = ""

    # Get results from the server.
    time_slept = 0.0
    while time_slept < timeout:
        msg_part = self.read_server_message(self.msg_timeout)
        if not msg_part:
            # Wait and check whether there is a message.
            sleep_time = self.cfg['Hub']['main_loop_sleep_time']
            time.sleep(sleep_time)
            time_slept += sleep_time
            if self.debug >= 2:
                # Single-argument form prints the same text on Python 2
                # and Python 3.
                print("gr.time_slept: %s" % (time_slept,))
            continue

        msg += msg_part + '\n'

        if self.debug:
            print(msg)

        if '<CONFNET>' in msg:
            break
    else:
        # The loop condition failed without a break: nothing usable came.
        raise JuliusASRTimeoutException(
            "Timeout when waiting for the Julius server results.")

    # Process the results.
    #
    # Typical result returned by the Julius ASR.
    #
    # <STARTPROC/>
    # <INPUT STATUS="LISTEN" TIME="1343896296"/>
    # <INPUT STATUS="STARTREC" TIME="1343896311"/>
    # <STARTRECOG/>
    # <INPUT STATUS="ENDREC" TIME="1343896312"/>
    # <ENDRECOG/>
    # <INPUTPARAM FRAMES="164" MSEC="1640"/>
    # <RECOGOUT>
    #   <SHYPO RANK="1" SCORE="-7250.111328">
    #     <WHYPO WORD="" CLASSID="<s>" PHONE="sil" CM="0.887"/>
    #     <WHYPO WORD="I'M" CLASSID="I'M" PHONE="ah m" CM="0.705"/>
    #     <WHYPO WORD="LOOKING" CLASSID="LOOKING" PHONE="l uh k ih ng" CM="0.992"/>
    #     <WHYPO WORD="FOR" CLASSID="FOR" PHONE="f er" CM="0.757"/>
    #     <WHYPO WORD="A" CLASSID="A" PHONE="ah" CM="0.672"/>
    #     <WHYPO WORD="PUB" CLASSID="PUB" PHONE="p ah b" CM="0.409"/>
    #     <WHYPO WORD="" CLASSID="</s>" PHONE="sil" CM="1.000"/>
    #   </SHYPO>
    # </RECOGOUT>
    # <GRAPHOUT NODENUM="43" ARCNUM="70">
    #     <NODE GID="0" WORD="" CLASSID="<s>" PHONE="sil" BEGIN="0" END="2"/>
    #     <NODE GID="1" WORD="" CLASSID="<s>" PHONE="sil" BEGIN="0" END="3"/>
    #     <NODE GID="2" WORD="" CLASSID="<s>" PHONE="sil" BEGIN="0" END="4"/>
    #     <NODE GID="3" WORD="I" CLASSID="I" PHONE="ay" BEGIN="3" END="5"/>
    #     <NODE GID="4" WORD="NO" CLASSID="NO" PHONE="n ow" BEGIN="3" END="7"/>
    #     <NODE GID="5" WORD="I" CLASSID="I" PHONE="ay" BEGIN="4" END="6"/>
    #     <NODE GID="6" WORD="UH" CLASSID="UH" PHONE="ah" BEGIN="4" END="6"/>
    #     <NODE GID="7" WORD="I'M" CLASSID="I'M" PHONE="ay m" BEGIN="4" END="27"/>
    #     ...
    #     <NODE GID="38" WORD="PUB" CLASSID="PUB" PHONE="p ah b" BEGIN="79" END="104"/>
    #     <NODE GID="39" WORD="AH" CLASSID="AH" PHONE="aa" BEGIN="81" END="110"/>
    #     <NODE GID="40" WORD="LOT" CLASSID="LOT" PHONE="l aa t" BEGIN="81" END="110"/>
    #     <NODE GID="41" WORD="" CLASSID="</s>" PHONE="sil" BEGIN="105" END="163"/>
    #     <NODE GID="42" WORD="" CLASSID="</s>" PHONE="sil" BEGIN="111" END="163"/>
    #     <ARC FROM="0" TO="4"/>
    #     <ARC FROM="0" TO="3"/>
    #     <ARC FROM="1" TO="7"/>
    #     <ARC FROM="1" TO="5"/>
    #     <ARC FROM="1" TO="6"/>
    #     ...
    #     <ARC FROM="38" TO="41"/>
    #     <ARC FROM="39" TO="42"/>
    #     <ARC FROM="40" TO="42"/>
    # </GRAPHOUT>
    # <CONFNET>
    #   <WORD>
    #     <ALTERNATIVE PROB="1.000"></ALTERNATIVE>
    #   </WORD>
    #   <WORD>
    #     <ALTERNATIVE PROB="0.950">I</ALTERNATIVE>
    #     <ALTERNATIVE PROB="0.020">HI</ALTERNATIVE>
    #     <ALTERNATIVE PROB="0.013">NO</ALTERNATIVE>
    #     <ALTERNATIVE PROB="0.010"></ALTERNATIVE>
    #     <ALTERNATIVE PROB="0.006">UH</ALTERNATIVE>
    #   </WORD>
    #   <WORD>
    #     <ALTERNATIVE PROB="0.945">AM</ALTERNATIVE>
    #     <ALTERNATIVE PROB="0.055">I'M</ALTERNATIVE>
    #   </WORD>
    #   <WORD>
    #     <ALTERNATIVE PROB="1.000">LOOKING</ALTERNATIVE>
    #   </WORD>
    #   <WORD>
    #     <ALTERNATIVE PROB="1.000">FOR</ALTERNATIVE>
    #   </WORD>
    #   <WORD>
    #     <ALTERNATIVE PROB="1.000">A</ALTERNATIVE>
    #   </WORD>
    #   <WORD>
    #     <ALTERNATIVE PROB="0.963">PUB</ALTERNATIVE>
    #     <ALTERNATIVE PROB="0.016">AH</ALTERNATIVE>
    #     <ALTERNATIVE PROB="0.012">BAR</ALTERNATIVE>
    #     <ALTERNATIVE PROB="0.008">LOT</ALTERNATIVE>
    #   </WORD>
    #   <WORD>
    #     <ALTERNATIVE PROB="1.000"></ALTERNATIVE>
    #   </WORD>
    # </CONFNET>
    # <INPUT STATUS="LISTEN" TIME="1343896312"/>

    # The server output is a sequence of sibling elements; give it a single
    # root so it can be parsed as one XML document.
    msg = "<RESULTS>" + msg + "</RESULTS>"
    # The sentence boundary markers appear raw inside attribute values
    # (CLASSID="<s>"), which is not well-formed XML.  Escape them so the
    # parser accepts the document.
    msg = msg.replace("<s>", "&lt;s&gt;").replace("</s>", "&lt;/s&gt;")

    doc = xml.dom.minidom.parseString(msg)

    # Build the n-best list: one utterance per sentence hypothesis.
    nblist = UtteranceNBList()
    for recogout_el in doc.getElementsByTagName("RECOGOUT"):
        for shypo_el in recogout_el.getElementsByTagName("SHYPO"):
            utterance = ""
            cm = 1.0
            for whypo_el in shypo_el.getElementsByTagName("WHYPO"):
                word = whypo_el.getAttribute("WORD")
                utterance += " " + word
                # Entries with an empty WORD do not contribute to the
                # confidence of the hypothesis.
                if word:
                    cm *= float(whypo_el.getAttribute("CM"))
            nblist.add(cm, Utterance(utterance))

    nblist.merge()
    nblist.add_other()

    # Build the confusion network: one list of alternatives per word.
    cn = UtteranceConfusionNetwork()
    for confnet_el in doc.getElementsByTagName("CONFNET"):
        for word_el in confnet_el.getElementsByTagName("WORD"):
            word_list = [
                [float(alt_el.getAttribute("PROB")),
                 get_text_from_xml_node(alt_el)]
                for alt_el in word_el.getElementsByTagName("ALTERNATIVE")
            ]

            # Filter out empty hypotheses.
            if len(word_list) == 0:
                continue
            if len(word_list) == 1 and len(word_list[0][1]) == 0:
                continue

            # Add the word into the confusion network.
            cn.add(word_list)

    cn.merge()
    cn.normalise()
    cn.prune()
    cn.normalise()
    cn.sort()

    return nblist, cn
def test_session_logger(self):
    """Drive a SessionLogger through three scripted calls.

    Every call uses its own session directory (``./0``, ``./1``, ``./2``),
    created when missing, and logs the same exchange: session header,
    whole-dialogue recordings, a system greeting, a user turn with ASR and
    SLU hypotheses, a system reply with a barge-in, and a final user turn
    ending in a hangup.

    The method contains no assertions; it passes when none of the logging
    calls raises.
    """
    cfg = Config.load_configs(config=CONFIG_DICT, use_default=False)
    logger = SessionLogger()

    # (speaker, file name) of the recordings spanning the whole dialogue.
    dialogue_recordings = (
        (None, "both_complete_dialogue.wav"),
        ("system", "system_complete_dialogue.wav"),
        ("user", "user_complete_dialogue.wav"),
    )

    # One row per word position; each row lists (probability, word)
    # alternatives for that position.
    asr_alternatives = (
        ((0.90, "want"), (0.05, "has"), (0.05, 'ehm')),
        ((0.70, "Chinese"), (0.20, "English"), (0.10, 'cheap')),
        ((0.80, "restaurant"), (0.10, "pub"), (0.10, 'hotel')),
    )

    # (probability, dialogue act item) hypotheses for the SLU output.
    slu_alternatives = (
        (0.7, 'hello'),
        (0.6, 'thankyou'),
        (0.4, 'restart'),
        (0.1, 'bye'),
    )

    # test 3 calls at once
    for call_index in range(3):
        session_dir = "./%d" % call_index
        if not os.path.isdir(session_dir):
            os.mkdir(session_dir)

        # Session preamble.
        logger.session_start(session_dir)
        logger.config('config = ' + unicode(cfg))
        logging_cfg = cfg['Logging']
        logger.header(logging_cfg["system_name"], logging_cfg["version"])
        logger.input_source("voip")

        # Whole-dialogue recordings: start all of them, then end all of them.
        for speaker, wav_name in dialogue_recordings:
            logger.dialogue_rec_start(speaker, wav_name)
        for _speaker, wav_name in dialogue_recordings:
            logger.dialogue_rec_end(wav_name)

        # First system turn.
        logger.turn("system")
        logger.dialogue_act("system", "hello()")
        logger.text("system", "Hello.")
        logger.rec_start("system", "system1.wav")
        logger.rec_end("system1.wav")

        # First user turn, including ASR and SLU results.
        logger.turn("user")
        logger.rec_start("user", "user1.wav")
        logger.rec_end("user1.wav")

        asr_confnet = UtteranceConfusionNetwork()
        for row in asr_alternatives:
            asr_confnet.add([[prob, word] for prob, word in row])
        asr_confnet.merge()
        asr_confnet.normalise()
        asr_confnet.sort()
        asr_nblist = asr_confnet.get_utterance_nblist()
        logger.asr("user", "user1.wav", asr_nblist, asr_confnet)

        slu_confnet = DialogueActConfusionNetwork()
        for prob, act in slu_alternatives:
            slu_confnet.add(prob, DialogueActItem(act))
        slu_confnet.merge()
        slu_confnet.normalise()
        slu_confnet.sort()
        slu_nblist = slu_confnet.get_da_nblist()
        logger.slu("user", "user1.wav", slu_nblist, slu_confnet)

        # Second system turn, interrupted by a barge-in.
        logger.turn("system")
        logger.dialogue_act("system", "thankyou()")
        logger.text("system", "Thank you.", cost=1.0)
        logger.rec_start("system", "system2.wav")
        logger.rec_end("system2.wav")
        logger.barge_in("system", tts_time=True)

        # Final user turn and hangup.
        logger.turn("user")
        logger.rec_start("user", "user2.wav")
        logger.rec_end("user2.wav")
        logger.hangup("user")
def get_results(self, timeout=0.6):
    """Wait for the complete recognition results from the Julius ASR server.

    Message parts are read from the server and concatenated until the
    accumulated text contains ``<CONFNET>``.  The text is then wrapped in a
    ``<RESULTS>`` root element and parsed as XML: ``RECOGOUT`` hypotheses
    become an n-best list and ``CONFNET`` words become a confusion network.

    Args:
        timeout: Budget, in seconds, for the time spent sleeping while no
            message part is available.  Only the sleeps are counted, not
            the time spent inside ``read_server_message``.

    Returns:
        A tuple ``(nblist, cn)`` holding the ``UtteranceNBList`` and the
        ``UtteranceConfusionNetwork`` built from the server output.

    Raises:
        JuliusASRTimeoutException: If the sleep budget is used up before
            ``<CONFNET>`` shows up in the received text.
    """
    msg = ""

    # Get results from the server.
    time_slept = 0.0
    while time_slept < timeout:
        msg_part = self.read_server_message(self.msg_timeout)
        if not msg_part:
            # Wait and check whether there is a message.
            sleep_time = self.cfg['Hub']['main_loop_sleep_time']
            time.sleep(sleep_time)
            time_slept += sleep_time
            if self.debug >= 2:
                # Single-argument form prints the same text on Python 2
                # and Python 3.
                print("gr.time_slept: %s" % (time_slept,))
            continue

        msg += msg_part + '\n'

        if self.debug:
            print(msg)

        if '<CONFNET>' in msg:
            break
    else:
        # The loop condition failed without a break: nothing usable came.
        raise JuliusASRTimeoutException(
            "Timeout when waiting for the Julius server results.")

    # Process the results.
    #
    # Typical result returned by the Julius ASR.
    #
    # <STARTPROC/>
    # <INPUT STATUS="LISTEN" TIME="1343896296"/>
    # <INPUT STATUS="STARTREC" TIME="1343896311"/>
    # <STARTRECOG/>
    # <INPUT STATUS="ENDREC" TIME="1343896312"/>
    # <ENDRECOG/>
    # <INPUTPARAM FRAMES="164" MSEC="1640"/>
    # <RECOGOUT>
    #   <SHYPO RANK="1" SCORE="-7250.111328">
    #     <WHYPO WORD="" CLASSID="<s>" PHONE="sil" CM="0.887"/>
    #     <WHYPO WORD="I'M" CLASSID="I'M" PHONE="ah m" CM="0.705"/>
    #     <WHYPO WORD="LOOKING" CLASSID="LOOKING" PHONE="l uh k ih ng" CM="0.992"/>
    #     <WHYPO WORD="FOR" CLASSID="FOR" PHONE="f er" CM="0.757"/>
    #     <WHYPO WORD="A" CLASSID="A" PHONE="ah" CM="0.672"/>
    #     <WHYPO WORD="PUB" CLASSID="PUB" PHONE="p ah b" CM="0.409"/>
    #     <WHYPO WORD="" CLASSID="</s>" PHONE="sil" CM="1.000"/>
    #   </SHYPO>
    # </RECOGOUT>
    # <GRAPHOUT NODENUM="43" ARCNUM="70">
    #     <NODE GID="0" WORD="" CLASSID="<s>" PHONE="sil" BEGIN="0" END="2"/>
    #     <NODE GID="1" WORD="" CLASSID="<s>" PHONE="sil" BEGIN="0" END="3"/>
    #     <NODE GID="2" WORD="" CLASSID="<s>" PHONE="sil" BEGIN="0" END="4"/>
    #     <NODE GID="3" WORD="I" CLASSID="I" PHONE="ay" BEGIN="3" END="5"/>
    #     <NODE GID="4" WORD="NO" CLASSID="NO" PHONE="n ow" BEGIN="3" END="7"/>
    #     <NODE GID="5" WORD="I" CLASSID="I" PHONE="ay" BEGIN="4" END="6"/>
    #     <NODE GID="6" WORD="UH" CLASSID="UH" PHONE="ah" BEGIN="4" END="6"/>
    #     <NODE GID="7" WORD="I'M" CLASSID="I'M" PHONE="ay m" BEGIN="4" END="27"/>
    #     ...
    #     <NODE GID="38" WORD="PUB" CLASSID="PUB" PHONE="p ah b" BEGIN="79" END="104"/>
    #     <NODE GID="39" WORD="AH" CLASSID="AH" PHONE="aa" BEGIN="81" END="110"/>
    #     <NODE GID="40" WORD="LOT" CLASSID="LOT" PHONE="l aa t" BEGIN="81" END="110"/>
    #     <NODE GID="41" WORD="" CLASSID="</s>" PHONE="sil" BEGIN="105" END="163"/>
    #     <NODE GID="42" WORD="" CLASSID="</s>" PHONE="sil" BEGIN="111" END="163"/>
    #     <ARC FROM="0" TO="4"/>
    #     <ARC FROM="0" TO="3"/>
    #     <ARC FROM="1" TO="7"/>
    #     <ARC FROM="1" TO="5"/>
    #     <ARC FROM="1" TO="6"/>
    #     ...
    #     <ARC FROM="38" TO="41"/>
    #     <ARC FROM="39" TO="42"/>
    #     <ARC FROM="40" TO="42"/>
    # </GRAPHOUT>
    # <CONFNET>
    #   <WORD>
    #     <ALTERNATIVE PROB="1.000"></ALTERNATIVE>
    #   </WORD>
    #   <WORD>
    #     <ALTERNATIVE PROB="0.950">I</ALTERNATIVE>
    #     <ALTERNATIVE PROB="0.020">HI</ALTERNATIVE>
    #     <ALTERNATIVE PROB="0.013">NO</ALTERNATIVE>
    #     <ALTERNATIVE PROB="0.010"></ALTERNATIVE>
    #     <ALTERNATIVE PROB="0.006">UH</ALTERNATIVE>
    #   </WORD>
    #   <WORD>
    #     <ALTERNATIVE PROB="0.945">AM</ALTERNATIVE>
    #     <ALTERNATIVE PROB="0.055">I'M</ALTERNATIVE>
    #   </WORD>
    #   <WORD>
    #     <ALTERNATIVE PROB="1.000">LOOKING</ALTERNATIVE>
    #   </WORD>
    #   <WORD>
    #     <ALTERNATIVE PROB="1.000">FOR</ALTERNATIVE>
    #   </WORD>
    #   <WORD>
    #     <ALTERNATIVE PROB="1.000">A</ALTERNATIVE>
    #   </WORD>
    #   <WORD>
    #     <ALTERNATIVE PROB="0.963">PUB</ALTERNATIVE>
    #     <ALTERNATIVE PROB="0.016">AH</ALTERNATIVE>
    #     <ALTERNATIVE PROB="0.012">BAR</ALTERNATIVE>
    #     <ALTERNATIVE PROB="0.008">LOT</ALTERNATIVE>
    #   </WORD>
    #   <WORD>
    #     <ALTERNATIVE PROB="1.000"></ALTERNATIVE>
    #   </WORD>
    # </CONFNET>
    # <INPUT STATUS="LISTEN" TIME="1343896312"/>

    # The server output is a sequence of sibling elements; give it a single
    # root so it can be parsed as one XML document.
    msg = "<RESULTS>" + msg + "</RESULTS>"
    # The sentence boundary markers appear raw inside attribute values
    # (CLASSID="<s>"), which is not well-formed XML.  Escape them so the
    # parser accepts the document.
    msg = msg.replace("<s>", "&lt;s&gt;").replace("</s>", "&lt;/s&gt;")

    doc = xml.dom.minidom.parseString(msg)

    # Build the n-best list: one utterance per sentence hypothesis.
    nblist = UtteranceNBList()
    for recogout_el in doc.getElementsByTagName("RECOGOUT"):
        for shypo_el in recogout_el.getElementsByTagName("SHYPO"):
            utterance = ""
            cm = 1.0
            for whypo_el in shypo_el.getElementsByTagName("WHYPO"):
                word = whypo_el.getAttribute("WORD")
                utterance += " " + word
                # Entries with an empty WORD do not contribute to the
                # confidence of the hypothesis.
                if word:
                    cm *= float(whypo_el.getAttribute("CM"))
            nblist.add(cm, Utterance(utterance))

    nblist.merge()
    nblist.add_other()

    # Build the confusion network: one list of alternatives per word.
    cn = UtteranceConfusionNetwork()
    for confnet_el in doc.getElementsByTagName("CONFNET"):
        for word_el in confnet_el.getElementsByTagName("WORD"):
            word_list = [
                [float(alt_el.getAttribute("PROB")),
                 get_text_from_xml_node(alt_el)]
                for alt_el in word_el.getElementsByTagName("ALTERNATIVE")
            ]

            # Filter out empty hypotheses.
            if len(word_list) == 0:
                continue
            if len(word_list) == 1 and len(word_list[0][1]) == 0:
                continue

            # Add the word into the confusion network.
            cn.add(word_list)

    cn.merge()
    cn.normalise()
    cn.prune()
    cn.normalise()
    cn.sort()

    return nblist, cn