Beispiel #1
0
    def test_session_logger(self):
        cfg = Config.load_configs(config=CONFIG_DICT, use_default=False)

        sl = SessionLogger()

        # test 3 calls at once
        for i in range(3):
            sess_dir = "./%d" % i
            if not os.path.isdir(sess_dir):
                os.mkdir(sess_dir)
            sl.session_start(sess_dir)
            sl.config('config = ' + unicode(cfg))
            sl.header(cfg['Logging']["system_name"], cfg['Logging']["version"])
            sl.input_source("voip")

            sl.dialogue_rec_start(None, "both_complete_dialogue.wav")
            sl.dialogue_rec_start("system", "system_complete_dialogue.wav")
            sl.dialogue_rec_start("user", "user_complete_dialogue.wav")
            sl.dialogue_rec_end("both_complete_dialogue.wav")
            sl.dialogue_rec_end("system_complete_dialogue.wav")
            sl.dialogue_rec_end("user_complete_dialogue.wav")

            sl.turn("system")
            sl.dialogue_act("system", "hello()")
            sl.text("system", "Hello.")
            sl.rec_start("system", "system1.wav")
            sl.rec_end("system1.wav")

            sl.turn("user")
            sl.rec_start("user", "user1.wav")
            sl.rec_end("user1.wav")

            A1, A2, A3 = 0.90, 0.05, 0.05
            B1, B2, B3 = 0.70, 0.20, 0.10
            C1, C2, C3 = 0.80, 0.10, 0.10

            asr_confnet = UtteranceConfusionNetwork()
            asr_confnet.add([[A1, "want"], [A2, "has"], [A3, 'ehm']])
            asr_confnet.add([[B1, "Chinese"],  [B2, "English"], [B3, 'cheap']])
            asr_confnet.add([[C1, "restaurant"],  [C2, "pub"],   [C3, 'hotel']])
            asr_confnet.merge()
            asr_confnet.normalise()
            asr_confnet.sort()

            asr_nblist = asr_confnet.get_utterance_nblist()

            sl.asr("user", "user1.wav", asr_nblist, asr_confnet)

            slu_confnet = DialogueActConfusionNetwork()
            slu_confnet.add(0.7, DialogueActItem('hello'))
            slu_confnet.add(0.6, DialogueActItem('thankyou'))
            slu_confnet.add(0.4, DialogueActItem('restart'))
            slu_confnet.add(0.1, DialogueActItem('bye'))
            slu_confnet.merge()
            slu_confnet.normalise()
            slu_confnet.sort()

            slu_nblist = slu_confnet.get_da_nblist()

            sl.slu("user", "user1.wav", slu_nblist, slu_confnet)

            sl.turn("system")
            sl.dialogue_act("system", "thankyou()")
            sl.text("system", "Thank you.", cost = 1.0)
            sl.rec_start("system", "system2.wav")
            sl.rec_end("system2.wav")
            sl.barge_in("system", tts_time = True)

            sl.turn("user")
            sl.rec_start("user", "user2.wav")
            sl.rec_end("user2.wav")
            sl.hangup("user")
Beispiel #2
0
    def get_results(self, timeout=0.6):
        """"
        Waits for the complete recognition results from the Julius ASR server.

        Timeout specifies how long it will wait for the end of message.
        """
        msg = ""

        # Get results from the server.
        time_slept = 0.0
        while time_slept < timeout:
            msg_part = self.read_server_message(self.msg_timeout)
            if not msg_part:
                # Wait and check whether there is a message.
                time.sleep(self.cfg['Hub']['main_loop_sleep_time'])
                time_slept += self.cfg['Hub']['main_loop_sleep_time']
                if self.debug >= 2:
                    print "gr.time_slept:", time_slept
                continue

            msg += msg_part + '\n'

            if self.debug:
                print msg

            if '<CONFNET>' in msg:
                break
        else:
            raise JuliusASRTimeoutException(
                "Timeout when waiting for the Julius server results.")

        # Process the results.
        """ Typical result returned by the Julius ASR.

          <STARTPROC/>
          <INPUT STATUS="LISTEN" TIME="1343896296"/>
          <INPUT STATUS="STARTREC" TIME="1343896311"/>
          <STARTRECOG/>
          <INPUT STATUS="ENDREC" TIME="1343896312"/>
          <ENDRECOG/>
          <INPUTPARAM FRAMES="164" MSEC="1640"/>
          <RECOGOUT>
            <SHYPO RANK="1" SCORE="-7250.111328">
              <WHYPO WORD="" CLASSID="<s>" PHONE="sil" CM="0.887"/>
              <WHYPO WORD="I'M" CLASSID="I'M" PHONE="ah m" CM="0.705"/>
              <WHYPO WORD="LOOKING" CLASSID="LOOKING" PHONE="l uh k ih ng" CM="0.992"/>
              <WHYPO WORD="FOR" CLASSID="FOR" PHONE="f er" CM="0.757"/>
              <WHYPO WORD="A" CLASSID="A" PHONE="ah" CM="0.672"/>
              <WHYPO WORD="PUB" CLASSID="PUB" PHONE="p ah b" CM="0.409"/>
              <WHYPO WORD="" CLASSID="</s>" PHONE="sil" CM="1.000"/>
            </SHYPO>
          </RECOGOUT>
          <GRAPHOUT NODENUM="43" ARCNUM="70">
              <NODE GID="0" WORD="" CLASSID="<s>" PHONE="sil" BEGIN="0" END="2"/>
              <NODE GID="1" WORD="" CLASSID="<s>" PHONE="sil" BEGIN="0" END="3"/>
              <NODE GID="2" WORD="" CLASSID="<s>" PHONE="sil" BEGIN="0" END="4"/>
              <NODE GID="3" WORD="I" CLASSID="I" PHONE="ay" BEGIN="3" END="5"/>
              <NODE GID="4" WORD="NO" CLASSID="NO" PHONE="n ow" BEGIN="3" END="7"/>
              <NODE GID="5" WORD="I" CLASSID="I" PHONE="ay" BEGIN="4" END="6"/>
              <NODE GID="6" WORD="UH" CLASSID="UH" PHONE="ah" BEGIN="4" END="6"/>
              <NODE GID="7" WORD="I'M" CLASSID="I'M" PHONE="ay m" BEGIN="4" END="27"/>

              ...

              <NODE GID="38" WORD="PUB" CLASSID="PUB" PHONE="p ah b" BEGIN="79" END="104"/>
              <NODE GID="39" WORD="AH" CLASSID="AH" PHONE="aa" BEGIN="81" END="110"/>
              <NODE GID="40" WORD="LOT" CLASSID="LOT" PHONE="l aa t" BEGIN="81" END="110"/>
              <NODE GID="41" WORD="" CLASSID="</s>" PHONE="sil" BEGIN="105" END="163"/>
              <NODE GID="42" WORD="" CLASSID="</s>" PHONE="sil" BEGIN="111" END="163"/>
              <ARC FROM="0" TO="4"/>
              <ARC FROM="0" TO="3"/>
              <ARC FROM="1" TO="7"/>
              <ARC FROM="1" TO="5"/>
              <ARC FROM="1" TO="6"/>

              ...

              <ARC FROM="38" TO="41"/>
              <ARC FROM="39" TO="42"/>
              <ARC FROM="40" TO="42"/>
          </GRAPHOUT>
          <CONFNET>
            <WORD>
              <ALTERNATIVE PROB="1.000"></ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="0.950">I</ALTERNATIVE>
              <ALTERNATIVE PROB="0.020">HI</ALTERNATIVE>
              <ALTERNATIVE PROB="0.013">NO</ALTERNATIVE>
              <ALTERNATIVE PROB="0.010"></ALTERNATIVE>
              <ALTERNATIVE PROB="0.006">UH</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="0.945">AM</ALTERNATIVE>
              <ALTERNATIVE PROB="0.055">I'M</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="1.000">LOOKING</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="1.000">FOR</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="1.000">A</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="0.963">PUB</ALTERNATIVE>
              <ALTERNATIVE PROB="0.016">AH</ALTERNATIVE>
              <ALTERNATIVE PROB="0.012">BAR</ALTERNATIVE>
              <ALTERNATIVE PROB="0.008">LOT</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="1.000"></ALTERNATIVE>
            </WORD>
          </CONFNET>
          <INPUT STATUS="LISTEN" TIME="1343896312"/>

        """
        msg = "<RESULTS>" + msg + "</RESULTS>"
        msg = msg.replace("<s>", "&lt;s&gt;").replace("</s>", "&lt;/s&gt;")

        nblist = UtteranceNBList()

        doc = xml.dom.minidom.parseString(msg)
        recogout = doc.getElementsByTagName("RECOGOUT")
        for el in recogout:
            shypo = el.getElementsByTagName("SHYPO")
            for el in shypo:
                whypo = el.getElementsByTagName("WHYPO")
                utterance = ""
                cm = 1.0
                for el in whypo:
                    word = el.getAttribute("WORD")
                    utterance += " " + word
                    if word:
                        cm *= float(el.getAttribute("CM"))
                nblist.add(cm, Utterance(utterance))

        nblist.merge()
        nblist.add_other()

        cn = UtteranceConfusionNetwork()

        confnet = doc.getElementsByTagName("CONFNET")
        for el in confnet:
            word = el.getElementsByTagName("WORD")
            for el in word:
                alternative = el.getElementsByTagName("ALTERNATIVE")
                word_list = []
                for el in alternative:
                    prob = float(el.getAttribute("PROB"))
                    text = get_text_from_xml_node(el)
                    word_list.append([prob, text])

                # Filter out empty hypotheses.
                if len(word_list) == 0:
                    continue
                if len(word_list) == 1 and len(word_list[0][1]) == 0:
                    continue

                # Add the word into the confusion network.
                cn.add(word_list)

        cn.merge()
        cn.normalise()
        cn.prune()
        cn.normalise()
        cn.sort()

        return nblist, cn
Beispiel #3
0
    def test_session_logger(self):
        cfg = Config.load_configs(config=CONFIG_DICT, use_default=False)

        sl = SessionLogger()

        # test 3 calls at once
        for i in range(3):
            sess_dir = "./%d" % i
            if not os.path.isdir(sess_dir):
                os.mkdir(sess_dir)
            sl.session_start(sess_dir)
            sl.config('config = ' + unicode(cfg))
            sl.header(cfg['Logging']["system_name"], cfg['Logging']["version"])
            sl.input_source("voip")

            sl.dialogue_rec_start(None, "both_complete_dialogue.wav")
            sl.dialogue_rec_start("system", "system_complete_dialogue.wav")
            sl.dialogue_rec_start("user", "user_complete_dialogue.wav")
            sl.dialogue_rec_end("both_complete_dialogue.wav")
            sl.dialogue_rec_end("system_complete_dialogue.wav")
            sl.dialogue_rec_end("user_complete_dialogue.wav")

            sl.turn("system")
            sl.dialogue_act("system", "hello()")
            sl.text("system", "Hello.")
            sl.rec_start("system", "system1.wav")
            sl.rec_end("system1.wav")

            sl.turn("user")
            sl.rec_start("user", "user1.wav")
            sl.rec_end("user1.wav")

            A1, A2, A3 = 0.90, 0.05, 0.05
            B1, B2, B3 = 0.70, 0.20, 0.10
            C1, C2, C3 = 0.80, 0.10, 0.10

            asr_confnet = UtteranceConfusionNetwork()
            asr_confnet.add([[A1, "want"], [A2, "has"], [A3, 'ehm']])
            asr_confnet.add([[B1, "Chinese"], [B2, "English"], [B3, 'cheap']])
            asr_confnet.add([[C1, "restaurant"], [C2, "pub"], [C3, 'hotel']])
            asr_confnet.merge()
            asr_confnet.normalise()
            asr_confnet.sort()

            asr_nblist = asr_confnet.get_utterance_nblist()

            sl.asr("user", "user1.wav", asr_nblist, asr_confnet)

            slu_confnet = DialogueActConfusionNetwork()
            slu_confnet.add(0.7, DialogueActItem('hello'))
            slu_confnet.add(0.6, DialogueActItem('thankyou'))
            slu_confnet.add(0.4, DialogueActItem('restart'))
            slu_confnet.add(0.1, DialogueActItem('bye'))
            slu_confnet.merge()
            slu_confnet.normalise()
            slu_confnet.sort()

            slu_nblist = slu_confnet.get_da_nblist()

            sl.slu("user", "user1.wav", slu_nblist, slu_confnet)

            sl.turn("system")
            sl.dialogue_act("system", "thankyou()")
            sl.text("system", "Thank you.", cost=1.0)
            sl.rec_start("system", "system2.wav")
            sl.rec_end("system2.wav")
            sl.barge_in("system", tts_time=True)

            sl.turn("user")
            sl.rec_start("user", "user2.wav")
            sl.rec_end("user2.wav")
            sl.hangup("user")
Beispiel #4
0
    def get_results(self, timeout=0.6):
        """"
        Waits for the complete recognition results from the Julius ASR server.

        Timeout specifies how long it will wait for the end of message.
        """
        msg = ""

        # Get results from the server.
        time_slept = 0.0
        while time_slept < timeout:
            msg_part = self.read_server_message(self.msg_timeout)
            if not msg_part:
                # Wait and check whether there is a message.
                time.sleep(self.cfg['Hub']['main_loop_sleep_time'])
                time_slept += self.cfg['Hub']['main_loop_sleep_time']
                if self.debug >= 2:
                    print "gr.time_slept:", time_slept
                continue

            msg += msg_part + '\n'

            if self.debug:
                print msg

            if '<CONFNET>' in msg:
                break
        else:
            raise JuliusASRTimeoutException(
                "Timeout when waiting for the Julius server results.")

        # Process the results.
        """ Typical result returned by the Julius ASR.

          <STARTPROC/>
          <INPUT STATUS="LISTEN" TIME="1343896296"/>
          <INPUT STATUS="STARTREC" TIME="1343896311"/>
          <STARTRECOG/>
          <INPUT STATUS="ENDREC" TIME="1343896312"/>
          <ENDRECOG/>
          <INPUTPARAM FRAMES="164" MSEC="1640"/>
          <RECOGOUT>
            <SHYPO RANK="1" SCORE="-7250.111328">
              <WHYPO WORD="" CLASSID="<s>" PHONE="sil" CM="0.887"/>
              <WHYPO WORD="I'M" CLASSID="I'M" PHONE="ah m" CM="0.705"/>
              <WHYPO WORD="LOOKING" CLASSID="LOOKING" PHONE="l uh k ih ng" CM="0.992"/>
              <WHYPO WORD="FOR" CLASSID="FOR" PHONE="f er" CM="0.757"/>
              <WHYPO WORD="A" CLASSID="A" PHONE="ah" CM="0.672"/>
              <WHYPO WORD="PUB" CLASSID="PUB" PHONE="p ah b" CM="0.409"/>
              <WHYPO WORD="" CLASSID="</s>" PHONE="sil" CM="1.000"/>
            </SHYPO>
          </RECOGOUT>
          <GRAPHOUT NODENUM="43" ARCNUM="70">
              <NODE GID="0" WORD="" CLASSID="<s>" PHONE="sil" BEGIN="0" END="2"/>
              <NODE GID="1" WORD="" CLASSID="<s>" PHONE="sil" BEGIN="0" END="3"/>
              <NODE GID="2" WORD="" CLASSID="<s>" PHONE="sil" BEGIN="0" END="4"/>
              <NODE GID="3" WORD="I" CLASSID="I" PHONE="ay" BEGIN="3" END="5"/>
              <NODE GID="4" WORD="NO" CLASSID="NO" PHONE="n ow" BEGIN="3" END="7"/>
              <NODE GID="5" WORD="I" CLASSID="I" PHONE="ay" BEGIN="4" END="6"/>
              <NODE GID="6" WORD="UH" CLASSID="UH" PHONE="ah" BEGIN="4" END="6"/>
              <NODE GID="7" WORD="I'M" CLASSID="I'M" PHONE="ay m" BEGIN="4" END="27"/>

              ...

              <NODE GID="38" WORD="PUB" CLASSID="PUB" PHONE="p ah b" BEGIN="79" END="104"/>
              <NODE GID="39" WORD="AH" CLASSID="AH" PHONE="aa" BEGIN="81" END="110"/>
              <NODE GID="40" WORD="LOT" CLASSID="LOT" PHONE="l aa t" BEGIN="81" END="110"/>
              <NODE GID="41" WORD="" CLASSID="</s>" PHONE="sil" BEGIN="105" END="163"/>
              <NODE GID="42" WORD="" CLASSID="</s>" PHONE="sil" BEGIN="111" END="163"/>
              <ARC FROM="0" TO="4"/>
              <ARC FROM="0" TO="3"/>
              <ARC FROM="1" TO="7"/>
              <ARC FROM="1" TO="5"/>
              <ARC FROM="1" TO="6"/>

              ...

              <ARC FROM="38" TO="41"/>
              <ARC FROM="39" TO="42"/>
              <ARC FROM="40" TO="42"/>
          </GRAPHOUT>
          <CONFNET>
            <WORD>
              <ALTERNATIVE PROB="1.000"></ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="0.950">I</ALTERNATIVE>
              <ALTERNATIVE PROB="0.020">HI</ALTERNATIVE>
              <ALTERNATIVE PROB="0.013">NO</ALTERNATIVE>
              <ALTERNATIVE PROB="0.010"></ALTERNATIVE>
              <ALTERNATIVE PROB="0.006">UH</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="0.945">AM</ALTERNATIVE>
              <ALTERNATIVE PROB="0.055">I'M</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="1.000">LOOKING</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="1.000">FOR</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="1.000">A</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="0.963">PUB</ALTERNATIVE>
              <ALTERNATIVE PROB="0.016">AH</ALTERNATIVE>
              <ALTERNATIVE PROB="0.012">BAR</ALTERNATIVE>
              <ALTERNATIVE PROB="0.008">LOT</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="1.000"></ALTERNATIVE>
            </WORD>
          </CONFNET>
          <INPUT STATUS="LISTEN" TIME="1343896312"/>

        """
        msg = "<RESULTS>" + msg + "</RESULTS>"
        msg = msg.replace("<s>", "&lt;s&gt;").replace("</s>", "&lt;/s&gt;")

        nblist = UtteranceNBList()

        doc = xml.dom.minidom.parseString(msg)
        recogout = doc.getElementsByTagName("RECOGOUT")
        for el in recogout:
            shypo = el.getElementsByTagName("SHYPO")
            for el in shypo:
                whypo = el.getElementsByTagName("WHYPO")
                utterance = ""
                cm = 1.0
                for el in whypo:
                    word = el.getAttribute("WORD")
                    utterance += " " + word
                    if word:
                        cm *= float(el.getAttribute("CM"))
                nblist.add(cm, Utterance(utterance))

        nblist.merge()
        nblist.add_other()

        cn = UtteranceConfusionNetwork()

        confnet = doc.getElementsByTagName("CONFNET")
        for el in confnet:
            word = el.getElementsByTagName("WORD")
            for el in word:
                alternative = el.getElementsByTagName("ALTERNATIVE")
                word_list = []
                for el in alternative:
                    prob = float(el.getAttribute("PROB"))
                    text = get_text_from_xml_node(el)
                    word_list.append([prob, text])

                # Filter out empty hypotheses.
                if len(word_list) == 0:
                    continue
                if len(word_list) == 1 and len(word_list[0][1]) == 0:
                    continue

                # Add the word into the confusion network.
                cn.add(word_list)

        cn.merge()
        cn.normalise()
        cn.prune()
        cn.normalise()
        cn.sort()

        return nblist, cn