Beispiel #1
0
    def input_usr_utt_nblist(self):
        """Reads an N-best list of utterances from the input. """

        self.init_readline()

        nblist = UtteranceNBList()
        i = 1
        while i < 100:
            l = raw_input("User %d:    " % i)
            try:
                l = l.decode('utf8')
            except:  # if we use ipdb, it already gives us UTF-8-encoded input :-(
                pass
            if l.startswith("."):
                print
                break

            try:
                prob, da = self.parse_input_utt(l)
            except TextHubException as e:
                print e
                continue

            nblist.add(prob, da)

            i += 1

        nblist.merge()
        nblist.scale()
        nblist.add_other()

        self.write_readline()

        return nblist
Beispiel #2
0
Datei: thub.py Projekt: AoJ/alex
    def input_usr_utt_nblist(self):
        """Reads an N-best list of utterances from the input. """

        self.init_readline()

        nblist = UtteranceNBList()
        i = 1
        while i < 100:
            l = raw_input("User %d:    " % i)
            try:
                l = l.decode('utf8')
            except:  # if we use ipdb, it already gives us UTF-8-encoded input :-(
                pass
            if l.startswith("."):
                print
                break

            try:
                prob, da = self.parse_input_utt(l)
            except TextHubException as e:
                print e
                continue

            nblist.add(prob, da)

            i += 1

        nblist.merge()
        nblist.scale()
        nblist.add_other()

        self.write_readline()

        return nblist
Beispiel #3
0
    def input_usr_utt_nblist(self):
        """Reads an N-best list of utterances from the input. """

        self.init_readline()

        nblist = UtteranceNBList()
        i = 1
        while i < 100:
            l = raw_input("User %d:    " % i)

            l = l.decode('utf8')
            if self.f_output_script:
                self.f_output_script.write(l + '\n')

            if l.startswith("."):
                print
                break

            try:
                prob, da = self.parse_input_utt(l)
            except TextHubException as e:
                print e
                continue

            nblist.add(prob, da)

            i += 1

        nblist.merge()
        nblist.scale()
        nblist.add_other()

        self.write_readline()

        return nblist
Beispiel #4
0
    def input_usr_utt_nblist(self):
        """Reads an N-best list of utterances from the input. """

        self.init_readline()

        nblist = UtteranceNBList()
        i = 1
        while i < 100:
            l = raw_input("User %d:    " % i)

            l = l.decode('utf8')
            if self.f_output_script:
                self.f_output_script.write(l + '\n')

            if l.startswith("."):
                print
                break

            try:
                prob, da = self.parse_input_utt(l)
            except TextHubException as e:
                print e
                continue

            nblist.add(prob, da)

            i += 1

        nblist.merge()
        nblist.scale()
        nblist.add_other()

        self.write_readline()

        return nblist
Beispiel #5
0
    def recognize(self, wav):
        """ Produces hypotheses for the input audio data.

        Remember that GoogleASR works only with complete wave files.

        The audio is saved into a temporary FLAC file, which is passed to
        ``self.get_asr_hypotheses``; the temporary file is always removed
        afterwards.  The reply is parsed as JSON.  For status 0 the confidence
        of the first hypothesis is taken from the reply and the scores of the
        remaining hypotheses are guessed from their rank; for status 5 a
        single '_other_' utterance is produced.

        Args:
            wav: the audio data, handed over to ``audio.save_flac``.

        Returns:
            An n-best list of hypotheses (``UtteranceNBList``) after
            ``merge()`` and ``add_other()``.  When the reply cannot be
            interpreted, the list holds nothing but what ``add_other()`` adds.
        """

        # making a file temp for manipulation
        handle, flac_file_name = mkstemp('TmpSpeechFile.flac')

        try:
            # convert wav to flac
            audio.save_flac(self.cfg, flac_file_name, wav)
            json_hypotheses = self.get_asr_hypotheses(flac_file_name)
        except (urllib2.HTTPError, urllib2.URLError) as e:
            self.syslog.exception('GoogleASR HTTP/URL error: %s' % unicode(e))
            # NOTE(review): this fallback is a Python list, not a JSON string,
            # so json.loads() below rejects it and the method ends up with an
            # empty n-best list.  Kept as is to preserve the current
            # behaviour; confirm whether the placeholder utterance was meant
            # to be reported.
            json_hypotheses = [
                [{'confidence': 1.0, 'utterance': '__google__ __asr__ __exception__'}, ], ]
        finally:
            os.close(handle)
            remove(flac_file_name)

        try:
            hyp = json.loads(json_hypotheses)

            nblist = UtteranceNBList()

            status = hyp['status']
            if status == 0:
                hypotheses = hyp['hypotheses']
                n = len(hypotheses)
                for i, h in enumerate(hypotheses):
                    if i == 0:
                        conf1 = h['confidence']
                        nblist.add(conf1, Utterance(h['utterance']))
                    else:
                        # guess the confX score; this branch is reached only
                        # for n >= 2, so (n - 1.0) is never zero here
                        guessed = (1.0-conf1)*(n-i)/(n-1.0)/(n-0.0)*2.0
                        nblist.add(guessed, Utterance(h['utterance']))
            elif status == 5:
                nblist.add(1.0, Utterance('_other_'))
        except Exception:
            # Any malformed or unexpected reply yields an empty list.  Unlike
            # a bare ``except:``, this lets KeyboardInterrupt and SystemExit
            # propagate.
            nblist = UtteranceNBList()

        nblist.merge()
        nblist.add_other()

        return nblist
Beispiel #6
0
    def test_conversion_of_confnet_into_nblist(self):

        A1, A2, A3 = 0.90, 0.05, 0.05
        B1, B2, B3 = 0.50, 0.35, 0.15
        C1, C2, C3 = 0.60, 0.30, 0.10

        correct_nblist = UtteranceNBList()
        correct_nblist.add(A1*B1*C1, Utterance("A1 B1 C1"))
        correct_nblist.add(A1*B2*C1, Utterance("A1 B2 C1"))
        correct_nblist.add(A1*B1*C2, Utterance("A1 B1 C2"))
        correct_nblist.add(A1*B2*C2, Utterance("A1 B2 C2"))
        correct_nblist.add(A1*B3*C1, Utterance("A1 B3 C1"))
        correct_nblist.add(A1*B1*C3, Utterance("A1 B1 C3"))
        correct_nblist.add(A1*B3*C2, Utterance("A1 B3 C2"))
        correct_nblist.add(A1*B2*C3, Utterance("A1 B2 C3"))
        correct_nblist.merge()
        correct_nblist.add_other()

        confnet = UtteranceConfusionNetwork()
        confnet.add([[A1, 'A1'], [A2, 'A2'], [A3, 'A3'],])
        confnet.add([[B1, 'B1'], [B2, 'B2'], [B3, 'B3'],])
        confnet.add([[C1, 'C1'], [C2, 'C2'], [C3, 'C3'],])
        confnet.merge().sort()

        gen_nblist = confnet.get_utterance_nblist(10)

        s = []
        s.append("")
        s.append("Confusion network:")
        s.append(unicode(confnet))
        s.append("")
        s.append("Generated nblist:")
        s.append(unicode(gen_nblist))
        s.append("")
        s.append("Correct nblist:")
        s.append(unicode(correct_nblist))
        s.append("")
        print '\n'.join(s)

        self.assertEqual(unicode(gen_nblist), unicode(correct_nblist))
Beispiel #7
0
    def get_results(self, timeout=0.6):
        """"
        Waits for the complete recognition results from the Julius ASR server.

        Timeout specifies how long it will wait for the end of message.
        """
        msg = ""

        # Get results from the server.
        time_slept = 0.0
        while time_slept < timeout:
            msg_part = self.read_server_message(self.msg_timeout)
            if not msg_part:
                # Wait and check whether there is a message.
                time.sleep(self.cfg['Hub']['main_loop_sleep_time'])
                time_slept += self.cfg['Hub']['main_loop_sleep_time']
                if self.debug >= 2:
                    print "gr.time_slept:", time_slept
                continue

            msg += msg_part + '\n'

            if self.debug:
                print msg

            if '<CONFNET>' in msg:
                break
        else:
            raise JuliusASRTimeoutException(
                "Timeout when waiting for the Julius server results.")

        # Process the results.
        """ Typical result returned by the Julius ASR.

          <STARTPROC/>
          <INPUT STATUS="LISTEN" TIME="1343896296"/>
          <INPUT STATUS="STARTREC" TIME="1343896311"/>
          <STARTRECOG/>
          <INPUT STATUS="ENDREC" TIME="1343896312"/>
          <ENDRECOG/>
          <INPUTPARAM FRAMES="164" MSEC="1640"/>
          <RECOGOUT>
            <SHYPO RANK="1" SCORE="-7250.111328">
              <WHYPO WORD="" CLASSID="<s>" PHONE="sil" CM="0.887"/>
              <WHYPO WORD="I'M" CLASSID="I'M" PHONE="ah m" CM="0.705"/>
              <WHYPO WORD="LOOKING" CLASSID="LOOKING" PHONE="l uh k ih ng" CM="0.992"/>
              <WHYPO WORD="FOR" CLASSID="FOR" PHONE="f er" CM="0.757"/>
              <WHYPO WORD="A" CLASSID="A" PHONE="ah" CM="0.672"/>
              <WHYPO WORD="PUB" CLASSID="PUB" PHONE="p ah b" CM="0.409"/>
              <WHYPO WORD="" CLASSID="</s>" PHONE="sil" CM="1.000"/>
            </SHYPO>
          </RECOGOUT>
          <GRAPHOUT NODENUM="43" ARCNUM="70">
              <NODE GID="0" WORD="" CLASSID="<s>" PHONE="sil" BEGIN="0" END="2"/>
              <NODE GID="1" WORD="" CLASSID="<s>" PHONE="sil" BEGIN="0" END="3"/>
              <NODE GID="2" WORD="" CLASSID="<s>" PHONE="sil" BEGIN="0" END="4"/>
              <NODE GID="3" WORD="I" CLASSID="I" PHONE="ay" BEGIN="3" END="5"/>
              <NODE GID="4" WORD="NO" CLASSID="NO" PHONE="n ow" BEGIN="3" END="7"/>
              <NODE GID="5" WORD="I" CLASSID="I" PHONE="ay" BEGIN="4" END="6"/>
              <NODE GID="6" WORD="UH" CLASSID="UH" PHONE="ah" BEGIN="4" END="6"/>
              <NODE GID="7" WORD="I'M" CLASSID="I'M" PHONE="ay m" BEGIN="4" END="27"/>

              ...

              <NODE GID="38" WORD="PUB" CLASSID="PUB" PHONE="p ah b" BEGIN="79" END="104"/>
              <NODE GID="39" WORD="AH" CLASSID="AH" PHONE="aa" BEGIN="81" END="110"/>
              <NODE GID="40" WORD="LOT" CLASSID="LOT" PHONE="l aa t" BEGIN="81" END="110"/>
              <NODE GID="41" WORD="" CLASSID="</s>" PHONE="sil" BEGIN="105" END="163"/>
              <NODE GID="42" WORD="" CLASSID="</s>" PHONE="sil" BEGIN="111" END="163"/>
              <ARC FROM="0" TO="4"/>
              <ARC FROM="0" TO="3"/>
              <ARC FROM="1" TO="7"/>
              <ARC FROM="1" TO="5"/>
              <ARC FROM="1" TO="6"/>

              ...

              <ARC FROM="38" TO="41"/>
              <ARC FROM="39" TO="42"/>
              <ARC FROM="40" TO="42"/>
          </GRAPHOUT>
          <CONFNET>
            <WORD>
              <ALTERNATIVE PROB="1.000"></ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="0.950">I</ALTERNATIVE>
              <ALTERNATIVE PROB="0.020">HI</ALTERNATIVE>
              <ALTERNATIVE PROB="0.013">NO</ALTERNATIVE>
              <ALTERNATIVE PROB="0.010"></ALTERNATIVE>
              <ALTERNATIVE PROB="0.006">UH</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="0.945">AM</ALTERNATIVE>
              <ALTERNATIVE PROB="0.055">I'M</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="1.000">LOOKING</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="1.000">FOR</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="1.000">A</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="0.963">PUB</ALTERNATIVE>
              <ALTERNATIVE PROB="0.016">AH</ALTERNATIVE>
              <ALTERNATIVE PROB="0.012">BAR</ALTERNATIVE>
              <ALTERNATIVE PROB="0.008">LOT</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="1.000"></ALTERNATIVE>
            </WORD>
          </CONFNET>
          <INPUT STATUS="LISTEN" TIME="1343896312"/>

        """
        msg = "<RESULTS>" + msg + "</RESULTS>"
        msg = msg.replace("<s>", "&lt;s&gt;").replace("</s>", "&lt;/s&gt;")

        nblist = UtteranceNBList()

        doc = xml.dom.minidom.parseString(msg)
        recogout = doc.getElementsByTagName("RECOGOUT")
        for el in recogout:
            shypo = el.getElementsByTagName("SHYPO")
            for el in shypo:
                whypo = el.getElementsByTagName("WHYPO")
                utterance = ""
                cm = 1.0
                for el in whypo:
                    word = el.getAttribute("WORD")
                    utterance += " " + word
                    if word:
                        cm *= float(el.getAttribute("CM"))
                nblist.add(cm, Utterance(utterance))

        nblist.merge()
        nblist.add_other()

        cn = UtteranceConfusionNetwork()

        confnet = doc.getElementsByTagName("CONFNET")
        for el in confnet:
            word = el.getElementsByTagName("WORD")
            for el in word:
                alternative = el.getElementsByTagName("ALTERNATIVE")
                word_list = []
                for el in alternative:
                    prob = float(el.getAttribute("PROB"))
                    text = get_text_from_xml_node(el)
                    word_list.append([prob, text])

                # Filter out empty hypotheses.
                if len(word_list) == 0:
                    continue
                if len(word_list) == 1 and len(word_list[0][1]) == 0:
                    continue

                # Add the word into the confusion network.
                cn.add(word_list)

        cn.merge()
        cn.normalise()
        cn.prune()
        cn.normalise()
        cn.sort()

        return nblist, cn
Beispiel #8
0
    def get_results(self, timeout=0.6):
        """"
        Waits for the complete recognition results from the Julius ASR server.

        Timeout specifies how long it will wait for the end of message.
        """
        msg = ""

        # Get results from the server.
        time_slept = 0.0
        while time_slept < timeout:
            msg_part = self.read_server_message(self.msg_timeout)
            if not msg_part:
                # Wait and check whether there is a message.
                time.sleep(self.cfg['Hub']['main_loop_sleep_time'])
                time_slept += self.cfg['Hub']['main_loop_sleep_time']
                if self.debug >= 2:
                    print "gr.time_slept:", time_slept
                continue

            msg += msg_part + '\n'

            if self.debug:
                print msg

            if '<CONFNET>' in msg:
                break
        else:
            raise JuliusASRTimeoutException(
                "Timeout when waiting for the Julius server results.")

        # Process the results.
        """ Typical result returned by the Julius ASR.

          <STARTPROC/>
          <INPUT STATUS="LISTEN" TIME="1343896296"/>
          <INPUT STATUS="STARTREC" TIME="1343896311"/>
          <STARTRECOG/>
          <INPUT STATUS="ENDREC" TIME="1343896312"/>
          <ENDRECOG/>
          <INPUTPARAM FRAMES="164" MSEC="1640"/>
          <RECOGOUT>
            <SHYPO RANK="1" SCORE="-7250.111328">
              <WHYPO WORD="" CLASSID="<s>" PHONE="sil" CM="0.887"/>
              <WHYPO WORD="I'M" CLASSID="I'M" PHONE="ah m" CM="0.705"/>
              <WHYPO WORD="LOOKING" CLASSID="LOOKING" PHONE="l uh k ih ng" CM="0.992"/>
              <WHYPO WORD="FOR" CLASSID="FOR" PHONE="f er" CM="0.757"/>
              <WHYPO WORD="A" CLASSID="A" PHONE="ah" CM="0.672"/>
              <WHYPO WORD="PUB" CLASSID="PUB" PHONE="p ah b" CM="0.409"/>
              <WHYPO WORD="" CLASSID="</s>" PHONE="sil" CM="1.000"/>
            </SHYPO>
          </RECOGOUT>
          <GRAPHOUT NODENUM="43" ARCNUM="70">
              <NODE GID="0" WORD="" CLASSID="<s>" PHONE="sil" BEGIN="0" END="2"/>
              <NODE GID="1" WORD="" CLASSID="<s>" PHONE="sil" BEGIN="0" END="3"/>
              <NODE GID="2" WORD="" CLASSID="<s>" PHONE="sil" BEGIN="0" END="4"/>
              <NODE GID="3" WORD="I" CLASSID="I" PHONE="ay" BEGIN="3" END="5"/>
              <NODE GID="4" WORD="NO" CLASSID="NO" PHONE="n ow" BEGIN="3" END="7"/>
              <NODE GID="5" WORD="I" CLASSID="I" PHONE="ay" BEGIN="4" END="6"/>
              <NODE GID="6" WORD="UH" CLASSID="UH" PHONE="ah" BEGIN="4" END="6"/>
              <NODE GID="7" WORD="I'M" CLASSID="I'M" PHONE="ay m" BEGIN="4" END="27"/>

              ...

              <NODE GID="38" WORD="PUB" CLASSID="PUB" PHONE="p ah b" BEGIN="79" END="104"/>
              <NODE GID="39" WORD="AH" CLASSID="AH" PHONE="aa" BEGIN="81" END="110"/>
              <NODE GID="40" WORD="LOT" CLASSID="LOT" PHONE="l aa t" BEGIN="81" END="110"/>
              <NODE GID="41" WORD="" CLASSID="</s>" PHONE="sil" BEGIN="105" END="163"/>
              <NODE GID="42" WORD="" CLASSID="</s>" PHONE="sil" BEGIN="111" END="163"/>
              <ARC FROM="0" TO="4"/>
              <ARC FROM="0" TO="3"/>
              <ARC FROM="1" TO="7"/>
              <ARC FROM="1" TO="5"/>
              <ARC FROM="1" TO="6"/>

              ...

              <ARC FROM="38" TO="41"/>
              <ARC FROM="39" TO="42"/>
              <ARC FROM="40" TO="42"/>
          </GRAPHOUT>
          <CONFNET>
            <WORD>
              <ALTERNATIVE PROB="1.000"></ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="0.950">I</ALTERNATIVE>
              <ALTERNATIVE PROB="0.020">HI</ALTERNATIVE>
              <ALTERNATIVE PROB="0.013">NO</ALTERNATIVE>
              <ALTERNATIVE PROB="0.010"></ALTERNATIVE>
              <ALTERNATIVE PROB="0.006">UH</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="0.945">AM</ALTERNATIVE>
              <ALTERNATIVE PROB="0.055">I'M</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="1.000">LOOKING</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="1.000">FOR</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="1.000">A</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="0.963">PUB</ALTERNATIVE>
              <ALTERNATIVE PROB="0.016">AH</ALTERNATIVE>
              <ALTERNATIVE PROB="0.012">BAR</ALTERNATIVE>
              <ALTERNATIVE PROB="0.008">LOT</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="1.000"></ALTERNATIVE>
            </WORD>
          </CONFNET>
          <INPUT STATUS="LISTEN" TIME="1343896312"/>

        """
        msg = "<RESULTS>" + msg + "</RESULTS>"
        msg = msg.replace("<s>", "&lt;s&gt;").replace("</s>", "&lt;/s&gt;")

        nblist = UtteranceNBList()

        doc = xml.dom.minidom.parseString(msg)
        recogout = doc.getElementsByTagName("RECOGOUT")
        for el in recogout:
            shypo = el.getElementsByTagName("SHYPO")
            for el in shypo:
                whypo = el.getElementsByTagName("WHYPO")
                utterance = ""
                cm = 1.0
                for el in whypo:
                    word = el.getAttribute("WORD")
                    utterance += " " + word
                    if word:
                        cm *= float(el.getAttribute("CM"))
                nblist.add(cm, Utterance(utterance))

        nblist.merge()
        nblist.add_other()

        cn = UtteranceConfusionNetwork()

        confnet = doc.getElementsByTagName("CONFNET")
        for el in confnet:
            word = el.getElementsByTagName("WORD")
            for el in word:
                alternative = el.getElementsByTagName("ALTERNATIVE")
                word_list = []
                for el in alternative:
                    prob = float(el.getAttribute("PROB"))
                    text = get_text_from_xml_node(el)
                    word_list.append([prob, text])

                # Filter out empty hypotheses.
                if len(word_list) == 0:
                    continue
                if len(word_list) == 1 and len(word_list[0][1]) == 0:
                    continue

                # Add the word into the confusion network.
                cn.add(word_list)

        cn.merge()
        cn.normalise()
        cn.prune()
        cn.normalise()
        cn.sort()

        return nblist, cn