Beispiel #1
0
    def input_usr_utt_nblist(self):
        """Read an N-best list of user utterances from standard input.

        Repeatedly prompts with "User <i>:" and parses each line via
        self.parse_input_utt().  Entry stops at a line beginning with "."
        or after at most 99 hypotheses.

        Returns:
            UtteranceNBList -- merged and scaled, with an 'other'
            hypothesis appended.
        """

        self.init_readline()

        nblist = UtteranceNBList()
        i = 1
        while i < 100:
            l = raw_input("User %d:    " % i)
            try:
                l = l.decode('utf8')
            except:  # if we use ipdb, it already gives us UTF-8-encoded input :-(
                pass
            if l.startswith("."):
                print
                break

            try:
                prob, da = self.parse_input_utt(l)
            except TextHubException as e:
                # Malformed input line: report the error and re-prompt.
                print e
                continue

            nblist.add(prob, da)

            i += 1

        # Collapse duplicate hypotheses, renormalise the probabilities,
        # and append the catch-all 'other' hypothesis.
        nblist.merge()
        nblist.scale()
        nblist.add_other()

        self.write_readline()

        return nblist
Beispiel #2
0
    def test_parse_with_mutliple_date_rel(self):
        """Check that a repeated relative-date word ("ZITRA ZITRA" =
        "tomorrow tomorrow") is parsed into a single
        inform(date_rel=tomorrow) dialogue-act item."""
        asr_hyp = UtteranceNBList()
        asr_hyp.add(0.1, Utterance("CHTEL BYCH ZITRA ZITRA JET"))

        cn = self.slu.parse(asr_hyp)

        self.assert_(DialogueActItem(dai="inform(date_rel=tomorrow)") in cn)
Beispiel #3
0
    def input_usr_utt_nblist(self):
        """Read an N-best list of user utterances from standard input.

        Prompts with "User <i>:" and parses each line via
        self.parse_input_utt(); entry stops at a line beginning with "."
        or after at most 99 hypotheses.  Every raw input line is mirrored
        to self.f_output_script when that file handle is set.

        Returns:
            UtteranceNBList -- merged and scaled, with an 'other'
            hypothesis appended.
        """

        self.init_readline()

        nblist = UtteranceNBList()
        i = 1
        while i < 100:
            l = raw_input("User %d:    " % i)

            l = l.decode('utf8')
            # Mirror the raw input line into the session script log.
            if self.f_output_script:
                self.f_output_script.write(l + '\n')

            if l.startswith("."):
                print
                break

            try:
                prob, da = self.parse_input_utt(l)
            except TextHubException as e:
                # Malformed input line: report the error and re-prompt.
                print e
                continue

            nblist.add(prob, da)

            i += 1

        # Collapse duplicate hypotheses, renormalise the probabilities,
        # and append the catch-all 'other' hypothesis.
        nblist.merge()
        nblist.scale()
        nblist.add_other()

        self.write_readline()

        return nblist
Beispiel #4
0
Datei: kaldi.py Projekt: AoJ/alex
    def hyp_out(self):
        """Return the recognizer's hypotheses for the audio decoded so far.

        Part of the asynchronous ASR interface: prunes and extracts the
        Kaldi lattice, resets the decoder, and converts the lattice into
        an n-best list of utterances.

        Returns:
            UtteranceNBList -- merged n-best list; contains a single
            'Empty hypothesis: Kaldi __FAIL__' entry when the lattice
            yields no paths.
        """
        start = time.time()

        # Get hypothesis
        self.decoder.prune_final()
        utt_lik, lat = self.decoder.get_lattice()  # returns acceptor (py)fst.LogVectorFst
        self.decoder.reset(keep_buffer_data=False)

        # Convert lattice to nblist
        nbest = lattice_to_nbest(lat, self.n_best)
        nblist = UtteranceNBList()
        for w, word_ids in nbest:
            # Map word ids back to word strings via the word symbol table.
            words = u' '.join([self.wst[i] for i in word_ids])

            if self.cfg['ASR']['Kaldi']['debug']:
                self.syslog.debug(words)

            # Convert the path cost w into a probability-like score.
            p = exp(-w)
            nblist.add(p, Utterance(words))

        # Log
        if len(nbest) == 0:
            nblist.add(1.0, Utterance('Empty hypothesis: Kaldi __FAIL__'))

        nblist.merge()

        if self.cfg['ASR']['Kaldi']['debug']:
            self.syslog.info('utterance "likelihood" is %f' % utt_lik)
            self.syslog.debug('hyp_out: get_lattice+nbest in %s secs' % str(time.time() - start))

        return nblist
Beispiel #5
0
    def test_parse_with_mutliple_date_rel(self):
        """Check that a repeated relative-date word ("ZITRA ZITRA" =
        "tomorrow tomorrow") is parsed into a single
        inform(date_rel=tomorrow) dialogue-act item."""
        asr_hyp = UtteranceNBList()
        asr_hyp.add(0.1, Utterance("CHTEL BYCH ZITRA ZITRA JET"))

        cn = self.slu.parse(asr_hyp)

        self.assert_(DialogueActItem(dai="inform(date_rel=tomorrow)") in cn)
Beispiel #6
0
Datei: thub.py Projekt: AoJ/alex
    def input_usr_utt_nblist(self):
        """Read an N-best list of user utterances from standard input.

        Repeatedly prompts with "User <i>:" and parses each line via
        self.parse_input_utt().  Entry stops at a line beginning with "."
        or after at most 99 hypotheses.

        Returns:
            UtteranceNBList -- merged and scaled, with an 'other'
            hypothesis appended.
        """

        self.init_readline()

        nblist = UtteranceNBList()
        i = 1
        while i < 100:
            l = raw_input("User %d:    " % i)
            try:
                l = l.decode('utf8')
            except:  # if we use ipdb, it already gives us UTF-8-encoded input :-(
                pass
            if l.startswith("."):
                print
                break

            try:
                prob, da = self.parse_input_utt(l)
            except TextHubException as e:
                # Malformed input line: report the error and re-prompt.
                print e
                continue

            nblist.add(prob, da)

            i += 1

        # Collapse duplicate hypotheses, renormalise the probabilities,
        # and append the catch-all 'other' hypothesis.
        nblist.merge()
        nblist.scale()
        nblist.add_other()

        self.write_readline()

        return nblist
Beispiel #7
0
    def input_usr_utt_nblist(self):
        """Read an N-best list of user utterances from standard input.

        Prompts with "User <i>:" and parses each line via
        self.parse_input_utt(); entry stops at a line beginning with "."
        or after at most 99 hypotheses.  Every raw input line is mirrored
        to self.f_output_script when that file handle is set.

        Returns:
            UtteranceNBList -- merged and scaled, with an 'other'
            hypothesis appended.
        """

        self.init_readline()

        nblist = UtteranceNBList()
        i = 1
        while i < 100:
            l = raw_input("User %d:    " % i)

            l = l.decode('utf8')
            # Mirror the raw input line into the session script log.
            if self.f_output_script:
                self.f_output_script.write(l + '\n')

            if l.startswith("."):
                print
                break

            try:
                prob, da = self.parse_input_utt(l)
            except TextHubException as e:
                # Malformed input line: report the error and re-prompt.
                print e
                continue

            nblist.add(prob, da)

            i += 1

        # Collapse duplicate hypotheses, renormalise the probabilities,
        # and append the catch-all 'other' hypothesis.
        nblist.merge()
        nblist.scale()
        nblist.add_other()

        self.write_readline()

        return nblist
Beispiel #8
0
    def test_parse_meta(self):
        """Table-driven check of the SLU parser on meta dialogue acts.

        Each (utterance, expected-DAI) pair below is wrapped in a 1-item
        UtteranceNBList with confidence 0.79, parsed with self.slu, and
        the expected DialogueActItem must appear in the resulting
        confusion network.  Utterances are Czech.
        """
        utterances_to_understand = [
            (u"ahoj", "hello()", ),
            (u"sbohem čau", "bye()", ),
            (u"jiné", "reqalts()", ),
            (u"začneme znovu", "restart()", ),
            (u"zopakuj", "repeat()", ),
            (u"promiň", "apology()", ),
            (u"co se zeptat", "help()", ),
            (u"haló", "canthearyou()", ),
            (u"nerozuměl jsem", "notunderstood()", ),
            (u"ano jo", "affirm()", ),
            (u"ne ano nechci", "negate()", ),
            (u"děkuji", "thankyou()", ),
            (u"dobře", "ack()", ),
            (u"chci jet", "inform(task=find_connection)", ),
            (u"jak bude", "inform(task=weather)", ),
            (u"nástupiště", "inform(task=find_platform)", ),
            (u"z jaké jede", "request(from_stop)", ),
            (u"kam to jede", "request(to_stop)", ),
            (u"kdy to jede", "request(departure_time)", ),
            (u"za jak dlouho", "request(departure_time_rel)", ),
            (u"kdy tam budem", "request(arrival_time)", ),
            (u"za jak dlouho tam přijedu", "request(arrival_time_rel)", ),
            (u"jak dlouho bude trvat cesta", "request(duration)", ),
            (u"kolik je hodin", "request(current_time)", ),
            (u"jak dlouho trvá přestup", "request(time_transfers)", ),
            (u"kolik přestupů", "request(num_transfers)", ),
            (u"nechci přestup bez jet přímo", "inform(num_transfers=0)", ),
            (u"jeden přestup", "inform(num_transfers=1)", ),
            (u"dva přestupy", "inform(num_transfers=2)", ),
            (u"tři přestupy", "inform(num_transfers=3)", ),
            (u"čtyři přestupy", "inform(num_transfers=4)", ),
            (u"libovolně přestupů", "inform(num_transfers=dontcare)", ),
            (u"jet přímo", "inform(num_transfers=0)", ),
            (u"alternativa libovolný", "inform(alternative=dontcare)", ),
            (u"alternativa první", "inform(alternative=1)", ),
            (u"alternativa druhá", "inform(alternative=2)", ),
            (u"alternativa třetí", "inform(alternative=3)", ),
            (u"alternativa čtvrtá", "inform(alternative=4)", ),
            (u"alternativa páté", "inform(alternative=5)", ),
            (u"předchozí spoj", "inform(alternative=prev)", ),
            (u"nechci předchozí spoj", "deny(alternative=prev)", ),
            (u"poslední spoj", "inform(alternative=last)", ),
            (u"nechci poslední spoj", "deny(alternative=last)", ),
            (u"další spoj", "inform(alternative=next)", ),
            (u"další", "inform(alternative=next)", ),
            (u"předchozí", "inform(alternative=prev)", ),
            (u"jako ve dne", "inform(ampm=pm)", ),
        ]

        for utt, res in utterances_to_understand:
            asr_hyp = UtteranceNBList()
            asr_hyp.add(0.79, Utterance(utt))

            cn = self.slu.parse(asr_hyp)

            self.assertIn(DialogueActItem(dai=res), cn)
Beispiel #9
0
    def get_response(self, utt_text):
        """Process one user utterance and return the dialogue manager's
        response, or None if any exception occurs during processing.
        """
        try:
            # NOTE(review): this line is corrupted in the source (the
            # '******' run looks like a scraping/sanitising artifact).
            # Presumably the original logged the user input and passed an
            # {'utt_nbl': utt_nblist} dict onward -- verify against the
            # upstream repository before relying on this code.
            self.cfg['Logging']['system_logger'].info('User: '******'utt_nbl': utt_nblist})
            return self.process_dm()

        except Exception:
            # Top-level boundary: log the full traceback and signal
            # failure to the caller with None instead of crashing the hub.
            self.cfg['Logging']['system_logger'].exception(
                'Uncaught exception in WTHUB process.')
            return None
Beispiel #10
0
    def recognize(self, wav):
        """ Produces hypotheses for the input audio data.

        Remember that GoogleASR works only with complete wave files.

        Arguments:
            wav -- complete wave-format audio data to recognise

        Returns:
            UtteranceNBList -- merged n-best list of hypotheses; empty
            (before merge/add_other) if the service response could not
            be parsed.
        """

        # Make a temporary FLAC file for the Google ASR web service.
        handle, flac_file_name = mkstemp('TmpSpeechFile.flac')

        try:
            # convert wav to flac
            audio.save_flac(self.cfg, flac_file_name, wav)
            json_hypotheses = self.get_asr_hypotheses(flac_file_name)
        except (urllib2.HTTPError, urllib2.URLError) as e:
            self.syslog.exception('GoogleASR HTTP/URL error: %s' % unicode(e))
            # NOTE(review): this fallback is a list, not a JSON string, so
            # json.loads below raises and the except branch yields an empty
            # n-best list -- presumably the intended best-effort behaviour.
            json_hypotheses = [
                [{'confidence': 1.0, 'utterance': '__google__ __asr__ __exception__'}, ], ]
        finally:
            # Always release the temp file, even on failure.
            os.close(handle)
            remove(flac_file_name)

        try:
            hyp = json.loads(json_hypotheses)

            nblist = UtteranceNBList()

            if len(hyp['result']) > 0:
                hypotheses = hyp['result'][0]['alternative']
                n = len(hypotheses)
                for i, h in enumerate(hypotheses):
                    if i == 0:
                        # Only the top hypothesis carries a real confidence.
                        nblist.add(h['confidence'], Utterance(h['transcript']))
                        conf1 = h['confidence']
                    else:
                        # Guess a confidence: distribute the remaining mass
                        # (1 - conf1) over the lower-ranked hypotheses,
                        # decreasing linearly with rank.
                        nblist.add((1.0 - conf1) * (n - i) / (n - 1.0) / (n - 0.0) * 2.0,
                                   Utterance(h['transcript']))
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # are not swallowed; any malformed response yields an empty list.
            nblist = UtteranceNBList()

        nblist.merge()
        nblist.add_other()

        return nblist
Beispiel #11
0
    def hyp_out(self):
        """ This defines asynchronous interface for speech recognition.

        Prunes and extracts the Kaldi lattice, optionally applies score
        calibration, resets the decoder, and converts the lattice into an
        n-best list of utterances.  The raw lattice is kept in
        self.last_lattice for later inspection.

        Returns:
            UtteranceNBList -- merged n-best list; contains a single
            'Empty hypothesis: Kaldi __FAIL__' entry when the lattice
            yields no paths.
        """
        start = time.time()

        # Get hypothesis
        self.decoder.prune_final()
        utt_lik, lat = self.decoder.get_lattice(
        )  # returns acceptor (py)fst.LogVectorFst
        self.decoder.reset(keep_buffer_data=False)

        # Optionally recalibrate the lattice scores.
        if self.calibration_table:
            lat = lattice_calibration(lat, self.calibration_table)

        self.last_lattice = lat

        # Convert lattice to nblist
        nbest = lattice_to_nbest(lat, self.n_best)
        nblist = UtteranceNBList()

        for w, word_ids in nbest:
            # Map word ids back to word strings via the word symbol table.
            words = u' '.join([self.wst[i] for i in word_ids])

            if self.cfg['ASR']['Kaldi']['debug']:
                self.syslog.debug(words)

            # Convert the path cost w into a probability-like score.
            p = exp(-w)
            nblist.add(p, Utterance(words))

        # Log
        if len(nbest) == 0:
            nblist.add(1.0, Utterance('Empty hypothesis: Kaldi __FAIL__'))

        nblist.merge()

        if self.cfg['ASR']['Kaldi']['debug']:
            self.syslog.info('utterance "likelihood" is %f' % utt_lik)
            self.syslog.debug('hyp_out: get_lattice+nbest in %s secs' %
                              str(time.time() - start))

        return nblist
Beispiel #12
0
    def test_parse_X(self):
        """Train a tiny DAINNClassifier on four toy utterances and check
        that parsing an n-best list assigns non-zero probability to the
        trained dialogue-act items.

        Uses a fixed numpy seed so the network training is deterministic.
        """
        from alex.components.slu.dainnclassifier import DAINNClassifier
        
        np.random.seed(0)

        cldb = CategoryLabelDatabase()
        # Minimal in-memory category-label database for the test.
        class db:
            database = {
                "task": {
                    "find_connection": ["najít spojení", "najít spoj", "zjistit spojení",
                                        "zjistit spoj", "hledám spojení", 'spojení', 'spoj',
                                       ],
                    "find_platform": ["najít nástupiště", "zjistit nástupiště", ],
                    'weather': ['pocasi', 'jak bude', ],
                },
                "number": {
                    "1": ["jednu"]
                },
                "time": {
                    "now": ["nyní", "teď", "teďka", "hned", "nejbližší", "v tuto chvíli", "co nejdřív"],
                },
            }

        cldb.load(db_mod=db)

        preprocessing = SLUPreprocessing(cldb)
        clf = DAINNClassifier(cldb, preprocessing, features_size=4)

        # Train a simple classifier.
        # NOTE(review): '4' uses task=connection while the database defines
        # find_connection -- presumably intentional as a negative example;
        # verify against the project's conventions.
        das = {
            '1': DialogueAct('inform(task=weather)'),
            '2': DialogueAct('inform(time=now)'),
            '3': DialogueAct('inform(task=weather)'),
            '4': DialogueAct('inform(task=connection)'),
        }
        utterances = {
            '1': Utterance('pocasi pocasi pocasi pocasi pocasi'),
            '2': Utterance('hned ted nyni hned ted nyni'),
            '3': Utterance('jak bude jak bude jak bude jak bude'),
            '4': Utterance('kdy a odkat mi to jede'),
        }
        clf.extract_classifiers(das, utterances, verbose=False)
        clf.prune_classifiers(min_classifier_count=0)
        clf.gen_classifiers_data(min_pos_feature_count=0,
                                 min_neg_feature_count=0,
                                 verbose2=False)

        clf.train(inverse_regularisation=1e1, verbose=False)

        # Parse some sentences.
        utterance_list = UtteranceNBList()
        utterance_list.add(0.7, Utterance('pocasi'))
        utterance_list.add(0.7, Utterance('jak bude pocasi'))
        utterance_list.add(0.2, Utterance('hned'))
        utterance_list.add(0.2, Utterance('hned'))
        da_confnet = clf.parse_X(utterance_list, verbose=False)

        # Both trained acts must receive some probability mass.
        self.assertTrue(da_confnet.get_prob(DialogueActItem(dai='inform(task=weather)')) != 0.0)
        self.assertTrue(da_confnet.get_prob(DialogueActItem(dai='inform(time=now)')) != 0.0)
Beispiel #13
0
    def test_conversion_of_confnet_into_nblist(self):
        """Check UtteranceConfusionNetwork.get_utterance_nblist against a
        hand-built reference.

        Builds a 3-slot confusion network with known alternative
        probabilities, constructs the expected n-best list (top path
        products, merged, with 'other' added), and compares the unicode
        renderings of generated vs. expected lists.
        """

        # Per-slot alternative probabilities.
        A1, A2, A3 = 0.90, 0.05, 0.05
        B1, B2, B3 = 0.50, 0.35, 0.15
        C1, C2, C3 = 0.60, 0.30, 0.10

        # Expected n-best list: the eight most probable paths through the
        # network, scored by the product of slot probabilities.
        correct_nblist = UtteranceNBList()
        correct_nblist.add(A1*B1*C1, Utterance("A1 B1 C1"))
        correct_nblist.add(A1*B2*C1, Utterance("A1 B2 C1"))
        correct_nblist.add(A1*B1*C2, Utterance("A1 B1 C2"))
        correct_nblist.add(A1*B2*C2, Utterance("A1 B2 C2"))
        correct_nblist.add(A1*B3*C1, Utterance("A1 B3 C1"))
        correct_nblist.add(A1*B1*C3, Utterance("A1 B1 C3"))
        correct_nblist.add(A1*B3*C2, Utterance("A1 B3 C2"))
        correct_nblist.add(A1*B2*C3, Utterance("A1 B2 C3"))
        correct_nblist.merge()
        correct_nblist.add_other()

        # The confusion network under test.
        confnet = UtteranceConfusionNetwork()
        confnet.add([[A1, 'A1'], [A2, 'A2'], [A3, 'A3'],])
        confnet.add([[B1, 'B1'], [B2, 'B2'], [B3, 'B3'],])
        confnet.add([[C1, 'C1'], [C2, 'C2'], [C3, 'C3'],])
        confnet.merge().sort()

        gen_nblist = confnet.get_utterance_nblist(10)

        # Diagnostic dump for debugging mismatches.
        s = []
        s.append("")
        s.append("Confusion network:")
        s.append(unicode(confnet))
        s.append("")
        s.append("Generated nblist:")
        s.append(unicode(gen_nblist))
        s.append("")
        s.append("Correct nblist:")
        s.append(unicode(correct_nblist))
        s.append("")
        print '\n'.join(s)

        self.assertEqual(unicode(gen_nblist), unicode(correct_nblist))
Beispiel #14
0
def process_call_log(fn):
    """Extract ASR/SLU training data from one transcribed call-log XML file.

    For every user turn in `fn` this collects the manual transcription,
    the 1-best ASR output, the ASR n-best list, and the HDC-SLU parse of
    the transcription.  Turns with missing or ambiguous recordings, ASR
    outputs, or transcriptions are skipped with a diagnostic printed.

    Relies on names defined elsewhere in the module: asr_rec, slu,
    exclude_slu, normalise_text, normalise_semi_words, and the
    '--asr-log' flag in sys.argv.

    Returns:
        (asr, nbl, sem, trn, trn_hdc_sem, fcount, tcount) -- lists of
        (wav_key, value) pairs plus the file and turn counters.
    """
    name = multiprocessing.current_process().name
    asr = []
    nbl = []
    sem = []
    trn = []
    trn_hdc_sem = []
    fcount = 0
    tcount = 0

    f_dir = os.path.dirname(fn)
    print "Process name:", name
    print "File #", fcount
    fcount += 1
    print "Processing:", fn
    doc = xml.dom.minidom.parse(fn)
    turns = doc.getElementsByTagName("turn")
    for i, turn in enumerate(turns):
        # Only user turns carry training data.
        if turn.getAttribute('speaker') != 'user':
            continue

        recs = turn.getElementsByTagName("rec")
        trans = turn.getElementsByTagName("asr_transcription")
        asrs = turn.getElementsByTagName("asr")

        if len(recs) != 1:
            print "Skipping a turn {turn} in file: {fn} - recs: {recs}".format(
                turn=i, fn=fn, recs=len(recs))
            continue

        # Recover ASR output that was logged late (in the following turn),
        # or pick the last one when there are duplicates.
        if len(asrs) == 0 and (i + 1) < len(turns):
            next_asrs = turns[i + 1].getElementsByTagName("asr")
            if len(next_asrs) != 2:
                print "Skipping a turn {turn} in file: {fn} - asrs: {asrs} - next_asrs: {next_asrs}".format(
                    turn=i, fn=fn, asrs=len(asrs), next_asrs=len(next_asrs))
                continue
            print "Recovered from missing ASR output by using a delayed ASR output from the following turn of turn {turn}. File: {fn} - next_asrs: {asrs}".format(
                turn=i, fn=fn, asrs=len(next_asrs))
            hyps = next_asrs[0].getElementsByTagName("hypothesis")
        elif len(asrs) == 1:
            hyps = asrs[0].getElementsByTagName("hypothesis")
        elif len(asrs) == 2:
            print "Recovered from EXTRA ASR outputs by using a the last ASR output from the turn. File: {fn} - asrs: {asrs}".format(
                fn=fn, asrs=len(asrs))
            hyps = asrs[-1].getElementsByTagName("hypothesis")
        else:
            print "Skipping a turn {turn} in file {fn} - asrs: {asrs}".format(
                turn=i, fn=fn, asrs=len(asrs))
            continue

        if len(trans) == 0:
            print "Skipping a turn in {fn} - trans: {trans}".format(
                fn=fn, trans=len(trans))
            continue

        wav_key = recs[0].getAttribute('fname')
        wav_path = os.path.join(f_dir, wav_key)

        # FIXME: Check whether the last transcription is really the best! FJ
        t = various.get_text_from_xml_node(trans[-1])
        t = normalise_text(t)

        # Either re-recognise the audio or take the ASR output from the log.
        if '--asr-log' not in sys.argv:
            asr_rec_nbl = asr_rec.rec_wav_file(wav_path)
            a = unicode(asr_rec_nbl.get_best())
        else:
            a = various.get_text_from_xml_node(hyps[0])
            a = normalise_semi_words(a)

        if exclude_slu(t) or 'DOM Element:' in a:
            print "Skipping transcription:", unicode(t)
            print "Skipping ASR output:   ", unicode(a)
            continue

        # The silence does not have a label in the language model.
        t = t.replace('_SIL_', '')

        trn.append((wav_key, t))

        print
        print "Transcritpiton #", tcount
        tcount += 1
        print "Parsing transcription:", unicode(t)
        print "                  ASR:", unicode(a)

        # HDC SLU on transcription
        s = slu.parse_1_best({'utt': Utterance(t)}).get_best_da()
        trn_hdc_sem.append((wav_key, s))

        # 1 best ASR
        asr.append((wav_key, a))

        # N best ASR
        n = UtteranceNBList()
        if '--asr-log' not in sys.argv:
            n = asr_rec_nbl

            print 'ASR RECOGNITION NBLIST\n', unicode(n)
        else:
            for h in hyps:
                txt = various.get_text_from_xml_node(h)
                txt = normalise_semi_words(txt)

                n.add(abs(float(h.getAttribute('p'))), Utterance(txt))

        n.merge()
        n.normalise()

        nbl.append((wav_key, n.serialise()))

        # there is no manual semantics in the transcriptions yet
        sem.append((wav_key, None))

    return asr, nbl, sem, trn, trn_hdc_sem, fcount, tcount
Beispiel #15
0
    def get_results(self, timeout=0.6):
        """Wait for complete recognition results from the Julius ASR server.

        Polls the server, sleeping between empty reads, until a message
        containing '<CONFNET>' arrives or the cumulative sleep time
        exceeds `timeout` (seconds).

        Returns:
            (nblist, cn) -- an UtteranceNBList built from the RECOGOUT
            section and an UtteranceConfusionNetwork built from the
            CONFNET section of the server message.

        Raises:
            JuliusASRTimeoutException -- if no complete result arrives
            within the timeout.
        """
        msg = ""

        # Get results from the server.
        time_slept = 0.0
        while time_slept < timeout:
            msg_part = self.read_server_message(self.msg_timeout)
            if not msg_part:
                # Wait and check whether there is a message.
                time.sleep(self.cfg['Hub']['main_loop_sleep_time'])
                time_slept += self.cfg['Hub']['main_loop_sleep_time']
                if self.debug >= 2:
                    print "gr.time_slept:", time_slept
                continue

            msg += msg_part + '\n'

            if self.debug:
                print msg

            # '<CONFNET>' marks the final section of a complete result.
            if '<CONFNET>' in msg:
                break
        else:
            raise JuliusASRTimeoutException(
                "Timeout when waiting for the Julius server results.")

        # Process the results.
        """ Typical result returned by the Julius ASR.

          <STARTPROC/>
          <INPUT STATUS="LISTEN" TIME="1343896296"/>
          <INPUT STATUS="STARTREC" TIME="1343896311"/>
          <STARTRECOG/>
          <INPUT STATUS="ENDREC" TIME="1343896312"/>
          <ENDRECOG/>
          <INPUTPARAM FRAMES="164" MSEC="1640"/>
          <RECOGOUT>
            <SHYPO RANK="1" SCORE="-7250.111328">
              <WHYPO WORD="" CLASSID="<s>" PHONE="sil" CM="0.887"/>
              <WHYPO WORD="I'M" CLASSID="I'M" PHONE="ah m" CM="0.705"/>
              <WHYPO WORD="LOOKING" CLASSID="LOOKING" PHONE="l uh k ih ng" CM="0.992"/>
              <WHYPO WORD="FOR" CLASSID="FOR" PHONE="f er" CM="0.757"/>
              <WHYPO WORD="A" CLASSID="A" PHONE="ah" CM="0.672"/>
              <WHYPO WORD="PUB" CLASSID="PUB" PHONE="p ah b" CM="0.409"/>
              <WHYPO WORD="" CLASSID="</s>" PHONE="sil" CM="1.000"/>
            </SHYPO>
          </RECOGOUT>
          <GRAPHOUT NODENUM="43" ARCNUM="70">
              <NODE GID="0" WORD="" CLASSID="<s>" PHONE="sil" BEGIN="0" END="2"/>
              <NODE GID="1" WORD="" CLASSID="<s>" PHONE="sil" BEGIN="0" END="3"/>
              <NODE GID="2" WORD="" CLASSID="<s>" PHONE="sil" BEGIN="0" END="4"/>
              <NODE GID="3" WORD="I" CLASSID="I" PHONE="ay" BEGIN="3" END="5"/>
              <NODE GID="4" WORD="NO" CLASSID="NO" PHONE="n ow" BEGIN="3" END="7"/>
              <NODE GID="5" WORD="I" CLASSID="I" PHONE="ay" BEGIN="4" END="6"/>
              <NODE GID="6" WORD="UH" CLASSID="UH" PHONE="ah" BEGIN="4" END="6"/>
              <NODE GID="7" WORD="I'M" CLASSID="I'M" PHONE="ay m" BEGIN="4" END="27"/>

              ...

              <NODE GID="38" WORD="PUB" CLASSID="PUB" PHONE="p ah b" BEGIN="79" END="104"/>
              <NODE GID="39" WORD="AH" CLASSID="AH" PHONE="aa" BEGIN="81" END="110"/>
              <NODE GID="40" WORD="LOT" CLASSID="LOT" PHONE="l aa t" BEGIN="81" END="110"/>
              <NODE GID="41" WORD="" CLASSID="</s>" PHONE="sil" BEGIN="105" END="163"/>
              <NODE GID="42" WORD="" CLASSID="</s>" PHONE="sil" BEGIN="111" END="163"/>
              <ARC FROM="0" TO="4"/>
              <ARC FROM="0" TO="3"/>
              <ARC FROM="1" TO="7"/>
              <ARC FROM="1" TO="5"/>
              <ARC FROM="1" TO="6"/>

              ...

              <ARC FROM="38" TO="41"/>
              <ARC FROM="39" TO="42"/>
              <ARC FROM="40" TO="42"/>
          </GRAPHOUT>
          <CONFNET>
            <WORD>
              <ALTERNATIVE PROB="1.000"></ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="0.950">I</ALTERNATIVE>
              <ALTERNATIVE PROB="0.020">HI</ALTERNATIVE>
              <ALTERNATIVE PROB="0.013">NO</ALTERNATIVE>
              <ALTERNATIVE PROB="0.010"></ALTERNATIVE>
              <ALTERNATIVE PROB="0.006">UH</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="0.945">AM</ALTERNATIVE>
              <ALTERNATIVE PROB="0.055">I'M</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="1.000">LOOKING</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="1.000">FOR</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="1.000">A</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="0.963">PUB</ALTERNATIVE>
              <ALTERNATIVE PROB="0.016">AH</ALTERNATIVE>
              <ALTERNATIVE PROB="0.012">BAR</ALTERNATIVE>
              <ALTERNATIVE PROB="0.008">LOT</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="1.000"></ALTERNATIVE>
            </WORD>
          </CONFNET>
          <INPUT STATUS="LISTEN" TIME="1343896312"/>

        """
        # Wrap the message fragments in a root element and escape the
        # sentence markers so the whole thing parses as XML.
        msg = "<RESULTS>" + msg + "</RESULTS>"
        msg = msg.replace("<s>", "&lt;s&gt;").replace("</s>", "&lt;/s&gt;")

        nblist = UtteranceNBList()

        # Build the n-best list from RECOGOUT: one hypothesis per SHYPO,
        # scored by the product of per-word confidence measures (CM).
        doc = xml.dom.minidom.parseString(msg)
        recogout = doc.getElementsByTagName("RECOGOUT")
        for el in recogout:
            shypo = el.getElementsByTagName("SHYPO")
            for el in shypo:
                whypo = el.getElementsByTagName("WHYPO")
                utterance = ""
                cm = 1.0
                for el in whypo:
                    word = el.getAttribute("WORD")
                    utterance += " " + word
                    if word:
                        cm *= float(el.getAttribute("CM"))
                nblist.add(cm, Utterance(utterance))

        nblist.merge()
        nblist.add_other()

        cn = UtteranceConfusionNetwork()

        # Build the confusion network from CONFNET: one slot per WORD,
        # with (probability, text) alternatives.
        confnet = doc.getElementsByTagName("CONFNET")
        for el in confnet:
            word = el.getElementsByTagName("WORD")
            for el in word:
                alternative = el.getElementsByTagName("ALTERNATIVE")
                word_list = []
                for el in alternative:
                    prob = float(el.getAttribute("PROB"))
                    text = get_text_from_xml_node(el)
                    word_list.append([prob, text])

                # Filter out empty hypotheses.
                if len(word_list) == 0:
                    continue
                if len(word_list) == 1 and len(word_list[0][1]) == 0:
                    continue

                # Add the word into the confusion network.
                cn.add(word_list)

        cn.merge()
        cn.normalise()
        cn.prune()
        cn.normalise()
        cn.sort()

        return nblist, cn
Beispiel #16
0
    def get_results(self, timeout=0.6):
        """Wait for complete recognition results from the Julius ASR server.

        Polls the server, sleeping between empty reads, until a message
        containing '<CONFNET>' arrives or the cumulative sleep time
        exceeds `timeout` (seconds).

        Returns:
            (nblist, cn) -- an UtteranceNBList built from the RECOGOUT
            section and an UtteranceConfusionNetwork built from the
            CONFNET section of the server message.

        Raises:
            JuliusASRTimeoutException -- if no complete result arrives
            within the timeout.
        """
        msg = ""

        # Get results from the server.
        time_slept = 0.0
        while time_slept < timeout:
            msg_part = self.read_server_message(self.msg_timeout)
            if not msg_part:
                # Wait and check whether there is a message.
                time.sleep(self.cfg['Hub']['main_loop_sleep_time'])
                time_slept += self.cfg['Hub']['main_loop_sleep_time']
                if self.debug >= 2:
                    print "gr.time_slept:", time_slept
                continue

            msg += msg_part + '\n'

            if self.debug:
                print msg

            # '<CONFNET>' marks the final section of a complete result.
            if '<CONFNET>' in msg:
                break
        else:
            raise JuliusASRTimeoutException(
                "Timeout when waiting for the Julius server results.")

        # Process the results.
        """ Typical result returned by the Julius ASR.

          <STARTPROC/>
          <INPUT STATUS="LISTEN" TIME="1343896296"/>
          <INPUT STATUS="STARTREC" TIME="1343896311"/>
          <STARTRECOG/>
          <INPUT STATUS="ENDREC" TIME="1343896312"/>
          <ENDRECOG/>
          <INPUTPARAM FRAMES="164" MSEC="1640"/>
          <RECOGOUT>
            <SHYPO RANK="1" SCORE="-7250.111328">
              <WHYPO WORD="" CLASSID="<s>" PHONE="sil" CM="0.887"/>
              <WHYPO WORD="I'M" CLASSID="I'M" PHONE="ah m" CM="0.705"/>
              <WHYPO WORD="LOOKING" CLASSID="LOOKING" PHONE="l uh k ih ng" CM="0.992"/>
              <WHYPO WORD="FOR" CLASSID="FOR" PHONE="f er" CM="0.757"/>
              <WHYPO WORD="A" CLASSID="A" PHONE="ah" CM="0.672"/>
              <WHYPO WORD="PUB" CLASSID="PUB" PHONE="p ah b" CM="0.409"/>
              <WHYPO WORD="" CLASSID="</s>" PHONE="sil" CM="1.000"/>
            </SHYPO>
          </RECOGOUT>
          <GRAPHOUT NODENUM="43" ARCNUM="70">
              <NODE GID="0" WORD="" CLASSID="<s>" PHONE="sil" BEGIN="0" END="2"/>
              <NODE GID="1" WORD="" CLASSID="<s>" PHONE="sil" BEGIN="0" END="3"/>
              <NODE GID="2" WORD="" CLASSID="<s>" PHONE="sil" BEGIN="0" END="4"/>
              <NODE GID="3" WORD="I" CLASSID="I" PHONE="ay" BEGIN="3" END="5"/>
              <NODE GID="4" WORD="NO" CLASSID="NO" PHONE="n ow" BEGIN="3" END="7"/>
              <NODE GID="5" WORD="I" CLASSID="I" PHONE="ay" BEGIN="4" END="6"/>
              <NODE GID="6" WORD="UH" CLASSID="UH" PHONE="ah" BEGIN="4" END="6"/>
              <NODE GID="7" WORD="I'M" CLASSID="I'M" PHONE="ay m" BEGIN="4" END="27"/>

              ...

              <NODE GID="38" WORD="PUB" CLASSID="PUB" PHONE="p ah b" BEGIN="79" END="104"/>
              <NODE GID="39" WORD="AH" CLASSID="AH" PHONE="aa" BEGIN="81" END="110"/>
              <NODE GID="40" WORD="LOT" CLASSID="LOT" PHONE="l aa t" BEGIN="81" END="110"/>
              <NODE GID="41" WORD="" CLASSID="</s>" PHONE="sil" BEGIN="105" END="163"/>
              <NODE GID="42" WORD="" CLASSID="</s>" PHONE="sil" BEGIN="111" END="163"/>
              <ARC FROM="0" TO="4"/>
              <ARC FROM="0" TO="3"/>
              <ARC FROM="1" TO="7"/>
              <ARC FROM="1" TO="5"/>
              <ARC FROM="1" TO="6"/>

              ...

              <ARC FROM="38" TO="41"/>
              <ARC FROM="39" TO="42"/>
              <ARC FROM="40" TO="42"/>
          </GRAPHOUT>
          <CONFNET>
            <WORD>
              <ALTERNATIVE PROB="1.000"></ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="0.950">I</ALTERNATIVE>
              <ALTERNATIVE PROB="0.020">HI</ALTERNATIVE>
              <ALTERNATIVE PROB="0.013">NO</ALTERNATIVE>
              <ALTERNATIVE PROB="0.010"></ALTERNATIVE>
              <ALTERNATIVE PROB="0.006">UH</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="0.945">AM</ALTERNATIVE>
              <ALTERNATIVE PROB="0.055">I'M</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="1.000">LOOKING</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="1.000">FOR</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="1.000">A</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="0.963">PUB</ALTERNATIVE>
              <ALTERNATIVE PROB="0.016">AH</ALTERNATIVE>
              <ALTERNATIVE PROB="0.012">BAR</ALTERNATIVE>
              <ALTERNATIVE PROB="0.008">LOT</ALTERNATIVE>
            </WORD>
            <WORD>
              <ALTERNATIVE PROB="1.000"></ALTERNATIVE>
            </WORD>
          </CONFNET>
          <INPUT STATUS="LISTEN" TIME="1343896312"/>

        """
        # Wrap the message fragments in a root element and escape the
        # sentence markers so the whole thing parses as XML.
        msg = "<RESULTS>" + msg + "</RESULTS>"
        msg = msg.replace("<s>", "&lt;s&gt;").replace("</s>", "&lt;/s&gt;")

        nblist = UtteranceNBList()

        # Build the n-best list from RECOGOUT: one hypothesis per SHYPO,
        # scored by the product of per-word confidence measures (CM).
        doc = xml.dom.minidom.parseString(msg)
        recogout = doc.getElementsByTagName("RECOGOUT")
        for el in recogout:
            shypo = el.getElementsByTagName("SHYPO")
            for el in shypo:
                whypo = el.getElementsByTagName("WHYPO")
                utterance = ""
                cm = 1.0
                for el in whypo:
                    word = el.getAttribute("WORD")
                    utterance += " " + word
                    if word:
                        cm *= float(el.getAttribute("CM"))
                nblist.add(cm, Utterance(utterance))

        nblist.merge()
        nblist.add_other()

        cn = UtteranceConfusionNetwork()

        # Build the confusion network from CONFNET: one slot per WORD,
        # with (probability, text) alternatives.
        confnet = doc.getElementsByTagName("CONFNET")
        for el in confnet:
            word = el.getElementsByTagName("WORD")
            for el in word:
                alternative = el.getElementsByTagName("ALTERNATIVE")
                word_list = []
                for el in alternative:
                    prob = float(el.getAttribute("PROB"))
                    text = get_text_from_xml_node(el)
                    word_list.append([prob, text])

                # Filter out empty hypotheses.
                if len(word_list) == 0:
                    continue
                if len(word_list) == 1 and len(word_list[0][1]) == 0:
                    continue

                # Add the word into the confusion network.
                cn.add(word_list)

        cn.merge()
        cn.normalise()
        cn.prune()
        cn.normalise()
        cn.sort()

        return nblist, cn
Beispiel #17
0
def main():
    """Build SLU train/dev/test datasets from transcribed call logs.

    Walks ``indomain_data`` for ``asr_transcribed.xml`` files, and for every
    user turn collects:

    - the normalised manual transcription (``trn``),
    - the handcrafted-SLU parse of that transcription (``trn_hdc_sem``),
    - the 1-best ASR hypothesis and its SLU parse (``asr``, ``asr_hdc_sem``),
    - the ASR n-best list and its SLU parse (``nbl``, ``nbl_hdc_sem``).

    Results are saved as wav-key -> value files, first de-duplicated by
    transcription, then as full and 80/10/10 train/dev/test splits.

    Behaviour is controlled via flags read from ``sys.argv``:
    ``--asr-log`` (use ASR output logged in the XML instead of re-running
    ASR on the wav files), ``--uniq`` (only produce the uniq files),
    ``--fast`` (skip SLU parsing of the n-best lists).
    """
    # Handcrafted SLU pipeline + Kaldi ASR recogniser used for re-decoding.
    cldb = CategoryLabelDatabase('../data/database.py')
    preprocessing = PTIENSLUPreprocessing(cldb)
    slu = PTIENHDCSLU(preprocessing, cfg={'SLU': {PTIENHDCSLU: {'utt2da': as_project_path("applications/PublicTransportInfoEN/data/utt2da_dict.txt")}}})
    cfg = Config.load_configs(['../kaldi.cfg',], use_default=True)
    asr_rec = asr_factory(cfg)                    

    # Output file names; "sem" files are placeholders for future manual
    # semantic annotation (see the `sem` list below, which holds None values).
    fn_uniq_trn = 'uniq.trn'
    fn_uniq_trn_hdc_sem = 'uniq.trn.hdc.sem'
    fn_uniq_trn_sem = 'uniq.trn.sem'

    fn_all_sem = 'all.sem'
    fn_all_trn = 'all.trn'
    fn_all_trn_hdc_sem = 'all.trn.hdc.sem'
    fn_all_asr = 'all.asr'
    fn_all_asr_hdc_sem = 'all.asr.hdc.sem'
    fn_all_nbl = 'all.nbl'
    fn_all_nbl_hdc_sem = 'all.nbl.hdc.sem'

    fn_train_sem = 'train.sem'
    fn_train_trn = 'train.trn'
    fn_train_trn_hdc_sem = 'train.trn.hdc.sem'
    fn_train_asr = 'train.asr'
    fn_train_asr_hdc_sem = 'train.asr.hdc.sem'
    fn_train_nbl = 'train.nbl'
    fn_train_nbl_hdc_sem = 'train.nbl.hdc.sem'

    fn_dev_sem = 'dev.sem'
    fn_dev_trn = 'dev.trn'
    fn_dev_trn_hdc_sem = 'dev.trn.hdc.sem'
    fn_dev_asr = 'dev.asr'
    fn_dev_asr_hdc_sem = 'dev.asr.hdc.sem'
    fn_dev_nbl = 'dev.nbl'
    fn_dev_nbl_hdc_sem = 'dev.nbl.hdc.sem'

    fn_test_sem = 'test.sem'
    fn_test_trn = 'test.trn'
    fn_test_trn_hdc_sem = 'test.trn.hdc.sem'
    fn_test_asr = 'test.asr'
    fn_test_asr_hdc_sem = 'test.asr.hdc.sem'
    fn_test_nbl = 'test.nbl'
    fn_test_nbl_hdc_sem = 'test.nbl.hdc.sem'

    # NOTE(review): the fn_*_sem names above are never written in this
    # function — presumably reserved for manual annotations; confirm.
    indomain_data_dir = "indomain_data"

    print "Generating the SLU train and test data"
    print "-"*120
    ###############################################################################################

    # Collect asr_transcribed.xml files up to five directory levels deep.
    files = []
    files.append(glob.glob(os.path.join(indomain_data_dir, 'asr_transcribed.xml')))
    files.append(glob.glob(os.path.join(indomain_data_dir, '*', 'asr_transcribed.xml')))
    files.append(glob.glob(os.path.join(indomain_data_dir, '*', '*', 'asr_transcribed.xml')))
    files.append(glob.glob(os.path.join(indomain_data_dir, '*', '*', '*', 'asr_transcribed.xml')))
    files.append(glob.glob(os.path.join(indomain_data_dir, '*', '*', '*', '*', 'asr_transcribed.xml')))
    files.append(glob.glob(os.path.join(indomain_data_dir, '*', '*', '*', '*', '*', 'asr_transcribed.xml')))
    files = various.flatten(files)

    # Parallel lists of (wav_key, value) pairs, one entry per accepted turn.
    sem = []
    trn = []
    trn_hdc_sem = []
    asr = []
    asr_hdc_sem = []
    nbl = []
    nbl_hdc_sem = []

    for fn in files[:100000]:
        f_dir = os.path.dirname(fn)

        print "Processing:", fn
        doc = xml.dom.minidom.parse(fn)
        turns = doc.getElementsByTagName("turn")

        for i, turn in enumerate(turns):
            # Only user turns carry data we want to parse.
            if turn.getAttribute('speaker') != 'user':
                continue

            recs = turn.getElementsByTagName("rec")
            trans = turn.getElementsByTagName("asr_transcription")
            asrs = turn.getElementsByTagName("asr")

            # Exactly one recording per turn is expected.
            if len(recs) != 1:
                print "Skipping a turn {turn} in file: {fn} - recs: {recs}".format(turn=i,fn=fn, recs=len(recs))
                continue

            # The logged ASR output can be missing or duplicated; try to
            # recover a usable <hypothesis> list before giving up.
            if len(asrs) == 0 and (i + 1) < len(turns):
                # Missing ASR output: it sometimes arrives delayed, logged in
                # the following turn (which then holds two <asr> elements).
                next_asrs = turns[i+1].getElementsByTagName("asr")
                if len(next_asrs) != 2:
                    print "Skipping a turn {turn} in file: {fn} - asrs: {asrs} - next_asrs: {next_asrs}".format(turn=i, fn=fn, asrs=len(asrs), next_asrs=len(next_asrs))
                    continue
                print "Recovered from missing ASR output by using a delayed ASR output from the following turn of turn {turn}. File: {fn} - next_asrs: {asrs}".format(turn=i, fn=fn, asrs=len(next_asrs))
                hyps = next_asrs[0].getElementsByTagName("hypothesis")
            elif len(asrs) == 1:
                hyps = asrs[0].getElementsByTagName("hypothesis")
            elif len(asrs) == 2:
                # Extra ASR output (the delayed one from the previous turn);
                # the last element belongs to this turn.
                print "Recovered from EXTRA ASR outputs by using a the last ASR output from the turn. File: {fn} - asrs: {asrs}".format(fn=fn, asrs=len(asrs))
                hyps = asrs[-1].getElementsByTagName("hypothesis")
            else:
                print "Skipping a turn {turn} in file {fn} - asrs: {asrs}".format(turn=i,fn=fn, asrs=len(asrs))
                continue

            if len(trans) == 0:
                print "Skipping a turn in {fn} - trans: {trans}".format(fn=fn, trans=len(trans))
                continue

            # The wav file name doubles as the unique key for all outputs.
            wav_key = recs[0].getAttribute('fname')
            wav_path = os.path.join(f_dir, wav_key)
            
            # FIXME: Check whether the last transcription is really the best! FJ
            t = various.get_text_from_xml_node(trans[-1])
            t = normalise_text(t)

            
            if '--asr-log' not in sys.argv:
                # Re-decode the wav file with the live recogniser.
                asr_rec_nbl = asr_rec.rec_wav_file(wav_path)
                a = unicode(asr_rec_nbl.get_best())
            else:  
                # Use the ASR output logged in the XML.
                a = various.get_text_from_xml_node(hyps[0])
                a = normalise_semi_words(a)

            if exclude_slu(t) or 'DOM Element:' in a:
                print "Skipping transcription:", unicode(t)
                print "Skipping ASR output:   ", unicode(a)
                continue

            # The silence does not have a label in the language model.
            t = t.replace('_SIL_','')

            trn.append((wav_key, t))

            print "Parsing transcription:", unicode(t)
            print "                  ASR:", unicode(a)

            # HDC SLU on transcription
            s = slu.parse_1_best({'utt':Utterance(t)}).get_best_da()
            trn_hdc_sem.append((wav_key, s))

            if '--uniq' not in sys.argv:
                # HDC SLU on 1 best ASR
                if '--asr-log' not in sys.argv:
                    a = unicode(asr_rec_nbl.get_best())
                else:  
                    a = various.get_text_from_xml_node(hyps[0])
                    a = normalise_semi_words(a)

                asr.append((wav_key, a))

                s = slu.parse_1_best({'utt':Utterance(a)}).get_best_da()
                asr_hdc_sem.append((wav_key, s))

                # HDC SLU on N best ASR
                n = UtteranceNBList()
                if '--asr-log' not in sys.argv:
                   # NOTE(review): the fresh UtteranceNBList above is
                   # discarded on this branch and replaced wholesale.
                   n = asr_rec_nbl
                   
                   print 'ASR RECOGNITION NBLIST\n',unicode(n)
                else:
                    for h in hyps:
                        txt = various.get_text_from_xml_node(h)
                        txt = normalise_semi_words(txt)

                        n.add(abs(float(h.getAttribute('p'))),Utterance(txt))

                n.merge()
                n.normalise()

                nbl.append((wav_key, n.serialise()))

                if '--fast' not in sys.argv:
                    s = slu.parse_nblist({'utt_nbl':n}).get_best_da()
                # NOTE(review): under --fast, `s` still holds the 1-best ASR
                # parse from above — presumably intentional; confirm.
                nbl_hdc_sem.append((wav_key, s))

            # there is no manual semantics in the transcriptions yet
            sem.append((wav_key, None))


    # De-duplicate by transcription text: keep the first wav key seen for
    # each distinct normalised transcription.
    uniq_trn = {}
    uniq_trn_hdc_sem = {}
    uniq_trn_sem = {}
    trn_set = set()

    # NOTE(review): `sem` (the list of Nones built above) is rebound here to
    # a wav_key -> DA dict; the placeholder list is effectively discarded.
    sem = dict(trn_hdc_sem)
    for k, v in trn:
        if not v in trn_set:
            trn_set.add(v)
            uniq_trn[k] = v
            uniq_trn_hdc_sem[k] = sem[k]
            uniq_trn_sem[k] = v + " <=> " + unicode(sem[k])

    save_wavaskey(fn_uniq_trn, uniq_trn)
    save_wavaskey(fn_uniq_trn_hdc_sem, uniq_trn_hdc_sem, trans = lambda da: '&'.join(sorted(unicode(da).split('&'))))
    save_wavaskey(fn_uniq_trn_sem, uniq_trn_sem)

    # all
    save_wavaskey(fn_all_trn, dict(trn))
    save_wavaskey(fn_all_trn_hdc_sem, dict(trn_hdc_sem), trans = lambda da: '&'.join(sorted(unicode(da).split('&'))))

    if '--uniq' not in sys.argv:
        save_wavaskey(fn_all_asr, dict(asr))
        save_wavaskey(fn_all_asr_hdc_sem, dict(asr_hdc_sem), trans = lambda da: '&'.join(sorted(unicode(da).split('&'))))

        save_wavaskey(fn_all_nbl, dict(nbl))
        save_wavaskey(fn_all_nbl_hdc_sem, dict(nbl_hdc_sem), trans = lambda da: '&'.join(sorted(unicode(da).split('&'))))


        # Re-seeding with the same value before each shuffle keeps all the
        # parallel lists aligned after shuffling.
        seed_value = 10

        random.seed(seed_value)
        random.shuffle(trn)
        random.seed(seed_value)
        random.shuffle(trn_hdc_sem)
        random.seed(seed_value)
        random.shuffle(asr)
        random.seed(seed_value)
        random.shuffle(asr_hdc_sem)
        random.seed(seed_value)
        random.shuffle(nbl)
        random.seed(seed_value)
        random.shuffle(nbl_hdc_sem)

        # 80/10/10 train/dev/test split, applied identically to every list.
        # trn
        train_trn = trn[:int(0.8*len(trn))]
        dev_trn = trn[int(0.8*len(trn)):int(0.9*len(trn))]
        test_trn = trn[int(0.9*len(trn)):]

        save_wavaskey(fn_train_trn, dict(train_trn))
        save_wavaskey(fn_dev_trn, dict(dev_trn))
        save_wavaskey(fn_test_trn, dict(test_trn))

        # trn_hdc_sem
        train_trn_hdc_sem = trn_hdc_sem[:int(0.8*len(trn_hdc_sem))]
        dev_trn_hdc_sem = trn_hdc_sem[int(0.8*len(trn_hdc_sem)):int(0.9*len(trn_hdc_sem))]
        test_trn_hdc_sem = trn_hdc_sem[int(0.9*len(trn_hdc_sem)):]

        save_wavaskey(fn_train_trn_hdc_sem, dict(train_trn_hdc_sem), trans = lambda da: '&'.join(sorted(unicode(da).split('&'))))
        save_wavaskey(fn_dev_trn_hdc_sem, dict(dev_trn_hdc_sem), trans = lambda da: '&'.join(sorted(unicode(da).split('&'))))
        save_wavaskey(fn_test_trn_hdc_sem, dict(test_trn_hdc_sem), trans = lambda da: '&'.join(sorted(unicode(da).split('&'))))

        # asr
        train_asr = asr[:int(0.8*len(asr))]
        dev_asr = asr[int(0.8*len(asr)):int(0.9*len(asr))]
        test_asr = asr[int(0.9*len(asr)):]

        save_wavaskey(fn_train_asr, dict(train_asr))
        save_wavaskey(fn_dev_asr, dict(dev_asr))
        save_wavaskey(fn_test_asr, dict(test_asr))

        # asr_hdc_sem
        train_asr_hdc_sem = asr_hdc_sem[:int(0.8*len(asr_hdc_sem))]
        dev_asr_hdc_sem = asr_hdc_sem[int(0.8*len(asr_hdc_sem)):int(0.9*len(asr_hdc_sem))]
        test_asr_hdc_sem = asr_hdc_sem[int(0.9*len(asr_hdc_sem)):]

        save_wavaskey(fn_train_asr_hdc_sem, dict(train_asr_hdc_sem), trans = lambda da: '&'.join(sorted(unicode(da).split('&'))))
        save_wavaskey(fn_dev_asr_hdc_sem, dict(dev_asr_hdc_sem), trans = lambda da: '&'.join(sorted(unicode(da).split('&'))))
        save_wavaskey(fn_test_asr_hdc_sem, dict(test_asr_hdc_sem), trans = lambda da: '&'.join(sorted(unicode(da).split('&'))))

        # n-best lists
        train_nbl = nbl[:int(0.8*len(nbl))]
        dev_nbl = nbl[int(0.8*len(nbl)):int(0.9*len(nbl))]
        test_nbl = nbl[int(0.9*len(nbl)):]

        save_wavaskey(fn_train_nbl, dict(train_nbl))
        save_wavaskey(fn_dev_nbl, dict(dev_nbl))
        save_wavaskey(fn_test_nbl, dict(test_nbl))

        # nbl_hdc_sem
        train_nbl_hdc_sem = nbl_hdc_sem[:int(0.8*len(nbl_hdc_sem))]
        dev_nbl_hdc_sem = nbl_hdc_sem[int(0.8*len(nbl_hdc_sem)):int(0.9*len(nbl_hdc_sem))]
        test_nbl_hdc_sem = nbl_hdc_sem[int(0.9*len(nbl_hdc_sem)):]

        save_wavaskey(fn_train_nbl_hdc_sem, dict(train_nbl_hdc_sem), trans = lambda da: '&'.join(sorted(unicode(da).split('&'))))
        save_wavaskey(fn_dev_nbl_hdc_sem, dict(dev_nbl_hdc_sem), trans = lambda da: '&'.join(sorted(unicode(da).split('&'))))
        save_wavaskey(fn_test_nbl_hdc_sem, dict(test_nbl_hdc_sem), trans = lambda da: '&'.join(sorted(unicode(da).split('&'))))
Beispiel #18
0
    def test_parse_meta(self):
        """Each meta/control utterance must parse to its expected dialogue act.

        Every (utterance, dialogue-act) pair below is wrapped in a
        single-hypothesis n-best list, run through the SLU parser, and
        the expected dialogue act item is asserted to be present in the
        resulting confusion network.
        """
        cases = [
            (u"ahoj", "hello()"),
            (u"sbohem čau", "bye()"),
            (u"jiné", "reqalts()"),
            (u"začneme znovu", "restart()"),
            (u"zopakuj", "repeat()"),
            (u"promiň", "apology()"),
            (u"co se zeptat", "help()"),
            (u"haló", "canthearyou()"),
            (u"nerozuměl jsem", "notunderstood()"),
            (u"ano jo", "affirm()"),
            (u"ne ano nechci", "negate()"),
            (u"děkuji", "thankyou()"),
            (u"dobře", "ack()"),
            (u"chci jet", "inform(task=find_connection)"),
            (u"jak bude", "inform(task=weather)"),
            (u"nástupiště", "inform(task=find_platform)"),
            (u"z jaké jede", "request(from_stop)"),
            (u"kam to jede", "request(to_stop)"),
            (u"kdy to jede", "request(departure_time)"),
            (u"za jak dlouho", "request(departure_time_rel)"),
            (u"kdy tam budem", "request(arrival_time)"),
            (u"za jak dlouho tam přijedu", "request(arrival_time_rel)"),
            (u"jak dlouho bude trvat cesta", "request(duration)"),
            (u"kolik je hodin", "request(current_time)"),
            (u"jak dlouho trvá přestup", "request(time_transfers)"),
            (u"kolik přestupů", "request(num_transfers)"),
            (u"nechci přestup bez jet přímo", "inform(num_transfers=0)"),
            (u"jeden přestup", "inform(num_transfers=1)"),
            (u"dva přestupy", "inform(num_transfers=2)"),
            (u"tři přestupy", "inform(num_transfers=3)"),
            (u"čtyři přestupy", "inform(num_transfers=4)"),
            (u"libovolně přestupů", "inform(num_transfers=dontcare)"),
            (u"jet přímo", "inform(num_transfers=0)"),
            (u"alternativa libovolný", "inform(alternative=dontcare)"),
            (u"alternativa první", "inform(alternative=1)"),
            (u"alternativa druhá", "inform(alternative=2)"),
            (u"alternativa třetí", "inform(alternative=3)"),
            (u"alternativa čtvrtá", "inform(alternative=4)"),
            (u"alternativa páté", "inform(alternative=5)"),
            (u"předchozí spoj", "inform(alternative=prev)"),
            (u"nechci předchozí spoj", "deny(alternative=prev)"),
            (u"poslední spoj", "inform(alternative=last)"),
            (u"nechci poslední spoj", "deny(alternative=last)"),
            (u"další spoj", "inform(alternative=next)"),
            (u"další", "inform(alternative=next)"),
            (u"předchozí", "inform(alternative=prev)"),
            (u"jako ve dne", "inform(ampm=pm)"),
        ]

        for text, expected in cases:
            nbest = UtteranceNBList()
            nbest.add(0.79, Utterance(text))

            parsed = self.slu.parse(nbest)

            self.assertIn(DialogueActItem(dai=expected), parsed)
Beispiel #19
0
def process_call_log(fn):
    """Process one transcribed call-log XML file (multiprocessing worker).

    For every user turn with a usable recording, transcription and ASR
    output, collects the normalised transcription, its handcrafted-SLU
    parse, the 1-best ASR hypothesis, and the serialised ASR n-best list.

    Returns a tuple ``(asr, nbl, sem, trn, trn_hdc_sem, fcount, tcount)``
    of (wav_key, value) pair lists plus file/transcription counters.

    NOTE(review): relies on module-level names (``asr_rec``, ``slu``,
    ``exclude_slu``, the normalisation helpers) — presumably initialised
    per worker process; confirm before reuse.
    """
    name = multiprocessing.current_process().name
    # Parallel (wav_key, value) lists, one entry per accepted turn.
    asr = []
    nbl = []
    sem = []
    trn = []
    trn_hdc_sem = []
    # NOTE(review): fcount is printed before being incremented, so it is
    # always reported (and returned) relative to this single file.
    fcount = 0
    tcount = 0

    f_dir = os.path.dirname(fn)
    print "Process name:", name
    print "File #", fcount
    fcount += 1
    print "Processing:", fn
    doc = xml.dom.minidom.parse(fn)
    turns = doc.getElementsByTagName("turn")
    for i, turn in enumerate(turns):
        # Only user turns carry data we want to parse.
        if turn.getAttribute('speaker') != 'user':
            continue

        recs = turn.getElementsByTagName("rec")
        trans = turn.getElementsByTagName("asr_transcription")
        asrs = turn.getElementsByTagName("asr")

        # Exactly one recording per turn is expected.
        if len(recs) != 1:
            print "Skipping a turn {turn} in file: {fn} - recs: {recs}".format(turn=i, fn=fn, recs=len(recs))
            continue

        # The logged ASR output can be missing or duplicated; try to
        # recover a usable <hypothesis> list before giving up.
        if len(asrs) == 0 and (i + 1) < len(turns):
            # Missing ASR output: it sometimes arrives delayed, logged in
            # the following turn (which then holds two <asr> elements).
            next_asrs = turns[i + 1].getElementsByTagName("asr")
            if len(next_asrs) != 2:
                print "Skipping a turn {turn} in file: {fn} - asrs: {asrs} - next_asrs: {next_asrs}".format(turn=i,
                                                                                                            fn=fn,
                                                                                                            asrs=len(
                                                                                                                asrs),
                                                                                                            next_asrs=len(
                                                                                                                next_asrs))
                continue
            print "Recovered from missing ASR output by using a delayed ASR output from the following turn of turn {turn}. File: {fn} - next_asrs: {asrs}".format(
                turn=i, fn=fn, asrs=len(next_asrs))
            hyps = next_asrs[0].getElementsByTagName("hypothesis")
        elif len(asrs) == 1:
            hyps = asrs[0].getElementsByTagName("hypothesis")
        elif len(asrs) == 2:
            # Extra ASR output (the delayed one from the previous turn);
            # the last element belongs to this turn.
            print "Recovered from EXTRA ASR outputs by using a the last ASR output from the turn. File: {fn} - asrs: {asrs}".format(
                fn=fn, asrs=len(asrs))
            hyps = asrs[-1].getElementsByTagName("hypothesis")
        else:
            print "Skipping a turn {turn} in file {fn} - asrs: {asrs}".format(turn=i, fn=fn, asrs=len(asrs))
            continue

        if len(trans) == 0:
            print "Skipping a turn in {fn} - trans: {trans}".format(fn=fn, trans=len(trans))
            continue

        # The wav file name doubles as the unique key for all outputs.
        wav_key = recs[0].getAttribute('fname')
        wav_path = os.path.join(f_dir, wav_key)

        # FIXME: Check whether the last transcription is really the best! FJ
        t = various.get_text_from_xml_node(trans[-1])
        t = normalise_text(t)

        if '--asr-log' not in sys.argv:
            # Re-decode the wav file with the live recogniser.
            asr_rec_nbl = asr_rec.rec_wav_file(wav_path)
            a = unicode(asr_rec_nbl.get_best())
        else:
            # Use the ASR output logged in the XML.
            a = various.get_text_from_xml_node(hyps[0])
            a = normalise_semi_words(a)

        if exclude_slu(t) or 'DOM Element:' in a:
            print "Skipping transcription:", unicode(t)
            print "Skipping ASR output:   ", unicode(a)
            continue

        # The silence does not have a label in the language model.
        t = t.replace('_SIL_', '')

        trn.append((wav_key, t))

        print
        # NOTE(review): "Transcritpiton" is a typo in this log message.
        print "Transcritpiton #", tcount
        tcount += 1
        print "Parsing transcription:", unicode(t)
        print "                  ASR:", unicode(a)

        # HDC SLU on transcription
        s = slu.parse_1_best({'utt': Utterance(t)}).get_best_da()
        trn_hdc_sem.append((wav_key, s))


        # 1 best ASR
        asr.append((wav_key, a))

        # N best ASR
        n = UtteranceNBList()
        if '--asr-log' not in sys.argv:
            # NOTE(review): the fresh UtteranceNBList above is discarded on
            # this branch and replaced by the recogniser's n-best list.
            n = asr_rec_nbl

            print 'ASR RECOGNITION NBLIST\n', unicode(n)
        else:
            for h in hyps:
                txt = various.get_text_from_xml_node(h)
                txt = normalise_semi_words(txt)

                n.add(abs(float(h.getAttribute('p'))), Utterance(txt))

        n.merge()
        n.normalise()

        nbl.append((wav_key, n.serialise()))

        # there is no manual semantics in the transcriptions yet
        sem.append((wav_key, None))

    return asr, nbl, sem, trn, trn_hdc_sem, fcount, tcount