Example #1
    def test_conversion_of_confnet_into_nblist(self):

        A1, A2, A3 = 0.90, 0.05, 0.05
        B1, B2, B3 = 0.50, 0.35, 0.15
        C1, C2, C3 = 0.60, 0.30, 0.10

        # Expected n-best list: the eight most probable paths, in descending
        # order of joint probability.
        correct_nblist = UtteranceNBList()
        correct_nblist.add(A1*B1*C1, Utterance("A1 B1 C1"))
        correct_nblist.add(A1*B2*C1, Utterance("A1 B2 C1"))
        correct_nblist.add(A1*B1*C2, Utterance("A1 B1 C2"))
        correct_nblist.add(A1*B2*C2, Utterance("A1 B2 C2"))
        correct_nblist.add(A1*B3*C1, Utterance("A1 B3 C1"))
        correct_nblist.add(A1*B1*C3, Utterance("A1 B1 C3"))
        correct_nblist.add(A1*B3*C2, Utterance("A1 B3 C2"))
        correct_nblist.add(A1*B2*C3, Utterance("A1 B2 C3"))
        correct_nblist.merge()
        correct_nblist.add_other()

        # The same scores expressed as a three-position confusion network.
        confnet = UtteranceConfusionNetwork()
        confnet.add([[A1, 'A1'], [A2, 'A2'], [A3, 'A3'],])
        confnet.add([[B1, 'B1'], [B2, 'B2'], [B3, 'B3'],])
        confnet.add([[C1, 'C1'], [C2, 'C2'], [C3, 'C3'],])
        confnet.merge().sort()

        gen_nblist = confnet.get_utterance_nblist(10)

        s = []
        s.append("")
        s.append("Confusion network:")
        s.append(unicode(confnet))
        s.append("")
        s.append("Generated nblist:")
        s.append(unicode(gen_nblist))
        s.append("")
        s.append("Correct nblist:")
        s.append(unicode(correct_nblist))
        s.append("")
        print '\n'.join(s)

        self.assertEqual(unicode(gen_nblist), unicode(correct_nblist))
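
The expansion this test exercises can be reproduced with nothing but the standard library. The sketch below is an illustration of the idea only, not the UtteranceConfusionNetwork implementation, and confnet_to_nblist is a hypothetical helper: it scores every path through the network by the product of its per-position probabilities and keeps the best n.

    # A minimal stdlib-only sketch of confnet -> n-best expansion; an
    # illustration of the idea, not the UtteranceConfusionNetwork code.
    from itertools import product

    def confnet_to_nblist(confnet, n=10):
        # confnet: list of positions, each a list of (prob, word) pairs.
        hyps = []
        for path in product(*confnet):
            prob = 1.0
            words = []
            for p, w in path:
                prob *= p
                words.append(w)
            hyps.append((prob, ' '.join(words)))
        # Highest joint probability first; keep only the top n paths.
        hyps.sort(key=lambda pw: pw[0], reverse=True)
        return hyps[:n]

    confnet = [[(0.90, 'A1'), (0.05, 'A2'), (0.05, 'A3')],
               [(0.50, 'B1'), (0.35, 'B2'), (0.15, 'B3')],
               [(0.60, 'C1'), (0.30, 'C2'), (0.10, 'C3')]]
    # The best path matches the head of correct_nblist above.
    assert confnet_to_nblist(confnet)[0] == (0.90 * 0.50 * 0.60, 'A1 B1 C1')
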
Example #2
    def read_audio_write_asr_hypotheses(self):
        # Read input audio.
        if self.local_audio_in:
            if len(self.local_audio_in) > 40:
                print "ASR unprocessed frames:", len(self.local_audio_in)

            if len(self.local_audio_in) > 200:
                print "ASR too many unprocessed frames:", len(
                    self.local_audio_in)
                print "    skipping everything until the end of the segment:", len(
                    self.local_audio_in)
                while len(self.local_audio_in) > 2 and isinstance(
                        self.local_audio_in[0], Frame):
                    skip = self.local_audio_in.popleft()

            # read recorded audio
            data_rec = self.local_audio_in.popleft()

            if isinstance(data_rec, Frame):
                if self.recognition_on:
                    self.asr.rec_in(data_rec)
            elif isinstance(data_rec, Command):
                dr_speech_start = False
                fname = None

                if data_rec.parsed['__name__'] == "speech_start":
                    # check whether there is more than one speech segment
                    segments = [
                        cmd for cmd in self.local_audio_in
                        if isinstance(cmd, Command)
                        and cmd.parsed['__name__'] == "speech_start"
                    ]
                    if len(segments):
                        # there are multiple unprocessed segments in the queue
                        # remove all unprocessed segments except the last
                        print "ASR too many unprocessed speech segments:", len(
                            segments)
                        print "    removed all segments but the last"
                        removed_segments = 0
                        while removed_segments < len(segments):
                            data_rec = self.local_audio_in.popleft()
                            if isinstance(data_rec,
                                          Command) and data_rec.parsed[
                                              '__name__'] == "speech_start":
                                removed_segments += 1

                    dr_speech_start = "speech_start"
                    fname = data_rec.parsed['fname']
                elif data_rec.parsed['__name__'] == "speech_end":
                    dr_speech_start = "speech_end"
                    fname = data_rec.parsed['fname']

                # Check consistency of the input command.
                if dr_speech_start:
                    if ((not self.recognition_on
                         and dr_speech_start != "speech_start")
                            or (self.recognition_on
                                and dr_speech_start != "speech_end")):
                        msg = ('Commands received by the ASR component are '
                               'inconsistent (recognition_on: {rec}; the new '
                               'command: {cmd})').format(
                                   rec=self.recognition_on,
                                   cmd=dr_speech_start)
                        self.system_logger.exception(msg)

                if dr_speech_start == "speech_start":
                    self.commands.send(
                        Command('asr_start(fname="%s")' % fname, 'ASR', 'HUB'))
                    self.recognition_on = True

                    if self.cfg['ASR']['debug']:
                        self.system_logger.debug(
                            'ASR: speech_start(fname="%s")' % fname)

                elif dr_speech_start == "speech_end":
                    self.recognition_on = False

                    if self.cfg['ASR']['debug']:
                        self.system_logger.debug(
                            'ASR: speech_end(fname="%s")' % fname)

                    try:
                        asr_hyp = self.asr.hyp_out()

                        if self.cfg['ASR']['debug']:
                            msg = list()
                            msg.append("ASR Hypothesis")
                            msg.append("-" * 60)
                            msg.append(unicode(asr_hyp))
                            msg.append(u"")
                            msg = u'\n'.join(msg)
                            self.system_logger.debug(msg)

                    except (ASRException, JuliusASRTimeoutException):
                        self.system_logger.debug("Julius ASR Result Timeout.")
                        if self.cfg['ASR']['debug']:
                            msg = list()
                            msg.append("ASR Alternative hypothesis")
                            msg.append("-" * 60)
                            msg.append("sil")
                            msg.append("")
                            msg = u'\n'.join(msg)
                            self.system_logger.debug(msg)

                        asr_hyp = UtteranceConfusionNetwork()
                        asr_hyp.add([
                            [1.0, "_other_"],
                        ])

                    # The ASR component can return either NBList or a confusion
                    # network.
                    if isinstance(asr_hyp, UtteranceNBList):
                        self.session_logger.asr("user", fname, asr_hyp, None)
                    elif isinstance(asr_hyp, UtteranceConfusionNetwork):
                        self.session_logger.asr("user", fname,
                                                asr_hyp.get_utterance_nblist(),
                                                asr_hyp)
                    else:
                        self.session_logger.asr("user", fname, [(-1, asr_hyp)],
                                                None)

                    self.commands.send(
                        Command('asr_end(fname="%s")' % fname, 'ASR', 'HUB'))
                    self.commands.send(ASRHyp(asr_hyp, fname=fname))
                    self.asr_hypotheses_out.send(ASRHyp(asr_hyp, fname=fname))
            else:
                raise ASRException('Unsupported input.')
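
The overflow branch at the top of this method can be isolated in a few lines. The following standalone sketch uses stand-in Frame and Command classes (not the real Alex hub messages) to show what the frame-skipping loop leaves in the queue once too many items pile up:

    # Stand-ins for the real Frame/Command messages, for illustration only.
    from collections import deque

    class Frame(object):
        pass

    class Command(object):
        def __init__(self, name):
            self.parsed = {'__name__': name}

    local_audio_in = deque([Frame() for _ in range(300)])
    local_audio_in.append(Command('speech_end'))

    # Same loop as above: drop audio frames until the end of the current
    # segment is near the head of the queue.
    if len(local_audio_in) > 200:
        while len(local_audio_in) > 2 and isinstance(local_audio_in[0], Frame):
            local_audio_in.popleft()

    # Only one trailing frame and the speech_end command remain.
    assert len(local_audio_in) == 2
    assert local_audio_in[1].parsed['__name__'] == 'speech_end'
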
Example #3
File: asr.py Project: AoJ/alex
    def read_audio_write_asr_hypotheses(self):
        # Read input audio.
        if self.local_audio_in:
            if len(self.local_audio_in) > 40:
                print "ASR unprocessed frames:", len(self.local_audio_in)

            if len(self.local_audio_in) > 200:
                print "ASR too many unprocessed frames:", len(self.local_audio_in)
                print "    skipping everything until the end of the segment:", len(self.local_audio_in)
                while len(self.local_audio_in) > 2 and isinstance(self.local_audio_in[0], Frame):
                    skip = self.local_audio_in.popleft()

            # read recorded audio
            data_rec = self.local_audio_in.popleft()

            if isinstance(data_rec, Frame):
                if self.recognition_on:
                    self.asr.rec_in(data_rec)
            elif isinstance(data_rec, Command):
                dr_speech_start = False
                fname = None

                if data_rec.parsed['__name__'] == "speech_start":
                    # check whether there is more than one speech segment
                    segments = [ cmd for cmd in self.local_audio_in
                                 if isinstance(cmd, Command) and cmd.parsed['__name__'] == "speech_start"]
                    if len(segments):
                        # there are multiple unprocessed segments in the queue
                        # remove all unprocessed segments except the last
                        print "ASR too many unprocessed speech segments:", len(segments)
                        print "    removed all segments but the last"
                        removed_segments = 0
                        while removed_segments < len(segments):
                            data_rec = self.local_audio_in.popleft()
                            if isinstance(data_rec, Command) and data_rec.parsed['__name__'] == "speech_start":
                                removed_segments += 1

                    dr_speech_start = "speech_start"
                    fname = data_rec.parsed['fname']
                elif data_rec.parsed['__name__'] == "speech_end":
                    dr_speech_start = "speech_end"
                    fname = data_rec.parsed['fname']

                # Check consistency of the input command.
                if dr_speech_start:
                    if ((not self.recognition_on and dr_speech_start != "speech_start")
                        or
                        (self.recognition_on and dr_speech_start != "speech_end")):
                        msg = ('Commands received by the ASR component are '
                               'inconsistent (recognition_on: {rec}; the new '
                               'command: {cmd})').format(
                                   rec=self.recognition_on,
                                   cmd=dr_speech_start)
                        self.system_logger.exception(msg)

                if dr_speech_start == "speech_start":
                    self.commands.send(Command('asr_start(fname="%s")' % fname, 'ASR', 'HUB'))
                    self.recognition_on = True

                    if self.cfg['ASR']['debug']:
                        self.system_logger.debug('ASR: speech_start(fname="%s")' % fname)

                elif dr_speech_start == "speech_end":
                    self.recognition_on = False

                    if self.cfg['ASR']['debug']:
                        self.system_logger.debug('ASR: speech_end(fname="%s")' % fname)

                    try:
                        asr_hyp = self.asr.hyp_out()

                        if self.cfg['ASR']['debug']:
                            msg = list()
                            msg.append("ASR Hypothesis")
                            msg.append("-" * 60)
                            msg.append(unicode(asr_hyp))
                            msg.append(u"")
                            msg = u'\n'.join(msg)
                            self.system_logger.debug(msg)

                    except (ASRException, JuliusASRTimeoutException):
                        self.system_logger.debug("Julius ASR Result Timeout.")
                        if self.cfg['ASR']['debug']:
                            msg = list()
                            msg.append("ASR Alternative hypothesis")
                            msg.append("-" * 60)
                            msg.append("sil")
                            msg.append("")
                            msg = u'\n'.join(msg)
                            self.system_logger.debug(msg)

                        asr_hyp = UtteranceConfusionNetwork()
                        asr_hyp.add([[1.0, "_other_"], ])

                    # The ASR component can return either NBList or a confusion
                    # network.
                    if isinstance(asr_hyp, UtteranceNBList):
                        self.session_logger.asr("user", fname, asr_hyp, None)
                    elif isinstance(asr_hyp, UtteranceConfusionNetwork):
                        self.session_logger.asr("user", fname, asr_hyp.get_utterance_nblist(), asr_hyp)
                    else:
                        self.session_logger.asr("user", fname, [(-1, asr_hyp)], None)

                    self.commands.send(Command('asr_end(fname="%s")' % fname, 'ASR', 'HUB'))
                    self.asr_hypotheses_out.send(ASRHyp(asr_hyp, fname=fname))
            else:
                raise ASRException('Unsupported input.')
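
The consistency check shared by both versions reduces to a small predicate: a speech_start command is only valid while recognition is off, and a speech_end only while it is on. A minimal sketch with a hypothetical helper name (command_is_consistent is not part of Alex):

    # Hypothetical helper mirroring the consistency check above.
    def command_is_consistent(recognition_on, command):
        if command == "speech_start":
            return not recognition_on
        if command == "speech_end":
            return recognition_on
        return False

    assert command_is_consistent(False, "speech_start")
    assert command_is_consistent(True, "speech_end")
    assert not command_is_consistent(True, "speech_start")  # start inside a segment
    assert not command_is_consistent(False, "speech_end")   # end without a start
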
Example #4
    def test_session_logger(self):
        cfg = Config.load_configs(config=CONFIG_DICT, use_default=False)

        sl = SessionLogger()

        # test 3 calls at once
        for i in range(3):
            sess_dir = "./%d" % i
            if not os.path.isdir(sess_dir):
                os.mkdir(sess_dir)
            sl.session_start(sess_dir)
            sl.config('config = ' + unicode(cfg))
            sl.header(cfg['Logging']["system_name"], cfg['Logging']["version"])
            sl.input_source("voip")

            sl.dialogue_rec_start(None, "both_complete_dialogue.wav")
            sl.dialogue_rec_start("system", "system_complete_dialogue.wav")
            sl.dialogue_rec_start("user", "user_complete_dialogue.wav")
            sl.dialogue_rec_end("both_complete_dialogue.wav")
            sl.dialogue_rec_end("system_complete_dialogue.wav")
            sl.dialogue_rec_end("user_complete_dialogue.wav")

            sl.turn("system")
            sl.dialogue_act("system", "hello()")
            sl.text("system", "Hello.")
            sl.rec_start("system", "system1.wav")
            sl.rec_end("system1.wav")

            sl.turn("user")
            sl.rec_start("user", "user1.wav")
            sl.rec_end("user1.wav")

            A1, A2, A3 = 0.90, 0.05, 0.05
            B1, B2, B3 = 0.70, 0.20, 0.10
            C1, C2, C3 = 0.80, 0.10, 0.10

            asr_confnet = UtteranceConfusionNetwork()
            asr_confnet.add([[A1, "want"], [A2, "has"], [A3, 'ehm']])
            asr_confnet.add([[B1, "Chinese"], [B2, "English"], [B3, 'cheap']])
            asr_confnet.add([[C1, "restaurant"], [C2, "pub"], [C3, 'hotel']])
            asr_confnet.merge()
            asr_confnet.normalise()
            asr_confnet.sort()

            asr_nblist = asr_confnet.get_utterance_nblist()

            sl.asr("user", "user1.wav", asr_nblist, asr_confnet)

            slu_confnet = DialogueActConfusionNetwork()
            slu_confnet.add(0.7, DialogueActItem('hello'))
            slu_confnet.add(0.6, DialogueActItem('thankyou'))
            slu_confnet.add(0.4, DialogueActItem('restart'))
            slu_confnet.add(0.1, DialogueActItem('bye'))
            slu_confnet.merge()
            slu_confnet.normalise()
            slu_confnet.sort()

            slu_nblist = slu_confnet.get_da_nblist()

            sl.slu("user", "user1.wav", slu_nblist, slu_confnet)

            sl.turn("system")
            sl.dialogue_act("system", "thankyou()")
            sl.text("system", "Thank you.", cost = 1.0)
            sl.rec_start("system", "system2.wav")
            sl.rec_end("system2.wav")
            sl.barge_in("system", tts_time = True)

            sl.turn("user")
            sl.rec_start("user", "user2.wav")
            sl.rec_end("user2.wav")
            sl.hangup("user")
Example #5
    def test_session_logger(self):
        cfg = Config.load_configs(config=CONFIG_DICT, use_default=False)

        sl = SessionLogger()

        # test 3 calls at once
        for i in range(3):
            sess_dir = "./%d" % i
            if not os.path.isdir(sess_dir):
                os.mkdir(sess_dir)
            sl.session_start(sess_dir)
            sl.config('config = ' + unicode(cfg))
            sl.header(cfg['Logging']["system_name"], cfg['Logging']["version"])
            sl.input_source("voip")

            sl.dialogue_rec_start(None, "both_complete_dialogue.wav")
            sl.dialogue_rec_start("system", "system_complete_dialogue.wav")
            sl.dialogue_rec_start("user", "user_complete_dialogue.wav")
            sl.dialogue_rec_end("both_complete_dialogue.wav")
            sl.dialogue_rec_end("system_complete_dialogue.wav")
            sl.dialogue_rec_end("user_complete_dialogue.wav")

            sl.turn("system")
            sl.dialogue_act("system", "hello()")
            sl.text("system", "Hello.")
            sl.rec_start("system", "system1.wav")
            sl.rec_end("system1.wav")

            sl.turn("user")
            sl.rec_start("user", "user1.wav")
            sl.rec_end("user1.wav")

            A1, A2, A3 = 0.90, 0.05, 0.05
            B1, B2, B3 = 0.70, 0.20, 0.10
            C1, C2, C3 = 0.80, 0.10, 0.10

            asr_confnet = UtteranceConfusionNetwork()
            asr_confnet.add([[A1, "want"], [A2, "has"], [A3, 'ehm']])
            asr_confnet.add([[B1, "Chinese"], [B2, "English"], [B3, 'cheap']])
            asr_confnet.add([[C1, "restaurant"], [C2, "pub"], [C3, 'hotel']])
            asr_confnet.merge()
            asr_confnet.normalise()
            asr_confnet.sort()

            asr_nblist = asr_confnet.get_utterance_nblist()

            sl.asr("user", "user1.wav", asr_nblist, asr_confnet)

            slu_confnet = DialogueActConfusionNetwork()
            slu_confnet.add(0.7, DialogueActItem('hello'))
            slu_confnet.add(0.6, DialogueActItem('thankyou'))
            slu_confnet.add(0.4, DialogueActItem('restart'))
            slu_confnet.add(0.1, DialogueActItem('bye'))
            slu_confnet.merge()
            slu_confnet.normalise()
            slu_confnet.sort()

            slu_nblist = slu_confnet.get_da_nblist()

            sl.slu("user", "user1.wav", slu_nblist, slu_confnet)

            sl.turn("system")
            sl.dialogue_act("system", "thankyou()")
            sl.text("system", "Thank you.", cost=1.0)
            sl.rec_start("system", "system2.wav")
            sl.rec_end("system2.wav")
            sl.barge_in("system", tts_time=True)

            sl.turn("user")
            sl.rec_start("user", "user2.wav")
            sl.rec_end("user2.wav")
            sl.hangup("user")