def _build_confusion_network(self, sampled_da_items):
        """Collect sampled dialogue act items into one confusion network.

        Each element of ``sampled_da_items`` is unpacked as a pair
        ``(da_items, probs)``. The two sequences are walked in lockstep and
        every item is merged into the network with its paired probability.
        """
        network = DialogueActConfusionNetwork()
        for items, weights in sampled_da_items:
            paired = zip(items, weights)
            for item, weight in paired:
                network.add_merge(weight, item)

        return network
    def _build_confusion_network(self, sampled_da_items):
        """Build a confusion network from sampled dialogue act items.

        :param sampled_da_items: iterable of ``(da_items, probs)`` pairs, where
            the n-th probability belongs to the n-th dialogue act item
        :return: a new ``DialogueActConfusionNetwork`` with every item merged in
        """
        result = DialogueActConfusionNetwork()
        for entry in sampled_da_items:
            da_items, probs = entry
            for pair in zip(da_items, probs):
                dai, prob = pair
                result.add_merge(prob, dai)

        return result
Exemple #3
0
    def test_add_merge(self):
        """Merging the same item twice with combine='add' sums to 0.5, then 1.0."""
        item = DialogueActItem(dai='inform(food=chinese)')
        confnet = DialogueActConfusionNetwork()

        # Each round merges another 0.5 for the same item and checks the total.
        for expected_total in (0.5, 1.0):
            confnet.add_merge(0.5, item, combine='add')
            self.assertEqual(confnet._get_prob([0]), expected_total)
Exemple #4
0
    def test_add_merge(self):
        """Check that add_merge with combine='add' accumulates one item's probability."""
        food_item = DialogueActItem(dai='inform(food=chinese)')
        network = DialogueActConfusionNetwork()

        # First merge: the item carries exactly the merged probability.
        network.add_merge(0.5, food_item, combine='add')
        after_first = network._get_prob([0])
        self.assertEqual(after_first, 0.5)

        # Second merge of the same item: the probabilities are added up.
        network.add_merge(0.5, food_item, combine='add')
        after_second = network._get_prob([0])
        self.assertEqual(after_second, 1.0)
Exemple #5
0
    def test_merge_slu_confnets(self):
        """Compare merge_slu_confnets output with a hand-built expected network.

        Two weighted confusion networks are merged. The expected network is
        built by merging every item with ``weight * probability`` using
        ``combine='add'``. The two networks are compared through their unicode
        representations, and the human-readable report of both is attached to
        the assertion so that it is shown when the comparison fails.
        """
        confnet1 = DialogueActConfusionNetwork()
        confnet1.add(0.7, DialogueActItem('hello'))
        confnet1.add(0.2, DialogueActItem('bye'))

        confnet2 = DialogueActConfusionNetwork()
        confnet2.add(0.6, DialogueActItem('hello'))
        confnet2.add(0.3, DialogueActItem('restart'))

        confnets = [[0.7, confnet1], [0.3, confnet2]]

        merged_confnets = merge_slu_confnets(confnets)

        # (confnet weight, item probability, item) for every item of the inputs.
        expected_items = (
            (0.7, 0.7, 'hello'),
            (0.7, 0.2, 'bye'),
            (0.3, 0.6, 'hello'),
            (0.3, 0.3, 'restart'),
        )
        correct_merged_confnet = DialogueActConfusionNetwork()
        for weight, prob, name in expected_items:
            correct_merged_confnet.add_merge(weight * prob,
                                             DialogueActItem(name),
                                             combine='add')

        merged_text = unicode(merged_confnets)
        correct_text = unicode(correct_merged_confnet)

        # The report used to be assembled and then thrown away; pass it as the
        # assertion message so it is visible exactly when the test fails.
        report = '\n'.join([
            "",
            "Merged confnets:",
            merged_text,
            "",
            "Correct merged results:",
            correct_text,
            "",
        ])

        self.assertEqual(merged_text, correct_text, report)
Exemple #6
0
    def test_merge_slu_confnets(self):
        confnet1 = DialogueActConfusionNetwork()
        confnet1.add(0.7, DialogueActItem('hello'))
        confnet1.add(0.2, DialogueActItem('bye'))

        confnet2 = DialogueActConfusionNetwork()
        confnet2.add(0.6, DialogueActItem('hello'))
        confnet2.add(0.3, DialogueActItem('restart'))

        confnets = [[0.7, confnet1], [0.3, confnet2]]

        merged_confnets = merge_slu_confnets(confnets)

        correct_merged_confnet = DialogueActConfusionNetwork()
        correct_merged_confnet.add_merge(0.7 * 0.7,
                                         DialogueActItem('hello'),
                                         combine='add')
        correct_merged_confnet.add_merge(0.7 * 0.2,
                                         DialogueActItem('bye'),
                                         combine='add')
        correct_merged_confnet.add_merge(0.3 * 0.6,
                                         DialogueActItem('hello'),
                                         combine='add')
        correct_merged_confnet.add_merge(0.3 * 0.3,
                                         DialogueActItem('restart'),
                                         combine='add')

        s = []
        s.append("")
        s.append("Merged confnets:")
        s.append(unicode(merged_confnets))
        s.append("")
        s.append("Correct merged results:")
        s.append(unicode(correct_merged_confnet))
        s.append("")
        print '\n'.join(s)

        self.assertEqual(unicode(merged_confnets),
                         unicode(correct_merged_confnet))
Exemple #7
0
    def parse_X(self, utterance, verbose=False):
        if verbose:
            print '='*120
            print 'Parsing X'
            print '-'*120
            print unicode(utterance)

        if self.preprocessing:
            utterance = self.preprocessing.normalise(utterance)
            utterance_fvcs = self.get_fvc(utterance)

        if verbose:
            print unicode(utterance)
            print unicode(utterance_fvcs)


        da_confnet = DialogueActConfusionNetwork()
        for clser in self.trained_classifiers:
            if verbose:
                print "Using classifier: ", unicode(clser)

            if self.parsed_classifiers[clser].value and self.parsed_classifiers[clser].value.startswith('CL_'):
                # process abstracted classifiers

                for f, v, c in utterance_fvcs:
                    cc = "CL_" + c.upper()

                    if self.parsed_classifiers[clser].value == cc:
                        #print clser, f, v, c

                        classifiers_features = self.get_features(utterance, (f, v, cc), utterance_fvcs)
                        classifiers_inputs = np.zeros((1, len(self.classifiers_features_mapping[clser])))
                        classifiers_inputs[0] = classifiers_features.get_feature_vector(self.classifiers_features_mapping[clser])

                        #if verbose:
                        #    print classifiers_features
                        #    print self.classifiers_features_mapping[clser]

                        p = self.trained_classifiers[clser].predict_proba(classifiers_inputs)

                        if verbose:
                            print '  Probability:', p

                        dai = DialogueActItem(self.parsed_classifiers[clser].dat, self.parsed_classifiers[clser].name, v)
                        da_confnet.add_merge(p[0][1], dai, combine='max')
            else:
                # process concrete classifiers
                classifiers_features = self.get_features(utterance, (None, None, None), utterance_fvcs)
                classifiers_inputs = np.zeros((1, len(self.classifiers_features_mapping[clser])))
                classifiers_inputs[0] = classifiers_features.get_feature_vector(self.classifiers_features_mapping[clser])

                #if verbose:
                #    print classifiers_features
                #    print self.classifiers_features_mapping[clser]

                p = self.trained_classifiers[clser].predict_proba(classifiers_inputs)

                if verbose:
                    print '  Probability:', p

                dai = self.parsed_classifiers[clser]
                da_confnet.add_merge(p[0][1], dai, combine='max')

        da_confnet.sort().prune()

        return da_confnet
Exemple #8
0
    def parse_X(self, utterance, verbose=False):
        if verbose:
            print '=' * 120
            print 'Parsing X'
            print '-' * 120
            print unicode(utterance)

        if self.preprocessing:
            utterance = self.preprocessing.normalise(utterance)
            utterance_fvcs = self.get_fvc(utterance)

        if verbose:
            print unicode(utterance)
            print unicode(utterance_fvcs)

        da_confnet = DialogueActConfusionNetwork()
        for clser in self.trained_classifiers:
            if verbose:
                print "Using classifier: ", unicode(clser)

            if self.parsed_classifiers[clser].value and self.parsed_classifiers[
                    clser].value.startswith('CL_'):
                # process abstracted classifiers

                for f, v, c in utterance_fvcs:
                    cc = "CL_" + c.upper()

                    if self.parsed_classifiers[clser].value == cc:
                        #print clser, f, v, c

                        classifiers_features = self.get_features(
                            utterance, (f, v, cc), utterance_fvcs)
                        classifiers_inputs = np.zeros(
                            (1, len(self.classifiers_features_mapping[clser])))
                        classifiers_inputs[
                            0] = classifiers_features.get_feature_vector(
                                self.classifiers_features_mapping[clser])

                        #if verbose:
                        #    print classifiers_features
                        #    print self.classifiers_features_mapping[clser]

                        p = self.trained_classifiers[clser].predict_proba(
                            classifiers_inputs)

                        if verbose:
                            print '  Probability:', p

                        dai = DialogueActItem(
                            self.parsed_classifiers[clser].dat,
                            self.parsed_classifiers[clser].name, v)
                        da_confnet.add_merge(p[0][1], dai, combine='max')
            else:
                # process concrete classifiers
                classifiers_features = self.get_features(
                    utterance, (None, None, None), utterance_fvcs)
                classifiers_inputs = np.zeros(
                    (1, len(self.classifiers_features_mapping[clser])))
                classifiers_inputs[
                    0] = classifiers_features.get_feature_vector(
                        self.classifiers_features_mapping[clser])

                #if verbose:
                #    print classifiers_features
                #    print self.classifiers_features_mapping[clser]

                p = self.trained_classifiers[clser].predict_proba(
                    classifiers_inputs)

                if verbose:
                    print '  Probability:', p

                dai = self.parsed_classifiers[clser]
                da_confnet.add_merge(p[0][1], dai, combine='max')

        da_confnet.sort().prune()

        return da_confnet
Exemple #9
0
    def parse_1_best(self, obs, verbose=False, *args, **kwargs):
        """Parse an utterance into a dialogue act.

        :rtype DialogueActConfusionNetwork
        """

        utterance = obs['utt']

        if isinstance(utterance, UtteranceHyp):
            # Parse just the utterance and ignore the confidence score.
            utterance = utterance.utterance

        if verbose:
            print 'Parsing utterance "{utt}".'.format(utt=utterance)

        res_cn = DialogueActConfusionNetwork()

        dict_da = self.utt2da.get(unicode(utterance), None)
        if dict_da:
            for dai in DialogueAct(dict_da):
                res_cn.add_merge(1.0, dai)
            return res_cn

        utterance = self.preprocessing.normalise_utterance(utterance)
        abutterance, category_labels, abutterance_lenghts = self.abstract_utterance(utterance)

        if verbose:
            print 'After preprocessing: "{utt}".'.format(utt=abutterance)
            print category_labels

        self.parse_non_speech_events(utterance, res_cn)

        utterance = utterance.replace_all(['_noise_'], '').replace_all(['_laugh_'], '').replace_all(['_ehm_hmm_'], '').replace_all(['_inhale_'], '')
        abutterance = abutterance.replace_all(['_noise_'], '').replace_all(['_laugh_'], '').replace_all(['_ehm_hmm_'], '').replace_all(['_inhale_'], '')

        abutterance = self.handle_false_abstractions(abutterance)
        category_labels.add('CITY')
        category_labels.add('VEHICLE')
        category_labels.add('NUMBER')


        if len(res_cn) == 0:
            if 'STOP' in category_labels:
                self.parse_stop(abutterance, res_cn)
            if 'CITY' in category_labels:
                self.parse_city(abutterance, res_cn)
            if 'NUMBER' in category_labels:
                self.parse_number(abutterance)
                if any([word.startswith("TIME") for word in abutterance]):
                    category_labels.add('TIME')
            if 'TIME' in category_labels:
                self.parse_time(abutterance, res_cn)
            if 'DATE_REL' in category_labels:
                self.parse_date_rel(abutterance, res_cn)
            if 'AMPM' in category_labels:
                self.parse_ampm(abutterance, res_cn)
            if 'VEHICLE' in category_labels:
                self.parse_vehicle(abutterance, res_cn)
            if 'TASK' in category_labels:
                self.parse_task(abutterance, res_cn)
            if 'TRAIN_NAME' in category_labels:
                self.parse_train_name(abutterance, res_cn)

            self.parse_meta(utterance, abutterance_lenghts, res_cn)

        return res_cn