def _build_confusion_network(self, sampled_da_items):
    """Merge sampled dialogue act items into a single confusion network.

    ``sampled_da_items`` is iterated as ``(da_items, probs)`` pairs. The two
    members of each pair are zipped together and every resulting
    ``(dai, prob)`` is handed to ``add_merge`` (with its default ``combine``
    setting) on a fresh ``DialogueActConfusionNetwork``.

    :param sampled_da_items: iterable of ``(da_items, probs)`` pairs
    :return: the populated ``DialogueActConfusionNetwork``
    """
    network = DialogueActConfusionNetwork()

    # Flatten the nested samples lazily; the visiting order is the same as
    # with two nested loops.
    weighted_items = (
        (prob, dai)
        for da_items, probs in sampled_da_items
        for dai, prob in zip(da_items, probs)
    )
    for prob, dai in weighted_items:
        network.add_merge(prob, dai)

    return network
def _build_confusion_network(self, sampled_da_items):
    '''Build a confusion network from sampled dialogue act items.

    Every element of sampled_da_items is unpacked into a sequence of
    DialogueActItem objects and a parallel sequence of their observation
    probabilities. Each (probability, item) pair is merged into the network
    through add_merge, and the resulting network is returned.
    '''
    result = DialogueActConfusionNetwork()

    for sample in sampled_da_items:
        items, observation_probs = sample
        paired = zip(items, observation_probs)
        for item, observation_prob in paired:
            result.add_merge(observation_prob, item)

    return result
def test_add_merge(self):
    # Merging the same item twice with combine='add' is expected to
    # accumulate its probability: 0.5 after the first merge, 1.0 after
    # the second.
    item = DialogueActItem(dai='inform(food=chinese)')
    confnet = DialogueActConfusionNetwork()

    for expected_prob in (0.5, 1.0):
        confnet.add_merge(0.5, item, combine='add')
        self.assertEqual(confnet._get_prob([0]), expected_prob)
def test_merge_slu_confnets(self):
    # Checks merge_slu_confnets on two weighted confnets that share the
    # 'hello' item. The expected network is built by hand: every item is
    # merged with combine='add' using (confnet weight * item probability).
    confnet1 = DialogueActConfusionNetwork()
    confnet1.add(0.7, DialogueActItem('hello'))
    confnet1.add(0.2, DialogueActItem('bye'))

    confnet2 = DialogueActConfusionNetwork()
    confnet2.add(0.6, DialogueActItem('hello'))
    confnet2.add(0.3, DialogueActItem('restart'))

    # Each entry is [weight of the confnet, confnet].
    confnets = [[0.7, confnet1], [0.3, confnet2]]

    merged_confnets = merge_slu_confnets(confnets)

    correct_merged_confnet = DialogueActConfusionNetwork()
    correct_merged_confnet.add_merge(0.7 * 0.7, DialogueActItem('hello'), combine='add')
    correct_merged_confnet.add_merge(0.7 * 0.2, DialogueActItem('bye'), combine='add')
    correct_merged_confnet.add_merge(0.3 * 0.6, DialogueActItem('hello'), combine='add')
    correct_merged_confnet.add_merge(0.3 * 0.3, DialogueActItem('restart'), combine='add')

    # The networks are compared through their unicode rendering. The
    # debug report that used to be assembled here was never printed or
    # used, so it has been removed.
    self.assertEqual(unicode(merged_confnets), unicode(correct_merged_confnet))
def test_merge_slu_confnets(self):
    # Merges two weighted confnets with merge_slu_confnets, prints both the
    # merged and the hand-built expected network, and compares their
    # unicode renderings.

    # (confnet weight, ((item probability, dialogue act item), ...))
    spec = (
        (0.7, ((0.7, 'hello'), (0.2, 'bye'))),
        (0.3, ((0.6, 'hello'), (0.3, 'restart'))),
    )

    weighted_nets = []
    for weight, entries in spec:
        net = DialogueActConfusionNetwork()
        for prob, act in entries:
            net.add(prob, DialogueActItem(act))
        weighted_nets.append([weight, net])

    merged = merge_slu_confnets(weighted_nets)

    # Expected result: every item merged with combine='add' using
    # weight * probability, in the same order as the inputs.
    expected = DialogueActConfusionNetwork()
    for weight, entries in spec:
        for prob, act in entries:
            expected.add_merge(weight * prob, DialogueActItem(act), combine='add')

    report = [
        "",
        "Merged confnets:",
        unicode(merged),
        "",
        "Correct merged results:",
        unicode(expected),
        "",
    ]
    print('\n'.join(report))

    self.assertEqual(unicode(merged), unicode(expected))
def parse_X(self, utterance, verbose=False): if verbose: print '='*120 print 'Parsing X' print '-'*120 print unicode(utterance) if self.preprocessing: utterance = self.preprocessing.normalise(utterance) utterance_fvcs = self.get_fvc(utterance) if verbose: print unicode(utterance) print unicode(utterance_fvcs) da_confnet = DialogueActConfusionNetwork() for clser in self.trained_classifiers: if verbose: print "Using classifier: ", unicode(clser) if self.parsed_classifiers[clser].value and self.parsed_classifiers[clser].value.startswith('CL_'): # process abstracted classifiers for f, v, c in utterance_fvcs: cc = "CL_" + c.upper() if self.parsed_classifiers[clser].value == cc: #print clser, f, v, c classifiers_features = self.get_features(utterance, (f, v, cc), utterance_fvcs) classifiers_inputs = np.zeros((1, len(self.classifiers_features_mapping[clser]))) classifiers_inputs[0] = classifiers_features.get_feature_vector(self.classifiers_features_mapping[clser]) #if verbose: # print classifiers_features # print self.classifiers_features_mapping[clser] p = self.trained_classifiers[clser].predict_proba(classifiers_inputs) if verbose: print ' Probability:', p dai = DialogueActItem(self.parsed_classifiers[clser].dat, self.parsed_classifiers[clser].name, v) da_confnet.add_merge(p[0][1], dai, combine='max') else: # process concrete classifiers classifiers_features = self.get_features(utterance, (None, None, None), utterance_fvcs) classifiers_inputs = np.zeros((1, len(self.classifiers_features_mapping[clser]))) classifiers_inputs[0] = classifiers_features.get_feature_vector(self.classifiers_features_mapping[clser]) #if verbose: # print classifiers_features # print self.classifiers_features_mapping[clser] p = self.trained_classifiers[clser].predict_proba(classifiers_inputs) if verbose: print ' Probability:', p dai = self.parsed_classifiers[clser] da_confnet.add_merge(p[0][1], dai, combine='max') da_confnet.sort().prune() return da_confnet
def parse_X(self, utterance, verbose=False): if verbose: print '=' * 120 print 'Parsing X' print '-' * 120 print unicode(utterance) if self.preprocessing: utterance = self.preprocessing.normalise(utterance) utterance_fvcs = self.get_fvc(utterance) if verbose: print unicode(utterance) print unicode(utterance_fvcs) da_confnet = DialogueActConfusionNetwork() for clser in self.trained_classifiers: if verbose: print "Using classifier: ", unicode(clser) if self.parsed_classifiers[clser].value and self.parsed_classifiers[ clser].value.startswith('CL_'): # process abstracted classifiers for f, v, c in utterance_fvcs: cc = "CL_" + c.upper() if self.parsed_classifiers[clser].value == cc: #print clser, f, v, c classifiers_features = self.get_features( utterance, (f, v, cc), utterance_fvcs) classifiers_inputs = np.zeros( (1, len(self.classifiers_features_mapping[clser]))) classifiers_inputs[ 0] = classifiers_features.get_feature_vector( self.classifiers_features_mapping[clser]) #if verbose: # print classifiers_features # print self.classifiers_features_mapping[clser] p = self.trained_classifiers[clser].predict_proba( classifiers_inputs) if verbose: print ' Probability:', p dai = DialogueActItem( self.parsed_classifiers[clser].dat, self.parsed_classifiers[clser].name, v) da_confnet.add_merge(p[0][1], dai, combine='max') else: # process concrete classifiers classifiers_features = self.get_features( utterance, (None, None, None), utterance_fvcs) classifiers_inputs = np.zeros( (1, len(self.classifiers_features_mapping[clser]))) classifiers_inputs[ 0] = classifiers_features.get_feature_vector( self.classifiers_features_mapping[clser]) #if verbose: # print classifiers_features # print self.classifiers_features_mapping[clser] p = self.trained_classifiers[clser].predict_proba( classifiers_inputs) if verbose: print ' Probability:', p dai = self.parsed_classifiers[clser] da_confnet.add_merge(p[0][1], dai, combine='max') da_confnet.sort().prune() return da_confnet
def parse_1_best(self, obs, verbose=False, *args, **kwargs):
    """Parse an utterance into a dialogue act.

    The utterance is read from ``obs['utt']``. If its unicode form is a key
    of ``self.utt2da`` with a truthy value, the mapped dialogue act is
    returned directly, every item merged with probability 1.0. Otherwise
    the utterance is normalised and abstracted, and the individual
    ``parse_*`` handlers fill the confusion network.

    :param obs: mapping holding the utterance (or an UtteranceHyp) under
        the key 'utt'
    :param verbose: print the utterance before and after preprocessing
    :rtype: DialogueActConfusionNetwork
    """
    # NOTE(review): the block nesting below was reconstructed from a
    # whitespace-collapsed source. Confirm that the parse_* calls and
    # parse_meta all belong under the `len(res_cn) == 0` guard.
    utterance = obs['utt']

    if isinstance(utterance, UtteranceHyp):
        # Parse just the utterance and ignore the confidence score.
        utterance = utterance.utterance

    if verbose:
        print 'Parsing utterance "{utt}".'.format(utt=utterance)

    res_cn = DialogueActConfusionNetwork()

    # Shortcut: utterances found in utt2da skip all further parsing.
    dict_da = self.utt2da.get(unicode(utterance), None)
    if dict_da:
        for dai in DialogueAct(dict_da):
            res_cn.add_merge(1.0, dai)
        return res_cn

    utterance = self.preprocessing.normalise_utterance(utterance)
    abutterance, category_labels, abutterance_lenghts = self.abstract_utterance(utterance)

    if verbose:
        print 'After preprocessing: "{utt}".'.format(utt=abutterance)
        print category_labels

    # Runs on the utterance while it still contains the non-speech markers
    # that are stripped just below.
    self.parse_non_speech_events(utterance, res_cn)

    # Remove the non-speech markers from both forms of the utterance.
    utterance = utterance.replace_all(['_noise_'], '').replace_all(['_laugh_'], '').replace_all(['_ehm_hmm_'], '').replace_all(['_inhale_'], '')
    abutterance = abutterance.replace_all(['_noise_'], '').replace_all(['_laugh_'], '').replace_all(['_ehm_hmm_'], '').replace_all(['_inhale_'], '')

    abutterance = self.handle_false_abstractions(abutterance)
    # These three labels are added unconditionally, so the CITY, VEHICLE and
    # NUMBER membership checks below are expected to always succeed.
    category_labels.add('CITY')
    category_labels.add('VEHICLE')
    category_labels.add('NUMBER')

    # The handlers run only while res_cn is still empty at this point.
    if len(res_cn) == 0:
        if 'STOP' in category_labels:
            self.parse_stop(abutterance, res_cn)
        if 'CITY' in category_labels:
            self.parse_city(abutterance, res_cn)
        if 'NUMBER' in category_labels:
            # Unlike the other handlers this one gets no res_cn and its
            # return value is unused; presumably it rewrites abutterance
            # in place -- TODO confirm.
            self.parse_number(abutterance)
            # Enable the TIME handler when any word now starts with "TIME".
            if any([word.startswith("TIME") for word in abutterance]):
                category_labels.add('TIME')
        if 'TIME' in category_labels:
            self.parse_time(abutterance, res_cn)
        if 'DATE_REL' in category_labels:
            self.parse_date_rel(abutterance, res_cn)
        if 'AMPM' in category_labels:
            self.parse_ampm(abutterance, res_cn)
        if 'VEHICLE' in category_labels:
            self.parse_vehicle(abutterance, res_cn)
        if 'TASK' in category_labels:
            self.parse_task(abutterance, res_cn)
        if 'TRAIN_NAME' in category_labels:
            self.parse_train_name(abutterance, res_cn)

        # parse_meta receives the non-abstracted utterance.
        self.parse_meta(utterance, abutterance_lenghts, res_cn)

    return res_cn