def _get_slot_value(self, slot):
    '''Get value describing user intention for the given slot.

    The value is looked up in this order, the first hit wins:
      1. ``self.goal[slot]``;
      2. the goal value of the first slot returned by
         ``self._get_equivalent_slots(slot)`` that is present in the goal;
      3. the default value of ``slot`` (``self._get_default_slot_value``);
      4. the default value of the first equivalent slot that has one
         (first match, consistent with ``_build_dialogue_act_items``).

    Args:
        slot: Name of the slot whose value is requested.

    Returns:
        The value found, or None when neither the goal nor the defaults
        provide one. No exception is raised in that case; a warning is
        printed instead.
    '''
    goal = self.goal
    if slot in goal:
        return goal[slot]

    # Required slot is not in the goal: try a slot considered equivalent.
    eq_slots = self._get_equivalent_slots(slot)
    for eq_slot in eq_slots:
        if eq_slot in goal:
            return goal[eq_slot]

    # No compatible slot in the goal: fall back to default values.
    value = self._get_default_slot_value(slot)
    if value is not None:
        return value
    for eq_slot in eq_slots:
        value = self._get_default_slot_value(eq_slot)
        if value is not None:
            return value

    # Deliberately best-effort: report the problem and hand back None.
    print('!!!!!!???set to None, Cant find value for slot [%s] and its equivalents slot from goal and default slots' % slot)
    return None
def parse_task(self, abutterance, cn):
    """Detects the task in the input abstract utterance.

    Every token of the form ``TASK=<value>`` yields one dialogue act item for
    the ``task`` slot with probability 1.0: ``deny`` when the denial phrase is
    found in the utterance, ``inform`` otherwise.

    :param abutterance: the input abstract utterance (iterated token by token).
    :param cn: the output confusion network; items are added via ``cn.add``.
    """
    # NOTE(review): the word list is passed to phrase_in exactly as before;
    # whether it is matched as one phrase or as alternatives depends on
    # phrase_in, which is defined elsewhere -- confirm.
    act_type = "deny" if phrase_in(abutterance, ['nechci', 'nehledám']) else "inform"

    label = "TASK="
    for token in abutterance:
        if not token.startswith(label):
            continue
        cn.add(1.0, DialogueActItem(act_type, 'task', token[len(label):]))
def parse_nblist(self, obs, *args, **kwargs):
    """
    Parses an observation featuring an utterance n-best list using the
    parse_1_best method.

    Each hypothesis is converted to a confusion network: the special
    hypotheses "_other_" and "_silence_" map directly to a single "other" /
    "silence" item, anything else is parsed by `parse_1_best'.  The
    per-hypothesis networks are merged (weighted by the hypothesis
    probability) with merge_slu_confnets, then pruned and sorted.

    Arguments:
        obs -- a dictionary of observations
            :: observation type -> observed value
            it must contain the n-best list under the key 'utt_nbl'
        args -- further positional arguments passed on to `parse_1_best'
        kwargs -- further keyword arguments passed on to `parse_1_best'

    Returns the merged confusion network; an empty one for an empty n-best
    list.
    """
    hypotheses = obs['utt_nbl']
    if len(hypotheses) == 0:
        return DialogueActConfusionNetwork()

    # parse_1_best receives a copy of the observation in which the n-best
    # list is replaced by one hypothesis at a time under the key 'utt'.
    single_obs = copy.deepcopy(obs)
    del single_obs['utt_nbl']

    def single_act_confnet(act_type):
        confnet = DialogueActConfusionNetwork()
        confnet.add(1.0, DialogueActItem(act_type))
        return confnet

    weighted_confnets = []
    for prob, utt in hypotheses:
        if "_other_" == utt:
            confnet = single_act_confnet("other")
        elif "_silence_" == utt:
            confnet = single_act_confnet("silence")
        else:
            single_obs['utt'] = utt
            confnet = self.parse_1_best(single_obs, *args, **kwargs)
        weighted_confnets.append((prob, confnet))

    merged = merge_slu_confnets(weighted_confnets)
    merged.prune()
    merged.sort()
    return merged
def parse_non_speech_events(self, utterance, cn):
    """
    Processes non-speech events in the input utterance.

    Adds a "silence" item for an empty utterance or a silence marker, a
    "null" item for noise-like markers and an "other" item for the "other"
    markers.  Each item is added with probability 1.0.

    :param utterance: the input utterance
    :param cn: The output dialogue act item confusion network.
    :return: None
    """

    def is_one_of(*markers):
        # Marker stays on the left-hand side of the comparison, as before.
        return any(marker == utterance for marker in markers)

    if len(utterance.utterance) == 0 or is_one_of("_silence_", "__silence__", "_sil_"):
        cn.add(1.0, DialogueActItem("silence"))

    if is_one_of("_noise_", "_laugh_", "_ehm_hmm_", "_inhale_"):
        cn.add(1.0, DialogueActItem("null"))

    if is_one_of("_other_", "__other__"):
        cn.add(1.0, DialogueActItem("other"))
def generate_task():
    """Generate a random task as a list of DialogueAct objects.

    The first dialogue act informs about the ``find_connection`` task and
    two distinct stops drawn from STOPS.  The following acts collect
    randomly chosen entries of QUESTIONS (processed in ascending index
    order); a new act is started the first time a question about
    'alternative', 'vehicle', 'time' or 'to_stop' is encountered.

    The result depends on the order of calls to the ``random`` module.

    :return: list of DialogueAct objects
    """
    task = []
    da = DialogueAct()
    # indicate that we're looking for connection
    da.append(DialogueActItem('inform', 'task', 'find_connection'))
    # get two distinct stops
    from_stop = random.choice(STOPS)
    to_stop = from_stop
    while to_stop == from_stop:
        to_stop = random.choice(STOPS)
    da.append(DialogueActItem('inform', 'from_stop', from_stop))
    da.append(DialogueActItem('inform', 'to_stop', to_stop))
    task.append(da)
    # generate random subsequent questions
    # (len(task) is 1 here, so 4 or 5 of the indexes 0..5 are sampled)
    questions = random.sample(range(6), random.randint(5, 6) - len(task))
    query_change = False
    da = DialogueAct()
    for question in sorted(questions):
        dais = QUESTIONS[question]
        # the first query-changing question closes the act collected so far
        if dais[0].name in ['alternative', 'vehicle', 'time', 'to_stop'
                            ] and not query_change:
            query_change = True
            task.append(da)
            da = DialogueAct()
        if dais[0].name == 'to_stop':
            # pick a destination different from both original stops
            new_to_stop = random.choice(STOPS)
            while new_to_stop == from_stop or new_to_stop == to_stop:
                new_to_stop = random.choice(STOPS)
            # NOTE: this writes into the item obtained from QUESTIONS itself
            dais[0].value = new_to_stop
        da.extend(dais)
    task.append(da)
    return task
def parse_date_rel(self, abutterance, cn):
    """Detects the relative date in the input abstract utterance.

    Every ``DATE_REL=<value>`` token produces one ``date_rel`` item with
    probability 1.0.  The act type is ``confirm`` when the confirmation
    phrase is found, else ``deny`` when the denial phrase is found, else
    ``inform``.

    :param abutterance: the input abstract utterance.
    :param cn: The output dialogue act item confusion network.
    """
    is_confirm = phrase_in(abutterance, ['jede', 'to'])
    is_deny = phrase_in(abutterance, ['nechci', 'ne'])

    if is_confirm:
        act_type = "confirm"
    elif is_deny:
        act_type = "deny"
    else:
        act_type = "inform"

    label = "DATE_REL="
    for token in abutterance:
        if token.startswith(label):
            cn.add(1.0, DialogueActItem(act_type, 'date_rel', token[len(label):]))
def parse_vehicle(self, abutterance, cn):
    """Detects the vehicle (transport type) in the input abstract utterance.

    Every ``VEHICLE=<value>`` token produces one ``vehicle`` item with
    probability 1.0.  The act type is ``confirm`` when the phrase 'jede to'
    is found, else ``deny`` when any of the denial phrases is found, else
    ``inform``.

    :param abutterance: the input abstract utterance.
    :param cn: The output dialogue act item confusion network.
    """
    is_confirm = phrase_in(abutterance, 'jede to')
    is_deny = any_phrase_in(abutterance, ['nechci jet', 'bez použití'])

    if is_confirm:
        act_type = "confirm"
    elif is_deny:
        act_type = "deny"
    else:
        act_type = "inform"

    label = "VEHICLE="
    for token in abutterance:
        if token.startswith(label):
            cn.add(1.0, DialogueActItem(act_type, 'vehicle', token[len(label):]))
def main():
    """Demonstrate DSTCTracker: update a DSTCState with informs, then denials.

    The state is pretty-printed after creation and after each update.  The
    denials are added to the same confusion network that already holds the
    informs before the second update.
    """
    separator = '---'

    # initialize tracker and state
    slots = ["food", "location"]
    tracker = DSTCTracker(slots)
    state = DSTCState(slots)
    state.pprint()

    # try to update state with some information
    print(separator)
    cn = DialogueActConfusionNetwork()
    for prob, food in ((0.3, "chinese"), (0.1, "indian")):
        cn.add(prob, DialogueActItem("inform", "food", food))
    tracker.update_state(state, cn)
    state.pprint()

    # try to deny some information
    print(separator)
    for prob, food in ((0.9, "chinese"), (0.1, "indian")):
        cn.add(prob, DialogueActItem("deny", "food", food))
    tracker.update_state(state, cn)
    state.pprint()
def parse_vehicle(self, abutterance, cn):
    """Detects the vehicle (transport type) in the input abstract utterance.

    Every ``VEHICLE=<value>`` token produces one ``vehicle`` item with
    probability 1.0.  The act type is ``confirm`` when the confirmation
    words are found by phrase_in, else ``deny`` when the denial words are
    found, else ``inform``.

    :param abutterance: the input abstract utterance.
    :param cn: The output dialogue act item confusion network.
    """
    is_confirm = phrase_in(abutterance, ['jede', 'to'])
    is_deny = phrase_in(abutterance, ['nechci', 'jet'])

    if is_confirm:
        act_type = "confirm"
    elif is_deny:
        act_type = "deny"
    else:
        act_type = "inform"

    label = "VEHICLE="
    for token in abutterance:
        if token.startswith(label):
            cn.add(1.0, DialogueActItem(act_type, 'vehicle', token[len(label):]))
def test_get_best_da(self):
    """Check get_best_da() and get_best_nonnull_da() on small networks."""

    def build_confnet(scored_dais):
        confnet = DialogueActConfusionNetwork()
        for prob, dai_str in scored_dais:
            confnet.add(prob, DialogueActItem(dai=dai_str))
        return confnet

    # The most probable item is the single item of the best DA.
    dacn = build_confnet([
        (0.2, 'inform(food=chinese)'),
        (0.7, 'inform(food=czech)'),
        (0.1, 'inform(food=russian)'),
    ])
    best = dacn.get_best_da()
    self.assertEqual(len(best), 1)
    self.assertEqual(best.dais[0], DialogueActItem(dai='inform(food=czech)'))

    # An item whose slot value is "null" is still returned as the best
    # non-null DA.
    dacn = build_confnet([
        (0.2, 'inform(food=chinese)'),
        (0.3, 'inform(food=czech)'),
        (0.7, 'inform(food=null)'),
        (0.1, 'inform(food=russian)'),
    ])
    best = dacn.get_best_nonnull_da()
    self.assertEqual(len(best), 1)
    self.assertEqual(best.dais[0], DialogueActItem(dai='inform(food=null)'))
def parse_ampm(self, abutterance, cn):
    """Detects the ampm in the input abstract utterance.

    Every ``AMPM=<value>`` token produces one ``ampm`` item with probability
    1.0, unless the utterance contains 'dobrou', in which case the token is
    skipped.  The act type is ``confirm`` / ``deny`` / ``inform`` depending
    on the confirmation and denial words found by phrase_in.

    :param abutterance: the input abstract utterance.
    :param cn: The output dialogue act item confusion network.
    """
    is_confirm = phrase_in(abutterance, ['jede', 'to'])
    is_deny = phrase_in(abutterance, ['nechci', 'ne'])

    if is_confirm:
        act_type = "confirm"
    elif is_deny:
        act_type = "deny"
    else:
        act_type = "inform"

    label = "AMPM="
    for token in abutterance:
        if not token.startswith(label):
            continue
        if phrase_in(abutterance, 'dobrou'):
            continue
        cn.add(1.0, DialogueActItem(act_type, 'ampm', token[len(label):]))
def deabstract(utt, dais):
    """De-abstract an utterance and a list of corresponding DAIs, so that a
    specific answer is provided.

    Abstract placeholders in the utterance (``*STOP``, ``*VEHICLE``,
    ``*NUMBER``, ``*AMPM``) are replaced by randomly chosen concrete values
    and the same values are stored in fresh copies of the DAIs.

    :param utt: the abstract utterance (a string)
    :param dais: iterable of abstract dialogue act items (read ``dat``, \
            ``name`` and ``value``); they are not modified
    :return: tuple (de-abstracted utterance, list of new DialogueActItems)
    :raise NotImplementedError: if a slot cannot be de-abstracted or its \
            placeholder is not found in the utterance
    """
    # prepare some data to be used
    from_stop, to_stop = random.sample(STOPS, 2)
    time = random.choice(range(7, 23))
    ampm = 'am' if time < 12 else 'pm'
    # NOTE(review): for time == 12 this yields 0 ('0:00', WORD_FOR_NUMBER[0])
    # -- confirm this is intended.
    time %= 12
    vehicle = random.choice(['subway', 'bus'])

    # Replacements are passed to re.sub as callables so the inserted text is
    # taken literally.  With a template such as r'\1%s' % name, a name that
    # starts with a digit or contains a backslash would be parsed as part of
    # the group reference / an escape sequence.
    def literal(text):
        return lambda match: text

    def after_group1(text, sep=''):
        return lambda match: match.group(1) + sep + text

    dais_out = []  # create a completely new structure, so that we keep the abstract original
    # process DAIs and deabstract them, along with the utterance
    for dai in dais:
        dai_out = DialogueActItem(dai.dat, dai.name, dai.value)
        dais_out.append(dai_out)
        if dai.name == 'from_stop':
            dai_out.value = from_stop
            utt_r = re.sub(r'(from |^)\*STOP', after_group1(from_stop), utt)
        elif dai.name == 'to_stop':
            dai_out.value = to_stop
            utt_r = re.sub(r'(destination is|arrive (in|at)|to|towards?|for|into) \*STOP',
                           after_group1(to_stop, ' '), utt)
        elif dai.name == 'vehicle':
            dai_out.value = vehicle
            utt_r = re.sub(r'\*VEHICLE', literal(vehicle), utt)
        elif 'time' in dai.name and dai.dat == 'inform':
            dai_out.value = unicode(time) + ':00'
            utt_r = re.sub(r'\*NUMBER', literal(WORD_FOR_NUMBER[time]), utt)
        elif 'ampm' in dai.name:
            dai_out.value = ampm
            # "in the *AMPM" gets a word form, any remaining *AMPM the plain am/pm
            utt_r = re.sub(r'(in the) \*AMPM',
                           after_group1(word_for_ampm(time, ampm), ' '), utt)
            utt_r = re.sub(r'\*AMPM', literal(ampm), utt_r)
        elif dai.value is None or '*' not in dai.value or dai.dat != 'inform':
            continue  # some things do not need deabstracting
        else:
            raise NotImplementedError('Cannot deabstract slot: ' + dai.name + " -- " + utt)
        # an unchanged utterance means the expected placeholder was missing
        if utt_r == utt:
            raise NotImplementedError('Cannot replace slot: ' + dai.name + " -- " + utt + " / " + unicode(dais))
        utt = utt_r
    return utt, dais_out
def parse_train_name(self, abutterance, cn):
    """Detects the train name in the input abstract utterance.

    Every ``TRAIN_NAME=<value>`` token is merged into the confusion network
    (``cn.add_merge``) as ``inform(train_name=<value>)`` with probability 1.0.

    :param abutterance: the input abstract utterance.
    :param cn: The output dialogue act item confusion network.
    """
    label = "TRAIN_NAME="
    train_names = (token[len(label):] for token in abutterance
                   if token.startswith(label))
    for train_name in train_names:
        cn.add_merge(1.0, DialogueActItem("inform", 'train_name', train_name))
def _infer_last_talked_about_slots(self, user_da, system_da):
    """This adds dialogue act items to support inference of the last slots the user talked about.

    For every (probability, item) pair of ``user_da`` the ontology is asked
    (``self.ontology.last_talked_about``) for (name, value) pairs; each pair
    becomes an ``inform`` item carrying the probability of the originating
    item.  Slots proposed more than once are treated as collisions.

    :param user_da: the user confusion network (iterated as (prob, dai))
    :param system_da: not used by this method
    :return: a deep copy of ``user_da`` merged (combine='max') with the
             inferred items
    """
    result_da = deepcopy(user_da)
    inferred_da = DialogueActConfusionNetwork()
    collisions = {}       # slot name -> values proposed after its first occurrence
    seen_slots = set()

    for prob, user_dai in user_da:
        lta_pairs = self.ontology.last_talked_about(user_dai.dat, user_dai.name, user_dai.value)

        candidates = []
        for name, value in lta_pairs:
            candidates.append(DialogueActItem("inform", name, value))
            if name not in seen_slots:
                seen_slots.add(name)
            else:
                collisions.setdefault(name, set()).add(value)

        for candidate in candidates:
            if candidate not in inferred_da:
                inferred_da.add(prob, candidate)

    # In case of collisions, prefer the current last talked about values if it is one of the colliding values.
    # If there is a collision and the current last talked about value is not among the colliding values, do not
    # consider the colliding DA's at all.
    rejected = set()
    for prob, dai in set(inferred_da):
        if dai.name not in collisions:
            continue
        if self[dai.name].mpv() in collisions[dai.name]:
            if not dai.value == self[dai.name].mpv():
                rejected.add(dai)
        else:
            rejected.add(dai)

    for dai in rejected:
        inferred_da.remove(dai)

    result_da.merge(inferred_da, combine='max')
    return result_da
def test_sort(self):
    """Check that sort() puts the items in descending order of probability."""
    dacn = DialogueActConfusionNetwork()
    for prob, dai_str in [(0.05, 'inform(food=chinese)'),
                          (1.0, 'inform(food=czech)'),
                          (0.00005, 'inform(food=russian)')]:
        dacn.add(prob, DialogueActItem(dai=dai_str))

    dacn.sort()
    ranked = list(dacn)

    expected_order = ['inform(food=czech)',
                      'inform(food=chinese)',
                      'inform(food=russian)']
    for position, dai_str in enumerate(expected_order):
        self.assertEqual(ranked[position][1], DialogueActItem(dai=dai_str))
def context_resolution(self, user_da, system_da):
    """Resolves and converts meaning of some user dialogue acts given the context.

    When ``system_da`` is a DialogueAct, every system item is paired with
    every (prob, item) of ``user_da`` and the following conversions apply:

    * system confirm + user affirm -> inform(system slot = system value)
    * system confirm + user negate -> deny(system slot = system value)
    * system request + user inform of a slot that the ontology's
      'context_resolution' table relates to the requested slot:
      value "dontcare" -> inform(system slot = system value), otherwise, if
      the requested slot has the user's value -> inform(system slot = user value)
    * system request of a named binary slot + user affirm / negate
      -> inform(system slot = "true" / "false")

    :param user_da: the user confusion network (iterated as (prob, dai))
    :param system_da: the last system dialogue act
    :return: a deep copy of ``user_da`` extended with the converted items
    """
    resolved_da = deepcopy(user_da)
    additions = DialogueActConfusionNetwork()

    if isinstance(system_da, DialogueAct):
        for sys_dai in system_da:
            for prob, usr_dai in user_da:
                inferred = None

                if sys_dai.dat == "confirm":
                    if usr_dai.dat == "affirm":
                        inferred = DialogueActItem("inform", sys_dai.name, sys_dai.value)
                    elif usr_dai.dat == "negate":
                        inferred = DialogueActItem("deny", sys_dai.name, sys_dai.value)
                elif sys_dai.dat == "request":
                    if usr_dai.dat == "inform":
                        if usr_dai.name in self.ontology['context_resolution'] and \
                                sys_dai.name in self.ontology['context_resolution'][usr_dai.name]:
                            if usr_dai.value == "dontcare":
                                inferred = DialogueActItem("inform", sys_dai.name, sys_dai.value)
                            elif self.ontology.slot_has_value(sys_dai.name, usr_dai.value):
                                inferred = DialogueActItem("inform", sys_dai.name, usr_dai.value)
                    elif sys_dai.name != "" and usr_dai.dat in ("affirm", "negate") and \
                            self.ontology.slot_is_binary(sys_dai.name):
                        answer = "true" if usr_dai.dat == "affirm" else "false"
                        inferred = DialogueActItem("inform", sys_dai.name, answer)

                if inferred:
                    additions.add(prob, inferred)

    resolved_da.extend(additions)
    return resolved_da
def test_merge_slu_confnets(self):
    """Check merge_slu_confnets() against a hand-built weighted merge.

    The expected network adds up, per item, the item probability multiplied
    by the weight of the network it came from.  Both networks are compared
    through their unicode() representation.
    """
    confnet1 = DialogueActConfusionNetwork()
    confnet1.add(0.7, DialogueActItem('hello'))
    confnet1.add(0.2, DialogueActItem('bye'))

    confnet2 = DialogueActConfusionNetwork()
    confnet2.add(0.6, DialogueActItem('hello'))
    confnet2.add(0.3, DialogueActItem('restart'))

    merged = merge_slu_confnets([[0.7, confnet1], [0.3, confnet2]])

    expected = DialogueActConfusionNetwork()
    for weight, prob, act_type in ((0.7, 0.7, 'hello'),
                                   (0.7, 0.2, 'bye'),
                                   (0.3, 0.6, 'hello'),
                                   (0.3, 0.3, 'restart')):
        expected.add_merge(weight * prob, DialogueActItem(act_type), combine='add')

    report = [
        "",
        "Merged confnets:",
        unicode(merged),
        "",
        "Correct merged results:",
        unicode(expected),
        "",
    ]
    print('\n'.join(report))

    self.assertEqual(unicode(merged), unicode(expected))
def last_talked_about(self, user_da, system_da):
    """This adds dialogue act items to support inference of the last slots the user talked about.

    For every (probability, item) pair of ``user_da`` the ontology is asked
    (``self.ontology.last_talked_about``) for (name, value) pairs; each pair
    is added as an ``inform`` item with the probability of the originating
    item.

    :param user_da: the user confusion network (iterated as (prob, dai))
    :param system_da: not used by this method
    :return: a deep copy of ``user_da`` extended with the inferred items
    """
    result_da = deepcopy(user_da)
    inferred_da = DialogueActConfusionNetwork()

    for prob, user_dai in user_da:
        lta_pairs = self.ontology.last_talked_about(user_dai.dat, user_dai.name, user_dai.value)
        inferred_dais = [DialogueActItem("inform", name, value) for name, value in lta_pairs]
        for inferred_dai in inferred_dais:
            inferred_da.add(prob, inferred_dai)

    result_da.extend(inferred_da)
    return result_da
def deabstract(utt, dais):
    """De-abstract an utterance and a list of corresponding DAIs, so that a
    specific answer is provided.

    Abstract placeholders in the utterance (``*STOP``, ``*VEHICLE``,
    ``*NUMBER``, ``*AMPM``) are replaced by randomly chosen concrete values
    and the same values are stored in fresh copies of the DAIs.

    :param utt: the abstract utterance (a string)
    :param dais: iterable of abstract dialogue act items (read ``dat``, \
            ``name`` and ``value``); they are not modified
    :return: tuple (de-abstracted utterance, list of new DialogueActItems)
    :raise NotImplementedError: if a slot cannot be de-abstracted or its \
            placeholder is not found in the utterance
    """
    # prepare some data to be used
    from_stop, to_stop = random.sample(STOPS, 2)
    time = random.choice(range(7, 23))
    ampm = 'am' if time < 12 else 'pm'
    # NOTE(review): for time == 12 this yields 0 ('0:00', WORD_FOR_NUMBER[0])
    # -- confirm this is intended.
    time %= 12
    vehicle = random.choice(['subway', 'bus'])

    # Replacements are passed to re.sub as callables so the inserted text is
    # taken literally.  With a template such as r'\1%s' % name, a name that
    # starts with a digit or contains a backslash would be parsed as part of
    # the group reference / an escape sequence.
    def literal(text):
        return lambda match: text

    def after_group1(text, sep=''):
        return lambda match: match.group(1) + sep + text

    dais_out = []  # create a completely new structure, so that we keep the abstract original
    # process DAIs and deabstract them, along with the utterance
    for dai in dais:
        dai_out = DialogueActItem(dai.dat, dai.name, dai.value)
        dais_out.append(dai_out)
        if dai.name == 'from_stop':
            dai_out.value = from_stop
            utt_r = re.sub(r'(from |^)\*STOP', after_group1(from_stop), utt)
        elif dai.name == 'to_stop':
            dai_out.value = to_stop
            utt_r = re.sub(r'(destination is|arrive (in|at)|to|towards?|for|into) \*STOP',
                           after_group1(to_stop, ' '), utt)
        elif dai.name == 'vehicle':
            dai_out.value = vehicle
            utt_r = re.sub(r'\*VEHICLE', literal(vehicle), utt)
        elif 'time' in dai.name and dai.dat == 'inform':
            dai_out.value = unicode(time) + ':00'
            utt_r = re.sub(r'\*NUMBER', literal(WORD_FOR_NUMBER[time]), utt)
        elif 'ampm' in dai.name:
            dai_out.value = ampm
            # "in the *AMPM" gets a word form, any remaining *AMPM the plain am/pm
            utt_r = re.sub(r'(in the) \*AMPM',
                           after_group1(word_for_ampm(time, ampm), ' '), utt)
            utt_r = re.sub(r'\*AMPM', literal(ampm), utt_r)
        elif dai.value is None or '*' not in dai.value or dai.dat != 'inform':
            continue  # some things do not need deabstracting
        else:
            raise NotImplementedError('Cannot deabstract slot: ' + dai.name + " -- " + utt)
        # an unchanged utterance means the expected placeholder was missing
        if utt_r == utt:
            raise NotImplementedError('Cannot replace slot: ' + dai.name + " -- " + utt + " / " + unicode(dais))
        utt = utt_r
    return utt, dais_out
def test_switching_tasks(self):
    """Check that 'lta_task' follows the task the user talked about last.

    The weather lookup and the directions lookup of the policy are stubbed
    with mox; the expected calls are recorded before ReplayAll() and checked
    by VerifyAll() at the end.
    """
    hdc_policy = self._build_policy()
    # record phase: stub the external lookups and declare the expected calls
    self.mox.StubOutWithMock(hdc_policy.weather, 'get_weather')
    self.mox.StubOutWithMock(hdc_policy, 'get_directions')
    hdc_policy.weather.get_weather(city=u'Praha', daily=False, lat=u'50.0755381', lon=u'14.4378005', time=None).AndReturn(None)
    hdc_policy.get_directions(mox.IgnoreArg(), check_conflict=True).AndReturn([DialogueActItem(dai="inform(time=10:00)")])
    self.mox.ReplayAll()
    state = DeterministicDiscriminativeDialogueState(self.cfg, self.ontology)
    system_input = DialogueActConfusionNetwork()
    # the returned system acts are not inspected, only the dialogue state is
    res = hdc_policy.get_da(state)
    # User says she wants weather so the task should be weather.
    user_input = self._build_user_input("inform(task=weather)")
    state.update(user_input, system_input)
    res = hdc_policy.get_da(state)
    self.assertEqual(state['lta_task'].mpv(), 'weather')
    # User wants to find a connection so the task should be find_connection.
    user_input = self._build_user_input(u"inform(task=find_connection)", u"inform(to_stop=Malostranská)", u"inform(from_stop=Anděl)")
    state.update(user_input, system_input)
    res = hdc_policy.get_da(state)
    self.assertEqual(state['lta_task'].mpv(), 'find_connection')
    self.mox.VerifyAll()
def test_get_da_nblist(self):
    """Check the best DA of the n-best list generated from a confusion network."""

    def best_da_of(scored_dais):
        confnet = DialogueActConfusionNetwork()
        for prob, dai_str in scored_dais:
            confnet.add(prob, DialogueActItem(dai=dai_str))
        return confnet.get_da_nblist().get_best_da()

    # Simple case with one good hypothesis.
    best_da = best_da_of([
        (0.05, 'inform(food=chinese)'),
        (0.9, 'inform(food=czech)'),
        (0.05, 'inform(food=russian)'),
    ])
    self.assertEqual(best_da, DialogueAct(da_str='inform(food=czech)'))

    # More good hypotheses
    best_da = best_da_of([
        (0.05, 'inform(food=chinese)'),
        (0.9, 'inform(food=czech)'),
        (0.9, 'inform(food=russian)'),
    ])
    self.assertEqual(
        best_da,
        DialogueAct(da_str='inform(food=czech)&inform(food=russian)'))
def gen_classifiers_data(self, verbose=False):
    """Generate the training data (outputs, category labels and features)
    for all classifiers in ``self.classifiers``.

    Fills ``self.classifiers_outputs``, ``self.classifiers_cls`` and
    ``self.classifiers_features`` (all keyed by classifier) and
    ``self.parsed_classifiers`` (classifier -> parsed DialogueActItem).
    At the end the outputs of each classifier are converted to a numpy array.

    :param verbose: if true, print the examples as they are generated
    """
    # generate training data
    self.classifiers_outputs = defaultdict(list)
    self.classifiers_cls = defaultdict(list)
    self.classifiers_features = defaultdict(list)
    self.parsed_classifiers = {}
    # parse every classifier name into a dialogue act item
    for clser in self.classifiers:
        self.parsed_classifiers[clser] = DialogueActItem()
        self.parsed_classifiers[clser].parse(clser)
    for utt_idx in self.utterances_list:
        if verbose:
            print "-" * 120
            print unicode(self.utterances[utt_idx])
            print unicode(self.das[utt_idx])
        for clser in self.classifiers:
            # classifiers whose value starts with 'CL_' are the abstracted ones
            if self.parsed_classifiers[
                    clser].value and self.parsed_classifiers[
                        clser].value.startswith('CL_'):
                # process abstracted classifiers
                # one training example per (abstracted dai, category label) pair
                for i, (dai, (f, v, c)) in enumerate(
                        zip(self.das_abstracted[utt_idx],
                            self.das_category_labels[utt_idx])):
                    if clser == dai and self.parsed_classifiers[
                            clser].value and self.parsed_classifiers[
                                clser].value == c:
                        if verbose:
                            print "+ Matching a classifier in the abstracted dai, and matching category label"
                        self.classifiers_outputs[clser].append(1.0)
                        self.classifiers_cls[clser].append(
                            self.das_category_labels[utt_idx][i])
                    elif clser != dai and self.parsed_classifiers[
                            clser].value and self.parsed_classifiers[
                                clser].value == c:
                        if verbose:
                            print "- NON-Matching a classifier in the abstracted dai, and matching category label"
                        self.classifiers_outputs[clser].append(0.0)
                        self.classifiers_cls[clser].append(
                            self.das_category_labels[utt_idx][i])
                    else:
                        if verbose:
                            print "- NON-Matching a classifier in the abstracted dai, and NON-matching category label"
                        self.classifiers_outputs[clser].append(0.0)
                        self.classifiers_cls[clser].append(
                            (None, None, None))
                    # features are computed for every pair, whatever the branch
                    self.classifiers_features[clser].append(
                        self.get_features(
                            self.utterances[utt_idx],
                            self.das_category_labels[utt_idx][i],
                            self.das_category_labels[utt_idx]))
                    if verbose:
                        print " @", clser, i, dai, f, v, c
            else:
                # process concrete classifiers
                # exactly one example per utterance: positive iff the
                # classifier is among the abstracted dais of the utterance
                if clser in self.das_abstracted[utt_idx]:
                    if verbose:
                        print "+ Matching a classifier "
                    self.classifiers_outputs[clser].append(1.0)
                    self.classifiers_cls[clser].append((None, None, None))
                else:
                    if verbose:
                        print "- NON-Matching a classifier"
                    self.classifiers_outputs[clser].append(0.0)
                    self.classifiers_cls[clser].append((None, None, None))
                self.classifiers_features[clser].append(
                    self.get_features(self.utterances[utt_idx],
                                      (None, None, None),
                                      self.das_category_labels[utt_idx]))
                if verbose:
                    print " @", clser
    # convert the collected outputs into numpy arrays
    for clser in self.classifiers:
        self.classifiers_outputs[clser] = np.array(
            self.classifiers_outputs[clser])
        if verbose:
            print clser
            print zip(self.classifiers_outputs[clser],
                      self.classifiers_cls[clser])
def _build_dialogue_act_items(self, act_in, act_out, answer_type, overridden_properties):
    '''Build return acts for a type of answer act.

    Args:
        act_in: The metadata presenting the system act such as slots-values
        act_out: A string giving the type of the answer act such as inform or affirm
        answer_type: A string describing the answer type, which can be a direct answer, an over answer or a complete answer.
        overridden_properties: A dict of properties which will be used to override the default setting of the return act.

    Returns:
        A list of DialogueActItem objects.

    Raises:
        RuntimeError: Cant find value for a slot which requires, in setting, a value must be filled.
        NotImplementedError: The source providing value for a slot was not implemented.
    '''
    #print act_in
    #print '---building', act_out
    #print answer_type
    if act_out not in self.act_used_slots.keys():#saving this action used this slot
        self.act_used_slots[act_out] = set()
    # description of the answer act, with the caller's overrides applied
    act_out_des = self.metadata['dialogue_act_definitions'][act_out]
    act_out_des = self._override_act_descriptions(overridden_properties, act_out_des)
    #print 'act_out_des_override'
    #iprint(act_out_des)
    da_items = []
    combined_slots = self._get_combined_slots(act_in, act_out_des, answer_type, self.act_used_slots[act_out])
    for slot in combined_slots:
        item = DialogueActItem()
        item.dat = act_out
        if act_out_des['slot_included']:
            item.name = slot
        if act_out_des['value_included']:
            if act_out_des['value_from']=='goal':
                if slot not in self.goal.keys():#required slot not in goal
                    eq_slots = self._get_equivalent_slots(slot)
                    for s in eq_slots:#gen value from a equivalent slot
                        if s in self.goal.keys():
                            # from here on the equivalent slot replaces the requested one
                            slot = s
                            break
                if slot not in self.goal.keys():#dont have compatible slots, get from default values
                    value = self._get_default_slot_value(slot)
                    if value is not None:
                        item.value = value
                    else:
                        for s in eq_slots:#get default of equivalent slots
                            value = self._get_default_slot_value(s)
                            if value is not None:
                                item.value = value
                                item.name = s
                                break
                    if item.value is None:
                        raise RuntimeError('Cant find value for slot %s and its equivalents slot from goal and default slots'%slot)
                else:
                    # note: the name is set here regardless of 'slot_included'
                    item.value=self.goal[slot]
                    item.name = slot
            elif act_out_des['value_from']=='sys_da':
                item.value = act_in['slot_value'][slot]
            elif act_out_des['value_from']=='function':
                item.value = act_out_des['value_fun']()
            else:
                raise NotImplementedError('value_from=%s unhandled yet'%act_out_des['value_from'])
        self.act_used_slots[act_out].add(slot)#save to the list of used slot for this act_out
        # avoid duplicated items
        if item not in da_items:
            da_items.append(item)
    act_without_slot = False
    if 'act_without_slot' in act_out_des.keys() and act_out_des['act_without_slot']:
        act_without_slot = True
        # the act may also be returned bare, without any slot
        da_items.append(DialogueActItem(act_out))
    if len(combined_slots)==0 and len(da_items)==0 and not act_without_slot:
        #pass
        # only reported; an empty list is returned in this case
        print 'Not building act=%s since it requires slots and values but we cant find any slot, value for it'%act_out
        #raise RuntimeError('Cant find any slot, value for the given dialogue act, %s'%act_out)
    return da_items
def parse_time(self, abutterance, cn):
    """Detects the time in the input abstract utterance.

    Every ``TIME=<value>`` token produces one item with probability 1.0.
    The act type (confirm/deny/inform) and the time type (departure/arrival/
    unspecified) are taken from the first entry of ``test_context`` whose
    positive phrases match and whose negative phrases do not.  The slot name
    is built from the time type and the suffix ``_time`` or ``_time_rel``.

    :param abutterance: the input abstract utterance.
    :param cn: The output dialogue act item confusion network.
    """
    u = abutterance
    # preps_abs = set(["v", "ve", "čas", "o", "po", "před", "kolem"])
    # a time directly preceded by one of these words is a relative time
    preps_rel = set([
        "za",
    ])
    # entries: (act type, time type, positive phrases, negative phrases);
    # the order matters, the first matching entry is used
    test_context = [
        ('confirm', 'departure', [
            'jede to', 'odjíždí to', 'je výchozí', 'má to odjezd', 'je odjezd'
        ], []),
        ('confirm', 'arrival',
         ['přijede to', 'přijíždí to', 'má to příjezd', 'je příjezd'], []),
        ('confirm', '', ['je to', 'myslíte', 'myslíš'], []),
        ('deny', 'departure', [
            'nechci jet', 'nejedu', 'nechci odjíždět', 'nechci odjezd',
            'nechci vyjet', 'nechci vyjíždět', 'nechci vyrážet',
            'nechci vyrazit'
        ], []),
        ('deny', 'arrival', [
            'nechci přijet', 'nechci přijíždět', 'nechci příjezd',
            'nechci dorazit'
        ], []),
        ('deny', '', ['ne', 'nechci'], []),
        ('inform', 'departure', [
            'TASK=find_connection', 'odjezd', 'odjíždet', 'odjíždět',
            'odjíždět v', 'odjíždí', 'odjet', 'jedu', 'jede', 'vyrážím',
            'vyrážet', 'vyrazit', 'bych jel', 'bych jela', 'bych jet',
            'bych tam jel', 'bych tam jela', 'bych tam jet', 'abych jel',
            'abych jela', 'jak se dostanu', 'kdy jede', 'jede nějaká',
            'jede nějaký', 'VEHICLE=tram', 'chci jet', 'chtěl jet',
            'chtěla jet'
        ], [
            'příjezd', 'přijet', 'dorazit', 'abych přijel', 'abych přijela',
            'chci být', 'chtěl bych být'
        ]),
        ('inform', 'arrival', [
            'příjezd', 'přijet', 'dorazit', 'abych přijel', 'abych přijela',
            'chci být', 'chtěl bych být'
        ], []),
        ('inform', '', [], []),
    ]
    # count the time tokens first; with more than one, the context of each
    # time is limited to the words since the previous time
    count_times = 0
    for i, w in enumerate(u):
        if w.startswith("TIME="):
            count_times += 1
    last_time_type = ''
    last_time = 0
    for i, w in enumerate(u):
        if w.startswith("TIME="):
            value = w[5:]
            time_rel = False
            if i >= 1:
                if u[i - 1] in preps_rel:
                    time_rel = True
            # context window u[j:k] used for the phrase tests below
            if count_times > 1:
                j, k = last_time, i
            else:
                j, k = 0, len(u)
            if value == "now":
                # "now" is skipped when one of these phrases is in the context
                if any_phrase_in(
                        u[j:k],
                    ['no a', 'kolik je', 'neslyším', 'už mi neříká']):
                    continue
                else:
                    time_rel = True
            # find the first matching context; when nothing matches, the loop
            # variables keep the values of the last entry ('inform', '')
            for act_type, time_type, phrases_pos, phrases_neg in test_context:
                # positive phrases are searched in the window, negative ones
                # in the whole utterance
                if any_phrase_in(
                        u[j:k],
                        phrases_pos) and not any_phrase_in(u, phrases_neg):
                    break
            if count_times > 1 and not time_type:
                # use the previous type if there was time before this one
                time_type = last_time_type
            last_time_type = time_type
            # an empty time type gives the plain slot 'time' / 'time_rel'
            slot = (time_type +
                    ('_time_rel' if time_rel else '_time')).lstrip('_')
            cn.add(1.0, DialogueActItem(act_type, slot, value))
            last_time = i + 1
def parse_waypoint(self,
                   abutterance,
                   cn,
                   wp_id,
                   wp_slot_suffix,
                   phr_wp_types,
                   phr_in=None):
    """Detects stops or cities in the input abstract utterance
    (called through parse_city or parse_stop).

    For every token starting with ``wp_id`` the waypoint type (e.g. from, to,
    in) and the act type (inform/confirm/deny) are guessed from the
    surrounding words and the resulting item(s) are added to ``cn``.

    :param abutterance: the input abstract utterance.
    :param cn: The output dialogue act item confusion network.
    :param wp_id: waypoint slot category label (e.g. "STOP=", "CITY=")
    :param wp_slot_suffix: waypoint slot suffix (e.g. "stop", "city")
    :param phr_wp_types: set of phrases for each waypoint type; a sequence of \
            (type, phrase set) pairs, indexed below as 0 = from, 1 = to, 2 = via
    :param phr_in: phrases for 'in' waypoint type
    """
    u = abutterance
    N = len(u)
    # simple "ne" cannot be included as it collides with negation. "ne [,] chci jet z Motola"
    # entries: (act type, positive phrases, negative phrases)
    phr_dai_types = [
        ('confirm', set(['jede to', 'odjíždí to', 'je výchozí']), set()),
        (
            'deny',
            # positive matches
            set([
                'nechci', 'nejedu', 'ne z', 'ne od', 'ne na', 'ne do',
                'né do', 'ne k', 'nikoliv', 'nechci na', 'nechtěl'
            ]),
            # negative matches
            set([
                'nechci ukončit hovor', 'nechci to tak', 'né to nechci',
                'ne to nechci', 'nechci nápovědu', 'nechci chci',
                'ne to ne', 'ne ne z'
            ]))
    ]
    # position just after the previously processed waypoint; the context of
    # a waypoint never reaches before it
    last_wp_pos = 0
    for i, w in enumerate(u):
        if w.startswith(wp_id):
            wp_name = w[len(wp_id):]
            wp_types = set()
            dai_type = 'inform'
            # test short preceding context to find the stop type (from, to, via)
            wp_precontext = {}
            for cur_wp_type, phrases in phr_wp_types:
                wp_precontext[cur_wp_type] = first_phrase_span(
                    u[max(last_wp_pos, i - 5):i], phrases)
            wp_types |= self._get_closest_wp_type(wp_precontext)
            # test short following context (0 = from, 1 = to, 2 = via)
            if not wp_types:
                # a following from/via phrase suggests this waypoint is 'to',
                # a following to phrase suggests it is 'from'
                if any_phrase_in(u[i:i + 3],
                                 phr_wp_types[0][1] | phr_wp_types[2][1]):
                    wp_types.add('to')
                elif any_phrase_in(u[i:i + 3], phr_wp_types[1][1]):
                    wp_types.add('from')
            # resolve context according to further preceding/following waypoint name (assuming from-to)
            if not wp_types:
                if i >= 1 and u[i - 1].startswith(wp_id):
                    wp_types.add('to')
                elif i <= N - 2 and u[i + 1].startswith(wp_id):
                    wp_types.add('from')
            # using 'in' slot if the previous checks did not work and we have phrases for 'in'
            if not wp_types and phr_in is not None and any_phrase_in(
                    u[max(last_wp_pos, i - 5):i], phr_in):
                wp_types.add('in')
            # test utterance type
            # (the first matching entry wins, 'inform' stays otherwise)
            for cur_dai_type, phrases_pos, phrases_neg in phr_dai_types:
                if any_phrase_in(u[last_wp_pos:i],
                                 phrases_pos) and not any_phrase_in(
                                     u[last_wp_pos:i], phrases_neg):
                    dai_type = cur_dai_type
                    break
            # add waypoint to confusion network (standard case: just single type is decided)
            if len(wp_types) == 1:
                cn.add(
                    1.0,
                    DialogueActItem(dai_type,
                                    wp_types.pop() + '_' + wp_slot_suffix,
                                    wp_name))
            # backoff 1: add both 'from' and 'to' waypoint slots
            # ('from' gets a slightly higher probability than 'to')
            elif 'from' in wp_types and 'to' in wp_types:
                cn.add(
                    0.501,
                    DialogueActItem(dai_type, 'from_' + wp_slot_suffix,
                                    wp_name))
                cn.add(
                    0.499,
                    DialogueActItem(dai_type, 'to_' + wp_slot_suffix,
                                    wp_name))
            # backoff 2: let the DM decide in context resolution
            else:
                cn.add(1.0,
                       DialogueActItem(dai_type, wp_slot_suffix, wp_name))
            last_wp_pos = i + 1
def parse_X(self, utterance, verbose=False):
    """Parse the utterance with the trained classifiers.

    Every trained classifier contributes one or more dialogue act items
    whose probability is the second column of its ``predict_proba`` output;
    items are merged into the confusion network with combine='max'.

    :param utterance: the input utterance
    :param verbose: if true, print the intermediate results
    :return: the sorted and pruned dialogue act confusion network
    """
    if verbose:
        print '=' * 120
        print 'Parsing X'
        print '-' * 120
        print unicode(utterance)
    if self.preprocessing:
        utterance = self.preprocessing.normalise(utterance)
    # (feature, value, category label) triples found in the utterance
    utterance_fvcs = self.get_fvc(utterance)
    if verbose:
        print unicode(utterance)
        print unicode(utterance_fvcs)
    da_confnet = DialogueActConfusionNetwork()
    for clser in self.trained_classifiers:
        if verbose:
            print "Using classifier: ", unicode(clser)
        # classifiers whose value starts with 'CL_' are the abstracted ones
        if self.parsed_classifiers[clser].value and self.parsed_classifiers[
                clser].value.startswith('CL_'):
            # process abstracted classifiers
            # run the classifier once for each triple with its category label
            for f, v, c in utterance_fvcs:
                cc = "CL_" + c.upper()
                if self.parsed_classifiers[clser].value == cc:
                    #print clser, f, v, c
                    classifiers_features = self.get_features(
                        utterance, (f, v, cc), utterance_fvcs)
                    # a single-row input matrix for predict_proba
                    classifiers_inputs = np.zeros(
                        (1, len(self.classifiers_features_mapping[clser])))
                    classifiers_inputs[
                        0] = classifiers_features.get_feature_vector(
                            self.classifiers_features_mapping[clser])
                    #if verbose:
                    #    print classifiers_features
                    #    print self.classifiers_features_mapping[clser]
                    p = self.trained_classifiers[clser].predict_proba(
                        classifiers_inputs)
                    if verbose:
                        print ' Probability:', p
                    # the concrete value v replaces the category label
                    dai = DialogueActItem(
                        self.parsed_classifiers[clser].dat,
                        self.parsed_classifiers[clser].name, v)
                    da_confnet.add_merge(p[0][1], dai, combine='max')
        else:
            # process concrete classifiers
            classifiers_features = self.get_features(
                utterance, (None, None, None), utterance_fvcs)
            classifiers_inputs = np.zeros(
                (1, len(self.classifiers_features_mapping[clser])))
            classifiers_inputs[
                0] = classifiers_features.get_feature_vector(
                    self.classifiers_features_mapping[clser])
            #if verbose:
            #    print classifiers_features
            #    print self.classifiers_features_mapping[clser]
            p = self.trained_classifiers[clser].predict_proba(
                classifiers_inputs)
            if verbose:
                print ' Probability:', p
            # the parsed classifier itself is the resulting item
            dai = self.parsed_classifiers[clser]
            da_confnet.add_merge(p[0][1], dai, combine='max')
    da_confnet.sort().prune()
    return da_confnet
def gen_classifiers_data(self, min_pos_feature_count=5, min_neg_feature_count=5, verbose=False, verbose2=False): # generate training data self.classifiers_outputs = defaultdict(list) self.classifiers_cls = defaultdict(list) self.classifiers_features = defaultdict(list) self.classifiers_features_list = {} self.classifiers_features_mapping = {} self.parsed_classifiers = {} for clser in self.classifiers: self.parsed_classifiers[clser] = DialogueActItem() self.parsed_classifiers[clser].parse(clser) for n, clser in enumerate(sorted(self.classifiers)): if verbose or verbose2: print '=' * 120 print 'Generating the training data for the classifier', clser, ' #', n + 1, '/', len( self.classifiers) print '-' * 120 for utt_idx in self.utterances_list: # if verbose: # print "-" * 120 # print unicode(self.utterances[utt_idx]) # print unicode(self.das[utt_idx]) if self.parsed_classifiers[ clser].value and self.parsed_classifiers[ clser].value.startswith('CL_'): # process abstracted classifiers for i, (dai, (f, v, c)) in enumerate( zip(self.das_abstracted[utt_idx], self.das_category_labels[utt_idx])): if clser == dai and self.parsed_classifiers[ clser].value and self.parsed_classifiers[ clser].value == c: if verbose: print "+ Matching a classifier in the abstracted dai, and matching category label" self.classifiers_outputs[clser].append(1.0) self.classifiers_cls[clser].append( self.das_category_labels[utt_idx][i]) elif clser != dai and self.parsed_classifiers[ clser].value and self.parsed_classifiers[ clser].value == c: if verbose: print "- NON-Matching a classifier in the abstracted dai, and matching category label" self.classifiers_outputs[clser].append(0.0) self.classifiers_cls[clser].append( self.das_category_labels[utt_idx][i]) else: if verbose: print "- NON-Matching a classifier in the abstracted dai, and NON-matching category label" self.classifiers_outputs[clser].append(0.0) self.classifiers_cls[clser].append( (None, None, None)) self.classifiers_features[clser].append( 
self.get_features( self.utterances[utt_idx], self.das_category_labels[utt_idx][i], self.das_category_labels[utt_idx])) if verbose: print " @", clser, i, dai, f, v, c else: # process concrete classifiers if clser in self.das_abstracted[utt_idx]: if verbose: print "+ Matching a classifier " self.classifiers_outputs[clser].append(1.0) self.classifiers_cls[clser].append((None, None, None)) else: if verbose: print "- NON-Matching a classifier" self.classifiers_outputs[clser].append(0.0) self.classifiers_cls[clser].append((None, None, None)) self.classifiers_features[clser].append( self.get_features(self.utterances[utt_idx], (None, None, None), self.das_category_labels[utt_idx])) if verbose: print " @", clser self.classifiers_outputs[clser] = np.array( self.classifiers_outputs[clser]) if verbose: print clser print zip(self.classifiers_outputs[clser], self.classifiers_cls[clser]) self.prune_features(clser, min_pos_feature_count, min_neg_feature_count, verbose=(verbose or verbose2))
def parse_meta(self, utterance, cn):
    """
    Detects all dialogue acts which do not generalise its slot values using CLDB.

    Each rule below tests the utterance with the keyword/phrase helpers and,
    when it fires, adds one dialogue act item with probability 1.0 to ``cn``.
    The rules are independent unless they are chained with ``elif``.

    :param utterance: the input utterance
    :param cn: The output dialogue act item confusion network.
    :return: None
    """
    u = utterance

    # --- greetings and closing ------------------------------------------
    if (any_word_in(u, 'ahoj áhoj nazdar zdar') or
            all_words_in(u, 'dobrý den')):
        cn.add(1.0, DialogueActItem("hello"))

    if (any_word_in(
            u,
            "nashledanou shledanou schledanou shle nashle sbohem bohem zbohem zbohem konec hledanou "
            "naschledanou čau čauky čaues shledanó") or
            phrase_in(u, "dobrou noc") or
            (not any_word_in(u, "nechci") and phrase_in(u, "ukončit hovor"))):
        cn.add(1.0, DialogueActItem("bye"))

    # --- request for alternatives ---------------------------------------
    if not any_word_in(u, 'spojení zastávka stanice možnost varianta'):
        if any_word_in(u, 'jiný jiné jiná jiného'):
            cn.add(1.0, DialogueActItem("reqalts"))

    # --- restart / repeat -----------------------------------------------
    # The three branches are mutually exclusive: a restart wins; otherwise a
    # repeat is detected unless one of the blocking words is present; the
    # last branch handles "repeat the last sentence", which contains the
    # blocking word "poslední".
    if ((any_word_in(u, "od začít začneme začněme začni začněte") and
         any_word_in(u, "začátku znova znovu")) or
            any_word_in(u, "reset resetuj restart restartuj zrušit") or
            (any_phrase_in(u, ['nové spojení', 'nový spojení', 'nové zadání',
                               'nový zadání', 'nový spoj']) and
             not any_word_in(u, "ze")) or
            all_words_in(u, "tak jinak") or
            any_phrase_in(u, ["tak znova", 'zkusíme to ještě jednou'])):
        cn.add(1.0, DialogueActItem("restart"))
    elif not any_word_in(
            u,
            'spojení zastávka stanice možnost spoj nabídnutý poslední nalezená opakuji'
    ):
        if (any_word_in(
                u,
                'zopakovat opakovat znova znovu opakuj zopakuj zopakujte zvopakovat'
        ) or phrase_in(u, "ještě jednou")):
            cn.add(1.0, DialogueActItem("repeat"))
    elif (any_word_in(u, "zopakuj zopakujte zopakovat opakovat") and
          phrase_in(u, "poslední větu")):
        cn.add(1.0, DialogueActItem("repeat"))

    # --- apology / help / channel problems ------------------------------
    if ((len(u) == 1 and
         any_word_in(u, "pardon pardón promiňte promiň sorry")) or
            any_phrase_in(u, ['omlouvám se', 'je mi líto'])):
        cn.add(1.0, DialogueActItem("apology"))

    if not any_word_in(u, "nechci děkuji"):
        if (any_word_in(
                u,
                "nápověda nápovědu pomoc pomoct pomoci pomož pomohla pomohl pomůžete help nevím nevim nechápu"
        ) or (any_word_in(u, 'co') and
              any_word_in(u, "zeptat říct dělat"))):
            cn.add(1.0, DialogueActItem("help"))

    if (any_word_in(u, "neslyšíme neslyším halo haló nefunguje cože") or
            (phrase_in(u, "slyšíme se") and
             not phrase_in(u, "ano slyšíme se"))):
        cn.add(1.0, DialogueActItem('canthearyou'))

    if (all_words_in(u, "nerozuměl jsem") or
            all_words_in(u, "nerozuměla jsem") or
            all_words_in(u, "taky nerozumím") or
            all_words_in(u, "nerozumím vám") or
            (len(u) == 1 and any_word_in(u, "nerozumím"))):
        cn.add(1.0, DialogueActItem('notunderstood'))

    # --- affirm / negate / thanks / acknowledgement ---------------------
    if (any_word_in(u, "ano jo jasně jojo") and
            not any_word_in(u, "nerozuměj nechci vzdávám čau možnost konec")):
        cn.add(1.0, DialogueActItem("affirm"))

    if not any_phrase_in(u, ['ne z', 'né do']):
        if (any_word_in(u, "ne né nene nené néé") or
                any_phrase_in(u, ['nechci to tak', 'to nechci',
                                  'to nehledej', 'no nebyli']) or
                (len(u) == 1 and any_word_in(u, "nejedu nechci")) or
                (len(u) == 2 and all_words_in(u, "ano nechci")) or
                all_words_in(u, "to je špatně")):
            cn.add(1.0, DialogueActItem("negate"))

    if any_word_in(u, 'díky dikec děkuji dekuji děkuju děkují'):
        cn.add(1.0, DialogueActItem("thankyou"))

    if ((any_word_in(
            u,
            'ok pořádku dobře správně stačí super fajn rozuměl rozuměla slyším'
    ) or any_phrase_in(u, ['to je vše', 'je to vše', 'je to všechno',
                           'to bylo všechno', 'to bude všechno',
                           'už s ničím', 'už s ničim',
                           'to jsem chtěl slyšet']) or
         (any_word_in(u, "dobrý") and
          not any_phrase_in(u, ['dobrý den', 'dobrý dén', 'dobrý večer'])))
            and not any_word_in(u, "ano")):
        cn.add(1.0, DialogueActItem("ack"))

    # --- task selection -------------------------------------------------
    # NOTE: the word list used to be written with commas ("trasa, trasou,
    # ..."), unlike every other word list in this method, which are
    # separated by spaces only.  Presumably the helper splits on whitespace,
    # in which case the commas stuck to the words and only the last one
    # could ever match -- confirm against any_word_in.
    if (any_phrase_in(u, ['chci jet', 'chtěla jet', 'bych jet', 'bych jel',
                          'bychom jet', 'bych tam jet', 'jak se dostanu',
                          'se dostat']) or
            any_word_in(u, "trasa trasou trasy trasu trase")):
        cn.add(1.0, DialogueActItem('inform', 'task', 'find_connection'))

    if any_phrase_in(
            u, ['jak bude', 'jak dnes bude', 'jak je', 'jak tam bude']):
        cn.add(1.0, DialogueActItem('inform', 'task', 'weather'))

    # --- requests for stops ---------------------------------------------
    if (all_words_in(u, 'od to jede') or
            all_words_in(u, 'z jake jede') or
            all_words_in(u, 'z jaké jede') or
            all_words_in(u, 'z jaké zastávky') or
            all_words_in(u, 'jaká výchozí') or
            all_words_in(u, 'kde začátek') or
            all_words_in(u, 'odkud to jede') or
            all_words_in(u, 'odkud jede') or
            all_words_in(u, 'odkud pojede') or
            all_words_in(u, 'od kud pojede')):
        cn.add(1.0, DialogueActItem('request', 'from_stop'))

    # The clause for 'kde konečná' used to be listed twice; once is enough.
    if (all_words_in(u, 'kam to jede') or
            all_words_in(u, 'na jakou jede') or
            all_words_in(u, 'do jake jede') or
            all_words_in(u, 'do jaké jede') or
            all_words_in(u, 'do jaké zastávky') or
            all_words_in(u, 'co cíl') or
            all_words_in(u, 'jaká cílová') or
            all_words_in(u, 'kde konečná') or
            all_words_in(u, "kam jede") or
            all_words_in(u, "kam pojede")):
        cn.add(1.0, DialogueActItem('request', 'to_stop'))

    # --- requests for times ---------------------------------------------
    if not any_word_in(
            u,
            'za budu bude budem přijede přijedete přijedu dojedu dojede dorazí dorazím dorazíte'
    ):
        if (all_words_in(u, "kdy jede") or
                all_words_in(u, "v kolik jede") or
                all_words_in(u, "v kolik hodin") or
                all_words_in(u, "kdy to pojede") or
                (any_word_in(u, 'kdy kolik') and
                 any_word_in(u, 'jede odjíždí odjede odjíždíš odjíždíte')) or
                phrase_in(u, 'časový údaj')):
            cn.add(1.0, DialogueActItem('request', 'departure_time'))

    if not any_word_in(
            u,
            'budu bude budem přijede přijedete přijedu dojedu dorazí dorazím dorazíte'
    ):
        if ((all_words_in(u, "za jak") and
             any_word_in(u, 'dlouho dlóho')) or
                all_words_in(u, "za kolik minut jede") or
                all_words_in(u, "za kolik minut pojede") or
                (all_words_in(u, "za jak pojede") and
                 any_word_in(u, 'dlouho dlóho'))):
            cn.add(1.0, DialogueActItem('request', 'departure_time_rel'))

    if ((all_words_in(u, 'kdy tam') and
         any_word_in(u, 'budu bude budem')) or
            (all_words_in(u, 'v kolik') and
             any_word_in(u, 'budu bude budem')) or
            all_words_in(u, 'čas příjezdu') or
            (any_word_in(u, 'kdy kolik') and
             any_word_in(
                 u,
                 'příjezd přijede přijedete přijedu přijedem dojedu dorazí '
                 'dojede dorazím dorazíte'))):
        cn.add(1.0, DialogueActItem('request', 'arrival_time'))

    if (all_words_in(u, 'za jak') and
            any_word_in(u, 'dlouho dlóho') and
            any_word_in(
                u,
                'budu bude budem přijedu přijede přijedem přijedete dojedu dorazí dorazím dorazíte'
            ) and
            any_phrase_in(u, ['tam', 'v cíli', 'do cíle', 'k cíli',
                              'cílové zastávce', 'cílové stanici'])):
        cn.add(1.0, DialogueActItem('request', 'arrival_time_rel'))

    if not any_word_in(u, 'za v přestup přestupy'):
        if ((all_words_in(u, 'jak') and
             any_word_in(u, 'dlouho dlóho') and
             any_word_in(u, "jede pojede trvá trvat")) or
                (all_words_in(u, "kolik minut") and
                 any_word_in(u, "jede pojede trvá trvat"))):
            cn.add(1.0, DialogueActItem('request', 'duration'))

    if (all_words_in(u, 'kolik je hodin') or
            all_words_in(u, 'kolik máme hodin') or
            all_words_in(u, 'kolik je teď') or
            all_words_in(u, 'kolik je teďka')):
        cn.add(1.0, DialogueActItem('request', 'current_time'))

    # --- transfers ------------------------------------------------------
    # Only the first matching branch fires for an utterance that mentions
    # transfers.
    if any_word_in(
            u,
            'přestupů přestupu přestupy stupňů přestup přestupku přestupky přestupků ' +
            'přestupovat přestupuju přestupuji přestupování přestupama přestupem'
    ):
        if any_word_in(u, 'čas času dlouho trvá trvají trvat'):
            cn.add(1.0, DialogueActItem('request', 'time_transfers'))
        elif any_word_in(u, 'kolik počet kolikrát jsou je'):
            cn.add(1.0, DialogueActItem('request', 'num_transfers'))
        elif any_word_in(u, 'nechci bez žádný žádné žáden'):
            cn.add(1.0, DialogueActItem('inform', 'num_transfers', '0'))
        elif any_word_in(u, 'jeden jedním jednou'):
            cn.add(1.0, DialogueActItem('inform', 'num_transfers', '1'))
        elif any_word_in(u, 'dva dvěma dvěmi dvakrát'):
            cn.add(1.0, DialogueActItem('inform', 'num_transfers', '2'))
        elif any_word_in(u, 'tři třema třemi třikrát'):
            cn.add(1.0, DialogueActItem('inform', 'num_transfers', '3'))
        elif any_word_in(u, 'čtyři čtyřma čtyřmi čtyřikrát'):
            cn.add(1.0, DialogueActItem('inform', 'num_transfers', '4'))
        elif (any_word_in(u, 'libovolně libovolný libovolné') or
              all_words_in(u, 'bez ohledu') or
              any_phrase_in(u, ['s přestupem', 's přestupy',
                                's přestupama'])):
            cn.add(1.0,
                   DialogueActItem('inform', 'num_transfers', 'dontcare'))

    if any_phrase_in(u, [
            'přímý spoj', 'přímé spojení', 'přímé spoje', 'přímý spoje',
            'přímej spoj', 'přímý spojení', 'jet přímo', 'pojedu přímo',
            'dostanu přímo', 'dojedu přímo', 'dostat přímo'
    ]):
        cn.add(1.0, DialogueActItem('inform', 'num_transfers', '0'))

    # --- choice of an alternative connection ----------------------------
    if any_word_in(
            u,
            'spoj spojení spoje možnost možnosti varianta alternativa cesta cestu cesty '
            'zpoždění stažení nalezená nabídnuté'):
        if (any_word_in(u, 'libovolný') and
                not any_word_in(
                    u, 'první jedna druhá druhý třetí čtvrtá čtvrtý')):
            cn.add(1.0, DialogueActItem("inform", "alternative", "dontcare"))

        if (any_word_in(u, 'první jedna') and
                not any_word_in(u, 'druhá druhý třetí čtvrtá čtvrtý') and
                not all_words_in(u, 'ještě jedna')):
            cn.add(1.0, DialogueActItem("inform", "alternative", "1"))

        if (any_word_in(u, 'druhé druhá druhý druhou dva') and
                not any_word_in(u, 'třetí čtvrtá čtvrtý další')):
            cn.add(1.0, DialogueActItem("inform", "alternative", "2"))

        if any_word_in(u, 'třetí tři'):
            cn.add(1.0, DialogueActItem("inform", "alternative", "3"))

        if any_word_in(u, 'čtvrté čtvrtá čtvrtý čtvrtou čtyři'):
            cn.add(1.0, DialogueActItem("inform", "alternative", "4"))

        if any_word_in(u, 'páté pátou'):
            cn.add(1.0, DialogueActItem("inform", "alternative", "5"))

        # previous / last / next are mutually exclusive
        if any_word_in(u, "předchozí před"):
            if any_phrase_in(
                    u, ["nechci vědět předchozí", "nechci předchozí"]):
                cn.add(1.0, DialogueActItem("deny", "alternative", "prev"))
            else:
                cn.add(1.0, DialogueActItem("inform", "alternative", "prev"))
        elif any_word_in(
                u,
                "poslední znovu znova opakovat zopakovat zopakujte zopakování"
        ):
            if any_phrase_in(u, ["nechci poslední"]):
                cn.add(1.0, DialogueActItem("deny", "alternative", "last"))
            else:
                cn.add(1.0, DialogueActItem("inform", "alternative", "last"))
        elif (any_word_in(u, "další jiné jiná následující pozdější") or
              any_phrase_in(u, ['ještě jedno', 'ještě jednu', 'ještě jedna',
                                'ještě jednou', 'ještě zeptat na jedno'])):
            cn.add(1.0, DialogueActItem("inform", "alternative", "next"))

    # --- very short utterances asking for the next/previous alternative -
    if ((len(u) == 1 and
         any_word_in(u, 'další následující následují později')) or
            ending_phrases_in(u, ['další', 'co dál'])):
        cn.add(1.0, DialogueActItem("inform", "alternative", "next"))

    if len(u) == 2 and (all_words_in(u, "a další") or
                        all_words_in(u, "a později")):
        cn.add(1.0, DialogueActItem("inform", "alternative", "next"))

    if len(u) == 1 and any_word_in(u, "předchozí před"):
        cn.add(1.0, DialogueActItem("inform", "alternative", "prev"))

    if any_phrase_in(u, ["jako v dne", "jako ve dne"]):
        cn.add(1.0, DialogueActItem('inform', 'ampm', 'pm'))

    # --- utterances ending with a dangling preposition ------------------
    # The slot is known but the value is missing, hence the '*' value.
    if ending_phrases_in(u, ["od", "z", "z nádraží"]):
        cn.add(1.0, DialogueActItem('inform', 'from', '*'))
    elif ending_phrases_in(u, ["na", "do", "dó"]):
        cn.add(1.0, DialogueActItem('inform', 'to', '*'))
    elif ending_phrases_in(u, [
            "z zastávky", "z stanice", "výchozí stanice je",
            "výchozí zastávku"
    ]):
        cn.add(1.0, DialogueActItem('inform', 'from_stop', '*'))
    elif ending_phrases_in(u, [
            "na zastávku", "ná zastávků", "do zastávky", "do zástavky",
            "do zastavky"
    ]):
        cn.add(1.0, DialogueActItem('inform', 'to_stop', '*'))
    elif ending_phrases_in(u, ["přes"]):
        cn.add(1.0, DialogueActItem('inform', 'via', '*'))
def test_session_logger(self):
    """Drive the session logger through three complete scripted calls.

    Each call logs into its own directory ``./0``, ``./1`` and ``./2``
    (created when missing) and exercises the header, recording, turn, ASR,
    SLU, barge-in and hangup logging methods in sequence.
    """
    cfg = Config.load_configs(config=CONFIG_DICT, use_default=False)

    sl = SessionLogger()

    dialogue_recordings = (
        (None, "both_complete_dialogue.wav"),
        ("system", "system_complete_dialogue.wav"),
        ("user", "user_complete_dialogue.wav"),
    )

    # test 3 calls at once
    for call_no in range(3):
        sess_dir = "./%d" % call_no
        if not os.path.isdir(sess_dir):
            os.mkdir(sess_dir)
        sl.session_start(sess_dir)
        sl.config('config = ' + unicode(cfg))
        sl.header(cfg['Logging']["system_name"], cfg['Logging']["version"])
        sl.input_source("voip")

        # whole-dialogue recordings: start all of them, then end all
        for speaker, wav_name in dialogue_recordings:
            sl.dialogue_rec_start(speaker, wav_name)
        for _, wav_name in dialogue_recordings:
            sl.dialogue_rec_end(wav_name)

        # first system turn
        sl.turn("system")
        sl.dialogue_act("system", "hello()")
        sl.text("system", "Hello.")
        sl.rec_start("system", "system1.wav")
        sl.rec_end("system1.wav")

        # first user turn
        sl.turn("user")
        sl.rec_start("user", "user1.wav")
        sl.rec_end("user1.wav")

        A1, A2, A3 = 0.90, 0.05, 0.05
        B1, B2, B3 = 0.70, 0.20, 0.10
        C1, C2, C3 = 0.80, 0.10, 0.10

        word_alternatives = (
            [[A1, "want"], [A2, "has"], [A3, 'ehm']],
            [[B1, "Chinese"], [B2, "English"], [B3, 'cheap']],
            [[C1, "restaurant"], [C2, "pub"], [C3, 'hotel']],
        )
        asr_confnet = UtteranceConfusionNetwork()
        for alternatives in word_alternatives:
            asr_confnet.add(alternatives)
        asr_confnet.merge()
        asr_confnet.normalise()
        asr_confnet.sort()

        asr_nblist = asr_confnet.get_utterance_nblist()

        sl.asr("user", "user1.wav", asr_nblist, asr_confnet)

        slu_confnet = DialogueActConfusionNetwork()
        for prob, dat in ((0.7, 'hello'), (0.6, 'thankyou'),
                          (0.4, 'restart'), (0.1, 'bye')):
            slu_confnet.add(prob, DialogueActItem(dat))
        slu_confnet.merge()
        slu_confnet.normalise()
        slu_confnet.sort()

        slu_nblist = slu_confnet.get_da_nblist()

        sl.slu("user", "user1.wav", slu_nblist, slu_confnet)

        # second system turn, interrupted by the user
        sl.turn("system")
        sl.dialogue_act("system", "thankyou()")
        sl.text("system", "Thank you.", cost=1.0)
        sl.rec_start("system", "system2.wav")
        sl.rec_end("system2.wav")
        sl.barge_in("system", tts_time=True)

        # second user turn and hangup
        sl.turn("user")
        sl.rec_start("user", "user2.wav")
        sl.rec_end("user2.wav")
        sl.hangup("user")
def _build_user_input(self, *args):
    """Wrap dialogue act item descriptions into a confusion network.

    Every positional argument is passed as ``dai=`` to ``DialogueActItem``
    and the resulting item is added with probability 1.0.

    :param args: values accepted by ``DialogueActItem(dai=...)``
    :return: the populated DialogueActConfusionNetwork
    """
    confnet = DialogueActConfusionNetwork()
    for dai_spec in args:
        item = DialogueActItem(dai=dai_spec)
        confnet.add(1.0, item)
    return confnet
def parse(self, da_str):
    """Parse a CUED-style dialogue act string into dialogue act items.

    The text before the first "(" becomes ``self.dat``; the lower-cased
    text between the outer parentheses is split into slots, and the slots
    are translated into ``DialogueActItem`` objects appended to
    ``self._dais`` according to the dialogue act type.

    :param da_str: the dialogue act string, e.g. ``type(slot=value,...)``
    :raises ValueError: if ``da_str`` contains no "(" or does not hold
        exactly one dialogue act
    :raises CuedDialogueActError: if the dialogue act type is unknown, or
        if a ``select``/``deny`` act has fewer slots than it requires
    """
    # Get the dialogue act type.
    first_par_idx = da_str.index("(")
    self.dat = da_str[:first_par_idx]

    if len(split_by_comma(da_str)) != 1:
        raise ValueError('Too many (or none -- too few) DAs in CUED DA '
                         'representation.')

    slots_str = da_str[first_par_idx:].lower()[1:-1]

    # `slots` must always be bound: the branches below iterate over it even
    # for acts without any slot (it used to be undefined in that case).
    slots = list()
    if not slots_str:
        # no slots to process
        self._dais = list()
    else:
        # NOTE(review): self._dais is not reset in this branch, so it is
        # presumably initialised by the constructor -- confirm.
        # split slots_str
        for slot_str in split_by(slots_str, splitter=',', quotes='"'):
            try:
                slots.append(CUEDSlot(slot_str))
            except ValueError:
                # Skip slots we cannot parse.
                pass

    def inform_or_deny(slot):
        # A negated slot is a denial, any other slot is an inform.
        dat = 'deny' if slot.negated else 'inform'
        return DialogueActItem(dat, slot.name, slot.value)

    if self.dat == 'inform':
        for slot in slots:
            self._dais.append(inform_or_deny(slot))

    elif self.dat == 'request':
        for slot in slots:
            if slot.value:
                # A slot with a value carries information, not a request.
                self._dais.append(inform_or_deny(slot))
            else:
                self._dais.append(
                    DialogueActItem('request', slot.name, slot.value))

    elif self.dat == 'confirm':
        for slot in slots:
            if slot.name == 'name':
                self._dais.append(
                    DialogueActItem('inform', slot.name, slot.value))
            else:
                self._dais.append(
                    DialogueActItem('confirm', slot.name, slot.value))

    elif self.dat == 'select':
        # XXX We cannot represent DAIS with multiple slots as of now.
        # Therefore, the select DAT is split into two DAIs here.
        if len(slots) < 2:
            raise CuedDialogueActError(
                'CUED DA type "select" needs two slots when parsing '
                '"{da_str}".'.format(da_str=da_str))
        self._dais.append(
            DialogueActItem('select', slots[0].name, slots[0].value))
        self._dais.append(
            DialogueActItem('select', slots[1].name, slots[1].value))

    elif self.dat in ('silence', 'thankyou', 'ack', 'bye', 'hangup',
                      'repeat', 'help', 'restart', 'null'):
        self._dais.append(DialogueActItem(self.dat))

    elif self.dat in ('hello', 'affirm', 'negate', 'reqalts', 'reqmore'):
        self._dais.append(DialogueActItem(self.dat))
        # Iterate over the parsed slots.  The loop used to run over
        # self._dais itself, i.e. over the list it was appending to.
        for slot in slots:
            self._dais.append(inform_or_deny(slot))

    elif self.dat == 'deny':
        if not slots:
            raise CuedDialogueActError(
                'CUED DA type "deny" needs at least one slot when parsing '
                '"{da_str}".'.format(da_str=da_str))
        # The first slot is always denied; the rest follow their negation.
        self._dais.append(
            DialogueActItem('deny', slots[0].name, slots[0].value))
        for slot in slots[1:]:
            self._dais.append(inform_or_deny(slot))

    else:
        raise CuedDialogueActError(
            'Unknown CUED DA type "{dat}" when parsing "{da_str}".'.
            format(dat=self.dat, da_str=da_str))

    self._dais_sorted = False
def process_pending_commands(self):
    """Process all pending commands.

    Available commands:
      stop() - stop processing and exit the process
      flush() - flush input buffers.
        Now it only flushes the input connection.
      new_dialogue() - start a new dialogue and send the first system
        dialogue act
      end_dialogue() - end the current dialogue
      timeout() - pass the reported silence to the dialogue manager and
        send its reaction

    Return True if the process should terminate.
    """
    session_logger = self.cfg['Logging']['session_logger']

    def send_dm_output():
        # Ask the DM for its next dialogue act, log it and send it to the
        # hub; the act is returned so the caller can inspect it.
        da = self.dm.da_out()

        if self.cfg['DM']['debug']:
            lines = ["DM Output", "-" * 60, unicode(da), ""]
            self.cfg['Logging']['system_logger'].debug('\n'.join(lines))

        session_logger.dialogue_act("system", da)
        self.commands.send(DMDA(da, 'DM', 'HUB'))
        return da

    while self.commands.poll():
        command = self.commands.recv()

        if self.cfg['DM']['debug']:
            self.cfg['Logging']['system_logger'].debug(command)

        if not isinstance(command, Command):
            continue

        name = command.parsed['__name__']

        if name == 'stop':
            return True

        elif name == 'flush':
            # discard all data in in input buffers
            while self.slu_hypotheses_in.poll():
                self.slu_hypotheses_in.recv()

            self.dm.end_dialogue()
            self.commands.send(Command("flushed()", 'DM', 'HUB'))
            return False

        elif name == 'new_dialogue':
            self.epilogue_state = None
            self.dm.new_dialogue()

            session_logger.turn("system")
            self.dm.log_state()

            # I should generate the first DM output
            send_dm_output()
            return False

        elif name == 'end_dialogue':
            self.dm.end_dialogue()
            return False

        elif name == 'timeout':
            # check whether there is a looong silence
            # if yes then inform the DM
            silence_time = command.parsed['silence_time']

            cn = DialogueActConfusionNetwork()
            cn.add(1.0, DialogueActItem('silence', 'time', silence_time))

            # process the input DA
            self.dm.da_in(cn)

            session_logger.turn("system")
            self.dm.log_state()

            if self.epilogue_state and float(silence_time) > 5.0:
                # a user was silent for too long, therefore hung up
                session_logger.dialogue_act("system", self.epilogue_da)
                self.commands.send(DMDA(self.epilogue_da, 'DM', 'HUB'))
                self.commands.send(Command('hangup()', 'DM', 'HUB'))
            else:
                da = send_dm_output()
                if da.has_dat("bye"):
                    self.commands.send(Command('hangup()', 'DM', 'HUB'))

            return False

    return False
def parse_meta(self, utterance, cn):
    """
    Detects all dialogue acts which do not generalise its slot values using CLDB.

    Every rule tests the utterance with the keyword/phrase helpers and, when
    it fires, adds one dialogue act item with probability 1.0 to ``cn``.

    :param utterance: the input utterance
    :param cn: the dialogue act item confusion network the items are added to
    :return: None
    """
    u = utterance

    def emit(*dai_args):
        # Add a dialogue act item built from dai_args with probability 1.0.
        cn.add(1.0, DialogueActItem(*dai_args))

    def all_words_any(*word_groups):
        # True when at least one of the word groups is fully present.
        return any(all_words_in(u, group) for group in word_groups)

    def phrase_any(*phrases):
        # True when at least one of the phrases is present.
        return any(phrase_in(u, phrase) for phrase in phrases)

    # --- greetings and closing ------------------------------------------
    if any_word_in(u, 'ahoj áhoj nazdar zdar') or all_words_in(u, 'dobrý den'):
        emit("hello")

    if any_word_in(
            u,
            "nashledanou shledanou schledanou shle nashle sbohem bohem zbohem zbohem konec hledanou "
            "naschledanou čau čauky čaues shledanó"):
        emit("bye")

    # --- alternatives / repeat ------------------------------------------
    if not any_word_in(u, 'spojení zastávka stanice možnost varianta'):
        if any_word_in(u, 'jiný jiné jiná jiného'):
            emit("reqalts")

    if not any_word_in(
            u,
            'spojení zastávka stanice možnost spoj nabídnutý poslední nalezená začátku opakuji začneme začněme začni začněte'
    ):
        if (any_word_in(
                u, 'zopakovat opakovat znova znovu opakuj zopakuj zopakujte')
                or phrase_in(u, "ještě jednou")):
            emit("repeat")

    if phrase_any("zopakuj poslední větu",
                  "zopakujte mi poslední větu",
                  "zopakovat poslední větu"):
        emit("repeat")

    # --- apology / help / channel problems ------------------------------
    if len(u) == 1 and any_word_in(u, "pardon pardón promiňte"):
        emit("apology")

    if not any_word_in(u, "nechci děkuji"):
        if (any_word_in(
                u,
                "nápověda nápovědu pomoc pomoct pomoci pomož pomohla pomohl pomůžete help nevím nevim"
        ) or all_words_any('co říct', 'co zeptat')):
            emit("help")

    if any_word_in(u, "neslyšíme neslyším halo haló"):
        emit('canthearyou')

    if (all_words_any("nerozuměl jsem", "nerozuměla jsem",
                      "taky nerozumím", "nerozumím vám") or
            (len(u) == 1 and any_word_in(u, "nerozumím"))):
        emit('notunderstood')

    # --- affirm / negate / thanks / acknowledgement ---------------------
    if (any_word_in(u, "ano jo jasně") and
            not any_word_in(u, "nerozuměj nechci vzdávám čau možnost konec")):
        emit("affirm")

    if not any_phrase_in(u, ['ne z', ]):
        if (any_word_in(u, "ne né nene nené") or
                phrase_in(u, 'nechci to tak') or
                (len(u) == 1 and any_word_in(u, "nejedu nechci")) or
                (len(u) == 2 and all_words_in(u, "ano nechci")) or
                all_words_in(u, "to je špatně")):
            emit("negate")

    if any_word_in(u, 'díky dikec děkuji dekuji děkuju děkují'):
        emit("thankyou")

    if (any_word_in(u, 'ok pořádku dobře správně') and
            not any_word_in(u, "ano")):
        emit("ack")

    # --- restart --------------------------------------------------------
    if ((any_word_in(u, "od začít začneme začněme začni začněte") and
         any_word_in(u, "začátku znova znovu")) or
            any_word_in(u, "reset resetuj restart restartuj") or
            (phrase_in(u, 'nové spojení') and
             not phrase_in(u, 'spojení ze')) or
            (phrase_in(u, 'nový spojení') and
             not phrase_in(u, 'spojení ze')) or
            (phrase_in(u, 'nové zadání') and not any_word_in(u, "ze")) or
            (phrase_in(u, 'nový zadání') and not any_word_in(u, "ze")) or
            (phrase_in(u, 'nový spoj') and not phrase_in(u, "spoj ze"))):
        emit("restart")

    # --- task selection -------------------------------------------------
    if any_phrase_in(u, [
            'chci jet',
            'chtěla jet',
            'bych jet',
            'bychom jet',
            'bych tam jet',
    ]):
        emit('inform', 'task', 'find_connection')

    if any_phrase_in(
            u, ['jak bude', 'jak dnes bude', 'jak je', 'jak tam bude']):
        emit('inform', 'task', 'weather')

    # --- requests for stops ---------------------------------------------
    if all_words_any('od to jede', 'z jake jede', 'z jaké jede',
                     'z jaké zastávky', 'jaká výchozí', 'kde začátek',
                     'odkud to jede', 'odkud jede', 'odkud pojede',
                     'od kud pojede'):
        emit('request', 'from_stop')

    # 'kde konečná' appears twice in the rule list; it is kept as written.
    if all_words_any('kam to jede', 'na jakou jede', 'do jake jede',
                     'do jaké jede', 'do jaké zastávky', 'co cíl',
                     'jaká cílová', 'kde konečná', 'kde konečná',
                     "kam jede", "kam pojede"):
        emit('request', 'to_stop')

    # --- requests for times ---------------------------------------------
    if not any_word_in(
            u,
            'za budu bude budem přijede přijedete přijedu dojedu dorazí dorazím dorazíte'
    ):
        if (all_words_any("kdy jede", "v kolik jede", "v kolik hodin",
                          "kdy to pojede") or
                (any_word_in(u, 'kdy kolik') and
                 any_word_in(u, 'jede odjíždí odjede odjíždíš odjíždíte'))):
            emit('request', 'departure_time')

    if not any_word_in(
            u,
            'budu bude budem přijede přijedete přijedu dojedu dorazí dorazím dorazíte'
    ):
        if all_words_any("za jak dlouho", "za kolik minut jede",
                         "za kolik minut pojede", "za jak dlouho pojede"):
            emit('request', 'departure_time_rel')

    if ((all_words_in(u, 'kdy tam') and
         any_word_in(u, 'budu bude budem')) or
            (all_words_in(u, 'v kolik tam') and
             any_word_in(u, 'budu bude budem')) or
            (all_words_in(u, 'v kolik hodin') and
             any_word_in(u, 'budu bude budem')) or
            all_words_in(u, 'čas příjezdu') or
            (any_word_in(u, 'kdy kolik') and
             any_word_in(
                 u,
                 'příjezd přijede přijedete přijedu přijedem dojedu dorazí '
                 'dorazím dorazíte'))):
        emit('request', 'arrival_time')

    if ((all_words_in(u, 'za jak dlouho tam') and
         any_word_in(u,
                     "budu bude budem přijedu přijede přijedem přijedete "
                     "dojedu dorazí dorazím dorazíte")) or
            (all_words_in(u, 'za jak dlouho budu') and
             (any_word_in(u, "cílové stanici") or
              any_word_in(u, "cílové zastávce") or
              any_word_in(u, 'cíli')))):
        emit('request', 'arrival_time_rel')

    if not any_word_in(u, 'za'):
        if (all_words_in(u, 'jak dlouho') and
                any_word_in(u, "jede pojede trvá trvat")):
            emit('request', 'duration')

    if all_words_any('kolik je hodin', 'kolik máme hodin', 'kolik je teď',
                     'kolik je teďka'):
        emit('request', 'current_time')

    # --- transfers ------------------------------------------------------
    if (any_word_in(u, 'kolik počet kolikrát jsou je') and
            any_word_in(
                u,
                'přestupů přestupu přestupy stupňů přestup přestupku přestupky přestupků ' +
                'přestupovat přestupuju přestupuji') and
            not any_word_in(u, 'čas času')):
        emit('request', 'num_transfers')

    # --- choice of an alternative connection ----------------------------
    if any_word_in(
            u,
            'spoj spojení spoje možnost možnosti varianta alternativa cesta cestu cesty '
            'zpoždění stažení nalezená'):
        if (any_word_in(u, 'libovolný') and
                not any_word_in(
                    u, 'první jedna druhá druhý třetí čtvrtá čtvrtý')):
            emit("inform", "alternative", "dontcare")

        if (any_word_in(u, 'první jedna') and
                not any_word_in(u, 'druhá druhý třetí čtvrtá čtvrtý')):
            emit("inform", "alternative", "1")

        if (any_word_in(u, 'druhé druhá druhý druhou dva') and
                not any_word_in(u, 'třetí čtvrtá čtvrtý další')):
            emit("inform", "alternative", "2")

        if any_word_in(u, 'třetí tři'):
            emit("inform", "alternative", "3")

        if any_word_in(u, 'čtvrté čtvrtá čtvrtý čtvrtou čtyři'):
            emit("inform", "alternative", "4")

        if (any_word_in(
                u,
                "poslední znovu znova opakovat zopakovat zopakujte zopakování"
        ) and not all_words_in(u, "předchozí")):
            emit("inform", "alternative", "last")

        if (any_word_in(u, "další jiné jiná následující pozdější") or
                phrase_any("ještě jedno", "ještě jednu")):
            emit("inform", "alternative", "next")

        if any_word_in(u, "předchozí před"):
            if phrase_in(u, "nechci vědět předchozí"):
                emit("deny", "alternative", "prev")
            else:
                emit("inform", "alternative", "prev")

    # --- very short utterances asking for the next/previous alternative -
    if len(u) == 1 and any_word_in(u, 'další následující následují'):
        emit("inform", "alternative", "next")

    if len(u) == 2 and all_words_any("a další", "a později"):
        emit("inform", "alternative", "next")

    if len(u) == 1 and any_word_in(u, "předchozí před"):
        emit("inform", "alternative", "prev")

    if any_phrase_in(u, ["jako v dne", "jako ve dne"]):
        emit('inform', 'ampm', 'pm')