def policy(self):
    """Choose the simulated user's reply to the oldest buffered system action.

    The first entry of ``self.state.input_buffer`` is removed and answered
    according to its ``act``.

    Returns:
        ``None`` when the speaker state is ``EXIT``, when the input buffer is
        empty (the speaker state is then switched to ``LISTEN``), or when the
        system action needs no reply.  Otherwise a single ``Action``, a list
        of ``Action`` objects, or (for ASK_REPEAT / ASK_REPHRASE) whatever
        ``self.state.last_actions`` returned.

    Raises:
        ValueError: if the system action lacks required parameters, refers
            to a slot this user cannot handle, has an unsupported act, or if
            a QUERY matches no database entry.
    """
    if self.state.spk_state == self.DialogState.EXIT:
        return None

    if len(self.state.input_buffer) == 0:
        self.state.spk_state = self.DialogState.LISTEN
        return None

    # Hard stop for over-long dialogs: drop pending input and say goodbye.
    if len(self.state.history) > 100:
        self.state.input_buffer = []
        return Action(UserAct.GOODBYE)

    top_action = self.state.input_buffer.pop(0)

    if top_action.act == SystemAct.GREET:
        return Action(UserAct.GREET)

    elif top_action.act == SystemAct.GOODBYE:
        return Action(UserAct.GOODBYE)

    elif top_action.act == SystemAct.IMPLICIT_CONFIRM:
        if len(top_action.parameters) == 0:
            raise ValueError(
                "IMPLICIT_CONFIRM is required to have parameter")
        slot_type, slot_val = top_action.parameters[0]
        if self.domain.is_usr_slot(slot_type):
            # Stay silent if the confirmation is right or the user has no
            # constraint (None) on this slot.
            if slot_val == self.usr_constrains[
                    slot_type] or self.usr_constrains[slot_type] is None:
                return None
            else:
                # np.random.choice needs real sequences; dict views are
                # rejected on Python 3, so materialise them as lists.
                reject_style = self.complexity.reject_style
                strategy = np.random.choice(
                    list(reject_style.keys()),
                    p=list(reject_style.values()))
                if strategy == "reject":
                    return Action(UserAct.DISCONFIRM, (slot_type, slot_val))
                elif strategy == "reject+inform":
                    return [
                        Action(UserAct.DISCONFIRM, (slot_type, slot_val)),
                        Action(UserAct.INFORM,
                               (slot_type, self.usr_constrains[slot_type]))
                    ]
                else:
                    raise ValueError("Unknown reject strategy")
        else:
            raise ValueError(
                "Usr cannot handle imp_confirm to non-usr slots")

    elif top_action.act == SystemAct.EXPLICIT_CONFIRM:
        if len(top_action.parameters) == 0:
            raise ValueError(
                "EXPLICIT_CONFIRM is required to have parameter")
        slot_type, slot_val = top_action.parameters[0]
        if self.domain.is_usr_slot(slot_type):
            # Unlike the implicit case, an explicit confirm is always
            # answered: CONFIRM on an exact match, DISCONFIRM otherwise.
            if slot_val == self.usr_constrains[slot_type]:
                return Action(UserAct.CONFIRM, (slot_type, slot_val))
            else:
                return Action(UserAct.DISCONFIRM, (slot_type, slot_val))
        else:
            raise ValueError(
                "Usr cannot handle exp_confirm to non-usr slots")

    elif top_action.act == SystemAct.INFORM:
        if len(top_action.parameters) != 2:
            raise ValueError(
                "INFORM needs to contain the constrains and goal (2 parameters)"
            )
        # Check whether the constraints the system used match the user's.
        valid_constrain, wrong_slot = self._constrain_equal(top_action)
        if valid_constrain:
            # Record the goals this INFORM satisfied, then look for more.
            complete_goals = self.state.update_goals_met(top_action)
            next_goal = self.state.unmet_goal()
            if next_goal is None:
                slot_key = self._increment_goal()
                if slot_key is not None:
                    return [
                        Action(UserAct.NEW_SEARCH,
                               (BaseSysSlot.DEFAULT, None)),
                        Action(UserAct.INFORM,
                               (slot_key, self.usr_constrains[slot_key]))
                    ]
                else:
                    return [
                        Action(UserAct.SATISFY,
                               [(g, None) for g in complete_goals]),
                        Action(UserAct.GOODBYE)
                    ]
            else:
                ack_act = Action(UserAct.MORE_REQUEST,
                                 [(g, None) for g in complete_goals])
                if np.random.rand() < self.complexity.yn_question:
                    # Ask a yes/no question only if the sampled value has
                    # at least one yes/no template.
                    slot = self.domain.get_sys_slot(next_goal)
                    expected_val = np.random.randint(0, slot.dim)
                    if len(
                            slot.yn_questions.get(
                                slot.vocabulary[expected_val], [])) > 0:
                        return [
                            ack_act,
                            Action(UserAct.YN_QUESTION,
                                   (slot.name, expected_val))
                        ]
                # Fall back to a plain request for the next goal.
                return [
                    ack_act,
                    Action(UserAct.REQUEST, (next_goal, None))
                ]
        else:
            # Correct the slot the system got wrong.
            return Action(UserAct.INFORM,
                          (wrong_slot, self.usr_constrains[wrong_slot]))

    elif top_action.act == SystemAct.REQUEST:
        if len(top_action.parameters) == 0:
            raise ValueError("Request is required to have parameter")
        slot_type, _ = top_action.parameters[0]
        if slot_type == BaseUsrSlot.NEED:
            next_goal = self.state.unmet_goal()
            return Action(UserAct.REQUEST, (next_goal, None))
        elif slot_type == BaseUsrSlot.HAPPY:
            return None
        elif self.domain.is_usr_slot(slot_type):
            if len(self.domain.usr_slots) > 1:
                # Same dict-view issue as above: pass lists to NumPy.
                multi_slots = self.complexity.multi_slots
                num_informs = np.random.choice(
                    list(multi_slots.keys()),
                    p=list(multi_slots.values()),
                    replace=False)
                if num_informs > 1:
                    candidates = [
                        k for k, v in self.usr_constrains.items()
                        if k != slot_type and v is not None
                    ]
                    num_extra = min(num_informs - 1, len(candidates))
                    if num_extra > 0:
                        extra_keys = np.random.choice(
                            candidates, size=num_extra, replace=False)
                        actions = [
                            Action(UserAct.INFORM,
                                   (key, self.usr_constrains[key]))
                            for key in extra_keys
                        ]
                        # The requested slot is always informed first.
                        actions.insert(
                            0,
                            Action(UserAct.INFORM,
                                   (slot_type,
                                    self.usr_constrains[slot_type])))
                        return actions
            return Action(UserAct.INFORM,
                          (slot_type, self.usr_constrains[slot_type]))
        else:
            raise ValueError(
                "Usr cannot handle request to this type of parameters")

    elif top_action.act == SystemAct.CLARIFY:
        raise ValueError("Cannot handle clarify now")

    elif top_action.act == SystemAct.ASK_REPEAT:
        last_usr_actions = self.state.last_actions(self.state.USR)
        if last_usr_actions is None:
            raise ValueError("Unexpected ask repeat")
        return last_usr_actions

    elif top_action.act == SystemAct.ASK_REPHRASE:
        last_usr_actions = self.state.last_actions(self.state.USR)
        if last_usr_actions is None:
            raise ValueError("Unexpected ask rephrase")
        # NOTE: this tags the action objects returned by last_actions in
        # place before handing them back.
        for a in last_usr_actions:
            a.add_parameter(BaseUsrSlot.AGAIN, True)
        return last_usr_actions

    elif top_action.act == SystemAct.QUERY:
        query, goals = top_action.parameters[0], top_action.parameters[1]
        valid_entries = self.domain.db.select([v for _, v in query])
        # Fail with a clear message instead of letting randint(0, 0) raise
        # an opaque "low >= high" error.
        if len(valid_entries) == 0:
            raise ValueError("No valid entries")
        chosen_entry = valid_entries[
            np.random.randint(0, len(valid_entries)), :]
        results = {}
        if chosen_entry.shape[0] > 0:
            for goal in goals:
                _, slot_id = self.domain.get_sys_slot(goal, return_idx=True)
                results[goal] = chosen_entry[slot_id]
        else:
            print(chosen_entry)
            raise ValueError("No valid entries")
        return Action(UserAct.KB_RETURN, [query, results])

    else:
        raise ValueError("Unknown system act %s" % top_action.act)
def policy(self):
    """Select the system's next action(s) plus database-advice bookkeeping.

    Returns:
        A 2-tuple ``(actions, (stat_query, advice))``.  ``actions`` is
        ``None``, a single ``Action`` or a list of ``Action`` objects.
        ``stat_query`` is a three-element list ``[food, area, pricerange]``
        filled from the current user beliefs, and ``advice`` is either ``{}``
        or the result of ``self.domain.db.get_advice``.  The early exits
        (history too long, already exited, opener, goodbye, restart) return
        ``([], [])`` as the second element.

    Raises:
        ValueError: if there are no previous user actions to respond to, or
            if the state is ready to inform but no goal qualifies.
    """
    state = self.state

    if len(state.history) > 100:
        return Action(SystemAct.GOODBYE), ([], [])
    if state.spk_state == State.EXIT:
        return None, ([], [])

    last_usr = state.last_actions(DialogState.USR)

    # Dialog opener: greet unless the search is being restarted, in which
    # case behave as if the user had just issued a bare request.
    if len(state.history) == 0:
        if not state.to_restart():
            opener = [
                Action(SystemAct.GREET),
                Action(SystemAct.REQUEST, (BaseUsrSlot.NEED, None)),
            ]
            return opener, ([], [])
        last_usr = [Action(UserAct.REQUEST)]

    if last_usr is None:
        raise ValueError("System should talk first")

    for usr_act in last_usr:
        if usr_act.act == UserAct.GOODBYE:
            state.spk_state = State.EXIT
            return Action(SystemAct.GOODBYE), ([], [])

    # Positional query [food, area, pricerange]; any belief key other than
    # '#area' / '#pricerange' lands in the food column.
    food_col, area_col, price_col = 0, 1, 2
    stat_query = [None, None, None]
    for belief_key, belief in state.usr_beliefs.items():
        best_value = belief.get_maxconf_value()
        if belief_key == '#area':
            stat_query[area_col] = best_value
        elif belief_key == "#pricerange":
            stat_query[price_col] = best_value
        else:
            stat_query[food_col] = best_value

    # Default advice; only the confirm/request path below queries the db.
    advice = {}

    if state.has_pending_return():
        # Deliver the pending query result for every confident,
        # not-yet-delivered system goal, then ask if the user is happy.
        query = state.pending_return
        goals = {}
        for goal in state.sys_goals.values():
            if goal.delivered is False and goal.conf >= BeliefGoal.THRESHOLD:
                goals[goal.uid] = (goal.value, goal.expected_value)
        reply = [
            Action(SystemAct.INFORM, [dict(query), goals]),
            Action(SystemAct.REQUEST, (BaseUsrSlot.HAPPY, None)),
        ]
        state.pending_return = None
        return reply, (stat_query, advice)

    if state.ready_to_inform():
        # Issue a QUERY with the believed constraints and the confident,
        # undelivered goal ids.
        query = [(key, slot.get_maxconf_value())
                 for key, slot in state.usr_beliefs.items()]
        goals = [
            goal.uid for goal in state.sys_goals.values()
            if goal.delivered is False and goal.conf >= BeliefGoal.THRESHOLD
        ]
        if not goals:
            raise ValueError("Empty goal. Debug!")
        return [Action(SystemAct.QUERY, [query, goals])], (stat_query, advice)

    # Still gathering constraints.  random.random() is in [0, 1), so this
    # guard currently always passes.
    if random.random() <= 1.0:
        advice = self.domain.db.get_advice(stat_query)
        if state.atleast_1_slot and (len(advice) == 0):
            return Action(SystemAct.RESTART), ([], [])

    soft_confirms = []   # IMPLICIT_CONFIRM actions
    slot_requests = []   # REQUEST actions for the least certain slots
    follow_ups = []      # EXPLICIT_CONFIRM actions, plus a NEED request
    for slot in state.usr_beliefs.values():
        if slot.max_conf() < slot.EXPLICIT_THRESHOLD:
            slot_requests.append(Action(SystemAct.REQUEST, (slot.uid, None)))
        elif slot.max_conf() < slot.IMPLICIT_THRESHOLD:
            follow_ups.append(
                Action(SystemAct.EXPLICIT_CONFIRM,
                       (slot.uid, slot.get_maxconf_value())))
        elif slot.max_conf() < slot.GROUND_THRESHOLD:
            soft_confirms.append(
                Action(SystemAct.IMPLICIT_CONFIRM,
                       (slot.uid, slot.get_maxconf_value())))

    # One NEED request is enough if any goal is only partially believed.
    if any(BeliefGoal.THRESHOLD > goal.get_conf() > 0
           for goal in state.sys_goals.values()):
        follow_ups.append(Action(SystemAct.REQUEST, (BaseUsrSlot.NEED, None)))

    if slot_requests:
        random.shuffle(slot_requests)
        return soft_confirms + slot_requests[0:1], (stat_query, advice)
    if follow_ups:
        return soft_confirms + follow_ups[0:1], (stat_query, advice)
    return soft_confirms, (stat_query, advice)
def policy(self):
    """Select the system's next action(s) from the current dialog state.

    Returns:
        ``None`` once the speaker state is ``EXIT``; a single GOODBYE
        ``Action`` when the last user turn contained a goodbye; otherwise a
        list of ``Action`` objects (possibly empty when nothing needs
        confirming or requesting).

    Raises:
        ValueError: if there are no previous user actions to respond to, or
            if the state is ready to inform but no goal qualifies.
    """
    state = self.state

    if state.spk_state == State.EXIT:
        return None

    # Dialog opener: greet and ask what the user needs.
    if len(state.history) == 0:
        return [
            Action(SystemAct.GREET),
            Action(SystemAct.REQUEST, (BaseUsrSlot.NEED, None)),
        ]

    last_usr = state.last_actions(DialogState.USR)
    if last_usr is None:
        raise ValueError("System should talk first")

    for usr_act in last_usr:
        if usr_act.act == UserAct.GOODBYE:
            state.spk_state = State.EXIT
            return Action(SystemAct.GOODBYE)

    if state.has_pending_return():
        # Deliver the pending query result for every confident,
        # not-yet-delivered system goal, then ask if the user is happy.
        query = state.pending_return
        goals = {}
        for goal in state.sys_goals.values():
            if goal.delivered is False and goal.conf >= BeliefGoal.THRESHOLD:
                goals[goal.uid] = (goal.value, goal.expected_value)
        reply = [
            Action(SystemAct.INFORM, [dict(query), goals]),
            Action(SystemAct.REQUEST, (BaseUsrSlot.HAPPY, None)),
        ]
        state.pending_return = None
        return reply

    if state.ready_to_inform():
        # Issue a QUERY with the believed constraints and the confident,
        # undelivered goal ids.
        query = [(key, slot.get_maxconf_value())
                 for key, slot in state.usr_beliefs.items()]
        goals = [
            goal.uid for goal in state.sys_goals.values()
            if goal.delivered is False and goal.conf >= BeliefGoal.THRESHOLD
        ]
        if not goals:
            raise ValueError("Empty goal. Debug!")
        return [Action(SystemAct.QUERY, [query, goals])]

    # Still gathering constraints: bucket each slot by its confidence.
    soft_confirms = []   # IMPLICIT_CONFIRM actions
    slot_requests = []   # REQUEST actions for the least certain slots
    follow_ups = []      # EXPLICIT_CONFIRM actions, plus a NEED request
    for slot in state.usr_beliefs.values():
        if slot.max_conf() < slot.EXPLICIT_THRESHOLD:
            slot_requests.append(Action(SystemAct.REQUEST, (slot.uid, None)))
        elif slot.max_conf() < slot.IMPLICIT_THRESHOLD:
            follow_ups.append(
                Action(SystemAct.EXPLICIT_CONFIRM,
                       (slot.uid, slot.get_maxconf_value())))
        elif slot.max_conf() < slot.GROUND_THRESHOLD:
            soft_confirms.append(
                Action(SystemAct.IMPLICIT_CONFIRM,
                       (slot.uid, slot.get_maxconf_value())))

    # One NEED request is enough if any goal is only partially believed.
    if any(BeliefGoal.THRESHOLD > goal.get_conf() > 0
           for goal in state.sys_goals.values()):
        follow_ups.append(Action(SystemAct.REQUEST, (BaseUsrSlot.NEED, None)))

    # Slot requests take priority over explicit confirms / NEED requests;
    # at most one of either is appended after the implicit confirms.
    if slot_requests:
        return soft_confirms + slot_requests[0:1]
    if follow_ups:
        return soft_confirms + follow_ups[0:1]
    return soft_confirms
def policy(self):
    """Choose the simulated user's reply to the oldest buffered system action.

    The first entry of ``self.state.input_buffer`` is removed and answered
    according to its ``act``.

    Returns:
        ``None`` when the speaker state is ``EXIT``, when the input buffer is
        empty (the speaker state is then switched to ``LISTEN``), or when the
        system action needs no reply.  Otherwise a single ``Action``, a list
        of ``Action`` objects, or (for ASK_REPEAT / ASK_REPHRASE) whatever
        ``self.state.last_actions`` returned.

    Raises:
        ValueError: if the system action lacks required parameters, refers
            to a slot this user cannot handle, has an unsupported act, or if
            a QUERY matches no database entry.
    """
    if self.state.spk_state == self.DialogState.EXIT:
        return None

    if len(self.state.input_buffer) == 0:
        self.state.spk_state = self.DialogState.LISTEN
        return None

    # Hard stop for over-long dialogs: drop pending input and say goodbye.
    if len(self.state.history) > 100:
        self.state.input_buffer = []
        return Action(UserAct.GOODBYE)

    # Take the first buffered system act.
    top_action = self.state.input_buffer.pop(0)

    if top_action.act == SystemAct.GREET:
        return Action(UserAct.GREET)

    elif top_action.act == SystemAct.GOODBYE:
        return Action(UserAct.GOODBYE)

    # The system implicitly confirms a slot value.
    elif top_action.act == SystemAct.IMPLICIT_CONFIRM:
        if len(top_action.parameters) == 0:
            raise ValueError(
                "IMPLICIT_CONFIRM is required to have parameter")
        # First (slot name, slot value) pair to be confirmed.
        slot_type, slot_val = top_action.parameters[0]
        # Only user slots can be confirmed.
        if self.domain.is_usr_slot(slot_type):
            # Stay silent if the confirmation is right or the user has no
            # constraint (None) on this slot.
            if slot_val == self.usr_constrains[
                    slot_type] or self.usr_constrains[slot_type] is None:
                return None
            else:
                # Otherwise either just reject the value, or reject it and
                # supply the correct one.  np.random.choice needs real
                # sequences; dict views are rejected on Python 3.
                reject_style = self.complexity.reject_style
                strategy = np.random.choice(
                    list(reject_style.keys()),
                    p=list(reject_style.values()))
                if strategy == "reject":
                    return Action(UserAct.DISCONFIRM, (slot_type, slot_val))
                elif strategy == "reject+inform":
                    return [
                        Action(UserAct.DISCONFIRM, (slot_type, slot_val)),
                        Action(UserAct.INFORM,
                               (slot_type, self.usr_constrains[slot_type]))
                    ]
                else:
                    raise ValueError("Unknown reject strategy")
        else:
            raise ValueError(
                "Usr cannot handle imp_confirm to non-usr slots")

    # The system explicitly confirms a slot value.
    elif top_action.act == SystemAct.EXPLICIT_CONFIRM:
        if len(top_action.parameters) == 0:
            raise ValueError(
                "EXPLICIT_CONFIRM is required to have parameter")
        slot_type, slot_val = top_action.parameters[0]
        if self.domain.is_usr_slot(slot_type):
            # Unlike the implicit case, an explicit confirm is always
            # answered: CONFIRM on an exact match, DISCONFIRM otherwise.
            if slot_val == self.usr_constrains[slot_type]:
                return Action(UserAct.CONFIRM, (slot_type, slot_val))
            else:
                return Action(UserAct.DISCONFIRM, (slot_type, slot_val))
        else:
            raise ValueError(
                "Usr cannot handle exp_confirm to non-usr slots")

    # The system act is INFORM.
    elif top_action.act == SystemAct.INFORM:
        if len(top_action.parameters) != 2:
            raise ValueError(
                "INFORM needs to contain the constrains and goal (2 parameters)"
            )
        # Check whether the constraints the system used match the user's.
        valid_constrain, wrong_slot = self._constrain_equal(top_action)
        if valid_constrain:
            # Constraints match: record the goals met and pick the next one.
            complete_goals = self.state.update_goals_met(top_action)
            next_goal = self.state.unmet_goal()
            if next_goal is None:
                # No goal left: try to move on to a new constraint.
                slot_key = self._increment_goal()
                if slot_key is not None:
                    # Start a new search and tell the system the updated
                    # constraint.
                    return [
                        Action(UserAct.NEW_SEARCH,
                               (BaseSysSlot.DEFAULT, None)),
                        Action(UserAct.INFORM,
                               (slot_key, self.usr_constrains[slot_key]))
                    ]
                else:
                    # Nothing more to ask: express satisfaction and leave.
                    return [
                        Action(UserAct.SATISFY,
                               [(g, None) for g in complete_goals]),
                        Action(UserAct.GOODBYE)
                    ]
            else:
                # There is still an unmet goal.
                ack_act = Action(UserAct.MORE_REQUEST,
                                 [(g, None) for g in complete_goals])
                # Randomly phrase it as a yes/no question, i.e. the user
                # proposes a goal value and asks whether it holds.
                if np.random.rand() < self.complexity.yn_question:
                    # Ask a yes/no question only if the sampled value has
                    # at least one yes/no template.
                    slot = self.domain.get_sys_slot(next_goal)
                    expected_val = np.random.randint(0, slot.dim)
                    if len(
                            slot.yn_questions.get(
                                slot.vocabulary[expected_val], [])) > 0:
                        return [
                            ack_act,
                            Action(UserAct.YN_QUESTION,
                                   (slot.name, expected_val))
                        ]
                # Otherwise ask the system for the goal normally.
                return [
                    ack_act,
                    Action(UserAct.REQUEST, (next_goal, None))
                ]
        else:
            # Constraints do not match: tell the system the correct value.
            return Action(UserAct.INFORM,
                          (wrong_slot, self.usr_constrains[wrong_slot]))

    # The system act is REQUEST.
    elif top_action.act == SystemAct.REQUEST:
        if len(top_action.parameters) == 0:
            raise ValueError("Request is required to have parameter")
        slot_type, _ = top_action.parameters[0]
        # The system asks what else the user needs.
        if slot_type == BaseUsrSlot.NEED:
            # Reply with a goal that has not been met yet.
            next_goal = self.state.unmet_goal()
            return Action(UserAct.REQUEST, (next_goal, None))
        # The system asks whether the user is happy: no reply.
        elif slot_type == BaseUsrSlot.HAPPY:
            return None
        # The system asks for one of the user's constraint slots.
        elif self.domain.is_usr_slot(slot_type):
            # Sample how many slots to inform; extra slots are appended
            # after the requested one.
            if len(self.domain.usr_slots) > 1:
                # Same dict-view issue as above: pass lists to NumPy.
                multi_slots = self.complexity.multi_slots
                num_informs = np.random.choice(
                    list(multi_slots.keys()),
                    p=list(multi_slots.values()),
                    replace=False)
                if num_informs > 1:
                    candidates = [
                        k for k, v in self.usr_constrains.items()
                        if k != slot_type and v is not None
                    ]
                    num_extra = min(num_informs - 1, len(candidates))
                    if num_extra > 0:
                        extra_keys = np.random.choice(
                            candidates, size=num_extra, replace=False)
                        actions = [
                            Action(UserAct.INFORM,
                                   (key, self.usr_constrains[key]))
                            for key in extra_keys
                        ]
                        # The requested slot is always informed first.
                        actions.insert(
                            0,
                            Action(UserAct.INFORM,
                                   (slot_type,
                                    self.usr_constrains[slot_type])))
                        return actions
            return Action(UserAct.INFORM,
                          (slot_type, self.usr_constrains[slot_type]))
        else:
            raise ValueError(
                "Usr cannot handle request to this type of parameters")

    # Clarification is not supported.
    elif top_action.act == SystemAct.CLARIFY:
        raise ValueError("Cannot handle clarify now")

    # "Say that again": replay the previous user turn.
    elif top_action.act == SystemAct.ASK_REPEAT:
        last_usr_actions = self.state.last_actions(self.state.USR)
        if last_usr_actions is None:
            raise ValueError("Unexpected ask repeat")
        return last_usr_actions

    # "Say that differently": replay the previous turn tagged with AGAIN.
    elif top_action.act == SystemAct.ASK_REPHRASE:
        last_usr_actions = self.state.last_actions(self.state.USR)
        if last_usr_actions is None:
            raise ValueError("Unexpected ask rephrase")
        # NOTE: this tags the action objects returned by last_actions in
        # place before handing them back.
        for a in last_usr_actions:
            a.add_parameter(BaseUsrSlot.AGAIN, True)
        return last_usr_actions

    # The system issues a QUERY: sample one matching database entry and
    # report the requested goal values back to the system.
    elif top_action.act == SystemAct.QUERY:
        query, goals = top_action.parameters[0], top_action.parameters[1]
        valid_entries = self.domain.db.select([v for _, v in query])
        # Fail with a clear message instead of letting randint(0, 0) raise
        # an opaque "low >= high" error.
        if len(valid_entries) == 0:
            raise ValueError("No valid entries")
        chosen_entry = valid_entries[
            np.random.randint(0, len(valid_entries)), :]
        results = {}
        if chosen_entry.shape[0] > 0:
            for goal in goals:
                _, slot_id = self.domain.get_sys_slot(goal, return_idx=True)
                results[goal] = chosen_entry[slot_id]
        else:
            print(chosen_entry)
            raise ValueError("No valid entries")
        return Action(UserAct.KB_RETURN, [query, results])

    else:
        raise ValueError("Unknown system act %s" % top_action.act)