def step(self, agent_action, global_context, user_input):
    """
    Score the agent's action, then advance the dialogue by one user turn.

    Parameters:
        agent_action: The agent action to score.
        global_context (dict): Dialogue context. It is passed to the reward
            function and then to ``get_response_and_take_action``, whose
            returned context replaces it.
        user_input: Previous user input, forwarded to
            ``get_response_and_take_action``.

    Returns:
        tuple: ``(user_input, reward, global_context, done)``. ``done`` is
        True when the returned context's ``'current_state'`` equals ``'done'``.
    """
    # The reward is computed against the context as it is before the user
    # responds.
    reward = reward_function(agent_action, global_context)

    next_input, next_context = self.get_response_and_take_action(
        global_context, user_input)

    finished = next_context['current_state'] == 'done'
    return next_input, reward, next_context, finished
def step(self, slot, num_turn):
    """
    Produce the user action for the slot chosen by the agent.

    Parameters:
        slot: Slot name returned by the agent. The literal values
            ``"success"`` and ``"fail"`` end the dialogue.
        num_turn: Current turn number; exceeding the module-level
            ``max_round`` ends the dialogue as a failure.

    Returns:
        tuple: ``(self.user_action, reward, done, success)``. The first
        element is the instance's own ``user_action`` object, not a copy.
    """
    # The agent asked for a slot that must be filled: answer with a random
    # value drawn from the database.
    if slot in Required_slots:
        self.user_action["inform_slots"][slot] = random.choice(DB[slot])
        self.user_action["intent"] = User_actions[slot]

    # Failure (explicit or by timeout) takes precedence over success.
    if slot == "fail" or num_turn > max_round:
        done, success = True, FAIL
    elif slot == "success":
        done, success = True, SUCCESS
    else:
        done, success = False, NO_OUTCOME

    reward = reward_function(success, max_round)
    return self.user_action, reward, done, success
def step(self, agent_action):
    """
    Return the user's response, reward, done flag and success flag.

    Parameters:
        agent_action (dict): The current action of the agent. The keys
            "inform_slots", "request_slots" and "round" are read.

    Returns:
        dict: User response
        Reward: whatever ``reward_function(success, self.max_round)`` returns
        bool: Done flag
        bool: True only when the outcome equals 1 (win)
    """
    # Assertions ----
    # No UNK or PLACEHOLDER in agent action informs
    for value in agent_action["inform_slots"].values():
        assert value != "UNK"
        assert value != "PLACEHOLDER"
    # No PLACEHOLDER in agent action requests
    for value in agent_action["request_slots"].values():
        assert value != "PLACEHOLDER"
    # ---------------

    print("Agent Action: {}".format(agent_action))

    user_response = {"intent": "", "request_slots": {}, "inform_slots": {}}

    # First check round num, if equal to max then fail
    if agent_action["round"] == self.max_round:
        success = FAIL
        user_response["intent"] = "done"
    else:
        user_response = self._return_response()
        success = self._return_success()

    # The episode ends as soon as a definite outcome is known.
    done = success == FAIL or success == SUCCESS

    assert "UNK" not in user_response["inform_slots"].values()
    assert "PLACEHOLDER" not in user_response["request_slots"].values()

    reward = reward_function(success, self.max_round)

    # Compare by value: ``is 1`` tests object identity, which only works by
    # accident of CPython's small-int cache.
    return user_response, reward, done, success == 1
def step(self, agent_action):
    """
    Return the user's nlg response, reward, done flag and success flag.

    Parameters:
        agent_action (dict): The current action of the agent. The keys
            'inform_slots', 'request_slots' and 'round' are read.

    Returns:
        dict: User nlg
        Reward: whatever ``reward_function(success, self.max_round)`` returns
        bool: Done flag
        bool: True only when the outcome equals 1 (win)
    """
    # Assertions ----
    # No UNK or PLACEHOLDER in agent action informs
    for value in agent_action['inform_slots'].values():
        assert value != 'UNK'
        assert value != 'PLACEHOLDER'
    # No PLACEHOLDER in agent action requests
    for value in agent_action['request_slots'].values():
        assert value != 'PLACEHOLDER'
    # ---------------

    print('Agent Action: {}'.format(agent_action))

    user_response = {'intent': '', 'request_slots': {}, 'inform_slots': {}}

    # First check round num, if equal to max then fail
    if agent_action['round'] == self.max_round:
        success = FAIL
        user_response['intent'] = 'done'
    else:
        user_response = self._return_response()
        success = self._return_success()

    # The episode ends as soon as a definite outcome is known.
    done = success == FAIL or success == SUCCESS

    assert 'UNK' not in user_response['inform_slots'].values()
    assert 'PLACEHOLDER' not in user_response['request_slots'].values()

    reward = reward_function(success, self.max_round)

    # Compare by value: ``is 1`` tests object identity, which only works by
    # accident of CPython's small-int cache.
    return user_response, reward, done, success == 1
def step(self, agent_action, num_turn):
    """
    Return the user's response, reward, done flag and success flag.

    Unlike the dict-validated variants, this version does not assert on the
    contents of ``agent_action``; it is only printed.

    Parameters:
        agent_action (dict): The current action of the agent
        num_turn: Current turn number, compared against ``self.max_round``

    Returns:
        dict: User response
        Reward: whatever ``reward_function(success, self.max_round)`` returns
        bool: Done flag
        bool: True only when the outcome equals 1 (win)
    """
    print('Agent Action: {}'.format(agent_action))

    user_response = {'intent': '', 'request_slots': {}, 'inform_slots': {}}

    # First check round num, if equal to max then fail
    if num_turn == self.max_round:
        # max_round reached: the outcome is FAIL and the user says 'done'.
        # NOTE(review): the original author notes that the State Tracker
        # zeroes its state on a 'done' intent - confirm in the tracker.
        success = FAIL
        user_response['intent'] = 'done'
    else:
        user_response = self._return_response()
        success = self._return_success()

    # Any definite outcome (win or loss) ends the episode.
    done = success == FAIL or success == SUCCESS

    assert 'UNK' not in user_response['inform_slots'].values()
    assert 'PLACEHOLDER' not in user_response['request_slots'].values()

    reward = reward_function(success, self.max_round)

    # Compare by value: ``is 1`` tests object identity, which only works by
    # accident of CPython's small-int cache.
    return user_response, reward, done, success == 1
def step(self, agent_action: DialogAction):
    """
    Respond to the agent's action and report the outcome of this turn.

    Parameters:
        agent_action (DialogAction): The agent action; its ``turn``,
            ``intent`` and ``inform_slots`` attributes are read.

    Returns:
        DialogAction: User response built from the updated ``self.state``
        Reward: whatever ``reward_function(success, self.max_round)`` returns
        bool: Done flag
        bool: True only when the outcome equals 1 (win)
    """
    self.validate_action(agent_action)

    self.state.inform_slots.clear()
    self.state.intent = ""

    done = False
    success = NO_OUTCOME

    # First check round num, if equal to max then fail
    if agent_action.turn == self.max_round:
        done = True
        success = FAIL
        self.state.intent = "done"
        self.state.request_slots.clear()
    else:
        agent_intent = agent_action.intent
        if agent_intent == "request":
            self._response_to_request(agent_action)
        elif agent_intent == "inform":
            self._response_to_inform(agent_action)
        elif agent_intent == "match_found":
            self._response_to_match_found(agent_action.inform_slots)
        elif agent_intent == "done":
            success = self._response_to_done()
            self.state.intent = "done"
            self.state.request_slots.clear()
            done = True

    self.validate_state(self.state)

    user_response = DialogAction(
        self.state.intent,
        self.state.inform_slots,
        self.state.request_slots,
        speaker=USER,
    )

    # TODO(tilo): reward-calculation must not be done by user!
    reward = reward_function(success, self.max_round)

    # Compare by value: ``is 1`` tests object identity, which only works by
    # accident of CPython's small-int cache.
    return user_response, reward, done, success == 1
def train(self, global_context):
    """
    Fit the behaviour model on mini-batches sampled from replay memory.

    ``len(self.memory) // self.batch_size`` batches are drawn. For every
    sample the target vector is filled with ``reward_function`` evaluated for
    each action index under the sample's stored context, and the entry of the
    action actually taken is overwritten with the observed reward. No
    bootstrapped next-state term is added (it is disabled in this version),
    so vanilla and non-vanilla modes currently train identically.

    Memory samples are unpacked as
    ``(state, action, reward, next_state, context, done)``.

    Parameters:
        global_context: Not used by this method; kept for caller
            compatibility.
    """
    num_batches = len(self.memory) // self.batch_size
    for _ in range(num_batches):
        batch = random.sample(self.memory, self.batch_size)

        states = np.array([sample[0] for sample in batch])
        next_states = np.array([sample[3] for sample in batch])

        assert states.shape == (self.batch_size, self.state_size), \
            'States Shape: {}'.format(states.shape)
        assert next_states.shape == states.shape

        beh_state_preds = self.dqn_predict(states)

        inputs = np.zeros((self.batch_size, self.state_size))
        targets = np.zeros((self.batch_size, self.num_actions))

        # The original pattern bound ``s`` twice (state and next state), so
        # the next state silently replaced the state as the network input.
        # Distinct names keep the input aligned with ``beh_state_preds[i]``.
        for i, (state, action, reward, _next_state, context, _done) in enumerate(batch):
            target = beh_state_preds[i]
            for action_index in range(len(target)):
                target[action_index] = reward_function(
                    self.map_index_to_action(action_index), context)
            # The observed reward overrides the estimate for the taken action.
            target[action] = reward

            inputs[i] = state
            targets[i] = target

        self.beh_model.fit(inputs, targets, epochs=10, verbose=0)
def step(self, agent_action):
    """
    Return the user simulator's response to the agent action.

    Given the agent action craft a response by using deterministic rules that
    simulate (to some extent) a user. Some parts of the rules are stochastic.
    Check if the agent has succeeded or lost or still going.

    Parameters:
        agent_action (dict): The agent action

    Returns:
        dict: User sim. response
        Reward: whatever ``reward_function(success, self.max_round)`` returns
        bool: Done flag
        bool: True only when the outcome equals 1 (win)
    """
    # Assertions -----
    # inform_slots values of the agent action may be neither UNK nor PLACEHOLDER
    for value in agent_action['inform_slots'].values():
        assert value != 'UNK'
        assert value != 'PLACEHOLDER'
    # request_slots values of the agent action may not be PLACEHOLDER
    for value in agent_action['request_slots'].values():
        assert value != 'PLACEHOLDER'
    # ----------------

    self.state['inform_slots'].clear()
    self.state['intent'] = ''

    done = False
    success = NO_OUTCOME

    # Check round num: reaching max_round means the dialogue failed
    if agent_action['round'] == self.max_round:
        done = True
        success = FAIL
        self.state['intent'] = 'done'
        self.state['request_slots'].clear()
    # Otherwise answer according to the agent intent
    else:
        agent_intent = agent_action['intent']
        if agent_intent == 'request':
            self._response_to_request(agent_action)
        elif agent_intent == 'inform':
            self._response_to_inform(agent_action)
        elif agent_intent == 'match_found':
            self._response_to_match_found(agent_action)
        elif agent_intent == 'done':
            success = self._response_to_done()
            self.state['intent'] = 'done'
            self.state['request_slots'].clear()
            done = True

    # Assumptions -------
    # If request intent, then make sure request slots
    if self.state['intent'] == 'request':
        assert self.state['request_slots']
    # If inform intent, then make sure inform slots and NO request slots
    if self.state['intent'] == 'inform':
        assert self.state['inform_slots']
        assert not self.state['request_slots']
    assert 'UNK' not in self.state['inform_slots'].values()
    assert 'PLACEHOLDER' not in self.state['request_slots'].values()
    # No overlap between rest and hist
    for key in self.state['rest_slots']:
        assert key not in self.state['history_slots']
    for key in self.state['history_slots']:
        assert key not in self.state['rest_slots']
    # All slots in both rest and hist should contain the slots for goal
    # NOTE(review): these ``.get(key, False)`` checks also fail for a key that
    # is present with a falsy value - confirm slot values are never falsy.
    for inf_key in self.goal['inform_slots']:
        assert self.state['history_slots'].get(inf_key, False) or self.state['rest_slots'].get(inf_key, False)
    for req_key in self.goal['request_slots']:
        assert self.state['history_slots'].get(req_key, False) or self.state['rest_slots'].get(req_key, False), req_key
    # Anything in the rest should be in the goal
    for key in self.state['rest_slots']:
        assert self.goal['inform_slots'].get(key, False) or self.goal['request_slots'].get(key, False)
    assert self.state['intent'] != ''
    # -----------------------

    # Deep copies keep the returned response independent of later state edits.
    user_response = {
        'intent': self.state['intent'],
        'request_slots': copy.deepcopy(self.state['request_slots']),
        'inform_slots': copy.deepcopy(self.state['inform_slots']),
    }

    reward = reward_function(success, self.max_round)

    # Compare by value: ``is 1`` tests object identity, which only works by
    # accident of CPython's small-int cache.
    return user_response, reward, done, success == 1
def step(self, agent_action, tot_slt_len, stp):
    """
    Return the response of the user sim. to the agent by using rules that simulate a user.

    Given the agent action craft a response by using deterministic rules that
    simulate (to some extent) a user. Some parts of the rules are stochastic.
    Check if the agent has succeeded or lost or still going.

    Parameters:
        agent_action (dict): The agent action that the user sim. responds to
        tot_slt_len: Not used by this method (the reward reads
            ``self.tot_slt`` instead); kept for caller compatibility.
        stp: Forwarded unchanged as the last argument of ``reward_function``

    Returns:
        dict: User sim. response
        Reward: whatever ``reward_function`` returns
        bool: Done flag
        bool: True only when the outcome equals 1 (win)
    """
    # Number of remaining slots before this turn; the reward function gets
    # both the before and after counts.
    prev_slt_len = len(self.state['rest_slots'])

    # Assertions -----
    # No UNK in agent action informs
    for value in agent_action['inform_slots'].values():
        assert value != 'UNK'
        assert value != 'PLACEHOLDER'
    # No PLACEHOLDER in agent at all
    for value in agent_action['request_slots'].values():
        assert value != 'PLACEHOLDER'
    # ----------------

    self.state['inform_slots'].clear()
    self.state['intent'] = ''

    done = False
    success = NO_OUTCOME

    # First check round num, if equal to max then fail
    if agent_action['round'] == self.max_round:
        done = True
        success = FAIL
        self.state['intent'] = 'done'
        self.state['request_slots'].clear()
    else:
        agent_intent = agent_action['intent']
        if agent_intent == 'request':
            self._response_to_request(agent_action)
        elif agent_intent == 'inform':
            self._response_to_inform(agent_action)
        elif agent_intent == 'match_found':
            self._response_to_match_found(agent_action)
        elif agent_intent == 'done':
            success = self._response_to_done()
            self.state['intent'] = 'done'
            self.state['request_slots'].clear()
            done = True

    # Assumptions -------
    # If request intent, then make sure request slots
    if self.state['intent'] == 'request':
        assert self.state['request_slots']
    # If inform intent, then make sure inform slots and NO request slots
    if self.state['intent'] == 'inform':
        assert self.state['inform_slots']
        assert not self.state['request_slots']
    assert 'UNK' not in self.state['inform_slots'].values()
    assert 'PLACEHOLDER' not in self.state['request_slots'].values()
    # No overlap between rest and hist
    for key in self.state['rest_slots']:
        assert key not in self.state['history_slots']
    for key in self.state['history_slots']:
        assert key not in self.state['rest_slots']
    # All slots in both rest and hist should contain the slots for goal
    for inf_key in self.goal['inform_slots']:
        assert self.state['history_slots'].get(inf_key, False) or self.state['rest_slots'].get(inf_key, False)
    for req_key in self.goal['request_slots']:
        assert self.state['history_slots'].get(req_key, False) or self.state['rest_slots'].get(req_key, False), req_key
    # Anything in the rest should be in the goal
    for key in self.state['rest_slots']:
        assert self.goal['inform_slots'].get(key, False) or self.goal['request_slots'].get(key, False)
    # An empty intent is tolerated here: that assertion is disabled in this
    # variant.
    # -----------------------

    user_response = {
        'intent': self.state['intent'],
        'request_slots': copy.deepcopy(self.state['request_slots']),
        'inform_slots': copy.deepcopy(self.state['inform_slots']),
    }

    reward = reward_function(success, self.max_round, self.tot_slt,
                             prev_slt_len, len(self.state['rest_slots']), stp)

    # Compare by value: ``is 1`` tests object identity, which only works by
    # accident of CPython's small-int cache.
    return user_response, reward, done, success == 1
def step(self, agent_action):
    """
    Ask a human at the console for the user's reaction to the agent action.

    The agent action is printed, then the user is prompted for an emotion and
    an intent via ``input()``. Each prompt repeats until a recognised value is
    typed. When the agent's round equals ``self.max_round`` the only accepted
    intent is 'goodbye'.

    Parameters:
        agent_action (dict): The agent action that the user responds to

    Returns:
        dict: User response
        Reward: whatever ``reward_function(success, done, self)`` returns
        bool: Done flag
        bool: True only when the outcome equals 1 (win)
    """
    print('Agent Action: {}'.format(
        dict2sentence(agent_action, self.sentence_dict)))

    # Assertions -----
    # No UNK in agent action informs
    for value in agent_action['inform_slots'].values():
        assert value != 'UNK'
        assert value != 'PLACEHOLDER'
    # No PLACEHOLDER in agent at all
    for value in agent_action['request_slots'].values():
        assert value != 'PLACEHOLDER'
    # ----------------

    # Remember the previous intent and emotion before resetting the turn state.
    self.state['request_slots'].clear()
    self.state['inform_slots'].clear()
    self.state['last_intent'] = copy.deepcopy(self.state['intent'])
    self.state['intent'] = ''
    self.user_mood.last_mood['emotion'] = copy.deepcopy(
        self.user_mood.current_mood['emotion'])

    done = False
    success = NO_OUTCOME

    # Typed symbol -> stored emotion label (labels are runtime values that
    # other code compares against, so the spelling is kept as-is).
    emotions = {"-": "negativ", "~": "neutral", "+": "positiv"}
    while True:
        input_emotion = input(
            'How do you feel after the agent action? Type "-" = negative, "~" = neutral, "+" = positive:'
        )
        if input_emotion in emotions:
            self.user_mood.current_mood['emotion'] = emotions[input_emotion]
            break

    # check round num, if equal to max then the user can only say goodbye
    if agent_action['round'] == self.max_round:
        while True:
            input_intent = input("Say 'goodbye':")
            if input_intent == 'goodbye':
                done = True
                success = self.success_check
                self.state['intent'] = 'goodbye'
                self.state['request_slots'].clear()
                self.state['inform_slots'].clear()
                break
    else:
        while True:
            input_intent = input(
                'Type your intent: "order_drinks", "inform", "request", "reject", "thanks", "goodbye", "nothing":'
            )
            if input_intent in ("reject", "thanks", "goodbye", "nothing"):
                self._only_intent_utternace(input_intent)
                if input_intent == "thanks":
                    # Thanking marks the dialogue as successful; the outcome
                    # is only reported once the user says goodbye.
                    self.success_check = SUCCESS
                if input_intent == "goodbye":
                    done = True
                    success = self.success_check
                break
            elif input_intent == "order_drinks":
                self._order_drinks()
                break
            elif input_intent == "inform":
                self.inform(input_intent)
                break
            elif input_intent == "request":
                self.request(input_intent)
                break

    # Assumptions -------
    # If request intent, then make sure request slots
    if self.state['intent'] == 'request':
        assert self.state['request_slots']
    # If inform intent, then make sure inform slots and NO request slots
    if self.state['intent'] == 'inform':
        assert self.state['inform_slots']
        assert not self.state['request_slots']
    assert 'UNK' not in self.state['inform_slots'].values()
    assert 'PLACEHOLDER' not in self.state['request_slots'].values()
    # -----------------------

    reward = reward_function(success, done, self)

    user_response = {
        'intent': self.state['intent'],
        'request_slots': copy.deepcopy(self.state['request_slots']),
        'inform_slots': copy.deepcopy(self.state['inform_slots']),
    }

    print('Your Action: {}'.format(
        dict2sentence(user_response, self.sentence_dict)))

    # Compare by value: ``is 1`` tests object identity, which only works by
    # accident of CPython's small-int cache.
    return user_response, reward, done, success == 1
def run_round(state, global_context, user_input, dqn_agent, state_tracker, user, warmup=False):
    """
    Run one dialogue round: absorb the user input, pick and apply an agent action, step the user.

    The user input is first folded into ``global_context`` according to its
    intent ('inform', 'request', 'done_success' or 'done_fail'). The agent
    action is then chosen greedily as the entry of the module-level
    ``agent_actions`` with the highest ``reward_function`` value under the
    current context.

    Parameters:
        state: Not used in this function.
        global_context (dict): Dialogue context; mutated in place and also
            rebound to the values returned by the agent, tracker and user.
        user_input (dict): User action; 'intent' is always read,
            'inform_slots' / 'request_slots' depending on the intent.
        dqn_agent: Provides ``map_action_to_index``, ``map_index_to_action``
            and ``take_action``.
        state_tracker: Provides ``update_state_tracker_state`` and ``get_state``.
        user: Provides ``step(agent_action, global_context, user_input)``.
        warmup (bool): Not used in this function.

    Returns:
        tuple: ``(next_state, reward, done, global_context, next_agent_action)``
    """
    global_context['controller'] = 'user'
    if user_input['intent'] == 'inform':
        global_context['current_user_intents'] = ['inform']
        global_context['current_user_informs'] = user_input['inform_slots']
        global_context['all_user_informs'].append(user_input['inform_slots'])
        # Merge every non-empty informed value into the persistent informs.
        # Indexing user_input['inform_slots'][key] raises KeyError if the
        # user input lacks one of the persistent keys.
        for key in global_context['constant_user_informs'].keys():
            if user_input['inform_slots'][key] != '':
                if global_context['constant_user_informs'][key] == '':
                    global_context['constant_user_informs'][key] = user_input['inform_slots'][key]
                elif global_context['constant_user_informs'][key] != '':
                    if global_context['constant_user_informs'][key] != user_input['inform_slots'][key]:
                        global_context['constant_user_informs'][key] = user_input['inform_slots'][key]
                        print('Agent: Replacing context of {}'.format(key))
                        # NOTE(review): the source is whitespace-collapsed, so
                        # the nesting of the resets below is reconstructed;
                        # they are assumed to run only when an existing value
                        # is replaced - confirm against the original file.
                        if key == 'task_name':
                            global_context['constant_user_informs']['instruction'] = ''
                            if global_context['constant_user_informs']['task_name'] in ['diagnosis_patterns']:
                                global_context['constant_user_informs']['context_name'] = 'Not Needed'
                            if global_context['constant_user_informs']['task_name'] in ['text_summarization_patterns', 'anomaly_detection_patterns']:
                                global_context['constant_user_informs']['instruction'] = 'Not Needed'
                        elif key == 'context_name':
                            global_context['constant_user_informs']['instruction'] = ''
                            global_context['constant_user_informs']['task_name'] = ''
    elif user_input['intent'] == 'request':
        global_context['current_user_intents'] = ['request']
        global_context['current_user_requests'] = user_input['request_slots']
        global_context['all_user_requests'].append(user_input['request_slots'])
        for key in global_context['constant_user_requests']:
            if user_input['request_slots'][key] != '':
                global_context['constant_user_requests'][key] = user_input['request_slots'][key]
    elif user_input['intent'] == 'done_success':
        global_context['current_user_intents'] = ['done_success']
        global_context['current_state'] = 'done'
    elif user_input['intent'] == 'done_fail':
        global_context['current_user_intents'] = ['done_fail']
        global_context['current_state'] = 'done'
    # This binds a second name to the same dict; it is not a snapshot copy.
    prev_global_context = global_context
    # Score every candidate action, keyed by its index, and take the best one.
    action_rewards = {}
    for curr_action in agent_actions:
        action_rewards[dqn_agent.map_action_to_index(curr_action)] = reward_function(curr_action, prev_global_context)
    agent_action = dqn_agent.map_index_to_action(max(action_rewards, key=action_rewards.get))
    global_context, next_agent_action = dqn_agent.take_action(agent_action, global_context)
    # print(next_agent_action)
    global_context = state_tracker.update_state_tracker_state(global_context)
    # user_action is not used after this call.
    user_action, reward, global_context, done = user.step(next_agent_action, global_context, user_input)
    global_context = state_tracker.update_state_tracker_state(global_context)
    next_state = state_tracker.get_state(done)
    return next_state, reward, done, global_context, next_agent_action
def step(self, agent_action):
    """
    Return the response of the user sim. to the agent by using rules that simulate a user.

    Given the agent action craft a response by using deterministic rules that
    simulate (to some extent) a user. Some parts of the rules are stochastic.
    Check if the agent has succeeded or lost or still going. If the user's
    current emotion is "negativ" for the fifth counted time, the dialogue is
    ended with a 'goodbye' and the reward is reduced by ``self.max_round``.

    Parameters:
        agent_action (dict): The agent action that the user sim. responds to

    Returns:
        dict: User sim. response
        Reward: ``reward_function(...)`` result, possibly reduced as above
        bool: Done flag
        bool: True only when the outcome equals 1 (win)
    """
    # Assertions -----
    # No UNK in agent action informs
    for value in agent_action['inform_slots'].values():
        assert value != 'UNK'
        assert value != 'PLACEHOLDER'
    # No PLACEHOLDER in agent at all
    for value in agent_action['request_slots'].values():
        assert value != 'PLACEHOLDER'
    # ----------------

    self.state['inform_slots'].clear()
    self.state['last_intent'] = self.state['intent']
    self.state['intent'] = ''

    done = False
    success = NO_OUTCOME

    # NOTE(review): an intent outside this chain leaves
    # ``classified_agent_action`` unbound and the reward call below raises
    # UnboundLocalError - confirm the agent can only emit these intents.
    agent_intent = agent_action['intent']
    if agent_intent == 'utter_nothing':
        classified_agent_action = self._response_to_utter_nothing(
            agent_action)
    elif agent_intent == 'joke':
        classified_agent_action = self._response_to_joke(agent_action)
    elif agent_intent == 'trigger_user':
        classified_agent_action = self._response_to_trigger_user(
            agent_action)
    elif agent_intent == 'utter_request':
        classified_agent_action = self._response_to_utter_request(
            agent_action)
    elif agent_intent == 'utter_inform':
        classified_agent_action = self._response_to_utter_inform(
            agent_action)
    elif agent_intent == 'find_drink':
        classified_agent_action = self._response_to_find_drink(
            agent_action)
    elif agent_intent == 'utter_goodbye':
        success = self._check_success()
        classified_agent_action = 'utter_goodbye'
        self.state['intent'] = 'goodbye'
        self.state['request_slots'].clear()
        done = True

    # check round num: at max_round the dialogue ends with whatever outcome
    # _check_success() reports, overriding the response built above
    if agent_action['round'] == self.max_round:
        done = True
        success = self._check_success()
        self.state['intent'] = 'goodbye'
        self.state['request_slots'].clear()
        self.state['inform_slots'].clear()

    # Assumptions -------
    # If request intent, then make sure request slots
    if self.state['intent'] == 'request':
        assert self.state['request_slots']
    # If inform intent, then make sure inform slots and NO request slots
    if self.state['intent'] == 'inform':
        assert self.state['inform_slots']
        assert not self.state['request_slots']
    assert 'UNK' not in self.state['inform_slots'].values()
    assert 'PLACEHOLDER' not in self.state['request_slots'].values()
    # No overlap between rest and hist
    for key in self.state['rest_slots']:
        assert key not in self.state['history_slots']
    for key in self.state['history_slots']:
        assert key not in self.state['rest_slots']
    # All slots in both rest and hist should contain the slots for goal
    for inf_key in self.goal['inform_slots']:
        assert self.state['history_slots'].get(
            inf_key, False) or self.state['rest_slots'].get(
                inf_key, False)
    for req_key in self.goal['request_slots']:
        assert self.state['history_slots'].get(
            req_key, False) or self.state['rest_slots'].get(
                req_key, False), req_key
    # Anything in the rest should be in the goal
    for key in self.state['rest_slots']:
        assert self.goal['inform_slots'].get(
            key, False) or self.goal['request_slots'].get(key, False)
    assert self.state['intent'] != ''
    # -----------------------

    user_response = {
        'intent': self.state['intent'],
        'request_slots': copy.deepcopy(self.state['request_slots']),
        'inform_slots': copy.deepcopy(self.state['inform_slots']),
    }

    reward = reward_function(success, done, self, classified_agent_action,
                             agent_action)

    # A user who has been in a negative mood for the fifth counted time walks
    # away. ``success`` is deliberately left untouched; only the reward is
    # reduced.
    if self.user_mood.current_mood['emotion'] == "negativ":
        self.count += 1
        if self.count == 5:
            done = True
            reward -= self.max_round
            user_response = {
                'intent': 'goodbye',
                'request_slots': {},
                'inform_slots': {},
            }

    # Compare by value: ``is 1`` tests object identity, which only works by
    # accident of CPython's small-int cache.
    return user_response, reward, done, success == 1
def step(self, agent_action):
    """
    Return the response of the user sim. to the agent by using rules that simulate a user.

    Given the agent action craft a response by using deterministic rules that
    simulate (to some extent) a user. Some parts of the rules are stochastic.
    Check if the agent has succeeded or lost or still going. The outcome is
    kept on ``self.success`` rather than in a local.

    Parameters:
        agent_action (dict): The agent action that the user sim. responds to

    Returns:
        dict: User sim. response
        Reward: whatever ``reward_function(self.success, self.max_round)`` returns
        bool: Done flag
        bool: True only when ``self.success`` equals 1 (win)
    """
    # Assertions -----
    # No UNK in agent action informs
    for value in agent_action['inform_slots'].values():
        assert value != 'UNK'
        assert value != 'PLACEHOLDER'
    # No PLACEHOLDER in agent at all
    for value in agent_action['request_slots'].values():
        assert value != 'PLACEHOLDER'
    # ----------------

    # Unlike sibling variants, the previous intent is not reset here.
    self.state['inform_slots'].clear()

    done = False
    self.success = NO_OUTCOME

    # First check round num, if equal to max then fail
    if agent_action['round'] == self.max_round:
        done = True
        self.success = FAIL
        self.state['intent'] = 'done'
        self.state['request_slots'].clear()
    else:
        agent_intent = agent_action['intent']
        if agent_intent == 'request':
            self._response_to_request(agent_action)
        elif agent_intent == 'inform':
            self._response_to_inform(agent_action)
        elif agent_intent == 'match_found':
            self._response_to_match_found(agent_action)
        elif agent_intent == 'done':
            # The return value is ignored; presumably this helper records the
            # outcome on self.success - TODO confirm.
            self._response_to_done()
            self.state['intent'] = 'done'
            self.state['request_slots'].clear()
            done = True

    # Assumptions -------
    # If request intent, then make sure request slots
    if self.state['intent'] == 'request':
        assert self.state['request_slots']
    # If inform intent, then make sure inform slots and NO request slots
    if self.state['intent'] == 'inform':
        assert self.state['inform_slots']
        assert not self.state['request_slots']
    assert 'UNK' not in self.state['inform_slots'].values()
    assert 'PLACEHOLDER' not in self.state['request_slots'].values()
    # No overlap between rest and hist
    for key in self.state['rest_slots']:
        assert key not in self.state['history_slots']
    for key in self.state['history_slots']:
        assert key not in self.state['rest_slots']
    # All slots in both rest and hist should contain the slots for goal.
    # Membership (not truthiness) is tested so falsy slot values are allowed.
    for inf_key in self.goal['inform_slots']:
        assert inf_key in self.state['history_slots'] or inf_key in self.state['rest_slots'], inf_key
    for req_key in self.goal['request_slots']:
        assert req_key in self.state['history_slots'] or req_key in self.state['rest_slots'], req_key
    # Anything in the rest should be in the goal
    for key in self.state['rest_slots']:
        assert key in self.goal['inform_slots'] or key in self.goal['request_slots'], key
    assert self.state['intent'] != ''
    # -----------------------

    user_response = {
        'intent': self.state['intent'],
        'request_slots': copy.deepcopy(self.state['request_slots']),
        'inform_slots': copy.deepcopy(self.state['inform_slots']),
    }

    reward = reward_function(self.success, self.max_round)

    # Compare by value: ``is 1`` tests object identity, which only works by
    # accident of CPython's small-int cache.
    return user_response, reward, done, self.success == 1