Python reward_functionの例、utils.reward_function Pythonの例

コード例 #1

0

ファイルを表示

    def step(self, agent_action, global_context, user_input):
        """Score the agent's action, then advance the dialogue by one turn.

        The reward is computed from the context exactly as it was passed in,
        before ``get_response_and_take_action`` is called.

        Returns:
            tuple: ``(user_input, reward, global_context, done)`` where ``done``
            is True when ``global_context['current_state']`` equals 'done'.
        """
        # Reward first: it is evaluated before this turn's response is produced.
        reward = reward_function(agent_action, global_context)

        response = self.get_response_and_take_action(global_context, user_input)
        user_input, global_context = response

        done = global_context['current_state'] == 'done'
        return user_input, reward, global_context, done

コード例 #2

0

ファイルを表示

ファイル: user_sim.py プロジェクト: cp4011/DRL_Hospital

 def step(self, slot, num_turn):
     """Build the simulated user's action for the slot chosen by the agent.

     Parameters:
         slot: Slot name returned by the agent; also the key into ``User_actions``.
         num_turn: Current turn count, compared against ``max_round``.

     Returns:
         tuple: ``(self.user_action, reward, done, success)``
     """
     # The slot_to_fill returned by the agent is a required slot, so it must
     # be given a value: draw one at random from the database.
     if slot in Required_slots:
         self.user_action["inform_slots"][slot] = random.choice(DB[slot])
     self.user_action["intent"] = User_actions[slot]

     done, success = False, NO_OUTCOME
     if slot == "success":
         done, success = True, SUCCESS
     # Checked last on purpose: a failure or a turn overrun overrides the
     # outcome set above.
     if slot == "fail" or num_turn > max_round:
         done, success = True, FAIL

     reward = reward_function(success, max_round)
     return self.user_action, reward, done, success

コード例 #3

0

ファイルを表示

    def step(self, agent_action):
        """
        Return the user's response, reward, done flag and win flag.

        Parameters:
            agent_action (dict): The current action of the agent; read for its
                "inform_slots", "request_slots" and "round" entries

        Returns:
            dict: User response
            Reward: value returned by ``reward_function(success, self.max_round)``
            bool: Done flag
            bool: True only when the outcome code equals 1 (win)
        """

        # Assertions ----
        # Agent informs may carry neither "UNK" nor "PLACEHOLDER"
        for value in agent_action["inform_slots"].values():
            assert value != "UNK"
            assert value != "PLACEHOLDER"
        # Agent requests may not carry "PLACEHOLDER"
        for value in agent_action["request_slots"].values():
            assert value != "PLACEHOLDER"
        # ---------------

        print("Agent Action: {}".format(agent_action))

        # First check round num, if equal to max then fail
        if agent_action["round"] == self.max_round:
            success = FAIL
            user_response = {"intent": "done", "request_slots": {}, "inform_slots": {}}
        else:
            user_response = self._return_response()
            success = self._return_success()

        # The episode ends as soon as an outcome (either way) is known
        done = success in (FAIL, SUCCESS)

        assert "UNK" not in user_response["inform_slots"].values()
        assert "PLACEHOLDER" not in user_response["request_slots"].values()

        reward = reward_function(success, self.max_round)

        # Compare by value: ``success is 1`` tested object identity, which only
        # works through CPython's small-int caching and warns on Python 3.8+.
        return user_response, reward, done, success == 1

コード例 #4

0

ファイルを表示

    def step(self, agent_action):
        """
        Return the user's nlg, reward, done flag and win flag.

        Parameters:
            agent_action (dict): The current action of the agent; read for its
                'inform_slots', 'request_slots' and 'round' entries

        Returns:
            dict: User nlg
            Reward: value returned by ``reward_function(success, self.max_round)``
            bool: Done flag
            bool: True only when the outcome code equals 1 (win)
        """

        # Assertions ----
        # Agent informs may carry neither 'UNK' nor 'PLACEHOLDER'
        for value in agent_action['inform_slots'].values():
            assert value != 'UNK'
            assert value != 'PLACEHOLDER'
        # Agent requests may not carry 'PLACEHOLDER'
        for value in agent_action['request_slots'].values():
            assert value != 'PLACEHOLDER'
        # ---------------

        print('Agent Action: {}'.format(agent_action))

        # First check round num, if equal to max then fail
        if agent_action['round'] == self.max_round:
            success = FAIL
            user_response = {'intent': 'done', 'request_slots': {}, 'inform_slots': {}}
        else:
            user_response = self._return_response()
            success = self._return_success()

        # The episode ends as soon as an outcome (either way) is known
        done = success in (FAIL, SUCCESS)

        assert 'UNK' not in user_response['inform_slots'].values()
        assert 'PLACEHOLDER' not in user_response['request_slots'].values()

        reward = reward_function(success, self.max_round)

        # Compare by value: ``success is 1`` tested object identity, which only
        # works through CPython's small-int caching and warns on Python 3.8+.
        return user_response, reward, done, success == 1

コード例 #5

0

ファイルを表示

    def step(self, agent_action, num_turn):
        """
        Return the user's response, reward, done flag and win flag.

        Parameters:
            agent_action (dict): The current action of the agent (only printed here)
            num_turn: Current turn number, compared against ``self.max_round``

        Returns:
            dict: User response
            Reward: value returned by ``reward_function(success, self.max_round)``
            bool: Done flag
            bool: True only when the outcome code equals 1 (win)
        """

        # NOTE: the checks that agent informs/requests contain no 'UNK' /
        # 'PLACEHOLDER' values are switched off in this variant.
        print('Agent Action: {}'.format(agent_action))

        # First check round num: once max_round is reached the dialogue fails
        # outright and the user answers with intent 'done'.
        if num_turn == self.max_round:
            success = FAIL
            # Original note: the state tracker zeroes its state when it
            # receives a 'done' intent — not verifiable from this method.
            user_response = {'intent': 'done', 'request_slots': {}, 'inform_slots': {}}
        else:
            user_response = self._return_response()
            success = self._return_success()

        # Any definite outcome (win or loss) ends the episode
        done = success in (FAIL, SUCCESS)

        assert 'UNK' not in user_response['inform_slots'].values()
        assert 'PLACEHOLDER' not in user_response['request_slots'].values()

        reward = reward_function(success, self.max_round)

        # Compare by value: ``success is 1`` tested object identity, which only
        # works through CPython's small-int caching and warns on Python 3.8+.
        return user_response, reward, done, success == 1

コード例 #6

0

ファイルを表示

    def step(self, agent_action: DialogAction):
        """Produce the user's reply to ``agent_action`` and score the turn.

        Parameters:
            agent_action (DialogAction): The agent's action; its ``turn``,
                ``intent`` and ``inform_slots`` attributes are read here.

        Returns:
            DialogAction: User response built from the updated ``self.state``
            Reward: value returned by ``reward_function(success, self.max_round)``
            bool: Done flag
            bool: True only when the outcome code equals 1 (win)
        """

        self.validate_action(agent_action)

        # Start the turn from a clean reply; request_slots are kept on purpose
        # unless the dialogue ends below.
        self.state.inform_slots.clear()
        self.state.intent = ""

        done = False
        success = NO_OUTCOME
        # First check round num, if equal to max then fail
        if agent_action.turn == self.max_round:
            done = True
            success = FAIL
            self.state.intent = "done"
            self.state.request_slots.clear()
        else:
            agent_intent = agent_action.intent
            if agent_intent == "request":
                self._response_to_request(agent_action)
            elif agent_intent == "inform":
                self._response_to_inform(agent_action)
            elif agent_intent == "match_found":
                self._response_to_match_found(agent_action.inform_slots)
            elif agent_intent == "done":
                success = self._response_to_done()
                self.state.intent = "done"
                self.state.request_slots.clear()
                done = True

        self.validate_state(self.state)

        user_response = DialogAction(
            self.state.intent,
            self.state.inform_slots,
            self.state.request_slots,
            speaker=USER,
        )

        reward = reward_function(
            success, self.max_round
        )  # TODO(tilo): reward-calculation must not be done by user!

        # Compare by value: ``success is 1`` tested object identity, which only
        # works through CPython's small-int caching and warns on Python 3.8+.
        return user_response, reward, done, success == 1

コード例 #7

0

ファイルを表示

    def train(self, global_context):
        """Fit the behaviour model on mini-batches sampled from ``self.memory``.

        ``len(self.memory) // self.batch_size`` batches are drawn. For every
        sample the target vector is filled with ``reward_function`` evaluated
        for each action index against the sample's stored context, and the
        entry of the action actually taken is then overwritten with the stored
        reward. No bootstrapped (gamma-discounted) term is added.

        Parameters:
            global_context: Not read by this method; kept for interface
                compatibility with callers.
        """

        num_batches = len(self.memory) // self.batch_size
        for _ in range(num_batches):
            batch = random.sample(self.memory, self.batch_size)

            # Memory samples are indexed as (state, action, reward, next_state,
            # context, done); positions 0 and 3 hold the two states.
            states = np.array([sample[0] for sample in batch])
            next_states = np.array([sample[3] for sample in batch])

            assert states.shape == (
                self.batch_size,
                self.state_size), 'States Shape: {}'.format(states.shape)
            assert next_states.shape == states.shape

            # Predictions for the next states are not requested any more: the
            # bootstrap term that would consume them is disabled, so the extra
            # forward pass was wasted work.
            beh_state_preds = self.dqn_predict(states)

            inputs = np.zeros((self.batch_size, self.state_size))
            targets = np.zeros((self.batch_size, self.num_actions))

            # The original unpacking bound the name ``s`` twice, so the NEXT
            # state silently replaced the current state and was stored as the
            # network input. Distinct names keep the current state.
            for i, (state, action, reward, _next_state, context, _done) in enumerate(batch):
                t = beh_state_preds[i]
                for itr in range(len(t)):
                    t[itr] = reward_function(self.map_index_to_action(itr), context)
                # Same assignment for vanilla DQN and DDQN while the bootstrap
                # term is disabled.
                t[action] = reward

                inputs[i] = state
                targets[i] = t

            self.beh_model.fit(inputs, targets, epochs=10, verbose=0)

コード例 #8

0

ファイルを表示

    def step(self, agent_action):
        """
        Return the user sim.'s reply to the agent.

        Given the agent action craft a response by using deterministic rules that simulate (to some extent) a user.
        Some parts of the rules are stochastic. Check if the agent has succeeded or lost or still going.

        Parameters:
            agent_action (dict): The agent action that the user sim. responds to

        Returns:
            dict: User sim. response
            Reward: value returned by ``reward_function(success, self.max_round)``
            bool: Done flag
            bool: True only when the outcome code equals 1 (win)
        """

        # Assertions -----
        # inform_slots of the agent action may hold neither UNK nor PLACEHOLDER
        for value in agent_action['inform_slots'].values():
            assert value != 'UNK'
            assert value != 'PLACEHOLDER'
        # request_slots of the agent action may not hold PLACEHOLDER
        for value in agent_action['request_slots'].values():
            assert value != 'PLACEHOLDER'
        # ----------------

        self.state['inform_slots'].clear()
        self.state['intent'] = ''

        done = False
        success = NO_OUTCOME
        # Check the round number: reaching max_round fails the dialogue
        if agent_action['round'] == self.max_round:
            done = True
            success = FAIL
            self.state['intent'] = 'done'
            self.state['request_slots'].clear()
        # Otherwise answer according to the agent's intent
        else:
            agent_intent = agent_action['intent']
            if agent_intent == 'request':
                self._response_to_request(agent_action)
            elif agent_intent == 'inform':
                self._response_to_inform(agent_action)
            elif agent_intent == 'match_found':
                self._response_to_match_found(agent_action)
            elif agent_intent == 'done':
                success = self._response_to_done()
                self.state['intent'] = 'done'
                self.state['request_slots'].clear()
                done = True

        # Assumptions -------
        # If request intent, then make sure request slots
        if self.state['intent'] == 'request':
            assert self.state['request_slots']
        # If inform intent, then make sure inform slots and NO request slots
        if self.state['intent'] == 'inform':
            assert self.state['inform_slots']
            assert not self.state['request_slots']
        assert 'UNK' not in self.state['inform_slots'].values()
        assert 'PLACEHOLDER' not in self.state['request_slots'].values()
        # No overlap between rest and hist
        for key in self.state['rest_slots']:
            assert key not in self.state['history_slots']
        for key in self.state['history_slots']:
            assert key not in self.state['rest_slots']
        # All slots in both rest and hist should contain the slots for goal
        for inf_key in self.goal['inform_slots']:
            assert self.state['history_slots'].get(inf_key, False) or self.state['rest_slots'].get(inf_key, False)
        for req_key in self.goal['request_slots']:
            assert (self.state['history_slots'].get(req_key, False)
                    or self.state['rest_slots'].get(req_key, False)), req_key
        # Anything in the rest should be in the goal
        for key in self.state['rest_slots']:
            assert self.goal['inform_slots'].get(key, False) or self.goal['request_slots'].get(key, False)
        assert self.state['intent'] != ''
        # -----------------------

        # Hand out copies so later state changes cannot leak into the reply
        user_response = {
            'intent': self.state['intent'],
            'request_slots': copy.deepcopy(self.state['request_slots']),
            'inform_slots': copy.deepcopy(self.state['inform_slots']),
        }

        reward = reward_function(success, self.max_round)

        # Compare by value: ``success is 1`` tested object identity, which only
        # works through CPython's small-int caching and warns on Python 3.8+.
        return user_response, reward, done, success == 1

コード例 #9

0

ファイルを表示

ファイル: user_simulator.py プロジェクト: AbhisekTiwari/ACM_DM

    def step(self, agent_action,tot_slt_len,stp):
        """
        Return the response of the user sim. to the agent by using rules that simulate a user.

        Given the agent action craft a response by using deterministic rules that simulate (to some extent) a user.
        Some parts of the rules are stochastic. Check if the agent has succeeded or lost or still going.

        Parameters:
            agent_action (dict): The agent action that the user sim. responds to
            tot_slt_len: Not read by this method (``self.tot_slt`` is what gets
                forwarded to ``reward_function``); kept for caller compatibility
            stp: Forwarded unchanged to ``reward_function``

        Returns:
            dict: User sim. response
            Reward: value returned by ``reward_function``
            bool: Done flag
            bool: True only when the outcome code equals 1 (win)
        """

        # Number of rest slots before this turn; forwarded to reward_function
        # together with the count after the turn.
        prev_slt_len = len(self.state['rest_slots'])

        # Assertions -----
        # No UNK in agent action informs
        for value in agent_action['inform_slots'].values():
            assert value != 'UNK'
            assert value != 'PLACEHOLDER'
        # No PLACEHOLDER in agent at all
        for value in agent_action['request_slots'].values():
            assert value != 'PLACEHOLDER'
        # ----------------

        self.state['inform_slots'].clear()
        self.state['intent'] = ''

        done = False
        success = NO_OUTCOME
        # First check round num, if equal to max then fail
        if agent_action['round'] == self.max_round:
            done = True
            success = FAIL
            self.state['intent'] = 'done'
            self.state['request_slots'].clear()
        else:
            agent_intent = agent_action['intent']
            if agent_intent == 'request':
                self._response_to_request(agent_action)
            elif agent_intent == 'inform':
                self._response_to_inform(agent_action)
            elif agent_intent == 'match_found':
                self._response_to_match_found(agent_action)
            elif agent_intent == 'done':
                success = self._response_to_done()
                self.state['intent'] = 'done'
                self.state['request_slots'].clear()
                done = True

        # Assumptions -------
        # If request intent, then make sure request slots
        if self.state['intent'] == 'request':
            assert self.state['request_slots']
        # If inform intent, then make sure inform slots and NO request slots
        if self.state['intent'] == 'inform':
            assert self.state['inform_slots']
            assert not self.state['request_slots']
        assert 'UNK' not in self.state['inform_slots'].values()
        assert 'PLACEHOLDER' not in self.state['request_slots'].values()
        # No overlap between rest and hist
        for key in self.state['rest_slots']:
            assert key not in self.state['history_slots']
        for key in self.state['history_slots']:
            assert key not in self.state['rest_slots']
        # All slots in both rest and hist should contain the slots for goal
        for inf_key in self.goal['inform_slots']:
            assert self.state['history_slots'].get(inf_key, False) or self.state['rest_slots'].get(inf_key, False)
        for req_key in self.goal['request_slots']:
            assert (self.state['history_slots'].get(req_key, False)
                    or self.state['rest_slots'].get(req_key, False)), req_key
        # Anything in the rest should be in the goal
        for key in self.state['rest_slots']:
            assert self.goal['inform_slots'].get(key, False) or self.goal['request_slots'].get(key, False)
        # NOTE: the non-empty-intent check is switched off in this variant, so
        # the reply may carry an empty intent.
        # -----------------------

        user_response = {
            'intent': self.state['intent'],
            'request_slots': copy.deepcopy(self.state['request_slots']),
            'inform_slots': copy.deepcopy(self.state['inform_slots']),
        }

        reward = reward_function(success, self.max_round,self.tot_slt,prev_slt_len,len(self.state['rest_slots']),stp)

        # Compare by value: ``success is 1`` tested object identity, which only
        # works through CPython's small-int caching and warns on Python 3.8+.
        return user_response, reward, done, success == 1

コード例 #10

0

ファイルを表示

    def step(self, agent_action):
        """
        Return the response of a human user, typed on the console, to the agent.

        The agent action is printed, then the user is asked on stdin for the
        emotion it caused and for the intent of the reply. Prompts repeat
        until an accepted value is entered.

        Parameters:
            agent_action (dict): The agent action that the user responds to

        Returns:
            dict: User response
            Reward: value returned by ``reward_function(success, done, self)``
            bool: Done flag
            bool: True only when the outcome code equals 1 (win)
        """

        print('Agent Action: {}'.format(
            dict2sentence(agent_action, self.sentence_dict)))

        # Assertions -----
        # No UNK in agent action informs
        for value in agent_action['inform_slots'].values():
            assert value != 'UNK'
            assert value != 'PLACEHOLDER'
        # No PLACEHOLDER in agent at all
        for value in agent_action['request_slots'].values():
            assert value != 'PLACEHOLDER'
        # ----------------

        # Reset the per-turn state, remembering the previous intent and mood
        self.state['request_slots'].clear()
        self.state['inform_slots'].clear()
        self.state['last_intent'] = copy.deepcopy(self.state['intent'])
        self.state['intent'] = ''
        self.user_mood.last_mood['emotion'] = copy.deepcopy(
            self.user_mood.current_mood['emotion'])

        done = False
        success = NO_OUTCOME

        # Console symbol -> stored emotion label (labels are runtime values
        # compared elsewhere, so their spelling is kept as is)
        emotions = {"-": "negativ", "~": "neutral", "+": "positiv"}
        while True:
            input_emotion = input(
                'How do you feel after the agent action? Type "-" = negative, "~" = neutral, "+" = positive:'
            )
            if input_emotion in emotions:
                self.user_mood.current_mood['emotion'] = emotions[input_emotion]
                break

        # check round num: at the last round the only accepted reply is 'goodbye'
        if agent_action['round'] == self.max_round:
            while input("Say 'goodbye':") != 'goodbye':
                pass
            done = True
            success = self.success_check
            self.state['intent'] = 'goodbye'
            self.state['request_slots'].clear()
            self.state['inform_slots'].clear()

        else:
            while True:
                input_intent = input(
                    'Type your intent: "order_drinks", "inform", "request", "reject", "thanks", "goodbye", "nothing":'
                )
                if input_intent in ("reject", "thanks", "goodbye", "nothing"):
                    self._only_intent_utternace(input_intent)
                    if input_intent == "thanks":
                        # Remembered so that a later 'goodbye' reports a win
                        self.success_check = SUCCESS
                    if input_intent == "goodbye":
                        done = True
                        success = self.success_check
                    break

                if input_intent == "order_drinks":
                    self._order_drinks()
                    break

                elif input_intent == "inform":
                    self.inform(input_intent)
                    break

                elif input_intent == "request":
                    self.request(input_intent)
                    break

        # Assumptions -------
        # If request intent, then make sure request slots
        if self.state['intent'] == 'request':
            assert self.state['request_slots']
        # If inform intent, then make sure inform slots and NO request slots
        if self.state['intent'] == 'inform':
            assert self.state['inform_slots']
            assert not self.state['request_slots']
        assert 'UNK' not in self.state['inform_slots'].values()
        assert 'PLACEHOLDER' not in self.state['request_slots'].values()
        # -----------------------

        reward = reward_function(success, done, self)

        user_response = {
            'intent': self.state['intent'],
            'request_slots': copy.deepcopy(self.state['request_slots']),
            'inform_slots': copy.deepcopy(self.state['inform_slots']),
        }

        print('Your Action: {}'.format(
            dict2sentence(user_response, self.sentence_dict)))
        # Compare by value: ``success is 1`` tested object identity, which only
        # works through CPython's small-int caching and warns on Python 3.8+.
        return user_response, reward, done, success == 1

コード例 #11

0

ファイルを表示

ファイル: DM.py プロジェクト: ameyasm1154/Bachelors-Project

def run_round(state, global_context, user_input, dqn_agent, state_tracker, user, warmup=False):
	"""Run one dialogue round: absorb the user input, pick an agent action, step the user.

	The user input is first folded into ``global_context`` according to its
	intent. The agent action is then the one with the highest
	``reward_function`` score among ``agent_actions``; ``dqn_agent`` is only
	used to map actions to indices and back and to execute the chosen action.

	``state`` and ``warmup`` are not read by this function.

	Returns:
		tuple: ``(next_state, reward, done, global_context, next_agent_action)``
	"""
	global_context['controller'] = 'user'

	intent = user_input['intent']

	if intent == 'inform':
		global_context['current_user_intents'] = ['inform']
		global_context['current_user_informs'] = user_input['inform_slots']
		global_context['all_user_informs'].append(user_input['inform_slots'])

		constants = global_context['constant_user_informs']
		for key in constants.keys():
			new_value = user_input['inform_slots'][key]
			# An empty inform leaves the remembered value untouched.
			if new_value == '':
				continue
			# First value for this slot: store it silently.
			if constants[key] == '':
				constants[key] = new_value
				continue
			# Same value as before: nothing to do.
			if constants[key] == new_value:
				continue

			# The user changed their mind: overwrite and reset dependent slots.
			constants[key] = new_value
			print('Agent: Replacing context of {}'.format(key))
			if key == 'task_name':
				constants['instruction'] = ''
				if constants['task_name'] in ['diagnosis_patterns']:
					constants['context_name'] = 'Not Needed'
				if constants['task_name'] in ['text_summarization_patterns', 'anomaly_detection_patterns']:
					constants['instruction'] = 'Not Needed'
			elif key == 'context_name':
				constants['instruction'] = ''
				constants['task_name'] = ''

	elif intent == 'request':
		global_context['current_user_intents'] = ['request']
		global_context['current_user_requests'] = user_input['request_slots']
		global_context['all_user_requests'].append(user_input['request_slots'])

		remembered = global_context['constant_user_requests']
		for key in remembered:
			requested = user_input['request_slots'][key]
			if requested != '':
				remembered[key] = requested

	elif intent == 'done_success':
		global_context['current_user_intents'] = ['done_success']
		global_context['current_state'] = 'done'

	elif intent == 'done_fail':
		global_context['current_user_intents'] = ['done_fail']
		global_context['current_state'] = 'done'

	# Score every candidate action against the context as it stands now
	# (same object as global_context, not a copy).
	action_rewards = {}
	for candidate in agent_actions:
		score = reward_function(candidate, global_context)
		action_rewards[dqn_agent.map_action_to_index(candidate)] = score

	best_index = max(action_rewards, key=action_rewards.get)
	agent_action = dqn_agent.map_index_to_action(best_index)

	global_context, next_agent_action = dqn_agent.take_action(agent_action, global_context)

	global_context = state_tracker.update_state_tracker_state(global_context)

	# The user's own reply (first element) is not used by this function.
	_user_action, reward, global_context, done = user.step(next_agent_action, global_context, user_input)

	global_context = state_tracker.update_state_tracker_state(global_context)

	next_state = state_tracker.get_state(done)

	return next_state, reward, done, global_context, next_agent_action

コード例 #12

0

ファイルを表示

    def step(self, agent_action):
        """
        Return the response of the user sim. to the agent by using rules that simulate a user.

        Given the agent action craft a response by using deterministic rules that simulate (to some extent) a user.
        Some parts of the rules are stochastic. Check if the agent has succeeded or lost or still going.

        Parameters:
            agent_action (dict): The agent action that the user sim. responds to

        Returns:
            dict: User sim. response
            Reward: value returned by ``reward_function``, reduced by
                ``self.max_round`` on the turn the negative-mood counter hits 5
            bool: Done flag
            bool: True only when the outcome code equals 1 (win)
        """

        # Assertions -----
        # No UNK in agent action informs
        for value in agent_action['inform_slots'].values():
            assert value != 'UNK'
            assert value != 'PLACEHOLDER'
        # No PLACEHOLDER in agent at all
        for value in agent_action['request_slots'].values():
            assert value != 'PLACEHOLDER'
        # ----------------

        self.state['inform_slots'].clear()
        self.state['last_intent'] = self.state['intent']
        self.state['intent'] = ''

        done = False
        success = NO_OUTCOME

        # One exclusive chain (the original started a second ``if`` after the
        # first branch, which behaved the same but read as two decisions).
        # NOTE(review): an intent outside this chain leaves
        # classified_agent_action unbound, so the reward_function call below
        # would raise if that turn also gets past the intent assertion —
        # confirm that callers only send the intents listed here.
        agent_intent = agent_action['intent']
        if agent_intent == 'utter_nothing':
            classified_agent_action = self._response_to_utter_nothing(
                agent_action)
        elif agent_intent == 'joke':
            classified_agent_action = self._response_to_joke(agent_action)
        elif agent_intent == 'trigger_user':
            classified_agent_action = self._response_to_trigger_user(
                agent_action)
        elif agent_intent == 'utter_request':
            classified_agent_action = self._response_to_utter_request(
                agent_action)
        elif agent_intent == 'utter_inform':
            classified_agent_action = self._response_to_utter_inform(
                agent_action)
        elif agent_intent == 'find_drink':
            classified_agent_action = self._response_to_find_drink(
                agent_action)
        elif agent_intent == 'utter_goodbye':
            success = self._check_success()
            classified_agent_action = 'utter_goodbye'
            self.state['intent'] = 'goodbye'
            self.state['request_slots'].clear()
            done = True

        # check round num: at max_round the user says goodbye and the outcome
        # is whatever _check_success() reports
        if agent_action['round'] == self.max_round:
            done = True
            success = self._check_success()
            self.state['intent'] = 'goodbye'
            self.state['request_slots'].clear()
            self.state['inform_slots'].clear()

        # Assumptions -------
        # If request intent, then make sure request slots
        if self.state['intent'] == 'request':
            assert self.state['request_slots']
        # If inform intent, then make sure inform slots and NO request slots
        if self.state['intent'] == 'inform':
            assert self.state['inform_slots']
            assert not self.state['request_slots']
        assert 'UNK' not in self.state['inform_slots'].values()
        assert 'PLACEHOLDER' not in self.state['request_slots'].values()
        # No overlap between rest and hist
        for key in self.state['rest_slots']:
            assert key not in self.state['history_slots']
        for key in self.state['history_slots']:
            assert key not in self.state['rest_slots']
        # All slots in both rest and hist should contain the slots for goal
        for inf_key in self.goal['inform_slots']:
            assert self.state['history_slots'].get(
                inf_key, False) or self.state['rest_slots'].get(
                    inf_key, False)
        for req_key in self.goal['request_slots']:
            assert self.state['history_slots'].get(
                req_key, False) or self.state['rest_slots'].get(
                    req_key, False), req_key
        # Anything in the rest should be in the goal
        for key in self.state['rest_slots']:
            assert self.goal['inform_slots'].get(
                key, False) or self.goal['request_slots'].get(key, False)
        assert self.state['intent'] != ''
        # -----------------------

        user_response = {
            'intent': self.state['intent'],
            'request_slots': copy.deepcopy(self.state['request_slots']),
            'inform_slots': copy.deepcopy(self.state['inform_slots']),
        }

        reward = reward_function(success, done, self, classified_agent_action,
                                 agent_action)

        # Every turn that ends in a "negativ" mood is counted; on the turn the
        # counter reaches 5 the user walks away: the dialogue ends, the reward
        # is cut by max_round and the reply becomes a bare goodbye. The outcome
        # code itself is left as it was.
        if self.user_mood.current_mood['emotion'] == "negativ":
            self.count += 1

            if self.count == 5:
                done = True
                reward -= self.max_round
                user_response = {
                    'intent': 'goodbye',
                    'request_slots': {},
                    'inform_slots': {},
                }

        # Compare by value: ``success is 1`` tested object identity, which only
        # works through CPython's small-int caching and warns on Python 3.8+.
        return user_response, reward, done, success == 1

コード例 #13

0

ファイルを表示

    def step(self, agent_action):
        """
        Return the response of the user sim. to the agent by using rules that simulate a user.

        Given the agent action craft a response by using deterministic rules that simulate (to some extent) a user.
        Some parts of the rules are stochastic. Check if the agent has succeeded or lost or still going.

        Parameters:
            agent_action (dict): The agent action that the user sim. responds to

        Returns:
            dict: User sim. response
            Reward: value returned by ``reward_function(self.success, self.max_round)``
            bool: Done flag
            bool: True only when ``self.success`` equals 1 (win)
        """

        # Assertions -----
        # No UNK in agent action informs
        for value in agent_action['inform_slots'].values():
            assert value != 'UNK'
            assert value != 'PLACEHOLDER'
        # No PLACEHOLDER in agent at all
        for value in agent_action['request_slots'].values():
            assert value != 'PLACEHOLDER'
        # ----------------

        # Only the informs are reset; in this variant the previous intent is
        # deliberately left in place until a branch below overwrites it.
        self.state['inform_slots'].clear()

        done = False
        self.success = NO_OUTCOME
        # First check round num, if equal to max then fail
        if agent_action['round'] == self.max_round:
            done = True
            self.success = FAIL
            self.state['intent'] = 'done'
            self.state['request_slots'].clear()
        else:
            agent_intent = agent_action['intent']
            if agent_intent == 'request':
                self._response_to_request(agent_action)
            elif agent_intent == 'inform':
                self._response_to_inform(agent_action)
            elif agent_intent == 'match_found':
                self._response_to_match_found(agent_action)
            elif agent_intent == 'done':
                # The return value is ignored here; presumably the helper
                # records the outcome on self.success — verify in
                # _response_to_done.
                self._response_to_done()
                self.state['intent'] = 'done'
                self.state['request_slots'].clear()
                done = True

        # Assumptions -------
        # If request intent, then make sure request slots
        if self.state['intent'] == 'request':
            assert self.state['request_slots']
        # If inform intent, then make sure inform slots and NO request slots
        if self.state['intent'] == 'inform':
            assert self.state['inform_slots']
            assert not self.state['request_slots']
        assert 'UNK' not in self.state['inform_slots'].values()
        assert 'PLACEHOLDER' not in self.state['request_slots'].values()
        # No overlap between rest and hist
        for key in self.state['rest_slots']:
            assert key not in self.state['history_slots']
        for key in self.state['history_slots']:
            assert key not in self.state['rest_slots']
        # All slots in both rest and hist should contain the slots for goal.
        # Membership is tested on the keys, so slots holding falsy values
        # still count as present.
        for inf_key in self.goal['inform_slots']:
            assert inf_key in self.state['history_slots'] or inf_key in self.state['rest_slots'], inf_key
        for req_key in self.goal['request_slots']:
            assert req_key in self.state['history_slots'] or req_key in self.state['rest_slots'], req_key
        # Anything in the rest should be in the goal
        for key in self.state['rest_slots']:
            assert key in self.goal['inform_slots'] or key in self.goal['request_slots'], key
        assert self.state['intent'] != ''
        # -----------------------

        user_response = {
            'intent': self.state['intent'],
            'request_slots': copy.deepcopy(self.state['request_slots']),
            'inform_slots': copy.deepcopy(self.state['inform_slots']),
        }

        reward = reward_function(self.success, self.max_round)

        # Compare by value: ``self.success is 1`` tested object identity, which
        # only works through CPython's small-int caching and warns on 3.8+.
        return user_response, reward, done, self.success == 1