Example #1
    def flush_pool(self):
        if self.parameter.get('prioritized_replay'):
            self.experience_replay_pool = PrioritizedReplayBuffer(
                buffer_size=self.master_experience_replay_size)
        else:
            self.experience_replay_pool = deque(
                maxlen=self.master_experience_replay_size)
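
The two pool types created above are consumed differently downstream. Below is a minimal, illustrative sketch of both access patterns; the deque part is runnable, while the prioritized calls are left as comments because PrioritizedReplayBuffer is project-specific (its add/sample signatures are taken from the other examples on this page).

import random
from collections import deque

pool = deque(maxlen=10000)
pool.append(("state_rep", 0, 1.0, "next_state_rep", False))  # (state, action, reward, next_state, episode_over)
batch = random.sample(pool, min(16, len(pool)))  # uniform sampling, as in train_dqn

# With prioritized_replay enabled, the pool exposes add(...)/sample(...) instead:
# pool.add(state_rep, agent_action, reward, next_state_rep, episode_over, TD_error)
# batch = pool.sample(batch_size=16, priority_scale=0.5)
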
Example #2
    def __init__(self,
                 action_set,
                 slot_set,
                 disease_symptom,
                 parameter,
                 disease_as_action=True):
        self.parameter = parameter
        symptom_set = set()
        for key, v in disease_symptom.items():
            # print(key, len(v['symptom'].keys()))
            symptom_set = symptom_set | set(list(v['symptom'].keys()))
        # exit(0)

        self.action_set = action_set
        self.slot_set = slot_set
        # self.disease_symptom = disease_symptom
        if parameter.get('prioritized_replay'):
            self.experience_replay_pool = PrioritizedReplayBuffer(
                buffer_size=parameter.get("experience_replay_pool_size"))
        else:
            self.experience_replay_pool = deque(
                maxlen=parameter.get("experience_replay_pool_size"))
        self.parameter = parameter
        self.candidate_disease_list = []
        self.candidate_symptom_list = []
        #disease_as_action = self.parameter.get("disease_as_action")
        #self.action_space = self._build_action_space(disease_symptom,disease_as_action)
        self.disease_symptom = self.disease_symptom_clip(
            disease_symptom, 2.5, parameter)

        self.agent_action = {
            "turn": 1,
            "action": None,
            "request_slots": {},
            "inform_slots": {},
            "explicit_inform_slots": {},
            "implicit_inform_slots": {},
            "speaker": "agent"
        }
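
A minimal sketch of the disease_symptom structure these constructors appear to expect (disease and symptom names are illustrative): each disease maps to a dict with an 'index' and a symptom-to-frequency map, which is what the union loop above and disease_symptom_clip in Example #5 consume.

disease_symptom = {
    "cold":   {"index": 0, "symptom": {"cough": 120, "fever": 80}},
    "asthma": {"index": 1, "symptom": {"wheezing": 95, "cough": 60}},
}
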
Example #3
    def __init__(self, action_set, slot_set, disease_symptom, parameter):
        self.parameter = parameter
        self.action_set = action_set
        self.slot_set = slot_set
        self.slot_set.pop("disease")
        self.disease_symptom = disease_symptom
        self.master_experience_replay_size = 10000
        if parameter.get('prioritized_replay'):
            self.experience_replay_pool = PrioritizedReplayBuffer(
                buffer_size=self.master_experience_replay_size)
        else:
            self.experience_replay_pool = deque(
                maxlen=self.master_experience_replay_size)

        self.input_size_dqn_all = {
            1: 374,
            4: 494,
            5: 389,
            6: 339,
            7: 279,
            12: 304,
            13: 359,
            14: 394,
            19: 414
        }

        self.id2disease = {}
        self.id2lowerAgent = {}
        self.master_action_space = []
        temp_parameter = {}
        for key, value in self.input_size_dqn_all.items():
            label = str(key)
            #print(label)
            self.master_action_space.append(label)
            #assert len(label) == 1
            #label = label[0]
            label_all_path = self.parameter.get("file_all")
            label_new_path = os.path.join(label_all_path, 'label' + str(label))
            disease_symptom = pickle.load(
                open(os.path.join(label_new_path, 'disease_symptom.p'), 'rb'))
            slot_set = pickle.load(
                open(os.path.join(label_new_path, 'slot_set.p'), 'rb'))
            action_set = pickle.load(
                open(os.path.join(label_new_path, 'action_set.p'), 'rb'))

            temp_parameter[label] = copy.deepcopy(parameter)
            #print(parameter["saved_model"])
            #if parameter.get("train_mode"):
            #    temp_parameter["saved_model"] = parameter["saved_model"].split('model_d10_agent')[0] + 'lower/' + str(
            #        label) + '/model_d10_agent' + parameter["saved_model"].split('model_d10_agent')[1]
            #else:
            #temp_parameter["saved_model"] = parameter["saved_model"].split('model_d10agent')[0] + 'lower/' + str(
            #        label) + '/model_d10agent' + parameter["saved_model"].split('model_d10agent')[1]
            path_list = parameter['saved_model'].split('/')
            path_list.insert(-1, 'lower')
            path_list.insert(-1, str(label))
            temp_parameter[label]['saved_model'] = '/'.join(path_list)
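            # Illustrative effect (the file name is hypothetical): '<run_dir>/model_d10_agent.pkl'
            # becomes '<run_dir>/lower/<label>/model_d10_agent.pkl', so each lower agent
            # loads/saves its own checkpoint directory.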
            temp_parameter[label]['gamma'] = temp_parameter[label][
                'gamma_worker']  # discount factor for the lower agent.

            temp_parameter[label]["input_size_dqn"] = value
            self.id2lowerAgent[label] = LowerAgent(
                action_set=action_set,
                slot_set=slot_set,
                disease_symptom=disease_symptom,
                parameter=temp_parameter[label],
                disease_as_action=False)
            #model_path = os.path.join(self.parameter.get("label_all_model_path"), label)

        # Master policy.
        if parameter.get("state_reduced"):
            input_size = len(self.slot_set) * 3
        else:
            input_size = parameter.get("input_size_dqn")
        hidden_size = parameter.get("hidden_size_dqn", 300)
        output_size = len(self.id2lowerAgent)
        if self.parameter.get("disease_as_action") == False:
            output_size = len(self.id2lowerAgent) + 1
        #print("input_size",input_size)
        self.master = DQN2(input_size=input_size,
                           hidden_size=hidden_size,
                           output_size=output_size,
                           parameter=parameter,
                           named_tuple=('state', 'agent_action', 'reward',
                                        'next_state', 'episode_over'))
        self.parameter = parameter
        #self.experience_replay_pool = deque(maxlen=parameter.get("experience_replay_pool_size"))
        self.current_lower_agent_id = -1
        self.behave_prob = 1
        print("master:", self.master_action_space)
        self.count = 0
        self.past_lower_agent_pool = {
            key: 0
            for key in self.id2lowerAgent.keys()
        }

        if parameter.get("train_mode") is False:
            print("########## master model is restore now ##########")
            self.master.restore_model(parameter.get("saved_model"))
            self.master.current_net.eval()
            self.master.target_net.eval()
            for label, agent in self.id2lowerAgent.items():
                #print(temp_parameter[label])
                self.id2lowerAgent[label].dqn.restore_model(
                    temp_parameter[label]['saved_model'])
                self.id2lowerAgent[label].dqn.current_net.eval()
                self.id2lowerAgent[label].dqn.target_net.eval()

        self.agent_action = {
            "turn": 1,
            "action": None,
            "request_slots": {},
            "inform_slots": {},
            "explicit_inform_slots": {},
            "implicit_inform_slots": {},
            "speaker": "agent"
        }
Example #4
class AgentHRL_joint(object):
    def __init__(self, action_set, slot_set, disease_symptom, parameter):
        self.parameter = parameter
        self.action_set = action_set
        self.slot_set = slot_set
        self.slot_set.pop("disease")
        self.disease_symptom = disease_symptom
        self.master_experience_replay_size = 10000
        if parameter.get('prioritized_replay'):
            self.experience_replay_pool = PrioritizedReplayBuffer(
                buffer_size=self.master_experience_replay_size)
        else:
            self.experience_replay_pool = deque(
                maxlen=self.master_experience_replay_size)

        self.input_size_dqn_all = {
            1: 374,
            4: 494,
            5: 389,
            6: 339,
            7: 279,
            12: 304,
            13: 359,
            14: 394,
            19: 414
        }

        self.id2disease = {}
        self.id2lowerAgent = {}
        self.master_action_space = []
        temp_parameter = {}
        for key, value in self.input_size_dqn_all.items():
            label = str(key)
            #print(label)
            self.master_action_space.append(label)
            #assert len(label) == 1
            #label = label[0]
            label_all_path = self.parameter.get("file_all")
            label_new_path = os.path.join(label_all_path, 'label' + str(label))
            disease_symptom = pickle.load(
                open(os.path.join(label_new_path, 'disease_symptom.p'), 'rb'))
            slot_set = pickle.load(
                open(os.path.join(label_new_path, 'slot_set.p'), 'rb'))
            action_set = pickle.load(
                open(os.path.join(label_new_path, 'action_set.p'), 'rb'))

            temp_parameter[label] = copy.deepcopy(parameter)
            #print(parameter["saved_model"])
            #if parameter.get("train_mode"):
            #    temp_parameter["saved_model"] = parameter["saved_model"].split('model_d10_agent')[0] + 'lower/' + str(
            #        label) + '/model_d10_agent' + parameter["saved_model"].split('model_d10_agent')[1]
            #else:
            #temp_parameter["saved_model"] = parameter["saved_model"].split('model_d10agent')[0] + 'lower/' + str(
            #        label) + '/model_d10agent' + parameter["saved_model"].split('model_d10agent')[1]
            path_list = parameter['saved_model'].split('/')
            path_list.insert(-1, 'lower')
            path_list.insert(-1, str(label))
            temp_parameter[label]['saved_model'] = '/'.join(path_list)
            temp_parameter[label]['gamma'] = temp_parameter[label][
                'gamma_worker']  # discount factor for the lower agent.

            temp_parameter[label]["input_size_dqn"] = value
            self.id2lowerAgent[label] = LowerAgent(
                action_set=action_set,
                slot_set=slot_set,
                disease_symptom=disease_symptom,
                parameter=temp_parameter[label],
                disease_as_action=False)
            #model_path = os.path.join(self.parameter.get("label_all_model_path"), label)

        # Master policy.
        if parameter.get("state_reduced"):
            input_size = len(self.slot_set) * 3
        else:
            input_size = parameter.get("input_size_dqn")
        hidden_size = parameter.get("hidden_size_dqn", 300)
        output_size = len(self.id2lowerAgent)
        if self.parameter.get("disease_as_action") == False:
            output_size = len(self.id2lowerAgent) + 1
        #print("input_size",input_size)
        self.master = DQN2(input_size=input_size,
                           hidden_size=hidden_size,
                           output_size=output_size,
                           parameter=parameter,
                           named_tuple=('state', 'agent_action', 'reward',
                                        'next_state', 'episode_over'))
        self.parameter = parameter
        #self.experience_replay_pool = deque(maxlen=parameter.get("experience_replay_pool_size"))
        self.current_lower_agent_id = -1
        self.behave_prob = 1
        print("master:", self.master_action_space)
        self.count = 0
        self.past_lower_agent_pool = {
            key: 0
            for key in self.id2lowerAgent.keys()
        }

        if parameter.get("train_mode") is False:
            print("########## master model is restore now ##########")
            self.master.restore_model(parameter.get("saved_model"))
            self.master.current_net.eval()
            self.master.target_net.eval()
            for label, agent in self.id2lowerAgent.items():
                #print(temp_parameter[label])
                self.id2lowerAgent[label].dqn.restore_model(
                    temp_parameter[label]['saved_model'])
                self.id2lowerAgent[label].dqn.current_net.eval()
                self.id2lowerAgent[label].dqn.target_net.eval()

        self.agent_action = {
            "turn": 1,
            "action": None,
            "request_slots": {},
            "inform_slots": {},
            "explicit_inform_slots": {},
            "implicit_inform_slots": {},
            "speaker": "agent"
        }

    def initialize(self):
        """
        Initializing a dialogue session.
        :return: nothing to return.
        """
        self.candidate_disease_list = []
        self.candidate_symptom_list = []
        self.agent_action = {
            "turn": None,
            "action": None,
            "request_slots": {},
            "inform_slots": {},
            "explicit_inform_slots": {},
            "implicit_inform_slots": {},
            "speaker": "agent"
        }

    def next(self, state, turn, greedy_strategy, **kwargs):
        """
        Taking action based on different methods, e.g., DQN-based AgentDQN, rule-based AgentRule.
        Detailed code will be implemented in the sub-classes of this class.
        :param state: a vector, the representation of current dialogue state.
        :param turn: int, the time step of current dialogue session.
        :return: the agent action, a tuple consisting of the selected agent action and its action index.
        """
        # disease_symptom are not used in state_rep.
        epsilon = self.parameter.get("epsilon")
        #print(state["turn"])
        if self.parameter.get("state_reduced"):
            state_rep = reduced_state_to_representation_last(
                state=state, slot_set=self.slot_set,
                parameter=self.parameter)  # sequence representation.
        else:
            state_rep = state_to_representation_last(
                state=state,
                action_set=self.action_set,
                slot_set=self.slot_set,
                disease_symptom=self.disease_symptom,
                max_turn=self.parameter["max_turn"]
            )  # sequence representation.
        #print(len(state_rep))
        # Master agent takes an action.
        if self.parameter.get("initial_symptom") and state["turn"] > 0:
            pass
        else:
            #print("####")
            if greedy_strategy == True:
                greedy = random.random()
                if greedy < epsilon:
                    self.master_action_index = random.randint(
                        0, len(self.id2lowerAgent))
                    #print(self.master_action_index)
                    #master_action_index = random.sample(list(self.id2lowerAgent.keys()),1)[0]
                else:
                    self.master_action_index = self.master.predict(
                        Xs=[state_rep])[1]
            # Evaluating mode.
            else:
                self.master_action_index = self.master.predict(
                    Xs=[state_rep])[1]
            self.behave_prob = 1 - epsilon + epsilon / (
                len(self.id2lowerAgent) - 1)
            #print(master_action_index)

            if self.parameter.get("prioritized_replay"):
                # print('2')
                Ys = self.master.predict(Xs=[state_rep])[0]
                self.current_action_value = Ys.detach().cpu().numpy()[0][
                    self.master_action_index]
        #print(self.master_action_index)  # index 9 can still appear here

        # Lower agent takes an action.
        #symptom_dist = self.disease_to_symptom_dist[self.id2disease[self.current_lower_agent_id]]
        # In state_to_representation_last, slots that are not in the slot set are removed automatically.
        if self.parameter.get("disease_as_action"):
            self.current_lower_agent_id = self.master_action_space[
                self.master_action_index]
            agent_action, lower_action_index = self.id2lowerAgent[str(
                self.current_lower_agent_id)].next(
                    state, turn, greedy_strategy=greedy_strategy)

        else:
            if self.master_action_index > (len(self.id2lowerAgent) - 1):
                agent_action = {
                    'action': 'inform',
                    'inform_slots': {
                        "disease": 'UNK'
                    },
                    'request_slots': {},
                    "explicit_inform_slots": {},
                    "implicit_inform_slots": {}
                }
                agent_action["turn"] = turn
                agent_action["inform_slots"] = {"disease": None}
                agent_action["speaker"] = 'agent'
                agent_action["action_index"] = None
                lower_action_index = -1
            else:
                self.current_lower_agent_id = self.master_action_space[
                    self.master_action_index]
                #print(self.current_lower_agent_id)
                agent_action, lower_action_index = self.id2lowerAgent[str(
                    self.current_lower_agent_id)].next(
                        state, turn, greedy_strategy=greedy_strategy)
                assert len(list(agent_action["request_slots"].keys())) == 1
            #print(self.current_lower_agent_id, lower_action_index)
            #print(agent_action)
        return agent_action, self.master_action_index, lower_action_index

    def next_state_values_DDQN(self, next_state):
        if self.parameter.get("state_reduced"):
            state_rep = reduced_state_to_representation_last(
                state=next_state,
                slot_set=self.slot_set,
                parameter=self.parameter)  # sequence representation.
        else:
            state_rep = state_to_representation_last(
                state=next_state,
                action_set=self.action_set,
                slot_set=self.slot_set,
                disease_symptom=self.disease_symptom,
                max_turn=self.parameter["max_turn"])
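        # Double DQN: the action is selected with predict (online network) and its value
        # is taken from predict_target (target network).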
        action_index = self.master.predict(Xs=[state_rep])[1]
        Ys = self.master.predict_target(Xs=[state_rep])
        next_action_value = Ys.detach().cpu().numpy()[0][action_index]

        return next_action_value

    def train(self, batch):
        """
        Training the agent.
        Args:
            batch: the samples used for training.
        Return:
            a dict with a key `loss` whose value is a float.
        """
        loss = self.master.singleBatch(
            batch=batch,
            params=self.parameter,
            weight_correction=self.parameter.get("weight_correction"))
        return loss

    def update_target_network(self):
        self.master.update_target_network()
        for key in self.id2lowerAgent.keys():
            self.id2lowerAgent[key].update_target_network()

    def save_model(self,
                   model_performance,
                   episodes_index,
                   checkpoint_path=None):
        # Saving master agent
        self.master.save_model(model_performance=model_performance,
                               episodes_index=episodes_index,
                               checkpoint_path=checkpoint_path)
        # Saving lower agents
        for key, lower_agent in self.id2lowerAgent.items():
            temp_checkpoint_path = os.path.join(checkpoint_path,
                                                'lower/' + str(key))
            lower_agent.dqn.save_model(model_performance=model_performance,
                                       episodes_index=episodes_index,
                                       checkpoint_path=temp_checkpoint_path)

    def train_dqn(self):
        """
        Train dqn.
        :return:
        """
        # ('state', 'agent_action', 'reward', 'next_state', 'episode_over')
        # Training of master agent
        cur_bellman_err = 0.0
        batch_size = self.parameter.get("batch_size", 16)
        #print(batch_size)

        priority_scale = self.parameter.get("priority_scale")
        if self.parameter.get("prioritized_replay"):
            for iter in range(
                    math.ceil(self.experience_replay_pool.__len__() /
                              batch_size)):
                batch = self.experience_replay_pool.sample(
                    batch_size=min(batch_size,
                                   self.experience_replay_pool.__len__()),
                    priority_scale=priority_scale)
                loss = self.train(batch=batch)
                cur_bellman_err += loss["loss"]
            print(
                "[Master agent] cur bellman err %.4f, experience replay pool %s"
                % (float(cur_bellman_err) /
                   (self.experience_replay_pool.__len__() + 1e-10),
                   self.experience_replay_pool.__len__()))
            for disease_id, lower_agent in self.id2lowerAgent.items():
                if len(lower_agent.experience_replay_pool) > 120:
                    lower_agent.train_dqn()
        else:
            for iter in range(
                    math.ceil(len(self.experience_replay_pool) / batch_size)):
                batch = random.sample(
                    self.experience_replay_pool,
                    min(batch_size, len(self.experience_replay_pool)))
                loss = self.train(batch=batch)
                cur_bellman_err += loss["loss"]
            print(
                "[Master agent] cur bellman err %.4f, experience replay pool %s"
                % (float(cur_bellman_err) /
                   (len(self.experience_replay_pool) + 1e-10),
                   len(self.experience_replay_pool)))
            if self.count % 10 == 9:
                for group_id, lower_agent in self.id2lowerAgent.items():
                    #if len(lower_agent.experience_replay_pool) ==10000 or (len(lower_agent.experience_replay_pool)-self.past_lower_agent_pool[group_id])>100:
                    if len(lower_agent.experience_replay_pool) > 100:
                        lower_agent.train_dqn(label=group_id)
                        self.past_lower_agent_pool[group_id] = len(
                            lower_agent.experience_replay_pool)

        self.count += 1
        # Training of lower agents.
        #for disease_id, lower_agent in self.id2lowerAgent.items():
        #    lower_agent.train_dqn()

    def reward_shaping(self, state, next_state):
        def delete_item_from_dict(item, value):
            new_item = {}
            for k, v in item.items():
                if v != value: new_item[k] = v
            return new_item

        # slot number in state.
        slot_dict = copy.deepcopy(state["current_slots"]["inform_slots"])
        slot_dict.update(state["current_slots"]["explicit_inform_slots"])
        slot_dict.update(state["current_slots"]["implicit_inform_slots"])
        slot_dict.update(state["current_slots"]["proposed_slots"])
        slot_dict.update(state["current_slots"]["agent_request_slots"])
        slot_dict = delete_item_from_dict(slot_dict,
                                          dialogue_configuration.I_DO_NOT_KNOW)

        next_slot_dict = copy.deepcopy(
            next_state["current_slots"]["inform_slots"])
        next_slot_dict.update(
            next_state["current_slots"]["explicit_inform_slots"])
        next_slot_dict.update(
            next_state["current_slots"]["implicit_inform_slots"])
        next_slot_dict.update(next_state["current_slots"]["proposed_slots"])
        next_slot_dict.update(
            next_state["current_slots"]["agent_request_slots"])
        next_slot_dict = delete_item_from_dict(
            next_slot_dict, dialogue_configuration.I_DO_NOT_KNOW)
        gamma = self.parameter.get("gamma")
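        # Potential-based shaping on the number of known slots. Illustrative values:
        # with gamma = 0.95, 3 known slots before the turn and 4 after it, the shaping
        # term is 0.95 * 4 - 3 = 0.8.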
        return gamma * len(next_slot_dict) - len(slot_dict)

    def record_training_sample(self, state, agent_action, reward, next_state,
                               episode_over, lower_reward):
        # samples of master agent.
        #print(state)
        #print(reward)

        shaping = self.reward_shaping(state, next_state)
        alpha = self.parameter.get("weight_for_reward_shaping")
        '''
        if reward == self.parameter.get("reward_for_repeated_action"):
            lower_reward = reward
            #reward = reward * 2
        else:
            lower_reward = max(0, shaping * alpha)
            #lower_reward = shaping * alpha
        '''

        if episode_over is True:
            pass
        else:
            reward = reward + alpha * shaping

        # samples of lower agent.
        #print(agent_action)
        if int(agent_action) >= 0:
            #print(lower_reward)
            self.id2lowerAgent[
                self.current_lower_agent_id].record_training_sample(
                    state, agent_action, lower_reward, next_state,
                    episode_over)

        if self.parameter.get("state_reduced"):
            state_rep = reduced_state_to_representation_last(
                state=state, slot_set=self.slot_set,
                parameter=self.parameter)  # sequence representation.
            next_state_rep = reduced_state_to_representation_last(
                state=next_state,
                slot_set=self.slot_set,
                parameter=self.parameter)
        else:
            state_rep = state_to_representation_last(
                state=state,
                action_set=self.action_set,
                slot_set=self.slot_set,
                disease_symptom=self.disease_symptom,
                max_turn=self.parameter["max_turn"])
            next_state_rep = state_to_representation_last(
                state=next_state,
                action_set=self.action_set,
                slot_set=self.slot_set,
                disease_symptom=self.disease_symptom,
                max_turn=self.parameter["max_turn"])
        #print("state", [idx for idx,x in enumerate(state_rep) if x==1], agent_action)
        #print("nexts", [idx for idx,x in enumerate(next_state_rep) if x==1], reward)
        if self.parameter.get("value_as_reward") is True:
            q_values = self.id2lowerAgent[
                self.current_lower_agent_id].get_q_values(state)
            q_values.reshape(q_values.shape[1])
            master_reward = np.max(q_values, axis=1)[0]
        else:
            master_reward = reward
        #print(master_reward)
        self.experience_replay_pool.append(
            (state_rep, self.master_action_index, master_reward,
             next_state_rep, episode_over))

    def record_prioritized_training_sample(self, state, agent_action, reward,
                                           next_state, episode_over, TD_error,
                                           **kwargs):
        shaping = self.reward_shaping(state, next_state)
        alpha = self.parameter.get("weight_for_reward_shaping")
        # if True:
        # print('shaping', shaping)
        # Reward shaping only for non-terminal states.
        if episode_over is True:
            pass
        else:
            reward = reward + alpha * shaping

        if self.parameter.get("state_reduced"):
            state_rep = reduced_state_to_representation_last(
                state=state, slot_set=self.slot_set,
                parameter=self.parameter)  # sequence representation.
            next_state_rep = reduced_state_to_representation_last(
                state=next_state,
                slot_set=self.slot_set,
                parameter=self.parameter)
        else:
            state_rep = state_to_representation_last(
                state=state,
                action_set=self.action_set,
                slot_set=self.slot_set,
                disease_symptom=self.disease_symptom,
                max_turn=self.parameter["max_turn"])
            next_state_rep = state_to_representation_last(
                state=next_state,
                action_set=self.action_set,
                slot_set=self.slot_set,
                disease_symptom=self.disease_symptom,
                max_turn=self.parameter["max_turn"])
        self.experience_replay_pool.add(state_rep, agent_action, reward,
                                        next_state_rep, episode_over, TD_error)

    def flush_pool(self):
        if self.parameter.get('prioritized_replay'):
            self.experience_replay_pool = PrioritizedReplayBuffer(
                buffer_size=self.master_experience_replay_size)
        else:
            self.experience_replay_pool = deque(
                maxlen=self.master_experience_replay_size)
        #for key, lower_agent in self.id2lowerAgent.items():
        #    self.id2lowerAgent[key].flush_pool()

    def train_mode(self):
        self.master.current_net.train()

    def eval_mode(self):
        self.master.current_net.eval()
Example #5
class Agent(object):
    """
    Basic class of agent.
    """
    def __init__(self,
                 action_set,
                 slot_set,
                 disease_symptom,
                 parameter,
                 disease_as_action=True):
        self.parameter = parameter
        symptom_set = set()
        for key, v in disease_symptom.items():
            # print(key, len(v['symptom'].keys()))
            symptom_set = symptom_set | set(list(v['symptom'].keys()))
        # exit(0)

        self.action_set = action_set
        self.slot_set = slot_set
        # self.disease_symptom = disease_symptom
        if parameter.get('prioritized_replay'):
            self.experience_replay_pool = PrioritizedReplayBuffer(
                buffer_size=parameter.get("experience_replay_pool_size"))
        else:
            self.experience_replay_pool = deque(
                maxlen=parameter.get("experience_replay_pool_size"))
        self.parameter = parameter
        self.candidate_disease_list = []
        self.candidate_symptom_list = []
        #disease_as_action = self.parameter.get("disease_as_action")
        #self.action_space = self._build_action_space(disease_symptom,disease_as_action)
        self.disease_symptom = self.disease_symptom_clip(
            disease_symptom, 2.5, parameter)

        self.agent_action = {
            "turn": 1,
            "action": None,
            "request_slots": {},
            "inform_slots": {},
            "explicit_inform_slots": {},
            "implicit_inform_slots": {},
            "speaker": "agent"
        }

    def initialize(self):
        """
        Initializing a dialogue session.
        :return: nothing to return.
        """
        self.candidate_disease_list = []
        self.candidate_symptom_list = []
        self.agent_action = {
            "turn": None,
            "action": None,
            "request_slots": {},
            "inform_slots": {},
            "explicit_inform_slots": {},
            "implicit_inform_slots": {},
            "speaker": "agent"
        }

    def next(self, *args, **kwargs):
        """
        Taking action based on different methods, e.g., DQN-based AgentDQN, rule-based AgentRule.
        Detailed code will be implemented in the sub-classes of this class.
        :param state: a vector, the representation of current dialogue state.
        :param turn: int, the time step of current dialogue session.
        :return: the agent action, a tuple consisting of the selected agent action and its action index.
        """
        raise NotImplementedError(
            'The `next` function of agent has not been implemented.')

    def train(self, batch):
        """
        Training the agent.
        Detailed code will be implemented in the sub-classes of this class.
        :param batch: the samples used for training.
        :return:
        """
        raise NotImplementedError(
            'The `train` function of agent has not been implemented.')

    def _build_action_space(self, disease_symptom, disease_as_action):
        """
        Building the Action Space for the RL-based Agent.
        All diseases are treated as actions.
        :return: Action Space, a list of feasible actions.
        """
        feasible_actions = []
        # Adding the request actions. The slots are extracted from the disease-symptom links,
        # i.e., disease_symptom.
        slot_set = []
        for disease, v in disease_symptom.items():
            slot_set = slot_set + list(v["symptom"])
        slot_set = list(set(slot_set))
        for slot in sorted(slot_set):
            if slot != "disease":
                feasible_actions.append({
                    'action': 'request',
                    'inform_slots': {},
                    'request_slots': {
                        slot: dialogue_configuration.VALUE_UNKNOWN
                    },
                    "explicit_inform_slots": {},
                    "implicit_inform_slots": {}
                })

        # Diseases as actions: inform + disease.
        if self.parameter.get("agent_id").lower() == "agenthrljoint":
            if disease_as_action is True:
                for disease in sorted(disease_symptom.keys()):
                    feasible_actions.append({
                        'action': 'inform',
                        'inform_slots': {
                            "disease": disease
                        },
                        'request_slots': {},
                        "explicit_inform_slots": {},
                        "implicit_inform_slots": {}
                    })
        elif self.parameter.get("agent_id").lower() == "agenthrljoint2":
            if disease_as_action is True:
                for disease in sorted(disease_symptom.keys()):
                    feasible_actions.append({
                        'action': 'inform',
                        'inform_slots': {
                            "disease": disease
                        },
                        'request_slots': {},
                        "explicit_inform_slots": {},
                        "implicit_inform_slots": {}
                    })
            #else:
            #    feasible_actions.append({'action': "return", 'inform_slots': {}, 'request_slots': {},"explicit_inform_slots":{}, "implicit_inform_slots":{}})
        else:
            #print("#########################")
            if disease_as_action is True:
                for disease in sorted(disease_symptom.keys()):
                    feasible_actions.append({
                        'action': 'inform',
                        'inform_slots': {
                            "disease": disease
                        },
                        'request_slots': {},
                        "explicit_inform_slots": {},
                        "implicit_inform_slots": {}
                    })
            else:
                feasible_actions.append({
                    'action': "inform",
                    'inform_slots': {
                        "disease": None
                    },
                    'request_slots': {},
                    "explicit_inform_slots": {},
                    "implicit_inform_slots": {}
                })

        # Greeting actions include thanks and close dialogue.
        # feasible_actions.append({'action': "confirm_question", 'inform_slots': {}, 'request_slots': {},"explicit_inform_slots":{}, "implicit_inform_slots":{}})
        # feasible_actions.append({'action': "confirm_answer", 'inform_slots': {}, 'request_slots': {},"explicit_inform_slots":{}, "implicit_inform_slots":{}})
        # feasible_actions.append({'action': "deny", 'inform_slots': {}, 'request_slots': {},"explicit_inform_slots":{}, "implicit_inform_slots":{}})
        # feasible_actions.append({'action': dialogue_configuration.CLOSE_DIALOGUE, 'inform_slots': {}, 'request_slots': {},"explicit_inform_slots":{}, "implicit_inform_slots":{}})
        # feasible_actions.append({'action': dialogue_configuration.THANKS, 'inform_slots': {}, 'request_slots': {}, "explicit_inform_slots": {}, "implicit_inform_slots": {}})
        return feasible_actions

    @staticmethod
    def disease_symptom_clip(disease_symptom, denominator, parameter):
        """
        Keep at most min(symptom_num, max_turn // denominator) symptoms for each disease; the kept symptoms are
        sorted in descending order of their frequencies.
        Args:
            disease_symptom: a dict whose keys are disease names and whose values are dicts containing the index
                of the disease and its related symptoms.
            denominator: int, the number of symptoms kept for each disease is max_turn // denominator.
            parameter: the hyper-parameters.
        Returns:
            a dict whose keys are the names of diseases, and whose values are dicts with two keys: 'index' and 'symptom'.
        """
        max_turn = parameter.get('max_turn')
        temp_disease_symptom = copy.deepcopy(disease_symptom)
        for key, value in disease_symptom.items():
            symptom_list = sorted(value['symptom'].items(),
                                  key=lambda x: x[1],
                                  reverse=True)
            symptom_list = [v[0] for v in symptom_list]
            symptom_list = symptom_list[
                0:min(len(symptom_list), int(max_turn / float(denominator)))]
            temp_disease_symptom[key]['symptom'] = symptom_list
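        # Note: the 'symptom' value is replaced by a list of symptom names. With
        # illustrative values max_turn = 22 and denominator = 2.5, each disease keeps
        # at most int(22 / 2.5) = 8 of its most frequent symptoms.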
        return temp_disease_symptom

    def record_training_sample(self, state, agent_action, reward, next_state,
                               episode_over, **kwargs):
        symptom_dist_as_input = self.parameter.get("symptom_dist_as_input")
        agent_id = self.parameter.get("agent_id")
        if self.parameter.get("state_reduced"):
            state = reduced_state_to_representation_last(
                state=state,
                slot_set=self.slot_set)  # sequence representation.
            next_state = reduced_state_to_representation_last(
                state=next_state, slot_set=self.slot_set)
        else:
            state = state_to_representation_last(
                state=state,
                action_set=self.action_set,
                slot_set=self.slot_set,
                disease_symptom=self.disease_symptom,
                max_turn=self.parameter["max_turn"])
            next_state = state_to_representation_last(
                state=next_state,
                action_set=self.action_set,
                slot_set=self.slot_set,
                disease_symptom=self.disease_symptom,
                max_turn=self.parameter["max_turn"])
        if symptom_dist_as_input is True and agent_id.lower() == 'agenthrl':
            symptom_dist = kwargs.get('symptom_dist')
            state = np.concatenate((state, symptom_dist), axis=0)
            next_state = np.concatenate((next_state, symptom_dist), axis=0)
        self.experience_replay_pool.append(
            (state, agent_action, reward, next_state, episode_over))

    def flush_pool(self):
        if self.parameter.get('prioritized_replay'):
            self.experience_replay_pool = PrioritizedReplayBuffer(
                buffer_size=self.parameter.get("experience_replay_pool_size"))
        else:
            self.experience_replay_pool = deque(
                maxlen=self.parameter.get("experience_replay_pool_size"))

    def train_mode(self):
        """
        Set the agent to train mode, i.e., the parameters will be updated and dropout will be activated.
        """
        raise NotImplementedError(
            "The `train_mode` function of agent has not been implemented")

    def eval_mode(self):
        """
        Set the agent to eval mode, i.e., the parameters will be unchanged and dropout will be deactivated.
        """
        raise NotImplementedError(
            "The `train_mode` function of agent has not been implemented")
Example #6
    def __init__(self, action_set, slot_set, disease_symptom, parameter):
        self.parameter = parameter
        self.action_set = action_set
        self.slot_set = slot_set
        #self.slot_set.pop("disease")
        self.disease_symptom = disease_symptom
        self.master_experience_replay_size = 10000
        if parameter.get('prioritized_replay'):
            self.experience_replay_pool = PrioritizedReplayBuffer(
                buffer_size=self.master_experience_replay_size)
        else:
            self.experience_replay_pool = deque(
                maxlen=self.master_experience_replay_size)
        if self.parameter.get("data_type") == 'simulated':
            self.input_size_dqn_all = {
                1: 374,
                4: 494,
                5: 389,
                6: 339,
                7: 279,
                12: 304,
                13: 359,
                14: 394,
                19: 414
            }
        elif self.parameter.get("data_type") == 'real':
            self.input_size_dqn_all = {0: 84, 1: 81, 2: 81, 3: 83}
        else:
            raise ValueError

        self.id2disease = {}
        self.id2lowerAgent = {}
        self.pretrained_lowerAgent = {}
        self.master_action_space = []
        temp_parameter = {}

        # Different labels correspond to different disease-symptom groups.
        for key, value in self.input_size_dqn_all.items():
            #dirs = os.listdir(self.parameter.get("label_all_model_path"))
            #for model in dirs:
            #reg = re.compile(r"(?<=label)\d+")
            #match = reg.search(model)
            #label = match.group(0)
            # print(label)
            label = str(key)
            self.master_action_space.append(label)
            # assert len(label) == 1
            # label = label[0]
            label_all_path = self.parameter.get("file_all")
            label_new_path = os.path.join(label_all_path, 'label' + str(label))
            disease_symptom = pickle.load(
                open(os.path.join(label_new_path, 'disease_symptom.p'), 'rb'))
            slot_set = pickle.load(
                open(os.path.join(label_new_path, 'slot_set.p'), 'rb'))
            action_set = pickle.load(
                open(os.path.join(label_new_path, 'action_set.p'), 'rb'))

            temp_parameter[label] = copy.deepcopy(parameter)
            # print(parameter["saved_model"])
            # if parameter.get("train_mode"):
            #    temp_parameter["saved_model"] = parameter["saved_model"].split('model_d10_agent')[0] + 'lower/' + str(
            #        label) + '/model_d10_agent' + parameter["saved_model"].split('model_d10_agent')[1]
            # else:
            path_list = parameter['saved_model'].split('/')
            path_list.insert(-1, 'lower')
            path_list.insert(-1, str(label))
            temp_parameter[label]['saved_model'] = '/'.join(path_list)
            temp_parameter[label]['gamma'] = temp_parameter[label][
                'gamma_worker']  # discount factor for the lower agent.

            temp_parameter[label]["input_size_dqn"] = self.input_size_dqn_all[
                int(label)]
            #temp_parameter[label]["input_size_dqn"] = (len(slot_set)-1) *3
            self.id2lowerAgent[label] = LowerAgent(
                action_set=action_set,
                slot_set=slot_set,
                disease_symptom=disease_symptom,
                parameter=temp_parameter[label],
                disease_as_action=False)
            # model_path = os.path.join(self.parameter.get("label_all_model_path"), label)
            #temp_parameter[label]["input_size_dqn"] = self.input_size_dqn_all[int(label)]
            '''
            temp_parameter[label]["input_size_dqn"] = (len(slot_set)) * 3
            #print(slot_set)
            self.pretrained_lowerAgent[label] = LowerAgent(action_set=action_set, slot_set=slot_set,
                                                   disease_symptom=disease_symptom, parameter=temp_parameter[label],
                                                   disease_as_action=True)
            # model_path = os.path.join(self.parameter.get("label_all_model_path"), label)

            self.pretrained_lowerAgent[label].dqn.restore_model(os.path.join(self.parameter.get("label_all_model_path"), model))
            self.pretrained_lowerAgent[label].dqn.current_net.eval()
            self.pretrained_lowerAgent[label].dqn.target_net.eval()
            '''

        # Master policy.
        if parameter.get("state_reduced"):
            input_size = (
                len(self.slot_set) - 1
            ) * 3  # the slot_set dict contains a "disease" key, which needs to be removed first.
        else:
            input_size = parameter.get("input_size_dqn")
        hidden_size = parameter.get("hidden_size_dqn", 300)
        # Number of disease categories.
        self.output_size = len(self.id2lowerAgent)
        if self.parameter.get("disease_as_action") == False:
            self.output_size = len(
                self.id2lowerAgent
            ) + 1  # the extra output is the action of activating the disease classifier
        #print("input_size",input_size)
        self.master = DQN2(input_size=input_size,
                           hidden_size=hidden_size,
                           output_size=self.output_size,
                           parameter=parameter,
                           named_tuple=('state', 'agent_action', 'reward',
                                        'next_state', 'episode_over'))
        self.parameter = parameter
        # self.experience_replay_pool = deque(maxlen=parameter.get("experience_replay_pool_size"))
        self.current_lower_agent_id = -1
        self.behave_prob = 1
        print("master:", self.master_action_space)
        self.count = 0
        self.subtask_terminal = True
        self.subtask_turn = 0
        self.subtask_max_turn = 5
        self.past_lower_agent_pool = {
            key: 0
            for key in self.id2lowerAgent.keys()
        }

        if parameter.get("train_mode") is False:
            print("########## master model is restore now ##########")
            self.master.restore_model(parameter.get("saved_model"))
            self.master.current_net.eval()
            self.master.target_net.eval()
            for label, agent in self.id2lowerAgent.items():
                #print(temp_parameter[label])
                self.id2lowerAgent[label].dqn.restore_model(
                    temp_parameter[label]['saved_model'])
                self.id2lowerAgent[label].dqn.current_net.eval()
                self.id2lowerAgent[label].dqn.target_net.eval()

        self.agent_action = {
            "turn": 1,
            "action": None,
            "request_slots": {},
            "inform_slots": {},
            "explicit_inform_slots": {},
            "implicit_inform_slots": {},
            "speaker": "agent"
        }
Example #7
    def __init__(self, action_set, slot_set, disease_symptom, parameter):
        self.parameter = parameter
        self.action_set = action_set
        self.slot_set = slot_set
        self.slot_set.pop("disease")
        self.disease_symptom = disease_symptom
        if parameter.get('prioritized_replay'):
            self.experience_replay_pool = PrioritizedReplayBuffer(
                buffer_size=parameter.get("experience_replay_pool_size"))
        else:
            self.experience_replay_pool = deque(
                maxlen=parameter.get("experience_replay_pool_size"))

        self.input_size_dqn_all = {
            1: 374,
            4: 494,
            5: 389,
            6: 339,
            7: 279,
            9: 409,
            10: 254,
            11: 304,
            12: 304,
            13: 359,
            14: 394,
            19: 414
        }

        self.id2disease = {}
        self.id2lowerAgent = {}
        self.master_action_space = []
        dirs = os.listdir(self.parameter.get("label_all_model_path"))
        for model in dirs:
            #pattern = re.compile(r'(?<=label=)\d+\.?\d*')
            #label = pattern.findall(model)
            reg = re.compile(r"(?<=label)\d+")
            match = reg.search(model)
            label = match.group(0)
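            # e.g. a checkpoint file named 'model_d10agent_label3.pkl' (illustrative
            # name) would yield label '3'.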
            #print(label)
            self.master_action_space.append(label)
            #assert len(label) == 1
            #label = label[0]
            label_all_path = self.parameter.get("file_all")
            label_new_path = os.path.join(label_all_path, 'label' + label)
            disease_symptom = pickle.load(
                open(os.path.join(label_new_path, 'disease_symptom.p'), 'rb'))
            slot_set = pickle.load(
                open(os.path.join(label_new_path, 'slot_set.p'), 'rb'))
            action_set = pickle.load(
                open(os.path.join(label_new_path, 'action_set.p'), 'rb'))

            temp_parameter = copy.deepcopy(parameter)
            #print(parameter["saved_model"])
            #if parameter.get("train_mode"):
            #    temp_parameter["saved_model"] = parameter["saved_model"].split('model_d10_agent')[0] + 'lower/' + str(
            #        label) + '/model_d10_agent' + parameter["saved_model"].split('model_d10_agent')[1]
            #else:
            temp_parameter["saved_model"] = parameter["saved_model"].split(
                'model_d10agent')[0] + 'lower/' + str(
                    label) + '/model_d10agent' + parameter[
                        "saved_model"].split('model_d10agent')[1]
            temp_parameter["input_size_dqn"] = self.input_size_dqn_all[int(
                label)]
            self.id2lowerAgent[label] = LowerAgent(
                action_set=action_set,
                slot_set=slot_set,
                disease_symptom=disease_symptom,
                parameter=temp_parameter,
                disease_as_action=True)
            #model_path = os.path.join(self.parameter.get("label_all_model_path"), label)

            self.id2lowerAgent[label].dqn.restore_model(
                os.path.join(self.parameter.get("label_all_model_path"),
                             model))
            self.id2lowerAgent[label].dqn.current_net.eval()
            self.id2lowerAgent[label].dqn.target_net.eval()

        # Master policy.
        if parameter.get("state_reduced"):
            input_size = len(self.slot_set) * 3
        else:
            input_size = parameter.get("input_size_dqn")
        hidden_size = parameter.get("hidden_size_dqn", 100)
        output_size = len(self.id2lowerAgent)
        if self.parameter.get("disease_as_action") == False:
            output_size = len(self.id2lowerAgent) + 1
        #print("input_size",input_size)
        self.master = DQN2(input_size=input_size,
                           hidden_size=hidden_size,
                           output_size=output_size,
                           parameter=parameter,
                           named_tuple=('state', 'agent_action', 'reward',
                                        'next_state', 'episode_over'))
        self.parameter = parameter
        #self.experience_replay_pool = deque(maxlen=parameter.get("experience_replay_pool_size"))
        self.current_lower_agent_id = -1
        self.behave_prob = 1
        print("master:", self.master_action_space)
        if parameter.get("train_mode") is False:
            print("########## master model is restore now ##########")
            self.master.restore_model(parameter.get("saved_model"))
            self.master.current_net.eval()
            self.master.target_net.eval()

        self.agent_action = {
            "turn": 1,
            "action": None,
            "request_slots": {},
            "inform_slots": {},
            "explicit_inform_slots": {},
            "implicit_inform_slots": {},
            "speaker": "agent"
        }