Example #1
    def __init__(self, action_set, slot_set, disease_symptom, parameter):
        self.parameter = parameter
        self.action_set = action_set
        self.slot_set = slot_set
        self.slot_set.pop("disease")
        self.disease_symptom = disease_symptom
        self.master_experience_replay_size = 10000
        if parameter.get('prioritized_replay'):
            self.experience_replay_pool = PrioritizedReplayBuffer(
                buffer_size=self.master_experience_replay_size)
        else:
            self.experience_replay_pool = deque(
                maxlen=self.master_experience_replay_size)

        self.input_size_dqn_all = {
            1: 374,
            4: 494,
            5: 389,
            6: 339,
            7: 279,
            12: 304,
            13: 359,
            14: 394,
            19: 414
        }

        self.id2disease = {}
        self.id2lowerAgent = {}
        self.master_action_space = []
        temp_parameter = {}
        for key, value in self.input_size_dqn_all.items():
            label = str(key)
            #print(label)
            self.master_action_space.append(label)
            #assert len(label) == 1
            #label = label[0]
            label_all_path = self.parameter.get("file_all")
            label_new_path = os.path.join(label_all_path, 'label' + str(label))
            disease_symptom = pickle.load(
                open(os.path.join(label_new_path, 'disease_symptom.p'), 'rb'))
            slot_set = pickle.load(
                open(os.path.join(label_new_path, 'slot_set.p'), 'rb'))
            action_set = pickle.load(
                open(os.path.join(label_new_path, 'action_set.p'), 'rb'))

            temp_parameter[label] = copy.deepcopy(parameter)
            #print(parameter["saved_model"])
            path_list = parameter['saved_model'].split('/')
            path_list.insert(-1, 'lower')
            path_list.insert(-1, str(label))
            temp_parameter[label]['saved_model'] = '/'.join(path_list)
            temp_parameter[label]['gamma'] = temp_parameter[label][
                'gamma_worker']  # discount factor for the lower agent.

            temp_parameter[label]["input_size_dqn"] = value
            self.id2lowerAgent[label] = LowerAgent(
                action_set=action_set,
                slot_set=slot_set,
                disease_symptom=disease_symptom,
                parameter=temp_parameter[label],
                disease_as_action=False)
            #model_path = os.path.join(self.parameter.get("label_all_model_path"), label)

        # Master policy.
        if parameter.get("state_reduced"):
            input_size = len(self.slot_set) * 3
        else:
            input_size = parameter.get("input_size_dqn")
        hidden_size = parameter.get("hidden_size_dqn", 300)
        output_size = len(self.id2lowerAgent)
        if self.parameter.get("disease_as_action") == False:
            output_size = len(self.id2lowerAgent) + 1
        #print("input_size",input_size)
        self.master = DQN2(input_size=input_size,
                           hidden_size=hidden_size,
                           output_size=output_size,
                           parameter=parameter,
                           named_tuple=('state', 'agent_action', 'reward',
                                        'next_state', 'episode_over'))
        self.parameter = parameter
        #self.experience_replay_pool = deque(maxlen=parameter.get("experience_replay_pool_size"))
        self.current_lower_agent_id = -1
        self.behave_prob = 1
        print("master:", self.master_action_space)
        self.count = 0
        self.past_lower_agent_pool = {
            key: 0
            for key in self.id2lowerAgent.keys()
        }

        if parameter.get("train_mode") is False:
            print("########## master model is restore now ##########")
            self.master.restore_model(parameter.get("saved_model"))
            self.master.current_net.eval()
            self.master.target_net.eval()
            for label, agent in self.id2lowerAgent.items():
                #print(temp_parameter[label])
                self.id2lowerAgent[label].dqn.restore_model(
                    temp_parameter[label]['saved_model'])
                self.id2lowerAgent[label].dqn.current_net.eval()
                self.id2lowerAgent[label].dqn.target_net.eval()

        self.agent_action = {
            "turn": 1,
            "action": None,
            "request_slots": {},
            "inform_slots": {},
            "explicit_inform_slots": {},
            "implicit_inform_slots": {},
            "speaker": "agent"
        }
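
Example #1 derives each lower agent's checkpoint path from the master's saved_model path by splitting on '/' and inserting 'lower' and the label just before the filename. A minimal standalone sketch of that manipulation; the checkpoint path and label below are hypothetical values, not ones from the repository:

# Sketch of the saved_model path rewrite used above (hypothetical path and label).
saved_model = 'checkpoints/run1/model_d10_agent.pkl'   # hypothetical master checkpoint
label = '4'                                            # hypothetical lower-agent label
path_list = saved_model.split('/')
path_list.insert(-1, 'lower')   # add the 'lower' directory before the filename
path_list.insert(-1, label)     # then the label directory
print('/'.join(path_list))      # checkpoints/run1/lower/4/model_d10_agent.pkl
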
Example #2
    def __init__(self, action_set, slot_set, disease_symptom, parameter):
        self.parameter = parameter
        self.action_set = action_set
        self.slot_set = slot_set
        #self.slot_set.pop("disease")
        self.disease_symptom = disease_symptom
        self.master_experience_replay_size = 10000
        if parameter.get('prioritized_replay'):
            self.experience_replay_pool = PrioritizedReplayBuffer(
                buffer_size=self.master_experience_replay_size)
        else:
            self.experience_replay_pool = deque(
                maxlen=self.master_experience_replay_size)
        if self.parameter.get("data_type") == 'simulated':
            self.input_size_dqn_all = {
                1: 374,
                4: 494,
                5: 389,
                6: 339,
                7: 279,
                12: 304,
                13: 359,
                14: 394,
                19: 414
            }
        elif self.parameter.get("data_type") == 'real':
            self.input_size_dqn_all = {0: 84, 1: 81, 2: 81, 3: 83}
        else:
            raise ValueError

        self.id2disease = {}
        self.id2lowerAgent = {}
        self.pretrained_lowerAgent = {}
        self.master_action_space = []
        temp_parameter = {}

        # different label = different disease symptom set
        for key, value in self.input_size_dqn_all.items():
            #dirs = os.listdir(self.parameter.get("label_all_model_path"))
            #for model in dirs:
            #reg = re.compile(r"(?<=label)\d+")
            #match = reg.search(model)
            #label = match.group(0)
            # print(label)
            label = str(key)
            self.master_action_space.append(label)
            # assert len(label) == 1
            # label = label[0]
            label_all_path = self.parameter.get("file_all")
            label_new_path = os.path.join(label_all_path, 'label' + str(label))
            disease_symptom = pickle.load(
                open(os.path.join(label_new_path, 'disease_symptom.p'), 'rb'))
            slot_set = pickle.load(
                open(os.path.join(label_new_path, 'slot_set.p'), 'rb'))
            action_set = pickle.load(
                open(os.path.join(label_new_path, 'action_set.p'), 'rb'))

            temp_parameter[label] = copy.deepcopy(parameter)
            # print(parameter["saved_model"])
            path_list = parameter['saved_model'].split('/')
            path_list.insert(-1, 'lower')
            path_list.insert(-1, str(label))
            temp_parameter[label]['saved_model'] = '/'.join(path_list)
            temp_parameter[label]['gamma'] = temp_parameter[label][
                'gamma_worker']  # discount factor for the lower agent.

            temp_parameter[label]["input_size_dqn"] = self.input_size_dqn_all[
                int(label)]
            #temp_parameter[label]["input_size_dqn"] = (len(slot_set)-1) *3
            self.id2lowerAgent[label] = LowerAgent(
                action_set=action_set,
                slot_set=slot_set,
                disease_symptom=disease_symptom,
                parameter=temp_parameter[label],
                disease_as_action=False)
            # model_path = os.path.join(self.parameter.get("label_all_model_path"), label)
            #temp_parameter[label]["input_size_dqn"] = self.input_size_dqn_all[int(label)]
            '''
            temp_parameter[label]["input_size_dqn"] = (len(slot_set)) * 3
            #print(slot_set)
            self.pretrained_lowerAgent[label] = LowerAgent(action_set=action_set, slot_set=slot_set,
                                                   disease_symptom=disease_symptom, parameter=temp_parameter[label],
                                                   disease_as_action=True)
            # model_path = os.path.join(self.parameter.get("label_all_model_path"), label)

            self.pretrained_lowerAgent[label].dqn.restore_model(os.path.join(self.parameter.get("label_all_model_path"), model))
            self.pretrained_lowerAgent[label].dqn.current_net.eval()
            self.pretrained_lowerAgent[label].dqn.target_net.eval()
            '''

        # Master policy.
        if parameter.get("state_reduced"):
            input_size = (
                len(self.slot_set) - 1
            ) * 3  # the slot_set dictionary contains a "disease" key, which needs to be excluded first.
        else:
            input_size = parameter.get("input_size_dqn")
        hidden_size = parameter.get("hidden_size_dqn", 300)
        # Number of disease categories.
        self.output_size = len(self.id2lowerAgent)
        if self.parameter.get("disease_as_action") == False:
            self.output_size = len(
                self.id2lowerAgent
            ) + 1  # the extra dimension is the action that activates the disease classifier.
        #print("input_size",input_size)
        self.master = DQN2(input_size=input_size,
                           hidden_size=hidden_size,
                           output_size=self.output_size,
                           parameter=parameter,
                           named_tuple=('state', 'agent_action', 'reward',
                                        'next_state', 'episode_over'))
        self.parameter = parameter
        # self.experience_replay_pool = deque(maxlen=parameter.get("experience_replay_pool_size"))
        self.current_lower_agent_id = -1
        self.behave_prob = 1
        print("master:", self.master_action_space)
        self.count = 0
        self.subtask_terminal = True
        self.subtask_turn = 0
        self.subtask_max_turn = 5
        self.past_lower_agent_pool = {
            key: 0
            for key in self.id2lowerAgent.keys()
        }

        if parameter.get("train_mode") is False:
            print("########## master model is restore now ##########")
            self.master.restore_model(parameter.get("saved_model"))
            self.master.current_net.eval()
            self.master.target_net.eval()
            for label, agent in self.id2lowerAgent.items():
                #print(temp_parameter[label])
                self.id2lowerAgent[label].dqn.restore_model(
                    temp_parameter[label]['saved_model'])
                self.id2lowerAgent[label].dqn.current_net.eval()
                self.id2lowerAgent[label].dqn.target_net.eval()

        self.agent_action = {
            "turn": 1,
            "action": None,
            "request_slots": {},
            "inform_slots": {},
            "explicit_inform_slots": {},
            "implicit_inform_slots": {},
            "speaker": "agent"
        }
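
When state_reduced is set, the master's input size in Example #2 is (len(slot_set) - 1) * 3: the "disease" key is excluded and each remaining slot is assumed to contribute three values (for instance true / false / not-mentioned; the exact encoding is not shown in this snippet). A small sketch with a hypothetical slot_set:

# Hypothetical slot_set containing symptom slots plus the excluded 'disease' key.
slot_set = {'cough': 0, 'fever': 1, 'headache': 2, 'disease': 3}
input_size = (len(slot_set) - 1) * 3   # 3 assumed values per symptom slot
print(input_size)                      # 9
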
Example #3
    def __init__(self, action_set, slot_set, disease_symptom, parameter):
        self.action_set = action_set
        self.slot_set = slot_set
        self.disease_symptom = disease_symptom
        self.disease_num = parameter.get("disease_number")
        self.slot_dim = 1

        ##################
        # Master policy.
        #######################
        input_size = parameter.get("input_size_dqn")
        hidden_size = parameter.get("hidden_size_dqn", 100)
        self.output_size = parameter.get('goal_dim', 2*self.disease_num)
        self.dqn = DQN(input_size=input_size + self.output_size,
                       hidden_size=hidden_size,
                       output_size=self.output_size,
                       parameter=parameter,
                       named_tuple=('state', 'agent_action', 'reward', 'next_state', 'episode_over'))
        self.parameter = parameter
        self.experience_replay_pool = deque(maxlen=parameter.get("experience_replay_pool_size"))
        if parameter.get("train_mode") is False :
            self.dqn.restore_model(parameter.get("saved_model"))
            self.dqn.current_net.eval()
            self.dqn.target_net.eval()

        ###############################
        # Internal critic
        ##############################
        # symptom distribution by diseases.
        temp_slot_set = copy.deepcopy(slot_set)
        temp_slot_set.pop('disease')
        self.disease_to_symptom_dist = {}
        self.id2disease = {}
        total_count = np.zeros(len(temp_slot_set))
        for disease, v in self.disease_symptom.items():
            dist = np.zeros(len(temp_slot_set))
            self.id2disease[v['index']] = disease
            for symptom, count in v['symptom'].items():
                dist[temp_slot_set[symptom]] = count
                total_count[temp_slot_set[symptom]] += count
            self.disease_to_symptom_dist[disease] = dist

        # Normalize each disease's symptom counts into a distribution over symptoms.
        for disease in self.disease_to_symptom_dist.keys():
            self.disease_to_symptom_dist[disease] = self.disease_to_symptom_dist[disease] / total_count
        goal_embed_value = [0] * len(disease_symptom)
        for disease in self.disease_to_symptom_dist.keys():
            goal_embed_value[disease_symptom[disease]['index']] = list(self.disease_to_symptom_dist[disease])

        self.internal_critic = InternalCritic(input_size=len(temp_slot_set)*self.slot_dim + len(self.disease_symptom), hidden_size=hidden_size,
                                              output_size=len(temp_slot_set), goal_num=len(self.disease_symptom),
                                              goal_embedding_value=goal_embed_value, slot_set=temp_slot_set,
                                              parameter=parameter)
        print(os.getcwd())
        self.internal_critic.restore_model('../agent/pre_trained_internal_critic_dropout_both_one_hot512.pkl')

        #################
        # Lower agent.
        ##############
        temp_parameter = copy.deepcopy(parameter)
        temp_parameter['input_size_dqn'] = input_size + len(self.disease_symptom)
        path_list = parameter['saved_model'].split('/')
        path_list.insert(-1, 'lower')
        temp_parameter['saved_model'] = '/'.join(path_list)
        temp_parameter['gamma'] = temp_parameter['gamma_worker'] # discount factor for the lower agent.
        self.lower_agent = LowerAgent(action_set=action_set, slot_set=slot_set, disease_symptom=disease_symptom,
                                      parameter=temp_parameter, disease_as_action=False)
        named_tuple = ('state', 'agent_action', 'reward', 'next_state', 'episode_over','goal')
        self.lower_agent.dqn.Transition = namedtuple('Transition', named_tuple)
        self.visitation_count = np.zeros(shape=(self.output_size, len(self.lower_agent.action_space))) # [goal_num, lower_action_num]
        if temp_parameter.get("train_mode") is False:
            self.lower_agent.dqn.restore_model(temp_parameter.get("saved_model"))
            self.lower_agent.dqn.current_net.eval()
            self.lower_agent.dqn.target_net.eval()
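
Example #3's internal critic turns per-disease symptom counts into distributions by dividing each count by the total count of that symptom across all diseases. A self-contained sketch of that normalization with hypothetical counts:

import numpy as np

# Hypothetical structures mirroring temp_slot_set and disease_symptom above.
temp_slot_set = {'cough': 0, 'fever': 1}
disease_symptom = {'flu':  {'index': 0, 'symptom': {'cough': 2, 'fever': 8}},
                   'cold': {'index': 1, 'symptom': {'cough': 6, 'fever': 2}}}

disease_to_symptom_dist = {}
total_count = np.zeros(len(temp_slot_set))
for disease, v in disease_symptom.items():
    dist = np.zeros(len(temp_slot_set))
    for symptom, count in v['symptom'].items():
        dist[temp_slot_set[symptom]] = count
        total_count[temp_slot_set[symptom]] += count
    disease_to_symptom_dist[disease] = dist

# Normalize once by the per-symptom totals.
for disease in disease_to_symptom_dist:
    disease_to_symptom_dist[disease] = disease_to_symptom_dist[disease] / total_count
print(disease_to_symptom_dist['flu'])   # cough: 2/8 = 0.25, fever: 8/10 = 0.8
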
Example #4
    def __init__(self, action_set, slot_set, disease_symptom, parameter):
        self.parameter = parameter
        self.action_set = action_set
        self.slot_set = slot_set
        self.slot_set.pop("disease")
        self.disease_symptom = disease_symptom
        if parameter.get('prioritized_replay'):
            self.experience_replay_pool = PrioritizedReplayBuffer(
                buffer_size=parameter.get("experience_replay_pool_size"))
        else:
            self.experience_replay_pool = deque(
                maxlen=parameter.get("experience_replay_pool_size"))

        self.input_size_dqn_all = {
            1: 374,
            4: 494,
            5: 389,
            6: 339,
            7: 279,
            9: 409,
            10: 254,
            11: 304,
            12: 304,
            13: 359,
            14: 394,
            19: 414
        }

        self.id2disease = {}
        self.id2lowerAgent = {}
        self.master_action_space = []
        dirs = os.listdir(self.parameter.get("label_all_model_path"))
        for model in dirs:
            #pattern = re.compile(r'(?<=label=)\d+\.?\d*')
            #label = pattern.findall(model)
            reg = re.compile(r"(?<=label)\d+")
            match = reg.search(model)
            label = match.group(0)
            #print(label)
            self.master_action_space.append(label)
            #assert len(label) == 1
            #label = label[0]
            label_all_path = self.parameter.get("file_all")
            label_new_path = os.path.join(label_all_path, 'label' + label)
            disease_symptom = pickle.load(
                open(os.path.join(label_new_path, 'disease_symptom.p'), 'rb'))
            slot_set = pickle.load(
                open(os.path.join(label_new_path, 'slot_set.p'), 'rb'))
            action_set = pickle.load(
                open(os.path.join(label_new_path, 'action_set.p'), 'rb'))

            temp_parameter = copy.deepcopy(parameter)
            #print(parameter["saved_model"])
            #if parameter.get("train_mode"):
            #    temp_parameter["saved_model"] = parameter["saved_model"].split('model_d10_agent')[0] + 'lower/' + str(
            #        label) + '/model_d10_agent' + parameter["saved_model"].split('model_d10_agent')[1]
            #else:
            temp_parameter["saved_model"] = parameter["saved_model"].split(
                'model_d10agent')[0] + 'lower/' + str(
                    label) + '/model_d10agent' + parameter[
                        "saved_model"].split('model_d10agent')[1]
            temp_parameter["input_size_dqn"] = self.input_size_dqn_all[int(
                label)]
            self.id2lowerAgent[label] = LowerAgent(
                action_set=action_set,
                slot_set=slot_set,
                disease_symptom=disease_symptom,
                parameter=temp_parameter,
                disease_as_action=True)
            #model_path = os.path.join(self.parameter.get("label_all_model_path"), label)

            self.id2lowerAgent[label].dqn.restore_model(
                os.path.join(self.parameter.get("label_all_model_path"),
                             model))
            self.id2lowerAgent[label].dqn.current_net.eval()
            self.id2lowerAgent[label].dqn.target_net.eval()

        # Master policy.
        if parameter.get("state_reduced"):
            input_size = len(self.slot_set) * 3
        else:
            input_size = parameter.get("input_size_dqn")
        hidden_size = parameter.get("hidden_size_dqn", 100)
        output_size = len(self.id2lowerAgent)
        if self.parameter.get("disease_as_action") == False:
            output_size = len(self.id2lowerAgent) + 1
        #print("input_size",input_size)
        self.master = DQN2(input_size=input_size,
                           hidden_size=hidden_size,
                           output_size=output_size,
                           parameter=parameter,
                           named_tuple=('state', 'agent_action', 'reward',
                                        'next_state', 'episode_over'))
        self.parameter = parameter
        #self.experience_replay_pool = deque(maxlen=parameter.get("experience_replay_pool_size"))
        self.current_lower_agent_id = -1
        self.behave_prob = 1
        print("master:", self.master_action_space)
        if parameter.get("train_mode") is False:
            print("########## master model is restore now ##########")
            self.master.restore_model(parameter.get("saved_model"))
            self.master.current_net.eval()
            self.master.target_net.eval()

        self.agent_action = {
            "turn": 1,
            "action": None,
            "request_slots": {},
            "inform_slots": {},
            "explicit_inform_slots": {},
            "implicit_inform_slots": {},
            "speaker": "agent"
        }
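
Example #4 discovers its pretrained lower agents by listing the model directory and pulling the numeric label out of each entry name with a lookbehind regex. A minimal sketch on a hypothetical filename:

import re

reg = re.compile(r"(?<=label)\d+")     # digits immediately preceded by the literal 'label'
model = 'model_d10agent_label12.pkl'   # hypothetical checkpoint filename
match = reg.search(model)
print(match.group(0))                  # '12'
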
Example #5
    def __init__(self, action_set, slot_set, disease_symptom, parameter):
        self.action_set = action_set
        self.slot_set = slot_set
        self.disease_symptom = disease_symptom

        # symptom distribution by diseases.
        temp_slot_set = copy.deepcopy(slot_set)
        temp_slot_set.pop('disease')
        self.disease_to_symptom_dist = {}
        total_count = np.zeros(len(temp_slot_set))
        for disease, v in self.disease_symptom.items():
            dist = np.zeros(len(temp_slot_set))
            for symptom, count in v['symptom'].items():
                dist[temp_slot_set[symptom]] = count
                total_count[temp_slot_set[symptom]] += count
            self.disease_to_symptom_dist[disease] = dist

        for disease in self.disease_to_symptom_dist.keys():
            self.disease_to_symptom_dist[
                disease] = self.disease_to_symptom_dist[disease] / total_count

        ##################################
        # Building lower agents. The master agent and the lower agents share the same state
        # representation, so they also share the same slot set.
        ###########################
        self.id2disease = {}
        self.id2lowerAgent = {}
        for disease, v in disease_symptom.items():
            self.id2disease[v["index"]] = disease
            temp_disease_symptom = {}
            temp_disease_symptom[disease] = {}
            temp_disease_symptom[disease]["index"] = 0
            temp_disease_symptom[disease]["symptom"] = v["symptom"]
            temp_slot_set = {}
            for symptom in v['symptom'].keys():
                temp_slot_set.setdefault(symptom, len(temp_slot_set))
            temp_parameter = copy.deepcopy(parameter)
            temp_parameter["saved_model"] = parameter["saved_model"].split(
                'model_d4_agent')[0] + 'lower/' + str(
                    v["index"]) + '/model_d4_agent' + parameter[
                        "saved_model"].split('model_d4_agent')[1]
            self.id2lowerAgent[v["index"]] = LowerAgent(
                action_set=action_set,
                slot_set=slot_set,
                disease_symptom=temp_disease_symptom,
                parameter=temp_parameter)

        # Master policy.
        input_size = parameter.get("input_size_dqn")
        hidden_size = parameter.get("hidden_size_dqn", 100)
        output_size = len(self.id2lowerAgent)
        self.dqn = DQN(input_size=input_size,
                       hidden_size=hidden_size,
                       output_size=output_size,
                       parameter=parameter,
                       named_tuple=('state', 'agent_action', 'reward',
                                    'next_state', 'episode_over',
                                    'behave_prob'))
        self.parameter = parameter
        self.experience_replay_pool = deque(
            maxlen=parameter.get("experience_replay_pool_size"))
        self.current_lower_agent_id = -1
        self.behave_prob = 1

        self.agent_action = {
            "turn": 1,
            "action": None,
            "request_slots": {},
            "inform_slots": {},
            "explicit_inform_slots": {},
            "implicit_inform_slots": {},
            "speaker": "agent"
        }
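
Example #5 gives every disease its own lower agent, built from a single-entry disease_symptom dictionary and a slot set restricted to that disease's symptoms. A small sketch of that reduction with a hypothetical disease entry:

# Hypothetical single entry from disease_symptom.
disease = 'flu'
v = {'index': 0, 'symptom': {'cough': 2, 'fever': 8}}

temp_disease_symptom = {disease: {'index': 0, 'symptom': v['symptom']}}
temp_slot_set = {}
for symptom in v['symptom'].keys():
    temp_slot_set.setdefault(symptom, len(temp_slot_set))
print(temp_slot_set)   # {'cough': 0, 'fever': 1}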