Example #1
 def flush_pool(self):
     if self.parameter.get('prioritized_replay'):
         self.experience_replay_pool = PrioritizedReplayBuffer(
             buffer_size=self.master_experience_replay_size)
     else:
         self.experience_replay_pool = deque(
             maxlen=self.master_experience_replay_size)
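
Example #1 resets the agent's replay pool by rebuilding the container, switching between a prioritized buffer and a plain deque on a config flag. Below is a minimal self-contained sketch of the same pattern; the PrioritizedReplayBuffer here is a hypothetical stub standing in for the repo's real class.

from collections import deque

class PrioritizedReplayBuffer:
    # Hypothetical stub; the real class in this repo also tracks sampling
    # priorities (e.g. TD errors) alongside the stored transitions.
    def __init__(self, buffer_size):
        self.buffer_size = buffer_size
        self.data = []

class Agent:
    def __init__(self, parameter, master_experience_replay_size=10000):
        self.parameter = parameter
        self.master_experience_replay_size = master_experience_replay_size
        self.flush_pool()  # build the initial, empty pool

    def flush_pool(self):
        # Rebuilding the container drops every stored transition while
        # keeping the configured capacity.
        if self.parameter.get('prioritized_replay'):
            self.experience_replay_pool = PrioritizedReplayBuffer(
                buffer_size=self.master_experience_replay_size)
        else:
            self.experience_replay_pool = deque(
                maxlen=self.master_experience_replay_size)

agent = Agent({'prioritized_replay': False})
agent.experience_replay_pool.append(('state', 'action', 1.0, 'next_state'))
agent.flush_pool()
assert len(agent.experience_replay_pool) == 0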
Example #2
    def __init__(self,
                 action_set,
                 slot_set,
                 disease_symptom,
                 parameter,
                 disease_as_action=True):
        self.parameter = parameter
        symptom_set = set()
        for v in disease_symptom.values():
            # Collect every symptom key across all diseases.
            symptom_set = symptom_set | set(v['symptom'].keys())

        self.action_set = action_set
        self.slot_set = slot_set
        if parameter.get('prioritized_replay'):
            self.experience_replay_pool = PrioritizedReplayBuffer(
                buffer_size=parameter.get("experience_replay_pool_size"))
        else:
            self.experience_replay_pool = deque(
                maxlen=parameter.get("experience_replay_pool_size"))
        self.candidate_disease_list = []
        self.candidate_symptom_list = []
        self.disease_symptom = self.disease_symptom_clip(
            disease_symptom, 2.5, parameter)

        self.agent_action = {
            "turn": 1,
            "action": None,
            "request_slots": {},
            "inform_slots": {},
            "explicit_inform_slots": {},
            "implicit_inform_slots": {},
            "speaker": "agent"
        }
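
Example #2 builds the global symptom vocabulary by unioning the symptom keys of every disease. The same computation can be done in a single union call; here is the pattern on toy data shaped like the disease_symptom mapping the loop expects (the toy values are illustrative only).

# disease_symptom maps disease -> {'symptom': {symptom_name: count}}.
disease_symptom = {
    'cold': {'symptom': {'cough': 10, 'fever': 7}},
    'flu': {'symptom': {'fever': 9, 'fatigue': 5}},
}

# Equivalent to the accumulation loop above: union all key views at once.
symptom_set = set().union(*(v['symptom'].keys()
                            for v in disease_symptom.values()))
assert symptom_set == {'cough', 'fever', 'fatigue'}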
Example #3
    def __init__(self, action_set, slot_set, disease_symptom, parameter):
        self.parameter = parameter
        self.action_set = action_set
        self.slot_set = slot_set
        self.slot_set.pop("disease")
        self.disease_symptom = disease_symptom
        self.master_experience_replay_size = 10000
        if parameter.get('prioritized_replay'):
            self.experience_replay_pool = PrioritizedReplayBuffer(
                buffer_size=self.master_experience_replay_size)
        else:
            self.experience_replay_pool = deque(
                maxlen=self.master_experience_replay_size)

        self.input_size_dqn_all = {
            1: 374,
            4: 494,
            5: 389,
            6: 339,
            7: 279,
            12: 304,
            13: 359,
            14: 394,
            19: 414
        }

        self.id2disease = {}
        self.id2lowerAgent = {}
        self.master_action_space = []
        temp_parameter = {}
        for key, value in self.input_size_dqn_all.items():
            label = str(key)
            self.master_action_space.append(label)
            label_all_path = self.parameter.get("file_all")
            label_new_path = os.path.join(label_all_path, 'label' + str(label))
            disease_symptom = pickle.load(
                open(os.path.join(label_new_path, 'disease_symptom.p'), 'rb'))
            slot_set = pickle.load(
                open(os.path.join(label_new_path, 'slot_set.p'), 'rb'))
            action_set = pickle.load(
                open(os.path.join(label_new_path, 'action_set.p'), 'rb'))

            temp_parameter[label] = copy.deepcopy(parameter)
            # Splice 'lower/<label>/' into the checkpoint path just before
            # the model file name.
            path_list = parameter['saved_model'].split('/')
            path_list.insert(-1, 'lower')
            path_list.insert(-1, str(label))
            temp_parameter[label]['saved_model'] = '/'.join(path_list)
            temp_parameter[label]['gamma'] = temp_parameter[label][
                'gamma_worker']  # discount factor for the lower agent.

            temp_parameter[label]["input_size_dqn"] = value
            self.id2lowerAgent[label] = LowerAgent(
                action_set=action_set,
                slot_set=slot_set,
                disease_symptom=disease_symptom,
                parameter=temp_parameter[label],
                disease_as_action=False)

        # Master policy.
        if parameter.get("state_reduced"):
            input_size = len(self.slot_set) * 3
        else:
            input_size = parameter.get("input_size_dqn")
        hidden_size = parameter.get("hidden_size_dqn", 300)
        output_size = len(self.id2lowerAgent)
        if self.parameter.get("disease_as_action") == False:
            # One extra action hands control to the disease classifier.
            output_size = len(self.id2lowerAgent) + 1
        self.master = DQN2(input_size=input_size,
                           hidden_size=hidden_size,
                           output_size=output_size,
                           parameter=parameter,
                           named_tuple=('state', 'agent_action', 'reward',
                                        'next_state', 'episode_over'))
        self.current_lower_agent_id = -1
        self.behave_prob = 1
        print("master:", self.master_action_space)
        self.count = 0
        self.past_lower_agent_pool = {
            key: 0
            for key in self.id2lowerAgent.keys()
        }

        if parameter.get("train_mode") is False:
            print("########## master model is restore now ##########")
            self.master.restore_model(parameter.get("saved_model"))
            self.master.current_net.eval()
            self.master.target_net.eval()
            for label, agent in self.id2lowerAgent.items():
                self.id2lowerAgent[label].dqn.restore_model(
                    temp_parameter[label]['saved_model'])
                self.id2lowerAgent[label].dqn.current_net.eval()
                self.id2lowerAgent[label].dqn.target_net.eval()

        self.agent_action = {
            "turn": 1,
            "action": None,
            "request_slots": {},
            "inform_slots": {},
            "explicit_inform_slots": {},
            "implicit_inform_slots": {},
            "speaker": "agent"
        }
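
Example #3 derives each lower agent's checkpoint path by splicing 'lower/<label>/' into the master's saved_model path just before the file name. The rewrite in isolation, on a hypothetical path:

def lower_agent_model_path(saved_model, label):
    # Mirrors the path_list.insert(-1, ...) calls above: the two inserts
    # land 'lower' and the label just before the final path component.
    path_list = saved_model.split('/')
    path_list.insert(-1, 'lower')
    path_list.insert(-1, str(label))
    return '/'.join(path_list)

# Illustrative path only; real runs take it from parameter['saved_model'].
print(lower_agent_model_path('ckpt/run1/model_d10agent.pkl', 4))
# -> ckpt/run1/lower/4/model_d10agent.pkl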
Example #4
    def __init__(self, action_set, slot_set, disease_symptom, parameter):
        self.parameter = parameter
        self.action_set = action_set
        self.slot_set = slot_set
        # Unlike Example #3, "disease" stays in slot_set here; the master
        # input-size computation below subtracts it instead.
        self.disease_symptom = disease_symptom
        self.master_experience_replay_size = 10000
        if parameter.get('prioritized_replay'):
            self.experience_replay_pool = PrioritizedReplayBuffer(
                buffer_size=self.master_experience_replay_size)
        else:
            self.experience_replay_pool = deque(
                maxlen=self.master_experience_replay_size)
        if self.parameter.get("data_type") == 'simulated':
            self.input_size_dqn_all = {
                1: 374,
                4: 494,
                5: 389,
                6: 339,
                7: 279,
                12: 304,
                13: 359,
                14: 394,
                19: 414
            }
        elif self.parameter.get("data_type") == 'real':
            self.input_size_dqn_all = {0: 84, 1: 81, 2: 81, 3: 83}
        else:
            raise ValueError("data_type must be 'simulated' or 'real'.")

        self.id2disease = {}
        self.id2lowerAgent = {}
        self.pretrained_lowerAgent = {}
        self.master_action_space = []
        temp_parameter = {}

        # Each label corresponds to a different disease/symptom subset.
        for key, value in self.input_size_dqn_all.items():
            label = str(key)
            self.master_action_space.append(label)
            label_all_path = self.parameter.get("file_all")
            label_new_path = os.path.join(label_all_path, 'label' + str(label))
            disease_symptom = pickle.load(
                open(os.path.join(label_new_path, 'disease_symptom.p'), 'rb'))
            slot_set = pickle.load(
                open(os.path.join(label_new_path, 'slot_set.p'), 'rb'))
            action_set = pickle.load(
                open(os.path.join(label_new_path, 'action_set.p'), 'rb'))

            temp_parameter[label] = copy.deepcopy(parameter)
            # Splice 'lower/<label>/' into the checkpoint path just before
            # the model file name.
            path_list = parameter['saved_model'].split('/')
            path_list.insert(-1, 'lower')
            path_list.insert(-1, str(label))
            temp_parameter[label]['saved_model'] = '/'.join(path_list)
            temp_parameter[label]['gamma'] = temp_parameter[label][
                'gamma_worker']  # discount factor for the lower agent.

            temp_parameter[label]["input_size_dqn"] = self.input_size_dqn_all[
                int(label)]
            self.id2lowerAgent[label] = LowerAgent(
                action_set=action_set,
                slot_set=slot_set,
                disease_symptom=disease_symptom,
                parameter=temp_parameter[label],
                disease_as_action=False)

        # Master policy.
        if parameter.get("state_reduced"):
            # slot_set still contains the "disease" key, which has to be
            # excluded from the state representation.
            input_size = (len(self.slot_set) - 1) * 3
        else:
            input_size = parameter.get("input_size_dqn")
        hidden_size = parameter.get("hidden_size_dqn", 300)
        # Number of disease categories.
        self.output_size = len(self.id2lowerAgent)
        if self.parameter.get("disease_as_action") == False:
            # The extra action activates the disease classifier.
            self.output_size = len(self.id2lowerAgent) + 1
        self.master = DQN2(input_size=input_size,
                           hidden_size=hidden_size,
                           output_size=self.output_size,
                           parameter=parameter,
                           named_tuple=('state', 'agent_action', 'reward',
                                        'next_state', 'episode_over'))
        self.current_lower_agent_id = -1
        self.behave_prob = 1
        print("master:", self.master_action_space)
        self.count = 0
        self.subtask_terminal = True
        self.subtask_turn = 0
        self.subtask_max_turn = 5
        self.past_lower_agent_pool = {
            key: 0
            for key in self.id2lowerAgent.keys()
        }

        if parameter.get("train_mode") is False:
            print("########## master model is restore now ##########")
            self.master.restore_model(parameter.get("saved_model"))
            self.master.current_net.eval()
            self.master.target_net.eval()
            for label, agent in self.id2lowerAgent.items():
                self.id2lowerAgent[label].dqn.restore_model(
                    temp_parameter[label]['saved_model'])
                self.id2lowerAgent[label].dqn.current_net.eval()
                self.id2lowerAgent[label].dqn.target_net.eval()

        self.agent_action = {
            "turn": 1,
            "action": None,
            "request_slots": {},
            "inform_slots": {},
            "explicit_inform_slots": {},
            "implicit_inform_slots": {},
            "speaker": "agent"
        }
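
Example #4 sizes the master DQN from the slot set and the number of lower agents: in the reduced-state case every slot except "disease" contributes three entries, and when diseases are not actions the output grows by one action that activates the disease classifier. A small sketch of that sizing logic; the three-values-per-slot reading (e.g. unknown / confirmed / denied) is an assumption about the state encoding, not confirmed by this snippet.

def master_io_sizes(slot_set, n_lower_agents, state_reduced,
                    disease_as_action, input_size_dqn=None):
    if state_reduced:
        # Every slot except 'disease' contributes 3 state entries
        # (assumed encoding: unknown / confirmed / denied).
        input_size = (len(slot_set) - 1) * 3
    else:
        input_size = input_size_dqn
    # One master action per lower agent, plus one extra action that
    # activates the disease classifier when diseases are not actions.
    output_size = n_lower_agents + (0 if disease_as_action else 1)
    return input_size, output_size

# With 5 slots (incl. 'disease'), 4 lower agents, reduced state:
assert master_io_sizes({'disease', 'a', 'b', 'c', 'd'}, 4,
                       True, False) == (12, 5)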
Example #5
    def __init__(self, action_set, slot_set, disease_symptom, parameter):
        self.parameter = parameter
        self.action_set = action_set
        self.slot_set = slot_set
        self.slot_set.pop("disease")
        self.disease_symptom = disease_symptom
        if parameter.get('prioritized_replay'):
            self.experience_replay_pool = PrioritizedReplayBuffer(
                buffer_size=parameter.get("experience_replay_pool_size"))
        else:
            self.experience_replay_pool = deque(
                maxlen=parameter.get("experience_replay_pool_size"))

        self.input_size_dqn_all = {
            1: 374,
            4: 494,
            5: 389,
            6: 339,
            7: 279,
            9: 409,
            10: 254,
            11: 304,
            12: 304,
            13: 359,
            14: 394,
            19: 414
        }

        self.id2disease = {}
        self.id2lowerAgent = {}
        self.master_action_space = []
        dirs = os.listdir(self.parameter.get("label_all_model_path"))
        for model in dirs:
            # Extract the numeric label that follows the literal prefix
            # 'label' in the directory name, e.g. 'label4' -> '4'.
            reg = re.compile(r"(?<=label)\d+")
            match = reg.search(model)
            label = match.group(0)
            self.master_action_space.append(label)
            label_all_path = self.parameter.get("file_all")
            label_new_path = os.path.join(label_all_path, 'label' + label)
            disease_symptom = pickle.load(
                open(os.path.join(label_new_path, 'disease_symptom.p'), 'rb'))
            slot_set = pickle.load(
                open(os.path.join(label_new_path, 'slot_set.p'), 'rb'))
            action_set = pickle.load(
                open(os.path.join(label_new_path, 'action_set.p'), 'rb'))

            temp_parameter = copy.deepcopy(parameter)
            # Rewrite the master checkpoint path into this lower agent's:
            # insert 'lower/<label>/' before the 'model_d10agent' file name.
            parts = parameter["saved_model"].split('model_d10agent')
            temp_parameter["saved_model"] = (parts[0] + 'lower/' + str(label) +
                                             '/model_d10agent' + parts[1])
            temp_parameter["input_size_dqn"] = self.input_size_dqn_all[int(
                label)]
            self.id2lowerAgent[label] = LowerAgent(
                action_set=action_set,
                slot_set=slot_set,
                disease_symptom=disease_symptom,
                parameter=temp_parameter,
                disease_as_action=True)

            self.id2lowerAgent[label].dqn.restore_model(
                os.path.join(self.parameter.get("label_all_model_path"),
                             model))
            self.id2lowerAgent[label].dqn.current_net.eval()
            self.id2lowerAgent[label].dqn.target_net.eval()

        # Master policy.
        if parameter.get("state_reduced"):
            input_size = len(self.slot_set) * 3
        else:
            input_size = parameter.get("input_size_dqn")
        hidden_size = parameter.get("hidden_size_dqn", 100)
        output_size = len(self.id2lowerAgent)
        if self.parameter.get("disease_as_action") == False:
            # One extra action hands control to the disease classifier.
            output_size = len(self.id2lowerAgent) + 1
        self.master = DQN2(input_size=input_size,
                           hidden_size=hidden_size,
                           output_size=output_size,
                           parameter=parameter,
                           named_tuple=('state', 'agent_action', 'reward',
                                        'next_state', 'episode_over'))
        self.current_lower_agent_id = -1
        self.behave_prob = 1
        print("master:", self.master_action_space)
        if parameter.get("train_mode") is False:
            print("########## master model is restore now ##########")
            self.master.restore_model(parameter.get("saved_model"))
            self.master.current_net.eval()
            self.master.target_net.eval()

        self.agent_action = {
            "turn": 1,
            "action": None,
            "request_slots": {},
            "inform_slots": {},
            "explicit_inform_slots": {},
            "implicit_inform_slots": {},
            "speaker": "agent"
        }
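
Example #5 discovers lower agents by scanning a checkpoint directory and pulling the numeric label out of each entry with a lookbehind regex. The extraction on its own, with illustrative directory names:

import re

# Lookbehind from Example #5: match the digits that directly follow
# the literal prefix 'label' in a checkpoint directory name.
reg = re.compile(r"(?<=label)\d+")

for model in ['label4', 'label19_agent', 'readme.txt']:  # illustrative
    match = reg.search(model)
    if match:  # entries without a label are skipped
        print(match.group(0))
# prints: 4, then 19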