def __init__(self, action_set, slot_set, disease_symptom, parameter):
    """Hierarchical agent: a master DQN2 policy that selects among a fixed
    set of lower agents (one per disease-group label), each loaded from its
    own dataset directory under ``parameter["file_all"]``.

    :param action_set: dict of actions for the master level.
    :param slot_set: dict of slots; the "disease" key is removed below.
    :param disease_symptom: disease -> symptom statistics for the master.
    :param parameter: global configuration dict (copied per lower agent).
    """
    self.parameter = parameter
    self.action_set = action_set
    self.slot_set = slot_set
    # NOTE(review): mutates the caller's dict in place — "disease" is removed
    # so it does not count as a requestable slot in the state representation.
    self.slot_set.pop("disease")
    self.disease_symptom = disease_symptom
    self.master_experience_replay_size = 10000
    # Master-level replay buffer: prioritized or plain FIFO.
    if parameter.get('prioritized_replay'):
        self.experience_replay_pool = PrioritizedReplayBuffer(
            buffer_size=self.master_experience_replay_size)
    else:
        self.experience_replay_pool = deque(
            maxlen=self.master_experience_replay_size)
    # Disease-group label -> DQN input size of that group's lower agent.
    self.input_size_dqn_all = {
        1: 374,
        4: 494,
        5: 389,
        6: 339,
        7: 279,
        12: 304,
        13: 359,
        14: 394,
        19: 414
    }
    self.id2disease = {}
    self.id2lowerAgent = {}
    self.master_action_space = []
    temp_parameter = {}
    # Build one lower agent per label; each label has its own dataset
    # directory ("label<k>") holding its pickled disease/slot/action sets.
    for key, value in self.input_size_dqn_all.items():
        label = str(key)
        self.master_action_space.append(label)
        label_all_path = self.parameter.get("file_all")
        label_new_path = os.path.join(label_all_path, 'label' + str(label))
        disease_symptom = pickle.load(
            open(os.path.join(label_new_path, 'disease_symptom.p'), 'rb'))
        slot_set = pickle.load(
            open(os.path.join(label_new_path, 'slot_set.p'), 'rb'))
        action_set = pickle.load(
            open(os.path.join(label_new_path, 'action_set.p'), 'rb'))
        # Per-label copy of the global config so each lower agent gets its
        # own checkpoint path, worker gamma, and input size.
        temp_parameter[label] = copy.deepcopy(parameter)
        # Checkpoint path becomes .../lower/<label>/<model file>.
        path_list = parameter['saved_model'].split('/')
        path_list.insert(-1, 'lower')
        path_list.insert(-1, str(label))
        temp_parameter[label]['saved_model'] = '/'.join(path_list)
        temp_parameter[label]['gamma'] = temp_parameter[label][
            'gamma_worker']  # discount factor for the lower agent.
        temp_parameter[label]["input_size_dqn"] = value
        self.id2lowerAgent[label] = LowerAgent(
            action_set=action_set,
            slot_set=slot_set,
            disease_symptom=disease_symptom,
            parameter=temp_parameter[label],
            disease_as_action=False)

    # Master policy.
    if parameter.get("state_reduced"):
        # Reduced state: 3 values per slot ("disease" already popped above).
        input_size = len(self.slot_set) * 3
    else:
        input_size = parameter.get("input_size_dqn")
    hidden_size = parameter.get("hidden_size_dqn", 300)
    output_size = len(self.id2lowerAgent)
    if self.parameter.get("disease_as_action") == False:
        # One extra master action for activating the disease classifier.
        output_size = len(self.id2lowerAgent) + 1
    self.master = DQN2(input_size=input_size,
                       hidden_size=hidden_size,
                       output_size=output_size,
                       parameter=parameter,
                       named_tuple=('state', 'agent_action', 'reward',
                                    'next_state', 'episode_over'))
    self.parameter = parameter
    self.current_lower_agent_id = -1
    self.behave_prob = 1
    print("master:", self.master_action_space)
    self.count = 0
    self.past_lower_agent_pool = {
        key: 0
        for key in self.id2lowerAgent.keys()
    }
    # Evaluation mode: restore the master and every lower agent from disk
    # and freeze their networks.
    if parameter.get("train_mode") is False:
        print("########## master model is restore now ##########")
        self.master.restore_model(parameter.get("saved_model"))
        self.master.current_net.eval()
        self.master.target_net.eval()
        for label, agent in self.id2lowerAgent.items():
            self.id2lowerAgent[label].dqn.restore_model(
                temp_parameter[label]['saved_model'])
            self.id2lowerAgent[label].dqn.current_net.eval()
            self.id2lowerAgent[label].dqn.target_net.eval()
    # Template for the agent's dialogue action at each turn.
    self.agent_action = {
        "turn": 1,
        "action": None,
        "request_slots": {},
        "inform_slots": {},
        "explicit_inform_slots": {},
        "implicit_inform_slots": {},
        "speaker": "agent"
    }
def __init__(self, action_set, slot_set, disease_symptom, parameter):
    """Hierarchical agent variant that supports both the simulated and the
    real dataset (selected via ``parameter["data_type"]``) and tracks
    subtask (lower-agent episode) state for the master.

    :param action_set: dict of actions for the master level.
    :param slot_set: dict of slots (kept intact here; "disease" pop is
        commented out, the -1 below compensates for it).
    :param disease_symptom: disease -> symptom statistics for the master.
    :param parameter: global configuration dict (copied per lower agent).
    """
    self.parameter = parameter
    self.action_set = action_set
    self.slot_set = slot_set
    #self.slot_set.pop("disease")
    self.disease_symptom = disease_symptom
    self.master_experience_replay_size = 10000
    # Master-level replay buffer: prioritized or plain FIFO.
    if parameter.get('prioritized_replay'):
        self.experience_replay_pool = PrioritizedReplayBuffer(
            buffer_size=self.master_experience_replay_size)
    else:
        self.experience_replay_pool = deque(
            maxlen=self.master_experience_replay_size)
    # Disease-group label -> DQN input size, per dataset.
    if self.parameter.get("data_type") == 'simulated':
        self.input_size_dqn_all = {
            1: 374,
            4: 494,
            5: 389,
            6: 339,
            7: 279,
            12: 304,
            13: 359,
            14: 394,
            19: 414
        }
    elif self.parameter.get("data_type") == 'real':
        self.input_size_dqn_all = {0: 84, 1: 81, 2: 81, 3: 83}
    else:
        raise ValueError
    self.id2disease = {}
    self.id2lowerAgent = {}
    self.pretrained_lowerAgent = {}
    self.master_action_space = []
    temp_parameter = {}
    # Each label corresponds to a different disease group with its own
    # pickled disease/slot/action sets under "label<k>".
    for key, value in self.input_size_dqn_all.items():
        label = str(key)
        self.master_action_space.append(label)
        label_all_path = self.parameter.get("file_all")
        label_new_path = os.path.join(label_all_path, 'label' + str(label))
        disease_symptom = pickle.load(
            open(os.path.join(label_new_path, 'disease_symptom.p'), 'rb'))
        slot_set = pickle.load(
            open(os.path.join(label_new_path, 'slot_set.p'), 'rb'))
        action_set = pickle.load(
            open(os.path.join(label_new_path, 'action_set.p'), 'rb'))
        # Per-label copy of the global config so each lower agent gets its
        # own checkpoint path, worker gamma, and input size.
        temp_parameter[label] = copy.deepcopy(parameter)
        # Checkpoint path becomes .../lower/<label>/<model file>.
        path_list = parameter['saved_model'].split('/')
        path_list.insert(-1, 'lower')
        path_list.insert(-1, str(label))
        temp_parameter[label]['saved_model'] = '/'.join(path_list)
        temp_parameter[label]['gamma'] = temp_parameter[label][
            'gamma_worker']  # discount factor for the lower agent.
        temp_parameter[label]["input_size_dqn"] = self.input_size_dqn_all[
            int(label)]
        #temp_parameter[label]["input_size_dqn"] = (len(slot_set)-1) *3
        self.id2lowerAgent[label] = LowerAgent(
            action_set=action_set,
            slot_set=slot_set,
            disease_symptom=disease_symptom,
            parameter=temp_parameter[label],
            disease_as_action=False)
        # Dead code kept as-is (bare string literal, a runtime no-op):
        '''
        temp_parameter[label]["input_size_dqn"] = (len(slot_set)) * 3
        #print(slot_set)
        self.pretrained_lowerAgent[label] = LowerAgent(action_set=action_set, slot_set=slot_set,
                                                       disease_symptom=disease_symptom, parameter=temp_parameter[label],
                                                       disease_as_action=True)
        # model_path = os.path.join(self.parameter.get("label_all_model_path"), label)
        self.pretrained_lowerAgent[label].dqn.restore_model(os.path.join(self.parameter.get("label_all_model_path"), model))
        self.pretrained_lowerAgent[label].dqn.current_net.eval()
        self.pretrained_lowerAgent[label].dqn.target_net.eval()
        '''

    # Master policy.
    if parameter.get("state_reduced"):
        input_size = (
            len(self.slot_set) - 1
        ) * 3  # the dictionary of slot_set contains a key of "disease" which need to be removed first.
    else:
        input_size = parameter.get("input_size_dqn")
    hidden_size = parameter.get("hidden_size_dqn", 300)
    # Number of disease categories (= number of lower agents).
    self.output_size = len(self.id2lowerAgent)
    if self.parameter.get("disease_as_action") == False:
        self.output_size = len(
            self.id2lowerAgent
        ) + 1  # the extra one size is the action of activating disease classifier
    self.master = DQN2(input_size=input_size,
                       hidden_size=hidden_size,
                       output_size=self.output_size,
                       parameter=parameter,
                       named_tuple=('state', 'agent_action', 'reward',
                                    'next_state', 'episode_over'))
    self.parameter = parameter
    self.current_lower_agent_id = -1
    self.behave_prob = 1
    print("master:", self.master_action_space)
    self.count = 0
    # Subtask bookkeeping: a lower agent runs for at most
    # `subtask_max_turn` turns before control returns to the master.
    self.subtask_terminal = True
    self.subtask_turn = 0
    self.subtask_max_turn = 5
    self.past_lower_agent_pool = {
        key: 0
        for key in self.id2lowerAgent.keys()
    }
    # Evaluation mode: restore the master and every lower agent from disk
    # and freeze their networks.
    if parameter.get("train_mode") is False:
        print("########## master model is restore now ##########")
        self.master.restore_model(parameter.get("saved_model"))
        self.master.current_net.eval()
        self.master.target_net.eval()
        for label, agent in self.id2lowerAgent.items():
            self.id2lowerAgent[label].dqn.restore_model(
                temp_parameter[label]['saved_model'])
            self.id2lowerAgent[label].dqn.current_net.eval()
            self.id2lowerAgent[label].dqn.target_net.eval()
    # Template for the agent's dialogue action at each turn.
    self.agent_action = {
        "turn": 1,
        "action": None,
        "request_slots": {},
        "inform_slots": {},
        "explicit_inform_slots": {},
        "implicit_inform_slots": {},
        "speaker": "agent"
    }
def __init__(self, action_set, slot_set, disease_symptom, parameter):
    """Goal-conditioned hierarchical agent: a master DQN that emits a goal,
    an internal critic that scores intrinsic progress toward that goal, and
    a single goal-conditioned lower agent.

    :param action_set: dict of actions.
    :param slot_set: dict of slots; must contain a "disease" key (removed
        from the working copy used for symptom distributions).
    :param disease_symptom: disease -> {"index", "symptom": {name: count}}.
    :param parameter: global configuration dict.
    """
    self.action_set = action_set
    self.slot_set = slot_set
    self.disease_symptom = disease_symptom
    self.disease_num = parameter.get("disease_number")
    self.slot_dim = 1

    ##################
    # Master policy.
    #######################
    input_size = parameter.get("input_size_dqn")
    hidden_size = parameter.get("hidden_size_dqn", 100)
    # Goal space size; defaults to two goals per disease.
    self.output_size = parameter.get('goal_dim', 2 * self.disease_num)
    # The master's input is the state concatenated with a goal one-hot.
    self.dqn = DQN(input_size=input_size + self.output_size,
                   hidden_size=hidden_size,
                   output_size=self.output_size,
                   parameter=parameter,
                   named_tuple=('state', 'agent_action', 'reward',
                                'next_state', 'episode_over'))
    self.parameter = parameter
    self.experience_replay_pool = deque(
        maxlen=parameter.get("experience_replay_pool_size"))
    if parameter.get("train_mode") is False:
        self.dqn.restore_model(parameter.get("saved_model"))
        self.dqn.current_net.eval()
        self.dqn.target_net.eval()

    ###############################
    # Internal critic
    ##############################
    # Symptom distribution by diseases: count how often each symptom occurs
    # per disease, then normalize by the symptom's total count.
    temp_slot_set = copy.deepcopy(slot_set)
    temp_slot_set.pop('disease')
    self.disease_to_symptom_dist = {}
    self.id2disease = {}
    total_count = np.zeros(len(temp_slot_set))
    for disease, v in self.disease_symptom.items():
        dist = np.zeros(len(temp_slot_set))
        self.id2disease[v['index']] = disease
        for symptom, count in v['symptom'].items():
            dist[temp_slot_set[symptom]] = count
            total_count[temp_slot_set[symptom]] += count
        self.disease_to_symptom_dist[disease] = dist
    # BUG FIX: the original code divided by total_count in two identical
    # consecutive loops, normalizing each distribution twice (dist /
    # total_count**2). Normalize exactly once while building the goal
    # embeddings, matching the single-normalization used by the sibling
    # per-disease agent in this file.
    goal_embed_value = [0] * len(disease_symptom)
    for disease in self.disease_to_symptom_dist.keys():
        self.disease_to_symptom_dist[disease] = \
            self.disease_to_symptom_dist[disease] / total_count
        goal_embed_value[disease_symptom[disease]['index']] = list(
            self.disease_to_symptom_dist[disease])
    self.internal_critic = InternalCritic(
        input_size=len(temp_slot_set) * self.slot_dim + len(self.disease_symptom),
        hidden_size=hidden_size,
        output_size=len(temp_slot_set),
        goal_num=len(self.disease_symptom),
        goal_embedding_value=goal_embed_value,
        slot_set=temp_slot_set,
        parameter=parameter)
    print(os.getcwd())
    # NOTE(review): hardcoded relative checkpoint path — only resolves when
    # the process is started from the expected working directory.
    self.internal_critic.restore_model(
        '../agent/pre_trained_internal_critic_dropout_both_one_hot512.pkl')

    #################
    # Lower agent.
    ##############
    temp_parameter = copy.deepcopy(parameter)
    # Lower agent sees the state plus a disease-goal one-hot.
    temp_parameter['input_size_dqn'] = input_size + len(self.disease_symptom)
    # Checkpoint path becomes .../lower/<model file>.
    path_list = parameter['saved_model'].split('/')
    path_list.insert(-1, 'lower')
    temp_parameter['saved_model'] = '/'.join(path_list)
    temp_parameter['gamma'] = temp_parameter['gamma_worker']  # discount factor for the lower agent.
    self.lower_agent = LowerAgent(action_set=action_set,
                                  slot_set=slot_set,
                                  disease_symptom=disease_symptom,
                                  parameter=temp_parameter,
                                  disease_as_action=False)
    # Lower-agent transitions additionally carry the goal they were
    # collected under.
    named_tuple = ('state', 'agent_action', 'reward', 'next_state',
                   'episode_over', 'goal')
    self.lower_agent.dqn.Transition = namedtuple('Transition', named_tuple)
    self.visitation_count = np.zeros(
        shape=(self.output_size,
               len(self.lower_agent.action_space)))  # [goal_num, lower_action_num]
    if temp_parameter.get("train_mode") is False:
        self.lower_agent.dqn.restore_model(temp_parameter.get("saved_model"))
        self.lower_agent.dqn.current_net.eval()
        self.lower_agent.dqn.target_net.eval()
def __init__(self, action_set, slot_set, disease_symptom, parameter):
    """Hierarchical agent that discovers its lower agents by scanning the
    pretrained-model directory (``label_all_model_path``): every file named
    "...label<k>..." yields one lower agent whose weights are restored from
    that file.

    :param action_set: dict of actions for the master level.
    :param slot_set: dict of slots; the "disease" key is removed below.
    :param disease_symptom: disease -> symptom statistics for the master.
    :param parameter: global configuration dict (copied per lower agent).
    """
    self.parameter = parameter
    self.action_set = action_set
    self.slot_set = slot_set
    # NOTE(review): mutates the caller's dict in place — "disease" is removed
    # so it does not count as a requestable slot in the state representation.
    self.slot_set.pop("disease")
    self.disease_symptom = disease_symptom
    # Master-level replay buffer: prioritized or plain FIFO.
    if parameter.get('prioritized_replay'):
        self.experience_replay_pool = PrioritizedReplayBuffer(
            buffer_size=parameter.get("experience_replay_pool_size"))
    else:
        self.experience_replay_pool = deque(
            maxlen=parameter.get("experience_replay_pool_size"))
    # Disease-group label -> DQN input size of that group's lower agent.
    self.input_size_dqn_all = {
        1: 374,
        4: 494,
        5: 389,
        6: 339,
        7: 279,
        9: 409,
        10: 254,
        11: 304,
        12: 304,
        13: 359,
        14: 394,
        19: 414
    }
    self.id2disease = {}
    self.id2lowerAgent = {}
    self.master_action_space = []
    # One lower agent per pretrained model file found in the directory.
    dirs = os.listdir(self.parameter.get("label_all_model_path"))
    for model in dirs:
        # Extract the numeric label from a filename like "...label12...".
        # NOTE(review): raises AttributeError if a filename does not match.
        reg = re.compile(r"(?<=label)\d+")
        match = reg.search(model)
        label = match.group(0)
        self.master_action_space.append(label)
        # Load this label's pickled disease/slot/action sets.
        label_all_path = self.parameter.get("file_all")
        label_new_path = os.path.join(label_all_path, 'label' + label)
        disease_symptom = pickle.load(
            open(os.path.join(label_new_path, 'disease_symptom.p'), 'rb'))
        slot_set = pickle.load(
            open(os.path.join(label_new_path, 'slot_set.p'), 'rb'))
        action_set = pickle.load(
            open(os.path.join(label_new_path, 'action_set.p'), 'rb'))
        # Per-label copy of the global config with its own checkpoint path
        # (".../lower/<label>/model_d10agent...") and input size.
        temp_parameter = copy.deepcopy(parameter)
        temp_parameter["saved_model"] = parameter["saved_model"].split(
            'model_d10agent')[0] + 'lower/' + str(
                label) + '/model_d10agent' + parameter[
                    "saved_model"].split('model_d10agent')[1]
        temp_parameter["input_size_dqn"] = self.input_size_dqn_all[int(
            label)]
        self.id2lowerAgent[label] = LowerAgent(
            action_set=action_set,
            slot_set=slot_set,
            disease_symptom=disease_symptom,
            parameter=temp_parameter,
            disease_as_action=True)
        # Restore the pretrained lower agent and freeze its networks.
        self.id2lowerAgent[label].dqn.restore_model(
            os.path.join(self.parameter.get("label_all_model_path"), model))
        self.id2lowerAgent[label].dqn.current_net.eval()
        self.id2lowerAgent[label].dqn.target_net.eval()

    # Master policy.
    if parameter.get("state_reduced"):
        # Reduced state: 3 values per slot ("disease" already popped above).
        input_size = len(self.slot_set) * 3
    else:
        input_size = parameter.get("input_size_dqn")
    hidden_size = parameter.get("hidden_size_dqn", 100)
    output_size = len(self.id2lowerAgent)
    if self.parameter.get("disease_as_action") == False:
        # One extra master action for activating the disease classifier.
        output_size = len(self.id2lowerAgent) + 1
    self.master = DQN2(input_size=input_size,
                       hidden_size=hidden_size,
                       output_size=output_size,
                       parameter=parameter,
                       named_tuple=('state', 'agent_action', 'reward',
                                    'next_state', 'episode_over'))
    self.parameter = parameter
    self.current_lower_agent_id = -1
    self.behave_prob = 1
    print("master:", self.master_action_space)
    # Evaluation mode: restore the master (lower agents were already
    # restored above) and freeze its networks.
    if parameter.get("train_mode") is False:
        print("########## master model is restore now ##########")
        self.master.restore_model(parameter.get("saved_model"))
        self.master.current_net.eval()
        self.master.target_net.eval()
    # Template for the agent's dialogue action at each turn.
    self.agent_action = {
        "turn": 1,
        "action": None,
        "request_slots": {},
        "inform_slots": {},
        "explicit_inform_slots": {},
        "implicit_inform_slots": {},
        "speaker": "agent"
    }
def __init__(self, action_set, slot_set, disease_symptom, parameter):
    """Hierarchical agent with one lower agent per disease: the master DQN
    picks a disease, and that disease's lower agent conducts the symptom
    inquiry.

    :param action_set: dict of actions.
    :param slot_set: dict of slots; must contain a "disease" key (removed
        from the working copy used for symptom distributions).
    :param disease_symptom: disease -> {"index", "symptom": {name: count}}.
    :param parameter: global configuration dict (copied per lower agent).
    """
    self.action_set = action_set
    self.slot_set = slot_set
    self.disease_symptom = disease_symptom
    # Symptom distribution by diseases: count per-disease symptom
    # frequencies, then normalize by each symptom's total count.
    temp_slot_set = copy.deepcopy(slot_set)
    temp_slot_set.pop('disease')
    self.disease_to_symptom_dist = {}
    total_count = np.zeros(len(temp_slot_set))
    for disease, v in self.disease_symptom.items():
        dist = np.zeros(len(temp_slot_set))
        for symptom, count in v['symptom'].items():
            dist[temp_slot_set[symptom]] = count
            total_count[temp_slot_set[symptom]] += count
        self.disease_to_symptom_dist[disease] = dist
    for disease in self.disease_to_symptom_dist.keys():
        self.disease_to_symptom_dist[
            disease] = self.disease_to_symptom_dist[disease] / total_count

    ##################################
    # Building lower agents. The state representation that the master agent and lower agents are the same, so the
    # slot set are same for these different agents.
    ###########################
    self.id2disease = {}
    self.id2lowerAgent = {}
    for disease, v in disease_symptom.items():
        self.id2disease[v["index"]] = disease
        # Each lower agent sees a single-disease view of disease_symptom
        # (its index re-based to 0).
        temp_disease_symptom = {}
        temp_disease_symptom[disease] = {}
        temp_disease_symptom[disease]["index"] = 0
        temp_disease_symptom[disease]["symptom"] = v["symptom"]
        # Compact slot set over this disease's own symptoms only.
        # NOTE(review): this local temp_slot_set shadows the outer one and
        # is not passed to LowerAgent — it appears unused.
        temp_slot_set = {}
        for symptom in v['symptom'].keys():
            temp_slot_set.setdefault(symptom, len(temp_slot_set))
        # Per-disease copy of the global config with its own checkpoint
        # path (".../lower/<index>/model_d4_agent...").
        temp_parameter = copy.deepcopy(parameter)
        temp_parameter["saved_model"] = parameter["saved_model"].split(
            'model_d4_agent')[0] + 'lower/' + str(
                v["index"]) + '/model_d4_agent' + parameter[
                    "saved_model"].split('model_d4_agent')[1]
        self.id2lowerAgent[v["index"]] = LowerAgent(
            action_set=action_set,
            slot_set=slot_set,
            disease_symptom=temp_disease_symptom,
            parameter=temp_parameter)

    # Master policy.
    input_size = parameter.get("input_size_dqn")
    hidden_size = parameter.get("hidden_size_dqn", 100)
    output_size = len(self.id2lowerAgent)
    self.dqn = DQN(input_size=input_size,
                   hidden_size=hidden_size,
                   output_size=output_size,
                   parameter=parameter,
                   named_tuple=('state', 'agent_action', 'reward',
                                'next_state', 'episode_over', 'behave_prob'))
    self.parameter = parameter
    self.experience_replay_pool = deque(
        maxlen=parameter.get("experience_replay_pool_size"))
    self.current_lower_agent_id = -1
    self.behave_prob = 1
    # Template for the agent's dialogue action at each turn.
    self.agent_action = {
        "turn": 1,
        "action": None,
        "request_slots": {},
        "inform_slots": {},
        "explicit_inform_slots": {},
        "implicit_inform_slots": {},
        "speaker": "agent"
    }