class DialogEnv(gym.Env):
    def __init__(
        self,
        user_goals: List[UserGoal],
        emc_params: Dict,
        max_round_num: int,
        database: Dict,
        slot2values: Dict[str, List[Any]],
    ) -> None:
        self.user = UserSimulator(user_goals, max_round_num)
        self.emc = ErrorModelController(slot2values, emc_params)
        self.state_tracker = StateTracker(database, max_round_num)
        self.action_space = gym.spaces.Discrete(len(AGENT_ACTIONS))
        self.observation_space = gym.spaces.MultiBinary(
            self.state_tracker.get_state_size())

    def step(self, agent_action_index: int):
        # Map the discrete action index to a full agent action and track it.
        agent_action = map_index_to_action(agent_action_index)
        self.state_tracker.update_state_agent(agent_action)
        user_action, reward, done, success = self.user.step(agent_action)
        if not done:
            # Simulate noisy NLU by corrupting the user action before tracking it.
            self.emc.infuse_error(user_action)
        self.state_tracker.update_state_user(user_action)
        next_state = self.state_tracker.get_state(done)
        return next_state, reward, done, success

    def reset(self):
        self.state_tracker.reset()
        init_user_action = self.user.reset()
        self.emc.infuse_error(init_user_action)
        self.state_tracker.update_state_user(init_user_action)
        return self.state_tracker.get_state()
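######################################################################
# A minimal sketch of driving DialogEnv end to end with random actions.
# The constructor arguments (user_goals, emc_params, max_round_num,
# database, slot2values) are assumed to be loaded elsewhere, as in the
# setup code further below; the helper name random_rollout is ours, not
# part of the original code.

def random_rollout(env: "DialogEnv", num_episodes: int = 5) -> float:
    """Run a few episodes with uniformly random actions; return mean reward."""
    total = 0.0
    for _ in range(num_episodes):
        state = env.reset()
        done = False
        while not done:
            # Sample a random index into AGENT_ACTIONS via the gym action space.
            action = env.action_space.sample()
            state, reward, done, success = env.step(action)
            total += reward
    return total / num_episodes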
def test():
    test_env = UserSimulator(disease_symptom_path, disease_symptom_mapping_path)
    total_rewards = 0
    # Accumulate hit counts across all 25 evaluation episodes (initializing
    # them inside the episode loop would discard all but the last episode).
    top_10_predictions = 0
    top_5_predictions = 0
    for i_episode in range(25):
        state = test_env.reset()
        for t in count():
            action = select_action(np.expand_dims(state, axis=0))
            next_state, reward, done, _ = test_env.step(action.item())
            state = next_state
            if done:
                total_rewards += reward
                with torch.no_grad():
                    q_values = policy_net(
                        torch.as_tensor(np.expand_dims(state, axis=0),
                                        dtype=torch.float32)).squeeze(0)
                # Q-values for diagnosis actions follow the symptom actions.
                diagnosis_q_values = q_values[test_env.num_symptom:]
                top_10_disease, top_5_disease = test_env.get_top_diseases(
                    diagnosis_q_values)
                if test_env.goal in top_10_disease:
                    top_10_predictions += 1
                if test_env.goal in top_5_disease:
                    top_5_predictions += 1
                break
    return total_rewards / 25., top_10_predictions / 25., top_5_predictions / 25.
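######################################################################
# The simulator's get_top_diseases helper is not shown above. A plausible
# implementation, assuming the diagnosis Q-values are ordered the same way
# as the simulator's disease list, ranks diseases by Q-value with
# torch.topk. This is an illustrative sketch, not the simulator's actual
# code.

def get_top_diseases_sketch(diagnosis_q_values: torch.Tensor,
                            diseases: List[str]):
    """Return the 10 and 5 highest-scoring disease names."""
    # torch.topk returns indices sorted by descending Q-value, so the
    # top-5 list is just the head of the top-10 list.
    top10_idx = torch.topk(diagnosis_q_values, k=10).indices.tolist()
    top_10_disease = [diseases[i] for i in top10_idx]
    return top_10_disease, top_10_disease[:5]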
from state_tracker import StateTracker
from user_simulator import UserSimulator
from agent_dqn import AgentDQN

params = {
    'experience_replay_pool_size': 10000,
    'dqn_hidden_size': 60,
    'gamma': 0.9,
    'predict_mode': True,
    'max_turn': 40,
    'trained_model_path': 'data/saved_model.p'
}

state_tracker = StateTracker()
usersim = UserSimulator(3)
agent = AgentDQN(params)


def run_episode(count):
    for i in range(count):
        print("dialog:", i)
        episode_over = False
        turn = 0
        state_tracker.initialize_episode()
        # Open every dialog with a canned greeting from the system side.
        agent_action = {
            'diaact': 'greeting',
            'inform_slots': {},
            'request_slots': {}
        }
        state_tracker.update(agent_action=agent_action)
        print("sys:", agent_action)
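######################################################################
# run_episode above stops after the opening greeting; the remaining
# turn-taking loop is not shown. A hypothetical continuation might look
# like the sketch below. The method names usersim.next(),
# state_tracker.get_state_for_agent(), and agent.state_to_action() are
# assumptions about the simulator/agent API, not confirmed by the code
# above.

def finish_dialog_sketch(agent_action):
    """Hypothetical turn-taking loop following the opening greeting."""
    episode_over = False
    while not episode_over:
        # User responds to the last system action (assumed API).
        user_action, episode_over = usersim.next(agent_action)
        state_tracker.update(user_action=user_action)
        print("usr:", user_action)
        if episode_over:
            break
        # Agent picks the next system action from the tracked state (assumed API).
        state = state_tracker.get_state_for_agent()
        agent_action = agent.state_to_action(state)
        state_tracker.update(agent_action=agent_action)
        print("sys:", agent_action)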
# Note: If you get an unpickling error here then run 'pickle_converter.py' and it should fix it
database = pickle.load(open(DATABASE_FILE_PATH, 'rb'), encoding='latin1')

# Clean DB
remove_empty_slots(database)

# Load movie dict
db_dict = pickle.load(open(DICT_FILE_PATH, 'rb'), encoding='latin1')

# Load goal file
user_goals = pickle.load(open(USER_GOALS_FILE_PATH, 'rb'), encoding='latin1')

# Init. objects
if USE_USERSIM:
    user = UserSimulator(user_goals, constants, database)
else:
    user = User(constants)
emc = ErrorModelController(db_dict, constants)
state_tracker = StateTracker(database, constants)
sarsa_agent = SARSAgent(state_tracker.get_state_size(), constants)
# dqn_agent = DQNAgent(state_tracker.get_state_size(), constants)


def run_round(state, warmup=False):
    # 1) Agent takes action given state tracker's representation of dialogue (state)
    agent_action_index, agent_action = sarsa_agent.get_action(state, use_rule=warmup)
    # 2) Update state tracker with the agent's action
    state_tracker.update_state_agent(agent_action)
    # 3) User takes action given agent action
    user_action, reward, done, success = user.step(agent_action)
    if not done:
        # 4) Infuse error into the user's semantic frame to mimic noisy NLU
        emc.infuse_error(user_action)
    # 5) Update state tracker with the (possibly corrupted) user action
    state_tracker.update_state_user(user_action)
    next_state = state_tracker.get_state(done)
    return next_state, reward, done, success
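######################################################################
# A minimal sketch of an outer episode loop around run_round. The
# reset/error-infusion sequence mirrors DialogEnv.reset() above; the
# function name run_episodes and the bare reward bookkeeping are ours.

def run_episodes(num_episodes: int, warmup: bool = False) -> float:
    """Run full dialogs through run_round; return the mean episode reward."""
    total_reward = 0.0
    for _ in range(num_episodes):
        # Reset tracker and user, then track the (error-infused) first user turn.
        state_tracker.reset()
        user_action = user.reset()
        emc.infuse_error(user_action)
        state_tracker.update_state_user(user_action)
        state = state_tracker.get_state()
        done = False
        while not done:
            state, reward, done, success = run_round(state, warmup=warmup)
            total_reward += reward
    return total_reward / num_episodes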
from itertools import count

import matplotlib
import numpy as np
from PIL import Image

from user_simulator import UserSimulator
import pickle

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T

disease_symptom_path = 'disease_symptom.json'
disease_symptom_mapping_path = 'disease_symptom_mapping.json'

env = UserSimulator(disease_symptom_path, disease_symptom_mapping_path)
# env = gym.make('CartPole-v0').unwrapped

is_ipython = 'inline' in matplotlib.get_backend()
if is_ipython:
    from IPython import display

# if gpu is to be used
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

######################################################################
# Replay Memory
# -------------
#
# We'll be using experience replay memory for training our DQN. It stores
# the transitions that the agent observes, allowing us to reuse this data
# later.
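######################################################################
# A minimal replay buffer in the spirit of the PyTorch DQN tutorial this
# section follows: a fixed-capacity deque of transitions with uniform
# random sampling. The (state, action, next_state, reward) field names
# are the conventional choice, not dictated by the code above.

import random
from collections import deque, namedtuple

Transition = namedtuple('Transition',
                        ('state', 'action', 'next_state', 'reward'))


class ReplayMemory:
    def __init__(self, capacity: int):
        # Old transitions are discarded automatically once capacity is hit.
        self.memory = deque([], maxlen=capacity)

    def push(self, *args):
        """Save a transition."""
        self.memory.append(Transition(*args))

    def sample(self, batch_size: int):
        """Draw a decorrelated minibatch uniformly at random."""
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)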