class DialogEnv(gym.Env):
    """Gym-style environment wrapping a goal-driven user simulator.

    The environment composes three project components:

    * ``UserSimulator`` — produces user actions and episode rewards,
    * ``ErrorModelController`` — injects slot-level noise into user actions,
    * ``StateTracker`` — maintains the dialogue state representation fed
      to the agent.

    NOTE(review): ``step`` returns ``(next_state, reward, done, success)``
    — the fourth element is a success flag, not the conventional gym
    ``info`` dict. Callers in this project rely on that shape, so it is
    kept as-is.
    """

    def __init__(
        self,
        user_goals: List[UserGoal],
        emc_params: Dict,
        max_round_num: int,
        database: Dict,
        slot2values: Dict[str, List[Any]],
    ) -> None:
        """Create the simulator, error model, state tracker and spaces.

        Args:
            user_goals: Goals the simulated user may pursue in an episode.
            emc_params: Configuration for the error model controller.
            max_round_num: Maximum number of dialogue rounds per episode.
            database: Backing database used by the state tracker.
            slot2values: Mapping from slot name to its possible values,
                used by the error model to substitute noisy values.
        """
        self.user = UserSimulator(user_goals, max_round_num)
        self.emc = ErrorModelController(slot2values, emc_params)
        self.state_tracker = StateTracker(database, max_round_num)
        # One discrete action per entry in the fixed agent-action table.
        self.action_space = gym.spaces.Discrete(len(AGENT_ACTIONS))
        # Fix: use the public gym.spaces.MultiBinary alias instead of the
        # private submodule path gym.spaces.multi_binary.MultiBinary.
        self.observation_space = gym.spaces.MultiBinary(
            self.state_tracker.get_state_size())

    def step(self, agent_action_index: int):
        """Advance the dialogue by one agent/user exchange.

        Args:
            agent_action_index: Index into the agent-action table.

        Returns:
            Tuple ``(next_state, reward, done, success)`` where
            ``next_state`` is the tracker's state vector after both turns.
        """
        agent_action = map_index_to_action(agent_action_index)
        self.state_tracker.update_state_agent(agent_action)
        user_action, reward, done, success = self.user.step(agent_action)
        if not done:
            # Noise is only infused on non-terminal turns; a terminal
            # user action is recorded verbatim.
            self.emc.infuse_error(user_action)
        self.state_tracker.update_state_user(user_action)
        next_state = self.state_tracker.get_state(done)
        return next_state, reward, done, success

    def reset(self):
        """Start a new episode and return the initial state vector."""
        self.state_tracker.reset()
        init_user_action = self.user.reset()
        # The first user utterance also passes through the error model.
        self.emc.infuse_error(init_user_action)
        self.state_tracker.update_state_user(init_user_action)
        return self.state_tracker.get_state()
def __init__(
    self,
    user_goals: List[UserGoal],
    emc_params: Dict,
    max_round_num: int,
    database: Dict,
    slot2values: Dict[str, List[Any]],
) -> None:
    """Wire up the dialogue environment's collaborators and gym spaces.

    Args:
        user_goals: Candidate goals for the simulated user.
        emc_params: Error-model controller configuration.
        max_round_num: Cap on dialogue rounds per episode.
        database: Backing database for the state tracker.
        slot2values: Slot name -> candidate values, consumed by the
            error model when corrupting user actions.
    """
    self.user = UserSimulator(user_goals, max_round_num)
    self.emc = ErrorModelController(slot2values, emc_params)
    self.state_tracker = StateTracker(database, max_round_num)
    # Discrete action space: one id per entry of the agent-action table.
    self.action_space = gym.spaces.Discrete(len(AGENT_ACTIONS))
    # Observation is a binary vector sized by the tracker's state.
    state_size = self.state_tracker.get_state_size()
    self.observation_space = gym.spaces.multi_binary.MultiBinary(state_size)
# Clean DB remove_empty_slots(database) # Load movie dict db_dict = pickle.load(open(DICT_FILE_PATH, 'rb'), encoding='latin1') # Load goal File user_goals = pickle.load(open(USER_GOALS_FILE_PATH, 'rb'), encoding='latin1') # Init. Objects if USE_USERSIM: user = UserSimulator(user_goals, constants, database) else: user = User(constants) emc = ErrorModelController(db_dict, constants) state_tracker = StateTracker(database, constants) sarsa_agent = SARSAgent(state_tracker.get_state_size(), constants) #dqn_agent = DQNAgent(state_tracker.get_state_size(), constants) def run_round(state, warmup=False): # 1) Agent takes action given state tracker's representation of dialogue (state) agent_action_index, agent_action = sarsa_agent.get_action(state, use_rule=warmup) # 2) Update state tracker with the agent's action state_tracker.update_state_agent(agent_action) # 3) User takes action given agent action user_action, reward, done, success = user.step(agent_action) if not done: # 4) Infuse error into semantic frame level of user action