Example #1
from itertools import count

import numpy as np
import torch

# UserSimulator, disease_symptom_path, disease_symptom_mapping_path,
# select_action and policy_net are defined elsewhere in the project.


def test():
    """Run 25 evaluation episodes and report the average reward together
    with the top-10 and top-5 diagnosis accuracies."""
    test_env = UserSimulator(disease_symptom_path,
                             disease_symptom_mapping_path)
    total_rewards = 0
    top_10_predictions = 0
    top_5_predictions = 0
    for i_episode in range(25):
        state = test_env.reset()
        for t in count():
            action = select_action(np.expand_dims(state, axis=0))
            next_state, reward, done, _ = test_env.step(action.item())

            state = next_state

            if done:
                total_rewards += reward
                with torch.no_grad():
                    # policy_net expects a batched tensor input.
                    state_tensor = torch.as_tensor(
                        np.expand_dims(state, axis=0), dtype=torch.float32)
                    q_values = policy_net(state_tensor).squeeze(0)
                    # Q-values of the diagnosis actions follow those of the
                    # symptom-inquiry actions in the action vector.
                    diagnosis_q_values = q_values[test_env.num_symptom:]
                top_10_disease, top_5_disease = test_env.get_top_diseases(
                    diagnosis_q_values)
                if test_env.goal in top_10_disease:
                    top_10_predictions += 1
                if test_env.goal in top_5_disease:
                    top_5_predictions += 1
                break

    return total_rewards / 25., top_10_predictions / 25., top_5_predictions / 25.
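The helper above returns the average reward plus the top-10 and top-5 diagnosis accuracies over the 25 evaluation episodes. A minimal sketch of how it might be called from the training script (the print format is illustrative, not from the original code):

avg_reward, top_10_acc, top_5_acc = test()
print('eval: reward %.2f, top-10 acc %.2f, top-5 acc %.2f'
      % (avg_reward, top_10_acc, top_5_acc))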
Example #2
from typing import Any, Dict, List

import gym

# UserGoal, UserSimulator, ErrorModelController, StateTracker, AGENT_ACTIONS
# and map_index_to_action come from the surrounding project modules.


class DialogEnv(gym.Env):
    """Gym wrapper around a user simulator, an error model controller and a
    dialog state tracker."""

    def __init__(
        self,
        user_goals: List[UserGoal],
        emc_params: Dict,
        max_round_num: int,
        database: Dict,
        slot2values: Dict[str, List[Any]],
    ) -> None:
        self.user = UserSimulator(user_goals, max_round_num)
        self.emc = ErrorModelController(slot2values, emc_params)
        self.state_tracker = StateTracker(database, max_round_num)

        # One discrete action per agent action template; observations are
        # binary state vectors produced by the state tracker.
        self.action_space = gym.spaces.Discrete(len(AGENT_ACTIONS))
        self.observation_space = gym.spaces.MultiBinary(
            self.state_tracker.get_state_size())

    def step(self, agent_action_index: int):
        # Map the discrete action index to a full agent action and fold it
        # into the dialog state.
        agent_action = map_index_to_action(agent_action_index)
        self.state_tracker.update_state_agent(agent_action)
        # Let the user simulator respond, then corrupt the response with the
        # error model controller unless the dialog has ended.
        user_action, reward, done, success = self.user.step(agent_action)
        if not done:
            self.emc.infuse_error(user_action)
        self.state_tracker.update_state_user(user_action)
        next_state = self.state_tracker.get_state(done)
        return next_state, reward, done, success

    def reset(self):
        # Reset the tracker and user simulator, infuse error into the initial
        # user action, and return the resulting state representation.
        self.state_tracker.reset()
        init_user_action = self.user.reset()
        self.emc.infuse_error(init_user_action)
        self.state_tracker.update_state_user(init_user_action)
        return self.state_tracker.get_state()
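Since DialogEnv follows the standard gym reset/step interface, a random rollout is enough to exercise it end to end. A minimal sketch, assuming user_goals, emc_params, database and slot2values have already been loaded from the project's data files (max_round_num=20 is an illustrative value):

env = DialogEnv(user_goals, emc_params, max_round_num=20,
                database=database, slot2values=slot2values)
state = env.reset()
done = False
while not done:
    action = env.action_space.sample()  # random agent, for illustration only
    state, reward, done, success = env.step(action)
print('episode finished with success =', success)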
Example #3
    plt.ylabel('Prediction Accuracy')
    plt.savefig(default_pred_path)
    plt.close()


test_rewards_list = []
test_episode_list = []
test_top_5_pred_list = []
test_top_10_pred_list = []
for i_episode in range(num_episodes):
    # Initialize the environment and state
    state = env.reset()
    for t in count():
        # Select and perform an action
        action = select_action(np.expand_dims(state, axis=0))
        next_state, reward, done, _ = env.step(action.item())
        reward = torch.tensor([reward], device=device)
        # Store the transition in memory
        memory.push(torch.tensor([state], device=device),
                    torch.tensor([[action.item()]], device=device),
                    torch.tensor([next_state], device=device), reward)
        # Move to the next state
        state = next_state
        # Perform one step of the optimization (on the policy network)
        optimize_model()
        if done:
            episode_durations.append(t + 1)
            break
    # Update the target network, copying all weights and biases in DQN
    if i_episode % TARGET_UPDATE == 0:
        target_net.load_state_dict(policy_net.state_dict())
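The test_rewards_list, test_episode_list and top-k accuracy lists declared before the loop are never filled in this snippet. A hedged sketch of how the loop body could be extended to do so, reusing the test() helper from Example #1 (TEST_INTERVAL is an assumed constant, not part of the original code):

    # Hypothetical extension of the loop body: periodically evaluate the
    # current policy and record the metrics in the lists declared above.
    if i_episode % TEST_INTERVAL == 0:
        test_reward, top_10_acc, top_5_acc = test()
        test_episode_list.append(i_episode)
        test_rewards_list.append(test_reward)
        test_top_10_pred_list.append(top_10_acc)
        test_top_5_pred_list.append(top_5_acc)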