Esempio n. 1
0
    for x in range(x_dims):
        for y in range(y_dims):
            utils[(x,y)] = 0

    for state in problem.get_all_states():
        utils[(state.x, state.y)] = state.reward # env.get_state_reward(state)
    return utils


def find_policy_via_policy_iteration(problem, discount_factor):
    """Return the starting policy that ``init_policy`` builds for ``problem``.

    No policy-iteration sweeps are performed here: the result of
    ``init_policy(problem)`` is handed back unchanged, and
    ``discount_factor`` is accepted but never read.
    """
    return init_policy(problem)

if __name__ == "__main__":
    # Demo entry point: build the maze MDP, compute a policy with
    # find_policy_via_policy_iteration and hand it to the visualiser.

    # Initialize the maze environment
    env = kuimaze.MDPMaze(map_image=GRID_WORLD3, probs=PROBS, grad=GRAD, node_rewards=GRID_WORLD3_REWARDS)
    # Alternative environment set-ups:
    # env = kuimaze.MDPMaze(map_image=GRID_WORLD3, probs=PROBS, grad=GRAD, node_rewards=None)
    # env = kuimaze.MDPMaze(map_image=MAP, probs=PROBS, grad=GRAD, node_rewards=None)
    env.reset()

    # Usage banner for the interactive viewer.
    print('====================')
    print('works only in terminal! NOT in IDE!')
    print('press n - next')
    print('press s - skip to end')
    print('====================')

    # Dump the state list before solving.
    print(env.get_all_states())
    # policy1 = find_policy_via_value_iteration(env)
    policy = find_policy_via_policy_iteration(env, 0.9999)
    env.visualise(get_visualisation_values(policy))
    env.render()


def find_policy_via_value_iteration(problem, discount_factor, epsilon):
    """Return the starting policy that ``init_policy`` builds for ``problem``.

    No value-iteration updates are performed here: the result of
    ``init_policy(problem)`` is handed back unchanged, and neither
    ``discount_factor`` nor ``epsilon`` is read.
    """
    return init_policy(problem)


if __name__ == "__main__":
    # Demo entry point: build the maze MDP from MAP (no per-node rewards
    # supplied), solve it via mdp_agent and show the resulting policy.
    #
    # Alternative environment set-ups:
    # env = kuimaze.MDPMaze(map_image=GRID_WORLD3, probs=PROBS, grad=GRAD, node_rewards=GRID_WORLD3_REWARDS)
    # env = kuimaze.MDPMaze(map_image=GRID_WORLD3, probs=PROBS, grad=GRAD, node_rewards=None)
    env = kuimaze.MDPMaze(
        map_image=MAP, probs=PROBS, grad=GRAD, node_rewards=None
    )
    env.reset()

    # Usage banner for the interactive viewer, emitted one entry per line.
    print(
        '====================',
        'works only in terminal! NOT in IDE!',
        'press n - next',
        'press s - skip to end',
        '====================',
        sep='\n',
    )

    # agent = mdp_agent.MDP_agent(env)
    # policy = mdp_agent.find_policy_via_policy_iteration(env, 0.1)
    # NOTE(review): 0.5 and 0.0001 are presumably the discount factor and
    # the convergence epsilon - confirm against mdp_agent's signature.
    policy = mdp_agent.find_policy_via_value_iteration(env, 0.5, 0.0001)
    env.visualise(get_visualisation_values(policy))
    env.render()
    print('Policy IV:', policy)