for x in range(x_dims): for y in range(y_dims): utils[(x,y)] = 0 for state in problem.get_all_states(): utils[(state.x, state.y)] = state.reward # env.get_state_reward(state) return utils def find_policy_via_policy_iteration(problem,discount_factor): policy = init_policy(problem) return(policy) if __name__ == "__main__": # Initialize the maze environment env = kuimaze.MDPMaze(map_image=GRID_WORLD3, probs=PROBS, grad=GRAD, node_rewards=GRID_WORLD3_REWARDS) # env = kuimaze.MDPMaze(map_image=GRID_WORLD3, probs=PROBS, grad=GRAD, node_rewards=None) # env = kuimaze.MDPMaze(map_image=MAP, probs=PROBS, grad=GRAD, node_rewards=None) env.reset() print('====================') print('works only in terminal! NOT in IDE!') print('press n - next') print('press s - skip to end') print('====================') print(env.get_all_states()) # policy1 = find_policy_via_value_iteration(env) policy = find_policy_via_policy_iteration(env,0.9999) env.visualise(get_visualisation_values(policy)) env.render()
unchanged = True return policy def find_policy_via_value_iteration(problem, discount_factor, epsilon): policy = init_policy(problem) return policy if __name__ == "__main__": # Initialize the maze environment # env = kuimaze.MDPMaze(map_image=GRID_WORLD3, probs=PROBS, grad=GRAD, node_rewards=GRID_WORLD3_REWARDS) # env = kuimaze.MDPMaze(map_image=GRID_WORLD3, probs=PROBS, grad=GRAD, node_rewards=None) env = kuimaze.MDPMaze(map_image=MAP, probs=PROBS, grad=GRAD, node_rewards=None) env.reset() print('====================') print('works only in terminal! NOT in IDE!') print('press n - next') print('press s - skip to end') print('====================') # agent = mdp_agent.MDP_agent(env) # policy = mdp_agent.find_policy_via_policy_iteration(env, 0.1) policy = mdp_agent.find_policy_via_value_iteration(env, 0.5, 0.0001) env.visualise(get_visualisation_values(policy)) env.render() print('Policy IV:', policy)