# Example 1 (scraped marker: "Ejemplo n.º 1", score "0" — commented out so the file parses as Python)
def val_run(discount_factor=0.9):
    """Run value iteration on the maze environment and plot value/policy.

    Parameters
    ----------
    discount_factor : float, optional
        Discount factor (gamma) forwarded to ``value_iteration``.
        Defaults to 0.9.

    Returns
    -------
    The image/artist object produced by ``graph_value_policy``.

    Notes
    -----
    Relies on module-level names defined elsewhere in this file:
    ``read_maze``, ``maze_file``, ``MazeEnv``, ``value_iteration``,
    ``maze_shape`` and ``graph_value_policy``.
    """
    maze = read_maze(maze_file)
    env = MazeEnv(maze_file=maze_file)
    # Reset for its side effect only; the returned observation is unused.
    env.reset()

    policy, V, num_iter, total_access = value_iteration(
        env, discount_factor=discount_factor)

    # `policy` is one-hot over actions per state; extract the chosen
    # action index for every state (the state coordinate is not needed).
    _, action = np.where(policy == 1)
    policy = action.reshape(maze_shape)
    V = V.reshape(maze_shape)
    return graph_value_policy(V, policy, maze)
# Example 2 (scraped marker: "Ejemplo n.º 2", score "0" — commented out so the file parses as Python)
#     im = graph_value_policy(V, policy, maze)
#     plt.colorbar()
#     # print(np.sum(VV - VP))
#     # print(VV[5][5])
#     # print(VP[5][5])

# print(p_val)
# print(p_pol)

# discount_factor=0.9
# Sweep over discount factors: time one full value-iteration run per factor
# and record wall time, iteration count, and total state accesses for later
# comparison plots.
# NOTE(review): relies on module-level names defined elsewhere in the file:
# discount_factors, maze_file, maze_shape, value_time, value_iter, value_tot,
# read_maze, MazeEnv, value_iteration, time, plt, np.
# discount_factor=0.9
df = 0.9  # NOTE(review): immediately overwritten by the loop variable below
for df in discount_factors:

    plt.subplots(12)  # NOTE(review): `12` looks like a stray argument — likely meant plt.subplot/figure; confirm (plt.subplot(121) follows)
    plt.subplot(121)
    maze = read_maze(maze_file)
    env = MazeEnv(maze_file=maze_file)
    obs = env.reset()  # reset for its side effect; `obs` is not used below

    # Wall-clock timing of a single value-iteration run at this discount factor.
    prev_time = time()
    policy, V, num_iter, total_access = value_iteration(env,
                                                        discount_factor=df)
    tot_time = time() - prev_time
    value_time.append(tot_time)

    value_iter.append(num_iter)
    value_tot.append(total_access)

    # `policy` is one-hot over actions per state; recover the per-state
    # action grid and reshape the value function onto the maze layout.
    coord, action = np.where(policy == 1)
    policy = action.reshape(maze_shape)
    V = V.reshape(maze_shape)