def val_run(discount_factor=0.9):
    """Run value iteration on the maze environment and plot the result.

    Builds a fresh ``MazeEnv`` from the module-level ``maze_file``, solves it
    with ``value_iteration``, reshapes the flat value/policy arrays to the
    module-level ``maze_shape``, and renders them with ``graph_value_policy``.

    Args:
        discount_factor: Discount factor (gamma) passed to value iteration.

    Returns:
        The image/artist object returned by ``graph_value_policy``.
    """
    maze = read_maze(maze_file)
    env = MazeEnv(maze_file=maze_file)
    # Reset for side effects (initial state); the observation is not needed here.
    env.reset()
    policy, V, num_iter, total_access = value_iteration(
        env, discount_factor=discount_factor)
    # policy is one-hot over actions per state; recover the action index
    # per state, then lay both policy and values out on the maze grid.
    _, action = np.where(policy == 1)
    policy = action.reshape(maze_shape)
    V = V.reshape(maze_shape)
    im = graph_value_policy(V, policy, maze)
    return im
# --- Leftover debugging scaffolding from earlier experiments ---
# im = graph_value_policy(V, policy, maze)
# plt.colorbar()
# # print(np.sum(VV - VP))
# # print(VV[5][5])
# # print(VP[5][5])
# print(p_val)
# print(p_pol)
# discount_factor=0.9

# Sweep value iteration over a range of discount factors, recording
# wall-clock time, iteration count, and total state accesses for each.
# NOTE(review): assumes module-level lists value_time / value_iter /
# value_tot and the iterable discount_factors are defined earlier in
# the file — confirm against the full script.
df = 0.9  # fallback default if discount_factors is empty
for df in discount_factors:
    plt.subplots(12)
    plt.subplot(121)
    maze = read_maze(maze_file)
    env = MazeEnv(maze_file=maze_file)
    # Reset for side effects; the observation itself is unused below.
    env.reset()
    prev_time = time()
    policy, V, num_iter, total_access = value_iteration(env, discount_factor=df)
    tot_time = time() - prev_time
    value_time.append(tot_time)
    value_iter.append(num_iter)
    value_tot.append(total_access)
    # One-hot policy -> per-state action index, then reshape both the
    # policy and the value function onto the maze grid for plotting.
    _, action = np.where(policy == 1)
    policy = action.reshape(maze_shape)
    V = V.reshape(maze_shape)