env.reset() print(ix) sleep(1) env.close() return sim_store


# Script entry point: build the environment, run the DP solver for a fixed
# number of sweeps, then hand the solved DP object to Birl's simulator.
if __name__ == "__main__":
    # Environment is created with is_slippery=True.
    env = FrozenLakeEnv(is_slippery=True)
    # Alias the environment's nA / nS attributes under the names
    # num_actions / num_states (Birl below is constructed from num_states).
    env.num_actions = env.nA
    env.num_states = env.nS
    # The value returned by reset() is kept in `o`; it is not read again in
    # the part of the script visible here.
    o = env.reset()

    dp = DP(env)
    # Call policy_eval() followed by policy_imp() 100 times.
    # NOTE(review): presumably alternating policy evaluation / improvement
    # (policy iteration) -- confirm against the DP class.
    # NOTE(review): the loop body extent (both calls inside, the q_values
    # conversion after the loop) was reconstructed from whitespace-collapsed
    # source -- confirm against the original file.
    for _ in range(100):
        dp.policy_eval()
        dp.policy_imp()

    # Replace dp.q_values with a NumPy array built by indexing dp.q_values
    # with every element it yields on iteration.
    # NOTE(review): presumably dp.q_values is a dict keyed by state, so this
    # stacks the per-state entries in key-iteration order -- confirm.
    dp.q_values = np.array([dp.q_values[s] for s in dp.q_values])

    # Disabled debugging aids (early exit and heatmaps of the DP results).
    # exit()
    # plt.figure(figsize=(8, 8), num="dp_sv")
    # sns.heatmap(dp.state_values.reshape(4, 4),
    #             cmap="Spectral", annot=True, cbar=False)
    # plt.figure(figsize=(8, 8), num="dp_q")
    # sns.heatmap(dp.q_values.reshape(16, 4),
    #             cmap="Spectral", annot=True, cbar=False)
    # plt.show()
    # plt.show()

    # Construct Birl from the number of states, run its sim() with the DP
    # object, and store whatever sim() returns on the instance as sim_store.
    birl = Birl(env.num_states)
    print("Running Sim")
    birl.sim_store = birl.sim(dp)
    print("Running Sim Done")