예제 #1
0
                    env.reset()
        print(ix)
        sleep(1)
        env.close()
        return sim_store


if __name__ == "__main__":
    # Script entry point: solve FrozenLake with the DP helper, then hand the
    # solved DP object to Birl to generate simulations.
    env = FrozenLakeEnv(is_slippery=True)

    # Expose the environment's nS / nA sizes under the attribute names
    # num_states / num_actions (presumably what DP and Birl read -- confirm).
    env.num_states = env.nS
    env.num_actions = env.nA

    # The value returned by reset() is kept in `o` but is not read again
    # within this block.
    o = env.reset()

    # Alternate policy evaluation and policy improvement for 100 rounds.
    dp = DP(env)
    for _ in range(100):
        dp.policy_eval()
        dp.policy_imp()

    # Replace the keyed q-value container with an array holding one entry per
    # key, in the container's own iteration order.
    q_rows = [dp.q_values[state] for state in dp.q_values]
    dp.q_values = np.array(q_rows)

    # Optional debugging visualisation (disabled):
    # plt.figure(figsize=(8, 8), num="dp_sv")
    # sns.heatmap(dp.state_values.reshape(4, 4),
    #             cmap="Spectral", annot=True, cbar=False)
    # plt.figure(figsize=(8, 8), num="dp_q")
    # sns.heatmap(dp.q_values.reshape(16, 4),
    #             cmap="Spectral", annot=True, cbar=False)
    # plt.show()

    # Build Birl from the number of states and run its simulation using the
    # DP object; the result is stored back on the Birl instance.
    birl = Birl(env.num_states)
    print("Running Sim")
    birl.sim_store = birl.sim(dp)
    print("Running Sim Done")