# is not Markov either.
# --- Example: 4-index model aggregated through a 4x3 matrix -----------------
# Two stacked 4x4 matrices (shape (2, 4, 4)); every row sums to 1.
# They differ only in row 0: the first puts all weight on index 1, the second
# on index 2.  Rows 1-3 put all weight on index 3 in both matrices.
# The lone `0.0` literal makes NumPy infer a floating-point dtype for the
# whole array.
# Presumably one transition matrix per action -- confirm against MDP.
T_list = np.array([
    [[0, 1, 0, 0.0], [0, 0, 0, 1], [0, 0, 0, 1], [0, 0, 0, 1]],
    [[0, 0, 1, 0], [0, 0, 0, 1], [0, 0, 0, 1], [0, 0, 0, 1]],
])
# 4x4 matrix whose only nonzero entries are R[0, 1] = 0.5, R[1, 3] = 1 and
# R[2, 3] = 2.  Presumably indexed as R[s, s'] -- TODO confirm.
R = np.array([[0, 0.5, 0, 0], [0, 0, 0, 1], [0, 0, 0, 2], [0, 0, 0, 0]])
# 4x3 0/1 matrix with exactly one 1 per row; rows 1 and 2 both select
# column 1, so column 1 is the only column with more than one entry.
phi = np.array([
    [1, 0, 0],
    [0, 1, 0],
    [0, 1, 0],
    [0, 0, 1],
])
# The same R is supplied for both entries of the list passed to MDP.
mdp1 = MDP(T_list, [R, R], gamma=0.9)
# NOTE(review): this first mdp2 (explicit p0 and t=1) is rebound on the very
# next line before anything reads it, so everything below uses the
# default-argument construction.  Confirm which variant is intended.
mdp2 = AbstractMDP(mdp1, phi, p0=np.array([1, 0, 0, 0]), t=1)
mdp2 = AbstractMDP(mdp1, phi)
# Bare expression: the result is discarded when run as a script (it is only
# displayed in an interactive session).
is_markov(mdp2)

# Two policy collections, both obtained from the abstract model.
pi_g_list = mdp2.piecewise_constant_policies()
pi_a_list = mdp2.abstract_policies()

# Keep element 0 of what vi returns for each policy: pi_g_list policies are
# evaluated on mdp1, pi_a_list policies on mdp2.
# Presumably element 0 is the value function -- confirm against vi.
v_g_list = [vi(mdp1, pi)[0] for pi in pi_g_list]
v_a_list = [vi(mdp2, pi)[0] for pi in pi_a_list]

# Stack the output of sort_value_fns into a single array and round to four
# decimals.
order_v_g = np.stack(sort_value_fns(v_g_list)).round(4)
order_v_a = np.stack(sort_value_fns(v_a_list)).round(4)

# Bare expression: inspection only, no effect as a script.
mdp2.p0
# Boolean mask over the columns of mdp2.phi whose column sum exceeds 1.
# If mdp2.phi is the phi defined above, only column 1 qualifies -- verify.
agg_state = mdp2.phi.sum(axis=0) > 1
# For each policy in pi_g_list, index mdp2.B(pi, t=1) with the mask and stack
# the results.  The stacked array is not bound to a name.
np.stack([mdp2.B(pi, t=1)[agg_state] for pi in pi_g_list])

# vi called without a policy; the first and third returned values are kept
# and the second is ignored.
v_phi_pi_phi_star, _, pi_phi_star = vi(mdp2)
    [2/3, 1/4, 1/12],
    [0, 3/4, 1/4],
])
# T_alt = np.array([
#     [1/2, 3/8, 1/8],
#     [1, 0, 0],
#     [1, 0, 0],
# ])
# --- Example: 3-index model aggregated through a 3x2 matrix -----------------
# Rebinds the module-level name R to a 3x3 0/1 matrix: row 0 is [0, 1, 1],
# rows 1 and 2 are both [1, 0, 0].
R = np.array([
    [0, 1, 1],
    [1, 0, 0],
    [1, 0, 0],
])
# T1 and T2 are defined outside this section; the same R is supplied for both
# entries of the list passed to MDP.
mdp1 = MDP([T1, T2], [R, R], gamma=0.9)
# NOTE(review): this mdp2 wraps a *different* model built from [T0, T1] (T0 is
# not visible in this section -- confirm it exists), using an inline matrix
# identical to the `phi` defined further down.  It is only used by the two
# bare checks below and is rebound afterwards.
mdp2 = AbstractMDP(MDP([T0, T1], [R, R], gamma=0.9), np.array([[1,0],[0,1],[0,1]]))
# Bare expressions: results are discarded when run as a script.
is_hutter_markov(mdp2)
is_markov(mdp2)
# vi called without a policy on mdp1, unpacked into three values.
v_star, q_star, pi_star = vi(mdp1)
# Bare tuple expression: inspection only, no effect as a script.
v_star, pi_star

# 3x2 0/1 matrix with exactly one 1 per row; rows 1 and 2 both select
# column 1.
phi = np.array([
    [1, 0],
    [0, 1],
    [0, 1],
])

# Rebuild mdp2 on top of mdp1 (the [T1, T2] model) with the phi above.
mdp2 = AbstractMDP(mdp1, phi)
# Expected properties of this construction.  These are plain `assert`
# statements, so they are skipped when Python runs with -O.
assert is_markov(mdp2)
assert has_block_dynamics(mdp2)
assert not is_hutter_markov(mdp2)