# is not Markov either.

# --- Ground problem: 4 states ------------------------------------------------
# Two 4x4 matrices stacked into one (2, 4, 4) array and passed to MDP as its
# first argument (presumably one transition matrix per action -- confirm
# against MDP). The two matrices differ only in row 0 ([0, 1, 0, 0] versus
# [0, 0, 1, 0]); rows 1-3 are [0, 0, 0, 1] in both.
# The single 0.0 literal makes the whole array floating point.
T_list = np.array([
    [
        [0, 1, 0, 0.0],
        [0, 0, 0, 1],
        [0, 0, 0, 1],
        [0, 0, 0, 1],
    ],
    [
        [0, 0, 1, 0],
        [0, 0, 0, 1],
        [0, 0, 0, 1],
        [0, 0, 0, 1],
    ],
])

# One 4x4 matrix, handed to MDP twice below as [R, R].
R = np.array([
    [0, 0.5, 0, 0],
    [0, 0, 0, 1],
    [0, 0, 0, 2],
    [0, 0, 0, 0],
])

# 4x3 zero/one matrix. Rows 1 and 2 are identical ([0, 1, 0]), so column 1 is
# the only column whose sum exceeds 1 -- that is what `agg_state` selects below.
phi = np.array([
    [1, 0, 0],
    [0, 1, 0],
    [0, 1, 0],
    [0, 0, 1],
])

mdp1 = MDP(T_list, [R, R], gamma=0.9)

# NOTE: the first construction (explicit p0 and t=1) is replaced immediately
# by the second one, which relies on AbstractMDP's defaults. Everything below
# operates on the second object.
mdp2 = AbstractMDP(mdp1, phi, p0=np.array([1, 0, 0, 0]), t=1)
mdp2 = AbstractMDP(mdp1, phi)

# Bare expression: the return value is not stored.
is_markov(mdp2)

# --- Policies and their value functions --------------------------------------
pi_g_list = mdp2.piecewise_constant_policies()
pi_a_list = mdp2.abstract_policies()

# vi(...) returns a sequence; only its first element is kept for each policy
# (presumably the value function -- see the three-way unpacking on the last
# statement).
v_g_list = [vi(mdp1, policy)[0] for policy in pi_g_list]
v_a_list = [vi(mdp2, policy)[0] for policy in pi_a_list]

# Stack the output of sort_value_fns into one array and round to 4 decimals.
order_v_g = np.stack(sort_value_fns(v_g_list)).round(4)
order_v_a = np.stack(sort_value_fns(v_a_list)).round(4)

# Bare expression: value is discarded unless run interactively.
mdp2.p0

# Boolean mask over the columns of phi that have more than one row mapped to
# them.
agg_state = mdp2.phi.sum(axis=0) > 1

# mdp2.B(policy, t=1) restricted to the masked entries, one row per policy in
# pi_g_list. Bare expression: the stacked result is not stored.
np.stack([mdp2.B(policy, t=1)[agg_state] for policy in pi_g_list])

# vi called without a policy argument; the middle return value is ignored.
v_phi_pi_phi_star, _, pi_phi_star = vi(mdp2)
[2/3, 1/4, 1/12], [0, 3/4, 1/4], ]) # T_alt = np.array([ # [1/2, 3/8, 1/8], # [1, 0, 0], # [1, 0, 0], # ]) R = np.array([ [0, 1, 1], [1, 0, 0], [1, 0, 0], ]) mdp1 = MDP([T1, T2], [R, R], gamma=0.9) mdp2 = AbstractMDP(MDP([T0, T1], [R, R], gamma=0.9), np.array([[1,0],[0,1],[0,1]])) is_hutter_markov(mdp2) is_markov(mdp2) v_star, q_star, pi_star = vi(mdp1) v_star, pi_star phi = np.array([ [1, 0], [0, 1], [0, 1], ]) mdp2 = AbstractMDP(mdp1, phi) assert is_markov(mdp2) assert has_block_dynamics(mdp2) assert not is_hutter_markov(mdp2)