# Enumerate the two policy families exposed by the abstract MDP and evaluate
# each policy: piecewise-constant policies on mdp1, abstract policies on mdp2.
# Only the first element returned by vi() is kept for each policy.
pi_g_list = mdp2.piecewise_constant_policies()
pi_a_list = mdp2.abstract_policies()
v_g_list = [vi(mdp1, policy)[0] for policy in pi_g_list]
v_a_list = [vi(mdp2, policy)[0] for policy in pi_a_list]

# Sorted value functions, stacked into a single array and rounded to 4 places.
order_v_g = np.round(np.stack(sort_value_fns(v_g_list)), 4)
order_v_a = np.round(np.stack(sort_value_fns(v_a_list)), 4)

# Bare expression: the value is discarded when run as a plain script
# (presumably inspected from an interactive cell).
mdp2.p0

# Boolean mask over the columns of phi whose entries sum to more than 1.
# NOTE(review): presumably the abstract states that merge several ground
# states -- confirm against the AbstractMDP definition.
agg_state = mdp2.phi.sum(axis=0) > 1
np.stack([mdp2.B(policy, t=1)[agg_state] for policy in pi_g_list])

# Solve the abstract MDP, then evaluate the corresponding ground policy
# (obtained through mdp2.get_ground_policy) on mdp1.
v_phi_pi_phi_star, _, pi_phi_star = vi(mdp2)
v_pi_phi_star = vi(mdp1, mdp2.get_ground_policy(pi_phi_star))[0]

# Look for examples of v_pi_phi_star < v.
# On a hit the loop stops early and `v` stays bound to that value function.
for v in v_g_list:
    if compare_value_fns(v_pi_phi_star, v) == "<":
        print('Found example of order mismatch.')
        break
else:
    # Reached only when the loop finished without hitting `break`.
    print('All examples had proper ordering.')

#%%
# A second argument was previously passed to these calls and is currently
# disabled: 'graphviz/non_markov_b/ground_17' and
# 'graphviz/non_markov_b/abstract_17' respectively.
graph_value_fns(v_g_list)
graph_value_fns(v_a_list)

# Bare expressions again: values are discarded outside an interactive cell.
v_pi_phi_star
np.round(np.asarray(v_g_list), 3)
np.round(np.asarray(v_a_list), 3)
[4, 0, 0, 0, 0, 0] ])/4 mdp1 = MDP([T, T], [R, R], gamma=0.9) phi = np.array([ [1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1], [0, 0, 0, 1], [0, 0, 0, 1], ]) mdp2 = AbstractMDP(mdp1, phi) v_star, q_star, pi_star = vi(mdp1) v_star, pi_star pi_g_list = mdp2.piecewise_constant_policies() pi_a_list = mdp2.abstract_policies() v_g_list = [vi(mdp1, pi)[0] for pi in pi_g_list] v_a_list = [vi(mdp2, pi)[0] for pi in pi_a_list] np.allclose(v_g_list, v_g_list[0]) order_v_g = sorted_order(v_g_list) order_v_a = sorted_order(v_a_list) assert np.allclose(order_v_a, order_v_g) graph_value_fns(v_a_list) graph_value_fns(v_g_list)
v_star, q_star, pi_star = vi(mdp1)
# Bare expression: discarded unless run from an interactive cell.
v_star, pi_star

# Evaluate every piecewise-constant policy on mdp1 and every abstract policy
# on mdp2, keeping only the first element returned by vi().
pi_g_list = mdp2.piecewise_constant_policies()
pi_a_list = mdp2.abstract_policies()

v_g_list = [vi(mdp1, pi)[0] for pi in pi_g_list]
v_a_list = [vi(mdp2, pi)[0] for pi in pi_a_list]

# The two policy families must produce the same ordering.
order_v_g = sorted_order(v_g_list)
order_v_a = sorted_order(v_a_list)
assert np.allclose(order_v_a, order_v_g)
print('All tests passed.')

#%%
graph_value_fns(v_g_list, 'graphviz/arbitrary_both/ground_10')
graph_value_fns(v_a_list, 'graphviz/arbitrary_both/abstract_10')

#%%
v_phi_star, q_phi_star, pi_phi_star = vi(mdp2)
# Bare expression: discarded unless run from an interactive cell.
v_phi_star

# Number of deterministic policies on mdp1: one action choice per state.
n_policies = mdp1.n_actions**mdp1.n_states


def get_policy(mdp, i):
    """Return the ``i``-th deterministic policy of ``mdp`` as an int array.

    ``i`` is written in base ``mdp.n_actions`` and left-padded with zeros to
    ``mdp.n_states`` digits; digit ``k`` of that string becomes entry ``k``
    of the returned array.

    Args:
        mdp: object exposing integer ``n_actions`` and ``n_states``.
        i: policy index, ``0 <= i < mdp.n_actions ** mdp.n_states``.

    Returns:
        1-D integer ``np.ndarray`` of length ``mdp.n_states``.

    Raises:
        AssertionError: if ``i`` is outside the valid range for ``mdp``.

    NOTE(review): digits above 9 would be rendered as letters by the base
    conversion and fail the int cast, so this presumably only supports
    ``n_actions <= 10`` -- confirm against the gmpy ``digits`` docs.
    """
    # Derive the bound from the MDP that was passed in. The module-level
    # `n_policies` describes mdp1 only, so using it here would accept indices
    # that expand to more than mdp.n_states digits for a smaller MDP.
    policy_count = mdp.n_actions ** mdp.n_states
    assert 0 <= i < policy_count, (
        'policy index %r out of range [0, %d)' % (i, policy_count))
    pi_string = gmpy.digits(i, mdp.n_actions).zfill(mdp.n_states)
    pi = np.asarray(list(pi_string), dtype=int)
    return pi

# for each ground-state policy
# The second entry of each list is the transpose of the first.
mdp1 = MDP([T, T.transpose()], [R, R.transpose()], gamma=0.9)

# One-hot 4x3 integer matrix: row i carries its single 1 in column
# abstract_index[i], so rows 0 and 3 share column 0.
abstract_index = [0, 1, 2, 0]
phi = np.eye(3, dtype=int)[abstract_index]
mdp2 = UniformAbstractMDP(mdp1, phi)
# Bare expression: the result is not stored (interactive inspection only).
is_markov(mdp2)

# Solve both MDPs; the bare tuples below are discarded outside an
# interactive cell.
v_g_star, q_g_star, pi_g_star = vi(mdp1)
v_g_star, pi_g_star

v_a_star, q_a_star, pi_a_star = vi(mdp2)
v_a_star, pi_a_star

# Evaluate every piecewise-constant policy on mdp1 and every abstract policy
# on mdp2, keeping only the first element returned by vi().
pi_g_list = mdp2.piecewise_constant_policies()
pi_a_list = mdp2.abstract_policies()
v_g_list = [vi(mdp1, policy)[0] for policy in pi_g_list]
v_a_list = [vi(mdp2, policy)[0] for policy in pi_a_list]

order_v_g = sorted_order(v_g_list)
order_v_a = sorted_order(v_a_list)

# Print both partial orderings before asserting, so they are visible even
# when the assertion below fails.
print(partial_ordering(v_g_list))
print(partial_ordering(v_a_list))
assert np.allclose(order_v_a, order_v_g)

#%%
graph_value_fns(v_g_list, 'graphviz/non_markov/ground_11')
graph_value_fns(v_a_list, 'graphviz/non_markov/abstract_11')
order_v_g = sort_value_fns(v_g_list)
order_v_a = sort_value_fns(v_a_list)

# Solve the abstract MDP, then evaluate the corresponding ground policy
# (obtained through mdp2.get_ground_policy) on mdp1.
v_phi_pi_phi_star, _, pi_phi_star = vi(mdp2)
v_pi_phi_star = vi(mdp1, mdp2.get_ground_policy(pi_phi_star))[0]

# Look for examples of v_pi_phi_star < v.
# A hit exits silently and leaves `v` bound to that value function; only
# the no-hit case prints a message.
for v in v_g_list:
    if compare_value_fns(v_pi_phi_star, v) == "<":
        break
else:
    print('No examples found.')

#%%
graph_value_fns(v_g_list)
graph_value_fns(v_a_list)

# Bare expressions: values are discarded outside an interactive cell.
v_pi_phi_star
np.round(np.asarray(v_g_list), 3)
np.round(np.asarray(v_a_list), 3)

#%%
# This illustrates an example where V^{\pi_\phi^*} < max_{\pi\in \Pi_\phi} V^{\pi}
# Note the fixed weighting scheme.

# Two 3x3 float matrices each.
# NOTE(review): presumably indexed [action, state, next_state] -- confirm.
T_list = np.array([
    [
        [1., 0., 0.],
        [1., 0., 0.],
        [0., 0., 1.],
    ],
    [
        [0., 1., 0.],
        [0., 0., 1.],
        [0., 1., 0.],
    ],
])
R_list = np.array([
    [
        [1., 0., 0.],
        [0.5, 0., 0.],
        [0., 0., 0.5],
    ],
    [
        [0., 1., 0.],
        [0., 0., 1.],
        [0., 0.1, 0.],
    ],
])

# One-hot 3x2 integer matrix: row i carries its single 1 in column
# abstract_index[i], so rows 0 and 2 share column 1.
abstract_index = [1, 0, 1]
phi = np.eye(2, dtype=int)[abstract_index]