def graph_PI():
    """Save one graph drawing per step of a policy-iteration run.

    Builds a random sparse MDP with 10 states and 2 actions, lays out the
    graph obtained from ``graph.mdp_topology`` over all deterministic
    policies, and then, for every policy returned by ``utils.solve`` except
    the last one, colours the nodes with the coefficients returned by
    ``graph.sparse_coeffs`` for that policy's value function.

    Images are written to ``figs/pi_graphs/<step>.png``; the directory is
    not created here.
    """
    num_states = 10
    num_actions = 2

    policies = utils.get_deterministic_policies(num_states, num_actions)
    print('n pis: {}'.format(len(policies)))

    mdp = utils.build_random_sparse_mdp(num_states, num_actions, 0.5)

    def value_of(policy):
        # Value function of `policy` under this MDP, squeezed.
        values = utils.value_functional(mdp.P, mdp.r, policy, mdp.discount)
        return values.squeeze()

    # The layout is computed once so nodes stay put across all saved frames.
    adjacency = graph.mdp_topology(policies)
    G = nx.from_numpy_array(adjacency)
    layout = nx.spring_layout(G, iterations=200)

    basis = graph.construct_mdp_basis(policies, mdp)

    # Random starting policy: softmax over standard-normal draws.
    start_pi = utils.softmax(
        np.random.standard_normal((num_states, num_actions))
    )
    coeffs = graph.sparse_coeffs(basis, value_of(start_pi), lr=0.1)

    trajectory = utils.solve(search_spaces.policy_iteration(mdp), start_pi)
    print("\n{} policies to vis".format(len(trajectory)))

    # The final entry of the trajectory is deliberately not drawn ([:-1]).
    for step, policy in enumerate(trajectory[:-1]):
        print('Iteration: {}'.format(step))

        # Each solve starts from the previous step's coefficients (a_init).
        coeffs = graph.sparse_coeffs(
            basis, value_of(policy), lr=0.1, a_init=coeffs
        )

        plt.figure(figsize=(16, 16))
        nx.draw(G, layout, node_color=coeffs, node_size=150)
        out_path = 'figs/pi_graphs/{}.png'.format(step)
        plt.savefig(out_path)
        plt.close()
def graph_PG():
    """Save one graph drawing per step of a policy-gradient run.

    Builds a random MDP with 6 states and 4 actions, lays out the graph
    obtained from ``graph.mdp_topology`` over all deterministic policies,
    runs ``search_spaces.policy_gradient_iteration_logits`` from random
    logits, and draws the first 20 trajectory entries (minus the last of
    those), colouring nodes with the coefficients from
    ``graph.sparse_coeffs``.

    Images are written to ``figs/pg_graphs/<step>.png``; the directory is
    not created here. The frames can be stitched into a video with:

        ffmpeg -framerate 10 -start_number 0 -i %d.png -c:v libx264 -r 30 -pix_fmt yuv420p out.mp4
    """
    num_states = 6
    num_actions = 4

    policies = utils.get_deterministic_policies(num_states, num_actions)
    print('n pis: {}'.format(len(policies)))

    mdp = utils.build_random_mdp(num_states, num_actions, 0.9)

    def value_of(policy):
        # Value function of `policy` under this MDP, squeezed.
        values = utils.value_functional(mdp.P, mdp.r, policy, mdp.discount)
        return values.squeeze()

    # The layout is computed once so nodes stay put across all saved frames.
    adjacency = graph.mdp_topology(policies)
    G = nx.from_numpy_array(adjacency)
    layout = nx.spring_layout(G, iterations=200)

    basis = graph.construct_mdp_basis(policies, mdp)

    # Random starting point in logit space; softmax turns it into a policy.
    start_logits = np.random.standard_normal((num_states, num_actions))
    coeffs = graph.sparse_coeffs(
        basis, value_of(utils.softmax(start_logits)), lr=0.1
    )

    print('\nSolving PG')
    stepper = search_spaces.policy_gradient_iteration_logits(mdp, 0.1)
    trajectory = utils.solve(stepper, start_logits)
    print("\n{} policies to vis".format(len(trajectory)))

    # Only the first 20 entries are visualised. An alternative is to
    # subsample the whole run, e.g. trajectory[::len(trajectory) // 100].
    trajectory = trajectory[0:20]

    # NOTE(review): the solver is seeded with logits, yet each entry is
    # passed to value_functional without a softmax -- confirm that
    # utils.solve returns policies rather than logits here.
    for step, policy in enumerate(trajectory[:-1]):
        print('Iteration: {}'.format(step))

        # Each solve starts from the previous step's coefficients (a_init).
        coeffs = graph.sparse_coeffs(
            basis, value_of(policy), lr=0.1, a_init=coeffs
        )

        plt.figure()
        nx.draw(G, layout, node_color=coeffs)
        out_path = 'figs/pg_graphs/{}.png'.format(step)
        plt.savefig(out_path)
        plt.close()
def test_estimation():
    """Smoke-test ``graph.estimate_coeffs`` on a random target vector.

    Builds a random MDP with 5 states and 2 actions, constructs the basis
    over its deterministic policies, and prints the coefficients returned
    by ``graph.estimate_coeffs`` for a uniformly random vector of length
    ``n_states``. Nothing is asserted; the output is only printed.
    """
    n_states = 5
    n_actions = 2

    # The MDP must exist before its S / A attributes are read. The original
    # read mdp.S and mdp.A first, which raised UnboundLocalError.
    mdp = utils.build_random_mdp(n_states, n_actions, 0.9)
    det_pis = utils.get_deterministic_policies(mdp.S, mdp.A)

    basis = graph.construct_mdp_basis(det_pis, mdp)

    v = np.random.random((n_states, ))
    # estimate_coeffs is handed the transposed basis.
    a = graph.estimate_coeffs(basis.T, v)
    print(a)
def test_sparse_estimation():
    """Smoke-test ``graph.sparse_coeffs`` on a deterministic policy's value.

    Builds a random MDP with 5 states and 2 actions, takes the value
    function of the deterministic policy at index 2, and prints the
    coefficients ``graph.sparse_coeffs`` returns for it against the basis
    built from all deterministic policies. Nothing is asserted.
    """
    num_states = 5
    num_actions = 2

    mdp = utils.build_random_mdp(num_states, num_actions, 0.9)
    policies = utils.get_deterministic_policies(mdp.S, mdp.A)
    basis = graph.construct_mdp_basis(policies, mdp)

    # The target is the value function of one of the basis policies.
    target_policy = policies[2]
    target_value = utils.value_functional(
        mdp.P, mdp.r, target_policy, mdp.discount
    ).squeeze()

    coeffs = graph.sparse_coeffs(basis, target_value)
    print(coeffs)
def test_everything():
    """Run the full pipeline once and show the coloured graph interactively.

    Builds a random MDP with 5 states and 2 actions, computes the
    coefficients ``graph.sparse_coeffs`` returns for the value function of
    the deterministic policy at index 2, and draws the graph built from
    ``graph.mdp_topology`` with those coefficients as node colours.
    Blocks on ``plt.show()``; nothing is asserted.
    """
    num_states = 5
    num_actions = 2

    policies = utils.get_deterministic_policies(num_states, num_actions)
    mdp = utils.build_random_mdp(num_states, num_actions, 0.9)

    adjacency = graph.mdp_topology(policies)
    basis = graph.construct_mdp_basis(policies, mdp)

    # The target is the value function of one of the basis policies
    # (a random vector of length num_states is an alternative target).
    target_policy = policies[2]
    target_value = utils.value_functional(
        mdp.P, mdp.r, target_policy, mdp.discount
    ).squeeze()
    coeffs = graph.sparse_coeffs(basis, target_value)

    G = nx.from_numpy_array(adjacency)
    layout = nx.spring_layout(G, iterations=200)
    nx.draw(G, layout, node_color=coeffs)
    plt.show()