def test_single_column_no_wall(self):
    """A 3x1 column with one exit and deterministic moves converges to run-up below the exit."""
    mdp = MDP(
        length=3,
        width=1,
        p_walk=1,
        p_run=1,
        reward_run=0,
        reward_walk=0,
        wall_list=[],
        exit_list=[((0, 0), 1)],
        discount=0.7,
        e=1e-80,
    )
    mdp.value_iteration()
    # Policy codes: 0 = exit in the top two rows, 4 = run up from the bottom cell.
    expected_policy = np.array([[0], [0], [4]])
    test.assert_array_almost_equal(expected_policy, mdp.policy)
def test_small_value_iteration_no_wall(self):
    """Value iteration on a 3x3 grid with a +1 and a -1 exit and no walls."""
    mdp = MDP(
        width=3,
        length=3,
        p_walk=0.7,
        p_run=0.7,
        reward_walk=0,
        reward_run=0,
        discount=0.1,
        e=1e-80,
        exit_list=[((0, 2), 1), ((1, 2), -1)],
    )
    mdp.value_iteration()
    # Expected per-cell action codes after convergence.
    expected_policy = np.array([[7, 3, 7], [0, 2, 0], [4, 4, 4]])
    test.assert_array_almost_equal(expected_policy, mdp.policy)
def test_single_column_middle_wall(self):
    """A wall in the middle of a 3x1 column: middle cell renders as 'None' in the output.

    Fix: the local variable was named ``str``, shadowing the builtin;
    renamed to ``output``.
    """
    mdp = MDP(
        length=3,
        width=1,
        p_walk=0.7,
        p_run=0.7,
        reward_run=-0.1,
        reward_walk=-0.3,
        wall_list=[(1, 0)],
        exit_list=[((0, 0), 1)],
        discount=0.7,
        e=1e-80,
    )
    mdp.value_iteration()
    output = mdp.out_put()
    # One row per line: exit at the top, wall cell prints "None", run up from below.
    self.assertEqual(output, "Exit\nNone\nRun Up\n")
def test_single_row_middle_wall(self):
    """A 1x3 row with a wall in the middle cell and an exit at the right end."""
    mdp = MDP(
        length=1,
        width=3,
        p_walk=0.7,
        p_run=0.7,
        reward_run=-0.1,
        reward_walk=-0.3,
        wall_list=[(0, 1)],
        exit_list=[((0, 2), 1)],
        discount=0.7,
        e=1e-80,
    )
    mdp.value_iteration()
    rendered = mdp.out_put()
    # Cells in a row are comma-separated; the wall cell prints "None".
    self.assertEqual("Run Up,None,Exit\n", rendered)
def test_initial(self):
    """Initial state of a freshly constructed MDP: exit values seeded and
    transition probabilities laid out per (direction, action) pair."""
    mdp = MDP(
        width=3,
        length=3,
        p_walk=0.7,
        p_run=0.6,
        reward_walk=1,
        reward_run=1,
        discount=0,
        exit_list=[((0, 2), 1), ((1, 2), -1)],
    )
    # Exit cells carry their reward in the initial value grid; everything else is 0.
    test.assert_array_equal(
        np.array([[0, 0, 1], [0, 0, -1], [0, 0, 0]]), mdp.value)

    # Spot-check the transition tensor for the walk-up action.
    p_up_walk_up = np.squeeze(mdp.p[:, :, mdp.up, mdp.walk_up])
    test.assert_array_equal(np.ones((3, 3)) * 0.7, p_up_walk_up)
    p_down_walk_up = np.squeeze(mdp.p[:, :, mdp.down, mdp.walk_up])
    test.assert_array_equal(np.zeros((3, 3)), p_down_walk_up)
    p_left_walk_up = np.squeeze(mdp.p[:, :, mdp.left, mdp.walk_up])
    test.assert_array_almost_equal(np.ones((3, 3)) * 0.15, p_left_walk_up)
    p_right_walk_up = np.squeeze(mdp.p[:, :, mdp.right, mdp.walk_up])
    test.assert_array_almost_equal(np.ones((3, 3)) * 0.15, p_right_walk_up)

    # Exhaustively check the intended-direction probability for every action:
    # walk actions share the direction's index, run actions are offset by 4.
    for direction in mdp.direction_enum:
        for action in mdp.action_enum:
            prob = np.squeeze(mdp.p[:, :, direction, action])
            if direction == action:
                # walk_<direction> moving in its own direction
                test.assert_array_almost_equal(prob, np.ones((3, 3)) * 0.7)
            elif direction == action - 4:
                # run_<direction> moving in its own direction
                test.assert_array_almost_equal(prob, np.ones((3, 3)) * 0.6)
def test_case1(self):
    """End-to-end value iteration on a 6x5 grid with two walls and two exits."""
    mdp = MDP(
        width=6,
        length=5,
        p_walk=0.8,
        p_run=0.6,
        reward_walk=-0.3,
        reward_run=-0.2,
        wall_list=[(1, 1), (4, 3)],
        discount=0.7,
        exit_list=[((0, 2), 10), ((2, 4), 5)],
        e=1e-80,
    )
    mdp.value_iteration()
    # Full expected policy grid, one action code per cell.
    expected_policy = np.array(
        [[7., 3., 0., 2., 6., 6.],
         [0., 0., 0., 0., 0., 2.],
         [4., 3., 4., 4., 4., 2.],
         [4., 0., 4., 0., 0., 0.],
         [4., 4., 4., 4., 4., 2.]])
    test.assert_equal(expected_policy, mdp.policy)
def test_wall_builder(self):
    """Wall-neighbour lists are built correctly for walk (distance 1) and run (distance 2).

    Fixes: the second assertion re-checked ``walk_up`` instead of the
    just-assigned ``walk_left`` (copy-paste typo; ``walk_left`` was never used).
    """
    mdp = MDP(
        width=3,
        length=3,
        p_walk=0.7,
        p_run=0.6,
        reward_walk=1,
        reward_run=1,
        discount=0,
        wall_list=[(1, 1), (0, 1)],
        exit_list=[((0, 2), 1), ((1, 2), -1)],
    )
    walk_up = mdp.wall_up_1
    self.assertListEqual([(2, 1), (1, 1)], walk_up)
    walk_left = mdp.wall_left_1
    # Fixed: originally asserted walk_up again here, leaving walk_left unchecked.
    # TODO(review): confirm [(2, 1), (1, 1)] is the intended expectation for wall_left_1.
    self.assertListEqual([(2, 1), (1, 1)], walk_left)
    walk_down = mdp.wall_down_1
    self.assertListEqual([(0, 1)], walk_down)
    # NOTE(review): this reads wall_right_2 (run distance) although the variable is
    # named walk_right — likely meant wall_right_1; left unchanged pending the
    # correct expected value.
    walk_right = mdp.wall_right_2
    self.assertListEqual([(1, 0), (0, 0)], walk_right)
    run_up = mdp.wall_up_2
    self.assertListEqual([(2, 1), (1, 1), (2, 1)], run_up)
    run_left = mdp.wall_left_2
    self.assertListEqual([(1, 2), (0, 2)], run_left)
    run_down = mdp.wall_down_2
    self.assertListEqual([(0, 1)], run_down)
    run_right = mdp.wall_right_2
    self.assertListEqual([(1, 0), (0, 0)], run_right)
def test_case_11_discount_0(self):
    """Run value iteration on the configuration in input11.txt and dump the policy.

    Fix: the local variable was named ``str``, shadowing the builtin;
    renamed to ``output``.
    """
    configuration = Configuration()
    (width, length, p_walk, p_run, r_walk, r_run,
     discount, wall_list, exit_list) = configuration.read_file("input11.txt")
    mdp = MDP(
        width=width,
        length=length,
        p_walk=p_walk,
        p_run=p_run,
        reward_walk=r_walk,
        reward_run=r_run,
        wall_list=wall_list,
        discount=discount,
        exit_list=exit_list,
        e=1e-80,
    )
    mdp.value_iteration()
    output = mdp.out_put()
    # NOTE(review): this test only writes the result to disk and asserts nothing;
    # consider comparing against a golden file as test_big_accurate does.
    with open("my_output11.txt", 'w') as f:
        f.write(output)
def test_big_accurate(self):
    """Run value iteration on input5.txt and compare against a golden output file.

    Fix: the local variable was named ``str``, shadowing the builtin;
    renamed to ``output``.
    """
    configuration = Configuration()
    (width, length, p_walk, p_run, r_walk, r_run,
     discount, wall_list, exit_list) = configuration.read_file("input5.txt")
    mdp = MDP(
        width=width,
        length=length,
        p_walk=p_walk,
        p_run=p_run,
        reward_walk=r_walk,
        reward_run=r_run,
        wall_list=wall_list,
        discount=discount,
        exit_list=exit_list,
        e=1e-80,
    )
    mdp.value_iteration()
    output = mdp.out_put()
    # Persist the freshly computed policy for inspection.
    with open("my_output5.txt", 'w') as f:
        f.write(output)
    # Golden-file comparison against a previously verified result.
    with open("output_shun.txt", "r") as f:
        data = f.read()
    self.assertEqual(data, output)
def test_policy_evaluation(self):
    """Single-step policy evaluation for walk-up and run-up on a seeded value grid."""
    mdp = MDP(
        width=3,
        length=3,
        p_walk=0.7,
        p_run=0.6,
        reward_walk=1,
        reward_run=1,
        discount=0,
        wall_list=[(1, 1)],
        exit_list=[((0, 2), 1), ((1, 2), -1)],
    )
    # Seed the value grid with distinct values so expectations are easy to trace.
    mdp.value = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

    walk_up = mdp.policy_evaluation(mdp.walk_up)
    test.assert_array_almost_equal(
        [[1.15, 2, 2.85], [1.9, 2.9, 3.9], [5.05, 8, 6.75]], walk_up)

    run_up = mdp.policy_evaluation(mdp.run_up)
    # Each entry written as intended-direction + two side-slip contributions.
    expected_run_up = np.array(
        [[0.6 + 0.2 + 0.6, 1.2 + 0.4 + 0.4, 1.8 + 0.2 + 0.6],
         [2.4 + 0.8 + 0.8, 5, 3.6 + 1.2 + 1.2],
         [0.6 + 1.4 + 1.8, 4.8 + 1.6 + 1.6, 1.8 + 1.4 + 1.8]])
    test.assert_array_almost_equal(expected_run_up, run_up)
def test_output(self):
    """Rendering a hand-set policy grid produces the expected text.

    Fixes: the local variable was named ``str``, shadowing the builtin
    (renamed to ``output``), and the expected string's row separators had
    been collapsed to spaces — ``out_put`` emits one grid row per line
    (see test_single_column_middle_wall), so they are restored to newlines.
    """
    mdp = MDP(
        width=6,
        length=5,
        p_walk=0.8,
        p_run=0.6,
        reward_walk=-0.3,
        reward_run=-0.2,
        wall_list=[(1, 1), (4, 3)],
        discount=0.7,
        exit_list=[((0, 2), 10), ((2, 4), 5)],
        e=1e-80,
    )
    # Bypass value iteration: set the policy grid directly.
    mdp.policy = np.array(
        [[7., 3., 0., 2., 6., 6.],
         [0., 0., 0., 0., 0., 2.],
         [4., 3., 4., 4., 4., 2.],
         [4., 0., 4., 0., 0., 0.],
         [4., 4., 4., 4., 4., 2.]])
    output = mdp.out_put()
    self.assertEqual(
        output,
        "Run Right,Walk Right,Exit,Walk Left,Run Left,Run Left\n"
        "Walk Up,None,Walk Up,Walk Up,Walk Up,Walk Left\n"
        "Run Up,Walk Right,Run Up,Run Up,Exit,Walk Left\n"
        "Run Up,Walk Up,Run Up,Walk Up,Walk Up,Walk Up\n"
        "Run Up,Run Up,Run Up,None,Run Up,Walk Left\n")