Esempio n. 1
0
 def test_single_column_no_wall(self):
     """Run value iteration on a wall-free grid (length=3, width=1).

     The only exit sits at (0, 0) with reward 1; walking and running are
     both deterministic (probability 1) and cost nothing. The resulting
     policy array is compared with the expected column ``[[0], [0], [4]]``.
     """
     grid_config = dict(
         length=3,
         width=1,
         p_walk=1,
         p_run=1,
         reward_run=0,
         reward_walk=0,
         wall_list=[],
         exit_list=[((0, 0), 1)],
         discount=0.7,
         e=1e-80,
     )
     solver = MDP(**grid_config)
     solver.value_iteration()

     expected_policy = np.array([[0], [0], [4]])
     test.assert_array_almost_equal(expected_policy, solver.policy)
Esempio n. 2
0
 def test_small_value_iteration_no_wall(self):
     """Run value iteration on a 3x3 grid with two exits and no wall list.

     Exits are at (0, 2) with reward 1 and at (1, 2) with reward -1. The
     computed policy array is compared against a fixed 3x3 expectation.
     """
     exits = [((0, 2), 1), ((1, 2), -1)]
     solver = MDP(width=3, length=3,
                  p_walk=0.7, p_run=0.7,
                  reward_walk=0, reward_run=0,
                  discount=0.1, e=1e-80,
                  exit_list=exits)
     solver.value_iteration()

     expected_policy = np.array([[7, 3, 7],
                                 [0, 2, 0],
                                 [4, 4, 4]])
     test.assert_array_almost_equal(expected_policy, solver.policy)
Esempio n. 3
0
 def test_single_column_middle_wall(self):
     """Check the textual policy for a single column split by a wall.

     The grid has length=3 and width=1, a wall at (1, 0) and an exit at
     (0, 0) with reward 1. After value iteration, ``out_put()`` is expected
     to return one line per row: ``Exit``, ``None``, ``Run Up``.
     """
     mdp = MDP(length=3,
               width=1,
               p_walk=0.7,
               p_run=0.7,
               reward_run=-0.1,
               reward_walk=-0.3,
               wall_list=[(1, 0)],
               exit_list=[((0, 0), 1)],
               discount=0.7,
               e=1e-80)
     mdp.value_iteration()
     # Named ``policy`` (not ``str``) so the builtin is not shadowed; this
     # also matches the naming used by test_single_row_middle_wall.
     policy = mdp.out_put()
     self.assertEqual("Exit\nNone\nRun Up\n", policy)
Esempio n. 4
0
 def test_single_row_middle_wall(self):
     """Check the textual policy for a single row split by a wall.

     The grid has length=1 and width=3, a wall at (0, 1) and an exit at
     (0, 2) with reward 1. After value iteration, ``out_put()`` is expected
     to return the single comma-separated line ``Run Up,None,Exit``.
     """
     settings = {
         "length": 1,
         "width": 3,
         "p_walk": 0.7,
         "p_run": 0.7,
         "reward_run": -0.1,
         "reward_walk": -0.3,
         "wall_list": [(0, 1)],
         "exit_list": [((0, 2), 1)],
         "discount": 0.7,
         "e": 1e-80,
     }
     solver = MDP(**settings)
     solver.value_iteration()
     rendered = solver.out_put()
     self.assertEqual("Run Up,None,Exit\n", rendered)
Esempio n. 5
0
    def test_initial(self):
        """Verify the state of a freshly constructed 3x3 MDP.

        Checks that ``mdp.value`` holds the exit rewards and zeros elsewhere,
        then inspects slices of the transition array ``mdp.p`` indexed by
        (row, column, direction, action).
        """
        mdp = MDP(width=3, length=3,
                  p_walk=0.7, p_run=0.6,
                  reward_walk=1, reward_run=1,
                  discount=0,
                  exit_list=[((0, 2), 1), ((1, 2), -1)])

        initial_value = np.array([[0, 0, 1], [0, 0, -1], [0, 0, 0]])
        test.assert_array_equal(initial_value, mdp.value)

        def transition(direction, action):
            # One 3x3 probability plane for a (direction, action) pair.
            return np.squeeze(mdp.p[:, :, direction, action])

        # For the walk_up action: 0.7 towards up, nothing towards down, and
        # 0.15 towards each of left and right.
        toward_up = transition(mdp.up, mdp.walk_up)
        test.assert_array_equal(np.ones((3, 3)) * 0.7, toward_up)

        toward_down = transition(mdp.down, mdp.walk_up)
        test.assert_array_equal(np.zeros((3, 3)), toward_down)

        toward_left = transition(mdp.left, mdp.walk_up)
        test.assert_array_almost_equal(np.ones((3, 3)) * 0.15, toward_left)

        toward_right = transition(mdp.right, mdp.walk_up)
        test.assert_array_almost_equal(np.ones((3, 3)) * 0.15, toward_right)

        for direction in mdp.direction_enum:
            for action in mdp.action_enum:
                plane = transition(direction, action)
                if direction == action:
                    # The action is a walk in this very direction.
                    wanted = 0.7
                elif direction == action - 4:
                    # The action is a run in this very direction.
                    wanted = 0.6
                else:
                    continue
                test.assert_array_almost_equal(plane, np.ones((3, 3)) * wanted)
Esempio n. 6
0
 def test_case1(self):
     """Run value iteration on a 5-long, 6-wide grid with walls and exits.

     Walls are at (1, 1) and (4, 3); exits are at (0, 2) with reward 10 and
     at (2, 4) with reward 5. The computed policy array must equal the
     fixed 5x6 expectation.
     """
     solver = MDP(
         width=6,
         length=5,
         p_walk=0.8,
         p_run=0.6,
         reward_walk=-0.3,
         reward_run=-0.2,
         wall_list=[(1, 1), (4, 3)],
         discount=0.7,
         exit_list=[((0, 2), 10), ((2, 4), 5)],
         e=1e-80,
     )
     solver.value_iteration()

     expected_rows = [
         [7., 3., 0., 2., 6., 6.],
         [0., 0., 0., 0., 0., 2.],
         [4., 3., 4., 4., 4., 2.],
         [4., 0., 4., 0., 0., 0.],
         [4., 4., 4., 4., 4., 2.],
     ]
     test.assert_equal(np.array(expected_rows), solver.policy)
Esempio n. 7
0
    def test_wall_builder(self):
        """Check the wall lookup lists built for walls at (1, 1) and (0, 1).

        The ``wall_<direction>_1`` attributes are compared for walking and
        the ``wall_<direction>_2`` attributes for running, on a 3x3 grid.
        """
        mdp = MDP(width=3,
                  length=3,
                  p_walk=0.7,
                  p_run=0.6,
                  reward_walk=1,
                  reward_run=1,
                  discount=0,
                  wall_list=[(1, 1), (0, 1)],
                  exit_list=[((0, 2), 1), ((1, 2), -1)])

        # One-step (walk) lists.
        walk_up = mdp.wall_up_1
        self.assertListEqual([(2, 1), (1, 1)], walk_up)
        # NOTE(review): the original re-asserted walk_up here, so walk_left
        # was never checked. The expected value below is inferred from the
        # wall layout and mirrors the run_left expectation - confirm against
        # the MDP implementation.
        walk_left = mdp.wall_left_1
        self.assertListEqual([(1, 2), (0, 2)], walk_left)
        walk_down = mdp.wall_down_1
        self.assertListEqual([(0, 1)], walk_down)
        # NOTE(review): the original read wall_right_2 here (copy-paste), so
        # the one-step right list was never checked. Presumably the attribute
        # is wall_right_1, by analogy with the other directions - confirm.
        walk_right = mdp.wall_right_1
        self.assertListEqual([(1, 0), (0, 0)], walk_right)

        # Two-step (run) lists.
        run_up = mdp.wall_up_2
        self.assertListEqual([(2, 1), (1, 1), (2, 1)], run_up)
        run_left = mdp.wall_left_2
        self.assertListEqual([(1, 2), (0, 2)], run_left)
        run_down = mdp.wall_down_2
        self.assertListEqual([(0, 1)], run_down)
        run_right = mdp.wall_right_2
        self.assertListEqual([(1, 0), (0, 0)], run_right)
Esempio n. 8
0
 def test_case_11_discount_0(self):
     """Solve the problem described in ``input11.txt`` and dump the policy.

     The configuration is read through ``Configuration.read_file``, value
     iteration is run, and the textual policy is written to
     ``my_output11.txt``. No assertion is made: the test only fails if one
     of these steps raises.
     """
     configuration = Configuration()
     (width, length, p_walk, p_run, r_walk, r_run, discount, wall_list,
      exit_list) = configuration.read_file("input11.txt")
     mdp = MDP(width=width,
               length=length,
               p_walk=p_walk,
               p_run=p_run,
               reward_walk=r_walk,
               reward_run=r_run,
               wall_list=wall_list,
               discount=discount,
               exit_list=exit_list,
               e=1e-80)
     mdp.value_iteration()
     # Avoid naming the result ``str``: that would shadow the builtin.
     policy_text = mdp.out_put()
     with open("my_output11.txt", 'w') as f:
         f.write(policy_text)
Esempio n. 9
0
 def test_big_accurate(self):
     """Solve ``input5.txt`` and compare the policy with a reference file.

     The textual policy is written to ``my_output5.txt`` and must be
     identical to the contents of ``output_shun.txt``.
     """
     configuration = Configuration()
     (width, length, p_walk, p_run, r_walk, r_run, discount, wall_list,
      exit_list) = configuration.read_file("input5.txt")
     mdp = MDP(width=width,
               length=length,
               p_walk=p_walk,
               p_run=p_run,
               reward_walk=r_walk,
               reward_run=r_run,
               wall_list=wall_list,
               discount=discount,
               exit_list=exit_list,
               e=1e-80)
     mdp.value_iteration()
     # Avoid naming the result ``str``: that would shadow the builtin.
     policy_text = mdp.out_put()
     with open("my_output5.txt", 'w') as f:
         f.write(policy_text)
     with open("output_shun.txt", "r") as f:
         reference = f.read()
     # Compare once the reference file handle has been released.
     self.assertEqual(reference, policy_text)
Esempio n. 10
0
    def test_policy_evaluation(self):
        """Check ``policy_evaluation`` for walk_up and run_up on a 3x3 grid.

        The grid has a wall at (1, 1). ``mdp.value`` is overwritten with the
        numbers 1..9 so that the expected results can be written out by hand.
        """
        mdp = MDP(
            width=3,
            length=3,
            p_walk=0.7,
            p_run=0.6,
            reward_walk=1,
            reward_run=1,
            discount=0,
            wall_list=[(1, 1)],
            exit_list=[((0, 2), 1), ((1, 2), -1)],
        )
        mdp.value = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

        expected_walk = [[1.15, 2, 2.85],
                         [1.9, 2.9, 3.9],
                         [5.05, 8, 6.75]]
        walked = mdp.policy_evaluation(mdp.walk_up)
        test.assert_array_almost_equal(expected_walk, walked)

        # Each entry is left as the sum of its hand-computed terms.
        expected_run = np.array([
            [0.6 + 0.2 + 0.6, 1.2 + 0.4 + 0.4, 1.8 + 0.2 + 0.6],
            [2.4 + 0.8 + 0.8, 5, 3.6 + 1.2 + 1.2],
            [0.6 + 1.4 + 1.8, 4.8 + 1.6 + 1.6, 1.8 + 1.4 + 1.8],
        ])
        ran = mdp.policy_evaluation(mdp.run_up)
        test.assert_array_almost_equal(expected_run, ran)
Esempio n. 11
0
    def test_output(self):
        """Check that ``out_put()`` renders a hand-set policy array as text.

        The policy is assigned directly (value iteration is not run) on a
        grid with walls at (1, 1) and (4, 3) and exits at (0, 2) and (2, 4).
        The expected text has one comma-separated line per grid row, each
        terminated by a newline.
        """
        mdp = MDP(width=6,
                  length=5,
                  p_walk=0.8,
                  p_run=0.6,
                  reward_walk=-0.3,
                  reward_run=-0.2,
                  wall_list=[(1, 1), (4, 3)],
                  discount=0.7,
                  exit_list=[((0, 2), 10), ((2, 4), 5)],
                  e=1e-80)
        mdp.policy = np.array([[7., 3., 0., 2., 6., 6.],
                               [0., 0., 0., 0., 0., 2.],
                               [4., 3., 4., 4., 4., 2.],
                               [4., 0., 4., 0., 0., 0.],
                               [4., 4., 4., 4., 4., 2.]])
        # Avoid naming the result ``str``: that would shadow the builtin.
        rendered = mdp.out_put()
        expected = (
            "Run Right,Walk Right,Exit,Walk Left,Run Left,Run Left\n"
            "Walk Up,None,Walk Up,Walk Up,Walk Up,Walk Left\n"
            "Run Up,Walk Right,Run Up,Run Up,Exit,Walk Left\n"
            "Run Up,Walk Up,Run Up,Walk Up,Walk Up,Walk Up\n"
            "Run Up,Run Up,Run Up,None,Run Up,Walk Left\n"
        )
        self.assertEqual(rendered, expected)