コード例 #1
0
ファイル: test_MDP.py プロジェクト: liyiran/assignment3
 def test_single_column_no_wall(self):
     """Check the policy found on a wall-free grid with length=3, width=1.

     Walk and run both succeed with probability 1 and carry zero reward;
     the single exit sits at (0, 0) with reward 1. After value iteration
     the policy array is expected to be [[0], [0], [4]].
     """
     grid = MDP(
         length=3,
         width=1,
         p_walk=1,
         p_run=1,
         reward_run=0,
         reward_walk=0,
         wall_list=[],
         exit_list=[((0, 0), 1)],
         discount=0.7,
         e=1e-80,
     )
     grid.value_iteration()

     expected_policy = np.array([[0], [0], [4]])
     test.assert_array_almost_equal(expected_policy, grid.policy)
コード例 #2
0
ファイル: test_MDP.py プロジェクト: liyiran/assignment3
 def test_small_value_iteration_no_wall(self):
     """Check the policy produced by value iteration on a 3x3 grid.

     No wall_list is passed, so the MDP's own default is used. Two exits
     are configured: (0, 2) with reward 1 and (1, 2) with reward -1.
     """
     settings = {
         "width": 3,
         "length": 3,
         "p_walk": 0.7,
         "p_run": 0.7,
         "reward_walk": 0,
         "reward_run": 0,
         "discount": 0.1,
         "e": 1e-80,
         "exit_list": [((0, 2), 1), ((1, 2), -1)],
     }
     grid = MDP(**settings)
     grid.value_iteration()

     expected_policy = np.array([[7, 3, 7],
                                 [0, 2, 0],
                                 [4, 4, 4]])
     test.assert_array_almost_equal(expected_policy, grid.policy)
コード例 #3
0
ファイル: test_MDP.py プロジェクト: liyiran/assignment3
 def test_single_column_middle_wall(self):
     """Check the rendered policy for a single column with a wall in the middle.

     The grid has length=3 and width=1, a wall at (1, 0) and an exit at
     (0, 0) with reward 1. The text returned by out_put() must be
     "Exit\\nNone\\nRun Up\\n".
     """
     mdp = MDP(length=3,
               width=1,
               p_walk=0.7,
               p_run=0.7,
               reward_run=-0.1,
               reward_walk=-0.3,
               wall_list=[(1, 0)],
               exit_list=[((0, 0), 1)],
               discount=0.7,
               e=1e-80)
     mdp.value_iteration()
     # Named `rendered` rather than `str` so the builtin is not shadowed.
     rendered = mdp.out_put()
     self.assertEqual(rendered, "Exit\nNone\nRun Up\n")
コード例 #4
0
ファイル: test_MDP.py プロジェクト: liyiran/assignment3
 def test_single_row_middle_wall(self):
     """Check the rendered policy for a single row with a wall in the middle.

     The grid has length=1 and width=3, a wall at (0, 1) and an exit at
     (0, 2) with reward 1. out_put() must return "Run Up,None,Exit\\n".
     """
     settings = dict(
         length=1,
         width=3,
         p_walk=0.7,
         p_run=0.7,
         reward_run=-0.1,
         reward_walk=-0.3,
         wall_list=[(0, 1)],
         exit_list=[((0, 2), 1)],
         discount=0.7,
         e=1e-80,
     )
     world = MDP(**settings)
     world.value_iteration()

     rendered = world.out_put()
     self.assertEqual("Run Up,None,Exit\n", rendered)
コード例 #5
0
ファイル: test_MDP.py プロジェクト: liyiran/assignment3
    def test_initial(self):
        """Check the initial value grid and transition tensor of a fresh MDP.

        Verifies that the exit rewards are placed in ``value`` and that
        ``p[:, :, direction, action]`` holds the configured success
        probability (0.7 for walking, 0.6 for running) when the direction
        matches the action, with 0.15 on each side direction and 0 for
        the opposite direction when walking up.
        """
        world = MDP(
            width=3,
            length=3,
            p_walk=0.7,
            p_run=0.6,
            reward_walk=1,
            reward_run=1,
            discount=0,
            exit_list=[((0, 2), 1), ((1, 2), -1)],
        )

        # Exit rewards are written into the value grid; all other cells are 0.
        expected_value = np.array([[0, 0, 1],
                                   [0, 0, -1],
                                   [0, 0, 0]])
        test.assert_array_equal(expected_value, world.value)

        # Walking up: intended direction, then the opposite direction.
        going_up = np.squeeze(world.p[:, :, world.up, world.walk_up])
        test.assert_array_equal(np.full((3, 3), 0.7), going_up)

        going_down = np.squeeze(world.p[:, :, world.down, world.walk_up])
        test.assert_array_equal(np.zeros((3, 3)), going_down)

        # Walking up: each sideways direction is expected to be 0.15.
        going_left = np.squeeze(world.p[:, :, world.left, world.walk_up])
        test.assert_array_almost_equal(np.full((3, 3), 0.15), going_left)

        going_right = np.squeeze(world.p[:, :, world.right, world.walk_up])
        test.assert_array_almost_equal(np.full((3, 3), 0.15), going_right)

        for direction in world.direction_enum:
            for action in world.action_enum:
                # The slice is taken for every pair, matched or not.
                transition = np.squeeze(world.p[:, :, direction, action])
                if direction == action:
                    # Walk action whose code equals the direction code.
                    test.assert_array_almost_equal(
                        transition, np.full((3, 3), 0.7))
                elif direction == action - 4:
                    # Run action: its code is the direction code plus 4.
                    test.assert_array_almost_equal(
                        transition, np.full((3, 3), 0.6))
コード例 #6
0
ファイル: test_MDP.py プロジェクト: liyiran/assignment3
 def test_case1(self):
     """Check the full policy for a grid with width=6, length=5 and two walls.

     Walls are at (1, 1) and (4, 3); exits are at (0, 2) with reward 10
     and (2, 4) with reward 5.
     """
     settings = {
         "width": 6,
         "length": 5,
         "p_walk": 0.8,
         "p_run": 0.6,
         "reward_walk": -0.3,
         "reward_run": -0.2,
         "wall_list": [(1, 1), (4, 3)],
         "discount": 0.7,
         "exit_list": [((0, 2), 10), ((2, 4), 5)],
         "e": 1e-80,
     }
     world = MDP(**settings)
     world.value_iteration()

     expected_policy = np.array([
         [7., 3., 0., 2., 6., 6.],
         [0., 0., 0., 0., 0., 2.],
         [4., 3., 4., 4., 4., 2.],
         [4., 0., 4., 0., 0., 0.],
         [4., 4., 4., 4., 4., 2.],
     ])
     test.assert_equal(expected_policy, world.policy)
コード例 #7
0
ファイル: test_MDP.py プロジェクト: liyiran/assignment3
    def test_wall_builder(self):
        """Check the wall-adjacency lists built for walls at (1, 1) and (0, 1).

        The ``wall_<direction>_1`` attributes are checked for walking and
        the ``wall_<direction>_2`` attributes for running.
        """
        mdp = MDP(width=3,
                  length=3,
                  p_walk=0.7,
                  p_run=0.6,
                  reward_walk=1,
                  reward_run=1,
                  discount=0,
                  wall_list=[(1, 1), (0, 1)],
                  exit_list=[((0, 2), 1), ((1, 2), -1)])

        walk_up = mdp.wall_up_1
        self.assertListEqual([(2, 1), (1, 1)], walk_up)
        # Fix: the original re-asserted walk_up here, so walk_left was never
        # checked.
        # NOTE(review): the expected list is inferred from the wall_left_2
        # expectation below (cells directly right of each wall) -- confirm
        # against the MDP wall builder.
        walk_left = mdp.wall_left_1
        self.assertListEqual([(1, 2), (0, 2)], walk_left)
        walk_down = mdp.wall_down_1
        self.assertListEqual([(0, 1)], walk_down)
        # Fix: the original read wall_right_2 here, duplicating the run check.
        # NOTE(review): assumes a wall_right_1 attribute exists alongside
        # wall_up_1 / wall_left_1 / wall_down_1 -- confirm.
        walk_right = mdp.wall_right_1
        self.assertListEqual([(1, 0), (0, 0)], walk_right)

        run_up = mdp.wall_up_2
        self.assertListEqual([(2, 1), (1, 1), (2, 1)], run_up)
        run_left = mdp.wall_left_2
        self.assertListEqual([(1, 2), (0, 2)], run_left)
        run_down = mdp.wall_down_2
        self.assertListEqual([(0, 1)], run_down)
        run_right = mdp.wall_right_2
        self.assertListEqual([(1, 0), (0, 0)], run_right)
コード例 #8
0
ファイル: test_MDP.py プロジェクト: liyiran/assignment3
 def test_case_11_discount_0(self):
     """Solve the MDP described by input11.txt and dump the rendered policy.

     NOTE: this test makes no assertions; it only writes the text returned
     by out_put() to my_output11.txt.
     """
     configuration = Configuration()
     width, length, p_walk, p_run, r_walk, r_run, discount, wall_list, exit_list = configuration.read_file(
         "input11.txt")
     mdp = MDP(width=width,
               length=length,
               p_walk=p_walk,
               p_run=p_run,
               reward_walk=r_walk,
               reward_run=r_run,
               wall_list=wall_list,
               discount=discount,
               exit_list=exit_list,
               e=1e-80)
     mdp.value_iteration()
     # Named `rendered` rather than `str` so the builtin is not shadowed.
     rendered = mdp.out_put()
     with open("my_output11.txt", 'w') as f:
         f.write(rendered)
コード例 #9
0
ファイル: test_MDP.py プロジェクト: liyiran/assignment3
 def test_big_accurate(self):
     """Solve the MDP from input5.txt and compare it with a reference file.

     The rendered policy is written to my_output5.txt and must equal the
     full contents of output_shun.txt.
     """
     configuration = Configuration()
     width, length, p_walk, p_run, r_walk, r_run, discount, wall_list, exit_list = configuration.read_file(
         "input5.txt")
     mdp = MDP(width=width,
               length=length,
               p_walk=p_walk,
               p_run=p_run,
               reward_walk=r_walk,
               reward_run=r_run,
               wall_list=wall_list,
               discount=discount,
               exit_list=exit_list,
               e=1e-80)
     mdp.value_iteration()
     # Named `rendered` rather than `str` so the builtin is not shadowed.
     rendered = mdp.out_put()
     with open("my_output5.txt", 'w') as f:
         f.write(rendered)
     with open("output_shun.txt", "r") as f:
         data = f.read()
         self.assertEqual(data, rendered)
コード例 #10
0
ファイル: test_MDP.py プロジェクト: liyiran/assignment3
    def test_policy_evaluation(self):
        """Check policy_evaluation for walk_up and run_up on a fixed value grid.

        The MDP has a wall at (1, 1); its value grid is overwritten with
        the numbers 1..9 before each action is evaluated.
        """
        world = MDP(
            width=3,
            length=3,
            p_walk=0.7,
            p_run=0.6,
            reward_walk=1,
            reward_run=1,
            discount=0,
            wall_list=[(1, 1)],
            exit_list=[((0, 2), 1), ((1, 2), -1)],
        )
        world.value = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

        walk_result = world.policy_evaluation(world.walk_up)
        walk_expected = [[1.15, 2, 2.85],
                         [1.9, 2.9, 3.9],
                         [5.05, 8, 6.75]]
        test.assert_array_almost_equal(walk_expected, walk_result)

        run_result = world.policy_evaluation(world.run_up)
        # Each entry is kept as a sum of three terms, exactly as written in
        # the original expectation.
        run_expected = np.array([
            [0.6 + 0.2 + 0.6, 1.2 + 0.4 + 0.4, 1.8 + 0.2 + 0.6],
            [2.4 + 0.8 + 0.8, 5, 3.6 + 1.2 + 1.2],
            [0.6 + 1.4 + 1.8, 4.8 + 1.6 + 1.6, 1.8 + 1.4 + 1.8],
        ])
        test.assert_array_almost_equal(run_expected, run_result)
コード例 #11
0
ファイル: test_MDP.py プロジェクト: liyiran/assignment3
    def test_output(self):
        """Check that out_put() renders a hand-set policy array as text.

        The policy is assigned directly (value iteration is not run), so
        this test covers only the rendering: one comma-separated line per
        grid row, each terminated by a newline.
        """
        mdp = MDP(width=6,
                  length=5,
                  p_walk=0.8,
                  p_run=0.6,
                  reward_walk=-0.3,
                  reward_run=-0.2,
                  wall_list=[(1, 1), (4, 3)],
                  discount=0.7,
                  exit_list=[((0, 2), 10), ((2, 4), 5)],
                  e=1e-80)
        mdp.policy = np.array([[7., 3., 0., 2., 6., 6.],
                               [0., 0., 0., 0., 0., 2.],
                               [4., 3., 4., 4., 4., 2.],
                               [4., 0., 4., 0., 0., 0.],
                               [4., 4., 4., 4., 4., 2.]])
        # Named `rendered` rather than `str` so the builtin is not shadowed.
        rendered = mdp.out_put()
        # One literal per row; the concatenation equals the original
        # triple-quoted string.
        expected = (
            "Run Right,Walk Right,Exit,Walk Left,Run Left,Run Left\n"
            "Walk Up,None,Walk Up,Walk Up,Walk Up,Walk Left\n"
            "Run Up,Walk Right,Run Up,Run Up,Exit,Walk Left\n"
            "Run Up,Walk Up,Run Up,Walk Up,Walk Up,Walk Up\n"
            "Run Up,Run Up,Run Up,None,Run Up,Walk Left\n"
        )
        self.assertEqual(rendered, expected)