Example #1
 def test_A_method(self):
     """Check that ``MDP.get_actions`` returns the action list of the defined MDP.

     Two MDPs are built from the module-level fixtures and each must hand
     back exactly the action collection it was constructed with.
     """
     # build MDPs
     mdp1 = MDP(states1, actions1, probabilities1, rewards1)
     mdp2 = MDP(states2, actions2, probabilities2, rewards2)
     # Tests
     self.assertEqual(mdp1.get_actions(), actions1,
                      'get_actions Method test failed')
     self.assertEqual(mdp2.get_actions(), actions2,
                      'get_actions Method test failed')
Example #2
 def test_successor_states_method(self):
     """Check that ``MDP.successor_states`` returns the possible successor states.

     The method is queried with a state ``s`` and an action ``a`` and must
     return the states that can follow when being in ``s`` and taking ``a``.
     """
     # build MDPs
     mdp1 = MDP(states1, actions1, probabilities1, rewards1)
     mdp2 = MDP(states2, actions2, probabilities2, rewards2)
     # Tests
     self.assertEqual(mdp1.successor_states('s1', 'a1'), ['s1', 's2'],
                      'successor_states Method test failed')
     self.assertEqual(mdp2.successor_states('b', '1'), ['a', 'c'],
                      'successor_states Method test failed')
Example #3
 def test_applicable_actions_method(self):
     """Check that ``MDP.applicable_actions`` returns the actions available in a state.

     The method is queried with a state ``s`` and must return the actions
     that can be taken when being in ``s``.
     """
     # build MDPs
     mdp1 = MDP(states1, actions1, probabilities1, rewards1)
     mdp2 = MDP(states2, actions2, probabilities2, rewards2)
     # Tests
     self.assertEqual(mdp1.applicable_actions('s1'), ['a1'],
                      'applicable_actions Method test failed')
     self.assertEqual(mdp2.applicable_actions('a'), ['1', '2'],
                      'applicable_actions Method test failed')
Example #4
 def test_build_P(self):
     """Check that ``MDP.build_P`` returns the correct probability matrix for a policy.

     For each fixture MDP a policy (a state -> action mapping) is passed to
     ``build_P`` and the result is compared element-wise with a hand-written
     expected matrix.
     """
     # build MDPs and policies
     mdp1 = MDP(states1, actions1, probabilities1, rewards1)
     p1 = np.array([[0.9, 1], [0.1, 0]])
     policy1 = {'s1': 'a1', 's2': 'a2'}
     mdp2 = MDP(states2, actions2, probabilities2, rewards2)
     p2 = np.array([[0, 0.5, 0.2], [1, 0, 0.8], [0, 0.5, 0]])
     policy2 = {'a': '1', 'b': '1', 'c': '1'}
     # Tests
     npt.assert_array_equal(mdp1.build_P(policy1), p1,
                            err_msg='build_P Method test failed')
     npt.assert_array_equal(mdp2.build_P(policy2), p2,
                            err_msg='build_P Method test failed')
Example #5
 def test_R_method(self):
     """Check that ``MDP.get_reward`` returns the correct reward.

     ``get_reward`` is called once for every entry of the probability
     fixtures, using the first three components of each entry as arguments.
     The collected results, keyed by that entry, must equal the reward
     fixture the MDP was built with.
     """
     # build MDPs
     mdp1 = MDP(states1, actions1, probabilities1, rewards1)
     mdp2 = MDP(states2, actions2, probabilities2, rewards2)
     # Tests
     r_return1 = {
         element: mdp1.get_reward(element[0], element[1], element[2])
         for element in probabilities1
     }
     r_return2 = {
         element: mdp2.get_reward(element[0], element[1], element[2])
         for element in probabilities2
     }
     self.assertEqual(r_return1, rewards1, 'get_reward Method test failed')
     self.assertEqual(r_return2, rewards2, 'get_reward Method test failed')
Example #6
 def test_build_R(self):
     """Check that ``MDP.build_R`` returns the correct reward array for a policy.

     The first fixture MDP is evaluated under a fixed policy and the result
     is compared element-wise with the expected array ``[18, 0]``.
     """
     # build MDPs and policy
     r = np.array([18, 0])
     policy = {'s1': 'a1', 's2': 'a2'}
     mdp = MDP(states1, actions1, probabilities1, rewards1)
     # Test
     npt.assert_array_equal(mdp.build_R(policy), r,
                            err_msg='build_R Method test failed')
Example #7
    def test_value_iteration(self):
        """Check that ``MDP.value_iteration`` returns the optimal policy and value function.

        Policies are compared exactly. State values come out of an iterative
        floating-point computation, so they are compared with a small
        absolute tolerance instead of bit-for-bit equality.
        """
        failure_msg = 'value_iteration Method test failed'

        # build MDPs and optimal policy
        # NOTE(review): the three positional arguments are presumably the
        # discount factor, the iteration cap and the convergence threshold --
        # confirm against MDP.value_iteration.
        mdp1 = MDP(states1, actions1, probabilities1, rewards1)
        optimal_policy1 = {'s1': 'a1', 's2': 'a2'}
        optimal_values1 = {'s1': 34.285714255234645, 's2': 17.142857112377495}
        policy1, value_function1 = mdp1.value_iteration(0.5, 10000, 1e-7)

        mdp2 = MDP(states2, actions2, probabilities2, rewards2)
        optimal_policy2 = {'a': '2', 'b': '1', 'c': '1'}
        optimal_values2 = {'a': 11.007194195249909, 'b': 11.45683448301969,
                           'c': 12.23021577798372}
        policy2, value_function2 = mdp2.value_iteration(0.9, 10000, 1e-7)

        # Tests for optimal policy and optimal value function
        cases = (
            (policy1, optimal_policy1, value_function1, optimal_values1),
            (policy2, optimal_policy2, value_function2, optimal_values2),
        )
        for policy, expected_policy, values, expected_values in cases:
            self.assertEqual(policy, expected_policy, failure_msg)
            # The returned value function must cover exactly the expected states ...
            self.assertEqual(set(values), set(expected_values), failure_msg)
            # ... and agree per state within a tolerance, so harmless
            # floating-point differences do not fail the test.
            for state, expected in expected_values.items():
                self.assertAlmostEqual(values[state], expected,
                                       delta=1e-6, msg=failure_msg)
Example #8
 def test_bellman_eq_policy(self):
     """Check that ``MDP.bellman_eq_policy`` returns the correct value function for a policy.

     The values returned for each state are rounded (4 decimals for the
     first MDP, 2 for the second) before being compared with the
     hand-computed reference values.
     """
     # build MDPs and policies with value functions
     mdp1 = MDP(states1, actions1, probabilities1, rewards1)
     policy1 = {'s1': 'a1', 's2': 'a2'}
     value_function1 = {'s1': 34.2857, 's2': 17.1429}
     mdp2 = MDP(states2, actions2, probabilities2, rewards2)
     policy2 = {'a': '1', 'b': '1', 'c': '1'}
     value_function2 = {'a': 8.63, 'b': 9.59, 'c': 10.46}
     # calculate return and round it into fresh dicts, leaving the mappings
     # returned by the MDP untouched
     return_v_1 = mdp1.bellman_eq_policy(policy1, 0.5)
     rounded_v_1 = {state: round(return_v_1[state], 4) for state in return_v_1}
     return_v_2 = mdp2.bellman_eq_policy(policy2, 0.9)
     rounded_v_2 = {state: round(return_v_2[state], 2) for state in return_v_2}
     # Tests
     self.assertEqual(rounded_v_1, value_function1,
                      'bellman_eq_policy Method test failed')
     self.assertEqual(rounded_v_2, value_function2,
                      'bellman_eq_policy Method test failed')