Python MDP.MDP Examples

Programming Language: Python

Namespace/Package Name: algorithms.mdp

Class/Type: MDP

Method/Function: MDP

Examples at hotexamples.com: 8

Python MDP.MDP - 8 examples found. These are the top-rated real-world Python examples of algorithms.mdp.MDP.MDP extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

MDP(8)

applicable_actions(1)

bellman_eq_policy(1)

build_P(1)

build_R(1)

get_actions(1)

get_probability(1)

get_reward(1)

get_states(1)

successor_states(1)

value_iteration(1)

Example #1

Show file

 def test_A_method(self):
     '''
     Check that get_actions hands back the same action list each MDP was constructed with.
     '''
     failure_msg = 'get_actions Method test failed'
     # Construct both sample models up front, then check them in order.
     first_model = MDP(states1, actions1, probabilities1, rewards1)
     second_model = MDP(states2, actions2, probabilities2, rewards2)
     checks = ((first_model, actions1), (second_model, actions2))
     for model, expected_actions in checks:
         self.assertEqual(model.get_actions(), expected_actions, failure_msg)

Example #2

Show file

 def test_successor_states_method(self):
     '''
     Check that successor_states(s, a) lists the expected successor states for one
     (state, action) pair on each sample MDP.
     '''
     failure_msg = 'successor_states Method test failed'
     first_model = MDP(states1, actions1, probabilities1, rewards1)
     second_model = MDP(states2, actions2, probabilities2, rewards2)
     # Each entry: (model, state, action, expected successor list).
     checks = (
         (first_model, 's1', 'a1', ['s1', 's2']),
         (second_model, 'b', '1', ['a', 'c']),
     )
     for model, state, action, expected_successors in checks:
         self.assertEqual(model.successor_states(state, action),
                          expected_successors, failure_msg)

Example #3

Show file

 def test_applicable_actions_method(self):
     '''
     Check that applicable_actions(s) lists the expected actions for one state of
     each sample MDP.
     '''
     failure_msg = 'applicable_actions Method test failed'
     first_model = MDP(states1, actions1, probabilities1, rewards1)
     second_model = MDP(states2, actions2, probabilities2, rewards2)
     # Each entry: (model, state, expected action list).
     checks = (
         (first_model, 's1', ['a1']),
         (second_model, 'a', ['1', '2']),
     )
     for model, state, expected_actions in checks:
         self.assertEqual(model.applicable_actions(state),
                          expected_actions, failure_msg)

Example #4

Show file

 def test_build_P(self):
     '''
     Check that build_P(policy) produces the expected probability matrix for a fixed
     policy on each sample MDP.
     '''
     failure_msg = 'build_P Method test failed'
     # Each entry: (model, policy mapping state -> action, expected matrix).
     checks = (
         (
             MDP(states1, actions1, probabilities1, rewards1),
             {'s1': 'a1', 's2': 'a2'},
             np.array([[0.9, 1], [0.1, 0]]),
         ),
         (
             MDP(states2, actions2, probabilities2, rewards2),
             {'a': '1', 'b': '1', 'c': '1'},
             np.array([[0, 0.5, 0.2], [1, 0, 0.8], [0, 0.5, 0]]),
         ),
     )
     for model, policy, expected_matrix in checks:
         npt.assert_array_equal(model.build_P(policy), expected_matrix,
                                failure_msg)

Example #5

Show file

 def test_R_method(self):
     '''
     Check that get_reward returns the expected reward for every element obtained by
     iterating the probabilities fixtures.

     For each such element, get_reward is called with its first three items and the
     collected results are compared against the matching rewards fixture.
     '''
     failure_msg = 'get_reward Method test failed'
     first_model = MDP(states1, actions1, probabilities1, rewards1)
     second_model = MDP(states2, actions2, probabilities2, rewards2)

     # Collect the observed rewards keyed by the same element used for the lookup.
     observed_first = {}
     for key in probabilities1:
         observed_first[key] = first_model.get_reward(key[0], key[1], key[2])

     observed_second = {}
     for key in probabilities2:
         observed_second[key] = second_model.get_reward(key[0], key[1], key[2])

     self.assertEqual(observed_first, rewards1, failure_msg)
     self.assertEqual(observed_second, rewards2, failure_msg)

Example #6

Show file

 def test_build_R(self):
     '''
     Check that build_R(policy) produces the expected reward vector for a fixed policy
     on the first sample MDP.
     '''
     model = MDP(states1, actions1, probabilities1, rewards1)
     fixed_policy = {'s1': 'a1', 's2': 'a2'}
     expected_rewards = np.array([18, 0])
     observed_rewards = model.build_R(fixed_policy)
     npt.assert_array_equal(observed_rewards, expected_rewards,
                            'build_R Method test failed')

Example #7

Show file

    def test_value_iteration(self):
        '''
        Test if the value_iteration algorithm is returning the correct optimal policy and value function

        value_iteration is called with the positional arguments (0.5, 10000, 1e-7) and
        (0.9, 10000, 1e-7); presumably discount factor, iteration cap and convergence
        tolerance -- confirm against MDP.value_iteration.

        The returned policy is compared exactly. The returned value function is
        compared per state with assertAlmostEqual, because exact equality on floats
        coming out of an iterative computation breaks on any harmless change in
        the order of floating point operations.
        '''
        failure_msg = 'value_iteration Method test failed'

        # build MDPs, optimal policies and expected value functions
        mdp1 = MDP(states1, actions1, probabilities1, rewards1)
        optimal_policy1 = {'s1': 'a1', 's2': 'a2'}
        expected_values1 = {'s1': 34.285714255234645, 's2': 17.142857112377495}
        policy1, value_function1 = mdp1.value_iteration(0.5, 10000, 1e-7)

        mdp2 = MDP(states2, actions2, probabilities2, rewards2)
        optimal_policy2 = {'a': '2', 'b': '1', 'c': '1'}
        expected_values2 = {'a': 11.007194195249909, 'b': 11.45683448301969, 'c': 12.23021577798372}
        policy2, value_function2 = mdp2.value_iteration(0.9, 10000, 1e-7)

        # Tests for optimal policy and optimal value function
        checks = (
            (policy1, optimal_policy1, value_function1, expected_values1),
            (policy2, optimal_policy2, value_function2, expected_values2),
        )
        for policy, optimal_policy, value_function, expected_values in checks:
            self.assertEqual(policy, optimal_policy, failure_msg)
            # The value function must cover exactly the expected states ...
            self.assertEqual(set(value_function), set(expected_values),
                             failure_msg)
            # ... and agree with the reference numbers up to rounding noise.
            for state, expected in expected_values.items():
                self.assertAlmostEqual(value_function[state], expected,
                                       places=6, msg=failure_msg)

Example #8

Show file

 def test_bellman_eq_policy(self):
     '''
     Check that bellman_eq_policy(policy, x) yields the expected per-state values for
     a fixed policy on each sample MDP, after rounding the results.

     The first MDP is evaluated with second argument 0.5 and rounded to 4 decimals,
     the second with 0.9 and rounded to 2 decimals.
     '''
     failure_msg = 'bellman_eq_policy Method test failed'

     # First sample MDP: fixed policy and reference values (4 decimals).
     first_model = MDP(states1, actions1, probabilities1, rewards1)
     first_policy = {'s1': 'a1', 's2': 'a2'}
     first_expected = {'s1': 34.2857, 's2': 17.1429}

     # Second sample MDP: fixed policy and reference values (2 decimals).
     second_model = MDP(states2, actions2, probabilities2, rewards2)
     second_policy = {'a': '1', 'b': '1', 'c': '1'}
     second_expected = {'a': 8.63, 'b': 9.59, 'c': 10.46}

     # Evaluate each policy and round to the precision of the reference values.
     first_raw = first_model.bellman_eq_policy(first_policy, 0.5)
     first_rounded = {state: round(first_raw[state], 4) for state in first_raw}
     second_raw = second_model.bellman_eq_policy(second_policy, 0.9)
     second_rounded = {state: round(second_raw[state], 2) for state in second_raw}

     self.assertEqual(first_rounded, first_expected, failure_msg)
     self.assertEqual(second_rounded, second_expected, failure_msg)