Example #1
0
    def test_state_transition(self):
        """Two transitions update state/reward and record the full trajectory."""
        mdp = BlocksWorld(state_initial={'on(b1,table)', 'on(b2,b1)'},
                          state_static={'subgoal(b1,b2)'})

        # First move: unstack b2 onto the table (ordinary -1 step reward).
        expected_mid = {'on(b1,table)', 'on(b2,table)'}
        state_after_first, reward_after_first = mdp.transition('move(b2,table)')
        self.assertEqual(expected_mid, state_after_first)
        self.assertEqual(-1, reward_after_first)
        self.assertEqual(expected_mid, mdp.state)

        # Second move: stack b1 on b2 — the asserted reward of 99 shows the
        # goal bonus net of the step cost (100 - 1).
        expected_goal = {'on(b1,b2)', 'on(b2,table)'}
        state_after_second, reward_after_second = mdp.transition('move(b1,b2)')
        self.assertEqual(expected_goal, state_after_second)
        self.assertEqual(99, reward_after_second)
        self.assertEqual(expected_goal, mdp.state)

        # The recorded trajectory must read S0, A0, R1, S1, A1, R2, S2.
        self.assertEqual({'on(b1,table)', 'on(b2,b1)'},
                         mdp.state_history[0])  # S0
        self.assertEqual('move(b2,table)', mdp.action_history[0])  # A0
        self.assertEqual(-1, mdp.reward_history[1])  # R1
        self.assertEqual(expected_mid, mdp.state_history[1])  # S1
        self.assertEqual('move(b1,b2)', mdp.action_history[1])  # A1
        self.assertEqual(100 - 1, mdp.reward_history[2])  # R2
        self.assertEqual(expected_goal, mdp.state_history[2])  # S2
Example #2
0
    def test_returns_1(self):
        """A single optimal move yields a return equal to the goal reward."""
        # Optimal way to goal: one move satisfies subgoal(b2,b1).
        mdp = BlocksWorld(state_initial={'on(b1,table)', 'on(b2,table)'},
                          state_static={'subgoal(b2,b1)'})
        mdp.transition('move(b2,b1)')

        # G[t] = R[t+1] + ... — the only reward collected here is 99.
        self.assertEqual(99, mdp.return_history[0])
Example #3
0
    def test_available_actions_4(self):
        """Available actions are recomputed after a state transition."""
        mdp = BlocksWorld(
            state_initial={'on(b2,table)', 'on(b1,table)', 'on(b3,table)'},
            state_static={'subgoal(b2,b1)'})

        mdp.transition('move(b1,b2)')
        expected_actions = {'move(b1,table)', 'move(b1,b3)', 'move(b3,b1)'}
        self.assertEqual(expected_actions, mdp.available_actions)
Example #4
0
    def test_available_actions_5(self):
        """A state that already satisfies the subgoal offers no actions."""
        mdp = BlocksWorld(state_initial={'on(b1,table)', 'on(b2,b1)'},
                          state_static={'subgoal(b2,b1)'})

        self.assertEqual(set(), mdp.available_actions)
Example #5
0
    def test_available_actions_3(self):
        """Action set for a three-block state with b1 stacked on b2."""
        mdp = BlocksWorld(
            state_initial={'on(b2,table)', 'on(b1,b2)', 'on(b3,table)'},
            state_static={'subgoal(b2,b1)'})

        expected_actions = {'move(b1,table)', 'move(b1,b3)', 'move(b3,b1)'}
        self.assertEqual(expected_actions, mdp.available_actions)
Example #6
0
    def test_blocksworld_1(self):
        """A horizon-1 planner suggests the single goal-reaching move."""
        mdp_builder = BlocksWorldBuilder(blocks_world_size=2)
        mdp = BlocksWorld(state_initial={'on(b0,table)', 'on(b1,table)'},
                          state_static={'subgoal(b1,b0)'})
        planner = PlannerPolicy(planning_horizon=1, mdp_builder=mdp_builder)

        action, expected_return = planner.suggest_action_and_return_for_state(
            mdp.state)

        # Both query styles must agree on the suggested action.
        self.assertEqual(action, planner.suggest_action_for_state(mdp.state))

        mdp.transition(action)

        self.assertEqual('move(b1,b0)', action)
        # The planner's predicted return matches the realized return G[0].
        self.assertEqual(expected_return, mdp.return_history[0])
Example #7
0
    def test_returns_2(self):
        """Undiscounted returns: G[t] is the plain sum of later rewards."""
        mdp = BlocksWorld(state_initial={'on(b1,table)', 'on(b2,table)'},
                          state_static={'subgoal(b2,b1)'})

        # Detour first (two -1 steps), then the goal-reaching move.
        for action in ('move(b1,b2)', 'move(b1,table)', 'move(b2,b1)'):
            mdp.transition(action)

        # G[t] = R[t+1] + R[t+2] + R[t+3]
        self.assertEqual(-1 + -1 + 99, mdp.return_history[0])
        self.assertEqual(-1 + 99, mdp.return_history[1])
        self.assertEqual(99, mdp.return_history[2])
        # Return is zero in the terminal state.
        self.assertEqual(0, mdp.return_history[3])
Example #8
0
    def test_blocksworld_optimal_return(self):
        """Optimal return for a 5-block instance equals the expected 94."""
        mdp_builder = BlocksWorldBuilder(blocks_world_size=5)

        initial_state = {
            'on(b2,b1)', 'on(b0,b3)', 'on(b4,table)', 'on(b1,table)',
            'on(b3,table)'
        }
        subgoals = {
            'subgoal(b0,table)', 'subgoal(b1,b0)', 'subgoal(b2,b1)',
            'subgoal(b3,b2)', 'subgoal(b4,b3)'
        }
        mdp = BlocksWorld(state_initial=initial_state, state_static=subgoals)

        # Horizon 2 * 5 + 1 — presumably 2n+1 steps suffice for n blocks;
        # mirrors blocks_world_size above.
        planner = PlannerPolicy(planning_horizon=2 * 5 + 1,
                                mdp_builder=mdp_builder)

        self.assertEqual(94,
                         planner.compute_optimal_return_for_state(mdp.state))