예제 #1
0
    def test_no_actions_available(self):
        """The MDP built from 'suitcase-05-01b' must offer no actions at all."""
        mdp = SokobanBuilder(level_name='suitcase-05-01b').build_mdp()

        # Checked twice on purpose: once by value, once by size.
        self.assertEqual(set(), mdp.available_actions)
        self.assertEqual(0, len(mdp.available_actions))
예제 #2
0
    def test_transition_2(self):
        """
        Pushing a box into a corner is expected to end the MDP.

        The push must yield a reward of -101 and leave no available actions.

        State 0         State 1
            ########        ########
            #  ..$ #        #  ..$ #
            # $@ $ #        # $  $ #
            # $..  #        #$@..  #
            ########        ########
        """
        mdp = SokobanBuilder(level_name='suitcase-05-01').build_mdp()

        # The expected successor is derived from the initial state *before*
        # the transition is executed.
        initial = mdp.state
        vanished = {'box(3,4)', 'sokoban(4,3)'}
        appeared = {'box(2,4)', 'sokoban(3,4)'}
        expected = (initial - vanished) | appeared

        got_state, got_reward = mdp.transition('push(3,4,left)')

        self.assertSetEqual(expected, mdp.state)
        self.assertSetEqual(expected, got_state)
        self.assertEqual(-101, got_reward)
        self.assertSetEqual(set(), mdp.available_actions)

        # The histories hold one entry per visited state.
        self.assertEqual([None, -101], mdp.reward_history)
        self.assertEqual([-101, 0], mdp.return_history)
예제 #3
0
    def test_sokoban_3(self):
        """
        Follow the planner's suggestions on 'suitcase-05-02' until no action is left.

        Each suggested action must be one of the currently available actions.
        Afterwards the suggested actions must equal the MDP's action history,
        and the suggested returns (followed by a trailing 0) must equal the
        MDP's return history.
        """
        builder = SokobanBuilder(level_name='suitcase-05-02')
        mdp = builder.build_mdp()

        planner = PlannerPolicy(planning_horizon=6, mdp_builder=builder)

        suggested_actions = []
        suggested_returns = []

        # The budget of 20 steps is intentionally set to be higher than the
        # number of needed moves.
        for _ in range(20):

            if len(mdp.available_actions) == 0:
                # Without a transition nothing changes any more, so the
                # remaining iterations would all be no-ops.
                break

            a, g = planner.suggest_action_and_return_for_state(mdp.state)

            # assertIsNotNone / assertIn report the offending values on failure.
            self.assertIsNotNone(a)
            self.assertNotEqual(set(), mdp.available_actions)
            self.assertIn(a, mdp.available_actions)

            mdp.transition(a)
            suggested_actions.append(a)
            suggested_returns.append(g)

        self.assertEqual(suggested_actions, mdp.action_history)
        self.assertEqual(suggested_returns + [0], mdp.return_history)
예제 #4
0
    def test_no_actions_available_after_action(self):
        """After one push on 'suitcase-05-01c' no further action may be offered."""
        mdp = SokobanBuilder(level_name='suitcase-05-01c').build_mdp()

        mdp.transition('push(3,2,right)')

        # Checked twice on purpose: once by value, once by size.
        self.assertEqual(set(), mdp.available_actions)
        self.assertEqual(0, len(mdp.available_actions))
예제 #5
0
    def test_available_actions_3(self):
        """The initial state of 'suitcase-05-01a' must offer exactly three pushes."""
        mdp = SokobanBuilder(level_name='suitcase-05-01a').build_mdp()

        expected_actions = set()
        expected_actions.add('push(5,2,right)')
        expected_actions.add('push(4,3,right)')
        expected_actions.add('push(4,4,right)')

        self.assertSetEqual(expected_actions, mdp.available_actions)
예제 #6
0
    def test_available_actions_2(self):
        """The initial state of 'suitcase-05-02' must offer three pushes per box."""
        mdp = SokobanBuilder(level_name='suitcase-05-02').build_mdp()

        # Expected push directions, keyed by the (x, y) arguments of the push.
        directions_by_position = {
            (5, 3): ('left', 'right', 'down'),
            (4, 4): ('right', 'up', 'down'),
            (6, 4): ('left', 'up', 'down'),
            (5, 5): ('left', 'right', 'up'),
        }
        expected_actions = {
            'push({},{},{})'.format(x, y, direction)
            for (x, y), directions in directions_by_position.items()
            for direction in directions
        }

        self.assertSetEqual(expected_actions, mdp.available_actions)
예제 #7
0
    def test_transition_1(self):
        """
        Perform two arbitrary pushes on 'suitcase-05-02' and check the state,
        the reward and the available actions after each of them.

        State 0:        State 1:        State 2:
             #######         #######         #######
             #     #         #     #         #     #
            ## .$. #        ## .@. #        ## * . #
            #@ $ $ #        #  $$$ #        #  @$$ #
            #  .$. #        #  .$. #        #  .$. #
            ##     #        ##     #        ##     #
             #######         #######         #######

        """
        mdp = SokobanBuilder(level_name='suitcase-05-02').build_mdp()
        initial = mdp.state

        # --- first push -----------------------------------------------------
        got_state, got_reward = mdp.transition('push(5,3,down)')
        after_first = initial - {'box(5,3)', 'sokoban(2,4)'}
        after_first = after_first | {'box(5,4)', 'sokoban(5,3)'}
        self.assertSetEqual(after_first, mdp.state)
        self.assertSetEqual(after_first, got_state)
        self.assertEqual(-1, got_reward)

        actions_after_first = {
            'push(4,4,up)',
            'push(4,4,down)',
            'push(6,4,up)',
            'push(6,4,down)',
            'push(5,5,left)',
            'push(5,5,right)',
        }
        self.assertEqual(actions_after_first, mdp.available_actions)

        # --- second push ----------------------------------------------------
        got_state, got_reward = mdp.transition('push(4,4,up)')
        after_second = after_first - {'box(4,4)', 'sokoban(5,3)'}
        after_second = after_second | {'box(4,3)', 'sokoban(4,4)'}
        self.assertSetEqual(after_second, mdp.state)
        self.assertSetEqual(after_second, got_state)
        self.assertEqual(-1, got_reward)

        actions_after_second = {
            'push(4,3,up)',
            'push(4,3,down)',
            'push(4,3,left)',
            'push(4,3,right)',
            'push(6,4,up)',
            'push(6,4,down)',
            'push(5,5,left)',
            'push(5,5,right)',
        }
        self.assertEqual(actions_after_second, mdp.available_actions)

        # --- histories: one entry per visited state ---------------------------
        self.assertEqual([None, -1, -1], mdp.reward_history)
        self.assertEqual([-2, -1, 0], mdp.return_history)
예제 #8
0
    def test_sokoban_5(self):
        """
        The planner must still suggest an action when the goal is out of reach.

        Sometimes it is possible to end up in nonterminal states where reaching
        the goal is no longer possible. In these cases the planner should still
        yield the least harmful next action.
        """
        only_action = 'push(6,4,right)'

        builder = SokobanBuilder(level_name='suitcase-05-04a')
        mdp = builder.build_mdp()

        self.assertSetEqual({only_action}, mdp.available_actions)

        planner = PlannerPolicy(planning_horizon=2, mdp_builder=builder)
        suggestion = planner.suggest_action_and_return_for_state(mdp.state)
        action, expected_return = suggestion

        self.assertEqual(only_action, action)
        self.assertEqual(-101, expected_return)
예제 #9
0
    def test_sokoban_1(self):
        """
        Check the planner's predicted return on 'suitcase-05-01'.

        The planner (horizon 7) must predict a return of 94 for the initial
        state and a return of 95 for the state reached by 'push(6,3,left)'.
        """
        builder = SokobanBuilder(level_name='suitcase-05-01')
        mdp = builder.build_mdp()

        planner = PlannerPolicy(planning_horizon=7, mdp_builder=builder)

        s0 = mdp.state

        # Only the predicted return matters here; the suggested action is ignored.
        _, g0 = planner.suggest_action_and_return_for_state(mdp.state)
        self.assertEqual(94, g0)

        mdp.transition('push(6,3,left)')
        s1 = s0 - {'sokoban(4,3)', 'box(6,3)'} | {'sokoban(6,3)', 'box(5,3)'}
        self.assertSetEqual(s1, mdp.state)

        _, g1 = planner.suggest_action_and_return_for_state(mdp.state)
        self.assertEqual(95, g1)
예제 #10
0
    def test_transition_3(self):
        """
        Get a reward when moving to the goal state.

        Six pushes are executed; the last one must yield a reward of 99 and
        leave the MDP in State 6.

        State 0         State 1         State 2         State 3
            ########        ########        ########        ########
            #  ..$ #        #  ..$ #        # $..$ #        # @*.$ #
            # $@ $ #        # $  $ #        # @  $ #        #    $ #
            # $..  #        # @*.  #        #  *.  #        #  *.  #
            ########        ########        ########        ########

        State 4         State 5         State 6
            ########        ########        ########
            #  **@ #        #  **  #        #  **  #
            #    $ #        #    @ #        #      #
            #  *.  #        #  *.$ #        #  **@ #
            ########        ########        ########

        """

        builder = SokobanBuilder(level_name='suitcase-05-01')
        mdp = builder.build_mdp()

        # States 1-5: only the outcome of the final push is inspected.
        mdp.transition('push(3,4,right)')
        mdp.transition('push(3,3,up)')
        mdp.transition('push(3,2,right)')
        mdp.transition('push(6,2,left)')
        mdp.transition('push(6,3,down)')
        next_state, next_reward = mdp.transition('push(6,4,left)')

        true_state_6 = {
            'box(4,2)', 'box(5,2)', 'box(4,4)', 'box(5,4)', 'sokoban(6,4)'
        }

        self.assertSetEqual(true_state_6, mdp.state)
        self.assertSetEqual(true_state_6, next_state)
        self.assertEqual(99, next_reward)

        # The histories hold one entry per visited state (States 0-6).
        self.assertEqual([None, -1, -1, -1, -1, -1, 99], mdp.reward_history)
        self.assertEqual([94, 95, 96, 97, 98, 99, 0], mdp.return_history)
예제 #11
0
    def test_executing_wrong_actions(self):
        """Executing 'push(3,3,up)' in the initial state of 'suitcase-05-01' must raise."""
        mdp = SokobanBuilder(level_name='suitcase-05-01').build_mdp()

        self.assertRaises(Exception, mdp.transition, 'push(3,3,up)')
예제 #12
0
    def test_builder(self):
        """
        Check what the builder derives from the 'suitcase-05-01' level.

        Covers the textual level, the initial (dynamic) facts, the static
        facts, and the initial state of an MDP built from the builder.
        """
        builder = SokobanBuilder(level_name='suitcase-05-01')

        expected_text = '\n'.join([
            '########',
            '#  ..$ #',
            '# $@ $ #',
            '# $..  #',
            '########',
        ])

        # Dynamic part of the state, which can change over time.
        expected_initial = {
            'box(6,2)',
            'box(3,3)',
            'box(6,3)',
            'box(3,4)',
            'sokoban(4,3)',
        }

        # Static part of the state, which won't change. The level is 8 columns
        # wide and 5 rows high, and its outer frame consists of blocks.
        columns = range(1, 9)
        rows = range(1, 6)
        frame = {(x, y) for x in columns for y in (1, 5)}
        frame |= {(x, y) for x in (1, 8) for y in (2, 3, 4)}

        expected_static = {'block({},{})'.format(x, y) for x, y in frame}
        expected_static |= {
            'dest({},{})'.format(x, y) for x in (4, 5) for y in (2, 4)
        }
        expected_static |= {'row({})'.format(y) for y in rows}
        expected_static |= {'col({})'.format(x) for x in columns}

        self.assertEqual(expected_text, builder.level_txt)
        self.assertSetEqual(expected_initial, builder.level_asp_initial)
        self.assertSetEqual(expected_static, builder.level_asp_static)

        mdp = builder.build_mdp()

        self.assertEqual(expected_initial, mdp.state)
예제 #13
0
    parser_frozenLake.set_defaults(mdp='frozenLake',
                                   behavior_policy='planning_epsilon_greedy')

    args = parser.parse_args()

    initial_value_estimate = -1

    gym_active = False
    is_slippery = False
    frozen_lake_active = False
    frozen_lake_level = ""
    gym_env = ""
    if args.mdp == 'blocksworld':
        mdp_builder = BlocksWorldBuilder(args.blocks_world_size)
    elif args.mdp == 'sokoban':
        mdp_builder = SokobanBuilder(args.sokoban_level_name)
    elif args.mdp == 'frozenLake':
        mdp_builder = FrozenLakeBuilder(args.frozen_lake_level,
                                        args.is_cautious == 'True')
        frozen_lake_active = True
        if args.gym_environment_active == 'True':
            gym_active = True
            initial_value_estimate = 0
        else:
            gym_active = False
        if args.is_slippery == 'True':
            is_slippery = True
        else:
            is_slippery = False
        gym_env = 'FrozenLake-v0'
        frozen_lake_level = args.frozen_lake_level