def generate_policy(self, state, grounded_task):
    '''
    Generate an L0 policy for the subgoal defined by an L1 grounded task.

    Args:
        state (GridWorldState): L0 state to start planning from
        grounded_task (FourRoomL1GroundedAction): L1 task node defining the L0 subgoal
    '''
    init_location = (state.x, state.y, state.z)
    mdp = RoomCubeMDP(init_loc=init_location,
                      env_file=self.env_file,
                      constraints=grounded_task.goal_constraints,
                      ap_maps=grounded_task.ap_maps)
    return self.get_policy(mdp, verbose=True)
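The self.get_policy helper is not shown in this example. Below is a minimal sketch of what it might look like, assuming it wraps simple_rl's ValueIteration the same way Examples #2 and #3 do; the value_iter.policy accessor is the one hinted at in Example #2's TODO.

from simple_rl.planning import ValueIteration

def get_policy(self, mdp, verbose=False):
    # Run value iteration on the freshly built RoomCubeMDP.
    value_iter = ValueIteration(mdp, sample_rate=5)
    value_iter.run_vi()
    if verbose:
        # Print the greedy plan from the initial state, as Example #2 does.
        action_seq, state_seq = value_iter.plan(mdp.get_init_state())
        print("Plan for", mdp)
        for s, a in zip(state_seq, action_seq):
            print("\t", s, a)
    # value_iter.policy maps a state to its greedy action.
    return value_iter.policy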
Example #2
    def _solve_subproblem_L0(self,
                             init_locs=(1, 1, 1),
                             constraints={},
                             ap_maps={}):  #TODO
        mdp = RoomCubeMDP(init_loc=init_locs,
                          env_file=[self.cube_env],
                          constraints=constraints,
                          ap_maps=ap_maps,
                          slip_prob=self.slip_prob)
        value_iter = ValueIteration(mdp, sample_rate=5)
        value_iter.run_vi()

        # Extract the plan (state/action sequences) from the computed values.
        action_seq, state_seq = value_iter.plan(mdp.get_init_state())

        # TODO: extract a full policy via value_iter.policy(state); consider returning value_iter itself.
        print("Plan for", mdp)
        for i in range(len(action_seq)):
            print("\t", state_seq[i], action_seq[i])
        print("\t", state_seq[-1])

        return action_seq, state_seq
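A hypothetical usage sketch; planner stands in for whatever object these methods live on, and the empty constraints/ap_maps defaults mean the RoomCubeMDP is built without any LTL bookkeeping.

# Hypothetical call: flat L0 planning from cell (1, 1, 1).
action_seq, state_seq = planner._solve_subproblem_L0(init_locs=(1, 1, 1))
print(len(action_seq), "primitive actions in the plan")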
Example #3
    def _solve_subproblem_L0(self,
                             init_locs=(1, 1, 1),
                             constraints={},
                             ap_maps={},
                             verbose=False):  #TODO
        mdp = RoomCubeMDP(init_loc=init_locs,
                          env_file=[self.cube_env],
                          constraints=constraints,
                          ap_maps=ap_maps,
                          slip_prob=self.slip_prob)
        value_iter = ValueIteration(mdp, sample_rate=1, max_iterations=50)
        value_iter.run_vi()
        num_backup = value_iter.get_num_backups_in_recent_run()

        # Extract the plan (state/action sequences) from the computed values.
        action_seq, state_seq = value_iter.plan(mdp.get_init_state())

        if verbose:
            print("Plan for", mdp)
            for i in range(len(action_seq)):
                print("\t", state_seq[i], action_seq[i])
            print("\t", state_seq[-1])

        return action_seq, state_seq, num_backup
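Unlike Example #2, this variant caps value iteration (sample_rate=1, max_iterations=50) and reports the number of Bellman backups, which makes it usable as a planning-cost baseline. A hedged sketch, with planner and the start cells hypothetical:

# Compare flat VI cost across start cells.
for start in [(1, 1, 1), (2, 3, 2)]:
    actions, states, backups = planner._solve_subproblem_L0(init_locs=start)
    print(start, "->", len(actions), "steps,", backups, "backups")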
Example #4
from simple_rl.ltl.AMDP.RoomCubeMDPClass import RoomCubeMDP
from simple_rl.ltl.AMDP.AbstractCubeMDPClass import *
from simple_rl.ltl.AMDP.AbstractCubePolicyGeneratorClass import *
from simple_rl.ltl.AMDP.AbstractCubeStateMapperClass import *

from simple_rl.ltl.settings.build_cube_env_1 import build_cube_env

if __name__ == '__main__':
    cube_env = build_cube_env()
    start_floor = 1
    goal_floor = 3
    start_room, goal_room = 1, 15
    init_locs = cube_env['room_to_locs'][start_room][0]
    goal_locs = cube_env['room_to_locs'][goal_room]
    l0Domain = RoomCubeMDP(init_loc=init_locs,
                           goal_locs=goal_locs,
                           env_file=[cube_env])
    l1Domain = CubeL1MDP(start_room, goal_room, env_file=[cube_env])
    l2Domain = CubeL2MDP(start_floor, goal_floor, env_file=[cube_env])

    policy_generators = []
    l0_policy_generator = CubeL0PolicyGenerator(l0Domain, env_file=[cube_env])
    l1_policy_generator = CubeL1PolicyGenerator(
        l0Domain, AbstractCubeL1StateMapper(l0Domain), env_file=[cube_env])
    l2_policy_generator = CubeL2PolicyGenerator(
        l1Domain, AbstractCubeL2StateMapper(l1Domain), env_file=[cube_env])
    policy_generators.append(l0_policy_generator)
    policy_generators.append(l1_policy_generator)
    #    policy_generators.append(l2_policy_generator)

    # 3 levels
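The script is truncated at the "# 3 levels" marker. Judging from the construction in Examples #5 and #6, the elided part grounds each level's actions and hands the hierarchy to an AMDPAgent. A hedged sketch of that continuation for the two active levels (the L2 generator is commented out above); the root-task argument action_for_room_number(goal_room) and the AMDPAgent import path are assumptions:

    # from simple_rl.amdp.AMDPSolverClass import AMDPAgent  # assumed import
    l1Subtasks = [PrimitiveAbstractTask(a) for a in l0Domain.ACTIONS]
    a2rt = [CubeL1GroundedAction(a, l1Subtasks, l0Domain)
            for a in l1Domain.ACTIONS]
    l1Root = CubeRootL1GroundedAction(l1Domain.action_for_room_number(goal_room),
                                      a2rt, l1Domain,
                                      l1Domain.terminal_func,
                                      l1Domain.reward_func,
                                      constraints={}, ap_maps={})  # no LTL constraints here
    agent = AMDPAgent(l1Root, policy_generators, l0Domain)
    agent.solve()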
Example #5
    def _solve_subproblem_L2(self,
                             init_locs=(1, 1, 1),
                             constraints={},
                             ap_maps={}):
        # define l0 domain
        l0Domain = RoomCubeMDP(init_loc=init_locs,
                               env_file=[self.cube_env],
                               constraints=constraints,
                               ap_maps=ap_maps,
                               slip_prob=self.slip_prob)

        # define l1 domain
        start_room = l0Domain.get_room_numbers(init_locs)[0]
        start_floor = l0Domain.get_floor_numbers(init_locs)[0]

        l1Domain = CubeL1MDP(start_room,
                             env_file=[self.cube_env],
                             constraints=constraints,
                             ap_maps=ap_maps)
        l2Domain = CubeL2MDP(start_floor,
                             env_file=[self.cube_env],
                             constraints=constraints,
                             ap_maps=ap_maps)

        policy_generators = []
        l0_policy_generator = CubeL0PolicyGenerator(l0Domain,
                                                    env_file=[self.cube_env])
        l1_policy_generator = CubeL1PolicyGenerator(
            l0Domain,
            AbstractCubeL1StateMapper(l0Domain),
            env_file=[self.cube_env],
            constraints=constraints,
            ap_maps=ap_maps)
        l2_policy_generator = CubeL2PolicyGenerator(
            l1Domain,
            AbstractCubeL2StateMapper(l1Domain),
            env_file=[self.cube_env],
            constraints=constraints,
            ap_maps=ap_maps)

        policy_generators.append(l0_policy_generator)
        policy_generators.append(l1_policy_generator)
        policy_generators.append(l2_policy_generator)

        # 3 levels: ground each level's actions to build the hierarchy.
        l1Subtasks = [
            PrimitiveAbstractTask(action) for action in l0Domain.ACTIONS
        ]
        a2rt = [
            CubeL1GroundedAction(a, l1Subtasks, l0Domain)
            for a in l1Domain.ACTIONS
        ]
        a2rt2 = [
            CubeL2GroundedAction(a, a2rt, l1Domain) for a in l2Domain.ACTIONS
        ]

        l2Root = CubeRootL2GroundedAction(l2Domain.action_for_floor_number(1),
                                          a2rt2,
                                          l2Domain,
                                          l2Domain.terminal_func,
                                          l2Domain.reward_func,
                                          constraints=constraints,
                                          ap_maps=ap_maps)

        agent = AMDPAgent(l2Root, policy_generators, l0Domain)

        # Leftover from testing: primitive subtasks over the L1 actions (unused below).
        l2Subtasks = [
            PrimitiveAbstractTask(action) for action in l1Domain.ACTIONS
        ]

        agent.solve()

        # Roll out the computed L0 policy to extract the action/state sequences.
        state = RoomCubeState(init_locs[0], init_locs[1], init_locs[2], 0)
        action_seq = []
        state_seq = [state]
        while state in agent.policy_stack[0]:
            action = agent.policy_stack[0][state]
            state = l0Domain._transition_func(state, action)

            action_seq.append(action)
            state_seq.append(state)

        print("Plan")
        for i in range(len(action_seq)):
            print("\t", state_seq[i], action_seq[i])
        print("\t", state_seq[-1])
        return action_seq, state_seq
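A hypothetical invocation, with planner standing in for the host object. Note how the rollout loop above terminates: it stops as soon as the current state is no longer a key of the level-0 policy table, i.e. once the subgoal region is reached.

# Hypothetical call: solve the full 3-level hierarchy from (1, 1, 1).
actions, states = planner._solve_subproblem_L2(init_locs=(1, 1, 1))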
Example #6
    def _solve_subproblem_L1(self,
                             init_locs=(1, 1, 1),
                             constraints={},
                             ap_maps={},
                             verbose=False):

        # define l0 domain
        l0Domain = RoomCubeMDP(init_loc=init_locs,
                               env_file=[self.cube_env],
                               constraints=constraints,
                               ap_maps=ap_maps,
                               slip_prob=self.slip_prob)
        backup_num = 0
        # If the initial state already satisfies the constraint, there is nothing to solve.
        if l0Domain.init_state.q == 1:
            action_seq = []
            state_seq = [l0Domain.init_state]
        else:
            # define l1 domain
            start_room = l0Domain.get_room_numbers(init_locs)[0]
            l1Domain = CubeL1MDP(start_room,
                                 env_file=[self.cube_env],
                                 constraints=constraints,
                                 ap_maps=ap_maps,
                                 slip_prob=self.slip_prob)

            policy_generators = []
            l0_policy_generator = CubeL0PolicyGenerator(
                l0Domain, env_file=[self.cube_env])
            l1_policy_generator = CubeL1PolicyGenerator(
                l0Domain,
                AbstractCubeL1StateMapper(l0Domain),
                env_file=[self.cube_env],
                constraints=constraints,
                ap_maps=ap_maps)

            policy_generators.append(l0_policy_generator)
            policy_generators.append(l1_policy_generator)

            # 2 levels
            l1Subtasks = [
                PrimitiveAbstractTask(action) for action in l0Domain.ACTIONS
            ]
            a2rt = [
                CubeL1GroundedAction(a, l1Subtasks, l0Domain)
                for a in l1Domain.ACTIONS
            ]
            l1Root = CubeRootL1GroundedAction(
                l1Domain.action_for_room_number(0),
                a2rt,
                l1Domain,
                l1Domain.terminal_func,
                l1Domain.reward_func,
                constraints=constraints,
                ap_maps=ap_maps)

            agent = AMDPAgent(l1Root, policy_generators, l0Domain)
            agent.solve()
            backup_num = agent.backup_num

            # Roll out the computed L0 policy to extract the plan.
            state = RoomCubeState(init_locs[0], init_locs[1], init_locs[2], 0)
            action_seq = []
            state_seq = [state]
            while state in agent.policy_stack[0]:
                action = agent.policy_stack[0][state]
                state = l0Domain._transition_func(state, action)

                action_seq.append(action)
                state_seq.append(state)

        if verbose:
            print("Plan")
            for i in range(len(action_seq)):
                print("\t", state_seq[i], action_seq[i])
            print("\t", state_seq[-1])

        return action_seq, state_seq, backup_num
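Because this method and the flat L0 variant in Example #3 both report backup counts, the two can be compared directly to quantify what the abstraction hierarchy saves. A hedged sketch (planner hypothetical):

# Hierarchical vs. flat planning cost for the same start cell.
_, _, amdp_backups = planner._solve_subproblem_L1(init_locs=(1, 1, 1))
_, _, flat_backups = planner._solve_subproblem_L0(init_locs=(1, 1, 1))
print("AMDP backups:", amdp_backups, "| flat VI backups:", flat_backups)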