def generate_policy(self, state, grounded_task):
    '''
    Args:
        state (GridWorldState): the L0 state to plan from
        grounded_task (FourRoomL1GroundedAction): L1 task node defining the L0 subgoal
    '''
    # destination_locations = self.domain.room_to_locs[grounded_task.goal_state.agent_in_room_number]
    init_location = (state.x, state.y, state.z)
    mdp = RoomCubeMDP(init_loc=init_location, env_file=self.env_file,
                      constraints=grounded_task.goal_constraints,
                      ap_maps=grounded_task.ap_maps)
    return self.get_policy(mdp, verbose=True)
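# A hedged usage sketch for generate_policy. `generator`, `current_state`, and
# `grounded_task` are hypothetical names: the AMDP hierarchy calls generate_policy
# with the agent's current L0 state and the L1 grounded action whose subgoal the
# L0 plan must reach.
#
#     policy = generator.generate_policy(current_state, grounded_task)
#     action = policy[current_state]  # greedy L0 action toward the L1 subgoal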
def _solve_subproblem_L0(self, init_locs=(1, 1, 1), constraints={}, ap_maps={}):
    # TODO
    mdp = RoomCubeMDP(init_loc=init_locs, env_file=[self.cube_env],
                      constraints=constraints, ap_maps=ap_maps,
                      slip_prob=self.slip_prob)

    # Run value iteration on the flat L0 MDP.
    value_iter = ValueIteration(mdp, sample_rate=5)
    value_iter.run_vi()

    # Roll out the greedy plan from the initial state.
    action_seq, state_seq = value_iter.plan(mdp.get_init_state())
    # TODO: Extract the policy via value_iter.policy(state)... What about returning value_iter?

    print("Plan for", mdp)
    for i in range(len(action_seq)):
        print("\t", state_seq[i], action_seq[i])
    print("\t", state_seq[-1])

    return action_seq, state_seq
def _solve_subproblem_L0(self, init_locs=(1, 1, 1), constraints={}, ap_maps={}, verbose=False):
    # TODO
    mdp = RoomCubeMDP(init_loc=init_locs, env_file=[self.cube_env],
                      constraints=constraints, ap_maps=ap_maps,
                      slip_prob=self.slip_prob)

    # Run bounded value iteration on the flat L0 MDP and record the backup count.
    value_iter = ValueIteration(mdp, sample_rate=1, max_iterations=50)
    value_iter.run_vi()
    num_backup = value_iter.get_num_backups_in_recent_run()

    # Roll out the greedy plan from the initial state.
    action_seq, state_seq = value_iter.plan(mdp.get_init_state())

    if verbose:
        print("Plan for", mdp)
        for i in range(len(action_seq)):
            print("\t", state_seq[i], action_seq[i])
        print("\t", state_seq[-1])

    return action_seq, state_seq, num_backup
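# A standalone sketch of the same L0 pipeline, runnable outside the planner class.
# The ValueIteration import path follows simple_rl conventions, and the empty
# constraints/ap_maps defaults are placeholders: real calls pass the grounded task's
# constraint and atomic-proposition maps.
from simple_rl.planning import ValueIteration
from simple_rl.ltl.AMDP.RoomCubeMDPClass import RoomCubeMDP
from simple_rl.ltl.settings.build_cube_env_1 import build_cube_env

def solve_l0_flat(init_locs=(1, 1, 1), constraints={}, ap_maps={}):
    # Build the flat grid MDP and run bounded value iteration, as above.
    cube_env = build_cube_env()
    mdp = RoomCubeMDP(init_loc=init_locs, env_file=[cube_env],
                      constraints=constraints, ap_maps=ap_maps)
    value_iter = ValueIteration(mdp, sample_rate=1, max_iterations=50)
    value_iter.run_vi()
    return value_iter.plan(mdp.get_init_state())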
from simple_rl.ltl.AMDP.RoomCubeMDPClass import RoomCubeMDP
from simple_rl.ltl.AMDP.AbstractCubeMDPClass import *
from simple_rl.ltl.AMDP.AbstractCubePolicyGeneratorClass import *
from simple_rl.ltl.AMDP.AbstractCubeStateMapperClass import *
from simple_rl.ltl.settings.build_cube_env_1 import build_cube_env

if __name__ == '__main__':
    cube_env = build_cube_env()

    start_floor = 1
    goal_floor = 3
    start_room, goal_room = 1, 15
    init_locs = cube_env['room_to_locs'][start_room][0]
    goal_locs = cube_env['room_to_locs'][goal_room]

    # Build the domain hierarchy: L0 grid cells, L1 rooms, L2 floors.
    l0Domain = RoomCubeMDP(init_loc=init_locs, goal_locs=goal_locs, env_file=[cube_env])
    l1Domain = CubeL1MDP(start_room, goal_room, env_file=[cube_env])
    l2Domain = CubeL2MDP(start_floor, goal_floor, env_file=[cube_env])

    policy_generators = []
    l0_policy_generator = CubeL0PolicyGenerator(l0Domain, env_file=[cube_env])
    l1_policy_generator = CubeL1PolicyGenerator(l0Domain, AbstractCubeL1StateMapper(l0Domain),
                                                env_file=[cube_env])
    l2_policy_generator = CubeL2PolicyGenerator(l1Domain, AbstractCubeL2StateMapper(l1Domain),
                                                env_file=[cube_env])

    policy_generators.append(l0_policy_generator)
    policy_generators.append(l1_policy_generator)
    # policy_generators.append(l2_policy_generator)  # uncomment to plan over 3 levels
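    # A sketch of how the two-level script might continue, following the grounding
    # pattern used in the LTL-constrained solvers in this module. The AMDPAgent import
    # location and the no-constraints call to CubeRootL1GroundedAction are assumptions
    # here, not taken from this file.
    from simple_rl.amdp.AMDPSolverClass import AMDPAgent

    l1Subtasks = [PrimitiveAbstractTask(action) for action in l0Domain.ACTIONS]
    a2rt = [CubeL1GroundedAction(a, l1Subtasks, l0Domain) for a in l1Domain.ACTIONS]
    l1Root = CubeRootL1GroundedAction(l1Domain.action_for_room_number(goal_room), a2rt,
                                      l1Domain, l1Domain.terminal_func, l1Domain.reward_func)

    agent = AMDPAgent(l1Root, policy_generators, l0Domain)
    agent.solve()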
def _solve_subproblem_L2(self, init_locs=(1, 1, 1), constraints={}, ap_maps={}):
    # Define the L0 (grid-level) domain.
    l0Domain = RoomCubeMDP(init_loc=init_locs, env_file=[self.cube_env],
                           constraints=constraints, ap_maps=ap_maps,
                           slip_prob=self.slip_prob)

    # Define the L1 (room-level) and L2 (floor-level) domains.
    start_room = l0Domain.get_room_numbers(init_locs)[0]
    start_floor = l0Domain.get_floor_numbers(init_locs)[0]
    l1Domain = CubeL1MDP(start_room, env_file=[self.cube_env],
                         constraints=constraints, ap_maps=ap_maps)
    l2Domain = CubeL2MDP(start_floor, env_file=[self.cube_env],
                         constraints=constraints, ap_maps=ap_maps)

    # One policy generator per level of the hierarchy.
    policy_generators = []
    l0_policy_generator = CubeL0PolicyGenerator(l0Domain, env_file=[self.cube_env])
    l1_policy_generator = CubeL1PolicyGenerator(l0Domain, AbstractCubeL1StateMapper(l0Domain),
                                                env_file=[self.cube_env],
                                                constraints=constraints, ap_maps=ap_maps)
    l2_policy_generator = CubeL2PolicyGenerator(l1Domain, AbstractCubeL2StateMapper(l1Domain),
                                                env_file=[self.cube_env],
                                                constraints=constraints, ap_maps=ap_maps)
    policy_generators.append(l0_policy_generator)
    policy_generators.append(l1_policy_generator)
    policy_generators.append(l2_policy_generator)  # 3 levels

    # Ground the abstract actions: L1 actions over L0 primitives, L2 actions over L1.
    l1Subtasks = [PrimitiveAbstractTask(action) for action in l0Domain.ACTIONS]
    a2rt = [CubeL1GroundedAction(a, l1Subtasks, l0Domain) for a in l1Domain.ACTIONS]
    a2rt2 = [CubeL2GroundedAction(a, a2rt, l1Domain) for a in l2Domain.ACTIONS]

    l2Root = CubeRootL2GroundedAction(l2Domain.action_for_floor_number(1), a2rt2, l2Domain,
                                      l2Domain.terminal_func, l2Domain.reward_func,
                                      constraints=constraints, ap_maps=ap_maps)

    agent = AMDPAgent(l2Root, policy_generators, l0Domain)

    # Test - base, l1 domain (l2Subtasks is built but currently unused).
    l2Subtasks = [PrimitiveAbstractTask(action) for action in l1Domain.ACTIONS]

    agent.solve()

    # Extract the action and state sequences by rolling out the L0 policy.
    state = RoomCubeState(init_locs[0], init_locs[1], init_locs[2], 0)
    action_seq = []
    state_seq = [state]
    while state in agent.policy_stack[0].keys():
        action = agent.policy_stack[0][state]
        state = l0Domain._transition_func(state, action)
        action_seq.append(action)
        state_seq.append(state)

    print("Plan")
    for i in range(len(action_seq)):
        print("\t", state_seq[i], action_seq[i])
    print("\t", state_seq[-1])

    return action_seq, state_seq
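# The greedy-rollout loop above reappears in _solve_subproblem_L1 below; this helper is
# a sketch (not part of the original code) that could factor it out. `policy` is the L0
# policy dict taken from agent.policy_stack[0].
def _rollout_l0_policy(policy, l0_domain, init_state):
    # Follow the greedy policy until reaching a state it does not cover.
    action_seq, state_seq = [], [init_state]
    state = init_state
    while state in policy:
        action = policy[state]
        state = l0_domain._transition_func(state, action)
        action_seq.append(action)
        state_seq.append(state)
    return action_seq, state_seq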
def _solve_subproblem_L1(self, init_locs=(1, 1, 1), constraints={}, ap_maps={}, verbose=False):
    # Define the L0 (grid-level) domain.
    l0Domain = RoomCubeMDP(init_loc=init_locs, env_file=[self.cube_env],
                           constraints=constraints, ap_maps=ap_maps,
                           slip_prob=self.slip_prob)
    backup_num = 0

    # If the initial state already satisfies the constraint, there is nothing to solve.
    if l0Domain.init_state.q == 1:
        action_seq = []
        state_seq = [l0Domain.init_state]
    else:
        # Define the L1 (room-level) domain.
        start_room = l0Domain.get_room_numbers(init_locs)[0]
        l1Domain = CubeL1MDP(start_room, env_file=[self.cube_env],
                             constraints=constraints, ap_maps=ap_maps,
                             slip_prob=self.slip_prob)

        policy_generators = []
        l0_policy_generator = CubeL0PolicyGenerator(l0Domain, env_file=[self.cube_env])
        l1_policy_generator = CubeL1PolicyGenerator(l0Domain, AbstractCubeL1StateMapper(l0Domain),
                                                    env_file=[self.cube_env],
                                                    constraints=constraints, ap_maps=ap_maps)
        policy_generators.append(l0_policy_generator)
        policy_generators.append(l1_policy_generator)  # 2 levels

        # Ground the L1 actions over the L0 primitives.
        l1Subtasks = [PrimitiveAbstractTask(action) for action in l0Domain.ACTIONS]
        a2rt = [CubeL1GroundedAction(a, l1Subtasks, l0Domain) for a in l1Domain.ACTIONS]
        l1Root = CubeRootL1GroundedAction(l1Domain.action_for_room_number(0), a2rt, l1Domain,
                                          l1Domain.terminal_func, l1Domain.reward_func,
                                          constraints=constraints, ap_maps=ap_maps)

        agent = AMDPAgent(l1Root, policy_generators, l0Domain)
        agent.solve()
        backup_num = agent.backup_num

        # Extract the action and state sequences by rolling out the L0 policy.
        state = RoomCubeState(init_locs[0], init_locs[1], init_locs[2], 0)
        action_seq = []
        state_seq = [state]
        while state in agent.policy_stack[0].keys():
            action = agent.policy_stack[0][state]
            state = l0Domain._transition_func(state, action)
            action_seq.append(action)
            state_seq.append(state)

    if verbose:
        print("Plan")
        for i in range(len(action_seq)):
            print("\t", state_seq[i], action_seq[i])
        print("\t", state_seq[-1])

    return action_seq, state_seq, backup_num
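# A hedged usage sketch for the L1 solver above. `planner` stands for an instance of
# the enclosing class (with `cube_env` and `slip_prob` already set); the constraint
# and ap_map payloads are placeholders, not values from this file.
#
#     action_seq, state_seq, backup_num = planner._solve_subproblem_L1(
#         init_locs=(1, 1, 1), constraints=constraints, ap_maps=ap_maps, verbose=True)
#     print(len(action_seq), "actions,", backup_num, "Bellman backups")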