def next(self):
    """
    Report the current condition and print a freshly generated maze.

    While self.condition is a valid index into self.session, prints the
    condition number, the seed stored for it in self.maps, and the result of
    mazer.Maze.generate(ROWS, COLS, 20, 20).

    Raises:
        StopIteration: when self.condition is not less than len(self.session).

    NOTE(review): nothing in this method advances self.condition or returns a
    value, and the seed is only printed, never handed to the generator --
    confirm that the caller takes care of both.
    """
    if self.condition >= len(self.session):
        raise StopIteration
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print('Starting condition %d' % self.condition)
    print('Using seed %d' % self.maps[self.condition])
    # imported here rather than at module level, as in the original code
    from mazer import Maze
    print(Maze.generate(ROWS, COLS, 20, 20))
def generate_new_maze(self):
    """
    Swap the maze shared through MazeEnvironment.maze for a newly generated one.

    Order matters: the objects of the current maze are deleted first, then the
    new maze is generated and published on the class, and only then are the
    new objects added and the maze reset.
    """
    # tear down whatever was built for the old maze before replacing it
    self.delete_maze_objects()

    fresh_maze = Maze.generate(ROWS, COLS, GRID_DX, GRID_DY)
    MazeEnvironment.maze = fresh_maze

    # build the objects for the new maze and return to the initial state
    self.add_maze_objects()
    self.reset_maze()
class MazeEnvironment(Environment):
    """
    The environment is a 2-D maze.
    In the discrete version, the agent moves from cell to cell.
     * Actions (1 discrete action)
        * 0 - move in the +r direction
        * 1 - move in the -r direction
        * 2 - move in the +c direction
        * 3 - move in the -c direction
        * 4 - do nothing
     * Observations (6 discrete observations)
        * o[0] - the current row position
        * o[1] - the current col position
        * o[2] - obstacle in the +r direction?
        * o[3] - obstacle in the -r direction?
        * o[4] - obstacle in the +c direction?
        * o[5] - obstacle in the -c direction?
    """

    # The maze is a class attribute, so it is shared by all instances and is
    # generated once, when the class body is executed.
    maze = Maze.generate(ROWS, COLS, GRID_DX, GRID_DY)

    def __init__(self):
        """
        Set up the reward structure, describe the action / observation /
        reward spaces, and initialize the bookkeeping containers.
        """
        Environment.__init__(self)
        self.rewards = MazeRewardStructure()
        action_info = FeatureVectorInfo()
        observation_info = FeatureVectorInfo()
        reward_info = FeatureVectorInfo()
        # select from the moves we can make
        action_info.add_discrete(0, len(MAZE_MOVES) - 1)
        # o[0], o[1]: row and column of the current cell
        observation_info.add_discrete(0, ROWS - 1)
        observation_info.add_discrete(0, COLS - 1)
        # o[2]..o[5]: one obstacle flag per direction
        observation_info.add_discrete(0, 1)
        observation_info.add_discrete(0, 1)
        observation_info.add_discrete(0, 1)
        observation_info.add_discrete(0, 1)
        reward_info.add_continuous(-100, 100)
        self.agent_info = AgentInitInfo(observation_info, action_info, reward_info)
        self.max_steps = MAX_STEPS
        self.speedup = 0
        # a map of cells and markers so that we don't have more than one per cell
        self.marker_map = {}
        # states of the marker agents that run for one cell and stop
        self.marker_states = {}
        # agents active on the map
        self.agent_map = {}
        # the set of agents that have reached the goal
        self.agents_at_goal = set()
        # handles for the objects used to draw q-values
        self.handles = {}
        print('Initialized MazeEnvironment')

    def can_move(self, agent, move):
        """
        Figure out if the agent can make the specified move.

        move is a (dr, dc) pair. Returns True when the target cell is inside
        the maze bounds and no wall blocks the move from the current cell.
        """
        pos = agent.state.position
        (r, c) = MazeEnvironment.maze.xy2rc(pos.x, pos.y)
        (dr, dc) = move
        # the target row is r + dr (the original code mistakenly added dc)
        return (MazeEnvironment.maze.rc_bounds(r + dr, c + dc)
                and not MazeEnvironment.maze.is_wall(r, c, dr, dc))

    def get_next_rotation(self, move):
        """
        Figure out which way the agent should be facing in order to make the
        specified move. Returns a Vector3f whose z component is the heading
        in degrees.
        """
        return Vector3f(0, 0, degrees(atan2(move[1], move[0])))

    def reset(self, agent):
        """
        Reset the agent to cell (0, 0) with zero rotation. Always returns True.
        """
        print('Episode %d complete' % agent.episode)
        (x, y) = MazeEnvironment.maze.rc2xy(0, 0)
        agent.state.position = Vector3f(x, y, 0)
        agent.state.rotation = Vector3f(0, 0, 0)
        return True

    def get_agent_info(self, agent):
        """ Return the AgentInitInfo built in __init__ (same for every agent). """
        return self.agent_info

    def set_animation(self, agent, animation):
        """
        Set the agent's animation sequence to that named by animation.

        Nothing happens if the agent is already playing that animation. When
        the animation changes and the simulation delay is positive, the
        animation speed is divided by the delay.
        """
        if agent.state.animation == animation:
            return
        agent.state.animation = animation
        delay = getSimContext().delay
        animation_speed = agent.state.animation_speed
        if delay > 0:
            agent.state.animation_speed = animation_speed / delay

    def set_position(self, agent, new_pose):
        """
        Set the next agent position to new_pose = (r, c, h).

        Plays 'stand' if the agent is already at that cell, otherwise moves
        it and plays 'run'. The heading component is unpacked but not applied
        here.
        """
        new_r, new_c, new_heading = new_pose
        (new_x, new_y) = MazeEnvironment.maze.rc2xy(new_r, new_c)
        pos = agent.state.position
        if pos.x == new_x and pos.y == new_y:
            self.set_animation(agent, 'stand')
        else:
            pos.x, pos.y = new_x, new_y
            # write the modified position back to the agent state
            agent.state.position = pos
            self.set_animation(agent, 'run')

    def step(self, agent, action):
        """
        Discrete version: apply action for agent and return the reward.

        The agent first turns (90 degrees per step at most) until it faces the
        direction of the move, and only then changes cell. Goal and time-limit
        rewards are never given to MoveForwardAndStopAgent instances.
        """
        (r, c) = MazeEnvironment.maze.xy2rc(agent.state.position.x,
                                            agent.state.position.y)
        is_marker = isinstance(agent, MoveForwardAndStopAgent)

        if not is_marker:
            # check if we reached the goal
            if r == ROWS - 1 and c == COLS - 1:
                self.agents_at_goal.add(agent)
                return self.rewards.goal_reached(agent)
            # check if we ran out of time
            if agent.step >= self.max_steps - 1:
                return self.rewards.last_reward(agent)

        # An invalid action is treated as a null move. The goal / time checks
        # the original repeated here could never fire: they are identical to
        # the ones just above, which have already returned.
        if not self.agent_info.actions.validate(action):
            self.set_animation(agent, 'stand')
            return self.rewards.null_move(agent)

        # check for null action
        a = int(round(action[0]))
        if a == MAZE_NULL_MOVE:
            self.set_animation(agent, 'stand')
            return self.rewards.null_move(agent)

        # calculate new pose
        (dr, dc) = MAZE_MOVES[a]
        new_r, new_c = r + dr, c + dc
        new_heading = self.get_next_rotation((dr, dc)).z
        prev_heading = agent.state.rotation.z

        if new_heading != prev_heading:
            # The heading is not right: just change the heading and run the
            # rotation animation. Available animations:
            # "run" "stand" "turn_r_xc" "turn_l_xc" "turn_r_lx" "turn_l_lx"
            # "turn_r_xxx" "turn_l_xxx" "pick_up" "put_down"
            # "hold_run" "hold_stand" "hold_r_xc" "hold_l_xc"
            # "hold_turn_r_lx" "hold_turn_l_lx" "hold_turn_r_xxx" "hold_turn_l_xxx"
            # "jump" "hold_jump"
            turn = new_heading - prev_heading
            if turn > 0:
                # turn left by at most 90 degrees per step
                if turn > 90:
                    new_heading = prev_heading + 90
                self.set_animation(agent, 'turn_l_lx')
            else:
                # turn right by at most 90 degrees per step (the original
                # compared against +90, which is always true in this branch)
                if turn < -90:
                    new_heading = prev_heading - 90
                self.set_animation(agent, 'turn_r_lx')
            rot0 = copy(agent.state.rotation)
            rot0.z = new_heading
            agent.state.rotation = rot0
            agent.skip()  # don't get a new action, just retry this one
            return self.rewards.valid_move(agent)

        # the heading is right: check if we are in bounds
        if not MazeEnvironment.maze.rc_bounds(new_r, new_c):
            self.set_animation(agent, 'jump')
            return self.rewards.out_of_bounds(agent)
        # check if there is a wall in the way
        if MazeEnvironment.maze.is_wall(r, c, dr, dc):
            self.set_animation(agent, 'jump')
            return self.rewards.hit_wall(agent)
        self.set_position(agent, (new_r, new_c, new_heading))

        if not is_marker:
            # check if we reached the goal
            if new_r == ROWS - 1 and new_c == COLS - 1:
                self.agents_at_goal.add(agent)
                return self.rewards.goal_reached(agent)
            # check if we ran out of time
            if agent.step >= self.max_steps - 1:
                return self.rewards.last_reward(agent)

        # return a normal reward
        return self.rewards.valid_move(agent)

    def teleport(self, agent, r, c):
        """ Move the agent to cell (r, c) and call agent.teleport(). """
        (x, y) = MazeEnvironment.maze.rc2xy(r, c)
        pos = agent.state.position
        pos.x = x
        pos.y = y
        agent.state.position = pos
        agent.teleport()

    def sense(self, agent, obs):
        """
        Discrete version: fill obs with the agent's cell and, for each entry
        of MAZE_MOVES, a 0/1 flag telling whether a ray cast in that direction
        hits an object of type 1. Returns obs.
        """
        p0 = agent.state.position
        (r, c) = MazeEnvironment.maze.xy2rc(p0.x, p0.y)
        obs[0] = r
        obs[1] = c
        # start the ray slightly away from the agent's own position
        offset = GRID_DX / 10.0
        for i, (dr, dc) in enumerate(MAZE_MOVES):
            direction = Vector3f(dr, dc, 0)
            ray = (p0 + direction * offset, p0 + direction * GRID_DX)
            # we only look for objects of type 1, which means walls
            objects = getSimContext().findInRay(ray[0], ray[1], 1, False)
            obs[2 + i] = int(len(objects) > 0)
        return obs

    def is_episode_over(self, agent):
        """
        Return True when the agent has used up max_steps (if non-zero) or has
        reached the goal cell. Agents whose class is named
        'MoveForwardAndStopAgent' never end an episode by reaching the goal.
        """
        pos = agent.state.position
        (r, c) = MazeEnvironment.maze.xy2rc(pos.x, pos.y)
        if self.max_steps != 0 and agent.step >= self.max_steps:
            return True
        elif agent.__class__.__name__ == 'MoveForwardAndStopAgent':
            return False
        elif r == ROWS - 1 and c == COLS - 1:
            if hasattr(agent, "highlight_path"):
                disable_ai()  # stop running
                agent.highlight_path()  # mark the final path
                self.set_animation(agent, 'stand')  # stop animation
            return True
        else:
            return False

    def mark_maze(self, r, c, marker):
        """ Mark a maze cell with the specified marker object (an XML path). """
        # remove the previous object, if necessary
        if (r, c) in self.marker_map:
            removeObject(self.marker_map[(r, c)])
        # remember the ID of the marker
        self.marker_map[(r, c)] = addObject(
            marker, Vector3f((r + 1) * GRID_DX, (c + 1) * GRID_DY, -1))

    def mark_maze_blue(self, r, c):
        """ Mark cell (r, c) with a blue cube. """
        self.mark_maze(r, c, "data/shapes/cube/BlueCube.xml")

    def mark_maze_green(self, r, c):
        """ Mark cell (r, c) with a green cube. """
        self.mark_maze(r, c, "data/shapes/cube/GreenCube.xml")

    def mark_maze_yellow(self, r, c):
        """ Mark cell (r, c) with a yellow cube. """
        self.mark_maze(r, c, "data/shapes/cube/YellowCube.xml")

    def mark_maze_white(self, r, c):
        """ Mark cell (r, c) with a white cube. """
        self.mark_maze(r, c, "data/shapes/cube/WhiteCube.xml")

    def unmark_maze_agent(self, r, c):
        """
        Remove the marker agent registered for cell (r, c), if there is one,
        together with its entry in marker_states.
        """
        if (r, c) in self.agent_map:
            agent_id = self.agent_map[(r, c)]
            removeObject(agent_id)
            del self.marker_states[agent_id]
            del self.agent_map[(r, c)]

    def mark_maze_agent(self, agent, r1, c1, r2, c2):
        """ Mark a maze cell with an agent moving from r1, c1 to r2, c2. """
        # remove the previous object, if necessary
        self.unmark_maze_agent(r2, c2)
        # add a new marker object, placed at the source cell and facing the target
        position = Vector3f((r1 + 1) * GRID_DX, (c1 + 1) * GRID_DY, 0)
        rotation = self.get_next_rotation((r2 - r1, c2 - c1))
        agent_id = addObject(agent, position=position, rotation=rotation)
        self.marker_states[agent_id] = ((r1, c1), (r2, c2))
        self.agent_map[(r2, c2)] = agent_id

    def cleanup(self):
        """
        Remove every marker block, marker agent and q-value cube that this
        environment added, and clear the corresponding bookkeeping.
        """
        # remove the marker blocks
        for marker_id in self.marker_map.values():
            removeObject(marker_id)
        self.marker_map = {}

        # remove the marker agents; their states are keyed by the ids that
        # were just removed, so drop them too (as unmark_maze_agent does)
        for agent_id in self.agent_map.values():
            removeObject(agent_id)
        self.agent_map = {}
        self.marker_states = {}

        # remove the cubes used to draw q-values
        for cell_handles in self.handles.values():
            for handle in cell_handles:
                if handle is not None:
                    removeObject(handle)
        self.handles = {}

    def draw_q(self, o, Q):
        """
        Draw the action values Q[o] for observation o as cubes around cell
        (o[0], o[1]): one blue cube per move, displaced from the cell center
        in the move's direction in proportion to its normalized value, plus a
        yellow cube at the center. Moves whose normalized value is 0 get no
        cube.
        """
        aa = Q[o]  # get the action values
        min_a = min(aa)  # minimum of the action values
        aa = [a - min_a for a in aa]  # shift to make all >= 0
        sum_a = sum(aa)  # sum of action values
        if sum_a != 0:
            # float() guards against integer division under Python 2
            total = float(sum_a)
            aa = [a / total for a in aa]  # normalize

        center = len(MAZE_MOVES)  # slot after the per-move slots
        if o not in self.handles:
            # create handles list: one slot per move plus the center marker
            self.handles[o] = [None] * (center + 1)
        cell_handles = self.handles[o]

        (x, y) = MazeEnvironment.maze.rc2xy(o[0], o[1])
        for a, (dr, dc) in enumerate(MAZE_MOVES):
            value = aa[a] * 5
            p = Vector3f(x, y, 0)
            # a zero component leaves the coordinate where it is
            p.x += dr * value
            p.y += dc * value
            if value == 0:
                # don't show 0 values: drop the cube if there is one, and
                # never create one (the original created it when missing)
                if cell_handles[a] is not None:
                    removeObject(cell_handles[a])
                    cell_handles[a] = None
            elif cell_handles[a] is None:
                # create the cube to show the value
                cell_handles[a] = addObject(
                    "data/shapes/cube/BlueCube.xml",
                    p, Vector3f(0, 0, 0), scale=Vector3f(0.5, 0.5, 0.5))
            else:
                # move the existing cube
                getSimContext().setObjectPosition(cell_handles[a], p)

        if cell_handles[center] is None:
            cell_handles[center] = addObject(
                "data/shapes/cube/YellowCube.xml",
                Vector3f(x, y, 0),
                scale=Vector3f(0.6, 0.6, 0.6))