Ejemplo n.º 1
0
 def next(self):
     """
     Announce the current condition and print a freshly generated maze.

     Raises StopIteration once self.condition is no longer below
     len(self.session).

     NOTE(review): the visible code neither advances self.condition nor
     returns a value -- confirm against the caller.
     """
     if self.condition >= len(self.session):
         raise StopIteration
     print('Starting condition %d' % self.condition)
     print('Using seed %d' % self.maps[self.condition])
     # imported locally, exactly as the original snippet does
     from mazer import Maze
     print(Maze.generate(ROWS, COLS, 20, 20))
Ejemplo n.º 2
0
 def next(self):
     """
     Report the condition about to start and print a newly generated maze.

     Raises StopIteration when self.condition has reached
     len(self.session).

     NOTE(review): nothing here increments self.condition or returns a
     value -- confirm against the caller.
     """
     exhausted = not (self.condition < len(self.session))
     if exhausted:
         raise StopIteration
     condition_line = 'Starting condition %d' % self.condition
     print(condition_line)
     seed_line = 'Using seed %d' % self.maps[self.condition]
     print(seed_line)
     # imported locally, exactly as the original snippet does
     from mazer import Maze
     print(Maze.generate(ROWS, COLS, 20, 20))
Ejemplo n.º 3
0
 def generate_new_maze(self):
     """
     Replace the shared maze with a newly generated one.

     The old maze objects are deleted first, the new maze is stored on
     MazeEnvironment, and then the maze objects are added and the maze
     is reset.
     """
     self.delete_maze_objects()
     fresh_maze = Maze.generate(ROWS, COLS, GRID_DX, GRID_DY)
     MazeEnvironment.maze = fresh_maze
     self.add_maze_objects()
     self.reset_maze()
Ejemplo n.º 4
0
 def generate_new_maze(self):
     """
     Tear down the current maze objects and build a new maze in their place.

     The new maze is stored as the MazeEnvironment.maze class attribute
     before the maze objects are re-added and the maze is reset.
     """
     # the old objects have to go before the maze they belong to is replaced
     self.delete_maze_objects()
     setattr(MazeEnvironment, 'maze',
             Maze.generate(ROWS, COLS, GRID_DX, GRID_DY))
     self.add_maze_objects()
     self.reset_maze()
Ejemplo n.º 5
0
class MazeEnvironment(Environment):
    """
    The environment is a 2-D maze.
    In the discrete version, the agent moves from cell to cell.
     * Actions (1 discrete action)
        * 0 - move in the +r direction
        * 1 - move in the -r direction
        * 2 - move in the +c direction
        * 3 - move in the -c direction
        * 4 - do nothing
     * Observations (6 discrete observations)
        * o[0] - the current row position
        * o[1] - the current col position
        * o[2] - obstacle in the +r direction?
        * o[3] - obstacle in the -r direction?
        * o[4] - obstacle in the +c direction?
        * o[5] - obstacle in the -c direction?
    """
    # The docstring has to be the first statement of the class body to become
    # MazeEnvironment.__doc__; placed after this assignment it was only a
    # discarded string expression.
    # The maze is a class attribute, generated once when the class body runs.
    maze = Maze.generate(ROWS, COLS, GRID_DX, GRID_DY)
    def __init__(self):
        """
        Set up the reward structure, the action / observation / reward
        descriptions handed to agents, and the bookkeeping containers.
        """
        Environment.__init__(self)
        self.rewards = MazeRewardStructure()

        action_info = FeatureVectorInfo()
        observation_info = FeatureVectorInfo()
        reward_info = FeatureVectorInfo()

        # select from the moves we can make
        highest_action = len(MAZE_MOVES) - 1
        action_info.add_discrete(0, highest_action)

        # two position entries (row, col) followed by four 0/1 entries
        for upper_bound in (ROWS - 1, COLS - 1, 1, 1, 1, 1):
            observation_info.add_discrete(0, upper_bound)

        reward_info.add_continuous(-100, 100)
        self.agent_info = AgentInitInfo(
            observation_info, action_info, reward_info)

        self.max_steps = MAX_STEPS
        self.speedup = 0
        # a map of cells and markers so that we don't have more than one
        # per cell
        self.marker_map = dict()
        # states of the marker agents that run for one cell and stop
        self.marker_states = dict()
        # agents active on the map
        self.agent_map = dict()
        # the set of agents that have reached the goal
        self.agents_at_goal = set()
        # handles for the objects used to draw q-values
        self.handles = dict()
        print('Initialized MazeEnvironment')

    def can_move(self, agent, move):
        """
        Figure out if the agent can make the specified move.

        move is a (dr, dc) pair of row / column offsets. The result is
        truthy when the destination cell passes the maze's bounds check
        and is_wall reports no wall from the agent's current cell in the
        direction of the move.
        """
        maze = MazeEnvironment.maze
        pos = agent.state.position
        (r, c) = maze.xy2rc(pos.x, pos.y)
        (dr, dc) = move
        # The destination row is r + dr. The previous code added dc to the
        # row as well, so row moves were bounds-checked against the wrong cell.
        return maze.rc_bounds(r + dr, c + dc) and not maze.is_wall(
            r, c, dr, dc)

    def get_next_rotation(self, move):
        """
        Return the rotation the agent needs in order to face along the move.

        Only the z component is set: the angle, in degrees, of
        atan2(move[1], move[0]).
        """
        heading_radians = atan2(move[1], move[0])
        heading_degrees = degrees(heading_radians)
        return Vector3f(0, 0, heading_degrees)

    def reset(self, agent):
        """
        Put the agent back at maze cell (0, 0) with zero rotation.

        Prints the number of the episode that just finished and always
        returns True.
        """
        print('Episode %d complete' % agent.episode)
        start_x, start_y = MazeEnvironment.maze.rc2xy(0, 0)
        agent.state.position = Vector3f(start_x, start_y, 0)
        agent.state.rotation = Vector3f(0, 0, 0)
        return True

    def get_agent_info(self, agent):
        """
        Return the AgentInitInfo stored in self.agent_info.

        The agent argument is not consulted; every agent gets the same
        object.
        """
        info = self.agent_info
        return info

    def set_animation(self, agent, animation):
        """
        Switch the agent to the named animation sequence.

        Nothing happens when the agent is already playing that animation.
        Otherwise the animation is changed and, when the simulation
        context reports a positive delay, the agent's animation speed is
        divided by that delay.
        """
        if agent.state.animation == animation:
            return
        agent.state.animation = animation
        delay = getSimContext().delay
        current_speed = agent.state.animation_speed
        if delay > 0:
            agent.state.animation_speed = current_speed / delay

    def set_position(self, agent, new_pose):
        """
        Move the agent to the cell named by new_pose = (r, c, h).

        The heading h is unpacked but not applied here. When the target
        coordinates equal the agent's current x / y the agent plays
        'stand'; otherwise its position is updated and it plays 'run'.
        """
        target_r, target_c, _heading = new_pose
        (target_x, target_y) = MazeEnvironment.maze.rc2xy(target_r, target_c)
        pos = agent.state.position
        if pos.x != target_x or pos.y != target_y:
            pos.x = target_x
            pos.y = target_y
            agent.state.position = pos
            self.set_animation(agent, 'run')
        else:
            self.set_animation(agent, 'stand')

    def step(self, agent, action):
        """
        Discrete version: apply one action for the agent and return the
        reward produced by self.rewards.

        The checks run in this order:
         * agent already on the goal cell (ROWS - 1, COLS - 1) -> goal reward
         * agent out of time (agent.step >= max_steps - 1) -> last reward
         * action rejected by the action description -> 'stand', null move
         * action is MAZE_NULL_MOVE -> 'stand', null move
         * agent not facing the move direction -> turn by at most 90
           degrees, ask the agent to retry the action, valid-move reward
         * move leaves the maze -> 'jump', out-of-bounds reward
         * move is blocked by a wall -> 'jump', hit-wall reward
         * otherwise the agent moves, and the goal / time-out checks are
           repeated for the new cell before the valid-move reward is given

        MoveForwardAndStopAgent instances are exempt from the goal and
        time-out checks.
        """
        maze = MazeEnvironment.maze
        (r, c) = maze.xy2rc(agent.state.position.x, agent.state.position.y)
        scored = not isinstance(agent, MoveForwardAndStopAgent)

        # check if we reached the goal
        if scored and r == ROWS - 1 and c == COLS - 1:
            self.agents_at_goal.add(agent)
            return self.rewards.goal_reached(agent)

        # check if we ran out of time
        if scored and agent.step >= self.max_steps - 1:
            return self.rewards.last_reward(agent)

        if not self.agent_info.actions.validate(action):
            # The goal and time-out cases have already returned above, so
            # the repeated checks that used to live here could never fire.
            self.set_animation(agent, 'stand')
            return self.rewards.null_move(agent)

        # check for null action
        a = int(round(action[0]))
        if a == MAZE_NULL_MOVE:
            self.set_animation(agent, 'stand')
            return self.rewards.null_move(agent)

        # calculate new pose
        (dr, dc) = MAZE_MOVES[a]
        new_r, new_c = r + dr, c + dc
        new_heading = self.get_next_rotation((dr, dc)).z
        prev_heading = agent.state.rotation.z

        if new_heading != prev_heading:
            # if the heading is not right, just change the heading and run
            # the rotation animation:
            # "run" "stand" "turn_r_xc" "turn_l_xc" "turn_r_lx" "turn_l_lx"
            # "turn_r_xxx" "turn_l_xxx" "pick_up" "put_down"
            # "hold_run" "hold_stand" "hold_r_xc" "hold_l_xc"
            # "hold_turn_r_lx" "hold_turn_l_lx" "hold_turn_r_xxx"
            # "hold_turn_l_xxx" "jump" "hold_jump"
            turn = new_heading - prev_heading
            if turn > 0:
                if turn > 90:
                    new_heading = prev_heading + 90
                self.set_animation(agent, 'turn_l_lx')
            else:
                # Mirror of the left-turn clamp. The previous test was
                # "< 90", which is true for every negative turn and so
                # forced a full 90-degree step even for smaller turns.
                if turn < -90:
                    new_heading = prev_heading - 90
                self.set_animation(agent, 'turn_r_lx')
            rot0 = copy(agent.state.rotation)
            rot0.z = new_heading
            agent.state.rotation = rot0
            agent.skip()  # don't get a new action, just retry this one
            return self.rewards.valid_move(agent)

        # the heading is right: check if we are in bounds
        if not maze.rc_bounds(new_r, new_c):
            self.set_animation(agent, 'jump')
            return self.rewards.out_of_bounds(agent)
        # check if there is a wall in the way
        if maze.is_wall(r, c, dr, dc):
            self.set_animation(agent, 'jump')
            return self.rewards.hit_wall(agent)
        # nothing in the way, change the position
        self.set_position(agent, (new_r, new_c, new_heading))

        # check if we reached the goal
        if scored and new_r == ROWS - 1 and new_c == COLS - 1:
            self.agents_at_goal.add(agent)
            return self.rewards.goal_reached(agent)

        # check if we ran out of time
        if scored and agent.step >= self.max_steps - 1:
            return self.rewards.last_reward(agent)

        # return a normal reward
        return self.rewards.valid_move(agent)

    def teleport(self, agent, r, c):
        """
        Place the agent on maze cell (r, c) and call its teleport() method.
        """
        (target_x, target_y) = MazeEnvironment.maze.rc2xy(r, c)
        location = agent.state.position
        location.x, location.y = target_x, target_y
        # assign the modified position back so the agent state sees it
        agent.state.position = location
        agent.teleport()

    def sense(self, agent, obs):
        """
        Discrete version: fill in and return the observation vector.

        obs[0] and obs[1] receive the agent's row and column. For each
        entry of MAZE_MOVES, starting at obs[2], the slot is 1 when a ray
        cast from the agent in that direction finds an object of type 1,
        and 0 otherwise.
        """
        here = agent.state.position
        (row, col) = MazeEnvironment.maze.xy2rc(here.x, here.y)
        obs[0] = row
        obs[1] = col
        near = GRID_DX / 10.0
        for slot, (dr, dc) in enumerate(MAZE_MOVES, 2):
            heading = Vector3f(dr, dc, 0)
            ray_start = here + heading * near
            ray_end = here + heading * GRID_DX
            # we only look for objects of type 1, which means walls
            hits = getSimContext().findInRay(ray_start, ray_end, 1, False)
            obs[slot] = 1 if len(hits) > 0 else 0
        return obs

    def is_episode_over(self, agent):
        """
        Tell whether the agent's episode has ended.

        The episode is over when max_steps is non-zero and the agent has
        used them up, or when the agent stands on the goal cell
        (ROWS - 1, COLS - 1). Agents whose class is named
        'MoveForwardAndStopAgent' only ever end by running out of steps.
        On reaching the goal, an agent that has a highlight_path method
        also stops the AI, highlights its path and stands still.
        """
        here = agent.state.position
        (r, c) = MazeEnvironment.maze.xy2rc(here.x, here.y)
        if self.max_steps != 0 and agent.step >= self.max_steps:
            return True
        if agent.__class__.__name__ == 'MoveForwardAndStopAgent':
            return False
        if r != ROWS - 1 or c != COLS - 1:
            return False
        if hasattr(agent, "highlight_path"):
            disable_ai()  # stop running
            agent.highlight_path()  # mark the final path
            self.set_animation(agent, 'stand')  # stop animation
        return True

    def mark_maze(self, r, c, marker):
        """
        Put the given marker object on maze cell (r, c).

        Any marker already recorded for that cell is removed first, so a
        cell never carries more than one marker.
        """
        cell = (r, c)
        if cell in self.marker_map:
            removeObject(self.marker_map[cell])
        where = Vector3f((r + 1) * GRID_DX, (c + 1) * GRID_DY, -1)
        # remember the ID of the marker
        self.marker_map[cell] = addObject(marker, where)

    def mark_maze_blue(self, r, c):
        """ mark maze cell (r, c) with the blue cube """
        cube = "data/shapes/cube/BlueCube.xml"
        self.mark_maze(r, c, cube)

    def mark_maze_green(self, r, c):
        """ mark maze cell (r, c) with the green cube """
        cube = "data/shapes/cube/GreenCube.xml"
        self.mark_maze(r, c, cube)

    def mark_maze_yellow(self, r, c):
        """ mark maze cell (r, c) with the yellow cube """
        cube = "data/shapes/cube/YellowCube.xml"
        self.mark_maze(r, c, cube)

    def mark_maze_white(self, r, c):
        """ mark maze cell (r, c) with the white cube """
        cube = "data/shapes/cube/WhiteCube.xml"
        self.mark_maze(r, c, cube)

    def unmark_maze_agent(self, r, c):
        """ remove the agent marker recorded for maze cell (r, c), if any """
        cell = (r, c)
        if cell not in self.agent_map:
            return
        # remove the object, then forget its state and its map entry
        removeObject(self.agent_map[cell])
        del self.marker_states[self.agent_map[cell]]
        del self.agent_map[cell]

    def mark_maze_agent(self, agent, r1, c1, r2, c2):
        """ mark a maze cell with an agent moving from r1, c1 to r2, c2 """
        source, target = (r1, c1), (r2, c2)
        # remove the previous object, if necessary
        self.unmark_maze_agent(r2, c2)
        # the new marker starts on the source cell, facing the target cell
        start = Vector3f((r1 + 1) * GRID_DX, (c1 + 1) * GRID_DY, 0)
        facing = self.get_next_rotation((r2 - r1, c2 - c1))
        new_id = addObject(agent, position=start, rotation=facing)
        # the state is keyed by object id, the map by target cell
        self.marker_states[new_id] = (source, target)
        self.agent_map[target] = new_id

    def cleanup(self):
        """
        Remove every object this environment added and forget its IDs.

        Covers the cell markers, the agent markers and the cubes used to
        draw q-values; the three bookkeeping maps are emptied.
        """
        # remove the marker blocks
        for marker_id in self.marker_map.values():
            removeObject(marker_id)
        self.marker_map = {}
        # remove the agent markers
        for agent_id in self.agent_map.values():
            removeObject(agent_id)
        # remove the q-value cubes; unused slots hold None
        for handle_list in self.handles.values():
            for handle in handle_list:
                if handle is not None:
                    removeObject(handle)
        self.handles = {}
        self.agent_map = {}

    def draw_q(self, o, Q):
        """
        Draw the action values Q[o] as cubes around the maze cell of o.

        o is the key into Q and into self.handles; o[0] and o[1] are the
        row and column handed to rc2xy. The values are shifted so the
        smallest is 0 and, unless they are all equal, scaled to sum to 1.
        Each entry of MAZE_MOVES gets a blue cube displaced from the cell
        centre along the move direction by 5 times its normalized value;
        actions whose value is 0 get no cube. A yellow cube is added at
        the cell centre the first time the cell is drawn.
        """
        values = Q[o]  # get the action values
        lowest = min(values)
        values = [v - lowest for v in values]  # shift to make all >= 0
        total = sum(values)
        if total != 0:
            # float() keeps "/" from truncating to 0 when the values are
            # integers under Python 2 division rules
            total = float(total)
            values = [v / total for v in values]  # normalize
        center = len(MAZE_MOVES)  # slot after the per-move handles
        if o not in self.handles:
            # one handle per move plus one for the centre cube
            self.handles[o] = [None] * (center + 1)
        handles = self.handles[o]
        (x, y) = self.maze.rc2xy(o[0], o[1])
        for a, (dr, dc) in enumerate(MAZE_MOVES):
            value = values[a] * 5
            p = Vector3f(x, y, 0)
            if dr != 0:
                p.x += dr * value
            if dc != 0:
                p.y += dc * value
            if value == 0:
                # don't show 0 values; previously a 0 value with no cube
                # fell through to the branch below and created one
                if handles[a] is not None:
                    removeObject(handles[a])
                    handles[a] = None
            elif handles[a] is None:
                # create the cube to show the value
                handles[a] = addObject(
                    "data/shapes/cube/BlueCube.xml",
                    p, Vector3f(0, 0, 0), scale=Vector3f(0.5, 0.5, 0.5))
            else:
                # move the existing cube
                getSimContext().setObjectPosition(handles[a], p)
        if handles[center] is None:
            handles[center] = addObject(
                "data/shapes/cube/YellowCube.xml",
                Vector3f(x, y, 0),
                scale=Vector3f(0.6, 0.6, 0.6))