Example #1
    def __init__(self, xdim, ydim, world_file = WORLD_FILE):
        """
        Create the environment
        """

        #self.Qlog = open("Qlog", "wb+")
        OpenNero.Environment.__init__(self) 
        OpenNero.getSimContext().delay = 0.0
        
        self.XDIM = xdim
        self.YDIM = ydim
        self.max_steps = 500       
        self.states = {} # dictionary of agent states

        if world_file is None:
            self.crumbs = world_handler.pattern_cluster(500, "Roomba/world_config.txt")
        else: # read external file
            data = world_handler.read_world(world_file)
            self.XDIM = data['dims'][0]
            self.YDIM = data['dims'][1]
            self.crumbs = data['pellets']
            print len(self.crumbs)
        # only keep crumbs that are inside the walls
        self.crumbs = [c for c in self.crumbs if in_bounds(c.x,c.y)]

        self.init_list = AgentInit()
        self.init_list.add_type("<class 'Roomba.roomba.RoombaBrain'>")
        self.init_list.add_type("<class 'Roomba.RTNEATAgent.RTNEATAgent'>")
        self.init_list.add_type("<class 'Roomba.RLAgent.TabularRLAgent'>")
        #print self.init_list.types

        roomba_abound = self.init_list.get_action("<class 'Roomba.roomba.RoombaBrain'>")
        roomba_sbound = self.init_list.get_sensor("<class 'Roomba.roomba.RoombaBrain'>")
        roomba_rbound = self.init_list.get_reward("<class 'Roomba.roomba.RoombaBrain'>")
        rtneat_abound = self.init_list.get_action("<class 'Roomba.RTNEATAgent.RTNEATAgent'>")
        rtneat_sbound = self.init_list.get_sensor("<class 'Roomba.RTNEATAgent.RTNEATAgent'>")
        rtneat_rbound = self.init_list.get_reward("<class 'Roomba.RTNEATAgent.RTNEATAgent'>")

       
        ### Bounds for Roomba ###
        # actions
        roomba_abound.add_continuous(-math.pi, math.pi) # amount to turn by
        
        # sensors
        roomba_sbound.add_discrete(0,1)    # wall bump
        roomba_sbound.add_continuous(0,xdim)   # self.x
        roomba_sbound.add_continuous(0,ydim)   # self.y
        roomba_sbound.add_continuous(0,xdim)   # closest.x
        roomba_sbound.add_continuous(0,ydim)   # closest.y
        
        # rewards
        roomba_rbound.add_continuous(-100,100) # range for reward

        ### End Bounds for Roomba ####

        ### Bounds for RTNEAT ###
        # actions
        rtneat_abound.add_continuous(-math.pi, math.pi) # amount to turn by
        #rtneat_abound.add_continuous(0, 1) # signal
       
        # sensors
        rtneat_sbound.add_continuous(-math.pi, math.pi) # nearest crumb angle
        rtneat_sbound.add_continuous(0, 1) # proportion of crumb nearby

        # angle and distance sensors for (up to) 8 nearest neighbors
        for _ in range(8):
            rtneat_sbound.add_continuous(-math.pi, math.pi) # angle to neighbor
            rtneat_sbound.add_continuous(0, 1)              # normalized distance to neighbor
    
        # rewards
        rtneat_rbound.add_continuous(-math.pi, math.pi)
        ### End Bounds for RTNEAT ###

        #------------------------------------------------------------------------
        rltabular_abound = self.init_list.get_action("<class 'Roomba.RLAgent.TabularRLAgent'>")
        rltabular_sbound = self.init_list.get_sensor("<class 'Roomba.RLAgent.TabularRLAgent'>")
        rltabular_rbound = self.init_list.get_reward("<class 'Roomba.RLAgent.TabularRLAgent'>")

        ### Bounds for RLTabular ###
        # actions
        rltabular_abound.add_continuous(-math.pi, math.pi) # amount to turn by
        
        # sensors
        rltabular_sbound.add_continuous(-1, 1)
        rltabular_sbound.add_continuous(-1, 1)
        rltabular_sbound.add_continuous(-1, 1)
        rltabular_sbound.add_continuous(-1, 1)
        rltabular_sbound.add_continuous(-1, 1)
        rltabular_sbound.add_continuous(-1, 1)
    
        # rewards
        rltabular_rbound.add_continuous(-1, 1)
        ### End Bounds for RLTabular ###
        #------------------------------------------------------------------------


        # set up shop
        # Add Wayne's Roomba room with experimentally-derived vertical offset to match crumbs.
        common.addObject("data/terrain/RoombaRoom.xml", OpenNero.Vector3f(xdim/2,ydim/2, -1), OpenNero.Vector3f(0,0,0), OpenNero.Vector3f(xdim/245.0, ydim/245.0, constants.HEIGHT/24.5), type = constants.OBJECT_TYPE_WALLS)

        # OpenNero.getSimContext().addAxes()
        self.add_crumbs()
        for crumb in self.crumbs:
            self.add_crumb_sensors(roomba_sbound)        
Example #2
    def __init__(self, xdim, ydim):
        """
        Create the environment
        """
        OpenNero.Environment.__init__(self) 
        
        self.XDIM = xdim
        self.YDIM = ydim
        self.max_steps = 500       
        self.states = {} # dictionary of agent states
        self.crumbs = world_handler.pattern_cluster(500, "Roomba/world_config.txt")
        # only keep crumbs that are inside the walls
        self.crumbs = [c for c in self.crumbs if in_bounds(c.x,c.y)]

        self.init_list = AgentInit()
        self.init_list.add_type("<class 'Roomba.roomba.RoombaBrain'>")
        self.init_list.add_type("<class 'Roomba.RTNEATAgent.RTNEATAgent'>")
        #print self.init_list.types

        roomba_abound = self.init_list.get_action("<class 'Roomba.roomba.RoombaBrain'>")
        roomba_sbound = self.init_list.get_sensor("<class 'Roomba.roomba.RoombaBrain'>")
        roomba_rbound = self.init_list.get_reward("<class 'Roomba.roomba.RoombaBrain'>")
        rtneat_abound = self.init_list.get_action("<class 'Roomba.RTNEATAgent.RTNEATAgent'>")
        rtneat_sbound = self.init_list.get_sensor("<class 'Roomba.RTNEATAgent.RTNEATAgent'>")
        rtneat_rbound = self.init_list.get_reward("<class 'Roomba.RTNEATAgent.RTNEATAgent'>")

        ### Bounds for Roomba ###
        # actions
        roomba_abound.add_continuous(-math.pi, math.pi) # amount to turn by
        
        # sensors
        roomba_sbound.add_discrete(0,1)    # wall bump
        roomba_sbound.add_continuous(0,xdim)   # self.x
        roomba_sbound.add_continuous(0,ydim)   # self.y
        roomba_sbound.add_continuous(0,xdim)   # closest.x
        roomba_sbound.add_continuous(0,ydim)   # closest.y
        
        # rewards
        roomba_rbound.add_continuous(-100,100) # range for reward

        ### End Bounds for Roomba ####

        ### Bounds for RTNEAT ###
        # actions
        rtneat_abound.add_continuous(-math.pi, math.pi) # amount to turn by
        
        # sensors
        rtneat_sbound.add_continuous(-1, 1)
        rtneat_sbound.add_continuous(-1, 1)
        rtneat_sbound.add_continuous(-1, 1)
        rtneat_sbound.add_continuous(-1, 1)
        rtneat_sbound.add_continuous(-1, 1)
        rtneat_sbound.add_continuous(-1, 1)
    
        # rewards
        rtneat_rbound.add_continuous(-1, 1)
        ### End Bounds for RTNEAT ###

        # set up shop
        # Add Wayne's Roomba room with experimentally-derived vertical offset to match crumbs.
        common.addObject("data/terrain/RoombaRoom.xml", OpenNero.Vector3f(xdim/2,ydim/2, -1), OpenNero.Vector3f(0,0,0), OpenNero.Vector3f(xdim/245.0, ydim/245.0, constants.HEIGHT/24.5), type = constants.OBJECT_TYPE_WALLS)

        # OpenNero.getSimContext().addAxes()
        self.add_crumbs()
        for crumb in self.crumbs:
            self.add_crumb_sensors(roomba_sbound)        
Example #3
class RoombaEnvironment(OpenNero.Environment):
    """
    Sample Environment for the Sandbox
    """
    def __init__(self, xdim, ydim, world_file = WORLD_FILE):
        """
        Create the environment
        """

        #self.Qlog = open("Qlog", "wb+")
        OpenNero.Environment.__init__(self) 
        OpenNero.getSimContext().delay = 0.0
        
        self.XDIM = xdim
        self.YDIM = ydim
        self.max_steps = 500       
        self.states = {} # dictionary of agent states

        if world_file is None:
            self.crumbs = world_handler.pattern_cluster(500, "Roomba/world_config.txt")
        else: # read external file
            data = world_handler.read_world(world_file)
            self.XDIM = data['dims'][0]
            self.YDIM = data['dims'][1]
            self.crumbs = data['pellets']
            print len(self.crumbs)
        # only keep crumbs that are inside the walls
        self.crumbs = [c for c in self.crumbs if in_bounds(c.x,c.y)]

        self.init_list = AgentInit()
        self.init_list.add_type("<class 'Roomba.roomba.RoombaBrain'>")
        self.init_list.add_type("<class 'Roomba.RTNEATAgent.RTNEATAgent'>")
        self.init_list.add_type("<class 'Roomba.RLAgent.TabularRLAgent'>")
        #print self.init_list.types

        roomba_abound = self.init_list.get_action("<class 'Roomba.roomba.RoombaBrain'>")
        roomba_sbound = self.init_list.get_sensor("<class 'Roomba.roomba.RoombaBrain'>")
        roomba_rbound = self.init_list.get_reward("<class 'Roomba.roomba.RoombaBrain'>")
        rtneat_abound = self.init_list.get_action("<class 'Roomba.RTNEATAgent.RTNEATAgent'>")
        rtneat_sbound = self.init_list.get_sensor("<class 'Roomba.RTNEATAgent.RTNEATAgent'>")
        rtneat_rbound = self.init_list.get_reward("<class 'Roomba.RTNEATAgent.RTNEATAgent'>")

       
        ### Bounds for Roomba ###
        # actions
        roomba_abound.add_continuous(-math.pi, math.pi) # amount to turn by
        
        # sensors
        roomba_sbound.add_discrete(0,1)    # wall bump
        roomba_sbound.add_continuous(0,xdim)   # self.x
        roomba_sbound.add_continuous(0,ydim)   # self.y
        roomba_sbound.add_continuous(0,xdim)   # closest.x
        roomba_sbound.add_continuous(0,ydim)   # closest.y
        
        # rewards
        roomba_rbound.add_continuous(-100,100) # range for reward

        ### End Bounds for Roomba ####

        ### Bounds for RTNEAT ###
        # actions
        rtneat_abound.add_continuous(-math.pi, math.pi) # amount to turn by
        #rtneat_abound.add_continuous(0, 1) # signal
       
        # sensors
        rtneat_sbound.add_continuous(-math.pi, math.pi) # nearest crumb angle
        rtneat_sbound.add_continuous(0, 1) # proportion of crumb nearby

        # angle and distance sensors for (up to) 8 nearest neighbors
        for _ in range(8):
            rtneat_sbound.add_continuous(-math.pi, math.pi) # angle to neighbor
            rtneat_sbound.add_continuous(0, 1)              # normalized distance to neighbor
    
        # rewards
        rtneat_rbound.add_continuous(-math.pi, math.pi)
        ### End Bounds for RTNEAT ###

        #------------------------------------------------------------------------
        rltabular_abound = self.init_list.get_action("<class 'Roomba.RLAgent.TabularRLAgent'>")
        rltabular_sbound = self.init_list.get_sensor("<class 'Roomba.RLAgent.TabularRLAgent'>")
        rltabular_rbound = self.init_list.get_reward("<class 'Roomba.RLAgent.TabularRLAgent'>")

        ### Bounds for RLTabular ###
        # actions
        rltabular_abound.add_continuous(-math.pi, math.pi) # amount to turn by
        
        # sensors
        rltabular_sbound.add_continuous(-1, 1)
        rltabular_sbound.add_continuous(-1, 1)
        rltabular_sbound.add_continuous(-1, 1)
        rltabular_sbound.add_continuous(-1, 1)
        rltabular_sbound.add_continuous(-1, 1)
        rltabular_sbound.add_continuous(-1, 1)
    
        # rewards
        rltabular_rbound.add_continuous(-1, 1)
        ### End Bounds for RLTabular ###
        #------------------------------------------------------------------------


        # set up shop
        # Add Wayne's Roomba room with experimentally-derived vertical offset to match crumbs.
        common.addObject("data/terrain/RoombaRoom.xml", OpenNero.Vector3f(xdim/2,ydim/2, -1), OpenNero.Vector3f(0,0,0), OpenNero.Vector3f(xdim/245.0, ydim/245.0, constants.HEIGHT/24.5), type = constants.OBJECT_TYPE_WALLS)

        # OpenNero.getSimContext().addAxes()
        self.add_crumbs()
        for crumb in self.crumbs:
            self.add_crumb_sensors(roomba_sbound)        

    def get_state(self, agent):
        if agent in self.states:
            return self.states[agent]
        else:
            print "new state created"
            pos = agent.state.position
            rot = agent.state.rotation
            self.states[agent] = AgentState(pos, rot)
            return self.states[agent]
        
    def randomize(self):
        self.crumbs = world_handler.read_pellets()
        # only keep crumbs that are inside the walls        
        self.crumbs = [c for c in self.crumbs if in_bounds(c.x,c.y)]

    def add_crumb_sensors(self, roomba_sbound):
        """Add the crumb sensors, in order: x position of crumb (0 to XDIM,
        continuous), y position of crumb (0 to YDIM, continuous), whether
        crumb is present at said position or has been vacced (0 or 1), and
        reward for vaccing said crumb."""
        roomba_sbound.add_continuous(0, self.XDIM)    # crumb position X
        roomba_sbound.add_continuous(0, self.YDIM)    # crumb position Y
        roomba_sbound.add_discrete(0, 1)       # crumb present/ not present
        roomba_sbound.add_discrete(1, 5)       # reward for crumb

    def add_crumbs(self):
        for pellet in self.crumbs:
            if not (pellet.x, pellet.y) in getMod().marker_map:
                getMod().mark_blue(pellet.x, pellet.y)

    def reset(self, agent):
        """ reset the environment to its initial state """
        state = self.get_state(agent)
        state.reset()
        agent.state.position = state.initial_position
        agent.state.rotation = state.initial_rotation
        agent.state.velocity = state.initial_velocity
        state.episode_count += 1
        self.add_crumbs()
        global gREWARD
        if gREWARD > 0:
            print "Episode %d complete: %d crumbs collected" % (state.episode_count, gREWARD)
        gREWARD = 0

        ## Crumbs collected logging
        global crumb_count
        global crumb_time
        if crumb_count > 0:
            print "Episode {} crumb count: {}".format(state.episode_count, crumb_count)
            crumb_count = 0
            crumb_time = time.clock()

        return True

    def get_agent_info(self, agent):
        """ return a blueprint for a new agent """
        return self.init_list.get_info(str(type(agent)))

    def num_sensors(self):
        """
        Return total number of sensors
        """
        return (len(getMod().marker_map)*4 + constants.N_FIXED_SENSORS)
    
    def step(self, agent, action):
        """
        A step for an agent
        """
        state = self.get_state(agent) # the agent's status
        reward = 0
        if state.is_out:
            getMod().unmark(agent.state.position.x, agent.state.position.y)
        else:
            assert(self.get_agent_info(agent).actions.validate(action)) # check if the action is valid
            # all agent types currently interpret the action the same way
            angle = action[0] # in range of -pi to pi
            degree_angle = math.degrees(angle)
            delta_angle = degree_angle - agent.state.rotation.z
            delta_dist = constants.MAX_SPEED
            reward = self.update_position(agent, delta_dist, delta_angle)
        state.reward += reward
        global gREWARD 
        gREWARD += reward

        # Enable to use shared reward
        #global crumb_count
        #state.reward = crumb_count
        return reward

    # delta_angle (degrees) is change in angle
    # delta_dist is change in distance (or velocity, since unit of time unchanged)
    def update_position(self, agent, delta_dist, delta_angle):
        """
        Updates position of the agent and collects pellets.
        """
        state = self.get_state(agent)
        state.step_count += 1

        position = agent.state.position
        rotation = agent.state.rotation

        # posteriori collision detection
        rotation.z = common.wrap_degrees(rotation.z, delta_angle)
        position.x += delta_dist*math.cos(math.radians(rotation.z))
        position.y += delta_dist*math.sin(math.radians(rotation.z))

        reward = 0

        # check if one of 4 out-of-bound conditions applies
        # if yes, back-track to correct position
        if (position.x) < 0 or (position.y) < 0 or \
           (position.x) > self.XDIM or (position.y) > self.YDIM:
            # correct position
            if (position.x) < 0:
                position.x -= 2 * delta_dist*math.cos(math.radians(rotation.z))
                reward -= .05
            if (position.y) < 0:
                position.y -= 2 * delta_dist*math.sin(math.radians(rotation.z))
                reward -= .05
            if (position.x) > self.XDIM:
                position.x -= 2 * delta_dist*math.cos(math.radians(rotation.z))
                reward -= .05
            if (position.y) > self.YDIM:
                position.y -= 2 * delta_dist*math.sin(math.radians(rotation.z))
                reward -= .05
        elif position.x < self.XDIM * 0.174 and position.y > self.YDIM * (1.0 - 0.309):
            position.x -= 2 * delta_dist*math.cos(math.radians(rotation.z))
            position.y -= 2 * delta_dist*math.sin(math.radians(rotation.z))
            reward -= .05

        # Temp. disable collisions
        #elif furniture_collide_all(position.x, position.y):
        #    position.x -= 2 * delta_dist*math.cos(math.radians(rotation.z))
        #    position.y -= 2 * delta_dist*math.sin(math.radians(rotation.z))
               
        # register new position
        state.position = position
        #state.rotation = rotation
        agent.state.position = position
        agent.state.rotation = rotation
        
        # remove all crumbs within ROOMBA_RAD of agent position
        pos = (position.x, position.y)
        for crumb in self.crumbs:
            if (crumb.x, crumb.y) in getMod().marker_map:
                distance = math.hypot(crumb[0] - pos[0], crumb[1] - pos[1])
                if distance < constants.ROOMBA_RAD:
                    getMod().unmark(crumb.x, crumb.y)
                    reward += crumb.reward
                    global crumb_count
                    crumb_count += 1
                
        # check if agent has expended its step allowance
        if (self.max_steps != 0) and (state.step_count >= self.max_steps):
            state.is_out = True    # if yes, mark it to be removed
        return reward         
    
    def sense(self, agent, sensors):
        """ figure out what the agent should sense """
        state = self.get_state(agent)
        if (str(type(agent)) == "<class 'Roomba.roomba.RoombaBrain'>"):
            if state.bumped:
                sensors[0] = 1
                state.bumped = False
            else:
                sensors[0] = 0

            # get agent's position
            pos = agent.state.position
            sensors[1] = pos.x
            sensors[2] = pos.y
            
            # describe every other crumb on the field!
            self.sense_crumbs(sensors, constants.N_S_IN_BLOCK, constants.N_FIXED_SENSORS, agent)

        else:
            max_dist = math.hypot(self.XDIM, self.YDIM)

            # Min angle and distance to nearest crumb
            sensors[0] = constants.MAX_DISTANCE
            sensors[1] = constants.MAX_DISTANCE

            min_dist = max_dist
            min_dist_angle = 0
            nearby_crumbs = 0.0
            for cube_position in getMod().marker_map:
                distx = cube_position[0] - agent.state.position.x
                disty = cube_position[1] - agent.state.position.y
                dist = math.hypot(distx, disty)
                angle = math.atan2(disty, distx) # range [-pi, pi]
                if dist < min_dist:
                    min_dist = dist
                    min_dist_angle = angle
                if dist < self.XDIM / 8:
                    nearby_crumbs += 1

            sensors[0] = min_dist_angle
            sensors[1] = nearby_crumbs / len(getMod().marker_map) # proportion of crumbs which are nearby

            # Min angle and distance to the (up to) 8 nearest other agents
            num_agent_sensors = 8
            min_dist = []
            for i in range(num_agent_sensors):
                min_dist.append((math.hypot(self.XDIM, self.YDIM), 0, 0))
            for other_agent in self.states:
                distx = other_agent.state.position.x - agent.state.position.x
                disty = other_agent.state.position.y - agent.state.position.y
                dist = math.hypot(distx, disty)
                angle = math.atan2(disty, distx) # range [-pi, pi]
                if dist > 0:
                    signal = 0
                    #if other_agent.past_actions != None:
                    #    signal = other_agent.past_actions[1]
                    min_dist.append((dist, angle, signal))

            min_dist.sort(key=lambda entry: entry[0]) # sort by distance
            for i in range(num_agent_sensors):
                (dist, ang, signal) = min_dist[i]
                sensors[2 * i + 2] = ang
                sensors[2 * i + 3] = dist / max_dist
                # signal is not used

        return sensors

    def sense_crumbs(self, sensors, num_sensors, start_sensor, agent):
        """
        Generate a (big) array of observations, num_sensors for each crumb
        and store them inside sensors starting at start_sensor.
        Each crumb is stored as: (x,y,exists?,reward)
        """
        i = start_sensor
        closest = None
        closest_distance = None
        pos = agent.state.position
        pos = (pos.x, pos.y)
        for pellet in self.crumbs:
            sensors[i] = pellet.x
            sensors[i+1] = pellet.y
            if (pellet.x, pellet.y) in getMod().marker_map:
                sensors[i+2] = 1
                distance = math.hypot(pellet.x - pos[0], pellet.y - pos[1])
                if closest is None or distance < closest_distance:
                    closest = pellet
                    closest_distance = distance
            else:
                sensors[i+2] = 0 
            sensors[i+3] = pellet.reward
            i = i + num_sensors
        if closest is not None:
            # freebie for scripted agents: tell agent the closest crumb!
            sensors[3] = closest.x
            sensors[4] = closest.y

        return True
                     
    def is_episode_over(self, agent):
        """ is the current episode over for the agent? """
        state = self.get_state(agent)
        if self.max_steps != 0 and state.step_count >= self.max_steps:
            return True
        return False
    
    def cleanup(self):
        """
        cleanup the world
        """
        self.environment = None
        return True
Example #4
class RoombaEnvironment(OpenNero.Environment):
    """
    Sample Environment for the Sandbox
    """
    def __init__(self, xdim, ydim):
        """
        Create the environment
        """
        OpenNero.Environment.__init__(self) 
        
        self.XDIM = xdim
        self.YDIM = ydim
        self.max_steps = 500       
        self.states = {} # dictionary of agent states
        self.crumbs = world_handler.pattern_cluster(500, "Roomba/world_config.txt")
        # only keep crumbs that are inside the walls
        self.crumbs = [c for c in self.crumbs if in_bounds(c.x,c.y)]

        self.init_list = AgentInit()
        self.init_list.add_type("<class 'Roomba.roomba.RoombaBrain'>")
        self.init_list.add_type("<class 'Roomba.RTNEATAgent.RTNEATAgent'>")
        #print self.init_list.types

        roomba_abound = self.init_list.get_action("<class 'Roomba.roomba.RoombaBrain'>")
        roomba_sbound = self.init_list.get_sensor("<class 'Roomba.roomba.RoombaBrain'>")
        roomba_rbound = self.init_list.get_reward("<class 'Roomba.roomba.RoombaBrain'>")
        rtneat_abound = self.init_list.get_action("<class 'Roomba.RTNEATAgent.RTNEATAgent'>")
        rtneat_sbound = self.init_list.get_sensor("<class 'Roomba.RTNEATAgent.RTNEATAgent'>")
        rtneat_rbound = self.init_list.get_reward("<class 'Roomba.RTNEATAgent.RTNEATAgent'>")

        ### Bounds for Roomba ###
        # actions
        roomba_abound.add_continuous(-math.pi, math.pi) # amount to turn by
        
        # sensors
        roomba_sbound.add_discrete(0,1)    # wall bump
        roomba_sbound.add_continuous(0,xdim)   # self.x
        roomba_sbound.add_continuous(0,ydim)   # self.y
        roomba_sbound.add_continuous(0,xdim)   # closest.x
        roomba_sbound.add_continuous(0,ydim)   # closest.y
        
        # rewards
        roomba_rbound.add_continuous(-100,100) # range for reward

        ### End Bounds for Roomba ####

        ### Bounds for RTNEAT ###
        # actions
        rtneat_abound.add_continuous(-math.pi, math.pi) # amount to turn by
        
        # sensors
        rtneat_sbound.add_continuous(-1, 1)
        rtneat_sbound.add_continuous(-1, 1)
        rtneat_sbound.add_continuous(-1, 1)
        rtneat_sbound.add_continuous(-1, 1)
        rtneat_sbound.add_continuous(-1, 1)
        rtneat_sbound.add_continuous(-1, 1)
    
        # rewards
        rtneat_rbound.add_continuous(-1, 1)
        ### End Bounds for RTNEAT ###

        # set up shop
        # Add Wayne's Roomba room with experimentally-derived vertical offset to match crumbs.
        common.addObject("data/terrain/RoombaRoom.xml", OpenNero.Vector3f(xdim/2,ydim/2, -1), OpenNero.Vector3f(0,0,0), OpenNero.Vector3f(xdim/245.0, ydim/245.0, constants.HEIGHT/24.5), type = constants.OBJECT_TYPE_WALLS)

        # OpenNero.getSimContext().addAxes()
        self.add_crumbs()
        for crumb in self.crumbs:
            self.add_crumb_sensors(roomba_sbound)        

    def get_state(self, agent):
        if agent in self.states:
            return self.states[agent]
        else:
            print "new state created"
            pos = agent.state.position
            rot = agent.state.rotation
            self.states[agent] = AgentState(pos, rot)
            return self.states[agent]
        
    def randomize(self):
        self.crumbs = world_handler.read_pellets()
        # only keep crumbs that are inside the walls        
        self.crumbs = [c for c in self.crumbs if in_bounds(c.x,c.y)]

    def add_crumb_sensors(self, roomba_sbound):
        """Add the crumb sensors, in order: x position of crumb (0 to XDIM,
        continuous), y position of crumb (0 to YDIM, continuous), whether
        crumb is present at said position or has been vacced (0 or 1), and
        reward for vaccing said crumb."""
        roomba_sbound.add_continuous(0, self.XDIM)    # crumb position X
        roomba_sbound.add_continuous(0, self.YDIM)    # crumb position Y
        roomba_sbound.add_discrete(0, 1)       # crumb present/ not present
        roomba_sbound.add_discrete(1, 5)       # reward for crumb

    def add_crumbs(self):
        for pellet in self.crumbs:
            if not (pellet.x, pellet.y) in getMod().marker_map:
                getMod().mark_blue(pellet.x, pellet.y)

    def reset(self, agent):
        """ reset the environment to its initial state """
        state = self.get_state(agent)
        state.reset()
        agent.state.position = state.initial_position
        agent.state.rotation = state.initial_rotation
        agent.state.velocity = state.initial_velocity
        state.episode_count += 1
        self.add_crumbs()
        print "Episode %d complete" % state.episode_count
        return True

    def get_agent_info(self, agent):
        """ return a blueprint for a new agent """
        return self.init_list.get_info(str(type(agent)))

    def num_sensors(self):
        """
        Return total number of sensors
        """
        return (len(getMod().marker_map)*4 + constants.N_FIXED_SENSORS)
    
    def step(self, agent, action):
        """
        A step for an agent
        """
        state = self.get_state(agent) # the agent's status
        reward = 0
        if state.is_out:
            getMod().unmark(agent.state.position.x, agent.state.position.y)
        else:
            assert(self.get_agent_info(agent).actions.validate(action)) # check if the action is valid
            # all agent types currently interpret the action the same way
            angle = action[0] # in range of -pi to pi
            degree_angle = math.degrees(angle)
            delta_angle = degree_angle - agent.state.rotation.z
            delta_dist = constants.MAX_SPEED
            reward = self.update_position(agent, delta_dist, delta_angle)
        state.reward += reward
        return reward

    # delta_angle (degrees) is change in angle
    # delta_dist is change in distance (or velocity, since unit of time unchanged)
    def update_position(self, agent, delta_dist, delta_angle):
        """
        Updates position of the agent and collects pellets.
        """
        state = self.get_state(agent)
        state.step_count += 1

        position = agent.state.position
        rotation = agent.state.rotation

        # posteriori collision detection
        rotation.z = common.wrap_degrees(rotation.z, delta_angle)
        position.x += delta_dist*math.cos(math.radians(rotation.z))
        position.y += delta_dist*math.sin(math.radians(rotation.z))

        # check if one of 4 out-of-bound conditions applies
        # if yes, back-track to correct position
        if (position.x) < 0 or (position.y) < 0 or \
           (position.x) > self.XDIM or (position.y) > self.YDIM:
            # correct position
            if (position.x) < 0:
                position.x -= 2 * delta_dist*math.cos(math.radians(rotation.z))
            if (position.y) < 0:
                position.y -= 2 * delta_dist*math.sin(math.radians(rotation.z))
            if (position.x) > self.XDIM:
                position.x -= 2 * delta_dist*math.cos(math.radians(rotation.z))
            if (position.y) > self.YDIM:
                position.y -= 2 * delta_dist*math.sin(math.radians(rotation.z))
        elif position.x < self.XDIM * 0.174 and position.y > self.YDIM * (1.0 - 0.309):
            position.x -= 2 * delta_dist*math.cos(math.radians(rotation.z))
            position.y -= 2 * delta_dist*math.sin(math.radians(rotation.z))
        elif furniture_collide_all(position.x, position.y):
            position.x -= 2 * delta_dist*math.cos(math.radians(rotation.z))
            position.y -= 2 * delta_dist*math.sin(math.radians(rotation.z))
                
        # register new position
        state.position = position
        state.rotation = rotation
        agent.state.position = position
        agent.state.rotation = rotation
        
        reward = 0
        
        # remove all crumbs within ROOMBA_RAD of agent position
        pos = (position.x, position.y)
        for crumb in self.crumbs:
            if (crumb.x, crumb.y) in getMod().marker_map:
                distance = math.hypot(crumb[0] - pos[0], crumb[1] - pos[1])
                if distance < constants.ROOMBA_RAD:
                    getMod().unmark(crumb.x, crumb.y)
                    reward += crumb.reward
                
        # check if agent has expended its step allowance
        if (self.max_steps != 0) and (state.step_count >= self.max_steps):
            state.is_out = True    # if yes, mark it to be removed
        return reward            
    
    def sense(self, agent, sensors):
        """ figure out what the agent should sense """
        state = self.get_state(agent)
        if (str(type(agent)) == "<class 'Roomba.roomba.RoombaBrain'>"):
            if state.bumped:
                sensors[0] = 1
                state.bumped = False
            else:
                sensors[0] = 0

            # get agent's position
            pos = agent.state.position
            sensors[1] = pos.x
            sensors[2] = pos.y
            
            # describe every other crumb on the field!
            self.sense_crumbs(sensors, constants.N_S_IN_BLOCK, constants.N_FIXED_SENSORS, agent)

        else:
            """ Copied over from creativeit branch """
            sensors[0] = constants.MAX_DISTANCE
            sensors[1] = constants.MAX_DISTANCE
            sensors[2] = constants.MAX_DISTANCE
            sensors[3] = constants.MAX_DISTANCE
            sensors[4] = -1
            sensors[5] = constants.MAX_DISTANCE
            
            # The first four sensors detect the distance to the nearest cube in each of the
            # four quadrants defined by the coordinate frame attached to the agent.  The
            # positive X axis of the coordinate frame is oriented in the forward direction
            # with respect to the agent.  The fifth sensor detects the minimum angular
            # distance between the agent and the nearest cubes detected by the other sensors.
            # All sensor readings are normalized to lie in [-1, 1].
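            # Worked example (hypothetical numbers, purely for illustration): an agent
            # at (0, 0) with rotation.z = 0 and a cube at (1, 1) gives distx = 1,
            # disty = 1, dist = sqrt(2), and angle = 45/180 = 0.25, so the cube falls
            # in the [0, 0.5) quadrant and updates sensors[2] (and sensors[4] if 0.25
            # is the smallest angular offset seen so far).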
            
            for cube_position in getMod().marker_map:
                
                distx = cube_position[0] - agent.state.position.x
                disty = cube_position[1] - agent.state.position.y
                dist = math.hypot(distx, disty)
                angle = math.degrees(math.atan2(disty, distx)) - agent.state.rotation.z  # range [-360, 360]
                if angle > 180: angle = angle - 360
                if angle < -180: angle = angle + 360
                angle = angle/180 # range [-1, 1]
                if angle >= -1 and angle < -0.5:
                    if dist < sensors[0]:
                        sensors[0] = dist
                        if math.fabs(angle) < math.fabs(sensors[4]): sensors[4] = angle
                elif angle >= -0.5 and angle < 0:
                    if dist < sensors[1]:
                        sensors[1] = dist
                        if math.fabs(angle) < math.fabs(sensors[4]): sensors[4] = angle
                elif angle >= 0 and angle < 0.5:
                    if dist < sensors[2]:
                        sensors[2] = dist
                        if math.fabs(angle) < math.fabs(sensors[4]): sensors[4] = angle
                else:
                    if dist < sensors[3]:
                        sensors[3] = dist
                        if math.fabs(angle) < math.fabs(sensors[4]): sensors[4] = angle
                                
            # Any distance sensor that still has the value MAX_DISTANCE is set to -1.
            for i in range(0, 6):
                if i != 4 and sensors[i] >= constants.MAX_DISTANCE:
                    sensors[i] = -1

            # Invert and normalize the remaining distance sensor values to [0, 1]
            maxval = max(sensors[0], sensors[1], sensors[2], sensors[3], sensors[5])
            if maxval > 0:
                for i in range(0, 6):
                    if i != 4 and sensors[i] > 0:
                        sensors[i] = 1 - (sensors[i]/maxval)

            # Now, sensors that do not detect any cubes/wall will have the value -1,
            # sensors that detect cubes/wall at maxval distance will have the value 0,
            # and sensors that detect cubes/wall at zero distance will have value 1.
        return sensors

    def sense_crumbs(self, sensors, num_sensors, start_sensor, agent):
        """
        Generate a (big) array of observations, num_sensors for each crumb
        and store them inside sensors starting at start_sensor.
        Each crumb is stored as: (x,y,exists?,reward)
        """
        i = start_sensor
        closest = None
        closest_distance = None
        pos = agent.state.position
        pos = (pos.x, pos.y)
        for pellet in self.crumbs:
            sensors[i] = pellet.x
            sensors[i+1] = pellet.y
            if (pellet.x, pellet.y) in getMod().marker_map:
                sensors[i+2] = 1
                distance = math.hypot(pellet.x - pos[0], pellet.y - pos[1])
                if closest is None or distance < closest_distance:
                    closest = pellet
                    closest_distance = distance
            else:
                sensors[i+2] = 0 
            sensors[i+3] = pellet.reward
            i = i + num_sensors
        if closest is not None:
            # freebie for scripted agents: tell agent the closest crumb!
            sensors[3] = closest.x
            sensors[4] = closest.y
        return True
                     
    def is_episode_over(self, agent):
        """ is the current episode over for the agent? """
        state = self.get_state(agent)
        if self.max_steps != 0 and state.step_count >= self.max_steps:
            return True
        return False
    
    def cleanup(self):
        """
        cleanup the world
        """
        self.environment = None
        return True
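
The examples above only show the environment class itself; in an OpenNERO mod the environment still has to be constructed and registered with the engine before agents can be added. The following is a minimal sketch of that wiring, not taken from the examples: it assumes a constants module that defines XDIM and YDIM, assumes the import path Roomba.module, and assumes OpenNero.set_environment is the registration call used by other OpenNERO mods.

# Minimal, illustrative wiring sketch (assumptions noted above).
import OpenNero
import constants                              # assumed to define XDIM and YDIM
from Roomba.module import RoombaEnvironment   # hypothetical import path

def setup_world():
    # build the environment with the mod's configured dimensions
    env = RoombaEnvironment(constants.XDIM, constants.YDIM)
    # register the environment with the engine so agents can be stepped in it
    OpenNero.set_environment(env)
    return env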