class RoombaEnvironment(OpenNero.Environment): """ Sample Environment for the Sandbox """ def __init__(self, xdim, ydim, world_file = WORLD_FILE): """ Create the environment """ #self.Qlog = open("Qlog", "wb+") OpenNero.Environment.__init__(self) OpenNero.getSimContext().delay = 0.0; self.XDIM = xdim self.YDIM = ydim self.max_steps = 500 self.states = {} # dictionary of agent states if WORLD_FILE is None: self.crumbs = world_handler.pattern_cluster(500, "Roomba/world_config.txt") else: # read external file data = world_handler.read_world(WORLD_FILE) self.XDIM = data['dims'][0] self.YDIM = data['dims'][1] self.crumbs = data['pellets'] print len(self.crumbs) # only keep crumbs that are inside the walls self.crumbs = [c for c in self.crumbs if in_bounds(c.x,c.y)] self.init_list = AgentInit() self.init_list.add_type("<class 'Roomba.roomba.RoombaBrain'>") self.init_list.add_type("<class 'Roomba.RTNEATAgent.RTNEATAgent'>") self.init_list.add_type("<class 'Roomba.RLAgent.TabularRLAgent'>") #print self.init_list.types roomba_abound = self.init_list.get_action("<class 'Roomba.roomba.RoombaBrain'>") roomba_sbound = self.init_list.get_sensor("<class 'Roomba.roomba.RoombaBrain'>") roomba_rbound = self.init_list.get_reward("<class 'Roomba.roomba.RoombaBrain'>") rtneat_abound = self.init_list.get_action("<class 'Roomba.RTNEATAgent.RTNEATAgent'>") rtneat_sbound = self.init_list.get_sensor("<class 'Roomba.RTNEATAgent.RTNEATAgent'>") rtneat_rbound = self.init_list.get_reward("<class 'Roomba.RTNEATAgent.RTNEATAgent'>") ### Bounds for Roomba ### # actions roomba_abound.add_continuous(-math.pi, math.pi) # amount to turn by # sensors roomba_sbound.add_discrete(0,1) # wall bump roomba_sbound.add_continuous(0,xdim) # self.x roomba_sbound.add_continuous(0,ydim) # self.y roomba_sbound.add_continuous(0,xdim) # closest.x roomba_sbound.add_continuous(0,ydim) # closest.y # rewards roomba_rbound.add_continuous(-100,100) # range for reward ### End Bounds for Roomba #### ### Bounds for RTNEAT ### # actions rtneat_abound.add_continuous(-math.pi, math.pi) # amount to turn by #rtneat_abound.add_continuous(0, 1) # signal # sensors rtneat_sbound.add_continuous(-math.pi, math.pi) # nearest crumb angle rtneat_sbound.add_continuous(0, 1) # proportion of crumb nearby # angle and distance sensors for (up to) 8 nearest neighbors rtneat_sbound.add_continuous(-math.pi, math.pi) rtneat_sbound.add_continuous(0, 1) rtneat_sbound.add_continuous(-math.pi, math.pi) rtneat_sbound.add_continuous(0, 1) rtneat_sbound.add_continuous(-math.pi, math.pi) rtneat_sbound.add_continuous(0, 1) rtneat_sbound.add_continuous(-math.pi, math.pi) rtneat_sbound.add_continuous(0, 1) rtneat_sbound.add_continuous(-math.pi, math.pi) rtneat_sbound.add_continuous(0, 1) rtneat_sbound.add_continuous(-math.pi, math.pi) rtneat_sbound.add_continuous(0, 1) rtneat_sbound.add_continuous(-math.pi, math.pi) rtneat_sbound.add_continuous(0, 1) rtneat_sbound.add_continuous(-math.pi, math.pi) rtneat_sbound.add_continuous(0, 1) # rewards rtneat_rbound.add_continuous(-math.pi, math.pi) ### End Bounds for RTNEAT ### #------------------------------------------------------------------------ rltabular_abound = self.init_list.get_action("<class 'Roomba.RLAgent.TabularRLAgent'>") rltabular_sbound = self.init_list.get_sensor("<class 'Roomba.RLAgent.TabularRLAgent'>") rltabular_rbound = self.init_list.get_reward("<class 'Roomba.RLAgent.TabularRLAgent'>") ### Bounds for RLTabular ### # actions rltabular_abound.add_continuous(-math.pi, math.pi) # amount to turn by # sensors rltabular_sbound.add_continuous(-1, 1) rltabular_sbound.add_continuous(-1, 1) rltabular_sbound.add_continuous(-1, 1) rltabular_sbound.add_continuous(-1, 1) rltabular_sbound.add_continuous(-1, 1) rltabular_sbound.add_continuous(-1, 1) # rewards rltabular_rbound.add_continuous(-1, 1) ### End Bounds for RLTabular ### #------------------------------------------------------------------------ # set up shop # Add Wayne's Roomba room with experimentally-derived vertical offset to match crumbs. common.addObject("data/terrain/RoombaRoom.xml", OpenNero.Vector3f(xdim/2,ydim/2, -1), OpenNero.Vector3f(0,0,0), OpenNero.Vector3f(xdim/245.0, ydim/245.0, constants.HEIGHT/24.5), type = constants.OBJECT_TYPE_WALLS) # OpenNero.getSimContext().addAxes() self.add_crumbs() for crumb in self.crumbs: self.add_crumb_sensors(roomba_sbound) def get_state(self, agent): if agent in self.states: return self.states[agent] else: print "new state created" pos = agent.state.position rot = agent.state.rotation self.states[agent] = AgentState(pos, rot) return self.states[agent] def randomize(self): self.crumbs = world_handler.read_pellets() # only keep crumbs that are inside the walls self.crumbs = [c for c in self.crumbs if in_bounds(c.x,c.y)] def add_crumb_sensors(self, roomba_sbound): """Add the crumb sensors, in order: x position of crumb (0 to XDIM, continuous), y position of crumb (0 to XDIM, continuous), whether crumb is present at said position or has been vacced (0 or 1), and reward for vaccing said crumb.""" roomba_sbound.add_continuous(0, self.XDIM) # crumb position X roomba_sbound.add_continuous(0, self.YDIM) # crumb position Y roomba_sbound.add_discrete(0, 1) # crumb present/ not present roomba_sbound.add_discrete(1, 5) # reward for crumb def add_crumbs(self): for pellet in self.crumbs: if not (pellet.x, pellet.y) in getMod().marker_map: getMod().mark_blue(pellet.x, pellet.y) def reset(self, agent): """ reset the environment to its initial state """ state = self.get_state(agent) state.reset() agent.state.position = state.initial_position agent.state.rotation = state.initial_rotation agent.state.velocity = state.initial_velocity state.episode_count += 1 self.add_crumbs() global gREWARD if gREWARD > 0: print "Episode %d complete: %d crumbs collected " % (state.episode_count,\ gREWARD) global gREWARD gREWARD = 0 ## Crumbs collected logging global crumb_count global crumb_time if crumb_count > 0: print "Episode {} crumb count: {}".format(state.episode_count, crumb_count) crumb_count = 0 crumb_time = time.clock() return True def get_agent_info(self, agent): """ return a blueprint for a new agent """ return self.init_list.get_info(str(type(agent))) def num_sensors(self): """ Return total number of sensors """ return (len(getMod().marker_map)*4 + constants.N_FIXED_SENSORS) def step(self, agent, action): """ A step for an agent """ state = self.get_state(agent) # the agent's status if (state.is_out == True): getMod().unmark(agent.state.position.x, agent.state.position.y) else: assert(self.get_agent_info(agent).actions.validate(action)) # check if the action is valid if (str(type(agent)) == "<class 'Roomba.roomba.RoombaBrain'>"): angle = action[0] # in range of -pi to pi degree_angle = math.degrees(angle) delta_angle = degree_angle - agent.state.rotation.z delta_dist = constants.MAX_SPEED else: angle = action[0] # in range of -pi to pi degree_angle = math.degrees(angle) delta_angle = degree_angle - agent.state.rotation.z delta_dist = constants.MAX_SPEED reward = self.update_position(agent, delta_dist, delta_angle) state.reward += reward global gREWARD gREWARD += reward # Enable to use shared reward #global crumb_count #state.reward = crumb_count return reward # delta_angle (degrees) is change in angle # delta_dist is change in distance (or velocity, since unit of time unchanged) def update_position(self, agent, delta_dist, delta_angle): """ Updates position of the agent and collects pellets. """ state = self.get_state(agent) state.step_count += 1 position = agent.state.position rotation = agent.state.rotation # posteriori collision detection rotation.z = common.wrap_degrees(rotation.z, delta_angle) position.x += delta_dist*math.cos(math.radians(rotation.z)) position.y += delta_dist*math.sin(math.radians(rotation.z)) reward = 0 # check if one of 4 out-of-bound conditions applies # if yes, back-track to correct position if (position.x) < 0 or (position.y) < 0 or \ (position.x) > self.XDIM or (position.y) > self.YDIM: # correct position if (position.x) < 0: position.x -= 2 * delta_dist*math.cos(math.radians(rotation.z)) reward -= .05 if (position.y) < 0: position.y -= 2 * delta_dist*math.sin(math.radians(rotation.z)) reward -= .05 if (position.x) > self.XDIM: position.x -= 2 * delta_dist*math.cos(math.radians(rotation.z)) reward -= .05 if (position.y) > self.YDIM: position.y -= 2 * delta_dist*math.sin(math.radians(rotation.z)) reward -= .05 elif position.x < self.XDIM * 0.174 and position.y > self.YDIM * (1.0 - 0.309): position.x -= 2 * delta_dist*math.cos(math.radians(rotation.z)) position.y -= 2 * delta_dist*math.sin(math.radians(rotation.z)) reward -= .05 # Temp. disable collisions #elif furniture_collide_all(position.x, position.y): # position.x -= 2 * delta_dist*math.cos(math.radians(rotation.z)) # position.y -= 2 * delta_dist*math.sin(math.radians(rotation.z)) # register new position state.position = position #state.rotation = rotation agent.state.position = position agent.state.rotation = rotation # remove all crumbs within ROOMBA_RAD of agent position pos = (position.x, position.y) for crumb in self.crumbs: if (crumb.x, crumb.y) in getMod().marker_map: distance = math.hypot(crumb[0] - pos[0], crumb[1] - pos[1]) if distance < constants.ROOMBA_RAD: getMod().unmark(crumb.x, crumb.y) reward += crumb.reward global crumb_count crumb_count += 1 # check if agent has expended its step allowance if (self.max_steps != 0) and (state.step_count >= self.max_steps): state.is_out = True # if yes, mark it to be removed return reward def sense(self, agent, sensors): """ figure out what the agent should sense """ state = self.get_state(agent) if (str(type(agent)) == "<class 'Roomba.roomba.RoombaBrain'>"): if state.bumped: sensors[0] = 1 state.bumped = False else: sensors[0] = 0 # get agent's position pos = agent.state.position sensors[1] = pos.x sensors[2] = pos.y # describe every other crumb on the field! self.sense_crumbs(sensors, constants.N_S_IN_BLOCK, constants.N_FIXED_SENSORS, agent) else: max_dist = math.hypot(self.XDIM, self.YDIM) # Min angle and distance to nearest crumb sensors[0] = constants.MAX_DISTANCE sensors[1] = constants.MAX_DISTANCE min_dist = max_dist min_dist_angle = 0 nearby_crumbs = 0.0 for cube_position in getMod().marker_map: distx = cube_position[0] - agent.state.position.x disty = cube_position[1] - agent.state.position.y dist = math.hypot(distx, disty) angle = math.atan2(disty, distx) angle = angle # range [-pi, pi] if dist < min_dist: min_dist = dist min_dist_angle = angle if dist < self.XDIM / 8: nearby_crumbs += 1 sensors[0] = min_dist_angle sensors[1] = nearby_crumbs / len(getMod().marker_map) # propotion of crumbs which are nearby # Min angle and distance to nearest other agent num_agent_sensors = 8 min_dist = [] for i in range(num_agent_sensors): min_dist.append((math.hypot(self.XDIM, self.YDIM), 0, 0)) for other_agent in self.states: distx = other_agent.state.position.x - agent.state.position.x disty = other_agent.state.position.y - agent.state.position.y dist = math.hypot(distx, disty) angle = math.atan2(disty, distx) angle = angle # range [-pi, pi] if dist > 0: signal = 0 #if other_agent.past_actions != None: # signal = other_agent.past_actions[1] min_dist.append((dist, angle, signal)) min_dist.sort(key=lambda (d, a, s): d) for i in range(num_agent_sensors): (dist, ang, signal) = min_dist[i] sensors[2 * i + 2] = ang sensors[2 * i + 3] = dist / max_dist # signal isnot used return sensors def sense_crumbs(self, sensors, num_sensors, start_sensor, agent): """ Generate a (big) array of observations, num_sensors for each crumb and store them inside sensors starting at start_sensor. Each crumb is stored as: (x,y,exists?,reward) """ i = start_sensor closest = None closest_distance = None pos = agent.state.position pos = (pos.x, pos.y) for pellet in self.crumbs: sensors[i] = pellet.x sensors[i+1] = pellet.y if (pellet.x, pellet.y) in getMod().marker_map: sensors[i+2] = 1 distance = math.hypot(pellet.x - pos[0], pellet.y - pos[1]) if closest is None or distance < closest_distance: closest = pellet closest_distance = distance else: sensors[i+2] = 0 sensors[i+3] = pellet.reward i = i + num_sensors if closest is not None: # freebie for scripted agents: tell agent the closest crumb! sensors[3] = closest.x sensors[4] = closest.y return True def is_episode_over(self, agent): """ is the current episode over for the agent? """ state = self.get_state(agent) if self.max_steps != 0 and state.step_count >= self.max_steps: return True return False def cleanup(self): """ cleanup the world """ self.environment = None return True
class RoombaEnvironment(OpenNero.Environment): """ Sample Environment for the Sandbox """ def __init__(self, xdim, ydim): """ Create the environment """ OpenNero.Environment.__init__(self) self.XDIM = xdim self.YDIM = ydim self.max_steps = 500 self.states = {} # dictionary of agent states self.crumbs = world_handler.pattern_cluster(500, "Roomba/world_config.txt") # only keep crumbs that are inside the walls self.crumbs = [c for c in self.crumbs if in_bounds(c.x,c.y)] self.init_list = AgentInit() self.init_list.add_type("<class 'Roomba.roomba.RoombaBrain'>") self.init_list.add_type("<class 'Roomba.RTNEATAgent.RTNEATAgent'>") #print self.init_list.types roomba_abound = self.init_list.get_action("<class 'Roomba.roomba.RoombaBrain'>") roomba_sbound = self.init_list.get_sensor("<class 'Roomba.roomba.RoombaBrain'>") roomba_rbound = self.init_list.get_reward("<class 'Roomba.roomba.RoombaBrain'>") rtneat_abound = self.init_list.get_action("<class 'Roomba.RTNEATAgent.RTNEATAgent'>") rtneat_sbound = self.init_list.get_sensor("<class 'Roomba.RTNEATAgent.RTNEATAgent'>") rtneat_rbound = self.init_list.get_reward("<class 'Roomba.RTNEATAgent.RTNEATAgent'>") ### Bounds for Roomba ### # actions roomba_abound.add_continuous(-math.pi, math.pi) # amount to turn by # sensors roomba_sbound.add_discrete(0,1) # wall bump roomba_sbound.add_continuous(0,xdim) # self.x roomba_sbound.add_continuous(0,ydim) # self.y roomba_sbound.add_continuous(0,xdim) # closest.x roomba_sbound.add_continuous(0,ydim) # closest.y # rewards roomba_rbound.add_continuous(-100,100) # range for reward ### End Bounds for Roomba #### ### Bounds for RTNEAT ### # actions rtneat_abound.add_continuous(-math.pi, math.pi) # amount to turn by # sensors rtneat_sbound.add_continuous(-1, 1) rtneat_sbound.add_continuous(-1, 1) rtneat_sbound.add_continuous(-1, 1) rtneat_sbound.add_continuous(-1, 1) rtneat_sbound.add_continuous(-1, 1) rtneat_sbound.add_continuous(-1, 1) # rewards rtneat_rbound.add_continuous(-1, 1) ### End Bounds for RTNEAT ### # set up shop # Add Wayne's Roomba room with experimentally-derived vertical offset to match crumbs. common.addObject("data/terrain/RoombaRoom.xml", OpenNero.Vector3f(xdim/2,ydim/2, -1), OpenNero.Vector3f(0,0,0), OpenNero.Vector3f(xdim/245.0, ydim/245.0, constants.HEIGHT/24.5), type = constants.OBJECT_TYPE_WALLS) # OpenNero.getSimContext().addAxes() self.add_crumbs() for crumb in self.crumbs: self.add_crumb_sensors(roomba_sbound) def get_state(self, agent): if agent in self.states: return self.states[agent] else: print "new state created" pos = agent.state.position rot = agent.state.rotation self.states[agent] = AgentState(pos, rot) return self.states[agent] def randomize(self): self.crumbs = world_handler.read_pellets() # only keep crumbs that are inside the walls self.crumbs = [c for c in self.crumbs if in_bounds(c.x,c.y)] def add_crumb_sensors(self, roomba_sbound): """Add the crumb sensors, in order: x position of crumb (0 to XDIM, continuous), y position of crumb (0 to XDIM, continuous), whether crumb is present at said position or has been vacced (0 or 1), and reward for vaccing said crumb.""" roomba_sbound.add_continuous(0, self.XDIM) # crumb position X roomba_sbound.add_continuous(0, self.YDIM) # crumb position Y roomba_sbound.add_discrete(0, 1) # crumb present/ not present roomba_sbound.add_discrete(1, 5) # reward for crumb def add_crumbs(self): for pellet in self.crumbs: if not (pellet.x, pellet.y) in getMod().marker_map: getMod().mark_blue(pellet.x, pellet.y) def reset(self, agent): """ reset the environment to its initial state """ state = self.get_state(agent) state.reset() agent.state.position = state.initial_position agent.state.rotation = state.initial_rotation agent.state.velocity = state.initial_velocity state.episode_count += 1 self.add_crumbs() print "Episode %d complete" % state.episode_count return True def get_agent_info(self, agent): """ return a blueprint for a new agent """ return self.init_list.get_info(str(type(agent))) def num_sensors(self): """ Return total number of sensors """ return (len(getMod().marker_map)*4 + constants.N_FIXED_SENSORS) def step(self, agent, action): """ A step for an agent """ state = self.get_state(agent) # the agent's status if (state.is_out == True): getMod().unmark(agent.state.position.x, agent.state.position.y) else: assert(self.get_agent_info(agent).actions.validate(action)) # check if the action is valid if (str(type(agent)) == "<class 'Roomba.roomba.RoombaBrain'>"): angle = action[0] # in range of -pi to pi degree_angle = math.degrees(angle) delta_angle = degree_angle - agent.state.rotation.z delta_dist = constants.MAX_SPEED else: angle = action[0] # in range of -pi to pi degree_angle = math.degrees(angle) delta_angle = degree_angle - agent.state.rotation.z delta_dist = constants.MAX_SPEED reward = self.update_position(agent, delta_dist, delta_angle) state.reward += reward return reward # delta_angle (degrees) is change in angle # delta_dist is change in distance (or velocity, since unit of time unchanged) def update_position(self, agent, delta_dist, delta_angle): """ Updates position of the agent and collects pellets. """ state = self.get_state(agent) state.step_count += 1 position = agent.state.position rotation = agent.state.rotation # posteriori collision detection rotation.z = common.wrap_degrees(rotation.z, delta_angle) position.x += delta_dist*math.cos(math.radians(rotation.z)) position.y += delta_dist*math.sin(math.radians(rotation.z)) # check if one of 4 out-of-bound conditions applies # if yes, back-track to correct position if (position.x) < 0 or (position.y) < 0 or \ (position.x) > self.XDIM or (position.y) > self.YDIM: # correct position if (position.x) < 0: position.x -= 2 * delta_dist*math.cos(math.radians(rotation.z)) if (position.y) < 0: position.y -= 2 * delta_dist*math.sin(math.radians(rotation.z)) if (position.x) > self.XDIM: position.x -= 2 * delta_dist*math.cos(math.radians(rotation.z)) if (position.y) > self.YDIM: position.y -= 2 * delta_dist*math.sin(math.radians(rotation.z)) elif position.x < self.XDIM * 0.174 and position.y > self.YDIM * (1.0 - 0.309): position.x -= 2 * delta_dist*math.cos(math.radians(rotation.z)) position.y -= 2 * delta_dist*math.sin(math.radians(rotation.z)) elif furniture_collide_all(position.x, position.y): position.x -= 2 * delta_dist*math.cos(math.radians(rotation.z)) position.y -= 2 * delta_dist*math.sin(math.radians(rotation.z)) # register new position state.position = position state.rotation = rotation agent.state.position = position agent.state.rotation = rotation reward = 0 # remove all crumbs within ROOMBA_RAD of agent position pos = (position.x, position.y) for crumb in self.crumbs: if (crumb.x, crumb.y) in getMod().marker_map: distance = math.hypot(crumb[0] - pos[0], crumb[1] - pos[1]) if distance < constants.ROOMBA_RAD: getMod().unmark(crumb.x, crumb.y) reward += crumb.reward # check if agent has expended its step allowance if (self.max_steps != 0) and (state.step_count >= self.max_steps): state.is_out = True # if yes, mark it to be removed return reward def sense(self, agent, sensors): """ figure out what the agent should sense """ state = self.get_state(agent) if (str(type(agent)) == "<class 'Roomba.roomba.RoombaBrain'>"): if state.bumped: sensors[0] = 1 state.bumped = False else: sensors[0] = 0 # get agent's position pos = agent.state.position sensors[1] = pos.x sensors[2] = pos.y # describe every other crumb on the field! self.sense_crumbs(sensors, constants.N_S_IN_BLOCK, constants.N_FIXED_SENSORS, agent) else: """ Copied over from creativeit branch """ sensors[0] = constants.MAX_DISTANCE sensors[1] = constants.MAX_DISTANCE sensors[2] = constants.MAX_DISTANCE sensors[3] = constants.MAX_DISTANCE sensors[4] = -1 sensors[5] = constants.MAX_DISTANCE # The first four sensors detect the distance to the nearest cube in each of the # four quadrants defined by the coordinate frame attached to the agent. The # positive X axis of the coordinate frame is oriented in the forward direction # with respect to the agent. The fifth sensor detects the minimum angular # distance between the agent and the nearest cubes detected by the other sensors. # All sensor readings are normalized to lie in [-1, 1]. for cube_position in getMod().marker_map: distx = cube_position[0] - agent.state.position.x disty = cube_position[1] - agent.state.position.y dist = math.hypot(distx, disty) angle = math.degrees(math.atan2(disty, distx)) - agent.state.rotation.z # range [-360, 360] if angle > 180: angle = angle - 360 if angle < -180: angle = angle + 360 angle = angle/180 # range [-1, 1] if angle >= -1 and angle < -0.5: if dist < sensors[0]: sensors[0] = dist if math.fabs(angle) < math.fabs(sensors[4]): sensors[4] = angle elif angle >= -0.5 and angle < 0: if dist < sensors[1]: sensors[1] = dist if math.fabs(angle) < math.fabs(sensors[4]): sensors[4] = angle elif angle >= 0 and angle < 0.5: if dist < sensors[2]: sensors[2] = dist if math.fabs(angle) < math.fabs(sensors[4]): sensors[4] = angle else: if dist < sensors[3]: sensors[3] = dist if math.fabs(angle) < math.fabs(sensors[4]): sensors[4] = angle # Any distance sensor that still has the value MAX_DISTANCE is set to -1. for i in range(0, 6): if i != 4 and sensors[i] >= constants.MAX_DISTANCE: sensors[i] = -1 # Invert and normalize the remaining distance sensor values to [0, 1] maxval = max(sensors[0], sensors[1], sensors[2], sensors[3], sensors[5]) if maxval > 0: for i in range(0, 6): if i != 4 and sensors[i] > 0: sensors[i] = 1 - (sensors[i]/maxval) # Now, sensors that do not detect any cubes/wall will have the value -1, # sensors that detect cubes/wall at maxval distance will have the value 0, # and sensors that detect cubes/wall at zero distance will have value 1. return sensors def sense_crumbs(self, sensors, num_sensors, start_sensor, agent): """ Generate a (big) array of observations, num_sensors for each crumb and store them inside sensors starting at start_sensor. Each crumb is stored as: (x,y,exists?,reward) """ i = start_sensor closest = None closest_distance = None pos = agent.state.position pos = (pos.x, pos.y) for pellet in self.crumbs: sensors[i] = pellet.x sensors[i+1] = pellet.y if (pellet.x, pellet.y) in getMod().marker_map: sensors[i+2] = 1 distance = math.hypot(pellet.x - pos[0], pellet.y - pos[1]) if closest is None or distance < closest_distance: closest = pellet closest_distance = distance else: sensors[i+2] = 0 sensors[i+3] = pellet.reward i = i + num_sensors if closest is not None: # freebie for scripted agents: tell agent the closest crumb! sensors[3] = closest.x sensors[4] = closest.y return True def is_episode_over(self, agent): """ is the current episode over for the agent? """ state = self.get_state(agent) if self.max_steps != 0 and state.step_count >= self.max_steps: return True return False def cleanup(self): """ cleanup the world """ self.environment = None return True