def update_position(self, agent, delta_dist, delta_angle):
    """Advance the agent along its heading and collect nearby pellets.

    Turns the agent by delta_angle, steps it forward by delta_dist,
    backs it out of any illegal region (a-posteriori collision handling),
    commits the pose, and returns the reward from crumbs swept up within
    ROOMBA_RAD of the new position.
    """
    agent_state = self.get_state(agent)
    agent_state.step_count += 1
    position = agent.state.position
    rotation = agent.state.rotation
    # turn first, then step along the new heading
    rotation.z = common.wrap_degrees(rotation.z, delta_angle)
    step_x = delta_dist * math.cos(math.radians(rotation.z))
    step_y = delta_dist * math.sin(math.radians(rotation.z))
    position.x += step_x
    position.y += step_y
    # a-posteriori collision detection: if the move landed somewhere
    # illegal, reflect back past the starting point (subtract twice the
    # step) along each violated axis
    if (position.x < 0 or position.y < 0 or
            position.x > self.XDIM or position.y > self.YDIM):
        if position.x < 0:
            position.x -= 2 * step_x
        if position.y < 0:
            position.y -= 2 * step_y
        if position.x > self.XDIM:
            position.x -= 2 * step_x
        if position.y > self.YDIM:
            position.y -= 2 * step_y
    elif position.x < self.XDIM * 0.174 and position.y > self.YDIM * (1.0 - 0.309):
        # the upper-left cut-out region of the room is off limits
        position.x -= 2 * step_x
        position.y -= 2 * step_y
    elif furniture_collide_all(position.x, position.y):
        # bumped into a piece of furniture — back out of the move
        position.x -= 2 * step_x
        position.y -= 2 * step_y
    # commit the (possibly corrected) pose to our state and the engine's
    agent_state.position = position
    agent_state.rotation = rotation
    agent.state.position = position
    agent.state.rotation = rotation
    reward = 0
    # sweep up every still-marked crumb within ROOMBA_RAD of the agent
    here = (position.x, position.y)
    for crumb in self.crumbs:
        if (crumb.x, crumb.y) in getMod().marker_map:
            if math.hypot(crumb[0] - here[0], crumb[1] - here[1]) < constants.ROOMBA_RAD:
                getMod().unmark(crumb.x, crumb.y)
                reward += crumb.reward
    # once the step allowance is spent, mark the agent to be removed
    if self.max_steps != 0 and agent_state.step_count >= self.max_steps:
        agent_state.is_out = True
    return reward
def update_pose(self, move_by, turn_by):
    """Apply a (move, turn) command to this agent's pose.

    Turns the heading by turn_by degrees, advances by
    move_by * MAX_MOVEMENT_SPEED along the new heading, remembers the
    previous pose, and writes the new position/rotation back into the
    simulation state.
    """
    step = constants.MAX_MOVEMENT_SPEED * move_by
    new_heading = common.wrap_degrees(self.agent.state.rotation.z, turn_by)
    theta = math.radians(new_heading)
    new_x = self.agent.state.position.x + step * math.cos(theta)
    new_y = self.agent.state.position.y + step * math.sin(theta)
    # remember where we came from, then adopt the new pose
    self.prev_pose = self.pose
    self.pose = (new_x, new_y, new_heading)
    # push the new position into the engine's agent state
    new_pos = copy.copy(self.agent.state.position)
    new_pos.x = new_x
    new_pos.y = new_y
    self.agent.state.position = new_pos
    # push the new rotation into the engine's agent state
    new_rot = copy.copy(self.agent.state.rotation)
    new_rot.z = new_heading
    self.agent.state.rotation = new_rot
def step(self, agent, action):
    """
    2A step for an agent.

    Executes one simulation step: initializes pose on the agent's first
    step, decides whether a shot fires and hits, computes the reward,
    and moves the agent unless the motion would collide with a teammate
    or the closest enemy.  Returns the reward for this step.
    """
    state = self.get_state(agent)
    # Initialize agent state on the very first step (non-turret agents
    # get a random spawn heading)
    if agent.step == 0 and agent.group != "Turret":
        p = agent.state.position
        r = agent.state.rotation
        if agent.group == "Agent":
            r.z = random.randrange(360)
            agent.state.rotation = r
        state.reset_pose(p, r)
        return agent.rewards.get_instance()
    # display agent info if necessary
    if hasattr(agent, "set_display_hint"):
        agent.set_display_hint()
    # get the desired action of the agent
    move_by = action[constants.ACTION_INDEX_SPEED]
    turn_by = math.degrees(action[constants.ACTION_INDEX_TURN])
    firing = action[constants.ACTION_INDEX_FIRE]
    firing_status = firing >= 0.5
    scored_hit = False
    # firing decision: only shoot when roughly facing the closest enemy
    closest_enemy = self.closest_enemy(agent)
    if firing_status:
        if closest_enemy is not None:
            pose = state.pose
            closest_enemy_pose = self.get_state(closest_enemy).pose
            relative_angle = self.angle(pose, closest_enemy_pose)
            if abs(relative_angle) <= 2:
                source_pos = agent.state.position
                closest_enemy_pos = closest_enemy.state.position
                # raise both endpoints of the shot ray off the ground
                # NOTE(review): if state.position returns a live reference
                # rather than a copy, these writes permanently raise the
                # agents by 5 each shot attempt — confirm getter semantics
                source_pos.z = source_pos.z + 5
                closest_enemy_pos.z = closest_enemy_pos.z + 5
                dist = closest_enemy_pos.getDistanceFrom(source_pos)
                d = (constants.MAX_SHOT_RADIUS - dist) / constants.MAX_SHOT_RADIUS
                if random.random() < d / 2:
                    # attempt a shot depending on distance; ray color is
                    # chosen by team for display purposes
                    team_color = constants.TEAM_LABELS[agent.team_type]
                    if team_color == "red":
                        color = OpenNero.Color(255, 255, 0, 0)
                    elif team_color == "blue":
                        color = OpenNero.Color(255, 0, 0, 255)
                    else:
                        color = OpenNero.Color(255, 255, 255, 0)
                    wall_color = OpenNero.Color(128, 0, 255, 0)
                    obstacles = OpenNero.getSimContext().findInRay(
                        source_pos,
                        closest_enemy_pos,
                        constants.OBJECT_TYPE_OBSTACLE,
                        True,
                        wall_color,
                        color
                    )
                    # if len(obstacles) == 0 and random.random() < d/2:
                    if len(obstacles) == 0:
                        # line of sight is clear — count as a hit
                        self.get_state(closest_enemy).curr_damage += 1
                        scored_hit = True
            else:
                # not facing the enemy yet: turn toward it instead
                turn_by = relative_angle
    # set animation speed
    # TODO: move constants into constants.py
    self.set_animation(agent, state, "run")
    # NOTE(review): `delay` is never used below — possibly animation_speed
    # was meant to be scaled by it; confirm before removing
    delay = OpenNero.getSimContext().delay
    agent.state.animation_speed = move_by * constants.ANIMATION_RATE
    reward = self.calculate_reward(agent, action, scored_hit)
    # NOTE(review): `team` is unused in this variant of step()
    team = self.get_team(agent)
    # tell the system to make the calculated motion
    # if the motion doesn't result in a collision
    dist = constants.MAX_MOVEMENT_SPEED * move_by
    heading = common.wrap_degrees(agent.state.rotation.z, turn_by)
    x = agent.state.position.x + dist * math.cos(math.radians(heading))
    y = agent.state.position.y + dist * math.sin(math.radians(heading))
    # manual collision detection against teammates
    desired_pose = (x, y, heading)
    collision_detected = False
    friends, foes = self.get_friend_foe(agent)
    for f in friends:
        if f != agent:
            f_state = self.get_state(f)
            # we impose an order on agents to avoid deadlocks. Without this
            # two agents which spawn very close to each other can never escape
            # each other's collision radius
            if state.id > f_state.id:
                f_pose = f_state.pose
                dist = self.distance(desired_pose, f_pose)
                if dist < constants.MANUAL_COLLISION_DISTANCE:
                    collision_detected = True
                    continue
    # just check for collisions with the closest enemy
    if closest_enemy:
        if not collision_detected:
            f_pose = self.get_state(closest_enemy).pose
            dist = self.distance(desired_pose, f_pose)
            if dist < constants.MANUAL_COLLISION_DISTANCE:
                collision_detected = True
    # only move if the desired pose is collision-free
    if not collision_detected:
        state.update_pose(move_by, turn_by)
    return reward
def step(self, agent, action): """ 2A step for an agent """ # if this agent has a serialized representation waiting, load it. chunk = self.agents_to_load.get(agent.state.id) if chunk is not None: print 'loading agent', agent.state.id, 'from', len(chunk), 'bytes' del self.agents_to_load[agent.state.id] try: agent.from_string(chunk) except: # if loading fails, remove this agent. print 'error loading agent', agent.state.id self.remove_agent(agent) # if a user has a badly formatted q-learning agent in a mixed # population file, the agent won't load and will be properly # removed here. however, RTNEAT has only allocated enough brainz # to cover (pop_size - num_qlearning_agents) agents, so whenever # it comes time to spawn new agents, RTNEAT will think that it # needs to spawn an extra agent to cover for this "missing" one. # to prevent this exception, we decrement pop_size here. # # this probably prevents teams from having the proper number of # agents if the user clicks on the deploy button after loading a # broken pop file ... but that's tricky to fix. constants.pop_size -= 1 return agent.info.reward.get_instance() # set the epsilon for this agent, in case it's changed recently. agent.epsilon = self.epsilon state = self.get_state(agent) #Initilize Agent state if agent.step == 0 and agent.group != "Turret": p = agent.state.position r = agent.state.rotation if agent.group == "Agent": r.z = random.randrange(360) agent.state.rotation = r state.reset_pose(p, r) return agent.info.reward.get_instance() # display agent info if neccessary if hasattr(agent, 'set_display_hint'): agent.set_display_hint() # spawn more agents if possible. 
self.maybe_spawn(agent) # get the desired action of the agent move_by = action[constants.ACTION_INDEX_SPEED] turn_by = math.degrees(action[constants.ACTION_INDEX_TURN]) firing = action[constants.ACTION_INDEX_FIRE] firing_status = (firing >= 0.5) scored_hit = False # firing decision closest_enemy = self.closest_enemy(agent) if firing_status: if closest_enemy is not None: pose = state.pose closest_enemy_pose = self.get_state(closest_enemy).pose relative_angle = self.angle(pose, closest_enemy_pose) if abs(relative_angle) <= 2: source_pos = agent.state.position closest_enemy_pos = closest_enemy.state.position source_pos.z = source_pos.z + 5 closest_enemy_pos.z = closest_enemy_pos.z + 5 dist = closest_enemy_pos.getDistanceFrom(source_pos) d = (constants.MAX_SHOT_RADIUS - dist)/constants.MAX_SHOT_RADIUS if random.random() < d/2: # attempt a shot depending on distance team_color = constants.TEAM_LABELS[agent.get_team()] if team_color == 'red': color = OpenNero.Color(255, 255, 0, 0) elif team_color == 'blue': color = OpenNero.Color(255, 0, 0, 255) else: color = OpenNero.Color(255, 255, 255, 0) wall_color = OpenNero.Color(128, 0, 255, 0) obstacles = OpenNero.getSimContext().findInRay( source_pos, closest_enemy_pos, constants.OBJECT_TYPE_OBSTACLE, True, wall_color, color) #if len(obstacles) == 0 and random.random() < d/2: if len(obstacles) == 0: # count as hit depending on distance self.get_state(closest_enemy).curr_damage += 1 scored_hit = True else: # turn toward the enemy turn_by = relative_angle # set animation speed # TODO: move constants into constants.py self.set_animation(agent, state, 'run') delay = OpenNero.getSimContext().delay agent.state.animation_speed = move_by * constants.ANIMATION_RATE reward = self.calculate_reward(agent, action, scored_hit) # tell the system to make the calculated motion # if the motion doesn't result in a collision dist = constants.MAX_MOVEMENT_SPEED * move_by heading = common.wrap_degrees(agent.state.rotation.z, turn_by) x = 
agent.state.position.x + dist * math.cos(math.radians(heading)) y = agent.state.position.y + dist * math.sin(math.radians(heading)) # manual collision detection desired_pose = (x, y, heading) collision_detected = False friends, foes = self.getFriendFoe(agent) for f in friends: if f != agent: f_state = self.get_state(f) # we impose an order on agents to avoid deadlocks. Without this # two agents which spawn very close to each other can never escape # each other's collision radius if state.id > f_state.id: f_pose = f_state.pose dist = self.distance(desired_pose, f_pose) if dist < constants.MANUAL_COLLISION_DISTANCE: collision_detected = True continue # no need to check for collisions with all enemies #if foes: # if not collision_detected: # for f in foes: # f_pose = self.get_state(f).pose # dist = self.distance(desired_pose, f_pose) # if dist < constants.MANUAL_COLLISION_DISTANCE: # collision_detected = True # continue # just check for collisions with the closest enemy if closest_enemy: if not collision_detected: f_pose = self.get_state(closest_enemy).pose dist = self.distance(desired_pose, f_pose) if dist < constants.MANUAL_COLLISION_DISTANCE: collision_detected = True if not collision_detected: state.update_pose(move_by, turn_by) return reward
def step(self, agent, action):
    """
    2A step for an agent.

    Runs one simulation step: initializes the pose on the agent's first
    step, decides whether a shot is attempted and whether it hits the
    closest enemy, computes the reward, and applies the requested motion
    unless it would collide with a teammate or the closest enemy.
    Returns this step's reward.
    """
    state = self.get_state(agent)
    # Initialize agent state on the first step; regular agents spawn
    # facing a random direction
    if agent.step == 0 and agent.group != "Turret":
        p = agent.state.position
        r = agent.state.rotation
        if agent.group == "Agent":
            r.z = random.randrange(360)
            agent.state.rotation = r
        state.reset_pose(p, r)
        return agent.rewards.get_instance()
    # display agent info if necessary
    if hasattr(agent, 'set_display_hint'):
        agent.set_display_hint()
    # get the desired action of the agent
    move_by = action[constants.ACTION_INDEX_SPEED]
    turn_by = math.degrees(action[constants.ACTION_INDEX_TURN])
    firing = action[constants.ACTION_INDEX_FIRE]
    firing_status = (firing >= 0.5)
    scored_hit = False
    # firing decision: shots are only attempted at the closest enemy and
    # only when the agent is within 2 degrees of facing it
    closest_enemy = self.closest_enemy(agent)
    if firing_status:
        if closest_enemy is not None:
            pose = state.pose
            closest_enemy_pose = self.get_state(closest_enemy).pose
            relative_angle = self.angle(pose, closest_enemy_pose)
            if abs(relative_angle) <= 2:
                source_pos = agent.state.position
                closest_enemy_pos = closest_enemy.state.position
                # lift the shot ray endpoints off the ground
                # NOTE(review): if state.position returns a live reference
                # rather than a copy, these writes accumulate +5 in z per
                # shot attempt — confirm getter semantics
                source_pos.z = source_pos.z + 5
                closest_enemy_pos.z = closest_enemy_pos.z + 5
                dist = closest_enemy_pos.getDistanceFrom(source_pos)
                d = (constants.MAX_SHOT_RADIUS - dist)/constants.MAX_SHOT_RADIUS
                if random.random() < d/2:
                    # attempt a shot depending on distance; the ray drawn
                    # in the sim is colored by team
                    team_color = constants.TEAM_LABELS[agent.team_type]
                    if team_color == 'red':
                        color = OpenNero.Color(255, 255, 0, 0)
                    elif team_color == 'blue':
                        color = OpenNero.Color(255, 0, 0, 255)
                    else:
                        color = OpenNero.Color(255, 255, 255, 0)
                    wall_color = OpenNero.Color(128, 0, 255, 0)
                    obstacles = OpenNero.getSimContext().findInRay(
                        source_pos,
                        closest_enemy_pos,
                        constants.OBJECT_TYPE_OBSTACLE,
                        True,
                        wall_color,
                        color)
                    #if len(obstacles) == 0 and random.random() < d/2:
                    if len(obstacles) == 0:
                        # nothing blocking the ray — count as a hit
                        self.get_state(closest_enemy).curr_damage += 1
                        scored_hit = True
            else:
                # not aligned with the enemy: spend the turn aiming
                turn_by = relative_angle
    # set animation speed
    # TODO: move constants into constants.py
    self.set_animation(agent, state, 'run')
    # NOTE(review): `delay` is never used below — confirm whether
    # animation_speed should be scaled by it
    delay = OpenNero.getSimContext().delay
    agent.state.animation_speed = move_by * constants.ANIMATION_RATE
    reward = self.calculate_reward(agent, action, scored_hit)
    # NOTE(review): `team` is unused in this variant of step()
    team = self.get_team(agent)
    # tell the system to make the calculated motion
    # if the motion doesn't result in a collision
    dist = constants.MAX_MOVEMENT_SPEED * move_by
    heading = common.wrap_degrees(agent.state.rotation.z, turn_by)
    x = agent.state.position.x + dist * math.cos(math.radians(heading))
    y = agent.state.position.y + dist * math.sin(math.radians(heading))
    # manual collision detection against teammates
    desired_pose = (x, y, heading)
    collision_detected = False
    friends, foes = self.get_friend_foe(agent)
    for f in friends:
        if f != agent:
            f_state = self.get_state(f)
            # we impose an order on agents to avoid deadlocks. Without this
            # two agents which spawn very close to each other can never escape
            # each other's collision radius
            if state.id > f_state.id:
                f_pose = f_state.pose
                dist = self.distance(desired_pose, f_pose)
                if dist < constants.MANUAL_COLLISION_DISTANCE:
                    collision_detected = True
                    continue
    # just check for collisions with the closest enemy
    if closest_enemy:
        if not collision_detected:
            f_pose = self.get_state(closest_enemy).pose
            dist = self.distance(desired_pose, f_pose)
            if dist < constants.MANUAL_COLLISION_DISTANCE:
                collision_detected = True
    # only move when the desired pose is collision-free
    if not collision_detected:
        state.update_pose(move_by, turn_by)
    return reward
def update_position(self, agent, delta_dist, delta_angle):
    """Advance the agent, penalize illegal moves, and collect pellets.

    Turns by delta_angle, steps forward by delta_dist, and backs out of
    any illegal move (out of bounds or the cut-out corner region) at a
    cost of -0.05 per violated constraint.  Every crumb swept within
    ROOMBA_RAD adds its reward and bumps the global crumb_count.
    Returns the net reward for this step.
    """
    global crumb_count
    agent_state = self.get_state(agent)
    agent_state.step_count += 1
    position = agent.state.position
    rotation = agent.state.rotation
    # turn first, then step along the new heading
    rotation.z = common.wrap_degrees(rotation.z, delta_angle)
    step_x = delta_dist * math.cos(math.radians(rotation.z))
    step_y = delta_dist * math.sin(math.radians(rotation.z))
    position.x += step_x
    position.y += step_y
    reward = 0
    # a-posteriori collision handling: reflect back past the starting
    # point (subtract twice the step) and charge a small penalty for
    # each violated boundary constraint
    if (position.x < 0 or position.y < 0 or
            position.x > self.XDIM or position.y > self.YDIM):
        if position.x < 0:
            position.x -= 2 * step_x
            reward -= .05
        if position.y < 0:
            position.y -= 2 * step_y
            reward -= .05
        if position.x > self.XDIM:
            position.x -= 2 * step_x
            reward -= .05
        if position.y > self.YDIM:
            position.y -= 2 * step_y
            reward -= .05
    elif position.x < self.XDIM * 0.174 and position.y > self.YDIM * (1.0 - 0.309):
        # the upper-left cut-out region of the room is off limits
        position.x -= 2 * step_x
        position.y -= 2 * step_y
        reward -= .05
    # Temp. disable collisions
    #elif furniture_collide_all(position.x, position.y):
    #    position.x -= 2 * delta_dist*math.cos(math.radians(rotation.z))
    #    position.y -= 2 * delta_dist*math.sin(math.radians(rotation.z))
    # commit the (possibly corrected) pose; note that the environment's
    # own rotation record is intentionally left untouched in this variant
    agent_state.position = position
    #agent_state.rotation = rotation
    agent.state.position = position
    agent.state.rotation = rotation
    # sweep up every still-marked crumb within ROOMBA_RAD of the agent
    here = (position.x, position.y)
    for crumb in self.crumbs:
        if (crumb.x, crumb.y) in getMod().marker_map:
            if math.hypot(crumb[0] - here[0], crumb[1] - here[1]) < constants.ROOMBA_RAD:
                getMod().unmark(crumb.x, crumb.y)
                reward += crumb.reward
                crumb_count += 1
    # once the step allowance is spent, mark the agent to be removed
    if self.max_steps != 0 and agent_state.step_count >= self.max_steps:
        agent_state.is_out = True
    return reward