def is_episode_over(self, agent):
    """ is the current episode over for the agent? """
    # Turrets never end their episode.
    if agent.group == 'Turret':
        return False
    team = agent.get_team()
    state = self.get_state(agent)
    # "dead": damage cap enabled (hitpoints > 0) and this agent has absorbed it all.
    dead = self.hitpoints > 0 and state.total_damage >= self.hitpoints
    # "old": lifetime cap enabled and the agent's step count is a multiple of it.
    old = self.lifetime > 0 and agent.step > 0 and 0 == agent.step % self.lifetime
    if agent.ai == 'qlearning':
        # Q-learning agents persist across lifetimes: rather than ending the
        # episode, reset their damage and move them back to a spawn pose.
        if dead or old:
            # simulate a respawn by moving this agent towards the spawn location.
            state.total_damage = 0
            state.randomize()
            agent.state.position = copy.copy(state.initial_position)
            agent.state.rotation = copy.copy(state.initial_rotation)
            agent.teleport()
        return False
    # Evolution-driven teams run either rtNEAT or rtNEAT+Q; try both keys.
    rtneat = OpenNero.get_ai("rtneat-%s" % team)
    if not rtneat:
        rtneat = OpenNero.get_ai("rtneatq-%s" % team)
    # "orphaned": an evolution AI exists but no longer tracks this agent's organism.
    orphaned = rtneat and not rtneat.has_organism(agent)
    return orphaned or dead or old
def spawnAgent(self, team=constants.OBJECT_TYPE_TEAM_0, ai=None):
    """
    This is the function run when an agent already in the field causes the
    generation of a new agent.

    Returns the id of the spawned agent, or None if there is no environment.
    """
    if not self.environment:
        return
    # lazily create the team's evolution AI the first time that kind spawns
    if ai == 'rtneat' and not OpenNero.get_ai('rtneat-%s' % team):
        self.start_rtneat(team)
    if ai == 'rtneatq' and not OpenNero.get_ai('rtneatq-%s' % team):
        self.start_rtneatq(team)
    self.curr_team = team
    color = constants.TEAM_LABELS[team]
    # random jitter around the spawn point so agents don't stack up.
    # NOTE(review): dy is also derived from XDIM (not YDIM) — presumably a
    # square jitter box is intended, but confirm this isn't a copy-paste slip.
    dx = random.randrange(constants.XDIM / 20) - constants.XDIM / 40
    dy = random.randrange(constants.XDIM / 20) - constants.XDIM / 40
    return common.addObject(
        "data/shapes/character/steve_%s_%s.xml" % (color, ai),
        OpenNero.Vector3f(self.spawn_x[team] + dx, self.spawn_y[team] + dy, 2),
        type=team)
def ltChange(self, value):
    """Record the new agent lifetime and push it to every team's evolution AI."""
    self.environment.lifetime = value
    for team in constants.TEAMS:
        # a team runs either plain rtNEAT or rtNEAT+Q, never both
        ai = OpenNero.get_ai("rtneat-%s" % team) or OpenNero.get_ai("rtneatq-%s" % team)
        if ai:
            ai.set_lifetime(value)
def set_weight(self, key, value):
    """Store a fitness-dimension weight and forward it to each team's AI."""
    self.reward_weights[key] = value
    for team in self.teams:
        # prefer the rtNEAT AI; fall back to rtNEAT+Q if that is what the team runs
        ai = OpenNero.get_ai("rtneat-%s" % team) or OpenNero.get_ai("rtneatq-%s" % team)
        if ai:
            ai.set_weight(constants.FITNESS_INDEX[key], value)
def load_team(self, location, team=constants.OBJECT_TYPE_TEAM_0):
    """Load a saved team, then freeze it: effectively infinite lifetime,
    evolution disabled, and the AI paused until the user starts it."""
    NERO.module.NeroModule.load_team(self, location, team)
    ai = OpenNero.get_ai('rtneat-%s' % team)
    if ai:
        ai.set_lifetime(sys.maxint)  # never age the loaded organisms out
        ai.disable_evolution()
    OpenNero.disable_ai() # don't run until button
def ltChange(self, value):
    """Store the new lifetime and apply it to both teams' rtNEAT AIs."""
    self.lt = value
    both_teams = (constants.OBJECT_TYPE_TEAM_0, constants.OBJECT_TYPE_TEAM_1)
    for team in both_teams:
        ai = OpenNero.get_ai("rtneat-%s" % team)
        if ai:
            ai.set_lifetime(value)
def set_weight(self, key, value): i = constants.FITNESS_INDEX[key] value = (value - 100) / 100.0 # value in [-1, 1] for team in (constants.OBJECT_TYPE_TEAM_0, constants.OBJECT_TYPE_TEAM_1): rtneat = OpenNero.get_ai("rtneat-%s" % team) if rtneat: rtneat.set_weight(i, value) print key, value
def setAdvice(self, advice, team=constants.OBJECT_TYPE_TEAM_0): """ advice for rtneat agents """ # if there are rtneat agents in the environment, give them some advice rtneat = OpenNero.get_ai("rtneat-%s" % team) if not rtneat: rtneat = OpenNero.get_ai("rtneatq-%s" % team) if rtneat: try: rtneat.advice = OpenNero.Advice(advice, rtneat, constants.N_SENSORS+1, constants.N_ACTIONS, True, self.sbounds_network, self.sbounds_advice) except RuntimeError as err: err_msg = \ '<message><content class="edu.utexas.cs.nn.opennero.ErrorMessage"' + \ ' name="%s" text="%s" /></message>' % ('Advice Error', err.message) getServer().write_data(err_msg) return for agent in self.environment.teams[team]: agent.has_new_advice = True
def maybe_spawn(self, agent):
    '''Spawn more agents if there are more to spawn.'''
    # only evolution-driven field agents trigger further spawning
    if agent.ai not in ('rtneat', 'rtneatq') or agent.group != 'Agent':
        return
    team = agent.get_team()
    ai = OpenNero.get_ai('rtneat-%s' % team) or OpenNero.get_ai('rtneatq-%s' % team)
    if not ai or not ai.ready():
        return
    friends, foes = self.getFriendFoe(agent)
    if len(friends) >= constants.pop_size:
        return
    # only one agent (the first of its kind) requests the spawn, so a
    # single step adds at most one new agent per team
    same_kind = [f for f in friends if f.ai == agent.ai]
    if agent is same_kind[0]:
        module.getMod().spawnAgent(team=team, ai=agent.ai)
def __init__(self):
    """
    Create the environment
    """
    OpenNero.Environment.__init__(self)
    self.curr_id = 0
    self.max_steps = 20
    # diagonal of the playing field, used to normalize distances
    self.MAX_DIST = math.hypot(constants.XDIM, constants.YDIM)
    self.states = {}  # per-agent state objects
    self.teams = {}   # agents grouped by team
    self.script = 'Hw5/menu.py'
    abound = OpenNero.FeatureVectorInfo() # actions
    sbound = OpenNero.FeatureVectorInfo() # sensors
    rbound = OpenNero.FeatureVectorInfo() # rewards
    # actions
    abound.add_continuous( -1, 1 ) # forward/backward speed (gets multiplied by constants.MAX_MOVEMENT_SPEED)
    abound.add_continuous( -constants.MAX_TURN_RADIANS, constants.MAX_TURN_RADIANS) # left/right turn (in radians)
    # sensor dimensions
    for a in range(constants.N_SENSORS):
        sbound.add_continuous(0, 1)
    # Rewards
    # the enviroment returns the raw multiple dimensions of the fitness as
    # they get each step. This then gets combined into, e.g. Z-score, by
    # the ScoreHelper in order to calculate the final rtNEAT-fitness
    for f in constants.FITNESS_DIMENSIONS:
        # we don't care about the bounds of the individual dimensions
        rbound.add_continuous(-sys.float_info.max, sys.float_info.max) # range for reward
    # initialize the rtNEAT algorithm parameters
    # input layer has enough nodes for all the observations plus a bias
    # output layer has enough values for all the actions
    # population size matches ours
    # 1.0 is the weight initialization noise
    # NOTE(review): this overload is passed an (empty) Population seed and the
    # literal 1, while the comment block above describes the sensor/action-count
    # overload used elsewhere in this file — confirm the intended constructor.
    rtneat = OpenNero.RTNEAT("data/ai/neat-params.dat", OpenNero.Population(), constants.pop_size, 1)
    key = "rtneat-%s" % constants.OBJECT_TYPE_TEAM_0
    OpenNero.set_ai(key, rtneat)
    print "get_ai(%s): %s" % (key, OpenNero.get_ai(key))
    # set the initial lifetime
    lifetime = module.getMod().lt
    rtneat.set_lifetime(lifetime)
    print 'rtNEAT lifetime:', lifetime
    self.agent_info = OpenNero.AgentInitInfo(sbound, abound, rbound)
def __init__(self): """ Create the environment """ OpenNero.Environment.__init__(self) self.curr_id = 0 self.max_steps = 20 self.MAX_DIST = math.hypot(constants.XDIM, constants.YDIM) self.states = {} self.teams = {} self.script = 'Hw5/menu.py' abound = OpenNero.FeatureVectorInfo() # actions sbound = OpenNero.FeatureVectorInfo() # sensors rbound = OpenNero.FeatureVectorInfo() # rewards # actions abound.add_continuous(-1, 1) # forward/backward speed (gets multiplied by constants.MAX_MOVEMENT_SPEED) abound.add_continuous(-constants.MAX_TURN_RADIANS, constants.MAX_TURN_RADIANS) # left/right turn (in radians) # sensor dimensions for a in range(constants.N_SENSORS): sbound.add_continuous(0, 1); # Rewards # the enviroment returns the raw multiple dimensions of the fitness as # they get each step. This then gets combined into, e.g. Z-score, by # the ScoreHelper in order to calculate the final rtNEAT-fitness for f in constants.FITNESS_DIMENSIONS: # we don't care about the bounds of the individual dimensions rbound.add_continuous(-sys.float_info.max, sys.float_info.max) # range for reward # initialize the rtNEAT algorithm parameters # input layer has enough nodes for all the observations plus a bias # output layer has enough values for all the actions # population size matches ours # 1.0 is the weight initialization noise rtneat = OpenNero.RTNEAT("data/ai/neat-params.dat", constants.N_SENSORS, constants.N_ACTIONS, constants.pop_size, 1.0, rbound, False) key = "rtneat-%s" % constants.OBJECT_TYPE_TEAM_0 OpenNero.set_ai(key, rtneat) print "get_ai(%s): %s" % (key, OpenNero.get_ai(key)) # set the initial lifetime lifetime = module.getMod().lt rtneat.set_lifetime(lifetime) print 'rtNEAT lifetime:', lifetime self.agent_info = OpenNero.AgentInitInfo(sbound, abound, rbound)
def save_team(self, location, team=constants.OBJECT_TYPE_TEAM_0):
    """Save a team to disk: the rtNEAT population first (if any), then
    append each Q-learning agent's serialized form and optional stats."""
    # if there are rtneat agents in the environment, save them as a group.
    rtneat = OpenNero.get_ai("rtneat-%s" % team)
    if rtneat:
        location = rtneat.save_population(str(location))
    # then, check whether there are any qlearning agents, and save them.
    with open(location, 'a') as handle:
        for agent in self.environment.teams[team]:
            if agent.group != 'Agent' or agent.ai != 'qlearning':
                continue
            handle.write('\n\n%s' % agent.to_string())
            if hasattr(agent, 'stats'):
                handle.write('\n\n%s' % agent.stats())
def setAdvice(self, advice, team=constants.OBJECT_TYPE_TEAM_0): """ advice for rtneat agents """ # if there are rtneat agents in the environment, give them some advice rtneat = OpenNero.get_ai("rtneat-%s" % team) if not rtneat: rtneat = OpenNero.get_ai("rtneatq-%s" % team) if rtneat: try: rtneat.advice = OpenNero.Advice(advice, rtneat, constants.N_SENSORS + 1, constants.N_ACTIONS, True, self.sbounds_network, self.sbounds_advice) except RuntimeError as err: err_msg = \ '<message><content class="edu.utexas.cs.nn.opennero.ErrorMessage"' + \ ' name="%s" text="%s" /></message>' % ('Advice Error', err.message) getServer().write_data(err_msg) return for agent in self.environment.teams[team]: agent.has_new_advice = True
def maybe_spawn(self, agent):
    '''Spawn more agents if there are more to spawn.'''
    # do not spawn just because a first person agent is on the field
    if isinstance(agent, FirstPersonAgent):
        return
    team = agent.get_team()
    # fixed: guard against a team with no registered rtNEAT AI — the
    # original dereferenced get_ai()'s result unconditionally, raising
    # AttributeError on None (the sibling maybe_spawn guards this).
    rtneat = OpenNero.get_ai("rtneat-%s" % team)
    if not rtneat or not rtneat.ready():
        return
    friends, foes = self.getFriendFoe(agent)
    # treat an empty friend list as a single placeholder so friends[0] is safe
    friends = tuple(friends or [None])
    # only the first agent on the team requests the spawn, and only while
    # the team is below its population cap
    if (agent.group == 'Agent' and agent is friends[0] and
        len(friends) < constants.pop_size):
        module.getMod().spawnAgent(team)
def start_rtneat(self, team=constants.OBJECT_TYPE_TEAM_0): # initialize the rtNEAT algorithm parameters # input layer has enough nodes for all the observations plus a bias # output layer has enough values for all the actions # population size matches ours # 1.0 is the weight initialization noise rtneat = OpenNero.RTNEAT("data/ai/neat-params.dat", constants.N_SENSORS + 1, constants.N_ACTIONS, constants.pop_size, 1.0, rtneat_rewards(), False) key = "rtneat-%s" % team OpenNero.set_ai(key, rtneat) print "get_ai(%s): %s" % (key, OpenNero.get_ai(key)) rtneat.set_lifetime(self.environment.lifetime)
def start_rtneatq(self, team=constants.OBJECT_TYPE_TEAM_0): # initialize the rtNEAT+Q algorithm parameters # input layer has enough nodes for all the observations plus a bias # output layer has enough values for all the wires # population size matches ours # 1.0 is the weight initialization noise rtneatq = OpenNero.RTNEAT("data/ai/neat-params.dat", constants.N_SENSORS+1, constants.N_ACTION_CANDIDATES * (constants.N_ACTIONS + 1), constants.pop_size, 1.0, rtneat_rewards(), False) key = "rtneatq-%s" % team OpenNero.set_ai(key, rtneatq) print "get_ai(%s): %s" % (key, OpenNero.get_ai(key)) rtneatq.set_lifetime(self.environment.lifetime)
def is_episode_over(self, agent):
    """ is the current episode over for the agent? """
    # turrets and first-person agents never finish an episode
    if agent.group == "Turret":
        return False
    if isinstance(agent, FirstPersonAgent):
        return False # first person agents never stop
    # episode length tracks the module's lifetime setting (0 = unlimited)
    self.max_steps = module.getMod().lt
    if self.max_steps != 0 and agent.step >= self.max_steps:
        return True
    team = agent.get_team()
    # fixed: guard the AI lookup — the original called has_organism on
    # get_ai()'s result unconditionally, raising AttributeError when no
    # rtNEAT AI is registered for the team (sibling code guards this).
    rtneat = OpenNero.get_ai("rtneat-%s" % team)
    if rtneat and not rtneat.has_organism(agent):
        return True  # agent's organism was dropped by evolution
    state = self.get_state(agent)
    # end the episode once accumulated damage reaches the hitpoint cap (0 = off)
    if module.getMod().hp != 0 and state.total_damage >= module.getMod().hp:
        return True
    return False
def save_rtneat(self, location, pop, team=constants.OBJECT_TYPE_TEAM_0):
    """Save the team's rtNEAT population, rebasing the target path
    relative to the filesystem root."""
    target = os.path.relpath("/") + location
    rtneat = OpenNero.get_ai("rtneat-%s" % team)
    rtneat.save_population(str(target))
def get_org(self):
    """Look up this agent's rtNEAT organism via its team's registered AI."""
    ai = OpenNero.get_ai("rtneat-%s" % self.get_team())
    return ai.get_organism(self)
def start_rtneat(self, team=constants.OBJECT_TYPE_TEAM_0):
    """Start rtNEAT for the team, then freeze it: effectively infinite
    lifetime and evolution disabled."""
    NERO.module.NeroModule.start_rtneat(self, team)
    ai = OpenNero.get_ai('rtneat-%s' % team)
    if ai:
        ai.set_lifetime(sys.maxint)  # never age the organisms out
        ai.disable_evolution()
def end(self, time, reward):
    """ end of an episode """
    # hand the organism back to the team's rtNEAT AI for reuse/evaluation
    key = "rtneat-%s" % self.get_team()
    OpenNero.get_ai(key).release_organism(self)
    return True