def __init__(self):
    self.environment = None
    self.agent_id = None
    self.flag_loc = None
    self.flag_id = None
    self.set_speedup(constants.DEFAULT_SPEEDUP)

    x = constants.XDIM / 2.0
    y = constants.YDIM / 3.0
    self.spawn_x = {}
    self.spawn_y = {}
    self.set_spawn(x, y, constants.OBJECT_TYPE_TEAM_0)
    self.set_spawn(x, 2 * y, constants.OBJECT_TYPE_TEAM_1)

    # Bounds for sensors in the neural network and the advice language. These
    # bounds are used to convert sensor values between network and advice.
    self.sbounds_network = OpenNero.FeatureVectorInfo()
    self.sbounds_advice = OpenNero.FeatureVectorInfo()

    # Networks have a better learning bias when cube sensors produce values in
    # the range [-1, 1], but the range [0, 1] is more intuitive in the advice
    # language. Wall sensors use the same range [0, 1] for both network and
    # advice. The sense() method in the ForageEnvironment class uses these
    # network bounds to scale the sensor values.
    for i in range(constants.N_SENSORS):
        self.sbounds_network.add_continuous(0, 1)
        self.sbounds_advice.add_continuous(0, 1)

    # The last sensor is the bias, which always takes the value 1 (its upper
    # bound).
    self.sbounds_network.add_continuous(0, 1)
    self.sbounds_advice.add_continuous(0, 1)

    print 'sbounds_network', self.sbounds_network
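# The paired bounds above exist so sensor values can be converted between the
# network's range and the advice language's range. A minimal sketch of that
# per-dimension conversion, assuming a simple linear rescaling (the helper
# below is illustrative only, not part of OpenNero's API):
def rescale(value, src_lo, src_hi, dst_lo, dst_hi):
    """Map value linearly from [src_lo, src_hi] onto [dst_lo, dst_hi]."""
    t = (value - src_lo) / float(src_hi - src_lo)
    return dst_lo + t * (dst_hi - dst_lo)

# For example, an advice-range reading of 0.75 mapped onto a hypothetical
# network range of [-1, 1]:
#     rescale(0.75, 0, 1, -1, 1)  # -> 0.5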
def __init__(self): """ Create the environment """ OpenNero.Environment.__init__(self) self.curr_id = 0 self.max_steps = 20 self.MAX_DIST = math.hypot(constants.XDIM, constants.YDIM) self.states = {} self.teams = {} self.script = 'Hw5/menu.py' abound = OpenNero.FeatureVectorInfo() # actions sbound = OpenNero.FeatureVectorInfo() # sensors rbound = OpenNero.FeatureVectorInfo() # rewards # actions abound.add_continuous( -1, 1 ) # forward/backward speed (gets multiplied by constants.MAX_MOVEMENT_SPEED) abound.add_continuous( -constants.MAX_TURN_RADIANS, constants.MAX_TURN_RADIANS) # left/right turn (in radians) # sensor dimensions for a in range(constants.N_SENSORS): sbound.add_continuous(0, 1) # Rewards # the enviroment returns the raw multiple dimensions of the fitness as # they get each step. This then gets combined into, e.g. Z-score, by # the ScoreHelper in order to calculate the final rtNEAT-fitness for f in constants.FITNESS_DIMENSIONS: # we don't care about the bounds of the individual dimensions rbound.add_continuous(-sys.float_info.max, sys.float_info.max) # range for reward # initialize the rtNEAT algorithm parameters # input layer has enough nodes for all the observations plus a bias # output layer has enough values for all the actions # population size matches ours # 1.0 is the weight initialization noise rtneat = OpenNero.RTNEAT("data/ai/neat-params.dat", OpenNero.Population(), constants.pop_size, 1) key = "rtneat-%s" % constants.OBJECT_TYPE_TEAM_0 OpenNero.set_ai(key, rtneat) print "get_ai(%s): %s" % (key, OpenNero.get_ai(key)) # set the initial lifetime lifetime = module.getMod().lt rtneat.set_lifetime(lifetime) print 'rtNEAT lifetime:', lifetime self.agent_info = OpenNero.AgentInitInfo(sbound, abound, rbound)
def agent_info_tuple(self):
    abound = OpenNero.FeatureVectorInfo() # actions
    sbound = OpenNero.FeatureVectorInfo() # sensors

    # actions
    abound.add_continuous(-1, 1) # forward/backward speed
    abound.add_continuous(-constants.MAX_TURNING_RATE, constants.MAX_TURNING_RATE) # left/right turn (in radians)
    abound.add_continuous(0, 1) # fire
    abound.add_continuous(0, 1) # omit friend sensors

    # sensor dimensions
    for a in range(constants.N_SENSORS):
        sbound.add_continuous(0, 1)

    return sbound, abound, module.rtneat_rewards()
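# The action bounds above define the legal range per dimension; raw network
# outputs outside those ranges have to be clipped before use. A minimal,
# self-contained sketch of that clamping (illustrative helper; where exactly
# the engine enforces the bounds is an assumption here):
def clamp_actions(outputs, bounds):
    """bounds: list of (lo, hi) pairs, one per action dimension."""
    return [min(max(v, lo), hi) for v, (lo, hi) in zip(outputs, bounds)]

# e.g., with speed in [-1, 1] and fire in [0, 1]:
#     clamp_actions([1.7, -0.3], [(-1, 1), (0, 1)])  # -> [1, 0]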
def rtneat_rewards():
    """
    Create a reward FeatureVectorInfo to pass to RTNEAT.
    """
    reward = OpenNero.FeatureVectorInfo()
    for f in constants.FITNESS_DIMENSIONS:
        reward.add_continuous(-sys.float_info.max, sys.float_info.max)
    return reward
def get_agent_info(self, agent):
    """
    Return a blueprint for a new agent.
    """
    # rays that sense walls and obstacles
    for a in constants.WALL_RAY_SENSORS:
        agent.add_sensor(OpenNero.RaySensor(
            math.cos(math.radians(a)), math.sin(math.radians(a)), 0,
            constants.WALL_SENSOR_RADIUS,
            constants.OBJECT_TYPE_OBSTACLE,
            False))
    # radar slices that sense the flag
    for a0, a1 in constants.FLAG_RADAR_SENSORS:
        agent.add_sensor(OpenNero.RadarSensor(
            a0, a1, -90, 90, constants.MAX_VISION_RADIUS,
            constants.OBJECT_TYPE_FLAG,
            False))
    # sense the opposing team
    sense = constants.OBJECT_TYPE_TEAM_0
    if agent.team_type == sense:
        sense = constants.OBJECT_TYPE_TEAM_1
    for a0, a1 in constants.ENEMY_RADAR_SENSORS:
        agent.add_sensor(OpenNero.RadarSensor(
            a0, a1, -90, 90, constants.MAX_VISION_RADIUS,
            sense,
            False))
    # rays that sense enemies within targeting range
    for a in constants.TARGETING_SENSORS:
        agent.add_sensor(OpenNero.RaySensor(
            math.cos(math.radians(a)), math.sin(math.radians(a)), 0,
            constants.TARGET_SENSOR_RADIUS,
            sense,
            False))

    abound = OpenNero.FeatureVectorInfo() # actions
    sbound = OpenNero.FeatureVectorInfo() # sensors

    # actions
    abound.add_continuous(-1, 1) # forward/backward speed
    abound.add_continuous(-constants.MAX_TURNING_RATE, constants.MAX_TURNING_RATE) # left/right turn (in radians)
    abound.add_continuous(0, 1) # fire
    abound.add_continuous(0, 1) # omit friend sensors

    # sensor dimensions
    for a in range(constants.N_SENSORS):
        sbound.add_continuous(0, 1)

    # single-dimension reward
    rbound = OpenNero.FeatureVectorInfo()
    rbound.add_continuous(0, 1)

    return OpenNero.AgentInitInfo(sbound, abound, rbound)
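# Each RaySensor above points along a unit vector derived from its mounting
# angle via cos/sin. A quick standalone check of that geometry (pure math, no
# OpenNero dependency; the helper name is ours):
import math

def ray_direction(angle_degrees):
    """Unit direction vector in the XY plane for a ray at the given angle."""
    r = math.radians(angle_degrees)
    return (math.cos(r), math.sin(r), 0.0)

# e.g., ray_direction(0) -> (1.0, 0.0, 0.0), straight ahead along +X;
#       ray_direction(90) points along +Y.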
def start_rtneat(self, pop_size):
    """start the rtneat learning demo"""
    OpenNero.disable_ai()
    #self.environment = RoombaEnvironment(constants.XDIM, constants.YDIM, self)
    #set_environment(self.environment)
    #self.reset_sandbox()
    # create the rtNEAT object with a single unbounded reward dimension
    rbound = OpenNero.FeatureVectorInfo()
    rbound.add_continuous(-sys.float_info.max, sys.float_info.max)
    rtneat = OpenNero.RTNEAT("data/ai/neat-params.dat", 2, 1, pop_size, 1.0, rbound, False)
    rtneat.set_weight(0, 1)
    OpenNero.set_ai("rtneat", rtneat)
    OpenNero.enable_ai()
    self.distribute_bots(pop_size, "data/shapes/roomba/RoombaRTNEAT.xml")
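# set_weight(0, 1) above gives the single reward dimension full weight in the
# overall fitness. A minimal sketch of that reduction (treating the weighting
# as a plain dot product is our assumption about rtNEAT's scheme):
def weighted_fitness(rewards, weights):
    return sum(w * r for w, r in zip(weights, rewards))

# With one dimension at weight 1, fitness is just the raw reward:
#     weighted_fitness([42.0], [1.0])  # -> 42.0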
def agent_info_tuple(self):
    sbound, abound, _ = NeroAgent.agent_info_tuple(self)
    rbound = OpenNero.FeatureVectorInfo() # single-dimension rewards
    rbound.add_continuous(-sys.float_info.max, sys.float_info.max)
    return sbound, abound, rbound