Example 1
    def __init__(self):
        self.environment = None
        self.agent_id = None

        self.flag_loc = None
        self.flag_id = None

        self.set_speedup(constants.DEFAULT_SPEEDUP)

        x = constants.XDIM / 2.0
        y = constants.YDIM / 3.0
        self.spawn_x = {}
        self.spawn_y = {}
        self.set_spawn(x, y, constants.OBJECT_TYPE_TEAM_0)
        self.set_spawn(x, 2 * y, constants.OBJECT_TYPE_TEAM_1)

        # Bounds for sensors in the neural network and in the advice language.
        # These bounds are used to convert sensor values between network and advice.
        self.sbounds_network = OpenNero.FeatureVectorInfo()
        self.sbounds_advice = OpenNero.FeatureVectorInfo()

        # Networks have a better learning bias when cube sensors produce values in the
        # range [-1, 1], but the range [0, 1] is more intuitive in the advice
        # language.  Wall sensors use the same range [0, 1] for both network and advice.
        # The sense() method in the ForageEnvironment class uses these network bounds
        # to scale the sensor values.
        for i in range(constants.N_SENSORS):
            self.sbounds_network.add_continuous(0, 1)
            self.sbounds_advice.add_continuous(0, 1)

        # The last sensor is the bias, which always takes the value 1 (upper bound).
        self.sbounds_network.add_continuous(0, 1)
        self.sbounds_advice.add_continuous(0, 1)

        print 'sbounds_network', self.sbounds_network
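
The comments in this example describe a linear conversion of each sensor value between the advice range and the network range. A minimal sketch of that kind of rescaling, assuming plain (low, high) tuples rather than the FeatureVectorInfo accessors (whose exact API is not shown here):

def advice_to_network(value, advice_bounds=(0.0, 1.0), network_bounds=(-1.0, 1.0)):
    """Linearly map a sensor value from the advice range to the network range.

    The bounds are illustrative defaults; in the snippet above both ranges
    happen to be [0, 1], so the mapping is the identity.
    """
    a_lo, a_hi = advice_bounds
    n_lo, n_hi = network_bounds
    # relative position of the value within the advice range, in [0, 1]
    t = (value - a_lo) / float(a_hi - a_lo)
    # same relative position within the network range
    return n_lo + t * (n_hi - n_lo)
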
Example 2
    def __init__(self):
        """
        Create the environment
        """
        OpenNero.Environment.__init__(self)

        self.curr_id = 0
        self.max_steps = 20
        self.MAX_DIST = math.hypot(constants.XDIM, constants.YDIM)
        self.states = {}
        self.teams = {}
        self.script = 'Hw5/menu.py'

        abound = OpenNero.FeatureVectorInfo()  # actions
        sbound = OpenNero.FeatureVectorInfo()  # sensors
        rbound = OpenNero.FeatureVectorInfo()  # rewards

        # actions
        abound.add_continuous(
            -1, 1
        )  # forward/backward speed (gets multiplied by constants.MAX_MOVEMENT_SPEED)
        abound.add_continuous(
            -constants.MAX_TURN_RADIANS,
            constants.MAX_TURN_RADIANS)  # left/right turn (in radians)

        # sensor dimensions
        for a in range(constants.N_SENSORS):
            sbound.add_continuous(0, 1)

        # Rewards
        # The environment returns the raw fitness dimensions as they accrue
        # each step; the ScoreHelper later combines them (e.g., into a
        # Z-score) to calculate the final rtNEAT fitness.
        for f in constants.FITNESS_DIMENSIONS:
            # we don't care about the bounds of the individual dimensions
            rbound.add_continuous(-sys.float_info.max,
                                  sys.float_info.max)  # range for reward

        # initialize the rtNEAT algorithm parameters:
        # the input layer has enough nodes for all the observations plus a bias,
        # the output layer has enough values for all the actions,
        # the population size comes from constants.pop_size,
        # and 1.0 is the weight initialization noise
        rtneat = OpenNero.RTNEAT("data/ai/neat-params.dat",
                                 OpenNero.Population(), constants.pop_size, 1)

        key = "rtneat-%s" % constants.OBJECT_TYPE_TEAM_0
        OpenNero.set_ai(key, rtneat)
        print "get_ai(%s): %s" % (key, OpenNero.get_ai(key))

        # set the initial lifetime
        lifetime = module.getMod().lt
        rtneat.set_lifetime(lifetime)
        print 'rtNEAT lifetime:', lifetime

        self.agent_info = OpenNero.AgentInitInfo(sbound, abound, rbound)
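
The action bounds above mean the first network output is a normalized speed that the caller multiplies by constants.MAX_MOVEMENT_SPEED, while the second is already a turn angle in radians. A rough, self-contained sketch of that interpretation (the speed constant below is a stand-in value, not the real constants.MAX_MOVEMENT_SPEED):

MAX_MOVEMENT_SPEED = 12.0  # stand-in for constants.MAX_MOVEMENT_SPEED

def interpret_actions(actions):
    """Turn a raw action vector into movement commands.

    actions[0] is in [-1, 1] and scales the maximum movement speed;
    actions[1] is already a turn angle in radians, limited by the action
    bounds to [-MAX_TURN_RADIANS, MAX_TURN_RADIANS].
    """
    speed = actions[0] * MAX_MOVEMENT_SPEED
    turn = actions[1]
    return speed, turn
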
Example 3
    def agent_info_tuple(self):
        abound = OpenNero.FeatureVectorInfo() # actions
        sbound = OpenNero.FeatureVectorInfo() # sensors

        # actions
        abound.add_continuous(-1, 1) # forward/backward speed
        abound.add_continuous(-constants.MAX_TURNING_RATE, constants.MAX_TURNING_RATE) # left/right turn (in radians)
        abound.add_continuous(0, 1) # fire 
        abound.add_continuous(0, 1) # omit friend sensors 

        # sensor dimensions
        for a in range(constants.N_SENSORS):
            sbound.add_continuous(0, 1)

        return sbound, abound, module.rtneat_rewards()
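
For context, the tuple returned above gets unpacked into the OpenNero.AgentInitInfo blueprint that the other examples on this page construct directly. A hypothetical call site (the real one lives in the surrounding NERO code):

def build_agent_info(env):
    # env is assumed to provide the agent_info_tuple() shown above
    sbound, abound, rbound = env.agent_info_tuple()
    return OpenNero.AgentInitInfo(sbound, abound, rbound)
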
Example 4
def rtneat_rewards():
    """
    Create a reward FeatureVectorInfo to pass to RTNEAT.
    """
    reward = OpenNero.FeatureVectorInfo()
    for f in constants.FITNESS_DIMENSIONS:
        reward.add_continuous(-sys.float_info.max, sys.float_info.max)
    return reward
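
Each reward dimension is left effectively unbounded because it only carries raw per-step data; as the comment in Example 2 notes, a ScoreHelper later combines the dimensions, e.g. via Z-scores. A back-of-the-envelope illustration of that kind of combination (purely illustrative, not the actual ScoreHelper code):

def z_score_combine(raw_values, means, stds, weights):
    """Combine raw fitness dimensions into one scalar via weighted Z-scores."""
    total = 0.0
    for raw, mean, std, weight in zip(raw_values, means, stds, weights):
        z = (raw - mean) / std if std > 0 else 0.0
        total += weight * z
    return total
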
Example 5
    def get_agent_info(self, agent):
        """
        return a blueprint for a new agent
        """
        for a in constants.WALL_RAY_SENSORS:
            agent.add_sensor(OpenNero.RaySensor(
                    math.cos(math.radians(a)), math.sin(math.radians(a)), 0,
                    constants.WALL_SENSOR_RADIUS,
                    constants.OBJECT_TYPE_OBSTACLE,
                    False))
        for a0, a1 in constants.FLAG_RADAR_SENSORS:
            agent.add_sensor(OpenNero.RadarSensor(
                    a0, a1, -90, 90, constants.MAX_VISION_RADIUS,
                    constants.OBJECT_TYPE_FLAG,
                    False))
        sense = constants.OBJECT_TYPE_TEAM_0
        if agent.team_type == sense:
            sense = constants.OBJECT_TYPE_TEAM_1
        for a0, a1 in constants.ENEMY_RADAR_SENSORS:
            agent.add_sensor(OpenNero.RadarSensor(
                    a0, a1, -90, 90, constants.MAX_VISION_RADIUS,
                    sense,
                    False))
        for a in constants.TARGETING_SENSORS:
            agent.add_sensor(OpenNero.RaySensor(
                    math.cos(math.radians(a)), math.sin(math.radians(a)), 0,
                    constants.TARGET_SENSOR_RADIUS,
                    sense,
                    False))

        abound = OpenNero.FeatureVectorInfo() # actions
        sbound = OpenNero.FeatureVectorInfo() # sensors

        # actions
        abound.add_continuous(-1, 1) # forward/backward speed
        abound.add_continuous(-constants.MAX_TURNING_RATE, constants.MAX_TURNING_RATE) # left/right turn (in radians)
        abound.add_continuous(0, 1) # fire 
        abound.add_continuous(0, 1) # omit friend sensors 

        # sensor dimensions
        for a in range(constants.N_SENSORS):
            sbound.add_continuous(0, 1)

        rbound = OpenNero.FeatureVectorInfo()
        rbound.add_continuous(0, 1)
        return OpenNero.AgentInitInfo(sbound, abound, rbound)
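
Each RaySensor above aims its ray along a unit direction vector computed from an angle in degrees. A quick check of that math with made-up angles (the real ones live in constants.WALL_RAY_SENSORS and constants.TARGETING_SENSORS):

import math

# each angle becomes a unit vector in the XY plane; the z component stays 0
for a in (-90, 0, 90):
    dx, dy = math.cos(math.radians(a)), math.sin(math.radians(a))
    print 'angle %d -> direction (%.2f, %.2f, 0)' % (a, dx, dy)
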
Example 6
    def start_rtneat(self, pop_size):
        """start the rtneat learning demo"""
        OpenNero.disable_ai()
        #self.environment = RoombaEnvironment(constants.XDIM, constants.YDIM, self)
        #set_environment(self.environment)
        #self.reset_sandbox()
        # Create RTNEAT object
        rbound = OpenNero.FeatureVectorInfo()
        rbound.add_continuous(-sys.float_info.max, sys.float_info.max)
        rtneat = OpenNero.RTNEAT("data/ai/neat-params.dat", 2, 1, pop_size, 1.0, rbound, False)
        rtneat.set_weight(0, 1)
        OpenNero.set_ai("rtneat", rtneat)
        OpenNero.enable_ai()
        self.distribute_bots(pop_size, "data/shapes/roomba/RoombaRTNEAT.xml")
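
Once the algorithm is registered under the "rtneat" key, other parts of the module can fetch it back with OpenNero.get_ai, just as Example 2 does for its team-specific key. A hypothetical lookup:

# "rtneat" matches the key passed to OpenNero.set_ai above
rtneat = OpenNero.get_ai("rtneat")
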
Example 7
    def agent_info_tuple(self):
        sbound, abound, _ = NeroAgent.agent_info_tuple(self)
        rbound = OpenNero.FeatureVectorInfo()  # single-dimension rewards
        rbound.add_continuous(-sys.float_info.max, sys.float_info.max)
        return sbound, abound, rbound