Example #1
    def __init__(self,
                 actions,
                 mapname,
                 colormap,
                 name="PlaceCellEnvironment",
                 imgsize=(1.0, 1.0),
                 dx=0.01,
                 placedev=0.1,
                 num_places=None):
        """Initialize environment variables.

        :param actions: actions available to the system
            :type actions: list of tuples (action_name,action_vector)
        :param mapname: name of file describing environment map
        :param colormap: dict mapping pixel colours to labels
        :param name: name for environment
        :param imgsize: (width, height) of the space represented by the map
            image
        :param dx: distance agent moves each timestep
        :param placedev: standard deviation of gaussian place cell activations
        :param num_places: number of placecells to use (if None it will attempt
            to fill the space)
        """

        EnvironmentTemplate.__init__(self, name, 2, actions)

        # parameters
        self.colormap = colormap
        self.rewardamount = 0  # number of timesteps spent in reward

        # number of timesteps to spend in reward before agent is reset
        # note: convenient to express this as time_in_reward / dt
        self.rewardresetamount = 0.6 / 0.001

        self.num_actions = len(actions)
        self.imgsize = [float(x) for x in imgsize]
        self.dx = dx
        self.placedev = placedev
        self.num_places = num_places
        self.optimal_move = None
        self.defaultreward = -0.075

        # load environment
        self.map = ImageIO.read(File(HRLutils.datafile(mapname)))

        # generate place cells
        self.gen_placecells(min_spread=1.0 * placedev)

        # initial conditions
        self.state = self.random_location(avoid=["wall", "target"])
        self.place_activations = [0 for _ in self.placecells]

        self.create_origin("place", lambda: self.place_activations)

        # note: making the value small, so that the noise node will give us
        # some random exploration as well
        self.create_origin(
            "optimal_move", lambda:
            [0.1 if self.optimal_move == a[0] else 0.0 for a in self.actions])
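A minimal instantiation sketch for the constructor above. The import path, map filename, and colour keys are illustrative assumptions; only the argument structure (actions as (name, vector) tuples, and a colormap whose labels include "wall" and "target") comes from the code itself.

# Illustrative sketch only: module path, filename and colour values are
# assumptions, not taken from the project.
from placecell_environment import PlaceCellEnvironment  # hypothetical module

# actions as (action_name, action_vector) tuples, per the docstring
actions = [("up", [1, 0, 0, 0]), ("right", [0, 1, 0, 0]),
           ("down", [0, 0, 1, 0]), ("left", [0, 0, 0, 1])]

# "wall" and "target" are the labels random_location() avoids above; the
# packed-int colour keys are a guess at how the image pixels are encoded
colormap = {-16777216: "wall",    # opaque black
            -1: "floor",          # opaque white
            -65536: "target"}     # opaque red

env = PlaceCellEnvironment(actions, "map.bmp", colormap,
                           imgsize=(1.0, 1.0), dx=0.01, placedev=0.1)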
Example #2
    def __init__(self, stateD, actions, filename, name="BoxWorld",
                 delay=0.1, cellwidth=1.0, dx=0.01, cartesian=False):
        """Initializes environment variables.

        :param stateD: dimension of state
        :param actions: actions available to the system
            :type actions: list of tuples (action_name,action_vector)
        :param filename: name of file containing map description
        :param name: name for environment
        :param delay: time to wait between action updates
        :param cellwidth: physical distance represented by each character in
            the map file
        :param dx: distance agent moves in one timestep
        :param cartesian: if True, represent the agent's location in x,y
            cartesian space (0,0 in centre); if False, agent's location is in
            matrix space (0,0 in top left)
        """

        EnvironmentTemplate.__init__(self, name, stateD, actions)

        self.wallboxes = []
        self.targetboxes = []
        self.mudboxes = []
        self.worldbox = None

        self.delay = delay
        # the time to perform the next action update
        self.update_time = 0.5
        self.learntime = [-1, -1]
        # reset right at the beginning to set things up
        self.resettime = [0.05, 0.1]
        # time agent will spend in reward location before being reset
        self.rewardresettime = 0.6
        self.num_actions = len(actions)
        self.chosen_action = None
        self.cellwidth = cellwidth
        self.dx = dx

        with open(filename) as f:
            data = [line[:-1] if line.endswith("\n") else line for line in f]

        if cartesian:
            # modify all the coordinates so that they lie in the standard
            # cartesian space rather than the matrix row/column numbering
            # system
            self.yoffset = (len(data) / 2) * self.cellwidth
            self.yscale = -1
            self.xoffset = (len(data[0]) / 2) * self.cellwidth
        else:
            self.yoffset = 0
            self.yscale = 1
            self.xoffset = 0

        self.load_boxes(data)

        # internal state is the location in terms of the internal coordinate
        # system (i.e. 0,0 in the top left)
        self.i_state = self.random_location()

        # this is the state in terms of the agent's coordinate system
        # (usually cartesian where 0,0 is the middle)
        self.state = self.transform_point(self.i_state)

        self.create_origin("learn", lambda: [1.0 if self.t > self.learntime[0] and self.t < self.learntime[1] else 0.0])
        self.create_origin("reset", lambda: [1.0 if self.t > self.resettime[0] and self.t < self.resettime[1] else 0.0])
Example #3
    def __init__(self, flat=False):
        """Set up task parameters.

        :param flat: if True, no hierarchical relationship between stimuli and
            reward; if False, stimulus-response rewards will be dependent on
            colour
        """

        self.rewardval = 1.5

        # actions correspond to three different button presses
        actions = [("left", [1, 0, 0]), ("middle", [0, 1, 0]),
                   ("right", [0, 0, 1])]

        # number of instances of each attribute (stimuli formed through
        # different combinations of attribute instances)
        self.num_orientations = 3
        self.num_shapes = 3
        self.num_colours = 2

        self.presentationtime = 0.5  # length of time to present each stimulus
        self.rewardtime = 0.1  # length of reward period

        # next presentation interval
        self.presentationperiod = [0, self.presentationtime]

        # next reward interval
        self.rewardperiod = [self.presentationtime,
                             self.presentationtime + self.rewardtime]

        self.answer = random.choice(actions)[0]  # answer selected by agent

        self.stateD = (self.num_orientations + self.num_shapes +
                       self.num_colours)

        self.correct = [0] * 20

        EnvironmentTemplate.__init__(self, "BadreEnvironment", self.stateD,
                                     actions)

        self.answers = self.gen_answers(flat)

        self.create_origin("optimal_move",
                           lambda: [a[1] for a in actions
                                    if a[0] == self.answer][0])
        self.create_origin("score",
                           lambda: [sum(self.correct) / len(self.correct)])
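The "score" origin reports the mean of self.correct, which starts as a list of 20 zeros, i.e. a rolling window of recent trial outcomes. The update step is not part of this excerpt, so the shift-and-append below is an assumption; the sketch only shows how such a window yields the reported score.

window = [0] * 20  # recent trial outcomes, as in self.correct

def record_trial(window, was_correct):
    # assumed update rule: drop the oldest outcome, append the newest
    return window[1:] + [1 if was_correct else 0]

for outcome in [True, True, False, True]:
    window = record_trial(window, outcome)

score = float(sum(window)) / len(window)  # what the "score" origin reports
print(score)  # 0.15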
Example #4
    def __init__(self, actions, playernum=0):
        """Initialize environment variables.

        :param actions: actions available to the system
            :type actions: list of tuples (action_name,action_vector)
        :param playernum: index of the player controlled by this agent
        """
        EnvironmentTemplate.__init__(self, "PongEnvironment", 2, actions)


        self.state_radius = 0.9
        self.max_y = 480
        self.playernum = playernum
        self.place_dev = 0.2
        self.rewardscale = 5
        self.optimal_move = 0
        self.stats = [0] * 4
        self.mapping = {"up": -1, "stay": 0, "down": 1}

        self.placecells = self.gen_placecells(min_spread=self.place_dev * 0.5)
        self.place_activations = [0 for _ in self.placecells]

        self.reader = None
        self.error = None

        self.create_origin("place", lambda: self.place_activations)
        self.create_origin("stats", lambda: self.stats)