def gen_encoders(self, N, contextD, context_scale):
    """Generate encoders for state population of learning agent.

    :param N: number of neurons in state population
    :param contextD: dimension of context vector representation
    :param context_scale: weight on context representation relative to
        state (1.0 = equal weighting)
    """

    if contextD > 0:
        contexts = MU.I(contextD)
    else:
        contexts = [[]]

    # neurons each sensitive to different combinations of stimuli
    encs = (list(MU.I(self.stateD)) +
            [o + s + c
             for o in MU.I(self.num_orientations)
             for s in MU.I(self.num_shapes)
             for c in MU.I(self.num_colours)])

    return [HRLutils.normalize(
        HRLutils.normalize(random.choice(encs)) +
        [x * context_scale for x in random.choice(contexts)])
        for _ in range(N)]
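# A minimal standalone sketch (not from the original source) of the encoder
# construction above, using local stand-ins for HRLutils.normalize (assumed
# to rescale a vector to unit length) and MU.I (assumed to return the rows
# of the d x d identity matrix). Runs under plain Python 2, no Nengo needed.
import math
import random

def normalize(v):
    norm = math.sqrt(sum([x ** 2 for x in v]))
    return [x / norm for x in v] if norm > 0 else v

def identity(d):
    return [[1.0 if i == j else 0.0 for j in range(d)] for i in range(d)]

stateD, contextD, context_scale = 8, 2, 0.5
encs = identity(stateD)
contexts = identity(contextD)

# combined encoder: unit state component plus scaled context component,
# renormalized to unit length in stateD + contextD dimensions
enc = normalize(normalize(random.choice(encs)) +
                [x * context_scale for x in random.choice(contexts)])
print len(enc), round(math.sqrt(sum([x ** 2 for x in enc])), 6)  # 10 1.0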
def tick(self):
    # note: doing a slow decay rather than setting to 0, to put a bit
    # more power in the reward signal
    self.reward *= 0.97

    data = self.reader.readline()
    while data is not None:
        data = data.split()

        if data[0] == "state":
            self.state = [float(data[2]), float(data[3 + self.playernum])]

            # rescale state from [0, max_y] to
            # [-state_radius, state_radius]
            self.state = [2 * self.state_radius * x / self.max_y -
                          self.state_radius for x in self.state]

            # +1 if the ball is above the opponent's paddle, -1 otherwise
            # (used below to drive the scripted opponent)
            self.optimal_move = (1 if float(data[2]) >
                                 float(data[4 - self.playernum]) else -1)
        elif data[0] == "reward" and int(data[1]) == self.playernum:
            self.reward = float(data[2]) * self.rewardscale
        elif data[0] == "stats":
            self.stats = [float(x) for x in data[1:]]
#         else:
#             print "unrecognized input (%s)" % data

        data = self.reader.readline()

    self.place_activations = self.calc_activations(self.state,
                                                   self.place_dev)

#     print "state:", self.state

    # the opponent follows its optimal move with some randomness mixed in;
    # the agent's selected action is mapped to a paddle movement
    print >> self.p.stdin, "move %d %f" % (
        1 - self.playernum, random.choice([self.optimal_move, 1, -1]))
    print >> self.p.stdin, "move %d %f" % (
        self.playernum, self.mapping[self.action[0]])
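# The loop above assumes a simple line-based protocol from the game
# process; the field layout below is inferred from the parsing code and is
# an assumption, not documented behaviour:
#   state <?> <ball_y> <paddle0_y> <paddle1_y>   (second field unused here)
#   reward <playernum> <value>
#   stats <v0> <v1> ...
# A minimal offline check of the "state" branch, with made-up values and
# playernum = 0, max_y = 480, state_radius = 1:
playernum, max_y, state_radius = 0, 480.0, 1.0
data = "state 0 240 100 380".split()
state = [float(data[2]), float(data[3 + playernum])]
state = [2 * state_radius * x / max_y - state_radius for x in state]
print state  # [0.0, -0.583...]: rescaled from [0, max_y] to [-1, 1]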
def __init__(self, actions, mapname, contextD, context_rewards, **kwargs):
    """Initialize the environment variables.

    :param actions: actions available to the system
    :type actions: list of tuples (action_name, action_vector)
    :param mapname: filename for map file
    :param contextD: dimension of vector representing context
    :param context_rewards: mapping from region labels to rewards for
        being in that region (each entry represents one context)
    :type context_rewards: dict {"regionlabel": rewardval, ...}
    :param **kwargs: see PlaceCellEnvironment.__init__
    """

    PlaceCellEnvironment.__init__(self, actions, mapname,
                                  name="ContextEnvironment", **kwargs)

    self.rewards = context_rewards

    # generate vectors representing each context
    self.contexts = {}  # mapping from region label to context vector
    for i, r in enumerate(self.rewards):
        self.contexts[r] = list(MU.I(contextD)[i])
    self.context = self.contexts[random.choice(self.contexts.keys())]

    # randomly pick a new context every context_delay seconds
    self.context_delay = 60
    self.context_update = self.context_delay

    self.create_origin("placewcontext",
                       lambda: self.place_activations + self.context)
    self.create_origin("context", lambda: self.context)
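# A standalone sketch of the context-vector assignment above: each region
# label gets one axis (one-hot) row of the contextD-dimensional identity
# matrix. identity() is a local stand-in for MU.I; the labels and reward
# values are made up for illustration.
def identity(d):
    return [[1.0 if i == j else 0.0 for j in range(d)] for i in range(d)]

context_rewards = {"rooma": 1.0, "roomb": 1.5}
contextD = 2
contexts = {}
for i, r in enumerate(context_rewards):
    contexts[r] = list(identity(contextD)[i])
print contexts  # e.g. {'rooma': [1.0, 0.0], 'roomb': [0.0, 1.0]}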
def tick(self): """Update state/reward each timestep.""" # present stimuli if (self.t > self.presentationperiod[0] and self.t < self.presentationperiod[1] and self.state == [0 for _ in range(self.stateD)]): # pick a random stimuli at beginning of presentation period # and set that as the current state for the duration of # the presentation period self.state = random.choice(self.answers.keys()) self.answer = self.answers[self.state] self.state = list(self.state) # provide feedback if in reward period if self.t > self.rewardperiod[0] and self.t < self.rewardperiod[1]: self.reward = (self.rewardval if self.action[0] == self.answer else -self.rewardval) else: self.reward = 0 # update score if ((self.t + self.rewardtime) % (self.presentationtime + self.rewardtime)) < 0.002: self.correct = self.correct[1:] + ([1.0] if self.action[0] == self.answer else [0.0]) # update presentation/reward period if (self.t % (self.presentationtime + self.rewardtime)) < 0.002: self.presentationperiod = [self.t, self.t + self.presentationtime] self.rewardperiod = [self.t + self.presentationtime, self.t + self.presentationtime + self.rewardtime] self.state = [0 for _ in range(self.stateD)]
def gen_encoders(self, N, contextD, context_scale):
    """Generate encoders for state population in RL agent.

    State aspect of encoders comes from PlaceCellEnvironment. Context
    component is a one-hot vector with contextD dimensions, scaled to
    length context_scale.
    """

    s_encoders = PlaceCellEnvironment.gen_encoders(self, N)
    c_encoders = [random.choice(MU.I(contextD)) for _ in range(N)]
    c_encoders = [[x * context_scale for x in enc] for enc in c_encoders]

    # concatenate state and context components, then rescale each combined
    # encoder to unit length
    encoders = [s + list(c) for s, c in zip(s_encoders, c_encoders)]
    encoders = [[x / math.sqrt(sum([y ** 2 for y in e])) for x in e]
                for e in encoders]
    return encoders
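# A quick standalone check of the normalization above: after dividing by
# the Euclidean norm, every combined encoder should have unit length. The
# state encoders here are random stand-ins for PlaceCellEnvironment
# output, and identity() stands in for MU.I.
import math
import random

def identity(d):
    return [[1.0 if i == j else 0.0 for j in range(d)] for i in range(d)]

N, stateD, contextD, context_scale = 5, 4, 2, 0.7
s_encoders = [[random.uniform(-1, 1) for _ in range(stateD)]
              for _ in range(N)]
c_encoders = [[x * context_scale for x in random.choice(identity(contextD))]
              for _ in range(N)]
encoders = [s + c for s, c in zip(s_encoders, c_encoders)]
encoders = [[x / math.sqrt(sum([y ** 2 for y in e])) for x in e]
            for e in encoders]
print [round(math.sqrt(sum([x ** 2 for x in e])), 6) for e in encoders]
# expected: [1.0, 1.0, 1.0, 1.0, 1.0]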
def __init__(self, flat=False):
    """Set up task parameters.

    :param flat: if True, no hierarchical relationship between stimuli
        and reward; if False, stimulus-response rewards will be
        dependent on colour
    """

    self.rewardval = 1.5

    # actions correspond to three different button presses
    actions = [("left", [1, 0, 0]), ("middle", [0, 1, 0]),
               ("right", [0, 0, 1])]

    # number of instances of each attribute (stimuli formed through
    # different combinations of attribute instances)
    self.num_orientations = 3
    self.num_shapes = 3
    self.num_colours = 2

    self.presentationtime = 0.5  # length of time to present each stimulus
    self.rewardtime = 0.1  # length of reward period

    # next presentation interval
    self.presentationperiod = [0, self.presentationtime]

    # next reward interval
    self.rewardperiod = [self.presentationtime,
                         self.presentationtime + self.rewardtime]

    # correct response for the current stimulus (initialized randomly)
    self.answer = random.choice(actions)[0]

    self.stateD = (self.num_orientations + self.num_shapes +
                   self.num_colours)

    # running record of correct (1.0) / incorrect (0.0) trials
    self.correct = [0] * 20

    EnvironmentTemplate.__init__(self, "BadreEnvironment", self.stateD,
                                 actions)

    self.answers = self.gen_answers(flat)

    self.create_origin("optimal_move",
                       lambda: [a[1] for a in actions
                                if a[0] == self.answer][0])
    self.create_origin("score",
                       lambda: [sum(self.correct) / len(self.correct)])
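# For reference, a standalone sketch of the stimulus space implied by the
# attribute counts above (gen_answers itself is not shown here, so this is
# an inference from gen_encoders and stateD): each stimulus concatenates
# one-hot vectors for orientation, shape and colour, giving an 8-D state
# and 3 * 3 * 2 = 18 distinct stimuli. identity() stands in for MU.I.
def identity(d):
    return [[1.0 if i == j else 0.0 for j in range(d)] for i in range(d)]

num_orientations, num_shapes, num_colours = 3, 3, 2
stimuli = [o + s + c
           for o in identity(num_orientations)
           for s in identity(num_shapes)
           for c in identity(num_colours)]
print len(stimuli), len(stimuli[0])  # 18 8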
def update_context(self):
    """Pick a new random context if the context delay has elapsed."""

    if self.t > self.context_update:
        self.context = self.contexts[random.choice(self.contexts.keys())]
        self.context_update = self.t + self.context_delay
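# A minimal offline trace of the schedule above: once self.t passes
# context_update, a new context is drawn and the next switch is scheduled
# context_delay seconds later. Values here are made up; note that Python 2
# dict.keys() returns a list, so random.choice works on it directly.
import random

contexts = {"rooma": [1.0, 0.0], "roomb": [0.0, 1.0]}
context_delay, context_update = 60.0, 60.0
for t in [30.0, 61.0, 90.0, 125.0]:
    if t > context_update:
        context = contexts[random.choice(contexts.keys())]
        context_update = t + context_delay
        print "t=%.0f: new context, next switch after t=%.0f" % (
            t, context_update)
# t=61: new context, next switch after t=121
# t=125: new context, next switch after t=185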