def __init__(self, actions, mapname, contextD, context_rewards, **kwargs):
    """Initialize the environment variables.

    :param actions: actions available to the system
    :type actions: list of tuples (action_name,action_vector)
    :param mapname: filename for map file
    :param contextD: dimension of vector representing context
    :param context_rewards: mapping from region labels to rewards for being
        in that region (each entry represents one context)
    :type context_rewards: dict {"regionlabel":rewardval,...}
    :param **kwargs: see PlaceCellEnvironment.__init__
    """

    PlaceCellEnvironment.__init__(self, actions, mapname,
                                  name="ContextEnvironment", **kwargs)

    self.rewards = context_rewards

    # generate vectors representing each context: the i-th region label gets
    # row i of MU.I(contextD) (presumably the contextD x contextD identity
    # matrix, so contextD should be >= len(context_rewards) -- TODO confirm)
    basis = MU.I(contextD)  # loop-invariant, so build it only once
    self.contexts = {}  # mapping from region label to context vector
    for i, r in enumerate(self.rewards):
        self.contexts[r] = list(basis[i])

    # start in a randomly chosen context; list(...) guarantees random.choice
    # receives an indexable sequence even where dict.keys() returns a view
    self.context = self.contexts[random.choice(list(self.contexts))]

    # randomly pick a new context every context_delay seconds
    self.context_delay = 60
    self.context_update = self.context_delay

    # origins are evaluated lazily so they always report the current context
    self.create_origin("placewcontext",
                       lambda: self.place_activations + self.context)
    self.create_origin("context", lambda: self.context)
def __init__(self, actions, mapname, contextD, context_rewards, **kwargs):
    """Initialize the environment variables.

    :param actions: actions available to the system
    :type actions: list of tuples (action_name,action_vector)
    :param mapname: filename for map file
    :param contextD: dimension of vector representing context
    :param context_rewards: mapping from region labels to rewards for being
        in that region (each entry represents one context)
    :type context_rewards: dict {"regionlabel":rewardval,...}
    :param **kwargs: see PlaceCellEnvironment.__init__
    """

    PlaceCellEnvironment.__init__(self, actions, mapname,
                                  name="ContextEnvironment", **kwargs)

    self.rewards = context_rewards

    # generate vectors representing each context
    # NOTE: an earlier, disabled variant drew each vector from
    # RandomHypersphereVG().genVectors(1, contextD); rows of MU.I(contextD)
    # are used instead (presumably identity-matrix rows -- TODO confirm)
    context_basis = MU.I(contextD)  # hoisted: identical on every iteration
    self.contexts = {}  # mapping from region label to context vector
    for i, r in enumerate(self.rewards):
        self.contexts[r] = list(context_basis[i])

    # initial context is picked at random; materialise the labels as a list
    # because random.choice needs a sequence, not a dict key view
    labels = list(self.contexts)
    self.context = self.contexts[random.choice(labels)]

    # randomly pick a new context every context_delay seconds
    self.context_delay = 60
    self.context_update = self.context_delay

    # both origins read self.context when called, not when created
    self.create_origin("placewcontext",
                       lambda: self.place_activations + self.context)
    self.create_origin("context", lambda: self.context)
def tick(self):
    """Update the in_hand flag, then run the base environment tick.

    in_hand becomes True while the current state lies in region "a"
    (presumably the pickup location -- confirm against the map file).
    Otherwise it is cleared once rewardamount exceeds rewardresetamount.
    In every other case the flag keeps its previous value.
    """

    at_region_a = self.is_in(self.state, "a")
    if at_region_a:
        self.in_hand = True
    else:
        # the reward comparison is only consulted outside region "a"
        if self.rewardamount > self.rewardresetamount:
            self.in_hand = False

    PlaceCellEnvironment.tick(self)
def __init__(self, *args, **kwargs):
    """Initialize environment variables.

    The environment name is fixed to "DeliveryEnvironment"; every other
    argument is forwarded to the base class unchanged.

    :param *args: see PlaceCellEnvironment.__init__
    :param **kwargs: see PlaceCellEnvironment.__init__
    """

    PlaceCellEnvironment.__init__(self, name="DeliveryEnvironment",
                                  *args, **kwargs)

    # reward value when no reward condition is met
    self.defaultreward = -0.05

    # one two-element context vector per hand state
    self.contexts = {"in_hand": [1, 0], "out_hand": [0, 1]}
    self.in_hand = False

    def hand_context():
        # evaluated on every call so the origins follow self.in_hand
        if self.in_hand:
            return self.contexts["in_hand"]
        return self.contexts["out_hand"]

    # place-cell activations with the current hand context appended
    self.create_origin("placewcontext",
                       lambda: self.place_activations + hand_context())
    # the hand context on its own
    self.create_origin("context", lambda: hand_context())
def __init__(self, *args, **kwargs):
    """Initialize environment variables.

    Forwards all arguments to PlaceCellEnvironment.__init__ with the
    environment name set to "DeliveryEnvironment", then registers the
    context-related origins.

    :param *args: see PlaceCellEnvironment.__init__
    :param **kwargs: see PlaceCellEnvironment.__init__
    """

    PlaceCellEnvironment.__init__(self, name="DeliveryEnvironment",
                                  *args, **kwargs)

    # reward value when no reward condition is met
    self.defaultreward = -0.05

    # context vector for each value of the in_hand flag
    self.contexts = {"in_hand": [1, 0], "out_hand": [0, 1]}
    self.in_hand = False

    def current_context():
        # select the key at call time; the flag changes during simulation
        if self.in_hand:
            key = "in_hand"
        else:
            key = "out_hand"
        return self.contexts[key]

    def place_with_context():
        # list concatenation: place activations followed by context vector
        return self.place_activations + current_context()

    self.create_origin("placewcontext", lambda: place_with_context())
    self.create_origin("context", lambda: current_context())
def gen_encoders(self, N, contextD, context_scale):
    """Generates encoders for state population in RL agent.

    State aspect of encoders comes from PlaceCellEnvironment. Context
    component is a unit vector with contextD dimensions and length
    context_scale. Each concatenated (state + context) vector is then
    rescaled to unit Euclidean length.

    :param N: number of encoders requested
    :param contextD: dimension of vector representing context
    :param context_scale: length of the context component before the final
        normalization
    :returns: list of encoders, one per state encoder paired with a context
        vector (zip stops at the shorter of the two lists)
    """

    s_encoders = PlaceCellEnvironment.gen_encoders(self, N)

    # MU.I(contextD) does not depend on the loop variable: build it once and
    # draw N rows from it (one random.choice call per encoder, as before)
    basis = MU.I(contextD)
    c_encoders = [random.choice(basis) for _ in range(N)]
    c_encoders = [[x * context_scale for x in enc] for enc in c_encoders]

    encoders = []
    for s, c in zip(s_encoders, c_encoders):
        e = s + list(c)
        # compute the norm once per encoder instead of once per element
        norm = math.sqrt(sum([y ** 2 for y in e]))
        encoders.append([x / norm for x in e])
    return encoders
def gen_encoders(self, N, contextD, context_scale):
    """Generates encoders for state population in RL agent.

    State aspect of encoders comes from PlaceCellEnvironment. Context
    component is a unit vector with contextD dimensions and length
    context_scale. The combined vector is normalized to unit length before
    being returned.

    :param N: number of encoders requested
    :param contextD: dimension of vector representing context
    :param context_scale: scale applied to the context component before the
        combined vector is normalized
    :returns: list of normalized encoders (state part followed by context
        part)
    """

    s_encoders = PlaceCellEnvironment.gen_encoders(self, N)

    # the candidate context vectors are the same for every encoder, so they
    # are generated a single time rather than N times
    candidates = MU.I(contextD)
    c_encoders = [random.choice(candidates) for _ in range(N)]
    c_encoders = [[x * context_scale for x in enc] for enc in c_encoders]

    encoders = []
    for s, c in zip(s_encoders, c_encoders):
        combined = s + list(c)
        # one norm per encoder; the original recomputed it for every element
        length = math.sqrt(sum([y**2 for y in combined]))
        encoders.append([x / length for x in combined])
    return encoders
def tick(self):
    """Run the base environment tick, then update the context.

    The order is significant and preserved: PlaceCellEnvironment.tick runs
    first, and update_context is called afterwards.
    """

    # step the underlying place-cell environment
    PlaceCellEnvironment.tick(self)

    # then let the environment decide whether the context should change
    self.update_context()