import copy
import math

# HRLutils (the normalize/similarity helpers used throughout) and Timer
# are assumed to be importable from the surrounding project.


def tick(self):
    cond_active = False
    for c in self.conds:
        if isinstance(c, Timer):
            # if it is a timer entry, just update the timer and check
            # whether it has expired
            c.tick()
            if c.ring():
                self.reward = self.rewardval
                self.activate()
                c.reset()
                cond_active = True
        elif (self.env.is_in(self.env.state, c) and
              (self.conds[c] is None or
               HRLutils.similarity(HRLutils.normalize(self.context),
                                   self.conds[c]) > 0.3)):
            # if it is a state entry, check whether the agent is in the
            # region associated with that state, and whether that region
            # is the one corresponding to the currently selected context
            self.reward = self.rewardval
            self.rewardamount += 1
            if self.rewardamount > self.rewardresetamount:
                self.activate()
                self.rewardamount = 0
            cond_active = True

    # if no termination condition is met, just give the default reward
    if not cond_active:
        self.reward = self.defaultreward

    # reset rewardamount while the reset signal is being sent (so that
    # there won't be any leftover rewardamount from the agent's previous
    # decision)
    if self.resettime[0] < self.t < self.resettime[1]:
        self.rewardamount = 0

    # add a penalty if the state hasn't changed (to help prevent the
    # agent from getting stuck)
    if (sum(self.prev_state) != 0 and
            HRLutils.similarity(HRLutils.normalize(self.env.state),
                                HRLutils.normalize(self.prev_state)) < 1.0):
        # the state has changed, so clear the accumulated penalty
        self.state_penalty = 0.0
    else:
        # the state is unchanged, so ramp the penalty up
        self.state_penalty += 0.0001
    self.prev_state = copy.deepcopy(self.env.state)

    self.reward = self.reward - self.state_penalty
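# The method above leans on HRLutils.normalize and HRLutils.similarity.
# As a rough sketch (not the project's actual implementation), these
# helpers typically amount to unit-length scaling and a dot product,
# which is cosine similarity once both inputs are normalized:
def _normalize_sketch(v):
    """Scale v to unit length (all-zero vectors are returned unchanged)."""
    norm = math.sqrt(sum(x * x for x in v))
    return v if norm == 0 else [x / norm for x in v]


def _similarity_sketch(u, v):
    """Dot product of u and v (cosine similarity for unit vectors)."""
    return sum(a * b for a, b in zip(u, v))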
def calc_optimal_move(self):
    """Calculate the optimal move for the agent to take in the current
    state/context."""

    # basically the same as PlaceCellEnvironment.calc_optimal_move,
    # except we look at the current context to find the goal
    goal = [c for c in self.contexts
            if self.contexts[c] == self.context][0]

    stepsize = 0.1
    self.optimal_move = None
    for y in [v * stepsize for v in
              range(int(-self.imgsize[1] / (2 * stepsize)) + 1,
                    int(self.imgsize[1] / (2 * stepsize)) - 1)]:
        for x in [v * stepsize for v in
                  range(int(-self.imgsize[0] / (2 * stepsize)) + 1,
                        int(self.imgsize[0] / (2 * stepsize)) - 1)]:
            if self.is_in((x, y), goal):
                angle = math.atan2(y - self.state[1], x - self.state[0])
                pt = (math.cos(angle), math.sin(angle))
                self.optimal_move = max(
                    self.actions,
                    key=lambda a: -1 if self.is_in(
                        (a[1][0] * self.dx + self.state[0],
                         a[1][1] * self.dx + self.state[1]), "wall")
                    else HRLutils.similarity(a[1], pt))[0]
                return
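# The goal lookup above inverts the contexts mapping (region name ->
# context vector) to find the region whose vector matches the currently
# selected context. A toy illustration with hypothetical values:
_contexts_demo = {"a": (1, 0), "b": (0, 1)}  # region -> context vector
_context_demo = (0, 1)
_goal_demo = [c for c in _contexts_demo
              if _contexts_demo[c] == _context_demo][0]
assert _goal_demo == "b"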
def calc_optimal_move(self):
    """Calculate the optimal move for the agent to take in the current
    state/context."""

    # basically the same as PlaceCellEnvironment.calc_optimal_move,
    # except we look at whether or not we have the package to pick a
    # goal state
    stepsize = 0.1
    self.optimal_move = None
    for y in [v * stepsize for v in
              range(int(-self.imgsize[1] / (2 * stepsize)) + 1,
                    int(self.imgsize[1] / (2 * stepsize)) - 1)]:
        for x in [v * stepsize for v in
                  range(int(-self.imgsize[0] / (2 * stepsize)) + 1,
                        int(self.imgsize[0] / (2 * stepsize)) - 1)]:
            if ((self.is_in((x, y), "a") and not self.in_hand) or
                    (self.is_in((x, y), "b") and self.in_hand)):
                angle = math.atan2(y - self.state[1], x - self.state[0])
                pt = (math.cos(angle), math.sin(angle))
                self.optimal_move = max(
                    self.actions,
                    key=lambda a: -1 if self.is_in(
                        (a[1][0] * self.dx + self.state[0],
                         a[1][1] * self.dx + self.state[1]), "wall")
                    else HRLutils.similarity(a[1], pt))[0]
                return
def calc_optimal_move(self):
    """Calculates the optimal move for the agent to make in the current
    state. Used mainly for debugging."""

    # grid search the image with the given stepsize
    stepsize = 0.1
    self.optimal_move = None
    for y in [v * stepsize for v in
              range(int(-self.imgsize[1] / (2 * stepsize)) + 1,
                    int(self.imgsize[1] / (2 * stepsize)) - 1)]:
        for x in [v * stepsize for v in
                  range(int(-self.imgsize[0] / (2 * stepsize)) + 1,
                        int(self.imgsize[0] / (2 * stepsize)) - 1)]:
            # if the point we're looking at is in the region we're
            # looking for
            if self.is_in((x, y), "target"):
                # generate a unit vector pointing from the current
                # location towards the target
                angle = math.atan2(y - self.state[1], x - self.state[0])
                pt = (math.cos(angle), math.sin(angle))

                # pick the action closest to that direction, penalizing
                # actions that would move the agent through a wall
                self.optimal_move = max(
                    self.actions,
                    key=lambda a: -1 if self.is_in(
                        (a[1][0] * self.dx + self.state[0],
                         a[1][1] * self.dx + self.state[1]), "wall")
                    else HRLutils.similarity(a[1], pt))[0]
                return
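# All three calc_optimal_move variants above share the same final step:
# build a unit vector pointing at the goal, then pick the action whose
# movement vector lines up with it best. A self-contained sketch using a
# hypothetical four-action set (the real set lives on self.actions) and
# the _similarity_sketch helper defined earlier:
_actions_demo = [("up", (0, 1)), ("down", (0, -1)),
                 ("left", (-1, 0)), ("right", (1, 0))]
_angle_demo = math.atan2(1.0, 0.5)  # direction from agent to goal
_pt_demo = (math.cos(_angle_demo), math.sin(_angle_demo))
_best_demo = max(_actions_demo,
                 key=lambda a: _similarity_sketch(a[1], _pt_demo))[0]
assert _best_demo == "up"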
def tick(self):
    # check whether the environment is currently giving reward (we want
    # to give pseudoreward at the same time)
    if self.env.reward != 0:
        if self.target_answer is None:
            self.reward = 0
        else:
            # check whether the selected action matches the correct one
            self.reward = (self.rewardval
                           if HRLutils.similarity(self.target_answer,
                                                  self.action) > 0.5
                           else -self.rewardval)
    else:
        self.reward = 0

    # update target_answer (the action the low level should be selecting
    # given the current context)
    if self.context[0] == "orientation":
        self.target_answer = self.env.state[:self.env.num_orientations]
    elif self.context[0] == "shape":
        self.target_answer = self.env.state[
            self.env.num_orientations:-self.env.num_colours]
    else:
        self.target_answer = None
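# The context-dependent target_answer above slices a concatenated state
# vector laid out as [orientations | shapes | colours]. A toy
# illustration with hypothetical segment sizes (2 orientations, 2
# shapes, 2 colours):
_state_demo = [1, 0, 0, 1, 0, 0]
_num_orientations_demo, _num_colours_demo = 2, 2
assert _state_demo[:_num_orientations_demo] == [1, 0]  # orientation part
assert _state_demo[
    _num_orientations_demo:-_num_colours_demo] == [0, 1]  # shape part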