def tick(self): if self.t > self.updatetime: self.scale = [0.0 for _ in range(self.action_dimension)] # the least visited vector could also be found by checking all the tiles and weighing them by the last visit time, instead of just looking for the minimum min_list = [] min_val = self.state_visited[0][0] # get min directions from the data structure # this is basically the gradient descent problem? # only if the dataset is too big to just iterate through? # find the global minimums O(n) for i in range(len(self.state_visited)): for j in range(len(self.state_visited[i])): if(self.state_visited[i][j] == min_val): min_list.append([i, j]) elif(self.state_visited[i][j] < min_val): min_list = [] min_val = self.state_visited[i][j] min_list.append([i, j]) # take the average of their orientation # runtime: O(n) # by taking the average of the list of minimum vectors total = [0.0 for _ in range(self.grid_dimension)] for val in min_list: total[0] += val[0] - self.xoffset total[1] += val[1] - self.yoffset least_visited = [total[0] / len(total), total[1] / len(total)] # convert the average minimum vector to a scale # because actions are encoded to up, right, down, left # set the scale proportional to the time it was last visited versus the current time closest_min = self.agent_state min_state_dist = HRLutils.distance(self.agent_state, min_list[0]) for min_loc in range(len(min_list)): state_dist = HRLutils.distance(self.agent_state, min_list[min_loc]) if(state_dist < min_state_dist): closest_min = min_list[min_loc] min_state_dist = state_dist least_visited[0] += self.xoffset least_visited[1] += self.yoffset state_diff = HRLutils.difference(self.agent_state, least_visited) # Whats the point of state_diff? # It catches the corner case where the least visited node is the one you're already on, which may or may not be a real thing that happens # To summarize this noise boost operation, all it's accomplishing is discouraging the agent to go to really far places or to a place that it's visited recently hor_min_dist = abs(self.agent_state[0] - closest_min[0]) hor_noise_boost = ( (self.t - min_val) * self.time_constant + (1/(1+hor_min_dist)) * self.distance_constant ) * (state_diff[0] != 0) vert_min_dist = abs(self.agent_state[1] - closest_min[1]) vert_noise_boost = ( (self.t - min_val) * self.time_constant + (1/(1+vert_min_dist)) * self.distance_constant ) * (state_diff[1] != 0) # this is a bit of a clustermuffin for mapping if(state_diff[1] > 0): # go left print("boost left") self.scale[3] = hor_noise_boost elif(state_diff[1] < 0): # got right print("boost right") self.scale[1] = hor_noise_boost if(state_diff[0] < 0): # got down print("boost down") self.scale[2] = vert_noise_boost elif(state_diff[0] > 0): # got up print("boost up") self.scale[0] = vert_noise_boost print("Current state %s" %self.agent_state) print("least_visited %s" %least_visited) print("state_diff %s" %state_diff) print("scale: %s" %self.scale) #pdb.set_trace() self.state = [self.pdf.sample()[0]*self.scale[i] for i in range(len(self.state))] self.updatetime = self.t + self.period