Example #1
    def tick(self):
        if self.t > self.updatetime:
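            # only recompute the exploration boost once every self.period ticks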
            self.scale = [0.0 for _ in range(self.action_dimension)]
            # the least-visited direction could also be found by checking all the
            # tiles and weighting them by their last visit time, instead of just
            # looking for the minimum
            min_list = []
            min_val = self.state_visited[0][0]
            # collect every tile that shares the global minimum visit time
            # (a plain O(n) scan; a gradient-descent-style search would only be
            # worth it if the grid were too big to iterate over)
            for i in range(len(self.state_visited)):
                for j in range(len(self.state_visited[i])):
                    if self.state_visited[i][j] == min_val:
                        min_list.append([i, j])
                    elif self.state_visited[i][j] < min_val:
                        min_val = self.state_visited[i][j]
                        min_list = [[i, j]]
            # average the positions of the minimum tiles (runtime O(n)) to get a
            # single least-visited target
            total = [0.0 for _ in range(self.grid_dimension)]
            for val in min_list:
                total[0] += val[0] - self.xoffset
                total[1] += val[1] - self.yoffset
            # divide by the number of minimum tiles, not by len(total)
            least_visited = [total[0] / len(min_list), total[1] / len(min_list)]
            # convert the average minimum position into a noise scale; actions
            # are encoded as up, right, down, left, and the scale is proportional
            # to how long ago the region was last visited
            closest_min = min_list[0]
            min_state_dist = HRLutils.distance(self.agent_state, min_list[0])
            for min_loc in min_list:
                state_dist = HRLutils.distance(self.agent_state, min_loc)
                if state_dist < min_state_dist:
                    closest_min = min_loc
                    min_state_dist = state_dist

            least_visited[0] += self.xoffset
            least_visited[1] += self.yoffset
            state_diff = HRLutils.difference(self.agent_state, least_visited)
            # state_diff catches the corner case where the least-visited tile is the
            # one the agent is already on (no boost is applied along a zero-difference axis)

            # to summarize the noise boost: it grows with the time since the region was last
            # visited and shrinks with distance, so far-away or recently visited regions get a weaker boost
            # index 1 is the left/right axis and index 0 the up/down axis, matching
            # the grid's [row, column] indexing and the mapping below
            hor_min_dist = abs(self.agent_state[1] - closest_min[1])
            hor_noise_boost = (
                (self.t - min_val) * self.time_constant
                + (1 / (1 + hor_min_dist)) * self.distance_constant
            ) * (state_diff[1] != 0)

            vert_min_dist = abs(self.agent_state[0] - closest_min[0])
            vert_noise_boost = (
                (self.t - min_val) * self.time_constant
                + (1 / (1 + vert_min_dist)) * self.distance_constant
            ) * (state_diff[0] != 0)

            # map the sign of the state difference onto the action encoding
            # (up, right, down, left)
            if state_diff[1] > 0:
                # go left
                print("boost left")
                self.scale[3] = hor_noise_boost
            elif state_diff[1] < 0:
                # go right
                print("boost right")
                self.scale[1] = hor_noise_boost

            if state_diff[0] < 0:
                # go down
                print("boost down")
                self.scale[2] = vert_noise_boost
            elif state_diff[0] > 0:
                # go up
                print("boost up")
                self.scale[0] = vert_noise_boost

            print("Current state %s" %self.agent_state)
            print("least_visited %s" %least_visited)
            print("state_diff %s" %state_diff)
            print("scale: %s" %self.scale)
            #pdb.set_trace()
            self.state = [self.pdf.sample()[0]*self.scale[i] for i in range(len(self.state))]
            self.updatetime = self.t + self.period
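
The snippet above leans on two HRLutils helpers whose implementations are not shown. The stand-ins below are a minimal sketch under assumed semantics (Euclidean distance and element-wise difference, with the sign convention inferred from the direction comments above); the real HRLutils API may differ.

import math

def distance(a, b):
    # Euclidean distance between two grid positions given as [row, col] lists
    # (assumed semantics for HRLutils.distance)
    return math.sqrt(sum((ai - bi) ** 2 for ai, bi in zip(a, b)))

def difference(a, b):
    # element-wise difference a - b (assumed semantics for HRLutils.difference);
    # with a = agent_state and b = least_visited, a positive entry means the
    # agent's index is larger than the target's along that axis
    return [ai - bi for ai, bi in zip(a, b)]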