def finished(self, state):
    if self.is_not_pushing_box(state):
        flat = state.flatten()
        if flat[7] < -.25:
            return dist(state[5:7], np.zeros(2)) < .4
        return dist(state[5:8], np.zeros(3)) < .3
    else:
        flat = state.flatten()
        if flat[7] < -.2:  # hole
            # and dist(state[:2], state[5:7]) < self.box_distance_finished_condition
            return flat[2] < -.15
        return dist(state[:3], state[5:8]) < self.box_distance_finished_condition
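
Every example here calls a dist helper (and numpy as np) that the snippets never define. A minimal sketch, assuming dist is plain Euclidean distance:

import numpy as np

def dist(a, b):
    # Assumed helper (not shown in the source): Euclidean distance.
    return np.linalg.norm(np.asarray(a) - np.asarray(b))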
Example #2
def get_path_to_hole(self, start, finish, from_box_to_hole=True):
    priority_queue = []
    node_to_prev_node = {}  # maps a node to the previous node that had called dijkstra on it
    visited = set()

    heapq.heappush(priority_queue, (0, start))
    visited.add(start)

    while priority_queue:
        distance_to_curr, curr_node = heapq.heappop(priority_queue)
        visited.add(curr_node)
        if curr_node == finish:
            break

        # When leaving the start (box) node all neighbors are allowed;
        # afterwards only traversable neighbors are expanded.
        neighbors = curr_node.neighbors if (
            from_box_to_hole and curr_node == start
        ) else curr_node.traversable_neighbors

        if finish in curr_node.neighbors and finish not in neighbors:  # If the hole is nearby, then append to neighbors
            neighbors = neighbors + [finish]
        for n in neighbors:
            if n not in visited:  # if it hasn't been visited, there's a possibility of getting a better path
                nodes_in_pq = [entry[1] for entry in priority_queue]
                if n in nodes_in_pq:  # if it's currently in the pq, update the priority
                    index = nodes_in_pq.index(n)
                    curr_dist = distance_to_curr + dist(n.pos, curr_node.pos)
                    prev_dist = priority_queue[index][0]
                    if curr_dist < prev_dist:
                        priority_queue[index] = (curr_dist, n)
                        heapq.heapify(priority_queue)  # restore the heap invariant after the in-place update
                        node_to_prev_node[n] = curr_node
                else:  # otherwise add it to the pq
                    heapq.heappush(priority_queue, (distance_to_curr + dist(n.pos, curr_node.pos), n))
                    node_to_prev_node[n] = curr_node

    # Getting the path:
    if finish not in node_to_prev_node:
        return [start]  # Returns only the first node if there is no path

    curr_node = finish
    path = [finish]
    while curr_node != start:
        curr_node = node_to_prev_node[curr_node]
        path.insert(0, curr_node)
    return path
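
The in-queue priority update above has to rescan the heap's backing list and re-heapify, which makes each decrease-key O(n). A common alternative is lazy deletion: push a fresh entry instead of updating, and discard stale entries when they are popped. A minimal free-standing sketch, assuming nodes expose pos and traversable_neighbors as in the method above (this is not the original class method):

import heapq
import itertools

def dijkstra_lazy(start, finish, dist):
    # Lazy-deletion Dijkstra: stale heap entries are skipped on pop.
    counter = itertools.count()  # tie-breaker so heapq never compares nodes
    best = {start: 0.0}          # best known distance to each node
    prev = {}                    # predecessor map for path reconstruction
    pq = [(0.0, next(counter), start)]
    while pq:
        d, _, node = heapq.heappop(pq)
        if d > best.get(node, float('inf')):
            continue  # stale entry; a shorter path was already found
        if node == finish:
            break
        for n in node.traversable_neighbors:
            nd = d + dist(node.pos, n.pos)
            if nd < best.get(n, float('inf')):
                best[n] = nd
                prev[n] = node
                heapq.heappush(pq, (nd, next(counter), n))  # push; never decrease-key
    if finish not in prev:
        return [start]  # no path: mirror the original's fallback
    path = [finish]
    while path[0] != start:
        path.insert(0, prev[path[0]])
    return path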
Example #3
def update_hole_pheromones(self, path):
    # Hole pheromone ids sit below PHEROMONE_ID_STAGGER.
    hole_pheromones = {p for p in self.pheromone_set if 0 <= p < PHEROMONE_ID_STAGGER}
    for node in path:
        neighbors = node.traversable_neighbors
        if len(neighbors) > 0:
            for p in hole_pheromones:
                # Take the strongest neighboring pheromone, attenuated
                # exponentially with distance.
                max_p = max([n.pheromones[p] * np.exp(-.1 * dist(node.pos, n.pos)) for n in neighbors])
                if node.pheromones[p] < max_p:
                    node.pheromones[p] = max_p
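
The update never lowers a pheromone: each node on the path takes the strongest neighboring value decayed by exp(-.1 * distance). A tiny self-contained check with a stub Node class (hypothetical, for illustration only):

import numpy as np

class Node:
    def __init__(self, pos, pheromones):
        self.pos = np.array(pos, dtype=float)
        self.pheromones = pheromones
        self.traversable_neighbors = []

a = Node([0., 0.], {0: 0.0})
b = Node([1., 0.], {0: 5.0})
a.traversable_neighbors = [b]

# One propagation step: a inherits b's pheromone decayed over distance 1.
decayed = b.pheromones[0] * np.exp(-.1 * np.linalg.norm(a.pos - b.pos))
a.pheromones[0] = max(a.pheromones[0], decayed)
print(a.pheromones[0])  # ~4.52 == 5 * exp(-0.1)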
Example #4
def receive_state_info(self, msg):
    state = np.array(vrep.simxUnpackFloats(msg.data))
    if self.shut_down:
        msg = Vector3()
        msg.x = 1
        msg.y = 1
        self.pub.publish(msg)
    elif self.finished(state):
        if (not self.is_not_pushing_box(state)
                and state[7] < -.25
                and dist(state[5:7], np.zeros(2)) < 1):  # hole
            msg = Vector3()
            msg.x = -2
            msg.y = -2
        else:
            msg = Vector3()
            msg.x = 0
            msg.y = 0
        self.pub.publish(msg)

    if self.counter == 0 and not self.shut_down:
        if self.finished(state):
            if not self.waiting_for_target:
                rospy.sleep(1)
                self.publish_finished_to_map()
                self.waiting_for_target = True
                rospy.wait_for_message("/target" + str(self.robot_id), Vector3)
                self.waiting_for_target = False
        else:
            self.controller.goal = state.ravel()[:2]
            if self.is_not_pushing_box(state):
                # align yourself, otherwise travel towards the node
                action_index = 0 if abs(state[6]) > .5 else 1
            else:
                if any(np.isnan(state)):
                    print(state)
                assert not any(np.isnan(state))
                action_index = self.policy.get_action(state, testing_time=True, probabilistic=True)
            action_name = action_map[action_index]
            adjusted_state_for_controls = self.controller.feature_2_task_state(state)
            left_right_frequencies = self.controller.getPrimitive(adjusted_state_for_controls, action_name)
            msg = Vector3()
            msg.x = left_right_frequencies[0]
            msg.y = left_right_frequencies[1]
            self.pub.publish(msg)
    self.counter = (self.counter + 1) % self.period
Example #5
def gaussian_explore_q(self, s, testing_time):
    exploration = np.random.random()
    if exploration < self.explore and not testing_time:
        if len(self.model.transitions) > 400 and self.success_states.shape[0] >= 2:
            curr_size = self.success_states.shape[0] - 1
            if curr_size == 0:
                return np.random.randint(self.u_n)
            sample_size = min(1000, curr_size)
            success_states = self.success_states[1:, :]
            past_states = success_states[-sample_size:, :]

            actions = [i for i in range(self.u_n)]
            states = np.repeat(s, self.u_n, axis=0)
            states = self.concatenate_identifier(states)
            next_states = self.model.predict(states, actions).detach().numpy()

            # Earlier alternative (disabled): score next_states with a GMM
            # fit on past_states.
            # if self.gmm_counter % self.gmm_period == 0:
            #     self.gmm = GaussianMixture(n_components=1)
            #     self.gmm.fit(past_states)
            #
            # log_prob = self.gmm.score_samples(next_states)
            # log_prob = log_prob - np.min(log_prob)
            # log_prob = np.exp(log_prob)
            #
            # probs = log_prob / np.sum(log_prob)
            # print('probs: ', probs)
            # self.gmm_counter += 1
            # self.success_states = self.success_states[:5000, :]

            # Earlier alternative (disabled): weight next_states by the pdf of
            # a single multivariate Gaussian over past_states.
            # mean = np.mean(past_states, axis=0)
            # centered = past_states - mean
            # covariance = np.matmul(centered.T, centered) / (past_states.shape[0])
            # covariance = covariance + np.eye(covariance.shape[0]) * 1e-2
            # pdf_generator = lambda x: multivariate_normal.pdf(x, mean=mean, cov=covariance)
            #
            # pdf_of_next_states = np.apply_along_axis(pdf_generator, 1, next_states)
            # pdf_of_next_states = pdf_of_next_states - np.min(pdf_of_next_states)
            # probs = np.exp(pdf_of_next_states)
            # probs = probs / np.sum(probs)

            # Weight each action by how close the model predicts it lands to
            # the average successful end state.
            avg_end_state = np.mean(past_states, axis=0)
            probs = np.array([np.exp(-7 * dist(avg_end_state, state)) for state in next_states])
            probs = probs / np.sum(probs)

            uniform = np.ones((1, self.u_n)) / self.u_n
            q_lookahead = np.max(
                self.policy.get_q(self.concatenate_identifier(next_states)).detach().numpy(),
                axis=1)
            q_lookahead = np.exp(q_lookahead)
            q_lookahead = q_lookahead / np.sum(q_lookahead)  # computed but not mixed in below

            probs = self.weight_towards_model * probs + (1 - self.weight_towards_model) * uniform  # it's qlookahead rn
            self.weight_towards_model = max(.1, self.weight_towards_model * .999)  # anneal towards uniform, floored at .1
            probs = probs.flatten()
            probs = probs / np.sum(probs)  # just in case

            print('weight:', self.weight_towards_model, ' probs:', probs)
            choice = np.random.choice(self.u_n, p=probs)
            return actions[choice]
        else:
            return np.random.randint(self.u_n)
    else:
        return self.policy.get_action(
            self.concatenate_identifier(s),
            testing_time,
            probabilistic=(self.policy_action_selection == 'PROB'))
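
Stripped of the class plumbing, the exploration step is a softmax over negative distance from each predicted next state to the mean successful end state, mixed with a uniform distribution. A standalone sketch of that weighting (names and shapes are illustrative; next_states would come from the learned model):

import numpy as np

def exploration_probs(next_states, success_states, weight_towards_model, temp=7.0):
    # Favor actions whose predicted next state lands near the average
    # successful end state, then blend with uniform to keep exploring.
    avg_end_state = success_states.mean(axis=0)
    d = np.linalg.norm(next_states - avg_end_state, axis=1)
    probs = np.exp(-temp * d)
    probs /= probs.sum()
    uniform = np.full(len(next_states), 1.0 / len(next_states))
    mixed = weight_towards_model * probs + (1 - weight_towards_model) * uniform
    return mixed / mixed.sum()

rng = np.random.default_rng(0)
p = exploration_probs(rng.normal(size=(4, 3)), rng.normal(size=(100, 3)), .8)
print(p, p.sum())  # a valid probability distribution over 4 candidate actions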
Example #6
def get_detection_distance_between_nodes(self, first, second):
    first_position = deepcopy(first.pos)
    second_position = deepcopy(second.pos)
    # Offset each z coordinate by the total height of the boxes at the node.
    first_position[2] = first_position[2] - sum([b.height for b in first.box])
    second_position[2] = second_position[2] - sum([b.height for b in second.box])
    return dist(first_position, second_position)
Example #7
def get_distance_between_nodes(self, first, second):
    # Planar (x, y) distance; the z coordinate is ignored.
    return dist(first.pos[:2], second.pos[:2])