Esempio n. 1
0
def test_action_space_shortest_path():
    config = get_config()
    if not os.path.exists(config.SIMULATOR.SCENE):
        pytest.skip("Please download Habitat test data to data folder.")

    env = habitat.Env(config=config, dataset=None)

    # action space shortest path
    source_position = env.sim.sample_navigable_point()
    angles = [x for x in range(-180, 180, config.SIMULATOR.TURN_ANGLE)]
    angle = np.radians(np.random.choice(angles))
    source_rotation = [0, np.sin(angle / 2), 0, np.cos(angle / 2)]
    source = AgentState(source_position, source_rotation)

    reachable_targets = []
    unreachable_targets = []
    while len(reachable_targets) < 5:
        position = env.sim.sample_navigable_point()
        angles = [x for x in range(-180, 180, config.SIMULATOR.TURN_ANGLE)]
        angle = np.radians(np.random.choice(angles))
        rotation = [0, np.sin(angle / 2), 0, np.cos(angle / 2)]
        if env.sim.geodesic_distance(source_position, position) != np.inf:
            reachable_targets.append(AgentState(position, rotation))

    while len(unreachable_targets) < 3:
        position = env.sim.sample_navigable_point()
        angles = [x for x in range(-180, 180, config.SIMULATOR.TURN_ANGLE)]
        angle = np.radians(np.random.choice(angles))
        rotation = [0, np.sin(angle / 2), 0, np.cos(angle / 2)]
        if env.sim.geodesic_distance(source_position, position) == np.inf:
            unreachable_targets.append(AgentState(position, rotation))

    targets = reachable_targets
    shortest_path1 = env.sim.action_space_shortest_path(source, targets)
    assert shortest_path1 != []

    targets = unreachable_targets
    shortest_path2 = env.sim.action_space_shortest_path(source, targets)
    assert shortest_path2 == []

    targets = reachable_targets + unreachable_targets
    shortest_path3 = env.sim.action_space_shortest_path(source, targets)

    # shortest_path1 should be identical to shortest_path3
    assert len(shortest_path1) == len(shortest_path3)
    for i in range(len(shortest_path1)):
        assert np.allclose(shortest_path1[i].position,
                           shortest_path3[i].position)
        assert np.allclose(shortest_path1[i].rotation,
                           shortest_path3[i].rotation)
        assert shortest_path1[i].action == shortest_path3[i].action

    targets = unreachable_targets + [source]
    shortest_path4 = env.sim.action_space_shortest_path(source, targets)
    assert len(shortest_path4) == 1
    assert np.allclose(shortest_path4[0].position, source.position)
    assert np.allclose(shortest_path4[0].rotation, source.rotation)
    assert shortest_path4[0].action is None

    env.close()
Esempio n. 2
0
    def from_json(self,
                  json_str: str,
                  scenes_dir: Optional[str] = None) -> None:
        deserialized = json.loads(json_str)
        default_rotation = [0, 0, 0, 1]

        self.train_vocab = VocabDict(
            word_list=deserialized["train_vocab"]["word_list"])
        self.trainval_vocab = VocabDict(
            word_list=deserialized["trainval_vocab"]["word_list"])

        self.action_tokens = deserialized["BERT_vocab"]["action_tokens"]
        self.mini_alignments = deserialized["mini_alignments"]

        self.scenes = deserialized["scenes"]

        self.connectivity = load_connectivity(self.config.CONNECTIVITY_PATH,
                                              self.scenes)

        for ep_index, r2r_episode in enumerate(deserialized["episodes"]):

            r2r_episode["curr_viewpoint"] = ViewpointData(
                image_id=r2r_episode["goals"][0],
                view_point=AgentState(position=r2r_episode["start_position"],
                                      rotation=r2r_episode["start_rotation"]))
            instruction_encoding = r2r_episode["instruction_encoding"]
            mask = r2r_episode["mask"]
            del r2r_episode["instruction_encoding"]
            del r2r_episode["mask"]
            episode = VLNEpisode(**r2r_episode)

            if scenes_dir is not None:
                if episode.scene_id.startswith(DEFAULT_SCENE_PATH_PREFIX):
                    episode.scene_id = episode.scene_id[
                        len(DEFAULT_SCENE_PATH_PREFIX):]
                episode.scene_id = os.path.join(scenes_dir, episode.scene_id)
            episode.instruction = InstructionData(
                instruction=r2r_episode["instruction"],
                tokens=instruction_encoding,
                tokens_length=sum(mask),
                mask=mask)

            scan = episode.scan
            for v_index, viewpoint in enumerate(episode.goals):
                viewpoint_id = self.connectivity[scan]["idxtoid"][viewpoint]
                pos = self.connectivity[scan]["viewpoints"][viewpoint_id]
                rot = default_rotation
                episode.goals[v_index] = ViewpointData(image_id=viewpoint,
                                                       view_point=AgentState(
                                                           position=pos,
                                                           rotation=rot))
            episode.distance = self.get_distance_to_target(
                scan, episode.goals[0].image_id, episode.goals[-1].image_id)
            self.episodes.append(episode)
    def from_json(self,
                  json_str: str,
                  scenes_dir: Optional[str] = None) -> None:
        deserialized = json.loads(json_str)
        self.__dict__.update(deserialized)
        self.answer_vocab = VocabDict(word_list=self.answer_vocab["word_list"])
        self.question_vocab = VocabDict(
            word_list=self.question_vocab["word_list"])

        for ep_index, episode in enumerate(deserialized["episodes"]):
            episode = EQAEpisode(**episode)
            if scenes_dir is not None:
                if episode.scene_id.startswith(DEFAULT_SCENE_PATH_PREFIX):
                    episode.scene_id = episode.scene_id[
                        len(DEFAULT_SCENE_PATH_PREFIX):]
                episode.scene_id = os.path.join(scenes_dir, episode.scene_id)
            episode.question = QuestionData(**episode.question)
            for g_index, goal in enumerate(episode.goals):
                episode.goals[g_index] = ObjectGoal(**goal)
                new_goal = episode.goals[g_index]
                if new_goal.view_points is not None:
                    for p_index, agent_state in enumerate(
                            new_goal.view_points):
                        new_goal.view_points[p_index] = AgentState(
                            **agent_state)
            if episode.shortest_paths is not None:
                for path in episode.shortest_paths:
                    for p_index, point in enumerate(path):
                        path[p_index] = ShortestPathPoint(**point)
            self.episodes[ep_index] = episode
Esempio n. 4
0
    def step(self, *args: Any, target: ViewpointData, **kwargs: Any):
        r"""Update ``_metric``, this method is called from ``Env`` on each
        ``step``.
        """
        position = target.view_point.position
        rotation = target.view_point.rotation

        #print("Teleport Action to pos ", target.view_point.position)
        if not isinstance(rotation, list):
            rotation = list(rotation)

        if not self._sim.is_navigable(position):
            #print("The destination is not navigable, running snap point")
            # is not navigable then we search for a location close to the target
            new_position = np.array(position, dtype='f')
            new_position = self._sim._sim.pathfinder.snap_point(new_position)
            if np.isnan(new_position[0]):
                #print("Snap point couldn't find a place to land, error.")
                return self._sim.get_observations_at()
            else:
                position = new_position  #.tolist()
                #print("New position found", position)

        if kwargs and "episode" in kwargs:
            last_viewpoint = kwargs["episode"].curr_viewpoint
            kwargs["episode"].curr_viewpoint = ViewpointData(
                image_id=target.image_id,
                view_point=AgentState(position=position, rotation=rotation))

        return self._sim.get_observations_at(position=position,
                                             rotation=rotation,
                                             keep_agent_at_new_pose=True)
    def from_json(self,
                  json_str: str,
                  scenes_dir: Optional[str] = None) -> None:
        deserialized = json.loads(json_str)
        if CONTENT_SCENES_PATH_FIELD in deserialized:
            self.content_scenes_path = deserialized[CONTENT_SCENES_PATH_FIELD]

        if "category_to_task_category_id" in deserialized:
            self.category_to_task_category_id = deserialized[
                "category_to_task_category_id"]

        if "category_to_scene_annotation_category_id" in deserialized:
            self.category_to_scene_annotation_category_id = deserialized[
                "category_to_scene_annotation_category_id"]

        if "category_to_mp3d_category_id" in deserialized:
            self.category_to_scene_annotation_category_id = deserialized[
                "category_to_mp3d_category_id"]

        assert len(self.category_to_task_category_id) == len(
            self.category_to_scene_annotation_category_id)

        assert set(self.category_to_task_category_id.keys()) == set(
            self.category_to_scene_annotation_category_id.keys(
            )), "category_to_task and category_to_mp3d must have the same keys"

        for episode in deserialized["episodes"]:
            episode = NavigationEpisode(**episode)

            if scenes_dir is not None:
                if episode.scene_id.startswith(DEFAULT_SCENE_PATH_PREFIX):
                    episode.scene_id = episode.scene_id[
                        len(DEFAULT_SCENE_PATH_PREFIX):]

                episode.scene_id = os.path.join(scenes_dir, episode.scene_id)

            for i in range(len(episode.goals)):
                episode.goals[i] = ObjectGoal(**episode.goals[i])

                for vidx, view in enumerate(episode.goals[i].view_points):
                    view_location = ObjectViewLocation(**view)
                    view_location.agent_state = AgentState(
                        **view_location.agent_state)
                    episode.goals[i].view_points[vidx] = view_location

            if episode.shortest_paths is not None:
                for path in episode.shortest_paths:
                    for p_index, point in enumerate(path):
                        point = {
                            "action": point,
                            "rotation": None,
                            "position": None,
                        }
                        path[p_index] = ShortestPathPoint(**point)

            self.episodes.append(episode)

        for i, ep in enumerate(self.episodes):
            ep.episode_id = str(i)
Esempio n. 6
0
    def __deserialize_goal(serialized_goal: Dict[str, Any]) -> ObjectGoal:
        g = ObjectGoal(**serialized_goal)

        for vidx, view in enumerate(g.view_points):
            view_location = ObjectViewLocation(**view)
            view_location.agent_state = AgentState(**view_location.agent_state)
            g.view_points[vidx] = view_location

        return g
    def __deserialize_goal(
            serialized_goal: Dict[str, Any]) -> SemanticAudioGoal:
        g = SemanticAudioGoal(**serialized_goal)

        for vidx, view in enumerate(g.view_points):
            view_location = ObjectViewLocation(view, iou=0)
            view_location.agent_state = AgentState(view_location.agent_state)
            g.view_points[vidx] = view_location

        return g
Esempio n. 8
0
    def _teacher_actions(self, observations, goal):
        action = ""
        action_args = {}
        navigable_locations = observations["adjacentViewpoints"]

        if goal == navigable_locations[0][1]:  # image_id
            action = "STOP"
        else:
            step_size = np.pi/6.0  # default step in R2R
            goal_location = None
            for location in navigable_locations:
                if location[1] == goal:  # image_id
                    goal_location = location
                    break
            # Check if the goal is visible
            if goal_location:

                rel_heading = goal_location[2]  # rel_heading
                rel_elevation = goal_location[3]  #rel_elevation

                if rel_heading > step_size:
                    action = "TURN_RIGHT"
                elif rel_heading < -step_size:
                    action = "TURN_LEFT"
                elif rel_elevation > step_size:
                    action = "LOOK_UP"
                elif rel_elevation < -step_size:
                    action = "LOOK_DOWN"
                else:
                    if goal_location[0] == 1:  # restricted
                        print("WARNING: The target was not in the" +
                              " Field of view, but the step action " +
                              "is going to be performed")
                    action = "TELEPORT"  # Move forward
                    image_id = goal
                    posB = goal_location[4:7]  # start_position
                    rotA = navigable_locations[0][14:18]  # camera_rotation
                    viewpoint = ViewpointData(
                        image_id=image_id,
                        view_point=AgentState(position=posB, rotation=rotA)
                    )
                    action_args.update({"target": viewpoint})
            else:
                # Episode Failure
                action = 'STOP'
                print("Target position %s not visible, " % goal +
                      "This is an error in the system")
                '''
                for ob in observations["images"]:
                    image = ob
                    image =  image[:,:, [2,1,0]]
                    cv2.imshow("RGB", image)
                    cv2.waitKey(0)
                '''
        return action, action_args
Esempio n. 9
0
def test_action_space_shortest_path():
    config = get_config()
    if not os.path.exists(config.SIMULATOR.SCENE):
        pytest.skip("Please download Habitat test data to data folder.")

    env = habitat.Env(config=config, dataset=None)

    # action space shortest path
    source_position = env.sim.sample_navigable_point()
    angles = [x for x in range(-180, 180, config.SIMULATOR.TURN_ANGLE)]
    angle = np.radians(np.random.choice(angles))
    source_rotation = [0, np.sin(angle / 2), 0, np.cos(angle / 2)]
    source = AgentState(source_position, source_rotation)

    reachable_targets = []
    unreachable_targets = []
    while len(reachable_targets) < 5:
        position = env.sim.sample_navigable_point()
        angles = [x for x in range(-180, 180, config.SIMULATOR.TURN_ANGLE)]
        angle = np.radians(np.random.choice(angles))
        rotation = [0, np.sin(angle / 2), 0, np.cos(angle / 2)]
        if env.sim.geodesic_distance(source_position, position) != np.inf:
            reachable_targets.append(AgentState(position, rotation))

    while len(unreachable_targets) < 3:
        position = env.sim.sample_navigable_point()
        # Change height of the point to make it unreachable
        position[1] = 100
        angles = [x for x in range(-180, 180, config.SIMULATOR.TURN_ANGLE)]
        angle = np.radians(np.random.choice(angles))
        rotation = [0, np.sin(angle / 2), 0, np.cos(angle / 2)]
        if env.sim.geodesic_distance(source_position, position) == np.inf:
            unreachable_targets.append(AgentState(position, rotation))

    targets = reachable_targets
    shortest_path1 = env.action_space_shortest_path(source, targets)
    assert shortest_path1 != []

    targets = unreachable_targets
    shortest_path2 = env.action_space_shortest_path(source, targets)
    assert shortest_path2 == []
    env.close()
Esempio n. 10
0
    def xy_yaw_to_agent_state(self, x, y, yaw):
        # xy: spot has x point forward, y point left; but habitat agent state has x point right, z point back
        position = np.array([-y, 0., -x], dtype=np.float32)

        # yaw: magic conversion... debugged by taking habitat agent_state().rotation and reproduce it from yaw
        # it is roughly the inverse of this function
        # https://github.com/facebookresearch/habitat-lab/blob/b7a93bc493f7fb89e5bf30b40be204ff7b5570d7/habitat/tasks/nav/nav.py#L864
        # quaternion.from_euler_angles(np.pi, -true_yaw - np.pi/2, np.pi)
        rotation = quaternion.from_euler_angles(np.pi, -yaw,
                                                np.pi)  # - np.pi/2

        return AgentState(position=position, rotation=rotation)
Esempio n. 11
0
    def _teleport_target(self, observations):
        action = ""
        action_args = {}
        navigable_locations = observations["adjacentViewpoints"]

        for location in navigable_locations[1:]:
            if location[0] == 1:  # location is restricted
                continue
            elif location[0] == 0:  # Non restricted location
                action = "TELEPORT"
                image_id = location[1]
                posB = location[4:7]  # start_position
                rotA = navigable_locations[0][14:18]  # camera_rotation
                viewpoint = ViewpointData(image_id=image_id,
                                          view_point=AgentState(position=posB,
                                                                rotation=rotA))
                action_args = {"target": viewpoint}
                #print("the target is ", location)
                return {"action": action, "action_args": action_args}

        return {"action": action, "action_args": action_args}
Esempio n. 12
0
    def act(self, observations, elapsed_steps, previous_step_collided):
        action = ""
        action_args = {}
        visible_points = sum([
            1 - ob[0] for ob in observations["adjacentViewpoints"]
            if ob[0] != -1
        ])

        if elapsed_steps == 0:
            # Turn right (direction choosing)
            action = "TURN_RIGHT"
            num_steps = random.randint(0, 11)
            if num_steps > 0:
                action_args = {"num_steps": num_steps}

        # Stop after teleporting 6 times.
        elif elapsed_steps >= 5:
            action = "STOP"

        # Turn right until we can go forward
        elif visible_points > 0:
            for ob in observations["adjacentViewpoints"]:
                if not ob[0]:
                    goal = ob
                    action = "TELEPORT"
                    image_id = goal[1]
                    pos = goal[4:7]  # agent_position

                    # Keeping the same rotation as the previous step
                    # camera rotation
                    rot = observations["adjacentViewpoints"][0][14:18]

                    viewpoint = ViewpointData(image_id=image_id,
                                              view_point=AgentState(
                                                  position=pos, rotation=rot))
                    action_args.update({"target": viewpoint})
                    break
        else:
            action = "TURN_RIGHT"
        return {"action": action, "action_args": action_args}
Esempio n. 13
0
    def act(self, observations, episode, goal):
        # Initialization when the action is start
        batch_size = 1

        if self.previous_action == "<start>":
            # should be a tensor of logits
            if episode.instruction.tokens_length < self.max_tokens:
                pad = self.max_tokens - episode.instruction.tokens_length
                tokens = episode.instruction.tokens
                tokens.extend([0] * pad)
                tokens = np.array([tokens])
            else:
                tokens = episode.instruction.tokens[:self.max_tokens]
                tokens = np.array([tokens])

            seq_lengths = np.argmax(tokens == 0, axis=1)
            seq_lengths[seq_lengths == 0] = self.max_tokens
            seq_tensor = torch.from_numpy(tokens).to('cuda')
            seq_lengths = torch.from_numpy(seq_lengths).to('cuda')
            seq_mask = (seq_tensor == 0)[:, :seq_lengths[0]]
            self.seq_mask = seq_mask.to('cuda')
            tokens = None

            # Forward through encoder, giving initial hidden state and memory cell for decoder
            self.ctx, self.h_t, self.c_t = self.encoder(
                seq_tensor, seq_lengths)
            self.a_t = torch.ones(batch_size, requires_grad=False).long() * \
                    self.model_actions.index(self.previous_action)
            self.a_t = self.a_t.unsqueeze(0).to('cuda')

        im = observations["rgb"][:, :, [2, 1, 0]]
        f_t = self._get_image_features(im)  #.to('cuda')

        im = None
        ended = np.array(
            [False] *
            batch_size)  # Indices match permuation of the model, not env

        # Do a sequence rollout and calculate the loss
        self.h_t, self.c_t, alpha, logit = self.decoder(
            self.a_t.view(-1, 1), f_t, self.h_t, self.c_t, self.ctx,
            self.seq_mask)
        # Mask outputs where agent can't move forward
        # Release memory?
        f_t = None
        visible_points = sum([
            1 - ob[0] for ob in observations["adjacentViewpoints"]
            if ob[0] != -1
        ])

        if visible_points == 0:
            logit[0, self.model_actions.index('TELEPORT')] = -float('inf')

        # Supervised training
        target_action, action_args = self._teacher_actions(observations, goal)
        target = torch.LongTensor(1)
        target[0] = self.model_actions.index(target_action)
        target = target.to('cuda')

        self.loss += self.criterion(logit, target)
        #print(logit)
        # Determine next model inputs
        if self.feedback == 'teacher':
            self.a_t = target  # teacher forcing
            action = target_action
        elif self.feedback == 'argmax':
            _, self.a_t = logit.max(1)  # student forcing - argmax
            self.a_t = self.a_t.detach()
            action = self.model_actions[self.a_t.item()]
            action_args = {
            }  # What happens if you need to teleport? How to choose?
        elif self.feedback == 'sample':
            probs = F.softmax(logit, dim=1)
            m = D.Categorical(probs)
            self.a_t = m.sample()  # sampling an action from model
            action = self.model_actions[self.a_t.item()]
            action_args = {}
        else:
            sys.exit('Invalid feedback option')

        # Teleport to the next location, the one with lower rel heading

        if action == "TELEPORT" and self.feedback != 'teacher':
            sorted_obs = sorted(
                observations["adjacentViewpoints"][1:],
                key=lambda x: abs(x[2]))  # sort by relative heading
            for ob in sorted_obs:
                if not ob[0]:  # restricted
                    next_location = ob
                    action = "TELEPORT"
                    image_id = next_location[1]
                    pos = next_location[4:7]  # agent_position

                    # Keeping the same rotation as the previous step
                    # camera rotation
                    rot = observations["adjacentViewpoints"][0][14:18]
                    #print("Teleporting to ",image_id, goal, pos, rot, ob,
                    #[ ob[:3] for ob in sorted_obs if ob[0] != -1])
                    viewpoint = ViewpointData(image_id=image_id,
                                              view_point=AgentState(
                                                  position=pos, rotation=rot))
                    action_args = {"target": viewpoint}
                    break
        sorted_obs = None
        #print(action, target_action, self.loss.item())
        #self.predicted_actions.append(action)
        self.previous_action = action

        return {"action": action, "action_args": action_args}
def generate_objectnav_goals_by_category(sim: Simulator,
                                         task_category) -> ObjectGoal:

    scene = sim.semantic_annotations()
    target = dict()
    for obj in scene.objects:
        if obj is not None:
            # print(
            #     f"Object id:{obj.id}, category:{obj.category.name()}, Index:{obj.category.index()}"
            #     f" center:{obj.aabb.center}, dims:{obj.aabb.sizes}"
            # )
            if obj.category.name() in task_category.keys():
                if obj.category.index() in target:
                    target[obj.category.index()].append(obj)
                else:
                    target[obj.category.index()] = [obj]

    for i in target:
        # print("target episode len:", len(target[i]))
        object_category = target[i][0].category.name()
        # print("object_category :", object_category)
        str_goal = f"{os.path.basename(sim.config.SCENE)}_{object_category}"
        # print(str_goal)

        goals_by = []
        for j in range(len(target[i])):

            object_view_list = []
            for x in np.arange(
                    target[i][j].aabb.center[0] -
                    target[i][j].aabb.sizes[0] / 2.0,
                    target[i][j].aabb.center[0] +
                    target[i][j].aabb.sizes[0] / 2.0, 0.1):
                for y in np.arange(
                        target[i][j].aabb.center[2] -
                        target[i][j].aabb.sizes[2] / 2.0,
                        target[i][j].aabb.center[2] +
                        target[i][j].aabb.sizes[2] / 2.0, 0.1):
                    # print("x, y: ", type(x)) # np.float6
                    # print("x, y: ", type(target[i][j].aabb.center[1])) # np.floa32 报错
                    object_view_list.append(
                        ObjectViewLocation(
                            agent_state=AgentState(position=[
                                x, target[i][j].aabb.center[1].astype(
                                    np.float64), y
                            ]),
                            iou=0.0,
                        ))
            # print(type(object_view_list))
            goal_by_object = ObjectGoal(
                position=target[i][j].aabb.center,
                radius=0.5,
                object_id=int(re.findall(r"\d+", target[i][j].id)[0]),
                object_name=target[i][j].id,
                object_category=object_category,
                view_points=object_view_list,
            )

            goals_by.append(goal_by_object)

        yield str_goal, goals_by