def from_json(
        self, json_str: str, scenes_dir: Optional[str] = None
) -> None:
        """Populate this dataset from its JSON serialization.

        Every top-level key of the decoded JSON is first copied onto the
        instance through ``__dict__``.  The two vocabularies and every
        episode (with its question, goals, view points and shortest paths)
        are then rebuilt as typed objects.

        Args:
            json_str: JSON document; the keys ``answer_vocab``,
                ``question_vocab`` and ``episodes`` are read here.
            scenes_dir: when given, ``DEFAULT_SCENE_PATH_PREFIX`` is
                stripped from each episode's ``scene_id`` (if present) and
                the remainder is joined onto this directory.
        """
        payload = json.loads(json_str)
        self.__dict__.update(payload)

        # The raw vocab dicts were just copied onto self; wrap their word
        # lists in VocabDict objects.
        raw_answer_vocab = self.answer_vocab
        self.answer_vocab = VocabDict(word_list=raw_answer_vocab["word_list"])
        raw_question_vocab = self.question_vocab
        self.question_vocab = VocabDict(
            word_list=raw_question_vocab["word_list"])

        relocate_scenes = scenes_dir is not None
        prefix_length = len(DEFAULT_SCENE_PATH_PREFIX)

        for slot, raw_episode in enumerate(payload["episodes"]):
            episode = EQAEpisode(**raw_episode)

            if relocate_scenes:
                scene = episode.scene_id
                if scene.startswith(DEFAULT_SCENE_PATH_PREFIX):
                    scene = scene[prefix_length:]
                episode.scene_id = os.path.join(scenes_dir, scene)

            episode.question = QuestionData(**episode.question)

            goals = episode.goals
            for g_slot, raw_goal in enumerate(goals):
                typed_goal = ObjectGoal(**raw_goal)
                goals[g_slot] = typed_goal
                view_points = typed_goal.view_points
                if view_points is None:
                    continue
                # Replace the raw dicts in place so the goal keeps the
                # same list object.
                view_points[:] = [
                    AgentState(**raw_state) for raw_state in view_points
                ]

            paths = episode.shortest_paths
            if paths is not None:
                for path in paths:
                    path[:] = [ShortestPathPoint(**raw_pt) for raw_pt in path]

            # Overwrite the raw dict at the same index with the typed episode.
            self.episodes[slot] = episode
Beispiel #2
0
    def from_json(
        self, json_str: str, scenes_dir: Optional[str] = None
    ) -> None:
        """Load vocabularies, scene connectivity and episodes from JSON.

        Reads the top-level keys ``train_vocab``, ``trainval_vocab``,
        ``BERT_vocab``, ``mini_alignments``, ``scenes`` and ``episodes``.
        Each episode dict is turned into a ``VLNEpisode`` whose goals are
        replaced by ``ViewpointData`` objects looked up in the connectivity
        data, and the result is appended to ``self.episodes``.

        Args:
            json_str: JSON document describing the dataset.
            scenes_dir: when given, ``DEFAULT_SCENE_PATH_PREFIX`` is
                stripped from each episode's ``scene_id`` (if present) and
                the remainder is joined onto this directory.
        """
        payload = json.loads(json_str)
        # NOTE: this one list object is handed to every goal's AgentState.
        fixed_rotation = [0, 0, 0, 1]

        train_words = payload["train_vocab"]["word_list"]
        self.train_vocab = VocabDict(word_list=train_words)
        trainval_words = payload["trainval_vocab"]["word_list"]
        self.trainval_vocab = VocabDict(word_list=trainval_words)

        self.action_tokens = payload["BERT_vocab"]["action_tokens"]
        self.mini_alignments = payload["mini_alignments"]
        self.scenes = payload["scenes"]
        self.connectivity = load_connectivity(
            self.config.CONNECTIVITY_PATH, self.scenes
        )

        for raw_episode in payload["episodes"]:
            # The starting viewpoint is the first goal, posed at the
            # episode's start position and rotation.
            first_goal = raw_episode["goals"][0]
            start_state = AgentState(
                position=raw_episode["start_position"],
                rotation=raw_episode["start_rotation"],
            )
            raw_episode["curr_viewpoint"] = ViewpointData(
                image_id=first_goal, view_point=start_state
            )

            # These two entries are removed before the dict is expanded
            # into VLNEpisode keyword arguments.
            token_ids = raw_episode["instruction_encoding"]
            token_mask = raw_episode["mask"]
            del raw_episode["instruction_encoding"]
            del raw_episode["mask"]
            episode = VLNEpisode(**raw_episode)

            if scenes_dir is not None:
                scene = episode.scene_id
                if scene.startswith(DEFAULT_SCENE_PATH_PREFIX):
                    scene = scene[len(DEFAULT_SCENE_PATH_PREFIX):]
                episode.scene_id = os.path.join(scenes_dir, scene)

            episode.instruction = InstructionData(
                instruction=raw_episode["instruction"],
                tokens=token_ids,
                tokens_length=sum(token_mask),
                mask=token_mask,
            )

            scan = episode.scan
            for slot, node_index in enumerate(episode.goals):
                graph = self.connectivity[scan]
                node_id = graph["idxtoid"][node_index]
                node_state = AgentState(
                    position=graph["viewpoints"][node_id],
                    rotation=fixed_rotation,
                )
                episode.goals[slot] = ViewpointData(
                    image_id=node_index, view_point=node_state
                )

            first_image = episode.goals[0].image_id
            last_image = episode.goals[-1].image_id
            episode.distance = self.get_distance_to_target(
                scan, first_image, last_image
            )
            self.episodes.append(episode)
Beispiel #3
0
    def _save_vqa_results(
        self,
        ckpt_idx: int,
        episode_ids: torch.Tensor,
        questions: torch.Tensor,
        images: torch.Tensor,
        pred_scores: torch.Tensor,
        gt_answers: torch.Tensor,
        q_vocab_dict: VocabDict,
        ans_vocab_dict: VocabDict,
    ) -> None:

        r"""Log and save the VQA result for the first sample of a batch.

        Only index 0 of every batch argument is used.  The question is
        decoded to a string, the highest-scoring answer index and the
        ground-truth index are mapped to words through the sorted keys of
        the answer vocabulary, all three are logged, and an image is written
        via ``save_vqa_image_results``.

        Args:
            ckpt_idx: idx of checkpoint being evaluated
            episode_ids: episode ids of batch
            questions: input questions to model
            images: images' tensor containing input frames
            pred_scores: model prediction scores
            gt_answers: ground truth answers
            q_vocab_dict: Question VocabDict
            ans_vocab_dict: Answer VocabDict

        Returns:
            None
        """
        first_episode_id = episode_ids[0].item()
        first_question = questions[0]
        frames = images[0]
        gt_index = gt_answers[0]
        first_scores = pred_scores[0]

        question_text = q_vocab_dict.token_idx_2_string(first_question)

        _, best_index = first_scores.max(0)
        # Answer indices refer to positions in the sorted vocabulary keys.
        answer_words = sorted(ans_vocab_dict.word2idx_dict.keys())
        predicted_word = answer_words[best_index]
        expected_word = answer_words[gt_index]

        log_lines = (
            ("Question: {}", question_text),
            ("Predicted answer: {}", predicted_word),
            ("Ground-truth answer: {}", expected_word),
        )
        for template, value in log_lines:
            logger.info(template.format(value))

        # RESULTS_DIR is a template with a ``split`` placeholder.
        results_dir = self.config.RESULTS_DIR.format(
            split=self.config.TASK_CONFIG.DATASET.SPLIT
        )
        image_name = "ckpt_{}_{}_image.jpg".format(ckpt_idx, first_episode_id)
        image_path = os.path.join(results_dir, image_name)

        save_vqa_image_results(
            frames, question_text, predicted_word, expected_word, image_path
        )
Beispiel #4
0
    def from_json(
        self, json_str: str, scenes_dir: Optional[str] = None
    ) -> None:
        """Load the instruction vocabulary and episodes from JSON.

        Reads the top-level keys ``instruction_vocab`` and ``episodes``.
        Each episode dict becomes a ``VLNEpisode`` whose instruction and
        goals are rebuilt as ``InstructionData`` and ``NavigationGoal``
        objects, and the episode is appended to ``self.episodes``.

        Args:
            json_str: JSON document describing the dataset.
            scenes_dir: when given, ``DEFAULT_SCENE_PATH_PREFIX`` is
                stripped from each episode's ``scene_id`` (if present) and
                the remainder is joined onto this directory.
        """
        payload = json.loads(json_str)
        vocab_words = payload["instruction_vocab"]["word_list"]
        self.instruction_vocab = VocabDict(word_list=vocab_words)

        relocate_scenes = scenes_dir is not None
        prefix_length = len(DEFAULT_SCENE_PATH_PREFIX)

        for raw_episode in payload["episodes"]:
            episode = VLNEpisode(**raw_episode)

            if relocate_scenes:
                scene = episode.scene_id
                if scene.startswith(DEFAULT_SCENE_PATH_PREFIX):
                    scene = scene[prefix_length:]
                episode.scene_id = os.path.join(scenes_dir, scene)

            episode.instruction = InstructionData(**episode.instruction)

            # Swap each raw goal dict for a typed goal, keeping the list.
            goals = episode.goals
            for slot, raw_goal in enumerate(goals):
                goals[slot] = NavigationGoal(**raw_goal)

            self.episodes.append(episode)