Example #1
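An object-navigation variant: the integer action is decoded into an action name via class_action_names(); choosing END ends the episode with success defined as the goal object being visible, and any failed environment action is reported back to the environment's location graph when the visibility cache is present.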
    def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
        assert isinstance(action, int)
        action = cast(int, action)

        action_str = self.class_action_names()[action]

        if action_str == END:
            self._took_end_action = True
            self._success = self._is_goal_object_visible()
            self.last_action_success = self._success
        else:
            self.env.step({"action": action_str})
            self.last_action_success = self.env.last_action_success

            if (
                not self.last_action_success
                and self._CACHED_LOCATIONS_FROM_WHICH_OBJECT_IS_VISIBLE is not None
            ):
                self.env.update_graph_with_failed_action(failed_action=action_str)

        step_result = RLStepResult(
            observation=self.get_observations(),
            reward=self.judge(),
            done=self.is_done(),
            info={"last_action_success": self.last_action_success},
        )
        return step_result
Example #2
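A point-navigation variant that, on top of the common pattern, records the agent's pose after every environment step (in self.path and task_info["followed_path"]), counts distinct moves in num_moves_made, and includes the raw action index in the step info.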
    def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
        assert isinstance(action, int)
        action = cast(int, action)

        action_str = self.action_names()[action]

        if action_str == END:
            self._took_end_action = True
            self._success = self._is_goal_in_range()
            self.last_action_success = self._success
        else:
            self.env.step({"action": action_str})
            self.last_action_success = self.env.last_action_success
            pose = self.env.agent_state()
            self.path.append({k: pose[k] for k in ["x", "y", "z"]})
            self.task_info["followed_path"].append(pose)
        if len(self.path) > 1 and self.path[-1] != self.path[-2]:
            self.num_moves_made += 1
        step_result = RLStepResult(
            observation=self.get_observations(),
            reward=self.judge(),
            done=self.is_done(),
            info={
                "last_action_success": self.last_action_success,
                "action": action
            },
        )
        return step_result
Example #3
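A thin wrapper around a MiniGrid environment: the action is forwarded directly to env.step, and the episode counts as successful when it terminates with a positive reward.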
    def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
        assert isinstance(action, int)
        action = cast(int, action)

        minigrid_obs, reward, done, info = self.env.step(action=action)
        self._last_action = action

        self._was_successful = done and reward > 0

        return RLStepResult(
            observation=self.get_observations(
                minigrid_output_obs=minigrid_obs),
            reward=reward,
            done=self.is_done(),
            info=info,
        )
Example #4
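A variant that also instruments the trajectory: it compares the agent's position and rotation before and after the step to count invalid (no-op) actions, and appends every post-step pose to self._positions.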
    def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
        assert isinstance(action, int)
        action = cast(int, action)

        old_pos = self.get_observations()["agent_position_and_rotation"]

        action_str = self.action_names()[action]
        self._actions_taken.append(action_str)

        self.env.step({"action": action_str})

        if action_str == END:
            self._took_end_action = True
            self._success = self._is_goal_in_range()
            self.last_action_success = self._success
        else:
            self.last_action_success = self.env.last_action_success

        step_result = RLStepResult(
            observation=self.get_observations(),
            reward=self.judge(),
            done=self.is_done(),
            info={"last_action_success": self.last_action_success},
        )
        new_pos = self.get_observations()["agent_position_and_rotation"]
        if np.all(old_pos == new_pos):
            self._num_invalid_actions += 1

        self._positions.append({
            "x": new_pos[0],
            "y": new_pos[1],
            "z": new_pos[2],
            "rotation": new_pos[3],
        })

        return step_result
Example #5
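Another MiniGrid wrapper; here the task's action index is first remapped to the underlying MiniGrid action index through _ACTION_IND_TO_MINIGRID_IND, and the environment's done flag is stored in self._minigrid_done.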
    def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
        assert isinstance(action, int)
        action = cast(int, action)

        minigrid_obs, reward, self._minigrid_done, info = self.env.step(
            action=self._ACTION_IND_TO_MINIGRID_IND[action])

        return RLStepResult(
            observation=self.get_observations(
                minigrid_output_obs=minigrid_obs),
            reward=reward,
            done=self.is_done(),
            info=info,
        )
Example #6
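A grid-world variant that computes the reward inline instead of delegating to a judge() method: each step costs STEP_PENALTY, reaching the goal position adds FOUND_TARGET_REWARD, and exhausting the step budget charges the discounted sum of all remaining step penalties.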
    def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
        assert isinstance(action, int)
        action = cast(int, action)

        success = self.env.step(action)
        reward = STEP_PENALTY

        if np.all(self.env.current_position == self.env.goal_position):
            self._found_target = True
            reward += FOUND_TARGET_REWARD
        elif self.num_steps_taken() == self.max_steps - 1:
            # Terminal penalty on the last step: the discounted sum of an
            # infinite run of step penalties,
            # STEP_PENALTY * (1 + g + g^2 + ...) = STEP_PENALTY / (1 - g).
            reward = STEP_PENALTY / (1 - DISCOUNT_FACTOR)

        return RLStepResult(
            observation=self.get_observations(),
            reward=reward,
            done=self.is_done(),
            info=None,
        )
Example #7
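The minimal form of the pattern shared by all of these examples: decode the action name, step the environment, treat END as a deliberate stop checked against _is_goal_in_range(), record last_action_success, and package observations, reward, and termination into an RLStepResult.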
    def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
        assert isinstance(action, int)
        action = cast(int, action)

        action_str = self.class_action_names()[action]

        self.env.step({"action": action_str})

        if action_str == END:
            self._took_end_action = True
            self._success = self._is_goal_in_range()
            self.last_action_success = self._success
        else:
            self.last_action_success = self.env.last_action_success

        step_result = RLStepResult(
            observation=self.get_observations(),
            reward=self.judge(),
            done=self.is_done(),
            info={"last_action_success": self.last_action_success},
        )
        return step_result
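Taken together, every variant follows the same skeleton: cast the incoming action to an int, translate it (where needed) into an action name, execute it, record success, and return an RLStepResult bundling observation, reward, done, and info. The sketch below shows how a rollout loop might drive such a task from the outside. It is a minimal illustration, not code from the examples above: it assumes an AllenAct-style Task API in which a public step(action) method wraps _step, and the rollout helper and its uniform-random policy are hypothetical stand-ins for a real agent.

    import random

    def rollout(task) -> float:
        """Drive a task to completion with a uniform-random policy.

        Assumes the AllenAct-style API seen above: action_names()
        (or class_action_names()) gives the discrete action set,
        step(action) wraps _step, and is_done() signals termination.
        """
        total_reward = 0.0
        while not task.is_done():
            # Sample a discrete action index; a real agent would query a policy.
            action = random.randrange(len(task.action_names()))
            step_result = task.step(action)  # invokes the task's _step internally
            total_reward += step_result.reward
        return total_reward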