Example #1
class EmbodiedTask:
    r"""Base class for embodied tasks. ``EmbodiedTask`` holds definition of
    a task that agent needs to solve: action space, observation space,
    measures, simulator usage. ``EmbodiedTask`` has :ref:`reset` and
    :ref:`step` methods that are called by ``Env``. ``EmbodiedTask`` is the
    one of main dimensions for the framework extension. Once new embodied task
    is introduced implementation of ``EmbodiedTask`` is a formal definition of
    the task that opens opportunity for others to propose solutions and
    include it into benchmark results.

    Args:
        config: config for the task.
        sim: reference to the simulator for calculating task observations.
        dataset: reference to dataset for task instance level information.

    :data measurements: set of task measures.
    :data sensor_suite: suite of task sensors.
    """

    _config: Any
    _sim: Optional[Simulator]
    _dataset: Optional[Dataset]
    _is_episode_active: bool
    measurements: Measurements
    sensor_suite: SensorSuite

    def __init__(self,
                 config: Config,
                 sim: Simulator,
                 dataset: Optional[Dataset] = None) -> None:
        self._config = config
        self._sim = sim
        self._dataset = dataset

        self.measurements = Measurements(
            self._init_entities(
                entity_names=config.MEASUREMENTS,
                register_func=registry.get_measure,
                entities_config=config,
            ).values())

        self.sensor_suite = SensorSuite(
            self._init_entities(
                entity_names=config.SENSORS,
                register_func=registry.get_sensor,
                entities_config=config,
            ).values())

        self.actions = self._init_entities(
            entity_names=config.POSSIBLE_ACTIONS,
            register_func=registry.get_task_action,
            entities_config=self._config.ACTIONS,
        )
        self._action_keys = list(self.actions.keys())

    def _init_entities(self,
                       entity_names,
                       register_func,
                       entities_config=None) -> OrderedDict:
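        # Resolve each named entity: look up its sub-config, fetch the class
        # registered under that config's TYPE, and instantiate it with
        # references to the sim, dataset, and task.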
        if entities_config is None:
            entities_config = self._config

        entities = OrderedDict()
        for entity_name in entity_names:
            entity_cfg = getattr(entities_config, entity_name)
            entity_type = register_func(entity_cfg.TYPE)
            assert (
                entity_type
                is not None), f"invalid {entity_name} type {entity_cfg.TYPE}"
            entities[entity_name] = entity_type(
                sim=self._sim,
                config=entity_cfg,
                dataset=self._dataset,
                task=self,
            )
        return entities

    def reset(self, episode: Type[Episode]):
        observations = self._sim.reset()
        observations.update(
            self.sensor_suite.get_observations(observations=observations,
                                               episode=episode,
                                               task=self))

        for action_instance in self.actions.values():
            action_instance.reset(episode=episode, task=self)

        return observations

    def step(self, action: Dict[str, Any], episode: Type[Episode]):
        if "action_args" not in action or action["action_args"] is None:
            action["action_args"] = {}
        action_name = action["action"]
        if isinstance(action_name, (int, np.integer)):
            action_name = self.get_action_name(action_name)
        assert (
            action_name in self.actions
        ), f"Can't find '{action_name}' action in {self.actions.keys()}."

        task_action = self.actions[action_name]
        observations = task_action.step(**action["action_args"], task=self)
        observations.update(
            self.sensor_suite.get_observations(
                observations=observations,
                episode=episode,
                action=action,
                task=self,
            ))

        self._is_episode_active = self._check_episode_is_active(
            observations=observations, action=action, episode=episode)

        return observations

    def get_action_name(self, action_index: int):
        if action_index >= len(self.actions):
            raise ValueError(f"Action index '{action_index}' is out of range.")
        return self._action_keys[action_index]

    @property
    def action_space(self) -> Space:
        return ActionSpace({
            action_name: action_instance.action_space
            for action_name, action_instance in self.actions.items()
        })

    def overwrite_sim_config(self, sim_config: Config,
                             episode: Type[Episode]) -> Config:
        r"""Update config merging information from :p:`sim_config` and
        :p:`episode`.

        :param sim_config: config for simulator.
        :param episode: current episode.
        """
        raise NotImplementedError

    def _check_episode_is_active(
        self,
        *args: Any,
        action: Union[int, Dict[str, Any]],
        episode: Type[Episode],
        **kwargs: Any,
    ) -> bool:
        raise NotImplementedError

    @property
    def is_episode_active(self):
        return self._is_episode_active

    def seed(self, seed: int) -> None:
        return
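
For orientation, here is a minimal sketch of how a new entity plugs into ``_init_entities``: each name in ``config.POSSIBLE_ACTIONS`` must name a sub-config whose ``TYPE`` is registered. ``registry.register_task_action`` and ``SimulatorTaskAction`` are habitat-lab API; the ``NO_OP`` action and its config wiring below are illustrative assumptions, not part of the class above.

# Hedged sketch: a custom action that _init_entities could discover.
from habitat.core.embodied_task import SimulatorTaskAction
from habitat.core.registry import registry


@registry.register_task_action
class NoOpAction(SimulatorTaskAction):
    def step(self, *args, **kwargs):
        # Re-render the current sensors without moving the agent.
        return self._sim.get_observations_at()


# Assumed wiring: with TASK.POSSIBLE_ACTIONS including "NO_OP" and
# TASK.ACTIONS.NO_OP.TYPE = "NoOpAction" in the config, _init_entities
# would instantiate this action alongside the built-in ones.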
Example #2
class HabitatSim(habitat_sim.Simulator, Simulator):
    r"""Simulator wrapper over habitat-sim

    habitat-sim repo: https://github.com/facebookresearch/habitat-sim

    Args:
        config: configuration for initializing the simulator.
    """
    def __init__(self, config: Config) -> None:
        self.habitat_config = config
        agent_config = self._get_agent_config()

        sim_sensors = []

        for sensor_name in agent_config.SENSORS:
            #print("AGENT CONFIG SENSORS :  {}".format(agent_config.SENSORS), flush=True)
            sensor_cfg = getattr(self.habitat_config, sensor_name)
            sensor_type = registry.get_sensor(sensor_cfg.TYPE)

            assert sensor_type is not None, "invalid sensor type {}".format(
                sensor_cfg.TYPE)
            sim_sensors.append(sensor_type(sensor_cfg))

        self._sensor_suite = SensorSuite(sim_sensors)
        self.sim_config = self.create_sim_config(self._sensor_suite)
        self._current_scene = self.sim_config.sim_cfg.scene_id
        super().__init__(self.sim_config)
        self._action_space = spaces.Discrete(
            len(self.sim_config.agents[0].action_space))
        self._prev_sim_obs: Optional[Observations] = None

        # NavMesh related settings :
        self.navmesh_settings = NavMeshSettings()
        self.navmesh_settings.set_defaults()
        self.navmesh_settings.agent_radius = agent_config.RADIUS
        self.navmesh_settings.agent_height = agent_config.HEIGHT

    def create_sim_config(
            self, _sensor_suite: SensorSuite) -> habitat_sim.Configuration:
        sim_config = habitat_sim.SimulatorConfiguration()
        # Check if Habitat-Sim is post Scene Config Update
        if not hasattr(sim_config, "scene_id"):
            raise RuntimeError(
                "Incompatible version of Habitat-Sim detected, please upgrade habitat_sim"
            )
        overwrite_config(
            config_from=self.habitat_config.HABITAT_SIM_V0,
            config_to=sim_config,
            # Ignore key as it gets propagated to the sensor below
            ignore_keys={"gpu_gpu"},
        )
        sim_config.scene_id = self.habitat_config.SCENE
        agent_config = habitat_sim.AgentConfiguration()
        overwrite_config(
            config_from=self._get_agent_config(),
            config_to=agent_config,
            # These keys are only used by Hab-Lab
            ignore_keys={
                "is_set_start_state",
                # This is the Sensor Config. Unpacked below
                "sensors",
                "start_position",
                "start_rotation",
            },
        )

        sensor_specifications = []
        VisualSensorTypeSet = {
            habitat_sim.SensorType.COLOR,
            habitat_sim.SensorType.DEPTH,
            habitat_sim.SensorType.SEMANTIC,
        }
        CameraSensorSubTypeSet = {
            habitat_sim.SensorSubType.PINHOLE,
            habitat_sim.SensorSubType.ORTHOGRAPHIC,
        }
        for sensor in _sensor_suite.sensors.values():

            # Check that the sensor is a visual sensor; it must be one of
            # HabitatSimRGBSensor, HabitatSimDepthSensor, or HabitatSimSemanticSensor
            if (getattr(sensor, "sim_sensor_type", [])
                    not in VisualSensorTypeSet):
                raise ValueError(
                    f"""{getattr(sensor, "sim_sensor_type", [])} is an illegal sensorType that is not implemented yet"""
                )
            # Check if type CameraSensorSpec
            if (getattr(sensor, "sim_sensor_subtype", [])
                    not in CameraSensorSubTypeSet):
                raise ValueError(
                    f"""{getattr(sensor, "sim_sensor_subtype", [])} is an illegal sensorSubType for a VisualSensor"""
                )

            # TODO: Implement checks for other types of SensorSpecs

            sim_sensor_cfg = habitat_sim.CameraSensorSpec()
            # TODO Handle configs for custom VisualSensors that might need
            # their own ignore_keys. Maybe with special key / checking
            # SensorType
            overwrite_config(
                config_from=sensor.config,
                config_to=sim_sensor_cfg,
                # These keys are only used by Hab-Lab
                # or translated into the sensor config manually
                ignore_keys={
                    "height",
                    "hfov",
                    "max_depth",
                    "min_depth",
                    "normalize_depth",
                    "type",
                    "width",
                },
            )
            sim_sensor_cfg.uuid = sensor.uuid
            sim_sensor_cfg.resolution = list(
                sensor.observation_space.shape[:2])

            # TODO(maksymets): Add configure method to Sensor API to avoid
            # accessing child attributes through parent interface
            # We know that the Sensor has to be one of these Sensors
            sensor = cast(HabitatSimVizSensors, sensor)
            sim_sensor_cfg.sensor_type = sensor.sim_sensor_type
            sim_sensor_cfg.sensor_subtype = sensor.sim_sensor_subtype
            sim_sensor_cfg.gpu2gpu_transfer = (
                self.habitat_config.HABITAT_SIM_V0.GPU_GPU)
            sensor_specifications.append(sim_sensor_cfg)

        agent_config.sensor_specifications = sensor_specifications
        agent_config.action_space = registry.get_action_space_configuration(
            self.habitat_config.ACTION_SPACE_CONFIG)(
                self.habitat_config).get()

        return habitat_sim.Configuration(sim_config, [agent_config])

    @property
    def sensor_suite(self) -> SensorSuite:
        return self._sensor_suite

    @property
    def action_space(self) -> Space:
        return self._action_space

    def _update_agents_state(self) -> bool:
        is_updated = False
        for agent_id, _ in enumerate(self.habitat_config.AGENTS):
            agent_cfg = self._get_agent_config(agent_id)
            if agent_cfg.IS_SET_START_STATE:
                self.set_agent_state(
                    agent_cfg.START_POSITION,
                    agent_cfg.START_ROTATION,
                    agent_id,
                )
                is_updated = True

        return is_updated

    def reset(self) -> Observations:
        sim_obs = super().reset()
        self.counter = 0
        if self._update_agents_state():
            sim_obs = self.get_sensor_observations()
        self._prev_sim_obs = sim_obs
        observations = self._sensor_suite.get_observations(sim_obs)
        return observations

    def step(self, action: Union[str, int]) -> Observations:
        sim_obs = super().step(action)
        self._prev_sim_obs = sim_obs
        observations = self._sensor_suite.get_observations(sim_obs)
        return observations

    def set_object_in_front_of_agent(self, sim, obj_id, z_offset=-1.5):
        r"""
        Adds an object in front of the agent at some distance.
        """
        #print("Agent : ")
        #print(self.get_agent(0))
        #agent_transform = sim.agents[0].scene_node.transformation_matrix()
        agent_transform = self.get_agent(0).scene_node.transformation_matrix()
        obj_translation = agent_transform.transform_point(
            np.array([0, 0, z_offset]))
        sim.set_translation(obj_translation, obj_id)

        obj_node = sim.get_object_scene_node(obj_id)
        xform_bb = habitat_sim.geo.get_transformed_bb(obj_node.cumulative_bb,
                                                      obj_node.transformation)

        # also account for collision margin of the scene
        scene_collision_margin = 0.04
        y_translation = mn.Vector3(
            0,
            xform_bb.size_y() / 2.0 + scene_collision_margin, 0)
        sim.set_translation(y_translation + sim.get_translation(obj_id),
                            obj_id)

    def init_objects(self, sim):
        # Manager of Object Attributes Templates
        obj_attr_mgr = sim.get_object_template_manager()
        #print("Object Template Manager : {}".format(obj_attr_mgr))
        #print("SIM Config : ")
        #print(self.sim_config)
        #print("Habitat Config: ")
        #print(self.habitat_config)
        obj_attr_mgr.load_configs("data/test_assets/objects")

        # Add a chair into the scene.
        obj_path = "data/test_assets/objects/locobot_merged"
        chair_template_id = obj_attr_mgr.load_object_configs(obj_path)[0]
        chair_attr = obj_attr_mgr.get_template_by_ID(chair_template_id)
        obj_attr_mgr.register_template(chair_attr)

        # Object's initial position 3m away from the agent.
        object_id = sim.add_object_by_handle(chair_attr.handle)
        self.set_object_in_front_of_agent(sim, object_id, -3.0)
        sim.set_object_motion_type(habitat_sim.physics.MotionType.STATIC,
                                   object_id)

        print("Object introduced at frame : {}".format(self.counter),
              flush=True)
        self.recompute_navmesh(self.pathfinder, self.navmesh_settings, True)

    def add_robot_embodiment(self, sim):
        # Manager of Object Attributes Templates
        obj_attr_mgr = sim.get_object_template_manager()
        #print("Object Template Manager : {}".format(obj_attr_mgr))
        #print("SIM Config : ")
        #print(self.sim_config)
        #print("Habitat Config: ")
        #print(self.habitat_config)
        #obj_attr_mgr.load_configs("data/test_assets/objects")

        # Add a Locobot model into the scene.
        obj_path = "data/test_assets/objects/locobot_merged"
        locobot_template_id = obj_attr_mgr.load_object_configs(obj_path)[0]
        # Object's initial position 3m away from the agent.
        object_id = sim.add_object(locobot_template_id,
                                   self.get_agent(0).scene_node)

        sim.set_object_motion_type(habitat_sim.physics.MotionType.KINEMATIC,
                                   object_id)

        sim.set_translation(np.array([1.75, -1.02, 0.4]), object_id)

        vel_control = sim.get_object_velocity_control(object_id)
        vel_control.linear_velocity = np.array([0, 0, -1.0])
        vel_control.angular_velocity = np.array([0.0, 2.0, 0])

    def _initialize_objects(self):
        objects = self.habitat_config.objects[0]
        obj_attr_mgr = self.get_object_template_manager()
        obj_attr_mgr.load_configs(
            str(os.path.join(data_path, "test_assets/objects")))
        # first remove all existing objects
        existing_object_ids = self.get_existing_object_ids()

        if len(existing_object_ids) > 0:
            for obj_id in existing_object_ids:
                self.remove_object(obj_id)

        self.sim_object_to_objid_mapping = {}
        self.objid_to_sim_object_mapping = {}

        if objects is not None:
            object_template = objects["object_template"]
            object_pos = objects["position"]
            object_rot = objects["rotation"]

            object_template_id = obj_attr_mgr.load_object_configs(
                object_template)[0]
            object_attr = obj_attr_mgr.get_template_by_ID(object_template_id)
            obj_attr_mgr.register_template(object_attr)

            object_id = self.add_object_by_handle(object_attr.handle)
            self.sim_object_to_objid_mapping[object_id] = objects["object_id"]
            self.objid_to_sim_object_mapping[objects["object_id"]] = object_id

            self.set_translation(object_pos, object_id)
            if isinstance(object_rot, list):
                object_rot = quat_from_coeffs(object_rot)

            object_rot = quat_to_magnum(object_rot)
            self.set_rotation(object_rot, object_id)

            self.set_object_motion_type(MotionType.STATIC, object_id)

        # Recompute the navmesh after placing all the objects.
        self.recompute_navmesh(self.pathfinder, self.navmesh_settings, True)

    def render(self, mode: str = "rgb") -> Any:
        r"""
        Args:
            mode: sensor whose observation is used for returning the frame,
                eg: "rgb", "depth", "semantic"

        Returns:
            rendered frame according to the mode
        """
        sim_obs = self.get_sensor_observations()
        observations = self._sensor_suite.get_observations(sim_obs)

        output = observations.get(mode)
        assert output is not None, "mode {} sensor is not active".format(mode)
        if not isinstance(output, np.ndarray):
            # If it is not a numpy array, it is a torch tensor
            # The function expects the result to be a numpy array
            output = output.to("cpu").numpy()

        return output

    def reconfigure(self, habitat_config: Config) -> None:
        # TODO(maksymets): Switch to Habitat-Sim more efficient caching
        is_same_scene = habitat_config.SCENE == self._current_scene
        self.habitat_config = habitat_config
        self.sim_config = self.create_sim_config(self._sensor_suite)
        if not is_same_scene:
            self._current_scene = habitat_config.SCENE
            self.close()
            super().reconfigure(self.sim_config)

        self._update_agents_state()

    def geodesic_distance(
        self,
        position_a: Union[Sequence[float], ndarray],
        position_b: Union[Sequence[float], Sequence[Sequence[float]]],
        episode: Optional[Episode] = None,
    ) -> float:
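        # Reuse the episode's cached MultiGoalShortestPath when available so
        # repeated queries against the same goals avoid rebuilding the path.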
        if episode is None or episode._shortest_path_cache is None:
            path = habitat_sim.MultiGoalShortestPath()
            if isinstance(position_b[0], (Sequence, np.ndarray)):
                path.requested_ends = np.array(position_b, dtype=np.float32)
            else:
                path.requested_ends = np.array(
                    [np.array(position_b, dtype=np.float32)])
        else:
            path = episode._shortest_path_cache

        path.requested_start = np.array(position_a, dtype=np.float32)

        self.pathfinder.find_path(path)

        if episode is not None:
            episode._shortest_path_cache = path

        return path.geodesic_distance

    def action_space_shortest_path(
        self,
        source: AgentState,
        targets: Sequence[AgentState],
        agent_id: int = 0,
    ) -> List[ShortestPathPoint]:
        r"""
        Returns:
            List of agent states and actions along the shortest path from
            source to the nearest target (both included). If one of the
            target(s) is identical to the source, a list containing only
            one node with the identical agent state is returned. Returns
            an empty list in case none of the targets are reachable from
            the source. For the last item in the returned list the action
            will be None.
        """
        raise NotImplementedError(
            "This function is no longer implemented. Please use the greedy "
            "follower instead")

    @property
    def up_vector(self) -> np.ndarray:
        return np.array([0.0, 1.0, 0.0])

    @property
    def forward_vector(self) -> np.ndarray:
        return -np.array([0.0, 0.0, 1.0])

    def get_straight_shortest_path_points(self, position_a, position_b):
        path = habitat_sim.ShortestPath()
        path.requested_start = position_a
        path.requested_end = position_b
        self.pathfinder.find_path(path)
        return path.points

    def sample_navigable_point(self) -> List[float]:
        return self.pathfinder.get_random_navigable_point().tolist()

    def is_navigable(self, point: List[float]) -> bool:
        return self.pathfinder.is_navigable(point)

    def semantic_annotations(self):
        r"""
        Returns:
            SemanticScene which is a three level hierarchy of semantic
            annotations for the current scene. Specifically this method
            returns a SemanticScene which contains a list of SemanticLevel's
            where each SemanticLevel contains a list of SemanticRegion's where
            each SemanticRegion contains a list of SemanticObject's.

            SemanticScene has attributes: aabb(axis-aligned bounding box) which
            has attributes aabb.center and aabb.sizes which are 3d vectors,
            categories, levels, objects, regions.

            SemanticLevel has attributes: id, aabb, objects and regions.

            SemanticRegion has attributes: id, level, aabb, category (to get
            name of category use category.name()) and objects.

            SemanticObject has attributes: id, region, aabb, obb (oriented
            bounding box) and category.

            SemanticScene contains List[SemanticLevels]
            SemanticLevel contains List[SemanticRegion]
            SemanticRegion contains List[SemanticObject]

            Example to loop through in a hierarchical fashion:
            for level in semantic_scene.levels:
                for region in level.regions:
                    for obj in region.objects:
        """
        return self.semantic_scene

    def _get_agent_config(self, agent_id: Optional[int] = None) -> Any:
        if agent_id is None:
            agent_id = self.habitat_config.DEFAULT_AGENT_ID
        agent_name = self.habitat_config.AGENTS[agent_id]
        agent_config = getattr(self.habitat_config, agent_name)
        return agent_config

    def get_agent_state(self, agent_id: int = 0) -> habitat_sim.AgentState:
        assert agent_id == 0, "No multi-agent support in {} yet.".format(
            self.__class__.__name__)
        return self.get_agent(agent_id).get_state()

    def set_agent_state(
        self,
        position: List[float],
        rotation: List[float],
        agent_id: int = 0,
        reset_sensors: bool = True,
    ) -> bool:
        r"""Sets agent state similar to initialize_agent, but without agents
        creation. On failure to place the agent in the proper position, it is
        moved back to its previous pose.

        Args:
            position: list containing 3 entries for (x, y, z).
            rotation: list with 4 entries for (x, y, z, w) elements of unit
                quaternion (versor) representing agent 3D orientation,
                (https://en.wikipedia.org/wiki/Versor)
            agent_id: int identification of agent from multiagent setup.
            reset_sensors: bool for if sensor changes (e.g. tilt) should be
                reset).

        Returns:
            True if the set was successful else moves the agent back to its
            original pose and returns false.
        """
        agent = self.get_agent(agent_id)
        new_state = self.get_agent_state(agent_id)
        new_state.position = position
        new_state.rotation = rotation

        # NB: The agent state also contains the sensor states in _absolute_
        # coordinates. In order to set the agent's body to a specific
        # location and have the sensors follow, we must not provide any
        # state for the sensors. This will cause them to follow the agent's
        # body
        new_state.sensor_states = {}
        agent.set_state(new_state, reset_sensors)
        return True

    def get_observations_at(
        self,
        position: Optional[List[float]] = None,
        rotation: Optional[List[float]] = None,
        keep_agent_at_new_pose: bool = False,
    ) -> Optional[Observations]:
        current_state = self.get_agent_state()
        if position is None or rotation is None:
            success = True
        else:
            success = self.set_agent_state(position,
                                           rotation,
                                           reset_sensors=False)

        if success:
            sim_obs = self.get_sensor_observations()

            self._prev_sim_obs = sim_obs

            observations = self._sensor_suite.get_observations(sim_obs)
            if not keep_agent_at_new_pose:
                self.set_agent_state(
                    current_state.position,
                    current_state.rotation,
                    reset_sensors=False,
                )
            return observations
        else:
            return None

    def distance_to_closest_obstacle(self,
                                     position: ndarray,
                                     max_search_radius: float = 2.0) -> float:
        return self.pathfinder.distance_to_closest_obstacle(
            position, max_search_radius)

    def island_radius(self, position: Sequence[float]) -> float:
        return self.pathfinder.island_radius(position)

    @property
    def previous_step_collided(self):
        r"""Whether or not the previous step resulted in a collision

        Returns:
            bool: True if the previous step resulted in a collision, false otherwise

        Warning:
            This field is only updated when :meth:`step`, :meth:`reset`, or :meth:`get_observations_at` are
            called.  It does not update when the agent is moved to a new location.  Furthermore, it
            will _always_ be false after :meth:`reset` or :meth:`get_observations_at` as neither of those
            result in an action (step) being taken.
        """
        return self._prev_sim_obs.get("collided", False)
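
A minimal usage sketch for the wrapper above. ``habitat.get_config`` is the usual habitat-lab entry point, but the exact yaml path and the integer-to-action mapping are assumptions that depend on the loaded config; observations come back as a dict keyed by sensor uuid.

# Hedged usage sketch; config path and action index are assumptions.
import habitat

config = habitat.get_config("configs/tasks/pointnav.yaml")
sim = HabitatSim(config=config.SIMULATOR)

obs = sim.reset()  # dict keyed by sensor uuid, e.g. obs["rgb"]
obs = sim.step(1)  # e.g. MOVE_FORWARD under the default action space config
print(obs["rgb"].shape, sim.previous_step_collided)
sim.close()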
Example #3
class HabitatSim(Simulator):
    r"""Simulator wrapper over habitat-sim

    habitat-sim repo: https://github.com/facebookresearch/habitat-sim

    Args:
        config: configuration for initializing the simulator.
    """
    def __init__(self, config: Config) -> None:
        self.config = config
        agent_config = self._get_agent_config()

        sim_sensors = []
        for sensor_name in agent_config.SENSORS:
            sensor_cfg = getattr(self.config, sensor_name)
            sensor_type = registry.get_sensor(sensor_cfg.TYPE)

            assert sensor_type is not None, "invalid sensor type {}".format(
                sensor_cfg.TYPE)
            sim_sensors.append(sensor_type(sensor_cfg))

        self._sensor_suite = SensorSuite(sim_sensors)
        self.sim_config = self.create_sim_config(self._sensor_suite)
        self._current_scene = self.sim_config.sim_cfg.scene.id
        self._sim = habitat_sim.Simulator(self.sim_config)
        self._action_space = spaces.Discrete(
            len(self.sim_config.agents[0].action_space))
        self._prev_sim_obs = None

    def create_sim_config(
            self, _sensor_suite: SensorSuite) -> habitat_sim.Configuration:
        sim_config = habitat_sim.SimulatorConfiguration()
        overwrite_config(config_from=self.config.HABITAT_SIM_V0,
                         config_to=sim_config)
        sim_config.scene.id = self.config.SCENE
        agent_config = habitat_sim.AgentConfiguration()
        overwrite_config(config_from=self._get_agent_config(),
                         config_to=agent_config)

        sensor_specifications = []
        for sensor in _sensor_suite.sensors.values():
            sim_sensor_cfg = habitat_sim.SensorSpec()
            overwrite_config(config_from=sensor.config,
                             config_to=sim_sensor_cfg)
            sim_sensor_cfg.uuid = sensor.uuid
            sim_sensor_cfg.resolution = list(
                sensor.observation_space.shape[:2])
            sim_sensor_cfg.parameters["hfov"] = str(sensor.config.HFOV)

            # TODO(maksymets): Add configure method to Sensor API to avoid
            # accessing child attributes through parent interface
            sim_sensor_cfg.sensor_type = sensor.sim_sensor_type  # type: ignore
            sim_sensor_cfg.gpu2gpu_transfer = (
                self.config.HABITAT_SIM_V0.GPU_GPU)
            sensor_specifications.append(sim_sensor_cfg)

        agent_config.sensor_specifications = sensor_specifications
        agent_config.action_space = registry.get_action_space_configuration(
            self.config.ACTION_SPACE_CONFIG)(self.config).get()

        return habitat_sim.Configuration(sim_config, [agent_config])

    @property
    def sensor_suite(self) -> SensorSuite:
        return self._sensor_suite

    @property
    def action_space(self) -> Space:
        return self._action_space

    def _update_agents_state(self) -> bool:
        is_updated = False
        for agent_id, _ in enumerate(self.config.AGENTS):
            agent_cfg = self._get_agent_config(agent_id)
            if agent_cfg.IS_SET_START_STATE:
                self.set_agent_state(
                    agent_cfg.START_POSITION,
                    agent_cfg.START_ROTATION,
                    agent_id,
                )
                is_updated = True

        return is_updated

    def reset(self):
        sim_obs = self._sim.reset()
        if self._update_agents_state():
            sim_obs = self._sim.get_sensor_observations()

        self._prev_sim_obs = sim_obs
        return self._sensor_suite.get_observations(sim_obs)

    def step(self, action):
        sim_obs = self._sim.step(action)
        self._prev_sim_obs = sim_obs
        observations = self._sensor_suite.get_observations(sim_obs)
        return observations

    def render(self, mode: str = "rgb") -> Any:
        r"""
        Args:
            mode: sensor whose observation is used for returning the frame,
                eg: "rgb", "depth", "semantic"

        Returns:
            rendered frame according to the mode
        """
        sim_obs = self._sim.get_sensor_observations()
        observations = self._sensor_suite.get_observations(sim_obs)

        output = observations.get(mode)
        assert output is not None, "mode {} sensor is not active".format(mode)
        if not isinstance(output, np.ndarray):
            # If it is not a numpy array, it is a torch tensor
            # The function expects the result to be a numpy array
            output = output.to("cpu").numpy()

        return output

    def seed(self, seed):
        self._sim.seed(seed)

    def reconfigure(self, config: Config) -> None:
        # TODO(maksymets): Switch to Habitat-Sim more efficient caching
        is_same_scene = config.SCENE == self._current_scene
        self.config = config
        self.sim_config = self.create_sim_config(self._sensor_suite)
        if not is_same_scene:
            self._current_scene = config.SCENE
            self._sim.close()
            del self._sim
            self._sim = habitat_sim.Simulator(self.sim_config)

        self._update_agents_state()

    def geodesic_distance(self,
                          position_a,
                          position_b,
                          episode: Optional[Episode] = None):
        if episode is None or episode._shortest_path_cache is None:
            path = habitat_sim.MultiGoalShortestPath()
            if isinstance(position_b[0], (list, np.ndarray)):
                path.requested_ends = np.array(position_b, dtype=np.float32)
            else:
                path.requested_ends = np.array(
                    [np.array(position_b, dtype=np.float32)])
        else:
            path = episode._shortest_path_cache

        path.requested_start = np.array(position_a, dtype=np.float32)

        self._sim.pathfinder.find_path(path)

        if episode is not None:
            episode._shortest_path_cache = path

        return path.geodesic_distance

    def action_space_shortest_path(
            self,
            source: AgentState,
            targets: List[AgentState],
            agent_id: int = 0) -> List[ShortestPathPoint]:
        r"""
        Returns:
            List of agent states and actions along the shortest path from
            source to the nearest target (both included). If one of the
            target(s) is identical to the source, a list containing only
            one node with the identical agent state is returned. Returns
            an empty list in case none of the targets are reachable from
            the source. For the last item in the returned list the action
            will be None.
        """
        raise NotImplementedError(
            "This function is no longer implemented. Please use the greedy "
            "follower instead")

    @property
    def up_vector(self):
        return np.array([0.0, 1.0, 0.0])

    @property
    def forward_vector(self):
        return -np.array([0.0, 0.0, 1.0])

    def get_straight_shortest_path_points(self, position_a, position_b):
        path = habitat_sim.ShortestPath()
        path.requested_start = position_a
        path.requested_end = position_b
        self._sim.pathfinder.find_path(path)
        return path.points

    def sample_navigable_point(self):
        return self._sim.pathfinder.get_random_navigable_point().tolist()

    def is_navigable(self, point: List[float]):
        return self._sim.pathfinder.is_navigable(point)

    def semantic_annotations(self):
        r"""
        Returns:
            SemanticScene which is a three level hierarchy of semantic
            annotations for the current scene. Specifically this method
            returns a SemanticScene which contains a list of SemanticLevel's
            where each SemanticLevel contains a list of SemanticRegion's where
            each SemanticRegion contains a list of SemanticObject's.

            SemanticScene has attributes: aabb(axis-aligned bounding box) which
            has attributes aabb.center and aabb.sizes which are 3d vectors,
            categories, levels, objects, regions.

            SemanticLevel has attributes: id, aabb, objects and regions.

            SemanticRegion has attributes: id, level, aabb, category (to get
            name of category use category.name()) and objects.

            SemanticObject has attributes: id, region, aabb, obb (oriented
            bounding box) and category.

            SemanticScene contains List[SemanticLevels]
            SemanticLevel contains List[SemanticRegion]
            SemanticRegion contains List[SemanticObject]

            Example to loop through in a hierarchical fashion:
            for level in semantic_scene.levels:
                for region in level.regions:
                    for obj in region.objects:
        """
        return self._sim.semantic_scene

    def close(self):
        self._sim.close()

    def _get_agent_config(self, agent_id: Optional[int] = None) -> Any:
        if agent_id is None:
            agent_id = self.config.DEFAULT_AGENT_ID
        agent_name = self.config.AGENTS[agent_id]
        agent_config = getattr(self.config, agent_name)
        return agent_config

    def get_agent_state(self, agent_id: int = 0) -> habitat_sim.AgentState:
        assert agent_id == 0, "No multi-agent support in {} yet.".format(
            self.__class__.__name__)
        return self._sim.get_agent(agent_id).get_state()

    def set_agent_state(
        self,
        position: List[float],
        rotation: List[float],
        agent_id: int = 0,
        reset_sensors: bool = True,
    ) -> bool:
        r"""Sets agent state similar to initialize_agent, but without agents
        creation. On failure to place the agent in the proper position, it is
        moved back to its previous pose.

        Args:
            position: list containing 3 entries for (x, y, z).
            rotation: list with 4 entries for (x, y, z, w) elements of unit
                quaternion (versor) representing agent 3D orientation,
                (https://en.wikipedia.org/wiki/Versor)
            agent_id: int identification of agent from multiagent setup.
            reset_sensors: bool for if sensor changes (e.g. tilt) should be
                reset).

        Returns:
            True if the set was successful else moves the agent back to its
            original pose and returns false.
        """
        agent = self._sim.get_agent(agent_id)
        original_state = self.get_agent_state(agent_id)
        new_state = self.get_agent_state(agent_id)
        new_state.position = position
        new_state.rotation = rotation

        # NB: The agent state also contains the sensor states in _absolute_
        # coordinates. In order to set the agent's body to a specific
        # location and have the sensors follow, we must not provide any
        # state for the sensors. This will cause them to follow the agent's
        # body
        new_state.sensor_states = dict()
        agent.set_state(new_state, reset_sensors)
        return True

    def get_observations_at(
        self,
        position: Optional[List[float]] = None,
        rotation: Optional[List[float]] = None,
        keep_agent_at_new_pose: bool = False,
    ) -> Optional[Observations]:
        current_state = self.get_agent_state()
        if position is None or rotation is None:
            success = True
        else:
            success = self.set_agent_state(position,
                                           rotation,
                                           reset_sensors=False)

        if success:
            sim_obs = self._sim.get_sensor_observations()

            self._prev_sim_obs = sim_obs

            observations = self._sensor_suite.get_observations(sim_obs)
            if not keep_agent_at_new_pose:
                self.set_agent_state(
                    current_state.position,
                    current_state.rotation,
                    reset_sensors=False,
                )
            return observations
        else:
            return None

    def distance_to_closest_obstacle(self, position, max_search_radius=2.0):
        return self._sim.pathfinder.distance_to_closest_obstacle(
            position, max_search_radius)

    def island_radius(self, position):
        return self._sim.pathfinder.island_radius(position)

    @property
    def previous_step_collided(self):
        r"""Whether or not the previous step resulted in a collision

        Returns:
            bool: True if the previous step resulted in a collision, false otherwise

        Warning:
            This field is only updated when :meth:`step`, :meth:`reset`, or :meth:`get_observations_at` are
            called.  It does not update when the agent is moved to a new location.  Furthermore, it
            will _always_ be false after :meth:`reset` or :meth:`get_observations_at` as neither of those
            result in an action (step) being taken.
        """
        return self._prev_sim_obs.get("collided", False)
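
A short sketch of the geodesic-distance caching above: for multi-goal queries, the ``MultiGoalShortestPath`` object is stored on the episode so later calls reuse the same goals. The coordinates below are made-up; ``sim`` and ``episode`` are assumed to come from a loaded pointnav dataset.

# Hedged sketch of geodesic_distance with the episode-level path cache.
start = [1.5, 0.17, 2.0]
goals = [[3.0, 0.17, -1.0], [0.5, 0.17, 4.2]]

d = sim.geodesic_distance(start, goals, episode=episode)
# The path object is now cached on episode._shortest_path_cache, so a second
# query from a new start reuses the same requested_ends.
d2 = sim.geodesic_distance([1.0, 0.17, 2.5], goals, episode=episode)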
Example #4
class PyRobot(Simulator):
    r"""Simulator wrapper over PyRobot.

    PyRobot repo: https://github.com/facebookresearch/pyrobot
    To use this abstraction the user will have to setup PyRobot
    python3 version. Please refer to the PyRobot repository
    for setting it up. The user will also have to export a
    ROS_PATH environment variable to use this integration,
    please refer to `habitat.core.utils.try_cv2_import` for
    more details on this.

    This abstraction assumes that reality is a simulation
    (https://www.youtube.com/watch?v=tlTKTTt47WE).

    Args:
        config: configuration for initializing the PyRobot object.
    """
    def __init__(self, config: Config) -> None:
        self._config = config

        robot_sensors = []
        for sensor_name in self._config.SENSORS:
            sensor_cfg = getattr(self._config, sensor_name)
            sensor_type = registry.get_sensor(sensor_cfg.TYPE)

            assert sensor_type is not None, "invalid sensor type {}".format(
                sensor_cfg.TYPE)
            robot_sensors.append(sensor_type(sensor_cfg))
        self._sensor_suite = SensorSuite(robot_sensors)

        config_pyrobot = {
            "base_controller": self._config.BASE_CONTROLLER,
            "base_planner": self._config.BASE_PLANNER,
        }

        assert (self._config.ROBOT
                in self._config.ROBOTS), "Invalid robot type {}".format(
                    self._config.ROBOT)
        self._robot_config = getattr(self._config, self._config.ROBOT.upper())

        self._action_space = self._robot_action_space(self._config.ROBOT,
                                                      self._robot_config)

        self._robot = pyrobot.Robot(self._config.ROBOT,
                                    base_config=config_pyrobot)

    def get_robot_observations(self):
        return {
            "rgb": self._robot.camera.get_rgb(),
            "depth": self._robot.camera.get_depth(),
            "bump": self._robot.base.base_state.bumper,
        }

    @property
    def sensor_suite(self) -> SensorSuite:
        return self._sensor_suite

    @property
    def base(self):
        return self._robot.base

    @property
    def camera(self):
        return self._robot.camera

    def _robot_action_space(self, robot_type, robot_config):
        action_spaces_dict = {}
        for action in robot_config.ACTIONS:
            action_spaces_dict[action] = ACTION_SPACES[
                robot_type.upper()][action]
        return spaces.Dict(action_spaces_dict)

    @property
    def action_space(self) -> Space:
        return self._action_space

    def reset(self):
        self._robot.camera.reset()

        observations = self._sensor_suite.get_observations(
            robot_obs=self.get_robot_observations())
        return observations

    def step(self, action, action_params):
        r"""Step in reality. Currently the supported
        actions are the ones defined in `_locobot_base_action_space`
        and `_locobot_camera_action_space`. For details on how
        to use these actions please refer to the documentation
        of namesake methods in PyRobot
        (https://github.com/facebookresearch/pyrobot).
        """
        if action in self._robot_config.BASE_ACTIONS:
            getattr(self._robot.base, action)(**action_params)
        elif action in self._robot_config.CAMERA_ACTIONS:
            getattr(self._robot.camera, action)(**action_params)
        else:
            raise ValueError("Invalid action {}".format(action))

        observations = self._sensor_suite.get_observations(
            robot_obs=self.get_robot_observations())

        return observations

    def render(self, mode: str = "rgb") -> Any:
        observations = self._sensor_suite.get_observations(
            robot_obs=self.get_robot_observations())

        output = observations.get(mode)
        assert output is not None, "mode {} sensor is not active".format(mode)

        return output

    def get_agent_state(self,
                        agent_id: int = 0,
                        base_state_type: str = "odom"):
        assert agent_id == 0, "No multi-agent support in {} yet.".format(
            self.__class__.__name__)
        state = {
            "base": self._robot.base.get_state(base_state_type),
            "camera": self._robot.camera.get_state(),
        }
        # TODO(akadian): add arm state when supported
        return state

    def seed(self, seed: int) -> None:
        raise NotImplementedError("No support for seeding in reality")
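
A minimal usage sketch for the wrapper above. ``go_to_relative`` is a real pyrobot base method, but whether it appears in ``BASE_ACTIONS`` and the config path both depend on the habitat-lab config, so treat them as assumptions.

# Hedged usage sketch; config path and action wiring are assumptions.
import habitat

config = habitat.get_config("configs/tasks/pointnav.yaml")
reality = PyRobot(config=config.PYROBOT)

obs = reality.reset()
# Drive the base 1m forward in the robot frame; params mirror pyrobot's
# go_to_relative(xyt_position, use_map, close_loop, smooth) signature.
obs = reality.step(
    "go_to_relative",
    {"xyt_position": [1.0, 0.0, 0.0], "use_map": False,
     "close_loop": True, "smooth": False},
)
print(obs.keys())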
Example #5
class SpotSim(Simulator):
    r"""Simulator wrapper over the Spot robot.
    """
    def __init__(self, config: Config) -> None:
        self.config = config
        agent_config = self._get_agent_config()

        sim_sensors = []
        for sensor_name in agent_config.SENSORS:
            sensor_cfg = getattr(self.config, sensor_name)
            sensor_type = registry.get_sensor(sensor_cfg.TYPE)

            assert sensor_type is not None, "invalid sensor type {}".format(
                sensor_cfg.TYPE)
            sim_sensors.append(sensor_type(sensor_cfg))

        self._sensor_suite = SensorSuite(sim_sensors)
        self._current_scene = '../habitat-api/data/scene_datasets/gibson/Pablo.glb'
        # Discrete(4) covers actions 0-3: 0 stop, 1 forward, 2 turn left,
        # 3 turn right.
        self._action_space = spaces.Discrete(4)
        self._last_obs = None

        self.origin_xy_yaw = np.array([0., 0., 0.], np.float32)

        # Supports navigation in 80x80m area, goal at most 40m away.
        self._coord_min = -40.
        self._coord_max = 40.

        self.socket = None
        atexit.register(
            self.cleanup
        )  # this is to close connection when the object is cleaned up

        ans = ""
        while ans != "ping":
            try:
                ans = self.reconnect_and_send("ping")
            except Exception:
                print("Failed to connect, retrying.")
                time.sleep(2.0)
                continue

            if ans == "ping":
                print("Server is alive")

                msg = self.reconnect_and_send("obs")
                obs = self.decode_spot_message(msg)
                self.show_robot_obs(obs)
            else:
                print(
                    "Server sent an unexpected response to ping. Response: " +
                    str(ans))
                pdb.set_trace()

    @staticmethod
    def decode_spot_message(msg):
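        # Assumed wire format, inferred from the decoding below: a JSON object
        # with optional base64-encoded raw buffers per key:
        #   rgb   -> uint8  (360, 640, 3)
        #   depth -> uint16 (360, 640), cast to float32
        #   pos   -> float32 (x, y, yaw)
        #   quat  -> float32 (x, y, z, w)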
        msg = json.loads(msg)
        robot_obs = {}
        if 'rgb' in msg:
            rgb = base64.b64decode(msg['rgb'])
            rgb = np.frombuffer(rgb, np.uint8).reshape((360, 640, 3))
            robot_obs['rgb'] = rgb
        if 'depth' in msg:
            depth = base64.b64decode(msg['depth'])
            depth = np.frombuffer(depth, np.uint16).reshape((360, 640))
            depth = depth.astype(np.float32)
            robot_obs['depth'] = depth
        if 'pos' in msg:
            pos = base64.b64decode(msg['pos'])
            pos = np.frombuffer(pos, np.float32).reshape((3, ))
            robot_obs['pos'] = pos
        if 'quat' in msg:
            quat = base64.b64decode(msg['quat'])
            quat = np.frombuffer(quat, np.float32).reshape((4, ))
            robot_obs['quat'] = quat
        return robot_obs

    @staticmethod
    def show_robot_obs(obs):
        print(obs.keys())
        print('pos', obs.get('pos', ''))
        print('quat', obs.get('quat', ''))

    @property
    def sensor_suite(self) -> SensorSuite:
        return self._sensor_suite

    @property
    def action_space(self) -> Space:
        return self._action_space

    @staticmethod
    async def send_async(websocket, cmd, verbose=False):
        await websocket.send(cmd)
        if verbose:
            print(f"> {cmd}")

        ans = await websocket.recv()
        if verbose:
            print(f"< {ans}")

        return ans

    def reconnect_and_send(self, cmd):
        if not self.socket or self.socket.closed:
            t = time.time()
            self.socket = asyncio.get_event_loop().run_until_complete(
                websockets.connect(SERVER_URI,
                                   read_limit=2**24,
                                   max_size=2**24))
            t_connect = time.time() - t
        else:
            t_connect = 0.

        t = time.time()
        msg = asyncio.get_event_loop().run_until_complete(
            self.reconnect_and_send_async(self.socket, cmd))

        if PRINT_TIMES:
            print("Connect: %.3f Comm: %.3f" % (t_connect, time.time() - t))

        return msg

    def cleanup(self):
        pass

    @staticmethod
    async def reconnect_and_send_async(socket, cmd):
        if KEEP_CONNECTION:
            ans = await SpotSim.send_async(socket, cmd)
        else:
            time_start = time.time()
            async with websockets.connect(SERVER_URI,
                                          read_limit=2**24,
                                          max_size=2**24) as websocket:
                time_conn = time.time()
                ans = await SpotSim.send_async(websocket, cmd)
                time_ans = time.time()
            if PRINT_TIMES:
                print("Connect %.3f comm %.3f" %
                      (time_conn - time_start, time_ans - time_conn))
        return ans

    def get_robot_observations(self, reset=False):
        if reset:
            self.send_robot_command("obs", reset)
        return self._last_obs

    def send_robot_command(self, command="obs", reset=False):
        msg = self.reconnect_and_send(command)
        obs = self.decode_spot_message(msg)

        # if "rgb" not in obs:
        #     obs["rgb"] = np.zeros((360, 640, 3), np.uint8)
        # if "depth" not in obs:
        #     obs["depth"] = np.zeros((360, 640), np.float32)
        assert "pos" in obs
        xy = obs['pos'][:2]
        yaw = obs['pos'][2]

        if reset:
            if self.config.get('MANUAL_RESET', False):
                # Drive robot to start pose
                input(
                    "Drive the robot to the start pose manually. Press enter when ready..."
                )

                msg = self.reconnect_and_send(command)
                obs = self.decode_spot_message(msg)
                xy = obs['pos'][:2]
                yaw = obs['pos'][2]

                # Interactive goal
                print(
                    "Current coords: %f %f (%f deg)" %
                    (obs['pos'][0], obs['pos'][1], np.rad2deg(obs['pos'][2])))
                while True:
                    ans = input("Enter goal in format 'abs/rel x y': ")
                    ans = ans.split(" ")
                    if len(ans) != 3 or ans[0] not in ['abs', 'rel']:
                        print("Wrong format")
                        print(ans)
                        continue
                    is_goal_relative = (ans[0] == 'rel')
                    goal_xy = ans[1:]
                    goal_xy = np.array([float(x) for x in goal_xy], np.float32)

                    if is_goal_relative:
                        rel_goal_xy = rotate_2d(goal_xy, yaw + np.pi)
                    else:
                        rel_goal_xy = -goal_xy + xy
                        print(goal_xy, xy)
                        print(rel_goal_xy)

                    # Episode goal is assumed to be zero, so set the origin
                    # such that the goal lands at zero.
                    self.origin_xy_yaw = np.array(
                        [xy[0] - rel_goal_xy[0], xy[1] - rel_goal_xy[1], yaw],
                        np.float32)

                    break
            else:
                self.origin_xy_yaw = obs['pos']

        # Convert the raw odometry pose into the episode coordinate frame:
        # translate by the origin, then rotate by the origin heading.
        xy0 = self.origin_xy_yaw[:2]
        yaw0 = self.origin_xy_yaw[2]
        xy = obs['pos'][:2]
        yaw = obs['pos'][2]
        xy = rotate_2d(xy - xy0, -yaw0)
        yaw = yaw - yaw0

        obs['xy'] = xy
        obs['yaw'] = yaw

        print('Agent pos: %f %f %f' % (xy[0], xy[1], np.rad2deg(yaw)))

        self._last_obs = obs

        return obs

    def reset(self):
        # Reset origin
        self.get_robot_observations(reset=True)

        obs = self._sensor_suite.get_observations(
            self.get_robot_observations())

        return obs

    def step(self, action):
        if action == 0:
            # TODO: implement stop; for now just re-read current observations
            obs = self.get_robot_observations()
        elif action <= 3:
            obs = self.send_robot_command(str(action))
        else:
            raise ValueError("Invalid action {}".format(action))

        observations = self._sensor_suite.get_observations(
            self.get_robot_observations())
        return observations

    def render(self, mode: str = "rgb") -> Any:
        observations = self._sensor_suite.get_observations(
            self.get_robot_observations())

        output = observations.get(mode)
        assert output is not None, "mode {} sensor is not active".format(mode)

        return output

    def get_agent_state(self,
                        agent_id: int = 0,
                        base_state_type: str = "odom"):
        assert agent_id == 0, "No multi-agent support in {} yet.".format(
            self.__class__.__name__)
        # TODO this should use the last observation; or rather, pose should be
        # treated as an observation, not as agent state

        robot_obs = self.get_robot_observations()

        state = self.xy_yaw_to_agent_state(robot_obs['xy'][0],
                                           robot_obs['xy'][1],
                                           robot_obs['yaw'])


        # Validate coordinates.
        if np.any(state.position < self._coord_min) or np.any(
                state.position >= self._coord_max):
            raise ValueError(
                "Coordinates exceed bounds. This is a problem for negative "
                "habitat map coordinates."
            )

        return state

    def xy_yaw_to_agent_state(self, x, y, yaw):
        # xy: Spot has x pointing forward and y pointing left, but the habitat agent state has x pointing right and z pointing back
        position = np.array([-y, 0., -x], dtype=np.float32)

        # yaw: magic conversion, debugged by taking habitat agent_state().rotation
        # and reproducing it from yaw; it is roughly the inverse of
        # https://github.com/facebookresearch/habitat-lab/blob/b7a93bc493f7fb89e5bf30b40be204ff7b5570d7/habitat/tasks/nav/nav.py#L864
        # quaternion.from_euler_angles(np.pi, -true_yaw - np.pi/2, np.pi)
        rotation = quaternion.from_euler_angles(np.pi, -yaw, np.pi)

        return AgentState(position=position, rotation=rotation)
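    # Worked example of the mapping above (kept as comments so the listing
    # stays valid): a Spot pose of x=1.5 (forward), y=-0.25 (left) maps to the
    # habitat position [-y, 0., -x] == [0.25, 0., -1.5]; the pose is recovered
    # with x = -position[2], y = -position[0].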

    def get_observations_at(
        self,
        position: Optional[List[float]] = None,
        rotation: Optional[List[float]] = None,
        keep_agent_at_new_pose: bool = False,
    ) -> Optional[Observations]:
        # A physical robot cannot be teleported, so observations can only be
        # returned for the current pose.
        if position is None or rotation is None:
            success = True
        else:
            success = False

        if success:
            return self._sensor_suite.get_observations(self._last_obs)
        else:
            return None

    def _get_agent_config(self, agent_id: Optional[int] = None) -> Any:
        if agent_id is None:
            agent_id = self.config.DEFAULT_AGENT_ID
        agent_name = self.config.AGENTS[agent_id]
        agent_config = getattr(self.config, agent_name)
        return agent_config

    def seed(self, seed):
        pass

    def reconfigure(self, config: Config) -> None:
        # TODO(maksymets): Switch to Habitat-Sim more efficient caching
        is_same_scene = config.SCENE == self._current_scene
        self.config = config
        # self.sim_config = self.create_sim_config(self._sensor_suite)
        if not is_same_scene:
            self._current_scene = config.SCENE
            # self._sim.close()
            # del self._sim
            # self._sim = habitat_sim.Simulator(self.sim_config)

        # self._update_agents_state()

    def close(self):
        pass

    def geodesic_distance(self,
                          position_a,
                          position_b,
                          episode: Optional[Episode] = None):
        # if episode is None or episode._shortest_path_cache is None:
        #     path = habitat_sim.MultiGoalShortestPath()
        #     if isinstance(position_b[0], List) or isinstance(
        #         position_b[0], np.ndarray
        #     ):
        #         path.requested_ends = np.array(position_b, dtype=np.float32)
        #     else:
        #         path.requested_ends = np.array(
        #             [np.array(position_b, dtype=np.float32)]
        #         )
        # else:
        #     path = episode._shortest_path_cache
        #
        # path.requested_start = np.array(position_a, dtype=np.float32)
        #
        # self._sim.pathfinder.find_path(path)
        #
        # if episode is not None:
        #     episode._shortest_path_cache = path
        #
        # return path.geodesic_distance
        # Fallback for the real robot: straight-line distance (no pathfinder).
        return np.linalg.norm(np.array(position_a) - np.array(position_b))

    def sample_navigable_point(self):
        # No pathfinder on the real robot: sample one of the four corners of
        # the configured coordinate bounds.
        x = self._coord_min if np.random.random() < 0.5 else self._coord_max
        y = self._coord_min if np.random.random() < 0.5 else self._coord_max
        return [x, 0., y]

    def is_navigable(self, point: List[float]):
        return True
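
The frame conversion in get_robot_observations above relies on a rotate_2d helper that is not part of the listing. The sketch below is a hypothetical reconstruction, consistent with how the helper is called (a counterclockwise rotation of a 2-D vector), followed by the same world-to-episode-frame conversion applied to an example pose.

import numpy as np

def rotate_2d(xy, theta):
    # counterclockwise rotation by theta radians (hypothetical helper)
    c, s = np.cos(theta), np.sin(theta)
    return np.array([c * xy[0] - s * xy[1],
                     s * xy[0] + c * xy[1]], dtype=np.float32)

# Convert a world pose into the episode frame, as in get_robot_observations:
origin_xy_yaw = np.array([1.0, 2.0, np.deg2rad(90.0)], np.float32)
pos = np.array([2.0, 3.0, np.deg2rad(135.0)], np.float32)
xy = rotate_2d(pos[:2] - origin_xy_yaw[:2], -origin_xy_yaw[2])
yaw = pos[2] - origin_xy_yaw[2]
print(xy, np.rad2deg(yaw))  # the same pose expressed relative to the origin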
Example #6
class SoundSpacesSim(Simulator, ABC):
    r"""Changes made to simulator wrapper over habitat-sim

    This simulator first loads the graph of current environment and moves the agent among nodes.
    Any sounds can be specified in the episode and loaded in this simulator.
    Args:
        config: configuration for initializing the simulator.
    """

    def action_space_shortest_path(
            self, source: AgentState, targets: List[AgentState], agent_id: int = 0
    ) -> List[ShortestPathPoint]:
        pass

    def __init__(self, config: Config) -> None:
        self.config = config
        agent_config = self._get_agent_config()
        sim_sensors = []
        for sensor_name in agent_config.SENSORS:
            sensor_cfg = getattr(self.config, sensor_name)
            sensor_type = registry.get_sensor(sensor_cfg.TYPE)

            assert sensor_type is not None, "invalid sensor type {}".format(
                sensor_cfg.TYPE
            )
            sim_sensors.append(sensor_type(sensor_cfg))

        self._sensor_suite = SensorSuite(sim_sensors)
        self.sim_config = self.create_sim_config(self._sensor_suite)
        self._current_scene = self.sim_config.sim_cfg.scene_id
        self._action_space = spaces.Discrete(
            len(self.sim_config.agents[0].action_space)
        )
        self._prev_sim_obs = None

        self._source_position_index = None
        self._receiver_position_index = None
        self._rotation_angle = None
        self._current_sound = None
        self._offset = None
        self._duration = None
        self._audio_index = None
        self._audio_length = None
        self._source_sound_dict = dict()
        self._sampling_rate = None
        self._node2index = None
        self._frame_cache = dict()
        self._audiogoal_cache = dict()
        self._spectrogram_cache = dict()
        self._egomap_cache = defaultdict(dict)
        self._scene_observations = None
        self._episode_step_count = None
        self._is_episode_active = None
        self._position_to_index_mapping = dict()
        self._previous_step_collided = False
        self._instance2label_mapping = None
        self._house_readers = dict()
        self._use_oracle_planner = True
        self._oracle_actions = list()

        self.points, self.graph = load_metadata(self.metadata_dir)
        for node in self.graph.nodes():
            self._position_to_index_mapping[self.position_encoding(self.graph.nodes()[node]['point'])] = node

        if self.config.AUDIO.HAS_DISTRACTOR_SOUND:
            self._distractor_position_index = None
            self._current_distractor_sound = None

        if self.config.USE_RENDERED_OBSERVATIONS:
            self._sim = DummySimulator()
            with open(self.current_scene_observation_file, 'rb') as fo:
                self._frame_cache = pickle.load(fo)
        else:
            self._sim = habitat_sim.Simulator(config=self.sim_config)

    def create_sim_config(
            self, _sensor_suite: SensorSuite
    ) -> habitat_sim.Configuration:
        sim_config = habitat_sim.SimulatorConfiguration()
        overwrite_config(
            config_from=self.config.HABITAT_SIM_V0, config_to=sim_config
        )
        sim_config.scene_id = self.config.SCENE
        agent_config = habitat_sim.AgentConfiguration()
        overwrite_config(
            config_from=self.get_agent_config(), config_to=agent_config
        )

        sensor_specifications = []
        for sensor in _sensor_suite.sensors.values():
            sim_sensor_cfg = habitat_sim.SensorSpec()
            overwrite_config(
                config_from=sensor.config, config_to=sim_sensor_cfg
            )
            sim_sensor_cfg.uuid = sensor.uuid
            sim_sensor_cfg.resolution = list(
                sensor.observation_space.shape[:2]
            )
            sim_sensor_cfg.parameters["hfov"] = str(sensor.config.HFOV)

            # accessing child attributes through parent interface
            sim_sensor_cfg.sensor_type = sensor.sim_sensor_type  # type: ignore
            sim_sensor_cfg.gpu2gpu_transfer = (
                self.config.HABITAT_SIM_V0.GPU_GPU
            )
            sensor_specifications.append(sim_sensor_cfg)

        agent_config.sensor_specifications = sensor_specifications
        agent_config.action_space = registry.get_action_space_configuration(
            self.config.ACTION_SPACE_CONFIG
        )(self.config).get()

        return habitat_sim.Configuration(sim_config, [agent_config])

    @property
    def sensor_suite(self) -> SensorSuite:
        return self._sensor_suite

    def get_agent_config(self, agent_id: Optional[int] = None) -> Any:
        if agent_id is None:
            agent_id = self.config.DEFAULT_AGENT_ID
        agent_name = self.config.AGENTS[agent_id]
        agent_config = getattr(self.config, agent_name)
        return agent_config

    def _update_agents_state(self) -> bool:
        is_updated = False
        for agent_id, _ in enumerate(self.config.AGENTS):
            agent_cfg = self._get_agent_config(agent_id)
            if agent_cfg.IS_SET_START_STATE:
                self.set_agent_state(
                    agent_cfg.START_POSITION,
                    agent_cfg.START_ROTATION,
                    agent_id,
                )
                is_updated = True

        return is_updated

    def _get_agent_config(self, agent_id: Optional[int] = None) -> Any:
        if agent_id is None:
            agent_id = self.config.DEFAULT_AGENT_ID
        agent_name = self.config.AGENTS[agent_id]
        agent_config = getattr(self.config, agent_name)
        return agent_config

    def get_agent_state(self, agent_id: int = 0) -> habitat_sim.AgentState:
        if self.config.USE_RENDERED_OBSERVATIONS:
            return self._sim.get_agent_state()
        else:
            return self._sim.get_agent(agent_id).get_state()

    def set_agent_state(
        self,
        position: List[float],
        rotation: List[float],
        agent_id: int = 0,
        reset_sensors: bool = True,
    ) -> bool:
        if self.config.USE_RENDERED_OBSERVATIONS:
            self._sim.set_agent_state(position, rotation)
            return True
        else:
            agent = self._sim.get_agent(agent_id)
            new_state = self.get_agent_state(agent_id)
            new_state.position = position
            new_state.rotation = rotation

            # NB: The agent state also contains the sensor states in _absolute_
            # coordinates. In order to set the agent's body to a specific
            # location and have the sensors follow, we must not provide any
            # state for the sensors. This will cause them to follow the agent's
            # body
            new_state.sensor_states = {}
            agent.set_state(new_state, reset_sensors)
            return True

    @property
    def binaural_rir_dir(self):
        return os.path.join(self.config.AUDIO.BINAURAL_RIR_DIR, self.config.SCENE_DATASET, self.current_scene_name)

    @property
    def source_sound_dir(self):
        return self.config.AUDIO.SOURCE_SOUND_DIR

    @property
    def distractor_sound_dir(self):
        return self.config.AUDIO.DISTRACTOR_SOUND_DIR

    @property
    def metadata_dir(self):
        return os.path.join(self.config.AUDIO.METADATA_DIR, self.config.SCENE_DATASET, self.current_scene_name)

    @property
    def current_scene_name(self):
        # config.SCENE (_current_scene) looks like 'data/scene_datasets/replica/office_1/habitat/mesh_semantic.ply'
        return self._current_scene.split('/')[3]

    @property
    def current_scene_observation_file(self):
        return os.path.join(self.config.SCENE_OBSERVATION_DIR, self.config.SCENE_DATASET,
                            self.current_scene_name + '.pkl')

    @property
    def current_source_sound(self):
        return self._source_sound_dict[self._current_sound]

    @property
    def is_silent(self):
        return self._episode_step_count > self._duration

    @property
    def pathfinder(self):
        return self._sim.pathfinder

    def get_agent(self, agent_id):
        return self._sim.get_agent(agent_id)

    def reconfigure(self, config: Config) -> None:
        self.config = config
        if hasattr(self.config.AGENT_0, 'OFFSET'):
            self._offset = int(self.config.AGENT_0.OFFSET)
        else:
            self._offset = 0
        if self.config.AUDIO.EVERLASTING:
            self._duration = 500
        else:
            assert hasattr(self.config.AGENT_0, 'DURATION')
            self._duration = int(self.config.AGENT_0.DURATION)
        self._audio_index = 0
        is_same_sound = config.AGENT_0.SOUND_ID == self._current_sound
        if not is_same_sound:
            self._current_sound = self.config.AGENT_0.SOUND_ID
            self._load_single_source_sound()
            logging.debug("Switch to sound {} with duration {} seconds".format(self._current_sound, self._duration))

        is_same_scene = config.SCENE == self._current_scene
        if not is_same_scene:
            self._current_scene = config.SCENE
            logging.debug('Current scene: {} and sound: {}'.format(self.current_scene_name, self._current_sound))

            if self.config.USE_RENDERED_OBSERVATIONS:
                with open(self.current_scene_observation_file, 'rb') as fo:
                    self._frame_cache = pickle.load(fo)
            else:
                self._sim.close()
                del self._sim
                self.sim_config = self.create_sim_config(self._sensor_suite)
                self._sim = habitat_sim.Simulator(self.sim_config)
                self._update_agents_state()
                self._frame_cache = dict()
            logging.debug('Loaded scene {}'.format(self.current_scene_name))

            self.points, self.graph = load_metadata(self.metadata_dir)
            for node in self.graph.nodes():
                self._position_to_index_mapping[self.position_encoding(self.graph.nodes()[node]['point'])] = node
            self._instance2label_mapping = None

        if not is_same_scene or not is_same_sound:
            self._audiogoal_cache = dict()
            self._spectrogram_cache = dict()

        self._episode_step_count = 0

        # set agent positions
        self._receiver_position_index = self._position_to_index(self.config.AGENT_0.START_POSITION)
        self._source_position_index = self._position_to_index(self.config.AGENT_0.GOAL_POSITION)
        # the agent rotates about +Y starting from -Z counterclockwise,
        # so rotation angle 90 means the agent rotates 90 degrees about +Y
        self._rotation_angle = int(np.around(np.rad2deg(quat_to_angle_axis(quat_from_coeffs(
                             self.config.AGENT_0.START_ROTATION))[0]))) % 360
        if self.config.USE_RENDERED_OBSERVATIONS:
            self._sim.set_agent_state(list(self.graph.nodes[self._receiver_position_index]['point']),
                                      quat_from_coeffs(self.config.AGENT_0.START_ROTATION))
        else:
            self.set_agent_state(list(self.graph.nodes[self._receiver_position_index]['point']),
                                 self.config.AGENT_0.START_ROTATION)

        if self.config.AUDIO.HAS_DISTRACTOR_SOUND:
            self._distractor_position_index = self.config.AGENT_0.DISTRACTOR_POSITION_INDEX
            self._current_distractor_sound = self.config.AGENT_0.DISTRACTOR_SOUND_ID
            self._load_single_distractor_sound()

        if self._use_oracle_planner:
            self._oracle_actions = self.compute_oracle_actions()

        logging.debug("Initial source, agent at: {}, {}, orientation: {}".
                      format(self._source_position_index, self._receiver_position_index, self.get_orientation()))

    def compute_semantic_index_mapping(self):
        # obtain mapping from instance id to semantic label id
        if isinstance(self._sim, DummySimulator):
            if self._current_scene not in self._house_readers:
                self._house_readers[self._current_scene] = HouseReader(self._current_scene.replace('.glb', '.house'))
            reader = self._house_readers[self._current_scene]
            instance_id_to_label_id = reader.compute_object_to_category_index_mapping()
        else:
            scene = self._sim.semantic_scene
            instance_id_to_label_id = {int(obj.id.split("_")[-1]): obj.category.index() for obj in scene.objects}
        self._instance2label_mapping = np.array([instance_id_to_label_id[i] for i in range(len(instance_id_to_label_id))])

    @staticmethod
    def position_encoding(position):
        return '{:.2f}_{:.2f}_{:.2f}'.format(*position)

    def _position_to_index(self, position):
        encoding = self.position_encoding(position)
        if encoding in self._position_to_index_mapping:
            return self._position_to_index_mapping[encoding]
        else:
            raise ValueError("Position misalignment.")
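    # Note on the two methods above: floating-point coordinates are unreliable
    # dict keys, so position_encoding quantizes each coordinate to two decimals
    # before the lookup; e.g. '{:.2f}_{:.2f}_{:.2f}'.format(1.0000001, 0., -2.499999)
    # yields '1.00_0.00_-2.50'. Positions that do not land exactly on a graph
    # node therefore raise "Position misalignment." above.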

    def _get_sim_observation(self):
        joint_index = (self._receiver_position_index, self._rotation_angle)
        if joint_index in self._frame_cache:
            return self._frame_cache[joint_index]
        else:
            assert not self.config.USE_RENDERED_OBSERVATIONS
            sim_obs = self._sim.get_sensor_observations()
            # sensor outputs are cached exactly as returned by the simulator
            self._frame_cache[joint_index] = sim_obs
            return sim_obs

    def reset(self):
        logging.debug('Reset simulation')
        if self.config.USE_RENDERED_OBSERVATIONS:
            sim_obs = self._get_sim_observation()
            self._sim.set_sensor_observations(sim_obs)
        else:
            sim_obs = self._sim.reset()
            if self._update_agents_state():
                sim_obs = self._get_sim_observation()

        self._is_episode_active = True
        self._prev_sim_obs = sim_obs
        self._previous_step_collided = False
        # Encapsulate data in the Observations class
        observations = self._sensor_suite.get_observations(sim_obs)

        return observations

    def step(self, action, only_allowed=True):
        """
        All angle calculations in this function is w.r.t habitat coordinate frame, on X-Z plane
        where +Y is upward, -Z is forward and +X is rightward.
        Angle 0 corresponds to +X, angle 90 corresponds to +y and 290 corresponds to 270.

        :param action: action to be taken
        :param only_allowed: if true, then can't step anywhere except allowed locations
        :return:
        Dict of observations
        """
        assert self._is_episode_active, (
            "episode is not active, environment not RESET or "
            "STOP action called previously"
        )

        self._previous_step_collided = False
        # ensure this exists even for STOP when CONTINUOUS_VIEW_CHANGE is on
        intermediate_observations = list()
        # STOP: 0, MOVE_FORWARD: 1, TURN_LEFT: 2, TURN_RIGHT: 3
        if action == HabitatSimActions.STOP:
            self._is_episode_active = False
        else:
            prev_position_index = self._receiver_position_index
            prev_rotation_angle = self._rotation_angle
            if action == HabitatSimActions.MOVE_FORWARD:
                # the agent initially faces -Z by default
                # assume a collision; cleared below if a neighbor lies in the
                # heading direction
                self._previous_step_collided = True
                for neighbor in self.graph[self._receiver_position_index]:
                    p1 = self.graph.nodes[self._receiver_position_index]['point']
                    p2 = self.graph.nodes[neighbor]['point']
                    direction = int(np.around(np.rad2deg(np.arctan2(p2[2] - p1[2], p2[0] - p1[0])))) % 360
                    if direction == self.get_orientation():
                        self._receiver_position_index = neighbor
                        self._previous_step_collided = False
                        break
            elif action == HabitatSimActions.TURN_LEFT:
                # agent rotates counterclockwise, so turning left means increasing rotation angle by 90
                self._rotation_angle = (self._rotation_angle + 90) % 360
            elif action == HabitatSimActions.TURN_RIGHT:
                self._rotation_angle = (self._rotation_angle - 90) % 360

            if self.config.CONTINUOUS_VIEW_CHANGE:
                intermediate_observations = list()
                fps = self.config.VIEW_CHANGE_FPS
                if action == HabitatSimActions.MOVE_FORWARD:
                    prev_position = np.array(self.graph.nodes[prev_position_index]['point'])
                    current_position = np.array(self.graph.nodes[self._receiver_position_index]['point'])
                    for i in range(1, fps):
                        intermediate_position = prev_position + i / fps * (current_position - prev_position)
                        self.set_agent_state(intermediate_position.tolist(), quat_from_angle_axis(np.deg2rad(
                                            self._rotation_angle), np.array([0, 1, 0])))
                        sim_obs = self._sim.get_sensor_observations()
                        observations = self._sensor_suite.get_observations(sim_obs)
                        intermediate_observations.append(observations)
                else:
                    for i in range(1, fps):
                        if action == HabitatSimActions.TURN_LEFT:
                            intermediate_rotation = prev_rotation_angle + i / fps * 90
                        elif action == HabitatSimActions.TURN_RIGHT:
                            intermediate_rotation = prev_rotation_angle - i / fps * 90
                        self.set_agent_state(list(self.graph.nodes[self._receiver_position_index]['point']),
                                             quat_from_angle_axis(np.deg2rad(intermediate_rotation),
                                                                  np.array([0, 1, 0])))
                        sim_obs = self._sim.get_sensor_observations()
                        observations = self._sensor_suite.get_observations(sim_obs)
                        intermediate_observations.append(observations)

            self.set_agent_state(list(self.graph.nodes[self._receiver_position_index]['point']),
                                 quat_from_angle_axis(np.deg2rad(self._rotation_angle), np.array([0, 1, 0])))
        self._episode_step_count += 1

        # log debugging info
        logging.debug('After taking action {}, s,r: {}, {}, orientation: {}, location: {}'.format(
            action, self._source_position_index, self._receiver_position_index,
            self.get_orientation(), self.graph.nodes[self._receiver_position_index]['point']))

        sim_obs = self._get_sim_observation()
        if self.config.USE_RENDERED_OBSERVATIONS:
            self._sim.set_sensor_observations(sim_obs)
        self._prev_sim_obs = sim_obs
        observations = self._sensor_suite.get_observations(sim_obs)
        if self.config.CONTINUOUS_VIEW_CHANGE:
            observations['intermediate'] = intermediate_observations

        return observations

    def get_orientation(self):
        # rotation angle 0 (facing -Z) corresponds to direction 270 in the
        # arctan2-based edge convention used above
        _base_orientation = 270
        return (_base_orientation - self._rotation_angle) % 360

    @property
    def azimuth_angle(self):
        # this is the angle used to index the binaural audio files
        # in mesh coordinate systems, +Y forward, +X rightward, +Z upward
        # azimuth is calculated clockwise so +Y is 0 and +X is 90
        return -self._rotation_angle % 360

    @property
    def reaching_goal(self):
        return self._source_position_index == self._receiver_position_index

    def _load_source_sounds(self):
        # load all mono files at once
        sound_files = os.listdir(self.source_sound_dir)
        for sound_file in sound_files:
            sound = sound_file.split('.')[0]
            audio_data, sr = librosa.load(os.path.join(self.source_sound_dir, sound_file),
                                          sr=self.config.AUDIO.RIR_SAMPLING_RATE)
            self._source_sound_dict[sound] = audio_data
            self._audio_length = audio_data.shape[0] // self.config.AUDIO.RIR_SAMPLING_RATE

    def _load_single_distractor_sound(self):
        if self._current_distractor_sound not in self._source_sound_dict:
            audio_data, sr = librosa.load(os.path.join(self.distractor_sound_dir, self._current_distractor_sound),
                                          sr=self.config.AUDIO.RIR_SAMPLING_RATE)
            self._source_sound_dict[self._current_distractor_sound] = audio_data

    def _load_single_source_sound(self):
        if self._current_sound not in self._source_sound_dict:
            audio_data, sr = librosa.load(os.path.join(self.source_sound_dir, self._current_sound),
                                          sr=self.config.AUDIO.RIR_SAMPLING_RATE)
            self._source_sound_dict[self._current_sound] = audio_data
        self._audio_length = self._source_sound_dict[self._current_sound].shape[0]//self.config.AUDIO.RIR_SAMPLING_RATE

    def _compute_euclidean_distance_between_sr_locations(self):
        p1 = self.graph.nodes[self._receiver_position_index]['point']
        p2 = self.graph.nodes[self._source_position_index]['point']
        d = np.sqrt((p1[0] - p2[0])**2 + (p1[2] - p2[2])**2)
        return d

    def _compute_audiogoal(self):
        sampling_rate = self.config.AUDIO.RIR_SAMPLING_RATE
        if self._episode_step_count > self._duration:
            logging.debug('Step count is greater than duration. Empty spectrogram.')
            audiogoal = np.zeros((2, sampling_rate))
        else:
            binaural_rir_file = os.path.join(self.binaural_rir_dir, str(self.azimuth_angle), '{}_{}.wav'.format(
                self._receiver_position_index, self._source_position_index))
            try:
                sampling_freq, binaural_rir = wavfile.read(binaural_rir_file)  # float32
            except ValueError:
                logging.warning("{} file is not readable".format(binaural_rir_file))
                binaural_rir = np.zeros((sampling_rate, 2)).astype(np.float32)
            if len(binaural_rir) == 0:
                logging.debug("Empty RIR file at {}".format(binaural_rir_file))
                binaural_rir = np.zeros((sampling_rate, 2)).astype(np.float32)

            # by default, convolve in full mode, which preserves the direct sound
            if self.current_source_sound.shape[0] == sampling_rate:
                binaural_convolved = np.array([fftconvolve(self.current_source_sound, binaural_rir[:, channel]
                                                           ) for channel in range(binaural_rir.shape[-1])])
                audiogoal = binaural_convolved[:, :sampling_rate]
            else:
                index = self._audio_index
                self._audio_index = (self._audio_index + 1) % self._audio_length
                if index * sampling_rate - binaural_rir.shape[0] < 0:
                    source_sound = self.current_source_sound[: (index + 1) * sampling_rate]
                    binaural_convolved = np.array([fftconvolve(source_sound, binaural_rir[:, channel]
                                                               ) for channel in range(binaural_rir.shape[-1])])
                    audiogoal = binaural_convolved[:, index * sampling_rate: (index + 1) * sampling_rate]
                else:
                    # include reverb from previous time step
                    source_sound = self.current_source_sound[index * sampling_rate - binaural_rir.shape[0] + 1
                                                             : (index + 1) * sampling_rate]
                    binaural_convolved = np.array([fftconvolve(source_sound, binaural_rir[:, channel], mode='valid',
                                                               ) for channel in range(binaural_rir.shape[-1])])
                    audiogoal = binaural_convolved

            if self.config.AUDIO.HAS_DISTRACTOR_SOUND:
                binaural_rir_file = os.path.join(self.binaural_rir_dir, str(self.azimuth_angle), '{}_{}.wav'.format(
                    self._receiver_position_index, self._distractor_position_index))
                try:
                    sampling_freq, distractor_rir = wavfile.read(binaural_rir_file)
                except ValueError:
                    logging.warning("{} file is not readable".format(binaural_rir_file))
                    distractor_rir = np.zeros((self.config.AUDIO.RIR_SAMPLING_RATE, 2)).astype(np.float32)
                if len(distractor_rir) == 0:
                    logging.debug("Empty RIR file at {}".format(binaural_rir_file))
                    distractor_rir = np.zeros((self.config.AUDIO.RIR_SAMPLING_RATE, 2)).astype(np.float32)

                distractor_convolved = np.array([fftconvolve(self._source_sound_dict[self._current_distractor_sound],
                                                             distractor_rir[:, channel]
                                                             ) for channel in range(distractor_rir.shape[-1])])
                audiogoal += distractor_convolved[:, :sampling_rate]

        return audiogoal
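    # The core idea of _compute_audiogoal, in brief: the received audio is the
    # mono source sound convolved with a binaural room impulse response (RIR),
    # one channel per ear, cropped to one second at the RIR sampling rate, i.e.
    #   np.array([fftconvolve(source, rir[:, ch])
    #             for ch in range(rir.shape[-1])])[:, :sampling_rate]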

    def get_egomap_observation(self):
        joint_index = (self._receiver_position_index, self._rotation_angle)
        if joint_index in self._egomap_cache[self._current_scene]:
            return self._egomap_cache[self._current_scene][joint_index]
        else:
            return None

    def cache_egomap_observation(self, egomap):
        self._egomap_cache[self._current_scene][(self._receiver_position_index, self._rotation_angle)] = egomap

    def get_current_audiogoal_observation(self):
        if self.config.AUDIO.HAS_DISTRACTOR_SOUND:
            # by default, audiogoals are not cached when a distractor sound is present
            audiogoal = self._compute_audiogoal()
        else:
            joint_index = (self._source_position_index, self._receiver_position_index, self.azimuth_angle)
            if joint_index not in self._audiogoal_cache:
                self._audiogoal_cache[joint_index] = self._compute_audiogoal()
            audiogoal = self._audiogoal_cache[joint_index]

        return audiogoal

    def get_current_spectrogram_observation(self, audiogoal2spectrogram):
        if self.config.AUDIO.HAS_DISTRACTOR_SOUND:
            audiogoal = self.get_current_audiogoal_observation()
            spectrogram = audiogoal2spectrogram(audiogoal)
        else:
            joint_index = (self._source_position_index, self._receiver_position_index, self.azimuth_angle)
            if joint_index not in self._spectrogram_cache:
                audiogoal = self.get_current_audiogoal_observation()
                self._spectrogram_cache[joint_index] = audiogoal2spectrogram(audiogoal)
            spectrogram = self._spectrogram_cache[joint_index]

        return spectrogram

    def geodesic_distance(self, position_a, position_bs, episode=None):
        distances = []
        for position_b in position_bs:
            index_a = self._position_to_index(position_a)
            index_b = self._position_to_index(position_b)
            assert index_a is not None and index_b is not None
            path_length = nx.shortest_path_length(self.graph, index_a, index_b) * self.config.GRID_SIZE
            distances.append(path_length)

        return min(distances)

    def get_straight_shortest_path_points(self, position_a, position_b):
        index_a = self._position_to_index(position_a)
        index_b = self._position_to_index(position_b)
        assert index_a is not None and index_b is not None

        shortest_path = nx.shortest_path(self.graph, source=index_a, target=index_b)
        points = list()
        for node in shortest_path:
            points.append(self.graph.nodes()[node]['point'])
        return points

    def compute_oracle_actions(self):
        start_node = self._receiver_position_index
        end_node = self._source_position_index
        shortest_path = nx.shortest_path(self.graph, source=start_node, target=end_node)
        assert shortest_path[0] == start_node and shortest_path[-1] == end_node
        logging.debug(shortest_path)

        oracle_actions = []
        orientation = self.get_orientation()
        for i in range(len(shortest_path) - 1):
            prev_node = shortest_path[i]
            next_node = shortest_path[i+1]
            p1 = self.graph.nodes[prev_node]['point']
            p2 = self.graph.nodes[next_node]['point']
            direction = int(np.around(np.rad2deg(np.arctan2(p2[2] - p1[2], p2[0] - p1[0])))) % 360
            if direction == orientation:
                pass
            elif (direction - orientation) % 360 == 270:
                orientation = (orientation - 90) % 360
                oracle_actions.append(HabitatSimActions.TURN_LEFT)
            elif (direction - orientation) % 360 == 90:
                orientation = (orientation + 90) % 360
                oracle_actions.append(HabitatSimActions.TURN_RIGHT)
            elif (direction - orientation) % 360 == 180:
                orientation = (orientation - 180) % 360
                oracle_actions.append(HabitatSimActions.TURN_RIGHT)
                oracle_actions.append(HabitatSimActions.TURN_RIGHT)
            oracle_actions.append(HabitatSimActions.MOVE_FORWARD)
        oracle_actions.append(HabitatSimActions.STOP)
        return oracle_actions
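    # Worked example of the turn logic above: with orientation 0 and the next
    # edge at direction 90, (90 - 0) % 360 == 90, so the oracle appends
    # TURN_RIGHT (orientation becomes 90) followed by MOVE_FORWARD.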

    def get_oracle_action(self):
        return self._oracle_actions[self._episode_step_count]

    @property
    def previous_step_collided(self):
        return self._previous_step_collided

    def find_nearest_graph_node(self, target_pos):
        from scipy.spatial import cKDTree
        all_points = np.array([self.graph.nodes()[node]['point'] for node in self.graph.nodes()])
        kd_tree = cKDTree(all_points[:, [0, 2]])
        d, ind = kd_tree.query(target_pos[[0, 2]])
        return all_points[ind]

    def seed(self, seed):
        self._sim.seed(seed)

    def get_observations_at(
            self,
            position: Optional[List[float]] = None,
            rotation: Optional[List[float]] = None,
            keep_agent_at_new_pose: bool = False,
    ) -> Optional[Observations]:
        current_state = self.get_agent_state()
        if position is None or rotation is None:
            success = True
        else:
            success = self.set_agent_state(
                position, rotation, reset_sensors=False
            )

        if success:
            sim_obs = self._sim.get_sensor_observations()

            self._prev_sim_obs = sim_obs

            observations = self._sensor_suite.get_observations(sim_obs)
            if not keep_agent_at_new_pose:
                self.set_agent_state(
                    current_state.position,
                    current_state.rotation,
                    reset_sensors=False,
                )
            return observations
        else:
            return None
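
SoundSpacesSim navigates a precomputed grid graph rather than the continuous mesh: MOVE_FORWARD succeeds only when a neighboring node lies in the agent's current heading. A minimal sketch of that neighbor test, assuming only networkx and numpy:

import networkx as nx
import numpy as np

g = nx.Graph()
g.add_node(0, point=(0.0, 0.0, 0.0))
g.add_node(1, point=(1.0, 0.0, 0.0))  # one grid cell along +X
g.add_edge(0, 1)

p1, p2 = g.nodes[0]['point'], g.nodes[1]['point']
direction = int(np.around(np.rad2deg(np.arctan2(p2[2] - p1[2], p2[0] - p1[0])))) % 360
print(direction)  # 0, i.e. +X; the step succeeds iff this equals get_orientation()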
Example #7
class HabitatSim(Simulator):
    r"""Simulator wrapper over habitat-sim

    habitat-sim repo: https://github.com/facebookresearch/habitat-sim

    Args:
        config: configuration for initializing the simulator.
    """
    def __init__(self, config: Config) -> None:
        self.config = config
        agent_config = self._get_agent_config()

        sim_sensors = []
        for sensor_name in agent_config.SENSORS:
            sensor_cfg = getattr(self.config, sensor_name)
            sensor_type = registry.get_sensor(sensor_cfg.TYPE)

            assert sensor_type is not None, "invalid sensor type {}".format(
                sensor_cfg.TYPE)
            sim_sensors.append(sensor_type(sensor_cfg))

        self._sensor_suite = SensorSuite(sim_sensors)
        self.sim_config = self.create_sim_config(self._sensor_suite)
        self._current_scene = self.sim_config.sim_cfg.scene.id
        self._sim = habitat_sim.Simulator(self.sim_config)
        self._action_space = spaces.Discrete(
            len(self.sim_config.agents[0].action_space))

        self._is_episode_active = False

    def create_sim_config(
            self, _sensor_suite: SensorSuite) -> habitat_sim.Configuration:
        sim_config = habitat_sim.SimulatorConfiguration()
        sim_config.scene.id = self.config.SCENE
        sim_config.gpu_device_id = self.config.HABITAT_SIM_V0.GPU_DEVICE_ID
        agent_config = habitat_sim.AgentConfiguration()
        overwrite_config(config_from=self._get_agent_config(),
                         config_to=agent_config)

        sensor_specifications = []
        for sensor in _sensor_suite.sensors.values():
            sim_sensor_cfg = habitat_sim.SensorSpec()
            sim_sensor_cfg.uuid = sensor.uuid
            sim_sensor_cfg.resolution = list(
                sensor.observation_space.shape[:2])
            sim_sensor_cfg.parameters["hfov"] = str(sensor.config.HFOV)
            sim_sensor_cfg.position = sensor.config.POSITION
            # TODO(maksymets): Add configure method to Sensor API to avoid
            # accessing child attributes through parent interface
            sim_sensor_cfg.sensor_type = sensor.sim_sensor_type  # type: ignore
            sensor_specifications.append(sim_sensor_cfg)

        # If no sensors are specified, create a dummy sensor so the simulator
        # won't throw an error
        if not _sensor_suite.sensors.values():
            sim_sensor_cfg = habitat_sim.SensorSpec()
            sim_sensor_cfg.resolution = [1, 1]
            sensor_specifications.append(sim_sensor_cfg)

        agent_config.sensor_specifications = sensor_specifications
        agent_config.action_space = {
            SimulatorActions.STOP.value:
            habitat_sim.ActionSpec("stop"),
            SimulatorActions.MOVE_FORWARD.value:
            habitat_sim.ActionSpec(
                "move_forward",
                habitat_sim.ActuationSpec(
                    amount=self.config.FORWARD_STEP_SIZE),
            ),
            SimulatorActions.TURN_LEFT.value:
            habitat_sim.ActionSpec(
                "turn_left",
                habitat_sim.ActuationSpec(amount=self.config.TURN_ANGLE),
            ),
            SimulatorActions.TURN_RIGHT.value:
            habitat_sim.ActionSpec(
                "turn_right",
                habitat_sim.ActuationSpec(amount=self.config.TURN_ANGLE),
            ),
            SimulatorActions.LOOK_UP.value:
            habitat_sim.ActionSpec(
                "look_up",
                habitat_sim.ActuationSpec(amount=self.config.TILT_ANGLE),
            ),
            SimulatorActions.LOOK_DOWN.value:
            habitat_sim.ActionSpec(
                "look_down",
                habitat_sim.ActuationSpec(amount=self.config.TILT_ANGLE),
            ),
        }

        return habitat_sim.Configuration(sim_config, [agent_config])

    @property
    def sensor_suite(self) -> SensorSuite:
        return self._sensor_suite

    @property
    def action_space(self) -> Space:
        return self._action_space

    @property
    def is_episode_active(self) -> bool:
        return self._is_episode_active

    def _update_agents_state(self) -> bool:
        is_updated = False
        for agent_id, _ in enumerate(self.config.AGENTS):
            agent_cfg = self._get_agent_config(agent_id)
            if agent_cfg.IS_SET_START_STATE:
                self.set_agent_state(
                    agent_cfg.START_POSITION,
                    agent_cfg.START_ROTATION,
                    agent_id,
                )
                is_updated = True
        return is_updated

    def reset(self):
        sim_obs = self._sim.reset()
        if self._update_agents_state():
            sim_obs = self._sim.get_sensor_observations()

        self._is_episode_active = True
        return self._sensor_suite.get_observations(sim_obs)

    def step(self, action):
        assert self._is_episode_active, (
            "episode is not active, environment not RESET or "
            "STOP action called previously")

        if action == SimulatorActions.STOP.value:
            self._is_episode_active = False
            sim_obs = self._sim.get_sensor_observations()
        else:
            sim_obs = self._sim.step(action)

        observations = self._sensor_suite.get_observations(sim_obs)
        return observations

    def render(self, mode: str = "rgb") -> Any:
        r"""
        Args:
            mode: sensor whose observation is used for returning the frame,
                eg: "rgb", "depth", "semantic"

        Returns:
            rendered frame according to the mode
        """
        sim_obs = self._sim.get_sensor_observations()
        observations = self._sensor_suite.get_observations(sim_obs)

        output = observations.get(mode)
        assert output is not None, "mode {} sensor is not active".format(mode)

        return output

    def seed(self, seed):
        self._sim.seed(seed)

    def reconfigure(self, config: Config) -> None:
        # TODO(maksymets): Switch to Habitat-Sim more efficient caching
        is_same_scene = config.SCENE == self._current_scene
        self.config = config
        self.sim_config = self.create_sim_config(self._sensor_suite)
        if not is_same_scene:
            self._current_scene = config.SCENE
            self._sim.close()
            del self._sim
            self._sim = habitat_sim.Simulator(self.sim_config)

        self._update_agents_state()

    def geodesic_distance(self, position_a, position_b):
        path = habitat_sim.ShortestPath()
        path.requested_start = np.array(position_a, dtype=np.float32)
        path.requested_end = np.array(position_b, dtype=np.float32)
        self._sim.pathfinder.find_path(path)
        return path.geodesic_distance

    def action_space_shortest_path(
            self,
            source: AgentState,
            targets: List[AgentState],
            agent_id: int = 0) -> List[ShortestPathPoint]:
        r"""
        Returns:
            List of agent states and actions along the shortest path from
            source to the nearest target (both included). If one of the
            target(s) is identical to the source, a list containing only
            one node with the identical agent state is returned. Returns
            an empty list in case none of the targets are reachable from
            the source. For the last item in the returned list the action
            will be None.
        """
        raise NotImplementedError(
            "This function is no longer implemented. Please use the greedy "
            "follower instead")

    @property
    def up_vector(self):
        return np.array([0.0, 1.0, 0.0])

    @property
    def forward_vector(self):
        return -np.array([0.0, 0.0, 1.0])

    def get_straight_shortest_path_points(self, position_a, position_b):
        path = habitat_sim.ShortestPath()
        path.requested_start = position_a
        path.requested_end = position_b
        self._sim.pathfinder.find_path(path)
        return path.points

    def sample_navigable_point(self):
        return self._sim.pathfinder.get_random_navigable_point().tolist()

    def is_navigable(self, point: List[float]):
        return self._sim.pathfinder.is_navigable(point)

    def semantic_annotations(self):
        r"""
        Returns:
            SemanticScene which is a three level hierarchy of semantic
            annotations for the current scene. Specifically this method
            returns a SemanticScene which contains a list of SemanticLevel's
            where each SemanticLevel contains a list of SemanticRegion's where
            each SemanticRegion contains a list of SemanticObject's.

            SemanticScene has attributes: aabb(axis-aligned bounding box) which
            has attributes aabb.center and aabb.sizes which are 3d vectors,
            categories, levels, objects, regions.

            SemanticLevel has attributes: id, aabb, objects and regions.

            SemanticRegion has attributes: id, level, aabb, category (to get
            name of category use category.name()) and objects.

            SemanticObject has attributes: id, region, aabb, obb (oriented
            bounding box) and category.

            SemanticScene contains List[SemanticLevel]
            SemanticLevel contains List[SemanticRegion]
            SemanticRegion contains List[SemanticObject]

            Example to loop through in a hierarchical fashion:
            for level in semantic_scene.levels:
                for region in level.regions:
                    for obj in region.objects:
        """
        return self._sim.semantic_scene

    def close(self):
        self._sim.close()

    @property
    def index_stop_action(self):
        return SimulatorActions.STOP.value

    @property
    def index_forward_action(self):
        return SimulatorActions.MOVE_FORWARD.value

    def _get_agent_config(self, agent_id: Optional[int] = None) -> Any:
        if agent_id is None:
            agent_id = self.config.DEFAULT_AGENT_ID
        agent_name = self.config.AGENTS[agent_id]
        agent_config = getattr(self.config, agent_name)
        return agent_config

    def get_agent_state(self, agent_id: int = 0) -> habitat_sim.AgentState:
        assert agent_id == 0, "No support of multi agent in {} yet.".format(
            self.__class__.__name__)
        return self._sim.get_agent(agent_id).get_state()

    def set_agent_state(
        self,
        position: List[float],
        rotation: List[float],
        agent_id: int = 0,
        reset_sensors: bool = True,
    ) -> bool:
        r"""Sets agent state similar to initialize_agent, but without agents
        creation. On failure to place the agent in the proper position, it is
        moved back to its previous pose.

        Args:
            position: list containing 3 entries for (x, y, z).
            rotation: list with 4 entries for (x, y, z, w) elements of unit
                quaternion (versor) representing agent 3D orientation,
                (https://en.wikipedia.org/wiki/Versor)
            agent_id: int identification of agent from multiagent setup.
            reset_sensors: bool for if sensor changes (e.g. tilt) should be
                reset).

        Returns:
            True if the set was successful else moves the agent back to its
            original pose and returns false.
        """
        agent = self._sim.get_agent(agent_id)
        original_state = self.get_agent_state(agent_id)
        new_state = self.get_agent_state(agent_id)
        new_state.position = position
        new_state.rotation = rotation

        # NB: The agent state also contains the sensor states in _absolute_
        # coordinates. In order to set the agent's body to a specific
        # location and have the sensors follow, we must not provide any
        # state for the sensors. This will cause them to follow the agent's
        # body
        new_state.sensor_states = dict()

        agent.set_state(new_state, reset_sensors)

        if not self._check_agent_position(position, agent_id):
            agent.set_state(original_state, reset_sensors)
            return False
        return True

    def get_observations_at(
        self,
        position: List[float],
        rotation: List[float],
        keep_agent_at_new_pose: bool = False,
    ) -> Optional[Observations]:

        current_state = self.get_agent_state()

        success = self.set_agent_state(position, rotation, reset_sensors=False)
        if success:
            sim_obs = self._sim.get_sensor_observations()
            observations = self._sensor_suite.get_observations(sim_obs)
            if not keep_agent_at_new_pose:
                self.set_agent_state(
                    current_state.position,
                    current_state.rotation,
                    reset_sensors=False,
                )
            return observations
        else:
            return None

    # TODO (maksymets): Remove check after simulator became stable
    def _check_agent_position(self, position, agent_id=0) -> bool:
        if not np.allclose(position, self.get_agent_state(agent_id).position):
            logger.info("Agent state diverges from configured start position.")
            return False
        return True

    def distance_to_closest_obstacle(self, position, max_search_radius=2.0):
        return self._sim.pathfinder.distance_to_closest_obstacle(
            position, max_search_radius)

    def island_radius(self, position):
        return self._sim.pathfinder.island_radius(position)
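
A hedged usage sketch for the wrapper above: peeking at observations from another pose without permanently moving the agent. The position and rotation values are hypothetical and config construction is omitted, so the snippet is left commented out.

# sim = HabitatSim(config)                # assumes a valid habitat Config
# obs = sim.reset()
# goal_obs = sim.get_observations_at(
#     position=[1.0, 0.0, -2.0],          # hypothetical navigable point (x, y, z)
#     rotation=[0.0, 0.0, 0.0, 1.0],      # identity quaternion (x, y, z, w)
#     keep_agent_at_new_pose=False,       # restore the original pose afterwards
# )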
Example #8
class SoundSpacesSim(Simulator, ABC):
    r"""Changes made to simulator wrapper over habitat-sim

    This simulator first loads the graph of current environment and moves the agent among nodes.
    Any sounds can be specified in the episode and loaded in this simulator.
    Args:
        config: configuration for initializing the simulator.
    """

    def action_space_shortest_path(
            self, source: AgentState, targets: List[AgentState], agent_id: int = 0
    ) -> List[ShortestPathPoint]:
        pass

    def __init__(self, config: Config) -> None:
        self.config = config
        agent_config = self.get_agent_config()
        sim_sensors = []
        for sensor_name in agent_config.SENSORS:
            sensor_cfg = getattr(self.config, sensor_name)
            sensor_type = registry.get_sensor(sensor_cfg.TYPE)

            assert sensor_type is not None, "invalid sensor type {}".format(
                sensor_cfg.TYPE
            )
            sim_sensors.append(sensor_type(sensor_cfg))

        self._sensor_suite = SensorSuite(sim_sensors)
        self.sim_config = self.create_sim_config(self._sensor_suite)
        self._current_scene = self.sim_config.sim_cfg.scene.id
        self._sim = habitat_sim.Simulator(self.sim_config)
        self._action_space = spaces.Discrete(
            len(self.sim_config.agents[0].action_space)
        )
        self._prev_sim_obs = None

        self._source_position_index = None
        self._receiver_position_index = None
        self._rotation_angle = None
        self._current_sound = None
        self._source_sound_dict = dict()
        self._sampling_rate = None
        self._node2index = None
        self._frame_cache = dict()
        self._audiogoal_cache = dict()
        self._spectrogram_cache = dict()
        self._egomap_cache = defaultdict(dict)
        self._scene_observations = None
        self._episode_step_count = None
        self._is_episode_active = None
        self._position_to_index_mapping = dict()
        self._previous_step_collided = False

        self.points, self.graph = load_metadata(self.metadata_dir)
        for node in self.graph.nodes():
            self._position_to_index_mapping[self.position_encoding(self.graph.nodes()[node]['point'])] = node
        self._load_source_sounds()
        logging.info('Current scene: {} and sound: {}'.format(self.current_scene_name, self._current_sound))

        if self.config.USE_RENDERED_OBSERVATIONS:
            self._sim.close()
            del self._sim
            self._sim = DummySimulator()
            with open(self.current_scene_observation_file, 'rb') as fo:
                self._frame_cache = pickle.load(fo)

    def create_sim_config(
            self, _sensor_suite: SensorSuite
    ) -> habitat_sim.Configuration:
        sim_config = habitat_sim.SimulatorConfiguration()
        overwrite_config(
            config_from=self.config.HABITAT_SIM_V0, config_to=sim_config
        )
        sim_config.scene.id = self.config.SCENE
        agent_config = habitat_sim.AgentConfiguration()
        overwrite_config(
            config_from=self.get_agent_config(), config_to=agent_config
        )

        sensor_specifications = []
        for sensor in _sensor_suite.sensors.values():
            sim_sensor_cfg = habitat_sim.SensorSpec()
            overwrite_config(
                config_from=sensor.config, config_to=sim_sensor_cfg
            )
            sim_sensor_cfg.uuid = sensor.uuid
            sim_sensor_cfg.resolution = list(
                sensor.observation_space.shape[:2]
            )
            sim_sensor_cfg.parameters["hfov"] = str(sensor.config.HFOV)

            # accessing child attributes through parent interface
            sim_sensor_cfg.sensor_type = sensor.sim_sensor_type  # type: ignore
            sim_sensor_cfg.gpu2gpu_transfer = (
                self.config.HABITAT_SIM_V0.GPU_GPU
            )
            sensor_specifications.append(sim_sensor_cfg)

        agent_config.sensor_specifications = sensor_specifications
        agent_config.action_space = registry.get_action_space_configuration(
            self.config.ACTION_SPACE_CONFIG
        )(self.config).get()

        return habitat_sim.Configuration(sim_config, [agent_config])

    @property
    def sensor_suite(self) -> SensorSuite:
        return self._sensor_suite

    def get_agent_config(self, agent_id: Optional[int] = None) -> Any:
        if agent_id is None:
            agent_id = self.config.DEFAULT_AGENT_ID
        agent_name = self.config.AGENTS[agent_id]
        agent_config = getattr(self.config, agent_name)
        return agent_config

    def get_agent_state(self, agent_id: int = 0) -> habitat_sim.AgentState:
        if not self.config.USE_RENDERED_OBSERVATIONS:
            agent_state = self._sim.get_agent(agent_id).get_state()
        else:
            agent_state = self._sim.get_agent_state()

        return agent_state

    def _update_agents_state(self) -> bool:
        is_updated = False
        for agent_id, _ in enumerate(self.config.AGENTS):
            agent_cfg = self._get_agent_config(agent_id)
            if agent_cfg.IS_SET_START_STATE:
                self.set_agent_state(
                    agent_cfg.START_POSITION,
                    agent_cfg.START_ROTATION,
                    agent_id,
                )
                is_updated = True

        return is_updated

    def _get_agent_config(self, agent_id: Optional[int] = None) -> Any:
        if agent_id is None:
            agent_id = self.config.DEFAULT_AGENT_ID
        agent_name = self.config.AGENTS[agent_id]
        agent_config = getattr(self.config, agent_name)
        return agent_config

    def set_agent_state(
            self,
            position: List[float],
            rotation: List[float],
            agent_id: int = 0,
            reset_sensors: bool = True,
    ) -> bool:
        if not self.config.USE_RENDERED_OBSERVATIONS:
            agent = self._sim.get_agent(agent_id)
            new_state = self.get_agent_state(agent_id)
            new_state.position = position
            new_state.rotation = rotation
            new_state.sensor_states = {}
            agent.set_state(new_state, reset_sensors)
        # the signature promises a bool; get_observations_at relies on it
        return True

    def get_observations_at(
            self,
            position: Optional[List[float]] = None,
            rotation: Optional[List[float]] = None,
            keep_agent_at_new_pose: bool = False,
    ) -> Optional[Observations]:
        current_state = self.get_agent_state()
        if position is None or rotation is None:
            success = True
        else:
            success = self.set_agent_state(
                position, rotation, reset_sensors=False
            )

        if success:
            sim_obs = self._sim.get_sensor_observations()

            self._prev_sim_obs = sim_obs

            observations = self._sensor_suite.get_observations(sim_obs)
            if not keep_agent_at_new_pose:
                self.set_agent_state(
                    current_state.position,
                    current_state.rotation,
                    reset_sensors=False,
                )
            return observations
        else:
            return None

    @property
    def binaural_rir_dir(self):
        return os.path.join(self.config.AUDIO.BINAURAL_RIR_DIR, self.config.SCENE_DATASET, self.current_scene_name)

    @property
    def source_sound_dir(self):
        return self.config.AUDIO.SOURCE_SOUND_DIR

    @property
    def metadata_dir(self):
        return os.path.join(self.config.AUDIO.METADATA_DIR, self.config.SCENE_DATASET, self.current_scene_name)

    @property
    def current_scene_name(self):
        # config.SCENE (_current_scene) looks like 'data/scene_datasets/replica/office_1/habitat/mesh_semantic.ply'
        return self._current_scene.split('/')[3]
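    # Worked example: with _current_scene set to
    # 'data/scene_datasets/replica/office_1/habitat/mesh_semantic.ply',
    # split('/')[3] yields 'office_1', the scene directory name.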

    @property
    def current_scene_observation_file(self):
        return os.path.join(self.config.SCENE_OBSERVATION_DIR, self.config.SCENE_DATASET,
                            self.current_scene_name + '.pkl')

    @property
    def current_source_sound(self):
        return self._source_sound_dict[self._current_sound]

    def reconfigure(self, config: Config) -> None:
        self.config = config
        is_same_sound = config.AGENT_0.SOUND_ID == self._current_sound
        if not is_same_sound:
            self._current_sound = self.config.AGENT_0.SOUND_ID

        is_same_scene = config.SCENE == self._current_scene
        if not is_same_scene:
            self._current_scene = config.SCENE
            logging.debug('Current scene: {} and sound: {}'.format(self.current_scene_name, self._current_sound))

            if not self.config.USE_RENDERED_OBSERVATIONS:
                self._sim.close()
                del self._sim
                self.sim_config = self.create_sim_config(self._sensor_suite)
                self._sim = habitat_sim.Simulator(self.sim_config)
                self._update_agents_state()
                self._frame_cache = dict()
            else:
                with open(self.current_scene_observation_file, 'rb') as fo:
                    self._frame_cache = pickle.load(fo)
            logging.debug('Loaded scene {}'.format(self.current_scene_name))

            self.points, self.graph = load_metadata(self.metadata_dir)
            for node in self.graph.nodes():
                self._position_to_index_mapping[self.position_encoding(self.graph.nodes()[node]['point'])] = node

        if not is_same_scene or not is_same_sound:
            self._audiogoal_cache = dict()
            self._spectrogram_cache = dict()

        self._episode_step_count = 0

        # set agent positions
        self._receiver_position_index = self._position_to_index(self.config.AGENT_0.START_POSITION)
        self._source_position_index = self._position_to_index(self.config.AGENT_0.GOAL_POSITION)
        # the agent rotates counterclockwise about +Y, starting from -Z,
        # so a rotation angle of 90 means the agent has rotated 90 degrees about +Y
        self._rotation_angle = int(np.around(np.rad2deg(quat_to_angle_axis(quat_from_coeffs(
            self.config.AGENT_0.START_ROTATION))[0]))) % 360
        if not self.config.USE_RENDERED_OBSERVATIONS:
            self.set_agent_state(list(self.graph.nodes[self._receiver_position_index]['point']),
                                 self.config.AGENT_0.START_ROTATION)
        else:
            self._sim.set_agent_state(list(self.graph.nodes[self._receiver_position_index]['point']),
                                      quat_from_coeffs(self.config.AGENT_0.START_ROTATION))

        logging.debug("Initial source, agent at: {}, {}, orientation: {}".
                      format(self._source_position_index, self._receiver_position_index, self.get_orientation()))

    @staticmethod
    def position_encoding(position):
        return '{:.2f}_{:.2f}_{:.2f}'.format(*position)
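    # Example: position_encoding([1.0, 0.0, -2.5]) returns '1.00_0.00_-2.50'.
    # This string keys _position_to_index_mapping, so queried positions must
    # match the metadata grid to within two decimal places.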

    def _position_to_index(self, position):
        encoding = self.position_encoding(position)
        if encoding in self._position_to_index_mapping:
            return self._position_to_index_mapping[encoding]
        else:
            raise ValueError("Position misalignment: {} is not a node of the metadata graph.".format(encoding))

    def _get_sim_observation(self):
        joint_index = (self._receiver_position_index, self._rotation_angle)
        if joint_index in self._frame_cache:
            return self._frame_cache[joint_index]
        else:
            assert not self.config.USE_RENDERED_OBSERVATIONS
            sim_obs = self._sim.get_sensor_observations()
            self._frame_cache[joint_index] = sim_obs
            return sim_obs
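    # The frame cache is keyed by (position index, rotation angle); with
    # 90-degree turns on a fixed grid a scene has at most 4 * len(graph.nodes)
    # distinct visual frames, so revisited poses are served from memory.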

    def reset(self):
        logging.debug('Reset simulation')
        if not self.config.USE_RENDERED_OBSERVATIONS:
            sim_obs = self._sim.reset()
            if self._update_agents_state():
                sim_obs = self._get_sim_observation()
        else:
            sim_obs = self._get_sim_observation()
            self._sim.set_sensor_observations(sim_obs)

        self._is_episode_active = True
        self._prev_sim_obs = sim_obs
        self._previous_step_collided = False
        # Encapsulate data under Observations class
        observations = self._sensor_suite.get_observations(sim_obs)

        return observations
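    # Typical episode loop (sketch; `sim` is a constructed instance of this
    # class and the action constants come from HabitatSimActions):
    #
    #     observations = sim.reset()
    #     while sim._is_episode_active:
    #         observations = sim.step(HabitatSimActions.MOVE_FORWARD)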

    def step(self, action, only_allowed=True):
        """
        All angle calculations in this function is w.r.t habitat coordinate frame, on X-Z plane
        where +Y is upward, -Z is forward and +X is rightward.
        Angle 0 corresponds to +X, angle 90 corresponds to +y and 290 corresponds to 270.

        :param action: action to be taken
        :param only_allowed: if true, then can't step anywhere except allowed locations
        :return:
        Dict of observations
        """
        assert self._is_episode_active, (
            "episode is not active, environment not RESET or "
            "STOP action called previously"
        )

        self._previous_step_collided = False
        # initialize here so the CONTINUOUS_VIEW_CHANGE block after the action
        # handling is safe even when the episode ends with STOP
        intermediate_observations = list()
        # STOP: 0, MOVE_FORWARD: 1, TURN_LEFT: 2, TURN_RIGHT: 3
        if action == HabitatSimActions.STOP:
            self._is_episode_active = False
        else:
            prev_position_index = self._receiver_position_index
            prev_rotation_angle = self._rotation_angle
            if action == HabitatSimActions.MOVE_FORWARD:
                # the agent initially faces -Z by default; assume a collision
                # unless some graph neighbor lies in the current facing direction
                self._previous_step_collided = True
                for neighbor in self.graph[self._receiver_position_index]:
                    p1 = self.graph.nodes[self._receiver_position_index]['point']
                    p2 = self.graph.nodes[neighbor]['point']
                    direction = int(np.around(np.rad2deg(np.arctan2(p2[2] - p1[2], p2[0] - p1[0])))) % 360
                    if direction == self.get_orientation():
                        self._receiver_position_index = neighbor
                        self._previous_step_collided = False
                        break
            elif action == HabitatSimActions.TURN_LEFT:
                # agent rotates counterclockwise, so turning left means increasing rotation angle by 90
                self._rotation_angle = (self._rotation_angle + 90) % 360
            elif action == HabitatSimActions.TURN_RIGHT:
                self._rotation_angle = (self._rotation_angle - 90) % 360

            if self.config.CONTINUOUS_VIEW_CHANGE:
                fps = self.config.VIEW_CHANGE_FPS
                if action == HabitatSimActions.MOVE_FORWARD:
                    prev_position = np.array(self.graph.nodes[prev_position_index]['point'])
                    current_position = np.array(self.graph.nodes[self._receiver_position_index]['point'])
                    for i in range(1, fps):
                        intermediate_position = prev_position + i / fps * (current_position - prev_position)
                        self.set_agent_state(intermediate_position.tolist(), quat_from_angle_axis(np.deg2rad(
                            self._rotation_angle), np.array([0, 1, 0])))
                        sim_obs = self._sim.get_sensor_observations()
                        observations = self._sensor_suite.get_observations(sim_obs)
                        intermediate_observations.append(observations)
                else:
                    for i in range(1, fps):
                        if action == HabitatSimActions.TURN_LEFT:
                            intermediate_rotation = prev_rotation_angle + i / fps * 90
                        elif action == HabitatSimActions.TURN_RIGHT:
                            intermediate_rotation = prev_rotation_angle - i / fps * 90
                        self.set_agent_state(list(self.graph.nodes[self._receiver_position_index]['point']),
                                             quat_from_angle_axis(np.deg2rad(intermediate_rotation),
                                                                  np.array([0, 1, 0])))
                        sim_obs = self._sim.get_sensor_observations()
                        observations = self._sensor_suite.get_observations(sim_obs)
                        intermediate_observations.append(observations)

            if not self.config.USE_RENDERED_OBSERVATIONS:
                self.set_agent_state(list(self.graph.nodes[self._receiver_position_index]['point']),
                                     quat_from_angle_axis(np.deg2rad(self._rotation_angle), np.array([0, 1, 0])))
            else:
                self._sim.set_agent_state(list(self.graph.nodes[self._receiver_position_index]['point']),
                                          quat_from_angle_axis(np.deg2rad(self._rotation_angle), np.array([0, 1, 0])))
        self._episode_step_count += 1

        # log debugging info
        logging.debug('After taking action {}, source: {}, receiver: {}, orientation: {}, location: {}'.format(
            action, self._source_position_index, self._receiver_position_index,
            self.get_orientation(), self.graph.nodes[self._receiver_position_index]['point']))

        sim_obs = self._get_sim_observation()
        if self.config.USE_RENDERED_OBSERVATIONS:
            self._sim.set_sensor_observations(sim_obs)
        self._prev_sim_obs = sim_obs
        observations = self._sensor_suite.get_observations(sim_obs)
        if self.config.CONTINUOUS_VIEW_CHANGE:
            observations['intermediate'] = intermediate_observations

        return observations

    def get_orientation(self):
        _base_orientation = 270
        return (_base_orientation - self._rotation_angle) % 360
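    # Worked example: _rotation_angle = 0 (agent facing -Z) gives orientation
    # (270 - 0) % 360 = 270, matching atan2(z, x) on the X-Z plane, where the
    # -Z direction is -90 degrees, i.e. 270 after the modulo.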

    @property
    def azimuth_angle(self):
        # this is the angle used to index the binaural audio files;
        # in the mesh coordinate system, +Y is forward, +X rightward, +Z upward,
        # and azimuth is measured clockwise, so +Y is 0 and +X is 90
        return -self._rotation_angle % 360
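    # Example: a rotation angle of 90 (one left turn from the start pose) maps
    # to azimuth -90 % 360 = 270, the clockwise angle used in _compute_audiogoal
    # to pick the RIR subdirectory under binaural_rir_dir.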

    @property
    def reaching_goal(self):
        return self._source_position_index == self._receiver_position_index

    def _update_observations_with_audio(self, observations):
        audio = self.get_current_audio_observation()
        observations.update({"audio": audio})

    def _load_source_sounds(self):
        # load all mono files at once
        sound_files = os.listdir(self.source_sound_dir)
        for sound_file in sound_files:
            sr, audio_data = wavfile.read(os.path.join(self.source_sound_dir, sound_file))
            assert sr == 44100
            if sr != self.config.AUDIO.RIR_SAMPLING_RATE:
                # scipy.signal.resample takes a target sample count, so this
                # resampling assumes one-second source clips
                audio_data = scipy.signal.resample(audio_data, self.config.AUDIO.RIR_SAMPLING_RATE)
            self._source_sound_dict[sound_file] = audio_data

    def _compute_euclidean_distance_between_sr_locations(self):
        p1 = self.graph.nodes[self._receiver_position_index]['point']
        p2 = self.graph.nodes[self._source_position_index]['point']
        d = np.sqrt((p1[0] - p2[0]) ** 2 + (p1[2] - p2[2]) ** 2)
        return d

    def _compute_audiogoal(self):
        binaural_rir_file = os.path.join(self.binaural_rir_dir, str(self.azimuth_angle), '{}_{}.wav'.format(
            self._receiver_position_index, self._source_position_index))
        try:
            sampling_freq, binaural_rir = wavfile.read(binaural_rir_file)  # float32
            # # pad RIR with zeros to take initial delays into account
            # num_delay_sample = int(self._compute_euclidean_distance_between_sr_locations() / 343.0 * sampling_freq)
            # binaural_rir = np.pad(binaural_rir, ((num_delay_sample, 0), (0, 0)))

        except ValueError:
            logging.warning("{} file is not readable".format(binaural_rir_file))
            binaural_rir = np.zeros((self.config.AUDIO.RIR_SAMPLING_RATE, 2)).astype(np.float32)
        if len(binaural_rir) == 0:
            logging.debug("Empty RIR file at {}".format(binaural_rir_file))
            binaural_rir = np.zeros((self.config.AUDIO.RIR_SAMPLING_RATE, 2)).astype(np.float32)

        # by default, convolve in full mode, which preserves the direct sound
        binaural_convolved = [fftconvolve(self.current_source_sound, binaural_rir[:, channel])
                              for channel in range(binaural_rir.shape[-1])]
        audiogoal = np.array(binaural_convolved)[:, :self.current_source_sound.shape[0]]

        return audiogoal
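    # The convolution step above in isolation (hypothetical arrays; the real
    # inputs are the loaded source sound and the RIR read from disk):
    #
    #     source = np.random.rand(44100).astype(np.float32)   # 1 s mono sound
    #     rir = np.random.rand(22050, 2).astype(np.float32)   # binaural RIR
    #     convolved = [fftconvolve(source, rir[:, c]) for c in range(2)]
    #     audiogoal = np.array(convolved)[:, :source.shape[0]]  # crop to source length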

    def get_current_audiogoal_observation(self):
        sr_index = (self._source_position_index, self._receiver_position_index, self.azimuth_angle)
        if sr_index not in self._audiogoal_cache:
            self._audiogoal_cache[sr_index] = self._compute_audiogoal()

        return self._audiogoal_cache[sr_index]

    def get_current_spectrogram_observation(self, audiogoal2spectrogram):
        sr_index = (self._source_position_index, self._receiver_position_index)
        sr_index = sr_index + (self.azimuth_angle,)
        if sr_index not in self._spectrogram_cache:
            audiogoal = self._compute_audiogoal()
            spectrogram = audiogoal2spectrogram(audiogoal)
            self._spectrogram_cache[sr_index] = spectrogram

        return self._spectrogram_cache[sr_index]
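    # Sketch of an audiogoal2spectrogram callable (hypothetical; the actual
    # transform is supplied by the sensor that calls this method):
    #
    #     def audiogoal2spectrogram(audiogoal):
    #         # audiogoal: (2, num_samples) -> per-channel magnitude spectrum
    #         specs = [np.abs(np.fft.rfft(channel)) for channel in audiogoal]
    #         return np.stack(specs, axis=-1)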

    def geodesic_distance(self, position_a, position_bs, episode=None):
        index_a = self._position_to_index(position_a)
        assert index_a is not None
        distances = []
        for position_b in position_bs:
            index_b = self._position_to_index(position_b)
            assert index_b is not None
            path_length = nx.shortest_path_length(self.graph, index_a, index_b) * self.config.GRID_SIZE
            distances.append(path_length)

        return min(distances)
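    # Example: with GRID_SIZE = 0.5 and a 6-edge shortest path between the two
    # node indices, the geodesic distance is 6 * 0.5 = 3.0 meters (hop count
    # times grid spacing, assuming each graph edge spans one grid cell).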

    def get_straight_shortest_path_points(self, position_a, position_b):
        index_a = self._position_to_index(position_a)
        index_b = self._position_to_index(position_b)
        assert index_a is not None and index_b is not None

        shortest_path = nx.shortest_path(self.graph, source=index_a, target=index_b)
        points = list()
        for node in shortest_path:
            points.append(self.graph.nodes()[node]['point'])
        return points

    @property
    def previous_step_collided(self):
        return self._previous_step_collided

    def get_egomap_observation(self):
        joint_index = (self._receiver_position_index, self._rotation_angle)
        if joint_index in self._egomap_cache[self._current_scene]:
            return self._egomap_cache[self._current_scene][joint_index]
        else:
            return None

    def cache_egomap_observation(self, egomap):
        self._egomap_cache[self._current_scene][(self._receiver_position_index, self._rotation_angle)] = egomap

    def seed(self, seed):
        self._sim.seed(seed)