Ejemplo n.º 1
0
    def reset(self) -> Observation:
        # Initialize pybullet if not yet done
        p = self.pybullet
        assert p, "PyBullet object not valid"

        if self._hard_reset and self.task.has_robot():
            if not self._first_run:
                logger.debug("Hard reset: deleting the robot")
                self.task.robot.delete_simulated_robot()

                logger.debug("Hard reset: creating new robot")
                self.task.robot = self._get_robot()
            else:
                self._first_run = False

        # Reset the environment
        ok_reset = self.task.reset_task()
        assert ok_reset, "Failed to reset the task"

        # Get the observation
        observation = self.task.get_observation()

        if not self.observation_space.contains(observation):
            logger.warn(
                "The observation does not belong to the observation space")

        return Observation(observation)
Ejemplo n.º 2
0
    def step(self, action: Action) -> State:

        if not self.action_space.contains(action):
            logger.warn("The action does not belong to the action space")

        # Set the action
        self.task.set_action(action)

        # Step the simulator
        ok_gazebo = self.gazebo.run()
        assert ok_gazebo, "Failed to step gazebo"

        # Get the observation
        observation = self.task.get_observation()
        assert isinstance(observation, np.ndarray)

        if not self.observation_space.contains(observation):
            logger.warn(
                "The observation does not belong to the observation space")

        # Get the reward
        reward = self.task.get_reward()
        assert isinstance(reward, float), "Failed to get the reward"

        # Check termination
        done = self.task.is_done()

        # Get info
        info = self.task.get_info()

        return State(
            (Observation(observation), Reward(reward), Done(done), Info(info)))
Ejemplo n.º 3
0
    def get_reward(self) -> Reward:
        # Calculate the reward
        if not self.is_done():
            reward = 1.0
        else:
            if self._steps_beyond_done is None:
                # Pole just fell
                self._steps_beyond_done = 0
                reward = 1.0
            else:
                self._steps_beyond_done += 1
                reward = 0.0

                # Warn the user to call reset
                if self._steps_beyond_done == 1:
                    logger.warn(
                        "You are calling 'step()' even though this environment "
                        "has already returned done = True. You should always "
                        "call 'reset()' once you receive 'done = True' -- any "
                        "further steps are undefined behavior.")

        if self._reward_cart_at_center:
            # Get the observation
            observation = self.get_observation()
            x = observation[0]
            x_dot = observation[1]

            # Update the reward
            reward = reward \
                - np.abs(x) \
                - np.abs(x_dot) \
                - 10 * (x >= self._x_threshold)

        return reward
Ejemplo n.º 4
0
    def reset_task(self) -> bool:
        # Sample the angular velocity from the observation space
        _, _, theta_dot = self.observation_space.sample().tolist()

        # Sample the angular position from an uniform rng
        theta = self.np_random.uniform(0, 2 * np.pi)

        try:
            desired_control_mode = robot_joints.JointControlMode.TORQUE
            if self.robot.joint_control_mode("pivot") != desired_control_mode:
                ok_mode = self.robot.set_joint_control_mode(
                    "pivot", desired_control_mode)
                assert ok_mode, "Failed to set pendulum control mode"
        except Exception:
            logger.warn(
                "Failed to set control mode. Is it supported by the runtime?")
            pass

        # Reset the robot state
        ok_reset = self.robot.reset_joint("pivot", theta, theta_dot)
        assert ok_reset, "Failed to reset the pendulum"

        # Clean the last applied force
        self._last_a = None

        return True
Ejemplo n.º 5
0
    def reset(self) -> Observation:
        # Initialize gazebo if not yet done
        gazebo = self.gazebo
        assert gazebo, "Gazebo object not valid"

        # Remove the model and insert it again. This is the reset strategy for
        # floating-base robots. Resetting the joint state, instead, is sufficient to
        # reset fixed-based robots. Though, while avoiding the model deletion might
        # provide better performance, we should be sure that all the internal buffers
        # (e.g. those related to the low-level PIDs) are correctly re-initialized.
        if self._hard_reset and self.task.has_robot():
            if not self._first_run:
                logger.debug("Hard reset: deleting the robot")
                self.task.robot.delete_simulated_robot()

                logger.debug("Hard reset: creating new robot")
                self.task.robot = self._get_robot()
            else:
                self._first_run = False

        # Reset the environment
        ok_reset = self.task.reset_task()
        assert ok_reset, "Failed to reset the task"

        # Get the observation
        observation = self.task.get_observation()

        if not self.observation_space.contains(observation):
            logger.warn(
                "The observation does not belong to the observation space")

        return Observation(observation)
Ejemplo n.º 6
0
    def step(self, action: Action) -> State:
        if not self.action_space.contains(action):
            logger.warn("The action does not belong to the action space")

        # Set the action
        ok_action = self.task.set_action(action)
        assert ok_action, "Failed to set the action"

        # Step the simulator
        self.pybullet.stepSimulation()

        # Get the observation
        observation = self.task.get_observation()

        if not self.observation_space.contains(observation):
            logger.warn(
                "The observation does not belong to the observation space")

        # Get the reward
        reward = self.task.get_reward()
        assert reward is not None, "Failed to get the reward"

        # Check termination
        done = self.task.is_done()

        # Enforce the real-time factor
        self._enforce_rtf()

        return State((observation, reward, done, {}))
Ejemplo n.º 7
0
    def reset(self) -> Observation:
        # Initialize gazebo if not yet done
        gazebo = self.gazebo
        assert gazebo, "Gazebo object not valid"

        # Remove the robot and insert a new one
        if not self._first_run:
            logger.debug("Hard reset: deleting the robot")
            self.task.robot.delete_simulated_robot()

            # Execute a dummy step to process model removal
            self.gazebo.run()

            logger.debug("Hard reset: creating new robot")
            self.task.robot = self._get_robot()
        else:
            self._first_run = False

        # Reset the environment
        ok_reset = self.task.reset_task()
        assert ok_reset, "Failed to reset the task"

        # Get the observation
        observation = self.task.get_observation()

        if not self.observation_space.contains(observation):
            logger.warn(
                "The observation does not belong to the observation space")

        return Observation(observation)
Ejemplo n.º 8
0
    def gazebo(self) -> scenario.GazeboSimulator:

        if self._gazebo is not None:
            assert self._gazebo.initialized()
            return self._gazebo

        # Compute the number of physics iteration to execute at every environment step
        num_of_steps_per_run = self._physics_rate / self.agent_rate

        if num_of_steps_per_run != int(num_of_steps_per_run):
            logger.warn(
                "Rounding the number of iterations to {} from the nominal {}".
                format(int(num_of_steps_per_run), num_of_steps_per_run))

        # Create the simulator
        gazebo = scenario.GazeboSimulator(1.0 / self._physics_rate,
                                          self._real_time_factor,
                                          int(num_of_steps_per_run))

        # Store the simulator
        self._gazebo = gazebo

        # Insert the world (it will initialize the simulator)
        _ = self.world
        assert self._gazebo.initialized()

        return self._gazebo
Ejemplo n.º 9
0
    def step(self, action: Action) -> State:
        if not self.action_space.contains(action):
            logger.warn("The action does not belong to the action space")

        # Set the action
        ok_action = self.task.set_action(action)
        assert ok_action, "Failed to set the action"

        # Step the simulator
        ok_gazebo = self.gazebo.run()
        assert ok_gazebo, "Failed to step gazebo"

        # Get the observation
        observation = self.task.get_observation()

        if not self.observation_space.contains(observation):
            logger.warn(
                "The observation does not belong to the observation space")

        # Get the reward
        # TODO: use the wrapper method?
        reward = self.task.get_reward()
        assert reward is not None, "Failed to get the reward"

        # Check termination
        done = self.task.is_done()

        return State((observation, reward, done, {}))
Ejemplo n.º 10
0
    def reset_task(self) -> bool:
        # Initialize the environment with a new random state using the random number
        # generator provided by the Task.
        new_state = self.np_random.uniform(low=-0.05, high=0.05, size=(4, ))
        new_state[2] = self.np_random.uniform(low=-np.deg2rad(10),
                                              high=np.deg2rad(10))

        # Set the joints in torque control mode
        for joint in {"linear", "pivot"}:
            desired_control_mode = robot_joints.JointControlMode.TORQUE

            # TODO: temporary workaround for robot implementation without this method,
            #       e.g. FactoryRobot, which does not need to call it at the moment.
            try:
                if self.robot.joint_control_mode(
                        joint) != desired_control_mode:
                    ok_mode = self.robot.set_joint_control_mode(
                        joint, desired_control_mode)
                    assert ok_mode, f"Failed to set control mode for joint '{joint}'"
            except Exception:
                logger.warn(
                    "This runtime does not support setting the control mode")
                pass

        # Reset position and velocity
        ok1 = self.robot.reset_joint("linear", new_state[0], new_state[1])
        ok2 = self.robot.reset_joint("pivot", new_state[2], new_state[3])

        # Reset the flag that assures reset is properly called
        self._steps_beyond_done = None

        return ok1 and ok2
Ejemplo n.º 11
0
    def delete_simulated_robot(self) -> None:
        if not self._pybullet or self._robot_id is None:
            logger.warn("Failed to delete robot from the simulation. "
                        "Simulator not running.")
            return

        # Remove the robot from the simulation
        self._pybullet.removeBody(self._robot_id)
Ejemplo n.º 12
0
    def delete_simulated_robot(self) -> None:
        if not self._gazebo or not self._gympp_robot:
            logger.warn("Failed to delete robot from the simulation. "
                        "Simulator not running.")
            return

        # Remove the robot from the simulation
        ok_model = self._gazebo.removeModel(self._robot_name)
        assert ok_model, f"Failed to remove the model '{self._robot_name}' from gazebo"
Ejemplo n.º 13
0
    def is_done(self) -> bool:
        if not self.observation_space.contains(self.get_observation()):
            logger.warn(
                "Observation is outside its space. Marking the episode as done."
            )
            return True

        # This environment is episodic and always reach the max_episode_steps
        return False
    def blacklist_model(self, model_path, reason='Unknown'):

        if self._enable_blacklisting:
            bl_file = open(self.get_blacklisted_path(model_path), 'w')
            bl_file.write(reason)
            bl_file.close()
        logger.warn(
            '%s model "%s". Reason: %s.' %
            ('Blacklisting' if self._enable_blacklisting else 'Skipping',
             model_path, reason))
Ejemplo n.º 15
0
    def gazebo(self) -> bindings.GazeboWrapper:
        if self._gazebo_wrapper:
            assert self._gazebo_wrapper.getPhysicsData().rtf == self._rtf, \
                "The RTF of gazebo does not match the configuration"
            assert 1 / self._gazebo_wrapper.getPhysicsData().maxStepSize == \
                self._physics_rate, \
                "The physics rate of gazebo does not match the configuration"

            return self._gazebo_wrapper

        # =================
        # INITIALIZE GAZEBO
        # =================

        assert self._rtf, "Real Time Factor was not set"
        assert self.agent_rate, "Agent rate was not set"
        assert self._physics_rate, "Physics rate was not set"

        logger.debug("Starting gazebo")
        logger.debug(f"Agent rate: {self.agent_rate} Hz")
        logger.debug(f"Physics rate: {self._physics_rate} Hz")
        logger.debug(f"Simulation RTF: {self._rtf}")

        # Compute the number of physics iteration to execute at every environment step
        num_of_iterations_per_gazebo_step = self._physics_rate / self.agent_rate

        if num_of_iterations_per_gazebo_step != int(
                num_of_iterations_per_gazebo_step):
            logger.warn(
                "Rounding the number of iterations to {} from the nominal {}".
                format(int(num_of_iterations_per_gazebo_step),
                       num_of_iterations_per_gazebo_step))
        else:
            logger.debug("Setting {} iteration per simulator step".format(
                int(num_of_iterations_per_gazebo_step)))

        # Create the GazeboWrapper object
        self._gazebo_wrapper = bindings.GazeboWrapper(
            int(num_of_iterations_per_gazebo_step), self._rtf,
            self._physics_rate)

        # Set the verbosity
        logger.set_level(gym.logger.MIN_LEVEL)

        # Initialize the world
        world_ok = self._gazebo_wrapper.setupGazeboWorld(self._world)
        assert world_ok, "Failed to initialize the gazebo world"

        # Initialize the ignition gazebo wrapper
        gazebo_initialized = self._gazebo_wrapper.initialize()
        assert gazebo_initialized, "Failed to initialize ignition gazebo"

        return self._gazebo_wrapper
Ejemplo n.º 16
0
def add_path(data_path: str) -> None:
    if not exists(data_path):
        logger.warn(f"The path '{data_path}' does not exist. Not added to the data path.")
        return

    global GYM_IGNITION_DATA_PATH

    for path in GYM_IGNITION_DATA_PATH:
        if path == data_path:
            logger.debug(f"The path '{data_path}' is already present in the data path")
            return

    logger.debug(f"Adding new search path: '{data_path}'")
    GYM_IGNITION_DATA_PATH.append(data_path)
Ejemplo n.º 17
0
def add_path_from_env_var(env_variable: str) -> None:
    if env_variable not in os.environ:
        logger.warn(f"Failed to find '{env_variable}' environment variable")
        return

    # Get the environment variable
    env_var_content = os.environ[env_variable]

    # Remove leading ':' characters
    if env_var_content[0] == ':':
        env_var_content = env_var_content[1:]

    # Split multiple value
    env_var_paths = env_var_content.split(":")

    for path in env_var_paths:
        add_path(path)
Ejemplo n.º 18
0
    def reset(self) -> Observation:

        # Reset the task
        self.task.reset_task()

        # Execute a paused step
        ok_run = self.gazebo.run(paused=True)

        if not ok_run:
            raise RuntimeError("Failed to run Gazebo")

        # Get the observation
        observation = self.task.get_observation()
        assert isinstance(observation, np.ndarray)

        if not self.observation_space.contains(observation):
            logger.warn("The observation does not belong to the observation space")

        return Observation(observation)
Ejemplo n.º 19
0
    def set_joint_pid(self, joint_name: str, pid: robot.PID) -> bool:

        mode = self._jointname2jointcontrolinfo[joint_name].mode
        assert mode != JointControlMode.POSITION_INTERPOLATED

        if mode == JointControlMode.TORQUE:
            logger.warn(f"Joint '{joint_name}' is torque controlled. "
                        f"Setting the PID has no effect")
            return False

        if mode == JointControlMode.POSITION and pid.i != 0.0:
            raise Exception("Integral term not supported for POSITION mode")
        elif mode == JointControlMode.VELOCITY and pid.d != 0.0:
            raise Exception("Derivative term not supported for VELOCITY mode")

        # Store the new PID
        self._jointname2jointcontrolinfo[joint_name]._replace(PID=pid)

        # Update the PIDs setting again the control mode
        ok_mode = self.set_joint_control_mode(joint_name, mode)
        assert ok_mode, "Failed to set the control mode"

        return True
Ejemplo n.º 20
0
    def render(self, mode: str = 'human', **kwargs) -> None:
        if mode != "human":
            raise Exception(f"Render mode '{mode}' not yet supported")

        # If render has been already called once, and the simulator is ok, return
        if self._render_enabled and self._pybullet:
            return

        # Enable rendering
        self._render_enabled = True

        # If the simulator is already allocated (in DIRECT mode, headless), we have to
        # disconnect it, starting it in GUI mode, and recreate the robot object.
        if self._pybullet is not None:
            logger.warn(
                "The simulator was allocated in DIRECT mode before calling render. "
                "The simulation has been reset. All changes to the simulation state "
                "will be lost.")

            # Disconnect the simulator and deletes the robot object
            self.close()

            # Create a new simulator and robot object
            self.task._robot = self._get_robot()
Ejemplo n.º 21
0
    def __init__(self,
                 task_cls: type,
                 robot_cls: type,
                 model: str,
                 rtf: float,
                 agent_rate: float,
                 physics_rate: float,
                 world: str = "plane_implicit.urdf",
                 hard_reset: bool = True,
                 **kwargs):

        # Save the keyworded arguments.
        # We use them to build the task and the robot objects, and allow custom class
        # to accept user-defined parameters.
        self._kwargs = kwargs

        # Store the type of the class that provides Robot interface
        self._robot_cls = robot_cls

        # Delete and create a new robot every environment reset
        self._first_run = True
        self._hard_reset = hard_reset

        # URDF or SDF model files
        self._world = world
        self._model = model

        # Simulation attributes
        self._rtf = rtf
        self._now = None
        self._bias = 0.0

        self._physics_rate = physics_rate

        self._plane_id = None
        self._pybullet = None
        self._render_enabled = False
        self._render_called = False

        # Calculate the rate between agent and physics rate
        physics_steps_per_agent_update = physics_rate / agent_rate
        self._num_of_physics_steps = int(physics_steps_per_agent_update)

        assert physics_rate >= agent_rate, "Agent cannot run faster than physics"

        # If there is an incompatibility between the agent and physics rates, round the
        # iterations and notify the user
        if physics_steps_per_agent_update != round(
                physics_steps_per_agent_update):
            self._num_of_physics_steps = round(physics_steps_per_agent_update)
            logger.warn("The real physics rate will be {} Hz".format(
                agent_rate * self._num_of_physics_steps))

        logger.debug(f"RTF = {rtf}")
        logger.debug(f"Agent rate = {agent_rate} Hz")
        logger.debug(
            f"Physics rate = {agent_rate * self._num_of_physics_steps} Hz")

        logger.debug("Initializing the Task")
        task = task_cls(**kwargs)

        assert isinstance(task, base.task.Task), \
            "'task_cls' object must inherit from Task"

        # Wrap the task with this runtime
        super().__init__(task=task, agent_rate=agent_rate)

        # Initialize the simulator and the robot
        self.task.robot = self._get_robot()

        # Initialize the spaces
        self.task.action_space, self.task.observation_space = self.task.create_spaces(
        )

        # Seed the environment
        self.seed()