Example #1
0
class ModularEnv(gym.Env):
    """Abstract modular environment"""

    metadata = {'render.modes': ['human']}

    def __init__(self, terrain=None):
        # Create pybullet interfaces
        self.client = BulletClient(connection_mode=pyb.DIRECT)
        self._last_render = time.time()
        # Setup information needed for simulation
        self.dt = 1. / 240.
        self.client.setAdditionalSearchPath(ASSET_PATH)
        self.terrain_type = terrain
        # Stored for user interactions
        self.morphology = None
        self.multi_id = None
        self._joint_ids = []
        self._joints = []
        # Used for user interaction:
        self._real_time = False
        # Run setup
        self.setup()

    def load_terrain(self):
        """Helper method to load terrain for ground"""
        if not self.terrain_type:
            return None
        elif type(self.terrain_type) is PNGTerrain:
            shape = self.client.createCollisionShape(
                shapeType=pyb.GEOM_HEIGHTFIELD,
                meshScale=self.terrain_type.scale,
                fileName=self.terrain_type.height_field)
            assert shape >= 0, "Could not load PNGTerrain shape"
            terrain = self.client.createMultiBody(0, shape)
            if self.terrain_type.texture:
                texture = self.client.loadTexture(self.terrain_type.texture)
                assert texture >= 0, "Could not load PNGTerrain texture"
                self.client.changeVisualShape(terrain,
                                              -1,
                                              textureUniqueId=texture)
            if self.terrain_type.position:
                self.client.resetBasePositionAndOrientation(
                    terrain, self.terrain_type.position, [0, 0, 0, 1])
        elif type(self.terrain_type) is ArrayTerrain:
            shape = self.client.createCollisionShape(
                shapeType=pyb.GEOM_HEIGHTFIELD,
                meshScale=self.terrain_type.scale,
                heightfieldTextureScaling=(self.terrain_type.rows - 1) / 2.,
                heightfieldData=self.terrain_type.field.flatten(),
                numHeightfieldRows=self.terrain_type.rows,
                numHeightfieldColumns=self.terrain_type.cols)
            assert shape >= 0, "Could not create ArrayTerrain shape"
            terrain = self.client.createMultiBody(0, shape)
            self.client.resetBasePositionAndOrientation(
                terrain, [0, 0, 0], [0, 0, 0, 1])
        assert terrain >= 0, "Could not load terrain"
        self.client.changeVisualShape(terrain, -1, rgbaColor=[1, 1, 1, 1])
        return terrain

    def setup(self):
        """Helper method to initialize default environment"""
        # This is abstracted out from '__init__' because we need to do this
        # first time 'render' is called
        self.client.resetSimulation()
        self.client.setGravity(0, 0, -9.81)
        # Load ground plane for robots to walk on
        self.plane_id = self.client.loadURDF('plane/plane.urdf',
                                             useMaximalCoordinates=1)
        assert self.plane_id >= 0, "Could not load 'plane.urdf'"
        # Change dynamics parameters from:
        # https://github.com/bulletphysics/bullet3/blob/master/examples/pybullet/gym/pybullet_envs/deep_mimic/env/pybullet_deep_mimic_env.py#L45
        self.client.changeDynamics(self.plane_id, -1, lateralFriction=0.9)
        # self.client.setPhysicsEngineParameter(numSolverIterations=10)
        # self.client.setPhysicsEngineParameter(minimumSolverIslandSize=100)
        self.client.setPhysicsEngineParameter(contactERP=0)
        # Extract time step for sleep during rendering
        self.dt = self.client.getPhysicsEngineParameters()['fixedTimeStep']

    def close(self):
        self.client.disconnect()

    def reset(self, morphology=None, max_size=None):
        # Disable rendering during spawning
        self.client.configureDebugVisualizer(pyb.COV_ENABLE_RENDERING, 0)
        # Reset the environment by running all setup code again
        self.setup()
        # Reset internal state
        self.morphology = None
        self.multi_id = None
        self._joint_ids = []
        self._joints = []
        # Check method arguments for simple logic errors
        if morphology is None:
            raise TypeError("Morphology cannot be 'None'!")
        if max_size is not None and max_size < 1:
            raise ValueError("'max_size' must be larger than 0")
        # NOTE: Utilize deep copy here to avoid interfering with morphology,
        # this means that if we want to do structural changes we can do so on
        # our own private instance. Which is nice for maximum size etc.
        self.morphology = copy.deepcopy(morphology.root)
        # Mapping from module to PyBullet ID
        spawned_ids = {}
        # NOTE: We are using explicit queue handling here so that we can
        # ignore children of overlapping modules
        queue = deque([self.morphology.root])
        while len(queue) > 0:
            module = queue.popleft()
            assert isinstance(module, Module3D), "{} does not inherit\
                from 3D Module".format(type(module))
            # Spawn module in world
            m_id = module.spawn(self.client)
            # Check if the module overlaps
            aabb_min, aabb_max = self.client.getAABB(m_id)
            # Check overlapping modules
            overlap = self.client.getOverlappingObjects(aabb_min, aabb_max)
            # NOTE: An object always collides with it self
            overlapping_modules = False
            if overlap:
                overlapping_modules = any(
                    [u_id != m_id for u_id, _ in overlap])
            # Check against plane
            aabb_min, aabb_max = self.client.getAABB(self.plane_id)
            plane_overlap = self.client.getOverlappingObjects(
                aabb_min, aabb_max)
            overlapping_plane = False
            if plane_overlap:
                overlapping_plane = any(
                    [u_id != self.plane_id for u_id, _ in plane_overlap])
            # If overlap is detected de-spawn module and continue
            if overlapping_modules or overlapping_plane:
                # Remove from simulation
                self.client.removeBody(m_id)
                # Remove from our private copy
                parent = module.parent
                if parent:
                    del parent[module]
                else:
                    raise RuntimeError(
                        "Trying to remove root module due to collision!")
                continue
            # Add children to queue for processing
            queue.extend(module.children)
            # Add ID to spawned IDs so that we can remove them later
            spawned_ids[module] = m_id
            # Add joint if present
            if module.joint:
                self._joints.append(module.joint)
            # Check size constraints
            if max_size is not None and len(spawned_ids) >= max_size:
                # If we are above max desired spawn size drain queue and remove
                for module in queue:
                    parent = module.parent
                    if parent:
                        del parent[module]
                    else:
                        raise RuntimeError("Trying to prune root link!")
                break
        # Create multi body builder and instantiate with morphological tree and
        # module IDs
        builder = MultiBodyBuilder(self.morphology,
                                   spawned_ids,
                                   self.client,
                                   flags=pyb.URDF_USE_SELF_COLLISION)
        # Before spawning multi body we remove all modules so far
        # NOTE: We must do that here after all modules have been spawned to not
        # interfere with collision detection
        for m_id in spawned_ids.values():
            self.client.removeBody(m_id)
        # Instantiate the builder creating a multi body from the morphology
        multi_id = builder.build()
        assert multi_id >= 0, "Could not spawn multibody!"
        self.multi_id = multi_id
        for jid in range(self.client.getNumJoints(self.multi_id)):
            j_info = self.client.getJointInfo(self.multi_id, jid)
            if j_info[2] != pyb.JOINT_FIXED:
                self._joint_ids.append(jid)
        self.observation_space = gym.spaces.Box(-100.,
                                                100.,
                                                shape=(3 *
                                                       len(self._joints), ),
                                                dtype=np.float64)
        lows = np.repeat(-1.57, len(self._joints))
        self.action_space = gym.spaces.Box(lows, -1. * lows, dtype=np.float64)
        # Load additional terrain if requested
        # NOTE: We load terrain after collision detection to avoid collisions
        # with terrain itself
        if self.load_terrain() is not None:
            # If a different terrain was loaded we remove the plane
            self.client.removeBody(self.plane_id)
            self.plane_id = None
        # Enable rendering here
        self.client.configureDebugVisualizer(pyb.COV_ENABLE_RENDERING, 1)
        return self.observation()

    def act(self, action):
        """Helper function to apply actions to robot"""
        assert action.shape[0] == len(self._joint_ids), 'Action not equal to\
            number of joints'

        for j_id, cfg, act in zip(self._joint_ids, self._joints, action):
            # Create a local copy so we can delete from it
            l_cfg = cfg.copy()
            # If joint should be limited we check that here
            if 'limit' in l_cfg:
                act = max(l_cfg['limit'][0], min(l_cfg['limit'][1], act))
                del l_cfg['limit']
            # Extract type of 'target' for joint
            l_cfg[l_cfg['target']] = act
            del l_cfg['target']
            self.client.setJointMotorControl2(self.multi_id, j_id, **l_cfg)

    def step(self, action):
        self.act(np.asarray(action))
        self.client.stepSimulation()
        return self.observation(), self.reward(), False, {}

    def observation(self):
        """Get observation from environment

        The default observation is `numpy.concatenate([positions, velocities,
        torques])` for all movable joints."""
        # If the body does not have any movable joints:
        if not self._joint_ids:
            return np.array([])
        positions = []
        velocities = []
        torques = []
        states = self.client.getJointStates(self.multi_id, self._joint_ids)
        for state in states:
            positions.append(state[0])
            velocities.append(state[1])
            torques.append(state[3])
        return np.concatenate([positions, velocities, torques])

    def reward(self):
        """Estimate current reward"""
        # Get state of root link
        pos, _ = self.client.getBasePositionAndOrientation(self.multi_id)
        # Extract position
        position = np.array(pos)
        # We are only interested in distance in (X, Y)
        position[-1] = 0.0
        # Return the root's distance from World center
        return np.linalg.norm(position)

    def render(self, mode="human"):
        info = self.client.getConnectionInfo()
        if info['connectionMethod'] != pyb.GUI and mode == 'human':
            # Close current simulation and start new with GUI
            self.close()
            self.client = BulletClient(connection_mode=pyb.GUI)
            # Disable rendering during spawning
            self.client.configureDebugVisualizer(pyb.COV_ENABLE_RENDERING, 0)
            # Reset if morphology is set
            if self.morphology is not None:
                self.reset(self.morphology)
            else:
                self.setup()
            # Configure viewport
            self.client.configureDebugVisualizer(pyb.COV_ENABLE_GUI, 0)
            self.client.configureDebugVisualizer(
                pyb.COV_ENABLE_PLANAR_REFLECTION, 1)
            self.client.resetDebugVisualizerCamera(0.5, 50.0, -35.0, (0, 0, 0))
            # Setup timing for correct sleeping when rendering
            self._last_render = time.time()
        elif info['connectionMethod'] == pyb.GUI:
            # Handle interaction with simulation
            self.handle_interaction()
            # Calculate time to sleep
            now = time.time()
            diff = now - self._last_render
            to_sleep = self.dt - diff
            if to_sleep > 0:
                time.sleep(to_sleep)
            self._last_render = now
        else:
            raise RuntimeError(
                "Unknown pybullet mode ({}) or render mode ({})".format(
                    info['connectionMethod'], mode))

    def handle_interaction(self):
        """Handle user interaction with simulation

        This function is called in the 'render' call and is only called if the
        GUI is visible"""
        keys = self.client.getKeyboardEvents()
        # If 'n' is pressed then we want to step the simulation
        next_key = ord('n')
        if next_key in keys and keys[next_key] & pyb.KEY_WAS_TRIGGERED:
            self.client.stepSimulation()
        # If space is pressed we start/stop real-time simulation
        space = ord(' ')
        if space in keys and keys[space] & pyb.KEY_WAS_TRIGGERED:
            self._real_time = not self._real_time
            self.client.setRealTimeSimulation(self._real_time)
        # if 'r' is pressed we restart simulation
        r = ord('r')
        if r in keys and keys[r] & pyb.KEY_WAS_TRIGGERED:
            real_time = self._real_time
            self.client.setRealTimeSimulation(False)
            self.reset(self.morphology)
            self.client.setRealTimeSimulation(real_time)
Example #2
0
class PyBullet(Environment):
    """
    Class to create a Mushroom environment using the PyBullet simulator.

    """
    def __init__(self,
                 files,
                 actuation_spec,
                 observation_spec,
                 gamma,
                 horizon,
                 timestep=1 / 240,
                 n_intermediate_steps=1,
                 enforce_joint_velocity_limits=False,
                 debug_gui=False,
                 **viewer_params):
        """
        Constructor.

        Args:
            files (dict): dictionary of the URDF/MJCF/SDF files to load (key) and parameters dictionary (value);
            actuation_spec (list): A list of tuples specifying the names of the
                joints which should be controllable by the agent and tehir control mode.
                Can be left empty when all actuators should be used in position control;
            observation_spec (list): A list containing the names of data that
                should be made available to the agent as an observation and
                their type (ObservationType). An entry in the list is given by:
                (name, type);
            gamma (float): The discounting factor of the environment;
            horizon (int): The maximum horizon for the environment;
            timestep (float, 0.00416666666): The timestep used by the PyBullet
                simulator;
            n_intermediate_steps (int): The number of steps between every action
                taken by the agent. Allows the user to modify, control and
                access intermediate states;
            enforce_joint_velocity_limits (bool, False): flag to enforce the velocity limits;
            debug_gui (bool, False): flag to activate the default pybullet visualizer, that can be used for debug
                purposes;
            **viewer_params: other parameters to be passed to the viewer.
                See PyBulletViewer documentation for the available options.

        """
        assert len(observation_spec) > 0
        assert len(actuation_spec) > 0

        # Store simulation parameters
        self._timestep = timestep
        self._n_intermediate_steps = n_intermediate_steps

        # Create the simulation and viewer
        if debug_gui:
            self._client = BulletClient(connection_mode=pybullet.GUI)
            self._client.configureDebugVisualizer(pybullet.COV_ENABLE_GUI, 0)
        else:
            self._client = BulletClient(connection_mode=pybullet.DIRECT)
        self._client.setTimeStep(self._timestep)
        self._client.setGravity(0, 0, -9.81)
        self._client.setAdditionalSearchPath(pybullet_data.getDataPath())

        self._viewer = PyBulletViewer(self._client, self.dt, **viewer_params)
        self._state = None

        # Load model and create access maps
        self._model_map = dict()
        self._load_all_models(files, enforce_joint_velocity_limits)

        # Build utils
        self._indexer = IndexMap(self._client, self._model_map, actuation_spec,
                                 observation_spec)
        self.joints = JointsHelper(self._client, self._indexer,
                                   observation_spec)

        # Finally, we create the MDP information and call the constructor of the parent class
        action_space = Box(*self._indexer.action_limits)

        observation_space = Box(*self._indexer.observation_limits)
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Let the child class modify the mdp_info data structure
        mdp_info = self._modify_mdp_info(mdp_info)

        # Provide the structure to the superclass
        super().__init__(mdp_info)

        # Save initial state of the MDP
        self._initial_state = self._client.saveState()

    def seed(self, seed):
        np.random.seed(seed)

    def reset(self, state=None):
        self._client.restoreState(self._initial_state)
        self.setup(state)
        self._state = self._indexer.create_sim_state()
        observation = self._create_observation(self._state)

        return observation

    def render(self):
        self._viewer.display()

    def stop(self):
        self._viewer.close()

    def step(self, action):
        curr_state = self._state.copy()

        action = self._preprocess_action(action)

        self._step_init(curr_state, action)

        for i in range(self._n_intermediate_steps):

            ctrl_action = self._compute_action(curr_state, action)
            self._indexer.apply_control(ctrl_action)

            self._simulation_pre_step()

            self._client.stepSimulation()

            self._simulation_post_step()

            curr_state = self._indexer.create_sim_state()

        self._step_finalize()

        absorbing = self.is_absorbing(curr_state)
        reward = self.reward(self._state, action, curr_state, absorbing)

        observation = self._create_observation(curr_state)

        self._state = curr_state

        return observation, reward, absorbing, {}

    def get_sim_state_index(self, name, obs_type):
        return self._indexer.get_index(name, obs_type)

    def get_sim_state(self, obs, name, obs_type):
        """
        Returns a specific observation value

        Args:
            obs (np.ndarray): the observation vector;
            name (str): the name of the object to consider;
            obs_type (PyBulletObservationType): the type of observation to be used.

        Returns:
            The required elements of the input state vector.

        """
        indices = self.get_sim_state_index(name, obs_type)

        return obs[indices]

    def _modify_mdp_info(self, mdp_info):
        """
        This method can be overridden to modify the automatically generated MDPInfo data structure.
        By default, returns the given mdp_info structure unchanged.

        Args:
            mdp_info (MDPInfo): the MDPInfo structure automatically computed by the environment.

        Returns:
            The modified MDPInfo data structure.

        """
        return mdp_info

    def _create_observation(self, state):
        """
        This method can be overridden to ctreate an observation vector from the simulator state vector.
        By default, returns the simulator state vector unchanged.

        Args:
            state (np.ndarray): the simulator state vector.

        Returns:
            The environment observation.

        """
        return state

    def _load_model(self, file_name, kwargs):
        if file_name.endswith('.urdf'):
            model_id = self._client.loadURDF(file_name, **kwargs)
        elif file_name.endswith('.sdf'):
            model_id = self._client.loadSDF(file_name, **kwargs)[0]
        else:
            model_id = self._client.loadMJCF(file_name, **kwargs)[0]

        for j in range(self._client.getNumJoints(model_id)):
            self._client.setJointMotorControl2(model_id,
                                               j,
                                               pybullet.POSITION_CONTROL,
                                               force=0)

        return model_id

    def _load_all_models(self, files, enforce_joint_velocity_limits):
        for file_name, kwargs in files.items():
            model_id = self._load_model(file_name, kwargs)
            model_name = self._client.getBodyInfo(model_id)[1].decode('UTF-8')
            self._model_map[model_name] = model_id
        self._model_map.update(self._custom_load_models())

        # Enforce velocity limits on every joint
        if enforce_joint_velocity_limits:
            for model_id in self._model_map.values():
                for joint_id in range(self._client.getNumJoints(model_id)):
                    joint_data = self._client.getJointInfo(model_id, joint_id)
                    self._client.changeDynamics(
                        model_id, joint_id, maxJointVelocity=joint_data[11])

    def _preprocess_action(self, action):
        """
        Compute a transformation of the action provided to the
        environment.

        Args:
            action (np.ndarray): numpy array with the actions
                provided to the environment.

        Returns:
            The action to be used for the current step
        """
        return action

    def _step_init(self, state, action):
        """
        Allows information to be initialized at the start of a step.
        """
        pass

    def _compute_action(self, state, action):
        """
        Compute a transformation of the action at every intermediate step.
        Useful to add control signals simulated directly in python.

        Args:
            state (np.ndarray): numpy array with the current state of teh simulation;
            action (np.ndarray): numpy array with the actions, provided at every step.

        Returns:
            The action to be set in the actual pybullet simulation.

        """
        return action

    def _simulation_pre_step(self):
        """
        Allows information to be accesed and changed at every intermediate step
        before taking a step in the pybullet simulation.
        Can be usefull to apply an external force/torque to the specified bodies.
        """
        pass

    def _simulation_post_step(self):
        """
        Allows information to be accesed at every intermediate step
        after taking a step in the pybullet simulation.
        Can be usefull to average forces over all intermediate steps.
        """
        pass

    def _step_finalize(self):
        """
        Allows information to be accesed at the end of a step.
        """
        pass

    def _custom_load_models(self):
        """
        Allows to custom load a set of objects in the simulation

        Returns:
            A dictionary with the names and the ids of the loaded objects
        """
        return list()

    def reward(self, state, action, next_state, absorbing):
        """
        Compute the reward based on the given transition.

        Args:
            state (np.array): the current state of the system;
            action (np.array): the action that is applied in the current state;
            next_state (np.array): the state reached after applying the given action;
            absorbing (bool): whether next_state is an absorbing state or not.

        Returns:
            The reward as a floating point scalar value.

        """
        raise NotImplementedError

    def is_absorbing(self, state):
        """
        Check whether the given state is an absorbing state or not.

        Args:
            state (np.array): the state of the system.

        Returns:
            A boolean flag indicating whether this state is absorbing or not.

        """
        raise NotImplementedError

    def setup(self, state):
        """
        A function that allows to execute setup code after an environment
        reset.

        Args:
            state (np.ndarray): the state to be restored. If the state should be
            chosen by the environment, state is None. Environments can ignore this
            value if the initial state cannot be set programmatically.

        """
        pass

    @property
    def client(self):
        return self._client

    @property
    def dt(self):
        return self._timestep * self._n_intermediate_steps