Ejemplo n.º 1
0
class HandEnv(gym.Env):
    """The base Hand Environment class."""

    # The environment can have both or a single hand
    HAND_RIGHT = 1
    HAND_LEFT = 2
    HAND_BOTH = HAND_LEFT + HAND_RIGHT

    # The environment can be controlled in joint or MANO-pose spaces
    MODE_JOINT = 1
    MODE_MANO = 2
    MODE_LIST = (MODE_JOINT, MODE_MANO)

    # Shape betas magnitude
    SHAPE_BETAS_MAGNITUDE = 0.1

    metadata = {
        'render.modes': ['rgb_array'],
        'video.frames_per_second': 60.0,
        'video.output_frames_per_second': 30.0,
        'step_time': 1.0 / 60.0}

    def __init__(self,
                 hand_side=HAND_RIGHT,
                 control_mode=MODE_JOINT,
                 hand_model_cls=HandModel20,
                 randomize_hand_shape=False):
        """Constructor of a HandEnv.

        Keyword Arguments:
            hand_side {int} -- hand side: left, right or both (default: {HAND_BOTH})
            control_mode {int} -- control modalities (default: {MODE_JOINT})
            hand_model_cls {type} -- hand kinematic model (default: {HandModel20})
            randomize_hand_shape {bool} -- randomize hand shape at reset (default: {True})
        """
        assert hand_side & self.HAND_BOTH, f'Wrong hand side flag: {hand_side}'
        assert control_mode in self.MODE_LIST, f'Wrong control mode flag: {control_mode}'

        self._hand_side = hand_side
        self._control_mode = control_mode
        self._randomize_hand_shape = randomize_hand_shape

        self._show_window = False
        self._random = None
        self._client = None
        self._hand_bodies = None
        self._camera_shape = (320, 240)
        self._camera_view = None
        self._camera_proj = None

        self._hand_models = []
        if self.HAND_LEFT & hand_side:
            self._hand_models.append(hand_model_cls(left_hand=True))
        if self.HAND_RIGHT & hand_side:
            self._hand_models.append(hand_model_cls(left_hand=False))

        # if control in joint space
        if self.MODE_JOINT == control_mode:
            joint_low, joint_high = map(np.float32, self._hand_models[0].dofs_limits)
            self.action_space = gym.spaces.Tuple([
                gym.spaces.Tuple((
                    gym.spaces.Box(-5.0, 5.0, shape=(3,)),  # base position x,y,z
                    gym.spaces.Box(-1.0, 1.0, shape=(4,)),  # base quaternion x,y,z,w
                    gym.spaces.Box(joint_low, joint_high),  # joint angles
                )) for _ in self._hand_models])
            self.observation_space = gym.spaces.Tuple([
                gym.spaces.Tuple((
                    gym.spaces.Box(-5.0, 5.0, shape=(3,)),  # base position x,y,z
                    gym.spaces.Box(-1.0, 1.0, shape=(4,)),  # base quaternion x,y,z,w
                    gym.spaces.Box(-10.0, 10.0, shape=(6,)),  # base constraint forces
                    gym.spaces.Box(joint_low, joint_high),  # joint angles
                    gym.spaces.Box(-3.0, 3.0, shape=(len(joint_low),)),  # joint velocities
                    gym.spaces.Box(-1.0, 1.0, shape=(len(joint_low),)),  # joint torques
                )) for _ in self._hand_models])
        # if control in MANO space
        elif self.MODE_MANO == control_mode:
            self.action_space = gym.spaces.Tuple([
                gym.spaces.Tuple((
                    gym.spaces.Box(-5.0, 5.0, shape=(3,)),  # trans
                    gym.spaces.Box(-np.pi, np.pi, shape=(16, 3)),  # pose
                )) for _ in self._hand_models])
            self.observation_space = gym.spaces.Tuple([
                gym.spaces.Tuple((
                    gym.spaces.Box(-5.0, 5.0, shape=(3,)),  # trans
                    gym.spaces.Box(-np.pi, np.pi, shape=(16, 3)),  # pose
                )) for _ in self._hand_models])

    def show_window(self, show=True):
        """Show GUI window or not.

        Keyword Arguments:
            show {bool} -- show flag (default: {True})
        """
        self._show_window = show

    def reset(self, initial_hands_state=None, **_kwargs):
        """Resets the environment to an initial state and returns an initial observation.

        Keyword Arguments:
            initial_hands_state {int} -- override the initial hands state (default: {None})

        Returns:
            observation (object): the initial observation.
        """
        if self._client is None:
            self._client = BulletClient(pb.GUI if self._show_window else pb.DIRECT)
            self._client.configureDebugVisualizer(pb.COV_ENABLE_GUI, 0)
            self._setup_camera()

        self._client.resetSimulation()
        self._client.setAdditionalSearchPath(pd.getDataPath())
        self._client.setPhysicsEngineParameter(deterministicOverlappingPairs=1)
        self._client.setGravity(0, 0, -10)
        self._client.configureDebugVisualizer(pb.COV_ENABLE_RENDERING, 0)

        # randomize the handshape if needed
        betas = None
        if self._randomize_hand_shape:
            betas = self._random.rand(10) * self.SHAPE_BETAS_MAGNITUDE

        # spawn the hands
        self._hand_bodies = []
        for i, model in enumerate(self._hand_models):
            hand_body = HandBody(self._client, model, shape_betas=betas)

            if initial_hands_state is not None:
                pos, orn, angles = initial_hands_state[i]
            else:
                pos_y, orn_z = (0.10, 0.0) if model.is_left_hand else (-0.10, -np.pi)
                pos, orn = (0.0, pos_y, 0.25), pb.getQuaternionFromEuler((np.pi/2, 0.0, orn_z))
                angles = np.zeros(len(hand_body.joint_indices))

            hand_body.reset(pos, orn, angles)
            hand_body.set_target(pos, orn, angles)
            self._hand_bodies.append(hand_body)

        self._client.configureDebugVisualizer(pb.COV_ENABLE_RENDERING, 1)
        return self._get_observation()

    def step(self, action):
        """Run one timestep of the environment's dynamics.

        Args:
            action (object): an action provided by the agent

        Returns:
            observation (object): agent's observation of the current environment
            reward (float) : amount of reward returned after previous action
            done (bool): whether the episode has ended
            info (dict): contains auxiliary diagnostic information
        """
        self._take_action(action)

        num_steps = int(self.metadata.get('step_time') * 240.0)
        for _ in range(num_steps):
            self._client.stepSimulation()

        observation = self._get_observation()
        reward = self._get_reward(observation)
        done = self._is_done(observation)
        info = {}
        return observation, reward, done, info

    def render(self, mode='human'):
        """Renders the environment.

        Args:
            mode (str): the mode to render with
        """
        if mode == 'rgb_array':
            data = pb.getCameraImage(*self._camera_shape, self._camera_view, self._camera_proj)
            rgba = data[2]
            return rgba[:, :, :3]
        return super().render(mode=mode)

    def close(self):
        """Perform necessary cleanup.

        Environments will automatically close() themselves when
        garbage collected or when the program exits.
        """
        if self._client.isConnected():
            self._client.disconnect()

    def seed(self, seed=None):
        """Sets the seed for this env's random number generator(s).

        Args:
            seed (int): provided number generators seed
        """
        self._random = RandomState(seed)
        return [seed]

    def _take_action(self, action):
        """Compute observation of the current environment.

        Args:
            action (object): an action provided by the agent
        """
        if self.MODE_JOINT == self._control_mode:
            for hand, (pos, orn, angles) in zip(self._hand_bodies, action):
                hand.set_target(pos, orn, angles)
        elif self.MODE_MANO == self._control_mode:
            for hand, (trans, pose) in zip(self._hand_bodies, action):
                hand.set_target_from_mano(trans, pose)

    def _get_observation(self):
        """Compute observation of the current environment.

        Returns:
            observation (object): agent's observation of the current environment
        """
        if self.MODE_JOINT == self._control_mode:
            return [hand.get_state() for hand in self._hand_bodies]
        if self.MODE_MANO == self._control_mode:
            return [hand.get_mano_state() for hand in self._hand_bodies]
        return None

    def _get_reward(self, observation):
        """Compute reward at the current environment state.

        Returns:
            observation (object): agent's observation of the current environment
        """
        # pylint: disable=no-self-use, unused-argument
        return 0.0

    def _is_done(self, observation):
        """Check if the current environment state is final.

        Returns:
            observation (object): agent's observation of the current environment
        """
        # pylint: disable=no-self-use, unused-argument
        return False

    def _setup_camera(self):
        """Setup virtual camera."""
        self._client.resetDebugVisualizerCamera(
            cameraDistance=0.5,
            cameraYaw=-45.0,
            cameraPitch=-40.0,
            cameraTargetPosition=[0, 0, 0.1])

        self._camera_view = self._client.computeViewMatrixFromYawPitchRoll(
            cameraTargetPosition=[0.0, 0.0, 0.1],
            distance=0.5,
            yaw=-45.0,
            pitch=-40.0,
            roll=0.0,
            upAxisIndex=2)

        self._camera_proj = self._client.computeProjectionMatrixFOV(
            fov=60.0,
            aspect=self._camera_shape[0] / self._camera_shape[1],
            nearVal=0.1,
            farVal=10.0)
Ejemplo n.º 2
0
class LocomotionGymEnv(gym.Env):
    def __init__(self,
                 gym_config: SimulationParameters,
                 robot_class=None,
                 robot_params: RobotSimParams = None,
                 task: BaseTask = None):
        self.seed()
        self._gym_config = gym_config
        if robot_class is None:
            raise ValueError('robot_class cannot be None.')
        self._robot_class = robot_class

        # A dictionary containing the objects in the world other than the robot.
        self._world_dict = {}

        self._robot_params = robot_params

        self._task = task

        self._obs_sensor = None

        self._time_step = self._gym_config.time_step
        self._num_action_repeat = self._gym_config.num_action_repeat

        # self._num_bullet_solver_iterations = 12

        self._is_render = self._gym_config.enable_rendering and (
            not self._gym_config.egl_rendering)
        if self._is_render:
            self._pybullet_client = BulletClient(connection_mode=p.GUI)
            self._pybullet_client.configureDebugVisualizer(
                p.COV_ENABLE_GUI, gym_config.enable_rendering_gui)
        else:
            self._pybullet_client = BulletClient(connection_mode=p.DIRECT)

        self._pybullet_client.setAdditionalSearchPath(
            pybullet_data.getDataPath())

        self._last_frame_time = 0.0

        # set egl acceleration
        if self._gym_config.egl_rendering:
            egl = pkgutil.get_loader('eglRenderer')
            self._eglPlugin = self._pybullet_client.loadPlugin(
                egl.get_filename(), "_eglRendererPlugin")

        self._build_action_space()
        self._build_observation_space()

        # Set the default render options.
        self._camera_dist = self._gym_config.camera_distance
        self._camera_yaw = self._gym_config.camera_yaw
        self._camera_pitch = self._gym_config.camera_pitch
        self._render_width = self._gym_config.render_width
        self._render_height = self._gym_config.render_height

        self._hard_reset = True

        self._num_env_act = 0

        self.set_gui_sliders()

        self._contact_fall = ContactDetection(self)

        self._reset_time = self._gym_config.reset_time
        self.reset(start_motor_angles=None, reset_duration=self._reset_time)

        if self._task is not None:
            self._task.reset(self)

        self._hard_reset = self._gym_config.enable_hard_reset

        # global_values.global_userDebugParams = UserDebugParams(self._pybullet_client)
        # global_values.global_userDebugParams.setPbClient(self._pybullet_client)
        # set_gui_sliders(self._pybullet_client)

    def _build_action_space(self):
        motor_mode = self._robot_params.motor_control_mode
        action_upper_bound = []
        action_lower_bound = []

        if (motor_mode == MotorControlMode.POSITION) or (
                motor_mode == MotorControlMode.HYBRID_COMPUTED_POS):
            action_lower_bound.extend(self._robot_params.joint_angle_MinMax[0])
            action_upper_bound.extend(self._robot_params.joint_angle_MinMax[1])
            # action_config = self._robot_params.JOINT_ANGLE_LIMIT
            # for action in action_config:
            #     action_upper_bound.append(action.upper_bound)
            #     action_lower_bound.append(action.lower_bound)

        elif motor_mode == MotorControlMode.TORQUE:
            action_lower_bound.extend(
                self._robot_params.joint_torque_MinMax[0])
            action_upper_bound.extend(
                self._robot_params.joint_torque_MinMax[1])
            # action_config = self._robot_params.JOINT_TORQUE_LIMIT
            # for action in action_config:
            #     action_upper_bound.append(action.upper_bound)
            #     action_lower_bound.append(action.lower_bound)

        elif motor_mode == MotorControlMode.HYBRID_COMPUTED_POS_VEL:
            action_lower_bound.extend(self._robot_params.joint_angle_MinMax[0])
            action_lower_bound.extend(
                self._robot_params.joint_velocity_MinMax[0])
            action_upper_bound.extend(self._robot_params.joint_angle_MinMax[1])
            action_upper_bound.extend(
                self._robot_params.joint_velocity_MinMax[1])
            # for action_config in [self._robot_params.JOINT_ANGLE_LIMIT, self._robot_params.JOINT_VELOCITY_LIMIT]:
            #     for action in action_config:
            #         action_upper_bound.append(action.upper_bound)
            #         action_lower_bound.append(action.lower_bound)
        elif motor_mode == MotorControlMode.HYBRID_COMPUTED_POS_SINGLE:
            action_lower_bound.extend(
                self._robot_params.joint_angle_MinMax[0][:3])
            action_upper_bound.extend(
                self._robot_params.joint_angle_MinMax[1][:3])

        elif motor_mode == MotorControlMode.HYBRID_COMPUTED_POS_TROT:
            action_lower_bound.extend(
                self._robot_params.joint_angle_MinMax[0][:6])
            action_upper_bound.extend(
                self._robot_params.joint_angle_MinMax[1][:6])

        self.action_space = spaces.Box(np.array(action_lower_bound),
                                       np.array(action_upper_bound),
                                       dtype=np.float32)

    def _build_observation_space(self):
        if self._task is None:
            return
        else:
            self._obs_sensor = SensorWrapper(
                self._task.get_observation_sensors())
        self.observation_space = spaces.Box(self._obs_sensor.get_lower_bound(),
                                            self._obs_sensor.get_upper_bound(),
                                            dtype=np.float32)

    def seed(self, seed=None):
        self.np_random, self.np_random_seed = seeding.np_random(seed)
        return [self.np_random_seed]

    def reset(self,
              start_motor_angles=None,
              reset_duration=0.002,
              reset_visualization_camera=True,
              force_hard_reset=False):
        if self._is_render:
            self._pybullet_client.configureDebugVisualizer(
                p.COV_ENABLE_RENDERING, 0)

        # Clear the simulation world and rebuild the robot interface.
        if force_hard_reset or self._hard_reset:
            self._pybullet_client.resetSimulation()
            # self._pybullet_client.setPhysicsEngineParameter(numSolverIterations=self._num_bullet_solver_iterations)
            self._pybullet_client.setTimeStep(self._time_step)
            self._pybullet_client.setGravity(0, 0, -9.8)

            # Rebuild the world.
            self._world_dict = {
                "ground": self._pybullet_client.loadURDF("plane_implicit.urdf")
            }

            # Rebuild the robot
            self._robot = self._robot_class(
                self._pybullet_client,
                self._robot_params,
                time_step=self._time_step,
                num_action_repeat=self._num_action_repeat)

            if self._obs_sensor is not None:
                self._obs_sensor.set_robot(self._robot)

        # Reset the pose of the robot.
        # self._robot.Reset(reload_urdf=False, default_motor_angles=start_motor_angles, reset_time=reset_duration)

        if self._is_render and reset_visualization_camera:
            self._pybullet_client.resetDebugVisualizerCamera(
                self._camera_dist, self._camera_yaw, self._camera_pitch,
                [0, 0, 0])

        if self._is_render:
            self._pybullet_client.configureDebugVisualizer(
                p.COV_ENABLE_RENDERING, 1)
        self._robot.Reset(reload_urdf=False,
                          default_motor_angles=start_motor_angles,
                          reset_time=reset_duration)
        if self._obs_sensor is not None:
            self._obs_sensor.on_reset()

        self._contact_fall.reset()

        self._num_env_act = 0

        return self._get_observation()

    def step(self, action):
        time_spent = time.time() - self._last_frame_time
        self._last_frame_time = time.time()
        # if self._num_env_act % 1 == 0:
        #     print(f"time_spent: {time_spent * 1000} ms")

        if self._is_render:
            # Sleep, otherwise the computation takes less time than real time,
            # which will make the visualization like a fast-forward video.
            time_to_sleep = self._time_step * self._num_action_repeat - time_spent
            if time_to_sleep > 0:
                time.sleep(time_to_sleep)

        # action = action * np.asarray(self.action_space.high)
        self._robot.Step(action, None)

        if self._obs_sensor is not None:
            self._obs_sensor.on_step()

        self._num_env_act += 1

        return self._get_observation(), self._get_reward(), self.done, {}

    def render(self, mode='rgb_array'):
        if mode != 'rgb_array':
            raise ValueError('Unsupported render mode:{}'.format(mode))
        base_pos = self._robot.GetBasePosition()
        view_matrix = self._pybullet_client.computeViewMatrixFromYawPitchRoll(
            cameraTargetPosition=base_pos,
            distance=self._camera_dist,
            yaw=self._camera_yaw,
            pitch=self._camera_pitch,
            roll=0,
            upAxisIndex=2)
        proj_matrix = self._pybullet_client.computeProjectionMatrixFOV(
            fov=60,
            aspect=float(self._render_width) / self._render_height,
            nearVal=0.1,
            farVal=100.0)
        (_, _, px, _, _) = self._pybullet_client.getCameraImage(
            width=self._render_width,
            height=self._render_height,
            renderer=self._pybullet_client.ER_BULLET_HARDWARE_OPENGL,
            viewMatrix=view_matrix,
            projectionMatrix=proj_matrix)
        rgb_array = np.array(px)
        rgb_array = rgb_array[:, :, :3]
        return rgb_array

    def close(self):
        if hasattr(self, '_robot') and self._robot:
            self._robot.Terminate()

    def getRobotID(self):
        return self._robot.getRobotID()

    @property
    def robot(self):
        return self._robot

    @property
    def ground(self):
        return self._world_dict["ground"]

    def getClient(self):
        return self._pybullet_client

    @property
    def env_step_counter(self):
        return self._num_env_act

    @property
    def baseHeight(self):
        return self._robot.ReadBaseHeight()

    def set_gui_sliders(self):
        set_gui_sliders(self._pybullet_client)

    def _get_observation(self):
        if self._obs_sensor is not None:
            return self._obs_sensor.get_observation()
        else:
            return None

    def _get_reward(self):
        if self._task is not None:
            return self._task.reward()
        else:
            return None

    @property
    def done(self) -> bool:
        # if self._obs_sensor is not None:
        #     if self._obs_sensor.on_terminate():
        #         return True

        if self._contact_fall.is_contact_fall():
            return True

        if self._task is not None:
            return self._task.done()
        else:
            return False