def __init__(self,
             debug=False,
             urdf_version=None,
             control_time_step=0.005,
             action_repeat=5,
             control_latency=0,
             pd_latency=0,
             on_rack=False,
             motor_kp=1.0,
             motor_kd=0.02,
             render=False,
             num_steps_to_log=1000,
             env_randomizer=None,
             log_path=None,
             target_orient=None,
             init_orient=None,
             signal_type="ik",
             terrain_type="plane",
             terrain_id=None,
             mark='base'):
    """Initialize the rex alternating legs gym environment.

    Args:
      debug: Whether to enable debug printing and the debug UI.
      urdf_version: [DEFAULT_URDF_VERSION, DERPY_V0_URDF_VERSION] are allowable
        versions. If None, DEFAULT_URDF_VERSION is used. Refer to rex_gym_env
        for more details.
      control_time_step: The time step between two successive control signals.
      action_repeat: The number of simulation steps that an action is repeated.
      control_latency: The latency between get_observation() and the actual
        observation. See minituar.py for more details.
      pd_latency: The latency used to get motor angles/velocities used to
        compute PD controllers. See rex.py for more details.
      on_rack: Whether to place the rex on rack. This is only used to debug
        the walk gait. In this mode, the rex's base is hung midair so that
        its walk gait is clearer to visualize.
      motor_kp: The P gain of the motor.
      motor_kd: The D gain of the motor.
      render: Whether to render the simulation.
      num_steps_to_log: The max number of control steps in one episode. If the
        number of steps is over num_steps_to_log, the environment will still
        be running, but only first num_steps_to_log will be recorded in
        logging.
      env_randomizer: An instance (or a list) of EnvRanzomier(s) that can
        randomize the environment during when env.reset() is called and add
        perturbations when env.step() is called.
      log_path: The path to write out logs. For the details of logging, refer
        to rex_logging.proto.
      target_orient: Target yaw for the turn; randomized on reset when None.
      init_orient: Starting yaw; randomized on reset when None.
      signal_type: Gait generator to use, 'ik' or 'ol' (open loop).
      terrain_type: Terrain to load (e.g. "plane").
      terrain_id: Identifier of the terrain variant.
      mark: Robot model variant identifier.
    """
    super(RexTurnEnv, self).__init__(debug=debug,
                                     urdf_version=urdf_version,
                                     accurate_motor_model_enabled=True,
                                     motor_overheat_protection=True,
                                     hard_reset=False,
                                     motor_kp=motor_kp,
                                     motor_kd=motor_kd,
                                     control_latency=control_latency,
                                     pd_latency=pd_latency,
                                     on_rack=on_rack,
                                     render=render,
                                     num_steps_to_log=num_steps_to_log,
                                     env_randomizer=env_randomizer,
                                     log_path=log_path,
                                     control_time_step=control_time_step,
                                     action_repeat=action_repeat,
                                     target_orient=target_orient,
                                     signal_type=signal_type,
                                     init_orient=init_orient,
                                     terrain_id=terrain_id,
                                     terrain_type=terrain_type,
                                     mark=mark)
    # Per-signal action bounds; both gaits currently share the same limits.
    action_max = {'ik': 0.01, 'ol': 0.01}
    action_dim_map = {'ik': 2, 'ol': 2}
    # NOTE: self._signal_type is available here because the base-class
    # constructor above received signal_type.
    action_dim = action_dim_map[self._signal_type]
    bound = np.full(action_dim, action_max[self._signal_type])
    self.action_space = spaces.Box(-bound, bound)
    self._cam_dist = 1.1
    self._cam_yaw = 30
    self._cam_pitch = -30
    self._signal_type = signal_type
    # We need an alternating gait, so the "walk" planner is used here.
    self._gait_planner = GaitPlanner("walk")
    self._kinematics = Kinematics()
    self._target_orient = target_orient
    self._init_orient = init_orient
    self._random_orient_target = False
    self._random_orient_start = False
    self._cube = None
    self.goal_reached = False
    self._stay_still = False
    self.is_terminating = False
    if self._on_rack:
        self._cam_pitch = 0
def __init__(self,
             debug=False,
             urdf_version=None,
             energy_weight=0.005,
             control_time_step=0.006,
             action_repeat=6,
             control_latency=0.0,
             pd_latency=0.0,
             on_rack=False,
             motor_kp=1.0,
             motor_kd=0.02,
             render=False,
             num_steps_to_log=2000,
             use_angle_in_observation=True,
             env_randomizer=None,
             log_path=None,
             target_position=None,
             signal_type="ik",
             terrain_type="plane",
             terrain_id=None,
             mark='base'):
    """Initialize Rex trotting gym environment.

    Args:
      debug: Whether to enable debug printing and the debug UI.
      urdf_version: [DEFAULT_URDF_VERSION] are allowable versions. If None,
        DEFAULT_URDF_VERSION is used. Refer to rex_gym_env for more details.
      energy_weight: The weight of the energy term in the reward function.
        Refer to rex_gym_env for more details.
      control_time_step: The time step between two successive control signals.
      action_repeat: The number of simulation steps that an action is repeated.
      control_latency: The latency between get_observation() and the actual
        observation. See rex.py for more details.
      pd_latency: The latency used to get motor angles/velocities used to
        compute PD controllers. See rex.py for more details.
      on_rack: Whether to place Rex on rack. This is only used to debug the
        walk gait. In this mode, Rex's base is hung midair so that its walk
        gait is clearer to visualize.
      motor_kp: The P gain of the motor.
      motor_kd: The D gain of the motor.
      render: Whether to render the simulation.
      num_steps_to_log: The max number of control steps in one episode. If the
        number of steps is over num_steps_to_log, the environment will still
        be running, but only first num_steps_to_log will be recorded in
        logging.
      use_angle_in_observation: Whether to include motor angles in observation.
      env_randomizer: An instance (or a list) of EnvRanzomier(s) that can
        randomize the environment during when env.reset() is called and add
        perturbations when env.step() is called.
      log_path: The path to write out logs. For the details of logging, refer
        to rex_logging.proto.
      target_position: Target x distance to walk; randomized when None.
      signal_type: Gait generator to use, 'ik' or 'ol' (open loop).
      terrain_type: Terrain to load (e.g. "plane").
      terrain_id: Identifier of the terrain variant.
      mark: Robot model variant identifier.
    """
    self._use_angle_in_observation = use_angle_in_observation
    super(RexReactiveEnv, self).__init__(urdf_version=urdf_version,
                                         energy_weight=energy_weight,
                                         accurate_motor_model_enabled=True,
                                         motor_overheat_protection=True,
                                         hard_reset=False,
                                         motor_kp=motor_kp,
                                         motor_kd=motor_kd,
                                         remove_default_joint_damping=False,
                                         control_latency=control_latency,
                                         pd_latency=pd_latency,
                                         on_rack=on_rack,
                                         render=render,
                                         num_steps_to_log=num_steps_to_log,
                                         env_randomizer=env_randomizer,
                                         log_path=log_path,
                                         control_time_step=control_time_step,
                                         action_repeat=action_repeat,
                                         target_position=target_position,
                                         signal_type=signal_type,
                                         debug=debug,
                                         terrain_id=terrain_id,
                                         terrain_type=terrain_type,
                                         mark=mark)
    # (eventually) allow different feedback ranges/action spaces for different signals
    action_max = {'ik': 0.4, 'ol': 0.3}
    action_dim_map = {
        'ik': 2,
        'ol': 4,
    }
    action_dim = action_dim_map[self._signal_type]
    # BUG FIX: the bounds were inverted (low built positive, high set to its
    # negation), producing a Box whose low > high elementwise. The action
    # space is symmetric around zero.
    action_high = np.array([action_max[self._signal_type]] * action_dim)
    action_low = -action_high
    self.action_space = spaces.Box(action_low, action_high)
    self._cam_dist = 1.0
    self._cam_yaw = 0.0
    self._cam_pitch = -20
    self._target_position = target_position
    self._signal_type = signal_type
    self._gait_planner = GaitPlanner("gallop")
    self._kinematics = Kinematics()
    self.goal_reached = False
    self._stay_still = False
    self.is_terminating = False
class RexTurnEnv(rex_gym_env.RexGymEnv):
    """The gym environment for the rex turn-in-place task.

    It simulates a rex, a quadruped robot, turning on the spot toward a target
    yaw. The observation contains the base roll/pitch and their rates, and the
    action perturbs the gait generator's step parameters. The reward penalizes
    the base drifting away from the spot it is turning on (see _reward).
    """
    metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 66}
    load_ui = True
    is_terminating = False

    def __init__(self,
                 debug=False,
                 urdf_version=None,
                 control_time_step=0.005,
                 action_repeat=5,
                 control_latency=0,
                 pd_latency=0,
                 on_rack=False,
                 motor_kp=1.0,
                 motor_kd=0.02,
                 render=False,
                 num_steps_to_log=1000,
                 env_randomizer=None,
                 log_path=None,
                 target_orient=None,
                 init_orient=None,
                 signal_type="ik",
                 terrain_type="plane",
                 terrain_id=None,
                 mark='base'):
        """Initialize the rex alternating legs gym environment.

        Args:
          debug: Whether to enable debug printing and the debug UI.
          urdf_version: [DEFAULT_URDF_VERSION, DERPY_V0_URDF_VERSION] are
            allowable versions. If None, DEFAULT_URDF_VERSION is used. Refer
            to rex_gym_env for more details.
          control_time_step: The time step between two successive control
            signals.
          action_repeat: The number of simulation steps that an action is
            repeated.
          control_latency: The latency between get_observation() and the actual
            observation. See minituar.py for more details.
          pd_latency: The latency used to get motor angles/velocities used to
            compute PD controllers. See rex.py for more details.
          on_rack: Whether to place the rex on rack. This is only used to debug
            the walk gait. In this mode, the rex's base is hung midair so that
            its walk gait is clearer to visualize.
          motor_kp: The P gain of the motor.
          motor_kd: The D gain of the motor.
          render: Whether to render the simulation.
          num_steps_to_log: The max number of control steps in one episode. If
            the number of steps is over num_steps_to_log, the environment will
            still be running, but only first num_steps_to_log will be recorded
            in logging.
          env_randomizer: An instance (or a list) of EnvRanzomier(s) that can
            randomize the environment during when env.reset() is called and
            add perturbations when env.step() is called.
          log_path: The path to write out logs. For the details of logging,
            refer to rex_logging.proto.
          target_orient: Target yaw for the turn; randomized on reset when
            None.
          init_orient: Starting yaw; randomized on reset when None.
          signal_type: Gait generator to use, 'ik' or 'ol' (open loop).
          terrain_type: Terrain to load (e.g. "plane").
          terrain_id: Identifier of the terrain variant.
          mark: Robot model variant identifier.
        """
        super(RexTurnEnv, self).__init__(debug=debug,
                                         urdf_version=urdf_version,
                                         accurate_motor_model_enabled=True,
                                         motor_overheat_protection=True,
                                         hard_reset=False,
                                         motor_kp=motor_kp,
                                         motor_kd=motor_kd,
                                         control_latency=control_latency,
                                         pd_latency=pd_latency,
                                         on_rack=on_rack,
                                         render=render,
                                         num_steps_to_log=num_steps_to_log,
                                         env_randomizer=env_randomizer,
                                         log_path=log_path,
                                         control_time_step=control_time_step,
                                         action_repeat=action_repeat,
                                         target_orient=target_orient,
                                         signal_type=signal_type,
                                         init_orient=init_orient,
                                         terrain_id=terrain_id,
                                         terrain_type=terrain_type,
                                         mark=mark)
        # Per-signal action bounds; both gaits currently share the same limits.
        action_max = {'ik': 0.01, 'ol': 0.01}
        action_dim_map = {'ik': 2, 'ol': 2}
        # self._signal_type is set by the base-class constructor above.
        action_dim = action_dim_map[self._signal_type]
        action_high = np.array([action_max[self._signal_type]] * action_dim)
        self.action_space = spaces.Box(-action_high, action_high)
        self._cam_dist = 1.1
        self._cam_yaw = 30
        self._cam_pitch = -30
        self._signal_type = signal_type
        # we need an alternate gait, so the "walk" planner is used here
        self._gait_planner = GaitPlanner("walk")
        self._kinematics = Kinematics()
        self._target_orient = target_orient
        self._init_orient = init_orient
        self._random_orient_target = False
        self._random_orient_start = False
        self._cube = None
        self.goal_reached = False
        self._stay_still = False
        self.is_terminating = False
        if self._on_rack:
            self._cam_pitch = 0

    def reset(self):
        """Reset the episode: pick start/target yaw and re-pose the robot."""
        self.goal_reached = False
        self.is_terminating = False
        self._stay_still = False
        self.init_pose = Rex.INIT_POSES["stand"]
        if self._signal_type == 'ol':
            self.init_pose = Rex.INIT_POSES["stand_ol"]
        super(RexTurnEnv, self).reset(initial_motor_angles=self.init_pose,
                                      reset_duration=0.5)
        # BUG FIX: 'if not self._target_orient' treated a valid 0.0 target as
        # unset; only randomize when no target was given (or we randomized
        # before, so every episode gets a fresh target).
        if self._target_orient is None or self._random_orient_target:
            self._target_orient = random.uniform(0.2, 6)
            self._random_orient_target = True
        if self._on_rack:
            # on rack debug simulation: fixed start orientation
            self._init_orient = 2.1
            position = self.rex.init_on_rack_position
        else:
            position = self.rex.init_position
        if self._init_orient is None or self._random_orient_start:
            self._init_orient = random.uniform(0.2, 6)
            self._random_orient_start = True
        self.clockwise = self._solve_direction()
        if self._is_debug:
            print(f"Start Orientation: {self._init_orient}, Target Orientation: {self._target_orient}")
            if self.clockwise:
                print("Turning right")
            else:
                print("Turning left")
        if self._is_render and self._signal_type == 'ik':
            if self.load_ui:
                self.setup_ui()
                self.load_ui = False
            self._load_cube(self._target_orient)
        q = self.pybullet_client.getQuaternionFromEuler([0, 0, self._init_orient])
        self.pybullet_client.resetBasePositionAndOrientation(self.rex.quadruped, position, q)
        return self._get_observation()

    def setup_ui(self):
        """Create the pybullet debug sliders for the IK gait parameters."""
        self.base_x_ui = self._pybullet_client.addUserDebugParameter(
            "base_x", self._ranges["base_x"][0], self._ranges["base_x"][1], 0.009)
        self.base_y_ui = self._pybullet_client.addUserDebugParameter(
            "base_y", self._ranges["base_y"][0], self._ranges["base_y"][1], self._ranges["base_y"][2])
        self.base_z_ui = self._pybullet_client.addUserDebugParameter(
            "base_z", self._ranges["base_z"][0], self._ranges["base_z"][1], self._ranges["base_z"][2])
        self.roll_ui = self._pybullet_client.addUserDebugParameter(
            "roll", self._ranges["roll"][0], self._ranges["roll"][1], self._ranges["roll"][2])
        self.pitch_ui = self._pybullet_client.addUserDebugParameter(
            "pitch", self._ranges["pitch"][0], self._ranges["pitch"][1], self._ranges["pitch"][2])
        self.yaw_ui = self._pybullet_client.addUserDebugParameter(
            "yaw", self._ranges["yaw"][0], self._ranges["yaw"][1], self._ranges["yaw"][2])
        self.step_length_ui = self._pybullet_client.addUserDebugParameter(
            "step_length", -0.7, 0.7, 0.02)
        self.step_rotation_ui = self._pybullet_client.addUserDebugParameter(
            "step_rotation", -1.5, 1.5, 0.5)
        self.step_angle_ui = self._pybullet_client.addUserDebugParameter(
            "step_angle", -180., 180., 0.)
        self.step_period_ui = self._pybullet_client.addUserDebugParameter(
            "step_period", 0.2, 0.9, 0.75)

    def _read_inputs(self, base_pos_coeff, gait_stage_coeff):
        """Read the debug sliders, scaled by the ramp-in coefficients."""
        position = np.array([
            self._pybullet_client.readUserDebugParameter(self.base_x_ui),
            self._pybullet_client.readUserDebugParameter(self.base_y_ui) * base_pos_coeff,
            self._pybullet_client.readUserDebugParameter(self.base_z_ui) * base_pos_coeff
        ])
        orientation = np.array([
            self._pybullet_client.readUserDebugParameter(self.roll_ui) * base_pos_coeff,
            self._pybullet_client.readUserDebugParameter(self.pitch_ui) * base_pos_coeff,
            self._pybullet_client.readUserDebugParameter(self.yaw_ui) * base_pos_coeff
        ])
        step_length = self._pybullet_client.readUserDebugParameter(self.step_length_ui) * gait_stage_coeff
        step_rotation = self._pybullet_client.readUserDebugParameter(self.step_rotation_ui) * gait_stage_coeff
        step_angle = self._pybullet_client.readUserDebugParameter(self.step_angle_ui)
        step_period = self._pybullet_client.readUserDebugParameter(self.step_period_ui)
        return position, orientation, step_length, step_rotation, step_angle, step_period

    def _signal(self, t, action):
        """Dispatch to the configured gait generator."""
        if self._signal_type == 'ik':
            return self._IK_signal(t, action)
        if self._signal_type == 'ol':
            return self._open_loop_signal(t, action)

    @staticmethod
    def _evaluate_base_stage_coeff(current_t, end_t=0.0, width=0.001):
        # smoothstep-style ramp from 0 to 1 over [end_t, end_t + width]
        # (labelled "sigmoid" upstream)
        beta = p = width
        if p - beta + end_t <= current_t <= p - (beta / 2) + end_t:
            return (2 / beta ** 2) * (current_t - p + beta) ** 2
        elif p - (beta / 2) + end_t <= current_t <= p + end_t:
            return 1 - (2 / beta ** 2) * (current_t - p) ** 2
        else:
            return 1

    @staticmethod
    def _evaluate_gait_stage_coeff(current_t, end_t=0.0):
        # ramp function: grows with t during the first 0.8 s, then 1.0
        p = .8
        if end_t <= current_t <= p + end_t:
            return current_t
        else:
            return 1.0

    def _IK_signal(self, t, action):
        """Generate the 12 motor angles via the walk gait + IK solver."""
        base_pos_coeff = self._evaluate_base_stage_coeff(t, width=1.5)
        gait_stage_coeff = self._evaluate_gait_stage_coeff(t)
        if self._is_render and self._is_debug:
            position, orientation, step_length, step_rotation, step_angle, step_period = \
                self._read_inputs(base_pos_coeff, gait_stage_coeff)
        else:
            # Negative rotation turns one way; flip the sign to turn the other.
            step_dir_value = -0.5 * gait_stage_coeff
            if self.clockwise:
                step_dir_value *= -1
            position = np.array([0.009,
                                 self._base_y * base_pos_coeff,
                                 self._base_z * base_pos_coeff])
            orientation = np.array([self._base_roll * base_pos_coeff,
                                    self._base_pitch * base_pos_coeff,
                                    self._base_yaw * base_pos_coeff])
            step_length = self.step_length if self.step_length is not None else 0.02
            step_rotation = (self.step_rotation if self.step_rotation is not None else step_dir_value) + action[0]
            step_angle = self.step_angle if self.step_angle is not None else 0.0
            step_period = (self.step_period if self.step_period is not None else 0.75) + action[1]
        if self.goal_reached:
            self._stay_still = True
        frames = self._gait_planner.loop(step_length, step_angle, step_rotation, step_period, 1.0)
        fr_angles, fl_angles, rr_angles, rl_angles, _ = self._kinematics.solve(orientation, position, frames)
        signal = [
            fl_angles[0], fl_angles[1], fl_angles[2],
            fr_angles[0], fr_angles[1], fr_angles[2],
            rl_angles[0], rl_angles[1], rl_angles[2],
            rr_angles[0], rr_angles[1], rr_angles[2]
        ]
        return signal

    def _open_loop_signal(self, t, action):
        """Generate the 12 motor angles from the hand-crafted turning poses."""
        if self.goal_reached:
            self._stay_still = True
        initial_pose = self.rex.INIT_POSES['stand_ol']
        period = STEP_PERIOD
        extension = 0.1
        swing = 0.03 + action[0]
        swipe = 0.05 + action[1]
        # Alternate between the two leg poses every gait period.
        ith_leg = int(t / period) % 2
        pose = {
            'left_0': np.array([swipe, extension, -swing,
                                -swipe, extension, swing,
                                swipe, -extension, swing,
                                -swipe, -extension, -swing]),
            'left_1': np.array([-swipe, 0, swing,
                                swipe, 0, -swing,
                                -swipe, 0, -swing,
                                swipe, 0, swing]),
            'right_0': np.array([swipe, extension, swing,
                                 -swipe, extension, -swing,
                                 swipe, -extension, -swing,
                                 -swipe, -extension, swing]),
            'right_1': np.array([-swipe, 0, -swing,
                                 swipe, 0, swing,
                                 -swipe, 0, swing,
                                 swipe, 0, -swing])
        }
        clockwise = self._solve_direction()
        if clockwise:
            # turn right
            first_leg = pose['right_0']
            second_leg = pose['right_1']
        else:
            # turn left
            first_leg = pose['left_0']
            second_leg = pose['left_1']
        if ith_leg:
            signal = initial_pose + second_leg
        else:
            signal = initial_pose + first_leg
        return signal

    def _solve_direction(self):
        """Return True to turn clockwise (right), taking the shorter arc.

        3.14 approximates pi: a gap larger than half a turn is shorter
        the other way around.
        """
        diff = abs(self._init_orient - self._target_orient)
        clockwise = False
        if self._init_orient < self._target_orient:
            if diff > 3.14:
                clockwise = True
        else:
            if diff < 3.14:
                clockwise = True
        return clockwise

    def _check_target_position(self, t):
        """Mark the goal reached once the base yaw matches the target."""
        current_z = self.pybullet_client.getEulerFromQuaternion(self.rex.GetBaseOrientation())[2]
        if current_z < 0:
            # 6.28 approximates 2*pi: map yaw into [0, 2*pi)
            current_z += 6.28
        if abs(self._target_orient - current_z) <= 0.01:
            self.goal_reached = True
            if not self.is_terminating:
                self.end_time = t
                self.is_terminating = True

    def _terminate_with_delay(self, current_t):
        """Flag episode end one second after the goal was reached."""
        if current_t - self.end_time >= 1.:
            self.env_goal_reached = True

    def _transform_action_to_motor_command(self, action):
        """Convert the policy action into 12 motor angles.

        NOTE(review): unlike RexReactiveEnv this does not chain to the base
        class transform — presumably the gait signal is already in motor
        space; confirm against rex_gym_env.
        """
        if self._stay_still:
            self._terminate_with_delay(self.rex.GetTimeSinceReset())
            return self.init_pose
        t = self.rex.GetTimeSinceReset()
        self._check_target_position(t)
        action = self._signal(t, action)
        return action

    def is_fallen(self):
        """Decide whether the rex has fallen.

        If the up directions between the base and the world is large (the dot
        product is smaller than 0.85), the rex is considered fallen.

        Returns:
          Boolean value that indicates whether the rex has fallen.
        """
        orientation = self.rex.GetBaseOrientation()
        rot_mat = self._pybullet_client.getMatrixFromQuaternion(orientation)
        local_up = rot_mat[6:]
        return np.dot(np.asarray([0, 0, 1]), np.asarray(local_up)) < 0.85

    def _reward(self):
        """Reward staying on the spot: positive only while the base is
        within ~3.5 cm (tolerance 0.035) of the origin in x and y."""
        current_base_position = self.rex.GetBasePosition()
        position_penalty = 0.035 - abs(current_base_position[0]) - abs(current_base_position[1])
        return position_penalty

    def _load_cube(self, angle):
        """Place the target-marker cube at the given yaw around the robot."""
        if len(self._companion_obj) > 0:
            self.pybullet_client.removeBody(self._companion_obj['cube'])
        urdf_root = pybullet_data.getDataPath()
        self._cube = self._pybullet_client.loadURDF(f"{urdf_root}/cube_small.urdf")
        self._companion_obj['cube'] = self._cube
        orientation = [0, 0, 0, 1]
        # 3.14 approximates pi: the cube is placed on the opposite side.
        x, y = math.cos(angle + 3.14), math.sin(angle + 3.14)
        position = [x, y, 1]
        self.pybullet_client.resetBasePositionAndOrientation(self._cube, position, orientation)

    def _get_true_observation(self):
        """Get the true observations of this environment.

        It includes the roll, pitch, roll rate and pitch rate of the base.

        Returns:
          The observation list.
        """
        observation = []
        roll, pitch, _ = self.rex.GetTrueBaseRollPitchYaw()
        roll_rate, pitch_rate, _ = self.rex.GetTrueBaseRollPitchYawRate()
        observation.extend([roll, pitch, roll_rate, pitch_rate])
        self._true_observation = np.array(observation)
        return self._true_observation

    def _get_observation(self):
        """Get the (possibly noisy/delayed) observation: roll, pitch and
        their rates."""
        observation = []
        roll, pitch, _ = self.rex.GetBaseRollPitchYaw()
        roll_rate, pitch_rate, _ = self.rex.GetBaseRollPitchYawRate()
        observation.extend([roll, pitch, roll_rate, pitch_rate])
        self._observation = np.array(observation)
        return self._observation

    def _get_observation_upper_bound(self):
        """Get the upper bound of the observation.

        Returns:
          The upper bound of an observation. See GetObservation() for the
          details of each element of an observation.
        """
        upper_bound = np.zeros(self._get_observation_dimension())
        upper_bound[0:2] = 2 * math.pi  # Roll, pitch of the base.
        upper_bound[2:4] = 2 * math.pi / self._time_step  # Roll, pitch rate.
        return upper_bound

    def _get_observation_lower_bound(self):
        """Lower bound is symmetric to the upper bound."""
        lower_bound = -self._get_observation_upper_bound()
        return lower_bound
class RexReactiveEnv(rex_gym_env.RexGymEnv):
    """The gym environment for Rex.

    It simulates the locomotion of Rex, a quadruped robot. The state space
    include the angles, velocities and torques for all the motors and the
    action space is the desired motor angle for each motor. The reward
    function is based on how far Rex walks in 1000 steps and penalizes the
    energy expenditure.
    """
    metadata = {
        "render.modes": ["human", "rgb_array"],
        "video.frames_per_second": 166
    }
    load_ui = True
    is_terminating = False

    def __init__(self,
                 debug=False,
                 urdf_version=None,
                 energy_weight=0.005,
                 control_time_step=0.006,
                 action_repeat=6,
                 control_latency=0.0,
                 pd_latency=0.0,
                 on_rack=False,
                 motor_kp=1.0,
                 motor_kd=0.02,
                 render=False,
                 num_steps_to_log=2000,
                 use_angle_in_observation=True,
                 env_randomizer=None,
                 log_path=None,
                 target_position=None,
                 signal_type="ik",
                 terrain_type="plane",
                 terrain_id=None,
                 mark='base'):
        """Initialize Rex trotting gym environment.

        Args:
          debug: Whether to enable debug printing and the debug UI.
          urdf_version: [DEFAULT_URDF_VERSION] are allowable versions. If
            None, DEFAULT_URDF_VERSION is used. Refer to rex_gym_env for more
            details.
          energy_weight: The weight of the energy term in the reward function.
            Refer to rex_gym_env for more details.
          control_time_step: The time step between two successive control
            signals.
          action_repeat: The number of simulation steps that an action is
            repeated.
          control_latency: The latency between get_observation() and the
            actual observation. See rex.py for more details.
          pd_latency: The latency used to get motor angles/velocities used to
            compute PD controllers. See rex.py for more details.
          on_rack: Whether to place Rex on rack. This is only used to debug
            the walk gait. In this mode, Rex's base is hung midair so that
            its walk gait is clearer to visualize.
          motor_kp: The P gain of the motor.
          motor_kd: The D gain of the motor.
          render: Whether to render the simulation.
          num_steps_to_log: The max number of control steps in one episode.
            If the number of steps is over num_steps_to_log, the environment
            will still be running, but only first num_steps_to_log will be
            recorded in logging.
          use_angle_in_observation: Whether to include motor angles in
            observation.
          env_randomizer: An instance (or a list) of EnvRanzomier(s) that can
            randomize the environment during when env.reset() is called and
            add perturbations when env.step() is called.
          log_path: The path to write out logs. For the details of logging,
            refer to rex_logging.proto.
          target_position: Target x distance to walk; randomized when None.
          signal_type: Gait generator to use, 'ik' or 'ol' (open loop).
          terrain_type: Terrain to load (e.g. "plane").
          terrain_id: Identifier of the terrain variant.
          mark: Robot model variant identifier.
        """
        self._use_angle_in_observation = use_angle_in_observation
        super(RexReactiveEnv, self).__init__(urdf_version=urdf_version,
                                             energy_weight=energy_weight,
                                             accurate_motor_model_enabled=True,
                                             motor_overheat_protection=True,
                                             hard_reset=False,
                                             motor_kp=motor_kp,
                                             motor_kd=motor_kd,
                                             remove_default_joint_damping=False,
                                             control_latency=control_latency,
                                             pd_latency=pd_latency,
                                             on_rack=on_rack,
                                             render=render,
                                             num_steps_to_log=num_steps_to_log,
                                             env_randomizer=env_randomizer,
                                             log_path=log_path,
                                             control_time_step=control_time_step,
                                             action_repeat=action_repeat,
                                             target_position=target_position,
                                             signal_type=signal_type,
                                             debug=debug,
                                             terrain_id=terrain_id,
                                             terrain_type=terrain_type,
                                             mark=mark)
        # (eventually) allow different feedback ranges/action spaces for different signals
        action_max = {'ik': 0.4, 'ol': 0.3}
        action_dim_map = {
            'ik': 2,
            'ol': 4,
        }
        action_dim = action_dim_map[self._signal_type]
        # BUG FIX: the bounds were inverted (low built positive, high set to
        # its negation), producing a Box whose low > high elementwise. The
        # action space is symmetric around zero.
        action_high = np.array([action_max[self._signal_type]] * action_dim)
        action_low = -action_high
        self.action_space = spaces.Box(action_low, action_high)
        self._cam_dist = 1.0
        self._cam_yaw = 0.0
        self._cam_pitch = -20
        self._target_position = target_position
        self._signal_type = signal_type
        self._gait_planner = GaitPlanner("gallop")
        self._kinematics = Kinematics()
        self.goal_reached = False
        self._stay_still = False
        self.is_terminating = False

    def reset(self):
        """Reset the episode: re-pose the robot and pick a walking target."""
        self.init_pose = rex_constants.INIT_POSES["stand"]
        if self._signal_type == 'ol':
            self.init_pose = rex_constants.INIT_POSES["stand_ol"]
        super(RexReactiveEnv, self).reset(initial_motor_angles=self.init_pose,
                                          reset_duration=0.5)
        self.goal_reached = False
        self._stay_still = False
        self.is_terminating = False
        if not self._target_position or self._random_pos_target:
            self._target_position = random.uniform(1, 3)
            self._random_pos_target = True
        if self._is_render and self._signal_type == 'ik':
            if self.load_ui:
                self.setup_ui()
                self.load_ui = False
        if self._is_debug:
            print(f"Target Position x={self._target_position}, Random assignment: {self._random_pos_target}")
        return self._get_observation()

    def setup_ui(self):
        """Create the pybullet debug sliders for the IK gait parameters."""
        self.base_x_ui = self._pybullet_client.addUserDebugParameter(
            "base_x", self._ranges["base_x"][0], self._ranges["base_x"][1], 0.01)
        self.base_y_ui = self._pybullet_client.addUserDebugParameter(
            "base_y", self._ranges["base_y"][0], self._ranges["base_y"][1], self._ranges["base_y"][2])
        self.base_z_ui = self._pybullet_client.addUserDebugParameter(
            "base_z", self._ranges["base_z"][0], self._ranges["base_z"][1], -0.007)
        self.roll_ui = self._pybullet_client.addUserDebugParameter(
            "roll", self._ranges["roll"][0], self._ranges["roll"][1], self._ranges["roll"][2])
        self.pitch_ui = self._pybullet_client.addUserDebugParameter(
            "pitch", self._ranges["pitch"][0], self._ranges["pitch"][1], self._ranges["pitch"][2])
        self.yaw_ui = self._pybullet_client.addUserDebugParameter(
            "yaw", self._ranges["yaw"][0], self._ranges["yaw"][1], self._ranges["yaw"][2])
        self.step_length_ui = self._pybullet_client.addUserDebugParameter(
            "step_length", -0.7, 1.5, 1.3)
        self.step_rotation_ui = self._pybullet_client.addUserDebugParameter(
            "step_rotation", -1.5, 1.5, 0.)
        self.step_angle_ui = self._pybullet_client.addUserDebugParameter(
            "step_angle", -180., 180., 0.)
        self.step_period_ui = self._pybullet_client.addUserDebugParameter(
            "step_period", 0.1, .9, 0.3)

    def _read_inputs(self, base_pos_coeff, gait_stage_coeff):
        """Read the debug sliders, scaled by the ramp-in coefficients."""
        position = np.array([
            self._pybullet_client.readUserDebugParameter(self.base_x_ui),
            self._pybullet_client.readUserDebugParameter(self.base_y_ui) * base_pos_coeff,
            self._pybullet_client.readUserDebugParameter(self.base_z_ui)
        ])
        orientation = np.array([
            self._pybullet_client.readUserDebugParameter(self.roll_ui) * base_pos_coeff,
            self._pybullet_client.readUserDebugParameter(self.pitch_ui) * base_pos_coeff,
            self._pybullet_client.readUserDebugParameter(self.yaw_ui) * base_pos_coeff
        ])
        step_length = self._pybullet_client.readUserDebugParameter(self.step_length_ui) * gait_stage_coeff
        step_rotation = self._pybullet_client.readUserDebugParameter(self.step_rotation_ui)
        step_angle = self._pybullet_client.readUserDebugParameter(self.step_angle_ui)
        step_period = self._pybullet_client.readUserDebugParameter(self.step_period_ui)
        return position, orientation, step_length, step_rotation, step_angle, step_period

    def _check_target_position(self, t):
        """Mark the goal reached once the base x distance covers the target."""
        if self._target_position:
            current_x = abs(self.rex.GetBasePosition()[0])
            if current_x >= abs(self._target_position):
                self.goal_reached = True
                if not self.is_terminating:
                    self.end_time = t
                    self.is_terminating = True

    @staticmethod
    def _evaluate_stage_coefficient(current_t, end_t=0.0, width=0.001):
        # smoothstep-style ramp from 0 to 1 over [end_t, end_t + width]
        # (labelled "sigmoid" upstream)
        beta = p = width
        if p - beta + end_t <= current_t <= p - (beta / 2) + end_t:
            return (2 / beta**2) * (current_t - p + beta)**2
        elif p - (beta / 2) + end_t <= current_t <= p + end_t:
            return 1 - (2 / beta**2) * (current_t - p)**2
        else:
            return 1

    @staticmethod
    def _evaluate_brakes_stage_coeff(current_t, action, end_t=0.0, end_value=0.0):
        # linear ramp-down from 1 toward 0 over [end_t, end_t + 1 + action[0]]
        p = 1. + action[0]
        if end_t <= current_t <= p + end_t:
            return 1 - (current_t - end_t)
        else:
            return end_value

    @staticmethod
    def _evaluate_gait_stage_coeff(current_t, action, end_t=0.0):
        # ramp function: grows with t during the first 1 + action[1] seconds
        p = 1. + action[1]
        if end_t <= current_t <= p + end_t:
            return current_t
        else:
            return 1.0

    def _signal(self, t, action):
        """Dispatch to the configured gait generator."""
        if self._signal_type == 'ik':
            return self._IK_signal(t, action)
        if self._signal_type == 'ol':
            return self._open_loop_signal(t, action)

    def _IK_signal(self, t, action):
        """Generate the 12 motor angles via the gallop gait + IK solver."""
        base_pos_coeff = self._evaluate_stage_coefficient(t, width=1.5)
        gait_stage_coeff = self._evaluate_gait_stage_coeff(t, action)
        if self._is_render:
            position, orientation, step_length, step_rotation, step_angle, step_period = \
                self._read_inputs(base_pos_coeff, gait_stage_coeff)
        else:
            position = np.array([0.01, self._base_y * base_pos_coeff, -0.007])
            orientation = np.array([
                self._base_roll * base_pos_coeff,
                self._base_pitch * base_pos_coeff,
                self._base_yaw * base_pos_coeff
            ])
            step_length = (self.step_length if self.step_length is not None else 1.3) * gait_stage_coeff
            step_rotation = self.step_rotation if self.step_rotation is not None else 0.0
            step_angle = self.step_angle if self.step_angle is not None else 0.0
            step_period = self.step_period if self.step_period is not None else 0.3
        if self.goal_reached:
            # Shrink the stride toward zero to brake after reaching the goal.
            brakes_coeff = self._evaluate_brakes_stage_coeff(t, action, self.end_time)
            step_length *= brakes_coeff
        frames = self._gait_planner.loop(step_length, step_angle, step_rotation, step_period, 1.0)
        fr_angles, fl_angles, rr_angles, rl_angles, _ = self._kinematics.solve(orientation, position, frames)
        signal = [
            fl_angles[0], fl_angles[1], fl_angles[2],
            fr_angles[0], fr_angles[1], fr_angles[2],
            rl_angles[0], rl_angles[1], rl_angles[2],
            rr_angles[0], rr_angles[1], rr_angles[2]
        ]
        return signal

    def _open_loop_signal(self, t, leg_pose):
        """Map the 4-dim action onto the 12 motors around the initial pose."""
        if self.goal_reached:
            coeff = self._evaluate_brakes_stage_coeff(t, [.0], end_t=self.end_time, end_value=0.0)
            # Multiply out-of-place so the caller's action array is not
            # mutated as a side effect.
            leg_pose = leg_pose * coeff
            # BUG FIX: 'coeff is 0.0' compared object identity of two float
            # objects and was effectively always False, so the robot never
            # entered the stay-still state in open-loop mode.
            if coeff == 0.0:
                self._stay_still = True
        motor_pose = np.zeros(NUM_MOTORS)
        for i in range(NUM_LEGS):
            motor_pose[3 * i] = self.init_pose[3 * i]
            init_leg = self.init_pose[3 * i + 1]
            init_foot = self.init_pose[3 * i + 2]
            if i == 0 or i == 1:
                # first pair of legs uses leg_pose[0:2]
                motor_pose[3 * i + 1] = init_leg + leg_pose[0]
                motor_pose[3 * i + 2] = init_foot + leg_pose[1]
            else:
                # second pair of legs uses leg_pose[2:4]
                motor_pose[3 * i + 1] = init_leg + leg_pose[2]
                motor_pose[3 * i + 2] = init_foot + leg_pose[3]
        return motor_pose

    def _transform_action_to_motor_command(self, action):
        """Convert the policy action into motor commands via the gait signal."""
        if self._stay_still:
            return self.rex.initial_pose
        t = self.rex.GetTimeSinceReset()
        self._check_target_position(t)
        action = self._signal(t, action)
        action = super(RexReactiveEnv, self)._transform_action_to_motor_command(action)
        return action

    def _out_of_trajectory(self):
        """True when the base has drifted more than 0.3 m sideways (y)."""
        current_base_position = self.rex.GetBasePosition()
        return current_base_position[1] > 0.3

    def is_fallen(self):
        """Decides whether Rex is in a fallen state.

        If the roll of the base is greater than 0.3 radians, or the pitch is
        greater than 0.5 radians, Rex is considered fallen.

        Returns:
          Boolean value that indicates whether Rex has fallen.
        """
        roll, pitch, _ = self.rex.GetTrueBaseRollPitchYaw()
        return math.fabs(roll) > 0.3 or math.fabs(pitch) > 0.5

    def _get_true_observation(self):
        """Get the true observations of this environment.

        It includes the roll, the pitch, the roll dot and the pitch dot of
        the base. If _use_angle_in_observation is true, the motor angles are
        added into the observation.

        Returns:
          The observation list, which is a numpy array of floating-point
          values.
        """
        roll, pitch, _ = self.rex.GetTrueBaseRollPitchYaw()
        roll_rate, pitch_rate, _ = self.rex.GetTrueBaseRollPitchYawRate()
        observation = [roll, pitch, roll_rate, pitch_rate]
        if self._use_angle_in_observation:
            observation.extend(self.rex.GetMotorAngles().tolist())
        self._true_observation = np.array(observation)
        return self._true_observation

    def _get_observation(self):
        """Get the (possibly noisy/delayed) observation; same layout as
        _get_true_observation()."""
        roll, pitch, _ = self.rex.GetBaseRollPitchYaw()
        roll_rate, pitch_rate, _ = self.rex.GetBaseRollPitchYawRate()
        observation = [roll, pitch, roll_rate, pitch_rate]
        if self._use_angle_in_observation:
            observation.extend(self.rex.GetMotorAngles().tolist())
        self._observation = np.array(observation)
        return self._observation

    def _get_observation_upper_bound(self):
        """Get the upper bound of the observation.

        Returns:
          The upper bound of an observation. See _get_true_observation() for
          the details of each element of an observation.
        """
        upper_bound_roll = 2 * math.pi
        upper_bound_pitch = 2 * math.pi
        upper_bound_roll_dot = 2 * math.pi / self._time_step
        upper_bound_pitch_dot = 2 * math.pi / self._time_step
        upper_bound_motor_angle = 2 * math.pi
        upper_bound = [
            upper_bound_roll, upper_bound_pitch,
            upper_bound_roll_dot, upper_bound_pitch_dot
        ]
        if self._use_angle_in_observation:
            upper_bound.extend(
                [upper_bound_motor_angle] * mark_constants.MARK_DETAILS['motors_num'][self.mark])
        return np.array(upper_bound)

    def _get_observation_lower_bound(self):
        """Lower bound is symmetric to the upper bound."""
        lower_bound = -self._get_observation_upper_bound()
        return lower_bound