def reset(self):
    self.episode_steps = 0

    # both when the action formulation is incremental and when it's relative, we need to start standing
    self.sim.reset(rest=True)  # also stand up the robot

    # Add elements in our environment
    self._reset_simulation_env()

    # this is used when self.incremental_action == True
    self.current_action = self.sim.get_rest_pos()

    # Our expert policy
    self.gait = HardPolicy()

    return self._get_obs()
import time

import gym

import gym_pupper
from gym_pupper.common.Utilities import controller_to_sim
from gym_pupper.pupper.policy import HardPolicy

ROLLOUTS = 5

env = gym.make("Pupper-Walk-Absolute-aScale_1.0-gFact_0.0-RandomZRot_1-Graphical-v0")

for run in range(ROLLOUTS):
    state = env.reset()
    policy = HardPolicy()
    while True:
        action = controller_to_sim(policy.act(velocity_horizontal=(0.2, 0), normalized=True))
        next_state, rew, done, misc = env.step(action)
        if done:
            break
        time.sleep(1 / 60)
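# The demo above relies on controller_to_sim to bridge the gait controller and
# the simulator. Below is a minimal sketch of what such a conversion could look
# like - an assumption for illustration, not the repo's actual implementation:
# the controller works with a (3, 4) matrix (rows = hip/upper/lower angle, one
# column per leg), while the sim consumes a flat 12-vector ordered leg by leg
# (fr, fl, br, bl), matching the observation-space ordering documented below.
import numpy as np

def controller_to_sim_sketch(joints_3x4):
    # (3, 4) -> transpose to (4, 3) -> flatten to (12,), so each leg's
    # hip/upper/lower angles end up contiguous
    return np.asarray(joints_3x4).T.flatten()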
class FootOffsetEnv(gym.Env):
    def __init__(
        self,
        debug=False,
        steps=120,
        control_freq=CONTROL_FREQUENCY,
        action_scaling=1.0,
        random_rot=(0, 0, 0),
        reward_coefficients=(0.1, 1, 1),
        stop_on_flip=False,
    ):
        """Gym-compatible environment to teach the pupper how to walk - with predefined gait as baseline"""
        super().__init__()

        # observation space:
        # - 12 leg joints in the order
        #   - front right hip
        #   - front right upper leg
        #   - front right lower leg
        #   - front left hip/upper/lower leg
        #   - back right hip/upper/lower leg
        #   - back left hip/upper/lower leg
        # - 3 body orientation in euler angles
        # - 2 linear velocity (only along the plane, we don't care about z velocity)
        self.observation_space = spaces.Box(low=-1, high=1, shape=(12 + 3 + 2,), dtype=np.float32)

        # action = x/y/z offset for all 4 feet
        self.action_space = spaces.Box(low=-1, high=1, shape=(12,), dtype=np.float32)

        # turning off start_standing because that's done in self.reset()
        kwargs = {}
        # if relative_action:  # if this is turned on, the robot is underpowered for the hardcoded gait
        #     kwargs = {"gain_pos": 1 / 16, "gain_vel": 1 / 8, "max_torque": 1 / 2}
        self.sim = PupperSim2(debug=debug, start_standing=False, **kwargs)

        self.episode_steps = 0
        self.episode_steps_max = steps
        self.control_freq = control_freq
        self.dt = 1 / self.control_freq
        self.incremental_angle = np.deg2rad(MAX_ANGLE_PER_SEC) / self.control_freq
        self.sim_steps = int(round(FREQ_SIM / control_freq))
        print(f"Running with {self.sim_steps} substeps")
        self.action_scaling = action_scaling
        self.random_rot = random_rot
        self.stop_on_flip = stop_on_flip
        self.current_action = np.array([0] * 12)
        self.gait = None

        ranges = np.load(f"{ASSET_DIR}/joint_foot_ranges.npz")
        self.foot_ranges = ranges["foot_ranges"].reshape(12, 2)
        self.foot_ranges_diff = self.foot_ranges[:, 1] - self.foot_ranges[:, 0]

        # extend the allowed foot ranges by 25% of the range on that axis
        print("fr before", self.foot_ranges)
        self.foot_ranges = np.array(
            [
                (fr[0] - 0.25 * fd, fr[1] + 0.25 * fd) if fd > 0 else (fr[0] - 0.1, fr[1] + 0.1)
                for fr, fd in zip(self.foot_ranges, self.foot_ranges_diff)
            ]
        )
        self.foot_ranges_diff[self.foot_ranges_diff == 0] = 0.2  # corresponding to min dist
        print("fr after", self.foot_ranges)
        self.joint_ranges = ranges["joint_ranges"]
        print("joint ranges", self.joint_ranges)

        # new reward coefficients
        self.rcoeff_ctrl, self.rcoeff_run, self.rcoeff_stable = reward_coefficients

    def reset(self):
        self.episode_steps = 0
        self.sim.reset(rest=True)  # also stand up the robot
        self.gait = HardPolicy()
        return self.get_obs()

    def seed(self, seed=None):
        np.random.seed(seed)
        return super().seed(seed)

    def close(self):
        self.sim.p.disconnect()
        super().close()

    def sanitize_actions(self, actions):
        assert len(actions) == 12

        actions_clipped = (np.clip(actions, -1, 1).astype(np.float32) + 1) / 2
        # denormalize actions to correspond to foot offsets
        actions_denorm = actions_clipped * self.foot_ranges_diff + self.foot_ranges[:, 0]

        # step the gait to get the foot positions
        self.gait.act(velocity_horizontal=(0.2, 0), normalized=False)

        # combine the gait with the offset, half-half
        actions_merged = 0.5 * actions_denorm + 0.5 * self.gait.state.foot_locations.T.flatten()

        # make sure feet don't exceed the max foot range
        clipped_feet = np.clip(actions_merged, self.foot_ranges[:, 0], self.foot_ranges[:, 1])

        # reshape and apply inverse kinematics to get joints from feet
        clipped_feet = clipped_feet.reshape(4, 3).T
        joints = self.gait.controller.inverse_kinematics(clipped_feet, self.gait.controller.config)

        # convert joints from the (3, 4) repr to a sim-usable one
        joints = controller_to_sim(joints)
        return joints

    def get_obs(self):
        pos, orn, vel = self.sim.get_pos_orn_vel()

        joint_states = np.array(self.sim.get_joint_states())
        # per-joint normalization based on the max useful range
        joint_states = np.array(
            [(js - rngs[0]) / (rngs[1] - rngs[0]) for js, rngs in zip(joint_states, self.joint_ranges)]
        ) / (np.pi * 1.25)

        obs = list(joint_states) + list(orn) + list(vel)[:2]
        return obs

    def step(self, action):
        action_clean = self.sanitize_actions(action)
        pos_before, _, _ = self.sim.get_pos_orn_vel()

        self.sim.action(action_clean)
        for _ in range(self.sim_steps):
            self.sim.step()

        pos_after, orn_after, _ = self.sim.get_pos_orn_vel()
        obs = self.get_obs()

        # this reward calculation is taken verbatim from halfcheetah-v2, save for the stability term
        reward_ctrl = -np.square(action).sum()
        reward_run = (pos_after[0] - pos_before[0]) / self.dt
        # technically we should divide the next line by (3*pi^2) but that's really hard to reach
        reward_stable = -np.square(orn_after).sum() / np.square(np.pi)
        reward = self.rcoeff_ctrl * reward_ctrl + self.rcoeff_run * reward_run + self.rcoeff_stable * reward_stable

        done = False
        self.episode_steps += 1
        if self.episode_steps == self.episode_steps_max:
            done = True

        return obs, reward, done, dict(reward_run=reward_run, reward_ctrl=reward_ctrl, reward_stable=reward_stable)

    def render(self, mode="human"):
        # todo: if the mode == "human" then this should open and display a
        # window a la "cv2.imshow('xxx', img); cv2.waitKey(10)"
        img, _ = self.sim.take_photo()
        return img
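# A minimal usage sketch for FootOffsetEnv, assuming the module-level imports
# and constants (spaces, np, PupperSim2, CONTROL_FREQUENCY, FREQ_SIM,
# ASSET_DIR) are available as in the class above. Actions are normalized
# per-foot x/y/z offsets in [-1, 1]; sanitize_actions() denormalizes them into
# the allowed foot ranges and blends them half-half with the hardcoded gait
# before running inverse kinematics.
import numpy as np

env = FootOffsetEnv(debug=False, steps=120)
obs = env.reset()
done = False
while not done:
    offsets = np.random.uniform(-1, 1, size=12)  # random offsets around the gait
    obs, reward, done, info = env.step(offsets)
    print(info["reward_run"], info["reward_stable"])
env.close()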
import numpy as np
from tqdm import trange

import gym_pupper
from gym_pupper.common.Utilities import controller_to_sim, ik_to_sim
from gym_pupper.pupper.policy import HardPolicy
from gym_pupper.sim.simulator2 import PupperSim2

FREQ_SIM = 240
FREQ_CTRL = 60
substeps = int(FREQ_SIM / FREQ_CTRL)

sim = PupperSim2(with_arm=True, frequency=FREQ_SIM, debug=True, img_size=(512, 256))
sim.reset()
sim.change_color((0.588, 0.419, 1, 1))
# time.sleep(1)

policy = HardPolicy(fps=FREQ_CTRL, warmup=10)

arm_rest = np.array([0, 1, -0.8]) * np.pi / 2
sim.action_arm(arm_rest)

arm_angles = []
for i in trange(60 * 47):
    # action, _ = controller_to_sim(policy.act(velocity_horizontal=(-0.2, 0), normalized=True)) * np.pi
    # action, _ = controller_to_sim(policy.act(velocity_horizontal=(0.2, 0), yaw_rate=1.0, normalized=True)) * np.pi

    # idle_walk_fw = 0.008
    #
    # vel_body = (0, 0)
    # yaw = 0
    # vel_arm = (0, 0, 0)  # (+0.001, +0.002, 0)
    # if 0 <= i <= 60 * 5:
plt.tight_layout()
plt.title("sim direct joints")
plt.show()

reset_sim()

debug_params = []
for param in ["velocity fwd", "velocity left"]:
    debug_params.append(sim.p.addUserDebugParameter(param, -1, 1, 0))
debug_params.append(sim.p.addUserDebugParameter("reset", 1, 0, 1))
debug_params.append(sim.p.addUserDebugParameter("plot joints", 1, 0, 1))
# debug_params.append(sim.p.addUserDebugParameter("zero", 1, 0, 1))

policy = HardPolicy()

last_reset_val = -1
last_plot_val = -1
joints = []
counter = 0
while True:
    vel = [0, 0]
    for param_idx, param in enumerate(debug_params):
        val = sim.p.readUserDebugParameter(param)
        if param_idx < 2:
            vel[param_idx] = val
        if param_idx == 2 and val != last_reset_val:
            reset_sim()
            last_reset_val = val
        if param_idx == 3 and val != last_plot_val:
import time

import numpy as np

from gym_pupper.common.Utilities import controller_to_sim
from gym_pupper.pupper.policy import HardPolicy

FREQ_CTRL = 60
TEST_BODY = False
TEST_ARM = True

policy = HardPolicy(fps=FREQ_CTRL)

########### BODY

if TEST_BODY:
    iterations = 10_000
    times = 0
    for _ in range(iterations):
        start = time.time()
        _ = controller_to_sim(policy.act(velocity_horizontal=(0.2, 0), yaw_rate=1.0, normalized=True)) * np.pi
        diff = time.time() - start
        times += diff
    print(
        f"BODY: ran {iterations} iterations at {np.around(iterations / times, 1)}Hz, "
        f"so avg of {times / iterations}s per iteration"
    )
    # MBP 2018: ran 100000 iterations at 1749.8Hz, so avg of 0.000571490352153778s per iteration

########### ARM
import numpy as np
from scipy import fftpack
from tqdm import trange
import matplotlib.pyplot as plt

from gym_pupper.common.Utilities import controller_to_sim
from gym_pupper.pupper.policy import HardPolicy

policy = HardPolicy()
steps = 2000

joints = []
feet = []
for _ in trange(steps):
    joints_legs, _ = policy.act(velocity_horizontal=(0.2, 0), yaw_rate=0.0, normalized=False)
    action = controller_to_sim(joints_legs)
    joints.append(action)
    feet.append(policy.state.foot_locations.T)

feet = np.array(feet)
print(feet.shape)

x = np.arange(0, steps)
fig, axs = plt.subplots(2, 2, sharex=True, sharey=True)
for foot in range(4):
    im = axs[foot // 2][foot % 2].scatter(feet[:, foot, 0], feet[:, foot, 2], alpha=0.5, c=x, cmap="viridis")
    # axs[foot % 2][foot // 2].legend()
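# scipy.fftpack is imported above but unused in this excerpt. A sketch of the
# kind of analysis it enables, assuming the `joints` list collected in the
# loop above and a 60Hz control rate (the rate used elsewhere in this repo):
# estimate the dominant gait frequency from one joint trajectory.
joints = np.array(joints)
signal = joints[:, 1] - joints[:, 1].mean()  # front-right upper leg, zero-mean
spectrum = np.abs(fftpack.fft(signal))
freqs = fftpack.fftfreq(len(signal), d=1 / 60)
peak_idx = np.argmax(spectrum[1 : len(signal) // 2]) + 1  # skip the DC bin
print(f"dominant gait frequency: {freqs[peak_idx]:.2f}Hz")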
class WalkingEnv(gym.Env):
    def __init__(
        self,
        debug=False,
        steps=120,
        control_freq=CONTROL_FREQUENCY,
        relative_action=True,
        incremental_action=False,
        action_scaling=1.0,
        action_smoothing=1,
        random_rot=(0, 0, 0),
        reward_coefficients=(0.1, 1, 0),
        stop_on_flip=False,
        gait_factor=0.0,
    ):
        """
        Gym-compatible environment to teach the pupper how to walk

        :param bool debug: If True, shows PyBullet in GUI mode. Set to False when training.
        :param int steps: How long is the episode? Each step = one call to WalkingEnv.step()
        :param int control_freq: How many control steps there are in a second of PyBullet sim.
            PyBullet always runs at 240Hz but that's not optimal for RL control.
        :param bool relative_action: If set to True, then all actions are added to the resting position.
            This gives the robot a more stable starting position.
        """
        super().__init__()

        # observation space:
        # - 12 leg joints in the order
        #   - front right hip
        #   - front right upper leg
        #   - front right lower leg
        #   - front left hip/upper/lower leg
        #   - back right hip/upper/lower leg
        #   - back left hip/upper/lower leg
        # - 3 body orientation in euler angles
        # - 2 linear velocity (only along the plane, we don't care about z velocity)
        self.observation_space = spaces.Box(low=-1, high=1, shape=(12 + 3 + 2,), dtype=np.float32)

        # action = 12 joint angles in the same order as above (fr/fl/br/bl and each with hip/upper/lower)
        self.action_space = spaces.Box(low=-1, high=1, shape=(12,), dtype=np.float32)

        # turning off start_standing because that's done in self.reset()
        kwargs = {}
        # if relative_action:  # if this is turned on, the robot is underpowered for the hardcoded gait
        #     kwargs = {"gain_pos": 1 / 16, "gain_vel": 1 / 8, "max_torque": 1 / 2}
        self.sim = PupperSim2(debug=debug, start_standing=False, **kwargs)

        self.episode_steps = 0
        self.episode_steps_max = steps
        self.control_freq = control_freq
        self.dt = 1 / self.control_freq
        self.incremental_angle = np.deg2rad(MAX_ANGLE_PER_SEC) / self.control_freq
        self.sim_steps = int(round(FREQ_SIM / control_freq))
        self.relative_action = relative_action
        self.incremental_action = incremental_action
        self.action_scaling = action_scaling
        self.action_smoothing = deque(maxlen=action_smoothing)
        self.random_rot = random_rot
        self.stop_on_flip = stop_on_flip
        self.current_action = np.array([0] * 12)
        self.gait = None
        assert 0 <= gait_factor <= 1
        self.gait_factor = gait_factor

        self.joints_hard_limit_lower = (-np.pi + 0.001) * np.ones(12)
        self.joints_hard_limit_uppper = (np.pi - 0.001) * np.ones(12)

        # new reward coefficients
        self.rcoeff_ctrl, self.rcoeff_run, self.rcoeff_stable = reward_coefficients

    def reset(self):
        self.episode_steps = 0

        # both when the action formulation is incremental and when it's relative, we need to start standing
        self.sim.reset(rest=True)  # also stand up the robot
        # for _ in range(10):
        #     self.sim.step()

        # this is used when self.incremental_action == True
        self.current_action = self.sim.get_rest_pos()

        self.gait = HardPolicy()

        return self.get_obs()

    def seed(self, seed=None):
        np.random.seed(seed)
        return super().seed(seed)

    def close(self):
        self.sim.p.disconnect()
        super().close()

    def sanitize_actions(self, actions):
        assert len(actions) == 12

        actions = np.array(actions).astype(np.float32)
        if not self.incremental_action:
            scaled = actions * np.pi * self.action_scaling  # because 1/-1 corresponds to pi/-pi radians rotation
            if self.relative_action:
                scaled += self.sim.get_rest_pos()
                # this enforces an action range of -1/1, except if it's a relative action -
                # then the action space is asymmetric
        else:
            scaled = actions * self.incremental_angle + self.current_action

        clipped = np.clip(scaled, self.joints_hard_limit_lower, self.joints_hard_limit_uppper)
        self.current_action = np.copy(clipped)
        return clipped

    def get_obs(self):
        pos, orn, vel = self.sim.get_pos_orn_vel()

        joint_states = np.array(self.sim.get_joint_states()) / np.pi  # to normalize to [-1, 1]
        obs = list(joint_states) + list(orn) + list(vel)[:2]
        return obs

    def step(self, action):
        action_clean = self.sanitize_actions(action)
        pos_before, _, _ = self.sim.get_pos_orn_vel()

        # The action command only sets the goals of the motors. It doesn't actually step the simulation forward
        # in time. Instead of feeding the simulator the action directly, we take the mean of the last N actions,
        # where N comes from the action_smoothing hyper-parameter.
        self.action_smoothing.append(action_clean)
        action_agent = np.mean(self.action_smoothing, axis=0)

        action_gait = controller_to_sim(self.gait.act(velocity_horizontal=(0.2, 0), normalized=False))
        action = self.gait_factor * action_gait + (1 - self.gait_factor) * action_agent

        # let's clip again just to be safe and within the boundaries of the expert
        action = np.clip(
            action,
            np.max((self.joints_hard_limit_lower, action_gait + 0.2 * self.joints_hard_limit_lower), axis=0),
            np.min((self.joints_hard_limit_uppper, action_gait + 0.2 * self.joints_hard_limit_uppper), axis=0),
        )

        self.sim.action(action)
        for _ in range(self.sim_steps):
            self.sim.step()

        pos_after, orn_after, _ = self.sim.get_pos_orn_vel()
        obs = self.get_obs()

        # this reward calculation is taken verbatim from halfcheetah-v2, save for the stability term
        reward_ctrl = -np.square(action).sum()
        reward_run = (pos_after[0] - pos_before[0]) / self.dt
        # technically we should divide the next line by (3*pi^2) but that's really hard to reach
        reward_stable = -np.square(orn_after).sum() / np.square(np.pi)
        reward = self.rcoeff_ctrl * reward_ctrl + self.rcoeff_run * reward_run + self.rcoeff_stable * reward_stable

        done = False
        self.episode_steps += 1
        if self.episode_steps == self.episode_steps_max:
            done = True

        if self.stop_on_flip:
            flipped = self.sim.check_collision()
            if flipped:
                done = True
                reward -= 1000

        return obs, reward, done, dict(reward_run=reward_run, reward_ctrl=reward_ctrl, reward_stable=reward_stable)

    def render(self, mode="human"):
        # todo: if the mode == "human" then this should open and display a
        # window a la "cv2.imshow('xxx', img); cv2.waitKey(10)"
        img, _ = self.sim.take_photo()
        return img
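# A minimal usage sketch for WalkingEnv, assuming the same module-level
# imports and constants as the class above. gait_factor blends the hardcoded
# expert with the agent inside step(): 1.0 executes the pure gait, 0.0 gives
# the agent full control, and values in between mix the two joint targets
# linearly.
import numpy as np

env = WalkingEnv(debug=False, steps=120, relative_action=True, gait_factor=0.5)
obs = env.reset()
total_reward = 0.0
done = False
while not done:
    action = np.zeros(12)  # with relative_action=True, zeros hold the rest pose
    obs, reward, done, info = env.step(action)
    total_reward += reward
print("episode return:", total_reward)
env.close()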