def update():
    global curve, plot_container, data_storage, foot_locations, tick

    horizontal_velocity = (0.2, 0.0)
    yaw_rate = 0.0
    target_height = -0.16
    current_height = target_height

    foot_locations = controller.step_gait(foot_locations, np.array(horizontal_velocity),
                                          yaw_rate, target_height, current_height, tick)
    tick += 1
    joints = ik(foot_locations, quat)  # replace with current body orientation

    # foot_history.append(np.copy(foot_locations.T))
    data_storage.append(foot_locations.T)
    data_arr = np.array(data_storage)

    # sim.action(controller_to_sim(joints))
    sim.set(controller_to_sim(joints))
    for _ in range(substeps):
        sim.step()

    foot1.setData(x=data_arr[:, 0, 0], y=data_arr[:, 0, 2])
    foot2.setData(x=data_arr[:, 1, 0], y=data_arr[:, 1, 2])
    foot3.setData(x=data_arr[:, 2, 0], y=data_arr[:, 2, 2])
    foot4.setData(x=data_arr[:, 3, 0], y=data_arr[:, 3, 2])
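# The update() above assumes a plotting context that isn't shown here. A minimal sketch
# of what that setup might look like, assuming pyqtgraph (suggested by the setData()
# calls). The window title, pens, and timer rate are illustrative guesses; foot1..foot4
# are the curve objects update() writes to.
import pyqtgraph as pg
from pyqtgraph.Qt import QtCore

app = pg.mkQApp()
plot_container = pg.plot(title="foot x/z trajectories")
foot1 = plot_container.plot(pen="r")
foot2 = plot_container.plot(pen="g")
foot3 = plot_container.plot(pen="b")
foot4 = plot_container.plot(pen="y")

timer = QtCore.QTimer()
timer.timeout.connect(update)  # re-run the gait step and redraw on every timer tick
timer.start(1000 // 60)        # roughly 60 Hz
app.exec_()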
def step(self, action):
    action_clean = self.sanitize_actions(action)
    pos_before, _, _ = self.sim.get_pos_orn_vel()

    # The action command only sets the goals of the motors. It doesn't actually step the simulation forward
    # in time. Instead of feeding the simulator the action directly, we take the mean of the last N actions,
    # where N comes from the action_smoothing hyperparameter.
    self.action_smoothing.append(action_clean)
    action_agent = np.mean(self.action_smoothing, axis=0)

    action_gait = controller_to_sim(self.gait.act(velocity_horizontal=(0.2, 0), normalized=False))
    action = self.gait_factor * action_gait + (1 - self.gait_factor) * action_agent

    # let's clip again just to be safe and within the boundaries of the expert
    action = np.clip(
        action,
        np.max((self.joints_hard_limit_lower, action_gait + 0.2 * self.joints_hard_limit_lower), axis=0),
        np.min((self.joints_hard_limit_uppper, action_gait + 0.2 * self.joints_hard_limit_uppper), axis=0),
    )

    self.sim.action(action)
    for _ in range(self.sim_steps):
        self.sim.step()

    pos_after, orn_after, _ = self.sim.get_pos_orn_vel()
    obs = self.get_obs()

    # this reward calculation is taken verbatim from HalfCheetah-v2, save for the stability term below
    reward_ctrl = -np.square(action).sum()
    reward_run = (pos_after[0] - pos_before[0]) / self.dt
    # technically we should divide the next line by (3 * pi^2), but that's really hard to reach
    reward_stable = -np.square(orn_after).sum() / np.square(np.pi)
    reward = (self.rcoeff_ctrl * reward_ctrl
              + self.rcoeff_run * reward_run
              + self.rcoeff_stable * reward_stable)

    done = False
    self.episode_steps += 1
    if self.episode_steps == self.episode_steps_max:
        done = True

    if self.stop_on_flip:
        flipped = self.sim.check_collision()
        if flipped:
            done = True
            reward -= 1000

    return obs, reward, done, dict(reward_run=reward_run,
                                   reward_ctrl=reward_ctrl,
                                   reward_stable=reward_stable)
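# Note on the smoothing buffer: np.mean over self.action_smoothing only behaves like a
# sliding window if old entries are evicted automatically. A minimal stand-alone sketch,
# assuming the buffer is a collections.deque with maxlen=N (the window size 4 below is
# an arbitrary illustration, not the repo's hyperparameter):
from collections import deque

import numpy as np

action_smoothing = deque(maxlen=4)  # keeps only the last 4 actions
for step_idx in range(6):
    action_smoothing.append(np.full(12, float(step_idx)))  # fake 12-joint actions
smoothed = np.mean(action_smoothing, axis=0)
print(smoothed[0])  # 3.5 -> the mean of steps 2, 3, 4, 5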
def step(self, action):
    self.start_step()
    self.old_pos, _, _ = self.sim.get_pos_orn_vel()

    foot_position = self._get_action(action)
    joints = ik(foot_position.T, self.quat)  # replace with current body orientation
    # joints = ik(action, self.quat)
    self.sim.action(controller_to_sim(joints))
    for _ in range(self.sim_steps):
        self.sim.step()

    self.episode_steps += 1
    # return self._get_obs(), self._get_reward(action), self._get_done(), self._get_info()
    return self._get_obs(), 0, self._get_done(), self._get_info()
def sanitize_actions(self, actions):
    assert len(actions) == 12

    # clip to [-1, 1] and map to [0, 1]
    actions_clipped = (np.clip(actions, -1, 1).astype(np.float32) + 1) / 2

    # denormalize actions to correspond to foot offsets
    actions_denorm = actions_clipped * self.foot_ranges_diff + self.foot_ranges[:, 0]

    # step the gait to get the expert foot positions
    self.gait.act(velocity_horizontal=(0.2, 0), normalized=False)

    # combine the gait with the offset, half-half
    actions_merged = 0.5 * actions_denorm + 0.5 * self.gait.state.foot_locations.T.flatten()

    # make sure the feet don't exceed their max ranges
    clipped_feet = np.clip(actions_merged, self.foot_ranges[:, 0], self.foot_ranges[:, 1])

    # reshape and apply inverse kinematics to get joints from feet
    clipped_feet = clipped_feet.reshape(4, 3).T
    joints = self.gait.controller.inverse_kinematics(clipped_feet, self.gait.controller.config)

    # convert joints from the (3, 4) representation to a sim-usable one
    joints = controller_to_sim(joints)
    return joints
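# Stand-alone illustration of the denormalization step in sanitize_actions(): raw actions
# in [-1, 1] are rescaled into per-coordinate foot ranges. The foot_ranges values below
# are made up for the example; the repo derives them elsewhere.
import numpy as np

foot_ranges = np.tile(np.array([[-0.1, 0.1]]), (12, 1))  # hypothetical [min, max] per coordinate
foot_ranges_diff = foot_ranges[:, 1] - foot_ranges[:, 0]

actions = np.random.uniform(-1.5, 1.5, size=12)       # raw agent output, may exceed [-1, 1]
actions_clipped = (np.clip(actions, -1, 1) + 1) / 2   # -> [0, 1]
actions_denorm = actions_clipped * foot_ranges_diff + foot_ranges[:, 0]
print(actions_denorm.min(), actions_denorm.max())     # stays within [-0.1, 0.1]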
def _get_action(self, action):
    """
    The agent's raw action is not applied directly; the raw action values need some processing first.

    :param action: raw action
    :return: processed action
    """
    action_clean = self.sanitize_actions(action)

    # The action command only sets the goals of the motors. It doesn't actually step the simulation forward
    # in time. Instead of feeding the simulator the action directly, we take the mean of the last N actions,
    # where N comes from the action_smoothing hyperparameter.
    self.action_smoothing.append(action_clean)
    action_agent = np.mean(self.action_smoothing, axis=0)

    # TODO: Verify this. act() returns a tuple of 2 and the leg actions are in the first element, so I
    # added [0] (maybe because of an update of the gait for the arm actions?)
    action_gait = controller_to_sim(self.gait.act(velocity_horizontal=(0.2, 0), normalized=False)[0])
    action = self.gait_factor * action_gait + (1 - self.gait_factor) * action_agent

    # let's clip again just to be safe and within the boundaries of the expert
    return np.clip(
        action,
        np.max((self.joints_hard_limit_lower, action_gait + 0.2 * self.joints_hard_limit_lower), axis=0),
        np.min((self.joints_hard_limit_uppper, action_gait + 0.2 * self.joints_hard_limit_uppper), axis=0),
    )
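# The final clip above bounds each joint element-wise: by the hard limit and by a band
# around the expert gait, whichever is tighter. A toy example with made-up limits showing
# how np.max/np.min over stacked arrays produce those per-joint bounds:
import numpy as np

hard_lo, hard_hi = np.full(12, -1.0), np.full(12, 1.0)  # hypothetical hard joint limits
action_gait = np.zeros(12)                              # pretend the expert gait is at zero
lo = np.max((hard_lo, action_gait + 0.2 * hard_lo), axis=0)  # -> -0.2 everywhere
hi = np.min((hard_hi, action_gait + 0.2 * hard_hi), axis=0)  # ->  0.2 everywhere
print(np.clip(np.full(12, 0.9), lo, hi)[0])  # 0.2: actions can't stray far from the gait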
import time

import gym
import gym_pupper
from gym_pupper.common.Utilities import controller_to_sim
from gym_pupper.pupper.policy import HardPolicy

ROLLOUTS = 5

# gFact_0.0 presumably sets gait_factor=0, i.e. the expert gait is not blended into the agent's actions
env = gym.make("Pupper-Walk-Absolute-aScale_1.0-gFact_0.0-RandomZRot_1-Graphical-v0")
for run in range(ROLLOUTS):
    state = env.reset()
    policy = HardPolicy()
    while True:
        action = controller_to_sim(policy.act(velocity_horizontal=(0.2, 0), normalized=True))
        print(action)
        quit()  # debug: prints a single expert action and exits; the rollout below never runs
        next_state, rew, done, misc = env.step(action)
        if done:
            break
        time.sleep(1 / 60)
# txt = "arm up" # elif 60 * 41 < i <= 60 * 47: # vel_arm = (0, 0.0001, -0.001) # txt = "arm down" # else: # quit() # if 60 * 29 < i: # vel_body = (idle_walk_fw, 0) action, action_arm = policy.act(velocity_horizontal=(0.2, 0), yaw_rate=0.8, normalized=True, velocity_arm=(0, 0, 0)) # print(action_arm) action = controller_to_sim(action) * np.pi # denormalize arm_angles.append(action_arm) # print(i // 60, policy.state.arm_pos, action_arm) sim.action(action) sim.action_arm(action_arm) for _ in range(substeps): sim.step() # time.sleep(0.01) if i % 2 == 0: img, _ = sim.take_photo(follow_bot=False, camera_offset=(0.5, 0.4, 0.4), lookat_offset=(0.5, 0, 0)) font = cv2.FONT_HERSHEY_SIMPLEX img = np.array(img) # cv2.putText(img, txt, (200, 200), font, 1, (255, 106, 213), 2, cv2.LINE_AA)
config = SimpleConfig(dt=1 / FPS)
controller = SimpleController(config)
foot_locations = DEFAULT_STANCE + np.array([0, 0, DEFAULT_Z_REF])[:, np.newaxis]

sim = PupperSim2(with_arm=False, frequency=240, debug=True, img_size=(512, 256))
# sim.make_kinematic((0.588, 0.419, 1, 1))  # use with sim.set(), otherwise use sim.action()
sim.reset()
# sim.action_arm([0, 0, 0])
# sim.change_color((0.588, 0.419, 1, 1))

for tick in trange(steps):
    horizontal_velocity = (0.2, 0.0)
    yaw_rate = 0.0
    target_height = -0.16
    current_height = target_height

    foot_locations = controller.step_gait(foot_locations, np.array(horizontal_velocity),
                                          yaw_rate, target_height, current_height, tick)
    joints = ik(foot_locations, quat)  # replace with current body orientation

    sim.action(controller_to_sim(joints))
    # sim.set(controller_to_sim(joints))
    for _ in range(substeps):
        sim.step()
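# Kinematic-playback variant of the loop above, per the comment on make_kinematic():
# write the joints directly with sim.set() instead of driving motors with sim.action().
# Sketch only; it reuses the config/controller/sim objects set up above, and the RGBA
# tuple is the same one the commented-out calls use.
sim.make_kinematic((0.588, 0.419, 1, 1))  # use with sim.set(), otherwise use sim.action()
sim.reset()
for tick in trange(steps):
    foot_locations = controller.step_gait(foot_locations, np.array((0.2, 0.0)),
                                          0.0, -0.16, -0.16, tick)
    joints = ik(foot_locations, quat)
    sim.set(controller_to_sim(joints))  # set joint states directly, no motor dynamics
    for _ in range(substeps):
        sim.step()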
policy = HardPolicy()

last_reset_val = -1
last_plot_val = -1
joints = []
counter = 0
while True:
    vel = [0, 0]
    for param_idx, param in enumerate(debug_params):
        val = sim.p.readUserDebugParameter(param)
        if param_idx < 2:
            vel[param_idx] = val
        if param_idx == 2 and val != last_reset_val:
            reset_sim()
            last_reset_val = val
        if param_idx == 3 and val != last_plot_val:
            if len(joints) > 0:
                plot_joints(joints)
            last_plot_val = val
            joints.clear()

    cmd, _ = policy.act(velocity_horizontal=vel)
    action = controller_to_sim(cmd)
    joints.append(np.copy(action))

    sim.action(action)
    for _ in range(substeps):
        sim.step()
    time.sleep(1 / FPS)
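# plot_joints() isn't defined in this snippet. A minimal matplotlib sketch of what such
# a helper might do (hypothetical implementation: one subplot per joint over time):
import matplotlib.pyplot as plt
import numpy as np

def plot_joints(joints):
    data = np.array(joints)  # shape (timesteps, 12)
    fig, axs = plt.subplots(4, 3, sharex=True)
    for j in range(12):
        axs[j // 3][j % 3].plot(data[:, j])
        axs[j // 3][j % 3].set_title(f"joint {j}")
    plt.show()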
import time

import numpy as np

from gym_pupper.common.Utilities import controller_to_sim
from gym_pupper.pupper.policy import HardPolicy

FREQ_CTRL = 60
TEST_BODY = False
TEST_ARM = True

policy = HardPolicy(fps=FREQ_CTRL)

########### BODY
if TEST_BODY:
    iterations = 10_000
    times = 0
    for _ in range(iterations):
        start = time.time()
        _ = controller_to_sim(policy.act(velocity_horizontal=(0.2, 0), yaw_rate=1.0, normalized=True)) * np.pi
        diff = time.time() - start
        times += diff

    print(
        f"BODY: ran {iterations} iterations at {np.around(iterations / times, 1)}Hz, "
        f"so avg of {times / iterations}s per iteration"
    )
    # MBP 2018: ran 100000 iterations at 1749.8Hz, so avg of 0.000571490352153778s per iteration

########### ARM
if TEST_ARM:
    iterations = 100
    times = 0
    for _ in range(iterations):
        start = time.time()
import numpy as np
from scipy import fftpack
from tqdm import trange
import matplotlib.pyplot as plt

from gym_pupper.common.Utilities import controller_to_sim
from gym_pupper.pupper.policy import HardPolicy

policy = HardPolicy()
steps = 2000

joints = []
feet = []
for _ in trange(steps):
    joints_legs, _ = policy.act(velocity_horizontal=(0.2, 0), yaw_rate=0.0, normalized=False)
    action = controller_to_sim(joints_legs)
    joints.append(action)
    feet.append(policy.state.foot_locations.T)

feet = np.array(feet)
print(feet.shape)

x = np.arange(0, steps)
fig, axs = plt.subplots(2, 2, sharex=True, sharey=True)
for foot in range(4):
    im = axs[foot // 2][foot % 2].scatter(feet[:, foot, 0], feet[:, foot, 2],
                                          alpha=0.5, c=x, cmap="viridis")
    # axs[foot % 2][foot // 2].legend()
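# fftpack is imported above but unused in this excerpt, so the script presumably goes on
# to a spectral analysis of the gait. A sketch of what that might look like for a single
# joint trajectory; the joint index and the 60 Hz control rate are assumptions:
joints = np.array(joints)                    # shape (steps, 12)
signal = joints[:, 0] - joints[:, 0].mean()  # first joint, DC offset removed
spectrum = np.abs(fftpack.fft(signal))
freqs = fftpack.fftfreq(len(signal), d=1 / 60)
plt.figure()
plt.plot(freqs[: len(freqs) // 2], spectrum[: len(freqs) // 2])
plt.xlabel("frequency [Hz]")
plt.ylabel("|FFT|")
plt.show()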