def _thunk():
    return CassieEnv(*args, **kwargs)
def _thunk():
    return CassieEnv("walking", clock_based=True, state_est=state_est)
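# The closures above are typically handed to a vectorized-environment helper so
# that each worker can construct its own CassieEnv. A minimal sketch of that
# pattern, under the assumption of a wrapper that accepts a list of such thunks
# (the names below are illustrative, not from this repo):
def make_cassie_env(*args, **kwargs):
    def _thunk():
        return CassieEnv(*args, **kwargs)
    return _thunk

env_fns = [make_cassie_env("walking", clock_based=True, state_est=True) for _ in range(4)]
envs = [fn() for fn in env_fns]  # or pass env_fns to a SubprocVecEnv-style wrapper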
def eval_policy(policy, args, run_args, testing_speed):
    max_traj_len = args.traj_len + args.ramp_up
    visualize = args.viz
    # run_args.dyn_random = True
    env = CassieEnv(traj="aslip", state_est=run_args.state_est, no_delta=run_args.no_delta,
                    learn_gains=run_args.learn_gains, ik_baseline=run_args.ik_baseline,
                    dynamics_randomization=run_args.dyn_random, clock_based=run_args.clock_based,
                    reward="aslip_old", history=run_args.history)

    if args.debug:
        env.debug = True
    print(env.reward_func)

    if hasattr(policy, 'init_hidden_state'):
        policy.init_hidden_state()

    orient_add = 0
    if visualize:
        env.render()
    render_state = True

    state = env.reset_for_test()
    done = False
    timesteps = 0
    eval_reward = 0

    # Data to track
    time_log = []            # time in seconds
    traj_info = []           # information from the reference trajectory library
    actual_state_info = []   # actual MuJoCo state of the robot
    l_footstep = []          # (time, left foot desired placement, left foot actual placement)
    r_footstep = []          # (time, right foot desired placement, right foot actual placement)

    env.update_speed(testing_speed)
    print(env.speed)

    while timesteps < max_traj_len:
        if hasattr(env, 'simrate'):
            start = time.time()

        # Update orientation
        env.orient_add = orient_add
        # (Disabled) rotate the state's pelvis orientation and translational
        # velocity by orient_add before querying the policy; see the active
        # version of this transform in the keyboard-driven eval_policy below.

        action = policy.forward(torch.Tensor(state), deterministic=True).detach().numpy()
        state, reward, done, _ = env.step(action)

        a, _, _, d = env.get_traj_and_state_info()
        traj_info.append(a)
        actual_state_info.append(d)
        time_log.append(timesteps / 40)  # assumes a 40 Hz policy rate
        # A foot with zero reference height is in stance: log its placement error
        if a[1][2] == 0.0:
            l_footstep.append(np.linalg.norm(a[1] - d[1]))
        elif a[2][2] == 0.0:
            r_footstep.append(np.linalg.norm(a[2] - d[2]))

        eval_reward += reward
        timesteps += 1
        qvel = env.sim.qvel()
        # print("actual speed: ", np.linalg.norm(qvel[0:2]))
        # print("commanded speed: ", env.speed)

        if visualize:
            render_state = env.render()
        # (Disabled) real-time pacing at ~40 Hz using env.simrate and time.sleep

    # Drop the last sample so all logs have matching lengths
    actual_state_info = actual_state_info[:-1]
    traj_info = traj_info[:-1]
    time_log = time_log[:-1]

    print("Eval reward: ", eval_reward)

    traj_info = np.array(traj_info)
    actual_state_info = np.array(actual_state_info)
    time_log = np.array(time_log)

    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111, projection='3d')
    ax.set_title("Taskspace tracking performance: {} m/s (simulation)".format(testing_speed))
    ax.plot(traj_info[:, 0, 0], traj_info[:, 0, 1], traj_info[:, 0, 2], label='ROM', c='g')
    ax.plot(traj_info[:, 1, 0], traj_info[:, 1, 1], traj_info[:, 1, 2], c='g')
    ax.plot(traj_info[:, 2, 0], traj_info[:, 2, 1], traj_info[:, 2, 2], c='g')
    ax.plot(actual_state_info[:, 0, 0], actual_state_info[:, 0, 1], actual_state_info[:, 0, 2], label='robot', c='r')
    ax.plot(actual_state_info[:, 1, 0], actual_state_info[:, 1, 1], actual_state_info[:, 1, 2], c='r')
    ax.plot(actual_state_info[:, 2, 0], actual_state_info[:, 2, 1], actual_state_info[:, 2, 2], c='r')
    set_axes_equal(ax)
    plt.legend()
    plt.tight_layout()
    plt.savefig("./plots/taskspace{}.png".format(testing_speed))

    traj_info = traj_info.reshape(-1, 9)
    actual_state_info = actual_state_info.reshape(-1, 9)
    time_log = time_log.reshape(-1, 1)
    l_footstep = np.array(l_footstep)
    r_footstep = np.array(r_footstep)
    x_error = np.linalg.norm(traj_info[:, 0] - actual_state_info[:, 0])
    y_error = np.linalg.norm(traj_info[:, 1] - actual_state_info[:, 1])
    z_error = np.linalg.norm(traj_info[:, 2] - actual_state_info[:, 2])

    # Return matrices of the logged tracking and footstep errors
    return np.array([x_error, y_error, z_error]), np.array([l_footstep, r_footstep])
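# A minimal sketch of driving eval_policy over several commanded speeds and
# stacking the logged tracking errors; the speed list and variable names here
# are illustrative assumptions, not from this repo:
speeds = [0.5, 1.0, 1.5, 2.0]
results = [eval_policy(policy, args, run_args, s) for s in speeds]
xyz_errors = np.stack([r[0] for r in results])  # one (x, y, z) error row per speed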
from rl.utils import renderpolicy
from cassie import CassieEnv
from rl.policies import GaussianMLP, BetaMLP

import torch
import numpy as np
import os
import time

cassie_env = CassieEnv("stepping", clock_based=True, state_est=True)
# cassie_env = CassieIKEnv(clock_based=True)

obs_dim = cassie_env.observation_space.shape[0]  # TODO: could make obs and ac space static properties
action_dim = cassie_env.action_space.shape[0]

# policy = torch.load("./trained_models/stiff_spring.pt")
policy = torch.load("./trained_models/stiff_StateEst_step3.pt")
policy.eval()

# policies = []
# for i in range(5, 12):
#     policy = torch.load("./trained_models/regular_spring{}.pt".format(i))
#     policy.eval()
#     policies.append(policy)
# policy = torch.load("./trained_models/Normal.pt")
# policy.eval()
# policies.append(policy)

iters = 200
state = torch.Tensor(cassie_env.reset_for_test())
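# The script above stops after resetting the environment. A minimal sketch of
# the rollout loop that would typically follow (deterministic actions, rendered
# each step) -- an assumption, not code from this file:
for _ in range(iters):
    action = policy.forward(state, deterministic=True).detach().numpy()
    next_state, reward, done, _ = cassie_env.step(action)
    state = torch.Tensor(next_state)
    cassie_env.render()
    if done:
        state = torch.Tensor(cassie_env.reset_for_test())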
def eval_policy(policy, args, run_args):
    import sys
    import time
    import tty
    import termios
    import select
    import numpy as np
    import torch
    from cassie import CassieEnv, CassieStandingEnv
    from cassie.quaternion_function import *  # euler2quat, inverse_quaternion, quaternion_product, rotate_by_quaternion

    def isData():
        return select.select([sys.stdin], [], [], 0) == ([sys.stdin], [], [])

    max_traj_len = args.traj_len
    visualize = True

    if run_args.env_name == "Cassie-v0":
        env = CassieEnv(traj=run_args.traj, state_est=run_args.state_est,
                        dynamics_randomization=run_args.dyn_random,
                        clock_based=run_args.clock_based, history=run_args.history)
    else:
        env = CassieStandingEnv(state_est=run_args.state_est)

    old_settings = termios.tcgetattr(sys.stdin)
    orient_add = 0

    try:
        tty.setcbreak(sys.stdin.fileno())

        state = env.reset_for_test()
        done = False
        timesteps = 0
        eval_reward = 0
        speed = 0.0

        while True:
            # Poll the keyboard: w/s adjust speed, l/k adjust heading, p applies a push
            if isData():
                c = sys.stdin.read(1)
                if c == 'w':
                    speed += 0.1
                elif c == 's':
                    speed -= 0.1
                elif c == 'l':
                    orient_add += .1
                    print("Increasing orient_add to: ", orient_add)
                elif c == 'k':
                    orient_add -= .1
                    print("Decreasing orient_add to: ", orient_add)
                elif c == 'p':
                    push = 100
                    push_dir = 2
                    force_arr = np.zeros(6)
                    force_arr[push_dir] = push
                    env.sim.apply_force(force_arr)

            env.update_speed(speed)
            print("speed: ", env.speed)

            # Update orientation: rotate the state's pelvis orientation and
            # translational velocity by the inverse heading offset so the policy
            # steers toward the commanded heading
            quaternion = euler2quat(z=orient_add, y=0, x=0)
            iquaternion = inverse_quaternion(quaternion)
            if env.state_est:
                curr_orient = state[1:5]
                curr_transvel = state[14:17]
            else:
                curr_orient = state[2:6]
                curr_transvel = state[20:23]
            new_orient = quaternion_product(iquaternion, curr_orient)
            if new_orient[0] < 0:
                new_orient = -new_orient
            new_translationalVelocity = rotate_by_quaternion(curr_transvel, iquaternion)
            if env.state_est:
                state[1:5] = torch.FloatTensor(new_orient)
                state[14:17] = torch.FloatTensor(new_translationalVelocity)
                # state[0] = 1  # for use with StateEst: replicate the hardware hack where height is always 1
            else:
                state[2:6] = torch.FloatTensor(new_orient)
                state[20:23] = torch.FloatTensor(new_translationalVelocity)

            if hasattr(env, 'simrate'):
                start = time.time()

            action = policy.forward(torch.Tensor(state), deterministic=True).detach().numpy()
            state, reward, done, _ = env.step(action)
            if visualize:
                env.render()
            eval_reward += reward
            timesteps += 1

            if hasattr(env, 'simrate'):
                # assume 30 Hz (hack)
                end = time.time()
                delaytime = max(0, 1000 / 30000 - (end - start))
                time.sleep(delaytime)

        print("Eval reward: ", eval_reward)
    finally:
        termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings)
def eval_policy(policy, args, run_args):
    aslip = True if run_args.traj == "aslip" else False
    cassie_env = CassieEnv(traj=run_args.traj, state_est=run_args.state_est, no_delta=run_args.no_delta,
                           dynamics_randomization=run_args.dyn_random, clock_based=run_args.clock_based,
                           history=run_args.history, reward=run_args.reward)
    cassie_env.debug = args.debug
    visualize = not args.no_viz
    traj_len = args.traj_len

    if aslip:
        traj_info = []      # reference trajectory state
        traj_cmd_info = []  # what actually gets sent to the robot as state
    robot_state_info = []   # robot's estimated state
    actual_state_info = []  # actual MuJoCo state of the robot

    state = torch.Tensor(cassie_env.reset_for_test())
    cassie_env.update_speed(2.0)
    print(cassie_env.speed)

    count, passed, done = 0, 1, False
    while count < traj_len and not done:
        if visualize:
            cassie_env.render()

        # Get action and act
        action = policy(state, True)
        action = action.data.numpy()
        state, reward, done, _ = cassie_env.step(action)
        state = torch.Tensor(state)
        print(reward)

        # See if end state reached
        if done or cassie_env.sim.qpos()[2] < 0.4:
            print(done)
            passed = 0
            print("failed")

        # Get trajectory info and robot info
        if aslip:
            a, b, c, d = cassie_env.get_traj_and_state_info()
            traj_info.append(a)
            traj_cmd_info.append(b)
        else:
            c, d = cassie_env.get_state_info()
        robot_state_info.append(c)
        actual_state_info.append(d)

        count += 1

    # Drop the last sample so all logs have matching lengths
    robot_state_info = robot_state_info[:-1]
    actual_state_info = actual_state_info[:-1]
    if aslip:
        traj_info = traj_info[:-1]
        traj_cmd_info = traj_cmd_info[:-1]

        traj_info = np.array(traj_info)
        traj_cmd_info = np.array(traj_cmd_info)
        robot_state_info = np.array(robot_state_info)
        actual_state_info = np.array(actual_state_info)

        fig, axs = plt.subplots(2, 2, figsize=(10, 10))

        print(traj_info.shape)
        axs[0][0].set_title("XZ plane of traj_info")
        axs[0][0].plot(traj_info[:, 0, 0], traj_info[:, 0, 2], 'o-', label='cpos')
        axs[0][0].plot(traj_info[:, 1, 0], traj_info[:, 1, 2], 'o-', label='lpos')
        axs[0][0].plot(traj_info[:, 2, 0], traj_info[:, 2, 2], 'o-', label='rpos')

        print(traj_cmd_info.shape)
        axs[0][1].set_title("XZ plane of traj_cmd_info")
        axs[0][1].plot(traj_cmd_info[:, 0, 0], traj_cmd_info[:, 0, 2], label='cpos')
        axs[0][1].plot(traj_cmd_info[:, 1, 0], traj_cmd_info[:, 1, 2], label='lpos')
        axs[0][1].plot(traj_cmd_info[:, 2, 0], traj_cmd_info[:, 2, 2], label='rpos')

        print(robot_state_info.shape)
        axs[1][0].set_title("XZ plane of robot_state_info")
        axs[1][0].plot(robot_state_info[:, 0, 0], robot_state_info[:, 0, 2], label='cpos')
        axs[1][0].plot(robot_state_info[:, 1, 0], robot_state_info[:, 1, 2], label='lpos')
        axs[1][0].plot(robot_state_info[:, 2, 0], robot_state_info[:, 2, 2], label='rpos')

        print(actual_state_info.shape)
        axs[1][1].set_title("XZ plane of actual_state_info")
        axs[1][1].plot(actual_state_info[:, 0, 0], actual_state_info[:, 0, 2], label='cpos')
        axs[1][1].plot(actual_state_info[:, 1, 0], actual_state_info[:, 1, 2], label='lpos')
        axs[1][1].plot(actual_state_info[:, 2, 0], actual_state_info[:, 2, 2], label='rpos')

        plt.legend()
        plt.tight_layout()
        plt.show()
    else:
        robot_state_info = np.array(robot_state_info)
        actual_state_info = np.array(actual_state_info)

        fig, axs = plt.subplots(1, 2, figsize=(10, 10))

        print(robot_state_info.shape)
        axs[0].set_title("XZ plane of robot_state_info")
        axs[0].plot(robot_state_info[:, 0, 0], robot_state_info[:, 0, 2], label='cpos')
        axs[0].plot(robot_state_info[:, 1, 0], robot_state_info[:, 1, 2], label='lpos')
        axs[0].plot(robot_state_info[:, 2, 0], robot_state_info[:, 2, 2], label='rpos')

        print(actual_state_info.shape)
        axs[1].set_title("XZ plane of actual_state_info")
        axs[1].plot(actual_state_info[:, 0, 0], actual_state_info[:, 0, 2], label='cpos')
        axs[1].plot(actual_state_info[:, 1, 0], actual_state_info[:, 1, 2], label='lpos')
        axs[1].plot(actual_state_info[:, 2, 0], actual_state_info[:, 2, 2], label='rpos')

        plt.legend()
        plt.tight_layout()
        plt.show()
import sys
sys.path.append("..")  # add the parent directory to the Python module path

from rl.utils import renderpolicy, rendermultipolicy, renderpolicy_speedinput, rendermultipolicy_speedinput
from cassie import CassieEnv

import torch
import numpy as np
import os
import time
import argparse
import pickle

parser = argparse.ArgumentParser()
parser.add_argument("--path", type=str, default="./trained_models/ppo/Cassie-v0/7b7e24-seed0/",
                    help="path to folder containing policy and run details")
args = parser.parse_args()
run_args = pickle.load(open(args.path + "experiment.pkl", "rb"))

cassie_env = CassieEnv(traj=run_args.traj, clock_based=run_args.clock_based, state_est=run_args.state_est,
                       dynamics_randomization=run_args.dyn_random, no_delta=run_args.no_delta)
policy = torch.load(args.path + "actor.pt")
policy.eval()

# cassie_env = CassieEnv(traj="aslip", clock_based=False, state_est=True, dynamics_randomization=False, no_delta=False)
# policy = torch.load(args.path + "aslip_unified_task10_v7.pt")
# policy.eval()

renderpolicy_speedinput(cassie_env, policy, deterministic=False, dt=0.05, speedup=2)
def _thunk():
    return CassieEnv("walking", clock_based=True)
                    type=str, default=None,
                    help="path to folder containing policy and run details")
args = parser.parse_args()
run_args = pickle.load(open(args.path + "experiment.pkl", "rb"))

RUN_NAME = run_args.run_name if run_args.run_name is not None else "plot"
# RUN_NAME = "7b7e24-seed0"
# POLICY_PATH = "../trained_models/ppo/Cassie-v0/" + RUN_NAME + "/actor.pt"

# Load environment and policy
# env_fn = partial(CassieEnv_speed_no_delta_neutral_foot, "walking", clock_based=True, state_est=True)
cassie_env = CassieEnv(traj=run_args.traj, state_est=run_args.state_est,
                       dynamics_randomization=run_args.dyn_random,
                       clock_based=run_args.clock_based, history=run_args.history)
policy = torch.load(args.path + "actor.pt")

def avg_pols(policies, state):
    # Average the deterministic actions of an ensemble of policies
    total_act = np.zeros(10)
    for policy in policies:
        action = policy.forward(torch.Tensor(state), True).detach().numpy()
        total_act += action
    return total_act / len(policies)

obs_dim = cassie_env.observation_space.shape[0]  # TODO: could make obs and ac space static properties
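# A minimal sketch of using avg_pols in a rollout, assuming `policies` is a
# list of loaded actor networks (illustrative, not from this file):
state = torch.Tensor(cassie_env.reset_for_test())
for _ in range(100):
    action = avg_pols(policies, state)
    next_state, reward, done, _ = cassie_env.step(action)
    state = torch.Tensor(next_state)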
import sys, time
from cassie.quaternion_function import *
import tty
import termios
import select
import numpy as np
from functools import partial

from rl.envs.wrappers import SymmetricEnv
from cassie import CassieEnv, CassiePlayground, CassieStandingEnv, CassieEnv_noaccel_footdist_omniscient, CassieEnv_noaccel_footdist

def isData():
    return select.select([sys.stdin], [], [], 0) == ([sys.stdin], [], [])

env = CassieEnv(state_est=True, dynamics_randomization=False, history=0)
env_fn = partial(CassieEnv, state_est=True, dynamics_randomization=False, history=0)
# env = CassieEnv_noaccel_footdist(state_est=True, dynamics_randomization=False, history=0)
# env_fn = partial(CassieEnv_noaccel_footdist, state_est=True, dynamics_randomization=False, history=0)

# Negative entries in mirrored_act mean "negate the value at abs(index)";
# -0.1 appears to stand in for "negate index 0", since -0 can't be written directly.
sym_env = SymmetricEnv(env_fn, mirrored_obs=env_fn().mirrored_obs,
                       mirrored_act=[-5, -6, 7, 8, 9, -0.1, -1, 2, 3, 4])

# obs = env.get_full_state()
# print("obs len: ", len(obs))
# exit()

path = "./trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/"
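# A minimal sketch of how a signed mirror-index spec like mirrored_act is
# typically interpreted; this is an assumption about SymmetricEnv's convention,
# not code from rl.envs.wrappers:
def mirror_vector(vec, signed_idx):
    signed_idx = np.asarray(signed_idx)
    # abs() recovers the source index; sign() says whether to negate the value
    return np.sign(signed_idx) * np.asarray(vec)[np.abs(signed_idx).astype(int)]

# Example: swap the left/right actuator blocks (indices 0-4 <-> 5-9) and flip
# the sign of the hip roll/yaw entries.
mirrored_action = mirror_vector(np.arange(10, dtype=float), [-5, -6, 7, 8, 9, -0.1, -1, 2, 3, 4])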
def eval_policy(policies, args, run_args, testing_speed, num_steps=4, num_trials=2):
    FULL_l_foot_poses_actual = []
    FULL_l_foot_poses_ideal = []
    FULL_r_foot_poses_actual = []
    FULL_r_foot_poses_ideal = []
    FULL_all_footplace_err = []

    for trial_num in range(num_trials):
        l_foot_poses_actual = []
        l_foot_poses_ideal = []
        r_foot_poses_actual = []
        r_foot_poses_ideal = []
        all_footplace_err = []

        for policy in policies:
            footplace_err = []
            max_traj_len = args.traj_len
            visualize = not args.no_vis
            # run_args.dyn_random = True
            env = CassieEnv(traj="aslip", state_est=run_args.state_est, no_delta=run_args.no_delta,
                            learn_gains=run_args.learn_gains, ik_baseline=run_args.ik_baseline,
                            dynamics_randomization=run_args.dyn_random, clock_based=run_args.clock_based,
                            reward="aslip_old", history=run_args.history)

            if args.debug:
                env.debug = True
            print(env.reward_func)

            if hasattr(policy, 'init_hidden_state'):
                policy.init_hidden_state()

            orient_add = 0
            if visualize:
                env.render()
            render_state = True

            state = env.reset_for_test()
            done = False
            timesteps = 0
            eval_reward = 0

            # Data to track
            left_swing, right_swing, l_ideal_land_pos, r_ideal_land_pos = False, False, None, None
            footstep_count = 0

            env.update_speed(testing_speed)
            print(env.speed)

            # Get the fixed delta for each footstep position change
            right_delta = get_ref_aslip_global_state_no_drift_correct(env, phase=7)

            while footstep_count - 1 < num_steps:
                if hasattr(env, 'simrate'):
                    start = time.time()

                # Update orientation
                env.orient_add = orient_add
                # (Disabled) same orientation/velocity rotation as in the other eval
                # scripts; see the keyboard-driven eval_policy above for the active version.

                action = policy.forward(torch.Tensor(state), deterministic=True).detach().numpy()
                state, reward, done, _ = env.step(action)

                # Get the delta for right and left by looking at phases where both feet are in stance
                right_to_left = get_ref_aslip_global_state_no_drift_correct(env, phase=16)[2][:2] \
                    - get_ref_aslip_global_state_no_drift_correct(env, phase=16)[0][:2]
                left_to_right = get_ref_aslip_global_state_no_drift_correct(env, phase=29)[0][:2] \
                    - get_ref_aslip_global_state_no_drift_correct(env, phase=29)[2][:2]
                delta = right_to_left + left_to_right

                # Allow some ramp-up time before collecting data, so r_ideal_land_pos
                # and l_ideal_land_pos are populated first
                if timesteps > env.phaselen * 2:
                    # Left foot swing, right foot stance
                    if env.phase == 7:
                        left_swing = True
                        right_swing = False
                        r_actual_land_pos = env.r_foot_pos[:2]
                        # With data from the last step, compute the error between where
                        # the right foot landed and where it should have landed,
                        # BEFORE updating r_ideal_land_pos
                        if footstep_count >= 1:
                            r_foot_poses_actual.append(r_actual_land_pos)
                            footplace_err.append(np.linalg.norm(r_ideal_land_pos - r_actual_land_pos))
                        l_ideal_land_pos = r_actual_land_pos + right_to_left
                        l_foot_poses_ideal.append(l_ideal_land_pos)
                    # Left foot stance, right foot swing
                    elif env.phase == 23:
                        left_swing = False
                        right_swing = True
                        l_actual_land_pos = env.l_foot_pos[:2]
                        # Same for the left foot: compute the landing error
                        # BEFORE updating l_ideal_land_pos
                        if footstep_count >= 1:
                            l_foot_poses_actual.append(l_actual_land_pos)
                            footplace_err.append(np.linalg.norm(l_ideal_land_pos - l_actual_land_pos))
                        r_ideal_land_pos = l_actual_land_pos + left_to_right
                        r_foot_poses_ideal.append(r_ideal_land_pos)
                        footstep_count += 1

                eval_reward += reward
                timesteps += 1
                qvel = env.sim.qvel()
                # print("actual speed: ", np.linalg.norm(qvel[0:2]))
                # print("commanded speed: ", env.speed)

                if visualize:
                    render_state = env.render()

            all_footplace_err.append(np.array(footplace_err))

        print("Avg Foot Placement Error: ", np.mean(all_footplace_err, axis=1))
        print("Stddev Foot Placement Error: ", np.std(all_footplace_err, axis=1))
        print("Max/Min Foot Placement Error: {} / {}".format(np.max(all_footplace_err, axis=1),
                                                             np.min(all_footplace_err, axis=1)))
        print("Num pairs: ", len(all_footplace_err) * len(all_footplace_err[0]))

        # Trim the ideal footstep poses so desired/actual pairs line up
        l_foot_poses_ideal = l_foot_poses_ideal[1:]
        r_foot_poses_ideal = r_foot_poses_ideal[:-1]

        FULL_l_foot_poses_actual.append(l_actual_land_pos)
        FULL_l_foot_poses_ideal.append(l_ideal_land_pos)
        FULL_r_foot_poses_actual.append(r_actual_land_pos)
        FULL_r_foot_poses_ideal.append(r_ideal_land_pos)
        FULL_all_footplace_err.append(all_footplace_err)

    FULL_l_foot_poses_ideal = np.array(FULL_l_foot_poses_ideal)
    FULL_r_foot_poses_ideal = np.array(FULL_r_foot_poses_ideal)
    FULL_l_foot_poses_actual = np.array(FULL_l_foot_poses_actual)
    FULL_r_foot_poses_actual = np.array(FULL_r_foot_poses_actual)
    FULL_all_footplace_err = np.array(FULL_all_footplace_err)

    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111)
    ax.scatter(FULL_l_foot_poses_ideal[:, 0], FULL_l_foot_poses_ideal[:, 1], c='tab:green', label='desired', alpha=0.5)
    ax.scatter(FULL_r_foot_poses_ideal[:, 0], FULL_r_foot_poses_ideal[:, 1], c='tab:green', alpha=0.5)
    ax.scatter(FULL_l_foot_poses_actual[:, 0], FULL_l_foot_poses_actual[:, 1], c='tab:red', label='actual', alpha=0.5)
    ax.scatter(FULL_r_foot_poses_actual[:, 0], FULL_r_foot_poses_actual[:, 1], c='tab:red', alpha=0.5)
    ax.axis('equal')
    ax.set_ylabel('y (m)')
    ax.set_xlabel('x (m)')
    ax.set_title('Desired vs Actual Foot Placements ({} m/s)'.format(testing_speed))
    plt.savefig('./plots/footplace{}.png'.format(testing_speed))
    # plt.show()

    return FULL_all_footplace_err, np.std(FULL_all_footplace_err, axis=1)
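# Illustrative call of the footstep evaluation above, assuming two loaded
# actor networks (policy_a and policy_b are placeholder names):
errs, err_std = eval_policy([policy_a, policy_b], args, run_args, testing_speed=1.0)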
parser = argparse.ArgumentParser()
parser.add_argument("--path", type=str, default=None,
                    help="path to folder containing policy and run details")
args = parser.parse_args()
run_args = pickle.load(open(args.path + "experiment.pkl", "rb"))

# RUN_NAME = "7b7e24-seed0"
# POLICY_PATH = "../trained_models/ppo/Cassie-v0/" + RUN_NAME + "/actor.pt"

# Load environment and policy
# env_fn = partial(CassieEnv_speed_no_delta_neutral_foot, "walking", clock_based=True, state_est=True)
cassie_env = CassieEnv(traj=run_args.traj, clock_based=run_args.clock_based,
                       state_est=run_args.state_est, dynamics_randomization=run_args.dyn_random)
policy = torch.load(args.path + "actor.pt")

state = torch.Tensor(cassie_env.reset_for_test())
# cassie_env.sim.step_pd(self.u)
cassie_env.speed = 0.5
cassie_env.phase_add = 1
num_steps = cassie_env.phaselen + 1

# Simulate for "wait_time" first to stabilize
for i in range(num_steps * 2):
    action = policy(state, True)
    action = action.data.numpy()
    state, reward, done, _ = cassie_env.step(action)
    state = torch.Tensor(state)

curr_time = cassie_env.sim.time()
import time
import numpy as np

from cassie import CassieEnv
from cassie.cassiemujoco import *
from cassie.trajectory import CassieTrajectory

traj = CassieTrajectory("cassie/trajectory/stepdata-2018.bin")
env = CassieEnv("cassie/trajectory/stepdata.bin")
u = pd_in_t()

print(len(traj))

# Test the actual recorded trajectory by playing it back state-by-state
def test_ref():
    csim = CassieSim()
    cvis = CassieVis()
    for t in range(len(traj.qpos)):
        qpos = traj.qpos[t]
        qvel = traj.qvel[t]
        csim.set_qpos(qpos)
        csim.set_qvel(qvel)