Example 1
def _thunk():
    return CassieEnv(*args, **kwargs)
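This _thunk is the inner closure of an environment-factory function; args and kwargs are captured from the enclosing scope. A minimal sketch of the usual wrapper is shown below, assuming a factory named make_cassie_env (that name and the usage lines are illustrative, not part of the original listing):

from cassie import CassieEnv

def make_cassie_env(*args, **kwargs):
    # Return a zero-argument callable so a vectorized-env wrapper can build
    # each CassieEnv lazily inside its own worker.
    def _thunk():
        return CassieEnv(*args, **kwargs)
    return _thunk

# Hypothetical usage: several independent constructors for parallel rollouts.
env_fns = [make_cassie_env("walking", clock_based=True) for _ in range(4)]
envs = [fn() for fn in env_fns]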
Example 2
def _thunk():
    return CassieEnv("walking", clock_based=True, state_est=state_est)
Example 3
def eval_policy(policy, args, run_args, testing_speed):

    max_traj_len = args.traj_len + args.ramp_up
    visualize = args.viz
    # run_args.dyn_random = True

    env = CassieEnv(traj="aslip",
                    state_est=run_args.state_est,
                    no_delta=run_args.no_delta,
                    learn_gains=run_args.learn_gains,
                    ik_baseline=run_args.ik_baseline,
                    dynamics_randomization=run_args.dyn_random,
                    clock_based=run_args.clock_based,
                    reward="aslip_old",
                    history=run_args.history)

    if args.debug:
        env.debug = True

    print(env.reward_func)

    if hasattr(policy, 'init_hidden_state'):
        policy.init_hidden_state()

    orient_add = 0

    if visualize:
        env.render()
    render_state = True

    state = env.reset_for_test()
    done = False
    timesteps = 0
    eval_reward = 0

    # Data to track
    time_log = []  # time in seconds
    traj_info = []  # Information from reference trajectory library
    actual_state_info = []  # actual mujoco state of the robot
    l_footstep = []  # per-footstep error ||desired - actual|| for the left foot
    r_footstep = []  # per-footstep error ||desired - actual|| for the right foot
    # footstep = []

    env.update_speed(testing_speed)
    print(env.speed)

    while timesteps < max_traj_len:

        if hasattr(env, 'simrate'):
            start = time.time()

        # if (not env.vis.ispaused()):
        # Update Orientation
        env.orient_add = orient_add
        # quaternion = euler2quat(z=orient_add, y=0, x=0)
        # iquaternion = inverse_quaternion(quaternion)

        # # TODO: Should probably not assume these indices. Should make them not hard coded
        # if env.state_est:
        #     curr_orient = state[1:5]
        #     curr_transvel = state[15:18]
        # else:
        #     curr_orient = state[2:6]
        #     curr_transvel = state[20:23]

        # new_orient = quaternion_product(iquaternion, curr_orient)

        # if new_orient[0] < 0:
        #     new_orient = -new_orient

        # new_translationalVelocity = rotate_by_quaternion(curr_transvel, iquaternion)

        # if env.state_est:
        #     state[1:5] = torch.FloatTensor(new_orient)
        #     state[15:18] = torch.FloatTensor(new_translationalVelocity)
        #     # state[0] = 1      # For use with StateEst. Replicate hack that height is always set to one on hardware.
        # else:
        #     state[2:6] = torch.FloatTensor(new_orient)
        #     state[20:23] = torch.FloatTensor(new_translationalVelocity)

        action = policy.forward(torch.Tensor(state),
                                deterministic=True).detach().numpy()

        state, reward, done, _ = env.step(action)

        # if timesteps > args.ramp_up:
        # print(env.counter)
        # a = reference (ROM) task-space positions, d = actual robot positions;
        # each is indexed [center of mass, left foot, right foot].
        a, _, _, d = env.get_traj_and_state_info()
        traj_info.append(a)
        actual_state_info.append(d)
        time_log.append(timesteps / 40)  # policy steps at roughly 40 Hz

        # A reference foot height of zero means that foot should be in stance,
        # so log its placement error against the actual foot position.
        if a[1][2] == 0.0:
            l_footstep.append(np.linalg.norm(a[1] - d[1]))
        elif a[2][2] == 0.0:
            r_footstep.append(np.linalg.norm(a[2] - d[2]))

        # if traj_info[]

        # if env.lfoot_vel[2] < -0.6:
        #     print("left foot z vel over 0.6: ", env.lfoot_vel[2])
        # if env.rfoot_vel[2] < -0.6:
        #     print("right foot z vel over 0.6: ", env.rfoot_vel[2])

        eval_reward += reward
        timesteps += 1
        qvel = env.sim.qvel()
        # print("actual speed: ", np.linalg.norm(qvel[0:2]))
        # print("commanded speed: ", env.speed)

        if visualize:
            render_state = env.render()
        # if hasattr(env, 'simrate'):
        #     # assume 40hz
        #     end = time.time()
        #     delaytime = max(0, 1000 / 40000 - (end-start))
        #     time.sleep(delaytime)

    actual_state_info = actual_state_info[:-1]
    traj_info = traj_info[:-1]
    time_log = time_log[:-1]

    print("Eval reward: ", eval_reward)

    traj_info = np.array(traj_info)
    actual_state_info = np.array(actual_state_info)
    time_log = np.array(time_log)

    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111, projection='3d')

    ax.set_title("Taskspace tracking performance : {} m/s (simulation)".format(
        testing_speed))
    ax.plot(traj_info[:, 0, 0],
            traj_info[:, 0, 1],
            traj_info[:, 0, 2],
            label='ROM',
            c='g')
    ax.plot(traj_info[:, 1, 0], traj_info[:, 1, 1], traj_info[:, 1, 2], c='g')
    ax.plot(traj_info[:, 2, 0], traj_info[:, 2, 1], traj_info[:, 2, 2], c='g')
    ax.plot(actual_state_info[:, 0, 0],
            actual_state_info[:, 0, 1],
            actual_state_info[:, 0, 2],
            label='robot',
            c='r')
    ax.plot(actual_state_info[:, 1, 0],
            actual_state_info[:, 1, 1],
            actual_state_info[:, 1, 2],
            c='r')
    ax.plot(actual_state_info[:, 2, 0],
            actual_state_info[:, 2, 1],
            actual_state_info[:, 2, 2],
            c='r')

    set_axes_equal(ax)

    plt.legend()
    plt.tight_layout()
    plt.savefig("./plots/taskspace{}.png".format(testing_speed))

    traj_info = traj_info.reshape(-1, 9)
    actual_state_info = actual_state_info.reshape(-1, 9)
    time_log = time_log.reshape(-1, 1)
    l_footstep = np.array(l_footstep)
    r_footstep = np.array(r_footstep)

    x_error = np.linalg.norm(traj_info[:, 0] - actual_state_info[:, 0])
    y_error = np.linalg.norm(traj_info[:, 1] - actual_state_info[:, 1])
    z_error = np.linalg.norm(traj_info[:, 2] - actual_state_info[:, 2])

    # print(traj_info.shape)
    # print(actual_state_info.shape)
    # print(time_log.shape)

    # return matrix of logged data
    return np.array([x_error, y_error,
                     z_error]), np.array([l_footstep, r_footstep])
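A hedged usage sketch for eval_policy above, following the loading pattern used in the other examples in this listing (actor.pt via torch.load, experiment.pkl via pickle). The command-line flags mirror the fields the function reads from args; the default values and path are assumptions:

import argparse
import pickle

import torch

parser = argparse.ArgumentParser()
parser.add_argument("--path", type=str, default="./trained_models/ppo/Cassie-v0/7b7e24-seed0/")
parser.add_argument("--traj_len", type=int, default=300)   # steps to log
parser.add_argument("--ramp_up", type=int, default=100)    # extra steps before the trajectory window ends
parser.add_argument("--viz", action="store_true")
parser.add_argument("--debug", action="store_true")
args = parser.parse_args()

run_args = pickle.load(open(args.path + "experiment.pkl", "rb"))
policy = torch.load(args.path + "actor.pt")
policy.eval()

xyz_err, footstep_err = eval_policy(policy, args, run_args, testing_speed=1.0)
print("Task-space error (x, y, z):", xyz_err)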
Example 4
from rl.utils import renderpolicy
from cassie import CassieEnv
from rl.policies import GaussianMLP, BetaMLP

import torch
import numpy as np
import os
import time

cassie_env = CassieEnv("stepping", clock_based=True, state_est=True)
# cassie_env = CassieIKEnv(clock_based=True)
obs_dim = cassie_env.observation_space.shape[0]  # TODO: could make obs and ac space static properties
action_dim = cassie_env.action_space.shape[0]

# policy = torch.load("./trained_models/stiff_spring.pt")

policy = torch.load("./trained_models/stiff_StateEst_step3.pt")
policy.eval()

# policies = []
# for i in range(5, 12):
#    policy = torch.load("./trained_models/regular_spring{}.pt".format(i))
#    policy.eval()
#    policies.append(policy)
# policy = torch.load("./trained_models/Normal.pt")
# policy.eval()
# policies.append(policy)

iters = 200
state = torch.Tensor(cassie_env.reset_for_test())
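The listing stops right after the reset; a minimal sketch of the rollout loop that typically follows, pieced together from the stepping pattern used in the other examples here (the pacing constant and the way iters is consumed are assumptions):

for t in range(iters):
    # Deterministic action from the loaded policy, as in the other eval snippets.
    with torch.no_grad():
        action = policy.forward(state, deterministic=True).numpy()

    next_state, reward, done, _ = cassie_env.step(action)
    state = torch.Tensor(next_state)

    cassie_env.render()
    time.sleep(0.03)  # rough real-time pacing (assumed rate)

    if done:
        state = torch.Tensor(cassie_env.reset_for_test())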
Example 5
def eval_policy(policy, args, run_args):

    import tty
    import termios
    import select
    import numpy as np
    from cassie import CassieEnv, CassieStandingEnv

    def isData():
        return select.select([sys.stdin], [], [], 0) == ([sys.stdin], [], [])

    max_traj_len = args.traj_len
    visualize = True

    if run_args.env_name == "Cassie-v0":
        env = CassieEnv(traj=run_args.traj,
                        state_est=run_args.state_est,
                        dynamics_randomization=run_args.dyn_random,
                        clock_based=run_args.clock_based,
                        history=run_args.history)
    else:
        env = CassieStandingEnv(state_est=run_args.state_est)

    old_settings = termios.tcgetattr(sys.stdin)

    orient_add = 0

    try:
        tty.setcbreak(sys.stdin.fileno())

        state = env.reset_for_test()
        done = False
        timesteps = 0
        eval_reward = 0
        speed = 0.0

        while True:

            if isData():
                c = sys.stdin.read(1)
                if c == 'w':
                    speed += 0.1
                elif c == 's':
                    speed -= 0.1
                elif c == 'l':
                    orient_add += .1
                    print("Increasing orient_add to: ", orient_add)
                elif c == 'k':
                    orient_add -= .1
                    print("Decreasing orient_add to: ", orient_add)
                elif c == 'p':
                    push = 100
                    push_dir = 2
                    force_arr = np.zeros(6)
                    force_arr[push_dir] = push
                    env.sim.apply_force(force_arr)

                env.update_speed(speed)
                print("speed: ", env.speed)

            # Update Orientation
            quaternion = euler2quat(z=orient_add, y=0, x=0)
            iquaternion = inverse_quaternion(quaternion)

            if env.state_est:
                curr_orient = state[1:5]
                curr_transvel = state[14:17]
            else:
                curr_orient = state[2:6]
                curr_transvel = state[20:23]

            new_orient = quaternion_product(iquaternion, curr_orient)

            if new_orient[0] < 0:
                new_orient = -new_orient

            new_translationalVelocity = rotate_by_quaternion(
                curr_transvel, iquaternion)

            if env.state_est:
                state[1:5] = torch.FloatTensor(new_orient)
                state[14:17] = torch.FloatTensor(new_translationalVelocity)
                # state[0] = 1      # For use with StateEst. Replicate hack that height is always set to one on hardware.
            else:
                state[2:6] = torch.FloatTensor(new_orient)
                state[20:23] = torch.FloatTensor(new_translationalVelocity)

            if hasattr(env, 'simrate'):
                start = time.time()

            action = policy.forward(torch.Tensor(state),
                                    deterministic=True).detach().numpy()
            state, reward, done, _ = env.step(action)
            if visualize:
                env.render()
            eval_reward += reward
            timesteps += 1

            if hasattr(env, 'simrate'):
                # assume 30hz (hack)
                end = time.time()
                delaytime = max(0, 1000 / 30000 - (end - start))
                time.sleep(delaytime)

        print("Eval reward: ", eval_reward)

    finally:
        termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings)
Example 6
def eval_policy(policy, args, run_args):

    aslip = run_args.traj == "aslip"

    cassie_env = CassieEnv(traj=run_args.traj,
                           state_est=run_args.state_est,
                           no_delta=run_args.no_delta,
                           dynamics_randomization=run_args.dyn_random,
                           clock_based=run_args.clock_based,
                           history=run_args.history,
                           reward=run_args.reward)
    cassie_env.debug = args.debug
    visualize = not args.no_viz
    traj_len = args.traj_len

    if aslip:
        traj_info = []  # reference trajectory info
        traj_cmd_info = []  # what actually gets sent to robot as state
    robot_state_info = []  # robot's estimated state
    actual_state_info = []  # actual mujoco state of the robot

    state = torch.Tensor(cassie_env.reset_for_test())
    cassie_env.update_speed(2.0)
    print(cassie_env.speed)
    count, passed, done = 0, 1, False
    while count < traj_len and not done:

        if visualize:
            cassie_env.render()

        # Get action and act
        action = policy(state, True)
        action = action.data.numpy()
        state, reward, done, _ = cassie_env.step(action)
        state = torch.Tensor(state)

        print(reward)

        # print(cassie_env.phase)

        # See if end state reached
        if done or cassie_env.sim.qpos()[2] < 0.4:
            print(done)
            passed = 0
            print("failed")

        # Get trajectory info and robot info
        if aslip:
            a, b, c, d = cassie_env.get_traj_and_state_info()
            traj_info.append(a)
            traj_cmd_info.append(b)
        else:
            c, d = cassie_env.get_state_info()
        robot_state_info.append(c)
        actual_state_info.append(d)

        count += 1

    robot_state_info = robot_state_info[:-1]
    actual_state_info = actual_state_info[:-1]

    if aslip:

        traj_info = traj_info[:-1]
        traj_cmd_info = traj_cmd_info[:-1]

        traj_info = np.array(traj_info)
        traj_cmd_info = np.array(traj_cmd_info)
        robot_state_info = np.array(robot_state_info)
        actual_state_info = np.array(actual_state_info)

        fig, axs = plt.subplots(2, 2, figsize=(10, 10))

        # print(traj_info)

        print(traj_info.shape)
        axs[0][0].set_title("XZ plane of traj_info")
        axs[0][0].plot(traj_info[:, 0, 0],
                       traj_info[:, 0, 2],
                       'o-',
                       label='cpos')
        axs[0][0].plot(traj_info[:, 1, 0],
                       traj_info[:, 1, 2],
                       'o-',
                       label='lpos')
        axs[0][0].plot(traj_info[:, 2, 0],
                       traj_info[:, 2, 2],
                       'o-',
                       label='rpos')

        print(traj_cmd_info.shape)
        axs[0][1].set_title("XZ plane of traj_cmd_info")
        axs[0][1].plot(traj_cmd_info[:, 0, 0],
                       traj_cmd_info[:, 0, 2],
                       label='cpos')
        axs[0][1].plot(traj_cmd_info[:, 1, 0],
                       traj_cmd_info[:, 1, 2],
                       label='lpos')
        axs[0][1].plot(traj_cmd_info[:, 2, 0],
                       traj_cmd_info[:, 2, 2],
                       label='rpos')

        print(robot_state_info.shape)
        axs[1][0].set_title("XZ plane of robot_state_info")
        axs[1][0].plot(robot_state_info[:, 0, 0],
                       robot_state_info[:, 0, 2],
                       label='cpos')
        axs[1][0].plot(robot_state_info[:, 1, 0],
                       robot_state_info[:, 1, 2],
                       label='lpos')
        axs[1][0].plot(robot_state_info[:, 2, 0],
                       robot_state_info[:, 2, 2],
                       label='rpos')

        print(actual_state_info.shape)
        axs[1][1].set_title("XZ plane of actual_state_info")
        axs[1][1].plot(actual_state_info[:, 0, 0],
                       actual_state_info[:, 0, 2],
                       label='cpos')
        axs[1][1].plot(actual_state_info[:, 1, 0],
                       actual_state_info[:, 1, 2],
                       label='lpos')
        axs[1][1].plot(actual_state_info[:, 2, 0],
                       actual_state_info[:, 2, 2],
                       label='rpos')

        plt.legend()
        plt.tight_layout()
        plt.show()

    else:

        robot_state_info = np.array(robot_state_info)
        actual_state_info = np.array(actual_state_info)

        fig, axs = plt.subplots(1, 2, figsize=(10, 10))

        print(robot_state_info.shape)
        axs[0].set_title("XZ plane of robot_state_info")
        axs[0].plot(robot_state_info[:, 0, 0],
                    robot_state_info[:, 0, 2],
                    label='cpos')
        axs[0].plot(robot_state_info[:, 1, 0],
                    robot_state_info[:, 1, 2],
                    label='lpos')
        axs[0].plot(robot_state_info[:, 2, 0],
                    robot_state_info[:, 2, 2],
                    label='rpos')

        print(actual_state_info.shape)
        axs[1].set_title("XZ plane of actual_state_info")
        axs[1].plot(actual_state_info[:, 0, 0],
                    actual_state_info[:, 0, 2],
                    label='cpos')
        axs[1].plot(actual_state_info[:, 1, 0],
                    actual_state_info[:, 1, 2],
                    label='lpos')
        axs[1].plot(actual_state_info[:, 2, 0],
                    actual_state_info[:, 2, 2],
                    label='rpos')

        plt.legend()
        plt.tight_layout()
        plt.show()
Example 7
import sys
sys.path.append("..") # Adds higher directory to python modules path.

from rl.utils import renderpolicy, rendermultipolicy, renderpolicy_speedinput, rendermultipolicy_speedinput
from cassie import CassieEnv

import torch

import numpy as np
import os
import time

import argparse
import pickle

parser = argparse.ArgumentParser()
parser.add_argument("--path", type=str, default="./trained_models/ppo/Cassie-v0/7b7e24-seed0/", help="path to folder containing policy and run details")
args = parser.parse_args()
run_args = pickle.load(open(args.path + "experiment.pkl", "rb"))

cassie_env = CassieEnv(traj=run_args.traj, clock_based=run_args.clock_based, state_est=run_args.state_est, dynamics_randomization=run_args.dyn_random, no_delta=run_args.no_delta)
policy = torch.load(args.path + "actor.pt")
policy.eval()

# cassie_env = CassieEnv(traj="aslip", clock_based=False, state_est=True, dynamics_randomization=False, no_delta=False)
# policy = torch.load(args.path + "aslip_unified_task10_v7.pt")
# policy.eval()

renderpolicy_speedinput(cassie_env, policy, deterministic=False, dt=0.05, speedup=2)
Example 8
def _thunk():
    return CassieEnv("walking", clock_based=True)
Example 9
import argparse
import pickle

import numpy as np
import torch

from cassie import CassieEnv

parser = argparse.ArgumentParser()
parser.add_argument("--path",
                    type=str,
                    default=None,
                    help="path to folder containing policy and run details")
args = parser.parse_args()
run_args = pickle.load(open(args.path + "experiment.pkl", "rb"))

RUN_NAME = run_args.run_name if run_args.run_name is not None else "plot"

# RUN_NAME = "7b7e24-seed0"
# POLICY_PATH = "../trained_models/ppo/Cassie-v0/" + RUN_NAME + "/actor.pt"

# Load environment and policy
# env_fn = partial(CassieEnv_speed_no_delta_neutral_foot, "walking", clock_based=True, state_est=True)
cassie_env = CassieEnv(traj=run_args.traj,
                       state_est=run_args.state_est,
                       dynamics_randomization=run_args.dyn_random,
                       clock_based=run_args.clock_based,
                       history=run_args.history)
policy = torch.load(args.path + "actor.pt")


def avg_pols(policies, state):
    total_act = np.zeros(10)
    for policy in policies:
        action = policy.forward(torch.Tensor(state), True).detach().numpy()
        total_act += action
    return total_act / len(policies)


obs_dim = cassie_env.observation_space.shape[0]  # TODO: could make obs and ac space static properties
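A hedged usage note for avg_pols above: in a rollout it would take the place of the single-policy forward call used elsewhere in this listing. The ensemble loading below is hypothetical (file names and loop length are assumptions):

policies = [torch.load(args.path + "actor.pt")]  # assumed: one or more saved actors
state = cassie_env.reset_for_test()
for _ in range(100):
    action = avg_pols(policies, state)               # ensemble-averaged action
    state, reward, done, _ = cassie_env.step(action)
    cassie_env.render()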
Example 10
import sys, time
from cassie.quaternion_function import *
import tty
import termios
import select
import numpy as np
from functools import partial
from rl.envs.wrappers import SymmetricEnv
from cassie import CassieEnv, CassiePlayground, CassieStandingEnv, CassieEnv_noaccel_footdist_omniscient, CassieEnv_noaccel_footdist


def isData():
    return select.select([sys.stdin], [], [], 0) == ([sys.stdin], [], [])


env = CassieEnv(state_est=True, dynamics_randomization=False, history=0)
env_fn = partial(CassieEnv,
                 state_est=True,
                 dynamics_randomization=False,
                 history=0)
# env = CassieEnv_noaccel_footdist(state_est=True, dynamics_randomization=False, history=0)
# env_fn = partial(CassieEnv_noaccel_footdist, state_est=True, dynamics_randomization=False, history=0)
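# Note (assumption, based on how mirrored index lists are used elsewhere in this
# codebase): a negative entry means "negate that element when mirroring", and
# 0.1 / -0.1 stand in for index 0 because -0 cannot carry a sign.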

sym_env = SymmetricEnv(env_fn,
                       mirrored_obs=env_fn().mirrored_obs,
                       mirrored_act=[-5, -6, 7, 8, 9, -0.1, -1, 2, 3, 4])
# obs = env.get_full_state()
# print("obs len: ", len(obs))
# exit()

path = "./trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/"
Example 11
def eval_policy(policies, args, run_args, testing_speed, num_steps=4, num_trials=2):

    FULL_l_foot_poses_actual = []
    FULL_l_foot_poses_ideal = []
    FULL_r_foot_poses_actual = []
    FULL_r_foot_poses_ideal = []
    FULL_all_footplace_err = []

    for trial_num in range(num_trials):
    
        l_foot_poses_actual = []
        l_foot_poses_ideal = []
        r_foot_poses_actual = []
        r_foot_poses_ideal = []
        all_footplace_err = []

        for policy in policies:

            footplace_err = []

            max_traj_len = args.traj_len
            visualize = not args.no_vis
            # run_args.dyn_random = True

            env = CassieEnv(traj="aslip", state_est=run_args.state_est, no_delta=run_args.no_delta, learn_gains=run_args.learn_gains, ik_baseline=run_args.ik_baseline, dynamics_randomization=run_args.dyn_random, clock_based=run_args.clock_based, reward="aslip_old", history=run_args.history)
            
            if args.debug:
                env.debug = True

            print(env.reward_func)

            if hasattr(policy, 'init_hidden_state'):
                policy.init_hidden_state()

            orient_add = 0

            if visualize:
                env.render()
            render_state = True

            state = env.reset_for_test()
            done = False
            timesteps = 0
            eval_reward = 0

            # Data to track
            left_swing, right_swing, l_ideal_land_pos, r_ideal_land_pos = False, False, None, None
            footstep_count = 0

            env.update_speed(testing_speed)
            print(env.speed)

            ## Get the fixed delta for each footstep position change
            right_delta = get_ref_aslip_global_state_no_drift_correct(env, phase=7)

            while footstep_count-1 < num_steps:
            
                if hasattr(env, 'simrate'):
                    start = time.time()

                # if (not env.vis.ispaused()):
                # Update Orientation
                env.orient_add = orient_add
                # quaternion = euler2quat(z=orient_add, y=0, x=0)
                # iquaternion = inverse_quaternion(quaternion)

                # # TODO: Should probably not assume these indices. Should make them not hard coded
                # if env.state_est:
                #     curr_orient = state[1:5]
                #     curr_transvel = state[15:18]
                # else:
                #     curr_orient = state[2:6]
                #     curr_transvel = state[20:23]
                
                # new_orient = quaternion_product(iquaternion, curr_orient)

                # if new_orient[0] < 0:
                #     new_orient = -new_orient

                # new_translationalVelocity = rotate_by_quaternion(curr_transvel, iquaternion)
                
                # if env.state_est:
                #     state[1:5] = torch.FloatTensor(new_orient)
                #     state[15:18] = torch.FloatTensor(new_translationalVelocity)
                #     # state[0] = 1      # For use with StateEst. Replicate hack that height is always set to one on hardware.
                # else:
                #     state[2:6] = torch.FloatTensor(new_orient)
                #     state[20:23] = torch.FloatTensor(new_translationalVelocity)          
                    
                action = policy.forward(torch.Tensor(state), deterministic=True).detach().numpy()

                state, reward, done, _ = env.step(action)

                # get the delta for right and left by looking at phases where both feet are in stance
                right_to_left = (get_ref_aslip_global_state_no_drift_correct(env, phase=16)[2][:2]
                                 - get_ref_aslip_global_state_no_drift_correct(env, phase=16)[0][:2])
                left_to_right = (get_ref_aslip_global_state_no_drift_correct(env, phase=29)[0][:2]
                                 - get_ref_aslip_global_state_no_drift_correct(env, phase=29)[2][:2])
                delta = right_to_left + left_to_right
                # print(right_to_left)
                # print(left_to_right)
                # print(delta)
                # print(get_ref_aslip_global_state_no_drift_correct(env, phase=16)[2][:2])
                # print(get_ref_aslip_global_state_no_drift_correct(env, phase=16)[2][:2] + delta)
                # exit()

                # print("{} : {} : {}".format(env.phase, env.l_foot_pos[2], env.r_foot_pos[2]))
                # allow some ramp up time
                if timesteps > env.phaselen * 2:
                    # fill r_last_ideal_pos and l_last_ideal_pos with values before collecting data

                    # check if we are in left swing phase
                    # left foot swing, right foot stance
                    if env.phase == 7:
                        left_swing = True
                        right_swing = False
                        r_actual_land_pos = env.r_foot_pos[:2]
                        # if we have data from last step, we can calculate error from where right foot landed to where it should have landed, BEFORE updating r_ideal_land_pos
                        if footstep_count >= 1:
                            r_foot_poses_actual.append(r_actual_land_pos)
                            footplace_err.append(np.linalg.norm(r_ideal_land_pos - r_actual_land_pos))
                            # print("right landed at\t\t\t{}".format(r_actual_land_pos))
                        l_ideal_land_pos = r_actual_land_pos + right_to_left
                        l_foot_poses_ideal.append(l_ideal_land_pos)
                        # print("next, left should land at\t{}\t{}\n".format(l_ideal_land_pos, r_actual_land_pos))
                        # print("left-swing : right should land at {}".format(r_ideal_land_pos))
                    # left foot stance, right foot swing
                    elif env.phase == 23:
                        left_swing = False
                        right_swing = True
                        l_actual_land_pos = env.l_foot_pos[:2]
                        # if we have data from last step, we can calculate error from where left foot landed to where it should have landed, BEFORE updating l_ideal_land_pos
                        if footstep_count >= 1:
                            l_foot_poses_actual.append(l_actual_land_pos)
                            footplace_err.append(np.linalg.norm(l_ideal_land_pos - l_actual_land_pos))
                            # print("left landed at\t\t\t{}".format(l_actual_land_pos))
                        r_ideal_land_pos = l_actual_land_pos + left_to_right
                        r_foot_poses_ideal.append(r_ideal_land_pos)
                        # print("next, right should land at\t{}\t{}\n".format(r_ideal_land_pos, l_actual_land_pos))
                        footstep_count += 1
                        # print("right-swing : left should land at {}".format(l_ideal_land_pos))

                eval_reward += reward
                timesteps += 1
                qvel = env.sim.qvel()
                # print("actual speed: ", np.linalg.norm(qvel[0:2]))
                # print("commanded speed: ", env.speed)

                if visualize:
                    render_state = env.render()
                # if hasattr(env, 'simrate'):
                #     # assume 40hz
                #     end = time.time()
                #     delaytime = max(0, 1000 / 40000 - (end-start))
                #     time.sleep(delaytime)   

            all_footplace_err.append(np.array(footplace_err))
        
        print("Avg Foot Placement Error: ", np.mean(all_footplace_err, axis=1))
        print("Stddev Foot Placement Error: ", np.std(all_footplace_err, axis=1))
        print("Max/Min Foot Placement Error:  {} / {}".format(np.max(all_footplace_err, axis=1), np.min(all_footplace_err, axis=1)))
        print("Num pairs: ", len(all_footplace_err) * len(all_footplace_err[0]))

        # trim the ideal footplace poses
        l_foot_poses_ideal = l_foot_poses_ideal[1:]
        r_foot_poses_ideal = r_foot_poses_ideal[:-1]

        # l_foot_poses_ideal = np.array(l_foot_poses_ideal)
        # r_foot_poses_ideal = np.array(r_foot_poses_ideal)
        # l_foot_poses_actual = np.array(l_foot_poses_actual)
        # r_foot_poses_actual = np.array(r_foot_poses_actual)
        # all_footplace_err = np.array(all_footplace_err)

        # Accumulate every landing from this trial (extend with the per-trial lists,
        # rather than keeping only the last landing position).
        FULL_l_foot_poses_actual.extend(l_foot_poses_actual)
        FULL_l_foot_poses_ideal.extend(l_foot_poses_ideal)
        FULL_r_foot_poses_actual.extend(r_foot_poses_actual)
        FULL_r_foot_poses_ideal.extend(r_foot_poses_ideal)
        FULL_all_footplace_err.append(all_footplace_err)


    FULL_l_foot_poses_ideal = np.array(FULL_l_foot_poses_ideal)
    FULL_r_foot_poses_ideal = np.array(FULL_r_foot_poses_ideal)
    FULL_l_foot_poses_actual = np.array(FULL_l_foot_poses_actual)
    FULL_r_foot_poses_actual = np.array(FULL_r_foot_poses_actual)
    FULL_all_footplace_err = np.array(FULL_all_footplace_err)
    # print(FULL_all_footplace_err.shape)
    # exit()

    fig = plt.figure(figsize=(10,10))
    ax = fig.add_subplot(111)

    ax.scatter(FULL_l_foot_poses_ideal[:, 0], FULL_l_foot_poses_ideal[:, 1], c='tab:green', label='desired', alpha=0.5)
    ax.scatter(FULL_r_foot_poses_ideal[:, 0], FULL_r_foot_poses_ideal[:, 1], c='tab:green', alpha=0.5)
    ax.scatter(FULL_l_foot_poses_actual[:, 0], FULL_l_foot_poses_actual[:, 1], c='tab:red', label='actual', alpha=0.5)
    ax.scatter(FULL_r_foot_poses_actual[:, 0], FULL_r_foot_poses_actual[:, 1], c='tab:red', alpha=0.5)
    ax.axis('equal')
    ax.set_ylabel('y (m)')
    ax.set_xlabel('x (m)')
    ax.set_title('Desired vs Actual Foot Placements ({} m/s)'.format(testing_speed))
    plt.savefig('./plots/footplace{}.png'.format(testing_speed))
    # plt.show()

    return FULL_all_footplace_err, np.std(FULL_all_footplace_err, axis=1)
Example 12
parser = argparse.ArgumentParser()
parser.add_argument("--path",
                    type=str,
                    default=None,
                    help="path to folder containing policy and run details")
args = parser.parse_args()
run_args = pickle.load(open(args.path + "experiment.pkl", "rb"))

# RUN_NAME = "7b7e24-seed0"
# POLICY_PATH = "../trained_models/ppo/Cassie-v0/" + RUN_NAME + "/actor.pt"

# Load environment and policy
# env_fn = partial(CassieEnv_speed_no_delta_neutral_foot, "walking", clock_based=True, state_est=True)
cassie_env = CassieEnv(traj=run_args.traj,
                       clock_based=run_args.clock_based,
                       state_est=run_args.state_est,
                       dynamics_randomization=run_args.dyn_random)
policy = torch.load(args.path + "actor.pt")

state = torch.Tensor(cassie_env.reset_for_test())
# cassie_env.sim.step_pd(self.u)
cassie_env.speed = 0.5
cassie_env.phase_add = 1
num_steps = cassie_env.phaselen + 1
# Step through two full gait cycles first so the gait can stabilize
for i in range(num_steps * 2):
    action = policy(state, True)
    action = action.data.numpy()
    state, reward, done, _ = cassie_env.step(action)
    state = torch.Tensor(state)
curr_time = cassie_env.sim.time()
Example 13
import time 

import numpy as np

from cassie import CassieEnv

from cassie.cassiemujoco import *
from cassie.trajectory import CassieTrajectory


traj = CassieTrajectory("cassie/trajectory/stepdata-2018.bin")


env = CassieEnv("cassie/trajectory/stepdata.bin")


u = pd_in_t()
print(len(traj))
# test actual trajectory
def test_ref():
    csim = CassieSim()
    cvis = CassieVis()

    for t in range(len(traj.qpos)):
        qpos = traj.qpos[t]
        qvel = traj.qvel[t]

        csim.set_qpos(qpos)
        csim.set_qvel(qvel)
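        # --- Hedged sketch: the rest of this playback loop is cut off in the listing. ---
        # Assumes CassieVis.draw(sim) from the cassiemujoco bindings renders one frame
        # and returns a falsy value once the window is closed; the pacing is a guess.
        if not cvis.draw(csim):
            break
        time.sleep(1 / 30)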