Example #1
def eval_damping():
    """ Plot joint trajectories for different joint damping parameters """
    # Load experiment and remove possible randomization wrappers
    ex_dir = ask_for_experiment()
    env, policy, _ = load_experiment(ex_dir)
    env = inner_env(env)
    env.domain_param = WAMBallInCupSim.get_nominal_domain_param()

    data = []
    t = []
    dampings = [0., 1e-2, 1e-1, 1e0]
    print_cbt(f'Run policy for damping coefficients: {dampings}')
    for d in dampings:
        env.reset(domain_param=dict(joint_damping=d))
        ro = rollout(env,
                     policy,
                     render_mode=RenderMode(video=False),
                     eval=True)
        t.append(ro.env_infos['t'])
        data.append(ro.env_infos['qpos'])

    fig, ax = plt.subplots(3, sharex='all')
    ls = ['k-', 'b--', 'g-.', 'r:']  # line style setting for better visibility
    for i, idx in enumerate([1, 3, 5]):
        for j in range(len(dampings)):
            ax[i].plot(t[j],
                       data[j][:, idx],
                       ls[j],
                       label=f'damping: {dampings[j]}')
            if i == 0:
                ax[i].legend()
        ax[i].set_ylabel(f'joint {idx} pos [rad]')
    ax[2].set_xlabel('time [s]')
    plt.suptitle('Evaluation of joint damping coefficient')
    plt.show()
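This snippet presumes several imports. A plausible import block, with module paths taken from Examples 6 and 7 below (the `inner_env` import path is an assumption based on pyrado's wrapper utilities):

import matplotlib.pyplot as plt

from pyrado.environment_wrappers.utils import inner_env
from pyrado.environments.mujoco.wam import WAMBallInCupSim
from pyrado.logger.experiment import ask_for_experiment
from pyrado.sampling.rollout import rollout
from pyrado.utils.data_types import RenderMode
from pyrado.utils.experiments import load_experiment
from pyrado.utils.input_output import print_cbt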
Example #2
    def load_teacher_experiment(self, exp: Experiment):
        """
        Load the teachers from a PDDRTeachers experiment.

        :param exp: the teacher's experiment object
        """
        _, _, extra = load_experiment(exp)
        self.unpack_teachers(extra)
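A hypothetical call site for this method; `algo` stands in for a constructed PDDR instance, and obtaining the experiment handle via `ask_for_experiment` (as in the other examples) is an assumption:

# Hypothetical usage sketch; `algo` is a placeholder for a PDDR instance
teacher_exp = ask_for_experiment()
algo.load_teacher_experiment(teacher_exp)  # loads via load_experiment, then unpack_teachers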
Example #3
def test_pddr(ex_dir, env: SimEnv, policy, algo_hparam):
    pyrado.set_seed(0)

    # Create algorithm and train
    teacher_policy = deepcopy(policy)
    critic = GAE(
        vfcn=FNNPolicy(spec=EnvSpec(env.obs_space, ValueFunctionSpace),
                       hidden_sizes=[16, 16],
                       hidden_nonlin=to.tanh))
    teacher_algo_hparam = dict(critic=critic, min_steps=1500, max_iter=2)
    teacher_algo = PPO

    # Wrapper
    randomizer = create_default_randomizer(env)
    env = DomainRandWrapperLive(env, randomizer)

    # Subroutine
    algo_hparam = dict(
        max_iter=2,
        min_steps=env.max_steps,
        std_init=0.15,
        num_epochs=10,
        num_teachers=2,
        teacher_policy=teacher_policy,
        teacher_algo=teacher_algo,
        teacher_algo_hparam=teacher_algo_hparam,
        num_workers=1,
    )

    algo = PDDR(ex_dir, env, policy, **algo_hparam)

    algo.train()

    assert algo.curr_iter == algo.max_iter

    # Save and load
    algo.save_snapshot(meta_info=None)
    algo_loaded = Algorithm.load_snapshot(load_dir=ex_dir)
    assert isinstance(algo_loaded, Algorithm)
    policy_loaded = algo_loaded.policy

    # Check
    assert all(algo.policy.param_values == policy_loaded.param_values)

    # Load the experiment. Since we did not save any hyper-parameters, we ignore the errors when loading.
    env, policy, extra = load_experiment(ex_dir)
    assert isinstance(env, Env)
    assert isinstance(policy, Policy)
    assert isinstance(extra, dict)
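The final assertions pin down `load_experiment`'s return contract: an environment, a policy, and a dict of extras. A minimal consumer sketch; the `hparams` and `vfcn` keys are taken from Examples 7 and 11 below and only exist if they were saved:

env, policy, extra = load_experiment(ex_dir)  # optionally load_experiment(ex_dir, args)
hparams = extra.get('hparams')  # hyper-parameters, if saved (see Example 7)
vfcn = extra.get('vfcn')  # value function, if the algorithm stored one (see Example 11)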
Example #4
def conditional_actnorm_wrapper(env: Env, ex_dirs: list, idx: int):
    """
    Wrap the environment with an action normalization wrapper if the simulated environment had one.

    :param env: environment to sample from
    :param ex_dirs: list of experiment directories that will be loaded
    :param idx: index of the current directory
    :return: modified environment
    """
    # Get the simulation environment
    env_sim, _, _ = load_experiment(ex_dirs[idx])

    if typed_env(env_sim, ActNormWrapper) is not None:
        env = ActNormWrapper(env)
        print_cbt(
            f'Added an action normalization wrapper to the {idx + 1}-th evaluation policy.',
            'y')
    else:
        env = remove_env(env, ActNormWrapper)
        print_cbt(
            f'Removed the action normalization wrapper from the {idx + 1}-th evaluation policy.',
            'y')
    return env
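A hedged usage sketch: the check is applied once per evaluation policy; `env_real` and `ex_dirs` are placeholders for the target environment and the loaded experiment directories:

# Hypothetical evaluation loop; env_real and ex_dirs are placeholders
for idx in range(len(ex_dirs)):
    env_eval = conditional_actnorm_wrapper(env_real, ex_dirs, idx)
    # ... evaluate the idx-th policy in env_eval ...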
Example #5
            mode = input(
                "Pass ep for episodic and sb for step-based control mode: "
            ).lower()
        qpos_real = np.load(osp.join(ex_dir, f"qpos_real_{mode}.npy"))
        qvel_real = np.load(osp.join(ex_dir, f"qvel_real_{mode}.npy"))
    except FileNotFoundError:
        real_data_exists = False
        print_cbt(
            f"Did not find a recorded real trajectory (qpos_real_{mode} and qvel_real_{mode}) for this policy. "
            f"Run deployment/run_policy_wam.py to get real-world trajectories.",
            "y",
            bright=True,
        )

    # Load the policy and the environment
    env, policy, _ = load_experiment(ex_dir, args)

    # Get nominal environment
    env = remove_all_dr_wrappers(env)
    env.domain_param = env.get_nominal_domain_param()
    env.stop_on_collision = False

    # Fix seed for reproducibility
    pyrado.set_seed(args.seed)

    # Use the recorded initial state from the real system
    init_state = env.init_space.sample_uniform()
    if real_data_exists:
        if input(
                "Use the recorded initial state from the real system? [y] / n "
        ).lower() in ("", "y"):
Example #6
from pyrado.environments.mujoco.wam import WAMBallInCupSim
from pyrado.logger.experiment import ask_for_experiment
from pyrado.sampling.rollout import rollout, after_rollout_query
from pyrado.utils.experiments import wrap_like_other_env, load_experiment
from pyrado.utils.input_output import print_cbt
from pyrado.utils.argparser import get_argparser

if __name__ == '__main__':
    # Parse command line arguments
    args = get_argparser().parse_args()

    # Get the experiment's directory to load from
    ex_dir = ask_for_experiment()

    # Load the policy (trained in simulation) and the environment (for constructing the real-world counterpart)
    env_sim, policy, _ = load_experiment(ex_dir)

    # Detect the correct real-world counterpart and create it
    if isinstance(inner_env(env_sim), WAMBallInCupSim):
        # If `max_steps` (or `dt`) is not explicitly set via `args`, use the same value as in the simulation
        max_steps = args.max_steps if args.max_steps < pyrado.inf else env_sim.max_steps
        dt = args.dt if args.dt is not None else env_sim.dt
        env_real = WAMBallInCupReal(dt=dt, max_steps=max_steps)
    else:
        raise pyrado.TypeErr(given=env_sim, expected_type=WAMBallInCupSim)

    # Finally, wrap the environment in the same way as during training
    env_real = wrap_like_other_env(env_real, env_sim)

    # Run on device
    done = False
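The snippet breaks off before the execution loop. A plausible continuation using the `rollout`/`after_rollout_query` pair the example already imports (the exact loop body is an assumption, not the script's verbatim tail):

    while not done:
        ro = rollout(env_real, policy, eval=True)
        print_cbt(f'Return: {ro.undiscounted_return()}', 'g', bright=True)
        done, _, _ = after_rollout_query(env_real, policy, ro)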
Example #7
from pyrado.logger.experiment import ask_for_experiment
from pyrado.sampling.rollout import rollout, after_rollout_query
from pyrado.utils.argparser import get_argparser
from pyrado.utils.experiments import load_experiment
from pyrado.utils.input_output import print_cbt
from pyrado.utils.data_types import RenderMode

if __name__ == '__main__':
    # Parse command line arguments
    args = get_argparser().parse_args()

    # Get the experiment's directory to load from
    ex_dir = ask_for_experiment() if args.ex_dir is None else args.ex_dir

    # Load the environment and the policy
    env, policy, kwout = load_experiment(ex_dir, args)

    # Override the time step size if specified
    if args.dt is not None:
        env.dt = args.dt

    if args.verbose:
        print('Hyper-parameters of the experiment')
        pprint(kwout.get('hparams', 'No hyper-parameters found!'))

    if args.remove_dr_wrappers:
        env = remove_all_dr_wrappers(env, verbose=True)

    # Use the environment's number of steps in case of the default argument (inf)
    max_steps = env.max_steps if args.max_steps == pyrado.inf else args.max_steps
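Example 7 likewise stops just before the rollout loop; a plausible continuation mirroring Example 1's rollout call and the query pattern above (passing `max_steps` to `rollout` is an assumption):

    done = False
    while not done:
        ro = rollout(env, policy, render_mode=RenderMode(video=True), eval=True, max_steps=max_steps)
        done, _, _ = after_rollout_query(env, policy, ro)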
Example #8
        env = ActDelayWrapper(env)
        # param_spec['act_delay'] = np.linspace(0, 60, num=21, endpoint=True, dtype=int)

    if len(param_spec) != 1:
        raise pyrado.ValueErr(msg='Do not vary more than one domain parameter for this script! (Check action delay.)')
    varied_param_key = ''.join(param_spec.keys())  # to get a str

    if not (len(prefixes) == len(exp_names) and len(prefixes) == len(exp_labels)):
        raise pyrado.ShapeErr(msg=f'The lengths of prefixes, exp_names, and exp_labels must be equal, '
                                  f'but they are {len(prefixes)}, {len(exp_names)}, and {len(exp_labels)}!')

    # Load the policies
    ex_dirs = [osp.join(p, e) for p, e in zip(prefixes, exp_names)]
    policies = []
    for ex_dir in ex_dirs:
        _, policy, _ = load_experiment(ex_dir)
        policies.append(policy)

    # Create one-dim results grid and ensure right number of rollouts
    param_list = param_grid(param_spec)
    param_list *= args.num_ro_per_config

    # Fix initial state (set to None if it should not be fixed)
    init_state = None

    # Create an empty data frame
    df = pd.DataFrame(columns=['policy', 'ret', 'len', varied_param_key])

    # Evaluate all policies
    for i, policy in enumerate(policies):
        # Create a new sampler pool for every policy to synchronize the random seeds, i.e., the initial states
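The snippet cuts off while creating the sampler; a rough continuation following Example 12's pattern below (an assumption about this script's actual tail; indentation continues the loop over policies):

        pool = SamplerPool(args.num_workers)
        if args.seed is not None:
            pool.set_seed(args.seed)
        ros = eval_domain_params(pool, env, policy, param_list, init_state)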
Example #9
def test_snapshots_notmeta(ex_dir, env: SimEnv, policy, algo_class,
                           algo_hparam):
    # Collect hyper-parameters, create algorithm, and train
    common_hparam = dict(max_iter=1, num_workers=1)
    common_hparam.update(algo_hparam)

    if issubclass(algo_class, ActorCritic):
        common_hparam.update(
            min_rollouts=3,
            critic=GAE(
                vfcn=FNNPolicy(spec=EnvSpec(env.obs_space, ValueFunctionSpace),
                               hidden_sizes=[16, 16],
                               hidden_nonlin=to.tanh)),
        )
    elif issubclass(algo_class, ParameterExploring):
        common_hparam.update(num_init_states_per_domain=1)
    elif issubclass(algo_class, (DQL, SAC)):
        common_hparam.update(memory_size=1000,
                             num_updates_per_step=2,
                             gamma=0.99,
                             min_rollouts=1)
        fnn_hparam = dict(hidden_sizes=[8, 8], hidden_nonlin=to.tanh)
        if issubclass(algo_class, DQL):
            # Override the setting
            env = BallOnBeamDiscSim(env.dt, env.max_steps)
            net = FNN(
                input_size=DiscreteActQValPolicy.get_qfcn_input_size(env.spec),
                output_size=DiscreteActQValPolicy.get_qfcn_output_size(),
                **fnn_hparam,
            )
            policy = DiscreteActQValPolicy(spec=env.spec, net=net)
        else:
            # Override the setting
            env = ActNormWrapper(env)
            policy = TwoHeadedGRUPolicy(env.spec,
                                        shared_hidden_size=8,
                                        shared_num_recurrent_layers=1)
            obsact_space = BoxSpace.cat([env.obs_space, env.act_space])
            common_hparam.update(qfcn_1=FNNPolicy(
                spec=EnvSpec(obsact_space, ValueFunctionSpace), **fnn_hparam))
            common_hparam.update(qfcn_2=FNNPolicy(
                spec=EnvSpec(obsact_space, ValueFunctionSpace), **fnn_hparam))
    else:
        raise NotImplementedError

    # Simulate training
    algo = algo_class(ex_dir, env, policy, **common_hparam)
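    # Perturb the parameters so that the equality checks after loading are non-trivial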
    algo.policy.param_values += to.tensor([42.0])
    if isinstance(algo, ActorCritic):
        algo.critic.vfcn.param_values += to.tensor([42.0])

    # Save and load
    algo.save_snapshot(meta_info=None)
    algo_loaded = Algorithm.load_snapshot(load_dir=ex_dir)
    assert isinstance(algo_loaded, Algorithm)
    policy_loaded = algo_loaded.policy
    if isinstance(algo, ActorCritic):
        critic_loaded = algo_loaded.critic

    # Check
    assert all(algo.policy.param_values == policy_loaded.param_values)
    if isinstance(algo, ActorCritic):
        assert all(
            algo.critic.vfcn.param_values == critic_loaded.vfcn.param_values)

    # Load the experiment. Since we did not save any hyper-parameters, we ignore the errors when loading.
    env, policy, extra = load_experiment(ex_dir)
    assert isinstance(env, Env)
    assert isinstance(policy, Policy)
    assert isinstance(extra, dict)
Example #10
    # Check arguments
    src_domain_param_args = ["ml", "nominal", "posterior", "prior", None]
    if args.src_domain_param not in src_domain_param_args:
        raise pyrado.ValueErr(given_name="src_domain_param",
                              eq_constraint=src_domain_param_args)

    # Get the experiment's directory to load from
    ex_dir = ask_for_experiment(
        hparam_list=args.show_hparams) if args.dir is None else args.dir

    # Load the policy (trained in simulation) and the environment (for constructing the real-world counterpart)
    if args.iter != -1:
        args.policy_name = f"iter_{args.iter}_policy"
    if args.init:
        args.policy_name = "init_policy"
    env_sim, policy, extra = load_experiment(ex_dir, args)

    # Create the domain parameter mapping
    dp_mapping = dict()
    if extra is not None:
        dp_counter = 0
        for key in sorted(extra["hparams"]["dp_mapping"].keys()):
            dp = extra["hparams"]["dp_mapping"][key]
            if dp in extra["hparams"]["dp_selection"]:
                dp_mapping[dp_counter] = dp
                dp_counter += 1
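    # The loop above condenses the saved domain-parameter mapping to the selected
    # entries. A toy illustration of the data it operates on (values invented
    # purely for illustration):
    #   extra['hparams']['dp_mapping']   == {0: 'damping', 1: 'gravity_const', 2: 'mass'}
    #   extra['hparams']['dp_selection'] == ['damping', 'mass']
    #   resulting dp_mapping             == {0: 'damping', 1: 'mass'}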

    pyrado.load(f"{args.policy_name}.pt", ex_dir, obj=policy)

    # Reset the policy's domain parameter if desired
    prior, posterior = None, None
Example #11
        state = state.repeat(varying.shape[0], varying.shape[1], 1)
        # Insert the values of the evaluation mesh grid into the selected state dimensions
        state[:, :, self._idcs] = varying
        return self._fcn(state)


if __name__ == "__main__":
    # Parse command line arguments
    args = get_argparser().parse_args()
    plt.rc("text", usetex=args.use_tex)

    # Get the experiment's directory to load from
    ex_dir = ask_for_experiment(hparam_list=args.show_hparams) if args.dir is None else args.dir

    # Load the environment and the value function
    env, _, kwout = load_experiment(ex_dir, args)
    vfcn = kwout["vfcn"]

    if len(args.idcs) != 2:
        raise pyrado.ShapeErr(msg="Please provide exactly two indices to slice the value function input space (obs_space)!")

    # Use the environment's lower and upper bounds to parametrize the mesh grid
    lb, ub = env.obs_space.bounds
    lb_inf_check = np.isinf(lb)
    ub_inf_check = np.isinf(ub)
    if lb_inf_check.any():
        warn("Detected at least one inf entry in mesh grid's lower bound, replacing all with -1.")
        lb[lb_inf_check] = -1.0
    if ub_inf_check.any():
        warn("Detected at least one inf entry in mesh grid's upper bound, replacing all with 1.")
        ub[ub_inf_check] = 1.0
Example #12
def evaluate_policy(args, ex_dir):
    """Helper function to evaluate the policy from an experiment in the associated environment."""
    env, policy, _ = load_experiment(ex_dir, args)

    # Create multi-dim evaluation grid
    param_spec = dict()
    param_spec_dim = None

    if isinstance(inner_env(env), BallOnPlateSim):
        param_spec["ball_radius"] = np.linspace(0.02, 0.08, num=2, endpoint=True)
        param_spec["ball_rolling_friction_coefficient"] = np.linspace(0.0295, 0.9, num=2, endpoint=True)

    elif isinstance(inner_env(env), QQubeSwingUpSim):
        eval_num = 200
        # Use nominal values for all other parameters.
        for param, nominal_value in env.get_nominal_domain_param().items():
            param_spec[param] = nominal_value
        # param_spec["gravity_const"] = np.linspace(5.0, 15.0, num=eval_num, endpoint=True)
        param_spec["damping_pend_pole"] = np.linspace(0.0, 0.0001, num=eval_num, endpoint=True)
        param_spec["damping_rot_pole"] = np.linspace(0.0, 0.0006, num=eval_num, endpoint=True)
        param_spec_dim = 2

    elif isinstance(inner_env(env), QBallBalancerSim):
        # param_spec["gravity_const"] = np.linspace(7.91, 11.91, num=11, endpoint=True)
        # param_spec["ball_mass"] = np.linspace(0.003, 0.3, num=11, endpoint=True)
        # param_spec["ball_radius"] = np.linspace(0.01, 0.1, num=11, endpoint=True)
        param_spec["plate_length"] = np.linspace(0.275, 0.275, num=11, endpoint=True)
        param_spec["arm_radius"] = np.linspace(0.0254, 0.0254, num=11, endpoint=True)
        # param_spec["load_inertia"] = np.linspace(5.2822e-5*0.5, 5.2822e-5*1.5, num=11, endpoint=True)
        # param_spec["motor_inertia"] = np.linspace(4.6063e-7*0.5, 4.6063e-7*1.5, num=11, endpoint=True)
        # param_spec["gear_ratio"] = np.linspace(60, 80, num=11, endpoint=True)
        # param_spec["gear_efficiency"] = np.linspace(0.6, 1.0, num=11, endpoint=True)
        # param_spec["motor_efficiency"] = np.linspace(0.49, 0.89, num=11, endpoint=True)
        # param_spec["motor_back_emf"] = np.linspace(0.006, 0.066, num=11, endpoint=True)
        # param_spec["motor_resistance"] = np.linspace(2.6*0.5, 2.6*1.5, num=11, endpoint=True)
        # param_spec["combined_damping"] = np.linspace(0.0, 0.05, num=11, endpoint=True)
        # param_spec["friction_coeff"] = np.linspace(0, 0.015, num=11, endpoint=True)
        # param_spec["voltage_thold_x_pos"] = np.linspace(0.0, 1.0, num=11, endpoint=True)
        # param_spec["voltage_thold_x_neg"] = np.linspace(-1., 0.0, num=11, endpoint=True)
        # param_spec["voltage_thold_y_pos"] = np.linspace(0.0, 1.0, num=11, endpoint=True)
        # param_spec["voltage_thold_y_neg"] = np.linspace(-1.0, 0, num=11, endpoint=True)
        # param_spec["offset_th_x"] = np.linspace(-5/180*np.pi, 5/180*np.pi, num=11, endpoint=True)
        # param_spec["offset_th_y"] = np.linspace(-5/180*np.pi, 5/180*np.pi, num=11, endpoint=True)

    else:
        raise NotImplementedError

    # Always add an action delay wrapper (with 0 delay by default)
    if typed_env(env, ActDelayWrapper) is None:
        env = ActDelayWrapper(env)
    # param_spec['act_delay'] = np.linspace(0, 30, num=11, endpoint=True, dtype=int)

    add_info = "-".join(param_spec.keys())

    # Create multidimensional results grid and ensure right number of rollouts
    param_list = param_grid(param_spec)
    param_list *= args.num_rollouts_per_config
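    # `param_grid` expands the spec into one parameter dict per grid point
    # (the cartesian product over the varied entries), and repeating the list
    # yields several rollouts per configuration. A toy illustration (invented values):
    #   param_spec = {'ball_radius': [0.02, 0.08]}
    #   param_grid(param_spec) -> [{'ball_radius': 0.02}, {'ball_radius': 0.08}]
    #   param_list * 2         -> each configuration rolled out twice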

    # Fix initial state (set to None if it should not be fixed)
    init_state = np.array([0.0, 0.0, 0.0, 0.0])

    # Create sampler
    pool = SamplerPool(args.num_workers)
    if args.seed is not None:
        pool.set_seed(args.seed)
        print_cbt(f"Set the random number generators' seed to {args.seed}.", "w")
    else:
        print_cbt("No seed was set", "y")

    # Sample rollouts
    ros = eval_domain_params(pool, env, policy, param_list, init_state)

    # Compute metrics
    lod = []
    for ro in ros:
        d = dict(**ro.rollout_info["domain_param"], ret=ro.undiscounted_return(), len=ro.length)
        # Simply remove the observation noise from the domain parameters
        try:
            d.pop("obs_noise_mean")
            d.pop("obs_noise_std")
        except KeyError:
            pass
        lod.append(d)

    df = pd.DataFrame(lod)
    metrics = dict(
        avg_len=df["len"].mean(),
        avg_ret=df["ret"].mean(),
        median_ret=df["ret"].median(),
        min_ret=df["ret"].min(),
        max_ret=df["ret"].max(),
        std_ret=df["ret"].std(),
    )
    pprint(metrics, indent=4)

    # Create subfolder and save
    timestamp = datetime.datetime.now()
    add_info = timestamp.strftime(pyrado.timestamp_format) + "--" + add_info
    save_dir = osp.join(ex_dir, "eval_domain_grid", add_info)
    os.makedirs(save_dir, exist_ok=True)

    save_dicts_to_yaml(
        {"ex_dir": str(ex_dir)},
        {"varied_params": list(param_spec.keys())},
        {"num_rpp": args.num_rollouts_per_config, "seed": args.seed},
        {"metrics": dict_arraylike_to_float(metrics)},
        save_dir=save_dir,
        file_name="summary",
    )
    pyrado.save(df, f"df_sp_grid_{len(param_spec) if param_spec_dim is None else param_spec_dim}d.pkl", save_dir)