def experiment_wo_distruber(env_real: RealEnv, env_sim: SimEnv):
    # Wrap the environment in the same way as done during training
    env_real = wrap_like_other_env(env_real, env_sim)

    # Run the learned policy on the device
    print_cbt('Running the evaluation policy ...', 'c', bright=True)
    return rollout(env_real, policy, eval=True, max_steps=args.max_steps,
                   render_mode=RenderMode(text=True), no_reset=True, no_close=True)
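# A minimal usage sketch (illustrative, hedged): it assumes `env_real`, `env_sim`, `policy`,
# and `args` are defined as in the surrounding script, and that the returned StepSequence
# exposes the recorded rewards as `ro.rewards` (a numpy array), as Pyrado rollouts do.
import matplotlib.pyplot as plt

ro = experiment_wo_distruber(env_real, env_sim)
print_cbt(f'Return without disturber: {ro.undiscounted_return()}', 'g', bright=True)

# Plot the per-step rewards of the undisturbed evaluation rollout
plt.plot(ro.rewards)
plt.xlabel('time step')
plt.ylabel('reward')
plt.title('Undisturbed evaluation rollout')
plt.show()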
import pyrado
from pyrado.sampling.rollout import rollout, after_rollout_query
from pyrado.utils.argparser import get_argparser
from pyrado.utils.input_output import print_cbt
# Note: the remaining helpers (ask_for_experiment, load_experiment, wrap_like_other_env,
# inner_env) and the environment classes (WAMBallInCupSim, WAMBallInCupReal) are imported
# from their respective Pyrado modules; the exact paths depend on the Pyrado version.


if __name__ == '__main__':
    # Parse command line arguments
    args = get_argparser().parse_args()

    # Get the experiment's directory to load from
    ex_dir = ask_for_experiment()

    # Load the policy (trained in simulation) and the environment (for constructing the real-world counterpart)
    env_sim, policy, _ = load_experiment(ex_dir)

    # Detect the correct real-world counterpart and create it
    if isinstance(inner_env(env_sim), WAMBallInCupSim):
        # If `max_steps` (or `dt`) is not explicitly set via `args`, use the same value as in the simulation
        max_steps = args.max_steps if args.max_steps < pyrado.inf else env_sim.max_steps
        dt = args.dt if args.dt is not None else env_sim.dt
        env_real = WAMBallInCupReal(dt=dt, max_steps=max_steps)
    else:
        raise pyrado.TypeErr(given=env_sim, expected_type=WAMBallInCupSim)

    # Finally, wrap the environment in the same way as done during training
    env_real = wrap_like_other_env(env_real, env_sim)

    # Run on the device
    done = False
    while not done:
        ro = rollout(env_real, policy, eval=True)
        print_cbt(f'Return: {ro.undiscounted_return()}', 'g', bright=True)
        done, _, _ = after_rollout_query(env_real, policy, ro)
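# A minimal follow-up sketch, assuming the loop above has finished and `ro` holds the last
# recorded rollout. A Pyrado StepSequence stores plain numpy arrays, so it can usually be
# pickled and kept next to the experiment for later analysis. The file name
# 'rollout_real.pkl' is an arbitrary choice for this sketch.
import os.path as osp
import pickle

with open(osp.join(ex_dir, 'rollout_real.pkl'), 'wb') as f:
    pickle.dump(ro, f)
print_cbt(f'Saved the last rollout to {osp.join(ex_dir, "rollout_real.pkl")}', 'g')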
def experiment_w_distruber(env_real: RealEnv, env_sim: SimEnv):
    # Wrap the environment in the same way as done during training
    env_real = wrap_like_other_env(env_real, env_sim)

    # Run the learned policy on the device
    print_cbt('Running the evaluation policy ...', 'c', bright=True)
    ro1 = rollout(env_real, policy, eval=True, max_steps=args.max_steps // 3,
                  render_mode=RenderMode(), no_reset=True, no_close=True)

    # Run the 1st disturber
    env_real = inner_env(env_real)  # since we are reusing it
    print_cbt('Running the 1st disturber ...', 'c', bright=True)
    rollout(env_real, disturber_pos, eval=True, max_steps=steps_disturb,
            render_mode=RenderMode(), no_reset=True, no_close=True)

    # Wrap the environment in the same way as done during training
    env_real = wrap_like_other_env(env_real, env_sim)

    # Run the learned policy on the device
    print_cbt('Running the evaluation policy ...', 'c', bright=True)
    ro2 = rollout(env_real, policy, eval=True, max_steps=args.max_steps // 3,
                  render_mode=RenderMode(), no_reset=True, no_close=True)

    # Run the 2nd disturber
    env_real = inner_env(env_real)  # since we are reusing it
    print_cbt('Running the 2nd disturber ...', 'c', bright=True)
    rollout(env_real, disturber_neg, eval=True, max_steps=steps_disturb,
            render_mode=RenderMode(), no_reset=True, no_close=True)

    # Wrap the environment in the same way as done during training
    env_real = wrap_like_other_env(env_real, env_sim)

    # Run the learned policy on the device
    print_cbt('Running the evaluation policy ...', 'c', bright=True)
    ro3 = rollout(env_real, policy, eval=True, max_steps=args.max_steps // 3,
                  render_mode=RenderMode(), no_reset=True, no_close=True)

    return StepSequence.concat([ro1, ro2, ro3])
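# An illustrative sketch of selecting between the two experiment variants at run time.
# The `mode` variable is hypothetical and only serves this sketch; the original script may
# choose the variant differently. `env_real`, `env_sim`, and `policy` are assumed to exist.
mode = 'with_disturber'  # or 'without_disturber'

if mode == 'with_disturber':
    ro = experiment_w_distruber(env_real, env_sim)
else:
    ro = experiment_wo_distruber(env_real, env_sim)
print_cbt(f'Return ({mode}): {ro.undiscounted_return()}', 'g', bright=True)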
# Note: `env`, `pert`, `init_state_list`, `ex_labels`, `env_sim_list`, and `policy_list`
# are assumed to be defined earlier in the script (not shown in this excerpt).
df = pd.DataFrame(columns=['policy', 'ret', 'len'])

# Evaluate all policies
for i, (env_sim, policy) in enumerate(zip(env_sim_list, policy_list)):
    # Create a new sampler pool for every policy to synchronize the random seeds, i.e. the initial states
    pool = SamplerPool(args.num_workers)

    # Seed the sampler
    if args.seed is not None:
        pool.set_seed(args.seed)
        print_cbt(f"Set the random number generators' seed to {args.seed}.", 'w')
    else:
        print_cbt('No seed was set.', 'y')

    # Add the same wrappers as during training
    env = wrap_like_other_env(env, env_sim)

    # Sample rollouts
    ros = eval_randomized_domain(pool, env, pert, policy, init_state_list)  # internally calls DomainRandWrapperLive

    # Compute the result metrics
    rets = [ro.undiscounted_return() for ro in ros]
    lengths = [float(ro.length) for ro in ros]  # int values are not numeric in pandas
    # DataFrame.append() was removed in pandas 2.0, thus use pd.concat instead
    df = pd.concat([df, pd.DataFrame(dict(policy=ex_labels[i], ret=rets, len=lengths))], ignore_index=True)

metrics = dict(
    avg_len=df.groupby('policy').mean()['len'].to_dict(),
    avg_ret=df.groupby('policy').mean()['ret'].to_dict(),
    median_ret=df.groupby('policy').median()['ret'].to_dict(),
    min_ret=df.groupby('policy').min()['ret'].to_dict(),
    max_ret=df.groupby('policy').max()['ret'].to_dict(),
)
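# A minimal reporting sketch, assuming the evaluation above has finished: print the
# aggregated metrics and write the per-rollout results to disk. The directory `save_dir`
# is a placeholder for this sketch and is not defined in the excerpt above.
import os.path as osp
from pprint import pprint

pprint(metrics)
df.to_csv(osp.join(save_dir, 'policy_evaluation.csv'), index=False)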