def test_real_env_constructors(dt, max_steps):
    qbbr = QBallBalancerReal(dt=dt, max_steps=max_steps)
    assert qbbr is not None
    qcp_st = QCartPoleStabReal(dt=dt, max_steps=max_steps)
    assert qcp_st is not None
    qcp_su = QCartPoleSwingUpReal(dt=dt, max_steps=max_steps)
    assert qcp_su is not None
    qqr = QQubeReal(dt=dt, max_steps=max_steps)
    assert qqr is not None
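# A minimal sketch (not part of the original test file) of how the constructor test above
# could be supplied with arguments via pytest parametrization; the concrete dt and
# max_steps values here are illustrative assumptions, not values from the repository.
import pytest
from pyrado.environments.quanser.quanser_qube import QQubeReal

@pytest.mark.parametrize('dt', [1 / 500.])
@pytest.mark.parametrize('max_steps', [500])
def test_qqube_real_constructor_parametrized(dt, max_steps):
    # Only constructs the environment object; it does not connect to the real device.
    assert QQubeReal(dt=dt, max_steps=max_steps) is not None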
if __name__ == '__main__':
    # Parse command line arguments
    args = get_argparser().parse_args()

    # Experiment (set seed before creating the modules)
    ex_dir = setup_experiment(QQubeSwingUpSim.name,
                              f'{SimOpt.name}-{REPS.name}-{PPO.name}_{FNNPolicy.name}')
    num_workers = 16

    # Set seed if desired
    pyrado.set_seed(args.seed, verbose=True)

    # Environments
    env_hparams = dict(dt=1 / 500., max_steps=3000)
    env_real = QQubeReal(**env_hparams)

    env_sim = QQubeSwingUpSim(**env_hparams)
    randomizer = DomainRandomizer(
        NormalDomainParam(name='Mr', mean=0., std=1e6, clip_lo=1e-3),
        NormalDomainParam(name='Mp', mean=0., std=1e6, clip_lo=1e-3),
        NormalDomainParam(name='Lr', mean=0., std=1e6, clip_lo=1e-3),
        NormalDomainParam(name='Lp', mean=0., std=1e6, clip_lo=1e-3),
    )
    env_sim = DomainRandWrapperLive(env_sim, randomizer)
    dp_map = {
        0: ('Mr', 'mean'), 1: ('Mr', 'std'),
        2: ('Mp', 'mean'), 3: ('Mp', 'std'),
        4: ('Lr', 'mean'),
# Parse command line arguments
args = get_argparser().parse_args()

# Get the experiment's directory to load from
ex_dir = ask_for_experiment()

# Load the policy (trained in simulation) and the environment (for constructing the real-world counterpart)
env_sim, policy, _ = load_experiment(ex_dir)

# Detect the correct real-world counterpart and create it
if isinstance(inner_env(env_sim), QBallBalancerSim):
    env_real = QBallBalancerReal(dt=args.dt, max_steps=args.max_steps)
elif isinstance(inner_env(env_sim), QCartPoleSim):
    env_real = QCartPoleReal(dt=args.dt, max_steps=args.max_steps)
elif isinstance(inner_env(env_sim), QQubeSim):
    env_real = QQubeReal(dt=args.dt, max_steps=args.max_steps)
else:
    raise pyrado.TypeErr(given=env_sim, expected_type=[QBallBalancerSim, QCartPoleSim, QQubeSim])
print_cbt(f'Set up env {env_real.name}.', 'c')

# Finally wrap the env in the same way as done during training
env_real = wrap_like_other_env(env_real, env_sim)

# Run on the device
done = False
print_cbt('Running loaded policy ...', 'c', bright=True)
while not done:
    ro = rollout(env_real, policy,
bell_feat, RandFourierFeat, MultFeat
from pyrado.policies.linear import LinearPolicy
import torch as to
import numpy as np

if __name__ == '__main__':
    # Experiment (set seed before creating the modules)
    # ex_dir = setup_experiment(QQubeSim.name, PoWER.name, f'{LinearPolicy}_actnorm', seed=1)
    ex_dir = setup_experiment(QQubeReal.name, EMVD.name, QQubeSwingUpAndBalanceCtrl.name, seed=2)

    # Environment
    env_hparams = dict(dt=1 / 500., max_steps=5000)
    env = QQubeReal(**env_hparams)
    # env = ActNormWrapper(env)

    # Search distribution
    # init_loc = np.array([np.log(0.02), np.log(50.), 0.3, -2., 20., -1.0, 6.], dtype=np.float64)
    # init_std = 0.5 * np.ones(init_loc.shape[0], dtype=np.float64)
    # init_loc = np.array([-3.727, 3.8218, 1.04, -0.9979, 20.257, -0.7138, 5.7895], dtype=np.float64)
    # init_std = np.array([0.2288, 0.1952, 0.4372, 0.5408, 0.3838, 0.3574, 0.5939], dtype=np.float64)

    # Search distribution at iteration 18
    init_loc = np.array([-3.5888, 3.7302, 1.0079, -1.1522, 20.4393, -0.8824, 5.6107],
import os
import os.path as osp
from datetime import datetime

from pyrado.algorithms.bayrn import BayRn
from pyrado.environments.quanser.quanser_qube import QQubeReal
from pyrado.logger.experiment import ask_for_experiment, timestamp_format
from pyrado.utils.experiments import wrap_like_other_env, load_experiment
from pyrado.utils.input_output import print_cbt
from pyrado.utils.argparser import get_argparser

if __name__ == '__main__':
    # Parse command line arguments
    args = get_argparser().parse_args()

    # Get the experiment's directory to load from if not given as command line argument
    ex_dir = ask_for_experiment() if args.ex_dir is None else args.ex_dir

    # Load the policy and the environment (for constructing the real-world counterpart)
    env_sim, policy, _ = load_experiment(ex_dir)

    # Create the real-world counterpart (without domain randomization)
    env_real = QQubeReal(env_sim.dt, env_sim.max_steps)
    print_cbt(f'Set up the QQubeReal environment with dt={env_real.dt} max_steps={env_real.max_steps}.', 'c')
    env_real = wrap_like_other_env(env_real, env_sim)

    # Run the policy on the real system
    ex_ts = datetime.now().strftime(timestamp_format)
    save_dir = osp.join(ex_dir, 'evaluation')
    os.makedirs(save_dir, exist_ok=True)
    est_ret = BayRn.eval_policy(save_dir, env_real, policy, montecarlo_estimator=True, prefix=ex_ts, num_rollouts=5)
    print_cbt(f'Estimated return: {est_ret.item()}', 'g')
        1.2278416e+00, 4.5279346e+00, -1.2385756e-02, 6.0038762e+00, -4.1818547e+00]))
    # policy = QCartPoleSwingUpAndBalanceCtrl(env.spec)
    print_cbt('Set up controller for the QCartPoleStabReal environment.', 'c')

elif args.env_name == QCartPoleSwingUpReal.name:
    env = QCartPoleSwingUpReal(args.dt, args.max_steps)
    policy = QCartPoleSwingUpAndBalanceCtrl(env.spec)
    print_cbt('Set up controller for the QCartPoleSwingUpReal environment.', 'c')

elif args.env_name == QQubeReal.name:
    env = QQubeReal(args.dt, args.max_steps)
    # policy = QQubeSwingUpAndBalanceCtrl(env.spec)
    # MVD - learned for the paper
    policy = QQubeSwingUpAndBalanceCtrl(
        env.spec,
        ref_energy=np.exp(-2.9414043),
        energy_gain=np.exp(3.1400251),
        energy_th_gain=0.73774934,  # for simulation and real system
        acc_max=5.,  # Quanser's value: 6
        alpha_max_pd_enable=10.,  # Quanser's value: 20
        pd_gains=to.tensor([-1.9773294, 35.084324, -1.1951622, 3.3797605]))
    print_cbt('Set up controller for the QQubeReal environment.', 'c')

else:
# Get the experiment's directory to load from if not given as command line argument
ex_dir = ask_for_experiment() if args.ex_dir is None else args.ex_dir

# Load the policy and the environment (for constructing the real-world counterpart)
env_sim, policy, _ = load_experiment(ex_dir, args)
if 'argmax' in args.policy_name:
    policy = to.load(osp.join(ex_dir, 'policy_argmax.pt'))
    print_cbt(f"Loaded {osp.join(ex_dir, 'policy_argmax.pt')}", 'g', bright=True)

# Create the real-world counterpart
# If `max_steps` (or `dt`) is not explicitly set via `args`, use the same value as in the simulation
max_steps = args.max_steps if args.max_steps < pyrado.inf else env_sim.max_steps
dt = args.dt if args.dt is not None else env_sim.dt
env_real = QQubeReal(dt, max_steps)
print_cbt(f'Set up the QQubeReal environment with dt={env_real.dt} max_steps={env_real.max_steps}.', 'c')

# Finally wrap the env in the same way as done during training
env_real = wrap_like_other_env(env_real, env_sim)

ex_ts = datetime.now().strftime(timestamp_format)
save_dir = osp.join(ex_dir, 'evaluation')
os.makedirs(save_dir, exist_ok=True)

num_ro_per_config = args.num_ro_per_config if args.num_ro_per_config is not None else 5
est_ret = BayRn.eval_policy(save_dir, env_real, policy, mc_estimator=True,
def default_qq_real():
    return QQubeReal(dt=1 / 500., max_steps=500)
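# A minimal sketch (not from the original conftest) of how default_qq_real could be exposed
# as a pytest fixture and consumed by a test; the fixture decorator and the assertion below
# are assumptions for illustration, not code taken from the repository.
import pytest

@pytest.fixture
def qq_real():
    return default_qq_real()

def test_default_qq_real_settings(qq_real):
    # Checks only locally stored attributes; it does not connect to the real Qube.
    assert qq_real.max_steps == 500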