Example #1
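A pytest-style test that constructs each of the real-world Quanser environments (ball balancer, cart-pole stabilization, cart-pole swing-up, and Qube) and checks that the constructors succeed.
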
def test_real_env_contructors(dt, max_steps):
    qbbr = QBallBalancerReal(dt=dt, max_steps=max_steps)
    assert qbbr is not None
    qcp_st = QCartPoleStabReal(dt=dt, max_steps=max_steps)
    assert qcp_st is not None
    qcp_su = QCartPoleSwingUpReal(dt=dt, max_steps=max_steps)
    assert qcp_su is not None
    qqr = QQubeReal(dt=dt, max_steps=max_steps)
    assert qqr is not None
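
The dt and max_steps arguments are presumably supplied by a pytest parametrization; a minimal sketch of such a setup, with hypothetical values and the body as above:

import pytest

@pytest.mark.parametrize('dt', [1 / 500.])      # hypothetical control period (500 Hz)
@pytest.mark.parametrize('max_steps', [500])    # hypothetical episode length
def test_real_env_contructors(dt, max_steps):
    ...  # construct the real environments and assert they exist, as shown above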
Example #2
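Head of a SimOpt-style training script: a QQubeReal target system is paired with a domain-randomized QQubeSwingUpSim whose mass and length parameters (Mr, Mp, Lr, Lp) are drawn from normal distributions, and dp_map links the entries of the domain-distribution parameter vector to those distributions' mean and std attributes.
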
if __name__ == '__main__':
    # Parse command line arguments
    args = get_argparser().parse_args()

    # Experiment (set seed before creating the modules)
    ex_dir = setup_experiment(
        QQubeSwingUpSim.name,
        f'{SimOpt.name}-{REPS.name}-{PPO.name}_{FNNPolicy.name}')
    num_workers = 16

    # Set seed if desired
    pyrado.set_seed(args.seed, verbose=True)

    # Environments
    env_hparams = dict(dt=1 / 500., max_steps=3000)
    env_real = QQubeReal(**env_hparams)

    env_sim = QQubeSwingUpSim(**env_hparams)
    randomizer = DomainRandomizer(
        NormalDomainParam(name='Mr', mean=0., std=1e6, clip_lo=1e-3),
        NormalDomainParam(name='Mp', mean=0., std=1e6, clip_lo=1e-3),
        NormalDomainParam(name='Lr', mean=0., std=1e6, clip_lo=1e-3),
        NormalDomainParam(name='Lp', mean=0., std=1e6, clip_lo=1e-3),
    )
    env_sim = DomainRandWrapperLive(env_sim, randomizer)
    dp_map = {
        0: ('Mr', 'mean'),
        1: ('Mr', 'std'),
        2: ('Mp', 'mean'),
        3: ('Mp', 'std'),
        4: ('Lr', 'mean'),
        5: ('Lr', 'std'),   # entries 5-7 assumed to continue the mean/std pattern
        6: ('Lp', 'mean'),
        7: ('Lp', 'std'),
    }
Example #3
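Deploying a policy trained in simulation on the real platform: the script loads an experiment, detects which simulation environment was used, instantiates the matching real-world counterpart, wraps it like the training environment, and then runs rollouts on the device in a loop.
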
    # Parse command line arguments
    args = get_argparser().parse_args()

    # Get the experiment's directory to load from
    ex_dir = ask_for_experiment()

    # Load the policy (trained in simulation) and the environment (for constructing the real-world counterpart)
    env_sim, policy, _ = load_experiment(ex_dir)

    # Detect the correct real-world counterpart and create it
    if isinstance(inner_env(env_sim), QBallBalancerSim):
        env_real = QBallBalancerReal(dt=args.dt, max_steps=args.max_steps)
    elif isinstance(inner_env(env_sim), QCartPoleSim):
        env_real = QCartPoleReal(dt=args.dt, max_steps=args.max_steps)
    elif isinstance(inner_env(env_sim), QQubeSim):
        env_real = QQubeReal(dt=args.dt, max_steps=args.max_steps)
    else:
        raise pyrado.TypeErr(
            given=env_sim,
            expected_type=[QBallBalancerSim, QCartPoleSim, QQubeSim])
    print_cbt(f'Set up env {env_real.name}.', 'c')

    # Finally, wrap the env in the same way as done during training
    env_real = wrap_like_other_env(env_real, env_sim)

    # Run on device
    done = False
    print_cbt('Running loaded policy ...', 'c', bright=True)
    while not done:
        ro = rollout(env_real,
                     policy)  # further rollout arguments and the loop-exit logic are cut off in this excerpt
Example #4
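Setting up an experiment that runs the EMVD algorithm directly on the real QQubeReal system with the energy-based QQubeSwingUpAndBalanceCtrl; the commented and active arrays are initializations of the search distribution over the controller's parameters.
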
from pyrado.policies.features import (  # module path assumed; earlier imports of this script are cut off
    bell_feat, RandFourierFeat, MultFeat)
from pyrado.policies.linear import LinearPolicy
import torch as to
import numpy as np

if __name__ == '__main__':
    # Experiment (set seed before creating the modules)
    # ex_dir = setup_experiment(QQubeSim.name, PoWER.name, f'{LinearPolicy}_actnorm', seed=1)
    ex_dir = setup_experiment(QQubeReal.name,
                              EMVD.name,
                              QQubeSwingUpAndBalanceCtrl.name,
                              seed=2)

    # Environment
    env_hparams = dict(dt=1 / 500., max_steps=5000)
    env = QQubeReal(**env_hparams)
    # env = ActNormWrapper(env)

    # Search distribution
    # init_loc = np.array([np.log(0.02), np.log(50.), 0.3,
    #                      -2., 20., -1.0, 6.],
    #                     dtype=np.float64)
    # init_std = 0.5  * np.ones(init_loc.shape[0], dtype=np.float64)

    # init_loc = np.array([-3.727,   3.8218,  1.04,   -0.9979,  20.257,  -0.7138,  5.7895],
    #                     dtype=np.float64)
    # init_std = np.array([0.2288,  0.1952,  0.4372,  0.5408,  0.3838,  0.3574,  0.5939], dtype=np.float64)

    # Search distribution at iteration 18
    init_loc = np.array(
        [-3.5888, 3.7302, 1.0079, -1.1522, 20.4393, -0.8824, 5.6107],
        dtype=np.float64)
Example #5
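Evaluating a previously trained policy on the real Quanser Qube: the real environment is created with the simulation's dt and max_steps, wrapped like the simulation environment, and BayRn.eval_policy estimates the return from five rollouts, saving the results in an 'evaluation' subdirectory of the experiment.
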
import os
import os.path as osp
from datetime import datetime

from pyrado.algorithms.bayrn import BayRn
from pyrado.environments.quanser.quanser_qube import QQubeReal
from pyrado.logger.experiment import ask_for_experiment, timestamp_format
from pyrado.utils.experiments import wrap_like_other_env, load_experiment
from pyrado.utils.input_output import print_cbt
from pyrado.utils.argparser import get_argparser


if __name__ == '__main__':
    # Parse command line arguments
    args = get_argparser().parse_args()

    # Get the experiment's directory to load from if not given as command line argument
    ex_dir = ask_for_experiment() if args.ex_dir is None else args.ex_dir

    # Load the policy and the environment (for constructing the real-world counterpart)
    env_sim, policy, _ = load_experiment(ex_dir)

    # Create real-world counterpart (without domain randomization)
    env_real = QQubeReal(env_sim.dt, env_sim.max_steps)
    print_cbt(f'Set up the QQubeReal environment with dt={env_real.dt} max_steps={env_real.max_steps}.', 'c')
    env_real = wrap_like_other_env(env_real, env_sim)

    # Run the policy on the real system
    ex_ts = datetime.now().strftime(timestamp_format)
    save_dir = osp.join(ex_dir, 'evaluation')
    os.makedirs(save_dir, exist_ok=True)
    est_ret = BayRn.eval_policy(save_dir, env_real, policy, montecarlo_estimator=True, prefix=ex_ts, num_rollouts=5)

    print_cbt(f'Estimated return: {est_ret.item()}', 'g')
Example #6
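Excerpt (starting mid-statement) from a script that picks a pre-tuned controller for the real platform selected via args.env_name; for QQubeReal it instantiates QQubeSwingUpAndBalanceCtrl with energy and PD gains learned via MVD.
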
                                              1.2278416e+00, 4.5279346e+00,
                                              -1.2385756e-02, 6.0038762e+00,
                                              -4.1818547e+00
                                          ]))
        # policy = QCartPoleSwingUpAndBalanceCtrl(env.spec)
        print_cbt('Set up controller for the QCartPoleStabReal environment.',
                  'c')

    elif args.env_name == QCartPoleSwingUpReal.name:
        env = QCartPoleSwingUpReal(args.dt, args.max_steps)
        policy = QCartPoleSwingUpAndBalanceCtrl(env.spec)
        print_cbt(
            'Set up controller for the QCartPoleSwingUpReal environment.', 'c')

    elif args.env_name == QQubeReal.name:
        env = QQubeReal(args.dt, args.max_steps)
        # policy = QQubeSwingUpAndBalanceCtrl(env.spec)

        # MVD - Learned for the paper
        policy = QQubeSwingUpAndBalanceCtrl(
            env.spec,
            ref_energy=np.exp(-2.9414043),
            energy_gain=np.exp(3.1400251),
            energy_th_gain=0.73774934,  # for simulation and real system
            acc_max=5.,  # Quanser's value: 6
            alpha_max_pd_enable=10.,  # Quanser's value: 20
            pd_gains=to.tensor([-1.9773294, 35.084324, -1.1951622, 3.3797605]))

        print_cbt('Set up controller for the QQubeReal environment.', 'c')

    else:
        raise ValueError(f'Unknown environment name: {args.env_name}')  # assumed fallback; the original handling is cut off in this excerpt
Example #7
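A variant of the evaluation script that can additionally override dt, max_steps, and the number of rollouts per configuration from the command line, and can load the 'argmax' policy of a Bayesian-optimization experiment instead of the final one.
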
    # Get the experiment's directory to load from if not given as command line argument
    ex_dir = ask_for_experiment() if args.ex_dir is None else args.ex_dir

    # Load the policy and the environment (for constructing the real-world counterpart)
    env_sim, policy, _ = load_experiment(ex_dir, args)
    if 'argmax' in args.policy_name:
        policy = to.load(osp.join(ex_dir, 'policy_argmax.pt'))
        print_cbt(f"Loaded {osp.join(ex_dir, 'policy_argmax.pt')}",
                  'g',
                  bright=True)

    # Create real-world counterpart
    # If `max_steps` (or `dt`) are not explicitly set using `args`, use the same as in the simulation
    max_steps = args.max_steps if args.max_steps < pyrado.inf else env_sim.max_steps
    dt = args.dt if args.dt is not None else env_sim.dt
    env_real = QQubeReal(dt, max_steps)
    print_cbt(
        f'Set up the QQubeReal environment with dt={env_real.dt} max_steps={env_real.max_steps}.',
        'c')

    # Finally, wrap the env in the same way as done during training
    env_real = wrap_like_other_env(env_real, env_sim)

    ex_ts = datetime.now().strftime(timestamp_format)
    save_dir = osp.join(ex_dir, 'evaluation')
    os.makedirs(save_dir, exist_ok=True)
    num_ro_per_config = args.num_ro_per_config if args.num_ro_per_config is not None else 5
    est_ret = BayRn.eval_policy(save_dir,
                                env_real,
                                policy,
                                mc_estimator=True,
                                prefix=ex_ts,
                                num_rollouts=num_ro_per_config)  # trailing arguments assumed from the variables prepared above
Example #8
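A small helper, presumably a pytest fixture, that returns a QQubeReal environment with a 500 Hz control rate and a 500-step horizon.
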
def default_qq_real():
    return QQubeReal(dt=1 / 500., max_steps=500)