def test_combination():
    env = QCartPoleSwingUpSim(dt=1/50., max_steps=20)

    randomizer = create_default_randomizer(env)
    env_r = DomainRandWrapperBuffer(env, randomizer)
    env_r.fill_buffer(num_domains=3)

    dp_before = []
    dp_after = []
    for i in range(4):
        dp_before.append(env_r.domain_param)
        rollout(env_r, DummyPolicy(env_r.spec), eval=True, seed=0, render_mode=RenderMode())
        dp_after.append(env_r.domain_param)
        assert dp_after[i] != dp_before[i]
    assert dp_after[0] == dp_after[3]

    env_rn = ActNormWrapper(env)
    elb = {'x_dot': -213., 'theta_dot': -42.}
    eub = {'x_dot': 213., 'theta_dot': 42., 'x': 0.123}
    env_rn = ObsNormWrapper(env_rn, explicit_lb=elb, explicit_ub=eub)
    alb, aub = env_rn.act_space.bounds
    assert all(alb == -1)
    assert all(aub == 1)
    olb, oub = env_rn.obs_space.bounds
    assert all(olb == -1)
    assert all(oub == 1)

    ro_r = rollout(env_r, DummyPolicy(env_r.spec), eval=True, seed=0, render_mode=RenderMode())
    ro_rn = rollout(env_rn, DummyPolicy(env_rn.spec), eval=True, seed=0, render_mode=RenderMode())
    assert np.allclose(env_rn._process_obs(ro_r.observations), ro_rn.observations)

    env_rnp = ObsPartialWrapper(env_rn, idcs=['x_dot', r'cos_theta'])
    ro_rnp = rollout(env_rnp, DummyPolicy(env_rnp.spec), eval=True, seed=0, render_mode=RenderMode())

    env_rnpa = GaussianActNoiseWrapper(env_rnp,
                                       noise_mean=0.5*np.ones(env_rnp.act_space.shape),
                                       noise_std=0.1*np.ones(env_rnp.act_space.shape))
    ro_rnpa = rollout(env_rnpa, DummyPolicy(env_rnpa.spec), eval=True, seed=0, render_mode=RenderMode())
    assert np.allclose(ro_rnp.actions, ro_rnpa.actions)
    assert not np.allclose(ro_rnp.observations, ro_rnpa.observations)

    env_rnpd = ActDelayWrapper(env_rnp, delay=3)
    ro_rnpd = rollout(env_rnpd, DummyPolicy(env_rnpd.spec), eval=True, seed=0, render_mode=RenderMode())
    assert np.allclose(ro_rnp.actions, ro_rnpd.actions)
    assert not np.allclose(ro_rnp.observations, ro_rnpd.observations)

    assert isinstance(inner_env(env_rnpd), QCartPoleSwingUpSim)
    assert typed_env(env_rnpd, ObsPartialWrapper) is not None
    assert isinstance(env_rnpd, ActDelayWrapper)
    env_rnpdr = remove_env(env_rnpd, ActDelayWrapper)
    assert not isinstance(env_rnpdr, ActDelayWrapper)
Beispiel #2
0
from pyrado.environments.pysim.quanser_cartpole import QCartPoleSwingUpSim
from pyrado.environments.pysim.quanser_qube import QQubeSwingUpSim
from pyrado.policies.feed_forward.dummy import IdlePolicy
from pyrado.sampling.rollout import after_rollout_query, rollout
from pyrado.utils.argparser import get_argparser
from pyrado.utils.data_types import RenderMode
from pyrado.utils.input_output import print_cbt

if __name__ == "__main__":
    # Parse command line arguments
    args = get_argparser().parse_args()
    dt = args.dt if args.dt is not None else 0.01

    if args.env_name == QCartPoleSwingUpSim.name:
        env = QCartPoleSwingUpSim(dt=dt,
                                  max_steps=int(5 / dt),
                                  wild_init=False)
        state = np.array([0, 87 / 180 * np.pi, 0, 0])

    elif args.env_name == QQubeSwingUpSim.name:
        env = QQubeSwingUpSim(dt=dt, max_steps=int(5 / dt))
        state = np.array([5 / 180 * np.pi, 87 / 180 * np.pi, 0, 0])

    elif args.env_name == QBallBalancerSim.name:
        env = QBallBalancerSim(dt=dt, max_steps=int(5 / dt))
        state = np.array(
            [2 / 180 * np.pi, 2 / 180 * np.pi, 0.1, -0.08, 0, 0, 0, 0])

    elif args.env_name == OneMassOscillatorSim.name:
        env = OneMassOscillatorSim(dt=dt, max_steps=int(5 / dt))
        state = np.array([-0.7, 0])
        # Get the experiments' directories to load from
        prefixes = [
            osp.join(pyrado.EXP_DIR, 'ENV_NAME', 'ALGO_NAME'),
        ]
        ex_names = [
            '',
        ]
        ex_labels = [
            '',
        ]

    elif args.env_name in [QCartPoleStabSim.name, QCartPoleSwingUpSim.name]:
        # Create the environment for evaluating
        if args.env_name == QCartPoleSwingUpSim.name:
            env = QCartPoleSwingUpSim(dt=args.dt, max_steps=args.max_steps)
        else:
            env = QCartPoleStabSim(dt=args.dt, max_steps=args.max_steps)

        # Get the experiments' directories to load from
        prefixes = [
            osp.join(pyrado.EXP_DIR, 'ENV_NAME', 'ALGO_NAME'),
        ]
        ex_names = [
            '',
        ]
        ex_labels = [
            '',
        ]

    else:
Beispiel #4
0
from pyrado.policies.special.environment_specific import QCartPoleSwingUpAndBalanceCtrl
from pyrado.sampling.rollout import after_rollout_query, rollout
from pyrado.utils.argparser import get_argparser
from pyrado.utils.data_types import RenderMode
from pyrado.utils.input_output import print_cbt

if __name__ == "__main__":
    # Parse command line arguments
    args = get_argparser().parse_args()

    # Set up environment and policy (swing-up works reliably if is sampling frequency is >= 400 Hz)
    if args.env_name == "qcp-su":
        env = QCartPoleSwingUpSim(
            dt=args.dt,
            max_steps=int(12 / args.dt),
            long=False,
            simple_dynamics=True,
            wild_init=False,
        )
        policy = QCartPoleSwingUpAndBalanceCtrl(env.spec)

    elif args.env_name == "qcp-st":
        env = QCartPoleStabSim(
            dt=args.dt,
            max_steps=int(4 / args.dt),
            long=False,
            simple_dynamics=True,
        )
        policy = QCartPoleSwingUpAndBalanceCtrl(env.spec)

    else:
Beispiel #5
0
from pyrado.algorithms.nes import NES
from pyrado.logger.experiment import setup_experiment, save_list_of_dicts_to_yaml
from pyrado.environments.pysim.quanser_cartpole import QCartPoleSwingUpSim
from pyrado.environment_wrappers.action_normalization import ActNormWrapper
from pyrado.policies.rnn import LSTMPolicy, GRUPolicy

if __name__ == '__main__':
    # Experiment (set seed before creating the modules)
    ex_dir = setup_experiment(QCartPoleSwingUpSim.name,
                              NES.name,
                              GRUPolicy.name,
                              seed=1001)

    # Environments
    env_hparams = dict(dt=1 / 100., max_steps=1300, long=False)
    env = QCartPoleSwingUpSim(**env_hparams)
    env = ActNormWrapper(env)

    # Policy
    policy_hparam = dict(
        hidden_size=32,
        num_recurrent_layers=1,
        # init_param_kwargs=dict(t_max=50)
    )
    # policy = LSTMPolicy(spec=env.spec, **policy_hparam)
    policy = GRUPolicy(spec=env.spec, **policy_hparam)

    # Algorithm
    algo_hparam = dict(
        max_iter=5000,
        pop_size=50,
Beispiel #6
0
 def default_qcpsu():
     return QCartPoleSwingUpSim(dt=0.002, max_steps=8000)
Beispiel #7
0
    if hasattr(rollouts[0], "time"):
        dt = rollouts[0].time[1] - rollouts[0].time[0]  # dt is constant
    elif args.dt is not None:
        dt = args.dt
    else:
        raise pyrado.ValueErr(
            msg=
            "There was no time field in the loaded rollout to infer the time step size from, neither has "
            "it been specified explicitly! Please provide the time step size using --dt."
        )

    if env_name == QBallBalancerSim.name:
        env = QBallBalancerSim(dt=dt)

    elif env_name == QCartPoleSwingUpSim.name:
        env = QCartPoleSwingUpSim(dt=dt)

    elif env_name == QQubeSwingUpSim.name:
        env = QQubeSwingUpSim(dt=dt)

    elif env_name == "wam-bic":  # avoid loading mujoco
        from pyrado.environments.mujoco.wam_bic import WAMBallInCupSim

        env = WAMBallInCupSim(num_dof=4)
        env.init_space = BoxSpace(-pyrado.inf,
                                  pyrado.inf,
                                  shape=env.init_space.shape)

    elif env_name == "wam-jsc":  # avoid loading mujoco
        from pyrado.environments.mujoco.wam_jsc import WAMJointSpaceCtrlSim
Beispiel #8
0
    use_rec_act = True

    # Experiment (set seed before creating the modules)
    ex_dir = setup_experiment(
        QCartPoleSwingUpSim.name,
        f"{NPDR.name}_{QQubeSwingUpAndBalanceCtrl.name}",
        num_segs_str + len_seg_str + seed_str,
    )
    t_end = 7  # s

    # Set seed if desired
    pyrado.set_seed(args.seed, verbose=True)

    # Environments
    env_sim_hparams = dict(dt=1 / 250.0, max_steps=int(t_end * 250))
    env_sim = QCartPoleSwingUpSim(**env_sim_hparams)

    # Create the ground truth target domain and the behavioral policy
    env_real = osp.join(pyrado.EVAL_DIR, f"qcp-su_ectrl_250Hz_{t_end}s_filt")
    policy = QCartPoleSwingUpAndBalanceCtrl(
        env_sim.spec)  # replaced by the recorded actions if use_rec_act=True

    # Define a mapping: index - domain parameter
    dp_mapping = {0: "voltage_thold_neg", 1: "voltage_thold_pos"}
    # dp_mapping = {
    #     0: "motor_efficiency",
    #     1: "gear_efficiency",
    #     2: "pole_damping",
    #     3: "combined_damping",
    #     4: "cart_friction_coeff",
    #     5: "gear_ratio",
Beispiel #9
0
import pyrado
from pyrado.environments.pysim.quanser_cartpole import QCartPoleSwingUpSim, QCartPoleStabSim
from pyrado.policies.special.environment_specific import QCartPoleSwingUpAndBalanceCtrl
from pyrado.sampling.rollout import rollout, after_rollout_query
from pyrado.utils.argparser import get_argparser
from pyrado.utils.data_types import RenderMode
from pyrado.utils.input_output import print_cbt

if __name__ == '__main__':
    # Parse command line arguments
    args = get_argparser().parse_args()

    # Set up environment and policy (swing-up works reliably if is sampling frequency is >= 400 Hz)
    if args.env_name == 'qcp-su':
        env = QCartPoleSwingUpSim(dt=args.dt,
                                  max_steps=int(12 / args.dt),
                                  long=False)
        policy = QCartPoleSwingUpAndBalanceCtrl(env.spec)

    elif args.env_name == 'qcp-st':
        env = QCartPoleStabSim(dt=args.dt,
                               max_steps=int(4 / args.dt),
                               long=False)
        policy = QCartPoleSwingUpAndBalanceCtrl(env.spec)

    else:
        raise pyrado.ValueErr(given=args.env_name,
                              eq_constraint="'qcp-su' or 'qcp-st'")

    # Simulate
    done, param, state = False, None, None
Beispiel #10
0
if __name__ == "__main__":
    # Parse command line arguments
    args = get_argparser().parse_args()
    dt = args.dt or 1 / 500.0
    t_end = 5.5  # s
    max_steps = int(t_end / dt)  # run for 5s
    check_in_sim = False
    # max_amp = 5.0 / 180 * np.pi  # max. amplitude [rad]
    max_amp = -3.5  # max. amplitude [V]

    # Create the simulated and real environments
    if args.env_name == QQubeSwingUpReal.name:
        env_sim = QQubeSwingUpSim(dt, max_steps)
        env_real = QQubeSwingUpReal(dt, max_steps)
    elif args.env_name == QCartPoleSwingUpReal.name:
        env_sim = QCartPoleSwingUpSim(dt, max_steps)
        env_real = QCartPoleSwingUpReal(dt, max_steps)
    elif args.env_name == WAMReal.name:
        env_sim = WAMJointSpaceCtrlSim(frame_skip=4,
                                       num_dof=7,
                                       max_steps=max_steps)
        env_real = WAMJointSpaceCtrlRealStepBased(num_dof=7,
                                                  max_steps=max_steps)
    else:
        raise pyrado.ValueErr(
            given=args.env_name,
            eq_constraint=
            f"{QQubeSwingUpReal.name}, {QCartPoleSwingUpReal.name} or {WAMReal.name}"
        )
    print_cbt(f"Set the {env_real.name} environment.", "g")