Example #1
"""
Test predefined energy-based controller to make the Quanser Qube swing up.
"""
import torch as to

from pyrado.environments.pysim.quanser_qube import QQubeSim
from pyrado.domain_randomization.utils import print_domain_params
from pyrado.policies.environment_specific import QQubeSwingUpAndBalanceCtrl
from pyrado.sampling.rollout import rollout, after_rollout_query
from pyrado.utils.data_types import RenderMode
from pyrado.utils.input_output import print_cbt


if __name__ == '__main__':
    # Set up environment
    env = QQubeSim(dt=1/500., max_steps=4000)

    # Set up policy
    policy = QQubeSwingUpAndBalanceCtrl(env.spec)

    # Simulate
    done, param, state = False, None, None
    while not done:
        ro = rollout(env, policy, render_mode=RenderMode(text=False, video=True), eval=True,
                     reset_kwargs=dict(domain_param=param, init_state=state))
        print_domain_params(env.domain_param)
        print_cbt(f'Return: {ro.undiscounted_return()}', 'g', bright=True)
        done, state, param = after_rollout_query(env, policy, ro)
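
The `reset_kwargs` argument accepted by `rollout` also takes a custom domain parameter dictionary, so the same setup can be used to check how the energy-based controller behaves under model mismatch. A minimal sketch, assuming the nominal parameters can be queried via `get_nominal_domain_param()` and that the pendulum mass and length are exposed under the keys `Mp` and `Lp` (the exact keys depend on the pyrado version):

# Perturb the pendulum mass and length by 10% and roll out once with these values
nominal = env.get_nominal_domain_param()
perturbed = dict(Mp=1.1*nominal['Mp'], Lp=1.1*nominal['Lp'])
ro = rollout(env, policy, render_mode=RenderMode(video=True), eval=True,
             reset_kwargs=dict(domain_param=perturbed, init_state=None))
print_cbt(f'Return with perturbed domain parameters: {ro.undiscounted_return()}', 'y', bright=True)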
Example #2
    # Policy
    # policy_hparam = dict(
    #     # feats=FeatureStack([RandFourierFeat(env.obs_space.flat_dim, num_feat=20, bandwidth=env.obs_space.bound_up)])
    #     feats=FeatureStack([identity_feat, sign_feat, abs_feat, squared_feat,
    #                         MultFeat([2, 5]), MultFeat([3, 5]), MultFeat([4, 5])])
    # )
    # policy = LinearPolicy(spec=env.spec, **policy_hparam)
    # policy_hparam = dict(energy_gain=0.587, ref_energy=0.827)
    policy_hparam = dict(
        ref_energy=0.02,
        energy_gain=50.,
        energy_th_gain=0.3,  # This parameter is fixed. (requires_grad = False)
        acc_max=5.,
        alpha_max_pd_enable=10.,
        pd_gains=to.tensor([-2, 35, -1.5, 3]))
    policy = QQubeSwingUpAndBalanceCtrl(env.spec, **policy_hparam)

    # Algorithm
    algo_hparam = dict(
        max_iter=50,
        pop_size=50,
        num_rollouts=10,
        # pop_size=2*(6+6),
        # num_rollouts=1,
        num_is_samples=10,
        expl_std_init=1.0,
        expl_std_min=0.000001,
        symm_sampling=False,
        num_sampler_envs=12,
    )
    algo = PoWER(ex_dir, env, policy, **algo_hparam)
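
This snippet assumes that `ex_dir` (the experiment's save directory) and `env` were created earlier in the script and that training is launched afterwards. A rough sketch of that surrounding context, using `setup_experiment` from `pyrado.logger.experiment` and the simulation environment from the first example (import paths and constructor arguments may differ between pyrado versions):

from pyrado.algorithms.power import PoWER
from pyrado.environments.pysim.quanser_qube import QQubeSim
from pyrado.logger.experiment import setup_experiment
from pyrado.policies.environment_specific import QQubeSwingUpAndBalanceCtrl

# Experiment directory and simulation environment
ex_dir = setup_experiment(QQubeSim.name, PoWER.name)
env = QQubeSim(dt=1/500., max_steps=4000)

# Energy-based swing-up controller with default gains
# (the tuned policy_hparam from the snippet above can be passed instead)
policy = QQubeSwingUpAndBalanceCtrl(env.spec)

# Episodic policy search with PoWER, then training
algo = PoWER(ex_dir, env, policy, max_iter=50, pop_size=50, num_rollouts=10,
             num_is_samples=10, expl_std_init=1.0)
algo.train(seed=1001)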
Example #3
    elif args.env_name == QCartPoleSwingUpReal.name:
        env = QCartPoleSwingUpReal(args.dt, args.max_steps)
        policy = QCartPoleSwingUpAndBalanceCtrl(env.spec)
        print_cbt(
            'Set up controller for the QCartPoleSwingUpReal environment.', 'c')

    elif args.env_name == QQubeReal.name:
        env = QQubeReal(args.dt, args.max_steps)
        # policy = QQubeSwingUpAndBalanceCtrl(env.spec)

        # MVD - Learned for the paper
        policy = QQubeSwingUpAndBalanceCtrl(
            env.spec,
            ref_energy=np.exp(-2.9414043),
            energy_gain=np.exp(3.1400251),
            energy_th_gain=0.73774934,  # for simulation and real system
            acc_max=5.,  # Quanser's value: 6
            alpha_max_pd_enable=10.,  # Quanser's value: 20
            pd_gains=to.tensor([-1.9773294, 35.084324, -1.1951622, 3.3797605]))

        print_cbt('Set up controller for the QQubeReal environment.', 'c')

    else:
        raise pyrado.ValueErr(
            given=args.env_name,
            eq_constraint=
            f'{QBallBalancerReal.name}, {QCartPoleSwingUpReal.name}, '
            f'{QCartPoleStabReal.name}, or {QQubeReal.name}')

    # Run on device
    done = False
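
The script then continues with the interactive rollout loop on the real device. A possible sketch of that continuation (the actual script may differ), mirroring the rollout loop from Example #1:

    while not done:
        ro = rollout(env, policy, render_mode=RenderMode(text=True), eval=True)
        print_cbt(f'Return: {ro.undiscounted_return()}', 'g', bright=True)
        done, _, _ = after_rollout_query(env, policy, ro)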
Example #4
    dist = GaussianDiagonalLogStdParametrization(init_loc=init_loc,
                                                 init_std=init_std)
    # dist = GaussianDiagonal(init_loc=init_loc, init_std=init_std)

    # Policy
    policy_hparam = dict(
        ref_energy=init_loc[0],
        energy_gain=init_loc[1],
        # energy_th_gain=0.3, # This parameter is fixed.
        energy_th_gain=init_loc[2],  # No longer fixed: taken from the search distribution's initial location
        acc_max=5.,
        alpha_max_pd_enable=10.,
        pd_gains=to.tensor(
            [init_loc[3], init_loc[4], init_loc[5], init_loc[6]],
            dtype=to.float64))
    policy = QQubeSwingUpAndBalanceCtrl(env.spec, **policy_hparam)

    # Set the policy parameters to the initial ones...
    # policy.param_values = to.tensor(init_loc)

    # Sample a policy from the final search distribution
    policy.param_values = to.tensor(dist.sample([1]).view(-1))

    # Algorithm
    algo_hparam = dict(max_iter=50,
                       pop_size=1,
                       num_rollouts=1,
                       expl_std_init=1.0,
                       expl_std_min=0.0,
                       num_sampler_envs=1,
                       n_mc_samples_gradient=1,
Example #5
    dist = GaussianDiagonalLogStdParametrization(init_loc=init_loc,
                                                 init_std=init_std)
    # dist = GaussianDiagonal(init_loc=init_loc, init_std=init_std)

    # Policy
    policy_hparam = dict(
        ref_energy=init_loc[0],
        energy_gain=init_loc[1],
        # energy_th_gain=0.3, # This parameter is fixed.
        energy_th_gain=init_loc[2],  # No longer fixed: taken from the search distribution's initial location
        acc_max=5.,
        alpha_max_pd_enable=10.,
        pd_gains=to.tensor([-1.7313308, 35.976177, -1.58682, 3.0102878],
                           dtype=to.float64))
    policy = QQubeSwingUpAndBalanceCtrl(env.spec,
                                        **policy_hparam,
                                        only_swingup_control=True)

    # Set the policy parameters to the initial ones...
    policy.param_values = to.tensor(init_loc)

    # Algorithm
    algo_hparam = dict(max_iter=50,
                       pop_size=1,
                       num_rollouts=1,
                       expl_std_init=1.0,
                       expl_std_min=0.0,
                       num_sampler_envs=16,
                       n_mc_samples_gradient=1,
                       coupling=True,
                       lr=1e-1,