""" Test predefined energy-based controller to make the Quanser Qube swing up. """ import torch as to from pyrado.environments.pysim.quanser_qube import QQubeSim from pyrado.domain_randomization.utils import print_domain_params from pyrado.policies.environment_specific import QQubeSwingUpAndBalanceCtrl from pyrado.sampling.rollout import rollout, after_rollout_query from pyrado.utils.data_types import RenderMode from pyrado.utils.input_output import print_cbt if __name__ == '__main__': # Set up environment env = QQubeSim(dt=1/500., max_steps=4000) # Set up policy policy = QQubeSwingUpAndBalanceCtrl(env.spec) # Simulate done, param, state = False, None, None while not done: ro = rollout(env, policy, render_mode=RenderMode(text=False, video=True), eval=True, reset_kwargs=dict(domain_param=param, init_state=state)) print_domain_params(env.domain_param) print_cbt(f'Return: {ro.undiscounted_return()}', 'g', bright=True) done, state, param = after_rollout_query(env, policy, ro)
# Policy
# policy_hparam = dict(
#     # feats=FeatureStack([RandFourierFeat(env.obs_space.flat_dim, num_feat=20, bandwidth=env.obs_space.bound_up)])
#     feats=FeatureStack([identity_feat, sign_feat, abs_feat, squared_feat,
#                         MultFeat([2, 5]), MultFeat([3, 5]), MultFeat([4, 5])])
# )
# policy = LinearPolicy(spec=env.spec, **policy_hparam)
# policy_hparam = dict(energy_gain=0.587, ref_energy=0.827)
policy_hparam = dict(
    ref_energy=0.02,
    energy_gain=50.,
    energy_th_gain=0.3,  # This parameter is fixed (requires_grad = False)
    acc_max=5.,
    alpha_max_pd_enable=10.,
    pd_gains=to.tensor([-2, 35, -1.5, 3])
)
policy = QQubeSwingUpAndBalanceCtrl(env.spec, **policy_hparam)

# Algorithm
algo_hparam = dict(
    max_iter=50,
    pop_size=50,
    num_rollouts=10,
    # pop_size=2*(6+6),
    # num_rollouts=1,
    num_is_samples=10,
    expl_std_init=1.0,
    expl_std_min=0.000001,
    symm_sampling=False,
    num_sampler_envs=12,
)
algo = PoWER(ex_dir, env, policy, **algo_hparam)
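# Minimal sketch of how such a pyrado training script typically ends (not part
# of this excerpt): the hyper-parameters are saved next to the experiment
# directory and training is started. `save_list_of_dicts_to_yaml` is assumed to
# be imported from pyrado.logger.experiment, and `ex_dir` / `args.seed` from
# the usual experiment setup.
save_list_of_dicts_to_yaml(
    [dict(policy=policy_hparam),
     dict(algo=algo_hparam, algo_name=algo.name)],
    ex_dir
)
algo.train(snapshot_mode='best', seed=args.seed)  # keep the best policy found so far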
elif args.env_name == QCartPoleSwingUpReal.name:
    env = QCartPoleSwingUpReal(args.dt, args.max_steps)
    policy = QCartPoleSwingUpAndBalanceCtrl(env.spec)
    print_cbt('Set up controller for the QCartPoleSwingUpReal environment.', 'c')

elif args.env_name == QQubeReal.name:
    env = QQubeReal(args.dt, args.max_steps)
    # policy = QQubeSwingUpAndBalanceCtrl(env.spec)
    # MVD - learned for the paper
    policy = QQubeSwingUpAndBalanceCtrl(
        env.spec,
        ref_energy=np.exp(-2.9414043),
        energy_gain=np.exp(3.1400251),
        energy_th_gain=0.73774934,  # for simulation and real system
        acc_max=5.,  # Quanser's value: 6
        alpha_max_pd_enable=10.,  # Quanser's value: 20
        pd_gains=to.tensor([-1.9773294, 35.084324, -1.1951622, 3.3797605])
    )
    print_cbt('Set up controller for the QQubeReal environment.', 'c')

else:
    raise pyrado.ValueErr(
        given=args.env_name,
        eq_constraint=f'{QBallBalancerReal.name}, {QCartPoleSwingUpReal.name}, '
                      f'{QCartPoleStabReal.name}, or {QQubeReal.name}')

# Run on device
done = False
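# Sketch of the run loop that typically follows `done = False`, modeled on the
# simulation script at the top of this section; details of the actual
# deployment script (e.g. rendering options) may differ.
while not done:
    ro = rollout(env, policy, eval=True)
    print_cbt(f'Return: {ro.undiscounted_return()}', 'g', bright=True)
    done, _, _ = after_rollout_query(env, policy, ro)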
dist = GaussianDiagonalLogStdParametrization(init_loc=init_loc, init_std=init_std)
# dist = GaussianDiagonal(init_loc=init_loc, init_std=init_std)

# Policy
policy_hparam = dict(
    ref_energy=init_loc[0],
    energy_gain=init_loc[1],
    # energy_th_gain=0.3,  # use this alternative to keep the parameter fixed (requires_grad = False)
    energy_th_gain=init_loc[2],
    acc_max=5.,
    alpha_max_pd_enable=10.,
    pd_gains=to.tensor([init_loc[3], init_loc[4], init_loc[5], init_loc[6]], dtype=to.float64)
)
policy = QQubeSwingUpAndBalanceCtrl(env.spec, **policy_hparam)

# Set the policy parameters to the initial ones ...
# policy.param_values = to.tensor(init_loc)
# ... or sample a policy from the final search distribution
policy.param_values = to.tensor(dist.sample([1]).view(-1))

# Algorithm
algo_hparam = dict(
    max_iter=50,
    pop_size=1,
    num_rollouts=1,
    expl_std_init=1.0,
    expl_std_min=0.0,
    num_sampler_envs=1,
    n_mc_samples_gradient=1,
dist = GaussianDiagonalLogStdParametrization(init_loc=init_loc, init_std=init_std)
# dist = GaussianDiagonal(init_loc=init_loc, init_std=init_std)

# Policy
policy_hparam = dict(
    ref_energy=init_loc[0],
    energy_gain=init_loc[1],
    # energy_th_gain=0.3,  # use this alternative to keep the parameter fixed (requires_grad = False)
    energy_th_gain=init_loc[2],
    acc_max=5.,
    alpha_max_pd_enable=10.,
    pd_gains=to.tensor([-1.7313308, 35.976177, -1.58682, 3.0102878], dtype=to.float64)
)
# only_swingup_control=True restricts the controller to the energy-based swing-up part
policy = QQubeSwingUpAndBalanceCtrl(env.spec, **policy_hparam, only_swingup_control=True)

# Set the policy parameters to the initial ones
policy.param_values = to.tensor(init_loc)

# Algorithm
algo_hparam = dict(
    max_iter=50,
    pop_size=1,
    num_rollouts=1,
    expl_std_init=1.0,
    expl_std_min=0.0,
    num_sampler_envs=16,
    n_mc_samples_gradient=1,
    coupling=True,
    lr=1e-1,