def create_qqsu_setup():
    """
    Assemble the environments and policies for a Quanser Qube swing-up sim-to-sim setup.

    The "real" environment is a second `QQubeSwingUpSim` instance whose pole masses and lengths are set to scaled
    versions (factor 0.9 or 1.1) of the values used as prior means below.

    :return: tuple `(env_sim, env_real, env_hparams, dp_map, behavior_policy, ddp_policy)`
    """
    dp_names = ("mass_rot_pole", "mass_pend_pole", "length_rot_pole", "length_pend_pole")
    prior_means = dict(mass_rot_pole=0.095, mass_pend_pole=0.024, length_rot_pole=0.085, length_pend_pole=0.129)

    # Environments
    env_hparams = dict(dt=1 / 100.0, max_steps=600)
    env_real = QQubeSwingUpSim(**env_hparams)
    env_real.domain_param = dict(
        mass_rot_pole=prior_means["mass_rot_pole"] * 0.9,  # 0.0855
        mass_pend_pole=prior_means["mass_pend_pole"] * 1.1,  # 0.0264
        length_rot_pole=prior_means["length_rot_pole"] * 0.9,  # 0.0765
        length_pend_pole=prior_means["length_pend_pole"] * 1.1,  # 0.1419
    )

    env_sim = QQubeSwingUpSim(**env_hparams)
    initial_params = [NormalDomainParam(name=key, mean=0.0, std=1e-9, clip_lo=1e-3) for key in dp_names]
    env_sim = DomainRandWrapperLive(env_sim, DomainRandomizer(*initial_params))

    # Index 2*i addresses the mean and index 2*i + 1 the std of the i-th domain parameter
    dp_map = {
        2 * pos + offset: (key, attr)
        for pos, key in enumerate(dp_names)
        for offset, attr in enumerate(("mean", "std"))
    }
    # Alternative that only transforms the std entries: [False, True, False, True, False, True, False, True]
    trafo_mask = [True for _ in range(8)]
    env_sim = MetaDomainRandWrapper(env_sim, dp_map)

    # Policies (the behavioral policy needs to be deterministic)
    behavior_policy = QQubeSwingUpAndBalanceCtrl(env_sim.spec)
    prior = DomainRandomizer(
        *[NormalDomainParam(name=key, mean=prior_means[key], std=prior_means[key] / 10) for key in dp_names]
    )
    ddp_policy = DomainDistrParamPolicy(mapping=dp_map, trafo_mask=trafo_mask, prior=prior, scale_params=False)

    return env_sim, env_real, env_hparams, dp_map, behavior_policy, ddp_policy
def get_default_randomizer_pend() -> DomainRandomizer:
    """
    Build the default randomizer for the `PendulumSim`.

    Every listed parameter is normally distributed around its nominal value with a standard deviation of one tenth
    of that value and a lower clipping bound of 1e-3.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.pendulum import PendulumSim

    nominal = PendulumSim.get_nominal_domain_param()
    randomized_keys = ('g', 'm_pole', 'l_pole', 'd_pole', 'tau_max')
    return DomainRandomizer(
        *[NormalDomainParam(name=key, mean=nominal[key], std=nominal[key] / 10, clip_lo=1e-3)
          for key in randomized_keys]
    )
def get_default_randomizer_pi() -> DomainRandomizer:
    """
    Get the default randomizer for the `PlanarInsertSim`.

    The five link masses are normally distributed around their own nominal value (std = nominal / 5, clipped from
    below at 1e-2). The vertical offset of the upper wall is uniformly distributed and clipped at 0.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.rcspysim.planar_insert import PlanarInsertSim

    dp_nom = PlanarInsertSim.get_nominal_domain_param()
    link_mass_keys = ('link1_mass', 'link2_mass', 'link3_mass', 'link4_mass', 'link5_mass')
    # Each link uses its own nominal mass; previously `link5_mass` was built from the nominal value of `link4_mass`.
    # NOTE(review): assumes the nominal domain parameters contain a `link5_mass` entry -- confirm.
    link_masses = [
        NormalDomainParam(name=key, mean=dp_nom[key], std=dp_nom[key] / 5, clip_lo=1e-2) for key in link_mass_keys
    ]
    return DomainRandomizer(
        *link_masses,
        UniformDomainParam(name='upperwall_pos_offset_z', mean=0, halfspan=0.05, clip_lo=0)  # only increase the gap
    )
def create_default_randomizer_pend() -> DomainRandomizer:
    """
    Build the default randomizer for the `PendulumSim`.

    Every listed parameter is normally distributed around its nominal value with a standard deviation of one tenth
    of that value and a lower clipping bound of 1e-3.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.pendulum import PendulumSim

    nominal = PendulumSim.get_nominal_domain_param()
    randomized_keys = ("gravity_const", "pole_mass", "pole_length", "pole_damping", "torque_thold")
    domain_params = []
    for key in randomized_keys:
        domain_params.append(NormalDomainParam(name=key, mean=nominal[key], std=nominal[key] / 10, clip_lo=1e-3))
    return DomainRandomizer(*domain_params)
def get_default_randomizer_bl() -> DomainRandomizer:
    """
    Build a default randomizer for the box and basket parameters, using the nominal values of `BoxShelvingSim`.

    NOTE(review): the `_bl` suffix and the former docstring refer to `BoxLifting`, while the nominal values are read
    from `BoxShelvingSim` -- confirm that this is the intended environment.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.rcspysim.box_shelving import BoxShelvingSim

    nominal = BoxShelvingSim.get_nominal_domain_param()

    def _normal(key, divisor):
        # Normal distribution around the nominal value, std is the nominal value divided by `divisor`
        return NormalDomainParam(name=key, mean=nominal[key], std=nominal[key] / divisor)

    def _friction(key):
        # Uniform distribution spanning +/- 20% of the nominal friction coefficient
        return UniformDomainParam(name=key, mean=nominal[key], halfspan=nominal[key] / 5, clip_lo=1e-5)

    return DomainRandomizer(
        _normal('box_length', 10),
        _normal('box_width', 10),
        _normal('box_mass', 5),
        _friction('box_friction_coefficient'),
        _normal('basket_mass', 5),
        _friction('basket_friction_coefficient'),
    )
def get_empty_randomizer() -> DomainRandomizer:
    """
    Create a randomizer without any domain parameters, independent of the environment.
    It is meant to be filled later (using `add_domain_params`).

    :return: empty randomizer
    """
    blank_randomizer = DomainRandomizer()
    return blank_randomizer
def create_default_randomizer_qbb() -> DomainRandomizer:
    """
    Build the default randomizer for the `QBallBalancerSim`.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.quanser_ball_balancer import QBallBalancerSim

    nominal = QBallBalancerSim.get_nominal_domain_param()

    # (name, divisor of the nominal value yielding the std, lower clipping bound)
    normal_specs = (
        ('g', 10, 1e-4),
        ('m_ball', 5, 1e-4),
        ('r_ball', 5, 1e-3),
        ('l_plate', 5, 5e-2),
        ('r_arm', 5, 1e-4),
        ('K_g', 4, 1e-2),
        ('J_l', 4, 1e-6),
        ('J_m', 4, 1e-9),
        ('k_m', 4, 1e-4),
        ('R_m', 4, 1e-4),
    )
    domain_params = [
        NormalDomainParam(name=key, mean=nominal[key], std=nominal[key]/divisor, clip_lo=lower)
        for key, divisor, lower in normal_specs
    ]

    # Efficiencies are additionally clipped from above at 1
    for key in ('eta_g', 'eta_m'):
        domain_params.append(
            UniformDomainParam(name=key, mean=nominal[key], halfspan=nominal[key]/4, clip_lo=1e-4, clip_up=1)
        )
    for key in ('B_eq', 'c_frict'):
        domain_params.append(UniformDomainParam(name=key, mean=nominal[key], halfspan=nominal[key]/4, clip_lo=1e-4))

    # Voltage thresholds: the absolute value is taken for the `*_neg` entries when computing the half-span
    for key_pos, key_neg in (('V_thold_x_pos', 'V_thold_x_neg'), ('V_thold_y_pos', 'V_thold_y_neg')):
        domain_params.append(UniformDomainParam(name=key_pos, mean=nominal[key_pos], halfspan=nominal[key_pos]/3))
        domain_params.append(
            UniformDomainParam(name=key_neg, mean=nominal[key_neg], halfspan=abs(nominal[key_neg])/3)
        )

    # Angular offsets vary by +/- 6 degrees (expressed in radians) around their nominal value
    for key in ('offset_th_x', 'offset_th_y'):
        domain_params.append(UniformDomainParam(name=key, mean=nominal[key], halfspan=6./180*np.pi))

    return DomainRandomizer(*domain_params)
def default_dummy_randomizer():
    """
    Create a randomizer made of plain `DomainParam` instances that only carry a name and a mean.

    :return: randomizer holding the parameters `mass`, `special`, `length`, and `time_delay`
    """
    fixed_means = (('mass', 1.2), ('special', 0), ('length', 4), ('time_delay', 13))
    return DomainRandomizer(*[DomainParam(name=key, mean=value) for key, value in fixed_means])
def create_default_randomizer_bs() -> DomainRandomizer:
    """
    Build the default randomizer for the `BoxShelvingSim`.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.rcspysim.box_shelving import BoxShelvingSim

    nominal = BoxShelvingSim.get_nominal_domain_param()

    # Box geometry and mass: normal distributions, std given as nominal value divided by the listed divisor
    box_params = [
        NormalDomainParam(name=key, mean=nominal[key], std=nominal[key] / divisor)
        for key, divisor in (("box_length", 10), ("box_width", 10), ("box_mass", 5))
    ]
    # Friction: uniform distribution spanning +/- 20% of the nominal value
    friction_key = "box_friction_coefficient"
    box_friction = UniformDomainParam(
        name=friction_key, mean=nominal[friction_key], halfspan=nominal[friction_key] / 5, clip_lo=1e-5
    )
    return DomainRandomizer(*box_params, box_friction)
def get_uniform_masses_lengths_randomizer_qq(frac_halfspan: float) -> "DomainRandomizer":
    """
    Get a uniform randomizer that applies to all masses and lengths of the Quanser Qube.

    The half-span of every distribution is the nominal parameter value divided by `frac_halfspan`, e.g.
    `frac_halfspan=10` yields a range of +/- 10% around the nominal value.

    :param frac_halfspan: strictly positive divisor applied to the nominal parameter values to obtain the half-spans
    :return: `DomainRandomizer` with uniformly distributed masses and lengths
    :raises ValueError: if `frac_halfspan` is not strictly positive (this also covers NaN)
    """
    # Validate first: zero would otherwise end in a bare ZeroDivisionError, negative values in a negative half-span
    if not frac_halfspan > 0:
        raise ValueError(f"frac_halfspan must be strictly positive, but received {frac_halfspan}!")

    from pyrado.environments.pysim.quanser_qube import QQubeSim

    dp_nom = QQubeSim.get_nominal_domain_param()
    # (name, lower clipping bound): masses are clipped at 1e-3, lengths at 1e-2
    specs = (('Mp', 1e-3), ('Mr', 1e-3), ('Lr', 1e-2), ('Lp', 1e-2))
    return DomainRandomizer(
        *[UniformDomainParam(name=key, mean=dp_nom[key], halfspan=dp_nom[key] / frac_halfspan, clip_lo=lower)
          for key, lower in specs]
    )
def default_randomizer():
    """
    Create a randomizer mixing normal, uniform, and multivariate normal domain parameters with various clipping and
    integer rounding settings.

    :return: randomizer holding the parameters `mass`, `special`, `length`, `time_delay`, and `multidim`
    """
    mass = NormalDomainParam(name="mass", mean=1.2, std=0.1, clip_lo=10, clip_up=100)
    special = UniformDomainParam(name="special", mean=0, halfspan=42, clip_lo=-7.4, roundint=True)
    length = NormalDomainParam(name="length", mean=4, std=0.6, clip_up=50.1)
    time_delay = UniformDomainParam(name="time_delay", mean=13, halfspan=6, clip_up=17, roundint=True)
    multidim = MultivariateNormalDomainParam(
        name="multidim", mean=10 * to.ones((2,)), cov=2 * to.eye(2), clip_up=11
    )
    return DomainRandomizer(mass, special, length, time_delay, multidim)
def default_pert():
    """
    Create a randomizer mixing normal, uniform, and multivariate normal domain parameters with various clipping and
    integer rounding settings.

    :return: randomizer holding the parameters `mass`, `special`, `length`, `time_delay`, and `multidim`
    """
    domain_params = [
        NormalDomainParam(name='mass', mean=1.2, std=0.1, clip_lo=10, clip_up=100),
        UniformDomainParam(name='special', mean=0, halfspan=42, clip_lo=-7.4, roundint=True),
        NormalDomainParam(name='length', mean=4, std=0.6, clip_up=50.1),
        UniformDomainParam(name='time_delay', mean=13, halfspan=6, clip_up=17, roundint=True),
    ]
    # Two-dimensional parameter with mean [10, 10] and covariance 2 * identity
    domain_params.append(
        MultivariateNormalDomainParam(name='multidim', mean=10*to.ones((2,)), cov=2*to.eye(2), clip_up=11)
    )
    return DomainRandomizer(*domain_params)
def create_example_randomizer_cata() -> DomainRandomizer:
    """
    Build the randomizer for the `CatapultSim` used for the 'illustrative example' in F. Muratore et al, 2019, TAMPI.

    :return: randomizer with a single Bernoulli-distributed `planet` parameter
    """
    # 0 = Mars, 1 = Venus; the value 1 is drawn with probability 0.7
    planet = BernoulliDomainParam(name='planet', mean=None, val_0=0, val_1=1, prob_1=0.7, roundint=True)
    return DomainRandomizer(planet)
def test_sprl(ex_dir, env: SimEnv, optimize_mean: bool):
    """
    Run a single SPRL iteration with PPO as subroutine and check that the iteration counter reached its maximum.

    :param ex_dir: experiment directory handed to the PPO subroutine
    :param env: simulation environment, wrapped here with action normalization and live domain randomization
    :param optimize_mean: forwarded to `SPRL` as `optimize_mean`
    """
    pyrado.set_seed(0)

    # Environment with a self-paced distribution over the gravity constant
    env = ActNormWrapper(env)
    gravity_spec = dict(
        name="gravity_const",
        target_mean=to.tensor([9.81]),
        target_cov_chol_flat=to.tensor([1.0]),
        init_mean=to.tensor([9.81]),
        init_cov_chol_flat=to.tensor([0.05]),
    )
    self_paced_randomizer = DomainRandomizer(*[SelfPacedDomainParam(**spec) for spec in (gravity_spec,)])
    env = DomainRandWrapperLive(env, randomizer=self_paced_randomizer)

    # Policy and critic
    policy = FNNPolicy(env.spec, hidden_sizes=[64, 64], hidden_nonlin=to.tanh)
    vfcn = FNNPolicy(spec=EnvSpec(env.obs_space, ValueFunctionSpace), hidden_sizes=[32, 32], hidden_nonlin=to.relu)
    critic = GAE(
        vfcn,
        gamma=0.9844534412010116,
        lamda=0.9710614403461155,
        num_epoch=10,
        batch_size=150,
        standardize_adv=False,
        lr=0.00016985313083236645,
    )

    # Subroutine, limited to one iteration
    subrtn = PPO(
        ex_dir,
        env,
        policy,
        critic,
        max_iter=1,
        eps_clip=0.12648736789309026,
        min_steps=10 * env.max_steps,
        num_epoch=3,
        batch_size=150,
        std_init=0.7573286998997557,
        lr=6.999956625305722e-04,
        max_grad_norm=1.0,
        num_workers=1,
    )

    algo = SPRL(
        env,
        subrtn,
        kl_constraints_ub=8000,
        performance_lower_bound=500,
        std_lower_bound=0.4,
        kl_threshold=200,
        max_iter=1,
        optimize_mean=optimize_mean,
    )
    algo.train(snapshot_mode="latest")

    assert algo.curr_iter == algo.max_iter
def bob_pert():
    """
    Create a randomizer with hard-coded means for the ball-on-beam parameters.

    The normally distributed parameters use a standard deviation of 10% of their mean and are clipped from below
    at 1e-3.

    :return: randomizer holding six normal and two uniform domain parameters
    """
    # (name, mean, std)
    normal_specs = (
        ('g', 9.81, 0.981),
        ('r_ball', 0.1, 0.01),
        ('m_ball', 0.5, 0.05),
        ('m_beam', 3.0, 0.3),
        ('d_beam', 0.1, 0.01),
        ('l_beam', 2.0, 0.2),
    )
    domain_params = [
        NormalDomainParam(name=key, mean=mean, std=std, clip_lo=1e-3) for key, mean, std in normal_specs
    ]
    domain_params.append(UniformDomainParam(name='c_frict', mean=0.05, halfspan=0.05))
    # +/- 5 degrees, expressed in radians
    domain_params.append(UniformDomainParam(name='ang_offset', mean=0, halfspan=5.*np.pi/180))
    return DomainRandomizer(*domain_params)
def create_bob_setup():
    """
    Assemble the environments and policies for a ball-on-beam sim-to-sim setup in which only the gravity constant
    `g` is mapped to the domain distribution parameters.

    The "real" environment is a second `BallOnBeamSim` instance with `g` set to 10.81, while the prior is centered
    at 8.81.

    :return: tuple `(env_sim, env_real, env_hparams, dp_map, behavior_policy, ddp_policy)`
    """
    # Environments
    env_hparams = dict(dt=1 / 100., max_steps=500)
    env_real = BallOnBeamSim(**env_hparams)
    env_real.domain_param = {'g': 10.81}

    env_sim = BallOnBeamSim(**env_hparams)
    initial_randomizer = DomainRandomizer(NormalDomainParam(name='g', mean=0, std=1e-12))
    env_sim = DomainRandWrapperLive(env_sim, initial_randomizer)
    dp_map = {idx: ('g', attr) for idx, attr in enumerate(('mean', 'std'))}
    env_sim = MetaDomainRandWrapper(env_sim, dp_map)

    # Policies (the behavioral policy needs to be deterministic)
    feature_stack = FeatureStack([identity_feat, sin_feat])
    behavior_policy = LinearPolicy(env_sim.spec, feats=feature_stack)
    behavior_policy.param_values = to.tensor(
        [3.8090, -3.8036, -1.0786, -2.4510, -0.9875, -1.3252, 3.1503, 1.4443]
    )

    prior_mean_g = 8.81
    prior = DomainRandomizer(NormalDomainParam(name='g', mean=prior_mean_g, std=prior_mean_g / 10))
    ddp_policy = DomainDistrParamPolicy(
        mapping=dp_map,
        trafo_mask=[True, True],  # both the mean and the std entry are transformed
        prior=prior,
        scale_params=True,
    )

    return env_sim, env_real, env_hparams, dp_map, behavior_policy, ddp_policy
def get_default_randomizer_wambic() -> DomainRandomizer:
    """
    Build the default randomizer for the `WAMBallInCupSim`, covering the cup scale and the rope length.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.mujoco.wam import WAMBallInCupSim

    nominal = WAMBallInCupSim.get_nominal_domain_param()

    # The ball needs to fit into the cup, hence the lower bound on the scale
    cup_scale = NormalDomainParam(
        name='cup_scale', mean=nominal['cup_scale'], std=nominal['cup_scale'] / 5, clip_lo=0.6
    )
    # The rope won't be more than 10cm shorter (presumably relative to the nominal length -- confirm)
    rope_length = NormalDomainParam(
        name='rope_length', mean=nominal['rope_length'], std=nominal['rope_length'] / 10, clip_lo=0.2
    )
    return DomainRandomizer(cup_scale, rope_length)
def create_default_randomizer_omo() -> DomainRandomizer:
    """
    Build the default randomizer for the `OneMassOscillatorSim`.

    The parameters `m`, `k`, and `d` are normally distributed around their nominal values with a standard deviation
    of one third of that value and a lower clipping bound of 1e-3.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.one_mass_oscillator import OneMassOscillatorSim

    nominal = OneMassOscillatorSim.get_nominal_domain_param()
    domain_params = []
    for key in ('m', 'k', 'd'):
        domain_params.append(NormalDomainParam(name=key, mean=nominal[key], std=nominal[key]/3, clip_lo=1e-3))
    return DomainRandomizer(*domain_params)
def create_default_randomizer_cata() -> DomainRandomizer:
    """
    Build the default randomizer for the `CatapultSim`.

    All parameters are normally distributed around their nominal values and clipped from below at 1e-3.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.one_step.catapult import CatapultSim

    nominal = CatapultSim.get_nominal_domain_param()
    # (name, divisor of the nominal value yielding the std)
    specs = (('g', 10), ('k', 5), ('x', 5))
    return DomainRandomizer(
        *[NormalDomainParam(name=key, mean=nominal[key], std=nominal[key]/divisor, clip_lo=1e-3)
          for key, divisor in specs]
    )
def create_default_randomizer_qq() -> DomainRandomizer:
    """
    Build the default randomizer for the `QQubeSim`.

    All parameters are normally distributed around their nominal values.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.quanser_qube import QQubeSim

    nominal = QQubeSim.get_nominal_domain_param()
    # (name, divisor of the nominal value yielding the std, lower clipping bound)
    specs = (
        ("gravity_const", 10, 1e-3),
        ("motor_resistance", 5, 1e-3),
        ("motor_back_emf", 5, 1e-4),
        ("mass_rot_pole", 5, 1e-4),
        ("length_rot_pole", 5, 1e-4),
        ("damping_rot_pole", 4, 1e-9),
        ("mass_pend_pole", 5, 1e-4),
        ("length_pend_pole", 5, 1e-4),
        ("damping_pend_pole", 4, 1e-9),
    )
    domain_params = [
        NormalDomainParam(name=key, mean=nominal[key], std=nominal[key] / divisor, clip_lo=lower)
        for key, divisor, lower in specs
    ]
    return DomainRandomizer(*domain_params)
def create_default_randomizer_bop() -> DomainRandomizer:
    """
    Create the default randomizer for the `BallOnPlateSim`.

    Mass and radius of the ball are normally distributed with a standard deviation of one third of the nominal
    value, the center of mass coordinates use a fixed standard deviation of 0.003. The friction coefficients and
    the slip are uniformly distributed with a half-span equal to their nominal value.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.rcspysim.ball_on_plate import BallOnPlateSim

    dp_nom = BallOnPlateSim.get_nominal_domain_param()
    return DomainRandomizer(
        NormalDomainParam(name="ball_mass", mean=dp_nom["ball_mass"], std=dp_nom["ball_mass"] / 3, clip_lo=1e-2),
        NormalDomainParam(name="ball_radius", mean=dp_nom["ball_radius"], std=dp_nom["ball_radius"] / 3, clip_lo=1e-2),
        NormalDomainParam(name="ball_com_x", mean=dp_nom["ball_com_x"], std=0.003),
        NormalDomainParam(name="ball_com_y", mean=dp_nom["ball_com_y"], std=0.003),
        NormalDomainParam(name="ball_com_z", mean=dp_nom["ball_com_z"], std=0.003),
        # The upper clipping bound is passed as `clip_up`, the keyword used by all other randomizers;
        # it was previously spelled `clip_hi` here
        UniformDomainParam(
            name="ball_friction_coefficient",
            mean=dp_nom["ball_friction_coefficient"],
            halfspan=dp_nom["ball_friction_coefficient"],
            clip_lo=0,
            clip_up=1,
        ),
        UniformDomainParam(
            name="ball_rolling_friction_coefficient",
            mean=dp_nom["ball_rolling_friction_coefficient"],
            halfspan=dp_nom["ball_rolling_friction_coefficient"],
            clip_lo=0,
            clip_up=1,
        ),
        # Vortex only
        UniformDomainParam(name="ball_slip", mean=dp_nom["ball_slip"], halfspan=dp_nom["ball_slip"], clip_lo=0),
    )
def create_default_randomizer_wambic() -> DomainRandomizer:
    """
    Build the default randomizer for the `WAMBallInCupSim`.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.mujoco.wam import WAMBallInCupSim

    nominal = WAMBallInCupSim.get_nominal_domain_param()

    def _uniform_half(key, lower):
        # Uniform distribution spanning +/- 50% of the nominal value, clipped from below at `lower`
        return UniformDomainParam(name=key, mean=nominal[key], halfspan=nominal[key]/2, clip_lo=lower)

    # Ball needs to fit into the cup
    cup_scale = NormalDomainParam(
        name='cup_scale', mean=nominal['cup_scale'], std=nominal['cup_scale']/5, clip_lo=0.65
    )
    # Rope won't be more than 3cm off
    rope_length = NormalDomainParam(
        name='rope_length', mean=nominal['rope_length'], std=nominal['rope_length']/30, clip_lo=0.27, clip_up=0.33
    )
    ball_mass = NormalDomainParam(
        name='ball_mass', mean=nominal['ball_mass'], std=nominal['ball_mass']/10, clip_lo=1e-2
    )
    return DomainRandomizer(
        cup_scale,
        rope_length,
        ball_mass,
        _uniform_half('joint_damping', 0.),
        _uniform_half('joint_stiction', 0.),
        _uniform_half('rope_damping', 1e-6),
    )
def get_default_randomizer_qq() -> DomainRandomizer:
    """
    Build the default randomizer for the `QQubeSim`.

    Every parameter is normally distributed around its nominal value with a standard deviation of one fifth of that
    value; only the lower clipping bounds differ.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.quanser_qube import QQubeSim

    nominal = QQubeSim.get_nominal_domain_param()
    # name -> lower clipping bound
    lower_bounds = (
        ('g', 1e-3),
        ('Rm', 1e-3),
        ('km', 1e-4),
        ('Mr', 1e-4),
        ('Lr', 1e-4),
        ('Dr', 1e-9),
        ('Mp', 1e-4),
        ('Lp', 1e-4),
        ('Dp', 1e-9),
    )
    return DomainRandomizer(
        *[NormalDomainParam(name=key, mean=nominal[key], std=nominal[key] / 5, clip_lo=lower)
          for key, lower in lower_bounds]
    )
def create_default_randomizer_bob() -> DomainRandomizer:
    """
    Build the default randomizer for the `BallOnBeamSim`.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.ball_on_beam import BallOnBeamSim

    nominal = BallOnBeamSim.get_nominal_domain_param()
    # (name, divisor of the nominal value yielding the std, lower clipping bound)
    normal_specs = (
        ('g', 10, 1e-4),
        ('m_ball', 5, 1e-4),
        ('r_ball', 5, 1e-4),
        ('m_beam', 5, 1e-3),
        ('l_beam', 5, 1e-3),
        ('d_beam', 5, 1e-3),
    )
    domain_params = [
        NormalDomainParam(name=key, mean=nominal[key], std=nominal[key]/divisor, clip_lo=lower)
        for key, divisor, lower in normal_specs
    ]
    # The friction coefficient ranges from 0 to twice its nominal value
    domain_params.append(
        UniformDomainParam(name='c_frict', mean=nominal['c_frict'], halfspan=nominal['c_frict'], clip_lo=0)
    )
    # The angular offset is centered at 0 degrees with a half-span of 0.1 degrees (both expressed in radians)
    domain_params.append(UniformDomainParam(name='ang_offset', mean=0./180*np.pi, halfspan=0.1/180*np.pi))
    return DomainRandomizer(*domain_params)
def create_default_randomizer_bob() -> DomainRandomizer:
    """
    Build the default randomizer for the `BallOnBeamSim`.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.ball_on_beam import BallOnBeamSim

    nominal = BallOnBeamSim.get_nominal_domain_param()

    def _normal(key, divisor, lower):
        # Normal distribution around the nominal value, std is the nominal value divided by `divisor`
        return NormalDomainParam(name=key, mean=nominal[key], std=nominal[key] / divisor, clip_lo=lower)

    # The friction coefficient ranges from 0 to twice its nominal value
    friction = UniformDomainParam(
        name="friction_coeff", mean=nominal["friction_coeff"], halfspan=nominal["friction_coeff"], clip_lo=0
    )
    # The angular offset is centered at 0 degrees with a half-span of 0.1 degrees (both expressed in radians)
    ang_offset = UniformDomainParam(name="ang_offset", mean=0.0 / 180 * np.pi, halfspan=0.1 / 180 * np.pi)

    return DomainRandomizer(
        _normal("gravity_const", 10, 1e-4),
        _normal("ball_mass", 5, 1e-4),
        _normal("ball_radius", 5, 1e-4),
        _normal("beam_mass", 5, 1e-3),
        _normal("beam_length", 5, 1e-3),
        _normal("beam_thickness", 5, 1e-3),
        friction,
        ang_offset,
    )
def create_default_randomizer_cata() -> DomainRandomizer:
    """
    Build the default randomizer for the `CatapultSim`.

    All parameters are normally distributed around their nominal values and clipped from below at 1e-3.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.one_step.catapult import CatapultSim

    nominal = CatapultSim.get_nominal_domain_param()
    domain_params = []
    # (name, divisor of the nominal value yielding the std)
    for key, divisor in (("gravity_const", 10), ("stiffness", 5), ("elongation", 5)):
        domain_params.append(
            NormalDomainParam(name=key, mean=nominal[key], std=nominal[key] / divisor, clip_lo=1e-3)
        )
    return DomainRandomizer(*domain_params)
def create_default_randomizer_qcp() -> DomainRandomizer:
    """
    Build the default randomizer for the `QCartPoleSim`, using the nominal parameters requested with `long=False`.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.quanser_cartpole import QCartPoleSim

    nominal = QCartPoleSim.get_nominal_domain_param(long=False)

    def _normal(key, divisor, lower):
        # Normal distribution around the nominal value, std is the nominal value divided by `divisor`
        return NormalDomainParam(name=key, mean=nominal[key], std=nominal[key]/divisor, clip_lo=lower)

    def _uniform_quarter(key, **clip_up):
        # Uniform distribution spanning +/- 25% of the nominal value, clipped from below at 1e-4
        return UniformDomainParam(name=key, mean=nominal[key], halfspan=nominal[key]/4, clip_lo=1e-4, **clip_up)

    return DomainRandomizer(
        _normal('g', 10, 1e-4),
        _normal('m_cart', 5, 1e-4),
        _normal('m_pole', 5, 1e-4),
        _normal('l_rail', 5, 1e-2),
        _normal('l_pole', 5, 1e-2),
        # Efficiencies are additionally clipped from above at 1
        _uniform_quarter('eta_m', clip_up=1),
        _uniform_quarter('eta_g', clip_up=1),
        _normal('K_g', 4, 1e-4),
        _normal('J_m', 4, 1e-9),
        _normal('r_mp', 5, 1e-4),
        _normal('R_m', 4, 1e-4),
        _normal('k_m', 4, 1e-4),
        _uniform_quarter('B_eq'),
        _uniform_quarter('B_pole'),
    )
num_workers=8, lr_scheduler=lr_scheduler.ExponentialLR, lr_scheduler_hparam=dict(gamma=0.999), ) env_sprl_params = [ dict( name="gravity_const", target_mean=to.tensor([9.81]), target_cov_chol_flat=to.tensor([1.0]), init_mean=to.tensor([9.81]), init_cov_chol_flat=to.tensor([0.05]), ) ] env = DomainRandWrapperLive( env, randomizer=DomainRandomizer( *[SelfPacedDomainParam(**p) for p in env_sprl_params])) sprl_hparam = dict( kl_constraints_ub=8000, performance_lower_bound=500, std_lower_bound=0.4, kl_threshold=200, max_iter=args.sprl_iterations, optimize_mean=not args.cov_only, ) algo = SPRL(env, PPO(ex_dir, env, policy, critic, **algo_hparam), **sprl_hparam) # Save the hyper-parameters save_dicts_to_yaml( dict(env=env_hparams, seed=args.seed),
task_args=dict( final_factor=500, success_bonus=250, Q=np.diag([0.5, 1e-4, 4e1]), R=np.diag([0, 0, 1e-1, 2e-1]), Q_dev=np.diag([0.0, 0.0, 5]), # R_dev=np.diag([0., 0., 1e-3, 1e-3]) ), ) env = WAMBallInCupSim(**env_hparams) # Randomizer randomizer = DomainRandomizer( UniformDomainParam(name="cup_scale", mean=1.0, halfspan=0.2), NormalDomainParam(name="rope_length", mean=0.3, std=0.005), NormalDomainParam(name="ball_mass", mean=0.021, std=0.001), UniformDomainParam(name="joint_2_damping", mean=0.05, halfspan=0.05), UniformDomainParam(name="joint_2_dryfriction", mean=0.1, halfspan=0.1), ) env = DomainRandWrapperLive(env, randomizer) # Policy bounds = ([0.0, 0.25, 0.5], [1.0, 1.5, 2.5]) policy_hparam = dict(rbf_hparam=dict(num_feat_per_dim=9, bounds=bounds, scale=None), dim_mask=2) policy = DualRBFLinearPolicy(env.spec, **policy_hparam) # Algorithm algo_hparam = dict( max_iter=15, pop_size=100, num_is_samples=10,
pyrado.set_seed(args.seed, verbose=True) # Environments env_hparams = dict(dt=1 / 100.0, max_steps=600) env_real = QQubeSwingUpReal(**env_hparams) env_sim = QQubeSwingUpSim(**env_hparams) randomizer = DomainRandomizer( NormalDomainParam(name="mass_rot_pole", mean=0.0, std=1e6, clip_lo=1e-3), NormalDomainParam(name="mass_pend_pole", mean=0.0, std=1e6, clip_lo=1e-3), NormalDomainParam(name="length_rot_pole", mean=0.0, std=1e6, clip_lo=1e-3), NormalDomainParam(name="length_pend_pole", mean=0.0, std=1e6, clip_lo=1e-3), ) env_sim = DomainRandWrapperLive(env_sim, randomizer) dp_map = { 0: ("mass_rot_pole", "mean"), 1: ("mass_rot_pole", "std"), 2: ("mass_pend_pole", "mean"), 3: ("mass_pend_pole", "std"), 4: ("length_rot_pole", "mean"),