def get_default_randomizer_pend() -> DomainRandomizer:
    """
    Get the default randomizer for the `PendulumSim`.

    Every randomized domain parameter is described by a `NormalDomainParam` whose mean is the nominal value, whose
    standard deviation is one tenth of the nominal value, and which is constructed with `clip_lo=1e-3`.

    :return: randomizer based on the nominal domain parameter values
    """
    # Imported locally as in the original code (presumably to avoid a circular import -- confirm)
    from pyrado.environments.pysim.pendulum import PendulumSim

    dp_nom = PendulumSim.get_nominal_domain_param()

    # The nominal domain parameters are looked up by their long names, matching the keys that the rest of this
    # file uses for the `PendulumSim` (e.g. `pole_mass`, `pole_length`); the short keys (`g`, `m_pole`, ...) are
    # not used anywhere else here.
    param_names = ("gravity_const", "pole_mass", "pole_length", "pole_damping", "torque_thold")

    return DomainRandomizer(
        *(
            NormalDomainParam(name=name, mean=dp_nom[name], std=dp_nom[name] / 10, clip_lo=1e-3)
            for name in param_names
        )
    )
def create_default_randomizer_pend() -> DomainRandomizer:
    """
    Create the default randomizer for the `PendulumSim`.

    Each of the five domain parameters gets a `NormalDomainParam` centered at its nominal value with a standard
    deviation of one tenth of that value and `clip_lo=1e-3`.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.pendulum import PendulumSim

    nominal = PendulumSim.get_nominal_domain_param()

    # Order matters: the parameters are handed to the randomizer in exactly this sequence
    randomized_names = (
        "gravity_const",
        "pole_mass",
        "pole_length",
        "pole_damping",
        "torque_thold",
    )

    def _normal_around_nominal(name: str) -> NormalDomainParam:
        # One normal distribution per parameter, scaled relative to its nominal value
        nominal_value = nominal[name]
        return NormalDomainParam(name=name, mean=nominal_value, std=nominal_value / 10, clip_lo=1e-3)

    return DomainRandomizer(*(_normal_around_nominal(name) for name in randomized_names))
# Environments env_hparams = dict(dt=1 / 50.0, max_steps=400) env_sim = PendulumSim(**env_hparams) env_sim.domain_param = dict(d_pole=0) env_sim.domain_param = dict(tau_max=4.5) # Create a fake ground truth target domain num_real_rollouts = 1 env_real = deepcopy(env_sim) # Define a mapping: index - domain parameter dp_mapping = {0: "pole_mass", 1: "pole_length"} # Prior dp_nom = env_sim.get_nominal_domain_param() prior_hparam = dict( low=to.tensor([dp_nom["pole_mass"] * 0.3, dp_nom["pole_length"] * 0.3]), high=to.tensor([dp_nom["pole_mass"] * 1.7, dp_nom["pole_length"] * 1.7]), ) prior = sbiutils.BoxUniform(**prior_hparam) # Time series embedding embedding_hparam = dict(downsampling_factor=1) embedding = create_embedding(BayesSimEmbedding.name, env_sim.spec, **embedding_hparam) # Behavioral policy policy = create_pend_excitation_policy(env_sim, 1) # Algorithm algo_hparam = dict(