Exemple #1
0
def create_qqsu_setup():
    """
    Assemble the environments and policies for the Quanser Qube swing-up setup.

    The "real" environment is a second `QQubeSwingUpSim` instance with perturbed pole masses and lengths. The
    simulation environment is wrapped by a `DomainRandWrapperLive` and a `MetaDomainRandWrapper`, the latter using
    the index-to-(parameter, statistic) mapping `dp_map`.

    :return: wrapped simulation environment, perturbed "real" environment, environment hyper-parameters,
             domain distribution parameter mapping, behavioral policy, and domain distribution parameter policy
    """
    # Domain parameters that are handled in this setup (the order defines the indices in the mapping)
    dp_names = ("mass_rot_pole", "mass_pend_pole", "length_rot_pole", "length_pend_pole")

    # Environments
    env_hparams = {"dt": 1 / 100.0, "max_steps": 600}
    env_real = QQubeSwingUpSim(**env_hparams)
    env_real.domain_param = {
        "mass_rot_pole": 0.095 * 0.9,  # = 0.0855
        "mass_pend_pole": 0.024 * 1.1,  # = 0.0264
        "length_rot_pole": 0.085 * 0.9,  # = 0.0765
        "length_pend_pole": 0.129 * 1.1,  # = 0.1419
    }

    sim = QQubeSwingUpSim(**env_hparams)
    # Every parameter starts with zero mean and a tiny standard deviation
    initial_params = [
        NormalDomainParam(name=dp_name, mean=0.0, std=1e-9, clip_lo=1e-3)
        for dp_name in dp_names
    ]
    sim = DomainRandWrapperLive(sim, DomainRandomizer(*initial_params))

    # Index 2*i addresses the mean and index 2*i + 1 the std of the i-th domain parameter
    dp_map = dict(enumerate((dp_name, stat) for dp_name in dp_names for stat in ("mean", "std")))
    # Alternative that only transforms the stds: [False, True, False, True, False, True, False, True]
    trafo_mask = [True] * 8
    env_sim = MetaDomainRandWrapper(sim, dp_map)

    # Policies (the behavioral policy needs to be deterministic)
    behavior_policy = QQubeSwingUpAndBalanceCtrl(env_sim.spec)
    prior_means = (0.095, 0.024, 0.085, 0.129)
    prior_params = [
        NormalDomainParam(name=dp_name, mean=prior_mean, std=prior_mean / 10)
        for dp_name, prior_mean in zip(dp_names, prior_means)
    ]
    ddp_policy = DomainDistrParamPolicy(
        mapping=dp_map,
        trafo_mask=trafo_mask,
        prior=DomainRandomizer(*prior_params),
        scale_params=False,
    )

    return env_sim, env_real, env_hparams, dp_map, behavior_policy, ddp_policy
Exemple #2
0
def get_default_randomizer_pend() -> DomainRandomizer:
    """
    Build the default randomizer for the `PendulumSim`.

    Every listed domain parameter is normally distributed around its nominal value with a standard deviation of
    one tenth of that value, and clipped from below at 1e-3.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.pendulum import PendulumSim

    nominal = PendulumSim.get_nominal_domain_param()
    params = [
        NormalDomainParam(name=key, mean=nominal[key], std=nominal[key] / 10, clip_lo=1e-3)
        for key in ('g', 'm_pole', 'l_pole', 'd_pole', 'tau_max')
    ]
    return DomainRandomizer(*params)
Exemple #3
0
def get_default_randomizer_pi() -> DomainRandomizer:
    """
    Get the default randomizer for the `PlanarInsertSim`.

    Each link mass is normally distributed around its own nominal value with a standard deviation of one fifth of
    that value, clipped from below at 1e-2. The z-offset of the upper wall is uniformly distributed and clipped
    from below at 0.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.rcspysim.planar_insert import PlanarInsertSim

    dp_nom = PlanarInsertSim.get_nominal_domain_param()

    # Generating the entries from the names guarantees that every link uses its own nominal value
    # (previously `link5_mass` was accidentally parametrized with the nominal value of `link4_mass`)
    link_mass_names = ('link1_mass', 'link2_mass', 'link3_mass', 'link4_mass', 'link5_mass')
    link_masses = [
        NormalDomainParam(name=name, mean=dp_nom[name], std=dp_nom[name] / 5, clip_lo=1e-2)
        for name in link_mass_names
    ]

    return DomainRandomizer(
        *link_masses,
        UniformDomainParam(name='upperwall_pos_offset_z',
                           mean=0,
                           halfspan=0.05,
                           clip_lo=0)  # only increase the gap
    )
def create_default_randomizer_pend() -> DomainRandomizer:
    """
    Build the default randomizer for the `PendulumSim`.

    Every listed domain parameter is normally distributed around its nominal value with a standard deviation of
    one tenth of that value, and clipped from below at 1e-3.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.pendulum import PendulumSim

    nominal = PendulumSim.get_nominal_domain_param()
    dp_names = ("gravity_const", "pole_mass", "pole_length", "pole_damping", "torque_thold")
    params = [
        NormalDomainParam(name=dp_name, mean=nominal[dp_name], std=nominal[dp_name] / 10, clip_lo=1e-3)
        for dp_name in dp_names
    ]
    return DomainRandomizer(*params)
Exemple #5
0
def get_default_randomizer_bl() -> DomainRandomizer:
    """
    Get the default randomizer for the `BoxLifting`.

    Lengths and masses are normally distributed around their nominal values, the friction coefficients are
    uniformly distributed around their nominal values and clipped from below at 1e-5.

    :return: randomizer based on the nominal domain parameter values
    """
    # NOTE(review): the nominal values are read from `BoxShelvingSim` although the docstring refers to box lifting
    # -- confirm that this is the intended environment
    from pyrado.environments.rcspysim.box_shelving import BoxShelvingSim

    nominal = BoxShelvingSim.get_nominal_domain_param()

    def normal(key, divisor):
        # Normal distribution with std = nominal / divisor
        return NormalDomainParam(name=key, mean=nominal[key], std=nominal[key] / divisor)

    def uniform_friction(key):
        # Uniform distribution with halfspan = nominal / 5
        return UniformDomainParam(name=key, mean=nominal[key], halfspan=nominal[key] / 5, clip_lo=1e-5)

    return DomainRandomizer(
        normal('box_length', 10),
        normal('box_width', 10),
        normal('box_mass', 5),
        uniform_friction('box_friction_coefficient'),
        normal('basket_mass', 5),
        uniform_friction('basket_friction_coefficient'),
    )
Exemple #6
0
def get_empty_randomizer() -> DomainRandomizer:
    """
    Create a randomizer without any domain parameters.

    The result does not depend on an environment and is meant to be filled later (using `add_domain_params`).

    :return: empty randomizer
    """
    empty_randomizer = DomainRandomizer()
    return empty_randomizer
Exemple #7
0
def create_default_randomizer_qbb() -> DomainRandomizer:
    """
    Build the default randomizer for the `QBallBalancerSim`.

    All distributions are centered at the nominal domain parameter values. Their spreads are fractions of the
    nominal values, except for the two angular offsets which use a fixed half-span of 6 degrees (in radians).

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.quanser_ball_balancer import QBallBalancerSim

    nominal = QBallBalancerSim.get_nominal_domain_param()

    # Normally distributed parameters: (name, divisor of the nominal value yielding the std, lower clipping bound)
    normal_specs = (
        ('g', 10, 1e-4),
        ('m_ball', 5, 1e-4),
        ('r_ball', 5, 1e-3),
        ('l_plate', 5, 5e-2),
        ('r_arm', 5, 1e-4),
        ('K_g', 4, 1e-2),
        ('J_l', 4, 1e-6),
        ('J_m', 4, 1e-9),
        ('k_m', 4, 1e-4),
        ('R_m', 4, 1e-4),
    )
    params = [
        NormalDomainParam(name=key, mean=nominal[key], std=nominal[key]/divisor, clip_lo=lower)
        for key, divisor, lower in normal_specs
    ]

    # Efficiencies are additionally clipped from above at 1
    for key in ('eta_g', 'eta_m'):
        params.append(
            UniformDomainParam(name=key, mean=nominal[key], halfspan=nominal[key]/4, clip_lo=1e-4, clip_up=1)
        )

    for key in ('B_eq', 'c_frict'):
        params.append(UniformDomainParam(name=key, mean=nominal[key], halfspan=nominal[key]/4, clip_lo=1e-4))

    # Voltage thresholds: the half-span of the `_neg` entries is computed from the absolute nominal value
    for axis in ('x', 'y'):
        key_pos = 'V_thold_' + axis + '_pos'
        key_neg = 'V_thold_' + axis + '_neg'
        params.append(UniformDomainParam(name=key_pos, mean=nominal[key_pos], halfspan=nominal[key_pos]/3))
        params.append(UniformDomainParam(name=key_neg, mean=nominal[key_neg], halfspan=abs(nominal[key_neg])/3))

    for key in ('offset_th_x', 'offset_th_y'):
        params.append(UniformDomainParam(name=key, mean=nominal[key], halfspan=6./180*np.pi))

    return DomainRandomizer(*params)
Exemple #8
0
def default_dummy_randomizer():
    """
    Create a randomizer holding four plain `DomainParam` instances that only specify a name and a mean.

    :return: randomizer with the parameters `mass`, `special`, `length`, and `time_delay`
    """
    names_and_means = (('mass', 1.2), ('special', 0), ('length', 4), ('time_delay', 13))
    dummy_params = [DomainParam(name=dp_name, mean=dp_mean) for dp_name, dp_mean in names_and_means]
    return DomainRandomizer(*dummy_params)
def create_default_randomizer_bs() -> DomainRandomizer:
    """
    Build the default randomizer for the `BoxShelvingSim`.

    The box dimensions and mass are normally distributed around their nominal values. The friction coefficient is
    uniformly distributed around its nominal value and clipped from below at 1e-5.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.rcspysim.box_shelving import BoxShelvingSim

    nominal = BoxShelvingSim.get_nominal_domain_param()

    # (name, divisor of the nominal value yielding the std)
    normal_specs = (("box_length", 10), ("box_width", 10), ("box_mass", 5))
    params = [
        NormalDomainParam(name=dp_name, mean=nominal[dp_name], std=nominal[dp_name] / divisor)
        for dp_name, divisor in normal_specs
    ]

    friction = "box_friction_coefficient"
    params.append(
        UniformDomainParam(name=friction, mean=nominal[friction], halfspan=nominal[friction] / 5, clip_lo=1e-5)
    )
    return DomainRandomizer(*params)
Exemple #10
0
def get_uniform_masses_lengths_randomizer_qq(frac_halfspan: float):
    """
    Get a randomizer with uniformly distributed masses and lengths of the Quanser Qube, centered at their nominal
    parameter values.

    :param frac_halfspan: divisor applied to each nominal parameter value to obtain the half-span of its uniform
                          distribution, i.e. `halfspan = nominal / frac_halfspan`
    :return: `DomainRandomizer` with uniformly distributed masses and lengths
    """
    from pyrado.environments.pysim.quanser_qube import QQubeSim

    nominal = QQubeSim.get_nominal_domain_param()

    # (name, lower clipping bound)
    specs = (('Mp', 1e-3), ('Mr', 1e-3), ('Lr', 1e-2), ('Lp', 1e-2))
    params = [
        UniformDomainParam(name=key, mean=nominal[key], halfspan=nominal[key] / frac_halfspan, clip_lo=lower)
        for key, lower in specs
    ]
    return DomainRandomizer(*params)
Exemple #11
0
def default_randomizer():
    """
    Create a randomizer that combines normal, uniform, and multivariate normal domain parameters, some of them
    with clipping bounds and integer rounding.

    :return: randomizer with the parameters `mass`, `special`, `length`, `time_delay`, and `multidim`
    """
    mass = NormalDomainParam(name="mass", mean=1.2, std=0.1, clip_lo=10, clip_up=100)
    special = UniformDomainParam(name="special", mean=0, halfspan=42, clip_lo=-7.4, roundint=True)
    length = NormalDomainParam(name="length", mean=4, std=0.6, clip_up=50.1)
    time_delay = UniformDomainParam(name="time_delay", mean=13, halfspan=6, clip_up=17, roundint=True)
    multidim = MultivariateNormalDomainParam(
        name="multidim",
        mean=10 * to.ones((2,)),
        cov=2 * to.eye(2),
        clip_up=11,
    )
    return DomainRandomizer(mass, special, length, time_delay, multidim)
Exemple #12
0
def default_pert():
    """
    Create a randomizer that combines normal, uniform, and multivariate normal domain parameters, some of them
    with clipping bounds and integer rounding.

    :return: randomizer with the parameters `mass`, `special`, `length`, `time_delay`, and `multidim`
    """
    domain_params = []
    domain_params.append(NormalDomainParam(name='mass', mean=1.2, std=0.1, clip_lo=10, clip_up=100))
    domain_params.append(UniformDomainParam(name='special', mean=0, halfspan=42, clip_lo=-7.4, roundint=True))
    domain_params.append(NormalDomainParam(name='length', mean=4, std=0.6, clip_up=50.1))
    domain_params.append(UniformDomainParam(name='time_delay', mean=13, halfspan=6, clip_up=17, roundint=True))

    # Two-dimensional parameter with mean [10, 10] and covariance 2 * I
    multidim_mean = 10*to.ones((2,))
    multidim_cov = 2*to.eye(2)
    domain_params.append(
        MultivariateNormalDomainParam(name='multidim', mean=multidim_mean, cov=multidim_cov, clip_up=11)
    )
    return DomainRandomizer(*domain_params)
Exemple #13
0
def create_example_randomizer_cata() -> DomainRandomizer:
    """
    Build the randomizer for the `CatapultSim` used for the 'illustrative example' in F. Muratore et al, 2019, TAMPI.

    The only domain parameter is the Bernoulli-distributed `planet`, where 0 = Mars and 1 = Venus.

    :return: randomizer containing the single `planet` domain parameter
    """
    planet = BernoulliDomainParam(name='planet', mean=None, val_0=0, val_1=1, prob_1=0.7, roundint=True)
    return DomainRandomizer(planet)
Exemple #14
0
def test_sprl(ex_dir, env: SimEnv, optimize_mean: bool):
    """
    Run a single iteration of SPRL with a PPO subroutine and check that the iteration counter reached its maximum.

    :param ex_dir: directory passed to the PPO subroutine
    :param env: simulation environment, wrapped here for action normalization and domain randomization
    :param optimize_mean: forwarded to `SPRL` as `optimize_mean`
    """
    pyrado.set_seed(0)

    # Environment with a self-paced distribution over the gravity constant
    env = ActNormWrapper(env)
    gravity_param = SelfPacedDomainParam(
        name="gravity_const",
        target_mean=to.tensor([9.81]),
        target_cov_chol_flat=to.tensor([1.0]),
        init_mean=to.tensor([9.81]),
        init_cov_chol_flat=to.tensor([0.05]),
    )
    randomizer = DomainRandomizer(gravity_param)
    env = DomainRandWrapperLive(env, randomizer=randomizer)

    # Policy
    policy = FNNPolicy(env.spec, hidden_sizes=[64, 64], hidden_nonlin=to.tanh)

    # Critic
    vfcn = FNNPolicy(
        spec=EnvSpec(env.obs_space, ValueFunctionSpace),
        hidden_sizes=[32, 32],
        hidden_nonlin=to.relu,
    )
    critic = GAE(
        vfcn,
        gamma=0.9844534412010116,
        lamda=0.9710614403461155,
        num_epoch=10,
        batch_size=150,
        standardize_adv=False,
        lr=0.00016985313083236645,
    )

    # Subroutine
    ppo_hparam = {
        "max_iter": 1,
        "eps_clip": 0.12648736789309026,
        "min_steps": 10 * env.max_steps,
        "num_epoch": 3,
        "batch_size": 150,
        "std_init": 0.7573286998997557,
        "lr": 6.999956625305722e-04,
        "max_grad_norm": 1.0,
        "num_workers": 1,
    }
    subrtn = PPO(ex_dir, env, policy, critic, **ppo_hparam)

    # Algorithm
    sprl_hparam = {
        "kl_constraints_ub": 8000,
        "performance_lower_bound": 500,
        "std_lower_bound": 0.4,
        "kl_threshold": 200,
        "max_iter": 1,
        "optimize_mean": optimize_mean,
    }
    algo = SPRL(env, subrtn, **sprl_hparam)

    algo.train(snapshot_mode="latest")
    assert algo.curr_iter == algo.max_iter
Exemple #15
0
def bob_pert():
    """
    Create a randomizer with hard-coded distribution parameters for the domain parameters `g`, `r_ball`,
    `m_ball`, `m_beam`, `d_beam`, `l_beam`, `c_frict`, and `ang_offset`.

    :return: randomizer with six normally and two uniformly distributed domain parameters
    """
    # (name, mean, std) of the normally distributed parameters, all clipped from below at 1e-3
    normal_specs = (
        ('g', 9.81, 0.981),
        ('r_ball', 0.1, 0.01),
        ('m_ball', 0.5, 0.05),
        ('m_beam', 3.0, 0.3),
        ('d_beam', 0.1, 0.01),
        ('l_beam', 2.0, 0.2),
    )
    params = [
        NormalDomainParam(name=key, mean=mean, std=std, clip_lo=1e-3)
        for key, mean, std in normal_specs
    ]
    params.append(UniformDomainParam(name='c_frict', mean=0.05, halfspan=0.05))
    # Half-span of 5 degrees, converted to radians
    params.append(UniformDomainParam(name='ang_offset', mean=0, halfspan=5.*np.pi/180))
    return DomainRandomizer(*params)
Exemple #16
0
def create_bob_setup():
    """
    Assemble the environments and policies for the ball-on-beam setup.

    The "real" environment is a second `BallOnBeamSim` instance with a changed gravity constant `g`. The simulation
    environment is wrapped by a `DomainRandWrapperLive` and a `MetaDomainRandWrapper`, the latter using the
    index-to-(parameter, statistic) mapping `dp_map`. Only `g` is handled; `l_beam` and `ang_offset` would be
    candidates for an extension.

    :return: wrapped simulation environment, perturbed "real" environment, environment hyper-parameters,
             domain distribution parameter mapping, behavioral policy, and domain distribution parameter policy
    """
    # Environments
    env_hparams = {'dt': 1 / 100., 'max_steps': 500}
    env_real = BallOnBeamSim(**env_hparams)
    env_real.domain_param = {'g': 10.81}

    sim = BallOnBeamSim(**env_hparams)
    # The parameter starts with zero mean and a tiny standard deviation
    initial_g = NormalDomainParam(name='g', mean=0, std=1e-12)
    sim = DomainRandWrapperLive(sim, DomainRandomizer(initial_g))
    dp_map = dict(enumerate([('g', 'mean'), ('g', 'std')]))
    env_sim = MetaDomainRandWrapper(sim, dp_map)

    # Policies (the behavioral policy needs to be deterministic)
    feats = FeatureStack([identity_feat, sin_feat])
    behavior_policy = LinearPolicy(env_sim.spec, feats=feats)
    behavior_policy.param_values = to.tensor(
        [3.8090, -3.8036, -1.0786, -2.4510, -0.9875, -1.3252, 3.1503, 1.4443]
    )

    prior_g = NormalDomainParam(name='g', mean=8.81, std=8.81 / 10)
    prior = DomainRandomizer(prior_g)
    trafo_mask = [True, True]
    ddp_policy = DomainDistrParamPolicy(
        mapping=dp_map,
        trafo_mask=trafo_mask,
        prior=prior,
        scale_params=True,
    )

    return env_sim, env_real, env_hparams, dp_map, behavior_policy, ddp_policy
Exemple #17
0
def get_default_randomizer_wambic() -> DomainRandomizer:
    """
    Get the default randomizer for the `WAMBallInCupSim`.

    The cup scale and the rope length are normally distributed around their nominal values and clipped from below.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.mujoco.wam import WAMBallInCupSim

    nominal = WAMBallInCupSim.get_nominal_domain_param()

    # The lower bound keeps the cup large enough for the ball to fit in
    cup_scale = NormalDomainParam(
        name='cup_scale', mean=nominal['cup_scale'], std=nominal['cup_scale'] / 5, clip_lo=0.6
    )
    # The lower bound limits how much shorter than nominal the rope can get
    rope_length = NormalDomainParam(
        name='rope_length', mean=nominal['rope_length'], std=nominal['rope_length'] / 10, clip_lo=0.2
    )
    return DomainRandomizer(cup_scale, rope_length)
Exemple #18
0
def create_default_randomizer_omo() -> DomainRandomizer:
    """
    Build the default randomizer for the `OneMassOscillatorSim`.

    The parameters `m`, `k`, and `d` are normally distributed around their nominal values with a standard deviation
    of one third of that value, and clipped from below at 1e-3.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.one_mass_oscillator import OneMassOscillatorSim

    nominal = OneMassOscillatorSim.get_nominal_domain_param()
    params = [
        NormalDomainParam(name=key, mean=nominal[key], std=nominal[key]/3, clip_lo=1e-3)
        for key in ('m', 'k', 'd')
    ]
    return DomainRandomizer(*params)
Exemple #19
0
def create_default_randomizer_cata() -> DomainRandomizer:
    """
    Build the default randomizer for the `CatapultSim`.

    The parameters `g`, `k`, and `x` are normally distributed around their nominal values and clipped from below
    at 1e-3.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.one_step.catapult import CatapultSim

    nominal = CatapultSim.get_nominal_domain_param()

    # (name, divisor of the nominal value yielding the std)
    specs = (('g', 10), ('k', 5), ('x', 5))
    params = [
        NormalDomainParam(name=key, mean=nominal[key], std=nominal[key]/divisor, clip_lo=1e-3)
        for key, divisor in specs
    ]
    return DomainRandomizer(*params)
def create_default_randomizer_qq() -> DomainRandomizer:
    """
    Build the default randomizer for the `QQubeSim`.

    All listed domain parameters are normally distributed around their nominal values. The standard deviation is
    the nominal value divided by a parameter-specific divisor, and each parameter has its own lower clipping bound.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.quanser_qube import QQubeSim

    nominal = QQubeSim.get_nominal_domain_param()

    # (name, divisor of the nominal value yielding the std, lower clipping bound)
    specs = (
        ("gravity_const", 10, 1e-3),
        ("motor_resistance", 5, 1e-3),
        ("motor_back_emf", 5, 1e-4),
        ("mass_rot_pole", 5, 1e-4),
        ("length_rot_pole", 5, 1e-4),
        ("damping_rot_pole", 4, 1e-9),
        ("mass_pend_pole", 5, 1e-4),
        ("length_pend_pole", 5, 1e-4),
        ("damping_pend_pole", 4, 1e-9),
    )
    params = [
        NormalDomainParam(name=dp_name, mean=nominal[dp_name], std=nominal[dp_name] / divisor, clip_lo=lower)
        for dp_name, divisor, lower in specs
    ]
    return DomainRandomizer(*params)
def create_default_randomizer_bop() -> DomainRandomizer:
    """
    Create the default randomizer for the `BallOnPlateSim`.

    The ball's mass and radius are normally distributed around their nominal values (std of one third of the
    nominal value), its center of mass coordinates are normally distributed with a fixed std of 0.003, and the
    friction related coefficients are uniformly distributed with a half-span equal to their nominal values.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.rcspysim.ball_on_plate import BallOnPlateSim

    dp_nom = BallOnPlateSim.get_nominal_domain_param()
    return DomainRandomizer(
        NormalDomainParam(name="ball_mass",
                          mean=dp_nom["ball_mass"],
                          std=dp_nom["ball_mass"] / 3,
                          clip_lo=1e-2),
        NormalDomainParam(name="ball_radius",
                          mean=dp_nom["ball_radius"],
                          std=dp_nom["ball_radius"] / 3,
                          clip_lo=1e-2),
        NormalDomainParam(name="ball_com_x",
                          mean=dp_nom["ball_com_x"],
                          std=0.003),
        NormalDomainParam(name="ball_com_y",
                          mean=dp_nom["ball_com_y"],
                          std=0.003),
        NormalDomainParam(name="ball_com_z",
                          mean=dp_nom["ball_com_z"],
                          std=0.003),
        # The upper clipping bound is passed as `clip_up`, the keyword used by all other domain parameters
        # (the previously used `clip_hi` does not match that keyword)
        UniformDomainParam(
            name="ball_friction_coefficient",
            mean=dp_nom["ball_friction_coefficient"],
            halfspan=dp_nom["ball_friction_coefficient"],
            clip_lo=0,
            clip_up=1,
        ),
        UniformDomainParam(
            name="ball_rolling_friction_coefficient",
            mean=dp_nom["ball_rolling_friction_coefficient"],
            halfspan=dp_nom["ball_rolling_friction_coefficient"],
            clip_lo=0,
            clip_up=1,
        ),
        # Vortex only
        UniformDomainParam(name="ball_slip",
                           mean=dp_nom["ball_slip"],
                           halfspan=dp_nom["ball_slip"],
                           clip_lo=0)
        # UniformDomainParam(name='ball_linearvelocitydamnping', mean=0., halfspan=1e-4),
        # UniformDomainParam(name='ball_angularvelocitydamnping', mean=0., halfspan=1e-4)
    )
Exemple #22
0
def create_default_randomizer_wambic() -> DomainRandomizer:
    """
    Build the default randomizer for the `WAMBallInCupSim`.

    The cup scale, rope length, and ball mass are normally distributed around their nominal values. The joint
    damping, joint stiction, and rope damping are uniformly distributed with a half-span of half their nominal
    values.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.mujoco.wam import WAMBallInCupSim

    nominal = WAMBallInCupSim.get_nominal_domain_param()

    params = [
        # Ball needs to fit into the cup
        NormalDomainParam(name='cup_scale', mean=nominal['cup_scale'], std=nominal['cup_scale']/5, clip_lo=0.65),
        # Rope won't be more than 3cm off
        NormalDomainParam(name='rope_length', mean=nominal['rope_length'], std=nominal['rope_length']/30,
                          clip_lo=0.27, clip_up=0.33),
        NormalDomainParam(name='ball_mass', mean=nominal['ball_mass'], std=nominal['ball_mass']/10, clip_lo=1e-2),
    ]

    # (name, lower clipping bound) of the uniformly distributed parameters
    uniform_specs = (('joint_damping', 0.), ('joint_stiction', 0.), ('rope_damping', 1e-6))
    params.extend(
        UniformDomainParam(name=key, mean=nominal[key], halfspan=nominal[key]/2, clip_lo=lower)
        for key, lower in uniform_specs
    )
    return DomainRandomizer(*params)
Exemple #23
0
def get_default_randomizer_qq() -> DomainRandomizer:
    """
    Build the default randomizer for the `QQubeSim`.

    All listed domain parameters are normally distributed around their nominal values with a standard deviation of
    one fifth of that value. Each parameter has its own lower clipping bound.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.quanser_qube import QQubeSim

    nominal = QQubeSim.get_nominal_domain_param()

    # (name, lower clipping bound)
    specs = (
        ('g', 1e-3),
        ('Rm', 1e-3),
        ('km', 1e-4),
        ('Mr', 1e-4),
        ('Lr', 1e-4),
        ('Dr', 1e-9),
        ('Mp', 1e-4),
        ('Lp', 1e-4),
        ('Dp', 1e-9),
    )
    params = [
        NormalDomainParam(name=key, mean=nominal[key], std=nominal[key] / 5, clip_lo=lower)
        for key, lower in specs
    ]
    return DomainRandomizer(*params)
Exemple #24
0
def create_default_randomizer_bob() -> DomainRandomizer:
    """
    Build the default randomizer for the `BallOnBeamSim`.

    Gravity, masses, and dimensions are normally distributed around their nominal values. The friction coefficient
    is uniformly distributed with a half-span equal to its nominal value, and the angular offset is uniformly
    distributed around zero with a half-span of 0.1 degrees (in radians).

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.ball_on_beam import BallOnBeamSim

    nominal = BallOnBeamSim.get_nominal_domain_param()

    # (name, divisor of the nominal value yielding the std, lower clipping bound)
    normal_specs = (
        ('g', 10, 1e-4),
        ('m_ball', 5, 1e-4),
        ('r_ball', 5, 1e-4),
        ('m_beam', 5, 1e-3),
        ('l_beam', 5, 1e-3),
        ('d_beam', 5, 1e-3),
    )
    params = [
        NormalDomainParam(name=key, mean=nominal[key], std=nominal[key]/divisor, clip_lo=lower)
        for key, divisor, lower in normal_specs
    ]
    params.append(UniformDomainParam(name='c_frict', mean=nominal['c_frict'], halfspan=nominal['c_frict'], clip_lo=0))
    params.append(UniformDomainParam(name='ang_offset', mean=0./180*np.pi, halfspan=0.1/180*np.pi))
    return DomainRandomizer(*params)
def create_default_randomizer_bob() -> DomainRandomizer:
    """
    Build the default randomizer for the `BallOnBeamSim`.

    Gravity, masses, and dimensions are normally distributed around their nominal values. The friction coefficient
    is uniformly distributed with a half-span equal to its nominal value, and the angular offset is uniformly
    distributed around zero with a half-span of 0.1 degrees (in radians).

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.ball_on_beam import BallOnBeamSim

    nominal = BallOnBeamSim.get_nominal_domain_param()

    # (name, divisor of the nominal value yielding the std, lower clipping bound)
    normal_specs = (
        ("gravity_const", 10, 1e-4),
        ("ball_mass", 5, 1e-4),
        ("ball_radius", 5, 1e-4),
        ("beam_mass", 5, 1e-3),
        ("beam_length", 5, 1e-3),
        ("beam_thickness", 5, 1e-3),
    )
    params = [
        NormalDomainParam(name=dp_name, mean=nominal[dp_name], std=nominal[dp_name] / divisor, clip_lo=lower)
        for dp_name, divisor, lower in normal_specs
    ]

    friction = UniformDomainParam(
        name="friction_coeff",
        mean=nominal["friction_coeff"],
        halfspan=nominal["friction_coeff"],
        clip_lo=0,
    )
    ang_offset = UniformDomainParam(name="ang_offset", mean=0.0 / 180 * np.pi, halfspan=0.1 / 180 * np.pi)
    return DomainRandomizer(*params, friction, ang_offset)
def create_default_randomizer_cata() -> DomainRandomizer:
    """
    Build the default randomizer for the `CatapultSim`.

    Every parameter is normally distributed around its nominal value, with a standard deviation given as a fixed
    fraction of that value and a lower clipping bound of 1e-3.

    :return: randomizer based on the nominal domain parameter values
    """
    # Imported locally, as in the original implementation
    from pyrado.environments.one_step.catapult import CatapultSim

    nominal = CatapultSim.get_nominal_domain_param()

    # Parameter name -> divisor applied to the nominal value to obtain the standard deviation
    # (insertion order of the dict defines the order of the domain parameters)
    std_divisors = {
        "gravity_const": 10,
        "stiffness": 5,
        "elongation": 5,
    }

    domain_params = []
    for key, divisor in std_divisors.items():
        nom_value = nominal[key]
        domain_params.append(
            NormalDomainParam(name=key, mean=nom_value, std=nom_value / divisor, clip_lo=1e-3)
        )
    return DomainRandomizer(*domain_params)
Exemple #27
0
def create_default_randomizer_qcp() -> DomainRandomizer:
    """
    Build the default randomizer for the `QCartPoleSim`.

    All distributions are centered at the nominal value of the respective parameter (queried with `long=False`).
    The spread (standard deviation for normal, half-span for uniform distributions) is a fixed fraction of that
    nominal value.

    :return: randomizer based on the nominal domain parameter values
    """
    # Imported locally, as in the original implementation
    from pyrado.environments.pysim.quanser_cartpole import QCartPoleSim

    nominal = QCartPoleSim.get_nominal_domain_param(long=False)

    def normal(key, divisor, **clip):
        # Normal distribution around the nominal value; std = nominal / divisor
        return NormalDomainParam(name=key, mean=nominal[key], std=nominal[key] / divisor, **clip)

    def uniform(key, divisor, **clip):
        # Uniform distribution around the nominal value; halfspan = nominal / divisor
        return UniformDomainParam(name=key, mean=nominal[key], halfspan=nominal[key] / divisor, **clip)

    # The order of the entries matches the original parameter order
    domain_params = (
        normal("g", 10, clip_lo=1e-4),
        normal("m_cart", 5, clip_lo=1e-4),
        normal("m_pole", 5, clip_lo=1e-4),
        normal("l_rail", 5, clip_lo=1e-2),
        normal("l_pole", 5, clip_lo=1e-2),
        uniform("eta_m", 4, clip_lo=1e-4, clip_up=1),
        uniform("eta_g", 4, clip_lo=1e-4, clip_up=1),
        normal("K_g", 4, clip_lo=1e-4),
        normal("J_m", 4, clip_lo=1e-9),
        normal("r_mp", 5, clip_lo=1e-4),
        normal("R_m", 4, clip_lo=1e-4),
        normal("k_m", 4, clip_lo=1e-4),
        uniform("B_eq", 4, clip_lo=1e-4),
        uniform("B_pole", 4, clip_lo=1e-4),
    )
    return DomainRandomizer(*domain_params)
Exemple #28
0
        num_workers=8,
        lr_scheduler=lr_scheduler.ExponentialLR,
        lr_scheduler_hparam=dict(gamma=0.999),
    )
    env_sprl_params = [
        dict(
            name="gravity_const",
            target_mean=to.tensor([9.81]),
            target_cov_chol_flat=to.tensor([1.0]),
            init_mean=to.tensor([9.81]),
            init_cov_chol_flat=to.tensor([0.05]),
        )
    ]
    env = DomainRandWrapperLive(
        env,
        randomizer=DomainRandomizer(
            *[SelfPacedDomainParam(**p) for p in env_sprl_params]))

    sprl_hparam = dict(
        kl_constraints_ub=8000,
        performance_lower_bound=500,
        std_lower_bound=0.4,
        kl_threshold=200,
        max_iter=args.sprl_iterations,
        optimize_mean=not args.cov_only,
    )
    algo = SPRL(env, PPO(ex_dir, env, policy, critic, **algo_hparam),
                **sprl_hparam)

    # Save the hyper-parameters
    save_dicts_to_yaml(
        dict(env=env_hparams, seed=args.seed),
        task_args=dict(
            final_factor=500,
            success_bonus=250,
            Q=np.diag([0.5, 1e-4, 4e1]),
            R=np.diag([0, 0, 1e-1, 2e-1]),
            Q_dev=np.diag([0.0, 0.0, 5]),
            # R_dev=np.diag([0., 0., 1e-3, 1e-3])
        ),
    )
    env = WAMBallInCupSim(**env_hparams)

    # Randomizer
    randomizer = DomainRandomizer(
        UniformDomainParam(name="cup_scale", mean=1.0, halfspan=0.2),
        NormalDomainParam(name="rope_length", mean=0.3, std=0.005),
        NormalDomainParam(name="ball_mass", mean=0.021, std=0.001),
        UniformDomainParam(name="joint_2_damping", mean=0.05, halfspan=0.05),
        UniformDomainParam(name="joint_2_dryfriction", mean=0.1, halfspan=0.1),
    )
    env = DomainRandWrapperLive(env, randomizer)

    # Policy
    bounds = ([0.0, 0.25, 0.5], [1.0, 1.5, 2.5])
    policy_hparam = dict(rbf_hparam=dict(num_feat_per_dim=9, bounds=bounds, scale=None), dim_mask=2)
    policy = DualRBFLinearPolicy(env.spec, **policy_hparam)

    # Algorithm
    algo_hparam = dict(
        max_iter=15,
        pop_size=100,
        num_is_samples=10,
    pyrado.set_seed(args.seed, verbose=True)

    # Environments
    env_hparams = dict(dt=1 / 100.0, max_steps=600)
    env_real = QQubeSwingUpReal(**env_hparams)

    env_sim = QQubeSwingUpSim(**env_hparams)
    randomizer = DomainRandomizer(
        NormalDomainParam(name="mass_rot_pole",
                          mean=0.0,
                          std=1e6,
                          clip_lo=1e-3),
        NormalDomainParam(name="mass_pend_pole",
                          mean=0.0,
                          std=1e6,
                          clip_lo=1e-3),
        NormalDomainParam(name="length_rot_pole",
                          mean=0.0,
                          std=1e6,
                          clip_lo=1e-3),
        NormalDomainParam(name="length_pend_pole",
                          mean=0.0,
                          std=1e6,
                          clip_lo=1e-3),
    )
    env_sim = DomainRandWrapperLive(env_sim, randomizer)
    dp_map = {
        0: ("mass_rot_pole", "mean"),
        1: ("mass_rot_pole", "std"),
        2: ("mass_pend_pole", "mean"),
        3: ("mass_pend_pole", "std"),
        4: ("length_rot_pole", "mean"),