Ejemplo n.º 1
0
def get_default_randomizer_pend() -> DomainRandomizer:
    """
    Build the standard domain randomizer for the `PendulumSim` environment.

    Every listed domain parameter is normally distributed around its nominal value, with a standard deviation
    of one tenth of that nominal value and a lower clipping bound of 1e-3.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.pendulum import PendulumSim

    nominal = PendulumSim.get_nominal_domain_param()
    randomized_keys = ('g', 'm_pole', 'l_pole', 'd_pole', 'tau_max')
    normal_params = [
        NormalDomainParam(name=key, mean=nominal[key], std=nominal[key] / 10, clip_lo=1e-3)
        for key in randomized_keys
    ]
    return DomainRandomizer(*normal_params)
Ejemplo n.º 2
0
def create_default_randomizer_pend() -> DomainRandomizer:
    """
    Assemble the standard domain randomizer for the `PendulumSim` environment.

    Each listed domain parameter follows a normal distribution whose mean is the nominal value, whose standard
    deviation is a tenth of the nominal value, and which is clipped from below at 1e-3.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.pendulum import PendulumSim

    nominal = PendulumSim.get_nominal_domain_param()
    randomized_keys = ("gravity_const", "pole_mass", "pole_length", "pole_damping", "torque_thold")
    normal_params = [
        NormalDomainParam(name=key, mean=nominal[key], std=nominal[key] / 10, clip_lo=1e-3)
        for key in randomized_keys
    ]
    return DomainRandomizer(*normal_params)
Ejemplo n.º 3
0
def create_default_randomizer_qbb() -> DomainRandomizer:
    """
    Assemble the standard domain randomizer for the `QBallBalancerSim` environment.

    The parameters are added in a fixed order: first the normally distributed ones, then the uniformly
    distributed efficiencies, damping/friction terms, voltage thresholds, and angular offsets.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.quanser_ball_balancer import QBallBalancerSim

    nominal = QBallBalancerSim.get_nominal_domain_param()

    # (name, divisor turning the nominal value into the std, lower clipping bound)
    normal_specs = (
        ('g', 10, 1e-4),
        ('m_ball', 5, 1e-4),
        ('r_ball', 5, 1e-3),
        ('l_plate', 5, 5e-2),
        ('r_arm', 5, 1e-4),
        ('K_g', 4, 1e-2),
        ('J_l', 4, 1e-6),
        ('J_m', 4, 1e-9),
        ('k_m', 4, 1e-4),
        ('R_m', 4, 1e-4),
    )
    params = [
        NormalDomainParam(name=key, mean=nominal[key], std=nominal[key]/div, clip_lo=lower)
        for key, div, lower in normal_specs
    ]

    # Uniform parameters with a half span of a quarter of the nominal value, clipped to [1e-4, 1]
    for key in ('eta_g', 'eta_m'):
        params.append(
            UniformDomainParam(name=key, mean=nominal[key], halfspan=nominal[key]/4, clip_lo=1e-4, clip_up=1)
        )

    # Same half span, but only clipped from below
    for key in ('B_eq', 'c_frict'):
        params.append(UniformDomainParam(name=key, mean=nominal[key], halfspan=nominal[key]/4, clip_lo=1e-4))

    # Voltage thresholds: the half span of the `*_neg` entries is computed from the absolute nominal value
    params.append(
        UniformDomainParam(name='V_thold_x_pos', mean=nominal['V_thold_x_pos'], halfspan=nominal['V_thold_x_pos']/3)
    )
    params.append(
        UniformDomainParam(name='V_thold_x_neg', mean=nominal['V_thold_x_neg'],
                           halfspan=abs(nominal['V_thold_x_neg'])/3)
    )
    params.append(
        UniformDomainParam(name='V_thold_y_pos', mean=nominal['V_thold_y_pos'], halfspan=nominal['V_thold_y_pos']/3)
    )
    params.append(
        UniformDomainParam(name='V_thold_y_neg', mean=nominal['V_thold_y_neg'],
                           halfspan=abs(nominal['V_thold_y_neg'])/3)
    )

    # Angular offsets share one fixed half span of 6 degrees, expressed in radians
    offset_halfspan = 6./180*np.pi
    for key in ('offset_th_x', 'offset_th_y'):
        params.append(UniformDomainParam(name=key, mean=nominal[key], halfspan=offset_halfspan))

    return DomainRandomizer(*params)
Ejemplo n.º 4
0
def get_default_randomizer_pi() -> DomainRandomizer:
    """
    Get the default randomizer for the `PlanarInsertSim`.

    The masses of the five links are normally distributed around their own nominal values, using a standard
    deviation of one fifth of the nominal value and a lower clipping bound of 1e-2. The z-offset of the upper wall
    is sampled uniformly around 0 with a half span of 0.05 and clipped from below at 0.

    .. note::
        `link5_mass` is parameterized by its own nominal entry. It was previously parameterized by the nominal
        value of `link4_mass`, which looks like a copy-paste slip.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.rcspysim.planar_insert import PlanarInsertSim

    dp_nom = PlanarInsertSim.get_nominal_domain_param()

    # Generating the link parameters from one list of names rules out mismatching name and nominal value
    link_mass_names = ('link1_mass', 'link2_mass', 'link3_mass', 'link4_mass', 'link5_mass')
    link_masses = [
        NormalDomainParam(name=name, mean=dp_nom[name], std=dp_nom[name] / 5, clip_lo=1e-2)
        for name in link_mass_names
    ]

    upper_wall_offset = UniformDomainParam(name='upperwall_pos_offset_z',
                                           mean=0,
                                           halfspan=0.05,
                                           clip_lo=0)  # only increase the gap

    return DomainRandomizer(*link_masses, upper_wall_offset)
Ejemplo n.º 5
0
def create_default_randomizer_bs() -> DomainRandomizer:
    """
    Assemble the standard domain randomizer for the `BoxShelvingSim` environment.

    The box dimensions and the box mass are normally distributed around their nominal values without clipping
    arguments; the friction coefficient is uniformly distributed and clipped from below at 1e-5.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.rcspysim.box_shelving import BoxShelvingSim

    nominal = BoxShelvingSim.get_nominal_domain_param()

    # (name, divisor turning the nominal value into the std)
    normal_specs = (("box_length", 10), ("box_width", 10), ("box_mass", 5))
    params = [
        NormalDomainParam(name=key, mean=nominal[key], std=nominal[key] / div)
        for key, div in normal_specs
    ]

    friction_key = "box_friction_coefficient"
    params.append(
        UniformDomainParam(
            name=friction_key,
            mean=nominal[friction_key],
            halfspan=nominal[friction_key] / 5,
            clip_lo=1e-5,
        )
    )
    return DomainRandomizer(*params)
Ejemplo n.º 6
0
def default_randomizer():
    """
    Build a randomizer from a fixed, hard-coded mix of normal, uniform, and multivariate normal domain parameters.

    :return: `DomainRandomizer` holding the five domain parameters defined below
    """
    # NOTE(review): the lower clipping bound of `mass` (10) lies above its mean (1.2); kept exactly as in the original
    mass = NormalDomainParam(name="mass", mean=1.2, std=0.1, clip_lo=10, clip_up=100)
    special = UniformDomainParam(name="special", mean=0, halfspan=42, clip_lo=-7.4, roundint=True)
    length = NormalDomainParam(name="length", mean=4, std=0.6, clip_up=50.1)
    time_delay = UniformDomainParam(name="time_delay", mean=13, halfspan=6, clip_up=17, roundint=True)
    multidim = MultivariateNormalDomainParam(
        name="multidim",
        mean=10 * to.ones((2,)),
        cov=2 * to.eye(2),
        clip_up=11,
    )
    return DomainRandomizer(mass, special, length, time_delay, multidim)
Ejemplo n.º 7
0
def default_pert():
    """
    Build a randomizer from a fixed, hard-coded mix of normal, uniform, and multivariate normal domain parameters.

    :return: `DomainRandomizer` holding the five domain parameters defined below
    """
    # NOTE(review): the lower clipping bound of `mass` (10) lies above its mean (1.2); kept exactly as in the original
    params = [
        NormalDomainParam(name='mass', mean=1.2, std=0.1, clip_lo=10, clip_up=100),
        UniformDomainParam(name='special', mean=0, halfspan=42, clip_lo=-7.4, roundint=True),
        NormalDomainParam(name='length', mean=4, std=0.6, clip_up=50.1),
        UniformDomainParam(name='time_delay', mean=13, halfspan=6, clip_up=17, roundint=True),
    ]
    params.append(
        MultivariateNormalDomainParam(name='multidim', mean=10*to.ones((2,)), cov=2*to.eye(2), clip_up=11)
    )
    return DomainRandomizer(*params)
Ejemplo n.º 8
0
def create_default_randomizer_omo() -> DomainRandomizer:
    """
    Assemble the standard domain randomizer for the `OneMassOscillatorSim` environment.

    The parameters `m`, `k`, and `d` are normally distributed around their nominal values, with a standard
    deviation of one third of the nominal value and a lower clipping bound of 1e-3.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.one_mass_oscillator import OneMassOscillatorSim

    nominal = OneMassOscillatorSim.get_nominal_domain_param()
    normal_params = [
        NormalDomainParam(name=key, mean=nominal[key], std=nominal[key]/3, clip_lo=1e-3)
        for key in ('m', 'k', 'd')
    ]
    return DomainRandomizer(*normal_params)
Ejemplo n.º 9
0
def get_default_randomizer_wambic() -> DomainRandomizer:
    """
    Get the default randomizer for the `WAMBallInCupSim`.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.mujoco.wam import WAMBallInCupSim

    nominal = WAMBallInCupSim.get_nominal_domain_param()

    # Lower bound of 0.6, since the ball needs to fit into the cup
    cup_scale = NormalDomainParam(
        name='cup_scale',
        mean=nominal['cup_scale'],
        std=nominal['cup_scale'] / 5,
        clip_lo=0.6,
    )
    # Lower bound of 0.2 on the sampled rope length (original author's note: the rope will not get more than
    # 10cm shorter, presumably relative to the nominal length)
    rope_length = NormalDomainParam(
        name='rope_length',
        mean=nominal['rope_length'],
        std=nominal['rope_length'] / 10,
        clip_lo=0.2,
    )
    return DomainRandomizer(cup_scale, rope_length)
Ejemplo n.º 10
0
def create_default_randomizer_cata() -> DomainRandomizer:
    """
    Assemble the standard domain randomizer for the `CatapultSim` environment.

    All three parameters are normally distributed around their nominal values and clipped from below at 1e-3.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.one_step.catapult import CatapultSim

    nominal = CatapultSim.get_nominal_domain_param()

    # (name, divisor turning the nominal value into the std)
    normal_specs = (('g', 10), ('k', 5), ('x', 5))
    normal_params = [
        NormalDomainParam(name=key, mean=nominal[key], std=nominal[key]/div, clip_lo=1e-3)
        for key, div in normal_specs
    ]
    return DomainRandomizer(*normal_params)
Ejemplo n.º 11
0
def create_qqsu_setup():
    """
    Set up the environments, the domain distribution parameter mapping, and the policies for a `QQubeSwingUpSim`
    experiment.

    One `QQubeSwingUpSim` instance gets fixed, perturbed pole masses and lengths assigned. A second instance is
    wrapped by `DomainRandWrapperLive` and `MetaDomainRandWrapper`, such that the mean and std of the same four
    domain parameters are addressed through the returned mapping.

    :return: wrapped simulation environment, environment with the perturbed domain parameters, environment
             hyper-parameters, domain distribution parameter mapping, behavioral policy, and domain distribution
             parameter policy
    """
    pole_keys = ("mass_rot_pole", "mass_pend_pole", "length_rot_pole", "length_pend_pole")

    # Environments
    env_hparams = dict(dt=1 / 100.0, max_steps=600)
    env_real = QQubeSwingUpSim(**env_hparams)
    env_real.domain_param = {
        "mass_rot_pole": 0.095 * 0.9,  # 0.095*0.9 = 0.0855
        "mass_pend_pole": 0.024 * 1.1,  # 0.024*1.1 = 0.0264
        "length_rot_pole": 0.085 * 0.9,  # 0.085*0.9 = 0.0765
        "length_pend_pole": 0.129 * 1.1,  # 0.129*1.1 = 0.1419
    }

    env_sim = QQubeSwingUpSim(**env_hparams)
    # All four parameters start from the same mean/std values
    # (presumably placeholders that are adapted through the mapping below -- TODO confirm)
    randomizer = DomainRandomizer(
        *[NormalDomainParam(name=key, mean=0.0, std=1e-9, clip_lo=1e-3) for key in pole_keys]
    )
    env_sim = DomainRandWrapperLive(env_sim, randomizer)

    # Consecutive indices address the mean and then the std of every pole parameter, i.e. 0..7
    dp_map = {
        2 * pos + shift: (key, stat)
        for pos, key in enumerate(pole_keys)
        for shift, stat in enumerate(("mean", "std"))
    }
    # Alternative mask: [False, True, False, True, False, True, False, True]
    trafo_mask = [True] * len(dp_map)
    env_sim = MetaDomainRandWrapper(env_sim, dp_map)

    # Policies (the behavioral policy needs to be deterministic)
    behavior_policy = QQubeSwingUpAndBalanceCtrl(env_sim.spec)
    prior_means = dict(zip(pole_keys, (0.095, 0.024, 0.085, 0.129)))
    prior = DomainRandomizer(
        *[NormalDomainParam(name=key, mean=value, std=value / 10) for key, value in prior_means.items()]
    )
    ddp_policy = DomainDistrParamPolicy(
        mapping=dp_map,
        trafo_mask=trafo_mask,
        prior=prior,
        scale_params=False,
    )

    return env_sim, env_real, env_hparams, dp_map, behavior_policy, ddp_policy
Ejemplo n.º 12
0
def create_default_randomizer_bop() -> DomainRandomizer:
    """
    Create the default randomizer for the `BallOnPlateSim`.

    The ball's mass and radius are normally distributed around their nominal values (std of one third of the
    nominal value, clipped from below at 1e-2). The three center of mass coordinates are normally distributed with
    a fixed std of 0.003. The friction coefficients and the slip are uniformly distributed with a half span equal
    to the respective nominal value.

    .. note::
        The upper clipping bound of the two friction coefficients is passed as `clip_up`, which is the keyword
        used for upper bounds by the other domain parameter definitions. It was previously spelled `clip_hi`.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.rcspysim.ball_on_plate import BallOnPlateSim

    dp_nom = BallOnPlateSim.get_nominal_domain_param()

    params = [
        NormalDomainParam(name=name, mean=dp_nom[name], std=dp_nom[name] / 3, clip_lo=1e-2)
        for name in ("ball_mass", "ball_radius")
    ]
    params += [
        NormalDomainParam(name=name, mean=dp_nom[name], std=0.003)
        for name in ("ball_com_x", "ball_com_y", "ball_com_z")
    ]
    # Friction coefficients are restricted to the interval [0, 1]
    params += [
        UniformDomainParam(name=name, mean=dp_nom[name], halfspan=dp_nom[name], clip_lo=0, clip_up=1)
        for name in ("ball_friction_coefficient", "ball_rolling_friction_coefficient")
    ]
    # Vortex only
    params.append(
        UniformDomainParam(name="ball_slip", mean=dp_nom["ball_slip"], halfspan=dp_nom["ball_slip"], clip_lo=0)
    )
    # Disabled in the original:
    # UniformDomainParam(name='ball_linearvelocitydamnping', mean=0., halfspan=1e-4),
    # UniformDomainParam(name='ball_angularvelocitydamnping', mean=0., halfspan=1e-4)

    return DomainRandomizer(*params)
Ejemplo n.º 13
0
def create_default_randomizer_wambic() -> DomainRandomizer:
    """
    Create the default randomizer for the `WAMBallInCupSim`.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.mujoco.wam import WAMBallInCupSim

    nominal = WAMBallInCupSim.get_nominal_domain_param()

    # Ball needs to fit into the cup
    cup_scale = NormalDomainParam(
        name='cup_scale', mean=nominal['cup_scale'], std=nominal['cup_scale']/5, clip_lo=0.65
    )
    # Rope won't be more than 3cm off
    rope_length = NormalDomainParam(
        name='rope_length', mean=nominal['rope_length'], std=nominal['rope_length']/30, clip_lo=0.27, clip_up=0.33
    )
    ball_mass = NormalDomainParam(
        name='ball_mass', mean=nominal['ball_mass'], std=nominal['ball_mass']/10, clip_lo=1e-2
    )

    # Uniform parameters with a half span of half the nominal value; (name, lower clipping bound)
    uniform_specs = (('joint_damping', 0.), ('joint_stiction', 0.), ('rope_damping', 1e-6))
    uniform_params = [
        UniformDomainParam(name=key, mean=nominal[key], halfspan=nominal[key]/2, clip_lo=lower)
        for key, lower in uniform_specs
    ]

    return DomainRandomizer(cup_scale, rope_length, ball_mass, *uniform_params)
Ejemplo n.º 14
0
def create_default_randomizer_qq() -> DomainRandomizer:
    """
    Assemble the standard domain randomizer for the `QQubeSim` environment.

    All parameters are normally distributed around their nominal values; the std divisor and the lower clipping
    bound differ per parameter and are listed in the table below.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.quanser_qube import QQubeSim

    nominal = QQubeSim.get_nominal_domain_param()

    # (name, divisor turning the nominal value into the std, lower clipping bound)
    normal_specs = (
        ("gravity_const", 10, 1e-3),
        ("motor_resistance", 5, 1e-3),
        ("motor_back_emf", 5, 1e-4),
        ("mass_rot_pole", 5, 1e-4),
        ("length_rot_pole", 5, 1e-4),
        ("damping_rot_pole", 4, 1e-9),
        ("mass_pend_pole", 5, 1e-4),
        ("length_pend_pole", 5, 1e-4),
        ("damping_pend_pole", 4, 1e-9),
    )
    normal_params = [
        NormalDomainParam(name=key, mean=nominal[key], std=nominal[key] / div, clip_lo=lower)
        for key, div, lower in normal_specs
    ]
    return DomainRandomizer(*normal_params)
Ejemplo n.º 15
0
def create_bob_setup():
    """
    Set up the environments, the domain distribution parameter mapping, and the policies for a `BallOnBeamSim`
    experiment in which only the domain parameter `g` is addressed.

    One `BallOnBeamSim` instance gets a fixed, deviating value of `g` assigned. A second instance is wrapped by
    `DomainRandWrapperLive` and `MetaDomainRandWrapper`, such that the mean and std of `g` are addressed through the
    returned mapping.

    :return: wrapped simulation environment, environment with the deviating domain parameter, environment
             hyper-parameters, domain distribution parameter mapping, behavioral policy, and domain distribution
             parameter policy
    """
    # Environments
    env_hparams = dict(dt=1 / 100., max_steps=500)
    env_real = BallOnBeamSim(**env_hparams)
    # Disabled alternatives from the original: l_beam=1.95, ang_offset=-0.03
    env_real.domain_param = {'g': 10.81}

    env_sim = BallOnBeamSim(**env_hparams)
    # Disabled alternatives from the original:
    # NormalDomainParam(name='l_beam', mean=0, std=1e-12, clip_lo=1.5, clip_up=3.5),
    # UniformDomainParam(name='ang_offset', mean=0, halfspan=1e-12),
    gravity_param = NormalDomainParam(name='g', mean=0, std=1e-12)
    env_sim = DomainRandWrapperLive(env_sim, DomainRandomizer(gravity_param))

    # Index 0 addresses the mean and index 1 the std of g
    # Disabled alternatives from the original:
    # 0: ('l_beam', 'mean'), 1: ('l_beam', 'std'),
    # 2: ('ang_offset', 'mean'), 3: ('ang_offset', 'halfspan')
    dp_map = {idx: ('g', stat) for idx, stat in enumerate(('mean', 'std'))}
    env_sim = MetaDomainRandWrapper(env_sim, dp_map)

    # Policies (the behavioral policy needs to be deterministic)
    env_spec = env_sim.spec
    feature_stack = FeatureStack([identity_feat, sin_feat])
    behavior_policy = LinearPolicy(env_spec, feats=feature_stack)
    behavior_policy.param_values = to.tensor(
        [3.8090, -3.8036, -1.0786, -2.4510, -0.9875, -1.3252, 3.1503, 1.4443]
    )

    # Disabled alternatives from the original:
    # NormalDomainParam(name='l_beam', mean=2.05, std=2.05/10),
    # UniformDomainParam(name='ang_offset', mean=0.03, halfspan=0.03/10),
    prior = DomainRandomizer(NormalDomainParam(name='g', mean=8.81, std=8.81 / 10))

    # Alternative mask from the original: [False, True, False, True]
    trafo_mask = [True, True]
    ddp_policy = DomainDistrParamPolicy(
        mapping=dp_map,
        trafo_mask=trafo_mask,
        prior=prior,
        scale_params=True,
    )

    return env_sim, env_real, env_hparams, dp_map, behavior_policy, ddp_policy
Ejemplo n.º 16
0
def get_default_randomizer_qq() -> DomainRandomizer:
    """
    Build the standard domain randomizer for the `QQubeSim` environment.

    All parameters are normally distributed around their nominal values with a standard deviation of one fifth of
    the nominal value; only the lower clipping bound differs per parameter.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.quanser_qube import QQubeSim

    nominal = QQubeSim.get_nominal_domain_param()

    # (name, lower clipping bound)
    normal_specs = (
        ('g', 1e-3),
        ('Rm', 1e-3),
        ('km', 1e-4),
        ('Mr', 1e-4),
        ('Lr', 1e-4),
        ('Dr', 1e-9),
        ('Mp', 1e-4),
        ('Lp', 1e-4),
        ('Dp', 1e-9),
    )
    normal_params = [
        NormalDomainParam(name=key, mean=nominal[key], std=nominal[key] / 5, clip_lo=lower)
        for key, lower in normal_specs
    ]
    return DomainRandomizer(*normal_params)
Ejemplo n.º 17
0
def create_default_randomizer_cata() -> DomainRandomizer:
    """
    Assemble the standard domain randomizer for the `CatapultSim` environment.

    All three parameters are normally distributed around their nominal values and clipped from below at 1e-3.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.one_step.catapult import CatapultSim

    nominal = CatapultSim.get_nominal_domain_param()

    # (name, divisor turning the nominal value into the std)
    normal_specs = (("gravity_const", 10), ("stiffness", 5), ("elongation", 5))
    normal_params = [
        NormalDomainParam(name=key, mean=nominal[key], std=nominal[key] / div, clip_lo=1e-3)
        for key, div in normal_specs
    ]
    return DomainRandomizer(*normal_params)
Ejemplo n.º 18
0
def create_default_randomizer_qcp() -> DomainRandomizer:
    """
    Assemble the standard domain randomizer for the `QCartPoleSim` environment.

    The nominal values are queried with `long=False`. Normally and uniformly distributed parameters are interleaved
    in the same order as they are passed to the randomizer.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.quanser_cartpole import QCartPoleSim

    nominal = QCartPoleSim.get_nominal_domain_param(long=False)

    def normal(key, divisor, lower):
        # Normal distribution around the nominal value with std = nominal / divisor
        return NormalDomainParam(name=key, mean=nominal[key], std=nominal[key]/divisor, clip_lo=lower)

    def uniform(key, **clipping):
        # Uniform distribution around the nominal value with a half span of a quarter of it
        return UniformDomainParam(name=key, mean=nominal[key], halfspan=nominal[key]/4, **clipping)

    return DomainRandomizer(
        normal('g', 10, 1e-4),
        normal('m_cart', 5, 1e-4),
        normal('m_pole', 5, 1e-4),
        normal('l_rail', 5, 1e-2),
        normal('l_pole', 5, 1e-2),
        uniform('eta_m', clip_lo=1e-4, clip_up=1),
        uniform('eta_g', clip_lo=1e-4, clip_up=1),
        normal('K_g', 4, 1e-4),
        normal('J_m', 4, 1e-9),
        normal('r_mp', 5, 1e-4),
        normal('R_m', 4, 1e-4),
        normal('k_m', 4, 1e-4),
        uniform('B_eq', clip_lo=1e-4),
        uniform('B_pole', clip_lo=1e-4),
    )
Ejemplo n.º 19
0
def bob_pert():
    """
    Build a randomizer from hard-coded distribution parameters for eight domain parameters.

    The first six parameters are normally distributed and clipped from below at 1e-3; `c_frict` and `ang_offset`
    are uniformly distributed without clipping arguments.

    :return: `DomainRandomizer` holding the eight domain parameters defined below
    """
    # (name, mean, std)
    normal_specs = (
        ('g', 9.81, 0.981),
        ('r_ball', 0.1, 0.01),
        ('m_ball', 0.5, 0.05),
        ('m_beam', 3.0, 0.3),
        ('d_beam', 0.1, 0.01),
        ('l_beam', 2.0, 0.2),
    )
    params = [
        NormalDomainParam(name=key, mean=mean, std=std, clip_lo=1e-3)
        for key, mean, std in normal_specs
    ]
    params.append(UniformDomainParam(name='c_frict', mean=0.05, halfspan=0.05))
    # Half span of 5 degrees, expressed in radians
    params.append(UniformDomainParam(name='ang_offset', mean=0, halfspan=5.*np.pi/180))
    return DomainRandomizer(*params)
Ejemplo n.º 20
0
def create_default_randomizer_bob() -> DomainRandomizer:
    """
    Assemble the standard domain randomizer for the `BallOnBeamSim` environment.

    Six parameters are normally distributed around their nominal values. The friction coefficient is uniformly
    distributed with a half span equal to its nominal value, and the angular offset is uniformly distributed
    around zero with a half span of 0.1 degrees.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.ball_on_beam import BallOnBeamSim

    nominal = BallOnBeamSim.get_nominal_domain_param()

    # (name, divisor turning the nominal value into the std, lower clipping bound)
    normal_specs = (
        ('g', 10, 1e-4),
        ('m_ball', 5, 1e-4),
        ('r_ball', 5, 1e-4),
        ('m_beam', 5, 1e-3),
        ('l_beam', 5, 1e-3),
        ('d_beam', 5, 1e-3),
    )
    params = [
        NormalDomainParam(name=key, mean=nominal[key], std=nominal[key]/div, clip_lo=lower)
        for key, div, lower in normal_specs
    ]
    params.append(
        UniformDomainParam(name='c_frict', mean=nominal['c_frict'], halfspan=nominal['c_frict'], clip_lo=0)
    )
    # Angles are given in degrees and converted to radians
    params.append(UniformDomainParam(name='ang_offset', mean=0./180*np.pi, halfspan=0.1/180*np.pi))
    return DomainRandomizer(*params)
Ejemplo n.º 21
0
def create_default_randomizer_bob() -> DomainRandomizer:
    """
    Assemble the standard domain randomizer for the `BallOnBeamSim` environment.

    Six parameters are normally distributed around their nominal values. The friction coefficient is uniformly
    distributed with a half span equal to its nominal value, and the angular offset is uniformly distributed
    around zero with a half span of 0.1 degrees.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.ball_on_beam import BallOnBeamSim

    nominal = BallOnBeamSim.get_nominal_domain_param()

    # (name, divisor turning the nominal value into the std, lower clipping bound)
    normal_specs = (
        ("gravity_const", 10, 1e-4),
        ("ball_mass", 5, 1e-4),
        ("ball_radius", 5, 1e-4),
        ("beam_mass", 5, 1e-3),
        ("beam_length", 5, 1e-3),
        ("beam_thickness", 5, 1e-3),
    )
    params = [
        NormalDomainParam(name=key, mean=nominal[key], std=nominal[key] / div, clip_lo=lower)
        for key, div, lower in normal_specs
    ]
    params.append(
        UniformDomainParam(
            name="friction_coeff",
            mean=nominal["friction_coeff"],
            halfspan=nominal["friction_coeff"],
            clip_lo=0,
        )
    )
    # Angles are given in degrees and converted to radians
    params.append(
        UniformDomainParam(name="ang_offset", mean=0.0 / 180 * np.pi, halfspan=0.1 / 180 * np.pi)
    )
    return DomainRandomizer(*params)
Ejemplo n.º 22
0
from pyrado.domain_randomization.domain_parameter import (
    BernoulliDomainParam,
    DomainParam,
    MultivariateNormalDomainParam,
    NormalDomainParam,
    UniformDomainParam,
)
from pyrado.domain_randomization.domain_randomizer import DomainRandomizer

# Instantiate a plain domain parameter from a name and a mean only; the object is not bound to a name
DomainParam(name="a", mean=1)

# Instantiate a Bernoulli domain parameter with the two values 2 and 5
# (prob_1 is presumably the probability of val_1 -- confirm against the class definition)
BernoulliDomainParam(name="b", val_0=2, val_1=5, prob_1=0.8)

# Instantiate a randomizer holding a single normally distributed domain parameter
# NOTE(review): the lower clipping bound (10) lies above the mean (1.2) -- confirm that this is intended
DomainRandomizer(
    NormalDomainParam(name="mass", mean=1.2, std=0.1, clip_lo=10, clip_up=100))

DomainRandomizer(
    NormalDomainParam(name="mass", mean=1.2, std=0.1, clip_lo=10, clip_up=100),
    UniformDomainParam(name="special",
                       mean=0,
                       halfspan=42,
                       clip_lo=-7.4,
                       roundint=True),
    NormalDomainParam(name="length", mean=4, std=0.6, clip_up=50.1),
    UniformDomainParam(name="time_delay",
                       mean=13,
                       halfspan=6,
                       clip_up=17,
                       roundint=True),
    MultivariateNormalDomainParam(name="multidim",
Ejemplo n.º 23
0
import torch as to

from pyrado.domain_randomization.domain_parameter import DomainParam, BernoulliDomainParam, UniformDomainParam,\
    NormalDomainParam, MultivariateNormalDomainParam
from pyrado.domain_randomization.domain_randomizer import DomainRandomizer


# Plain domain parameter that only carries a name and a mean
DomainParam(
    name='a',
    mean=1,
)

# Two-valued parameter with the values 2 and 5
BernoulliDomainParam(
    name='b',
    val_0=2,
    val_1=5,
    prob_1=0.8,
)

# Randomizer wrapping a single normally distributed parameter
DomainRandomizer(
    NormalDomainParam(
        name='mass',
        mean=1.2,
        std=0.1,
        clip_lo=10,
        clip_up=100,
    ),
)

# Randomizer mixing normal, uniform (rounded to integers), and multivariate normal parameters
DomainRandomizer(
    NormalDomainParam(
        name='mass',
        mean=1.2,
        std=0.1,
        clip_lo=10,
        clip_up=100,
    ),
    UniformDomainParam(
        name='special',
        mean=0,
        halfspan=42,
        clip_lo=-7.4,
        roundint=True,
    ),
    NormalDomainParam(
        name='length',
        mean=4,
        std=0.6,
        clip_up=50.1,
    ),
    UniformDomainParam(
        name='time_delay',
        mean=13,
        halfspan=6,
        clip_up=17,
        roundint=True,
    ),
    MultivariateNormalDomainParam(
        name='multidim',
        mean=10 * to.ones((2,)),
        cov=2 * to.eye(2),
        clip_up=11,
    ),
)
Ejemplo n.º 24
0
        observe_ball=True,
        task_args=dict(
            final_factor=500,
            success_bonus=250,
            Q=np.diag([0.5, 1e-4, 4e1]),
            R=np.diag([0, 0, 1e-1, 2e-1]),
            Q_dev=np.diag([0.0, 0.0, 5]),
            # R_dev=np.diag([0., 0., 1e-3, 1e-3])
        ),
    )
    # Create the simulation environment from the `env_hparams` dict
    env = WAMBallInCupSim(**env_hparams)

    # Randomizer: five domain parameters with fixed, hand-specified distribution settings
    # (for both joint_2 parameters the halfspan equals the mean)
    randomizer = DomainRandomizer(
        UniformDomainParam(name="cup_scale", mean=1.0, halfspan=0.2),
        NormalDomainParam(name="rope_length", mean=0.3, std=0.005),
        NormalDomainParam(name="ball_mass", mean=0.021, std=0.001),
        UniformDomainParam(name="joint_2_damping", mean=0.05, halfspan=0.05),
        UniformDomainParam(name="joint_2_dryfriction", mean=0.1, halfspan=0.1),
    )
    # Wrap the environment together with the randomizer; the wrapped object replaces `env`
    env = DomainRandWrapperLive(env, randomizer)

    # Policy
    # Pair of 3-element lists — presumably (lower, upper) bounds for the RBF features; TODO confirm
    bounds = ([0.0, 0.25, 0.5], [1.0, 1.5, 2.5])
    policy_hparam = dict(rbf_hparam=dict(num_feat_per_dim=9, bounds=bounds, scale=None), dim_mask=2)
    # The policy is built from the spec of the wrapped environment
    policy = DualRBFLinearPolicy(env.spec, **policy_hparam)

    # Algorithm
    algo_hparam = dict(
        max_iter=15,
        pop_size=100,
Ejemplo n.º 25
0
    # Experiment directory, labeled with the environment name and the algorithm/policy combination
    ex_dir = setup_experiment(
        QQubeSwingUpSim.name,
        f"{SimOpt.name}-{NES.name}-{PPO.name}_{FNNPolicy.name}")
    num_workers = 16

    # Set seed if desired
    pyrado.set_seed(args.seed, verbose=True)

    # Environments
    # The same step size and step limit are used for the real and the simulated environment
    env_hparams = dict(dt=1 / 100.0, max_steps=600)
    env_real = QQubeSwingUpReal(**env_hparams)

    env_sim = QQubeSwingUpSim(**env_hparams)
    # All four parameters start with mean 0 and a very large std, clipped from below at 1e-3
    # NOTE(review): these look like placeholder values that are overwritten later — confirm against the rest of the script
    randomizer = DomainRandomizer(
        NormalDomainParam(name="mass_rot_pole",
                          mean=0.0,
                          std=1e6,
                          clip_lo=1e-3),
        NormalDomainParam(name="mass_pend_pole",
                          mean=0.0,
                          std=1e6,
                          clip_lo=1e-3),
        NormalDomainParam(name="length_rot_pole",
                          mean=0.0,
                          std=1e6,
                          clip_lo=1e-3),
        NormalDomainParam(name="length_pend_pole",
                          mean=0.0,
                          std=1e6,
                          clip_lo=1e-3),
    )
    # Wrap the simulation together with the randomizer; the wrapped object replaces `env_sim`
    env_sim = DomainRandWrapperLive(env_sim, randomizer)
Ejemplo n.º 26
0
    # Experiment (set seed before creating the modules)
    # The directory is labeled with the environment name and the algorithm/policy combination
    ex_dir = setup_experiment(
        QQubeSwingUpSim.name,
        f'{SimOpt.name}-{REPS.name}-{PPO.name}_{FNNPolicy.name}')
    num_workers = 16

    # Set seed if desired
    pyrado.set_seed(args.seed, verbose=True)

    # Environments
    # The same step size and step limit are used for the real and the simulated environment
    env_hparams = dict(dt=1 / 500., max_steps=3000)
    env_real = QQubeReal(**env_hparams)

    env_sim = QQubeSwingUpSim(**env_hparams)
    # All four parameters start with mean 0 and a very large std, clipped from below at 1e-3
    # NOTE(review): these look like placeholder values that are overwritten later — confirm against the rest of the script
    randomizer = DomainRandomizer(
        NormalDomainParam(name='Mr', mean=0., std=1e6, clip_lo=1e-3),
        NormalDomainParam(name='Mp', mean=0., std=1e6, clip_lo=1e-3),
        NormalDomainParam(name='Lr', mean=0., std=1e6, clip_lo=1e-3),
        NormalDomainParam(name='Lp', mean=0., std=1e6, clip_lo=1e-3),
    )
    env_sim = DomainRandWrapperLive(env_sim, randomizer)
    # Integer index -> (domain parameter name, distribution attribute); two consecutive indices per parameter
    dp_map = {
        0: ('Mr', 'mean'),
        1: ('Mr', 'std'),
        2: ('Mp', 'mean'),
        3: ('Mp', 'std'),
        4: ('Lr', 'mean'),
        5: ('Lr', 'std'),
        6: ('Lp', 'mean'),
        7: ('Lp', 'std')
    }
Ejemplo n.º 27
0
from pyrado.domain_randomization.domain_parameter import (
    BernoulliDomainParam,
    MultivariateNormalDomainParam,
    NormalDomainParam,
    UniformDomainParam,
)
from pyrado.domain_randomization.utils import param_grid
from pyrado.environments.sim_base import SimEnv


@pytest.mark.parametrize(
    "dp",
    [
        UniformDomainParam(
            name="", mean=3.0, halfspan=11.0, clip_lo=-5, clip_up=5),
        NormalDomainParam(name="", mean=10, std=1.0, clip_lo=9, clip_up=11),
        MultivariateNormalDomainParam(name="",
                                      mean=to.ones((2, 1)),
                                      cov=to.eye(2),
                                      clip_lo=-1,
                                      clip_up=1.0),
        MultivariateNormalDomainParam(
            name="", mean=10 * to.ones((2, )), cov=2 * to.eye(2), clip_up=11),
        BernoulliDomainParam(name="", val_0=2, val_1=5, prob_1=0.8),
        BernoulliDomainParam(name="", val_0=-3, val_1=5, prob_1=0.8,
                             clip_up=4),
    ],
    ids=["U", "N", "MVN_v1", "MVN_v2", "B_v1", "B_v2"],
)
@pytest.mark.parametrize("num_samples", [1, 5, 100])
def test_domain_param(dp, num_samples):
Ejemplo n.º 28
0
    # Hyper-parameters stored alongside the reference experiment
    hparams = load_dict_from_yaml(osp.join(ref_ex_dir, "hyperparams.yaml"))

    # Environment
    # Rebuilt from the stored "env_sim" entry, then wrapped with action normalization
    env_hparams = hparams["env_sim"]
    env = QQubeSwingUpSim(**env_hparams)
    env = ActNormWrapper(env)

    # Randomizer
    # NOTE(review): `dp_nom` is not used within the lines visible here — check whether it is needed further down
    dp_nom = QQubeSwingUpSim.get_nominal_domain_param()
    randomizer = DomainRandomizer(
        # Alternative uniform distributions, currently disabled:
        # UniformDomainParam(name='mass_pend_pole', mean=0.024, halfspan=0.0048),
        # UniformDomainParam(name='mass_rot_pole', mean=0.095, halfspan=0.0190),
        # UniformDomainParam(name='length_pend_pole', mean=0.129, halfspan=0.0258),
        # UniformDomainParam(name='length_rot_pole', mean=0.085, halfspan=0.0170),
        # #
        # Active normal distributions with hard-coded means and standard deviations
        NormalDomainParam(name="mass_pend_pole", mean=0.0227, std=0.0009),
        NormalDomainParam(name="mass_rot_pole", mean=0.0899, std=0.0039),
        NormalDomainParam(name="length_pend_pole", mean=0.1474, std=0.0046),
        NormalDomainParam(name="length_rot_pole", mean=0.0777, std=0.003),
    )
    env = DomainRandWrapperLive(env, randomizer)

    # Policy
    # Loaded from the reference experiment, then `init_param()` is called on it
    # NOTE(review): presumably this resets the loaded weights so only the architecture is reused — confirm
    policy = to.load(osp.join(ref_ex_dir, "policy.pt"))
    policy.init_param()

    # Critic
    # Same load-then-`init_param()` pattern for the value function, which is then passed to GAE
    vfcn = to.load(osp.join(ref_ex_dir, "valuefcn.pt"))
    vfcn.init_param()
    critic = GAE(vfcn, **hparams["critic"])
Ejemplo n.º 29
0
def test_simopt_cem_ppo(ex_dir, env: SimEnv):
    """
    Run a single `SimOpt` iteration with `PPO` as policy optimizer and a `CEM`-driven system identification,
    and check that the algorithm stops at its maximum iteration count.

    :param ex_dir: experiment directory handed to every algorithm created here
    :param env: simulation environment; a deep copy of it is used in place of the real system
    """
    pyrado.set_seed(0)

    # Domain parameters whose distributions are adapted, in the order used by the index mapping
    dp_names = (
        "mass_rot_pole",
        "mass_pend_pole",
        "length_rot_pole",
        "length_pend_pole",
    )

    # Environments
    env_real = ActNormWrapper(deepcopy(env))
    env_sim = ActNormWrapper(env)
    randomizer = DomainRandomizer(*[
        NormalDomainParam(name=dp_name, mean=0.0, std=1e6, clip_lo=1e-3)
        for dp_name in dp_names
    ])
    env_sim = DomainRandWrapperLive(env_sim, randomizer)
    # Index 2k addresses the mean and index 2k + 1 the std of the k-th name
    dp_map = {
        2 * k + j: (dp_name, attr)
        for k, dp_name in enumerate(dp_names)
        for j, attr in enumerate(("mean", "std"))
    }
    trafo_mask = [True] * len(dp_map)
    env_sim = MetaDomainRandWrapper(env_sim, dp_map)

    # Subroutine for policy improvement
    behav_policy_hparam = {"hidden_sizes": [16, 16], "hidden_nonlin": to.tanh}
    behav_policy = FNNPolicy(spec=env_sim.spec, **behav_policy_hparam)
    vfcn_hparam = {"hidden_sizes": [16, 16], "hidden_nonlin": to.relu}
    vfcn_spec = EnvSpec(env_sim.obs_space, ValueFunctionSpace)
    vfcn = FNNPolicy(spec=vfcn_spec, **vfcn_hparam)
    critic_hparam = {
        "gamma": 0.99,
        "lamda": 0.98,
        "num_epoch": 2,
        "batch_size": 128,
        "standardize_adv": True,
        "lr": 8e-4,
        "max_grad_norm": 5.0,
    }
    critic = GAE(vfcn, **critic_hparam)
    subrtn_policy_hparam = {
        "max_iter": 2,
        "eps_clip": 0.13,
        "min_steps": 4 * env_sim.max_steps,
        "num_epoch": 3,
        "batch_size": 128,
        "std_init": 0.75,
        "lr": 3e-04,
        "max_grad_norm": 1.0,
        "num_workers": 1,
    }
    subrtn_policy = PPO(
        ex_dir, env_sim, behav_policy, critic, **subrtn_policy_hparam
    )

    # Subroutine for system identification: the prior uses a std of one tenth of each mean
    prior_means = (0.095, 0.024, 0.085, 0.129)
    prior = DomainRandomizer(*[
        NormalDomainParam(name=dp_name, mean=mu, std=mu / 10)
        for dp_name, mu in zip(dp_names, prior_means)
    ])
    ddp_policy_hparam = {
        "mapping": dp_map,
        "trafo_mask": trafo_mask,
        "scale_params": True,
    }
    ddp_policy = DomainDistrParamPolicy(prior=prior, **ddp_policy_hparam)
    subsubrtn_distr_hparam = {
        "max_iter": 2,
        "pop_size": 10,
        "num_init_states_per_domain": 1,
        "num_is_samples": 8,
        "expl_std_init": 1e-2,
        "expl_std_min": 1e-5,
        "extra_expl_std_init": 1e-2,
        "extra_expl_decay_iter": 5,
        "num_workers": 1,
    }
    subsubrtn_distr = CEM(
        ex_dir, env_sim, ddp_policy, **subsubrtn_distr_hparam
    )
    subrtn_distr_hparam = {
        "metric": None,
        "obs_dim_weight": [1, 1, 1, 1, 10, 10],
        "num_rollouts_per_distr": 3,
        "num_workers": 1,
    }
    subrtn_distr = SysIdViaEpisodicRL(
        subsubrtn_distr, behavior_policy=behav_policy, **subrtn_distr_hparam
    )

    # Algorithm
    algo_hparam = {
        "max_iter": 1,
        "num_eval_rollouts": 5,
        "warmstart": True,
    }
    algo = SimOpt(
        ex_dir, env_sim, env_real, subrtn_policy, subrtn_distr, **algo_hparam
    )
    algo.train()

    assert algo.curr_iter == algo.max_iter
Ejemplo n.º 30
0
import pytest
import torch as to
import numpy as np
from copy import deepcopy

from pytest_lazyfixture import lazy_fixture
from pyrado.domain_randomization.domain_parameter import NormalDomainParam, MultivariateNormalDomainParam, \
    BernoulliDomainParam
from pyrado.domain_randomization.utils import param_grid
from tests.conftest import m_needs_bullet, m_needs_mujoco


# One test case per domain parameter instance; `ids` gives the cases readable names in the same order
@pytest.mark.sampling
@pytest.mark.parametrize('dp', [
    NormalDomainParam(name='', mean=10, std=1., clip_lo=9, clip_up=11),
    MultivariateNormalDomainParam(
        name='', mean=to.ones((2, 1)), cov=to.eye(2), clip_lo=-1, clip_up=1.),
    MultivariateNormalDomainParam(
        name='', mean=10 * to.ones((2, )), cov=2 * to.eye(2), clip_up=11),
    BernoulliDomainParam(name='', val_0=2, val_1=5, prob_1=0.8),
    BernoulliDomainParam(name='', val_0=-3, val_1=5, prob_1=0.8, clip_up=4),
],
                         ids=[
                             '1dim', '2dim_v1', '2dim_v2', 'bern_v1', 'bern_v2'
                         ])
def test_domain_param(dp):
    """
    Check that sampling from a domain parameter yields as many samples as requested.

    :param dp: domain parameter instance supplied by the parametrization
    """
    for num_samples in [1, 5, 25]:
        # The length of the returned object must equal the requested number of samples
        s = dp.sample(num_samples)
        assert len(s) == num_samples