Beispiel #1
0
def get_uniform_masses_lengths_randomizer_qq(frac_halfspan: float):
    """
    Build a randomizer that draws the masses and lengths of the Quanser Qube from uniform distributions which are
    centered at their nominal values

    :param frac_halfspan: divisor applied to every nominal value to obtain the half-span of its distribution, i.e. the
                          half-span is `nominal_value / frac_halfspan`
    :return: `DomainRandomizer` with uniformly distributed masses and lengths
    """
    from pyrado.environments.pysim.quanser_qube import QQubeSim

    nominal = QQubeSim.get_nominal_domain_param()
    # Pairs of (domain parameter name, lower clipping bound), in the order they are added to the randomizer
    specs = (('Mp', 1e-3), ('Mr', 1e-3), ('Lr', 1e-2), ('Lp', 1e-2))
    return DomainRandomizer(*[
        UniformDomainParam(name=key, mean=nominal[key], halfspan=nominal[key] / frac_halfspan, clip_lo=lower)
        for key, lower in specs
    ])
Beispiel #2
0
def create_default_randomizer_qbb() -> DomainRandomizer:
    """
    Assemble the default randomizer for the `QBallBalancerSim`.

    All distributions are centered at the nominal domain parameter values of the simulation.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.quanser_ball_balancer import QBallBalancerSim

    nom = QBallBalancerSim.get_nominal_domain_param()

    # Normally distributed parameters as (name, divisor of the nominal value yielding the std, lower clipping bound)
    params = [
        NormalDomainParam(name=key, mean=nom[key], std=nom[key]/divisor, clip_lo=lower)
        for key, divisor, lower in (
            ('g', 10, 1e-4),
            ('m_ball', 5, 1e-4),
            ('r_ball', 5, 1e-3),
            ('l_plate', 5, 5e-2),
            ('r_arm', 5, 1e-4),
            ('K_g', 4, 1e-2),
            ('J_l', 4, 1e-6),
            ('J_m', 4, 1e-9),
            ('k_m', 4, 1e-4),
            ('R_m', 4, 1e-4),
        )
    ]

    # Uniformly distributed parameters that are clipped from both sides
    params += [
        UniformDomainParam(name=key, mean=nom[key], halfspan=nom[key]/4, clip_lo=1e-4, clip_up=1)
        for key in ('eta_g', 'eta_m')
    ]

    # Uniformly distributed parameters that are only clipped from below
    params += [
        UniformDomainParam(name=key, mean=nom[key], halfspan=nom[key]/4, clip_lo=1e-4)
        for key in ('B_eq', 'c_frict')
    ]

    # Voltage thresholds: the half-span of the negative ones is computed from the absolute nominal value
    for key_pos, key_neg in (('V_thold_x_pos', 'V_thold_x_neg'), ('V_thold_y_pos', 'V_thold_y_neg')):
        params.append(UniformDomainParam(name=key_pos, mean=nom[key_pos], halfspan=nom[key_pos]/3))
        params.append(UniformDomainParam(name=key_neg, mean=nom[key_neg], halfspan=abs(nom[key_neg])/3))

    # Angular offsets share the same fixed half-span
    params += [
        UniformDomainParam(name=key, mean=nom[key], halfspan=6./180*np.pi)
        for key in ('offset_th_x', 'offset_th_y')
    ]

    return DomainRandomizer(*params)
Beispiel #3
0
def get_default_randomizer_bl() -> DomainRandomizer:
    """
    Get the default randomizer for the `BoxLiftingSim`.

    The box dimensions and the masses are normally distributed, the friction coefficients are uniformly distributed
    and clipped from below. All distributions are centered at the nominal domain parameter values.

    :return: randomizer based on the nominal domain parameter values
    """
    # The nominal values must come from the box lifting environment, since the basket parameters randomized below are
    # not part of the randomizer of the box shelving environment
    from pyrado.environments.rcspysim.box_lifting import BoxLiftingSim
    dp_nom = BoxLiftingSim.get_nominal_domain_param()
    return DomainRandomizer(
        NormalDomainParam(name='box_length',
                          mean=dp_nom['box_length'],
                          std=dp_nom['box_length'] / 10),
        NormalDomainParam(name='box_width',
                          mean=dp_nom['box_width'],
                          std=dp_nom['box_width'] / 10),
        NormalDomainParam(name='box_mass',
                          mean=dp_nom['box_mass'],
                          std=dp_nom['box_mass'] / 5),
        UniformDomainParam(name='box_friction_coefficient',
                           mean=dp_nom['box_friction_coefficient'],
                           halfspan=dp_nom['box_friction_coefficient'] / 5,
                           clip_lo=1e-5),
        NormalDomainParam(name='basket_mass',
                          mean=dp_nom['basket_mass'],
                          std=dp_nom['basket_mass'] / 5),
        UniformDomainParam(name='basket_friction_coefficient',
                           mean=dp_nom['basket_friction_coefficient'],
                           halfspan=dp_nom['basket_friction_coefficient'] / 5,
                           clip_lo=1e-5),
    )
Beispiel #4
0
def default_randomizer():
    """
    Create a randomizer from a fixed set of hand-picked domain parameters which does not depend on any environment.

    :return: `DomainRandomizer` holding two normal, two integer-rounded uniform, and one multivariate normal parameter
    """
    domain_params = [
        NormalDomainParam(name="mass", mean=1.2, std=0.1, clip_lo=10, clip_up=100),
        UniformDomainParam(name="special", mean=0, halfspan=42, clip_lo=-7.4, roundint=True),
        NormalDomainParam(name="length", mean=4, std=0.6, clip_up=50.1),
        UniformDomainParam(name="time_delay", mean=13, halfspan=6, clip_up=17, roundint=True),
    ]
    # Two-dimensional parameter with mean [10, 10] and covariance 2*I
    domain_params.append(
        MultivariateNormalDomainParam(name="multidim", mean=10 * to.ones((2,)), cov=2 * to.eye(2), clip_up=11)
    )
    return DomainRandomizer(*domain_params)
Beispiel #5
0
def default_pert():
    """
    Create a randomizer from a fixed set of hand-picked domain parameters which does not depend on any environment.

    :return: `DomainRandomizer` holding two normal, two integer-rounded uniform, and one multivariate normal parameter
    """
    scalar_params = [
        NormalDomainParam(name='mass', mean=1.2, std=0.1, clip_lo=10, clip_up=100),
        UniformDomainParam(name='special', mean=0, halfspan=42, clip_lo=-7.4, roundint=True),
        NormalDomainParam(name='length', mean=4, std=0.6, clip_up=50.1),
        UniformDomainParam(name='time_delay', mean=13, halfspan=6, clip_up=17, roundint=True),
    ]
    # Two-dimensional parameter with mean [10, 10] and covariance 2*I
    multidim_param = MultivariateNormalDomainParam(
        name='multidim', mean=10*to.ones((2,)), cov=2*to.eye(2), clip_up=11
    )
    return DomainRandomizer(*scalar_params, multidim_param)
Beispiel #6
0
def bob_pert():
    """
    Create a randomizer with hard-coded distribution parameters for the domain parameters `g`, `r_ball`, `m_ball`,
    `m_beam`, `d_beam`, `l_beam`, `c_frict`, and `ang_offset`.

    :return: `DomainRandomizer` with six normally and two uniformly distributed domain parameters
    """
    # Normally distributed parameters as (name, mean, std), all clipped from below at 1e-3
    normal_specs = (
        ('g', 9.81, 0.981),
        ('r_ball', 0.1, 0.01),
        ('m_ball', 0.5, 0.05),
        ('m_beam', 3.0, 0.3),
        ('d_beam', 0.1, 0.01),
        ('l_beam', 2.0, 0.2),
    )
    params = [NormalDomainParam(name=key, mean=mu, std=sigma, clip_lo=1e-3) for key, mu, sigma in normal_specs]

    # Uniformly distributed parameters without clipping
    params.append(UniformDomainParam(name='c_frict', mean=0.05, halfspan=0.05))
    params.append(UniformDomainParam(name='ang_offset', mean=0, halfspan=5.*np.pi/180))
    return DomainRandomizer(*params)
def create_default_randomizer_bop() -> DomainRandomizer:
    """
    Create the default randomizer for the `BallOnPlateSim`.

    The ball's mass, radius, and center of mass coordinates are normally distributed. The friction coefficients and
    the slip are uniformly distributed with a half-span equal to their nominal value.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.rcspysim.ball_on_plate import BallOnPlateSim

    dp_nom = BallOnPlateSim.get_nominal_domain_param()
    return DomainRandomizer(
        NormalDomainParam(name="ball_mass",
                          mean=dp_nom["ball_mass"],
                          std=dp_nom["ball_mass"] / 3,
                          clip_lo=1e-2),
        NormalDomainParam(name="ball_radius",
                          mean=dp_nom["ball_radius"],
                          std=dp_nom["ball_radius"] / 3,
                          clip_lo=1e-2),
        # The center of mass coordinates use a fixed standard deviation instead of a fraction of the nominal value
        NormalDomainParam(name="ball_com_x",
                          mean=dp_nom["ball_com_x"],
                          std=0.003),
        NormalDomainParam(name="ball_com_y",
                          mean=dp_nom["ball_com_y"],
                          std=0.003),
        NormalDomainParam(name="ball_com_z",
                          mean=dp_nom["ball_com_z"],
                          std=0.003),
        # The upper clipping bound is passed as `clip_up`, the keyword used by all other domain parameters
        UniformDomainParam(
            name="ball_friction_coefficient",
            mean=dp_nom["ball_friction_coefficient"],
            halfspan=dp_nom["ball_friction_coefficient"],
            clip_lo=0,
            clip_up=1,
        ),
        UniformDomainParam(
            name="ball_rolling_friction_coefficient",
            mean=dp_nom["ball_rolling_friction_coefficient"],
            halfspan=dp_nom["ball_rolling_friction_coefficient"],
            clip_lo=0,
            clip_up=1,
        ),
        # Vortex only
        UniformDomainParam(name="ball_slip",
                           mean=dp_nom["ball_slip"],
                           halfspan=dp_nom["ball_slip"],
                           clip_lo=0)
        # UniformDomainParam(name='ball_linearvelocitydamnping', mean=0., halfspan=1e-4),
        # UniformDomainParam(name='ball_angularvelocitydamnping', mean=0., halfspan=1e-4)
    )
Beispiel #8
0
def get_default_randomizer_pi() -> DomainRandomizer:
    """
    Get the default randomizer for the `PlanarInsertSim`.

    The masses of the links 1 to 5 are normally distributed around their own nominal values with a standard deviation
    of one fifth of that value. The offset of the upper wall is uniformly distributed and clipped at zero from below.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.rcspysim.planar_insert import PlanarInsertSim
    dp_nom = PlanarInsertSim.get_nominal_domain_param()

    # Every link mass is derived from its own nominal value (link 5 previously reused the values of link 4)
    link_names = [f'link{idx}_mass' for idx in range(1, 6)]
    link_masses = [
        NormalDomainParam(name=name,
                          mean=dp_nom[name],
                          std=dp_nom[name] / 5,
                          clip_lo=1e-2)
        for name in link_names
    ]
    return DomainRandomizer(
        *link_masses,
        UniformDomainParam(name='upperwall_pos_offset_z',
                           mean=0,
                           halfspan=0.05,
                           clip_lo=0)  # only increase the gap
    )
def create_default_randomizer_bs() -> DomainRandomizer:
    """
    Assemble the default randomizer for the `BoxShelvingSim`.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.rcspysim.box_shelving import BoxShelvingSim

    nominal = BoxShelvingSim.get_nominal_domain_param()

    # Normally distributed parameters as (name, divisor of the nominal value yielding the std)
    box_params = [
        NormalDomainParam(name=key, mean=nominal[key], std=nominal[key] / divisor)
        for key, divisor in (("box_length", 10), ("box_width", 10), ("box_mass", 5))
    ]

    # The friction coefficient is uniformly distributed and clipped from below
    friction_key = "box_friction_coefficient"
    box_params.append(
        UniformDomainParam(name=friction_key, mean=nominal[friction_key], halfspan=nominal[friction_key] / 5,
                           clip_lo=1e-5)
    )
    return DomainRandomizer(*box_params)
Beispiel #10
0
def create_default_randomizer_wambic() -> DomainRandomizer:
    """
    Assemble the default randomizer for the `WAMBallInCupSim`.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.mujoco.wam import WAMBallInCupSim

    nom = WAMBallInCupSim.get_nominal_domain_param()

    # Ball needs to fit into the cup
    cup_scale = NormalDomainParam(name='cup_scale', mean=nom['cup_scale'], std=nom['cup_scale']/5, clip_lo=0.65)
    # Rope won't be more than 3cm off
    rope_length = NormalDomainParam(
        name='rope_length', mean=nom['rope_length'], std=nom['rope_length']/30, clip_lo=0.27, clip_up=0.33
    )
    ball_mass = NormalDomainParam(name='ball_mass', mean=nom['ball_mass'], std=nom['ball_mass']/10, clip_lo=1e-2)

    # Uniformly distributed parameters as (name, lower clipping bound), the half-span is half of the nominal value
    uniform_params = [
        UniformDomainParam(name=key, mean=nom[key], halfspan=nom[key]/2, clip_lo=lower)
        for key, lower in (('joint_damping', 0.), ('joint_stiction', 0.), ('rope_damping', 1e-6))
    ]
    return DomainRandomizer(cup_scale, rope_length, ball_mass, *uniform_params)
Beispiel #11
0
def create_default_randomizer_bob() -> DomainRandomizer:
    """
    Assemble the default randomizer for the `BallOnBeamSim`.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.ball_on_beam import BallOnBeamSim

    nom = BallOnBeamSim.get_nominal_domain_param()

    # Normally distributed parameters as (name, divisor of the nominal value yielding the std, lower clipping bound)
    normal_specs = (
        ('g', 10, 1e-4),
        ('m_ball', 5, 1e-4),
        ('r_ball', 5, 1e-4),
        ('m_beam', 5, 1e-3),
        ('l_beam', 5, 1e-3),
        ('d_beam', 5, 1e-3),
    )
    params = [
        NormalDomainParam(name=key, mean=nom[key], std=nom[key]/divisor, clip_lo=lower)
        for key, divisor, lower in normal_specs
    ]

    # The friction coefficient spans from zero to twice its nominal value
    params.append(UniformDomainParam(name='c_frict', mean=nom['c_frict'], halfspan=nom['c_frict'], clip_lo=0))
    # The angular offset is centered at zero and independent of the nominal values
    params.append(UniformDomainParam(name='ang_offset', mean=0./180*np.pi, halfspan=0.1/180*np.pi))
    return DomainRandomizer(*params)
def create_default_randomizer_bob() -> DomainRandomizer:
    """
    Assemble the default randomizer for the `BallOnBeamSim`.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.ball_on_beam import BallOnBeamSim

    nominal = BallOnBeamSim.get_nominal_domain_param()

    # Normally distributed parameters as (name, divisor of the nominal value yielding the std, lower clipping bound)
    normal_specs = (
        ("gravity_const", 10, 1e-4),
        ("ball_mass", 5, 1e-4),
        ("ball_radius", 5, 1e-4),
        ("beam_mass", 5, 1e-3),
        ("beam_length", 5, 1e-3),
        ("beam_thickness", 5, 1e-3),
    )
    params = [
        NormalDomainParam(name=key, mean=nominal[key], std=nominal[key] / divisor, clip_lo=lower)
        for key, divisor, lower in normal_specs
    ]

    # The friction coefficient spans from zero to twice its nominal value
    params.append(
        UniformDomainParam(
            name="friction_coeff", mean=nominal["friction_coeff"], halfspan=nominal["friction_coeff"], clip_lo=0
        )
    )
    # The angular offset is centered at zero and independent of the nominal values
    params.append(UniformDomainParam(name="ang_offset", mean=0.0 / 180 * np.pi, halfspan=0.1 / 180 * np.pi))
    return DomainRandomizer(*params)
Beispiel #13
0
def create_default_randomizer_qcp() -> DomainRandomizer:
    """
    Assemble the default randomizer for the `QCartPoleSim`.

    The nominal values are queried with `long=False`.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.quanser_cartpole import QCartPoleSim

    nom = QCartPoleSim.get_nominal_domain_param(long=False)

    def _normal(key, divisor, lower):
        # Normal distribution around the nominal value, the std is the nominal value divided by `divisor`
        return NormalDomainParam(name=key, mean=nom[key], std=nom[key]/divisor, clip_lo=lower)

    def _uniform(key, **clipping):
        # Uniform distribution around the nominal value with a half-span of a quarter of the nominal value
        return UniformDomainParam(name=key, mean=nom[key], halfspan=nom[key]/4, **clipping)

    return DomainRandomizer(
        _normal('g', 10, 1e-4),
        _normal('m_cart', 5, 1e-4),
        _normal('m_pole', 5, 1e-4),
        _normal('l_rail', 5, 1e-2),
        _normal('l_pole', 5, 1e-2),
        _uniform('eta_m', clip_lo=1e-4, clip_up=1),
        _uniform('eta_g', clip_lo=1e-4, clip_up=1),
        _normal('K_g', 4, 1e-4),
        _normal('J_m', 4, 1e-9),
        _normal('r_mp', 5, 1e-4),
        _normal('R_m', 4, 1e-4),
        _normal('k_m', 4, 1e-4),
        _uniform('B_eq', clip_lo=1e-4),
        _uniform('B_pole', clip_lo=1e-4)
    )
Beispiel #14
0
    # Create the environment and the matching policy for the requested setup type
    if setup_type == 'idle':
        env, policy = create_idle_setup(physicsEngine, graphFileName, dt, max_steps, ref_frame, checkJointLimits)
    elif setup_type == 'ik':
        env, policy = create_ik_setup(physicsEngine, graphFileName, dt, max_steps, ref_frame, checkJointLimits)
    elif setup_type == 'pos':
        env, policy = create_position_mps_setup(physicsEngine, graphFileName, dt, max_steps, ref_frame,
                                                checkJointLimits)
    elif setup_type == 'vel':
        env, policy = create_velocity_mps_setup(physicsEngine, graphFileName, dt, max_steps, ref_frame,
                                                checkJointLimits)
    else:
        # Name exactly the options handled by the branches above
        raise pyrado.ValueErr(given=setup_type, eq_constraint="'idle', 'ik', 'pos', or 'vel'")

    if randomize:
        # Randomize the box's mass and width uniformly within +/- 20% of their own nominal values
        dp_nom = env.get_nominal_domain_param()
        randomizer = DomainRandomizer(
            UniformDomainParam(name='box_mass', mean=dp_nom['box_mass'], halfspan=dp_nom['box_mass']/5),
            # The half-span must be derived from the width itself, not from the box's length
            UniformDomainParam(name='box_width', mean=dp_nom['box_width'], halfspan=dp_nom['box_width']/5)
        )
        # Wrap the environment such that the randomizer is applied to it
        env = DomainRandWrapperLive(env, randomizer)

    # Simulate and plot
    print('observations:\n', env.obs_space.labels)
    param = state = None
    done = False
    # Keep rolling out until the query after a rollout signals to stop; the queried state and domain parameters are
    # used to reset the next rollout
    while not done:
        render_mode = RenderMode(text=False, video=True)
        reset_kwargs = dict(domain_param=param, init_state=state)
        ro = rollout(env, policy, render_mode=render_mode, eval=True, max_steps=max_steps,
                     reset_kwargs=reset_kwargs, stop_on_done=False)
        print_cbt(f'Return: {ro.undiscounted_return()}', 'g', bright=True)
        done, state, param = after_rollout_query(env, policy, ro)
Beispiel #15
0
            1 / 180 * pi,
            0.0025,
            0.0025,  # [rad, rad, m, m, ...
            2 / 180 * pi,
            2 / 180 * pi,
            0.05,
            0.05,
        ],
    )  # ... rad/s, rad/s, m/s, m/s]
    # Stack the action wrappers around the environment: first `ActNormWrapper`, then `ActDelayWrapper`
    env = ActNormWrapper(env)
    env = ActDelayWrapper(env)
    # Extend the default randomizer by a uniformly distributed, integer-rounded `act_delay` with mean 15 and
    # half-span 15 which is clipped at 0 from below (presumably consumed by the `ActDelayWrapper` - TODO confirm)
    randomizer = create_default_randomizer_qbb()
    randomizer.add_domain_params(
        UniformDomainParam(name="act_delay",
                           mean=15,
                           halfspan=15,
                           clip_lo=0,
                           roundint=True))
    # Wrap the environment such that the randomizer is applied to it
    env = DomainRandWrapperLive(env, randomizer)

    # Policy
    # Feed-forward network with two hidden layers of 64 units and tanh nonlinearities; the commented-out lines are
    # the recurrent alternatives together with their hyper-parameters
    policy_hparam = dict(hidden_sizes=[64, 64], hidden_nonlin=to.tanh)  # FNN
    # policy_hparam = dict(hidden_size=64, num_recurrent_layers=1)  # LSTM & GRU
    policy = FNNPolicy(spec=env.spec, **policy_hparam)
    # policy = RNNPolicy(spec=env.spec, **policy_hparam)
    # policy = LSTMPolicy(spec=env.spec, **policy_hparam)
    # policy = GRUPolicy(spec=env.spec, **policy_hparam)

    # Critic
    # Hyper-parameters of the value function network: two hidden layers of 32 units and tanh nonlinearities
    vfcn_hparam = dict(hidden_sizes=[32, 32], hidden_nonlin=to.tanh)  # FNN
    # vfcn_hparam = dict(hidden_size=32, num_recurrent_layers=1)  # LSTM & GRU
Beispiel #16
0
from tests.conftest import m_needs_bullet, m_needs_mujoco

from pyrado.domain_randomization.domain_parameter import (
    BernoulliDomainParam,
    MultivariateNormalDomainParam,
    NormalDomainParam,
    UniformDomainParam,
)
from pyrado.domain_randomization.utils import param_grid
from pyrado.environments.sim_base import SimEnv


@pytest.mark.parametrize(
    "dp",
    [
        UniformDomainParam(
            name="", mean=3.0, halfspan=11.0, clip_lo=-5, clip_up=5),
        NormalDomainParam(name="", mean=10, std=1.0, clip_lo=9, clip_up=11),
        MultivariateNormalDomainParam(name="",
                                      mean=to.ones((2, 1)),
                                      cov=to.eye(2),
                                      clip_lo=-1,
                                      clip_up=1.0),
        MultivariateNormalDomainParam(
            name="", mean=10 * to.ones((2, )), cov=2 * to.eye(2), clip_up=11),
        BernoulliDomainParam(name="", val_0=2, val_1=5, prob_1=0.8),
        BernoulliDomainParam(name="", val_0=-3, val_1=5, prob_1=0.8,
                             clip_up=4),
    ],
    ids=["U", "N", "MVN_v1", "MVN_v2", "B_v1", "B_v2"],
)
@pytest.mark.parametrize("num_samples", [1, 5, 100])
Beispiel #17
0
    elif setup_type == "lin":
        env, policy = create_lin_setup(physicsEngine, dt, max_steps,
                                       checkJointLimits)
    elif setup_type == "time":
        env, policy = create_time_setup(physicsEngine, dt, max_steps,
                                        checkJointLimits)
    else:
        raise pyrado.ValueErr(given=setup_type,
                              eq_constraint="idle, pst, lin, or time")

    if randomize:
        dp_nom = env.get_nominal_domain_param()
        randomizer = DomainRandomizer(
            UniformDomainParam(
                name="ball_restitution",
                mean=dp_nom["ball_restitution"],
                halfspan=dp_nom["ball_restitution"],
            ),
            UniformDomainParam(name="ball_radius",
                               mean=dp_nom["ball_radius"],
                               halfspan=dp_nom["ball_radius"] / 5,
                               clip_lo=5e-3),
            UniformDomainParam(name="ball_mass",
                               mean=dp_nom["ball_mass"],
                               halfspan=dp_nom["ball_mass"] / 2,
                               clip_lo=0),
            UniformDomainParam(name="club_mass",
                               mean=dp_nom["club_mass"],
                               halfspan=dp_nom["club_mass"] / 5),
            UniformDomainParam(
                name="ball_friction_coefficient",
Beispiel #18
0
def test_sysidasrl_reps(ex_dir, env: SimEnv, num_eval_rollouts: int):
    """
    Run a few iterations of `SysIdViaEpisodicRL` with `REPS` as subroutine and check that the loss between the rollouts
    from the "real" and the simulated environment did not increase.

    :param ex_dir: experiment directory passed to the subroutine
    :param env: environment which is copied once as "real" and once as simulated environment
    :param num_eval_rollouts: number of rollouts used for every evaluation and for every batch of "real" data
    """
    pyrado.set_seed(0)

    def eval_ddp_policy(rollouts_real):
        """Compute the average loss between the given rollouts and simulated ones starting from the same states."""
        init_states_real = np.array([ro.states[0, :] for ro in rollouts_real])
        rollouts_sim = [
            rollout(env_sim, behavior_policy, eval=True, reset_kwargs=dict(init_state=init_states_real[idx, :]))
            for idx in range(num_eval_rollouts)
        ]

        # Truncate the rollouts, yielding two lists of pairwise equally long rollouts
        ros_real_tr, ros_sim_tr = algo.truncate_rollouts(rollouts_real, rollouts_sim, replicate=False)
        assert len(ros_real_tr) == len(ros_sim_tr)
        same_init_state = [np.allclose(r.states[0, :], s.states[0, :]) for r, s in zip(ros_real_tr, ros_sim_tr)]
        assert all(same_init_state)

        # Return the average loss
        losses = [algo.loss_fcn(ro_r, ro_s) for ro_r, ro_s in zip(ros_real_tr, ros_sim_tr)]
        return float(np.mean(np.asarray(losses)))

    # Environments: the "real" one has a fixed angular offset, the simulated one gets a randomized angular offset
    env_real = deepcopy(env)
    env_real.domain_param = dict(ang_offset=-2 * np.pi / 180)

    dp_map = {0: ("ang_offset", "mean"), 1: ("ang_offset", "halfspan")}
    randomizer = DomainRandomizer(UniformDomainParam(name="ang_offset", mean=0, halfspan=1e-6))
    env_sim = DomainRandWrapperLive(deepcopy(env), randomizer)
    env_sim = MetaDomainRandWrapper(env_sim, dp_map)

    assert env_real is not env_sim

    # Policies (the behavioral policy needs to be deterministic)
    behavior_policy = LinearPolicy(env_sim.spec, feats=FeatureStack(identity_feat))
    prior = DomainRandomizer(
        UniformDomainParam(name="ang_offset", mean=1 * np.pi / 180, halfspan=1 * np.pi / 180)
    )
    ddp_policy = DomainDistrParamPolicy(mapping=dp_map, trafo_mask=[False, True], prior=prior)

    # Subroutine
    subrtn = REPS(
        ex_dir,
        env_sim,
        ddp_policy,
        max_iter=2,
        eps=1.0,
        pop_size=100,
        num_init_states_per_domain=1,
        expl_std_init=5e-2,
        expl_std_min=1e-4,
        num_workers=1,
    )

    # System identification algorithm
    algo = SysIdViaEpisodicRL(
        subrtn,
        behavior_policy,
        metric=None,
        obs_dim_weight=np.ones(env_sim.obs_space.shape),
        num_rollouts_per_distr=5,
        num_workers=1,
    )

    # Loss on a fixed set of "real" rollouts before training
    rollouts_real_tst = [rollout(env_real, behavior_policy, eval=True) for _ in range(num_eval_rollouts)]
    loss_pre = eval_ddp_policy(rollouts_real_tst)

    # Mimic training
    while algo.curr_iter < algo.max_iter and not algo.stopping_criterion_met():
        algo.logger.add_value(algo.iteration_key, algo.curr_iter)

        # Create fake real-world data
        rollouts_real = [rollout(env_real, behavior_policy, eval=True) for _ in range(num_eval_rollouts)]

        algo.step(snapshot_mode="latest", meta_info=dict(rollouts_real=rollouts_real))

        algo.logger.record_step()
        algo._curr_iter += 1

    # Loss on the same set of "real" rollouts after training
    loss_post = eval_ddp_policy(rollouts_real_tst)
    assert loss_post <= loss_pre  # don't have to be better every step
Beispiel #19
0
def test_basic_meta(ex_dir, policy, env: SimEnv, algo, algo_hparam: dict):
    """
    Train the given meta-algorithm with `PPO` as subroutine on a wrapped and randomized environment, and check that
    it ran for the maximum number of iterations.

    :param ex_dir: experiment directory passed to the subroutine
    :param policy: overwritten by a freshly created `FNNPolicy` before it is used
    :param env: environment to wrap and train on
    :param algo: constructor of the meta-algorithm, called with the environment, the subroutine, and `algo_hparam`
    :param algo_hparam: keyword arguments for the meta-algorithm's constructor
    """
    pyrado.set_seed(0)

    # Environment: observation noise, action wrappers, and domain randomization
    one_deg = 1 / 180 * np.pi
    two_deg = 2 / 180 * np.pi
    env = GaussianObsNoiseWrapper(env, noise_std=[one_deg, one_deg, 0.0025, 0.0025, two_deg, two_deg, 0.05, 0.05])
    env = ActDelayWrapper(ActNormWrapper(env))
    randomizer = create_default_randomizer_qbb()
    act_delay = UniformDomainParam(name="act_delay", mean=15, halfspan=15, clip_lo=0, roundint=True)
    randomizer.add_domain_params(act_delay)
    env = DomainRandWrapperLive(env, randomizer)

    # Policy (FNN)
    policy = FNNPolicy(spec=env.spec, hidden_sizes=[16, 16], hidden_nonlin=to.tanh)

    # Critic (FNN)
    vfcn = FNNPolicy(spec=EnvSpec(env.obs_space, ValueFunctionSpace), hidden_sizes=[16, 16], hidden_nonlin=to.tanh)
    critic = GAE(
        vfcn,
        gamma=0.9995,
        lamda=0.98,
        num_epoch=2,
        batch_size=64,
        lr=5e-4,
        standardize_adv=False,
    )

    # Subroutine
    subrtn = PPO(
        ex_dir,
        env,
        policy,
        critic,
        max_iter=3,
        min_rollouts=5,
        num_epoch=2,
        eps_clip=0.1,
        batch_size=64,
        std_init=0.8,
        lr=2e-4,
        num_workers=1,
    )

    # Meta-algorithm
    meta_algo = algo(env, subrtn, **algo_hparam)
    meta_algo.train()

    assert meta_algo.curr_iter == meta_algo.max_iter
def create_default_randomizer_qcp() -> DomainRandomizer:
    """
    Assemble the default randomizer for the `QCartPoleSim`.

    The nominal values are queried with `long=False`.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.quanser_cartpole import QCartPoleSim

    nominal = QCartPoleSim.get_nominal_domain_param(long=False)

    def _normal(key, divisor, lower):
        # Normal distribution around the nominal value, the std is the nominal value divided by `divisor`
        return NormalDomainParam(name=key, mean=nominal[key], std=nominal[key] / divisor, clip_lo=lower)

    def _uniform(key, divisor, **clipping):
        # Uniform distribution around the nominal value, the half-span is the nominal value divided by `divisor`
        return UniformDomainParam(name=key, mean=nominal[key], halfspan=nominal[key] / divisor, **clipping)

    return DomainRandomizer(
        _normal("gravity_const", 10, 1e-4),
        _normal("cart_mass", 5, 1e-4),
        _normal("pole_mass", 5, 1e-4),
        _normal("rail_length", 5, 1e-2),
        _normal("pole_length", 5, 1e-2),
        _uniform("motor_efficiency", 4, clip_lo=1e-4, clip_up=1),
        _uniform("gear_efficiency", 4, clip_lo=1e-4, clip_up=1),
        _normal("gear_ratio", 4, 1e-4),
        _normal("motor_inertia", 4, 1e-9),
        _normal("pinion_radius", 5, 1e-4),
        _normal("motor_resistance", 4, 1e-4),
        _normal("motor_back_emf", 4, 1e-4),
        _uniform("combined_damping", 4, clip_lo=1e-4),
        _uniform("pole_damping", 4, clip_lo=1e-4),
        _uniform("cart_friction_coeff", 2, clip_lo=0),
    )
        ]
        ex_labels = [
            '',
        ]

    else:
        raise pyrado.ValueErr(given=args.env_name, eq_constraint=f'{QCartPoleSwingUpSim.name}, {QCartPoleStabSim.name},'
                                                                 f' or {QCartPoleSwingUpSim.name}')

    # Every experiment needs exactly one prefix, one name, and one label
    if not check_all_lengths_equal([prefixes, ex_names, ex_labels]):
        raise pyrado.ShapeErr(msg=f'The lengths of prefixes, ex_names, and ex_labels must be equal, '
                                  f'but they are {len(prefixes)}, {len(ex_names)}, and {len(ex_labels)}!')

    # Create the randomizer and extend it by a uniformly distributed, integer-rounded action delay with mean 20 and
    # half-span 20 which is clipped at 0 from below
    pert = create_conservative_randomizer(env)
    pert.add_domain_params(UniformDomainParam(name='act_delay', mean=20, halfspan=20, clip_lo=0, roundint=True))

    # Load one policy per experiment directory
    ex_dirs = [osp.join(p, e) for p, e in zip(prefixes, ex_names)]
    # NOTE(review): env_sim_list is not filled in the visible part of the code, only the policies are collected
    env_sim_list = []
    policy_list = []
    for ex_dir in ex_dirs:
        env_sim, policy, _ = load_experiment(ex_dir, args)
        policy_list.append(policy)

    # Fix initial state (set to None if it should not be fixed)
    init_state_list = [None]*args.num_ro_per_config

    # Create empty data frame
    df = pd.DataFrame(columns=['policy', 'ret', 'len'])
def create_default_randomizer_wamjsc() -> DomainRandomizer:
    """
    Create the default randomizer for the `WAMJointSpaceCtrlSim`.

    For each of the joints 1 to 7, the damping and the dry friction get a `UniformDomainParam` whose mean is the
    nominal value, whose half-span is half of the nominal value, and which uses `clip_lo=0.0`.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.mujoco.wam_jsc import WAMJointSpaceCtrlSim

    nominal = WAMJointSpaceCtrlSim.get_nominal_domain_param()

    # Keep the established ordering: all dampings (joint 1 to 7) first, then all dry frictions (joint 1 to 7)
    param_names = [f"joint_{idx}_{quantity}" for quantity in ("damping", "dryfriction") for idx in range(1, 8)]

    joint_params = [
        UniformDomainParam(name=pn, mean=nominal[pn], halfspan=nominal[pn] / 2, clip_lo=0.0) for pn in param_names
    ]
    return DomainRandomizer(*joint_params)
Beispiel #23
0
            1 / 180 * pi,
            1 / 180 * pi,
            0.005,
            0.005,  # [rad, rad, m, m, ...
            10 / 180 * pi,
            10 / 180 * pi,
            0.05,
            0.05
        ])  # ... rad/s, rad/s, m/s, m/s]
    env = ActNormWrapper(env)
    env = ActDelayWrapper(env)
    randomizer = get_conservative_randomizer(env)
    randomizer.add_domain_params(
        UniformDomainParam(name='act_delay',
                           mean=5,
                           halfspan=5,
                           clip_lo=0,
                           roundint=True))
    env = DomainRandWrapperBuffer(env, randomizer)

    # Policy
    # policy_hparam = dict(hidden_sizes=[64, 64], hidden_nonlin=to.tanh)  # FNN
    # policy_hparam = dict(hidden_size=32, num_recurrent_layers=1, hidden_nonlin='tanh')  # RNN
    policy_hparam = dict(hidden_size=32, num_recurrent_layers=1)  # LSTM & GRU
    # policy = FNNPolicy(spec=env.spec, **policy_hparam)
    # policy = RNNPolicy(spec=env.spec, **policy_hparam)
    # policy = LSTMPolicy(spec=env.spec, **policy_hparam)
    policy = GRUPolicy(spec=env.spec, **policy_hparam)

    # Critic
    # value_fcn_hparam = dict(hidden_sizes=[32, 32], hidden_nonlin=to.tanh)  # FNN
        fixed_init_state=False,
        observe_ball=True,
        task_args=dict(
            final_factor=500,
            success_bonus=250,
            Q=np.diag([0.5, 1e-4, 4e1]),
            R=np.diag([0, 0, 1e-1, 2e-1]),
            Q_dev=np.diag([0.0, 0.0, 5]),
            # R_dev=np.diag([0., 0., 1e-3, 1e-3])
        ),
    )
    env = WAMBallInCupSim(**env_hparams)

    # Randomizer
    randomizer = DomainRandomizer(
        UniformDomainParam(name="cup_scale", mean=1.0, halfspan=0.2),
        NormalDomainParam(name="rope_length", mean=0.3, std=0.005),
        NormalDomainParam(name="ball_mass", mean=0.021, std=0.001),
        UniformDomainParam(name="joint_2_damping", mean=0.05, halfspan=0.05),
        UniformDomainParam(name="joint_2_dryfriction", mean=0.1, halfspan=0.1),
    )
    env = DomainRandWrapperLive(env, randomizer)

    # Policy
    bounds = ([0.0, 0.25, 0.5], [1.0, 1.5, 2.5])
    policy_hparam = dict(rbf_hparam=dict(num_feat_per_dim=9, bounds=bounds, scale=None), dim_mask=2)
    policy = DualRBFLinearPolicy(env.spec, **policy_hparam)

    # Algorithm
    algo_hparam = dict(
        max_iter=15,
def create_default_randomizer_wambic() -> DomainRandomizer:
    """
    Create the default randomizer for the `WAMBallInCupSim`.

    The ball mass, cup scale, and rope length are `NormalDomainParam`s centered at their nominal values. The rope
    damping as well as the damping and dry friction of the joints 1 to 7 are `UniformDomainParam`s whose mean is the
    nominal value and whose half-span is half of the nominal value.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.mujoco.wam_bic import WAMBallInCupSim

    nominal = WAMBallInCupSim.get_nominal_domain_param()

    # Parameters of the ball, the cup, and the rope
    ball_cup_rope_params = [
        NormalDomainParam(name="ball_mass", mean=nominal["ball_mass"], std=nominal["ball_mass"] / 10, clip_lo=1e-2),
        # Ball needs to fit into the cup
        NormalDomainParam(name="cup_scale", mean=nominal["cup_scale"], std=nominal["cup_scale"] / 5, clip_lo=0.65),
        # Rope won't be more than 3cm off
        NormalDomainParam(
            name="rope_length",
            mean=nominal["rope_length"],
            std=nominal["rope_length"] / 30,
            clip_lo=0.27,
            clip_up=0.33,
        ),
        UniformDomainParam(
            name="rope_damping", mean=nominal["rope_damping"], halfspan=nominal["rope_damping"] / 2, clip_lo=1e-6
        ),
    ]

    # Joint parameters, keeping the established ordering: all dampings (joint 1 to 7), then all dry frictions
    joint_names = [f"joint_{idx}_{quantity}" for quantity in ("damping", "dryfriction") for idx in range(1, 8)]
    joint_params = [
        UniformDomainParam(name=jn, mean=nominal[jn], halfspan=nominal[jn] / 2, clip_lo=0.0) for jn in joint_names
    ]

    return DomainRandomizer(*ball_cup_rope_params, *joint_params)
Beispiel #26
0
        env, policy = create_position_mps_setup(physicsEngine, graphFileName,
                                                dt, max_steps, ref_frame,
                                                checkJointLimits)
    elif setup_type == 'vel':
        env, policy = create_velocity_mps_setup(physicsEngine, graphFileName,
                                                dt, max_steps, ref_frame,
                                                checkJointLimits)
    else:
        raise pyrado.ValueErr(given=setup_type,
                              eq_constraint="'idle', 'pos', 'vel'")

    # If requested, build a randomizer from the environment's nominal domain parameters and wrap the environment
    # in a DomainRandWrapperLive that uses it
    if randomize:
        dp_nom = env.get_nominal_domain_param()
        # Each parameter: mean at the nominal value, half-span of one fifth of a nominal value
        randomizer = DomainRandomizer(
            UniformDomainParam(name='box_mass',
                               mean=dp_nom['box_mass'],
                               halfspan=dp_nom['box_mass'] / 5),
            # NOTE(review): the half-span of 'box_width' is computed from 'box_length', not from 'box_width' --
            # possibly a copy-paste slip; confirm which nominal value is intended
            UniformDomainParam(name='box_width',
                               mean=dp_nom['box_width'],
                               halfspan=dp_nom['box_length'] / 5),
            UniformDomainParam(name='basket_friction_coefficient',
                               mean=dp_nom['basket_friction_coefficient'],
                               halfspan=dp_nom['basket_friction_coefficient'] /
                               5))
        env = DomainRandWrapperLive(env, randomizer)

    # Simulate and plot
    print('observations:\n', env.obs_space.labels)
    done, param, state = False, None, None
    while not done:
        ro = rollout(env,
Beispiel #27
0
    NormalDomainParam,
    UniformDomainParam,
)
from pyrado.domain_randomization.domain_randomizer import DomainRandomizer

# NOTE(review): none of the objects constructed below is bound to a name or returned in this excerpt -- presumably
# the enclosing definitions were lost when the snippet was extracted; confirm against the original file

# Domain parameter given only a name and a mean
DomainParam(name="a", mean=1)

# Bernoulli domain parameter with the two values 2 and 5, and prob_1=0.8
BernoulliDomainParam(name="b", val_0=2, val_1=5, prob_1=0.8)

# Randomizer holding a single normally distributed parameter
# NOTE(review): clip_lo=10 is far above mean=1.2 (std=0.1) -- presumably chosen on purpose to exercise clipping;
# confirm
DomainRandomizer(
    NormalDomainParam(name="mass", mean=1.2, std=0.1, clip_lo=10, clip_up=100))

# Randomizer mixing normal, uniform (with roundint=True), and multivariate normal parameters
DomainRandomizer(
    NormalDomainParam(name="mass", mean=1.2, std=0.1, clip_lo=10, clip_up=100),
    UniformDomainParam(name="special",
                       mean=0,
                       halfspan=42,
                       clip_lo=-7.4,
                       roundint=True),
    NormalDomainParam(name="length", mean=4, std=0.6, clip_up=50.1),
    UniformDomainParam(name="time_delay",
                       mean=13,
                       halfspan=6,
                       clip_up=17,
                       roundint=True),
    # Two-dimensional parameter: mean vector 10 * ones(2), covariance 2 * eye(2)
    MultivariateNormalDomainParam(name="multidim",
                                  mean=10 * to.ones((2, )),
                                  cov=2 * to.eye(2),
                                  clip_up=11),
)
Beispiel #28
0
 dp_nom = WAMBallInCupSim.get_nominal_domain_param()
 randomizer = DomainRandomizer(
     # UniformDomainParam(name='cup_scale', mean=0.95, halfspan=0.05),
     # UniformDomainParam(name='ball_mass', mean=2.1000e-02, halfspan=3.1500e-03, clip_lo=0),
     # UniformDomainParam(name='rope_length', mean=3.0000e-01, halfspan=1.5000e-02, clip_lo=0.27, clip_up=0.33),
     # UniformDomainParam(name='rope_damping', mean=1.0000e-04, halfspan=1.0000e-04, clip_lo=1e-2),
     # UniformDomainParam(name='joint_2_damping', mean=5.0000e-02, halfspan=5.0000e-02, clip_lo=1e-6),
     # UniformDomainParam(name='joint_2_dryfriction', mean=2.0000e-01, halfspan=2.0000e-01, clip_lo=0),
     #
     NormalDomainParam(name="rope_length",
                       mean=2.9941e-01,
                       std=1.0823e-02,
                       clip_lo=0.27,
                       clip_up=0.33),
     UniformDomainParam(name="rope_damping",
                        mean=3.0182e-05,
                        halfspan=4.5575e-05,
                        clip_lo=0.0),
     NormalDomainParam(name="ball_mass",
                       mean=1.8412e-02,
                       std=1.9426e-03,
                       clip_lo=1e-2),
     UniformDomainParam(name="joint_2_dryfriction",
                        mean=1.9226e-01,
                        halfspan=2.5739e-02,
                        clip_lo=0),
     UniformDomainParam(name="joint_2_damping",
                        mean=9.4057e-03,
                        halfspan=5.0000e-04,
                        clip_lo=1e-6),
 )
 env = DomainRandWrapperLive(env, randomizer)
Beispiel #29
0
from pyrado.sampling.sequences import *

if __name__ == '__main__':
    # Experiment (set seed before creating the modules)
    ex_dir = setup_experiment(QBallBalancerSim.name, f'{SPOTA.name}-{HCNormal.name}',
                              f'{LinearPolicy.name}_obsnoise-s_actedlay-10', seed=1001)

    # Environment and domain randomization
    env_hparams = dict(dt=1/100., max_steps=500)
    env = QBallBalancerSim(**env_hparams)
    env = GaussianObsNoiseWrapper(env, noise_std=[1/180*pi, 1/180*pi, 0.005, 0.005,  # [rad, rad, m, m, ...
                                                  10/180*pi, 10/180*pi, 0.05, 0.05])  # ... rad/s, rad/s, m/s, m/s]
    # env = ObsPartialWrapper(env, mask=[0, 0, 0, 0, 1, 1, 0, 0])
    env = ActDelayWrapper(env)
    randomizer = get_default_randomizer(env)
    randomizer.add_domain_params(UniformDomainParam(name='act_delay', mean=5, halfspan=5, clip_lo=0, roundint=True))
    env = DomainRandWrapperBuffer(env, randomizer)

    # Policy
    policy_hparam = dict(feats=FeatureStack([identity_feat]))
    policy = LinearPolicy(spec=env.spec, **policy_hparam)

    # Initialize with Quanser's PD gains
    init_policy_param_values = to.tensor([[-14., 0, -14*3.45, 0, 0, 0, -14*2.11, 0],
                                          [0, -14., 0, -14*3.45, 0, 0, 0, -14*2.11]])

    # Algorithm
    subrtn_hparam_cand = dict(
        max_iter=100,
        num_rollouts=0,  # will be overwritten by SPOTA
        pop_size=50,
        f'{UDR.name}-{PoWER.name}_{DualRBFLinearPolicy.name}',
        'rand-cs-rl-bm-jd-js')

    # Set seed if desired
    pyrado.set_seed(args.seed, verbose=True)

    # Environment
    env_hparams = dict(num_dof=4,
                       max_steps=1750,
                       task_args=dict(final_factor=0.5),
                       fixed_init_state=False)
    env = WAMBallInCupSim(**env_hparams)

    # Randomizer
    randomizer = DomainRandomizer(
        UniformDomainParam(name='cup_scale', mean=0.95, halfspan=0.05),
        NormalDomainParam(name='rope_length', mean=0.3, std=0.005),
        NormalDomainParam(name='ball_mass', mean=0.021, std=0.001),
        UniformDomainParam(name='joint_damping', mean=0.05, halfspan=0.05),
        UniformDomainParam(name='joint_stiction', mean=0.1, halfspan=0.1),
    )
    env = DomainRandWrapperLive(env, randomizer)

    # Policy
    policy_hparam = dict(rbf_hparam=dict(num_feat_per_dim=10,
                                         bounds=(0., 1.),
                                         scale=None),
                         dim_mask=2)
    policy = DualRBFLinearPolicy(env.spec, **policy_hparam)

    # Algorithm