def get_uniform_masses_lengths_randomizer_qq(frac_halfspan: float):
    """
    Build a randomizer with uniform distributions over all masses and lengths of the Quanser Qube.

    Every distribution is centered at the nominal value of its parameter. Its half-span is obtained by dividing that
    nominal value by `frac_halfspan`.

    :param frac_halfspan: divisor applied to the nominal parameter value to obtain the half-span
    :return: `DomainRandomizer` with uniformly distributed masses and lengths
    """
    from pyrado.environments.pysim.quanser_qube import QQubeSim

    nominal = QQubeSim.get_nominal_domain_param()

    # (parameter name, value passed as `clip_lo`)
    specs = (("Mp", 1e-3), ("Mr", 1e-3), ("Lr", 1e-2), ("Lp", 1e-2))

    return DomainRandomizer(
        *[
            UniformDomainParam(name=key, mean=nominal[key], halfspan=nominal[key] / frac_halfspan, clip_lo=lower)
            for key, lower in specs
        ]
    )
def create_default_randomizer_qbb() -> DomainRandomizer:
    """
    Assemble the default domain randomizer for the `QBallBalancerSim`.

    All distributions are centered at the nominal domain parameter values reported by the simulation.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.quanser_ball_balancer import QBallBalancerSim

    nominal = QBallBalancerSim.get_nominal_domain_param()

    def normal(key, divisor, lower):
        # Normal distribution around the nominal value with std = nominal / divisor
        return NormalDomainParam(name=key, mean=nominal[key], std=nominal[key] / divisor, clip_lo=lower)

    def uniform(key, halfspan, **clipping):
        # Uniform distribution around the nominal value; clipping arguments are forwarded untouched
        return UniformDomainParam(name=key, mean=nominal[key], halfspan=halfspan, **clipping)

    params = [
        normal('g', 10, 1e-4),
        normal('m_ball', 5, 1e-4),
        normal('r_ball', 5, 1e-3),
        normal('l_plate', 5, 5e-2),
        normal('r_arm', 5, 1e-4),
        normal('K_g', 4, 1e-2),
        normal('J_l', 4, 1e-6),
        normal('J_m', 4, 1e-9),
        normal('k_m', 4, 1e-4),
        normal('R_m', 4, 1e-4),
        uniform('eta_g', nominal['eta_g']/4, clip_lo=1e-4, clip_up=1),
        uniform('eta_m', nominal['eta_m']/4, clip_lo=1e-4, clip_up=1),
        uniform('B_eq', nominal['B_eq']/4, clip_lo=1e-4),
        uniform('c_frict', nominal['c_frict']/4, clip_lo=1e-4),
        # The negative thresholds need abs() to yield a positive half-span
        uniform('V_thold_x_pos', nominal['V_thold_x_pos']/3),
        uniform('V_thold_x_neg', abs(nominal['V_thold_x_neg'])/3),
        uniform('V_thold_y_pos', nominal['V_thold_y_pos']/3),
        uniform('V_thold_y_neg', abs(nominal['V_thold_y_neg'])/3),
        # Angular offsets vary by 6 degrees (converted to radians) in both directions
        uniform('offset_th_x', 6./180*np.pi),
        uniform('offset_th_y', 6./180*np.pi),
    ]
    return DomainRandomizer(*params)
def get_default_randomizer_bl() -> DomainRandomizer:
    """
    Get the default randomizer for the `BoxLiftingSim`.

    The box dimensions and the masses follow normal distributions, the friction coefficients follow uniform
    distributions. All of them are centered at the nominal domain parameter values.

    :return: randomizer based on the nominal domain parameter values
    """
    # The nominal values must come from the box lifting simulation, since the basket parameters are randomized below.
    # NOTE(review): assumes `BoxLiftingSim` is provided by `pyrado.environments.rcspysim.box_lifting` -- confirm
    from pyrado.environments.rcspysim.box_lifting import BoxLiftingSim

    dp_nom = BoxLiftingSim.get_nominal_domain_param()
    return DomainRandomizer(
        NormalDomainParam(name='box_length', mean=dp_nom['box_length'], std=dp_nom['box_length'] / 10),
        NormalDomainParam(name='box_width', mean=dp_nom['box_width'], std=dp_nom['box_width'] / 10),
        NormalDomainParam(name='box_mass', mean=dp_nom['box_mass'], std=dp_nom['box_mass'] / 5),
        UniformDomainParam(
            name='box_friction_coefficient',
            mean=dp_nom['box_friction_coefficient'],
            halfspan=dp_nom['box_friction_coefficient'] / 5,
            clip_lo=1e-5,
        ),
        NormalDomainParam(name='basket_mass', mean=dp_nom['basket_mass'], std=dp_nom['basket_mass'] / 5),
        UniformDomainParam(
            name='basket_friction_coefficient',
            mean=dp_nom['basket_friction_coefficient'],
            halfspan=dp_nom['basket_friction_coefficient'] / 5,
            clip_lo=1e-5,
        ),
    )
def default_randomizer():
    """
    Create a randomizer holding a fixed set of five example domain parameters.

    The set mixes normal, uniform (with `roundint=True`), and multivariate normal distributions using different
    combinations of lower and upper clipping values.

    :return: `DomainRandomizer` containing the parameters `mass`, `special`, `length`, `time_delay`, and `multidim`
    """
    mass = NormalDomainParam(name="mass", mean=1.2, std=0.1, clip_lo=10, clip_up=100)
    special = UniformDomainParam(name="special", mean=0, halfspan=42, clip_lo=-7.4, roundint=True)
    length = NormalDomainParam(name="length", mean=4, std=0.6, clip_up=50.1)
    time_delay = UniformDomainParam(name="time_delay", mean=13, halfspan=6, clip_up=17, roundint=True)
    # Two-dimensional parameter with mean [10, 10] and covariance 2 * identity
    multidim = MultivariateNormalDomainParam(
        name="multidim", mean=10 * to.ones((2,)), cov=2 * to.eye(2), clip_up=11
    )
    return DomainRandomizer(mass, special, length, time_delay, multidim)
def default_pert():
    """
    Create a randomizer holding a fixed set of five example domain parameters.

    The set mixes normal, uniform (with `roundint=True`), and multivariate normal distributions using different
    combinations of lower and upper clipping values.

    :return: `DomainRandomizer` containing the parameters `mass`, `special`, `length`, `time_delay`, and `multidim`
    """
    domain_params = [
        NormalDomainParam(name='mass', mean=1.2, std=0.1, clip_lo=10, clip_up=100),
        UniformDomainParam(name='special', mean=0, halfspan=42, clip_lo=-7.4, roundint=True),
        NormalDomainParam(name='length', mean=4, std=0.6, clip_up=50.1),
        UniformDomainParam(name='time_delay', mean=13, halfspan=6, clip_up=17, roundint=True),
        # Two-dimensional parameter with mean [10, 10] and covariance 2 * identity
        MultivariateNormalDomainParam(name='multidim', mean=10*to.ones((2,)), cov=2*to.eye(2), clip_up=11),
    ]
    return DomainRandomizer(*domain_params)
def bob_pert():
    """
    Create a randomizer from hard-coded distributions over `g`, `r_ball`, `m_ball`, `m_beam`, `d_beam`, `l_beam`,
    `c_frict`, and `ang_offset`.

    The first six parameters are normally distributed with a standard deviation of one tenth of their mean and
    `clip_lo=1e-3`. The last two are uniformly distributed without clipping arguments.

    :return: `DomainRandomizer` containing the eight parameters listed above
    """
    # (parameter name, mean, standard deviation)
    normal_specs = (
        ('g', 9.81, 0.981),
        ('r_ball', 0.1, 0.01),
        ('m_ball', 0.5, 0.05),
        ('m_beam', 3.0, 0.3),
        ('d_beam', 0.1, 0.01),
        ('l_beam', 2.0, 0.2),
    )
    gaussians = [NormalDomainParam(name=key, mean=mu, std=sigma, clip_lo=1e-3) for key, mu, sigma in normal_specs]

    friction = UniformDomainParam(name='c_frict', mean=0.05, halfspan=0.05)
    # Half-span of 5 degrees, converted to radians
    offset = UniformDomainParam(name='ang_offset', mean=0, halfspan=5.*np.pi/180)

    return DomainRandomizer(*gaussians, friction, offset)
def create_default_randomizer_bop() -> DomainRandomizer:
    """
    Create the default randomizer for the `BallOnPlateSim`.

    The ball's mass, radius, and center of mass coordinates follow normal distributions, the friction and slip
    parameters follow uniform distributions. All of them are centered at the nominal domain parameter values.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.rcspysim.ball_on_plate import BallOnPlateSim

    dp_nom = BallOnPlateSim.get_nominal_domain_param()
    return DomainRandomizer(
        NormalDomainParam(name="ball_mass", mean=dp_nom["ball_mass"], std=dp_nom["ball_mass"] / 3, clip_lo=1e-2),
        NormalDomainParam(name="ball_radius", mean=dp_nom["ball_radius"], std=dp_nom["ball_radius"] / 3, clip_lo=1e-2),
        NormalDomainParam(name="ball_com_x", mean=dp_nom["ball_com_x"], std=0.003),
        NormalDomainParam(name="ball_com_y", mean=dp_nom["ball_com_y"], std=0.003),
        NormalDomainParam(name="ball_com_z", mean=dp_nom["ball_com_z"], std=0.003),
        # The upper clipping value is passed as `clip_up`, the keyword used by all other domain parameters
        UniformDomainParam(
            name="ball_friction_coefficient",
            mean=dp_nom["ball_friction_coefficient"],
            halfspan=dp_nom["ball_friction_coefficient"],
            clip_lo=0,
            clip_up=1,
        ),
        UniformDomainParam(
            name="ball_rolling_friction_coefficient",
            mean=dp_nom["ball_rolling_friction_coefficient"],
            halfspan=dp_nom["ball_rolling_friction_coefficient"],
            clip_lo=0,
            clip_up=1,
        ),
        # Vortex only
        UniformDomainParam(name="ball_slip", mean=dp_nom["ball_slip"], halfspan=dp_nom["ball_slip"], clip_lo=0)
        # UniformDomainParam(name='ball_linearvelocitydamnping', mean=0., halfspan=1e-4),
        # UniformDomainParam(name='ball_angularvelocitydamnping', mean=0., halfspan=1e-4)
    )
def get_default_randomizer_pi() -> DomainRandomizer:
    """
    Get the default randomizer for the `PlanarInsertSim`.

    The five link masses follow normal distributions centered at their nominal values with a standard deviation of
    one fifth of the nominal value. The z-offset of the upper wall is uniformly distributed.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.rcspysim.planar_insert import PlanarInsertSim

    dp_nom = PlanarInsertSim.get_nominal_domain_param()
    return DomainRandomizer(
        NormalDomainParam(name='link1_mass', mean=dp_nom['link1_mass'], std=dp_nom['link1_mass'] / 5, clip_lo=1e-2),
        NormalDomainParam(name='link2_mass', mean=dp_nom['link2_mass'], std=dp_nom['link2_mass'] / 5, clip_lo=1e-2),
        NormalDomainParam(name='link3_mass', mean=dp_nom['link3_mass'], std=dp_nom['link3_mass'] / 5, clip_lo=1e-2),
        NormalDomainParam(name='link4_mass', mean=dp_nom['link4_mass'], std=dp_nom['link4_mass'] / 5, clip_lo=1e-2),
        # Every link uses its own nominal mass
        # NOTE(review): assumes the nominal domain parameters contain 'link5_mass' -- confirm
        NormalDomainParam(name='link5_mass', mean=dp_nom['link5_mass'], std=dp_nom['link5_mass'] / 5, clip_lo=1e-2),
        UniformDomainParam(name='upperwall_pos_offset_z', mean=0, halfspan=0.05, clip_lo=0)  # only increase the gap
    )
def create_default_randomizer_bs() -> DomainRandomizer:
    """
    Build the default domain randomizer for the `BoxShelvingSim`.

    The box length, width, and mass follow normal distributions, the box friction coefficient follows a uniform
    distribution. All of them are centered at the nominal domain parameter values.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.rcspysim.box_shelving import BoxShelvingSim

    nominal = BoxShelvingSim.get_nominal_domain_param()

    # (parameter name, divisor of the nominal value that yields the standard deviation)
    normal_specs = (("box_length", 10), ("box_width", 10), ("box_mass", 5))
    gaussians = [
        NormalDomainParam(name=key, mean=nominal[key], std=nominal[key] / divisor) for key, divisor in normal_specs
    ]

    friction_key = "box_friction_coefficient"
    friction = UniformDomainParam(
        name=friction_key, mean=nominal[friction_key], halfspan=nominal[friction_key] / 5, clip_lo=1e-5
    )

    return DomainRandomizer(*gaussians, friction)
def create_default_randomizer_wambic() -> DomainRandomizer:
    """
    Build the default domain randomizer for the `WAMBallInCupSim`.

    The cup scale, rope length, and ball mass follow normal distributions, the joint damping, joint stiction, and
    rope damping follow uniform distributions. All of them are centered at the nominal domain parameter values.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.mujoco.wam import WAMBallInCupSim

    nominal = WAMBallInCupSim.get_nominal_domain_param()

    def normal(key, divisor, **clipping):
        # Normal distribution around the nominal value with std = nominal / divisor
        return NormalDomainParam(name=key, mean=nominal[key], std=nominal[key]/divisor, **clipping)

    def uniform(key, lower):
        # Uniform distribution around the nominal value with a half-span of half the nominal value
        return UniformDomainParam(name=key, mean=nominal[key], halfspan=nominal[key]/2, clip_lo=lower)

    params = [
        # Ball needs to fit into the cup
        normal('cup_scale', 5, clip_lo=0.65),
        # Rope won't be more than 3cm off
        normal('rope_length', 30, clip_lo=0.27, clip_up=0.33),
        normal('ball_mass', 10, clip_lo=1e-2),
        uniform('joint_damping', 0.),
        uniform('joint_stiction', 0.),
        uniform('rope_damping', 1e-6),
    ]
    return DomainRandomizer(*params)
def create_default_randomizer_bob() -> DomainRandomizer:
    """
    Build the default domain randomizer for the `BallOnBeamSim`.

    Six parameters follow normal distributions centered at their nominal values. The friction coefficient and the
    angular offset follow uniform distributions.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.ball_on_beam import BallOnBeamSim

    nominal = BallOnBeamSim.get_nominal_domain_param()

    # (parameter name, divisor of the nominal value that yields the standard deviation, value passed as `clip_lo`)
    normal_specs = (
        ('g', 10, 1e-4),
        ('m_ball', 5, 1e-4),
        ('r_ball', 5, 1e-4),
        ('m_beam', 5, 1e-3),
        ('l_beam', 5, 1e-3),
        ('d_beam', 5, 1e-3),
    )
    gaussians = [
        NormalDomainParam(name=key, mean=nominal[key], std=nominal[key]/divisor, clip_lo=lower)
        for key, divisor, lower in normal_specs
    ]

    # The half-span equals the nominal value, hence the lower bound of the distribution is zero
    friction = UniformDomainParam(name='c_frict', mean=nominal['c_frict'], halfspan=nominal['c_frict'], clip_lo=0)
    # Offset of 0 +/- 0.1 degrees, converted to radians
    offset = UniformDomainParam(name='ang_offset', mean=0./180*np.pi, halfspan=0.1/180*np.pi)

    return DomainRandomizer(*gaussians, friction, offset)
def create_default_randomizer_bob() -> DomainRandomizer:
    """
    Build the default domain randomizer for the `BallOnBeamSim`.

    Six parameters follow normal distributions centered at their nominal values. The friction coefficient and the
    angular offset follow uniform distributions.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.ball_on_beam import BallOnBeamSim

    nominal = BallOnBeamSim.get_nominal_domain_param()

    # (parameter name, divisor of the nominal value that yields the standard deviation, value passed as `clip_lo`)
    normal_specs = (
        ("gravity_const", 10, 1e-4),
        ("ball_mass", 5, 1e-4),
        ("ball_radius", 5, 1e-4),
        ("beam_mass", 5, 1e-3),
        ("beam_length", 5, 1e-3),
        ("beam_thickness", 5, 1e-3),
    )
    gaussians = [
        NormalDomainParam(name=key, mean=nominal[key], std=nominal[key] / divisor, clip_lo=lower)
        for key, divisor, lower in normal_specs
    ]

    # The half-span equals the nominal value, hence the lower bound of the distribution is zero
    friction = UniformDomainParam(
        name="friction_coeff", mean=nominal["friction_coeff"], halfspan=nominal["friction_coeff"], clip_lo=0
    )
    # Offset of 0 +/- 0.1 degrees, converted to radians
    offset = UniformDomainParam(name="ang_offset", mean=0.0 / 180 * np.pi, halfspan=0.1 / 180 * np.pi)

    return DomainRandomizer(*gaussians, friction, offset)
def create_default_randomizer_qcp() -> DomainRandomizer:
    """
    Build the default domain randomizer for the `QCartPoleSim`.

    All distributions are centered at the nominal domain parameter values, which are queried with `long=False`.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.quanser_cartpole import QCartPoleSim

    nominal = QCartPoleSim.get_nominal_domain_param(long=False)

    def normal(key, divisor, lower):
        # Normal distribution around the nominal value with std = nominal / divisor
        return NormalDomainParam(name=key, mean=nominal[key], std=nominal[key]/divisor, clip_lo=lower)

    def uniform(key, **clipping):
        # Uniform distribution around the nominal value with a half-span of a quarter of the nominal value
        return UniformDomainParam(name=key, mean=nominal[key], halfspan=nominal[key]/4, **clipping)

    params = [
        normal('g', 10, 1e-4),
        normal('m_cart', 5, 1e-4),
        normal('m_pole', 5, 1e-4),
        normal('l_rail', 5, 1e-2),
        normal('l_pole', 5, 1e-2),
        uniform('eta_m', clip_lo=1e-4, clip_up=1),
        uniform('eta_g', clip_lo=1e-4, clip_up=1),
        normal('K_g', 4, 1e-4),
        normal('J_m', 4, 1e-9),
        normal('r_mp', 5, 1e-4),
        normal('R_m', 4, 1e-4),
        normal('k_m', 4, 1e-4),
        uniform('B_eq', clip_lo=1e-4),
        uniform('B_pole', clip_lo=1e-4),
    ]
    return DomainRandomizer(*params)
# Create the environment and the policy for the requested setup type
if setup_type == 'idle':
    env, policy = create_idle_setup(physicsEngine, graphFileName, dt, max_steps, ref_frame, checkJointLimits)
elif setup_type == 'ik':
    env, policy = create_ik_setup(physicsEngine, graphFileName, dt, max_steps, ref_frame, checkJointLimits)
elif setup_type == 'pos':
    env, policy = create_position_mps_setup(physicsEngine, graphFileName, dt, max_steps, ref_frame, checkJointLimits)
elif setup_type == 'vel':
    env, policy = create_velocity_mps_setup(physicsEngine, graphFileName, dt, max_steps, ref_frame, checkJointLimits)
else:
    # List exactly the values accepted by the branches above
    raise pyrado.ValueErr(given=setup_type, eq_constraint="'idle', 'ik', 'pos', or 'vel'")

if randomize:
    # Randomize the box mass and width uniformly within +/- 20% of their own nominal values
    dp_nom = env.get_nominal_domain_param()
    randomizer = DomainRandomizer(
        UniformDomainParam(name='box_mass', mean=dp_nom['box_mass'], halfspan=dp_nom['box_mass']/5),
        UniformDomainParam(name='box_width', mean=dp_nom['box_width'], halfspan=dp_nom['box_width']/5)
    )
    env = DomainRandWrapperLive(env, randomizer)

# Simulate and plot
print('observations:\n', env.obs_space.labels)
done, param, state = False, None, None
while not done:
    ro = rollout(env, policy, render_mode=RenderMode(text=False, video=True), eval=True, max_steps=max_steps,
                 reset_kwargs=dict(domain_param=param, init_state=state), stop_on_done=False)
    print_cbt(f'Return: {ro.undiscounted_return()}', 'g', bright=True)
    # The query's results decide whether to stop and which state and domain parameters to use for the next rollout
    done, state, param = after_rollout_query(env, policy, ro)
1 / 180 * pi, 0.0025, 0.0025, # [rad, rad, m, m, ... 2 / 180 * pi, 2 / 180 * pi, 0.05, 0.05, ], ) # ... rad/s, rad/s, m/s, m/s] env = ActNormWrapper(env) env = ActDelayWrapper(env) randomizer = create_default_randomizer_qbb() randomizer.add_domain_params( UniformDomainParam(name="act_delay", mean=15, halfspan=15, clip_lo=0, roundint=True)) env = DomainRandWrapperLive(env, randomizer) # Policy policy_hparam = dict(hidden_sizes=[64, 64], hidden_nonlin=to.tanh) # FNN # policy_hparam = dict(hidden_size=64, num_recurrent_layers=1) # LSTM & GRU policy = FNNPolicy(spec=env.spec, **policy_hparam) # policy = RNNPolicy(spec=env.spec, **policy_hparam) # policy = LSTMPolicy(spec=env.spec, **policy_hparam) # policy = GRUPolicy(spec=env.spec, **policy_hparam) # Critic vfcn_hparam = dict(hidden_sizes=[32, 32], hidden_nonlin=to.tanh) # FNN # vfcn_hparam = dict(hidden_size=32, num_recurrent_layers=1) # LSTM & GRU
from tests.conftest import m_needs_bullet, m_needs_mujoco from pyrado.domain_randomization.domain_parameter import ( BernoulliDomainParam, MultivariateNormalDomainParam, NormalDomainParam, UniformDomainParam, ) from pyrado.domain_randomization.utils import param_grid from pyrado.environments.sim_base import SimEnv @pytest.mark.parametrize( "dp", [ UniformDomainParam( name="", mean=3.0, halfspan=11.0, clip_lo=-5, clip_up=5), NormalDomainParam(name="", mean=10, std=1.0, clip_lo=9, clip_up=11), MultivariateNormalDomainParam(name="", mean=to.ones((2, 1)), cov=to.eye(2), clip_lo=-1, clip_up=1.0), MultivariateNormalDomainParam( name="", mean=10 * to.ones((2, )), cov=2 * to.eye(2), clip_up=11), BernoulliDomainParam(name="", val_0=2, val_1=5, prob_1=0.8), BernoulliDomainParam(name="", val_0=-3, val_1=5, prob_1=0.8, clip_up=4), ], ids=["U", "N", "MVN_v1", "MVN_v2", "B_v1", "B_v2"], ) @pytest.mark.parametrize("num_samples", [1, 5, 100])
elif setup_type == "lin": env, policy = create_lin_setup(physicsEngine, dt, max_steps, checkJointLimits) elif setup_type == "time": env, policy = create_time_setup(physicsEngine, dt, max_steps, checkJointLimits) else: raise pyrado.ValueErr(given=setup_type, eq_constraint="idle, pst, lin, or time") if randomize: dp_nom = env.get_nominal_domain_param() randomizer = DomainRandomizer( UniformDomainParam( name="ball_restitution", mean=dp_nom["ball_restitution"], halfspan=dp_nom["ball_restitution"], ), UniformDomainParam(name="ball_radius", mean=dp_nom["ball_radius"], halfspan=dp_nom["ball_radius"] / 5, clip_lo=5e-3), UniformDomainParam(name="ball_mass", mean=dp_nom["ball_mass"], halfspan=dp_nom["ball_mass"] / 2, clip_lo=0), UniformDomainParam(name="club_mass", mean=dp_nom["club_mass"], halfspan=dp_nom["club_mass"] / 5), UniformDomainParam( name="ball_friction_coefficient",
def test_sysidasrl_reps(ex_dir, env: SimEnv, num_eval_rollouts: int):
    """
    Check that a few iterations of `SysIdViaEpisodicRL` with a `REPS` subroutine do not increase the loss between
    rollouts from a "real" environment and rollouts from the randomized simulation.

    :param ex_dir: experiment directory passed to the subroutine
    :param env: simulation environment, copied once to act as the real environment and once as the simulation
    :param num_eval_rollouts: number of rollouts per evaluation and per batch of real-world data
    """
    pyrado.set_seed(0)

    def eval_ddp_policy(rollouts_real):
        """Roll out the simulation from the real initial states and return the mean loss over all rollout pairs."""
        init_states_real = np.array([ro.states[0, :] for ro in rollouts_real])
        rollouts_sim = [
            rollout(env_sim, behavior_policy, eval=True, reset_kwargs=dict(init_state=init_states_real[i, :]))
            for i in range(num_eval_rollouts)
        ]

        # Clip the rollouts, yielding two lists of pairwise equally long rollouts
        ros_real_tr, ros_sim_tr = algo.truncate_rollouts(rollouts_real, rollouts_sim, replicate=False)
        assert len(ros_real_tr) == len(ros_sim_tr)
        assert all(np.allclose(r.states[0, :], s.states[0, :]) for r, s in zip(ros_real_tr, ros_sim_tr))

        # Return the average loss
        losses = [algo.loss_fcn(ro_r, ro_s) for ro_r, ro_s in zip(ros_real_tr, ros_sim_tr)]
        return float(np.mean(np.asarray(losses)))

    # Environments (the real one has a fixed angular offset of -2 degrees)
    env_real = deepcopy(env)
    env_real.domain_param = dict(ang_offset=-2 * np.pi / 180)

    env_sim = deepcopy(env)
    randomizer = DomainRandomizer(
        UniformDomainParam(name="ang_offset", mean=0, halfspan=1e-6),
    )
    env_sim = DomainRandWrapperLive(env_sim, randomizer)
    # Map the indices of the policy parameters to the distribution parameters of the randomizer
    dp_map = {0: ("ang_offset", "mean"), 1: ("ang_offset", "halfspan")}
    env_sim = MetaDomainRandWrapper(env_sim, dp_map)

    assert env_real is not env_sim

    # Policies (the behavioral policy needs to be deterministic)
    behavior_policy = LinearPolicy(env_sim.spec, feats=FeatureStack(identity_feat))
    prior = DomainRandomizer(
        UniformDomainParam(name="ang_offset", mean=1 * np.pi / 180, halfspan=1 * np.pi / 180),
    )
    ddp_policy = DomainDistrParamPolicy(mapping=dp_map, trafo_mask=[False, True], prior=prior)

    # Subroutine
    subrtn_hparam = dict(
        max_iter=2,
        eps=1.0,
        pop_size=100,
        num_init_states_per_domain=1,
        expl_std_init=5e-2,
        expl_std_min=1e-4,
        num_workers=1,
    )
    subrtn = REPS(ex_dir, env_sim, ddp_policy, **subrtn_hparam)

    algo_hparam = dict(
        metric=None, obs_dim_weight=np.ones(env_sim.obs_space.shape), num_rollouts_per_distr=5, num_workers=1
    )
    algo = SysIdViaEpisodicRL(subrtn, behavior_policy, **algo_hparam)

    # Held-out real rollouts used to compare the loss before and after training
    rollouts_real_tst = [rollout(env_real, behavior_policy, eval=True) for _ in range(num_eval_rollouts)]
    loss_pre = eval_ddp_policy(rollouts_real_tst)

    # Mimic training
    while algo.curr_iter < algo.max_iter and not algo.stopping_criterion_met():
        algo.logger.add_value(algo.iteration_key, algo.curr_iter)

        # Create fake real-world data
        rollouts_real = [rollout(env_real, behavior_policy, eval=True) for _ in range(num_eval_rollouts)]

        algo.step(snapshot_mode="latest", meta_info=dict(rollouts_real=rollouts_real))

        algo.logger.record_step()
        algo._curr_iter += 1

    loss_post = eval_ddp_policy(rollouts_real_tst)
    assert loss_post <= loss_pre  # don't have to be better every step
def test_basic_meta(ex_dir, policy, env: SimEnv, algo, algo_hparam: dict):
    """
    Train a meta-algorithm that wraps a `PPO` subroutine and check that it runs for all of its iterations.

    :param ex_dir: experiment directory passed to the subroutine
    :param policy: unused, a new `FNNPolicy` is created for the wrapped environment
    :param env: simulation environment that gets wrapped with observation noise, action normalization, action delay,
                and domain randomization
    :param algo: callable creating the meta-algorithm from the environment, the subroutine, and `algo_hparam`
    :param algo_hparam: keyword arguments forwarded to `algo`
    """
    pyrado.set_seed(0)

    # Environment wrappers
    noise_std = [
        1 / 180 * np.pi,
        1 / 180 * np.pi,
        0.0025,
        0.0025,
        2 / 180 * np.pi,
        2 / 180 * np.pi,
        0.05,
        0.05,
    ]
    wrapped_env = GaussianObsNoiseWrapper(env, noise_std=noise_std)
    wrapped_env = ActDelayWrapper(ActNormWrapper(wrapped_env))

    # Default randomizer extended by the action delay
    randomizer = create_default_randomizer_qbb()
    act_delay = UniformDomainParam(name="act_delay", mean=15, halfspan=15, clip_lo=0, roundint=True)
    randomizer.add_domain_params(act_delay)
    wrapped_env = DomainRandWrapperLive(wrapped_env, randomizer)

    # Policy (FNN)
    actor = FNNPolicy(spec=wrapped_env.spec, hidden_sizes=[16, 16], hidden_nonlin=to.tanh)

    # Critic (FNN)
    vfcn = FNNPolicy(
        spec=EnvSpec(wrapped_env.obs_space, ValueFunctionSpace), hidden_sizes=[16, 16], hidden_nonlin=to.tanh
    )
    critic = GAE(vfcn, gamma=0.9995, lamda=0.98, num_epoch=2, batch_size=64, lr=5e-4, standardize_adv=False)

    # Subroutine
    subrtn = PPO(
        ex_dir,
        wrapped_env,
        actor,
        critic,
        max_iter=3,
        min_rollouts=5,
        num_epoch=2,
        eps_clip=0.1,
        batch_size=64,
        std_init=0.8,
        lr=2e-4,
        num_workers=1,
    )

    # Meta-algorithm
    meta_algo = algo(wrapped_env, subrtn, **algo_hparam)
    meta_algo.train()

    assert meta_algo.curr_iter == meta_algo.max_iter
def create_default_randomizer_qcp() -> DomainRandomizer:
    """
    Build the default domain randomizer for the `QCartPoleSim`.

    All distributions are centered at the nominal domain parameter values, which are queried with `long=False`.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.quanser_cartpole import QCartPoleSim

    nominal = QCartPoleSim.get_nominal_domain_param(long=False)

    def normal(key, divisor, lower):
        # Normal distribution around the nominal value with std = nominal / divisor
        return NormalDomainParam(name=key, mean=nominal[key], std=nominal[key] / divisor, clip_lo=lower)

    def uniform(key, divisor, **clipping):
        # Uniform distribution around the nominal value with halfspan = nominal / divisor
        return UniformDomainParam(name=key, mean=nominal[key], halfspan=nominal[key] / divisor, **clipping)

    params = [
        normal("gravity_const", 10, 1e-4),
        normal("cart_mass", 5, 1e-4),
        normal("pole_mass", 5, 1e-4),
        normal("rail_length", 5, 1e-2),
        normal("pole_length", 5, 1e-2),
        uniform("motor_efficiency", 4, clip_lo=1e-4, clip_up=1),
        uniform("gear_efficiency", 4, clip_lo=1e-4, clip_up=1),
        normal("gear_ratio", 4, 1e-4),
        normal("motor_inertia", 4, 1e-9),
        normal("pinion_radius", 5, 1e-4),
        normal("motor_resistance", 4, 1e-4),
        normal("motor_back_emf", 4, 1e-4),
        uniform("combined_damping", 4, clip_lo=1e-4),
        uniform("pole_damping", 4, clip_lo=1e-4),
        uniform("cart_friction_coeff", 2, clip_lo=0),
    ]
    return DomainRandomizer(*params)
] ex_labels = [ '', ] else: raise pyrado.ValueErr(given=args.env_name, eq_constraint=f'{QCartPoleSwingUpSim.name}, {QCartPoleStabSim.name},' f' or {QCartPoleSwingUpSim.name}') if not check_all_lengths_equal([prefixes, ex_names, ex_labels]): raise pyrado.ShapeErr(msg=f'The lengths of prefixes, ex_names, and ex_labels must be equal, ' f'but they are {len(prefixes)}, {len(ex_names)}, and {len(ex_labels)}!') # Create Randomizer pert = create_conservative_randomizer(env) pert.add_domain_params(UniformDomainParam(name='act_delay', mean=20, halfspan=20, clip_lo=0, roundint=True)) # Loading the policies ex_dirs = [osp.join(p, e) for p, e in zip(prefixes, ex_names)] env_sim_list = [] policy_list = [] for ex_dir in ex_dirs: env_sim, policy, _ = load_experiment(ex_dir, args) policy_list.append(policy) # Fix initial state (set to None if it should not be fixed) init_state_list = [None]*args.num_ro_per_config # Crate empty data frame df = pd.DataFrame(columns=['policy', 'ret', 'len'])
def create_default_randomizer_wamjsc() -> DomainRandomizer:
    """
    Build the default domain randomizer for the `WAMJointSpaceCtrlSim`.

    The damping and the dry friction of the joints 1 to 7 are uniformly distributed around their nominal values with
    a half-span of half the nominal value and `clip_lo=0.0`.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.mujoco.wam_jsc import WAMJointSpaceCtrlSim

    nominal = WAMJointSpaceCtrlSim.get_nominal_domain_param()

    # All damping parameters come first, followed by all dry friction parameters
    keys = [f"joint_{idx}_damping" for idx in range(1, 8)]
    keys += [f"joint_{idx}_dryfriction" for idx in range(1, 8)]

    return DomainRandomizer(
        *[UniformDomainParam(name=key, mean=nominal[key], halfspan=nominal[key] / 2, clip_lo=0.0) for key in keys]
    )
1 / 180 * pi, 1 / 180 * pi, 0.005, 0.005, # [rad, rad, m, m, ... 10 / 180 * pi, 10 / 180 * pi, 0.05, 0.05 ]) # ... rad/s, rad/s, m/s, m/s] env = ActNormWrapper(env) env = ActDelayWrapper(env) randomizer = get_conservative_randomizer(env) randomizer.add_domain_params( UniformDomainParam(name='act_delay', mean=5, halfspan=5, clip_lo=0, roundint=True)) env = DomainRandWrapperBuffer(env, randomizer) # Policy # policy_hparam = dict(hidden_sizes=[64, 64], hidden_nonlin=to.tanh) # FNN # policy_hparam = dict(hidden_size=32, num_recurrent_layers=1, hidden_nonlin='tanh') # RNN policy_hparam = dict(hidden_size=32, num_recurrent_layers=1) # LSTM & GRU # policy = FNNPolicy(spec=env.spec, **policy_hparam) # policy = RNNPolicy(spec=env.spec, **policy_hparam) # policy = LSTMPolicy(spec=env.spec, **policy_hparam) policy = GRUPolicy(spec=env.spec, **policy_hparam) # Critic # value_fcn_hparam = dict(hidden_sizes=[32, 32], hidden_nonlin=to.tanh) # FNN
fixed_init_state=False, observe_ball=True, task_args=dict( final_factor=500, success_bonus=250, Q=np.diag([0.5, 1e-4, 4e1]), R=np.diag([0, 0, 1e-1, 2e-1]), Q_dev=np.diag([0.0, 0.0, 5]), # R_dev=np.diag([0., 0., 1e-3, 1e-3]) ), ) env = WAMBallInCupSim(**env_hparams) # Randomizer randomizer = DomainRandomizer( UniformDomainParam(name="cup_scale", mean=1.0, halfspan=0.2), NormalDomainParam(name="rope_length", mean=0.3, std=0.005), NormalDomainParam(name="ball_mass", mean=0.021, std=0.001), UniformDomainParam(name="joint_2_damping", mean=0.05, halfspan=0.05), UniformDomainParam(name="joint_2_dryfriction", mean=0.1, halfspan=0.1), ) env = DomainRandWrapperLive(env, randomizer) # Policy bounds = ([0.0, 0.25, 0.5], [1.0, 1.5, 2.5]) policy_hparam = dict(rbf_hparam=dict(num_feat_per_dim=9, bounds=bounds, scale=None), dim_mask=2) policy = DualRBFLinearPolicy(env.spec, **policy_hparam) # Algorithm algo_hparam = dict( max_iter=15,
def create_default_randomizer_wambic() -> DomainRandomizer:
    """
    Build the default domain randomizer for the `WAMBallInCupSim`.

    The ball mass, cup scale, and rope length follow normal distributions. The rope damping as well as the damping
    and the dry friction of the joints 1 to 7 follow uniform distributions. All of them are centered at the nominal
    domain parameter values.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.mujoco.wam_bic import WAMBallInCupSim

    nominal = WAMBallInCupSim.get_nominal_domain_param()

    def normal(key, divisor, **clipping):
        # Normal distribution around the nominal value with std = nominal / divisor
        return NormalDomainParam(name=key, mean=nominal[key], std=nominal[key] / divisor, **clipping)

    def uniform(key, lower):
        # Uniform distribution around the nominal value with a half-span of half the nominal value
        return UniformDomainParam(name=key, mean=nominal[key], halfspan=nominal[key] / 2, clip_lo=lower)

    params = [
        normal("ball_mass", 10, clip_lo=1e-2),
        # Ball needs to fit into the cup
        normal("cup_scale", 5, clip_lo=0.65),
        # Rope won't be more than 3cm off
        normal("rope_length", 30, clip_lo=0.27, clip_up=0.33),
        uniform("rope_damping", 1e-6),
    ]
    # All joint damping parameters come first, followed by all dry friction parameters
    params += [uniform(f"joint_{idx}_damping", 0.0) for idx in range(1, 8)]
    params += [uniform(f"joint_{idx}_dryfriction", 0.0) for idx in range(1, 8)]

    return DomainRandomizer(*params)
env, policy = create_position_mps_setup(physicsEngine, graphFileName, dt, max_steps, ref_frame, checkJointLimits) elif setup_type == 'vel': env, policy = create_velocity_mps_setup(physicsEngine, graphFileName, dt, max_steps, ref_frame, checkJointLimits) else: raise pyrado.ValueErr(given=setup_type, eq_constraint="'idle', 'pos', 'vel'") if randomize: dp_nom = env.get_nominal_domain_param() randomizer = DomainRandomizer( UniformDomainParam(name='box_mass', mean=dp_nom['box_mass'], halfspan=dp_nom['box_mass'] / 5), UniformDomainParam(name='box_width', mean=dp_nom['box_width'], halfspan=dp_nom['box_length'] / 5), UniformDomainParam(name='basket_friction_coefficient', mean=dp_nom['basket_friction_coefficient'], halfspan=dp_nom['basket_friction_coefficient'] / 5)) env = DomainRandWrapperLive(env, randomizer) # Simulate and plot print('observations:\n', env.obs_space.labels) done, param, state = False, None, None while not done: ro = rollout(env,
NormalDomainParam, UniformDomainParam, ) from pyrado.domain_randomization.domain_randomizer import DomainRandomizer DomainParam(name="a", mean=1) BernoulliDomainParam(name="b", val_0=2, val_1=5, prob_1=0.8) DomainRandomizer( NormalDomainParam(name="mass", mean=1.2, std=0.1, clip_lo=10, clip_up=100)) DomainRandomizer( NormalDomainParam(name="mass", mean=1.2, std=0.1, clip_lo=10, clip_up=100), UniformDomainParam(name="special", mean=0, halfspan=42, clip_lo=-7.4, roundint=True), NormalDomainParam(name="length", mean=4, std=0.6, clip_up=50.1), UniformDomainParam(name="time_delay", mean=13, halfspan=6, clip_up=17, roundint=True), MultivariateNormalDomainParam(name="multidim", mean=10 * to.ones((2, )), cov=2 * to.eye(2), clip_up=11), )
dp_nom = WAMBallInCupSim.get_nominal_domain_param() randomizer = DomainRandomizer( # UniformDomainParam(name='cup_scale', mean=0.95, halfspan=0.05), # UniformDomainParam(name='ball_mass', mean=2.1000e-02, halfspan=3.1500e-03, clip_lo=0), # UniformDomainParam(name='rope_length', mean=3.0000e-01, halfspan=1.5000e-02, clip_lo=0.27, clip_up=0.33), # UniformDomainParam(name='rope_damping', mean=1.0000e-04, halfspan=1.0000e-04, clip_lo=1e-2), # UniformDomainParam(name='joint_2_damping', mean=5.0000e-02, halfspan=5.0000e-02, clip_lo=1e-6), # UniformDomainParam(name='joint_2_dryfriction', mean=2.0000e-01, halfspan=2.0000e-01, clip_lo=0), # NormalDomainParam(name="rope_length", mean=2.9941e-01, std=1.0823e-02, clip_lo=0.27, clip_up=0.33), UniformDomainParam(name="rope_damping", mean=3.0182e-05, halfspan=4.5575e-05, clip_lo=0.0), NormalDomainParam(name="ball_mass", mean=1.8412e-02, std=1.9426e-03, clip_lo=1e-2), UniformDomainParam(name="joint_2_dryfriction", mean=1.9226e-01, halfspan=2.5739e-02, clip_lo=0), UniformDomainParam(name="joint_2_damping", mean=9.4057e-03, halfspan=5.0000e-04, clip_lo=1e-6), ) env = DomainRandWrapperLive(env, randomizer)
from pyrado.sampling.sequences import * if __name__ == '__main__': # Experiment (set seed before creating the modules) ex_dir = setup_experiment(QBallBalancerSim.name, f'{SPOTA.name}-{HCNormal.name}', f'{LinearPolicy.name}_obsnoise-s_actedlay-10', seed=1001) # Environment and domain randomization env_hparams = dict(dt=1/100., max_steps=500) env = QBallBalancerSim(**env_hparams) env = GaussianObsNoiseWrapper(env, noise_std=[1/180*pi, 1/180*pi, 0.005, 0.005, # [rad, rad, m, m, ... 10/180*pi, 10/180*pi, 0.05, 0.05]) # ... rad/s, rad/s, m/s, m/s] # env = ObsPartialWrapper(env, mask=[0, 0, 0, 0, 1, 1, 0, 0]) env = ActDelayWrapper(env) randomizer = get_default_randomizer(env) randomizer.add_domain_params(UniformDomainParam(name='act_delay', mean=5, halfspan=5, clip_lo=0, roundint=True)) env = DomainRandWrapperBuffer(env, randomizer) # Policy policy_hparam = dict(feats=FeatureStack([identity_feat])) policy = LinearPolicy(spec=env.spec, **policy_hparam) # Initialize with Quanser's PD gains init_policy_param_values = to.tensor([[-14., 0, -14*3.45, 0, 0, 0, -14*2.11, 0], [0, -14., 0, -14*3.45, 0, 0, 0, -14*2.11]]) # Algorithm subrtn_hparam_cand = dict( max_iter=100, num_rollouts=0, # will be overwritten by SPOTA pop_size=50,
f'{UDR.name}-{PoWER.name}_{DualRBFLinearPolicy.name}', 'rand-cs-rl-bm-jd-js') # Set seed if desired pyrado.set_seed(args.seed, verbose=True) # Environment env_hparams = dict(num_dof=4, max_steps=1750, task_args=dict(final_factor=0.5), fixed_init_state=False) env = WAMBallInCupSim(**env_hparams) # Randomizer randomizer = DomainRandomizer( UniformDomainParam(name='cup_scale', mean=0.95, halfspan=0.05), NormalDomainParam(name='rope_length', mean=0.3, std=0.005), NormalDomainParam(name='ball_mass', mean=0.021, std=0.001), UniformDomainParam(name='joint_damping', mean=0.05, halfspan=0.05), UniformDomainParam(name='joint_stiction', mean=0.1, halfspan=0.1), ) env = DomainRandWrapperLive(env, randomizer) # Policy policy_hparam = dict(rbf_hparam=dict(num_feat_per_dim=10, bounds=(0., 1.), scale=None), dim_mask=2) policy = DualRBFLinearPolicy(env.spec, **policy_hparam) # Algorithm