def test_domain_param_transforms(env: SimEnv, trafo_class: Type):
    """Check that assigning transformed domain parameters round-trips through the wrapper's forward/inverse maps."""
    pyrado.set_seed(0)

    # Create a mask for a random domain parameter, and pick a second, different parameter that stays fixed
    offset = 1
    idx = random.randint(0, len(env.supported_domain_param) - 1)
    sel_dp_change = list(env.supported_domain_param)[idx]
    sel_dp_fix = list(env.supported_domain_param)[(idx + offset) % len(env.supported_domain_param)]
    # Re-sample until neither parameter is Vortex-only (the loop always runs at least once since offset == 1)
    while (
        offset == 1
        or any(item in sel_dp_change for item in VORTEX_ONLY_DOMAIN_PARAM_LIST)
        or any(item in sel_dp_fix for item in VORTEX_ONLY_DOMAIN_PARAM_LIST)
    ):
        idx = random.randint(0, len(env.supported_domain_param) - 1)
        sel_dp_change = list(env.supported_domain_param)[idx]
        sel_dp_fix = list(env.supported_domain_param)[(idx + offset) % len(env.supported_domain_param)]
        offset += 1
    mask = (sel_dp_change,)

    wenv = trafo_class(env, mask)
    assert isinstance(wenv, DomainParamTransform)

    # Check 5 random values
    for _ in range(5):
        # Change the selected domain parameter
        new_dp_val = random.random() * env.get_nominal_domain_param()[sel_dp_change]
        new_dp_val = abs(new_dp_val) + 1e-6  # due to the domain of the new params
        transformed_new_dp_val = wenv.forward(new_dp_val)
        wenv.domain_param = {sel_dp_change: transformed_new_dp_val}  # calls the inverse transform
        if not isinstance(inner_env(wenv), SimPyEnv):
            wenv.reset()  # the RcsPySim and MujocoSim classes need to be reset to apply the new domain param

        # Test the actual domain param and the getters
        assert inner_env(wenv)._domain_param[sel_dp_change] == pytest.approx(new_dp_val, abs=1e-5)
        assert wenv.domain_param[sel_dp_change] == pytest.approx(new_dp_val, abs=1e-5)
        assert wenv.domain_param[sel_dp_fix] != pytest.approx(new_dp_val)
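

# A minimal sketch of the round trip the assertions above rely on: the wrapper
# stores domain parameters in an unconstrained space via forward() and maps
# assigned values back through the inverse. Assuming a log-space transform here;
# the forward/inverse pair below is a hypothetical stand-in, not the pyrado
# DomainParamTransform API.
def test_log_transform_round_trip_sketch():
    import math

    forward = math.log  # strictly positive parameter -> unconstrained space
    inverse = math.exp  # unconstrained space -> strictly positive parameter

    nominal_mass = 0.024  # e.g. a pendulum pole mass in kg (hypothetical value)
    assert inverse(forward(nominal_mass)) == pytest.approx(nominal_mass, abs=1e-5)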
def test_npdr_and_bayessim(
    ex_dir,
    algo_name: str,
    env: SimEnv,
    num_segments: int,
    len_segments: int,
    num_real_rollouts: int,
    num_sbi_rounds: int,
    use_rec_act: bool,
):
    """Smoke test running one iteration of NPDR or BayesSim against a fake ground truth target domain."""
    pyrado.set_seed(0)

    # Create a fake ground truth target domain
    env_real = deepcopy(env)
    dp_nom = env.get_nominal_domain_param()
    env_real.domain_param = dict(
        mass_pend_pole=dp_nom["mass_pend_pole"] * 1.2,
        length_pend_pole=dp_nom["length_pend_pole"] * 0.8,
    )

    # Reduce the number of steps to make this test run faster
    env.max_steps = 40
    env_real.max_steps = 40

    # Policy
    policy = QQubeSwingUpAndBalanceCtrl(env.spec)

    # Define a mapping: index -> domain parameter
    dp_mapping = {1: "mass_pend_pole", 2: "length_pend_pole"}

    # Prior
    prior_hparam = dict(
        low=to.tensor([dp_nom["mass_pend_pole"] * 0.5, dp_nom["length_pend_pole"] * 0.5]),
        high=to.tensor([dp_nom["mass_pend_pole"] * 1.5, dp_nom["length_pend_pole"] * 1.5]),
    )
    prior = sbiutils.BoxUniform(**prior_hparam)

    # Time series embedding
    embedding = BayesSimEmbedding(
        env.spec,
        RolloutSamplerForSBI.get_dim_data(env.spec),
        downsampling_factor=3,
    )

    # Posterior (normalizing flow)
    posterior_hparam = dict(model="maf", embedding_net=nn.Identity(), hidden_features=20, num_transforms=3)

    # Policy optimization subroutine
    subrtn_policy_hparam = dict(
        max_iter=1,
        pop_size=2,
        num_init_states_per_domain=1,
        num_domains=2,
        expl_std_init=0.1,
        expl_factor=1.1,
        num_workers=1,
    )
    subrtn_policy = HCNormal(ex_dir, env, policy, **subrtn_policy_hparam)

    # Algorithm
    algo_hparam = dict(
        max_iter=1,
        num_sim_per_round=20,
        num_real_rollouts=num_real_rollouts,
        num_sbi_rounds=num_sbi_rounds,
        simulation_batch_size=1,
        normalize_posterior=False,
        num_eval_samples=2,
        num_segments=num_segments,
        len_segments=len_segments,
        use_rec_act=use_rec_act,
        stop_on_done=True,
        subrtn_sbi_training_hparam=dict(max_num_epochs=1),  # only train for 1 epoch
        # subrtn_sbi_sampling_hparam=dict(sample_with_mcmc=True, mcmc_parameters=dict(warmup_steps=20)),
        num_workers=1,
    )
    skip = False
    if algo_name == NPDR.name:
        algo = NPDR(
            save_dir=ex_dir,
            env_sim=env,
            env_real=env_real,
            policy=policy,
            dp_mapping=dp_mapping,
            prior=prior,
            embedding=embedding,
            subrtn_sbi_class=SNPE_C,
            posterior_hparam=posterior_hparam,
            subrtn_policy=subrtn_policy,
            **algo_hparam,
        )
    elif algo_name == BayesSim.name:
        # We are not checking multi-round SNPE-A since it has known issues
        if algo_hparam["num_sbi_rounds"] > 1:
            skip = True
        algo = BayesSim(
            save_dir=ex_dir,
            env_sim=env,
            env_real=env_real,
            policy=policy,
            dp_mapping=dp_mapping,
            embedding=embedding,
            prior=prior,
            subrtn_policy=subrtn_policy,
            **algo_hparam,
        )
    else:
        raise NotImplementedError

    if not skip:
        algo.train()
        # Just checking the interface here
        assert algo.curr_iter == algo.max_iter
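

# A minimal sketch of the sbi workflow that NPDR and BayesSim wrap: sample domain
# parameters from a box prior, simulate, and fit a neural posterior with SNPE-C.
# The toy simulator is hypothetical; the sbi calls mirror the imports used above
# (SNPE_C, sbiutils.BoxUniform), though exact signatures may vary across sbi versions.
def test_snpe_c_workflow_sketch():
    pyrado.set_seed(0)
    prior = sbiutils.BoxUniform(low=to.tensor([0.05]), high=to.tensor([0.15]))

    def toy_simulator(theta: to.Tensor) -> to.Tensor:
        # Stand-in for rolling out the policy in the simulated environment
        return theta + 0.01 * to.randn_like(theta)

    theta = prior.sample((100,))
    x = toy_simulator(theta)

    inference = SNPE_C(prior=prior)
    inference.append_simulations(theta, x).train(max_num_epochs=1)
    posterior = inference.build_posterior()

    # Condition on a fake "real" observation and draw posterior samples
    samples = posterior.sample((20,), x=to.tensor([[0.12]]))
    assert samples.shape == (20, 1)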