def test_mask_invert():
    """Check that `keep_selected=True` makes the wrapper keep the masked entries instead of dropping them."""
    obs_space = BoxSpace([-1, -2, -3], [1, 2, 3], labels=['one', 'two', 'three'])
    mockenv = MockEnv(obs_space=obs_space)

    # The mask selects the second element; with keep_selected=True this is the only element that remains
    wenv = ObsPartialWrapper(mockenv, [0, 1, 0], keep_selected=True)

    # Pairs of (full observation fed into the mock, observation expected from the wrapper)
    cases = (
        ([1, 2, 3], [2]),
        ([4, 7, 9], [7]),
    )
    for full_obs, expected in cases:
        mockenv.next_obs = full_obs
        filtered, _, _, _ = wenv.step(None)
        assert list(filtered) == expected
def wrap_like_other_env(env_targ: Env, env_src: Union[SimEnv, EnvWrapper]) -> Env:
    """
    Wrap a given real environment like its simulated counterpart (except the domain randomization of course).

    The wrappers are added in a fixed order: downsampling (only if the source environment has a larger time step
    than the target), action normalization, observation normalization (static or, otherwise, running), and finally
    partial observation. The mask of the partial observation wrapper is copied from the source environment.

    :param env_targ: target environment e.g. environment representing the physical device
    :param env_src: source environment e.g. simulation environment used for training
    :return: target environment
    """
    if env_src.dt > env_targ.dt:
        # Round to the nearest integer instead of truncating: the quotient of two floats can end up marginally
        # below the intended value (e.g. 0.3 / 0.1 == 2.9999999999999996), which int() alone would cut down by one.
        ds_factor = int(round(env_src.dt / env_targ.dt))
        env_targ = DownsamplingWrapper(env_targ, ds_factor)
        print_cbt(f'Wrapped the env with an DownsamplingWrapper of factor {ds_factor}.', 'c')

    if typed_env(env_src, ActNormWrapper) is not None:
        env_targ = ActNormWrapper(env_targ)
        print_cbt('Wrapped the env with an ActNormWrapper.', 'c')

    # The static and the running observation normalization are treated as mutually exclusive here
    if typed_env(env_src, ObsNormWrapper) is not None:
        env_targ = ObsNormWrapper(env_targ)
        print_cbt('Wrapped the env with an ObsNormWrapper.', 'c')
    elif typed_env(env_src, ObsRunningNormWrapper) is not None:
        env_targ = ObsRunningNormWrapper(env_targ)
        print_cbt('Wrapped the env with an ObsRunningNormWrapper.', 'c')

    # Look the source wrapper up once and reuse it to copy its mask
    src_partial = typed_env(env_src, ObsPartialWrapper)
    if src_partial is not None:
        env_targ = ObsPartialWrapper(env_targ, mask=src_partial.keep_mask, keep_selected=True)
        print_cbt('Wrapped the env with an ObsPartialWrapper.', 'c')

    return env_targ
def test_wrap_like_other_env(env: SimEnv):
    """
    Build up a chain of wrappers on a source environment step by step, and check after every step that
    `wrap_like_other_env()` makes the target environment's outermost wrapper match the source's.

    :param env: simulation environment provided by the test setup
    """
    # The target runs three times faster than the source, matching the downsampling factor used below
    targ = deepcopy(env)
    targ.dt /= 3

    # Downsampling
    src = DownsamplingWrapper(env, factor=3)
    assert type(targ) is not type(src)
    targ = wrap_like_other_env(targ, src, use_downsampling=True)
    assert type(targ) is type(src)

    # Action normalization
    src = ActNormWrapper(src)
    assert type(targ) is not type(src)
    targ = wrap_like_other_env(targ, src)
    assert type(targ) is type(src)

    # Observation normalization
    src = ObsNormWrapper(src)
    assert type(targ) is not type(src)
    targ = wrap_like_other_env(targ, src)
    assert type(targ) is type(src)
    assert type(targ.wrapped_env) is type(src.wrapped_env)

    # Running observation normalization
    src = ObsRunningNormWrapper(src)
    targ = wrap_like_other_env(targ, src)
    assert type(targ) is type(src)
    assert type(targ.wrapped_env) is type(src.wrapped_env)

    # Partial observation
    src = ObsPartialWrapper(src, idcs=["x"])
    targ = wrap_like_other_env(targ, src)
    assert type(targ) is type(src)
    assert type(targ.wrapped_env) is type(src.wrapped_env)
def test_spaces():
    """Check that the observation space of the wrapped environment no longer contains the masked entry."""
    full_space = BoxSpace([-1, -2, -3], [1, 2, 3], labels=['one', 'two', 'three'])

    # Mask out the second element
    wenv = ObsPartialWrapper(MockEnv(obs_space=full_space), [0, 1, 0])

    # Bounds and labels must only contain the first and the third element
    lower, upper = wenv.obs_space.bounds
    assert list(lower) == [-1, -3]
    assert list(upper) == [1, 3]
    assert list(wenv.obs_space.labels) == ['one', 'three']
def test_combination():
    """Stack several environment wrappers on a cart-pole swing-up simulation and check how they interact."""

    def run(some_env):
        # Evaluation rollout with a dummy policy, always using the same seed
        return rollout(some_env, DummyPolicy(some_env.spec), eval=True, seed=0, render_mode=RenderMode())

    env = QCartPoleSwingUpSim(dt=1/50., max_steps=20)

    # Buffered domain randomization holding 3 domains
    env_r = DomainRandWrapperBuffer(env, create_default_randomizer(env))
    env_r.fill_buffer(num_domains=3)

    # Every rollout must change the domain parameters; the 1st and the 4th rollout are expected to end up with
    # the same parameters (presumably because the buffer of 3 domains is cycled through)
    params_after = []
    for _ in range(4):
        before = env_r.domain_param
        run(env_r)
        after = env_r.domain_param
        params_after.append(after)
        assert after != before
    assert params_after[0] == params_after[3]

    # Normalize actions and observations (the randomization wrapper is not part of this chain)
    elb = {'x_dot': -213., 'theta_dot': -42.}
    eub = {'x_dot': 213., 'theta_dot': 42., 'x': 0.123}
    env_rn = ObsNormWrapper(ActNormWrapper(env), explicit_lb=elb, explicit_ub=eub)
    act_lb, act_ub = env_rn.act_space.bounds
    assert all(act_lb == -1)
    assert all(act_ub == 1)
    obs_lb, obs_ub = env_rn.obs_space.bounds
    assert all(obs_lb == -1)
    assert all(obs_ub == 1)

    # Normalizing the observations of the randomized rollout must reproduce the normalized rollout
    ro_r = run(env_r)
    ro_rn = run(env_rn)
    assert np.allclose(env_rn._process_obs(ro_r.observations), ro_rn.observations)

    # Partial observation
    env_rnp = ObsPartialWrapper(env_rn, idcs=['x_dot', r'cos_theta'])
    ro_rnp = run(env_rnp)

    # Gaussian action noise: same recorded actions, different observations
    act_ones = np.ones(env_rnp.act_space.shape)
    env_rnpa = GaussianActNoiseWrapper(env_rnp, noise_mean=0.5*act_ones, noise_std=0.1*act_ones)
    ro_rnpa = run(env_rnpa)
    assert np.allclose(ro_rnp.actions, ro_rnpa.actions)
    assert not np.allclose(ro_rnp.observations, ro_rnpa.observations)

    # Action delay: same recorded actions, different observations
    env_rnpd = ActDelayWrapper(env_rnp, delay=3)
    ro_rnpd = run(env_rnpd)
    assert np.allclose(ro_rnp.actions, ro_rnpd.actions)
    assert not np.allclose(ro_rnp.observations, ro_rnpd.observations)

    # Inspecting and modifying the wrapper chain
    assert isinstance(inner_env(env_rnpd), QCartPoleSwingUpSim)
    assert typed_env(env_rnpd, ObsPartialWrapper) is not None
    assert isinstance(env_rnpd, ActDelayWrapper)
    env_rnpdr = remove_env(env_rnpd, ActDelayWrapper)
    assert not isinstance(env_rnpdr, ActDelayWrapper)
observePredictedCollisionCost=False, observeManipulabilityIndex=False, observeCurrentManipulability=True, observeDynamicalSystemGoalDistance=True, observeDynamicalSystemDiscrepancy=False, observeTaskSpaceDiscrepancy=True, ) env = Planar3LinkTASim(**env_hparams) # env = Planar3LinkIKActivationSim(**env_hparams) # eub = { # 'GD_DS0': 2., # 'GD_DS1': 2., # 'GD_DS2': 2., # } # env = ObsNormWrapper(env, explicit_ub=eub) env = ObsPartialWrapper(env, idcs=['Effector_DiscrepTS_X', 'Effector_DiscrepTS_Z']) # env = ObsPartialWrapper(env, idcs=['Effector_DiscrepTS_X', 'Effector_DiscrepTS_Z', 'Effector_Xd', 'Effector_Zd']) # Policy policy_hparam = dict( hidden_size=3, conv_out_channels=1, mirrored_conv_weights=True, conv_kernel_size=1, conv_padding_mode='circular', init_param_kwargs=dict(bell=True), activation_nonlin=to.sigmoid, tau_init=10., tau_learnable=True, kappa_init=0, kappa_learnable=False,
def wrap_like_other_env(
    env_targ: Union[SimEnv, RealEnv], env_src: Union[SimEnv, EnvWrapper], use_downsampling: bool = False
) -> Union[SimEnv, RealEnv]:
    """
    Wrap a given real environment like its simulated counterpart (except the domain randomization of course).

    For every supported wrapper type found on the source environment, the target environment gets a wrapper of
    the same type, unless it already has one. In both cases a message is printed. The wrappers are checked in a
    fixed order: downsampling, action normalization, observation normalization, running observation normalization,
    and partial observation. The mask of the partial observation wrapper is copied from the source environment.

    :param env_targ: target environment e.g. environment representing the physical device
    :param env_src: source environment e.g. simulation environment used for training
    :param use_downsampling: apply a wrapper that downsamples the actions if the sampling frequencies don't match
    :return: target environment
    """
    if use_downsampling and env_src.dt > env_targ.dt:
        if typed_env(env_targ, DownsamplingWrapper) is None:
            # Round to the nearest integer instead of truncating: the quotient of two floats can end up marginally
            # below the intended value (e.g. 0.3 / 0.1 == 2.9999999999999996), which int() alone would cut down
            # by one.
            ds_factor = int(round(env_src.dt / env_targ.dt))
            env_targ = DownsamplingWrapper(env_targ, ds_factor)
            print_cbt(f"Wrapped the target environment with a DownsamplingWrapper of factor {ds_factor}.", "y")
        else:
            print_cbt("The target environment was already wrapped with a DownsamplingWrapper.", "y")

    if typed_env(env_src, ActNormWrapper) is not None:
        if typed_env(env_targ, ActNormWrapper) is None:
            env_targ = ActNormWrapper(env_targ)
            print_cbt("Wrapped the target environment with an ActNormWrapper.", "y")
        else:
            print_cbt("The target environment was already wrapped with an ActNormWrapper.", "y")

    if typed_env(env_src, ObsNormWrapper) is not None:
        if typed_env(env_targ, ObsNormWrapper) is None:
            env_targ = ObsNormWrapper(env_targ)
            print_cbt("Wrapped the target environment with an ObsNormWrapper.", "y")
        else:
            print_cbt("The target environment was already wrapped with an ObsNormWrapper.", "y")

    if typed_env(env_src, ObsRunningNormWrapper) is not None:
        if typed_env(env_targ, ObsRunningNormWrapper) is None:
            env_targ = ObsRunningNormWrapper(env_targ)
            print_cbt("Wrapped the target environment with an ObsRunningNormWrapper.", "y")
        else:
            print_cbt("The target environment was already wrapped with an ObsRunningNormWrapper.", "y")

    # Look the source wrapper up once and reuse it to copy its mask
    src_partial = typed_env(env_src, ObsPartialWrapper)
    if src_partial is not None:
        if typed_env(env_targ, ObsPartialWrapper) is None:
            env_targ = ObsPartialWrapper(env_targ, mask=src_partial.keep_mask, keep_selected=True)
            print_cbt("Wrapped the target environment with an ObsPartialWrapper.", "y")
        else:
            print_cbt("The target environment was already wrapped with an ObsPartialWrapper.", "y")

    return env_targ
checkJointLimits=True, taskCombinationMethod="sum", collisionAvoidanceIK=False, observeVelocity=False, observeCollisionCost=False, observePredictedCollisionCost=False, observeManipulabilityIndex=False, observeCurrentManipulability=True, observeDynamicalSystemDiscrepancy=False, observeTaskSpaceDiscrepancy=False, observeForceTorque=True, observeDynamicalSystemGoalDistance=False, ) env = BoxLiftingVelIKActivationSim(**env_hparams) env = ObsPartialWrapper(env, idcs=["Box_Y", "Box_Z", "Box_A"]) # Domain randomizer dp_nom = env.get_nominal_domain_param() randomizer = create_default_randomizer(env) env = DomainRandWrapperLive(env, randomizer) # Policy policy_hparam = dict( tau_init=50.0, tau_learnable=True, kappa_init=1e-3, kappa_learnable=True, activation_nonlin=to.tanh, potentials_dyn_fcn=pd_cubic, potential_init_learnable=False,
observePredictedCollisionCost=False, observeManipulabilityIndex=False, observeCurrentManipulability=True, observeDynamicalSystemGoalDistance=True, observeDynamicalSystemDiscrepancy=False, observeTaskSpaceDiscrepancy=True, ) env = Planar3LinkTASim(**env_hparams) # env = Planar3LinkIKActivationSim(**env_hparams) # eub = { # 'GD_DS0': 2., # 'GD_DS1': 2., # 'GD_DS2': 2., # } # env = ObsNormWrapper(env, explicit_ub=eub) env = ObsPartialWrapper(env, idcs=["Effector_DiscrepTS_X", "Effector_DiscrepTS_Z"]) # env = ObsPartialWrapper(env, idcs=['Effector_DiscrepTS_X', 'Effector_DiscrepTS_Z', 'Effector_Xd', 'Effector_Zd']) # Policy policy_hparam = dict( hidden_size=3, conv_out_channels=1, mirrored_conv_weights=True, conv_kernel_size=1, conv_padding_mode="circular", init_param_kwargs=dict(bell=True), activation_nonlin=to.sigmoid, tau_init=10.0, tau_learnable=True, kappa_init=1e-3, kappa_learnable=True,
def test_combination(env: SimEnv):
    """
    Stack several environment wrappers on the given simulation environment and check how they interact.

    :param env: simulation environment provided by the test setup
    """

    def run(some_env):
        # Evaluation rollout with a dummy policy, always using the same seed
        return rollout(some_env, DummyPolicy(some_env.spec), eval=True, seed=0, render_mode=RenderMode())

    pyrado.set_seed(0)
    env.max_steps = 20

    # Buffered domain randomization holding 3 domains
    env_r = DomainRandWrapperBuffer(env, create_default_randomizer(env))
    env_r.fill_buffer(num_domains=3)

    # Every rollout must change the domain parameters; the 1st and the 4th rollout are expected to end up with
    # the same parameters (presumably because the buffer of 3 domains is cycled through)
    params_after = []
    for _ in range(4):
        before = env_r.domain_param
        run(env_r)
        after = env_r.domain_param
        params_after.append(after)
        assert after != before
    assert params_after[0] == params_after[3]

    # Normalize actions and observations (the randomization wrapper is not part of this chain)
    elb = {"x_dot": -213.0, "theta_dot": -42.0}
    eub = {"x_dot": 213.0, "theta_dot": 42.0, "x": 0.123}
    env_rn = ObsNormWrapper(ActNormWrapper(env), explicit_lb=elb, explicit_ub=eub)
    act_lb, act_ub = env_rn.act_space.bounds
    assert all(act_lb == -1)
    assert all(act_ub == 1)
    obs_lb, obs_ub = env_rn.obs_space.bounds
    assert all(obs_lb == -1)
    assert all(obs_ub == 1)

    # Normalizing the observations of the randomized rollout must reproduce the normalized rollout
    ro_r = run(env_r)
    ro_rn = run(env_rn)
    assert np.allclose(env_rn._process_obs(ro_r.observations), ro_rn.observations)

    # Partial observation, selecting the 3rd and the 4th label of the unwrapped observation space
    selected_labels = [env.obs_space.labels[2], env.obs_space.labels[3]]
    env_rnp = ObsPartialWrapper(env_rn, idcs=selected_labels)
    ro_rnp = run(env_rnp)

    # Gaussian action noise: the noisy actions must change the observed rollout
    act_ones = np.ones(env_rnp.act_space.shape)
    env_rnpa = GaussianActNoiseWrapper(env_rnp, noise_mean=0.5 * act_ones, noise_std=0.1 * act_ones)
    ro_rnpa = run(env_rnpa)
    assert not np.allclose(ro_rnp.observations, ro_rnpa.observations)

    # Action delay: same recorded actions, different observations
    env_rnpd = ActDelayWrapper(env_rnp, delay=3)
    ro_rnpd = run(env_rnpd)
    assert np.allclose(ro_rnp.actions, ro_rnpd.actions)
    assert not np.allclose(ro_rnp.observations, ro_rnpd.observations)

    # Inspecting and modifying the wrapper chain
    assert type(inner_env(env_rnpd)) is type(env)
    assert typed_env(env_rnpd, ObsPartialWrapper) is not None
    assert isinstance(env_rnpd, ActDelayWrapper)
    env_rnpdr = remove_env(env_rnpd, ActDelayWrapper)
    assert not isinstance(env_rnpdr, ActDelayWrapper)