def test_denormalization(mock_obs_space):
    """
    Check that every observation returned by an `ObsNormWrapper` lies within [-1, 1].

    :param mock_obs_space: observation space handed to the `MockEnv` that gets wrapped
    """
    mockenv = MockEnv(obs_space=mock_obs_space)
    wenv = ObsNormWrapper(mockenv)

    for _ in range(100):
        # Step with a constant zero action; only the returned observation is of interest here
        # (presumably the MockEnv yields a fresh random observation on every step -- TODO confirm)
        obs, _, _, _ = wenv.step(np.array([0, 0, 0]))
        # `.all` has to be called: the bare bound method is always truthy, so the assert could never fail
        assert (abs(obs) <= 1).all()
def wrap_like_other_env(env_targ: Env, env_src: [SimEnv, EnvWrapper]) -> Env:
    """
    Wrap a given real environment like its simulated counterpart (except the domain randomization of course).

    The source environment's wrapper chain is inspected via `typed_env`, and for every supported wrapper type that
    is found, the target environment is wrapped with a new instance of that type. A `DownsamplingWrapper` is added
    if the source environment has a larger time step size than the target environment.

    :param env_targ: target environment e.g. environment representing the physical device
    :param env_src: source environment e.g. simulation environment used for training
    :return: target environment
    """
    if env_src.dt > env_targ.dt:
        # Plain truncation turns e.g. 0.03 / 0.01 = 2.9999999999999996 into a factor of 2. Snap to the nearest
        # integer if the ratio is integral up to floating point error, and only truncate otherwise.
        dt_ratio = env_src.dt / env_targ.dt
        nearest = int(round(dt_ratio))
        if abs(dt_ratio - nearest) <= 1e-9 * max(1.0, abs(dt_ratio)):
            ds_factor = nearest
        else:
            ds_factor = int(dt_ratio)
        env_targ = DownsamplingWrapper(env_targ, ds_factor)
        print_cbt(f'Wrapped the env with an DownsamplingWrapper of factor {ds_factor}.', 'c')

    if typed_env(env_src, ActNormWrapper) is not None:
        env_targ = ActNormWrapper(env_targ)
        print_cbt('Wrapped the env with an ActNormWrapper.', 'c')

    # The two observation normalization wrappers are treated as mutually exclusive here
    if typed_env(env_src, ObsNormWrapper) is not None:
        env_targ = ObsNormWrapper(env_targ)
        print_cbt('Wrapped the env with an ObsNormWrapper.', 'c')
    elif typed_env(env_src, ObsRunningNormWrapper) is not None:
        env_targ = ObsRunningNormWrapper(env_targ)
        print_cbt('Wrapped the env with an ObsRunningNormWrapper.', 'c')

    if typed_env(env_src, ObsPartialWrapper) is not None:
        # Reuse the mask of the source's wrapper such that the same observation entries are kept
        env_targ = ObsPartialWrapper(
            env_targ, mask=typed_env(env_src, ObsPartialWrapper).keep_mask, keep_selected=True
        )
        print_cbt('Wrapped the env with an ObsPartialWrapper.', 'c')

    return env_targ
def test_wrap_like_other_env(env: SimEnv):
    """
    Stack wrappers onto a reference environment one at a time, and check after every step that
    `wrap_like_other_env` gives a copy of the environment the same outermost wrapper type(s).

    :param env: simulation environment to wrap
    """

    def same_type(first, second) -> bool:
        # Exact type comparison on purpose, subclasses must not count as a match
        return type(first) == type(second)

    # The copy runs at three times the sampling frequency, so that downsampling by 3 is required to match
    env_mimic = deepcopy(env)
    env_mimic.dt /= 3

    env_ref = DownsamplingWrapper(env, factor=3)
    assert not same_type(env_mimic, env_ref)
    env_mimic = wrap_like_other_env(env_mimic, env_ref, use_downsampling=True)
    assert same_type(env_mimic, env_ref)

    env_ref = ActNormWrapper(env_ref)
    assert not same_type(env_mimic, env_ref)
    env_mimic = wrap_like_other_env(env_mimic, env_ref)
    assert same_type(env_mimic, env_ref)

    env_ref = ObsNormWrapper(env_ref)
    assert not same_type(env_mimic, env_ref)
    env_mimic = wrap_like_other_env(env_mimic, env_ref)
    assert same_type(env_mimic, env_ref)
    assert same_type(env_mimic.wrapped_env, env_ref.wrapped_env)

    env_ref = ObsRunningNormWrapper(env_ref)
    env_mimic = wrap_like_other_env(env_mimic, env_ref)
    assert same_type(env_mimic, env_ref)
    assert same_type(env_mimic.wrapped_env, env_ref.wrapped_env)

    env_ref = ObsPartialWrapper(env_ref, idcs=["x"])
    env_mimic = wrap_like_other_env(env_mimic, env_ref)
    assert same_type(env_mimic, env_ref)
    assert same_type(env_mimic.wrapped_env, env_ref.wrapped_env)
def test_space(mock_obs_space):
    """
    Check that the observation space of an `ObsNormWrapper` is bounded by -1 from below and by 1 from above.

    :param mock_obs_space: observation space handed to the `MockEnv` that gets wrapped
    """
    wrapped = ObsNormWrapper(MockEnv(obs_space=mock_obs_space))

    # Every entry of the lower bound must be -1, every entry of the upper bound must be 1
    lower, upper = wrapped.obs_space.bounds
    for bound, expected in ((lower, -1), (upper, 1)):
        assert np.all(bound == expected)
def override_obs_bounds(bound_lo: np.ndarray, bound_up: np.ndarray, labels: np.ndarray) -> (np.ndarray, np.ndarray):
    """
    Default overriding method for the bounds of an observation space.

    Overriding is necessary when the observations are scaled with their range, e.g. to compare a deviation over
    different kinds of observations like position and angular velocity. Infinite bounds are not feasible in that
    case. The entries labeled `theta_dot` and `alpha_dot` are overridden with -20 (lower) and 20 (upper) by
    delegating to `ObsNormWrapper.override_bounds`.

    :param bound_lo: lower bound of the observation space
    :param bound_up: upper bound of the observation space
    :param labels: label for each dimension of the observation space to override
    :return: overridden lower and upper bound
    """
    lower_override = {"theta_dot": -20.0, "alpha_dot": -20.0}
    upper_override = {"theta_dot": 20.0, "alpha_dot": 20.0}

    return (
        ObsNormWrapper.override_bounds(bound_lo, lower_override, labels),
        ObsNormWrapper.override_bounds(bound_up, upper_override, labels),
    )
def test_combination():
    """
    Stack several environment wrappers on a `QCartPoleSwingUpSim` and check that they work together.

    Covered are the buffered domain randomization, action and observation normalization, partial observations,
    Gaussian action noise, action delay, as well as the helpers `inner_env`, `typed_env`, and `remove_env`.
    """

    def dummy_rollout(some_env):
        # All rollouts in this test share the same settings, in particular the same seed
        return rollout(some_env, DummyPolicy(some_env.spec), eval=True, seed=0, render_mode=RenderMode())

    env = QCartPoleSwingUpSim(dt=1 / 50.0, max_steps=20)

    # Buffered domain randomization: 3 domains in the buffer, 4 rollouts
    env_r = DomainRandWrapperBuffer(env, create_default_randomizer(env))
    env_r.fill_buffer(num_domains=3)

    dp_before, dp_after = [], []
    for _ in range(4):
        dp_before.append(env_r.domain_param)
        dummy_rollout(env_r)
        dp_after.append(env_r.domain_param)
        # Every rollout must have changed the domain parameters
        assert dp_after[-1] != dp_before[-1]
    # With 3 buffered domains, the 4th rollout ends up with the same parameters as the 1st
    assert dp_after[0] == dp_after[3]

    # Action and observation normalization (on the non-randomized environment)
    explicit_lower = {"x_dot": -213.0, "theta_dot": -42.0}
    explicit_upper = {"x_dot": 213.0, "theta_dot": 42.0, "x": 0.123}
    env_rn = ObsNormWrapper(ActNormWrapper(env), explicit_lb=explicit_lower, explicit_ub=explicit_upper)

    act_lo, act_up = env_rn.act_space.bounds
    assert all(act_lo == -1)
    assert all(act_up == 1)
    obs_lo, obs_up = env_rn.obs_space.bounds
    assert all(obs_lo == -1)
    assert all(obs_up == 1)

    # Normalizing the observations afterwards must yield the observations of the normalized environment
    ro_r = dummy_rollout(env_r)
    ro_rn = dummy_rollout(env_rn)
    assert np.allclose(env_rn._process_obs(ro_r.observations), ro_rn.observations)

    # Partial observations
    env_rnp = ObsPartialWrapper(env_rn, idcs=["x_dot", "cos_theta"])
    ro_rnp = dummy_rollout(env_rnp)

    # Gaussian action noise: same actions recorded, but different observations
    act_shape = env_rnp.act_space.shape
    env_rnpa = GaussianActNoiseWrapper(
        env_rnp, noise_mean=0.5 * np.ones(act_shape), noise_std=0.1 * np.ones(act_shape)
    )
    ro_rnpa = dummy_rollout(env_rnpa)
    assert np.allclose(ro_rnp.actions, ro_rnpa.actions)
    assert not np.allclose(ro_rnp.observations, ro_rnpa.observations)

    # Action delay: same actions recorded, but different observations
    env_rnpd = ActDelayWrapper(env_rnp, delay=3)
    ro_rnpd = dummy_rollout(env_rnpd)
    assert np.allclose(ro_rnp.actions, ro_rnpd.actions)
    assert not np.allclose(ro_rnp.observations, ro_rnpd.observations)

    # Inspecting and modifying the wrapper chain
    assert isinstance(inner_env(env_rnpd), QCartPoleSwingUpSim)
    assert typed_env(env_rnpd, ObsPartialWrapper) is not None
    assert isinstance(env_rnpd, ActDelayWrapper)
    env_rnpdr = remove_env(env_rnpd, ActDelayWrapper)
    assert not isinstance(env_rnpdr, ActDelayWrapper)
def wrap_like_other_env(
    env_targ: Union[SimEnv, RealEnv], env_src: Union[SimEnv, EnvWrapper], use_downsampling: bool = False
) -> Union[SimEnv, RealEnv]:
    """
    Wrap a given real environment like its simulated counterpart (except the domain randomization of course).

    The source environment's wrapper chain is inspected via `typed_env`. For every supported wrapper type that is
    found in the source but not yet in the target, the target environment is wrapped with a new instance of that
    type. A message is printed for every wrapper type found in the source, stating whether it was added or was
    already present.

    :param env_targ: target environment e.g. environment representing the physical device
    :param env_src: source environment e.g. simulation environment used for training
    :param use_downsampling: apply a wrapper that downsamples the actions if the sampling frequencies don't match
    :return: target environment
    """
    if use_downsampling and env_src.dt > env_targ.dt:
        if typed_env(env_targ, DownsamplingWrapper) is None:
            # Plain truncation turns e.g. 0.03 / 0.01 = 2.9999999999999996 into a factor of 2. Snap to the
            # nearest integer if the ratio is integral up to floating point error, and only truncate otherwise.
            dt_ratio = env_src.dt / env_targ.dt
            nearest = int(round(dt_ratio))
            if abs(dt_ratio - nearest) <= 1e-9 * max(1.0, abs(dt_ratio)):
                ds_factor = nearest
            else:
                ds_factor = int(dt_ratio)
            env_targ = DownsamplingWrapper(env_targ, ds_factor)
            print_cbt(f"Wrapped the target environment with a DownsamplingWrapper of factor {ds_factor}.", "y")
        else:
            print_cbt("The target environment was already wrapped with a DownsamplingWrapper.", "y")

    if typed_env(env_src, ActNormWrapper) is not None:
        if typed_env(env_targ, ActNormWrapper) is None:
            env_targ = ActNormWrapper(env_targ)
            print_cbt("Wrapped the target environment with an ActNormWrapper.", "y")
        else:
            print_cbt("The target environment was already wrapped with an ActNormWrapper.", "y")

    if typed_env(env_src, ObsNormWrapper) is not None:
        if typed_env(env_targ, ObsNormWrapper) is None:
            env_targ = ObsNormWrapper(env_targ)
            print_cbt("Wrapped the target environment with an ObsNormWrapper.", "y")
        else:
            print_cbt("The target environment was already wrapped with an ObsNormWrapper.", "y")

    # Checked independently of the ObsNormWrapper, i.e. both can end up in the target's wrapper chain
    if typed_env(env_src, ObsRunningNormWrapper) is not None:
        if typed_env(env_targ, ObsRunningNormWrapper) is None:
            env_targ = ObsRunningNormWrapper(env_targ)
            print_cbt("Wrapped the target environment with an ObsRunningNormWrapper.", "y")
        else:
            print_cbt("The target environment was already wrapped with an ObsRunningNormWrapper.", "y")

    if typed_env(env_src, ObsPartialWrapper) is not None:
        if typed_env(env_targ, ObsPartialWrapper) is None:
            # Reuse the mask of the source's wrapper such that the same observation entries are kept
            env_targ = ObsPartialWrapper(
                env_targ, mask=typed_env(env_src, ObsPartialWrapper).keep_mask, keep_selected=True
            )
            print_cbt("Wrapped the target environment with an ObsPartialWrapper.", "y")
        else:
            print_cbt("The target environment was already wrapped with an ObsPartialWrapper.", "y")

    return env_targ
observeTaskSpaceDiscrepancy=True, usePhysicsNode=True, ) env = PlanarInsertSim(**env_hparams) # Explicit normalization bounds elb = { 'DiscrepDS_Effector_X': -1., 'DiscrepDS_Effector_Z': -1., 'DiscrepDS_Effector_Bd': -1, } eub = { 'DiscrepDS_Effector_X': 1., 'DiscrepDS_Effector_Z': 1., 'DiscrepDS_Effector_Bd': 1, } env = ObsNormWrapper(env, explicit_lb=elb, explicit_ub=eub) randomizer = get_default_randomizer(env) # randomizer = get_empty_randomizer() env = ActDelayWrapper(env) randomizer.add_domain_params(UniformDomainParam(name='act_delay', mean=2, halfspan=2, clip_lo=0, roundint=True)) env = DomainRandWrapperLive(env, randomizer) # Policy policy_hparam = dict( obs_layer=FNN(input_size=env.obs_space.flat_dim, output_size=env.act_space.flat_dim, hidden_sizes=[32, 32], hidden_nonlin=to.tanh, dropout=0.), tau_init=5.,
def adn_variant(dt, max_steps, max_dist_force, physics_engine, normalize_obs=True, obsnorm_cpp=True):
    """
    Roll out a randomly initialized `ADNPolicy` in a `Planar3LinkTASim` environment and plot the potentials.

    :param dt: time step size, passed to the environment and to the policy
    :param max_steps: passed on to the environment as `max_steps`
    :param max_dist_force: passed on to the environment as `max_dist_force`
    :param physics_engine: passed on to the environment as `physicsEngine`
    :param normalize_obs: if `True`, normalize the observations using the explicit bounds defined below
    :param obsnorm_cpp: if `True`, hand the normalization settings to the environment's constructor, else wrap
                        the environment with an `ObsNormWrapper` (only relevant if `normalize_obs` is `True`)
    :return: the rollout returned by `rollout`
    """
    pyrado.set_seed(1001)

    # Explicit normalization bounds
    explicit_lower = {
        "EffectorLoadCell_Fx": -100.0,
        "EffectorLoadCell_Fz": -100.0,
        "Effector_Xd": -1,
        "Effector_Zd": -1,
        "GD_DS0d": -1,
        "GD_DS1d": -1,
        "GD_DS2d": -1,
    }
    explicit_upper = {
        "GD_DS0": 3.0,
        "GD_DS1": 3,
        "GD_DS2": 3,
        "EffectorLoadCell_Fx": 100.0,
        "EffectorLoadCell_Fz": 100.0,
        "Effector_Xd": 0.5,
        "Effector_Zd": 0.5,
        "GD_DS0d": 0.5,
        "GD_DS1d": 0.5,
        "GD_DS2d": 0.5,
        "PredCollCost_h50": 1000.0,
    }

    # Decide where the observation normalization takes place
    norm_in_env = normalize_obs and obsnorm_cpp
    norm_in_wrapper = normalize_obs and not obsnorm_cpp

    if norm_in_env:
        norm_kwargs = dict(
            normalizeObservations=True,
            obsNormOverrideLower=explicit_lower,
            obsNormOverrideUpper=explicit_upper,
        )
    else:
        norm_kwargs = dict()

    # Set up environment
    env = Planar3LinkTASim(
        physicsEngine=physics_engine,
        dt=dt,
        max_steps=max_steps,
        max_dist_force=max_dist_force,
        collisionAvoidanceIK=True,
        taskCombinationMethod="sum",
        **norm_kwargs,
    )
    if norm_in_wrapper:
        env = ObsNormWrapper(env, explicit_lb=explicit_lower, explicit_ub=explicit_upper)

    # Set up random policy
    policy = ADNPolicy(
        spec=env.spec,
        dt=dt,
        tau_init=0.2,
        activation_nonlin=to.sigmoid,
        potentials_dyn_fcn=pd_cubic,
    )
    print_cbt("Running ADNPolicy with random initialization", "c", bright=True)

    # Simulate and plot potentials
    ro = rollout(env, policy, render_mode=RenderMode(video=True), stop_on_done=True)
    plot_potentials(ro)

    return ro
def create_adn_setup(dt, max_steps, max_dist_force, physics_engine, normalize_obs=True, obsnorm_cpp=True):
    """
    Roll out a randomly initialized `ADNPolicy` in a `Planar3LinkTASim` environment and plot the potentials.

    :param dt: time step size, passed on to the environment
    :param max_steps: passed on to the environment as `max_steps`
    :param max_dist_force: passed on to the environment as `max_dist_force`
    :param physics_engine: passed on to the environment as `physicsEngine`
    :param normalize_obs: if `True`, normalize the observations using the explicit bounds defined below
    :param obsnorm_cpp: if `True`, hand the normalization settings to the environment's constructor, else wrap
                        the environment with an `ObsNormWrapper` (only relevant if `normalize_obs` is `True`)
    :return: the rollout returned by `rollout`
    """
    pyrado.set_seed(0)

    # Explicit normalization bounds
    explicit_lower = dict(
        EffectorLoadCell_Fx=-100.0,
        EffectorLoadCell_Fz=-100.0,
        Effector_Xd=-1,
        Effector_Zd=-1,
        GD_DS0d=-1,
        GD_DS1d=-1,
        GD_DS2d=-1,
    )
    explicit_upper = dict(
        GD_DS0=3.0,
        GD_DS1=3,
        GD_DS2=3,
        EffectorLoadCell_Fx=100.0,
        EffectorLoadCell_Fz=100.0,
        Effector_Xd=0.5,
        Effector_Zd=0.5,
        GD_DS0d=0.5,
        GD_DS1d=0.5,
        GD_DS2d=0.5,
        PredCollCost_h50=1000.0,
    )

    # Decide where the observation normalization takes place
    norm_in_env = normalize_obs and obsnorm_cpp
    norm_in_wrapper = normalize_obs and not obsnorm_cpp

    norm_kwargs = {}
    if norm_in_env:
        norm_kwargs.update(
            normalizeObservations=True,
            obsNormOverrideLower=explicit_lower,
            obsNormOverrideUpper=explicit_upper,
        )

    # Set up environment
    env = Planar3LinkTASim(
        physicsEngine=physics_engine,
        dt=dt,
        max_steps=max_steps,
        max_dist_force=max_dist_force,
        positionTasks=True,
        collisionAvoidanceIK=True,
        taskCombinationMethod="sum",
        observeTaskSpaceDiscrepancy=True,
        **norm_kwargs,
    )
    if norm_in_wrapper:
        env = ObsNormWrapper(env, explicit_lb=explicit_lower, explicit_ub=explicit_upper)

    # Set up random policy
    policy = ADNPolicy(
        spec=env.spec,
        tau_init=10.0,
        activation_nonlin=to.sigmoid,
        potentials_dyn_fcn=pd_cubic,
    )
    print_cbt("Running ADNPolicy with random initialization", "c", bright=True)

    # Simulate and plot potentials
    ro = rollout(env, policy, render_mode=RenderMode(video=True), stop_on_done=True)
    plot_potentials(ro)

    return ro
def test_combination(env: SimEnv):
    """
    Stack several environment wrappers on the given simulation environment and check that they work together.

    Covered are the buffered domain randomization, action and observation normalization, partial observations,
    Gaussian action noise, action delay, as well as the helpers `inner_env`, `typed_env`, and `remove_env`.

    :param env: simulation environment to wrap
    """

    def dummy_rollout(some_env):
        # All rollouts in this test share the same settings, in particular the same seed
        return rollout(some_env, DummyPolicy(some_env.spec), eval=True, seed=0, render_mode=RenderMode())

    pyrado.set_seed(0)
    env.max_steps = 20

    # Buffered domain randomization: 3 domains in the buffer, 4 rollouts
    env_r = DomainRandWrapperBuffer(env, create_default_randomizer(env))
    env_r.fill_buffer(num_domains=3)

    dp_before, dp_after = [], []
    for _ in range(4):
        dp_before.append(env_r.domain_param)
        dummy_rollout(env_r)
        dp_after.append(env_r.domain_param)
        # Every rollout must have changed the domain parameters
        assert dp_after[-1] != dp_before[-1]
    # With 3 buffered domains, the 4th rollout ends up with the same parameters as the 1st
    assert dp_after[0] == dp_after[3]

    # Action and observation normalization (on the non-randomized environment)
    explicit_lower = {"x_dot": -213.0, "theta_dot": -42.0}
    explicit_upper = {"x_dot": 213.0, "theta_dot": 42.0, "x": 0.123}
    env_rn = ObsNormWrapper(ActNormWrapper(env), explicit_lb=explicit_lower, explicit_ub=explicit_upper)

    act_lo, act_up = env_rn.act_space.bounds
    assert all(act_lo == -1)
    assert all(act_up == 1)
    obs_lo, obs_up = env_rn.obs_space.bounds
    assert all(obs_lo == -1)
    assert all(obs_up == 1)

    # Normalizing the observations afterwards must yield the observations of the normalized environment
    ro_r = dummy_rollout(env_r)
    ro_rn = dummy_rollout(env_rn)
    assert np.allclose(env_rn._process_obs(ro_r.observations), ro_rn.observations)

    # Partial observations, selected via the labels of the 3rd and the 4th observation dimension
    selected_labels = [env.obs_space.labels[idx] for idx in (2, 3)]
    env_rnp = ObsPartialWrapper(env_rn, idcs=selected_labels)
    ro_rnp = dummy_rollout(env_rnp)

    # Gaussian action noise
    act_shape = env_rnp.act_space.shape
    env_rnpa = GaussianActNoiseWrapper(
        env_rnp, noise_mean=0.5 * np.ones(act_shape), noise_std=0.1 * np.ones(act_shape)
    )
    ro_rnpa = dummy_rollout(env_rnpa)
    # The action noise changed the rollout
    assert not np.allclose(ro_rnp.observations, ro_rnpa.observations)

    # Action delay: same actions recorded, but different observations
    env_rnpd = ActDelayWrapper(env_rnp, delay=3)
    ro_rnpd = dummy_rollout(env_rnpd)
    assert np.allclose(ro_rnp.actions, ro_rnpd.actions)
    assert not np.allclose(ro_rnp.observations, ro_rnpd.observations)

    # Inspecting and modifying the wrapper chain
    assert type(inner_env(env_rnpd)) == type(env)
    assert typed_env(env_rnpd, ObsPartialWrapper) is not None
    assert isinstance(env_rnpd, ActDelayWrapper)
    env_rnpdr = remove_env(env_rnpd, ActDelayWrapper)
    assert not isinstance(env_rnpdr, ActDelayWrapper)