Esempio n. 1
0
def test_order_act_noise_act_norm(env):
    """
    Check that the stacking order of the action noise and action normalization wrappers matters.

    Two stacks are built around the same environment: noise inside / normalization outside, and
    normalization inside / noise outside. Samples drawn from the outer action spaces must coincide,
    while the observations obtained by stepping both stacks with the same action must not.

    :param env: environment to be wrapped; its `act_space` is used to size the noise parameters
    """

    def add_act_noise(inner):
        # The noise parameters are always sized after the action space of the unwrapped environment
        return GaussianActNoiseWrapper(
            inner,
            noise_mean=0.2 * np.ones(env.act_space.shape),
            noise_std=0.1 * np.ones(env.act_space.shape),
        )

    # Stack A: noise wrapper first, normalization wrapper on top
    noise_then_norm = ActNormWrapper(add_act_noise(env))

    # Stack B: normalization wrapper first, noise wrapper on top
    norm_then_noise = add_act_noise(ActNormWrapper(env))

    stacks = (noise_then_norm, norm_then_noise)

    # Draw samples directly from the action spaces of both stacks, using the same seed for each draw
    for seed in range(3):
        samples = []
        for stack in stacks:
            pyrado.set_seed(seed)
            samples.append(stack.act_space.sample_uniform())

        # The samples never went through _process_act, hence they have to be identical
        assert np.all(samples[0] == samples[1])

    # Pass one sampled action through both stacks
    for seed in range(3):
        # Keep the random action small such that the denormalization does not map it to the act_space limits
        small_act = 0.01 * env.act_space.sample_uniform()

        first_obs = []
        for stack in stacks:
            pyrado.set_seed(seed)
            stack.reset()
            obs, _, _, _ = stack.step(small_act)
            first_obs.append(obs)

        # Normalizing first or adding noise first must lead to different results
        assert not np.all(first_obs[0] == first_obs[1])
def test_combination():
    """
    Stack several environment wrappers on a `QCartPoleSwingUpSim` and check their combined behavior.

    Covered are the buffered domain randomization, action and observation normalization, partial
    observations, Gaussian action noise, action delay, and the wrapper inspection / removal helpers.
    """

    def run(some_env):
        # Every rollout in this test uses a dummy policy, evaluation mode, seed 0, and default render mode
        return rollout(some_env, DummyPolicy(some_env.spec), eval=True, seed=0, render_mode=RenderMode())

    env = QCartPoleSwingUpSim(dt=1 / 50.0, max_steps=20)

    # Domain randomization with a buffer of 3 parameter sets
    env_r = DomainRandWrapperBuffer(env, create_default_randomizer(env))
    env_r.fill_buffer(num_domains=3)

    params_after = []
    for _ in range(4):
        before = env_r.domain_param
        run(env_r)
        after = env_r.domain_param
        params_after.append(after)
        # Every rollout has to change the domain parameters
        assert after != before
    # Four rollouts on a buffer of three sets: the first and the fourth result must match
    assert params_after[0] == params_after[3]

    # Action normalization followed by observation normalization with some explicit bounds
    explicit_lower = {'x_dot': -213., 'theta_dot': -42.}
    explicit_upper = {'x_dot': 213., 'theta_dot': 42., 'x': 0.123}
    env_rn = ObsNormWrapper(ActNormWrapper(env), explicit_lb=explicit_lower, explicit_ub=explicit_upper)

    act_lo, act_hi = env_rn.act_space.bounds
    assert all(act_lo == -1)
    assert all(act_hi == 1)
    obs_lo, obs_hi = env_rn.obs_space.bounds
    assert all(obs_lo == -1)
    assert all(obs_hi == 1)

    # Normalizing the observations of the randomized rollout must reproduce the normalized rollout
    ro_r = run(env_r)
    ro_rn = run(env_rn)
    assert np.allclose(env_rn._process_obs(ro_r.observations), ro_rn.observations)

    # Only keep a subset of the observations
    env_rnp = ObsPartialWrapper(env_rn, idcs=['x_dot', 'cos_theta'])
    ro_rnp = run(env_rnp)

    # Action noise: the recorded actions are expected to match, the observations are not
    noise_shape = env_rnp.act_space.shape
    env_rnpa = GaussianActNoiseWrapper(
        env_rnp,
        noise_mean=0.5 * np.ones(noise_shape),
        noise_std=0.1 * np.ones(env_rnp.act_space.shape),
    )
    ro_rnpa = run(env_rnpa)
    assert np.allclose(ro_rnp.actions, ro_rnpa.actions)
    assert not np.allclose(ro_rnp.observations, ro_rnpa.observations)

    # Action delay: the recorded actions are expected to match, the observations are not
    env_rnpd = ActDelayWrapper(env_rnp, delay=3)
    ro_rnpd = run(env_rnpd)
    assert np.allclose(ro_rnp.actions, ro_rnpd.actions)
    assert not np.allclose(ro_rnp.observations, ro_rnpd.observations)

    # Inspect the wrapper chain and remove the outermost wrapper again
    assert isinstance(inner_env(env_rnpd), QCartPoleSwingUpSim)
    assert typed_env(env_rnpd, ObsPartialWrapper) is not None
    assert isinstance(env_rnpd, ActDelayWrapper)
    without_delay = remove_env(env_rnpd, ActDelayWrapper)
    assert not isinstance(without_delay, ActDelayWrapper)
Esempio n. 3
0
def test_act_noise_simple(env: SimEnv):
    """
    Check that the Gaussian action noise wrapper alters the observation obtained from a step.

    Three noise configurations are tried in turn: std only, mean only, and both mean and std.

    :param env: environment to be wrapped; it is also stepped directly to obtain the reference observation
    """
    act_shape = env.act_space.shape
    noise_configs = (
        # Typical case with zero mean and non-zero std
        dict(noise_std=0.2 * np.ones(act_shape)),
        # Unusual case with non-zero mean and zero std
        dict(noise_mean=0.1 * np.ones(act_shape)),
        # General case with non-zero mean and non-zero std
        dict(noise_mean=0.1 * np.ones(act_shape), noise_std=0.2 * np.ones(act_shape)),
    )

    for noise_kwargs in noise_configs:
        wrapped_env = GaussianActNoiseWrapper(env, **noise_kwargs)

        for _ in range(3):
            # Draw a random action and apply it without and with the noise wrapper
            rand_act = env.act_space.sample_uniform()
            wrapped_env.reset()
            # NOTE(review): `env` is stepped directly and then again through the wrapper without a reset in
            # between; presumably both calls advance the same simulation - confirm that this is intended
            obs_nom, _, _, _ = env.step(rand_act)
            obs_wrapped, _, _, _ = wrapped_env.step(rand_act)

            # Different actions can not lead to the same observation
            assert not np.all(obs_nom == obs_wrapped)
Esempio n. 4
0
def test_combination(env: SimEnv):
    """
    Stack several environment wrappers on the given environment and check their combined behavior.

    Covered are the buffered domain randomization, action and observation normalization, partial
    observations, Gaussian action noise, action delay, and the wrapper inspection / removal helpers.

    :param env: environment to be wrapped; its `max_steps` attribute is set to 20 by this test
    """

    def run(some_env):
        # Every rollout in this test uses a dummy policy, evaluation mode, seed 0, and default render mode
        return rollout(
            some_env,
            DummyPolicy(some_env.spec),
            eval=True,
            seed=0,
            render_mode=RenderMode(),
        )

    pyrado.set_seed(0)
    env.max_steps = 20

    # Domain randomization with a buffer of 3 parameter sets
    env_r = DomainRandWrapperBuffer(env, create_default_randomizer(env))
    env_r.fill_buffer(num_domains=3)

    params_after = []
    for _ in range(4):
        before = env_r.domain_param
        run(env_r)
        after = env_r.domain_param
        params_after.append(after)
        # Every rollout has to change the domain parameters
        assert after != before
    # Four rollouts on a buffer of three sets: the first and the fourth result must match
    assert params_after[0] == params_after[3]

    # Action normalization followed by observation normalization with some explicit bounds
    explicit_lower = {"x_dot": -213.0, "theta_dot": -42.0}
    explicit_upper = {"x_dot": 213.0, "theta_dot": 42.0, "x": 0.123}
    env_rn = ObsNormWrapper(ActNormWrapper(env), explicit_lb=explicit_lower, explicit_ub=explicit_upper)

    act_lo, act_hi = env_rn.act_space.bounds
    assert all(act_lo == -1)
    assert all(act_hi == 1)
    obs_lo, obs_hi = env_rn.obs_space.bounds
    assert all(obs_lo == -1)
    assert all(obs_hi == 1)

    # Normalizing the observations of the randomized rollout must reproduce the normalized rollout
    ro_r = run(env_r)
    ro_rn = run(env_rn)
    assert np.allclose(env_rn._process_obs(ro_r.observations), ro_rn.observations)

    # Only keep the observations with the labels at index 2 and 3
    kept_labels = [env.obs_space.labels[2], env.obs_space.labels[3]]
    env_rnp = ObsPartialWrapper(env_rn, idcs=kept_labels)
    ro_rnp = run(env_rnp)

    # Action noise: the observations of the rollout have to change
    env_rnpa = GaussianActNoiseWrapper(
        env_rnp,
        noise_mean=0.5 * np.ones(env_rnp.act_space.shape),
        noise_std=0.1 * np.ones(env_rnp.act_space.shape),
    )
    ro_rnpa = run(env_rnpa)
    assert not np.allclose(ro_rnp.observations, ro_rnpa.observations)

    # Action delay: the recorded actions are expected to match, the observations are not
    env_rnpd = ActDelayWrapper(env_rnp, delay=3)
    ro_rnpd = run(env_rnpd)
    assert np.allclose(ro_rnp.actions, ro_rnpd.actions)
    assert not np.allclose(ro_rnp.observations, ro_rnpd.observations)

    # Inspect the wrapper chain and remove the outermost wrapper again
    assert type(inner_env(env_rnpd)) is type(env)
    assert typed_env(env_rnpd, ObsPartialWrapper) is not None
    assert isinstance(env_rnpd, ActDelayWrapper)
    without_delay = remove_env(env_rnpd, ActDelayWrapper)
    assert not isinstance(without_delay, ActDelayWrapper)