Ejemplo n.º 1
0
def test_act_downsampling():
    """Check which actions reach the wrapped environment when downsampling with a factor of 2."""
    inner_env = MockEnv(
        act_space=BoxSpace(-1, 1, shape=(2,)),
        obs_space=BoxSpace(-1, 1, shape=(2,)),
    )
    wenv = DownsamplingWrapper(inner_env, factor=2)

    # Pairs of (action commanded to the wrapper, action expected at the inner environment).
    # Every second commanded action should be ignored, i.e. the previous one is still the last applied.
    schedule = (
        ([0, 1], [0, 1]),
        ([2, 4], [0, 1]),  # should be ignored
        ([1, 2], [1, 2]),
        ([2, 3], [1, 2]),  # should be ignored
    )
    for commanded, expected in schedule:
        wenv.step(np.array(commanded))
        assert inner_env.last_act == expected
Ejemplo n.º 2
0
def test_combination_delay_downsampling():
    """
    Check the actions arriving at the inner environment for an ActDelayWrapper (delay of 3) that is wrapped by a
    DownsamplingWrapper (factor of 2): after delay number of actions, the actions are downsampled by the factor.
    """
    inner_env = MockEnv(
        act_space=BoxSpace(-1, 1, shape=(2,)),
        obs_space=BoxSpace(-1, 1, shape=(2,)),
    )
    delayed_env = ActDelayWrapper(inner_env, delay=3)
    wenv_dl_ds = DownsamplingWrapper(delayed_env, factor=2)

    # Reset to initialize buffer
    wenv_dl_ds.reset()

    # Pairs of (action commanded to the outermost wrapper, action expected at the inner environment)
    schedule = (
        # The first ones are 0 because the ActDelayWrapper's queue is initialized with 0
        ([0, 1], [0, 0]),
        ([0, 2], [0, 0]),
        ([0, 3], [0, 0]),
        # One time step earlier than the other order of wrappers
        ([0, 4], [0, 1]),
        ([0, 5], [0, 1]),
        ([0, 6], [0, 3]),
        ([0, 7], [0, 3]),
        ([0, 8], [0, 5]),
        ([0, 9], [0, 5]),
        ([1, 0], [0, 7]),
        ([1, 1], [0, 7]),
    )
    for commanded, expected in schedule:
        wenv_dl_ds.step(np.array(commanded))
        assert inner_env.last_act == expected
Ejemplo n.º 3
0
def wrap_like_other_env(env_targ: Env, env_src: [SimEnv, EnvWrapper]) -> Env:
    """
    Wrap a given real environment like its simulated counterpart (except the domain randomization of course).

    The following wrappers are mirrored, in this order: a `DownsamplingWrapper` if the source environment has a
    larger time step size than the target, an `ActNormWrapper`, an `ObsNormWrapper` (or otherwise an
    `ObsRunningNormWrapper`), and an `ObsPartialWrapper` that reuses the source wrapper's `keep_mask`.
    The normalization wrappers are created with their default arguments, i.e. no settings are copied from the source.

    :param env_targ: target environment e.g. environment representing the physical device
    :param env_src: source environment e.g. simulation environment used for training
    :return: target environment, wrapped by every mirrored wrapper
    """
    if env_src.dt > env_targ.dt:
        # The quotient of two floats can end up marginally below the intended integer (e.g. 0.3 / 0.1 yields
        # 2.9999999999999996), which plain truncation would turn into a factor that is one too small.
        # Snap to the nearest integer if the quotient is within tolerance of it, otherwise truncate as before.
        dt_ratio = env_src.dt / env_targ.dt
        nearest = int(round(dt_ratio))
        if abs(dt_ratio - nearest) <= 1e-9 * nearest:
            ds_factor = nearest
        else:
            ds_factor = int(dt_ratio)
        env_targ = DownsamplingWrapper(env_targ, ds_factor)
        print_cbt(
            f'Wrapped the env with an DownsamplingWrapper of factor {ds_factor}.',
            'c')

    if typed_env(env_src, ActNormWrapper) is not None:
        env_targ = ActNormWrapper(env_targ)
        print_cbt('Wrapped the env with an ActNormWrapper.', 'c')

    # Only one of the two observation normalization wrappers is mirrored, the static one takes precedence
    if typed_env(env_src, ObsNormWrapper) is not None:
        env_targ = ObsNormWrapper(env_targ)
        print_cbt('Wrapped the env with an ObsNormWrapper.', 'c')
    elif typed_env(env_src, ObsRunningNormWrapper) is not None:
        env_targ = ObsRunningNormWrapper(env_targ)
        print_cbt('Wrapped the env with an ObsRunningNormWrapper.', 'c')

    if typed_env(env_src, ObsPartialWrapper) is not None:
        # Reuse the mask of kept observations from the source environment's wrapper
        env_targ = ObsPartialWrapper(env_targ,
                                     mask=typed_env(
                                         env_src, ObsPartialWrapper).keep_mask,
                                     keep_selected=True)
        print_cbt('Wrapped the env with an ObsPartialWrapper.', 'c')

    return env_targ
Ejemplo n.º 4
0
def test_wrap_like_other_env(env: SimEnv):
    """
    Check that `wrap_like_other_env` makes the outermost type of a target environment follow the source environment
    while wrappers are added to the source one after another.

    :param env: simulation environment that serves as the basis for both the source and the target environment
    """
    # The target is an independent copy of the environment with a third of the time step size
    targ = deepcopy(env)
    targ.dt /= 3

    # Downsampling
    src = DownsamplingWrapper(env, factor=3)
    assert type(targ) is not type(src)
    targ = wrap_like_other_env(targ, src, use_downsampling=True)
    assert type(targ) is type(src)

    # Action normalization
    src = ActNormWrapper(src)
    assert type(targ) is not type(src)
    targ = wrap_like_other_env(targ, src)
    assert type(targ) is type(src)

    # Observation normalization
    src = ObsNormWrapper(src)
    assert type(targ) is not type(src)
    targ = wrap_like_other_env(targ, src)
    assert type(targ) is type(src)
    assert type(targ.wrapped_env) is type(src.wrapped_env)

    # Running observation normalization
    src = ObsRunningNormWrapper(src)
    targ = wrap_like_other_env(targ, src)
    assert type(targ) is type(src)
    assert type(targ.wrapped_env) is type(src.wrapped_env)

    # Partial observations
    src = ObsPartialWrapper(src, idcs=["x"])
    targ = wrap_like_other_env(targ, src)
    assert type(targ) is type(src)
    assert type(targ.wrapped_env) is type(src.wrapped_env)
Ejemplo n.º 5
0
def create_qbb_setup(factor, dt, max_steps, render_mode=None):
    """
    Roll out the Quanser Ball-Balancer simulation three times and plot the resulting actions over time.

    The three rollouts use the plain environment with time step size `dt`, the plain environment with time step size
    `dt * factor`, and the environment wrapped by a `DownsamplingWrapper` with time step size `dt`.
    The plot labels ("500 Hz", "100 Hz") are fixed strings, i.e. they are not derived from `dt` and `factor`.

    :param factor: downsampling factor, also used as the ratio between the coarse and the fine time step size
    :param dt: fine time step size, interpreted in seconds for the time axis
    :param max_steps: maximum number of steps for the rollouts with the fine time step size
    :param render_mode: rendering mode passed to all rollouts, by default `RenderMode(video=True)`
    """
    if render_mode is None:
        render_mode = RenderMode(video=True)

    # Set up environment
    init_state = np.array([0, 0, 0.1, 0.1, 0, 0, 0, 0])
    env = QBallBalancerSim(dt=dt, max_steps=max_steps)
    env = ActNormWrapper(env)

    # Set up policy
    policy = QBallBalancerPDCtrl(env.spec)

    # Simulate with the fine time step size
    ro = rollout(
        env,
        policy,
        reset_kwargs=dict(domain_param=dict(dt=dt), init_state=init_state),
        render_mode=render_mode,
        max_steps=max_steps,
    )
    act_500Hz = ro.actions

    # Simulate with the coarse time step size, using accordingly fewer steps
    ro = rollout(
        env,
        policy,
        reset_kwargs=dict(domain_param=dict(dt=dt * factor),
                          init_state=init_state),
        render_mode=render_mode,
        max_steps=int(max_steps / factor),
    )
    act_100Hz = ro.actions

    # Simulate with the fine time step size, but downsample the actions
    env = DownsamplingWrapper(env, factor)
    ro = rollout(
        env,
        policy,
        reset_kwargs=dict(domain_param=dict(dt=dt), init_state=init_state),
        render_mode=render_mode,
        max_steps=max_steps,
    )
    act_500Hz_w = ro.actions

    # Time in seconds: the k-th action is applied at k times the step size of the respective rollout.
    # The coarse rollout advances by dt * factor per step, hence its time axis must be scaled by the factor.
    time_500Hz = np.arange(len(act_500Hz)) * dt
    time_100Hz = np.arange(len(act_100Hz)) * dt * factor
    time_500Hz_w = np.arange(len(act_500Hz_w)) * dt

    # Plot one subplot per action dimension
    _, axs = plt.subplots(nrows=2)
    for i in range(2):
        axs[i].plot(time_500Hz, act_500Hz[:, i], label="500 Hz (original)")
        axs[i].plot(time_100Hz, act_100Hz[:, i], label="100 Hz", ls="--")
        axs[i].plot(time_500Hz_w,
                    act_500Hz_w[:, i],
                    label="500 Hz (wrapped)",
                    ls="--")
        axs[i].legend()
        axs[i].set_ylabel(env.act_space.labels[i])
    axs[1].set_xlabel("time [s]")
Ejemplo n.º 6
0
def create_qq_setup(factor, dt, max_steps, render_mode):
    """
    Roll out the Quanser Qube swing-up simulation three times and plot the resulting actions over time.

    The three rollouts use the plain environment with time step size `dt`, the plain environment with time step size
    `dt * factor`, and the environment wrapped by a `DownsamplingWrapper` with time step size `dt`.
    The plot labels ("500 Hz", "100 Hz") are fixed strings, i.e. they are not derived from `dt` and `factor`.

    :param factor: downsampling factor, also used as the ratio between the coarse and the fine time step size
    :param dt: fine time step size, interpreted in seconds for the time axis
    :param max_steps: maximum number of steps for the rollouts with the fine time step size
    :param render_mode: rendering mode passed to all rollouts
    """
    # Set up environment
    init_state = np.array([0.1, 0.0, 0.0, 0.0])
    env = QQubeSwingUpSim(dt=dt, max_steps=max_steps)
    env = ActNormWrapper(env)

    # Set up policy
    policy = QQubeSwingUpAndBalanceCtrl(env.spec)

    # Simulate with the fine time step size
    ro = rollout(
        env,
        policy,
        reset_kwargs=dict(domain_param=dict(dt=dt), init_state=init_state),
        render_mode=render_mode,
        max_steps=max_steps,
    )
    act_500Hz = ro.actions

    # Simulate with the coarse time step size, using accordingly fewer steps
    ro = rollout(
        env,
        policy,
        reset_kwargs=dict(domain_param=dict(dt=dt * factor),
                          init_state=init_state),
        render_mode=render_mode,
        max_steps=int(max_steps / factor),
    )
    act_100Hz = ro.actions

    # Simulate with the fine time step size, but downsample the actions
    env = DownsamplingWrapper(env, factor)
    ro = rollout(
        env,
        policy,
        reset_kwargs=dict(domain_param=dict(dt=dt), init_state=init_state),
        render_mode=render_mode,
        max_steps=max_steps,
    )
    act_500Hz_w = ro.actions

    # Time in seconds: the k-th action is applied at k times the step size of the respective rollout.
    # The coarse rollout advances by dt * factor per step, hence its time axis must be scaled by the factor.
    time_500Hz = np.arange(len(act_500Hz)) * dt
    time_100Hz = np.arange(len(act_100Hz)) * dt * factor
    time_500Hz_w = np.arange(len(act_500Hz_w)) * dt

    # Plot
    _, ax = plt.subplots(nrows=1)
    ax.plot(time_500Hz, act_500Hz, label="500 Hz (original)")
    ax.plot(time_100Hz, act_100Hz, label="100 Hz", ls="--")
    ax.plot(time_500Hz_w, act_500Hz_w, label="500 Hz (wrapped)", ls="--")
    ax.legend()
    ax.set_ylabel(env.act_space.labels)
    ax.set_xlabel("time [s]")
Ejemplo n.º 7
0
def test_combination_wrappers_domain_params(env: SimEnv):
    """
    Check that the domain parameters contributed by stacked wrappers are accessible at the outermost wrapper.

    :param env: simulation environment to be wrapped
    """
    env_d = DownsamplingWrapper(env, factor=5)
    obs_shape = env_d.obs_space.shape

    # Stack a noise wrapper and a domain parameter transformation on top of the downsampling wrapper
    env_do = GaussianObsNoiseWrapper(
        env_d,
        noise_std=np.full(obs_shape, 2.0),
        noise_mean=np.full(obs_shape, 3.0),
    )
    transformed_params = list(env_do.supported_domain_param)
    env_dot = LogDomainParamTransform(env_do, mask=transformed_params)

    assert env_dot.domain_param["downsampling"] == 5
    for key, value in (("obs_noise_std", 2.0), ("obs_noise_mean", 3.0)):
        assert np.all(env_dot.domain_param[key] == np.full(obs_shape, value))
Ejemplo n.º 8
0
def test_no_downsampling():
    """Check that every action reaches the wrapped environment when the downsampling factor is 1."""
    inner_env = MockEnv(
        act_space=BoxSpace(-1, 1, shape=(2,)),
        obs_space=BoxSpace(-1, 1, shape=(2,)),
    )
    wenv = DownsamplingWrapper(inner_env, factor=1)

    # Each commanded action is expected to be passed on unchanged
    for act in ([4, 1], [7, 5]):
        wenv.step(np.array(act))
        assert inner_env.last_act == act
Ejemplo n.º 9
0
def create_qq_setup(factor, dt, max_steps):
    """
    Roll out the Quanser Qube swing-up simulation three times, then plot and show the resulting actions.

    The three rollouts use the plain environment with time step size `dt`, the plain environment with time step size
    `dt * factor`, and the environment wrapped by a `DownsamplingWrapper` with time step size `dt`.
    The actions of the coarse rollout are repeated `factor` times (zero-order hold) to be comparable step by step.
    The plot labels ("500 Hz", "100 Hz") are fixed strings, i.e. they are not derived from `dt` and `factor`.

    :param factor: downsampling factor, also used as the ratio between the coarse and the fine time step size
    :param dt: fine time step size
    :param max_steps: maximum number of steps for the rollouts with the fine time step size
    """
    # Set up environment
    init_state = np.array([0.1, 0.0, 0.0, 0.0])
    env = QQubeSwingUpSim(dt=dt, max_steps=max_steps)
    env = ActNormWrapper(env)

    # Set up policy
    policy = QQubeSwingUpAndBalanceCtrl(env.spec)

    # Simulate with the fine time step size
    ro = rollout(
        env,
        policy,
        reset_kwargs=dict(domain_param=dict(dt=dt), init_state=init_state),
        render_mode=RenderMode(video=True),
        max_steps=max_steps,
    )
    act_500Hz = ro.actions

    # Simulate with the coarse time step size, using accordingly fewer steps
    ro = rollout(
        env,
        policy,
        reset_kwargs=dict(domain_param=dict(dt=dt * factor),
                          init_state=init_state),
        render_mode=RenderMode(video=True),
        max_steps=int(max_steps / factor),
    )
    act_100Hz = ro.actions
    # Zero-order hold: every coarse action spans `factor` fine steps (was hard-coded to 5 before)
    act_100Hz_zoh = np.repeat(act_100Hz, factor, axis=0)

    # Simulate with the fine time step size, but downsample the actions
    env = DownsamplingWrapper(env, factor)
    ro = rollout(
        env,
        policy,
        reset_kwargs=dict(domain_param=dict(dt=dt), init_state=init_state),
        render_mode=RenderMode(video=True),
        max_steps=max_steps,
    )
    act_500Hz_wrapped = ro.actions

    # Plot
    _, ax = plt.subplots(nrows=1)
    ax.plot(act_500Hz, label="500 Hz (original)")
    ax.plot(act_100Hz_zoh, label="100 Hz (zoh)")
    ax.plot(act_500Hz_wrapped, label="500 Hz (wrapped)")
    ax.legend()
    ax.set_ylabel(env.act_space.labels)
    ax.set_xlabel("time steps")
    plt.show()
Ejemplo n.º 10
0
                 reset_kwargs=dict(domain_param=dict(dt=dt),
                                   init_state=init_state),
                 render_mode=RenderMode(video=True),
                 max_steps=max_steps)
    act_500Hz = ro.actions

    ro = rollout(env,
                 policy,
                 reset_kwargs=dict(domain_param=dict(dt=dt * factor),
                                   init_state=init_state),
                 render_mode=RenderMode(video=True),
                 max_steps=int(max_steps / factor))
    act_100Hz = ro.actions
    act_100Hz_zoh = np.repeat(act_100Hz, 5, axis=0)

    env = DownsamplingWrapper(env, factor)
    ro = rollout(env,
                 policy,
                 reset_kwargs=dict(domain_param=dict(dt=dt),
                                   init_state=init_state),
                 render_mode=RenderMode(video=True),
                 max_steps=max_steps)
    act_500Hz_wrapped = ro.actions

    # Plot
    _, axs = plt.subplots(nrows=2)
    for i in range(2):
        axs[i].plot(act_500Hz[:, i], label='500 Hz (original)')
        axs[i].plot(act_100Hz_zoh[:, i], label='100 Hz (zoh)')
        axs[i].plot(act_500Hz_wrapped[:, i], label='500 Hz (wrapped)')
        axs[i].legend()
Ejemplo n.º 11
0
def wrap_like_other_env(
        env_targ: Union[SimEnv, RealEnv],
        env_src: [SimEnv, EnvWrapper],
        use_downsampling: bool = False) -> Union[SimEnv, RealEnv]:
    """
    Wrap a given real environment like its simulated counterpart (except the domain randomization of course).

    The following wrappers are mirrored, in this order: `DownsamplingWrapper` (only if requested and if the source
    environment has a larger time step size than the target), `ActNormWrapper`, `ObsNormWrapper`,
    `ObsRunningNormWrapper`, and `ObsPartialWrapper` (reusing the source wrapper's `keep_mask`).
    A wrapper type which is already found in the target environment is not added a second time.
    The normalization wrappers are created with their default arguments, i.e. no settings are copied from the source.

    :param env_targ: target environment e.g. environment representing the physical device
    :param env_src: source environment e.g. simulation environment used for training
    :param use_downsampling: apply a wrapper that downsamples the actions if the sampling frequencies don't match
    :return: target environment, wrapped by every mirrored wrapper
    """
    if use_downsampling and env_src.dt > env_targ.dt:
        if typed_env(env_targ, DownsamplingWrapper) is None:
            # The quotient of two floats can end up marginally below the intended integer (e.g. 0.3 / 0.1 yields
            # 2.9999999999999996), which plain truncation would turn into a factor that is one too small.
            # Snap to the nearest integer if the quotient is within tolerance of it, otherwise truncate as before.
            dt_ratio = env_src.dt / env_targ.dt
            nearest = int(round(dt_ratio))
            if abs(dt_ratio - nearest) <= 1e-9 * nearest:
                ds_factor = nearest
            else:
                ds_factor = int(dt_ratio)
            env_targ = DownsamplingWrapper(env_targ, ds_factor)
            print_cbt(
                f"Wrapped the target environment with a DownsamplingWrapper of factor {ds_factor}.",
                "y")
        else:
            print_cbt(
                "The target environment was already wrapped with a DownsamplingWrapper.",
                "y")

    if typed_env(env_src, ActNormWrapper) is not None:
        if typed_env(env_targ, ActNormWrapper) is None:
            env_targ = ActNormWrapper(env_targ)
            print_cbt("Wrapped the target environment with an ActNormWrapper.",
                      "y")
        else:
            print_cbt(
                "The target environment was already wrapped with an ActNormWrapper.",
                "y")

    if typed_env(env_src, ObsNormWrapper) is not None:
        if typed_env(env_targ, ObsNormWrapper) is None:
            env_targ = ObsNormWrapper(env_targ)
            print_cbt("Wrapped the target environment with an ObsNormWrapper.",
                      "y")
        else:
            print_cbt(
                "The target environment was already wrapped with an ObsNormWrapper.",
                "y")

    if typed_env(env_src, ObsRunningNormWrapper) is not None:
        if typed_env(env_targ, ObsRunningNormWrapper) is None:
            env_targ = ObsRunningNormWrapper(env_targ)
            print_cbt(
                "Wrapped the target environment with an ObsRunningNormWrapper.",
                "y")
        else:
            print_cbt(
                "The target environment was already wrapped with an ObsRunningNormWrapper.",
                "y")

    if typed_env(env_src, ObsPartialWrapper) is not None:
        if typed_env(env_targ, ObsPartialWrapper) is None:
            # Reuse the mask of kept observations from the source environment's wrapper
            env_targ = ObsPartialWrapper(env_targ,
                                         mask=typed_env(
                                             env_src,
                                             ObsPartialWrapper).keep_mask,
                                         keep_selected=True)
            print_cbt(
                "Wrapped the target environment with an ObsPartialWrapper.",
                "y")
        else:
            print_cbt(
                "The target environment was already wrapped with an ObsPartialWrapper.",
                "y")

    return env_targ
Ejemplo n.º 12
0
def test_combination_downsampling_delay():
    """
    Check the actions arriving at the inner environment for a DownsamplingWrapper (factor of 2) that is wrapped by
    an ActDelayWrapper (delay of 3).
    """
    inner_env = MockEnv(
        act_space=BoxSpace(-1, 1, shape=(2,)),
        obs_space=BoxSpace(-1, 1, shape=(2,)),
    )
    downsampled_env = DownsamplingWrapper(inner_env, factor=2)
    wenv_ds_dl = ActDelayWrapper(downsampled_env, delay=3)

    # Reset to initialize buffer
    wenv_ds_dl.reset()

    # Pairs of (action commanded to the outermost wrapper, action expected at the inner environment)
    schedule = (
        # The first ones are 0 because the ActDelayWrapper's queue is initialized with 0
        ([0, 1], [0, 0]),
        ([0, 2], [0, 0]),
        ([0, 3], [0, 0]),
        # Intuitively one would think last_act would be [0, 1] here, but this is the effect of the wrappers'
        # combination
        ([0, 4], [0, 0]),
        ([0, 5], [0, 2]),
        ([0, 6], [0, 2]),
        ([0, 7], [0, 4]),
        ([0, 8], [0, 4]),
        ([0, 9], [0, 6]),
        ([1, 0], [0, 6]),
    )
    for commanded, expected in schedule:
        wenv_ds_dl.step(np.array(commanded))
        assert inner_env.last_act == expected
Ejemplo n.º 13
0
def test_domain_param():
    """Check that the downsampling factor can be changed by setting the wrapper's domain parameters."""
    inner_env = MockEnv(
        act_space=BoxSpace(-1, 1, shape=(2,)),
        obs_space=BoxSpace(-1, 1, shape=(2,)),
    )
    wenv = DownsamplingWrapper(inner_env, factor=2)

    def step_and_check(commanded, expected):
        # Command an action to the wrapper and compare the one that arrived at the inner environment
        wenv.step(np.array(commanded))
        assert inner_env.last_act == expected

    # Reset to initialize buffer
    wenv.reset()

    # Perform some actions with the initial factor of 2
    step_and_check([0, 1], [0, 1])
    step_and_check([2, 4], [0, 1])
    step_and_check([4, 4], [4, 4])

    # Change the downsampling and reset
    wenv.domain_param = {'downsampling': 1}
    wenv.reset()

    # Now every action is expected to be passed on
    for act in ([1, 2], [2, 3], [8, 9]):
        step_and_check(act, act)
Ejemplo n.º 14
0
def test_reset():
    """Check that resetting the DownsamplingWrapper clears its stored action and its counter."""
    inner_env = MockEnv(
        act_space=BoxSpace(-1, 1, shape=(2,)),
        obs_space=BoxSpace(-1, 1, shape=(2,)),
    )
    wenv = DownsamplingWrapper(inner_env, factor=2)

    def step_and_check(commanded, expected):
        # Command an action to the wrapper and compare the one that arrived at the inner environment
        wenv.step(np.array(commanded))
        assert inner_env.last_act == expected

    # Perform some actions
    step_and_check([0, 4], [0, 4])
    step_and_check([4, 4], [0, 4])
    step_and_check([4, 4], [4, 4])

    # The next action would be [4, 4] again, but now we reset
    wenv.reset()
    assert wenv._act_last is None
    assert wenv._cnt == 0

    # After the reset, the first commanded action is applied and the second one is skipped
    step_and_check([1, 2], [1, 2])
    step_and_check([2, 3], [1, 2])