def test_mask_invert():
    """With ``keep_selected=True``, only the observation entries selected by the mask are passed on."""
    obs_space = BoxSpace([-1, -2, -3], [1, 2, 3], labels=['one', 'two', 'three'])
    mockenv = MockEnv(obs_space=obs_space)

    # The mask selects the second element; because of keep_selected=True, the checks below expect
    # exactly this element to be the only one that remains
    wenv = ObsPartialWrapper(mockenv, [0, 1, 0], keep_selected=True)

    # Feed full observations to the mock env and compare the reduced observations
    for full_obs, expected in (([1, 2, 3], [2]), ([4, 7, 9], [7])):
        mockenv.next_obs = full_obs
        obs, _, _, _ = wenv.step(None)
        assert list(obs) == expected
Ejemplo n.º 2
0
def test_combination_downsampling_delay():
    """Wrap the env with a DownsamplingWrapper (factor 2) first and an ActDelayWrapper (delay 3) on top."""
    mockenv = MockEnv(
        act_space=BoxSpace(-1, 1, shape=(2, )),
        obs_space=BoxSpace(-1, 1, shape=(2, )),
    )
    wenv_ds_dl = ActDelayWrapper(DownsamplingWrapper(mockenv, factor=2), delay=3)

    # Reset to initialize buffer
    wenv_ds_dl.reset()

    # Every entry pairs the commanded action with the action expected at the mock env after that step
    schedule = (
        # The first ones are 0 because the ActDelayWrapper's queue is initialized with 0
        ([0, 1], [0, 0]),
        ([0, 2], [0, 0]),
        ([0, 3], [0, 0]),
        # Intuitively one would think last_act would be [0, 1] here, but this is the effect of the
        # wrappers' combination
        ([0, 4], [0, 0]),
        ([0, 5], [0, 2]),
        ([0, 6], [0, 2]),
        ([0, 7], [0, 4]),
        ([0, 8], [0, 4]),
        ([0, 9], [0, 6]),
        ([1, 0], [0, 6]),
    )
    for commanded, expected in schedule:
        wenv_ds_dl.step(np.array(commanded))
        assert mockenv.last_act == expected
Ejemplo n.º 3
0
def test_domain_param():
    """Check that the 'downsampling' domain parameter can be changed, followed by a reset."""
    mockenv = MockEnv(
        act_space=BoxSpace(-1, 1, shape=(2, )),
        obs_space=BoxSpace(-1, 1, shape=(2, )),
    )
    wenv = DownsamplingWrapper(mockenv, factor=2)

    def step_and_check(pairs):
        # Apply each commanded action and compare it with what arrived at the mock env
        for commanded, expected in pairs:
            wenv.step(np.array(commanded))
            assert mockenv.last_act == expected

    # Reset to initialize buffer
    wenv.reset()

    # With a factor of 2, the second command is expected to be replaced by the first one
    step_and_check([
        ([0, 1], [0, 1]),
        ([2, 4], [0, 1]),
        ([4, 4], [4, 4]),
    ])

    # Change the downsampling factor via the domain parameters and reset
    wenv.domain_param = {'downsampling': 1}
    wenv.reset()

    # With a factor of 1, every command is expected to arrive unchanged
    step_and_check([
        ([1, 2], [1, 2]),
        ([2, 3], [2, 3]),
        ([8, 9], [8, 9]),
    ])
Ejemplo n.º 4
0
def test_combination_delay_downsampling():
    """ After delay number of actions, the actions are downsampled by the factor """
    mockenv = MockEnv(
        act_space=BoxSpace(-1, 1, shape=(2, )),
        obs_space=BoxSpace(-1, 1, shape=(2, )),
    )
    # Here the delay wrapper sits directly on the env and the downsampling wrapper is the outermost one
    wenv_dl_ds = DownsamplingWrapper(ActDelayWrapper(mockenv, delay=3), factor=2)

    # Reset to initialize buffer
    wenv_dl_ds.reset()

    # Every entry pairs the commanded action with the action expected at the mock env after that step
    schedule = (
        # The first ones are 0 because the ActDelayWrapper's queue is initialized with 0
        ([0, 1], [0, 0]),
        ([0, 2], [0, 0]),
        ([0, 3], [0, 0]),
        # One time step earlier than the other order of wrappers
        ([0, 4], [0, 1]),
        ([0, 5], [0, 1]),
        ([0, 6], [0, 3]),
        ([0, 7], [0, 3]),
        ([0, 8], [0, 5]),
        ([0, 9], [0, 5]),
        ([1, 0], [0, 7]),
        ([1, 1], [0, 7]),
    )
    for commanded, expected in schedule:
        wenv_dl_ds.step(np.array(commanded))
        assert mockenv.last_act == expected
Ejemplo n.º 5
0
def test_denormalization(mock_obs_space):
    """
    Check that every observation returned through the ObsNormWrapper lies within [-1, 1].

    :param mock_obs_space: observation space handed to the mock environment (fixture defined outside this view)
    """
    mockenv = MockEnv(obs_space=mock_obs_space)
    wenv = ObsNormWrapper(mockenv)

    for _ in range(100):
        # Generate random observations
        obs, _, _, _ = wenv.step(np.array([0, 0, 0]))
        # The reduction must actually be called: a bare `.all` is a bound method, which is always truthy,
        # so the previous assertion could never fail
        assert np.all(abs(obs) <= 1)
Ejemplo n.º 6
0
def test_space(mock_obs_space):
    """
    The observation space of the ObsNormWrapper must have -1 as lower and 1 as upper bound in every dimension.

    :param mock_obs_space: observation space handed to the mock environment (fixture defined outside this view)
    """
    wenv = ObsNormWrapper(MockEnv(obs_space=mock_obs_space))

    # Check observation space bounds
    lower, upper = wenv.obs_space.bounds
    assert np.all(lower == -1)
    assert np.all(upper == 1)
Ejemplo n.º 7
0
def test_no_downsampling():
    """With a downsampling factor of 1, every commanded action is expected to reach the env unchanged."""
    mockenv = MockEnv(
        act_space=BoxSpace(-1, 1, shape=(2,)),
        obs_space=BoxSpace(-1, 1, shape=(2,)),
    )
    wenv = DownsamplingWrapper(mockenv, factor=1)

    # Perform some actions and compare them with what arrived at the mock env
    for commanded in ([4, 1], [7, 5]):
        wenv.step(np.array(commanded))
        assert mockenv.last_act == commanded
Ejemplo n.º 8
0
def test_one_bin():
    """With a single bin, the discrete action 0 is expected to arrive at the env as [-1.0]."""
    act_space = BoxSpace(-1.0, 1.0, shape=(1, ))
    mockenv = MockEnv(act_space=act_space)
    wenv = ActDiscreteWrapper(mockenv, num_bins=1)

    # Reset to initialize buffer
    wenv.reset()

    # Command the only available discrete action
    discrete_act = np.array([0])
    wenv.step(discrete_act)
    assert mockenv.last_act == [-1.0]
Ejemplo n.º 9
0
def test_space():
    """The action space of the ActNormWrapper must have -1 as lower and 1 as upper bound in every dimension."""
    # Mock env with a different range in each of the three action dimensions
    mockenv = MockEnv(act_space=BoxSpace([-2, -1, 0], [2, 3, 1]))

    # Unit under test
    uut = ActNormWrapper(mockenv)

    # Check action space bounds
    lower, upper = uut.act_space.bounds
    assert np.all(lower == -1)
    assert np.all(upper == 1)
Ejemplo n.º 10
0
def test_action_space_eles(num_bins: int):
    """
    The wrapped action space must be a DiscreteSpace whose elements are 0, ..., num_bins - 1 as one column.

    :param num_bins: number of bins handed to the ActDiscreteWrapper
    """
    act_space = BoxSpace(-1.0, 1.0, shape=(1, ))
    mockenv = MockEnv(act_space=act_space)
    wenv = ActDiscreteWrapper(mockenv, num_bins=num_bins)

    # Reset to initialize buffer
    wenv.reset()

    # Test if action space is correct
    assert isinstance(wenv.act_space, DiscreteSpace)
    expected_eles = np.array(range(num_bins)).reshape((-1, 1))
    assert (wenv.act_space.eles == expected_eles).all()
def test_spaces():
    """Masking the second observation must remove its bounds and its label from the wrapped space."""
    full_space = BoxSpace([-1, -2, -3], [1, 2, 3], labels=['one', 'two', 'three'])
    mockenv = MockEnv(obs_space=full_space)

    # Use a simple mask to drop the second element
    wenv = ObsPartialWrapper(mockenv, [0, 1, 0])

    # Check resulting space
    lower, upper = wenv.obs_space.bounds
    assert list(lower) == [-1, -3]
    assert list(upper) == [1, 3]
    assert list(wenv.obs_space.labels) == ['one', 'three']
Ejemplo n.º 12
0
def test_no_delay():
    """With a delay of 0, every commanded action is expected to reach the env in the same step."""
    mockenv = MockEnv(act_space=BoxSpace(-1, 1, shape=(2, )))
    wenv = ActDelayWrapper(mockenv, delay=0)

    # Reset to initialize buffer
    wenv.reset()

    # Perform some actions and compare them with what arrived at the mock env
    for commanded in ([4, 1], [7, 5]):
        wenv.step(np.array(commanded))
        assert mockenv.last_act == commanded
Ejemplo n.º 13
0
def test_act_downsampling():
    """With a downsampling factor of 2, every second commanded action is expected to be ignored."""
    mockenv = MockEnv(
        act_space=BoxSpace(-1, 1, shape=(2,)),
        obs_space=BoxSpace(-1, 1, shape=(2,)),
    )
    wenv = DownsamplingWrapper(mockenv, factor=2)

    # Every entry pairs the commanded action with the action expected at the mock env after that step
    schedule = (
        ([0, 1], [0, 1]),
        ([2, 4], [0, 1]),  # should be ignored
        ([1, 2], [1, 2]),
        ([2, 3], [1, 2]),  # should be ignored
    )
    for commanded, expected in schedule:
        wenv.step(np.array(commanded))
        assert mockenv.last_act == expected
Ejemplo n.º 14
0
def test_denormalization():
    """Normalized actions of 0, 1, and -1 are expected to arrive as the center, upper, and lower bound."""
    # Mock env with a different range in each of the three action dimensions
    mockenv = MockEnv(act_space=BoxSpace([-2, -1, 0], [2, 3, 1]))

    # Unit under test
    uut = ActNormWrapper(mockenv)

    # Every entry pairs the normalized command with the denormalized action expected at the mock env
    cases = (
        ([0, 0, 0], [0, 1, 0.5]),
        ([1, 1, 1], [2, 3, 1]),
        ([-1, -1, -1], [-2, -1, 0]),
    )
    for normalized, expected in cases:
        uut.step(np.array(normalized))
        assert mockenv.last_act == expected
Ejemplo n.º 15
0
def test_reset():
    """Resetting the ActDelayWrapper is expected to discard the pending action, so zeros come out first again."""
    mockenv = MockEnv(act_space=BoxSpace(-1, 1, shape=(2, )))
    wenv = ActDelayWrapper(mockenv, delay=1)

    def step_and_check(pairs):
        # Apply each commanded action and compare it with what arrived at the mock env
        for commanded, expected in pairs:
            wenv.step(np.array(commanded))
            assert mockenv.last_act == expected

    # Reset to initialize buffer
    wenv.reset()

    # With a delay of 1, each command is expected to arrive one step late
    step_and_check([
        ([0, 4], [0, 0]),
        ([4, 4], [0, 4]),
    ])

    # The next action would be [4, 4], but now we reset again
    wenv.reset()

    step_and_check([
        ([1, 2], [0, 0]),
        ([2, 3], [1, 2]),
    ])
Ejemplo n.º 16
0
def test_reset():
    """Resetting the DownsamplingWrapper must clear the stored action and the step counter."""
    mockenv = MockEnv(
        act_space=BoxSpace(-1, 1, shape=(2,)),
        obs_space=BoxSpace(-1, 1, shape=(2,)),
    )
    wenv = DownsamplingWrapper(mockenv, factor=2)

    def step_and_check(pairs):
        # Apply each commanded action and compare it with what arrived at the mock env
        for commanded, expected in pairs:
            wenv.step(np.array(commanded))
            assert mockenv.last_act == expected

    # Perform some actions
    step_and_check([
        ([0, 4], [0, 4]),
        ([4, 4], [0, 4]),
        ([4, 4], [4, 4]),
    ])

    # The next action would be [4, 4] again, but now we reset
    wenv.reset()
    assert wenv._act_last is None
    assert wenv._cnt == 0

    # After the reset, the first command is expected to be applied right away and then held
    step_and_check([
        ([1, 2], [1, 2]),
        ([2, 3], [1, 2]),
    ])
Ejemplo n.º 17
0
def test_stepsequence_from_pandas(mock_data, given_rewards: bool):
    """
    Rebuild a StepSequence from a pandas DataFrame and compare it with the data the frame was created from.

    :param mock_data: tuple of rewards, states, observations, actions, hidden states, and policy infos; only the
                      first four entries are used here
    :param given_rewards: if `True`, a "rewards" column is added to the data frame
    """
    rewards, states, observations, actions, _, _ = mock_data
    states = np.asarray(states)
    observations = np.asarray(observations)
    actions = to.stack(actions).numpy()
    rewards = np.asarray(rewards)

    # Create fake observed data set. The labels must match the labels of the spaces. The order can be mixed.
    content = {
        "s0": states[:, 0],
        "s1": states[:, 1],
        "s2": states[:, 2],
        "o3": observations[:, 3],
        "o0": observations[:, 0],
        "o2": observations[:, 2],
        "o1": observations[:, 1],
        "a1": actions[:, 1],
        "a0": actions[:, 0],
        # Extra columns which do not match any label of the spaces
        "steps": np.arange(0, states.shape[0]),
        "infos": [{"foo": "bar"}] * 6,
    }
    if given_rewards:
        content["rewards"] = rewards

    # Every column is wrapped in a Series before the frame is assembled
    columns = {key: pd.Series(val) for key, val in content.items()}
    df = pd.DataFrame(columns)

    state_space = InfBoxSpace(shape=states[0].shape, labels=["s0", "s1", "s2"])
    obs_space = InfBoxSpace(shape=observations[0].shape, labels=["o0", "o1", "o2", "o3"])
    act_space = InfBoxSpace(shape=actions[0].shape, labels=["a0", "a1"])
    env = MockEnv(state_space=state_space, obs_space=obs_space, act_space=act_space)

    reconstructed = StepSequence.from_pandas(df, env.spec)

    # The rewards are only compared by length, the remaining fields by value
    assert len(reconstructed.rewards) == len(rewards)
    assert np.allclose(reconstructed.states, states)
    assert np.allclose(reconstructed.observations, observations)
    assert np.allclose(reconstructed.actions, actions)
Ejemplo n.º 18
0
def test_domain_param():
    """Check that the 'act_delay' domain parameter can be changed, followed by a reset."""
    mockenv = MockEnv(act_space=BoxSpace(-1, 1, shape=(2, )))
    wenv = ActDelayWrapper(mockenv, delay=1)

    def step_and_check(pairs):
        # Apply each commanded action and compare it with what arrived at the mock env
        for commanded, expected in pairs:
            wenv.step(np.array(commanded))
            assert mockenv.last_act == expected

    # Reset to initialize buffer
    wenv.reset()

    # With a delay of 1, each command is expected to arrive one step late
    step_and_check([
        ([0, 1], [0, 0]),
        ([2, 4], [0, 1]),
    ])

    # Change the delay via the domain parameters and reset
    wenv.domain_param = {'act_delay': 2}
    wenv.reset()

    # With a delay of 2, each command is expected to arrive two steps late
    step_and_check([
        ([1, 2], [0, 0]),
        ([2, 3], [0, 0]),
        ([8, 9], [1, 2]),
    ])
Ejemplo n.º 19
0
def test_indi_nonlin_layer(in_features, same_nonlin, bias, weight):
    """
    Construct an IndiNonlinLayer and check that a forward pass returns a tensor of the input's shape.

    :param in_features: number of input features, also used as the length of the random test input
    :param same_nonlin: if falsy and `in_features > 1`, a list with one tanh per feature is passed as
                        nonlinearity, otherwise a single sigmoid
    :param bias: forwarded to the layer's constructor as third argument
    :param weight: forwarded to the layer's constructor as fourth argument
    """
    individual = not same_nonlin and in_features > 1
    nonlin = in_features*[to.tanh] if individual else to.sigmoid

    layer = IndiNonlinLayer(in_features, nonlin, bias, weight)
    assert isinstance(layer, nn.Module)

    # Forward a random input through the layer
    inp = to.randn(in_features)
    out = layer(inp)
    assert isinstance(out, to.Tensor)
    assert inp.shape == out.shape


@pytest.mark.parametrize(
    'env', [MockEnv(obs_space=InfBoxSpace(shape=1), act_space=InfBoxSpace(shape=1))]
)
@pytest.mark.parametrize(
    'policy', [
        # Two-headed policies are not supported
        'rnn_policy',
        'lstm_policy',
        'gru_policy',
        'adn_policy',
        'nf_policy',
    ],
    ids=['rnn', 'lstm', 'gru', 'adn', 'nf'],
    indirect=True
)
@pytest.mark.parametrize('windowed', [True, False],
                         ids=['windowed', 'not_windowed'])