def test_mask_invert():
    """
    Check that `ObsPartialWrapper` with `keep_selected=True` returns only the masked observation entry.
    """
    obs_space = BoxSpace([-1, -2, -3], [1, 2, 3], labels=['one', 'two', 'three'])
    mockenv = MockEnv(obs_space=obs_space)

    # The mask selects the second element; with keep_selected=True that is the only entry the asserts expect back
    wenv = ObsPartialWrapper(mockenv, [0, 1, 0], keep_selected=True)

    # Pairs of (observation handed to the mock env, expected observation coming out of the wrapper)
    cases = [
        ([1, 2, 3], [2]),
        ([4, 7, 9], [7]),
    ]
    for raw_obs, expected_obs in cases:
        mockenv.next_obs = raw_obs
        obs, _, _, _ = wenv.step(None)
        assert list(obs) == expected_obs
def test_combination_downsampling_delay():
    """
    Step an env wrapped by a `DownsamplingWrapper` (factor 2) and, on top of it, an `ActDelayWrapper` (delay 3),
    checking the action that arrives at the mock env after every step.
    """
    mockenv = MockEnv(act_space=BoxSpace(-1, 1, shape=(2,)), obs_space=BoxSpace(-1, 1, shape=(2,)))
    wenv_ds_dl = ActDelayWrapper(DownsamplingWrapper(mockenv, factor=2), delay=3)

    # Reset to initialize buffer
    wenv_ds_dl.reset()

    # Pairs of (action commanded to the outer wrapper, action expected at the mock env afterwards)
    commanded_vs_received = [
        # The first ones are 0 because the ActDelayWrapper's queue is initialized with 0
        ([0, 1], [0, 0]),
        ([0, 2], [0, 0]),
        ([0, 3], [0, 0]),
        # Intuitively one would expect [0, 1] here, but this is the effect of the wrappers' combination
        ([0, 4], [0, 0]),
        ([0, 5], [0, 2]),
        ([0, 6], [0, 2]),
        ([0, 7], [0, 4]),
        ([0, 8], [0, 4]),
        ([0, 9], [0, 6]),
        ([1, 0], [0, 6]),
    ]
    for commanded, received in commanded_vs_received:
        wenv_ds_dl.step(np.array(commanded))
        assert mockenv.last_act == received
def test_domain_param():
    """
    Check that setting the 'downsampling' domain parameter on a `DownsamplingWrapper` changes which actions
    reach the mock env after the next reset.
    """
    mockenv = MockEnv(act_space=BoxSpace(-1, 1, shape=(2,)), obs_space=BoxSpace(-1, 1, shape=(2,)))
    wenv = DownsamplingWrapper(mockenv, factor=2)

    # Reset to initialize buffer
    wenv.reset()

    # Phase 1 (factor 2): pairs of (commanded action, action expected at the mock env)
    with_factor_two = [
        ([0, 1], [0, 1]),
        ([2, 4], [0, 1]),
        ([4, 4], [4, 4]),
    ]
    for commanded, received in with_factor_two:
        wenv.step(np.array(commanded))
        assert mockenv.last_act == received

    # Change the downsampling and reset
    wenv.domain_param = {'downsampling': 1}
    wenv.reset()

    # Phase 2 (factor 1): every commanded action is expected to arrive unchanged
    with_factor_one = [[1, 2], [2, 3], [8, 9]]
    for commanded in with_factor_one:
        wenv.step(np.array(commanded))
        assert mockenv.last_act == commanded
def test_combination_delay_downsampling():
    """
    After delay number of actions, the actions are downsampled by the factor.

    Here the `ActDelayWrapper` (delay 3) is the inner wrapper and the `DownsamplingWrapper` (factor 2) the outer one.
    """
    mockenv = MockEnv(act_space=BoxSpace(-1, 1, shape=(2,)), obs_space=BoxSpace(-1, 1, shape=(2,)))
    wenv_dl_ds = DownsamplingWrapper(ActDelayWrapper(mockenv, delay=3), factor=2)

    # Reset to initialize buffer
    wenv_dl_ds.reset()

    # Pairs of (action commanded to the outer wrapper, action expected at the mock env afterwards)
    commanded_vs_received = [
        # The first ones are 0 because the ActDelayWrapper's queue is initialized with 0
        ([0, 1], [0, 0]),
        ([0, 2], [0, 0]),
        ([0, 3], [0, 0]),
        # One time step earlier than the other order of wrappers
        ([0, 4], [0, 1]),
        ([0, 5], [0, 1]),
        ([0, 6], [0, 3]),
        ([0, 7], [0, 3]),
        ([0, 8], [0, 5]),
        ([0, 9], [0, 5]),
        ([1, 0], [0, 7]),
        ([1, 1], [0, 7]),
    ]
    for commanded, received in commanded_vs_received:
        wenv_dl_ds.step(np.array(commanded))
        assert mockenv.last_act == received
def test_denormalization(mock_obs_space):
    """
    Check that observations returned by an `ObsNormWrapper` have an absolute value of at most 1.

    :param mock_obs_space: observation space handed to the mock environment (provided by the caller, e.g. a fixture)
    """
    mockenv = MockEnv(obs_space=mock_obs_space)
    wenv = ObsNormWrapper(mockenv)

    for _ in range(100):
        # Generate random observations
        obs, _, _, _ = wenv.step(np.array([0, 0, 0]))
        # `.all` must be called: asserting the bare bound method is always truthy and would never fail
        assert (abs(obs) <= 1).all()
def test_space(mock_obs_space):
    """
    Check that the observation space exposed by an `ObsNormWrapper` is bounded by -1 and 1 in every dimension.

    :param mock_obs_space: observation space handed to the mock environment (provided by the caller, e.g. a fixture)
    """
    wenv = ObsNormWrapper(MockEnv(obs_space=mock_obs_space))

    # Check observation space bounds
    lower, upper = wenv.obs_space.bounds
    assert np.all(lower == -1)
    assert np.all(upper == 1)
def test_no_downsampling():
    """
    Check that a `DownsamplingWrapper` with factor 1 forwards every action to the mock env.
    """
    mockenv = MockEnv(act_space=BoxSpace(-1, 1, shape=(2,)), obs_space=BoxSpace(-1, 1, shape=(2,)))
    wenv = DownsamplingWrapper(mockenv, factor=1)

    # Every commanded action is expected to arrive unchanged
    for commanded in ([4, 1], [7, 5]):
        wenv.step(np.array(commanded))
        assert mockenv.last_act == commanded
def test_one_bin():
    """
    Check the action forwarded by an `ActDiscreteWrapper` that was created with a single bin.
    """
    act_space = BoxSpace(-1.0, 1.0, shape=(1,))
    mockenv = MockEnv(act_space=act_space)
    wenv = ActDiscreteWrapper(mockenv, num_bins=1)

    # Reset to initialize buffer
    wenv.reset()

    # Selecting the only available bin is expected to yield -1.0 at the mock env
    only_bin = np.array([0])
    wenv.step(only_bin)
    assert mockenv.last_act == [-1.0]
def test_space():
    """
    Check that the action space exposed by an `ActNormWrapper` is bounded by -1 and 1 in every dimension.
    """
    # Use mock env
    act_space = BoxSpace([-2, -1, 0], [2, 3, 1])
    uut = ActNormWrapper(MockEnv(act_space=act_space))

    # Check action space bounds
    lower, upper = uut.act_space.bounds
    assert np.all(lower == -1)
    assert np.all(upper == 1)
def test_action_space_eles(num_bins: int):
    """
    Check that an `ActDiscreteWrapper` exposes a `DiscreteSpace` whose elements are 0, ..., num_bins - 1.

    :param num_bins: number of bins passed to the wrapper
    """
    mockenv = MockEnv(act_space=BoxSpace(-1.0, 1.0, shape=(1,)))
    wenv = ActDiscreteWrapper(mockenv, num_bins=num_bins)

    # Reset to initialize buffer
    wenv.reset()

    # Test if action space is correct
    assert isinstance(wenv.act_space, DiscreteSpace)
    # Expected elements as a column vector: one row per bin index
    expected_eles = np.array(range(num_bins)).reshape((-1, 1))
    assert (wenv.act_space.eles == expected_eles).all()
def test_spaces():
    """
    Check bounds and labels of the observation space exposed by an `ObsPartialWrapper` with default settings.
    """
    obs_space = BoxSpace([-1, -2, -3], [1, 2, 3], labels=['one', 'two', 'three'])
    mockenv = MockEnv(obs_space=obs_space)

    # Use a simple mask to drop the second element
    wenv = ObsPartialWrapper(mockenv, [0, 1, 0])

    # Check resulting space
    partial_space = wenv.obs_space
    lower, upper = partial_space.bounds
    assert list(lower) == [-1, -3]
    assert list(upper) == [1, 3]
    assert list(partial_space.labels) == ['one', 'three']
def test_no_delay():
    """
    Check that an `ActDelayWrapper` with delay 0 forwards every action to the mock env immediately.
    """
    mockenv = MockEnv(act_space=BoxSpace(-1, 1, shape=(2,)))
    wenv = ActDelayWrapper(mockenv, delay=0)

    # Reset to initialize buffer
    wenv.reset()

    # Every commanded action is expected to arrive unchanged
    for commanded in ([4, 1], [7, 5]):
        wenv.step(np.array(commanded))
        assert mockenv.last_act == commanded
def test_act_downsampling():
    """
    Check that a `DownsamplingWrapper` with factor 2 only passes every second action on to the mock env.
    """
    mockenv = MockEnv(act_space=BoxSpace(-1, 1, shape=(2,)), obs_space=BoxSpace(-1, 1, shape=(2,)))
    wenv = DownsamplingWrapper(mockenv, factor=2)

    # Pairs of (commanded action, action expected at the mock env afterwards)
    commanded_vs_received = [
        ([0, 1], [0, 1]),
        ([2, 4], [0, 1]),  # should be ignored
        ([1, 2], [1, 2]),
        ([2, 3], [1, 2]),  # should be ignored
    ]
    for commanded, received in commanded_vs_received:
        wenv.step(np.array(commanded))
        assert mockenv.last_act == received
def test_denormalization():
    """
    Check the actions an `ActNormWrapper` forwards to the mock env for normalized inputs of 0, 1, and -1.
    """
    # Use mock env
    act_space = BoxSpace([-2, -1, 0], [2, 3, 1])
    mockenv = MockEnv(act_space=act_space)
    uut = ActNormWrapper(mockenv)

    # Pairs of (action given to the wrapper, action expected at the mock env)
    normalized_vs_forwarded = [
        ([0, 0, 0], [0, 1, 0.5]),
        ([1, 1, 1], [2, 3, 1]),
        ([-1, -1, -1], [-2, -1, 0]),
    ]
    for normalized, forwarded in normalized_vs_forwarded:
        uut.step(np.array(normalized))
        assert mockenv.last_act == forwarded
def test_reset():
    """
    Check that resetting an `ActDelayWrapper` (delay 1) discards the action that was still waiting to be applied.
    """
    mockenv = MockEnv(act_space=BoxSpace(-1, 1, shape=(2,)))
    wenv = ActDelayWrapper(mockenv, delay=1)

    # Reset to initialize buffer
    wenv.reset()

    # Pairs of (commanded action, action expected at the mock env afterwards)
    before_reset = [
        ([0, 4], [0, 0]),
        ([4, 4], [0, 4]),
    ]
    for commanded, received in before_reset:
        wenv.step(np.array(commanded))
        assert mockenv.last_act == received

    # The next action would be [4, 4], but now we reset again
    wenv.reset()

    after_reset = [
        ([1, 2], [0, 0]),
        ([2, 3], [1, 2]),
    ]
    for commanded, received in after_reset:
        wenv.step(np.array(commanded))
        assert mockenv.last_act == received
def test_reset():
    """
    Check that resetting a `DownsamplingWrapper` (factor 2) clears its stored action and its step counter.
    """
    mockenv = MockEnv(act_space=BoxSpace(-1, 1, shape=(2,)), obs_space=BoxSpace(-1, 1, shape=(2,)))
    wenv = DownsamplingWrapper(mockenv, factor=2)

    # Pairs of (commanded action, action expected at the mock env afterwards)
    before_reset = [
        ([0, 4], [0, 4]),
        ([4, 4], [0, 4]),
        ([4, 4], [4, 4]),
    ]
    for commanded, received in before_reset:
        wenv.step(np.array(commanded))
        assert mockenv.last_act == received

    # The next action would be [4, 4] again, but now we reset
    wenv.reset()
    assert wenv._act_last is None
    assert wenv._cnt == 0

    after_reset = [
        ([1, 2], [1, 2]),
        ([2, 3], [1, 2]),
    ]
    for commanded, received in after_reset:
        wenv.step(np.array(commanded))
        assert mockenv.last_act == received
def test_stepsequence_from_pandas(mock_data, given_rewards: bool):
    """
    Build a pandas data frame from mock rollout data and check that `StepSequence.from_pandas` reconstructs the
    states, observations, and actions from it.

    :param mock_data: tuple of rewards, states, observations, actions, hidden states, and policy infos
    :param given_rewards: if `True`, a rewards column is added to the data frame
    """
    rewards, states, observations, actions, _hidden, _policy_infos = mock_data

    # Convert everything to numpy arrays, the actions are stacked as torch tensors first
    rewards = np.asarray(rewards)
    states = np.asarray(states)
    observations = np.asarray(observations)
    actions = to.stack(actions).numpy()

    # Create fake observed data set. The labels must match the labels of the spaces. The order can be mixed.
    content = {
        "s0": states[:, 0],
        "s1": states[:, 1],
        "s2": states[:, 2],
        "o3": observations[:, 3],
        "o0": observations[:, 0],
        "o2": observations[:, 2],
        "o1": observations[:, 1],
        "a1": actions[:, 1],
        "a0": actions[:, 0],
        # Some additional content that does not belong to any of the spaces
        "steps": np.arange(0, states.shape[0]),
        "infos": [dict(foo="bar")] * 6,
    }
    if given_rewards:
        content["rewards"] = rewards

    # Wrapping every column in a Series before building the frame
    columns = {key: pd.Series(val) for key, val in content.items()}
    df = pd.DataFrame(columns)

    env = MockEnv(
        state_space=InfBoxSpace(shape=states[0].shape, labels=["s0", "s1", "s2"]),
        obs_space=InfBoxSpace(shape=observations[0].shape, labels=["o0", "o1", "o2", "o3"]),
        act_space=InfBoxSpace(shape=actions[0].shape, labels=["a0", "a1"]),
    )

    reconstructed = StepSequence.from_pandas(df, env.spec)

    assert len(reconstructed.rewards) == len(rewards)
    assert np.allclose(reconstructed.states, states)
    assert np.allclose(reconstructed.observations, observations)
    assert np.allclose(reconstructed.actions, actions)
def test_domain_param():
    """
    Check that setting the 'act_delay' domain parameter on an `ActDelayWrapper` changes the delay after the
    next reset.
    """
    mockenv = MockEnv(act_space=BoxSpace(-1, 1, shape=(2,)))
    wenv = ActDelayWrapper(mockenv, delay=1)

    # Reset to initialize buffer
    wenv.reset()

    # Phase 1 (delay 1): pairs of (commanded action, action expected at the mock env)
    with_delay_one = [
        ([0, 1], [0, 0]),
        ([2, 4], [0, 1]),
    ]
    for commanded, received in with_delay_one:
        wenv.step(np.array(commanded))
        assert mockenv.last_act == received

    # Change the delay and reset
    wenv.domain_param = {'act_delay': 2}
    wenv.reset()

    # Phase 2 (delay 2): the first commanded action is expected to arrive at the third step
    with_delay_two = [
        ([1, 2], [0, 0]),
        ([2, 3], [0, 0]),
        ([8, 9], [1, 2]),
    ]
    for commanded, received in with_delay_two:
        wenv.step(np.array(commanded))
        assert mockenv.last_act == received
def test_indi_nonlin_layer(in_features, same_nonlin, bias, weight): if not same_nonlin and in_features > 1: nonlin = in_features*[to.tanh] else: nonlin = to.sigmoid layer = IndiNonlinLayer(in_features, nonlin, bias, weight) assert isinstance(layer, nn.Module) i = to.randn(in_features) o = layer(i) assert isinstance(o, to.Tensor) assert i.shape == o.shape @pytest.mark.parametrize( 'env', [MockEnv(obs_space=InfBoxSpace(shape=1), act_space=InfBoxSpace(shape=1))] ) @pytest.mark.parametrize( 'policy', [ # Two-headed policies are not supported 'rnn_policy', 'lstm_policy', 'gru_policy', 'adn_policy', 'nf_policy', ], ids=['rnn', 'lstm', 'gru', 'adn', 'nf'], indirect=True ) @pytest.mark.parametrize('windowed', [True, False], ids=['windowed', 'not_windowed'])