def test_convert(mock_data, other_format, tensor_type):
    """
    Build a rollout in one data format, convert it to the other one, and check the resulting field types.

    :param mock_data: tuple of (rewards, states, observations, actions, hidden, policy_infos)
    :param other_format: format the rollout is created in, `"numpy"` or `"torch"`; any other value skips conversion
    :param tensor_type: type every converted field is expected to be an instance of
    """
    rewards, states, observations, actions, hidden, policy_infos = mock_data
    ro = StepSequence(
        rewards=rewards,
        observations=observations,
        states=states,
        actions=actions,
        policy_infos=policy_infos,
        hidden=hidden,
        data_format=other_format,
    )

    # Switch to the opposite format of the one the rollout was created with
    switch_format = {"numpy": ro.torch, "torch": ro.numpy}.get(other_format)
    if switch_format is not None:
        switch_format()

    # Top-level data fields
    for field_name in ("rewards", "observations", "actions"):
        assert isinstance(getattr(ro, field_name), tensor_type)

    # Entries of the policy info dict
    for info_key in ("mean", "std"):
        assert isinstance(ro.policy_infos[info_key], tensor_type)

    # First hidden state entry
    assert isinstance(ro.hidden[0], tensor_type)

    # The done flags are expected to be a numpy array regardless of the data format
    assert isinstance(ro.done, np.ndarray)
def test_stepsequence_padding(mock_data, data_format: str, pad_value: Union[int, float], pad_len: int):
    """
    Pad a rollout with `StepSequence.pad` and check both the padded values and the resulting lengths.

    :param mock_data: tuple of (rewards, states, observations, actions, hidden, policy_infos)
    :param data_format: if `"torch"`, the rollout is converted to torch before it is padded
    :param pad_value: value the padded entries are expected to hold
    :param pad_len: number of steps the rollout is extended by
    """
    # Create too short rollout
    rewards, states, observations, actions, hidden, policy_infos = mock_data
    ro = StepSequence(
        rewards=rewards,
        observations=observations,
        states=states,
        actions=actions,
        hidden=hidden,
        policy_infos=policy_infos,
    )
    len_orig = ro.length

    if data_format == "torch":
        ro.torch()

    # Pad it
    StepSequence.pad(ro, len_to_pad_to=len(ro) + pad_len, pad_value=pad_value)

    # Check
    ro.numpy()  # for simplified checking

    # States and observations are sliced one entry later than the per-step fields (final step entry)
    assert np.allclose(ro.states[len_orig + 1:], pad_value * np.ones_like(ro.states[len_orig + 1:]))
    assert np.allclose(ro.observations[len_orig + 1:], pad_value * np.ones_like(ro.observations[len_orig + 1:]))
    assert np.allclose(ro.actions[len_orig:], pad_value * np.ones_like(ro.actions[len_orig:]))
    assert np.allclose(ro.rewards[len_orig:], pad_value * np.ones_like(ro.rewards[len_orig:]))
    for _key, v in ro.policy_infos.items():
        assert np.allclose(v[len_orig:], pad_value * np.ones_like(v[len_orig:]))

    assert ro.length == len_orig + pad_len
    assert all(ro.rollout_bounds == np.array([0, len_orig + pad_len]))

    # The expected length must follow pad_len; a hard-coded offset only holds for one specific pad_len
    assert len(ro.states) == len_orig + pad_len + 1  # check for final step
    assert len(ro.observations) == len_orig + pad_len + 1  # check for final step
    assert len(ro.actions) == len_orig + pad_len
    assert len(ro.rewards) == len_orig + pad_len
    for h in ro.hidden:
        assert len(h) == len_orig + pad_len
def test_convert(other_format, tensor_type):
    """
    Build a rollout in one data format, convert it to the other one, and check the resulting field types.

    The rollout data (`rewards`, `observations`, `actions`, `policy_infos`, `hidden`) is not passed in; it is read
    from names in the enclosing scope.

    :param other_format: format the rollout is created in, `'numpy'` or `'torch'`; any other value skips conversion
    :param tensor_type: type every converted field is expected to be an instance of
    """
    ro = StepSequence(
        rewards=rewards,
        observations=observations,
        actions=actions,
        policy_infos=policy_infos,
        hidden=hidden,
        data_format=other_format,
    )

    # Switch to the opposite format of the one the rollout was created with
    if other_format == 'torch':
        ro.numpy()
    elif other_format == 'numpy':
        ro.torch()

    # Every data field must now be of the target type
    assert isinstance(ro.rewards, tensor_type)
    assert isinstance(ro.observations, tensor_type)
    assert isinstance(ro.actions, tensor_type)
    for info_key in ('mean', 'std'):
        assert isinstance(ro.policy_infos[info_key], tensor_type)
    first_hidden = ro.hidden[0]
    assert isinstance(first_hidden, tensor_type)

    # The done flags are expected to be a numpy array regardless of the data format
    assert isinstance(ro.done, np.ndarray)