def test_process(mock_data, data_format: str):
    """
    Check that `StepSequence.process_data` yields a `StepSequence` of unchanged length.

    :param mock_data: tuple of (rewards, states, observations, actions, hidden, policy_infos)
    :param data_format: `"numpy"` runs the filtering variant, any other value the PyTorch variant
    """
    rewards, states, observations, actions, hidden, policy_infos = mock_data

    # Build the rollout under test
    ro = StepSequence(
        rewards=rewards,
        observations=observations,
        states=states,
        actions=actions,
        hidden=hidden,
    )

    if data_format != "numpy":
        # Switch to PyTorch data and apply a trivial function to the time field only
        ro.torch()

        def double(x):
            return x * 2

        ro_proc = StepSequence.process_data(
            ro,
            double,
            fcn_arg_name="x",
            include_fields=["time"],
            fcn_arg_types=to.Tensor,
        )
    else:
        # Filter coefficients (arbitrary values)
        numerator, denominator = signal.butter(N=5, Wn=10, fs=100)

        # Filter every signal except the time
        ro_proc = StepSequence.process_data(
            ro,
            signal.filtfilt,
            fcn_arg_name="x",
            exclude_fields=["time"],
            b=numerator,
            a=denominator,
            padlen=2,
            axis=0,
        )

    assert isinstance(ro_proc, StepSequence)
    assert ro_proc.length == ro.length
def test_convert(mock_data, other_format, tensor_type):
    """
    Check that converting a `StepSequence` to the other data format changes the field types.

    :param mock_data: tuple of (rewards, states, observations, actions, hidden, policy_infos)
    :param other_format: data format the rollout is created with (`"numpy"` or `"torch"`)
    :param tensor_type: type the fields are expected to have after the conversion
    """
    rewards, states, observations, actions, hidden, policy_infos = mock_data

    ro = StepSequence(
        rewards=rewards,
        observations=observations,
        states=states,
        actions=actions,
        policy_infos=policy_infos,
        hidden=hidden,
        data_format=other_format,
    )

    # Convert to the opposite format; unknown formats are left untouched
    converters = {"numpy": ro.torch, "torch": ro.numpy}
    convert = converters.get(other_format)
    if convert is not None:
        convert()

    # Verify the converted fields in a fixed order
    for field_name in ("rewards", "observations", "actions"):
        assert isinstance(getattr(ro, field_name), tensor_type)
    for info_key in ("mean", "std"):
        assert isinstance(ro.policy_infos[info_key], tensor_type)
    assert isinstance(ro.hidden[0], tensor_type)

    # Done should always be a ndarray
    assert isinstance(ro.done, np.ndarray)
def test_action_statistics(env: SimEnv, policy: Policy):
    """
    Compare `compute_action_statistics` against log-probabilities and entropies computed directly
    from a normal distribution centered at the deterministic actions.

    :param env: environment to sample the deterministic rollout from
    :param policy: policy wrapped by the action noise exploration strategy
    """
    # With lower values like 0.1 we can observe violations of the tolerances
    sigma = 1.0

    # Action-based exploration strategy around the given policy
    explstrat = NormalActNoiseExplStrat(policy, std_init=sigma)

    # Deterministic reference rollout
    ro_policy = rollout(env, policy, eval=True, max_steps=1000, stop_on_done=False, seed=0)
    ro_policy.torch(to.get_default_dtype())

    # The number of returned values depends on the policy being recurrent and / or two-headed
    recurrent = policy.is_recurrent
    two_headed = isinstance(policy, TwoHeadedPolicy)

    # Run the exploration strategy on the previously sampled observations
    expl_out = explstrat(ro_policy.observations)
    if recurrent and two_headed:
        act_expl, _, _ = expl_out
    elif recurrent or two_headed:
        act_expl, _ = expl_out
    else:
        act_expl = expl_out

    if recurrent:
        # Reuse the hidden states from the deterministic rollout
        hidden_states = ro_policy.hidden_states
    else:
        # Just something that does not violate the format
        hidden_states = [0.0] * ro_policy.length

    ro_expl = StepSequence(
        actions=act_expl[:-1],  # truncate act due to last obs
        observations=ro_policy.observations,
        rewards=ro_policy.rewards,  # don't care but necessary
        hidden_states=hidden_states,
    )
    ro_expl.torch()

    # Action statistics under test and the ground truth from a single reference distribution
    actstats = compute_action_statistics(ro_expl, explstrat)
    gt_distr = Normal(loc=ro_policy.actions, scale=sigma)
    gt_logprobs = gt_distr.log_prob(ro_expl.actions)
    gt_entropy = gt_distr.entropy()

    tolerances = dict(rtol=1e-4, atol=1e-5)
    to.testing.assert_allclose(actstats.log_probs, gt_logprobs, **tolerances)
    to.testing.assert_allclose(actstats.entropy, gt_entropy, **tolerances)
def convert_step_sequence(traj: StepSequence):
    """
    Packs the transitions of a StepSequence into a Tensor which can be fed through a Network

    The observations, the observations shifted by one step, and the actions are concatenated
    along dimension 1.

    :param traj: A step sequence containing a trajectory, `torch()` is called on it
    :return: A double-precision CPU Tensor containing the trajectory
    """
    assert isinstance(traj, StepSequence)
    traj.torch()

    all_obs = traj.get_data_values('observations')
    curr_obs = all_obs[:-1].double()
    next_obs = all_obs[1:].double()

    # Keep only as many actions as there are transitions
    num_transitions = next_obs.shape[0]
    act = traj.get_data_values('actions').narrow(0, 0, num_transitions).double()

    return to.cat((curr_obs, next_obs, act), 1).cpu().double()
def test_stepsequence_padding(mock_data, data_format: str, pad_value: Union[int, float], pad_len: int):
    """
    Check that `StepSequence.pad` extends a rollout by `pad_len` steps filled with `pad_value`.

    :param mock_data: tuple of (rewards, states, observations, actions, hidden, policy_infos)
    :param data_format: if `"torch"`, the rollout is converted to PyTorch data before padding
    :param pad_value: value expected in every padded entry
    :param pad_len: number of steps the rollout is extended by
    """
    # Create too short rollout
    rewards, states, observations, actions, hidden, policy_infos = mock_data
    ro = StepSequence(
        rewards=rewards,
        observations=observations,
        states=states,
        actions=actions,
        hidden=hidden,
        policy_infos=policy_infos,
    )
    len_orig = ro.length

    if data_format == "torch":
        ro.torch()

    # Pad it
    StepSequence.pad(ro, len_to_pad_to=len(ro) + pad_len, pad_value=pad_value)

    # Check
    ro.numpy()  # for simplified checking

    # States and observations carry the final step, so their padding starts one entry later
    padded_states = ro.states[len_orig + 1:]
    padded_obs = ro.observations[len_orig + 1:]
    assert np.allclose(padded_states, pad_value * np.ones_like(padded_states))
    assert np.allclose(padded_obs, pad_value * np.ones_like(padded_obs))
    assert np.allclose(ro.actions[len_orig:], pad_value * np.ones_like(ro.actions[len_orig:]))
    assert np.allclose(ro.rewards[len_orig:], pad_value * np.ones_like(ro.rewards[len_orig:]))
    for v in ro.policy_infos.values():
        assert np.allclose(v[len_orig:], pad_value * np.ones_like(v[len_orig:]))

    len_padded = len_orig + pad_len
    assert ro.length == len_padded
    assert all(ro.rollout_bounds == np.array([0, len_padded]))

    # The expected lengths must depend on pad_len: a fixed offset is only correct for one
    # particular pad_len. States and observations hold one extra entry for the final step.
    assert len(ro.states) == len_padded + 1  # check for final step
    assert len(ro.observations) == len_padded + 1  # check for final step
    assert len(ro.actions) == len_padded
    assert len(ro.rewards) == len_padded
    for h in ro.hidden:
        assert len(h) == len_padded
def test_convert(other_format, tensor_type):
    """
    Check that converting a `StepSequence` to the other data format changes the field types.

    The rollout data (`rewards`, `observations`, `actions`, `policy_infos`, `hidden`) is read
    from the enclosing scope.

    :param other_format: data format the rollout is created with (`'numpy'` or `'torch'`)
    :param tensor_type: type the fields are expected to have after the conversion
    """
    ro = StepSequence(
        rewards=rewards,
        observations=observations,
        actions=actions,
        policy_infos=policy_infos,
        hidden=hidden,
        data_format=other_format,
    )

    # Convert to the opposite format
    if other_format == 'torch':
        ro.numpy()
    elif other_format == 'numpy':
        ro.torch()

    # Verify the converted fields one after another
    for attr in ('rewards', 'observations', 'actions'):
        assert isinstance(getattr(ro, attr), tensor_type)
    for key in ('mean', 'std'):
        assert isinstance(ro.policy_infos[key], tensor_type)
    first_hidden = ro.hidden[0]
    assert isinstance(first_hidden, tensor_type)

    # Done should always be a ndarray
    assert isinstance(ro.done, np.ndarray)
def preprocess_rollout(rollout: StepSequence) -> to.Tensor:
    """
    Extracts observations and actions from a `StepSequence` and packs them into a PyTorch tensor which can be fed
    through a network.

    The observations, the observations shifted by one step, and the actions are concatenated along dimension 1.

    :param rollout: a `StepSequence` instance containing a trajectory, it is converted to PyTorch data
    :return: a PyTorch tensor containing the trajectory
    :raises pyrado.TypeErr: if `rollout` is not a `StepSequence`
    """
    if not isinstance(rollout, StepSequence):
        raise pyrado.TypeErr(given=rollout, expected_type=StepSequence)

    # Convert data type
    rollout.torch(to.get_default_dtype())

    # Extract the data
    state = rollout.get_data_values("observations")[:-1]
    next_state = rollout.get_data_values("observations")[1::]
    # Keep only as many actions as there are transitions
    action = rollout.get_data_values("actions").narrow(0, 0, next_state.shape[0])

    # Return the packed tensor directly instead of rebinding the `rollout` parameter to a different type
    return to.cat((state, next_state, action), 1)