Example #1
def test_process(mock_data, data_format: str):
    rewards, states, observations, actions, hidden, policy_infos = mock_data

    # Create the rollout
    ro = StepSequence(rewards=rewards,
                      observations=observations,
                      states=states,
                      actions=actions,
                      hidden=hidden)

    if data_format == "numpy":
        # Create the filter (arbitrary values)
        b, a = signal.butter(N=5, Wn=10, fs=100)

        # Filter the signals, but not the time
        ro_proc = StepSequence.process_data(ro,
                                            signal.filtfilt,
                                            fcn_arg_name="x",
                                            exclude_fields=["time"],
                                            b=b,
                                            a=a,
                                            padlen=2,
                                            axis=0)

    else:
        # Transform to PyTorch data and define a simple function
        ro.torch()
        ro_proc = StepSequence.process_data(ro,
                                            lambda x: x * 2,
                                            fcn_arg_name="x",
                                            include_fields=["time"],
                                            fcn_arg_types=to.Tensor)

    assert isinstance(ro_proc, StepSequence)
    assert ro_proc.length == ro.length
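For context, here is a minimal sketch of what the mock_data fixture might provide, assuming the conventions implied by these tests (states and observations carry one extra entry for the final step); all names, shapes, and values are hypothetical:

import numpy as np
import pytest


@pytest.fixture
def mock_data():
    # Hypothetical stand-in for the fixture: 8 steps, 2-dim states/observations
    # with one extra entry for the final step, 1-dim actions
    rewards = [float(-i) for i in range(8)]
    states = [np.array([i, i], dtype=np.float64) for i in range(9)]
    observations = [np.array([i + 1.0, i + 1.0]) for i in range(9)]
    actions = [np.array([i + 2.0]) for i in range(8)]
    hidden = [np.array([i + 3.0, i + 3.0]) for i in range(8)]
    policy_infos = [dict(mean=np.array([i + 4.0]), std=np.array([0.1])) for i in range(8)]
    return rewards, states, observations, actions, hidden, policy_infos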
Example #2
def test_convert(mock_data, other_format, tensor_type):
    rewards, states, observations, actions, hidden, policy_infos = mock_data

    ro = StepSequence(
        rewards=rewards,
        observations=observations,
        states=states,
        actions=actions,
        policy_infos=policy_infos,
        hidden=hidden,
        data_format=other_format,
    )
    # Convert
    if other_format == "numpy":
        ro.torch()
    elif other_format == "torch":
        ro.numpy()
    # Verify
    assert isinstance(ro.rewards, tensor_type)
    assert isinstance(ro.observations, tensor_type)
    assert isinstance(ro.actions, tensor_type)
    assert isinstance(ro.policy_infos["mean"], tensor_type)
    assert isinstance(ro.policy_infos["std"], tensor_type)
    assert isinstance(ro.hidden[0], tensor_type)

    # Done should always be a ndarray
    assert isinstance(ro.done, np.ndarray)
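The pairing of other_format and tensor_type is presumably supplied by a pytest parametrization that expects the type of the opposite format after conversion; a sketch of what it could look like:

import numpy as np
import pytest
import torch as to


@pytest.mark.parametrize(
    "other_format, tensor_type",
    [
        ("numpy", to.Tensor),   # created as numpy, expect torch after conversion
        ("torch", np.ndarray),  # created as torch, expect numpy after conversion
    ],
)
def test_convert(mock_data, other_format, tensor_type):
    ...  # body as in Example #2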
Example #3
def test_action_statistics(env: SimEnv, policy: Policy):
    sigma = 1.0  # with lower values like 0.1, violations of the tolerances can be observed

    # Create an action-based exploration strategy
    explstrat = NormalActNoiseExplStrat(policy, std_init=sigma)

    # Sample a deterministic rollout
    ro_policy = rollout(env,
                        policy,
                        eval=True,
                        max_steps=1000,
                        stop_on_done=False,
                        seed=0)
    ro_policy.torch(to.get_default_dtype())

    # Run the exploration strategy on the previously sampled rollout
    if policy.is_recurrent:
        if isinstance(policy, TwoHeadedPolicy):
            act_expl, _, _ = explstrat(ro_policy.observations)
        else:
            act_expl, _ = explstrat(ro_policy.observations)
        # Get the hidden states from the deterministic rollout
        hidden_states = ro_policy.hidden_states
    else:
        if isinstance(policy, TwoHeadedPolicy):
            act_expl, _ = explstrat(ro_policy.observations)
        else:
            act_expl = explstrat(ro_policy.observations)
        # Just something that does not violate the format
        hidden_states = [0.0] * ro_policy.length

    ro_expl = StepSequence(
        actions=act_expl[:-1],  # truncate the last action since there is one more observation
        observations=ro_policy.observations,
        rewards=ro_policy.rewards,  # values do not matter here, but the field is required
        hidden_states=hidden_states,
    )
    ro_expl.torch()

    # Compute action statistics and the ground truth
    actstats = compute_action_statistics(ro_expl, explstrat)
    gt_logprobs = Normal(loc=ro_policy.actions,
                         scale=sigma).log_prob(ro_expl.actions)
    gt_entropy = Normal(loc=ro_policy.actions, scale=sigma).entropy()

    to.testing.assert_allclose(actstats.log_probs,
                               gt_logprobs,
                               rtol=1e-4,
                               atol=1e-5)
    to.testing.assert_allclose(actstats.entropy,
                               gt_entropy,
                               rtol=1e-4,
                               atol=1e-5)
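The ground truth above relies only on torch.distributions.Normal; a self-contained sketch of the statistics being checked, with arbitrary shapes and values:

import torch as to
from torch.distributions import Normal

mean = to.zeros(5, 2)      # the deterministic policy's actions serve as the mean
act_expl = to.randn(5, 2)  # exploratory actions sampled around that mean
sigma = 1.0

dist = Normal(loc=mean, scale=sigma)
log_probs = dist.log_prob(act_expl)  # element-wise log-density, shape (5, 2)
entropy = dist.entropy()             # depends only on sigma, shape (5, 2)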
Example #4
def convert_step_sequence(traj: StepSequence):
    """
    Converts a `StepSequence` into a tensor that can be fed through a network.

    :param traj: step sequence containing a trajectory
    :return: tensor containing the trajectory
    """
    assert isinstance(traj, StepSequence)
    traj.torch()
    state = traj.get_data_values('observations')[:-1].double()
    next_state = traj.get_data_values('observations')[1:].double()
    action = traj.get_data_values('actions').narrow(
        0, 0, next_state.shape[0]).double()
    traj = to.cat((state, next_state, action), 1).cpu().double()
    return traj
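A stand-alone sketch of the same (state, next state, action) packing on plain tensors; the dimensions (8 steps, 3-dim observations, 2-dim actions) are assumed:

import torch as to

obs = to.rand(9, 3)  # T + 1 observations, including the final step
act = to.rand(8, 2)  # T actions

state = obs[:-1]                                # s_0 ... s_{T-1}
next_state = obs[1:]                            # s_1 ... s_T
action = act.narrow(0, 0, next_state.shape[0])  # align the actions with the pairs

packed = to.cat((state, next_state, action), 1)
assert packed.shape == (8, 3 + 3 + 2)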
Example #5
def test_stepsequence_padding(mock_data, data_format: str,
                              pad_value: Union[int, float], pad_len: int):
    # Create too short rollout
    rewards, states, observations, actions, hidden, policy_infos = mock_data
    ro = StepSequence(
        rewards=rewards,
        observations=observations,
        states=states,
        actions=actions,
        hidden=hidden,
        policy_infos=policy_infos,
    )
    len_orig = ro.length

    if data_format == "torch":
        ro.torch()

    # Pad it
    StepSequence.pad(ro, len_to_pad_to=len(ro) + pad_len, pad_value=pad_value)

    # Check
    ro.numpy()  # for simplified checking
    assert np.allclose(ro.states[len_orig + 1:],
                       pad_value * np.ones_like(ro.states[len_orig + 1:]))
    assert np.allclose(
        ro.observations[len_orig + 1:],
        pad_value * np.ones_like(ro.observations[len_orig + 1:]))
    assert np.allclose(ro.actions[len_orig:],
                       pad_value * np.ones_like(ro.actions[len_orig:]))
    assert np.allclose(ro.rewards[len_orig:],
                       pad_value * np.ones_like(ro.rewards[len_orig:]))
    for k, v in ro.policy_infos.items():
        assert np.allclose(v[len_orig:],
                           pad_value * np.ones_like(v[len_orig:]))

    assert ro.length == len_orig + pad_len
    assert all(ro.rollout_bounds == np.array([0, len_orig + pad_len]))

    assert len(ro.states) == len_orig + pad_len + 1  # one more entry due to the final step
    assert len(ro.observations) == len_orig + pad_len + 1  # one more entry due to the final step
    assert len(ro.actions) == len_orig + pad_len
    assert len(ro.rewards) == len_orig + pad_len
    for h in ro.hidden:
        assert len(h) == len_orig + pad_len
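A stand-alone sketch of the padding semantics verified above, on plain NumPy arrays with assumed lengths: step-wise fields grow by pad_len, while states and observations keep their extra final-step entry:

import numpy as np

pad_len, pad_value = 3, 0.0
rewards = np.ones(8)  # length T
states = np.ones(9)   # length T + 1, including the final step

rewards_padded = np.concatenate([rewards, np.full(pad_len, pad_value)])
states_padded = np.concatenate([states, np.full(pad_len, pad_value)])

assert len(rewards_padded) == 8 + pad_len
assert len(states_padded) == 8 + pad_len + 1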
Example #6
def test_convert(other_format, tensor_type):
    ro = StepSequence(rewards=rewards,
                      observations=observations,
                      actions=actions,
                      policy_infos=policy_infos,
                      hidden=hidden,
                      data_format=other_format)
    # Convert
    if other_format == 'numpy':
        ro.torch()
    elif other_format == 'torch':
        ro.numpy()
    # Verify
    assert isinstance(ro.rewards, tensor_type)
    assert isinstance(ro.observations, tensor_type)
    assert isinstance(ro.actions, tensor_type)
    assert isinstance(ro.policy_infos['mean'], tensor_type)
    assert isinstance(ro.policy_infos['std'], tensor_type)
    assert isinstance(ro.hidden[0], tensor_type)

    # Done should always be a ndarray
    assert isinstance(ro.done, np.ndarray)
Example #7
def preprocess_rollout(rollout: StepSequence) -> StepSequence:
    """
    Extracts observations and actions from a `StepSequence` and packs them into a PyTorch tensor which can be fed
    through a network.

    :param rollout: a `StepSequence` instance containing a trajectory
    :return: a PyTorch tensor containing the trajectory
    """
    if not isinstance(rollout, StepSequence):
        raise pyrado.TypeErr(given=rollout, expected_type=StepSequence)

    # Convert data type
    rollout.torch(to.get_default_dtype())

    # Extract the data
    state = rollout.get_data_values("observations")[:-1]
    next_state = rollout.get_data_values("observations")[1:]
    action = rollout.get_data_values("actions").narrow(0, 0,
                                                       next_state.shape[0])

    rollout = to.cat((state, next_state, action), 1)
    return rollout
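A hypothetical usage example, assuming the StepSequence constructor accepts plain arrays and the usual convention that observations carry one more entry than actions:

import numpy as np
from pyrado.sampling.step_sequence import StepSequence

ro = StepSequence(
    rewards=np.random.rand(8),
    observations=np.random.rand(9, 3),
    actions=np.random.rand(8, 2),
)
packed = preprocess_rollout(ro)
# each row holds (state, next state, action) -> shape (8, 3 + 3 + 2)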