예제 #1
0
def plot_observations(ro: StepSequence,
                      idcs_sel: Optional[Sequence[int]] = None):
    """
    Plot the observation trajectories of the given rollout, one subplot per observation dimension.

    Nothing is plotted if the rollout has no `observations` attribute or if the selection is empty.

    :param ro: input rollout
    :param idcs_sel: indices of the selected observations, if `None` plot all
    :raises pyrado.TypeErr: if the observations are not stored as a numpy array
    """
    if not hasattr(ro, "observations"):
        return
    if not isinstance(ro.observations, np.ndarray):
        raise pyrado.TypeErr(given=ro.observations, expected_type=np.ndarray)

    # Select dimensions to plot
    dim_obs = range(
        ro.observations.shape[1]) if idcs_sel is None else idcs_sel
    num_obs = len(dim_obs)
    if num_obs == 0:
        # The grid computation below would divide by zero
        return

    # Use recorded time stamps if possible
    t = getattr(ro, "time", np.arange(0, ro.length + 1))

    # Choose the grid layout depending on the number of subplots
    if num_obs <= 6:
        divisor = 2
    elif num_obs <= 12:
        divisor = 4
    else:
        divisor = 8
    num_cols = int(np.ceil(num_obs / divisor))
    num_rows = int(np.ceil(num_obs / num_cols))

    fig, axs = plt.subplots(num_rows,
                            num_cols,
                            figsize=(num_cols * 5, num_rows * 3),
                            tight_layout=True)
    axs = np.atleast_2d(axs)
    axs = correct_atleast_2d(axs)
    fig.canvas.manager.set_window_title("Observations over Time")
    colors = plt.get_cmap("tab20")(np.linspace(0, 1, num_obs))

    if num_obs == 1:
        # Single subplot: draw the curve once and identify it via the legend
        axs[0, 0].plot(t,
                       ro.observations[:, dim_obs[0]],
                       label=_get_obs_label(ro, dim_obs[0]))
        axs[0, 0].legend()
        return

    for i in range(num_rows):
        for j in range(num_cols):
            idx_plot = j + i * num_cols
            if idx_plot < num_obs:
                # Map the position in the grid to the selected observation index
                idx_obs = dim_obs[idx_plot]
                axs[i, j].plot(t,
                               ro.observations[:, idx_obs],
                               c=colors[idx_plot])
                axs[i, j].set_ylabel(_get_obs_label(ro, idx_obs))
            else:
                # We might create more subplots than there are observations
                axs[i, j].remove()
예제 #2
0
def plot_observations_actions_rewards(ro: StepSequence):
    """
    Draw every observation, every action, and the reward of a rollout over time in a single figure.

    The function does nothing unless the rollout has the attributes `observations`, `actions`, and `env_infos`.

    :param ro: input rollout
    :raises pyrado.TypeErr: if the observations or the actions are not stored as a numpy array
    """
    if not all(
            hasattr(ro, key)
            for key in ("observations", "actions", "env_infos")):
        return

    for key in ("observations", "actions"):
        data = getattr(ro, key)
        if not isinstance(data, np.ndarray):
            raise pyrado.TypeErr(given=data, expected_type=np.ndarray)

    num_obs = ro.observations.shape[1]
    num_act = ro.actions.shape[1]

    # Fall back to the step indices if the rollout carries no time stamps
    t = getattr(ro, "time", np.arange(0, ro.length + 1))

    # One subplot per observation, one per action, and one for the reward
    num_rows, num_cols = num_rows_cols_from_length(num_obs + num_act + 1,
                                                   transposed=True)
    fig, axs = plt.subplots(num_rows,
                            num_cols,
                            figsize=(14, 10),
                            tight_layout=True)
    axs = correct_atleast_2d(np.atleast_2d(axs))
    fig.canvas.manager.set_window_title(
        "Observations, Actions, and Reward over Time")
    colors = plt.get_cmap("tab20")(np.linspace(0, 1, max(num_obs, num_act)))

    def _axis_at(row: int, col: int):
        # Index into the grid if it is an array, otherwise use the object itself
        return axs[row, col] if isinstance(axs, np.ndarray) else axs

    # Observations occupy the first cells of the grid (row-major)
    for idx_o in range(num_obs):
        ax = _axis_at(*divmod(idx_o, num_cols))
        ax.plot(t, ro.observations[:, idx_o], c=colors[idx_o])
        ax.set_ylabel(_get_obs_label(ro, idx_o))

    # Actions follow directly after the observations
    for idx_a in range(num_act):
        ax = _axis_at(*divmod(num_obs + idx_a, num_cols))
        act_traj = ro.actions[:, idx_a]
        # Truncate the time axis to the number of recorded actions
        ax.plot(t[:len(act_traj)], act_traj, c=colors[idx_a])
        ax.set_ylabel(_get_act_label(ro, idx_a))

    # The reward goes into the bottom right cell
    ax = _axis_at(num_rows - 1, num_cols - 1)
    ax.plot(t[:len(ro.rewards)], ro.rewards, c="k")
    ax.set_ylabel("reward")
    ax.set_xlabel("time")
    plt.subplots_adjust(hspace=0.5)
예제 #3
0
def plot_features(ro: StepSequence, policy: Policy):
    """
    Plot all features given the policy and the observation trajectories.

    The feature values are recovered by calling `policy.eval_feats` on the recorded observations. The values
    belonging to the last observation are omitted. Nothing is plotted if the policy is not a `LinearPolicy`
    (a message is printed instead), if the rollout has no `observations` attribute, or if there are no features.

    :param ro: input rollout
    :param policy: linear policy used during the rollout
    """
    if not isinstance(policy, LinearPolicy):
        print_cbt(
            "Plotting of the feature values is only supports linear policies!",
            "r")
        return
    if not hasattr(ro, "observations"):
        return

    # Use recorded time stamps if possible, dropping the last time step
    t = getattr(ro, "time", np.arange(0, ro.length + 1))[:-1]

    # Recover the features from the observations
    feat_vals = policy.eval_feats(to.from_numpy(ro.observations))
    num_feat = feat_vals.shape[1]
    if num_feat == 0:
        # The grid computation below would divide by zero
        return

    # Choose the grid layout depending on the number of subplots
    if num_feat <= 6:
        divisor = 2
    elif num_feat <= 12:
        divisor = 4
    else:
        divisor = 8
    num_cols = int(np.ceil(num_feat / divisor))
    num_rows = int(np.ceil(num_feat / num_cols))

    fig, axs = plt.subplots(num_rows,
                            num_cols,
                            figsize=(num_cols * 5, num_rows * 3),
                            tight_layout=True)
    axs = np.atleast_2d(axs)
    axs = correct_atleast_2d(axs)
    fig.canvas.manager.set_window_title("Feature Values over Time")
    plt.subplots_adjust(hspace=0.5)
    colors = plt.get_cmap("tab20")(np.linspace(0, 1, num_feat))

    if num_feat == 1:
        # Label the curve as a feature (not as an observation), like in the multi-feature case
        axs[0, 0].plot(t, feat_vals[:-1, 0], label=rf"$\phi_{{{0}}}$")
        axs[0, 0].legend()
        return

    for i in range(num_rows):
        for j in range(num_cols):
            idx_feat = j + i * num_cols
            if idx_feat < num_feat:
                # Omit the feature values of the last observation
                axs[i, j].plot(t, feat_vals[:-1, idx_feat], c=colors[idx_feat])
                axs[i, j].set_ylabel(rf"$\phi_{{{idx_feat}}}$")
            else:
                # We might create more subplots than there are features
                axs[i, j].remove()
예제 #4
0
def plot_mean_std_across_rollouts(
    rollouts: Sequence[StepSequence],
    idcs_obs: Optional[Sequence[int]] = None,
    idcs_act: Optional[Sequence[int]] = None,
    show_applied_actions: bool = True,
):
    """
    Plot the mean and standard deviation across a selection of rollouts.

    Two figures are created, one for the observations and one for the actions. In addition to the statistics,
    every individual rollout is drawn as a gray dashed line. Every rollout is converted by calling `ro.numpy()`.

    :param rollouts: list of rollouts, they can be of unequal length but are assumed to be from the same type of env
    :param idcs_obs: indices of the observations to process and plot, pass `None` to select all
    :param idcs_act: indices of the actions to process and plot, pass `None` to select all
    :param show_applied_actions: if `True` show the actions applied to the environment instead of the commanded ones
    """
    act_key = "actions_applied" if show_applied_actions else "actions"

    dim_obs = rollouts[0].observations.shape[
        1]  # assuming same for all rollouts
    dim_act = rollouts[0].actions.shape[1]  # assuming same for all rollouts
    if idcs_obs is None:
        idcs_obs = np.arange(dim_obs)
    if idcs_act is None:
        idcs_act = np.arange(dim_act)

    max_len = 0
    time = None
    data_obs = pd.DataFrame()
    data_act = pd.DataFrame()
    for ro in rollouts:
        ro.numpy()
        if len(ro) > max_len:
            # Extract the time from the longest rollout seen so far
            max_len = len(ro)
            time = getattr(ro, "time", None)

        # Extract observations, the columns of all rollouts share the same labels
        df = pd.DataFrame(ro.observations[:, idcs_obs],
                          columns=[_get_obs_label(ro, i) for i in idcs_obs])
        data_obs = pd.concat([data_obs, df], axis=1)

        # Extract actions, the columns of all rollouts share the same labels
        df = pd.DataFrame(ro.get_data_values(act_key)[:, idcs_act],
                          columns=[_get_act_label(ro, i) for i in idcs_act])
        data_act = pd.concat([data_act, df], axis=1)

    # Compute statistics over all columns that carry the same label, i.e. across the rollouts
    means_obs = data_obs.groupby(by=data_obs.columns, axis=1).mean()
    stds_obs = data_obs.groupby(by=data_obs.columns, axis=1).std()
    means_act = data_act.groupby(by=data_act.columns, axis=1).mean()
    stds_act = data_act.groupby(by=data_act.columns, axis=1).std()

    # Use the recorded time stamps if available, else the step indices
    x_label = "time [s]" if time is not None else "steps [-]"
    x_obs = time if time is not None else np.arange(len(data_obs))
    x_act = time[:-1] if time is not None else np.arange(len(data_act))

    # Create figure
    num_rows, num_cols = num_rows_cols_from_length(len(idcs_obs),
                                                   transposed=True)
    fig_obs, axs_obs = plt.subplots(num_rows,
                                    num_cols,
                                    figsize=(18, 9),
                                    tight_layout=True)
    axs_obs = np.atleast_2d(axs_obs)
    axs_obs = correct_atleast_2d(axs_obs)
    # Set the title via the figure manager, like the other plotting functions do
    fig_obs.canvas.manager.set_window_title(
        "Mean And 2 Standard Deviations of the Observations over Time")
    colors = plt.get_cmap("tab20")(np.linspace(0, 1, len(idcs_obs)))

    # Plot observations
    for idx_o, c in enumerate(data_obs.columns.unique()):
        ax = axs_obs[idx_o // num_cols, idx_o %
                     num_cols] if isinstance(axs_obs, np.ndarray) else axs_obs

        # Plot means and stds
        draw_curve(
            "mean_std",
            ax,
            pd.DataFrame(dict(mean=means_obs[c], std=stds_obs[c])),
            x_grid=x_obs,
            show_legend=False,
            x_label=x_label,
            y_label=str(c),
            plot_kwargs=dict(color=colors[idx_o]),
        )

        # Plot individual rollouts
        ax.plot(x_obs, data_obs[c], c="gray", ls="--")

    # Plot actions
    num_rows, num_cols = num_rows_cols_from_length(dim_act, transposed=True)
    fig_act, axs_act = plt.subplots(num_rows,
                                    num_cols,
                                    figsize=(18, 9),
                                    tight_layout=True)
    axs_act = np.atleast_2d(axs_act)
    axs_act = correct_atleast_2d(axs_act)
    fig_act.canvas.manager.set_window_title(
        "Mean And 2 Standard Deviations of the Actions over Time")
    colors = plt.get_cmap("tab20")(np.linspace(0, 1, dim_act))

    for idx_a, c in enumerate(data_act.columns.unique()):
        ax = axs_act[idx_a // num_cols, idx_a %
                     num_cols] if isinstance(axs_act, np.ndarray) else axs_act

        # Plot means and stds
        draw_curve(
            "mean_std",
            ax,
            pd.DataFrame(dict(mean=means_act[c], std=stds_act[c])),
            x_grid=x_act,
            show_legend=False,
            x_label=x_label,
            y_label=str(c),
            plot_kwargs=dict(color=colors[idx_a]),
        )

        # Plot individual rollouts
        ax.plot(x_act, data_act[c], c="gray", ls="--")
예제 #5
0
def plot_actions(ro: StepSequence, env: Env):
    """
    Draw the action trajectories of a rollout, one subplot per action dimension.

    Every subplot shows the actions together with their clipped counterpart obtained from `limit_act`.
    If `typed_env` finds an `ActNormWrapper` in the environment, the actions are first mapped affinely such that
    -1 corresponds to the lower and +1 to the upper bound of the inner environment's action space.
    Nothing is plotted if the rollout has no `actions` attribute.

    :param ro: input rollout
    :param env: environment (used for getting the clipped action values)
    :raises pyrado.TypeErr: if the actions are not stored as a numpy array
    """
    if not hasattr(ro, "actions"):
        return
    if not isinstance(ro.actions, np.ndarray):
        raise pyrado.TypeErr(given=ro.actions, expected_type=np.ndarray)

    num_act = ro.actions.shape[1]

    # Prefer the recorded time stamps, and drop the last one
    t = getattr(ro, "time", np.arange(0, ro.length + 1))[:-1]

    num_rows, num_cols = num_rows_cols_from_length(num_act, transposed=True)
    fig, axs = plt.subplots(num_rows,
                            num_cols,
                            figsize=(10, 8),
                            tight_layout=True)
    fig.canvas.manager.set_window_title("Actions over Time")
    axs = correct_atleast_2d(np.atleast_2d(axs))
    colors = plt.get_cmap("tab20")(np.linspace(0, 1, num_act))

    if typed_env(env, ActNormWrapper) is None:
        # No normalization wrapper found, use the actions as they are
        act_denorm = ro.actions
        act_clipped = np.array([env.limit_act(a) for a in ro.actions])
    else:
        # Undo the normalization using the bounds of the inner environment
        lb, ub = inner_env(env).act_space.bounds
        act_denorm = lb + (ro.actions + 1.0) * (ub - lb) / 2
        act_clipped = np.array(
            [inner_env(env).limit_act(a) for a in act_denorm])

    if num_act == 1:
        ax = axs[0, 0]
        ax.plot(t, act_denorm, label="to env")
        ax.plot(t, act_clipped, label="clipped", c="k", ls="--")
        ax.legend(ncol=2)
        ax.set_ylabel(_get_act_label(ro, 0))
    else:
        for idx_a in range(num_act):
            # Fill the grid in row-major order
            ax = axs[idx_a // num_cols, idx_a % num_cols]
            ax.plot(t, act_denorm[:, idx_a], label="to env", c=colors[idx_a])
            ax.plot(t,
                    act_clipped[:, idx_a],
                    label="clipped",
                    c="k",
                    ls="--")
            ax.legend(ncol=2)
            ax.set_ylabel(_get_act_label(ro, idx_a))

    # Recreate the legends for all axes of the figure, unless there are too many subplots
    if num_act < 8:
        for a in fig.get_axes():
            a.legend(ncol=2)

    plt.subplots_adjust(hspace=0.2)
예제 #6
0
    def __init__(
        self,
        spec: EnvSpec,
        dt: float,
        t_end: float,
        cond_lvl: str,
        cond_final: Optional[Union[to.Tensor, List[float],
                                   List[List[float]]]] = None,
        cond_init: Optional[Union[to.Tensor, List[float],
                                  List[List[float]]]] = None,
        t_init: float = 0.0,
        overtime_behavior: str = "hold",
        init_param_kwargs: Optional[dict] = None,
        use_cuda: bool = False,
    ):
        """
        Constructor

        :param spec: environment specification
        :param dt: time step [s]
        :param t_end: final time [s], must be greater than `t_init`
        :param cond_lvl: highest level of the condition, so far, only velocity 'vel' and acceleration 'acc' level
                         conditions on the polynomial are supported. These need to be consistent with the actions.
        :param cond_final: final condition for the least squares problem, needs to be of shape [X, dim_act] where X
                           is 2 if `cond_lvl == 'vel'` and 3 if `cond_lvl == 'acc'`. If `None`, the final
                           condition is left uninitialized and `init_param()` is called instead.
        :param cond_init: initial condition for the least squares problem, needs to be of shape [X, dim_act] where X
                          is 2 if `cond_lvl == 'vel'` and 3 if `cond_lvl == 'acc'`. If `None`, zeros are used.
        :param t_init: initial time [s], also stored as the current time
        :param overtime_behavior: determines how the policy acts when `t > t_end`, either 'hold' or 'zero'
        :param init_param_kwargs: additional keyword arguments for the policy parameter initialization
        :param use_cuda: `True` to move the policy to the GPU, `False` (default) to use the CPU
        :raises pyrado.ValueErr: if `t_end <= t_init`, or if `overtime_behavior` or `cond_lvl` is not supported
        :raises pyrado.ShapeErr: if a given condition does not have the shape described above
        """
        if t_end <= t_init:
            raise pyrado.ValueErr(given=t_end, g_constraint=t_init)
        behavior = overtime_behavior.lower()
        if behavior not in ("hold", "zero"):
            raise pyrado.ValueErr(given=overtime_behavior,
                                  eq_constraint=("hold", "zero"))

        # Call Policy's constructor
        super().__init__(spec, use_cuda)

        self._dt = float(dt)
        self._t_end = float(t_end)
        self._t_init = float(t_init)
        self._t_curr = float(t_init)
        self._overtime_behavior = behavior

        # The condition level fixes the order of the polynomial and thereby the number of conditions per side
        order_by_lvl = {"vel": 3, "acc": 5}
        lvl = cond_lvl.lower()
        if lvl not in order_by_lvl:
            raise pyrado.ValueErr(given=cond_lvl,
                                  eq_constraint="'vel' or 'acc'")
        self._order = order_by_lvl[lvl]
        num_cond = (self._order + 1) // 2
        expected_shape = (num_cond, spec.act_space.flat_dim)

        # Final condition: either given, or an empty tensor that is filled by the parameter initialization
        rand_init = cond_final is None
        if rand_init:
            cond_final = to.empty(*expected_shape)
        else:
            cond_final = to.as_tensor(cond_final, dtype=to.get_default_dtype())
            cond_final = correct_atleast_2d(to.atleast_2d(cond_final))
            if cond_final.shape != expected_shape:
                raise pyrado.ShapeErr(given=cond_final,
                                      expected_match=expected_shape)

        # Initial condition: either given, or all zeros
        if cond_init is None:
            cond_init = to.zeros(*expected_shape)
        else:
            cond_init = to.as_tensor(cond_init, dtype=to.get_default_dtype())
            cond_init = correct_atleast_2d(to.atleast_2d(cond_init))
            if cond_init.shape != expected_shape:
                raise pyrado.ShapeErr(given=cond_init,
                                      expected_match=expected_shape)

        # Stack the initial conditions on top of the final conditions
        conds = to.cat([cond_init, cond_final], dim=0)
        assert conds.shape[0] in [4, 6]

        # The stacked conditions are the policy parameters
        self.conds = nn.Parameter(conds, requires_grad=False)

        # One column of polynomial coefficients per output dimension
        self.coeffs = to.empty(self._order + 1,
                               spec.act_space.flat_dim,
                               device=self.device)

        if rand_init:
            # No final condition was given, run the custom parameter initialization
            extra_kwargs = dict(
            ) if init_param_kwargs is None else init_param_kwargs
            self.init_param(None, **extra_kwargs)
        else:
            # Compute the coefficients to match the given (initial and) final conditions
            self._compute_coefficients()

        self.to(self.device)
예제 #7
0
    def __init__(
        self,
        spec: EnvSpec,
        dt: float,
        t_end: float,
        cond_lvl: str,
        cond_final: Union[to.Tensor, List[float], List[List[float]]],
        cond_init: Union[to.Tensor, List[float], List[List[float]]],
        t_init: float = 0.0,
        overtime_behavior: str = "hold",
    ):
        """
        In contrast to PolySplineTimePolicy, this constructor needs to be called with learned / working values for
        `cond_final` and `cond_init`, i.e. both conditions are mandatory.

        :param spec: environment specification
        :param dt: time step [s]
        :param t_end: final time [s]
        :param cond_lvl: highest level of the condition, so far, only velocity 'vel' and acceleration 'acc' level
                         conditions on the polynomial are supported. These need to be consistent with the actions.
        :param cond_final: final condition for the least squares problem, needs to be of shape [X, dim_act] where X
                           is 2 if `cond_lvl == 'vel'` and 3 if `cond_lvl == 'acc'`
        :param cond_init: initial condition for the least squares problem, needs to be of shape [X, dim_act] where X
                          is 2 if `cond_lvl == 'vel'` and 3 if `cond_lvl == 'acc'`
        :param t_init: initial time [s], also stored as the current time
        :param overtime_behavior: determines how the policy acts when `t > t_end`, e.g. 'hold' to keep the last action
        :raises pyrado.ValueErr: if `cond_lvl` is neither 'vel' nor 'acc'
        :raises pyrado.ShapeErr: if a condition does not have the shape described above
        """
        super().__init__()

        # Sizes of the input and the output
        self.input_size = spec.obs_space.flat_dim
        self.output_size = spec.act_space.flat_dim

        # Timing
        self.dt = float(dt)
        self.t_end = float(t_end)
        self.t_init = float(t_init)
        self.t_curr = float(t_init)
        self.overtime_behavior = overtime_behavior.lower()

        # Keep the shape and the flat dimension of the action space as separate attributes
        self.act_space_shape = spec.act_space.shape
        self.act_space_flat_dim = spec.act_space.flat_dim

        # The condition level fixes the order of the polynomial and thereby the number of conditions per side
        order_by_lvl = {"vel": 3, "acc": 5}
        lvl = cond_lvl.lower()
        if lvl not in order_by_lvl:
            raise pyrado.ValueErr(given=cond_lvl,
                                  eq_constraint="'vel' or 'acc'")
        self.order = order_by_lvl[lvl]
        num_cond = (self.order + 1) // 2
        expected_shape = (num_cond, spec.act_space.flat_dim)

        # Convert and validate the final condition
        cond_final = to.as_tensor(cond_final, dtype=to.get_default_dtype())
        cond_final = correct_atleast_2d(to.atleast_2d(cond_final))
        if cond_final.shape != expected_shape:
            raise pyrado.ShapeErr(given=cond_final,
                                  expected_match=expected_shape)

        # Convert and validate the initial condition
        cond_init = to.as_tensor(cond_init, dtype=to.get_default_dtype())
        cond_init = correct_atleast_2d(to.atleast_2d(cond_init))
        if cond_init.shape != expected_shape:
            raise pyrado.ShapeErr(given=cond_init,
                                  expected_match=expected_shape)

        # Stack the initial conditions on top of the final conditions
        self.conds = to.cat([cond_init, cond_final], dim=0)
        assert self.conds.shape[0] in [4, 6]

        # One column of polynomial coefficients per output dimension
        self.coeffs = to.empty(self.order + 1, spec.act_space.flat_dim)

        self.compute_coefficients()
예제 #8
0
def test_correct_atleast_2d(x):
    """Check that the first dimension of the output of `correct_atleast_2d` equals the length of the input."""
    leading_dim = correct_atleast_2d(x).shape[0]
    assert leading_dim == len(x)