Exemple #1
0
def return_weighted_average(action_trajectories: jnp.ndarray,
                            cum_reward: jnp.ndarray,
                            kappa: float) -> jnp.ndarray:
    r"""Calculates return-weighted average over all trajectories.

    This will calculate the return-weighted average over a set of trajectories
    as defined on l.17 of Alg. 2 in the MBOP paper:
    [https://arxiv.org/abs/2008.05556].

    Note: The maximum of `cum_reward` is subtracted before exponentiation.
    The shift multiplies every weight by the same constant, so it cancels in
    the normalized average, and for non-negative `kappa` the largest exponent
    becomes 0, which keeps `exp` from overflowing.

    Args:
      action_trajectories: (n_trajectories, horizon, action_dim) tensor of
        action trajectories, corresponds to `A` in Alg. 2.
      cum_reward: (n_trajectories) vector of corresponding cumulative rewards
        (returns) for each trajectory. Corresponds to `\mathcal{R}` in Alg. 2.
        Size-1 axes are squeezed away before the values are used as weights.
      kappa: `\kappa` constant, changes the 'peakiness' of the exponential
        averaging.

    Returns:
      Single action trajectory corresponding to the return-weighted average of
        the trajectories.
    """
    # Subtract the maximum reward to avoid overflow / NaNs in the exponential.
    shifted_reward = cum_reward - cum_reward.max()
    # Dropping size-1 axes lets jnp.average broadcast 1-D weights along axis 0.
    # atleast_1d keeps the single-trajectory case valid: squeezing a (1,)
    # vector yields a 0-d array, which jnp.average rejects as weights.
    weights = jnp.exp(kappa * jnp.atleast_1d(jnp.squeeze(shifted_reward)))
    return jnp.average(action_trajectories, weights=weights, axis=0)
Exemple #2
0
def _save_results(
    x: jnp.ndarray,
    prior_samples: Dict[str, jnp.ndarray],
    posterior_samples: Dict[str, jnp.ndarray],
    posterior_predictive: Dict[str, jnp.ndarray],
    num_train: int,
) -> None:
    """Write the sample dictionaries and a prediction plot to ./data/seasonal.

    Three ``.npz`` archives (prior samples, posterior samples, posterior
    predictive) and one figure, ``prediction.png``, are written. The figure
    overlays the flattened ground truth ``x`` with the mean of
    ``posterior_predictive["x"]`` and a shaded band taken from
    ``diagnostics.hpdi``, drawn separately for the first ``num_train`` time
    steps ("prediction") and the remaining ones ("forecast").

    Args:
        x: Ground-truth series; plotted via ``x.ravel()``.
        prior_samples: Mapping saved as-is into the prior archive.
        posterior_samples: Mapping saved as-is into the posterior archive.
        posterior_predictive: Mapping saved as-is; its ``"x"`` entry is
            plotted. The code slices it along axis 1 at ``num_train`` and
            averages over axis 0, so it is presumably laid out as
            (sample, time, ...) -- TODO confirm against the caller.
        num_train: Number of leading time steps treated as the training span.
    """
    root = pathlib.Path("./data/seasonal")
    # parents=True: the intermediate "./data" directory may not exist yet,
    # in which case a plain mkdir raises FileNotFoundError.
    root.mkdir(parents=True, exist_ok=True)

    # NOTE(review): "piror" looks like a typo for "prior". The file name is
    # kept unchanged so that any existing reader of this archive still works.
    jnp.savez(root / "piror_samples.npz", **prior_samples)
    jnp.savez(root / "posterior_samples.npz", **posterior_samples)
    jnp.savez(root / "posterior_predictive.npz", **posterior_predictive)

    x_pred = posterior_predictive["x"]

    # Training span: the first num_train steps along axis 1.
    x_pred_trn = x_pred[:, :num_train]
    x_hpdi_trn = diagnostics.hpdi(x_pred_trn)
    t_train = np.arange(num_train)

    # Forecast span: everything after the training steps.
    x_pred_tst = x_pred[:, num_train:]
    x_hpdi_tst = diagnostics.hpdi(x_pred_tst)
    num_test = x_pred_tst.shape[1]
    t_test = np.arange(num_train, num_train + num_test)

    prop_cycle = plt.rcParams["axes.prop_cycle"]
    colors = prop_cycle.by_key()["color"]

    plt.figure(figsize=(12, 6))
    plt.plot(x.ravel(), label="ground truth", color=colors[0])

    # One mean line plus one shaded band per span. Index 0 / 1 on the leading
    # axis of the hpdi result are used as the lower / upper band edges.
    spans = (
        (t_train, x_pred_trn, x_hpdi_trn, "prediction", colors[1]),
        (t_test, x_pred_tst, x_hpdi_tst, "forecast", colors[2]),
    )
    for steps, pred, hpdi, label, color in spans:
        plt.plot(steps, pred.mean(0)[:, 0], label=label, color=color)
        plt.fill_between(steps,
                         hpdi[0, :, 0, 0],
                         hpdi[1, :, 0, 0],
                         alpha=0.3,
                         color=color)

    # Pad the y-range around the ground truth so outliers in the predictive
    # samples do not dominate the axis scaling.
    plt.ylim(x.min() - 0.5, x.max() + 0.5)
    plt.legend()
    plt.tight_layout()
    plt.savefig(root / "prediction.png")
    plt.close()
Exemple #3
0
def int_quantize_jit(x: jnp.ndarray, max_int: int, to_type: str):
    """Quantize each row of ``x`` to integers in ``[0, max_int]``.

    Every row (axis 1) is shifted by its minimum and divided by its value
    range, then scaled by ``max_int`` and cast to ``to_type``.

    Args:
        x: Array with at least two axes; statistics are taken along axis 1.
        max_int: Integer value that the per-row maximum maps to.
        to_type: Dtype name the quantized values are cast to.

    Returns:
        A tuple ``(offset, scale, quantized)`` where ``offset`` is the per-row
        minimum and ``scale`` the per-row range (both with axis 1 kept as
        size 1), and ``quantized`` has the shape of ``x`` and dtype
        ``to_type``.
    """
    row_min = x.min(axis=1, keepdims=True)
    row_max = x.max(axis=1, keepdims=True)

    offset = row_min
    scale = row_max - row_min

    # A constant row has zero range; dividing by it would give 0/0 = NaN and
    # an undefined integer after the cast. Divide by 1 there instead, so the
    # row quantizes to 0. The returned ``scale`` is left untouched.
    safe_scale = jnp.where(scale == 0, jnp.ones_like(scale), scale)
    normalized = (x - offset) / safe_scale
    # Adding 0.5 before the truncating cast rounds to nearest, not to zero.
    return offset, scale, (normalized * max_int + 0.5).astype(to_type)
Exemple #4
0
def is_symmetric(
    row: jnp.ndarray,
    col: jnp.ndarray,
    data: Optional[jnp.ndarray] = None,
    shape: Optional[Tuple[int, int]] = None,
):
    """Check whether a coordinate-format sparse pattern equals its transpose.

    Each (row, col) pair is flattened to a single index, once as given and
    once with the roles of row and col swapped. The pattern counts as
    symmetric when the matrix is square and the swapped indices, once sorted,
    match the original indices position by position. Because the comparison
    is positional, a True result requires the input pairs to already be in
    ascending flattened-index order.

    Args:
        row: Row index of every stored entry.
        col: Column index of every stored entry.
        data: Optional values of the stored entries. When given, the values
            reordered by the transpose permutation must equal the originals
            exactly.
        shape: Optional ``(nrows, ncols)``. When omitted, each extent is
            inferred as the largest index plus one.

    Returns:
        A scalar boolean array that is True only if every check passes.
    """
    if shape is not None:
        nrows, ncols = shape
    else:
        nrows, ncols = row.max() + 1, col.max() + 1

    flat = indices_1d(row, col, ncols)
    flat_transposed = indices_1d(col, row, nrows)

    if data is not None:
        # The permutation that sorts the transposed indices also maps every
        # value onto the position of its mirrored entry.
        order = jnp.argsort(flat_transposed)
        checks = [
            nrows == ncols,
            jnp.all(flat == flat_transposed.take(order)),
            jnp.all(data.take(order) == data),
        ]
    else:
        checks = [
            nrows == ncols,
            jnp.all(flat == jnp.sort(flat_transposed)),
        ]
    return jnp.all(jnp.stack(checks))
Exemple #5
0
def split_by_class(
    rng: PRNGKey,
    labels: jnp.ndarray,
    examples_per_class: tp.Sequence[int],
    num_classes: tp.Optional[int] = None,
) -> tp.Sequence[jnp.ndarray]:
    """Randomly partition example indices into class-balanced subsets.

    For every class the indices of its examples are shuffled with a
    class-specific key and cut into consecutive chunks whose sizes follow
    ``examples_per_class``. Chunk ``i`` of every class is merged into output
    subset ``i``. Whatever is left of a class after the requested chunks goes
    into one final, extra subset. A class with fewer examples than requested
    simply contributes shorter (possibly empty) chunks.

    This function needs concrete values (data-dependent shapes), so it cannot
    be traced under ``jax.jit``.

    Args:
        rng: JAX PRNG key; split into one key per class.
        labels: 1-D integer array of class labels, one per example.
        examples_per_class: Number of examples per class requested for each
            output subset.
        num_classes: Total number of classes. Defaults to ``labels.max() + 1``.

    Returns:
        A tuple of ``len(examples_per_class) + 1`` sorted index arrays; the
        last one holds the leftover examples.
    """
    boundaries = jnp.cumsum(jnp.asarray(examples_per_class))
    if num_classes is None:
        # one_hot and random.split take a static Python int, not a 0-d array.
        num_classes = int(labels.max()) + 1
    class_masks = jax.nn.one_hot(labels, num_classes, dtype=bool)

    chunks_per_class = []
    for class_id, class_rng in enumerate(jax.random.split(rng, num_classes)):
        (members, ) = jnp.where(class_masks[:, class_id])
        shuffled = jax.random.permutation(class_rng, members)
        # Clamp the cut points so that small classes yield empty trailing
        # chunks instead of out-of-range split positions.
        cut_points = jnp.minimum(boundaries, shuffled.size)
        chunks_per_class.append(jnp.split(shuffled, cut_points))

    # Transpose from "per class -> per subset" to "per subset -> per class".
    return tuple(
        jnp.sort(jnp.concatenate(subset)) for subset in zip(*chunks_per_class))
Exemple #6
0
def indices_1d(row: jnp.ndarray,
               col: jnp.ndarray,
               ncols: Optional[int] = None):
    """Flatten (row, col) index pairs into row-major 1-D indices.

    Args:
        row: Row indices.
        col: Column indices.
        ncols: Number of columns of the matrix. When omitted, it is taken to
            be the largest column index plus one.

    Returns:
        ``row * ncols + col``, computed elementwise.
    """
    width = col.max() + 1 if ncols is None else ncols
    return row * width + col