def return_weighted_average(action_trajectories: jnp.ndarray,
                            cum_reward: jnp.ndarray,
                            kappa: float) -> jnp.ndarray:
    r"""Calculates the return-weighted average over all trajectories.

    This computes the return-weighted average over a set of trajectories as
    defined on l.17 of Alg. 2 in the MBOP paper:
    [https://arxiv.org/abs/2008.05556].

    Note: The maximum of `cum_reward` is subtracted before exponentiation.
    This leaves the normalized weights unchanged, and for `kappa >= 0` every
    exponent is then <= 0, so `exp` cannot overflow and the best trajectory
    always has weight 1 (the weights never sum to zero).

    Args:
      action_trajectories: (n_trajectories, horizon, action_dim) tensor of
        action trajectories, corresponds to `A` in Alg. 2.
      cum_reward: (n_trajectories) vector of corresponding cumulative rewards
        (returns) for each trajectory. Corresponds to `\mathcal{R}` in Alg. 2.
        Extra singleton dimensions, e.g. (n_trajectories, 1), are accepted.
      kappa: `\kappa` constant, changes the 'peakiness' of the exponential
        averaging.

    Returns:
      Single action trajectory of shape (horizon, action_dim) corresponding to
      the return-weighted average of the trajectories.
    """
    # Subtract the maximum reward to avoid overflow / NaNs in the exponential.
    shifted_reward = cum_reward - cum_reward.max()
    # jnp.average needs 1-D weights along axis 0. Reshape explicitly instead of
    # squeezing: squeeze would turn a single-trajectory input into a 0-d array,
    # which jnp.average rejects.
    n_trajectories = action_trajectories.shape[0]
    shifted_reward = jnp.reshape(shifted_reward, (n_trajectories,))
    exp_cum_reward = jnp.exp(kappa * shifted_reward)
    return jnp.average(action_trajectories, weights=exp_cum_reward, axis=0)
def _save_results(
    x: jnp.ndarray,
    prior_samples: Dict[str, jnp.ndarray],
    posterior_samples: Dict[str, jnp.ndarray],
    posterior_predictive: Dict[str, jnp.ndarray],
    num_train: int,
) -> None:
    """Save sample archives and a prediction/forecast plot to ./data/seasonal.

    Writes three ``.npz`` archives (prior samples, posterior samples and
    posterior predictive samples) and a ``prediction.png`` figure comparing the
    ground truth with the posterior predictive mean and its HPDI band, split
    into a training part and a forecast part.

    Args:
        x: Ground-truth series; it is flattened with ``ravel()`` for plotting.
        prior_samples: Arrays stored as ``piror_samples.npz``.
        posterior_samples: Arrays stored as ``posterior_samples.npz``.
        posterior_predictive: Arrays stored as ``posterior_predictive.npz``.
            Must contain the key ``"x"``, whose axis 1 is the time axis.
            Presumably shaped (num_samples, time, 1, 1) - TODO confirm.
        num_train: Number of leading time steps treated as training data; the
            remaining steps are plotted as the forecast.
    """
    root = pathlib.Path("./data/seasonal")
    # parents=True: without it mkdir raises FileNotFoundError when ./data does
    # not exist yet.
    root.mkdir(parents=True, exist_ok=True)

    # NOTE(review): "piror" looks like a misspelling of "prior". The file name
    # is kept unchanged because other code may load the archive by this name.
    jnp.savez(root / "piror_samples.npz", **prior_samples)
    jnp.savez(root / "posterior_samples.npz", **posterior_samples)
    jnp.savez(root / "posterior_predictive.npz", **posterior_predictive)

    x_pred = posterior_predictive["x"]

    # Training window: the first num_train time steps.
    x_pred_trn = x_pred[:, :num_train]
    x_hpdi_trn = diagnostics.hpdi(x_pred_trn)
    t_train = np.arange(num_train)

    # Forecast window: everything after the training window.
    x_pred_tst = x_pred[:, num_train:]
    x_hpdi_tst = diagnostics.hpdi(x_pred_tst)
    num_test = x_pred_tst.shape[1]
    t_test = np.arange(num_train, num_train + num_test)

    # Reuse the default colour cycle so each line matches its shaded band.
    prop_cycle = plt.rcParams["axes.prop_cycle"]
    colors = prop_cycle.by_key()["color"]

    plt.figure(figsize=(12, 6))
    plt.plot(x.ravel(), label="ground truth", color=colors[0])

    plt.plot(t_train, x_pred_trn.mean(0)[:, 0], label="prediction", color=colors[1])
    # The HPDI result is indexed as [bound, time, 0, 0]; index 0 and 1 on the
    # first axis are used as the two edges of the band.
    plt.fill_between(
        t_train,
        x_hpdi_trn[0, :, 0, 0],
        x_hpdi_trn[1, :, 0, 0],
        alpha=0.3,
        color=colors[1],
    )

    plt.plot(t_test, x_pred_tst.mean(0)[:, 0], label="forecast", color=colors[2])
    plt.fill_between(
        t_test,
        x_hpdi_tst[0, :, 0, 0],
        x_hpdi_tst[1, :, 0, 0],
        alpha=0.3,
        color=colors[2],
    )

    plt.ylim(x.min() - 0.5, x.max() + 0.5)
    plt.legend()
    plt.tight_layout()
    plt.savefig(root / "prediction.png")
    plt.close()
def int_quantize_jit(x: jnp.ndarray, max_int: int, to_type: str):
    """Quantize each row of ``x`` to integers in ``[0, max_int]``.

    Every row (axis 1) is shifted by its minimum and divided by its range, then
    multiplied by ``max_int`` and cast to ``to_type``.

    Args:
        x: 2-D (or higher) array; the min/max are taken along axis 1.
        max_int: Largest quantized value, mapped to the row maximum.
        to_type: dtype (name) the quantized values are cast to.

    Returns:
        A tuple ``(offset, scale, quantized)`` where ``offset`` is the per-row
        minimum and ``scale`` the per-row range (both with axis 1 kept as a
        singleton), and ``quantized`` has the shape of ``x`` and dtype
        ``to_type``. Rows with zero range quantize to all zeros.
    """
    # Local names avoid shadowing the builtins `min` / `max`.
    row_min = x.min(axis=1, keepdims=True)
    row_max = x.max(axis=1, keepdims=True)
    offset = row_min
    scale = row_max - row_min
    # A constant row has scale == 0; dividing by it would give 0/0 = NaN and an
    # undefined float-to-int cast. Divide by 1 there instead (numerator is 0),
    # while still returning the true scale to the caller.
    safe_scale = jnp.where(scale == 0, 1, scale)
    normalized = (x - row_min) / safe_scale
    # Adding 0.5 before the truncating cast rounds to nearest instead of
    # rounding towards zero.
    quantized = (normalized * max_int + 0.5).astype(to_type)
    return offset, scale, quantized
def is_symmetric(
    row: jnp.ndarray,
    col: jnp.ndarray,
    data: Optional[jnp.ndarray] = None,
    shape: Optional[Tuple[int, int]] = None,
):
    """Check whether a matrix given by coordinate arrays is symmetric.

    The (row, col) pairs are flattened to linear indices, once as given and
    once with rows and columns swapped. The swapped indices are sorted and
    compared elementwise with the unswapped ones, so the check can only pass
    when the entries are already in ascending order of their linear index
    (presumably guaranteed by callers - confirm).

    Args:
        row: Row index of every stored entry.
        col: Column index of every stored entry.
        data: Optional values of the stored entries. When given, the values
            must also be unchanged under the transposing permutation.
        shape: Optional ``(nrows, ncols)``. When omitted, each dimension is
            taken as the largest index plus one.

    Returns:
        A scalar boolean array: True only if the matrix is square and all
        comparisons above hold.
    """
    nrows, ncols = (row.max() + 1, col.max() + 1) if shape is None else shape

    # Linear indices of the matrix and of its transpose (which has `nrows`
    # columns).
    forward = indices_1d(row, col, ncols)
    transposed = indices_1d(col, row, nrows)

    if data is None:
        checks = [
            nrows == ncols,
            jnp.all(forward == jnp.sort(transposed)),
        ]
    else:
        # Permutation that sorts the transposed entries.
        order = jnp.argsort(transposed)
        checks = [
            nrows == ncols,
            jnp.all(forward == transposed.take(order)),
            jnp.all(data.take(order) == data),
        ]
    return jnp.all(jnp.stack(checks))
def split_by_class(
    rng: PRNGKey,
    labels: jnp.ndarray,
    examples_per_class: tp.Sequence[int],
    num_classes: tp.Optional[int] = None,
) -> tp.Sequence[jnp.ndarray]:
    """Randomly split example indices into groups with fixed per-class counts.

    For every class the indices of its examples are shuffled and cut at the
    cumulative sums of ``examples_per_class``. Split ``j`` therefore receives
    ``examples_per_class[j]`` examples of each class, and one extra final split
    receives whatever is left. If a class has too few examples the cut points
    are clamped to its size, so later splits get fewer (or no) examples of it.

    Args:
        rng: PRNG key; it is split into one key per class.
        labels: 1-D array of integer class labels, one per example.
        examples_per_class: Number of examples per class for each split.
        num_classes: Number of classes. Defaults to ``labels.max() + 1``.

    Returns:
        A tuple of ``len(examples_per_class) + 1`` arrays of example indices,
        each sorted in ascending order.
    """
    splits = jnp.cumsum(jnp.asarray(examples_per_class))
    # Only the cumulative boundaries are used from here on.
    del examples_per_class
    if num_classes is None:
        # Concretize to a Python int: the class count is used as a static size
        # (one-hot width, number of PRNG keys), not as a traced array value.
        num_classes = int(labels.max()) + 1
    masks = jax.nn.one_hot(labels, num_classes, dtype=bool)
    id_lists = [[] for _ in range(len(splits) + 1)]
    for i, class_rng in enumerate(jax.random.split(rng, num_classes)):
        (indices,) = jnp.where(masks[:, i])
        indices = jax.random.permutation(class_rng, indices)
        # Clamp boundaries so classes with few examples yield empty pieces
        # instead of out-of-range split points.
        pieces = jnp.split(indices, jnp.minimum(splits, indices.size))
        for id_list, ids in zip(id_lists, pieces):
            id_list.append(ids)
    return tuple(jnp.sort(jnp.concatenate(ids)) for ids in id_lists)
def indices_1d(row: jnp.ndarray, col: jnp.ndarray, ncols: Optional[int] = None):
    """Flatten (row, col) index pairs into row-major linear indices.

    Args:
        row: Row indices.
        col: Column indices.
        ncols: Number of columns; defaults to the largest column index plus one.

    Returns:
        ``row * ncols + col``, computed elementwise.
    """
    width = col.max() + 1 if ncols is None else ncols
    return row * width + col