# Assumed imports for this snippet (the `ptu` helpers follow the rlkit
# convention of `import rlkit.torch.pytorch_util as ptu`):
import torch
import torch.nn.functional as F
import rlkit.torch.pytorch_util as ptu


def local_linear_dynamics(self, latents, actions):
    # Predict a per-sample dynamics matrix of shape
    # (representation_size, representation_size + action_dim) from each latent.
    output = self.locally_linear_f2(F.relu(self.locally_linear_f1(latents)))
    dynamics = output.view(
        latents.shape[0], self.representation_size, self.representation_size + self.action_dim
    )
    # Apply each sample's dynamics matrix to its concatenated (latent, action) vector.
    z_prime = ptu.zeros_like(latents)
    action_obs_pair = torch.cat([latents, actions], dim=1)
    for i in range(latents.shape[0]):
        z_prime[i] = torch.matmul(dynamics[i], action_obs_pair[i])
    return z_prime
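# Illustrative sketch (not part of the original code): the per-sample loop in
# local_linear_dynamics is a batched matrix-vector product, so the same result
# can be obtained with torch.bmm, given dynamics of shape (N, R, R + A) and
# action_obs_pair of shape (N, R + A). The helper name below is hypothetical.
def _batched_local_linear_step(dynamics, action_obs_pair):
    # (N, R, R + A) @ (N, R + A, 1) -> (N, R, 1) -> (N, R)
    return torch.bmm(dynamics, action_obs_pair.unsqueeze(-1)).squeeze(-1)
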
def get_tau(self, actions, fp=None):
    if self.tau_type == 'fix':
        # Fixed, evenly spaced quantile fractions.
        presum_tau = ptu.zeros(len(actions), self.num_quantiles) + 1. / self.num_quantiles
    elif self.tau_type == 'iqn':
        # Add 0.1 to prevent adjacent taus from getting too close together.
        presum_tau = ptu.rand(len(actions), self.num_quantiles) + 0.1
        presum_tau /= presum_tau.sum(dim=-1, keepdim=True)
    else:
        raise ValueError('Unsupported tau_type: {}'.format(self.tau_type))
    tau = torch.cumsum(presum_tau, dim=1)  # (N, T); these are tau_1 ... tau_N in the paper
    with torch.no_grad():
        # tau_hat are the midpoints between consecutive quantile fractions.
        tau_hat = ptu.zeros_like(tau)
        tau_hat[:, 0:1] = tau[:, 0:1] / 2.
        tau_hat[:, 1:] = (tau[:, 1:] + tau[:, :-1]) / 2.
    return tau, tau_hat, presum_tau
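
# Standalone sketch (assumed helper, not from the original code) of the same
# 'iqn' tau-sampling scheme as get_tau, written without the class so it can be
# run directly; tau, tau_hat, and presum_tau all have shape (batch_size, num_quantiles).
def sample_iqn_taus(batch_size, num_quantiles):
    presum_tau = torch.rand(batch_size, num_quantiles) + 0.1
    presum_tau /= presum_tau.sum(dim=-1, keepdim=True)
    tau = torch.cumsum(presum_tau, dim=1)  # cumulative quantile fractions
    tau_hat = torch.zeros_like(tau)        # midpoints between consecutive fractions
    tau_hat[:, 0:1] = tau[:, 0:1] / 2.
    tau_hat[:, 1:] = (tau[:, 1:] + tau[:, :-1]) / 2.
    return tau, tau_hat, presum_tau


# Example: tau, tau_hat, presum_tau = sample_iqn_taus(batch_size=4, num_quantiles=8)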