Example #1
    def local_linear_dynamics(self, latents, actions):
        # Predict a per-sample dynamics matrix of shape
        # (representation_size, representation_size + action_dim) from each latent.
        output = self.locally_linear_f2(F.relu(self.locally_linear_f1(latents)))
        dynamics = output.view(latents.shape[0], self.representation_size,
                               self.representation_size + self.action_dim)

        # Apply each matrix to its concatenated [latent, action] vector: z' = A @ [z; a].
        z_prime = ptu.zeros_like(latents)
        action_obs_pair = torch.cat([latents, actions], dim=1)
        for i in range(latents.shape[0]):
            z_prime[i] = torch.matmul(dynamics[i], action_obs_pair[i])
        return z_prime
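
The method above depends on its surrounding class (the locally_linear_f1/f2 layers, representation_size, action_dim, and the ptu tensor helpers), so it is not runnable on its own. Below is a minimal, self-contained sketch of how it might be wired up and called; the LocalLinearModel class, the layer sizes, and the torch.bmm vectorization of the per-sample loop are assumptions for illustration, not from the source.

import torch
import torch.nn as nn
import torch.nn.functional as F


class LocalLinearModel(nn.Module):
    """Hypothetical container for the method above; layer sizes are illustrative."""

    def __init__(self, representation_size=8, action_dim=2, hidden_size=64):
        super().__init__()
        self.representation_size = representation_size
        self.action_dim = action_dim
        out_dim = representation_size * (representation_size + action_dim)
        self.locally_linear_f1 = nn.Linear(representation_size, hidden_size)
        self.locally_linear_f2 = nn.Linear(hidden_size, out_dim)

    def local_linear_dynamics(self, latents, actions):
        # Same computation as above, with the per-sample loop replaced by a
        # single batched matrix multiply (torch.bmm).
        output = self.locally_linear_f2(F.relu(self.locally_linear_f1(latents)))
        dynamics = output.view(latents.shape[0], self.representation_size,
                               self.representation_size + self.action_dim)
        action_obs_pair = torch.cat([latents, actions], dim=1)
        return torch.bmm(dynamics, action_obs_pair.unsqueeze(-1)).squeeze(-1)


model = LocalLinearModel()
latents = torch.randn(5, 8)
actions = torch.randn(5, 2)
z_prime = model.local_linear_dynamics(latents, actions)
print(z_prime.shape)  # torch.Size([5, 8])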
Example #2
    def get_tau(self, actions, fp=None):
        # Returns quantile fractions tau (cumulative), their midpoints tau_hat,
        # and the per-quantile masses presum_tau, each of shape (N, num_quantiles).
        if self.tau_type == 'fix':
            # Fixed, evenly spaced fractions: each quantile gets mass 1/T.
            presum_tau = ptu.zeros(len(actions), self.num_quantiles) + 1. / self.num_quantiles
        elif self.tau_type == 'iqn':
            # Random fractions; adding 0.1 keeps adjacent taus from collapsing together.
            presum_tau = ptu.rand(len(actions), self.num_quantiles) + 0.1
            presum_tau /= presum_tau.sum(dim=-1, keepdim=True)
        else:
            raise ValueError('unsupported tau_type: {}'.format(self.tau_type))
        tau = torch.cumsum(presum_tau, dim=1)  # (N, T); tau_1 ... tau_N in the paper
        with torch.no_grad():
            tau_hat = ptu.zeros_like(tau)
            tau_hat[:, 0:1] = tau[:, 0:1] / 2.
            tau_hat[:, 1:] = (tau[:, 1:] + tau[:, :-1]) / 2.
        return tau, tau_hat, presum_tau
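
The tau/tau_hat construction is easiest to see on a concrete case. Below is a minimal standalone sketch of the midpoint computation for the 'fix' branch, using plain torch in place of the ptu helpers; the values of num_quantiles and the printed numbers are for illustration only.

import torch

num_quantiles = 4
# 'fix' branch: each quantile gets equal mass 1/T.
presum_tau = torch.full((1, num_quantiles), 1. / num_quantiles)
tau = torch.cumsum(presum_tau, dim=1)             # tensor([[0.2500, 0.5000, 0.7500, 1.0000]])
tau_hat = torch.zeros_like(tau)
tau_hat[:, 0:1] = tau[:, 0:1] / 2.                # midpoint of [0, tau_1]
tau_hat[:, 1:] = (tau[:, 1:] + tau[:, :-1]) / 2.  # midpoints of [tau_{i-1}, tau_i]
print(tau_hat)  # tensor([[0.1250, 0.3750, 0.6250, 0.8750]])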