def test_exponential_shape_tensor_param(self):
    expon = Exponential(torch.Tensor([1, 1]))
    self.assertEqual(expon._batch_shape, torch.Size((2,)))
    self.assertEqual(expon._event_shape, torch.Size(()))
    self.assertEqual(expon.sample().size(), torch.Size((2,)))
    self.assertEqual(expon.sample((3, 2)).size(), torch.Size((3, 2, 2)))
    self.assertEqual(expon.log_prob(self.tensor_sample_1).size(), torch.Size((3, 2)))
    self.assertRaises(ValueError, expon.log_prob, self.tensor_sample_2)

def test_exponential_shape_scalar_param(self):
    expon = Exponential(1.)
    self.assertEqual(expon._batch_shape, torch.Size())
    self.assertEqual(expon._event_shape, torch.Size())
    self.assertEqual(expon.sample().size(), torch.Size(()))
    self.assertEqual(expon.sample((3, 2)).size(), torch.Size((3, 2)))
    self.assertRaises(ValueError, expon.log_prob, self.scalar_sample)
    self.assertEqual(expon.log_prob(self.tensor_sample_1).size(), torch.Size((3, 2)))
    self.assertEqual(expon.log_prob(self.tensor_sample_2).size(), torch.Size((3, 2, 3)))
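For reference, the shape rules these two tests exercise can be reproduced outside the test harness; a minimal sketch, assuming the (3, 2)- and (3, 2, 3)-shaped sample fixtures implied by the expected log_prob shapes above:

import torch
from torch.distributions import Exponential

# Tensor rate: batch_shape follows the rate's shape.
expon = Exponential(torch.tensor([1.0, 1.0]))        # batch_shape (2,), event_shape ()
assert expon.sample((3, 2)).shape == torch.Size((3, 2, 2))
assert expon.log_prob(torch.ones(3, 2)).shape == torch.Size((3, 2))

# Scalar rate: empty batch_shape, so the requested sample shape passes through unchanged.
expon = Exponential(1.0)
assert expon.sample((3, 2)).shape == torch.Size((3, 2))
assert expon.log_prob(torch.ones(3, 2, 3)).shape == torch.Size((3, 2, 3))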
Example #3
    def _make_ma_mdp(self):
        joint_action_shape = self.joint_action_shape
        n_states = self.n_states
        n_agents = len(joint_action_shape)
        rand = self.rand

        # Reward perturbation
        perturbation = mu.unsqueeze(
            self.reward_perturbation, -1,
            n_states + 3 - self.reward_perturbation.dim())
        # Generate transition probability tensor
        trans_prob = th.rand(n_states,
                             *joint_action_shape,
                             n_states,
                             generator=rand)
        # Acyclic (episodic) MDP
        if self.acyclic:
            states_idx, next_states_idx = th.tril_indices(n_states, n_states)
            trans_prob[states_idx, ..., next_states_idx] = 0
        # Normalize transition probability matrix
        trans_prob /= trans_prob.sum(dim=-1, keepdim=True)
        trans_prob[th.isnan(trans_prob)] = 0

        # Generate random reward (following method ensures enough variance in rewards)
        # 1) Generate rewards "core" for state, joint actions and agents
        rewards = th.randn(n_states,
                           *joint_action_shape,
                           1,
                           n_agents,
                           generator=rand)
        # 2) Multiply "core" by scales to generate different rewards for next state
        scales_dist = Exponential(th.tensor(1.))
        with mu.use_rand(rand):
            rewards *= scales_dist.sample(
                (n_states, *joint_action_shape, n_states, n_agents))
        # 3) Correlate rewards
        rewards = rewards @ self.reward_correlation

        ## Transition probability
        self._trans_prob = trans_prob
        ## Rewards for state-joint actions
        self._rewards = rewards
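The Exponential scaling in step 2 works by broadcasting the size-1 next-state dimension of the reward "core". A minimal sketch of that idea, collapsing the joint-action dimensions into a single n_actions axis for brevity (n_states, n_actions, n_agents are illustrative values, not taken from the original class):

import torch as th
from torch.distributions import Exponential

n_states, n_actions, n_agents = 4, 3, 2
core = th.randn(n_states, n_actions, 1, n_agents)         # one value per (s, a, agent), shared across s'
scales = Exponential(th.tensor(1.)).sample(
    (n_states, n_actions, n_states, n_agents))            # heavy-tailed scale per next state s'
rewards = core * scales                                   # broadcasts the size-1 dim over next states
assert rewards.shape == (n_states, n_actions, n_states, n_agents)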
def forward(self, theta, n_samp=1):
    n_exp = theta.shape[0]
    n_samp = torch.Size([n_samp, 1])
    unit = torch.ones(n_exp).to(self.device)
    with torch.autograd.no_grad():
        # Each latent is drawn with shape (n_samp, 1, n_exp) and permuted
        # to (n_exp, n_samp, 1) so the experiment axis comes first.
        d0 = Normal(theta[:, 0], unit)
        z0 = d0.sample(n_samp).permute(2, 0, 1)
        d1 = Normal(3 * unit, torch.exp(theta[:, 1] / 3))
        z1 = d1.sample(n_samp).permute(2, 0, 1)
        d2_1 = Normal(-2 * unit, unit)
        d2_2 = Normal(2 * unit, .5 * unit)
        d2_b = Bernoulli(.5 * unit)
        z2_b = d2_b.sample(n_samp).float()
        # Gaussian mixture: the Bernoulli draw selects component d2_1 or d2_2 per sample
        z2 = ((z2_b * d2_1.sample(n_samp) +
               (1 - z2_b) * d2_2.sample(n_samp)).permute(2, 0, 1))
        d3 = Uniform(-5 * unit, theta[:, 2])
        z3 = d3.sample(n_samp).permute(2, 0, 1)
        d4 = Exponential(.5 * unit)
        z4 = d4.sample(n_samp).permute(2, 0, 1)
        # Concatenate the five latents and mix each 5-vector with R;
        # X ends up with shape (n_exp, n_samp, 5).
        z = torch.cat((z0, z1, z2, z3, z4), 2)
        X = torch.matmul(self.R,
                         z.view(-1, 5).unsqueeze(2)).view(n_exp, -1, 5)
    return X
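A self-contained sketch of the shape bookkeeping used in forward above (n_exp and n_samp are illustrative; theta and self.R from the original module are not needed here): each distribution is built from an (n_exp,)-shaped parameter, sampled with shape (n_samp, 1), and permuted so experiments come first before the latents are concatenated:

import torch
from torch.distributions import Normal, Exponential

n_exp, n_samp = 8, 16
unit = torch.ones(n_exp)
size = torch.Size([n_samp, 1])
z0 = Normal(torch.zeros(n_exp), unit).sample(size).permute(2, 0, 1)  # (n_exp, n_samp, 1)
z4 = Exponential(.5 * unit).sample(size).permute(2, 0, 1)            # (n_exp, n_samp, 1)
z = torch.cat((z0, z4), 2)                                           # (n_exp, n_samp, 2)
assert z.shape == (n_exp, n_samp, 2)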
Example #5
def sample(self, size):
    m = Exponential(torch.tensor([1.0]))
    return m.sample(size)
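A usage sketch for this sample helper (the enclosing class is omitted): because the rate is a 1-element tensor, the distribution's (1,) batch shape is appended to the requested size:

import torch
from torch.distributions import Exponential

m = Exponential(torch.tensor([1.0]))
x = m.sample(torch.Size([3, 2]))
assert x.shape == torch.Size([3, 2, 1])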