Beispiel #1
0
    def act_vectorized(self,
                       obs,
                       goal,
                       horizon=None,
                       greedy=False,
                       noise=0,
                       marginal_policy=None):
        """Pick one action per batch row from the per-dimension logits.

        The output of ``self.forward`` is reshaped to
        ``(-1, self.n_dims, self.granularity)``, multiplied by ``1 - noise``
        and soft-maxed over the last axis, giving one categorical
        distribution per action dimension.

        Args:
            obs: Batch of observations; converted to a float32 tensor.
            goal: Batch of goals; converted to a float32 tensor.
            horizon: Optional horizon input; converted to a float32 tensor
                when it is not ``None`` and forwarded to ``self.forward``.
            greedy: When true, take the arg-max bin of every dimension and
                then, independently per row with probability ``noise``,
                replace the action with an index drawn uniformly from
                ``range(self.action_space.n)``. When false, sample every
                dimension from its categorical distribution.
            noise: Scales the logits by ``1 - noise``; in greedy mode it is
                also the per-row probability of the random replacement.
            marginal_policy: Accepted but not used by this method.

        Returns:
            A NumPy array of actions, one per batch row, as produced by
            ``self.flattened`` (presumably a flat action index built from
            the per-dimension bins; confirm against its definition).
        """
        obs_t = torch.tensor(obs, dtype=torch.float32)
        goal_t = torch.tensor(goal, dtype=torch.float32)
        horizon_t = (None if horizon is None
                     else torch.tensor(horizon, dtype=torch.float32))

        scores = self.forward(obs_t, goal_t, horizon=horizon_t)
        scores = scores.view(-1, self.n_dims, self.granularity)
        probs = torch.softmax(scores * (1 - noise), 2)

        if not greedy:
            drawn = torch.distributions.Categorical(probs=probs).sample()
            return ptu.to_numpy(self.flattened(drawn))

        best = ptu.to_numpy(self.flattened(torch.argmax(probs, dim=-1)))
        batch = len(best)
        # Draw the candidate random actions first, then the replacement mask,
        # so the global NumPy RNG is consumed in a fixed order.
        random_actions = np.random.choice(self.action_space.n, size=batch)
        replace = np.random.rand(batch) < noise
        return np.where(replace, random_actions, best)
Beispiel #2
0
    def act_vectorized(self,
                       obs,
                       goal,
                       horizon=None,
                       greedy=False,
                       noise=0,
                       marginal_policy=None):
        """Pick one action per batch row from the categorical logits.

        Args:
            obs: Batch of observations; converted to a float32 tensor.
            goal: Batch of goals; converted to a float32 tensor.
            horizon: Optional horizon input; converted to a float32 tensor
                when it is not ``None``.
            greedy: When true, return the arg-max action of every row;
                otherwise sample from the categorical distribution.
            noise: The logits are multiplied by ``1 - noise`` before the
                softmax.
            marginal_policy: Optional second policy. When given, the logits
                it produces for the same observations with an all-zero goal
                are subtracted from this policy's logits.

        Returns:
            The chosen actions converted with ``ptu.to_numpy``.
        """
        obs_t = torch.tensor(obs, dtype=torch.float32)
        goal_t = torch.tensor(goal, dtype=torch.float32)
        horizon_t = (None if horizon is None
                     else torch.tensor(horizon, dtype=torch.float32))

        scores = self.forward(obs_t, goal_t, horizon=horizon_t)
        if marginal_policy is not None:
            blank_goal = torch.zeros_like(goal_t)
            # In-place subtraction, applied directly to the forward output.
            scores -= marginal_policy.forward(obs_t, blank_goal, horizon_t)

        probs = torch.softmax(scores * (1 - noise), 1)
        if greedy:
            picks = torch.argmax(probs, dim=-1)
        else:
            picks = torch.distributions.Categorical(probs=probs).sample()
        return ptu.to_numpy(picks)
Beispiel #3
0
 def entropy(self, obs, goal, horizon=None):
     """Return the summed per-dimension entropy of the action distribution.

     The output of ``self.forward`` is reshaped to
     ``(-1, self.n_dims, self.granularity)``; each slice along the last
     axis is treated as the logits of one categorical distribution.

     Args:
         obs: Observations, passed unchanged to ``self.forward``.
         goal: Goals, passed unchanged to ``self.forward``.
         horizon: Optional horizon, passed to ``self.forward`` by keyword.

     Returns:
         A tensor with one value per batch row: the sum over the
         ``n_dims`` axis of the entropy of every per-dimension categorical.
     """
     logits = self.forward(obs, goal, horizon=horizon)
     logits = logits.view(-1, self.n_dims, self.granularity)
     # Softmax must be taken over `logits`; the previous code read an
     # undefined name here and raised NameError on every call.
     probs = torch.softmax(logits, 2)
     # H = logsumexp(l) - sum_i p_i * l_i, which equals -sum_i p_i * log p_i.
     Z = torch.logsumexp(logits, dim=2)
     return (Z - torch.sum(probs * logits, 2)).sum(1)
Beispiel #4
0
 def entropy(self, obs, goal, horizon=None):
     """Return the entropy of the categorical action distribution.

     Args:
         obs: Observations, passed unchanged to ``self.forward``.
         goal: Goals, passed unchanged to ``self.forward``.
         horizon: Optional horizon, passed to ``self.forward`` by keyword.

     Returns:
         A tensor of entropies, reduced over axis 1 of the logits returned
         by ``self.forward``.
     """
     scores = self.forward(obs, goal, horizon=horizon)
     # H = logsumexp(l) - sum_i softmax(l)_i * l_i
     expected_score = (scores.softmax(dim=1) * scores).sum(dim=1)
     log_partition = scores.logsumexp(dim=1)
     return log_partition - expected_score
Beispiel #5
0
 def probabilities(self, obs, goal, horizon=None):
     """Return the action probabilities for the given inputs.

     Args:
         obs: Observations, passed unchanged to ``self.forward``.
         goal: Goals, passed unchanged to ``self.forward``.
         horizon: Optional horizon, passed to ``self.forward`` by keyword.

     Returns:
         The softmax over axis 1 of the logits returned by
         ``self.forward``.
     """
     scores = self.forward(obs, goal, horizon=horizon)
     return scores.softmax(dim=1)