Example #1
    def test_discrete_distribution(self):
        d = Categorical(torch.tensor([0.1, 0.2, 0.3, 0.4]))

        def _function(a):
            return 2 * a

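        # The Categorical support is {0, 1, 2, 3}, so
        # E[2a] = 2 * (0*0.1 + 1*0.2 + 2*0.3 + 3*0.4) = 4.0.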
        torch.testing.assert_allclose(integrate(_function, d), 4.0)
Example #2
    def test_delta(self):
        d = Delta(v=torch.tensor([0.2]))

        def _function(a):
            return 2 * a

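        # A Delta distribution puts all its mass at a = 0.2, so E[2a] = 0.4.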
        torch.testing.assert_allclose(integrate(_function, d, num_samples=10),
                                      torch.tensor([0.4]))
Example #3
    def test_multivariate_normal(self):
        d = MultivariateNormal(torch.tensor([0.2]),
                               scale_tril=1e-6 * torch.eye(1))

        def _function(a):
            return 2 * a

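        # With mean 0.2 and a near-zero scale (1e-6), the 100-sample
        # estimate of E[2a] is 0.4 within the given tolerances.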
        torch.testing.assert_allclose(integrate(_function, d, num_samples=100),
                                      0.4,
                                      rtol=1e-3,
                                      atol=1e-3)
Example #4
    def forward(self, state):
        """Get value of the value-function at a given state."""
        pi = tensor_to_distribution(self.policy(state),
                                    **self.policy.dist_params)
        if isinstance(self.q_function, NNEnsembleQFunction):
            out_dim = self.q_function.num_heads
        else:
            out_dim = None

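        # V(s) = E_{a ~ pi}[Q(s, a)]; for an ensemble critic, pass the number
        # of heads as the output dimension so each head keeps its own value.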
        final_v = integrate(
            lambda a: self.q_function(state, a),
            pi,
            out_dim=out_dim,
            num_samples=self.num_samples,
        )
        return final_v
Example #5
    def actor_loss(self, observation):
        """Get Actor loss."""
        state, action, *_ = observation

        pi = tensor_to_distribution(self.policy(state),
                                    **self.policy.dist_params)
        entropy, _ = get_entropy_and_log_p(pi, action,
                                           self.policy.action_scale)

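        # Score-function (REINFORCE-style) policy-gradient loss: -log pi(a)
        # weighted by the detached advantage Q(s, a) - V(s), integrated over
        # the policy distribution.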
        policy_loss = integrate(
            lambda a: -pi.log_prob(a)
            * (
                self.critic(state, self.policy.action_scale * a)
                - self.value_target(state)
            ).detach(),
            pi,
            num_samples=self.num_samples,
        ).sum()

        return Loss(policy_loss=policy_loss).reduce(self.criterion.reduction)