Esempio n. 1
0
    def forward(self, inputs, logprob=False, deterministic=False):
        """Turn packed Gaussian parameters into an action.

        ``inputs`` is sliced along dim 1: the first ``shape[1] // 2`` columns
        are used as the mean, the remaining columns as the log-sigma, which
        is clamped to ``[LOG_SIG_MIN, LOG_SIG_MAX]`` before exponentiation.

        Args:
            inputs: tensor indexed as ``inputs[:, k]``; its second dimension
                holds the mean columns followed by the log-sigma columns.
            logprob: when truthy, the log-probability returned by
                ``self.squashing_layer`` is returned alongside the action.
            deterministic: when truthy, the mean is used directly instead
                of calling ``normal_sample``.

        Returns:
            ``action`` when ``logprob`` is falsy, otherwise the tuple
            ``(action, log_pi)``.
        """
        half = inputs.shape[1] // 2
        mean = inputs[:, :half]
        # Clamp in log-space first so the exponentiated sigma stays bounded.
        std = torch.clamp(inputs[:, half:], LOG_SIG_MIN, LOG_SIG_MAX).exp()

        if deterministic:
            pre_squash = mean
        else:
            pre_squash = normal_sample(mean, std)

        # The log-probability is always computed because the squashing
        # layer takes it as an input, whether or not the caller wants it.
        action, log_pi = self.squashing_layer.forward(
            pre_squash, normal_logprob(mean, std, pre_squash))

        return (action, log_pi) if logprob else action
Esempio n. 2
0
    def forward(self, inputs, logprob=False, deterministic=False):
        """Push a base sample through two couplings and the squashing layer.

        The base parameters are a zero mean and a unit sigma of shape
        ``(inputs.shape[0], self.action_size)``. ``inputs`` is forwarded
        unchanged as the second argument of ``self.coupling1`` and
        ``self.coupling2``.

        Args:
            inputs: tensor whose first dimension sets the number of rows of
                the base sample and whose device is used for allocation.
            logprob: when truthy, the log-probability returned by the final
                layer is returned alongside the action.
            deterministic: when truthy, the zero mean is used directly
                instead of calling ``normal_sample``.

        Returns:
            ``action`` when ``logprob`` is falsy, otherwise the tuple
            ``(action, log_pi)``.
        """
        state_embedding = inputs
        # Allocate straight on the embedding's device rather than creating
        # the tensor on the default device and copying it over afterwards.
        mu = torch.zeros(
            (state_embedding.shape[0], self.action_size),
            device=state_embedding.device)
        # ones_like already inherits mu's device and dtype.
        sigma = torch.ones_like(mu)

        z = mu if deterministic else normal_sample(mu, sigma)
        log_pi = normal_logprob(mu, sigma, z)

        # Each stage returns the transformed value together with the
        # log-probability it was handed, as updated by that stage.
        z, log_pi = self.coupling1.forward(z, state_embedding, log_pi)
        z, log_pi = self.coupling2.forward(z, state_embedding, log_pi)
        action, log_pi = self.squashing_layer.forward(z, log_pi)

        if logprob:
            return action, log_pi
        return action
    def forward(self, logits, logprob=None, deterministic=False):
        """Turn packed location/log-scale logits into an action.

        ``logits`` is sliced along dim 1: the first ``shape[1] // 2`` columns
        are the location, the remaining columns the log-scale, which is
        clamped to ``[LOG_SIG_MIN, LOG_SIG_MAX]`` before exponentiation.

        Args:
            logits: tensor indexed as ``logits[:, k]``.
            logprob: only the boolean ``True`` requests the log-probability;
                any other non-tensor value yields the action alone. Passing
                a ``torch.Tensor`` is rejected by an assertion.
            deterministic: when truthy, the location is used directly
                instead of calling ``normal_sample``.

        Returns:
            ``action``, or ``(action, action_logprob)`` when ``logprob`` is
            ``True``.

        Raises:
            AssertionError: if ``logprob`` is a ``torch.Tensor``.
        """
        half = logits.shape[1] // 2
        loc = logits[:, :half]
        scale = torch.clamp(logits[:, half:], LOG_SIG_MIN, LOG_SIG_MAX).exp()

        if deterministic:
            action = loc
        else:
            action = normal_sample(loc, scale)

        # A tensor-valued ``logprob`` is explicitly unsupported.
        assert not isinstance(logprob, torch.Tensor), \
            "Not implemented behaviour"
        # bool cannot be subclassed, so an identity check against True is
        # the same test as "is a bool and is truthy".
        wants_logprob = logprob is True

        action, action_logprob = self.squashing_layer.forward(
            action, normal_logprob(loc, scale, action))

        if not wants_logprob:
            return action
        return action, action_logprob
    def forward(self, logits, logprob=None, deterministic=False):
        """Push a base sample through two couplings and the squashing layer.

        The base parameters are a zero location and a unit scale of shape
        ``(logits.shape[0], self.action_size)``. ``logits`` is forwarded
        unchanged as the second argument of ``self.coupling1`` and
        ``self.coupling2``.

        Args:
            logits: tensor whose first dimension sets the number of rows of
                the base sample and whose device is used for allocation.
            logprob: only the boolean ``True`` requests the log-probability;
                any other non-tensor value yields the action alone. Passing
                a ``torch.Tensor`` is rejected by an assertion.
            deterministic: when truthy, the zero location is used directly
                instead of calling ``normal_sample``.

        Returns:
            ``action``, or ``(action, action_logprob)`` when ``logprob`` is
            ``True``.

        Raises:
            AssertionError: if ``logprob`` is a ``torch.Tensor``.
        """
        state_embedding = logits
        # Allocate straight on the embedding's device rather than creating
        # the tensor on the default device and copying it over afterwards.
        loc = torch.zeros(
            (state_embedding.shape[0], self.action_size),
            device=state_embedding.device)
        # ones_like already inherits loc's device and dtype.
        scale = torch.ones_like(loc)
        action = loc if deterministic else normal_sample(loc, scale)

        bool_logprob = isinstance(logprob, bool) and logprob
        value_logprob = isinstance(logprob, torch.Tensor)
        # NOTE(review): this guard disappears under ``python -O``; kept as
        # an assert so the exception type seen by callers does not change.
        assert not value_logprob, "Not implemented behaviour"

        action_logprob = normal_logprob(loc, scale, action)
        # Each stage returns the transformed value together with the
        # log-probability it was handed, as updated by that stage.
        action, action_logprob = \
            self.coupling1.forward(action, state_embedding, action_logprob)
        action, action_logprob = \
            self.coupling2.forward(action, state_embedding, action_logprob)
        action, action_logprob = \
            self.squashing_layer.forward(action, action_logprob)

        if bool_logprob:
            return action, action_logprob
        return action