def forward(self, observation):
    """Compute the bounded log-lambda output for an observation.

    The observation is optionally passed through ``self.decoder`` first,
    then featurized and projected by ``self.lambda_layer``.
    """
    # Decode only when a decoder module was configured.
    state = observation if self.decoder is None else self.decoder(observation)
    flat = self.extractor(state).view(-1, self.feature_size)
    raw = self.lambda_layer(flat)
    # Bound the output to [-10, 6] while still passing gradients at the clip.
    return clip_but_pass_gradient(raw, lower=-10., upper=6.)
 def forward(self, observation, action):
     if self.decoder is not None:
         state = self.decoder(observation)
     else:
         state = observation
     # Compute Q--value.
     feature = self.extractor(torch.cat((state, action), dim=-1))
     pre_q = self.value_layer(feature.view(-1, self.feature_size))
     return clip_but_pass_gradient(pre_q, 0., 1.)
    def log_prob(self, observation, action):
        """Log-density of `action` under the tanh-squashed Gaussian policy.

        Inverts the tanh squashing via atanh, evaluates the Gaussian
        log-likelihood, and subtracts the change-of-variables correction
        ``sum log(1 - a^2 + EPS)``.
        """
        state = observation if self.decoder is None else self.decoder(observation)
        flat = self.extractor(state).view(-1, self.feature_size)
        mu = self.mean_layer(flat)
        # Keep log-std within a numerically sane band, gradients preserved.
        log_sigma = clip_but_pass_gradient(self.logstd_layer(flat), -6., 2.)

        gaussian_term = normal_likelihood(atanh(action), mu, log_sigma)
        correction = torch.log(-action**2 + 1. + EPS).sum(1, keepdim=True)
        return gaussian_term - correction
    def sample(self, observation, deterministic=False):
        """Draw a tanh-squashed action; the squashed mean when `deterministic`."""
        state = observation if self.decoder is None else self.decoder(observation)
        flat = self.extractor(state).view(-1, self.feature_size)
        mu = self.mean_layer(flat)
        log_sigma = clip_but_pass_gradient(self.logstd_layer(flat), -6., 2.)
        sigma = torch.exp(log_sigma)

        if deterministic:
            # Greedy action: distribution mean, no exploration noise.
            return torch.tanh(mu)
        # Stochastic action: Gaussian noise scaled by sigma, then squashed.
        noise = torch.randn(mu.size(), dtype=mu.dtype, device=mu.device)
        return torch.tanh(mu + sigma * noise)
    def forward(self, observation):
        """Reparameterized policy pass.

        Returns a tuple of (stochastic squashed sample, squashed mean
        action, log-probability of the sample).
        """
        state = observation if self.decoder is None else self.decoder(observation)
        flat = self.extractor(state).view(-1, self.feature_size)
        mu = self.mean_layer(flat)
        log_sigma = clip_but_pass_gradient(self.logstd_layer(flat), -6., 2.)

        # Reparameterization trick: sample = mu + sigma * eps with
        # eps ~ N(0, I), so gradients flow through mu and sigma.
        eps = torch.randn(mu.size(), dtype=mu.dtype, device=mu.device)
        raw = mu + torch.exp(log_sigma) * eps
        squashed = torch.tanh(raw)

        # Gaussian log-density minus the tanh change-of-variables correction.
        log_prob = normal_likelihood(raw, mu, log_sigma) - torch.log(
            -squashed**2 + 1. + EPS).sum(1, keepdim=True)

        return squashed, torch.tanh(mu), log_prob