def _unsquash(self, values):
    """Rescale `values` to the unit interval and apply `atanh`.

    `values` is mapped linearly so that `self.low` becomes -1.0 and
    `self.high` becomes 1.0. The result is clamped to
    [-1 + SMALL_NUMBER, 1 - SMALL_NUMBER] and then passed through `atanh`.

    Args:
        values: Tensor to unsquash.

    Returns:
        The `atanh` of the rescaled, clamped input.
    """
    span = self.high - self.low
    rescaled = (values - self.low) / span * 2.0 - 1.0
    # atanh diverges at +/-1, so keep its argument strictly inside (-1, 1).
    lower_bound = -1.0 + SMALL_NUMBER
    upper_bound = 1.0 - SMALL_NUMBER
    return atanh(torch.clamp(rescaled, lower_bound, upper_bound))
def bc_log(model, obs, actions):
    """Log-likelihood of `actions` under the model's tanh-squashed Gaussian.

    The output of `model.get_policy_output(obs)` is split in half along the
    last dimension into a mean and a log standard deviation. `actions` are
    mapped back through `atanh`, scored under the resulting Normal
    distribution, and corrected by the tanh change-of-variables term.

    Args:
        model: Object exposing `get_policy_output(obs)`.
        obs: Observations forwarded to `model.get_policy_output`.
        actions: Squashed actions; assumed to lie in [-1, 1] -- TODO confirm
            against callers.

    Returns:
        Tensor of log-probabilities, summed over the last dimension.
    """
    # Stabilize input to atanh: actions at (or numerically beyond) +/-1
    # would otherwise produce inf/NaN.
    safe_actions = torch.clamp(
        actions, -1.0 + SMALL_NUMBER, 1.0 - SMALL_NUMBER)
    z = atanh(safe_actions)

    logits = model.get_policy_output(obs)
    mean, log_std = torch.chunk(logits, 2, dim=-1)
    # Mean Clamping for Stability
    mean = torch.clamp(mean, MEAN_MIN, MEAN_MAX)
    log_std = torch.clamp(log_std, MIN_LOG_NN_OUTPUT, MAX_LOG_NN_OUTPUT)
    std = torch.exp(log_std)
    normal_dist = torch.distributions.Normal(mean, std)

    # Jacobian of tanh evaluated at z, i.e. 1 - tanh(z)^2. Using the clamped
    # actions keeps the log argument strictly positive.
    log_det_jacobian = torch.log(
        1 - safe_actions * safe_actions + SMALL_NUMBER)
    return torch.sum(normal_dist.log_prob(z) - log_det_jacobian, dim=-1)
def bc_log(model, obs, actions):
    """Log-likelihood of `actions` under the model's tanh-squashed Gaussian.

    `actions` are rescaled from [`action_dist_t.low`, `action_dist_t.high`]
    to [-1, 1], clamped away from the endpoints and mapped through `atanh`.
    The output of `model.get_policy_output(obs)` is split in half along the
    last dimension into a mean and a log standard deviation, which define
    the Normal distribution used to score the unsquashed actions.

    NOTE(review): `action_dist_t` is not a parameter of this function; it is
    resolved from an enclosing or module scope -- confirm it is defined.

    Args:
        model: Object exposing `get_policy_output(obs)`.
        obs: Observations forwarded to `model.get_policy_output`.
        actions: Actions in the range given by `action_dist_t.low` and
            `action_dist_t.high`.

    Returns:
        Tensor of log-probabilities, summed over the last dimension.
    """
    # Stabilize input to atanh.
    normed_actions = \
        (actions - action_dist_t.low) / \
        (action_dist_t.high - action_dist_t.low) * 2.0 - 1.0
    safe_normed_actions = torch.clamp(
        normed_actions, -1.0 + SMALL_NUMBER, 1.0 - SMALL_NUMBER)
    z = atanh(safe_normed_actions)

    logits = model.get_policy_output(obs)
    mean, log_std = torch.chunk(logits, 2, dim=-1)
    # Mean Clamping for Stability
    mean = torch.clamp(mean, MEAN_MIN, MEAN_MAX)
    log_std = torch.clamp(log_std, MIN_LOG_NN_OUTPUT, MAX_LOG_NN_OUTPUT)
    std = torch.exp(log_std)
    normal_dist = torch.distributions.Normal(mean, std)

    # The tanh Jacobian is 1 - tanh(z)^2, and tanh(z) is the *normalized*
    # action. Using the raw `actions` here would make the log argument
    # negative (NaN) whenever the action bounds are wider than [-1, 1].
    # NOTE(review): the constant Jacobian of the affine rescaling is not
    # included; it does not involve the model output.
    log_det_jacobian = torch.log(
        1 - safe_normed_actions * safe_normed_actions + SMALL_NUMBER)
    return torch.sum(normal_dist.log_prob(z) - log_det_jacobian, dim=-1)
def _unsquash(self, values):
    """Invert the squashing: rescale `values` to [-1, 1] and apply `atanh`.

    `values` is mapped linearly so that `self.low` becomes -1.0 and
    `self.high` becomes 1.0. The rescaled tensor is clamped to
    [-1 + SMALL_NUMBER, 1 - SMALL_NUMBER] before `atanh` so that inputs
    sitting exactly on a bound do not yield +/-inf.

    Args:
        values: Tensor to unsquash.

    Returns:
        The `atanh` of the rescaled, clamped input.
    """
    normed_values = \
        (values - self.low) / (self.high - self.low) * 2.0 - 1.0
    # Stabilize input to atanh.
    safe_normed_values = torch.clamp(
        normed_values, -1.0 + SMALL_NUMBER, 1.0 - SMALL_NUMBER)
    return atanh(safe_normed_values)