Example 1
  def forward(self, belief, state, deterministic=False, with_logprob=False):
    """Map (belief, state) to an action and, optionally, its log-probability.

    Args:
      belief: belief tensor; concatenated with ``state`` along the last dim.
      state: latent state tensor.
      deterministic: if True, return the (approximate) distribution mean
        instead of a reparameterized sample.
      with_logprob: if True, also return the per-sample log-probability
        summed over action dimensions; otherwise ``logp_pi`` is None.

    Returns:
      Tuple ``(action, logp_pi)`` with ``action`` of shape [batch, act_dim]
      and ``logp_pi`` of shape [batch] (or None).  When ``self.fix_speed``
      is set, the policy only produces steering and a constant throttle
      column of ``self.throtlle_base`` is appended.
    """
    # Inverse-softplus of init_std so that softplus(std + raw_init_std)
    # starts near init_std.
    raw_init_std = np.log(np.exp(self.init_std) - 1)
    hidden = self.act_fn(self.fc1(torch.cat([belief, state], dim=-1)))
    hidden = self.act_fn(self.fc2(hidden))
    hidden = self.act_fn(self.fc3(hidden))
    hidden = self.act_fn(self.fc4(hidden))
    hidden = self.fc5(hidden)
    mean, std = torch.chunk(hidden, 2, dim=-1)

    # Bound the pre-squash mean to [-mean_scale, mean_scale] to avoid
    # numerical instabilities: inverting tanh for log-probabilities is
    # ill-conditioned in highly saturated regions.
    mean = self.mean_scale * torch.tanh(mean / self.mean_scale)
    std = F.softplus(std + raw_init_std) + self.min_std

    dist = torch.distributions.Normal(mean, std)
    # Tanh written as Affine(0,2) ∘ Sigmoid ∘ Affine(-1,2) — equivalent to
    # TanhTransform but with a numerically stabler inverse/log-det.
    tanh_chain = [AffineTransform(0., 2.), SigmoidTransform(), AffineTransform(-1., 2.)]
    if self.fix_speed:
      transform = tanh_chain
    else:
      # Extra affine maps the second (throttle) dim into throtlle_base ± 0.2.
      # Tensors follow the activations' device rather than hard-coded "cuda",
      # so the module also runs on CPU.  TODO: bounds are donkeycar-specific.
      device = mean.device
      transform = tanh_chain + [
          AffineTransform(loc=torch.tensor([0.0, self.throtlle_base], device=device),
                          scale=torch.tensor([1.0, 0.2], device=device))]

    dist = TransformedDistribution(dist, transform)
    # After transforming, entropy/mean/mode lose their closed forms;
    # SampleDist approximates them by sampling.
    dist = SampleDist(dist)

    if deterministic:
      action = dist.mean
    else:
      # rsample() keeps the reparameterization trick so gradients flow.
      action = dist.rsample()

    if with_logprob:
      logp_pi = dist.log_prob(action).sum(dim=1)
    else:
      logp_pi = None

    if self.fix_speed:
      # Steering-only policy: append the fixed throttle column.
      action = torch.cat(
          (action, self.throtlle_base * torch.ones_like(action, requires_grad=False)),
          dim=-1)
    # action: [batch, act_dim]; logp_pi: [batch]
    return action, logp_pi
Example 2
    def forward(self, mean, log_std, deterministic=False):
        """Return a tanh-squashed action sample and its log-probability.

        ``log_std`` is clamped to [self.log_std_min, self.log_std_max]
        before being exponentiated.  When ``deterministic`` is True the
        squashed mean is returned instead of a reparameterized sample;
        the log-probability is summed over action dimensions either way.
        """
        # Clamp into the configured range, then exponentiate to a std-dev.
        bounded_log_std = torch.clamp(log_std, self.log_std_min, self.log_std_max)
        dist = TransformedDistribution(
            Normal(mean, torch.exp(bounded_log_std)),
            TanhTransform(cache_size=1))

        if deterministic:
            action = torch.tanh(mean)
        else:
            # rsample() keeps gradients via the reparameterization trick.
            action = dist.rsample()

        # Per-dimension log-probs summed across the action dimension.
        summed_log_prob = dist.log_prob(action).sum(dim=1)

        return action, summed_log_prob