Exemple #1
0
    def forward(self, obs, deterministic=False, with_logprob=True):
        """Produce a tanh-squashed Gaussian action and, optionally, its log-probability.

        Args:
            obs: Observation tensor passed to ``self.net``.
            deterministic: If True, use the distribution mean instead of a
                reparameterised sample (intended for test-time evaluation).
            with_logprob: If True, also compute the log-probabilities.

        Returns:
            Tuple ``(pi_action, logp_pi, real_logp_pi)``:

            * ``pi_action`` -- ``self.act_limit * tanh(u)`` where ``u`` is the
              pre-squash action.
            * ``logp_pi`` -- Gaussian log-probability of ``u`` with the tanh
              correction applied, summed over the last (action) axis.
            * ``real_logp_pi`` -- element-wise Gaussian log-probability of
              ``u`` (no correction, no reduction).

            ``logp_pi`` and ``real_logp_pi`` are ``None`` when
            ``with_logprob`` is False.
        """
        net_out = self.net(obs)
        mu = self.mu_layer(net_out)
        log_std = torch.clamp(self.log_std_layer(net_out), LOG_STD_MIN,
                              LOG_STD_MAX)
        std = torch.exp(log_std)

        # Pre-squash distribution and sample.
        pi_distribution = Normal(mu, std)
        if deterministic:
            # Only used for evaluating the policy at test time.
            pi_action = mu
        else:
            pi_action = pi_distribution.rsample()

        if with_logprob:
            # Gaussian log-prob followed by the tanh change-of-variables
            # correction, written in the numerically stable form
            # 2 * (log 2 - u - softplus(-2u)) == log(1 - tanh(u)^2)
            # (see SAC paper, arXiv 1801.01290, appendix C).
            real_logp_pi = pi_distribution.log_prob(pi_action)
            squash_correction = 2 * (np.log(2) - pi_action -
                                     F.softplus(-2 * pi_action))
            # Both reductions run over the last axis so that unbatched
            # (1-D) observations work as well as batched ones.
            logp_pi = (real_logp_pi.sum(axis=-1) -
                       squash_correction.sum(axis=-1))
        else:
            logp_pi = None
            real_logp_pi = None

        pi_action = self.act_limit * torch.tanh(pi_action)

        return pi_action, logp_pi, real_logp_pi
Exemple #2
0
 def forward(self, x):
     """Run the VAE on ``x`` and compute reconstruction and KL terms.

     Args:
         x (torch.Tensor): Input images [batch size, 3, dim, dim].

     Returns:
         Tuple ``(recon, losses, stats, None, None)`` where ``losses`` is an
         ``AttrDict`` with ``err`` (negative Gaussian log-likelihood summed
         over dims 1-3) and ``kl_l`` (single-sample KL estimate summed over
         dim 1, minus ``stats.ldj`` in the flow case).
     """
     recon, stats = self.vae(x)
     if self.pixel_bound:
         recon = torch.sigmoid(recon)

     # Reconstruction term: Gaussian likelihood with fixed pixel std.
     likelihood = Normal(recon, self.pixel_std)
     err = -likelihood.log_prob(x).sum(dim=(1, 2, 3))

     # KL term against a standard-normal prior.
     prior = Normal(0, 1)
     if 'z' in stats:
         # No flow: posterior and prior are evaluated at the same sample.
         posterior = Normal(stats.mu, stats.sigma)
         log_ratio = posterior.log_prob(stats.z) - prior.log_prob(stats.z)
         kl = log_ratio.sum(dim=1)
     else:
         # Normalising flow: base density at z_0, prior at z_k, and the
         # stats.ldj term subtracted.
         base = Normal(stats.mu_0, stats.sigma_0)
         log_ratio = base.log_prob(stats.z_0) - prior.log_prob(stats.z_k)
         kl = log_ratio.sum(dim=1) - stats.ldj

     return recon, AttrDict(err=err, kl_l=kl), stats, None, None
Exemple #3
0
def compute_stochastic_elbo(a, b, nu, omega, x, y, a_0, b_0, mu_0):
    """Single-sample Monte-Carlo estimate of the ELBO.

    The variational family is mean-field: ``q(tau) = Gamma(a, b)`` over the
    noise precision and ``q(beta) = Normal(nu, omega**-0.5)`` over the
    regression weights.

    Args:
        a, b: Gamma 'shape' and 'rate' of the variational posterior on tau.
        nu, omega: Normal 'mean' and 'precision' of the variational
            posterior on the weights.
        x: n-by-k matrix of regression inputs (one row per observation).
        y: n-by-1 targets.
        a_0, b_0: Gamma 'shape' and 'rate' of the prior on tau.
        mu_0: Mean of the Normal prior on the weights (its scale is the
            sampled noise standard deviation).

    Returns:
        The ELBO estimate ``log p(y | beta, tau) - KL_tau - KL_beta``.
    """
    q_weights = Normal(nu, omega**-0.5)
    q_precision = Gamma(a, b)

    # Reparameterised draws keep the estimate differentiable. The precision
    # is drawn before the weights.
    tau = q_precision.rsample()
    noise_std = tau**-0.5
    beta = q_weights.rsample()

    prior_precision = Gamma(a_0, b_0)
    prior_weights = Normal(mu_0, noise_std)
    predictions = (beta[None, :] * x).sum(dim=1, keepdim=True)
    likelihood = Normal(predictions, noise_std)

    kl_tau = q_precision.log_prob(tau) - prior_precision.log_prob(tau)
    kl_beta = (q_weights.log_prob(beta).sum() -
               prior_weights.log_prob(beta).sum())
    log_likelihood = likelihood.log_prob(y).sum()

    return log_likelihood - kl_tau - kl_beta
Exemple #4
0
    def log_likelihood(self, x_norm, y_norm):
        """Mean log-likelihood of a two-component Normal/Gamma mixture.

        ``self(x_norm)`` supplies the Normal mean/variance, the Gamma
        shape/rate and the mixture weight of the Normal component. The Normal
        is evaluated on ``y_norm``; the Gamma on the de-normalised target
        ``y_norm * self.y_std + self.y_mean`` shifted by 1e-4.

        Returns:
            Mean over elements of the log mixture density, where a component
            is masked out when its weight is within 1e-4 of zero.
        """
        mean, var, shape, rate, mixture_var = self(x_norm)
        tol = 10**(-4)

        normal_logp = Normal(mean, torch.sqrt(var)).log_prob(y_norm)
        y = y_norm * self.y_std + self.y_mean + tol
        gamma_logp = Gamma(shape, rate).log_prob(y)

        # 1.0 where the mixture has (numerically) collapsed onto one component.
        only_normal_bool = (torch.abs(1 - mixture_var) < tol).type(torch.float)
        only_gamma_bool = (mixture_var < tol).type(torch.float)

        logging.debug('shape,rate: {:.3f}, {:.3f}'.format(
            float(shape.mean()), float(rate.mean())))

        # Unmasked logsumexp variant, only reported in the debug log.
        weighted_logps = torch.stack(
            (normal_logp + torch.log(mixture_var),
             gamma_logp + torch.log(1 - mixture_var)),
            dim=0)
        old_output = torch.logsumexp(weighted_logps, dim=0).mean()

        normal_term = ((1 - only_gamma_bool) * mixture_var *
                       torch.exp(normal_logp))
        gamma_term = ((1 - only_normal_bool) * (1 - mixture_var) *
                      torch.exp(gamma_logp))
        output = torch.log(normal_term + gamma_term).mean()

        logging.debug('Mixture var: {}'.format(float(mixture_var.mean())))
        logging.debug('NLLs: {:.3f}, {:.3f}'.format(
            -float(normal_logp.mean()), -float(gamma_logp.mean())))
        logging.debug('Combined NLL: {:.3f} or {:.3f}'.format(
            -float(output), -float(old_output)))

        return output
Exemple #5
0
    def test_gmm_loss(self):
        """Check ``gmm_loss`` against a hand-computed two-component mixture.

        The expected value is ``-log(p1 + p2)`` where each ``p_k`` is the
        mixture weight times the product of the per-feature Normal densities.
        """
        # seq_len x batch_size x gaussian_size x feature_size
        # 1 x 1 x 2 x 2
        mus = torch.tensor([[[[0.0, 0.0], [6.0, 6.0]]]])
        sigmas = torch.tensor([[[[2.0, 2.0], [2.0, 2.0]]]])
        # seq_len x batch_size x gaussian_size
        pi = torch.tensor([[[0.5, 0.5]]])
        logpi = torch.log(pi)

        # seq_len x batch_size x feature_size
        batch = torch.tensor([[[3.0, 3.0]]])
        gl = gmm_loss(batch, mus, sigmas, logpi)

        # Weighted density of every component: weight * product over features.
        component_probs = []
        for k in range(2):
            density = pi[0, 0, k]
            for f in range(2):
                normal = Normal(mus[0, 0, k, f], sigmas[0, 0, k, f])
                density = density * torch.exp(normal.log_prob(batch[0, 0, f]))
            component_probs.append(density)
        p1, p2 = component_probs
        expected = -torch.log(p1 + p2)

        logger.info(
            "gmm loss={}, p1={}, p2={}, p1+p2={}, -log(p1+p2)={}".format(
                gl, p1, p2, p1 + p2, expected))
        # Floating-point results from two different computation orders must
        # be compared with a tolerance, not with exact equality.
        assert torch.isclose(expected, gl)
class GaussianModel(nn.Module):
    r"""
    Model to learn a univariate Gaussian distribution.

    Arguments
    ----------
    mu: Mean of the Gaussian distribution
    sigma: Standard deviation of the Gaussian distribution
    device: The torch.device to use, typically cpu or gpu id
    """
    def __init__(self, mu, sigma, device=None):
        super(GaussianModel, self).__init__()
        # Always define the attribute so it can be read even when no device
        # was requested.
        self.device = device
        if device is not None:
            mu = mu.to(device)
            sigma = sigma.to(device)
        self.mu = mu
        self.sigma = sigma
        self.distr = Normal(self.mu, self.sigma)

    def to_device(self, device):
        """
        Moves members to a specified torch.device

        ``mu`` and ``sigma`` are moved when they are tensors and the cached
        distribution is rebuilt from the moved parameters.
        """
        self.device = device
        if torch.is_tensor(self.mu):
            self.mu = self.mu.to(device)
        if torch.is_tensor(self.sigma):
            self.sigma = self.sigma.to(device)
        self.distr = Normal(self.mu, self.sigma)

    def forward(self, x):
        """
        Takes input x as new distribution parameters

        For input with more than one dimension, column 0 is stored as
        ``mu_batch`` and softplus of column 1 as ``sigma_batch``; the cached
        distribution is left unchanged. For 1-D input, ``x[0]`` becomes the
        new mean and the cached distribution is rebuilt.

        NOTE(review): ``sigma_batch`` is stored but never read inside this
        class; ``log_prob`` always uses ``self.sigma``. Confirm whether that
        is intended.

        Returns the cached ``Normal`` distribution.
        """
        # If mini-batching
        if len(x.shape) > 1:
            self.mu_batch = x[:, 0]
            self.sigma_batch = F.softplus(x[:, 1])

        # If not mini-batching
        else:
            self.mu = x[0]
            self.distr = Normal(self.mu, self.sigma)

        return self.distr

    def log_prob(self, x):
        """
        Log-density of ``x`` (flattened) under the model.

        A single value is scored under the cached distribution. Several
        values are scored element-wise, value ``i`` under
        ``Normal(mu_batch[i], sigma)``, in one vectorised call.
        """
        x = x.view(x.shape.numel())
        n = x.shape[0]
        if n == 1:
            return self.distr.log_prob(x[0]).view(1)
        if n == 0:
            return torch.ones_like(x)
        if self.mu_batch.shape[0] < n:
            raise IndexError(
                "mu_batch has fewer entries than the values to score")

        lpx = Normal(self.mu_batch[:n], self.sigma).log_prob(x).to(x.dtype)

        # Earlier versions looped over the batch and left the cached mean and
        # distribution at the last element; keep that state for callers that
        # read `mu` / `distr` / `icdf` afterwards.
        self.mu = self.mu_batch[n - 1]
        self.distr = Normal(self.mu, self.sigma)
        return lpx

    def icdf(self, value):
        """Inverse CDF of the cached distribution evaluated at ``value``."""
        return self.distr.icdf(value)
Exemple #7
0
    def forward(self, x, a=None):
        """Sample an action from the Gaussian policy and score actions.

        Args:
            x: Input passed to ``self.p_net`` to obtain the mean.
            a: Optional tensor of actions to score; anything that is not a
                tensor is ignored.

        Returns:
            ``(pi, logp, logp_pi)``: the sampled action, the log-probability
            of ``a`` summed over dim 1 (``None`` if ``a`` is not a tensor),
            and the log-probability of the sample summed over dim 1.
        """
        dist = Normal(self.p_net(x), self.log_std.exp())
        sampled = dist.sample()

        if torch.is_tensor(a):
            logp_given = dist.log_prob(a).sum(dim=1)
        else:
            logp_given = None

        logp_sampled = dist.log_prob(sampled).sum(dim=1)
        return sampled, logp_given, logp_sampled
Exemple #8
0
 def forward(self, z_where_t, z_where_t_1=None, disp=None):
     """Gaussian log-prior of ``z_where_t``, summed over the last axis.

     With no previous step the prior is
     ``Normal(self.prior_mu0, self.prior_Sigma0)``; otherwise it is centred
     on ``z_where_t_1`` with scale ``self.prior_Sigmat``. ``disp`` is unused.

     Returns:
         Tensor of shape S * B.
     """
     # Unpacking doubles as a check that the input is three-dimensional.
     S, B, D = z_where_t.shape
     if z_where_t_1 is None:
         loc, scale = self.prior_mu0, self.prior_Sigma0
     else:
         loc, scale = z_where_t_1, self.prior_Sigmat
     return Normal(loc, scale).log_prob(z_where_t).sum(-1)  # S * B
Exemple #9
0
 def actor(self, obs, action=None, shared=None):
     """Gaussian policy head: sample an action or score a given one.

     Args:
         obs: Observation; only used (via ``toTensor`` and
             ``self.shared_body``) when ``shared`` is not supplied.
         action: Optional action to evaluate.
         shared: Optional pre-computed shared features.

     Returns:
         ``(action, log_prob)`` when ``action`` is None, otherwise just the
         log-probability of ``action``. Log-probabilities are summed over the
         last axis.
     """
     if shared is None:
         shared = self.shared_body(toTensor(obs))
     mean = self.fc_action(self.actor_body(shared))
     policy = Normal(mean, F.softplus(self.std))

     if action is not None:
         return policy.log_prob(action).sum(-1)

     sampled = policy.sample()
     return sampled, policy.log_prob(sampled).sum(-1)
Exemple #10
0
 def expected_log_pdf(i):
     """Expected log-density of ``logspec0`` for source ``i``.

     Indices below ``self.n_speakers`` use the GMM components weighted by
     ``self.qz[i, c]``; any other index uses ``self.noise_model``.
     """
     if not (i < self.n_speakers):
         return self.noise_model.log_prob(logspec0)

     qd1 = torch.zeros_like(logspec0)
     for c in range(self.gmm['n_components']):
         component = Normal(self.gmm['means'][c], self.gmm['stds'][c])
         # The weight gets a trailing axis to broadcast over the last
         # dimension of the log-density.
         qd1 += self.qz[i,c][:,None] * component.log_prob(logspec0)
     return qd1
Exemple #11
0
    def forward(self, x, a=None):
        """Draw an action from ``Normal(self.mu(x), exp(self.log_std))``.

        Returns:
            ``(pi, logp, logp_pi)``: the sample, the log-probability of ``a``
            (``None`` when ``a`` is None) and the log-probability of the
            sample; both log-probabilities are summed over dim 1.
        """
        dist = Normal(self.mu(x), self.log_std.exp())
        sample = dist.sample()
        logp_sample = dist.log_prob(sample).sum(dim=1)
        logp_given = None if a is None else dist.log_prob(a).sum(dim=1)
        return sample, logp_given, logp_sample
def pathological_mixture(x):
    """Log-sum-exp of two Gaussian log-densities evaluated at ``x``.

    The first component is ``Normal(0, 0.5)``; the second is
    ``Normal(1, 0.15)`` with its log-density multiplied by 200. The two
    terms are placed in columns and combined with ``LogSumExp`` along dim 1.
    """
    broad = Normal(torch.zeros(1), torch.tensor([0.5]))
    narrow = Normal(torch.tensor([1.0]), torch.tensor([0.15]))

    columns = [
        broad.log_prob(x).view(-1, 1),
        narrow.log_prob(x).mul(200).view(-1, 1),
    ]
    return LogSumExp(torch.cat(columns, dim=1), dim=1)
Exemple #13
0
 def forward(self, x: torch.Tensor, a: torch.Tensor) \
         -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
     """Sample from the Gaussian policy whose mean is ``self.mu(x, a)``.

     Returns:
         ``(pi, logp, logp_pi)``: the sample, the log-probability of ``a``
         (``None`` when ``a`` is None) and the log-probability of the sample;
         both log-probabilities are summed over dim 1.
     """
     dist = Normal(self.mu(x, a), self.log_std.exp())
     sample = dist.sample()
     logp_sample = dist.log_prob(sample).sum(dim=1)
     if a is None:
         return sample, None, logp_sample
     return sample, dist.log_prob(a).sum(dim=1), logp_sample
Exemple #14
0
    def update_qZ(self, logspec0):
        '''Updates q(Z) approximate posterior.

        Runs ``self.qz_n_updates`` SGD steps (learning rate
        ``self.qz_learn_rate``) directly on the four tensors stored in
        ``self.qz``. Each step minimises the negative of: the summed expected
        log-likelihood terms minus ``self.kl_weight`` times the summed KL
        terms, estimated with ``self.n_z_samples`` samples.

        Args:
            logspec0 (torch.Tensor): Spectral features of shape (T,F)
        '''
        mu_vs, logvar_vs, mu_us, logvar_us = self.qz
        # The variational parameters themselves are what gets optimised.
        param_to_optimize = (*self.qz, )
        optim_z = optim.SGD(param_to_optimize, lr=self.qz_learn_rate)

        # Number of frames; used to crop the decoder output below.
        n_t = logspec0.shape[0]

        for i_iter in range(self.qz_n_updates):
            optim_z.zero_grad()

            kls, explogliks = [], []

            for _ in range(self.n_z_samples):
                # sample q(Z1), q(Z2)
                u_samp = sample_normal(mu_us, logvar_us)
                v_samp = sample_normal(mu_vs, logvar_vs)

                # forward Z1, Z2 through VAE decoder
                mu_x, logvar_x = self.vae.emission(v_samp, u_samp,
                                                   [n_t] * self.n_speakers)
                # Keep only the first n_t entries along dim 1.
                mu_x, logvar_x = mu_x[:, :n_t, :], logvar_x[:, :n_t, :]
                pd_x = Normal(mu_x, torch.exp(0.5 * logvar_x))

                # compute loss Eq.(21)
                kluv, _ = self.vae.kl_divergence_expected_speaker(
                    ((mu_vs, logvar_vs), (mu_us, logvar_us), u_samp, v_samp),
                    torch.tensor([n_t]))
                # Each sample contributes 1 / n_z_samples of its value.
                kls.append(kluv.sum() / self.n_z_samples)
                # qd-weighted log-density, clamped to [-14, 100] ...
                exploglik = self.qd[:self.n_speakers] * torch.clamp(
                    pd_x.log_prob(logspec0), -14, 100)
                # ... plus (1 - qd)-weighted log-CDF; 1e-6 guards log(0).
                exploglik += (1 - self.qd[:self.n_speakers]) * (
                    pd_x.cdf(logspec0) + 1e-6).log()
                explogliks.append(exploglik.sum() / self.n_z_samples)

            # Same two terms for the noise model, weighted by the last entry
            # of self.qd. It does not depend on the samples, so it is added
            # once per update without the 1 / n_z_samples factor.
            pd_x = self.noise_model
            exploglik = self.qd[-1] * torch.clamp(pd_x.log_prob(logspec0), -14,
                                                  100)
            exploglik += (1 - self.qd[-1]) * (pd_x.cdf(logspec0) + 1e-6).log()
            explogliks.append(exploglik.sum())

            # Objective to minimise: -(expected log-likelihood - weighted KL).
            objf = -(torch.sum(torch.stack(explogliks)) -
                     self.kl_weight * torch.sum(torch.stack(kls)))
            objf.backward()

            # Element-wise gradient clipping to [-5, 5] before the step.
            for p in param_to_optimize:
                nn.utils.clip_grad_value_(p, 5)
            optim_z.step()

        # Store the (updated in place) parameters back on the instance.
        self.qz = (mu_vs, logvar_vs, mu_us, logvar_us)
Exemple #15
0
def select_action(state, env, teacher_mod, teacher_student):
    """Sample teacher and student actions for one environment and record them.

    Both the module-level ``model`` (the student) and ``teacher_mod`` are
    evaluated on ``state``. For the selected ``env`` (1 or 2) a Gaussian is
    built from each network's mean and variance outputs, one action is
    sampled from each, and the results are appended to the bookkeeping lists
    on ``model`` / ``teacher_mod``.

    Args:
        state: NumPy array holding the current observation.
        env: Environment index; only 1 and 2 are handled.
        teacher_mod: Teacher network, called like ``model``.
        teacher_student: When equal to 1 the student's log-probability and
            value are saved; otherwise the teacher's are.

    Returns:
        ``action.item()`` for the most recently sampled action, which in both
        branches is the student's sample.

    NOTE(review): ``model`` and ``pi`` are not defined in this function and
    must exist at module level. If ``env`` is neither 1 nor 2, ``action`` is
    never assigned and the return statement raises ``UnboundLocalError``.
    """
    state = torch.from_numpy(state).float()
    mu1, s1, mu2, s2, val1, val2 = model(state)
    tmu1, ts1, tmu2, ts2, tval1, tval2 = teacher_mod(state)

    if env == 1:
        # Teacher: the network output is used as a variance, hence sqrt().
        prob = Normal(tmu1, ts1.sqrt())
        # Gaussian entropy 0.5 * (log(2*pi*var) + 1); unused for the teacher.
        entropy = 0.5*((ts1*2*pi).log()+1)
        action = prob.sample()
        log_prob_t = prob.log_prob(action)
        # model.entropies.append(entropy)
        # teacher_mod.saved_actions_env1.append(SavedAction(log_prob,
        #                                                   tval1))
        teacher_mod.saved_actions_env1.append((tmu1, ts1))

        # Student: same construction; this sample overwrites `action`.
        prob = Normal(mu1, s1.sqrt())
        entropy = 0.5*((s1*2*pi).log()+1)
        action = prob.sample()
        log_prob_s = prob.log_prob(action)
        model.entropies.append(entropy)

        model.samples_student.append((mu1, s1))

        if teacher_student == 1:
            # Randomly save student
            model.saved_actions_student[env].append(SavedAction(log_prob_s,
                                                           val1))
        else:
            model.saved_actions_student[env].append(SavedAction(log_prob_t,
                                                           tval1))

    elif env == 2:
        prob = Normal(tmu2, ts2.sqrt())
        entropy = 0.5*((ts2*2*pi).log()+1)
        action = prob.sample()
        log_prob_t = prob.log_prob(action)
        # model.entropies.append(entropy)
        # NOTE(review): this env-2 branch appends to `saved_actions_env1`, and
        # stores a SavedAction where the env-1 branch stores a (mu, var)
        # tuple -- confirm this is intended.
        teacher_mod.saved_actions_env1.append(SavedAction(log_prob_t,
                                                          tval2))
        # model.samples_teacher[2].append((tmu2, ts2))
        prob = Normal(mu2, s2.sqrt())
        entropy = 0.5   *((s2*2*pi).log()+1)
        action = prob.sample()
        log_prob_s = prob.log_prob(action)
        model.entropies.append(entropy)
        model.samples_student.append((mu2, s2))

        if teacher_student == 1:
            # Randomly save student or teacher
            model.saved_actions_student[env].append(SavedAction(log_prob_s,
                                                              val2))
        else:
            model.saved_actions_student[env].append(SavedAction(log_prob_t, tval2))
    return action.item()
class ScaleMixturePrior():
    """Zero-mean mixture of two Gaussians with different scales.

    Attributes:
        pi: Weight of the first Gaussian; the second gets ``1 - pi``.
        sigma1: Standard deviation of the first Gaussian.
        sigma2: Standard deviation of the second Gaussian.
        normal1: ``Normal(0, sigma1)``.
        normal2: ``Normal(0, sigma2)``.
    """

    def __init__(self, pi, sigma1, sigma2):
        self.pi = pi
        self.sigma1 = sigma1
        self.sigma2 = sigma2
        self.normal1 = Normal(0, sigma1)
        self.normal2 = Normal(0, sigma2)

    def log_prob(self, x):
        """Return the summed log-density of ``x`` under the mixture.

        Computes ``sum(log(pi * N(x; 0, sigma1) + (1 - pi) * N(x; 0, sigma2)))``
        in log space, so that values far in the tails do not underflow to
        ``log(0)``.

        Args:
            x: Tensor of values to score.

        Returns:
            Scalar tensor with the total log-probability.
        """
        log_p1 = self.normal1.log_prob(x)
        log_p2 = self.normal2.log_prob(x)
        pi = torch.as_tensor(self.pi, dtype=log_p1.dtype, device=log_p1.device)
        weighted = torch.stack(
            (torch.log(pi) + log_p1, torch.log1p(-pi) + log_p2), dim=0)
        return torch.logsumexp(weighted, dim=0).sum()
Exemple #17
0
    def step(self, state):
        """Sample one action from the Gaussian policy for ``state``.

        Args:
            state: NumPy array with the observation; it is flattened before
                being passed to the network.

        Returns:
            ``(action, log_prob)``: the sampled action as a NumPy array of
            shape ``self.action_shape`` and the element-wise log-probability
            tensor of that action.
        """
        state = torch.flatten(torch.from_numpy(state).float())
        mean, sigma = self(state)

        dist = Normal(mean, sigma)
        action = dist.sample().view(self.action_shape)
        # The previous debugging block called torch.normal(action, mean,
        # sigma), which is not a valid signature and raised on every call; it
        # has been removed together with its prints.
        return action.numpy(), dist.log_prob(action)
Exemple #18
0
 def forward(self, x, a=None):
     """Sample from the Gaussian policy and also return its mean.

     Returns:
         ``(pi, logp, logp_pi, mu)``: the sample, the log-probability of
         ``a`` (``None`` when ``a`` is None), the log-probability of the
         sample, and the policy mean. Log-probabilities are summed over
         dim 1.
     """
     mean = self.mu(x)
     dist = Normal(mean, self.log_std.exp())
     sample = dist.sample()
     # Gaussian likelihood of the sample and, if given, of the action.
     logp_sample = dist.log_prob(sample).sum(dim=1)
     logp_given = dist.log_prob(a).sum(dim=1) if a is not None else None
     # Order matches the values returned by ActorCritic.
     return sample, logp_given, logp_sample, mean
Exemple #19
0
class ScaledGaussianMixture(Parameter):
    """Scaled mixture of two zero-mean Gaussians.

    Only used as a prior over weights, so sampling is not provided.

    Attributes:
        pi (float): interpolation factor between the two gaussians basis
        sigma1 (float): sigma for the first gaussian
        sigma2 (float): sigma for the second gaussian
        gaussian1 (Normal): normal distribution for the first gaussian
        gaussian2 (Normal): normal distribution for the second gaussian
    """

    def __init__(self, pi: float, sigma1: float, sigma2: float) -> None:
        """Register the mixture constants and build both components.

        Arguments:
            pi (float): interpolation factor between the two gaussians basis
            sigma1 (float): sigma for the first gaussian
            sigma2 (float): sigma for the second gaussian
        """
        super(ScaledGaussianMixture, self).__init__()

        # All constants are stored as non-trainable parameters.
        constants = (("pi", pi), ("sigma1", sigma1), ("sigma2", sigma2),
                     ("zero", 0.))
        for label, value in constants:
            frozen = nn.Parameter(torch.tensor(value).float(),
                                  requires_grad=False)
            self.register_parameter(label, frozen)

        self.gaussian1 = Normal(self.zero, self.sigma1)
        self.gaussian2 = Normal(self.zero, self.sigma2)

    def sample(self) -> Tensor:
        """Placeholder sampler.

        Sampling is not implemented for this prior; the method always
        returns 0.0.
        """
        return 0.0

    def log_prob(self, input: Tensor) -> Tensor:
        """Summed log-density of ``input`` under the mixture.

        Arguments:
            input (Tensor): sampled value of the gaussian weight

        Returns:
            Tensor: log probability
        """
        first, second = (torch.exp(component.log_prob(input))
                         for component in (self.gaussian1, self.gaussian2))
        mixture = self.pi * first + (1.0 - self.pi) * second
        return torch.log(mixture).sum()
def _plot_mixture_curves(ax, xs, n_components, weights, component_color,
                         total_color, total_label):
    """Draw every weighted Gaussian component and their sum on ``ax``."""
    # Evaluate each component on the whole grid at once instead of one
    # point at a time; log_prob expects a tensor, not a NumPy scalar.
    grid = torch.as_tensor(xs, dtype=torch.float32)
    total = np.zeros(len(xs))
    for c in range(n_components):
        m = Normal(torch.tensor([c*10.]).float(), torch.tensor([5.0]).float())
        ys = (torch.exp(m.log_prob(grid)) * weights[c]).detach().cpu().numpy()
        ys = np.reshape(ys, [-1])
        total += ys
        ax.plot(xs, ys, label='', c=component_color)
    ax.plot(xs, total, label=total_label, c=total_color)


def plot_dist2(n_components, mixture_weights, true_mixture_weights, exp_dir, name=''):
    """Plot the current and the true Gaussian mixture and save the figure.

    Component ``c`` is ``Normal(10 * c, 5)``. Components weighted by
    ``mixture_weights`` are drawn in orange with their sum in red
    ('current'); components weighted by ``true_mixture_weights`` are drawn in
    cyan with their sum in blue ('true').

    Args:
        n_components: Number of mixture components.
        mixture_weights: Current weights, indexable by component.
        true_mixture_weights: Reference weights, indexable by component.
        exp_dir: Output directory prefix; the file name is appended by plain
            string concatenation, so it should end with a path separator.
        name: Optional suffix for the file name.
    """
    rows = 1
    cols = 1
    fig = plt.figure(figsize=(10+cols,4+rows), facecolor='white') #, dpi=150)

    ax = plt.subplot2grid((rows,cols), (0,0), frameon=False, colspan=1, rowspan=1)

    xs = np.linspace(-10,n_components*10 +5, 300)

    _plot_mixture_curves(ax, xs, n_components, mixture_weights,
                         'orange', 'r', 'current')
    _plot_mixture_curves(ax, xs, n_components, true_mixture_weights,
                         'c', 'b', 'true')

    ax.legend()

    ax.set_title(str(mixture_weights) +'\n'+str(true_mixture_weights), size=8, family='serif')

    plt_path = exp_dir+'gmm_plot_dist'+name+'.png'
    plt.savefig(plt_path)
    print ('saved training plot', plt_path)
    plt.close()
Exemple #21
0
    def forward(self, x, S):
        """Compute the combined decoder/encoder loss using ``S`` samples.

        Two sets of self-normalised importance weights are built: ``alpha``
        from samples of the encoder distribution ``q_phi`` and ``beta`` from
        samples of a moment-matched multivariate normal ``r_phi``. The
        encoder term is always ``-beta * log q_phi(z_r)``; the decoder term
        uses ``alpha`` when ``self.version == 'v1'`` and ``beta`` when it is
        ``'v2'``.

        Args:
            x: Input batch; reshaped to ``(-1, self.x_dim)``.
            S: Number of samples drawn from each proposal.

        Returns:
            Scalar tensor: sum of the encoder and decoder losses over samples
            and batch.

        NOTE(review): ``p_loss`` is only assigned for versions 'v1' and 'v2';
        any other value would raise ``UnboundLocalError`` at the final sum.
        ``device`` is not defined in this method and must exist at module
        level.
        """
        x = x.view(-1, self.x_dim)
        bsz = x.size(0)

        ### get w and \alpha and L(\theta)
        mu, logvar = self.encoder(x)
        q_phi = Normal(loc=mu, scale=torch.exp(0.5 * logvar))
        # S reparameterised samples per data point.
        z_q = q_phi.rsample((S, ))
        recon_batch = self.decoder(z_q)
        x_dist = Bernoulli(logits=recon_batch)
        log_lik = x_dist.log_prob(x).sum(-1)
        log_prior = self.prior.log_prob(z_q).sum(-1)
        log_q = q_phi.log_prob(z_q).sum(-1)
        # Log importance weights and their normalisation over the sample axis.
        log_w = log_lik + log_prior - log_q
        tmp_alpha = torch.logsumexp(log_w, dim=0).unsqueeze(0)
        # Normalised weights are detached, i.e. treated as constants.
        alpha = torch.exp(log_w - tmp_alpha).detach()
        if self.version == 'v1':
            p_loss = -alpha * (log_lik + log_prior)

        ### get moment-matched proposal
        # Weighted mean of the samples.
        mu_r = alpha.unsqueeze(2) * z_q
        mu_r = mu_r.sum(0).detach()
        z_minus_mu_r = z_q - mu_r.unsqueeze(0)
        # Per-sample outer products of the centred samples via batched matmul.
        reshaped_diff = z_minus_mu_r.view(S * bsz, -1, 1)
        reshaped_diff_t = reshaped_diff.permute(0, 2, 1)
        outer = torch.bmm(reshaped_diff, reshaped_diff_t)
        outer = outer.view(S, bsz, self.z_dim, self.z_dim)
        # Unweighted average of the outer products, rescaled by S / (S - 1).
        Sigma_r = outer.mean(0) * S / (S - 1)
        Sigma_r = Sigma_r + torch.eye(self.z_dim).to(device) * 1e-6  ## ridging

        ### get v, \beta, and L(\phi)
        L = torch.cholesky(Sigma_r)
        r_phi = MultivariateNormal(loc=mu_r, scale_tril=L)

        z = r_phi.rsample((S, ))
        # Samples from the proposal are detached before being scored.
        z_r = z.detach()
        recon_batch_r = self.decoder(z_r)
        x_dist_r = Bernoulli(logits=recon_batch_r)
        log_lik_r = x_dist_r.log_prob(x).sum(-1)
        log_prior_r = self.prior.log_prob(z_r).sum(-1)
        log_r = r_phi.log_prob(z_r)
        log_v = log_lik_r + log_prior_r - log_r
        tmp_beta = torch.logsumexp(log_v, dim=0).unsqueeze(0)
        beta = torch.exp(log_v - tmp_beta).detach()
        # Encoder log-density evaluated at the proposal samples.
        log_q = q_phi.log_prob(z_r).sum(-1)
        q_loss = -beta * log_q

        if self.version == 'v2':
            p_loss = -beta * (log_lik_r + log_prior_r)

        rem_loss = torch.sum(q_loss + p_loss, 0).sum()
        return rem_loss
Exemple #22
0
    def loglik(self, y_pred, y_obs):
        """Total log-likelihood of ``y_obs`` given predictions ``y_pred``.

        For ``self.likelihood == "Gaussian"`` the predictions are the mean
        and the scale is ``1e-6 + softplus(self.noise_sd)``; for
        ``"Bernoulli"`` the predictions are logits.

        Raises:
            NotImplementedError: For any other likelihood name.
        """
        if self.likelihood == "Gaussian":
            noise_scale = 1e-6 + softplus(self.noise_sd)
            observation_model = Normal(loc=y_pred, scale=noise_scale)
        elif self.likelihood == "Bernoulli":
            observation_model = Bernoulli(logits=y_pred)
        else:
            raise NotImplementedError("Other likelihoods not implemented")

        return observation_model.log_prob(y_obs).sum()
Exemple #23
0
    def forward(self, x, a=None, batch=False):
        """Sample from ``Normal(self.mu(x), exp(self.log_std))`` and score actions.

        Args:
            x: Input passed to ``self.mu`` to obtain the mean.
            a: Optional action to score.
            batch: Kept for backward compatibility only. It used to drop into
                a ``pdb`` breakpoint, which has been removed; it has no
                effect now.

        Returns:
            ``(pi, logp, logp_pi)``: the sample, the log-probability of ``a``
            (``None`` when ``a`` is None) and the log-probability of the
            sample; both log-probabilities are summed over dim 1.
        """
        policy = Normal(self.mu(x), self.log_std.exp())
        pi = policy.sample()
        logp_pi = policy.log_prob(pi).sum(dim=1)
        logp = policy.log_prob(a).sum(dim=1) if a is not None else None

        return pi, logp, logp_pi
Exemple #24
0
    def forward(self, x, with_logprob=False):
        """Sample a tanh-squashed Gaussian action rescaled to the action bounds.

        Args:
            x: Input features passed through ``self.layers``.
            with_logprob: If True, also return the log-probability.

        Returns:
            ``action`` when ``with_logprob`` is False, otherwise
            ``(action, logp)``. ``action`` is ``tanh(u)`` mapped from (-1, 1)
            to ``[self.action_low, self.action_high]``; ``logp`` is the
            Gaussian log-probability of the pre-squash sample ``u`` with the
            tanh correction applied, summed over the last axis.
        """
        x = self.layers(x)
        mean = self.mean_layer(x)
        std = self.log_std_layer(x).clamp(-20, 2).exp()
        pi_distribution = Normal(mean, std)
        pi_action = pi_distribution.rsample()

        if with_logprob:
            # Gaussian log-prob followed by the tanh change-of-variables
            # correction in its numerically stable form
            # 2 * (log 2 - u - softplus(-2u)) == log(1 - tanh(u)^2)
            # (see SAC paper, arXiv 1801.01290, appendix C).
            squash_correction = 2 * (np.log(2) - pi_action -
                                     F.softplus(-2 * pi_action))
            # Both reductions run over the last axis so that unbatched
            # (1-D) inputs work as well as batched ones.
            logp = (pi_distribution.log_prob(pi_action).sum(axis=-1) -
                    squash_correction.sum(axis=-1))
        else:
            logp = None

        x = torch.tanh(pi_action)  # [N, action_dim]

        # scale (-1, 1) to [action.low, action_high]
        action = (x + 1) * (self.action_high -
                            self.action_low) / 2 + self.action_low

        if with_logprob:
            return (action, logp)
        return action
def logprob_undercomponent(x, component):
    """Log-density of ``x`` under the Gaussian selected by ``component``.

    Component ``k`` is ``Normal(10 * k, 5)``. The distribution is created on
    the device of ``x`` instead of unconditionally on CUDA, so the function
    also works on CPU-only machines.

    Args:
        x: Tensor whose first dimension is the batch size ``B``.
        component: Tensor with ``B`` component indices.

    Returns:
        Tensor with the element-wise log-probabilities of ``x``.
    """
    B = x.shape[0]
    mean = (component.float() * 10.).view(B, 1).to(x.device)
    std = torch.full((B, 1), 5., device=x.device)
    return Normal(mean, std).log_prob(x)
Exemple #26
0
    def choose_action(self, observation):
        """Sample an action for a single observation.

        Args:
            observation: One observation; it is wrapped in a batch of size 1.

        Returns:
            ``(action, probs, value)``: the tanh-squashed action scaled by
            ``self.max_action`` as a NumPy array, the log-probability of the
            squashed sample as a float, and the critic's value as a float.
        """
        state = T.tensor([observation], dtype=T.float).to(self.actor.device)
        value = self.critic(state)

        mu, sigma = self.actor(state)
        probabilities = Normal(mu, sigma)

        # sample() carries no grad_fn, so the result cannot be backpropagated.
        actions = probabilities.sample()
        squashed = T.tanh(actions)
        # Scale to the environment's range; cast to float so the result is
        # not double precision.
        max_action = T.tensor(self.max_action).to(self.actor.device).float()
        action = squashed * max_action

        log_probs = probabilities.log_prob(actions)
        # tanh change-of-variables term. It must use the squashed value in
        # (-1, 1), not the rescaled action: with max_action != 1 the rescaled
        # value makes 1 - a^2 wrong and possibly negative (NaN after log).
        log_probs = log_probs - T.log(1 - squashed.pow(2) + self.reparam_noise)
        # 0-axis: batch, 1-axis: action components, summed to one value.
        log_probs = log_probs.sum(1, keepdim=True)

        # Move to host memory before converting; .numpy() fails on CUDA
        # tensors.
        action = T.squeeze(action).detach().cpu().numpy()
        probs = T.squeeze(log_probs).item()
        value = T.squeeze(value).item()

        return action, probs, value
    def log_forward(self, x):
        """Sample a tanh-squashed action and its per-dimension log-probability.

        ``x`` is converted to a tensor and reshaped to ``(-1, self.in_dim)``,
        passed through ``l1``-``l4`` (leaky ReLU after the first three), and
        mapped to a mean and a log standard deviation clamped to [-20, 2].

        Returns:
            ``(action, log_p)``: ``tanh`` of the reparameterised sample and
            the Gaussian log-probability of the pre-squash sample minus the
            tanh correction, element-wise (not summed).
        """
        hidden = torch.Tensor(x).reshape(-1, self.in_dim)
        for layer in (self.l1, self.l2, self.l3):
            hidden = self.leaky_relu(layer(hidden))
        hidden = self.l4(hidden)

        mu = self.mu_linear(hidden)
        std = torch.exp(torch.clamp(self.log_linear(hidden), -20, 2))
        policy = Normal(mu, std)

        raw_action = policy.rsample()
        # Stable form of the tanh correction:
        # 2 * (log 2 - u - softplus(-2u)) == log(1 - tanh(u)^2).
        correction = 2 * (np.log(2) - raw_action - F.softplus(-2 * raw_action))
        log_p = policy.log_prob(raw_action)
        log_p -= correction

        return torch.tanh(raw_action), log_p
def compute_sgd_approx_lr(lr=0.01):
    """Run SGD on ``-pathological_mixture`` and fit a Gaussian to the iterates.

    A scalar parameter is initialised as ``3 * randn(1)`` and optimised for
    100 steps. The second half of the recorded iterates gives a mean and a
    standard error, which define a Normal that is scored on ``test_pts``.

    Args:
        lr: SGD learning rate.

    Returns:
        ``(swa_nll, swa_estimate, swa_std)``: negative log-likelihood of
        ``test_pts``, the iterate mean, and the iterate standard deviation
        divided by the square root of the number of averaged iterates.
    """
    theta = 3.0 * torch.randn(1, requires_grad=False)
    theta.requires_grad = True

    optimizer = SGD([theta], lr=lr)

    num_steps = 100
    trajectory = torch.zeros(num_steps)
    for step_idx in range(num_steps):
        # Record the iterate before it is updated.
        trajectory[step_idx] = theta.data
        optimizer.zero_grad()
        objective = -pathological_mixture(theta)
        objective.backward()
        optimizer.step()

    # Statistics over the second half of the run only.
    half = int(num_steps / 2)
    tail = trajectory[half:]
    swa_estimate = tail.mean()
    swa_std = tail.std() * 1 / math.sqrt(half)

    swa_nll = -Normal(swa_estimate, swa_std).log_prob(test_pts)

    return swa_nll, swa_estimate, swa_std
def logprob_undercomponent(x, component):
    """Log-density of ``x`` under the Gaussian selected by ``component``.

    Component ``k`` is ``Normal(10 * k, 5)``. The distribution is created on
    the device of ``x`` instead of unconditionally on CUDA, so the function
    also works on CPU-only machines.

    Args:
        x: Tensor whose first dimension is the batch size ``B``.
        component: Tensor with ``B`` component indices.

    Returns:
        Tensor with the element-wise log-probabilities of ``x``.
    """
    B = x.shape[0]
    mean = (component.float() * 10.).view(B, 1).to(x.device)
    std = torch.full((B, 1), 5., device=x.device)
    return Normal(mean, std).log_prob(x)
Exemple #30
0
    def learn(self, s, a, td):
        """Take one Adam step on the actor objective for a single transition.

        The objective is ``log_prob(a) * td + 0.01 * entropy`` of the policy
        ``Normal(2 * mu(s), sigma(s) + 0.1)``, with ``td`` detached; its
        negative is stored in ``self.exp_v`` and minimised. A new Adam
        optimizer is created on every call.

        Args:
            s: NumPy state vector; a batch axis is prepended.
            a: NumPy array with the action taken.
            td: TD-error tensor.

        Returns:
            The objective value computed before the parameter update.
        """
        state = torch.from_numpy(s[np.newaxis, :]).float()
        advantage = td.detach()

        mu = torch.squeeze(self.mu(self.l1(state)))
        sigma = torch.squeeze(self.sigma(self.l1(state)))
        policy = Normal(mu * 2, sigma + 0.1)

        objective = policy.log_prob(torch.from_numpy(a)) * advantage
        # Entropy bonus.
        objective += 0.01 * policy.entropy()
        # Stored with flipped sign because the optimizer minimises.
        self.exp_v = -objective

        param_groups = [{'params': module.parameters()}
                        for module in (self.l1, self.sigma, self.mu)]
        optimizer = optim.Adam(param_groups, lr=self.lr)

        # optimize the model
        optimizer.zero_grad()
        self.exp_v.backward()
        optimizer.step()
        return -self.exp_v
def gmm_loss(batch, mus, sigmas, logpi, reduce=True):  # pylint: disable=too-many-arguments
    """ Computes the gmm loss.
    Compute minus the log probability of batch under the GMM model described
    by mus, sigmas, pi. Precisely, with bs1, bs2, ... the sizes of the batch
    dimensions (several batch dimension are useful when you have both a batch
    axis and a time step axis), gs the number of mixtures and fs the number of
    features.
    :args batch: (bs1, bs2, *, fs) torch tensor
    :args mus: (bs1, bs2, *, gs, fs) torch tensor
    :args sigmas: (bs1, bs2, *, gs, fs) torch tensor
    :args logpi: (bs1, bs2, *, gs) torch tensor
    :args reduce: if not reduce, the mean in the following formula is omitted
        and a (bs1, bs2, *) tensor is returned
    :returns:
    loss(batch) = - mean_{i1=0..bs1, i2=0..bs2, ...} log(
        sum_{k=1..gs} pi[i1, i2, ..., k] * N(
            batch[i1, i2, ..., :] | mus[i1, i2, ..., k, :], sigmas[i1, i2, ..., k, :]))
    NOTE: The loss is not reduced along the feature dimension (i.e. it should scale ~linearly
    with fs).
    """
    # Add a mixture axis so the batch broadcasts against every component.
    batch = batch.unsqueeze(-2)
    component_log_probs = Normal(mus, sigmas).log_prob(batch).sum(dim=-1)
    # logsumexp over the mixture axis is the stable max-shift computation and
    # removes exactly that axis. The former bare squeeze() dropped every
    # size-1 dimension and produced a wrongly broadcast result whenever a
    # non-leading batch dimension had size 1.
    log_prob = torch.logsumexp(logpi + component_log_probs, dim=-1)
    if reduce:
        return -torch.mean(log_prob)
    return -log_prob
Exemple #32
0
 def forward(self, inputs, c=None):
     """Encode a set of inputs into a Gaussian over ``c`` and score a value.

     Every element along dim 1 of ``inputs`` is encoded with ``self.enc``;
     the mean embedding parameterises ``Normal(mu_c, sigma_c)``.

     Args:
         inputs: Tensor laid out as batch * |D| * ...
         c: Optional value to score; a reparameterised sample is drawn when
             it is None.

     Returns:
         ``(c, score)`` where ``score`` is the log-probability of ``c``
         summed over dim 1.
     """
     per_element = inputs.transpose(0, 1)  # |D| * batch * ...
     encoded = list(map(self.enc, per_element))
     pooled = sum(encoded) / len(encoded)

     posterior = Normal(self.mu_c(pooled), self.sigma_c(pooled))
     if c is None:
         c = posterior.rsample()
     score = posterior.log_prob(c).sum(dim=1)
     return c, score  # Return value, score
Exemple #33
0
	def forward(self, inputs, c, z=None):
		"""Sample (or score) ``z`` from a Gaussian predicted from the image.

		``inputs`` is reshaped to ``(-1, 1, 28, 28)`` and fed to the two
		localization heads, which give the mean and scale. ``c`` is unused.

		Returns:
			``(z, score)`` where ``score`` is the log-probability of ``z``
			with dim 1 summed out three times in a row.
		"""
		images = inputs.view(-1, 1, 28, 28)
		posterior = Normal(self.localization_mu(images),
		                   self.localization_sigma(images))
		if z is None:
			z = posterior.rsample()
		score = posterior.log_prob(z)
		# Collapse the three trailing axes one after the other.
		for _ in range(3):
			score = score.sum(dim=1)
		return z, score
Exemple #34
0
	def forward(self, inputs, c=None):
		"""Encode a set of images into a Gaussian over ``c`` and score a value.

		Each element along dim 1 of the 5-D ``inputs`` goes through
		``self.stn`` and then ``self.conv_post_stn``; the averaged embedding
		gives the mean and scale of the Gaussian.

		Returns:
			``(c, score)`` where ``c`` is the given value or a reparameterised
			sample and ``score`` is its log-probability with dim 1 summed out
			three times in a row.
		"""
		# transform the input
		transformed = []
		for i in range(inputs.size(1)):
			transformed.append(self.stn(inputs[:,i,:,:,:]))

		embeddings = []
		for image in transformed:
			embeddings.append(self.conv_post_stn(image))
		pooled = sum(embeddings)/len(embeddings)

		posterior = Normal(self.conv_mu(pooled), self.conv_sigma(pooled))
		if c is None:
			c = posterior.rsample()
		score = posterior.log_prob(c)
		for _ in range(3):
			score = score.sum(dim=1)
		return c, score
def logprob_givenmixtureeweights(x, needsoftmax_mixtureweight):
    """Return log p(x) under the Gaussian mixture with the given weight logits.

    Component ``c`` (for ``c`` in ``range(n_components)``, a module-level
    value) is ``Normal(10 * c, 5)``; the mixture weights are the softmax of
    ``needsoftmax_mixtureweight`` along dim 0.

    The sum over components is evaluated in log space with ``logsumexp``.
    Exponentiating each component density first underflows to 0 when ``x`` is
    far from every mean, which turns the final log into ``-inf``.

    :param x: tensor of points to evaluate; broadcast against shape ``[1]``.
    :param needsoftmax_mixtureweight: unnormalised mixture logits, one per component.
    :returns: tensor of log-densities with the broadcast shape of ``x`` and ``[1]``.
    """
    log_mixture_weights = torch.log_softmax(needsoftmax_mixtureweight, dim=0)
    std = torch.tensor([5.0]).float()
    # log(pi_c * N(x | 10c, 5)) for every component, stacked on a new dim 0.
    log_joint = torch.stack([
        Normal(torch.tensor([c * 10.]).float(), std).log_prob(x) + log_mixture_weights[c]
        for c in range(n_components)
    ])
    return torch.logsumexp(log_joint, dim=0)
Exemple #36
0
    def get_log_prob(self, state, squashed_action):
        """
        Log-probability of an already tanh-squashed action, without gradients.

        The action is un-squashed with ``self._atanh``, scored under the Normal
        given by ``self._get_loc_and_scale_log(state)``, and corrected
        element-wise with ``self._squash_correction(squashed_action)``.

        Returns a tensor of shape ``(-1, 1)``: the corrected log-probabilities
        summed over dim 1.
        """
        with torch.no_grad():
            loc, scale_log = self._get_loc_and_scale_log(state)
            # This is not getting exported; we can use it
            gaussian = Normal(loc, scale_log.exp())
            pre_squash = self._atanh(squashed_action)
            per_dim = gaussian.log_prob(pre_squash) - self._squash_correction(squashed_action)
            return per_dim.sum(dim=1).reshape(-1, 1)
def plot_dist(x=None):
    """Plot the current mixture density, mark one sample, and save the figure.

    Draws the first 20 Gaussian components (mean ``10 * c``, std 5), each
    scaled by the softmax of the module-level ``needsoftmax_mixtureweight``,
    then their sum, then a short marker at the sample location. The figure is
    written to ``exp_dir + 'gmm_plot_dist.png'``.

    :param x: optional indexable of tensors; ``x[0]`` is the sample to mark.
        When None, a sample is drawn with ``sample_true(1)``.
    """
    if x is None:
        # matplotlib cannot consume CUDA tensors, so bring the sample to host memory.
        x1 = sample_true(1).detach().cpu().numpy()
    else:
        x1 = x[0].cpu().numpy()

    # Weights are only used for plotting: detach them and keep them on the CPU
    # alongside the component distributions built below.
    mixture_weights = torch.softmax(needsoftmax_mixtureweight, dim=0).detach().cpu()

    rows = 1
    cols = 1
    plt.figure(figsize=(10 + cols, 4 + rows), facecolor='white')
    ax = plt.subplot2grid((rows, cols), (0, 0), frameon=False, colspan=1, rowspan=1)

    xs = np.linspace(-9, 205, 300)
    # log_prob expects a Tensor; evaluate the whole grid in one call per component.
    grid = torch.from_numpy(xs).float()
    sum_ = np.zeros(len(xs))

    C = 20
    for c in range(C):
        m = Normal(torch.tensor([c * 10.]).float(), torch.tensor([5.0]).float())
        ys = (torch.exp(m.log_prob(grid)) * mixture_weights[c]).numpy()
        ys = np.reshape(ys, [-1])
        sum_ += ys
        ax.plot(xs, ys, label='')

    ax.plot(xs, sum_, label='')

    # Short marker showing where the sample falls.
    ax.plot([x1, x1 + .001], [0., .002])

    plt_path = exp_dir+'gmm_plot_dist.png'
    plt.savefig(plt_path)
    print ('saved training plot', plt_path)
    plt.close()
def true_posterior(x, needsoftmax_mixtureweight):
    """Return the posterior over mixture components given an observation.

    Component ``c`` (for ``c`` in ``range(n_components)``, a module-level
    value) is ``Normal(10 * c, 5)`` with prior weight
    ``softmax(needsoftmax_mixtureweight)[c]``. Only the first entry along
    dim 0 of each component's log-density is used.

    The component distributions are created on ``x``'s device rather than
    unconditionally on CUDA, and the normalisation is done in log space so an
    ``x`` far from every mean does not produce 0 / 0 = NaN.

    :param x: tensor observation; ``log_prob(x)[0]`` is taken per component.
    :param needsoftmax_mixtureweight: unnormalised mixture logits.
    :returns: stacked per-component probabilities, normalised to sum to 1
        over all entries.
    """
    log_mixture_weights = torch.log_softmax(needsoftmax_mixtureweight, dim=0)
    device = x.device
    std = torch.tensor([5.0], device=device)
    log_joint = []
    for c in range(n_components):
        m = Normal(torch.tensor([c * 10.], device=device), std)
        log_joint.append(m.log_prob(x)[0] + log_mixture_weights[c])
    log_joint = torch.stack(log_joint)
    # Normalise over every entry, matching a division by the total sum.
    log_norm = torch.logsumexp(log_joint.reshape(-1), dim=0)
    return torch.exp(log_joint - log_norm)
Exemple #39
0
def logprob_undercomponent(x, component, needsoftmax_mixtureweight, cuda=False):
    """Return log p(x, z=component) for a batch of (observation, component) pairs.

    Row ``b`` is scored under ``Normal(10 * component[b], 5)`` and the log
    prior weight ``log_softmax(needsoftmax_mixtureweight)[component[b]]`` is
    added. ``log_softmax`` is used instead of ``log(softmax(.))`` so that very
    negative logits give a finite log-weight rather than ``-inf``.

    :param x: tensor whose first dim is the batch size ``B``; broadcast
        against the ``(B, 1)`` means.
    :param component: integer tensor with ``B`` component indices.
    :param needsoftmax_mixtureweight: unnormalised mixture logits.
    :param cuda: when True, the component means and stds are moved to CUDA.
    :returns: log joint probabilities, broadcast shape of ``x`` and ``(B, 1)``.
    """
    B = x.shape[0]
    log_mixture_weights = torch.log_softmax(needsoftmax_mixtureweight, dim=0)

    mean = (component.float() * 10.).view(B, 1)
    std = torch.full((B, 1), 5., device=mean.device)
    if cuda:
        mean, std = mean.cuda(), std.cuda()

    logpx_given_z = Normal(mean, std).log_prob(x)
    logpz = log_mixture_weights[component].view(B, 1)
    return logpx_given_z + logpz
Exemple #40
0
def logprob_undercomponent(x, component, needsoftmax_mixtureweight, cuda=False):
    """Return log p(x, z=component) for a single mixture component.

    ``x`` is scored under ``Normal(10 * component, 5)`` and the log prior
    weight ``log_softmax(needsoftmax_mixtureweight)[component]`` is added.
    ``log_softmax`` is used instead of ``log(softmax(.))`` so that very
    negative logits give a finite log-weight rather than ``-inf``.

    :param x: tensor of observations; broadcast against shape ``[1]``.
    :param component: index of the component (used both as an index and to
        build the mean ``10 * component``).
    :param needsoftmax_mixtureweight: unnormalised mixture logits.
    :param cuda: when True, the component mean and std are moved to CUDA.
    :returns: log joint probability with the broadcast shape of ``x`` and ``[1]``.
    """
    log_mixture_weights = torch.log_softmax(needsoftmax_mixtureweight, dim=0)

    mean = torch.tensor([component * 10.]).float()
    std = torch.tensor([5.0]).float()
    if cuda:
        mean, std = mean.cuda(), std.cuda()

    return Normal(mean, std).log_prob(x) + log_mixture_weights[component]
Exemple #41
0
def gmm_loss(batch, mus, sigmas, logpi, reduce=True): # pylint: disable=too-many-arguments
    """ Computes the gmm loss.

    Compute minus the log probability of batch under the GMM model described
    by mus, sigmas, pi. Precisely, with bs1, bs2, ... the sizes of the batch
    dimensions (several batch dimensions are useful when you have both a batch
    axis and a time step axis), gs the number of mixtures and fs the number of
    features.

    :args batch: (bs1, bs2, *, fs) torch tensor
    :args mus: (bs1, bs2, *, gs, fs) torch tensor
    :args sigmas: (bs1, bs2, *, gs, fs) torch tensor
    :args logpi: (bs1, bs2, *, gs) torch tensor
    :args reduce: if not reduce, the mean in the following formula is omitted
        and a (bs1, bs2, *) tensor is returned

    :returns:
    loss(batch) = - mean_{i1=0..bs1, i2=0..bs2, ...} log(
        sum_{k=1..gs} pi[i1, i2, ..., k] * N(
            batch[i1, i2, ..., :] | mus[i1, i2, ..., k, :], sigmas[i1, i2, ..., k, :]))

    NOTE: The loss is not reduced along the feature dimension (i.e. it should scale ~linearly
    with fs).
    """
    # Insert the mixture axis so batch broadcasts against (…, gs, fs).
    batch = batch.unsqueeze(-2)
    component_log_probs = Normal(mus, sigmas).log_prob(batch).sum(dim=-1)
    # logsumexp reduces only the mixture axis, so size-1 batch dimensions are
    # preserved (a bare squeeze() would drop them and mis-broadcast), and it
    # stays finite-safe when every component log-probability is -inf.
    log_prob = torch.logsumexp(logpi + component_log_probs, dim=-1)
    if reduce:
        return -torch.mean(log_prob)
    return -log_prob
def plot_both_dists():
    """Plot the true mixture density and save it as an image.

    Draws each of the ``n_components`` Gaussian components (mean ``10 * c``,
    std 5) scaled by ``true_mixture_weights[c]`` in cyan, followed by their
    sum, and writes the figure to ``exp_dir + 'gmm_distplot.png'``.
    ``n_components``, ``true_mixture_weights`` and ``exp_dir`` are read from
    module scope.
    """
    rows = 1
    cols = 1
    plt.figure(figsize=(10 + cols, 4 + rows), facecolor='white')
    ax = plt.subplot2grid((rows, cols), (0, 0), frameon=False, colspan=1, rowspan=1)

    xs = np.linspace(-9, 205, 300)
    # log_prob expects a Tensor; evaluate the whole grid in one call per component.
    grid = torch.from_numpy(xs).float()
    sum_ = np.zeros(len(xs))
    for c in range(n_components):
        m = Normal(torch.tensor([c * 10.]).float(), torch.tensor([5.0]).float())
        ys = (torch.exp(m.log_prob(grid)) * true_mixture_weights[c]).numpy()
        ys = np.reshape(ys, [-1])
        sum_ += ys
        ax.plot(xs, ys, label='', c='c')
    ax.plot(xs, sum_, label='')

    plt_path = exp_dir+'gmm_distplot.png'
    plt.savefig(plt_path)
    print ('saved training plot', plt_path)
    plt.close()
Exemple #43
0
 def forward(self, c, z, x=None):
     """Score (or sample) ``x`` under a Normal conditioned on ``c`` and ``z``.

     ``c`` and ``z`` are concatenated along dim 1 and passed to ``self.mu``
     and ``self.sigma`` to obtain the distribution parameters.

     :param c: tensor concatenated first along dim 1.
     :param z: tensor concatenated second along dim 1.
     :param x: value to score; a reparameterised sample is drawn when None.
     :returns: ``(x, score)`` with ``score = log_prob(x)`` summed over dim 1.
     """
     conditioning = torch.cat([c, z], dim=1)
     distribution = Normal(self.mu(conditioning), self.sigma(conditioning))
     if x is None:
         x = distribution.rsample()
     score = distribution.log_prob(x).sum(dim=1)
     return x, score
Exemple #44
0
 def forward(self, inputs, c, z=None):
     """Score (or sample) ``z`` under a Normal built from ``inputs[:, 0]``.

     Only index 0 along dim 1 of ``inputs`` is used; it is passed to
     ``self.mu_z`` and ``self.sigma_z`` to obtain the distribution parameters.

     :param inputs: tensor indexed as ``inputs[:, 0]``.
     :param c: accepted for interface compatibility; not read here.
     :param z: value to score; a reparameterised sample is drawn when None.
     :returns: ``(z, score)`` with ``score = log_prob(z)`` summed over dim 1.
     """
     first_input = inputs[:, 0]
     distribution = Normal(self.mu_z(first_input), self.sigma_z(first_input))
     if z is None:
         z = distribution.rsample()
     score = distribution.log_prob(z).sum(dim=1)
     return z, score
    ax = plt.subplot2grid((rows,cols), (row,col), frameon=False, colspan=1, rowspan=1)




    xs = np.linspace(-9,205, 300)
    sum_ = np.zeros(len(xs))
    C = 20
    for c in range(C):
        m = Normal(torch.tensor([c*10.]).float(), torch.tensor([5.0]).float())
        # xs = torch.tensor(xs)
        # print (m.log_prob(lin))
        ys = []
        for x in xs:
            # print (m.log_prob(x))
            component_i = (torch.exp(m.log_prob(x) )* ((c+5.) / 290.)).numpy()
            ys.append(component_i)
        ys = np.reshape(np.array(ys), [-1])
        sum_ += ys
        ax.plot(xs, ys, label='', c='c')
    ax.plot(xs, sum_, label='')



    mixture_weights = torch.softmax(needsoftmax_mixtureweight, dim=0)
    xs = np.linspace(-9,205, 300)
    sum_ = np.zeros(len(xs))
    C = 20
    for c in range(C):
        m = Normal(torch.tensor([c*10.]).float(), torch.tensor([5.0]).float())
        # xs = torch.tensor(xs)