Example No. 1
    def forward(self, X: Tensor) -> Tensor:
        r"""Evaluate Expected Improvement on the candidate set X.

        Args:
            X: A `b1 x ... bk x 1 x d`-dim batched tensor of `d`-dim design points.
                Expected Improvement is computed for each point individually,
                i.e., what is considered are the marginal posteriors, not the
                joint.

        Returns:
            A `b1 x ... bk`-dim tensor of Expected Improvement values at the
            given design points `X`.
        """
        self.best_f = self.best_f.to(X)
        posterior = self.model.posterior(X)
        self._validate_single_output_posterior(posterior)
        mean = posterior.mean
        # deal with batch evaluation and broadcasting
        view_shape = mean.shape[:-2] if mean.dim() >= X.dim() else X.shape[:-2]
        mean = mean.view(view_shape)
        sigma = posterior.variance.clamp_min(1e-9).sqrt().view(view_shape)
        u = (mean - self.best_f.expand_as(mean)) / sigma
        if not self.maximize:
            u = -u
        normal = Normal(torch.zeros_like(u), torch.ones_like(u))
        ucdf = normal.cdf(u)
        updf = torch.exp(normal.log_prob(u))
        ei = sigma * (updf + u * ucdf)
        return ei
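
For reference, a minimal standalone sketch of the same closed-form EI computation, assuming hypothetical posterior summaries `mu` and `sigma` and an incumbent `best_f`:

import torch
from torch.distributions import Normal

# Hypothetical posterior summaries for four candidate points (illustrative values).
mu = torch.tensor([0.2, 0.5, 0.1, 0.9])
sigma = torch.tensor([0.3, 0.1, 0.4, 0.2])
best_f = 0.4  # incumbent best observed value (maximization)

u = (mu - best_f) / sigma
standard_normal = Normal(torch.zeros_like(u), torch.ones_like(u))
# EI(x) = sigma * (phi(u) + u * Phi(u)), the same closed form as above
ei = sigma * (torch.exp(standard_normal.log_prob(u)) + u * standard_normal.cdf(u))
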
Example No. 2
    def sample_conditional_a(self, resid_image, var_so_far, pixel_1d):

        is_on = (pixel_1d < (self.n_discrete_latent - 1)).float()

        # pass through galaxy encoder
        pixel_2d = self.one_galaxy_vae.pixel_1d_to_2d(pixel_1d)
        z_mean, z_var = self.one_galaxy_vae.enc(resid_image, pixel_2d)

        # sample z
        q_z = Normal(z_mean, z_var.sqrt())
        z_sample = q_z.rsample()

        # kl term for continuous latent vars
        log_q_z = q_z.log_prob(z_sample).sum(1)
        p_z = Normal(torch.zeros_like(z_sample), torch.ones_like(z_sample))
        log_p_z = p_z.log_prob(z_sample).sum(1)
        kl_z = is_on * (log_q_z - log_p_z)

        # run through decoder
        recon_mean, recon_var = self.one_galaxy_vae.dec(is_on, pixel_2d, z_sample)

        # NOTE: we will have to update the recon means once we do more detections
        # recon_means = recon_mean + image_so_far
        # recon_vars = recon_var + var_so_far

        return recon_mean, recon_var, is_on, kl_z
Example No. 3
    def forward(self, X: Tensor) -> Tensor:
        r"""Evaluate Constrained Expected Improvement on the candidate set X.

        Args:
            X: A `(b) x 1 x d`-dim Tensor of `(b)` t-batches of `d`-dim design
                points each.

        Returns:
            A `(b)`-dim Tensor of Expected Improvement values at the given
            design points `X`.
        """
        posterior = self.model.posterior(X)
        means = posterior.mean.squeeze(dim=-2)  # (b) x t
        sigmas = posterior.variance.squeeze(dim=-2).sqrt().clamp_min(1e-9)  # (b) x t

        # (b) x 1
        mean_obj = means[..., [self.objective_index]]
        sigma_obj = sigmas[..., [self.objective_index]]
        u = (mean_obj - self.best_f.expand_as(mean_obj)) / sigma_obj
        if not self.maximize:
            u = -u
        normal = Normal(
            torch.zeros(1, device=u.device, dtype=u.dtype),
            torch.ones(1, device=u.device, dtype=u.dtype),
        )
        ei_pdf = torch.exp(normal.log_prob(u))  # (b) x 1
        ei_cdf = normal.cdf(u)
        ei = sigma_obj * (ei_pdf + u * ei_cdf)
        prob_feas = self._compute_prob_feas(X=X, means=means, sigmas=sigmas)
        ei = ei.mul(prob_feas)
        return ei.squeeze(dim=-1)
Example No. 4
 def forward(self, x_src):
     # Example variational parameters lambda
     mu, logvar = self.encoder(x_src)
     q_normal = Normal(loc=mu, scale=logvar.mul(0.5).exp())
     # Reparameterized sample.
     z_sample = q_normal.rsample()
     # z_sample = mu (no sampling)
     return self.decoder(z_sample), q_normal
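
The `rsample` call above is what keeps the latent sample differentiable with respect to the variational parameters; a small sketch of the equivalent manual reparameterization, with hypothetical shapes:

import torch
from torch.distributions import Normal

# Hypothetical encoder outputs (batch of 8, latent dim 16).
mu = torch.zeros(8, 16, requires_grad=True)
logvar = torch.zeros(8, 16, requires_grad=True)

std = logvar.mul(0.5).exp()
eps = torch.randn_like(std)
z_manual = mu + eps * std              # manual reparameterization trick
z_rsample = Normal(mu, std).rsample()  # same distribution, also differentiable
z_manual.sum().backward()              # gradients flow back to mu and logvar
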
Example No. 5
    def forward(self, X: Tensor) -> Tensor:
        r"""Evaluate the Probability of Improvement on the candidate set X.

        Args:
            X: A `(b) x 1 x d`-dim Tensor of `(b)` t-batches of `d`-dim design
                points each.

        Returns:
            A `(b)`-dim tensor of Probability of Improvement values at the given
            design points `X`.
        """
        self.best_f = self.best_f.to(X)
        batch_shape = X.shape[:-2]
        posterior = self.model.posterior(X)
        self._validate_single_output_posterior(posterior)
        mean = posterior.mean.view(batch_shape)
        sigma = posterior.variance.sqrt().clamp_min(1e-9).view(batch_shape)
        u = (mean - self.best_f.expand_as(mean)) / sigma
        if not self.maximize:
            u = -u
        normal = Normal(torch.zeros_like(u), torch.ones_like(u))
        return normal.cdf(u)
Example No. 6
 def dist(*logits):
     return Independent(Normal(*logits), 1)
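
Wrapping the factorized `Normal` in `Independent(..., 1)` reinterprets the last batch dimension as an event dimension, so `log_prob` sums over it; a small sketch of the effect:

import torch
from torch.distributions import Independent, Normal

loc, scale = torch.zeros(4, 3), torch.ones(4, 3)
base = Normal(loc, scale)    # batch_shape (4, 3), event_shape ()
diag = Independent(base, 1)  # batch_shape (4,),   event_shape (3,)
x = torch.randn(4, 3)
assert diag.log_prob(x).shape == (4,)
assert torch.allclose(diag.log_prob(x), base.log_prob(x).sum(-1))
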
Example No. 7
# %% Import packages

import numpy as np
import torch

from torch.distributions import Normal

from eeyore.constants import loss_functions
from eeyore.models import mlp

from bnn_mcmc_examples.examples.mlp.penguins.constants import dtype, mlp_dims, mlp_bias, mlp_activations

# %% Setup MLP model

hparams = mlp.Hyperparameters(dims=mlp_dims,
                              bias=mlp_bias,
                              activations=mlp_activations)

model = mlp.MLP(loss=loss_functions['multiclass_classification'],
                hparams=hparams,
                dtype=dtype)

prior_scale = np.sqrt(10.)

model.prior = Normal(
    torch.zeros(model.num_params(), dtype=model.dtype),
    torch.full([model.num_params()], prior_scale, dtype=model.dtype))
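
With a factorized prior like the one above, the log prior density of a flattened parameter vector is simply the sum of per-parameter log densities; a sketch with a hypothetical parameter count:

import torch
from torch.distributions import Normal

num_params = 123  # hypothetical parameter count
prior = Normal(torch.zeros(num_params), torch.full([num_params], 10. ** 0.5))
theta = torch.randn(num_params)
log_prior = prior.log_prob(theta).sum()  # factorized Gaussian log prior
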
Example No. 8
 def sample_prior(self, n_imgs, **kwargs):
     z = self.pz.sample([n_imgs, self.latent,self.image_out_size,self.image_out_size]).to(device)
     mean = self.decode(z)
     pxz = Normal(mean, 1)
     return mean  # pxz.sample()
Example No. 9
 def reparameterize_transformation(self, mu, var):
     untran_z = Normal(mu, var.sqrt()).rsample()
     z = self.z_transformation(untran_z)
     return z, untran_z
Example No. 10
def std_normal(shape):
    N = Normal(torch.zeros(shape), torch.ones(shape))
    if torch.cuda.is_available():
        N.loc = N.loc.cuda()
        N.scale = N.scale.cuda()
    return N
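
An alternative (not what the original does) is to build the tensors on the target device up front rather than mutating the distribution's attributes after construction; a sketch:

import torch
from torch.distributions import Normal

def std_normal_on(shape, device=None):
    # Construct the standard normal directly on the target device instead of
    # reassigning N.loc and N.scale after the fact.
    device = device or ("cuda" if torch.cuda.is_available() else "cpu")
    return Normal(torch.zeros(shape, device=device), torch.ones(shape, device=device))
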
Example No. 11
 def log_init_prior(self, model_params, z):
     '''evaluate log pdf of z0 under the init prior
     '''
     prior = Normal(model_params['init_latent_loc'],
                    torch.exp(model_params['init_latent_log_scale']))
     return torch.sum(prior.log_prob(z))
Example No. 12
def reparameterize_gaussian(mu, var):
    return Normal(mu, var.sqrt()).rsample()
Example No. 13
 def rsample(self, sample_shape=torch.Size([])):
     local_shrinkage = HalfCauchy(1).rsample(self.scale.shape)
     param_sample = Normal(0, local_shrinkage *
                           self.scale).rsample(sample_shape)
     return param_sample
Example No. 14
    print("viewpoints Batch Tensor: ")
    print(viewpoints.shape)

    #def step(batch):
    model.train()

    x, v = batch
    x, v = x.to(device), v.to(device)
    x, v, x_q, v_q = partition(x, v)

    # Reconstruction, representation and divergence
    x_mu, _, kl = model(x, v, x_q, v_q)

    # Log likelihood
    sigma = next(sigma_scheme)
    ll = Normal(x_mu, sigma).log_prob(x_q)

    likelihood = torch.mean(torch.sum(ll, dim=[1, 2, 3]))
    kl_divergence = torch.mean(torch.sum(kl, dim=[1, 2, 3]))

    # Evidence lower bound
    elbo = likelihood - kl_divergence
    loss = -elbo
    loss.backward()

    optimizer.step()
    optimizer.zero_grad()

    def save_images(engine):
        print("Epoch Completed save_images")
        with torch.no_grad():
Example No. 15
 def forward(self, state):
     mu = self.actor(state)
     std = self.log_std.exp().expand_as(mu)
     dist = Normal(mu, std)
     return dist
Example No. 16
 def __init__(self, loc, scale):
     super(TanhNormal, self).__init__()
     self.normal = Normal(loc, scale)
Example No. 17
 def forward(self, state):
     policy = Normal(self.actor(state), self.actor.policy_log_std.exp())
     value = self.critic(state)
     return policy, value
Example No. 18
def expected_improvement_search(features, genotype):
    """ implementation of arch2vec-DNGO on DARTS Search Space """
    CURR_BEST_VALID = 0.
    CURR_BEST_TEST = 0.
    CURR_BEST_GENOTYPE = None
    MAX_BUDGET = args.max_budgets
    window_size = 200
    counter = 0
    visited = {}
    best_trace = defaultdict(list)

    features, genotype = features.cpu().detach(), genotype
    feat_samples, geno_samples, valid_label_samples, test_label_samples, visited = get_init_samples(
        features, genotype, visited)

    for feat, geno, acc_valid, acc_test in zip(feat_samples, geno_samples,
                                               valid_label_samples,
                                               test_label_samples):
        counter += 1
        if acc_valid > CURR_BEST_VALID:
            CURR_BEST_VALID = acc_valid
            CURR_BEST_TEST = acc_test
            CURR_BEST_GENOTYPE = geno
        best_trace['validation_acc'].append(float(CURR_BEST_VALID))
        best_trace['test_acc'].append(float(CURR_BEST_TEST))
        best_trace['genotype'].append(CURR_BEST_GENOTYPE)
        best_trace['counter'].append(counter)

    while counter < MAX_BUDGET:
        print("feat_samples:", feat_samples.shape)
        print("length of genotypes:", len(geno_samples))
        print("valid label_samples:", valid_label_samples.shape)
        print("test label samples:", test_label_samples.shape)
        print("current best validation: {}".format(CURR_BEST_VALID))
        print("current best test: {}".format(CURR_BEST_TEST))
        print("counter: {}".format(counter))
        print(feat_samples.shape)
        print(valid_label_samples.shape)
        model = DNGO(num_epochs=100,
                     n_units=128,
                     do_mcmc=False,
                     normalize_output=False)
        model.train(X=feat_samples.numpy(),
                    y=valid_label_samples.view(-1).numpy(),
                    do_optimize=True)
        print(model.network)
        m = []
        v = []
        chunks = int(features.shape[0] / window_size)
        if features.shape[0] % window_size > 0:
            chunks += 1
        features_split = torch.split(features, window_size, dim=0)
        for i in range(chunks):
            m_split, v_split = model.predict(features_split[i].numpy())
            m.extend(list(m_split))
            v.extend(list(v_split))
        mean = torch.Tensor(m)
        sigma = torch.Tensor(v)
        u = (mean - torch.Tensor([args.objective]).expand_as(mean)) / sigma
        normal = Normal(torch.zeros_like(u), torch.ones_like(u))
        ucdf = normal.cdf(u)
        updf = torch.exp(normal.log_prob(u))
        ei = sigma * (updf + u * ucdf)
        feat_next, geno_next, label_next_valid, label_next_test, visited = propose_location(
            ei, features, genotype, visited, counter)

        # add proposed networks to the pool
        for feat, geno, acc_valid, acc_test in zip(feat_next, geno_next,
                                                   label_next_valid,
                                                   label_next_test):
            feat_samples = torch.cat((feat_samples, feat.view(1, -1)), dim=0)
            geno_samples.append(geno)
            valid_label_samples = torch.cat(
                (valid_label_samples.view(-1, 1), acc_valid.view(1, 1)), dim=0)
            test_label_samples = torch.cat(
                (test_label_samples.view(-1, 1), acc_test.view(1, 1)), dim=0)
            counter += 1
            if acc_valid.item() > CURR_BEST_VALID:
                CURR_BEST_VALID = acc_valid.item()
                CURR_BEST_TEST = acc_test.item()
                CURR_BEST_GENOTYPE = geno

            best_trace['validation_acc'].append(float(CURR_BEST_VALID))
            best_trace['test_acc'].append(float(CURR_BEST_TEST))
            best_trace['genotype'].append(CURR_BEST_GENOTYPE)
            best_trace['counter'].append(counter)

            if counter >= MAX_BUDGET:
                break

    res = dict()
    res['validation_acc'] = best_trace['validation_acc']
    res['test_acc'] = best_trace['test_acc']
    res['genotype'] = best_trace['genotype']
    res['counter'] = best_trace['counter']
    save_path = os.path.join(args.output_path, 'dim{}'.format(args.dim))
    if not os.path.exists(save_path):
        os.mkdir(save_path)
    print('save to {}'.format(save_path))
    fh = open(
        os.path.join(save_path,
                     'run_{}_arch2vec_model_darts.json'.format(args.seed)),
        'w')
    json.dump(res, fh)
    fh.close()
Example No. 19
    def train(self):

        if len(self.memory) < self.BATCH_SIZE:
            return

        if self.update < self.UPDATE_INTERVAL:
            self.update += 1
            return

        self.update = 0

        samples = random.sample(self.memory, self.BATCH_SIZE)
        batch = self.experience(*zip(*samples))

        states = torch.as_tensor(np.float32(batch.s), device=device)
        next_states = torch.as_tensor(np.float32(batch.s2), device=device)
        rewards = torch.as_tensor(batch.r, device=device)
        done = torch.as_tensor(batch.done, device=device)
        y = torch.zeros([self.BATCH_SIZE, 1], device=device)
        actions = torch.zeros([self.outputs, self.BATCH_SIZE, 1], device=device)

        for i in range(self.BATCH_SIZE):
            for j in range(self.outputs):
                actions[j][i] = batch.a[i][j]

        q1 = self.q1(states, actions)
        q2 = self.q2(states, actions)

        with torch.no_grad():

            next_pi = self.pi(next_states)
            next_pi_dist = Normal(next_pi, 1e-8)
            next_pi_logprob = next_pi_dist.log_prob(next_pi).sum(-1, keepdim=True)
            next_pi_actions = torch.zeros([self.outputs, self.BATCH_SIZE, 1], device=device)

            for i in range(self.BATCH_SIZE):
                for j in range(self.outputs):
                    next_pi_actions[j][i] = next_pi[i][j].clamp(-1.0, 1.0)

            next_q1 = self.q1_target(next_states, next_pi_actions)
            next_q2 = self.q2_target(next_states, next_pi_actions)

            next_q = torch.min(next_q1, next_q2)

            for i in range(self.BATCH_SIZE):

                if done[i]:
                    y[i] = rewards[i]
                else:
                    y[i] = rewards[i] + self.GAMMA * (next_q[i] - self.ALPHA * next_pi_logprob[i])

        q1_loss = torch.nn.MSELoss()(q1, y)
        q2_loss = torch.nn.MSELoss()(q2, y)

        self.optimizer_q1.zero_grad()
        q1_loss.backward()
        self.optimizer_q1.step()

        self.optimizer_q2.zero_grad()
        q2_loss.backward()
        self.optimizer_q2.step()

        pi = self.pi(states)
        pi_dist = Normal(pi, 1e-8)
        pi_actions = torch.zeros([self.outputs, self.BATCH_SIZE, 1], device=device)

        for i in range(self.BATCH_SIZE):
            for j in range(self.outputs):
                pi_actions[j][i] = pi[i][j]

        q1 = self.q1(states, pi_actions)
        q2 = self.q2(states, pi_actions)
        q = torch.min(q1, q2)

        pi_loss = - (q - self.ALPHA * pi_dist.log_prob(pi).sum(axis=1)).mean()

        self.optimizer_pi.zero_grad()
        pi_loss.backward()
        self.optimizer_pi.step()

        # Update target network
        for target_param, source_param in zip(self.q1_target.parameters(), self.q1.parameters()):
            target_param.data.copy_(
                target_param.data * (1.0 - self.POLYAK) + \
                source_param.data * self.POLYAK)

        for target_param, source_param in zip(self.q2_target.parameters(), self.q2.parameters()):
            target_param.data.copy_(
                target_param.data * (1.0 - self.POLYAK) + \
                source_param.data * self.POLYAK)
Example No. 20
    def loss(
        self,
        tensors,
        inference_outputs,
        generative_ouputs,
        feed_labels=False,
        kl_weight=1,
        labelled_tensors=None,
        classification_ratio=None,
    ):
        px_r = generative_ouputs["px_r"]
        px_rate = generative_ouputs["px_rate"]
        px_dropout = generative_ouputs["px_dropout"]
        qz1_m = inference_outputs["qz_m"]
        qz1_v = inference_outputs["qz_v"]
        z1 = inference_outputs["z"]
        x = tensors[_CONSTANTS.X_KEY]
        batch_index = tensors[_CONSTANTS.BATCH_KEY]

        if feed_labels:
            y = tensors[_CONSTANTS.LABELS_KEY]
        else:
            y = None
        is_labelled = False if y is None else True

        # Enumerate choices of label
        ys, z1s = broadcast_labels(y, z1, n_broadcast=self.n_labels)
        qz2_m, qz2_v, z2 = self.encoder_z2_z1(z1s, ys)
        pz1_m, pz1_v = self.decoder_z1_z2(z2, ys)

        reconst_loss = self.get_reconstruction_loss(x, px_rate, px_r,
                                                    px_dropout)

        # KL Divergence
        mean = torch.zeros_like(qz2_m)
        scale = torch.ones_like(qz2_v)

        kl_divergence_z2 = kl(Normal(qz2_m, torch.sqrt(qz2_v)),
                              Normal(mean, scale)).sum(dim=1)
        loss_z1_unweight = -Normal(pz1_m,
                                   torch.sqrt(pz1_v)).log_prob(z1s).sum(dim=-1)
        loss_z1_weight = Normal(qz1_m,
                                torch.sqrt(qz1_v)).log_prob(z1).sum(dim=-1)
        if not self.use_observed_lib_size:
            ql_m = inference_outputs["ql_m"]
            ql_v = inference_outputs["ql_v"]
            (
                local_library_log_means,
                local_library_log_vars,
            ) = self._compute_local_library_params(batch_index)

            kl_divergence_l = kl(
                Normal(ql_m, torch.sqrt(ql_v)),
                Normal(local_library_log_means,
                       torch.sqrt(local_library_log_vars)),
            ).sum(dim=1)
        else:
            kl_divergence_l = 0.0

        if is_labelled:
            loss = reconst_loss + loss_z1_weight + loss_z1_unweight
            kl_locals = {
                "kl_divergence_z2": kl_divergence_z2,
                "kl_divergence_l": kl_divergence_l,
            }
            if labelled_tensors is not None:
                classifier_loss = self.classification_loss(labelled_tensors)
                loss += classifier_loss * classification_ratio
                return LossRecorder(
                    loss,
                    reconst_loss,
                    kl_locals,
                    kl_global=torch.tensor(0.0),
                    classification_loss=classifier_loss,
                    n_labelled_tensors=labelled_tensors[
                        _CONSTANTS.X_KEY].shape[0],
                )
            return LossRecorder(
                loss,
                reconst_loss,
                kl_locals,
                kl_global=torch.tensor(0.0),
            )

        probs = self.classifier(z1)
        reconst_loss += loss_z1_weight + (
            (loss_z1_unweight).view(self.n_labels, -1).t() * probs).sum(dim=1)

        kl_divergence = (kl_divergence_z2.view(self.n_labels, -1).t() *
                         probs).sum(dim=1)
        kl_divergence += kl(
            Categorical(probs=probs),
            Categorical(probs=self.y_prior.repeat(probs.size(0), 1)),
        )
        kl_divergence += kl_divergence_l

        loss = torch.mean(reconst_loss + kl_divergence * kl_weight)

        if labelled_tensors is not None:
            classifier_loss = self.classification_loss(labelled_tensors)
            loss += classifier_loss * classification_ratio
            return LossRecorder(
                loss,
                reconst_loss,
                kl_divergence,
                kl_global=torch.tensor(0.0),
                classification_loss=classifier_loss,
                n_labelled_tensors=labelled_tensors[_CONSTANTS.X_KEY].shape[0],
            )
        return LossRecorder(loss,
                            reconst_loss,
                            kl_divergence,
                            kl_global=torch.tensor(0.0))
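
The `kl(Normal, Normal)` terms above have a closed form; a small sketch checking it against `kl_divergence` (illustrative numbers only):

import torch
from torch.distributions import Normal
from torch.distributions.kl import kl_divergence

m1, s1 = torch.tensor([0.3]), torch.tensor([0.8])
m2, s2 = torch.tensor([0.0]), torch.tensor([1.0])
# KL(N(m1, s1^2) || N(m2, s2^2)) = log(s2/s1) + (s1^2 + (m1 - m2)^2) / (2 * s2^2) - 1/2
closed_form = torch.log(s2 / s1) + (s1 ** 2 + (m1 - m2) ** 2) / (2 * s2 ** 2) - 0.5
assert torch.allclose(kl_divergence(Normal(m1, s1), Normal(m2, s2)), closed_form)
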
Example No. 21
 def sample(self, mean, var):
     return Normal(mean, torch.sqrt(var)).sample()
Example No. 22
 def distribution(self, logits: Tensor) -> Distribution:
     assert logits.size(1) % 2 == 0
     mid = logits.size(1) // 2
     loc = logits[:, :mid]
     scale = logits[:, mid:]
     return Normal(loc, scale)
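
`Normal` requires a strictly positive `scale`, so the split above assumes the scale half of the logits is already constrained upstream; a common variant (an assumption, not necessarily what this codebase does) applies a softplus:

import torch
import torch.nn.functional as F
from torch import Tensor
from torch.distributions import Normal

def distribution_softplus(logits: Tensor) -> Normal:
    # Variant sketch: constrain the scale half of the logits to be positive.
    assert logits.size(1) % 2 == 0
    loc, raw_scale = logits.chunk(2, dim=1)
    return Normal(loc, F.softplus(raw_scale) + 1e-6)
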
Example No. 23
 def forward(self, repeat):
     epsilon = Normal(0, 1).sample([repeat, self.p])
     return (self.logstd.exp() * epsilon +
             self.mu).exp(), epsilon, self.mu, self.logstd
Example No. 24
 def sample_init_prior(self, model_params):
     prior = Normal(model_params['init_latent_loc'],
                    torch.exp(model_params['init_latent_log_scale']))
     return prior.sample()[None]
Example No. 25
                                               start_step=5000,
                                               end_step=10000,
                                               start_value=0.1,
                                               end_value=5)

    else:
        lambd_AE = 1.0
        lambd_fit_error = 1.0
        lambd_hrank = 1.0
        lambd_pred = 1.0

    logprob_rec = 0.0
    if output["rec"] is not None:
        rec = output["rec"]
        rec_distr = Normal(rec, std_rec)
        logprob_rec = rec_distr.log_prob(target[:, -rec.shape[1]:]) \
            .flatten(start_dim=1).sum(1)

    logprob_pred = 0.0
    # n_preds = 5
    if output["pred"] is not None:
        pred = output["pred"]
        n_preds = pred.shape[1]
        pred_distr = Normal(pred[:, -n_preds:], std_rec)
        logprob_pred = pred_distr.log_prob(target[:, -n_preds:]) \
            .flatten(start_dim=1).sum(1)

    logprob_rec_ori = 0.0
    if output["rec_ori"] is not None:
        rec_ori = output["rec_ori"]
Example No. 26
def compute_marginal_log_likelihood_autozi(autozivae,
                                           posterior,
                                           n_samples_mc=100):
    """ Computes a biased estimator for log p(x), which is the marginal log likelihood.

    Despite its bias, the estimator still converges to the real value
    of log p(x) when n_samples_mc (for Monte Carlo) goes to infinity
    (a fairly high value like 100 should be enough)
    Due to the Monte Carlo sampling, this method is not as computationally efficient
    as computing only the reconstruction loss
    """
    # Uses MC sampling to compute a tighter lower bound on log p(x)
    log_lkl = 0
    to_sum = torch.zeros((n_samples_mc, ))
    alphas_betas = autozivae.get_alphas_betas(as_numpy=False)
    alpha_prior = alphas_betas["alpha_prior"]
    alpha_posterior = alphas_betas["alpha_posterior"]
    beta_prior = alphas_betas["beta_prior"]
    beta_posterior = alphas_betas["beta_posterior"]

    for i in range(n_samples_mc):

        bernoulli_params = autozivae.sample_from_beta_distribution(
            alpha_posterior, beta_posterior)

        for i_batch, tensors in enumerate(posterior):
            sample_batch, local_l_mean, local_l_var, batch_index, labels = tensors

            # Distribution parameters and sampled variables
            outputs = autozivae.inference(sample_batch, batch_index, labels)
            px_r = outputs["px_r"]
            px_rate = outputs["px_rate"]
            px_dropout = outputs["px_dropout"]
            qz_m = outputs["qz_m"]
            qz_v = outputs["qz_v"]
            z = outputs["z"]
            ql_m = outputs["ql_m"]
            ql_v = outputs["ql_v"]
            library = outputs["library"]

            # Reconstruction Loss
            bernoulli_params_batch = autozivae.reshape_bernoulli(
                bernoulli_params, batch_index, labels)
            reconst_loss = autozivae.get_reconstruction_loss(
                sample_batch, px_rate, px_r, px_dropout,
                bernoulli_params_batch)

            # Log-probabilities
            p_l = Normal(local_l_mean,
                         local_l_var.sqrt()).log_prob(library).sum(dim=-1)
            p_z = (Normal(torch.zeros_like(qz_m),
                          torch.ones_like(qz_v)).log_prob(z).sum(dim=-1))
            p_x_zld = -reconst_loss
            q_z_x = Normal(qz_m, qz_v.sqrt()).log_prob(z).sum(dim=-1)
            q_l_x = Normal(ql_m, ql_v.sqrt()).log_prob(library).sum(dim=-1)

            batch_log_lkl = torch.sum(p_x_zld + p_l + p_z - q_z_x - q_l_x,
                                      dim=0)
            to_sum[i] += batch_log_lkl

        p_d = Beta(alpha_prior, beta_prior).log_prob(bernoulli_params).sum()
        q_d = Beta(alpha_posterior,
                   beta_posterior).log_prob(bernoulli_params).sum()

        to_sum[i] += p_d - q_d

    log_lkl = logsumexp(to_sum, dim=-1).item() - np.log(n_samples_mc)
    n_samples = len(posterior.indices)
    # The minus sign is there because we actually look at the negative log likelihood
    return -log_lkl / n_samples
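
The final `logsumexp(to_sum, dim=-1) - np.log(n_samples_mc)` line computes the Monte Carlo average in log space; a tiny sketch of the identity it relies on, with hypothetical log-weights:

import torch

# log(1/N * sum_i exp(w_i)) computed stably as logsumexp(w) - log(N).
w = torch.randn(100)  # hypothetical per-sample log-weights
naive = torch.log(torch.exp(w).mean())
stable = torch.logsumexp(w, dim=-1) - torch.log(torch.tensor(float(w.numel())))
assert torch.allclose(naive, stable)
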
Example No. 27
def get_entropy(mu, std):
    dist = Normal(mu, std)
    entropy = dist.entropy().mean()
    return entropy
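
For a Gaussian, `dist.entropy()` has a closed form that depends only on the scale; a quick check with illustrative values:

import math
import torch
from torch.distributions import Normal

mu, std = torch.zeros(3), torch.tensor([0.5, 1.0, 2.0])
# Closed form: H(N(mu, sigma^2)) = 0.5 * log(2 * pi * e * sigma^2)
closed_form = 0.5 * torch.log(2 * math.pi * math.e * std ** 2)
assert torch.allclose(Normal(mu, std).entropy(), closed_form)
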
Example No. 28
    def forward(self, z: torch.Tensor, library_gene: torch.Tensor, *cat_list: int):
        """
        The forward computation for a single sample.

         #. Decodes the data from the latent space using the decoder network
         #. Returns local parameters for the ZINB distribution for genes
         #. Returns local parameters for the Mixture NB distribution for proteins

         We use the dictionary `px_` to contain the parameters of the ZINB/NB for genes.
         The rate refers to the mean of the NB, dropout refers to Bernoulli mixing parameters.
         `scale` refers to the quantity upon which differential expression is performed. For genes,
         this can be viewed as the mean of the underlying gamma distribution.

         We use the dictionary `py_` to contain the parameters of the Mixture NB distribution for proteins.
         `rate_fore` refers to foreground mean, while `rate_back` refers to background mean. `scale` refers to
         foreground mean adjusted for background probability and scaled to reside in simplex.
         `back_alpha` and `back_beta` are the posterior parameters for `rate_back`.  `fore_scale` is the scaling
         factor that enforces `rate_fore` > `rate_back`.

        Parameters
        ----------
        z
            tensor with shape ``(n_input,)``
        library_gene
            library size
        cat_list
            list of category membership(s) for this sample

        Returns
        -------
        3-tuple (first 2-tuple :py:class:`dict`, last :py:class:`torch.Tensor`)
            parameters for the ZINB distribution of expression

        """
        px_ = {}
        py_ = {}

        px = self.px_decoder(z, *cat_list)
        px_cat_z = torch.cat([px, z], dim=-1)
        unnorm_px_scale = self.px_scale_decoder(px_cat_z, *cat_list)
        px_["scale"] = self.px_scale_activation(unnorm_px_scale)
        px_["rate"] = library_gene * px_["scale"]

        py_back = self.py_back_decoder(z, *cat_list)
        py_back_cat_z = torch.cat([py_back, z], dim=-1)

        py_["back_alpha"] = self.py_back_mean_log_alpha(py_back_cat_z, *cat_list)
        py_["back_beta"] = torch.exp(
            self.py_back_mean_log_beta(py_back_cat_z, *cat_list)
        )
        log_pro_back_mean = Normal(py_["back_alpha"], py_["back_beta"]).rsample()
        py_["rate_back"] = torch.exp(log_pro_back_mean)

        py_fore = self.py_fore_decoder(z, *cat_list)
        py_fore_cat_z = torch.cat([py_fore, z], dim=-1)
        py_["fore_scale"] = (
            self.py_fore_scale_decoder(py_fore_cat_z, *cat_list) + 1 + 1e-8
        )
        py_["rate_fore"] = py_["rate_back"] * py_["fore_scale"]

        p_mixing = self.sigmoid_decoder(z, *cat_list)
        p_mixing_cat_z = torch.cat([p_mixing, z], dim=-1)
        px_["dropout"] = self.px_dropout_decoder_gene(p_mixing_cat_z, *cat_list)
        py_["mixing"] = self.py_background_decoder(p_mixing_cat_z, *cat_list)

        protein_mixing = 1 / (1 + torch.exp(-py_["mixing"]))
        py_["scale"] = torch.nn.functional.normalize(
            (1 - protein_mixing) * py_["rate_fore"], p=1, dim=-1
        )

        return (px_, py_, log_pro_back_mean)
Example No. 29
class ActorNetwork(nn.Module):
    def __init__(self,
                 acts_dim=10,
                 num_filters=64,
                 use_bn=False,
                 pov_scaling=255,
                 compass_scaling=180,
                 lin_1_dim=128,
                 lin_2_dim=64):

        super(ActorNetwork, self).__init__()

        # Convolutional Block
        self.conv1 = nn.Conv2d(in_channels=3,
                               out_channels=num_filters,
                               padding=0,
                               kernel_size=9,
                               stride=1,
                               bias=not use_bn)  # output dim: 56
        self.max_pool1 = nn.MaxPool2d(kernel_size=2,
                                      stride=2)  # output dim: 28
        if use_bn: self.bn1 = nn.BatchNorm2d(num_features=num_filters)

        self.conv2 = nn.Conv2d(in_channels=num_filters,
                               out_channels=num_filters,
                               padding=1,
                               stride=2,
                               kernel_size=4,
                               bias=not use_bn)  # output dim: 14
        self.max_pool2 = nn.MaxPool2d(kernel_size=2, stride=2)  # output dim: 7
        if use_bn: self.bn2 = nn.BatchNorm2d(num_features=num_filters)

        # Fully connected layer
        self.linear1 = nn.Linear(
            num_filters * 7 * 7,
            lin_1_dim)  #todo: automatically calculate this
        self.linear2 = nn.Linear(lin_1_dim, lin_2_dim)
        self.mean_linear = nn.Linear(lin_2_dim, acts_dim)
        self.log_std_linear = nn.Linear(lin_2_dim, acts_dim)

        self.normal = Normal(0, 1)

        self.use_bn = use_bn
        self.pov_scaling = pov_scaling

    def forward(self, obs):

        x = self.conv1(obs.permute(0, 3, 1, 2) / self.pov_scaling)
        x = self.max_pool1(x)
        if self.use_bn: x = self.bn1(x)
        self.non_lin_1 = F.relu(x)

        x = self.conv2(self.non_lin_1)
        x = self.max_pool2(x)
        if self.use_bn: x = self.bn2(x)
        self.non_lin_2 = F.relu(x)

        x = x.view(self.non_lin_2.size(0), -1)
        x = self.linear1(x)
        self.non_lin_3 = F.relu(x)
        x = self.linear2(self.non_lin_3)
        self.non_lin_4 = F.relu(x)

        self.mean = self.mean_linear(self.non_lin_4)
        log_std = self.log_std_linear(self.non_lin_4)
        self.log_std = torch.clamp(log_std, -20, 2)
        return self.mean, self.log_std

    def get_log_probs(self, obs, epsilon=1e-6):
        mean, log_std = self.forward(obs)
        std = log_std.exp()  # no clip in evaluation, clip affects gradients flow
        action_logit = Normal(mean, std).sample()
        action = torch.tanh(action_logit)
        # log-prob of the pre-tanh sample with the tanh change-of-variables correction
        log_prob = Normal(mean, std).log_prob(action_logit) \
            - torch.log(1. - action.pow(2) + epsilon)
        #assert float(log_prob.mean())==float(log_prob.mean()), "Log_prob is nan"
        return log_prob.sum(dim=1, keepdim=True), action

    def get_action(self, obs, deterministic=False):
        mean, log_std = self.forward(obs)
        if deterministic:
            action = torch.tanh(mean)
        else:
            std = log_std.exp()  # no clip in evaluation, clip affects gradients flow
            action_logit = mean + std * self.normal.sample()
            # TanhNormal distribution as actions; reparameterization trick
            action = torch.tanh(action_logit)
        return action
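
`get_log_probs` above draws with `.sample()`, so no gradient flows through the draw itself; a reparameterized variant of the same squashed-Gaussian construction (a sketch, assuming `mean` and `log_std` come from a forward pass like the one above):

import torch
from torch.distributions import Normal

def squashed_gaussian_rsample(mean, log_std, epsilon=1e-6):
    # Sketch: reparameterized tanh-squashed Gaussian action with the standard
    # change-of-variables correction to the log-probability.
    std = log_std.exp()
    dist = Normal(mean, std)
    pre_tanh = dist.rsample()  # differentiable sample (pathwise gradient)
    action = torch.tanh(pre_tanh)
    log_prob = dist.log_prob(pre_tanh) - torch.log(1. - action.pow(2) + epsilon)
    return action, log_prob.sum(dim=1, keepdim=True)
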
Example No. 30
logvar = Variable(torch.randn(args.batch_size,args.latent_dim).cuda(),requires_grad=True)
optim2 = torch.optim.SGD([mu,logvar], lr = args.learning_rate2)

# TODO: to make this stochastic, shuffle and make smaller batches.
start = time.time()
theta.train()
for epoch in range(args.num_epochs*2):
    # Keep track of reconstruction loss and total kl
    total_recon_loss = 0
    total_kl = 0
    total = 0
    for img, _ in loader:
        # no need to Variable(img).cuda()
        optim1.zero_grad()
        optim2.zero_grad()
        q = Normal(loc=mu, scale=logvar.mul(0.5).exp())
        # Reparameterized sample.
        qsamp = q.rsample()
        kl = kl_divergence(q, p).sum() # KL term
        out = theta(qsamp)
        recon_loss = criterion(out, img) # reconstruction term
        loss = (recon_loss + args.alpha * kl) / args.batch_size
        total_recon_loss += recon_loss.item() / args.batch_size
        total_kl += kl.item() / args.batch_size
        total += 1
        loss.backward()
        if args.clip:
            torch.nn.utils.clip_grad_norm(theta.parameters(), args.clip)
            torch.nn.utils.clip_grad_norm(mu, args.clip)
            torch.nn.utils.clip_grad_norm(logvar, args.clip)
        if epoch % 2:
Example No. 31
class Tests(unittest.TestCase):
    # ===== Simple 1D model ===== #
    norm = DistributionWrapper(Normal, loc=0.0, scale=1.0)
    linear = AffineProcess((f, g), (1.0, 1.0), norm, norm)
    model = LinearGaussianObservations(linear, 1.0, 1.0)

    # ===== Simple 2D model ===== #
    mvn = DistributionWrapper(lambda **u: Independent(Normal(**u), 1), loc=torch.zeros(2), scale=torch.ones(2))
    mvn = AffineProcess((fmvn, gmvn), (0.5, 1.0), mvn, mvn)
    a = torch.tensor([1.0, 2.0])

    mvnmodel = LinearGaussianObservations(mvn, a, 1.0)

    def test_InitializeFilter(self):
        state = SISR(self.model, 1000).initialize()

        assert state.x.shape == torch.Size([1000])

    def test_Filters(self):
        for model in [self.model, self.mvnmodel]:
            x, y = model.sample_path(500)

            for filter_type, props in [
                (SISR, {"particles": 500}),
                (APF, {"particles": 500}),
                (UKF, {}),
                (SISR, {"particles": 50, "proposal": prop.Unscented()}),
            ]:
                filt = filter_type(model, **props)
                result = filt.longfilter(y, record_states=True)

                filtmeans = result.filter_means.numpy()

                # ===== Run Kalman ===== #
                if model is self.model:
                    kf = pykalman.KalmanFilter(transition_matrices=1.0, observation_matrices=1.0)
                else:
                    kf = pykalman.KalmanFilter(
                        transition_matrices=[[0.5, 1 / 3], [0, 1.0]], observation_matrices=[1, 2]
                    )

                f_mean, _ = kf.filter(y.numpy())

                if model.hidden_ndim < 1 and not isinstance(filt, UKF):
                    f_mean = f_mean[:, 0]

                rel_error = np.median(np.abs((filtmeans - f_mean) / f_mean))

                ll = kf.loglikelihood(y.numpy())
                rel_ll_error = np.abs((ll - result.loglikelihood.numpy()) / ll)

                assert rel_error < 0.05 and rel_ll_error < 0.05

    def test_ParallellFiltersAndStability(self):
        x, y = self.model.sample_path(50)

        shape = 3000

        linear = AffineProcess((f, g), (1.0, 1.0), self.norm, self.norm)
        self.model.hidden = linear

        filt = SISR(self.model, 1000).set_nparallel(shape)
        result = filt.longfilter(y)

        filtermeans = result.filter_means

        x = filtermeans[:, :1]
        mape = ((x - filtermeans[:, 1:]) / x).abs()

        assert mape.median(0)[0].max() < 0.05

    def test_ParallelUnscented(self):
        x, y = self.model.sample_path(50)

        shape = 30

        linear = AffineProcess((f, g), (1.0, 1.0), self.norm, self.norm)
        self.model.hidden = linear

        filt = SISR(self.model, 1000, proposal=prop.Unscented()).set_nparallel(shape)
        result = filt.longfilter(y)

        filtermeans = result.filter_means

        x = filtermeans[:, :1]
        mape = ((x - filtermeans[:, 1:]) / x).abs()

        assert mape.median(0)[0].max() < 0.05

    def test_SDE(self):
        def f(x, a, s):
            return -a * x

        def g(x, a, s):
            return s

        dt = 1e-2
        norm = DistributionWrapper(Normal, loc=0.0, scale=sqrt(dt))

        em = AffineEulerMaruyama((f, g), (0.02, 0.15), norm, norm, dt=1e-2, num_steps=10)
        model = LinearGaussianObservations(em, scale=1e-3)

        x, y = model.sample_path(500)

        for filt in [SISR(model, 500, proposal=prop.Bootstrap()), UKF(model)]:
            result = filt.longfilter(y)

            means = result.filter_means
            if isinstance(filt, UKF):
                means = means[:, 0]

            self.assertLess(torch.std(x - means), 5e-2)
Example No. 32
 def test_normal_sample(self):
     self._set_rng_seed()
     for mean, std in product([-1.0, 0.0, 1.0], [0.1, 1.0, 10.0]):
         self._check_sampler_sampler(Normal(mean, std),
                                     scipy.stats.norm(loc=mean, scale=std),
                                     'Normal(mean={}, std={})'.format(mean, std))
Example No. 33
 def forward(self, state):
   state = tensor(state)
   actor_mean = self.actor_mean_fc(state)
   value = self.critic_fc(state)
   return Normal(actor_mean, self.actor_logstd.exp()), value
Example No. 34
 def forward(self, x):
     value = self.critic(x)
     mu    = self.actor(x)
     std   = self.log_std.exp().expand_as(mu)
     dist  = Normal(mu, std)
     return dist, value
Example No. 35
    def __init__(self, mu1, mu2, sigma1, sigma2, pi):
        self.N1 = Normal(mu1, sigma1)
        self.N2 = Normal(mu2, sigma2)

        self.pi1 = pi
        self.pi2 = (1. - pi)
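
Evaluating the density of a two-component mixture like the one above combines the component log-densities with the mixing weights, typically via `logsumexp` for numerical stability; a sketch with hypothetical parameters:

import math
import torch
from torch.distributions import Normal

# Hypothetical two-component scale mixture with mixing weight pi1.
N1, N2 = Normal(0.0, 1.0), Normal(0.0, 3.0)
pi1 = 0.7
x = torch.randn(5)
component_log_probs = torch.stack([
    N1.log_prob(x) + math.log(pi1),
    N2.log_prob(x) + math.log(1. - pi1),
])
mixture_log_prob = torch.logsumexp(component_log_probs, dim=0)
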
Example No. 36
 def set_prior_params(self, mu, logstd1, logstd2, pi):
     sigma1, sigma2 = math.exp(logstd1), math.exp(logstd2)
     #self._prior = GMM(mu,mu,sigma1,sigma2,pi)
     self._prior = Normal(mu, sigma1)
Example No. 37
elif args.model_type == 3:
    G = Generator3(latent_dim = LATENT_DIM)
    D = Discriminator3()
elif args.model_type == 4:
    G = Generator4(latent_dim = LATENT_DIM)
    D = Discriminator4()
elif args.model_type == 5:
    print('did you make sure Latent dim is 100? else errors are coming!')
    G = Generator5()
    D = Discriminator5()

G.cuda()
D.cuda()
optim_gen = torch.optim.Adam(G.parameters(), lr=learning_rate)
optim_disc = torch.optim.SGD(D.parameters(), lr=learning_rate)
seed_distribution = Normal(V(torch.zeros(BATCH_SIZE, LATENT_DIM).cuda()), 
                           V(torch.ones(BATCH_SIZE, LATENT_DIM)).cuda())

start = time.time()
G.train() # TODO: switch between train and eval for appropriate parts
D.train()
for epoch in range(NUM_EPOCHS):
    total_gen_loss = 0
    total_disc_loss = 0
    total = 0
    for img, label in train_loader:
        if img.size(0) < BATCH_SIZE: continue
        img = V(img).cuda()
        # Grad discriminator real: -E[log(D(x))]
        optim_disc.zero_grad()
        optim_gen.zero_grad()
        d = D(img)