import torch
from pyro.ops.stats import pi, resample


def bootstrap_pi(samples, estimator, repetitions=1000, probs=0.68):
    """Bootstrap a percentile interval for ``estimator`` applied to ``samples``."""
    estimand = torch.zeros(repetitions)
    for i in range(repetitions):
        # Draw a bootstrap replicate: sample with replacement, same size as the data.
        bootstrap_values = resample(samples, num_samples=len(samples), replacement=True)
        estimand[i] = estimator(bootstrap_values)
    # Percentile interval over the bootstrap distribution of the estimator.
    return pi(estimand, probs)
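
# Usage sketch (hypothetical data): a 68% bootstrap interval for the sample
# mean of draws from N(3, 4). The exact endpoints vary from run to run.
samples = torch.randn(1000) * 4.0 + 3.0
low, high = bootstrap_pi(samples, torch.mean)  # roughly the sample mean +/- 0.13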
import pytest

from tests.common import assert_equal  # Pyro's test helper; supports a `prec` tolerance


@pytest.mark.parametrize("replacement", [True, False])
def test_resample(replacement):
    # Two independent columns: N(3, 4) and N(5, 6).
    x = torch.empty(10000, 2)
    x[:, 0].normal_(3, 4)
    x[:, 1].normal_(5, 6)

    num_samples = 5000
    y = resample(x, num_samples=num_samples, replacement=replacement)
    z = resample(x.t(), num_samples=num_samples, dim=1, replacement=replacement)
    if not replacement:
        # Without replacement, every drawn element must be distinct.
        assert_equal(torch.unique(y.reshape(-1)).numel(), y.numel())
        assert_equal(torch.unique(z.reshape(-1)).numel(), z.numel())
    assert_equal(y.shape, torch.Size([num_samples, 2]))
    assert_equal(z.shape, torch.Size([2, num_samples]))
    # Resampling should preserve the per-column moments (within tolerance).
    assert_equal(y.mean(dim=0), torch.tensor([3.0, 5.0]), prec=0.2)
    assert_equal(z.mean(dim=1), torch.tensor([3.0, 5.0]), prec=0.2)
    assert_equal(y.std(dim=0), torch.tensor([4.0, 6.0]), prec=0.2)
    assert_equal(z.std(dim=1), torch.tensor([4.0, 6.0]), prec=0.2)
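
# For intuition, a minimal sketch of the behavior the test above pins down,
# assuming `resample` draws `num_samples` indices along `dim` uniformly, with
# or without replacement (a reference re-implementation, not Pyro's actual one):
def resample_sketch(x, num_samples, dim=0, replacement=False):
    if replacement:
        idx = torch.randint(x.size(dim), (num_samples,))  # duplicates allowed
    else:
        idx = torch.randperm(x.size(dim))[:num_samples]  # all indices distinct
    return x.index_select(dim, idx)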
# %%
# Put the embeddings into a tensor. `msg_embeddings` is assumed to come from an
# earlier cell, laid out as (num_features, num_messages): X_prior_mean below has
# one row per column of y, so each column of y is one data point.
# y = pd.DataFrame(msg_embeddings)
y = torch.tensor(msg_embeddings, dtype=torch.get_default_dtype())

# Prior over X
k = 2  # number of dimensions for the latent space
X_prior_mean = torch.zeros(y.size(1), k)

# Kernel definition
kernel = gp.kernels.RBF(input_dim=k, lengthscale=torch.ones(k))

# Clone the prior mean so it doesn't change during training
X = Parameter(X_prior_mean.clone())
# 32 inducing points, initialized by resampling rows of the prior mean
Xu = stats.resample(X_prior_mean.clone(), 32)
gplvm = gp.models.SparseGPRegression(X, y, kernel, Xu, noise=torch.tensor(0.01), jitter=1e-5)

# Place a Normal(prior mean, 0.1) prior on X and a diagonal-normal guide
gplvm.X = pyro.nn.PyroSample(dist.Normal(X_prior_mean, 0.1).to_event())
gplvm.autoguide("X", dist.Normal)

# %%
losses = gp.util.train(gplvm, num_steps=4000)
plt.plot(losses)
plt.show()
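
# %%
# After training, the variational parameters of q(X) live in gplvm.X_loc /
# gplvm.X_scale (the same pattern as run_gplvm below). A minimal sketch of
# plotting the learned 2-D latent coordinates:
gplvm.mode = "guide"  # switch from the model's prior to the learned guide
X_latent = gplvm.X_loc.detach().numpy()
plt.scatter(X_latent[:, 0], X_latent[:, 1], s=5)
plt.show()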
def run_gplvm(y, informative_prior=True):
    # Expects module-level `df` (the qPCR DataFrame, rows named like "1 TE")
    # and a plotting helper `viz`.
    pyro.set_rng_seed(1)

    # The latent variables are X (called Latent Space in the tutorial).
    # dim(X) = 2, to describe 2 aspects:
    #   + capture time: 1, 2, 4, 8, 32, 64 (6 stages)
    #   + cell-branching type: TE, ICM, PE, EPI
    # Stick the capture-time feature to the x-axis.
    # Note that we are using the supervised information here.
    capture_time = y.new_tensor([
        int(cell_name.split(" ")[0]) for cell_name in df.index.values
    ])  # values in {1, 2, 4, 8, 32, 64}
    capture_time_normalized = capture_time.log2() / 6  # in range [0, 1]

    # Try to corrupt this supervised info, e.g., keep only ~10% of it:
    # print(capture_time_normalized.shape)
    # mask = torch.randint(
    #     low=0,
    #     high=capture_time_normalized.size(0),
    #     size=(int(0.9 * capture_time_normalized.size(0)),),
    # )
    # capture_time_normalized[mask] = -0.1

    # Set up the mean of the prior over X.
    X_prior_mean = torch.zeros(y.size(1), 2)  # n_observations x x_dim
    if informative_prior:
        X_prior_mean[:, 0] = capture_time_normalized
    # X has 2 features. The first gets capture_time_normalized as its prior
    # mean (just the prior; the posterior will move away from it). The second
    # has zero mean and is inferred "from scratch".

    # Construction of a sparse Gaussian process with an RBF kernel.
    kernel = gp.kernels.RBF(input_dim=2, lengthscale=torch.ones(2))

    # Define X as a Parameter so it can be learned / we can set a prior and guide on it.
    X = Parameter(X_prior_mean.clone())

    # Build a sparse GP with num_inducing=32.
    Xu = stats.resample(X_prior_mean.clone(), 32)
    gplvm = gp.models.SparseGPRegression(X, y, kernel, Xu=Xu, noise=torch.tensor(0.01), jitter=1e-5)

    # Set prior and guide for the GP-LVM.
    gplvm.set_prior("X", dist.Normal(X_prior_mean, 0.1).to_event())
    gplvm.autoguide("X", dist.Normal)

    # Inference: train the GP with gp.util.train, which uses SVI with Adam (lr=0.01).
    t = time.time()
    print("Start training")
    losses = gp.util.train(gplvm, num_steps=4000)
    print(f"Trained GP-LVM in {time.time() - t:.1f} seconds")
    plt.plot(losses)
    plt.savefig("./plots/gplvm_losses.png")

    # The mean and std of X in q(X) ~ p(X|y) are now stored in X_loc and X_scale.
    # Important: to get samples from q(X), set the `mode` of `gplvm` to "guide".
    gplvm.mode = "guide"  # default: "model"
    X = gplvm.X_loc.detach().numpy()
    viz(X, name="gplvm")
    # viz_bokeh(X, name=("gplvm_with_prior" if informative_prior
    #                    else "gplvm_non_informative_prior"))
    return X
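
# Hypothetical driver, a sketch assuming the single-cell qPCR dataset used in
# the Pyro GP-LVM tutorial as the `df` global that run_gplvm reads (rows named
# "<capture-time> <cell-type>"); `URL` and the transpose follow that tutorial,
# and pandas is assumed imported as `pd`.
if __name__ == "__main__":
    URL = "https://raw.githubusercontent.com/sods/ods/master/datasets/guo_qpcr.csv"
    df = pd.read_csv(URL, index_col=0)
    data = torch.tensor(df.values, dtype=torch.get_default_dtype())
    y = data.t()  # (num_genes, num_cells): each column is one data point
    run_gplvm(y, informative_prior=True)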