def construct_posterior_model(self, joint_model):
    test_sample = joint_model._get_sample(1, observed=False)
    posterior_model = ProbabilisticModel([
        DeterministicVariable(value[0, 0, :], variable.name, learnable=True)
        for variable, value in test_sample.items()
        if (not variable.is_observed)
        and not isinstance(variable, (DeterministicVariable, RootVariable))
    ])
    return posterior_model
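# Usage sketch (illustrative, not part of the library source): this default
# posterior is a point estimate, i.e. one learnable DeterministicVariable per
# latent variable, initialized from a single prior sample. perform_inference
# (defined later in this section) calls it when no posterior is supplied:
#
#   posterior_model = inference_method.construct_posterior_model(joint_model)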
def set_posterior_model(self, process, parameters=[]):
    assert isinstance(
        process, (ProbabilisticModel, StochasticProcess)
    ), "The posterior model of a stochastic process should be either a StochasticProcess or a ProbabilisticModel"
    self.posterior_process = process
    if isinstance(parameters, list) and all(
            [isinstance(param, Variable) for param in parameters]):
        self.posterior_parameters = ProbabilisticModel(parameters)
    elif isinstance(parameters, ProbabilisticModel):
        self.posterior_parameters = parameters
    else:
        raise ValueError(
            "The posterior parameters should be either a list of variables or a probabilistic model"
        )
    if self.has_observed_points:
        self.update_posterior_submodel()
# Architecture parameters
weights1 = NormalVariable(
    np.zeros((number_hidden_nodes, number_regressors)),
    10 * np.ones((number_hidden_nodes, number_regressors)), "weights1")
weights2 = NormalVariable(
    np.zeros((number_output_classes, number_hidden_nodes)),
    10 * np.ones((number_output_classes, number_hidden_nodes)), "weights2")

# Forward pass
final_activations = BF.matmul(weights2, BF.tanh(BF.matmul(weights1, x)))
k = CategoricalVariable(softmax_p=final_activations, name="k")

# Probabilistic model
model = ProbabilisticModel([k])

# Observations
k.observe(labels)

# Variational model
num_particles = N
initial_locations1 = [
    np.random.normal(0., 1., (number_hidden_nodes, number_regressors))
    for _ in range(num_particles)
]
initial_locations2 = [
    np.random.normal(0., 1., (number_output_classes, number_hidden_nodes))
    for _ in range(num_particles)
]
    new_h = h[t - 1] + dt * (x[t - 1] * (r - z[t - 1]) - h[t - 1])
    new_z = z[t - 1] + dt * (x[t - 1] * h[t - 1] - b * z[t - 1])
    x.append(NormalVariable(new_x, np.sqrt(dt) * driving_noise, x_names[t]))
    h.append(NormalVariable(new_h, np.sqrt(dt) * driving_noise, h_names[t]))
    z.append(NormalVariable(new_z, np.sqrt(dt) * driving_noise, z_names[t]))
    if t in y_range:
        y_name = "y{}".format(t)
        y_names.append(y_name)
        y.append(NormalVariable(x[t], measure_noise, y_name))
AR_model = ProbabilisticModel(x + y + z + h)

# Generate data #
data = AR_model._get_sample(number_samples=1)
time_series = [float(data[yt].data) for yt in y]
ground_truth = [float(data[xt].data) for xt in x]

# Observe data #
[yt.observe(data[yt][:, 0, :]) for yt in y]

# Structured variational distribution #
mx0 = DeterministicVariable(value=0., name="mx0", learnable=True)
Qx = [NormalVariable(mx0, 5 * driving_noise, 'x0', learnable=True)]
Qx_mean = [RootVariable(0., 'x0_mean', learnable=True)]
Qxlambda = [RootVariable(-1., 'x0_lambda', learnable=True)]
#query_points = range(num_timepoints)
query_points = list(range(0, 10)) + list(range(30, 40))
X.observe(data, query_points)

## Variational model ##

# Variational parameters #

# Variational process #
#Qx0 = Normal(0, 1, "x_0", learnable=True)
#QX = MarkovProcess(Qx0, lambda t, x: Normal(0., 0.5, name="x_{}".format(t), has_bias=False, learnable=True))
Qx10 = Normal(float(temporal_sample[9:10].values), 0.25, "x_10")
QX = [Qx10]
for idx in range(11, 30):
    QX.append(Normal(QX[idx - 11], 0.25, "x_{}".format(idx), has_bias=True, learnable=True))
QX = ProbabilisticModel(QX)
#X.set_posterior_model(process=QX)

## Perform ML inference ##
perform_inference(X,
                  posterior_model=QX,
                  inference_method=ReverseKL(),
                  number_iterations=3000,
                  number_samples=50,
                  optimizer="SGD",
                  lr=0.005)

loss_list = X.active_submodel.diagnostics["loss curve"]
plt.plot(loss_list)
plt.show()
import matplotlib.pyplot as plt

from brancher.variables import ProbabilisticModel
from brancher.standard_variables import BernulliVariable, NormalVariable
import brancher.functions as BF
from brancher import inference
from brancher.inference import ReverseKL
from brancher.gradient_estimators import BlackBoxEstimator, Taylor1Estimator

# Model
z1 = BernulliVariable(logits=0., name="z1")
z2 = BernulliVariable(logits=0., name="z2")
y = NormalVariable(2 * z1 + z2, 1., name="y")
model = ProbabilisticModel([y])

# Generate data
data = y.get_sample(20, input_values={z1: 1, z2: 0})
data.hist(bins=20)
plt.show()

# Observe data
y.observe(data)

# Variational model
Qz1 = BernulliVariable(logits=0., name="z1", learnable=True)
Qz2 = BernulliVariable(logits=0., name="z2", learnable=True)
variational_model = ProbabilisticModel([Qz1, Qz2])
model.set_posterior_model(variational_model)

# Joint-contrastive inference
inference.perform_inference(
    z.append(
        NormalVariable(F(z[-1], W1, W2),
                       driving_noise * np.ones((h_size, 1)),
                       "z{}".format(t),
                       learnable=False))
    img.append(
        DeterministicVariable(decoder(BF.reshape(z[-1], (h_size, 1, 1))),
                              "img{}".format(t),
                              learnable=False))
    if t_cond(t):
        x.append(
            NormalVariable(img[-1],
                           measurement_noise * np.ones((3, image_size, image_size)),
                           "x{}".format(t),
                           learnable=False))
model = ProbabilisticModel(x + z + img)
samples = model._get_sample(1)
imagesGT.append([np.reshape(samples[img[t]].detach().numpy(), (3, image_size, image_size))
                 for t in range(T)])
imagesNoise.append([np.reshape(samples[x[t]].detach().numpy(), (3, image_size, image_size))
                    for t in range(T)])

# Observe model
[xt.observe(samples[xt].detach().numpy()[0, :, :, :, :]) for xt in x]
def _construct_observed_model(self, observation_variables, instance):
    return ProbabilisticModel(observation_variables)
import torch

from brancher.variables import ProbabilisticModel
from brancher.standard_variables import NormalVariable, DeterministicVariable, LogNormalVariable
import brancher.functions as BF
from brancher.visualizations import plot_density
from brancher.transformations import PlanarFlow
from brancher import inference
from brancher.visualizations import plot_posterior

# Model
M = 8
y = NormalVariable(torch.zeros((M,)), 1. * torch.ones((M,)), "y")
y0 = DeterministicVariable(y[1], "y0")
d = NormalVariable(y, torch.ones((M,)), "d")
model = ProbabilisticModel([d, y, y0])

# Get samples
d.observe(d.get_sample(55, input_values={y: 1. * torch.ones((M,))}))

# Variational distribution
u1 = DeterministicVariable(torch.normal(0., 1., (M, 1)), "u1", learnable=True)
w1 = DeterministicVariable(torch.normal(0., 1., (M, 1)), "w1", learnable=True)
b1 = DeterministicVariable(torch.normal(0., 1., (1, 1)), "b1", learnable=True)
u2 = DeterministicVariable(torch.normal(0., 1., (M, 1)), "u2", learnable=True)
w2 = DeterministicVariable(torch.normal(0., 1., (M, 1)), "w2", learnable=True)
b2 = DeterministicVariable(torch.normal(0., 1., (1, 1)), "b2", learnable=True)
z = NormalVariable(torch.zeros((M, 1)), torch.ones((M, 1)), "z", learnable=True)
minibatch_indices = RandomIndices(dataset_size=dataset_size,
                                  batch_size=minibatch_size,
                                  name="indices",
                                  is_observed=True)
x = EmpiricalVariable(input_variable,
                      indices=minibatch_indices,
                      name="x",
                      is_observed=True)
labels = EmpiricalVariable(output_labels,
                           indices=minibatch_indices,
                           name="labels",
                           is_observed=True)

weights = NormalVariable(np.zeros((1, number_regressors)),
                         0.5 * np.ones((1, number_regressors)), "weights")
logit_p = BF.matmul(weights, x)
k = BinomialVariable(1, logit_p=logit_p, name="k")
model = ProbabilisticModel([k])

#samples = model._get_sample(300)
#model.calculate_log_probability(samples)

# Observations
k.observe(labels)

#observed_model = inference.get_observed_model(model)
#observed_samples = observed_model._get_sample(number_samples=1, observed=True)

# Variational Model
Qweights = NormalVariable(np.zeros((1, number_regressors)),
                          np.ones((1, number_regressors)), "weights",
                          learnable=True)
import matplotlib.pyplot as plt

from brancher.variables import ProbabilisticModel
from brancher.standard_variables import NormalVariable, LogNormalVariable
from brancher.transformations import truncate_model
from brancher.visualizations import plot_density

# Normal model
mu = NormalVariable(0., 1., "mu")
x = NormalVariable(mu, 0.1, "x")
model = ProbabilisticModel([x])

# Decision rule
model_statistics = lambda dic: dic[x].data
truncation_rule = lambda a: ((a > 0.5) & (a < 0.6)) | ((a > -0.6) & (a < -0.5))

# Truncated model
truncated_model = truncate_model(model, truncation_rule, model_statistics)

plot_density(truncated_model, variables=["mu", "x"], number_samples=10000)
plt.show()
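# Sampling sketch (an assumption: truncate_model is taken here to return a
# model exposing the same get_sample interface that plot_density relies on):
truncated_samples = truncated_model.get_sample(number_samples=100)
print(truncated_samples.describe())  # draws of x concentrate in the accepted bands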
    Qmux.append(
        NormalVariable(new_mx, new_sx, "mux_{}".format(t + 1), is_observed=True))
    Qmuy.append(
        NormalVariable(new_my, new_sy, "muy_{}".format(t + 1), is_observed=True))
    mx.append(new_mx)
    my.append(new_my)
    sx.append(new_sx)
    sy.append(new_sy)
model = ProbabilisticModel(w + r + mux + muy)
variational_filter = ProbabilisticModel(Qmux + Qmuy)  # Variational model

print(model.get_average_reward(10))

# Train control
num_itr = 3000
inference.perform_inference(model,
                            posterior_model=variational_filter,
                            number_iterations=num_itr,
                            number_samples=9,
                            optimizer="Adam",
                            lr=0.01)
reward_list = model.diagnostics[
import matplotlib.pyplot as plt
import numpy as np

from brancher.variables import DeterministicVariable, ProbabilisticModel
from brancher.standard_variables import NormalVariable, EmpiricalVariable
from brancher import inference
import brancher.functions as BF

# Data

# Neural architectures

#Encoder

#Decoder

# Generative model
latent_size = (10,)
z = NormalVariable(np.zeros(latent_size), np.ones(latent_size), name="z")
decoder_output = decoder(z)
x = NormalVariable(decoder_output["mean"], BF.exp(decoder_output["log_var"]), name="x")
model = ProbabilisticModel([x, z])

# Amortized variational distribution
Qx = EmpiricalVariable(dataset, name="x")
encoder_output = encoder(Qx)
Qz = NormalVariable(encoder_output["mean"], BF.exp(encoder_output["log_var"]), name="z")
variational_model = ProbabilisticModel([Qx, Qz])
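# Training sketch (a hypothetical completion following the pattern of the other
# autoencoder examples in this section; the encoder/decoder definitions are
# elided above):
model.set_posterior_model(variational_model)
inference.perform_inference(model,
                            number_iterations=1000,
                            number_samples=1,
                            optimizer="Adam",
                            lr=0.001)
loss_list = model.diagnostics["loss curve"]
plt.plot(loss_list)
plt.show()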
# Data sampling model
minibatch_size = dataset_size
minibatch_indices = RandomIndices(dataset_size=dataset_size,
                                  batch_size=minibatch_size,
                                  name="indices",
                                  is_observed=True)
x = EmpiricalVariable(input_variable,
                      indices=minibatch_indices,
                      name="x",
                      is_observed=True)
labels = EmpiricalVariable(output_labels,
                           indices=minibatch_indices,
                           name="labels",
                           is_observed=True)

# Architecture parameters
weights = NormalVariable(np.zeros((number_output_classes, number_regressors)),
                         10 * np.ones((number_output_classes, number_regressors)),
                         "weights")

# Forward pass
final_activations = BF.matmul(weights, x)
k = CategoricalVariable(softmax_p=final_activations, name="k")

# Probabilistic model
model = ProbabilisticModel([k])

# Observations
k.observe(labels)

# Variational model
num_particles = 1  #10
initial_locations = [np.random.normal(0., 1., (number_output_classes, number_regressors))
                     for _ in range(num_particles)]
particles = [ProbabilisticModel([DeterministicVariable(location, name="weights", learnable=True)])
             for location in initial_locations]

# Importance sampling distributions
variational_samplers = [ProbabilisticModel([NormalVariable(loc=location, scale=0.1, name="weights", learnable=True)])
                        for location in initial_locations]
# Data sampling model
minibatch_size = 50
minibatch_indices = RandomIndices(dataset_size=dataset_size,
                                  batch_size=minibatch_size,
                                  name="indices",
                                  is_observed=True)
x = EmpiricalVariable(input_variable,
                      indices=minibatch_indices,
                      name="x",
                      is_observed=True)
labels = EmpiricalVariable(output_labels,
                           indices=minibatch_indices,
                           name="labels",
                           is_observed=True)

# Architecture parameters
weights = NormalVariable(np.zeros((number_output_classes, number_pixels)),
                         10 * np.ones((number_output_classes, number_pixels)),
                         "weights")

# Forward pass
final_activations = BF.matmul(weights, x)
k = CategoricalVariable(softmax_p=final_activations, name="k")

# Probabilistic model
model = ProbabilisticModel([k])

# Observations
k.observe(labels)

# Variational model
number_particles = 2
initial_location_1 = np.random.normal(0., 1., (number_output_classes, number_pixels))
initial_location_2 = np.random.normal(0., 1., (number_output_classes, number_pixels))
particle_1 = DeterministicVariable(initial_location_1, name="weights", learnable=True)
particle_2 = DeterministicVariable(initial_location_2, name="weights", learnable=True)
particle_locations = [particle_1, particle_2]
particles = [ProbabilisticModel([l]) for l in particle_locations]

# Importance sampling distributions
voronoi_set = VoronoiSet(particle_locations)  #TODO: Bug if you use variables instead of probabilistic models
omega = 2 * np.pi * 8
x = [x0, x1]
y = [y0, y1]
x_names = ["x0", "x1"]
y_names = ["y0", "y1"]
y_range = [t for t in range(T) if cond(t)]
for t in range(2, T):
    x_names.append("x{}".format(t))
    new_mu = (-1 - omega**2 * dt**2 + b * dt) * x[t - 2] + (2 - b * dt) * x[t - 1]
    x.append(NormalVariable(new_mu, np.sqrt(dt) * driving_noise, x_names[t]))
    if t in y_range:
        y_name = "y{}".format(t)
        y_names.append(y_name)
        y.append(NormalVariable(x[t], measure_noise, y_name))
AR_model = ProbabilisticModel(x + y)

# Generate data #
data = AR_model._get_sample(number_samples=1)
time_series = [float(data[yt].data) for yt in y]
ground_truth = [float(data[xt].data) for xt in x]
#true_b = data[omega].data
#print("The true coefficient is: {}".format(float(true_b)))

# Observe data #
[yt.observe(data[yt][:, 0, :]) for yt in y]

# Structured variational distribution #
Qx = [NormalVariable(0., 1., 'x0', learnable=True),
      NormalVariable(0., 1., 'x1', learnable=True)]
from brancher.distributions import NormalDistribution, LogNormalDistribution
from brancher.variables import DeterministicVariable, RandomVariable, ProbabilisticModel
from brancher.standard_variables import NormalVariable, LogNormalVariable
from brancher import inference
import brancher.functions as BF

# Real model
nu_real = 1.
mu_real = -2.
x_real = NormalVariable(mu_real, nu_real, "x_real")

# Normal model
nu = LogNormalVariable(0., 1., "nu")
mu = NormalVariable(0., 10., "mu")
x = NormalVariable(mu, nu, "x")
model = ProbabilisticModel([x])
print(model)

# Print samples
sample = model.get_sample(10)
print(sample)

# Print samples from single variable
x_sample = x.get_sample(10)
print(x_sample)

# Print samples conditional on an input
in_sample = model.get_sample(10, input_values={mu: 100.})
print(in_sample)
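# Log-probability sketch (mirrors the commented-out calculate_log_probability
# call in the logistic-regression example above; assumes the internal sample
# format returned by _get_sample is what the method expects):
internal_sample = model._get_sample(10)
log_prob = model.calculate_log_probability(internal_sample)
print(log_prob)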
import matplotlib.pyplot as plt
import chainer

from brancher.variables import RootVariable, ProbabilisticModel
from brancher.particle_inference_tools import VoronoiSet
from brancher.standard_variables import EmpiricalVariable, NormalVariable, LogNormalVariable
from brancher import inference
from brancher.inference import WassersteinVariationalGradientDescent as WVGD
from brancher.visualizations import ensemble_histogram
from brancher.pandas_interface import reformat_sample_to_pandas

# Model
dimensionality = 1
theta = NormalVariable(loc=0., scale=2., name="theta")
x = NormalVariable(theta**2, scale=0.2, name="x")
model = ProbabilisticModel([x, theta])

# Generate data
N = 3
theta_real = 0.1
x_real = NormalVariable(theta_real**2, 0.2, "x")
data = x_real._get_sample(number_samples=N)

# Observe data
x.observe(data[x_real][:, 0, :])

# Variational model
num_particles = 2
initial_locations = [-2, 2]
#initial_locations = [0, 0.1]
n = 100
x_range = np.linspace(-x_max, x_max, n)
x1 = DeterministicVariable(np.sin(2 * np.pi * 2 * x_range), name="x1", is_observed=True)
x2 = DeterministicVariable(x_range, name="x2", is_observed=True)

# Multivariate Regression
b = Norm(0., 1., name="b")
w1 = Norm(0., 1., name="w1")
w2 = Norm(0., 1., name="w2")
w12 = Norm(0., 1., name="w12")
nu = LogNorm(0.2, 0.5, name="nu")
mean = b + w1 * x1 + w2 * x2 + w12 * x1 * x2
y = Norm(mean, nu, name="y")
model = ProbabilisticModel([y])

# Variational distributions
Qb = Norm(0., 1., name="b", learnable=True)
Qw1 = Norm(0., 1., name="w1", learnable=True)
Qw2 = Norm(0., 1., name="w2", learnable=True)
Qw12 = Norm(0., 1., name="w12", learnable=True)
Qnu = LogNorm(0.2, 0.5, name="nu", learnable=True)
variational_model = ProbabilisticModel([Qb, Qw1, Qw2, Qw12, Qnu])
model.set_posterior_model(variational_model)

# Generate data
ground_samples = model._get_sample(1)

# Observe data
data = np.reshape(ground_samples[y].cpu().detach().numpy(), newshape=(n, 1, 1))
z2 = NormalVariable(BF.relu(decoder_output1["mean"]),
                    z2sd * np.ones((latent_size2,)), name="z2")
label_logits = DeterministicVariable(decoderLabel(z2), "label_logits")
labels = CategoricalVariable(logits=label_logits, name="labels")
decoder_output2 = DeterministicVariable(decoder2(z2), name="decoder_output2")
z3 = NormalVariable(BF.relu(decoder_output2["mean"]),
                    z3sd * np.ones((latent_size3,)), name="z3")
decoder_output3 = DeterministicVariable(decoder3(z3), name="decoder_output3")
x = BinomialVariable(total_count=1, logits=decoder_output3["mean"], name="x")
model = ProbabilisticModel([x, z1, z2, z3, labels])

# Amortized variational distribution
minibatch_indices = RandomIndices(dataset_size=dataset_size,
                                  batch_size=b_size,
                                  name="indices",
                                  is_observed=True)
Qx = EmpiricalVariable(dataset,
                       indices=minibatch_indices,
                       name="x",
                       is_observed=True)
Qlabels = EmpiricalVariable(output_labels,
                            indices=minibatch_indices,
    def __call__(self, x):
        h = self.relu(self.l1(x))
        output_mean = self.l2(h)
        output_log_sd = self.l3(h)
        return {"mean": output_mean, "sd": self.softplus(output_log_sd) + 0.01}

# Initialize encoder and decoders
encoder = BF.BrancherFunction(EncoderArchitecture(image_size=image_size, latent_size=latent_size))
decoder = BF.BrancherFunction(DecoderArchitecture(latent_size=latent_size, image_size=image_size))

# Generative model
z = NormalVariable(np.zeros((latent_size,)), np.ones((latent_size,)), name="z")
decoder_output = decoder(z)
x = NormalVariable(decoder_output["mean"], decoder_output["sd"], name="x")
model = ProbabilisticModel([x, z])

# Amortized variational distribution
Qx = EmpiricalVariable(dataset, batch_size=50, name="x", is_observed=True)
encoder_output = encoder(Qx)
Qz = NormalVariable(encoder_output["mean"], encoder_output["sd"], name="z")
model.set_posterior_model(ProbabilisticModel([Qx, Qz]))

# Joint-contrastive inference
inference.perform_inference(model,
                            number_iterations=5000,
                            number_samples=1,
                            optimizer="Adam",
                            lr=0.005)
loss_list = model.diagnostics["loss curve"]
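# Diagnostics sketch (the same loss-curve plot used by the other examples in
# this section; assumes matplotlib is available as elsewhere):
import matplotlib.pyplot as plt
plt.plot(loss_list)
plt.title("VAE training loss")
plt.show()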
import matplotlib.pyplot as plt
import numpy as np

from brancher.variables import ProbabilisticModel
from brancher.standard_variables import BetaVariable, BinomialVariable
from brancher import inference
from brancher.visualizations import plot_posterior

# Beta/Binomial model
number_tosses = 1
p = BetaVariable(1., 1., "p")
k = BinomialVariable(number_tosses, probs=p, name="k")
model = ProbabilisticModel([k, p])

# Generate data
p_real = 0.8
data = model.get_sample(number_samples=30, input_values={p: p_real})

# Observe data
k.observe(data)

# Inference
inference.perform_inference(model,
                            number_iterations=1000,
                            number_samples=500,
                            lr=0.1,
                            optimizer='SGD')
loss_list = model.diagnostics["loss curve"]

# Plot loss
plt.plot(loss_list)
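# Posterior plot sketch (plot_posterior is imported above but not yet used here;
# this assumes it accepts a model and variable names, like plot_density does):
plot_posterior(model, variables=["p"])
plt.show()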
driving_noise = 1.
measure_noise = 0.5
x0 = NormalVariable(0., driving_noise, 'x0')
y0 = NormalVariable(x0, measure_noise, 'y0')
b = LogitNormalVariable(0.5, 1., 'b')

x = [x0]
y = [y0]
x_names = ["x0"]
y_names = ["y0"]
for t in range(1, T):
    x_names.append("x{}".format(t))
    y_names.append("y{}".format(t))
    x.append(NormalVariable(b * x[t - 1], driving_noise, x_names[t]))
    y.append(NormalVariable(x[t], measure_noise, y_names[t]))
AR_model = ProbabilisticModel(x + y)

# Generate data #
data = AR_model._get_sample(number_samples=1)
time_series = [float(data[yt].data) for yt in y]
ground_truth = [float(data[xt].data) for xt in x]
true_b = data[b].data
print("The true coefficient is: {}".format(float(true_b)))

# Observe data #
[yt.observe(data[yt][:, 0, :]) for yt in y]

# Autoregressive variational distribution #
Qb = LogitNormalVariable(0.5, 0.5, "b", learnable=True)
logit_b_post = DeterministicVariable(0., 'logit_b_post', learnable=True)
Qx = [NormalVariable(0., 1., 'x0', learnable=True)]
    new_h = h[t - 1] + dt * (x[t - 1] * (r - z[t - 1]) - h[t - 1])
    new_z = z[t - 1] + dt * (x[t - 1] * h[t - 1] - b * z[t - 1])
    x.append(NormalVariable(new_x, np.sqrt(dt) * driving_noise, x_names[t]))
    h.append(NormalVariable(new_h, np.sqrt(dt) * driving_noise, h_names[t]))
    z.append(NormalVariable(new_z, np.sqrt(dt) * driving_noise, z_names[t]))
    if t in y_range:
        y_name = "y{}".format(t)
        y_names.append(y_name)
        y.append(NormalVariable(x[t], measure_noise, y_name))
AR_model = ProbabilisticModel(x + y)

# Generate data #
data = AR_model._get_sample(number_samples=1)
time_series = [float(data[yt].data) for yt in y]
ground_truth = [float(data[xt].data) for xt in x]

# Observe data #
[yt.observe(data[yt][:, 0, :]) for yt in y]

# Structured variational distribution #
Qx = [NormalVariable(0., 1., 'x0', learnable=True)]
Qx_mean = [RootVariable(0., 'x0_mean', learnable=True)]
Qxlambda = [RootVariable(0.5, 'x0_lambda', learnable=True)]
Qh = [NormalVariable(0., 1., 'h0', learnable=True)]
labels = EmpiricalVariable(output_labels,
                           indices=minibatch_indices,
                           name="labels",
                           is_observed=True)

# Architecture parameters
weights = NormalVariable(np.zeros((number_output_classes, number_pixels)),
                         10 * np.ones((number_output_classes, number_pixels)),
                         "weights")

# Forward pass
final_activations = BF.matmul(weights, x)
k = CategoricalVariable(softmax_p=final_activations, name="k")

# Probabilistic model
model = ProbabilisticModel([k])

# Observations
k.observe(labels)

# Variational Model
Qweights = NormalVariable(np.zeros((number_output_classes, number_pixels)),
                          0.1 * np.ones((number_output_classes, number_pixels)),
                          "weights",
                          learnable=True)
variational_model = ProbabilisticModel([Qweights])
model.set_posterior_model(variational_model)

# Inference
inference.perform_inference(model,
N_people = 5
N_scores = 10  # 5

group_means = [Normal(0., 4., "group_mean_{}".format(n)) for n in range(N_groups)]
assignment_matrix = [[1], [1], [1], [1], [1]]
people_means = [Normal(sum([l * m for l, m in zip(assignment_list, group_means)]), 0.1, "person_{}".format(m))
                for m, assignment_list in enumerate(assignment_matrix)]
scores = [Normal(people_means[m], 0.1, "score_{}_{}".format(m, z))
          for m in range(N_people) for z in range(N_scores)]
model = ProbabilisticModel(scores)

# Observations
sample = model.get_sample(1)
data = sample.filter(regex="^score").filter(regex="^((?!scale).)*$")
model.observe(data)

# Variational model
Qgroup_means = [Normal(0., 4., "group_mean_{}".format(n), learnable=True) for n in range(N_groups)]
Qpeople_means = [Normal(0., 0.1, "person_{}".format(m), learnable=True)
                 for m, assignment_list in enumerate(assignment_matrix)]
labels = EmpiricalVariable(output_labels,
                           indices=minibatch_indices,
                           name="labels",
                           is_observed=True)

# Architecture parameters
weights = NormalVariable(np.zeros((number_output_classes, number_regressors)),
                         10 * np.ones((number_output_classes, number_regressors)),
                         "weights")

# Forward pass
final_activations = BF.matmul(weights, x)
k = CategoricalVariable(logits=final_activations, name="k")

# Probabilistic model
model = ProbabilisticModel([k])

# Observations
k.observe(labels)

# Variational model
num_particles = 2  #10
initial_locations = [np.random.normal(0., 1., (number_output_classes, number_regressors))
                     for _ in range(num_particles)]
particles = [ProbabilisticModel([RootVariable(location, name="weights", learnable=True)])
             for location in initial_locations]
return {"mean": output_mean} # Initialize encoder and decoders encoder = BF.BrancherFunction( EncoderArchitecture(image_size=image_size, latent_size=latent_size)) decoder = BF.BrancherFunction( DecoderArchitecture(latent_size=latent_size, image_size=image_size)) # Generative model z = NormalVariable(np.zeros((latent_size, )), np.ones((latent_size, )), name="z") decoder_output = DeterministicVariable(decoder(z), name="decoder_output") x = BinomialVariable(total_count=1, logits=decoder_output["mean"], name="x") model = ProbabilisticModel([x, z]) # Amortized variational distribution Qx = EmpiricalVariable(dataset, batch_size=100, name="x", is_observed=True) encoder_output = DeterministicVariable(encoder(Qx), name="encoder_output") Qz = NormalVariable(encoder_output["mean"], encoder_output["sd"], name="z") model.set_posterior_model(ProbabilisticModel([Qx, Qz])) # Joint-contrastive inference inference.perform_inference( model, inference_method=ReverseKL(gradient_estimator=PathwiseDerivativeEstimator), number_iterations=1000, number_samples=1, optimizer="Adam", lr=0.001)
b2 = NormalVariable(np.zeros((number_output_classes, 1)),
                    10 * np.ones((number_output_classes, 1)), "b2")
weights1 = NormalVariable(np.zeros((number_hidden_units, number_pixels)),
                          10 * np.ones((number_hidden_units, number_pixels)),
                          "weights1")
weights2 = NormalVariable(np.zeros((number_output_classes, number_hidden_units)),
                          10 * np.ones((number_output_classes, number_hidden_units)),
                          "weights2")

# Forward pass
hidden_units = BF.tanh(BF.matmul(weights1, x) + b1)
final_activations = BF.matmul(weights2, hidden_units) + b2
k = CategoricalVariable(softmax_p=final_activations, name="k")

# Probabilistic model
model = ProbabilisticModel([k])

# Observations
k.observe(labels)

# Variational Model
Qb1 = NormalVariable(np.zeros((number_hidden_units, 1)),
                     0.2 * np.ones((number_hidden_units, 1)), "b1",
                     learnable=True)
Qb2 = NormalVariable(np.zeros((number_output_classes, 1)),
                     0.2 * np.ones((number_output_classes, 1)), "b2",
                     learnable=True)
Qweights1 = NormalVariable(np.zeros((number_hidden_units, number_pixels)),
                           0.2 * np.ones((number_hidden_units, number_pixels)),
def perform_inference(joint_model,
                      number_iterations,
                      number_samples=1,
                      optimizer='Adam',
                      input_values={},
                      inference_method=None,
                      posterior_model=None,
                      sampler_model=None,
                      pretraining_iterations=0,
                      **opt_params):  #TODO: input values
    """
    Run stochastic variational inference on a joint model, training the
    posterior (and, depending on the inference method, the model and the
    sampler) and storing the loss curve in the model diagnostics.

    Parameters
    ----------
    """
    if isinstance(joint_model, StochasticProcess):
        joint_model = joint_model.active_submodel
    if isinstance(joint_model, Variable):
        joint_model = ProbabilisticModel([joint_model])
    if not inference_method:
        warnings.warn("The inference method was not specified, using the default reverse KL variational inference")
        inference_method = ReverseKL()
    if not posterior_model:
        if joint_model.posterior_model is not None:
            posterior_model = joint_model.posterior_model
        else:
            posterior_model = inference_method.construct_posterior_model(joint_model)
    if not sampler_model:  #TODO: clean up
        try:
            sampler_model = inference_method.sampler_model
        except AttributeError:
            try:
                sampler_model = joint_model.posterior_sampler
            except AttributeError:
                sampler_model = None

    joint_model.update_observed_submodel()

    optimizers_list = []

    def append_prob_optimizer(model, optimizer, **opt_params):
        # TODO: this should be better! (handling models with no parameters)
        prob_opt = ProbabilisticOptimizer(model, optimizer, **opt_params)
        if prob_opt.optimizer:
            optimizers_list.append(prob_opt)

    if inference_method.learnable_posterior:
        append_prob_optimizer(posterior_model, optimizer, **opt_params)
    if inference_method.learnable_model:
        append_prob_optimizer(joint_model, optimizer, **opt_params)
    if inference_method.learnable_sampler:
        append_prob_optimizer(sampler_model, optimizer, **opt_params)

    loss_list = []
    inference_method.check_model_compatibility(joint_model, posterior_model, sampler_model)

    for iteration in tqdm(range(number_iterations)):
        loss = inference_method.compute_loss(joint_model, posterior_model, sampler_model, number_samples)
        if torch.isfinite(loss.detach()).all().item():
            [opt.zero_grad() for opt in optimizers_list]
            loss.backward()
            inference_method.correct_gradient(joint_model, posterior_model, sampler_model, number_samples)
            optimizers_list[0].update()
            if iteration > pretraining_iterations:
                [opt.update() for opt in optimizers_list[1:]]
            loss_list.append(loss.cpu().detach().numpy().flatten())
        else:
            warnings.warn("Numerical error, skipping sample")
            loss_list.append(loss.cpu().detach().numpy())
    joint_model.diagnostics.update({"loss curve": np.array(loss_list)})
    inference_method.post_process(joint_model)  #TODO: this could be implemented with a with block
    if joint_model.posterior_model is None and inference_method.learnable_posterior:
        joint_model.set_posterior_model(posterior_model)
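# Usage sketch (mirrors the call patterns of the example scripts above; model
# and variational_model stand for any observed joint model and a matching
# variational distribution):
#
#   model.set_posterior_model(variational_model)
#   perform_inference(model,
#                     inference_method=ReverseKL(),
#                     number_iterations=1000,
#                     number_samples=50,
#                     optimizer="Adam",
#                     lr=0.01)
#   loss_curve = model.diagnostics["loss curve"]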