# Architecture parameters
number_hidden_units = 20
b1 = NormalVariable(np.zeros((number_hidden_units, 1)),
                    10 * np.ones((number_hidden_units, 1)), "b1")
b2 = NormalVariable(np.zeros((number_output_classes, 1)),
                    10 * np.ones((number_output_classes, 1)), "b2")
weights1 = NormalVariable(np.zeros((number_hidden_units, number_pixels)),
                          10 * np.ones((number_hidden_units, number_pixels)), "weights1")
weights2 = NormalVariable(np.zeros((number_output_classes, number_hidden_units)),
                          10 * np.ones((number_output_classes, number_hidden_units)), "weights2")

# Forward pass
hidden_units = BF.tanh(BF.matmul(weights1, x) + b1)
final_activations = BF.matmul(weights2, hidden_units) + b2
k = CategoricalVariable(softmax_p=final_activations, name="k")

# Probabilistic model
model = ProbabilisticModel([k])

# Observations
k.observe(labels)

# Variational model (mean-field Gaussian factors over the biases)
Qb1 = NormalVariable(np.zeros((number_hidden_units, 1)),
                     0.2 * np.ones((number_hidden_units, 1)), "b1", learnable=True)
Qb2 = NormalVariable(np.zeros((number_output_classes, 1)),
                     0.2 * np.ones((number_output_classes, 1)), "b2", learnable=True)
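# A minimal sketch of how this variational model would typically be completed and
# trained, assuming the usual Brancher pattern of attaching a posterior model and
# calling perform_inference. The Qweights factors, the set_posterior_model call,
# the iteration count and the learning rate are assumptions, not part of the example above.
from brancher.inference import perform_inference

Qweights1 = NormalVariable(np.zeros((number_hidden_units, number_pixels)),
                           0.2 * np.ones((number_hidden_units, number_pixels)),
                           "weights1", learnable=True)
Qweights2 = NormalVariable(np.zeros((number_output_classes, number_hidden_units)),
                           0.2 * np.ones((number_output_classes, number_hidden_units)),
                           "weights2", learnable=True)
variational_model = ProbabilisticModel([Qb1, Qb2, Qweights1, Qweights2])
model.set_posterior_model(variational_model)  # assumed Brancher call

perform_inference(model,
                  number_iterations=500,
                  optimizer="Adam",
                  lr=0.005)
loss_list = model.diagnostics["loss curve"]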
# Minibatch data variables
x = EmpiricalVariable(input_variable,  # name of the image array assumed; truncated in the source
                      indices=minibatch_indices, name="x", is_observed=True)
labels = EmpiricalVariable(output_labels,
                           indices=minibatch_indices, name="labels", is_observed=True)

# Architecture parameters
weights = NormalVariable(np.zeros((number_output_classes, number_regressors)),
                         10 * np.ones((number_output_classes, number_regressors)), "weights")

# Forward pass
final_activations = BF.matmul(weights, x)
k = CategoricalVariable(logits=final_activations, name="k")

# Probabilistic model
model = ProbabilisticModel([k])

# Observations
k.observe(labels)

# Variational model: a small set of learnable particles over the weights
num_particles = 2  # 10
initial_locations = [np.random.normal(0., 1., (number_output_classes, number_regressors))
                     for _ in range(num_particles)]
particles = [ProbabilisticModel([DeterministicVariable(location, name="weights", learnable=True)])
             for location in initial_locations]  # particle wrapper type assumed; the source is truncated here
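# A quick prior check, following the z.get_sample(...)["z"] pattern used later in
# this document: draw a few samples of the prior over the weights to confirm shapes
# before running inference. Purely illustrative; the sample count is arbitrary.
prior_weight_samples = weights.get_sample(3)["weights"]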
num_classes=num_classes))  # tail of a decoder definition truncated in the source

# Generative model
z1sd = 1.5   # 1
z2sd = 0.25  # 0.25
z3sd = 0.15
z1 = NormalVariable(np.zeros((latent_size1,)), z1sd * np.ones((latent_size1,)), name="z1")
decoder_output1 = DeterministicVariable(decoder1(z1), name="decoder_output1")
z2 = NormalVariable(BF.relu(decoder_output1["mean"]),
                    z2sd * np.ones((latent_size2,)), name="z2")
label_logits = DeterministicVariable(decoderLabel(z2), "label_logits")
labels = CategoricalVariable(logits=label_logits, name="labels")
decoder_output2 = DeterministicVariable(decoder2(z2), name="decoder_output2")
z3 = NormalVariable(BF.relu(decoder_output2["mean"]),
                    z3sd * np.ones((latent_size3,)), name="z3")
decoder_output3 = DeterministicVariable(decoder3(z3), name="decoder_output3")
x = BinomialVariable(total_count=1, logits=decoder_output3["mean"], name="x")
model = ProbabilisticModel([x, z1, z2, z3, labels])

# Amortized variational distribution
minibatch_indices = RandomIndices(dataset_size=dataset_size,
                                  batch_size=50,  # batch size assumed; the source is truncated here
                                  name="indices", is_observed=True)
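# A minimal sketch, assuming ProbabilisticModel.get_sample behaves like the
# per-variable get_sample used later in this document: draw a few joint samples from
# the generative prior to inspect the decoded images and labels before any inference
# is run. The sample count and the indexing by variable name are illustrative only.
prior_sample = model.get_sample(4)
generated_images = prior_sample["x"]
generated_labels = prior_sample["labels"]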
# hidden_size2 = 10
out_size = 10

# Weights
# W1 = Deterministic(np.random.normal(0., 0.1, (hidden_size1, input_size)), "W1", learnable=True)
# W2 = Deterministic(np.random.normal(0., 0.1, (hidden_size2, hidden_size1)), "W2", learnable=True)
# W3 = Deterministic(np.random.normal(0., 0.1, (out_size, hidden_size2)), "W3", learnable=True)
V = Deterministic(np.random.normal(0., 0.1, (out_size, input_size)), "V", learnable=True)

# Forward pass (the commented-out lines sketch a deeper two-hidden-layer variant)
# z1 = Deterministic(BF.relu(BF.matmul(W1, BF.reshape(x, shape=(input_size, 1)))), "z1")
# z2 = Deterministic(BF.relu(BF.matmul(W2, z1)), "z2")
# rho = Deterministic(0.1 * BF.matmul(W3, z2), "rho")
rho = Deterministic(BF.matmul(V, x / 255), "rho")
k = Categorical(logits=rho, name="k")

# Observe
k.observe(labels)
model = ProbabilisticModel([k])

# Train
from brancher.inference import MaximumLikelihood
from brancher.inference import perform_inference

perform_inference(model,
                  inference_method=MaximumLikelihood(),
                  number_iterations=150,
                  optimizer="Adam",
                  lr=0.01)
loss_list = model.diagnostics["loss curve"]
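# Plotting the loss curve collected by perform_inference; matplotlib is the only
# addition here, loss_list comes directly from the line above.
import matplotlib.pyplot as plt

plt.plot(loss_list)
plt.xlabel("Iteration")
plt.ylabel("Loss")
plt.title("Maximum-likelihood training")
plt.show()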
image_size = 28
Wk = NormalVariable(loc=np.zeros((out_channels, in_channels, 2, 2)),
                    scale=10 * np.ones((out_channels, in_channels, 2, 2)), name="Wk")
z = DeterministicVariable(BF.mean(BF.relu(BF.conv2d(x, Wk, stride=1)), (2, 3)), name="z")
Wl = NormalVariable(loc=np.zeros((num_classes, out_channels)),
                    scale=10 * np.ones((num_classes, out_channels)), name="Wl")
b = NormalVariable(loc=np.zeros((num_classes, 1)),
                   scale=10 * np.ones((num_classes, 1)), name="b")
reshaped_z = BF.reshape(z, shape=(out_channels, 1))
k = CategoricalVariable(logits=BF.linear(reshaped_z, Wl, b), name="k")

# Probabilistic model
model = ProbabilisticModel([k])

# Observations
k.observe(labels)

# Variational model: initial particle locations for the convolutional and linear weights
# num_particles = 2  # 10
wk_locations = [np.random.normal(0., 0.1, (out_channels, in_channels, 2, 2))
                for _ in range(num_particles)]
wl_locations = [np.random.normal(0., 0.1, (num_classes, out_channels))
                for _ in range(num_particles)]
z = Normal(BF.conv2d(x, Wk, padding=1), 1., name="z")
num_samples = 6
z.get_sample(num_samples)["z"]

num_classes = 10
Wl = Normal(loc=np.zeros((num_classes, image_size * image_size * out_channels)),
            scale=1. * np.ones((num_classes, image_size * image_size * out_channels)),
            name="Wl")
b = Normal(loc=np.zeros((num_classes, 1)),
           scale=1. * np.ones((num_classes, 1)), name="b")
reshaped_z = BF.reshape(z, shape=(image_size * image_size * out_channels, 1))
k = Categorical(logits=BF.linear(reshaped_z, Wl, b), name="k")
k.observe(labels)

from brancher.inference import MAP
from brancher.inference import perform_inference
from brancher.variables import ProbabilisticModel

convolutional_model = ProbabilisticModel([k])
perform_inference(convolutional_model,
                  inference_method=MAP(),
                  number_iterations=1,
                  optimizer="Adam",
                  lr=0.0025)
loss_list = convolutional_model.diagnostics["loss curve"]
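# number_iterations=1 above only checks that the model compiles and a single update
# runs; an actual MAP fit needs many more iterations. A longer run would repeat the
# same call with a larger (here arbitrary) iteration count before reading the loss curve.
perform_inference(convolutional_model,
                  inference_method=MAP(),
                  number_iterations=500,
                  optimizer="Adam",
                  lr=0.0025)
loss_list = convolutional_model.diagnostics["loss curve"]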