def main():
    """Train a VAE on MNIST with SVI and return the trained model.

    Command-line options control the number of epochs, how often the test
    set is evaluated, the learning rate, CUDA use, optional visdom plotting
    and the epoch at which the t-SNE visualization runs.

    Returns:
        The trained ``VAE`` instance.
    """

    def _as_bool(text):
        # Interpret an explicit command-line value such as "False" or "0".
        return str(text).strip().lower() in ("1", "true", "t", "yes", "y", "on")

    # parse command line arguments
    parser = argparse.ArgumentParser(description="parse args")
    parser.add_argument('-n', '--num-epochs', default=101, type=int,
                        help='number of training epochs')
    parser.add_argument('-tf', '--test-frequency', default=5, type=int,
                        help='how often we evaluate the test set')
    parser.add_argument('-lr', '--learning-rate', default=1.0e-3, type=float,
                        help='learning rate')
    # NOTE: beta1 is parsed but not forwarded to the optimizer below.
    parser.add_argument('-b1', '--beta1', default=0.95, type=float,
                        help='beta1 adam hyperparameter')
    parser.add_argument('--cuda', action='store_true', default=False,
                        help='whether to use cuda')
    # Without a type/action every supplied value (even "False") was a
    # non-empty string and therefore truthy.  Accept both the bare flag
    # (`-visdom`) and an explicit value (`-visdom False`).
    parser.add_argument('-visdom', '--visdom_flag', nargs='?', const=True,
                        default=False, type=_as_bool,
                        help='Whether plotting in visdom is desired')
    parser.add_argument('-i-tsne', '--tsne_iter', default=100, type=int,
                        help='epoch when tsne visualization runs')
    args = parser.parse_args()

    # setup MNIST data loaders
    train_loader, test_loader = setup_data_loaders(MNIST, use_cuda=args.cuda,
                                                   batch_size=256)

    # setup the VAE
    vae = VAE(use_cuda=args.cuda)

    # setup the optimizer
    adam_args = {"lr": args.learning_rate}
    optimizer = Adam(adam_args)

    # setup the inference algorithm
    svi = SVI(vae.model, vae.guide, optimizer, loss="ELBO")

    # setup visdom for visualization; `vis` only exists when plotting is on
    if args.visdom_flag:
        vis = visdom.Visdom()

    train_elbo = []
    test_elbo = []

    # training loop
    for epoch in range(args.num_epochs):
        # initialize loss accumulator
        epoch_loss = 0.

        # do a training epoch over each mini-batch x returned by the loader
        for x, _ in train_loader:
            # if on GPU put mini-batch into CUDA memory
            if args.cuda:
                x = x.cuda()
            # wrap the mini-batch in a PyTorch Variable
            x = Variable(x)
            # do ELBO gradient and accumulate loss
            epoch_loss += svi.step(x)

        # report training diagnostics (loss averaged per training example)
        normalizer_train = len(train_loader.dataset)
        total_epoch_loss_train = epoch_loss / normalizer_train
        train_elbo.append(total_epoch_loss_train)
        print("[epoch %03d] average training loss: %.4f" %
              (epoch, total_epoch_loss_train))

        if epoch % args.test_frequency == 0:
            # initialize loss accumulator
            test_loss = 0.
            # compute the loss over the entire test set
            for i, (x, _) in enumerate(test_loader):
                # if on GPU put mini-batch into CUDA memory
                if args.cuda:
                    x = x.cuda()
                # wrap the mini-batch in a PyTorch Variable
                x = Variable(x)
                # compute ELBO estimate and accumulate loss
                test_loss += svi.evaluate_loss(x)

                # pick three random test images from the first mini-batch
                # and visualize how well we're reconstructing them
                if i == 0 and args.visdom_flag:
                    plot_vae_samples(vae, vis)
                    reco_indices = np.random.randint(0, x.size(0), 3)
                    for index in reco_indices:
                        test_img = x[index, :]
                        reco_img = vae.reconstruct_img(test_img)
                        vis.image(test_img.contiguous().view(
                            28, 28).data.cpu().numpy(),
                            opts={'caption': 'test image'})
                        vis.image(reco_img.contiguous().view(
                            28, 28).data.cpu().numpy(),
                            opts={'caption': 'reconstructed image'})

            # report test diagnostics (loss averaged per test example)
            normalizer_test = len(test_loader.dataset)
            total_epoch_loss_test = test_loss / normalizer_test
            test_elbo.append(total_epoch_loss_test)
            print("[epoch %03d] average test loss: %.4f" %
                  (epoch, total_epoch_loss_test))

        if epoch == args.tsne_iter:
            mnist_test_tsne(vae=vae, test_loader=test_loader)
            plot_llk(np.array(train_elbo), np.array(test_elbo))

    return vae
def main(args):
    """Fit the selected HMM variant to the JSB chorales and log its losses.

    The model chosen by ``args.model`` is trained with SVI using an
    ``AutoDelta`` guide over the ``probs_*`` sites, then evaluated (without
    the prior term) on the training and test splits.  Finally the total
    number of learned parameters is logged.

    :param args: parsed options; reads ``cuda``, ``model``, ``truncate``,
        ``print_shapes``, ``batch_size``, ``jit``, ``learning_rate`` and
        ``num_steps``.
    """
    if args.cuda:
        torch.set_default_tensor_type('torch.cuda.FloatTensor')

    logging.info('Loading data')
    data = poly.load_data(poly.JSB_CHORALES)

    logging.info('-' * 40)
    model = models[args.model]
    train_split = data['train']
    logging.info('Training {} on {} sequences'.format(
        model.__name__, len(train_split['sequences'])))
    sequences = train_split['sequences']
    lengths = train_split['sequence_lengths']

    # Keep only the notes that occur at least once in the training set
    # (this removes 37/88 notes).
    present_notes = ((sequences == 1).sum(0).sum(0) > 0)
    sequences = sequences[..., present_notes]

    if args.truncate:
        # In-place clamp: the lengths tensor held by `data` is modified too.
        lengths.clamp_(max=args.truncate)
        sequences = sequences[:, :args.truncate]
    num_observations = float(lengths.sum())
    pyro.set_rng_seed(0)
    pyro.clear_param_store()
    pyro.enable_validation(True)

    # MAP Baum-Welch: marginalize out the hidden state and learn point
    # estimates of every conditional probability table named probs_*.
    def _is_prob_table(msg):
        return msg["name"].startswith("probs_")

    guide = AutoDelta(poutine.block(model, expose_fn=_is_prob_table))

    is_base_model = model is model_0
    batch_kwargs = dict(args=args, batch_size=args.batch_size)

    # Optionally print the shape of each site's distribution, value and
    # log_prob tensor to help debug tensor shapes.
    if args.print_shapes:
        first_available_dim = -2 if is_base_model else -3
        guide_trace = poutine.trace(guide).get_trace(
            sequences, lengths, **batch_kwargs)
        enumerated = poutine.enum(model, first_available_dim)
        replayed = poutine.replay(enumerated, guide_trace)
        model_trace = poutine.trace(replayed).get_trace(
            sequences, lengths, **batch_kwargs)
        logging.info(model_trace.format_shapes())

    # Enumeration requires a TraceEnum elbo and a declared
    # max_plate_nesting.
    Elbo = JitTraceEnum_ELBO if args.jit else TraceEnum_ELBO
    elbo = Elbo(max_plate_nesting=1 if is_base_model else 2)
    optim = Adam({'lr': args.learning_rate})
    svi = SVI(model, guide, optim, elbo)

    # Train on small minibatches.
    logging.info('Step\tLoss')
    for step in range(args.num_steps):
        loss = svi.step(sequences, lengths, **batch_kwargs)
        logging.info('{: >5d}\t{}'.format(step, loss / num_observations))

    # Evaluate on the entire training dataset, excluding the prior term so
    # results are comparable across models.
    train_loss = elbo.loss(model, guide, sequences, lengths, args,
                           include_prior=False)
    logging.info('training loss = {}'.format(train_loss / num_observations))

    # Finally evaluate on the test dataset.
    logging.info('-' * 40)
    test_split = data['test']
    logging.info('Evaluating on {} test sequences'.format(
        len(test_split['sequences'])))
    sequences = test_split['sequences'][..., present_notes]
    lengths = test_split['sequence_lengths']
    if args.truncate:
        lengths.clamp_(max=args.truncate)
    num_observations = float(lengths.sum())

    # Unseen notes were removed above, so this test loss may not be
    # directly comparable to numbers reported elsewhere for this dataset.
    test_loss = elbo.loss(model, guide, sequences, lengths, args=args,
                          include_prior=False)
    logging.info('test loss = {}'.format(test_loss / num_observations))

    # Capacity = total number of scalar entries in the param store.
    capacity = 0
    for value in pyro.get_param_store().values():
        capacity += value.reshape(-1).size(0)
    logging.info('{} capacity = {} parameters'.format(model.__name__,
                                                      capacity))
def main(args):
    """ Train GAE

    Loads the dataset named by ``args.dataset_str``, masks out validation
    and test edges, trains the graph auto-encoder with full-batch SVI and
    records per-epoch validation metrics plus periodic test metrics, then
    prints the final test metrics and plots the collected results.

    :param args: parsed options; reads ``dataset_str``, ``dropout``,
        ``subsampling``, ``lr``, ``num_epochs`` and ``test_freq``.
    """
    print("Using {} dataset".format(args.dataset_str))

    # Load data
    np.random.seed(1)
    adj, features = load_data(args.dataset_str)
    N, _ = features.shape

    # Store original adjacency matrix (without diagonal entries)
    adj_orig = adj

    (adj_train, train_edges, val_edges, val_edges_false,
     test_edges, test_edges_false) = mask_test_edges(adj)

    # Some preprocessing
    adj_train_norm = preprocess_graph(adj_train)
    adj_train_norm = Variable(make_sparse(adj_train_norm))
    adj_train_labels = Variable(
        torch.FloatTensor(adj_train + sp.eye(adj_train.shape[0]).todense()))
    features = Variable(make_sparse(features))

    n_edges = adj_train_labels.sum()

    data = {
        'adj_norm': adj_train_norm,
        'adj_labels': adj_train_labels,
        'features': features,
    }

    gae = GAE(data, n_hidden=32, n_latent=16, dropout=args.dropout,
              subsampling=args.subsampling)

    optimizer = Adam({"lr": args.lr, "betas": (0.95, 0.999)})

    svi = SVI(gae.model, gae.guide, optimizer, loss="ELBO")

    # Results
    results = defaultdict(list)

    # Full batch training loop
    for epoch in range(args.num_epochs):
        # One full-batch ELBO gradient step per epoch.
        epoch_loss = svi.step()

        # report training diagnostics
        if args.subsampling:
            normalized_loss = epoch_loss / float(2 * n_edges)
        else:
            normalized_loss = epoch_loss / (2 * N * N)

        results['train_elbo'].append(normalized_loss)

        # Metrics on the validation edges (stored under the *_train keys).
        emb = gae.get_embeddings()
        accuracy, roc_curr, ap_curr = eval_gae(val_edges, val_edges_false,
                                               emb, adj_orig)

        results['accuracy_train'].append(accuracy)
        results['roc_train'].append(roc_curr)
        results['ap_train'].append(ap_curr)

        print("Epoch:", '%04d' % (epoch + 1),
              "train_loss=", "{:.5f}".format(normalized_loss),
              "train_acc=", "{:.5f}".format(accuracy),
              "val_roc=", "{:.5f}".format(roc_curr),
              "val_ap=", "{:.5f}".format(ap_curr))

        # Test loss
        if epoch % args.test_freq == 0:
            emb = gae.get_embeddings()
            accuracy, roc_score, ap_score = eval_gae(
                test_edges, test_edges_false, emb, adj_orig)
            results['accuracy_test'].append(accuracy)
            # Record the test metrics just computed; previously the
            # validation values (roc_curr / ap_curr) were stored here.
            results['roc_test'].append(roc_score)
            results['ap_test'].append(ap_score)

    print("Optimization Finished!")

    # Final test metrics
    emb = gae.get_embeddings()
    accuracy, roc_score, ap_score = eval_gae(test_edges, test_edges_false,
                                             emb, adj_orig)
    print('Test Accuracy: ' + str(accuracy))
    print('Test ROC score: ' + str(roc_score))
    print('Test AP score: ' + str(ap_score))

    # Plot
    plot_results(results, args.test_freq,
                 path=args.dataset_str + "_results.png")
""" TEST_STRINGS = [ "+1 604 250 1363", "+1 604 922 5941", "+1 604 337 1000", "+1 604 250 9999", "+1 604 922 1414", "+1 604 337 2654", "+1 604 250 9573", "+1 604 922 2543", "+1 604 337 5068" ] svae = PhoneVAE(batch_size=1) optimizer = Adam(ADAM_CONFIG) svi = SVI(svae.model, svae.guide, optimizer, loss=Trace_ELBO()) """ Train the model """ train_elbo = [] for e in range(NUM_EPOCHS): epoch_loss = 0. for string in TEST_STRINGS: # Pad input string differently than observed string so program doesn't get rewarded by making string short one_hot_string = strings_to_tensor([string], MAX_STRING_LEN) if CUDA: one_hot_string.cuda() svi.step(one_hot_string) epoch_loss += svi.step(one_hot_string) if e % RECORD_EVERY == 0:
describe = partial(pd.Series.describe, percentiles=[.05, 0.25, 0.5, 0.75, 0.95]) site_stats[site_name] = marginal_site.apply(describe, axis=1) \ [["mean", "std", "5%", "25%", "50%", "75%", "95%"]] return site_stats # Prepare training data df = rugged_data[["cont_africa", "rugged", "rgdppc_2000"]] df = df[np.isfinite(df.rgdppc_2000)] df["rgdppc_2000"] = np.log(df["rgdppc_2000"]) train = torch.tensor(df.values, dtype=torch.float) svi = SVI(model, guide, optim.Adam({"lr": .005}), loss=Trace_ELBO(), num_samples=1000) is_cont_africa, ruggedness, log_gdp = train[:, 0], train[:, 1], train[:, 2] pyro.clear_param_store() num_iters = 8000 if not smoke_test else 2 for i in range(num_iters): elbo = svi.step(is_cont_africa, ruggedness, log_gdp) if i % 500 == 0: logging.info("Elbo loss: {}".format(elbo)) posterior = svi.run(log_gdp, is_cont_africa, ruggedness) sites = ["a", "bA", "bR", "bAR", "sigma"] for site, values in summary(posterior, sites).items():
def main():
    """Train the semi-supervised VAE on the WUST and weakly-labelled data.

    Loads the pickled datasets, encodes the labels, builds the data
    loaders, the ``SSVAE`` and its SVI losses, then runs ``NUM_EPOCHS``
    epochs of inference while logging per-example losses and the test
    accuracy.  All configuration comes from module-level constants.
    """

    def _load_pickle(path):
        with path.open('rb') as file:
            return pickle.load(file)

    wust_train = _load_pickle(WUST_TRAIN_PATH)
    wust_test = _load_pickle(WUST_TEST_PATH)
    weak_small_x = _load_pickle(WEAK_SMALL_ENCODED_PATH)

    weak_small_x = torch.FloatTensor(weak_small_x)
    wust_train_x = torch.FloatTensor(wust_train[1])
    wust_test_x = torch.FloatTensor(wust_test[1])

    wust_train_labels = wust_train[2]
    wust_test_labels = wust_test[2]

    # The encoder is fitted on the training labels only.
    label_encoder = LabelEncoder()
    label_encoder.fit(wust_train_labels)
    wust_train_y = label_encoder.transform(wust_train_labels)
    wust_test_y = label_encoder.transform(wust_test_labels)

    wust_train_y = torch.LongTensor(wust_train_y.reshape(-1, 1))
    wust_test_y = torch.LongTensor(wust_test_y.reshape(-1, 1))

    wust_train_ds = TensorDataset(wust_train_x, wust_train_y)
    wust_train_dl = DataLoader(wust_train_ds, batch_size=BATCH_SIZE,
                               shuffle=True)

    wust_test_ds = TensorDataset(wust_test_x, wust_test_y)
    wust_test_dl = DataLoader(wust_test_ds, batch_size=BATCH_SIZE,
                              shuffle=True)

    weak_train_ds = TensorDataset(weak_small_x)
    weak_train_dl = DataLoader(weak_train_ds, batch_size=BATCH_SIZE,
                               shuffle=True)

    pyro.set_rng_seed(SEED)

    ss_vae = SSVAE(input_size=wust_train_x.shape[1],
                   output_size=len(label_encoder.classes_),
                   z_dim=Z_DIM,
                   hidden_layers=HIDDEN_LAYERS,
                   use_cuda=CUDA,
                   config_enum=ENUM_DISCRETE,
                   aux_loss_multiplier=AUX_LOSS_MULTIPLIER)

    # setup the optimizer
    optimizer = Adam({"lr": LEARNING_RATE, "betas": (BETA_1, 0.999)})

    # set up the loss(es) for inference. wrapping the guide in
    # config_enumerate builds the loss as a sum by enumerating each class
    # label for the sampled discrete categorical distribution in the model
    guide = config_enumerate(ss_vae.guide, ENUM_DISCRETE, expand=True)
    Elbo = JitTraceEnum_ELBO if USE_JIT else TraceEnum_ELBO
    elbo = Elbo(max_plate_nesting=1, strict_enumeration_warning=False)
    loss_basic = SVI(ss_vae.model, guide, optimizer, loss=elbo)

    # build a list of all losses considered
    losses = [loss_basic]

    # aux_loss: whether to use the auxiliary loss from NIPS 14 paper
    # (Kingma et al)
    if AUX_LOSS:
        elbo = JitTrace_ELBO() if USE_JIT else Trace_ELBO()
        loss_aux = SVI(ss_vae.model_classify, ss_vae.guide_classify,
                       optimizer, loss=elbo)
        losses.append(loss_aux)

    # Bind the name before entering `try` so the `finally` clause cannot
    # raise NameError (and mask the real error) when open() itself fails.
    logger = None
    try:
        # setup the logger if a filename is provided
        logger = open(LOGFILE, "w") if LOGFILE else None

        # how often a supervised batch is encountered during inference:
        # the ratio of unsupervised to supervised example counts
        sup_num = len(wust_train_ds)
        unsup_num = len(weak_train_ds)
        periodic_interval_batches = int(unsup_num / sup_num)

        # NOTE: there is no separate validation split here; the "best
        # validation accuracy" below is tracked on the test loader.
        best_valid_acc, corresponding_test_acc = 0.0, 0.0

        # run inference for a certain number of epochs
        for i in range(0, NUM_EPOCHS):
            # get the losses for an epoch
            epoch_losses_sup, epoch_losses_unsup = run_inference_for_epoch(
                wust_train_dl, weak_train_dl, losses,
                periodic_interval_batches)

            # compute average epoch losses i.e. losses per example
            str_loss_sup = " ".join(
                str(v / sup_num) for v in epoch_losses_sup)
            str_loss_unsup = " ".join(
                str(v / unsup_num) for v in epoch_losses_unsup)

            str_print = (f"{i} epoch: avg losses "
                         f"{str_loss_sup} {str_loss_unsup}")

            test_accuracy = get_accuracy(wust_test_dl, ss_vae.classifier,
                                         BATCH_SIZE)
            str_print += " test accuracy {}".format(test_accuracy)

            # update the best accuracy seen so far
            if best_valid_acc < test_accuracy:
                best_valid_acc = test_accuracy
                corresponding_test_acc = test_accuracy

            print_and_log(logger, str_print)

        final_test_accuracy = get_accuracy(wust_test_dl, ss_vae.classifier,
                                           BATCH_SIZE)
        print_and_log(
            logger,
            f"best validation accuracy {best_valid_acc} corresponding testing accuracy {corresponding_test_acc} "
            f"last testing accuracy {final_test_accuracy}")
    finally:
        # close the logger file object if we opened it earlier
        if logger is not None:
            logger.close()
str(myID) + " " + str(counter)) logitCorr = batchOrdered[0][-1]["relevant_logprob_sum"] pyro.sample("result_Correct_{}".format(q), Bernoulli(logits=logitCorr), obs=Variable(torch.FloatTensor([1.0]))) # pyro.observe("result_Correct_{}".format(q), Bernoulli(logits=logitCorr), Variable(torch.FloatTensor([1.0]))) adam_params = {"lr": 0.001, "betas": (0.90, 0.999)} optimizer = Adam(adam_params) # setup the inference algorithm from pyro.infer import Trace_ELBO svi = SVI(model, guide, optimizer, loss=Trace_ELBO()) #, num_particles=7) n_steps = 10 * 400000 # do gradient steps for step in range(1, n_steps): if step % 100 == 1: print("DOING A STEP") print(".......") print(step) # quit() # for name in pyro.get_param_store().get_all_param_names(): # print [name, pyro.param(name).data.numpy()] svi.step(corpus)
def main(args):
    """Train the graph VAE with SVI, evaluating on the test set each epoch.

    NOTE(review): the source layout was flattened, so two placements are
    inferred and should be confirmed against the original file: the visdom
    visualization is kept inside the test-batch loop, and the checkpoint
    save runs once after the epoch loop.

    :param args: parsed options; reads ``cuda``, ``learning_rate``, ``jit``,
        ``visdom_flag``, ``num_epochs``, ``save``, ``name`` and ``time``.
    :return: the trained ``VAE`` instance.
    """
    # clear param store
    pyro.clear_param_store()

    train_loader, test_loader = get_data()

    # model, optimizer and inference algorithm
    vae = VAE(use_cuda=args.cuda)
    optimizer = Adam({"lr": args.learning_rate})
    elbo = JitTrace_ELBO() if args.jit else Trace_ELBO()
    svi = SVI(vae.model, vae.guide, optimizer, loss=elbo)

    input_size = 0  # overwritten per training batch; not read afterwards

    # setup visdom for visualization
    if args.visdom_flag:
        vis = visdom.Visdom()

    train_elbo, test_elbo = [], []

    def _unpack(batch):
        # Pull node features and edge index out of a batch, moving them to
        # CUDA memory when requested.
        if args.cuda:
            return batch['x'].cuda(), batch['edge_index'].cuda()
        return batch['x'], batch['edge_index']

    for epoch in range(args.num_epochs):
        # ---- training pass ----
        epoch_loss = 0.
        for step, batch in enumerate(train_loader):
            x, adj = _unpack(batch)
            print("x_shape", x.shape)
            print("adj_shape", adj.shape)
            input_size = x.shape[0] * x.shape[1]
            epoch_loss += svi.step(x, adj)

        # report training diagnostics
        total_epoch_loss_train = epoch_loss / len(train_loader.dataset)
        train_elbo.append(total_epoch_loss_train)
        print("[epoch %03d] average training loss: %.4f" %
              (epoch, total_epoch_loss_train))

        # ---- evaluation pass (runs every epoch) ----
        test_loss = 0.
        for step, batch in enumerate(test_loader):
            x, adj = _unpack(batch)
            # compute ELBO estimate and accumulate loss
            test_loss += svi.evaluate_loss(x, adj)

            if args.visdom_flag:
                plot_vae_samples(vae, vis)
                # visualize three randomly chosen rows of this batch
                for index in np.random.randint(0, x.shape[0], 3):
                    test_img = x[index, :]
                    reco_img = vae.reconstruct_graph(test_img)
                    vis.image(
                        test_img.reshape(28, 28).detach().cpu().numpy(),
                        opts={'caption': 'test image'})
                    vis.image(
                        reco_img.reshape(28, 28).detach().cpu().numpy(),
                        opts={'caption': 'reconstructed image'})

        # report test diagnostics
        total_epoch_loss_test = test_loss / len(test_loader.dataset)
        test_elbo.append(total_epoch_loss_test)
        print("[epoch %03d] average test loss: %.4f" %
              (epoch, total_epoch_loss_test))

    if args.save:
        checkpoint = {
            'epoch': epoch,
            'model_state_dict': vae.state_dict(),
            'optimzier_state_dict': optimizer.get_state(),
            'train_loss': total_epoch_loss_train,
            'test_loss': total_epoch_loss_test
        }
        torch.save(checkpoint, 'vae_' + args.name + str(args.time) + '.pt')

    return vae
def main(args):
    """Train scANVI with SVI and optionally plot the learned latent space.

    :param args: parsed options; reads ``seed``, ``dataset``,
        ``batch_size``, ``cuda``, ``learning_rate``, ``num_epochs`` and
        ``plot``.  When ``plot`` is set and the dataset is ``"pbmc"`` a
        UMAP figure is written to ``scanvi.pdf``.
    """
    # Fix random number seed
    pyro.util.set_rng_seed(args.seed)

    # Load and pre-process data
    dataloader, num_genes, l_mean, l_scale, anndata = get_data(
        dataset=args.dataset, batch_size=args.batch_size, cuda=args.cuda)

    # Instantiate the model/guide and its neural networks
    scanvi = SCANVI(
        num_genes=num_genes,
        num_labels=4,
        l_loc=l_mean,
        l_scale=l_scale,
        scale_factor=1.0 / (args.batch_size * num_genes),
    )
    if args.cuda:
        scanvi.cuda()

    # Adam wrapped in a multi-step scheduler: the learning rate is
    # multiplied by gamma=0.2 at the epoch-20 milestone.
    scheduler_config = {
        "optimizer": Adam,
        "optim_args": {"lr": args.learning_rate},
        "milestones": [20],
        "gamma": 0.2,
    }
    scheduler = MultiStepLR(scheduler_config)

    # Tell Pyro to enumerate out y when y is unobserved
    guide = config_enumerate(scanvi.guide, "parallel", expand=True)

    # TraceEnum_ELBO provides automatic enumeration of the discrete
    # latent variable y.
    elbo = TraceEnum_ELBO(strict_enumeration_warning=False)
    svi = SVI(scanvi.model, guide, scheduler, elbo)

    # Training loop
    for epoch in range(args.num_epochs):
        losses = []
        for x, y in dataloader:
            if y is not None:
                y = y.type_as(x)
            losses.append(svi.step(x, y))

        # Tell the scheduler we've done one epoch.
        scheduler.step()
        print("[Epoch %04d] Loss: %.5f" % (epoch, np.mean(losses)))

    # Put neural networks in eval mode (needed for batchnorm)
    scanvi.eval()

    # Nothing more to do unless plotting of the pbmc dataset was requested.
    if not (args.plot and args.dataset == "pbmc"):
        return

    import scanpy as sc

    # Compute latent representation (z2_loc) for each cell in the dataset
    latent_rep = scanvi.z2l_encoder(dataloader.data_x)[0]

    # Compute inferred cell type probabilities for each cell
    y_logits = scanvi.classifier(latent_rep)
    y_probs = softmax(y_logits, dim=-1).data.cpu().numpy()

    # Use scanpy to compute 2-dimensional UMAP coordinates from the
    # learned latent representation z2
    anndata.obsm["X_scANVI"] = latent_rep.data.cpu().numpy()
    sc.pp.neighbors(anndata, use_rep="X_scANVI")
    sc.tl.umap(anndata)
    umap1 = anndata.obsm["X_umap"][:, 0]
    umap2 = anndata.obsm["X_umap"][:, 1]

    # All panels are scatterplots of the UMAP embedding and differ only in
    # how the points are colored.
    fig, axes = plt.subplots(3, 2)

    # Top-left: the hand-curated seed labels
    seed_marker_sizes = anndata.obs["seed_marker_sizes"]
    seed_ax = axes[0, 0]
    seed_ax.scatter(
        umap1,
        umap2,
        s=seed_marker_sizes,
        c=anndata.obs["seed_colors"],
        marker=".",
        alpha=0.7,
    )
    seed_ax.set_title("Hand-Curated Seed Labels")

    # Top-right: a legend-only panel
    legend_entries = [
        ("lightcoral", "CD8-Naive"),
        ("limegreen", "CD4-Naive"),
        ("deepskyblue", "CD4-Memory"),
        ("mediumorchid", "CD4-Regulatory"),
    ]
    handles = [Patch(color=color, label=label)
               for color, label in legend_entries]
    legend_ax = axes[0, 1]
    legend_ax.legend(loc="center left", handles=handles)
    legend_ax.get_xaxis().set_visible(False)
    legend_ax.get_yaxis().set_visible(False)
    legend_ax.set_frame_on(False)

    # Remaining panels: inferred probability of each of the four cell types
    probability_panels = [
        ((1, 0), 0, "Inferred CD8-Naive probability"),
        ((1, 1), 1, "Inferred CD4-Naive probability"),
        ((2, 0), 2, "Inferred CD4-Memory probability"),
        ((2, 1), 3, "Inferred CD4-Regulatory probability"),
    ]
    for (row, col), label_index, title in probability_panels:
        panel = axes[row, col]
        points = panel.scatter(umap1, umap2, s=1,
                               c=y_probs[:, label_index],
                               marker=".", alpha=0.7)
        panel.set_title(title)
        fig.colorbar(points, ax=panel)

    fig.tight_layout()
    plt.savefig("scanvi.pdf")
rewards.append(reward) next_states.append(next_state) actions.append(action) if done: print("exit at", t) break global episode episode += 1 rewards = generate_rewards(raw_rewards=rewards, gamma=0.98) return [states, actions, rewards, next_states] learning_rate = 1e-5 #1e-5 optimizer = optim.Adam({"lr": learning_rate}) svi = SVI(policy, guide, optimizer, loss=Trace_ELBO()) def optimize(memory): num_steps = 1000 for experience in memory: states = experience[0] actions = experience[1] rewards = experience[2] for t in range(num_steps): loss = 0 for idx, state in enumerate(states): state = torch.from_numpy(state).float() action = torch.tensor(actions[idx]) reward = torch.tensor(rewards[idx]) loss += svi.step(state, action, reward)
def main(args):
    """Train a VAE on MNIST with SVI and return it.

    :param args: parsed options; reads ``cuda``, ``learning_rate``, ``jit``,
        ``visdom_flag``, ``num_epochs``, ``test_frequency`` and
        ``tsne_iter``.
    :return: the trained ``VAE`` instance.
    """
    # clear param store
    pyro.clear_param_store()

    # MNIST data loaders
    train_loader, test_loader = setup_data_loaders(MNIST, use_cuda=args.cuda,
                                                   batch_size=256)

    # model, optimizer and inference algorithm
    vae = VAE(use_cuda=args.cuda)
    optimizer = Adam({"lr": args.learning_rate})
    elbo = JitTrace_ELBO() if args.jit else Trace_ELBO()
    svi = SVI(vae.model, vae.guide, optimizer, loss=elbo)

    # visdom client is only created when plotting is requested
    if args.visdom_flag:
        vis = visdom.Visdom()

    train_elbo, test_elbo = [], []

    def _show(image, caption):
        # Send one flattened 28x28 image to visdom.
        vis.image(image.reshape(28, 28).detach().cpu().numpy(),
                  opts={'caption': caption})

    for epoch in range(args.num_epochs):
        # ---- training pass over every mini-batch ----
        epoch_loss = 0.
        for x, _ in train_loader:
            if args.cuda:
                x = x.cuda()
            # one ELBO gradient step; accumulate its loss
            epoch_loss += svi.step(x)

        # report training diagnostics
        total_epoch_loss_train = epoch_loss / len(train_loader.dataset)
        train_elbo.append(total_epoch_loss_train)
        print("[epoch %03d] average training loss: %.4f" %
              (epoch, total_epoch_loss_train))

        if epoch % args.test_frequency == 0:
            # ---- evaluation pass over the entire test set ----
            test_loss = 0.
            for i, (x, _) in enumerate(test_loader):
                if args.cuda:
                    x = x.cuda()
                test_loss += svi.evaluate_loss(x)

                # On the first mini-batch, reconstruct three random test
                # images and show them next to the originals.
                if i == 0 and args.visdom_flag:
                    plot_vae_samples(vae, vis)
                    for index in np.random.randint(0, x.shape[0], 3):
                        test_img = x[index, :]
                        reco_img = vae.reconstruct_img(test_img)
                        _show(test_img, 'test image')
                        _show(reco_img, 'reconstructed image')

            # report test diagnostics
            total_epoch_loss_test = test_loss / len(test_loader.dataset)
            test_elbo.append(total_epoch_loss_test)
            print("[epoch %03d] average test loss: %.4f" %
                  (epoch, total_epoch_loss_test))

        if epoch == args.tsne_iter:
            mnist_test_tsne(vae=vae, test_loader=test_loader)
            plot_llk(np.array(train_elbo), np.array(test_elbo))

    return vae
def main(args):
    """
    run inference for CVAE

    Loads a previously saved model from ``cvae.model.pt`` when that file
    exists (otherwise builds a fresh ``CVAE``), trains it with SVI for
    ``args.num_epochs`` epochs while logging validation/test errors, and
    saves the model back to ``cvae.model.pt``.

    :param args: arguments for CVAE
    :return: None
    """
    model_path = 'cvae.model.pt'

    if args.seed is not None:
        set_seed(args.seed, args.cuda)

    if os.path.exists(model_path):
        print('Loading model %s' % model_path)
        # NOTE: torch.load unpickles the whole module; only load files
        # produced by this script.
        cvae = torch.load(model_path)
    else:
        cvae = CVAE(z_dim=args.z_dim,
                    y_dim=8,
                    x_dim=32612,
                    hidden_dim=args.hidden_dimension,
                    use_cuda=args.cuda)

    print(cvae)

    # setup the optimizer
    adam_params = {
        "lr": args.learning_rate,
        "betas": (args.beta_1, 0.999),
        "clip_norm": 0.5
    }
    optimizer = ClippedAdam(adam_params)
    guide = config_enumerate(cvae.guide, args.enum_discrete)

    # set up the loss for inference.
    loss = SVI(cvae.model, guide, optimizer,
               loss=TraceEnum_ELBO(max_iarange_nesting=1))

    # Bind the name before entering `try` so the `finally` clause cannot
    # raise NameError (and mask the real error) when open() itself fails.
    logger = None
    try:
        # setup the logger if a filename is provided
        logger = open(args.logfile, "w") if args.logfile else None

        data_loaders = setup_data_loaders(NHANES, args.cuda, args.batch_size)
        print(len(data_loaders['train']))
        print(len(data_loaders['test']))
        print(len(data_loaders['valid']))

        # lowest validation / test errors seen across epochs
        best_valid_err, best_test_err = float('inf'), float('inf')

        # run inference for a certain number of epochs
        for i in range(0, args.num_epochs):
            # get the losses for an epoch
            epoch_losses = run_inference_for_epoch(
                args.batch_size, data_loaders, loss, args.cuda)

            # compute average epoch losses i.e. losses per example
            avg_epoch_losses = epoch_losses / NHANES.train_size

            # store the losses in the logfile
            str_print = "{} epoch: avg loss {}".format(
                i, str(avg_epoch_losses))

            validation_err = get_accuracy(data_loaders["valid"],
                                          cvae.sim_measurements)
            str_print += " validation error {}".format(validation_err)

            # this test error is only for logging, it is not used to make
            # any decisions during training
            # (identifier is plain ASCII; it previously contained a
            # look-alike Cyrillic letter)
            test_err = get_accuracy(data_loaders["test"],
                                    cvae.sim_measurements)
            str_print += " test error {}".format(test_err)

            # Track the minima of both errors.
            # NOTE(review): the two minima are tracked independently, so
            # the "corresponding testing error" reported below is simply
            # the lowest test error seen — confirm this is intended.
            if best_valid_err > validation_err:
                best_valid_err = validation_err
            if best_test_err > test_err:
                best_test_err = test_err

            print_and_log(logger, str_print)

        final_test_accuracy = get_accuracy(data_loaders["test"],
                                           cvae.sim_measurements)

        print_and_log(
            logger,
            "best validation error {} corresponding testing error {} "
            "last testing error {}".format(best_valid_err, best_test_err,
                                           final_test_accuracy))
        torch.save(cvae, model_path)
    finally:
        # close the logger file object if we opened it earlier
        if logger is not None:
            logger.close()
def __init__(self, model, data, covariates, *,
             guide=None,
             init_loc_fn=init_to_sample,
             init_scale=0.1,
             create_plates=None,
             optim=None,
             learning_rate=0.01,
             betas=(0.9, 0.99),
             learning_rate_decay=0.1,
             clip_norm=10.0,
             dct_gradients=False,
             subsample_aware=False,
             num_steps=1001,
             num_particles=1,
             vectorize_particles=True,
             warm_start=False,
             log_every=100):
    """Store the model, build a guide if needed, and fit it with SVI.

    ``data`` and ``covariates`` must agree in size along dim ``-2``.  When
    ``guide`` is omitted an ``AutoNormal`` guide is created from
    ``init_loc_fn``, ``init_scale`` and ``create_plates``.  When ``optim``
    is omitted (and ``num_steps`` is non-zero) a ``DCTAdam`` optimizer is
    built from ``learning_rate``, ``betas``, ``learning_rate_decay``,
    ``clip_norm`` and ``subsample_aware``.  ``num_steps`` SVI steps are
    run; the loss of each step, divided by ``data.numel()``, is collected
    in ``self.losses`` and logged every ``log_every`` steps.
    """
    assert data.size(-2) == covariates.size(-2)
    super().__init__()
    self.model = model
    if guide is None:
        guide = AutoNormal(self.model,
                           init_loc_fn=init_loc_fn,
                           init_scale=init_scale,
                           create_plates=create_plates)
    self.guide = guide

    # The messenger-wrapped callables are used only to guess the plate
    # nesting below; SVI itself is given self.model / self.guide.
    wrapped_model, wrapped_guide = model, guide
    if warm_start:
        wrapped_model = PrefixWarmStartMessenger()(wrapped_model)
        wrapped_guide = PrefixWarmStartMessenger()(wrapped_guide)
    if dct_gradients:
        wrapped_model = MarkDCTParamMessenger("time")(wrapped_model)
        wrapped_guide = MarkDCTParamMessenger("time")(wrapped_guide)

    elbo = TraceEnum_ELBO(num_particles=num_particles,
                          vectorize_particles=vectorize_particles)
    elbo._guess_max_plate_nesting(wrapped_model, wrapped_guide,
                                  (data, covariates), {})
    # force a time plate
    elbo.max_plate_nesting = max(elbo.max_plate_nesting, 1)

    loss_history = []
    if num_steps:
        if optim is None:
            optim_config = {
                "lr": learning_rate,
                "betas": betas,
                # per-step decay so the total decay over num_steps equals
                # learning_rate_decay
                "lrd": learning_rate_decay**(1 / num_steps),
                "clip_norm": clip_norm,
                "subsample_aware": subsample_aware
            }
            optim = DCTAdam(optim_config)
        svi = SVI(self.model, self.guide, optim, elbo)
        for step in range(num_steps):
            loss = svi.step(data, covariates) / data.numel()
            should_log = log_every and step % log_every == 0
            if should_log:
                logger.info("step {: >4d} loss = {:0.6g}".format(
                    step, loss))
            loss_history.append(loss)

    # Disable subsampling after training.
    self.guide.create_plates = None
    self.max_plate_nesting = elbo.max_plate_nesting
    self.losses = loss_history
def main(args):
    """Train a deep Markov model on the JSB chorales and log its progress.

    :param args: parsed command line namespace. Attributes read here:
        ``log``, ``mini_batch_size``, ``cuda``, ``rnn_dropout_rate``,
        ``num_iafs``, ``iaf_dim``, ``learning_rate``, ``beta1``, ``beta2``,
        ``clip_norm``, ``lr_decay``, ``weight_decay``, ``jit``,
        ``save_model``, ``save_opt``, ``load_model``, ``load_opt``,
        ``annealing_epochs``, ``minimum_annealing_factor``,
        ``checkpoint_freq`` and ``num_epochs``.
    """
    # setup logging
    log = get_logger(args.log)
    log(args)

    # split the dataset into its train / validation / test parts
    data = poly.load_data(poly.JSB_CHORALES)
    train_split, val_split, test_split = data['train'], data['valid'], data['test']
    training_seq_lengths = train_split['sequence_lengths']
    training_data_sequences = train_split['sequences']
    val_seq_lengths = val_split['sequence_lengths']
    val_data_sequences = val_split['sequences']
    test_seq_lengths = test_split['sequence_lengths']
    test_data_sequences = test_split['sequences']

    N_train_data = len(training_seq_lengths)
    N_train_time_slices = float(torch.sum(training_seq_lengths))
    # a trailing partial mini-batch counts as one extra mini-batch
    has_partial_batch = int(N_train_data % args.mini_batch_size > 0)
    N_mini_batches = int(N_train_data / args.mini_batch_size + has_partial_batch)

    log("N_train_data: %d avg. training seq. length: %.2f N_mini_batches: %d" %
        (N_train_data, training_seq_lengths.float().mean(), N_mini_batches))

    # how often we do validation/test evaluation during training
    val_test_frequency = 50
    # the number of samples we use to do the evaluation
    n_eval_samples = 1

    def rep(x):
        """Repeat every entry of ``x`` ``n_eval_samples`` times along dim 0.

        Used to package repeated copies of val/test data so that the
        evaluation can be vectorized.
        """
        tiled_shape = torch.Size([x.size(0) * n_eval_samples]) + x.size()[1:]
        repeat_dims = [1] * len(x.size())
        repeat_dims[0] = n_eval_samples
        tiled = x.repeat(repeat_dims)
        interleaved = tiled.reshape(n_eval_samples, -1).transpose(1, 0)
        return interleaved.reshape(tiled_shape)

    # get the validation/test data ready for the dmm: pack into sequences, etc.
    val_seq_lengths = rep(val_seq_lengths)
    test_seq_lengths = rep(test_seq_lengths)
    val_batch, val_batch_reversed, val_batch_mask, val_seq_lengths = poly.get_mini_batch(
        torch.arange(n_eval_samples * val_data_sequences.shape[0]),
        rep(val_data_sequences), val_seq_lengths, cuda=args.cuda)
    test_batch, test_batch_reversed, test_batch_mask, test_seq_lengths = poly.get_mini_batch(
        torch.arange(n_eval_samples * test_data_sequences.shape[0]),
        rep(test_data_sequences), test_seq_lengths, cuda=args.cuda)

    # instantiate the dmm
    dmm = DMM(rnn_dropout_rate=args.rnn_dropout_rate, num_iafs=args.num_iafs,
              iaf_dim=args.iaf_dim, use_cuda=args.cuda)

    # setup optimizer
    adam = ClippedAdam({
        "lr": args.learning_rate,
        "betas": (args.beta1, args.beta2),
        "clip_norm": args.clip_norm,
        "lrd": args.lr_decay,
        "weight_decay": args.weight_decay,
    })

    # setup inference algorithm
    if args.jit:
        elbo = JitTrace_ELBO()
    else:
        elbo = Trace_ELBO()
    svi = SVI(dmm.model, dmm.guide, adam, loss=elbo)

    # the helpers below close over the objects created above and together
    # form the main training loop

    def save_checkpoint():
        """Write the model and optimizer states to disk."""
        log("saving model to %s..." % args.save_model)
        torch.save(dmm.state_dict(), args.save_model)
        log("saving optimizer states to %s..." % args.save_opt)
        adam.save(args.save_opt)
        log("done saving model and optimizer checkpoints to disk.")

    def load_checkpoint():
        """Restore the model and optimizer states from disk."""
        assert exists(args.load_opt) and exists(args.load_model), \
            "--load-model and/or --load-opt misspecified"
        log("loading model from %s..." % args.load_model)
        dmm.load_state_dict(torch.load(args.load_model))
        log("loading optimizer states from %s..." % args.load_opt)
        adam.load(args.load_opt)
        log("done loading model and optimizer states.")

    def process_minibatch(epoch, which_mini_batch, shuffled_indices):
        """Prepare one mini-batch, take a gradient step and return its loss."""
        # by default the KL annealing factor is unity
        annealing_factor = 1.0
        if args.annealing_epochs > 0 and epoch < args.annealing_epochs:
            # KL annealing factor appropriate for the current mini-batch
            # in the current epoch
            min_af = args.minimum_annealing_factor
            steps_done = float(which_mini_batch + epoch * N_mini_batches + 1)
            steps_total = float(args.annealing_epochs * N_mini_batches)
            annealing_factor = min_af + (1.0 - min_af) * (steps_done / steps_total)

        # compute which sequences in the training set we should grab
        mini_batch_start = which_mini_batch * args.mini_batch_size
        mini_batch_end = np.min([(which_mini_batch + 1) * args.mini_batch_size,
                                 N_train_data])
        mini_batch_indices = shuffled_indices[mini_batch_start:mini_batch_end]

        # grab a fully prepped mini-batch using the data loader helper
        prepped = poly.get_mini_batch(mini_batch_indices, training_data_sequences,
                                      training_seq_lengths, cuda=args.cuda)
        mini_batch, mini_batch_reversed, mini_batch_mask, mini_batch_seq_lengths = prepped

        # do an actual gradient step and hand back the training loss
        return svi.step(mini_batch, mini_batch_reversed, mini_batch_mask,
                        mini_batch_seq_lengths, annealing_factor)

    def do_evaluation():
        """Return the per-time-slice validation and test losses."""
        # put the RNN into evaluation mode (i.e. turn off drop-out if applicable)
        dmm.rnn.eval()

        eval_sets = (
            (val_batch, val_batch_reversed, val_batch_mask, val_seq_lengths),
            (test_batch, test_batch_reversed, test_batch_mask, test_seq_lengths),
        )
        # validation first, then test
        val_nll, test_nll = [
            svi.evaluate_loss(*eval_set) / torch.sum(eval_set[-1])
            for eval_set in eval_sets
        ]

        # put the RNN back into training mode (i.e. turn on drop-out if applicable)
        dmm.rnn.train()
        return val_nll, test_nll

    # if checkpoint files provided, load model and optimizer states from disk
    # before we start training
    resume_requested = args.load_opt != '' and args.load_model != ''
    if resume_requested:
        load_checkpoint()

    #################
    # TRAINING LOOP #
    #################
    previous_time = time.time()
    for epoch in range(args.num_epochs):
        # if specified, save model and optimizer states to disk every
        # checkpoint_freq epochs
        checkpoint_due = (args.checkpoint_freq > 0 and epoch > 0
                          and epoch % args.checkpoint_freq == 0)
        if checkpoint_due:
            save_checkpoint()

        # accumulator for our estimate of the negative log likelihood
        # (or rather -elbo) for this epoch
        epoch_nll = 0.0
        # prepare mini-batch subsampling indices for this epoch
        shuffled_indices = torch.randperm(N_train_data)

        # process each mini-batch; this is where we take gradient steps
        for which_mini_batch in range(N_mini_batches):
            epoch_nll += process_minibatch(epoch, which_mini_batch, shuffled_indices)

        # report training diagnostics
        current_time = time.time()
        epoch_time = current_time - previous_time
        previous_time = current_time
        log("[training epoch %04d] %.4f \t\t\t\t(dt = %.3f sec)" %
            (epoch, epoch_nll / N_train_time_slices, epoch_time))

        # do evaluation on test and validation data and report results
        evaluation_due = (val_test_frequency > 0 and epoch > 0
                          and epoch % val_test_frequency == 0)
        if evaluation_due:
            val_nll, test_nll = do_evaluation()
            log("[val/test epoch %04d] %.4f %.4f" % (epoch, val_nll, test_nll))
def main(args):
    """Fit a capture-recapture model with SVI and print the final loss.

    :param args: parsed command line namespace. Attributes read here:
        ``dataset`` (``"dipper"`` or ``"vole"``), ``model`` (key into
        ``models``), ``learning_rate``, ``tmc``, ``tmc_num_samples`` and
        ``num_steps``.
    :raises ValueError: if ``args.dataset`` is unknown, or if model "4"/"5"
        is requested for the vole data (which has no sex covariate).
    """
    pyro.set_rng_seed(0)
    pyro.clear_param_store()
    pyro.enable_validation(__debug__)

    # load data; the CSV files are looked up next to this script
    data_dir = os.path.dirname(os.path.abspath(__file__))
    if args.dataset == "dipper":
        capture_history_file = data_dir + '/dipper_capture_history.csv'
    elif args.dataset == "vole":
        capture_history_file = data_dir + '/meadow_voles_capture_history.csv'
    else:
        raise ValueError("Available datasets are \'dipper\' and \'vole\'.")

    # the first CSV column is dropped, the rest is the capture history
    capture_history = torch.tensor(
        np.genfromtxt(capture_history_file, delimiter=',')).float()[:, 1:]
    N, T = capture_history.shape
    print(
        "Loaded {} capture history for {} individuals collected over {} time periods."
        .format(args.dataset, N, T))

    if args.dataset == "dipper" and args.model in ["4", "5"]:
        sex_file = data_dir + '/dipper_sex.csv'
        sex = torch.tensor(np.genfromtxt(sex_file,
                                         delimiter=',')).float()[:, 1]
        print("Loaded dipper sex data.")
    elif args.dataset == "vole" and args.model in ["4", "5"]:
        # The whole message is parenthesized so that .format() fills the
        # placeholder; a method call binds tighter than `+`, so without the
        # parentheses only the second literal would be formatted.
        raise ValueError(
            ("Cannot run model_{} on meadow voles data, since we lack sex "
             "information for these animals.").format(args.model))
    else:
        sex = None

    model = models[args.model]

    # we use poutine.block to only expose the continuous latent variables
    # in the models to AutoDiagonalNormal (all of which begin with 'phi'
    # or 'rho')
    def expose_fn(msg):
        return msg["name"][0:3] in ['phi', 'rho']

    # we use a mean field diagonal normal variational distributions (i.e. guide)
    # for the continuous latent variables.
    guide = AutoDiagonalNormal(poutine.block(model, expose_fn=expose_fn))

    # since we enumerate the discrete random variables,
    # we need to use TraceEnum_ELBO or TraceTMC_ELBO.
    optim = Adam({'lr': args.learning_rate})
    if args.tmc:
        elbo = TraceTMC_ELBO(max_plate_nesting=1)
        # only sites marked for parallel enumeration get the TMC config
        tmc_model = poutine.infer_config(
            model,
            lambda msg: {
                "num_samples": args.tmc_num_samples,
                "expand": False
            } if msg["infer"].get("enumerate", None) == "parallel" else {})  # noqa: E501
        svi = SVI(tmc_model, guide, optim, elbo)
    else:
        elbo = TraceEnum_ELBO(max_plate_nesting=1,
                              num_particles=20,
                              vectorize_particles=True)
        svi = SVI(model, guide, optim, elbo)

    losses = []

    print(
        "Beginning training of model_{} with Stochastic Variational Inference."
        .format(args.model))

    for step in range(args.num_steps):
        loss = svi.step(capture_history, sex)
        losses.append(loss)
        # report the mean of the last (up to) 20 losses every 20 steps and
        # on the final step
        if step % 20 == 0 and step > 0 or step == args.num_steps - 1:
            print("[iteration %03d] loss: %.3f" %
                  (step, np.mean(losses[-20:])))

    # evaluate final trained model
    elbo_eval = TraceEnum_ELBO(max_plate_nesting=1,
                               num_particles=2000,
                               vectorize_particles=True)
    svi_eval = SVI(model, guide, optim, elbo_eval)
    print("Final loss: %.4f" % svi_eval.evaluate_loss(capture_history, sex))
def run_inference(dataset_obj: SingleCellRNACountsDataset,
                  args) -> RemoveBackgroundPyroModel:
    """Build the encoder/decoder model, train it, and return it.

    Args:
        dataset_obj: Input data in the form of a SingleCellRNACountsDataset
            object.
        args: Input command line parsed arguments.

    Returns:
        model: cellbender.model.RemoveBackgroundPyroModel that has had
            inference run.

    """

    # Trimmed count matrix (transformed if called for); its second dimension
    # sizes the encoder input and decoder output.
    count_matrix = dataset_obj.get_count_matrix()
    n_genes = count_matrix.shape[1]

    # Pyro configuration: validation is switched off to improve speed.
    pyro.enable_validation(False)
    pyro.distributions.enable_validation(False)
    pyro.set_rng_seed(0)
    pyro.clear_param_store()

    # Variational autoencoder, encoder side.
    encoder_z = EncodeZ(input_dim=n_genes,
                        hidden_dims=args.z_hidden_dims,
                        output_dim=args.z_dim,
                        input_transform='normalize')

    priors = dataset_obj.priors
    encoder_other = EncodeNonZLatents(
        n_genes=n_genes,
        z_dim=args.z_dim,
        hidden_dims=consts.ENC_HIDDEN_DIMS,
        log_count_crossover=priors['log_counts_crossover'],
        prior_log_cell_counts=np.log1p(priors['cell_counts']),
        input_transform='normalize')

    encoder = CompositeEncoder({'z': encoder_z, 'other': encoder_other})

    # Variational autoencoder, decoder side (hidden dims mirrored).
    decoder = Decoder(input_dim=args.z_dim,
                      hidden_dims=args.z_hidden_dims[::-1],
                      output_dim=n_genes)

    # The pyro model used for variational inference.
    model = RemoveBackgroundPyroModel(model_type=args.model,
                                      encoder=encoder,
                                      decoder=decoder,
                                      dataset_obj=dataset_obj,
                                      use_cuda=args.use_cuda)

    # DataLoaders. `frac` is the fraction of barcodes used for training.
    frac = args.training_fraction
    half_training_barcodes = frac * dataset_obj.analyzed_barcode_inds.size / 2
    batch_size = int(min(300, half_training_barcodes))

    loaders = prep_data_for_training(
        dataset=count_matrix,
        empty_drop_dataset=dataset_obj.get_count_matrix_empties(),
        random_state=dataset_obj.random,
        batch_size=batch_size,
        training_fraction=frac,
        fraction_empties=args.fraction_empties,
        shuffle=True,
        use_cuda=args.use_cuda)
    train_loader, test_loader = loaders

    # Optimizer wrapped in a one-cycle learning rate scheduler.
    optimizer = pyro.optim.clipped_adam.ClippedAdam
    optimizer_args = {'lr': args.learning_rate, 'clip_norm': 10.}

    minibatches_per_epoch = int(
        np.ceil(len(train_loader) / train_loader.batch_size).item())
    scheduler = pyro.optim.OneCycleLR({
        'optimizer': optimizer,
        'max_lr': args.learning_rate * 10,
        'steps_per_epoch': minibatches_per_epoch,
        'epochs': args.epochs,
        'optim_args': optimizer_args,
    })

    # Loss function: JIT or plain, and Trace or TraceEnum by model type.
    is_simple_model = args.model == "simple"
    if args.use_jit:
        # Call guide() once as a warm-up.
        warmup_input = torch.zeros([10, dataset_obj.analyzed_gene_inds.size])
        model.guide(warmup_input.to(model.device))

        if is_simple_model:
            loss_function = JitTrace_ELBO()
        else:
            loss_function = JitTraceEnum_ELBO(
                max_plate_nesting=1, strict_enumeration_warning=False)
    elif is_simple_model:
        loss_function = Trace_ELBO()
    else:
        loss_function = TraceEnum_ELBO(max_plate_nesting=1)

    # Inference process.
    svi = SVI(model.model, model.guide, scheduler, loss=loss_function)

    # Training.
    run_training(model,
                 svi,
                 train_loader,
                 test_loader,
                 epochs=args.epochs,
                 test_freq=5)

    return model
b = pyro.sample('b'.format(''), dist.Gamma(Variable((5.63887222899)*torch.ones([amb(N)])),Variable((40.1978121928)*torch.ones([amb(N)])))) with pyro.iarange('p_range_'.format(''), N): p = pyro.sample('p'.format(''), dist.Beta(Variable((52.1419233118)*torch.ones([amb(N)])),Variable((83.6618285099)*torch.ones([amb(N)])))) pyro.sample('obs__100'.format(), dist.Beta(w*x+b,p), obs=y) def guide(y,x,N): arg_1 = torch.nn.Softplus()(pyro.param('arg_1', Variable(torch.ones((amb(1))), requires_grad=True))) arg_2 = torch.nn.Softplus()(pyro.param('arg_2', Variable(torch.ones((amb(1))), requires_grad=True))) w = pyro.sample('w'.format(''), dist.Beta(arg_1,arg_2)) arg_3 = torch.nn.Softplus()(pyro.param('arg_3', Variable(torch.ones((amb(N))), requires_grad=True))) arg_4 = torch.nn.Softplus()(pyro.param('arg_4', Variable(torch.ones((amb(N))), requires_grad=True))) with pyro.iarange('b_prange'): b = pyro.sample('b'.format(''), dist.Gamma(arg_3,arg_4)) arg_5 = torch.nn.Softplus()(pyro.param('arg_5', Variable(torch.ones((amb(N))), requires_grad=True))) arg_6 = torch.nn.Softplus()(pyro.param('arg_6', Variable(torch.ones((amb(N))), requires_grad=True))) with pyro.iarange('p_prange'): p = pyro.sample('p'.format(''), dist.Beta(arg_5,arg_6)) pass optim = Adam({'lr': 0.05}) svi = SVI(model, guide, optim, loss=Trace_ELBO() if pyro.__version__ > '0.1.2' else 'ELBO') for i in range(4000): loss = svi.step(y,x,N) if ((i % 1000) == 0): print(loss) for name in pyro.get_param_store().get_all_param_names(): print(('{0} : {1}'.format(name, pyro.param(name).data.numpy()))) print('w_mean', np.array2string(dist.Beta(pyro.param('arg_1'), pyro.param('arg_2')).mean.detach().numpy(), separator=',')) print('b_mean', np.array2string(dist.Gamma(pyro.param('arg_3'), pyro.param('arg_4')).mean.detach().numpy(), separator=',')) print('p_mean', np.array2string(dist.Beta(pyro.param('arg_5'), pyro.param('arg_6')).mean.detach().numpy(), separator=','))
sw_param = softplus(pyro.param("guide_log_sigma_weight", w_log_sig)) mb_param = pyro.param("guide_mean_bias", b_mu) sb_param = softplus(pyro.param("guide_log_sigma_bias", b_log_sig)) # gaussian priors for w and b w_prior = Normal(mw_param, sw_param) b_prior = Normal(mb_param, sb_param) priors = {'linear.weight': w_prior, 'linear.bias': b_prior} # overloading the parameters in the module with random samples from the prior lifted_module = pyro.random_module("module", regression_model, priors) # sample a nn lifted_module() # instantiate optim and inference objects optim = Adam({"lr": 0.01}) svi = SVI(model, guide, optim, loss="ELBO") # get array of batch indices def get_batch_indices(N, batch_size): all_batches = np.arange(0, N, batch_size) if all_batches[-1] != N: all_batches = list(all_batches) + [N] return all_batches def main(): parser = argparse.ArgumentParser(description="parse args") parser.add_argument('-n', '--num-epochs', default=1000, type=int) parser.add_argument('-b', '--batch-size', default=N, type=int) parser.add_argument('--cuda', action='store_true')
def main(args):
    """Download the faces data if needed and train the sparse gamma DEF.

    :param args: parsed command line namespace. Attributes read here:
        ``guide`` (``"auto"``, ``"easy"`` or ``"custom"``),
        ``eval_particles``, ``num_epochs`` and ``eval_frequency``.
    """
    # load data
    print("loading training data...")
    dataset_directory = get_data_directory(__file__)
    dataset_path = os.path.join(dataset_directory, "faces_training.csv")
    dataset_missing = not os.path.exists(dataset_path)
    if dataset_missing:
        try:
            os.makedirs(dataset_directory)
        except OSError as err:
            # an already existing directory is fine; anything else is not
            if err.errno != errno.EEXIST:
                raise
        wget.download(
            "https://d2hg8soec8ck9v.cloudfront.net/datasets/faces_training.csv",
            dataset_path,
        )
    data = torch.tensor(np.loadtxt(dataset_path, delimiter=",")).float()

    sparse_gamma_def = SparseGammaDEF()

    # Due to the special logic in the custom guide (e.g. parameter clipping),
    # the custom guide seems to be more amenable to higher learning rates.
    # Nevertheless, the easy guide performs the best (presumably because of
    # numerical instabilities related to the gamma distribution in the
    # custom guide).
    if args.guide in ["auto", "easy"]:
        learning_rate, momentum = 0.2, 0.05
    else:
        learning_rate, momentum = 4.5, 0.1
    opt = optim.AdagradRMSProp({"eta": learning_rate, "t": momentum})

    # use one of our three different guide types
    if args.guide == "easy":
        guide = MyEasyGuide(sparse_gamma_def.model)
    elif args.guide == "auto":
        guide = AutoDiagonalNormal(sparse_gamma_def.model,
                                   init_loc_fn=init_to_feasible)
    else:
        guide = sparse_gamma_def.guide

    # the svi object used during training; TraceMeanField_ELBO gives us
    # analytic KL divergences
    training_elbo = TraceMeanField_ELBO()
    svi = SVI(sparse_gamma_def.model, guide, opt, loss=training_elbo)

    # svi_eval is used during evaluation; since the model is written in a
    # fully vectorized way, this computation can be done efficiently with
    # large tensor ops
    evaluation_elbo = TraceMeanField_ELBO(num_particles=args.eval_particles,
                                          vectorize_particles=True)
    svi_eval = SVI(sparse_gamma_def.model, guide, opt, loss=evaluation_elbo)

    print("\nbeginning training with %s guide..." % args.guide)

    # the training loop
    for k in range(args.num_epochs):
        loss = svi.step(data)
        # for the custom guide we clip parameters after each gradient step
        if args.guide == "custom":
            clip_params()

        # evaluate every eval_frequency epochs (except epoch 0) and on the
        # last epoch
        evaluate_now = (k % args.eval_frequency == 0 and k > 0) \
            or k == args.num_epochs - 1
        if evaluate_now:
            loss = svi_eval.evaluate_loss(data)
            print("[epoch %04d] training elbo: %.4g" % (k, -loss))
def main(args):
    """
    run inference for SS-VAE

    Attributes read from ``args``: ``seed``, ``visualize``, ``z_dim``,
    ``hidden_layers``, ``cuda``, ``enum_discrete``, ``aux_loss_multiplier``,
    ``learning_rate``, ``beta_1``, ``jit``, ``aux_loss``, ``logfile``,
    ``batch_size``, ``sup_num`` and ``num_epochs``.

    :param args: arguments for SS-VAE
    :return: None
    """
    if args.seed is not None:
        pyro.set_rng_seed(args.seed)

    viz = None
    if args.visualize:
        viz = Visdom()
        mkdir_p("./vae_results")

    # batch_size: number of images (and labels) to be considered in a batch
    ss_vae = SSVAE(z_dim=args.z_dim,
                   hidden_layers=args.hidden_layers,
                   use_cuda=args.cuda,
                   config_enum=args.enum_discrete,
                   aux_loss_multiplier=args.aux_loss_multiplier)

    # setup the optimizer
    adam_params = {"lr": args.learning_rate, "betas": (args.beta_1, 0.999)}
    optimizer = Adam(adam_params)

    # set up the loss(es) for inference. wrapping the guide in config_enumerate builds the loss as a sum
    # by enumerating each class label for the sampled discrete categorical distribution in the model
    guide = config_enumerate(ss_vae.guide, args.enum_discrete, expand=True)
    elbo = (JitTraceEnum_ELBO if args.jit else TraceEnum_ELBO)(
        max_plate_nesting=1)
    loss_basic = SVI(ss_vae.model, guide, optimizer, loss=elbo)

    # build a list of all losses considered
    losses = [loss_basic]

    # aux_loss: whether to use the auxiliary loss from NIPS 14 paper (Kingma et al)
    if args.aux_loss:
        elbo = JitTrace_ELBO() if args.jit else Trace_ELBO()
        loss_aux = SVI(ss_vae.model_classify,
                       ss_vae.guide_classify,
                       optimizer,
                       loss=elbo)
        losses.append(loss_aux)

    # Bind the name before entering the try block: if open() itself fails,
    # the finally clause must not trip over an unbound `logger` and mask
    # the real error.
    logger = None
    try:
        # setup the logger if a filename is provided
        logger = open(args.logfile, "w") if args.logfile else None

        data_loaders = setup_data_loaders(MNISTCached,
                                          args.cuda,
                                          args.batch_size,
                                          sup_num=args.sup_num)

        # how often would a supervised batch be encountered during inference
        # e.g. if sup_num is 3000, we would have every 16th = int(50000/3000) batch supervised
        # until we have traversed through the all supervised batches
        periodic_interval_batches = int(MNISTCached.train_data_size /
                                        (1.0 * args.sup_num))

        # number of unsupervised examples
        unsup_num = MNISTCached.train_data_size - args.sup_num

        # initializing local variables to maintain the best validation accuracy
        # seen across epochs over the supervised training set
        # and the corresponding testing set and the state of the networks
        best_valid_acc, corresponding_test_acc = 0.0, 0.0

        # run inference for a certain number of epochs
        for i in range(0, args.num_epochs):

            # get the losses for an epoch
            epoch_losses_sup, epoch_losses_unsup = \
                run_inference_for_epoch(data_loaders, losses, periodic_interval_batches)

            # compute average epoch losses i.e. losses per example
            avg_epoch_losses_sup = map(lambda v: v / args.sup_num,
                                       epoch_losses_sup)
            avg_epoch_losses_unsup = map(lambda v: v / unsup_num,
                                         epoch_losses_unsup)

            # store the loss and validation/testing accuracies in the logfile
            str_loss_sup = " ".join(map(str, avg_epoch_losses_sup))
            str_loss_unsup = " ".join(map(str, avg_epoch_losses_unsup))

            str_print = "{} epoch: avg losses {}".format(
                i, "{} {}".format(str_loss_sup, str_loss_unsup))

            validation_accuracy = get_accuracy(data_loaders["valid"],
                                               ss_vae.classifier,
                                               args.batch_size)
            str_print += " validation accuracy {}".format(validation_accuracy)

            # this test accuracy is only for logging, this is not used
            # to make any decisions during training
            test_accuracy = get_accuracy(data_loaders["test"],
                                         ss_vae.classifier, args.batch_size)
            str_print += " test accuracy {}".format(test_accuracy)

            # update the best validation accuracy and the corresponding
            # testing accuracy and the state of the parent module (including the networks)
            if best_valid_acc < validation_accuracy:
                best_valid_acc = validation_accuracy
                corresponding_test_acc = test_accuracy

            print_and_log(logger, str_print)

        final_test_accuracy = get_accuracy(data_loaders["test"],
                                           ss_vae.classifier, args.batch_size)
        print_and_log(
            logger,
            "best validation accuracy {} corresponding testing accuracy {} "
            "last testing accuracy {}".format(best_valid_acc,
                                              corresponding_test_acc,
                                              final_test_accuracy))

        # visualize the conditional samples
        visualize(ss_vae, viz, data_loaders["test"])
    finally:
        # close the logger file object if we opened it earlier
        if logger is not None:
            logger.close()
# Training loop num_epochs = 100 test_frequency = 1 vrae = VRAE(dataset, VOCAB_SIZE, ENCODER_HIDDEN_SIZE, Z_DIMENSION, DECODER_HIDDEN_SIZE, MAX_LENGTH, NUM_LAYERS_FOR_RNNS, USE_CUDA) optimizer = optim.Adam({"lr": LEARNING_RATE}) svi = SVI(vrae.model, vrae.guide, optimizer, loss="ELBO") for epoch in range(30): print("Start epoch!") # initialize loss accumulator epoch_loss = 0. # do a training epoch over each mini-batch x # returned by the data loader for convo_i in range(dataset.size()): x, y = dataset.next_batch() #HACK for overfitting # y = [100, 30, 11, 1, 0, 24, 8, 4, 17, 11, 1, 6, 0, 9, 4, 8, 6, 24, 9, 1, 101] x = dataset.to_onehot(x, long_type=False)
Delta(_corr_chol[t, ...]).to_event(1).to_event(1)) with pyro.plate("mu_plate", T): _q_std = torch.sqrt(1. / q_prec.view(-1, D)) q_sigma_chol = torch.bmm(torch.diag_embed(_q_std), q_corr_chol) q_mu = pyro.sample("mu", MultivariateNormal(tau, scale_tril=q_sigma_chol)) with pyro.plate("data", N): z = pyro.sample("z", Categorical(pi)) T = 5 optim = Adam({"lr": 0.01}) svi = SVI(model, guide, optim, loss=Trace_ELBO(num_particles=15)) def train(num_iterations): losses = [] pyro.clear_param_store() fig = plt.figure(figsize=(5, 5)) for j in tqdm(range(num_iterations)): loss = svi.step(data) losses.append(loss) if (j % 100) == 0: centers, covars = marginal(guide, num_samples=250) animate(fig.gca(), centers, covars)
def main(args):
    """Train and/or validate the VanillaDMM model.

    When ``args.validonly`` is false the model is trained from
    ``args.startepoch`` to ``args.endepoch`` (with an evaluation pass on the
    last epoch and periodic checkpoints). When it is true, only the
    prediction-based validation over the first samples of the validation
    set is run.

    :param args: parsed command line namespace. Attributes read here:
        ``runname``, ``trialnumber``, ``datapath``, ``endepoch``,
        ``startepoch``, ``validonly``, ``modelsavepath`` and
        ``validpredlength``.
    """
    # Init tensorboard
    writer = SummaryWriter('./runs/' + args.runname + str(args.trialnumber))
    model_name = 'VanillaDMM'

    # Set evaluation log file
    evaluation_logpath = './logs/{}/evaluation_result.log'.format(
        model_name.lower())
    log_evaluation(evaluation_logpath,
                   'Evaluation Trial - {}\n'.format(args.trialnumber))

    # Constants
    time_length = 30
    input_length_for_pred = 20
    pred_length = time_length - input_length_for_pred
    train_batch_size = 16
    valid_batch_size = 1

    # For model
    input_channels = 1
    z_channels = 50
    emission_channels = [64, 32]
    transition_channels = 64
    encoder_channels = [32, 64]
    rnn_input_dim = 256
    rnn_channels = 128
    kernel_size = 3
    pred_length = 0

    # Device checking
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")

    # Make dataset
    logging.info("Generate data")
    train_datapath = args.datapath / 'train'
    valid_datapath = args.datapath / 'valid'
    train_dataset = DiffusionDataset(train_datapath)
    valid_dataset = DiffusionDataset(valid_datapath)

    # Create data loaders from pickle data
    logging.info("Generate data loaders")
    train_dataloader = DataLoader(
        train_dataset, batch_size=train_batch_size, shuffle=True, num_workers=8)
    valid_dataloader = DataLoader(
        valid_dataset, batch_size=valid_batch_size, num_workers=4)

    # Training parameters
    width = 100
    height = 100
    input_dim = width * height

    # Create model
    logging.warning("Generate model")
    logging.warning(input_dim)
    pred_input_dim = 10
    dmm = DMM(input_channels=input_channels, z_channels=z_channels,
              emission_channels=emission_channels,
              transition_channels=transition_channels,
              encoder_channels=encoder_channels, rnn_input_dim=rnn_input_dim,
              rnn_channels=rnn_channels, kernel_size=kernel_size,
              height=height, width=width, pred_input_dim=pred_input_dim,
              num_layers=1, rnn_dropout_rate=0.0,
              num_iafs=0, iaf_dim=50, use_cuda=use_cuda)

    # Initialize model
    logging.info("Initialize model")
    epochs = args.endepoch
    learning_rate = 0.0001
    beta1 = 0.9
    beta2 = 0.999
    clip_norm = 10.0
    lr_decay = 1.0
    weight_decay = 0
    adam_params = {"lr": learning_rate, "betas": (beta1, beta2),
                   "clip_norm": clip_norm, "lrd": lr_decay,
                   "weight_decay": weight_decay}
    adam = ClippedAdam(adam_params)
    elbo = Trace_ELBO()
    svi = SVI(dmm.model, dmm.guide, adam, loss=elbo)

    # saves the model and optimizer states to disk
    save_model = Path('./checkpoints/' + model_name)

    def save_checkpoint(epoch):
        save_dir = save_model / '{}.model'.format(epoch)
        save_opt_dir = save_model / '{}.opt'.format(epoch)
        logging.info("saving model to %s..." % save_dir)
        torch.save(dmm.state_dict(), save_dir)
        logging.info("saving optimizer states to %s..." % save_opt_dir)
        adam.save(save_opt_dir)
        logging.info("done saving model and optimizer checkpoints to disk.")

    # Starting epoch
    start_epoch = args.startepoch

    # loads the model and optimizer states from disk
    if start_epoch != 0:
        load_opt = './checkpoints/' + model_name + \
            '/e{}-i188-opt-tn{}.opt'.format(start_epoch - 1, args.trialnumber)
        load_model = './checkpoints/' + model_name + \
            '/e{}-i188-tn{}.pt'.format(start_epoch - 1, args.trialnumber)

        def load_checkpoint():
            # assert exists(load_opt) and exists(load_model), \
            #     "--load-model and/or --load-opt misspecified"
            logging.info("loading model from %s..." % load_model)
            dmm.load_state_dict(torch.load(load_model, map_location=device))
            # logging.info("loading optimizer states from %s..." % load_opt)
            # adam.load(load_opt)
            # logging.info("done loading model and optimizer states.")

        if load_model != '':
            logging.info('Load checkpoint')
            load_checkpoint()

    # Validation only?
    validation_only = args.validonly

    # Train the model
    if not validation_only:
        logging.info("Training model")
        annealing_epochs = 1000
        minimum_annealing_factor = 0.2
        N_train_size = 3000
        N_mini_batches = int(N_train_size / train_batch_size +
                             int(N_train_size % train_batch_size > 0))
        for epoch in tqdm(range(start_epoch, epochs), desc='Epoch', leave=True):
            r_loss_train = 0
            dmm.train(True)
            idx = 0
            mov_avg_loss = 0
            mov_data_len = 0
            for which_mini_batch, data in enumerate(tqdm(train_dataloader, desc='Train', leave=True)):
                if annealing_epochs > 0 and epoch < annealing_epochs:
                    # compute the KL annealing factor approriate for the current mini-batch in the current epoch
                    min_af = minimum_annealing_factor
                    annealing_factor = min_af + (1.0 - min_af) * \
                        (float(which_mini_batch + epoch * N_mini_batches + 1) /
                         float(annealing_epochs * N_mini_batches))
                else:
                    # by default the KL annealing factor is unity
                    annealing_factor = 1.0

                data['observation'] = normalize(
                    data['observation'].unsqueeze(2).to(device))
                batch_size, length, _, w, h = data['observation'].shape
                data_reversed = reverse_sequences(data['observation'])
                # Place the mask on the selected device rather than calling
                # .cuda() unconditionally, so CPU-only hosts work too.
                data_mask = torch.ones(
                    batch_size, length, input_channels, w, h).to(device)

                loss = svi.step(data['observation'],
                                data_reversed, data_mask, annealing_factor)

                # Running losses
                mov_avg_loss += loss
                mov_data_len += batch_size

                r_loss_train += loss
                idx += 1

            # Average losses
            train_loss_avg = r_loss_train / (len(train_dataset) * time_length)
            writer.add_scalar('Loss/train', train_loss_avg, epoch)
            logging.info("Epoch: %d, Training loss: %1.5f",
                         epoch, train_loss_avg)

            # # Time to time evaluation
            if epoch == epochs - 1:
                for temp_pred_length in [20]:
                    r_loss_valid = 0
                    r_loss_loc_valid = 0
                    r_loss_scale_valid = 0
                    r_loss_latent_valid = 0
                    dmm.train(False)
                    val_pred_length = temp_pred_length
                    val_pred_input_length = 10
                    with torch.no_grad():
                        for i, data in enumerate(tqdm(valid_dataloader, desc='Eval', leave=True)):
                            data['observation'] = normalize(
                                data['observation'].unsqueeze(2).to(device))
                            batch_size, length, _, w, h = data['observation'].shape
                            data_reversed = reverse_sequences(
                                data['observation'])
                            data_mask = torch.ones(
                                batch_size, length, input_channels, w, h).to(device)

                            pred_tensor = data['observation'][:, :input_length_for_pred, :, :, :]
                            pred_tensor_reversed = reverse_sequences(
                                pred_tensor)
                            pred_tensor_mask = torch.ones(
                                batch_size, input_length_for_pred, input_channels, w, h).to(device)

                            ground_truth = data['observation'][:, input_length_for_pred:, :, :, :]

                            val_nll = svi.evaluate_loss(
                                data['observation'], data_reversed, data_mask)

                            preds, _, loss_loc, loss_scale = do_prediction_rep_inference(
                                dmm, pred_tensor_mask, val_pred_length, val_pred_input_length, data['observation'])

                            ground_truth = denormalize(
                                data['observation'].squeeze().cpu().detach()
                            )
                            pred_with_input = denormalize(
                                torch.cat(
                                    [data['observation'][:, :-val_pred_length, :, :, :].squeeze(),
                                     preds.squeeze()],
                                    dim=0
                                ).cpu().detach()
                            )

                            # Running losses
                            r_loss_valid += val_nll
                            r_loss_loc_valid += loss_loc
                            r_loss_scale_valid += loss_scale

                    # Average losses
                    valid_loss_avg = r_loss_valid / \
                        (len(valid_dataset) * time_length)
                    valid_loss_loc_avg = r_loss_loc_valid / \
                        (len(valid_dataset) * val_pred_length * width * height)
                    valid_loss_scale_avg = r_loss_scale_valid / \
                        (len(valid_dataset) * val_pred_length * width * height)
                    writer.add_scalar('Loss/test', valid_loss_avg, epoch)
                    writer.add_scalar(
                        'Loss/test_obs', valid_loss_loc_avg, epoch)
                    writer.add_scalar('Loss/test_scale',
                                      valid_loss_scale_avg, epoch)
                    logging.info("Validation loss: %1.5f", valid_loss_avg)
                    logging.info("Validation obs loss: %1.5f",
                                 valid_loss_loc_avg)
                    logging.info("Validation scale loss: %1.5f",
                                 valid_loss_scale_avg)
                    log_evaluation(evaluation_logpath, "Validation obs loss for {}s pred {}: {}\n".format(
                        val_pred_length, args.trialnumber, valid_loss_loc_avg))
                    log_evaluation(evaluation_logpath, "Validation scale loss for {}s pred {}: {}\n".format(
                        val_pred_length, args.trialnumber, valid_loss_scale_avg))

            # Save model
            if epoch % 50 == 0 or epoch == epochs - 1:
                torch.save(dmm.state_dict(), args.modelsavepath / model_name /
                           'e{}-i{}-tn{}.pt'.format(epoch, idx, args.trialnumber))
                adam.save(args.modelsavepath / model_name /
                          'e{}-i{}-opt-tn{}.opt'.format(epoch, idx, args.trialnumber))

    # Last validation after training
    # Look at no more than the first 100 samples, and never index past the
    # end of a smaller validation set.
    test_samples_indices = range(min(100, len(valid_dataset)))
    total_n = 0
    if validation_only:
        r_loss_loc_valid = 0
        r_loss_scale_valid = 0
        r_loss_latent_valid = 0
        dmm.train(False)
        val_pred_length = args.validpredlength
        val_pred_input_length = 10
        with torch.no_grad():
            for i in tqdm(test_samples_indices, desc='Valid', leave=True):
                # Data processing
                data = valid_dataset[i]
                # Samples containing NaNs are skipped and not counted
                if torch.isnan(torch.sum(data['observation'])):
                    print("Skip {}".format(i))
                    continue
                else:
                    total_n += 1
                data['observation'] = normalize(
                    data['observation'].unsqueeze(0).unsqueeze(2).to(device))
                batch_size, length, _, w, h = data['observation'].shape
                data_reversed = reverse_sequences(data['observation'])
                data_mask = torch.ones(
                    batch_size, length, input_channels, w, h).to(device)

                # Prediction
                pred_tensor_mask = torch.ones(
                    batch_size, input_length_for_pred, input_channels, w, h).to(device)
                preds, _, loss_loc, loss_scale = do_prediction_rep_inference(
                    dmm, pred_tensor_mask, val_pred_length, val_pred_input_length, data['observation'])

                ground_truth = denormalize(
                    data['observation'].squeeze().cpu().detach()
                )
                pred_with_input = denormalize(
                    torch.cat(
                        [data['observation'][:, :-val_pred_length, :, :, :].squeeze(),
                         preds.squeeze()],
                        dim=0
                    ).cpu().detach()
                )

                # Save samples
                if i < 5:
                    save_dir_samples = Path('./samples/more_variance_long')
                    # Make sure the output directory exists before writing
                    save_dir_samples.mkdir(parents=True, exist_ok=True)
                    with open(save_dir_samples / '{}-gt-test.pkl'.format(i), 'wb') as fout:
                        pickle.dump(ground_truth, fout)
                    with open(save_dir_samples / '{}-vanilladmm-pred-test.pkl'.format(i), 'wb') as fout:
                        pickle.dump(pred_with_input, fout)

                # Running losses
                r_loss_loc_valid += loss_loc
                r_loss_scale_valid += loss_scale
                r_loss_latent_valid += np.sum((preds.squeeze().detach().cpu().numpy(
                ) - data['latent'][time_length - val_pred_length:, :, :].detach().cpu().numpy()) ** 2)

        # Average losses
        test_samples_indices = range(total_n)
        print(total_n)
        if total_n == 0:
            # Every candidate sample was skipped (or the set is empty);
            # the averages below would divide by zero.
            logging.warning("No usable validation samples; skipping summary")
            return
        valid_loss_loc_avg = r_loss_loc_valid / \
            (total_n * val_pred_length * width * height)
        valid_loss_scale_avg = r_loss_scale_valid / \
            (total_n * val_pred_length * width * height)
        valid_loss_latent_avg = r_loss_latent_valid / \
            (total_n * val_pred_length * width * height)
        logging.info("Validation obs loss for %ds pred VanillaDMM: %f",
                     val_pred_length, valid_loss_loc_avg)
        logging.info("Validation latent loss: %f", valid_loss_latent_avg)

        with open('VanillaDMMResult.log', 'a+') as fout:
            validation_log = 'Pred {}s VanillaDMM: {}\n'.format(
                val_pred_length, valid_loss_loc_avg)
            fout.write(validation_log)
# For every evaluation engine, collect the growing prefixes of its feature
# rows: entry i holds the first i rows of that engine's sequence columns.
# NOTE(review): `sequence_cols` is read inside this loop but assigned only
# below; presumably it is already defined earlier in the session. Confirm.
engines = []
for engine_id in engines_eval:
    engines.append([])
    train_one_eng = train_df[train_df.id == engine_id]
    for i in range(train_one_eng.shape[0]):
        engines[-1].append(train_one_eng[sequence_cols].values[:i])

# Feature columns: cycle/settings plus the sensor readings s1..s21
sensor_cols = ['s' + str(i) for i in range(1, 22)]
sequence_cols = ['cycle', 'setting1', 'setting2', 'setting3', 'cycle_norm']
sequence_cols.extend(sensor_cols)

# Fold the validation split into the training data
trainX = np.vstack([trainX, valX])
trainY = np.vstack([trainY, valY])

optim = Adam({'lr': 0.005})
svi = SVI(model.model, model.guide, optim, loss='ELBO', num_particles=1)

# Convert the numpy arrays to tensors of type `ftype`; targets lose their
# trailing singleton dimension first
y_data = trainY.squeeze(-1)
x_data, y_data = torch.tensor(trainX).type(ftype), torch.tensor(y_data).type(
    ftype)

y_test = testY.squeeze(-1)
x_test, y_test = torch.tensor(testX).type(ftype), torch.tensor(y_test).type(
    ftype)


def get_batch_indices(N, batch_size):
    """Return the batch boundary offsets ``0, batch_size, ...`` ending in ``N``.

    Consecutive entries delimit one batch each. The result is always a
    list; when the range is empty (e.g. ``N == 0``) it is ``[N]``.

    :param N: total number of items.
    :param batch_size: step between consecutive batch starts.
    :return: list of offsets whose last element is ``N``.
    """
    all_batches = list(np.arange(0, N, batch_size))
    # Append the end offset unless it is already the last entry; the
    # emptiness check avoids indexing into an empty range.
    if not all_batches or all_batches[-1] != N:
        all_batches.append(N)
    return all_batches
def main(args):
    """Train the selected model with SVI and log train/test losses.

    Loads the ``poly.JSB_CHORALES`` data, drops notes that never occur in the
    training sequences, fits point estimates of the ``probs_*`` sites with an
    ``AutoDelta`` guide, then logs the per-observation loss on the training
    and test splits and the total number of learned parameters.

    Args:
        args: parsed options; this function reads ``cuda``, ``model``,
            ``truncate``, ``seed``, ``print_shapes``, ``batch_size``,
            ``learning_rate``, ``tmc``, ``tmc_num_samples``, ``jit``,
            ``time_compilation`` and ``num_steps``.

    Raises:
        NotImplementedError: if both ``args.tmc`` and ``args.jit`` are set.
    """
    if args.cuda:
        torch.set_default_tensor_type("torch.cuda.FloatTensor")

    logging.info("Loading data")
    data = poly.load_data(poly.JSB_CHORALES)

    logging.info("-" * 40)
    model = models[args.model]
    train_split = data["train"]
    logging.info("Training {} on {} sequences".format(
        model.__name__, len(train_split["sequences"])))
    sequences = train_split["sequences"]
    lengths = train_split["sequence_lengths"]

    # Keep only the notes that are played at least once in the training set
    # (this removes 37/88 notes).
    present_notes = (sequences == 1).sum(0).sum(0) > 0
    sequences = sequences[..., present_notes]

    if args.truncate:
        lengths = lengths.clamp(max=args.truncate)
        sequences = sequences[:, :args.truncate]
    num_observations = float(lengths.sum())
    pyro.set_rng_seed(args.seed)
    pyro.clear_param_store()

    # MAP Baum-Welch: MAP estimation while marginalizing out the hidden state.
    # The automatic guide learns point estimates of every conditional
    # probability table, i.e. every site whose name starts with "probs_".
    def is_probs_site(msg):
        return msg["name"].startswith("probs_")

    guide = AutoDelta(poutine.block(model, expose_fn=is_probs_site))

    # Optional debugging aid: log the shape of each site's distribution,
    # value and log_prob tensor.
    if args.print_shapes:
        first_available_dim = -2 if model is model_0 else -3
        guide_trace = poutine.trace(guide).get_trace(
            sequences, lengths, args=args, batch_size=args.batch_size)
        enum_model = poutine.enum(model, first_available_dim)
        replayed_model = poutine.replay(enum_model, guide_trace)
        model_trace = poutine.trace(replayed_model).get_trace(
            sequences, lengths, args=args, batch_size=args.batch_size)
        logging.info(model_trace.format_shapes())

    # Enumeration requires a TraceEnum elbo and declaring the
    # max_plate_nesting; model_0 is given 1, every other model 2.
    plate_nesting = 1 if model is model_0 else 2
    optimizer = Adam({"lr": args.learning_rate})
    if args.tmc:
        if args.jit:
            raise NotImplementedError(
                "jit support not yet added for TraceTMC_ELBO")
        elbo = TraceTMC_ELBO(max_plate_nesting=plate_nesting)

        def tmc_site_config(msg):
            # Only sites marked for parallel enumeration get sampled.
            if msg["infer"].get("enumerate", None) == "parallel":
                return {
                    "num_samples": args.tmc_num_samples,
                    "expand": False,
                }
            return {}

        tmc_model = poutine.infer_config(model, tmc_site_config)
        svi = SVI(tmc_model, guide, optimizer, elbo)
    else:
        elbo_cls = JitTraceEnum_ELBO if args.jit else TraceEnum_ELBO
        elbo = elbo_cls(
            max_plate_nesting=plate_nesting,
            strict_enumeration_warning=(model is not model_7),
            jit_options={"time_compilation": args.time_compilation},
        )
        svi = SVI(model, guide, optimizer, elbo)

    # Train on small minibatches, logging the per-observation loss each step.
    logging.info("Step\tLoss")
    for step in range(args.num_steps):
        loss = svi.step(sequences, lengths, args=args,
                        batch_size=args.batch_size)
        logging.info("{: >5d}\t{}".format(step, loss / num_observations))

    if args.jit and args.time_compilation:
        logging.debug("time to compile: {} s.".format(
            elbo._differentiable_loss.compile_time))

    # Evaluate on the entire training dataset, excluding the prior term so
    # the results are comparable across models.
    train_loss = elbo.loss(model, guide, sequences, lengths, args,
                           include_prior=False)
    logging.info("training loss = {}".format(train_loss / num_observations))

    # Finally evaluate on the test dataset.
    logging.info("-" * 40)
    test_split = data["test"]
    logging.info("Evaluating on {} test sequences".format(
        len(test_split["sequences"])))
    sequences = test_split["sequences"][..., present_notes]
    lengths = test_split["sequence_lengths"]
    if args.truncate:
        lengths = lengths.clamp(max=args.truncate)
    num_observations = float(lengths.sum())

    # Because unseen notes were removed above (to make the problem a bit
    # easier and for numerical stability), this test loss may not be directly
    # comparable to numbers reported on this dataset elsewhere.
    test_loss = elbo.loss(model, guide, sequences, lengths, args=args,
                          include_prior=False)
    logging.info("test loss = {}".format(test_loss / num_observations))

    # We expect models with higher capacity to perform better,
    # but eventually overfit to the training set.
    capacity = 0
    for value in pyro.get_param_store().values():
        capacity += value.reshape(-1).size(0)
    logging.info("{} capacity = {} parameters".format(model.__name__,
                                                      capacity))
def __init__(self, model, guide, pred_fn=None, lr=0.05):
    """Store the model/guide pair and build the SVI object used to fit them.

    Args:
        model: stored as ``self.model`` and passed to ``SVI`` as the model.
        guide: stored as ``self.guide`` and passed to ``SVI`` as the guide.
        pred_fn: optional value stored as ``self.pred_fn``.
        lr: learning rate handed to ``optim.Adam`` under the ``"lr"`` key.
    """
    self.model, self.guide, self.pred_fn = model, guide, pred_fn
    adam = optim.Adam({"lr": lr})
    self.svi = SVI(model, guide, adam, loss=Trace_ELBO())
mw_param = pyro.param("guide_mean_weight", w_loc) sw_param = softplus(pyro.param("guide_log_scale_weight", w_log_sig)) mb_param = pyro.param("guide_mean_bias", b_loc) sb_param = softplus(pyro.param("guide_log_scale_bias", b_log_sig)) # guide distributions for w and b w_dist = Normal(mw_param, sw_param).independent(1) b_dist = Normal(mb_param, sb_param).independent(1) dists = {'linear.weight': w_dist, 'linear.bias': b_dist} # overload the parameters in the module with random samples # from the guide distributions lifted_module = pyro.random_module("module", regression_model, dists) # sample a regressor (which also samples w and b) return lifted_module() optim = Adam({"lr": 0.05}) svi = SVI(model, guide, optim, loss=Trace_ELBO()) def linear_bayes(): pyro.clear_param_store() data = build_linear_dataset(N) for j in range(num_iterations): # calculate the loss and take a gradient step loss = svi.step(data) if j % 100 == 0: print("[iteration %04d] loss: %.4f" % (j + 1, loss / float(N))) def validation(): for name in pyro.get_param_store().get_all_param_names(): print("[%s]: %.3f" % (name, pyro.param(name).data.numpy())) def point_evaluation():
} lifted_module = pyro.random_module("module", net, priors) return lifted_module() # Reducing Learning Rate. ReduceOnPlateau is not supported. #This code works but loss doesn't get lower than constant LR. Perhaps gamma should be closer to 1.0? AdamArgs = {'lr': 1e-2} optimizer = torch.optim.Adam scheduler = pyro.optim.ExponentialLR({ 'optimizer': optimizer, 'optim_args': AdamArgs, 'gamma': 0.99995 }) svi = SVI(model, guide, scheduler, loss=Trace_ELBO()) """ optimizer = Adam({"lr": 0.01}) svi = SVI(model, guide, optimizer, loss=Trace_ELBO()) """ """ num_iterations = 1 for j in range(num_iterations): print("Epoch ", j) for batch_id, data in enumerate(training_generator): print("batch_id", batch_id, data[1][:,-1]) """ experiment_id = datetime.now().isoformat() print('Logging experiment as: ', experiment_id)
# Convert the data into tensors X_train_torch = torch.tensor(X_train_scaled) y_train_torch = torch.tensor(y_train_scaled) pyro.clear_param_store() # Provide a guide which fits a pre-defined distribution over each # hidden parameter. The AutoDiagonalNormal guide fits a normal # distribution over each coefficient and our rate parameter my_guide = AutoDiagonalNormal(model_gamma) # Initialize the SVI optimzation class my_svi = SVI(model=model_gamma, guide= my_guide, optim=ClippedAdam({"lr": 0.01, 'clip_norm': 1.0}), loss=Trace_ELBO()) losses = [] start_time = time.time() # Perform optimization for i in range(5000): loss = my_svi.step(X_train_torch, y_train_torch, california.feature_names) normalized_loss = loss/X_train_torch.shape[0]
def __init__(self, *args, step_args=None, **kwargs):
    """Build the wrapped SVI object and register ``self._update`` with the parent.

    Args:
        *args: positional arguments forwarded unchanged to ``SVI``.
        step_args: optional keyword-only value stored as ``self._step_args``;
            any falsy value (including ``None``) is replaced by a new empty
            dict.
        **kwargs: keyword arguments forwarded unchanged to ``SVI``.
    """
    self.svi = SVI(*args, **kwargs)
    self._step_args = step_args if step_args else {}
    # The parent constructor receives the bound update method.
    super(SVIEngine, self).__init__(self._update)