def main():
    """Restore a trained image VAE and display one digit decoded from noise.

    Builds the MNIST train/test datasets (downloading them if necessary) and a
    training loader, restores the VAE weights stored at ``LOAD_PATH``, decodes
    a single standard-normal latent vector and shows the result as a 28x28
    image with matplotlib.
    """
    # Load MNIST image dataset
    mnist_train_data = datasets.MNIST(
        '/home/ajays/Downloads/', download=True, transform=transforms.ToTensor()
    )
    # Not consumed below; kept so the test split is still fetched as before.
    mnist_test_data = datasets.MNIST('/home/ajays/Downloads/', train=False, download=True)

    # Only needed by the (currently disabled) training call further down.
    train_loader = torch.utils.data.DataLoader(
        mnist_train_data,
        batch_size=batch_size,
        shuffle=True,
    )

    # Instantiation
    vae = VAE(n_inputs=32)

    # *********************
    # IMAGE VAE TRAINING
    # *********************

    # Restore pre-trained weights instead of training from scratch. The model
    # lives on the CPU (its output is converted with .numpy() below), so map
    # the checkpoint there even if it was saved from a GPU session.
    vae.load_state_dict(torch.load(LOAD_PATH, map_location='cpu'))
    # To train instead of loading: vae = train_image_vae(vae, train_loader)

    # Sampling is inference: switch dropout/batch-norm style layers to eval
    # behaviour and skip autograd bookkeeping.
    vae.eval()
    with torch.no_grad():
        o_after = vae.decode(torch.randn(128))

    plt.imshow(o_after.detach().numpy().reshape((28, 28)))
    plt.show()
def _load_image_data(args, device):
    """Load the train/val/test tensors and wrap them in shuffled loaders.

    ``args.data_file`` must unpack into exactly three tensors (train, val,
    test). Every split is paired with an all-zero label column so it fits a
    ``TensorDataset``.

    Returns:
        Tuple ``(x_train, test_data, train_loader, val_loader, test_loader)``.
    """
    # map_location lets tensors saved from a GPU session load on a CPU host.
    x_train, x_val, x_test = torch.load(args.data_file, map_location=device)

    x_train = x_train.to(device)
    x_val = x_val.to(device)
    x_test = x_test.to(device)

    # Placeholder labels: one zero per example, same dtype/device as x_train.
    y_size = 1
    y_train = x_train.new_zeros(x_train.size(0), y_size)
    y_val = x_train.new_zeros(x_val.size(0), y_size)
    y_test = x_train.new_zeros(x_test.size(0), y_size)
    print(torch.__version__)

    train_data = torch.utils.data.TensorDataset(x_train, y_train)
    val_data = torch.utils.data.TensorDataset(x_val, y_val)
    test_data = torch.utils.data.TensorDataset(x_test, y_test)

    train_loader = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size, shuffle=True)
    val_loader = torch.utils.data.DataLoader(
        val_data, batch_size=args.batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(
        test_data, batch_size=args.batch_size, shuffle=True)
    print('Train data: %d batches' % len(train_loader))
    print('Val data: %d batches' % len(val_loader))
    print('Test data: %d batches' % len(test_loader))
    sys.stdout.flush()

    return x_train, test_data, train_loader, val_loader, test_loader


def _sample_from_checkpoint(vae, args, device):
    """Load ``args.sample_from`` and save 400 prior samples as two image grids."""
    save_dir = "samples/%s" % args.dataset
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(save_dir, exist_ok=True)

    vae.load_state_dict(torch.load(args.sample_from, map_location=device))
    vae.eval()
    with torch.no_grad():
        sample_z = vae.sample_from_prior(400).to(device)
        sample_x, sample_probs = vae.decode(sample_z, False)

    # Checkpoint file name minus its last three characters
    # (presumably a ".pt" suffix -- confirm against how checkpoints are named).
    ckpt_name = args.sample_from.split('/')[-1][:-3]

    image_file = 'sample_binary_from_%s.png' % ckpt_name
    save_image(sample_x.data.cpu(), os.path.join(save_dir, image_file), nrow=20)
    image_file = 'sample_cont_from_%s.png' % ckpt_name
    save_image(sample_probs.data.cpu(), os.path.join(save_dir, image_file), nrow=20)


def _evaluate_checkpoint(vae, test_data, args, device):
    """Load ``args.load_path`` and run the test-set evaluation routines."""
    print('begin evaluation')
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=50, shuffle=True)
    vae.load_state_dict(torch.load(args.load_path, map_location=device))
    vae.eval()
    with torch.no_grad():
        test(vae, test_loader, "TEST", args)
        au, _ = calc_au(vae, test_loader)
        print("%d active units" % au)
        calc_iwnll(vae, test_loader, args)


def _aggressive_encoder_updates(vae, enc_optimizer, dec_optimizer, batch_data,
                                x_train, kl_weight, args):
    """Run encoder-only optimisation steps (at most 99) for one outer batch.

    The first step uses ``batch_data``; later steps use freshly drawn,
    Bernoulli-binarised rows of ``x_train``. Every 10 steps the mean loss of
    the last window is compared with the previous window and the loop stops
    as soon as it has increased.
    """
    # replace=False cannot draw more rows than exist, so cap the draw size.
    resample_size = min(args.batch_size, x_train.size(0))

    sub_iter = 1
    batch_data_enc = batch_data
    burn_num_examples = 0
    burn_pre_loss = 1e4
    burn_cur_loss = 0
    while sub_iter < 100:
        enc_optimizer.zero_grad()
        dec_optimizer.zero_grad()

        burn_num_examples += batch_data_enc.size(0)
        loss, loss_rc, loss_kl = vae.loss(batch_data_enc, kl_weight,
                                          nsamples=args.nsamples)
        burn_cur_loss += loss.sum().item()
        loss = loss.mean(dim=-1)

        loss.backward()
        torch.nn.utils.clip_grad_norm_(vae.parameters(), clip_grad)

        # Only the encoder is stepped here; the decoder is updated by the caller.
        enc_optimizer.step()

        id_ = np.random.choice(x_train.size(0), resample_size, replace=False)
        batch_data_enc = torch.bernoulli(x_train[id_])

        if sub_iter % 10 == 0:
            burn_cur_loss = burn_cur_loss / burn_num_examples
            if burn_pre_loss - burn_cur_loss < 0:
                break
            burn_pre_loss = burn_cur_loss
            burn_cur_loss = burn_num_examples = 0

        sub_iter += 1


def main(args):
    """Train, evaluate, or sample from the image VAE, depending on ``args``.

    Modes, checked in this order:
      * ``args.sample_from`` non-empty: load that checkpoint, write sample
        grids under ``samples/<dataset>/`` and return.
      * ``args.eval`` truthy: load ``args.load_path``, evaluate on the test
        split and return.
      * otherwise: train with KL-weight annealing, optional aggressive
        encoder updates (``args.aggressive``), validation-driven learning-rate
        decay, and a final test-set evaluation of the best checkpoint.

    Relies on the module-level settings ``clip_grad``, ``decay_epoch``,
    ``lr_decay`` and ``max_decay``.
    """
    if args.save_path == '':
        make_savepath(args)
    # NOTE(review): seeding is assumed to be unconditional; confirm that it was
    # not meant to run only when the save path is auto-generated.
    seed(args)

    if args.cuda:
        print('using cuda')
    print(args)

    device = torch.device("cuda" if args.cuda else "cpu")
    args.device = device

    opt_dict = {"not_improved": 0, "lr": 1., "best_loss": 1e4}

    (x_train, test_data,
     train_loader, val_loader, test_loader) = _load_image_data(args, device)

    # Log about five times per epoch; clamp to 1 so that a loader with fewer
    # than five batches does not make `iter_ % log_niter` divide by zero.
    log_niter = max(1, len(train_loader) // 5)

    encoder = ResNetEncoderV2(args)
    decoder = PixelCNNDecoderV2(args)
    vae = VAE(encoder, decoder, args).to(device)

    if args.sample_from != '':
        _sample_from_checkpoint(vae, args, device)
        return

    if args.eval:
        _evaluate_checkpoint(vae, test_data, args, device)
        return

    enc_optimizer = optim.Adam(vae.encoder.parameters(), lr=0.001)
    dec_optimizer = optim.Adam(vae.decoder.parameters(), lr=0.001)
    opt_dict['lr'] = 0.001

    iter_ = 0
    best_loss = 1e4
    decay_cnt = best_mi = mi_not_improved = 0
    aggressive_flag = bool(args.aggressive)
    vae.train()
    start = time.time()

    # Linear KL annealing from args.kl_start to 1.0 over `warm_up` epochs.
    # With no warm-up iterations, jump to the full weight on the first step
    # instead of dividing by zero.
    kl_weight = args.kl_start
    warm_up_iters = args.warm_up * len(train_loader)
    if warm_up_iters > 0:
        anneal_rate = (1.0 - args.kl_start) / warm_up_iters
    else:
        anneal_rate = 1.0 - args.kl_start

    for epoch in range(args.epochs):
        report_kl_loss = report_rec_loss = 0
        report_num_examples = 0
        for batch_data, _ in train_loader:
            # Draw a fresh binarisation of the batch on every pass.
            batch_data = torch.bernoulli(batch_data)
            report_num_examples += batch_data.size(0)

            kl_weight = min(1.0, kl_weight + anneal_rate)

            if aggressive_flag:
                _aggressive_encoder_updates(vae, enc_optimizer, dec_optimizer,
                                            batch_data, x_train, kl_weight, args)

            enc_optimizer.zero_grad()
            dec_optimizer.zero_grad()

            loss, loss_rc, loss_kl = vae.loss(batch_data, kl_weight,
                                              nsamples=args.nsamples)
            loss = loss.mean(dim=-1)

            loss.backward()
            torch.nn.utils.clip_grad_norm_(vae.parameters(), clip_grad)

            loss_rc = loss_rc.sum()
            loss_kl = loss_kl.sum()

            # While aggressive training is on, the encoder has already been
            # stepped above, so only the decoder is updated here.
            if not aggressive_flag:
                enc_optimizer.step()
            dec_optimizer.step()

            report_rec_loss += loss_rc.item()
            report_kl_loss += loss_kl.item()

            if iter_ % log_niter == 0:
                train_loss = (report_rec_loss + report_kl_loss) / report_num_examples
                if aggressive_flag or epoch == 0:
                    vae.eval()
                    with torch.no_grad():
                        mi = calc_mi(vae, val_loader)
                        au, _ = calc_au(vae, val_loader)
                    vae.train()

                    print('epoch: %d, iter: %d, avg_loss: %.4f, kl: %.4f, mi: %.4f, recon: %.4f,'
                          'au %d, time elapsed %.2fs' %
                          (epoch, iter_, train_loss,
                           report_kl_loss / report_num_examples, mi,
                           report_rec_loss / report_num_examples, au,
                           time.time() - start))
                else:
                    print('epoch: %d, iter: %d, avg_loss: %.4f, kl: %.4f, recon: %.4f,'
                          'time elapsed %.2fs' %
                          (epoch, iter_, train_loss,
                           report_kl_loss / report_num_examples,
                           report_rec_loss / report_num_examples,
                           time.time() - start))
                sys.stdout.flush()

                report_rec_loss = report_kl_loss = 0
                report_num_examples = 0

            iter_ += 1

            # Once per pass over the training set: leave aggressive mode after
            # the calc_mi value on the validation split has failed to reach
            # its best value five times.
            if aggressive_flag and (iter_ % len(train_loader)) == 0:
                vae.eval()
                # Measurement only: no autograd graph needed (matches the
                # logging branch above).
                with torch.no_grad():
                    cur_mi = calc_mi(vae, val_loader)
                vae.train()
                if cur_mi - best_mi < 0:
                    mi_not_improved += 1
                    if mi_not_improved == 5:
                        aggressive_flag = False
                        print("STOP BURNING")
                else:
                    best_mi = cur_mi

        print('kl weight %.4f' % kl_weight)
        print('epoch: %d, VAL' % epoch)

        vae.eval()
        with torch.no_grad():
            loss, nll, kl = test(vae, val_loader, "VAL", args)
            au, _ = calc_au(vae, val_loader)
            print("%d active units" % au)

        if loss < best_loss:
            print('update best loss')
            best_loss = loss
            torch.save(vae.state_dict(), args.save_path)

        if loss > best_loss:
            opt_dict["not_improved"] += 1
            if opt_dict["not_improved"] >= decay_epoch:
                # Roll back to the best checkpoint and restart both optimizers
                # with a decayed learning rate.
                opt_dict["best_loss"] = loss
                opt_dict["not_improved"] = 0
                opt_dict["lr"] = opt_dict["lr"] * lr_decay
                vae.load_state_dict(torch.load(args.save_path, map_location=device))
                decay_cnt += 1
                print('new lr: %f' % opt_dict["lr"])
                enc_optimizer = optim.Adam(vae.encoder.parameters(), lr=opt_dict["lr"])
                dec_optimizer = optim.Adam(vae.decoder.parameters(), lr=opt_dict["lr"])
        else:
            opt_dict["not_improved"] = 0
            opt_dict["best_loss"] = loss

        if decay_cnt == max_decay:
            break

        if epoch % args.test_nepoch == 0:
            with torch.no_grad():
                loss, nll, kl = test(vae, test_loader, "TEST", args)

        vae.train()

    # Final report on the best checkpoint, including the importance-weighted
    # estimate of log p(x).
    vae.load_state_dict(torch.load(args.save_path, map_location=device))
    vae.eval()
    with torch.no_grad():
        loss, nll, kl = test(vae, test_loader, "TEST", args)
        au, _ = calc_au(vae, test_loader)
        print("%d active units" % au)

    test_loader = torch.utils.data.DataLoader(test_data, batch_size=50, shuffle=True)
    with torch.no_grad():
        calc_iwnll(vae, test_loader, args)
num_workers=1, pin_memory=True) print('train_loader', len(train_loader)) print('test_loader', len(test_loader)) #%% model = VAE().to(device) # optimizer = optim.Adam(model.parameters(), lr=1e-3) optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999), weight_decay=0.0005) #%% epochs = 100 viz = Visdom() global plotter, recon plotter = utils.VisdomLinePlotter(env_name='main') sample_image = utils.VisdomImage(env_name='main') recon = utils.VisdomImage(env_name='main') for epoch in range(1, epochs + 1): with torch.no_grad(): sample = torch.randn(32, 32).to(device) sample = model.decode(sample).cpu() print("save image: " + 'results/sample_' + str(epoch) + '.png') save_image(sample, 'results/sample_' + str(epoch) + '.png') sample_image.display_image(sample, 0, 'SAMPLE RECON') train(batch_size, epoch, model, train_loader, device, optimizer, plotter) test(batch_size, epoch, model, test_loader, device, optimizer, plotter, recon)