def vae_generation():
    """Sample images from a trained squeeze-VAE and display them.

    Loads a checkpointed squeeze VariationalAutoEncoder trained on
    MountainCar-v0 frames, draws 100 latent samples, decodes each one,
    reverses the input normalisation and shows the result with OpenCV.

    Side effects: reads the checkpoint file from disk and opens OpenCV
    display windows; returns nothing.
    """
    target_shape = (64, 64)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model_path = "useful_models/squeeze_vae_mountain_car_v0_max_lr_0.005_29122020_143138_050.tar"
    # The generator was trained on single-channel images of target_shape.
    input_shape = (1, ) + target_shape
    model = VariationalAutoEncoder.get_squeeze_vae(
        input_shape=input_shape).to(device)
    load_checkpoint(model_path, model)
    # Normalisation statistics of the greyscale MountainCar frames;
    # reverse_preprocess maps network output back to a displayable image.
    # (Removed an unused MNIST preprocessor that was previously constructed
    # here and immediately shadowed by this assignment.)
    preprocessor = CNNPreProcessor(bgr_mean=0.9857, bgr_std=0.1056)
    for _ in range(100):
        generated = model.generate(device)
        im_generated = preprocessor.reverse_preprocess(generated)
        # Presumably blocks until a key press — confirm opencv_show's contract.
        opencv_show(im_generated)
    print("")
def create_model(sess, vocab, forward_only=False, reuse=False):
    """Build a VariationalAutoEncoder and restore its weights if possible.

    Args:
        sess: active tf.Session used to restore / initialise variables.
        vocab: vocabulary object passed through to the model.
        forward_only: build the inference-only graph (no training ops).
        reuse: if True, return the freshly built model without touching any
            checkpoint or variable initialisation (variables are assumed to
            be shared with a model created elsewhere).

    Returns:
        The constructed (and possibly checkpoint-restored) model.
    """
    model = VariationalAutoEncoder(FLAGS.learning_rate, FLAGS.batch_size,
                                   FLAGS.num_units, FLAGS.embedding_size,
                                   FLAGS.max_gradient_norm, FLAGS.reg_scale,
                                   FLAGS.keep_prob, FLAGS.latent_dim,
                                   FLAGS.annealing_pivot, _buckets, vocab,
                                   forward_only)
    if reuse:
        return model
    ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
    # NOTE(review): tf.gfile.Exists on model_checkpoint_path is unreliable
    # for V2 checkpoints, where the path is a prefix rather than a single
    # file — consider tf.train.checkpoint_exists. Left as-is pending
    # confirmation of the checkpoint format in use.
    if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path):
        print('Reading model parameters from {}'.format(
            ckpt.model_checkpoint_path))
        model.saver.restore(sess, ckpt.model_checkpoint_path)
    else:
        print("Created model with fresh parameters.")
        # tf.initialize_all_variables() is deprecated; use the replacement,
        # which train() in this file already uses.
        sess.run(tf.global_variables_initializer())
    return model
def train():
    """Train the dog-image VAE, checkpointing and sampling every 2 epochs."""
    # ---- device / session configuration --------------------------------- #
    if 'X_SGE_CUDA_DEVICE' in os.environ:
        # Running on the grid: the scheduler tells us which GPU to use.
        print('running on the stack...')
        cuda_device = os.environ['X_SGE_CUDA_DEVICE']
        print('X_SGE_CUDA_DEVICE is set to {}'.format(cuda_device))
        os.environ['CUDA_VISIBLE_DEVICES'] = cuda_device
    else:
        # Local development (e.g. air202): pin the GPU manually.
        print('running locally...')
        os.environ['CUDA_VISIBLE_DEVICES'] = '1'

    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True  # grow GPU memory on demand
    sess_config.gpu_options.per_process_gpu_memory_fraction = 0.95

    # ---- data ------------------------------------------------------------ #
    data_train = 'data/dogs/train/'
    data_test = 'data/dogs/test/'
    x_train = load_dog_images(data_train)
    print('load train done...')

    save_path = 'save/dogs2/vae-v1'
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # ---- model ------------------------------------------------------------ #
    vae = VariationalAutoEncoder()
    vae.build_network()
    vae.build_loss_function()
    vae.build_optimiser()
    vae.build_generator()
    saver = tf.train.Saver(max_to_keep=1)

    batch_size = 512
    num_epochs = 2000
    batches = get_batches(x_train, batch_size)
    # Fixed latent draws so generated samples are comparable across epochs.
    my_z_gen = np.random.normal(0.0, 1.0, size=(64, vae.z_size))

    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(num_epochs):
            random.shuffle(batches)
            for batch_idx, batch in enumerate(batches):
                _, loss = sess.run([vae.train_op, vae.loss],
                                   feed_dict={vae.inputs: batch})
                if batch_idx == 0:
                    # Report the first-batch loss once per epoch.
                    print("epoch: {} --- loss: {:.5f}".format(epoch, loss))
            if epoch % 2 == 0:
                saver.save(sess, save_path + '/model', global_step=epoch)
                # Decode the fixed latent grid and write a tiled preview image.
                [output_gen] = sess.run([vae.output_gen],
                                        feed_dict={vae.z_gen: my_z_gen})
                output_gen = np.multiply(255, output_gen)
                output_gen = np.array(output_gen, dtype=float)
                result_name = 'results/dogs2/gen-' + str(epoch) + '.jpg'
                new_img = merge(output_gen, dimension=200)
                new_img.save(result_name)
def evaluation():
    """Qualitatively evaluate a trained squeeze-VAE on MNIST test images.

    Loads a checkpointed model, repeatedly picks a random MNIST test image,
    runs it through the autoencoder, prints the reconstruction MSE and shows
    the original and reconstruction side by side with OpenCV.
    """
    # cart_pole_v0_bgr_mean = (0.9890, 0.9898, 0.9908)
    # cart_pole_v0_bgr_std = (0.0977, 0.0936, 0.0906)
    # target_shape = (64, 64)
    # NOTE(review): the CartPole statistics and preprocessor below are dead —
    # `preprocessor` is re-assigned to the MNIST preprocessor further down.
    cart_pole_v0_bgr_mean = (0.9922, 0.9931, 0.9940)
    cart_pole_v0_bgr_std = (0.0791, 0.0741, 0.0703)
    target_shape = (64, 64)
    latent_dim = 512  # NOTE(review): unused in this function
    preprocessor = CNNPreProcessor(bgr_mean=cart_pole_v0_bgr_mean,
                                   bgr_std=cart_pole_v0_bgr_std,
                                   target_shape=target_shape)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # model_path = "model_checkpoints/cartpoleV0_autoencoder_2_200.tar"
    # model_path = "model_checkpoints/cartpoleV0_autoencoder_3_032.tar"
    # model_path = "useful_models/cartpoleV0_basic_autoencoder_mnist_14122020_026.tar"
    # model_path = "useful_models/no_out_activation_autoencoder_5_layers_mnist_max_lr_0.01_23122020_160428_049.tar"
    # model_path = "useful_models/squeeze_autoencoder_mnist_max_lr_0.001_26122020_122631_050.tar"
    model_path = "useful_models/squeeze_vae_mnist_max_lr_0.001_26122020_142330_050.tar"
    # model_path = "useful_models/vae_5_layers_mnist_max_lr_0.01_24122020_115212_050.tar"
    # Model expects single-channel images of target_shape.
    input_shape = (1, ) + target_shape
    # model = AutoEncoder.get_basic_ae(input_shape=input_shape).to(device)
    # model = AutoEncoder.get_squeeze_ae(input_shape=input_shape).to(device)
    # model = VariationalAutoEncoder.get_basic_vae(input_shape=input_shape).to(device)
    model = VariationalAutoEncoder.get_squeeze_vae(
        input_shape=input_shape).to(device)
    load_checkpoint(model_path, model)
    # NOTE(review): f_names is unused in the current (MNIST) configuration,
    # but listing the directory is a side effect — it raises if the path is
    # missing. Kept as-is.
    ims_path = "agent_frames/cartpoleV0"
    f_names = [
        join(ims_path, e) for e in listdir(ims_path) if e.endswith(".jpg")
    ]
    mnist_test_ims = [el for el in mnist.test_images()]
    # MNIST normalisation statistics (single-channel mean/std).
    mnist_preprocessor = CNNPreProcessor(bgr_mean=0.1307,
                                         bgr_std=0.3081,
                                         target_shape=target_shape)
    preprocessor = mnist_preprocessor
    for _ in range(100):
        # random_im_path = np.random.choice(f_names)
        # # im = cv.imread(random_im_path)
        im = mnist_test_ims[np.random.randint(len(mnist_test_ims))]
        im_target = cv.resize(im, target_shape)
        # (width, height) order expected by cv.resize for the later upscale.
        orig_shape = im.shape[:2][::-1]
        in_t = preprocessor.preprocess(im).to(device)
        # out_t, embedding = model(in_t)
        out_t, mu, log_var = model(in_t)
        # Reconstruction error in normalised space.
        loss = F.mse_loss(in_t, out_t)
        out_im_target = preprocessor.reverse_preprocess(out_t)
        out_im = cv.resize(out_im_target, orig_shape)
        sbs = put_side_by_side([im_target, out_im_target])
        print(f"Loss: {loss.item()}")
        opencv_show(sbs)
        print("")
    pass
def _str2bool(value):
    """Parse a CLI boolean: 'false'/'0'/'no'/'' -> False, everything else True.

    argparse's `type=bool` treats ANY non-empty string (including 'False')
    as True; this converter makes `--verbose False` behave as expected while
    remaining backward-compatible with `--verbose True`.
    """
    if isinstance(value, bool):
        return value
    return value.strip().lower() not in ('false', '0', 'no', 'n', '')


# Training hyper-parameters (help texts fixed: they were copy-pasted from
# a batch-size option).
parser.add_argument('--epochs', type=int, default=20,
                    help='number of training epochs')
parser.add_argument('--start', type=int, default=0,
                    help='starting index (presumably the first epoch to run)')
parser.add_argument('--verbose', type=_str2bool, default=True,
                    help='print batch loss info')
parser.add_argument('--report-interval', type=int, default=100,
                    help='report interval in verbose mode')
# model parameters
parser.add_argument('--latent-n', type=int, default=20,
                    help='latent size of the encoder')
parser.add_argument('--learning-rate', type=float, default=1e-3,
                    help='learning rate during training')
args = parser.parse_args()

print("latent_n is: {}".format(args.latent_n))

# Dataset is chosen by name from torchvision.datasets (e.g. 'MNIST').
train_loader = torch.utils.data.DataLoader(
    getattr(datasets, args.dataset)('../data', train=True, download=True,
                                    transform=transforms.ToTensor()),
    batch_size=args.batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    getattr(datasets, args.dataset)('../data', train=False,
                                    transform=transforms.ToTensor()),
    batch_size=args.batch_size, shuffle=True)

vae = VariationalAutoEncoder(latent_n=args.latent_n)
with Session(vae, prefix=args.prefix, lr=args.learning_rate,
             dashboard_server=args.dashboard_server,
             checkpoint_path=args.checkpoint_path, load=args.load,
             save=args.save) as sess:
    for epoch in range(args.epochs):
        sess.train(train_loader, epoch, verbose=args.verbose,
                   report_interval=args.report_interval)
        print('epoch {} complete'.format(epoch))
        # Evaluate on the held-out set after each epoch.
        sess.test(test_loader)
        # NOTE(review): this fragment is the tail of a Session method whose
        # `def` line lies above this chunk — indentation reconstructed, confirm
        # against the full file.
        # Persist the model only when a checkpoint path was supplied and
        # saving is enabled for this session.
        if self.checkpoint_path and self.save:
            print('saving state_dict to {}'.format(self.checkpoint_path))
            # Store both the weights and the epoch counter so training can
            # resume from where it stopped.
            torch.save(
                dict(state_dict=state_dict, epoch_number=self.epoch_number),
                self.checkpoint_path)


if __name__ == "__main__":
    # Train a VAE on MNIST with default settings for a fixed epoch budget.
    EPOCHS = 20
    BATCH_SIZE = 128
    train_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '../data', train=True, download=True,
        transform=transforms.ToTensor()),
        batch_size=BATCH_SIZE, shuffle=True, **{})
    test_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '../data', train=False, transform=transforms.ToTensor()),
        batch_size=BATCH_SIZE, shuffle=True, **{})
    # Accumulated per-batch training losses across all epochs.
    losses = []
    vae = VariationalAutoEncoder()
    sess = Session(vae, lr=1e-3)
    for epoch in range(1, EPOCHS + 1):
        losses += sess.train(train_loader, epoch)
        print('epoch {} complete'.format(epoch))
        # NOTE(review): loop membership of this call is ambiguous in the
        # flattened source; reconstructed as per-epoch evaluation — confirm.
        sess.test(test_loader)
# Select the non-interactive Agg backend *before* importing pyplot so the
# script also runs on headless machines.
mpl.use('Agg')
import matplotlib.pyplot as plt

from vae_mnist import Session

parser = argparse.ArgumentParser(
    description='variational autoencoder generate examples')
parser.add_argument('--prefix', type=str, default='VAE',
                    help='the prefix of this session')
parser.add_argument('--checkpoint-path', type=str,
                    default='./checkpoints/{prefix}-{date}-{time}.pkl',
                    help='path for the checkpoint file')
# Help texts fixed: they were swapped between --row and --col
# (plt.subplot(row, col, ...) below confirms --row is the grid's row count).
parser.add_argument('--row', type=int, default=10,
                    help='rows in the output')
parser.add_argument('--col', type=int, default=10,
                    help='columns in the output')
parser.add_argument('--output', type=str,
                    default="./figures/{prefix}-{date}-{time}.png")
# model parameters
parser.add_argument('--latent-n', type=int, default=20,
                    help='latent size of the encoder')
args = parser.parse_args()

vae = VariationalAutoEncoder(latent_n=args.latent_n)
with Session(vae, prefix=args.prefix, checkpoint_path=args.checkpoint_path,
             load=True) as sess:
    row, col = args.row, args.col
    # Draw row*col latent samples and decode them into images.
    z = Variable(torch.randn(row * col, args.latent_n))
    x = vae.decoder(z)
    fig = plt.figure(figsize=(15, 15))
    for n in range(row * col):
        plt.subplot(row, col, n + 1)
        plt.imshow(x[n].view(28, 28).data.numpy(), cmap='gray', aspect='auto')
    # One tiled figure; placeholders in --output are filled from the
    # session's format dict (prefix/date/time).
    plt.savefig(args.output.format(**sess.format_dict), dpi=300,
                bbox_inches="tight")