def run_training(corruption_chance, perplexity, batch_size):
    """Fit PTSNE and VPTSNE on randomly corrupted data and score both.

    Every entry of the module-level ``train_data`` and ``test_data`` arrays
    is independently replaced by 0 with probability ``corruption_chance``.
    A PTSNE and a VPTSNE (backed by a freshly built VAE) are then fitted on
    the corrupted training data and evaluated on the corrupted test data.
    The globals are reset from ``non_corrupted_train_data`` /
    ``non_corrupted_test_data`` before the function exits, including when
    training aborts with an exception.

    Args:
        corruption_chance: Probability of zeroing a single entry.
        perplexity: Perplexity forwarded to both PTSNE and VPTSNE.
        batch_size: Batch size forwarded to both ``fit`` calls.

    Returns:
        ``(knn_score, tw, knn_score_vptsne, tw_vptsne)``: the 1-nearest-
        neighbour score and the trustworthiness (12 neighbours) on the test
        set, first for PTSNE and then for VPTSNE.

    Raises:
        Exception: Raised by the training hook when the monitored value is
            NaN, or when it is <= 0 for a PTSNE model.
    """
    global train_data, test_data

    def corrupt(data):
        # A boolean mask keeps the input dtype. np.vectorize would infer the
        # output dtype from its first call, so a corrupted first entry
        # (int 0) silently truncated the whole float array to integers.
        corrupted = np.array(data, copy=True)
        mask = np.random.uniform(size=corrupted.shape) <= corruption_chance
        corrupted[mask] = 0
        return corrupted

    def hook(args):
        # args[0] is the model being fitted; args[2] is presumably the
        # current loss -- confirm against the fit() implementations.
        print(args)
        if np.isnan(args[2]):
            raise Exception("training diverged (NaN): %r" % (args,))
        if isinstance(args[0], PTSNE) and args[2] <= 0.0:
            raise Exception("non-positive PTSNE value: %r" % (args,))

    try:
        train_data = corrupt(train_data)
        test_data = corrupt(test_data)

        vae = VAE(
            [n_input_dimensions],
            get_gaussian_network_builder(vae_encoder_layers, n_latent_dimensions),
            gaussian_prior_supplier,
            gaussian_supplier,
            get_bernoulli_network_builder(vae_decoder_layers, n_input_dimensions),
            bernoulli_supplier)

        ptsne = PTSNE(
            [n_input_dimensions],
            get_feed_forward_network_builder(vptsne_layers, batch_normalization=False),
            perplexity=perplexity)

        vptsne = VPTSNE(
            vae,
            get_feed_forward_network_builder(vptsne_layers, batch_normalization=False),
            perplexity=perplexity)

        ptsne.fit(train_data, n_iters=1500, batch_size=batch_size, hook_fn=hook)
        vptsne.fit(
            train_data,
            n_iters=1500,
            n_vae_iters=10000,
            batch_size=batch_size,
            vae_batch_size=1000,
            hook_fn=hook)

        def evaluate(model):
            # Embed the test set once and reuse it for both metrics so they
            # describe the same embedding.
            embedded_train = model.transform(train_data)
            embedded_test = model.transform(test_data)
            knn = KNC(n_neighbors=1).fit(embedded_train, train_labels).score(
                embedded_test, test_labels)
            trust = trustworthiness(test_data, embedded_test, n_neighbors=12)
            return knn, trust

        knn_score, tw = evaluate(ptsne)
        knn_score_vptsne, tw_vptsne = evaluate(vptsne)

        return knn_score, tw, knn_score_vptsne, tw_vptsne
    finally:
        # Always hand clean data back to the next run, even after a failure.
        train_data = np.copy(non_corrupted_train_data)
        test_data = np.copy(non_corrupted_test_data)
def tt():
    """Fit a VPTSNE on the MNIST training images, report metrics and plot.

    Builds a VPTSNE on top of the module-level ``vae`` (which is not
    re-fitted here: ``fit_vae`` is False), fits it on
    ``mnist.train._images``, prints the test-set trustworthiness and the
    1-nearest-neighbour score, and shows a scatter plot of the embedded
    training set with one random colour per label.

    Previously trained weights can be reused by calling
    ``vptsne.load_weights("models/mnist_vptsne.ckpt", "models/mnist_vae.ckpt")``
    instead of ``fit`` (and ``save_weights`` with the same paths to persist).
    """
    from sklearn.neighbors import KNeighborsClassifier as KNC

    vptsne_layers = LayerDefinition.from_array([
        (200, tf.nn.relu),
        (200, tf.nn.relu),
        (2000, tf.nn.relu),
        (2, None)])

    vptsne = VPTSNE(
        vae,
        get_feed_forward_network_builder(vptsne_layers),
        perplexity=30)

    fit_params = {
        "hook_fn": print,
        "n_iters": 2000,
        "batch_size": 200,
        "deterministic": False,
        "fit_vae": False,
        "n_vae_iters": 14000,
        "vae_batch_size": 1000,
    }

    vptsne.fit(mnist.train._images, **fit_params)

    transformed = vptsne.transform(mnist.train._images, reconstruct=True)
    transformed_test = vptsne.transform(mnist.test._images, reconstruct=True)

    print(
        "Trustworthiness, test set",
        trustworthiness(mnist.test._images, transformed_test, n_neighbors=12))

    print(
        "1-NN, test set",
        KNC(n_neighbors=1).fit(transformed, mnist.train._labels).score(
            transformed_test, mnist.test._labels))

    plt.clf()
    color_palette = np.random.rand(100, 3)
    for label in np.unique(mnist.train._labels):
        tmp = transformed[mnist.train._labels == label]
        # A single RGB colour must be given as a one-row 2-D array; a bare
        # length-3 sequence is ambiguous with three values to colormap.
        plt.scatter(
            tmp[:, 0],
            tmp[:, 1],
            s=0.2,
            c=color_palette[label].reshape(1, -1))
    plt.show()
perplexity=30.) ptsne = PTSNE([n_input_dimensions], get_feed_forward_network_builder(vptsne_layers, batch_normalization=False), perplexity=30.) fit_params = { # "hook_fn": print, "n_iters": 1500, "batch_size": 300, "fit_vae": True, "n_vae_epochs": 200, "vae_batch_size": 1000 } vptsne.fit(mnist.train._images, **fit_params) ptsne.fit(mnist.train._images, **fit_params) vptsne.save_weights("models/mnist_vptsne.ckpt", "models/mnist_vae.ckpt") ptsne.save_weights("models/mnist_ptsne.ckpt") #vptsne.load_weights("models/mnist_vptsne.ckpt", "models/mnist_vae.ckpt") #ptsne.load_weights("models/mnist_ptsne.ckpt") pca = PCA(n_components=2).fit(mnist.train._images) estimators = [vptsne, ptsne, vae, pca] transformed_train = [ estimator.transform(mnist.train._images) for estimator in estimators ] transformed_test = [ estimator.transform(mnist.test._images) for estimator in estimators
def run_training(vae, n_latent_dimensions, perplexity, batch_size, run_id):
    """Compare deterministic and non-deterministic VPTSNE fitting on MNIST.

    Two VPTSNE models sharing ``vae`` are fitted on ``mnist_train_images``,
    one with ``deterministic=True`` and one with ``deterministic=False``
    (the VAE itself is not re-fitted). While fitting, the value in
    ``args[2]`` of every hook call is appended to a loss log, and every 400th
    iteration the test-set trustworthiness and 1-NN score are appended to
    their own logs under ``deterministic_output/``. Both models' weights are
    saved under ``models/`` afterwards.

    Args:
        vae: VAE instance shared by both VPTSNE models.
        n_latent_dimensions: Used only to build the file-name suffix.
        perplexity: Perplexity for both models (also part of the suffix).
        batch_size: Batch size for both fits (also part of the suffix).
        run_id: Run identifier (part of the suffix).

    Returns:
        None.
    """
    from contextlib import ExitStack

    info = "%d_%d_%d_%d" % (n_latent_dimensions, perplexity, batch_size, run_id)

    vptsne = VPTSNE(
        vae,
        get_feed_forward_network_builder(vptsne_layers, batch_normalization=False),
        perplexity=perplexity,
        learning_rate=0.001)

    # Second VPTSNE, fitted non-deterministically. Its log files keep the
    # historical "ptsne" tag in their names.
    vptsne_nondeterministic = VPTSNE(
        vae,
        get_feed_forward_network_builder(vptsne_layers, batch_normalization=False),
        perplexity=perplexity,
        learning_rate=0.001)

    def get_logger(loss_file, trustworthiness_file, knn_file):
        """Return a fit hook writing progress to the three given files."""

        def log_fn(args):
            # args[0] is the model, args[1] the iteration counter used for
            # the periodic evaluation, args[2] the value logged as loss.
            print(args)
            if isinstance(args[0], VAE):
                return
            loss_file.write(str(args[2]) + "\n")
            if args[1] % 400 == 0:
                transformed_train = args[0].transform(mnist_train_images)
                transformed_test = args[0].transform(mnist_test_images)
                trustworthiness_file.write(
                    str(trustworthiness(
                        mnist_test_images, transformed_test, n_neighbors=12)) + "\n")
                knn_file.write(
                    str(KNC(n_neighbors=1).fit(
                        transformed_train, mnist_train_labels).score(
                            transformed_test, mnist_test_labels)) + "\n")

        return log_fn

    fit_params = {
        "n_iters": 2001,
        "batch_size": batch_size,
        "deterministic": True,
        "fit_vae": False,
    }
    fit_params_nondeterministic = dict(fit_params, deterministic=False)

    logged = ("loss", "trustworthiness", "knn")

    # ExitStack guarantees all six log files are closed even when a fit
    # call raises part-way through.
    with ExitStack() as stack:
        vptsne_log_files = [
            stack.enter_context(
                open("deterministic_output/%s_vptsne_%s.log" % (to_log, info), "w"))
            for to_log in logged
        ]
        ptsne_log_files = [
            stack.enter_context(
                open("deterministic_output/%s_ptsne_%s.log" % (to_log, info), "w"))
            for to_log in logged
        ]

        vptsne.fit(
            mnist_train_images,
            hook_fn=get_logger(*vptsne_log_files),
            **fit_params)
        vptsne_nondeterministic.fit(
            mnist_train_images,
            hook_fn=get_logger(*ptsne_log_files),
            **fit_params_nondeterministic)

    vptsne.save_weights(
        "models/mnist_vptsne_deterministic_%s.ckpt" % info,
        "models/mnist_vae_deterministic_%s.ckpt" % info)
    vptsne_nondeterministic.save_weights(
        "models/mnist_vptsne_nondeterministic_%s.ckpt" % info)
def run_training(n_latent_dimensions, perplexity, batch_size, percent_missing):
    """Train on a random subset of MNIST and score three embedding models.

    A fraction ``1 - percent_missing`` of ``mnist_train_images`` is drawn
    without replacement and used to fit, in order:

    1. ``vptsne``  - VPTSNE, ``deterministic=True``; also fits the VAE.
    2. ``vptsne2`` - VPTSNE on the same (already fitted) VAE,
       ``deterministic=False``, VAE not re-fitted.
    3. ``ptsne``   - plain PTSNE.

    After each fit the model's 1-nearest-neighbour score and trustworthiness
    (12 neighbours) on the full MNIST test set are printed.

    Args:
        n_latent_dimensions: Latent size passed to the VAE encoder builder.
        perplexity: Perplexity for all three models.
        batch_size: Batch size for all three fits.
        percent_missing: Fraction (0..1) of training points to leave out.

    Returns:
        ``(vptsne_knn_score, ptsne_knn_score, vptsne_trustworthiness,
        ptsne_trustworthiness, vptsne2_knn_score, vptsne2_trustworthiness)``.

    Raises:
        Exception: Raised by the training hook when the monitored value is
            NaN, or when it is <= 0 for a PTSNE model.
    """
    data_points = mnist_train_images.shape[0]
    indices = np.random.choice(
        data_points, int(data_points * (1 - percent_missing)), replace=False)

    train_data = mnist_train_images[indices]
    train_labels = mnist_train_labels[indices]
    test_data = mnist_test_images
    test_labels = mnist_test_labels

    vae = VAE(
        [n_input_dimensions],
        get_gaussian_network_builder(vae_encoder_layers, n_latent_dimensions),
        gaussian_prior_supplier,
        gaussian_supplier,
        get_bernoulli_network_builder(
            vae_decoder_layers, n_input_dimensions, output_hidden=False),
        bernoulli_supplier)

    vptsne = VPTSNE(
        vae,
        get_feed_forward_network_builder(vptsne_layers, batch_normalization=False),
        perplexity=perplexity)

    vptsne2 = VPTSNE(
        vae,
        get_feed_forward_network_builder(vptsne_layers, batch_normalization=False),
        perplexity=perplexity)

    ptsne = PTSNE(
        [n_input_dimensions],
        get_feed_forward_network_builder(vptsne_layers, batch_normalization=False),
        perplexity=perplexity)

    def hook(args):
        # args[0] is the model being fitted; args[2] is presumably the
        # current loss -- confirm against the fit() implementations.
        print(args)
        if np.isnan(args[2]):
            raise Exception("training diverged (NaN): %r" % (args,))
        if isinstance(args[0], PTSNE) and args[2] <= 0.0:
            raise Exception("non-positive PTSNE value: %r" % (args,))

    def evaluate(model):
        """Print and return (1-NN score, trustworthiness) for ``model``."""
        embedded_train = model.transform(train_data)
        # Embed the test set once so both metrics describe the same
        # embedding.
        embedded_test = model.transform(test_data)
        knn_score = KNC(n_neighbors=1).fit(embedded_train, train_labels).score(
            embedded_test, test_labels)
        trust = trustworthiness(test_data, embedded_test, n_neighbors=12)
        print(knn_score, trust)
        return knn_score, trust

    fit_params = {
        "hook_fn": hook,
        "n_iters": 1500,
        "batch_size": batch_size,
        "deterministic": True,
        "fit_vae": True,
        "n_vae_iters": 8000,
        # The VAE batch cannot be larger than the sub-sampled training set.
        "vae_batch_size": min(indices.shape[0], 1000),
    }

    fit_params_2 = {
        "hook_fn": hook,
        "n_iters": 1500,
        "batch_size": batch_size,
        "deterministic": False,
        "fit_vae": False,
    }

    vptsne.fit(train_data, **fit_params)
    vptsne_knn_score, vptsne_trustworthiness = evaluate(vptsne)

    vptsne2.fit(train_data, **fit_params_2)
    vptsne2_knn_score, vptsne2_trustworthiness = evaluate(vptsne2)

    # NOTE(review): PTSNE.fit receives the VAE-related keys of fit_params as
    # well -- confirm it accepts/ignores them.
    ptsne.fit(train_data, **fit_params)
    ptsne_knn_score, ptsne_trustworthiness = evaluate(ptsne)

    return (
        vptsne_knn_score,
        ptsne_knn_score,
        vptsne_trustworthiness,
        ptsne_trustworthiness,
        vptsne2_knn_score,
        vptsne2_trustworthiness,
    )
# Fit a VPTSNE (including its VAE) on the Levine data, save the weights,
# report trustworthiness on the first 10k points and plot the embedding
# with one random colour per label.
vptsne = VPTSNE(
    vae,
    get_feed_forward_network_builder(vptsne_layers),
    perplexity=7.,
    learning_rate=0.01)

fit_params = {
    "n_iters": 1000,
    "batch_size": 300,
    "deterministic": False,
    "fit_vae": True,
    "n_vae_epochs": 300,
    "vae_batch_size": 10000,
    "hook_fn": print,
}

# To reuse an earlier run, replace the fit below with:
# vptsne.load_weights("models/levine_vptsne.ckpt", "models/levine_vae.ckpt")
vptsne.fit(levine_data, **fit_params)
vptsne.save_weights("models/levine_vptsne.ckpt", "models/levine_vae.ckpt")

transformed = vptsne.transform(levine_data)

print(
    "Trustworthiness, first 10k subset",
    trustworthiness(levine_data[:10000], transformed[:10000], n_neighbors=12))

color_palette = np.random.rand(100, 3)
for label in np.unique(levine_labels):
    tmp = transformed[levine_labels == label]
    # A single RGB colour must be given as a one-row 2-D array; a bare
    # length-3 sequence is colormapped instead when a class happens to
    # contain exactly three points.
    plt.scatter(
        tmp[:, 0],
        tmp[:, 1],
        s=0.2,
        c=color_palette[label].reshape(1, -1))
plt.show()
learning_rate=0.00001) #vae.load_weights("models/svhn_vae.ckpt") vae.fit(svhn_train, n_epochs=200, batch_size=1000, hook_fn=print) vae.save_weights("models/svhn_vae.ckpt") #display_reconstructions(10) vptsne_layers = LayerDefinition.from_array([(250, tf.nn.relu), (2500, tf.nn.relu), (2, None)]) vptsne = VPTSNE(vae, get_feed_forward_network_builder(vptsne_layers), learning_rate=0.00001, perplexity=10.) fit_params = { "n_iters": 1500, "batch_size": 500, "fit_vae": False, "hook_fn": print } #vptsne.load_weights("models/svhn_vptsne.ckpt") vptsne.fit(svhn_train, **fit_params) vptsne.save_weights("models/svhn_vptsne.ckpt") transformed = vptsne.transform(svhn_test) plt.scatter(transformed[:, 0], transformed[:, 1], s=0.1) plt.show()