def _corrupt(data, corruption_chance):
    """Return a copy of *data* with entries independently set to zero.

    An entry is zeroed when a uniform draw from [0, 1) is
    ``<= corruption_chance``. Shape and dtype of the input are preserved.
    """
    data = np.asarray(data)
    # One draw per entry. A boolean mask (instead of np.vectorize over a
    # Python lambda) keeps the dtype of the input: np.vectorize infers its
    # output dtype from the first call, so a first draw that returned the
    # int 0 would cast the whole array to integers.
    mask = np.random.uniform(size=data.shape) <= corruption_chance
    corrupted = data.copy()
    corrupted[mask] = 0
    return corrupted


def run_training(corruption_chance, perplexity, batch_size):
    """Train PTSNE and VPTSNE on randomly corrupted data and score both.

    The module-level ``train_data`` and ``test_data`` are replaced by
    corrupted copies for the duration of the run and are always reset from
    ``non_corrupted_train_data`` / ``non_corrupted_test_data`` afterwards,
    including when training raises.

    Args:
        corruption_chance: probability with which each entry is zeroed.
        perplexity: perplexity passed to both PTSNE and VPTSNE.
        batch_size: batch size passed to both ``fit`` calls.

    Returns:
        Tuple ``(knn_score, tw, knn_score_vptsne, tw_vptsne)``: 1-NN score
        and trustworthiness (12 neighbours) on the test set for PTSNE,
        followed by the same two metrics for VPTSNE.

    Raises:
        Exception: from the training hook, when the reported value is NaN,
            or is <= 0 while a PTSNE instance is being trained.
    """
    global train_data, test_data

    def hook(args):
        # NOTE(review): args looks like (estimator, iteration, loss);
        # only args[0] and args[2] are used here -- confirm against fit().
        print(args)
        if np.isnan(args[2]):
            raise Exception("training hook received a NaN value")
        if isinstance(args[0], PTSNE) and args[2] <= 0.0:
            raise Exception("training hook received a non-positive value")

    try:
        train_data = _corrupt(train_data, corruption_chance)
        test_data = _corrupt(test_data, corruption_chance)

        vae = VAE(
            [n_input_dimensions],
            get_gaussian_network_builder(vae_encoder_layers, n_latent_dimensions),
            gaussian_prior_supplier,
            gaussian_supplier,
            get_bernoulli_network_builder(vae_decoder_layers, n_input_dimensions),
            bernoulli_supplier)

        ptsne = PTSNE(
            [n_input_dimensions],
            get_feed_forward_network_builder(vptsne_layers, batch_normalization=False),
            perplexity=perplexity)

        vptsne = VPTSNE(
            vae,
            get_feed_forward_network_builder(vptsne_layers, batch_normalization=False),
            perplexity=perplexity)

        ptsne.fit(train_data, n_iters=1500, batch_size=batch_size, hook_fn=hook)
        vptsne.fit(
            train_data,
            n_iters=1500,
            n_vae_iters=10000,
            batch_size=batch_size,
            vae_batch_size=1000,
            hook_fn=hook)

        knn_score = KNC(n_neighbors=1).fit(
            ptsne.transform(train_data), train_labels).score(
                ptsne.transform(test_data), test_labels)

        knn_score_vptsne = KNC(n_neighbors=1).fit(
            vptsne.transform(train_data), train_labels).score(
                vptsne.transform(test_data), test_labels)

        tw = trustworthiness(
            test_data,
            ptsne.transform(test_data),
            n_neighbors=12)

        tw_vptsne = trustworthiness(
            test_data,
            vptsne.transform(test_data),
            n_neighbors=12)

        return knn_score, tw, knn_score_vptsne, tw_vptsne
    finally:
        # Always undo the corruption so a failed run cannot leak corrupted
        # globals into the next call.
        train_data = np.copy(non_corrupted_train_data)
        test_data = np.copy(non_corrupted_test_data)
def tt():
    """Fit a VPTSNE on the MNIST training images, print metrics, and plot.

    Uses the module-level ``vae`` and ``mnist`` objects. After fitting, the
    training and test images are transformed with ``reconstruct=True``;
    trustworthiness (12 neighbours) and a 1-NN score are printed for the
    test set, and the training embedding is shown as a scatter plot with
    one random colour per label.
    """
    vptsne_layers = LayerDefinition.from_array([
        (200, tf.nn.relu),
        (200, tf.nn.relu),
        (2000, tf.nn.relu),
        (2, None)])

    from vptsne import PTSNE

    vptsne = VPTSNE(
        # [n_input_dimensions],
        vae,
        get_feed_forward_network_builder(vptsne_layers),
        perplexity=30)

    fit_params = {
        "hook_fn": print,
        "n_iters": 2000,
        "batch_size": 200,
        "deterministic": False,
        "fit_vae": False,
        "n_vae_iters": 14000,
        "vae_batch_size": 1000
    }

    #vptsne.load_weights("models/mnist_vptsne.ckpt", "models/mnist_vae.ckpt")
    vptsne.fit(mnist.train._images, **fit_params)
    #vptsne.save_weights("models/mnist_vptsne.ckpt", "models/mnist_vae.ckpt")
    #display_reconstructions(10)

    #from sklearn.decomposition import PCA
    #p = PCA(n_components=3).fit(mnist.train._images)
    #train = p.transform(mnist.train._images)
    #vptsne.fit(train, **fit_params)
    #transformed = vptsne.transform(train)

    transformed = vptsne.transform(mnist.train._images, reconstruct=True)
    #transformed = vae.transform(mnist.train._images)
    transformed_test = vptsne.transform(mnist.test._images, reconstruct=True)

    print(
        "Trustworthiness, test set",
        trustworthiness(mnist.test._images, transformed_test, n_neighbors=12))

    #print(
    #  "Trustworthiness, first 10k",
    #  trustworthiness(
    #    mnist.train._images[:10000],
    #    vptsne.transform(mnist.train._images[:10000]),
    #    n_neighbors=12))

    from sklearn.neighbors import KNeighborsClassifier as KNC

    print(
        "1-NN, test set",
        KNC(n_neighbors=1).fit(transformed, mnist.train._labels).score(
            transformed_test, mnist.test._labels))

    plt.clf()
    color_palette = np.random.rand(100, 3)
    for label in np.unique(mnist.train._labels):
        tmp = transformed[mnist.train._labels == label]
        # Pass the colour as a single-row 2-D array: a bare 1-D RGB triple
        # given to `c` is indistinguishable from three values to be
        # colour-mapped and is misread when a class has exactly 3 points.
        label_color = color_palette[label].reshape(1, -1)
        plt.scatter(tmp[:, 0], tmp[:, 1], s=0.2, c=label_color)
    plt.show()
get_gaussian_network_builder(vae_decoder_layers, n_input_dimensions, constant_sigma=0.1), gaussian_supplier) fit_params = { "hook_fn": print, "n_iters": 1500, "batch_size": 5000, "deterministic": True, "fit_vae": True, "n_vae_iters": 10000, "vae_batch_size": 1000 } vptsne = VPTSNE(vae, get_feed_forward_network_builder(vptsne_layers, batch_normalization=True), perplexity=10) pca = PCA(n_components=2) umap = UMAP(n_components=2) tsne = TSNE(n_components=2, perplexity=10) estimators = [vptsne, umap] def fit_fn(estimator): print("Running fit with estimator", estimator.__class__.__name__) start = curr_millis() if isinstance(estimator, PTSNE): transformed = estimator.fit(data, **fit_params) else:
# Model setup for the MNIST run: a VAE with a 2-dimensional latent space,
# plus a VPTSNE (built on that VAE) and a plain PTSNE that use the same
# layer definition and perplexity.
n_input_dimensions = mnist.train._images.shape[1]
n_latent_dimensions = 2

vae = VAE(
    [n_input_dimensions],
    get_gaussian_network_builder(vae_encoder_layers, n_latent_dimensions),
    gaussian_prior_supplier,
    gaussian_supplier,
    # Gaussian decoder alternative, currently disabled:
    # get_gaussian_network_builder(vae_decoder_layers, n_input_dimensions, constant_sigma=0.1),
    # gaussian_supplier,
    get_bernoulli_network_builder(vae_decoder_layers, n_input_dimensions),
    bernoulli_supplier,
    beta=1.0,
)

vptsne = VPTSNE(
    vae,
    get_feed_forward_network_builder(vptsne_layers, batch_normalization=False),
    perplexity=30.0,
)

ptsne = PTSNE(
    [n_input_dimensions],
    get_feed_forward_network_builder(vptsne_layers, batch_normalization=False),
    perplexity=30.0,
)

# Keyword arguments forwarded to fit(); the print hook is left disabled.
fit_params = dict(
    # hook_fn=print,
    n_iters=1500,
    batch_size=300,
    fit_vae=True,
    n_vae_epochs=200,
    vae_batch_size=1000,
)
def run_training(vae, n_latent_dimensions, perplexity, batch_size, run_id):
    """Fit two VPTSNE models on the MNIST training images and log progress.

    Both models wrap the same ``vae`` and are fitted with ``fit_vae=False``.
    ``vptsne`` is fitted with ``deterministic=True`` and ``ptsne`` (also a
    VPTSNE, despite its name) with ``deterministic=False``. Each model
    writes loss, trustworthiness and 1-NN logs under
    ``deterministic_output/`` and saves its weights under ``models/``.

    Args:
        vae: VAE instance shared by both VPTSNE models.
        n_latent_dimensions: used only in the log/checkpoint file names.
        perplexity: perplexity for both models (also in the file names).
        batch_size: batch size for both fits (also in the file names).
        run_id: run identifier used in the file names.

    NOTE(review): the ``deterministic_output/`` and ``models/`` directories
    are not created here and presumably must already exist.
    """
    from contextlib import ExitStack

    info = "%d_%d_%d_%d" % (n_latent_dimensions, perplexity, batch_size, run_id)

    vptsne = VPTSNE(
        vae,
        get_feed_forward_network_builder(vptsne_layers, batch_normalization=False),
        perplexity=perplexity,
        learning_rate=0.001)
    ptsne = VPTSNE(
        vae,
        get_feed_forward_network_builder(vptsne_layers, batch_normalization=False),
        perplexity=perplexity,
        learning_rate=0.001)

    def get_logger(loss_file, trustworthiness_file, knn_file):
        """Build a fit hook that writes metrics to the three given files."""
        def log_fn(args):
            # NOTE(review): args looks like (estimator, iteration, loss) --
            # confirm against the fit() implementation.
            print(args)
            if isinstance(args[0], VAE):
                return
            loss_file.write(str(args[2]) + "\n")
            # The embedding metrics are only computed every 400th iteration.
            if args[1] % 400 == 0:
                transformed_train = args[0].transform(mnist_train_images)
                transformed_test = args[0].transform(mnist_test_images)
                trustworthiness_file.write(
                    str(trustworthiness(
                        mnist_test_images, transformed_test, n_neighbors=12)) + "\n")
                knn_file.write(
                    str(KNC(n_neighbors=1).fit(
                        transformed_train, mnist_train_labels).score(
                            transformed_test, mnist_test_labels)) + "\n")
        return log_fn

    fit_params = {
        "n_iters": 2001,
        "batch_size": batch_size,
        "deterministic": True,
        "fit_vae": False
    }

    fit_params_nondeterministic = {
        "n_iters": 2001,
        "batch_size": batch_size,
        "deterministic": False,
        "fit_vae": False
    }

    # ExitStack closes every log file that was opened, even when a later
    # open(), a fit() or save_weights() raises.
    with ExitStack() as stack:
        vptsne_log_files = [
            stack.enter_context(
                open("deterministic_output/%s_vptsne_%s.log" % (to_log, info), "w"))
            for to_log in ["loss", "trustworthiness", "knn"]
        ]
        ptsne_log_files = [
            stack.enter_context(
                open("deterministic_output/%s_ptsne_%s.log" % (to_log, info), "w"))
            for to_log in ["loss", "trustworthiness", "knn"]
        ]

        vptsne.fit(
            mnist_train_images,
            hook_fn=get_logger(*vptsne_log_files),
            **fit_params)
        ptsne.fit(
            mnist_train_images,
            hook_fn=get_logger(*ptsne_log_files),
            **fit_params_nondeterministic)

        vptsne.save_weights(
            "models/mnist_vptsne_deterministic_%s.ckpt" % info,
            "models/mnist_vae_deterministic_%s.ckpt" % info)
        ptsne.save_weights("models/mnist_vptsne_nondeterministic_%s.ckpt" % info)
def run_training(n_latent_dimensions, perplexity, batch_size, percent_missing):
    """Train on a random subset of MNIST and score three models.

    A fraction ``1 - percent_missing`` of the training images is sampled
    without replacement. On that subset the function fits, in order:
    a VPTSNE (deterministic, also fitting its VAE), a second VPTSNE on the
    same VAE (non-deterministic, VAE not refitted) and a PTSNE. After each
    fit it prints the 1-NN score and the trustworthiness (12 neighbours)
    measured on the full test set.

    Returns:
        ``(vptsne_knn_score, ptsne_knn_score, vptsne_trustworthiness,
        ptsne_trustworthiness, vptsne2_knn_score, vptsne2_trustworthiness)``.

    Raises:
        Exception: from the fit hook, when the reported value is NaN, or is
            <= 0 while a PTSNE instance is being trained.
    """
    n_samples = mnist_train_images.shape[0]
    n_kept = int(n_samples * (1 - percent_missing))
    indices = np.random.choice(n_samples, n_kept, replace=False)

    train_data, train_labels = mnist_train_images[indices], mnist_train_labels[indices]
    test_data, test_labels = mnist_test_images, mnist_test_labels

    vae = VAE(
        [n_input_dimensions],
        get_gaussian_network_builder(vae_encoder_layers, n_latent_dimensions),
        gaussian_prior_supplier,
        gaussian_supplier,
        get_bernoulli_network_builder(
            vae_decoder_layers, n_input_dimensions, output_hidden=False),
        bernoulli_supplier)

    vptsne = VPTSNE(
        vae,
        get_feed_forward_network_builder(vptsne_layers, batch_normalization=False),
        perplexity=perplexity)

    vptsne2 = VPTSNE(
        vae,
        get_feed_forward_network_builder(vptsne_layers, batch_normalization=False),
        perplexity=perplexity)

    ptsne = PTSNE(
        [n_input_dimensions],
        get_feed_forward_network_builder(vptsne_layers, batch_normalization=False),
        perplexity=perplexity)

    def hook(args):
        # Echo progress and abort on a NaN value, or on a non-positive
        # value reported for a PTSNE instance.
        print(args)
        if np.isnan(args[2]):
            raise Exception
        if isinstance(args[0], PTSNE) and args[2] <= 0.0:
            raise Exception

    # Settings for the first VPTSNE fit, reused as-is for the PTSNE fit.
    fit_params = {
        "hook_fn": hook,
        "n_iters": 1500,
        "batch_size": batch_size,
        "deterministic": True,
        "fit_vae": True,
        "n_vae_iters": 8000,
        "vae_batch_size": np.min([indices.shape[0], 1000])}

    # Settings for the second VPTSNE, which does not refit the VAE.
    fit_params_2 = {
        "hook_fn": hook,
        "n_iters": 1500,
        "batch_size": batch_size,
        "deterministic": False,
        "fit_vae": False}

    def fit_and_score(model, params):
        # Fit on the subset, then compute and print (1-NN score,
        # trustworthiness). transform() is called once on the training data
        # and twice on the test data, in that order.
        model.fit(train_data, **params)
        classifier = KNC(n_neighbors=1).fit(model.transform(train_data), train_labels)
        knn = classifier.score(model.transform(test_data), test_labels)
        trust = trustworthiness(test_data, model.transform(test_data), n_neighbors=12)
        print(knn, trust)
        return knn, trust

    vptsne_knn_score, vptsne_trustworthiness = fit_and_score(vptsne, fit_params)
    vptsne2_knn_score, vptsne2_trustworthiness = fit_and_score(vptsne2, fit_params_2)
    ptsne_knn_score, ptsne_trustworthiness = fit_and_score(ptsne, fit_params)

    return (
        vptsne_knn_score,
        ptsne_knn_score,
        vptsne_trustworthiness,
        ptsne_trustworthiness,
        vptsne2_knn_score,
        vptsne2_trustworthiness,
    )
reversed(vae_layer_definitions)) vae = VAE([n_input_dimensions], get_gaussian_network_builder(vae_encoder_layers, n_latent_dimensions), gaussian_prior_supplier, gaussian_supplier, get_gaussian_network_builder(vae_decoder_layers, n_input_dimensions, constant_sigma=0.025), gaussian_supplier) vptsne_layers = LayerDefinition.from_array([(250, tf.nn.relu), (2500, tf.nn.relu), (2, None)]) vptsne = VPTSNE(vae, get_feed_forward_network_builder(vptsne_layers), perplexity=7., learning_rate=0.01) fit_params = { "n_iters": 1000, "batch_size": 300, "deterministic": False, "fit_vae": True, "n_vae_epochs": 300, "vae_batch_size": 10000, "hook_fn": print } #vptsne.load_weights("models/levine_vptsne.ckpt", "models/levine_vae.ckpt") vptsne.fit(levine_data, **fit_params) vptsne.save_weights("models/levine_vptsne.ckpt", "models/levine_vae.ckpt")
# data = scaler.transform(data) plot_data.append(data) n_input_dimensions = train_data.shape[1] n_latent_dimensions = 3 vae = VAE([n_input_dimensions], get_gaussian_network_builder(vae_encoder_layers, n_latent_dimensions), gaussian_prior_supplier, gaussian_supplier, get_gaussian_network_builder(vae_decoder_layers, n_input_dimensions, constant_sigma=0.1), gaussian_supplier) vptsne = VPTSNE( # [train_data.shape[1]], vae, get_feed_forward_network_builder(vptsne_layers)) fit_params = { "hook_fn": print, "n_iters": 1500, "batch_size": 3000, "fit_vae": True, "n_vae_iters": 10000, "vae_batch_size": 1000 } vptsne.load_weights("models/vptsne_hdata_no_cd44_h100_116.ckpt", "models/vae_hdata_no_cd44_h100_116.ckpt") #vptsne.fit(train_data, **fit_params) #vptsne.save_weights("models/vptsne_hdata_no_cd44_h100_116.ckpt", "models/vae_hdata_no_cd44_h100_116.ckpt")
gaussian_prior_supplier, gaussian_supplier, decoder_network_builder, bernoulli_supplier, learning_rate=0.00001) #vae.load_weights("models/svhn_vae.ckpt") vae.fit(svhn_train, n_epochs=200, batch_size=1000, hook_fn=print) vae.save_weights("models/svhn_vae.ckpt") #display_reconstructions(10) vptsne_layers = LayerDefinition.from_array([(250, tf.nn.relu), (2500, tf.nn.relu), (2, None)]) vptsne = VPTSNE(vae, get_feed_forward_network_builder(vptsne_layers), learning_rate=0.00001, perplexity=10.) fit_params = { "n_iters": 1500, "batch_size": 500, "fit_vae": False, "hook_fn": print } #vptsne.load_weights("models/svhn_vptsne.ckpt") vptsne.fit(svhn_train, **fit_params) vptsne.save_weights("models/svhn_vptsne.ckpt") transformed = vptsne.transform(svhn_test)