def vae_benchmark(name, x, clf):
    """Benchmark VAE latent sizes by clustering quality.

    For each latent dimensionality in 2..9, trains a fresh VAE on `x`,
    clusters the encoded samples with `clf`, and bar-plots the silhouette
    average per dimensionality.

    name -- dataset name, used in the plot title
    x    -- feature matrix (samples x features)
    clf  -- clustering estimator exposing fit_predict()
    """
    dims = range(2, 10)
    scores = []
    for dim in dims:
        # A fresh encoder/VAE pair for every candidate latent size.
        encoder, vae = utils.create_vae(np.shape(x)[1], latent_dim=dim)
        vae.fit(x, batch_size=50, epochs=5)
        codes = encoder.predict(x)
        labels = clf.fit_predict(codes)
        scores.append(silhouette_score(codes, labels))
    # Pad the y-axis by 20% of the score spread on each side.
    low, high = np.min(scores), np.max(scores)
    margin = 0.2 * (high - low)
    plot.style.use('seaborn-darkgrid')
    plot.title(
        f'Silhouette averages on the {name} dataset using {repr(clf).split("(")[0]} and VAE'
    )
    plot.bar(dims, scores)
    plot.xticks(dims)
    plot.xlabel('Number of components')
    plot.ylabel('Silhouette averages')
    plot.ylim([low - margin, high + margin])
    plot.show()
def vae(name, x, y):
    """Fit a VAE (default latent size) on `x` and scatter the first two
    latent coordinates of every sample, coloured by the labels `y`.

    name -- dataset name, used in the plot title
    x    -- feature matrix (samples x features)
    y    -- per-sample values used as scatter colours
    """
    encoder, model = utils.create_vae(np.shape(x)[1])
    model.fit(x, batch_size=50, epochs=10)
    codes = encoder.predict(x)
    plot.style.use('seaborn-darkgrid')
    plot.title(f'VAE on {name}')
    plot.xlabel('First dimension')
    plot.ylabel('Second dimension')
    plot.scatter(codes[:, 0], codes[:, 1], c=y, cmap='viridis')
    plot.show()
# For this task, you will need to use some database of face images. There are multiple datasets available on the web that you can use: for example, <a href="http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html">CelebA</a> or <a href="http://vis-www.cs.umass.edu/lfw/">Labeled Faces in the Wild</a>. We used Aligned & Cropped version of CelebA that you can find <a href="https://www.dropbox.com/sh/8oqt9vytwxb3s4r/AADSNUu0bseoCKuxuI5ZeTl1a/Img?dl=0&preview=img_align_celeba.zip">here</a> to pretrain VAE model for you. See optional part of the final project if you wish to train VAE on your own. # <b>Task 1:</b> Train VAE on faces dataset and draw some samples from it. (You can use code from previous assignments. You may also want to use convolutional encoders and decoders as well as tuning hyperparameters) # In[ ]: sess = tf.InteractiveSession() K.set_session(sess) # In[ ]: latent_size = 8 # In[ ]: vae, encoder, decoder = utils.create_vae(batch_size=128, latent=latent_size) sess.run(tf.global_variables_initializer()) vae.load_weights('CelebA_VAE_small_8.h5') # In[ ]: K.set_learning_phase(False) # In[ ]: latent_placeholder = tf.placeholder(tf.float32, (1, latent_size)) decode = decoder(latent_placeholder) # #### GRADED 1 (3 points): Draw 25 samples from trained VAE model # As the first part of the assignment, you need to become familiar with the trained model. For all tasks, you will only need a decoder to reconstruct samples from a latent space. #
def nn2(xs, ys, xs_test, ys_test, n_components, clf_constructor):
    # Evaluate how feeding cluster memberships as a second NN input affects
    # test accuracy, per feature transformation (None/PCA/ICA/RP/VAE) on the
    # adult (i == 0) and wine (i == 1) datasets, then plot the best accuracies.
    #
    # Slot layout shared by ks / cataccs / n_components:
    #   [0..1] raw features, [2..3] PCA, [4..5] ICA, [6..7] RP (wine only),
    #   [8..9] VAE; even index = adult, odd index = wine.
    ks = [0 for _ in range(10)]        # best cluster count k per slot
    cataccs = [0 for _ in range(10)]   # best categorical accuracy (%) per slot
    ys = [to_categorical(ys[0]), to_categorical(ys[1])]
    ys_test = [to_categorical(ys_test[0]), to_categorical(ys_test[1])]
    # Baseline: plain NN on untransformed features, no cluster input.
    for i in range(2):
        shape = np.shape(xs[i])[1]
        n_components[i] = shape
        model = utils.create_adult_model(
            shape, 2) if i == 0 else utils.create_wine_model(shape, 5)
        model.fit(xs[i][:10000], ys[i][:10000], batch_size=50, epochs=10,
                  verbose=False)
        cataccs[i] = model.evaluate(xs_test[i], ys_test[i],
                                    verbose=False)[1] * 100
    # Sweep the number of clusters/components of the clustering algorithm.
    for k in range(2, 11):
        # KMeans-style constructors take n_clusters, GaussianMixture-style
        # take n_components.  NOTE(review): the bare except also swallows
        # unrelated errors — catching TypeError would be safer.
        try:
            clf = clf_constructor(n_clusters=k)
        except:
            clf = clf_constructor(n_components=k)
        for i in range(2):
            # --- PCA features + one-hot cluster labels, multi-input model ---
            pca = PCA(n_components=n_components[2 + i])
            transformed = pca.fit_transform(xs[i])
            transformed_test = pca.transform(xs_test[i])
            predict = to_categorical(clf.fit_predict(transformed[:10000]))
            # NOTE(review): predict_test is truncated to 10000 rows while
            # transformed_test passed to evaluate() below is not — mismatched
            # input lengths if a test set exceeds 10000 rows; confirm sizes.
            predict_test = to_categorical(clf.predict(
                transformed_test[:10000]))
            input_dims = [n_components[2 + i], k]
            model = utils.create_mi_adult_model(
                input_dims, 2) if i == 0 else utils.create_mi_wine_model(
                    input_dims, 5)
            model.fit([transformed[:10000], predict], ys[i][:10000],
                      batch_size=50, epochs=10, verbose=False)
            catacc = model.evaluate([transformed_test, predict_test],
                                    ys_test[i], verbose=False)[1] * 100
            if catacc > cataccs[2 + i]:
                ks[2 + i] = k
                cataccs[2 + i] = catacc
            # --- ICA features (same protocol as PCA) ---
            ica = FastICA(n_components=n_components[4 + i])
            transformed = ica.fit_transform(xs[i])
            transformed_test = ica.transform(xs_test[i])
            predict = to_categorical(clf.fit_predict(transformed[:10000]))
            predict_test = to_categorical(clf.predict(
                transformed_test[:10000]))
            input_dims = [n_components[4 + i], k]
            model = utils.create_mi_adult_model(
                input_dims, 2) if i == 0 else utils.create_mi_wine_model(
                    input_dims, 5)
            model.fit([transformed[:10000], predict], ys[i][:10000],
                      batch_size=50, epochs=10, verbose=False)
            catacc = model.evaluate([transformed_test, predict_test],
                                    ys_test[i], verbose=False)[1] * 100
            if catacc > cataccs[4 + i]:
                ks[4 + i] = k
                cataccs[4 + i] = catacc
            # --- Random projection: wine dataset only; eps determines the
            # output dimensionality, so it is read from the transform result ---
            if i == 1:
                rp = GaussianRandomProjection(eps=0.95)
                transformed = rp.fit_transform(xs[i])
                transformed_test = rp.transform(xs_test[i])
                predict = to_categorical(clf.fit_predict(transformed[:10000]))
                predict_test = to_categorical(
                    clf.predict(transformed_test[:10000]))
                input_dims = [np.shape(transformed)[1], k]
                model = utils.create_mi_wine_model(input_dims, 5)
                model.fit([transformed[:10000], predict], ys[i][:10000],
                          batch_size=50, epochs=10, verbose=False)
                catacc = model.evaluate([transformed_test, predict_test],
                                        ys_test[i], verbose=False)[1] * 100
                if catacc > cataccs[6 + i]:
                    ks[6 + i] = k
                    cataccs[6 + i] = catacc
            # --- VAE encodings (encoder retrained for every k iteration) ---
            encoder, vae = utils.create_vae(
                np.shape(xs[i])[1], n_components[8 + i])
            vae.fit(xs[i], batch_size=50, epochs=10, verbose=False)
            transformed = encoder.predict(xs[i], verbose=False)
            transformed_test = encoder.predict(xs_test[i], verbose=False)
            predict = to_categorical(clf.fit_predict(transformed[:10000]))
            predict_test = to_categorical(clf.predict(
                transformed_test[:10000]))
            input_dims = [n_components[8 + i], k]
            model = utils.create_mi_adult_model(
                input_dims, 2) if i == 0 else utils.create_mi_wine_model(
                    input_dims, 5)
            model.fit([transformed[:10000], predict], ys[i][:10000],
                      batch_size=50, epochs=10, verbose=False)
            catacc = model.evaluate([transformed_test, predict_test],
                                    ys_test[i], verbose=False)[1] * 100
            if catacc > cataccs[8 + i]:
                ks[8 + i] = k
                cataccs[8 + i] = catacc
    # Grouped bar chart: per method, adult (blue) next to wine (orange).
    # NOTE(review): ks (the best k per slot) is collected but never reported.
    plot.style.use('seaborn-darkgrid')
    plot.title(f'Influence of feature transformation on the NN accuracy')
    color = []
    for _ in range(5):
        color.append('tab:blue')
        color.append('tab:orange')
    # Bar centres: pairs 0.5 apart, groups 1.5 apart.
    x = []
    count = 1
    for _ in range(5):
        x.append(count)
        count += 0.5
        x.append(count)
        count += 1
    plot.bar(x, cataccs, color=color, width=0.75)
    # Tick positions at the midpoint of each pair of bars.
    x = []
    count = 1.25
    for _ in range(5):
        x.append(count)
        count += 1.5
    plot.xticks(x, ['None', 'PCA', 'ICA', 'RP', 'VAE'])
    plot.xlabel('Feature transformation method')
    plot.ylabel('Categorical accuracy (%)')
    plot.show()
def nn_benchmark(xs, ys, n_components):
    """Benchmark NN training time on raw vs. transformed features.

    For the adult (i == 0) and wine (i == 1) datasets, times the NN fit on
    raw features and on PCA/ICA/RP/VAE-reduced features over several trials,
    then bar-plots the mean times with half-std error bars.  Random
    projection is measured for the wine dataset only; its adult slot is
    plotted as 0.  Records the raw feature widths into n_components[0..1].
    """
    ys = [to_categorical(ys[0]), to_categorical(ys[1])]
    # One (adult, wine) pair of timing-sample lists per transformation.
    none_samples, pca_samples, ica_samples, rp_samples, vae_samples = (
        [[], []] for _ in range(5))

    def timed_fit(model, features, labels):
        # Wall-clock time of the fit call alone; transforms are excluded.
        t0 = time.time()
        model.fit(features[:10000], labels[:10000], batch_size=50, epochs=10,
                  verbose=False)
        return time.time() - t0

    for _ in range(7):  # timing trials
        # Baseline: untransformed features.
        for i in range(2):
            width = np.shape(xs[i])[1]
            n_components[i] = width
            model = (utils.create_adult_model(width, 2) if i == 0
                     else utils.create_wine_model(width, 5))
            none_samples[i].append(timed_fit(model, xs[i], ys[i]))
        for i in range(2):
            # PCA
            dim = n_components[2 + i]
            reduced = PCA(n_components=dim).fit_transform(xs[i])
            model = (utils.create_adult_model(dim, 2) if i == 0
                     else utils.create_wine_model(dim, 5))
            pca_samples[i].append(timed_fit(model, reduced, ys[i]))
            # ICA
            dim = n_components[4 + i]
            reduced = FastICA(n_components=dim).fit_transform(xs[i])
            model = (utils.create_adult_model(dim, 2) if i == 0
                     else utils.create_wine_model(dim, 5))
            ica_samples[i].append(timed_fit(model, reduced, ys[i]))
            # Random projection: wine only; eps determines the output width.
            if i == 1:
                reduced = GaussianRandomProjection(eps=0.95).fit_transform(
                    xs[i])
                model = utils.create_wine_model(np.shape(reduced)[1], 5)
                rp_samples[i].append(timed_fit(model, reduced, ys[i]))
            # VAE encodings (VAE itself is trained outside the timed region).
            dim = n_components[8 + i]
            encoder, vae = utils.create_vae(np.shape(xs[i])[1], dim)
            vae.fit(xs[i], batch_size=50, epochs=10, verbose=False)
            reduced = encoder.predict(xs[i], verbose=False)
            model = (utils.create_adult_model(dim, 2) if i == 0
                     else utils.create_wine_model(dim, 5))
            vae_samples[i].append(timed_fit(model, reduced, ys[i]))

    # Means and half-std error bars; slot 6 (RP on adult) was never measured.
    times = [
        np.mean(none_samples[0]), np.mean(none_samples[1]),
        np.mean(pca_samples[0]), np.mean(pca_samples[1]),
        np.mean(ica_samples[0]), np.mean(ica_samples[1]),
        0, np.mean(rp_samples[1]),
        np.mean(vae_samples[0]), np.mean(vae_samples[1])
    ]
    times_err = [
        np.std(none_samples[0]) / 2, np.std(none_samples[1]) / 2,
        np.std(pca_samples[0]) / 2, np.std(pca_samples[1]) / 2,
        np.std(ica_samples[0]) / 2, np.std(ica_samples[1]) / 2,
        0, np.std(rp_samples[1]) / 2,
        np.std(vae_samples[0]) / 2, np.std(vae_samples[1]) / 2
    ]
    plot.style.use('seaborn-darkgrid')
    plot.title(f'Influence of feature transformation on the NN training time')
    # Adult bars blue, wine bars orange; pairs 0.5 apart, groups 1.5 apart,
    # ticks centred on each pair.
    colors = ['tab:blue', 'tab:orange'] * 5
    bar_x = [1 + 1.5 * group + 0.5 * member
             for group in range(5) for member in range(2)]
    plot.bar(bar_x, times, color=colors, width=0.75, yerr=times_err)
    tick_x = [1.25 + 1.5 * group for group in range(5)]
    plot.xticks(tick_x, ['None', 'PCA', 'ICA', 'RP', 'VAE'])
    plot.xlabel('Feature transformation method')
    plot.ylabel('Average training time (s)')
    plot.show()
vae_benchmark('Adult', x_adult, KMeans(n_clusters=5)) # 951s vae_benchmark('Adult', x_adult, GaussianMixture(n_components=6)) # 458s vae('Wine reviews', x_wine, y_wine) # 158s vae_benchmark('Wine reviews', x_wine, KMeans(n_clusters=3)) # 554s vae_benchmark('Wine reviews', x_wine, GaussianMixture(n_components=3)) # 1604s # DIMENSIONALITY REDUCTION + CLUSTERING reduction_clustering( [x_adult, x_wine], [y_adult, y_wine], [2, 5], [KMeans(n_clusters=5), KMeans(n_clusters=3)], [PCA(n_components=2), PCA(n_components=2)], [FastICA(n_components=2), FastICA(n_components=2)], [None, GaussianRandomProjection(eps=0.95)], [ utils.create_vae(np.shape(x_adult)[1]), utils.create_vae(np.shape(x_wine)[1]) ]) # 202s reduction_clustering( [x_adult, x_wine], [y_adult, y_wine], [2, 5], [GaussianMixture(n_components=6), GaussianMixture(n_components=3)], [PCA(n_components=3), PCA(n_components=2)], [FastICA(n_components=3), FastICA(n_components=3)], [None, GaussianRandomProjection(eps=0.95)], [ utils.create_vae(np.shape(x_adult)[1]), utils.create_vae(np.shape(x_wine)[1]) ]) # 291s # DIMENSIONALITY REDUCTION + NN