def vae_benchmark(name, x, clf):
    """Fit a VAE for each latent size in 2..9, cluster the encoded data with
    `clf`, and bar-plot the resulting silhouette scores."""
    averages = []
    latent_dims = range(2, 10)

    for latent_dim in latent_dims:
        encoder, vae = utils.create_vae(np.shape(x)[1], latent_dim=latent_dim)
        vae.fit(x, batch_size=50, epochs=5)
        transformed = encoder.predict(x)
        predicted_labels = clf.fit_predict(transformed)
        silhouette_avg = silhouette_score(transformed, predicted_labels)
        averages.append(silhouette_avg)

    lb = np.min(averages)
    ub = np.max(averages)
    amplitude = ub - lb
    lb -= 0.2 * amplitude
    ub += 0.2 * amplitude

    plot.style.use('seaborn-darkgrid')
    plot.title(
        f'Silhouette averages on the {name} dataset using {repr(clf).split("(")[0]} and VAE'
    )
    plot.bar(latent_dims, averages)
    plot.xticks(latent_dims)
    plot.xlabel('Number of components')
    plot.ylabel('Silhouette averages')
    plot.ylim([lb, ub])
    plot.show()


def vae(name, x, y):
    """Fit a VAE on `x` and scatter-plot the first two latent dimensions,
    colored by the labels `y`."""
    encoder, vae = utils.create_vae(np.shape(x)[1])
    vae.fit(x, batch_size=50, epochs=10)
    transformed = encoder.predict(x)

    plot.style.use('seaborn-darkgrid')
    plot.title(f'VAE on {name}')
    plot.xlabel('First dimension')
    plot.ylabel('Second dimension')
    plot.scatter(transformed[:, 0], transformed[:, 1], c=y, cmap='viridis')
    plot.show()
# For this task, you will need to use some database of face images. There are multiple datasets available on the web that you can use: for example, <a href="http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html">CelebA</a> or <a href="http://vis-www.cs.umass.edu/lfw/">Labeled Faces in the Wild</a>. We used the Aligned & Cropped version of CelebA, which you can find <a href="https://www.dropbox.com/sh/8oqt9vytwxb3s4r/AADSNUu0bseoCKuxuI5ZeTl1a/Img?dl=0&preview=img_align_celeba.zip">here</a>, to pretrain a VAE model for you. See the optional part of the final project if you wish to train a VAE on your own.
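
# If you download the images yourself, the cell below is a minimal loading sketch only (assumptions: the Aligned & Cropped archive is unpacked into a local img_align_celeba/ folder, images are resized to 64x64, and the helper name `load_faces` is hypothetical).

# In[ ]:

import glob

import numpy as np
from PIL import Image


def load_faces(pattern='img_align_celeba/*.jpg', size=(64, 64), limit=10000):
    # read, resize and rescale the images to float arrays in [0, 1]
    paths = sorted(glob.glob(pattern))[:limit]
    faces = [np.asarray(Image.open(p).resize(size), dtype=np.float32) / 255.0
             for p in paths]
    return np.stack(faces)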

# <b>Task 1:</b> Train a VAE on the faces dataset and draw some samples from it. (You can reuse code from previous assignments. You may also want to use convolutional encoders and decoders, as well as tune hyperparameters.)
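
# If you decide to train your own convolutional VAE instead of loading the pretrained one below, the next cell is a minimal architectural sketch only (assumptions, not part of the assignment code: 64x64 RGB crops, the plain `keras` layers API, and the hypothetical names `conv_encoder_decoder` / `_reparameterize`; the usual reconstruction + KL loss still has to be attached before training).

# In[ ]:

from keras.layers import (Input, Conv2D, Conv2DTranspose, Dense, Flatten,
                          Reshape, Lambda)
from keras.models import Model
import keras.backend as K


def _reparameterize(args):
    # z = mu + sigma * eps with eps ~ N(0, I) (reparameterization trick)
    mu, log_var = args
    eps = K.random_normal(shape=K.shape(mu))
    return mu + K.exp(0.5 * log_var) * eps


def conv_encoder_decoder(img_shape=(64, 64, 3), latent_dim=8):
    # encoder: two strided convolutions, then dense heads for mu and log-variance
    x_in = Input(shape=img_shape)
    h = Conv2D(32, 3, strides=2, padding='same', activation='relu')(x_in)
    h = Conv2D(64, 3, strides=2, padding='same', activation='relu')(h)
    h = Flatten()(h)
    mu = Dense(latent_dim)(h)
    log_var = Dense(latent_dim)(h)
    z = Lambda(_reparameterize)([mu, log_var])
    encoder = Model(x_in, [mu, log_var, z])

    # decoder: mirror the encoder with transposed convolutions back to image size
    z_in = Input(shape=(latent_dim,))
    h = Dense((img_shape[0] // 4) * (img_shape[1] // 4) * 64, activation='relu')(z_in)
    h = Reshape((img_shape[0] // 4, img_shape[1] // 4, 64))(h)
    h = Conv2DTranspose(32, 3, strides=2, padding='same', activation='relu')(h)
    x_out = Conv2DTranspose(img_shape[2], 3, strides=2, padding='same',
                            activation='sigmoid')(h)
    decoder = Model(z_in, x_out)
    return encoder, decoder

# usage (illustrative): encoder, decoder = conv_encoder_decoder(latent_dim=8)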

# In[ ]:

sess = tf.InteractiveSession()
K.set_session(sess)

# In[ ]:

latent_size = 8

# In[ ]:

vae, encoder, decoder = utils.create_vae(batch_size=128, latent=latent_size)
sess.run(tf.global_variables_initializer())
vae.load_weights('CelebA_VAE_small_8.h5')

# In[ ]:

K.set_learning_phase(False)

# In[ ]:

latent_placeholder = tf.placeholder(tf.float32, (1, latent_size))
decode = decoder(latent_placeholder)

# #### GRADED 1 (3 points): Draw 25 samples from the trained VAE model
# As the first part of the assignment, you need to become familiar with the trained model. For all tasks, you will only need the decoder to map points from the latent space back to images.
#
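
# A minimal sketch, not the graded solution: draw 25 latent vectors from the standard
# normal prior and decode each one with the objects defined above (`sess`, `decode`,
# `latent_placeholder`, `latent_size`). Assumptions: the decoder outputs images with
# values in [0, 1], and `np` / `plot` are the numpy and matplotlib.pyplot aliases used
# in the rest of this file.

# In[ ]:

plot.figure(figsize=(10, 10))
for j in range(25):
    z = np.random.randn(1, latent_size).astype(np.float32)        # z ~ N(0, I)
    img = sess.run(decode, feed_dict={latent_placeholder: z})[0]  # decode one sample
    plot.subplot(5, 5, j + 1)                                     # 5x5 grid of samples
    plot.imshow(np.clip(img, 0, 1))
    plot.axis('off')
plot.show()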


def nn2(xs, ys, xs_test, ys_test, n_components, clf_constructor):
    """For each dataset, train a baseline NN on the raw features; then, for each
    k in 2..10, train multi-input NNs on (transformed features, one-hot cluster
    assignments) using PCA, ICA, RP (Wine only) and VAE, keep the best accuracy
    per method, and bar-plot the results.

    Index layout for `ks`, `cataccs` and `n_components`: 0-1 = no transform
    (Adult, Wine), 2-3 = PCA, 4-5 = ICA, 6-7 = RP, 8-9 = VAE.
    """
    ks = [0 for _ in range(10)]
    cataccs = [0 for _ in range(10)]

    ys = [to_categorical(ys[0]), to_categorical(ys[1])]
    ys_test = [to_categorical(ys_test[0]), to_categorical(ys_test[1])]

    for i in range(2):  # i == 0: Adult dataset, i == 1: Wine reviews dataset
        shape = np.shape(xs[i])[1]
        n_components[i] = shape
        model = utils.create_adult_model(
            shape, 2) if i == 0 else utils.create_wine_model(shape, 5)
        model.fit(xs[i][:10000],
                  ys[i][:10000],
                  batch_size=50,
                  epochs=10,
                  verbose=False)
        cataccs[i] = model.evaluate(xs_test[i], ys_test[i],
                                    verbose=False)[1] * 100

    for k in range(2, 11):  # number of clusters / mixture components to try
        try:
            clf = clf_constructor(n_clusters=k)
        except TypeError:
            # e.g. GaussianMixture expects n_components instead of n_clusters
            clf = clf_constructor(n_components=k)
        for i in range(2):
            pca = PCA(n_components=n_components[2 + i])
            transformed = pca.fit_transform(xs[i])
            transformed_test = pca.transform(xs_test[i])
            predict = to_categorical(clf.fit_predict(transformed[:10000]))
            predict_test = to_categorical(clf.predict(
                transformed_test[:10000]))
            input_dims = [n_components[2 + i], k]
            model = utils.create_mi_adult_model(
                input_dims, 2) if i == 0 else utils.create_mi_wine_model(
                    input_dims, 5)
            model.fit([transformed[:10000], predict],
                      ys[i][:10000],
                      batch_size=50,
                      epochs=10,
                      verbose=False)
            catacc = model.evaluate([transformed_test, predict_test],
                                    ys_test[i],
                                    verbose=False)[1] * 100
            if catacc > cataccs[2 + i]:
                ks[2 + i] = k
                cataccs[2 + i] = catacc

            ica = FastICA(n_components=n_components[4 + i])
            transformed = ica.fit_transform(xs[i])
            transformed_test = ica.transform(xs_test[i])
            predict = to_categorical(clf.fit_predict(transformed[:10000]))
            predict_test = to_categorical(clf.predict(
                transformed_test[:10000]))
            input_dims = [n_components[4 + i], k]
            model = utils.create_mi_adult_model(
                input_dims, 2) if i == 0 else utils.create_mi_wine_model(
                    input_dims, 5)
            model.fit([transformed[:10000], predict],
                      ys[i][:10000],
                      batch_size=50,
                      epochs=10,
                      verbose=False)
            catacc = model.evaluate([transformed_test, predict_test],
                                    ys_test[i],
                                    verbose=False)[1] * 100
            if catacc > cataccs[4 + i]:
                ks[4 + i] = k
                cataccs[4 + i] = catacc

            if i == 1:
                # random projection is only evaluated on the Wine reviews dataset
                rp = GaussianRandomProjection(eps=0.95)
                transformed = rp.fit_transform(xs[i])
                transformed_test = rp.transform(xs_test[i])
                predict = to_categorical(clf.fit_predict(transformed[:10000]))
                predict_test = to_categorical(
                    clf.predict(transformed_test[:10000]))
                input_dims = [np.shape(transformed)[1], k]
                model = utils.create_mi_wine_model(input_dims, 5)
                model.fit([transformed[:10000], predict],
                          ys[i][:10000],
                          batch_size=50,
                          epochs=10,
                          verbose=False)
                catacc = model.evaluate([transformed_test, predict_test],
                                        ys_test[i],
                                        verbose=False)[1] * 100
                if catacc > cataccs[6 + i]:
                    ks[6 + i] = k
                    cataccs[6 + i] = catacc

            encoder, vae = utils.create_vae(
                np.shape(xs[i])[1], n_components[8 + i])
            vae.fit(xs[i], batch_size=50, epochs=10, verbose=False)
            transformed = encoder.predict(xs[i], verbose=False)
            transformed_test = encoder.predict(xs_test[i], verbose=False)
            predict = to_categorical(clf.fit_predict(transformed[:10000]))
            predict_test = to_categorical(clf.predict(
                transformed_test[:10000]))
            input_dims = [n_components[8 + i], k]
            model = utils.create_mi_adult_model(
                input_dims, 2) if i == 0 else utils.create_mi_wine_model(
                    input_dims, 5)
            model.fit([transformed[:10000], predict],
                      ys[i][:10000],
                      batch_size=50,
                      epochs=10,
                      verbose=False)
            catacc = model.evaluate([transformed_test, predict_test],
                                    ys_test[i],
                                    verbose=False)[1] * 100
            if catacc > cataccs[8 + i]:
                ks[8 + i] = k
                cataccs[8 + i] = catacc

    plot.style.use('seaborn-darkgrid')
    plot.title('Influence of feature transformation on the NN accuracy')
    # two bars per transformation method: Adult (blue) and Wine reviews (orange)
    color = []
    for _ in range(5):
        color.append('tab:blue')
        color.append('tab:orange')
    x = []
    count = 1
    for _ in range(5):
        x.append(count)
        count += 0.5
        x.append(count)
        count += 1
    plot.bar(x, cataccs, color=color, width=0.5)
    x = []
    count = 1.25
    for _ in range(5):
        x.append(count)
        count += 1.5
    plot.xticks(x, ['None', 'PCA', 'ICA', 'RP', 'VAE'])
    plot.xlabel('Feature transformation method')
    plot.ylabel('Categorical accuracy (%)')
    plot.show()


def nn_benchmark(xs, ys, n_components):
    """Time NN training on raw features versus PCA-, ICA-, RP- (Wine only) and
    VAE-transformed features, averaged over several trials, and bar-plot the
    mean times with error bars."""
    ys = [to_categorical(ys[0]), to_categorical(ys[1])]

    none_samples = [[], []]
    pca_samples = [[], []]
    ica_samples = [[], []]
    rp_samples = [[], []]
    vae_samples = [[], []]

    trials = 7
    for _ in range(trials):

        for i in range(2):  # i == 0: Adult dataset, i == 1: Wine reviews dataset
            shape = np.shape(xs[i])[1]
            n_components[i] = shape
            model = utils.create_adult_model(
                shape, 2) if i == 0 else utils.create_wine_model(shape, 5)
            start = time.time()
            model.fit(xs[i][:10000],
                      ys[i][:10000],
                      batch_size=50,
                      epochs=10,
                      verbose=False)
            none_samples[i].append(time.time() - start)

        for i in range(2):
            dim = n_components[2 + i]
            pca = PCA(n_components=dim)
            transformed = pca.fit_transform(xs[i])
            model = utils.create_adult_model(
                dim, 2) if i == 0 else utils.create_wine_model(dim, 5)
            start = time.time()
            model.fit(transformed[:10000],
                      ys[i][:10000],
                      batch_size=50,
                      epochs=10,
                      verbose=False)
            pca_samples[i].append(time.time() - start)

            dim = n_components[4 + i]
            ica = FastICA(n_components=dim)
            transformed = ica.fit_transform(xs[i])
            model = utils.create_adult_model(
                dim, 2) if i == 0 else utils.create_wine_model(dim, 5)
            start = time.time()
            model.fit(transformed[:10000],
                      ys[i][:10000],
                      batch_size=50,
                      epochs=10,
                      verbose=False)
            ica_samples[i].append(time.time() - start)

            if i == 1:
                # random projection is only timed on the Wine reviews dataset
                rp = GaussianRandomProjection(eps=0.95)
                transformed = rp.fit_transform(xs[i])
                dim = np.shape(transformed)[1]
                model = utils.create_wine_model(dim, 5)
                start = time.time()
                model.fit(transformed[:10000],
                          ys[i][:10000],
                          batch_size=50,
                          epochs=10,
                          verbose=False)
                rp_samples[i].append(time.time() - start)

            dim = n_components[8 + i]
            encoder, vae = utils.create_vae(np.shape(xs[i])[1], dim)
            vae.fit(xs[i], batch_size=50, epochs=10, verbose=False)
            transformed = encoder.predict(xs[i], verbose=False)
            model = utils.create_adult_model(
                dim, 2) if i == 0 else utils.create_wine_model(dim, 5)
            start = time.time()
            model.fit(transformed[:10000],
                      ys[i][:10000],
                      batch_size=50,
                      epochs=10,
                      verbose=False)
            vae_samples[i].append(time.time() - start)

    # index 6 stays 0: RP is not run on the Adult dataset (see `if i == 1` above)
    times = [
        np.mean(none_samples[0]),
        np.mean(none_samples[1]),
        np.mean(pca_samples[0]),
        np.mean(pca_samples[1]),
        np.mean(ica_samples[0]),
        np.mean(ica_samples[1]), 0,
        np.mean(rp_samples[1]),
        np.mean(vae_samples[0]),
        np.mean(vae_samples[1])
    ]

    times_err = [
        np.std(none_samples[0]) / 2,
        np.std(none_samples[1]) / 2,
        np.std(pca_samples[0]) / 2,
        np.std(pca_samples[1]) / 2,
        np.std(ica_samples[0]) / 2,
        np.std(ica_samples[1]) / 2, 0,
        np.std(rp_samples[1]) / 2,
        np.std(vae_samples[0]) / 2,
        np.std(vae_samples[1]) / 2
    ]

    plot.style.use('seaborn-darkgrid')
    plot.title('Influence of feature transformation on the NN training time')
    # two bars per transformation method: Adult (blue) and Wine reviews (orange)
    color = []
    for _ in range(5):
        color.append('tab:blue')
        color.append('tab:orange')
    x = []
    count = 1
    for _ in range(5):
        x.append(count)
        count += 0.5
        x.append(count)
        count += 1
    plot.bar(x, times, color=color, width=0.5, yerr=times_err)
    x = []
    count = 1.25
    for _ in range(5):
        x.append(count)
        count += 1.5
    plot.xticks(x, ['None', 'PCA', 'ICA', 'RP', 'VAE'])
    plot.xlabel('Feature transformation method')
    plot.ylabel('Average training time (s)')
    plot.show()


vae_benchmark('Adult', x_adult, KMeans(n_clusters=5))  # 951s
vae_benchmark('Adult', x_adult, GaussianMixture(n_components=6))  # 458s

vae('Wine reviews', x_wine, y_wine)  # 158s
vae_benchmark('Wine reviews', x_wine, KMeans(n_clusters=3))  # 554s
vae_benchmark('Wine reviews', x_wine, GaussianMixture(n_components=3))  # 1604s

# DIMENSIONALITY REDUCTION + CLUSTERING

reduction_clustering(
    [x_adult, x_wine], [y_adult, y_wine], [2, 5],
    [KMeans(n_clusters=5), KMeans(n_clusters=3)],
    [PCA(n_components=2), PCA(n_components=2)],
    [FastICA(n_components=2), FastICA(n_components=2)],
    [None, GaussianRandomProjection(eps=0.95)], [
        utils.create_vae(np.shape(x_adult)[1]),
        utils.create_vae(np.shape(x_wine)[1])
    ])  # 202s
reduction_clustering(
    [x_adult, x_wine], [y_adult, y_wine], [2, 5],
    [GaussianMixture(n_components=6),
     GaussianMixture(n_components=3)],
    [PCA(n_components=3), PCA(n_components=2)],
    [FastICA(n_components=3), FastICA(n_components=3)],
    [None, GaussianRandomProjection(eps=0.95)], [
        utils.create_vae(np.shape(x_adult)[1]),
        utils.create_vae(np.shape(x_wine)[1])
    ])  # 291s

# DIMENSIONALITY REDUCTION + NN