Example #1
def run_net(data, params):
    #
    # UNPACK DATA
    #

    x_train_unlabeled, y_train_unlabeled, x_val, y_val, x_test, y_test = data[
        'spectral']['train_and_test']

    print(params['input_shape'])
    inputs_vae = Input(shape=params['input_shape'], name='inputs_vae')
    ConvAE = Conv.ConvAE(inputs_vae, params)
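    # Warm-start the VAE from a pretrained checkpoint if one is present; otherwise train from scratch.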
    try:
        ConvAE.vae.load_weights('vae_mnist.h5')
    except OSError:
        print('No pretrained weights available...')

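    # LearningHandler appears to drive the learning-rate schedule (drop + patience) during training.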
    lh = LearningHandler(lr=params['spec_lr'],
                         drop=params['spec_drop'],
                         lr_tensor=ConvAE.learning_rate,
                         patience=params['spec_patience'])

    lh.on_train_begin()

    n_epochs = 5000
    losses_vae = np.empty((n_epochs, ))
    homo_plot = np.empty((n_epochs, ))
    nmi_plot = np.empty((n_epochs, ))
    ari_plot = np.empty((n_epochs, ))

    y_val = np.squeeze(np.asarray(y_val).ravel())  # squeeze into 1D array

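    # Training loop: one VAE training pass per epoch, then clustering metrics and a diagnostic figure.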
    start_time = time.time()
    for i in range(n_epochs):
        # if i==0:
        x_recon, _, x_val_y = ConvAE.vae.predict(x_val)
        losses_vae[i] = ConvAE.train_vae(x_val, x_val_y, params['batch_size'])
        #x_val_y = ConvAE.vae.predict(x_val)[2]
        #y_sp = x_val_y.argmax(axis=1)
        #print_accuracy(y_sp, y_val, params['n_clusters'])
        print("Epoch: {}, loss={:2f}".format(i, losses_vae[i]))

        os.makedirs('vae', exist_ok=True)
        os.makedirs('vae_umap', exist_ok=True)

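        # Build a 3x4 diagnostic figure: loss/metric curves (row 0); t-SNE/UMAP embeddings,
        # confusion matrix and reconstruction (row 1); spectral-clustering views (row 2).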
        fig, axs = plt.subplots(3, 4, figsize=(25, 18))
        fig.subplots_adjust(wspace=0.25)

        embedding = ConvAE.encoder.predict(x_val)
        kmeans = KMeans(n_clusters=params['n_clusters'], n_init=30)
        predicted_labels = kmeans.fit_predict(
            embedding)  # cluster on current embeddings for metric eval
        _, confusion_matrix = get_y_preds(predicted_labels, y_val,
                                          params['n_clusters'])

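        # Track clustering quality per epoch (note that metrics.acc feeds the "Homogeneity" panel).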
        homo_plot[i] = metrics.acc(y_val, predicted_labels)
        nmi_plot[i] = metrics.nmi(y_val, predicted_labels)
        ari_plot[i] = metrics.ari(y_val, predicted_labels)

        tsne = manifold.TSNE(n_components=2, init='pca', random_state=0)
        Z_tsne = tsne.fit_transform(embedding)
        sc = axs[1][0].scatter(Z_tsne[:, 0],
                               Z_tsne[:, 1],
                               s=2,
                               c=y_train_unlabeled,
                               cmap=plt.cm.get_cmap("jet", 14))
        axs[1][0].set_title('t-SNE Embeddings')
        axs[1][0].set_xlabel('t-SNE 1')
        axs[1][0].set_ylabel('t-SNE 2')
        axs[1][0].set_xticks([])
        axs[1][0].set_yticks([])
        axs[1][0].spines['right'].set_visible(False)
        axs[1][0].spines['top'].set_visible(False)
        divider = make_axes_locatable(axs[1][0])
        cax = divider.append_axes('right', size='15%', pad=0.05)
        cbar = fig.colorbar(sc,
                            cax=cax,
                            orientation='vertical',
                            ticks=range(params['n_clusters']))
        cbar.ax.set_yticklabels(
            params['cluster_names'])  # vertically oriented colorbar
        # Create an offset transform that shifts the colorbar tick labels down by 5 points
        dx = 0 / 72.
        dy = -5 / 72.
        offset = matplotlib.transforms.ScaledTranslation(
            dx, dy, fig.dpi_scale_trans)

        # apply offset transform to all cluster ticklabels.
        for label in cbar.ax.yaxis.get_majorticklabels():
            label.set_transform(label.get_transform() + offset)

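        # UMAP projection of the same embeddings, for comparison with the t-SNE view.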
        reducer = umap.UMAP(transform_seed=36, random_state=36)
        matrix_reduce = reducer.fit_transform(embedding)
        sc = axs[1][1].scatter(matrix_reduce[:, 0],
                               matrix_reduce[:, 1],
                               s=2,
                               c=y_train_unlabeled,
                               cmap=plt.cm.get_cmap("jet", 14))
        axs[1][1].set_title('UMAP Embeddings')
        axs[1][1].set_xlabel('UMAP 1')
        axs[1][1].set_ylabel('UMAP 2')
        axs[1][1].set_xticks([])
        axs[1][1].set_yticks([])
        # Hide the right and top spines
        axs[1][1].spines['right'].set_visible(False)
        axs[1][1].spines['top'].set_visible(False)

        im = axs[1][2].imshow(confusion_matrix, cmap='YlOrRd')
        axs[1][2].set_title('Confusion Matrix')
        axs[1][2].set_xticks(range(params['n_clusters']))
        axs[1][2].set_yticks(range(params['n_clusters']))
        axs[1][2].set_xticklabels(params['cluster_names'], fontsize=8)
        axs[1][2].set_yticklabels(params['cluster_names'], fontsize=8)
        divider = make_axes_locatable(axs[1][2])
        cax = divider.append_axes('right', size='10%', pad=0.05)
        cbar = fig.colorbar(im, cax=cax, orientation='vertical', ticks=[])

        axs[0][0].plot(losses_vae[:i + 1])
        axs[0][0].set_title('VAE Loss')
        axs[0][0].set_xlabel('epochs')

        axs[0][1].plot(homo_plot[:i + 1])
        axs[0][1].set_title('Homogeneity')
        axs[0][1].set_xlabel('epochs')
        axs[0][1].set_ylim(0, 1)

        axs[0][2].plot(ari_plot[:i + 1])
        axs[0][2].set_title('ARI')
        axs[0][2].set_xlabel('epochs')
        axs[0][2].set_ylim(0, 1)

        axs[0][3].plot(nmi_plot[:i + 1])
        axs[0][3].set_title('NMI')
        axs[0][3].set_xlabel('epochs')
        axs[0][3].set_ylim(0, 1)

        #reconstructed_cell = ConvAE.vae.predict(x_val[:1, ...])[0, ..., 0]
        cell_tile = x_val[0, ..., 0]
        cell_tile = cell_tile[:, :64]
        x_recon = x_recon[0, ..., 0]
        reconstructed_cell_tile = x_recon[:, :64]
        reconstructed_cell_tile = np.flipud(reconstructed_cell_tile)
        cell_heatmap = np.vstack((cell_tile, reconstructed_cell_tile))
        axs[1][3].imshow(cell_heatmap, cmap='Reds')
        axs[1][3].set_xticks([])
        axs[1][3].set_yticks([])
        axs[1][3].spines['right'].set_visible(False)
        axs[1][3].spines['top'].set_visible(False)
        axs[1][3].spines['left'].set_visible(False)
        axs[1][3].spines['bottom'].set_visible(False)

        # get eigenvalues and eigenvectors
        scale = get_scale(embedding, params['batch_size'], params['scale_nbr'])
        values, vectors = spectral_clustering(embedding, scale,
                                              params['n_nbrs'],
                                              params['affinity'])

        # sort the eigenvalues, then keep the eigenvectors of the smallest n_clusters
        values_idx = np.argsort(values)
        x_spectral_clustering = vectors[:, values_idx[:params['n_clusters']]]

        # do kmeans clustering in this subspace
        y_spectral_clustering = KMeans(
            n_clusters=params['n_clusters']).fit_predict(
                vectors[:, values_idx[:params['n_clusters']]])

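        # Visualize the spectral-clustering subspace (top eigenvectors) with t-SNE and UMAP.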
        tsne = manifold.TSNE(n_components=2, init='pca', random_state=0)
        Z_tsne = tsne.fit_transform(x_spectral_clustering)
        sc = axs[2][0].scatter(Z_tsne[:, 0],
                               Z_tsne[:, 1],
                               s=2,
                               c=y_train_unlabeled,
                               cmap=plt.cm.get_cmap("jet", 14))
        axs[2][0].set_title('Spectral Clusters (t-SNE) True Labels')
        axs[2][0].set_xlabel('t-SNE 1')
        axs[2][0].set_ylabel('t-SNE 2')
        axs[2][0].set_xticks([])
        axs[2][0].set_yticks([])
        axs[2][0].spines['right'].set_visible(False)
        axs[2][0].spines['top'].set_visible(False)

        reducer = umap.UMAP(transform_seed=36, random_state=36)
        matrix_reduce = reducer.fit_transform(x_spectral_clustering)
        axs[2][1].scatter(matrix_reduce[:, 0],
                          matrix_reduce[:, 1],
                          s=2,
                          c=y_spectral_clustering,
                          cmap=plt.cm.get_cmap("jet", 14))
        axs[2][1].set_title('Spectral Clusters (UMAP)')
        axs[2][1].set_xlabel('UMAP 1')
        axs[2][1].set_ylabel('UMAP 2')
        axs[2][1].set_xticks([])
        axs[2][1].set_yticks([])
        # Hide the right and top spines
        axs[2][1].spines['right'].set_visible(False)
        axs[2][1].spines['top'].set_visible(False)

        axs[2][2].scatter(matrix_reduce[:, 0],
                          matrix_reduce[:, 1],
                          s=2,
                          c=y_train_unlabeled,
                          cmap=plt.cm.get_cmap("jet", 14))
        axs[2][2].set_title('True Labels (UMAP)')
        axs[2][2].set_xlabel('UMAP 1')
        axs[2][2].set_ylabel('UMAP 2')
        axs[2][2].set_xticks([])
        axs[2][2].set_yticks([])
        # Hide the right and top spines
        axs[2][2].spines['right'].set_visible(False)
        axs[2][2].spines['top'].set_visible(False)

        axs[2][3].hist(x_spectral_clustering)
        axs[2][3].set_title("histogram of true eigenvectors")

        train_time = str(
            datetime.timedelta(seconds=(int(time.time() - start_time))))
        n_matrices = (i + 1) * params['batch_size'] * 100
        fig.suptitle('Trained on ' + '{:,}'.format(n_matrices) + ' cells\n' +
                     train_time)

        plt.savefig('vae/%d.png' % i)
        plt.close()

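        # Stop once the epoch-to-epoch change in the VAE loss drops below 1e-4.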
        if i > 1:
            if np.abs(losses_vae[i] - losses_vae[i - 1]) < 0.0001:
                print('STOPPING EARLY')
                break

    print("finished training")

    plt.plot(losses_vae)
    plt.title('VAE Loss')
    plt.show()

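    # Final evaluation: treat the argmax of the VAE's third output as predicted cluster labels.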
    x_val_y = ConvAE.vae.predict(x_val)[2]
    # x_val_y = ConvAE.classfier.predict(x_val_lp)
    y_sp = x_val_y.argmax(axis=1)
    print_accuracy(y_sp, y_val, params['n_clusters'])
    from sklearn.metrics import normalized_mutual_info_score as nmi
    y_val = np.squeeze(np.asarray(y_val).ravel())  # squeeze into 1D array
    print(y_sp.shape, y_val.shape)
    nmi_score1 = nmi(y_sp, y_val)
    print('NMI: ' + str(np.round(nmi_score1, 4)))

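    # t-SNE view of the final latent embeddings, colored by the ground-truth labels.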
    embedding = ConvAE.encoder.predict(x_val)
    tsne = manifold.TSNE(n_components=2, init='pca', random_state=0)
    Z_tsne = tsne.fit_transform(embedding)
    fig = plt.figure()
    plt.scatter(Z_tsne[:, 0],
                Z_tsne[:, 1],
                s=2,
                c=y_train_unlabeled,
                cmap=plt.cm.get_cmap("jet", 14))
    plt.colorbar(ticks=range(params['n_clusters']))
    plt.show()
Example #2
def process(x_spectralnet, y_spectralnet, data, params):
    # UNPACK DATA
    x_train, y_train, x_val, y_val, x_test, y_test = data['spectral'][
        'train_and_test']

    # concatenate
    x = np.concatenate([x_train, x_val, x_test], axis=0)
    y = np.concatenate([y_train, y_val, y_test], axis=0)

    # PERFORM SPECTRAL CLUSTERING ON DATA

    # get eigenvalues and eigenvectors
    scale = get_scale(x, params['batch_size'], params['scale_nbr'])
    values, vectors = spectral_clustering(x, scale, params['n_nbrs'],
                                          params['affinity'])

    # sort the eigenvalues, then keep the eigenvectors of the smallest n_clusters
    values_idx = np.argsort(values)
    x_spectral_clustering = vectors[:, values_idx[:params['n_clusters']]]

    # do kmeans clustering in this subspace
    y_spectral_clustering = KMeans(
        n_clusters=params['n_clusters']).fit_predict(
            vectors[:, values_idx[:params['n_clusters']]])

    # PLOT RESULTS

    # plot spectral net clustering
    fig2 = plt.figure()
    if x.shape[1] == 2:
        ax1 = fig2.add_subplot(311)
        ax1.scatter(x[:, 0],
                    x[:, 1],
                    alpha=0.5,
                    s=20,
                    cmap='rainbow',
                    c=y_spectralnet,
                    lw=0)
    ax1.set_title("x colored by net prediction")

    # plot spectral clustering clusters
    if x.shape[1] == 2:
        ax2 = fig2.add_subplot(313)
        ax2.scatter(x[:, 0],
                    x[:, 1],
                    alpha=0.5,
                    s=20,
                    cmap='rainbow',
                    c=y_spectral_clustering,
                    lw=0)
    ax2.set_title("x colored by spectral clustering")

    # plot histogram of eigenvectors
    fig3 = plt.figure()
    ax1 = fig3.add_subplot(212)
    ax1.hist(x_spectral_clustering)
    ax1.set_title("histogram of true eigenvectors")
    ax2 = fig3.add_subplot(211)
    ax2.hist(x_spectralnet)
    ax2.set_title("histogram of net outputs")

    # plot eigenvectors
    y_idx = np.argsort(y)
    fig4 = plt.figure()
    ax1 = fig4.add_subplot(212)
    ax1.plot(x_spectral_clustering[y_idx])
    ax1.set_title("plot of true eigenvectors")
    ax2 = fig4.add_subplot(211)
    ax2.plot(x_spectralnet[y_idx])
    ax2.set_title("plot of net outputs")

    plt.draw()
    plt.show()