예제 #1
0
def calculate_cpca(dataFrame, illness):
    """Run contrastive PCA on the ill/control rows of ``dataFrame`` and plot it.

    Rows whose ``illness`` column equals 1 or 2 form the foreground and rows
    where it equals 3 form the background.  The ``"K760"`` and ``"D50*"``
    columns are excluded from both feature matrices.  Every foreground row
    gets a subgroup label derived from its (illness, sex) pair, which is
    passed to CPCA as ``active_labels`` for the plot.

    Parameters
    ----------
    dataFrame : pandas.DataFrame
        Must contain the ``illness`` column and ``"sex_f31_0_0"``.
    illness : str
        Name of the column holding the 1/2/3 group codes.

    Returns
    -------
    The value returned by ``CPCA.fit_transform`` for the foreground data.

    Raises
    ------
    ValueError
        If a foreground row has a ``"sex_f31_0_0"`` value other than 1 or 2,
        because such a row cannot be assigned to one of the four subgroups.
    """
    # Remove the illnesses from the data and background frames
    feature_columns = dataFrame.columns.difference(["K760", "D50*"])
    df = dataFrame.loc[:, feature_columns]

    is_foreground = (dataFrame[illness] == 2) | (dataFrame[illness] == 1)
    data = df[is_foreground].values
    background = df[dataFrame[illness] == 3].values

    # (illness, sex) -> label code; same codes as before, but assigned per row.
    # The previous version concatenated subgroup *counts*, which only matched
    # the rows of ``data`` when the frame happened to be sorted by subgroup.
    subgroup_codes = {(1, 1): 0, (1, 2): 1, (2, 1): 2, (2, 2): 3}
    foreground_rows = dataFrame.loc[is_foreground]
    try:
        labels = [
            subgroup_codes[(ill_code, sex_code)]
            for ill_code, sex_code in zip(
                foreground_rows[illness], foreground_rows["sex_f31_0_0"]
            )
        ]
    except KeyError as err:
        raise ValueError(
            "foreground rows must have sex_f31_0_0 equal to 1 or 2; "
            f"got (illness, sex) = {err.args[0]!r}"
        ) from err

    # mdl = CPCA(n_components=4)
    mdl = CPCA()
    projected_data = mdl.fit_transform(
        data, background, plot=True, active_labels=labels
    )

    return projected_data
예제 #2
0
def cpca(ill, control, dataframe, background):
    """Compare ill and control samples by distance to the mean CPCA projection.

    ``dataframe`` is projected with CPCA (manual alpha 1.06) against
    ``background`` and the column-wise mean of that projection is used as a
    reference point.  ``ill`` and ``control`` are each passed through
    ``fit_transform`` with the same settings, and the distance from every
    projected sample to the reference point is computed with
    ``scipy.spatial.distance.cdist`` (default metric).  The largest control
    distance is dropped, the two distance samples are compared with a
    two-sample Kolmogorov-Smirnov test, plotted as cumulative distributions,
    printed, and written to ``1-cpca.csv`` (ill) and ``2-cpca.csv`` (control).

    Parameters
    ----------
    ill, control : array-like
        Foreground samples of the two groups to compare.
    dataframe : array-like
        Foreground data whose mean projection is the reference point.
    background : array-like
        Background data passed to every CPCA fit.

    Returns
    -------
    None
    """
    mdl = CPCA(n_components=len(cd.values.features))

    # Fixed: this call used the undefined name ``dataFrame``; the parameter
    # is spelled ``dataframe`` and was otherwise never used.
    data_cpca = mdl.fit_transform(dataframe,
                                  background,
                                  alpha_selection="manual",
                                  alpha_value=1.06)

    # Reference point as a (1, n_components) row so cdist accepts it.
    mean = data_cpca.mean(axis=0).reshape(1, -1)

    # NOTE(review): each fit_transform call presumably refits the model on
    # its own foreground, so ill/control may not share the projection that
    # produced ``mean`` -- confirm whether ``mdl.transform`` was intended.
    ill_data = mdl.fit_transform(ill,
                                 background,
                                 alpha_selection="manual",
                                 alpha_value=1.06)
    control_data = mdl.fit_transform(control,
                                     background,
                                     alpha_selection="manual",
                                     alpha_value=1.06)

    # cdist returns a (1, n_samples) matrix; flatten it to a 1-D vector.
    ill_diff = sp.spatial.distance.cdist(mean, ill_data).reshape(1, -1)[0]
    control_diff = sp.spatial.distance.cdist(mean, control_data).reshape(1, -1)[0]

    # Remove distances that are large (only the single largest control distance)
    control_diff = numpy.delete(control_diff, control_diff.argmax())

    ks_test2 = st.ks_2samp(control_diff, ill_diff)
    print(ks_test2)

    seaborn.distplot(
        control_diff,
        label="control",
        hist_kws={"cumulative": True},
        kde_kws={"cumulative": True},
    )

    seaborn.distplot(
        ill_diff,
        label="ill",
        hist_kws={"cumulative": True},
        kde_kws={"cumulative": True},
    )

    plt.legend()
    plt.show()

    print("Diff ill:", numpy.sort(ill_diff))
    print("Diff Control:", numpy.sort(control_diff))

    numpy.savetxt("1-cpca.csv", ill_diff, delimiter=",")
    numpy.savetxt("2-cpca.csv", control_diff, delimiter=",")
예제 #3
0
def preform_cpca(X_train, X_test, background, alpha=1.06):
    """Project the train and test features with CPCA.

    The model is fitted on ``X_train`` against ``background`` using a
    manually chosen ``alpha``; the fitted model then transforms ``X_test``.
    Returns the pair ``(projected_train, projected_test)``.
    """
    manual_alpha = {"alpha_selection": "manual", "alpha_value": alpha}
    model = CPCA(n_components=len(values.features))

    train_projected = model.fit_transform(X_train, background, **manual_alpha)

    # The test frame is handed over as a NumPy array so the CPCA
    # calculation will work.
    test_projected = model.transform(X_test.to_numpy(), **manual_alpha)

    return train_projected, test_projected
def contrastive_pca(background, foreground, alpha=np.log10(0.5), n=50):
    """Project ``foreground`` onto ``n`` contrastive principal components.

    Both inputs are converted to NumPy arrays and must have the same number
    of columns.  CPCA is run with ``alpha`` supplied manually as the
    foreground/background trade-off, and the projected foreground data is
    returned.
    """
    bg = np.array(background)
    fg = np.array(foreground)
    assert fg.shape[1] == bg.shape[1]

    model = CPCA(n_components=n)
    return model.fit_transform(fg,
                               bg,
                               alpha_selection='manual',
                               alpha_value=alpha)
def cpca_plot(dsver, dsname):
    """Plot CPCA of the training set against both background files.

    Loads the ``train`` CSV for the given dataset version/name, derives the
    plot labels with ``LevelMulti``, and runs ``CPCA().fit_transform`` with
    ``plot=True`` once against the ``background-signal`` file and once
    against the ``background-nosignal`` file.
    """
    train_path = './data/processed/ds{0:04d}-{1}-train.csv'.format(
        dsver, dsname)
    ydata, Xdata = load_data(train_path)
    ylabels = LevelMulti(targetmin=0.2, targetmax=0.8).transform(ydata.copy())

    background_templates = (
        './data/processed/ds{0:04d}-{1}-background-signal.csv',
        './data/processed/ds{0:04d}-{1}-background-nosignal.csv',
    )
    for template in background_templates:
        _, Xback = load_data(template.format(dsver, dsname))
        # Alternative tried earlier: n_alphas=10, max_log_alpha=2,
        # n_alphas_to_return=4.
        CPCA().fit_transform(Xdata, Xback, plot=True, active_labels=ylabels)
예제 #6
0
def logistic_regression_cpca(data):
    """Train and evaluate a logistic regression on CPCA-projected features.

    Rows with ``K760 != 3`` are the labelled samples and ``"D50*"`` is the
    target.  Rows with ``K760 == 3`` or ``D50* == 3`` form the CPCA
    background.  The labelled samples are split 70/30, a CPCA (manual alpha
    1.06) is fitted on the training split, and the classifier is trained on
    the projected training data.  Accuracy, confusion matrix, AUC and recall
    on the projected test split are printed.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the ``"K760"`` and ``"D50*"`` columns; every other
        column is used as a feature.

    Returns
    -------
    None
    """
    # One shared column order for foreground and background.  Previously the
    # foreground kept the frame's order (``drop``) while the background used
    # the sorted order of ``columns.difference``, so features could be
    # misaligned between the two matrices.
    feature_columns = data.columns.difference(["K760", "D50*"])

    dataFrame = data[data["K760"] != 3]
    x = dataFrame.loc[:, feature_columns]
    y2 = dataFrame["D50*"]

    is_background = (data["K760"] == 3) | (data["D50*"] == 3)
    background = data.loc[is_background, feature_columns].values

    X_train, X_test, y_train, y_test = train_test_split(x,
                                                        y2,
                                                        test_size=0.3,
                                                        random_state=13)

    mdl = CPCA(n_components=len(cd.features))
    projected_data = mdl.fit_transform(X_train,
                                       background,
                                       alpha_selection="manual",
                                       alpha_value=1.06)

    # Convert to NumPy array so CPCA calculation will work
    test = X_test.to_numpy()
    test_data = mdl.transform(test, alpha_selection="manual", alpha_value=1.06)

    lg = LogisticRegression(random_state=13,
                            class_weight={
                                1: 1,
                                2: 1
                            },
                            max_iter=5000)

    lg.fit(projected_data, y_train)

    y_pred = lg.predict(test_data)
    # The classifier was trained on projected features, so probabilities must
    # be computed from the projected test data, not from the raw ``X_test``.
    y_pred_proba = lg.predict_proba(test_data)[:, 1]

    # performance
    con_matrix = confusion_matrix(y_test, y_pred)
    # Rank by the probability of the greater-label class instead of by hard
    # predictions, which collapse the ROC curve to a single threshold.
    auc = roc_auc_score(y_test, y_pred_proba)

    print("##### CPCA #####")
    print(f"Accuracy Score: {accuracy_score(y_test,y_pred)}")
    print(f"Confusion Matrix: \n{con_matrix}")
    print(f"Area Under Curve: {auc}")
    print(f"Recall score: {recall_score(y_test,y_pred)}")
def cpca_data(dsver, dsname, alpha, dstype='train', bgname='nosignal'):
    """Return the labels and the 2-component CPCA projection of a dataset.

    The foreground is read from the ``dstype`` CSV and the background from
    the ``background-<bgname>`` CSV of the same dataset version/name.  CPCA
    is run with two components and the manually supplied ``alpha``.
    Returns ``(ylabels, Xpca)``.
    """
    background_path = './data/processed/ds{0:04d}-{1}-background-{2}.csv'.format(dsver, dsname, bgname)
    foreground_path = './data/processed/ds{0:04d}-{1}-{2}.csv'.format(dsver, dsname, dstype)

    _, Xback = load_data(background_path)
    ydata, Xdata = load_data(foreground_path)

    leveller = LevelMulti(targetmin=0.2, targetmax=0.8)
    ylabels = leveller.transform(ydata.copy())

    model = CPCA(n_components=2)
    Xpca = model.fit_transform(Xdata, Xback, alpha_selection='manual', alpha_value=alpha)
    return ylabels, Xpca
예제 #8
0
def calculate_cpca_alpha(dataFrame, alpha, illness):
    """Project the ill/control rows with CPCA at a fixed ``alpha``.

    Rows whose ``illness`` column is 1 or 2 are the foreground and rows
    where it is 3 are the background; the ``"K760"`` and ``"D50*"`` columns
    are left out of both.  The number of components is the length of the
    module-level ``features``.  Returns the projected foreground data.
    """
    # Feature matrix without the illness columns.
    feature_frame = dataFrame.loc[:, dataFrame.columns.difference(["K760", "D50*"])]
    group_code = dataFrame[illness]

    foreground = feature_frame[(group_code == 2) | (group_code == 1)].values
    background = feature_frame[group_code == 3].values

    n_features = len(features)
    print("Num of features:", n_features)

    model = CPCA(n_components=n_features)
    return model.fit_transform(
        foreground, background, alpha_selection="manual", alpha_value=alpha
    )
def cpca_score(dsver, dsname, bgname, alpha):
    """Print the silhouette score of a CPCA projection of the training set.

    The training CSV is projected with a default ``CPCA()`` against the
    ``background-<bgname>`` CSV using the manually supplied ``alpha``; the
    silhouette score of the projection with respect to the ``LevelMulti``
    labels is printed.  Nothing is returned.
    """
    background_path = './data/processed/ds{0:04d}-{1}-background-{2}.csv'.format(
        dsver, dsname, bgname)
    train_path = './data/processed/ds{0:04d}-{1}-train.csv'.format(
        dsver, dsname)

    _, Xback = load_data(background_path)
    ydata, Xdata = load_data(train_path)
    ylabels = LevelMulti(targetmin=0.2, targetmax=0.8).transform(ydata.copy())

    model = CPCA()
    Xpca = model.fit_transform(Xdata,
                               Xback,
                               alpha_selection='manual',
                               alpha_value=alpha)

    sscore = metrics.silhouette_score(Xpca, ylabels)
    report = 'CPCA {0}-{1} Silhouette Score: {2:.4f} alpha={3:.2f}'
    print(report.format(dsname.capitalize(), bgname.capitalize(), sscore,
                        alpha))
        
# One-way ANOVA across the seven DMSO control groups.
p = stats.f_oneway(*(DMSOtest[i] for i in range(7)))

#there is a difference between the DMSO controls between the years
plt.figure()
sns.swarmplot(x='date', y='PC_2', data=PC_df[PC_df['drug']=='DMSO'], color = lut['DMSO'])
plt.text(1,0.3, '1way_anova, p=' + str(p[1]))
# Apply the y-limits before saving: previously they were set after savefig,
# so the PNG on disk did not have the limits of the figure shown on screen.
plt.ylim([-0.5, 0.5])
plt.savefig(os.path.join(savedir, 'PC2_1wayANOVA.png'))
plt.show()

#%% Implement contrastive PCA

from contrastive import CPCA
mdl = CPCA(n_components = 50)

#use No_Compound as background condition
compound_rows = featMatZ2[featMatZ2['drug']!='No_Compound']
control_rows = featMatZ2[featMatZ2['drug']=='No_Compound']

# Feature matrices: float columns only, without the concentration column.
foreground = np.array(compound_rows.select_dtypes(include='float').drop(columns = 'concentration'))
background = np.array(control_rows.select_dtypes(include='float').drop(columns='concentration'))

# Metadata of the foreground rows, re-indexed from 0.
Druglabels = compound_rows['drug'].to_frame().reset_index(drop=True)
Conclabels = compound_rows['concentration'].to_frame().reset_index(drop=True)
Datelabels = compound_rows['date'].to_frame().reset_index(drop=True)

#calculate CPCA with 50PCs
projected_data = mdl.fit_transform(foreground, background)

#and now plot to compare the alphas
cPC_df = {}
cPCmean={}
cPCsem = {}
예제 #11
0
# Raw values and the first categorical label of the dataset.
values = dataset.values()
category = dataset.categoricalLabels[0]

print(values.shape)
print(representations.shape)

# One category value per series, in the order get_mtseries() yields them.
all_labels = np.array(
    [mts.categoricalFeatures[category] for mts in dataset.get_mtseries()])
labels = np.unique(all_labels)
print(labels)

# The last unique label defines group A; every other series goes to group B.
genreA = labels[-1]
in_group_a = [label == genreA for label in all_labels]

groupA = np.array(
    [representations[i] for i, flag in enumerate(in_group_a) if flag])
groupB = np.array(
    [representations[i] for i, flag in enumerate(in_group_a) if not flag])

print(groupA.shape)
print(groupB.shape)

# Group B is the foreground and group A the background.
mdl = CPCA()
projected_data = mdl.fit_transform(groupB, groupA, gui=True)
print(projected_data)
예제 #12
0
# To do:
# 1. Do contrastive PCA 
# 2. Label antipsychotics (typical, atypical, and test compounds) and pesticides
    # and look at the distribution of these compounds across multiple principal components
# 3. Is it possible to train a classifier to differentiate between antipsychotics and pesticides?
    
# 4. tSNE embedding
    
    

# =============================================================================

#%% cPCA - could use df.groupby function here
from contrastive import CPCA

# Rows with a compound are the foreground; 'No_Compound' rows the background.
compound_rows = featMatZ2[featMatZ2['drug']!='No_Compound']
control_rows = featMatZ2[featMatZ2['drug']=='No_Compound']

# Feature matrices: float columns only, without the concentration column.
foreground = np.array(
    compound_rows.select_dtypes(include='float').drop(columns = 'concentration'))
background = np.array(
    control_rows.select_dtypes(include='float').drop(columns='concentration'))

# Metadata of the foreground rows, re-indexed from 0.
Druglabels = compound_rows['drug'].to_frame().reset_index(drop=True)
Conclabels = compound_rows['concentration'].to_frame().reset_index(drop=True)
Datelabels = compound_rows['date'].to_frame().reset_index(drop=True)
MoAlabels = compound_rows['MoAGeneral'].to_frame().reset_index(drop=True)

#test and see what alpha looks best
mdl = CPCA(n_components = 2)
mdl.fit_transform(foreground, background, plot=True, active_labels=Druglabels)
alpha1 = 1.34

#calculate CPCA with 50PCs
mdl = CPCA(n_components = 50)
projected_data = mdl.fit_transform(foreground, background)
예제 #13
0
#%matplotlib inline
from sklearn.cluster import AffinityPropagation, KMeans, DBSCAN, SpectralClustering
from sklearn.manifold import MDS, TSNE, Isomap
from sklearn.metrics import silhouette_score

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.linalg import logm, expm
from contrastive import CPCA

# Load the count table and transpose it (presumably so rows are samples --
# the 20 labels below are matched against the rows of X).
sheat = pd.read_csv("TNBC10vNormal10_Counts_4.csv", sep=",", header=0, index_col=0)
sheat2 = pd.DataFrame(sheat.T)
print(sheat2)

# First ten rows are labelled 1, last ten rows 0.
X = sheat2.values
y = np.array([1] * 10 + [0] * 10)

# Foreground: every row; background: rows 10-19 (the rows labelled 0).
foreground_data = X[:]
background_data = X[10:20]

mdl = CPCA()
pre_cluster_lables = np.array([1] * 10 + [0] * 10)

projected_data = mdl.fit_transform(
    foreground_data,
    background_data,
    plot=True,
    active_labels=pre_cluster_lables,
)


예제 #14
0
파일: toy.py 프로젝트: blengerich/drpca
def fit_all():
    if K == 3:
        n_components = 2
    else:
        n_components = 1
    pca = PCA(n_components=n_components)
    measure_silhouette = lambda reps: silhouette_score(
        reps, np.ravel(np.vstack((np.ones((n_fg, 1)), np.zeros((n_bg, 1))))))

    def get_differential(data, numComponents=None):
        """Principal Components Analysis

        From: http://stackoverflow.com/a/13224592/834250

        The input is centered on a copy, so the caller's array is left
        unchanged and integer arrays are accepted.

        Parameters
        ----------
        data : `numpy.ndarray`
            2-D array to analyse; rows are observations and columns are
            variables
        numComponents : `int`
            number of principal components to use; ``None`` keeps all

        Returns
        -------
        comps : `numpy.ndarray`
            Centered data projected onto the kept eigenvectors
        evals : `numpy.ndarray`
            All eigenvalues, sorted in descending order
        evecs : `numpy.ndarray`
            Eigenvectors (as columns) in the same order, truncated to
            ``numComponents`` when given

        Raises
        ------
        ValueError
            If ``data`` is not 2-dimensional.
        """
        if data.ndim != 2:
            raise ValueError(
                "data must be a 2-D array of shape (samples, features)")
        # Out-of-place centering: ``data -= mean`` modified the caller's
        # array and failed outright for integer dtypes.
        centered = data - data.mean(axis=0)
        R = np.cov(centered, rowvar=False)
        # use 'eigh' rather than 'eig' since R is symmetric,
        # the performance gain is substantial
        evals, evecs = np.linalg.eigh(R)
        # eigh returns ascending eigenvalues; reorder to descending
        idx = np.argsort(evals)[::-1]
        evecs = evecs[:, idx]
        evals = evals[idx]
        if numComponents is not None:
            evecs = evecs[:, :numComponents]
        # carry out the transformation on the data using eigenvectors
        # and return the re-scaled data, eigenvalues, and eigenvectors
        return np.dot(evecs.T, centered.T).T, evals, evecs

    names = []
    fig = plt.figure()
    if K == 3:
        from mpl_toolkits.mplot3d import Axes3D
        #ax = fig.add_subplot(2,4,1, projection='3d')
        ax = plt.gca()
        ax.scatter(foreground_data[:, 0],
                   foreground_data[:, 1],
                   foreground_data[:, 2],
                   marker='*',
                   alpha=0.5)
        ax.scatter(background_data[:, 0],
                   background_data[:, 1],
                   background_data[:, 2],
                   marker='+',
                   alpha=0.5)
        ax.set_zticks([])
    else:
        #ax = fig.add_subplot(2,4,1)
        ax = plt.gca()
        ax.scatter(foreground_data[:, 0],
                   foreground_data[:, 1],
                   marker='*',
                   alpha=0.5)
        ax.scatter(background_data[:, 0],
                   background_data[:, 1],
                   marker='+',
                   alpha=0.5)
    #ax = plt.gca()
    #names.append("Foreground Data")
    #names.append("Background Data")
    #ax.legend(names)

    def get_annotate_loc(ax, data):
        """Return an ``[x, y]`` annotation position inside the limits of ``ax``.

        The y position is always 10% of the way up the y-range.  The x
        position is 70% across the x-range when the point with the largest
        x has a larger y than the point with the smallest x ("angling up"),
        and 20% across otherwise.
        """
        leftmost_y = data[np.argmin(data[:, 0]), 1]
        rightmost_y = data[np.argmax(data[:, 0]), 1]
        x_fraction = 0.7 if leftmost_y < rightmost_y else 0.2  # angling up?

        x_limits = ax.get_xlim()
        y_limits = ax.get_ylim()
        x_loc = (x_limits[1] - x_limits[0]) * x_fraction + x_limits[0]
        y_loc = (y_limits[1] - y_limits[0]) * 0.1 + y_limits[0]
        return [x_loc, y_loc]

    raw_silhouette = measure_silhouette(all_data)
    #x_loc = (ax.get_xlim()[1] - ax.get_xlim()[0])*0.7 + ax.get_xlim()[0]
    #y_loc = (ax.get_ylim()[1] - ax.get_ylim()[0])*0.1 + ax.get_ylim()[0]
    annotate_location = get_annotate_loc(ax, all_data)
    if annotate_sil:
        ax.annotate("S: {:.3f}".format(raw_silhouette), annotate_location)
    ax.set_xticks([])
    ax.set_yticks([])

    plt.tight_layout()
    plt.savefig("Raw Data")
    print("Raw Data Silhouette: {:.3f}".format(raw_silhouette))

    y_lims = ax.get_ylim()
    x_lims = ax.get_xlim()

    def set_ax_lims(ax):
        """Set ``ax`` to the enclosing scope's ``x_lims``/``y_lims``, widened by 5% of the range on each side."""
        y_low, y_high = y_lims[0], y_lims[1]
        x_low, x_high = x_lims[0], x_lims[1]
        y_margin = (y_high - y_low) * 0.05
        x_margin = (x_high - x_low) * 0.05
        ax.set_ylim([y_low - y_margin, y_high + y_margin])
        ax.set_xlim([x_low - x_margin, x_high + x_margin])

    set_ax_lims(ax)

    # Normal PCA
    pca = PCA(n_components=n_components)
    reduced = pca.fit_transform(all_data)
    reduced = reduced.dot(pca.components_)
    normal_components = pca.components_
    #plt.subplot(2,4,2)
    fig = plt.figure()
    ax = plt.gca()
    names = []
    ax.scatter(reduced[:n_fg, 0], reduced[:n_fg, 1], marker='*')
    ax.scatter(reduced[n_fg:, 0], reduced[n_fg:, 1], marker='+')
    pca_silhouette = measure_silhouette(reduced)

    set_ax_lims(ax)
    annotate_location = get_annotate_loc(ax, reduced)
    if annotate_sil:
        ax.annotate("S: {:.3f}".format(pca_silhouette), annotate_location)
    ax.set_xticks([])
    ax.set_yticks([])
    plt.tight_layout()
    plt.savefig("PCA")
    print("PCA Silhouette: {:.3f}".format(pca_silhouette))
    """
    names = ["PCA FG", "PCA BG"]
    names.append("PCA FG")
    names.append("PCA BG")
    plt.plot([0, pca.components_[0][0]], [0, pca.components_[0][1]], color='red')
    names.append("PCA Ax")
    ax.legend(names)
    """

    # Contrastive PCA
    # CPCA doesn't do dim reduction.
    mdl = CPCA(n_components=2)
    #print(foreground_data.shape)
    #print(background_data.shape)
    # For some reason, CPCA returns the data as the same size as the input data.
    alpha = 0
    mdl.fit(foreground_data, background_data)
    fg_cpca = mdl.transform(foreground_data)[0]
    #print(fg_cpca.shape)
    #bg_cpca = pca.fit_transform(background_data).dot(pca.components_)

    pca.fit(mdl.fg_cov - alpha * mdl.bg_cov)
    fg_cpca = np.expand_dims(fg_cpca[:, 0], 1).dot(pca.components_)
    bg_cpca = pca.transform(background_data).dot(pca.components_)

    #pca_directions = pca.components_
    #pca_directions = np.array([np.array([1.0]), np.array([1.0])])
    #print(projected_data)
    #print(dir(mdl))
    #print(mdl.pca_directions())
    #print(mdl.get_bg())
    #print(mdl.get_pca_directions())
    #print(mdl.pca_directions)
    #print(mdl.fg)

    #print(mdl.get_bg())
    #print(projected_data)
    #print(projected_data)
    #fg_cpca = (projected_data[2][:, :n_components].dot(pca_directions)).T
    #bg_cpca = (projected_data[3][:, :n_components].dot(pca_directions)).T
    #fg_cpca = mdl.get_fg()#[:, 0]projected_data[0]
    #bg_cpca = mdl.get_bg()#[:, 0], 1).dot(pca_directions)
    #fg_cpca = pca.fit_transform(fg_cpca)
    #bg_cpca = pca.transform(bg_cpca)
    #fg_cpca = fg_cpca.dot(pca.components_)
    #bg_cpca = bg_cpca.dot(pca.components_)
    #print(fg_cpca.shape)
    #print(bg_cpca.shape)
    #fig = plt.figure()
    #print(fg_proj.shape)
    #plt.subplot(2,4,3)
    fig = plt.figure()
    ax = plt.gca()
    ax.scatter(fg_cpca[:, 0], fg_cpca[:, 1], marker='*')
    ax.scatter(bg_cpca[:, 0], bg_cpca[:, 1], marker='+')
    set_ax_lims(ax)

    ax.set_xticks([])
    ax.set_yticks([])
    cpca_data = np.vstack((fg_cpca, bg_cpca))
    cpca_silhouette = measure_silhouette(cpca_data)
    annotate_location = get_annotate_loc(ax, cpca_data)
    if annotate_sil:
        ax.annotate("S: {:.3f}".format(cpca_silhouette), annotate_location)
    plt.tight_layout()
    plt.savefig("cPCA")
    print("cPCA Silhouette: {:.3f}".format(cpca_silhouette))
    #names.append("cPCA FG")
    #names.append("cPCA BG")
    #names = ["cPCA FG", "cPCA BG"]
    #ax.legend(names)

    # RPCA
    L, S = R_pca(all_data).fit(max_iter=10000, iter_print=1000)
    rpca_components, rpca_evals, rpca_evecs = get_differential(L, n_components)

    fg_rpca = foreground_data.dot(rpca_evecs)
    fg_rpca = np.array(
        [fg_rpca[i, 0] * rpca_evecs[:, 0] for i in range(len(fg_rpca))])
    bg_rpca = background_data.dot(rpca_evecs)
    bg_rpca = np.array(
        [bg_rpca[i, 0] * rpca_evecs[:, 0] for i in range(len(bg_rpca))])

    #plt.subplot(2,4,4)
    fig = plt.figure()
    ax = plt.gca()
    ax.scatter(fg_rpca[:, 0], fg_rpca[:, 1], marker='*')
    ax.scatter(bg_rpca[:, 0], bg_rpca[:, 1], marker='+')
    set_ax_lims(ax)

    ax.set_xticks([])
    ax.set_yticks([])
    rpca_data = np.vstack((fg_rpca, bg_rpca))
    rpca_silhouette = measure_silhouette(rpca_data)
    if annotate_sil:
        annotate_location = get_annotate_loc(ax, rpca_data)
        ax.annotate("S: {:.3f}".format(rpca_silhouette), annotate_location)

    plt.tight_layout()
    plt.savefig("rPCA")
    print("rPCA Silhouette: {:.3f}".format(rpca_silhouette))

    print("Fitting CCA...", end='')
    t = time.time()
    from sklearn.cross_decomposition import CCA
    cca = CCA(n_components=n_components, scale=True)
    cca.fit(all_data, np.vstack((np.ones((n_fg, 1)), np.zeros((n_bg, 1)))))
    cca_components = cca.x_weights_.T
    cca_all_data = cca.transform(all_data).dot(
        cca_components)  #cca.predict(train_data)#.dot(cca_components)
    fg_cca = cca_all_data[:n_fg]
    bg_cca = cca_all_data[n_fg:]
    fig = plt.figure()
    ax = plt.gca()
    ax.scatter(fg_cca[:, 0], fg_cca[:, 1], marker='*')
    ax.scatter(bg_cca[:, 0], bg_cca[:, 1], marker='+')

    set_ax_lims(ax)
    ax.set_xticks([])
    ax.set_yticks([])
    plt.tight_layout()
    plt.savefig("CCA")

    # sPCA
    #plt.subplot(2, 4, 5)
    fig = plt.figure()
    ax = plt.gca()
    spca = SparsePCA(n_components=n_components,
                     max_iter=1000,
                     verbose=False,
                     alpha=10.,
                     ridge_alpha=0.0)
    spca.fit(all_data)
    spca_components = spca.components_
    spca_all_data = spca.fit_transform(all_data).dot(spca_components)
    fg_spca = spca_all_data[:n_fg]
    bg_spca = spca_all_data[n_fg:]
    ax.scatter(fg_spca[:, 0], fg_spca[:, 1], marker='*')
    ax.scatter(bg_spca[:, 0], bg_spca[:, 1], marker='+')

    set_ax_lims(ax)
    ax.set_xticks([])
    ax.set_yticks([])
    spca_data = np.vstack((fg_spca, bg_spca))
    spca_silhouette = measure_silhouette(spca_data)
    if annotate_sil:
        annotate_location = get_annotate_loc(ax, spca_data)
        ax.annotate("S: {:.3f}".format(spca_silhouette), annotate_location)
    plt.tight_layout()
    plt.savefig("sPCA")
    print("sPCA Silhouette: {:.3f}".format(spca_silhouette))

    # LDA
    #plt.subplot(2, 4, 5)
    fig = plt.figure()
    ax = plt.gca()
    t = time.time()
    lda = LDA(n_components=n_components)
    lda.fit(all_data, np.vstack((np.ones((n_fg, 1)), np.zeros((n_bg, 1)))))
    lda_all_data = lda.transform(all_data).dot(lda.scalings_.T)
    print("LDA took {:.3f} seconds".format(time.time() - t))
    fg_lda = lda_all_data[:n_fg]
    bg_lda = lda_all_data[n_fg:]
    ax.scatter(fg_lda[:, 0], fg_lda[:, 1], marker='*')
    ax.scatter(bg_lda[:, 0], bg_lda[:, 1], marker='+')

    set_ax_lims(ax)
    ax.set_xticks([])
    ax.set_yticks([])
    lda_data = np.vstack((fg_lda, bg_lda))
    lda_silhouette = measure_silhouette(lda_data)
    annotate_location = get_annotate_loc(ax, lda_data)
    if annotate_sil:
        ax.annotate("S: {:.3f}".format(lda_silhouette), annotate_location)
    print("LDA Silhouette: {:.3f}".format(lda_silhouette))
    plt.tight_layout()
    plt.savefig("LDA")
    #print(lda.scalings_)

    # Supervised PCA
    sup_pca = supervised_pca.SupervisedPCAClassifier(n_components=n_components)
    sup_pca.fit(all_data, np.vstack((np.ones((n_fg, 1)), np.zeros((n_bg, 1)))))
    fg_sup_pca = sup_pca.get_transformed_data(foreground_data).dot(
        sup_pca.get_components())
    bg_sup_pca = sup_pca.get_transformed_data(background_data).dot(
        sup_pca.get_components())
    fig = plt.figure()
    ax = plt.gca()
    ax.scatter(fg_sup_pca[:, 0], fg_sup_pca[:, 1], marker='*')
    ax.scatter(bg_sup_pca[:, 0], bg_sup_pca[:, 1], marker='+')
    set_ax_lims(ax)
    ax.set_xticks([])
    ax.set_yticks([])
    sup_pca_data = np.vstack((fg_sup_pca, bg_sup_pca))
    sup_silhouette = measure_silhouette(sup_pca_data)
    annotate_location = get_annotate_loc(ax, sup_pca_data)
    if annotate_sil:
        ax.annotate("S: {:.3f}".format(sup_silhouette), annotate_location)
    print("SupPCA Silhouette: {:.3f}".format(sup_silhouette))
    plt.tight_layout()
    plt.savefig("supPCA")

    # PLSRegression
    from sklearn.cross_decomposition import PLSRegression
    plsr = PLSRegression(n_components=n_components, scale=False)
    plsr.fit(all_data, np.vstack((np.ones((n_fg, 1)), np.zeros((n_bg, 1)))))
    fg_plsr = plsr.x_scores_[:n_fg].dot(plsr.x_weights_.T)
    bg_plsr = plsr.x_scores_[n_fg:].dot(plsr.x_weights_.T)
    #print(plsr.x_scores_.shape)
    #print(plsr.x_weights_.shape)
    fig = plt.figure()
    ax = plt.gca()
    ax.scatter(fg_plsr[:, 0], fg_plsr[:, 1], marker='*')
    ax.scatter(bg_plsr[:, 0], bg_plsr[:, 1], marker='+')
    set_ax_lims(ax)
    ax.set_xticks([])
    ax.set_yticks([])
    plt.tight_layout()
    plt.savefig("PLSR")
    #plsr_silhouette = measure_silhouette()

    # dPCA-Mean
    x = np.mean(foreground_data, axis=0) - np.mean(background_data, axis=0)
    pca = PCA(n_components=n_components)
    x = x.reshape((1, -1))
    pca.fit(np.vstack((x, np.zeros_like(x))))
    dpca_mean_components = pca.components_
    print(dpca_mean_components)
    dpca_mean_transformed = pca.transform(all_data).dot(dpca_mean_components)
    fg_dpca_mean = dpca_mean_transformed[:n_fg]
    bg_dpca_mean = dpca_mean_transformed[n_fg:]
    #plt.subplot(2, 4, 6)
    fig = plt.figure()
    ax = plt.gca()
    ax.scatter(fg_dpca_mean[:, 0], fg_dpca_mean[:, 1], marker='*')
    ax.scatter(bg_dpca_mean[:, 0], bg_dpca_mean[:, 1], marker='+')
    names = ["dPCA_mean FG", "dPCA_mean BG "]
    #ax.legend(names)

    set_ax_lims(ax)
    ax.set_xticks([])
    ax.set_yticks([])
    dpca_mean_data = np.vstack((fg_dpca_mean, bg_dpca_mean))
    dpca_mean_silhouette = measure_silhouette(dpca_mean_data)
    annotate_location = get_annotate_loc(ax, dpca_mean_data)
    if annotate_sil:
        ax.annotate("S: {:.3f}".format(dpca_mean_silhouette),
                    annotate_location)
    plt.tight_layout()
    plt.savefig("dPCA-Mean")
    print("dPCA-mean Silhouette: {:.3f}".format(dpca_mean_silhouette))

    # dPCA
    pca.fit(differential_matched)
    dpca_components = pca.components_
    print(dpca_components)
    dpca_transformed = pca.transform(all_data).dot(dpca_components)
    fg_dpca = dpca_transformed[:n_fg]
    bg_dpca = dpca_transformed[n_fg:]
    #plt.subplot(2,4,7)
    fig = plt.figure()
    set_ax_lims(ax)
    ax = plt.gca()
    #fg_mapped = fg_dpca*dpca_components
    #bg_mapped = bg_dpca*dpca_components
    ax.scatter(fg_dpca[:, 0], fg_dpca[:, 1], marker='*')
    #ax.scatter(fg_diff_transformed[:, 0], fg_diff_transformed[:, 1], marker='+')
    ax.scatter(bg_dpca[:, 0], bg_dpca[:, 1], marker='+')
    #ax.scatter(bg_diff_transformed[:, 0], bg_diff_transformed[:, 1], marker='*')
    #names.append("dPCA FG")
    #names.append("dPCA BG")
    #names = ["dPCA FG", "dPCA BG"]
    #ax.legend(names)

    ax.set_xticks([])
    ax.set_yticks([])
    dpca_data = np.vstack((fg_dpca, bg_dpca))
    dpca_silhouette = measure_silhouette(dpca_data)
    annotate_location = get_annotate_loc(ax, dpca_data)
    if annotate_sil:
        ax.annotate("S: {:.3f}".format(dpca_silhouette), annotate_location)
    plt.tight_layout()
    plt.savefig("dPCA-Matched")
    print("dPCA-Matched Silhouette: {:.3f}".format(dpca_silhouette))

    # dPCA
    pca.fit(differential_unmatched)
    dpca_components = pca.components_
    print(dpca_components)
    dpca_transformed = pca.transform(all_data).dot(dpca_components)
    fg_dpca = dpca_transformed[:n_fg]
    bg_dpca = dpca_transformed[n_fg:]
    #plt.subplot(2,4,7)
    fig = plt.figure()
    set_ax_lims(ax)
    ax = plt.gca()
    #fg_mapped = fg_dpca*dpca_components
    #bg_mapped = bg_dpca*dpca_components
    ax.scatter(fg_dpca[:, 0], fg_dpca[:, 1], marker='*')
    #ax.scatter(fg_diff_transformed[:, 0], fg_diff_transformed[:, 1], marker='+')
    ax.scatter(bg_dpca[:, 0], bg_dpca[:, 1], marker='+')
    #ax.scatter(bg_diff_transformed[:, 0], bg_diff_transformed[:, 1], marker='*')
    #names.append("dPCA FG")
    #names.append("dPCA BG")
    #names = ["dPCA FG", "dPCA BG"]
    #ax.legend(names)

    ax.set_xticks([])
    ax.set_yticks([])
    dpca_data = np.vstack((fg_dpca, bg_dpca))
    dpca_silhouette = measure_silhouette(dpca_data)
    annotate_location = get_annotate_loc(ax, dpca_data)
    if annotate_sil:
        ax.annotate("S: {:.3f}".format(dpca_silhouette), annotate_location)
    plt.tight_layout()
    plt.savefig("dPCA-Unmatched")
    print("dPCA-Unmatched Silhouette: {:.3f}".format(dpca_silhouette))

    # drPCA
    t = time.time()
    rpca = R_pca(differential_matched)
    L, S = rpca.fit(max_iter=10000, iter_print=1000)
    drpca_components, drpca_evals, drpca_evecs = get_differential(
        L, n_components)
    fg_drpca = foreground_data.dot(drpca_evecs)
    fg_drpca = np.array(
        [fg_drpca[i, 0] * drpca_evecs[:, 0] for i in range(len(fg_drpca))])
    bg_drpca = background_data.dot(drpca_evecs)
    bg_drpca = np.array(
        [bg_drpca[i, 0] * drpca_evecs[:, 0] for i in range(len(bg_drpca))])
    print("drPCA took {:.3f} seconds".format(time.time() - t))
    #plt.subplot(2,4,7)
    fig = plt.figure()
    ax = plt.gca()
    set_ax_lims(ax)
    ax.scatter(fg_drpca[:, 0], fg_drpca[:, 1], marker='*')
    ax.scatter(bg_drpca[:, 0], bg_drpca[:, 1], marker='+')
    names = ["drPCA FG", "drPCA BG"]
    drpca_data = np.vstack((fg_drpca, bg_drpca))
    drpca_silhouette = measure_silhouette(drpca_data)
    annotate_location = get_annotate_loc(ax, drpca_data)
    ax.set_xticks([])
    ax.set_yticks([])
    if annotate_sil:
        ax.annotate("S: {:.3f}".format(drpca_silhouette), annotate_location)
    print("drPCA-Matched Silhouette: {:.3f}".format(drpca_silhouette))
    plt.tight_layout()
    plt.savefig("drPCA-Matched")

    # drPCA
    t = time.time()
    rpca = R_pca(differential_unmatched)
    L, S = rpca.fit(max_iter=10000, iter_print=1000)
    drpca_components, drpca_evals, drpca_evecs = get_differential(
        L, n_components)
    fg_drpca = foreground_data.dot(drpca_evecs)
    fg_drpca = np.array(
        [fg_drpca[i, 0] * drpca_evecs[:, 0] for i in range(len(fg_drpca))])
    bg_drpca = background_data.dot(drpca_evecs)
    bg_drpca = np.array(
        [bg_drpca[i, 0] * drpca_evecs[:, 0] for i in range(len(bg_drpca))])
    print("drPCA took {:.3f} seconds".format(time.time() - t))
    #plt.subplot(2,4,7)
    fig = plt.figure()
    ax = plt.gca()
    set_ax_lims(ax)
    ax.scatter(fg_drpca[:, 0], fg_drpca[:, 1], marker='*')
    ax.scatter(bg_drpca[:, 0], bg_drpca[:, 1], marker='+')
    names = ["drPCA FG", "drPCA BG"]
    drpca_data = np.vstack((fg_drpca, bg_drpca))
    drpca_silhouette = measure_silhouette(drpca_data)
    annotate_location = get_annotate_loc(ax, drpca_data)
    ax.set_xticks([])
    ax.set_yticks([])
    if annotate_sil:
        ax.annotate("S: {:.3f}".format(drpca_silhouette), annotate_location)
    print("drPCA-Unmatched Silhouette: {:.3f}".format(drpca_silhouette))
    plt.tight_layout()
    plt.savefig("drPCA-Unmatched")

    # dsPCA
    #plt.subplot(2, 4, 8)
    fig = plt.figure()
    ax = plt.gca()
    spca = SparsePCA(n_components=n_components,
                     max_iter=1000,
                     verbose=False,
                     alpha=10.,
                     ridge_alpha=0.0)
    spca.fit(differential_matched)
    dspca_components = spca.components_
    dspca_all_data = spca.transform(all_data).dot(dspca_components)
    fg_dspca = dspca_all_data[:n_fg]
    bg_dspca = dspca_all_data[n_fg:]
    #fg_dspca = spca.transform(foreground_data, ridge_alpha=0.0).dot(dspca_components)
    #bg_dspca = spca.transform(background_data, ridge_alpha=0.0).dot(dspca_components)
    ax.scatter(fg_dspca[:, 0], fg_dspca[:, 1], marker='*')
    ax.scatter(bg_dspca[:, 0], bg_dspca[:, 1], marker='+')
    plt.tight_layout()
    plt.savefig("dsPCA-Matched")
    set_ax_lims(ax)
    ax.set_xticks([])
    ax.set_yticks([])
    dspca_data = np.vstack((fg_dspca, bg_dspca))
    dspca_silhouette = measure_silhouette(dspca_data)
    annotate_location = get_annotate_loc(ax, dspca_data)
    if annotate_sil:
        ax.annotate("S: {:.3f}".format(dspca_silhouette), annotate_location)
    print("dsPCA-Matched Silhouette: {:.3f}".format(dspca_silhouette))

    # dsPCA (unmatched differential)
    #plt.subplot(2, 4, 8)
    fig = plt.figure()
    ax = plt.gca()
    spca = SparsePCA(n_components=n_components,
                     max_iter=1000,
                     verbose=False,
                     alpha=10.,
                     ridge_alpha=0.0)
    spca.fit(differential_unmatched)
    dspca_components = spca.components_
    dspca_all_data = spca.transform(all_data).dot(dspca_components)
    fg_dspca = dspca_all_data[:n_fg]
    bg_dspca = dspca_all_data[n_fg:]
    #fg_dspca = spca.transform(foreground_data, ridge_alpha=0.0).dot(dspca_components)
    #bg_dspca = spca.transform(background_data, ridge_alpha=0.0).dot(dspca_components)
    ax.scatter(fg_dspca[:, 0], fg_dspca[:, 1], marker='*')
    ax.scatter(bg_dspca[:, 0], bg_dspca[:, 1], marker='+')
    plt.tight_layout()
    plt.savefig("dsPCA-Unmatched")
    set_ax_lims(ax)
    ax.set_xticks([])
    ax.set_yticks([])
    dspca_data = np.vstack((fg_dspca, bg_dspca))
    dspca_silhouette = measure_silhouette(dspca_data)
    annotate_location = get_annotate_loc(ax, dspca_data)
    if annotate_sil:
        ax.annotate("S: {:.3f}".format(dspca_silhouette), annotate_location)
    print("dsPCA-Unmatched Silhouette: {:.3f}".format(dspca_silhouette))

    # ICA
    print("Fitting ICA...", end='')
    t = time.time()
    ica = FastICA(n_components=n_components, max_iter=1000)
    ica.fit(all_data)
    #print(ica.mixing_)
    print("Took {:.3f} seconds.".format(time.time() - t))
    fg_ica = ica.transform(foreground_data).dot(ica.mixing_.T)
    bg_ica = ica.transform(background_data).dot(ica.mixing_.T)
    print(fg_ica)
    print(bg_ica)
    fig = plt.figure()
    ax = plt.gca()
    ax.scatter(fg_ica[:, 0], fg_ica[:, 1], marker='*')
    ax.scatter(bg_ica[:, 0], bg_ica[:, 1], marker='+')
    set_ax_lims(ax)
    ax.set_xticks([])
    ax.set_yticks([])
    ica_data = np.vstack((fg_ica, bg_ica))
    ica_silhouette = measure_silhouette(ica_data)
    annotate_location = get_annotate_loc(ax, ica_data)
    if annotate_sil:
        ax.annotate("S: {:.3f}".format(ica_silhouette), annotate_location)
    plt.tight_layout()
    plt.savefig("ICA")
    print("ICA Silhouette: {:.3f}".format(ica_silhouette))

    # dICA (matched differential)
    print("Fitting dICA...", end='')
    t = time.time()
    dica = FastICA(n_components=n_components, max_iter=1000)
    dica.fit(differential_matched)
    print("Took {:.3f} seconds.".format(time.time() - t))
    fg_dica = dica.transform(foreground_data).dot(dica.mixing_.T)
    bg_dica = dica.transform(background_data).dot(dica.mixing_.T)
    fig = plt.figure()
    ax = plt.gca()
    ax.scatter(fg_dica[:, 0], fg_dica[:, 1], marker='*')
    ax.scatter(bg_dica[:, 0], bg_dica[:, 1], marker='+')

    set_ax_lims(ax)
    ax.set_xticks([])
    ax.set_yticks([])
    dica_data = np.vstack((fg_dica, bg_dica))
    dica_silhouette = measure_silhouette(dica_data)
    annotate_location = get_annotate_loc(ax, dica_data)
    if annotate_sil:
        ax.annotate("S: {:.3f}".format(dica_silhouette), annotate_location)
    print("dICA-Matched Silhouette: {:.3f}".format(dica_silhouette))
    plt.tight_layout()
    plt.savefig("dICA-Matched")

    # dICA (unmatched differential)
    print("Fitting dICA...", end='')
    t = time.time()
    dica = FastICA(n_components=n_components, max_iter=1000)
    dica.fit(differential_unmatched)
    print("Took {:.3f} seconds.".format(time.time() - t))
    print(dica.components_)
    fg_dica = dica.transform(foreground_data).dot(dica.mixing_.T)
    bg_dica = dica.transform(background_data).dot(dica.mixing_.T)
    fig = plt.figure()
    ax = plt.gca()
    ax.scatter(fg_dica[:, 0], fg_dica[:, 1], marker='*')
    ax.scatter(bg_dica[:, 0], bg_dica[:, 1], marker='+')

    set_ax_lims(ax)
    ax.set_xticks([])
    ax.set_yticks([])
    dica_data = np.vstack((fg_dica, bg_dica))
    dica_silhouette = measure_silhouette(dica_data)
    annotate_location = get_annotate_loc(ax, dica_data)
    if annotate_sil:
        ax.annotate("S: {:.3f}".format(dica_silhouette), annotate_location)
    print("dICA-Unmatched Silhouette: {:.3f}".format(dica_silhouette))
    plt.tight_layout()
    plt.savefig("dICA-Unmatched")
    #plt.scatter(reduced[:n_fg, 0], reduced[:n_fg, 1])
    #plt.scatter(reduced[n_fg:, 0], reduced[n_fg:, 1])
    #plt.scatter(foreground_data[:, 0], foreground_data[:, 1])
    #plt.scatter(background_data[:, 0], background_data[:, 1])
    #plt.scatter(fg_diff_transformed[:, 0], fg_diff_transformed[:, 1])
    #plt.scatter(bg_diff_transformed[:, 0], bg_diff_transformed[:, 1])
    #plt.legend(names)
    #plt.title("Toy Example of Differential PCA")

    #plt.suptitle(title)
    """