def calculate_cpca(dataFrame, illness):
    # Remove the illness columns from the data and background frames
    df = dataFrame.loc[:, dataFrame.columns.difference(["K760", "D50*"])]
    cases = dataFrame[(dataFrame[illness] == 1) | (dataFrame[illness] == 2)]
    data = df.loc[cases.index].values
    background = df[dataFrame[illness] == 3].values

    # Label each row by (sex, illness status):
    # 0 = sex 1 / status 1, 1 = sex 2 / status 1, 2 = sex 1 / status 2, 3 = sex 2 / status 2.
    # Building the labels per row keeps them aligned with `data` regardless of row order.
    labels = (
        (cases["sex_f31_0_0"] - 1) + 2 * (cases[illness] - 1)
    ).astype(int).tolist()

    # mdl = CPCA(n_components=4)
    mdl = CPCA()
    projected_data = mdl.fit_transform(
        data, background, plot=True, active_labels=labels
    )
    return projected_data
def cpca(ill, control, dataframe, background):
    mdl = CPCA(n_components=len(cd.values.features))
    data_cpca = mdl.fit_transform(
        dataframe, background, alpha_selection="manual", alpha_value=1.06
    )
    mean = data_cpca.mean(axis=0)

    ill_data = mdl.fit_transform(
        ill, background, alpha_selection="manual", alpha_value=1.06
    )
    control_data = mdl.fit_transform(
        control, background, alpha_selection="manual", alpha_value=1.06
    )

    # Distance of each projected sample from the mean of the full projected data
    mean = mean.reshape(1, -1)
    ill_diff = sp.spatial.distance.cdist(mean, ill_data)[0]
    control_diff = sp.spatial.distance.cdist(mean, control_data)[0]

    # Drop the single largest control distance (outlier)
    control_diff = numpy.delete(control_diff, control_diff.argmax())

    # Two-sample Kolmogorov-Smirnov test between the two distance distributions
    ks_test2 = st.ks_2samp(control_diff, ill_diff)
    print(ks_test2)

    seaborn.distplot(
        control_diff,
        label="control",
        hist_kws={"cumulative": True},
        kde_kws={"cumulative": True},
    )
    seaborn.distplot(
        ill_diff,
        label="ill",
        hist_kws={"cumulative": True},
        kde_kws={"cumulative": True},
    )
    plt.legend()
    plt.show()

    print("Diff ill:", numpy.sort(ill_diff))
    print("Diff Control:", numpy.sort(control_diff))
    numpy.savetxt("1-cpca.csv", ill_diff, delimiter=",")
    numpy.savetxt("2-cpca.csv", control_diff, delimiter=",")
def preform_cpca(X_train, X_test, background, alpha=1.06):
    """
    Returns the train and test data after CPCA calculations.
    """
    mdl = CPCA(n_components=len(values.features))
    X_train = mdl.fit_transform(
        X_train, background, alpha_selection="manual", alpha_value=alpha
    )
    # Convert to a NumPy array so the CPCA transform will work
    test = X_test.to_numpy()
    X_test = mdl.transform(test, alpha_selection="manual", alpha_value=alpha)
    return X_train, X_test
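# A self-contained sketch of the same train/test cPCA pattern on synthetic
# NumPy data, assuming `from contrastive import CPCA` as in the snippets below;
# the shapes, alpha value, and n_components here are illustrative assumptions,
# not values from the original pipeline.
def _cpca_train_test_sketch():
    import numpy as np

    rng = np.random.RandomState(0)
    X_train = rng.normal(size=(80, 10))      # foreground training data
    X_test = rng.normal(size=(20, 10))       # held-out foreground data
    background = rng.normal(size=(50, 10))   # background (e.g. control) data

    mdl = CPCA(n_components=4)
    # Fit the contrastive directions on training foreground vs. background,
    # then project both splits with the same manually chosen alpha.
    train_proj = mdl.fit_transform(
        X_train, background, alpha_selection="manual", alpha_value=1.06
    )
    test_proj = mdl.transform(X_test, alpha_selection="manual", alpha_value=1.06)
    return train_proj, test_proj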
def contrastive_pca(background, foreground, alpha=np.log10(0.5), n=50):
    """Perform contrastive PCA, maximizing variance in the foreground data
    while minimizing variance in the background data for a given trade-off
    parameter alpha, and return the foreground projected onto the top n
    contrastive components.
    """
    background_data = np.array(background)
    foreground_data = np.array(foreground)
    assert foreground_data.shape[1] == background_data.shape[1]

    mdl = CPCA(n_components=n)
    projected_data = mdl.fit_transform(
        foreground_data, background_data,
        alpha_selection='manual', alpha_value=alpha,
    )
    return projected_data
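# Hypothetical usage of contrastive_pca on synthetic arrays, just to illustrate
# the expected call signature and input shapes; the random data and the alpha
# override are made up for this sketch.
def _contrastive_pca_example():
    import numpy as np

    rng = np.random.RandomState(1)
    foreground = rng.normal(size=(100, 60))
    background = rng.normal(size=(200, 60))

    projected = contrastive_pca(background, foreground, alpha=1.0, n=50)
    print(projected.shape)  # roughly (n_foreground_samples, n)
    return projected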
def cpca_plot(dsver, dsname):
    ydata, Xdata = load_data(
        './data/processed/ds{0:04d}-{1}-train.csv'.format(dsver, dsname))
    ylabels = LevelMulti(targetmin=0.2, targetmax=0.8).transform(ydata.copy())

    _, Xback = load_data(
        './data/processed/ds{0:04d}-{1}-background-signal.csv'.format(
            dsver, dsname))
    CPCA().fit_transform(Xdata, Xback, plot=True, active_labels=ylabels)
    #CPCA().fit_transform(Xdata, Xback, plot=True, active_labels=ylabels, n_alphas=10, max_log_alpha=2, n_alphas_to_return=4)

    _, Xback = load_data(
        './data/processed/ds{0:04d}-{1}-background-nosignal.csv'.format(
            dsver, dsname))
    CPCA().fit_transform(Xdata, Xback, plot=True, active_labels=ylabels)
def logistic_regression_cpca(data):
    dataFrame = data[data["K760"] != 3]
    x = dataFrame.drop(columns=["K760", "D50*"])
    y1 = dataFrame["K760"]
    y2 = dataFrame["D50*"]

    df = data.loc[:, data.columns.difference(["K760", "D50*"])]
    background = df[(data["K760"] == 3) | (data["D50*"] == 3)]
    background = background.values

    X_train, X_test, y_train, y_test = train_test_split(
        x, y2, test_size=0.3, random_state=13
    )

    mdl = CPCA(n_components=len(cd.features))
    projected_data = mdl.fit_transform(
        X_train, background, alpha_selection="manual", alpha_value=1.06
    )
    # Convert to a NumPy array so the CPCA transform will work
    test = X_test.to_numpy()
    test_data = mdl.transform(test, alpha_selection="manual", alpha_value=1.06)

    lg = LogisticRegression(
        random_state=13, class_weight={1: 1, 2: 1}, max_iter=5000
    )
    lg.fit(projected_data, y_train)
    y_pred = lg.predict(test_data)

    # performance
    con_matrix = confusion_matrix(y_test, y_pred)
    auc = roc_auc_score(y_test, y_pred)
    # The classifier was fitted on the CPCA-projected data, so probabilities
    # must also be computed on the projected test set, not the raw X_test.
    y_pred_proba = lg.predict_proba(test_data)[:, 1]

    print("##### CPCA #####")
    print(f"Accuracy Score: {accuracy_score(y_test, y_pred)}")
    print(f"Confusion Matrix: \n{con_matrix}")
    print(f"Area Under Curve: {auc}")
    print(f"Recall score: {recall_score(y_test, y_pred)}")
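# Optional variant (not part of the original code): compute the AUC from
# predicted probabilities rather than hard class labels, which is the more
# common use of roc_auc_score. Assumes a fitted `lg`, the CPCA-projected
# `test_data`, and `y_test` with binary labels {1, 2} so that lg.classes_
# is [1, 2] and column 1 of predict_proba is the positive class.
def _auc_from_probabilities(lg, test_data, y_test):
    proba_pos = lg.predict_proba(test_data)[:, 1]
    return roc_auc_score(y_test, proba_pos)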
def cpca_data(dsver, dsname, alpha, dstype='train', bgname='nosignal'):
    _, Xback = load_data(
        './data/processed/ds{0:04d}-{1}-background-{2}.csv'.format(
            dsver, dsname, bgname))
    ydata, Xdata = load_data(
        './data/processed/ds{0:04d}-{1}-{2}.csv'.format(dsver, dsname, dstype))
    ylabels = LevelMulti(targetmin=0.2, targetmax=0.8).transform(ydata.copy())
    Xpca = CPCA(n_components=2).fit_transform(
        Xdata, Xback, alpha_selection='manual', alpha_value=alpha)
    return ylabels, Xpca
def calculate_cpca_alpha(dataFrame, alpha, illness):
    # Remove the illnesses from the data and background frames
    df = dataFrame.loc[:, dataFrame.columns.difference(["K760", "D50*"])]
    data = df[(dataFrame[illness] == 2) | (dataFrame[illness] == 1)]
    data = data.values
    background = df[dataFrame[illness] == 3]
    background = background.values

    print("Num of features:", len(features))
    mdl = CPCA(n_components=len(features))
    projected_data = mdl.fit_transform(
        data, background, alpha_selection="manual", alpha_value=alpha
    )
    return projected_data
def cpca_score(dsver, dsname, bgname, alpha):
    _, Xback = load_data(
        './data/processed/ds{0:04d}-{1}-background-{2}.csv'.format(
            dsver, dsname, bgname))
    ydata, Xdata = load_data(
        './data/processed/ds{0:04d}-{1}-train.csv'.format(dsver, dsname))
    ylabels = LevelMulti(targetmin=0.2, targetmax=0.8).transform(ydata.copy())
    Xpca = CPCA().fit_transform(
        Xdata, Xback, alpha_selection='manual', alpha_value=alpha)
    sscore = metrics.silhouette_score(Xpca, ylabels)
    print('CPCA {0}-{1} Silhouette Score: {2:.4f} alpha={3:.2f}'.format(
        dsname.capitalize(), bgname.capitalize(), sscore, alpha))
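# A hedged sketch of how cpca_score might be used to pick alpha by sweeping a
# grid of values and comparing silhouette scores; the dataset version, name,
# background name, and alpha grid are illustrative placeholders and the
# corresponding CSV files are assumed to exist.
def _sweep_cpca_alphas(dsver=1, dsname='example', bgname='nosignal'):
    import numpy as np

    for alpha in np.logspace(-1, 2, num=8):
        cpca_score(dsver, dsname, bgname, alpha)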
p = stats.f_oneway(
    DMSOtest[0], DMSOtest[1], DMSOtest[2], DMSOtest[3],
    DMSOtest[4], DMSOtest[5], DMSOtest[6]
)
# there is a difference between the DMSO controls between the years
plt.figure()
sns.swarmplot(x='date', y='PC_2', data=PC_df[PC_df['drug'] == 'DMSO'],
              color=lut['DMSO'])
plt.text(1, 0.3, '1way_anova, p=' + str(p[1]))
plt.savefig(os.path.join(savedir, 'PC2_1wayANOVA.png'))
plt.ylim([-0.5, 0.5])
plt.show()

#%% Implement contrastive PCA
from contrastive import CPCA

mdl = CPCA(n_components=50)

# use No_Compound as the background condition
foreground = np.array(featMatZ2[featMatZ2['drug'] != 'No_Compound']
                      .select_dtypes(include='float')
                      .drop(columns='concentration'))
background = np.array(featMatZ2[featMatZ2['drug'] == 'No_Compound']
                      .select_dtypes(include='float')
                      .drop(columns='concentration'))

Druglabels = featMatZ2[featMatZ2['drug'] != 'No_Compound']['drug'].to_frame().reset_index(drop=True)
Conclabels = featMatZ2[featMatZ2['drug'] != 'No_Compound']['concentration'].to_frame().reset_index(drop=True)
Datelabels = featMatZ2[featMatZ2['drug'] != 'No_Compound']['date'].to_frame().reset_index(drop=True)

# calculate cPCA with 50 PCs
projected_data = mdl.fit_transform(foreground, background)

# and now plot to compare the alphas
cPC_df = {}
cPCmean = {}
cPCsem = {}
values = dataset.values()
category = dataset.categoricalLabels[0]
print(values.shape)
print(representations.shape)

all_labels = np.array(
    [mts.categoricalFeatures[category] for mts in dataset.get_mtseries()])
labels = np.unique(all_labels)
print(labels)

# Split the representations into two groups by the last label value
genreA = labels[-1]
groupA = []
groupB = []
for i in range(len(all_labels)):
    if all_labels[i] == genreA:
        groupA += [representations[i]]
    else:
        groupB += [representations[i]]
groupA = np.array(groupA)
groupB = np.array(groupB)
print(groupA.shape)
print(groupB.shape)

# Contrast group B (foreground) against group A (background)
mdl = CPCA()
projected_data = mdl.fit_transform(groupB, groupA, gui=True)
print(projected_data)
# To do:
# 1. Do contrastive PCA
# 2. Label antipsychotics (typical, atypical, and test compounds) and pesticides
#    and look at the distribution of these compounds across multiple principal
#    components
# 3. Is it possible to train a classifier to differentiate between
#    antipsychotics and pesticides?
# 4. tSNE embedding
# =============================================================================

#%% cPCA - could use df.groupby function here
from contrastive import CPCA

mdl = CPCA(n_components=2)

foreground = np.array(featMatZ2[featMatZ2['drug'] != 'No_Compound']
                      .select_dtypes(include='float')
                      .drop(columns='concentration'))
background = np.array(featMatZ2[featMatZ2['drug'] == 'No_Compound']
                      .select_dtypes(include='float')
                      .drop(columns='concentration'))

Druglabels = featMatZ2[featMatZ2['drug'] != 'No_Compound']['drug'].to_frame().reset_index(drop=True)
Conclabels = featMatZ2[featMatZ2['drug'] != 'No_Compound']['concentration'].to_frame().reset_index(drop=True)
Datelabels = featMatZ2[featMatZ2['drug'] != 'No_Compound']['date'].to_frame().reset_index(drop=True)
MoAlabels = featMatZ2[featMatZ2['drug'] != 'No_Compound']['MoAGeneral'].to_frame().reset_index(drop=True)

# test and see what alpha looks best
mdl.fit_transform(foreground, background, plot=True, active_labels=Druglabels)
alpha1 = 1.34

# calculate cPCA with 50 PCs
mdl = CPCA(n_components=50)
projected_data = mdl.fit_transform(foreground, background)
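# Note that the chosen alpha1 above is not passed to the final fit_transform
# call. A hedged sketch of how the manually selected alpha could be applied,
# using the same manual-alpha API seen in the other snippets in this file:
def _project_with_chosen_alpha(foreground, background, alpha1=1.34):
    mdl = CPCA(n_components=50)
    return mdl.fit_transform(
        foreground, background, alpha_selection='manual', alpha_value=alpha1
    )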
#%matplotlib inline
from sklearn.cluster import AffinityPropagation, KMeans, DBSCAN, SpectralClustering
from sklearn.manifold import MDS, TSNE, Isomap
from sklearn.metrics import silhouette_score
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.linalg import logm, expm
from contrastive import CPCA

sheat = pd.read_csv("TNBC10vNormal10_Counts_4.csv", sep=",", header=0, index_col=0)
sheat2 = sheat.T
print(sheat2)
sheat2 = pd.DataFrame(sheat2)

# First ten samples are coded 1 (TNBC), last ten are coded 0 (normal controls);
# the last ten samples are reused as the cPCA background.
X, y = sheat2.iloc[:, :].values, np.array([1] * 10 + [0] * 10)

foreground_data = X[:, :]
background_data = X[10:20, :]

mdl = CPCA()
pre_cluster_labels = y
projected_data = mdl.fit_transform(
    foreground_data, background_data, plot=True, active_labels=pre_cluster_labels
)
def fit_all():
    if K == 3:
        n_components = 2
    else:
        n_components = 1
    pca = PCA(n_components=n_components)

    # Silhouette of a representation with respect to the foreground/background split
    measure_silhouette = lambda reps: silhouette_score(
        reps, np.ravel(np.vstack((np.ones((n_fg, 1)), np.zeros((n_bg, 1))))))

    def get_differential(data, numComponents=None):
        """Principal Components Analysis

        From: http://stackoverflow.com/a/13224592/834250

        Parameters
        ----------
        data : `numpy.ndarray`
            numpy array of data to analyse
        numComponents : `int`
            number of principal components to use

        Returns
        -------
        comps : `numpy.ndarray`
            Principal components
        evals : `numpy.ndarray`
            Eigenvalues
        evecs : `numpy.ndarray`
            Eigenvectors
        """
        m, n = data.shape
        data -= data.mean(axis=0)
        R = np.cov(data, rowvar=False)
        # use 'eigh' rather than 'eig' since R is symmetric;
        # the performance gain is substantial
        evals, evecs = np.linalg.eigh(R)
        idx = np.argsort(evals)[::-1]
        evecs = evecs[:, idx]
        evals = evals[idx]
        if numComponents is not None:
            evecs = evecs[:, :numComponents]
        # carry out the transformation on the data using eigenvectors
        # and return the re-scaled data, eigenvalues, and eigenvectors
        return np.dot(evecs.T, data.T).T, evals, evecs

    # Raw data
    names = []
    fig = plt.figure()
    if K == 3:
        from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 (registers the 3D projection)
        # A 3D axes is required here; plt.gca() alone would return a 2D axes
        # and set_zticks / 3-argument scatter would fail.
        ax = fig.add_subplot(projection='3d')
        ax.scatter(foreground_data[:, 0], foreground_data[:, 1],
                   foreground_data[:, 2], marker='*', alpha=0.5)
        ax.scatter(background_data[:, 0], background_data[:, 1],
                   background_data[:, 2], marker='+', alpha=0.5)
        ax.set_zticks([])
    else:
        ax = plt.gca()
        ax.scatter(foreground_data[:, 0], foreground_data[:, 1],
                   marker='*', alpha=0.5)
        ax.scatter(background_data[:, 0], background_data[:, 1],
                   marker='+', alpha=0.5)

    def get_annotate_loc(ax, data):
        # Place the silhouette annotation away from the main trend of the data
        if data[np.argmin(data[:, 0]), 1] < data[np.argmax(data[:, 0]), 1]:
            # angling up
            x_loc = (ax.get_xlim()[1] - ax.get_xlim()[0]) * 0.7 + ax.get_xlim()[0]
            y_loc = (ax.get_ylim()[1] - ax.get_ylim()[0]) * 0.1 + ax.get_ylim()[0]
        else:
            x_loc = (ax.get_xlim()[1] - ax.get_xlim()[0]) * 0.2 + ax.get_xlim()[0]
            y_loc = (ax.get_ylim()[1] - ax.get_ylim()[0]) * 0.1 + ax.get_ylim()[0]
        return [x_loc, y_loc]

    raw_silhouette = measure_silhouette(all_data)
    annotate_location = get_annotate_loc(ax, all_data)
    if annotate_sil:
        ax.annotate("S: {:.3f}".format(raw_silhouette), annotate_location)
    ax.set_xticks([])
    ax.set_yticks([])
    plt.tight_layout()
    plt.savefig("Raw Data")
    print("Raw Data Silhouette: {:.3f}".format(raw_silhouette))

    y_lims = ax.get_ylim()
    x_lims = ax.get_xlim()

    def set_ax_lims(ax):
        # Reuse the raw-data axis limits (plus 5% padding) for every method's plot
        y_expand = (y_lims[1] - y_lims[0]) * 0.05
        x_expand = (x_lims[1] - x_lims[0]) * 0.05
        ax.set_ylim([y_lims[0] - y_expand, y_lims[1] + y_expand])
        ax.set_xlim([x_lims[0] - x_expand, x_lims[1] + x_expand])

    set_ax_lims(ax)

    # Normal PCA
    pca = PCA(n_components=n_components)
    reduced = pca.fit_transform(all_data)
    reduced = reduced.dot(pca.components_)
    normal_components = pca.components_
    fig = plt.figure()
    ax = plt.gca()
    names = []
    ax.scatter(reduced[:n_fg, 0], reduced[:n_fg, 1], marker='*')
    ax.scatter(reduced[n_fg:, 0], reduced[n_fg:, 1], marker='+')
    pca_silhouette = measure_silhouette(reduced)
    set_ax_lims(ax)
    annotate_location = get_annotate_loc(ax, reduced)
    if annotate_sil:
        ax.annotate("S: {:.3f}".format(pca_silhouette), annotate_location)
    ax.set_xticks([])
    ax.set_yticks([])
    plt.tight_layout()
    plt.savefig("PCA")
    print("PCA Silhouette: {:.3f}".format(pca_silhouette))

    # Contrastive PCA
    # CPCA doesn't do dim reduction: it returns data the same size as the
    # input, so project onto the leading contrastive direction by hand.
    mdl = CPCA(n_components=2)
    alpha = 0
    mdl.fit(foreground_data, background_data)
    fg_cpca = mdl.transform(foreground_data)[0]
    pca.fit(mdl.fg_cov - alpha * mdl.bg_cov)
    fg_cpca = np.expand_dims(fg_cpca[:, 0], 1).dot(pca.components_)
    bg_cpca = pca.transform(background_data).dot(pca.components_)
    fig = plt.figure()
    ax = plt.gca()
    ax.scatter(fg_cpca[:, 0], fg_cpca[:, 1], marker='*')
    ax.scatter(bg_cpca[:, 0], bg_cpca[:, 1], marker='+')
    set_ax_lims(ax)
    ax.set_xticks([])
    ax.set_yticks([])
    cpca_data = np.vstack((fg_cpca, bg_cpca))
    cpca_silhouette = measure_silhouette(cpca_data)
    annotate_location = get_annotate_loc(ax, cpca_data)
    if annotate_sil:
        ax.annotate("S: {:.3f}".format(cpca_silhouette), annotate_location)
    plt.tight_layout()
    plt.savefig("cPCA")
    print("cPCA Silhouette: {:.3f}".format(cpca_silhouette))

    # RPCA
    L, S = R_pca(all_data).fit(max_iter=10000, iter_print=1000)
    rpca_components, rpca_evals, rpca_evecs = get_differential(L, n_components)
    fg_rpca = foreground_data.dot(rpca_evecs)
    fg_rpca = np.array(
        [fg_rpca[i, 0] * rpca_evecs[:, 0] for i in range(len(fg_rpca))])
    bg_rpca = background_data.dot(rpca_evecs)
    bg_rpca = np.array(
        [bg_rpca[i, 0] * rpca_evecs[:, 0] for i in range(len(bg_rpca))])
    fig = plt.figure()
    ax = plt.gca()
    ax.scatter(fg_rpca[:, 0], fg_rpca[:, 1], marker='*')
    ax.scatter(bg_rpca[:, 0], bg_rpca[:, 1], marker='+')
    set_ax_lims(ax)
    ax.set_xticks([])
    ax.set_yticks([])
    rpca_data = np.vstack((fg_rpca, bg_rpca))
    rpca_silhouette = measure_silhouette(rpca_data)
    if annotate_sil:
        annotate_location = get_annotate_loc(ax, rpca_data)
        ax.annotate("S: {:.3f}".format(rpca_silhouette), annotate_location)
    plt.tight_layout()
    plt.savefig("rPCA")
    print("rPCA Silhouette: {:.3f}".format(rpca_silhouette))

    # CCA
    print("Fitting CCA...", end='')
    t = time.time()
    from sklearn.cross_decomposition import CCA
    cca = CCA(n_components=n_components, scale=True)
    cca.fit(all_data, np.vstack((np.ones((n_fg, 1)), np.zeros((n_bg, 1)))))
    cca_components = cca.x_weights_.T
    cca_all_data = cca.transform(all_data).dot(cca_components)
    fg_cca = cca_all_data[:n_fg]
    bg_cca = cca_all_data[n_fg:]
    fig = plt.figure()
    ax = plt.gca()
    ax.scatter(fg_cca[:, 0], fg_cca[:, 1], marker='*')
    ax.scatter(bg_cca[:, 0], bg_cca[:, 1], marker='+')
    set_ax_lims(ax)
    ax.set_xticks([])
    ax.set_yticks([])
    plt.tight_layout()
    plt.savefig("CCA")

    # sPCA
    fig = plt.figure()
    ax = plt.gca()
    spca = SparsePCA(n_components=n_components, max_iter=1000, verbose=False,
                     alpha=10., ridge_alpha=0.0)
    spca.fit(all_data)
    spca_components = spca.components_
    spca_all_data = spca.fit_transform(all_data).dot(spca_components)
    fg_spca = spca_all_data[:n_fg]
    bg_spca = spca_all_data[n_fg:]
    ax.scatter(fg_spca[:, 0], fg_spca[:, 1], marker='*')
    ax.scatter(bg_spca[:, 0], bg_spca[:, 1], marker='+')
    set_ax_lims(ax)
    ax.set_xticks([])
    ax.set_yticks([])
    spca_data = np.vstack((fg_spca, bg_spca))
    spca_silhouette = measure_silhouette(spca_data)
    if annotate_sil:
        annotate_location = get_annotate_loc(ax, spca_data)
        ax.annotate("S: {:.3f}".format(spca_silhouette), annotate_location)
    plt.tight_layout()
    plt.savefig("sPCA")
    print("sPCA Silhouette: {:.3f}".format(spca_silhouette))

    # LDA
    fig = plt.figure()
    ax = plt.gca()
    t = time.time()
    lda = LDA(n_components=n_components)
    lda.fit(all_data, np.vstack((np.ones((n_fg, 1)), np.zeros((n_bg, 1)))))
    lda_all_data = lda.transform(all_data).dot(lda.scalings_.T)
    print("LDA took {:.3f} seconds".format(time.time() - t))
    fg_lda = lda_all_data[:n_fg]
    bg_lda = lda_all_data[n_fg:]
    ax.scatter(fg_lda[:, 0], fg_lda[:, 1], marker='*')
    ax.scatter(bg_lda[:, 0], bg_lda[:, 1], marker='+')
    set_ax_lims(ax)
    ax.set_xticks([])
    ax.set_yticks([])
    lda_data = np.vstack((fg_lda, bg_lda))
    lda_silhouette = measure_silhouette(lda_data)
    annotate_location = get_annotate_loc(ax, lda_data)
    if annotate_sil:
        ax.annotate("S: {:.3f}".format(lda_silhouette), annotate_location)
    print("LDA Silhouette: {:.3f}".format(lda_silhouette))
    plt.tight_layout()
    plt.savefig("LDA")

    # Supervised PCA
    sup_pca = supervised_pca.SupervisedPCAClassifier(n_components=n_components)
    sup_pca.fit(all_data, np.vstack((np.ones((n_fg, 1)), np.zeros((n_bg, 1)))))
    fg_sup_pca = sup_pca.get_transformed_data(foreground_data).dot(
        sup_pca.get_components())
    bg_sup_pca = sup_pca.get_transformed_data(background_data).dot(
        sup_pca.get_components())
    fig = plt.figure()
    ax = plt.gca()
    ax.scatter(fg_sup_pca[:, 0], fg_sup_pca[:, 1], marker='*')
    ax.scatter(bg_sup_pca[:, 0], bg_sup_pca[:, 1], marker='+')
    set_ax_lims(ax)
    ax.set_xticks([])
    ax.set_yticks([])
    sup_pca_data = np.vstack((fg_sup_pca, bg_sup_pca))
    sup_silhouette = measure_silhouette(sup_pca_data)
    annotate_location = get_annotate_loc(ax, sup_pca_data)
    if annotate_sil:
        ax.annotate("S: {:.3f}".format(sup_silhouette), annotate_location)
    print("SupPCA Silhouette: {:.3f}".format(sup_silhouette))
    plt.tight_layout()
    plt.savefig("supPCA")

    # PLSRegression
    from sklearn.cross_decomposition import PLSRegression
    plsr = PLSRegression(n_components=n_components, scale=False)
    plsr.fit(all_data, np.vstack((np.ones((n_fg, 1)), np.zeros((n_bg, 1)))))
    fg_plsr = plsr.x_scores_[:n_fg].dot(plsr.x_weights_.T)
    bg_plsr = plsr.x_scores_[n_fg:].dot(plsr.x_weights_.T)
    fig = plt.figure()
    ax = plt.gca()
    ax.scatter(fg_plsr[:, 0], fg_plsr[:, 1], marker='*')
    ax.scatter(bg_plsr[:, 0], bg_plsr[:, 1], marker='+')
    set_ax_lims(ax)
    ax.set_xticks([])
    ax.set_yticks([])
    plt.tight_layout()
    plt.savefig("PLSR")
    # dPCA-Mean
    x = np.mean(foreground_data, axis=0) - np.mean(background_data, axis=0)
    pca = PCA(n_components=n_components)
    x = x.reshape((1, -1))
    pca.fit(np.vstack((x, np.zeros_like(x))))
    dpca_mean_components = pca.components_
    print(dpca_mean_components)
    dpca_mean_transformed = pca.transform(all_data).dot(dpca_mean_components)
    fg_dpca_mean = dpca_mean_transformed[:n_fg]
    bg_dpca_mean = dpca_mean_transformed[n_fg:]
    fig = plt.figure()
    ax = plt.gca()
    ax.scatter(fg_dpca_mean[:, 0], fg_dpca_mean[:, 1], marker='*')
    ax.scatter(bg_dpca_mean[:, 0], bg_dpca_mean[:, 1], marker='+')
    names = ["dPCA_mean FG", "dPCA_mean BG"]
    set_ax_lims(ax)
    ax.set_xticks([])
    ax.set_yticks([])
    dpca_mean_data = np.vstack((fg_dpca_mean, bg_dpca_mean))
    dpca_mean_silhouette = measure_silhouette(dpca_mean_data)
    annotate_location = get_annotate_loc(ax, dpca_mean_data)
    if annotate_sil:
        ax.annotate("S: {:.3f}".format(dpca_mean_silhouette), annotate_location)
    plt.tight_layout()
    plt.savefig("dPCA-Mean")
    print("dPCA-mean Silhouette: {:.3f}".format(dpca_mean_silhouette))

    # dPCA (matched differential)
    pca.fit(differential_matched)
    dpca_components = pca.components_
    print(dpca_components)
    dpca_transformed = pca.transform(all_data).dot(dpca_components)
    fg_dpca = dpca_transformed[:n_fg]
    bg_dpca = dpca_transformed[n_fg:]
    fig = plt.figure()
    ax = plt.gca()
    set_ax_lims(ax)
    ax.scatter(fg_dpca[:, 0], fg_dpca[:, 1], marker='*')
    ax.scatter(bg_dpca[:, 0], bg_dpca[:, 1], marker='+')
    ax.set_xticks([])
    ax.set_yticks([])
    dpca_data = np.vstack((fg_dpca, bg_dpca))
    dpca_silhouette = measure_silhouette(dpca_data)
    annotate_location = get_annotate_loc(ax, dpca_data)
    if annotate_sil:
        ax.annotate("S: {:.3f}".format(dpca_silhouette), annotate_location)
    plt.tight_layout()
    plt.savefig("dPCA-Matched")
    print("dPCA-Matched Silhouette: {:.3f}".format(dpca_silhouette))

    # dPCA (unmatched differential)
    pca.fit(differential_unmatched)
    dpca_components = pca.components_
    print(dpca_components)
    dpca_transformed = pca.transform(all_data).dot(dpca_components)
    fg_dpca = dpca_transformed[:n_fg]
    bg_dpca = dpca_transformed[n_fg:]
    fig = plt.figure()
    ax = plt.gca()
    set_ax_lims(ax)
    ax.scatter(fg_dpca[:, 0], fg_dpca[:, 1], marker='*')
    ax.scatter(bg_dpca[:, 0], bg_dpca[:, 1], marker='+')
    ax.set_xticks([])
    ax.set_yticks([])
    dpca_data = np.vstack((fg_dpca, bg_dpca))
    dpca_silhouette = measure_silhouette(dpca_data)
    annotate_location = get_annotate_loc(ax, dpca_data)
    if annotate_sil:
        ax.annotate("S: {:.3f}".format(dpca_silhouette), annotate_location)
    plt.tight_layout()
    plt.savefig("dPCA-Unmatched")
    print("dPCA-Unmatched Silhouette: {:.3f}".format(dpca_silhouette))

    # drPCA (matched differential)
    t = time.time()
    rpca = R_pca(differential_matched)
    L, S = rpca.fit(max_iter=10000, iter_print=1000)
    drpca_components, drpca_evals, drpca_evecs = get_differential(
        L, n_components)
    fg_drpca = foreground_data.dot(drpca_evecs)
    fg_drpca = np.array(
        [fg_drpca[i, 0] * drpca_evecs[:, 0] for i in range(len(fg_drpca))])
    bg_drpca = background_data.dot(drpca_evecs)
    bg_drpca = np.array(
        [bg_drpca[i, 0] * drpca_evecs[:, 0] for i in range(len(bg_drpca))])
    print("drPCA took {:.3f} seconds".format(time.time() - t))
    fig = plt.figure()
    ax = plt.gca()
    set_ax_lims(ax)
    ax.scatter(fg_drpca[:, 0], fg_drpca[:, 1], marker='*')
    ax.scatter(bg_drpca[:, 0], bg_drpca[:, 1], marker='+')
    names = ["drPCA FG", "drPCA BG"]
    drpca_data = np.vstack((fg_drpca, bg_drpca))
    drpca_silhouette = measure_silhouette(drpca_data)
    annotate_location = get_annotate_loc(ax, drpca_data)
    ax.set_xticks([])
    ax.set_yticks([])
    if annotate_sil:
        ax.annotate("S: {:.3f}".format(drpca_silhouette), annotate_location)
    print("drPCA-Matched Silhouette: {:.3f}".format(drpca_silhouette))
    plt.tight_layout()
    plt.savefig("drPCA-Matched")

    # drPCA (unmatched differential)
    t = time.time()
    rpca = R_pca(differential_unmatched)
    L, S = rpca.fit(max_iter=10000, iter_print=1000)
    drpca_components, drpca_evals, drpca_evecs = get_differential(
        L, n_components)
    fg_drpca = foreground_data.dot(drpca_evecs)
    fg_drpca = np.array(
        [fg_drpca[i, 0] * drpca_evecs[:, 0] for i in range(len(fg_drpca))])
    bg_drpca = background_data.dot(drpca_evecs)
    bg_drpca = np.array(
        [bg_drpca[i, 0] * drpca_evecs[:, 0] for i in range(len(bg_drpca))])
    print("drPCA took {:.3f} seconds".format(time.time() - t))
    fig = plt.figure()
    ax = plt.gca()
    set_ax_lims(ax)
    ax.scatter(fg_drpca[:, 0], fg_drpca[:, 1], marker='*')
    ax.scatter(bg_drpca[:, 0], bg_drpca[:, 1], marker='+')
    names = ["drPCA FG", "drPCA BG"]
    drpca_data = np.vstack((fg_drpca, bg_drpca))
    drpca_silhouette = measure_silhouette(drpca_data)
    annotate_location = get_annotate_loc(ax, drpca_data)
    ax.set_xticks([])
    ax.set_yticks([])
    if annotate_sil:
        ax.annotate("S: {:.3f}".format(drpca_silhouette), annotate_location)
    print("drPCA-Unmatched Silhouette: {:.3f}".format(drpca_silhouette))
    plt.tight_layout()
    plt.savefig("drPCA-Unmatched")

    # dsPCA (matched differential)
    fig = plt.figure()
    ax = plt.gca()
    spca = SparsePCA(n_components=n_components, max_iter=1000, verbose=False,
                     alpha=10., ridge_alpha=0.0)
    spca.fit(differential_matched)
    dspca_components = spca.components_
    dspca_all_data = spca.transform(all_data).dot(dspca_components)
    fg_dspca = dspca_all_data[:n_fg]
    bg_dspca = dspca_all_data[n_fg:]
    ax.scatter(fg_dspca[:, 0], fg_dspca[:, 1], marker='*')
    ax.scatter(bg_dspca[:, 0], bg_dspca[:, 1], marker='+')
    plt.tight_layout()
    plt.savefig("dsPCA-Matched")
    set_ax_lims(ax)
    ax.set_xticks([])
    ax.set_yticks([])
    dspca_data = np.vstack((fg_dspca, bg_dspca))
    dspca_silhouette = measure_silhouette(dspca_data)
    annotate_location = get_annotate_loc(ax, dspca_data)
    if annotate_sil:
        ax.annotate("S: {:.3f}".format(dspca_silhouette), annotate_location)
    print("dsPCA-Matched Silhouette: {:.3f}".format(dspca_silhouette))

    # dsPCA (unmatched differential)
    fig = plt.figure()
    ax = plt.gca()
    spca = SparsePCA(n_components=n_components, max_iter=1000, verbose=False,
                     alpha=10., ridge_alpha=0.0)
    spca.fit(differential_unmatched)
    dspca_components = spca.components_
    dspca_all_data = spca.transform(all_data).dot(dspca_components)
    fg_dspca = dspca_all_data[:n_fg]
    bg_dspca = dspca_all_data[n_fg:]
    ax.scatter(fg_dspca[:, 0], fg_dspca[:, 1], marker='*')
    ax.scatter(bg_dspca[:, 0], bg_dspca[:, 1], marker='+')
    plt.tight_layout()
    plt.savefig("dsPCA-Unmatched")
    set_ax_lims(ax)
    ax.set_xticks([])
    ax.set_yticks([])
    dspca_data = np.vstack((fg_dspca, bg_dspca))
    dspca_silhouette = measure_silhouette(dspca_data)
    annotate_location = get_annotate_loc(ax, dspca_data)
    if annotate_sil:
        ax.annotate("S: {:.3f}".format(dspca_silhouette), annotate_location)
    print("dsPCA-Unmatched Silhouette: {:.3f}".format(dspca_silhouette))

    # ICA
    print("Fitting ICA...", end='')
    t = time.time()
    ica = FastICA(n_components=n_components, max_iter=1000)
    ica.fit(all_data)
    print("Took {:.3f} seconds.".format(time.time() - t))
    fg_ica = ica.transform(foreground_data).dot(ica.mixing_.T)
    bg_ica = ica.transform(background_data).dot(ica.mixing_.T)
    print(fg_ica)
    print(bg_ica)
    fig = plt.figure()
    ax = plt.gca()
    ax.scatter(fg_ica[:, 0], fg_ica[:, 1], marker='*')
    ax.scatter(bg_ica[:, 0], bg_ica[:, 1], marker='+')
    set_ax_lims(ax)
    ax.set_xticks([])
    ax.set_yticks([])
    ica_data = np.vstack((fg_ica, bg_ica))
    ica_silhouette = measure_silhouette(ica_data)
    annotate_location = get_annotate_loc(ax, ica_data)
    if annotate_sil:
        ax.annotate("S: {:.3f}".format(ica_silhouette), annotate_location)
    plt.tight_layout()
    plt.savefig("ICA")
    print("ICA Silhouette: {:.3f}".format(ica_silhouette))

    # dICA (matched differential)
    print("Fitting dICA...", end='')
    t = time.time()
    dica = FastICA(n_components=n_components, max_iter=1000)
    dica.fit(differential_matched)
    print("Took {:.3f} seconds.".format(time.time() - t))
    fg_dica = dica.transform(foreground_data).dot(dica.mixing_.T)
    bg_dica = dica.transform(background_data).dot(dica.mixing_.T)
    fig = plt.figure()
    ax = plt.gca()
    ax.scatter(fg_dica[:, 0], fg_dica[:, 1], marker='*')
    ax.scatter(bg_dica[:, 0], bg_dica[:, 1], marker='+')
    set_ax_lims(ax)
    ax.set_xticks([])
    ax.set_yticks([])
    dica_data = np.vstack((fg_dica, bg_dica))
    dica_silhouette = measure_silhouette(dica_data)
    annotate_location = get_annotate_loc(ax, dica_data)
    if annotate_sil:
        ax.annotate("S: {:.3f}".format(dica_silhouette), annotate_location)
    print("dICA-Matched Silhouette: {:.3f}".format(dica_silhouette))
    plt.tight_layout()
    plt.savefig("dICA-Matched")

    # dICA (unmatched differential)
    print("Fitting dICA...", end='')
    t = time.time()
    dica = FastICA(n_components=n_components, max_iter=1000)
    dica.fit(differential_unmatched)
    print("Took {:.3f} seconds.".format(time.time() - t))
    print(dica.components_)
    fg_dica = dica.transform(foreground_data).dot(dica.mixing_.T)
    bg_dica = dica.transform(background_data).dot(dica.mixing_.T)
    fig = plt.figure()
    ax = plt.gca()
    ax.scatter(fg_dica[:, 0], fg_dica[:, 1], marker='*')
    ax.scatter(bg_dica[:, 0], bg_dica[:, 1], marker='+')
    set_ax_lims(ax)
    ax.set_xticks([])
    ax.set_yticks([])
    dica_data = np.vstack((fg_dica, bg_dica))
    dica_silhouette = measure_silhouette(dica_data)
    annotate_location = get_annotate_loc(ax, dica_data)
    if annotate_sil:
        ax.annotate("S: {:.3f}".format(dica_silhouette), annotate_location)
    print("dICA-Unmatched Silhouette: {:.3f}".format(dica_silhouette))
    plt.tight_layout()
    plt.savefig("dICA-Unmatched")
    # plt.title("Toy Example of Differential PCA")
    # plt.suptitle(title)