def generate_model_point(b, pca):
    """Reconstruct a shape vector from a set of model parameters.

    Evaluates ``x = P . b + xm`` where ``P`` is the transpose of whatever
    ``PCA_analysis.get_eigenvectors(pca)`` returns and ``xm`` is the value
    returned by ``PCA_analysis.get_mean(pca)``.

    Args:
        b: Model parameter vector multiplied against the transposed
            eigenvector matrix.
        pca: PCA object accepted by the ``PCA_analysis`` helper functions.

    Returns:
        ``np.dot(P, b) + xm``.
    """
    basis = PCA_analysis.get_eigenvectors(pca).transpose()
    mean_shape = PCA_analysis.get_mean(pca)
    return np.dot(basis, b) + mean_shape
def plot_eigenvalues():
    """Plot the first 14 eigenvalues of the PCA model built for tooth 0.

    Reads the module-level ``landmarks`` array, takes column 0, fits a PCA
    through ``PCA_analysis.PCA_analysis`` and shows a line plot of the
    eigenvalues against their component index.
    """
    tooth_shapes = landmarks[:, 0]
    model = PCA_analysis.PCA_analysis(tooth_shapes, None)
    eigenvalues = PCA_analysis.get_eigenvalues(model)

    component_ids = range(14)
    # Copy the leading eigenvalues into a float array, one by one, so that a
    # model with fewer than 14 eigenvalues still fails with IndexError.
    heights = np.array([eigenvalues[k] for k in component_ids], dtype=float)

    plt.plot(component_ids, heights)
    plt.title('Influence of eigenvalues')
    plt.xlabel('n-th principal component')
    plt.ylabel('Eigenvalue')
    plt.show()
def preperation_all(radiograph, all_landmarks):
    """Prepare the inputs needed to fit every tooth on one radiograph.

    Args:
        radiograph: Image handed to
            ``prep.calc_external_img_active_contour``.
        all_landmarks: Landmark data handed to ``PCA.PCA_analysis_all``.

    Returns:
        Tuple ``(edge_img, pcas_tooth)``: the image produced by
        ``prep.calc_external_img_active_contour(radiograph)`` and the result
        of ``PCA.PCA_analysis_all(all_landmarks, None)``.
    """
    return (
        prep.calc_external_img_active_contour(radiograph),
        PCA.PCA_analysis_all(all_landmarks, None),
    )
def show_tooth_variatins(toothID, pcaID, size):
    """Generate the shape obtained by varying one principal component.

    Builds a PCA model for tooth ``toothID`` from the landmarks returned by
    ``FileManager.load_landmarks_std()``, sets the single parameter
    ``pcaID`` to ``size`` times the bound from ``get_range_of`` and
    reconstructs the corresponding shape.

    Args:
        toothID: Column index into the loaded landmark array.
        pcaID: Index of the model parameter to vary (must be < 14).
        size: Multiplier applied to the bound returned by ``get_range_of``.

    Returns:
        The reconstructed shape reshaped to ``(40, 2)``.
    """
    tooth_shapes = FileManager.load_landmarks_std()[:, toothID]
    model = PCA_analysis.PCA_analysis(tooth_shapes, None)

    # All 14 parameters are zero except the one being varied.
    weights = np.zeros(14)
    weights[pcaID] = get_range_of(pcaID, model) * size

    return generate_model_point(weights, model).reshape(40, 2)
def preperation(radiograph, tooth_variations):
    """Prepare the inputs needed to fit a single tooth on one radiograph.

    Args:
        radiograph: Image handed to
            ``prep.calc_external_img_active_contour``.
        tooth_variations: Landmark data handed to ``PCA.PCA_analysis``.

    Returns:
        Tuple ``(edge_img, pca_tooth)``: the image produced by
        ``prep.calc_external_img_active_contour(radiograph)`` and the result
        of ``PCA.PCA_analysis(tooth_variations, None)``.
    """
    return (
        prep.calc_external_img_active_contour(radiograph),
        PCA.PCA_analysis(tooth_variations, None),
    )
def test_matching(i=0):
    """Match the PCA model of tooth ``i`` against its stored fitted shape.

    Loads ``fitted_tooth.npy``, builds a PCA model from column ``i`` of the
    module-level ``landmarks``, estimates model and pose parameters with
    ``match_model_points`` and maps the reconstructed model shape through
    ``inv_transform`` using those pose parameters.

    Args:
        i: Index of the tooth, used both for the fitted array and for the
            landmark column.

    Returns:
        Tuple ``(tooth, y)``: the stored fitted shape and the shape returned
        by ``inv_transform``.
    """
    tooth = np.load('fitted_tooth.npy')[i]
    pca = PCA_analysis.PCA_analysis(landmarks[:, i], None)

    b, pose_param = match_model_points(tooth, pca)
    model_shape = generate_model_point(b, pca).reshape(40, 2)

    return tooth, inv_transform(model_shape, pose_param)
def get_range_of(i, pca):
    """Return three times the square root of the ``i``-th eigenvalue.

    Args:
        i: Index into the eigenvalues returned by
            ``PCA_analysis.get_eigenvalues(pca)``.
        pca: PCA object accepted by the ``PCA_analysis`` helper functions.

    Returns:
        ``3 * sqrt(eigenvalues[i])`` as a float.
    """
    return 3 * math.sqrt(PCA_analysis.get_eigenvalues(pca)[i])
def update_model_param(y, pca):
    """Project a shape onto the PCA basis to obtain its model parameters.

    Args:
        y: Array holding the shape; it is flattened with ``reshape(-1)``
            before the mean is subtracted.
        pca: PCA object exposing ``mean_`` and accepted by
            ``PCA_analysis.get_eigenvectors``.

    Returns:
        ``np.dot(PT, y_flat - mean)`` where ``PT`` is the matrix returned by
        ``PCA_analysis.get_eigenvectors(pca)``.
    """
    mean_shape = pca.mean_
    components = PCA_analysis.get_eigenvectors(pca)
    centred = y.reshape(-1) - mean_shape
    return np.dot(components, centred)
#make a plot sns.set_style('whitegrid') plt.plot(range(0, len(cumvar)), cumvar*100) plt.plot([cut_off, cut_off], [0, 100], 'k') plt.text(cut_off, 100, cut_off) plt.xlabel('Number of Principal Components', fontsize =16) plt.ylabel('variance explained', fontsize =16) #now put the 1:cut_off PCs into a dataframe PCname = ['PC_%d' %(p+1) for p in range (0,cut_off+1)] PC_df = pd.DataFrame(data= X2[:,:cut_off+1], columns = PCname) PC_df['drug'] = featZall['drug'] PC_df['chunk'] = featZall['chunk'] #make the PC plots PC_custom.PC12_plots(PC_df, [], 'all' , directoryA, 'tif', 'chunk') PCmean, PCsem = PC_custom.PC_av(PC_df, ['PC_1', 'PC_2'], 'chunk') test = ['DMSO', 'V3'] PC_custom.PC_traj(PCmean, PCsem,['PC_1', 'PC_2'], 'all' , directoryA, 'tif', cmap, []) #which features contribute to the variance? #components that explain the variance #make a dataframe ranking the features for each PC and also include the explained variance (z-normalised) #in separate dataframe called PC_sum PC_feat = [] #features PC_sum =[] #explained variance for PC in range(0, len(PCname)): PC_feat.append(list(featZall.iloc[:,:-3].columns[np.argsort(pca.components_[PC])])) PC_sum.append(list((pca.components_[PC])/ np.sum(abs(pca.components_[PC]))))
cmap_name = drug # Create the colormap cm[drug] = LinearSegmentedColormap.from_list( cmap_name, lutGraded[drug], N=60) plt.register_cmap(cmap = cm[drug]) #plt.register_cmap(name=drug, data=LinearSegmentedColormap.from_list()) # optional lut kwarg #have a look at the colors import make_colormaps as mkc mkc.plot_color_gradients(cmap_list=cm, drug_names = lutGraded.keys()) plt.savefig(os.path.join(os.path.dirname(saveDir), 'Figures', 'GradeddrugColors.png')) #make the PC plots for chunks in chunkSize: PC_custom.PC12_plots(PC_df[PC_df['chunkTime']==chunks], [], chunks , cmap, saveDir,'tif', 'chunk') PCmean = {} PCsem = {} for chunks in chunkSize: PCmean[chunks], PCsem[chunks] = PC_custom.PC_av(PC_df[PC_df['chunkTime'] == chunks], [], 'chunk') #make the plots for chunks in PCmean: plt.figure() xscale = 1/(PCmean[chunks].max()['PC_1'] - PCmean[chunks].min()['PC_1']) yscale = 1/(PCmean[chunks].max()['PC_1'] - PCmean[chunks].min()['PC_2']) cscale = np.arange(1, np.unique(PCmean[chunks]['chunk']).shape[0]+1,1) for drug in selDrugs: plt.errorbar(x= PCmean[chunks][PCmean[chunks]['drug']==drug]['PC_1']*xscale,\
# Script section: biplot of the top-ranked feature of PC_1 and PC_2, followed
# by PC plots grouped by concentration.

plt.figure()
# range(0, 1) yields only i = 0, so a single feature per component is drawn:
# PC_feat[k][-1-i] is the last entry of the ranking for component k.
for i in range(0,1):
    # Arrow from the origin to (row 0, row 1) of PC_vals for that feature,
    # scaled by 100; blue for PC_feat[0], red for PC_feat[1].
    plt.arrow(0,0, PC_vals.iloc[0,:][PC_feat[0][-1-i]]*100, \
              PC_vals.iloc[1,:][PC_feat[0][-1-i]]*100,color= 'b')
    plt.arrow(0,0, PC_vals.iloc[0,:][PC_feat[1][-1-i]]*100,\
              PC_vals.iloc[1,:][PC_feat[1][-1-i]]*100, color='r')
    # Feature-name labels, placed at fixed offsets from the unscaled values.
    plt.text(PC_vals.iloc[0,:][PC_feat[0][-1-i]] + 0.7,\
             PC_vals.iloc[1,:][PC_feat[0][-1-i]] - 0.3, PC_feat[0][-1-i],\
             ha='center', va='center')
    plt.text(PC_vals.iloc[0,:][PC_feat[1][-1-i]]+0.5, PC_vals.iloc[1,:][PC_feat[1][-1-i]]+1,\
             PC_feat[1][-1-i], ha='center', va='center')
plt.xlim (-2, 2)
plt.ylim (-2, 2)
# NOTE(review): '%' binds tighter than '+', so the label reads
# '%PC_1 (12.34)' with the percent sign in front -- confirm this is intended.
plt.xlabel('%' + 'PC_1 (%.2f)' % (pca.explained_variance_ratio_[0]*100), fontsize = 16)
plt.ylabel('%' + 'PC_2 (%.2f)' % (pca.explained_variance_ratio_[1]*100), fontsize = 16)
plt.show()
#plt.savefig(os.path.join(directoryA[:-7], 'Figures', 'agar_biplot.png'))

import PCA_analysis as PC_custom
# One extra colour is requested so that the palette lines up with the
# clustergram from mRMR (which includes cloz10) ...
cmap1 = sns.color_palette("tab20", len(np.unique(featuresZ2['drug']))+1)
# ... and the 5th row (index 4), which would be cloz10, is then removed.
# There is probably a smarter way to do this.
cmap1 = np.delete(cmap1, 4, axis = 0)

# Make the PC plots.
PC_custom.PC12_plots(PC_df, [],[],cmap1, dirFeats, 'tif', 'concentration')
PCmean, PCsem = PC_custom.PC_av(PC_df, [], 'concentration')
PC_custom.PC_traj(PCmean, PCsem,['PC_1', 'PC_2'], [], dirFeats, 'tif', cmap1,[], start_end = False)
X_std1[rep] = StandardScaler().fit_transform(featMatTotalNorm_mean[rep]) #X_std2[rep] = StandardScaler().fit_transform(features2[rep].iloc[:,4:-2]) #don't include the recording info in the PCA cov_mat[rep] = np.cov(X_std1[rep].T) #cov_mat2[rep] = np.cov(X_std2[rep].T) eig_vecs1 = {} eig_vals1 = {} eig_pairs1 = {} PC_pairs1 = {} PC_df1 = {} cut_off1 = {} for rep in X_std1: eig_vecs1[rep], eig_vals1[rep], eig_pairs1[rep], PC_pairs1[rep],\ PC_df1[rep], cut_off1[rep] = PC.pca(X_std1[rep], rep, directoryA, '.tif') PC_conts1 = {} PC_feats1 = {} PC_top1 = {} x1 = {} for rep in eig_pairs1: PC_conts1[rep], PC_feats1[rep], \ PC_top1[rep], x1[rep] = PC.PC_feats(eig_pairs1[rep], cut_off1[rep], featuresZ[rep]) #now make biplots for all the reps for rep in PC_top1: PC.biplot(PC_top1[rep], PC_feats1[rep], 1, 2, 1, directoryA, rep, '.tif') #%% now to transform into feature space #concanenate the eigen_vector matrix across the top 80 eigenvalues
#make a figure of colors for a legend #make a figure of the colorbar colors = [(v) for k,v in lut.items()] #plot separately plt.figure(figsize = (30,10)) ax = plt.imshow([colors]) ax.axes.set_xticklabels(range(0,len(allDrugs),1)) ax.axes.set_xticklabels(lut.keys(), rotation = 90) ax.axes.set_xticks(np.arange(0,len(allDrugs),1)) ax.axes.xaxis.set_ticks_position('top') plt.savefig(os.path.join(savedir, 'drug_colors.png'),\ bbox_inches='tight',dpi =150) plt.close() PC_custom.PC12_plots(PC_df, [],[], lut, savedir, 'tif', 'concentration') PCmean, PCsem = PC_custom.PC_av(PC_df, [], 'concentration') PCJ.PC_trajGraded(PCmean, PCsem, ['PC_1', 'PC_2'], [], savedir, '.png', 'concentration', start_end = False,\ cum_var = cumvar, legend = 'off') # ============================================================================= # To do: # 1. Do contrastive PCA # 2. Label antipsychotics (typical, atypical, and test compounds) and pesticides # and look at the distribution of these compounds across multiple principal components # 3. Is it possible to train a classifier to differentiate between antipsychotics and pesticides? # 4. tSNE embedding
# Script section: build a white-to-colour graded colormap per drug, register
# them with pyplot, then draw the independent-component (IC) plots.

import make_colormaps as mkc
from matplotlib.colors import LinearSegmentedColormap
import PC_traj as PCJ
import PCA_analysis as PC_custom

# One tab20 colour per unique entry of `drugs`.
cmap1 = sns.color_palette('tab20', len(np.unique(drugs)))
# Graded colormaps: each entry is a two-colour list, white (1, 1, 1) followed
# by the palette colour.
cmapGraded = [] #and graded colormaps
for item in cmap1:
    cmapGraded.append([(1, 1, 1), (item)])

# Pair the entries of allDrugs with the gradients, in order.
# NOTE(review): the palette length comes from `drugs` but the keys come from
# `allDrugs`; zip() silently truncates to the shorter one -- confirm the two
# always have the same number of unique entries.
lutGraded = dict(zip(allDrugs, cmapGraded))
cm = {}
for drug in lutGraded:
    cmap_name = drug
    # Create the colormap: 60 levels between the two colours, named after
    # the drug, and register it with pyplot.
    # NOTE(review): plt.register_cmap is deprecated in recent matplotlib
    # releases -- confirm against the version this script is pinned to.
    cm[drug] = LinearSegmentedColormap.from_list(cmap_name, lutGraded[drug], N=60)
    plt.register_cmap(cmap=cm[drug])

# Make the IC plots; output goes to a hard-coded network volume.
savedir = '/Volumes/behavgenom$/Ida/Data/Antipsychotics'
PC_custom.PC12_plots(IC_df, [], [], cmap1, savedir, 'tif', 'concentration')
ICmean, ICsem = PC_custom.PC_av(IC_df, [], 'concentration')
PCJ.PC_trajGraded(ICmean, ICsem,['IC_1','IC_2'], [], savedir, '.png', 'concentration', start_end = False,\
                  cum_var = None, legend = 'off')

# Find the features that contribute most to the ICA.
# Script section: finish and save the biplot, draw the PC plots per replicate
# and for the combined data, then fold Nworms into the 'drug' column.

plt.xlim(-10, 10)
plt.ylim(-10, 10)
# NOTE(review): '%' binds tighter than '+', so the label reads
# '%PC_1 (12.34)' with the percent sign in front -- confirm this is intended.
plt.xlabel('%' + 'PC_1 (%.2f)' % (pca.explained_variance_ratio_[0]*100), fontsize=16)
plt.ylabel('%' + 'PC_2 (%.2f)' % (pca.explained_variance_ratio_[1]*100), fontsize=16)
# Fixed: save BEFORE showing. After a blocking plt.show() the figure has been
# closed, so a later plt.savefig() writes a new, empty figure.
plt.savefig(os.path.join(foldIn, 'Figures', 'agar_biplot.png'))
plt.show()

import PCA_analysis as PC_custom
sns.set()
# One tab10 colour per unique drug in conds.
cmap1 = sns.color_palette('tab10', len(np.unique(conds['drug'])))
allDrugs = np.unique(conds['drug'])

# Make the PC plots, one per replicate (rows of PC_df whose 'date' equals the
# replicate key).
savedir = os.path.join(foldIn, 'Figures')
for rep in featMat:
    PC_custom.PC12_plots(PC_df.loc[PC_df['date'] == rep], 1, rep + '_1worm',
                         cmap1, savedir, 'tif', 'Nworms', False)
# NOTE(review): indentation was lost in this view; the close call is assumed
# to run once after the loop -- confirm.
plt.close('all')

# Combined
PC_custom.PC12_plots(PC_df, 5, 'combined_5worms', cmap1, savedir, 'tif', 'Nworms', False)
# All the data
PC_custom.PC12_plots(PC_df, [], 'alldata',
                     cmap1, savedir, 'tif', [], False)

# Make another version of the dataframe in which the 'drug' column holds
# (drug, Nworms) tuples: build it as 'drug2', copy it to conds, then drop the
# old 'drug' column and rename 'drug2' to 'drug'.
PC_df['drug2'] = list(zip(PC_df.drug, PC_df.Nworms))
conds['drug2'] = PC_df['drug2']
PC_df = PC_df.drop(columns='drug')
PC_df = PC_df.rename(columns={'drug2': 'drug'})
ha='center', va='center') plt.text(PC_vals.iloc[0,:][PC_feat[1][-1-i]]+0.5, PC_vals.iloc[1,:][PC_feat[1][-1-i]]+0.5,\ PC_feat[1][-1-i], ha='center', va='center') plt.xlim (-3, 3) plt.ylim (-3,3) plt.xlabel('PC_1') plt.ylabel('PC_2') plt.show() plt.savefig(os.path.join(directoryA[:-7], 'Figures', 'agar_biplot.png')) import PCA_analysis as PC_custom #make the PC plots PC_custom.PC12_plots(PC_df, 10, rep, directoryA, 'tif') test = PC_custom.PC_av(PC_df, []) PC_custom.PC_traj(test, rep, directoryA, 'tif') #the sklearn and my custom PCA gave exactly the same results - Phew #%% now on to the stats #for this it is usful to append the conditions onto the dataframe for rep in featuresEA_1: featuresEA_1 [rep] ['drug'] = drugA2[rep] featuresEA_1[rep] ['concentration'] = concA2[rep] #featuresA2[rep]['exp'] =exp_namesA[rep] featuresEA_1[rep] ['date'] = dateA2[rep] #compare each compound to control data controlMeans = {}
# NOTE(review): the three statements below read `x`, `pcaID`, `size` and
# `show`, none of which is bound in the visible code -- presumably they are
# the tail of a function whose header is out of view; confirm the intended
# indentation.
# Draw the current shape in red, then the shape for the opposite (-size)
# variation of the same component in blue.
show_tooth_points(x, False, 'red')
x = show_tooth_variatins(0, pcaID, -size)
show_tooth_points(x, show, 'blue')


# In[3]:

# Demo: fit the PCA model of tooth 0 to the first stored initial position and
# display the original and the reconstructed shape.
if __name__ == "__main__":
    teeth = np.load('initial_position.npy')
    tooth = teeth[0, 0]
    landmarks = FileManager.load_landmarks_std()
    all_tooth_variations = landmarks[:, 0]
    FileManager.show_tooth_points(tooth)
    pca = PCA_analysis.PCA_analysis(all_tooth_variations, None)
    # Estimate model parameters b and pose, rebuild the shape from b, and map
    # it through inv_transform with the estimated pose.
    b, pose_param = match_model_points(tooth, pca)
    x = generate_model_point(b, pca)
    y = inv_transform(x.reshape(40, 2), pose_param)
    FileManager.show_tooth_points(y)
    # print(tooth - y)


# In[8]:

# NOTE(review): indentation was lost in this view; this cell is assumed to be
# at module level (a separate notebook cell) -- confirm.
# Display the first stored fitted tooth.
fitted = np.load('fitted_tooth.npy')
tooth = fitted[0]
FileManager.show_tooth_points(tooth)
X_std1[rep] = StandardScaler().fit_transform(featMatTotalNorm_mean[rep]) #X_std2[rep] = StandardScaler().fit_transform(features2[rep].iloc[:,4:-2]) #don't include the recording info in the PCA cov_mat[rep] = np.cov(X_std1[rep].T) #cov_mat2[rep] = np.cov(X_std2[rep].T) eig_vecs1 = {} eig_vals1 = {} eig_pairs1 = {} PC_pairs1 = {} PC_df1 = {} cut_off1 = {} for rep in X_std1: eig_vecs1[rep], eig_vals1[rep], eig_pairs1[rep], PC_pairs1[rep],\ PC_df1[rep], cut_off1[rep] = PC.pca(X_std1[rep], rep, directoryA, '.tif') PC_conts1 = {} PC_feats1 = {} PC_top1 = {} x1 = {} for rep in eig_pairs1: PC_conts1[rep], PC_feats1[rep], \ PC_top1[rep], x1[rep] = PC.PC_feats(eig_pairs1[rep], cut_off1[rep], featuresZ[rep]) #now make biplots for all the reps for rep in PC_top1: PC.biplot(PC_top1[rep], PC_feats1[rep], 1, 2, 1, directoryA, rep, '.tif', uniqueDrugs) #%% now to transform into feature space
# Script section: per-replicate standardisation, PCA, feature ranking and
# biplots.

# Standardise the features of every replicate with StandardScaler and store
# the covariance of the standardised matrix (transposed, so np.cov treats
# each original column as one variable).
for rep in featMatTotalNorm_mean:
    X_std1[rep] = StandardScaler().fit_transform(featMatTotalNorm_mean[rep])
    cov_mat[rep] = np.cov(X_std1[rep].T)

# PCA: run PC.pca on every replicate; its six return values are stored in six
# dicts keyed by replicate.
eig_vecs1 = {}
eig_vals1 = {}
eig_pairs1 = {}
PC_pairs1 = {}
PC_df1 = {}
cut_off1 = {}
for rep in X_std1:
    eig_vecs1[rep], eig_vals1[rep], eig_pairs1[rep], PC_pairs1[rep],\
    PC_df1[rep], cut_off1[rep] = PC.pca(X_std1[rep], rep, directoryA, '.tif')

# Now find the top features that contribute to PC1 and PC2: PC.PC_feats gets
# the eigen pairs, the cut-off and featuresZ of each replicate, and its four
# return values are stored in four dicts keyed by replicate.
PC_conts1 = {}
PC_feats1 = {}
PC_top1 = {}
x1 = {}
for rep in eig_pairs1:
    PC_conts1[rep], PC_feats1[rep], \
    PC_top1[rep], x1[rep] = PC.PC_feats(eig_pairs1[rep], cut_off1[rep], featuresZ[rep])

# Now make biplots for all the replicates.
for rep in PC_top1:
    PC.biplot(PC_top1[rep], PC_feats1[rep], 1, 2, 1, directoryA, rep, '.tif', uniqueDrugs)