def generate_model_point(b, pca):
    """Rebuild a shape vector from a set of model parameters.

    The result is ``dot(P, b) + mean`` where ``P`` is the transpose of the
    matrix returned by ``PCA_analysis.get_eigenvectors(pca)`` and ``mean``
    is the value returned by ``PCA_analysis.get_mean(pca)``.

    Parameters:
        b: parameter vector multiplied by the transposed eigenvector matrix.
        pca: model object passed through to the ``PCA_analysis`` helpers.

    Returns:
        The reconstructed vector (projection plus mean).
    """
    basis = PCA_analysis.get_eigenvectors(pca).transpose()
    mean_shape = PCA_analysis.get_mean(pca)
    return np.dot(basis, b) + mean_shape
def plot_eigenvalues():
    """Show a line plot of the first 14 eigenvalues for column 0 of ``landmarks``.

    Reads the module-level ``landmarks`` array, fits a model with
    ``PCA_analysis.PCA_analysis`` on ``landmarks[:, 0]`` and plots the
    eigenvalues with indices 0..13 against their index.
    """
    n_shown = 14
    model = PCA_analysis.PCA_analysis(landmarks[:, 0], None)
    spectrum = PCA_analysis.get_eigenvalues(model)
    # Index element by element so that a spectrum shorter than n_shown
    # still raises, exactly as an explicit fill loop would.
    heights = np.array([spectrum[k] for k in range(n_shown)], dtype=float)

    plt.plot(range(n_shown), heights)
    plt.title('Influence of eigenvalues')
    plt.xlabel('n-th principal component')
    plt.ylabel('Eigenvalue')
    plt.show()
Ejemplo n.º 3
0
def preperation_all(radiograph, all_landmarks):
    """Prepare the image and the PCA models needed before fitting.

    Parameters:
        radiograph: image passed to ``prep.calc_external_img_active_contour``.
        all_landmarks: landmark data passed to ``PCA.PCA_analysis_all``.

    Returns:
        A tuple ``(edge_img, pcas_tooth)``: the result of the active-contour
        image computation followed by the result of ``PCA_analysis_all``.
    """
    return (
        prep.calc_external_img_active_contour(radiograph),
        PCA.PCA_analysis_all(all_landmarks, None),
    )
def show_tooth_variatins(toothID, pcaID, size):
    """Generate the shape obtained by varying a single model parameter.

    A model is fitted on ``landmarks[:, toothID]`` (loaded through
    ``FileManager.load_landmarks_std``). All 14 parameters are zero except
    entry ``pcaID``, which is set to ``get_range_of(pcaID, model) * size``.

    Parameters:
        toothID: column of the landmark array to model.
        pcaID: index of the parameter to vary.
        size: multiplier applied to the bound returned by ``get_range_of``.

    Returns:
        The generated points reshaped to ``(40, 2)``.
    """
    shapes = FileManager.load_landmarks_std()[:, toothID]
    model = PCA_analysis.PCA_analysis(shapes, None)

    weights = np.zeros(14)
    weights[pcaID] = get_range_of(pcaID, model) * size

    return generate_model_point(weights, model).reshape(40, 2)
Ejemplo n.º 5
0
def preperation(radiograph, tooth_variations):
    """Prepare the image and the single-tooth PCA model used for fitting.

    Parameters:
        radiograph: image passed to ``prep.calc_external_img_active_contour``.
        tooth_variations: landmark data passed to ``PCA.PCA_analysis``.

    Returns:
        A tuple ``(edge_img, pca_tooth)``.
    """
    external_img = prep.calc_external_img_active_contour(radiograph)
    tooth_model = PCA.PCA_analysis(tooth_variations, None)
    return external_img, tooth_model
    def test_matching(i=0):
        """Match the model to fitted tooth ``i`` and rebuild it in image space.

        Loads ``fitted_tooth.npy``, fits a model on ``landmarks[:, i]``,
        matches it to the loaded tooth, regenerates the model points and
        maps them back with ``inv_transform``.

        Returns a tuple ``(tooth, y)``: the loaded tooth and the rebuilt one.

        NOTE(review): as placed, this def sits after a ``return`` at the same
        indentation, so it is never executed -- confirm the intended nesting.
        """
        target = np.load('fitted_tooth.npy')[i]
        model = PCA_analysis.PCA_analysis(landmarks[:, i], None)

        params, pose = match_model_points(target, model)
        rebuilt = generate_model_point(params, model).reshape(40, 2)
        return target, inv_transform(rebuilt, pose)
def get_range_of(i, pca):
    """Return ``3 * sqrt(eigenvalue_i)`` for the given model.

    Parameters:
        i: index into the eigenvalues returned by
            ``PCA_analysis.get_eigenvalues(pca)``.
        pca: model object passed to ``PCA_analysis.get_eigenvalues``.
    """
    return 3 * math.sqrt(PCA_analysis.get_eigenvalues(pca)[i])
def update_model_param(y, pca):
    """Project a shape onto the model basis.

    Flattens ``y``, subtracts ``pca.mean_`` and multiplies the result by the
    matrix returned by ``PCA_analysis.get_eigenvectors(pca)``.

    Parameters:
        y: array holding the shape; flattened with ``reshape(-1)``.
        pca: model object providing ``mean_`` and the eigenvectors.

    Returns:
        The dot product of the eigenvector matrix with the centred shape.
    """
    mean_shape = pca.mean_
    basis_t = PCA_analysis.get_eigenvectors(pca)
    deviation = y.reshape(-1) - mean_shape
    return np.dot(basis_t, deviation)
Ejemplo n.º 9
0
# Plot cumvar (scaled to percent) against its index; presumably cumvar is the
# cumulative explained-variance ratio -- confirm where it is computed.
sns.set_style('whitegrid')
plt.plot(range(0, len(cumvar)), cumvar*100)
# Vertical black line from 0 to 100 at x = cut_off, labelled with the cut_off value.
plt.plot([cut_off, cut_off], [0, 100], 'k')
plt.text(cut_off, 100, cut_off)
plt.xlabel('Number of Principal Components', fontsize =16)
plt.ylabel('variance explained', fontsize =16)

# Put columns 0..cut_off of X2 (cut_off + 1 columns) into a dataframe whose
# columns are named PC_1 .. PC_<cut_off+1>.
PCname = ['PC_%d' %(p+1) for p in range (0,cut_off+1)]
PC_df = pd.DataFrame(data= X2[:,:cut_off+1], columns = PCname)
# Carry the condition labels over from the feature table.
PC_df['drug'] = featZall['drug']
PC_df['chunk'] = featZall['chunk']

# Make the PC plots, then the per-'chunk' averages and the trajectory plot.
PC_custom.PC12_plots(PC_df, [], 'all' , directoryA, 'tif', 'chunk')
PCmean, PCsem = PC_custom.PC_av(PC_df, ['PC_1', 'PC_2'], 'chunk')
# NOTE(review): 'test' is not used anywhere in the visible code.
test = ['DMSO', 'V3']
PC_custom.PC_traj(PCmean, PCsem,['PC_1', 'PC_2'], 'all' , directoryA, 'tif', cmap, [])


# Which features contribute to each retained component?
# PC_feat[k]: feature names (all columns of featZall except the last three)
#             ordered by ascending signed value of pca.components_[k].
# PC_sum[k]:  pca.components_[k] divided by the sum of its absolute values,
#             i.e. normalised loadings rather than explained variance.
PC_feat = [] # ranked feature names per component
PC_sum =[] # normalised loadings per component
for PC in range(0, len(PCname)):
    PC_feat.append(list(featZall.iloc[:,:-3].columns[np.argsort(pca.components_[PC])]))
    PC_sum.append(list((pca.components_[PC])/ np.sum(abs(pca.components_[PC]))))
Ejemplo n.º 10
0
    cmap_name = drug
    # Create the colormap
    cm[drug] = LinearSegmentedColormap.from_list(
        cmap_name, lutGraded[drug], N=60)
    plt.register_cmap(cmap = cm[drug])
    #plt.register_cmap(name=drug, data=LinearSegmentedColormap.from_list())  # optional lut kwarg

# Preview the colormaps held in cm and save the figure to
# <parent of saveDir>/Figures/GradeddrugColors.png.
import make_colormaps as mkc
mkc.plot_color_gradients(cmap_list=cm, drug_names = lutGraded.keys())
plt.savefig(os.path.join(os.path.dirname(saveDir), 'Figures', 'GradeddrugColors.png'))


# Make the PC plots, one call per value in chunkSize, each restricted to the
# rows of PC_df whose 'chunkTime' equals that value.
for chunks in chunkSize:    
    PC_custom.PC12_plots(PC_df[PC_df['chunkTime']==chunks], [], chunks ,  cmap, saveDir,'tif', 'chunk')

# Per-chunkTime outputs of PC_custom.PC_av, keyed by the chunkSize value.
PCmean = {}
PCsem = {}
for chunks in chunkSize:
    PCmean[chunks], PCsem[chunks] = PC_custom.PC_av(PC_df[PC_df['chunkTime'] == chunks], [], 'chunk')

#make the plots
for chunks in PCmean:
    plt.figure()
    # Scale each axis by the reciprocal of that component's own range.
    # The y scale previously subtracted the PC_2 minimum from the PC_1
    # maximum, which mixed the two components.
    xscale = 1/(PCmean[chunks].max()['PC_1'] - PCmean[chunks].min()['PC_1'])
    yscale = 1/(PCmean[chunks].max()['PC_2'] - PCmean[chunks].min()['PC_2'])
    # One integer per distinct 'chunk' value, starting at 1.
    cscale = np.arange(1, np.unique(PCmean[chunks]['chunk']).shape[0]+1,1)
    
    for drug in selDrugs:
        plt.errorbar(x= PCmean[chunks][PCmean[chunks]['drug']==drug]['PC_1']*xscale,\
Ejemplo n.º 11
0
plt.figure()
# For each i, draw one arrow and one text label for the feature
# PC_feat[0][-1-i] (blue) and one for PC_feat[1][-1-i] (red).
# Arrow components are rows 0 and 1 of PC_vals for that feature, times 100;
# the labels are placed at the unscaled values plus a fixed offset.
# NOTE(review): presumably the last entries of PC_feat[k] are the strongest
# contributors to component k -- confirm against where PC_feat is built.
for i in range(0,1):
    plt.arrow(0,0, PC_vals.iloc[0,:][PC_feat[0][-1-i]]*100,
              PC_vals.iloc[1,:][PC_feat[0][-1-i]]*100,color= 'b')
    plt.arrow(0,0, PC_vals.iloc[0,:][PC_feat[1][-1-i]]*100,
              PC_vals.iloc[1,:][PC_feat[1][-1-i]]*100, color='r')
    plt.text(PC_vals.iloc[0,:][PC_feat[0][-1-i]] + 0.7,
             PC_vals.iloc[1,:][PC_feat[0][-1-i]] - 0.3, PC_feat[0][-1-i],
             ha='center', va='center')
    plt.text(PC_vals.iloc[0,:][PC_feat[1][-1-i]]+0.5, PC_vals.iloc[1,:][PC_feat[1][-1-i]]+1,
             PC_feat[1][-1-i], ha='center', va='center')

plt.xlim (-2, 2)
plt.ylim (-2, 2)
# Axis labels show the explained variance ratio as a percentage. The percent
# sign belongs after the number ('%%' is a literal '%' in %-formatting);
# the old form rendered as "%PC_1 (12.34)".
plt.xlabel('PC_1 (%.2f%%)' % (pca.explained_variance_ratio_[0]*100), fontsize = 16)
plt.ylabel('PC_2 (%.2f%%)' % (pca.explained_variance_ratio_[1]*100), fontsize = 16)
plt.show()
#plt.savefig(os.path.join(directoryA[:-7], 'Figures', 'agar_biplot.png'))

import PCA_analysis as PC_custom 
# Palette with one more colour than there are distinct drugs in featuresZ2;
# the extra entry keeps the colours aligned with the mRMR clustergram, which
# also includes cloz10.
cmap1 = sns.color_palette("tab20", len(np.unique(featuresZ2['drug']))+1)
# Drop the colour at index 4 (the 5th row), which would be cloz10 -- there is
# probably a smarter way to do this.
cmap1 = np.delete(cmap1, 4, axis = 0)

# Make the PC plots, the per-'concentration' averages and the trajectory plot.
PC_custom.PC12_plots(PC_df, [],[],cmap1,  dirFeats, 'tif', 'concentration')
PCmean, PCsem = PC_custom.PC_av(PC_df, [], 'concentration')
PC_custom.PC_traj(PCmean, PCsem,['PC_1', 'PC_2'], [], dirFeats, 'tif', cmap1,[], start_end = False)


Ejemplo n.º 12
0
    X_std1[rep] = StandardScaler().fit_transform(featMatTotalNorm_mean[rep])
    #X_std2[rep] = StandardScaler().fit_transform(features2[rep].iloc[:,4:-2]) #don't include the recording info in the PCA

    cov_mat[rep] = np.cov(X_std1[rep].T)
    #cov_mat2[rep] = np.cov(X_std2[rep].T)

# One dict per output of PC.pca, each keyed by replicate.
eig_vecs1, eig_vals1, eig_pairs1 = {}, {}, {}
PC_pairs1, PC_df1, cut_off1 = {}, {}, {}

for rep in X_std1:
    (eig_vecs1[rep], eig_vals1[rep], eig_pairs1[rep],
     PC_pairs1[rep], PC_df1[rep], cut_off1[rep]) = PC.pca(
        X_std1[rep], rep, directoryA, '.tif')

# One dict per output of PC.PC_feats, each keyed by replicate.
PC_conts1, PC_feats1, PC_top1, x1 = {}, {}, {}, {}
for rep in eig_pairs1:
    (PC_conts1[rep], PC_feats1[rep],
     PC_top1[rep], x1[rep]) = PC.PC_feats(
        eig_pairs1[rep], cut_off1[rep], featuresZ[rep])

# Biplot for every replicate.
for rep in PC_top1:
    PC.biplot(PC_top1[rep], PC_feats1[rep], 1, 2, 1, directoryA, rep, '.tif')

#%% now to transform into feature space
#concatenate the eigenvector matrix across the top 80 eigenvalues
Ejemplo n.º 13
0
# Make a one-row image of the colours in lut to serve as a legend and save it
# as drug_colors.png in savedir.
colors = list(lut.values())
plt.figure(figsize = (30,10))
# NOTE: plt.imshow returns the image artist; its .axes attribute is the Axes.
ax = plt.imshow([colors])
# Fix the tick positions first and only then assign the labels: Matplotlib
# documents that set_xticklabels should be used after set_xticks, otherwise
# labels may end up at unexpected positions. The earlier call that labelled
# the ticks with plain indices was immediately overwritten and is dropped.
# NOTE(review): assumes lut has one entry per element of allDrugs -- confirm.
ax.axes.set_xticks(np.arange(0,len(allDrugs),1))
ax.axes.set_xticklabels(lut.keys(), rotation = 90)
ax.axes.xaxis.set_ticks_position('top')
plt.savefig(os.path.join(savedir, 'drug_colors.png'),
            bbox_inches='tight',dpi =150)
plt.close()

# PC plots, per-'concentration' averages and the graded trajectory plot.
PC_custom.PC12_plots(PC_df, [],[], lut, savedir, 'tif', 'concentration')
PCmean, PCsem = PC_custom.PC_av(PC_df, [], 'concentration')

PCJ.PC_trajGraded(PCmean, PCsem, ['PC_1', 'PC_2'], [], savedir, '.png', 'concentration', start_end = False,
                  cum_var = cumvar, legend = 'off')


# =============================================================================
# To do:
# 1. Do contrastive PCA 
# 2. Label antipsychotics (typical, atypical, and test compounds) and pesticides
    # and look at the distribution of these compounds across multiple principal components
# 3. Is it possible to train a classifier to differentiate between antipsychotics and pesticides?
    
# 4. tSNE embedding
    
Ejemplo n.º 14
0
import make_colormaps as mkc
from matplotlib.colors import LinearSegmentedColormap
import PC_traj as PCJ
import PCA_analysis as PC_custom

# One tab20 colour per distinct value in drugs.
cmap1 = sns.color_palette('tab20', len(np.unique(drugs)))
# For every palette colour, a two-stop gradient from white (1, 1, 1) to that colour.
cmapGraded = []  #and graded colormaps
for item in cmap1:
    cmapGraded.append([(1, 1, 1), (item)])

# Map drug name -> gradient stops.
# NOTE(review): zip stops at the shorter input, so this assumes allDrugs and
# np.unique(drugs) have the same length and order -- confirm.
lutGraded = dict(zip(allDrugs, cmapGraded))
cm = {}
for drug in lutGraded:
    cmap_name = drug
    # Create a 60-level colormap named after the drug and register it.
    cm[drug] = LinearSegmentedColormap.from_list(cmap_name,
                                                 lutGraded[drug],
                                                 N=60)
    # NOTE(review): plt.register_cmap is deprecated in recent Matplotlib
    # releases -- confirm against the installed version.
    plt.register_cmap(cmap=cm[drug])

# Make the plots for the independent components stored in IC_df.
savedir = '/Volumes/behavgenom$/Ida/Data/Antipsychotics'
PC_custom.PC12_plots(IC_df, [], [], cmap1, savedir, 'tif', 'concentration')
ICmean, ICsem = PC_custom.PC_av(IC_df, [], 'concentration')

PCJ.PC_trajGraded(ICmean, ICsem,['IC_1','IC_2'], [], savedir, '.png', 'concentration', start_end = False,\
                  cum_var = None, legend = 'off')

#find the features that contribute most to the ICA
Ejemplo n.º 15
0
plt.xlim (-10, 10)
plt.ylim (-10, 10)
# Axis labels show the explained variance ratio as a percentage. The percent
# sign belongs after the number ('%%' is a literal '%' in %-formatting);
# the old form rendered as "%PC_1 (12.34)".
plt.xlabel('PC_1 (%.2f%%)' % (pca.explained_variance_ratio_[0]*100), fontsize = 16)
plt.ylabel('PC_2 (%.2f%%)' % (pca.explained_variance_ratio_[1]*100), fontsize = 16)
# Save before showing: once plt.show() returns the figure may already have
# been closed, in which case savefig would write an empty image.
plt.savefig(os.path.join(foldIn, 'Figures', 'agar_biplot.png'))
plt.show()

import PCA_analysis as PC_custom 
# Reset seaborn styling to its defaults and build one tab10 colour per
# distinct drug in conds.
sns.set()
cmap1 = sns.color_palette('tab10',len(np.unique(conds['drug'])))
allDrugs = np.unique(conds['drug'])

# Make the PC plots; figures go to <foldIn>/Figures.
savedir =  os.path.join(foldIn, 'Figures')
# One plot per key of featMat, restricted to the rows of PC_df whose 'date'
# equals that key.
for rep in featMat:   
    PC_custom.PC12_plots(PC_df.loc[PC_df['date']==rep], 1 ,rep + '_1worm', \
                         cmap1, savedir, 'tif', 'Nworms', False)
plt.close('all')

# Combined plot over the whole dataframe.
PC_custom.PC12_plots(PC_df, 5,'combined_5worms', cmap1, savedir, 'tif', 'Nworms', False)

# All the data.
PC_custom.PC12_plots(PC_df, [],'alldata', \
                         cmap1, savedir, 'tif', [], False)

# Make another version of the dataframe in which the 'drug' column holds
# (drug, Nworms) tuples: build it as 'drug2', copy it to conds, then drop the
# old 'drug' column and rename 'drug2' to 'drug'.
PC_df['drug2'] = list(zip(PC_df.drug, PC_df.Nworms))
conds['drug2'] = PC_df['drug2']
PC_df = PC_df.drop(columns = 'drug')
PC_df = PC_df.rename(columns ={ 'drug2': 'drug'})
Ejemplo n.º 16
0
             ha='center', va='center')
    plt.text(PC_vals.iloc[0,:][PC_feat[1][-1-i]]+0.5, PC_vals.iloc[1,:][PC_feat[1][-1-i]]+0.5,\
         PC_feat[1][-1-i], ha='center', va='center')

plt.xlim (-3, 3)
plt.ylim (-3,3)
plt.xlabel('PC_1')
plt.ylabel('PC_2')
# Save before showing: once plt.show() returns the figure may already have
# been closed, in which case savefig would write an empty image.
plt.savefig(os.path.join(directoryA[:-7], 'Figures', 'agar_biplot.png'))
plt.show()


import PCA_analysis as PC_custom

# Make the PC plots, the averages and the trajectory plot for the current rep.
PC_custom.PC12_plots(PC_df, 10, rep, directoryA, 'tif')
test = PC_custom.PC_av(PC_df, [])
PC_custom.PC_traj(test, rep, directoryA, 'tif')

# The sklearn PCA and the custom PCA gave exactly the same results.

#%% now on to the stats
# For this it is useful to append the conditions (drug, concentration, date)
# onto each replicate's feature dataframe.
for rep in featuresEA_1:
    featuresEA_1 [rep] ['drug'] = drugA2[rep]
    featuresEA_1[rep] ['concentration'] = concA2[rep]
    #featuresA2[rep]['exp'] =exp_namesA[rep]
    featuresEA_1[rep] ['date'] = dateA2[rep]
    
# Compare each compound to control data; this container starts out empty.
controlMeans = {}
    show_tooth_points(x, False, 'red')
    x = show_tooth_variatins(0, pcaID, -size)
    show_tooth_points(x, show, 'blue')


# In[3]:

# Script entry point: match the model to an initial tooth position, rebuild
# the matched shape and display it, then display the first fitted tooth.
if __name__ == "__main__":
    # First element [0, 0] of the stored initial positions.
    teeth = np.load('initial_position.npy')
    tooth = teeth[0, 0]

    # Fit the model on column 0 of the standardised landmarks.
    landmarks = FileManager.load_landmarks_std()
    all_tooth_variations = landmarks[:, 0]

    FileManager.show_tooth_points(tooth)
    pca = PCA_analysis.PCA_analysis(all_tooth_variations, None)

    # Model parameters and pose that match the model to the tooth.
    b, pose_param = match_model_points(tooth, pca)

    # Regenerate the points from the parameters, reshape them to (40, 2),
    # apply inv_transform with the pose, and show the result.
    x = generate_model_point(b, pca)
    y = inv_transform(x.reshape(40, 2), pose_param)
    FileManager.show_tooth_points(y)

    # print(tooth - y)

    # In[8]:

    # Show the first entry of the stored fitted teeth.
    fitted = np.load('fitted_tooth.npy')
    tooth = fitted[0]
    FileManager.show_tooth_points(tooth)
Ejemplo n.º 18
0
    X_std1[rep] = StandardScaler().fit_transform(featMatTotalNorm_mean[rep])
    #X_std2[rep] = StandardScaler().fit_transform(features2[rep].iloc[:,4:-2]) #don't include the recording info in the PCA

    cov_mat[rep] = np.cov(X_std1[rep].T)
    #cov_mat2[rep] = np.cov(X_std2[rep].T)

# One dict per output of PC.pca, each keyed by replicate.
eig_vecs1, eig_vals1, eig_pairs1 = {}, {}, {}
PC_pairs1, PC_df1, cut_off1 = {}, {}, {}

for rep in X_std1:
    (eig_vecs1[rep], eig_vals1[rep], eig_pairs1[rep],
     PC_pairs1[rep], PC_df1[rep], cut_off1[rep]) = PC.pca(
        X_std1[rep], rep, directoryA, '.tif')

# One dict per output of PC.PC_feats, each keyed by replicate.
PC_conts1, PC_feats1, PC_top1, x1 = {}, {}, {}, {}
for rep in eig_pairs1:
    (PC_conts1[rep], PC_feats1[rep],
     PC_top1[rep], x1[rep]) = PC.PC_feats(
        eig_pairs1[rep], cut_off1[rep], featuresZ[rep])

# Biplot for every replicate, passing the drug list through.
for rep in PC_top1:
    PC.biplot(
        PC_top1[rep], PC_feats1[rep], 1, 2, 1,
        directoryA, rep, '.tif', uniqueDrugs)

#%% now to transform into feature space
Ejemplo n.º 19
0
# Standardise every replicate's feature matrix and keep the covariance
# matrix of the standardised features.
for rep in featMatTotalNorm_mean:
    X_std1[rep] = StandardScaler().fit_transform(
        featMatTotalNorm_mean[rep])
    cov_mat[rep] = np.cov(X_std1[rep].T)

# PCA: one dict per output of PC.pca, each keyed by replicate.
eig_vecs1, eig_vals1, eig_pairs1 = {}, {}, {}
PC_pairs1, PC_df1, cut_off1 = {}, {}, {}

for rep in X_std1:
    (eig_vecs1[rep], eig_vals1[rep], eig_pairs1[rep],
     PC_pairs1[rep], PC_df1[rep], cut_off1[rep]) = PC.pca(
        X_std1[rep], rep, directoryA, '.tif')

# Find the top features that contribute to PC1 and PC2:
# one dict per output of PC.PC_feats, each keyed by replicate.
PC_conts1, PC_feats1, PC_top1, x1 = {}, {}, {}, {}
for rep in eig_pairs1:
    (PC_conts1[rep], PC_feats1[rep],
     PC_top1[rep], x1[rep]) = PC.PC_feats(
        eig_pairs1[rep], cut_off1[rep], featuresZ[rep])

# Biplot for every replicate, passing the drug list through.
for rep in PC_top1:
    PC.biplot(
        PC_top1[rep], PC_feats1[rep], 1, 2, 1,
        directoryA, rep, '.tif', uniqueDrugs)