Ejemplo n.º 1
0
# Plot cumvar (scaled by 100) against component index and mark the chosen cut-off.
sns.set_style('whitegrid')
plt.plot(range(len(cumvar)), cumvar * 100)
# vertical black line from 0 to 100 at x = cut_off, annotated with the cut-off value
plt.plot([cut_off] * 2, [0, 100], 'k')
plt.text(cut_off, 100, cut_off)
plt.xlabel('Number of Principal Components', fontsize=16)
plt.ylabel('variance explained', fontsize=16)

# Collect the first cut_off+1 columns of X2 in a dataframe whose columns are
# named PC_1 ... PC_<cut_off+1>, then attach the drug / chunk labels.
PCname = ['PC_%d' % num for num in range(1, cut_off + 2)]
PC_df = pd.DataFrame(data=X2[:, :cut_off + 1], columns=PCname)
PC_df['drug'] = featZall['drug']
PC_df['chunk'] = featZall['chunk']

#make the PC plots
# Scatter plots via the project helper; 'tif' is passed as the file type and
# 'chunk' as the last argument (presumably the grouping column - confirm in PCA_analysis).
PC_custom.PC12_plots(PC_df, [], 'all' , directoryA, 'tif', 'chunk')
# PC_av returns two objects, stored here as the mean and the SEM of PC_1 / PC_2.
PCmean, PCsem = PC_custom.PC_av(PC_df, ['PC_1', 'PC_2'], 'chunk')
# NOTE(review): `test` is not used by any call in this section.
test = ['DMSO', 'V3']
# Trajectory plot built from the averaged values; `cmap` must already be defined.
PC_custom.PC_traj(PCmean, PCsem,['PC_1', 'PC_2'], 'all' , directoryA, 'tif', cmap, [])


#which features contribute to the variance?
# For every retained principal component build two parallel lists:
#   PC_feat - feature names ordered by ascending loading on that component
#   PC_sum  - the component's loadings divided by the sum of their absolute values
PC_feat = [] #features
PC_sum =[] #normalised loadings
# The loop index is deliberately NOT called `PC`: that name is used as a module
# alias later in this script (PC.feature_space, PC.PC12_plots, ...) and rebinding
# it to an int here would break those calls.
for pc_idx in range(len(PCname)):
    loadings = pca.components_[pc_idx]
    # the last three columns of featZall are excluded (presumably metadata - confirm)
    PC_feat.append(list(featZall.iloc[:, :-3].columns[np.argsort(loadings)]))
    PC_sum.append(list(loadings / np.sum(abs(loadings))))
#%% now to transform into feature space
#concatenate the eigen_vector matrix across the top 80 eigenvalues

# One entry per replicate; each is filled from the three values returned by
# PC.feature_space for that replicate.
matrix_w1 = {}
Y1 = {}
PC_df2 = {}
for rep in featuresZ1:
    projected = PC.feature_space(
        featuresZ1[rep], eig_pairs1[rep], X_std1[rep], cut_off1[rep],
        x1[rep], drugA[rep], concA[rep], dateA[rep])
    matrix_w1[rep], Y1[rep], PC_df2[rep] = projected

#set palette for plots
sns.palplot(sns.choose_colorbrewer_palette(data_type='q'))
#now make the plots: one call per replicate and per value in (1, 10, 100, 200)
for rep in PC_df2:
    rep_df = PC_df2[rep]
    for i in (1, 10, 100, 200):
        PC.PC12_plots(rep_df, i, rep, directoryA, 'tif')

#now can make dataframe containing means and column names to plot trajectories through PC space
PC_means1 = {rep: PC.PC_av(PC_df2[rep], x1[rep]) for rep in PC_df2}

# one trajectory figure per replicate
sns.set_style('whitegrid')
for rep, rep_means in PC_means1.items():
    PC.PC_traj(rep_means, rep, directoryA, 'tif')

#%% now to do the stats on the experiments

from scipy import stats

#for this it is useful to append the conditions onto the dataframe
Ejemplo n.º 3
0
# Fix the axis limits and label the axes with each component's explained-variance
# ratio (times 100), then write the figure to <foldIn>/Figures/agar_biplot.png.
plt.xlim (-10, 10)
plt.ylim (-10, 10)
# NOTE(review): '%' is concatenated in front of the formatted text, so the label
# reads '%PC_1 (xx.xx)' - confirm that is the intended wording.
plt.xlabel('%' + 'PC_1 (%.2f)' % (pca.explained_variance_ratio_[0]*100), fontsize = 16)
plt.ylabel('%' + 'PC_2 (%.2f)' % (pca.explained_variance_ratio_[1]*100), fontsize = 16)
# Save BEFORE showing: once plt.show() returns the figure may already have been
# released, in which case savefig would write an empty canvas.
plt.savefig(os.path.join(foldIn, 'Figures', 'agar_biplot.png'))
plt.show()

import PCA_analysis as PC_custom 
sns.set()
# unique drug labels, and a tab10 palette with one colour per label
allDrugs = np.unique(conds['drug'])
cmap1 = sns.color_palette('tab10', len(allDrugs))

#make the PC plots
savedir = os.path.join(foldIn, 'Figures')
# one figure per key of featMat, using only the rows whose 'date' equals that key
for rep in featMat:
    rep_rows = PC_df.loc[PC_df['date'] == rep]
    PC_custom.PC12_plots(rep_rows, 1, rep + '_1worm',
                         cmap1, savedir, 'tif', 'Nworms', False)
plt.close('all')

#combined
PC_custom.PC12_plots(PC_df, 5, 'combined_5worms',
                     cmap1, savedir, 'tif', 'Nworms', False)

#all the data
PC_custom.PC12_plots(PC_df, [], 'alldata',
                     cmap1, savedir, 'tif', [], False)

# Replace the 'drug' column by (drug, Nworms) tuples; the same tuples are also
# stored on conds as 'drug2'.
PC_df['drug2'] = list(zip(PC_df['drug'], PC_df['Nworms']))
conds['drug2'] = PC_df['drug2']
PC_df = PC_df.drop(columns='drug').rename(columns={'drug2': 'drug'})
Ejemplo n.º 4
0
             ha='center', va='center')
    plt.text(PC_vals.iloc[0,:][PC_feat[1][-1-i]]+0.5, PC_vals.iloc[1,:][PC_feat[1][-1-i]]+0.5,\
         PC_feat[1][-1-i], ha='center', va='center')

# Fix the axis limits, label the axes and write the biplot to
# <directoryA minus its last 7 characters>/Figures/agar_biplot.png.
plt.xlim (-3, 3)
plt.ylim (-3,3)
plt.xlabel('PC_1')
plt.ylabel('PC_2')
# Save BEFORE showing: once plt.show() returns the figure may already have been
# released, in which case savefig would write an empty canvas.
plt.savefig(os.path.join(directoryA[:-7], 'Figures', 'agar_biplot.png'))
plt.show()


import PCA_analysis as PC_custom

#make the PC plots
# `rep` is not assigned in this section; it must already hold a value
# (presumably left over from an earlier loop - confirm).
PC_custom.PC12_plots(PC_df, 10, rep, directoryA, 'tif')
# the result of PC_av is passed straight on to the trajectory plot
test = PC_custom.PC_av(PC_df, [])
PC_custom.PC_traj(test, rep, directoryA, 'tif')

#the sklearn and my custom PCA gave exactly the same results - Phew

#%% now on to the stats
# for this it is useful to append the conditions onto the dataframe:
# each replicate gets 'drug', 'concentration' and 'date' columns
for rep in featuresEA_1:
    rep_feats = featuresEA_1[rep]
    rep_feats['drug'] = drugA2[rep]
    rep_feats['concentration'] = concA2[rep]
    #featuresA2[rep]['exp'] =exp_namesA[rep]
    rep_feats['date'] = dateA2[rep]

#compare each compound to control data
controlMeans = {}
Ejemplo n.º 5
0
    plt.text(PC_vals.iloc[0,:][PC_feat[0][-1-i]] + 0.5, PC_vals.iloc[1,:][PC_feat[0][-1-i]] + 0.5, PC_feat[0][-1-i],\
             ha='center', va='center')
    plt.text(PC_vals.iloc[0,:][PC_feat[1][-1-i]]+0.5, PC_vals.iloc[1,:][PC_feat[1][-1-i]]+0.5,\
         PC_feat[1][-1-i], ha='center', va='center')

# Fix the axis limits, label the axes and write the biplot to
# <directoryL minus its last 7 characters>/Figures/liquid_biplot.png.
plt.xlim(-3, 3)
plt.ylim(-3, 3)
plt.xlabel('PC_1')
plt.ylabel('PC_2')
# Save BEFORE showing: once plt.show() returns the figure may already have been
# released, in which case savefig would write an empty canvas.
plt.savefig(os.path.join(directoryL[:-7], 'Figures', 'liquid_biplot.png'))
plt.show()

import PCA_analysis as PC_custom

#make the PC plots
# `rep` is not assigned in this section; it must already hold a value
# (presumably left over from an earlier loop - confirm).
PC_custom.PC12_plots(PC_df, [], rep, directoryL, 'tif')
# the result of PC_av is passed straight on to the trajectory plot
test = PC_custom.PC_av(PC_df, [])
PC_custom.PC_traj(test, rep, directoryL, 'tif')

#%% now onto the stats

# for this it is useful to append the conditions onto the dataframe:
# each replicate gets 'drug', 'concentration' and 'date' columns
for rep in featuresL2:
    rep_feats = featuresL2[rep]
    rep_feats['drug'] = drugL[rep]
    rep_feats['concentration'] = concL[rep]
    #featuresA2[rep]['exp'] =exp_namesA[rep]
    rep_feats['date'] = dateL[rep]

#compare each compound to control data
controlMeans = {}
for rep in featuresL2:
Ejemplo n.º 6
0
    cmap_name = drug
    # Create the colormap
    cm[drug] = LinearSegmentedColormap.from_list(
        cmap_name, lutGraded[drug], N=60)
    plt.register_cmap(cmap = cm[drug])
    #plt.register_cmap(name=drug, data=LinearSegmentedColormap.from_list())  # optional lut kwarg

#have a look at the colors
import make_colormaps as mkc
# Draw the colormaps held in `cm`, labelled with the keys of lutGraded.
mkc.plot_color_gradients(cmap_list=cm, drug_names = lutGraded.keys())
# Saved to the 'Figures' folder next to saveDir (i.e. inside saveDir's parent directory).
plt.savefig(os.path.join(os.path.dirname(saveDir), 'Figures', 'GradeddrugColors.png'))


#make the PC plots: one figure per chunk size, using only the matching rows
for chunks in chunkSize:
    chunk_rows = PC_df[PC_df['chunkTime'] == chunks]
    PC_custom.PC12_plots(chunk_rows, [], chunks, cmap, saveDir, 'tif', 'chunk')

# the two values returned by PC_av, stored per chunk size
PCmean = {}
PCsem = {}
for chunks in chunkSize:
    chunk_rows = PC_df[PC_df['chunkTime'] == chunks]
    PCmean[chunks], PCsem[chunks] = PC_custom.PC_av(chunk_rows, [], 'chunk')

#make the plots
for chunks in PCmean:
    plt.figure()
    # Scale factors: reciprocal of each component's range (max - min) over the
    # averaged data for this chunk size. The y scale previously subtracted the
    # PC_2 minimum from the PC_1 maximum; it now uses the PC_2 range.
    pc_max = PCmean[chunks].max()
    pc_min = PCmean[chunks].min()
    xscale = 1/(pc_max['PC_1'] - pc_min['PC_1'])
    yscale = 1/(pc_max['PC_2'] - pc_min['PC_2'])
    # 1 .. number of distinct 'chunk' values
    cscale = np.arange(1, np.unique(PCmean[chunks]['chunk']).shape[0]+1,1)
    
    for drug in selDrugs:
        plt.errorbar(x= PCmean[chunks][PCmean[chunks]['drug']==drug]['PC_1']*xscale,\
Ejemplo n.º 7
0
# Biplot: for the last-ranked feature of PC_feat[0] (blue) and of PC_feat[1] (red)
# draw an arrow from the origin to 100x its values in rows 0 and 1 of PC_vals,
# and write the feature name next to the unscaled values.
plt.figure()
for i in range(1):
    feat_pc1 = PC_feat[0][-1 - i]
    feat_pc2 = PC_feat[1][-1 - i]
    row0 = PC_vals.iloc[0, :]
    row1 = PC_vals.iloc[1, :]
    plt.arrow(0, 0, row0[feat_pc1] * 100, row1[feat_pc1] * 100, color='b')
    plt.arrow(0, 0, row0[feat_pc2] * 100, row1[feat_pc2] * 100, color='r')
    plt.text(row0[feat_pc1] + 0.7, row1[feat_pc1] - 0.3, feat_pc1,
             ha='center', va='center')
    plt.text(row0[feat_pc2] + 0.5, row1[feat_pc2] + 1, feat_pc2,
             ha='center', va='center')

# Fix both axes to [-2, 2] and label them with each component's
# explained-variance ratio multiplied by 100.
plt.xlim (-2, 2)
plt.ylim (-2, 2)
# NOTE(review): '%' is concatenated in front of the formatted text, so the label
# reads '%PC_1 (xx.xx)' - confirm that is the intended wording.
plt.xlabel('%' + 'PC_1 (%.2f)' % (pca.explained_variance_ratio_[0]*100), fontsize = 16)
plt.ylabel('%' + 'PC_2 (%.2f)' % (pca.explained_variance_ratio_[1]*100), fontsize = 16)
plt.show()
# saving is currently disabled; if re-enabled it should run before plt.show()
#plt.savefig(os.path.join(directoryA[:-7], 'Figures', 'agar_biplot.png'))

import PCA_analysis as PC_custom 
# tab20 palette with one colour more than the number of distinct drugs:
# need this to match the clustergram from mRMR so add one for cloz10
n_drugs = len(np.unique(featuresZ2['drug']))
cmap1 = sns.color_palette("tab20", n_drugs + 1)
#get rid of 5th row (index 4), which would be cloz10 - there is probably a smarter way to do this...
cmap1 = np.delete(cmap1, 4, axis=0)

#make the PC plots
# scatter plots with the trimmed palette; 'concentration' is passed as the last argument
PC_custom.PC12_plots(PC_df, [],[],cmap1,  dirFeats, 'tif', 'concentration')
# PC_av returns two objects, stored here as the mean and the SEM
PCmean, PCsem = PC_custom.PC_av(PC_df, [], 'concentration')
# trajectory plot over PC_1 / PC_2, called with start_end switched off
PC_custom.PC_traj(PCmean, PCsem,['PC_1', 'PC_2'], [], dirFeats, 'tif', cmap1,[], start_end = False)


Ejemplo n.º 8
0
#%% now to transform into feature space
#concatenate the eigen_vector matrix across the top 80 eigenvalues

# One entry per replicate; each is filled from the three values returned by
# PC.feature_space for that replicate.
matrix_w1 = {}
Y1 = {}
PC_df2 = {}
for rep in featuresZ1:
    projected = PC.feature_space(
        featuresZ1[rep], eig_pairs1[rep], X_std1[rep], cut_off1[rep],
        x1[rep], drugA[rep], concA[rep], dateA[rep])
    matrix_w1[rep], Y1[rep], PC_df2[rep] = projected

#set palette for plots
sns.palplot(sns.choose_colorbrewer_palette(data_type='q'))
#now make the plots: one call per replicate and per value in (1, 10, 100, 200)
for rep in PC_df2:
    rep_df = PC_df2[rep]
    for i in (1, 10, 100, 200):
        PC.PC12_plots(rep_df, i, rep, directoryA, 'tif')

#now can make dataframe containing means and column names to plot trajectories through PC space
PC_means1 = {rep: PC.PC_av(PC_df2[rep], x1[rep]) for rep in PC_df2}

# one trajectory figure per replicate, written as svg
sns.set_style('whitegrid')
for rep, rep_means in PC_means1.items():
    PC.PC_traj(rep_means, rep, directoryA, 'svg')

#%% now to do the stats on the experiments

from scipy import stats

#for this it is useful to append the conditions onto the dataframe
Ejemplo n.º 9
0
#make a figure of colors for a legend
#make a figure of the colorbar
# one colour swatch per entry of lut, in the dict's iteration order
colors = list(lut.values())
#plot separately
plt.figure(figsize = (30,10))
ax = plt.imshow([colors])
# Set the tick POSITIONS first and only then attach the labels: labels applied
# before the ticks are fixed are not reliably kept when the tick set changes.
# Ticks are sized from the plotted swatches so positions and labels always
# come from the same source (lut).
ax.axes.set_xticks(np.arange(len(colors)))
ax.axes.set_xticklabels(list(lut.keys()), rotation = 90)
ax.axes.xaxis.set_ticks_position('top')
plt.savefig(os.path.join(savedir, 'drug_colors.png'),
            bbox_inches='tight',dpi =150)
plt.close()

# Scatter plots coloured through the `lut` mapping, then averages via PC_av.
PC_custom.PC12_plots(PC_df, [],[], lut, savedir, 'tif', 'concentration')
# PC_av returns two objects, stored here as the mean and the SEM
PCmean, PCsem = PC_custom.PC_av(PC_df, [], 'concentration')

# Graded trajectory plot over PC_1 / PC_2; cumvar is handed over as cum_var and
# the legend is switched off.
# NOTE(review): the file type is passed as '.png' here but as 'tif' (no dot) to
# PC12_plots above - confirm which form PC_trajGraded expects.
PCJ.PC_trajGraded(PCmean, PCsem, ['PC_1', 'PC_2'], [], savedir, '.png', 'concentration', start_end = False,\
                  cum_var = cumvar, legend = 'off')


# =============================================================================
# To do:
# 1. Do contrastive PCA 
# 2. Label antipsychotics (typical, atypical, and test compounds) and pesticides
    # and look at the distribution of these compounds across multiple principal components
# 3. Is it possible to train a classifier to differentiate between antipsychotics and pesticides?
    
# 4. tSNE embedding
    
Ejemplo n.º 10
0
import make_colormaps as mkc
from matplotlib.colors import LinearSegmentedColormap
import PC_traj as PCJ
import PCA_analysis as PC_custom

# base palette: one tab20 colour per distinct entry of `drugs`
cmap1 = sns.color_palette('tab20', len(np.unique(drugs)))
# and graded colormaps: each base colour paired with white (1, 1, 1) as the start point
cmapGraded = [[(1, 1, 1), base_colour] for base_colour in cmap1]

lutGraded = dict(zip(allDrugs, cmapGraded))
cm = {}
for drug, gradient in lutGraded.items():
    # 60-level colormap named after the drug, registered with pyplot under that name
    cm[drug] = LinearSegmentedColormap.from_list(drug, gradient, N=60)
    plt.register_cmap(cmap=cm[drug])

#make the PC plots
# NOTE(review): output directory is a hard-coded absolute path.
savedir = '/Volumes/behavgenom$/Ida/Data/Antipsychotics'
# The PCA plotting helpers are reused here on the IC_df dataframe.
PC_custom.PC12_plots(IC_df, [], [], cmap1, savedir, 'tif', 'concentration')
# PC_av returns two objects, stored here as the mean and the SEM
ICmean, ICsem = PC_custom.PC_av(IC_df, [], 'concentration')

# Graded trajectory plot over IC_1 / IC_2; no cumulative variance is supplied
# (cum_var = None) and the legend is switched off.
PCJ.PC_trajGraded(ICmean, ICsem,['IC_1','IC_2'], [], savedir, '.png', 'concentration', start_end = False,\
                  cum_var = None, legend = 'off')

#find the features that contribute most to the ICA
Ejemplo n.º 11
0
           fontsize=16)
plt.ylabel('%' + 'PC_2 (%.2f)' % (pca.explained_variance_ratio_[1] * 100),
           fontsize=16)
# Save BEFORE showing: once plt.show() returns the figure may already have been
# released, in which case savefig would write an empty canvas.
plt.savefig(os.path.join(directoryA[:-7], 'Figures', 'agar_biplot.png'))
plt.show()

import PCA_analysis as PC_custom
# tab20 palette with one colour more than the number of distinct drugs:
# need this to match the clustergram from mRMR so add one for cloz10
n_drugs = len(np.unique(drug_all))
cmap1 = sns.color_palette("tab20", n_drugs + 1)
#get rid of 5th row (index 4), which would be cloz10 - there is probably a smarter way to do this...
cmap1 = np.delete(cmap1, 4, axis=0)

#make the PC plots
# scatter plots with the trimmed palette; 'concentration' is passed as the last argument
PC_custom.PC12_plots(PC_df, [], 'all', cmap1, directoryA, 'tif',
                     'concentration')
# PC_av returns two objects, stored here as the mean and the SEM
PCmean, PCsem = PC_custom.PC_av(PC_df, [], 'concentration')
# trajectory plot over PC_1 / PC_2, called with start_end switched off
PC_custom.PC_traj(PCmean,
                  PCsem, ['PC_1', 'PC_2'],
                  'all',
                  directoryA,
                  'tif',
                  cmap1, [],
                  start_end=False)

#the sklearn and my custom PCA gave exactly the same results - Phew

#updated PC12 plots
import PC_traj as PCJ
from matplotlib.colors import LinearSegmentedColormap
import make_colormaps as mkc