def getTaxonAbundance(self, level, taxon):
    """Subset both abundance tables to one taxon and convert them for R.

    Args:
        level: Taxonomic level, forwarded to ``self.r.getTaxonMap`` and
            ``self.r.getTaxonAbundance``.
        taxon: Taxon to select at that level.

    Returns:
        A tuple ``(sub_data_nmds, sub_data_diversity)``: the transposed
        subsets of ``self.df_data_nmds`` and ``self.df_data_diversity`` after
        conversion with ``pandas_df_to_r_df``.
    """
    # The return value is ignored; presumably this prepares the taxon map
    # that the following lookups rely on.
    self.r.getTaxonMap(level)

    df_sub_data_nmds = self.r.getTaxonAbundance(
        self.df_data_nmds, level, taxon)
    df_sub_data_diversity = self.r.getTaxonAbundance(
        self.df_data_diversity, level, taxon)

    # Convert the subsets computed just above.  The earlier code read
    # ``self.df_sub_data_nmds`` / ``self.df_sub_data_diversity`` here, which
    # this method never assigns, so the fresh results were thrown away.
    sub_data_nmds = pandas_df_to_r_df(df_sub_data_nmds.T)
    sub_data_diversity = pandas_df_to_r_df(df_sub_data_diversity.T)
    return sub_data_nmds, sub_data_diversity
def transformDFByRFunction(self, df, function, library="", *args):
    """Apply an R function to a pandas frame and hand back a pandas frame.

    ``df`` is converted with ``pandas_df_to_r_df``.  When ``library`` is not
    the empty string, ``library(<library>)`` is evaluated in R first.
    ``function`` is evaluated through ``r.r`` and called with the converted
    frame followed by ``*args``; its result is passed through
    ``r_matrix_to_dataframe`` before being returned.
    """
    converted = pandas_df_to_r_df(df)

    library_requested = library != ""
    if library_requested:
        r.r("library(%s)" % library)

    r_callable = r.r(function)
    return r_matrix_to_dataframe(r_callable(converted, *args))
def plotCommunityResponse(self, diversity_data, metadata):
    """Run the R community-response plot and tabulate the pairwise values.

    ``diversity_data`` is converted with ``pandas_df_to_r_df`` and passed to
    ``self.r.fPlotCommunityResponse``.  The result, converted with
    ``r_matrix_to_dataframe``, is read through its ``Var1``, ``Var2`` and
    ``value`` columns.  Each row is rewritten with sample names and the
    ``Triclosan`` entry of ``self.r.df_metadata`` for both samples.

    Args:
        diversity_data: pandas DataFrame; iterating its transpose yields the
            sample names.
        metadata: Not used by the computation (Triclosan values come from
            ``self.r.df_metadata``); kept so existing callers keep working.

    Returns:
        A DataFrame indexed like the converted R result with columns
        ``Var1``, ``Var2``, ``value``, ``Triclosan1`` and ``Triclosan2``.
        Rows whose ids or samples cannot be resolved stay empty.  Earlier
        versions built this frame and discarded it, returning ``None``.
    """
    # Return value unused; presumably defines ``title`` on the R side.
    self.r.initRVariable('title', self.analysis_type)

    r_diversity_data = pandas_df_to_r_df(diversity_data)
    r_dis_matrix = self.r.fPlotCommunityResponse(r_diversity_data)
    dis_matrix = r_matrix_to_dataframe(r_dis_matrix)

    # Pair the i-th sample with the i-th distinct ``Var1`` id.  The ids are
    # sorted numerically so the pairing no longer depends on the arbitrary
    # iteration order of a set.
    var1_ids = sorted(set(dis_matrix['Var1']), key=int)
    sample_by_id = {
        int(var_id): sample
        for sample, var_id in zip(diversity_data.T, var1_ids)
    }

    new_df = DataFrame(
        columns=["Var1", "Var2", "value", "Triclosan1", "Triclosan2"],
        index=dis_matrix.index)

    for index in dis_matrix.index:
        try:
            sample1 = sample_by_id[dis_matrix.loc[index, 'Var1']]
            sample2 = sample_by_id[dis_matrix.loc[index, 'Var2']]
            row = [
                sample1,
                sample2,
                dis_matrix.loc[index, 'value'],
                self.r.df_metadata.loc[sample1, 'Triclosan'],
                self.r.df_metadata.loc[sample2, 'Triclosan'],
            ]
        except KeyError as exc:
            # Best effort, as before: report the unresolved row and move on.
            print("plotCommunityResponse: skipping row %s (%r)" % (index, exc))
            continue
        # One ``.loc`` write per row instead of chained ``df[col][index]``
        # assignments, which may silently write to a temporary copy.
        new_df.loc[index] = row

    return new_df
def plotIndex(self, diversity_data, metadata, metadata_type, r=False):
    """Compute the diversity indices in R and plot each of them.

    ``self.r.fBuildDiversityIndices`` is called on the converted
    ``diversity_data``; its results are matched, in order, to the names
    ``chao1``, ``shannon``, ``richness`` and ``pielou``.  For every entry of
    ``self.r.divIndexes`` the matching result is stored on the entry's
    ``data`` attribute and plotted.  For ``richness`` a tab-separated table
    sorted by metadata is also written to ``richness.tsv`` inside
    ``self.analysis_dir``.

    Args:
        diversity_data: pandas DataFrame whose index holds the sample names.
        metadata: pandas object indexed by sample; it is re-indexed to the
            samples of ``diversity_data``.
        metadata_type: Forwarded unchanged to ``self.plotDiversityIndex``.
        r: When true, plot through ``self.r.fPlotDiversityIndex``; otherwise
            through ``self.plotDiversityIndex``.

    Returns:
        None.
    """
    metadata = metadata.loc[diversity_data.index]
    r_diversity_data = pandas_df_to_r_df(diversity_data)
    computed = self.r.fBuildDiversityIndices(r_diversity_data)
    data_by_index = dict(
        zip(("chao1", "shannon", "richness", "pielou"), computed))

    # ``keys()`` is used (rather than iterating the container directly) so
    # that any mapping-like container offering it is walked by key.
    for indice in list(self.r.divIndexes.keys()):
        ind = self.r.divIndexes[indice]
        ind.data = data_by_index[indice]

        if r:
            self.r.fPlotDiversityIndex(
                ind.type_short, ind.type_long, metadata, ind.data, ind.title)
        else:
            # The table is only needed for the Python plot.  If it cannot be
            # assembled, skip this plot instead of silently reusing the table
            # of the previous index (or hitting an undefined name on the
            # first one), which is what the old bare ``except: pass`` did.
            try:
                values = Series(index=diversity_data.index,
                                data=list(ind.data))
                df_indice = concat([metadata, values], axis=1)
                df_indice.columns = ['metadata', indice]
            except Exception as exc:
                print("plotIndex: could not assemble the %s table, "
                      "skipping its plot (%s)" % (indice, exc))
            else:
                self.plotDiversityIndex(
                    ind.type_short, ind.type_long, metadata_type,
                    df_indice, ind.data)

        if indice == "richness":
            fRichness = os.path.join(self.analysis_dir, 'richness.tsv')
            # Unlike the plotting table, this one is labelled with the names
            # carried by the R result itself.
            richness = Series(index=list(ind.data.names),
                              data=list(ind.data))
            df_richness = concat([metadata, richness], axis=1)
            df_richness.columns = ['metadata', indice]
            # ``DataFrame.sort`` was replaced by ``sort_values``; support
            # whichever the installed pandas provides.
            if hasattr(df_richness, 'sort_values'):
                df_richness = df_richness.sort_values('metadata')
            else:
                df_richness = df_richness.sort('metadata')
            df_richness.to_csv(fRichness, sep='\t', index_label="Sample")
def applyRFunctionToDF(self, df, function, library="", *args):
    """Call an R function on a pandas frame and return the raw R result.

    ``df`` is converted with ``pandas_df_to_r_df``.  When ``library`` is not
    the empty string, ``library(<library>)`` is evaluated in R beforehand.
    ``function`` is evaluated through ``r.r`` and invoked with the converted
    frame followed by ``*args``; whatever it returns is passed back as is.
    """
    converted = pandas_df_to_r_df(df)

    library_requested = library != ""
    if library_requested:
        r.r("library(%s)" % library)

    r_callable = r.r(function)
    return r_callable(converted, *args)
def plotNMDS(self, nmds_data, metadata, analysis_type):
    """Run the NMDS ordination in R and save two scatter plots of it.

    The ordination comes from ``self.r.fMetaMDS`` and its scores from
    ``self.r.fMDSScores``.  Points are placed at ``NMDS1``/``NMDS2`` and
    coloured by log10 of ``metadata``.  ``nmds.png`` is written first; then
    every point is annotated with its metadata value and
    ``nmds_with_sample_names.png`` is written.  Both files go to
    ``self.analysis_dir``.

    Args:
        nmds_data: pandas DataFrame handed to R; its index selects and orders
            the metadata entries.
        metadata: pandas Series of numeric values per sample.  Zeros are
            plotted as 0.1 so that their logarithm is finite.
        analysis_type: Value forwarded to R as ``analysis_type``.

    Returns:
        None.
    """
    r_nmds_data = pandas_df_to_r_df(nmds_data)
    r_analysis_type = self.r.initRVariable('analysis_type', analysis_type)
    r_nmds = self.r.fMetaMDS(r_nmds_data, r_analysis_type)
    r_scores = self.r.fMDSScores(r_nmds)
    stress = list(r_nmds.rx('stress')[0])[0]
    scores = r_matrix_to_dataframe(r_scores)
    dim1 = scores["NMDS1"]
    dim2 = scores["NMDS2"]

    # Work on a private copy so the caller's metadata is never modified.
    metadata = metadata.loc[nmds_data.index].copy()

    # Give the controls (exactly 0.0) a small positive value *before* any
    # logarithm is taken; the old code took log10 of the zeros first.
    is_control = metadata == 0.0
    has_controls = bool(is_control.any())
    metadata[is_control] = 0.1
    log_metadata = np.log10(metadata)

    plt.figure()
    try:
        sc = plt.scatter(
            dim1, dim2, c=log_metadata,
            vmin=min(log_metadata), vmax=max(log_metadata),
            s=100, cmap=plt.get_cmap('Greys'))

        # NOTE(review): the colours are already log10-transformed, so the
        # LogNorm passed here looks redundant -- confirm before removing.
        cb = plt.colorbar(sc, shrink=0.6, aspect=10, pad=0.1,
                          norm=matplotlib.colors.LogNorm())
        cb.ax.tick_params(labelsize='x-small', top=False)
        cb.set_label('Triclosan concentration [nM]', labelpad=-70)

        labels = np.sort(np.array(list(set(metadata))))
        ind_labels = [str(value) for value in labels]
        # Show the controls as "0.0" again, but only when the data actually
        # contained controls; assumes the 0.1 placeholder is the smallest
        # value on the scale.
        if has_controls and ind_labels:
            ind_labels[0] = "0.0"
        cb.set_ticks(np.log10(labels))
        cb.set_ticklabels(ind_labels)

        ax = plt.gca()
        ax.set_xlabel('NMDS1')
        ax.set_ylabel('NMDS2')
        # NOTE(review): the title uses self.analysis_type, not the
        # ``analysis_type`` argument -- confirm that this is intended.
        ax.set_title('NMDS Ordination of %s OTU Abundance'
                     % self.analysis_type)
        ax.grid(True, color="gray", alpha=0.5)
        # Axes-relative position just outside the lower right corner.
        left, bottom = 1.05, 0.05
        ax.text(left, bottom, 'Stress: %.2g' % stress, fontsize=11,
                horizontalalignment='left', verticalalignment='bottom',
                transform=ax.transAxes)
        plt.savefig(os.path.join(self.analysis_dir, 'nmds.png'))

        # Second version of the plot: each point labelled with its metadata
        # value at a random offset up and to the left of the point.
        annotations = [str(value) for value in metadata]
        for label, x, y in zip(annotations, dim1, dim2):
            plt.annotate(
                label,
                xy=(x, y),
                xytext=(random.randint(-50, -1), random.randint(1, 50)),
                textcoords='offset points', ha='right', va='bottom',
                bbox=dict(boxstyle='round,pad=0.8', fc='yellow', alpha=0.2),
                arrowprops=dict(arrowstyle='->',
                                connectionstyle='arc3,rad=0'),
                fontsize=10)
        plt.savefig(
            os.path.join(self.analysis_dir, 'nmds_with_sample_names.png'))
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close()
def plotRarefaction(self, nmds_data):
    """Hand the abundance table to the R rarefaction plotting routine.

    ``self.analysis_type`` and ``self.analysis_dir`` are registered through
    ``self.r.initRVariable`` under the names ``analysis_type`` and
    ``analysis_dir``.  ``nmds_data`` is converted with ``pandas_df_to_r_df``
    and all three are passed to ``self.r.fPlotRarefaction``.  Nothing is
    returned.
    """
    type_handle = self.r.initRVariable('analysis_type', self.analysis_type)
    dir_handle = self.r.initRVariable('analysis_dir', self.analysis_dir)
    converted = pandas_df_to_r_df(nmds_data)
    self.r.fPlotRarefaction(converted, type_handle, dir_handle)