def samplemeanpervalue(expdat,field):
    """
    Collapse an experiment to a single sample per unique value of a field.
    The data of each output sample is the mean (per sequence) of all the samples sharing that value.

    input:
    expdat : Experiment
    field : string
        the mapping file field to group the samples by (i.e. 'ENV_MATTER')

    output:
    newexp : Experiment
        The new experiment with 1 sample per unique value of field
    """
    # snapshot of the call arguments for the command history (must stay the first statement)
    params=locals()

    uniquevalues=hs.getfieldvals(expdat,field,ounique=True)
    allvalues=hs.getfieldvals(expdat,field,ounique=False)
    # used below as value -> positions of the samples having that value
    positions=hs.listtodict(allvalues)

    # the first sample of each group provides the identity of the output sample
    representatives=[positions[value][0] for value in uniquevalues]
    newexp=hs.reordersamples(expdat,representatives)

    # overwrite each representative column with the mean of its whole group
    for column,value in enumerate(uniquevalues):
        groupdata=expdat.data[:,positions[value]]
        newexp.data[:,column]=np.mean(groupdata,axis=1)

    newexp.filters.append('samplemeanpervalue for field %s' % field)
    hs.addcommand(newexp,"samplemeanpervalue",params=params,replaceparams={'expdat':expdat})
    return newexp
def clustersamples(exp,minreads=0):
    """
    Reorder the samples of an experiment so samples with similar behavior are adjacent
    (single linkage hierarchical clustering on bray-curtis distances of the log2 data)

    input:
    exp : Experiment
    minreads : int
        the minimal original number of reads per sample to keep it

    output:
    newexp : Experiment
        the filtered and clustered experiment
    """
    # snapshot of the call arguments for the command history (must stay the first statement)
    params=locals()

    newexp=hs.filterorigreads(exp,minreads)

    # work on a copy (one row per sample) so the experiment data is not modified:
    # values up to 2 are clipped to 2 and then everything is log2 transformed
    sampledata=np.transpose(copy.copy(newexp.data))
    sampledata[sampledata<=2]=2
    sampledata=np.log2(sampledata)

    # the dendrogram leaves order is the new samples order
    distances=spatial.distance.pdist(sampledata,metric='braycurtis')
    linkage=cluster.hierarchy.single(distances)
    leaforder=cluster.hierarchy.leaves_list(linkage)
    newexp=hs.reordersamples(newexp,leaforder)

    hs.addcommand(newexp,"clustersamples",params=params,replaceparams={'exp':exp})
    newexp.filters.append("cluster samples minreads=%d" % minreads)
    return newexp
def sortsamples(exp,field,numeric=False,logit=True):
    """
    Reorder the samples of an experiment by the values of a mapping file field

    input:
    exp : Experiment
    field : string
        name of the field to sort by
    numeric : bool
        True to convert the field values to float before sorting, False to sort them as text
    logit : bool
        True (default) to record the operation in the experiment command history and filters list,
        False to skip the recording

    output:
    newexp : Experiment
        the sorted experiment
    """
    # snapshot of the call arguments for the command history (must stay the first statement)
    params=locals()

    sortkeys=hs.getfieldvals(exp,field)
    if numeric:
        sortkeys=hs.tofloat(sortkeys)
    # only the sorted positions are needed, the sorted values are discarded
    _,order=hs.isort(sortkeys)
    newexp=hs.reordersamples(exp,order)

    if not logit:
        return newexp
    hs.addcommand(newexp,"sortsamples",params=params,replaceparams={'exp':exp})
    newexp.filters.append('sorted samples by field %s' % field)
    return newexp
def filtersamples(expdat,field,filtval,exact=True,exclude=False,numexpression=False,shownumoutput=True):
    """
    filter samples in experiment according to value in field

    input:
    expdat : Experiment
    field : string
        name of the field to filter by
    filtval : string or list of strings
        the string to filter (if a list of strings, a sample matches if any of the values matches)
    exact : bool
        True for exact match, False for substring (ignored when numexpression is True)
    exclude : bool
        False to keep only matching samples, True to exclude matching samples
    numexpression : bool
        True if filtval is the end of a python expression, False if just a value.
        The expression is evaluated with the sample value prepended (i.e. '<=5' is evaluated as value<=5).
        A sample with an empty value in field never matches.
    shownumoutput : bool
        True (default) to report the number of samples remaining at debug level 6,
        False to report it at debug level 1

    output:
    newexp : Experiment
        the filtered experiment
    """
    # snapshot of the call arguments for the command history (must stay the first statement)
    params=locals()
    if not isinstance(filtval,list):
        filtval=[filtval]

    keep=[]
    for cidx,csamp in enumerate(expdat.samples):
        keepit=False
        for filt in filtval:
            cval=expdat.smap[csamp][field]
            if numexpression:
                if len(cval)==0:
                    continue
                # NOTE(review): eval() runs arbitrary code built from the mapping file value
                # and filtval - only use numexpression with trusted mapping files and filters
                if eval(cval+filt):
                    keepit=True
            elif exact:
                if cval==filt:
                    keepit=True
            elif filt in cval:
                keepit=True

        # if exclude reverse the decision
        if exclude:
            keepit=not keepit
        if keepit:
            keep.append(cidx)
    newexp=hs.reordersamples(expdat,keep)

    # describe all the filter values (not just the last one tested) in the history.
    # for a single value this gives the same text as before
    filtdesc=','.join(str(cfilt) for cfilt in filtval)
    fstr="filter data %s in %s " % (filtdesc,field)
    if exact:
        fstr=fstr+"(exact)"
    else:
        fstr=fstr+"(substr)"
    if exclude:
        fstr+=" (exclude)"
    newexp.filters.append(fstr)
    hs.addcommand(newexp,"filtersamples",params=params,replaceparams={'expdat':expdat})

    if shownumoutput:
        hs.Debug(6,'%d Samples left' % len(newexp.samples))
    else:
        hs.Debug(1,'%d Samples left' % len(newexp.samples))
    return newexp
def view(self):
    """
    Plot the experiment self.cexp with the selected samples (self.samples) and the
    selected sequences (self.seqs) moved to the beginning, followed by all the others
    """
    cexp=self.cexp

    # selected sample positions first, then the positions not in the selection
    othersamples=np.setdiff1d(np.arange(len(cexp.samples)),self.samples)
    sampleorder=np.concatenate((self.samples,othersamples))

    # same for the sequences (the selection holds sequences, so convert to positions)
    selectedseqs=[cexp.seqdict[cseq] for cseq in self.seqs]
    otherseqs=np.setdiff1d(np.arange(len(cexp.seqs)),selectedseqs)
    seqorder=np.concatenate((selectedseqs,otherseqs))

    # reorderbacteria returns the experiment we then reorder in place
    newexp=hs.reorderbacteria(cexp,seqorder)
    newexp=hs.reordersamples(newexp,sampleorder,inplace=True)
    hs.plotexp(newexp,seqdb=self.bactdb,sortby=False,numeric=False,usegui=True,cdb=self.cooldb,showline=False)
def filterorigreads(expdat,minreads,inplace=False):
    """
    filter away all samples that contained originally (before normalization) less than minreads

    input:
    expdat : Experiment
    minreads : float
        the minimum number of reads of the sample in the biom table to keep it (usually int).
        samples with less than minreads original reads are removed
    inplace : Bool
        True to replace current experiment, False (default) to create a new one

    output:
    newexp : Experiment
        the filtered experiment
    """
    # snapshot of the call arguments for the command history (must stay the first statement)
    params=locals()

    numreads=np.array(expdat.origreads)
    keep=np.where(numreads>=minreads)[0]
    # honor the inplace request (it was accepted but never passed on to the reordering)
    newexp=hs.reordersamples(expdat,keep,inplace=inplace)
    hs.Debug(6,'%d Samples left' % len(newexp.samples))
    hs.addcommand(newexp,"filterorigreads",params=params,replaceparams={'expdat':expdat})
    return newexp
def filtersamplesfromfile(expdat,filename,field='#SampleID',exclude=False):
    """
    filter samples based on a list in a file (one line per sample)

    input:
    expdat : Experiment
    filename : string
        name of the text file (1 value per line). Leading/trailing whitespace of each line is ignored
    field : string
        the experiment field whose values are listed in the file
    exclude : bool
        False (default) to keep only the samples listed in the file, True to throw them away instead

    output:
    newexp : Experiment
        the filtered experiment
    """
    # snapshot of the call arguments for the command history (must stay the first statement)
    params=locals()

    # text mode already handles all newline styles on python 3. The explicit 'U' flag
    # is rejected by python 3.11+, and the with block closes the file also on errors
    with open(filename) as fl:
        vals={cline.strip() for cline in fl}

    keep=[]
    for cidx,csamp in enumerate(expdat.samples):
        cval=expdat.smap[csamp][field]
        keepit=cval in vals
        if keepit:
            hs.Debug(3,'found file value %s in sample %s' % (cval,csamp))
        # if exclude reverse the decision
        if exclude:
            keepit=not keepit
        if keepit:
            keep.append(cidx)
    newexp=hs.reordersamples(expdat,keep)

    fstr="filter data from file %s in %s " % (filename,field)
    if exclude:
        fstr+=" (exclude)"
    newexp.filters.append(fstr)
    hs.addcommand(newexp,"filtersamplesfromfile",params=params,replaceparams={'expdat':expdat})
    hs.Debug(6,'%d Samples left' % len(newexp.samples))
    return newexp
def load(tablename, mapname='map.txt', taxfile='', nameisseq=True,studyname=False,tabletype='biom',normalize=True,addsname='',keepzero=False,removefrom=False,removenum=1,mapsampletolowercase=False,sortit=True,useseqnamefortax=True,rawreads=False,usesparse=False):
    """
    Load an experiment - a biom table and a mapping file

    input:
    tablename - the name of the biom table file
    mapname - name of the mapping file, or empty/False to use only the table sample names
    taxfile - empty ('') to load taxonomy from biom table, non-empty to load from rdp output file (web)
        (not used by the current implementation)
    nameisseq - False to keep otu name as sid without hashing it, True to treat otuid as sequence
    addsname - a string to add to each table sample name (or empty to not add)
    studyname - False to assign from table file name, otherwise string to store as study name
    tabletype:
        'biom' - a biom table
        'meta' - a metabolomics table (row per sample, col per metabolite, can contain duplicate metaboliteids)
    normalize - True to normalize to 10k reads per sample, False to not normalize (change to mean 10k reads/sample)
        (always False for tabletype 'meta')
    keepzero : bool
        True to keep samples with 0 reads (including mapping file samples missing from the table),
        False (default) to throw them away
    removefrom : string
        if non empty - cut table sample name after (and including) the removenum occurrence of removefrom
    removenum : int
        the occurrence number of removefrom at which to cut the sample name
    mapsampletolowercase : bool
        True to convert the mapping file sample id to lower case (for EMP data). default=False
    sortit : bool
        True (default) to sort sequences by taxonomy, False to not sort
    useseqnamefortax : bool
        True (default) to use the sequence as taxonomy if no taxonomy supplied, False to use 'unknown'
    rawreads : bool
        True in combination with normalize=False - do not modify read count to mean 10k
    usesparse : bool
        True to use sparse matrix representation, False to use non-sparse (default)

    output:
    an experiment class for the current experiment, or False if tabletype is not supported
    """
    # snapshot of the call arguments for the command history (must stay the first statement)
    params=locals()

    # load the table
    if tabletype=='biom':
        hs.Debug(6,'Loading biom table')
        table=biom.load_table(tablename)
    elif tabletype=='meta':
        hs.Debug(6,'Loading metabolite table')
        table=loadmetabuckettable(tablename)
    else:
        hs.Debug(9,'Table type %s not supported' % tabletype)
        return False

    # md5 of the raw table data (before any sample renaming/filtering)
    datamd5g=hashlib.md5()
    datamd5g.update(table.matrix_data.todense().A.view(np.uint8))
    datamd5=datamd5g.hexdigest()
    print(datamd5)

    # if need to cut table sample names
    if removefrom:
        idtable={}
        foundids={}
        ids=table.ids(axis='sample')
        if len(set(ids))!=len(ids):
            hs.Debug(8,'non unique ids identified')
        for cid in ids:
            if removefrom in cid:
                fpos=hs.findn(cid,removefrom,removenum)
                if fpos==-1:
                    hs.Debug(6,'Did not find enough %s in %s' % (removefrom,cid))
                    tid=cid
                else:
                    tid=cid[:fpos]
            else:
                hs.Debug(6,'%s not found in sample name %s (removefrom)' % (removefrom,cid))
                tid=cid
            # different samples can be cut to the same name - make the repeats unique
            if tid in foundids:
                hs.Debug(6,'already have id %s' % cid)
                foundids[tid]+=1
                idtable[cid]=tid+'-rep-'+str(foundids[tid])
                print(idtable[cid])
            else:
                foundids[tid]=1
                idtable[cid]=tid
        hs.Debug(6,'found %d keys %d values' % (len(set(idtable.keys())),len(set(idtable.values()))))
        table=table.update_ids(idtable,axis='sample')

    # if need to add constant string to sample names in table
    if addsname!='':
        idtable={}
        ids=table.ids(axis='sample')
        for cid in ids:
            idtable[cid]=addsname+cid
        table=table.update_ids(idtable,axis='sample')

    smap = {}
    mapsamples = []
    mapmd5=''
    if mapname:
        # if mapping file supplied, load it
        mapsamples,smap,fields,mapmd5=loadmap(mapname,mapsampletolowercase=mapsampletolowercase)
    else:
        # no mapping file, so just create the #SampleID field
        hs.Debug(6,'No mapping file supplied - using just sample names')
        tablesamples = table.ids(axis='sample')
        for cid in tablesamples:
            smap[cid]={'#SampleID':cid}
            mapsamples.append(cid)
        fields=['#SampleID']
        mapmd5=''

    # remove table samples not in mapping file
    tablesamples = table.ids(axis='sample')
    hs.Debug(6,'number of samples in table is %d' % len(tablesamples))
    # set lookup instead of scanning the samples list for every table sample
    mapsampleset=set(mapsamples)
    removelist=[]
    for cid in tablesamples:
        if cid not in mapsampleset:
            removelist.append(cid)
            hs.Debug(6,'Table sample %s not found in mapping file' % cid)
    hs.Debug(6,'removing %s samples' % len(removelist))
    if len(removelist)>0:
        table=table.filter(removelist,axis='sample',invert=True)

    tablesamples = table.ids(axis='sample')
    hs.Debug(6,'deleted. number of samples in table is now %d' % len(tablesamples))

    # remove samples not in table from mapping file
    # (or remember them in addlist, to be added as 0 read samples, if keepzero)
    tablesampleset=set(tablesamples)
    removemap=[]
    addlist=[]
    for idx,cmap in enumerate(mapsamples):
        if cmap not in tablesampleset:
            hs.Debug(2,'Map sample %s not in table' % cmap)
            if not keepzero:
                removemap.append(idx)
                try:
                    del smap[cmap]
                except KeyError:
                    # the entry was already deleted - the id appears more than once in the mapping file
                    hs.Debug(8,'Duplicate SampleID %s in mapping file' % cmap)
            else:
                addlist.append(cmap)
    if len(removemap)>0:
        hs.Debug(7,'removing %d samples from mapping file' % len(removemap))
        mapsamples=hs.delete(mapsamples,removemap)
    hs.Debug(7,'number of samples in mapping file is now %d' % len(mapsamples))

    # get info about the sequences
    tableseqs = table.ids(axis='observation')
    sids = []
    tax = []
    osnames=[]
    for cid in tableseqs:
        # get the original observation (otu) name
        osnames.append(cid)
        # get the sid (hash)
        if nameisseq:
            sids.append(hs.hashseq(cid))
        else:
            sids.append(cid)
        # get the taxonomy string
        ctax=gettaxfromtable(table,cid,useseqname=useseqnamefortax)
        tax.append(ctax)

    if not studyname:
        studyname=os.path.basename(tablename)

    exp=hs.Experiment()
    exp.datatype=tabletype
    if usesparse:
        exp.data=scipy.sparse.dok_matrix(table.matrix_data)
    else:
        exp.data=table.matrix_data.todense().A

    # check if need to add the 0 read samples to the data
    if len(addlist)>0:
        tablesamples=list(tablesamples)
        tablesamples=tablesamples+addlist
        exp.data=np.hstack([exp.data,np.zeros([np.shape(exp.data)[0],len(addlist)])])
    exp.smap=smap
    exp.samples=tablesamples
    exp.seqs=tableseqs

    for idx,cseq in enumerate(exp.seqs):
        exp.seqdict[cseq]=idx
    exp.sids=sids
    exp.origotunames=osnames
    exp.tax=tax
    exp.tablefilename=tablename
    exp.studyname=studyname
    # store the mapping file name (the table file name was stored here by mistake)
    exp.mapfilename=mapname
    exp.filters = [tablename]
    exp.fields = fields
    exp.datamd5 = datamd5
    exp.mapmd5 = mapmd5

    colsum=np.sum(exp.data,axis=0,keepdims=False)
    exp.origreads=list(colsum)
    # add the original number of reads as a field to the experiment
    exp.fields.append('origReads')
    for idx,csamp in enumerate(exp.samples):
        exp.smap[csamp]['origReads']=str(exp.origreads[idx])

    # normalize samples to 10k reads per samples
    colsum=np.sum(exp.data,axis=0,keepdims=True)
    okreads=np.where(colsum>0)
    numzero=np.size(colsum)-np.size(okreads[1])
    if numzero>0:
        print("Samples with 0 reads: %d" % numzero)
        if not keepzero:
            exp=hs.reordersamples(exp,okreads[1])
        colsum=np.sum(exp.data,axis=0,keepdims=True)
    if tabletype=='meta':
        normalize=False

    if normalize:
        exp.data=10000*exp.data/colsum
    else:
        if not rawreads:
            exp.data=10000*exp.data/np.mean(colsum)

    exp.uniqueid=exp.getexperimentid()
    if sortit:
        exp=hs.sortbacteria(exp,logit=False)
    hs.addcommand(exp,"load",params=params)
    exp.filters.append('loaded table=%s, map=%s' % (tablename,mapname))
    return exp
def plotexp(exp,sortby=False,numeric=False,minreads=4,rangeall=False,seqdb=None,cdb=None,showline=True,ontofig=False,usegui=True,showxall=False,showcolorbar=False,ptitle=False,lowcutoff=1,uselog=True,showxlabel=True,colormap=False,colorrange=False,linewidth=2,subline='',showhline=True,newfig=True,fixfont=False,fontsize=None,nosort=False,zeroisnone=False,xlabelrotation=45,showtaxnames=False):
    """
    Plot an experiment as a heatmap (a row per sequence, a column per sample)

    input:
    exp - from load()
    sortby - name of mapping file field to sort by or False to not sort
    numeric - True if the field is numeric
    minreads - minimum number of reads per bacteria in order to show it or 0 to show all
    rangeall - True to show all frequencies in image scale, false to saturate at 10%
    seqdb - the SRBactDB database (from bactdb.load), or None (default) to use the heatsequer loaded one (hs.bdb)
    cdb - the cool sequences database (from cooldb.load), or None (default) to use the heatsequer loaded cdb
    showline - if True plot lines between category values
    ontofig - name of ontology to plot for bactdb or false to no plot
    usegui - True use a gui for otu summary, False just print
    showxall - True to show all sample names when not showing lines, False to show them only for up to 10 samples
    showcolorbar - True to plot the colorbar. False to not plot
    ptitle : str (optional)
        False or '' to show the processing history as title, None to not show title, or str of name of the figure
    lowcutoff - minimal value for read (for 0 log transform) - the minimal resolution - could be 10000*2/origreads
    uselog : bool
        True (default) to plot the log2 of the data (after setting values below lowcutoff to lowcutoff)
    showxlabel : bool
        True to show the x label (default), False to hide it
    colormap : string or False
        name of colormap or False (default) to use mpl default colormap
    colorrange : [min,max] or False
        [min,max] to set the colormap range, False to use data min,max (default) as specified in rangeall
    linewidth : number
        width of the lines plotted between category values
    subline : str
        Name of category for subline plotting or '' (Default) for no sublines
    showhline : bool
        True (default) to plot the horizontal lines listed in exp.hlines. False to not plot them
    newfig : bool
        True (default) to open figure in new window, False to use current
    fixfont : bool (optional)
        False (default) to use the default font, True to use fixed width font
    fontsize : int or None (optional)
        None (default) to use default font size, number to use that font size
    nosort : bool (optional)
        False (default) to sort by the sort field, True to skip the sorting
    zeroisnone : bool (optional)
        False (default) to plot zeros as 0, True to assign None (white color)
    xlabelrotation : int (optional)
        the rotation of the xtick labels
    showtaxnames : bool (optional)
        False (default) to not show tax names (need to press 'h' to show)
        True to show the taxonomy names

    output:
    newexp - the plotted experiment (sorted and filtered)
    ax - the plot axis
    """
    hs.Debug(1,"Plot experiment %s" % exp.studyname)
    hs.Debug(1,"Commands:")
    for ccommand in exp.commands:
        hs.Debug(1,"%s" % ccommand)

    if exp.sparse:
        hs.Debug(9,'Sparse matrix - converting to dense')
        exp=hs.copyexp(exp,todense=True)

    vals=[]
    if cdb is None:
        cdb=hs.cdb
    if seqdb is None:
        seqdb=hs.bdb
    if sortby:
        if not nosort:
            hs.Debug(1,"Sorting by field %s" % sortby)
            for csamp in exp.samples:
                vals.append(exp.smap[csamp][sortby])
            if numeric:
                hs.Debug(1,"(numeric sort)")
                vals=hs.tofloat(vals)
            svals,sidx=hs.isort(vals)
            newexp=hs.reordersamples(exp,sidx)
        else:
            hs.Debug(1,"no sorting but showing columns")
            svals=hs.getfieldvals(exp,sortby)
            newexp=hs.copyexp(exp)
    else:
        hs.Debug(1,"No sample sorting")
        svals=hs.getfieldvals(exp,'#SampleID')
        newexp=hs.copyexp(exp)
    hs.Debug(1,"Filtering min reads. original bacteria - %d" % len(newexp.seqs))
    if minreads>0:
        # NOTE(review): uselog is passed as the logit flag - looks unintended, confirm
        newexp=hs.filterminreads(newexp,minreads,logit=uselog)
    hs.Debug(1,"New number of bacteria %d" % len(newexp.seqs))
    newexp.seqdb=seqdb
    newexp.cdb=cdb
    newexp.scdb=hs.scdb

    # ldat is the same array as newexp.data until the log transform, so the
    # none/cutoff replacements below also modify the returned experiment
    ldat=newexp.data
    if zeroisnone:
        ldat[ldat==0]=None
    if uselog:
        hs.Debug(1,"Using log, cutoff at %f" % lowcutoff)
        ldat[np.where(ldat<lowcutoff)]=lowcutoff
        ldat=np.log2(ldat)
    # NOTE(review): this keeps a reference (not a copy) to rcParams, so the keymap
    # changes below stay in effect also after the assignment back further down
    oldparams=plt.rcParams
    mpl.rc('keymap',back='c, backspace')
    mpl.rc('keymap',forward='v')
    mpl.rc('keymap',all_axes='A')
    if newfig:
        f=plt.figure(tight_layout=True)
    else:
        f=plt.gcf()
    # set the colormap to default if not supplied
    if not colormap:
        colormap=plt.rcParams['image.cmap']
    # plot the image
    if colorrange:
        hs.Debug(1,"colormap range is 0,10")
        iax=plt.imshow(ldat,interpolation='nearest',aspect='auto',clim=colorrange,cmap=plt.get_cmap(colormap))
    elif rangeall:
        hs.Debug(1,"colormap range is all")
        iax=plt.imshow(ldat,interpolation='nearest',aspect='auto',cmap=plt.get_cmap(colormap))
    else:
        hs.Debug(1,"colormap range is 0,10")
        iax=plt.imshow(ldat,interpolation='nearest',aspect='auto',clim=[0,10],cmap=plt.get_cmap(colormap))

    if ptitle is not None:
        if not ptitle:
            hs.Debug(1,"Showing filters in title")
            if (len(newexp.filters))>4:
                cfilters=[newexp.filters[0],'...',newexp.filters[-2],newexp.filters[-1]]
            else:
                cfilters=newexp.filters
            cfilters=hs.clipstrings(cfilters,30)
            ptitle='\n'.join(cfilters)
        plt.title(ptitle,fontsize=10)

    # the axes attribute replaces the get_axes() artist method removed from matplotlib
    ax=iax.axes
    ax.autoscale(False)

    # plot the sublines (smaller category lines)
    if subline:
        slval=hs.getfieldvals(newexp,subline)
        prevval=slval[0]
        for idx,cval in enumerate(slval):
            if cval!=prevval:
                xpos=idx-0.5
                plt.plot([xpos,xpos],[-0.5,np.size(ldat,0)-0.5],'w:')
                prevval=cval

    if showline:
        hs.Debug(1,"Showing lines")
        labs=[]
        labpos=[]
        linepos=[]
        minpos=0
        # the added sentinel closes the last group (on a copy, so svals is not modified)
        groupvals=list(svals)+['end']
        for idx,cval in enumerate(groupvals[:-1]):
            if cval==groupvals[idx+1]:
                continue
            # end of a group: label at the group center, line after its last sample
            labpos.append(minpos-0.5+float(idx+1-minpos)/2)
            minpos=idx+1
            linepos.append(idx+0.5)
            labs.append(cval)
        hs.Debug(1,"number of lines is %d" % len(linepos))
        if showxlabel:
            ax.set_xticks(labpos)
            ax.set_xticklabels(labs,rotation=xlabelrotation,ha='right')
        for cx in linepos:
            # dotted white over solid black, so the line is visible on any background
            plt.plot([cx,cx],[-0.5,np.size(ldat,0)-0.5],'k',linewidth=linewidth)
            plt.plot([cx,cx],[-0.5,np.size(ldat,0)-0.5],'w:',linewidth=linewidth)
    else:
        hs.Debug(1,"Not showing lines")
        if showxall or len(newexp.samples)<=10:
            hs.Debug(1,"less than 10 samples, showing all sample names")
            ax.set_xticklabels(svals,rotation=90)
            ax.set_xticks(range(len(newexp.samples)))
    ax.set_ylim(-0.5,np.size(ldat,0)-0.5)

    if fixfont:
        fontProperties = {'family':'monospace'}
        ax.set_yticklabels(ax.get_yticks(), fontProperties)

    if showcolorbar:
        hs.Debug(1,"Showing colorbar")
        cb=plt.colorbar(ticks=list(np.log2([2,10,100,500,1000])))
        cb.ax.set_yticklabels(['<0.02%','0.1%','1%','5%','>10%'])

    # attach the state used by the mouse/keyboard callbacks to the axis
    ax.expdat=newexp
    ax.lastselect=-1
    ax.sampline=''
    ax.ofig=f
    ax.labelson=False
    ax.labelnames=[]
    f.canvas.mpl_connect('button_press_event', onplotmouseclick)
    f.canvas.mpl_connect('key_press_event', onplotkeyclick)
    plt.rcParams=oldparams

    # if want the ontology analysis for a given category:
    if ontofig:
        hs.Debug(1,"Ontofig is set")
        newexp.ontofigname=ontofig
    else:
        newexp.ontofigname=False

    # if we want gui, open it
    if usegui:
        hs.Debug(1,"Using the GUI window")
        # imported here so the gui module is needed only when the gui is requested.
        # the window is created once
        from heatsequer.plots import plotwingui
        guiwin = plotwingui.PlotGUIWindow(newexp)
        ax.guiwin=guiwin
        guiwin.plotfig=f
        guiwin.plotax=ax
        guiwin.show()
    else:
        ax.guiwin=False
        hs.Debug(7,'Not using gui')

    ax.plot_labelsize=fontsize
    if newexp.plotmetadata:
        hs.Debug(1,"Experiment has metadata attached for plotting (%d points)" % len(newexp.plotmetadata))
        for cmet in newexp.plotmetadata:
            addplotmetadata(newexp,field=cmet[0],value=cmet[1],color=cmet[2],inverse=cmet[3],beforesample=cmet[4])
    if showhline:
        if newexp.hlines:
            for cpos in newexp.hlines:
                plt.plot([0,np.shape(newexp.data)[1]],[cpos-0.5,cpos-0.5],'g')
    plt.show()
    if showtaxnames:
        showtaxonomies(newexp,ax,showdb=False,showcontam=False)

    return newexp,ax
def plotexp(exp,sortby=False,numeric=False,minreads=4,rangeall=False,seqdb=None,cdb=None,showline=True,ontofig=False,usegui=True,showxall=False,showcolorbar=False,ptitle=False,lowcutoff=1,uselog=True,showxlabel=True,colormap=False,colorrange=False):
    """
    Plot an experiment as a heatmap (a row per sequence, a column per sample)

    input:
    exp - from load()
    sortby - name of mapping file field to sort by or False to not sort
    numeric - True if the field is numeric
    minreads - minimum number of reads per bacteria in order to show it or 0 to show all
    rangeall - True to show all frequencies in image scale, false to saturate at 10%
    seqdb - the SRBactDB database (from bactdb.load)
    cdb - the cool sequences database (from cooldb.load)
    showline - if True plot lines between category values
    ontofig - name of ontology to plot for bactdb or false to no plot
    usegui - True use a gui for otu summary, False just print
    showxall - True to show all sample names when not showing lines, False to show them only for up to 10 samples
    showcolorbar - True to plot the colorbar. False to not plot
    ptitle - name of the figure or False to show processing history as name
    lowcutoff - minimal value for read (for 0 log transform) - the minimal resolution - could be 10000*2/origreads
    uselog : bool
        True (default) to plot the log2 of the data (after setting values below lowcutoff to lowcutoff)
    showxlabel : bool
        True to show the x label (default), False to hide it
    colormap : string or False
        name of colormap or False (default) to use mpl default colormap
    colorrange : [min,max] or False
        [min,max] to set the colormap range, False to use data min,max (default) as specified in rangeall

    output:
    newexp - the plotted experiment (sorted and filtered)
    ax - the plot axis
    """
    hs.Debug(1,"Plot experiment %s" % exp.studyname)
    hs.Debug(1,"Commands:")
    for ccommand in exp.commands:
        hs.Debug(1,"%s" % ccommand)

    vals=[]
    if sortby:
        hs.Debug(1,"Sorting by field %s" % sortby)
        for csamp in exp.samples:
            vals.append(exp.smap[csamp][sortby])
        if numeric:
            hs.Debug(1,"(numeric sort)")
            vals=hs.tofloat(vals)
        svals,sidx=hs.isort(vals)
        newexp=hs.reordersamples(exp,sidx)
    else:
        hs.Debug(1,"No sample sorting")
        svals=hs.getfieldvals(exp,'#SampleID')
        newexp=hs.copyexp(exp)
    hs.Debug(1,"Filtering min reads. original bacteria - %d" % len(newexp.seqs))
    if minreads>0:
        # NOTE(review): uselog is passed as the logit flag - looks unintended, confirm
        newexp=hs.filterminreads(newexp,minreads,logit=uselog)
    hs.Debug(1,"New number of bacteria %d" % len(newexp.seqs))
    newexp.seqdb=seqdb
    newexp.cdb=cdb

    # ldat is the same array as newexp.data until the log transform, so the
    # cutoff replacement below also modifies the returned experiment
    ldat=newexp.data
    if uselog:
        hs.Debug(1,"Using log, cutoff at %f" % lowcutoff)
        ldat[np.where(ldat<lowcutoff)]=lowcutoff
        ldat=np.log2(ldat)
    # NOTE(review): this keeps a reference (not a copy) to rcParams, so the keymap
    # changes below stay in effect also after the assignment back further down
    oldparams=plt.rcParams
    mpl.rc('keymap',back='c, backspace')
    mpl.rc('keymap',forward='v')
    mpl.rc('keymap',all_axes='A')
    f=figure()
    # set the colormap to default if not supplied
    if not colormap:
        colormap=plt.rcParams['image.cmap']
    # plot the image
    if colorrange:
        hs.Debug(1,"colormap range is 0,10")
        iax=imshow(ldat,interpolation='nearest',aspect='auto',clim=colorrange,cmap=plt.get_cmap(colormap))
    elif rangeall:
        hs.Debug(1,"colormap range is all")
        iax=imshow(ldat,interpolation='nearest',aspect='auto',cmap=plt.get_cmap(colormap))
    else:
        hs.Debug(1,"colormap range is 0,10")
        iax=imshow(ldat,interpolation='nearest',aspect='auto',clim=[0,10],cmap=plt.get_cmap(colormap))

    if not ptitle:
        hs.Debug(1,"Showing filters in title")
        if (len(newexp.filters))>4:
            cfilters=[newexp.filters[0],'...',newexp.filters[-2],newexp.filters[-1]]
        else:
            cfilters=newexp.filters
        cfilters=hs.clipstrings(cfilters,30)
        ptitle='\n'.join(cfilters)
    title(ptitle,fontsize=10)

    # the axes attribute replaces the get_axes() artist method removed from matplotlib
    ax=iax.axes
    ax.autoscale(False)
    if showline:
        hs.Debug(1,"Showing lines")
        labs=[]
        labpos=[]
        linepos=[]
        minpos=0
        # the added sentinel closes the last group (on a copy, so svals is not modified)
        groupvals=list(svals)+['end']
        for idx,cval in enumerate(groupvals[:-1]):
            if cval==groupvals[idx+1]:
                continue
            # end of a group: label at the group center, line after its last sample
            labpos.append(minpos-0.5+float(idx+1-minpos)/2)
            minpos=idx+1
            linepos.append(idx+0.5)
            labs.append(cval)
        hs.Debug(1,"number of lines is %d" % len(linepos))
        if showxlabel:
            ax.set_xticks(labpos)
            ax.set_xticklabels(labs,rotation=45,ha='right')
        for cx in linepos:
            plot([cx,cx],[-0.5,np.size(ldat,0)-0.5],'k',linewidth=2)
    else:
        hs.Debug(1,"Not showing lines")
        if showxall or len(newexp.samples)<=10:
            hs.Debug(1,"less than 10 samples, showing all sample names")
            ax.set_xticklabels(svals,rotation=90)
            ax.set_xticks(range(len(newexp.samples)))
    tight_layout()
    # the image rows (and the category lines) end at number of rows - 0.5,
    # a limit of +0.5 left an empty strip of one row in the plot
    ax.set_ylim(-0.5,np.size(ldat,0)-0.5)

    if showcolorbar:
        hs.Debug(1,"Showing colorbar")
        cb=colorbar(ticks=list(np.log2([2,10,100,500,1000])))
        cb.ax.set_yticklabels(['<0.02%','0.1%','1%','5%','>10%'])

    # attach the state used by the mouse/keyboard callbacks to the axis
    ax.expdat=newexp
    ax.lastselect=-1
    ax.sampline=''
    ax.ofig=f
    ax.labelson=False
    ax.labelnames=[]
    f.canvas.mpl_connect('button_press_event', onplotmouseclick)
    f.canvas.mpl_connect('key_press_event', onplotkeyclick)
    plt.rcParams=oldparams

    # if want the ontology analysis for a given category:
    if ontofig:
        hs.Debug(1,"Ontofig is set")
        newexp.ontofigname=ontofig
    else:
        newexp.ontofigname=False

    # if we want gui, open it
    if usegui:
        hs.Debug(1,"Using the GUI window")
        # imported here so the gui module is needed only when the gui is requested
        import heatsequer.plots.plotwingui
        guiwin = heatsequer.plots.plotwingui.PlotGUIWindow(newexp)
        ax.guiwin=guiwin
        guiwin.plotfig=f
        guiwin.plotax=ax
        guiwin.show()
    else:
        ax.guiwin=False
        hs.Debug(7,'Not using gui')

    if newexp.plotmetadata:
        hs.Debug(1,"Experiment has metadata attached for plotting (%d points)" % len(newexp.plotmetadata))
        for cmet in newexp.plotmetadata:
            addplotmetadata(newexp,field=cmet[0],value=cmet[1],color=cmet[2],inverse=cmet[3],beforesample=cmet[4])
    show()
    return newexp,ax
def filtersimilarsamples(expdat,field,method='mean'):
    """
    join similar samples into one sample (i.e. to remove samples of same individual)

    input:
    expdat : Experiment
    field : string
        Name of the field containing the values (for which similar values will be joined)
    method : string
        What to do with samples with similar value. options:
        'mean' - replace with a sample containing the mean of the samples
        'median'- replace with a sample containing the median of the samples
        'random' - replace with a single random sample out of these samples
        'sum' - replace with sum of original reads in all samples, renormalized after to 10k and orignumreads updated
        'fracpres' - replace with fraction of samples where the bacteria is present

    output:
    newexp : Experiment
        like the input experiment but only one sample per unique value in field.
        For the joined samples (all methods except 'random'), each mapping file field
        where the joined samples do not all agree is set to 'NA'.
        False is returned if a group of samples needs joining and method is not supported.
    """
    # snapshot of the call arguments for the command history (must stay the first statement)
    params=locals()

    newexp=hs.copyexp(expdat)
    if method=='sum':
        newexp=hs.toorigreads(newexp)
    uvals=hs.getfieldvals(expdat,field,ounique=True)
    keep=[]
    for cval in uvals:
        cpos=hs.findsamples(expdat,field,cval)
        # a single sample with this value - nothing to join
        if len(cpos)==1:
            keep.append(cpos[0])
            continue
        if method=='random':
            keep.append(cpos[np.random.randint(len(cpos))])
            continue

        # set the mapping file values.
        # work on a copy, so the 'NA' values are not written into the input experiment
        cmap=expdat.smap[expdat.samples[cpos[0]]].copy()
        for ccpos in cpos[1:]:
            othermap=expdat.smap[expdat.samples[ccpos]]
            for cfield in cmap.keys():
                if cmap[cfield]!=othermap[cfield]:
                    cmap[cfield]='NA'

        # the joined data is stored in the first sample of the group
        if method=='mean':
            joined=np.mean(expdat.data[:,cpos],axis=1)
        elif method=='median':
            joined=np.median(expdat.data[:,cpos],axis=1)
        elif method=='sum':
            # newexp holds the original reads here (converted above)
            joined=np.sum(newexp.data[:,cpos],axis=1)
            newexp.origreads[cpos[0]]=np.sum(hs.reorder(expdat.origreads,cpos))
        elif method=='fracpres':
            # float so the fraction is not truncated by integer division
            joined=np.sum(expdat.data[:,cpos]>0,axis=1)/float(len(cpos))
        else:
            hs.Debug(9,'method %s not supported' % method)
            return False
        newexp.data[:,cpos[0]]=joined
        keep.append(cpos[0])
        newexp.smap[expdat.samples[cpos[0]]]=cmap

    newexp=hs.reordersamples(newexp,keep)
    if method=='sum':
        newexp=hs.normalizereads(newexp)
    newexp.filters.append('Filter similar samples field %s method %s' % (field,method))
    hs.addcommand(newexp,"filtersimilarsamples",params=params,replaceparams={'expdat':expdat})
    hs.Debug(6,'%d samples before filtering, %d after' % (len(expdat.samples),len(newexp.samples)))
    return newexp