def samplemeanpervalue(expdat, field):
    """
    Build an experiment holding one averaged sample per unique value of a field.

    For every unique value of `field`, the data columns of all samples carrying
    that value are averaged (per row) and stored as a single column of the
    returned experiment.

    input:
    expdat : Experiment
    field : string
        the field to group the samples by (i.e. 'ENV_MATTER')

    output:
    newexp : Experiment
        experiment with 1 sample per unique value of field. It is created with
        hs.reordersamples from the first listed position of each value, and its
        data columns are then overwritten with the group means.
    """
    # must stay the first statement so only the call arguments are recorded
    params = locals()

    unique_values = hs.getfieldvals(expdat, field, ounique=True)
    all_values = hs.getfieldvals(expdat, field, ounique=False)
    # presumably maps each field value to the list of sample positions holding it
    positions = hs.listtodict(all_values)

    representatives = [positions[value][0] for value in unique_values]
    newexp = hs.reordersamples(expdat, representatives)

    for column, value in enumerate(unique_values):
        group = expdat.data[:, positions[value]]
        newexp.data[:, column] = np.mean(group, axis=1)

    newexp.filters.append('samplemeanpervalue for field %s' % field)
    hs.addcommand(newexp, "samplemeanpervalue", params=params, replaceparams={'expdat': expdat})
    return newexp
def fieldtobact(expdat, field, bactname='', meanreads=1000, cutoff=0):
    """
    Add a mapping-file field to the experiment as a new bacteria
    (to facilitate numeric analysis).

    The field values are converted with hs.tofloat. Values >= cutoff are
    divided by their mean and multiplied by meanreads; values < cutoff are
    replaced by meanreads.

    input:
    expdat : Experiment
    field : string
        name of the field to convert
    bactname : string
        name of the new bacteria (empty to use the field name)
    meanreads : int
        the mean number of reads for the new field bacteria
    cutoff : int
        the minimal value of the field per sample (samples below it get meanreads)

    output:
    newexp : Experiment
        copy of expdat with the added bacteria holding the field values as reads
    """
    # must stay the first statement so only the call arguments are recorded
    params = locals()
    if len(bactname) == 0:
        bactname = field

    vals = np.array(hs.tofloat(hs.getfieldvals(expdat, field)))

    # both masks are computed before any value is rescaled
    keep_mask = vals >= cutoff
    replace_mask = vals < cutoff
    scalefactor = np.mean(vals[keep_mask])
    vals[keep_mask] = (vals[keep_mask] / scalefactor) * meanreads
    vals[replace_mask] = meanreads

    newexp = hs.copyexp(expdat)
    hs.insertbacteria(newexp, vals, bactname, bactname, logit=False)
    newexp.filters.append('add bacteria from map field %s' % field)
    hs.addcommand(newexp, "fieldtobact", params=params, replaceparams={'expdat': expdat})
    return newexp
def findmislabels(expdat,field,distmetric='bc'):
    """
    find mislabelled samples according to field

    Sorts the samples by field and plots a heatmap with one row per sample and
    one column per field value. Each cell is the mean distance between the
    sample and the samples having that field value (the sample itself is
    skipped). Nothing is returned.

    input:
    expdat : Experiment
    field : string
        name of the field to examine (i.e. subjectid)
    distmetric : string
        the distance metric to use (see calcdist)
    """
    expdat=hs.sortsamples(expdat,field)
    fvals=hs.getfieldvals(expdat,field)
    ufvals=list(set(fvals))
    # row labels: "<sample id>;<field value>"
    onames=[csamp+';'+fvals[idx] for idx,csamp in enumerate(expdat.samples)]
    omat=np.zeros([len(fvals),len(ufvals)])
    for groupidx,groupval in enumerate(ufvals):
        cexp=hs.filtersamples(expdat,field,groupval,exact=True)
        for aidx,aval in enumerate(expdat.samples):
            # don't measure distance to ourselves
            cdist=[
                hs.calcdist(cexp.data[:,gidx],expdat.data[:,aidx],distmetric=distmetric)
                for gidx,gval in enumerate(cexp.samples)
                if gval!=aval
            ]
            # a group whose only member is this sample has nothing to compare with:
            # store NaN explicitly instead of taking the mean of an empty list
            omat[aidx,groupidx]=np.mean(cdist) if cdist else np.nan
    figure()
    iax=imshow(omat,interpolation='nearest',aspect='auto')
    # Artist.get_axes() is gone from current matplotlib; the axes attribute is the replacement
    ax=iax.axes
    ax.set_xticks(range(len(ufvals)))
    ax.set_xticklabels(ufvals,rotation=90)
    ax.set_yticks(range(len(onames)))
    ax.set_yticklabels(onames)
def values(self):
    """
    Ask the user to pick one of the values of the currently selected field.

    The choices are the distinct values of the field (taken from self.cexp).
    If the dialog is accepted the chosen value is written to self.tValue;
    otherwise nothing changes.
    """
    fieldname = str(self.cField.currentText())
    choices = list(set(hs.getfieldvals(self.cexp, fieldname)))
    picked, accepted = QtWidgets.QInputDialog.getItem(
        self, "Select field value", "Field=%s" % fieldname, choices
    )
    if not accepted:
        return
    self.tValue.setText(picked)
def sortsamples(exp, field, numeric=False, logit=True):
    """
    Return a copy of the experiment with the samples ordered by a field.

    input:
    exp : Experiment
    field : string
        name of the field to sort by
    numeric : bool
        True to convert the field values with hs.tofloat before sorting,
        False to sort them as they are (text)
    logit : bool
        True to record the command and the filter description in the new experiment

    output:
    newexp : Experiment
        the sorted experiment
    """
    # must stay the first statement so only the call arguments are recorded
    params = locals()

    keys = hs.getfieldvals(exp, field)
    if numeric:
        keys = hs.tofloat(keys)
    # hs.isort returns (sorted values, order); only the order is needed
    _, order = hs.isort(keys)
    newexp = hs.reordersamples(exp, order)

    if logit:
        hs.addcommand(newexp, "sortsamples", params=params, replaceparams={'exp': exp})
        newexp.filters.append('sorted samples by field %s' % field)
    return newexp
def prepstudyinfo(self):
    """
    add the study info from the mapping file if available

    For each known mapping-file field that is present in self.cexp and has
    exactly one unique value, that value (lower-cased) is added with
    self.addentry under the matching info type.
    """
    # (mapping file field, info type passed to addentry)
    field_to_info = (
        ('SRA_Study_s', 'sra'),
        ('project_name_s', 'name'),
        ('experiment_title', 'name'),
        ('experiment_design_description', 'name'),
        ('BioProject_s', 'sra'),
    )
    experiment = self.cexp
    for mapfield, infotype in field_to_info:
        if mapfield not in experiment.fields:
            continue
        unique_values = hs.getfieldvals(experiment, mapfield, ounique=True)
        if len(unique_values) != 1:
            continue
        self.addentry(fromdb=False, ctype=infotype, value=unique_values[0].lower(), color='black')
def getgroupgroupdist(expdat, field, distmat, dsamp, uvals=False, subfield='host_subject_id', vmin=0, vmax=1):
    """
    Average the group distance matrix (see getgroupdist) over the values of a subfield.

    The group-to-group distance matrix is calculated separately for the samples
    of each subfield value (i.e. each individual), NaN entries are replaced by 0,
    and the matrices are averaged. The averaged matrix is plotted with
    plotdistheatmap.
    Written for the Amina skin cosmetics study.

    input:
    expdat : Experiment
    field : string
        name of the field to group by
    distmat : numpy 2d array
        the distance matrix (from calcdistmat or loaddistmat)
    dsamp : dict
        the mapping of each sample id to the distance matrix position (from calcdistmat or loaddistmat)
    uvals : list or False
        False/empty to use all values of field, or a list of values to use only them
    subfield : str
        name of the subfield so all distances are calculated separately for each
        subfield value (i.e. 'host_subject_id')
    vmin, vmax : number
        passed on to plotdistheatmap

    output:
    omat : numpy 2d array
        the sum of the per-subfield group distance matrices divided by the number
        of matrices that were added
    """
    field_values = hs.getfieldvals(expdat, field)
    if not uvals:
        uvals = list(set(field_values))

    total = np.zeros([len(uvals), len(uvals)])
    num_added = 0
    for subvalue in hs.getfieldvals(expdat, subfield, ounique=True):
        subexp = hs.filtersamples(expdat, subfield, subvalue)
        gdist, uvals = hs.getgroupdist(subexp, field, distmat, dsamp, plotit=False, uvals=uvals)
        # group pairs without any distance count as 0
        gdist[np.isnan(gdist)] = 0
        if np.isnan(np.sum(np.sum(gdist))):
            continue
        total = total + gdist
        num_added += 1

    omat = total / num_added
    plotdistheatmap(omat, uvals, vmin=vmin, vmax=vmax)
    return omat
def filterwinperid(expdat, idfield, field, val1, val2, mineffect=1):
    """
    Run filterfieldwave separately on each individual and keep the union of the bacteria.

    input:
    expdat : Experiment
    idfield : string
        name of the field identifying the individual; filterfieldwave is run
        once for the samples of each of its unique values
    field, val1, val2, mineffect
        passed on to hs.filterfieldwave

    output:
    newexp : Experiment
        expdat filtered (hs.filterseqs) to the sequences returned for at least one individual
    """
    params = locals()

    collected = []
    for subject in hs.getfieldvals(expdat, idfield, ounique=True):
        subject_exp = hs.filtersamples(expdat, idfield, subject)
        wave_exp = hs.filterfieldwave(subject_exp, field, val1, val2, mineffect=mineffect)
        collected.extend(wave_exp.seqs)

    # drop sequences found for more than one individual
    unique_seqs = list(set(collected))
    return hs.filterseqs(expdat, unique_seqs)
def prefillinfo(self):
    """
    prefill "ALL" data fields based on mapping file if all samples have same info

    For every mapping-file field of self.cexp that has exactly one unique value,
    the value is converted to a lookup term:
    - HOST_TAXID / host_taxid : 'NCBITaxon:<value>', replaced by its entry in the
      ncbi taxonomy pickle when it is found there
    - any other field : an 'ONTO:XXX' value becomes 'XXX :ONTO'
    If the term is a key of self.ontology, the matching self.ontologyfromid entry
    is added with self.addtolist('ALL', ...).
    """
    hs.Debug(1,'prefill info')
    ontologyfromid=self.ontologyfromid
    # the file is read from the db directory under hs.heatsequerdir;
    # the with-block closes it even if unpickling fails
    with open(os.path.join(hs.heatsequerdir,'db/ncbitaxontofromid.pickle'),'rb') as fl:
        ncbitax=pickle.load(fl)

    cexp=self.cexp
    for cfield in cexp.fields:
        uvals=hs.getfieldvals(cexp,cfield,ounique=True)
        # only fields where all samples share one value are prefilled
        if len(uvals)!=1:
            hs.Debug(1,'found %d values' % len(uvals))
            continue
        cval=uvals[0]
        hs.Debug(1,'found 1 value %s' % cval)
        if cfield=='HOST_TAXID' or cfield=='host_taxid':
            hs.Debug(2,'%s field has 1 value %s' % (cfield,cval))
            # if ncbi taxonomy (field used differently)
            cval='NCBITaxon:'+cval
            if cval in ncbitax:
                hs.Debug(2,'found in ncbitax %s' % cval)
                cval=ncbitax[cval]
        else:
            # get the XXX from ENVO:XXX value
            uvalspl=cval.split(':',1)
            if len(uvalspl)>1:
                cval=uvalspl[1]+' :'+uvalspl[0]
        if cval in self.ontology:
            cval=ontologyfromid[self.ontology[cval]]
            hs.Debug(2,'term %s found in ontologyfromid' % cval)
            conto=cval
            hs.Debug(1,'add prefill %s' % conto)
            self.addtolist('ALL',conto)
        else:
            hs.Debug(3,'term %s NOT found in ontologyfromid' % uvals[0])
def getgroupdist(expdat,field,distmat,dsamp,plotit=True,plottype='heatmap',uvals=False):
    """
    calculate the distance matrix based on groups of samples according to field
    using a distance matrix and mapping

    Entry [i,j] of the result is the mean of distmat over all pairs made of one
    sample with value uvals[i] and a different sample with value uvals[j].
    Samples that are not in dsamp are ignored. Entries without any pair are NaN.

    input:
    expdat : Experiment
    field : string
        name of the field to group by
    distmat : numpy 2d array
        the distance matrix (from calcdistmat or loaddistmat)
    dsamp : dict
        the mapping of each sample id to the distance matrix position (from calcdistmat or loaddistmat)
    plotit : bool
        True to plot, False to not plot
    plottype: string
        'heatmap' - to plot heatmap of pairwise values
        'hist' - to plot histogram of pairwise values
    uvals : list or False
        False/empty to use all values of field, or a list of values to use only them

    output:
    gdist : numpy 2d array
        the group distance matrix
    uvals : list
        group names in the matrix (ordered)
    """
    vals=hs.getfieldvals(expdat,field)
    if not uvals:
        uvals=list(set(vals))
    # np.nan instead of the np.NaN alias, which no longer exists in NumPy 2.0
    gdist=np.full([len(uvals),len(uvals)],np.nan)

    # positions of the samples having each field value
    gmap=defaultdict(list)
    for idx,cval in enumerate(vals):
        gmap[cval].append(idx)

    # per group: (sample position, distance matrix position) of the samples known to
    # dsamp, looked up once instead of once per sample pair
    gpos={}
    for cg in uvals:
        gpos[cg]=[(p,dsamp[expdat.samples[p]]) for p in gmap.get(cg,[]) if expdat.samples[p] in dsamp]

    distdict={}
    for idx1,cg1 in enumerate(uvals):
        for idx2,cg2 in enumerate(uvals):
            # a sample is never compared to itself
            adist=[distmat[d1,d2] for p1,d1 in gpos[cg1] for p2,d2 in gpos[cg2] if p1!=p2]
            distdict[(cg1,cg2)]=adist
            if adist:
                gdist[idx1,idx2]=np.mean(adist)

    if plotit:
        figure()
        if plottype=='heatmap':
            plotdistheatmap(gdist,uvals)
            title(expdat.studyname+' '+field)
        elif plottype=='hist':
            pl=[]
            pairs=[]
            names=[]
            for k,v in distdict.items():
                # (a,b) and (b,a) hold the same distances - plot only one of them
                ks=set(k)
                if ks in pairs:
                    continue
                pl.append(v)
                pairs.append(ks)
                names.append(k)
            # NOTE(review): `normed` was removed in matplotlib 3.1 (replaced by `density`);
            # kept as is because `density` does not exist in old matplotlib versions
            hist(pl,alpha=0.5,normed=True,bins=50,range=[0,1])
            legend(names)
    return gdist,uvals
def plotexp(exp,sortby=False,numeric=False,minreads=4,rangeall=False,seqdb=None,cdb=None,showline=True,ontofig=False,usegui=True,showxall=False,showcolorbar=False,ptitle=False,lowcutoff=1,uselog=True,showxlabel=True,colormap=False,colorrange=False,linewidth=2,subline='',showhline=True,newfig=True,fixfont=False,fontsize=None,nosort=False,zeroisnone=False,xlabelrotation=45,showtaxnames=False):
    """
    Plot an experiment as an interactive heatmap (bacteria as rows, samples as columns)

    input:
    exp : Experiment
        from load()
    sortby : str or False
        name of mapping file field to sort by or False to not sort
    numeric : bool
        True if the sortby field is numeric
    minreads : int
        minimum number of reads per bacteria in order to show it or 0 to show all
    rangeall : bool
        True to show all frequencies in image scale, False to use the color range [0,10]
        (ignored if colorrange is supplied)
    seqdb
        the SRBactDB database (from bactdb.load), or None (default) to use hs.bdb
    cdb
        the cool sequences database (from cooldb.load), or None (default) to use hs.cdb
    showline : bool
        True to plot lines between category values (and label the categories)
    ontofig
        name of ontology to plot for bactdb or False to not plot (stored in newexp.ontofigname)
    usegui : bool
        True to open the gui window (PlotGUIWindow), False to not open it
    showxall : bool
        when showline is False: True to show all sample names,
        False to show them only if there are no more than 10 samples
    showcolorbar : bool
        True to plot the colorbar. False to not plot
    ptitle : str, False or None
        False or '' to show the processing history (filters) as the title,
        None to not show a title, or the title string
    lowcutoff : number
        minimal value for read (for 0 log transform) - the minimal resolution - could be 10000*2/origreads
    uselog : bool
        True to plot log2 of the data (after raising values below lowcutoff to lowcutoff)
    showxlabel : bool
        True to show the x label (default), False to hide it
    colormap : string or False
        name of colormap or False (default) to use mpl default colormap
    colorrange : [min,max] or False
        [min,max] to set the colormap range, False (default) to use the range selected by rangeall
    linewidth : number
        width of the lines between category values
    subline : str
        Name of category for subline plotting or '' (Default) for no sublines
    showhline : bool
        True (default) to plot the horizontal lines listed in exp.hlines. False to not plot them
    newfig : bool
        True (default) to open figure in new window, False to use current
    fixfont : bool (optional)
        False (default) to keep the default font, True to use a fixed width font for the y tick labels
    fontsize : int or None (optional)
        stored in ax.plot_labelsize (None (default) for the default font size)
    nosort : bool (optional)
        False (default) to sort by the sort field, True to skip the sorting
        (the sortby values are still used for the category lines)
    zeroisnone : bool (optional)
        False (default) to plot zeros as 0, True to assign None (white color)
    xlabelrotation : int (optional)
        the rotation of the xtick labels
    showtaxnames : bool (optional)
        False (default) to not show tax names (need to press 'h' to show)
        True to show the taxonomy names

    output:
    newexp : Experiment
        the plotted experiment (sorted and filtered)
    ax
        the plot axis
    """
    hs.Debug(1,"Plot experiment %s" % exp.studyname)
    hs.Debug(1,"Commands:")
    for ccommand in exp.commands:
        hs.Debug(1,"%s" % ccommand)

    if exp.sparse:
        hs.Debug(9,'Sparse matrix - converting to dense')
        exp=hs.copyexp(exp,todense=True)

    vals=[]
    if cdb is None:
        cdb=hs.cdb
    if seqdb is None:
        seqdb=hs.bdb

    # svals holds one value per sample (in plot order), used for the x labels/category lines
    if sortby:
        if not nosort:
            hs.Debug(1,"Sorting by field %s" % sortby)
            for csamp in exp.samples:
                vals.append(exp.smap[csamp][sortby])
            if numeric:
                hs.Debug(1,"(numeric sort)")
                vals=hs.tofloat(vals)
            svals,sidx=hs.isort(vals)
            newexp=hs.reordersamples(exp,sidx)
        else:
            hs.Debug(1,"no sorting but showing columns")
            svals=hs.getfieldvals(exp,sortby)
            newexp=hs.copyexp(exp)
    else:
        hs.Debug(1,"No sample sorting")
        svals=hs.getfieldvals(exp,'#SampleID')
        newexp=hs.copyexp(exp)

    hs.Debug(1,"Filtering min reads. original bacteria - %d" % len(newexp.seqs))
    if minreads>0:
        newexp=hs.filterminreads(newexp,minreads,logit=uselog)
    hs.Debug(1,"New number of bacteria %d" % len(newexp.seqs))
    newexp.seqdb=seqdb
    newexp.cdb=cdb
    newexp.scdb=hs.scdb

    # NOTE: ldat is newexp.data itself until the log2 below, so the zeroisnone and
    # lowcutoff assignments also modify the data of the returned experiment
    ldat=newexp.data
    if zeroisnone:
        ldat[ldat==0]=None
    if uselog:
        hs.Debug(1,"Using log, cutoff at %f" % lowcutoff)
        ldat[np.where(ldat<lowcutoff)]=lowcutoff
        ldat=np.log2(ldat)

    oldparams=plt.rcParams
    mpl.rc('keymap',back='c, backspace')
    mpl.rc('keymap',forward='v')
    mpl.rc('keymap',all_axes='A')
    if newfig:
        f=plt.figure(tight_layout=True)
    else:
        f=plt.gcf()

    # set the colormap to default if not supplied
    if not colormap:
        colormap=plt.rcParams['image.cmap']

    # plot the image
    if colorrange:
        # report the range actually used
        hs.Debug(1,"colormap range is %s" % (colorrange,))
        iax=plt.imshow(ldat,interpolation='nearest',aspect='auto',clim=colorrange,cmap=plt.get_cmap(colormap))
    elif rangeall:
        hs.Debug(1,"colormap range is all")
        iax=plt.imshow(ldat,interpolation='nearest',aspect='auto',cmap=plt.get_cmap(colormap))
    else:
        hs.Debug(1,"colormap range is 0,10")
        iax=plt.imshow(ldat,interpolation='nearest',aspect='auto',clim=[0,10],cmap=plt.get_cmap(colormap))

    if ptitle is not None:
        if not ptitle:
            hs.Debug(1,"Showing filters in title")
            if (len(newexp.filters))>4:
                cfilters=[newexp.filters[0],'...',newexp.filters[-2],newexp.filters[-1]]
            else:
                cfilters=newexp.filters
            cfilters=hs.clipstrings(cfilters,30)
            ptitle='\n'.join(cfilters)
        plt.title(ptitle,fontsize=10)

    # Artist.get_axes() is gone from current matplotlib; the axes attribute is the replacement
    ax=iax.axes
    ax.autoscale(False)

    # plot the sublines (smaller category lines)
    if subline:
        slval=hs.getfieldvals(newexp,subline)
        prevval=slval[0]
        for idx,cval in enumerate(slval):
            if cval!=prevval:
                xpos=idx-0.5
                plt.plot([xpos,xpos],[-0.5,np.size(ldat,0)-0.5],'w:')
                prevval=cval

    if showline:
        hs.Debug(1,"Showing lines")
        labs=[]
        labpos=[]
        linepos=[]
        minpos=0
        # sentinel so the last category is also closed by the loop below
        svals.append('end')
        for idx,cval in enumerate(svals[:-1]):
            if cval==svals[idx+1]:
                continue
            # label at the middle of the category, line after its last sample
            labpos.append(minpos-0.5+float(idx+1-minpos)/2)
            minpos=idx+1
            linepos.append(idx+0.5)
            labs.append(cval)
        hs.Debug(1,"number of lines is %d" % len(linepos))
        if showxlabel:
            ax.set_xticks(labpos)
            ax.set_xticklabels(labs,rotation=xlabelrotation,ha='right')
        for cx in linepos:
            plt.plot([cx,cx],[-0.5,np.size(ldat,0)-0.5],'k',linewidth=linewidth)
            plt.plot([cx,cx],[-0.5,np.size(ldat,0)-0.5],'w:',linewidth=linewidth)
    else:
        hs.Debug(1,"Not showing lines")
        if showxall or len(newexp.samples)<=10:
            hs.Debug(1,"less than 10 samples, showing all sample names")
            # tick positions first, so the labels are attached to the final ticks
            ax.set_xticks(range(len(newexp.samples)))
            ax.set_xticklabels(svals,rotation=90)
    ax.set_ylim(-0.5,np.size(ldat,0)-0.5)

    if fixfont:
        fontProperties = {'family':'monospace'}
        ax.set_yticklabels(ax.get_yticks(), fontProperties)

    if showcolorbar:
        hs.Debug(1,"Showing colorbar")
        cb=plt.colorbar(ticks=list(np.log2([2,10,100,500,1000])))
        cb.ax.set_yticklabels(['<0.02%','0.1%','1%','5%','>10%'])

    # state read by the mouse/keyboard callbacks
    ax.expdat=newexp
    ax.lastselect=-1
    ax.sampline=''
    ax.ofig=f
    ax.labelson=False
    ax.labelnames=[]
    f.canvas.mpl_connect('button_press_event', onplotmouseclick)
    f.canvas.mpl_connect('key_press_event', onplotkeyclick)
    # NOTE(review): oldparams is the same object as plt.rcParams (no copy was made),
    # so this assignment does not undo the keymap changes made above
    plt.rcParams=oldparams

    # if want the ontology analysis for a given category:
    if ontofig:
        hs.Debug(1,"Ontofig is set")
        newexp.ontofigname=ontofig
    else:
        newexp.ontofigname=False

    # if we want gui, open it
    if usegui:
        hs.Debug(1,"Using the GUI window")
        from heatsequer.plots import plotwingui
        # a single window is created (no throw-away first instance)
        guiwin = plotwingui.PlotGUIWindow(newexp)
        ax.guiwin=guiwin
        guiwin.plotfig=f
        guiwin.plotax=ax
        guiwin.show()
    else:
        ax.guiwin=False
        hs.Debug(7,'Not using gui')

    ax.plot_labelsize=fontsize
    if newexp.plotmetadata:
        hs.Debug(1,"Experiment has metadata attached for plotting (%d points)" % len(newexp.plotmetadata))
        for cmet in newexp.plotmetadata:
            addplotmetadata(newexp,field=cmet[0],value=cmet[1],color=cmet[2],inverse=cmet[3],beforesample=cmet[4])
    if showhline:
        if newexp.hlines:
            for cpos in newexp.hlines:
                plt.plot([0,np.shape(newexp.data)[1]],[cpos-0.5,cpos-0.5],'g')
    plt.show()
    if showtaxnames:
        showtaxonomies(newexp,ax,showdb=False,showcontam=False)
    return newexp,ax
def values(self):
    """
    Show a selection dialog with the values of the currently selected field.

    The items are the distinct values of the field in self.cexp. The selected
    value is copied to self.tValue only if the dialog was accepted.
    """
    selected_field = str(self.cField.currentText())
    distinct_values = list(set(hs.getfieldvals(self.cexp, selected_field)))
    prompt = 'Field=%s' % selected_field
    chosen, confirmed = QtGui.QInputDialog.getItem(self, 'Select field value', prompt, distinct_values)
    if confirmed:
        self.tValue.setText(chosen)
def plotexp(exp,sortby=False,numeric=False,minreads=4,rangeall=False,seqdb=None,cdb=None,showline=True,ontofig=False,usegui=True,showxall=False,showcolorbar=False,ptitle=False,lowcutoff=1,uselog=True,showxlabel=True,colormap=False,colorrange=False):
    """
    Plot an experiment as an interactive heatmap (bacteria as rows, samples as columns)

    input:
    exp : Experiment
        from load()
    sortby : str or False
        name of mapping file field to sort by or False to not sort
    numeric : bool
        True if the sortby field is numeric
    minreads : int
        minimum number of reads per bacteria in order to show it or 0 to show all
    rangeall : bool
        True to show all frequencies in image scale, False to use the color range [0,10]
        (ignored if colorrange is supplied)
    seqdb
        the SRBactDB database (from bactdb.load)
    cdb
        the cool sequences database (from cooldb.load)
    showline : bool
        True to plot lines between category values (and label the categories)
    ontofig
        name of ontology to plot for bactdb or False to not plot (stored in newexp.ontofigname)
    usegui : bool
        True to open the gui window (PlotGUIWindow), False to not open it
    showxall : bool
        when showline is False: True to show all sample names,
        False to show them only if there are no more than 10 samples
    showcolorbar : bool
        True to plot the colorbar. False to not plot
    ptitle : str or False
        name of the figure or False to show processing history as name
    lowcutoff : number
        minimal value for read (for 0 log transform) - the minimal resolution - could be 10000*2/origreads
    uselog : bool
        True to plot log2 of the data (after raising values below lowcutoff to lowcutoff)
    showxlabel : bool
        True to show the x label (default), False to hide it
    colormap : string or False
        name of colormap or False (default) to use mpl default colormap
    colorrange : [min,max] or False
        [min,max] to set the colormap range, False (default) to use the range selected by rangeall

    output:
    newexp : Experiment
        the plotted experiment (sorted and filtered)
    ax
        the plot axis
    """
    hs.Debug(1,"Plot experiment %s" % exp.studyname)
    hs.Debug(1,"Commands:")
    for ccommand in exp.commands:
        hs.Debug(1,"%s" % ccommand)

    # svals holds one value per sample (in plot order), used for the x labels/category lines
    vals=[]
    if sortby:
        hs.Debug(1,"Sorting by field %s" % sortby)
        for csamp in exp.samples:
            vals.append(exp.smap[csamp][sortby])
        if numeric:
            hs.Debug(1,"(numeric sort)")
            vals=hs.tofloat(vals)
        svals,sidx=hs.isort(vals)
        newexp=hs.reordersamples(exp,sidx)
    else:
        hs.Debug(1,"No sample sorting")
        svals=hs.getfieldvals(exp,'#SampleID')
        newexp=hs.copyexp(exp)

    hs.Debug(1,"Filtering min reads. original bacteria - %d" % len(newexp.seqs))
    if minreads>0:
        newexp=hs.filterminreads(newexp,minreads,logit=uselog)
    hs.Debug(1,"New number of bacteria %d" % len(newexp.seqs))
    newexp.seqdb=seqdb
    newexp.cdb=cdb

    # NOTE: ldat is newexp.data itself until the log2 below, so the lowcutoff
    # assignment also modifies the data of the returned experiment
    ldat=newexp.data
    if uselog:
        hs.Debug(1,"Using log, cutoff at %f" % lowcutoff)
        ldat[np.where(ldat<lowcutoff)]=lowcutoff
        ldat=np.log2(ldat)

    oldparams=plt.rcParams
    mpl.rc('keymap',back='c, backspace')
    mpl.rc('keymap',forward='v')
    mpl.rc('keymap',all_axes='A')
    f=figure()

    # set the colormap to default if not supplied
    if not colormap:
        colormap=plt.rcParams['image.cmap']

    # plot the image
    if colorrange:
        # report the range actually used
        hs.Debug(1,"colormap range is %s" % (colorrange,))
        iax=imshow(ldat,interpolation='nearest',aspect='auto',clim=colorrange,cmap=plt.get_cmap(colormap))
    elif rangeall:
        hs.Debug(1,"colormap range is all")
        iax=imshow(ldat,interpolation='nearest',aspect='auto',cmap=plt.get_cmap(colormap))
    else:
        hs.Debug(1,"colormap range is 0,10")
        iax=imshow(ldat,interpolation='nearest',aspect='auto',clim=[0,10],cmap=plt.get_cmap(colormap))

    if not ptitle:
        hs.Debug(1,"Showing filters in title")
        if (len(newexp.filters))>4:
            cfilters=[newexp.filters[0],'...',newexp.filters[-2],newexp.filters[-1]]
        else:
            cfilters=newexp.filters
        cfilters=hs.clipstrings(cfilters,30)
        ptitle='\n'.join(cfilters)
    title(ptitle,fontsize=10)

    # Artist.get_axes() is gone from current matplotlib; the axes attribute is the replacement
    ax=iax.axes
    ax.autoscale(False)

    if showline:
        hs.Debug(1,"Showing lines")
        labs=[]
        labpos=[]
        linepos=[]
        minpos=0
        # sentinel so the last category is also closed by the loop below
        svals.append('end')
        for idx,cval in enumerate(svals[:-1]):
            if cval==svals[idx+1]:
                continue
            # label at the middle of the category, line after its last sample
            labpos.append(minpos-0.5+float(idx+1-minpos)/2)
            minpos=idx+1
            linepos.append(idx+0.5)
            labs.append(cval)
        hs.Debug(1,"number of lines is %d" % len(linepos))
        if showxlabel:
            ax.set_xticks(labpos)
            ax.set_xticklabels(labs,rotation=45,ha='right')
        for cx in linepos:
            plot([cx,cx],[-0.5,np.size(ldat,0)-0.5],'k',linewidth=2)
    else:
        hs.Debug(1,"Not showing lines")
        if showxall or len(newexp.samples)<=10:
            hs.Debug(1,"less than 10 samples, showing all sample names")
            # tick positions first, so the labels are attached to the final ticks
            ax.set_xticks(range(len(newexp.samples)))
            ax.set_xticklabels(svals,rotation=90)
    tight_layout()
    # the image rows end at (number of rows)-0.5, the same place the separator lines end
    ax.set_ylim(-0.5,np.size(ldat,0)-0.5)

    if showcolorbar:
        hs.Debug(1,"Showing colorbar")
        cb=colorbar(ticks=list(np.log2([2,10,100,500,1000])))
        cb.ax.set_yticklabels(['<0.02%','0.1%','1%','5%','>10%'])

    # state read by the mouse/keyboard callbacks
    ax.expdat=newexp
    ax.lastselect=-1
    ax.sampline=''
    ax.ofig=f
    ax.labelson=False
    ax.labelnames=[]
    f.canvas.mpl_connect('button_press_event', onplotmouseclick)
    f.canvas.mpl_connect('key_press_event', onplotkeyclick)
    # NOTE(review): oldparams is the same object as plt.rcParams (no copy was made),
    # so this assignment does not undo the keymap changes made above
    plt.rcParams=oldparams

    # if want the ontology analysis for a given category:
    if ontofig:
        hs.Debug(1,"Ontofig is set")
        newexp.ontofigname=ontofig
    else:
        newexp.ontofigname=False

    # if we want gui, open it
    if usegui:
        hs.Debug(1,"Using the GUI window")
        import heatsequer.plots.plotwingui
        guiwin = heatsequer.plots.plotwingui.PlotGUIWindow(newexp)
        ax.guiwin=guiwin
        guiwin.plotfig=f
        guiwin.plotax=ax
        guiwin.show()
    else:
        ax.guiwin=False
        hs.Debug(7,'Not using gui')

    if newexp.plotmetadata:
        hs.Debug(1,"Experiment has metadata attached for plotting (%d points)" % len(newexp.plotmetadata))
        for cmet in newexp.plotmetadata:
            addplotmetadata(newexp,field=cmet[0],value=cmet[1],color=cmet[2],inverse=cmet[3],beforesample=cmet[4])
    show()
    return newexp,ax
def filtersimilarsamples(expdat,field,method='mean'):
    """
    join similar samples into one sample (i.e. to remove samples of same individual)

    For every unique value of field, the samples with that value are replaced by
    a single sample (stored at the position of the first of them). Except for
    method 'random', the mapping file values of the joined sample are those of
    the first sample, with 'NA' in every field on which the joined samples
    disagree. The input experiment is not modified.

    input:
    expdat : Experiment
    field : string
        Name of the field containing the values (for which similar values will be joined)
    method : string
        What to do with samples with similar value. options:
        'mean' - replace with a sample containing the mean of the samples
        'median'- replace with a sample containing the median of the samples
        'random' - replace with a single random sample out of these samples
        'sum' - replace with sum of original reads in all samples, renormalized after to 10k and orignumreads updated
        'fracpres' - replace with fraction of samples where the bacteria is present

    output:
    newexp : Experiment
        like the input experiment but only one sample per unique value in field.
        False is returned if a group of more than one sample is encountered and
        method is not supported.
    """
    # must stay the first statement so only the call arguments are recorded
    params=locals()

    newexp=hs.copyexp(expdat)
    if method=='sum':
        newexp=hs.toorigreads(newexp)
    uvals=hs.getfieldvals(expdat,field,ounique=True)
    keep=[]
    for cval in uvals:
        cpos=hs.findsamples(expdat,field,cval)
        first=cpos[0]
        if len(cpos)==1:
            keep.append(first)
            continue
        if method=='random':
            keep.append(cpos[np.random.randint(len(cpos))])
            continue

        # set the mapping file values
        # work on a copy: the merged values must not overwrite the metadata of the
        # input experiment, and the new experiment must not share the dict with it
        cmap=dict(expdat.smap[expdat.samples[first]])
        for ccpos in cpos[1:]:
            othermap=expdat.smap[expdat.samples[ccpos]]
            for cfield in cmap.keys():
                if cmap[cfield]!=othermap[cfield]:
                    cmap[cfield]='NA'

        if method=='mean':
            newexp.data[:,first]=np.mean(expdat.data[:,cpos],axis=1)
        elif method=='median':
            newexp.data[:,first]=np.median(expdat.data[:,cpos],axis=1)
        elif method=='sum':
            # newexp holds the original (un-normalized) reads here, see toorigreads above
            newexp.data[:,first]=np.sum(newexp.data[:,cpos],axis=1)
            newexp.origreads[first]=np.sum(hs.reorder(expdat.origreads,cpos))
        elif method=='fracpres':
            numpresent=np.sum(expdat.data[:,cpos]>0,axis=1)
            newexp.data[:,first]=numpresent/len(cpos)
        else:
            hs.Debug(9,'method %s not supported' % method)
            return False
        keep.append(first)
        newexp.smap[expdat.samples[first]]=cmap

    newexp=hs.reordersamples(newexp,keep)
    if method=='sum':
        newexp=hs.normalizereads(newexp)
    newexp.filters.append('Filter similar samples field %s method %s' % (field,method))
    hs.addcommand(newexp,"filtersimilarsamples",params=params,replaceparams={'expdat':expdat})
    hs.Debug(6,'%d samples before filtering, %d after' % (len(expdat.samples),len(newexp.samples)))
    return newexp