def samplingTotal(self, region_file=None, region_out_file=None, exclude_low_percent=1, exclude_high_percent=1, bnum=100000, nonzero=False):
    '''
    Description:
        Calculate, for every wig in self.data, the sum of its values after
        multiplying it by a region wig. The region wig is either loaded from
        region_file, or built from the average of all wigs (each scaled to
        the same total) via percentile() and regionWithinValueRange().

    Parameter:
        region_file: passed to Wig() to load the regions; when None the
            regions are derived from the data itself.
        region_out_file: when not None and the regions are derived from the
            data, the regions are saved there with rg.save().
        exclude_low_percent: used as the lower percentile cutoff.
        exclude_high_percent: 100 minus this value is used as the upper
            percentile cutoff.
        bnum: forwarded unchanged to percentile().
        nonzero: forwarded to percentile() as nonzero_end.

    Value:
        None when both percents are 0 and region_file is None; otherwise a
        dictionary mapping each name in self.data to its summed signal.
    '''
    # list() so that indexing/slicing below works on Python 2 and Python 3.
    names = list(self.data.keys())
    if exclude_low_percent == 0 and exclude_high_percent == 0 and region_file is None:
        return None
    sampling_total = {}
    if region_file is None:
        # Single-argument print() emits the same text under Python 2 and 3.
        print(' '.join([
            'calculate total signal in each sample after excluding the top',
            str(exclude_high_percent),
            'and bottom',
            str(exclude_low_percent),
            'percents of genomic regions with extremely high and low signal values',
        ]))
        wsums = {}
        for name in names:
            wsums[name] = self.data[name].sum()
        # "* 1.0" forces float division even if the sums are integers.
        wavg = sum(wsums.values()) * 1.0 / len(wsums)
        rfwig = deepcopy(self.data[names[0]])
        rfwig.foldChange(wavg * 1.0 / wsums[names[0]])
        for name in names[1:]:
            # Scale the sample to the common total, accumulate it into the
            # reference wig, then undo the scaling on the sample in place.
            self.data[name].foldChange(wavg * 1.0 / wsums[name])
            rfwig.add(self.data[name])
            self.data[name].foldChange(wsums[name] * 1.0 / wavg)
        # Turn the accumulated sum into an average over all samples.
        rfwig.foldChange(1.0 / len(names))
        lowcut, highcut = rfwig.percentile(
            p=[exclude_low_percent, 100 - exclude_high_percent],
            bnum=bnum,
            nonzero_end=nonzero)
        rg = rfwig.regionWithinValueRange(lowcut, highcut)
        if region_out_file is not None:
            rg.save(region_out_file)
    else:
        print('calculate total signal in each sample in genomic regions defined by ' + str(region_file))
        rg = Wig(region_file)
    for name in names:
        sampling_total[name] = self.data[name].multiply(rg).sum()
    print(' '.join([
        str(rg.sum()),
        '(' + str(rg.sum() * 100.0 / rg.gsize()) + '%) of',
        str(rg.gsize()),
        'base pairs calculated:',
    ]))
    for name in names:
        print(' '.join([
            str(name),
            str(sampling_total[name]),
            '(' + str(sampling_total[name] * 100.0 / self.data[name].sum()) + '% of total)',
        ]))
    return sampling_total
def samplingTotal(self,region_file=None,region_out_file=None,exclude_low_percent=1,exclude_high_percent=1,bnum=100000,nonzero=False):
    '''
    Description:
        Calculate, for every wig in self.data, the sum of its values after
        multiplying it by a region wig. The region wig is either loaded from
        region_file, or built from the average of all wigs (each scaled to
        the same total) via percentile() and regionWithinValueRange().

    Parameter:
        region_file: passed to Wig() to load the regions; when None the
            regions are derived from the data itself.
        region_out_file: when not None and the regions are derived from the
            data, the regions are saved there with rg.save().
        exclude_low_percent: used as the lower percentile cutoff.
        exclude_high_percent: 100 minus this value is used as the upper
            percentile cutoff.
        bnum: forwarded unchanged to percentile().
        nonzero: forwarded to percentile() as nonzero_end.

    Value:
        None when both percents are 0 and region_file is None; otherwise a
        dictionary mapping each name in self.data to its summed signal.
    '''
    names=list(self.data.keys())
    if exclude_low_percent==0 and exclude_high_percent==0 and region_file is None:
        return None
    sampling_total={}
    if region_file is None:
        # Words are separated by single spaces, including before 'percents'.
        sys.stdout.write('calculate total signal in each sample after excluding the top ' + str(exclude_high_percent) + ' and bottom ' + str(exclude_low_percent) + ' percents of genomic regions with extremely high and low signal values\n')
        wsums={}
        for name in names:
            wsums[name]=self.data[name].sum()
        wavg=functions.div(sum(wsums.values()),len(wsums))
        rfwig=deepcopy(self.data[names[0]])
        rfwig.foldChange(functions.div(wavg*1.0,wsums[names[0]]))
        for name in names[1:]:
            # Scale the sample to the common total, accumulate it into the
            # reference wig, then undo the scaling on the sample in place.
            self.data[name].foldChange(functions.div(wavg*1.0,wsums[name]))
            rfwig.add(self.data[name])
            self.data[name].foldChange(functions.div(wsums[name]*1.0,wavg))
        # Turn the accumulated sum into an average over all samples.
        rfwig.foldChange(functions.div(1.0,len(names)))
        lowcut,highcut=rfwig.percentile(p=[exclude_low_percent,100-exclude_high_percent],bnum=bnum,nonzero_end=nonzero)
        rg=rfwig.regionWithinValueRange(lowcut,highcut)
        if region_out_file is not None:
            rg.save(region_out_file)
    else:
        # str() keeps the message working for non-str path objects.
        sys.stdout.write('calculate total signal in each sample in genomic regions defined by ' + str(region_file) + '\n')
        rg=Wig(region_file)
    for name in names:
        sampling_total[name]=self.data[name].multiply(rg).sum()
    sys.stdout.write(str(rg.sum()) + ' (' + str(functions.div(rg.sum()*100.0,rg.gsize())) + '%) of ' + str(rg.gsize()) + ' base pairs calculated:\n')
    for name in names:
        # A space separates the sample name from its total.
        sys.stdout.write(str(name) + ' ' + str(sampling_total[name]) + ' (' + str(functions.div(sampling_total[name]*100.0,self.data[name].sum())) + '% of total)\n')
    return sampling_total
def samplingTotal(self,region_file=None,region_out_file=None,exclude_low_percent=1,exclude_high_percent=1,bnum=100000,nonzero=False):
    '''
    Description:
        Calculate, for every wig in self.data, the sum of its values after
        multiplying it by a region wig. The region wig is either loaded from
        region_file, or built from the average of all wigs (each scaled to
        the same total) via percentile() and regionWithinValueRange().

    Parameter:
        region_file: passed to Wig() to load the regions; when None the
            regions are derived from the data itself.
        region_out_file: when not None and the regions are derived from the
            data, the regions are saved there with rg.save().
        exclude_low_percent: used as the lower percentile cutoff.
        exclude_high_percent: 100 minus this value is used as the upper
            percentile cutoff.
        bnum: forwarded unchanged to percentile().
        nonzero: forwarded to percentile() as nonzero_end.

    Value:
        None when both percents are 0 and region_file is None; otherwise a
        dictionary mapping each name in self.data to its summed signal.
    '''
    # list() so that indexing/slicing below works on Python 2 and Python 3.
    names=list(self.data.keys())
    if exclude_low_percent==0 and exclude_high_percent==0 and region_file is None:
        return None
    if region_file is None:
        # A parenthesised single string prints identically on Python 2 and 3.
        print('calculate total signal in each sample after excluding the top %s and bottom %s percents of genomic regions with extremely high and low signal values' % (exclude_high_percent, exclude_low_percent))
        wsums=dict((name, self.data[name].sum()) for name in names)
        # "*1.0" forces float division even if the sums are integers.
        wavg=sum(wsums.values())*1.0/len(wsums)
        first=names[0]
        rfwig=deepcopy(self.data[first])
        rfwig.foldChange(wavg*1.0/wsums[first])
        for name in names[1:]:
            wig=self.data[name]
            # Scale the sample to the common total, accumulate it into the
            # reference wig, then undo the scaling on the sample in place.
            wig.foldChange(wavg*1.0/wsums[name])
            rfwig.add(wig)
            wig.foldChange(wsums[name]*1.0/wavg)
        # Turn the accumulated sum into an average over all samples.
        rfwig.foldChange(1.0/len(names))
        lowcut,highcut=rfwig.percentile(p=[exclude_low_percent,100-exclude_high_percent],bnum=bnum,nonzero_end=nonzero)
        rg=rfwig.regionWithinValueRange(lowcut,highcut)
        if region_out_file is not None:
            rg.save(region_out_file)
    else:
        print('calculate total signal in each sample in genomic regions defined by %s' % (region_file,))
        rg=Wig(region_file)
    sampling_total={}
    for name in names:
        sampling_total[name]=self.data[name].multiply(rg).sum()
    covered=rg.sum()
    genome_size=rg.gsize()
    print('%s (%s%%) of %s base pairs calculated:' % (covered, covered*100.0/genome_size, genome_size))
    for name in names:
        print('%s %s (%s%% of total)' % (name, sampling_total[name], sampling_total[name]*100.0/self.data[name].sum()))
    return sampling_total