Exemple #1
0
    def samplingTotal(self,
                      region_file=None,
                      region_out_file=None,
                      exclude_low_percent=1,
                      exclude_high_percent=1,
                      bnum=100000,
                      nonzero=False):
        '''
        Description:
            Calculate, for every wig in self.data, the sum of its values
            inside a selected set of genomic regions.

            The regions are either loaded from region_file, or derived here:
            every wig is scaled to the same total (the average of all wig
            totals), the scaled wigs are averaged into one reference wig, and
            the positions whose reference value lies between the low and high
            percentile cutoffs are kept.
        Parameter:
            region_file: if not None, it is passed to Wig() and the result is
                used as the region set; no percentile selection is done.
            region_out_file: if not None and the regions are derived here,
                the derived regions are saved to this path.
            exclude_low_percent: percent of the lowest signal to exclude.
            exclude_high_percent: percent of the highest signal to exclude.
            bnum: forwarded unchanged to the reference wig's percentile().
            nonzero: forwarded to percentile() as nonzero_end.
        Value:
            None when both exclusion percents are 0 and region_file is None
            (nothing to select); otherwise a dictionary mapping each name in
            self.data to its summed signal within the selected regions.
        '''
        # list() keeps names indexable/sliceable on Python 3 as well, where
        # dict.keys() returns a view instead of a list.
        names = list(self.data.keys())
        if exclude_low_percent == 0 and exclude_high_percent == 0 and region_file is None:
            return None
        sampling_total = {}
        if region_file is None:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print('calculate total signal in each sample after excluding the top %s '
                  'and bottom %s percents of genomic regions with extremely high '
                  'and low signal values'
                  % (exclude_high_percent, exclude_low_percent))
            wsums = {}
            for name in names:
                wsums[name] = self.data[name].sum()
            # Force float division: with integer totals Python 2 would floor
            # the average before it is used as a scaling target.
            wavg = sum(wsums.values()) * 1.0 / len(wsums)

            rfwig = deepcopy(self.data[names[0]])
            rfwig.foldChange(wavg / wsums[names[0]])
            for name in names[1:]:
                wig = self.data[name]
                # The wig is scaled in place, added to the reference, then
                # scaled back, so no extra copy of it is made.
                wig.foldChange(wavg / wsums[name])
                try:
                    rfwig.add(wig)
                finally:
                    # Always undo the scaling so self.data is not left
                    # modified if add() fails.
                    wig.foldChange(wsums[name] / wavg)

            # Turn the sum of scaled wigs into their average.
            rfwig.foldChange(1.0 / len(names))
            lowcut, highcut = rfwig.percentile(
                p=[exclude_low_percent, 100 - exclude_high_percent],
                bnum=bnum,
                nonzero_end=nonzero)
            rg = rfwig.regionWithinValueRange(lowcut, highcut)
            if region_out_file is not None:
                rg.save(region_out_file)
        else:
            print('calculate total signal in each sample in genomic regions defined by %s'
                  % (region_file,))
            rg = Wig(region_file)
        # NOTE(review): rg is presumably a 0/1 mask, so multiply() keeps only
        # the selected positions -- confirm against Wig.regionWithinValueRange.
        for name in names:
            sampling_total[name] = self.data[name].multiply(rg).sum()
        covered = rg.sum()
        gsize = rg.gsize()
        print('%s (%s%%) of %s base pairs calculated:'
              % (covered, covered * 100.0 / gsize, gsize))
        for name in names:
            print('%s %s (%s%% of total)'
                  % (name, sampling_total[name],
                     sampling_total[name] * 100.0 / self.data[name].sum()))

        return sampling_total
Exemple #2
0
    def samplingTotal(self,region_file=None,region_out_file=None,exclude_low_percent=1,exclude_high_percent=1,bnum=100000,nonzero=False):
        '''
        Description:
            Calculate, for every wig in self.data, the sum of its values
            inside a selected set of genomic regions.

            The regions are either loaded from region_file, or derived here:
            every wig is scaled to the same total (the average of all wig
            totals), the scaled wigs are averaged into one reference wig, and
            the positions whose reference value lies between the low and high
            percentile cutoffs are kept.
        Parameter:
            region_file: if not None, it is passed to Wig() and the result is
                used as the region set; no percentile selection is done.
            region_out_file: if not None and the regions are derived here,
                the derived regions are saved to this path.
            exclude_low_percent: percent of the lowest signal to exclude.
            exclude_high_percent: percent of the highest signal to exclude.
            bnum: forwarded unchanged to the reference wig's percentile().
            nonzero: forwarded to percentile() as nonzero_end.
        Value:
            None when both exclusion percents are 0 and region_file is None
            (nothing to select); otherwise a dictionary mapping each name in
            self.data to its summed signal within the selected regions.
        '''
        names = list(self.data.keys())
        if exclude_low_percent == 0 and exclude_high_percent == 0 and region_file is None:
            return None
        sampling_total = {}
        if region_file is None:
            # A space is needed before 'percents'; the message used to read
            # "... bottom 1percents of genomic regions ...".
            sys.stdout.write(
                'calculate total signal in each sample after excluding the top %s'
                ' and bottom %s percents of genomic regions with extremely high'
                ' and low signal values\n'
                % (exclude_high_percent, exclude_low_percent))
            wsums = {}
            for name in names:
                wsums[name] = self.data[name].sum()
            wavg = functions.div(sum(wsums.values()), len(wsums))

            rfwig = deepcopy(self.data[names[0]])
            rfwig.foldChange(functions.div(wavg * 1.0, wsums[names[0]]))
            for name in names[1:]:
                wig = self.data[name]
                # The wig is scaled in place, added to the reference, then
                # scaled back, so no extra copy of it is made.
                wig.foldChange(functions.div(wavg * 1.0, wsums[name]))
                try:
                    rfwig.add(wig)
                finally:
                    # Always undo the scaling so self.data is not left
                    # modified if add() fails.
                    wig.foldChange(functions.div(wsums[name] * 1.0, wavg))

            # Turn the sum of scaled wigs into their average.
            rfwig.foldChange(functions.div(1.0, len(names)))
            lowcut, highcut = rfwig.percentile(
                p=[exclude_low_percent, 100 - exclude_high_percent],
                bnum=bnum,
                nonzero_end=nonzero)
            rg = rfwig.regionWithinValueRange(lowcut, highcut)
            if region_out_file is not None:
                rg.save(region_out_file)
        else:
            # A space is needed between the message and the file name.
            sys.stdout.write(
                'calculate total signal in each sample in genomic regions defined by %s\n'
                % (region_file,))
            rg = Wig(region_file)
        # NOTE(review): rg is presumably a 0/1 mask, so multiply() keeps only
        # the selected positions -- confirm against Wig.regionWithinValueRange.
        for name in names:
            sampling_total[name] = self.data[name].multiply(rg).sum()
        covered = rg.sum()
        gsize = rg.gsize()
        sys.stdout.write(
            '%s (%s %%) of %s base pairs calculated:\n'
            % (covered, functions.div(covered * 100.0, gsize), gsize))
        for name in names:
            # A space is needed between the sample name and its total.
            sys.stdout.write(
                '%s %s (%s%% of total)\n'
                % (name, sampling_total[name],
                   functions.div(sampling_total[name] * 100.0, self.data[name].sum())))

        return sampling_total
Exemple #3
0
    def samplingTotal(self,region_file=None,region_out_file=None,exclude_low_percent=1,exclude_high_percent=1,bnum=100000,nonzero=False):
        '''
        Description:
            caculate the sum of each wig's values after excluding the low and high percentile
        Parameter:
            None
        Value:
            None
        '''
        
        #if exclude_low_percent==0 and exclude_high_percent==0:return None
        #else:
        #print 'calculating normalization factors by sampling ...'
        names=self.data.keys()
        if exclude_low_percent==0 and exclude_high_percent==0 and region_file==None: return None
        sampling_total={}
        if region_file==None:
            print 'calculate total signal in each sample after excluding the top',exclude_high_percent,'and bottom',exclude_low_percent,'percents of genomic regions with extremely high and low signal values'
            wsums={}
            for name in names:wsums[name]=self.data[name].sum()
            wavg=sum(wsums.values())/len(wsums.values())
            
            rfwig=deepcopy(self.data[names[0]])
            rfwig.foldChange(wavg*1.0/wsums[names[0]])
            for name in names[1:]:
                self.data[name].foldChange(wavg*1.0/wsums[name])
                rfwig.add(self.data[name])
                self.data[name].foldChange(wsums[name]*1.0/wavg)
                
            rfwig.foldChange(1.0/len(names))
            lowcut,highcut=rfwig.percentile(p=[exclude_low_percent,100-exclude_high_percent],bnum=bnum,nonzero_end=nonzero)
            rg=rfwig.regionWithinValueRange(lowcut,highcut)
            if region_out_file!=None:rg.save(region_out_file)
        else:
            print 'calculate total signal in each sample in genomic regions defined by',region_file
            rg=Wig(region_file)
        for name in names:sampling_total[name]=self.data[name].multiply(rg).sum()
        print rg.sum(),'('+str(rg.sum()*100.0/rg.gsize())+'%) of',rg.gsize(),'base pairs calculated:'
        for name in names:print name,sampling_total[name],'('+str(sampling_total[name]*100.0/self.data[name].sum())+'% of total)'

        return sampling_total