예제 #1
0
class OccChunk(Chunk):
    """Chunk for which occupancy and occupancy peaks are calculated.

    Coordinates are copied from an existing chunk.  Run settings are supplied
    later through :meth:`process`, which stores them on ``self.params``; every
    other method (except :meth:`removeData`) reads ``self.params``.
    """

    def __init__(self, chunk):
        """Copy ``start``/``end``/``chrom`` from ``chunk``; start with no peaks."""
        self.start = chunk.start
        self.end = chunk.end
        self.chrom = chunk.chrom
        # Accepted peaks, keyed by chunk-relative position (filled by callPeaks).
        self.peaks = {}
        self.nfrs = []

    def getFragmentMat(self):
        """Build ``self.mat``, the fragment matrix padded by ``params.flank``."""
        params = self.params
        self.mat = FragmentMat2D(self.chrom, self.start - params.flank,
                                 self.end + params.flank, 0, params.upper)
        self.mat.makeFragmentMat(params.bam)

    def makeBiasMat(self):
        """Build ``self.bias_mat``; fill it from sequence bias if a fasta is set.

        When ``params.fasta`` is ``None`` the matrix is left exactly as
        ``BiasMat2D`` constructed it.
        """
        params = self.params
        self.bias_mat = BiasMat2D(self.chrom, self.start - params.flank,
                                  self.end + params.flank, 0, params.upper)
        if params.fasta is not None:
            # Floor division keeps the track bounds integral; ``upper / 2``
            # would produce float coordinates on Python 3.
            half_upper = params.upper // 2
            bias_track = InsertionBiasTrack(
                self.chrom,
                self.start - params.window - half_upper,
                self.end + params.window + half_upper + 1,
                log=True)
            bias_track.computeBias(params.fasta, params.chrs, params.pwm)
            self.bias_mat.makeBiasMat(bias_track)

    def calculateOcc(self):
        """Calculate occupancy for the chunk and smooth it (``self.occ``)."""
        self.occ = OccupancyTrack(self.chrom, self.start, self.end)
        self.occ.calculateOccupancyMLE(self.mat, self.bias_mat, self.params)
        self.occ.makeSmoothed(window_len=self.params.window,
                              sd=self.params.flank / 3.0)

    def getCov(self):
        """Get read coverage for the region (``self.cov``)."""
        self.cov = CoverageTrack(self.chrom, self.start, self.end)
        self.cov.calculateCoverage(self.mat, 0, self.params.upper,
                                   self.params.window)

    def callPeaks(self):
        """Call peaks of the smoothed occupancy profile.

        A candidate is kept in ``self.peaks`` (keyed by its chunk-relative
        position) only if its ``occ_lower`` exceeds ``params.min_occ`` and it
        has a positive ``reads`` value.
        """
        peaks = call_peaks(self.occ.smoothed_vals, sep=self.params.sep,
                           min_signal=self.params.min_occ)
        for peak in peaks:
            # OccPeak takes the absolute position, the dict key stays relative.
            tmp = OccPeak(peak + self.start, self)
            if tmp.occ_lower > self.params.min_occ and tmp.reads > 0:
                self.peaks[peak] = tmp

    def getNucDist(self):
        """Get the nucleosomal insert distribution.

        For every accepted peak the fragment matrix within ``params.flank`` of
        the peak is summed along axis 1, normalized to sum to one, and
        accumulated.

        Returns:
            Array of length ``params.upper`` holding the summed profiles.
        """
        flank = self.params.flank
        nuc_dist = np.zeros(self.params.upper)
        for peak in self.peaks.values():
            sub = self.mat.get(start=peak.start - flank,
                               end=peak.start + 1 + flank)
            sub_sum = np.sum(sub, axis=1)
            sub_sum = sub_sum / float(sum(sub_sum))
            nuc_dist += sub_sum
        return nuc_dist

    def process(self, params):
        """Process the chunk: calculate occupancy, get coverage, call peaks.

        Args:
            params: settings object stored as ``self.params`` and read by all
                the steps run here.
        """
        self.params = params
        self.getFragmentMat()
        self.makeBiasMat()
        self.calculateOcc()
        self.getCov()
        self.callPeaks()

    def removeData(self):
        """Remove data from the chunk by deleting all instance attributes."""
        # Snapshot the names first: deleting while iterating the live dict
        # view raises RuntimeError on Python 3.
        for name in list(self.__dict__):
            delattr(self, name)
예제 #2
0
 def getNucSignal(self):
     """Build the nucleosome coverage, bias, signal and normalized signal tracks.

     Results are stored on ``self.nuc_cov``, ``self.bias``,
     ``self.nuc_signal`` and ``self.norm_signal``; each later track is
     computed from the earlier ones, so the order below matters.
     """
     region = (self.chrom, self.start, self.end)

     # Coverage restricted to fragment sizes between params.lower and params.upper.
     self.nuc_cov = CoverageTrack(*region)
     settings = self.params
     self.nuc_cov.calculateCoverage(
         self.mat, settings.lower, settings.upper, settings.window)

     # Background model derived from the bias matrix and the coverage above.
     self.bias = BiasTrack(*region)
     self.bias.calculateBackgroundSignal(
         self.bias_mat, self.params.vmat, self.nuc_cov)

     # Raw signal from the fragment matrix.
     self.nuc_signal = SignalTrack(*region)
     self.nuc_signal.calculateSignal(self.mat, self.params.vmat)

     # Signal normalized against the background.
     self.norm_signal = NormSignalTrack(*region)
     self.norm_signal.calculateNormSignal(self.nuc_signal, self.bias)
예제 #3
0
def _covHelper(arg):
    """Compute a scaled coverage track for one chunk.

    Args:
        arg: a ``(chunk, args)`` pair.  ``chunk`` supplies ``chrom``,
            ``start``, ``end`` and ``asBed()``; ``args`` supplies ``window``,
            ``lower``, ``upper``, ``atac``, ``bam`` and ``scale``.

    Returns:
        The ``CoverageTrack`` for the chunk, with its values multiplied by
        ``args.scale / args.window``.

    Raises:
        Exception: any error raised while processing is reported on stdout
            (chunk plus traceback) and then re-raised unchanged.
    """
    (chunk, args) = arg
    try:
        # Floor division keeps the padded coordinates integral; ``window / 2``
        # would yield a float offset on Python 3.
        offset = args.window // 2
        mat = FragmentMat2D(chunk.chrom, chunk.start - offset,
                            chunk.end + offset, args.lower, args.upper,
                            args.atac)
        mat.makeFragmentMat(args.bam)
        cov = CoverageTrack(chunk.chrom, chunk.start, chunk.end)
        cov.calculateCoverage(mat, lower=args.lower, upper=args.upper,
                              window_len=args.window)
        cov.vals *= args.scale / float(args.window)
    except Exception:
        # Report which chunk failed before propagating the error.
        print('Caught exception when processing:\n' + chunk.asBed() + "\n")
        traceback.print_exc()
        print()
        # Bare raise re-raises the active exception with its traceback intact.
        raise
    return cov
예제 #4
0
 def calculateBackgroundSignal(self, mat, vmat, nuc_cov):
     """Compute the background signal for this track and store it in ``self.vals``.

     The bias matrix is cross-correlated with ``vmat.mat`` and the result is
     rescaled by the ratio of ``nuc_cov.vals`` to the coverage computed from
     the bias matrix itself.

     Args:
         mat: bias matrix; its ``start``, ``end`` and
             ``get(lower, upper, start, end)`` are used.
         vmat: object whose ``w``, ``lower``, ``upper`` and ``mat`` are read.
         nuc_cov: coverage track whose ``vals`` scale the result.

     Raises:
         Exception: if ``mat`` starts fewer than ``vmat.w`` positions before
             ``self.start``.
     """
     offset = self.start - mat.start - vmat.w
     if offset < 0:
         # Single-line literal: the previous backslash continuation embedded
         # the source indentation inside the message.
         raise Exception(
             "Insufficient flanking region on mat to calculate signal")
     self.vmat = vmat
     self.bias_mat = mat
     self.cov = CoverageTrack(self.chrom, self.start, self.end)
     self.cov.calculateCoverage(self.bias_mat, vmat.lower,
                                vmat.upper, vmat.w * 2 + 1)
     self.nuc_cov = nuc_cov.vals
     # Trim ``offset`` positions from both ends of the matrix before
     # correlating.  NOTE(review): only the left flank is checked above; the
     # right flank is presumably symmetric -- confirm against callers.
     self.vals = signal.correlate(
         self.bias_mat.get(vmat.lower, vmat.upper,
                           self.bias_mat.start + offset,
                           self.bias_mat.end - offset),
         vmat.mat, mode='valid')[0]
     self.vals = self.vals * self.nuc_cov / self.cov.vals
예제 #5
0
 def getNucSignal(self):
     """Compute and store the four tracks behind the nucleosome signal.

     Sets ``self.nuc_cov``, ``self.bias``, ``self.nuc_signal`` and
     ``self.norm_signal`` in that order; later tracks consume earlier ones.
     """
     bounds = (self.chrom, self.start, self.end)

     # Coverage for fragment sizes in the params.lower..params.upper range.
     self.nuc_cov = CoverageTrack(*bounds)
     cfg = self.params
     self.nuc_cov.calculateCoverage(self.mat, cfg.lower, cfg.upper, cfg.window)

     # Background signal computed from the bias matrix.
     self.bias = BiasTrack(*bounds)
     self.bias.calculateBackgroundSignal(
         self.bias_mat, self.params.vmat, self.nuc_cov)

     # Observed signal computed from the fragment matrix.
     self.nuc_signal = SignalTrack(*bounds)
     self.nuc_signal.calculateSignal(self.mat, self.params.vmat)

     # Observed signal normalized by the background.
     self.norm_signal = NormSignalTrack(*bounds)
     self.norm_signal.calculateNormSignal(self.nuc_signal, self.bias)
예제 #6
0
class BiasTrack(Track):
    """Track holding the bias signal, i.e. the background model."""

    def __init__(self, chrom, start, end):
        """Create the track for ``chrom:start-end`` with the name ``"bias"``."""
        Track.__init__(self, chrom, start, end, "bias")

    def calculateBackgroundSignal(self, mat, vmat, nuc_cov):
        """Compute the background signal and store it in ``self.vals``.

        The bias matrix is cross-correlated with ``vmat.mat`` and the result
        is rescaled by the ratio of ``nuc_cov.vals`` to the coverage computed
        from the bias matrix itself.

        Args:
            mat: bias matrix; its ``start``, ``end`` and
                ``get(lower, upper, start, end)`` are used.
            vmat: object whose ``w``, ``lower``, ``upper`` and ``mat`` are
                read.
            nuc_cov: coverage track whose ``vals`` scale the result.

        Raises:
            Exception: if ``mat`` starts fewer than ``vmat.w`` positions
                before ``self.start``.
        """
        offset = self.start - mat.start - vmat.w
        if offset < 0:
            # Single-line literal: the previous backslash continuation
            # embedded the source indentation inside the message.
            raise Exception(
                "Insufficient flanking region on mat to calculate signal")
        self.vmat = vmat
        self.bias_mat = mat
        self.cov = CoverageTrack(self.chrom, self.start, self.end)
        self.cov.calculateCoverage(self.bias_mat, vmat.lower,
                                   vmat.upper, vmat.w * 2 + 1)
        self.nuc_cov = nuc_cov.vals
        # Trim ``offset`` positions from both ends before correlating.
        # NOTE(review): only the left flank is checked above; the right flank
        # is presumably symmetric -- confirm against callers.
        self.vals = signal.correlate(
            self.bias_mat.get(vmat.lower, vmat.upper,
                              self.bias_mat.start + offset,
                              self.bias_mat.end - offset),
            vmat.mat, mode='valid')[0]
        self.vals = self.vals * self.nuc_cov / self.cov.vals
예제 #7
0
class BiasTrack(Track):
    """Bias signal track used as the background model."""

    def __init__(self, chrom, start, end):
        """Initialise the underlying ``Track`` with the name ``"bias"``."""
        Track.__init__(self, chrom, start, end, "bias")

    def calculateBackgroundSignal(self, mat, vmat, nuc_cov):
        """Fill ``self.vals`` with the background signal for this region.

        Cross-correlates the bias matrix with ``vmat.mat`` and multiplies the
        result by ``nuc_cov.vals`` divided by the coverage of the bias matrix.

        Args:
            mat: bias matrix providing ``start``, ``end`` and
                ``get(lower, upper, start, end)``.
            vmat: provides ``w``, ``lower``, ``upper`` and ``mat``.
            nuc_cov: coverage track; only its ``vals`` are used.

        Raises:
            Exception: when ``mat`` begins fewer than ``vmat.w`` positions
                before ``self.start``.
        """
        offset = self.start - mat.start - vmat.w
        if offset < 0:
            # Keep the message on one logical string: a backslash continuation
            # inside the literal leaks the source indentation into the text.
            raise Exception(
                "Insufficient flanking region on mat to calculate signal")
        self.vmat = vmat
        self.bias_mat = mat
        self.cov = CoverageTrack(self.chrom, self.start, self.end)
        self.cov.calculateCoverage(self.bias_mat, vmat.lower,
                                   vmat.upper, vmat.w * 2 + 1)
        self.nuc_cov = nuc_cov.vals
        # The same ``offset`` is trimmed from both ends of the matrix.
        # NOTE(review): the right-hand flank is not validated separately --
        # confirm that callers always pad symmetrically.
        trimmed = self.bias_mat.get(vmat.lower, vmat.upper,
                                    self.bias_mat.start + offset,
                                    self.bias_mat.end - offset)
        self.vals = signal.correlate(trimmed, vmat.mat, mode='valid')[0]
        self.vals = self.vals * self.nuc_cov / self.cov.vals
예제 #8
0
def _covHelper(arg):
    """Build the scaled coverage track for a single ``(chunk, args)`` pair.

    The fragment matrix is padded by half a window on each side, coverage is
    computed over the chunk, and the values are multiplied by
    ``args.scale / args.window``.  Any exception is reported on stdout with
    the offending chunk and a traceback, then re-raised.
    """
    chunk, args = arg
    try:
        half_window = args.window // 2
        fragments = FragmentMat2D(
            chunk.chrom,
            chunk.start - half_window,
            chunk.end + half_window,
            args.lower,
            args.upper,
            args.atac,
        )
        fragments.makeFragmentMat(args.bam)

        coverage = CoverageTrack(chunk.chrom, chunk.start, chunk.end)
        coverage.calculateCoverage(
            fragments,
            lower=args.lower,
            upper=args.upper,
            window_len=args.window,
        )
        coverage.vals *= args.scale / float(args.window)
    except Exception as err:
        # Identify the failing chunk before propagating the error.
        report = 'Caught exception when processing:\n' + chunk.asBed() + "\n"
        print(report)
        traceback.print_exc()
        print()
        raise err
    return coverage
예제 #9
0
 def calculateBackgroundSignal(self, mat, vmat, nuc_cov):
     """Fill ``self.vals`` with the background signal for this region.

     Cross-correlates the bias matrix with ``vmat.mat`` and multiplies the
     result by ``nuc_cov.vals`` divided by the coverage of the bias matrix.

     Args:
         mat: bias matrix providing ``start``, ``end`` and
             ``get(lower, upper, start, end)``.
         vmat: provides ``w``, ``lower``, ``upper`` and ``mat``.
         nuc_cov: coverage track; only its ``vals`` are used.

     Raises:
         Exception: when ``mat`` begins fewer than ``vmat.w`` positions before
             ``self.start``.
     """
     offset = self.start - mat.start - vmat.w
     if offset < 0:
         # Keep the message on one logical string: a backslash continuation
         # inside the literal leaks the source indentation into the text.
         raise Exception(
             "Insufficient flanking region on mat to calculate signal")
     self.vmat = vmat
     self.bias_mat = mat
     self.cov = CoverageTrack(self.chrom, self.start, self.end)
     self.cov.calculateCoverage(self.bias_mat, vmat.lower,
                                vmat.upper, vmat.w * 2 + 1)
     self.nuc_cov = nuc_cov.vals
     # The same ``offset`` is trimmed from both ends of the matrix.
     # NOTE(review): the right-hand flank is not validated separately --
     # confirm that callers always pad symmetrically.
     trimmed = self.bias_mat.get(vmat.lower, vmat.upper,
                                 self.bias_mat.start + offset,
                                 self.bias_mat.end - offset)
     self.vals = signal.correlate(trimmed, vmat.mat, mode='valid')[0]
     self.vals = self.vals * self.nuc_cov / self.cov.vals
예제 #10
0
 def getCov(self):
     """Compute read coverage over the chunk and keep it as ``self.cov``.

     Coverage is taken from ``self.mat`` for fragment sizes from 0 up to
     ``params.upper`` using a window of ``params.window``.
     """
     track = CoverageTrack(self.chrom, self.start, self.end)
     self.cov = track
     fragments = self.mat
     settings = self.params
     track.calculateCoverage(fragments, 0, settings.upper, settings.window)
예제 #11
0
 def getNFR(self):
     """Compute coverage of sub-nucleosomal-length reads into ``self.nfr_cov``.

     Only fragment sizes from 0 up to ``params.lower`` are counted.
     """
     track = CoverageTrack(self.chrom, self.start, self.end)
     self.nfr_cov = track
     fragments = self.mat
     settings = self.params
     track.calculateCoverage(fragments, 0, settings.lower, settings.window)
예제 #12
0
class NucChunk(Chunk):
    """Chunk that determines and stores a collection of nucleosome positions.

    Coordinates are copied from an existing chunk; run settings arrive through
    :meth:`initialize` (called by :meth:`process`) and live on ``self.params``.
    """

    def __init__(self, chunk):
        """Copy ``chrom``, ``start`` and ``end`` from ``chunk``."""
        self.start = chunk.start
        self.end = chunk.end
        self.chrom = chunk.chrom

    def initialize(self, parameters):
        """Store the settings object used by every later step."""
        self.params = parameters

    def getFragmentMat(self):
        """Build ``self.mat``, padded by max(window, upper // 2 + 1) per side."""
        cfg = self.params
        pad_left = max(cfg.window, cfg.upper // 2 + 1)
        pad_right = max(cfg.window, cfg.upper // 2 + 1)
        self.mat = FragmentMat2D(self.chrom,
                                 self.start - pad_left,
                                 self.end + pad_right,
                                 0,
                                 cfg.upper,
                                 atac=cfg.atac)
        self.mat.makeFragmentMat(cfg.bam)

    def makeBiasMat(self):
        """Build ``self.bias_mat`` plus a pre-normalization copy.

        The matrix is filled from sequence bias only when ``params.fasta`` is
        set.  ``self.bias_mat_prenorm`` keeps a copy of the matrix values
        before ``normByInsertDist`` is applied to ``self.bias_mat``.
        """
        cfg = self.params
        self.bias_mat = BiasMat2D(self.chrom,
                                  self.start - cfg.window,
                                  self.end + cfg.window,
                                  0,
                                  cfg.upper)
        # The bias track extends a further upper // 2 beyond the matrix.
        track_start = self.start - cfg.window - cfg.upper // 2
        track_end = self.end + cfg.window + cfg.upper // 2 + 1
        bias_track = InsertionBiasTrack(self.chrom, track_start, track_end,
                                        log=True)
        if cfg.fasta is not None:
            bias_track.computeBias(cfg.fasta, cfg.chrs, cfg.pwm)
            self.bias_mat.makeBiasMat(bias_track)
        self.bias_mat_prenorm = BiasMat2D(self.chrom,
                                          self.start - cfg.window,
                                          self.end + cfg.window,
                                          0,
                                          cfg.upper)
        self.bias_mat_prenorm.mat = copy(self.bias_mat.mat)
        self.bias_mat.normByInsertDist(cfg.fragmentsizes)

    def getNucSignal(self):
        """Compute the coverage, bias, signal and normalized signal tracks."""
        bounds = (self.chrom, self.start, self.end)

        self.nuc_cov = CoverageTrack(*bounds)
        cfg = self.params
        self.nuc_cov.calculateCoverage(self.mat, cfg.lower, cfg.upper,
                                       cfg.window)

        self.bias = BiasTrack(*bounds)
        self.bias.calculateBackgroundSignal(self.bias_mat, self.params.vmat,
                                            self.nuc_cov)

        self.nuc_signal = SignalTrack(*bounds)
        self.nuc_signal.calculateSignal(self.mat, self.params.vmat)

        self.norm_signal = NormSignalTrack(*bounds)
        self.norm_signal.calculateNormSignal(self.nuc_signal, self.bias)

    def getNFR(self):
        """Compute coverage of sub-nucleosomal-length reads (``self.nfr_cov``)."""
        track = CoverageTrack(self.chrom, self.start, self.end)
        self.nfr_cov = track
        fragments = self.mat
        cfg = self.params
        track.calculateCoverage(fragments, 0, cfg.lower, cfg.window)

    def smoothSignal(self):
        """Smooth the normalized signal track into ``self.smoothed``.

        Negative values are set to zero before a gaussian window spanning
        ``6 * smooth_sd + 1`` positions is applied.
        """
        window_len = 6 * self.params.smooth_sd + 1
        self.smoothed = Track(self.chrom, self.start, self.end,
                              "Smooth Signal")
        # Work on a copy so the normalized signal itself is left untouched.
        self.smoothed.assign_track(copy(self.norm_signal.vals))
        self.smoothed.vals[self.smoothed.vals < 0] = 0
        smoothing = dict(window="gaussian",
                         sd=self.params.smooth_sd,
                         mode='same',
                         norm=True)
        self.smoothed.smooth_track(window_len, **smoothing)

    def getOcc(self):
        """Read the occupancy track and its lower/upper bound tracks.

        The bound file names are built by replacing the last 11 characters of
        ``params.occ_track`` with ``lower_bound.bedgraph.gz`` and
        ``upper_bound.bedgraph.gz`` respectively.
        """
        bounds = (self.chrom, self.start, self.end, "Occupancy")

        self.occ = Track(*bounds)
        self.occ.read_track(self.params.occ_track)

        self.occ_lower = Track(*bounds)
        self.occ_lower.read_track(
            self.params.occ_track[:-11] + 'lower_bound.bedgraph.gz')

        self.occ_upper = Track(*bounds)
        self.occ_upper.read_track(
            self.params.occ_track[:-11] + 'upper_bound.bedgraph.gz')

    def findAllNucs(self):
        """Find candidate nucleosomes and split them into (non)redundant sets.

        Peaks are called on the sum of the normalized and smoothed signals.
        A candidate is kept only if it passes, in order, the read-count,
        likelihood-ratio and z-score thresholds from ``params``.
        """
        self.nuc_collection = {}
        combined = self.norm_signal.vals + self.smoothed.vals
        # Find peaks in the combined normalized signal.
        candidates = call_peaks(combined,
                                min_signal=0,
                                sep=self.params.redundant_sep,
                                boundary=self.params.nonredundant_sep // 2,
                                order=self.params.redundant_sep // 2)
        for pos in candidates:
            nuc = Nucleosome(pos + self.start, self)
            # Each test is written as a negated pass-condition so that
            # non-comparable values are rejected, never accepted.
            if not nuc.nuc_cov > self.params.min_reads:
                continue
            nuc.getLR(self)
            if not nuc.lr > self.params.min_lr:
                continue
            nuc.getZScore(self)
            if not nuc.z >= self.params.min_z:
                continue
            nuc.getOcc(self)
            self.nuc_collection[pos] = nuc
        self.sorted_nuc_keys = np.array(sorted(self.nuc_collection.keys()))
        zscores = [self.nuc_collection[key].z
                   for key in self.sorted_nuc_keys]
        self.nonredundant = reduce_peaks(self.sorted_nuc_keys, zscores,
                                         self.params.nonredundant_sep)
        self.redundant = np.setdiff1d(self.sorted_nuc_keys, self.nonredundant)

    def fit(self):
        """Sum one ``norm`` curve per called nucleosome into ``self.fitted``."""
        x = np.linspace(0, self.length() - 1, self.length())
        total = np.zeros(self.length())
        for key in self.sorted_nuc_keys:
            nuc = self.nuc_collection[key]
            nuc.getFuzz(self)
            total += norm(x, nuc.fuzz**2, nuc.weight, nuc.fit_pos)
        self.fitted = Track(self.chrom, self.start, self.end,
                            "Fitted Nucleosome Signal")
        self.fitted.assign_track(total)

    def makeInsertionTrack(self):
        """Make the insertion track for the chunk (``self.ins``)."""
        self.ins = self.mat.getIns()

    def process(self, params):
        """Run every step needed to call nucleosomes and NFRs on this chunk.

        The occupancy tracks are read only when ``params.occ_track`` is set.
        """
        self.initialize(params)
        self.getFragmentMat()
        self.makeBiasMat()
        self.getNucSignal()
        self.getNFR()
        self.smoothSignal()
        if params.occ_track is not None:
            self.getOcc()
        self.findAllNucs()
        self.fit()
        self.makeInsertionTrack()

    def removeData(self):
        """Delete every instance attribute to release the chunk's data."""
        # Iterate over a snapshot, since delattr mutates the dict.
        for attr in tuple(self.__dict__.keys()):
            delattr(self, attr)
예제 #13
0
 def getNFR(self):
     """Store coverage of sub-nucleosomal-length reads as ``self.nfr_cov``.

     Fragment sizes from 0 up to ``params.lower`` are counted with a window
     of ``params.window``.
     """
     nfr_track = CoverageTrack(self.chrom, self.start, self.end)
     self.nfr_cov = nfr_track
     frag_mat = self.mat
     cfg = self.params
     nfr_track.calculateCoverage(frag_mat, 0, cfg.lower, cfg.window)
예제 #14
0
class NucChunk(Chunk):
    """Class for storing and determining a collection of nucleosome positions.

    Coordinates are copied from an existing chunk; run settings are supplied
    through :meth:`initialize` (called by :meth:`process`) and stored on
    ``self.params``.
    """

    def __init__(self, chunk):
        """Copy ``start``, ``end`` and ``chrom`` from ``chunk``."""
        self.start = chunk.start
        self.end = chunk.end
        self.chrom = chunk.chrom

    def initialize(self, parameters):
        """Store the settings object read by all later steps."""
        self.params = parameters

    def getFragmentMat(self):
        """Build ``self.mat``, padded by max(window, upper // 2 + 1) per side."""
        params = self.params
        # Floor division keeps coordinates integral; ``upper / 2`` is a float
        # on Python 3.
        pad = max(params.window, params.upper // 2 + 1)
        self.mat = FragmentMat2D(self.chrom, self.start - pad,
                                 self.end + pad, 0, params.upper,
                                 atac=params.atac)
        self.mat.makeFragmentMat(params.bam)

    def makeBiasMat(self):
        """Build ``self.bias_mat`` and a pre-normalization copy of it.

        The matrix is filled from sequence bias only when ``params.fasta`` is
        set.  ``self.bias_mat_prenorm`` receives a copy of the matrix values
        before ``normByInsertDist`` is applied to ``self.bias_mat``.
        """
        params = self.params
        self.bias_mat = BiasMat2D(self.chrom, self.start - params.window,
                                  self.end + params.window, 0, params.upper)
        # Integer half-width so the bias track bounds stay integral.
        half_upper = params.upper // 2
        bias_track = InsertionBiasTrack(
            self.chrom,
            self.start - params.window - half_upper,
            self.end + params.window + half_upper + 1,
            log=True)
        if params.fasta is not None:
            bias_track.computeBias(params.fasta, params.chrs, params.pwm)
            self.bias_mat.makeBiasMat(bias_track)
        self.bias_mat_prenorm = BiasMat2D(self.chrom,
                                          self.start - params.window,
                                          self.end + params.window, 0,
                                          params.upper)
        self.bias_mat_prenorm.mat = copy(self.bias_mat.mat)
        self.bias_mat.normByInsertDist(params.fragmentsizes)

    def getNucSignal(self):
        """Get the nucleosome signal track and the tracks it is built from.

        Sets ``self.nuc_cov``, ``self.bias``, ``self.nuc_signal`` and
        ``self.norm_signal`` in that order.
        """
        self.nuc_cov = CoverageTrack(self.chrom, self.start, self.end)
        self.nuc_cov.calculateCoverage(self.mat, self.params.lower,
                                       self.params.upper, self.params.window)
        self.bias = BiasTrack(self.chrom, self.start, self.end)
        self.bias.calculateBackgroundSignal(self.bias_mat, self.params.vmat,
                                            self.nuc_cov)
        self.nuc_signal = SignalTrack(self.chrom, self.start, self.end)
        self.nuc_signal.calculateSignal(self.mat, self.params.vmat)
        self.norm_signal = NormSignalTrack(self.chrom, self.start, self.end)
        self.norm_signal.calculateNormSignal(self.nuc_signal, self.bias)

    def getNFR(self):
        """Get the number of reads of sub-nucleosomal length (``self.nfr_cov``)."""
        self.nfr_cov = CoverageTrack(self.chrom, self.start, self.end)
        self.nfr_cov.calculateCoverage(self.mat, 0, self.params.lower,
                                       self.params.window)

    def smoothSignal(self):
        """Smooth the normalized signal track into ``self.smoothed``.

        Negative values are zeroed first; the gaussian window spans
        ``6 * smooth_sd + 1`` positions.
        """
        window_len = 6 * self.params.smooth_sd + 1
        self.smoothed = Track(self.chrom, self.start, self.end,
                              "Smooth Signal")
        # Copy so that the normalized signal itself is not modified.
        tmp = copy(self.norm_signal.vals)
        self.smoothed.assign_track(tmp)
        self.smoothed.vals[self.smoothed.vals < 0] = 0
        self.smoothed.smooth_track(window_len, window="gaussian",
                                   sd=self.params.smooth_sd, mode='same',
                                   norm=True)

    def getOcc(self):
        """Read the occupancy track and its lower/upper bound tracks.

        The bound file names replace the last 11 characters of
        ``params.occ_track`` with ``lower_bound.bedgraph.gz`` and
        ``upper_bound.bedgraph.gz``.
        """
        self.occ = Track(self.chrom, self.start, self.end, "Occupancy")
        self.occ.read_track(self.params.occ_track)
        lower_file = self.params.occ_track[:-11] + 'lower_bound.bedgraph.gz'
        self.occ_lower = Track(self.chrom, self.start, self.end, "Occupancy")
        self.occ_lower.read_track(lower_file)
        upper_file = self.params.occ_track[:-11] + 'upper_bound.bedgraph.gz'
        self.occ_upper = Track(self.chrom, self.start, self.end, "Occupancy")
        self.occ_upper.read_track(upper_file)

    def findAllNucs(self):
        """Find peaks in the data and classify them as (non)redundant.

        Peaks are called on the sum of the normalized and smoothed signals.
        A candidate is kept only if it passes, in order, the read-count,
        likelihood-ratio and z-score thresholds from ``params``.
        """
        self.nuc_collection = {}
        combined = self.norm_signal.vals + self.smoothed.vals
        # Find peaks in the normalized signal.  Floor division keeps
        # ``boundary`` and ``order`` integral on Python 3.
        cands1 = call_peaks(combined, min_signal=0,
                            sep=self.params.redundant_sep,
                            boundary=self.params.nonredundant_sep // 2,
                            order=self.params.redundant_sep // 2)
        for i in cands1:
            nuc = Nucleosome(i + self.start, self)
            if nuc.nuc_cov > self.params.min_reads:
                nuc.getLR(self)
                if nuc.lr > self.params.min_lr:
                    nuc.getZScore(self)
                    if nuc.z >= self.params.min_z:
                        nuc.getOcc(self)
                        self.nuc_collection[i] = nuc
        self.sorted_nuc_keys = np.array(sorted(self.nuc_collection.keys()))
        # A real list, not ``map``: on Python 3 ``map`` returns a one-shot
        # iterator that cannot be indexed or measured.
        z_scores = [self.nuc_collection[x].z for x in self.sorted_nuc_keys]
        self.nonredundant = reduce_peaks(self.sorted_nuc_keys, z_scores,
                                         self.params.nonredundant_sep)
        self.redundant = np.setdiff1d(self.sorted_nuc_keys, self.nonredundant)

    def fit(self):
        """Sum one ``norm`` curve per called nucleosome into ``self.fitted``."""
        x = np.linspace(0, self.length() - 1, self.length())
        fit = np.zeros(self.length())
        for key in self.sorted_nuc_keys:
            nuc = self.nuc_collection[key]
            nuc.getFuzz(self)
            fit += norm(x, nuc.fuzz**2, nuc.weight, nuc.fit_pos)
        self.fitted = Track(self.chrom, self.start, self.end,
                            "Fitted Nucleosome Signal")
        self.fitted.assign_track(fit)

    def makeInsertionTrack(self):
        """Make the insertion track for the chunk (``self.ins``)."""
        self.ins = self.mat.getIns()

    def process(self, params):
        """Wrapper carrying out all methods needed to call nucleosomes and NFRs.

        The occupancy tracks are read only when ``params.occ_track`` is set.
        """
        self.initialize(params)
        self.getFragmentMat()
        self.makeBiasMat()
        self.getNucSignal()
        self.getNFR()
        self.smoothSignal()
        if params.occ_track is not None:
            self.getOcc()
        self.findAllNucs()
        self.fit()
        self.makeInsertionTrack()

    def removeData(self):
        """Remove data from the chunk by deleting all instance attributes."""
        # Snapshot the names first: deleting while iterating the live dict
        # view raises RuntimeError on Python 3.
        for name in list(self.__dict__):
            delattr(self, name)