Ejemplo n.º 1
0
def run_nfr(args):
    """Call nucleosome-free regions and write them as a tabix-indexed BED file.

    Regions are read from ``args.bed``, merged, and processed in batches by a
    worker pool running ``_nfrHelper``. Results are handed to dedicated writer
    processes through queues. When no insertion track is supplied
    (``args.ins_track`` is None) each helper result is indexed as a pair and
    its second element is written to ``<out>.ins.bedgraph.gz``.

    Args:
        args: parsed command-line namespace. Attributes read here: ``bam``,
            ``ins_track``, ``out``, ``calls``, ``fasta``, ``pwm``, ``bed``,
            ``cores``, ``occ_track``, ``max_occ`` and ``max_occ_upper``.

    Raises:
        Exception: if neither ``args.bam`` nor ``args.ins_track`` is given.
    """
    if args.bam is None and args.ins_track is None:
        raise Exception("Must supply either bam file or insertion track")
    if not args.out:
        # Default prefix: basename of the calls file without its last three
        # dot-separated components.
        args.out = '.'.join(os.path.basename(args.calls).split('.')[0:-3])
    if args.fasta is not None:
        chrs_fasta = read_chrom_sizes_from_fasta(args.fasta)
        pwm = PWM.open(args.pwm)
        chunks = ChunkList.read(args.bed, chromDict=chrs_fasta,
                                min_offset=max(pwm.up, pwm.down))
    else:
        chunks = ChunkList.read(args.bed)
    if args.bam is not None:
        chrs_bam = read_chrom_sizes_from_bam(args.bam)
        chunks.checkChroms(chrs_bam, chrom_source="BAM file")
    chunks.merge()
    params = NFRParameters(args.occ_track, args.calls, args.ins_track,
                           args.bam, max_occ=args.max_occ,
                           max_occ_upper=args.max_occ_upper,
                           fasta=args.fasta, pwm=args.pwm)
    write_ins = params.ins_track is None
    sets = chunks.split(items=args.cores * 5)
    pool1 = mp.Pool(processes=max(1, args.cores - 1))

    nfr_bed = args.out + '.nfrpos.bed'
    # Create/truncate the output file before the writer process starts.
    with open(nfr_bed, 'w'):
        pass
    nfr_queue = mp.JoinableQueue()
    nfr_process = mp.Process(target=_writeNFR, args=(nfr_queue, args.out))
    nfr_process.start()

    ins_bedgraph = args.out + '.ins.bedgraph'
    if write_ins:
        with open(ins_bedgraph, 'w'):
            pass
        ins_queue = mp.JoinableQueue()
        ins_process = mp.Process(target=_writeIns,
                                 args=(ins_queue, args.out))
        ins_process.start()

    for j in sets:
        tmp = pool1.map(_nfrHelper, zip(j, itertools.repeat(params)))
        for result in tmp:
            if write_ins:
                nfr_queue.put(result[0])
                ins_queue.put(result[1])
            else:
                nfr_queue.put(result)
    pool1.close()
    pool1.join()

    # 'STOP' is the sentinel sent to each writer before joining it.
    nfr_queue.put('STOP')
    nfr_process.join()
    if write_ins:
        ins_queue.put('STOP')
        ins_process.join()

    pysam.tabix_compress(nfr_bed, nfr_bed + '.gz', force=True)
    # Delete the uncompressed file directly rather than through a shell
    # string, which breaks on paths containing spaces or metacharacters.
    os.remove(nfr_bed)
    pysam.tabix_index(nfr_bed + '.gz', preset="bed", force=True)
    if write_ins:
        pysam.tabix_compress(ins_bedgraph, ins_bedgraph + '.gz', force=True)
        os.remove(ins_bedgraph)
        pysam.tabix_index(ins_bedgraph + '.gz', preset="bed", force=True)
Ejemplo n.º 2
0
def get_counts(args):
    """Count fragments overlapping each region of a BED file.

    For every region in ``args.bed``, forward-strand reads of proper pairs are
    fetched from ``args.bam`` within ``args.upper`` bases of the region. A
    fragment is counted when its length passes
    ``_between(ilen, args.lower, args.upper)`` and either of its ends passes
    ``_between`` for the region bounds. With ``args.atac`` set, the left
    position is shifted by +4 and the length reduced by 8 so the fragment runs
    from insertion to insertion. Counts are saved, one value per row, to
    ``<out>.counts.txt.gz``.

    Args:
        args: parsed command-line namespace providing ``bed``, ``bam``,
            ``out``, ``lower``, ``upper`` and ``atac``.
    """
    if args.out is None:
        args.out = '.'.join(os.path.basename(args.bed).split('.')[0:-1])
    chunks = ChunkList.read(args.bed)
    # ``np.int`` was only an alias of the builtin and has been removed from
    # NumPy; ``int`` selects the same dtype.
    mat = np.zeros(len(chunks), dtype=int)
    bamHandle = AlignmentFile(args.bam)
    try:
        for j, chunk in enumerate(chunks):
            for read in bamHandle.fetch(chunk.chrom,
                                        max(0, chunk.start - args.upper),
                                        chunk.end + args.upper):
                # Use only the forward mate so each pair is seen once.
                if not read.is_proper_pair or read.is_reverse:
                    continue
                if args.atac:
                    # Shift to the insertion site; 8 bp correction makes the
                    # length insertion to insertion.
                    l_pos = read.pos + 4
                    ilen = abs(read.template_length) - 8
                else:
                    l_pos = read.pos
                    ilen = abs(read.template_length)
                r_pos = l_pos + ilen - 1
                if _between(ilen, args.lower, args.upper) and (
                        _between(l_pos, chunk.start, chunk.end)
                        or _between(r_pos, chunk.start, chunk.end)):
                    mat[j] += 1
    finally:
        # Release the BAM handle even if fetching fails part-way.
        bamHandle.close()
    np.savetxt(args.out + ".counts.txt.gz", mat, delimiter="\n", fmt='%i')
Ejemplo n.º 3
0
def get_signal(args):
    """Collect a signal track around the sites of a BED file and aggregate it.

    Sites are split into sets that ``_signalHelper`` processes in a worker
    pool. With ``args.all`` the per-set results are stacked and saved to
    ``<out>.tracks.txt.gz`` before being summed with NaNs treated as zero;
    otherwise the per-set results are simply summed. Unless ``args.no_agg``
    is set, the aggregate (divided by the number of sites when ``args.norm``
    is set) is plotted to ``<out>.agg.track.eps`` and saved to
    ``<out>.agg.track.txt``.
    """
    if not args.out:
        bed_name = os.path.basename(args.bed)
        args.out = '.'.join(bed_name.split('.')[0:-1])
    chunks = ChunkList.read(args.bed, strand_col=args.strand)
    params = _signalParams(args.bg, args.sizes, args.up, args.down, args.exp,
                           args.scale, args.positive, args.all)
    split_items = min(args.cores * 20, len(chunks))
    sets = chunks.split(items=split_items)

    pool = Pool(processes=args.cores)
    jobs = zip(sets, itertools.repeat(params))
    per_set = pool.map(_signalHelper, jobs)
    pool.close()
    pool.join()

    if args.all:
        mat = np.vstack(per_set)
        # Individual tracks are written before NaNs are zeroed for summing.
        np.savetxt(args.out + ".tracks.txt.gz", mat, delimiter=",",
                   fmt="%1.5g")
        mat[np.isnan(mat)] = 0
        result = np.sum(mat, axis=0)
    else:
        result = sum(per_set)

    if args.no_agg:
        return
    if args.norm:
        result = result / len(chunks)
    fig = plt.figure()
    positions = range(-args.up, args.down + 1)
    plt.plot(positions, result)
    plt.xlabel("Position relative to Site")
    plt.ylabel("Signal Intensity")
    fig.savefig(args.out + '.agg.track.eps')
    plt.close(fig)
    np.savetxt(args.out + '.agg.track.txt', result, delimiter="\t")
Ejemplo n.º 4
0
def get_counts(args):
    """Count fragments overlapping each region of a BED file.

    For every region in ``args.bed``, forward-strand reads of proper pairs are
    fetched from ``args.bam`` within ``args.upper`` bases of the region. A
    fragment is counted when its length passes
    ``_between(ilen, args.lower, args.upper)`` and either of its ends passes
    ``_between`` for the region bounds. With ``args.atac`` set, the left
    position is shifted by +4 and the length reduced by 8 so the fragment runs
    from insertion to insertion. Counts are saved, one value per row, to
    ``<out>.counts.txt.gz``.

    Args:
        args: parsed command-line namespace providing ``bed``, ``bam``,
            ``out``, ``lower``, ``upper`` and ``atac``.
    """
    if args.out is None:
        args.out = '.'.join(os.path.basename(args.bed).split('.')[0:-1])
    chunks = ChunkList.read(args.bed)
    # ``np.int`` was only an alias of the builtin and has been removed from
    # NumPy; ``int`` selects the same dtype.
    mat = np.zeros(len(chunks), dtype=int)
    bamHandle = AlignmentFile(args.bam)
    try:
        for j, chunk in enumerate(chunks):
            fetch_start = max(0, chunk.start - args.upper)
            fetch_end = chunk.end + args.upper
            for read in bamHandle.fetch(chunk.chrom, fetch_start, fetch_end):
                # Use only the forward mate so each pair is seen once.
                if not read.is_proper_pair or read.is_reverse:
                    continue
                if args.atac:
                    # Shift to the insertion site; 8 bp correction makes the
                    # length insertion to insertion.
                    l_pos = read.pos + 4
                    ilen = abs(read.template_length) - 8
                else:
                    l_pos = read.pos
                    ilen = abs(read.template_length)
                r_pos = l_pos + ilen - 1
                if _between(ilen, args.lower, args.upper) and (
                        _between(l_pos, chunk.start, chunk.end)
                        or _between(r_pos, chunk.start, chunk.end)):
                    mat[j] += 1
    finally:
        # Release the BAM handle even if fetching fails part-way.
        bamHandle.close()
    np.savetxt(args.out + ".counts.txt.gz", mat, delimiter="\n", fmt='%i')
Ejemplo n.º 5
0
def get_cov(args, bases=50000, splitsize=1000):
    """Compute coverage tracks and write them as a tabix-indexed bedgraph.

    Regions come from ``args.bed`` (merged) or, when no BED file is given,
    from the chromosome sizes of ``args.bam`` tiled with ``splitsize``.
    Batches of regions are processed by a worker pool running ``_covHelper``
    and the resulting tracks are passed to a ``_writeCov`` process through a
    bounded queue. The final output is ``<out>.cov.bedgraph.gz`` plus its
    tabix index.

    Args:
        args: parsed command-line namespace providing ``out``, ``bed``,
            ``bam`` and ``cores``; the whole namespace is also forwarded to
            ``_covHelper``.
        bases: batch size passed to ``ChunkList.split`` as ``bases``, or as
            ``bases // splitsize`` items when tiling whole chromosomes.
        splitsize: ``splitsize`` passed to ``ChunkList.convertChromSizes``.
    """
    if not args.out:
        source = args.bam if args.bed is None else args.bed
        args.out = '.'.join(os.path.basename(source).split('.')[0:-1])
    if args.bed is None:
        chrs = read_chrom_sizes_from_bam(args.bam)
        chunks = ChunkList.convertChromSizes(chrs, splitsize=splitsize)
        # Floor division keeps the item count an integer; ``/`` yields a
        # float on Python 3.
        sets = chunks.split(items=bases // splitsize)
    else:
        chunks = ChunkList.read(args.bed)
        chunks.merge()
        sets = chunks.split(bases=bases)
    # Bound the write queue relative to the mean chunk length.
    maxQueueSize = max(
        2, int(2 * bases / np.mean([chunk.length() for chunk in chunks])))
    pool1 = mp.Pool(processes=max(1, args.cores - 1))

    bedgraph = args.out + '.cov.bedgraph'
    # Create/truncate the output file before the writer process starts.
    with open(bedgraph, 'w'):
        pass
    write_queue = mp.JoinableQueue(maxsize=maxQueueSize)
    write_process = mp.Process(target=_writeCov,
                               args=(write_queue, args.out))
    write_process.start()
    for j in sets:
        tmp = pool1.map(_covHelper, zip(j, itertools.repeat(args)))
        for track in tmp:
            write_queue.put(track)
    pool1.close()
    pool1.join()
    # 'STOP' is the sentinel sent to the writer before joining it.
    write_queue.put('STOP')
    write_process.join()

    pysam.tabix_compress(bedgraph, bedgraph + '.gz', force=True)
    # Delete the uncompressed file directly rather than through a shell
    # string, which breaks on paths containing spaces or metacharacters.
    os.remove(bedgraph)
    pysam.tabix_index(bedgraph + '.gz', preset="bed", force=True)
Ejemplo n.º 6
0
def get_nucleotide(args):
    """Compute average sequence content around the sites of a BED file.

    Sites are split into sets that ``_nucleotideHelper`` processes in a worker
    pool. Each helper result is indexed as ``(matrix, count)``; matrices and
    counts are accumulated and the summed matrix is divided by the total
    count. With ``args.norm`` the result is further divided by the background
    frequencies from ``seq.getNucFreqs``. The table is written to
    ``<out>.nucfreq.txt`` with the nucleotide labels as first column.

    Args:
        args: parsed command-line namespace providing ``out``, ``bed``,
            ``strand``, ``up``, ``down``, ``fasta``, ``dinucleotide``,
            ``cores`` and ``norm``.
    """
    if not args.out:
        args.out = '.'.join(os.path.basename(args.bed).split('.')[0:-1])
    chunks = ChunkList.read(args.bed, strand_col=args.strand)
    params = _NucleotideParameters(args.up, args.down, args.fasta,
                                   args.dinucleotide)
    sets = chunks.split(bases=10000)
    pool = Pool(processes=args.cores)
    tmp = pool.map(_nucleotideHelper, zip(sets, itertools.repeat(params)))
    pool.close()
    pool.join()
    result = np.zeros(params.matsize)
    n = 0.0
    for i in tmp:
        result += i[0]
        n += i[1]
    # True division: this is an average, and floor division would truncate
    # the fractional frequencies.
    result = result / n
    if args.norm:
        normfreqs = seq.getNucFreqs(params.fasta, params.nucleotides)
        # Repeat each background frequency across its row before dividing.
        result = result / np.reshape(np.repeat(normfreqs, result.shape[1]),
                                     result.shape)
    # Save text output: label column followed by values cast to 8-char strings.
    out = np.hstack(
        (np.array(params.nucleotides)[:, np.newaxis], result.astype('|S8')))
    np.savetxt(args.out + '.nucfreq.txt', out, delimiter="\t", fmt='%s')
Ejemplo n.º 7
0
def get_signal(args):
    """Collect a signal track around the sites of a BED file and aggregate it.

    Sites are split into sets that ``_signalHelper`` processes in a worker
    pool. With ``args.all`` the per-set results are stacked and saved to
    ``<out>.tracks.txt.gz`` before being summed with NaNs treated as zero;
    otherwise the per-set results are simply summed. Unless ``args.no_agg``
    is set, the aggregate (divided by the number of sites when ``args.norm``
    is set) is plotted to ``<out>.agg.track.eps`` and saved to
    ``<out>.agg.track.txt``.
    """
    if not args.out:
        name_parts = os.path.basename(args.bed).split('.')
        args.out = '.'.join(name_parts[0:-1])
    chunks = ChunkList.read(args.bed, strand_col=args.strand)
    params = _signalParams(args.bg, args.sizes, args.up, args.down, args.exp,
                           args.scale, args.positive, args.all)
    sets = chunks.split(items=min(args.cores * 20, len(chunks)))

    pool = Pool(processes=args.cores)
    helper_out = pool.map(_signalHelper, zip(sets, itertools.repeat(params)))
    pool.close()
    pool.join()

    if not args.all:
        result = sum(helper_out)
    else:
        mat = np.vstack(helper_out)
        # Individual tracks are written before NaNs are zeroed for summing.
        np.savetxt(args.out + ".tracks.txt.gz", mat, delimiter=",",
                   fmt="%1.5g")
        mat[np.isnan(mat)] = 0
        result = np.sum(mat, axis=0)

    if args.no_agg:
        return
    if args.norm:
        result = result / len(chunks)
    fig = plt.figure()
    x_axis = range(-args.up, args.down + 1)
    plt.plot(x_axis, result)
    plt.xlabel("Position relative to Site")
    plt.ylabel("Signal Intensity")
    fig.savefig(args.out + '.agg.track.eps')
    plt.close(fig)
    np.savetxt(args.out + '.agg.track.txt', result, delimiter="\t")
Ejemplo n.º 8
0
def make_bias_vplot(args):
    """Build a bias V-plot for the sites of a BED file and save it.

    The sites are split into sets, ``_biasVplotHelper`` is mapped over them
    in a worker pool, and the summed result is wrapped in a ``VMat``. The
    plot goes to ``<out>.Bias.Vplot.eps`` and the matrix to
    ``<out>.Bias.VMat``; with ``args.plot_extra`` two extra plots are written.
    """
    if not args.out:
        name_parts = os.path.basename(args.bed).split('.')
        args.out = '.'.join(name_parts[0:-1])
    chunks = ChunkList.read(args.bed, strand_col=args.strand)
    sets = chunks.split(items=min(args.cores * 20, len(chunks)))
    params = _BiasVplotParams(
        flank=args.flank,
        lower=args.lower,
        upper=args.upper,
        bg=args.bg,
        sizes=args.sizes,
        scale=args.scale,
        pwm=args.pwm,
        fasta=args.fasta,
    )
    pool = Pool(processes=args.cores)
    jobs = zip(sets, itertools.repeat(params))
    partial_mats = pool.map(_biasVplotHelper, jobs)
    pool.close()
    pool.join()
    # Combine the per-set matrices into one VMat object.
    vmat = VMat(sum(partial_mats), args.lower, args.upper)
    vmat.plot(filename=args.out + ".Bias.Vplot.eps")
    if args.plot_extra:
        # Insertion profile represented by the V-plot.
        vmat.converto1d()
        vmat.plot_1d(filename=args.out + '.InsertionProfile.eps')
        # Insert size distribution represented by the V-plot.
        vmat.plot_insertsize(filename=args.out + ".InsertSizes.eps")
    vmat.save(args.out + ".Bias.VMat")
Ejemplo n.º 9
0
def make_bias_vplot(args):
    """Build a bias V-plot for the sites of a BED file and save it.

    The sites are split into sets, ``_biasVplotHelper`` is mapped over them
    in a worker pool, and the summed result is wrapped in a ``VMat``. The
    plot goes to ``<out>.Bias.Vplot.eps`` and the matrix to
    ``<out>.Bias.VMat``; with ``args.plot_extra`` two extra plots are written.
    """
    if not args.out:
        bed_name = os.path.basename(args.bed)
        args.out = '.'.join(bed_name.split('.')[0:-1])
    prefix = args.out
    chunks = ChunkList.read(args.bed, strand_col=args.strand)
    sets = chunks.split(items=min(args.cores * 20, len(chunks)))
    params = _BiasVplotParams(flank=args.flank, lower=args.lower,
                              upper=args.upper, bg=args.bg, sizes=args.sizes,
                              scale=args.scale, pwm=args.pwm,
                              fasta=args.fasta)
    pool = Pool(processes=args.cores)
    helper_out = pool.map(_biasVplotHelper,
                          zip(sets, itertools.repeat(params)))
    pool.close()
    pool.join()
    summed = sum(helper_out)
    # Wrap the summed matrix in a VMat object.
    vmat = VMat(summed, args.lower, args.upper)
    vmat.plot(filename=prefix + ".Bias.Vplot.eps")
    if args.plot_extra:
        # Insertion profile represented by the V-plot.
        vmat.converto1d()
        vmat.plot_1d(filename=prefix + '.InsertionProfile.eps')
        # Insert size distribution represented by the V-plot.
        vmat.plot_insertsize(filename=prefix + ".InsertSizes.eps")
    vmat.save(prefix + ".Bias.VMat")
Ejemplo n.º 10
0
def make_bias_track(args, bases=500000, splitsize=1000):
    """Compute a bias score track and write it as a tabix-indexed bedgraph.

    Regions come from ``args.bed`` (merged) or, when no BED file is given,
    from ``params.chrs`` tiled with ``splitsize``. Batches of regions are
    processed by a worker pool running ``_biasHelper`` and the resulting
    tracks are passed to a ``_writeBias`` process through a bounded queue.
    The final output is ``<out>.Scores.bedgraph.gz`` plus its tabix index.

    Args:
        args: parsed command-line namespace providing ``out``, ``bed``,
            ``fasta``, ``pwm`` and ``cores``.
        bases: batch size passed to ``ChunkList.split`` as ``bases``, or as
            ``bases // splitsize`` items when tiling whole chromosomes.
        splitsize: ``splitsize`` passed to ``ChunkList.convertChromSizes``.
    """
    if args.out is None:
        source = args.bed if args.bed is not None else args.fasta
        args.out = '.'.join(os.path.basename(source).split('.')[0:-1])
    params = _BiasParams(args.fasta, args.pwm)
    if args.bed is None:
        chunks = ChunkList.convertChromSizes(params.chrs, splitsize=splitsize)
        # Floor division keeps the item count an integer; ``/`` yields a
        # float on Python 3.
        sets = chunks.split(items=bases // splitsize)
    else:
        chunks = ChunkList.read(args.bed)
        chunks.merge()
        sets = chunks.split(bases=bases)
    # Bound the write queue relative to the mean chunk length.
    maxQueueSize = max(
        2, int(2 * bases / np.mean([chunk.length() for chunk in chunks])))
    pool = mp.Pool(processes=max(1, args.cores - 1))

    bedgraph = args.out + '.Scores.bedgraph'
    # Create/truncate the output file before the writer process starts.
    with open(bedgraph, 'w'):
        pass
    write_queue = mp.JoinableQueue(maxsize=maxQueueSize)
    write_process = mp.Process(target=_writeBias,
                               args=(write_queue, args.out))
    write_process.start()
    for j in sets:
        tmp = pool.map(_biasHelper, zip(j, itertools.repeat(params)))
        for track in tmp:
            write_queue.put(track)
    pool.close()
    pool.join()
    # 'STOP' is the sentinel sent to the writer before joining it.
    write_queue.put('STOP')
    write_process.join()

    pysam.tabix_compress(bedgraph, bedgraph + '.gz', force=True)
    # Delete the uncompressed file directly rather than through a shell
    # string, which breaks on paths containing spaces or metacharacters.
    os.remove(bedgraph)
    pysam.tabix_index(bedgraph + '.gz', preset="bed", force=True)
Ejemplo n.º 11
0
 def setUp(self):
     """Build the bias-matrix fixture from the bundled example files.

     Takes the first region of ``example/example.bed``, loads the example
     score track over it, and builds a ``BiasMat2D`` over the region shrunk
     by 100 on each side with the arguments ``100, 200``.
     """
     first_chunk = ChunkList.read('example/example.bed')[0]
     self.chunk = first_chunk
     self.biastrack = InsertionBiasTrack(first_chunk.chrom,
                                         first_chunk.start,
                                         first_chunk.end)
     self.biastrack.read_track('example/example.Scores.bedgraph.gz')
     mat_start = first_chunk.start + 100
     mat_end = first_chunk.end - 100
     self.biasmat = BiasMat2D(first_chunk.chrom, mat_start, mat_end,
                              100, 200)
     self.biasmat.makeBiasMat(self.biastrack)
Ejemplo n.º 12
0
 def setUp(self):
     """Build the occupancy test fixture from the bundled example files.

     Takes the first region of ``example/example.bed``, loads the example
     V-matrix, builds a fragment matrix from ``example/example.bam`` over
     the region widened by ``vmat.w`` on each side, and computes a signal
     track over the region from the two.
     """
     bed_list = ChunkList.read('example/example.bed')
     self.chunk = bed_list[0]
     # Load the V-matrix once; the original opened the same file twice.
     self.vmat = V.VMat.open('example/example.VMat')
     self.mat = FragmentMat2D(self.chunk.chrom,
                              self.chunk.start - self.vmat.w,
                              self.chunk.end + self.vmat.w,
                              self.vmat.lower,
                              self.vmat.upper)
     self.mat.makeFragmentMat('example/example.bam')
     self.signal = Nuc.SignalTrack(self.chunk.chrom, self.chunk.start,
                                   self.chunk.end)
     self.signal.calculateSignal(self.mat, self.vmat)
Ejemplo n.º 13
0
 def setUp(self):
     """Build the bias-matrix fixture from the bundled example files.

     Takes the first region of ``example/example.bed``, loads the example
     score track over it, and builds a ``BiasMat2D`` over the region shrunk
     by 100 on each side with the arguments ``100, 200``.
     """
     regions = ChunkList.read('example/example.bed')
     region = regions[0]
     self.chunk = region
     self.biastrack = InsertionBiasTrack(region.chrom, region.start,
                                         region.end)
     self.biastrack.read_track('example/example.Scores.bedgraph.gz')
     inner_start = region.start + 100
     inner_end = region.end - 100
     self.biasmat = BiasMat2D(region.chrom, inner_start, inner_end, 100, 200)
     self.biasmat.makeBiasMat(self.biastrack)
Ejemplo n.º 14
0
    def setUp(self):
        """Build the ``SignalDistribution`` fixture from the example files.

        Uses the first region of ``example/example.bed``, the example
        V-matrix, and a bias matrix built from the example score track over
        the region shrunk by 200 on each side.
        """
        first = ChunkList.read('example/example.bed')[0]
        vmat = V.VMat.open('example/example.VMat')
        bias_track = InsertionBiasTrack(first.chrom, first.start, first.end)
        bias_track.read_track('example/example.Scores.bedgraph.gz')
        bias_mat = BiasMat2D(first.chrom, first.start + 200, first.end - 200,
                             100, 250)
        bias_mat.makeBiasMat(bias_track)
        position = first.start + 300
        self.signaldist = Nuc.SignalDistribution(position, vmat, bias_mat, 35)
Ejemplo n.º 15
0
    def setUp(self):
        """Build the ``SignalDistribution`` fixture from the example files.

        Uses the first region of ``example/example.bed``, the example
        V-matrix, and a bias matrix built from the example score track over
        the region shrunk by 200 on each side.
        """
        regions = ChunkList.read('example/example.bed')
        region = regions[0]
        vmat = V.VMat.open('example/example.VMat')
        track = InsertionBiasTrack(region.chrom, region.start, region.end)
        track.read_track('example/example.Scores.bedgraph.gz')
        mat_start = region.start + 200
        mat_end = region.end - 200
        biasmat = BiasMat2D(region.chrom, mat_start, mat_end, 100, 250)
        biasmat.makeBiasMat(track)
        self.signaldist = Nuc.SignalDistribution(region.start + 300, vmat,
                                                 biasmat, 35)
Ejemplo n.º 16
0
def run_diff(args, bases=500000):
    """Run differential occupancy calling.

    Reads and merges the regions of ``args.bed``, builds two fragment-size
    mixture models from ``args.sizes1`` and ``args.sizes2``, maps
    ``_occHelper`` over batches of regions in a worker pool and sends element
    1 of every result to a ``_writeDiff`` process. The output is compressed
    and indexed as ``<out>.occdiff.bed.gz``.

    NOTE(review): this function looks unfinished. ``fragment_dist`` passed to
    ``OccupancyParameters`` is not defined here (only ``fragment_dist1`` and
    ``fragment_dist2`` are), so unless it exists at module level the call
    raises NameError. Unlike sibling commands, ``args.out`` gets no default.

    Args:
        args: parsed command-line namespace; attributes read here include
            ``bam``, ``pwm``, ``bed``, ``flank``, ``upper``, ``sizes1``,
            ``sizes2``, ``fasta``, ``nuc_sep``, ``min_occ``,
            ``confidence_interval``, ``cores`` and ``out``.
        bases: batch size passed to ``chunks.split``.
    """
    chrs = read_chrom_sizes_from_bam(args.bam)
    pwm = PWM.open(args.pwm)
    # NOTE(review): ``args.upper / 2`` is a float on Python 3 -- confirm that
    # ``min_offset`` accepts a non-integer value.
    chunks = ChunkList.read(args.bed,
                            chromDict=chrs,
                            min_offset=args.flank + args.upper / 2 +
                            max(pwm.up, pwm.down))
    chunks.merge()
    # NOTE(review): computed but never used below (the queue is unbounded).
    maxQueueSize = max(
        2, int(100 * bases / np.mean([chunk.length() for chunk in chunks])))
    # Build one fragment-size mixture model per sizes file.
    fragment_dist1 = FragmentMixDistribution(0, upper=args.upper)
    fragment_dist1.fragmentsizes = FragmentSizes(
        0, args.upper, vals=FragmentSizes.open(args.sizes1).get(0, args.upper))
    fragment_dist1.modelNFR()
    fragment_dist2 = FragmentMixDistribution(0, upper=args.upper)
    fragment_dist2.fragmentsizes = FragmentSizes(
        0, args.upper, vals=FragmentSizes.open(args.sizes2).get(0, args.upper))
    fragment_dist2.modelNFR()
    # NOTE(review): ``fragment_dist`` is undefined in this scope; neither
    # ``fragment_dist1`` nor ``fragment_dist2`` is used after being built.
    params = OccupancyParameters(fragment_dist,
                                 args.upper,
                                 args.fasta,
                                 args.pwm,
                                 sep=args.nuc_sep,
                                 min_occ=args.min_occ,
                                 flank=args.flank,
                                 bam=args.bam,
                                 ci=args.confidence_interval)
    sets = chunks.split(bases=bases)
    pool1 = mp.Pool(processes=max(1, args.cores - 1))
    # Create/truncate the output file before the writer process starts.
    diff_handle = open(args.out + '.occdiff.bed', 'w')
    diff_handle.close()
    diff_queue = mp.JoinableQueue()
    diff_process = mp.Process(target=_writeDiff, args=(diff_queue, args.out))
    diff_process.start()
    # NOTE(review): allocated but never used below.
    nuc_dist = np.zeros(args.upper)
    for j in sets:
        tmp = pool1.map(_occHelper, zip(j, itertools.repeat(params)))
        for result in tmp:
            # Only element 1 of each helper result is written.
            diff_queue.put(result[1])
    pool1.close()
    pool1.join()
    # 'STOP' is the sentinel sent to the writer before joining it.
    diff_queue.put('STOP')
    diff_process.join()
    pysam.tabix_compress(args.out + '.occdiff.bed',
                         args.out + '.occdiff.bed.gz',
                         force=True)
    # NOTE(review): shell-built ``rm`` breaks on paths with spaces or shell
    # metacharacters.
    shell_command('rm ' + args.out + '.occdiff.bed')
    pysam.tabix_index(args.out + '.occdiff.bed.gz', preset="bed", force=True)
Ejemplo n.º 17
0
def get_pwm(args, bases=50000, splitsize=1000):
    """Estimate a PWM of the sequence content around insertions and save it.

    Regions come from ``args.bed`` or, when no BED file is given, from the
    chromosome sizes of ``args.fasta`` tiled with ``splitsize``.
    ``_pwmHelper`` is mapped over sets of regions in a worker pool; each
    result is indexed as ``(matrix, count)``. The summed matrix is divided by
    the total count and then by background nucleotide frequencies, optionally
    symmetrized, and written to ``<out>.PWM.txt``.

    Args:
        args: parsed command-line namespace providing ``out``, ``bam``,
            ``fasta``, ``bed``, ``flank``, ``lower``, ``upper``, ``atac``,
            ``sym`` and ``cores``.
        bases: batch size passed to ``ChunkList.split`` as ``bases``, or as
            ``bases // splitsize`` items when tiling whole chromosomes.
        splitsize: ``splitsize`` passed to ``ChunkList.convertChromSizes``.
    """
    if not args.out:
        args.out = '.'.join(os.path.basename(args.bam).split('.')[0:-1])
    chrs = read_chrom_sizes_from_fasta(args.fasta)
    if args.bed is None:
        chunks = ChunkList.convertChromSizes(chrs,
                                             splitsize=splitsize,
                                             offset=args.flank)
        # Floor division keeps the item count an integer; ``/`` yields a
        # float on Python 3.
        sets = chunks.split(items=bases // splitsize)
    else:
        chunks = ChunkList.read(args.bed,
                                chromDict=chrs,
                                min_offset=args.flank)
        sets = chunks.split(bases=bases)
    params = _PWMParameters(bam=args.bam,
                            up=args.flank,
                            down=args.flank,
                            fasta=args.fasta,
                            lower=args.lower,
                            upper=args.upper,
                            atac=args.atac,
                            sym=args.sym)
    pool = Pool(processes=args.cores)
    tmp = pool.map(_pwmHelper, zip(sets, itertools.repeat(params)))
    pool.close()
    pool.join()
    # Accumulate per-set matrices and counts, then average.
    n = 0.0
    result = np.zeros((len(params.nucleotides), params.up + params.down + 1))
    for i in tmp:
        result += i[0]
        n += i[1]
    result /= n
    # Background frequencies: restricted to the BED regions when given.
    if args.bed:
        normfreqs = seq.getNucFreqsFromChunkList(chunks, args.fasta,
                                                 params.nucleotides)
    else:
        normfreqs = seq.getNucFreqs(args.fasta, params.nucleotides)
    # Repeat each background frequency across its row before dividing.
    result = result / np.reshape(np.repeat(normfreqs, result.shape[1]),
                                 result.shape)
    if args.sym:
        # Symmetrize: average the left half with the right half flipped along
        # both axes, then mirror the averaged half back onto the right side.
        left = result[:, 0:(args.flank + 1)]
        right = result[:, args.flank:]
        rightflipped = np.fliplr(np.flipud(right))
        combined = (left + rightflipped) / 2
        result = np.hstack(
            (combined, np.fliplr(np.flipud(combined[:, 0:args.flank]))))
    pwm = PWM(result, args.flank, args.flank, params.nucleotides)
    pwm.save(args.out + '.PWM.txt')
Ejemplo n.º 18
0
 def setUp(self):
     """Build the occupancy test fixture from the bundled example files.

     Takes the first region of ``example/example.bed``, loads the example
     V-matrix, builds a fragment matrix from ``example/example.bam`` over
     the region widened by ``vmat.w`` on each side, and computes a signal
     track over the region from the two.
     """
     bed_list = ChunkList.read('example/example.bed')
     self.chunk = bed_list[0]
     # Load the V-matrix once; the original opened the same file twice.
     self.vmat = V.VMat.open('example/example.VMat')
     self.mat = FragmentMat2D(self.chunk.chrom,
                              self.chunk.start - self.vmat.w,
                              self.chunk.end + self.vmat.w, self.vmat.lower,
                              self.vmat.upper)
     self.mat.makeFragmentMat('example/example.bam')
     self.signal = Nuc.SignalTrack(self.chunk.chrom, self.chunk.start,
                                   self.chunk.end)
     self.signal.calculateSignal(self.mat, self.vmat)
Ejemplo n.º 19
0
def run_nfr(args):
    """Call nucleosome-free regions and write them as a tabix-indexed BED file.

    Regions are read from ``args.bed``, merged, and processed in batches by a
    worker pool running ``_nfrHelper``. Results are handed to dedicated writer
    processes through queues. When no insertion track is supplied
    (``args.ins_track`` is None) each helper result is indexed as a pair and
    its second element is written to ``<out>.ins.bedgraph.gz``.

    Args:
        args: parsed command-line namespace. Attributes read here: ``bam``,
            ``ins_track``, ``out``, ``calls``, ``bed``, ``cores``,
            ``occ_track``, ``max_occ``, ``max_occ_upper``, ``fasta`` and
            ``pwm``.

    Raises:
        Exception: if neither ``args.bam`` nor ``args.ins_track`` is given.
    """
    if args.bam is None and args.ins_track is None:
        raise Exception("Must supply either bam file or insertion track")
    if not args.out:
        # Default prefix: basename of the calls file without its last three
        # dot-separated components.
        args.out = '.'.join(os.path.basename(args.calls).split('.')[0:-3])
    chunks = ChunkList.read(args.bed)
    chunks.merge()
    params = NFRParameters(args.occ_track, args.calls, args.ins_track,
                           args.bam, max_occ=args.max_occ,
                           max_occ_upper=args.max_occ_upper,
                           fasta=args.fasta, pwm=args.pwm)
    write_ins = params.ins_track is None
    sets = chunks.split(items=args.cores * 5)
    pool1 = mp.Pool(processes=max(1, args.cores - 1))

    nfr_bed = args.out + '.nfrpos.bed'
    # Create/truncate the output file before the writer process starts.
    with open(nfr_bed, 'w'):
        pass
    nfr_queue = mp.JoinableQueue()
    nfr_process = mp.Process(target=_writeNFR, args=(nfr_queue, args.out))
    nfr_process.start()

    ins_bedgraph = args.out + '.ins.bedgraph'
    if write_ins:
        with open(ins_bedgraph, 'w'):
            pass
        ins_queue = mp.JoinableQueue()
        ins_process = mp.Process(target=_writeIns,
                                 args=(ins_queue, args.out))
        ins_process.start()

    for j in sets:
        tmp = pool1.map(_nfrHelper, zip(j, itertools.repeat(params)))
        for result in tmp:
            if write_ins:
                nfr_queue.put(result[0])
                ins_queue.put(result[1])
            else:
                nfr_queue.put(result)
    pool1.close()
    pool1.join()

    # 'STOP' is the sentinel sent to each writer before joining it.
    nfr_queue.put('STOP')
    nfr_process.join()
    if write_ins:
        ins_queue.put('STOP')
        ins_process.join()

    pysam.tabix_compress(nfr_bed, nfr_bed + '.gz', force=True)
    # Delete the uncompressed file directly rather than through a shell
    # string, which breaks on paths containing spaces or metacharacters.
    os.remove(nfr_bed)
    pysam.tabix_index(nfr_bed + '.gz', preset="bed", force=True)
    if write_ins:
        pysam.tabix_compress(ins_bedgraph, ins_bedgraph + '.gz', force=True)
        os.remove(ins_bedgraph)
        pysam.tabix_index(ins_bedgraph + '.gz', preset="bed", force=True)
Ejemplo n.º 20
0
def make_bias_track(args, bases=500000, splitsize=1000):
    """Compute a bias score track and write it as a tabix-indexed bedgraph.

    Regions come from ``args.bed`` (checked against the chromosome names in
    ``params.chrs`` and merged) or, when no BED file is given, from
    ``params.chrs`` tiled with ``splitsize``. Batches of regions are
    processed by a worker pool running ``_biasHelper`` and the resulting
    tracks are passed to a ``_writeBias`` process through a bounded queue.
    The final output is ``<out>.Scores.bedgraph.gz`` plus its tabix index.

    Args:
        args: parsed command-line namespace providing ``out``, ``bed``,
            ``fasta``, ``pwm`` and ``cores``.
        bases: batch size passed to ``ChunkList.split`` as ``bases``, or as
            ``bases // splitsize`` items when tiling whole chromosomes.
        splitsize: ``splitsize`` passed to ``ChunkList.convertChromSizes``.
    """
    if args.out is None:
        source = args.bed if args.bed is not None else args.fasta
        args.out = '.'.join(os.path.basename(source).split('.')[0:-1])
    params = _BiasParams(args.fasta, args.pwm)

    if args.bed is None:
        chunks = ChunkList.convertChromSizes(params.chrs, splitsize=splitsize)
        sets = chunks.split(items=bases // splitsize)
    else:
        chunks = ChunkList.read(args.bed)
        chunks.checkChroms(list(params.chrs.keys()))
        chunks.merge()
        sets = chunks.split(bases=bases)

    # Bound the write queue relative to the mean chunk length.
    maxQueueSize = max(
        2, int(2 * bases / np.mean([chunk.length() for chunk in chunks])))
    pool = mp.Pool(processes=max(1, args.cores - 1))

    bedgraph = args.out + '.Scores.bedgraph'
    # Create/truncate the output file before the writer process starts.
    with open(bedgraph, 'w'):
        pass
    write_queue = mp.JoinableQueue(maxsize=maxQueueSize)
    write_process = mp.Process(target=_writeBias,
                               args=(write_queue, args.out))
    write_process.start()
    for j in sets:
        # pool.map accepts any iterable, so no intermediate list is needed.
        tmp = pool.map(_biasHelper, zip(j, itertools.repeat(params)))
        for track in tmp:
            write_queue.put(track)
    pool.close()
    pool.join()
    # 'STOP' is the sentinel sent to the writer before joining it.
    write_queue.put('STOP')
    write_process.join()

    pysam.tabix_compress(bedgraph, bedgraph + '.gz', force=True)
    # Delete the uncompressed file directly rather than through a shell
    # string, which breaks on paths containing spaces or metacharacters.
    os.remove(bedgraph)
    pysam.tabix_index(bedgraph + '.gz', preset="bed", force=True)
Ejemplo n.º 21
0
def get_ins(args, bases=50000, splitsize=1000):
    """Compute insertion tracks and write them as a tabix-indexed bedgraph.

    Regions come from ``args.bed`` (merged) or, when no BED file is given,
    from the chromosome sizes of ``args.bam`` tiled with ``splitsize``.
    Batches of regions are processed by a worker pool running
    ``_insHelperSmooth`` when ``args.smooth`` is set and ``_insHelper``
    otherwise; the resulting tracks are passed to a ``_writeIns`` process
    through a bounded queue. The final output is ``<out>.ins.bedgraph.gz``
    plus its tabix index.

    Args:
        args: parsed command-line namespace providing ``out``, ``bed``,
            ``bam``, ``smooth`` and ``cores``; the whole namespace is also
            forwarded to the helper.
        bases: batch size passed to ``ChunkList.split`` as ``bases``, or as
            ``bases // splitsize`` items when tiling whole chromosomes.
        splitsize: ``splitsize`` passed to ``ChunkList.convertChromSizes``.
    """
    if not args.out:
        source = args.bam if args.bed is None else args.bed
        args.out = '.'.join(os.path.basename(source).split('.')[0:-1])
    if args.bed is None:
        chrs = read_chrom_sizes_from_bam(args.bam)
        chunks = ChunkList.convertChromSizes(chrs, splitsize=splitsize)
        # Floor division keeps the item count an integer; ``/`` yields a
        # float on Python 3.
        sets = chunks.split(items=bases // splitsize)
    else:
        chunks = ChunkList.read(args.bed)
        chunks.merge()
        sets = chunks.split(bases=bases)
    # Bound the write queue relative to the mean chunk length.
    maxQueueSize = max(
        2, int(2 * bases / np.mean([chunk.length() for chunk in chunks])))
    pool1 = mp.Pool(processes=max(1, args.cores - 1))

    bedgraph = args.out + '.ins.bedgraph'
    # Create/truncate the output file before the writer process starts.
    with open(bedgraph, 'w'):
        pass
    write_queue = mp.JoinableQueue(maxsize=maxQueueSize)
    write_process = mp.Process(target=_writeIns,
                               args=(write_queue, args.out))
    write_process.start()
    helper = _insHelperSmooth if args.smooth else _insHelper
    for j in sets:
        tmp = pool1.map(helper, zip(j, itertools.repeat(args)))
        for track in tmp:
            write_queue.put(track)
    pool1.close()
    pool1.join()
    # 'STOP' is the sentinel sent to the writer before joining it.
    write_queue.put('STOP')
    write_process.join()

    pysam.tabix_compress(bedgraph, bedgraph + '.gz', force=True)
    # Delete the uncompressed file directly rather than through a shell
    # string, which breaks on paths containing spaces or metacharacters.
    os.remove(bedgraph)
    pysam.tabix_index(bedgraph + '.gz', preset="bed", force=True)
Ejemplo n.º 22
0
def get_sizes(args):
    """Compute the fragment-size distribution of a BAM file and save it.

    Sizes are calculated over the merged regions of ``args.bed`` when given,
    otherwise with no region restriction passed. The distribution is saved
    to ``<out>.fragmentsizes.txt`` and, unless ``args.no_plot`` is set,
    plotted to ``<out>.fragmentsizes.eps``.
    """
    if args.out is None:
        bam_name = os.path.basename(args.bam)
        args.out = '.'.join(bam_name.split('.')[0:-1])
    sizes = FragmentSizes(lower=args.lower, upper=args.upper, atac=args.atac)
    if not args.bed:
        sizes.calculateSizes(args.bam)
    else:
        regions = ChunkList.read(args.bed)
        regions.merge()
        sizes.calculateSizes(args.bam, regions)
    sizes.save(args.out + '.fragmentsizes.txt')
    if args.no_plot:
        return
    # Plot frequency against fragment size over [lower, upper).
    fig = plt.figure()
    x_values = range(sizes.lower, sizes.upper)
    y_values = sizes.get(sizes.lower, sizes.upper)
    plt.plot(x_values, y_values, label=args.out)
    plt.xlabel("Fragment Size")
    plt.ylabel("Frequency")
    fig.savefig(args.out + '.fragmentsizes.eps')
    plt.close(fig)
Ejemplo n.º 23
0
def get_pwm(args, bases=50000, splitsize=1000):
    """Estimate a PWM of the sequence content around insertions and save it.

    Regions come from ``args.bed`` or, when no BED file is given, from the
    chromosome sizes of ``args.fasta`` tiled with ``splitsize``.
    ``_pwmHelper`` is mapped over sets of regions in a worker pool; each
    result is indexed as ``(matrix, count)``. The summed matrix is divided by
    the total count and then by background nucleotide frequencies, optionally
    symmetrized, and written to ``<out>.PWM.txt``.

    Args:
        args: parsed command-line namespace providing ``out``, ``bam``,
            ``fasta``, ``bed``, ``flank``, ``lower``, ``upper``, ``atac``,
            ``sym`` and ``cores``.
        bases: batch size passed to ``ChunkList.split`` as ``bases``, or as
            ``bases // splitsize`` items when tiling whole chromosomes.
        splitsize: ``splitsize`` passed to ``ChunkList.convertChromSizes``.
    """
    if not args.out:
        args.out = '.'.join(os.path.basename(args.bam).split('.')[0:-1])
    chrs = read_chrom_sizes_from_fasta(args.fasta)
    if args.bed is None:
        chunks = ChunkList.convertChromSizes(chrs, splitsize=splitsize,
                                             offset=args.flank)
        # Floor division keeps the item count an integer; ``/`` yields a
        # float on Python 3.
        sets = chunks.split(items=bases // splitsize)
    else:
        chunks = ChunkList.read(args.bed, chromDict=chrs,
                                min_offset=args.flank)
        sets = chunks.split(bases=bases)
    params = _PWMParameters(bam=args.bam, up=args.flank, down=args.flank,
                            fasta=args.fasta, lower=args.lower,
                            upper=args.upper, atac=args.atac, sym=args.sym)
    pool = Pool(processes=args.cores)
    tmp = pool.map(_pwmHelper, zip(sets, itertools.repeat(params)))
    pool.close()
    pool.join()
    # Accumulate per-set matrices and counts, then average.
    n = 0.0
    result = np.zeros((len(params.nucleotides), params.up + params.down + 1))
    for i in tmp:
        result += i[0]
        n += i[1]
    result /= n
    # Background frequencies: restricted to the BED regions when given.
    if args.bed:
        normfreqs = seq.getNucFreqsFromChunkList(chunks, args.fasta,
                                                 params.nucleotides)
    else:
        normfreqs = seq.getNucFreqs(args.fasta, params.nucleotides)
    # Repeat each background frequency across its row before dividing.
    result = result / np.reshape(np.repeat(normfreqs, result.shape[1]),
                                 result.shape)
    if args.sym:
        # Symmetrize: average the left half with the right half flipped along
        # both axes, then mirror the averaged half back onto the right side.
        left = result[:, 0:(args.flank + 1)]
        right = result[:, args.flank:]
        rightflipped = np.fliplr(np.flipud(right))
        combined = (left + rightflipped) / 2
        result = np.hstack(
            (combined, np.fliplr(np.flipud(combined[:, 0:args.flank]))))
    pwm = PWM(result, args.flank, args.flank, params.nucleotides)
    pwm.save(args.out + '.PWM.txt')
Ejemplo n.º 24
0
def get_nucleotide(args):
    """Compute average sequence content around the sites of a BED file.

    ``_nucleotideHelper`` is mapped over sets of sites in a worker pool; the
    first element of each result is accumulated into a matrix and the second
    into a running total, and the matrix is divided by that total. With
    ``args.norm`` the matrix is further divided by background frequencies.
    The table is written to ``<out>.nucfreq.txt``.
    """
    if not args.out:
        bed_name = os.path.basename(args.bed)
        args.out = '.'.join(bed_name.split('.')[0:-1])
    chunks = ChunkList.read(args.bed, strand_col=args.strand)
    params = _NucleotideParameters(args.up, args.down, args.fasta,
                                   args.dinucleotide)
    sets = chunks.split(bases=10000)
    pool = Pool(processes=args.cores)
    jobs = zip(sets, itertools.repeat(params))
    partials = pool.map(_nucleotideHelper, jobs)
    pool.close()
    pool.join()

    total = np.zeros(params.matsize)
    count = 0.0
    for part in partials:
        total += part[0]
        count += part[1]
    freqs = total / count

    if args.norm:
        background = seq.getNucFreqs(params.fasta, params.nucleotides)
        # Repeat each background frequency across its row before dividing.
        divisor = np.reshape(np.repeat(background, freqs.shape[1]),
                             freqs.shape)
        freqs = freqs / divisor

    # Save text output: label column followed by values cast to 8-char strings.
    labels = np.array(params.nucleotides)[:, np.newaxis]
    table = np.hstack((labels, freqs.astype('|S8')))
    np.savetxt(args.out + '.nucfreq.txt', table, delimiter="\t", fmt='%s')
Ejemplo n.º 25
0
def run_diff(args, bases = 500000):
    """Run differential occupancy calling and write ``<args.out>.occdiff.bed.gz``.

    Regions are read from ``args.bed``, merged, split into sets of roughly
    ``bases`` bases and processed by ``_occHelper`` in a worker pool.  The
    second element of every helper result is handed to a separate writer
    process (``_writeDiff``) through a queue.  Once the writer has finished,
    the BED file is compressed and indexed with tabix and the uncompressed
    file is removed.

    Args:
        args: parsed command-line arguments; the attributes read here are
            ``bam``, ``pwm``, ``bed``, ``flank``, ``upper``, ``sizes1``,
            ``sizes2``, ``fasta``, ``nuc_sep``, ``min_occ``,
            ``confidence_interval``, ``cores`` and ``out``.
        bases: size passed to ``chunks.split`` when building the work sets.
    """
    chrs = read_chrom_sizes_from_bam(args.bam)
    pwm = PWM.open(args.pwm)
    # NOTE(review): `args.upper/2` is true division on Python 3 and would make
    # min_offset a float -- confirm which interpreter this targets.
    chunks = ChunkList.read(args.bed, chromDict = chrs, min_offset = args.flank + args.upper/2 + max(pwm.up,pwm.down))
    chunks.merge()
    # NOTE(review): maxQueueSize is computed but never used below; diff_queue
    # is created without a maxsize.
    maxQueueSize = max(2,int(100 * bases / np.mean([chunk.length() for chunk in chunks])))
    # Build one fragment size mixture model per input (args.sizes1, args.sizes2)
    fragment_dist1 = FragmentMixDistribution(0, upper = args.upper)
    fragment_dist1.fragmentsizes = FragmentSizes(0, args.upper, vals = FragmentSizes.open(args.sizes1).get(0,args.upper))
    fragment_dist1.modelNFR()
    fragment_dist2 = FragmentMixDistribution(0, upper = args.upper)
    fragment_dist2.fragmentsizes = FragmentSizes(0, args.upper, vals = FragmentSizes.open(args.sizes2).get(0,args.upper))
    fragment_dist2.modelNFR()
    # NOTE(review): `fragment_dist` is not assigned anywhere in this function
    # (only fragment_dist1 / fragment_dist2 are).  Unless a module-level name
    # exists, this line raises NameError -- confirm which distribution(s)
    # OccupancyParameters is meant to receive.
    params = OccupancyParameters(fragment_dist, args.upper, args.fasta, args.pwm, sep = args.nuc_sep, min_occ = args.min_occ,
            flank = args.flank, bam = args.bam, ci = args.confidence_interval)
    sets = chunks.split(bases = bases)
    pool1 = mp.Pool(processes = max(1,args.cores-1))
    # Create (or truncate) the output file before the writer process starts
    diff_handle = open(args.out + '.occdiff.bed','w')
    diff_handle.close()
    diff_queue = mp.JoinableQueue()
    diff_process = mp.Process(target = _writeDiff, args=(diff_queue, args.out))
    diff_process.start()
    # NOTE(review): nuc_dist is allocated but never read or updated below.
    nuc_dist = np.zeros(args.upper)
    for j in sets:
        tmp = pool1.map(_occHelper, zip(j,itertools.repeat(params)))
        for result in tmp:
            # only the second element of each helper result is written out
            diff_queue.put(result[1])
    pool1.close()
    pool1.join()
    # 'STOP' is queued after all results; wait for the writer to exit
    diff_queue.put('STOP')
    diff_process.join()
    # Compress, drop the uncompressed file, then index the compressed output
    pysam.tabix_compress(args.out + '.occdiff.bed', args.out + '.occdiff.bed.gz',force = True)
    shell_command('rm ' + args.out + '.occdiff.bed')
    pysam.tabix_index(args.out + '.occdiff.bed.gz', preset = "bed", force = True)
Ejemplo n.º 26
0
def get_sizes(args):
    """Compute fragment sizes from a BAM file, save them and optionally plot.

    When ``args.out`` is None it is derived from the basename of ``args.bam``
    with its last dot-separated piece removed.  If ``args.bed`` is given, the
    regions are read and merged and passed to ``calculateSizes`` together
    with the BAM file; otherwise only the BAM file is passed.  The sizes are
    saved to ``<args.out>.fragmentsizes.txt`` and, unless ``args.no_plot`` is
    set, a plot is written to ``<args.out>.fragmentsizes.pdf``.
    """
    if args.out is None:
        bam_name = os.path.basename(args.bam)
        args.out = '.'.join(bam_name.split('.')[0:-1])
    sizes = FragmentSizes(lower=args.lower, upper=args.upper, atac=args.atac)
    if not args.bed:
        sizes.calculateSizes(args.bam)
    else:
        chunks = ChunkList.read(args.bed)
        chunks.merge()
        sizes.calculateSizes(args.bam, chunks)
    sizes.save(args.out + '.fragmentsizes.txt')
    if args.no_plot:
        return
    # make figure
    fig = plt.figure()
    x_values = list(range(sizes.lower, sizes.upper))
    y_values = sizes.get(sizes.lower, sizes.upper)
    plt.plot(x_values, y_values, label=args.out)
    plt.xlabel("Fragment Size")
    plt.ylabel("Frequency")
    fig.savefig(args.out + '.fragmentsizes.pdf')
    plt.close(fig)
Ejemplo n.º 27
0
def run_nuc(args):
    """Run nucleosome calling and write compressed, indexed output tracks.

    Regions from ``args.bed`` are read, extended by half of ``args.nuc_sep``
    on each side, merged and split into sets that ``_nucHelper`` processes in
    a worker pool.  Every output has its own queue and writer process (taken
    from ``_writeFuncs``); each helper result is indexed by output name and
    the value is put on the matching queue.  After the writers finish, each
    output file is compressed and indexed with tabix and the uncompressed
    file is removed.

    The outputs 'nucpos' and 'nucpos.redundant' are written as ``.bed``, all
    others as ``.bedgraph``.  'nucleoatac_background' and 'nucleoatac_raw'
    are only produced when ``args.write_all`` is set.
    """
    vmat = VMat.open(args.vmat)
    chrs = (read_chrom_sizes_from_fasta(args.fasta) if args.fasta
            else read_chrom_sizes_from_bam(args.bam))
    pwm = PWM.open(args.pwm)
    half_sep = args.nuc_sep // 2
    offset = (vmat.mat.shape[1] + vmat.upper // 2 +
              max(pwm.up, pwm.down) + half_sep)
    chunks = ChunkList.read(args.bed,
                            chromDict=chrs,
                            min_offset=offset,
                            min_length=args.nuc_sep * 2)
    chunks.slop(chrs, up=half_sep, down=half_sep)
    chunks.merge()
    queue_limit = args.cores * 10
    if args.sizes is None:
        fragment_dist = FragmentSizes(0, upper=vmat.upper)
        fragment_dist.calculateSizes(args.bam, chunks)
    else:
        fragment_dist = FragmentSizes.open(args.sizes)
    params = NucParameters(vmat=vmat,
                           fragmentsizes=fragment_dist,
                           bam=args.bam,
                           fasta=args.fasta,
                           pwm=args.pwm,
                           occ_track=args.occ_track,
                           sd=args.sd,
                           nonredundant_sep=args.nuc_sep,
                           redundant_sep=args.redundant_sep,
                           min_z=args.min_z,
                           min_lr=args.min_lr,
                           atac=args.atac)
    sets = chunks.split(items=args.cores * 5)
    pool1 = mp.Pool(processes=max(1, args.cores - 1))
    outputs = [
        'nucpos', 'nucpos.redundant', 'nucleoatac_signal',
        'nucleoatac_signal.smooth'
    ]
    if args.write_all:
        outputs += ['nucleoatac_background', 'nucleoatac_raw']
    bed_outputs = ('nucpos', 'nucpos.redundant', 'nfrpos')
    paths = {}
    write_queues = {}
    write_processes = {}
    for name in outputs:
        extension = '.bed' if name in bed_outputs else '.bedgraph'
        paths[name] = args.out + '.' + name + extension
        # create (or truncate) the file before its writer process starts
        open(paths[name], 'w').close()
        write_queues[name] = mp.JoinableQueue(maxsize=queue_limit)
        write_processes[name] = mp.Process(target=_writeFuncs[name],
                                           args=(write_queues[name],
                                                 args.out))
        write_processes[name].start()
    for chunk_set in sets:
        results = pool1.map(_nucHelper,
                            list(zip(chunk_set, itertools.repeat(params))))
        for result in results:
            for name in outputs:
                write_queues[name].put(result[name])
    pool1.close()
    pool1.join()
    for name in outputs:
        write_queues[name].put('STOP')
    for name in outputs:
        write_processes[name].join()
        # compress, drop the uncompressed file, then index
        path = paths[name]
        pysam.tabix_compress(path, path + '.gz', force=True)
        shell_command('rm ' + path)
        pysam.tabix_index(path + '.gz', preset="bed", force=True)
Ejemplo n.º 28
0
def run_occ(args):
    """Run occupancy calling and write the occupancy tracks, peaks and plots.

    Regions from ``args.bed`` are read, extended by half of ``args.nuc_sep``
    on each side, merged and split into sets that ``_occHelper`` processes in
    a worker pool.  For every helper result the first element is summed into
    a size distribution, the second is sent to the ``_writeOcc`` writer
    process and the third to the ``_writePeaks`` writer process.

    Files written (all prefixed with ``args.out``): ``.occ_fit.eps``,
    ``.fragmentsizes.txt``, ``.occpeaks.bed.gz``, ``.occ.bedgraph.gz``,
    ``.occ.lower_bound.bedgraph.gz``, ``.occ.upper_bound.bedgraph.gz`` (each
    compressed file is also tabix-indexed), ``.nuc_dist.txt`` and
    ``.nuc_dist.eps``.

    Half-widths are computed with floor division so the offsets and slop
    values stay integers on Python 3 as well; for integer arguments this
    matches the Python 2 result of ``/``.  ``print`` is called as a function
    so the module parses on both Python 2 and Python 3.
    """
    if args.fasta:
        chrs = read_chrom_sizes_from_fasta(args.fasta)
    else:
        chrs = read_chrom_sizes_from_bam(args.bam)
    pwm = PWM.open(args.pwm)
    half_sep = args.nuc_sep // 2
    chunks = ChunkList.read(args.bed, chromDict = chrs,
                            min_offset = args.flank + args.upper // 2 + max(pwm.up,pwm.down) + half_sep)
    chunks.slop(chrs, up = half_sep, down = half_sep)
    chunks.merge()
    maxQueueSize = args.cores*10
    fragment_dist = FragmentMixDistribution(0, upper = args.upper)
    if args.sizes is not None:
        tmp = FragmentSizes.open(args.sizes)
        fragment_dist.fragmentsizes = FragmentSizes(0, args.upper, vals = tmp.get(0,args.upper))
    else:
        fragment_dist.getFragmentSizes(args.bam, chunks)
    fragment_dist.modelNFR()
    fragment_dist.plotFits(args.out + '.occ_fit.eps')
    fragment_dist.fragmentsizes.save(args.out + '.fragmentsizes.txt')
    params = OccupancyParameters(fragment_dist, args.upper, args.fasta, args.pwm, sep = args.nuc_sep, min_occ = args.min_occ,
            flank = args.flank, bam = args.bam, ci = args.confidence_interval, step = args.step)
    sets = chunks.split(items = args.cores * 5)
    pool1 = mp.Pool(processes = max(1,args.cores-1))
    occ_tracks = ('occ','occ.lower_bound','occ.upper_bound')
    # create (or truncate) the bedgraph outputs before the writer starts
    for track in occ_tracks:
        with open(args.out + '.' + track + '.bedgraph','w'):
            pass
    write_queue = mp.JoinableQueue(maxsize = maxQueueSize)
    write_process = mp.Process(target = _writeOcc, args=(write_queue, args.out))
    write_process.start()
    # same for the peaks output and its writer
    with open(args.out + '.occpeaks.bed','w'):
        pass
    peaks_queue = mp.JoinableQueue()
    peaks_process = mp.Process(target = _writePeaks, args=(peaks_queue, args.out))
    peaks_process.start()
    nuc_dist = np.zeros(args.upper)
    for j in sets:
        tmp = pool1.map(_occHelper, zip(j,itertools.repeat(params)))
        for result in tmp:
            nuc_dist += result[0]
            write_queue.put(result[1])
            peaks_queue.put(result[2])
    pool1.close()
    pool1.join()
    # 'STOP' is queued after all results; wait for both writers to exit
    write_queue.put('STOP')
    peaks_queue.put('STOP')
    write_process.join()
    peaks_process.join()
    # compress, drop the uncompressed file, then index each output
    pysam.tabix_compress(args.out + '.occpeaks.bed', args.out + '.occpeaks.bed.gz',force = True)
    shell_command('rm ' + args.out + '.occpeaks.bed')
    pysam.tabix_index(args.out + '.occpeaks.bed.gz', preset = "bed", force = True)
    for i in occ_tracks:
        pysam.tabix_compress(args.out + '.' + i + '.bedgraph', args.out + '.'+i+'.bedgraph.gz',force = True)
        shell_command('rm ' + args.out + '.' + i + '.bedgraph')
        pysam.tabix_index(args.out + '.' + i + '.bedgraph.gz', preset = "bed", force = True)

    dist_out = FragmentSizes(0, args.upper, vals = nuc_dist)
    dist_out.save(args.out + '.nuc_dist.txt')

    print("Making figure")
    #make figure
    fig = plt.figure()
    plt.plot(list(range(0,args.upper)),dist_out.get(0,args.upper),label = "Nucleosome Distribution")
    plt.xlabel("Fragment Size")
    plt.ylabel("Frequency")
    fig.savefig(args.out+'.nuc_dist.eps')
    plt.close(fig)
Ejemplo n.º 29
0
 def setUp(self):
     """Set up Test_Track: keep the first entry of the example BED file."""
     self.chunk = ChunkList.read("example/example.bed")[0]
Ejemplo n.º 30
0
 def setUp(self):
     """Set up Test_Ins: keep the first entry of the example BED file."""
     self.chunk = ChunkList.read("example/example.bed")[0]
Ejemplo n.º 31
0
def run_nuc(args):
    """Run nucleosome calling and write compressed, indexed output tracks.

    Regions from ``args.bed`` are read, extended by half of ``args.nuc_sep``
    on each side, merged and split into sets that ``_nucHelper`` processes in
    a worker pool.  Every output has its own queue and writer process (taken
    from ``_writeFuncs``); each helper result is indexed by output name and
    the value is put on the matching queue.  After the writers finish, each
    output file is compressed and indexed with tabix and the uncompressed
    file is removed.

    The outputs 'nucpos' and 'nucpos.redundant' are written as ``.bed``, all
    others as ``.bedgraph``.  'nucleoatac_background' and 'nucleoatac_raw'
    are only produced when ``args.write_all`` is set.

    Half-widths are computed with floor division so the offsets and slop
    values stay integers on Python 3 as well; for integer arguments this
    matches the Python 2 result of ``/``.
    """
    vmat = VMat.open(args.vmat)
    if args.fasta:
        chrs = read_chrom_sizes_from_fasta(args.fasta)
    else:
        chrs = read_chrom_sizes_from_bam(args.bam)
    pwm = PWM.open(args.pwm)
    half_sep = args.nuc_sep // 2
    chunks = ChunkList.read(args.bed, chromDict = chrs,
                            min_offset = vmat.mat.shape[1] + vmat.upper // 2 + max(pwm.up,pwm.down) + half_sep,
                            min_length = args.nuc_sep * 2)
    chunks.slop(chrs, up = half_sep, down = half_sep)
    chunks.merge()
    maxQueueSize = args.cores*10
    if args.sizes is not None:
        fragment_dist = FragmentSizes.open(args.sizes)
    else:
        fragment_dist = FragmentSizes(0, upper = vmat.upper)
        fragment_dist.calculateSizes(args.bam, chunks)
    params = NucParameters(vmat = vmat, fragmentsizes = fragment_dist, bam = args.bam, fasta = args.fasta, pwm = args.pwm,
                           occ_track = args.occ_track,
                           sd = args.sd, nonredundant_sep = args.nuc_sep, redundant_sep = args.redundant_sep,
                           min_z = args.min_z, min_lr = args.min_lr , atac = args.atac)
    sets = chunks.split(items = args.cores*5)
    pool1 = mp.Pool(processes = max(1,args.cores-1))
    if args.write_all:
        outputs = ['nucpos','nucpos.redundant','nucleoatac_signal','nucleoatac_signal.smooth',
                       'nucleoatac_background','nucleoatac_raw']
    else:
        outputs = ['nucpos','nucpos.redundant','nucleoatac_signal','nucleoatac_signal.smooth']
    out_paths = {}
    write_queues = {}
    write_processes = {}
    for i in outputs:
        if i not in ['nucpos','nucpos.redundant','nfrpos']:
            out_paths[i] = args.out + '.'+i+'.bedgraph'
        else:
            out_paths[i] = args.out + '.'+i+'.bed'
        # create (or truncate) the file before its writer process starts
        with open(out_paths[i],'w'):
            pass
        write_queues[i] = mp.JoinableQueue(maxsize = maxQueueSize)
        write_processes[i] = mp.Process(target = _writeFuncs[i], args=(write_queues[i], args.out))
        write_processes[i].start()
    for j in sets:
        tmp = pool1.map(_nucHelper, zip(j,itertools.repeat(params)))
        for result in tmp:
            for i in outputs:
                write_queues[i].put(result[i])
    pool1.close()
    pool1.join()
    # 'STOP' is queued after all results for each writer
    for i in outputs:
        write_queues[i].put('STOP')
    for i in outputs:
        write_processes[i].join()
        # compress, drop the uncompressed file, then index
        pysam.tabix_compress(out_paths[i], out_paths[i] + '.gz',force = True)
        shell_command('rm ' + out_paths[i])
        pysam.tabix_index(out_paths[i] + '.gz', preset = "bed", force = True)
Ejemplo n.º 32
0
 def setUp(self):
     """Set up Test_Ins: keep the first entry of the example BED file."""
     self.chunk = ChunkList.read('example/example.bed')[0]
Ejemplo n.º 33
0
 def setUp(self):
     """Set up Test_Track: keep the first entry of the example BED file."""
     self.chunk = ChunkList.read('example/example.bed')[0]