def get_cov(args, bases=50000, splitsize=1000):
    """Compute coverage tracks and write them to a tabix-indexed bedgraph.

    Regions come from ``args.bed`` when it is given; otherwise chunks are
    built from the chromosome sizes read from ``args.bam``.  Each batch of
    chunks is mapped over a worker pool with ``_covHelper`` and every result
    is put on a queue consumed by a separate process running ``_writeCov``
    (which is expected to append to ``<args.out>.cov.bedgraph``).  The
    bedgraph is then compressed to ``<args.out>.cov.bedgraph.gz``, the
    uncompressed file is deleted, and a tabix index is built.

    Args:
        args: Namespace providing ``out``, ``bed``, ``bam`` and ``cores``; it
            is also forwarded unchanged to ``_covHelper``.  When ``args.out``
            is empty it is set in place to the basename of the BAM file (no
            BED given) or of the BED file, with the last extension removed.
        bases: Batch size handed to ``ChunkList.split``; also used to size
            the write queue.
        splitsize: Passed to ``ChunkList.convertChromSizes`` when no BED file
            is given.
    """
    if not args.out:
        source = args.bam if args.bed is None else args.bed
        args.out = '.'.join(os.path.basename(source).split('.')[0:-1])
    if args.bed is None:
        chrs = read_chrom_sizes_from_bam(args.bam)
        chunks = ChunkList.convertChromSizes(chrs, splitsize=splitsize)
        # Floor division keeps the item count an int under Python 3.
        sets = chunks.split(items=bases // splitsize)
    else:
        chunks = ChunkList.read(args.bed)
        chunks.merge()
        sets = chunks.split(bases=bases)
    # Bound the queue to roughly two batches worth of tracks (at least 2).
    maxQueueSize = max(
        2, int(2 * bases / np.mean([chunk.length() for chunk in chunks])))
    bedgraph = args.out + '.cov.bedgraph'
    # Create/truncate the output file before the writer process starts.
    with open(bedgraph, 'w'):
        pass
    # One core is left for the writer process.
    pool = mp.Pool(processes=max(1, args.cores - 1))
    write_queue = mp.JoinableQueue(maxsize=maxQueueSize)
    write_process = mp.Process(target=_writeCov,
                               args=(write_queue, args.out))
    write_process.start()
    try:
        for batch in sets:
            tracks = pool.map(_covHelper, zip(batch, itertools.repeat(args)))
            for track in tracks:
                write_queue.put(track)
    finally:
        # Always stop the workers and the writer; otherwise a failure in a
        # worker would leave the non-daemon writer blocked on the queue.
        pool.close()
        pool.join()
        write_queue.put('STOP')
        write_process.join()
    pysam.tabix_compress(bedgraph, bedgraph + '.gz', force=True)
    # Remove the file directly instead of building a shell command, so paths
    # containing spaces or shell metacharacters are handled correctly.
    os.remove(bedgraph)
    pysam.tabix_index(bedgraph + '.gz', preset="bed", force=True)
def make_bias_track(args, bases=500000, splitsize=1000):
    """Compute a bias track and write it to a tabix-indexed bedgraph.

    Regions come from ``args.bed`` when it is given; otherwise chunks are
    built from ``params.chrs`` of the ``_BiasParams`` object created from
    ``args.fasta`` and ``args.pwm``.  Each batch of chunks is mapped over a
    worker pool with ``_biasHelper`` and every result is put on a queue
    consumed by a separate process running ``_writeBias`` (which is expected
    to append to ``<args.out>.Scores.bedgraph``).  The bedgraph is then
    compressed to ``<args.out>.Scores.bedgraph.gz``, the uncompressed file is
    deleted, and a tabix index is built.

    Args:
        args: Namespace providing ``out``, ``bed``, ``fasta``, ``pwm`` and
            ``cores``.  When ``args.out`` is None it is set in place to the
            basename of the BED file (if given) or of the FASTA file, with
            the last extension removed.
        bases: Batch size handed to ``ChunkList.split``; also used to size
            the write queue.
        splitsize: Passed to ``ChunkList.convertChromSizes`` when no BED file
            is given.
    """
    if args.out is None:
        source = args.bed if args.bed is not None else args.fasta
        args.out = '.'.join(os.path.basename(source).split('.')[0:-1])
    params = _BiasParams(args.fasta, args.pwm)
    if args.bed is None:
        chunks = ChunkList.convertChromSizes(params.chrs, splitsize=splitsize)
        # Floor division keeps the item count an int under Python 3.
        sets = chunks.split(items=bases // splitsize)
    else:
        chunks = ChunkList.read(args.bed)
        chunks.merge()
        sets = chunks.split(bases=bases)
    # Bound the queue to roughly two batches worth of tracks (at least 2).
    maxQueueSize = max(
        2, int(2 * bases / np.mean([chunk.length() for chunk in chunks])))
    bedgraph = args.out + '.Scores.bedgraph'
    # Create/truncate the output file before the writer process starts.
    with open(bedgraph, 'w'):
        pass
    # One core is left for the writer process.
    pool = mp.Pool(processes=max(1, args.cores - 1))
    write_queue = mp.JoinableQueue(maxsize=maxQueueSize)
    write_process = mp.Process(target=_writeBias,
                               args=(write_queue, args.out))
    write_process.start()
    try:
        for batch in sets:
            tracks = pool.map(_biasHelper,
                              zip(batch, itertools.repeat(params)))
            for track in tracks:
                write_queue.put(track)
    finally:
        # Always stop the workers and the writer; otherwise a failure in a
        # worker would leave the non-daemon writer blocked on the queue.
        pool.close()
        pool.join()
        write_queue.put('STOP')
        write_process.join()
    pysam.tabix_compress(bedgraph, bedgraph + '.gz', force=True)
    # Remove the file directly instead of building a shell command, so paths
    # containing spaces or shell metacharacters are handled correctly.
    os.remove(bedgraph)
    pysam.tabix_index(bedgraph + '.gz', preset="bed", force=True)
def get_pwm(args, bases=50000, splitsize=1000):
    """Obtain a PWM around ATAC insertion sites and save it to disk.

    Chunks are read from ``args.bed`` when it is given, otherwise built from
    the chromosome sizes of ``args.fasta``.  ``_pwmHelper`` is mapped over
    the batches; the first element of each result is summed into a matrix
    and the second into a total count.  The summed matrix is divided by that
    total and then by per-nucleotide frequencies (taken from the chunks when
    a BED file is given, otherwise from the whole FASTA).  The result is
    saved as ``<args.out>.PWM.txt``.

    Args:
        args: Namespace providing ``out``, ``bam``, ``fasta``, ``bed``,
            ``flank``, ``lower``, ``upper``, ``atac``, ``sym`` and ``cores``.
            When ``args.out`` is empty it is set in place to the basename of
            the BAM file with the last extension removed.
        bases: Batch size handed to ``ChunkList.split``.
        splitsize: Passed to ``ChunkList.convertChromSizes`` when no BED file
            is given.
    """
    if not args.out:
        args.out = '.'.join(os.path.basename(args.bam).split('.')[0:-1])
    chrs = read_chrom_sizes_from_fasta(args.fasta)
    if args.bed is None:
        chunks = ChunkList.convertChromSizes(chrs, splitsize=splitsize,
                                             offset=args.flank)
        # Floor division keeps the item count an int under Python 3.
        sets = chunks.split(items=bases // splitsize)
    else:
        chunks = ChunkList.read(args.bed, chromDict=chrs,
                                min_offset=args.flank)
        sets = chunks.split(bases=bases)
    params = _PWMParameters(bam=args.bam, up=args.flank, down=args.flank,
                            fasta=args.fasta, lower=args.lower,
                            upper=args.upper, atac=args.atac, sym=args.sym)
    pool = Pool(processes=args.cores)
    try:
        partials = pool.map(_pwmHelper, zip(sets, itertools.repeat(params)))
    finally:
        # Release the worker processes even if a helper raised.
        pool.close()
        pool.join()
    total = 0.0
    result = np.zeros((len(params.nucleotides), params.up + params.down + 1))
    for partial in partials:
        result += partial[0]
        total += partial[1]
    result /= total
    if args.bed:
        normfreqs = seq.getNucFreqsFromChunkList(chunks, args.fasta,
                                                 params.nucleotides)
    else:
        normfreqs = seq.getNucFreqs(args.fasta, params.nucleotides)
    # Divide every row by the frequency of its nucleotide.
    result = result / np.reshape(np.repeat(normfreqs, result.shape[1]),
                                 result.shape)
    if args.sym:
        # Symmetrize: average the left half (including the centre column)
        # with the right half flipped along both axes, then rebuild the
        # right side from the flipped average.
        left = result[:, 0:(args.flank + 1)]
        right = result[:, args.flank:]
        rightflipped = np.fliplr(np.flipud(right))
        combined = (left + rightflipped) / 2
        mirrored = np.fliplr(np.flipud(combined[:, 0:args.flank]))
        result = np.hstack((combined, mirrored))
    # Save
    pwm = PWM(result, args.flank, args.flank, params.nucleotides)
    pwm.save(args.out + '.PWM.txt')
def make_bias_track(args, bases=500000, splitsize=1000):
    """Compute a bias track and write it to a tabix-indexed bedgraph.

    Regions come from ``args.bed`` when it is given (after checking their
    chromosomes against ``params.chrs`` and merging); otherwise chunks are
    built from ``params.chrs`` of the ``_BiasParams`` object created from
    ``args.fasta`` and ``args.pwm``.  Each batch of chunks is mapped over a
    worker pool with ``_biasHelper`` and every result is put on a queue
    consumed by a separate process running ``_writeBias`` (which is expected
    to append to ``<args.out>.Scores.bedgraph``).  The bedgraph is then
    compressed to ``<args.out>.Scores.bedgraph.gz``, the uncompressed file is
    deleted, and a tabix index is built.

    Args:
        args: Namespace providing ``out``, ``bed``, ``fasta``, ``pwm`` and
            ``cores``.  When ``args.out`` is None it is set in place to the
            basename of the BED file (if given) or of the FASTA file, with
            the last extension removed.
        bases: Batch size handed to ``ChunkList.split``; also used to size
            the write queue.
        splitsize: Passed to ``ChunkList.convertChromSizes`` when no BED file
            is given.
    """
    if args.out is None:
        source = args.bed if args.bed is not None else args.fasta
        args.out = '.'.join(os.path.basename(source).split('.')[0:-1])
    params = _BiasParams(args.fasta, args.pwm)
    if args.bed is None:
        chunks = ChunkList.convertChromSizes(params.chrs, splitsize=splitsize)
        sets = chunks.split(items=bases // splitsize)
    else:
        chunks = ChunkList.read(args.bed)
        chunks.checkChroms(list(params.chrs.keys()))
        chunks.merge()
        sets = chunks.split(bases=bases)
    # Bound the queue to roughly two batches worth of tracks (at least 2).
    maxQueueSize = max(
        2, int(2 * bases / np.mean([chunk.length() for chunk in chunks])))
    bedgraph = args.out + '.Scores.bedgraph'
    # Create/truncate the output file before the writer process starts.
    with open(bedgraph, 'w'):
        pass
    # One core is left for the writer process.
    pool = mp.Pool(processes=max(1, args.cores - 1))
    write_queue = mp.JoinableQueue(maxsize=maxQueueSize)
    write_process = mp.Process(target=_writeBias,
                               args=(write_queue, args.out))
    write_process.start()
    try:
        for batch in sets:
            tracks = pool.map(_biasHelper,
                              zip(batch, itertools.repeat(params)))
            for track in tracks:
                write_queue.put(track)
    finally:
        # Always stop the workers and the writer; otherwise a failure in a
        # worker would leave the non-daemon writer blocked on the queue.
        pool.close()
        pool.join()
        write_queue.put('STOP')
        write_process.join()
    pysam.tabix_compress(bedgraph, bedgraph + '.gz', force=True)
    # Remove the file directly instead of building a shell command, so paths
    # containing spaces or shell metacharacters are handled correctly.
    os.remove(bedgraph)
    pysam.tabix_index(bedgraph + '.gz', preset="bed", force=True)
def get_ins(args, bases=50000, splitsize=1000):
    """Compute insertion tracks and write them to a tabix-indexed bedgraph.

    Regions come from ``args.bed`` when it is given; otherwise chunks are
    built from the chromosome sizes read from ``args.bam``.  Each batch of
    chunks is mapped over a worker pool with ``_insHelperSmooth`` (when
    ``args.smooth`` is truthy) or ``_insHelper``, and every result is put on
    a queue consumed by a separate process running ``_writeIns`` (which is
    expected to append to ``<args.out>.ins.bedgraph``).  The bedgraph is then
    compressed to ``<args.out>.ins.bedgraph.gz``, the uncompressed file is
    deleted, and a tabix index is built.

    Args:
        args: Namespace providing ``out``, ``bed``, ``bam``, ``cores`` and
            ``smooth``; it is also forwarded unchanged to the helper.  When
            ``args.out`` is empty it is set in place to the basename of the
            BAM file (no BED given) or of the BED file, with the last
            extension removed.
        bases: Batch size handed to ``ChunkList.split``; also used to size
            the write queue.
        splitsize: Passed to ``ChunkList.convertChromSizes`` when no BED file
            is given.
    """
    if not args.out:
        source = args.bam if args.bed is None else args.bed
        args.out = '.'.join(os.path.basename(source).split('.')[0:-1])
    if args.bed is None:
        chrs = read_chrom_sizes_from_bam(args.bam)
        chunks = ChunkList.convertChromSizes(chrs, splitsize=splitsize)
        # Floor division keeps the item count an int under Python 3.
        sets = chunks.split(items=bases // splitsize)
    else:
        chunks = ChunkList.read(args.bed)
        chunks.merge()
        sets = chunks.split(bases=bases)
    # Bound the queue to roughly two batches worth of tracks (at least 2).
    maxQueueSize = max(
        2, int(2 * bases / np.mean([chunk.length() for chunk in chunks])))
    bedgraph = args.out + '.ins.bedgraph'
    # Create/truncate the output file before the writer process starts.
    with open(bedgraph, 'w'):
        pass
    # The helper choice does not depend on the batch, so pick it once.
    helper = _insHelperSmooth if args.smooth else _insHelper
    # One core is left for the writer process.
    pool = mp.Pool(processes=max(1, args.cores - 1))
    write_queue = mp.JoinableQueue(maxsize=maxQueueSize)
    write_process = mp.Process(target=_writeIns,
                               args=(write_queue, args.out))
    write_process.start()
    try:
        for batch in sets:
            tracks = pool.map(helper, zip(batch, itertools.repeat(args)))
            for track in tracks:
                write_queue.put(track)
    finally:
        # Always stop the workers and the writer; otherwise a failure in a
        # worker would leave the non-daemon writer blocked on the queue.
        pool.close()
        pool.join()
        write_queue.put('STOP')
        write_process.join()
    pysam.tabix_compress(bedgraph, bedgraph + '.gz', force=True)
    # Remove the file directly instead of building a shell command, so paths
    # containing spaces or shell metacharacters are handled correctly.
    os.remove(bedgraph)
    pysam.tabix_index(bedgraph + '.gz', preset="bed", force=True)
def get_pwm(args, bases=50000, splitsize=1000):
    """Obtain a PWM around ATAC insertion sites and save it to disk.

    Chunks are read from ``args.bed`` when it is given, otherwise built from
    the chromosome sizes of ``args.fasta``.  ``_pwmHelper`` is mapped over
    the batches; the first element of each result is summed into a matrix
    and the second into a total count.  The summed matrix is divided by that
    total and then by per-nucleotide frequencies (taken from the chunks when
    a BED file is given, otherwise from the whole FASTA).  The result is
    saved as ``<args.out>.PWM.txt``.

    Args:
        args: Namespace providing ``out``, ``bam``, ``fasta``, ``bed``,
            ``flank``, ``lower``, ``upper``, ``atac``, ``sym`` and ``cores``.
            When ``args.out`` is empty it is set in place to the basename of
            the BAM file with the last extension removed.
        bases: Batch size handed to ``ChunkList.split``.
        splitsize: Passed to ``ChunkList.convertChromSizes`` when no BED file
            is given.
    """
    if not args.out:
        args.out = '.'.join(os.path.basename(args.bam).split('.')[0:-1])
    chrs = read_chrom_sizes_from_fasta(args.fasta)
    if args.bed is None:
        chunks = ChunkList.convertChromSizes(chrs, splitsize=splitsize,
                                             offset=args.flank)
        # Floor division keeps the item count an int under Python 3.
        sets = chunks.split(items=bases // splitsize)
    else:
        chunks = ChunkList.read(args.bed, chromDict=chrs,
                                min_offset=args.flank)
        sets = chunks.split(bases=bases)
    params = _PWMParameters(bam=args.bam, up=args.flank, down=args.flank,
                            fasta=args.fasta, lower=args.lower,
                            upper=args.upper, atac=args.atac, sym=args.sym)
    pool = Pool(processes=args.cores)
    try:
        partials = pool.map(_pwmHelper, zip(sets, itertools.repeat(params)))
    finally:
        # Release the worker processes even if a helper raised.
        pool.close()
        pool.join()
    total = 0.0
    result = np.zeros((len(params.nucleotides), params.up + params.down + 1))
    for partial in partials:
        result += partial[0]
        total += partial[1]
    result /= total
    if args.bed:
        normfreqs = seq.getNucFreqsFromChunkList(chunks, args.fasta,
                                                 params.nucleotides)
    else:
        normfreqs = seq.getNucFreqs(args.fasta, params.nucleotides)
    # Divide every row by the frequency of its nucleotide.
    result = result / np.reshape(np.repeat(normfreqs, result.shape[1]),
                                 result.shape)
    if args.sym:
        # Symmetrize: average the left half (including the centre column)
        # with the right half flipped along both axes, then rebuild the
        # right side from the flipped average.
        left = result[:, 0:(args.flank + 1)]
        right = result[:, args.flank:]
        rightflipped = np.fliplr(np.flipud(right))
        combined = (left + rightflipped) / 2
        mirrored = np.fliplr(np.flipud(combined[:, 0:args.flank]))
        result = np.hstack((combined, mirrored))
    # Save
    pwm = PWM(result, args.flank, args.flank, params.nucleotides)
    pwm.save(args.out + '.PWM.txt')