def get_sizes(args):
    """Compute the fragment size distribution for a BAM file and save it.

    Attributes read from ``args``:

    * ``bam`` -- path handed to ``FragmentSizes.calculateSizes``.
    * ``out`` -- output prefix.  When it is ``None`` it is replaced (on
      ``args`` itself) by the base name of ``args.bam`` without its last
      extension.
    * ``lower``, ``upper``, ``atac`` -- forwarded to ``FragmentSizes``.
    * ``bed`` -- optional regions file.  When set, the regions are read with
      ``ChunkList.read``, merged, and passed to ``calculateSizes``.
    * ``no_plot`` -- when true, no figure is produced.

    Files written: ``<out>.fragmentsizes.txt`` and, unless ``no_plot`` is
    set, ``<out>.fragmentsizes.eps``.
    """
    if args.out is None:
        # splitext keeps the full name when the file has no extension;
        # splitting on '.' and dropping the last piece would instead leave an
        # empty prefix and produce hidden files such as '.fragmentsizes.txt'.
        args.out = os.path.splitext(os.path.basename(args.bam))[0]

    sizes = FragmentSizes(lower=args.lower, upper=args.upper, atac=args.atac)
    if args.bed:
        chunks = ChunkList.read(args.bed)
        chunks.merge()
        sizes.calculateSizes(args.bam, chunks)
    else:
        sizes.calculateSizes(args.bam)
    sizes.save(args.out + '.fragmentsizes.txt')

    if not args.no_plot:
        # make figure
        fig = plt.figure()
        try:
            plt.plot(range(sizes.lower, sizes.upper),
                     sizes.get(sizes.lower, sizes.upper),
                     label=args.out)
            plt.xlabel("Fragment Size")
            plt.ylabel("Frequency")
            fig.savefig(args.out + '.fragmentsizes.eps')
        finally:
            # Release the figure even when plotting or saving fails.
            plt.close(fig)
def get_sizes(args):
    """Compute the fragment size distribution for a BAM file and save it.

    Attributes read from ``args``:

    * ``bam`` -- path handed to ``FragmentSizes.calculateSizes``.
    * ``out`` -- output prefix.  When it is ``None`` it is replaced (on
      ``args`` itself) by the base name of ``args.bam`` without its last
      extension.
    * ``lower``, ``upper``, ``atac`` -- forwarded to ``FragmentSizes``.
    * ``bed`` -- optional regions file.  When set, the regions are read with
      ``ChunkList.read``, merged, and passed to ``calculateSizes``.
    * ``no_plot`` -- when true, no figure is produced.

    Files written: ``<out>.fragmentsizes.txt`` and, unless ``no_plot`` is
    set, ``<out>.fragmentsizes.pdf``.
    """
    if args.out is None:
        # splitext keeps the full name when the file has no extension;
        # splitting on '.' and dropping the last piece would instead leave an
        # empty prefix and produce hidden files such as '.fragmentsizes.txt'.
        args.out = os.path.splitext(os.path.basename(args.bam))[0]

    sizes = FragmentSizes(lower=args.lower, upper=args.upper, atac=args.atac)
    if args.bed:
        chunks = ChunkList.read(args.bed)
        chunks.merge()
        sizes.calculateSizes(args.bam, chunks)
    else:
        sizes.calculateSizes(args.bam)
    sizes.save(args.out + '.fragmentsizes.txt')

    if not args.no_plot:
        # make figure
        fig = plt.figure()
        try:
            plt.plot(list(range(sizes.lower, sizes.upper)),
                     sizes.get(sizes.lower, sizes.upper),
                     label=args.out)
            plt.xlabel("Fragment Size")
            plt.ylabel("Frequency")
            fig.savefig(args.out + '.fragmentsizes.pdf')
        finally:
            # Release the figure even when plotting or saving fails.
            plt.close(fig)
def run_occ(args):
    """Run occupancy calling and write the resulting tracks, peaks and plots.

    Attributes read from ``args``: ``fasta``, ``bam``, ``pwm``, ``bed``,
    ``flank``, ``upper``, ``nuc_sep``, ``cores``, ``sizes``, ``out``,
    ``min_occ``, ``confidence_interval`` and ``step``.

    Outline:

    1. Chromosome sizes come from ``args.fasta`` when given, else from
       ``args.bam``.
    2. Regions are read from ``args.bed``, extended by half of
       ``args.nuc_sep`` on each side, and merged.
    3. The fragment size distribution is loaded from ``args.sizes`` when
       given, otherwise computed from the BAM file over the regions; the NFR
       model is then fitted.
    4. Region sets are processed by ``_occHelper`` in a worker pool.  Element
       0 of each result is summed into the nucleosome distribution; elements
       1 and 2 are queued for the ``_writeOcc`` and ``_writePeaks``
       processes.
    5. The bed/bedgraph outputs are compressed and indexed with pysam and the
       uncompressed intermediates are deleted.

    Files written under the ``args.out`` prefix: ``.occ_fit.eps``,
    ``.fragmentsizes.txt``, ``.occ.bedgraph.gz``,
    ``.occ.lower_bound.bedgraph.gz``, ``.occ.upper_bound.bedgraph.gz``,
    ``.occpeaks.bed.gz``, ``.nuc_dist.txt`` and ``.nuc_dist.eps``.
    """
    # Local import: only needed to delete the uncompressed intermediates.
    import os

    if args.fasta:
        chrs = read_chrom_sizes_from_fasta(args.fasta)
    else:
        chrs = read_chrom_sizes_from_bam(args.bam)
    pwm = PWM.open(args.pwm)

    # Floor division keeps these offsets integral on Python 3; for integer
    # arguments it gives exactly what '/' gave on Python 2.
    half_sep = args.nuc_sep // 2
    min_offset = (args.flank + args.upper // 2
                  + max(pwm.up, pwm.down) + half_sep)
    chunks = ChunkList.read(args.bed, chromDict=chrs, min_offset=min_offset)
    chunks.slop(chrs, up=half_sep, down=half_sep)
    chunks.merge()
    maxQueueSize = args.cores * 10

    fragment_dist = FragmentMixDistribution(0, upper=args.upper)
    if args.sizes is not None:
        loaded_sizes = FragmentSizes.open(args.sizes)
        fragment_dist.fragmentsizes = FragmentSizes(
            0, args.upper, vals=loaded_sizes.get(0, args.upper))
    else:
        fragment_dist.getFragmentSizes(args.bam, chunks)
    fragment_dist.modelNFR()
    fragment_dist.plotFits(args.out + '.occ_fit.eps')
    fragment_dist.fragmentsizes.save(args.out + '.fragmentsizes.txt')

    params = OccupancyParameters(fragment_dist, args.upper, args.fasta,
                                 args.pwm, sep=args.nuc_sep,
                                 min_occ=args.min_occ, flank=args.flank,
                                 bam=args.bam, ci=args.confidence_interval,
                                 step=args.step)
    sets = chunks.split(items=args.cores * 5)
    pool1 = mp.Pool(processes=max(1, args.cores - 1))

    # Create (or truncate) the track files before the writer process starts.
    # NOTE(review): presumably _writeOcc appends to these files -- confirm.
    track_names = ('occ', 'occ.lower_bound', 'occ.upper_bound')
    for track in track_names:
        with open(args.out + '.' + track + '.bedgraph', 'w'):
            pass
    write_queue = mp.JoinableQueue(maxsize=maxQueueSize)
    write_process = mp.Process(target=_writeOcc,
                               args=(write_queue, args.out))
    write_process.start()

    # Same for the peaks file and its writer process.
    with open(args.out + '.occpeaks.bed', 'w'):
        pass
    peaks_queue = mp.JoinableQueue()
    peaks_process = mp.Process(target=_writePeaks,
                               args=(peaks_queue, args.out))
    peaks_process.start()

    nuc_dist = np.zeros(args.upper)
    try:
        for chunk_set in sets:
            results = pool1.map(_occHelper,
                                zip(chunk_set, itertools.repeat(params)))
            for result in results:
                nuc_dist += result[0]
                write_queue.put(result[1])
                peaks_queue.put(result[2])
    finally:
        # Always shut down the pool and tell the writers to stop, so a
        # failing worker does not leave the writer processes blocked on
        # their queues.
        pool1.close()
        pool1.join()
        write_queue.put('STOP')
        peaks_queue.put('STOP')
        write_process.join()
        peaks_process.join()

    # Compress and index the outputs.  os.remove is used rather than a shell
    # 'rm' so that prefixes containing spaces or shell metacharacters work.
    pysam.tabix_compress(args.out + '.occpeaks.bed',
                         args.out + '.occpeaks.bed.gz', force=True)
    os.remove(args.out + '.occpeaks.bed')
    pysam.tabix_index(args.out + '.occpeaks.bed.gz', preset="bed", force=True)
    for track in track_names:
        bedgraph = args.out + '.' + track + '.bedgraph'
        pysam.tabix_compress(bedgraph, bedgraph + '.gz', force=True)
        os.remove(bedgraph)
        pysam.tabix_index(bedgraph + '.gz', preset="bed", force=True)

    dist_out = FragmentSizes(0, args.upper, vals=nuc_dist)
    dist_out.save(args.out + '.nuc_dist.txt')

    # The call form prints the same text on Python 2 and Python 3.
    print("Making figure")
    # make figure
    fig = plt.figure()
    try:
        plt.plot(range(0, args.upper), dist_out.get(0, args.upper),
                 label="Nucleosome Distribution")
        plt.xlabel("Fragment Size")
        plt.ylabel("Frequency")
        fig.savefig(args.out + '.nuc_dist.eps')
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close(fig)