def make_ambiguity_bigwig_by_readname(prefix, bam_filenames, stop_after=None, subsample=1):
    """Write an ambiguity track, prefix.bw, grouping alignments by read name.

    Alignments for each read (sampled 1-in-subsample reads) are filtered to
    those tied for the best AS score, and each shares 1/n of a unit of
    coverage. Each output position is the proportion of its total coverage
    contributed by multi-mapping reads: 0.0 = fully unique, 1.0 = fully
    ambiguous.

    prefix     - output filename prefix, produces prefix.bw
    stop_after - if given, stop after this many read groups per file
    subsample  - use only one in this many read groups
    """
    header = sam.parsed_bam_headers(bam_filenames[0])

    # wigToBigWig requires a chromosome sizes file; build one from the header.
    with open(prefix+"-chrom.sizes","wb") as sizes_file:
        for sq in header["SQ"]:
            sizes_file.write("{}\t{}\n".format(sq["SN"],sq["LN"]))

    chrom_names = [ sq["SN"] for sq in header["SQ"] ]
    chrom_sizes = [ int(sq["LN"]) for sq in header["SQ"] ]

    # Per-chromosome depth accumulators: unique-only hits, and all hits.
    unambiguous = dict(zip(chrom_names, (Piler(size) for size in chrom_sizes)))
    total = dict(zip(chrom_names, (Piler(size) for size in chrom_sizes)))

    old = grace.status("Ambiguity bigwig")
    for filename in bam_filenames:
        # NOTE(review): assumes the BAM is name-sorted / name-grouped so
        # groupby sees each read's alignments contiguously -- confirm.
        reader = sam.Bam_reader(filename)
        n = 0
        sub = subsample-1
        for key, group in itertools.groupby(reader, lambda al: al.query_name):
            sub = (sub + 1) % subsample
            if sub:
                continue

            hits = [ al for al in group
                     if not al.is_unmapped and not al.is_supplementary ]
            if not hits:
                continue

            # Only use alignments tied for the top alignment score.
            scores = [ al.get_AS() for al in hits ]
            top = max(scores)
            hits = [ al for al, score in zip(hits, scores) if score >= top ]

            weight = 1.0/len(hits)
            for al in hits:
                #spanner = fragment_split_coverage([al])
                spanner = fragment_coverage([al]) #TODO fixme when blocks available
                spanner = scale_spanner(weight, spanner)
                total[al.reference_name].add(spanner)
                if len(hits) == 1:
                    unambiguous[al.reference_name].add(spanner)

            n += 1
            if stop_after is not None and n > stop_after:
                break
            if n % 1000000 == 0:
                grace.status(os.path.basename(prefix)+" "+filename+" "+grace.pretty_number(n))
        reader.close()

    # ambiguity = max(0, total - unambiguous) / total. The two depths are
    # carried through pile() packed into one complex number: unambiguous in
    # the real part, total in the imaginary part.
    ambiguities = [ ]
    for index in xrange(len(total)):
        name = chrom_names[index]
        unique_depth = unambiguous[name].get()
        total_depth = map_spanner(lambda x: x*1j, total[name].get())
        combined = pile([unique_depth, total_depth], initial=0.0)
        combined = map_spanner(
            lambda x: max(0.0,x.imag-x.real)/max(x.imag,1.0), combined)
        ambiguities.append(combined)

    bedgraph(prefix+".bedgraph", zip(chrom_names, list(ambiguities)))
    subprocess.check_call([
        "wigToBigWig",prefix+".bedgraph",prefix+"-chrom.sizes",prefix+".bw"])
    os.unlink(prefix+".bedgraph")
    os.unlink(prefix+"-chrom.sizes")
    grace.status(old)
def make_ambiguity_bigwig(prefix, bam_filenames, stop_after=None, subsample=1):
    """Write an ambiguity track, prefix.bw, using per-alignment NH tags.

    Unlike make_ambiguity_bigwig_by_readname, multi-mapping is detected
    per alignment from the NH:i: tag (number of reported hits) rather than
    by grouping alignments on read name. Each output position is the
    proportion of its coverage contributed by multi-mapping alignments:
    0.0 = fully unique, 1.0 = fully ambiguous.

    prefix     - output filename prefix, produces prefix.bw
    stop_after - if given, stop after this many alignments per file
    subsample  - use only one in this many alignments
    """
    header = sam.parsed_bam_headers(bam_filenames[0])

    # wigToBigWig requires a chromosome sizes file; build one from the header.
    with open(prefix+"-chrom.sizes","wb") as f:
        for entry in header["SQ"]:
            f.write("{}\t{}\n".format(entry["SN"],entry["LN"]))

    chrom_names = [ entry["SN"] for entry in header["SQ"] ]
    chrom_sizes = [ int(entry["LN"]) for entry in header["SQ"] ]

    # Per-chromosome depth accumulators: unique-only hits, and all hits.
    unambiguous = dict([ (i,Piler(j)) for i,j in zip(chrom_names,chrom_sizes) ])
    total = dict([ (i,Piler(j)) for i,j in zip(chrom_names,chrom_sizes) ])

    old = grace.status("Ambiguity bigwig")
    for filename in bam_filenames:
        alf = sam.Bam_reader(filename)
        n = 0
        sub = subsample-1
        for item in alf:
            if item.is_unmapped or item.is_supplementary:
                continue

            sub = (sub + 1) % subsample
            if sub:
                continue

            #spanner = fragment_split_coverage([item])
            spanner = fragment_coverage([item]) #TODO fixme when blocks available
            total[item.reference_name].add(spanner)

            # NH:i: records how many places this read aligned;
            # NH == 1 (or tag absent) means the alignment is unique.
            NH = 1
            for item2 in item.extra:
                if item2.startswith("NH:i:"):
                    NH = int(item2[5:])
            if NH == 1:
                unambiguous[item.reference_name].add(spanner)

            n += 1
            if stop_after is not None and n > stop_after:
                break
            if n % 1000000 == 0:
                # Progress via grace.status, consistent with the other bigwig
                # builders in this file (was a bare print statement).
                grace.status(os.path.basename(prefix)+" "+filename+" "+grace.pretty_number(n))
        alf.close()

    # ambiguity = max(0, total - unambiguous) / total. The two depths are
    # carried through pile() packed into one complex number: unambiguous in
    # the real part, total in the imaginary part.
    ambiguities = [ ]
    for i in xrange(len(total)):
        u = unambiguous[chrom_names[i]].get()
        t = map_spanner(lambda x: x*1j, total[chrom_names[i]].get())
        c = pile([u,t],initial=0.0)
        c = map_spanner(lambda x: max(0.0,x.imag-x.real)/max(x.imag,1.0), c)
        ambiguities.append(c)

    bedgraph(prefix+".bedgraph", zip(chrom_names, [ item for item in ambiguities ]))
    subprocess.check_call([
        "wigToBigWig",prefix+".bedgraph",prefix+"-chrom.sizes",prefix+".bw"])
    os.unlink(prefix+".bedgraph")
    os.unlink(prefix+"-chrom.sizes")
    grace.status(old)
def make_bigwig(prefix, bam_filenames, make_spanner, fragments=False, stop_after=None, scale=1.0, polya=False):
    """Write stranded coverage tracks prefix-fwd.bw and prefix-rev.bw.

    make_spanner converts an alignment (or, with fragments=True, a list of
    alignments making up one fragment) into a coverage spanner. Read pairs
    are assumed --> <-- oriented: an item counts as forward strand when
    is_reverse == is_read2.

    fragments  - process whole read-pair fragments instead of alignments
    stop_after - if given, stop after this many items per file
    scale      - multiply all depths by this factor
    polya      - only count poly(A) reads/fragments
    """
    # Prefer pysam when installed; otherwise fall back to the bundled reader.
    try:
        import pysam
        have_pysam = True
    except ImportError:
        have_pysam = False

    header = sam.parsed_bam_headers(bam_filenames[0])

    # wigToBigWig requires a chromosome sizes file; build one from the header.
    with open(prefix+"-chrom.sizes","wb") as sizes_file:
        for sq in header["SQ"]:
            sizes_file.write("{}\t{}\n".format(sq["SN"],sq["LN"]))

    chrom_names = [ sq["SN"] for sq in header["SQ"] ]
    chrom_sizes = [ int(sq["LN"]) for sq in header["SQ"] ]

    # One depth accumulator per chromosome, per strand.
    forward = dict(zip(chrom_names, (Piler(size) for size in chrom_sizes)))
    reverse = dict(zip(chrom_names, (Piler(size) for size in chrom_sizes)))

    old = grace.status("Bigwig")
    for filename in bam_filenames:
        reader = pysam.AlignmentFile(filename) if have_pysam else sam.Bam_reader(filename)

        n = 0
        if not fragments:
            for al in reader:
                if al.is_unmapped or al.is_secondary or al.is_supplementary:
                    continue
                if polya and not alignment_is_polya(al):
                    continue
                # Assume --> <-- oriented read pairs
                strand = forward if bool(al.is_reverse) == bool(al.is_read2) else reverse
                strand[al.reference_name].add( make_spanner(al) )
                n += 1
                if stop_after is not None and n > stop_after:
                    break
                if n % 1000000 == 0:
                    grace.status(os.path.basename(prefix)+" "+filename+" "+grace.pretty_number(n))
        else:
            for frag in iter_fragments(reader):
                if polya and not any(alignment_is_polya(al) for al in frag):
                    continue
                # Assume --> <-- oriented read pairs
                strand = forward if bool(frag[0].is_reverse) == bool(frag[0].is_read2) else reverse
                strand[frag[0].reference_name].add( make_spanner(frag) )
                n += 1
                if stop_after is not None and n > stop_after:
                    break
                if n % 1000000 == 0:
                    grace.status(os.path.basename(prefix)+" "+filename+" "+grace.pretty_number(n))

        # sam.Bam_reader has no close method; only pysam needs closing.
        if have_pysam:
            reader.close()

    bedgraph(prefix+"-fwd.bedgraph",
        zip(chrom_names,
            [ scale_spanner(scale, forward[name].get()) for name in chrom_names ]))
    subprocess.check_call([
        "wigToBigWig",prefix+"-fwd.bedgraph",prefix+"-chrom.sizes",prefix+"-fwd.bw"])
    os.unlink(prefix+"-fwd.bedgraph")

    bedgraph(prefix+"-rev.bedgraph",
        zip(chrom_names,
            [ scale_spanner(scale, reverse[name].get()) for name in chrom_names ]))
    subprocess.check_call([
        "wigToBigWig",prefix+"-rev.bedgraph",prefix+"-chrom.sizes",prefix+"-rev.bw"])
    os.unlink(prefix+"-rev.bedgraph")
    os.unlink(prefix+"-chrom.sizes")
    grace.status(old)
def make_ambiguity_bigwig_by_readname(prefix, bam_filenames, stop_after=None, subsample=1):
    """Write an ambiguity bigwig, prefix.bw, grouping alignments by read name.

    For each read (one in every `subsample` reads), the alignments tied for
    the best AS score each receive 1/n of a unit of coverage. The track
    value at each position is the fraction of total coverage that came from
    multi-mapping reads (0.0 unique ... 1.0 fully ambiguous).
    """
    header = sam.parsed_bam_headers(bam_filenames[0])

    # Chromosome sizes file, needed later by the wigToBigWig call.
    with open(prefix + "-chrom.sizes", "wb") as out:
        for sq in header["SQ"]:
            out.write("{}\t{}\n".format(sq["SN"], sq["LN"]))

    chrom_names = [sq["SN"] for sq in header["SQ"]]
    chrom_sizes = [int(sq["LN"]) for sq in header["SQ"]]

    # Depth accumulators per chromosome: unique hits only, and all hits.
    unambiguous = {}
    total = {}
    for name, size in zip(chrom_names, chrom_sizes):
        unambiguous[name] = Piler(size)
        total[name] = Piler(size)

    old = grace.status("Ambiguity bigwig")
    for filename in bam_filenames:
        # NOTE(review): groupby only sees a read's alignments together if the
        # BAM is name-grouped -- confirm input ordering.
        bam = sam.Bam_reader(filename)
        n = 0
        sub = subsample - 1
        for read_name, group in itertools.groupby(bam, lambda rec: rec.query_name):
            sub = (sub + 1) % subsample
            if sub:
                continue

            mapped = [rec for rec in group
                      if not rec.is_unmapped and not rec.is_supplementary]
            if not mapped:
                continue

            # Only use top scoring alignments.
            score_list = [rec.get_AS() for rec in mapped]
            best = max(score_list)
            mapped = [rec for rec, score in zip(mapped, score_list)
                      if score >= best]

            share = 1.0 / len(mapped)
            for rec in mapped:
                #spanner = fragment_split_coverage([rec])
                spanner = fragment_coverage([rec])  #TODO fixme when blocks available
                spanner = scale_spanner(share, spanner)
                total[rec.reference_name].add(spanner)
                if len(mapped) == 1:
                    unambiguous[rec.reference_name].add(spanner)

            n += 1
            if stop_after is not None and n > stop_after:
                break
            if n % 1000000 == 0:
                grace.status(os.path.basename(prefix) + " " + filename + " "
                             + grace.pretty_number(n))
        bam.close()

    # ambiguity = max(0, total - unambiguous) / total, computed by packing
    # both depths into one complex pile (real = unique, imag = total).
    ambiguities = []
    for i in xrange(len(total)):
        unique_pile = unambiguous[chrom_names[i]].get()
        total_pile = map_spanner(lambda x: x * 1j, total[chrom_names[i]].get())
        merged = pile([unique_pile, total_pile], initial=0.0)
        merged = map_spanner(
            lambda x: max(0.0, x.imag - x.real) / max(x.imag, 1.0), merged)
        ambiguities.append(merged)

    bedgraph(prefix + ".bedgraph", zip(chrom_names, list(ambiguities)))
    subprocess.check_call([
        "wigToBigWig", prefix + ".bedgraph", prefix + "-chrom.sizes",
        prefix + ".bw"
    ])
    os.unlink(prefix + ".bedgraph")
    os.unlink(prefix + "-chrom.sizes")
    grace.status(old)
def make_ambiguity_bigwig(prefix, bam_filenames, stop_after=None, subsample=1):
    """Write an ambiguity bigwig, prefix.bw, using per-alignment NH tags.

    Multi-mapping is detected from the NH:i: tag (number of reported hits)
    on each alignment, rather than by grouping on read name as in
    make_ambiguity_bigwig_by_readname. The track value at each position is
    the fraction of total coverage from multi-mapping alignments
    (0.0 unique ... 1.0 fully ambiguous).

    prefix     - output filename prefix, produces prefix.bw
    stop_after - if given, stop after this many alignments per file
    subsample  - use only one in this many alignments
    """
    header = sam.parsed_bam_headers(bam_filenames[0])

    # Chromosome sizes file, needed later by the wigToBigWig call.
    with open(prefix + "-chrom.sizes", "wb") as f:
        for entry in header["SQ"]:
            f.write("{}\t{}\n".format(entry["SN"], entry["LN"]))

    chrom_names = [entry["SN"] for entry in header["SQ"]]
    chrom_sizes = [int(entry["LN"]) for entry in header["SQ"]]

    # Depth accumulators per chromosome: unique hits only, and all hits.
    unambiguous = dict([(i, Piler(j))
                        for i, j in zip(chrom_names, chrom_sizes)])
    total = dict([(i, Piler(j)) for i, j in zip(chrom_names, chrom_sizes)])

    old = grace.status("Ambiguity bigwig")
    for filename in bam_filenames:
        alf = sam.Bam_reader(filename)
        n = 0
        sub = subsample - 1
        for item in alf:
            if item.is_unmapped or item.is_supplementary:
                continue

            sub = (sub + 1) % subsample
            if sub:
                continue

            #spanner = fragment_split_coverage([item])
            spanner = fragment_coverage([item])  #TODO fixme when blocks available
            total[item.reference_name].add(spanner)

            # NH:i: records how many places this read aligned;
            # NH == 1 (or tag absent) means the alignment is unique.
            NH = 1
            for item2 in item.extra:
                if item2.startswith("NH:i:"):
                    NH = int(item2[5:])
            if NH == 1:
                unambiguous[item.reference_name].add(spanner)

            n += 1
            if stop_after is not None and n > stop_after:
                break
            if n % 1000000 == 0:
                # Progress via grace.status, consistent with the other
                # bigwig builders in this file (was a bare print statement).
                grace.status(os.path.basename(prefix) + " " + filename + " "
                             + grace.pretty_number(n))
        alf.close()

    # ambiguity = max(0, total - unambiguous) / total, computed by packing
    # both depths into one complex pile (real = unique, imag = total).
    ambiguities = []
    for i in xrange(len(total)):
        u = unambiguous[chrom_names[i]].get()
        t = map_spanner(lambda x: x * 1j, total[chrom_names[i]].get())
        c = pile([u, t], initial=0.0)
        c = map_spanner(lambda x: max(0.0, x.imag - x.real) / max(x.imag, 1.0),
                        c)
        ambiguities.append(c)

    bedgraph(prefix + ".bedgraph",
             zip(chrom_names, [item for item in ambiguities]))
    subprocess.check_call([
        "wigToBigWig", prefix + ".bedgraph", prefix + "-chrom.sizes",
        prefix + ".bw"
    ])
    os.unlink(prefix + ".bedgraph")
    os.unlink(prefix + "-chrom.sizes")
    grace.status(old)
def make_bigwig(prefix, bam_filenames, make_spanner, fragments=False, stop_after=None, scale=1.0, polya=False):
    """Write stranded coverage bigwigs prefix-fwd.bw and prefix-rev.bw.

    make_spanner maps an alignment (or, with fragments=True, the list of
    alignments of one fragment) to a coverage spanner. Read pairs are
    assumed --> <-- oriented, so an item is forward strand exactly when
    is_reverse == is_read2.

    fragments  - iterate whole read-pair fragments instead of alignments
    stop_after - if given, stop after this many items per file
    scale      - multiply all depths by this factor
    polya      - only count poly(A) reads/fragments
    """
    # Prefer pysam when installed; otherwise use the bundled reader.
    try:
        import pysam
        have_pysam = True
    except ImportError:
        have_pysam = False

    header = sam.parsed_bam_headers(bam_filenames[0])

    # Chromosome sizes file, needed later by the wigToBigWig calls.
    with open(prefix + "-chrom.sizes", "wb") as out:
        for sq in header["SQ"]:
            out.write("{}\t{}\n".format(sq["SN"], sq["LN"]))

    chrom_names = [sq["SN"] for sq in header["SQ"]]
    chrom_sizes = [int(sq["LN"]) for sq in header["SQ"]]

    # One depth accumulator per chromosome, per strand.
    forward = {}
    reverse = {}
    for name, size in zip(chrom_names, chrom_sizes):
        forward[name] = Piler(size)
        reverse[name] = Piler(size)

    old = grace.status("Bigwig")
    for filename in bam_filenames:
        if have_pysam:
            bam = pysam.AlignmentFile(filename)
        else:
            bam = sam.Bam_reader(filename)

        n = 0
        if not fragments:
            for rec in bam:
                if rec.is_unmapped or rec.is_secondary or rec.is_supplementary:
                    continue
                if polya and not alignment_is_polya(rec):
                    continue
                # Assume --> <-- oriented read pairs
                is_fwd = bool(rec.is_reverse) == bool(rec.is_read2)
                target = forward if is_fwd else reverse
                target[rec.reference_name].add(make_spanner(rec))
                n += 1
                if stop_after is not None and n > stop_after:
                    break
                if n % 1000000 == 0:
                    grace.status(os.path.basename(prefix) + " " + filename
                                 + " " + grace.pretty_number(n))
        else:
            for frag in iter_fragments(bam):
                if polya and not any(alignment_is_polya(al) for al in frag):
                    continue
                # Assume --> <-- oriented read pairs
                is_fwd = bool(frag[0].is_reverse) == bool(frag[0].is_read2)
                target = forward if is_fwd else reverse
                target[frag[0].reference_name].add(make_spanner(frag))
                n += 1
                if stop_after is not None and n > stop_after:
                    break
                if n % 1000000 == 0:
                    grace.status(os.path.basename(prefix) + " " + filename
                                 + " " + grace.pretty_number(n))

        # Only the pysam reader needs explicit closing.
        if have_pysam:
            bam.close()

    bedgraph(
        prefix + "-fwd.bedgraph",
        zip(chrom_names, [
            scale_spanner(scale, forward[name].get()) for name in chrom_names
        ]))
    subprocess.check_call([
        "wigToBigWig", prefix + "-fwd.bedgraph", prefix + "-chrom.sizes",
        prefix + "-fwd.bw"
    ])
    os.unlink(prefix + "-fwd.bedgraph")

    bedgraph(
        prefix + "-rev.bedgraph",
        zip(chrom_names, [
            scale_spanner(scale, reverse[name].get()) for name in chrom_names
        ]))
    subprocess.check_call([
        "wigToBigWig", prefix + "-rev.bedgraph", prefix + "-chrom.sizes",
        prefix + "-rev.bw"
    ])
    os.unlink(prefix + "-rev.bedgraph")
    os.unlink(prefix + "-chrom.sizes")
    grace.status(old)