def process_locus(locus, args):
    """Emit the exon coverage of one locus as runs of constant depth.

    Every alignment in ``locus`` is converted SAM -> PSL -> genePred and
    smoothed with ``args.min_intron``.  The exons are piled up into a
    per-position depth, and each maximal run of *adjacent* positions that
    share one depth (and reach ``args.min_depth``) is passed to
    ``output_depth`` as the tab-separated line ``name, start, end, depth``.
    ``end`` is one past the last position of the run.

    Args:
        locus: indexable, iterable collection of SAM entries.  The reference
            name is read from the first entry's ``'rname'`` value.
        args: options object; ``min_intron`` and ``min_depth`` are read here
            and the object is forwarded unchanged to ``output_depth``.

    Returns:
        None.  Results are delivered through ``output_depth``.
    """
    # Nothing to report for an empty locus (indexing locus[0] would fail).
    if not locus:
        return

    s2psl = SAMtoPSLconversionFactory()
    chrom = locus[0].value('rname')

    # Count identical exons once so the per-base loop below runs a single
    # time per distinct (start, end) pair instead of once per alignment.
    exon_counts = {}
    for sam in locus:
        psl = PSL(s2psl.convert_line(sam.get_line()))
        gpd = GenePredEntry(psl.get_genepred_line())
        gpd = gpd.get_smoothed(args.min_intron)
        starts = gpd.value('exonStarts')
        ends = gpd.value('exonEnds')
        for i in range(gpd.get_exon_count()):
            exon = (int(starts[i]), int(ends[i]))
            exon_counts[exon] = exon_counts.get(exon, 0) + 1

    # Per-position depth; an exon covers start .. end-1.
    depth = {}
    for (start, end), count in exon_counts.items():
        for pos in range(start, end):
            depth[pos] = depth.get(pos, 0) + count

    def emit(first, last, value):
        output_depth(
            chrom + "\t" + str(first) + "\t" + str(last + 1) + "\t" + str(value),
            args)

    run_start = None   # None means "no open run"; 0 is a valid coordinate
    run_depth = None
    last_pos = None
    for pos in sorted(depth):
        current = depth[pos]
        if current < args.min_depth:
            continue
        # Close the open run when the depth changes OR when positions are not
        # adjacent (an uncovered gap, or positions skipped for low depth).
        # Otherwise two separate stretches of equal depth would be fused.
        if run_start is not None and (current != run_depth
                                      or pos != last_pos + 1):
            emit(run_start, last_pos, run_depth)
            run_start = None
        if run_start is None:
            run_start = pos
            run_depth = current
        last_pos = pos

    if run_start is not None:
        emit(run_start, last_pos, run_depth)
def main():
    """Command-line entry point: batch name-grouped SAM entries for mate joining.

    Input selection:
      * ``--bam``, or no format flag with a real filename: the input is
        piped through ``samtools view - -h`` and the SAM text is read from
        the subprocess.
      * ``--sam FILE``: FILE is read directly as SAM text.
      * ``-`` (without ``--bam``): SAM text is read from standard input.

    Header lines are echoed to stdout and fed to the SAM->PSL converter.
    Groups returned by ``MultiEntrySamReader.read_entries()`` are collected
    into batches of up to 10000 groups.  Each batch is handed to
    ``do_buffer`` and its result to ``do_callback`` -- inline when
    ``--threads`` is 1, otherwise through a ``multiprocessing.Pool``.
    """
    parser = argparse.ArgumentParser(
        description="Take a sam file and join together mate pairs into single "
                    "alignments. Alignments must be ordered by query name.")
    parser.add_argument(
        'input', help="FILENAME input .sam or .bam or '-' for STDIN sam")
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--sam', action='store_true')
    group.add_argument('--bam', action='store_true')
    parser.add_argument('--mates_only', action='store_true',
                        help="Only output combined mates")
    parser.add_argument('--threads', type=int, default=1,
                        help="Number of threads to use, default is 1")
    args = parser.parse_args()

    from_stdin = args.input == '-'
    fh = None        # file opened here; closed in the finally block
    samtools = None  # samtools subprocess, when decoding goes through it
    try:
        if args.bam or not (args.sam or from_stdin):
            # Let samtools turn the input into SAM text including the header.
            if from_stdin:
                raw = sys.stdin
            else:
                raw = fh = open(args.input, 'rb')
            samtools = Popen('samtools view - -h'.split(),
                             stdin=raw, stdout=PIPE)
            inf = samtools.stdout
        elif from_stdin:
            inf = sys.stdin
        else:
            # --sam with a filename: read the file itself, not stdin.
            inf = fh = open(args.input)

        batch_size = 10000
        batch = []
        msr = MultiEntrySamReader(inf)
        spc = SAMtoPSLconversionFactory()
        psc = PSLtoSAMconversionFactory()

        # Echo the header and give each line to the converter.
        for h in msr.header:
            print(h.rstrip())
            spc.read_header_line(h)

        pool = Pool(processes=args.threads) if args.threads > 1 else None

        def dispatch(entries_batch):
            # Run one batch either asynchronously in the pool or inline.
            if pool is not None:
                pool.apply_async(
                    do_buffer,
                    args=(entries_batch, msr, spc, psc, args),
                    callback=do_callback)
            else:
                do_callback(do_buffer(entries_batch, msr, spc, psc, args))

        while True:
            entries = msr.read_entries()
            if not entries:
                break
            batch.append(entries)
            if len(batch) >= batch_size:
                dispatch(batch)
                # Rebind rather than clear: the dispatched list may still be
                # in use by the pool.
                batch = []
        if batch:
            dispatch(batch)

        if pool is not None:
            pool.close()
            pool.join()
    finally:
        if samtools is not None:
            samtools.stdout.close()
            samtools.wait()
        if fh is not None:
            fh.close()
def main():
    """Command-line entry point: print the best-covering alignment per read.

    Input selection:
      * ``--bam``, or no format flag with a real filename: the input is
        piped through ``samtools view - -h`` and the SAM text is read from
        the subprocess.
      * ``--sam FILE``: FILE is read directly as SAM text.
      * ``-`` (without ``--bam``): SAM text is read from standard input.

    Header lines are echoed to stdout and fed to the SAM->PSL converter.
    For each group returned by ``MultiEntrySamReader.read_entries()`` the
    alignment with the largest ``PSL.get_coverage()`` is tracked in three
    slots: alignments with neither flag 64 nor flag 128, alignments with
    flag 64, and alignments with flag 128 (128 wins if both are set).
    If the first slot is filled only that alignment is printed; otherwise
    the best alignment of each flagged slot is printed.

    The unflagged slot is limited to unflagged alignments.  Letting every
    alignment compete for it made the per-mate branch unreachable, which
    contradicted the behaviour promised in the description text.
    """
    parser = argparse.ArgumentParser(
        description="Take a sam or bam file and output the best alignment for "
                    "each read, it still can output the same read name twice "
                    "if they happen to be mate pairs, but it will only output "
                    "the best alignment for each individual mate, not "
                    "necessarily the two together. You could combine mates if "
                    "that is helpful with another script.")
    parser.add_argument(
        "input", help="FILENAME input .sam or .bam or '-' for STDIN sam")
    group = parser.add_mutually_exclusive_group()
    group.add_argument("--bam", action="store_true")
    group.add_argument("--sam", action="store_true")
    args = parser.parse_args()

    from_stdin = args.input == "-"
    fh = None        # file opened here; closed in the finally block
    samtools = None  # samtools subprocess, when decoding goes through it
    try:
        if args.bam or not (args.sam or from_stdin):
            # Let samtools turn the input into SAM text including the header.
            if from_stdin:
                raw = sys.stdin
            else:
                raw = fh = open(args.input, "rb")
            samtools = Popen("samtools view - -h".split(),
                             stdin=raw, stdout=PIPE)
            inf = samtools.stdout
        elif from_stdin:
            inf = sys.stdin
        else:
            # --sam with a filename: read the file itself, not stdin.
            inf = fh = open(args.input)

        msr = MultiEntrySamReader(inf)
        spc = SAMtoPSLconversionFactory()

        # Echo the header and give each line to the converter.
        for h in msr.header:
            print(h.rstrip())
            spc.read_header_line(h)

        while True:
            entries = msr.read_entries()
            if not entries:
                break

            # Slot 0: neither mate flag; slot 1: flag 64; slot 2: flag 128.
            best_coverage = [0, 0, 0]
            best_entry = [None, None, None]
            for sam in entries:
                pline = spc.convert_line(sam.get_line())
                if not pline:
                    continue
                slot = 0
                if sam.check_flag(64):
                    slot = 1
                if sam.check_flag(128):
                    slot = 2
                coverage = PSL(pline).get_coverage()
                # Strict '>' keeps the first of equally good alignments.
                if coverage > best_coverage[slot]:
                    best_coverage[slot] = coverage
                    best_entry[slot] = sam

            if best_entry[0] is not None:
                # An unflagged (e.g. already combined) alignment exists.
                print(best_entry[0].get_line())
            else:
                # Paired but not joined: report each mate on its own.
                if best_entry[1] is not None:
                    print(best_entry[1].get_line())
                if best_entry[2] is not None:
                    print(best_entry[2].get_line())
    finally:
        if samtools is not None:
            samtools.stdout.close()
            samtools.wait()
        if fh is not None:
            fh.close()
def main():
    """Command-line entry point: print the best-covering alignment per read.

    Input selection:
      * ``--bam``, or no format flag with a real filename: the input is
        piped through ``samtools view - -h`` and the SAM text is read from
        the subprocess.
      * ``--sam FILE``: FILE is read directly as SAM text.
      * ``-`` (without ``--bam``): SAM text is read from standard input.

    Header lines are echoed to stdout and fed to the SAM->PSL converter.
    For each group returned by ``MultiEntrySamReader.read_entries()`` the
    alignment with the largest ``PSL.get_coverage()`` is tracked in three
    slots: alignments with neither flag 64 nor flag 128, alignments with
    flag 64, and alignments with flag 128 (128 wins if both are set).
    If the first slot is filled only that alignment is printed; otherwise
    the best alignment of each flagged slot is printed.

    The unflagged slot is limited to unflagged alignments.  Letting every
    alignment compete for it made the per-mate branch unreachable, which
    contradicted the behaviour promised in the description text.
    """
    parser = argparse.ArgumentParser(
        description='Take a sam or bam file and output the best alignment for '
                    'each read, it still can output the same read name twice '
                    'if they happen to be mate pairs, but it will only output '
                    'the best alignment for each individual mate, not '
                    'necessarily the two together. You could combine mates if '
                    'that is helpful with another script.')
    parser.add_argument(
        'input', help="FILENAME input .sam or .bam or '-' for STDIN sam")
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--bam', action='store_true')
    group.add_argument('--sam', action='store_true')
    args = parser.parse_args()

    from_stdin = args.input == '-'
    fh = None        # file opened here; closed in the finally block
    samtools = None  # samtools subprocess, when decoding goes through it
    try:
        if args.bam or not (args.sam or from_stdin):
            # Let samtools turn the input into SAM text including the header.
            if from_stdin:
                raw = sys.stdin
            else:
                raw = fh = open(args.input, 'rb')
            samtools = Popen('samtools view - -h'.split(),
                             stdin=raw, stdout=PIPE)
            inf = samtools.stdout
        elif from_stdin:
            inf = sys.stdin
        else:
            # --sam with a filename: read the file itself, not stdin.
            inf = fh = open(args.input)

        msr = MultiEntrySamReader(inf)
        spc = SAMtoPSLconversionFactory()

        # Echo the header and give each line to the converter.
        for h in msr.header:
            print(h.rstrip())
            spc.read_header_line(h)

        while True:
            entries = msr.read_entries()
            if not entries:
                break

            # Slot 0: neither mate flag; slot 1: flag 64; slot 2: flag 128.
            best_coverage = [0, 0, 0]
            best_entry = [None, None, None]
            for sam in entries:
                pline = spc.convert_line(sam.get_line())
                if not pline:
                    continue
                slot = 0
                if sam.check_flag(64):
                    slot = 1
                if sam.check_flag(128):
                    slot = 2
                coverage = PSL(pline).get_coverage()
                # Strict '>' keeps the first of equally good alignments.
                if coverage > best_coverage[slot]:
                    best_coverage[slot] = coverage
                    best_entry[slot] = sam

            if best_entry[0] is not None:
                # An unflagged (e.g. already combined) alignment exists.
                print(best_entry[0].get_line())
            else:
                # Paired but not joined: report each mate on its own.
                if best_entry[1] is not None:
                    print(best_entry[1].get_line())
                if best_entry[2] is not None:
                    print(best_entry[2].get_line())
    finally:
        if samtools is not None:
            samtools.stdout.close()
            samtools.wait()
        if fh is not None:
            fh.close()
def main():
    """Command-line entry point: batch name-grouped SAM entries for mate joining.

    Input selection:
      * ``--bam``, or no format flag with a real filename: the input is
        piped through ``samtools view - -h`` and the SAM text is read from
        the subprocess.
      * ``--sam FILE``: FILE is read directly as SAM text.
      * ``-`` (without ``--bam``): SAM text is read from standard input.

    Header lines are echoed to stdout and fed to the SAM->PSL converter.
    Groups returned by ``MultiEntrySamReader.read_entries()`` are collected
    into batches of up to 10000 groups.  Each batch is handed to
    ``do_buffer`` and its result to ``do_callback`` -- inline when
    ``--threads`` is 1, otherwise through a ``multiprocessing.Pool``.
    """
    parser = argparse.ArgumentParser(
        description="Take a sam file and join together mate pairs into single "
                    "alignments. Alignments must be ordered by query name.")
    parser.add_argument(
        'input', help="FILENAME input .sam or .bam or '-' for STDIN sam")
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--sam', action='store_true')
    group.add_argument('--bam', action='store_true')
    parser.add_argument('--mates_only', action='store_true',
                        help="Only output combined mates")
    parser.add_argument('--threads', type=int, default=1,
                        help="Number of threads to use, default is 1")
    args = parser.parse_args()

    from_stdin = args.input == '-'
    fh = None        # file opened here; closed in the finally block
    samtools = None  # samtools subprocess, when decoding goes through it
    try:
        if args.bam or not (args.sam or from_stdin):
            # Let samtools turn the input into SAM text including the header.
            if from_stdin:
                raw = sys.stdin
            else:
                raw = fh = open(args.input, 'rb')
            samtools = Popen('samtools view - -h'.split(),
                             stdin=raw, stdout=PIPE)
            inf = samtools.stdout
        elif from_stdin:
            inf = sys.stdin
        else:
            # --sam with a filename: read the file itself, not stdin.
            inf = fh = open(args.input)

        batch_size = 10000
        batch = []
        msr = MultiEntrySamReader(inf)
        spc = SAMtoPSLconversionFactory()
        psc = PSLtoSAMconversionFactory()

        # Echo the header and give each line to the converter.
        for h in msr.header:
            print(h.rstrip())
            spc.read_header_line(h)

        pool = Pool(processes=args.threads) if args.threads > 1 else None

        def dispatch(entries_batch):
            # Run one batch either asynchronously in the pool or inline.
            if pool is not None:
                pool.apply_async(
                    do_buffer,
                    args=(entries_batch, msr, spc, psc, args),
                    callback=do_callback)
            else:
                do_callback(do_buffer(entries_batch, msr, spc, psc, args))

        while True:
            entries = msr.read_entries()
            if not entries:
                break
            batch.append(entries)
            if len(batch) >= batch_size:
                dispatch(batch)
                # Rebind rather than clear: the dispatched list may still be
                # in use by the pool.
                batch = []
        if batch:
            dispatch(batch)

        if pool is not None:
            pool.close()
            pool.join()
    finally:
        if samtools is not None:
            samtools.stdout.close()
            samtools.wait()
        if fh is not None:
            fh.close()