def main():
    """Write one GPD line for every aligned entry of a SAM stream or BAM file.

    Command-line arguments:
        input: path to a BAM file, or '-' to read SAM from STDIN.
        --minimum_intron: forwarded as ``min_intron`` to
            ``get_target_transcript`` (default 68).
        -o/--output: destination file.  A name ending in '.gz' is opened
            with ``gzip.open``; when omitted, lines go to STDOUT.
    """
    parser = argparse.ArgumentParser(
        description="",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('input', help="BAM file or Use - for STDIN for SAM")
    parser.add_argument('--minimum_intron', type=int, default=68,
                        help="smallest intron")
    parser.add_argument('-o', '--output', help="Output file, gzip is okay")
    args = parser.parse_args()

    # Choose the destination first, exactly as before: STDOUT unless a file
    # name was given, gzip-compressed when that name ends in '.gz'.
    if not args.output:
        of = sys.stdout
    elif args.output.endswith('.gz'):
        # NOTE(review): mode 'w' receives text below; this matches the
        # Python 2 behaviour the rest of the file relies on.
        of = gzip.open(args.output, 'w')
    else:
        of = open(args.output, 'w')

    try:
        if args.input == '-':
            sh = SamStream(sys.stdin)
        else:
            sh = BAMFile(args.input)
        try:
            for e in sh:
                # Unaligned entries produce no output line.
                if not e.is_aligned():
                    continue
                transcript = e.get_target_transcript(
                    min_intron=args.minimum_intron)
                of.write(transcript.get_gpd_line() + "\n")
        finally:
            # Release the input even if a conversion raises part-way through.
            sh.close()
    finally:
        if of is sys.stdout:
            # Never close the interpreter's STDOUT; just push pending output.
            of.flush()
        else:
            of.close()
def main():
    """Re-sort a BAM file after putting its @SQ header lines in name order.

    The header is read with ``samtools view -H``.  Every header line that
    starts with ``@SQ<TAB>SN:`` is replaced, at the position of the first
    such line, by one freshly generated ``@SQ`` line per sequence in sorted
    name order.  Only the ``SN`` and ``LN`` fields are regenerated; other
    ``@SQ`` fields are not carried over.  The rewritten header followed by
    the alignments from ``samtools view`` is piped into ``samtools sort``.

    Command-line arguments:
        input: BAM file to read.
        output: destination; must match ``<prefix>.bam`` with no whitespace.
            ``<prefix>`` is handed to ``samtools sort``.
        --threads: value for ``samtools sort -@`` (default: ``cpu_count()``).

    Exits with status 1 when ``output`` does not end in ``.bam``.
    """
    parser = argparse.ArgumentParser(
        description="",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('input', help="Use bam file")
    parser.add_argument('output', help="Use bam file")
    parser.add_argument('--threads', type=int, default=cpu_count(),
                        help="Thread count")
    args = parser.parse_args()

    m = re.match(r'^(\S+)\.bam$', args.output)
    if not m:
        sys.stderr.write("use bam output\n")
        # A usage error must not look like success to the calling shell.
        sys.exit(1)

    # Argument lists (rather than splitting a command string) keep input
    # paths containing whitespace intact.
    p1 = Popen(['samtools', 'view', '-H', args.input], stdout=PIPE)
    bs = SamStream(p1.stdout)
    rlens = bs.get_header().get_sequence_lengths()
    htext = bs.header_text
    p1.communicate()
    hlines = htext.rstrip().split("\n")

    sort_cmd = ['samtools', 'sort', '-@', str(args.threads), '-', m.group(1)]
    sys.stderr.write(' '.join(sort_cmd) + "\n")
    p = Popen(sort_cmd, stdin=PIPE)

    done_lens = False
    for ln in hlines:
        if re.match('@SQ\tSN:', ln):
            # Emit the whole sorted @SQ block once, where the first @SQ was.
            if not done_lens:
                done_lens = True
                for name in sorted(rlens.keys()):
                    p.stdin.write(
                        "@SQ\tSN:" + name + "\tLN:" + str(rlens[name]) + "\n")
        else:
            p.stdin.write(ln.rstrip("\n") + "\n")
    # The header must reach the pipe before the child below starts writing
    # alignment records to the same pipe.
    p.stdin.flush()

    p1 = Popen(['samtools', 'view', args.input], stdout=p.stdin)
    p1.communicate()
    p.communicate()
def _write_reordered_header(out, header_lines, seq_lengths):
    """Write header_lines to out with the @SQ block regenerated in name order."""
    sq_written = False
    for line in header_lines:
        if not re.match('@SQ\tSN:', line):
            out.write(line.rstrip("\n") + "\n")
        elif not sq_written:
            # All original @SQ lines collapse into one sorted block placed
            # where the first of them appeared.
            sq_written = True
            for name in sorted(seq_lengths.keys()):
                out.write(
                    "@SQ\tSN:" + name + "\tLN:" + str(seq_lengths[name]) + "\n")


def main():
    """Sort a BAM file with ``samtools sort`` using a name-ordered @SQ header.

    Steps, as performed below:
      1. ``samtools view -H`` supplies the header text and, through
         ``SamStream``, the sequence lengths.
      2. ``samtools sort`` is started reading from a pipe.
      3. The header is written to that pipe with its ``@SQ`` lines replaced
         by regenerated ``SN``/``LN`` lines in sorted sequence-name order.
      4. ``samtools view`` streams the alignment records into the same pipe.

    Command-line arguments:
        input: BAM file to read.
        output: must match ``<prefix>.bam`` (no whitespace); ``<prefix>`` is
            the output argument given to ``samtools sort``.
        --threads: thread count for ``samtools sort -@``.

    Exits with status 1 when ``output`` is not a ``.bam`` name.
    """
    parser = argparse.ArgumentParser(
        description="",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('input', help="Use bam file")
    parser.add_argument('output', help="Use bam file")
    parser.add_argument('--threads', type=int, default=cpu_count(),
                        help="Thread count")
    args = parser.parse_args()

    prefix_match = re.match(r'^(\S+)\.bam$', args.output)
    if prefix_match is None:
        sys.stderr.write("use bam output\n")
        # Non-zero status so callers can detect the usage error.
        sys.exit(1)

    # Commands are passed as lists so an input path with spaces survives.
    header_proc = Popen(['samtools', 'view', '-H', args.input], stdout=PIPE)
    bs = SamStream(header_proc.stdout)
    rlens = bs.get_header().get_sequence_lengths()
    htext = bs.header_text
    header_proc.communicate()

    sort_cmd = ['samtools', 'sort', '-@', str(args.threads), '-',
                prefix_match.group(1)]
    sys.stderr.write(' '.join(sort_cmd) + "\n")
    sorter = Popen(sort_cmd, stdin=PIPE)

    _write_reordered_header(sorter.stdin, htext.rstrip().split("\n"), rlens)
    # Ensure the header is in the pipe before the records that follow it.
    sorter.stdin.flush()

    body_proc = Popen(['samtools', 'view', args.input], stdout=sorter.stdin)
    body_proc.communicate()
    sorter.communicate()
def _open_gpd_output(path):
    """Return the handle to write to: STDOUT, a gzip file, or a plain file."""
    if not path:
        return sys.stdout
    if path[-3:] == '.gz':
        return gzip.open(path, 'w')
    return open(path, 'w')


def main():
    """Convert aligned SAM/BAM entries to GPD lines.

    Command-line arguments:
        input: BAM file path, or '-' to read SAM from STDIN.
        --minimum_intron: ``min_intron`` value passed to
            ``get_target_transcript`` (default 68).
        -o/--output: output file; names ending in '.gz' are gzip-compressed.
            STDOUT is used when the option is absent.

    Entries for which ``is_aligned()`` is false are skipped.
    """
    parser = argparse.ArgumentParser(
        description="",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('input', help="BAM file or Use - for STDIN for SAM")
    parser.add_argument('--minimum_intron', type=int, default=68,
                        help="smallest intron")
    parser.add_argument('-o', '--output', help="Output file, gzip is okay")
    args = parser.parse_args()

    of = _open_gpd_output(args.output)
    try:
        sh = SamStream(sys.stdin) if args.input == '-' else BAMFile(args.input)
        try:
            for e in sh:
                if e.is_aligned():
                    gpd_line = e.get_target_transcript(
                        min_intron=args.minimum_intron).get_gpd_line()
                    of.write(gpd_line + "\n")
        finally:
            # Close the reader even when an entry fails to convert.
            sh.close()
    finally:
        # Only close handles this function opened; STDOUT is just flushed so
        # later writers in the same process are not broken.
        if of is sys.stdout:
            of.flush()
        else:
            of.close()
def do_sam(args):
    """Sort SAM/BAM input into a BAM file with a name-ordered @SQ header.

    The header is rewritten so that all non-``@SQ`` lines come first (in
    their original order), followed by the ``@SQ`` lines sorted by the text
    after the first three characters of their second field (the sequence
    name after ``SN:``).  Header and records are piped through
    ``samtools view -Sb -`` into ``samtools sort``.

    Args:
        args: object with attributes
            input: '-' for SAM (with header) on STDIN, otherwise a path
                ending in ``.bam``.
            output: path ending in ``.bam``; the part before ``.bam`` is
                given to ``samtools sort`` as its output argument.
            threads: if truthy, passed to ``samtools sort -@``.

    Exits with status 1 (after a message on STDERR) when ``input`` or
    ``output`` does not satisfy the requirements above.
    """
    if args.input != '-' and not re.search(r'\.bam$', args.input):
        sys.stderr.write(
            "ERROR input expects bam unless piping to stdin.. then SAM with header\n")
        sys.exit(1)
    m = re.match(r'^(.+)\.bam$', args.output) if args.output else None
    if not m:
        sys.stderr.write("ERROR sam sorts must output to a bam file\n")
        sys.exit(1)

    # Argument lists keep paths containing whitespace intact; options are
    # placed before the positional arguments.
    sort_cmd = ['samtools', 'sort']
    if args.threads:
        sort_cmd += ['-@', str(args.threads)]
    sort_cmd += ['-', m.group(1)]

    p = None
    if args.input == '-':
        inf = sys.stdin
    else:
        p = Popen(['samtools', 'view', '-h', args.input],
                  stdout=PIPE, bufsize=1)
        inf = p.stdout

    s = SamStream(inf)
    header = s.header_text.rstrip().split("\n")
    fields = [line.split("\t") for line in header]
    top = [line for line, parts in zip(header, fields) if parts[0] != '@SQ']
    chroms = sorted((parts for parts in fields if parts[0] == '@SQ'),
                    key=lambda parts: parts[1][3:])

    pout = Popen(sort_cmd, stdin=PIPE)
    p2 = Popen(['samtools', 'view', '-Sb', '-'],
               stdin=PIPE, stdout=pout.stdin)
    for line in top:
        p2.stdin.write(line.rstrip() + "\n")
    for parts in chroms:
        p2.stdin.write("\t".join(parts).rstrip() + "\n")
    for sam in s:
        p2.stdin.write(sam.get_line().rstrip() + "\n")

    # Finish the pipeline downstream-last, then reap the reader if one ran.
    p2.communicate()
    pout.communicate()
    if p is not None:
        p.communicate()
    return
def main():
    """Print spliced alignments whose junctions have enough read support.

    Input records are pre-filtered with awk: lines with fewer than 10 fields
    (header lines) pass through, and alignment lines are kept only when
    field 6 (the CIGAR column) contains ``N``.  For BAM input,
    ``samtools view -F 4 -h`` supplies the SAM text.  Header lines are
    printed first.  Then, for each group yielded by ``LocusStream``,
    ``get_junctions`` returns junction/read-index pairs and the reads; a
    read is printed when at least one of its junctions occurs
    ``--minimum_support`` times or more within the group.

    Command-line arguments:
        input: '-' for SAM on STDIN, otherwise a BAM file.
        --minimum_intron_size: forwarded to ``SamStream`` (default 68).
        --minimum_overhang: forwarded to ``SamStream`` (default 10).
        --minimum_support: occurrences a junction needs (default 1).
    """
    parser = argparse.ArgumentParser(
        description="",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('input', help="Use - for STDIN or read bam")
    parser.add_argument('--minimum_intron_size', type=int, default=68,
                        help="Require intron to be this size or larger")
    parser.add_argument('--minimum_overhang', type=int, default=10,
                        help="At least this many bases on each side of an intron")
    parser.add_argument('--minimum_support', type=int, default=1,
                        help="Minimum number of reads that should support a junction to report any of the reads")
    args = parser.parse_args()

    inf = sys.stdin
    p = None
    if args.input != '-':
        # An argument list keeps an input path with whitespace in one piece.
        p = Popen(['samtools', 'view', '-F', '4', '-h', args.input],
                  stdout=PIPE, bufsize=1)
        inf = p.stdout

    # The awk program is a constant, so it is passed directly as an argument
    # instead of going through a shell.
    awk_program = '{if(NF<10) print $0; else if($6~/N/) print $0;}'
    p2 = Popen(['awk', awk_program], stdout=PIPE, stdin=inf, bufsize=1)

    stream = SamStream(p2.stdout,
                       minimum_intron_size=args.minimum_intron_size,
                       minimum_overhang=args.minimum_overhang)
    lstream = LocusStream(stream)
    for h in stream.header:
        print(h.rstrip())

    for r in lstream:
        # juncs holds (junction, read index) pairs; sams holds the reads.
        juncs, sams = get_junctions(r.get_payload(), args)
        evidence = {}
        lines = {}
        for junc in juncs:
            jstr = junc[0].get_range_string()
            evidence[jstr] = evidence.get(jstr, 0) + 1
            lines.setdefault(jstr, set()).add(junc[1])

        accepted = set()
        for jstr, count in evidence.items():
            if count >= args.minimum_support:
                accepted.update(lines[jstr])

        # Emit accepted reads in index order.
        for i in sorted(accepted):
            print(sams[i].get_line().rstrip())

    p2.communicate()
    if p is not None:
        p.communicate()
def main():
    """Print the PSL representation of every aligned SAM/BAM entry.

    Command-line arguments:
        input: '-' to read SAM from STDIN, otherwise a BAM file path.
        -r/--reference: reference FASTA file (required).  Its full contents
            are read and passed to ``FastaData``; the result is supplied to
            the reader as ``reference``.

    Entries for which ``is_aligned()`` is false are skipped.
    """
    parser = argparse.ArgumentParser(
        description="",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('input', help="Use - for STDIN or specify a BAM file")
    parser.add_argument('-r', '--reference', help="Reference fasta",
                        required=True)
    args = parser.parse_args()

    ref = None
    if args.reference:
        # Read inside ``with`` so the FASTA handle is closed right away
        # instead of being left for garbage collection.
        with open(args.reference, 'rb') as fasta_handle:
            ref = FastaData(fasta_handle.read())

    # Keep the parsed argument untouched; the reader gets its own name.
    if args.input == '-':
        alignments = SamStream(sys.stdin, reference=ref)
    else:
        alignments = BAMFile(args.input, reference=ref)

    for e in alignments:
        if e.is_aligned():
            print(e.get_PSL())