def main():
    """Parse the command line and run get_region_vcf over the whole BAM.

    The reference sequence names and lengths are read from the BAM header
    with ``samtools view -H``.  Every sequence is cut into consecutive
    1-based, inclusive windows of at most ``itersize`` bases, and
    ``get_region_vcf(args, chrom, start, end)`` is called once per window.
    Each result is handed to ``do_output``: directly when ``--threads`` is
    1 or less, otherwise as the callback of a multiprocessing pool task.

    Side effects: sets the module-level ``gtotal`` to the number of windows.
    Exits through ``parser.error`` if samtools cannot read the header.
    """
    global gtotal

    parser = argparse.ArgumentParser(
        description="Takes a BAM file preferably one already filtered to be uniquely mapped reads.")
    parser.add_argument('input_fasta', help="FASTAFILE indexed")
    parser.add_argument('input_sorted_bam', help="BAMFILE sorted indexed")
    parser.add_argument('--threads', type=int,
                        default=multiprocessing.cpu_count(),
                        help="Number of threads defautl cpu_count")
    parser.add_argument(
        '--include_multiply_mapped_reads', action='store_true',
        help="Include multiply mapped reads that are excluded by default. "
             "Note that this feature is not complete as it is with the 256 sam filter. "
             "it will only remove secondary alignments while still leaving the "
             "multiply mapped primary alignments. "
             "To only use uniquely mapped reads you need to pre-filter on unique "
             "and start from that indexed bam.")
    parser.add_argument(
        '--include_indels', action='store_true',
        help="By default only SNPs and only loci with multiple genotypes are output. "
             "This will output indels.")
    parser.add_argument('--consensus', action='store_true',
                        help="Use the original caller")
    args = parser.parse_args()

    # Read the @SQ lines of the header.  The command is passed as an argv
    # list so a BAM path containing whitespace is not split into several
    # arguments, and the pipe is opened in text mode so the str pattern
    # below also matches under Python 3 (where pipes yield bytes by default).
    header_proc = Popen(['samtools', 'view', '-H', args.input_sorted_bam],
                        stdout=PIPE, universal_newlines=True)
    sq_pattern = re.compile(r'@SQ\s+SN:(\S+)\s+LN:(\d+)')
    chromlens = {}
    try:
        for line in header_proc.stdout:
            m = sq_pattern.match(line.rstrip())
            if m:
                chromlens[m.group(1)] = int(m.group(2))
    finally:
        # Always reap the child so it does not linger as a zombie.
        header_proc.stdout.close()
        header_proc.wait()
    if header_proc.returncode != 0:
        # Without this check a failed samtools call silently produced no output.
        parser.error("samtools could not read the header of "
                     + args.input_sorted_bam)

    # Build the list of windows once; it is used both for the total count
    # and for the dispatch loop.
    itersize = 10000000
    regions = []
    for chrom, length in chromlens.items():
        # Windows are 1-based and inclusive, so the range has to run up to
        # length + 1.  Stopping at length skipped the final base whenever
        # length % itersize == 1 (including a sequence of length 1).
        for rstart in range(1, length + 1, itersize):
            rend = min(rstart + itersize - 1, length)
            regions.append((chrom, rstart, rend))

    # Assigned before the pool is created, as in the original ordering.
    # NOTE(review): presumably read by do_output for progress reporting -- confirm.
    gtotal = len(regions)

    if args.threads <= 1:
        for chrom, rstart, rend in regions:
            do_output(get_region_vcf(args, chrom, rstart, rend))
        return

    pool = multiprocessing.Pool(processes=args.threads)
    # NOTE: the AsyncResult objects are not kept, so an exception raised
    # inside a worker is not reported here; do_output simply is not called
    # for that window.
    for chrom, rstart, rend in regions:
        pool.apply_async(get_region_vcf,
                         args=(args, chrom, rstart, rend),
                         callback=do_output)
    pool.close()
    pool.join()
def main():
    """Parse the command line and process the alignments from ``input``.

    Single-process mode is used when ``--threads`` is 1 or less, when the
    input is ``-`` (SAM on stdin), or when ``<input>.bai`` does not exist.
    In that mode ``dostream`` is called once, either on ``sys.stdin`` or on
    the output of ``samtools view -F 4 -h <input>``.

    Otherwise the reference sequence names are read from the BAM header and
    one ``dofilestream(input, name)`` task per sequence is submitted to a
    process pool, with ``printres`` as the completion callback.  Tasks are
    submitted in sorted name order; completion order is not controlled.

    Exits through ``parser.error`` if samtools cannot read the header.
    """
    parser = argparse.ArgumentParser(
        description="",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        'input',
        help="Sorted bam (preferrably indexed) Use - for STDIN sam. "
             "If streaming in be sure to remove unmapped reads")
    parser.add_argument(
        '--threads', type=int, default=1,
        help="use multiple threads the bam has been indexed. "
             "Order is not preserved.")
    args = parser.parse_args()

    from_stdin = args.input == '-'

    # The pool needs more than one worker and an index next to the BAM.
    # Treating every value <= 1 as serial avoids handing 0 or a negative
    # process count to Pool, which raises ValueError.
    use_pool = False
    if args.threads > 1 and not from_stdin:
        if os.path.isfile(args.input + '.bai'):
            use_pool = True
        else:
            sys.stderr.write(
                "Warning doing single thread because lacking index\n")

    if not use_pool:
        if from_stdin:
            dostream(sys.stdin)
            return
        # argv list: a path containing whitespace stays one argument.
        # Text mode: dostream is handed str lines, as it is from sys.stdin.
        view_proc = Popen(['samtools', 'view', '-F', '4', '-h', args.input],
                          stdout=PIPE, universal_newlines=True)
        try:
            dostream(view_proc.stdout)
        finally:
            # Close our end first so samtools cannot block on a full pipe,
            # then reap it even if dostream raised.
            view_proc.stdout.close()
            view_proc.wait()
        return

    # The sequence names are only needed for the pooled path, so the header
    # is read here rather than unconditionally.
    header_proc = Popen(['samtools', 'view', '-H', args.input],
                        stdout=PIPE, universal_newlines=True)
    sq_pattern = re.compile(r'@SQ\s+SN:(\S+)\s+LN:\d+')
    chrs = set()
    try:
        for line in header_proc.stdout:
            m = sq_pattern.match(line)
            if m:
                chrs.add(m.group(1))
    finally:
        header_proc.stdout.close()
        header_proc.wait()
    if header_proc.returncode != 0:
        # Without this check a failed samtools call silently produced no output.
        parser.error("samtools could not read the header of " + args.input)

    pool = Pool(processes=args.threads)
    # NOTE: the AsyncResult objects are not kept, so an exception raised
    # inside a worker is not reported here; printres simply is not called
    # for that sequence.
    for chrom in sorted(chrs):
        pool.apply_async(dofilestream, args=(args.input, chrom),
                         callback=printres)
    pool.close()
    pool.join()
def main():
    """Parse the command line and process the alignments from ``input``.

    Single-process mode is used when ``--threads`` is 1 or less, when the
    input is ``-`` (SAM on stdin), or when ``<input>.bai`` does not exist.
    In that mode ``dostream`` is called once, either on ``sys.stdin`` or on
    the output of ``samtools view -F 4 -h <input>``.

    Otherwise the reference sequence names are read from the BAM header and
    one ``dofilestream(input, name)`` task per sequence is submitted to a
    process pool, with ``printres`` as the completion callback.  Tasks are
    submitted in sorted name order; completion order is not controlled.

    Exits through ``parser.error`` if samtools cannot read the header.
    """
    parser = argparse.ArgumentParser(
        description="",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        'input',
        help="Sorted bam (preferrably indexed) Use - for STDIN sam. "
             "If streaming in be sure to remove unmapped reads")
    parser.add_argument(
        '--threads', type=int, default=1,
        help="use multiple threads the bam has been indexed. "
             "Order is not preserved.")
    args = parser.parse_args()

    from_stdin = args.input == '-'

    # The pool needs more than one worker and an index next to the BAM.
    # Treating every value <= 1 as serial avoids handing 0 or a negative
    # process count to Pool, which raises ValueError.
    use_pool = False
    if args.threads > 1 and not from_stdin:
        if os.path.isfile(args.input + '.bai'):
            use_pool = True
        else:
            sys.stderr.write(
                "Warning doing single thread because lacking index\n")

    if not use_pool:
        if from_stdin:
            dostream(sys.stdin)
            return
        # argv list: a path containing whitespace stays one argument.
        # Text mode: dostream is handed str lines, as it is from sys.stdin.
        view_proc = Popen(['samtools', 'view', '-F', '4', '-h', args.input],
                          stdout=PIPE, universal_newlines=True)
        try:
            dostream(view_proc.stdout)
        finally:
            # Close our end first so samtools cannot block on a full pipe,
            # then reap it even if dostream raised.
            view_proc.stdout.close()
            view_proc.wait()
        return

    # The sequence names are only needed for the pooled path, so the header
    # is read here rather than unconditionally.
    header_proc = Popen(['samtools', 'view', '-H', args.input],
                        stdout=PIPE, universal_newlines=True)
    sq_pattern = re.compile(r'@SQ\s+SN:(\S+)\s+LN:\d+')
    chrs = set()
    try:
        for line in header_proc.stdout:
            m = sq_pattern.match(line)
            if m:
                chrs.add(m.group(1))
    finally:
        header_proc.stdout.close()
        header_proc.wait()
    if header_proc.returncode != 0:
        # Without this check a failed samtools call silently produced no output.
        parser.error("samtools could not read the header of " + args.input)

    pool = Pool(processes=args.threads)
    # NOTE: the AsyncResult objects are not kept, so an exception raised
    # inside a worker is not reported here; printres simply is not called
    # for that sequence.
    for chrom in sorted(chrs):
        pool.apply_async(dofilestream, args=(args.input, chrom),
                         callback=printres)
    pool.close()
    pool.join()
def main():
    """Parse the command line and run get_region_vcf over the whole BAM.

    The reference sequence names and lengths are read from the BAM header
    with ``samtools view -H``.  Every sequence is cut into consecutive
    1-based, inclusive windows of at most ``itersize`` bases, and
    ``get_region_vcf(args, chrom, start, end)`` is called once per window.
    Each result is handed to ``do_output``: directly when ``--threads`` is
    1 or less, otherwise as the callback of a multiprocessing pool task.

    Side effects: sets the module-level ``gtotal`` to the number of windows.
    Exits through ``parser.error`` if samtools cannot read the header.
    """
    global gtotal

    parser = argparse.ArgumentParser(
        description="Takes a BAM file preferably one already filtered to be uniquely mapped reads.")
    parser.add_argument('input_fasta', help="FASTAFILE indexed")
    parser.add_argument('input_sorted_bam', help="BAMFILE sorted indexed")
    parser.add_argument('--threads', type=int,
                        default=multiprocessing.cpu_count(),
                        help="Number of threads defautl cpu_count")
    parser.add_argument(
        '--include_multiply_mapped_reads', action='store_true',
        help="Include multiply mapped reads that are excluded by default. "
             "Note that this feature is not complete as it is with the 256 sam filter. "
             "it will only remove secondary alignments while still leaving the "
             "multiply mapped primary alignments. "
             "To only use uniquely mapped reads you need to pre-filter on unique "
             "and start from that indexed bam.")
    parser.add_argument(
        '--include_indels', action='store_true',
        help="By default only SNPs and only loci with multiple genotypes are output. "
             "This will output indels.")
    parser.add_argument('--consensus', action='store_true',
                        help="Use the original caller")
    args = parser.parse_args()

    # Read the @SQ lines of the header.  The command is passed as an argv
    # list so a BAM path containing whitespace is not split into several
    # arguments, and the pipe is opened in text mode so the str pattern
    # below also matches under Python 3 (where pipes yield bytes by default).
    header_proc = Popen(['samtools', 'view', '-H', args.input_sorted_bam],
                        stdout=PIPE, universal_newlines=True)
    sq_pattern = re.compile(r'@SQ\s+SN:(\S+)\s+LN:(\d+)')
    chromlens = {}
    try:
        for line in header_proc.stdout:
            m = sq_pattern.match(line.rstrip())
            if m:
                chromlens[m.group(1)] = int(m.group(2))
    finally:
        # Always reap the child so it does not linger as a zombie.
        header_proc.stdout.close()
        header_proc.wait()
    if header_proc.returncode != 0:
        # Without this check a failed samtools call silently produced no output.
        parser.error("samtools could not read the header of "
                     + args.input_sorted_bam)

    # Build the list of windows once; it is used both for the total count
    # and for the dispatch loop.
    itersize = 10000000
    regions = []
    for chrom, length in chromlens.items():
        # Windows are 1-based and inclusive, so the range has to run up to
        # length + 1.  Stopping at length skipped the final base whenever
        # length % itersize == 1 (including a sequence of length 1).
        for rstart in range(1, length + 1, itersize):
            rend = min(rstart + itersize - 1, length)
            regions.append((chrom, rstart, rend))

    # Assigned before the pool is created, as in the original ordering.
    # NOTE(review): presumably read by do_output for progress reporting -- confirm.
    gtotal = len(regions)

    if args.threads <= 1:
        for chrom, rstart, rend in regions:
            do_output(get_region_vcf(args, chrom, rstart, rend))
        return

    pool = multiprocessing.Pool(processes=args.threads)
    # NOTE: the AsyncResult objects are not kept, so an exception raised
    # inside a worker is not reported here; do_output simply is not called
    # for that window.
    for chrom, rstart, rend in regions:
        pool.apply_async(get_region_vcf,
                         args=(args, chrom, rstart, rend),
                         callback=do_output)
    pool.close()
    pool.join()