Ejemplo n.º 1
0
def main():
    """Run the region caller over every reference sequence of a sorted BAM.

    Parses the command line, reads the reference sequence lengths from the
    BAM header (``samtools view -H``), splits every sequence into
    consecutive windows of at most 10,000,000 positions starting at
    position 1, and hands each window to ``get_region_vcf``.  Each result is
    passed to ``do_output``.  With ``--threads`` greater than 1 the windows
    are dispatched to a ``multiprocessing.Pool``; otherwise they are
    processed in order in this process.

    Side effects: sets the module-level ``gtotal`` to the number of windows.
    """
    global gtotal

    parser = argparse.ArgumentParser(
        description=
        "Takes a BAM file preferably one already filtered to be uniquely mapped reads."
    )
    parser.add_argument('input_fasta', help="FASTAFILE indexed")
    parser.add_argument('input_sorted_bam', help="BAMFILE sorted indexed")
    parser.add_argument('--threads',
                        type=int,
                        default=multiprocessing.cpu_count(),
                        help="Number of threads defautl cpu_count")
    parser.add_argument(
        '--include_multiply_mapped_reads',
        action='store_true',
        help=
        "Include multiply mapped reads that are excluded by default.  Note that this feature is not complete as it is with the 256 sam filter.  it will only remove secondary alignments while still leaving the multiply mapped primary alignments.  To only use uniquely mapped reads you need to pre-filter on unique and start from that indexed bam."
    )
    parser.add_argument(
        '--include_indels',
        action='store_true',
        help=
        "By default only SNPs and only loci with multiple genotypes are output.  This will output indels."
    )
    parser.add_argument('--consensus',
                        action='store_true',
                        help="Use the original caller")
    args = parser.parse_args()

    # Read the SAM header.  The argv is given as a list so a BAM path that
    # contains whitespace is not split into several arguments, and the pipe
    # is opened in text mode so the str regex below also works on Python 3
    # (where a binary pipe yields bytes).
    header_proc = Popen(['samtools', 'view', '-H', args.input_sorted_bam],
                        stdout=PIPE,
                        universal_newlines=True)
    sq_pattern = re.compile(r'@SQ\s+SN:(\S+)\s+LN:(\d+)')
    chromlens = {}
    try:
        for line in header_proc.stdout:
            m = sq_pattern.match(line.rstrip())
            if m:
                chromlens[m.group(1)] = int(m.group(2))
    finally:
        # Reap the child so it does not linger as a zombie.
        header_proc.stdout.close()
        header_proc.wait()

    # Break every sequence into windows.  The stop value is length + 1 so
    # that the final position is still covered when the length is one more
    # than a multiple of the window size (and for length-1 sequences).
    itersize = 10000000
    regions = []
    for chrom in chromlens:
        chromlen = chromlens[chrom]
        for rstart in range(1, chromlen + 1, itersize):
            rend = min(rstart + itersize - 1, chromlen)
            regions.append((chrom, rstart, rend))
    gtotal = len(regions)

    if args.threads <= 1:
        for chrom, rstart, rend in regions:
            do_output(get_region_vcf(args, chrom, rstart, rend))
        return

    # NOTE(review): an exception raised inside a worker is not reported by
    # apply_async when only a success callback is given; confirm whether
    # failures should be surfaced.
    pool = multiprocessing.Pool(processes=args.threads)
    for chrom, rstart, rend in regions:
        pool.apply_async(get_region_vcf,
                         args=(args, chrom, rstart, rend),
                         callback=do_output)
    pool.close()
    pool.join()
def main():
    """Stream alignments from a sorted BAM (or SAM on STDIN) for processing.

    Single-threaded mode feeds one alignment stream to ``dostream``: either
    ``sys.stdin`` when the input is ``-`` or the output of
    ``samtools view -F 4 -h`` on the input file.  Multi-threaded mode is
    used only when ``--threads`` is greater than 1, the input is a file and
    ``<input>.bai`` exists; it then submits one ``dofilestream`` task per
    reference sequence named in the BAM header to a process pool, with
    ``printres`` as the completion callback.
    """
    parser = argparse.ArgumentParser(
        description="", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        'input',
        help=
        "Sorted bam (preferrably indexed) Use - for STDIN sam. If streaming in be sure to remove unmapped reads"
    )
    parser.add_argument(
        '--threads',
        type=int,
        default=1,
        help=
        "use multiple threads the bam has been indexed.  Order is not preserved."
    )
    args = parser.parse_args()

    from_stdin = args.input == '-'

    # Anything that is not an explicit request for more than one worker runs
    # single-threaded; this also keeps 0 or negative values away from Pool(),
    # which rejects them.
    single_thread = True
    if args.threads > 1 and not from_stdin:
        if os.path.isfile(args.input + '.bai'):
            single_thread = False
        else:
            sys.stderr.write(
                "Warning doing single thread because lacking index\n")

    # easy case of single thread
    if single_thread:
        if from_stdin:
            dostream(sys.stdin)
        else:
            # argv as a list so a path containing whitespace stays one
            # argument.  The pipe is left in its default mode because the
            # stream is consumed by dostream.
            view_proc = Popen(['samtools', 'view', '-F', '4', '-h', args.input],
                              stdout=PIPE)
            dostream(view_proc.stdout)
            view_proc.communicate()
        return

    # The reference names are only needed to fan out work, so the header is
    # read here rather than unconditionally.  Text mode keeps the str regex
    # usable on Python 3, where a binary pipe yields bytes.
    header_proc = Popen(['samtools', 'view', '-H', args.input],
                        stdout=PIPE,
                        universal_newlines=True)
    sq_pattern = re.compile(r'@SQ\s+SN:(\S+)\s+LN:\d+')
    chrs = set()
    try:
        for line in header_proc.stdout:
            m = sq_pattern.match(line)
            if m:
                chrs.add(m.group(1))
    finally:
        header_proc.stdout.close()
        header_proc.wait()

    pool = Pool(processes=args.threads)
    for chrom in sorted(chrs):
        pool.apply_async(dofilestream,
                         args=(args.input, chrom),
                         callback=printres)
    pool.close()
    pool.join()
Ejemplo n.º 3
0
def main():
  """Stream alignments from a sorted BAM (or SAM on STDIN) for processing.

  Single-threaded mode feeds one alignment stream to ``dostream``: either
  ``sys.stdin`` when the input is ``-`` or the output of
  ``samtools view -F 4 -h`` on the input file.  Multi-threaded mode is used
  only when ``--threads`` is greater than 1, the input is a file and
  ``<input>.bai`` exists; it then submits one ``dofilestream`` task per
  reference sequence named in the BAM header to a process pool, with
  ``printres`` as the completion callback.
  """
  parser = argparse.ArgumentParser(description="",formatter_class=argparse.ArgumentDefaultsHelpFormatter)
  parser.add_argument('input',help="Sorted bam (preferrably indexed) Use - for STDIN sam. If streaming in be sure to remove unmapped reads")
  parser.add_argument('--threads',type=int,default=1,help="use multiple threads the bam has been indexed.  Order is not preserved.")
  args = parser.parse_args()

  from_stdin = args.input == '-'

  # Anything that is not an explicit request for more than one worker runs
  # single-threaded; this also keeps 0 or negative values away from Pool(),
  # which rejects them.
  single_thread = True
  if args.threads > 1 and not from_stdin:
    if os.path.isfile(args.input+'.bai'):
      single_thread = False
    else:
      sys.stderr.write("Warning doing single thread because lacking index\n")

  #easy case of single thread
  if single_thread:
    if from_stdin:
      dostream(sys.stdin)
    else:
      # argv as a list so a path containing whitespace stays one argument.
      # The pipe is left in its default mode because the stream is consumed
      # by dostream.
      view_proc = Popen(['samtools','view','-F','4','-h',args.input],stdout=PIPE)
      dostream(view_proc.stdout)
      view_proc.communicate()
    return

  # The reference names are only needed to fan out work, so the header is
  # read here rather than unconditionally.  Text mode keeps the str regex
  # usable on Python 3, where a binary pipe yields bytes.
  header_proc = Popen(['samtools','view','-H',args.input],stdout=PIPE,universal_newlines=True)
  sq_pattern = re.compile(r'@SQ\s+SN:(\S+)\s+LN:\d+')
  chrs = set()
  try:
    for line in header_proc.stdout:
      m = sq_pattern.match(line)
      if m: chrs.add(m.group(1))
  finally:
    header_proc.stdout.close()
    header_proc.wait()

  pool = Pool(processes=args.threads)
  for chrom in sorted(chrs):
    pool.apply_async(dofilestream,args=(args.input,chrom),callback=printres)
  pool.close()
  pool.join()
Ejemplo n.º 4
0
def main():
  """Run the region caller over every reference sequence of a sorted BAM.

  Parses the command line, reads the reference sequence lengths from the BAM
  header (``samtools view -H``), splits every sequence into consecutive
  windows of at most 10,000,000 positions starting at position 1, and hands
  each window to ``get_region_vcf``.  Each result is passed to ``do_output``.
  With ``--threads`` greater than 1 the windows are dispatched to a
  ``multiprocessing.Pool``; otherwise they are processed in order in this
  process.

  Side effects: sets the module-level ``gtotal`` to the number of windows.
  """
  global gtotal

  parser = argparse.ArgumentParser(description="Takes a BAM file preferably one already filtered to be uniquely mapped reads.")
  parser.add_argument('input_fasta',help="FASTAFILE indexed")
  parser.add_argument('input_sorted_bam',help="BAMFILE sorted indexed")
  parser.add_argument('--threads',type=int,default=multiprocessing.cpu_count(),help="Number of threads defautl cpu_count")
  parser.add_argument('--include_multiply_mapped_reads',action='store_true',help="Include multiply mapped reads that are excluded by default.  Note that this feature is not complete as it is with the 256 sam filter.  it will only remove secondary alignments while still leaving the multiply mapped primary alignments.  To only use uniquely mapped reads you need to pre-filter on unique and start from that indexed bam.")
  parser.add_argument('--include_indels',action='store_true',help="By default only SNPs and only loci with multiple genotypes are output.  This will output indels.")
  parser.add_argument('--consensus',action='store_true',help="Use the original caller")
  args = parser.parse_args()

  # Read the SAM header.  The argv is given as a list so a BAM path that
  # contains whitespace is not split into several arguments, and the pipe is
  # opened in text mode so the str regex below also works on Python 3 (where
  # a binary pipe yields bytes).
  header_proc = Popen(['samtools','view','-H',args.input_sorted_bam],stdout=PIPE,universal_newlines=True)
  sq_pattern = re.compile(r'@SQ\s+SN:(\S+)\s+LN:(\d+)')
  chromlens = {}
  try:
    for line in header_proc.stdout:
      m = sq_pattern.match(line.rstrip())
      if m: chromlens[m.group(1)] = int(m.group(2))
  finally:
    # Reap the child so it does not linger as a zombie.
    header_proc.stdout.close()
    header_proc.wait()

  # Break every sequence into windows.  The stop value is length + 1 so that
  # the final position is still covered when the length is one more than a
  # multiple of the window size (and for length-1 sequences).
  itersize = 10000000
  regions = []
  for chrom in chromlens:
    chromlen = chromlens[chrom]
    for rstart in range(1,chromlen+1,itersize):
      rend = min(rstart+itersize-1,chromlen)
      regions.append((chrom,rstart,rend))
  gtotal = len(regions)

  if args.threads <= 1:
    for chrom,rstart,rend in regions:
      do_output(get_region_vcf(args,chrom,rstart,rend))
    return

  # NOTE(review): an exception raised inside a worker is not reported by
  # apply_async when only a success callback is given; confirm whether
  # failures should be surfaced.
  pool = multiprocessing.Pool(processes=args.threads)
  for chrom,rstart,rend in regions:
    pool.apply_async(get_region_vcf,args=(args,chrom,rstart,rend),callback=do_output)
  pool.close()
  pool.join()