def main():
    """Print an alignment count for every read name in a HISAT alignment.

    The positional ``input`` argument is either ``-`` (read SAM text from
    stdin) or a path that is streamed through ``samtools view -h``.  Entries
    are consumed one query-name group at a time via ``MultiEntrySamReader``.

    For each group one ``qname<TAB>count`` line is written to stdout:

    * ``0`` when the first entry of the group has a CIGAR of ``*``;
    * otherwise the larger of the group size and the ``NH:i:`` tag value
      found on the first entry.

    Exits with status 1 (after writing a message to stderr) when the first
    entry of an aligned group carries no ``NH:i:`` tag.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input',
                        help="Bam file in order of query name - for stdin")
    args = parser.parse_args()

    inf = sys.stdin
    proc = None
    if args.input != '-':
        # Pass argv as a list so a path containing whitespace is not split
        # into several arguments.
        proc = Popen(['samtools', 'view', '-h', args.input], stdout=PIPE)
        inf = proc.stdout

    # Compiled once; the raw string keeps ``\d`` from being read as a string
    # escape.
    nh_tag = re.compile(r'NH:i:(\d+)')

    mesr = MultiEntrySamReader(inf)
    while True:
        entries = mesr.read_entries()
        if not entries:
            break
        first = entries[0]
        if first.value('cigar') == '*':
            # Single-argument print(...) behaves the same under Python 2 and
            # is also valid Python 3 syntax.
            print(first.value('qname') + "\t0")
            continue
        m = nh_tag.search(first.entry['remainder'])
        if not m:
            sys.stderr.write("ERROR not a hisat entry\n")
            # Non-zero status so callers can detect the failure.
            sys.exit(1)
        cnt = max(len(entries), int(m.group(1)))
        print(first.value('qname') + "\t" + str(cnt))

    if proc is not None:
        # Release the pipe and reap the samtools child.
        proc.stdout.close()
        proc.wait()
Example #2
0
def main():
    """Batch query-name groups of a SAM/BAM input and hand them to do_buffer.

    Input selection:

    * ``--bam``, or a filename without ``--sam``: the input is streamed
      through ``samtools view - -h`` (stdin is forwarded to samtools when
      the input is ``-``);
    * ``--sam`` with a filename: the file is read directly as SAM text;
    * ``-`` without ``--bam``: SAM text is read from stdin.

    Header lines are echoed to stdout and fed to the SAM->PSL converter.
    Groups returned by ``MultiEntrySamReader.read_entries`` are collected
    into batches of up to 10000 and passed to ``do_buffer``; its result goes
    to ``do_callback``.  With ``--threads`` > 1 the batches are submitted to
    a ``multiprocessing.Pool`` instead of being processed inline.

    ``do_buffer`` and ``do_callback`` are defined outside this block.
    """
    parser = argparse.ArgumentParser(
        description=
        "Take a sam file and join together mate pairs into single alignments.  Alignments must be ordered by query name."
    )
    parser.add_argument(
        'input', help="FILENAME input .sam or .bam or '-' for STDIN sam")
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--sam', action='store_true')
    group.add_argument('--bam', action='store_true')
    parser.add_argument('--mates_only',
                        action='store_true',
                        help="Only output combined mates")
    parser.add_argument('--threads',
                        type=int,
                        default=1,
                        help="Number of threads to use, default is 1")
    args = parser.parse_args()

    from_stdin = args.input == '-'
    inf = sys.stdin
    fh = None
    proc = None
    if args.bam or not (args.sam or from_stdin):
        # Only the file descriptor is handed to samtools, so stdin can be
        # forwarded as-is when the input is '-'.
        fh = sys.stdin if from_stdin else open(args.input, 'rb')
        proc = Popen(['samtools', 'view', '-', '-h'], stdin=fh, stdout=PIPE)
        inf = proc.stdout
    elif not from_stdin:
        # --sam with a filename: read the named file rather than silently
        # falling back to stdin.
        fh = open(args.input)
        inf = fh

    buffer_size = 10000
    batch = []
    msr = MultiEntrySamReader(inf)
    spc = SAMtoPSLconversionFactory()
    psc = PSLtoSAMconversionFactory()
    # set the headers for the spc
    for h in msr.header:
        # Single-argument print(...) behaves the same under Python 2.
        print(h.rstrip())
        spc.read_header_line(h)

    pool = Pool(processes=args.threads) if args.threads > 1 else None

    def dispatch(chunk):
        """Run do_buffer on one batch, in the pool when one exists."""
        if pool is not None:
            # NOTE(review): the AsyncResult is discarded, so an exception
            # raised by do_buffer in a worker is never re-raised here.
            pool.apply_async(do_buffer,
                             args=(chunk, msr, spc, psc, args),
                             callback=do_callback)
        else:
            do_callback(do_buffer(chunk, msr, spc, psc, args))

    while True:
        entries = msr.read_entries()
        if not entries:
            break
        batch.append(entries)
        if len(batch) >= buffer_size:
            dispatch(batch)
            batch = []
    if batch:
        dispatch(batch)

    if pool is not None:
        pool.close()
        pool.join()

    # Release the pipe, reap samtools and close any file opened above.
    if proc is not None:
        proc.stdout.close()
        proc.wait()
    if fh is not None and fh is not sys.stdin:
        fh.close()
def main():
    """Print the best-covered alignment(s) for every read name in a SAM/BAM.

    Input selection:

    * ``--bam``, or a filename without ``--sam``: the input is streamed
      through ``samtools view - -h`` (stdin is forwarded to samtools when
      the input is ``-``);
    * ``--sam`` with a filename: the file is read directly as SAM text;
    * ``-`` without ``--bam``: SAM text is read from stdin.

    Header lines are echoed to stdout and fed to the SAM->PSL converter.
    Within each group returned by ``MultiEntrySamReader.read_entries`` the
    entry with the largest PSL coverage is tracked separately for entries
    carrying neither mate flag, entries with flag 64 and entries with flag
    128 (128 wins when both are set).  If an entry without mate flags was
    selected it is printed alone; otherwise the best flag-64 entry and the
    best flag-128 entry are each printed.  Entries that do not convert to
    PSL, or whose coverage is not greater than 0, are never selected.
    """
    parser = argparse.ArgumentParser(
        description="Take a sam or bam file and output the best alignment for each read, it still can output the same read name twice if they happen to be mate pairs, but it will only output the best alignment for each individual mate, not necessarily the two together.  You could combine mates if that is helpful with another script."
    )
    parser.add_argument("input", help="FILENAME input .sam or .bam or '-' for STDIN sam")
    group = parser.add_mutually_exclusive_group()
    group.add_argument("--bam", action="store_true")
    group.add_argument("--sam", action="store_true")
    args = parser.parse_args()

    from_stdin = args.input == "-"
    inf = sys.stdin
    fh = None
    proc = None
    if args.bam or not (args.sam or from_stdin):
        # Only the file descriptor is handed to samtools, so stdin can be
        # forwarded as-is when the input is '-'.
        fh = sys.stdin if from_stdin else open(args.input, "rb")
        proc = Popen(["samtools", "view", "-", "-h"], stdin=fh, stdout=PIPE)
        inf = proc.stdout
    elif not from_stdin:
        # --sam with a filename: read the named file rather than silently
        # falling back to stdin.
        fh = open(args.input)
        inf = fh

    msr = MultiEntrySamReader(inf)
    spc = SAMtoPSLconversionFactory()
    # set the headers for the spc
    for h in msr.header:
        # Single-argument print(...) behaves the same under Python 2.
        print(h.rstrip())
        spc.read_header_line(h)

    while True:
        entries = msr.read_entries()
        if not entries:
            break
        # side -> (best coverage so far, entry); None is "no mate flag".
        best = {None: (0, None), 1: (0, None), 2: (0, None)}
        for sam in entries:
            pline = spc.convert_line(sam.get_line())
            if not pline:
                continue
            side = None
            if sam.check_flag(64):
                side = 1
            if sam.check_flag(128):
                side = 2
            coverage = PSL(pline).get_coverage()
            # Each category competes only with itself.  Comparing mates
            # against one overall best would make the per-mate output below
            # unreachable.
            if coverage > best[side][0]:
                best[side] = (coverage, sam)
        combined = best[None][1]
        if combined is not None:  # output the combined if its there
            print(combined.get_line())
        else:
            # output each of the mates if they are paired but not joined
            for side in (1, 2):
                mate = best[side][1]
                if mate is not None:
                    print(mate.get_line())

    # Release the pipe, reap samtools and close any file opened above.
    if proc is not None:
        proc.stdout.close()
        proc.wait()
    if fh is not None and fh is not sys.stdin:
        fh.close()
Example #4
0
def main():
    """Print the best-covered alignment(s) for every read name in a SAM/BAM.

    Input selection:

    * ``--bam``, or a filename without ``--sam``: the input is streamed
      through ``samtools view - -h`` (stdin is forwarded to samtools when
      the input is ``-``);
    * ``--sam`` with a filename: the file is read directly as SAM text;
    * ``-`` without ``--bam``: SAM text is read from stdin.

    Header lines are echoed to stdout and fed to the SAM->PSL converter.
    For each group returned by ``MultiEntrySamReader.read_entries`` three
    independent "largest PSL coverage" winners are kept: one among entries
    with neither mate flag, one among entries with flag 64 and one among
    entries with flag 128 (128 wins when both are set).  A winner without
    mate flags is printed alone; if there is none, the flag-64 winner and
    the flag-128 winner are each printed.  Entries that do not convert to
    PSL, or whose coverage is not greater than 0, are never selected.
    """
    parser = argparse.ArgumentParser(
        description=
        "Take a sam or bam file and output the best alignment for each read, it still can output the same read name twice if they happen to be mate pairs, but it will only output the best alignment for each individual mate, not necessarily the two together.  You could combine mates if that is helpful with another script."
    )
    parser.add_argument(
        'input', help="FILENAME input .sam or .bam or '-' for STDIN sam")
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--bam', action='store_true')
    group.add_argument('--sam', action='store_true')
    args = parser.parse_args()

    read_stdin = args.input == '-'
    inf = sys.stdin
    source = None
    samtools = None
    if args.bam or not (args.sam or read_stdin):
        # Only the file descriptor is handed to samtools, so stdin can be
        # forwarded as-is when the input is '-'.
        source = sys.stdin if read_stdin else open(args.input, 'rb')
        samtools = Popen(['samtools', 'view', '-', '-h'],
                         stdin=source,
                         stdout=PIPE)
        inf = samtools.stdout
    elif not read_stdin:
        # --sam with a filename: read the named file rather than silently
        # falling back to stdin.
        source = open(args.input)
        inf = source

    msr = MultiEntrySamReader(inf)
    spc = SAMtoPSLconversionFactory()
    # set the headers for the spc
    for h in msr.header:
        # Single-argument print(...) behaves the same under Python 2.
        print(h.rstrip())
        spc.read_header_line(h)

    while True:
        entries = msr.read_entries()
        if not entries:
            break
        longest0 = 0
        entry0 = None
        longest1 = 0
        entry1 = None
        longest2 = 0
        entry2 = None
        for sam in entries:
            pline = spc.convert_line(sam.get_line())
            if not pline:
                continue
            side = None
            if sam.check_flag(64):
                side = 1
            if sam.check_flag(128):
                side = 2
            coverage = PSL(pline).get_coverage()
            # Slot 0 is reserved for entries without a mate flag.  Letting
            # mates compete for it would make the per-mate output below
            # unreachable.
            if side is None:
                if coverage > longest0:
                    longest0 = coverage
                    entry0 = sam
            elif side == 1:
                if coverage > longest1:
                    longest1 = coverage
                    entry1 = sam
            elif coverage > longest2:
                longest2 = coverage
                entry2 = sam
        if entry0 is not None:  # output the combined if its there
            print(entry0.get_line())
        else:
            # output each of the mates if they are paired but not joined
            if entry1 is not None:
                print(entry1.get_line())
            if entry2 is not None:
                print(entry2.get_line())

    # Release the pipe, reap samtools and close any file opened above.
    if samtools is not None:
        samtools.stdout.close()
        samtools.wait()
    if source is not None and source is not sys.stdin:
        source.close()
def main():
  """Batch query-name groups of a SAM/BAM input and hand them to do_buffer.

  Input selection:

  * ``--bam``, or a filename without ``--sam``: the input is streamed
    through ``samtools view - -h`` (stdin is forwarded to samtools when the
    input is ``-``);
  * ``--sam`` with a filename: the file is read directly as SAM text;
  * ``-`` without ``--bam``: SAM text is read from stdin.

  Header lines are echoed to stdout and fed to the SAM->PSL converter.
  Groups returned by ``MultiEntrySamReader.read_entries`` are collected into
  batches of up to 10000 and passed to ``do_buffer``; its result goes to
  ``do_callback``.  With ``--threads`` > 1 the batches are submitted to a
  ``multiprocessing.Pool`` instead of being processed inline.

  ``do_buffer`` and ``do_callback`` are defined outside this block.
  """
  parser = argparse.ArgumentParser(description="Take a sam file and join together mate pairs into single alignments.  Alignments must be ordered by query name.")
  parser.add_argument('input',help="FILENAME input .sam or .bam or '-' for STDIN sam")
  group = parser.add_mutually_exclusive_group()
  group.add_argument('--sam',action='store_true')
  group.add_argument('--bam',action='store_true')
  parser.add_argument('--mates_only',action='store_true',help="Only output combined mates")
  parser.add_argument('--threads',type=int,default=1,help="Number of threads to use, default is 1")
  args = parser.parse_args()

  read_stdin = args.input == '-'
  inf = sys.stdin
  source = None
  samtools = None
  if args.bam or not (args.sam or read_stdin):
    # Only the file descriptor is handed to samtools, so stdin can be
    # forwarded as-is when the input is '-'.
    source = sys.stdin if read_stdin else open(args.input,'rb')
    samtools = Popen(['samtools','view','-','-h'],stdin=source,stdout=PIPE)
    inf = samtools.stdout
  elif not read_stdin:
    # --sam with a filename: read the named file rather than silently
    # falling back to stdin.
    source = open(args.input)
    inf = source

  buffer_size = 10000
  pending = []
  msr = MultiEntrySamReader(inf)
  spc = SAMtoPSLconversionFactory()
  psc = PSLtoSAMconversionFactory()
  # set the headers for the spc
  for h in msr.header:
    # Single-argument print(...) behaves the same under Python 2.
    print(h.rstrip())
    spc.read_header_line(h)

  workers = None
  if args.threads > 1:
    workers = Pool(processes=args.threads)

  def submit(chunk):
    """Run do_buffer on one batch, in the pool when one exists."""
    if workers is None:
      do_callback(do_buffer(chunk,msr,spc,psc,args))
      return
    # NOTE(review): the AsyncResult is discarded, so an exception raised by
    # do_buffer in a worker is never re-raised here.
    workers.apply_async(do_buffer,args=(chunk,msr,spc,psc,args),callback=do_callback)

  while True:
    entries = msr.read_entries()
    if not entries: break
    pending.append(entries)
    if len(pending) >= buffer_size:
      submit(pending)
      pending = []
  if pending:
    submit(pending)

  if workers is not None:
    workers.close()
    workers.join()

  # Release the pipe, reap samtools and close any file opened above.
  if samtools is not None:
    samtools.stdout.close()
    samtools.wait()
  if source is not None and source is not sys.stdin:
    source.close()
def main():
  """Print an alignment count for every read name in a HISAT alignment.

  The positional ``input`` argument is either ``-`` (read SAM text from
  stdin) or a path that is streamed through ``samtools view -h``.  Entries
  are consumed one query-name group at a time via ``MultiEntrySamReader``.

  For each group one ``qname<TAB>count`` line is written to stdout:

  * ``0`` when the first entry of the group has a CIGAR of ``*``;
  * otherwise the larger of the group size and the ``NH:i:`` tag value found
    on the first entry.

  Exits with status 1 (after writing a message to stderr) when the first
  entry of an aligned group carries no ``NH:i:`` tag.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('input',help="Bam file in order of query name - for stdin")
  args = parser.parse_args()

  inf = sys.stdin
  samtools = None
  if args.input != '-':
    # Pass argv as a list so a path containing whitespace is not split into
    # several arguments.
    samtools = Popen(['samtools','view','-h',args.input],stdout=PIPE)
    inf = samtools.stdout

  # Compiled once; the raw string keeps ``\d`` from being read as a string
  # escape.
  hit_count = re.compile(r'NH:i:(\d+)')

  mesr = MultiEntrySamReader(inf)
  while True:
    entries = mesr.read_entries()
    if not entries: break
    sam = entries[0]
    if sam.value('cigar') == '*':
      # Single-argument print(...) behaves the same under Python 2 and is
      # also valid Python 3 syntax.
      print(sam.value('qname')+"\t0")
      continue
    m = hit_count.search(sam.entry['remainder'])
    if not m:
      sys.stderr.write("ERROR not a hisat entry\n")
      # Non-zero status so callers can detect the failure.
      sys.exit(1)
    cnt = max(len(entries),int(m.group(1)))
    print(sam.value('qname')+"\t"+str(cnt))

  if samtools is not None:
    # Release the pipe and reap the samtools child.
    samtools.stdout.close()
    samtools.wait()