Example #1
0
def main():
    """Stream a sorted SAM/BAM to STDOUT, capping positional duplicates.

    The positional ``input`` argument is either ``-`` (read SAM text from
    STDIN) or a path that is decoded with ``samtools view -h``.  Header lines
    (as recognised by ``is_header``) are echoed unchanged.  When
    ``--positional_duplicates N`` is given with a non-zero N, at most N
    consecutive records sharing the same tab-separated columns 3, 4 and 6
    (reference name, position and CIGAR in the SAM layout) are printed;
    otherwise every record is passed through.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input',
                        help="input sorted bam or - for STDIN. expects header")
    parser.add_argument(
        '--positional_duplicates',
        type=int,
        help=
        "maximum number of positional duplicatse to allow through from a sorted sam"
    )
    args = parser.parse_args()

    proc = None
    if args.input == '-':
        args.input = sys.stdin
    else:
        # Pass an argument list so paths containing whitespace stay intact.
        proc = Popen(["samtools", "view", "-h", args.input], stdout=PIPE)
        args.input = proc.stdout

    limit = args.positional_duplicates
    in_header = True
    current_key = None  # (column 3, column 4, column 6) of the current run
    run_length = 0
    while True:
        # Every line, including the very first one, goes through the same
        # header/record handling so that no line is silently dropped.
        line = args.input.readline()
        if not line:
            break
        if in_header:
            if is_header(line):
                print(line.rstrip())
                continue
            in_header = False
        if not limit:
            # No cap requested (option absent or 0): pass everything through.
            print(line.rstrip())
            continue
        f = line.split("\t")
        key = (f[2], f[3], f[5])
        if key != current_key:
            current_key = key
            run_length = 0
        run_length += 1
        if run_length <= limit:
            print(line.rstrip())

    # Reap the samtools child once its output has been consumed.
    if proc is not None:
        proc.communicate()
def main():
  """Group SAM records by their encoded source entry and report each group.

  Reads SAM text from the ``input`` path (or STDIN when it is ``-``) and
  skips header lines.  Each read name is decoded with ``decode_name`` into
  tab-separated fields; fields 2, 3 and 4 are converted to integers and the
  record is extended with the hit count (taken from the ``NH:i:`` tag, or 0
  when the CIGAR is ``*``) and the query length.  Consecutive records that
  share field 2 are collected and handed to ``output_buffer`` together with
  the parsed arguments.

  Exits with a non-zero status if an aligned record carries no ``NH:i:``
  tag.  Files opened here are closed before returning.
  """
  parser = argparse.ArgumentParser(description="For every genepred entry report its alignability",formatter_class=argparse.ArgumentDefaultsHelpFormatter)
  parser.add_argument('input',help="STDIN is -")
  parser.add_argument('-k','--fragment_size',default=100,type=int,help="Fragment size to try to align")
  parser.add_argument('--threads',type=int,default=cpu_count(),help="number of threads")
  parser.add_argument('--type',choices=['mean','median'],default='mean',help="how to combine mappability fraction")
  parser.add_argument('--perbase',action='store_true',help='show all averages')
  parser.add_argument('--output','-o',help="output file or STDOUT if not set")
  args = parser.parse_args()

  nh_tag = re.compile(r'NH:i:(\d+)')
  opened = []  # only the handles opened here; never close STDIN/STDOUT
  try:
    if args.input == '-':
      args.input = sys.stdin
    else:
      args.input = open(args.input)
      opened.append(args.input)

    if args.output:
      args.output = open(args.output,'w')
      opened.append(args.output)
    else:
      args.output = sys.stdout

    buffer = []
    prev = -1
    for line in args.input:
      if is_header(line): continue
      sam = SAM(line)
      name = decode_name(sam.value('qname')).split("\t")
      qlen = len(sam.value('seq'))
      if qlen != args.fragment_size:
        sys.stderr.write("WARNING qlen != fragment_size\n")
      cnt = 0  # unaligned records (cigar '*') count as zero hits
      if sam.value('cigar') != '*':
        m = nh_tag.search(sam.value('remainder'))
        if not m:
          sys.stderr.write("ERROR not hisat format\n")
          sys.exit(1)  # signal the failure to the calling shell
        cnt = int(m.group(1))
      for i in (2, 3, 4):
        name[i] = int(name[i])
      name.append(cnt)
      name.append(qlen)
      # A change in field 2 closes the current group.
      if buffer and name[2] != prev:
        output_buffer(buffer,args)
        buffer = []
      prev = name[2]
      buffer.append(name)
    if buffer:
      output_buffer(buffer,args)
  finally:
    for handle in opened:
      handle.close()
def main():
  """Stream a sorted SAM/BAM to STDOUT, capping positional duplicates.

  The positional ``input`` argument is either ``-`` (read SAM text from
  STDIN) or a path that is decoded with ``samtools view -h``.  Header lines
  (as recognised by ``is_header``) are echoed unchanged.  When
  ``--positional_duplicates N`` is given with a non-zero N, at most N
  consecutive records sharing the same tab-separated columns 3, 4 and 6
  (reference name, position and CIGAR in the SAM layout) are printed;
  otherwise every record is passed through.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('input',help="input sorted bam or - for STDIN. expects header")
  parser.add_argument('--positional_duplicates',type=int,help="maximum number of positional duplicatse to allow through from a sorted sam")
  args = parser.parse_args()

  proc = None
  if args.input == '-':
    args.input = sys.stdin
  else:
    # Pass an argument list so paths containing whitespace stay intact.
    proc = Popen(["samtools","view","-h",args.input],stdout=PIPE)
    args.input = proc.stdout

  limit = args.positional_duplicates
  in_header = True
  current_key = None  # (column 3, column 4, column 6) of the current run
  run_length = 0
  while True:
    # Every line, including the very first one, goes through the same
    # header/record handling so that no line is silently dropped.
    line = args.input.readline()
    if not line: break
    if in_header:
      if is_header(line):
        print(line.rstrip())
        continue
      in_header = False
    if not limit:
      # No cap requested (option absent or 0): pass everything through.
      print(line.rstrip())
      continue
    f = line.split("\t")
    key = (f[2],f[3],f[5])
    if key != current_key:
      current_key = key
      run_length = 0
    run_length += 1
    if run_length <= limit:
      print(line.rstrip())

  # Reap the samtools child once its output has been consumed.
  if proc is not None: proc.communicate()
def main():
  """Print headers plus the records whose ``NH:i:`` tag equals 1.

  Reads SAM text from STDIN when ``input`` is ``-``; otherwise the file is
  decoded with ``samtools view -F 4 -h``.  Header lines are echoed
  unchanged, records whose CIGAR is ``*`` are skipped, and a record is
  printed only when its ``NH:i:`` value is exactly 1.

  Exits with a non-zero status if a record with a CIGAR has no ``NH:i:``
  tag.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('input',help="BAM FILE or '-' for STDIN SAM format")
  args = parser.parse_args()

  proc = None
  inf = sys.stdin
  if args.input != '-':
    # Pass an argument list so paths containing whitespace stay intact.
    proc = Popen(["samtools","view","-F","4","-h",args.input],stdout=PIPE)
    inf = proc.stdout

  nh_tag = re.compile(r'NH:i:(\d+)')
  for line in inf:
    if is_header(line):
      print(line.rstrip())
      continue
    sam = SAM(line)
    if sam.entry['cigar'] == '*': continue
    m = nh_tag.search(sam.entry['remainder'])
    if not m:
      sys.stderr.write("ERROR not a hisat entry\n")
      sys.exit(1)  # signal the failure to the calling shell
    if int(m.group(1)) == 1:
      print(line.rstrip())

  # Reap the samtools child once its output has been consumed.
  if proc is not None: proc.communicate()
def main():
    """Print headers plus the records whose ``NH:i:`` tag equals 1.

    Reads SAM text from STDIN when ``input`` is ``-``; otherwise the file is
    decoded with ``samtools view -F 4 -h``.  Header lines are echoed
    unchanged, records whose CIGAR is ``*`` are skipped, and a record is
    printed only when its ``NH:i:`` value is exactly 1.

    Exits with a non-zero status if a record with a CIGAR has no ``NH:i:``
    tag.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input', help="BAM FILE or '-' for STDIN SAM format")
    args = parser.parse_args()

    proc = None
    inf = sys.stdin
    if args.input != '-':
        # Pass an argument list so paths containing whitespace stay intact.
        proc = Popen(["samtools", "view", "-F", "4", "-h", args.input],
                     stdout=PIPE)
        inf = proc.stdout

    nh_tag = re.compile(r'NH:i:(\d+)')
    for line in inf:
        if is_header(line):
            print(line.rstrip())
            continue
        sam = SAM(line)
        if sam.entry['cigar'] == '*':
            continue
        m = nh_tag.search(sam.entry['remainder'])
        if not m:
            sys.stderr.write("ERROR not a hisat entry\n")
            sys.exit(1)  # signal the failure to the calling shell
        if int(m.group(1)) == 1:
            print(line.rstrip())

    # Reap the samtools child once its output has been consumed.
    if proc is not None:
        proc.communicate()