def main():
    """Write one GPD line per aligned record of a SAM/BAM input.

    Command-line arguments:
        input: path to a BAM file, or ``-`` to read SAM text from STDIN.
        --minimum_intron: passed as ``min_intron`` to
            ``get_target_transcript`` for every aligned record.
        -o/--output: destination file; written through ``gzip`` when the
            name ends in ``.gz``. Defaults to STDOUT.

    Unaligned records are skipped.
    """
    parser = argparse.ArgumentParser(
        description="", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('input', help="BAM file or Use - for STDIN for SAM")
    parser.add_argument('--minimum_intron',
                        type=int,
                        default=68,
                        help="smallest intron")
    parser.add_argument('-o', '--output', help="Output file, gzip is okay")
    args = parser.parse_args()

    of = sys.stdout
    if args.output:
        if args.output.endswith('.gz'):
            of = gzip.open(args.output, 'w')
        else:
            of = open(args.output, 'w')

    try:
        if args.input == '-':
            sh = SamStream(sys.stdin)
        else:
            sh = BAMFile(args.input)
        try:
            for e in sh:
                if not e.is_aligned():
                    continue
                transcript = e.get_target_transcript(
                    min_intron=args.minimum_intron)
                of.write(transcript.get_gpd_line() + "\n")
        finally:
            # Release the input even when a record fails to convert.
            sh.close()
    finally:
        # Only close files this function opened; STDOUT belongs to the
        # interpreter and is merely flushed.
        if of is sys.stdout:
            of.flush()
        else:
            of.close()
Ejemplo n.º 2
0
def main():
  """Sort a BAM file after rewriting its @SQ header lines in name order.

  Command-line arguments:
      input: BAM file to read (through ``samtools view``).
      output: destination; must end in ``.bam``. The part before ``.bam``
          is handed to ``samtools sort`` as the output prefix.
      --threads: value for the ``-@`` option of ``samtools sort``.

  The header is read once via ``samtools view -H``. Every header line that
  starts with ``@SQ<TAB>SN:`` is dropped, and at the position of the first
  such line a fresh set of ``@SQ`` lines (SN and LN only) is emitted,
  ordered by sorted sequence name. All other header lines are passed
  through unchanged, followed by the alignment records.

  Exits with status 1 when the output name does not end in ``.bam``.
  """
  parser = argparse.ArgumentParser(description="",formatter_class=argparse.ArgumentDefaultsHelpFormatter)
  parser.add_argument('input',help="Use bam file")
  parser.add_argument('output',help="Use bam file")
  parser.add_argument('--threads',type=int,default=cpu_count(),help="Thread count")
  args = parser.parse_args()

  m = re.match(r'^(\S+)\.bam$',args.output)
  if not m:
    sys.stderr.write("use bam output\n")
    sys.exit(1)

  # Argument lists (not whitespace-split strings) keep input paths that
  # contain spaces intact.
  p1 = Popen(['samtools','view','-H',args.input],stdout=PIPE)
  bs = SamStream(p1.stdout)
  rlens = bs.get_header().get_sequence_lengths()
  htext = bs.header_text
  p1.communicate()
  hlines = htext.rstrip().split("\n")

  cmd = ['samtools','sort','-@',str(args.threads),'-',m.group(1)]
  sys.stderr.write(' '.join(cmd)+"\n")
  p = Popen(cmd,stdin=PIPE)

  done_lens = False
  for ln in hlines:
    if ln.startswith("@SQ\tSN:"):
      # The whole @SQ section is regenerated once, in sorted-name order.
      if not done_lens:
        done_lens = True
        for name in sorted(rlens):
          p.stdin.write("@SQ\tSN:"+name+"\tLN:"+str(rlens[name])+"\n")
    else:
      p.stdin.write(ln.rstrip("\n")+"\n")
  # The next child writes straight to the same pipe; make sure the header
  # has left any Python-side buffer first so it cannot land after records.
  p.stdin.flush()

  p1 = Popen(['samtools','view',args.input],stdout=p.stdin)
  p1.communicate()
  p.communicate()
def main():
    """Sort a BAM file, emitting its @SQ header lines in sorted-name order.

    Command-line arguments:
        input: BAM file, read through ``samtools view``.
        output: must end in ``.bam``; the stem is used as the output prefix
            of ``samtools sort``.
        --threads: forwarded to ``samtools sort -@``.

    Header lines beginning with ``@SQ<TAB>SN:`` are replaced, at the
    position of the first one, by regenerated ``@SQ`` lines carrying only
    SN and LN, ordered by sorted sequence name. Remaining header lines and
    all alignment records are forwarded as they are.

    Exits with status 1 if the output argument is not a ``.bam`` name.
    """
    parser = argparse.ArgumentParser(
        description="", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('input', help="Use bam file")
    parser.add_argument('output', help="Use bam file")
    parser.add_argument('--threads',
                        type=int,
                        default=cpu_count(),
                        help="Thread count")
    args = parser.parse_args()

    m = re.match(r'^(\S+)\.bam$', args.output)
    if not m:
        # Report failure through the exit status, not just the message.
        sys.stderr.write("use bam output\n")
        sys.exit(1)

    # Commands are passed as lists so an input path with whitespace is
    # delivered to samtools as a single argument.
    header_proc = Popen(['samtools', 'view', '-H', args.input], stdout=PIPE)
    bs = SamStream(header_proc.stdout)
    rlens = bs.get_header().get_sequence_lengths()
    htext = bs.header_text
    header_proc.communicate()

    sort_cmd = ['samtools', 'sort', '-@', str(args.threads), '-', m.group(1)]
    sys.stderr.write(' '.join(sort_cmd) + "\n")
    sorter = Popen(sort_cmd, stdin=PIPE)

    sq_written = False
    for ln in htext.rstrip().split("\n"):
        if not ln.startswith("@SQ\tSN:"):
            sorter.stdin.write(ln.rstrip("\n") + "\n")
            continue
        if sq_written:
            continue
        # First @SQ line seen: emit the complete, name-sorted replacement.
        sq_written = True
        for seq_name in sorted(rlens):
            sorter.stdin.write("@SQ\tSN:" + seq_name + "\tLN:" +
                               str(rlens[seq_name]) + "\n")
    # Push the header out before another process starts writing records
    # directly into the same pipe.
    sorter.stdin.flush()

    body_proc = Popen(['samtools', 'view', args.input], stdout=sorter.stdin)
    body_proc.communicate()
    sorter.communicate()
Ejemplo n.º 4
0
def main():
  """Convert aligned SAM/BAM records into GPD lines.

  Command-line arguments:
      input: BAM file path, or ``-`` for SAM text on STDIN.
      --minimum_intron: forwarded as ``min_intron`` when building the
          target transcript of each record.
      -o/--output: output path (gzip-compressed if it ends in ``.gz``);
          STDOUT when omitted.

  Records for which ``is_aligned()`` is false are ignored.
  """
  parser = argparse.ArgumentParser(description="",formatter_class=argparse.ArgumentDefaultsHelpFormatter)
  parser.add_argument('input',help="BAM file or Use - for STDIN for SAM")
  parser.add_argument('--minimum_intron',type=int,default=68,help="smallest intron")
  parser.add_argument('-o','--output',help="Output file, gzip is okay")
  args = parser.parse_args()

  if not args.output:
    of = sys.stdout
  elif args.output.endswith('.gz'):
    of = gzip.open(args.output,'w')
  else:
    of = open(args.output,'w')

  try:
    sh = SamStream(sys.stdin) if args.input == '-' else BAMFile(args.input)
    try:
      for e in sh:
        if not e.is_aligned(): continue
        gpd_line = e.get_target_transcript(min_intron=args.minimum_intron).get_gpd_line()
        of.write(gpd_line+"\n")
    finally:
      # Always release the input stream, even if a record raises.
      sh.close()
  finally:
    # Close only what was opened here; STDOUT is flushed and left open.
    if of is sys.stdout:
      of.flush()
    else:
      of.close()
def do_sam(args):
  """Sort a SAM/BAM input into a BAM whose @SQ header lines are name-ordered.

  Args:
      args: object with the attributes
          ``input``   - a ``.bam`` path, or ``-`` for SAM (with header) on STDIN;
          ``output``  - a path ending in ``.bam``; the part before ``.bam``
                        is used as the ``samtools sort`` output prefix;
          ``threads`` - optional value for ``samtools sort -@``.

  The header is split into non-@SQ lines (kept in their original order and
  written first) and @SQ lines (sorted by the text following the first
  three characters of their second tab-separated field, i.e. the value
  after ``SN:`` when SN is the first tag). Header and records are piped
  through ``samtools view -Sb -`` into ``samtools sort``.

  Exits with status 1 on an invalid input or output name.
  """
  if args.input != '-' and not re.search(r'\.bam$',args.input):
    sys.stderr.write("ERROR input expects bam unless piping to stdin.. then SAM with header\n")
    sys.exit(1)
  m = re.match(r'^(.+)\.bam$',args.output) if args.output else None
  if not m:
    sys.stderr.write("ERROR sam sorts must output to a bam file\n")
    sys.exit(1)

  # Build argument lists instead of splitting strings so paths containing
  # whitespace survive; options go before the positional arguments.
  sort_cmd = ['samtools','sort']
  if args.threads:
    sort_cmd += ['-@',str(args.threads)]
  sort_cmd += ['-',m.group(1)]

  reader = None
  if args.input == '-':
    inf = sys.stdin
  else:
    reader = Popen(['samtools','view','-h',args.input],stdout=PIPE,bufsize=1)
    inf = reader.stdout
  s = SamStream(inf)

  header = s.header_text.rstrip().split("\n")
  fields = [line.split("\t") for line in header]
  top = [line for line, f in zip(header, fields) if f[0] != '@SQ']
  chroms = sorted((f for f in fields if f[0] == '@SQ'),key = lambda f: f[1][3:])

  pout = Popen(sort_cmd,stdin=PIPE)
  p2 = Popen(['samtools','view','-Sb','-'],stdin=PIPE,stdout=pout.stdin)
  for t in top:
    p2.stdin.write(t.rstrip()+"\n")
  for c in chroms:
    p2.stdin.write("\t".join(c).rstrip()+"\n")
  for sam in s:
    p2.stdin.write(sam.get_line().rstrip()+"\n")
  # Finish the converter first so the sorter sees end-of-input afterwards.
  p2.communicate()
  pout.communicate()
  if reader is not None:
    reader.communicate()
def main():
  """Print the header and the spliced reads whose junctions are supported.

  Command-line arguments:
      input: BAM file path, or ``-`` to read SAM from STDIN.
      --minimum_intron_size, --minimum_overhang: forwarded to ``SamStream``.
      --minimum_support: a junction's reads are reported only when the
          junction was observed at least this many times within a locus.

  The input is pre-filtered with awk so that only lines with fewer than 10
  fields (header lines) or with an ``N`` in field 6 (the CIGAR column)
  reach the parser. Reads are grouped with ``LocusStream``; for each group
  ``get_junctions`` returns junction/read-index pairs plus the reads, and
  the reads backing sufficiently supported junctions are printed in index
  order.
  """
  parser = argparse.ArgumentParser(description="",formatter_class=argparse.ArgumentDefaultsHelpFormatter)
  parser.add_argument('input',help="Use - for STDIN or read bam")
  parser.add_argument('--minimum_intron_size',type=int,default=68,help="Require intron to be this size or larger")
  parser.add_argument('--minimum_overhang',type=int,default=10,help="At least this many bases on each side of an intron")
  parser.add_argument('--minimum_support',type=int,default=1,help="Minimum number of reads that should support a junction to report any of the reads")
  args = parser.parse_args()

  inf = sys.stdin
  p = None
  if args.input != '-':
    # List form keeps an input path with whitespace as one argument.
    p = Popen(['samtools','view','-F','4','-h',args.input],stdout=PIPE,bufsize=1)
    inf = p.stdout
  # Run awk directly; no shell is needed for a fixed program text.
  awk_prog = '{if(NF<10) print $0; else if($6~/N/) print $0;}'
  p2 = Popen(['awk',awk_prog],stdout=PIPE,stdin=inf,bufsize=1)
  stream = SamStream(p2.stdout,minimum_intron_size=args.minimum_intron_size,minimum_overhang=args.minimum_overhang)
  lstream = LocusStream(stream)
  for h in stream.header:
    print(h.rstrip())
  for r in lstream:
    # now we have all the possible junctions from the range
    juncs, sams = get_junctions(r.get_payload(),args)
    evidence = {}  # junction range string -> number of observations
    lines = {}     # junction range string -> indices into sams
    for junc in juncs:
      jstr = junc[0].get_range_string()
      evidence[jstr] = evidence.get(jstr,0)+1
      lines.setdefault(jstr,set()).add(junc[1])
    accepted = set()
    for jstr in evidence:
      if evidence[jstr] >= args.minimum_support:
        accepted.update(lines[jstr])
    for i in sorted(accepted):
      print(sams[i].get_line().rstrip())
  p2.communicate()
  if p is not None:
    p.communicate()
def main():
    """Print a PSL line for every aligned record of a SAM/BAM input.

    Command-line arguments:
        input: BAM file path, or ``-`` to read SAM from STDIN.
        -r/--reference: reference FASTA file (required); its contents are
            loaded into ``FastaData`` and handed to the alignment reader.
    """
    parser = argparse.ArgumentParser(
        description="", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('input', help="Use - for STDIN or specify a BAM file")
    parser.add_argument('-r',
                        '--reference',
                        help="Reference fasta",
                        required=True)
    args = parser.parse_args()

    ref = None
    if args.reference:
        # Close the FASTA file as soon as its contents have been read.
        with open(args.reference, 'rb') as fasta_fh:
            ref = FastaData(fasta_fh.read())

    if args.input == '-':
        alignments = SamStream(sys.stdin, reference=ref)
    else:
        alignments = BAMFile(args.input, reference=ref)
    for e in alignments:
        if e.is_aligned():
            print(e.get_PSL())