예제 #1
0
def process_locus(locus, args):
  """Report the per-base alignment depth of one locus as runs of equal depth.

  Every SAM entry in ``locus`` is converted to PSL and then to a genepred
  entry, which is smoothed with ``args.min_intron`` (presumably merging gaps
  shorter than that -- confirm against ``GenePredEntry.get_smoothed``).  The
  exons of all entries are piled up into a per-position depth, and each
  maximal run of adjacent positions that share one depth of at least
  ``args.min_depth`` is passed to ``output_depth`` as the tab-separated text
  ``chrom  start  end  depth``, where ``end`` is the last position of the
  run plus one.

  Args:
    locus: sequence of SAM entries; the reference name is taken from the
      first one.  An empty locus produces no output.
    args: options object providing ``min_intron`` and ``min_depth``; it is
      also forwarded unchanged to ``output_depth``.
  """
  if not locus:
    return  # nothing aligned here, so there is nothing to report
  s2psl = SAMtoPSLconversionFactory()
  chrom = locus[0].value('rname')

  # Count identical exon intervals once so that the per-base expansion below
  # is done per distinct interval rather than per read.
  unique = {}
  for sam in locus:
    pline = s2psl.convert_line(sam.get_line())
    if not pline: continue  # nothing convertible; same guard the sibling tools use
    g = GenePredEntry(PSL(pline).get_genepred_line())
    g = g.get_smoothed(args.min_intron)
    starts = g.value('exonStarts')
    ends = g.value('exonEnds')
    for i in range(g.get_exon_count()):
      key = (int(starts[i]), int(ends[i]))
      unique[key] = unique.get(key, 0) + 1

  # Expand every distinct interval into per-position depth.
  depth = {}
  for (start, end), count in unique.items():
    for pos in range(start, end):
      depth[pos] = depth.get(pos, 0) + count

  def emit(first, last, value):
    output_depth(chrom+"\t"+str(first)+"\t"+str(last+1)+"\t"+str(value),args)

  # Walk the positions in order and print one line per run.  A run ends when
  # the depth changes OR when the next reported position is not adjacent, so
  # uncovered / below-threshold gaps are never swallowed into an interval.
  run_start = None  # None means "no open run"; 0 is a valid start position
  run_depth = 0
  last_pos = None
  for pos in sorted(depth):
    current = depth[pos]
    if current < args.min_depth: continue
    if run_start is not None and (current != run_depth or pos != last_pos + 1):
      emit(run_start, last_pos, run_depth)
      run_start = None
    if run_start is None:
      run_start = pos
    run_depth = current
    last_pos = pos
  if run_start is not None:
    emit(run_start, last_pos, run_depth)
예제 #2
0
def main():
    """Command-line entry point: join mate pairs of a SAM/BAM stream.

    Parses the command line, opens the alignment input, echoes the SAM header
    to stdout while feeding it to the SAM-to-PSL converter, and then hands
    batches of read groups to ``do_buffer``, passing each result to
    ``do_callback`` (both defined elsewhere; the actual joining and output
    happen there).  With ``--threads`` greater than 1 the batches are
    dispatched to a multiprocessing pool, otherwise they are processed inline.

    Input selection:
      * ``-``            read SAM from stdin (decoded by samtools if ``--bam``)
      * ``--sam FILE``   read FILE directly as SAM text
      * otherwise        decode FILE with ``samtools view -h``
    """
    parser = argparse.ArgumentParser(
        description=
        "Take a sam file and join together mate pairs into single alignments.  Alignments must be ordered by query name."
    )
    parser.add_argument(
        'input', help="FILENAME input .sam or .bam or '-' for STDIN sam")
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--sam', action='store_true')
    group.add_argument('--bam', action='store_true')
    parser.add_argument('--mates_only',
                        action='store_true',
                        help="Only output combined mates")
    parser.add_argument('--threads',
                        type=int,
                        default=1,
                        help="Number of threads to use, default is 1")
    args = parser.parse_args()

    proc = None  # samtools child process, when one is needed
    if args.input == '-':
        if args.bam:
            # BAM on stdin still has to be decoded by samtools.
            proc = Popen('samtools view - -h'.split(),
                         stdin=sys.stdin,
                         stdout=PIPE)
            inf = proc.stdout
        else:
            inf = sys.stdin
    elif args.sam:
        # Plain SAM file: read it directly instead of falling back to stdin.
        inf = open(args.input)
    else:
        # The child inherits its own copy of the descriptor, so our handle can
        # be closed as soon as the process has been started.
        with open(args.input, 'rb') as fh:
            proc = Popen('samtools view - -h'.split(), stdin=fh, stdout=PIPE)
        inf = proc.stdout

    batch_size = 10000  # read groups handed to do_buffer per call
    msr = MultiEntrySamReader(inf)
    spc = SAMtoPSLconversionFactory()
    psc = PSLtoSAMconversionFactory()
    # Pass the header through unchanged and let the converter see it.
    for h in msr.header:
        print(h.rstrip())
        spc.read_header_line(h)
    pool = Pool(processes=args.threads) if args.threads > 1 else None

    def dispatch(chunk):
        """Process one batch, asynchronously when a pool is available."""
        if pool is not None:
            pool.apply_async(do_buffer,
                             args=(chunk, msr, spc, psc, args),
                             callback=do_callback)
        else:
            do_callback(do_buffer(chunk, msr, spc, psc, args))

    pending = []
    while True:
        entries = msr.read_entries()
        if not entries:
            break
        pending.append(entries)
        if len(pending) >= batch_size:
            dispatch(pending)
            pending = []
    if pending:
        dispatch(pending)  # final, partially filled batch

    if pool is not None:
        pool.close()
        pool.join()
    if inf is not sys.stdin:
        inf.close()
    if proc is not None:
        proc.wait()  # reap samtools so it does not linger as a zombie
예제 #3
0
def main():
    """Command-line entry point: keep only the best alignment of each read.

    Parses the command line, opens the alignment input, echoes the SAM header
    to stdout while feeding it to the SAM-to-PSL converter, and then, for
    every group returned by ``MultiEntrySamReader.read_entries``, prints the
    alignment(s) with the highest PSL coverage:

      * if the group contains alignments carrying neither flag 64 nor flag
        128, the best of those is printed alone;
      * otherwise the best flag-64 alignment and the best flag-128 alignment
        are each printed, so a read name can appear twice.

    Input selection:
      * ``-``            read SAM from stdin (decoded by samtools if ``--bam``)
      * ``--sam FILE``   read FILE directly as SAM text
      * otherwise        decode FILE with ``samtools view -h``
    """
    parser = argparse.ArgumentParser(
        description="Take a sam or bam file and output the best alignment for each read, it still can output the same read name twice if they happen to be mate pairs, but it will only output the best alignment for each individual mate, not necessarily the two together.  You could combine mates if that is helpful with another script."
    )
    parser.add_argument("input", help="FILENAME input .sam or .bam or '-' for STDIN sam")
    group = parser.add_mutually_exclusive_group()
    group.add_argument("--bam", action="store_true")
    group.add_argument("--sam", action="store_true")
    args = parser.parse_args()

    proc = None  # samtools child process, when one is needed
    if args.input == "-":
        if args.bam:
            # BAM on stdin still has to be decoded by samtools.
            proc = Popen("samtools view - -h".split(), stdin=sys.stdin, stdout=PIPE)
            inf = proc.stdout
        else:
            inf = sys.stdin
    elif args.sam:
        # Plain SAM file: read it directly instead of falling back to stdin.
        inf = open(args.input)
    else:
        # The child inherits its own copy of the descriptor, so our handle can
        # be closed as soon as the process has been started.
        with open(args.input, "rb") as fh:
            proc = Popen("samtools view - -h".split(), stdin=fh, stdout=PIPE)
        inf = proc.stdout

    msr = MultiEntrySamReader(inf)
    spc = SAMtoPSLconversionFactory()
    # Pass the header through unchanged and let the converter see it.
    for h in msr.header:
        print(h.rstrip())
        spc.read_header_line(h)

    while True:
        entries = msr.read_entries()
        if not entries:
            break
        # Best coverage / alignment per category.  Key None is an alignment
        # with neither mate flag (single-end or already combined), 1 is the
        # first mate (flag 64) and 2 the second mate (flag 128).
        best_cov = {None: 0, 1: 0, 2: 0}
        best_sam = {None: None, 1: None, 2: None}
        for sam in entries:
            pline = spc.convert_line(sam.get_line())
            if not pline:
                continue
            side = None
            if sam.check_flag(64):
                side = 1
            if sam.check_flag(128):
                side = 2
            coverage = PSL(pline).get_coverage()
            # Compare only within the alignment's own category; ranking every
            # alignment in the unflagged slot would make the per-mate output
            # below unreachable.
            if coverage > best_cov[side]:
                best_cov[side] = coverage
                best_sam[side] = sam
        if best_sam[None] is not None:  # output the combined if it is there
            print(best_sam[None].get_line())
        else:
            # Output each of the mates if they are paired but not joined.
            for side in (1, 2):
                if best_sam[side] is not None:
                    print(best_sam[side].get_line())

    if inf is not sys.stdin:
        inf.close()
    if proc is not None:
        proc.wait()  # reap samtools so it does not linger as a zombie
예제 #4
0
def main():
    """Command-line entry point: keep only the best alignment of each read.

    Parses the command line, opens the alignment input, echoes the SAM header
    to stdout while feeding it to the SAM-to-PSL converter, and then, for
    every group returned by ``MultiEntrySamReader.read_entries``, prints the
    alignment(s) with the highest PSL coverage:

      * if the group contains alignments carrying neither flag 64 nor flag
        128, the best of those is printed alone;
      * otherwise the best flag-64 alignment and the best flag-128 alignment
        are each printed, so a read name can appear twice.

    Input selection:
      * ``-``            read SAM from stdin (decoded by samtools if ``--bam``)
      * ``--sam FILE``   read FILE directly as SAM text
      * otherwise        decode FILE with ``samtools view -h``
    """
    parser = argparse.ArgumentParser(
        description=
        "Take a sam or bam file and output the best alignment for each read, it still can output the same read name twice if they happen to be mate pairs, but it will only output the best alignment for each individual mate, not necessarily the two together.  You could combine mates if that is helpful with another script."
    )
    parser.add_argument(
        'input', help="FILENAME input .sam or .bam or '-' for STDIN sam")
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--bam', action='store_true')
    group.add_argument('--sam', action='store_true')
    args = parser.parse_args()

    proc = None  # samtools child process, when one is needed
    if args.input == '-':
        if args.bam:
            # BAM on stdin still has to be decoded by samtools.
            proc = Popen('samtools view - -h'.split(),
                         stdin=sys.stdin,
                         stdout=PIPE)
            inf = proc.stdout
        else:
            inf = sys.stdin
    elif args.sam:
        # Plain SAM file: read it directly instead of falling back to stdin.
        inf = open(args.input)
    else:
        # The child inherits its own copy of the descriptor, so our handle can
        # be closed as soon as the process has been started.
        with open(args.input, 'rb') as fh:
            proc = Popen('samtools view - -h'.split(), stdin=fh, stdout=PIPE)
        inf = proc.stdout

    msr = MultiEntrySamReader(inf)
    spc = SAMtoPSLconversionFactory()
    # Pass the header through unchanged and let the converter see it.
    for h in msr.header:
        print(h.rstrip())
        spc.read_header_line(h)

    while True:
        entries = msr.read_entries()
        if not entries: break
        # Best coverage / alignment per category.  Key None is an alignment
        # with neither mate flag (single-end or already combined), 1 is the
        # first mate (flag 64) and 2 the second mate (flag 128).
        best_cov = {None: 0, 1: 0, 2: 0}
        best_sam = {None: None, 1: None, 2: None}
        for sam in entries:
            pline = spc.convert_line(sam.get_line())
            if not pline: continue
            side = None
            if sam.check_flag(64): side = 1
            if sam.check_flag(128): side = 2
            coverage = PSL(pline).get_coverage()
            # Compare only within the alignment's own category; ranking every
            # alignment in the unflagged slot would make the per-mate output
            # below unreachable.
            if coverage > best_cov[side]:
                best_cov[side] = coverage
                best_sam[side] = sam
        if best_sam[None] is not None:  #output the combined if it is there
            print(best_sam[None].get_line())
        else:
            #output each of the mates if they are paired but not joined
            for side in (1, 2):
                if best_sam[side] is not None:
                    print(best_sam[side].get_line())

    if inf is not sys.stdin:
        inf.close()
    if proc is not None:
        proc.wait()  # reap samtools so it does not linger as a zombie
def main():
  """Command-line entry point: join mate pairs of a SAM/BAM stream.

  Parses the command line, opens the alignment input, echoes the SAM header
  to stdout while feeding it to the SAM-to-PSL converter, and then hands
  batches of read groups to ``do_buffer``, passing each result to
  ``do_callback`` (both defined elsewhere; the actual joining and output
  happen there).  With ``--threads`` greater than 1 the batches are
  dispatched to a multiprocessing pool, otherwise they are processed inline.

  Input selection:
    * ``-``            read SAM from stdin (decoded by samtools if ``--bam``)
    * ``--sam FILE``   read FILE directly as SAM text
    * otherwise        decode FILE with ``samtools view -h``
  """
  parser = argparse.ArgumentParser(description="Take a sam file and join together mate pairs into single alignments.  Alignments must be ordered by query name.")
  parser.add_argument('input',help="FILENAME input .sam or .bam or '-' for STDIN sam")
  group = parser.add_mutually_exclusive_group()
  group.add_argument('--sam',action='store_true')
  group.add_argument('--bam',action='store_true')
  parser.add_argument('--mates_only',action='store_true',help="Only output combined mates")
  parser.add_argument('--threads',type=int,default=1,help="Number of threads to use, default is 1")
  args = parser.parse_args()

  proc = None  # samtools child process, when one is needed
  if args.input == '-':
    if args.bam:
      # BAM on stdin still has to be decoded by samtools.
      proc = Popen('samtools view - -h'.split(),stdin=sys.stdin,stdout=PIPE)
      inf = proc.stdout
    else:
      inf = sys.stdin
  elif args.sam:
    # Plain SAM file: read it directly instead of falling back to stdin.
    inf = open(args.input)
  else:
    # The child inherits its own copy of the descriptor, so our handle can
    # be closed as soon as the process has been started.
    with open(args.input,'rb') as fh:
      proc = Popen('samtools view - -h'.split(),stdin=fh,stdout=PIPE)
    inf = proc.stdout

  batch_size = 10000  # read groups handed to do_buffer per call
  msr = MultiEntrySamReader(inf)
  spc = SAMtoPSLconversionFactory()
  psc = PSLtoSAMconversionFactory()
  # Pass the header through unchanged and let the converter see it.
  for h in msr.header:
    print(h.rstrip())
    spc.read_header_line(h)
  pool = Pool(processes=args.threads) if args.threads > 1 else None

  def dispatch(chunk):
    """Process one batch, asynchronously when a pool is available."""
    if pool is not None:
      pool.apply_async(do_buffer,args=(chunk,msr,spc,psc,args),callback=do_callback)
    else:
      do_callback(do_buffer(chunk,msr,spc,psc,args))

  pending = []
  while True:
    entries = msr.read_entries()
    if not entries: break
    pending.append(entries)
    if len(pending) >= batch_size:
      dispatch(pending)
      pending = []
  if pending:
    dispatch(pending)  # final, partially filled batch

  if pool is not None:
    pool.close()
    pool.join()
  if inf is not sys.stdin:
    inf.close()
  if proc is not None:
    proc.wait()  # reap samtools so it does not linger as a zombie