Exemple #1
0
def _mate_to_psl(sam, spc):
    """Build a PSL for one mate and attach its sequence/quality when usable.

    The SAM line is converted with ``spc.convert_line`` and wrapped in a
    ``PSL``.  The sequence and quality are attached only when the CIGAR
    string contains neither ``H`` nor ``P`` (presumably hard-clip / padding
    operations, for which the stored sequence would not match the alignment
    -- TODO confirm against the converter).
    """
    psl = PSL(spc.convert_line(sam.get_line()))
    if re.search('[HP]', sam.value('cigar')):
        return psl
    seq = sam.value('seq')
    qual = sam.value('qual')
    if sam.check_flag(16):
        # Flag 16 set: store the query as reverse complement with the
        # quality string reversed to match.
        seq = rc(seq)
        qual = qual[::-1]
    psl.set_query(seq)
    psl.set_quality_seq(qual)
    return psl


def do_buffer(buffer, msr, spc, psc, args):
    """Join uniquely paired mates in each group of SAM entries.

    For every group in ``buffer``:

    * entries for which ``not_a_mate_sam`` is true are passed through
      unchanged (unless ``args.mates_only`` is set);
    * if the group does not hold exactly one entry with flag 64 and exactly
      one with flag 128, the flagged entries are passed through unchanged
      (unless ``args.mates_only`` is set);
    * otherwise both mates are converted to PSL, joined with
      ``join_mated`` and the joined record is converted back with
      ``psc.convert_line``.  When joining fails the two original lines are
      passed through (unless ``args.mates_only`` is set).

    Args:
        buffer: iterable of groups; each group is an iterable of SAM entry
            objects exposing ``get_line``, ``check_flag`` and ``value``.
        msr: not used by this function; kept for interface compatibility.
        spc: converter whose ``convert_line`` turns a SAM line into the
            input for ``PSL``.
        psc: converter whose ``convert_line`` turns a PSL line (plus query
            and quality sequences) into an output line.
        args: options object; only ``mates_only`` is read.

    Returns:
        list of output lines in processing order.

    Raises:
        SystemExit: with status 1 when a unique pair disagrees on ``rname``
            or both mates report the same value for flag 16.
    """
    outputs = []
    for entries in buffer:
        first_mates = []
        second_mates = []
        for sam in entries:
            # Pass through anything that is not part of a pair.
            if not_a_mate_sam(sam):
                if not args.mates_only:
                    outputs.append(sam.get_line())
                continue
            if sam.check_flag(64):
                first_mates.append(sam)
            if sam.check_flag(128):
                second_mates.append(sam)

        if len(first_mates) != 1 or len(second_mates) != 1:
            # Not a single unique pair: nothing to join.
            if not args.mates_only:
                outputs.extend(sam.get_line() for sam in first_mates)
                outputs.extend(sam.get_line() for sam in second_mates)
            continue

        first, second = first_mates[0], second_mates[0]
        # Verify pairing by reference and direction.
        if (first.value('rname') != second.value('rname')
                or first.check_flag(16) == second.check_flag(16)):
            sys.stderr.write(
                "ERROR, these are not actually properly paired as we were led to believe\n"
            )
            # Exit non-zero so callers and shells can detect the failure.
            sys.exit(1)

        joined = join_mated(_mate_to_psl(first, spc),
                            _mate_to_psl(second, spc))
        if not joined:
            if not args.mates_only:
                outputs.append(first.get_line())
                outputs.append(second.get_line())
            continue

        outputs.append(
            psc.convert_line(joined.get_line(),
                             query_sequence=joined.get_query(),
                             quality_sequence=joined.get_quality_seq()))
    return outputs
def do_buffer(buffer, msr, spc, psc, args):
    """Merge each uniquely paired set of mates into a single output line.

    Each element of ``buffer`` is a group of SAM entry objects.  Entries
    that ``not_a_mate_sam`` reports as unpaired, groups that do not contain
    exactly one flag-64 entry and one flag-128 entry, and pairs that
    ``join_mated`` cannot join are emitted unchanged via ``get_line`` --
    unless ``args.mates_only`` is set, in which case they are dropped.
    Successfully joined pairs are emitted as the result of
    ``psc.convert_line`` on the joined PSL.

    Args:
        buffer: iterable of groups of SAM entry objects (each providing
            ``get_line``, ``check_flag`` and ``value``).
        msr: unused here; retained so existing callers keep working.
        spc: object whose ``convert_line`` converts a SAM line for ``PSL``.
        psc: object whose ``convert_line`` converts a PSL line, given the
            query and quality sequences, to an output line.
        args: options object; only the ``mates_only`` attribute is read.

    Returns:
        list of output lines.

    Raises:
        SystemExit: status 1 if a unique pair has differing ``rname``
            values or identical flag-16 state on both mates.
    """
    outputs = []
    for entries in buffer:
        lefts, rights = [], []
        for sam in entries:
            if not_a_mate_sam(sam):
                # Not part of a pair: emit as-is when allowed.
                if not args.mates_only:
                    outputs.append(sam.get_line())
                continue
            if sam.check_flag(64):
                lefts.append(sam)
            if sam.check_flag(128):
                rights.append(sam)

        if not (len(lefts) == 1 and len(rights) == 1):
            # More (or less) than one unique pair in this group.
            if not args.mates_only:
                for sam in lefts + rights:
                    outputs.append(sam.get_line())
            continue

        left, right = lefts[0], rights[0]
        same_reference = left.value('rname') == right.value('rname')
        if not same_reference or left.check_flag(16) == right.check_flag(16):
            sys.stderr.write("ERROR, these are not actually properly paired as we were led to believe\n")
            # A fatal inconsistency must not look like a successful run.
            sys.exit(1)

        psls = []
        for mate in (left, right):
            psl = PSL(spc.convert_line(mate.get_line()))
            # Only attach sequence data when the CIGAR has no H or P
            # operations (presumably hard clipping / padding -- TODO confirm).
            if not re.search('[HP]', mate.value('cigar')):
                if mate.check_flag(16):
                    # Flag 16 set: store the reverse complement and the
                    # reversed quality string.
                    psl.set_query(rc(mate.value('seq')))
                    psl.set_quality_seq(mate.value('qual')[::-1])
                else:
                    psl.set_query(mate.value('seq'))
                    psl.set_quality_seq(mate.value('qual'))
            psls.append(psl)

        merged = join_mated(psls[0], psls[1])
        if not merged:
            if not args.mates_only:
                outputs.append(left.get_line())
                outputs.append(right.get_line())
            continue

        sline = psc.convert_line(
            merged.get_line(),
            query_sequence=merged.get_query(),
            quality_sequence=merged.get_quality_seq(),
        )
        outputs.append(sline)
    return outputs