def _sam_to_psl(sam, spc):
    """Build a PSL object from one SAM entry, attaching query and quality.

    The SAM line is converted with ``spc.convert_line`` and wrapped in ``PSL``.
    The query sequence and quality string are attached only when the CIGAR
    contains no ``H`` or ``P`` operation (presumably because ``seq`` then does
    not hold the complete read -- confirm against the converter).
    """
    psl = PSL(spc.convert_line(sam.get_line()))
    if not re.search('[HP]', sam.value('cigar')):
        seq = sam.value('seq')
        qual = sam.value('qual')
        if sam.check_flag(16):
            # Flag bit 16 set: store the reverse complement of the stored
            # sequence, with the quality string reversed to match.
            seq = rc(seq)
            qual = qual[::-1]
        psl.set_query(seq)
        psl.set_quality_seq(qual)
    return psl


def do_buffer(buffer, msr, spc, psc, args):
    """Join uniquely paired mates in each buffered group and collect output lines.

    For every group in ``buffer`` the entries are split into those with flag
    bit 64 set and those with flag bit 128 set. When exactly one of each is
    present, both are converted to PSL, merged with ``join_mated`` and the
    merged record is converted back with ``psc.convert_line``.

    Args:
        buffer: iterable of groups; each group is an iterable of SAM entry
            objects exposing ``check_flag``, ``value`` and ``get_line``.
        msr: unused here; kept so the call signature stays unchanged.
        spc: converter whose ``convert_line`` turns a SAM line into input
            accepted by ``PSL`` (presumably SAM -> PSL; confirm with caller).
        psc: converter whose ``convert_line`` turns the joined PSL line plus
            query/quality back into an output line.
        args: options object; only ``args.mates_only`` is read. When true,
            anything that could not be joined is dropped instead of being
            passed through unchanged.

    Returns:
        list of output lines, in processing order.

    Raises:
        SystemExit: with status 1 when the unique pair maps to different
            references or both mates have the same flag-16 (strand) state.
    """
    # NOTE(review): a second definition of do_buffer appears later in this
    # file; being later, that one is what callers actually bind to.
    outputs = []
    for entries in buffer:
        lefts = []
        rights = []
        for sam in entries:
            # Entries that are not mates are passed through (or dropped).
            if not_a_mate_sam(sam):
                if not args.mates_only:
                    outputs.append(sam.get_line())
                continue
            if sam.check_flag(64):
                lefts.append(sam)
            if sam.check_flag(128):
                rights.append(sam)

        if not (len(lefts) == 1 and len(rights) == 1):
            # Not a single unique pair: nothing to join for this group.
            if not args.mates_only:
                outputs.extend(sam.get_line() for sam in lefts)
                outputs.extend(sam.get_line() for sam in rights)
            continue

        left = lefts[0]
        right = rights[0]

        # Verify pairing by reference and direction.
        if (left.value('rname') != right.value('rname')
                or left.check_flag(16) == right.check_flag(16)):
            sys.stderr.write(
                "ERROR, these are not actually properly paired as we were led to believe\n"
            )
            # Exit non-zero so callers and shell pipelines can see the failure.
            sys.exit(1)

        p1 = _sam_to_psl(left, spc)
        p2 = _sam_to_psl(right, spc)
        p12 = join_mated(p1, p2)
        if not p12:
            # The mates could not be joined; fall back to the original lines.
            if not args.mates_only:
                outputs.append(left.get_line())
                outputs.append(right.get_line())
            continue

        outputs.append(
            psc.convert_line(
                p12.get_line(),
                query_sequence=p12.get_query(),
                quality_sequence=p12.get_quality_seq(),
            )
        )
    return outputs
def _sam_to_psl(sam, spc):
    """Build a PSL object from one SAM entry, attaching query and quality.

    The SAM line is converted with ``spc.convert_line`` and wrapped in ``PSL``.
    The query sequence and quality string are attached only when the CIGAR
    contains no ``H`` or ``P`` operation (presumably because ``seq`` then does
    not hold the complete read -- confirm against the converter).
    """
    psl = PSL(spc.convert_line(sam.get_line()))
    if not re.search('[HP]', sam.value('cigar')):
        seq = sam.value('seq')
        qual = sam.value('qual')
        if sam.check_flag(16):
            # Flag bit 16 set: store the reverse complement of the stored
            # sequence, with the quality string reversed to match.
            seq = rc(seq)
            qual = qual[::-1]
        psl.set_query(seq)
        psl.set_quality_seq(qual)
    return psl


def do_buffer(buffer, msr, spc, psc, args):
    """Join uniquely paired mates in each buffered group and collect output lines.

    For every group in ``buffer`` the entries are split into those with flag
    bit 64 set and those with flag bit 128 set. When exactly one of each is
    present, both are converted to PSL, merged with ``join_mated`` and the
    merged record is converted back with ``psc.convert_line``.

    Args:
        buffer: iterable of groups; each group is an iterable of SAM entry
            objects exposing ``check_flag``, ``value`` and ``get_line``.
        msr: unused here; kept so the call signature stays unchanged.
        spc: converter whose ``convert_line`` turns a SAM line into input
            accepted by ``PSL`` (presumably SAM -> PSL; confirm with caller).
        psc: converter whose ``convert_line`` turns the joined PSL line plus
            query/quality back into an output line.
        args: options object; only ``args.mates_only`` is read. When true,
            anything that could not be joined is dropped instead of being
            passed through unchanged.

    Returns:
        list of output lines, in processing order.

    Raises:
        SystemExit: with status 1 when the unique pair maps to different
            references or both mates have the same flag-16 (strand) state.
    """
    # NOTE(review): another definition of do_buffer precedes this one in the
    # file; this later definition is the one callers actually bind to.
    outputs = []
    for entries in buffer:
        lefts = []
        rights = []
        for sam in entries:
            # Entries that are not mates are passed through (or dropped).
            if not_a_mate_sam(sam):
                if not args.mates_only:
                    outputs.append(sam.get_line())
                continue
            if sam.check_flag(64):
                lefts.append(sam)
            if sam.check_flag(128):
                rights.append(sam)

        if not (len(lefts) == 1 and len(rights) == 1):
            # Not a single unique pair: nothing to join for this group.
            if not args.mates_only:
                outputs.extend(sam.get_line() for sam in lefts)
                outputs.extend(sam.get_line() for sam in rights)
            continue

        left = lefts[0]
        right = rights[0]

        # Verify pairing by reference and direction.
        if (left.value('rname') != right.value('rname')
                or left.check_flag(16) == right.check_flag(16)):
            sys.stderr.write(
                "ERROR, these are not actually properly paired as we were led to believe\n"
            )
            # Exit non-zero so callers and shell pipelines can see the failure.
            sys.exit(1)

        p1 = _sam_to_psl(left, spc)
        p2 = _sam_to_psl(right, spc)
        p12 = join_mated(p1, p2)
        if not p12:
            # The mates could not be joined; fall back to the original lines.
            if not args.mates_only:
                outputs.append(left.get_line())
                outputs.append(right.get_line())
            continue

        outputs.append(
            psc.convert_line(
                p12.get_line(),
                query_sequence=p12.get_query(),
                quality_sequence=p12.get_quality_seq(),
            )
        )
    return outputs