def parseSam(refDict):
    """Run the ins/del HMM over the alignments of a BAM file.

    The input BAM path is read from ``sys.argv[1]`` and the output path from
    ``sys.argv[2]``.  For every alignment that is kept, the aligned query and
    target strings are rebuilt with ``buildAlignmentSequence`` and handed to
    ``insDelHmm.outputDelInsCython`` together with the module-level ``t_log``
    and ``e_log``.  When more than one state is returned, a summary record is
    written to the output file through ``insDelHmm.printSummaryFast``.

    Skipped alignments:
      * secondary alignments (``ar.is_secondary``);
      * alignments whose query name equals the query name of the most
        recently processed alignment.

    Both the BAM handle and the output file are closed when the function
    returns or raises.

    Args:
        refDict: mapping from reference name (as returned by
            ``bamfile.getrname``) to the reference sequence passed to
            ``buildAlignmentSequence``.

    Raises:
        KeyError: if an alignment's reference name is missing from
            ``refDict``.
    """
    infn, outfn = sys.argv[1], sys.argv[2]
    bamfile = pysam.Samfile(infn, "rb")
    try:
        with open(outfn, 'w') as out:
            currid = ""
            tidDict = {}  # cache: numeric reference id -> reference name
            for ar in bamfile.fetch():
                if ar.is_secondary:
                    continue
                query_id = ar.qname
                # Only the first kept alignment of a run sharing the same
                # query name is processed.
                if currid == query_id:
                    continue
                currid = query_id

                target_id = tidDict.get(ar.tid)
                if target_id is None:
                    target_id = bamfile.getrname(ar.tid)
                    tidDict[ar.tid] = target_id

                target_start = ar.aend - ar.alen
                target_end = ar.aend
                target_strand = "-" if ar.is_reverse else "+"

                alignedpositions = ar.aligned_pairs
                alignedQuery = buildAlignmentSequence(
                    ar.seq, alignedpositions, 0)
                alignedTarget = buildAlignmentSequence(
                    refDict[target_id], alignedpositions, 1)

                states = insDelHmm.outputDelInsCython(
                    alignedQuery, alignedTarget, t_log, e_log)
                if len(states) > 1:
                    insDelHmm.printSummaryFast(
                        query_id, target_id, target_start, target_end,
                        target_strand, states, alignedQuery, alignedTarget,
                        out)
    finally:
        bamfile.close()
def parseSam(refDict):
    """Run the ins/del HMM over the alignments of a BAM file.

    The input BAM path is read from ``sys.argv[1]`` and the output path from
    ``sys.argv[2]``.  For every alignment that is kept, the aligned query and
    target strings are rebuilt with ``buildAlignmentSequence`` and handed to
    ``insDelHmm.outputDelInsCython`` together with the module-level ``t_log``
    and ``e_log``.  When more than one state is returned, a summary record is
    written to the output file through ``insDelHmm.printSummaryFast``.

    Skipped alignments:
      * secondary alignments (``ar.is_secondary``);
      * alignments whose query name equals the query name of the most
        recently processed alignment.

    Both the BAM handle and the output file are closed when the function
    returns or raises.

    Args:
        refDict: mapping from reference name (as returned by
            ``bamfile.getrname``) to the reference sequence passed to
            ``buildAlignmentSequence``.

    Raises:
        KeyError: if an alignment's reference name is missing from
            ``refDict``.
    """
    infn, outfn = sys.argv[1], sys.argv[2]
    bamfile = pysam.Samfile(infn, "rb")
    try:
        with open(outfn, 'w') as out:
            currid = ""
            tidDict = {}  # cache: numeric reference id -> reference name
            for ar in bamfile.fetch():
                if ar.is_secondary:
                    continue
                query_id = ar.qname
                # Only the first kept alignment of a run sharing the same
                # query name is processed.
                if currid == query_id:
                    continue
                currid = query_id

                target_id = tidDict.get(ar.tid)
                if target_id is None:
                    target_id = bamfile.getrname(ar.tid)
                    tidDict[ar.tid] = target_id

                target_start = ar.aend - ar.alen
                target_end = ar.aend
                target_strand = "-" if ar.is_reverse else "+"

                alignedpositions = ar.aligned_pairs
                alignedQuery = buildAlignmentSequence(
                    ar.seq, alignedpositions, 0)
                alignedTarget = buildAlignmentSequence(
                    refDict[target_id], alignedpositions, 1)

                states = insDelHmm.outputDelInsCython(
                    alignedQuery, alignedTarget, t_log, e_log)
                if len(states) > 1:
                    insDelHmm.printSummaryFast(
                        query_id, target_id, target_start, target_end,
                        target_strand, states, alignedQuery, alignedTarget,
                        out)
    finally:
        bamfile.close()
def parseRm5():
    """Run the ins/del HMM over a space-separated alignment file.

    The input path is read from ``sys.argv[1]`` and the output path from
    ``sys.argv[2]``.  Each input line is split on single spaces and the
    following fields are used (0-based):

      * 0  -- query id
      * 6  -- target id
      * 8  -- target start (parsed as int)
      * 9  -- target end (parsed as int)
      * 10 -- target strand
      * 17 -- aligned query string
      * 19 -- aligned target string

    A line is skipped when its query id equals the query id of the most
    recently processed line.  When the target strand is ``"-"`` both aligned
    strings are reversed (plain reversal, no complementing) before being
    passed to ``insDelHmm.outputDelInsCython`` together with the module-level
    ``t_log`` and ``e_log``.  When more than one state is returned, a summary
    record is written to the output file through
    ``insDelHmm.printSummaryFast``.

    Fields that are not needed for the summary are not parsed.  Both files
    are closed when the function returns or raises.

    Raises:
        IndexError: if a processed line has fewer than 20 fields.
        ValueError: if the target start/end fields are not integers.
    """
    infn, outfn = sys.argv[1], sys.argv[2]
    # The output file is opened first, matching the original open order.
    with open(outfn, 'w') as out, open(infn, buffering=100000) as infile:
        currid = ""
        for line in infile:
            values = line.rstrip("\n").split(" ")
            query_id = values[0]
            # Only the first line of a run sharing the same query id is used.
            if currid == query_id:
                continue
            currid = query_id

            target_id = values[6]
            target_start = int(values[8])
            target_end = int(values[9])
            target_strand = values[10]
            alignedQuery = values[17]
            alignedTarget = values[19]
            if target_strand == "-":
                alignedQuery = alignedQuery[::-1]
                alignedTarget = alignedTarget[::-1]

            states = insDelHmm.outputDelInsCython(
                alignedQuery, alignedTarget, t_log, e_log)
            if len(states) > 1:
                insDelHmm.printSummaryFast(
                    query_id, target_id, target_start, target_end,
                    target_strand, states, alignedQuery, alignedTarget, out)
def parseRm5():
    """Run the ins/del HMM over a space-separated alignment file.

    The input path is read from ``sys.argv[1]`` and the output path from
    ``sys.argv[2]``.  Each input line is split on single spaces and the
    following fields are used (0-based):

      * 0  -- query id
      * 6  -- target id
      * 8  -- target start (parsed as int)
      * 9  -- target end (parsed as int)
      * 10 -- target strand
      * 17 -- aligned query string
      * 19 -- aligned target string

    A line is skipped when its query id equals the query id of the most
    recently processed line.  When the target strand is ``"-"`` both aligned
    strings are reversed (plain reversal, no complementing) before being
    passed to ``insDelHmm.outputDelInsCython`` together with the module-level
    ``t_log`` and ``e_log``.  When more than one state is returned, a summary
    record is written to the output file through
    ``insDelHmm.printSummaryFast``.

    Fields that are not needed for the summary are not parsed.  Both files
    are closed when the function returns or raises.

    Raises:
        IndexError: if a processed line has fewer than 20 fields.
        ValueError: if the target start/end fields are not integers.
    """
    infn, outfn = sys.argv[1], sys.argv[2]
    # The output file is opened first, matching the original open order.
    with open(outfn, 'w') as out, open(infn, buffering=100000) as infile:
        currid = ""
        for line in infile:
            values = line.rstrip("\n").split(" ")
            query_id = values[0]
            # Only the first line of a run sharing the same query id is used.
            if currid == query_id:
                continue
            currid = query_id

            target_id = values[6]
            target_start = int(values[8])
            target_end = int(values[9])
            target_strand = values[10]
            alignedQuery = values[17]
            alignedTarget = values[19]
            if target_strand == "-":
                alignedQuery = alignedQuery[::-1]
                alignedTarget = alignedTarget[::-1]

            states = insDelHmm.outputDelInsCython(
                alignedQuery, alignedTarget, t_log, e_log)
            if len(states) > 1:
                insDelHmm.printSummaryFast(
                    query_id, target_id, target_start, target_end,
                    target_strand, states, alignedQuery, alignedTarget, out)