def _acgt_only(aligned):
    """Return, in order, the characters of *aligned* that are 'A', 'C', 'G' or 'T'."""
    return [base for base in aligned if base in "ACGT"]


def parseRm5(file):
    """Parses readmatcher -printFormat 5 output into AlignmentHit objects

    Generator: reads *file* line by line, splits each line on single spaces
    and yields one AlignmentHit per non-blank line.

    Columns used (0-based index into the space-split line):
        0-4    query id, length, start, end, strand
        6-10   target id, length, start, end, strand
        11     score (stored negated)
        17     aligned query string
        18     stored as hit.aligned
        19     aligned target string

    On each yielded hit:
        * query_id is truncated to its first three "/"-separated components.
        * query_strand / target_strand are 0 for "+" and 1 for anything else.
        * QueryStrOrg / TargetStrOrg hold the aligned strings as read.
        * QuerySeq / TargetSeq are extended with the uppercase A/C/G/T
          characters of the aligned strings (every other character is
          dropped); QueryStr / TargetStr are those lists joined.
        * After hit.revcomp() is called, RevQuerySeq / RevTargetSeq and
          RevQueryStr / RevTargetStr are built the same way from the
          then-current alignedQuery / alignedTarget.

    Raises IndexError / ValueError if a non-blank line has too few fields
    or a non-integer value in a numeric column.
    """
    # "with" guarantees the handle is closed when the generator is exhausted
    # or closed, instead of relying on garbage collection.
    with open(file) as handle:
        for line in handle:
            # Blank lines (e.g. a trailing newline at end of file) carry no hit.
            if not line.strip():
                continue

            values = line.rstrip("\n").split(" ")
            hit = AlignmentHit()

            hit.query_id, hit.query_length, hit.query_start, hit.query_end, hit.query_strand = \
                values[0], int(values[1]), int(values[2]), int(values[3]), values[4]
            # target_id = values[6] because there is white space in the m5 file
            # before target_id (so values[5] is an empty field).
            hit.target_id, hit.target_length, hit.target_start, hit.target_end, hit.target_strand = \
                values[6], int(values[7]), int(values[8]), int(values[9]), values[10]
            hit.score = -1 * int(values[11])

            # Keep only the first three "/"-separated parts of the query id.
            hit.query_id = "/".join(hit.query_id.split("/")[0:3])

            hit.alignedQuery = values[17]
            hit.QueryStrOrg = hit.alignedQuery
            # Extend in place: QuerySeq is expected to be provided by AlignmentHit.
            hit.QuerySeq.extend(_acgt_only(hit.alignedQuery))
            hit.QueryStr = ''.join(hit.QuerySeq)

            hit.alignedTarget = values[19]
            hit.TargetStrOrg = hit.alignedTarget
            hit.TargetSeq.extend(_acgt_only(hit.alignedTarget))
            hit.TargetStr = ''.join(hit.TargetSeq)

            hit.aligned = values[18]
            hit.line = line

            # Strand is encoded as 0 for "+", 1 for anything else.
            hit.target_strand = 0 if hit.target_strand == "+" else 1
            hit.query_strand = 0 if hit.query_strand == "+" else 1

            # NOTE(review): revcomp() is applied to every hit regardless of
            # strand; presumably it rewrites alignedQuery / alignedTarget in
            # place -- confirm against AlignmentHit.revcomp.
            hit.revcomp()

            hit.RevQuerySeq.extend(_acgt_only(hit.alignedQuery))
            hit.RevQueryStr = ''.join(hit.RevQuerySeq)

            hit.RevTargetSeq.extend(_acgt_only(hit.alignedTarget))
            hit.RevTargetStr = ''.join(hit.RevTargetSeq)

            yield hit