def parseRm4(file):
    """Parse readmatcher -printFormat 4 output into AlignmentHit objects.

    Each non-blank line is split on single spaces and the first twelve
    fields are read positionally:

        0  query id          1  target id
        2  score             3  percent identity
        4  query strand      5  query start
        6  query end         7  query length
        8  target strand     9  target start
        10 target end        11 target length

    A strand field of "0" is mapped to "+", anything else to "-".

    Args:
        file: Path of the readmatcher output file to read.

    Yields:
        One AlignmentHit per non-blank input line, with coordinates
        expressed on the forward strand.

    Raises:
        IndexError: If a non-blank line has fewer than twelve fields.
        ValueError: If a numeric field cannot be converted.
    """
    # The context manager guarantees the handle is closed even when the
    # consumer abandons the generator before reaching end-of-file.
    with open(file) as handle:
        for line in handle:
            line = line.rstrip("\n")
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing empty line).
                continue
            fields = line.split(" ")

            hit = AlignmentHit()
            hit.query_id = fields[0]
            hit.target_id = fields[1]
            # The stored score is the negation of the reported value.
            # NOTE(review): presumably readmatcher scores are
            # lower-is-better -- confirm.
            hit.score = -int(fields[2])
            hit.pctidentity = float(fields[3])

            hit.query_strand = "+" if fields[4] == "0" else "-"
            hit.query_start = int(fields[5])
            hit.query_end = int(fields[6])
            hit.query_length = int(fields[7])
            # For the negative strand readMatcher reports query coords on
            # the reverse complement of the sequence. For the AlignmentHit
            # they need to be reported on the forward strand.
            if hit.query_strand == "-":
                hit.query_start, hit.query_end = (
                    hit.query_length - hit.query_end,
                    hit.query_length - hit.query_start,
                )

            hit.target_strand = "+" if fields[8] == "0" else "-"
            hit.target_start = int(fields[9])
            hit.target_end = int(fields[10])
            hit.target_length = int(fields[11])
            # Same forward-strand conversion for the target coordinates.
            if hit.target_strand == "-":
                hit.target_start, hit.target_end = (
                    hit.target_length - hit.target_end,
                    hit.target_length - hit.target_start,
                )

            # Strands are kept as parsed for both query and target; they
            # are not normalised so that the query is always "+".
            hit.alignedLength = abs(hit.target_end - hit.target_start)
            yield hit