def __build_hit_for_match(self):
    """Fill in this hit's fields when the read and/or its mate is a match.

    Reads ``self.__read``, ``self.__mate`` and ``self.bns``; ORs bits into
    ``self.flag``; writes ``self.pos``, ``self.ref_id``, ``self.tid``,
    ``self.mtid``, ``self.mpos`` and ``self.isize``.  ``self.qual`` is
    written only when the read itself is matched, and ``self.m_ref_id``
    only when the mate is matched.  ``self.pos`` and ``self.mpos`` are
    shifted to base 1 before returning.
    """
    read = self.__read
    mate = self.__mate
    read_is_matched = read.type != BWA_TYPE_NO_MATCH

    if read_is_matched:
        self.pos = read.pos
        on_reverse = read.strand
        # span is the length of the reference covered by the alignment
        span = read.get_pos_end() - self.pos
        self.qual = read.mapQ
    else:
        # The read isn't matched, which implies there is a mate and that
        # the mate is matched: place the read at the mate's position.
        self.pos = mate.pos
        on_reverse = mate.strand
        self.flag |= SAM_FSU  # query unmapped
        span = 1

    if on_reverse:
        self.flag |= SAM_FSR  # query on the reverse strand

    # Find which reference sequence contains pos.
    self.ref_id, self.__nn = bwa.get_seq_id(self.bns, self.pos, span)
    ref_ann = self.bns.anns[self.ref_id]
    self.tid = ref_ann.name
    # Make the position relative to the start of its reference section.
    self.pos -= ref_ann.offset

    # A matched read whose alignment runs past the end of its reference
    # bridges two adjacent reference sequences; flag it as unmapped.
    if read_is_matched and self.pos + span > ref_ann.len:
        self.flag |= SAM_FSU

    # Start from the "can't be calculated" defaults for the mate fields.
    self.mtid = None
    self.mpos = 0
    self.isize = 0

    if mate:
        if mate.type == BWA_TYPE_NO_MATCH:
            # Unmapped mate: it is reported at the read's own position.
            self.flag |= SAM_FMU
            self.mtid = "="
            self.mpos = read.pos - ref_ann.offset
        else:
            if mate.strand:
                self.flag |= SAM_FMR
            # XXX: mate.len is passed below; it is an open question whether
            # it should be (mate.get_pos_end() - mate.pos) instead.
            self.m_ref_id, _ = bwa.get_seq_id(self.bns, mate.pos, mate.len)
            if self.m_ref_id != self.ref_id:
                self.mtid = self.bns.anns[self.m_ref_id].name
            else:
                self.mtid = '='
                # isize stays at 0 unless the read is matched as well
                if read_is_matched:
                    self.isize = mate.get_pos_5() - read.get_pos_5()
            self.mpos = mate.pos - self.bns.anns[self.m_ref_id].offset

    # Convert both positions to base 1.
    # NOTE(review): with no mate, mpos goes from its default 0 to 1 here;
    # confirm that this is what callers expect.
    self.pos += 1
    self.mpos += 1
def __analyze_multi(self, i):
    """Describe entry ``i`` of ``self.__read.multi`` as a list of four strings.

    The returned list holds, in order:
      * the name of the reference annotation the hit falls in,
      * the hit position relative to that annotation's offset, base 1,
        prefixed with '-' when ``strand`` is set and '+' otherwise,
      * the CIGAR operations concatenated, or "*" when there are none,
      * the sum of ``n_gapo``, ``n_gape`` and ``n_mm`` as a decimal string.
    """
    hit = self.__read.multi[i]
    read_len = len(self.__read)

    span = hit.get_pos_end(read_len) - hit.pos
    ref_id, _ = bwa.get_seq_id(self.bns, hit.pos, span)
    ref_ann = self.bns.anns[ref_id]

    if hit.strand:
        sign = '-'
    else:
        sign = '+'
    signed_pos = "%s%d" % (sign, hit.pos - ref_ann.offset + 1)

    # FIXME: duplicate from samt_adapter, we should centralize formatting
    cigar_str = "".join('%d%s' % op for op in hit.get_cigar(read_len))
    if not cigar_str:
        cigar_str = "*"

    diff_count = "%d" % (hit.n_gapo + hit.n_gape + hit.n_mm)

    return [ref_ann.name, signed_pos, cigar_str, diff_count]