class Paired:
    """
    Represents paired reads and single read when mate is None

    On construction the two alignments are ordered (the one that comes
    first by reference id / position becomes ``read``), wrapped in ``Read``
    objects and classified into one of the ``rtype`` categories.  Several
    SINGLE outcomes drop the mate entirely, so ``mate`` may be None.
    """

    _readStrand = True  # strand of read
    _mateStrand = False  # strand of mate

    rtype = enum(  # type of read
        NORMAL=0,
        SINGLE=1,
        READ_SPLIT=2,
        MATE_SPLIT=3,
        FILTERED=4
    )

    def __init__(self, read, mate, reflengths, refnames, splitparts):
        """
        Initialize reads and find out type of reads

        read       -- raw alignment; must expose qname, pos and tid
        mate       -- raw alignment of the mate, or a falsy value when absent
        reflengths -- sequence of reference lengths indexed by tid
        refnames   -- passed through to Read
        splitparts -- passed through to SplitRead
        """
        # Order the pair so that self.__read is the one that comes first.
        if mate and Paired.isFirst(mate, read):
            self.__read = Read(mate, True, Paired._readStrand, refnames)
            self.__mate = Read(read, False, Paired._mateStrand, refnames)
        else:
            self.__read = Read(read, True, Paired._readStrand, refnames)
            self.__mate = Read(mate, False, Paired._mateStrand, refnames)

        self.__qname = read.qname
        self.__reflengths = reflengths
        self.__splitpair = None

        # NOTE: the order of the branches below is significant; each test
        # assumes that all the previous ones failed.
        if not self.__read.hasMinQuality():
            # read doesn't have minimal mapping quality
            if self.__mate.isUnmapped() or not self.__mate.hasMinQuality():
                # both don't have minimal mapping quality
                self.__type = Paired.rtype.FILTERED
            else:
                # make mate single
                self.__read = self.__mate
                self.__mate = None
                self.__type = Paired.rtype.SINGLE
        elif self.__mate.isUnmapped():
            # mate unmapped (the mate object is kept in this case)
            self.__type = Paired.rtype.SINGLE
        elif not self.__mate.hasMinQuality():
            # mate doesn't have minimal mapping quality
            self.__type = Paired.rtype.SINGLE
            self.__mate = None
        elif self.__read.isDuplicate():
            # read is duplicate
            if self.__mate.isDuplicate():
                # both are duplicated -> filter out
                self.__type = Paired.rtype.FILTERED
            else:
                # only read is duplicate -> make mate single
                self.__type = Paired.rtype.SINGLE
                self.__read = self.__mate
                self.__mate = None
        elif self.__mate.isDuplicate():
            # mate is duplicate -> make read single
            self.__type = Paired.rtype.SINGLE
            self.__mate = None
        elif self.__read.isSplit():
            # read is split
            if (self.__mate.isInverted() or self.__mate.hasGaps()
                    or self.isInterchromosomal()):
                # filter both
                self.__type = Paired.rtype.FILTERED
            else:
                # split read
                self.__type = Paired.rtype.READ_SPLIT
                self.__splitpair = SplitPair(
                    False,
                    self.__mate,
                    SplitRead(self.__read, Paired._readStrand, splitparts))
        elif self.__mate.isSplit():
            # mate is split
            if (self.__read.isInverted() or self.__read.hasGaps()
                    or self.isInterchromosomal()):
                # filter both
                self.__type = Paired.rtype.FILTERED
            else:
                # split mate
                self.__type = Paired.rtype.MATE_SPLIT
                self.__splitpair = SplitPair(
                    True,
                    self.__read,
                    SplitRead(self.__mate, Paired._mateStrand, splitparts))
        elif not self.__read.hasGaps() and not self.__mate.hasGaps():
            # paired without any gaps
            self.__type = Paired.rtype.NORMAL
        else:
            self.__type = Paired.rtype.FILTERED

        # A split pair is kept only if the split read passes its own quality
        # and length checks and the pair has a positive size.  actualSize()
        # reads self.__type, so this must run after the classification above.
        if self.__splitpair and (
                not self.__splitpair.splitread.hasMinQuality()
                or not self.__splitpair.splitread.hasMinLengths()
                or self.actualSize() <= 0):
            self.__type = Paired.rtype.FILTERED

    @staticmethod
    def isFirst(read, mate):
        """
        Test if read is before his mate

        True when read has a lower tid than mate, or the same tid and a
        position that is not greater than the mate's position.
        """
        return (read.pos <= mate.pos and read.tid == mate.tid) or read.tid < mate.tid

    @property
    def qname(self):
        """
        Return query name
        """
        return self.__qname

    @property
    def read(self):
        """
        Return read
        """
        return self.__read

    @property
    def mate(self):
        """
        Return mate (None when the mate was dropped during classification)
        """
        return self.__mate

    @property
    def splitpair(self):
        """
        Return split pair (None unless the pair was classified as split)
        """
        return self.__splitpair

    def size(self):
        """
        Return insert size information from read

        Returns 0 unless the pair is NORMAL, non-overlapping, not inverted
        and on a single chromosome; otherwise the template length minus the
        aligned span of both reads.
        """
        if not self.isNormal() or self.hasOverlap() or self.isRearranged() or \
           self.__read.isInverted() or self.__mate.isInverted() or \
           self.isInterchromosomal():
            return 0

        return self.__read.sam.tlen - (self.__read.end - self.__read.pos) - \
            (self.__mate.end - self.__mate.pos)

    def actualSize(self):
        """
        Return counted insert size if template length is zero
        """
        size = self.size()

        if size or self.isSingle():
            # size from read or read is single
            return size
        elif self.hasOverlap():
            # overlapping pair
            if self.isRearranged():
                # and also rearranged
                return self.__read.pos - self.__mate.end

            return self.__mate.pos - self.__read.end
        else:
            # normal or rearranged pair
            lengthBetween = 0

            if self.__read.tid != self.__mate.tid:
                # another chromosome: add lengths of the references from the
                # read's reference up to (not including) the mate's
                lengthBetween = sum(
                    self.__reflengths[self.__read.tid:self.__mate.tid])

            result = lengthBetween + self.__mate.pos - self.__read.end - 1

            if self.isRearranged():
                return -(result + self.__read.len + self.__mate.len)

            return result

    def hasOverlap(self):
        """
        Test if reads overlap

        Returns False when there is no mate.
        """
        if self.__mate is None:
            # mate was dropped -> nothing to overlap with
            return False

        return self.__read.pos <= self.__mate.pos and \
            ((self.__read.end <= self.__mate.end and
              self.__mate.pos <= self.__read.end) or
             (self.__mate.end <= self.__read.end))

    def isNormal(self):
        """
        Test if reads are normal
        """
        return self.__type == Paired.rtype.NORMAL

    def isSingle(self):
        """
        Test if read is single
        """
        return self.__type == Paired.rtype.SINGLE

    def isReadSplit(self):
        """
        Test if read is split
        """
        return self.__type == Paired.rtype.READ_SPLIT

    def isMateSplit(self):
        """
        Test if mate is split
        """
        return self.__type == Paired.rtype.MATE_SPLIT

    def isFiltered(self):
        """
        Test if read is filtered
        """
        return self.__type == Paired.rtype.FILTERED

    def isRearranged(self):
        """
        Test if reads are rearranged (both read and mate are inverted)

        Returns False when there is no mate.
        """
        if self.__mate is None:
            # mate was dropped -> a lone read cannot be rearranged
            return False

        return self.__read.isInverted() and self.__mate.isInverted()

    def isInterchromosomal(self):
        """
        Test if reads are on different chromosomes

        Returns False when there is no mate.
        """
        if self.__mate is None:
            # mate was dropped -> no second chromosome to compare with
            return False

        return self.__read.tid != self.__mate.tid