Example #1
0
 def constructAdjacencies(self, seq):
     """
     Adds an adjacency edge to self.G for each pair of consecutive kmers in seq.

     Every window of self.kmer_size bases is upper-cased and mapped onto the
     node of its strandless form. A kmer that equals its strandless form is
     exited through its "_R" side and entered through its "_L" side; a kmer
     that is only present as its reverse complement is exited through "_L"
     and entered through "_R".

     Pairs in which either kmer contains an "N" get no edge. The window is
     still advanced on those iterations, so the first clean kmer after a run
     of Ns is never joined to the last clean kmer before it (the two are not
     adjacent in seq), and a leading N does not suppress every later edge.

     The assertion requires len(self.G) to be unchanged by each insertion
     (presumably both end points must already be in the graph - confirm
     against the graph type used for self.G).
     """
     prev = seq[:self.kmer_size].upper()
     # Computed lazily; None means "not known for the current prev"
     prev_strandless = None
     for i in xrange(1, len(seq) - self.kmer_size + 1):
         kmer = seq[i:i + self.kmer_size].upper()
         if "N" in kmer or "N" in prev:
             # Slide the window anyway, otherwise prev goes stale across Ns
             prev, prev_strandless = kmer, None
             continue
         if prev_strandless is None:
             prev_strandless = strandless(prev)
         kmer_strandless = strandless(kmer)
         prev_size = len(self.G)
         if prev == prev_strandless:
             # exiting right side of previous kmer
             source = prev + "_R"
         else:
             # exiting left side of previous kmer
             source = reverseComplement(prev) + "_L"
         if kmer == kmer_strandless:
             # entering left side of next kmer
             target = kmer + "_L"
         else:
             # entering right side of next kmer
             target = reverseComplement(kmer) + "_R"
         self.G.add_edge(source, target)
         assert prev_size == len(self.G)
         prev, prev_strandless = kmer, kmer_strandless
 def constructAdjacencies(self, seq):
     """
     Adds an adjacency edge to self.G for each pair of consecutive kmers in seq.

     Every window of self.kmer_size bases is upper-cased and mapped onto the
     node of its strandless form. A kmer that equals its strandless form is
     exited through its "_R" side and entered through its "_L" side; a kmer
     that is only present as its reverse complement is exited through "_L"
     and entered through "_R".

     Pairs in which either kmer contains an "N" get no edge. The window is
     still advanced on those iterations, so the first clean kmer after a run
     of Ns is never joined to the last clean kmer before it (the two are not
     adjacent in seq), and a leading N does not suppress every later edge.

     The assertion requires len(self.G) to be unchanged by each insertion
     (presumably both end points must already be in the graph - confirm
     against the graph type used for self.G).
     """
     prev = seq[:self.kmer_size].upper()
     # Computed lazily; None means "not known for the current prev"
     prev_strandless = None
     for i in xrange(1, len(seq) - self.kmer_size + 1):
         kmer = seq[i:i + self.kmer_size].upper()
         if "N" in kmer or "N" in prev:
             # Slide the window anyway, otherwise prev goes stale across Ns
             prev, prev_strandless = kmer, None
             continue
         if prev_strandless is None:
             prev_strandless = strandless(prev)
         kmer_strandless = strandless(kmer)
         prev_size = len(self.G)
         if prev == prev_strandless:
             # exiting right side of previous kmer
             source = prev + "_R"
         else:
             # exiting left side of previous kmer
             source = reverseComplement(prev) + "_L"
         if kmer == kmer_strandless:
             # entering left side of next kmer
             target = kmer + "_L"
         else:
             # entering right side of next kmer
             target = reverseComplement(kmer) + "_R"
         self.G.add_edge(source, target)
         assert prev_size == len(self.G)
         prev, prev_strandless = kmer, kmer_strandless
Example #3
0
    def countKmers(self):
        """Counts the kmers of length self.kmerSize in the reference and in the reads.

        Sequences come from fastaRead(self.referenceFastaFile) and
        fastqRead(self.readFastqFile). Each window that does not contain "N"
        is counted once as seen and once as its reverse complement.

        Returns a tuple (refKmers, readKmers) of Counters.
        """
        kmerSize = self.kmerSize

        def tally(seq, counts):
            #end is the exclusive end of the window, so it has to reach
            #len(seq) for the last kmer of the sequence to be counted
            for end in xrange(kmerSize, len(seq) + 1):
                s = seq[end - kmerSize:end]
                if "N" not in s:
                    counts[s] += 1
                    counts[reverseComplement(s)] += 1

        refKmers, readKmers = Counter(), Counter()

        for name, seq in fastaRead(self.referenceFastaFile):
            tally(seq, refKmers)

        for name, seq, qual in fastqRead(self.readFastqFile):
            tally(seq, readKmers)

        return (refKmers, readKmers)
Example #4
0
def mergeChainedAlignedSegments(chainedAlignedSegments, refSequence, readSequence):
    """Builds a single pysam.AlignedSegment out of a chain of aligned segments.

    Query name, reference id, reference start and strand are taken from the
    first segment of the chain. The cigars of the segments are concatenated
    with their own soft/hard clipping dropped. Reference positions skipped
    between consecutive segments become deletions, read positions skipped
    between consecutive segments become insertions, and the unaligned prefix
    and suffix of readSequence are soft clipped.

    Cigar op codes used below: 0 match, 1 insertion, 2 deletion, 4 soft clip,
    5 hard clip.

    mapq, mate position, insert size, qualities and tags are not set on the
    returned segment.
    """
    merged = pysam.AlignedSegment()
    first = chainedAlignedSegments[0]
    merged.query_name = first.query_name
    merged.next_reference_id = -1
    merged.reference_start = first.reference_start
    merged.is_reverse = first.is_reverse
    if merged.is_reverse:
        merged.query_sequence = reverseComplement(readSequence)
    else:
        merged.query_sequence = readSequence
    merged.reference_id = first.reference_id

    ops = []
    refPos = first.reference_start
    #Reverse strand reads are walked from the other end of the read
    if merged.is_reverse:
        readPos = 1 - len(readSequence)
    else:
        readPos = 0

    for segment in chainedAlignedSegments:
        assert merged.is_reverse == segment.is_reverse

        #Reference positions not covered since the previous segment -> deletion
        segmentStart = segment.reference_start
        assert segmentStart >= refPos
        if segmentStart > refPos:
            ops.append((2, segmentStart - refPos))
            refPos = segmentStart

        #Read positions not covered since the previous segment -> insertion,
        #or a soft clip when they precede the first segment of the chain
        readStart = getFirstNonClippedPositionInRead(segment, readSequence)
        assert readStart >= readPos
        if readStart > readPos:
            if segment == first:
                gapOp = 4
            else:
                gapOp = 1
            ops.append((gapOp, readStart - readPos))
            readPos = readStart

        #Copy the segment's own ops, leaving out its clipping
        for op, length in segment.cigar:
            assert op in (0, 1, 2, 4, 5)
            if op == 0: #Match moves along both sequences
                ops.append((op, length))
                refPos += length
                readPos += length
            elif op == 1: #Insertion moves along the read only
                ops.append((op, length))
                readPos += length
            elif op == 2: #Deletion moves along the reference only
                ops.append((op, length))
                refPos += length

    assert refPos <= len(refSequence)

    #The reference end is not set explicitly, it follows from the cigar

    #Soft clip whatever is left of the read after the last segment
    readEnd = 1 if merged.is_reverse else len(readSequence)
    assert readPos <= readEnd
    if readPos < readEnd:
        ops.append((4, readEnd - readPos))

    merged.cigar = tuple(ops)

    #There should be no hard clipping in the merged cigar
    for op, length in merged.cigar:
        assert op in (0, 1, 2, 4)

    #Reference coordinates must agree with the cigar and fit the reference
    assert sum(length for op, length in ops if op in (0, 2)) == merged.reference_end - merged.reference_start
    assert 0 <= merged.reference_start < len(refSequence)
    assert 0 <= merged.reference_end <= len(refSequence)

    #Read coordinates must agree with the cigar and fit the read
    assert 0 <= merged.query_alignment_start < len(readSequence)
    assert 0 <= merged.query_alignment_end <= len(readSequence)
    assert merged.query_alignment_start + sum(length for op, length in ops if op in (0, 1)) == merged.query_alignment_end

    return merged
Example #5
0
def strandless(k):
    """
    Gives the strand independent form of a kmer: of the kmer and its reverse
    complement, the one that sorts first lexicographically.
    """
    return min(k, reverseComplement(k))
def strandless(k):
    """
    Gives the strand independent form of a kmer: of the kmer and its reverse
    complement, the one that sorts first lexicographically.
    """
    complement = reverseComplement(k)
    if complement < k:
        return complement
    return k
Example #7
0
def learnModelFromSamFileTargetFn(target, samFile, readFastqFile,
                                  referenceFastaFile, outputModel):
    """Does expectation maximisation on sam file to learn the hmm for the sam file.

    Writes two temporary files under target.getGlobalTempDir(): "temp.fa",
    holding every read plus its reverse complement (named <read>_reverse), and
    "temp.cigar", holding one exonerate format cigar line per sam record.
    Unless outputModel + "_unnormalised" already exists, a child target running
    cactus_expectationMaximisation.expectationMaximisationTrials is added to
    create it. learnModelFromSamFileTargetFn2 is always set as the follow on,
    with (unnormalised model, outputModel) as its arguments.

    Both temporary files and the sam file are closed even if one of the
    assertions on the sam records fails.
    """
    refSequences = getFastaDictionary(
        referenceFastaFile)  #Hash of names to sequences
    readSequences = getFastqDictionary(
        readFastqFile)  #Hash of names to sequences

    #Convert the read file to fasta, adding the reverse complement of each read
    reads = os.path.join(target.getGlobalTempDir(), "temp.fa")
    with open(reads, 'w') as fH:
        for name, seq in readSequences.items():
            fastaWrite(fH, name, seq)
            fastaWrite(fH, name + "_reverse", reverseComplement(seq))

    #Get cigars file
    cigars = os.path.join(target.getGlobalTempDir(), "temp.cigar")
    with open(cigars, 'w') as fH:
        sam = pysam.Samfile(samFile, "r")
        try:
            for aR in sam:
                #Because these are global alignments with reverse complement coordinates reversed the following should all be true
                assert aR.pos == 0
                assert aR.qstart == 0
                assert aR.qend == len(readSequences[aR.qname])
                assert aR.aend == len(refSequences[sam.getrname(aR.rname)])
                assert len(aR.query) == len(readSequences[aR.qname])
                if aR.is_reverse:  #Deal with reverse complements
                    assert aR.query.upper() == reverseComplement(
                        readSequences[aR.qname]).upper()
                    #Point the cigar at the reverse complemented copy in temp.fa
                    aR.qname += "_reverse"
                else:
                    assert aR.query.upper() == readSequences[aR.qname].upper()

                fH.write(getExonerateCigarFormatString(aR, sam) + "\n")
        finally:
            #The sam file was previously never closed
            sam.close()

    #Run cactus_expectationMaximisation
    options = cactus_expectationMaximisation.Options()
    options.modelType = "fiveStateAsymmetric"  #"threeStateAsymmetric"
    options.optionsToRealign = "--diagonalExpansion=10 --splitMatrixBiggerThanThis=300"
    options.randomStart = True
    options.trials = 3
    options.outputTrialHmms = True
    options.iterations = 100
    options.maxAlignmentLengthPerJob = 700000
    options.maxAlignmentLengthToSample = 50000000
    options.outputXMLModelFile = outputModel + ".xml"
    options.trainEmissions = True

    unnormalisedOutputModel = outputModel + "_unnormalised"
    #Do training if necessary
    if not os.path.exists(unnormalisedOutputModel):
        target.addChildTargetFn(
            cactus_expectationMaximisation.expectationMaximisationTrials,
            args=(" ".join([reads, referenceFastaFile]), cigars,
                  unnormalisedOutputModel, options))

    #Now set up normalisation
    target.setFollowOnTargetFn(learnModelFromSamFileTargetFn2,
                               args=(unnormalisedOutputModel, outputModel))
Example #8
0
def mergeChainedAlignedReads(chainedAlignedReads, refSequence, readSequence):
    """Builds one global alignment (a pysam.AlignedRead) from a chain of aligned reads.

    Query name, reference id and strand are taken from the first read of the
    chain and the alignment starts at reference position 0. The cigars of the
    chained reads are concatenated with their clipping dropped; every
    reference position not covered by the chain becomes a deletion and every
    read position not covered becomes an insertion, so the final cigar spans
    all of refSequence and all of readSequence.

    Cigar op codes used below: 0 match, 1 insertion, 2 deletion, 4 soft clip,
    5 hard clip.

    mapq, mate position, insert size, qualities and tags are not set on the
    returned read.
    """
    merged = pysam.AlignedRead()
    first = chainedAlignedReads[0]
    merged.qname = first.qname
    merged.rnext = -1
    merged.pos = 0
    merged.is_reverse = first.is_reverse
    merged.seq = reverseComplement(readSequence) if merged.is_reverse else readSequence
    merged.rname = first.rname

    ops = []
    refPos = 0
    #Reverse strand reads are walked from the other end of the read
    readPos = (1 - len(readSequence)) if merged.is_reverse else 0

    for read in chainedAlignedReads:
        assert merged.is_reverse == read.is_reverse

        #Reference positions not covered since the previous read -> deletion
        readRefStart = read.pos
        assert readRefStart >= refPos
        if readRefStart > refPos:
            ops.append((2, readRefStart - refPos))
            refPos = readRefStart

        #Read positions not covered since the previous read -> insertion
        readStart = getAbsoluteReadOffset(read, refSequence, readSequence)
        assert readStart >= readPos
        if readStart > readPos:
            ops.append((1, readStart - readPos))
            readPos = readStart

        #Copy the read's own ops, leaving out its clipping
        for op, length in read.cigar:
            assert op in (0, 1, 2, 4, 5)
            if op == 0:  #Match moves along both sequences
                ops.append((op, length))
                refPos += length
                readPos += length
            elif op == 1:  #Insertion moves along the read only
                ops.append((op, length))
                readPos += length
            elif op == 2:  #Deletion moves along the reference only
                ops.append((op, length))
                refPos += length

    #Pad out to the end of the reference with a deletion
    refLength = len(refSequence)
    assert refPos <= refLength
    if refPos < refLength:
        ops.append((2, refLength - refPos))

    #Pad out to the end of the read with an insertion
    readEnd = 1 if merged.is_reverse else len(readSequence)
    assert readPos <= readEnd
    if readPos < readEnd:
        ops.append((1, readEnd - readPos))

    #The cigar has to account for every reference and every read position
    assert sum(length for op, length in ops
               if op in (0, 2)) == len(refSequence)
    assert sum(length for op, length in ops
               if op in (0, 1)) == len(readSequence)

    merged.cigar = tuple(ops)

    return merged
Example #9
0
 def getReadBase(self):
     """Returns the read base at self.readPos, reverse complemented when
     self.isReversed is set.
     """
     onReverseStrand = self.isReversed
     base = self.readSeq[self.readPos]
     if not onReverseStrand:
         return base
     return reverseComplement(base)
Example #10
0
 def getReadBase(self):
     """Returns the read base at self.readPos, reverse complemented when
     self.isReversed is set.
     """
     onReverseStrand = self.isReversed
     base = self.readSeq[self.readPos]
     return reverseComplement(base) if onReverseStrand else base
Example #11
0
def mergeChainedAlignedSegments(chainedAlignedSegments, refSequence, readSequence):
    """Builds a single pysam.AlignedSegment out of a chain of aligned segments.

    Query name, reference id, reference start and strand are taken from the
    first segment of the chain. The cigars of the segments are concatenated
    with their own soft/hard clipping dropped. Reference positions skipped
    between consecutive segments become deletions, read positions skipped
    between consecutive segments become insertions, and the unaligned prefix
    and suffix of readSequence are soft clipped.

    Cigar op codes used below: 0 match, 1 insertion, 2 deletion, 4 soft clip,
    5 hard clip.

    mapq, mate position, insert size, qualities and tags are not set on the
    returned segment.
    """
    merged = pysam.AlignedSegment()
    first = chainedAlignedSegments[0]
    merged.query_name = first.query_name
    merged.next_reference_id = -1
    merged.reference_start = first.reference_start
    merged.is_reverse = first.is_reverse
    if merged.is_reverse:
        merged.query_sequence = reverseComplement(readSequence)
    else:
        merged.query_sequence = readSequence
    merged.reference_id = first.reference_id

    ops = []
    refPos = first.reference_start
    #Reverse strand reads are walked from the other end of the read
    if merged.is_reverse:
        readPos = 1 - len(readSequence)
    else:
        readPos = 0

    for segment in chainedAlignedSegments:
        assert merged.is_reverse == segment.is_reverse

        #Reference positions not covered since the previous segment -> deletion
        segmentStart = segment.reference_start
        assert segmentStart >= refPos
        if segmentStart > refPos:
            ops.append((2, segmentStart - refPos))
            refPos = segmentStart

        #Read positions not covered since the previous segment -> insertion,
        #or a soft clip when they precede the first segment of the chain
        readStart = getFirstNonClippedPositionInRead(segment, readSequence)
        assert readStart >= readPos
        if readStart > readPos:
            if segment == first:
                gapOp = 4
            else:
                gapOp = 1
            ops.append((gapOp, readStart - readPos))
            readPos = readStart

        #Copy the segment's own ops, leaving out its clipping
        for op, length in segment.cigar:
            assert op in (0, 1, 2, 4, 5)
            if op == 0: #Match moves along both sequences
                ops.append((op, length))
                refPos += length
                readPos += length
            elif op == 1: #Insertion moves along the read only
                ops.append((op, length))
                readPos += length
            elif op == 2: #Deletion moves along the reference only
                ops.append((op, length))
                refPos += length

    assert refPos <= len(refSequence)

    #The reference end is not set explicitly, it follows from the cigar

    #Soft clip whatever is left of the read after the last segment
    readEnd = 1 if merged.is_reverse else len(readSequence)
    assert readPos <= readEnd
    if readPos < readEnd:
        ops.append((4, readEnd - readPos))

    merged.cigar = tuple(ops)

    #There should be no hard clipping in the merged cigar
    for op, length in merged.cigar:
        assert op in (0, 1, 2, 4)

    #Reference coordinates must agree with the cigar and fit the reference
    assert sum(length for op, length in ops if op in (0, 2)) == merged.reference_end - merged.reference_start
    assert 0 <= merged.reference_start < len(refSequence)
    assert 0 <= merged.reference_end <= len(refSequence)

    #Read coordinates must agree with the cigar and fit the read
    assert 0 <= merged.query_alignment_start < len(readSequence)
    assert 0 <= merged.query_alignment_end <= len(readSequence)
    assert merged.query_alignment_start + sum(length for op, length in ops if op in (0, 1)) == merged.query_alignment_end

    return merged
Example #12
0
def learnModelFromSamFileTargetFn(target, samFile, readFastqFile, referenceFastaFile, outputModel):
    """Does expectation maximisation on sam file to learn the hmm for the sam file.

    Writes two temporary files under target.getGlobalTempDir(): "temp.fa",
    holding every read plus its reverse complement (named <read>_reverse), and
    "temp.cigar", holding one exonerate format cigar line per sam record.
    Unless outputModel + "_unnormalised" already exists, a child target running
    cactus_expectationMaximisation.expectationMaximisationTrials is added to
    create it. learnModelFromSamFileTargetFn2 is always set as the follow on,
    with (unnormalised model, outputModel) as its arguments.

    Both temporary files and the sam file are closed even if one of the
    assertions on the sam records fails.
    """
    refSequences = getFastaDictionary(referenceFastaFile) #Hash of names to sequences
    readSequences = getFastqDictionary(readFastqFile) #Hash of names to sequences

    #Convert the read file to fasta, adding the reverse complement of each read
    reads = os.path.join(target.getGlobalTempDir(), "temp.fa")
    with open(reads, 'w') as fH:
        for name, seq in readSequences.items():
            fastaWrite(fH, name, seq)
            fastaWrite(fH, name + "_reverse", reverseComplement(seq))

    #Get cigars file
    cigars = os.path.join(target.getGlobalTempDir(), "temp.cigar")
    with open(cigars, 'w') as fH:
        sam = pysam.Samfile(samFile, "r" )
        try:
            for aR in sam:
                #Because these are global alignments with reverse complement coordinates reversed the following should all be true
                assert aR.pos == 0
                assert aR.qstart == 0
                assert aR.qend == len(readSequences[aR.qname])
                assert aR.aend == len(refSequences[sam.getrname(aR.rname)])
                assert len(aR.query) == len(readSequences[aR.qname])
                if aR.is_reverse: #Deal with reverse complements
                    assert aR.query.upper() == reverseComplement(readSequences[aR.qname]).upper()
                    #Point the cigar at the reverse complemented copy in temp.fa
                    aR.qname += "_reverse"
                else:
                    assert aR.query.upper() == readSequences[aR.qname].upper()

                fH.write(getExonerateCigarFormatString(aR, sam) + "\n")
        finally:
            #The sam file was previously never closed
            sam.close()

    #Run cactus_expectationMaximisation
    options = cactus_expectationMaximisation.Options()
    options.modelType="fiveStateAsymmetric" #"threeStateAsymmetric"
    options.optionsToRealign="--diagonalExpansion=10 --splitMatrixBiggerThanThis=300"
    options.randomStart = True
    options.trials = 3
    options.outputTrialHmms = True
    options.iterations = 100
    options.maxAlignmentLengthPerJob=700000
    options.maxAlignmentLengthToSample = 50000000
    options.outputXMLModelFile = outputModel + ".xml"
    options.trainEmissions=True

    unnormalisedOutputModel = outputModel + "_unnormalised"
    #Do training if necessary
    if not os.path.exists(unnormalisedOutputModel):
        target.addChildTargetFn(cactus_expectationMaximisation.expectationMaximisationTrials, args=(" ".join([reads, referenceFastaFile ]), cigars, unnormalisedOutputModel, options))

    #Now set up normalisation
    target.setFollowOnTargetFn(learnModelFromSamFileTargetFn2, args=(unnormalisedOutputModel, outputModel))
Example #13
0
def mergeChainedAlignedReads(chainedAlignedReads, refSequence, readSequence):
    """Builds one global alignment (a pysam.AlignedRead) from a chain of aligned reads.

    Query name, reference id and strand are taken from the first read of the
    chain and the alignment starts at reference position 0. The cigars of the
    chained reads are concatenated with their clipping dropped; every
    reference position not covered by the chain becomes a deletion and every
    read position not covered becomes an insertion, so the final cigar spans
    all of refSequence and all of readSequence.

    Cigar op codes used below: 0 match, 1 insertion, 2 deletion, 4 soft clip,
    5 hard clip.

    mapq, mate position, insert size, qualities and tags are not set on the
    returned read.
    """
    merged = pysam.AlignedRead()
    first = chainedAlignedReads[0]
    merged.qname = first.qname
    merged.rnext = -1
    merged.pos = 0
    merged.is_reverse = first.is_reverse
    merged.seq = reverseComplement(readSequence) if merged.is_reverse else readSequence
    merged.rname = first.rname

    ops = []
    refPos = 0
    #Reverse strand reads are walked from the other end of the read
    readPos = (1 - len(readSequence)) if merged.is_reverse else 0

    for read in chainedAlignedReads:
        assert merged.is_reverse == read.is_reverse

        #Reference positions not covered since the previous read -> deletion
        readRefStart = read.pos
        assert readRefStart >= refPos
        if readRefStart > refPos:
            ops.append((2, readRefStart - refPos))
            refPos = readRefStart

        #Read positions not covered since the previous read -> insertion
        readStart = getAbsoluteReadOffset(read, refSequence, readSequence)
        assert readStart >= readPos
        if readStart > readPos:
            ops.append((1, readStart - readPos))
            readPos = readStart

        #Copy the read's own ops, leaving out its clipping
        for op, length in read.cigar:
            assert op in (0, 1, 2, 4, 5)
            if op == 0: #Match moves along both sequences
                ops.append((op, length))
                refPos += length
                readPos += length
            elif op == 1: #Insertion moves along the read only
                ops.append((op, length))
                readPos += length
            elif op == 2: #Deletion moves along the reference only
                ops.append((op, length))
                refPos += length

    #Pad out to the end of the reference with a deletion
    refLength = len(refSequence)
    assert refPos <= refLength
    if refPos < refLength:
        ops.append((2, refLength - refPos))

    #Pad out to the end of the read with an insertion
    readEnd = 1 if merged.is_reverse else len(readSequence)
    assert readPos <= readEnd
    if readPos < readEnd:
        ops.append((1, readEnd - readPos))

    #The cigar has to account for every reference and every read position
    assert sum(length for op, length in ops if op in (0, 2)) == len(refSequence)
    assert sum(length for op, length in ops if op in (0, 1)) == len(readSequence)

    merged.cigar = tuple(ops)

    return merged