Beispiel #1
0
    def testFindTwoKozakConsensi(self):
        """
        A read holding two Kozak consensus sequences, at different offsets
        and with different qualities, must produce both of them, in the
        order in which they occur in the sequence.
        """
        sequence = 'ATTGCCGCCATGGGGGGCCATGG'
        read = DNARead('id', sequence)

        # Each expected result wraps its own read instance (built from the
        # same id and sequence) rather than the instance that is searched.
        expected = [
            DNAKozakRead(DNARead('id', sequence), 3, 13, 100.0),
            DNAKozakRead(DNARead('id', sequence), 13, 23, 60.0),
        ]

        found = list(findKozakConsensus(read))
        self.assertEqual(expected, found)
Beispiel #2
0
def findKozakConsensus(read):
    """
    Scan a DNA sequence for Kozak consensus sequences: (gcc)gccRccATGG.

    Upper case letters in the pattern are mandatory: the 'ATGG', and the 'R'
    (a purine, i.e., 'G' or 'A') three bases before the 'ATG'. Lower case
    letters are the bases most commonly seen at those positions; they are
    not required, but each one that is present raises the reported quality.
    The leading 'gcc' (in parentheses) is of uncertain significance and is
    ignored.

    Every match is reported as the ten-base window starting six bases before
    the 'ATG' and ending just after the 'G' that follows it. The quality is
    the percentage of the five lower case positions that match.

    @param read: A C{DNARead} instance to be checked for Kozak consensi.
    @return: A generator that yields C{DNAKozakRead} instances.
    """
    from dark.reads import DNAKozakRead

    length = len(read)
    if length <= 9:
        # Fewer than ten bases cannot hold the gccRccATGG window.
        return

    bases = read.sequence

    # (distance before the ATG, most frequently found base) for the five
    # optional positions that contribute to the quality score.
    preferred = ((1, 'C'), (2, 'C'), (4, 'C'), (5, 'C'), (6, 'G'))

    # 'atg' is the index of the 'A' of a candidate start codon. Starting at
    # 6 guarantees the six preceding bases exist, and stopping before
    # length - 3 guarantees the base right after the codon exists.
    for atg in range(6, length - 3):
        if bases[atg:atg + 3] != 'ATG':
            continue
        if bases[atg + 3] != 'G':
            continue
        if bases[atg - 3] not in 'GA':
            continue

        matched = sum(bases[atg - back] == base for back, base in preferred)
        percent = matched / 5.0 * 100
        yield DNAKozakRead(read, atg - 6, atg + 4, percent)
Beispiel #3
0
 def testKozakConsensusAtEnd(self):
     """
     A Kozak consensus whose final base is the last base of the sequence
     must be found, with the expected offsets and quality percentage.
     """
     read = DNARead('id', 'AAAAAAATTGCCGCCATGG')
     expected = DNAKozakRead(read, 9, 19, 100.0)
     # Unpacking into a single name fails unless exactly one consensus
     # was produced.
     [found] = list(findKozakConsensus(read))
     self.assertEqual(expected, found)
Beispiel #4
0
 def testOneKozakConsensus(self):
     """
     A sequence containing one exact Kozak consensus must produce a single
     result with the expected offsets and a 100% quality.
     """
     read = DNARead('id', 'ATTGCCGCCATGGGGG')
     expected = DNAKozakRead(read, 3, 13, 100.0)
     # Unpacking into a single name fails unless exactly one consensus
     # was produced.
     [found] = list(findKozakConsensus(read))
     self.assertEqual(expected, found)