def testFindTwoKozakConsensi(self):
    """
    A sequence holding two Kozak consensuses, at different offsets and of
    different qualities, must produce both matches in sequence order.
    """
    sequence = 'ATTGCCGCCATGGGGGGCCATGG'
    read = DNARead('id', sequence)

    # Each expected match wraps its own, separately constructed read
    # rather than the instance that is passed to findKozakConsensus.
    perfectMatch = DNAKozakRead(DNARead('id', sequence), 3, 13, 100.0)
    partialMatch = DNAKozakRead(DNARead('id', sequence), 13, 23, 60.0)

    found = list(findKozakConsensus(read))
    self.assertEqual([perfectMatch, partialMatch], found)
def findKozakConsensus(read):
    """
    Scan a DNA read for Kozak consensus sequences: (gcc)gccRccATGG.

    Upper case bases in that pattern must be present for a match. Lower
    case bases are the ones most frequently found at those positions; they
    are optional and only affect the reported quality. The leading 'gcc'
    (in parentheses) is of uncertain significance and is ignored.

    @param read: A C{DNARead} instance to be checked for Kozak consensi.
    @return: A generator that yields one C{DNAKozakRead} per match. Each is
        built from C{read}, the offset of the first base of the ten-base
        match, the offset just past its last base, and the percentage
        (0.0 to 100.0) of the five optional bases that agree with the
        consensus.
    """
    # NOTE(review): imported at function scope, presumably to avoid a
    # circular import with dark.reads - confirm before moving it.
    from dark.reads import DNAKozakRead

    readLen = len(read)
    # A full match spans ten bases, so shorter reads cannot contain one.
    if readLen <= 9:
        return

    sequence = read.sequence
    # Optional positions, as (offset relative to the 'A' of ATG, consensus
    # base). Position -3 is the required 'R' and is checked separately.
    optionalBases = ((-1, 'C'), (-2, 'C'), (-4, 'C'), (-5, 'C'), (-6, 'G'))

    # Start at 6 so the six bases before ATG exist; stop so that the 'G'
    # following ATG is still inside the read.
    for atgOffset in range(6, readLen - 3):
        if sequence[atgOffset:atgOffset + 3] != 'ATG':
            continue
        if sequence[atgOffset + 3] != 'G':
            continue
        # 'R' is a purine: G or A.
        if sequence[atgOffset - 3] not in 'GA':
            continue

        agreeing = sum(sequence[atgOffset + delta] == base
                       for delta, base in optionalBases)
        qualityPercent = agreeing / 5.0 * 100
        yield DNAKozakRead(read, atgOffset - 6, atgOffset + 4,
                           qualityPercent)
def testKozakConsensusAtEnd(self):
    """
    In a given sequence whose final bases form an exact Kozak consensus,
    the consensus must be found and its offsets and quality percentage
    should be as expected.
    """
    read = DNARead('id', 'AAAAAAATTGCCGCCATGG')
    # The match occupies offsets 9-18 of the 19-base sequence, so its stop
    # offset (19) coincides with the end of the read.
    expectedKozakRead = DNAKozakRead(read, 9, 19, 100.0)
    # Unpacking a 1-tuple also checks that exactly one match is produced.
    (result, ) = list(findKozakConsensus(read))
    self.assertEqual(expectedKozakRead, result)
def testOneKozakConsensus(self):
    """
    A sequence containing a single, exact Kozak consensus must yield
    exactly one match, with the expected offsets and a quality of 100%.
    """
    read = DNARead('id', 'ATTGCCGCCATGGGGG')
    wanted = DNAKozakRead(read, 3, 13, 100.0)

    found = list(findKozakConsensus(read))
    # Unpacking raises unless precisely one match was produced.
    (onlyMatch,) = found
    self.assertEqual(wanted, onlyMatch)