def testORF1abPolyprotein(self):
    """
    Check translation of an ORF1ab polyprotein.

    Translation must proceed through the end of the slippery sequence and
    then resume from the final nucleotide of the slippery sequence (i.e.,
    that nucleotide is read twice).
    """
    slippery = 'TTTAAAC'
    lysineCount = 15000 // 3
    leader = 'AA' + 'AAA' * lysineCount
    nt = leader + slippery + 'CCCTAAAA'

    # What actually gets translated is:
    #   AAA ('lysineCount' times), then AA TTTAAAC C CCCTAAAA
    # which, regrouped into codons, is:
    #   AAA ('lysineCount' times), then AAT TTA AAC CCC CTA AAA
    #   K   ('lysineCount' times), then  N   L   N   P   L   K
    aa = 'K' * lysineCount + 'NLNPLK'

    self.assertEqual(aa, translate(nt, 'ORF1ab polyprotein'))
def testAAATTT(self):
    """
    The six nucleotides AAATTT must be translated to the amino acids KF.
    """
    translated = translate('AAATTT')
    self.assertEqual('KF', translated)
def testNameWithAAA(self):
    """
    When the name passed is something other than 'ORF1ab polyprotein', an
    AAA codon must still be translated to a Lysine (K).
    """
    translated = translate('AAA', 'name')
    self.assertEqual('K', translated)
def testAAA(self):
    """
    A lone AAA codon, with no name given, must be translated to a
    Lysine (K).
    """
    translated = translate('AAA')
    self.assertEqual('K', translated)
def testIncomplete(self):
    """
    A nucleotide sequence that is too short to form a full codon must be
    translated to an X.
    """
    translated = translate('AA')
    self.assertEqual('X', translated)
def testEmpty(self):
    """
    Translating an empty nucleotide sequence must give an empty amino acid
    sequence.
    """
    translated = translate('')
    self.assertEqual('', translated)
def aaSequences(self, featureName):
    """
    Match the genome and the reference at the amino acid level.

    The result is stored in C{self._cache['aa']} under the feature name,
    and a cached result is returned directly on later calls.

    If the feature is the surface glycoprotein and the aligned genome
    nucleotide sequence contains a non-zero number of gap characters that
    is a multiple of three, both aligned nucleotide sequences are
    translated with C{translateSpike} and are used as the aligned amino
    acid sequences. Otherwise, the reference translation (the feature's
    stored C{'translation'} if present, else a translation of its
    C{'sequence'}) and a translation of the genome nucleotides with gaps
    removed are aligned using MAFFT.

    @param featureName: A C{str} feature name.
    @raise TranslationError: or one of its sub-classes (see translate.py)
        if a feature nucleotide sequence cannot be translated.
    @raise TranslatedReferenceAndGenomeLengthError: If the spike-specific
        translation produces reference and genome amino acid sequences of
        different lengths.
    @return: A 2-C{tuple} of C{dark.reads.AARead} instances, holding
        the amino acids for the feature as located in the reference
        genome and then the corresponding amino acids from the genome
        being examined.
    """
    try:
        return self._cache['aa'][featureName]
    except KeyError:
        pass

    referenceNt, genomeNt = self.ntSequences(featureName)
    # The nucleotide sequences are expected to come back already aligned.
    assert len(referenceNt) == len(genomeNt)

    feature = self.features[featureName]
    name = feature['name']
    referenceId = self.features.reference.id + f' ({name})'
    genomeId = self.genome.id + f' ({name})'

    gapCount = genomeNt.sequence.count('-')

    # The un-aligned AA reads only exist on the MAFFT path below. They are
    # initialized here so the debugging output can tell which path ran
    # instead of hitting an unbound local name.
    referenceAa = genomeAa = None

    if (name == 'surface glycoprotein' and gapCount > 0 and
            gapCount % 3 == 0):
        referenceAaAligned = AARead(
            referenceId, translateSpike(referenceNt.sequence))
        genomeAaAligned = AARead(
            genomeId, translateSpike(genomeNt.sequence))

        if len(referenceAaAligned) != len(genomeAaAligned):
            raise TranslatedReferenceAndGenomeLengthError(
                'Genome and reference AA sequences have different lengths.'
            )
    else:
        # Only translate the reference when the feature has no stored
        # translation. Passing the translate() call as the default to
        # feature.get() would evaluate it every time, doing needless work
        # and possibly raising even though the result would be discarded.
        missing = object()
        referenceTranslation = feature.get('translation', missing)
        if referenceTranslation is missing:
            referenceTranslation = translate(feature['sequence'], name)

        referenceAa = AARead(referenceId, referenceTranslation)
        genomeAa = AARead(
            genomeId, translate(genomeNt.sequence.replace('-', ''), name))

        referenceAaAligned, genomeAaAligned = mafft(
            Reads([referenceAa, genomeAa]), options=MAFFT_OPTIONS)

    if DEBUG:
        print(f'AA MATCH {name}:')
        print(f'ref nt aligned {len(referenceNt.sequence)}:',
              referenceNt.sequence[SLICE])
        print(f'gen nt aligned {len(genomeNt.sequence)}:',
              genomeNt.sequence[SLICE])
        if referenceAa is not None:
            print(f'ref aa {len(referenceAa.sequence)}:',
                  referenceAa.sequence[SLICE])
            print(f'gen aa {len(genomeAa.sequence)}:',
                  genomeAa.sequence[SLICE])
        print(f'ref aa aligned {len(referenceAaAligned.sequence)}:',
              referenceAaAligned.sequence[SLICE])
        print(f'gen aa aligned {len(genomeAaAligned.sequence)}:',
              genomeAaAligned.sequence[SLICE])

    self._cache['aa'][featureName] = referenceAaAligned, genomeAaAligned

    return referenceAaAligned, genomeAaAligned