def test6970G72VCorrectLocation(self):
    """
    Deleting 69-70 together with a substitution that produces G72V
    must leave the gap at the right position in the translation.
    """
    nucleotides = 'CATGCTAT------CTCTGTGACC'
    expected = 'HAI--SVT'
    self.assertEqual(expected, translateSpike(nucleotides))
def testPlus2GapCorrectLocation(self):
    """
    When the gap starts out of frame (leaving a G-- codon), the gap
    must appear at the right position in the translation.
    """
    nucleotides = 'TTGG---TTTATTACCAC'
    expected = 'LV-YYH'
    self.assertEqual(expected, translateSpike(nucleotides))
def test6970S71FCorrectLocation(self):
    """
    Deleting 69-70 together with a substitution that produces S71F
    must leave the gap at the right position in the translation.
    """
    nucleotides = 'CATGCTAT------CTTTGGGACC'
    expected = 'HAI--FGT'
    self.assertEqual(expected, translateSpike(nucleotides))
def testInFrameGapAmbiguousCorrectLocation(self):
    """
    An in-frame gap combined with an ambiguous nucleotide (N) must be
    translated correctly: a gap for the deleted codon and X for the
    ambiguous one.
    """
    nucleotides = 'TTG---GTTTANTACCAC'
    expected = 'L-VXYH'
    self.assertEqual(expected, translateSpike(nucleotides))
def testB16172_156_157GapCorrectLocation(self):
    """
    The 156/157 gap seen in B.1.617.2 must end up at the right
    position in the translation.
    """
    nucleotides = 'GAAAGTG------GAGTTTATTCTAGT'
    expected = 'ESG--VYSS'
    self.assertEqual(expected, translateSpike(nucleotides))
def testPlus1GapAdjacentAmbiguityCorrectLocation(self):
    """
    When the gap starts out of frame (leaving a TT- codon) and is
    immediately followed by an ambiguous nucleotide (N), the sequence
    must be translated correctly.
    """
    nucleotides = 'TT---NGTTTATTACCAC'
    expected = 'X-VYYH'
    self.assertEqual(expected, translateSpike(nucleotides))
def testInFrameGapCorrectLocation(self):
    """
    A gap that removes exactly one codon (in frame) must appear at the
    right position in the translation.
    """
    nucleotides = 'TTG---GTTTATTACCAC'
    expected = 'L-VYYH'
    self.assertEqual(expected, translateSpike(nucleotides))
def testPlus2GapCorrectLength(self):
    """
    A sequence with an out of frame gap (G--) must have the correct
    length: the translation has one character for every three
    characters of the (gapped) nucleotide sequence.
    """
    seq = 'TTGG---TTTATTACCAC'
    # Floor division keeps both sides of the comparison as ints; true
    # division would compare a float against the int returned by len().
    self.assertEqual(len(seq) // 3, len(translateSpike(seq)))
def testInFrameGapCorrectLength(self):
    """
    A sequence with an in frame gap must have the correct length: the
    translation has one character for every three characters of the
    (gapped) nucleotide sequence.
    """
    seq = 'TTG---GTTTATTACCAC'
    # Floor division keeps both sides of the comparison as ints; true
    # division would compare a float against the int returned by len().
    self.assertEqual(len(seq) // 3, len(translateSpike(seq)))
def testNoGapsCorrectSequence(self):
    """
    With no gaps present, the result must equal the translation
    produced by C{Seq.translate} for the same nucleotides.
    """
    nucleotides = 'TTGGTTGTTTATTACCAC'
    expected = Seq(nucleotides).translate()
    self.assertEqual(expected, translateSpike(nucleotides))
def aaSequences(self, featureName):
    """
    Match the genome and the reference at the amino acid level.

    The result is stored in C{self._cache['aa']} under C{featureName}
    and returned directly from there on subsequent calls.

    @param featureName: A C{str} feature name.
    @raise TranslationError: or one of its sub-classes (see translate.py)
        if a feature nucleotide sequence cannot be translated.
    @raise TranslatedReferenceAndGenomeLengthError: If the spike-specific
        translations of the reference and the genome differ in length.
    @return: A 2-C{tuple} of C{dark.reads.AARead} instances, holding
        the amino acids for the feature as located in the reference
        genome and then the corresponding amino acids from the genome
        being examined.
    """
    try:
        return self._cache['aa'][featureName]
    except KeyError:
        pass

    referenceNt, genomeNt = self.ntSequences(featureName)
    assert len(referenceNt) == len(genomeNt)

    feature = self.features[featureName]
    name = feature['name']
    gapCount = genomeNt.sequence.count('-')

    # The unaligned AA reads only exist on the MAFFT path below. They are
    # pre-bound here so the debug output can tell which path was taken
    # instead of tripping over an unbound local.
    referenceAa = genomeAa = None

    if (name == 'surface glycoprotein' and gapCount > 0 and
            gapCount % 3 == 0):
        # Spike with a whole number of codons' worth of gap characters:
        # translate the already-aligned nucleotides directly so the gaps
        # are carried through into the amino acid sequences.
        referenceAaAligned = AARead(
            self.features.reference.id + f' ({name})',
            translateSpike(referenceNt.sequence))
        genomeAaAligned = AARead(
            self.genome.id + f' ({name})',
            translateSpike(genomeNt.sequence))

        if len(referenceAaAligned) != len(genomeAaAligned):
            raise TranslatedReferenceAndGenomeLengthError(
                'Genome and reference AA sequences have different lengths.')
    else:
        # Use the stored translation when the feature has one; only fall
        # back to translating the feature's nucleotides when it does not.
        # (Passing translate(...) as the default to .get() would run it
        # unconditionally, even when its result is thrown away.)
        missing = object()
        referenceTranslation = feature.get('translation', missing)
        if referenceTranslation is missing:
            referenceTranslation = translate(feature['sequence'], name)

        referenceAa = AARead(
            self.features.reference.id + f' ({name})',
            referenceTranslation)
        genomeAa = AARead(
            self.genome.id + f' ({name})',
            translate(genomeNt.sequence.replace('-', ''), name))

        referenceAaAligned, genomeAaAligned = mafft(
            Reads([referenceAa, genomeAa]), options=MAFFT_OPTIONS)

    if DEBUG:
        print(f'AA MATCH {name}:')
        print(f'ref nt aligned {len(referenceNt.sequence)}:',
              referenceNt.sequence[SLICE])
        print(f'gen nt aligned {len(genomeNt.sequence)}:',
              genomeNt.sequence[SLICE])
        if referenceAa is not None:
            # Unaligned AA sequences are only available after MAFFT.
            print(f'ref aa {len(referenceAa.sequence)}:',
                  referenceAa.sequence[SLICE])
            print(f'gen aa {len(genomeAa.sequence)}:',
                  genomeAa.sequence[SLICE])
        print(f'ref aa aligned {len(referenceAaAligned.sequence)}:',
              referenceAaAligned.sequence[SLICE])
        print(f'gen aa aligned {len(genomeAaAligned.sequence)}:',
              genomeAaAligned.sequence[SLICE])

    self._cache['aa'][featureName] = referenceAaAligned, genomeAaAligned
    return referenceAaAligned, genomeAaAligned