예제 #1
0
 def test6970G72VCorrectLocation(self):
     """
     Translating a fragment that carries the 69-70 deletion together with
     a substitution producing G72V must put the gap in the right place.
     """
     nucleotides = 'CATGCTAT------CTCTGTGACC'
     expected = 'HAI--SVT'
     self.assertEqual(expected, translateSpike(nucleotides))
예제 #2
0
 def testPlus2GapCorrectLocation(self):
     """
     When the gap starts out of frame (codon G--), the gap in the
     translation must appear at the expected position.
     """
     nucleotides = 'TTGG---TTTATTACCAC'
     expected = 'LV-YYH'
     self.assertEqual(expected, translateSpike(nucleotides))
예제 #3
0
 def test6970S71FCorrectLocation(self):
     """
     Translating a fragment that carries the 69-70 deletion together with
     a substitution producing S71F must put the gap in the right place.
     """
     nucleotides = 'CATGCTAT------CTTTGGGACC'
     expected = 'HAI--FGT'
     self.assertEqual(expected, translateSpike(nucleotides))
예제 #4
0
 def testInFrameGapAmbiguousCorrectLocation(self):
     """
     A sequence holding both an in frame gap and an ambiguous nucleotide
     must be translated correctly.
     """
     nucleotides = 'TTG---GTTTANTACCAC'
     expected = 'L-VXYH'
     self.assertEqual(expected, translateSpike(nucleotides))
예제 #5
0
 def testB16172_156_157GapCorrectLocation(self):
     """
     Translating a B.1.617.2 fragment must leave the 156/157 gap in the
     correct location.
     """
     nucleotides = 'GAAAGTG------GAGTTTATTCTAGT'
     expected = 'ESG--VYSS'
     self.assertEqual(expected, translateSpike(nucleotides))
예제 #6
0
 def testPlus1GapAdjacentAmbiguityCorrectLocation(self):
     """
     When the gap starts out of frame (codon TT-) and is immediately
     followed by an ambiguous nucleotide, the translation must be correct.
     """
     nucleotides = 'TT---NGTTTATTACCAC'
     expected = 'X-VYYH'
     self.assertEqual(expected, translateSpike(nucleotides))
예제 #7
0
 def testInFrameGapCorrectLocation(self):
     """
     When the gap is in frame, the gap in the translation must appear at
     the expected position.
     """
     nucleotides = 'TTG---GTTTATTACCAC'
     expected = 'L-VYYH'
     self.assertEqual(expected, translateSpike(nucleotides))
예제 #8
0
 def testPlus2GapCorrectLength(self):
     """
     A sequence with an out of frame gap (G--) must have the correct length.
     """
     seq = 'TTGG---TTTATTACCAC'
     # Floor division keeps the expected value an int, the same type that
     # len() returns for the translation (true division gives a float).
     self.assertEqual(len(seq) // 3, len(translateSpike(seq)))
예제 #9
0
 def testInFrameGapCorrectLength(self):
     """
     A sequence with an in frame gap must have the correct length.
     """
     seq = 'TTG---GTTTATTACCAC'
     # Floor division keeps the expected value an int, the same type that
     # len() returns for the translation (true division gives a float).
     self.assertEqual(len(seq) // 3, len(translateSpike(seq)))
예제 #10
0
 def testNoGapsCorrectSequence(self):
     """
     Without any gaps, the result must equal what Seq.translate gives for
     the same nucleotides.
     """
     nucleotides = 'TTGGTTGTTTATTACCAC'
     expected = Seq(nucleotides).translate()
     self.assertEqual(expected, translateSpike(nucleotides))
예제 #11
0
    def aaSequences(self, featureName):
        """
        Match the genome and the reference at the amino acid level.

        The result is stored in C{self._cache['aa']} under the feature name
        and returned directly on subsequent calls for the same feature.

        @param featureName: A C{str} feature name.
        @raise TranslationError: or one of its sub-classes (see translate.py)
            if a feature nucleotide sequence cannot be translated.
        @raise TranslatedReferenceAndGenomeLengthError: if the gap-aware
            translations of the reference and the genome have different
            lengths.
        @return: A 2-C{tuple} of C{dark.reads.AARead} instances, holding
            the amino acids for the feature as located in the reference
            genome and then the corresponding amino acids from the genome being
            examined.
        """
        try:
            return self._cache['aa'][featureName]
        except KeyError:
            pass

        referenceNt, genomeNt = self.ntSequences(featureName)

        assert len(referenceNt) == len(genomeNt)

        feature = self.features[featureName]
        name = feature['name']

        # The unaligned AA reads only exist when the MAFFT path below is
        # taken. Initialise them so that the DEBUG output can tell whether
        # they are available instead of failing on an unbound local.
        referenceAa = genomeAa = None

        gapCount = genomeNt.sequence.count('-')
        if (name == 'surface glycoprotein' and gapCount > 0
                and gapCount % 3 == 0):
            # Translate the already-aligned nucleotides directly, keeping
            # the gaps, so no amino acid level alignment is needed.
            referenceAaAligned = AARead(
                self.features.reference.id + f' ({name})',
                translateSpike(referenceNt.sequence))
            genomeAaAligned = AARead(self.genome.id + f' ({name})',
                                     translateSpike(genomeNt.sequence))

            if len(referenceAaAligned) != len(genomeAaAligned):
                raise TranslatedReferenceAndGenomeLengthError(
                    'Genome and reference AA sequences have different lengths.'
                )
        else:
            # Only translate the reference nucleotides when the feature
            # carries no stored translation. Passing the translate() call
            # as the default of dict.get would run it unconditionally.
            referenceTranslation = feature.get('translation')
            if referenceTranslation is None:
                referenceTranslation = translate(feature['sequence'], name)

            referenceAa = AARead(
                self.features.reference.id + f' ({name})',
                referenceTranslation)

            genomeAa = AARead(
                self.genome.id + f' ({name})',
                translate(genomeNt.sequence.replace('-', ''), name))

            referenceAaAligned, genomeAaAligned = mafft(
                Reads([referenceAa, genomeAa]), options=MAFFT_OPTIONS)

        if DEBUG:
            print(f'AA MATCH {name}:')

            print(f'ref nt aligned {len(referenceNt.sequence)}:',
                  referenceNt.sequence[SLICE])
            print(f'gen nt aligned {len(genomeNt.sequence)}:',
                  genomeNt.sequence[SLICE])

            # Unaligned AA sequences are only produced on the MAFFT path.
            if referenceAa is not None:
                print(f'ref aa        {len(referenceAa.sequence)}:',
                      referenceAa.sequence[SLICE])
                print(f'gen aa        {len(genomeAa.sequence)}:',
                      genomeAa.sequence[SLICE])

            print(f'ref aa aligned {len(referenceAaAligned.sequence)}:',
                  referenceAaAligned.sequence[SLICE])
            print(f'gen aa aligned {len(genomeAaAligned.sequence)}:',
                  genomeAaAligned.sequence[SLICE])

        self._cache['aa'][featureName] = referenceAaAligned, genomeAaAligned
        return referenceAaAligned, genomeAaAligned