def testSummarizeNoCoverageOneRegionPositionsAtEnd(self):
     """
     When no-coverage positions occur towards the end of the genome, the
     default output must list each one separately and the summarized
     output must group them under a single 'no coverage' entry.
     """
     referenceRead = AARead('id', 'STRSPFFFFFALFMMM')
     genomeRead = AARead('id', 'KTRSLXXXXXALXMXM')

     # Default call: every no-coverage run is its own entry.
     unsummarized = getSubstitutionsString(referenceRead, genomeRead)
     self.assertEqual(
         'S1K; P5L; no coverage 6-10; no coverage 13; no coverage 15',
         unsummarized)

     # Passing True as the third argument requests the summarized form.
     summarized = getSubstitutionsString(referenceRead, genomeRead, True)
     self.assertEqual('S1K; P5L; no coverage 6-10, 13, 15', summarized)
 def testSummarizeNoCoverageOneRegionTwoPositions(self):
     """
     Two isolated no-coverage positions that are not separated by a
     substitution must be listed separately by default and grouped into
     one 'no coverage' entry when summarizing.
     """
     referenceRead = AARead('id', 'STRSPFFFFFA')
     genomeRead = AARead('id', 'KXRXLXXXXXT')

     # Default call: every no-coverage run is its own entry.
     unsummarized = getSubstitutionsString(referenceRead, genomeRead)
     self.assertEqual(
         'S1K; no coverage 2; no coverage 4; P5L; no coverage 6-10; A11T',
         unsummarized)

     # Passing True as the third argument requests the summarized form.
     summarized = getSubstitutionsString(referenceRead, genomeRead, True)
     self.assertEqual(
         'S1K; no coverage 2, 4; P5L; no coverage 6-10; A11T',
         summarized)
 def testSummarizeNoCoverageOneRegionPositionsAtStart(self):
     """
     When the genome begins with isolated no-coverage positions, the
     default output must list each one separately and the summarized
     output must group them under a single 'no coverage' entry.
     """
     referenceRead = AARead('id', 'TRSPFFFFFA')
     genomeRead = AARead('id', 'XRXLXXXXXT')

     # Default call: every no-coverage run is its own entry.
     unsummarized = getSubstitutionsString(referenceRead, genomeRead)
     self.assertEqual(
         'no coverage 1; no coverage 3; P4L; no coverage 5-9; A10T',
         unsummarized)

     # Passing True as the third argument requests the summarized form.
     summarized = getSubstitutionsString(referenceRead, genomeRead, True)
     self.assertEqual(
         'no coverage 1, 3; P4L; no coverage 5-9; A10T',
         summarized)
 def testSummarizeNoCoverageTwoRegionsMultiplePositions(self):
     """
     When isolated no-coverage positions occur both near the start and
     near the end of the genome, each group must be summarized on its
     own, with the substitutions between them left in place.
     """
     referenceRead = AARead('id', 'STRSPFFFFFALFMMM')
     genomeRead = AARead('id', 'KXRXLXXXXXTLXMXM')

     # Default call: every no-coverage run is its own entry.
     unsummarizedExpected = (
         'S1K; no coverage 2; no coverage 4; P5L; no coverage 6-10; '
         'A11T; no coverage 13; no coverage 15')
     self.assertEqual(
         unsummarizedExpected,
         getSubstitutionsString(referenceRead, genomeRead))

     # Passing True as the third argument requests the summarized form.
     summarizedExpected = (
         'S1K; no coverage 2, 4; P5L; no coverage 6-10; A11T; '
         'no coverage 13, 15')
     self.assertEqual(
         summarizedExpected,
         getSubstitutionsString(referenceRead, genomeRead, True))
 def testTwoStringsOfXs(self):
     """
     Two separate runs of Xs in the genome must each be reported as their
     own no-coverage range.
     """
     referenceRead = AARead('id', 'STRSPFFFFFA')
     genomeRead = AARead('id', 'KXXXLXXXXXT')
     expected = 'S1K; no coverage 2-4; P5L; no coverage 6-10; A11T'
     self.assertEqual(
         expected, getSubstitutionsString(referenceRead, genomeRead))
 def testFinalStringOfXs(self):
     """
     A run of Xs at the end of the genome must be reported as a single
     no-coverage range.
     """
     referenceRead = AARead('id', 'TRSP')
     genomeRead = AARead('id', 'LXXX')
     expected = 'T1L; no coverage 2-4'
     self.assertEqual(
         expected, getSubstitutionsString(referenceRead, genomeRead))
 def testStringOfXs(self):
     """
     A run of Xs in the middle of the genome must be reported as a single
     no-coverage range between the surrounding substitutions.
     """
     referenceRead = AARead('id', 'STRSP')
     genomeRead = AARead('id', 'KXXXL')
     expected = 'S1K; no coverage 2-4; P5L'
     self.assertEqual(
         expected, getSubstitutionsString(referenceRead, genomeRead))
 def testInitialStringOfXs(self):
     """
     A run of Xs at the start of the genome must be reported as a single
     no-coverage range.
     """
     referenceRead = AARead('id', 'TRSP')
     genomeRead = AARead('id', 'XXXL')
     expected = 'no coverage 1-3; P4L'
     self.assertEqual(
         expected, getSubstitutionsString(referenceRead, genomeRead))
 def testTwoLettersBothDifferent(self):
     """
     If two two-AA sequences that differ at both positions are passed, a
     string showing the changes at positions 1 and 2 must be returned.
     """
     referenceRead = AARead('id', 'SP')
     genomeRead = AARead('id', 'KL')
     expected = 'S1K; P2L'
     self.assertEqual(
         expected, getSubstitutionsString(referenceRead, genomeRead))
 def testOneLetterGenomeGap(self):
     """
     If the genome is a single gap character and the reference a single
     AA, a string showing the change at position 1 must be returned.
     """
     referenceRead = AARead('id', 'S')
     genomeRead = AARead('id', '-')
     expected = 'S1-'
     self.assertEqual(
         expected, getSubstitutionsString(referenceRead, genomeRead))
 def testOneLetterIdentical(self):
     """
     Two identical one-AA sequences have no differences, so the empty
     string must be returned.
     """
     referenceRead = AARead('id', 'K')
     genomeRead = AARead('id', 'K')
     expected = ''
     self.assertEqual(
         expected, getSubstitutionsString(referenceRead, genomeRead))
 def testEmpty(self):
     """
     When both the reference and the genome sequences are empty, the
     empty string must be returned.
     """
     referenceRead = AARead('id', '')
     genomeRead = AARead('id', '')
     expected = ''
     self.assertEqual(
         expected, getSubstitutionsString(referenceRead, genomeRead))
    def testUnreportedXsIssue21SimpleWOInsert(self):
        """
        Minimal case related to
        https://github.com/VirologyCharite/sars2seq/issues/21 in which
        the reference contains no gap (i.e., there is no insertion in the
        genome): the two-X run must be reported as sites 3-4.
        """
        referenceRead = AARead('id', 'CLABF')
        genomeRead = AARead('id', 'CMXXF')

        expected = 'L2M; no coverage 3-4'
        self.assertEqual(
            expected, getSubstitutionsString(referenceRead, genomeRead))
    def testUnreportedXsIssue21Simple(self):
        """
        Minimal case to trigger the issue in
        https://github.com/VirologyCharite/sars2seq/issues/21: the
        reference has a gap ahead of a two-X run in the genome, and both
        no-coverage sites must still be reported.
        """
        referenceRead = AARead('id', 'C-LABF')
        genomeRead = AARead('id', 'CSMXXF')

        # NOTE(review): an earlier note here said the code was returning
        # '-2S; L2M; no coverage 3' (i.e., omitting site 4) - confirm
        # whether that is still the case.
        expected = '-2S; L2M; no coverage 3-4'
        self.assertEqual(
            expected, getSubstitutionsString(referenceRead, genomeRead))
    def testUnreportedXsIssue21(self):
        """
        Test with the protein sequences that caused the issue in
        https://github.com/VirologyCharite/sars2seq/issues/21

        The output should include the fact that site 417 is not covered
        in the genome.
        """
        # Reference protein. Note the '---' gap run on its fourth line.
        reference = AARead(
            'id',
            'MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTW'
            'FHAIHVSGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVI'
            'KVCEFQFCNDPFLGVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREF'
            'VFKNIDGYFKIYSKHTPINLVR---DLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGD'
            'SSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSN'
            'FRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYG'
            'VSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKV'
            'GGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTNGVGYQPYR'
            'VVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIADTT'
            'DAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQDVNCTEVPVAIHADQLTPTWRVYS'
            'TGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGA'
            'ENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNR'
            'ALTGIAVEQDKNTQEVFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLA'
            'DAGFIKQYGDCLGDIAARDLICAQKFNGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAA'
            'LQIPFAMQMAYRFNGIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNQNAQ'
            'ALNTLVKQLSSNFGAISSVLNDILSRLDKVEAEVQIDRLITGRLQSLQTYVTQQLIRAAEIRAS'
            'ANLAATKMSECVLGQSKRVDFCGKGYHLMSFPQSAPHGVVFLHVTYVPAQEKNFTTAPAICHDG'
            'KAHFPREGVFVSNGTHWFVTQRNFYEPQIITTDNTFVSGNCDVVIGIVNNTVYDPLQPELDSFK'
            'EELDKYFKNHTSPDVDLGDISGINASVVNIQKEIDRLNEVAKNLNESLIDLQELGKYEQYIKWP'
            'WYIWLGFIAGLIAIVMVTIMLCCMTSCCSCLKGCCSCGSCCKFDEDDSEPVLKGVKLHYT*')

        # Genome protein. It contains '-' gaps, several runs of 'X', and
        # 'EPE' where the reference has its '---' gap run.
        genome = AARead(
            'id',
            'MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTW'
            'FHVI--SGTNGTKRFDNPVLPFNDGVYFASIEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVI'
            'KVCEFQFCNDPFLXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXPFLMDLEGKQGNFKNLREF'
            'VFKNIDGYFKIYSKHTPII-VREPEDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGD'
            'SSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSN'
            'FRVQPTESIVRFPNITNLCPFDEVFNATRFASVYAXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'
            'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXYKLPDDFTGCVIAWNSNKLDSKV'
            'SGNYNYLYRLFRKSNLKPFERDISTEIYQAGNKPCNGVAGFNCYFPLRSYSFRPTYGVGHQPYR'
            'VVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLKGTGVLTESNKKFLPFQQFGRDIADTT'
            'DAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQGVNCTEVPVAIHADQLTPTWRVYS'
            'TGSNVFQTRAGCLIGAEYVNNSYECDIPIGAGICASYQTQTKSHRRARSVASQSIIAYTMSLGV'
            'ENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNXXLQYGSFCTQLKR'
            'ALTGIAVEQDKNTQEVFAQVKQIYKTPPIKYFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLA'
            'DAGFIKQYGDCLGDIAARDLICAQKFKGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAA'
            'LQIPFAMQMAYRFNGIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNHNAQ'
            'ALNTLVKQLSSKFGAISSVLNDIFSRLDKVEAEVQIDRLITGRLQSLQTYVTQQLIRAAEIRAS'
            'ANLAATKMSECVLGQSKRVDFCGKGYHLMSFPQSAPHGVVFLHVTYVPAQEKNFTTAPAICHDG'
            'KAHFPREGVFVSNGTHWFVTQRNFYEPQIITTDNTFVSGNCDVVIGIVNNTVYDPLQPELDSFK'
            'EELDKYFKNHTSPDVDLGDISGINASVVNIQKEIDRLNEVAKNLNESLIDLQELGKYEQYIKWP'
            'WYIWLGFIAGLIAIVMVTIMLCCMTSCCSCLKGCCSCGSCCKFDEDDSEPVLKGVKLHYT*')

        # Site 417 (see the docstring) lies inside the expected
        # 'no coverage 353-422' range. The 'EPE' insertion is expected to
        # be reported as '-215E; -216P; -217E'.
        self.assertEqual(
            'A67V; H69-; V70-; T95I; no coverage 142-173; N211I; '
            'L212-; -215E; -216P; -217E; G339D; no coverage '
            '353-422; N440K; G446S; S477N; T478K; E484A; Q493R; '
            'G496S; Q498R; N501Y; Y505H; T547K; D614G; H655Y; '
            'N679K; P681H; A701V; no coverage 752-753; N764K; '
            'D796Y; N856K; Q954H; N969K; L981F',
            getSubstitutionsString(reference, genome))