def testSummarizeNoCoverageOneRegionPositionsAtEnd(self):
    """
    A genome with an uncovered run followed by isolated uncovered sites
    near its end must be reportable both entry-by-entry and in
    summarized form.
    """
    reference = AARead('id', 'STRSPFFFFFALFMMM')
    genome = AARead('id', 'KTRSLXXXXXALXMXM')

    # Default call: each uncovered site or run is listed on its own.
    plain = getSubstitutionsString(reference, genome)
    self.assertEqual(
        'S1K; P5L; no coverage 6-10; no coverage 13; no coverage 15',
        plain)

    # Passing True as the third argument: consecutive no-coverage
    # entries are expected to be combined into a single entry.
    summarized = getSubstitutionsString(reference, genome, True)
    self.assertEqual('S1K; P5L; no coverage 6-10, 13, 15', summarized)
def testSummarizeNoCoverageOneRegionTwoPositions(self):
    """
    Two isolated uncovered sites that are not separated by a
    substitution must be listed individually by default and merged
    into one entry when summarizing is requested.
    """
    reference = AARead('id', 'STRSPFFFFFA')
    genome = AARead('id', 'KXRXLXXXXXT')

    # Each case is (extra positional arguments, expected result). The
    # first case is the unsummarized call, the second the summarized one.
    cases = (
        ((),
         'S1K; no coverage 2; no coverage 4; P5L; no coverage 6-10; A11T'),
        ((True,),
         'S1K; no coverage 2, 4; P5L; no coverage 6-10; A11T'),
    )

    for extraArgs, expected in cases:
        self.assertEqual(
            expected, getSubstitutionsString(reference, genome, *extraArgs))
def testSummarizeNoCoverageOneRegionPositionsAtStart(self):
    """
    When the genome begins with isolated uncovered sites, those sites
    must be listed one by one by default and merged into a single
    entry in the summarized output.
    """
    reference = AARead('id', 'TRSPFFFFFA')
    genome = AARead('id', 'XRXLXXXXXT')

    expectedPlain = 'no coverage 1; no coverage 3; P4L; no coverage 5-9; A10T'
    expectedSummarized = 'no coverage 1, 3; P4L; no coverage 5-9; A10T'

    # The unsummarized call is checked first, then the summarized one.
    for extraArgs, expected in (((), expectedPlain),
                                ((True,), expectedSummarized)):
        result = getSubstitutionsString(reference, genome, *extraArgs)
        self.assertEqual(expected, result)
def testSummarizeNoCoverageTwoRegionsMultiplePositions(self):
    """
    With uncovered sites in two separate parts of the genome, both
    groups must be reported individually by default and each group
    must be merged when summarizing is requested.
    """
    reference = AARead('id', 'STRSPFFFFFALFMMM')
    genome = AARead('id', 'KXRXLXXXXXTLXMXM')

    # Expected output without summarizing.
    expectedPlain = (
        'S1K; no coverage 2; no coverage 4; P5L; '
        'no coverage 6-10; A11T; no coverage 13; no coverage 15')
    # Expected output with summarizing switched on.
    expectedSummarized = (
        'S1K; no coverage 2, 4; P5L; no coverage 6-10; '
        'A11T; no coverage 13, 15')

    plain = getSubstitutionsString(reference, genome)
    self.assertEqual(expectedPlain, plain)

    summarized = getSubstitutionsString(reference, genome, True)
    self.assertEqual(expectedSummarized, summarized)
def testTwoStringsOfXs(self):
    """
    Two separate runs of X in the genome must each be reported as its
    own no-coverage range.
    """
    expected = 'S1K; no coverage 2-4; P5L; no coverage 6-10; A11T'
    actual = getSubstitutionsString(
        AARead('id', 'STRSPFFFFFA'),
        AARead('id', 'KXXXLXXXXXT'))
    self.assertEqual(expected, actual)
def testFinalStringOfXs(self):
    """
    A run of X at the very end of the genome must be reported as a
    no-coverage range.
    """
    ref = AARead('id', 'TRSP')
    gen = AARead('id', 'LXXX')
    result = getSubstitutionsString(ref, gen)
    self.assertEqual('T1L; no coverage 2-4', result)
def testStringOfXs(self):
    """
    A run of X in the middle of the genome must be reported as a
    single no-coverage range between the surrounding substitutions.
    """
    expected = 'S1K; no coverage 2-4; P5L'
    self.assertEqual(
        expected,
        getSubstitutionsString(AARead('id', 'STRSP'),
                               AARead('id', 'KXXXL')))
def testInitialStringOfXs(self):
    """
    A run of X at the very start of the genome must be reported as a
    no-coverage range.
    """
    ref = AARead('id', 'TRSP')
    gen = AARead('id', 'XXXL')
    expected = 'no coverage 1-3; P4L'
    self.assertEqual(expected, getSubstitutionsString(ref, gen))
def testTwoLettersBothDifferent(self):
    """
    If two two-AA sequences that differ at both sites are passed, a
    string showing the changes at positions 1 and 2 must be returned.
    """
    changes = getSubstitutionsString(AARead('id', 'SP'), AARead('id', 'KL'))
    self.assertEqual('S1K; P2L', changes)
def testOneLetterGenomeGap(self):
    """
    If a one-AA reference is compared with a genome that has a gap at
    that site, a string showing the change at position 1 must be
    returned.
    """
    ref = AARead('id', 'S')
    gapped = AARead('id', '-')
    expected = 'S1-'
    self.assertEqual(expected, getSubstitutionsString(ref, gapped))
def testOneLetterIdentical(self):
    """
    Two identical one-AA sequences have no differences, so the empty
    string must be returned.
    """
    result = getSubstitutionsString(AARead('id', 'K'), AARead('id', 'K'))
    self.assertEqual('', result)
def testEmpty(self):
    """
    When both the reference and the genome are empty sequences, the
    empty string must be returned.
    """
    emptyReference = AARead('id', '')
    emptyGenome = AARead('id', '')
    result = getSubstitutionsString(emptyReference, emptyGenome)
    self.assertEqual('', result)
def testUnreportedXsIssue21SimpleWOInsert(self):
    """
    Minimal case related to
    https://github.com/VirologyCharite/sars2seq/issues/21
    with no insertion relative to the reference.
    """
    expected = 'L2M; no coverage 3-4'
    actual = getSubstitutionsString(
        AARead('id', 'CLABF'),
        AARead('id', 'CMXXF'))
    self.assertEqual(expected, actual)
def testUnreportedXsIssue21Simple(self):
    """
    Minimal case that triggers the problem described in
    https://github.com/VirologyCharite/sars2seq/issues/21
    """
    ref = AARead('id', 'C-LABF')
    gen = AARead('id', 'CSMXXF')

    # NOTE: when this test was written the code was reported to return
    # '-2S; L2M; no coverage 3' (i.e., without the second uncovered
    # site). That note may be stale -- verify against the current code.
    expected = '-2S; L2M; no coverage 3-4'

    self.assertEqual(expected, getSubstitutionsString(ref, gen))
def testUnreportedXsIssue21(self):
    """
    Regression test using the protein sequences that caused
    https://github.com/VirologyCharite/sars2seq/issues/21

    The result must include the fact that site 417 is not covered in
    the genome (it lies within the 353-422 no-coverage range below).
    """
    # What the comparison of the two sequences below must produce.
    expected = (
        'A67V; H69-; V70-; T95I; no coverage 142-173; N211I; '
        'L212-; -215E; -216P; -217E; G339D; no coverage '
        '353-422; N440K; G446S; S477N; T478K; E484A; Q493R; '
        'G496S; Q498R; N501Y; Y505H; T547K; D614G; H655Y; '
        'N679K; P681H; A701V; no coverage 752-753; N764K; '
        'D796Y; N856K; Q954H; N969K; L981F')

    reference = AARead(
        'id',
        'MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTW'
        'FHAIHVSGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVI'
        'KVCEFQFCNDPFLGVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREF'
        'VFKNIDGYFKIYSKHTPINLVR---DLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGD'
        'SSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSN'
        'FRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYG'
        'VSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKV'
        'GGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTNGVGYQPYR'
        'VVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIADTT'
        'DAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQDVNCTEVPVAIHADQLTPTWRVYS'
        'TGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGA'
        'ENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNR'
        'ALTGIAVEQDKNTQEVFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLA'
        'DAGFIKQYGDCLGDIAARDLICAQKFNGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAA'
        'LQIPFAMQMAYRFNGIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNQNAQ'
        'ALNTLVKQLSSNFGAISSVLNDILSRLDKVEAEVQIDRLITGRLQSLQTYVTQQLIRAAEIRAS'
        'ANLAATKMSECVLGQSKRVDFCGKGYHLMSFPQSAPHGVVFLHVTYVPAQEKNFTTAPAICHDG'
        'KAHFPREGVFVSNGTHWFVTQRNFYEPQIITTDNTFVSGNCDVVIGIVNNTVYDPLQPELDSFK'
        'EELDKYFKNHTSPDVDLGDISGINASVVNIQKEIDRLNEVAKNLNESLIDLQELGKYEQYIKWP'
        'WYIWLGFIAGLIAIVMVTIMLCCMTSCCSCLKGCCSCGSCCKFDEDDSEPVLKGVKLHYT*')

    genome = AARead(
        'id',
        'MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTW'
        'FHVI--SGTNGTKRFDNPVLPFNDGVYFASIEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVI'
        'KVCEFQFCNDPFLXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXPFLMDLEGKQGNFKNLREF'
        'VFKNIDGYFKIYSKHTPII-VREPEDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGD'
        'SSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSN'
        'FRVQPTESIVRFPNITNLCPFDEVFNATRFASVYAXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'
        'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXYKLPDDFTGCVIAWNSNKLDSKV'
        'SGNYNYLYRLFRKSNLKPFERDISTEIYQAGNKPCNGVAGFNCYFPLRSYSFRPTYGVGHQPYR'
        'VVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLKGTGVLTESNKKFLPFQQFGRDIADTT'
        'DAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQGVNCTEVPVAIHADQLTPTWRVYS'
        'TGSNVFQTRAGCLIGAEYVNNSYECDIPIGAGICASYQTQTKSHRRARSVASQSIIAYTMSLGV'
        'ENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNXXLQYGSFCTQLKR'
        'ALTGIAVEQDKNTQEVFAQVKQIYKTPPIKYFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLA'
        'DAGFIKQYGDCLGDIAARDLICAQKFKGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAA'
        'LQIPFAMQMAYRFNGIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNHNAQ'
        'ALNTLVKQLSSKFGAISSVLNDIFSRLDKVEAEVQIDRLITGRLQSLQTYVTQQLIRAAEIRAS'
        'ANLAATKMSECVLGQSKRVDFCGKGYHLMSFPQSAPHGVVFLHVTYVPAQEKNFTTAPAICHDG'
        'KAHFPREGVFVSNGTHWFVTQRNFYEPQIITTDNTFVSGNCDVVIGIVNNTVYDPLQPELDSFK'
        'EELDKYFKNHTSPDVDLGDISGINASVVNIQKEIDRLNEVAKNLNESLIDLQELGKYEQYIKWP'
        'WYIWLGFIAGLIAIVMVTIMLCCMTSCCSCLKGCCSCGSCCKFDEDDSEPVLKGVKLHYT*')

    actual = getSubstitutionsString(reference, genome)
    self.assertEqual(expected, actual)