def test_sa(self):
    """Check that BurrowsWheeler and WaveletTree agree on ``get_sa``.

    Both structures are built from the same reference string and queried
    with every index from 0 up to (but excluding) the reference length.
    """
    reference = "GTCAACGCATGATCGATACGCATGATCGACCNANCN"
    burrows_wheeler = BurrowsWheeler(reference)
    wavelet_tree = WaveletTree(reference)

    for position, _ in enumerate(reference):
        from_burrows_wheeler = burrows_wheeler.get_sa(position)
        from_wavelet_tree = wavelet_tree.get_sa(position)
        self.assertEqual(from_burrows_wheeler, from_wavelet_tree)
def test_rank(self):
    """Check that BurrowsWheeler and WaveletTree agree on ``rank``.

    ``rank(char, i)`` is compared for each of the characters A, C, G, T
    and N at every index from 0 up to (but excluding) the reference length.
    """
    reference = "ACGCATGATCACTAGCTAGCATCGACCNANCN"
    burrows_wheeler = BurrowsWheeler(reference)
    wavelet_tree = WaveletTree(reference)
    positions = range(len(reference))

    # Iterating the string yields the same single-character symbols, in the
    # same order, as the explicit list ['A', 'C', 'G', 'T', 'N'].
    for symbol in "ACGTN":
        for position in positions:
            self.assertEqual(
                burrows_wheeler.rank(symbol, position),
                wavelet_tree.rank(symbol, position),
            )
def test_algorithms(self):
    """Check that every construction strategy yields the same ``str()`` output.

    The "Simple" strategy serves as the baseline; "ManberMyers" and
    "KaerkkaeinenSanders" must stringify to the same value.
    """
    ref = "TAGAATCGTTTTTTTTTTATCGACTACNACTACAAAAAAAAATGATCNTACNGTAATTTTTTTTTTTAAAAAAAAAACCCCCCCGGN"
    strategy_names = ("Simple", "ManberMyers", "KaerkkaeinenSanders")

    # Build every variant up front so that a construction error surfaces
    # before any comparison is attempted.
    built = [BurrowsWheeler(ref, strategy=name) for name in strategy_names]

    baseline = built[0]
    for candidate in built[1:]:
        self.assertEqual(str(baseline), str(candidate))
def test_basic(self):
    """Exercise constructor validation and a few basic accessors.

    Covers: argument combinations that must raise ValueError, a plain
    construction that must succeed, the types of ``sa`` and of the
    ``get_bwt`` result, and ``len()`` of the built object.
    """
    # (positional args, keyword args) for constructor calls that must be
    # rejected with ValueError.
    rejected_calls = [
        (("",), {}),
        (("A", "A"), {}),
        (("A",), {"strategy": "fun"}),
        (("A",), {"compression_occ": -1}),
        (("A",), {"compression_sa": -1}),
    ]
    for args, kwargs in rejected_calls:
        with self.assertRaises(ValueError):
            BurrowsWheeler(*args, **kwargs)

    # A valid sequence with default options must construct without raising.
    BurrowsWheeler("CACGTACGTGTGCTAACACGTGTGTTTTTGAC")

    suffix_array = BurrowsWheeler("GCAGTN").sa
    transformed = BurrowsWheeler("ACGTGTAC").get_bwt("ACGTGTAC")
    self.assertIsInstance(transformed, str)
    self.assertIsInstance(suffix_array, list)

    # len() of the built object must equal the length of its input string.
    sequence = "ACGATCGATCAGTAC"
    expected_length = len(sequence)
    self.assertEqual(expected_length, len(BurrowsWheeler(sequence)))
def test_sa_compression(self):
    """Check ``get_sa`` against the ``sa`` list for many ``compression_sa`` values.

    For each reference string, a baseline object is built with
    ``compression_occ=1, compression_sa=1``. For every ``compression_sa``
    value from 1 to 49, ``get_sa(i)`` of an object built with that value
    must equal ``sa[i]`` of the baseline, for every compared index ``i``.
    """
    refs = [
        "TAGAATCGTTTTTTTTTTATCGACTACNACTACAAAAAAAAATGATCNTACNGTAA",
        "TTTTTTTTTTTAAAAAAAAAACCCCCCCGGN",
        "AGCTA",
        "T",
    ]
    for ref in refs:
        # NOTE(review): comparisons start at index 1, so index 0 is never
        # checked and the single-character reference "T" produces no
        # comparisons at all -- confirm whether skipping 0 is intentional.
        if len(ref) < 2:
            # Nothing to compare; as before, no objects are built here.
            continue
        positions = range(1, len(ref))

        # The baseline depends only on ``ref``, so build it once per
        # reference instead of once per (comp, i) pair.
        bw_uncompressed = BurrowsWheeler(ref, compression_occ=1, compression_sa=1)
        expected_sa = bw_uncompressed.sa

        for comp in range(1, 50):
            # The object under test depends only on (ref, comp); build it
            # once per compression value rather than once per index.
            bw = BurrowsWheeler(ref, compression_occ=1, compression_sa=comp)
            for i in positions:
                self.assertEqual(expected_sa[i], bw.get_sa(i))
def test_encode_decode(self):
    """Check that ``str()`` of a BurrowsWheeler object returns its input string."""
    original = "NNCACGTACGTGTGCTAACACGTGTGTTTTTGAC"
    round_tripped = str(BurrowsWheeler(original))
    self.assertEqual(round_tripped, original)