def test_sa(self):
    """Check that ``WaveletTree.get_sa`` agrees with ``BurrowsWheeler.get_sa``.

    Both structures are built over the same reference string and queried
    with every index in ``range(len(reference))``.
    """
    reference = "GTCAACGCATGATCGATACGCATGATCGACCNANCN"
    burrows_wheeler = BurrowsWheeler(reference)
    wavelet_tree = WaveletTree(reference)

    for position, _ in enumerate(reference):
        expected = burrows_wheeler.get_sa(position)
        actual = wavelet_tree.get_sa(position)
        self.assertEqual(expected, actual)
def test_rank(self):
    """Check that ``rank`` is identical for both index implementations.

    For each of the symbols A, C, G, T and N, ``rank(symbol, i)`` is compared
    between ``BurrowsWheeler`` and ``WaveletTree`` at every index of the
    reference string.
    """
    reference = "ACGCATGATCACTAGCTAGCATCGACCNANCN"
    burrows_wheeler = BurrowsWheeler(reference)
    wavelet_tree = WaveletTree(reference)

    # Iterating the string yields the same one-character symbols, in the
    # same order, as the explicit list would.
    for symbol in "ACGTN":
        for position, _ in enumerate(reference):
            expected = burrows_wheeler.rank(symbol, position)
            actual = wavelet_tree.rank(symbol, position)
            self.assertEqual(expected, actual)
def test_algorithms(self):
    """Check that every ``strategy`` value produces the same ``str()`` output.

    A ``WaveletTree`` is built over one reference for each of the strategies
    "Simple", "ManberMyers" and "KaerkkaeinenSanders"; the string form of the
    "Simple" tree is the baseline the other two are compared against.
    """
    ref = (
        "TAGAATCGTTTTTTTTTTATCGACTACNACTACAAAAAAAAATGATCNTACNGTAA"
        "TTTTTTTTTTTAAAAAAAAAACCCCCCCGGN"
    )

    # Build all trees up front so a construction error surfaces before any
    # comparison is attempted.
    trees = {
        name: WaveletTree(ref, strategy=name)
        for name in ("Simple", "ManberMyers", "KaerkkaeinenSanders")
    }

    for other in ("ManberMyers", "KaerkkaeinenSanders"):
        self.assertEqual(str(trees["Simple"]), str(trees[other]))
def test_sa_compression(self):
    """Check ``WaveletTree.get_sa`` under every ``compression_sa`` in 1..49.

    For each reference string, the ``sa`` attribute of a ``BurrowsWheeler``
    built with ``compression_sa=1`` is the expected value; it is compared
    against ``get_sa(i)`` of a ``WaveletTree`` built with each compression
    coefficient.
    """
    refs = [
        "TAGAATCGTTTTTTTTTTATCGACTACNACTACAAAAAAAAATGATCNTACNGTAA",
        "TTTTTTTTTTTAAAAAAAAAACCCCCCCGGN",
        "AGCTA",
        "T",
    ]
    for ref in refs:
        # The baseline depends only on the reference, so build it once per
        # reference instead of once per (compression, index) pair.
        bw_uncompressed = BurrowsWheeler(ref, compression_sa=1)
        for comp in range(1, 50):
            # Likewise, the tree under test depends only on (ref, comp).
            bw = WaveletTree(ref, compression_sa=comp)
            # NOTE(review): indices start at 1, so index 0 is never compared
            # and the one-character reference "T" only exercises
            # construction. Confirm whether skipping index 0 is intentional.
            for i in range(1, len(ref)):
                self.assertEqual(
                    bw_uncompressed.sa[i],
                    bw.get_sa(i),
                    msg="ref={!r}, compression_sa={}, i={}".format(ref, comp, i),
                )
def test_basic(self):
    """Exercise ``WaveletTree`` argument validation and basic accessors.

    Verifies that an empty reference, a second positional argument of "A",
    an unknown strategy name and a negative ``compression_sa`` each raise
    ``ValueError``; that a valid reference can be constructed; that ``sa`` is
    a list and ``get_bwt`` returns a str; and that ``len()`` of a tree equals
    the length of its reference.
    """
    # Invalid constructor arguments must be rejected.
    self.assertRaises(ValueError, WaveletTree, "")
    self.assertRaises(ValueError, WaveletTree, "A", "A")
    self.assertRaises(ValueError, WaveletTree, "A", strategy="fun")
    self.assertRaises(ValueError, WaveletTree, "A", compression_sa=-1)

    # Smoke test: a valid reference must construct without raising.
    WaveletTree("CACGTACGTGTGCTAACACGTGTGTTTTTGAC")

    suffix_array = WaveletTree("GCAGTN").sa
    bwt_text = WaveletTree("ACGTGTAC").get_bwt("ACGTGTAC")
    self.assertIsInstance(bwt_text, str)
    self.assertIsInstance(suffix_array, list)

    text = "ACGATCGATCAGTAC"
    tree = WaveletTree(text)
    self.assertEqual(len(text), len(tree))
def __init__(self, reference_genome: str, compression_occ: int = 32, compression_sa: int = 32, wavelet=True):
    """Build the index over ``reference_genome``.

    Args:
        reference_genome: Reference text; it is validated by the class's
            private ``__string_checks`` helper and a "$" is appended before
            it is handed to the backing structure.
        compression_occ: Stored on the instance and forwarded to
            ``BurrowsWheeler``; it is not passed to ``WaveletTree``.
        compression_sa: Forwarded to whichever backing structure is built.
        wavelet: When true the backing structure is a ``WaveletTree``,
            otherwise a ``BurrowsWheeler``.

    Raises:
        ValueError: If either compression coefficient is smaller than 1.
            ``__string_checks`` may raise as well (not visible here).
    """
    coefficients = (compression_occ, compression_sa)
    if any(coefficient < 1 for coefficient in coefficients):
        raise ValueError("compression coefficients need to be >=1")

    self.__string_checks(reference_genome)

    # ``n`` is the length of the reference without the appended "$".
    self.n = len(reference_genome)
    self.compression_occ = compression_occ

    terminated = reference_genome + "$"
    if wavelet:
        backing = WaveletTree(terminated, compression_sa=compression_sa)
    else:
        backing = BurrowsWheeler(
            terminated,
            compression_occ=compression_occ,
            compression_sa=compression_sa,
        )
    self.bwt = backing
def test_encode_decode(self):
    """Check that ``str()`` of a ``WaveletTree`` reproduces its input string."""
    original = "NNCACGTACGTGTGCTAACACGTGTGTTTTTGAC"
    tree = WaveletTree(original)
    round_tripped = str(tree)
    self.assertEqual(round_tripped, original)