def test_sa(self):
        """Compare ``get_sa`` of BurrowsWheeler and WaveletTree index by index.

        Both structures are built from the same string, and ``get_sa(i)`` must
        agree for every ``i`` in ``range(len(string))``.
        """
        genome = "GTCAACGCATGATCGATACGCATGATCGACCNANCN"

        burrows_wheeler = BurrowsWheeler(genome)
        wavelet_tree = WaveletTree(genome)

        for position, _ in enumerate(genome):
            expected = burrows_wheeler.get_sa(position)
            actual = wavelet_tree.get_sa(position)
            self.assertEqual(expected, actual)
    def test_rank(self):
        """Compare ``rank`` of BurrowsWheeler and WaveletTree.

        For each of the symbols A, C, G, T and N, ``rank(symbol, i)`` must be
        equal on both structures for every ``i`` in ``range(len(string))``.
        """
        genome = "ACGCATGATCACTAGCTAGCATCGACCNANCN"

        burrows_wheeler = BurrowsWheeler(genome)
        wavelet_tree = WaveletTree(genome)

        positions = range(len(genome))
        for symbol in ('A', 'C', 'G', 'T', 'N'):
            for position in positions:
                expected = burrows_wheeler.rank(symbol, position)
                actual = wavelet_tree.rank(symbol, position)
                self.assertEqual(expected, actual)
    def test_algorithms(self):
        """Check that ``str()`` of a WaveletTree is the same for every strategy.

        The tree built with ``strategy="Simple"`` serves as the baseline that
        the "ManberMyers" and "KaerkkaeinenSanders" trees are compared against.
        """
        genome = "TAGAATCGTTTTTTTTTTATCGACTACNACTACAAAAAAAAATGATCNTACNGTAATTTTTTTTTTTAAAAAAAAAACCCCCCCGGN"

        baseline = WaveletTree(genome, strategy="Simple")
        # Build every alternative up front, before the first assertion runs.
        alternatives = [
            WaveletTree(genome, strategy=strategy_name)
            for strategy_name in ("ManberMyers", "KaerkkaeinenSanders")
        ]

        for candidate in alternatives:
            self.assertEqual(str(baseline), str(candidate))
    def test_sa_compression(self):
        """Check that ``WaveletTree.get_sa`` is independent of ``compression_sa``.

        For each reference string, the ``sa`` attribute of a BurrowsWheeler
        built with ``compression_sa=1`` is the expected result. A WaveletTree
        built with each ``compression_sa`` in 1..49 must return the same value
        from ``get_sa(i)`` for every ``i`` in ``range(1, len(ref))``.
        """
        refs = [
            "TAGAATCGTTTTTTTTTTATCGACTACNACTACAAAAAAAAATGATCNTACNGTAA",
            "TTTTTTTTTTTAAAAAAAAAACCCCCCCGGN", "AGCTA", "T"
        ]
        for ref in refs:
            indices = range(1, len(ref))
            if not indices:
                # A one-character reference has no index >= 1 to compare, so
                # (as before) nothing is constructed for it.
                continue

            # The reference structure depends only on ``ref``; build it once
            # per reference instead of once per (comp, i) pair.
            reference = BurrowsWheeler(ref, compression_sa=1)

            for comp in range(1, 50):
                # The tree under test depends only on (ref, comp); build it
                # once per compression factor instead of once per index.
                wt = WaveletTree(ref, compression_sa=comp)
                for i in indices:
                    self.assertEqual(reference.sa[i], wt.get_sa(i))
    def test_basic(self):
        """Exercise WaveletTree constructor validation and basic accessors.

        Covers: ValueError on four invalid constructor calls, a successful
        construction, the types of ``sa`` and ``get_bwt(...)``, and ``len()``
        of a tree matching the length of its input string.
        """
        # (positional args, keyword args) that must each be rejected.
        invalid_calls = (
            (("",), {}),
            (("A", "A"), {}),
            (("A",), {"strategy": "fun"}),
            (("A",), {"compression_sa": -1}),
        )
        for args, kwargs in invalid_calls:
            with self.assertRaises(ValueError):
                WaveletTree(*args, **kwargs)

        # Must construct without raising.
        WaveletTree("CACGTACGTGTGCTAACACGTGTGTTTTTGAC")

        suffix_array = WaveletTree("GCAGTN").sa
        text = "ACGTGTAC"
        transformed = WaveletTree(text).get_bwt(text)

        self.assertIsInstance(transformed, str)
        self.assertIsInstance(suffix_array, list)

        sample = "ACGATCGATCAGTAC"
        expected_length = len(sample)
        self.assertEqual(expected_length, len(WaveletTree(sample)))
예제 #6
0
    def __init__(self,
                 reference_genome: str,
                 compression_occ: int = 32,
                 compression_sa: int = 32,
                 wavelet=True):
        """Validate the arguments and build the index stored in ``self.bwt``.

        Args:
            reference_genome: Text to index; it is passed through the class's
                private string checks before use.
            compression_occ: Must be >= 1. Stored on the instance and passed
                on only to the BurrowsWheeler backend.
            compression_sa: Must be >= 1. Passed on to whichever backend is
                built.
            wavelet: If truthy, ``self.bwt`` is a WaveletTree; otherwise it is
                a BurrowsWheeler.

        Raises:
            ValueError: If either compression coefficient is smaller than 1.
        """
        if any(coefficient < 1
               for coefficient in (compression_occ, compression_sa)):
            raise ValueError("compression coefficients need to be >=1")

        self.__string_checks(reference_genome)

        self.compression_occ = compression_occ

        # Length of the text as supplied, i.e. before "$" is appended.
        self.n = len(reference_genome)

        # NOTE(review): "$" is presumably the end-of-text sentinel expected by
        # the backends -- confirm against WaveletTree / BurrowsWheeler.
        terminated = reference_genome + "$"

        if not wavelet:
            self.bwt = BurrowsWheeler(terminated,
                                      compression_occ=compression_occ,
                                      compression_sa=compression_sa)
        else:
            self.bwt = WaveletTree(terminated,
                                   compression_sa=compression_sa)
 def test_encode_decode(self):
     """Check that ``str()`` of a WaveletTree equals the string it was built from."""
     original = "NNCACGTACGTGTGCTAACACGTGTGTTTTTGAC"
     self.assertEqual(str(WaveletTree(original)), original)