Ejemplo n.º 1
0
    def test_STree(self):
        """Check incremental ``tom.STree`` construction against stored data.

        For every named test sequence this method:

        1. loads the sequence and the pickled reference suffix-tree form,
        2. grows a suffix tree from ``seq.sub(0, 0)`` up to the full
           sequence in several randomly sized steps, asserting after each
           step that ``verifySuffixTreeCounts`` accepts the tree,
        3. asserts that the canonicalized final tree matches the stored
           reference form via ``verifySuffixTreeForm``,
        4. asserts ``isSuffix()`` for positions built from ``seq.slice(-n)``
           for every ``n`` in ``range(1, min(seq.length(), 12))``.

        A progress dot is printed per sequence and a space at the end.
        """
        testSeqs = [
            '10', 'mississippi', '10_10000', '4_3_10000', '4x3_10000',
            'pc_10000'
        ]
        for s in testSeqs:
            print('.', end='', flush=True)
            seq = tom.load(current_dir + s + '.seq.bz2')
            # The context manager closes the archive even if unpickling
            # raises, which the previous explicit close() did not guarantee.
            # NOTE(review): pickle is only appropriate here if the fixture
            # files are trusted test data -- confirm.
            with bz2.BZ2File(current_dir + s + '.stree.bz2') as f:
                streeForm = pickle.load(f)
            # Create the suffix tree in several random steps: five random
            # intermediate sizes in ascending order, then the full length.
            sizes = sorted(
                rnd.randint(1, seq.length() - 1) for _ in range(5)
            ) + [seq.length()]
            stree = tom.STree(seq.sub(0, 0))
            for sz in sizes:
                stree.extendTo(seq.sub(0, sz))
                self.assertTrue(
                    verifySuffixTreeCounts(seq.sub(0, sz), stree),
                    "!!! SuffixTree test NOT passed !!! Substring count discrepancy."
                )
            streeFormNew = canonicalizeSuffixTree(stree)
            self.assertTrue(
                verifySuffixTreeForm(seq, streeFormNew, streeForm),
                "!!! SuffixTree test NOT passed !!! Something has changed.")
            for tailLength in range(1, min(seq.length(), 12)):
                self.assertTrue(
                    tom.stree.Position(stree, seq.slice(-tailLength)).isSuffix(),
                    ".isSuffix() test failed!")

        print(' ', end='', flush=True)
Ejemplo n.º 2
0
 def test_wordsFromData(self):
     """Check ``tom.wordsFromData`` for consistency under sequence reversal.

     For every test sequence a suffix tree is built for the sequence and
     for ``seq.reverse()``. For each word setting, three comparisons are
     made after reversing the words obtained from the reversed tree and
     sorting both word lists:

     * plain words from both trees must be equal,
     * ``prefixUnique=True`` on the forward tree must equal
       ``suffixUnique=True`` on the reversed tree,
     * ``suffixUnique=True`` on the forward tree must equal
       ``prefixUnique=True`` on the reversed tree.

     A progress dot is printed per sequence and a space at the end.
     """
     testSeqs = ['10_10000', '4_3_10000', '4x3_10000', 'pc_10000']
     for s in testSeqs:
         print('.', end='', flush=True)
         seq = tom.load(current_dir + s + '.seq.bz2')
         stree = tom.STree(seq)
         rstree = tom.STree(seq.reverse())
         # Each tuple is unpacked as the positional arguments that follow
         # the tree in tom.wordsFromData. The (2, 5, 2**k, 0) settings were
         # previously appended twice; the identical duplicate added no new
         # cases and has been dropped.
         wordSettings = [(0, 0, 2**k, 0) for k in range(4, 10)]
         wordSettings += [(2, 5, 2**k, 0) for k in range(4, 10)]
         for wS in wordSettings:
             # Plain words: forward tree vs. reversed tree.
             XY = tom.wordsFromData(stree, *wS)
             tom.sortWords(XY)
             XYr = tom.wordsFromData(rstree, *wS)
             tom.reverseWords(XYr)
             tom.sortWords(XYr)
             self.assertTrue(
                 wordsAreEqual(XY, XYr),
                 "wordsFromData gives different results on reversed input for "
                 + str(wS))
             # prefixUnique on the forward tree vs. suffixUnique on the
             # reversed tree.
             Y = tom.wordsFromData(stree, *wS, prefixUnique=True)
             tom.sortWords(Y)
             Yr = tom.wordsFromData(rstree, *wS, suffixUnique=True)
             tom.reverseWords(Yr)
             tom.sortWords(Yr)
             self.assertTrue(
                 wordsAreEqual(Y, Yr),
                 "wordsFromData gives different results on reversed input for characteristic words for "
                 + str(wS))
             # suffixUnique on the forward tree vs. prefixUnique on the
             # reversed tree.
             Xr = tom.wordsFromData(rstree, *wS, prefixUnique=True)
             tom.reverseWords(Xr)
             tom.sortWords(Xr)
             X = tom.wordsFromData(stree, *wS, suffixUnique=True)
             tom.sortWords(X)
             self.assertTrue(
                 wordsAreEqual(X, Xr),
                 "wordsFromData gives different results on reversed input for indicative words for "
                 + str(wS))
     print(' ', end='', flush=True)
Ejemplo n.º 3
0
 def test_wordsFromData(self):
     """Verify that ``tom.wordsFromData`` agrees on forward and reversed input.

     Builds one suffix tree from each test sequence and one from
     ``seq.reverse()``, then, for every word setting, compares sorted word
     lists from the forward tree with reversed-and-sorted word lists from
     the reversed tree:

     * no uniqueness flag on either side,
     * ``prefixUnique=True`` (forward) against ``suffixUnique=True``
       (reversed),
     * ``suffixUnique=True`` (forward) against ``prefixUnique=True``
       (reversed).

     Prints one dot per sequence as progress output and a trailing space.
     """
     testSeqs = ['10_10000', '4_3_10000', '4x3_10000', 'pc_10000']
     for s in testSeqs:
         print('.', end='', flush=True)
         seq = tom.load(current_dir + s + '.seq.bz2')
         stree = tom.STree(seq)
         rstree = tom.STree(seq.reverse())
         # Settings are passed positionally to tom.wordsFromData after the
         # tree. The (2, 5, 2**k, 0) group used to be added twice verbatim;
         # the redundant copy only repeated identical checks and is removed.
         wordSettings = [(0, 0, 2**k, 0) for k in range(4, 10)]
         wordSettings += [(2, 5, 2**k, 0) for k in range(4, 10)]
         for wS in wordSettings:
             # Case 1: no uniqueness constraint.
             XY = tom.wordsFromData(stree, *wS)
             tom.sortWords(XY)
             XYr = tom.wordsFromData(rstree, *wS)
             tom.reverseWords(XYr)
             tom.sortWords(XYr)
             self.assertTrue(
                 wordsAreEqual(XY, XYr),
                 "wordsFromData gives different results on reversed input for "
                 + str(wS))
             # Case 2: prefix-unique forward words vs. suffix-unique words
             # of the reversed sequence.
             Y = tom.wordsFromData(stree, *wS, prefixUnique=True)
             tom.sortWords(Y)
             Yr = tom.wordsFromData(rstree, *wS, suffixUnique=True)
             tom.reverseWords(Yr)
             tom.sortWords(Yr)
             self.assertTrue(
                 wordsAreEqual(Y, Yr),
                 "wordsFromData gives different results on reversed input for characteristic words for "
                 + str(wS))
             # Case 3: suffix-unique forward words vs. prefix-unique words
             # of the reversed sequence.
             Xr = tom.wordsFromData(rstree, *wS, prefixUnique=True)
             tom.reverseWords(Xr)
             tom.sortWords(Xr)
             X = tom.wordsFromData(stree, *wS, suffixUnique=True)
             tom.sortWords(X)
             self.assertTrue(
                 wordsAreEqual(X, Xr),
                 "wordsFromData gives different results on reversed input for indicative words for "
                 + str(wS))
     print(' ', end='', flush=True)
Ejemplo n.º 4
0
    def test_STree(self):
        """Verify step-wise suffix-tree construction against reference data.

        For each test sequence name the method loads the sequence and a
        pickled reference tree form, extends a ``tom.STree`` (started from
        ``seq.sub(0, 0)``) through randomly chosen intermediate sizes up to
        the full sequence length, and asserts after every extension that
        ``verifySuffixTreeCounts`` accepts the tree. The canonicalized final
        tree is then compared with the reference form, and ``isSuffix()`` is
        asserted for positions built from ``seq.slice(-n)`` for each ``n`` in
        ``range(1, min(seq.length(), 12))``.

        Prints one dot per sequence as progress output and a trailing space.
        """
        testSeqs = ['10', 'mississippi', '10_10000', '4_3_10000', '4x3_10000', 'pc_10000']
        for s in testSeqs:
            print('.', end='', flush=True)
            seq = tom.load(current_dir + s + '.seq.bz2')
            # Use a context manager so the archive is closed even when
            # pickle.load raises; the manual close() was skipped in that case.
            # NOTE(review): unpickling assumes the fixture files are trusted
            # test data -- confirm.
            with bz2.BZ2File(current_dir + s + '.stree.bz2') as f:
                streeForm = pickle.load(f)
            # create SuffixTree in several random steps: five random sizes,
            # sorted ascending, followed by the full sequence length.
            sizes = sorted(rnd.randint(1, seq.length() - 1) for _ in range(5))
            sizes.append(seq.length())
            stree = tom.STree(seq.sub(0, 0))
            for sz in sizes:
                stree.extendTo(seq.sub(0, sz))
                self.assertTrue(verifySuffixTreeCounts(seq.sub(0, sz), stree),
                                "!!! SuffixTree test NOT passed !!! Substring count discrepancy.")
            streeFormNew = canonicalizeSuffixTree(stree)
            self.assertTrue(verifySuffixTreeForm(seq, streeFormNew, streeForm),
                            "!!! SuffixTree test NOT passed !!! Something has changed.")
            for tailLength in range(1, min(seq.length(), 12)):
                self.assertTrue(tom.stree.Position(stree, seq.slice(-tailLength)).isSuffix(),
                                ".isSuffix() test failed!")

        print(' ', end='', flush=True)