def test_STree(self):
    """Check incremental suffix-tree construction against stored references.

    For each named test sequence, the sequence (``<name>.seq.bz2``) and a
    pickled reference form (``<name>.stree.bz2``) are loaded from
    ``current_dir``.  A ``tom.STree`` is started on ``seq.sub(0, 0)`` and
    extended in five randomly sized steps plus a final step to the full
    length.  After every step ``verifySuffixTreeCounts`` must succeed; at
    the end the canonicalized tree must match the stored reference form,
    and ``tom.stree.Position(stree, seq.slice(-l)).isSuffix()`` must hold
    for ``l`` in ``1 .. min(seq.length(), 12) - 1``.

    A progress dot is printed per sequence, followed by a single space.
    """
    testSeqs = [
        '10', 'mississippi', '10_10000', '4_3_10000', '4x3_10000',
        'pc_10000'
    ]
    for s in testSeqs:
        print('.', end='', flush=True)
        seq = tom.load(current_dir + s + '.seq.bz2')

        # Use a context manager so the file is closed even if unpickling
        # fails.  NOTE: pickle.load can execute arbitrary code; the fixture
        # files are assumed to be trusted test data shipped with the tests.
        with bz2.BZ2File(current_dir + s + '.stree.bz2') as f:
            streeForm = pickle.load(f)

        # create SuffixTree in several random steps:
        # (randint(1, length - 1) requires sequences of length >= 2)
        sizes = sorted(
            [rnd.randint(1, seq.length() - 1) for _ in range(5)])
        sizes.append(seq.length())

        stree = tom.STree(seq.sub(0, 0))
        for sz in sizes:
            stree.extendTo(seq.sub(0, sz))
            self.assertTrue(
                verifySuffixTreeCounts(seq.sub(0, sz), stree),
                "!!! SuffixTree test NOT passed !!! Substring count discrepancy."
            )

        # The fully extended tree must have the same canonical form as the
        # stored reference.
        streeFormNew = canonicalizeSuffixTree(stree)
        self.assertTrue(
            verifySuffixTreeForm(seq, streeFormNew, streeForm),
            "!!! SuffixTree test NOT passed !!! Something has changed.")

        # seq.slice(-l) is presumably the trailing l symbols of seq
        # (TODO confirm); each must be recognised as a suffix.
        for l in range(1, min(seq.length(), 12)):
            self.assertTrue(
                tom.stree.Position(stree, seq.slice(-l)).isSuffix(),
                ".isSuffix() test failed!")
    print(' ', end='', flush=True)
def test_wordsFromData(self):
    """Check that ``tom.wordsFromData`` is consistent under reversal.

    For each test sequence a suffix tree is built on the sequence and on
    ``seq.reverse()``.  For every word setting the following pairs must be
    equal according to ``wordsAreEqual`` after sorting (results from the
    reversed tree are passed through ``tom.reverseWords`` first):

    * plain words from both trees,
    * ``prefixUnique=True`` on the forward tree vs. ``suffixUnique=True``
      on the reversed tree ("characteristic words" in the failure message),
    * ``suffixUnique=True`` on the forward tree vs. ``prefixUnique=True``
      on the reversed tree ("indicative words" in the failure message).

    A progress dot is printed per sequence, followed by a single space.
    """
    testSeqs = ['10_10000', '4_3_10000', '4x3_10000', 'pc_10000']

    # Positional arguments forwarded to tom.wordsFromData after the tree.
    # Each distinct setting is listed once: the second family used to be
    # appended twice verbatim, which only repeated identical checks.
    wordSettings = [(0, 0, 2**k, 0) for k in range(4, 10)]
    wordSettings += [(2, 5, 2**k, 0) for k in range(4, 10)]

    def sortedWords(tree, wS, reverse=False, **unique):
        # Extract words, optionally reverse each word, then sort in place.
        words = tom.wordsFromData(tree, *wS, **unique)
        if reverse:
            tom.reverseWords(words)
        tom.sortWords(words)
        return words

    for s in testSeqs:
        print('.', end='', flush=True)
        seq = tom.load(current_dir + s + '.seq.bz2')
        stree = tom.STree(seq)
        rstree = tom.STree(seq.reverse())
        for wS in wordSettings:
            XY = sortedWords(stree, wS)
            XYr = sortedWords(rstree, wS, reverse=True)
            self.assertTrue(
                wordsAreEqual(XY, XYr),
                "wordsFromData gives different results on reversed input for "
                + str(wS))

            Y = sortedWords(stree, wS, prefixUnique=True)
            Yr = sortedWords(rstree, wS, reverse=True, suffixUnique=True)
            self.assertTrue(
                wordsAreEqual(Y, Yr),
                "wordsFromData gives different results on reversed input for characteristic words for "
                + str(wS))

            Xr = sortedWords(rstree, wS, reverse=True, prefixUnique=True)
            X = sortedWords(stree, wS, suffixUnique=True)
            self.assertTrue(
                wordsAreEqual(X, Xr),
                "wordsFromData gives different results on reversed input for indicative words for "
                + str(wS))
    print(' ', end='', flush=True)
def test_wordsFromData(self):
    """Verify ``tom.wordsFromData`` gives matching results on reversed input.

    Builds a ``tom.STree`` for each test sequence and for its reversal
    (``seq.reverse()``), then, for each word setting, compares three pairs
    of word lists with ``wordsAreEqual``.  Lists obtained from the reversed
    tree are run through ``tom.reverseWords``; all lists are sorted with
    ``tom.sortWords`` before comparison:

    * no uniqueness flag on either tree,
    * forward ``prefixUnique=True`` vs. reversed ``suffixUnique=True``
      (reported as "characteristic words"),
    * forward ``suffixUnique=True`` vs. reversed ``prefixUnique=True``
      (reported as "indicative words").

    Prints one progress dot per sequence and a trailing space.
    """
    testSeqs = ['10_10000', '4_3_10000', '4x3_10000', 'pc_10000']

    # Settings are unpacked as the positional arguments that follow the
    # tree in tom.wordsFromData.  The (2, 5, 2**k, 0) family is added only
    # once; adding it a second time merely re-ran the same assertions.
    wordSettings = [(0, 0, 2**k, 0) for k in range(4, 10)]
    wordSettings += [(2, 5, 2**k, 0) for k in range(4, 10)]

    def extract(tree, wS, reverse=False, **flags):
        # Get the words for one setting, un-reverse them if they come from
        # the reversed tree, and sort them so the lists are comparable.
        result = tom.wordsFromData(tree, *wS, **flags)
        if reverse:
            tom.reverseWords(result)
        tom.sortWords(result)
        return result

    failMsg = "wordsFromData gives different results on reversed input for "
    for s in testSeqs:
        print('.', end='', flush=True)
        seq = tom.load(current_dir + s + '.seq.bz2')
        stree = tom.STree(seq)
        rstree = tom.STree(seq.reverse())
        for wS in wordSettings:
            XY = extract(stree, wS)
            XYr = extract(rstree, wS, reverse=True)
            self.assertTrue(wordsAreEqual(XY, XYr), failMsg + str(wS))

            Y = extract(stree, wS, prefixUnique=True)
            Yr = extract(rstree, wS, reverse=True, suffixUnique=True)
            self.assertTrue(
                wordsAreEqual(Y, Yr),
                failMsg + "characteristic words for " + str(wS))

            Xr = extract(rstree, wS, reverse=True, prefixUnique=True)
            X = extract(stree, wS, suffixUnique=True)
            self.assertTrue(
                wordsAreEqual(X, Xr),
                failMsg + "indicative words for " + str(wS))
    print(' ', end='', flush=True)
def test_STree(self):
    """Build suffix trees step by step and compare them with stored forms.

    Each test sequence is loaded from ``current_dir`` together with its
    pickled reference form.  The tree starts from ``seq.sub(0, 0)`` and is
    extended via ``extendTo`` to five random prefix lengths (sorted
    ascending) and finally to the full sequence length.  The test asserts:

    * ``verifySuffixTreeCounts`` after every extension step,
    * ``verifySuffixTreeForm`` of the canonicalized final tree against the
      stored reference form,
    * ``isSuffix()`` of ``tom.stree.Position(stree, seq.slice(-l))`` for
      every ``l`` from 1 up to (excluding) ``min(seq.length(), 12)``.

    Prints one progress dot per sequence and a trailing space.
    """
    testSeqs = ['10', 'mississippi', '10_10000', '4_3_10000', '4x3_10000',
                'pc_10000']
    for s in testSeqs:
        print('.', end='', flush=True)
        seq = tom.load(current_dir + s + '.seq.bz2')

        # "with" guarantees the compressed file is closed even when
        # pickle.load raises.  NOTE: unpickling runs arbitrary code, so the
        # reference files are assumed to be trusted test fixtures.
        with bz2.BZ2File(current_dir + s + '.stree.bz2') as f:
            streeForm = pickle.load(f)

        # create SuffixTree in several random steps:
        # (rnd.randint needs seq.length() >= 2 to have a non-empty range)
        randomSizes = [rnd.randint(1, seq.length() - 1) for _ in range(5)]
        sizes = sorted(randomSizes) + [seq.length()]

        stree = tom.STree(seq.sub(0, 0))
        for sz in sizes:
            stree.extendTo(seq.sub(0, sz))
            self.assertTrue(
                verifySuffixTreeCounts(seq.sub(0, sz), stree),
                "!!! SuffixTree test NOT passed !!! Substring count discrepancy.")

        # Compare the canonical form of the complete tree with the reference.
        streeFormNew = canonicalizeSuffixTree(stree)
        self.assertTrue(
            verifySuffixTreeForm(seq, streeFormNew, streeForm),
            "!!! SuffixTree test NOT passed !!! Something has changed.")

        # Presumably seq.slice(-l) yields the last l symbols (TODO confirm);
        # positions for these must report being suffixes.
        for l in range(1, min(seq.length(), 12)):
            self.assertTrue(
                tom.stree.Position(stree, seq.slice(-l)).isSuffix(),
                ".isSuffix() test failed!")
    print(' ', end='', flush=True)