def test_repeated_string(self):
    # Index a text made of one repeated character and check lookups on it.
    # NOTE(review): another definition named test_repeated_string appears
    # later in this file; if both live in the same class, the later one
    # replaces this one -- confirm which is intended.
    tree = SuffixTree("aaa")

    # Each run of 'a' up to the whole text is expected at offset 0.
    for needle in ('a', 'aa', 'aaa'):
        self.assertEqual(tree.find_substring(needle), 0)

    # A character that is not in the text is expected to yield -1.
    self.assertEqual(tree.find_substring('b'), -1)

    for needle in ('a', 'aa', 'aaa'):
        self.assertTrue(tree.has_substring(needle))
def find_shortest_nonshared_substring(seq_1, seq_2):
    """Return a shortest piece of ``seq_1`` that does not occur in ``seq_2``.

    A ``SuffixTree`` is built over ``seq_2`` once. Candidate lengths are then
    tried in increasing order, and for each length the candidates produced by
    ``generate_kmers(seq_1, length)`` are probed in the order they are
    yielded (presumably the length-``length`` substrings of ``seq_1`` --
    confirm against ``generate_kmers``).

    NOTE(review): the search starts at length 2, so a single character of
    ``seq_1`` that is missing from ``seq_2`` is never reported -- confirm
    that this is intended.

    Returns:
        The first candidate for which ``has_substring`` is false, or ``None``
        when every candidate of every tried length occurs in ``seq_2``.
    """
    tree = SuffixTree(seq_2)
    for length in range(2, len(seq_1) + 1):
        for candidate in generate_kmers(seq_1, length):
            # The first miss at the current (smallest remaining) length wins.
            if not tree.has_substring(candidate):
                return candidate
    return None
def test_repeated_string(self):
    # Index a text made of one repeated character and check both the
    # offset lookup (find_substring) and the membership test (has_substring).
    tree = SuffixTree("aaa")

    # Each run of 'a' up to the whole text is expected at offset 0.
    for needle in ('a', 'aa', 'aaa'):
        self.assertEqual(tree.find_substring(needle), 0)

    # A character that is not in the text is expected to yield -1.
    self.assertEqual(tree.find_substring('b'), -1)

    for needle in ('a', 'aa', 'aaa'):
        self.assertTrue(tree.has_substring(needle))

    # Rejected: longer than the text, a foreign character, and the
    # upper-case form (matching is case sensitive by default).
    for needle in ('aaaa', 'b', 'A'):
        self.assertFalse(tree.has_substring(needle))
def test_chinese_string(self):
    # Membership checks on a tree built from a non-ASCII (CJK) unicode text.
    tree = SuffixTree(u"才高八斗")

    # A character taken from the text must be found.
    found = tree.has_substring(u'高')
    self.assertTrue(found)

    # A string whose characters do not occur in the text must be rejected.
    missing = tree.has_substring(u'豆豆')
    self.assertFalse(missing)
def test_text_string(self):
    # Build a tree from the UTF-8 decoded contents of test.txt (resolved
    # against the current working directory) and check that the letter 'a'
    # is found in it.
    #
    # The file is read inside a `with` block so the handle is closed even if
    # reading or decoding raises; previously it was never closed.
    with codecs.open("test.txt", encoding='utf-8') as f:
        text = f.read()

    st = SuffixTree(text)
    self.assertTrue(st.has_substring(u'a'))
def test_empty_string(self):
    # A tree built over the empty text must report nothing as present,
    # including the empty query itself.
    tree = SuffixTree('')
    queries = ('not there', '')

    # Offset lookup is expected to give -1 for every query.
    for needle in queries:
        self.assertEqual(tree.find_substring(needle), -1)

    # Membership test is expected to be false for every query.
    for needle in queries:
        self.assertFalse(tree.has_substring(needle))
def test_long_string(self):
    # Build a tree from the whole of test.txt (resolved against the current
    # working directory) and check the offsets of two known words, plus case
    # sensitivity of the membership test.
    #
    # The file is read inside a `with` block so the handle is closed even if
    # reading raises; previously it was never closed. The open() call itself
    # is unchanged (default mode, no explicit encoding) because the expected
    # offsets below depend on how the text is read.
    with open("test.txt") as f:
        text = f.read()

    st = SuffixTree(text)
    self.assertEqual(st.find_substring('Ukkonen'), 1498)
    self.assertEqual(st.find_substring('Optimal'), 11131)
    # Lower-case variant of a word that is present must not match.
    self.assertFalse(st.has_substring('ukkonen'))