def test_repeated_string(self):
    """Check lookups on a text made of a single repeated character.

    Every prefix of "aaa" is expected at offset 0, a character that never
    occurs is expected to yield -1, and each prefix must be reported as
    present.
    """
    tree = SuffixTree("aaa")
    prefixes = ('a', 'aa', 'aaa')

    # Offset queries: the three hits first, then the single miss.
    for needle in prefixes:
        self.assertEqual(tree.find_substring(needle), 0)
    self.assertEqual(tree.find_substring('b'), -1)

    # Membership queries over the same prefixes.
    for needle in prefixes:
        self.assertTrue(tree.has_substring(needle))
def test_repeated_string(self):
    """Check lookups on a text made of a single repeated character.

    Every prefix of "aaa" is expected at offset 0 and reported as present;
    a longer run, an absent character, and a differently-cased character
    must all be reported as absent.
    """
    tree = SuffixTree("aaa")
    prefixes = ('a', 'aa', 'aaa')

    # Offset queries: the three hits first, then the single miss.
    for needle in prefixes:
        self.assertEqual(tree.find_substring(needle), 0)
    self.assertEqual(tree.find_substring('b'), -1)

    # Membership queries that must succeed.
    for needle in prefixes:
        self.assertTrue(tree.has_substring(needle))

    # Membership queries that must fail: a run longer than the text, an
    # absent character, and 'A' (matching is case sensitive by default).
    for needle in ('aaaa', 'b', 'A'):
        self.assertFalse(tree.has_substring(needle))
def test_chinese_text(self):
    """Check that a Chinese phrase is found in the UTF-8 decoded fixture.

    Reads "test.txt" as UTF-8, builds a SuffixTree over its contents and
    requires the phrase to be located at a valid (non-negative) offset.
    """
    # Use a context manager so the fixture handle is closed even when the
    # tree construction raises.
    with codecs.open("test.txt", encoding="utf-8") as handle:
        st = SuffixTree(handle.read())

    # find_substring signals a miss with -1, which is truthy, while a hit
    # at offset 0 is falsy; a bare assertTrue on the index would therefore
    # pass on a miss and fail on a hit at the start. Compare against 0.
    self.assertGreaterEqual(st.find_substring(u'概括性总结'), 0)
def test_empty_string(self):
    """A tree built from the empty string must match nothing.

    Both an ordinary pattern and the empty pattern are expected to give
    offset -1 and to be reported as absent.
    """
    tree = SuffixTree('')
    queries = ('not there', '')

    # Offset lookups come first, then the membership checks.
    for needle in queries:
        self.assertEqual(tree.find_substring(needle), -1)
    for needle in queries:
        self.assertFalse(tree.has_substring(needle))
def test_case_sensitivity(self):
    """Check offsets reported by a tree built with case_insensitive=True.

    Builds the tree over the contents of "test.txt" and expects the
    lower-case query 'ukkonen' at offset 1498 and 'Optimal' at offset 1830.
    """
    # Read the fixture inside a context manager so the handle is closed
    # rather than left for the garbage collector.
    with open("test.txt") as f:
        st = SuffixTree(f.read(), case_insensitive=True)

    self.assertEqual(st.find_substring('ukkonen'), 1498)
    # NOTE(review): presumably 1830 is an occurrence with different casing
    # in the fixture - confirm against test.txt.
    self.assertEqual(st.find_substring('Optimal'), 1830)
def test_long_string(self):
    """Check offsets in a tree built over the full "test.txt" fixture.

    With default construction, 'Ukkonen' is expected at offset 1498 and
    'Optimal' at offset 11131, while the lower-case 'ukkonen' must be
    reported as absent.
    """
    # Read the fixture inside a context manager so the handle is closed
    # rather than left for the garbage collector.
    with open("test.txt") as f:
        st = SuffixTree(f.read())

    self.assertEqual(st.find_substring('Ukkonen'), 1498)
    self.assertEqual(st.find_substring('Optimal'), 11131)
    self.assertFalse(st.has_substring('ukkonen'))
inputFasta.append(temp) # print "Input Fasta read" # print "inputFasta coming up" # for i in inputFasta: # print i outputResultsFile = filename + ".exact" # if(len(inputFasta)!=len(inputFastaId)): # print "some problem because total number of reads don't match total number of ids" # exit() with open(outputResultsFile, 'w') as f: for i in range(0, len(inputFasta)): # outputResults[inputFastaId[i]] = find_substring(stree,inputFasta[i]) # c = inputFastaId[i] + d + str(find_substring(stree,inputFasta[i]))+ "\n" if (stree.find_substring(inputFasta[i]) >= 0 or stree.find_substring(revc(inputFasta[i])) >= 0): find = 1 countAlignments += 1 else: find = 0 c = inputFastaId[i] + d + str(find) + "\n" f.write(c) # print "total Alignments - ",countAlignments if (len(inputFastaId) == 0): percent = 0 else: percent = 1.0 * countAlignments / len(inputFastaId) percent = percent * 100.0 # print "percentage of Alignment - ", percent
def test_long_string(self):
    f = open("test.txt")
    st = SuffixTree(f.read())
    self.assertEqual(st.find_substring('Ukkonen'), 1498)
    self.assertEqual(st.find_substring('Optimal'), 11131)
    self.assertFalse(st.has_substring('ukkonen'))

def test_case_sensitivity(self):
    f = open("test.txt")
    st = SuffixTree(f.read(), case_insensitive=True)
    self.assertEqual(st.find_substring('ukkonen'), 1498)
    self.assertEqual(st.find_substring('Optimal'), 1830)

def test_repr(self):
    st = SuffixTree("t")
    output = '\tStart \tEnd \tSuf \tFirst \tLast \tString\n\t0 \t1 \t-1 \t0 \t0 \tt\n'
    import pdb;pdb.set_trace()
    self.assertEqual(st.__repr__(), output)
"""
# NOTE(review): the triple quote above appears to close a string literal
# opened outside this excerpt, which would make the three test methods
# before it inert text rather than live code - confirm. If they are ever
# re-enabled, test_repr still contains a pdb.set_trace() breakpoint and the
# two file-based tests never close "test.txt".

# Ad-hoc smoke run executed only when the file is run as a script. The
# unittest.main() call is commented out, so no test runner is started here.
if __name__ == '__main__':
    # unittest.main()
    # f = open("test.txt")
    # longstr = f.read()
    longstr = "banana"
    st = SuffixTree(longstr)
    # print(longstr[11074:11081])
    # The lookup result is discarded; nothing is printed or asserted.
    st.find_substring('ban')
    # print(st.find_substring('Ukkonen'))
    # print(st.find_substring('Optimal'))