def transform(self, s):
    """
    Burrows-Wheeler transform with SuffixTree.

    Appends the end-of-text marker ``self.EOS`` to ``s``, builds a suffix
    tree over the marked string and asks ``self._walk`` for the suffix
    lengths (expected to come back in sorted-suffix order -- ``_walk`` is
    defined elsewhere, confirm there). For every length the character
    standing immediately left of that suffix is emitted.

    :param s: input string; must not contain ``self.EOS``.
    :return: string with one character per length returned by
        ``self._walk``.
    :raises AssertionError: if ``s`` already contains ``self.EOS``.
        NOTE: assertions are stripped when Python runs with ``-O``.
    """
    assert self.EOS not in s, "Input string cannot contain null character ('%s')" % self.EOS

    # add end of text marker
    s += self.EOS

    # construct the suffix tree (original author's note: O(n * log n),
    # can also be done in O(n) time)
    st = SuffixTree()
    st.add(s)

    # walk inorder to find sorted suffixes
    # only get the length of each suffix
    lens = self._walk(st.root)
    count = len(lens)

    # The last-column letter is the one left of the suffix, i.e.
    # len(suffix) + 1 positions from the end of s. A suffix whose length
    # equals the number of suffixes has nothing to its left (indexing would
    # run off the front of s), so it maps to the end marker instead.
    # Iterating the lengths directly avoids the Python-2-only xrange().
    return ''.join(
        self.EOS if suffix_len == count else s[-suffix_len - 1]
        for suffix_len in lens
    )
def transform(self, s):
    """
    Burrows-Wheeler transform with SuffixTree.

    The end-of-text marker ``self.EOS`` is appended to ``s``, a suffix tree
    is built from the marked string, and ``self._walk`` supplies the length
    of each suffix (presumably in sorted-suffix order; ``_walk`` lives
    outside this block -- verify there). The result is made of the
    character preceding each of those suffixes.

    :param s: input string; must not contain ``self.EOS``.
    :return: string holding one character per length returned by
        ``self._walk``.
    :raises AssertionError: if ``s`` already contains ``self.EOS``.
        NOTE: assertions are removed when Python runs with ``-O``.
    """
    assert self.EOS not in s, "Input string cannot contain null character ('%s')" % self.EOS

    # add end of text marker
    s += self.EOS

    # construct the suffix tree (original author's note: O(n * log n),
    # can also be done in O(n) time)
    st = SuffixTree()
    st.add(s)

    # walk inorder to find sorted suffixes
    # only get the length of each suffix
    lens = self._walk(st.root)
    n_suffixes = len(lens)

    # As the last column letter is left of the suffix, it sits
    # len(suffix) + 1 characters from the end of s. The one suffix that
    # spans everything has no left neighbour, so the end marker is used.
    # A comprehension over the lengths replaces the xrange() index loop,
    # which does not exist on Python 3.
    last_column = [
        self.EOS if length == n_suffixes else s[-length - 1]
        for length in lens
    ]
    return ''.join(last_column)
'''
Created on Oct 23, 2018

@author: ckennington

Demo script: builds a SuffixTree from a short token sequence, prints it,
wraps it in an STLM and then iterates over a handful of test sequences.
'''
from stlm import STLM
from suffixtree import SuffixTree
from sequence import Sequence

# Build the tree one whitespace-separated token at a time.
trie = SuffixTree()
text = 'c a c a o'.split()
for w in text:
    print('adding', w)
    trie.add(w)

# Dump the tree, padded by blank lines on either side.
print('\n')
trie.print_tree()
print('\n')

# NOTE(review): update_all_counts() is defined in suffixtree; presumably it
# must run before the tree is handed to STLM -- confirm against that module.
trie.update_all_counts()
stlm = STLM(trie)

# Token sequences to try against the model.
tests = [
    'c a'.split(),
    'c a o'.split(),
    'a o'.split(),
    'o'.split(),
    'c'.split()
]
# NOTE(review): only the first statement of this loop is visible here; the
# body appears to continue past the end of this excerpt.
for test in tests:
    seq = Sequence()