コード例 #1
0
ファイル: bwt.py プロジェクト: bujingde/fm-index
    def transform(self, s):
        """Return the Burrows-Wheeler transform of ``s`` using a suffix tree.

        Args:
            s: Input text; must not contain the end-of-text marker
                ``self.EOS``.

        Returns:
            A string with one character per entry returned by
            ``self._walk``: the character that sits immediately to the left
            of the corresponding suffix in the EOS-terminated text.

        Raises:
            AssertionError: If ``s`` already contains ``self.EOS``.
        """
        # NOTE: deliberately left as an assert so callers keep seeing the
        # same exception type; asserts are skipped when Python runs with -O.
        assert self.EOS not in s, "Input string cannot contain null character ('%s')" % self.EOS

        # Terminate the text with the end-of-text marker.
        s += self.EOS

        # Build the suffix tree of the terminated text. The original author
        # notes this construction is O(n * log n) and could be done in O(n).
        st = SuffixTree()
        st.add(s)

        # Walk the tree to visit the suffixes in sorted order; only the
        # length of each suffix is collected (per the original author's note).
        lens = self._walk(st.root)

        # A suffix of length `length` starts at s[-length], so the character
        # in the last BWT column (the one left of the suffix) is
        # s[-length - 1]. The longest suffix has nothing to its left and is
        # mapped to EOS instead.
        # NOTE(review): the comparison against len(lens) presumably relies on
        # _walk returning exactly one length per suffix, i.e.
        # len(lens) == len(s) -- confirm against _walk.
        total = len(lens)

        # Iterating the lengths directly (instead of xrange over indices)
        # keeps this working on both Python 2 and Python 3.
        return ''.join(
            self.EOS if length == total else s[-length - 1]
            for length in lens
        )
コード例 #2
0
ファイル: bwt.py プロジェクト: ArnaudKOPP/fm_index
    def transform(self, s):
        """Compute the Burrows-Wheeler transform of ``s`` via a suffix tree.

        Args:
            s: Text to transform. It must not already contain the
                end-of-text marker ``self.EOS``.

        Returns:
            The transformed string: for every suffix length returned by
            ``self._walk``, the character preceding that suffix in the
            EOS-terminated text.

        Raises:
            AssertionError: If ``self.EOS`` occurs in ``s``.
        """
        # NOTE: this stays an assert to preserve the exception type callers
        # may rely on; it is not evaluated when Python runs with -O.
        assert self.EOS not in s, "Input string cannot contain null character ('%s')" % self.EOS

        # Append the end-of-text marker.
        s += self.EOS

        # Suffix tree over the terminated text. According to the original
        # comment this build is O(n * log n) and an O(n) build is possible.
        st = SuffixTree()
        st.add(s)

        # Suffix lengths in sorted-suffix order (per the original comment,
        # the walk only reports the length of each suffix).
        lens = self._walk(st.root)

        # The last-column character is the one just left of each suffix,
        # i.e. len(suffix) + 1 positions from the end of s. The full-length
        # suffix has no left neighbour, so it contributes EOS.
        # NOTE(review): comparing with len(lens) appears to assume one entry
        # per suffix (len(lens) == len(s)) -- verify against _walk.
        suffix_count = len(lens)

        # Direct iteration replaces the Python-2-only xrange index loop and
        # the pre-allocated placeholder list; it runs on Python 2 and 3.
        return ''.join(
            self.EOS if size == suffix_count else s[-size - 1]
            for size in lens
        )
コード例 #3
0
ファイル: test_stlm.py プロジェクト: bsu-slim/pystlm
'''
Created on Oct 23, 2018

@author: ckennington
'''
from stlm import STLM
from suffixtree import SuffixTree
from sequence import Sequence

# Build a suffix tree from a tiny whitespace-tokenised corpus.
trie = SuffixTree()

text = ['c', 'a', 'c', 'a', 'o']

# Feed the tokens to the tree one at a time, logging each insertion.
for w in text:
    print('adding', w)
    trie.add(w)

# Dump the resulting tree, framed by blank output for readability.
print('\n')
trie.print_tree()
print('\n')

# Refresh the counts stored in the tree before handing it to the model.
trie.update_all_counts()

stlm = STLM(trie)

# Token sequences used as test inputs further down the script.
tests = [
    phrase.split()
    for phrase in ('c a', 'c a o', 'a o', 'o', 'c')
]

for test in tests:
    seq = Sequence()