def test_alignment(self):
    '''
    Check pattern lookup on a BWT built over the tokens of "banana".
    '''
    index = BWT(list("banana"))
    # Debugging aid: index.print_table()

    # The query may stay a plain string, because it supports random
    # access and slicing.
    pattern = "ana"

    # NOTE(review): L and U look like the bounds of the matching range
    # inside the BWT table -- confirm against the BWT implementation.
    self.assertEqual(index.L(pattern), 3)
    self.assertEqual(index.U(pattern), 4)

    # Each reported position is the index of the last token of a match,
    # hence 3 and 5 for the two occurrences of "ana".
    found = sorted(index.get_start_indices(pattern))
    self.assertEqual(found, [3, 5])
class MarkovModel:
    '''
    Looks up the tokens that follow a given context in a source sequence,
    using a BWT index built over that sequence.
    '''

    def __init__(self, string):
        '''
        Store the source sequence and build a BWT over it.
        '''
        self.string = string
        self.bwt = BWT(self.string)

    def get_n_tokens(self, context, n):
        '''
        Returns up to n tokens that follow the list of tokens given in
        context in the source string, or None if no such tokens exist.

        When the context matches in several places, one match is picked
        with choice().
        '''
        matches = self.bwt.get_start_indices(context)
        if matches == []:
            return None
        return self.get_n_gram_at_index(n, choice(matches))

    def get_n_gram_at_index(self, n, index):
        '''
        Returns the slice of at most n tokens starting right after index.
        '''
        first = index + 1
        # The upper bound is capped at len - 1, so the final element of the
        # source sequence is never part of the result.
        # NOTE(review): presumably this skips a trailing terminator token;
        # confirm that it is not an off-by-one.
        last = min(first + n, len(self.string) - 1)
        return self.string[first:last]

    def get_all_possible_n_grams(self, context, n):
        '''
        Returns a generator over the n-grams that follow every match of
        context. The matches are looked up immediately; the n-grams are
        sliced lazily as the generator is consumed.
        '''
        matches = self.bwt.get_start_indices(context)
        return (self.get_n_gram_at_index(n, position) for position in matches)