def test_banana(self):
    """Compare the model's sampling bias against a naive circular text scan.

    Both halves count how often 'a' versus 'b' follows the context "ba" over
    1000 draws, then reduce the counts with ``self.log_ratio``:

    * the first half draws the next token from ``MarkovModel`` (labelled
      "BWT" in the printed output);
    * the second half picks a random start offset, walks forward through
      the text (wrapping at the end) to the next "ba", and records the
      character that follows it.

    The test passes when the model's absolute log ratio is strictly smaller
    than the naive scan's.

    NOTE(review): both halves are randomised and nothing in this method
    seeds the generator, so the comparison is statistical -- confirm whether
    a seed is set elsewhere (e.g. in setUp).
    """
    string = "blah blah blah blah blah blah baabab blah blah"
    context = "ba"
    trials = 1000

    # Model-based sampling.
    markov = MarkovModel(list(string))
    results = {'a': 0, 'b': 0}
    for _ in range(trials):
        token = markov.get_n_tokens(list(context), 1)[0]
        results[token] += 1
    bwt_log_ratio = self.log_ratio(results, 'a', 'b')
    # Parenthesised single-argument form prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("BWT: log ratio is %s" % (bwt_log_ratio,))

    # Naive sampling: scan forward from a random offset, treating the text
    # as circular.
    length = len(string)
    # Appending the first character lets a plain slice yield the
    # two-character window that wraps around the end of the text.
    circular = string + string[:1]
    results = {'a': 0, 'b': 0}
    for _ in range(trials):
        start = randrange(length)
        while circular[start:start + 2] != context:
            start = (start + 1) % length
        # The follower sits two positions past the match; wrap the index so
        # a match at the very end of the text cannot raise IndexError.
        token = string[(start + 2) % length]
        results[token] += 1
    plain_log_ratio = self.log_ratio(results, 'a', 'b')
    print("PLAIN: log ratio is %s" % (plain_log_ratio,))

    # assertLess reports both magnitudes when the comparison fails.
    self.assertLess(abs(bwt_log_ratio), abs(plain_log_ratio))
def test_markov(self):
    """Check which tokens the model offers after "T" in "Tom Tucker".

    A single draw of one token following ["T"] must be one of the expected
    continuations, and enumerating every possible 1-gram following ["T"]
    must yield exactly the expected list, in that order.
    """
    expected = [["o"], ["u"]]
    model = MarkovModel(list("Tom Tucker"))

    # One draw: must be one of the expected continuations.
    drawn = model.get_n_tokens(list("T"), 1)
    self.assertIn(drawn, expected)

    # Full enumeration: materialise the iterable and compare it whole.
    # A fresh context list is built for each call, as in the first query.
    enumerated = list(model.get_all_possible_n_grams(list("T"), 1))
    self.assertEqual(enumerated, expected)