def test_sequence_length_two_returns_distinct_observed_ngrams(self):
    """Length-2 sequences from the short corpus match the expected pairs.

    The result of ``create_sequence_list(self.short_corpus, 2)`` is
    compared as a set, so neither ordering nor repeated entries in the
    returned collection affect the outcome of this check.
    """
    expected_pairs = {
        ('alpha', 'beta'),
        ('beta', 'gamma'),
        ('alef', 'bet'),
        ('bet', 'gimel'),
    }
    observed = create_sequence_list(self.short_corpus, 2)
    self.assertEqual(set(observed), expected_pairs)
def test_sequence_length_one_returns_distinct_words(self):
    """Length-1 sequences are exactly the distinct 1-tuples of the corpus.

    The expected value is built by wrapping every item of every entry in
    ``self.corpus`` in a 1-tuple and collecting the results into a set.
    Two checks are made against ``create_sequence_list(self.corpus, 1)``:
    its contents, viewed as a set, equal the expected set; and its length
    equals the number of distinct expected tuples, which together rule
    out repeated entries in the returned collection.
    """
    observed = create_sequence_list(self.corpus, 1)
    # Set comprehension de-duplicates items that occur more than once.
    expected_unigrams = {(item,) for entry in self.corpus for item in entry}
    self.assertEqual(set(observed), expected_unigrams)
    self.assertEqual(len(observed), len(expected_unigrams))