def test_words_and_punctuation(self):
    # Mixed input: ordinary words interleaved with bracket and full-stop
    # tokens. Only the words are expected back, in their original order.
    tokens = ["I", "am", "a", "(", "silly", ")", "sentence", "."]
    filtered = [*remove_punctuation(tokens)]
    expected = ["I", "am", "a", "silly", "sentence"]
    self.assertEqual(expected, filtered)
def test_all_punctuation(self):
    # Split string.punctuation into one-character tokens so each symbol is
    # fed to remove_punctuation on its own; nothing is expected to survive.
    symbols = tuple(string.punctuation)
    survivors = [*remove_punctuation(symbols)]
    self.assertEqual([], survivors)
def __init__(self, limit=1000):
    """Build a bigram finder over one corpus file and cache its top bigrams.

    Only the first file id reported by ``shakespeare.fileids()`` is read.
    Its words are passed through ``remove_punctuation`` before being handed
    to ``BigramCollocationFinder.from_words``.

    Args:
        limit: Number of bigrams requested from ``nbest`` when ranking by
            ``bigram_measures.raw_freq``.
    """
    # TODO: Read all of shakespeare into words?
    first_fileid = shakespeare.fileids()[0]
    cleaned_words = remove_punctuation(shakespeare.words(first_fileid))

    finder = BigramCollocationFinder.from_words(cleaned_words)
    self.finder = finder
    self.bigrams = finder.nbest(bigram_measures.raw_freq, limit)