Ejemplo n.º 1
0
 def test_words_and_punctuation(self):
     # Mixed input: word tokens interleaved with standalone punctuation
     # tokens. Only the words are expected back, in their original order.
     tokens = ["I", "am", "a", "(", "silly", ")", "sentence", "."]
     expected = ["I", "am", "a", "silly", "sentence"]
     self.assertEqual(expected, list(remove_punctuation(tokens)))
Ejemplo n.º 2
0
 def test_all_punctuation(self):
     # Split string.punctuation into one-character tokens; since every token
     # is punctuation, nothing should remain after filtering.
     every_symbol = tuple(string.punctuation)
     survivors = list(remove_punctuation(every_symbol))
     self.assertEqual([], survivors)
Ejemplo n.º 3
0
 def __init__(self, limit=1000):
     """Build a bigram finder from one corpus file and store its top bigrams.

     Only the first file id returned by ``shakespeare.fileids()`` is read.
     The words of that file are passed through ``remove_punctuation`` before
     being handed to ``BigramCollocationFinder.from_words``.

     Args:
         limit: Forwarded as the second argument to ``nbest`` when ranking by
             ``bigram_measures.raw_freq`` (presumably the maximum number of
             bigrams kept -- confirm against the NLTK documentation).
     """
     # TODO: Read all of shakespeare into words?
     available_files = shakespeare.fileids()
     raw_tokens = shakespeare.words(available_files[0])
     collocations = BigramCollocationFinder.from_words(
         remove_punctuation(raw_tokens)
     )
     self.finder = collocations
     self.bigrams = self.finder.nbest(bigram_measures.raw_freq, limit)