# <codecell> # Deal with suffixes for sent in nltk.sent_tokenize(text): for word in nltk.word_tokenize(sent): word = word.lower() print wordnet.morphy(word) or word # <codecell> from metanl import english # <codecell> # Deal with even more suffixes for word in english.normalize_list(text): print word # <codecell> ########NEW FILE######## __FILENAME__ = 2 - Interesting n-grams # -*- coding: utf-8 -*- # <nbformat>3.0</nbformat> # <codecell> from nltk.book import *
# Translation of the sample sentence: "This sentence is also made of words"
text = 'この文も、言葉で構成されています'

# <codecell>

# NLTK's default tokenizer has no notion of Japanese word boundaries,
# so this cell demonstrates its (poor) behavior on unsegmented text.
for word in nltk.word_tokenize(text):
    print(word)

# <codecell>

from metanl import japanese

# <codecell>

# metanl's Japanese normalizer segments and normalizes properly.
for word in japanese.normalize_list(text):
    print(word)

# <codecell>

text2 = 'You might be wondering whether we can deal with suffixes in English'

# <codecell>

from metanl import english

# <codecell>

# In a notebook cell, the returned list is displayed as the cell output.
english.normalize_list(text2)

# <codecell>