Tk) from nltk.corpus import (cess_cat, brown, nps_chat, treebank, sinica_treebank, alpino, indian, floresta, mac_morpho, machado, cess_esp) from nltk.util import in_idle from nltk.probability import FreqDist CORPUS_LOADED_EVENT = '<<CL_EVENT>>' ERROR_LOADING_CORPUS_EVENT = '<<ELC_EVENT>>' POLL_INTERVAL = 100 _DEFAULT = 'English: Brown Corpus (Humor)' _CORPORA = { 'Catalan: CESS-CAT Corpus': lambda: cess_cat.words(), 'English: Brown Corpus': lambda: brown.words(), 'English: Brown Corpus (Press)': lambda: brown.words(categories=['news', 'editorial', 'reviews']), 'English: Brown Corpus (Religion)': lambda: brown.words(categories='religion'), 'English: Brown Corpus (Learned)': lambda: brown.words(categories='learned'), 'English: Brown Corpus (Science Fiction)': lambda: brown.words(categories='science_fiction'), 'English: Brown Corpus (Romance)': lambda: brown.words(categories='romance'), 'English: Brown Corpus (Humor)': lambda: brown.words(categories='humor'), 'English: NPS Chat Corpus':
def test_catalan(self):
    """Check the leading tokens and one tagged entry of the ``cess_cat`` corpus.

    Compares the first 15 items of ``cess_cat.words()`` against a fixed
    reference sentence, then checks the first element of the entry at
    index 34 of the first tagged sentence (presumably the word of a
    (word, tag) pair -- confirm against the corpus reader).
    """
    leading_tokens = cess_cat.words()[:15]
    reference_sentence = "El Tribunal_Suprem -Fpa- TS -Fpt- ha confirmat la condemna a quatre anys d' inhabilitació especial"
    expected_tokens = reference_sentence.split()
    self.assertEqual(leading_tokens, expected_tokens)

    first_tagged_sentence = cess_cat.tagged_sents()[0]
    entry = first_tagged_sentence[34]
    self.assertEqual(entry[0], "càrrecs")
Text, Tk) from nltk.corpus import (cess_cat, brown, nps_chat, treebank, sinica_treebank, alpino, indian, floresta, mac_morpho, machado, cess_esp) from nltk.util import in_idle from nltk.probability import FreqDist CORPUS_LOADED_EVENT = '<<CL_EVENT>>' ERROR_LOADING_CORPUS_EVENT = '<<ELC_EVENT>>' POLL_INTERVAL = 100 _DEFAULT = 'English: Brown Corpus (Humor)' _CORPORA = { 'Catalan: CESS-CAT Corpus': lambda: cess_cat.words(), 'English: Brown Corpus': lambda: brown.words(), 'English: Brown Corpus (Press)': lambda: brown.words(categories=['news', 'editorial', 'reviews']), 'English: Brown Corpus (Religion)': lambda: brown.words(categories='religion'), 'English: Brown Corpus (Learned)': lambda: brown.words(categories='learned'), 'English: Brown Corpus (Science Fiction)': lambda: brown.words(categories='science_fiction'), 'English: Brown Corpus (Romance)': lambda: brown.words(categories='romance'), 'English: Brown Corpus (Humor)': lambda: brown.words(categories='humor'), 'English: NPS Chat Corpus':
import nltk
from nltk.corpus import cess_cat
import pylab

# Number of leading corpus tokens fed into the frequency distribution.
SAMPLE_SIZE = 1000

# "paraules" is Catalan for "words": all tokens returned by the corpus reader.
paraules = cess_cat.words()
# Restrict the data to the first SAMPLE_SIZE tokens.
paraules1 = paraules[:SAMPLE_SIZE]

# Count token occurrences in the sample and plot the resulting distribution.
freqdist = nltk.FreqDist(paraules1)
freqdist.plot()
floresta, mac_morpho, machado, cess_esp, ) from nltk.util import in_idle from nltk.probability import FreqDist CORPUS_LOADED_EVENT = "<<CL_EVENT>>" ERROR_LOADING_CORPUS_EVENT = "<<ELC_EVENT>>" POLL_INTERVAL = 100 _DEFAULT = "English: Brown Corpus (Humor)" _CORPORA = { "Catalan: CESS-CAT Corpus": lambda: cess_cat.words(), "English: Brown Corpus": lambda: brown.words(), "English: Brown Corpus (Press)": lambda: brown.words(categories=["news", "editorial", "reviews"]), "English: Brown Corpus (Religion)": lambda: brown.words(categories="religion"), "English: Brown Corpus (Learned)": lambda: brown.words(categories="learned"), "English: Brown Corpus (Science Fiction)": lambda: brown.words(categories="science_fiction"), "English: Brown Corpus (Romance)": lambda: brown.words(categories="romance"), "English: Brown Corpus (Humor)": lambda: brown.words(categories="humor"), "English: NPS Chat Corpus": lambda: nps_chat.words(), "English: Wall Street Journal Corpus": lambda: treebank.words(), "Chinese: Sinica Corpus": lambda: sinica_treebank.words(), "Dutch: Alpino Corpus": lambda: alpino.words(), "Hindi: Indian Languages Corpus": lambda: indian.words(files="hindi.pos"), "Portuguese: Floresta Corpus (Portugal)": lambda: floresta.words(), "Portuguese: MAC-MORPHO Corpus (Brazil)": lambda: mac_morpho.words(), "Portuguese: Machado Corpus (Brazil)": lambda: machado.words(),
floresta, mac_morpho, machado, cess_esp, ) from nltk.util import in_idle from nltk.probability import FreqDist CORPUS_LOADED_EVENT = "<<CL_EVENT>>" ERROR_LOADING_CORPUS_EVENT = "<<ELC_EVENT>>" POLL_INTERVAL = 100 _DEFAULT = "English: Brown Corpus (Humor)" _CORPORA = { "Catalan: CESS-CAT Corpus": lambda: cess_cat.words(), "English: Brown Corpus": lambda: brown.words(), "English: Brown Corpus (Press)": lambda: brown.words( categories=["news", "editorial", "reviews"] ), "English: Brown Corpus (Religion)": lambda: brown.words(categories="religion"), "English: Brown Corpus (Learned)": lambda: brown.words(categories="learned"), "English: Brown Corpus (Science Fiction)": lambda: brown.words( categories="science_fiction" ), "English: Brown Corpus (Romance)": lambda: brown.words(categories="romance"), "English: Brown Corpus (Humor)": lambda: brown.words(categories="humor"), "English: NPS Chat Corpus": lambda: nps_chat.words(), "English: Wall Street Journal Corpus": lambda: treebank.words(), "Chinese: Sinica Corpus": lambda: sinica_treebank.words(), "Dutch: Alpino Corpus": lambda: alpino.words(),