# --- draw tree ---
# Draw the first chunked sentence.
# NOTE(review): `reader` is not bound in this section; it must already exist
# from earlier in the file.
reader.chunked_sents()[0].draw()

# --- get leaves ---
# Same `.leaves()` lookup on the first item of each chunked view
# (words, sentences, first sentence of the first paragraph).
reader.chunked_words()[0].leaves()
reader.chunked_sents()[0].leaves()
reader.chunked_paras()[0][0].leaves()

# --- categorized corpus ---
from nltk.corpus import brown

brown.categories()

from nltk.corpus.reader import CategorizedPlaintextCorpusReader

# NOTE(review): `path` is not bound in this section either; it is expected to
# come from earlier in the file.
# `cat_pattern` is applied to each file id; its capture group supplies the
# category label (see the NLTK CategorizedCorpusReader docs).
reader = CategorizedPlaintextCorpusReader(
    path + '/corpora/cookbook/',
    r'movie_.*\.txt',
    cat_pattern=r'movie_(\w+)\.txt',
)
reader.categories()
reader.fileids(categories=['neg'])
reader.fileids(categories=['pos'])

# --- using a categorized chunked corpus reader ---
# The import below is left disabled as in the original.
# NOTE(review): `nltk.data.find` is called two lines down, so `nltk.data`
# must already be imported elsewhere in the file — confirm.
#import nltk.data
from catchunked import CategorizedChunkedCorpusReader

# Rebinds both `path` and `reader` for the treebank examples.
path = nltk.data.find('corpora/treebank/tagged')
reader = CategorizedChunkedCorpusReader(
    path,
    r'wsj_.*\.pos',
    cat_pattern=r'wsj_(.*)\.pos',
)
#len(reader.categories()) == len(reader.fileids())
# Number of chunked sentences in category '0001'.
len(reader.chunked_sents(categories=['0001']))

# --- Lazy corpus loader ---
from nltk.corpus.util import LazyCorpusLoader
#from nltk.corpus.reader import WordListCorpusReader