def baseline_coref_resolver_demo():
    """Run the baseline coreference resolver on a small sample and print it.

    Takes the first ten sentences of the corpus returned by
    ``load_treebank('0[12]')``, removes every token that starts with ``'*'``,
    and prints three things in order: the mentions found by the resolver,
    the resolved mentions, and the resolved discourse.

    The function takes no arguments and returns nothing; all output goes to
    standard output.
    """
    from nltk_contrib.coref.resolve import BaselineCorefResolver

    resolver = BaselineCorefResolver()
    treebank = load_treebank('0[12]')

    # Tokens beginning with '*' are filtered out before resolution
    # (presumably treebank trace / empty elements -- TODO confirm).
    sents = LazyMap(
        lambda sent: [word for word in sent if not word.startswith('*')],
        treebank.sents()[:10])

    # All three results are computed up front, before anything is printed.
    mentions = resolver.mentions(sents)
    resolved_mentions = resolver.resolve_mentions(mentions)
    resolved_discourse = resolver.resolve(sents)

    # Single-argument print(...) emits the same bytes under the Python 2
    # print statement and the Python 3 print function; print('') stands in
    # for a bare ``print`` (one empty line) for the same reason.
    print('Baseline coref resolver demo...')

    print('Mentions:')
    for mention in mentions:
        print(mention)
    print('')

    print('Resolved mentions:')
    for mention in resolved_mentions:
        print(mention)
    print('')

    print('Resolved discourse:')
    # NOTE(review): the blank line after each sentence is the assumed
    # reading of the original layout -- confirm against version control.
    for sent in resolved_discourse:
        print(sent)
        print('')
    print('')
default=False, dest='psyco', help='use Psyco JIT, if available') parser.add_option('-v', '--verbose', action='store_true', default=False, dest='verbose', help='verbose') (options, args) = parser.parse_args() if options.psyco: try: import psyco psyco.profile() except: pass if options.train_tagger: treebank_train = load_treebank('0[2-9]|1[0-9]|2[01]') treebank_train_sequence = treebank_train.tagged_sents() treebank_test = load_treebank('24') treebank_test_sequence = treebank_test.tagged_sents() treebank_estimator = LidstoneProbDistFactory model = train_model(HiddenMarkovModelTagger, treebank_train_sequence, treebank_test_sequence, options.model_file, options.num_train_sents, options.num_test_sents, estimator=treebank_estimator, verbose=options.verbose) elif options.train_chunker: conll2k_train = LazyCorpusLoader(