Esempi in Python per tagged_sents

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: nltk.corpus

Metodo/funzione: tagged_sents

Esempi su hotexamples.com: 4

tagged_sents in Python: 4 esempi trovati. Questi sono i migliori esempi reali in Python per nltk.corpus.tagged_sents, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Esempio n. 1

Mostra file

File: analyze_tagger_coverage.py Progetto: ANB2/nltk-trainer

## coverage analysis ##
#######################

# Announce the corpus/tagger pair when tracing is on. A parenthesized
# single-argument print works both as a Python 2 statement and as a
# Python 3 function call.
if args.trace:
	print('analyzing tag coverage of %s with %s\n' % (args.corpus, tagger.__class__.__name__))

# Accumulators created here; they are not populated within this excerpt.
tags_found = FreqDist()
unknown_words = set()

if args.metrics:
	tags_actual = FreqDist()                         # frequency of each reference tag
	tag_refs = []                                    # reference tags, in corpus order
	tag_test = []                                    # not populated within this excerpt
	tag_word_refs = collections.defaultdict(set)     # reference tag -> words seen with it
	tag_word_test = collections.defaultdict(set)     # not populated within this excerpt
	tagged_sents = corpus.tagged_sents(**kwargs)
	# Starts at 7 and grows to the longest reference tag seen below.
	# NOTE(review): presumably a column width for later output -- confirm.
	taglen = 7

	# Optionally keep only the leading fraction of the sentences; the count
	# is rounded up with ceil.
	if args.fraction != 1.0:
		cutoff = int(math.ceil(len(tagged_sents) * args.fraction))
		tagged_sents = tagged_sents[:cutoff]

	for tagged_sent in tagged_sents:
		for word, tag in tagged_sent:
			# FreqDist.inc() was removed in NLTK 3; item increment is
			# equivalent and works on both older and newer releases.
			tags_actual[tag] += 1
			tag_refs.append(tag)
			tag_word_refs[tag].add(word)

			taglen = max(taglen, len(tag))

Esempio n. 2

Mostra file

File: analyze_tagger_coverage.py Progetto: dougk7/nltk-trainer

#######################

# Announce the corpus/tagger pair when tracing is on. A parenthesized
# single-argument print works both as a Python 2 statement and as a
# Python 3 function call.
if args.trace:
	print('analyzing tag coverage of %s with %s\n' % (args.corpus, tagger.__class__.__name__))

tags_found = FreqDist()      # frequency of each tag produced by the tagger
unknown_words = set()        # words the tagger labelled '-NONE-'

if args.metrics:
	tags_actual = FreqDist()                         # frequency of each reference tag
	tag_refs = []                                    # reference tags, in corpus order
	tag_test = []                                    # tagger output tags, in the same order
	tag_word_refs = collections.defaultdict(set)     # reference tag -> words seen with it
	tag_word_test = collections.defaultdict(set)     # predicted tag -> words given that tag

	for tagged_sent in corpus.tagged_sents(fileids=args.fileids):
		# Record the reference annotation of the sentence.
		for word, tag in tagged_sent:
			# FreqDist.inc() was removed in NLTK 3; item increment is
			# equivalent and works on both older and newer releases.
			tags_actual[tag] += 1
			tag_refs.append(tag)
			tag_word_refs[tag].add(word)

		# Strip the reference tags, re-tag the bare words, and record
		# what the tagger produced.
		for word, tag in tagger.tag(nltk.tag.untag(tagged_sent)):
			tags_found[tag] += 1
			tag_test.append(tag)
			tag_word_test[tag].add(word)

			if tag == '-NONE-':
				unknown_words.add(word)

	print('Accuracy: %f' % nltk.metrics.accuracy(tag_refs, tag_test))
	print('Unknown words: %d' % len(unknown_words))

Esempio n. 3

Mostra file

#######################

# Announce the corpus/tagger pair when tracing is on. A parenthesized
# single-argument print works both as a Python 2 statement and as a
# Python 3 function call.
if args.trace:
    print('analyzing tag coverage of %s with %s\n' % (
        args.corpus, tagger.__class__.__name__))

# Accumulators created here; they are not populated within this excerpt.
tags_found = FreqDist()
unknown_words = set()

if args.metrics:
    tags_actual = FreqDist()                      # frequency of each reference tag
    tag_refs = []                                 # reference tags, in corpus order
    tag_test = []                                 # not populated within this excerpt
    tag_word_refs = collections.defaultdict(set)  # reference tag -> words seen with it
    tag_word_test = collections.defaultdict(set)  # not populated within this excerpt
    tagged_sents = corpus.tagged_sents(**kwargs)
    # Starts at 7 and grows to the longest reference tag seen below.
    # NOTE(review): presumably a column width for later output -- confirm.
    taglen = 7

    # Optionally keep only the leading fraction of the sentences; the count
    # is rounded up with ceil.
    if args.fraction != 1.0:
        cutoff = int(math.ceil(len(tagged_sents) * args.fraction))
        tagged_sents = tagged_sents[:cutoff]

    for tagged_sent in tagged_sents:
        for word, tag in tagged_sent:
            # FreqDist.inc() was removed in NLTK 3; item increment is
            # equivalent and works on both older and newer releases.
            tags_actual[tag] += 1
            tag_refs.append(tag)
            tag_word_refs[tag].add(word)

            taglen = max(taglen, len(tag))

Esempio n. 4

Mostra file

    any/noun tag/noun set/noun

    This/det is/verb the/det second/adj paragraph/noun ./punc
    word/n without/adj a/det tag/noun :/: hello ./punc
    """,
                       b="""
    This/det is/verb the/det second/adj file/noun ./punc
    """)
# Build a reader over the two files 'a' and 'b' under `root` and print
# each of its views in turn.
corpus = TaggedCorpusReader(root, ['a', 'b'])
print(corpus.fileids())
print(str(corpus.root) == str(root))

# Whole-corpus views, untagged first and then tagged.
for view in (corpus.words, corpus.sents, corpus.paras,
             corpus.tagged_words, corpus.tagged_sents, corpus.tagged_paras):
    print(view())

# First 40 characters of the raw text.
print(corpus.raw()[:40])

# Overall size of each untagged view, followed by its per-file sizes.
for view in (corpus.words, corpus.sents, corpus.paras):
    total = len(view())
    per_file = [len(view(fileid)) for fileid in corpus.fileids()]
    print(total, per_file)

# Words of each file individually.
for fileid in ('a', 'b'):
    print(corpus.words(fileid))
# del_testcorpus(root)

# The same kind of inspection on the Brown corpus reader.
print(brown.fileids())
print(brown.categories())
brown_root_repr = repr(brown.root)
print(brown_root_repr.replace('\\\\', '/'))
for view in (brown.words, brown.sents, brown.paras, brown.tagged_words):
    print(view())