예제 #1
0
파일: tagger.py 프로젝트: kidaa/eptmk-cval
def tag_eptmk(show_stats=True):
    """Tag the Epistemonikos corpus sentences with the pickled POS tagger.

    The sentences are tagged in three passes: first by the tagger
    unpickled from ``tagger.pickle``, then the result is passed through
    ``dict_tagger`` (with ``EPTMK_DICT``) and finally through
    ``wordnet_tagger``.

    Args:
        show_stats: When truthy, print tag statistics (via
            ``pprint_tag_stats``) after each of the three passes.

    Returns:
        The tagged sentences as returned by ``wordnet_tagger``, or ``None``
        when ``tagger.pickle`` is not found relative to the current
        working directory (a hint is printed instead).
    """
    # Guard clause: without a pickled tagger there is nothing to do, so
    # bail out before paying for the corpus load.
    if not os.path.isfile('tagger.pickle'):
        print("\ntagger.pickle file doesn't exist! Use save_tagger() function"
              " to create it.\n")
        return None

    # NOTE: unpickling can execute arbitrary code embedded in the file;
    # only load a tagger.pickle that you produced yourself.
    with open('tagger.pickle', 'rb') as tagger_file:
        trained_tagger = cPickle.load(tagger_file)
    # Announce success only after the load has actually succeeded.
    print('\nLoaded and unpickled POS tagger.\n')

    def report(label, tagged):
        """Print the statistics of one tagging pass if stats are enabled."""
        if show_stats:
            print(label)
            pprint_tag_stats(tagged, none_tag=None)

    eptmk_untagged = epistemonikos.corpus_load().sents()

    eptmk_tagged = trained_tagger.batch_tag(eptmk_untagged)
    report('BACKOFF:', eptmk_tagged)

    eptmk_tagged = dict_tagger(eptmk_tagged, EPTMK_DICT, none_tag=None)
    report('+ DICT:', eptmk_tagged)

    eptmk_tagged = wordnet_tagger(eptmk_tagged, none_tag=None)
    report('+ WNET:', eptmk_tagged)

    return eptmk_tagged
예제 #2
0
파일: tagger.py 프로젝트: kidaa/eptmk-cval
def tag_eptmk(show_stats=True):
    """Tag the Epistemonikos corpus sentences with the pickled POS tagger.

    The sentences are tagged in three passes: first by the tagger
    unpickled from ``tagger.pickle``, then the result is passed through
    ``dict_tagger`` (with ``EPTMK_DICT``) and finally through
    ``wordnet_tagger``.

    Args:
        show_stats: When truthy, print tag statistics (via
            ``pprint_tag_stats``) after each of the three passes.

    Returns:
        The tagged sentences as returned by ``wordnet_tagger``, or ``None``
        when ``tagger.pickle`` is not found relative to the current
        working directory (a hint is printed instead).
    """
    # Guard clause: without a pickled tagger there is nothing to do, so
    # bail out before paying for the corpus load.
    if not os.path.isfile('tagger.pickle'):
        print(
            "\ntagger.pickle file doesn't exist! Use save_tagger() function"
            " to create it.\n")
        return None

    # NOTE: unpickling can execute arbitrary code embedded in the file;
    # only load a tagger.pickle that you produced yourself.
    with open('tagger.pickle', 'rb') as tagger_file:
        trained_tagger = cPickle.load(tagger_file)
    # Announce success only after the load has actually succeeded.
    print('\nLoaded and unpickled POS tagger.\n')

    def report(label, tagged):
        """Print the statistics of one tagging pass if stats are enabled."""
        if show_stats:
            print(label)
            pprint_tag_stats(tagged, none_tag=None)

    eptmk_untagged = epistemonikos.corpus_load().sents()

    eptmk_tagged = trained_tagger.batch_tag(eptmk_untagged)
    report('BACKOFF:', eptmk_tagged)

    eptmk_tagged = dict_tagger(eptmk_tagged, EPTMK_DICT, none_tag=None)
    report('+ DICT:', eptmk_tagged)

    eptmk_tagged = wordnet_tagger(eptmk_tagged, none_tag=None)
    report('+ WNET:', eptmk_tagged)

    return eptmk_tagged
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from collections import defaultdict
from corpora import epistemonikos
from nltk.corpus import stopwords

# Collect every corpus token that is purely alphabetic, lower-cased so
# that counting is case-insensitive.
eptmk = epistemonikos.corpus_load().words()
words = [w.lower() for w in eptmk if w.isalpha()]

# Count occurrences, then rank as (count, word) pairs: a plain descending
# sort orders by count first and breaks ties by word, also descending.
word_freq = defaultdict(int)
for w in words:
    word_freq[w] += 1
word_freq = sorted(((v, k) for k, v in word_freq.items()), reverse=True)
# Single-argument print(...) prints the tuple exactly like `print w` does.
for w in word_freq[:50]:
    print(w)
# Pause so the first table can be read before the second one is printed.
raw_input('press ENTER')

# Same ranking again, this time skipping NLTK's English stop words.
stoplist = stopwords.words('english')
# A set makes each membership test constant-time; testing every token
# against the list would rescan the whole stop list each time.
stopset = frozenset(stoplist)
nonfunc_freq = defaultdict(int)
for w in words:
    if w not in stopset:
        nonfunc_freq[w] += 1
nonfunc_freq = sorted(((v, k) for k, v in nonfunc_freq.items()),
                      reverse=True)
for w in nonfunc_freq[:50]:
    print(w)
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from collections import defaultdict
from corpora import epistemonikos
from nltk.corpus import stopwords

# Collect every corpus token that is purely alphabetic, lower-cased so
# that counting is case-insensitive.
eptmk = epistemonikos.corpus_load().words()
words = [w.lower() for w in eptmk if w.isalpha()]

# Count occurrences, then rank as (count, word) pairs: a plain descending
# sort orders by count first and breaks ties by word, also descending.
word_freq = defaultdict(int)
for w in words:
    word_freq[w] += 1
word_freq = sorted(((v, k) for k, v in word_freq.items()), reverse=True)
# Single-argument print(...) prints the tuple exactly like `print w` does.
for w in word_freq[:50]:
    print(w)
# Pause so the first table can be read before the second one is printed.
raw_input('press ENTER')

# Same ranking again, this time skipping NLTK's English stop words.
stoplist = stopwords.words('english')
# A set makes each membership test constant-time; testing every token
# against the list would rescan the whole stop list each time.
stopset = frozenset(stoplist)
nonfunc_freq = defaultdict(int)
for w in words:
    if w not in stopset:
        nonfunc_freq[w] += 1
nonfunc_freq = sorted(((v, k) for k, v in nonfunc_freq.items()),
                      reverse=True)
for w in nonfunc_freq[:50]:
    print(w)