def make_pos_model(model_type):
    """Train an NLTK POS tagger on the local Greek training set and pickle it.

    :param model_type: one of 'unigram', 'bigram', 'trigram', 'backoff', 'tnt'.
    :raises ValueError: if ``model_type`` is not one of the supported names.

    Side effects: reads ``greek_training_set.pos`` from the current directory
    and writes the trained model under ``~/greek_models_cltk/taggers/pos``.
    """
    now = time.time()
    reader = TaggedCorpusReader('.', 'greek_training_set.pos')
    train_sents = reader.tagged_sents()
    if model_type == 'unigram':
        tagger = UnigramTagger(train_sents)
        file_name = 'unigram.pickle'  # renamed from `file`: don't shadow the builtin
    elif model_type == 'bigram':
        tagger = BigramTagger(train_sents)
        file_name = 'bigram.pickle'
    elif model_type == 'trigram':
        tagger = TrigramTagger(train_sents)
        file_name = 'trigram.pickle'
    elif model_type == 'backoff':
        # Trigram falls back to bigram, which falls back to unigram.
        tagger1 = UnigramTagger(train_sents)
        tagger2 = BigramTagger(train_sents, backoff=tagger1)
        tagger = TrigramTagger(train_sents, backoff=tagger2)
        file_name = '123grambackoff.pickle'
    elif model_type == 'tnt':
        tagger = tnt.TnT()
        tagger.train(train_sents)
        file_name = 'tnt.pickle'
    else:
        # Bug fix: the original only printed a message and fell through,
        # crashing with NameError on the undefined `tagger`/`file` below.
        raise ValueError('Invalid model_type: {0!r}'.format(model_type))
    _dir = os.path.expanduser('~/greek_models_cltk/taggers/pos')
    os.makedirs(_dir, exist_ok=True)  # ensure target directory exists before writing
    path = os.path.join(_dir, file_name)
    with open(path, 'wb') as f:
        pickle.dump(tagger, f)
    print('Completed training {0} model in {1} seconds to {2}.'.format(
        model_type, time.time() - now, path))
def make_pos_model(model_type):
    """Train an NLTK POS tagger on the local Greek training set and pickle it.

    :param model_type: one of 'unigram', 'bigram', 'trigram', 'backoff', 'tnt'.
    :raises ValueError: if ``model_type`` is not one of the supported names.

    Side effects: reads ``greek_training_set.pos`` from the current directory
    and writes the trained model under ``~/greek_models_cltk/taggers/pos``.
    """
    now = time.time()
    reader = TaggedCorpusReader('.', 'greek_training_set.pos')
    train_sents = reader.tagged_sents()
    if model_type == 'unigram':
        tagger = UnigramTagger(train_sents)
        file_name = 'unigram.pickle'  # renamed from `file`: don't shadow the builtin
    elif model_type == 'bigram':
        tagger = BigramTagger(train_sents)
        file_name = 'bigram.pickle'
    elif model_type == 'trigram':
        tagger = TrigramTagger(train_sents)
        file_name = 'trigram.pickle'
    elif model_type == 'backoff':
        # Trigram falls back to bigram, which falls back to unigram.
        tagger1 = UnigramTagger(train_sents)
        tagger2 = BigramTagger(train_sents, backoff=tagger1)
        tagger = TrigramTagger(train_sents, backoff=tagger2)
        file_name = '123grambackoff.pickle'
    elif model_type == 'tnt':
        tagger = tnt.TnT()
        tagger.train(train_sents)
        file_name = 'tnt.pickle'
    else:
        # Bug fix: the original only printed a message and fell through,
        # crashing with NameError on the undefined `tagger`/`file` below.
        raise ValueError('Invalid model_type: {0!r}'.format(model_type))
    _dir = os.path.expanduser('~/greek_models_cltk/taggers/pos')
    os.makedirs(_dir, exist_ok=True)  # ensure target directory exists before writing
    path = os.path.join(_dir, file_name)
    with open(path, 'wb') as f:
        pickle.dump(tagger, f)
    print('Completed training {0} model in {1} seconds to {2}.'.format(
        model_type, time.time() - now, path))
def train_tagger(language, model_type, feature, train_sents):
    """Train and return an NLTK tagger of the requested kind.

    :param language: language code, used only to build the CRF model path.
    :param model_type: one of 'unigram', 'bigram', 'trigram', 'backoff',
        'crf', 'perceptron'.
    :param feature: feature name, used only to build the CRF model path.
    :param train_sents: tagged sentences to train on.
    :return: the trained tagger object.
    :raises ValueError: if ``model_type`` is not a supported name.
    """
    if model_type == 'unigram':
        tagger = UnigramTagger(train_sents)
    elif model_type == 'bigram':
        tagger = BigramTagger(train_sents)
    elif model_type == 'trigram':
        tagger = TrigramTagger(train_sents)
    elif model_type == 'backoff':
        # Trigram falls back to bigram, which falls back to unigram.
        tagger1 = UnigramTagger(train_sents)
        tagger2 = BigramTagger(train_sents, backoff=tagger1)
        tagger = TrigramTagger(train_sents, backoff=tagger2)
    elif model_type == 'crf':
        tagger = CRFTagger()
        # CRFTagger.train persists the model to the given path as it trains.
        tagger.train(train_sents,
                     'taggers/{0}/{1}/crf.pickle'.format(language, feature))
    elif model_type == 'perceptron':
        tagger = PerceptronTagger(load=False)
        tagger.train(train_sents)
    else:
        # Bug fix: the original had no else branch, so an unknown model_type
        # hit `return tagger` with `tagger` unbound (UnboundLocalError).
        raise ValueError('Unknown model_type: {0!r}'.format(model_type))
    return tagger
def contextual_rules(wikicorpus_dir, context_file):
    """Learn Brill contextual rules from a wikicorpus and write them to a file.

    Reads up to 1,000,000 words of tagged sentences, replaces every proper
    noun with a fixed placeholder token, trains a Brill tagger on top of a
    unigram baseline, and writes the learned rules (one per line) to
    ``context_file``.
    """
    sentences = wikicorpus(wikicorpus_dir, words=1000000)
    ANONYMOUS = "anonymous"
    # Anonymize proper nouns so rules generalize over names rather than
    # memorizing specific ones ("NP" = proper noun in the Parole tagset).
    for sent in sentences:
        for idx, (word, pos) in enumerate(sent):
            if pos == "NP":
                sent[idx] = (ANONYMOUS, "NP")
    # Baseline unigram tagger, boosted with the fntbl37 rule templates.
    baseline = UnigramTagger(sentences)
    trainer = BrillTaggerTrainer(baseline, fntbl37(), trace=0)
    brill_tagger = trainer.train(sentences, max_rules=100)
    # Persist the learned contextual rules, one per line.
    with open(context_file, "w") as rules_out:
        for rule in brill_tagger.rules():
            rules_out.write("%s\n" % rule)
s[i] = (ANONYMOUS, "NP") # We can then train NLTK's FastBrillTaggerTrainer. It is based on a unigram tagger, which is simply a lexicon of known words # and their part-of-speech tag. It will then boost the accuracy with a set of contextual rules that change a word's part-of-speech # tag depending on the surrounding words. from nltk.tag import UnigramTagger from nltk.tag import FastBrillTaggerTrainer from nltk.tag.brill import SymmetricProximateTokensTemplate from nltk.tag.brill import ProximateTokensTemplate from nltk.tag.brill import ProximateTagsRule from nltk.tag.brill import ProximateWordsRule ctx = [ # Context = surrounding words and tags. SymmetricProximateTokensTemplate(ProximateTagsRule, (1, 1)), SymmetricProximateTokensTemplate(ProximateTagsRule, (1, 2)), SymmetricProximateTokensTemplate(ProximateTagsRule, (1, 3)), SymmetricProximateTokensTemplate(ProximateTagsRule, (2, 2)), SymmetricProximateTokensTemplate(ProximateWordsRule, (0, 0)), SymmetricProximateTokensTemplate(ProximateWordsRule, (1, 1)), SymmetricProximateTokensTemplate(ProximateWordsRule, (1, 2)), ProximateTokensTemplate(ProximateTagsRule, (-1, -1), (1, 1)), ] tagger = UnigramTagger(sentences) tagger = FastBrillTaggerTrainer(tagger, ctx, trace=0) tagger = tagger.train(sentences, max_rules=100) #print tagger.evaluate(wikicorpus(10000, start=1))