Ejemplo n.º 1
0
def demo():
    """Restore a MarshalNgram(3) tagger from "ngram.test" and print its model.

    The training/saving step is intentionally disabled (see the comment
    below); the function only calls ``unmarshal`` on an existing file and
    prints the tagger's private ``_model`` attribute.
    """
    # First 500 items of Brown section 'a'. Only the disabled training
    # step below would consume this sample.
    training_sample = brown.tagged('a')[:500]

    ngram_tagger = MarshalNgram(3)

    # To (re)create "ngram.test", run these two calls once before unmarshal:
    #     ngram_tagger.train(training_sample)
    #     ngram_tagger.marshal("ngram.test")

    ngram_tagger.unmarshal("ngram.test")
    # Parenthesised single-argument form prints the same text as the
    # statement form under Python 2.
    print(ngram_tagger._model)
Ejemplo n.º 2
0
def demo():
    """Load the tagger stored in "ngram.test" and dump its model to stdout.

    Builds ``MarshalNgram(3)``, calls ``unmarshal("ngram.test")`` on it and
    prints the resulting ``_model``. Training and marshalling are left
    disabled, exactly as in the original demo.
    """
    # Slice of the corpus (section 'a', first 500 items); kept because the
    # disabled training call below refers to it.
    corpus_slice = brown.tagged('a')[:500]

    model_tagger = MarshalNgram(3)

    # Disabled: produce the "ngram.test" file from scratch.
    #     model_tagger.train(corpus_slice)
    #     model_tagger.marshal("ngram.test")

    model_tagger.unmarshal("ngram.test")
    print(model_tagger._model)
Ejemplo n.º 3
0
 def pos_tag(self, infile):
     """Tag the text in ``infile + ".txt"`` and write ``infile + "-tag.txt"``.

     On every call a ``tag.Trigram`` tagger is trained on at most 1,000,000
     items taken from ``brown.tagged()``. The input file is then read in
     full and split on newlines; each line is tokenized with
     ``tokenize.whitespace`` and tagged. Every (token, tag) pair is printed
     to stdout and written to the output file as ``token/TAG`` followed by
     a space, with ``NA`` substituted when the tag is ``None``. One output
     line is written per input line.

     :param infile: path prefix; ".txt" is appended for the input file and
         "-tag.txt" for the output file.
     :raises IOError: propagated unchanged (original errno/filename kept)
         when a file cannot be opened, read or written.
     """
     train_sents = list(islice(brown.tagged(), 1000000))
     trigram_tagger = tag.Trigram()
     trigram_tagger.train(train_sents)

     # try/finally (rather than ``with``) keeps this compatible with old
     # Python 2 releases while guaranteeing both handles are closed even
     # when tagging or writing fails part-way through.
     source = open(infile + ".txt", "r")
     try:
         out = open(infile + "-tag.txt", "w")
         try:
             for line in source.read().split('\n'):
                 tokens = list(tokenize.whitespace(line))
                 # Materialise the tagger output before writing, so a
                 # failure while tagging a line leaves no partial line.
                 for tags in list(trigram_tagger.tag(tokens)):
                     print(tags)
                     label = tags[1]
                     if label is None:
                         # The tagger could not assign a tag to this token.
                         label = "NA"
                     out.write(tags[0] + "/" + label + " ")
                 out.write("\n")
         finally:
             out.close()
     finally:
         source.close()
Ejemplo n.º 4
0
from nltk import tag
from nltk.corpus import brown
import yaml

# Default tagger constructed with "nn", used as the backoff of the
# unigram tagger below.
t0 = tag.Default("nn")
t1 = tag.Unigram(backoff=t0)
# NOTE(review): the original comment here read "section a: press-reportage",
# but the code trains on section "f" (listed as "popular lore" in the Brown
# corpus categories) -- confirm which section was intended.
t1.train(brown.tagged("f"))

# Serialise the trained tagger to YAML. Close the file explicitly so the
# dump is flushed to disk even if yaml.dump raises, instead of relying on
# interpreter shutdown to do it.
f = open("demo_tagger.yaml", "w")
try:
    yaml.dump(t1, f)
finally:
    f.close()
Ejemplo n.º 5
0
from nltk import tag
from nltk.corpus import brown
import yaml

# Default tagger constructed with 'nn', used as the backoff of the
# unigram tagger below.
t0 = tag.Default('nn')
t1 = tag.Unigram(backoff=t0)
# NOTE(review): the original comment here read "section a: press-reportage",
# but the code trains on section 'f' (listed as "popular lore" in the Brown
# corpus categories) -- confirm which section was intended.
t1.train(brown.tagged('f'))

# Serialise the trained tagger to YAML. Close the file explicitly so the
# dump is flushed to disk even if yaml.dump raises, instead of relying on
# interpreter shutdown to do it.
f = open('demo_tagger.yaml', 'w')
try:
    yaml.dump(t1, f)
finally:
    f.close()