def demo():
    """Load a previously marshalled 3-gram tagger and print its model.

    The training path is currently disabled: the tagger is restored from
    the file "ngram.test" rather than trained. The training sentences are
    still fetched so the disabled steps below can be re-enabled without
    further changes.
    """
    # Training data: the first 500 items of Brown section 'a'.
    training_sentences = brown.tagged('a')[:500]

    ngram_tagger = MarshalNgram(3)

    # Disabled steps that (re)create "ngram.test" from the training data:
    #   ngram_tagger.train(training_sentences)
    #   ngram_tagger.marshal("ngram.test")
    ngram_tagger.unmarshal("ngram.test")

    # Single-argument form prints identically on Python 2 and Python 3.
    print(ngram_tagger._model)
def demo():
    """Restore a 3-gram tagger from "ngram.test" and print its ``_model``.

    Training and marshalling are switched off here; to regenerate
    "ngram.test", call ``train(corpus_slice)`` and then
    ``marshal("ngram.test")`` on the tagger before unmarshalling.
    """
    # First 500 items of Brown section 'a'; only needed by the
    # switched-off training step, but still loaded as before.
    corpus_slice = brown.tagged('a')[:500]

    restored = MarshalNgram(3)
    restored.unmarshal("ngram.test")

    # print(x) with one argument behaves the same on Python 2 and 3.
    print(restored._model)
def pos_tag(self, infile):
    """Tag the tokens of ``<infile>.txt`` and write them to ``<infile>-tag.txt``.

    A trigram tagger is trained on up to 1,000,000 items taken from
    ``brown.tagged()``. The input file is then read in full, split on
    newlines, and each line is tokenized on whitespace and tagged. Every
    token is written as ``word/TAG`` followed by a single space, with
    ``NA`` used when the tagger returns ``None`` as the tag; each input
    line yields one output line. Each tagged pair is also echoed to stdout.

    Parameters:
        infile: path of the input file without its ".txt" extension; the
            same stem is used for the "-tag.txt" output file.

    Raises:
        IOError: if either file cannot be opened, read or written. The
            original exception propagates unchanged, so its errno and
            message are preserved.
    """
    train_sents = list(islice(brown.tagged(), 1000000))
    trigram_tagger = tag.Trigram()
    trigram_tagger.train(train_sents)

    # Nested try/finally guarantees both handles are closed even when
    # reading, tagging or writing fails part-way through.
    source = open(infile + ".txt", "r")
    try:
        out = open(infile + "-tag.txt", "w")
        try:
            text = source.read()
            for line in text.split('\n'):
                tokens = list(tokenize.whitespace(line))
                for pair in trigram_tagger.tag(tokens):
                    # Echo of each tagged pair, kept from the original.
                    print(pair)
                    word = pair[0]
                    pos = pair[1]
                    if pos is None:
                        # The tagger could not assign a tag to this token.
                        pos = "NA"
                    out.write(word + "/" + pos)
                    out.write(" ")
                out.write("\n")
        finally:
            out.close()
    finally:
        source.close()
from nltk import tag
from nltk.corpus import brown
import yaml

# Train a unigram tagger, with a Default("nn") tagger as its backoff,
# and dump the trained tagger to "demo_tagger.yaml".
t0 = tag.Default("nn")
t1 = tag.Unigram(backoff=t0)

# NOTE(review): the comment here used to read "section a: press-reportage",
# but the section actually requested is "f" -- confirm which is intended.
t1.train(brown.tagged("f"))

# Close the output file even if yaml.dump raises, so the handle is not
# leaked and whatever was written is flushed to disk.
f = open("demo_tagger.yaml", "w")
try:
    yaml.dump(t1, f)
finally:
    f.close()
from nltk import tag
from nltk.corpus import brown
import yaml

# Train a unigram tagger, with a Default('nn') tagger as its backoff,
# and dump the trained tagger to 'demo_tagger.yaml'.
t0 = tag.Default('nn')
t1 = tag.Unigram(backoff=t0)

# NOTE(review): the comment here used to read "section a: press-reportage",
# but the section actually requested is 'f' -- confirm which is intended.
t1.train(brown.tagged('f'))

# Close the output file even if yaml.dump raises, so the handle is not
# leaked and whatever was written is flushed to disk.
f = open('demo_tagger.yaml', 'w')
try:
    yaml.dump(t1, f)
finally:
    f.close()