예제 #1
0
def nltkdemo18plus():
    """
    Return 18 templates, from the original nltk demo, and additionally a few
    multi-feature ones (the motivation is easy comparison with nltkdemo18)
    """
    return nltkdemo18() + [
        Template(Word([-1]), Pos([1])),
        Template(Pos([-1]), Word([1])),
        Template(Word([-1]), Word([0]), Pos([1])),
        Template(Pos([-1]), Word([0]), Word([1])),
        Template(Pos([-1]), Word([0]), Pos([1])),
    ]
예제 #2
0
def brill_rules_pos_wd_feats_offset_4():
    """
    Return 24 templates of the seminal TBL paper, Brill (1995)
    """
    return [
        Template(Word([-1])),
        Template(Word([-2])),
        Template(Word([-3])),
        Template(Word([-4])),
        Template(Word([0])),
        Template(Word([1])),
        Template(Word([2])),
        Template(Word([3])),
        Template(Word([4])),
    ]
예제 #3
0
def brill_rules_pos_bigram_feats_offset_4():
    """
    Return 24 templates of the seminal TBL paper, Brill (1995)
    """
    return [
        Template(Word([-1, 0])),
        Template(Word([-2, -1])),
        Template(Word([-3, -2])),
        Template(Word([-4, -3])),
        Template(Word([1, 0])),
        Template(Word([2, 1])),
        Template(Word([3, 2])),
        Template(Word([4, 3]))
    ]
예제 #4
0
def demo_multiposition_feature():
    """
    The feature/s of a template takes a list of positions
    relative to the current word where the feature should be
    looked for, conceptually joined by logical OR. For instance,
    Pos([-1, 1]), given a value V, will hold whenever V is found
    one step to the left and/or one step to the right.

    For contiguous ranges, a 2-arg form giving inclusive end
    points can also be used: Pos(-3, -1) is the same as the arg
    below.
    """
    postag(templates=[Template(Pos([-3,-2,-1]))])
예제 #5
0
def demo_generated_templates():
    """
    Template.expand and Feature.expand are class methods facilitating
    generating large amounts of templates. See their documentation for
    details.

    Note: training with 500 templates can easily fill all available
    even on relatively small corpora
    """
    wordtpls = Word.expand([-1,0,1], [1,2], excludezero=False)
    tagtpls = Pos.expand([-2,-1,0,1], [1,2], excludezero=True)
    templates = list(Template.expand([wordtpls, tagtpls], combinations=(1,3)))
    print("Generated {0} templates for transformation-based learning".format(len(templates)))
    postag(templates=templates, incremental_stats=True, template_stats=True)
예제 #6
0
def demo_generated_templates():
    """
    Template.expand and Feature.expand are class methods facilitating
    generating large amounts of templates. See their documentation for
    details.

    Note: training with 500 templates can easily fill all available
    even on relatively small corpora
    """
    wordtpls = Word.expand([-1,0,1], [1,2], excludezero=False)
    tagtpls = Pos.expand([-2,-1,0,1], [1,2], excludezero=True)
    templates = list(Template.expand([wordtpls, tagtpls], combinations=(1,3)))
    print("Generated {0} templates for transformation-based learning".format(len(templates)))
    postag(templates=templates, incremental_stats=True, template_stats=True)
예제 #7
0
def fntbl37():
    """
    Return 37 templates taken from the postagging task of the
    fntbl distribution http://www.cs.jhu.edu/~rflorian/fntbl/
    (37 is after excluding a handful which do not condition on Pos[0];
    fntbl can do that but the current nltk implementation cannot.)
    """
    return [
        Template(Word([0]), Word([1]), Word([2])),
        Template(Word([-1]), Word([0]), Word([1])),
        Template(Word([0]), Word([-1])),
        Template(Word([0]), Word([1])),
        Template(Word([0]), Word([2])),
        Template(Word([0]), Word([-2])),
        Template(Word([1, 2])),
        Template(Word([-2, -1])),
        Template(Word([1, 2, 3])),
        Template(Word([-3, -2, -1])),
        Template(Word([0]), Pos([2])),
        Template(Word([0]), Pos([-2])),
        Template(Word([0]), Pos([1])),
        Template(Word([0]), Pos([-1])),
        Template(Word([0])),
        Template(Word([-2])),
        Template(Word([2])),
        Template(Word([1])),
        Template(Word([-1])),
        Template(Pos([-1]), Pos([1])),
        Template(Pos([1]), Pos([2])),
        Template(Pos([-1]), Pos([-2])),
        Template(Pos([1])),
        Template(Pos([-1])),
        Template(Pos([-2])),
        Template(Pos([2])),
        Template(Pos([1, 2, 3])),
        Template(Pos([1, 2])),
        Template(Pos([-3, -2, -1])),
        Template(Pos([-2, -1])),
        Template(Pos([1]), Word([0]), Word([1])),
        Template(Pos([1]), Word([0]), Word([-1])),
        Template(Pos([-1]), Word([-1]), Word([0])),
        Template(Pos([-1]), Word([0]), Word([1])),
        Template(Pos([-2]), Pos([-1])),
        Template(Pos([1]), Pos([2])),
        Template(Pos([1]), Pos([2]), Word([1]))
    ]
예제 #8
0
def nltkdemo18():
    """
    Return 18 templates, from the original nltk demo, in multi-feature syntax
    """
    return [
        Template(Pos([-1])),
        Template(Pos([1])),
        Template(Pos([-2])),
        Template(Pos([2])),
        Template(Pos([-2, -1])),
        Template(Pos([1, 2])),
        Template(Pos([-3, -2, -1])),
        Template(Pos([1, 2, 3])),
        Template(Pos([-1]), Pos([1])),
        Template(Word([-1])),
        Template(Word([1])),
        Template(Word([-2])),
        Template(Word([2])),
        Template(Word([-2, -1])),
        Template(Word([1, 2])),
        Template(Word([-3, -2, -1])),
        Template(Word([1, 2, 3])),
        Template(Word([-1]), Word([1])),
    ]
예제 #9
0
def brill24():
    """
    Return 24 templates of the seminal TBL paper, Brill (1995)
    """
    return [
        Template(Pos([-1])),
        Template(Pos([1])),
        Template(Pos([-2])),
        Template(Pos([2])),
        Template(Pos([-2, -1])),
        Template(Pos([1, 2])),
        Template(Pos([-3, -2, -1])),
        Template(Pos([1, 2, 3])),
        Template(Pos([-1]), Pos([1])),
        Template(Pos([-2]), Pos([-1])),
        Template(Pos([1]), Pos([2])),
        Template(Word([-1])),
        Template(Word([1])),
        Template(Word([-2])),
        Template(Word([2])),
        Template(Word([-2, -1])),
        Template(Word([1, 2])),
        Template(Word([-1, 0])),
        Template(Word([0, 1])),
        Template(Word([0])),
        Template(Word([-1]), Pos([-1])),
        Template(Word([1]), Pos([1])),
        Template(Word([0]), Word([-1]), Pos([-1])),
        Template(Word([0]), Word([1]), Pos([1])),
    ]
예제 #10
0
def demo_multifeature_template():
    """
    Templates can have more than a single feature.
    """
    postag(templates=[Template(Word([0]), Pos([-2,-1]))])
예제 #11
0
print "Unigram accuracy: "
print unigram_tagger.evaluate(evaulation_data)

# Bigram tagger
bigram_tagger = BigramTagger(training_data, backoff=unigram_tagger)
print "Bigram accuracy: "
print bigram_tagger.evaluate(evaulation_data)

# Trigram tagger
trigram_tagger = TrigramTagger(training_data, backoff=bigram_tagger)
print "Trigram accuracy: "
print trigram_tagger.evaluate(evaulation_data)

# Brill tagger templates
templates = [
    Template(brill.Pos([1, 1])),
    Template(brill.Pos([2, 2])),
    Template(brill.Pos([1, 2])),
    Template(brill.Pos([1, 3])),
    Template(brill.Word([1, 1])),
    Template(brill.Word([2, 2])),
    Template(brill.Word([1, 2])),
    Template(brill.Word([1, 3])),
    Template(brill.Pos([-1, -1]), brill.Pos([1, 1])),
    Template(brill.Word([-1, -1]), brill.Word([1, 1])),
]

# First iteration
trainer = brill_trainer.BrillTaggerTrainer(trigram_tagger, templates)
brill_tagger = trainer.train(training_data, max_rules, min_score)
print "Initial Brill accuracy:"