Python RegexpTagger.evaluate Examples

Programming Language: Python

Namespace/Package Name: nltk.tag

Class/Type: RegexpTagger

Method/Function: evaluate

Examples at hotexamples.com: 6

Python RegexpTagger.evaluate - 6 examples found. These are the top rated real world Python examples of nltk.tag.RegexpTagger.evaluate extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

RegexpTagger(30)

tag(8)

evaluate(5)

__init__(2)

tag_sents(1)

train(1)

Example #1

Show file

def find_combined_taggers_accuracy(train_set, test_set):
    """Train a series of taggers and print the score of each on ``test_set``.

    The taggers are built and scored in this order: a default tagger that
    always emits the most frequent training tag, a suffix-based regex
    tagger, a unigram tagger backed by the default tagger, and three bigram
    taggers (no backoff, regex backoff, unigram backoff).

    Args:
        train_set: iterable of sentences, each a sequence of
            ``(word, tag)`` pairs, used to train the n-gram taggers.
        test_set: tagged sentences passed to every tagger's ``evaluate``.

    Returns:
        None. Results are written to stdout.
    """
    # Baseline: always predict the tag seen most often in the training data.
    tag_counts = FreqDist(
        tag for sentence in train_set for (_word, tag) in sentence
    )
    default_tagger = DefaultTagger(tag_counts.max())
    print("Default Tagger accuracy: ", default_tagger.evaluate(test_set))

    # Rule-based tagger; the final catch-all pattern supplies the fallback tag.
    suffix_rules = [
        (r'.*ing$', 'VBG'),  # gerunds
        (r'.*ed$', 'VBD'),  # simple past
        (r'.*es$', 'VBZ'),  # 3rd singular present
        (r'.*ould$', 'MD'),  # modals
        (r'.*\'s$', 'NN$'),  # possessive nouns
        (r'.*s$', 'NNS'),  # plural nouns
        (r'^-?[0-9]+(\.[0-9]+)?$', 'CD'),  # cardinal numbers
        (r'.*', 'NN')  # nouns (default)
    ]
    regex_tagger = RegexpTagger(suffix_rules)
    print("Regex Tagger Accuracy: ", regex_tagger.evaluate(test_set))

    # Unigram model that defers to the baseline when it has no answer.
    unigram_tagger = UnigramTagger(train_set, backoff=default_tagger)
    print("Unigram Tagger accuracy (Backoff = Default Tagger): ",
          unigram_tagger.evaluate(test_set))

    # Three bigram variants that differ only in their backoff tagger.
    plain_bigram = BigramTagger(train_set)
    bigram_over_unigram = BigramTagger(train_set, backoff=unigram_tagger)
    bigram_over_regex = BigramTagger(train_set, backoff=regex_tagger)

    # Score all three first (list elements are evaluated top to bottom),
    # then report them.
    bigram_report = [
        ("Bigram Tagger Accuracy: ",
         plain_bigram.evaluate(test_set)),
        ("Bigram Tagger Accuracy (Backoff = Regex Tagger): ",
         bigram_over_regex.evaluate(test_set)),
        ("Bigram Tagger Accuracy (Backoff = Unigram Tagger): ",
         bigram_over_unigram.evaluate(test_set)),
    ]
    for label, score in bigram_report:
        print(label, score)

Example #2

Show file

File: pos_tagging.py Project: jreine01/Redneckwithan-I

# regex tagger
from nltk.tag import RegexpTagger
# Define regex tag patterns. RegexpTagger tries them in order and uses the
# tag of the first pattern that matches, so the catch-all must stay last.
patterns = [
        (r'.*ing$', 'VBG'),               # gerunds
        (r'.*ed$', 'VBD'),                # simple past
        (r'.*es$', 'VBZ'),                # 3rd singular present
        (r'.*ould$', 'MD'),               # modals
        (r'.*\'s$', 'NN$'),               # possessive nouns
        (r'.*s$', 'NNS'),                 # plural nouns
        # The decimal point is escaped: an unescaped '.' matches any
        # character, so tokens such as '3x4' or '12/31' were tagged CD.
        (r'^-?[0-9]+(\.[0-9]+)?$', 'CD'),  # cardinal numbers
        (r'.*', 'NN')                     # nouns (default) ...
]
rt = RegexpTagger(patterns)

# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(rt.evaluate(test_data))
print(rt.tag(tokens))


## N gram taggers
from nltk.tag import UnigramTagger
from nltk.tag import BigramTagger
from nltk.tag import TrigramTagger

# Each tagger is trained on the same data; none is given a backoff tagger.
ut = UnigramTagger(train_data)
bt = BigramTagger(train_data)
tt = TrigramTagger(train_data)

print(ut.evaluate(test_data))
print(ut.tag(tokens))

Example #3

Show file

# Define the regex patterns that determine the tags of tokens. When tagging a
# token, RegexpTagger tries the patterns in order from top to bottom and uses
# the tag of the first one that matches, so the catch-all pattern at the end
# acts as the default tag.
patterns = [
    (r".*ing$", "VBG"),  # Gerunds
    (r".*ed$", "VBD"),  # Simple past
    (r".*es$", "VBZ"),  # 3rd singular present
    (r".*ould$", "MD"),  # Modals
    (r".*'s$", "NN$"),  # Possessive nouns
    (r".*s$", "NNS"),  # Plural nouns
    # The decimal point is escaped: an unescaped "." matches any character,
    # so tokens such as "3x4" or "12/31" were tagged as cardinal numbers.
    (r"^-?[0-9]+(\.[0-9]+)?$", "CD"),  # Cardinal numbers
    (r".*", "NN")  # Nouns (default)
]

rt = RegexpTagger(regexps=patterns)

print(rt.evaluate(test_data))
print(rt.tag(tokens))

# 3. N-GRAM TAGGERS:
#    Contiguous sequences of n items from a sequence of text or speech. Items can be words, phonemes,
#    letters, characters or syllables. Shingles: n-grams where items are just words.
#    UnigramTagger -> NgramTagger -> ContextTagger -> SequentialBackoffTagger

# Train the N-Gram taggers using the training_data (pre-tagged tokens, i.e. labeled observations)
ut = UnigramTagger(train=train_data)
bt = BigramTagger(train_data)
tt = TrigramTagger(train_data)

# Test the performance of each N-Gram tagger
print("1-Gram Tagger Accuracy: {}".format(ut.evaluate(test_data)))
print("2-Gram Tagger Accuracy: {}".format(bt.evaluate(test_data)))

Example #4

Show file

File: 04-TaggingTrain.py Project: scb-account/PythonBasic

    a las queridas expresiones regulares con un RegexpTagger
'''

from nltk.tag import RegexpTagger
# Rule-based tagger. Patterns are tried in order and the first match wins,
# so the catch-all noun rule must stay last.
regexp_tagger = RegexpTagger(
         # The decimal point is escaped: an unescaped '.' matches any
         # character, so tokens such as '3x4' or '12/31' were tagged CD.
         [( r'^-?[0-9]+(\.[0-9]+)?$', 'CD'),  # cardinal numbers
          ( r'(The|the|A|a|An|an)$', 'AT'),   # articles
          ( r'.*able$', 'JJ'),                # adjectives
          ( r'.*ness$', 'NN'),         # nouns formed from adj
          ( r'.*ly$', 'RB'),           # adverbs
          ( r'.*s$', 'NNS'),           # plural nouns
          ( r'.*ing$', 'VBG'),         # gerunds
          (r'.*ed$', 'VBD'),           # past tense verbs
          (r'.*', 'NN')                # nouns (default)
          ])
print("Regexp Tagger: {}".format(regexp_tagger.evaluate(test_data)))

''' Visto lo anterior, podemos poner al tagger regexp como backoff
    de los N-gram creados anteriormente.

    O podríamos ponerlo 1º, pero me fio más de los preentrenados
    que de unas reglas puestas a capón.
'''

regexp_tagger = RegexpTagger(
         [( r'^-?[0-9]+(.[0-9]+)?$', 'CD'),   # cardinal numbers
          ( r'(The|the|A|a|An|an)$', 'AT'),   # articles
          ( r'.*able$', 'JJ'),                # adjectives
          ( r'.*ness$', 'NN'),         # nouns formed from adj
          ( r'.*ly$', 'RB'),           # adverbs
          ( r'.*s$', 'NNS'),           # plural nouns

Example #5

Show file

File: Proyecto_ajuste.py Project: jaidenmeiden/lc

(r'.*able$', 'JJ'),                # adjectives
(r'.*ness$', 'NN'),                # nouns formed from adjectives
(r'.*ly$', 'RB'),                  # adverbs
(r'.*s$', 'NNS'),                  # plural nouns
(r'.*ing$', 'VBG'),                # gerunds
(r'.*ed$', 'VBD'),                 # past tense verbs
(r'.*', 'NN')                      # nouns (default)
])
entrenar_bill(tagger,"RegexpTagger")


# In[ ]:


# Unigram tagger trained on the first 1000 items of the reduced training set.
tagger = UnigramTagger(train_reducido[:1000])
# NOTE(review): the evaluate() result is neither bound nor printed here, so
# it is only visible if this is the last expression of an interactive cell.
tagger.evaluate(test_reducido[:1000])
# entrenar_bill is defined outside this excerpt; presumably it trains and
# reports a tagger that starts from the one given, labelled by the string.
# TODO confirm against its definition.
entrenar_bill(tagger,"UnigramTagger")


# In[ ]:


# Same procedure with a bigram tagger; `tagger` is rebound.
tagger = BigramTagger(train_reducido[:1000])
# NOTE(review): result discarded, as in the unigram cell.
tagger.evaluate(test_reducido[:1000])
entrenar_bill(tagger,"BigramTagger")


# In[ ]:


# CRF tagger constructed with default arguments; no training is visible here.
ct = CRFTagger()

Example #6

Show file

File: regex_tagger.py Project: anderscui/nlpy

from nltk.tag import RegexpTagger
from tag_util import patterns, test_sents


# Build the rule-based tagger from the shared pattern list, score it on the
# test sentences, and print that score.
tagger = RegexpTagger(patterns)
accuracy = tagger.evaluate(test_sents)
print(accuracy)