Example #1
0
f = os.path.join(os.path.dirname(__file__), "en-model.slp")
m.save(f, final=True)

# Each parser in Pattern (pattern.en, pattern.es, pattern.it, ...)
# assumes that a lexicon of known words and their most frequent tag is available,
# along with some rules for morphology (suffixes, e.g., -ly = adverb)
# and context (surrounding words) for unknown words.

# If a language model is also available, it overrides these (simpler) rules.
# For English, this can raise accuracy from about 94% up to about 97%,
# and makes the parses about 3x faster.

print("loading model...")

f = os.path.join(os.path.dirname(__file__), "en-model.slp")
lexicon.model = Model.load(lexicon, f)

# To test the accuracy of the language model,
# we can compare a tagged corpus to the predicted tags.
# This corpus must be different from the one used for training.
# Typically, sections 22, 23 and 24 of the WSJ are used.

# Note that the WSJ contains standardized English.
# The accuracy will be lower when tested on, for example, informal tweets.
# A different classifier could be trained for informal language use.

print("testing...")

i, n = 0, 0
for s1 in data[-5000:]:
    s2 = " ".join(w for w, tag in s1)
Example #2
0
f = os.path.join(os.path.dirname(__file__), "en-model.slp")
m.save(f, final=True)

# Each parser in Pattern (pattern.en, pattern.es, pattern.it, ...)
# assumes that a lexicon of known words and their most frequent tag is available,
# along with some rules for morphology (suffixes, e.g., -ly = adverb)
# and context (surrounding words) for unknown words.

# If a language model is also available, it overrides these (simpler) rules.
# For English, this can raise accuracy from about 94% up to about 97%,
# and makes the parses about 3x faster.

print("loading model...")

f = os.path.join(os.path.dirname(__file__), "en-model.slp")
lexicon.model = Model.load(f, lexicon)

# To test the accuracy of the language model,
# we can compare a tagged corpus to the predicted tags.
# This corpus must be different from the one used for training.
# Typically, sections 22, 23 and 24 of the WSJ are used.

# Note that the WSJ contains standardized English.
# The accuracy will be lower when tested on, for example, informal tweets.
# A different classifier could be trained for informal language use.

print("testing...")

i, n = 0, 0
for s1 in data[-5000:]:
    s2 = " ".join(w for w, tag in s1)
Example #3
0
f = os.path.join(os.path.dirname(__file__), "en-model.slp")
m.save(f, final=True)

# Each parser in Pattern (pattern.en, pattern.es, pattern.it, ...)
# assumes that a lexicon of known words and their most frequent tag is available,
# along with some rules for morphology (suffixes, e.g., -ly = adverb)
# and context (surrounding words) for unknown words.

# If a language model is also available, it overrides these (simpler) rules.
# For English, this can raise accuracy from about 94% up to about 97%,
# and makes the parses about 3x faster.

print("loading model...")

f = os.path.join(os.path.dirname(__file__), "en-model.slp")
lexicon.model = Model.load(f, lexicon)

# To test the accuracy of the language model,
# we can compare a tagged corpus to the predicted tags.
# This corpus must be different from the one used for training.
# Typically, sections 22, 23 and 24 of the WSJ are used.

# Note that the WSJ contains standardized English.
# The accuracy will be lower when tested on, for example, informal tweets.
# A different classifier could be trained for informal language use.

print("testing...")

i, n = 0, 0
for s1 in data[-5000:]:
    s2 = " ".join(w for w, tag in s1)
Example #4
0
            next = None

m.save("en-model.slp", final=True)

# Each parser in Pattern (pattern.en, pattern.es, pattern.it, ...)
# assumes that a lexicon of known words and their most frequent tag is available,
# along with some rules for morphology (suffixes, e.g., -ly = adverb)
# and context (surrounding words) for unknown words.

# If a language model is also available, it overrides these (simpler) rules.
# For English, this can raise accuracy from about 94% up to about 97%,
# and makes the parses about 3x faster.

print "loading model..."

lexicon.model = Model.load(lexicon, "en-model.slp")

# To test the accuracy of the language model,
# we can compare a tagged corpus to the predicted tags.
# This corpus must be different from the one used for training.
# Typically, sections 22, 23 and 24 of the WSJ are used.

# Note that the WSJ contains standardized English.
# The accuracy will be lower when tested on, for example, informal tweets.
# A different classifier could be trained for informal language use.

print "testing..."

i, n = 0, 0
for s1 in data[-5000:]:
    s2 = " ".join(w for w, tag in s1)
Example #5
0
f = os.path.join(os.path.dirname(__file__), "en-model.slp")
m.save(f, final=True)

# Each parser in Pattern (pattern.en, pattern.es, pattern.it, ...)
# assumes that a lexicon of known words and their most frequent tag is available,
# along with some rules for morphology (suffixes, e.g., -ly = adverb)
# and context (surrounding words) for unknown words.

# If a language model is also available, it overrides these (simpler) rules.
# For English, this can raise accuracy from about 94% up to about 97%,
# and makes the parses about 3x faster.

print("loading model...")

f = os.path.join(os.path.dirname(__file__), "en-model.slp")
lexicon.model = Model.load(lexicon, f)

# To test the accuracy of the language model,
# we can compare a tagged corpus to the predicted tags.
# This corpus must be different from the one used for training.
# Typically, sections 22, 23 and 24 of the WSJ are used.

# Note that the WSJ contains standardized English.
# The accuracy will be lower when tested on, for example, informal tweets.
# A different classifier could be trained for informal language use.

print("testing...")

i, n = 0, 0
for s1 in data[-5000:]:
    s2 = " ".join(w for w, tag in s1)
Example #6
0
            next = None

m.save("en-model.slp", final=True)

# Each parser in Pattern (pattern.en, pattern.es, pattern.it, ...)
# assumes that a lexicon of known words and their most frequent tag is available,
# along with some rules for morphology (suffixes, e.g., -ly = adverb)
# and context (surrounding words) for unknown words.

# If a language model is also available, it overrides these (simpler) rules.
# For English, this can raise accuracy from about 94% up to about 97%,
# and makes the parses about 3x faster.

print "loading model..."

lexicon.model = Model.load(lexicon, "en-model.slp")

# To test the accuracy of the language model,
# we can compare a tagged corpus to the predicted tags.
# This corpus must be different from the one used for training.
# Typically, sections 22, 23 and 24 of the WSJ are used.

# Note that the WSJ contains standardized English.
# The accuracy will be lower when tested on, for example, informal tweets.
# A different classifier could be trained for informal language use.

print "testing..."

i, n = 0, 0
for s1 in data[-5000:]:
    s2 = " ".join(w for w, tag in s1)