def _parse(*args, **kw):  # FIXME (workaround)
    """Delegate to parsetree(), first materializing the parser's model.

    Workaround: the pattern parser may hold its model as a path string
    until first use; wrap that path in a Model instance before parsing.
    """
    from pattern.text.en import parser

    model = parser.model
    if isinstance(model, str):
        from pattern.text import Model
        parser.model = Model(path=model)
    return parsetree(*args, **kw)
# Example #2 (score: 0)
# Perceptron is an error-driven classifier.
# When given a training example (e.g., tagged word + surrounding words),
# it will check if it could correctly predict this example.
# If not, it will adjust its weights.
# So the accuracy of the perceptron can be improved significantly
# by training in multiple iterations, averaging out all weights.

# This will take several minutes.
# If you want it to run faster for experimentation,
# use less iterations or less data in the code below:

print("training model...")

seed(0)  # Lock random list shuffling so we can compare.

m = Model(known=known, unknown=unknown, classifier=SLP())
for iteration in range(5):
    for s in shuffled(data[:20000]):
        # Feed each (word, tag) together with its neighbors as context.
        # Renamed prev/next -> prev_token/next_token: `next` shadows the builtin.
        prev_token = None
        next_token = None
        for i, (w, tag) in enumerate(s):
            if i < len(s) - 1:
                next_token = s[i + 1]
            m.train(w, tag, prev_token, next_token)
            prev_token = (w, tag)
            next_token = None  # No lookahead past the end of the sentence.

# Save next to this script so the result does not depend on the CWD.
f = os.path.join(os.path.dirname(__file__), "en-model.slp")
m.save(f, final=True)

# Each parser in Pattern (pattern.en, pattern.es, pattern.it, ...)
# Example #3 (score: 0)
# Perceptron is an error-driven classifier.
# When given a training example (e.g., tagged word + surrounding words),
# it will check if it could correctly predict this example.
# If not, it will adjust its weights.
# So the accuracy of the perceptron can be improved significantly
# by training in multiple iterations, averaging out all weights.

# This will take several minutes.
# If you want it to run faster for experimentation,
# use less iterations or less data in the code below:

# Fixed: Python 2 `print "..."` statement -> print() function,
# consistent with the other examples in this file.
print("training model...")

seed(0)  # Lock random list shuffling so we can compare.

m = Model(known=known, unknown=unknown, classifier=SLP())
for iteration in range(5):
    for s in shuffled(data[:20000]):
        # Renamed prev/next -> prev_token/next_token: `next` shadows the builtin.
        prev_token = None
        next_token = None
        for i, (w, tag) in enumerate(s):
            if i < len(s) - 1:
                next_token = s[i + 1]
            m.train(w, tag, prev_token, next_token)
            prev_token = (w, tag)
            next_token = None  # No lookahead past the end of the sentence.

# NOTE(review): relative path — the model is written into the current
# working directory, unlike Example #2 which anchors to __file__.
m.save("en-model.slp", final=True)

# Each parser in Pattern (pattern.en, pattern.es, pattern.it, ...)
# assumes that a lexicon of known words and their most frequent tag is available,
# Example #4 (score: 0)
# Perceptron is an error-driven classifier: shown a training example
# (a tagged word plus its surrounding words), it checks whether it would
# have predicted the tag correctly and, if not, adjusts its weights.
# Accuracy improves significantly with multiple training iterations,
# averaging out all weights.

# This will take several minutes.
# For faster experimentation, lower the iteration count
# or shrink the data slice below:

print("training model...")

seed(0)  # Fix the shuffle order so separate runs are comparable.

m = Model(known=known, unknown=unknown, classifier=SLP())
for _ in range(5):
    for sentence in shuffled(data[:20000]):
        previous = None
        last = len(sentence) - 1
        for i, (w, tag) in enumerate(sentence):
            # Lookahead token, or None at the end of the sentence.
            upcoming = sentence[i + 1] if i < last else None
            m.train(w, tag, previous, upcoming)
            previous = (w, tag)

f = os.path.join(os.path.dirname(__file__), "en-model.slp")
m.save(f, final=True)

# Each parser in Pattern (pattern.en, pattern.es, pattern.it, ...)
# Example #5 (score: 0)
# Perceptron is an error-driven classifier.
# When given a training example (e.g., tagged word + surrounding words),
# it will check if it could correctly predict this example.
# If not, it will adjust its weights.
# So the accuracy of the perceptron can be improved significantly
# by training in multiple iterations, averaging out all weights.

# This will take several minutes.
# If you want it to run faster for experimentation,
# use less iterations or less data in the code below:

# Fixed: Python 2 `print "..."` statement -> print() function,
# consistent with the other examples in this file.
print("training model...")

seed(0)  # Lock random list shuffling so we can compare.

m = Model(known=known, unknown=unknown, classifier=SLP())
for iteration in range(5):
    for s in shuffled(data[:20000]):
        # Renamed prev/next -> prev_token/next_token: `next` shadows the builtin.
        prev_token = None
        next_token = None
        for i, (w, tag) in enumerate(s):
            if i < len(s) - 1:
                next_token = s[i + 1]
            m.train(w, tag, prev_token, next_token)
            prev_token = (w, tag)
            next_token = None  # No lookahead past the end of the sentence.

# NOTE(review): relative path — the model is written into the current
# working directory, unlike Example #2 which anchors to __file__.
m.save("en-model.slp", final=True)

# Each parser in Pattern (pattern.en, pattern.es, pattern.it, ...)
# assumes that a lexicon of known words and their most frequent tag is available,