Beispiel #1
0
)

# Run only the tokenizer on the text and show the resulting token strings
doc = nlp.make_doc(text)
tokens = [tok.text for tok in doc]
print(tokens)

import spacy

# Load the small English pipeline and define the sample sentence to process.
nlp = spacy.load("en_core_web_sm")
text = (
    "Chick-fil-A is an American fast food restaurant chain headquartered in "
    "the city of College Park, Georgia, specializing in chicken sandwiches."
)

# Process the text with the tagger and parser disabled for the duration of
# the `with` block, then print the entities found in the doc.
with nlp.disable_pipes("tagger", "parser"):
    doc = nlp(text)
    entities = doc.ents
    print(entities)

# How training works (1)
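# 1. Initialize the model weights randomly with nlp.begin_training
#    (to continue training a loaded pretrained model without resetting its
#    weights, use nlp.resume_training instead)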
# 2. Predict a few examples with the current weights by calling nlp.update
# 3. Compare prediction with true labels
# 4. Calculate how to change weights to improve predictions
# 5. Update weights slightly
# 6. Go back to 2.

# Example: running the entity recognizer
# The entity recognizer tags words and phrases in context; here it is applied
# to a short product review and each entity is printed as (text, label).
doc = nlp(
    u"I found these crisps at our local WalMart & figured I would give them a try. "
    u"They were so yummy I may never go back to regular chips, not that I was a big chip fan anyway. "
    u"The only problem is I can eat the entire bag in one sitting. "
    u"I give these crisps a big thumbs up!"
)
labelled_entities = [(span.text, span.label_) for span in doc.ents]
print(labelled_entities)

#Training a custom NLP model

import spacy
import random

# Training examples as (text, annotations) pairs. Each entity is given as
# (start_char, end_char, label) character offsets into the text.
# NOTE(review): (45, 49) covers only "corn"; if the intended product is
# "corn chip" the span would be (45, 54) - confirm before changing.
train_data = [
    (
        u"As soon as I tasted one and it tasted like a corn chip I checked the ingredients. ",
        {"entities": [(45, 49, "PRODUCT")]},
    ),
    (
        u"I found these crisps at our local WalMart & figured I would give them a try",
        {"entities": [(14, 20, "PRODUCT")]},
    ),
]

# Every pipeline component except the entity recognizer; these are disabled
# while training so only "ner" is updated.
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]

with nlp.disable_pipes(*other_pipes):
    # `nlp` is a loaded pretrained pipeline, so continue from its existing
    # weights. begin_training() would re-initialize the model and throw away
    # what the pretrained entity recognizer has already learned.
    optimizer = nlp.resume_training()
    for _ in range(10):
        # Reshuffle on every pass so the examples are not always seen in the
        # same order.
        random.shuffle(train_data)
        for text, annotations in train_data:
            nlp.update([text], [annotations], sgd=optimizer)

# Save outside the `with` block, i.e. once the other components are no longer
# disabled.
nlp.to_disk("model/food_model")

#Prediction
import spacy
# Reload the model saved by the training step and run it on an unseen sentence.
nlp = spacy.load("model/food_model")
doc = nlp("I consume about a jar every two weeks of this, either adding it to fajitas or using it as a corn chip dip")

# Print each predicted entity together with its label.
for ent in doc.ents:
    print(ent.text, ent.label_)
Beispiel #3
0
              '\n')

    ######################################################################
    # Selective processing:

    text = ("Chick-fil-A is an American fast food restaurant chain "
            "headquartered in the city of College Park, Georgia, specializing "
            "in chicken sandwiches.")

    # Only tokenize the text
    doc = nlp.make_doc(text)  # doc = nlp(text)

    print([token.text for token in doc])

    # Disable the tagger and parser
    with nlp.disable_pipes('tagger', 'parser'):
        # Process the text
        doc = nlp(text)
        # Print the entities in the doc
        print(doc.ents)

    ######################################################################
    # Creating training data:

    # Two tokens whose lowercase forms match 'iphone' and 'x'
    pattern1 = [{'LOWER': 'iphone'}, {'LOWER': 'x'}]

    # Token whose lowercase form matches 'iphone' and an optional digit
    pattern2 = [{'LOWER': 'iphone'}, {'OP': '?', 'IS_DIGIT': True}]

    # Add patterns to the matcher