) # Only tokenize the text doc = nlp.make_doc(text) print([token.text for token in doc]) import spacy nlp = spacy.load("en_core_web_sm") text = ( "Chick-fil-A is an American fast food restaurant chain headquartered in " "the city of College Park, Georgia, specializing in chicken sandwiches." ) # Disable the tagger and parser with nlp.disable_pipes("tagger", "parser"): # Process the text doc = nlp(text) # Print the entities in the doc print(doc.ents) # How training works (1) # 1. Initialize the model weights randomly with nlp.begin_training # 2. Predict a few examples with the current weights by calling nlp.update # 3. Compare prediction with true labels # 4. Calculate how to change weights to improve predictions # 5. Update weights slightly # 6. Go back to 2. # Example: Training the entity recognizer # The entity recognizer tags words and phrases in context
# Run the current pipeline on a sample review and show the entities found.
# NOTE(review): `nlp` is used before the imports below; presumably it is a
# pipeline loaded above this excerpt -- confirm.
doc = nlp(
    u"I found these crisps at our local WalMart & figured I would give them a try. "
    u"They were so yummy I may never go back to regular chips, not that I was a big chip fan anyway. "
    u"The only problem is I can eat the entire bag in one sitting. "
    u"I give these crisps a big thumbs up!"
)
print([(ent.text, ent.label_) for ent in doc.ents])

# Training a custom NLP model
import spacy
import random

# Each example is (text, {"entities": [(start_char, end_char, label)]}).
train_data = [
    (
        u"As soon as I tasted one and it tasted like a corn chip I checked the ingredients. ",
        {"entities": [(45, 49, "PRODUCT")]},  # characters 45-49 are "corn"
    ),
    (
        u"I found these crisps at our local WalMart & figured I would give them a try",
        {"entities": [(14, 20, "PRODUCT")]},  # characters 14-20 are "crisps"
    ),
]

# Train only the entity recognizer: every other pipe is disabled while the
# updates run and restored when the `with` block exits.
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]
with nlp.disable_pipes(*other_pipes):
    # NOTE(review): in spaCy v2, begin_training() re-initializes the model
    # weights. If `nlp` is a pretrained pipeline, resume_training() is
    # probably what is wanted -- confirm against how `nlp` was created.
    optimizer = nlp.begin_training()
    for _epoch in range(10):
        # Reshuffle on every pass so the update order varies between epochs.
        random.shuffle(train_data)
        for text, annotations in train_data:
            nlp.update([text], [annotations], sgd=optimizer)

# Save after leaving the `with` block so the restored pipes are written too.
nlp.to_disk("model/food_model")

# Prediction
import spacy

nlp = spacy.load("model/food_model")
doc = nlp(
    "I consume about a jar every two weeks of this, either adding it to "
    "fajitas or using it as a corn chip dip"
)
for entity in doc.ents:
    print(entity.text, entity.label_)
'\n') ###################################################################### # Selective processing: text = ("Chick-fil-A is an American fast food restaurant chain " "headquartered in the city of College Park, Georgia, specializing " "in chicken sandwiches.") # Only tokenize the text doc = nlp.make_doc(text) # doc = nlp(text) print([token.text for token in doc]) # Disable the tagger and parser with nlp.disable_pipes('tagger', 'parser'): # Process the text doc = nlp(text) # Print the entities in the doc print(doc.ents) ###################################################################### # Creating training data: # Two tokens whose lowercase forms match 'iphone' and 'x' pattern1 = [{'LOWER': 'iphone'}, {'LOWER': 'x'}] # Token whose lowercase form matches 'iphone' and an optional digit pattern2 = [{'LOWER': 'iphone'}, {'OP': '?', 'IS_DIGIT': True}] # Add patterns to the matcher