def classify(self, _string):
    # Expects one sentence as a string.
    _string = unicode(_string)
    _string_processed = self.nlp(_string)
    tags = []
    # Build a featureset for each token and classify the tokens one at a time.
    for index in range(len(_string_processed)):
        featureset = features.feature_compiler(index, _string_processed)
        tags.append(self.classifier.classify(featureset))
    return tags
def train(self):
    # Build a (featureset, tag) pair for every token in every training sentence.
    for sentence, tags in self.datasource:
        sentence_processed = self.nlp(u' '.join(sentence))
        for token in range(len(sentence)):
            self.featuresets.append(
                (features.feature_compiler(token, sentence_processed), tags[token]))
    # Hold out the last 1000 featuresets for evaluation.
    train_set, test_set = self.featuresets[:-1000], self.featuresets[-1000:]
    pprint(train_set[:10])  # preview the first few training items
    self.classifier = MaxentClassifier.train(train_set)
    # Save the trained classifier to disk.
    self.save()
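For reference, MaxentClassifier.train expects a list of (featureset, label) pairs, where each featureset is a plain dict of feature names to values; that is exactly the shape train() builds above. A minimal, self-contained illustration (the feature names and labels here are made up for demonstration only):

from nltk.classify import MaxentClassifier

# Each training item is (featureset dict, label); features and labels are illustrative.
train_set = [
    ({'word': u'merry', 'is_title': False}, u'ADJ'),
    ({'word': u'christmas', 'is_title': False}, u'NOUN'),
    ({'word': u'wish', 'is_title': False}, u'VERB'),
]
classifier = MaxentClassifier.train(train_set, max_iter=10)
print(classifier.classify({'word': u'merry', 'is_title': False}))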
import features
from spacy.en import English

nlp = English(entity=False)
a = u'We wish you a merry christmas'
ap = nlp(a)
x = features.feature_compiler(3, ap)
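The features module itself is not shown here. As a rough sketch of what feature_compiler might look like (the specific features chosen are assumptions, not the project's actual feature set), it takes a token index plus the spaCy-processed sentence and returns an NLTK-style dict of feature names to values:

# Hypothetical sketch of features.feature_compiler -- the real feature set may differ.
def feature_compiler(index, doc):
    """Return an NLTK-style featureset (dict) for the token at position `index`."""
    token = doc[index]
    return {
        'word': token.lower_,        # lowercased surface form
        'shape': token.shape_,       # e.g. u'Xxxxx'
        'is_title': token.is_title,  # capitalised word?
        'is_digit': token.is_digit,  # numeric token?
        'prev_word': doc[index - 1].lower_ if index > 0 else u'<S>',
        'next_word': doc[index + 1].lower_ if index < len(doc) - 1 else u'</S>',
    }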