def predict(storyId, sentence):
    """Label ``sentence`` with the CRF model of ``storyId`` and extract entities.

    The model is opened from ``"<MODELS_DIR>/<storyId>.model"``, where
    ``MODELS_DIR`` is read from ``app.config``.

    Args:
        storyId: Identifier used to build the model file name.
        sentence: Raw text; passed both to ``word_tokenize`` and ``posTagger``.

    Returns:
        Whatever ``extractEntities`` produces for the (token, label) pairs.
    """
    words = word_tokenize(sentence)
    posTagged = posTagger(sentence)

    crf = pycrfsuite.Tagger()
    modelPath = "{}/{}.model".format(app.config["MODELS_DIR"], storyId)
    crf.open(modelPath)

    features = sentToFeatures(posTagged)
    labels = crf.tag(features)

    # zip() stops at the shorter input: if word_tokenize and posTagger split
    # the sentence differently, the surplus tokens/labels are dropped silently.
    return extractEntities(zip(words, labels))
def predict(storyId, sentence):
    """Label ``sentence`` with the CRF model of ``storyId`` and extract entities.

    Tokens paired with the predicted labels come from ``word_tokenize`` called
    with ``language='russian'``; the features fed to the CRF come from
    ``posTagger(sentence)``, which receives no language argument here.
    The model is opened from ``"<MODELS_DIR>/<storyId>.model"``, where
    ``MODELS_DIR`` is read from ``app.config``.

    Args:
        storyId: Identifier used to build the model file name.
        sentence: Raw text to tokenize, tag and label.

    Returns:
        Whatever ``extractEntities`` produces for the (token, label) pairs.
    """
    words = word_tokenize(sentence, language='russian')
    posTagged = posTagger(sentence)

    crf = pycrfsuite.Tagger()
    modelPath = "{}/{}.model".format(app.config["MODELS_DIR"], storyId)
    crf.open(modelPath)

    features = sentToFeatures(posTagged)
    labels = crf.tag(features)

    # zip() stops at the shorter input: if word_tokenize and posTagger split
    # the sentence differently, the surplus tokens/labels are dropped silently.
    return extractEntities(zip(words, labels))
def test_sequenceLabeler_predict(self):
    """Run the sequence-labeler pipeline on a fixed sentence and print the result.

    Opens the CRF model at the module-level ``model_file`` path, tags the
    sample sentence and prints the entities returned by
    ``sequenceLabeler.extractEntities``.

    NOTE(review): this only prints the result and asserts nothing, so it
    fails only if one of the calls raises.
    """
    print("test_sequenceLabeler_predict")

    # ``model_file`` is only read here, so no ``global`` declaration is
    # needed; the unused ``global id`` (naming a builtin) was dropped too.
    sentence = "I want to book a cab from Beijing"
    words = word_tokenize(sentence)
    posTagged = posTagger(sentence)

    crf = pycrfsuite.Tagger()
    crf.open(model_file)

    features = sequenceLabeler.sentToFeatures(posTagged)
    labels = crf.tag(features)
    extractedEntities = sequenceLabeler.extractEntities(zip(words, labels))

    print("extractedEntities:")
    print(extractedEntities)