def predict(storyId, sentence):
    """Label ``sentence`` with the CRF model saved for ``storyId``.

    The model is loaded from ``<MODELS_DIR>/<storyId>.model``, where
    ``MODELS_DIR`` is read from ``app.config``.

    Args:
        storyId: Identifier used to build the model file name.
        sentence: Raw sentence text to tokenize and label.

    Returns:
        Whatever ``extractEntities`` produces for the zipped
        ``(token, predicted label)`` pairs.
    """
    tokens = word_tokenize(sentence)
    tagged_tokens = posTagger(sentence)

    model_path = "{}/{}.model".format(app.config["MODELS_DIR"], storyId)
    tagger = pycrfsuite.Tagger()
    tagger.open(model_path)
    try:
        labels = tagger.tag(sentToFeatures(tagged_tokens))
    finally:
        # Release the model on every call, even when tagging raises, instead
        # of relying on garbage collection of the Tagger object.
        tagger.close()

    # NOTE(review): tokens come from word_tokenize while the labels are
    # derived from posTagger's output; zip() silently stops at the shorter
    # sequence, so both are assumed to split the sentence identically --
    # confirm.
    return extractEntities(zip(tokens, labels))
Beispiel #2
0
def predict(storyId, sentence):
    """Label ``sentence`` with the CRF model saved for ``storyId``.

    The sentence is tokenized with ``word_tokenize(..., language='russian')``
    and the model is loaded from ``<MODELS_DIR>/<storyId>.model``, where
    ``MODELS_DIR`` is read from ``app.config``.

    Args:
        storyId: Identifier used to build the model file name.
        sentence: Raw sentence text to tokenize and label.

    Returns:
        Whatever ``extractEntities`` produces for the zipped
        ``(token, predicted label)`` pairs.
    """
    tokens = word_tokenize(sentence, language='russian')
    tagged_tokens = posTagger(sentence)

    model_path = "{}/{}.model".format(app.config["MODELS_DIR"], storyId)
    tagger = pycrfsuite.Tagger()
    tagger.open(model_path)
    try:
        labels = tagger.tag(sentToFeatures(tagged_tokens))
    finally:
        # Release the model on every call, even when tagging raises, instead
        # of relying on garbage collection of the Tagger object.
        tagger.close()

    # NOTE(review): the language hint is passed only to word_tokenize, not to
    # posTagger, and zip() silently stops at the shorter sequence. If the two
    # split the sentence differently, tokens and labels will be misaligned --
    # confirm posTagger tokenizes the same way.
    return extractEntities(zip(tokens, labels))
Beispiel #3
0
 def test_sequenceLabeler_predict(self):
     # Smoke test: tag a fixed English sentence with the CRF model at the
     # module-level ``model_file`` path and print the entities that
     # sequenceLabeler.extractEntities returns. Nothing is asserted; the
     # test only fails if one of the calls raises.
     print("test_sequenceLabeler_predict")
     sentence = "I want to book a cab from Beijing"
     tokens = word_tokenize(sentence)
     tagged_tokens = posTagger(sentence)

     tagger = pycrfsuite.Tagger()
     # ``model_file`` is only read here, so no ``global`` declaration is
     # required to reach the module-level name.
     tagger.open(model_file)
     try:
         labels = tagger.tag(sequenceLabeler.sentToFeatures(tagged_tokens))
     finally:
         # Always release the model so repeated test runs do not leave the
         # model open.
         tagger.close()

     entities = sequenceLabeler.extractEntities(zip(tokens, labels))
     print("extractedEntities:")
     print(entities)