def setup_module(module):
    from nose import SkipTest

    try:
        tagger = Senna("/usr/share/senna-v3.0", ["pos", "chk", "ner"])
    except OSError:
        raise SkipTest("Senna executable not found")
def setup_module(module):
    import pytest

    try:
        tagger = Senna("/usr/share/senna-v3.0", ["pos", "chk", "ner"])
    except OSError:
        pytest.skip("Senna executable not found")
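# A hedged sketch (not part of the original snippets): the same guard expressed as a
# module-level pytest skip, assuming the default install path used above. It also
# defines SENNA_EXECUTABLE_PATH, which the test below relies on but never defines here.
import os
import pytest

SENNA_EXECUTABLE_PATH = "/usr/share/senna-v3.0"

pytestmark = pytest.mark.skipif(
    not os.path.isdir(SENNA_EXECUTABLE_PATH),
    reason="Senna executable not found",
)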
def test_senna_pipeline(self):
    """Senna pipeline interface"""
    pipeline = Senna(SENNA_EXECUTABLE_PATH, ["pos", "chk", "ner"])
    sent = "Dusseldorf is an international business center".split()
    result = [
        (token["word"], token["chk"], token["ner"], token["pos"])
        for token in pipeline.tag(sent)
    ]
    expected = [
        ("Dusseldorf", "B-NP", "B-LOC", "NNP"),
        ("is", "B-VP", "O", "VBZ"),
        ("an", "B-NP", "O", "DT"),
        ("international", "I-NP", "O", "JJ"),
        ("business", "I-NP", "O", "NN"),
        ("center", "I-NP", "O", "NN"),
    ]
    self.assertEqual(result, expected)
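# A minimal standalone sketch of the same pipeline outside unittest, assuming the
# Senna 3.0 distribution is installed at /usr/share/senna-v3.0 (adjust the path to
# wherever the executable actually lives).
from nltk.classify import Senna

pipeline = Senna("/usr/share/senna-v3.0", ["pos", "chk", "ner"])
for token in pipeline.tag("Dusseldorf is an international business center".split()):
    # Each token is a dict holding the word plus one key per requested annotation.
    print(token["word"], token["pos"], token["chk"], token["ner"])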
import json
import pickle
import re
import sys
from datetime import datetime, timedelta, date

import sklearn_crfsuite
from sklearn_crfsuite import metrics
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from duckling import DucklingWrapper
from dateutil import parser
from nltk.classify import Senna
from nltk.stem.wordnet import WordNetLemmatizer

# Senna pipeline (POS, chunking, NER) and WordNet lemmatizer used for preprocessing
pipeline = Senna('/usr/share/senna-v3.0', ['pos', 'chk', 'ner'])
lmtzr = WordNetLemmatizer()

# Stop word list, read from a plain-text file with one word per line
stop_words = []
with open('stop_words.txt', 'r') as file_open_stopwords:
    for each_word in file_open_stopwords:
        stop_words.append(each_word.strip())

# Duckling wrapper for date/time parsing, plus state kept across dialogue turns
d = DucklingWrapper()
leave_application_json = {}
context_stack = []
leave_application_previous = {}
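# A hedged sketch (not from the original script) of how the objects set up above might
# be combined to preprocess a user utterance: the Senna pipeline supplies POS/chunk/NER
# labels, WordNetLemmatizer normalises surface forms, and stop words are filtered out.
# The function name preprocess_utterance is hypothetical.
def preprocess_utterance(sentence):
    tokens = pipeline.tag(sentence.split())
    content_words = []
    entities = []
    for token in tokens:
        word = token['word'].lower()
        if word in stop_words:
            continue
        content_words.append(lmtzr.lemmatize(word))
        if token['ner'] != 'O':
            # Keep named-entity mentions alongside their BIO tag
            entities.append((token['word'], token['ner']))
    return content_words, entities

# Example usage on a leave-application style utterance
words, ents = preprocess_utterance('I want to apply for leave on Friday in Dusseldorf')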