Exemple #1
0
    def test_senna_pipeline(self):
        """Senna pipeline interface: check chunk, NER and POS tags of a sentence."""

        def as_row(tagged):
            # Field order mirrors the tuples in the expected list below.
            return (tagged['word'], tagged['chk'], tagged['ner'], tagged['pos'])

        tagger = Senna(SENNA_EXECUTABLE_PATH, ['pos', 'chk', 'ner'])
        words = 'Dusseldorf is an international business center'.split()
        observed = [as_row(tagged) for tagged in tagger.tag(words)]

        # One (word, chunk, ner, pos) tuple per input token, in sentence order.
        self.assertEqual(
            observed,
            [
                ('Dusseldorf', 'B-NP', 'B-LOC', 'NNP'),
                ('is', 'B-VP', 'O', 'VBZ'),
                ('an', 'B-NP', 'O', 'DT'),
                ('international', 'I-NP', 'O', 'JJ'),
                ('business', 'I-NP', 'O', 'NN'),
                ('center', 'I-NP', 'O', 'NN'),
            ],
        )
Exemple #2
0
def setup_module(module):
    """Skip this module's tests when the Senna tagger cannot be constructed.

    Module-level setup hook (nose/pytest naming convention).

    Args:
        module: The test module being set up; not used here.

    Raises:
        unittest.SkipTest: If constructing ``Senna`` raises ``OSError``.
    """
    # ``nose.SkipTest`` is an alias of ``unittest.SkipTest`` on Python 2.7+,
    # so taking it from the standard library raises the very same exception
    # class without depending on the unmaintained nose package.
    from unittest import SkipTest

    try:
        # Only the constructor succeeding matters; the instance is discarded.
        Senna('/usr/share/senna-v3.0', ['pos', 'chk', 'ner'])
    except OSError:
        raise SkipTest("Senna executable not found")
Exemple #3
0
def setup_module(module):
    """Skip this module's tests when the Senna tagger cannot be constructed.

    Module-level setup hook (nose/pytest naming convention).

    Args:
        module: The test module being set up; not used here.

    Raises:
        unittest.SkipTest: If constructing ``Senna`` raises ``OSError``.
    """
    # ``nose.SkipTest`` is an alias of ``unittest.SkipTest`` on Python 2.7+,
    # so taking it from the standard library raises the very same exception
    # class without depending on the unmaintained nose package.
    from unittest import SkipTest

    try:
        # Only the constructor succeeding matters; the instance is discarded.
        Senna("/usr/share/senna-v3.0", ["pos", "chk", "ner"])
    except OSError:
        raise SkipTest("Senna executable not found")
Exemple #4
0
def setup_module(module):
    """Skip the whole module via pytest if building a Senna tagger fails.

    Args:
        module: The test module being set up; not used here.
    """
    import pytest

    senna_home = "/usr/share/senna-v3.0"
    operations = ["pos", "chk", "ner"]

    try:
        # The instance itself is never used; only an OSError from the
        # constructor is of interest.
        _probe = Senna(senna_home, operations)
    except OSError:
        pytest.skip("Senna executable not found")
Exemple #5
0
    def test_senna_pipeline(self):
        """Senna pipeline interface: check chunk, NER and POS tags of a sentence."""

        # Keys read from every tagged token, in the order used by ``wanted``.
        fields = ('word', 'chk', 'ner', 'pos')

        tagger = Senna(SENNA_EXECUTABLE_PATH, ['pos', 'chk', 'ner'])
        words = 'Dusseldorf is an international business center'.split()
        observed = [
            tuple(tagged[name] for name in fields)
            for tagged in tagger.tag(words)
        ]

        wanted = [
            ('Dusseldorf', 'B-NP', 'B-LOC', 'NNP'),
            ('is', 'B-VP', 'O', 'VBZ'),
            ('an', 'B-NP', 'O', 'DT'),
            ('international', 'I-NP', 'O', 'JJ'),
            ('business', 'I-NP', 'O', 'NN'),
            ('center', 'I-NP', 'O', 'NN'),
        ]
        self.assertEqual(observed, wanted)
Exemple #6
0
    def test_senna_pipeline(self):
        """Senna pipeline interface: check chunk, NER and POS tags of a sentence."""

        tagger = Senna(SENNA_EXECUTABLE_PATH, ["pos", "chk", "ner"])
        words = "Dusseldorf is an international business center".split()

        # Collect one (word, chunk, ner, pos) row per tagged token.
        observed = []
        for tagged in tagger.tag(words):
            row = (tagged["word"], tagged["chk"], tagged["ner"], tagged["pos"])
            observed.append(row)

        wanted = [
            ("Dusseldorf", "B-NP", "B-LOC", "NNP"),
            ("is", "B-VP", "O", "VBZ"),
            ("an", "B-NP", "O", "DT"),
            ("international", "I-NP", "O", "JJ"),
            ("business", "I-NP", "O", "NN"),
            ("center", "I-NP", "O", "NN"),
        ]
        self.assertEqual(observed, wanted)
import json
import sklearn_crfsuite
from sklearn_crfsuite import metrics
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from duckling import DucklingWrapper
from datetime import datetime
from datetime import timedelta, date
from dateutil import parser
from nltk.stem.wordnet import WordNetLemmatizer
import sys
import re
from nltk.classify import Senna
from nltk.stem.wordnet import WordNetLemmatizer
# Senna pipeline built from the install at /usr/share/senna-v3.0 with the
# 'pos', 'chk' and 'ner' operations.
pipeline = Senna('/usr/share/senna-v3.0', ['pos', 'chk', 'ner'])
lmtzr = WordNetLemmatizer()

# Stop-word generation: one entry per line of stop_words.txt, stripped of
# surrounding whitespace. The ``with`` block closes the file even if reading
# fails part-way through, which the manual open()/close() pair did not.
with open('stop_words.txt', 'r') as file_open_stopwords:
    stop_words = [each_word.strip() for each_word in file_open_stopwords]

d = DucklingWrapper()

# Module-level state, all empty at import time.
# NOTE(review): how these are filled is not visible in this section -- confirm
# against the code that uses them.
leave_application_json = {}
context_stack = []
leave_application_previous = {}