Ejemplo n.º 1
0
def test_custom_features_loaded(home_assistant_app_path):
    """Fit the domain classifier and check the average-token-length feature.

    The feature must be listed in the classifier config (hyphenated name) and
    must show up in the features extracted for a sample query (underscored key).
    """
    processor = NaturalLanguageProcessor(app_path=home_assistant_app_path)
    domain_clf = processor.domain_classifier
    domain_clf.fit()

    # Name as it appears in the classifier's feature configuration.
    assert 'average-token-length' in domain_clf.config.features

    # Key as it appears in the extracted feature set.
    query = processor.create_query('set the temperature')
    extracted = domain_clf._model._extract_features(query)
    assert 'average_token_length' in extracted
Ejemplo n.º 2
0
def test_intent_classifier_logreg(kwik_e_mart_app_path):
    """Fit the store_info intent classifier as logreg with two feature configs.

    The two fits differ only in the bag-of-words n-gram lengths:
    first ``[1]``, then ``[1, 2]``.
    """
    processor = NaturalLanguageProcessor(app_path=kwik_e_mart_app_path)
    intent_clf = processor.domains["store_info"].intent_classifier

    for ngram_lengths in ([1], [1, 2]):
        feature_config = {
            "bag-of-words": {"lengths": ngram_lengths},
            "freq": {"bins": 5},
            "in-gaz": {},
            "length": {},
        }
        intent_clf.fit(
            model_settings={"classifier_type": "logreg"},
            features=feature_config,
        )
Ejemplo n.º 3
0
def test_domain_classifier_random_forest(home_assistant_app_path):
    """Fit the domain classifier as rforest with a ``C`` param and expect a warning.

    ``logging.Logger.warning`` is patched so the test can check that the
    "Unexpected param `C`" message was logged during fitting.
    """
    processor = NaturalLanguageProcessor(app_path=home_assistant_app_path)
    domain_clf = processor.domain_classifier
    forest_params = {"C": 10}

    with patch("logging.Logger.warning") as warning_mock:
        domain_clf.fit(
            model_settings={"classifier_type": "rforest"},
            params=forest_params,
        )
        warning_mock.assert_any_call(
            "Unexpected param `C`, dropping it from model config."
        )
Ejemplo n.º 4
0
def test_intent_classifier_random_forest(kwik_e_mart_app_path, caplog):
    """Fit the store_info intent classifier as rforest in three configurations.

    1. k-fold grid search over n_estimators / criterion / warm_start.
    2. k-fold grid search over class_bias.
    3. An explicit feature config, during which warnings about the dropped
       ``C`` and ``fit_intercept`` params must be logged.
    """
    processor = NaturalLanguageProcessor(app_path=kwik_e_mart_app_path)
    intent_clf = processor.domains['store_info'].intent_classifier

    # 1) grid search over forest hyper-parameters
    forest_selection = {
        'grid': {
            'n_estimators': [5, 10, 15, 20],
            'criterion': ['gini', 'entropy'],
            'warm_start': [True, False],
        },
        'type': 'k-fold',
        'k': 10,
    }
    intent_clf.fit(
        model_settings={'classifier_type': 'rforest'},
        param_selection=forest_selection,
    )

    # 2) grid search over class bias only
    bias_selection = {
        'type': 'k-fold',
        'k': 10,
        'grid': {'class_bias': [0.7, 0.3, 0]},
    }
    intent_clf.fit(
        model_settings={'classifier_type': 'rforest'},
        param_selection=bias_selection,
    )

    # 3) explicit features; the patched logger must record both warnings
    feature_config = {
        'bag-of-words': {'lengths': [1, 2]},
        'freq': {'bins': 5},
        'in-gaz': {},
        'length': {},
    }
    expected_warnings = (
        'Unexpected param `C`, dropping it from model config.',
        'Unexpected param `fit_intercept`, dropping it from model config.',
    )
    with patch('logging.Logger.warning') as warning_mock:
        intent_clf.fit(
            model_settings={'classifier_type': 'rforest'},
            features=feature_config,
        )
        for message in expected_warnings:
            warning_mock.assert_any_call(message)
Ejemplo n.º 5
0
def test_intent_classifier_svm(kwik_e_mart_app_path):
    """Fit the store_info intent classifier as an SVM with a 10-fold grid search."""
    processor = NaturalLanguageProcessor(app_path=kwik_e_mart_app_path)
    intent_clf = processor.domains["store_info"].intent_classifier

    # Grid covers the regularization strength and the kernel type.
    selection = {
        "grid": {
            "C": [0.1, 0.5, 1, 5, 10, 50, 100, 1000, 5000],
            "kernel": ["linear", "rbf", "poly"],
        },
        "type": "k-fold",
        "k": 10,
    }
    intent_clf.fit(
        model_settings={"classifier_type": "svm"},
        param_selection=selection,
    )
Ejemplo n.º 6
0
def test_intent_classifier_svm(kwik_e_mart_app_path):
    """Run a 10-fold SVM grid search on the store_info intent classifier."""
    svm_grid = {
        'C': [0.1, 0.5, 1, 5, 10, 50, 100, 1000, 5000],
        'kernel': ['linear', 'rbf', 'poly'],
    }
    selection = {'grid': svm_grid, 'type': 'k-fold', 'k': 10}

    processor = NaturalLanguageProcessor(app_path=kwik_e_mart_app_path)
    store_info = processor.domains['store_info']
    store_info.intent_classifier.fit(
        model_settings={'classifier_type': 'svm'},
        param_selection=selection,
    )
Ejemplo n.º 7
0
def test_domain_classifier_get_stats(home_assistant_app_path, capsys):
    """Fit and evaluate the domain classifier, then check the printed stats.

    The captured stdout is split on spaces (newlines removed) and must
    contain each of the expected header / metric tokens.
    """
    processor = NaturalLanguageProcessor(app_path=home_assistant_app_path)
    domain_clf = processor.domain_classifier
    domain_clf.fit()

    evaluation = domain_clf.evaluate()
    evaluation.print_stats()

    printed = capsys.readouterr().out
    tokens = {tok for tok in printed.replace("\n", "").split(" ") if tok != ""}

    expected_tokens = (
        "Overall",
        "statistics:",
        "accuracy",
        "f1_weighted",
        "tp",
        "fp",
        "fn",
    )
    for expected in expected_tokens:
        assert expected in tokens
def test_tagger_get_stats(kwik_e_mart_app_path, capsys):
    """Fit and evaluate the get_store_hours entity recognizer, then check the printed stats.

    The captured stdout is split on spaces (newlines removed) and must
    contain each of the expected header / metric tokens.
    """
    processor = NaturalLanguageProcessor(app_path=kwik_e_mart_app_path)
    store_hours = processor.domains['store_info'].intents['get_store_hours']
    recognizer = store_hours.entity_recognizer
    recognizer.fit()

    evaluation = recognizer.evaluate()
    evaluation.print_stats()

    printed = capsys.readouterr().out
    tokens = {tok for tok in printed.replace('\n', '').split(' ') if tok != ''}

    for expected in ('Overall', 'statistics:', 'accuracy', 'f1_weighted',
                     'tp', 'fp', 'fn'):
        assert expected in tokens
Ejemplo n.º 9
0
def test_tagger_get_stats(kwik_e_mart_app_path, capsys):
    """Check that the entity recognizer's printed evaluation stats contain the expected tokens."""
    processor = NaturalLanguageProcessor(app_path=kwik_e_mart_app_path)
    recognizer = (
        processor.domains["store_info"]
        .intents["get_store_hours"]
        .entity_recognizer
    )
    recognizer.fit()
    recognizer_eval = recognizer.evaluate()
    recognizer_eval.print_stats()

    # Flatten stdout to a set of space-separated, non-empty words.
    stdout_text = capsys.readouterr().out.replace("\n", "")
    words = {word for word in stdout_text.split(" ") if word != ""}

    assert "Overall" in words
    assert "statistics:" in words
    assert "accuracy" in words
    assert "f1_weighted" in words
    assert "tp" in words
    assert "fp" in words
    assert "fn" in words
Ejemplo n.º 10
0
def test_domain_classifier_get_stats(home_assistant_app_path, capsys):
    """Check that the domain classifier's printed evaluation stats contain the expected tokens."""
    processor = NaturalLanguageProcessor(app_path=home_assistant_app_path)
    domain_clf = processor.domain_classifier
    domain_clf.fit()
    domain_eval = domain_clf.evaluate()
    domain_eval.print_stats()

    # Flatten stdout to a set of space-separated, non-empty words.
    stdout_text = capsys.readouterr().out.replace('\n', '')
    words = {word for word in stdout_text.split(' ') if word != ''}

    assert 'Overall' in words
    assert 'statistics:' in words
    assert 'accuracy' in words
    assert 'f1_weighted' in words
    assert 'tp' in words
    assert 'fp' in words
    assert 'fn' in words
Ejemplo n.º 11
0
def test_memm_model(kwik_e_mart_app_path):
    """Fit the get_store_hours entity recognizer with a penalty/C grid search.

    After fitting, both grid keys must still be present in the recognizer's
    param-selection config and no warning may have been logged.
    """
    processor = NaturalLanguageProcessor(app_path=kwik_e_mart_app_path)
    store_hours = processor.domains['store_info'].intents['get_store_hours']
    recognizer = store_hours.entity_recognizer

    selection = {
        'type': 'k-fold',
        'k': 5,
        'scoring': 'accuracy',
        'grid': {
            'penalty': ['l1', 'l2'],
            'C': [0.01, 1, 100, 10000, 1000000, 100000000],
        },
    }

    with patch('logging.Logger.warning') as warning_mock:
        recognizer.fit(param_selection=selection)
        for grid_key in ('C', 'penalty'):
            assert grid_key in recognizer.config.param_selection['grid']
        warning_mock.assert_not_called()
Ejemplo n.º 12
0
def test_memm_model(kwik_e_mart_app_path):
    """Grid-search the entity recognizer and verify the grid is kept without warnings."""
    grid = {
        "penalty": ["l1", "l2"],
        "C": [0.01, 1, 100, 10000, 1000000, 100000000],
    }
    selection = {"type": "k-fold", "k": 5, "scoring": "accuracy", "grid": grid}

    processor = NaturalLanguageProcessor(app_path=kwik_e_mart_app_path)
    recognizer = (
        processor.domains["store_info"]
        .intents["get_store_hours"]
        .entity_recognizer
    )

    with patch("logging.Logger.warning") as warning_mock:
        recognizer.fit(param_selection=selection)
        # Both searched hyper-parameters must be present in the stored config.
        assert "C" in recognizer.config.param_selection["grid"]
        assert "penalty" in recognizer.config.param_selection["grid"]
        # Fitting with these params must not log any warning.
        warning_mock.assert_not_called()
Ejemplo n.º 13
0
def test_intent_classifier_random_forest(kwik_e_mart_app_path, caplog):
    """Exercise the rforest intent classifier with two grid searches and a feature config.

    The final fit runs with ``logging.Logger.warning`` patched and must log
    the "Unexpected param" warnings for ``C`` and ``fit_intercept``.
    """
    processor = NaturalLanguageProcessor(app_path=kwik_e_mart_app_path)
    intent_clf = processor.domains["store_info"].intent_classifier

    # First search: forest size, split criterion and warm start.
    hyperparam_grid = {
        "n_estimators": [5, 10, 15, 20],
        "criterion": ["gini", "entropy"],
        "warm_start": [True, False],
    }
    intent_clf.fit(
        model_settings={"classifier_type": "rforest"},
        param_selection={"grid": hyperparam_grid, "type": "k-fold", "k": 10},
    )

    # Second search: class bias only.
    class_bias_grid = {"class_bias": [0.7, 0.3, 0]}
    intent_clf.fit(
        model_settings={"classifier_type": "rforest"},
        param_selection={"type": "k-fold", "k": 10, "grid": class_bias_grid},
    )

    # Final fit with explicit features, checking the logged warnings.
    feature_config = {
        "bag-of-words": {"lengths": [1, 2]},
        "freq": {"bins": 5},
        "in-gaz": {},
        "length": {},
    }
    with patch("logging.Logger.warning") as warning_mock:
        intent_clf.fit(
            model_settings={"classifier_type": "rforest"},
            features=feature_config,
        )
        for message in (
            "Unexpected param `C`, dropping it from model config.",
            "Unexpected param `fit_intercept`, dropping it from model config.",
        ):
            warning_mock.assert_any_call(message)
Ejemplo n.º 14
0
def test_intent_classifier_logreg(kwik_e_mart_app_path):
    """Fit the logreg intent classifier twice: with unigrams, then with uni+bigrams."""
    processor = NaturalLanguageProcessor(app_path=kwik_e_mart_app_path)
    intent_clf = processor.domains['store_info'].intent_classifier

    unigram_features = {
        'bag-of-words': {'lengths': [1]},
        'freq': {'bins': 5},
        'in-gaz': {},
        'length': {},
    }
    bigram_features = {
        'bag-of-words': {'lengths': [1, 2]},
        'freq': {'bins': 5},
        'in-gaz': {},
        'length': {},
    }

    for feature_config in (unigram_features, bigram_features):
        intent_clf.fit(
            model_settings={'classifier_type': 'logreg'},
            features=feature_config,
        )
Ejemplo n.º 15
0
from mindmeld.components.nlp import NaturalLanguageProcessor
# Create the NLP pipeline for the app in the current directory and build it.
nlp = NaturalLanguageProcessor(app_path='.')
nlp.build()

Ejemplo n.º 16
0
from mindmeld.components.nlp import NaturalLanguageProcessor
import spacy
import os.path
# spaCy pipelines: the stock small English model plus two models loaded by
# the names "JOB_TITLE" and "DATE_TIME".
spacyNlp = spacy.load("en_core_web_sm")
jobTitleNlp = spacy.load("JOB_TITLE")
dateTimeNlp = spacy.load("DATE_TIME")

input_text = input("Enter text to process:\n")
# The application root is the parent of the directory containing this script.
path = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
nlp = NaturalLanguageProcessor(app_path=path)
nlp.build(incremental=True)
sentences = input_text.split(".")

reply = "\nThe contract has been summarized into the key details, as seen below \n\n"
personCounter = 0

for s in sentences:
    # Compare by value: `is not ""` tests object identity, which is an
    # implementation detail for strings and a SyntaxWarning on Python 3.8+.
    if s != "":
        processedWord = nlp.process(s)
        intent = processedWord.get('intent')
        domain = processedWord.get('domain')
        print("Intent is : " + intent)
        print("Domain is : " + domain)

        # handle salary case: the salary is every digit found in the sentence
        result_salary = ''
        if intent == 'get_salary':
            result_salary = ''.join(c for c in s if c.isdigit())
            reply += "The SALARY of the employee is " + result_salary + "/month\n"
Ejemplo n.º 17
0
def handle_message(message):
    """Summarize the key details of a contract received as a chat message.

    The message text is split on "." and every non-empty fragment is run
    through the NLP pipeline to obtain its intent and domain. Depending on
    the intent, details are appended to the reply: the digits in the
    fragment (salary) and the entities found by the three spaCy pipelines
    (people, organizations, places, job titles, dates). The reply is then
    sent back with ``bot.reply_to``.

    Args:
        message: incoming message object; only its ``text`` attribute is read
            here, and the object itself is passed to ``bot.reply_to``.
    """
    input_text = message.text

    # NOTE(review): the NLP pipeline is constructed and built on every
    # message; consider building it once at module level if this is slow.
    path = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
    nlp = NaturalLanguageProcessor(app_path=path)
    nlp.build(incremental=True)

    reply = "\nThe contract has been summarized into the key details, as seen below \n\n"
    for s in input_text.split("."):
        # Skip empty fragments by value; the previous `s is not ""` compared
        # object identity, which is implementation-dependent for strings.
        if not s:
            continue

        processedWord = nlp.process(s)
        intent = processedWord.get('intent')
        domain = processedWord.get('domain')
        print("Intent is : " + intent)
        print("Domain is : " + domain)

        # Salary: concatenate every digit that appears in the fragment.
        if intent == 'get_salary':
            result_salary = ''.join(c for c in s if c.isdigit())
            reply += "The SALARY of the employee is " + result_salary + "/month\n"

        doc = spacyNlp(s)
        jobDoc = jobTitleNlp(s)
        dateDoc = dateTimeNlp(s)

        # General entities: a PERSON is reported as employee or employer
        # depending on the intent; ORG and GPE are reported for any intent.
        for ent in doc.ents:
            if ent.label_ == 'PERSON' and intent == 'employment_details':
                reply += "The EMPLOYEE in the contract is : " + ent.text + "\n"
            elif ent.label_ == 'PERSON' and intent == 'employed_by':
                reply += "The EMPLOYER in the contract is : " + ent.text + "\n"
            elif ent.label_ == 'ORG':
                reply += "The COMPANY that the employee will report to is : " + ent.text + "\n"
            elif ent.label_ == 'GPE':
                reply += "APPLICABLE LAW : " + ent.text + "\n"

        # Job-title entities are only reported for the job_details intent.
        for ent in jobDoc.ents:
            if intent == 'job_details':
                reply += "The JOB TITLE of the employee is : " + ent.text + "\n"

        # Date entities are labelled according to the date-related intent.
        for ent in dateDoc.ents:
            if intent == 'contract_date':
                reply += "The contract was SIGNED on : " + ent.text + "\n"
            elif intent == 'effective_date':
                reply += "The STARTING date of employment is : " + ent.text + "\n"
            elif intent == 'last_date':
                reply += "The TERMINATION date of employment is : " + ent.text + "\n"

    bot.reply_to(message, reply)  # responds with success message