Example #1
def create_model():
    train_data = map_to_numpy_array(load_dataset(TRAIN_DATA_PATH))
    test_data = map_to_numpy_array(load_dataset(TEST_DATA_PATH))
    label_data = np.array(map_to_numpy_array(load_dataset(LABEL_DATA_PATH))[:, 0:1], np.int32)
    print(label_data)
    labels_count = np.max(label_data) + 1
    message_train_x, train_y = reformat_network_dataset(train_data, labels_count)
    message_test_x, test_y = reformat_network_dataset(test_data, labels_count)
    concat = np.concatenate([message_test_x, message_train_x])
    encoded, vectorizer = datasets.encode_vectorize(concat, 100000)
    datasets.persist_vectorizer(vectorizer)
    model = build_network(encoded[len(message_test_x):], train_y, encoded[0:len(message_test_x)],
                          test_y, epochs=6)
    model.save(constants.MODEL_PATH)
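For context, a minimal sketch of what reformat_network_dataset could look like, assuming each row of the loaded array holds the label in column 0 and the message text in column 1 (the helper itself is not part of this listing):

import numpy as np

def reformat_network_dataset(data, labels_count):
    # Hypothetical helper, inferred from the call sites above: split the raw
    # rows into message texts and one-hot encoded label vectors.
    labels = data[:, 0].astype(np.int32)
    messages = data[:, 1]
    one_hot = np.zeros((len(labels), labels_count), dtype=np.float32)
    one_hot[np.arange(len(labels)), labels] = 1.0
    return messages, one_hot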
Example #2
def test_should_load_dataset_with_3_entries():
    actual_dataset = datasets.load_dataset("tests/resources/dataset.csv")
    actual_labels, actual_features = next(iter(actual_dataset.batch(3)))

    npt.assert_array_equal(actual_labels, np.array([2, 1, 0], dtype=int))
    npt.assert_array_equal(
        actual_features,
        np.array([b'foo bar', b'foobar', b'spaghetti'], dtype=object))
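The assertions imply a three-row fixture at tests/resources/dataset.csv, roughly:

2,foo bar
1,foobar
0,spaghetti

Here load_dataset is treated as returning a tf.data.Dataset of (label, sentence) pairs. A minimal sketch that would satisfy this test, assuming a comma-delimited CSV without a header:

import tensorflow as tf

def load_dataset(path):
    # Hypothetical implementation: each CSV row yields an int32 label and the
    # raw sentence as a string tensor.
    return tf.data.experimental.CsvDataset(
        path, record_defaults=[tf.int32, tf.string])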
Example #3
def create_model():
    train_labels, train_sentences = load_dataset(TRAIN_DATA_PATH)
    test_labels, test_sentences = load_dataset(TEST_DATA_PATH)
    label_data = load_dataset(LABEL_DATA_PATH)
    encoded, tokenizer = datasets.encode_vectorize(train_sentences, 10000)
    total_words = len(tokenizer.word_index)
    encoded = pad_sequences(encoded,
                            padding="post",
                            maxlen=SENTENCE_MAX_LENGTH,
                            truncating="post")
    test_encoded = pad_sequences(tokenizer.texts_to_sequences(test_sentences),
                                 padding="post",
                                 maxlen=SENTENCE_MAX_LENGTH,
                                 truncating="post")
    datasets.persist_vectorizer(tokenizer)
    model = build_network(encoded,
                          reshape_Labels(train_labels),
                          test_encoded,
                          reshape_Labels(test_labels),
                          epochs=5,
                          total=total_words,
                          max_length=SENTENCE_MAX_LENGTH)
    model.save(constants.MODEL_PATH)
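The tokenizer returned by datasets.encode_vectorize is used like a Keras Tokenizer (word_index, texts_to_sequences), so a minimal sketch under that assumption could be:

from tensorflow.keras.preprocessing.text import Tokenizer

def encode_vectorize(sentences, vocabulary_size):
    # Hypothetical implementation: fit a Keras Tokenizer on the sentences and
    # return the integer-encoded sequences together with the fitted tokenizer.
    # The oov_token choice is an assumption, not taken from the source.
    tokenizer = Tokenizer(num_words=vocabulary_size, oov_token='<unk>')
    tokenizer.fit_on_texts(sentences)
    return tokenizer.texts_to_sequences(sentences), tokenizer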
Example #4
def test_model_should_predict_correct_intent():
    labels, sentences = load_dataset('./resources/labels.csv')
    label_map = dict(zip(sentences, labels))
    print(label_map)
    inputs = np.array([["hi", label_map['greet']], ["balu", label_map['greet']],
                       ["hola", label_map['greet']],
                       ["can i cancel?", label_map['leave_annual_cancel']],
                       ["greetings", label_map['greet']],
                       ["show me my leave balance", label_map['leave_budget']],
                       ["cancel my leaves", label_map['leave_annual_cancel']],
                       ["thank you", label_map['thanks']],
                       ["stupid you", label_map['insult']],
                       ["bye", label_map['goodbye']],
                       ["what can you do", label_map['skills']]])

    intent_labels = [int(x) for x in inputs[:, -1].flatten()]
    input_str = inputs[:, 0:1].flatten()
    print('inputs', input_str, intent_labels)

    # Load the persisted tokenizer and model once instead of on every pass.
    with open(constants.VECTORIZER_PATH, 'rb') as vectorizer_file:
        vectorizer = pickle.load(vectorizer_file)
    model = load_model(constants.MODEL_PATH)

    result_arr = []
    for _ in range(5):
        encoded_matrix = vectorizer.texts_to_sequences(input_str)
        encoded_matrix = pad_sequences(encoded_matrix,
                                       padding="post",
                                       maxlen=SENTENCE_MAX_LENGTH,
                                       truncating="post")
        result = model.predict(encoded_matrix)
        predictions = np.argmax(result, axis=1)
        matches = np.sum(np.equal(predictions, np.array(intent_labels)))
        print('result', predictions, '\nfinal comparison', matches)
        result_arr.append(matches >= 7)

    assert np.array(result_arr).sum() >= 4
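The test reloads the tokenizer from constants.VECTORIZER_PATH with pickle, which suggests datasets.persist_vectorizer is the matching dump; a sketch under that assumption:

import pickle

from mowgli.utils import constants

def persist_vectorizer(vectorizer):
    # Hypothetical counterpart to the pickle.load calls above: serialize the
    # fitted tokenizer so it can be reused at prediction time.
    with open(constants.VECTORIZER_PATH, 'wb') as vectorizer_file:
        pickle.dump(vectorizer, vectorizer_file)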
Example #5
def test_should_load_dataset_with_3_entries():
    actual_labels, actual_features = datasets.load_dataset(
        "tests/resources/dataset.csv")
    npt.assert_array_equal(actual_labels, [2, 1, 0])
    npt.assert_array_equal(actual_features, ['foo bar', 'foobar', 'spaghetti'])
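Unlike Example #2, this test expects load_dataset to return plain arrays rather than a tf.data.Dataset. A sketch of such a variant, assuming the same two-column CSV layout:

import csv

import numpy as np

def load_dataset(path):
    # Hypothetical eager variant: read (label, sentence) rows into two
    # parallel arrays.
    with open(path, newline='') as csv_file:
        rows = list(csv.reader(csv_file))
    labels = np.array([int(row[0]) for row in rows])
    features = np.array([row[1] for row in rows])
    return labels, features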
import pickle

import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

from mowgli.model.create_model import SENTENCE_MAX_LENGTH
from mowgli.model.datasets import load_dataset
from mowgli.utils import constants
from mowgli.utils.constants import LABEL_DATA_PATH

# Load the persisted artifacts once at module import so classify() stays cheap.
with open(constants.VECTORIZER_PATH, 'rb') as vectorizer_file:
    VECTORIZER = pickle.load(vectorizer_file)
MODEL = load_model(constants.MODEL_PATH)
LABELS, LABEL_SENTENCES = load_dataset(LABEL_DATA_PATH)


def classify(message):
    encoded_matrix = VECTORIZER.texts_to_sequences([message])
    encoded_matrix = pad_sequences(encoded_matrix,
                                   padding="post",
                                   maxlen=SENTENCE_MAX_LENGTH,
                                   truncating="post")
    result = MODEL.predict(encoded_matrix)
    index = np.argmax(result[0])
    return LABEL_SENTENCES[index], 1.0
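
A possible invocation, assuming LABEL_SENTENCES maps each class index to an intent name such as 'leave_budget':

intent, confidence = classify('show me my leave balance')
print(intent, confidence)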