Example #1
# Zero-pad every document to the same length so they stack into one 3-D array.
# `max` is assumed to hold the longest sequence length, computed before this snippet.
new_x = np.zeros((x.shape[0], max, x[0].shape[1]))

for i, words in enumerate(x):
    new_x[i, :words.shape[0], :] = words

x = new_x

y = keras.utils.to_categorical(y)  # one-hot encode the integer labels

data_set = DataSet.from_np_array(x, y, class_names=class_names)

path = "../classification/RNN/saved_model/rnn.model"

with Logger("rnn", root='../') as l:
    l.log_and_print(data_set)
    l.log("")

    if os.path.isfile(path):
        classifier = RnnClassifier.load(path, data_set, logger=l)
    else:
        classifier = RnnClassifier(data_set, logger=l)

    classifier.fit(path, epochs=20)

    classifier.validate()
    classifier.metrics()
    classifier.plot_confusion_matrix()
    classifier.plot_history()
    classifier.save(path)
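
# A minimal, self-contained sketch of the zero-padding step above, on toy data
# (the array sizes here are invented for illustration):
import numpy as np

docs = [np.random.rand(n, 5) for n in (2, 4, 3)]  # three "documents" of word vectors
max_len = max(d.shape[0] for d in docs)           # longest sequence: 4

padded = np.zeros((len(docs), max_len, docs[0].shape[1]))
for i, words in enumerate(docs):
    padded[i, :words.shape[0], :] = words         # left-align, zero-pad the tail

print(padded.shape)  # (3, 4, 5)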
Example #2
# NOTE: the snippet starts mid-call; `csv_path` is a hypothetical stand-in for the
# elided pandas read that the original closed with `na_filter=False)`.
df = pd.read_csv(csv_path,
                 na_filter=False)  # keep empty cells as '' rather than NaN
categories = [
    '"FHNW Benutzerpasswort von Studierenden zurücksetzen"',
    '"FHNW Passwortänderung (Active Directory)"', '"VPN Zugriff"',
    '"Drucker technische Probleme"', '"Drucker verbinden"',
    '"Webmail technische Probleme"', '"Papierstau"',
    '"VPN technische Probleme"', '"Webmail Zugriff"',
    '"SWITCHengines - Cloud Infrastructure"', '"Datenablage"',
    '"Web Single Sign On AAI"', '"Benutzendenkonto gesperrt"',
    '"Speicherplatz"', '"Benutzername vergessen"', '"Passwort ändern"'
]
df = df.loc[df['category'].isin(categories)]
sentences = filter_ticket_messages(df.message)

data_set = DataSet.from_np_array(x,
                                 y,
                                 class_names=class_names,
                                 p_train=0.8,
                                 p_val=0.1)
data_set.add_text_data(sentences)

with Logger("multinomial_naive_bayes", root='../') as l:
    l.log_and_print(data_set)
    l.log_and_print()

    classifier = MultinomialNaiveBayes(data_set, logger=l)
    classifier.hyperparameter()
    classifier.validate()
    classifier.metrics()
    classifier.plot_confusion_matrix()
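
# How the MultinomialNaiveBayes wrapper above plausibly works under the hood: a
# scikit-learn sketch with invented toy ticket texts and labels.
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline

texts = ["printer jam on the second floor", "please reset my password",
         "vpn will not connect from home", "forgot my password again"]
labels = ["Drucker technische Probleme", "Passwort ändern",
          "VPN technische Probleme", "Passwort ändern"]

model = make_pipeline(TfidfVectorizer(), MultinomialNB())
model.fit(texts, labels)
print(model.predict(["cannot reach the vpn"]))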
Example #3
labels, class_names = get_merged_labels_three(root='../')

x = get_doc_vec_ticketing_message(root='../')
y = labels

n_values = len(class_names)
y = np.eye(n_values)[y]  # one-hot encode via identity-matrix row lookup

data_set = DataSet.from_np_array(x,
                                 y,
                                 class_names=class_names,
                                 p_train=0.8,
                                 p_val=0.1)

with Logger("multilayer_perceptron", root='../') as l:
    l.log_and_print(data_set)
    l.log("")

    # classifier = multilayer_perceptron.MultilayerPerceptron(data_set, num_classes=len(class_names), epoch=50, verbose=1,
    #                                                        logger=l)
    # classifier.fit()
    # classifier.validate()
    # classifier.metrics()
    # classifier.plot_confusion_matrix()

    model = hyperparameter_tuning.fit_hyper(root='../')
    classifier = multilayer_perceptron.MultilayerPerceptron(
        data_set,
        num_classes=len(class_names),
        epoch=20,
        verbose=1,
        logger=l)  # closing arguments assumed, mirroring the commented-out call above
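
# The `np.eye(n_values)[y]` one-hot trick above, shown on toy labels:
import numpy as np

y_toy = np.array([0, 2, 1, 2])
one_hot = np.eye(3)[y_toy]  # row i of the identity matrix is the one-hot vector for class i
print(one_hot)
# [[1. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]
#  [0. 0. 1.]]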
Example #4
# class_names = ['positive', 'negative', 'neutral']
# x = np.load("data/test_data/fastTextDocumentVector.npy")
# positive 0, negative 1, neutral 3
# y = np.load("data/test_data/labels.npy")

le = preprocessing.LabelEncoder()
le.fit(y)
y = le.transform(y)  # map string labels to integer class ids

data_set = DataSet.from_np_array(x,
                                 y,
                                 class_names=class_names,
                                 p_train=0.8,
                                 p_val=0.1)

with Logger("voting", root='../') as l:
    l.log_and_print(data_set)
    l.log_and_print()

    svm = SupportingVectorMachine(data_set, verbose=0, logger=l)
    gradient_boost = GradienBoost(data_set,
                                  verbose=0,
                                  n_estimators=120,
                                  logger=l)
    random_forest = RandomForest(data_set, verbose=0, logger=l)

    svm.fit()
    gradient_boost.fit()
    random_forest.fit()

    # The original snippet is truncated here; the estimator list and closing
    # arguments below are an assumed continuation (argument name included).
    classifier = Voting(data_set,
                        classifiers=[svm, gradient_boost, random_forest],
                        logger=l)
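
# The idea behind the Voting wrapper, sketched with scikit-learn's built-in
# VotingClassifier on a stock dataset (models and data here are stand-ins,
# not the project's own classes):
from sklearn.datasets import load_iris
from sklearn.ensemble import (GradientBoostingClassifier, RandomForestClassifier,
                              VotingClassifier)
from sklearn.svm import SVC

X, y = load_iris(return_X_y=True)
voting = VotingClassifier(estimators=[('svm', SVC()),
                                      ('gb', GradientBoostingClassifier(n_estimators=120)),
                                      ('rf', RandomForestClassifier())],
                          voting='hard')  # majority vote over the three models
voting.fit(X, y)
print(voting.score(X, y))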
Example #5
    '"VPN technische Probleme"', '"Webmail Zugriff"',
    '"SWITCHengines - Cloud Infrastructure"', '"Datenablage"',
    '"Web Single Sign On AAI"', '"Benutzendenkonto gesperrt"',
    '"Speicherplatz"', '"Benutzername vergessen"', '"Passwort ändern"'
]
df = df.loc[df['category'].isin(categories)]
sentences = filter_ticket_messages(df.message)

data_set = DataSet.from_np_array(x,
                                 y,
                                 class_names=class_names,
                                 p_train=0.8,
                                 p_val=0.1)
data_set.add_text_data(sentences)

with Logger("stochastik_gradient_descend", root='../') as l:
    l.log_and_print(data_set)
    l.log_and_print()

    classifier = StochastikGradientDescentClassifier(data_set, logger=l)
    l.log_and_print("SGD with document vector")
    #classifier.hyperparameter()
    #classifier.validate()
    #classifier.metrics()
    #classifier.plot_confusion_matrix()
    l.log_and_print()
    l.log_and_print("SGD with tf idf")
    classifier.hyperparameter_tf()
    classifier.validate()
    classifier.metrics()
    classifier.plot_confusion_matrix()
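
# What `hyperparameter_tf()` presumably does: grid-search an SGD classifier over
# tf-idf features. A scikit-learn sketch with invented toy data:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

texts = ["reset password", "password expired", "forgot password",
         "vpn broken", "vpn timeout", "vpn login fails"]
labels = ["pw", "pw", "pw", "vpn", "vpn", "vpn"]

pipe = Pipeline([("tfidf", TfidfVectorizer()), ("sgd", SGDClassifier())])
param_grid = {"sgd__alpha": [1e-4, 1e-3], "sgd__loss": ["hinge", "log_loss"]}
grid = GridSearchCV(pipe, param_grid, cv=3)
grid.fit(texts, labels)
print(grid.best_params_)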
Example #6
# NOTE: the opening of this commented-out block is elided in the original snippet.
'''
    classifier.validate()
    classifier.metrics()
    #  classifier.print_wrong_test()
    classifier.plot_confusion_matrix()
    classifier.save('../data/saved_models/svm_optimised.pkl')
'''

#  Train password classifier
labels, class_names = get_password_data(root='../')
x = get_doc_vec_ticketing_message(root='../')
y = labels
le = preprocessing.LabelEncoder()  # not defined earlier in this truncated snippet
le.fit(y)
y = le.transform(y)

data_set = DataSet.from_np_array(x,
                                 y,
                                 class_names=class_names,
                                 p_train=0.8,
                                 p_val=0.1)
with Logger("svm", root='../') as l:
    l.log_and_print("Password classifier")
    l.log_and_print(data_set)
    l.log_and_print()

    classifier = SupportingVectorMachine(data_set, verbose=1, logger=l)
    classifier.hyper_parameter_tuning()
    classifier.validate()
    classifier.metrics()
    classifier.plot_confusion_matrix()
    classifier.save('../data/saved_models/svm_password.pkl')
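
# The LabelEncoder step above, in isolation (toy labels for illustration):
from sklearn import preprocessing

le = preprocessing.LabelEncoder()
raw = ["password", "other", "password", "vpn"]
encoded = le.fit_transform(raw)       # classes are sorted alphabetically
print(list(le.classes_))              # ['other', 'password', 'vpn']
print(le.inverse_transform(encoded))  # back to the original strings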
Example #7
# twitter data
# class_names = ['positive', 'negative', 'neutral']
# x = np.load("data/test_data/fastTextDocumentVector.npy")
# positive 0, negative 1, neutral 3
# y = np.load("data/test_data/labels.npy")

le = preprocessing.LabelEncoder()
le.fit(y)
y = le.transform(y)

data_set = DataSet.from_np_array(x,
                                 y,
                                 class_names=class_names,
                                 p_train=0.8,
                                 p_val=0.1)
data_set.plot_distribution('train')
data_set.plot_distribution('val')
data_set.plot_distribution('test')
data_set.plot_distribution('all')

with Logger("gradient_boost", root='../') as l:
    l.log_and_print(data_set)
    l.log("")

    classifier = GradienBoost(data_set, n_estimators=120, verbose=1, logger=l)
    # classifier.hyper_parameter_tuning()
    classifier.fit()
    classifier.validate()
    classifier.metrics()
    classifier.plot_confusion_matrix()
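
# The 80/10/10 split implied by p_train=0.8, p_val=0.1 above, sketched with
# scikit-learn on random stand-in data:
import numpy as np
from sklearn.model_selection import train_test_split

X = np.random.rand(100, 20)
y = np.random.randint(0, 3, 100)

X_train, X_rest, y_train, y_rest = train_test_split(X, y, train_size=0.8, stratify=y)
X_val, X_test, y_val, y_test = train_test_split(X_rest, y_rest, test_size=0.5,
                                                stratify=y_rest)
print(len(X_train), len(X_val), len(X_test))  # 80 10 10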
Example #8
# class_names = ['positive', 'negative', 'neutral']
# x = np.load("data/test_data/fastTextDocumentVector.npy")
# positive 0, negative 1, neutral 3
# y = np.load("data/test_data/labels.npy")

le = preprocessing.LabelEncoder()
le.fit(y)
y = le.transform(y)

data_set = DataSet.from_np_array(x,
                                 y,
                                 class_names=class_names,
                                 p_train=0.6,
                                 p_val=0.1)

with Logger("random_forest", root='../') as l:
    l.log_and_print(data_set)
    l.log("")

    classifier = RandomForest(data_set,
                              n_estimators=1000,
                              max_leaf_nodes=20,
                              verbose=1,
                              logger=l)
    classifier.hyper_parameter_tuning()
    # classifier.fit()
    classifier.validate()
    classifier.metrics()
    classifier.plot_confusion_matrix()
    classifier.save('../data/saved_models/randomForest.pkl')  # '../' prefix assumed, matching the other save paths
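
# Persisting a fitted model to a .pkl file, as the save() calls above suggest
# (plain pickle here; the project's own save() may differ):
import pickle
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

X, y = load_iris(return_X_y=True)
rf = RandomForestClassifier(n_estimators=100, max_leaf_nodes=20).fit(X, y)

with open("randomForest.pkl", "wb") as f:
    pickle.dump(rf, f)
with open("randomForest.pkl", "rb") as f:
    restored = pickle.load(f)
print(restored.score(X, y))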