# --- RNN ticket classifier: pad sequences, build the data set, train/evaluate ---
# Pad the variable-length word-vector sequences in x into one dense 3-D array.
# NOTE(review): `max` is used here as the padded sequence length; it presumably
# is an int computed before this chunk (otherwise this passes the builtin
# function as a size) — confirm, and consider renaming it max_len upstream.
new_x = np.zeros((x.shape[0], max, x[0].shape[1]))
# enumerate replaces the original manual `i = 0 … i += 1` counter.
for i, words in enumerate(x):
    new_x[i, :words.shape[0], :] = words
x = new_x

# One-hot encode the integer class labels.
y = keras.utils.to_categorical(y)

data_set = DataSet.from_np_array(x, y, class_names=class_names)

path = "../classification/RNN/saved_model/rnn.model"
with Logger("rnn", root='../') as l:
    l.log_and_print(data_set)
    l.log("")
    # Resume from a previously saved model when one exists; otherwise train
    # a fresh one (indentation reconstructed: fit() only on the fresh branch,
    # since a loaded model is already trained — TODO confirm against history).
    if os.path.isfile(path):
        classifier = RnnClassifier.load(path, data_set, logger=l)
    else:
        classifier = RnnClassifier(data_set, logger=l)
        classifier.fit(path, epochs=20)
    classifier.validate()
    classifier.metrics()
    classifier.plot_confusion_matrix()
    classifier.plot_history()
    classifier.save(path)
# Tail of a pandas read_csv(...)-style call whose opening lines are outside
# this chunk; na_filter=False keeps empty fields as "" instead of NaN.
na_filter=False)

# Ticket categories to keep (German FHNW service-desk labels).
# NOTE(review): each label carries literal double quotes — presumably the CSV
# stores category names quoted; verify against the data file.
categories = [
    '"FHNW Benutzerpasswort von Studierenden zurücksetzen"',
    '"FHNW Passwortänderung (Active Directory)"',
    '"VPN Zugriff"',
    '"Drucker technische Probleme"',
    '"Drucker verbinden"',
    '"Webmail technische Probleme"',
    '"Papierstau"',
    '"VPN technische Probleme"',
    '"Webmail Zugriff"',
    '"SWITCHengines - Cloud Infrastructure"',
    '"Datenablage"',
    '"Web Single Sign On AAI"',
    '"Benutzendenkonto gesperrt"',
    '"Speicherplatz"',
    '"Benutzername vergessen"',
    '"Passwort ändern"'
]

# Keep only tickets from the whitelisted categories, then clean the raw
# ticket messages into sentence text.
df = df.loc[df['category'].isin(categories)]
sentences = filter_ticket_messages(df.message)

# x and y come from before this chunk — TODO confirm they stay aligned with
# the filtered DataFrame rows.
data_set = DataSet.from_np_array(x, y, class_names=class_names, p_train=0.8, p_val=0.1)
data_set.add_text_data(sentences)

with Logger("multinomial_naive_bayes", root='../') as l:
    l.log_and_print(data_set)
    l.log_and_print()
    classifier = MultinomialNaiveBayes(data_set, logger=l)
    classifier.hyperparameter()
    classifier.validate()
    classifier.metrics()
    classifier.plot_confusion_matrix()
# --- Multilayer-perceptron ticket classifier on document vectors ---
# Load the merged three-way labels and the document-vector features.
labels, class_names = get_merged_labels_three(root='../')
x = get_doc_vec_ticketing_message(root='../')
y = labels

# One-hot encode the integer labels via an identity-matrix lookup.
n_values = len(class_names)
y = np.eye(n_values)[y]

data_set = DataSet.from_np_array(x, y, class_names=class_names, p_train=0.8, p_val=0.1)

with Logger("multilayer_perceptron", root='../') as l:
    l.log_and_print(data_set)
    l.log("")
    # Earlier fixed-hyperparameter run, kept for reference:
    # classifier = multilayer_perceptron.MultilayerPerceptron(data_set, num_classes=len(class_names), epoch=50, verbose=1,
    #                                                         logger=l)
    # classifier.fit()
    # classifier.validate()
    # classifier.metrics()
    # classifier.plot_confusion_matrix()

    # Tune hyperparameters first, then build the final classifier.
    model = hyperparameter_tuning.fit_hyper(root='../')
    # NOTE(review): this constructor call is cut off at the end of this chunk —
    # its remaining arguments continue outside the visible source.
    classifier = multilayer_perceptron.MultilayerPerceptron(
        data_set,
        num_classes=len(class_names),
        epoch=20,
# class_names = ['positive', 'negative', 'neutral'] # x = np.load("data/test_data/fastTextDocumentVector.npy") ## positv 0, negative 1, neutral 3 # y = np.load("data/test_data/labels.npy") le = preprocessing.LabelEncoder() le.fit(y) y = le.transform(y) data_set = DataSet.from_np_array(x, y, class_names=class_names, p_train=0.8, p_val=0.1) with Logger("voting", root='../') as l: l.log_and_print(data_set) l.log_and_print() svm = SupportingVectorMachine(data_set, verbose=0, logger=l) gradient_boost = GradienBoost(data_set, verbose=0, n_estimators=120, logger=l) random_forest = RandomForest(data_set, verbose=0, logger=l) svm.fit() gradient_boost.fit() random_forest.fit() classifier = Voting(data_set,
# Tail of the `categories` whitelist literal; the opening bracket and the
# first entries are outside this chunk.
    '"VPN technische Probleme"',
    '"Webmail Zugriff"',
    '"SWITCHengines - Cloud Infrastructure"',
    '"Datenablage"',
    '"Web Single Sign On AAI"',
    '"Benutzendenkonto gesperrt"',
    '"Speicherplatz"',
    '"Benutzername vergessen"',
    '"Passwort ändern"'
]

# Keep only whitelisted categories and clean the raw ticket messages.
df = df.loc[df['category'].isin(categories)]
sentences = filter_ticket_messages(df.message)

# x and y come from before this chunk — TODO confirm they stay aligned with
# the filtered DataFrame rows.
data_set = DataSet.from_np_array(x, y, class_names=class_names, p_train=0.8, p_val=0.1)
data_set.add_text_data(sentences)

with Logger("stochastik_gradient_descend", root='../') as l:
    l.log_and_print(data_set)
    l.log_and_print()
    classifier = StochastikGradientDescentClassifier(data_set, logger=l)
    # Document-vector variant, currently disabled in favour of tf-idf below.
    l.log_and_print("SGD with document vector")
    #classifier.hyperparameter()
    #classifier.validate()
    #classifier.metrics()
    #classifier.plot_confusion_matrix()
    l.log_and_print()
    # tf-idf variant: tune, then evaluate.
    l.log_and_print("SGD with tf idf")
    classifier.hyperparameter_tf()
    classifier.validate()
    classifier.metrics()
    classifier.plot_confusion_matrix()
# Evaluate and persist the tuned SVM (its construction is outside this chunk).
classifier.validate()
classifier.metrics()
# classifier.print_wrong_test()
classifier.plot_confusion_matrix()
classifier.save('../data/saved_models/svm_optimised.pkl')
# NOTE(review): the triple-quote below is unbalanced within this chunk — its
# matching delimiter is outside the visible source, so whether the password
# section below is live code or string content cannot be determined here.
'''
# Train password classifier
labels, class_names = get_password_data(root='../')
x = get_doc_vec_ticketing_message(root='../')
y = labels
# `le` is a LabelEncoder created before this chunk — TODO confirm.
le.fit(y)
y = le.transform(y)
data_set = DataSet.from_np_array(x, y, class_names=class_names, p_train=0.8, p_val=0.1)
with Logger("svm", root='../') as l:
    l.log_and_print("Password classifier")
    l.log_and_print(data_set)
    l.log_and_print()
    classifier = SupportingVectorMachine(data_set, verbose=1, logger=l)
    classifier.hyper_parameter_tuning()
    classifier.validate()
    classifier.metrics()
    classifier.plot_confusion_matrix()
    classifier.save('../data/saved_models/svm_password.pkl')
# --- Gradient-boosting classifier on document vectors ---
# x / y / class_names come from before this chunk.
# Alternative Twitter sentiment test data, kept for reference:
# twitter data
# class_names = ['positive', 'negative', 'neutral']
# x = np.load("data/test_data/fastTextDocumentVector.npy")  ## positv 0, negative 1, neutral 3
# y = np.load("data/test_data/labels.npy")

# Map raw labels onto contiguous integer codes.
le = preprocessing.LabelEncoder()
# fit_transform replaces the original separate fit() + transform() pair.
y = le.fit_transform(y)

data_set = DataSet.from_np_array(x, y, class_names=class_names, p_train=0.8, p_val=0.1)

# Visualise the class balance of every split.
for split in ('train', 'val', 'test', 'all'):
    data_set.plot_distribution(split)

with Logger("gradient_boost", root='../') as l:
    l.log_and_print(data_set)
    l.log("")
    classifier = GradienBoost(data_set, n_estimators=120, verbose=1, logger=l)
    # classifier.hyper_parameter_tuning()
    classifier.fit()
    classifier.validate()
    classifier.metrics()
    classifier.plot_confusion_matrix()
# --- Random-forest classifier on document vectors ---
# x / y / class_names come from before this chunk.
# Alternative Twitter sentiment test data, kept for reference:
# class_names = ['positive', 'negative', 'neutral']
# x = np.load("data/test_data/fastTextDocumentVector.npy")  # positv 0, negative 1, neutral 3
# y = np.load("data/test_data/labels.npy")

# Map raw labels onto contiguous integer codes.
le = preprocessing.LabelEncoder()
# fit_transform replaces the original separate fit() + transform() pair.
y = le.fit_transform(y)

# Note the smaller training share (0.6) compared with the sibling scripts.
data_set = DataSet.from_np_array(x, y, class_names=class_names, p_train=0.6, p_val=0.1)

with Logger("random_forest", root='../') as l:
    l.log_and_print(data_set)
    l.log("")
    classifier = RandomForest(data_set, n_estimators=1000, max_leaf_nodes=20, verbose=1, logger=l)
    classifier.hyper_parameter_tuning()
    # classifier.fit()
    classifier.validate()
    classifier.metrics()
    classifier.plot_confusion_matrix()
    # Fix: save under the same project-root-relative path the sibling scripts
    # use (cf. Logger(root='../')); the original 'data/saved_models/…' path
    # silently depended on the current working directory.
    classifier.save('../data/saved_models/randomForest.pkl')