def SVM(self):
    '''
    SVM function

    On the first iteration (``self.iterator == 0``) every combination of the
    two option lists below is trained and scored; the best pair is stored in
    the module-level globals ``opter`` and ``losser`` so that later
    iterations reuse it. The model is then trained once more with the
    selected pair, timed, and the metrics are handed to ``self.adder``.

    :return: None; results are saved by the adder function
    '''
    global opter
    global losser
    # Algorithm id passed to adder. It must not be changed by the grid
    # search below, otherwise the first iteration is saved under another id.
    alg = 3

    if self.iterator == 0:
        # Forwarded as the 5th/6th constructor arguments of the project
        # SVM class; presumably kernel name and a numeric hyper-parameter.
        first_options = ['rbf', 'linear', 'poly']
        second_options = [1, 2, 3, 4, 5]

        # Fallback so the globals are always bound (and never left over from
        # another algorithm's search) even if no combination scores above 0.
        opter, losser = first_options[0], second_options[0]
        best_score = 0

        for first_choice in first_options:
            for second_choice in second_options:
                X_train, X_test, y_train, y_test = converter_1(self.data, SVM=True)
                candidate = SVM(X_train, X_test, y_train, y_test,
                                first_choice, second_choice)
                y_test, y_pred = candidate.svm()
                _, _, _, _, detection_rate, false_positive_rate = feedbackdata(
                    y_test, y_pred)
                # Floor the false-positive rate at 1 so the exponent below
                # never exceeds 1.
                if false_positive_rate <= 1:
                    false_positive_rate = 1
                score = detection_rate * np.power(
                    detection_rate, 1 / false_positive_rate)
                if score > best_score:
                    opter = first_choice
                    losser = second_choice
                    best_score = score

    # Final, timed run with the selected parameters.
    X_train, X_test, y_train, y_test = converter_1(self.data, SVM=True)
    start = timeit.default_timer()
    loader = SVM(X_train, X_test, y_train, y_test, opter, losser)
    y_test, y_pred = loader.svm()
    end = timeit.default_timer()
    totaltime = round(end - start, 2)

    Name = 'SVM' + '-' + self.dataname
    tn, fp, fn, tp, detection_rate, false_positive_rate = feedbackdata(
        y_test, y_pred)
    results = [
        tn, fp, fn, tp, detection_rate, false_positive_rate, totaltime
    ]
    self.adder(results, Name, alg, opter, losser)
def Iocsvm(self):
    '''
    Function for One Class SVM

    Splits ``self.data`` into train/validation/test parts, runs the
    OneClassSVM wrapper, times the run and passes the metrics to the adder.

    :return: None; results are saved by the adder function
    '''
    algorithm_id = 0

    X_train, X_val, X_test, y_train, y_val, y_test = converter_1(
        self.data,
        onClassClassification=True,
        shuffle=False,
        test_size=0.25,
        validation_size=0.25)

    # Only model construction and prediction are timed.
    began = timeit.default_timer()
    model = OneClassSVM(X_train, X_val, X_test, y_train, y_val, y_test)
    y_test, y_pred = model.prediction()
    elapsed = round(timeit.default_timer() - began, 2)

    run_name = '-'.join(('OneClassSVM', self.dataname))
    tn, fp, fn, tp, detection_rate, false_positive_rate = feedbackdata(
        y_test, y_pred)
    self.adder(
        [tn, fp, fn, tp, detection_rate, false_positive_rate, elapsed],
        run_name,
        algorithm_id)
def calculate_error_threshold(self):
    '''
    Pick the decision threshold on the validation scores and run prediction.

    The validation samples are scored with ``self.clf.score_samples``, the
    scores are min-max scaled to [0, 1], and thresholds are swept in steps of
    0.005. The threshold with the highest F-score for label 1 is stored in
    ``self.threshold``; afterwards ``self.prediction()`` is called.

    :return: None
    '''
    # Imported once here instead of on every loop iteration.
    from sklearn import metrics
    from sklearn.metrics import precision_recall_fscore_support

    raw_scores = self.clf.score_samples(self.X_val)
    min_max_scaler = preprocessing.MinMaxScaler()
    # The scaler needs a column vector; flatten again for the comparisons.
    val_score = min_max_scaler.fit_transform(
        raw_scores.reshape(-1, 1)).ravel()

    threshold = 0
    best_threshold = 0
    best_fscore = 0
    # The threshold is advanced before it is used, so the candidates are
    # 0.005, 0.010, ... up to the first value above 1 (same sweep as before).
    while threshold <= 1:
        threshold += 0.005
        y_pred = [1 if score > threshold else 0 for score in val_score]

        # Only the F-score of label 1 drives the selection.
        _, _, fscore_positive, _ = precision_recall_fscore_support(
            self.y_val, y_pred, average='binary', pos_label=1)

        # Progress output: AUC of the hard predictions at this threshold.
        fpr, tpr, _ = metrics.roc_curve(self.y_val, y_pred)
        print(metrics.auc(fpr, tpr))

        if fscore_positive > best_fscore:
            best_fscore = fscore_positive
            best_threshold = threshold

    self.threshold = best_threshold
    self.prediction()
def Ae(self):
    '''
    Function for Autoencoders

    On the first iteration the hyper-parameter search is run and its result
    is kept in the module-level global ``bestresults``; every call then
    trains an ``Autoen`` model with the first six entries of that result,
    times the run and passes the metrics to the adder.

    :return: None; results are saved by the adder function
    '''
    global bestresults
    algorithm_id = 1
    source = self.data

    # Split used only as input for the grid search.
    train, valid, validB, testB = converter_1(source, AE=True)
    if self.iterator == 0:
        bestresults = Hyperparametertuning(train, valid, validB, testB)

    # Fresh split for the timed run.
    train, valid, validB, testB = converter_1(source, AE=True)

    began = timeit.default_timer()
    model = Autoen(train, valid, validB, testB)
    y_test, y_pred = model.Ae(*(bestresults[i] for i in range(6)))
    # Printed inside the timed section, as before.
    print(*(bestresults[i] for i in range(6)))
    finished = timeit.default_timer()
    elapsed = round(finished - began, 2)

    run_name = '-'.join(('AutoEncoder', self.dataname))
    tn, fp, fn, tp, detection_rate, false_positive_rate = feedbackdata(
        y_test, y_pred)
    metrics_row = [
        tn, fp, fn, tp, detection_rate, false_positive_rate, elapsed
    ]
    # 1 and 2 are the fixed values passed in the opter/losser positions.
    self.adder(metrics_row, run_name, algorithm_id, 1, 2)
    print(self.iterator, "HEEEEEEEREEEEEEEEEEEEEE ITERATOR 22222")
def SVM(self):
    '''
    SVM function

    Converts ``self.data`` for the SVM, runs the model once, times the run
    and passes the metrics to the adder.

    :return: None; results are saved by the adder function
    '''
    algorithm_id = 3

    X_train, X_test, y_train, y_test = converter_1(self.data, SVM=True)

    # Only model construction and prediction are timed.
    began = timeit.default_timer()
    model = SVM(X_train, X_test, y_train, y_test)
    y_test, y_pred = model.svm()
    elapsed = round(timeit.default_timer() - began, 2)

    run_name = '-'.join(('SVM', self.dataname))
    tn, fp, fn, tp, detection_rate, false_positive_rate = feedbackdata(
        y_test, y_pred)
    self.adder(
        [tn, fp, fn, tp, detection_rate, false_positive_rate, elapsed],
        run_name,
        algorithm_id)
def Iocsvm(self):
    '''
    Function for One Class SVM

    On the first iteration ``SVMhyp`` is run and its result is kept in the
    module-level global ``bestresultssvm``; every call then builds the
    OneClassSVM wrapper with the first three entries of that result, times
    the run and passes the metrics to the adder.

    :return: None; output is saved by the adder function
    '''
    global bestresultssvm
    algorithm_id = 0

    X_train, X_val, X_test, y_train, y_val, y_test = converter_1(
        self.data,
        onClassClassification=True,
        shuffle=False,
        test_size=0.25,
        validation_size=0.25)

    if self.iterator == 0:
        bestresultssvm = SVMhyp(X_train, X_val, X_test, y_train, y_val,
                                y_test)

    # Only model construction and prediction are timed.
    began = timeit.default_timer()
    model = OneClassSVM(
        X_train, X_val, X_test, y_train, y_val, y_test,
        bestresultssvm[0], bestresultssvm[1], bestresultssvm[2])
    y_test, y_pred = model.prediction()
    elapsed = round(timeit.default_timer() - began, 2)

    run_name = '-'.join(('OneClassSVM', self.dataname))
    tn, fp, fn, tp, detection_rate, false_positive_rate = feedbackdata(
        y_test, y_pred)
    metrics_row = [
        tn, fp, fn, tp, detection_rate, false_positive_rate, elapsed
    ]
    # The two trailing zeros fill the opter/losser positions.
    self.adder(metrics_row, run_name, algorithm_id, 0, 0)
def C_AE(self):
    '''
    Compressed Autoencoder function

    On the first iteration a fixed parameter list is stored in the
    module-level global ``bestresults`` (the grid search is disabled); every
    call then builds the compressing autoencoder with the first six entries,
    times loading plus testing, and passes the metrics to the adder.

    :return: None; output is saved by the adder function
    '''
    global bestresults
    algorithm_id = 2

    train, validB, testB = converter_1(self.data, cAE=True)

    if self.iterator == 0:
        # Hard-coded parameters used in place of C_Hyp(train, validB, testB).
        bestresults = [1e-5, 'relu', 'Adam', 'mean_absolute_error', 1, 83]
    print(bestresults)

    began = timeit.default_timer()
    model = compressing_autoencoder(
        train, validB, testB,
        bestresults[0], bestresults[1], bestresults[2],
        bestresults[3], bestresults[4], bestresults[5])
    model.loading_data()
    y_test, y_pred = model.test()
    elapsed = round(timeit.default_timer() - began, 2)

    run_name = '-'.join(('Compressed_AutoEncoder', self.dataname))
    tn, fp, fn, tp, detection_rate, false_positive_rate = feedbackdata(
        y_test, y_pred)
    metrics_row = [
        tn, fp, fn, tp, detection_rate, false_positive_rate, elapsed
    ]
    # NOTE(review): index 3 ('mean_absolute_error') is passed before index 2
    # ('Adam'); other adder callers in this file pass (opter, losser) in that
    # position - confirm this order is intended.
    self.adder(metrics_row, run_name, algorithm_id, bestresults[3],
               bestresults[2])
    print(self.iterator, "HEEEEEEEREEEEEEEEEEEEEE ITERATOR 33333333333333")
def C_AE(self):
    '''
    Compressed Autoencoder function

    Converts ``self.data`` for the compressing autoencoder, runs loading and
    testing once, times the run and passes the metrics to the adder.

    :return: None; output is saved by the adder function
    '''
    algorithm_id = 2

    train, validB, testB = converter_1(self.data, cAE=True)

    # Construction, data loading and testing are all inside the timed span.
    began = timeit.default_timer()
    model = compressing_autoencoder(train, validB, testB)
    model.loading_data()
    y_test, y_pred = model.test()
    elapsed = round(timeit.default_timer() - began, 2)

    run_name = '-'.join(('Compressed_AutoEncoder', self.dataname))
    tn, fp, fn, tp, detection_rate, false_positive_rate = feedbackdata(
        y_test, y_pred)
    self.adder(
        [tn, fp, fn, tp, detection_rate, false_positive_rate, elapsed],
        run_name,
        algorithm_id)
    print(self.iterator, "HEEEEEEEREEEEEEEEEEEEEE ITERATOR 33333333333333")
def Ae(self):
    '''
    Function for Autoencoders

    On the first iteration (``self.iterator == 0``) every optimizer/loss
    combination listed below is trained and scored; the best pair is stored
    in the module-level globals ``opter`` and ``losser`` so that later
    iterations reuse it. The model is then trained once more with the
    selected pair, timed, and the metrics are handed to ``self.adder``.

    :return: None; results are saved by the adder function
    '''
    global opter
    global losser
    alg = 1

    # Initial function to perform grid search
    if self.iterator == 0:
        optimizer_options = [
            'Adadelta', 'Adagrad', 'Adam', 'Adamax', 'Nadam', 'SGD'
        ]
        loss_options = [
            'binary_crossentropy', 'categorical_crossentropy',
            'cosine_similarity', 'mean_absolute_error',
            'mean_absolute_percentage_error', 'mean_squared_error',
            'mean_squared_logarithmic_error', 'poisson'
        ]

        # Fallback so the globals are always bound (and never left over from
        # another algorithm's search) even if no combination scores above 0.
        opter, losser = optimizer_options[0], loss_options[0]
        best_score = 0

        for optimizer_name in optimizer_options:
            for loss_name in loss_options:
                train, valid, validB, testB = converter_1(self.data, AE=True)
                print(optimizer_name, 'Optimizer')
                print(loss_name, ' loss')
                candidate = Autoen(train, valid, validB, testB,
                                   optimizer_name, loss_name)
                y_test, y_pred = candidate.Ae()
                _, _, _, _, detection_rate, false_positive_rate = feedbackdata(
                    y_test, y_pred)
                # Floor the false-positive rate at 1 so the exponent below
                # never exceeds 1.
                if false_positive_rate <= 1:
                    false_positive_rate = 1
                score = detection_rate * np.power(
                    detection_rate, 1 / false_positive_rate)
                if score > best_score:
                    opter = optimizer_name
                    losser = loss_name
                    best_score = score
                    print(opter, ' PRINTINGGGGG OPTIMIZZERRRR')
                    print(losser, 'PRINTTINGG LOSSS')

    # Final, timed run with the selected optimizer and loss.
    train, valid, validB, testB = converter_1(self.data, AE=True)
    start = timeit.default_timer()
    loader = Autoen(train, valid, validB, testB, opter, losser)
    y_test, y_pred = loader.Ae()
    end = timeit.default_timer()
    totaltime = round(end - start, 2)

    Name = 'AutoEncoder' + '-' + self.dataname
    tn, fp, fn, tp, detection_rate, false_positive_rate = feedbackdata(
        y_test, y_pred)
    results = [
        tn, fp, fn, tp, detection_rate, false_positive_rate, totaltime
    ]
    self.adder(results, Name, alg, opter, losser)
    print(self.iterator, "HEEEEEEEREEEEEEEEEEEEEE ITERATOR 22222")
def Ae(self, learning_rate, activation_function, optimizer, loss, batch_size, hidden_dimensions):
    '''
    Train a dense autoencoder and classify the test set by reconstruction error.

    The model is fitted on ``self.train`` (label column dropped) with
    ``self.valid`` as Keras validation data. The error threshold is then
    chosen on ``self.validB``: every percentile of its reconstruction errors
    (1000 steps from 0 to 100) is tried and the one with the highest weighted
    F-score is kept. Finally ``self.testB`` is reconstructed and every row
    whose error exceeds the chosen threshold is labelled 1.

    :param learning_rate: coefficient of the L1 activity regularizer on the
        first encoder layer (it is not passed to the optimizer)
    :param activation_function: activation of all hidden Dense layers
    :param optimizer: optimizer passed to ``compile``
    :param loss: loss passed to ``compile``
    :param batch_size: batch size passed to ``fit``
    :param hidden_dimensions: multiplier; the widest hidden layer has
        ``input_dim * hidden_dimensions`` units and the bottleneck half that
    :return: tuple ``(true_classes, predictions)`` - the label column of the
        test frame (re-indexed) and a list of 0/1 predictions
    '''
    import numpy as np
    import pandas as pd
    from pylab import rcParams
    from keras.models import Model
    from keras.layers import Input, Dense
    from keras import regularizers
    from sklearn.metrics import precision_recall_fscore_support
    from numpy.random import seed
    from tensorflow import set_random_seed

    # Fixed seeds for numpy and tensorflow.
    seed(1)
    set_random_seed(2)
    # Kept from the original: global matplotlib figure size.
    rcParams['figure.figsize'] = 8, 6

    epochs = 50
    debug = True

    # Find the label column. The previous comparison against
    # ("FaultNumber" or "Class" or ...) only ever matched "FaultNumber".
    fault = 'Class'
    for column in self.train.columns:
        if column in ("FaultNumber", "Class", "class", "faultnumber"):
            fault = column

    df_test = self.testB
    df_validB = self.validB
    print(self.validB.shape)

    # The scaler is fitted on the training features only and reused for
    # every other split.
    df_train_0_x = self.train.drop([fault], axis=1)
    df_valid_0_x = self.valid.drop([fault], axis=1).values
    scaler = StandardScaler().fit(df_train_0_x)
    df_train_0_x_rescaled = scaler.transform(df_train_0_x)
    # validation data for the Keras fit
    df_valid_0_x_rescaled = scaler.transform(df_valid_0_x)
    # validation data for the error threshold
    df_valid_xB = scaler.transform(df_validB.drop([fault], axis=1))

    input_dim = df_train_0_x_rescaled.shape[1]  # num of predictor variables
    encoding_dim = input_dim * hidden_dimensions
    hidden_dim = int(encoding_dim / 2)

    # Start of the Algorithm layer formation
    input_layer = Input(shape=(input_dim,))
    encoder = Dense(encoding_dim, activation=activation_function,
                    activity_regularizer=regularizers.l1(learning_rate))(
        input_layer)
    encoder = Dense(hidden_dim, activation=activation_function)(encoder)
    decoder = Dense(hidden_dim, activation=activation_function)(encoder)
    decoder = Dense(encoding_dim, activation=activation_function)(decoder)
    decoder = Dense(input_dim, activation="linear")(decoder)
    autoencoder = Model(inputs=input_layer, outputs=decoder)
    autoencoder.summary()
    autoencoder.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

    # Uniform sample weights (all ones), as in the original call.
    active_idle = [1] * len(df_train_0_x_rescaled)
    history = autoencoder.fit(df_train_0_x_rescaled, df_train_0_x_rescaled,
                              epochs=epochs,
                              batch_size=batch_size,
                              shuffle=True,
                              validation_data=(df_valid_0_x_rescaled,
                                               df_valid_0_x_rescaled),
                              sample_weight=np.asarray(active_idle),
                              verbose=1)
    print(history.history['accuracy'][-1])

    # Error threshold setting: per-row mean squared reconstruction error.
    valid_x_predictions = autoencoder.predict(df_valid_xB)
    errors = np.mean(np.power(df_valid_xB - valid_x_predictions, 2), axis=1)
    print(len(errors), "Error shape")
    classes = df_validB[fault]
    print(len(classes), "Shape of CLass")

    n_perc_min = 0
    n_perc_max = 98
    best_percentile = 0
    fscore_W_best = 0

    # Looping for error threshold calculation between 0 to 100 percentile.
    # Only the weighted F-score decides; the unused confusion-matrix and
    # 1 / false_positive_rate computations were removed because they could
    # abort the search (single-class input, zero false-positive rate).
    for n_perc in np.linspace(n_perc_min, n_perc_max + 2, 1000):
        error_threshold = np.percentile(np.asarray(errors), n_perc)
        if debug:
            print("Try with percentile: %s (threshold: %s)" % (
                n_perc, error_threshold))

        predictions = [1 if e > error_threshold else 0 for e in errors]
        print(len(predictions), "Prediction shape")
        print(predictions.count(0))

        precision_N, recall_N, fscore_N, _ = precision_recall_fscore_support(
            classes, predictions, average='binary', pos_label=0)
        precision_A, recall_A, fscore_A, _ = precision_recall_fscore_support(
            classes, predictions, average='binary', pos_label=1)
        precision_W, recall_W, fscore_W, _ = precision_recall_fscore_support(
            classes, predictions, average='weighted')

        print(precision_N, recall_N, fscore_N)
        print(precision_A, recall_A, fscore_A)
        print(precision_W, recall_W, fscore_W)
        print(precision_W, recall_W, fscore_W)

        if fscore_W > fscore_W_best:
            fscore_W_best = fscore_W
            best_percentile = n_perc

    # Turn the winning percentile back into an absolute error threshold.
    threshold_fixed = np.percentile(np.asarray(errors), best_percentile)

    # Apply the threshold to the test split.
    df_test_x_rescaled = scaler.transform(df_test.drop([fault], axis=1))
    test_x_predictions = autoencoder.predict(df_test_x_rescaled)
    mse = np.mean(np.power(df_test_x_rescaled - test_x_predictions, 2), axis=1)
    error_df_test = pd.DataFrame({'Reconstruction_error': mse,
                                  'True_class': df_test[fault]})
    error_df_test = error_df_test.reset_index()

    pred_y = [1 if e > threshold_fixed else 0
              for e in error_df_test.Reconstruction_error.values]
    return (error_df_test.True_class, pred_y)