Example #1
    def SVM(self):
        '''
        SVM function: grid-searches the candidate kernels and loss
        settings on the first iteration, then trains with the best
        pair and returns results to be saved by the adder function.
        '''
        global opter
        global losser
        alg = 3
        if self.iterator == 0:
            # One-off grid search over kernel and loss candidates; the
            # kernels are kept in a list named `Optimizers`
            Optimizers = ['rbf', 'linear', 'poly']

            loss = [1, 2, 3, 4, 5]
            best_score = 0

            for kernel in Optimizers:
                for loss_value in loss:
                    data = self.data

                    X_train, X_test, y_train, y_test = converter_1(data,
                                                                   SVM=True)

                    loader = SVM(X_train, X_test, y_train, y_test,
                                 kernel, loss_value)
                    y_test, y_pred = loader.svm()

                    # num = detection rate, num2 = false-positive rate
                    _, _, _, _, num, num2 = feedbackdata(y_test, y_pred)

                    # Clamp the FP rate so the exponent stays bounded
                    if num2 <= 1:
                        num2 = 1
                    score = num * (np.power(num, (1 / num2)))
                    if score > best_score:
                        opter = kernel
                        losser = loss_value
                        best_score = score

        data = self.data

        X_train, X_test, y_train, y_test = converter_1(data, SVM=True)

        start = timeit.default_timer()

        loader = SVM(X_train, X_test, y_train, y_test, opter, losser)
        y_test, y_pred = loader.svm()
        end = timeit.default_timer()
        totaltime = round(end - start, 2)

        Name = 'SVM' + '-' + self.dataname
        tn, fp, fn, tp, detection_rate, false_positive_rate = feedbackdata(
            y_test, y_pred)
        results = [
            tn, fp, fn, tp, detection_rate, false_positive_rate, totaltime
        ]

        self.adder(results, Name, alg, opter, losser)
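
feedbackdata is used throughout these examples but never shown. A minimal sketch of what such a helper could look like, assuming it wraps sklearn's confusion_matrix and derives the detection rate and false-positive rate from it; the framework's actual implementation may differ:

from sklearn.metrics import confusion_matrix

def feedbackdata_sketch(y_test, y_pred):
    # tn, fp, fn, tp from a binary confusion matrix (0 = normal, 1 = anomaly)
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()
    # Detection rate: share of anomalies that were caught (recall)
    detection_rate = tp / (tp + fn) if (tp + fn) else 0.0
    # False-positive rate: share of normal samples flagged as anomalous
    false_positive_rate = fp / (fp + tn) if (fp + tn) else 0.0
    return tn, fp, fn, tp, detection_rate, false_positive_rate
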
Example #2
    def Iocsvm(self):
        '''
        One-Class SVM run with default settings; returns results to be
        saved by the adder function.
        '''
        alg = 0
        data = self.data

        X_train, X_val, X_test, y_train, y_val, y_test = converter_1(
            data,
            onClassClassification=True,
            shuffle=False,
            test_size=0.25,
            validation_size=0.25)

        start = timeit.default_timer()
        loader = OneClassSVM(X_train, X_val, X_test, y_train, y_val, y_test)

        y_test, y_pred = loader.prediction()

        end = timeit.default_timer()
        totaltime = round(end - start, 2)
        Name = 'OneClassSVM' + '-' + self.dataname

        tn, fp, fn, tp, detection_rate, false_positive_rate = feedbackdata(
            y_test, y_pred)
        results = [
            tn, fp, fn, tp, detection_rate, false_positive_rate, totaltime
        ]
        self.adder(results, Name, alg)
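
converter_1 is defined elsewhere in the framework; judging by the keyword arguments above, it produces a train/validation/test split. A minimal sketch of such a three-way split, assuming it builds on sklearn's train_test_split (the real converter also handles the onClassClassification labelling):

from sklearn.model_selection import train_test_split

def three_way_split(X, y, test_size=0.25, validation_size=0.25, shuffle=False):
    # First carve off the test set
    X_rest, X_test, y_rest, y_test = train_test_split(
        X, y, test_size=test_size, shuffle=shuffle)
    # Then carve the validation set out of the remainder, rescaling the
    # fraction so it is validation_size of the original data
    val_fraction = validation_size / (1 - test_size)
    X_train, X_val, y_train, y_val = train_test_split(
        X_rest, y_rest, test_size=val_fraction, shuffle=shuffle)
    return X_train, X_val, X_test, y_train, y_val, y_test
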
Example #3
    def calculate_error_threshold(self):
        '''
        Sweeps candidate thresholds over the scaled validation scores,
        keeps the one with the best anomaly-class F-score, and then
        calls prediction() with it.
        '''
        from sklearn.metrics import precision_recall_fscore_support
        from sklearn import metrics

        val_score = self.clf.score_samples(self.X_val)
        min_max_scaler = preprocessing.MinMaxScaler()
        val_score = min_max_scaler.fit_transform(val_score.reshape(-1, 1)).ravel()
        # Computed for reference; not used in the selection below
        acceptable_n_FP = self.threshold_fn_percentage * len(self.y_val)
        threshold = 0
        best_threshold = 0
        best_fscore_A = 0
        while threshold <= 1:
            threshold += 0.005
            y_pred = [1 if e > threshold else 0 for e in val_score]

            # Per-class and weighted precision/recall/F-score
            precision_N, recall_N, fscore_N, _ = precision_recall_fscore_support(
                self.y_val, y_pred, average='binary', pos_label=0)
            precision_A, recall_A, fscore_A, _ = precision_recall_fscore_support(
                self.y_val, y_pred, average='binary', pos_label=1)
            precision_W, recall_W, fscore_W, _ = precision_recall_fscore_support(
                self.y_val, y_pred, average='weighted')
            tn, fp, fn, tp, detection_rate, false_positive_rate = feedbackdata(
                self.y_val, y_pred)

            fpr, tpr, _ = metrics.roc_curve(self.y_val, y_pred)
            auc_score = metrics.auc(fpr, tpr)
            print(auc_score)

            # Keep the threshold with the best anomaly-class F-score
            if fscore_A > best_fscore_A:
                best_fscore_A = fscore_A
                best_threshold = threshold

        self.threshold = best_threshold

        self.prediction()
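
The sweep above evaluates 200 fixed thresholds in 0.005 steps. An equivalent, cheaper alternative is to derive the candidate thresholds directly from the scores with sklearn's precision_recall_curve; a sketch (illustrative only, not part of the framework):

import numpy as np
from sklearn.metrics import precision_recall_curve

def best_threshold_by_fscore(y_val, val_score):
    # Every distinct score is evaluated as a candidate threshold
    precision, recall, thresholds = precision_recall_curve(
        y_val, np.ravel(val_score))
    # Anomaly-class F-score at each candidate (epsilon avoids 0/0)
    fscore = 2 * precision * recall / np.maximum(precision + recall, 1e-12)
    # precision/recall have one more entry than thresholds; drop the last
    return thresholds[np.argmax(fscore[:-1])]
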
Example #4
    def Ae(self):
        '''
        Function for Autoencoders: tunes hyperparameters on the first
        iteration via grid search, then trains with them and returns
        results to be saved by the adder function.
        '''

        alg = 1
        data = self.data
        train, valid, validB, testB = converter_1(data, AE=True)
        # Initial grid search, performed only on the first iteration
        global bestresults
        if self.iterator == 0:

            bestresults = Hyperparametertuning(train, valid, validB, testB)
            # bestresults = [1e-6, 'relu', 'Adam', 'mean_squared_error', 64, 20]

        start = timeit.default_timer()
        loader = Autoen(train, valid, validB, testB)
        # bestresults: (learning_rate, activation_function, optimizer,
        # loss, batch_size, hidden_dimensions) -- see Ae in Example #10
        y_test, y_pred = loader.Ae(bestresults[0], bestresults[1],
                                   bestresults[2], bestresults[3],
                                   bestresults[4], bestresults[5])
        print(bestresults)
        # y_test, y_pred = loader.Ae(1e-05, 'relu', 'Adam', 'mean_squared_error', 1, 64)
        end = timeit.default_timer()
        totaltime = round(end - start, 2)
        opter, losser = 1, 2  # placeholder values expected by adder
        Name = 'AutoEncoder' + '-' + self.dataname
        tn, fp, fn, tp, detection_rate, false_positive_rate = feedbackdata(
            y_test, y_pred)
        results = [
            tn, fp, fn, tp, detection_rate, false_positive_rate, totaltime
        ]
        self.adder(results, Name, alg, opter, losser)

        print(self.iterator, "Ae iteration complete")
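
Indexing bestresults by position is easy to misread. Given the Ae(learning_rate, activation_function, optimizer, loss, batch_size, hidden_dimensions) signature shown in Example #10, the call above can be written with unpacking instead; a small, behaviour-preserving sketch (assuming bestresults always holds exactly those six values in that order):

# Unpack the six tuned values instead of indexing bestresults positionally
(learning_rate, activation_function, optimizer,
 loss, batch_size, hidden_dimensions) = bestresults
y_test, y_pred = loader.Ae(learning_rate, activation_function, optimizer,
                           loss, batch_size, hidden_dimensions)

The one-liner loader.Ae(*bestresults) is equivalent.
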
Example #5
    def SVM(self):
        '''
        SVM run with default settings; returns results to be saved by
        the adder function.
        '''
        alg = 3
        data = self.data

        X_train, X_test, y_train, y_test = converter_1(data, SVM=True)

        start = timeit.default_timer()

        loader = SVM(X_train, X_test, y_train, y_test)
        y_test, y_pred = loader.svm()
        end = timeit.default_timer()
        totaltime = round(end - start, 2)

        Name = 'SVM' + '-' + self.dataname
        tn, fp, fn, tp, detection_rate, false_positive_rate = feedbackdata(
            y_test, y_pred)
        results = [
            tn, fp, fn, tp, detection_rate, false_positive_rate, totaltime
        ]

        self.adder(results, Name, alg)
Example #6
    def Iocsvm(self):
        '''
        Function for One-Class SVM: tunes hyperparameters on the first
        iteration, then trains and returns results to be saved by the
        adder function.
        '''
        global bestresultssvm
        alg = 0
        data = self.data

        X_train, X_val, X_test, y_train, y_val, y_test = converter_1(
            data,
            onClassClassification=True,
            shuffle=False,
            test_size=0.25,
            validation_size=0.25)
        if self.iterator == 0:
            # One-off hyperparameter search; the tuned values feed the
            # OneClassSVM wrapper below
            bestresultssvm = SVMhyp(X_train, X_val, X_test, y_train, y_val,
                                    y_test)

        start = timeit.default_timer()
        loader = OneClassSVM(X_train, X_val, X_test, y_train, y_val, y_test,
                             bestresultssvm[0], bestresultssvm[1],
                             bestresultssvm[2])

        y_test, y_pred = loader.prediction()

        end = timeit.default_timer()
        totaltime = round(end - start, 2)
        Name = 'OneClassSVM' + '-' + self.dataname

        tn, fp, fn, tp, detection_rate, false_positive_rate = feedbackdata(
            y_test, y_pred)
        results = [
            tn, fp, fn, tp, detection_rate, false_positive_rate, totaltime
        ]
        self.adder(results, Name, alg, 0, 0)  # 0, 0 fill the optimizer/loss slots
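
SVMhyp's three return values are passed straight into the OneClassSVM wrapper, so their meaning is not visible here. Purely as an illustration, assuming the tuned values are kernel, nu and gamma, a search of this shape over sklearn's OneClassSVM could look like:

from sklearn import svm

def svm_hyp_sketch(X_train, X_val, y_val):
    # Hypothetical grid; the real SVMhyp may tune different parameters
    best, best_score = None, -1.0
    for kernel in ('rbf', 'sigmoid'):
        for nu in (0.01, 0.05, 0.1, 0.2):
            for gamma in ('scale', 'auto'):
                clf = svm.OneClassSVM(kernel=kernel, nu=nu, gamma=gamma)
                clf.fit(X_train)
                # OneClassSVM predicts +1 (inlier) / -1 (outlier); map to
                # the framework's 0 = normal, 1 = anomaly convention
                y_pred = (clf.predict(X_val) == -1).astype(int)
                _, _, _, _, dr, fpr = feedbackdata(y_val, y_pred)
                score = dr - fpr  # simple trade-off, illustrative only
                if score > best_score:
                    best_score, best = score, [kernel, nu, gamma]
    return best
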
Example #7
    def C_AE(self):
        '''
        Compressed Autoencoder function: trains with fixed (or
        grid-searched) hyperparameters and returns output to be saved
        by the adder function.
        '''
        global bestresults
        alg = 2
        data = self.data
        train, validB, testB = converter_1(data, cAE=True)
        # Grid search is disabled here; fixed hyperparameters are used
        if self.iterator == 0:
            bestresults = [1e-5, 'relu', 'Adam', 'mean_absolute_error', 1, 83]
            # bestresults = C_Hyp(train, validB, testB)

        print(bestresults)

        start = timeit.default_timer()
        loader = compressing_autoencoder(train, validB, testB, bestresults[0],
                                         bestresults[1], bestresults[2],
                                         bestresults[3], bestresults[4],
                                         bestresults[5])

        loader.loading_data()
        y_test, y_pred = loader.test()
        end = timeit.default_timer()
        totaltime = round(end - start, 2)

        Name = 'Compressed_AutoEncoder' + '-' + self.dataname
        tn, fp, fn, tp, detection_rate, false_positive_rate = feedbackdata(
            y_test, y_pred)
        results = [
            tn, fp, fn, tp, detection_rate, false_positive_rate, totaltime
        ]
        self.adder(results, Name, alg, bestresults[2], bestresults[3])  # optimizer, loss

        print(self.iterator, "C_AE iteration complete")
Example #8
    def C_AE(self):
        '''
        Compressed Autoencoder run with default settings; returns output
        to be saved by the adder function.
        '''
        alg = 2
        data = self.data
        train, validB, testB = converter_1(data, cAE=True)

        start = timeit.default_timer()
        loader = compressing_autoencoder(train, validB, testB)

        loader.loading_data()
        y_test, y_pred = loader.test()
        end = timeit.default_timer()
        totaltime = round(end - start, 2)

        Name = 'Compressed_AutoEncoder' + '-' + self.dataname
        tn, fp, fn, tp, detection_rate, false_positive_rate = feedbackdata(
            y_test, y_pred)
        results = [
            tn, fp, fn, tp, detection_rate, false_positive_rate, totaltime
        ]
        self.adder(results, Name, alg)

        print(self.iterator, "C_AE iteration complete")
Example #9
    def Ae(self):
        '''
        Autoencoder function: grid-searches optimizer/loss pairs on the
        first iteration, then trains with the best pair and returns
        results to be saved by the adder function.
        '''
        global opter
        global losser
        # Initial grid search, performed only on the first iteration
        if self.iterator == 0:
            Optimizers = [
                'Adadelta', 'Adagrad', 'Adam', 'Adamax', 'Nadam', 'SGD'
            ]

            loss = [
                'binary_crossentropy', 'categorical_crossentropy',
                'cosine_similarity', 'mean_absolute_error',
                'mean_absolute_percentage_error', 'mean_squared_error',
                'mean_squared_logarithmic_error', 'poisson'
            ]
            best_score = 0

            for optimizer in Optimizers:
                for loss_fn in loss:
                    data = self.data
                    train, valid, validB, testB = converter_1(data, AE=True)

                    print(optimizer, 'Optimizer')
                    print(loss_fn, 'loss')
                    loader = Autoen(train, valid, validB, testB,
                                    optimizer, loss_fn)

                    y_test, y_pred = loader.Ae()

                    # num = detection rate, num2 = false-positive rate
                    _, _, _, _, num, num2 = feedbackdata(y_test, y_pred)

                    # Clamp the FP rate so the exponent stays bounded
                    if num2 <= 1:
                        num2 = 1
                    score = num * (np.power(num, (1 / num2)))
                    if score > best_score:
                        opter = optimizer
                        losser = loss_fn
                        best_score = score

        print(opter, 'selected optimizer')
        print(losser, 'selected loss')
        alg = 1
        data = self.data
        train, valid, validB, testB = converter_1(data, AE=True)

        start = timeit.default_timer()
        loader = Autoen(train, valid, validB, testB, opter, losser)

        y_test, y_pred = loader.Ae()
        end = timeit.default_timer()
        totaltime = round(end - start, 2)
        Name = 'AutoEncoder' + '-' + self.dataname
        tn, fp, fn, tp, detection_rate, false_positive_rate = feedbackdata(
            y_test, y_pred)
        results = [
            tn, fp, fn, tp, detection_rate, false_positive_rate, totaltime
        ]
        self.adder(results, Name, alg, opter, losser)

        print(self.iterator, "Ae iteration complete")
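
The selection metric num * num ** (1 / num2) is duplicated between the SVM grid search in Example #1 and this one. Factoring it into a helper makes the intent easier to see; a sketch that keeps the original formula:

import numpy as np

def selection_score(detection_rate, false_positive_rate):
    # Clamp the FP rate so the exponent stays bounded, as in the original
    if false_positive_rate <= 1:
        false_positive_rate = 1
    # Same formula as the inline version: dr * dr ** (1 / fpr)
    return detection_rate * np.power(detection_rate, 1 / false_positive_rate)
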
Example #10
    def Ae(self, learning_rate, activation_function, optimizer, loss,
           batch_size, hidden_dimensions):
        '''
        Accepts a single DataFrame with anomalous and normal data,
        labelled with the approved labels of the framework.

        learning_rate: float; also reused as the L1 regularisation factor
        activation_function: activation for the encoder/decoder layers
        optimizer, loss: passed straight to Keras compile()
        batch_size: int, training batch size
        hidden_dimensions: multiplier for the width of the encoding layer

        Returns the real y and predicted y for outputresult. (Timing is
        measured by the caller; the threshold plot was removed.)
        '''
        import pandas as pd
        import numpy as np
        from keras.models import Model
        from keras.layers import Input, Dense
        from keras import regularizers
        from sklearn.preprocessing import StandardScaler
        from sklearn.metrics import confusion_matrix, precision_recall_fscore_support
        from numpy.random import seed
        seed(1)
        from tensorflow import set_random_seed
        set_random_seed(2)

        epochs = 50
        fault = 'Class'
        colm = list(self.train.columns)

        # Find the label column regardless of how it is capitalised;
        # the original `==` / `or` chain only ever matched "FaultNumber"
        for item in colm:
            if item in ("FaultNumber", "Class", "class", "faultnumber"):
                fault = item

        df_train_0_x = self.train.drop([fault], axis=1)
        df_test = self.testB
        df_valid = self.valid
        df_validB = self.validB
        print(self.validB.shape)
        df_valid_0_x = self.valid.drop([fault], axis=1).values

        # Scaling the data
        scaler = StandardScaler().fit(df_train_0_x)
        df_train_0_x_rescaled = scaler.transform(df_train_0_x)
        # validation split used by the TensorFlow fit below
        df_valid_0_x_rescaled = scaler.transform(df_valid_0_x)

        # validation for error threshold
        df_valid_xB = scaler.transform(df_validB.drop([fault], axis=1))
        # (the test set is rescaled later, once the threshold is fixed)

        # Bookkeeping: class balance of the validation set and feature count
        nn_samples = df_valid[df_valid[fault] == 0].shape[0]
        na_samples = df_valid[df_valid[fault] == 1].shape[0]
        n_features = df_train_0_x_rescaled.shape[1]

        nb_epoch = epochs
        input_dim = df_train_0_x_rescaled.shape[1]  # number of predictor variables
        encoding_dim = input_dim * hidden_dimensions
        hidden_dim = int(encoding_dim / 2)

        # Autoencoder layer construction

        input_layer = Input(shape=(input_dim,))
        # Note: learning_rate is reused here as the L1 activity-regularisation
        # factor, as in the original code
        encoder = Dense(encoding_dim, activation=activation_function,
                        activity_regularizer=regularizers.l1(learning_rate))(
            input_layer)
        encoder = Dense(hidden_dim, activation=activation_function)(encoder)
        decoder = Dense(hidden_dim, activation=activation_function)(encoder)
        decoder = Dense(encoding_dim, activation=activation_function)(decoder)
        decoder = Dense(input_dim, activation="linear")(decoder)

        autoencoder = Model(inputs=input_layer, outputs=decoder)
        autoencoder.summary()
        autoencoder.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
        # (Model checkpointing to disk is not used here)

        active_idle = [1] * len(df_train_0_x_rescaled)  # uniform sample weights

        history = autoencoder.fit(df_train_0_x_rescaled, df_train_0_x_rescaled, epochs=nb_epoch,
                                  batch_size=batch_size, shuffle=True,
                                  validation_data=(df_valid_0_x_rescaled, df_valid_0_x_rescaled),
                                  sample_weight=np.asarray(active_idle),
                                  verbose=1)
        # Report the final training accuracy
        print(history.history['accuracy'][-1])

        # Error-threshold setting

        valid_x_predictions = autoencoder.predict(df_valid_xB)

        debug = True

        mse = np.mean(np.power(df_valid_xB - valid_x_predictions, 2), axis=1)

        errors = mse
        print(len(errors), "Error shape")
        classes = df_validB[fault]
        print(len(classes), "Shape of Class")
        n_perc_min = 0
        n_perc_max = 98

        best_threshold = 0
        precision_W_best = 0
        fscore_A_best = 0
        fscore_N_best = 0
        recall_A_best = 0
        recall_N_best = 0
        recall_W_best = 0
        fscore_W_best = 0
        perc_best = 0
        fps = []
        fns = []
        tps = []
        tns = []
        n_percs = []
        precs = []
        recalls = []
        fscores = []

        # Loop for the error-threshold calculation over the 0-100 percentile range

        for n_perc in np.linspace(n_perc_min, n_perc_max + 2, 1000):
            error_threshold = np.percentile(np.asarray(errors), n_perc)
            if debug:
                print("Try with percentile: %s (threshold: %s)" % (
                    n_perc, error_threshold))

            predictions = [1 if e > error_threshold else 0 for e in errors]
            print(len(predictions), "Prediction shape")
            print(predictions.count(0))
            precision_N, recall_N, fscore_N, xyz = precision_recall_fscore_support(
                classes, predictions, average='binary', pos_label=0)
            precision_A, recall_A, fscore_A, xyz = precision_recall_fscore_support(
                classes, predictions, average='binary', pos_label=1)
            precision_W, recall_W, fscore_W, xyz = precision_recall_fscore_support(
                classes, predictions, average='weighted')

            tn, fp, fn, tp = confusion_matrix(classes, predictions).ravel()
            fscores.append(fscore_W)
            precs.append(precision_W)
            recalls.append(recall_W)
            n_percs.append(n_perc)
            fps.append(fp)
            fns.append(fn)
            tps.append(tp)
            tns.append(tn)
            print(precision_N, recall_N, fscore_N)
            print(precision_A, recall_A, fscore_A)
            print(precision_W, recall_W, fscore_W)
            tn, fp, fn, tp, detection_rate, false_positive_rate = feedbackdata(classes, predictions)

            # Invert and scale the false-positive rate; guard against a
            # zero rate, which would otherwise raise ZeroDivisionError
            if false_positive_rate == 0:
                false_positive_rate = 1
            false_positive_rate = (1 / false_positive_rate) * 100

            # Distance-to-ideal score; recorded in perc_best but not used
            # for the threshold selection below
            perc = np.sqrt(np.square(1 - detection_rate) + np.square(false_positive_rate))
            if fscore_W > fscore_W_best:
                precision_W_best = precision_W
                precision_N_best = precision_N
                precision_A_best = precision_A
                recall_W_best = recall_W
                recall_N_best = recall_N
                recall_A_best = recall_A
                fscore_W_best = fscore_W
                fscore_N_best = fscore_N
                fscore_A_best = fscore_A
                perc_best = perc
                tp_best = tp
                best_threshold = n_perc
        # Convert the best percentile back into an error threshold
        threshold_fixed = np.percentile(np.asarray(errors), best_threshold)
        df_test_x_rescaled = scaler.transform(df_test.drop([fault], axis=1))

        test_x_predictions = autoencoder.predict(df_test_x_rescaled)

        mse = np.mean(np.power(df_test_x_rescaled - test_x_predictions, 2), axis=1)
        error_df_test = pd.DataFrame({'Reconstruction_error': mse,
                                      'True_class': df_test[fault]})
        error_df_test = error_df_test.reset_index()
        pred_y = [1 if e > threshold_fixed else 0
                  for e in error_df_test.Reconstruction_error.values]

        return (error_df_test.True_class, pred_y)
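
Putting this method together with its callers (Examples #4 and #9): a usage sketch, with hyperparameter values taken from the commented-out call in Example #4:

# Usage sketch; `data` is whatever the framework's converter expects
train, valid, validB, testB = converter_1(data, AE=True)
loader = Autoen(train, valid, validB, testB)
y_test, y_pred = loader.Ae(1e-05, 'relu', 'Adam', 'mean_squared_error', 1, 64)
tn, fp, fn, tp, detection_rate, false_positive_rate = feedbackdata(
    y_test, y_pred)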