Example #1
0
def compute_auc_data():
    """Score every dataset with ``auc_LOF`` on each representation and save a table.

    For each entry of ``methods`` and each dataset name the train/test
    features are taken either from ``load_data`` (method "input") or from the
    ``<dataset>_train_z.csv`` / ``<dataset>_test_z.csv`` files stored in the
    method's result folder. The labels always come from ``load_data``.

    The scores are written to ``LOF_11_per.csv`` under the results path, one
    row per method and one column per dataset. Progress is printed while the
    table is being built.
    """
    list_data = [
        "PageBlocks", "WPBC", "PenDigits", "GLASS", "Shuttle", "Arrhythmia",
        "CTU13_10", "CTU13_08", "CTU13_09", "CTU13_13",
        "Spambase", "UNSW", "NSLKDD", "InternetAds",
    ]
    methods = ["input", "DAE", "Shrink", "VAE"]
    # Result sub-folder holding the hidden representation of each method.
    # "input" has no entry because it is scored on the original features.
    hidden_dirs = {
        "DAE": "NEW_STOPPING_DAE/",
        "Shrink": "NEW_STOPPING_SHRINK/",
        "VAE": "NEW_STOPPING_VAE/",
    }
    path = "D:/Python_code/SDA-02/Results/Exp_Hidden/"
    n_data = len(list_data)

    # Configure printing once, up front, so that every progress print uses
    # the same format (it used to take effect only after the first method).
    np.set_printoptions(precision=3, suppress=True)

    lof_auc = np.empty([0, n_data])
    for method in methods:
        auc1 = np.empty([0, 1])
        for data in list_data:
            # Single load per dataset: the labels are always needed, the raw
            # features only for the "input" method.
            raw_train, raw_test, actual = load_data(data)
            if method == "input":
                train_X, test_X = raw_train, raw_test
            else:
                prefix = path + hidden_dirs[method] + data
                train_X = np.genfromtxt(prefix + "_train_z.csv", delimiter=",")
                test_X = np.genfromtxt(prefix + "_test_z.csv", delimiter=",")

            lof = auc_LOF(train_X, test_X, actual)
            auc1 = np.append(auc1, lof)
            print(auc1)

        # np.append flattens, so the rows are restored by the reshape below.
        lof_auc = np.append(lof_auc, auc1)
        print(auc1)

    # The column count follows the dataset list instead of a magic number.
    lof_auc = np.reshape(lof_auc, (-1, n_data))
    np.savetxt(path + "LOF_11_per.csv", lof_auc, delimiter=",", fmt='%f')
Example #2
0
def investigate_bandwidth(norm, data, method, path):
    """Measure AUC as a function of the kernel bandwidth for each method.

    Args:
        norm: Forwarded unchanged to ``investigate_svm`` / ``investigate_kde``.
        data: Dataset name; used for ``load_data`` and to build file names.
        method: Iterable of path fragments. For each fragment ``m`` the files
            ``path + m + data + "_train_z.csv"`` and ``..."_test_z.csv"`` are
            read.
        path: Base folder of the result files; also forwarded to
            ``Plot_AUC_Bandwidth``.

    For every bandwidth ``h`` of the grid the function collects a row
    ``[h, kde, svm05, svm01]``, where the SVM runs use
    ``gamma = 1 / (2 * h * h)`` and nu = 0.5 / 0.1, and passes the resulting
    table to ``Plot_AUC_Bandwidth``.
    """
    # The labels depend on the dataset only, not on the method.
    _, _, actual = load_data(data)

    steps = 10
    n = 5.0
    bw = np.linspace(0.0, n, steps + 1)
    # Drop h = 0 before deriving gamma: 1 / (2 * 0 * 0) would divide by zero.
    bw = bw[bw > 0]
    gamma = 1.0 / (2.0 * bw * bw)

    for m in method:
        train_z = np.genfromtxt(path + m + data + "_train_z.csv",
                                delimiter=",")
        test_z = np.genfromtxt(path + m + data + "_test_z.csv", delimiter=",")
        n_features = train_z.shape[1]

        AUC = np.empty([0, 4])
        for b, g in zip(bw, gamma):
            svm05 = investigate_svm(train_z, test_z, actual, norm, g, 0.5)
            svm01 = investigate_svm(train_z, test_z, actual, norm, g, 0.1)
            kde = investigate_kde(train_z, test_z, actual, norm, b)
            AUC = np.append(AUC, [b, kde, svm05, svm01])

        # np.append flattens; restore one row per bandwidth.
        AUC = np.reshape(AUC, (-1, 4))
        Plot_AUC_Bandwidth(AUC, data, n, n_features, path)
Example #3
0
def compute_confusion_matrix():
    """Print per-class detection counts of a centroid classifier on NSL-KDD.

    A ``CentroidBasedOneClassClassifier`` (threshold 0.92) is fitted on the
    ShrinkAE hidden representation of the NSLKDD training set. The function
    then prints:

    * for the normal test samples (label 1): how many there are, how many
      were predicted as expected, and the remainder (false negatives);
    * for each attack group in ``list_data``: how many anomalous samples
      (label 0) there are, how many were predicted as expected, and the
      remainder (false positives).

    Nothing is returned.
    """
    list_data = ["Probe", "DoS", "R2L", "U2R"]
    path = "D:/Python_code/SDA-02/Results/Exp_Hidden/NEW_GROUP/"

    train_X = np.genfromtxt(path + "ShrinkAE/NSLKDD_train_z.csv",
                            delimiter=",")
    test_X = np.genfromtxt(path + "ShrinkAE/NSLKDD_test_z.csv", delimiter=",")
    _, _, actual = load_data("NSLKDD")  # labels of the original data

    test_normal = test_X[actual == 1]
    # Expected prediction for normal samples: label 1 -> True -> inverted to
    # False. The builtin ``bool`` replaces ``np.bool``, which no longer
    # exists in current NumPy releases.
    y_normal = ~(actual[actual == 1]).astype(bool)

    CEN = CentroidBasedOneClassClassifier(threshold=0.92)
    CEN.fit(train_X)

    pre_label0 = CEN.predict(test_normal)

    Test_P = sum(pre_label0 == y_normal)
    Actual_P = len(test_normal)
    FN = Actual_P - Test_P
    # This count is the false negatives; the message used to call it "FP".
    print("\nActual Normal: %d,\nTest normal:%d, \nFN:%d" %
          (Actual_P, Test_P, FN))

    for data in list_data:
        _, _, actual = load_data(data)  # labels of the original data
        test_X = np.genfromtxt(path + "ShrinkAE/" + data + "_test_z.csv",
                               delimiter=",")
        test_anomaly = test_X[actual == 0]
        # Expected prediction for anomalies: label 0 -> False -> inverted to
        # True.
        y_attack = ~(actual[actual == 0]).astype(bool)

        Actual_N = len(test_anomaly)
        pre_label1 = CEN.predict(test_anomaly)

        Test_N = sum(pre_label1 == y_attack)
        FP = Actual_N - Test_N

        # '%' binds tighter than '+', so only the last literal is formatted.
        print("\nActual " + data + ": %d,\nTest anomaly: %d, \nFP:%d" %
              (Actual_N, Test_N, FP))
Example #4
0
def compute_auc_group_attacks():
    """Score every attack group with ``auc_LOF`` on each representation.

    For each entry of ``methods`` and each attack-group name the train/test
    features are taken either from ``load_data`` (method "input") or from the
    ``<group>_train_z.csv`` / ``<group>_test_z.csv`` files stored in the
    method's result folder. The labels always come from ``load_data``.

    The scores are written to ``LOF_10_UNSW.csv`` under the results path, one
    row per method and one column per attack group. Progress is printed while
    the table is being built.
    """
    list_data = [
        "Probe", "DoS", "R2L", "U2R",
        "Fuzzers", "Analysis", "Backdoor", "DoS_UNSW", "Exploits",
        "Generic", "Reconnaissance", "Shellcode", "Worms",
    ]
    methods = ["input", "DAE", "Shrink", "VAE"]
    # Result sub-folder holding the hidden representation of each method.
    # "input" has no entry because it is scored on the original features.
    hidden_dirs = {
        "DAE": "DAE/",
        "Shrink": "SHRINK/",
        "VAE": "VAE/",
    }
    path = "D:/Python_code/SDA-02/Results/Exp_Hidden/NEW_GROUP/"
    n_data = len(list_data)

    # Configure printing once, up front, so that every progress print uses
    # the same format (it used to take effect only after the first method).
    np.set_printoptions(precision=3, suppress=True)

    lof_auc = np.empty([0, n_data])
    for method in methods:
        auc1 = np.empty([0, 1])
        for data in list_data:
            # Single load per group: the labels are always needed, the raw
            # features only for the "input" method.
            raw_train, raw_test, actual = load_data(data)
            if method == "input":
                train_X, test_X = raw_train, raw_test
            else:
                prefix = path + hidden_dirs[method] + data
                train_X = np.genfromtxt(prefix + "_train_z.csv", delimiter=",")
                test_X = np.genfromtxt(prefix + "_test_z.csv", delimiter=",")

            lof = auc_LOF(train_X, test_X, actual)
            auc1 = np.append(auc1, lof)
            print(auc1)

        # np.append flattens, so the rows are restored by the reshape below.
        lof_auc = np.append(lof_auc, auc1)
        print(auc1)

    # The column count follows the group list instead of a magic number.
    lof_auc = np.reshape(lof_auc, (-1, n_data))
    np.savetxt(path + "LOF_10_UNSW.csv", lof_auc, delimiter=",", fmt='%f')
Example #5
0
def sparsity_measurement(list_data):
    """Return the share of zero entries in the training set of each dataset.

    Args:
        list_data: Iterable of dataset names accepted by ``load_data``.

    Returns:
        dict mapping each dataset name to its zero-entry ratio
        (zeros / (rows * columns)), rounded to two decimals.

    The ratio of the current dataset and the dictionary accumulated so far
    are printed after every dataset.
    """
    ratios = {}
    for name in list_data:
        train_part, _test_part, _labels = load_data(name)
        n_cols = train_part.shape[1]
        n_rows = train_part.shape[0]
        zero_count = (train_part == 0).sum()
        ratio = round(float(zero_count) / (n_cols * n_rows), 2)

        print(name + ":    %0.2f" % ratio)
        ratios[name] = ratio
        print(ratios)
    return ratios
Example #6
0
def visualize_z():
    """Scatter-plot the first two hidden dimensions of every dataset.

    Relies on the module-level names ``datasets`` (dataset names),
    ``path_method`` (path fragment of each method's result folder) and
    ``method`` (row labels shown on the y axis).

    For each dataset a 3x3 figure is drawn: one row per entry of
    ``path_method`` and one column per subset (training data, normal test
    samples with label 1, anomalous test samples with label 0). At most 1000
    randomly chosen points are shown per panel. The figure is saved as
    ``Visualize_z/<dataset>_Visualize_nD_1.pdf`` and then displayed.
    """
    path = "D:/Python_code/SDA-02/Results/Exp_Hidden/"

    # Symmetric axis bound (+/- value) per dataset, one value per figure row.
    # Datasets missing from the table keep matplotlib's automatic limits.
    axis_bounds = {
        "CTU13_10": (1.1, 0.11, 0.011),
        "InternetAds": (1.1, 0.21, 0.051),
        "Spambase": (1.1, 0.051, 0.031),
        "CTU13_08": (1.1, 0.051, 0.031),
        "CTU13_09": (1.1, 0.051, 0.051),
        "CTU13_13": (1.1, 0.021, 0.021),
        "NSLKDD": (1.1, 0.051, 0.061),
        "UNSW": (1.1, 0.051, 0.061),
    }
    color = ['b', 'g', 'r']
    label = ["Normal train", "Normal test", "Anomaly test"]
    n = 1000  # maximum number of points drawn per panel

    for data_name in datasets:
        # The labels depend on the dataset only; load them once per figure.
        _, _, actual = load_data(data_name)
        bounds = axis_bounds.get(data_name)

        plt.subplots(ncols=3, nrows=3, figsize=(6, 6))
        for i, m in enumerate(path_method):
            train_z = np.genfromtxt(path + m + data_name + "_train_z.csv",
                                    delimiter=",")
            test_z = np.genfromtxt(path + m + data_name + "_test_z.csv",
                                   delimiter=",")

            test_X0 = test_z[actual == 1]
            test_X1 = test_z[actual == 0]
            # Shuffle in place so the first n rows are a random sample.
            np.random.shuffle(train_z)
            np.random.shuffle(test_X0)
            np.random.shuffle(test_X1)
            dataset = [train_z[:n], test_X0[:n], test_X1[:n]]

            for j, (points, c, l) in enumerate(zip(dataset, color, label)):
                ax = plt.subplot(3, 3, i * 3 + j + 1)

                if bounds is not None and i < len(bounds):
                    plt.ylim((-bounds[i], bounds[i]))
                    plt.xlim((-bounds[i], bounds[i]))

                plt.plot(points[:, 0], points[:, 1], c + 'o', ms=2, mec=c,
                         label=l)
                plt.xticks(fontsize=9, rotation=90)
                plt.yticks(fontsize=9, rotation=0)

                if i == 0:
                    plt.legend(bbox_to_anchor=(1.02, 1.25), ncol=1,
                               fontsize='medium')
                ax.xaxis.set_ticks_position('bottom')
                ax.yaxis.set_ticks_position('left')

                # The x axis is shown on every panel, the y axis (with the
                # method name as its label) only in the first column.
                ax.xaxis.set_visible(True)
                ax.yaxis.set_visible(j == 0)
                if j == 0:
                    plt.ylabel(method[i], fontsize=12)

        plt.subplots_adjust(wspace=0.08, hspace=0.35)
        plt.savefig(path + "Visualize_z/" + data_name + "_Visualize_nD_1.pdf")
        plt.show()
Example #7
0
def visualize_2D_scale():
    """Scatter-plot the standardised first two hidden dimensions per dataset.

    Relies on the module-level names ``datasets`` (dataset names),
    ``path_method`` (path fragment of each method's result folder) and
    ``method`` (row labels shown on the y axis).

    For each dataset a 3x3 figure is drawn: one row per entry of
    ``path_method`` and one column per subset (training data, normal test
    samples with label 1, anomalous test samples with label 0). The data are
    scaled with a ``StandardScaler`` fitted on the training data, and at most
    1000 randomly chosen points are shown per panel. The figure is saved as
    ``2D/<dataset>_Visualize_2D.pdf`` and then displayed.
    """
    path = "D:/Python_code/SDA-02/Results/Exp_Hidden/Visualize_z/"
    color = ['b', 'g', 'r']
    label = ["Normal train", "Normal test", "Anomaly test"]
    n = 1000  # maximum number of points drawn per panel

    for data_name in datasets:
        # The labels depend on the dataset only; load them once per figure.
        _, _, actual = load_data(data_name)
        # Axis bound of every row except the first; NSLKDD gets a wider one.
        wide_bound = 40.1 if data_name == "NSLKDD" else 30.1

        plt.subplots(ncols=3, nrows=3, figsize=(6, 6))
        for i, m in enumerate(path_method):
            train_z = np.genfromtxt(path + m + data_name + "_train_z.csv",
                                    delimiter=",")
            test_z = np.genfromtxt(path + m + data_name + "_test_z.csv",
                                   delimiter=",")

            scaler = preprocessing.StandardScaler()
            scaler.fit(train_z)
            train_z = scaler.transform(train_z)
            test_z = scaler.transform(test_z)

            test_X0 = test_z[actual == 1]
            test_X1 = test_z[actual == 0]
            # Shuffle in place so the first n rows are a random sample.
            np.random.shuffle(train_z)
            np.random.shuffle(test_X0)
            np.random.shuffle(test_X1)
            dataset = [train_z[:n], test_X0[:n], test_X1[:n]]

            bound = 5.1 if i == 0 else wide_bound
            for j, (points, c, l) in enumerate(zip(dataset, color, label)):
                ax = plt.subplot(3, 3, i * 3 + j + 1)
                plt.ylim((-bound, bound))
                plt.xlim((-bound, bound))

                plt.plot(points[:, 0], points[:, 1], c + 'o', ms=2, mec=c,
                         label=l)
                plt.xticks(fontsize=10, rotation=90)
                plt.yticks(fontsize=10, rotation=0)

                if i == 0:
                    plt.legend(bbox_to_anchor=(1.02, 1.25), ncol=1,
                               fontsize='medium')
                ax.xaxis.set_ticks_position('bottom')
                ax.yaxis.set_ticks_position('left')

                # The x axis is shown on every panel, the y axis only in the
                # first column.
                ax.xaxis.set_visible(True)
                ax.yaxis.set_visible(j == 0)

                # Hide the first and last x tick labels.
                xticks = ax.xaxis.get_major_ticks()
                xticks[0].label1.set_visible(False)
                xticks[-1].label1.set_visible(False)

                if j == 0:
                    plt.ylabel(method[i], fontsize=14)
                    # Hide the first and last y tick labels.
                    yticks = ax.yaxis.get_major_ticks()
                    yticks[0].label1.set_visible(False)
                    yticks[-1].label1.set_visible(False)

        plt.subplots_adjust(wspace=0.08, hspace=0.35)
        plt.savefig(path + "2D/" + data_name + "_Visualize_2D.pdf")
        plt.show()
Example #8
0
def Main_Test():
    """Train a model with ``train_SdAE`` per dataset and print hidden-layer AUCs.

    For every dataset in ``list_data`` the data are loaded, normalised with
    the "maxabs" scaler, wrapped in theano shared variables and passed to
    ``train_SdAE``. The AUC values returned by ``Compute_AUC_Hidden`` are
    collected in a 10-column table, of which columns 2..9 are printed at the
    end.
    """
    # Full benchmark list; it is immediately overridden by the subset that
    # is actually run.
    list_data = [
        "PageBlocks", "WPBC", "PenDigits", "GLASS", "Shuttle", "Arrhythmia",
        "CTU13_10", "CTU13_08", "CTU13_09", "CTU13_13",
        "Spambase", "UNSW", "NSLKDD", "InternetAds",
    ]
    list_data = ["CTU13_10"]

    norm = "maxabs"
    corruptions = [0.1, 0.1, 0.1]

    print("DAE")
    print("+ Data: ", list_data)
    print("+ Scaler: ", norm)
    print("+ Corruptions: ", corruptions)

    AUC_Hidden = np.empty([0, 10])

    for num, data in enumerate(list_data, start=1):
        h_sizes = hyper_parameters(data)

        train_set, test_set, actual = load_data(data)
        train_X, test_X = normalize_data(train_set, test_set, norm)

        float_type = theano.config.floatX
        train_X = theano.shared(numpy.asarray(train_X, dtype=float_type),
                                borrow=True)
        test_X = theano.shared(numpy.asarray(test_X, dtype=float_type),
                               borrow=True)
        datasets = [train_X, test_X, actual]

        in_dim = train_set.shape[1]
        # One fifth of the training rows is reserved for validation.
        n_vali = int(train_set.shape[0] / 5)
        n_train = len(train_set) - n_vali

        pat, val, batch, n_batch = stopping_para_shrink(n_train)

        print("\n" + str(num) + ".", data, "...")
        print(" + Hidden Sizes: ", in_dim, h_sizes, "- Batch_sizes:", batch)
        n_normal = len(test_set[actual == 1])
        n_anomaly = len(test_set[actual == 0])
        print(" + Data: %d (%d train, %d vali) - %d normal, %d anomaly"
              % (len(train_set), n_train, n_vali, n_normal, n_anomaly))
        print(" + Patience: %5.0d, Validate: %5.0d,  \n + Batch size: %5.0d, n batch:%5.0d"
              % (pat, val, batch, n_batch))

        sda, result = train_SdAE(pre_lr=1e-2,
                                 end2end_lr=1e-4,
                                 algo='adadelta',
                                 dataset=datasets,
                                 data_name=data,
                                 n_validate=n_vali,
                                 norm=norm,
                                 batch_size=batch,
                                 hidden_sizes=h_sizes,
                                 corruptions=corruptions,
                                 patience=pat,
                                 validation=val)

        # AUC of each detector on the hidden representation.
        lof, cen, dis, kde, svm05, svm01, ae = sda.Compute_AUC_Hidden(
            train_X, test_X, actual, norm, data)
        row = np.column_stack([
            batch, result[0], lof, cen, dis, kde, svm05, svm01, ae,
            100 * result[2],
        ])
        # np.append flattens; the table shape is restored after the loop.
        AUC_Hidden = np.append(AUC_Hidden, row)

    AUC_Hidden = np.reshape(AUC_Hidden, (-1, 10))
    np.set_printoptions(precision=3, suppress=True)
    print("    LOF    CEN    MDIS   KDE   SVM5    SVM1    AE    RE*100")
    # Columns 0 and 1 (batch size and result[0]) are left out of the report.
    print(AUC_Hidden[:, list(range(2, 10))])
Example #9
0
import csv

#%%
"Get data information"

# Build a table with one header row and one row per dataset:
# name, number of features, number of training rows, number of normal test
# rows (label 1) and number of anomalous test rows (label 0).
path = "D:/Python_code/SDA-02/Results/Exp_Hidden/"
list_data = ["PageBlocks", "WPBC", "PenDigits", "GLASS", "Shuttle", "Arrhythmia",\
             "CTU13_10", "CTU13_08","CTU13_09","CTU13_13",\
             "Spambase", "UNSW", "NSLKDD", "InternetAds"]

# np.append flattens its inputs, so rows are accumulated as one flat array
# and the 5-column shape is restored by the reshape after the loop.
data_infor = np.empty([0,5])
header     = np.column_stack(["Data", "Dimension", "Training", "Normal", "Anomaly"])
data_infor = np.append(data_infor, header)

for data in list_data:
  train_set, test_set, actual = load_data(data)
  # Split the test set by label: 1 is used for normal, 0 for anomalous rows.
  test_normal  = test_set[(actual == 1)]
  test_anomaly = test_set[(actual == 0)]

  d         = train_set.shape[1]
  n_train   = train_set.shape[0]
  n_normal  = test_normal.shape[0]
  n_anomaly = test_anomaly.shape[0]
  # Mixing the dataset name with the counts makes NumPy store every entry
  # of the row as a string.
  infor =  np.column_stack([ data, d, n_train, n_normal, n_anomaly])
  data_infor = np.append(data_infor, infor)

# One row per dataset plus the header row.
data_infor   = np.reshape(data_infor,(-1,5))
#np.savetxt(path +  "data_information.csv", data_infor, fmt='%.18e', delimiter=' ')
#np.savetxt(path +  "data_information.csv", data_infor, delimiter=",", fmt= ('%s,%f,%f,%f,%f') )

with open(path + "data_information.csv", "wb") as f:
Example #10
0
def bandwidth_auc(norm, data, label, method, path, load):
    """Plot AUC against the kernel bandwidth, one stacked panel per method.

    Args:
        norm: Forwarded unchanged to ``investigate_svm`` / ``investigate_kde``.
        data: Dataset name; used for ``load_data`` and to build file names.
        label: Display names, paired with ``method``; used in the y label.
        method: Method names; the hidden data are read from
            ``path + "NEW_STOPPING_" + m + "/"``.
        path: Base folder of the result files.
        load: ``0`` recomputes the AUC table from the hidden data; any other
            value reads it from ``Paramter_h/<data>_<m>_auc_bw.csv``.

    Each AUC table has the columns ``[h, kde, svm05, svm01]``. Only the
    ``svm01`` column (SVM with nu = 0.1) is drawn, but the whole table is
    written back to ``Paramter_h/<data>_<m>_auc_bw.csv``. The figure is saved
    as ``Paramter_h/<data>_BW.pdf`` and then displayed. The layout (legend
    and x axis on the third panel) assumes three methods.
    """
    # Three stacked panels. The grid must match the plt.subplot(3, 1, ...)
    # calls below; it used to be created as one row of three columns.
    plt.subplots(nrows=3, ncols=1, figsize=(6, 6))
    _, _, actual = load_data(data)

    for num, (m, l) in enumerate(zip(method, label)):
        if load == 0:
            prefix = path + 'NEW_STOPPING_' + m + '/' + data
            train_z = np.genfromtxt(prefix + "_train_z.csv", delimiter=",")
            test_z = np.genfromtxt(prefix + "_test_z.csv", delimiter=",")

            steps = 50
            n = 5.0
            bw = np.linspace(0.0, n, steps + 1)
            # Drop h = 0 before deriving gamma: 1 / (2 * 0 * 0) would divide
            # by zero.
            bw = bw[bw > 0]
            gamma = 1.0 / (2.0 * bw * bw)

            AUC = np.empty([0, 4])
            for b, g in zip(bw, gamma):
                svm05 = investigate_svm(train_z, test_z, actual, norm, g, 0.5)
                svm01 = investigate_svm(train_z, test_z, actual, norm, g, 0.1)
                kde = investigate_kde(train_z, test_z, actual, norm, b)
                AUC = np.append(AUC, [b, kde, svm05, svm01])
            # np.append flattens; restore one row per bandwidth.
            AUC = np.reshape(AUC, (-1, 4))
        else:
            AUC = np.genfromtxt(path + "Paramter_h/" + data + "_" + m +
                                "_auc_bw.csv",
                                delimiter=",")

        fig = plt.subplot(3, 1, num + 1)

        plt.xlim([0.0, max(AUC[:, 0]) + 0.1])
        plt.ylim([0.40, 1.0])

        plt.plot(AUC[:, 0],
                 AUC[:, 3],
                 'g-^',
                 ms=6,
                 mec="g",
                 label=r'$\mathrm{SVM}_{\nu = 0.1}$',
                 markevery=3)
        np.savetxt(path + "Paramter_h/" + data + "_" + m + "_auc_bw.csv",
                   AUC,
                   delimiter=",",
                   fmt='%f')

        # Only the bottom panel shows the x axis and the legend.
        fig.axes.get_xaxis().set_visible(False)
        if num == 2:
            plt.legend(bbox_to_anchor=(1.0, 0.29), ncol=3, fontsize='large')

            fig.axes.get_xaxis().set_visible(True)
            for xtick in fig.xaxis.get_major_ticks():
                # label1 replaces Tick.label, which newer Matplotlib lacks.
                xtick.label1.set_fontsize(14)
            plt.xlabel('Bandwidth $(h)$', fontsize=16)

        plt.ylabel('AUC$_{' + l + '}$', fontsize=16)
        yticks = fig.yaxis.get_major_ticks()
        for ytick in yticks:
            ytick.label1.set_fontsize(14)
        yticks[0].label1.set_visible(False)  # hide the first y tick label

        if num == 0:
            # Secondary x axis on the top panel showing gamma for each h.
            ax2 = fig.twiny()
            new_tick_locations = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
            tick_gamma = 1.0 / (2.0 * new_tick_locations * new_tick_locations)

            ax2.set_xlim(fig.get_xlim())
            ax2.set_xticks(new_tick_locations)
            ax2.set_xticklabels(["%.3f" % z for z in tick_gamma], fontsize=13)
            ax2.set_xlabel(r"$\gamma$ =  $1/(2*h^{2})$", fontsize=16)

    plt.tight_layout()
    plt.subplots_adjust(wspace=0.025, hspace=0.025)
    plt.savefig(path + "Paramter_h/" + data + "_BW.pdf")
    plt.show()
Example #11
0
def parameter_k_lof(norm, data, label, method, path, load):
    """Plot the AUC returned by ``Investigate_lof`` against its last argument.

    Args:
        norm: Forwarded unchanged to ``Investigate_lof``.
        data: Dataset name; used for ``load_data`` and to build file names.
        label: Iterated in lockstep with ``method``; its values are not used.
        method: Method names; the hidden data are read from
            ``path + "NEW_STOPPING_" + m + "/"``.
        path: Base folder of the result files.
        load: ``0`` recomputes the AUC table; any other value reads it from
            ``Parameter_k/<data>_auc_k.csv``.

    The table holds the parameter grid (times 100) in column 0 and one AUC
    column per method. Columns 1..3 are plotted, the table is written to
    ``Parameter_k/<data>_auc_k.csv``, and the figure is saved as
    ``Parameter_k/<data>_k.pdf`` and then displayed.
    """
    AUC = np.empty([0, 1])

    if load != 0:
        AUC = np.genfromtxt(path + "Parameter_k/" + data + "_auc_k.csv",
                            delimiter=",")
    else:
        _, _, actual = load_data(data)
        for num, (m, l) in enumerate(zip(method, label)):
            prefix = path + 'NEW_STOPPING_' + m + '/' + data
            train_z = np.genfromtxt(prefix + "_train_z.csv", delimiter=",")
            test_z = np.genfromtxt(prefix + "_test_z.csv", delimiter=",")

            steps = 50
            nu = np.linspace(0.0, 0.5, steps + 1)
            if num == 0:
                # First column: the grid without its zero entry, in percent.
                AUC = np.reshape(100 * nu[1:], (-1, 1))
                print(AUC)

            scores = [
                Investigate_lof(train_z, test_z, actual, norm, value)
                for value in nu
                if value > 0
            ]
            auc = np.reshape(np.append(np.empty([0, 1]), scores), (-1, 1))

            # Attach this method's scores as a new right-most column.
            AUC = np.hstack((AUC, auc))
            print(AUC)

    plt.figure(figsize=(6, 3))
    plt.xlim([0.0, max(AUC[:, 0]) + 1])
    plt.ylim([0.25, 1.0])
    plt.yticks(fontsize=12)

    # (table column, line format, marker edge colour, legend entry)
    curves = [
        (1, 'r-o', "r", 'DAE-LOF'),
        (2, 'g-^', "g", 'SAE-LOF'),
        (3, 'b-x', "b", 'DVAE-LOF'),
    ]
    for column, fmt, edge, name in curves:
        plt.plot(AUC[:, 0], AUC[:, column], fmt, ms=6, mec=edge, label=name,
                 markevery=5)

    np.savetxt(path + "Parameter_k/" + data + "_auc_k.csv",
               AUC,
               delimiter=",",
               fmt='%f')

    plt.legend(bbox_to_anchor=(1.0, 0.35), ncol=2, fontsize='large')
    plt.xlabel(r"$k$", fontsize=20)
    plt.ylabel('AUC', fontsize=16)

    plt.tight_layout()
    plt.savefig(path + "Parameter_k/" + data + "_k.pdf")
    plt.show()
Example #12
0
def parameter_nu_svm(norm, data, label, method, path, load):
    """Plot the AUC returned by ``investigate_svm`` against the nu parameter.

    Args:
        norm: Forwarded unchanged to ``investigate_svm``.
        data: Dataset name; used for ``load_data`` and to build file names.
        label: Iterated in lockstep with ``method``; its values are not used.
        method: Method names; the hidden data are read from
            ``path + "NEW_STOPPING_" + m + "/"``.
        path: Base folder of the result files.
        load: ``0`` recomputes the AUC table; any other value reads it from
            ``Parameter_nu/<data>_auc_nu.csv``.

    The table holds the nu grid in column 0 and one AUC column per method.
    Columns 1..3 are plotted, the table is written to
    ``Parameter_nu/<data>_auc_nu.csv``, and the figure is saved as
    ``Parameter_nu/<data>_nu.pdf`` and then displayed.
    """
    AUC = np.empty([0, 1])

    if load != 0:
        AUC = np.genfromtxt(path + "Parameter_nu/" + data + "_auc_nu.csv",
                            delimiter=",")
    else:
        _, _, actual = load_data(data)
        for num, (m, l) in enumerate(zip(method, label)):
            prefix = path + 'NEW_STOPPING_' + m + '/' + data
            train_z = np.genfromtxt(prefix + "_train_z.csv", delimiter=",")
            test_z = np.genfromtxt(prefix + "_test_z.csv", delimiter=",")

            # gamma derived from the bandwidth h = sqrt(n_features / 2).
            n_features = train_z.shape[1]
            bw = (n_features / 2.0)**0.5
            gamma = 1 / (2 * bw * bw)

            steps = 50
            nu = np.linspace(0.0, 0.5, steps + 1)
            if num == 0:
                # First column: the nu grid without its zero entry.
                AUC = np.reshape(nu[1:], (-1, 1))

            scores = [
                investigate_svm(train_z, test_z, actual, norm, gamma, value)
                for value in nu
                if value > 0
            ]
            auc = np.reshape(np.append(np.empty([0, 1]), scores), (-1, 1))

            # Attach this method's scores as a new right-most column.
            AUC = np.hstack((AUC, auc))
            print(AUC)

    plt.figure(figsize=(6, 3))
    plt.xlim([0.0, max(AUC[:, 0]) + 0.01])
    plt.ylim([0.25, 1.0])
    plt.yticks(fontsize=12)

    # (table column, line format, marker edge colour, legend entry)
    curves = [
        (1, 'r-o', "r", 'DAE-OCSVM'),
        (2, 'g-^', "g", 'SAE-OCSVM'),
        (3, 'b-x', "b", 'DVAE-OCSVM'),
    ]
    for column, fmt, edge, name in curves:
        plt.plot(AUC[:, 0], AUC[:, column], fmt, ms=6, mec=edge, label=name,
                 markevery=5)

    np.savetxt(path + "Parameter_nu/" + data + "_auc_nu.csv",
               AUC,
               delimiter=",",
               fmt='%f')

    plt.legend(bbox_to_anchor=(1.0, 0.4), ncol=2, fontsize='large')
    plt.xlabel(r"$\nu$", fontsize=20)
    plt.ylabel('AUC', fontsize=16)

    plt.tight_layout()
    plt.savefig(path + "Parameter_nu/" + data + "_nu.pdf")
    plt.show()
Example #13
0
def parameter_gamma_svm(norm, data, label, method, path, load):
    """Plot the AUC returned by ``investigate_svm`` against the bandwidth h.

    Args:
        norm: Forwarded unchanged to ``investigate_svm``.
        data: Dataset name; used for ``load_data`` and to build file names.
        label: Iterated in lockstep with ``method``; its values are not used.
        method: Method names; the hidden data are read from
            ``path + "NEW_STOPPING_" + m + "/"``.
        path: Base folder of the result files.
        load: ``0`` recomputes the AUC table; any other value reads it from
            ``Parameter_gamma/<data>_auc_gamma.csv``.

    The table holds the bandwidth grid h in column 0 and one AUC column per
    method (SVM with nu = 0.1 and ``gamma = 1 / (2 * h^2)``). Columns 1..3
    are plotted; the bottom axis is labelled with gamma and a twin top axis
    with h. The table is written to ``Parameter_gamma/<data>_auc_gamma.csv``
    and the figure is saved as ``Parameter_gamma/<data>_gamma.pdf`` and then
    displayed.
    """
    AUC = np.empty([0, 1])

    if load == 0:
        _, _, actual = load_data(data)

        # Bandwidth grid: a few small values, then 1, 2, ..., 50.
        h = np.concatenate(([0.05, 0.1, 0.2, 0.5], np.arange(1.0, 51.0)))
        # gamma = 1 / (2 * h^2), as stated by the axis label and the tick
        # labels below. It used to be computed as 1 / (2 * h), which did not
        # match the values shown on the axis.
        gamma = 1.0 / (2.0 * h * h)

        for num, (m, l) in enumerate(zip(method, label)):
            prefix = path + 'NEW_STOPPING_' + m + '/' + data
            train_z = np.genfromtxt(prefix + "_train_z.csv", delimiter=",")
            test_z = np.genfromtxt(prefix + "_test_z.csv", delimiter=",")

            if num == 0:
                # First column: the bandwidth grid.
                AUC = np.reshape(h, (-1, 1))

            # Pass gamma as a plain float rather than a 1-element array.
            scores = [
                investigate_svm(train_z, test_z, actual, norm, float(g), 0.1)
                for g in gamma
            ]
            auc = np.reshape(np.append(np.empty([0, 1]), scores), (-1, 1))

            # Attach this method's scores as a new right-most column.
            AUC = np.hstack((AUC, auc))
            print(AUC)
    else:
        AUC = np.genfromtxt(path + "Parameter_gamma/" + data +
                            "_auc_gamma.csv",
                            delimiter=",")

    fig, ax = plt.subplots(figsize=(6.4, 3.6))
    plt.xlim([0.0, max(AUC[:, 0]) + 0.1])
    plt.ylim([0.25, 1.0])

    # (table column, line format, marker edge colour, legend entry)
    curves = [
        (1, 'r-o', "r", 'DAE-OCSVM'),
        (2, 'g-^', "g", 'SAE-OCSVM'),
        (3, 'b-x', "b", 'DVAE-OCSVM'),
    ]
    for column, fmt, edge, name in curves:
        plt.plot(AUC[:, 0], AUC[:, column], fmt, ms=6, mec=edge, label=name,
                 markevery=5)

    np.savetxt(path + "Parameter_gamma/" + data + "_auc_gamma.csv",
               AUC,
               delimiter=",",
               fmt='%f')

    plt.legend(bbox_to_anchor=(1.0, 0.95), ncol=2, fontsize='large')
    plt.xlabel(r"$\gamma$ =  $1/(2*h^{2})$", fontsize=20)
    plt.ylabel('AUC', fontsize=16)
    plt.yticks(fontsize=12)

    new_tick_locations = np.array([0.05, 10, 20, 30, 40, 50])
    tick_gamma = 1.0 / (2.0 * new_tick_locations * new_tick_locations)

    # Pin the tick positions before relabelling them; otherwise the gamma
    # labels are attached to whatever ticks matplotlib chose automatically.
    ax.set_xticks(new_tick_locations)
    ax.set_xticklabels(["%1.1e" % z for z in tick_gamma], fontsize=12)

    # Twin axis on top showing the bandwidth itself. Its limits are copied
    # from the main axis so both scales line up.
    ax2 = ax.twiny()
    ax2.set_xlim(ax.get_xlim())
    ax2.set_xticks(new_tick_locations)
    ax2.set_xticklabels(new_tick_locations, fontsize=12)
    ax2.set_xlabel("$h$", fontsize=20)

    plt.tight_layout()
    plt.savefig(path + "Parameter_gamma/" + data + "_gamma.pdf")
    plt.show()