def compute_auc_data(path="D:/Python_code/SDA-02/Results/Exp_Hidden/"):
    """Score every benchmark dataset with ``auc_LOF`` for each representation.

    For each method ("input", "DAE", "Shrink", "VAE") and each dataset the
    train/test matrices are obtained (raw data from ``load_data`` for
    "input", otherwise the ``*_train_z.csv`` / ``*_test_z.csv`` files of the
    method's sub-directory) and passed to ``auc_LOF`` together with the test
    labels.  The result is written to ``path + "LOF_11_per.csv"`` with one
    row per method and one column per dataset.

    Parameters
    ----------
    path : str
        Results directory (with trailing separator).  Defaults to the
        location that used to be hard-coded.
    """
    list_data = ["PageBlocks", "WPBC", "PenDigits", "GLASS", "Shuttle",
                 "Arrhythmia", "CTU13_10", "CTU13_08", "CTU13_09", "CTU13_13",
                 "Spambase", "UNSW", "NSLKDD", "InternetAds"]
    # (method, sub-directory of the latent CSV files); None means raw input.
    # A list of pairs keeps the row order explicit.
    methods = [("input", None),
               ("DAE", "NEW_STOPPING_DAE/"),
               ("Shrink", "NEW_STOPPING_SHRINK/"),
               ("VAE", "NEW_STOPPING_VAE/")]

    # Set once up front so that every progress print uses the same format.
    np.set_printoptions(precision=3, suppress=True)

    lof_auc = np.empty([0, len(list_data)])
    for _method, subdir in methods:
        auc1 = np.empty([0, 1])
        for data in list_data:
            # A single call provides both the labels and, for "input",
            # the raw train/test matrices.
            raw_train, raw_test, actual = load_data(data)
            if subdir is None:
                train_X, test_X = raw_train, raw_test
            else:
                prefix = path + subdir + data
                train_X = np.genfromtxt(prefix + "_train_z.csv", delimiter=",")
                test_X = np.genfromtxt(prefix + "_test_z.csv", delimiter=",")

            lof = auc_LOF(train_X, test_X, actual)
            auc1 = np.append(auc1, lof)
            print(auc1)  # progress: scores collected so far for this method

        lof_auc = np.append(lof_auc, auc1)
        print(auc1)

    # Column count follows the dataset list instead of a literal 14.
    lof_auc = np.reshape(lof_auc, (-1, len(list_data)))
    np.savetxt(path + "LOF_11_per.csv", lof_auc, delimiter=",", fmt='%f')
def investigate_bandwidth(norm, data, method, path):
    """Sweep the kernel bandwidth and plot the scores for each method.

    For every entry ``m`` of ``method`` the latent train/test CSV files
    ``path + m + data + "_train_z.csv"`` / ``"_test_z.csv"`` are loaded and,
    for ten bandwidths ``h`` in (0, 5], ``investigate_svm`` (with
    ``gamma = 1 / (2 h^2)`` and a last argument of 0.5 and 0.1) and
    ``investigate_kde`` are evaluated.  The rows ``[h, kde, svm05, svm01]``
    are handed to ``Plot_AUC_Bandwidth``.

    Parameters
    ----------
    norm : passed through unchanged to the ``investigate_*`` helpers.
    data : str
        Dataset name, used for ``load_data`` and in the CSV file names.
    method : iterable of str
        Path fragments inserted between ``path`` and ``data``.
    path : str
        Base directory of the latent CSV files.
    """
    # The labels depend only on the dataset, so fetch them once.
    _, _, actual = load_data(data)

    steps = 10
    n = 5.0
    # Drop the h = 0 end point *before* computing gamma = 1 / (2 h^2); the
    # zero was previously evaluated (divide-by-zero warning) and then skipped.
    bandwidths = np.linspace(0.0, n, steps + 1)[1:]
    gammas = 1.0 / (2.0 * bandwidths * bandwidths)

    for m in method:
        train_z = np.genfromtxt(path + m + data + "_train_z.csv", delimiter=",")
        test_z = np.genfromtxt(path + m + data + "_test_z.csv", delimiter=",")
        n_features = train_z.shape[1]

        rows = []
        for b, g in zip(bandwidths, gammas):
            svm05 = investigate_svm(train_z, test_z, actual, norm, g, 0.5)
            svm01 = investigate_svm(train_z, test_z, actual, norm, g, 0.1)
            kde = investigate_kde(train_z, test_z, actual, norm, b)
            rows.append([b, kde, svm05, svm01])

        AUC = np.asarray(rows, dtype=float).reshape(-1, 4)
        Plot_AUC_Bandwidth(AUC, data, n, n_features, path)
def compute_confusion_matrix():
    """Print per-class hit/miss counts of the centroid classifier on NSL-KDD.

    A ``CentroidBasedOneClassClassifier(threshold=0.92)`` is fitted on the
    ShrinkAE latent training data of NSLKDD.  The normal test samples
    (label 1) are counted first, then the anomalous test samples (label 0)
    of each attack group in turn ("Probe", "DoS", "R2L", "U2R").  A
    prediction counts as correct when it equals ``False`` for a normal
    sample and ``True`` for an attack sample.  Results are only printed.
    """
    list_data = ["Probe", "DoS", "R2L", "U2R"]
    path = "D:/Python_code/SDA-02/Results/Exp_Hidden/NEW_GROUP/"

    train_X = np.genfromtxt(path + "ShrinkAE/NSLKDD_train_z.csv", delimiter=",")
    test_X = np.genfromtxt(path + "ShrinkAE/NSLKDD_test_z.csv", delimiter=",")
    _, _, actual = load_data("NSLKDD")

    CEN = CentroidBasedOneClassClassifier(threshold=0.92)
    CEN.fit(train_X)

    # --- normal traffic ---------------------------------------------------
    test_normal = test_X[actual == 1]
    # Builtin ``bool`` replaces the ``np.bool`` alias, which newer NumPy
    # releases no longer provide.  The selected labels are all 1, so the
    # inverted mask is all False.
    y_normal = ~(actual[actual == 1].astype(bool))
    pre_label0 = CEN.predict(test_normal)
    Test_P = int(np.sum(pre_label0 == y_normal))
    Actual_P = len(test_normal)
    FN = Actual_P - Test_P
    # These are normal samples that were missed, i.e. false negatives;
    # the message previously labelled this count "FP".
    print("\nActual Normal: %d,\nTest normal:%d, \nFN:%d" % (Actual_P, Test_P, FN))

    # --- each attack group ------------------------------------------------
    for data in list_data:
        _, _, actual = load_data(data)
        test_X = np.genfromtxt(path + "ShrinkAE/" + data + "_test_z.csv",
                               delimiter=",")
        test_anomaly = test_X[actual == 0]
        # The selected labels are all 0, so the inverted mask is all True.
        y_attack = ~(actual[actual == 0].astype(bool))
        Actual_N = len(test_anomaly)
        pre_label1 = CEN.predict(test_anomaly)
        Test_N = int(np.sum(pre_label1 == y_attack))
        FP = Actual_N - Test_N
        print("\nActual %s: %d,\nTest anomaly: %d, \nFP:%d"
              % (data, Actual_N, Test_N, FP))
def compute_auc_group_attacks(path="D:/Python_code/SDA-02/Results/Exp_Hidden/NEW_GROUP/"):
    """Score every attack-group dataset with ``auc_LOF`` per representation.

    For each method ("input", "DAE", "Shrink", "VAE") and each attack group
    the train/test matrices are obtained (raw data from ``load_data`` for
    "input", otherwise the ``*_train_z.csv`` / ``*_test_z.csv`` files of the
    method's sub-directory) and passed to ``auc_LOF`` together with the test
    labels.  The result is written to ``path + "LOF_10_UNSW.csv"`` with one
    row per method and one column per attack group.

    Parameters
    ----------
    path : str
        Results directory (with trailing separator).  Defaults to the
        location that used to be hard-coded.
    """
    list_data = ["Probe", "DoS", "R2L", "U2R",
                 "Fuzzers", "Analysis", "Backdoor", "DoS_UNSW", "Exploits",
                 "Generic", "Reconnaissance", "Shellcode", "Worms"]
    # (method, sub-directory of the latent CSV files); None means raw input.
    methods = [("input", None),
               ("DAE", "DAE/"),
               ("Shrink", "SHRINK/"),
               ("VAE", "VAE/")]

    # Set once up front so that every progress print uses the same format.
    np.set_printoptions(precision=3, suppress=True)

    lof_auc = np.empty([0, len(list_data)])
    for _method, subdir in methods:
        auc1 = np.empty([0, 1])
        for data in list_data:
            # A single call provides both the labels and, for "input",
            # the raw train/test matrices.
            raw_train, raw_test, actual = load_data(data)
            if subdir is None:
                train_X, test_X = raw_train, raw_test
            else:
                prefix = path + subdir + data
                train_X = np.genfromtxt(prefix + "_train_z.csv", delimiter=",")
                test_X = np.genfromtxt(prefix + "_test_z.csv", delimiter=",")

            lof = auc_LOF(train_X, test_X, actual)
            auc1 = np.append(auc1, lof)
            print(auc1)  # progress: scores collected so far for this method

        lof_auc = np.append(lof_auc, auc1)
        print(auc1)

    # Column count follows the dataset list instead of a literal 13.
    lof_auc = np.reshape(lof_auc, (-1, len(list_data)))
    np.savetxt(path + "LOF_10_UNSW.csv", lof_auc, delimiter=",", fmt='%f')
def sparsity_measurement(list_data):
    """Report the fraction of zero entries in each dataset's training set.

    For every name in ``list_data`` the training matrix returned by
    ``load_data`` is inspected; the share of entries equal to zero, rounded
    to two decimals, is printed and stored.

    Returns
    -------
    dict
        Maps each dataset name to its rounded zero-entry ratio.
    """
    sparsity_data = {}
    for name in list_data:
        train_set, _test_set, _labels = load_data(name)
        n_rows = train_set.shape[0]
        n_cols = train_set.shape[1]
        zero_count = (train_set == 0).sum()
        ratio = round(float(zero_count) / (n_cols * n_rows), 2)
        print(name + ": %0.2f" % ratio)
        sparsity_data[name] = ratio

    print(sparsity_data)
    return sparsity_data
def visualize_z():
    """Scatter-plot the first two latent dimensions of every dataset.

    Relies on the module-level names ``datasets`` (dataset names),
    ``path_method`` (path fragment per method) and ``method`` (row labels).
    For each dataset a 3x3 figure is drawn: one row per entry of
    ``path_method``, one column per sample group (normal train, normal
    test, anomaly test), with at most 1000 randomly chosen points per
    panel.  The figure is saved as
    ``Visualize_z/<data_name>_Visualize_nD_1.pdf`` and then shown.
    """
    path = "D:/Python_code/SDA-02/Results/Exp_Hidden/"

    # Half-width of the square axis window for rows 0, 1, 2 of each dataset.
    # Datasets that are not listed keep matplotlib's automatic limits.
    half_ranges = {
        "CTU13_10": (1.1, 0.11, 0.011),
        "InternetAds": (1.1, 0.21, 0.051),
        "Spambase": (1.1, 0.051, 0.031),
        "CTU13_08": (1.1, 0.051, 0.031),
        "CTU13_09": (1.1, 0.051, 0.051),
        "CTU13_13": (1.1, 0.021, 0.021),
        "NSLKDD": (1.1, 0.051, 0.061),
        "UNSW": (1.1, 0.051, 0.061),
    }
    color = ['b', 'g', 'r']
    label = ["Normal train", "Normal test", "Anomaly test"]
    n = 1000  # maximum number of points drawn per panel

    for data_name in datasets:
        # Labels are fetched once per dataset rather than once per method.
        # NOTE(review): assumes load_data is deterministic - confirm.
        _, _, actual = load_data(data_name)
        limits = half_ranges.get(data_name)

        plt.subplots(ncols=3, nrows=3, figsize=(6, 6))
        for i, m in enumerate(path_method):
            train_z = np.genfromtxt(path + m + data_name + "_train_z.csv",
                                    delimiter=",")
            test_z = np.genfromtxt(path + m + data_name + "_test_z.csv",
                                   delimiter=",")
            test_X0 = test_z[actual == 1]
            test_X1 = test_z[actual == 0]

            # Shuffle, then truncate: a random subsample of each group.
            np.random.shuffle(train_z)
            np.random.shuffle(test_X0)
            np.random.shuffle(test_X1)
            dataset = [train_z[:n], test_X0[:n], test_X1[:n]]

            for j, (data, c, l) in enumerate(zip(dataset, color, label)):
                ax = plt.subplot(3, 3, i * 3 + j + 1)

                if limits is not None and i < len(limits):
                    r = limits[i]
                    plt.ylim((-r, r))
                    plt.xlim((-r, r))

                plt.plot(data[:, 0], data[:, 1], c + 'o', ms=2, mec=c, label=l)
                plt.xticks(fontsize=9, rotation=90)
                plt.yticks(fontsize=9, rotation=0)
                if i == 0:
                    plt.legend(bbox_to_anchor=(1.02, 1.25), ncol=1,
                               fontsize='medium')

                ax.xaxis.set_ticks_position('bottom')
                ax.yaxis.set_ticks_position('left')
                # The x axis is shown on every row of the 3x3 grid; the
                # y axis and its method label only on the first column.
                ax.xaxis.set_visible(True)
                ax.yaxis.set_visible(False)
                if j == 0:
                    ax.yaxis.set_visible(True)
                    plt.ylabel(method[i], fontsize=12)

        plt.subplots_adjust(wspace=0.08, hspace=0.35)
        plt.savefig(path + "Visualize_z/" + data_name + "_Visualize_nD_1.pdf")
        plt.show()
def visualize_2D_scale():
    """Scatter-plot standardised 2-D latent data of every dataset.

    Relies on the module-level names ``datasets`` (dataset names),
    ``path_method`` (path fragment per method) and ``method`` (row labels).
    The latent train/test data are standardised with a ``StandardScaler``
    fitted on the training part, then drawn in a 3x3 figure: one row per
    entry of ``path_method``, one column per sample group (normal train,
    normal test, anomaly test), with at most 1000 randomly chosen points
    per panel.  The figure is saved as ``2D/<data_name>_Visualize_2D.pdf``
    and then shown.
    """
    path = "D:/Python_code/SDA-02/Results/Exp_Hidden/Visualize_z/"
    color = ['b', 'g', 'r']
    label = ["Normal train", "Normal test", "Anomaly test"]
    n = 1000  # maximum number of points drawn per panel

    for data_name in datasets:
        # Labels are fetched once per dataset rather than once per method.
        # NOTE(review): assumes load_data is deterministic - confirm.
        _, _, actual = load_data(data_name)
        # Rows after the first get a wider window; NSLKDD needs the widest.
        wide = 40.1 if data_name == "NSLKDD" else 30.1

        plt.subplots(ncols=3, nrows=3, figsize=(6, 6))
        for i, m in enumerate(path_method):
            train_z = np.genfromtxt(path + m + data_name + "_train_z.csv",
                                    delimiter=",")
            test_z = np.genfromtxt(path + m + data_name + "_test_z.csv",
                                   delimiter=",")

            scaler = preprocessing.StandardScaler()
            scaler.fit(train_z)
            train_z = scaler.transform(train_z)
            test_z = scaler.transform(test_z)

            test_X0 = test_z[actual == 1]
            test_X1 = test_z[actual == 0]

            # Shuffle, then truncate: a random subsample of each group.
            np.random.shuffle(train_z)
            np.random.shuffle(test_X0)
            np.random.shuffle(test_X1)
            dataset = [train_z[:n], test_X0[:n], test_X1[:n]]

            r = 5.1 if i == 0 else wide
            for j, (data, c, l) in enumerate(zip(dataset, color, label)):
                ax = plt.subplot(3, 3, i * 3 + j + 1)
                plt.ylim((-r, r))
                plt.xlim((-r, r))

                plt.plot(data[:, 0], data[:, 1], c + 'o', ms=2, mec=c, label=l)
                plt.xticks(fontsize=10, rotation=90)
                plt.yticks(fontsize=10, rotation=0)
                if i == 0:
                    plt.legend(bbox_to_anchor=(1.02, 1.25), ncol=1,
                               fontsize='medium')

                ax.xaxis.set_ticks_position('bottom')
                ax.yaxis.set_ticks_position('left')
                ax.yaxis.set_visible(False)

                # Hide the outermost x tick labels.
                ax.xaxis.set_visible(True)
                xticks = ax.xaxis.get_major_ticks()
                xticks[0].label1.set_visible(False)
                xticks[-1].label1.set_visible(False)

                if j == 0:
                    # Only the first column carries a y axis and row label.
                    ax.yaxis.set_visible(True)
                    plt.ylabel(method[i], fontsize=14)
                    yticks = ax.yaxis.get_major_ticks()
                    yticks[0].label1.set_visible(False)
                    yticks[-1].label1.set_visible(False)

        plt.subplots_adjust(wspace=0.08, hspace=0.35)
        plt.savefig(path + "2D/" + data_name + "_Visualize_2D.pdf")
        plt.show()
def Main_Test(list_data=None):
    """Train an SdAE per dataset and print the scores on its hidden data.

    For every dataset the data are loaded, normalised with the "maxabs"
    scaler, wrapped in Theano shared variables and passed to ``train_SdAE``.
    ``Compute_AUC_Hidden`` of the trained model then yields seven scores
    that are collected, together with the batch size and two entries of the
    training result, in a ten-column table.  Columns 2-9 are printed.

    Parameters
    ----------
    list_data : list of str, optional
        Dataset names to run.  Defaults to ``["CTU13_10"]``, the value that
        was in effect before.  The full benchmark list, previously assigned
        and immediately overwritten, was: PageBlocks, WPBC, PenDigits,
        GLASS, Shuttle, Arrhythmia, CTU13_10, CTU13_08, CTU13_09, CTU13_13,
        Spambase, UNSW, NSLKDD, InternetAds.
    """
    if list_data is None:
        list_data = ["CTU13_10"]

    norm = "maxabs"
    corruptions = [0.1, 0.1, 0.1]

    print("DAE")
    print("+ Data: ", list_data)
    print("+ Scaler: ", norm)
    print("+ Corruptions: ", corruptions)

    AUC_Hidden = np.empty([0, 10])
    for num, data in enumerate(list_data, 1):
        h_sizes = hyper_parameters(data)

        train_set, test_set, actual = load_data(data)
        train_X, test_X = normalize_data(train_set, test_set, norm)

        train_X = theano.shared(np.asarray(train_X, dtype=theano.config.floatX),
                                borrow=True)
        test_X = theano.shared(np.asarray(test_X, dtype=theano.config.floatX),
                               borrow=True)
        datasets = [train_X, test_X, actual]

        in_dim = train_set.shape[1]
        # One fifth of the training rows is handed over as n_validate.
        n_vali = int(train_set.shape[0] / 5)
        n_train = len(train_set) - n_vali

        pat, val, batch, n_batch = stopping_para_shrink(n_train)

        print("\n" + str(num) + ".", data, "...")
        print(" + Hidden Sizes: ", in_dim, h_sizes, "- Batch_sizes:", batch)
        print(" + Data: %d (%d train, %d vali) - %d normal, %d anomaly"
              % (len(train_set), n_train, n_vali,
                 len(test_set[(actual == 1)]), len(test_set[(actual == 0)])))
        print(" + Patience: %5.0d, Validate: %5.0d, \n + Batch size: %5.0d, n batch:%5.0d"
              % (pat, val, batch, n_batch))

        # ``result`` (formerly ``re``) no longer shadows the stdlib module name.
        sda, result = train_SdAE(pre_lr=1e-2, end2end_lr=1e-4, algo='adadelta',
                                 dataset=datasets, data_name=data,
                                 n_validate=n_vali, norm=norm,
                                 batch_size=batch, hidden_sizes=h_sizes,
                                 corruptions=corruptions,
                                 patience=pat, validation=val)

        # Scores computed on the hidden representation.
        lof, cen, dis, kde, svm05, svm01, ae = sda.Compute_AUC_Hidden(
            train_X, test_X, actual, norm, data)

        auc_hidden = np.column_stack(
            [batch, result[0], lof, cen, dis, kde, svm05, svm01, ae,
             100 * result[2]])
        AUC_Hidden = np.append(AUC_Hidden, auc_hidden)

    AUC_Hidden = np.reshape(AUC_Hidden, (-1, 10))
    np.set_printoptions(precision=3, suppress=True)
    column_list = [2, 3, 4, 5, 6, 7, 8, 9]
    print(" LOF CEN MDIS KDE SVM5 SVM1 AE RE*100")
    print(AUC_Hidden[:, column_list])
import csv #%% "Get data information" path = "D:/Python_code/SDA-02/Results/Exp_Hidden/" list_data = ["PageBlocks", "WPBC", "PenDigits", "GLASS", "Shuttle", "Arrhythmia",\ "CTU13_10", "CTU13_08","CTU13_09","CTU13_13",\ "Spambase", "UNSW", "NSLKDD", "InternetAds"] data_infor = np.empty([0,5]) header = np.column_stack(["Data", "Dimension", "Training", "Normal", "Anomaly"]) data_infor = np.append(data_infor, header) for data in list_data: train_set, test_set, actual = load_data(data) test_normal = test_set[(actual == 1)] test_anomaly = test_set[(actual == 0)] d = train_set.shape[1] n_train = train_set.shape[0] n_normal = test_normal.shape[0] n_anomaly = test_anomaly.shape[0] infor = np.column_stack([ data, d, n_train, n_normal, n_anomaly]) data_infor = np.append(data_infor, infor) data_infor = np.reshape(data_infor,(-1,5)) #np.savetxt(path + "data_information.csv", data_infor, fmt='%.18e', delimiter=' ') #np.savetxt(path + "data_information.csv", data_infor, delimiter=",", fmt= ('%s,%f,%f,%f,%f') ) with open(path + "data_information.csv", "wb") as f:
def bandwidth_auc(norm, data, label, method, path, load):
    """Plot score against kernel bandwidth, one stacked panel per method.

    Parameters
    ----------
    norm : passed through unchanged to the ``investigate_*`` helpers.
    data : str
        Dataset name, used for ``load_data`` and in file names.
    label : sequence of str
        Subscript shown in each panel's y label, parallel to ``method``.
    method : sequence of str
        Method names; latent data are read from
        ``path + 'NEW_STOPPING_' + m + '/'``.  The layout has three panels.
    path : str
        Results directory (with trailing separator).
    load : int
        ``0`` recomputes the table ``[h, kde, svm05, svm01]`` for 50
        bandwidths in (0, 5]; any other value reads it from
        ``Paramter_h/<data>_<m>_auc_bw.csv``.

    The table is (re)written to that CSV file in both cases, and the figure
    is saved as ``Paramter_h/<data>_BW.pdf`` and then shown.  Only the
    ``svm01`` column (index 3) is drawn.
    """
    # A plain figure: the panels are created with plt.subplot(3, 1, k)
    # below.  Pre-creating a 1x3 grid here would leave three unrelated axes
    # underneath the stacked panels.
    plt.figure(figsize=(6, 6))
    _, _, actual = load_data(data)

    steps = 50
    n = 5.0
    # Drop the h = 0 end point *before* computing gamma = 1 / (2 h^2); the
    # zero was previously evaluated (divide-by-zero warning) and then skipped.
    bandwidths = np.linspace(0.0, n, steps + 1)[1:]
    gammas = 1.0 / (2.0 * bandwidths * bandwidths)

    def tick_function(bw):
        # Labels for the secondary axis: gamma corresponding to bandwidth bw.
        gamma = 1.0 / (2.0 * bw * bw)
        return ["%.3f" % z for z in gamma]

    for num, (m, l) in enumerate(zip(method, label)):
        csv_file = path + "Paramter_h/" + data + "_" + m + "_auc_bw.csv"

        if load == 0:
            latent = path + 'NEW_STOPPING_' + m + '/' + data
            train_z = np.genfromtxt(latent + "_train_z.csv", delimiter=",")
            test_z = np.genfromtxt(latent + "_test_z.csv", delimiter=",")

            rows = []
            for b, g in zip(bandwidths, gammas):
                svm05 = investigate_svm(train_z, test_z, actual, norm, g, 0.5)
                svm01 = investigate_svm(train_z, test_z, actual, norm, g, 0.1)
                kde = investigate_kde(train_z, test_z, actual, norm, b)
                rows.append([b, kde, svm05, svm01])
            AUC = np.asarray(rows, dtype=float).reshape(-1, 4)
        else:
            AUC = np.genfromtxt(csv_file, delimiter=",")

        ax = plt.subplot(3, 1, num + 1)
        plt.xlim([0.0, max(AUC[:, 0]) + 0.1])
        plt.ylim([0.40, 1.0])
        plt.plot(AUC[:, 0], AUC[:, 3], 'g-^', ms=6, mec="g",
                 label=r'$\mathrm{SVM}_{\nu = 0.1}$', markevery=3)

        np.savetxt(csv_file, AUC, delimiter=",", fmt='%f')

        # Only the bottom panel shows the x axis, its label and the legend.
        ax.xaxis.set_visible(False)
        if num == 2:
            plt.legend(bbox_to_anchor=(1.0, 0.29), ncol=3, fontsize='large')
            ax.xaxis.set_visible(True)
            for xtick in ax.xaxis.get_major_ticks():
                # ``label1`` replaces the removed ``Tick.label`` alias.
                xtick.label1.set_fontsize(14)
            plt.xlabel('Bandwidth $(h)$', fontsize=16)

        plt.ylabel('AUC$_{' + l + '}$', fontsize=16)
        yticks = ax.yaxis.get_major_ticks()
        for ytick in yticks:
            ytick.label1.set_fontsize(14)
        yticks[0].label1.set_visible(False)

        if num == 0:
            # Secondary x axis on the top panel, labelled with gamma.
            ax2 = ax.twiny()
            new_tick_locations = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
            ax2.set_xlim(ax.get_xlim())
            ax2.set_xticks(new_tick_locations)
            ax2.set_xticklabels(tick_function(new_tick_locations), fontsize=13)
            ax2.set_xlabel(r"$\gamma$ = $1/(2*h^{2})$", fontsize=16)

    plt.tight_layout()
    plt.subplots_adjust(wspace=0.025, hspace=0.025)
    plt.savefig(path + "Paramter_h/" + data + "_BW.pdf")
    plt.show()
def parameter_k_lof(norm, data, label, method, path, load):
    """Plot the ``Investigate_lof`` score against its last argument.

    With ``load == 0`` the score is computed for 50 values in (0, 0.5] on
    the latent data of every method; the table's first column holds those
    values multiplied by 100, followed by one score column per method.
    Otherwise the table is read from ``Parameter_k/<data>_auc_k.csv``.
    The table is (re)written to that file in both cases, and the figure is
    saved as ``Parameter_k/<data>_k.pdf`` and then shown.  Columns 1-3 are
    drawn as DAE, SAE and DVAE.
    """
    csv_file = path + "Parameter_k/" + data + "_auc_k.csv"

    AUC = np.empty([0, 1])
    if load == 0:
        _, _, actual = load_data(data)
        steps = 50
        # The zero end point of the grid is never evaluated.
        grid = np.linspace(0.0, 0.5, steps + 1)[1:]

        columns = []
        for m, _l in zip(method, label):
            latent = path + 'NEW_STOPPING_' + m + '/' + data
            train_z = np.genfromtxt(latent + "_train_z.csv", delimiter=",")
            test_z = np.genfromtxt(latent + "_test_z.csv", delimiter=",")

            if not columns:
                # First method: start the table with the x-axis column.
                AUC = np.reshape(100 * grid, (-1, 1))
                print(AUC)
                columns.append(AUC)

            scores = np.empty([0, 1])
            for value in grid:
                scores = np.append(
                    scores,
                    [Investigate_lof(train_z, test_z, actual, norm, value)])
            columns.append(np.reshape(scores, (-1, 1)))
            AUC = np.hstack(columns)
        print(AUC)
    else:
        AUC = np.genfromtxt(csv_file, delimiter=",")

    plt.figure(figsize=(6, 3))
    plt.xlim([0.0, max(AUC[:, 0]) + 1])
    plt.ylim([0.25, 1.0])
    plt.yticks(fontsize=12)

    curves = [('r-o', "r", 'DAE-LOF'),
              ('g-^', "g", 'SAE-LOF'),
              ('b-x', "b", 'DVAE-LOF')]
    for column, (fmt, edge, name) in enumerate(curves, 1):
        plt.plot(AUC[:, 0], AUC[:, column], fmt, ms=6, mec=edge,
                 label=name, markevery=5)

    np.savetxt(csv_file, AUC, delimiter=",", fmt='%f')

    plt.legend(bbox_to_anchor=(1.0, 0.35), ncol=2, fontsize='large')
    plt.xlabel(r"$k$", fontsize=20)
    plt.ylabel('AUC', fontsize=16)
    plt.tight_layout()
    plt.savefig(path + "Parameter_k/" + data + "_k.pdf")
    plt.show()
def parameter_nu_svm(norm, data, label, method, path, load):
    """Plot the ``investigate_svm`` score against its ``nu`` argument.

    With ``load == 0`` the score is computed for 50 values of nu in
    (0, 0.5] on the latent data of every method, with gamma derived from
    the bandwidth ``sqrt(n_features / 2)``; the table's first column holds
    nu, followed by one score column per method.  Otherwise the table is
    read from ``Parameter_nu/<data>_auc_nu.csv``.  The table is (re)written
    to that file in both cases, and the figure is saved as
    ``Parameter_nu/<data>_nu.pdf`` and then shown.  Columns 1-3 are drawn
    as DAE, SAE and DVAE.
    """
    csv_file = path + "Parameter_nu/" + data + "_auc_nu.csv"

    AUC = np.empty([0, 1])
    if load == 0:
        _, _, actual = load_data(data)
        steps = 50
        # The zero end point of the grid is never evaluated.
        nu_grid = np.linspace(0.0, 0.5, steps + 1)[1:]

        columns = []
        for m, _l in zip(method, label):
            latent = path + 'NEW_STOPPING_' + m + '/' + data
            train_z = np.genfromtxt(latent + "_train_z.csv", delimiter=",")
            test_z = np.genfromtxt(latent + "_test_z.csv", delimiter=",")

            n_features = train_z.shape[1]
            bw = (n_features / 2.0)**0.5  # default value in One-class SVM
            gamma = 1 / (2 * bw * bw)

            if not columns:
                # First method: start the table with the nu column.
                AUC = np.reshape(nu_grid, (-1, 1))
                columns.append(AUC)

            scores = np.empty([0, 1])
            for nu in nu_grid:
                scores = np.append(
                    scores,
                    [investigate_svm(train_z, test_z, actual, norm, gamma, nu)])
            columns.append(np.reshape(scores, (-1, 1)))
            AUC = np.hstack(columns)
        print(AUC)
    else:
        AUC = np.genfromtxt(csv_file, delimiter=",")

    plt.figure(figsize=(6, 3))
    plt.xlim([0.0, max(AUC[:, 0]) + 0.01])
    plt.ylim([0.25, 1.0])
    plt.yticks(fontsize=12)

    curves = [('r-o', "r", 'DAE-OCSVM'),
              ('g-^', "g", 'SAE-OCSVM'),
              ('b-x', "b", 'DVAE-OCSVM')]
    for column, (fmt, edge, name) in enumerate(curves, 1):
        plt.plot(AUC[:, 0], AUC[:, column], fmt, ms=6, mec=edge,
                 label=name, markevery=5)

    np.savetxt(csv_file, AUC, delimiter=",", fmt='%f')

    plt.legend(bbox_to_anchor=(1.0, 0.4), ncol=2, fontsize='large')
    plt.xlabel(r"$\nu$", fontsize=20)
    plt.ylabel('AUC', fontsize=16)
    plt.tight_layout()
    plt.savefig(path + "Parameter_nu/" + data + "_nu.pdf")
    plt.show()
def parameter_gamma_svm(norm, data, label, method, path, load):
    """Plot the ``investigate_svm`` score against the kernel width.

    With ``load == 0`` the score is computed (last argument fixed at 0.1)
    for 54 bandwidths ``h`` between 0.05 and 50 on the latent data of every
    method, using ``gamma = 1 / (2 h^2)``; the table's first column holds
    ``h``, followed by one score column per method.  Otherwise the table is
    read from ``Parameter_gamma/<data>_auc_gamma.csv``.  The table is
    (re)written to that file in both cases, and the figure is saved as
    ``Parameter_gamma/<data>_gamma.pdf`` and then shown.  Columns 1-3 are
    drawn as DAE, SAE and DVAE; the bottom axis is labelled with gamma and
    the top axis with ``h``.

    NOTE(review): gamma used to be computed as ``1 / (2 h)`` although the
    axis label, the tick labels and the other bandwidth sweeps in this file
    all use ``1 / (2 h^2)``.  The computation now matches the label; CSV
    files produced before this change hold scores for the old gamma values.
    """
    csv_file = path + "Parameter_gamma/" + data + "_auc_gamma.csv"

    def tick_function(bw):
        # Tick text: gamma corresponding to bandwidth bw.
        gamma = 1.0 / (2.0 * bw * bw)
        return ["%1.1e" % z for z in gamma]

    AUC = np.empty([0, 1])
    if load == 0:
        _, _, actual = load_data(data)
        # 0.05, 0.1, 0.2, 0.5 followed by 1, 2, ..., 50.
        h = np.concatenate(([0.05, 0.1, 0.2, 0.5], np.arange(1.0, 51.0)))
        gamma = 1.0 / (2.0 * h * h)

        columns = [np.reshape(h, (-1, 1))]
        for m, _l in zip(method, label):
            latent = path + 'NEW_STOPPING_' + m + '/' + data
            train_z = np.genfromtxt(latent + "_train_z.csv", delimiter=",")
            test_z = np.genfromtxt(latent + "_test_z.csv", delimiter=",")

            # Plain floats are passed on, not one-element arrays.
            scores = [investigate_svm(train_z, test_z, actual, norm,
                                      float(g), 0.1)
                      for g in gamma]
            columns.append(np.reshape(np.asarray(scores, dtype=float), (-1, 1)))

        if len(columns) > 1:
            AUC = np.hstack(columns)
        print(AUC)
    else:
        AUC = np.genfromtxt(csv_file, delimiter=",")

    fig, ax = plt.subplots(figsize=(6.4, 3.6))
    plt.xlim([0.0, max(AUC[:, 0]) + 0.1])
    plt.ylim([0.25, 1.0])
    plt.plot(AUC[:, 0], AUC[:, 1], 'r-o', ms=6, mec="r", label='DAE-OCSVM',
             markevery=5)
    plt.plot(AUC[:, 0], AUC[:, 2], 'g-^', ms=6, mec="g", label='SAE-OCSVM',
             markevery=5)
    plt.plot(AUC[:, 0], AUC[:, 3], 'b-x', ms=6, mec="b", label='DVAE-OCSVM',
             markevery=5)

    np.savetxt(csv_file, AUC, delimiter=",", fmt='%f')

    plt.legend(bbox_to_anchor=(1.0, 0.95), ncol=2, fontsize='large')
    plt.xlabel(r"$\gamma$ = $1/(2*h^{2})$", fontsize=20)
    plt.ylabel('AUC', fontsize=16)
    plt.yticks(fontsize=12)

    new_tick_locations = np.array([0.05, 10, 20, 30, 40, 50])
    # Fix the tick positions before assigning their text, so each gamma
    # label sits at the bandwidth it was computed from.
    ax.set_xticks(new_tick_locations)
    ax.set_xticklabels(tick_function(new_tick_locations), fontsize=12)

    # Top axis: the same positions, labelled with the bandwidth itself.
    ax2 = ax.twiny()
    ax2.set_xlim(ax.get_xlim())  # keep both axes aligned
    ax2.set_xticks(new_tick_locations)
    ax2.set_xticklabels(new_tick_locations, fontsize=12)
    ax2.set_xlabel("$h$", fontsize=20)

    plt.tight_layout()
    plt.savefig(path + "Parameter_gamma/" + data + "_gamma.pdf")
    plt.show()