# Keys of ``Parameters`` that override the module-level setting of the same name.
_MODEL_PARAMETER_KEYS = (
    "filepath",
    "filename",
    "sequence_window",
    "number_class",
    "hidden_units",
    "input_dim",
    "learning_rate",
    "epoch",
    "is_multi_scale",
    "training_level",
    "cross_cv",
    "fixed_seed_num",
    "is_add_noise",
    "noise_ratio",
)

# ``Normalize`` code passed to ``LoadData.GetData_WithoutS`` for every label
# that is trained on the non-sequence data.
_FLAT_NORMALIZE_CODE = {
    "SVM": 0,
    "SVMF": 5,
    "SVMW": 6,
    "NBF": 10,
    "NBW": 11,
    "NB": 1,
    "DT": 2,
    "Ada.Boost": 0,
    "MLP": 0,
}

# Labels trained on the data returned by ``LoadData.GetData``.
_SEQUENCE_LABELS = ("RNN", "LSTM")

# Progress banner printed right after the label (text is part of the output).
_RUNNING_BANNER = " is running.............................................."


def Model(Label, Parameters=None):
    """Train the model named by *Label* on fold 0 and evaluate it.

    Args:
        Label: One of "SVM", "SVMF", "SVMW", "NBF", "NBW", "NB", "DT",
            "Ada.Boost", "MLP", "RNN" or "LSTM".
        Parameters: Optional mapping. Every key listed in
            ``_MODEL_PARAMETER_KEYS`` that is present replaces the
            module-level setting of the same name before training.
            ``None`` (the default) or an empty mapping leaves the settings
            untouched.

    Returns:
        When *Parameters* is non-empty, the value of
        ``Evaluation.Evaluation(y_test, result)`` is returned immediately and
        no file is written. Otherwise the same evaluation mapping is returned
        after the "StatFalseAlarm_*" files and one line of the
        "Comparison_Log_*" file have been written.

    Raises:
        ValueError: If *Label* is not one of the supported names.
    """
    if Label not in _FLAT_NORMALIZE_CODE and Label not in _SEQUENCE_LABELS:
        raise ValueError("Unknown model label: %r" % (Label,))

    if Parameters is None:
        Parameters = {}
    _apply_parameter_overrides(Parameters)

    result_list_dict = defaultdict(list)
    for each in ["ACCURACY", "F1_SCORE", "AUC", "G_MEAN"]:
        result_list_dict[each] = []

    np.random.seed(fixed_seed_num)  # for reproducibility

    # Number of features kept by the RFE-based models ("SVMW", "NBW").
    # Earlier notes in this file list 25 (AS leak, tab=0), 32 (Slammer, tab=0)
    # and 33 (Nimda, tab=1) as per-dataset values.
    num_selected_features = 33

    for tab_cv in range(cross_cv):
        # Only the first cross-validation fold is trained and evaluated.
        if tab_cv != 0:
            continue

        if Label in _SEQUENCE_LABELS:
            y_test, result = _run_sequence_model(Label, tab_cv)
        elif Label == "MLP":
            y_test, result = _run_mlp(Label, tab_cv)
        else:
            y_test, result = _run_flat_classifier(
                Label, tab_cv, num_selected_features)

        # Callers that pass explicit parameters only want the evaluation
        # (the original comment says it is used for plotting AUC).
        if len(Parameters) > 0:
            return Evaluation.Evaluation(y_test, result)

        # Computing ACCURACY, F1-score, ..., etc.
        results = Evaluation.Evaluation(y_test, result)
        print(results)

        y_test2 = np.array(Evaluation.ReverseEncoder(y_test))
        result2 = np.array(Evaluation.ReverseEncoder(result))
        print("---------------------------1111111111111111")
        with open("StatFalseAlarm_" + filename + "_True.txt", "w") as fout:
            fout.writelines(str(int(value)) + '\n' for value in y_test2)
        with open("StatFalseAlarm_" + filename + "_" + Label + "_" + "_Predict.txt", "w") as fout:
            fout.writelines(str(int(value)) + '\n' for value in result2)
        print(result2.shape)
        print("---------------------------22222222222222222")

        for each_eval, each_result in results.items():
            result_list_dict[each_eval].append(each_result)

    # Collapse the per-fold lists into their mean.
    for eachk in list(result_list_dict):
        result_list_dict[eachk] = np.average(result_list_dict[eachk])

    # ``== False`` is kept on purpose: it preserves the original comparison
    # for values such as ``None`` that are falsy but not equal to ``False``.
    if is_add_noise == False:  # noqa: E712
        log_name = "Comparison_Log_" + filename + ".txt"
        line_prefix = Label + ":__"
    else:
        log_name = "Comparison_Log_Adding_Noise_" + filename + ".txt"
        line_prefix = Label + ":__" + "Noise_Ratio_:" + str(noise_ratio)

    with open(os.path.join(os.getcwd(), log_name), "a") as fout:
        fout.write(line_prefix)
        for eachk, eachv in result_list_dict.items():
            fout.write(eachk + ": " + str(round(eachv, 3)) + ",\t")
        fout.write('\n')

    return results


def _apply_parameter_overrides(parameters):
    """Copy every recognised key of *parameters* into the module settings."""
    settings = globals()
    for key in _MODEL_PARAMETER_KEYS:
        try:
            settings[key] = parameters[key]
        except (KeyError, TypeError):
            # Key not supplied (or *parameters* is not a mapping): keep the
            # current module-level value for this setting.
            continue


def _now():
    """Return a timestamp in seconds for measuring elapsed training time."""
    # time.clock() was removed in Python 3.8; fall back to it only on
    # interpreters that do not provide time.perf_counter().
    timer = getattr(time, "perf_counter", None) or time.clock
    return timer()


def _load_flat_data(tab_cv, normalize):
    """Load fold *tab_cv* through ``LoadData.GetData_WithoutS``."""
    return LoadData.GetData_WithoutS(
        is_add_noise, noise_ratio, filepath, filename, sequence_window,
        tab_cv, cross_cv, Multi_Scale=is_multi_scale,
        Wave_Let_Scale=training_level, Normalize=normalize)


def _build_flat_classifier(Label, num_selected_features):
    """Return the unfitted scikit-learn estimator used for *Label*."""
    if Label in ("SVM", "SVMF"):
        return svm.SVC(kernel="rbf", gamma=0.00001, C=100000, probability=True)
    if Label == "SVMW":
        # n_features_to_select is passed by keyword: recent scikit-learn
        # releases no longer accept it positionally.
        return RFE(svm.SVC(kernel="linear", probability=True),
                   n_features_to_select=num_selected_features, step=1)
    if Label in ("NBF", "NB"):
        return MultinomialNB()
    if Label == "NBW":
        return RFE(MultinomialNB(),
                   n_features_to_select=num_selected_features, step=1)
    if Label == "DT":
        return tree.DecisionTreeClassifier()
    # Remaining flat label: "Ada.Boost".
    return AdaBoostClassifier(n_estimators=10)


def _run_flat_classifier(Label, tab_cv, num_selected_features):
    """Fit the scikit-learn model for *Label*; return ``(y_test, result)``."""
    x_train, _, y_train0, x_test, y_test, _ = _load_flat_data(
        tab_cv, _FLAT_NORMALIZE_CODE[Label])

    if Label in ("DT", "Ada.Boost"):
        print(Label + _RUNNING_BANNER + str(x_train.shape))
    else:
        print(Label + _RUNNING_BANNER)

    clf = _build_flat_classifier(Label, num_selected_features)
    if Label in ("SVM", "SVMF"):
        print(x_train.shape)

    # These models are fitted on ``y_train0`` (third value of the loader).
    clf.fit(x_train, y_train0)
    return y_test, clf.predict_proba(x_test)


def _fit_and_predict(model, x_train, y_train, x_test, batch_size):
    """Compile, fit and run a Keras model; return its test predictions."""
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    # NOTE(review): ``nb_epoch`` is the Keras 1 spelling - confirm the
    # installed Keras version still accepts it.
    model.fit(x_train, y_train, batch_size=batch_size, nb_epoch=epoch)
    return model.predict(x_test)


def _run_mlp(Label, tab_cv):
    """Train the dense network for "MLP"; return ``(y_test, result)``."""
    x_train, y_train, _, x_test, y_test, _ = _load_flat_data(
        tab_cv, _FLAT_NORMALIZE_CODE[Label])
    print(Label + _RUNNING_BANNER)

    batch_size = len(y_train)  # full-batch training
    start = _now()
    model = Sequential()
    # NOTE(review): the input width is hard-coded to 33 and ignores the
    # module-level ``input_dim`` setting - confirm this is intended.
    model.add(Dense(hidden_units, activation="relu", input_dim=33))
    model.add(Dense(output_dim=number_class))
    model.add(Activation("sigmoid"))
    result = _fit_and_predict(model, x_train, y_train, x_test, batch_size)
    end = _now()
    print("The Time For MLP is " + str(end - start))
    return y_test, result


def _run_sequence_model(Label, tab_cv):
    """Train the "RNN" or "LSTM" network; return ``(y_test, result)``."""
    print(Label + _RUNNING_BANNER)
    start = _now()  # the measured time includes data loading
    _, x_train, y_train, _, x_test, y_test = LoadData.GetData(
        is_add_noise, noise_ratio, 'Attention', filepath, filename,
        sequence_window, tab_cv, cross_cv, Multi_Scale=is_multi_scale,
        Wave_Let_Scale=training_level)

    batch_size = len(y_train)  # full-batch training
    if Label == "RNN":
        recurrent_layer = SimpleRNN(hidden_units, input_length=len(x_train[0]), input_dim=input_dim)
        hidden_activation = "sigmoid"
    else:
        recurrent_layer = LSTM(hidden_units, input_length=len(x_train[0]), input_dim=input_dim)
        hidden_activation = "relu"

    model = Sequential()
    model.add(recurrent_layer)
    model.add(Dense(30, activation=hidden_activation))
    model.add(Dense(output_dim=number_class))
    model.add(Activation("sigmoid"))
    result = _fit_and_predict(model, x_train, y_train, x_test, batch_size)
    end = _now()
    print("The Time For " + Label + " is " + str(end - start))
    return y_test, result