def get_final_data(models):
    """Build the Stage III training set from the Stage II models.

    For every row of each ``input/cluster_<i>_train.csv`` file, every model
    in ``models`` predicts on the row's 11 feature columns, and the stacked
    predictions are matrix-multiplied with the value ``getMembershipVal``
    returns for the row's 12th column.  One comma-separated line per row is
    written to ``input/inputToStage_3.data``.  The per-cluster label files
    ``output/cluster_<i>_label.csv`` are then concatenated, in cluster
    order, into ``output/combinedFile``.

    Args:
        models: Sequence of trained models, one per cluster.  Each must
            provide ``predict(data, batch_size=..., verbose=...)``.

    Returns:
        Whatever ``ann.get_data`` returns for the two generated files
        (callers unpack it as ``in_data, out_data``).

    Raises:
        ValueError: If a training row does not have exactly 12
            comma-separated fields, or a feature field is not a number.
    """
    num_features = 11
    num_clusters = len(models)

    # Context managers guarantee that every file is closed, even when a
    # prediction or a parse error aborts the loop half-way.
    with open('input/inputToStage_3.data', 'w') as file_final:
        for i in range(num_clusters):
            with open('input/cluster_' + str(i) + '_train.csv') as input_file:
                for line in input_file:
                    fields = line.split(',')
                    if len(fields) != num_features + 1:
                        raise ValueError(
                            'expected %d comma-separated fields, got %d'
                            % (num_features + 1, len(fields)))

                    point_data = numpy.asarray(
                        [[float(value) for value in fields[:num_features]]])
                    # The last field still carries the trailing newline; it
                    # is handed to getMembershipVal untouched.
                    membership_value = getMembershipVal(fields[num_features])

                    # Predict the output of every Stage II ANN for this
                    # data point.
                    curr_out = [
                        model.predict(point_data, batch_size=10, verbose=0)[0]
                        for model in models
                    ]

                    # Input for the final ANN: the output of each ANN for
                    # this data point multiplied by the point's
                    # corresponding membership to each cluster.
                    output_from_feed = numpy.matmul(
                        numpy.transpose(curr_out),
                        numpy.transpose(membership_value))
                    file_final.write(
                        ','.join([str(x) for x in output_from_feed]) + '\n')

    # Concatenate the label files of exactly the clusters whose rows were
    # written above, so inputs and labels stay aligned for any cluster count.
    filenames = [
        'output/cluster_' + str(i) + '_label.csv'
        for i in range(num_clusters)
    ]
    with open('output/combinedFile', 'w') as outfile:
        for fname in filenames:
            with open(fname) as infile:
                for line in infile:
                    outfile.write(line)

    return ann.get_data("input/inputToStage_3.data", "output/combinedFile", False)
def train(num_clusters):
    """Train one Stage II ANN per cluster and return them in cluster order.

    For cluster ``i`` the training inputs are read from
    ``input/cluster_<i>_train.csv`` and the labels from
    ``output/cluster_<i>_label.csv`` via ``ann.get_data``; the model itself
    is produced by ``ann.get_model``.

    Args:
        num_clusters: Number of clusters, i.e. number of models to train.

    Returns:
        A list with ``num_clusters`` models; index ``i`` holds the model
        trained on cluster ``i``.
    """
    input_template = "input/cluster_{}_train.csv"
    label_template = "output/cluster_{}_label.csv"

    trained = []
    for cluster_idx in range(num_clusters):
        features, labels = ann.get_data(
            input_template.format(cluster_idx),
            label_template.format(cluster_idx),
            True,
        )
        # NOTE(review): 11, 14, 4 are presumably the input width, hidden
        # layer size and output width -- confirm against ann.get_model.
        network = ann.get_model(features, labels, 11, 14, 4)
        trained.append(network)
    return trained
def main():
    """Run the full pipeline: train Stage II, train Stage III, then predict.

    Each step is skipped when its artefact already exists on disk:

    1. Train the Stage II ANNs unless ``model/model0.json`` exists.
    2. Build the Stage III training data unless
       ``input/inputToStage_3.data`` exists (otherwise load it).
    3. Train the Stage III ANN unless ``model/final.json`` exists.
    4. If ``model/final.json`` exists, generate the test input, predict with
       the final ANN and hand the predictions to
       ``test.get_prediction_labels``; otherwise print a notice.
    """
    # Reduce TensorFlow's native log output.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    # Packets are categorised into Normal and 3 kinds of attacks
    # (DoS, Probe, U2R).
    num_clusters = 4

    # 1. Train Stage II ANNs -- skipped when the models are already stored.
    if not os.path.isfile("model/model0.json"):
        print("Training " + str(num_clusters) + " ANNs for prediction")
        stage2_ann = stage2.train(num_clusters)
        # Save these ANNs to file.
        for index, model in enumerate(stage2_ann):
            ann.write_model_to_file(model, index)

    # 2. Prepare input and true labels for training the Stage III ANN.
    models = []
    if not os.path.isfile("input/inputToStage_3.data"):
        print("Preparing input for final ANN")
        # Step 1 either found the stored models or has just written them,
        # so they are always loaded from disk here.
        models = get_model_list(num_clusters)
        in_data, out_data = stage3.get_final_data(models)
    else:
        in_data, out_data = ann.get_data(
            "input/inputToStage_3.data", "output/combinedFile", False)

    # 3. Train the Stage III ANN.
    if not os.path.isfile("model/final.json"):
        print("Training final ANN")
        final_ann = stage3.get_final_ann(in_data, out_data)
        ann.write_model_to_file_final(final_ann)

    # 4. Predict output for test packets.
    if os.path.isfile("model/final.json"):
        if not models:
            models = get_model_list(num_clusters)
        # Generate the input test file.
        source_attack = test.get_test_data(models)
        # Close the test file deterministically once it has been parsed.
        with open("input/test_final.data", "r") as test_file:
            test_data = numpy.loadtxt(test_file, delimiter=",")
        # Load the Stage III ANN and predict for the test packets.
        loaded_model_final = ann.load_model_from_file(
            'model/final.json', 'model/final.h5')
        prediction = loaded_model_final.predict(
            test_data, batch_size=10, verbose=0)
        test.get_prediction_labels(prediction, source_attack)
    else:
        print("No trained ANN found for final prediction")