Example #1
import os

import numpy

#assumed project-local modules (not shown in this listing): ann, stage2,
#stage3, test; getMembershipVal and get_model_list are also assumed to be
#available from them. get_final_data and train below appear to be the
#bodies of stage3.get_final_data and stage2.train referenced from main().
import ann
import stage2
import stage3
import test


def get_final_data(models):
    file_final = open('input/inputToStage_3.data', 'w')

    num_clusters = len(models)

    for i in range(num_clusters):
        j = str(i)
        inputFile = open('input/cluster_' + j + '_train.csv')
        currOut = []
        for line in inputFile:
            #first 11 comma-separated columns are the packet features,
            #the 12th is the label used to derive cluster memberships
            cols = line.strip().split(',')
            pointData = numpy.asarray([[float(c) for c in cols[:11]]])

            membershipValue = getMembershipVal(cols[11])

            #predict output from every cluster ANN for the current data point
            for clusterNum in range(num_clusters):
                curr = models[clusterNum].predict(pointData,
                                                  batch_size=10,
                                                  verbose=0)
                currOut.append(curr[0])

            #build the input row for the final ANN by combining each cluster
            #ANN's output with the point's membership value for that cluster
            #(see the standalone shape sketch after this function)
            outputFromFeed = numpy.matmul(numpy.transpose(currOut),
                                          numpy.transpose(membershipValue))

            writeStr = ','.join([str(x) for x in outputFromFeed])
            file_final.write(writeStr)
            file_final.write('\n')
            currOut = []

        inputFile.close()

    file_final.close()

    filenames = [
        'output/cluster_' + str(i) + '_label.csv' for i in range(num_clusters)
    ]
    with open('output/combinedFile', 'w') as outfile:
        for fname in filenames:
            with open(fname) as infile:
                for line in infile:
                    outfile.write(line)

    return ann.get_data("input/inputToStage_3.data", "output/combinedFile",
                        False)
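

# Illustrative only: the membership-weighted combination above is easiest to
# follow with concrete shapes. This demo (not part of the original pipeline)
# mimics one loop iteration, assuming 4 cluster ANNs with 4 output units each
# and a 1-D membership vector from getMembershipVal with one entry per cluster.
def _membership_combination_demo():
    #one prediction vector per cluster ANN for a single packet (dummy values)
    currOut = [numpy.asarray([0.1, 0.2, 0.3, 0.4]) for _ in range(4)]
    #assumed membership of the packet in each of the 4 clusters
    membershipValue = numpy.asarray([0.7, 0.1, 0.1, 0.1])

    #transpose(currOut) has shape (4 outputs, 4 clusters); multiplying by the
    #(4,) membership vector yields a single 4-value row for the final ANN
    outputFromFeed = numpy.matmul(numpy.transpose(currOut),
                                  numpy.transpose(membershipValue))
    print(outputFromFeed)

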
def train(num_clusters):
    models = []
    for i in range(num_clusters):
        j = str(i)
        file_in = "input/cluster_" + j + "_train.csv"
        file_out = "output/cluster_" + j + "_label.csv"

        in_data, out_data = ann.get_data(file_in, file_out, True)
        models.append(ann.get_model(in_data, out_data, 11, 14, 4))

    return models
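

# ann.get_model is not shown in this listing. Judging from the .json/.h5
# model files handled elsewhere and the (11, 14, 4) arguments above, it
# presumably builds and fits a small Keras MLP; the sketch below is only an
# assumption about its shape (activations, optimizer, and epochs are guesses),
# not the project's actual implementation.
def _get_model_sketch(in_data, out_data, n_inputs=11, n_hidden=14, n_classes=4):
    from keras.models import Sequential
    from keras.layers import Dense

    model = Sequential()
    model.add(Dense(n_hidden, activation='relu', input_dim=n_inputs))
    model.add(Dense(n_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    model.fit(in_data, out_data, epochs=10, batch_size=10, verbose=0)
    return model

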
def main():
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    #4 clusters because packets are categorized into Normal and
    #3 kinds of attacks (DoS, Probe, U2R)
    num_clusters = 4

    #1. Train Stage II ANNs
    #-----------------Skip this step if models are stored-------------------#
    stored = False
    if os.path.isfile("model/model0.json"):
        stored = True

    if not stored:
        print("Training " + str(num_clusters) + " ANNs for prediction")
        stage2_ann = stage2.train(num_clusters)
        #save these ANN to a file
        for index in range(num_clusters):
            ann.write_model_to_file(stage2_ann[index], index)
        stored = True

    #2. Prepare input and true labels for training Stage III ANN
    models = []
    if not os.path.isfile("input/inputToStage_3.data"):
        print("Preparing input for final ANN")
        if stored:
            models = get_model_list(num_clusters)
        else:
            models = stage2_ann

        in_data, out_data = stage3.get_final_data(models)
    else:
        in_data, out_data = ann.get_data("input/inputToStage_3.data",
                                         "output/combinedFile", False)

    #3. Train Stage III ANN
    if not os.path.isfile("model/final.json"):
        print("Training final ANN")
        final_ann = stage3.get_final_ann(in_data, out_data)
        ann.write_model_to_file_final(final_ann)

    #4. Predict output for test packets
    if os.path.isfile("model/final.json"):
        if not models:
            models = get_model_list(num_clusters)

        #generate input test file
        source_attack = test.get_test_data(models)

        test_data = numpy.loadtxt("input/test_final.data", delimiter=",")

        #load Stage III ANN and predict for test packets
        loaded_model_final = ann.load_model_from_file('model/final.json',
                                                      'model/final.h5')
        prediction = loaded_model_final.predict(test_data,
                                                batch_size=10,
                                                verbose=0)

        test.get_prediction_labels(prediction, source_attack)
    else:
        print("No trained ANN found for final prediction")