def get_preprocessed_training_data(datasize=None, regenerate=False, withfull=False):
    """Return the preprocessed training frames and their fitted GMMs.

    Results are cached as pickle files in the directory that contains this
    module.  With ``regenerate`` falsy the cache is read; otherwise the
    KDDTrain+ text files under ``data/`` are processed again and the cache
    files are rewritten.

    Args:
        datasize: Forwarded to ``model.load_dataframe`` as ``datasize=``.
            Only used when regenerating; a cached read ignores it.
        regenerate: When truthy, rebuild everything from the raw data files
            instead of reading the cached pickles.
        withfull: When regenerating, also process ``KDDTrain+.txt`` in
            addition to ``KDDTrain+_20Percent.txt``.

    Returns:
        A 4-tuple ``(df_training_20, df_training_full, gmms_training_20,
        gmms_training_full)``.  The two ``*_full`` entries are ``None`` when
        the full set was not generated (``withfull`` falsy) or, on a cached
        read, when its pickle file does not exist.
    """
    # NOTE(review): this function name is defined a second time further down
    # in this file; confirm which definition is meant to be kept.
    workpath = os.path.dirname(os.path.abspath(__file__))
    df_20_path = os.path.join(workpath, 'df_training_20.pkl')
    df_full_path = os.path.join(workpath, 'df_training_full.pkl')
    gmms_20_path = os.path.join(workpath, 'gmms_training_20.pkl')
    gmms_full_path = os.path.join(workpath, 'gmms_training_full.pkl')

    def _load(path, optional=False):
        # The "full" pickles are only written when withfull was requested, so
        # their absence is a normal state rather than an error.
        if optional and not os.path.isfile(path):
            return None
        # NOTE: pickle.load executes arbitrary code from the file; these cache
        # files must only ever come from a trusted local run of this module.
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    def _dump(obj, path):
        with open(path, 'wb') as handle:
            pickle.dump(obj, handle, -1)  # -1: highest pickle protocol

    def _preprocess(datafile):
        frame = model.load_dataframe(datafile, headers, datasize=datasize)
        print("descretization...")
        discretized = discretize_elems(frame, attacks)
        print("gmm fitting...")
        return discretized, construct_gmms(discretized, headers)

    if not regenerate:
        return (
            _load(df_20_path),
            _load(df_full_path, optional=True),
            _load(gmms_20_path),
            _load(gmms_full_path, optional=True),
        )

    headers, attacks = get_header_data()
    df_training_full = None
    gmms_training_full = None

    print("preprocessing training data for 20 percent...")
    df_training_20, gmms_training_20 = _preprocess(
        os.path.join(workpath, 'data', 'KDDTrain+_20Percent.txt'))

    if withfull:
        print("preprocessing training data total...")
        df_training_full, gmms_training_full = _preprocess(
            os.path.join(workpath, 'data', 'KDDTrain+.txt'))
    else:
        print("without full data")

    print("save to file...")
    _dump(df_training_20, df_20_path)
    _dump(gmms_training_20, gmms_20_path)
    if withfull:
        _dump(df_training_full, df_full_path)
        _dump(gmms_training_full, gmms_full_path)

    return df_training_20, df_training_full, gmms_training_20, gmms_training_full
def get_preprocessed_training_data(datasize=None, regenerate=False, withfull=False):
    """Load (or rebuild) the preprocessed training data and its GMMs.

    Four pickle files next to this module act as a cache:
    ``df_training_20.pkl``, ``df_training_full.pkl``,
    ``gmms_training_20.pkl`` and ``gmms_training_full.pkl``.

    Args:
        datasize: Passed through to ``model.load_dataframe(datasize=...)``
            when regenerating; unused on a cached read.
        regenerate: Falsy reads the cache; truthy re-processes the raw files
            under ``data/`` and overwrites the cache.
        withfull: When regenerating, additionally process ``KDDTrain+.txt``.
            Without it only ``KDDTrain+_20Percent.txt`` is processed.

    Returns:
        ``(df_training_20, df_training_full, gmms_training_20,
        gmms_training_full)``.  A ``*_full`` entry is ``None`` if the full
        set was not generated, or if its pickle is absent on a cached read.
    """
    # NOTE(review): an earlier definition with this same name exists in this
    # file; being later, this one is the definition that takes effect.
    workpath = os.path.dirname(os.path.abspath(__file__))
    # Listed in return order.
    cache_order = (
        'df_training_20',
        'df_training_full',
        'gmms_training_20',
        'gmms_training_full',
    )
    cache_files = dict(
        (stem, os.path.join(workpath, stem + '.pkl')) for stem in cache_order
    )

    if not regenerate:
        loaded = []
        for stem in cache_order:
            path = cache_files[stem]
            # The *_full pickles are only written when withfull was set, so a
            # missing one means "not generated", not a broken cache.
            if stem.endswith('_full') and not os.path.isfile(path):
                loaded.append(None)
                continue
            # NOTE: pickle.load can execute arbitrary code; only trusted,
            # locally generated cache files may be placed here.
            with open(path, 'rb') as handle:
                loaded.append(pickle.load(handle))
        return tuple(loaded)

    headers, attacks = get_header_data()
    results = dict.fromkeys(cache_order)  # every entry starts as None

    jobs = [('20', 'KDDTrain+_20Percent.txt',
             "preprocessing training data for 20 percent...")]
    if withfull:
        jobs.append(('full', 'KDDTrain+.txt',
                     "preprocessing training data total..."))

    for suffix, filename, banner in jobs:
        print(banner)
        raw = model.load_dataframe(
            os.path.join(workpath, 'data', filename), headers, datasize=datasize)
        print("descretization...")
        frame = discretize_elems(raw, attacks)
        print("gmm fitting...")
        results['df_training_' + suffix] = frame
        results['gmms_training_' + suffix] = construct_gmms(frame, headers)

    if not withfull:
        print("without full data")

    print("save to file...")
    for stem in cache_order:
        if stem.endswith('_full') and not withfull:
            continue  # nothing was generated for the full set
        with open(cache_files[stem], 'wb') as handle:
            pickle.dump(results[stem], handle, -1)  # -1: highest protocol

    return tuple(results[stem] for stem in cache_order)
def get_preprocessed_test_data(datasize=None, regenerate=False):
    """Return the preprocessed test frames and their fitted GMMs.

    With ``regenerate == False`` the four cached pickles stored next to this
    module are read back.  Otherwise ``data/KDDTest+.txt`` and
    ``data/KDDTest-21.txt`` are loaded through ``model.load_dataframe``,
    passed to ``discretize_elems`` and ``construct_gmms``, and the results
    are written to the same four pickle files.

    Args:
        datasize: Forwarded to ``model.load_dataframe`` as ``datasize=``;
            not used when reading the cache.
        regenerate: ``False`` reads the cache; other values rebuild it.

    Returns:
        ``(df_test_plus, df_test_21, gmms_test_plus, gmms_test_21)``.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    # Cache files, listed in the order the results are returned.
    cache_paths = (
        base_dir + '/./df_test_plus.pkl',
        base_dir + '/./df_test_21.pkl',
        base_dir + '/./gmms_test_plus.pkl',
        base_dir + '/./gmms_test_21.pkl',
    )

    # Equality (not truthiness) is kept on purpose so that values such as
    # None still take the regenerate path, exactly as before.
    if regenerate == False:
        cached = []
        for cache_path in cache_paths:
            with open(cache_path, 'rb') as handle:
                cached.append(pickle.load(handle))
        return tuple(cached)

    sources = (
        ("preprocessing testing data plus...", base_dir + '/data/KDDTest+.txt'),
        ("preprocessing testing data 21...", base_dir + '/data/KDDTest-21.txt'),
    )
    headers, attacks = get_header_data()

    frames = []
    mixtures = []
    for banner, datafile in sources:
        print(banner)
        raw = model.load_dataframe(datafile, headers, datasize=datasize)
        frame = discretize_elems(raw, attacks)
        frames.append(frame)
        mixtures.append(construct_gmms(frame, headers))

    print("save to file...")
    results = tuple(frames + mixtures)
    for cache_path, payload in zip(cache_paths, results):
        with open(cache_path, 'wb') as handle:
            pickle.dump(payload, handle, -1)  # -1: highest pickle protocol

    return results
def get_preprocessed_test_data(datasize=None, regenerate=False):
    """Load the cached test data, or rebuild it and refresh the cache.

    Cached read (``regenerate == False``): unpickles ``df_test_plus.pkl``,
    ``df_test_21.pkl``, ``gmms_test_plus.pkl`` and ``gmms_test_21.pkl`` from
    this module's directory.

    Rebuild (any other value): loads ``data/KDDTest+.txt`` and
    ``data/KDDTest-21.txt`` with ``model.load_dataframe``, runs
    ``discretize_elems`` and ``construct_gmms`` on each, then writes the four
    pickle files.

    Args:
        datasize: Given to ``model.load_dataframe`` as ``datasize=`` during a
            rebuild; ignored for a cached read.
        regenerate: ``False`` selects the cached read.

    Returns:
        ``(df_test_plus, df_test_21, gmms_test_plus, gmms_test_21)``.
    """
    here = os.path.dirname(os.path.abspath(__file__))

    def read_cache(suffix):
        with open(here + suffix, 'rb') as source:
            return pickle.load(source)

    def write_cache(suffix, payload):
        with open(here + suffix, 'wb') as sink:
            pickle.dump(payload, sink, -1)  # -1: highest pickle protocol

    # Compared with == rather than "not" so non-False falsy values keep
    # going down the rebuild path, as they always have.
    if regenerate == False:
        plus_frame = read_cache('/./df_test_plus.pkl')
        frame_21 = read_cache('/./df_test_21.pkl')
        plus_gmms = read_cache('/./gmms_test_plus.pkl')
        gmms_21 = read_cache('/./gmms_test_21.pkl')
        return plus_frame, frame_21, plus_gmms, gmms_21

    plus_source = here + '/data/KDDTest+.txt'
    source_21 = here + '/data/KDDTest-21.txt'
    headers, attacks = get_header_data()

    def build(datafile):
        loaded = model.load_dataframe(datafile, headers, datasize=datasize)
        discretized = discretize_elems(loaded, attacks)
        return discretized, construct_gmms(discretized, headers)

    print("preprocessing testing data plus...")
    plus_frame, plus_gmms = build(plus_source)

    print("preprocessing testing data 21...")
    frame_21, gmms_21 = build(source_21)

    print("save to file...")
    write_cache('/./df_test_plus.pkl', plus_frame)
    write_cache('/./df_test_21.pkl', frame_21)
    write_cache('/./gmms_test_plus.pkl', plus_gmms)
    write_cache('/./gmms_test_21.pkl', gmms_21)

    return plus_frame, frame_21, plus_gmms, gmms_21