def task3(train_set, test_set, algo):
    """Fit a model on the ``Bearing1`` series and measure fit/predict cost.

    The function plots both data sets, optionally runs the stationarity
    diagnostics (controlled by the module-level ``check_ds`` flag), builds a
    model with ``ar_utils.generate_model``, forecasts ``len(test_set)`` steps
    with ``ar_utils.predict`` and plots the forecast against ``test_set``.

    Args:
        train_set: Data used for fitting; must expose a ``'Bearing1'`` column.
        test_set: Hold-out data; only its length and values for plotting are
            used here.
        algo: Forwarded unchanged to ``ar_utils.generate_model`` and
            ``ar_utils.plot_difference_model``.

    Returns:
        tuple: ``(train_t, pred_t, cpu_train_perc, cpu_pred_perc)`` where the
        first two are wall-clock seconds spent inside ``generate_model`` and
        ``predict`` respectively, and the last two are the absolute difference
        between the CPU-percent samples taken before and after each step.
    """
    bearing = train_set['Bearing1']

    ar_utils.test_dataset(bearing)
    ar_utils.plot_series(train_set, "train set")
    ar_utils.plot_series(test_set, "test set")

    # NOTE(review): the source lost its indentation; the ADF, PACF and ACF
    # calls are assumed to all belong to this optional diagnostics branch.
    # `== True` is kept on purpose so non-bool values of the flag behave
    # exactly as before.
    if check_ds == True:  # noqa: E712
        print('adf fuller testing ...')
        ar_utils.adfuller_test(bearing)
        ar_utils.plot_pacf(bearing)
        ar_utils.plot_acf(bearing)

    # --- training -----------------------------------------------------------
    # psutil.cpu_percent(interval=0.5) blocks for half a second, so the
    # timestamps are taken immediately around the call being measured;
    # otherwise the sampling delay would be counted as training time.
    cpu_t0 = psutil.cpu_percent(interval=0.5)
    t0 = times.time()
    # NOTE(review): the meaning of the positional 0, 3, 3 arguments is defined
    # by ar_utils.generate_model - presumably model orders; confirm there.
    model = ar_utils.generate_model(train_set, 0, 3, 3, 'Bearing1', algo)
    t1 = times.time()
    cpu_t1 = psutil.cpu_percent(interval=0.5)

    train_t = t1 - t0
    cpu_train_perc = abs(cpu_t1 - cpu_t0)

    # --- prediction ---------------------------------------------------------
    cpu_p0 = psutil.cpu_percent(interval=0.5)
    pred_start = times.time()
    pred_set = ar_utils.predict(model, len(test_set))
    pred_end = times.time()
    cpu_p1 = psutil.cpu_percent(interval=0.5)

    pred_t = pred_end - pred_start
    cpu_pred_perc = abs(cpu_p1 - cpu_p0)

    ar_utils.plot_difference_model(test_set, pred_set, algo)

    return train_t, pred_t, cpu_train_perc, cpu_pred_perc
def predict_request_body_len(train_set, test_set, method):
    """Fit an ARIMA(2, 0, 1) model on ``request_body_len`` and plot a forecast.

    Runs the ``arima_utils`` diagnostics on the training column, fits a PyFlux
    ARIMA model with the requested inference ``method``, prints the elapsed
    fit time and the fit scores, shows the in-sample fit, and finally plots a
    100-step forecast next to the real values of ``test_set``.

    Args:
        train_set: DataFrame passed to ``pf.ARIMA``; must contain a
            ``'request_body_len'`` column.
        test_set: DataFrame whose index and ``'request_body_len'`` column are
            plotted as the reference series.
        method: Inference method name forwarded to ``model.fit``.

    Returns:
        None. Results are printed and shown with matplotlib.
    """
    target = 'request_body_len'
    series = train_set[target]

    # Diagnostics on the raw series (NaNs are dropped only for the ADF call).
    # A first-difference variant of these diagnostics was tried earlier and
    # is no longer executed.
    arima_utils.adfuller_test(series.dropna())
    arima_utils.plot_series(series, 'Original Series')
    arima_utils.plot_pacf(series)
    arima_utils.plot_acf(series)

    # The original attempt used p = 12 and q = 34.
    ar_order, ma_order = 2, 1

    fit_started = time.time()
    print("STARTING TIMER REQUEST ", method)
    model = pf.ARIMA(
        data=train_set,
        ar=ar_order,
        ma=ma_order,
        integ=0,
        target=target,
    )
    fit_result = model.fit(method=method)
    fit_elapsed = time.time() - fit_started
    print("TIME: ", fit_elapsed)

    print(fit_result.scores)
    model.plot_fit()
    plt.show()

    # The timed section below covers the forecast and the plotting calls.
    predict_started = time.time()
    print("STARTING TIMER, PREDICT REQUEST ", method)
    plt.plot(test_set.index, test_set[target], label='REAL', color='pink')
    plt.plot(model.predict(h=100), label='PREDICTION', color='cyan')
    plt.legend(['REAL', 'PREDICTION'])
    predict_elapsed = time.time() - predict_started
    print("TIME: ", predict_elapsed)
    plt.show()
def predict_response_body_len(train_set, test_set, method=None):
    """Fit an ARIMA(2, 0, 8) model on ``response_body_len`` and plot a forecast.

    Runs the ``arima_utils`` diagnostics on the training column, fits a PyFlux
    ARIMA model, prints the elapsed fit time, the fit summary and scores,
    shows the in-sample fit, and plots a 100-step forecast next to the real
    values of ``test_set``.

    Args:
        train_set: DataFrame passed to ``pf.ARIMA``; must contain a
            ``'response_body_len'`` column.
        test_set: DataFrame whose index and ``'response_body_len'`` column
            are plotted as the reference series.
        method: Inference method name forwarded to ``model.fit``. When
            omitted, a module-level ``method`` variable is used if one
            exists; otherwise ``None`` is forwarded to ``model.fit``.

    Returns:
        None. Results are printed and shown with matplotlib.
    """
    if method is None:
        # The function used to read `method` as a free variable, which raised
        # NameError unless a global of that name existed. Keep honouring such
        # a global for existing callers, but no longer require it.
        method = globals().get('method')

    target = 'response_body_len'
    series = train_set[target]

    arima_utils.adfuller_test(series)
    arima_utils.plot_series(series, 'Original Series')
    arima_utils.plot_pacf(series)
    arima_utils.plot_acf(series)

    # The original attempt used p = 8 and q = 7.
    p = 2
    q = 8

    start_time = time.time()
    print("STARTING TIMER, RESPONSE")
    model = pf.ARIMA(data=train_set, ar=p, ma=q, integ=0, target=target)
    x = model.fit(method=method)
    total_time = time.time() - start_time
    print("TIME: ", total_time)

    print(x.summary())
    print(x.scores)
    model.plot_fit()
    plt.show()

    # The timed section below covers the forecast and the plotting calls.
    start_time = time.time()
    print("STARTING TIMER PREDICT RESPONSE")
    plt.plot(test_set.index, test_set[target], label='REAL', color='pink')
    plt.plot(model.predict(h=100), label='PREDICTION', color='cyan')
    plt.legend(['REAL', 'PREDICTION'])
    total_time = time.time() - start_time
    print("TIME: ", total_time)
    plt.show()
def ANY_size(conn80, ANY, shiftvalue):
    """Add a length column for ``ANY`` and either difference it or trim outliers.

    A column ``<ANY>_size`` holding ``len()`` of every value in
    ``conn80[ANY]`` is added to ``conn80`` in place and printed.

    * ``shiftvalue > 0``: a column ``<ANY>_shift_<shiftvalue>`` with the
      lag-``shiftvalue`` difference of the sizes is added, then passed to
      ``arima_utils.adfuller_test`` (NaNs dropped) and
      ``arima_utils.plot_series``.
    * otherwise: the rows whose size lies within one standard deviation of
      the mean size are selected and returned.

    Args:
        conn80: DataFrame to analyse; it is modified in place.
        ANY: Name of the column whose values support ``len()``.
        shiftvalue: Differencing lag; values ``<= 0`` select the
            outlier-filter branch.

    Returns:
        The filtered DataFrame when ``shiftvalue <= 0``; ``None`` after the
        differencing branch.
    """
    new_size = ANY + "_size"
    new_shift = ANY + "_shift_" + str(shiftvalue)

    conn80[new_size] = conn80[ANY].apply(len)
    print(conn80[new_size])

    if shiftvalue > 0:
        conn80[new_shift] = conn80[new_size] - conn80[new_size].shift(shiftvalue)
        arima_utils.adfuller_test(conn80[new_shift].dropna())
        # Title follows the analysed column instead of a hard-coded
        # "uri_size"; for ANY == "uri" the text is unchanged.
        arima_utils.plot_series(
            conn80[new_shift], new_size + " shift=" + str(shiftvalue)
        )
        # NOTE(review): the source lost its indentation, so it is unclear
        # whether the original `return fix` sat inside the else branch. Either
        # way this path never produced a frame, so return None explicitly.
        return None

    sizes = conn80[new_size]
    within_one_std = np.abs(sizes - sizes.mean()) <= sizes.std()
    return conn80[within_one_std]
def predict_request_body_len(train_set, test_set, method=None):
    """Fit an ARIMA(2, 0, 1) model on ``request_body_len`` and plot a forecast.

    Runs the ``arima_utils`` diagnostics on the training column, fits a PyFlux
    ARIMA model, prints the elapsed fit time, the fit summary and scores,
    shows the in-sample fit, and plots a 100-step forecast next to the real
    values of ``test_set``.

    Args:
        train_set: DataFrame passed to ``pf.ARIMA``; must contain a
            ``'request_body_len'`` column.
        test_set: DataFrame whose index and ``'request_body_len'`` column are
            plotted as the reference series.
        method: Inference method name forwarded to ``model.fit``. When
            omitted, a module-level ``method`` variable is used if one
            exists; otherwise ``None`` is forwarded to ``model.fit``.

    Returns:
        None. Results are printed and shown with matplotlib.
    """
    if method is None:
        # The function used to read `method` as a free variable, which raised
        # NameError unless a global of that name existed. Keep honouring such
        # a global for existing callers, but no longer require it.
        method = globals().get('method')

    target = 'request_body_len'
    series = train_set[target]

    # Diagnostics on the raw series (NaNs are dropped only for the ADF call).
    # A first-difference variant of these diagnostics was tried earlier and
    # is no longer executed.
    arima_utils.adfuller_test(series.dropna())
    arima_utils.plot_series(series, 'Original Series')
    arima_utils.plot_pacf(series)
    arima_utils.plot_acf(series)

    # The original attempt used p = 12 and q = 34.
    p = 2
    q = 1

    start_time = time.time()
    print("STARTING TIMER REQUEST")
    model = pf.ARIMA(data=train_set, ar=p, ma=q, integ=0, target=target)
    # Bayesian alternatives ('BBVI', 'Laplace', 'M-H') were experimented with
    # earlier; see https://pyflux.readthedocs.io/en/latest/var.html
    x = model.fit(method=method)
    total_time = time.time() - start_time
    print("TIME: ", total_time)

    print(x.summary())
    print(x.scores)
    model.plot_fit()
    plt.show()

    # The timed section below covers the forecast and the plotting calls.
    start_time = time.time()
    print("STARTING TIMER, PREDICT REQUEST")
    plt.plot(test_set.index, test_set[target], label='REAL', color='pink')
    plt.plot(model.predict(h=100), label='PREDICTION', color='cyan')
    plt.legend(['REAL', 'PREDICTION'])
    total_time = time.time() - start_time
    print("TIME: ", total_time)
    plt.show()
def malware_resp(conn):
    """Derive a per-row MIME-type check column and run the diagnostics on it.

    ``malware_resp_mime_types`` is applied to every value of
    ``conn['resp_mime_types']``; the result is stored in place as
    ``conn['check_respmimetypes']`` and then handed to the ``arima_utils``
    ADF test and the series, PACF and ACF plots.

    Args:
        conn: DataFrame with a ``'resp_mime_types'`` column; modified in
            place.

    Returns:
        None.
    """
    flag_column = 'check_respmimetypes'
    conn[flag_column] = conn['resp_mime_types'].apply(malware_resp_mime_types)

    flags = conn[flag_column]
    arima_utils.adfuller_test(flags)
    arima_utils.plot_series(flags, "check respmime")
    for correlogram in (arima_utils.plot_pacf, arima_utils.plot_acf):
        correlogram(flags)
def statuscode():
    """Run the ADF test and the series plot on ``conn80['status_code']``.

    Reads the module-level ``conn80`` frame; takes no arguments and returns
    ``None``.
    """
    column = 'status_code'
    codes = conn80[column]
    arima_utils.adfuller_test(codes)
    # The column name doubles as the plot title.
    arima_utils.plot_series(codes, column)
import pyflux as pf
import matplotlib.pyplot as plt
import pandas as pd

import arima_utils

# Load the CPU series; the first CSV column is parsed as dates.
# NOTE(review): `infer_datetime_format` is deprecated in pandas >= 2.0;
# it is kept here to leave behaviour on the pinned pandas version unchanged.
data = pd.read_csv(
    'resources/cpu-full-a.csv',
    parse_dates=[0],
    infer_datetime_format=True,
)

# Diagnostics on the raw series.
arima_utils.adfuller_test(data['cpu'])
arima_utils.plot_series(data['cpu'], 'Original Series')

# Same diagnostics on the lag-1 and lag-12 differences; each difference is
# stored as a new column named after its plot title.
for lag, label in ((1, 'Value First Difference'), (12, 'Value 12 Difference')):
    data[label] = data['cpu'] - data['cpu'].shift(lag)
    arima_utils.adfuller_test(data[label].dropna())
    arima_utils.plot_series(data[label], label)

arima_utils.plot_pacf(data['cpu'])
arima_utils.plot_acf(data['cpu'])

# Define the model and fit it with the "PML" method ("M-H" was also tried).
model = pf.ARIMA(data=data, ar=2, ma=20, integ=0, target='cpu')
x = model.fit("PML")