Esempio n. 1
0
def task3(train_set, test_set, algo):
    """Fit a model on the 'Bearing1' series, forecast the test horizon and time both steps.

    Args:
        train_set: Training data; must expose a 'Bearing1' column.
        test_set: Held-out data; only its length is used as the forecast
            horizon, and it is plotted against the prediction.
        algo: Fitting algorithm identifier, forwarded to
            ``ar_utils.generate_model`` and ``ar_utils.plot_difference_model``.

    Returns:
        A tuple ``(train_t, pred_t, cpu_train_perc, cpu_pred_perc)``:
        wall-clock seconds spent fitting, wall-clock seconds spent
        predicting, and the absolute difference between the CPU-percent
        samples taken before and after each step.
    """
    ar_utils.test_dataset(train_set['Bearing1'])

    ar_utils.plot_series(train_set, "train set")
    ar_utils.plot_series(test_set, "test set")

    # `check_ds` is a module-level switch defined outside this function.
    if check_ds:
        print('adf fuller testing ...')
        ar_utils.adfuller_test(train_set['Bearing1'])
        ar_utils.plot_pacf(train_set['Bearing1'])
        ar_utils.plot_acf(train_set['Bearing1'])

    # psutil.cpu_percent(interval=0.5) blocks for half a second, so every CPU
    # sample is taken OUTSIDE the wall-clock window it brackets; otherwise the
    # sampling delay would be counted as training / prediction time.
    cpu_t0 = psutil.cpu_percent(interval=0.5)
    t0 = times.time()
    # NOTE(review): the meaning of the positional arguments 0, 3, 3 is defined
    # by ar_utils.generate_model and is not visible here.
    model = ar_utils.generate_model(train_set, 0, 3, 3, 'Bearing1', algo)
    t1 = times.time()
    cpu_t1 = psutil.cpu_percent(interval=0.5)
    train_t = t1 - t0
    cpu_train_perc = abs(cpu_t1 - cpu_t0)

    cpu_p0 = psutil.cpu_percent(interval=0.5)
    p0 = times.time()
    pred_set = ar_utils.predict(model, len(test_set))
    p1 = times.time()
    cpu_p1 = psutil.cpu_percent(interval=0.5)
    pred_t = p1 - p0
    cpu_pred_perc = abs(cpu_p1 - cpu_p0)

    ar_utils.plot_difference_model(test_set, pred_set, algo)

    return train_t, pred_t, cpu_train_perc, cpu_pred_perc
Esempio n. 2
0
def predict_request_body_len(train_set, test_set, method):
    """Fit an ARIMA(2, 0, 1) on 'request_body_len' and plot a 100-step forecast.

    Runs the stationarity test and the series / PACF / ACF plots on the
    training column, fits the model with the requested ``method`` while
    printing the elapsed wall-clock time, shows the in-sample fit, and finally
    overlays the forecast on the real values from ``test_set``.

    Args:
        train_set: DataFrame passed to ``pf.ARIMA``; needs 'request_body_len'.
        test_set: DataFrame with the real values plotted against the forecast.
        method: Inference method name forwarded to ``model.fit``.
    """
    target = 'request_body_len'
    ar_order = 2  # earlier attempt used p = 12
    ma_order = 1  # earlier attempt used q = 34

    # Diagnostics on the raw training series.
    arima_utils.adfuller_test(train_set[target].dropna())
    arima_utils.plot_series(train_set[target], 'Original Series')
    arima_utils.plot_pacf(train_set[target])
    arima_utils.plot_acf(train_set[target])

    # Timed section 1: model construction + fitting.
    fit_started = time.time()
    print("STARTING TIMER REQUEST ", method)
    model = pf.ARIMA(data=train_set, ar=ar_order, ma=ma_order, integ=0,
                     target=target)
    fit_result = model.fit(method=method)
    total_time = time.time() - fit_started
    print("TIME:          ", total_time)

    print(fit_result.scores)
    model.plot_fit()
    plt.show()

    # Timed section 2: forecasting and drawing (the figure is shown after the
    # timer stops).
    predict_started = time.time()
    print("STARTING TIMER, PREDICT REQUEST  ", method)
    plt.plot(test_set.index, test_set[target], label='REAL', color='pink')
    forecast = model.predict(h=100)
    plt.plot(forecast, label='PREDICTION', color='cyan')
    plt.legend(['REAL', 'PREDICTION'])
    total_time = time.time() - predict_started
    print("TIME:          ", total_time)
    plt.show()
def predict_response_body_len(train_set, test_set, method="MLE"):
    """Fit an ARIMA(2, 0, 8) on 'response_body_len' and plot a 100-step forecast.

    Args:
        train_set: DataFrame passed to ``pf.ARIMA``; needs 'response_body_len'.
        test_set: DataFrame with the real values plotted against the forecast.
        method: Inference method name forwarded to ``model.fit``. It is an
            explicit keyword parameter because the body needs it and no local
            of that name exists; the default is maximum likelihood.
    """
    target = 'response_body_len'

    # Diagnostics on the raw training series.
    arima_utils.adfuller_test(train_set[target])
    arima_utils.plot_series(train_set[target], 'Original Series')
    arima_utils.plot_pacf(train_set[target])
    arima_utils.plot_acf(train_set[target])

    # Earlier attempt used p = 8, q = 7.
    p = 2
    q = 8

    # Timed section 1: model construction + fitting.
    start_time = time.time()
    print("STARTING TIMER, RESPONSE")

    model = pf.ARIMA(data=train_set,
                     ar=p,
                     ma=q,
                     integ=0,
                     target=target)
    x = model.fit(method=method)

    total_time = time.time() - start_time
    print("TIME:          ", total_time)

    print(x.summary())
    print(x.scores)
    model.plot_fit()
    plt.show()

    # Timed section 2: forecasting and drawing (the figure is shown after the
    # timer stops).
    start_time = time.time()
    print("STARTING TIMER PREDICT RESPONSE")

    plt.plot(test_set.index,
             test_set[target],
             label='REAL',
             color='pink')
    plt.plot(model.predict(h=100), label='PREDICTION', color='cyan')
    plt.legend(['REAL', 'PREDICTION'])

    total_time = time.time() - start_time
    print("TIME:          ", total_time)

    plt.show()
def ANY_size(conn80, ANY, shiftvalue):
    """Add a length column for ``conn80[ANY]`` and either difference or trim it.

    A new column ``<ANY>_size`` holding ``len()`` of every value in column
    ``ANY`` is written into ``conn80`` (the frame is modified in place) and
    printed.

    Args:
        conn80: DataFrame containing column ``ANY``; its values must support
            ``len()``.
        ANY: Name of the column to measure.
        shiftvalue: Lag used for differencing. When greater than zero, a
            ``<ANY>_shift_<shiftvalue>`` column with the lagged difference of
            the sizes is added, tested for stationarity and plotted.

    Returns:
        For ``shiftvalue > 0``: ``conn80`` itself, now carrying the size and
        shift columns. Otherwise: the rows of ``conn80`` whose size lies
        within one standard deviation of the mean size.
    """
    new_size = ANY + "_size"
    conn80[new_size] = conn80[ANY].apply(len)
    print(conn80[new_size])

    if shiftvalue > 0:
        new_shift = ANY + "_shift_" + str(shiftvalue)
        conn80[new_shift] = conn80[new_size] - conn80[new_size].shift(shiftvalue)
        arima_utils.adfuller_test(conn80[new_shift].dropna())
        # Title is built from the actual column name so it stays correct for
        # columns other than 'uri'.
        arima_utils.plot_series(conn80[new_shift],
                                new_size + " shift=" + str(shiftvalue))
        # Every path must produce a return value; this branch has no filtered
        # frame, so hand back the augmented input.
        return conn80

    sizes = conn80[new_size]
    within_one_std = (sizes - sizes.mean()).abs() <= sizes.std()
    return conn80[within_one_std]
def predict_request_body_len(train_set, test_set, method="MLE"):
    """Fit an ARIMA(2, 0, 1) on 'request_body_len' and plot a 100-step forecast.

    Args:
        train_set: DataFrame passed to ``pf.ARIMA``; needs 'request_body_len'.
        test_set: DataFrame with the real values plotted against the forecast.
        method: Inference method name forwarded to ``model.fit``. It is an
            explicit keyword parameter because the body needs it and no local
            of that name exists; the default is maximum likelihood.
    """
    target = 'request_body_len'

    # Diagnostics on the raw training series.
    arima_utils.adfuller_test(train_set[target].dropna())
    arima_utils.plot_series(train_set[target], 'Original Series')
    arima_utils.plot_pacf(train_set[target])
    arima_utils.plot_acf(train_set[target])

    # Earlier attempt used p = 12, q = 34.
    p = 2
    q = 1

    # Timed section 1: model construction + fitting.
    start_time = time.time()
    print("STARTING TIMER REQUEST")

    model = pf.ARIMA(data=train_set,
                     ar=p,
                     ma=q,
                     integ=0,
                     target=target)
    # Bayesian alternatives (see https://pyflux.readthedocs.io/en/latest/var.html)
    # can be selected through `method`, e.g. 'BBVI', 'Laplace' or 'M-H'.
    x = model.fit(method=method)

    total_time = time.time() - start_time
    print("TIME:          ", total_time)

    print(x.summary())
    print(x.scores)
    model.plot_fit()
    plt.show()

    # Timed section 2: forecasting and drawing (the figure is shown after the
    # timer stops).
    start_time = time.time()
    print("STARTING TIMER, PREDICT REQUEST")
    plt.plot(test_set.index,
             test_set[target],
             label='REAL',
             color='pink')
    plt.plot(model.predict(h=100), label='PREDICTION', color='cyan')
    plt.legend(['REAL', 'PREDICTION'])
    total_time = time.time() - start_time
    print("TIME:          ", total_time)
    plt.show()
def malware_resp(conn):
    """Derive 'check_respmimetypes' from 'resp_mime_types' and run the diagnostics on it.

    The new column is written into ``conn`` in place by applying
    ``malware_resp_mime_types`` to every 'resp_mime_types' value; the column
    is then passed to the stationarity test and the series / PACF / ACF plots.
    """
    flag_column = 'check_respmimetypes'
    conn[flag_column] = conn['resp_mime_types'].apply(malware_resp_mime_types)

    arima_utils.adfuller_test(conn[flag_column])
    arima_utils.plot_series(conn[flag_column], "check respmime")
    for draw in (arima_utils.plot_pacf, arima_utils.plot_acf):
        draw(conn[flag_column])
def statuscode():
    """Run the stationarity test on the module-level ``conn80['status_code']`` and plot it."""
    column = 'status_code'
    series = conn80[column]
    arima_utils.adfuller_test(series)
    arima_utils.plot_series(series, column)
import pyflux as pf
import matplotlib.pyplot as plt


#read data and split and test and training set
import pandas as pd
data = pd.read_csv(
    'resources/cpu-full-a.csv',
    parse_dates=[0],
    infer_datetime_format=True,
)

import arima_utils


def _report_lagged_difference(frame, lag, column):
    """Store the lag-``lag`` difference of 'cpu' in ``column``, then test and plot it."""
    frame[column] = frame['cpu'] - frame['cpu'].shift(lag)
    # The first ``lag`` rows are NaN after shifting; drop them for the test only.
    arima_utils.adfuller_test(frame[column].dropna())
    arima_utils.plot_series(frame[column], column)


# Stationarity check and plot of the raw series.
arima_utils.adfuller_test(data['cpu'])
arima_utils.plot_series(data['cpu'], 'Original Series')

# Same diagnostics on the lag-1 and lag-12 differences.
_report_lagged_difference(data, 1, 'Value First Difference')
_report_lagged_difference(data, 12, 'Value 12 Difference')

# Partial and full autocorrelation plots of the raw series.
arima_utils.plot_pacf(data['cpu'])
arima_utils.plot_acf(data['cpu'])

# Define the model and fit it with the "PML" method.
model = pf.ARIMA(data=data, ar=2, ma=20, integ=0, target='cpu')
x = model.fit("PML")
# Alternative fit kept for reference: x = model.fit("M-H")