Exemple #1
0
def predict_voxels(tx_pivot_df: pd.DataFrame, start_time: float,
                   stop_time: float, samples_to_predict: int,
                   lags: int) -> pd.DataFrame:
    """
    Fit a VAR model on a time window of the pivot table and forecast ahead.

    :param tx_pivot_df: pivoted training data; it is sliced with ``.loc`` using
        millisecond labels, so its index is assumed to be time in milliseconds
        (TODO confirm against the caller)
    :param start_time: start of the training window, in seconds
    :param stop_time: end of the training window (exclusive), in seconds
    :param samples_to_predict: number of forecast steps passed to
        ``model.predict(h=...)``
    :param lags: number of lags for the VAR model
    :return: long-format frame with columns ``start_time`` (seconds),
        ``start_freq`` (the former column labels) and ``duty_cycle``
        (forecast limited to the range [0, 1])
    """
    ms_per_second = 1000

    # Label-based slicing in pandas includes the end label, unlike Python
    # ranges, so stop 1 ms early to keep the sample at stop_time out.
    window_first_ms = start_time * ms_per_second
    window_last_ms = stop_time * ms_per_second - 1
    training_window = tx_pivot_df.loc[window_first_ms:window_last_ms]

    var_model = pf.VAR(data=training_window, lags=lags, integ=0)
    var_model.fit()

    forecast = var_model.predict(h=samples_to_predict)

    # The forecast index is in milliseconds; expose it as seconds.
    forecast["start_time"] = forecast.index / 1000.0

    # Un-pivot: one row per (start_time, start_freq) pair.
    long_df = forecast.melt(id_vars="start_time",
                            var_name="start_freq",
                            value_name='duty_cycle')

    # A duty cycle is a fraction, so force out-of-range forecasts to 0 or 1.
    duty = long_df["duty_cycle"]
    duty = duty.mask(duty < 0, 0.0)
    duty = duty.mask(duty > 1, 1.0)
    long_df["duty_cycle"] = duty

    return long_df
Exemple #2
0
def predict_freq_by_freq(data, training_lag, steps):
    """Forecast every frequency column independently with its own VAR model.

    ``data`` is a 2-D numpy matrix of duty cycles with time along the first
    axis and frequency along the second. Each column is wrapped in a
    one-column DataFrame, fitted with ``pf.VAR`` using ``training_lag`` lags,
    and forecast ``steps`` samples ahead (one model per frequency, which is
    how spec-val is implemented).

    Returns a float32 numpy matrix of shape ``(steps, data.shape[1])``.
    """
    n_freqs = data.shape[1]
    forecasts = np.zeros((steps, n_freqs), dtype=np.float32)

    for col in range(n_freqs):
        # Slice with col:col + 1 so the column stays 2-D for DataFrame/assignment.
        single_freq = pd.DataFrame(data[:, col:col + 1], columns=["x"])
        var_model = pf.VAR(data=single_freq, lags=training_lag)
        var_model.fit()
        forecasts[:, col:col + 1] = var_model.predict(h=steps)

    return forecasts
def main():
    """Fit a VAR(2) model on log closing prices of five bank stocks.

    Downloads daily data for the tickers between 2010-01-01 and 2017-10-31,
    fits ``pf.VAR`` with ``lags=2, integ=1`` on the log of the closing
    prices, prints the fit summary and plots rolling in-sample predictions
    for the last 30 points.

    Other outputs available on the model object (left disabled here):
    ``plot_z`` (latent variables), ``plot_fit`` (in-sample fit),
    ``plot_predict`` (forecast chart) and ``predict`` (forecast table).
    """
    print("First Module's Name: {}".format(__name__))

    # Extended universe, kept for reference:
    # ['NYSE:JPM', 'NYSE:GS', 'NYSE:C', 'NYSE:WFC', 'NYSE:MS', 'TSE:BNS',
    #  'NYSE:TD', 'NYSE:NMR', 'NYSE:DB', 'LON:BARC', 'NYSE:HSBC', 'NYSE:UBS',
    #  'NYSE:CS', 'EPA:BNP', 'EPA:GLE', 'EPA:ACA', 'EPA:KN']
    tickers = ['NYSE:JPM', 'NYSE:GS', 'NYSE:C', 'NYSE:WFC', 'NYSE:MS']

    start = dt.datetime(2010, 1, 1)
    end = dt.datetime(2017, 10, 31)
    # NOTE(review): the 'google' data source may no longer be served by
    # current pandas-datareader releases -- confirm before relying on it.
    stocks = web.DataReader(tickers, 'google', start, end)
    closing_prices = np.log(stocks.Close)

    model = pf.VAR(data=closing_prices, lags=2, integ=1)
    fit_results = model.fit()
    fit_results.summary()

    # Rolling out-of-sample style predictions over the last 30 observations.
    model.plot_predict_is(h=30, figsize=(15, 5))

    print('VAR v1 done')
# Expanding-window evaluation script: for a series of cut-off rows, fit a
# four-variable VAR on everything before the cut-off, collect its in-sample
# predictions, and save predictions and reference values to .npy files.
# `stop` and `my_data` are not defined in this part of the file; my_data is
# indexed as a 2-D array whose columns 4..7 are used as pm25/dewp/temp/pres.
start = 12000
interval = 20
nmbr_predictions = 5
# Seed both accumulators with a dummy zero row so np.append has a (1, 4)
# array to extend; the dummy row is dropped after the loop.
allpredicted_values = np.array([0, 0, 0, 0]).reshape(1, 4)
original_values = np.array([0, 0, 0, 0]).reshape(1, 4)

for i in range(start, stop, interval):
    # Shift the cut-off forward by the number of predictions.
    i = i + nmbr_predictions
    # Training data: the first i rows of columns 4-7.
    VAR_data = pd.DataFrame({
        'pm25': my_data[:, 4][:i],
        'dewp': my_data[:, 5][:i],
        'temp': my_data[:, 6][:i],
        'pres': my_data[:, 7][:i]
    })
    # Fix the column order explicitly.
    VAR_data = VAR_data[['pm25', 'dewp', 'temp', 'pres']]
    model = pf.VAR(data=VAR_data, lags=4, integ=1)
    x = model.fit()
    # Prepend row i - 3 of the raw data to the predictions, take the running
    # sum down the rows, then drop the prepended row again.
    # NOTE(review): presumably integ=1 makes the model predict first
    # differences, and the cumulative sum turns them back into levels --
    # confirm, and confirm that i - 3 is the intended anchor row.
    allpredicted_values = np.append(
        allpredicted_values,
        np.cumsum(np.append(my_data[i - 3, 4:8].reshape(1, 4),
                            model.predict_is(nmbr_predictions).values,
                            axis=0),
                  axis=0)[1:, :],
        axis=0)
    # NOTE(review): only rows i - 2 and i - 1 (two rows) are stored as
    # reference values per iteration, while predict_is is asked for
    # nmbr_predictions values -- verify the two arrays line up as intended.
    original_values = np.append(original_values, my_data[i - 2:i, 4:8], axis=0)

# Drop the dummy seed rows.
allpredicted_values = allpredicted_values[1:, ]
original_values = original_values[1:, ]
np.save("predicted_values_long_term_for_error", allpredicted_values)
np.save("original_values_long_term_for_error", original_values)
def build_model(data, lags):
    """Return a ``pf.VAR`` model built from ``data`` with ``lags`` lags (not fitted)."""
    return pf.VAR(data=data, lags=lags)
Exemple #6
0
def univariate_anomaly_VAR(lista_datos):
    """Detect anomalies in a univariate series with a VAR model and forecast 5 steps.

    The series is split 70/30 into train and test parts. A ``pf.VAR`` model
    with 15 lags is fitted on the train part and used to predict the test
    part; test points whose absolute error is more than twice the MAE are
    kept as anomalies and their scores are min-max normalised. A second model
    with 5 lags is fitted on the whole series to forecast 5 further steps.

    :param lista_datos: sequence of observations; stored in a single
        ``'valores'`` column
    :return: dict with keys ``rmse``, ``mse``, ``mae``, ``present_status``
        (``'TRUE'`` or ``'FALSE'``), ``present_alerts``, ``past``, ``engine``
        (always ``'VAR'``), ``future`` and ``debug``; the frame-valued
        entries are lists of row dicts
    """
    df = pd.DataFrame()
    df['valores'] = lista_datos

    tam_train = int(len(df) * 0.7)
    df_train = df[:tam_train]
    print('Tamanio train: {}'.format(df_train.shape))
    df_test = df[tam_train:]
    print('Tamanio test: {}'.format(df_test.shape))

    model = pf.VAR(df_train, lags=15)
    model.fit()

    future_forecast_pred = model.predict(len(df_test))
    future_forecast_pred = future_forecast_pred[['valores']]

    list_test = df_test['valores'].values
    list_future_forecast_pred = future_forecast_pred['valores'].values

    mse = mean_squared_error(list_test, list_future_forecast_pred)
    print('El error medio del modelo_test es: {}'.format(mse))
    rmse = np.sqrt(mse)
    print('El root error medio del modelo_test es: {}'.format(rmse))
    mae = mean_absolute_error(list_test, list_future_forecast_pred)

    # One row per test point, indexed by the label of the predicted step.
    df_aler = pd.DataFrame()
    df_aler['real_value'] = list_test
    df_aler['expected value'] = list_future_forecast_pred
    df_aler['mse'] = mse
    df_aler['puntos'] = future_forecast_pred.index
    df_aler.set_index('puntos', inplace=True)
    df_aler['mae'] = mae

    # Score = absolute error expressed in units of the MAE.
    df_aler['anomaly_score'] = abs(df_aler['expected value'] -
                                   df_aler['real_value']) / df_aler['mae']

    # Keep only the anomalies; copy so the assignments below write to an
    # independent frame instead of a view of the unfiltered one.
    df_aler = df_aler[df_aler['anomaly_score'] > 2].copy()

    # Min-max normalisation. When all retained scores are equal (or none are
    # left) the result is NaN, exactly as before.
    score_max = df_aler['anomaly_score'].max()
    score_min = df_aler['anomaly_score'].min()
    df_aler['anomaly_score'] = ((df_aler['anomaly_score'] - score_min) /
                                (score_max - score_min))

    # "Present" alerts: anomalies whose label is within 4 of the largest
    # anomaly label.
    # NOTE(review): the candidates are the FIRST five anomalies (df_aler[:5]),
    # so later anomalies are never considered -- confirm whether the last
    # five were intended. Behaviour is preserved here.
    last_label = df_aler.index.max()
    recent_labels = [last_label - offset for offset in range(5)]
    df_aler_ult = df_aler[:5]
    df_aler_ult = df_aler_ult[df_aler_ult.index.isin(recent_labels)].copy()
    exists_anom_last_5 = 'FALSE' if len(df_aler_ult) == 0 else 'TRUE'

    recent_max = df_aler_ult['anomaly_score'].max()
    recent_min = df_aler_ult['anomaly_score'].min()
    print(df_aler_ult)
    df_aler_ult['anomaly_score'] = ((df_aler_ult['anomaly_score'] - recent_min) /
                                    (recent_max - recent_min))

    # Forecast: refit on the complete series and predict 5 steps ahead.
    model_for = pf.VAR(df, lags=5)
    model_for.fit()

    future_forecast_pred_for = model_for.predict(5)

    df_result_forecast = future_forecast_pred_for.reset_index()
    df_result_forecast = df_result_forecast.rename(columns={'index': 'step'})

    print(df.head(5))
    print(df.tail(5))

    # Test-set predictions re-indexed with the test rows' own labels.
    test_values = pd.DataFrame(future_forecast_pred.values,
                               index=df_test.index,
                               columns=['expected value'])
    test_values['step'] = test_values.index

    # 'records' is the full orient name; the abbreviation 'record' is not
    # accepted by recent pandas releases.
    engine_output = {}
    engine_output['rmse'] = rmse
    engine_output['mse'] = mse
    engine_output['mae'] = mae
    engine_output['present_status'] = exists_anom_last_5
    engine_output['present_alerts'] = df_aler_ult.to_dict(orient='records')
    engine_output['past'] = df_aler.to_dict(orient='records')
    engine_output['engine'] = 'VAR'
    engine_output['future'] = df_result_forecast.to_dict(orient='records')
    engine_output['debug'] = test_values.to_dict(orient='records')

    return engine_output
Exemple #7
0
def univariate_anomaly_VAR(lista_datos, num_fut, name):
    """Pick the best VAR lag for a univariate series, score anomalies and forecast.

    The series is split 70/30 into train and test parts. Every lag in
    ``range(len(train) / 2)`` is tried; the lag whose test-set MAE is lowest
    is kept, written to ``./models_temp/learned_model_var<name>`` and
    registered through ``new_model``. Test points whose absolute error is
    more than twice the MAE are reported as anomalies. A model with the best
    lag is then fitted on the whole series to forecast ``num_fut`` steps.

    :param lista_datos: sequence of observations; converted to float
    :param num_fut: number of future steps to forecast
    :param name: suffix for the saved-lag file name and the model name given
        to ``new_model``
    :return: dict with keys ``rmse``, ``mse``, ``mae``, ``present_status``
        (``'TRUE'`` or ``'FALSE'``), ``present_alerts``, ``past``, ``engine``
        (always ``'VAR'``), ``future`` and ``debug``; the frame-valued
        entries are lists of row dicts
    """
    df = pd.DataFrame()
    df['valores'] = lista_datos

    # The builtin float replaces the np.float alias, which newer NumPy
    # releases no longer provide; both mean float64 here.
    df['valores'] = df.valores.astype(float)

    tam_train = int(len(df) * 0.7)
    df_train = df[:tam_train]
    print('Tamanio train: {}'.format(df_train.shape))
    df_test = df[tam_train:]
    print('Tamanio test: {}'.format(df_test.shape))

    print(type(df_test))
    mae_period = 99999999
    best_lag = 0
    lags = int(round(len(df_train) / 2))
    print("empezamos el bucle")
    # Exhaustive lag search, scored by MAE on the held-out test part.
    for lag in range(lags):
        model = pf.VAR(df_train, lags=lag)
        model.fit()

        print("fit ready")
        future_forecast_pred = model.predict(len(df_test))
        future_forecast_pred = future_forecast_pred[['valores']]

        list_test = df_test['valores'].values
        list_future_forecast_pred = future_forecast_pred['valores'].values

        mae_temp = mean_absolute_error(list_test, list_future_forecast_pred)
        print('El error medio del modelo_test es: {}'.format(mae_temp))

        if mae_temp < mae_period:
            best_lag = lag
            mae_period = mae_temp
        else:
            print("mae:" + str(mae_period))

    print("######best mae is " + str(mae_period) + " with the lag " + str(best_lag))

    # Refit with the winning lag to obtain the predictions used below.
    model = pf.VAR(df_train, lags=best_lag)
    model.fit()

    future_forecast_pred = model.predict(len(df_test))
    future_forecast_pred = future_forecast_pred[['valores']]

    list_test = df_test['valores'].values
    list_future_forecast_pred = future_forecast_pred['valores'].values

    mse = mean_squared_error(list_test, list_future_forecast_pred)
    print('El error medio del modelo_test es: {}'.format(mse))

    rmse = np.sqrt(mse)
    print('El root error medio del modelo_test es: {}'.format(rmse))
    mae = mean_absolute_error(list_test, list_future_forecast_pred)

    print("Saving params")
    filename = './models_temp/learned_model_var' + name
    # The with-statement closes the file; no explicit close() is needed.
    with open(filename, 'w') as f:
        f.write(str(best_lag))

    print("insertando modelo VAR")
    new_model(name, 'VAR', pack('N', 365), str(best_lag), mae)

    # One row per test point, indexed by the label of the predicted step.
    df_aler = pd.DataFrame()
    df_aler['real_value'] = list_test
    df_aler['expected value'] = list_future_forecast_pred
    df_aler['mse'] = mse
    df_aler['puntos'] = future_forecast_pred.index
    df_aler.set_index('puntos', inplace=True)
    df_aler['mae'] = mae

    # Score = absolute error expressed in units of the MAE.
    df_aler['anomaly_score'] = abs(df_aler['expected value'] -
                                   df_aler['real_value']) / df_aler['mae']

    # Keep only the anomalies; copy so the assignments below write to an
    # independent frame instead of a view of the unfiltered one.
    df_aler = df_aler[df_aler['anomaly_score'] > 2].copy()

    # Min-max normalisation; yields NaN when the retained scores are all equal.
    score_max = df_aler['anomaly_score'].max()
    score_min = df_aler['anomaly_score'].min()
    df_aler['anomaly_score'] = ((df_aler['anomaly_score'] - score_min) /
                                (score_max - score_min))

    # "Present" alerts: anomalies whose label is within 4 of the largest
    # anomaly label.
    # NOTE(review): the candidates are the FIRST five anomalies (df_aler[:5]),
    # so later anomalies are never considered -- confirm whether the last
    # five were intended. Behaviour is preserved here.
    last_label = df_aler.index.max()
    recent_labels = [last_label - offset for offset in range(5)]
    df_aler_ult = df_aler[:5]
    df_aler_ult = df_aler_ult[df_aler_ult.index.isin(recent_labels)].copy()
    exists_anom_last_5 = 'FALSE' if len(df_aler_ult) == 0 else 'TRUE'

    recent_max = df_aler_ult['anomaly_score'].max()
    recent_min = df_aler_ult['anomaly_score'].min()
    print(df_aler_ult)
    df_aler_ult['anomaly_score'] = ((df_aler_ult['anomaly_score'] - recent_min) /
                                    (recent_max - recent_min))

    # Forecast: refit on the complete series with the best lag.
    model_for = pf.VAR(df, lags=best_lag)
    model_for.fit()

    future_forecast_pred_for = model_for.predict(num_fut)

    df_result_forecast = future_forecast_pred_for.reset_index()
    df_result_forecast = df_result_forecast.rename(columns={'index': 'step'})

    print(df.head(5))
    print(df.tail(5))

    # Test-set predictions re-indexed with the test rows' own labels.
    test_values = pd.DataFrame(future_forecast_pred.values,
                               index=df_test.index,
                               columns=['expected value'])
    test_values['step'] = test_values.index

    # 'records' is the full orient name; the abbreviation 'record' is not
    # accepted by recent pandas releases.
    engine_output = {}
    engine_output['rmse'] = rmse
    engine_output['mse'] = mse
    engine_output['mae'] = mae
    engine_output['present_status'] = exists_anom_last_5
    engine_output['present_alerts'] = df_aler_ult.fillna(0).to_dict(orient='records')
    engine_output['past'] = df_aler.to_dict(orient='records')
    engine_output['engine'] = 'VAR'
    engine_output['future'] = df_result_forecast.fillna(0).to_dict(orient='records')
    engine_output['debug'] = test_values.fillna(0).to_dict(orient='records')

    return engine_output
Exemple #8
0
def anomaly_VAR(list_var, num_fut):
    """Multivariate VAR anomaly detection and forecast for the last input series.

    Each element of ``list_var`` becomes one column (``var_0``, ``var_1``,
    ...); the last column is renamed ``'expected value'`` and is the series
    being evaluated. The data is split 70/30, every lag in
    ``range(min(len(train) / 2, 100))`` is tried and the one with the lowest
    test-set MAE on the target column is kept. Test points whose absolute
    error is more than twice the MAE are reported as anomalies. A model with
    the best lag is fitted on all the data to forecast ``num_fut`` steps.

    Side effect: a model object is pickled to ``./models_temp/var_model.pkl``.

    :param list_var: sequence of equally long series, target series last
    :param num_fut: number of future steps to forecast
    :return: dict with keys ``rmse``, ``mse``, ``mae``, ``present_status``
        (``'TRUE'`` or ``'FALSE'``), ``present_alerts``, ``past``, ``engine``
        (always ``'VAR'``), ``future`` and ``debug``; the frame-valued
        entries are lists of row dicts with NaN replaced by 0
    """
    df_var = pd.DataFrame()
    for i, series in enumerate(list_var):
        df_var['var_{}'.format(i)] = series

    # The last series supplied is the target.
    df_var.rename(columns={df_var.columns[-1]: 'expected value'}, inplace=True)
    df_var = df_var.astype('double')

    tam_train = int(len(df_var) * 0.7)
    df_train = df_var[:tam_train]
    print('Tamanio train: {}'.format(df_train.shape))
    df_test = df_var[tam_train:]
    print('Tamanio test: {}'.format(df_test.shape))

    mae_period = 99999999
    best_lag = 0
    # Try at most 100 lags to bound the search.
    lags = min(int(round(len(df_train) / 2)), 100)
    for lag in range(lags):
        print("entra en el bucle con dato " + str(lag))
        model = pf.VAR(df_train, lags=lag)
        model.fit()

        future_forecast_pred = model.predict(len(df_test))
        future_forecast_pred = future_forecast_pred[['expected value']]

        list_test = df_test['expected value'].values
        list_future_forecast_pred = future_forecast_pred['expected value'].values

        mae_temp = mean_absolute_error(list_test, list_future_forecast_pred)
        print('El error medio del modelo_test es: {}'.format(mae_temp))

        if mae_temp < mae_period:
            best_lag = lag
            mae_period = mae_temp
        else:
            print("mae:" + str(mae_period))
        print("sale del bucle")

    print("######best mae is " + str(mae_period) + " with the lag " + str(best_lag))

    # Refit with the winning lag to obtain the predictions used below.
    model = pf.VAR(df_train, lags=best_lag)
    model.fit()

    future_forecast_pred = model.predict(len(df_test))
    future_forecast_pred = future_forecast_pred[['expected value']]

    list_test = df_test['expected value'].values
    list_future_forecast_pred = future_forecast_pred['expected value'].values

    mse = mean_squared_error(list_test, list_future_forecast_pred)
    print('El error medio del modelo_test es: {}'.format(mse))
    rmse = np.sqrt(mse)
    print('El root error medio del modelo_test es: {}'.format(rmse))
    mae = mean_absolute_error(list_test, list_future_forecast_pred)

    # One row per test point, indexed by the label of the predicted step.
    df_aler = pd.DataFrame()
    df_aler['real_value'] = list_test
    df_aler['expected value'] = list_future_forecast_pred
    df_aler['mse'] = mse
    df_aler['puntos'] = future_forecast_pred.index
    df_aler.set_index('puntos', inplace=True)
    df_aler['mae'] = mae

    # Score = absolute error expressed in units of the MAE.
    df_aler['anomaly_score'] = abs(df_aler['expected value'] -
                                   df_aler['real_value']) / df_aler['mae']

    # Keep only the anomalies; copy so the assignments below write to an
    # independent frame instead of a view of the unfiltered one.
    df_aler = df_aler[df_aler['anomaly_score'] > 2].copy()

    # Min-max normalisation; yields NaN when the retained scores are all
    # equal (the NaN values are replaced by 0 in the output below).
    score_max = df_aler['anomaly_score'].max()
    score_min = df_aler['anomaly_score'].min()
    df_aler['anomaly_score'] = ((df_aler['anomaly_score'] - score_min) /
                                (score_max - score_min))

    # "Present" alerts: anomalies whose label is within 4 of the largest
    # anomaly label.
    # NOTE(review): the candidates are the FIRST five anomalies (df_aler[:5]),
    # so later anomalies are never considered -- confirm whether the last
    # five were intended. Behaviour is preserved here.
    last_label = df_aler.index.max()
    recent_labels = [last_label - offset for offset in range(5)]
    df_aler_ult = df_aler[:5]
    df_aler_ult = df_aler_ult[df_aler_ult.index.isin(recent_labels)].copy()
    exists_anom_last_5 = 'FALSE' if len(df_aler_ult) == 0 else 'TRUE'

    recent_max = df_aler_ult['anomaly_score'].max()
    recent_min = df_aler_ult['anomaly_score'].min()
    df_aler_ult['anomaly_score'] = ((df_aler_ult['anomaly_score'] - recent_min) /
                                    (recent_max - recent_min))
    df_aler_ult = df_aler_ult.fillna(0)

    # Forecast: refit on the complete data with the best lag.
    model_for = pf.VAR(df_var, lags=best_lag)
    model_for.fit()

    # Save the model to disk.
    # NOTE(review): this persists `model` (fitted on the train split only),
    # not `model_for` (fitted on all data) -- confirm which one is wanted.
    filename = "./models_temp/var_model.pkl"
    with open(filename, 'wb') as file:
        pickle.dump(model, file)

    future_forecast_pred_for = model_for.predict(num_fut)
    future_forecast_pred_for = future_forecast_pred_for[['expected value']]

    df_result_forecast = future_forecast_pred_for.reset_index()
    df_result_forecast = df_result_forecast.rename(columns={'index': 'step'})

    # Test-set predictions re-indexed with the test rows' own labels.
    test_values = pd.DataFrame(future_forecast_pred.values,
                               index=df_test.index,
                               columns=['expected value'])
    test_values['step'] = test_values.index

    # 'records' is the full orient name; the abbreviation 'record' is not
    # accepted by recent pandas releases.
    engine_output = {}
    engine_output['rmse'] = rmse
    engine_output['mse'] = mse
    engine_output['mae'] = mae
    engine_output['present_status'] = exists_anom_last_5
    engine_output['present_alerts'] = df_aler_ult.fillna(0).to_dict(orient='records')
    engine_output['past'] = df_aler.fillna(0).to_dict(orient='records')
    engine_output['engine'] = 'VAR'
    engine_output['future'] = df_result_forecast.fillna(0).to_dict(orient='records')
    engine_output['debug'] = test_values.fillna(0).to_dict(orient='records')

    return engine_output
Exemple #9
0
def anomaly_var(lista_datos, num_fut, desv_mse=0, train=True, name='model-name'):
    """Pick the best VAR lag, build the anomaly report and forecast ``num_fut`` steps.

    ``create_train_test`` supplies the full, train and test frames. Every lag
    in ``range(len(train) / 2)`` is tried and the one with the lowest
    test-set MAE on the ``'valores'`` column is kept. Alerts, debug data and
    metrics are produced by an ``engine_output_creation('var')`` object. A
    model with the best lag is then fitted on the full frame and used for
    the forecast.

    :param lista_datos: sequence of observations
    :param num_fut: number of future steps to forecast
    :param desv_mse: not used by this function; kept for interface
        compatibility
    :param train: when true, write the best lag to
        ``./models_temp/learned_model_var<name>`` and register the model
        through ``new_model``
    :param name: suffix for the saved-lag file name and the model name given
        to ``new_model``
    :return: the ``engine_output`` attribute of the engine object
    """
    lista_puntos = np.arange(0, len(lista_datos), 1)
    df, df_train, df_test = create_train_test(lista_puntos, lista_datos)

    print(type(df_test))
    mae_period = 99999999
    best_lag = 0
    lags = int(round(len(df_train) / 2))
    print("empezamos el bucle")
    df_train = df_train.astype('float64')
    # Exhaustive lag search, scored by MAE on the held-out test part.
    for lag in range(lags):
        model = pf.VAR(df_train, lags=lag)
        model.fit()

        print("fit ready")
        future_forecast_pred = model.predict(len(df_test))
        future_forecast_pred = future_forecast_pred[['valores']]

        list_test = df_test['valores'].values
        list_future_forecast_pred = future_forecast_pred['valores'].values

        mae_temp = mean_absolute_error(list_test, list_future_forecast_pred)
        print('El error medio del modelo_test es: {}'.format(mae_temp))

        if mae_temp < mae_period:
            best_lag = lag
            mae_period = mae_temp
        else:
            print("mae:" + str(mae_period))

    print("######best mae is " + str(mae_period) + " with the lag " + str(best_lag))

    # Refit with the winning lag to obtain the predictions used below.
    model = pf.VAR(df_train, lags=best_lag)
    model.fit()

    future_forecast_pred = model.predict(len(df_test))
    future_forecast_pred = future_forecast_pred[['valores']]

    list_test = df_test['valores'].values
    list_future_forecast_pred = future_forecast_pred['valores'].values

    # MAE of the selected model; this name used to be referenced below
    # without ever being assigned.
    mae = mean_absolute_error(list_test, list_future_forecast_pred)

    engine = engine_output_creation('var')
    engine.alerts_creation(list_future_forecast_pred, df_test)
    engine.debug_creation(list_future_forecast_pred, df_test)
    engine.metrics_generation(df_test['valores'].values, list_future_forecast_pred)

    if train:
        print("Saving params")
        filename = './models_temp/learned_model_var' + name
        # The with-statement closes the file; no explicit close() is needed.
        with open(filename, 'w') as f:
            f.write(str(best_lag))
        print("insertando modelo VAR")
        new_model(name, 'VAR', pack('N', 365), str(best_lag), mae)

    # Forecast: refit on the complete frame and predict from THAT model.
    # NOTE(review): df_train is cast to float64 above but df is not --
    # confirm df's dtypes are acceptable to pf.VAR.
    model_for = pf.VAR(df, lags=best_lag)
    model_for.fit()

    future_forecast_pred_for = model_for.predict(num_fut)

    print("salida del modelo")

    engine.forecast_creation(future_forecast_pred_for['valores'].tolist(),
                             len(lista_datos), num_fut)
    return engine.engine_output
Exemple #10
0
def anomaly_VAR(lista_datos):
    """Detect anomalies in a univariate series with a VAR(5) model and forecast 5 steps.

    The series is split 70/30 into train and test parts. A ``pf.VAR`` model
    with 5 lags is fitted on the train part and used to predict the test
    part; test points whose absolute error is more than twice the MAE are
    kept as anomalies and their scores are min-max normalised. A second
    5-lag model is fitted on the whole series to forecast 5 further steps.

    :param lista_datos: sequence of observations; stored in a single
        ``'valores'`` column
    :return: dict with keys ``rmse``, ``mse``, ``mae``, ``present_status``
        (``'TRUE'`` or ``'FALSE'``), ``present_alerts``, ``past``, ``engine``
        (always ``'VAR'``) and ``future``; the frame-valued entries are
        lists of row dicts
    """
    df = pd.DataFrame()
    df['valores'] = lista_datos

    tam_train = int(len(df) * 0.7)
    df_train = df[:tam_train]
    print('Tamanio train: {}'.format(df_train.shape))
    df_test = df[tam_train:]
    print('Tamanio test: {}'.format(df_test.shape))

    model = pf.VAR(df_train, lags=5)
    print("modelo entrenado")
    model.fit()
    print("modelo entrenado2")

    # The frame's only column is 'valores'; the previous 'var_0' lookups
    # referred to a column that is never created in this function.
    future_forecast_pred = model.predict(len(df_test))
    future_forecast_pred = future_forecast_pred[['valores']]

    list_test = df_test['valores'].values
    list_future_forecast_pred = future_forecast_pred['valores'].values

    mse = mean_squared_error(list_test, list_future_forecast_pred)
    print('Model_test mean_error: {}'.format(mse))
    rmse = np.sqrt(mse)
    print('Model_test root error: {}'.format(rmse))
    mae = mean_absolute_error(list_test, list_future_forecast_pred)

    # One row per test point, indexed by the label of the predicted step.
    df_aler = pd.DataFrame()
    df_aler['real_value'] = list_test
    df_aler['expected value'] = list_future_forecast_pred
    df_aler['mse'] = mse
    df_aler['puntos'] = future_forecast_pred.index
    df_aler.set_index('puntos', inplace=True)
    df_aler['mae'] = mae

    # Score = absolute error expressed in units of the MAE.
    df_aler['anomaly_score'] = abs(df_aler['expected value'] -
                                   df_aler['real_value']) / df_aler['mae']

    # Keep only the anomalies; copy so the assignments below write to an
    # independent frame instead of a view of the unfiltered one.
    df_aler = df_aler[df_aler['anomaly_score'] > 2].copy()

    # Min-max normalisation; yields NaN when the retained scores are all equal.
    score_max = df_aler['anomaly_score'].max()
    score_min = df_aler['anomaly_score'].min()
    df_aler['anomaly_score'] = ((df_aler['anomaly_score'] - score_min) /
                                (score_max - score_min))

    # "Present" alerts: anomalies whose label is within 4 of the largest
    # anomaly label.
    # NOTE(review): the candidates are the FIRST five anomalies (df_aler[:5]),
    # so later anomalies are never considered -- confirm whether the last
    # five were intended. Behaviour is preserved here.
    last_label = df_aler.index.max()
    recent_labels = [last_label - offset for offset in range(5)]
    df_aler_ult = df_aler[:5]
    df_aler_ult = df_aler_ult[df_aler_ult.index.isin(recent_labels)].copy()
    exists_anom_last_5 = 'FALSE' if len(df_aler_ult) == 0 else 'TRUE'

    recent_max = df_aler_ult['anomaly_score'].max()
    recent_min = df_aler_ult['anomaly_score'].min()
    print(df_aler_ult)
    df_aler_ult['anomaly_score'] = ((df_aler_ult['anomaly_score'] - recent_min) /
                                    (recent_max - recent_min))

    # Forecast: refit on the complete series (previously an undefined
    # `df_var` was referenced here) and predict 5 steps ahead.
    model_for = pf.VAR(df, lags=5)
    model_for.fit()

    future_forecast_pred_for = model_for.predict(5)
    future_forecast_pred_for = future_forecast_pred_for[['valores']]
    df_result_forecast = future_forecast_pred_for.reset_index()
    df_result_forecast = df_result_forecast.rename(columns={'index': 'step'})

    print(df.head(5))
    print(df.tail(5))

    # 'records' is the full orient name; the abbreviation 'record' is not
    # accepted by recent pandas releases.
    engine_output = {}
    engine_output['rmse'] = rmse
    engine_output['mse'] = mse
    engine_output['mae'] = mae
    engine_output['present_status'] = exists_anom_last_5
    engine_output['present_alerts'] = df_aler_ult.to_dict(orient='records')
    engine_output['past'] = df_aler.to_dict(orient='records')
    engine_output['engine'] = 'VAR'
    engine_output['future'] = df_result_forecast.to_dict(orient='records')

    return engine_output
Exemple #11
0
def VAR_model(df, target, lags, differences):
    """Fit a ``pf.VAR`` model on ``df`` and print the result of its summary.

    ``target`` and ``lags`` are forwarded unchanged; ``differences`` is
    passed as the model's ``integ`` argument. Nothing is returned.
    """
    model = pf.VAR(data=df, target=target, lags=lags, integ=differences)
    fit_results = model.fit()
    print(fit_results.summary())
Exemple #12
0
                 'B Constant','B AR(1)','A to B AR(1)','C to B AR(1)','D to B AR(1)',
                 'C Constant','C AR(1)','A to C AR(1)','B to C AR(1)', 'D to C AR(1)',
                 'D Constant','D AR(1)','A to D AR(1)','B to D AR(1)', 'C to D AR(1)'] 

# Tabulate the flattened OLS estimates against their parameter names.
# (This frame used to be built twice in a row; once is enough.)
results_OLS = pd.DataFrame(index = par_names_OLS, data = OLS_results.reshape(-1))
results_OLS.columns = ['Parameters']
results_OLS


# In[8]:


# Check with the values for VAR(1) from the pyflux package

pf.VAR(df,1).fit().summary()


# In[9]:


# Estimate VAR(1) by MLE

MLE_results = VAR(data = df, lags = 1, target = None, integ = 0).MLE()
MLE_results

# For more clarity

par_names_MLE = ['A AR(1)','B to A AR(1)','C to A AR(1)','D to A AR(1)',
                 'B AR(1)','A to B AR(1)','C to B AR(1)','D to B AR(1)',
                 'C AR(1)','A to C AR(1)','B to C AR(1)', 'D to C AR(1)',