Example #1
def auto_arima_pyflux(ts_df):
    import numpy as np
    import pyflux as pf

    min_aic = np.inf
    min_aic_param = None

    for p in range(3):
        for d in range(3):
            for q in range(3):
                if (p, d, q) != (0, 0, 0):
                    model = pf.ARIMA(data=ts_df, ar=p, integ=d, ma=q)
                    model_fit = model.fit("MLE")  # maximum likelihood fit
                    if model_fit.aic < min_aic:
                        min_aic = model_fit.aic
                        min_aic_param = (p, d, q)
    if min_aic_param is None:
        print("Not successful in fitting ARIMA")
        return -1
    else:
        model = pf.ARIMA(data=ts_df,
                         ar=min_aic_param[0],
                         integ=min_aic_param[1],
                         ma=min_aic_param[2])
        model_fit = model.fit("MLE")  # maximum likelihood fit
        return min_aic, min_aic_param, model, model.predict(h=1,
                                                            intervals=True)
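A minimal usage sketch for the function above; the synthetic series, the 'count' column name, and the date index are assumptions made for illustration, not part of the original example.

# Hypothetical usage: a synthetic daily count series (made-up data).
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
ts_df = pd.DataFrame({'count': rng.poisson(lam=20, size=120)},
                     index=pd.date_range('2020-01-01', periods=120, freq='D'))

result = auto_arima_pyflux(ts_df)
if result != -1:
    min_aic, order, model, forecast = result
    print('best (p, d, q):', order, 'AIC:', min_aic)
    print(forecast)  # one-step-ahead prediction with intervals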
    def plot(self, series, latent_variables=None):
        series = series[180:360]
        series = pd.DataFrame(series)
        if latent_variables is None:
            model = pf.ARIMA(series, self.ar, self.ma, self.integ)
        else:
            model = pf.ARIMA(series, self.ar, self.ma, self.integ, latent_variables)
        model_fit = model.fit()
        model.plot_predict(30, past_values=100)
    def _latent_variable_distribution(self, set_a, set_b):
        # assumes module-level imports: numpy as np, scipy.stats as stat, pyflux as pf
        a = np.asarray(set_a)
        b = np.asarray(set_b)
        a_model = pf.ARIMA(a, self.ar, self.ma, self.integ)
        b_model = pf.ARIMA(b, self.ar, self.ma, self.integ)
        a_modelfit = a_model.fit()
        b_modelfit = b_model.fit()

        a_vars = a_model.latent_variables
        a_latent_vars = a_vars.dahlia()
        a_vals = a_latent_vars[0]
        a_indicators = a_latent_vars[1]
        a_factors = a_latent_vars[2]
        a_means = np.empty(len(a_vals))
        a_sdevs = np.empty(len(a_vals))

        b_vars = b_model.latent_variables
        b_latent_vars = b_vars.dahlia()
        b_vals = b_latent_vars[0]
        b_indicators = b_latent_vars[1]
        b_factors = b_latent_vars[2]
        b_means = np.empty(len(b_vals))
        b_sdevs = np.empty(len(b_vals))

        for i in range(len(a_means)):
            a_means[i] = a_vals[i].mean()
            a_sdevs[i] = a_vals[i].std()
            b_means[i] = b_vals[i].mean()
            b_sdevs[i] = b_vals[i].std()

        values = np.empty((len(a_vals), len(a_vals[0])))
        for y in range(0, len(a_vals)):
            """datasets with the 'b' indicator are scaled differently, 
               use z-scores to average equivalent values in a and b distribution"""
            if (b_indicators == 'b' and a_indicators[y] != 'b') or (a_indicators[y] == b_indicators[y] != 'b'):
                for z in range(0, len(a_vals[0])):
                    pA = stat.norm(a_means[y],a_sdevs[y]).cdf(a_vals[y][z])
                    bVal = stat.norm(b_means[y], b_sdevs[y]).ppf(pA)
                    avg = (a_vals[y][z] + bVal) / 2
                    values[y][z] = avg
            elif a_indicators[y] != 'b' and b_indicators[y] != 'b':
                for z in range(0, len(a_vals[0])):
                    pB = stat.norm(b_means[y],b_sdevs[y]).cdf(b_vals[y][z])
                    aVal = stat.norm(a_means[y], a_sdevs[y]).ppf(pB)
                    avg = (b_vals[y][z] + aVal) / 2
                    values[y][z] = avg
            else:
                for z in range(0, len(a_vals[0])):
                    pA = stat.norm(a_means[y], a_sdevs[y]).cdf(a_vals[y][z])
                    bVal = stat.norm(b_means[y], b_sdevs[y]).ppf(pA)
                    avg = (a_vals[y][z] + bVal) / 2
                    values[y][z] = avg

        return values
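The z-score matching used in the branches above can be shown in isolation. A minimal sketch with scipy.stats (the moments and the value are made up): it maps a value from distribution A to the equivalent quantile of distribution B before averaging, as the method does.

# Illustration of the quantile matching above (made-up numbers):
# map a value from distribution A to the same quantile of distribution B,
# then average the two values.
import scipy.stats as stat

a_mean, a_sdev = 0.0, 1.0   # assumed moments of a latent variable in model A
b_mean, b_sdev = 5.0, 2.0   # assumed moments of the same variable in model B

a_val = 0.7
p = stat.norm(a_mean, a_sdev).cdf(a_val)       # quantile of a_val under A
b_equiv = stat.norm(b_mean, b_sdev).ppf(p)     # value at the same quantile under B
print((a_val + b_equiv) / 2)                   # averaged value, as in the method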
Example #4
def a_test_bbvi_elbo():
    """
    Tests that the ELBO increases
    """
    model = pf.ARIMA(data=data, ar=1, ma=0, family=pf.Exponential())
    x = model.fit('BBVI', iterations=200, record_elbo=True, map_start=False)
    assert (x.elbo_records[-1] > x.elbo_records[0])
Example #5
def run_aram(df, maxar, maxma, test_size=14):
    data = df.dropna()
    data['log'] = np.log(data[data.columns[0]])
    #    test_size = int(len(data) * 0.33)
    train_size = len(data) - int(test_size)
    train, test = data[:train_size], data[train_size:]
    if test_stationarity(train[train.columns[1]]) < 0.01:
        diffn = 0  # series already stationary; no differencing needed
        print('Series is stationary; no differencing needed')
    else:
        diffn = best_diff(train, maxdiff=8)
        train = produce_diffed_timeseries(train, diffn)
        print('Differencing order is ' + str(diffn) + '; differencing done')
    print('Starting ARMA fit')
    order = choose_order(train[train.columns[2]], maxar, maxma)
    print('Model order: ' + str(order))
    _ar = order[0]
    _ma = order[1]
    model = pf.ARIMA(data=train,
                     ar=_ar,
                     ma=_ma,
                     target='diff',
                     family=pf.Normal())
    model.fit("MLE")
    test = test['payment_times']
    test_predict = model.predict(int(test_size))
    test_predict = predict_recover(test_predict, train, diffn)
    RMSE = np.sqrt(
        ((np.array(test_predict) - np.array(test))**2).sum() / test.size)
    print("测试集的RMSE为:" + str(RMSE))
Example #6
def test_bbvi_mini_batch_elbo():
    """
    Tests that the ELBO increases
    """
    model = pf.ARIMA(data=data, ar=1, ma=1, family=pf.Cauchy())
    x = model.fit('BBVI', iterations=100, mini_batch=32, record_elbo=True)
    assert (x.elbo_records[-1] > x.elbo_records[0])
Example #7
def a_test_predict_length():
    """
    Tests that the prediction dataframe length is equal to the number of steps h
    """
    model = pf.ARIMA(data=data, ar=2, ma=2, family=pf.Exponential())
    x = model.fit()
    assert (model.predict(h=5).shape[0] == 5)
Example #8
def test_bbvi_elbo():
    """
    Tests that the ELBO increases
    """
    model = pf.ARIMA(data=data, ar=1, ma=1, family=pf.Skewt())
    x = model.fit('BBVI', iterations=400, record_elbo=True)
    assert (x.elbo_records[-1] > x.elbo_records[0])
Example #9
def test_predict_is_nans():
    """
    Tests that the in-sample predictions are not nans
    """
    model = pf.ARIMA(data=data, ar=2, ma=2, family=pf.Skewt())
    x = model.fit()
    assert(len(model.predict_is(h=5).values[np.isnan(model.predict_is(h=5).values)]) == 0)
Example #10
def univariate_arima():
    '''
    Reads the data and fits the ARIMA model
    Prints the Accuracy Score

    Inputs:
        None

    Outputs:
        None
    '''

    data = preprocessing.main()
    n_train_hours = 52 * 3
    train = data.iloc[:n_train_hours, :]
    test = data.iloc[n_train_hours:, :]

    model = pf.ARIMA(data=train, ar=9, ma=0, integ=1, target='milk')

    x = model.fit("MLE")
    x.summary()

    # model.plot_fit(figsize=(15,5))
    model.plot_predict(h=38, past_values=20, figsize=(15, 5))
    #import pdb; pdb.set_trace()

    yhat = model.predict(h=38)
    pred_chg = yhat > 0
    actual_chg = test.iloc[:-1, 0].diff() > 0
    print(accuracy_score(actual_chg, pred_chg))
Example #11
def flux_auto(y, s, k, a, t, e, r):
    """ One way to use flux package

            - Contemporaneous y[1:] variables are used as exogenous 'X' in pmdarima
            - This only works for k=1

        :returns: x, s', w
    """
    if s is None:
        s = dict()
        s = flux_hyperparams(s=s,r=r)
        s = initialize_buffers(s=s,y=y)

    if y is not None:
        # Process observation and return prediction
        assert isinstance(y, float) or len(y) == s['dim'], ' Cannot change dimension of input in flight '
        y0, exog = split_exogenous(y=y, dim=s['dim'])
        s = update_buffers(s=s, a=a, exog=exog, y0=y0)
        if True:  # Always fit prior to prediction
            none_, s, _ = flux_auto(y=None, s=s, k=k, a=a, t=t, e=e, r=r)  # Fit the model
            assert none_ is None
        return flux_or_last_value(s=s,k=k,exog=exog,y0=y0)

    if y is None:
        if len(s.get('buffer'))<s['n_burn']:
            s['model'] = None
        else:
            data = pd.DataFrame(columns=['y'], data=s.get('buffer'))
            s['model'] = pf.ARIMA(data=data, ar=s['ar'], ma=s['ma'], target='y', family=s['family'])
            _ = s['model'].fit("MLE")
        return None, s, None  # Acknowledge that a fit was requested by returning x=None, w=None
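flux_hyperparams, initialize_buffers, split_exogenous, update_buffers and flux_or_last_value are helpers from the surrounding package and are not shown. A hypothetical sketch of split_exogenous, assuming the convention described in the docstring (first element is the target, the rest are contemporaneous exogenous variables):

# Hypothetical split_exogenous (assumption based on the docstring above):
# the first element of y is the target, the remainder are exogenous variables.
def split_exogenous(y, dim):
    if isinstance(y, float) or dim == 1:
        return y, []            # scalar observation: no exogenous variables
    return y[0], list(y[1:])    # target first, exogenous variables after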
Example #12
def test_predict_is_length():
    """
    Tests that the prediction IS dataframe length is equal to the number of steps h
    """
    model = pf.ARIMA(data=data, ar=2, ma=2, family=pf.Cauchy())
    x = model.fit()
    assert (model.predict_is(h=5).shape[0] == 5)
def sliding_prediction_fixed_arma(ts, winsize=28, show_convg_info=False):
    import pandas as pd
    import pyflux as pf
    from statsmodels.tsa.arima_model import ARIMA

    ts = ts.astype(float)  # statsmodel
    # ts = pd.DataFrame(ts)  # pyflux

    start_indx = 0
    predictions = []

    for start_indx in range(0, ts.size - winsize):
        # for start_indx in range(0, 1):
        end_indx = start_indx + winsize
        ts_sliced = ts[start_indx:end_indx]
        date = ts.index[end_indx].date()

        # statsmodel
        # model = ARIMA(ts_sliced, (7, 1, 0))
        # model_fit = model.fit(disp=show_convg_info)
        # nextday_pred = model_fit.forecast(steps=1)

        # pyflux
        ts_sliced = pd.DataFrame(ts_sliced)
        model = pf.ARIMA(data=ts_sliced, ar=7, integ=0, ma=1)
        model_fit = model.fit("MLE")  # maximum likelihood fit
        nextday_pred = model.predict(h=1, intervals=True)
        pred_count = nextday_pred['count'][0]  # 'count' is the series/column name
        # print(model_fit)
        # print(nextday_pred['count'][0])
        print(nextday_pred['count'], date)
        predictions.append((date, pred_count))
    return pd.DataFrame(predictions)
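A minimal usage sketch for the sliding-window function above; the synthetic series and its 'count' name (which must match the column key used when reading the prediction) are assumptions for illustration.

# Hypothetical usage: a synthetic daily series named 'count' (made-up data).
import numpy as np
import pandas as pd

idx = pd.date_range('2021-01-01', periods=90, freq='D')
ts = pd.Series(np.random.poisson(lam=30, size=90), index=idx, name='count')

preds = sliding_prediction_fixed_arma(ts, winsize=28)
print(preds.head())  # one row per day: (date, one-step-ahead prediction)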
Example #14
def a_test_ppc():
    """
    Tests PPC value
    """
    model = pf.ARIMA(data=data, ar=2, ma=2, family=pf.Exponential())
    x = model.fit('BBVI', iterations=100)
    p_value = model.ppc(nsims=100)
    assert (0.0 <= p_value <= 1.0)
Example #15
def test_predict_length():
    """
	Tests that the prediction dataframe length is equal to the number of steps h
	"""
    model = pf.ARIMA(data=data, ar=2, ma=2)
    x = model.fit()
    x.summary()
    assert (model.predict(h=5).shape[0] == 5)
Example #16
def test_ppc():
    """
    Tests PPC value
    """
    model = pf.ARIMA(data=data, ar=2, ma=2, family=pf.Cauchy())
    x = model.fit('BBVI', iterations=100)
    p_value = model.ppc()
    assert (0.0 <= p_value <= 1.0)
Example #17
def a_test_predict_nans():
    """
    Tests that the predictions are not nans
    """
    model = pf.ARIMA(data=data, ar=2, ma=2, family=pf.Exponential())
    x = model.fit()
    assert (len(
        model.predict(h=5).values[np.isnan(model.predict(h=5).values)]) == 0)
Example #18
def predict_request_body_len(train_set, test_set, method):

    arima_utils.adfuller_test(train_set['request_body_len'].dropna())
    arima_utils.plot_series(train_set['request_body_len'], 'Original Series')
    '''
    train_set['Value First Difference'] = train_set['request_body_len'] - train_set['request_body_len'].shift(1)
    # dropna drops all the empty values
    arima_utils.adfuller_test(train_set['Value First Difference'].dropna())
    arima_utils.plot_series(train_set['Value First Difference'], 'Value First Difference')
    '''
    arima_utils.plot_pacf(train_set['request_body_len'])
    arima_utils.plot_acf(train_set['request_body_len'])
    # use request_body_len, response_body_len
    # use p = 12 (original attempt)
    # use q = 34 (original attempt)
    p = 2
    q = 1

    start_time = time.time()
    print("STARTING TIMER REQUEST ", method)

    model = pf.ARIMA(data=train_set,
                     ar=p,
                     ma=q,
                     integ=0,
                     target='request_body_len')
    x = model.fit(method=method)

    end_time = time.time()
    total_time = end_time - start_time
    print("TIME:          ", total_time)

    # PRINT DATA
    #print(x.summary())
    print(x.scores)
    model.plot_fit()
    plt.show()

    # model.plot_predict_is(h=30)
    # firstRegister = conn.head(30)
    #plt.plot(test_set['ts'], test_set['request_body_len'])
    #model.plot_predict_is(h=100, past_values=40)
    #print(model.predict(h=100))

    start_time = time.time()
    print("STARTING TIMER, PREDICT REQUEST  ", method)
    plt.plot(test_set.index,
             test_set['request_body_len'],
             label='REAL',
             color='pink')
    plt.plot(model.predict(h=100), label='PREDICTION', color='cyan')
    plt.legend(['REAL', 'PREDICTION'])
    # model.plot_predict(h=200, past_values=40)
    # plt.plot(firstRegister['ts'], firstRegister['request_body_len'])
    end_time = time.time()
    total_time = end_time - start_time
    print("TIME:          ", total_time)
    plt.show()
Example #19
def test_sample_model():
    """
    Tests sampling function
    """
    model = pf.ARIMA(data=data, ar=2, ma=2, family=pf.Cauchy())
    x = model.fit('BBVI', iterations=100)
    sample = model.sample(nsims=100)
    assert (sample.shape[0] == 100)
    assert (sample.shape[1] == len(data) - 2)
Example #20
def test_predict_is_nonconstant():
    """
    We should not really have predictions that are constant (should be some difference)...
    This captures bugs with the predict function not iterating forward
    """
    model = pf.ARIMA(data=data, ar=2, ma=2, family=pf.Cauchy())
    x = model.fit()
    predictions = model.predict_is(h=10, intervals=False)
    assert (not np.all(predictions.values == predictions.values[0]))
Example #21
def ARIMAX_model(df, target, ar, integ, ma):
    pfarima_model = pf.ARIMA(data=df,
                             ar=ar,
                             ma=ma,
                             integ=integ,
                             target=target,
                             family=pf.Normal())
    arima_x_mh = pfarima_model.fit("M-H")
    arima_x_mh.summary()
Example #22
def test_predict_nans():
    """
	Tests that the predictions are not nans
	"""
    model = pf.ARIMA(data=data, ar=2, ma=2)
    x = model.fit()
    x.summary()
    assert (len(
        model.predict(h=5).values[np.isnan(model.predict(h=5).values)]) == 0)
def predict_response_body_len(train_set, test_set, method):
    arima_utils.adfuller_test(train_set['response_body_len'])
    arima_utils.plot_series(train_set['response_body_len'], 'Original Series')
    arima_utils.plot_pacf(train_set['response_body_len'])
    arima_utils.plot_acf(train_set['response_body_len'])
    # use request_body_len, response_body_len

    # use p = 8 (original attempt)
    # use q = 7 (original attempt)
    p = 2
    q = 8

    start_time = time.time()
    print("STARTING TIMER, RESPONSE")

    model = pf.ARIMA(data=train_set,
                     ar=p,
                     ma=q,
                     integ=0,
                     target='response_body_len')
    x = model.fit(method=method)

    #model.fit(method='BBVI', iterations='10000', optimizer='ADAM')    ###
    #model.fit(method='Laplace')
    #model.fit(method='M-H')

    end_time = time.time()
    total_time = end_time - start_time
    print("TIME:          ", total_time)
    # PRINT DATA
    print(x.summary())
    print(x.scores)
    model.plot_fit()
    plt.show()
    # model.plot_predict_is(h=30)
    # firstRegister = conn.head(30)
    start_time = time.time()
    print("STARTING TIMER PREDICT RESPONSE")

    plt.plot(test_set.index,
             test_set['response_body_len'],
             label='REAL',
             color='pink')
    plt.plot(model.predict(h=100), label='PREDICTION', color='cyan')
    plt.legend(['REAL', 'PREDICTION'])

    #end_time = time.time()
    #total_time = end_time - start_time
    #print("TIME: " , total_time)

    end_time = time.time()
    total_time = end_time - start_time
    print("TIME:          ", total_time)

    # model.plot_predict(h=200, past_values=40)
    # plt.plot(firstRegister['ts'], firstRegister['response_body_len'])
    plt.show()
Example #24
def doARMA(data, ar, ma):
    family = pf.Normal()
    model = pf.ARIMA(data=data, ar=ar, ma=ma, target='sunspot.year', family=family)
    x = model.fit("MLE")
    # x.summary()
    # model.plot_fit(figsize=(15,10))
    model.plot_predict_is(h=500, figsize=(15,5))
    # model.plot_predict(h=20,past_values=20,figsize=(15,5))
    res = model.predict_is(h=500)
    return res
Example #25
def predict_ARIMA(trainData, testX, lookAhead, p, q):

    testX = np.array(testX).reshape(-1)
    total_train = np.concatenate([trainData, testX], axis=0)
    model = pf.ARIMA(data=total_train, ar=p, ma=q, family=pf.Normal())
    model.fit(method="MLE")

    pred = model.predict(lookAhead, intervals=False)

    return pred
Example #26
def a_test_bbvi():
    """
    Tests an ARIMA model estimated with BBVI and that the length of the latent variable
    list is correct, and that the estimated latent variables are not nan
    """
    model = pf.ARIMA(data=data, ar=1, ma=0, family=pf.Exponential())
    x = model.fit('BBVI', iterations=200)
    assert (len(model.latent_variables.z_list) == 3)
    lvs = np.array([i.value for i in model.latent_variables.z_list])
    assert (len(lvs[np.isnan(lvs)]) == 0)
Example #27
def a_test_laplace():
    """
    Tests an ARIMA model estimated with Laplace approximation and that the length of the 
    latent variable list is correct, and that the estimated latent variables are not nan
    """
    model = pf.ARIMA(data=data, ar=1, ma=1, family=pf.Exponential())
    x = model.fit('Laplace')
    assert (len(model.latent_variables.z_list) == 3)
    lvs = np.array([i.value for i in model.latent_variables.z_list])
    assert (len(lvs[np.isnan(lvs)]) == 0)
Example #28
def test_pml():
    """
    Tests a PML model estimated with Laplace approximation and that the length of the 
    latent variable list is correct, and that the estimated latent variables are not nan
    """
    model = pf.ARIMA(data=data, ar=1, ma=1, family=pf.Cauchy())
    x = model.fit('PML')
    assert (len(model.latent_variables.z_list) == 4)
    lvs = np.array([i.value for i in model.latent_variables.z_list])
    assert (len(lvs[np.isnan(lvs)]) == 0)
Example #29
def test_mh():
    """
    Tests an ARIMA model estimated with Metropolis-Hastings and that the length of the 
    latent variable list is correct, and that the estimated latent variables are not nan
    """
    model = pf.ARIMA(data=data, ar=1, ma=1, family=pf.Cauchy())
    x = model.fit('M-H', nsims=300)
    assert (len(model.latent_variables.z_list) == 4)
    lvs = np.array([i.value for i in model.latent_variables.z_list])
    assert (len(lvs[np.isnan(lvs)]) == 0)
Example #30
def test_bbvi_mini_batch():
    """
    Tests an ARIMA model estimated with BBVI and that the length of the latent variable
    list is correct, and that the estimated latent variables are not nan
    """
    model = pf.ARIMA(data=data, ar=1, ma=1, family=pf.Cauchy())
    x = model.fit('BBVI', iterations=100, mini_batch=32)
    assert (len(model.latent_variables.z_list) == 4)
    lvs = np.array([i.value for i in model.latent_variables.z_list])
    assert (len(lvs[np.isnan(lvs)]) == 0)