Code example #1
# walk-forward persistence forecast; the loop header was truncated in the source,
# so `test_X` below is an assumed name for the iterable of test inputs
for x in test_X:
    y_hat = model_persistence(x)
    predictions.append(y_hat)

submission_generator.generate(predictions)
test_score = mean_absolute_error(test_y, predictions)
print('Test MAE: %.3f' % test_score)
# plot predictions vs expected
plt.plot(test_y, label="real values")
plt.plot(predictions, color='red', label="predictions")
plt.legend(loc='upper left')
plt.show()

# Implementing Auto Regression Model
# Training Autoregression
print(train[:, 1])
model = AR(train_y)
model_fit = model.fit()
print('Lag: %s' % model_fit.k_ar)
print('Coefficients: %s' % model_fit.params)

# Making predictions
predictions = model_fit.predict(start=len(train_y), end=len(train_y) + len(test_y) - 1, dynamic=False)
error = mean_absolute_error(test_y, predictions)
print('Test MAE: %.3f' % error)

# Plotting results
plt.plot(test_y, label="real values")
plt.plot(predictions, color='red', label="predictions")
plt.legend(loc='upper left')

plt.show()
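Note: the AR class used throughout these snippets is deprecated (and was later removed) in statsmodels in favor of AutoReg; the FutureWarning checks in examples #20 and #21 reflect this. Below is a minimal sketch of the same fit-and-forecast step with the AutoReg API, assuming the train_y/test_y arrays from the snippet above (maxlag=13 is an arbitrary example value):

# Minimal sketch of the fit/forecast above using the newer AutoReg API.
# Assumes train_y and test_y are the 1-D arrays used in the snippet above;
# the lag order is chosen with ar_select_order instead of AR's automatic choice.
from statsmodels.tsa.ar_model import AutoReg, ar_select_order
from sklearn.metrics import mean_absolute_error

sel = ar_select_order(train_y, maxlag=13, ic='aic')  # pick the lag order by AIC
model_fit = sel.model.fit()                          # sel.model is an AutoReg built with the selected lags
predictions = model_fit.predict(start=len(train_y),
                                end=len(train_y) + len(test_y) - 1,
                                dynamic=False)
print('Test MAE: %.3f' % mean_absolute_error(test_y, predictions))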
Code example #2
File: test_ar.py  Project: Inoryy/statsmodels
def test_ar_select_order():
    # 2118
    np.random.seed(12345)
    y = sm.tsa.arma_generate_sample([1, -0.75, 0.3], [1], 100)
    ts = Series(y, index=DatetimeIndex(start="1/1/1990", periods=100, freq="M"))
    ar = AR(ts)
    res = ar.select_order(maxlag=12, ic="aic")
    assert_(res == 2)
Code example #3
File: test_ar.py  Project: DevSinghSachan/statsmodels
def test_ar_select_order():
    # 2118
    np.random.seed(12345)
    y = sm.tsa.arma_generate_sample([1, -.75, .3], [1], 100)
    ts = TimeSeries(y, index=DatetimeIndex(start='1/1/1990', periods=100,
                                           freq='M'))
    ar = AR(ts)
    res = ar.select_order(maxlag=12, ic='aic')
    assert_(res == 2)
Code example #4
File: test_ar.py  Project: statsmodels/statsmodels
def test_ar_select_order_tstat():
    rs = np.random.RandomState(123)
    tau = 25
    y = rs.randn(tau)
    ts = Series(y, index=date_range(start='1/1/1990', periods=tau,
                                    freq='M'))

    ar = AR(ts)
    res = ar.select_order(maxlag=5, ic='t-stat')
    assert_equal(res, 0)
Code example #5
File: __init__.py  Project: jordicolomer/open-kaggle
def predict(x, params):
    x = delete6keep1(x)
    #x = range(10)
    try:
        model = AR(x)
        res = model.fit(maxlag=1)
        ret = int(res.predict(len(x), len(x))[0])
        if ret > 100:
            print(x, ret)
        return ret
    except Exception:
        return 0
Code example #6
    def transform(self, X):
        """
        Detect and remove dropped.
        """
        out = []
        for x in X:
            tmp = []
            for a in x:
                ar_mod = AR(a[::self.subsample])
                ar_res = ar_mod.fit(self.order)
                bse = ar_res.bse
                if len(bse) != (self.order + 1):
                    bse = np.array([np.nan] * (self.order + 1))
                tmp.append(bse)
            out.append(tmp)
        return np.array(out)
Code example #7
File: compute.py  Project: jameszuccollo/pyscraper
def project(ser, start, end):
    """Fit AR model to series and project to end of index. Primarily
    useful for filling in missing values at the end of time series to
    ensure they match.

            ser: series to fit trend to
            start: date to begin fitting
            end: date to end fitting

        Returns:
            new_ser: series with missing end values replaced by fitted
                     values."""

    from statsmodels.tsa.ar_model import AR

    trend_mod = AR(ser[start:end]).fit()

    return trend_mod.predict(
        start=trend_mod.k_ar, end=ser.index.shape[0])
Code example #8
File: features.py  Project: victorshch/axiomatic
    def __call__(self, sample):
        """
        Computes self.n_coef AR coefficients for an array of samples
        See https://en.wikipedia.org/wiki/Autoregressive_model
        @param sample: m x n numpy array, m -- number of samples, n -- length of each sample
        @return: m x self.n_coef numpy array containing AR coefficients for each sample
        """

        m = sample.shape[0]
        trend = 'c' if self.use_constant else 'nc'
        maxlag = self.n_coef - 1 if self.use_constant else self.n_coef
        features = []

        for i in xrange(m):
            model = AR(sample[i])
            results = model.fit(maxlag, trend=trend)
            features.append(results.params)

        return np.array(features)
Code example #9
def AutoRegression(train, test):

    model = AR(train)
    model_fit = model.fit()
    window = model_fit.k_ar
    coef = model_fit.params
    # walk forward over time steps in test
    history = train[len(train) - window:]
    # print(len(history))
    history = [history[i] for i in range(len(history))]
    # print(history[0:5])
    predictions = list()
    for t in range(len(test)):
        length = len(history)
        lag = [history[i] for i in range(length - window, length)]
        yhat = coef[0]
        for d in range(window):
            yhat += coef[d + 1] * lag[window - d - 1]
        obs = test[t]
        predictions.append(yhat)
        history.append(obs)  # new observations added to history
        # print('predicted=%f, expected=%f' % (yhat, obs))
    return predictions
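The hand-rolled forecast inside the loop above is simply the fitted AR(p) recursion applied to the last p values in history (the same pattern reappears in examples #14, #15, #17 and #41). Writing window as p and coef as \varphi, the predicted value is

\hat{y}_t = \varphi_0 + \sum_{i=1}^{p} \varphi_i \, y_{t-i}

so coef[0] is the intercept and coef[d + 1] multiplies lag[window - d - 1], which is the observation d + 1 steps back.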
Code example #10
def spectrum0_ar(x):
    z = np.arange(1, len(x) + 1)
    z = z[:, np.newaxis]**[0, 1]
    p, res, rnk, s = lstsq(z, x)
    residuals = x - np.matmul(z, p)

    if residuals.std() == 0:
        spec = order = 0
    else:
        ar_out = AR(x).fit(ic='aic', trend='c')
        order = ar_out.k_ar
        spec = np.var(ar_out.resid) / (1 - np.sum(ar_out.params[1:]))**2

    return spec, order
Code example #11
File: graphs.py  Project: adrianacoca/Predictweet
def sentiment_prediction(data, user):
    y_train = data["sentiments"]
    model = AR(y_train)
    model_fit = model.fit(maxlag=1)
    future_pred = model_fit.predict(start=len(data["sentiments"]),
                                    end=105,
                                    dynamic=False)
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(y=data['sentiments'],
                   mode='lines+markers',
                   name='past sentiment',
                   text=(data['time'])))
    fig.add_trace(
        go.Scatter(y=future_pred,
                   x=list(range(len(data["sentiments"]), 105)),
                   mode='lines+markers',
                   name='prediction of future sentiment',
                   text=(data['time'])))

    fig.update_layout(
        title=f"Sentiment Analysis of @{user} twitter interactions")
    fig.show()
Code example #12
def autoRegression3(day):
    col_daily = db['daily']
    dailyGrossSet = []
    for y in range(2008, 2018):
        for record in col_daily.find({"Year": y}):
            movieNumber = record['MoviesTracked']
            gross = record['Gross($)'].replace(",", "")
            dailyGrossSet.append(int(gross) / int(movieNumber))
    daycount = 0
    for record in col_daily.find({"Year": 2018}):
        movieNumber = record['MoviesTracked']
        gross = record['Gross($)'].replace(",", "")
        dailyGrossSet.append(int(gross) / int(movieNumber))
        daycount += 1
        if daycount >= day:
            break
    print(dailyGrossSet)
    # fit model
    model = AR(dailyGrossSet)
    model_fit = model.fit()
    # make prediction
    res = model_fit.predict(len(dailyGrossSet), len(dailyGrossSet))
    print(res)
Code example #13
File: test_ar.py  Project: shivankgoel/statsmodels
    def test_mle(self):
        # check predict with no constant, #3945
        res1 = self.res1
        endog = res1.model.endog
        res0 = AR(endog).fit(maxlag=9, method='mle', trend='nc', disp=0)
        assert_allclose(res0.fittedvalues[-10:],
                        res1.fittedvalues[-10:],
                        rtol=0.015)

        res_arma = ARMA(endog, (9, 0)).fit(method='mle', trend='nc', disp=0)
        assert_allclose(res0.params, res_arma.params, atol=5e-6)
        assert_allclose(res0.fittedvalues[-10:],
                        res_arma.fittedvalues[-10:],
                        rtol=1e-4)
Code example #14
def metodo_Dm(cpu_workload, Y, Z, output_list):

    X = Y
    train_size = int(len(X))

    train, test = X[:train_size], X[:train_size]
    #print(len(train)," ",len(test))

    #print("test = ",len(test))
    # train autoregression
    model = AR(train)
    model_fit = model.fit()
    window = model_fit.k_ar
    coef = model_fit.params
    # walk forward over time steps in test
    history = train[len(train) - window:]
    history = [history[i] for i in range(len(history))]
    predictions = list()

    print(len(Z), "  ", len(output_list), " ", len(test))

    for t in range(len(test)):
        length = len(history)
        lag = [history[i] for i in range(length - window, length)]
        yhat = coef[0]
        for d in range(window):
            yhat += coef[d + 1] * lag[window - d - 1]
        obs = test[t]
        if (Z[t] == output_list[0]
            ):  #or Z[t]==output_list[1] or Z[t]==output_list[2]):
            predictions.append(cpu_workload[t])
        else:
            predictions.append(-yhat + 4)
        history.append(obs)
    #print('predicted=%f, expected=%f' % (yhat, obs))
    error = mean_squared_error(test, predictions)
    return test, predictions, error
Code example #15
def autoregression(data, train_test_percentage=20):
    train_test_size = int(len(data) * float(train_test_percentage) / 100)
    train, test = data[0:train_test_size], data[train_test_size:]

    # train autoregression
    model = AR(train)
    model_fit = model.fit()
    window = model_fit.k_ar
    coef = model_fit.params

    # walk forward over time steps in test
    history = train[len(train)-window:]
    history = [history[i] for i in range(len(history))]
    predictions = list()

    for t in range(len(test)):
        length = len(history)
        lag = [history[i] for i in range(length-window, length)]
        yhat = coef[0]

        for d in range(window):
            yhat += coef[d+1] * lag[window-d-1]
        
        obs = test[t]
        predictions.append(yhat)
        history.append(obs)
        
    mse_error = mean_squared_error(test, predictions)
    print('Autoregression MSE: ' + str(mse_error))
    pyplot.plot(range(len(test)), predictions, color='red', lw=2, label='prediction')
    pyplot.plot(range(len(test)), test, color='green', lw=2, label='actual')
    pyplot.ylabel('max temp')
    pyplot.xlabel('days from 1/1/2009')
    pyplot.title('Autoregression')
    pyplot.show()
    
    return predictions
Code example #16
def modelling_AR(df, name):
    """
    Function to get the prediction model AR and apply to our DF
    """
    data_close = df[f'CLOSE_{name}']
    b, a = signal.butter(3, 1/10)
    filtrd_data_close = signal.filtfilt(b, a, data_close)
    df2 = pd.DataFrame({"X":data_close.to_numpy(),"Xf": filtrd_data_close},index=df.index)
    dr = df2.index
    realidad = df2.loc[dr[:22808]]
    futuro = df2.loc[dr[22808:]]
    predictions_AR = dict()

    for col in realidad.columns:
        train = realidad[col]
        test = futuro[col]

        # Train the AR model
        model_AR = AR(train)
        print(f"Entrenando con los datos desde la serie {col}")
        model_fit_AR = model_AR.fit(maxlag=4)

        # Predict the AR values
        predictions_AR[f'{col}_prediction'] = model_fit_AR.predict(start=len(train),
                                        end=len(train)+len(test)-1, dynamic=False)
      
    pred_AR = pd.DataFrame(predictions_AR)
    pred_AR.index = futuro.index

    AR_predictions = pd.DataFrame({
    "GT":futuro.X,
    "X":pred_AR.X_prediction,
    "Xf":pred_AR.Xf_prediction,
    "diff_X": futuro.X - pred_AR.X_prediction,
    "diff_Xf":futuro.X - pred_AR.Xf_prediction},index=futuro.index)

    return AR_predictions
Code example #17
    def AutoRegressive(self, data, testSize=2, test=True):
        # Autoregressive model used for time-series predictions
        # if test= True, then select the last testSize points as test set
        # else predict for a period of testSize
        print(data.shape)
        if test:
            trainData = data[:-testSize]
            testData = data[-testSize:]
        else:
            trainData = data

        model = AR(trainData)
        modelFit = model.fit()
        winSize, coeff = modelFit.k_ar, modelFit.params

        predData = list(trainData[-winSize:])
        pred = []
        for i in range(testSize):
            x = list(predData[-winSize:])
            y = coeff[0]
            # use winSize number of data to predict future value
            for n in range(winSize):
                y += coeff[n + 1] * x[winSize - (n + 1)]
            if test:
                # use test data to predict future value
                predData.append(testData[i])
            else:
                # use predicted value to predict future value
                predData.append(y)
            pred.append(y)

        if test:
            error = mse(testData, pred)
            return pred, error, testData
        else:
            error = None
            return pred, error
Code example #18
File: app.py  Project: anuf/cazatalentos
def time_series(ts_dict, num_pred=7, title="Efficiency"):
    '''Models and predicts time series from data'''
    data = []
    for k in ts_dict:
        ts = ts_dict[k]
        train, test = ts[1:len(ts) - num_pred], ts[len(ts) - num_pred:]

        # train autoregression
        model = AR(train, freq="W")
        model_fit = model.fit()

        # make predictions
        predictions = model_fit.predict(start=len(train),
                                        end=len(train) + len(test) - 1,
                                        dynamic=False)

        # Create a trace results
        predict = pd.concat([ts[len(ts) - 8:len(ts) - 7], predictions])

        line_predict = go.Scatter(x=predict.index,
                                  y=predict.values,
                                  name="prediccion " +
                                  k)  # , marker={'color': 'rgb(0,255,0)'})
        line_hist = go.Scatter(x=train.index,
                               y=train.values,
                               name="historicos " + k)

        data += [line_hist, line_predict]

    layout = go.Layout(title=title)
    figure = go.Figure(data=data, layout=layout)

    return ({
        'figure': figure,
        'curr_val': train[-1],
        'first_pred': predictions[0]
    })
Code example #19
File: arma.py  Project: LONG-9621/Stackedcapsule
def fit_ar(outputs, inputs, guessed_dim):
    """Fits an AR model of order p = guessed_dim.

  Args:
    outputs: Array with the output values from the LDS.
    inputs: Array with exogenous inputs values.
    guessed_dim: Guessed hidden dimension.

  Returns:
    - Fitted AR coefficients.
  """
    if outputs.shape[1] > 1:
        # If there are multiple output dimensions, fit autoregressive params on
        # each dimension separately and average.
        params_list = [
            fit_ar(outputs[:, j:j+1], inputs, guessed_dim) \
            for j in xrange(outputs.shape[1])]
        return np.mean(np.concatenate([a.reshape(1, -1) for a in params_list]),
                       axis=0)
    if inputs is None:
        model = AR(outputs).fit(ic='bic',
                                trend='c',
                                maxlag=guessed_dim,
                                disp=0)
        arparams = np.zeros(guessed_dim)
        arparams[:model.k_ar] = model.params[model.k_trend:]
        return arparams
    else:
        model = ARMA(outputs, order=(guessed_dim, 0), exog=inputs)
        try:
            arma_model = model.fit(start_ar_lags=guessed_dim,
                                   trend='c',
                                   disp=0)
            return arma_model.arparams
        except (ValueError, np.linalg.LinAlgError) as e:
            warnings.warn(str(e), sm_exceptions.ConvergenceWarning)
            return np.zeros(guessed_dim)
Code example #20
File: test_ar.py  Project: Gys19/statsmodels
def test_ar_errors(reset_randomstate):
    with pytest.raises(ValueError, match='Only the univariate case'):
        with pytest.warns(FutureWarning):
            AR(np.empty((1000, 2)))
    with pytest.warns(FutureWarning):
        ar = AR(np.random.standard_normal(1000))
    with pytest.raises(ValueError, match='Method yw not'):
        ar.fit(method='yw')
    with pytest.raises(ValueError, match='ic option fpic not'):
        ar.fit(ic='fpic')
    res = ar.fit()
    with pytest.raises(ValueError, match='Start must be >= k_ar'):
        res.predict(start=0)
    with pytest.raises(ValueError, match='Prediction must have `end` after'):
        res.predict(start=100, end=99)
    with pytest.raises(ValueError, match='Length of start params'):
        with pytest.warns(FutureWarning):
            AR(np.random.standard_normal(1000)).fit(maxlag=2,
                                                    method='mle',
                                                    start_params=[1, 1])
Code example #21
def test_ar_errors(reset_randomstate):
    with pytest.raises(ValueError, match="Only the univariate case"):
        with pytest.warns(FutureWarning):
            AR(np.empty((1000, 2)))
    with pytest.warns(FutureWarning):
        ar = AR(np.random.standard_normal(1000))
    with pytest.raises(ValueError, match="Method yw not"):
        ar.fit(method="yw")
    with pytest.raises(ValueError, match="ic option fpic not"):
        ar.fit(ic="fpic")
    res = ar.fit()
    with pytest.raises(ValueError, match="Start must be >= k_ar"):
        res.predict(start=0)
    with pytest.raises(ValueError, match="Prediction must have `end` after"):
        res.predict(start=100, end=99)
    with pytest.raises(ValueError, match="Length of start params"):
        with pytest.warns(FutureWarning):
            AR(np.random.standard_normal(1000)).fit(maxlag=2,
                                                    method="mle",
                                                    start_params=[1, 1])
Code example #22
class TestingActivity4_01(unittest.TestCase):
    def setUp(self) -> None:
        ROOT_DIR = os.path.dirname(os.path.abspath(__file__))

        self.data = pd.read_csv(
            os.path.join(ROOT_DIR, '..', 'Datasets', 'austin_weather.csv'))

    def test_AR(self):
        self.model = AR(self.data.TempAvgF)
        self.model_fit = self.model.fit()
        self.max_lag = self.model_fit.k_ar
        self.assertEqual(self.max_lag, (23))
        self.params = self.model_fit.params[0:4]
        self.assertEqual(round(self.params[0], 4), (1.9094))
        self.assertEqual(round(self.params[1], 4), (0.9121))
Code example #23
    def test_mle(self):
        # check predict with no constant, #3945
        res1 = self.res1
        endog = res1.model.endog
        with pytest.warns(FutureWarning):
            res0 = AR(endog).fit(maxlag=9, method="mle", trend="nc", disp=0)
        assert_allclose(res0.fittedvalues[-10:],
                        res1.fittedvalues[-10:],
                        rtol=0.015)

        res_arma = ARIMA(endog, order=(9, 0, 0), trend="n").fit()
        assert_allclose(res0.params, res_arma.params[:-1], rtol=1e-2)
        assert_allclose(res0.fittedvalues[-10:],
                        res_arma.fittedvalues[-10:],
                        rtol=1e-4)
Code example #24
    def get_stat_AR_coefficients(self, signals, max_lag):
        """Get the auto-regression coefficients for a set of time series signals.

            Args:
                signals (DataFrame): A Pandas DataFrame of waveforms, one per column
                max_lag (int): The maximum number of AR coefficients to return.  Will be zero-padded if the model
                               requires fewer than the number specified.

            Returns DataFrame: A dataframe that contains a single row where each column is a parameter coefficient.
        """
        for i in range(0, np.shape(signals)[1]):

            # The AR model throws for some constant signals.  The signals should have been normalized into z-scores, in
            # which case the parameters for an all zero signal are all zero.
            if self.is_constant_signal(signals[i]) and signals[0, i] == 0:
                parameters = np.zeros(max_lag + 1)
            else:
                model = AR(signals[:, i])
                model_fit = model.fit(maxlag=max_lag, ic=None)
                if np.shape(model_fit.params)[0] < max_lag + 1:
                    parameters = np.pad(
                        model_fit.params,
                        (0, max_lag + 1 - np.shape(model_fit.params)[0]),
                        'constant',
                        constant_values=0)
                elif np.shape(model_fit.params)[0] > max_lag + 1:
                    parameters = model_fit.params[:max_lag + 1]
                else:
                    parameters = model_fit.params

            if i == 0:
                coefficients = parameters
            else:
                coefficients = np.append(coefficients, parameters, axis=0)

        return pd.DataFrame(coefficients).T
Code example #25
def returnpred(p, m, file='SIH.csv'):
    dataset = pd.read_csv(file)
    x1 = dataset.loc[(dataset['Product_Name'] == p) & (dataset['Month'] == m)]
    y1 = x1.groupby('Day').mean()
    y1 = y1.rename(columns={'Month': 'days'})
    y = y1.iloc[:, 5]
    n1 = len(y)
    train1 = y[0:25]
    test1 = y[25:n1]
    model_AR = AR(train1)
    model_fit_AR = model_AR.fit()
    predictions_AR = model_fit_AR.predict(start=25, end=n1 + 10)

    plt.figure()
    plt.plot(test1)
    plt.plot(predictions_AR, color='red')
    plt.title("Future Predictions of different company")
    plt.legend(['Original', 'Predictions'])

    fig = plt.gcf()
    plotly_fig = tls.mpl_to_plotly(fig)
    plotly_fig['layout']['width'] = 1200
    plot_div = plot(plotly_fig, output_type='div', include_plotlyjs=False)
    return plot_div
Code example #26
class TestingExercise4_01(unittest.TestCase):
    def setUp(self) -> None:
        ROOT_DIR = os.path.dirname(os.path.abspath(__file__))

        self.data = pd.read_csv(
            os.path.join(ROOT_DIR, '..', 'Datasets', 'spx.csv'))

    def test_AR(self):
        self.model = AR(self.data.close)
        self.model_fit = self.model.fit()
        self.max_lag = self.model_fit.k_ar
        self.assertEqual(self.max_lag, (36))
        self.params = self.model_fit.params[0:4]
        self.assertEqual(round(self.params[0], 4), (0.1142))
        self.assertEqual(round(self.params[1], 4), (0.9442))
Code example #27
def main(csv_file_path):
    # load csv
    all_samples = load_csv(csv_file_path)

    # split to test and train
    train, test = split_samples(all_samples)

    # set history=train (duplicate train)
    history = list(train)

    # for i < number_of_predictions
    prediction_list = list()
    for prediction_index in range(PREDICTIONS):
        # train model on history
        model = AR(history)
        model_fit = model.fit()

        # predict next value and concatenate to prediction list
        predictions = model_fit.predict(
            start=len(history), end=len(history), dynamic=False)
        prediction_list.append(predictions[0])

        # concatenate test[i] to history
        history.append(test[prediction_index])
        print('predicted={pred_value}, expected={real_value}'.format(
            pred_value=prediction_list[-1], real_value=test[prediction_index]))

        # keep history to same length
        history = history[1:]

    # calculate MSE with test and prediction lists
    error = mean_squared_error(test, prediction_list)
    print('Test MSE = {mse_value}'.format(mse_value=error))

    # return test and predictions
    return test, prediction_list
Code example #28
def AR_prediction(data, test_data, test_for_AR, ar_summary):
    print("AR_prediction() start execute")
    tickers = test_for_AR
    log_returns = to_log_return(data)
    with open('prediction_results/AR_model_prediction.txt',
              'w') as results_file:
        for ticker in tickers:
            print("AR_prediction() start execute in ticker: " + ticker)
            log_rtn = log_returns[ticker].dropna()
            result = AR(log_rtn).fit(ar_summary[ticker][0])
            result_show = result.predict(test_data.index[0],
                                         test_data.index[-1])
            test_log_returns = to_log_return(test_data)
            test_log_rtn = test_log_returns[ticker].dropna()
            test_log_rtn = test_log_rtn[result_show.index]
            visualization(test_log_rtn, result_show, 'AR_prediction', ticker)
            rmse = sqrt(
                sum((result_show - test_log_rtn).dropna()**2) /
                test_log_rtn.size)
            message = "The prediction for {} is \n {}\n RMSE:{}\n".format(
                ticker, result_show, rmse)
            results_file.write(message)

    return
Code example #29
def OU_fitting(series):

    # series: pd.Series, indexed by date

    # return the fitted OU process model params.

    ar_model = AR(endog=series).fit(maxlag=1)
    [b, a] = ar_model.params.tolist()
    resid_std = np.std(ar_model.resid)

    lam = -np.log(a)
    mu = b / (1 - a)
    sigma = resid_std * np.sqrt(-2 * np.log(a) / (1 - a * a))

    res = {'ar_model': ar_model, 'lam': lam, 'mu': mu, 'sigma': sigma}
    return (res)
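The parameter mapping above follows from the exact discretization of the Ornstein-Uhlenbeck process dX_t = \lambda(\mu - X_t)\,dt + \sigma\,dW_t over one observation interval (a sketch of the algebra, assuming unit spacing between observations). The discretized process is the AR(1) recursion

X_t = X_{t-1} e^{-\lambda} + \mu\,(1 - e^{-\lambda}) + \varepsilon_t,
\qquad \operatorname{Var}(\varepsilon_t) = \sigma^2 \frac{1 - e^{-2\lambda}}{2\lambda},

so matching it to the fitted model X_t = b + a X_{t-1} + \varepsilon_t gives

a = e^{-\lambda} \Rightarrow \lambda = -\ln a,
\qquad \mu = \frac{b}{1 - a},
\qquad \sigma = \operatorname{sd}(\varepsilon)\,\sqrt{\frac{-2\ln a}{1 - a^2}},

which is exactly what the lam, mu and sigma lines compute.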
Code example #30
def ar_coefficient(x, c, param):
    """
    This feature calculator fits an autoregressive AR(k) process by unconditional maximum likelihood. The parameter k
    is the maximum lag of the process.

    .. math::

        X_{t} = \\varphi_0 + \\sum_{i=1}^{k} \\varphi_{i} X_{t-i} + \\varepsilon_{t}

    For each configuration in param, which should contain the maxlag "k", such an AR process is fitted. Then
    the coefficients :math:`\\varphi_{i}` whose index :math:`i` is listed in "coeff" are returned.

    :param x: the time series to calculate the feature of
    :type x: pandas.Series
    :param c: the time series name
    :type c: str
    :param param: contains dictionaries {"coeff": x, "k": y} with x,y int
    :type param: list
    :return x: the different feature values
    :return type: pandas.Series
    """
    df_cfg = pd.DataFrame(param)
    df_cfg["k"] = df_cfg["k"].apply(int)

    res = pd.Series()

    for k in df_cfg["k"].unique():
        coeff = df_cfg[df_cfg["k"] == k]["coeff"]
        try:
            mod = AR(list(x)).fit(maxlag=k, solver="mle").params
            res_tmp = pd.Series(index=["{}__ar_coefficient__k_{}__coeff_{}".format(c, k, p) for p in coeff])

            for p in coeff:
                if p <= k:
                    try:
                        res_tmp["{}__ar_coefficient__k_{}__coeff_{}".format(c, k, p)] = mod[p]
                    except IndexError:
                        res_tmp["{}__ar_coefficient__k_{}__coeff_{}".format(c, k, p)] = 0
                else:
                    res_tmp["{}__ar_coefficient__k_{}__coeff_{}".format(c, k, p)] = np.NaN

        except (LinAlgError, ValueError):
            res_tmp = pd.Series([np.NaN] * len(coeff),
                                index=["{}__ar_coefficient__k_{}__coeff_{}".format(c, k, p) for p in coeff])

        res = res.append(res_tmp)
    return res
Code example #31
    def __init__(self, train, test, last_train, alpha, diff):
        """
        The class is initialized to transform external parameters into class properties.
        """

        self.train, self.test, self.last_train = train, test, last_train
        self.models_init, self.models_param = dict(), dict()
        
        k = len(last_train)
        
        self.idx_orig  = self.train.columns[:-k]
        self.idx_trans = self.train.columns[-k:]
        self.alpha = alpha
        self.diff = diff
        
        for i in self.idx_trans:
            self.models_init[i] = AR(self.train[i].values)
Code example #32
File: 波动率预测.py  Project: kasperlmc/learngit
def predict_AR(df, window=ROLLING_WINDOW, p=1):
    """第一种方法: 在给定滚动周期下利用AR(P)模型预测

    输入:
        df:DataFrame, 波动率原始数据
        window: 整数滚动周期
        p: int, lag of AR model
    输出:
        vols_pred: 时间序列, 预测波动率
    """

    fit = lambda x: AR(x).fit(maxlag=p, disp=0).predict(start=x.size,
                                                        end=x.size)
    vols_pred = df[VOL_NAME].rolling(window).apply(fit)
    vols_pred.name = 'AR' + '_' + repr(window) + '_' + repr(p)
    print(vols_pred.name + " prediction finished.")
    return vols_pred
Code example #33
def initialize(df):
    global predictions
    predictions = pd.DataFrame()
    user_input = input(
        "Do you want to forecast by CATEGORY, VENDOR or PRODUCT? Select one: ")
    user_input = user_input.upper()
    user_selection = input('Type down your specific' + ' ' + user_input +
                           ' in the same format' + ':')
    dataset = df[['INV_AMOUNT_USD', user_input, 'Date']]
    dataset = dataset.groupby([user_input, 'Date']).sum().reset_index()
    sample = dataset.loc[dataset[user_input] == user_selection][[
        'INV_AMOUNT_USD', 'Date'
    ]]
    FBProphet(sample)
    AR(sample)
    ARIMA(sample)
    expo_smooth(sample)
Code example #34
def predict_AR(array, p=1):
    """第一种方法: 在给定滚动周期下利用AR(P)模型预测

    输入:
        df:DataFrame, 波动率原始数据
        window: 整数滚动周期
        p: int, lag of AR model
    输出:
        vols_pred: 时间序列, 预测波动率
    """

    #fit = lambda x: AR(x).fit(maxlag=p, disp=0).predict(start=x.size, end=x.size)
    #vols_pred = df[VOL_NAME].rolling(window).apply(fit)
    vols_pred = AR(array).fit(maxlag=p, disp=0).predict(start=array.size,
                                                        end=array.size,
                                                        dynamic=True)
    return vols_pred
Code example #35
def ARC_cal(input_file):
    #Auto Regression Coefficient
    #https://machinelearningmastery.com/autoregression-models-time-series-forecasting-python/
    import pandas as pd
    from pandas import Series
    from matplotlib import pyplot
    from statsmodels.tsa.ar_model import AR  #conda install statsmodels
    from sklearn.metrics import mean_squared_error  #pip install -U scikit-learn scipy matplotlib #conda install scikit-learn
    import numpy as np

    with open(input_file) as file_var:
        for line in file_var:
            channel_numbers = len(line.split())  #actually 8 as already known
            break
    #print("Channel number:", channel_numbers)
    '''
    number_of_lines = 0    
    
    with open(input_file) as file_var:
        for line in file_var:
            number_of_lines += 1
    #print("Line number:", number_of_lines)
    '''

    ARC_list = []
    ARC_list_LagNumber = []
    '''
    for column_n in range(channel_numbers):
        ARC_list.append([])
    '''

    input_series = pd.read_csv(input_file, header=None)
    input_tmp_file = input_series.values
    number_of_lines = len(input_tmp_file)
    input_file_array = [[
        float(ele) for ele in input_tmp_file[line_n][0].split()
    ] for line_n in range(number_of_lines)]
    input_file_array = np.array(input_file_array)
    for column_n in range(channel_numbers):
        model_fit = AR(input_file_array[:, column_n]).fit()
        ARC_list_LagNumber.append(model_fit.k_ar)
        ARC_list.append(model_fit.params.tolist())

    return ARC_list_LagNumber, ARC_list
Code example #36
File: test_ar.py  Project: liu-lu/statsmodels
def test_summary_corner():
    data = sm.datasets.macrodata.load_pandas().data["cpi"].diff().dropna()
    dates = period_range(start='1959Q1', periods=len(data), freq='Q')
    data.index = dates
    res = AR(data).fit(maxlag=4)
    summ = res.summary().as_text()
    assert 'AR(4)' in summ
    assert 'L4.cpi' in summ
    assert '03-31-1959' in summ
    res = AR(data).fit(maxlag=0)
    summ = res.summary().as_text()
    assert 'const' in summ
    assert 'AR(0)' in summ
Code example #37
def forecasting(model_name, resultsDict, predictionsDict, df, df_training,
                df_testcase):
    #index = len(df_training)
    yhat = list()

    for t in tqdm(range(len(df_testcase.Ambient_Temp))):
        temp_train = df[:len(df_training) + t]

        if model_name == "SES":
            model = SimpleExpSmoothing(temp_train.Ambient_Temp)
        elif model_name == "HWES":
            model = ExponentialSmoothing(temp_train.Ambient_Temp)
        elif model_name == "AR":
            model = AR(temp_train.Ambient_Temp)
        # elif model_name == "MA":
        #     model = ARMA(temp_train.Ambient_Temp, order=(0, 1))
        # elif model_name == "ARMA":
        #     model = ARMA(temp_train.Ambient_Temp, order=(1, 1))
        elif model_name == "ARIMA":
            model = ARIMA(temp_train.Ambient_Temp, order=(1, 0, 0))
        elif model_name == "SARIMAX":
            model = SARIMAX(temp_train.Ambient_Temp,
                            order=(1, 0, 0),
                            seasonal_order=(0, 0, 0, 3))

        model_fit = model.fit()

        if model_name == "SES" or "HWES":
            predictions = model_fit.predict(start=len(temp_train),
                                            end=len(temp_train))
        elif model_name == "AR" or "ARIMA" or "SARIMAX":
            predictions = model_fit.predict(start=len(temp_train),
                                            end=len(temp_train),
                                            dynamic=False)
        yhat = yhat + [predictions]

    yhat = pd.concat(yhat)
    resultsDict[model_name] = metrics.evaluate(df_testcase.Ambient_Temp,
                                               yhat.values)
    predictionsDict[model_name] = yhat.values
    plt.plot(df_testcase.Ambient_Temp.values, label='Original')
    plt.plot(yhat.values, color='red', label=model_name + ' predicted')
    plt.legend()
    plt.show()
Code example #38
class AutoRegression(PredictionModel):
    def __init__(self):
        super(AutoRegression, self).__init__()
        self.model = None
        self.model_fitted = None
        self.train_df = None

    def train(self, predict_state, predict_field):
        self.train_df = self.assign_train_df(predict_field)
        self.state = predict_state
        self.model = AR(self.train_df[predict_state])
        self.model_fitted = self.model.fit(maxlag=1)

    def predict(self):
        pred = self.model_fitted.predict(start=len(self.train_df[self.state]),
                                         end=len(self.train_df[self.state]) +
                                         FUTURE_DAYS - 1,
                                         dynamic=False)
        return np.round(pred, 0).astype(np.int32).array
Code example #39
File: forecast.py  Project: SNU-sunday/FISS-PYTHON
def ARcast(data,time,dt=False,axis=-1,missing=0):
    """
    Forecast the data by using the autoregressive (AR) method.

    The code automatically finds the unevenly sampled data points,
    and then forecasts those points by using the AR method.
    
    Parameters
    ----------
    data : ~numpy.ndarray
        n dimensional data.
        Data must have the same number of elements as the time.
    time : astropy.time.core.Time
        The time for the each data points.
    dt : (optional) float
        The time interval between data points, in seconds.
    axis : (optional) int
        An axis to forecast.
    missing : (optional) float
        The missing value of the data.
        It may be due to data alignment.
    
    Returns
    -------
    ARdata : ~numpy.ndarray
        Autoregressed data.
        It may have more elements than the input data.
    tf : ~numpy.ndarray
        Time of the forecasted ARdata points.
    
    Notes
    -----
    Input time must be an astropy.time.core.Time,
    but the output time is a ~numpy.ndarray.
    
    References
    ----------
    `AR model <https://en.wikipedia.org/wiki/Autoregressive_model>`_.\n
    `statsmodels.tsa.ar_model.AR <http://statsmodels.sourceforge.net/devel/generated/statsmodels.tsa.ar_model.AR.html>`_.
    
    Example
    -------
    >>> from fisspy.analysis.forecast import ARcast
    >>> ARdata, tf = ARcast(data,t,dt=20.,axis=1)
    """
    if not dt:
        dt=(time[1]-time[0]).value
    
    shape=list(data.shape)
    shape0=list(data.shape)
    if shape[axis]!=len(time):
        raise ValueError('The size of data is different from the size of time.')
        
    t=(time-time[0])*24*3600
    t=t.value
    tf=np.arange(t[0],t[-1],dt,dtype=float)
    
    interp=interp1d(t,data,axis=axis)
    datai=interp(tf)
    
    shape.pop(axis)
    ind=[shape0.index(i) for i in shape]
    ind=[axis]+ind
    datat=datai.transpose(ind)
    
    shapei=datat.shape
    datat=datat.reshape((shapei[0],np.prod(shapei[1:])))
    shapet=datat.shape
    
    td=t-np.roll(t,1)
    addi=np.where(td >= dt*2)[0]
    
    for wh in addi:
        for i in range(shapet[1]):
            y=datat[:,i]
            wh2=wh+int(td[wh]/dt-1)
            if (y==missing).sum()<4:
                bar=AR(y)
                car=bar.fit()
                dar=car.predict(int(wh),int(wh2))
                datat[wh:wh2+1,i]=dar
            else:
                datat[wh:wh2+1,i]=missing
    datat=datat.reshape((shapei))
    
    return datat.transpose(ind), tf
                
Code example #40
File: main.py  Project: MrChoclate/speech-recognition
def get_lpc(trame):
    ar_mod = AR(trame)
    ar_res = ar_mod.fit(20)
    return ar_res.params
Code example #41
File: forecast.py  Project: rvijayagopalan/finance
# create a differenced series (first differences; header reconstructed, the original snippet begins mid-function)
def difference(dataset):
	diff = [dataset[i] - dataset[i - 1] for i in range(1, len(dataset))]
	return numpy.array(diff)

# Make a prediction give regression coefficients and lag obs
def predict(coef, history):
	yhat = coef[0]
	for i in range(1, len(coef)):
		yhat += coef[i] * history[-i]
	return yhat

series = Series.from_csv('../data/nifty.csv', header=0)
# split dataset
X = difference(series.values)
size = int(len(X) * 0.66)
train, test = X[0:size], X[size:]
# train autoregression
model = AR(train)
model_fit = model.fit(maxlag=6, disp=False)
window = model_fit.k_ar
coef = model_fit.params
# walk forward over time steps in test
history = [train[i] for i in range(len(train))]
predictions = list()
for t in range(len(test)):
	yhat = predict(coef, history)
	obs = test[t]
	predictions.append(yhat)
	history.append(obs)
error = mean_squared_error(test, predictions)
print('Test MSE: %.3f' % error)
# plot (the predictions-vs-test overlay is assumed; the original snippet is truncated here)
pyplot.plot(test)
pyplot.plot(predictions, color='red')
pyplot.show()
Code example #42
File: test_ar.py  Project: Inoryy/statsmodels
def test_ar_select_order_tstat():
    rs = np.random.RandomState(123)
    tau = 25
    y = rs.randn(tau)
    ts = Series(y, index=DatetimeIndex(start="1/1/1990", periods=tau, freq="M"))

    ar = AR(ts)
    res = ar.select_order(maxlag=5, ic="t-stat")
    assert_equal(res, 0)