Example #1
0
def predict(x, params):
    """Forecast the next value of ``x`` with a lag-1 AR model.

    The series is first passed through ``delete6keep1``; an AR model is then
    fitted with ``maxlag=1`` and the one-step-ahead forecast is truncated to
    an ``int``.

    Args:
        x: Input series.
        params: Unused; kept so the call signature stays unchanged.

    Returns:
        The integer forecast, or 0 when fitting or predicting raises.
    """
    x = delete6keep1(x)
    try:
        res = AR(x).fit(maxlag=1)
        ret = int(res.predict(len(x), len(x))[0])
    except Exception:
        # Deliberate best effort: any fit/predict failure falls back to 0.
        return 0
    if ret > 100:
        # Debug trace for unusually large forecasts. Written as a single
        # formatted string so it prints the same on Python 2 and Python 3.
        print('%s %s' % (x, ret))
    return ret
 def transform(self, X):
     """
     Collect the ``bse`` vector of an AR fit for every signal in X.

     X is iterated as groups of signals. Each signal is subsampled with
     step ``self.subsample`` and fitted with ``self.order`` passed as the
     lag argument. When the fit's ``bse`` does not have ``self.order + 1``
     entries it is replaced by a NaN vector of that length. The nested
     result is returned as a numpy array.
     """
     def _bse_for(signal):
         fitted = AR(signal[::self.subsample]).fit(self.order)
         errors = fitted.bse
         wanted = self.order + 1
         if len(errors) == wanted:
             return errors
         # Unexpected length: substitute NaNs so every entry has one shape.
         return np.full(wanted, np.nan)

     return np.array([[_bse_for(signal) for signal in group] for group in X])
Example #3
0
    def __call__(self, sample):
        """
        Computes AR coefficients for an array of samples
        See https://en.wikipedia.org/wiki/Autoregressive_model

        When ``self.use_constant`` is set, each model is fitted with a
        constant term and ``self.n_coef - 1`` lags; otherwise with no
        constant and ``self.n_coef`` lags.

        @param sample: m x n numpy array, m -- number of samples, n -- length of each sample
        @return: numpy array holding the fitted parameters of each sample, one row per sample
        """
        if self.use_constant:
            trend, maxlag = 'c', self.n_coef - 1
        else:
            trend, maxlag = 'nc', self.n_coef

        # range (rather than the Python-2-only xrange) keeps this usable on
        # Python 3 as well.
        features = [
            AR(sample[i]).fit(maxlag, trend=trend).params
            for i in range(sample.shape[0])
        ]
        return np.array(features)
Example #4
0
def sentiment_prediction(data, user):
    """Plot past sentiment together with a lag-1 AR forecast.

    Args:
        data: Mapping that provides a "sentiments" series (the values the
            model is fitted on) and a "time" series used as hover text.
        user: Handle shown in the figure title.

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    sentiments = data["sentiments"]
    first_step = len(sentiments)
    last_step = 105  # forecast up to and including this index

    model_fit = AR(sentiments).fit(maxlag=1)
    future_pred = model_fit.predict(start=first_step,
                                    end=last_step,
                                    dynamic=False)

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(y=sentiments,
                   mode='lines+markers',
                   name='past sentiment',
                   text=(data['time'])))
    fig.add_trace(
        go.Scatter(y=future_pred,
                   # predict() includes `end`, so the x axis has to run
                   # through last_step too; otherwise it is one point short
                   # of the forecast values.
                   x=list(range(first_step, last_step + 1)),
                   mode='lines+markers',
                   name='prediction of future sentiment',
                   text=(data['time'])))

    fig.update_layout(
        title=f"Sentiment Analysis of @{user} twitter interactions")
    fig.show()
Example #5
0
def autoRegression3(day):
    """Fit an AR model on per-movie daily gross and print a one-step forecast.

    The series is built from every ``db['daily']`` record of the years
    2008-2017, followed by 2018 records until ``day`` of them have been
    added (the first 2018 record is always added when one exists). The
    series and the forecast are printed; nothing is returned.
    """
    daily = db['daily']

    def gross_per_movie(rec):
        tracked = rec['MoviesTracked']
        amount = rec['Gross($)'].replace(",", "")
        return int(amount) / int(tracked)

    series = [
        gross_per_movie(rec)
        for year in range(2008, 2018)
        for rec in daily.find({"Year": year})
    ]
    for seen, rec in enumerate(daily.find({"Year": 2018}), start=1):
        series.append(gross_per_movie(rec))
        if seen >= day:
            break
    print(series)

    # Fit on the whole series and ask for the first out-of-sample step.
    fitted = AR(series).fit()
    horizon = len(series)
    forecast = fitted.predict(horizon, horizon)
    print(forecast)
def AutoRegression(train, test):
    """Walk-forward one-step AR forecasts over ``test``.

    An AR model is fitted on ``train``. Each forecast is the first fitted
    parameter plus the remaining parameters applied to the most recent
    ``window`` observations (newest first); afterwards the true ``test``
    value is appended to the history.

    Returns:
        Tuple ``(predictions, window, coef)``.
    """
    fitted = AR(train).fit()
    window = fitted.k_ar
    coef = fitted.params

    # Seed the rolling history with the last `window` training observations.
    seed = train[len(train) - window:]
    history = [seed[i] for i in range(len(seed))]

    predictions = []
    for step in range(len(test)):
        size = len(history)
        recent = history[size - window:size]
        yhat = coef[0]
        # coef[1] pairs with the newest observation, coef[2] with the one
        # before it, and so on.
        for k, past in enumerate(reversed(recent), start=1):
            yhat += coef[k] * past
        predictions.append(yhat)
        history.append(test[step])  # feed the real observation back in
    return predictions, window, coef
Example #7
0
def metodo_Dm(cpu_workload, Y, Z, output_list):
    """Walk-forward AR forecast over ``Y`` with a label-dependent override.

    An AR model is fitted on all of ``Y``, and the same data is used as the
    evaluation series. For step ``t`` the stored prediction is
    ``cpu_workload[t]`` when ``Z[t]`` equals ``output_list[0]``; otherwise it
    is ``-yhat + 4``, where ``yhat`` is the AR one-step forecast.

    Returns:
        Tuple ``(test, predictions, error)`` where ``error`` is
        ``mean_squared_error(test, predictions)``.
    """

    X = Y
    train_size = int(len(X))

    # NOTE(review): both slices cover the whole series, so the model is scored
    # on its own training data -- confirm this is intended.
    train, test = X[:train_size], X[:train_size]
    #print(len(train)," ",len(test))

    #print("test = ",len(test))
    # train autoregression
    model = AR(train)
    model_fit = model.fit()
    window = model_fit.k_ar
    coef = model_fit.params
    # walk forward over time steps in test
    # Seed the history with the last `window` training observations.
    history = train[len(train) - window:]
    history = [history[i] for i in range(len(history))]
    predictions = list()

    print(len(Z), "  ", len(output_list), " ", len(test))

    for t in range(len(test)):
        length = len(history)
        lag = [history[i] for i in range(length - window, length)]
        # coef[0] is the starting value; coef[d + 1] weights the observation
        # d steps back from the newest one.
        yhat = coef[0]
        for d in range(window):
            yhat += coef[d + 1] * lag[window - d - 1]
        obs = test[t]
        # Only the first entry of output_list is compared; the other
        # comparisons are commented out.
        if (Z[t] == output_list[0]
            ):  #or Z[t]==output_list[1] or Z[t]==output_list[2]):
            predictions.append(cpu_workload[t])
        else:
            # NOTE(review): the forecast is negated and shifted by 4; the
            # reason is not visible here -- confirm with the author.
            predictions.append(-yhat + 4)
        history.append(obs)
    #print('predicted=%f, expected=%f' % (yhat, obs))
    error = mean_squared_error(test, predictions)
    return test, predictions, error
Example #8
0
    def AutoRegressive(self, data, testSize=2, test=True):
        """Forecast ``testSize`` steps with an AR model fitted on ``data``.

        With ``test=True`` the last ``testSize`` points are held out, each
        forecast is followed by feeding the true value back in, and the
        result is ``(pred, mse(testData, pred), testData)``. With
        ``test=False`` the model is fitted on all of ``data``, each forecast
        is fed back in as the next observation, and the result is
        ``(pred, None)``.
        """
        print(data.shape)
        if test:
            trainData, testData = data[:-testSize], data[-testSize:]
        else:
            trainData = data

        fitted = AR(trainData).fit()
        winSize = fitted.k_ar
        coeff = fitted.params

        predData = list(trainData[-winSize:])
        pred = []
        for i in range(testSize):
            recent = list(predData[-winSize:])
            y = coeff[0]
            # coeff[n + 1] weights the observation n steps back from the
            # newest one.
            for n in range(winSize):
                y += coeff[n + 1] * recent[winSize - (n + 1)]
            # Held-out truth drives the next step in test mode; otherwise the
            # forecast itself does.
            predData.append(testData[i] if test else y)
            pred.append(y)

        if not test:
            return pred, None
        return pred, mse(testData, pred), testData
Example #9
0
def autoregression(data, train_test_percentage=20):
    """Walk-forward AR forecast with an MSE report and a comparison plot.

    Args:
        data: Series to model.
        train_test_percentage: Percentage of ``data`` (taken from the start)
            used for fitting; the remainder is the test segment.

    Returns:
        List of one-step-ahead predictions, one per test observation.

    Side effects:
        Prints the mean squared error and shows a matplotlib figure.
    """
    train_test_size = int(len(data) * float(train_test_percentage) / 100)
    train, test = data[0:train_test_size], data[train_test_size:]

    # train autoregression
    model_fit = AR(train).fit()
    window = model_fit.k_ar
    coef = model_fit.params

    # walk forward over time steps in test, seeded with the last `window`
    # training observations
    seed = train[len(train) - window:]
    history = [seed[i] for i in range(len(seed))]
    predictions = []

    for t in range(len(test)):
        lag = history[len(history) - window:]
        yhat = coef[0]
        # coef[d + 1] weights the observation d steps back from the newest.
        for d in range(window):
            yhat += coef[d + 1] * lag[window - d - 1]
        predictions.append(yhat)
        history.append(test[t])

    mse_error = mean_squared_error(test, predictions)
    # print() with one string argument behaves the same on Python 2 and 3;
    # the former print statement was a SyntaxError on Python 3.
    print('Autoregression MSE: ' + str(mse_error))
    steps = range(len(test))
    pyplot.plot(steps, predictions, color='red', lw=2, label='prediction')
    pyplot.plot(steps, test, color='green', lw=2, label='actual')
    pyplot.ylabel('max temp')
    pyplot.xlabel('days from 1/1/2009')
    pyplot.title('Autoregression')
    pyplot.show()

    return predictions
def modelling_AR(df, name, split_index=22808):
    """
    Fit AR models (maxlag=4) to a raw and a low-pass-filtered close series
    and compare their forecasts with the held-out rows.

    Args:
        df: DataFrame containing a ``CLOSE_<name>`` column.
        name: Suffix selecting the close column.
        split_index: Number of leading rows used for training; the remaining
            rows are forecast. Defaults to 22808, the previously hard-coded
            value.

    Returns:
        DataFrame indexed like the held-out rows with the ground truth
        ("GT"), both forecasts ("X", "Xf") and their differences from the
        ground truth ("diff_X", "diff_Xf").
    """
    data_close = df[f'CLOSE_{name}']
    # Third-order Butterworth filter applied forwards and backwards.
    b, a = signal.butter(3, 1/10)
    filtrd_data_close = signal.filtfilt(b, a, data_close)
    df2 = pd.DataFrame({"X":data_close.to_numpy(),"Xf": filtrd_data_close},index=df.index)
    dr = df2.index
    realidad = df2.loc[dr[:split_index]]   # training rows
    futuro = df2.loc[dr[split_index:]]     # rows to forecast
    predictions_AR = dict()

    for col in realidad.columns:
        train = realidad[col]
        test = futuro[col]

        # Train the AR model
        model_AR = AR(train)
        print(f"Entrenando con los datos desde la serie {col}")
        model_fit_AR = model_AR.fit(maxlag=4)

        # Forecast one value per held-out row
        predictions_AR[f'{col}_prediction'] = model_fit_AR.predict(start=len(train),
                                        end=len(train)+len(test)-1, dynamic=False)

    pred_AR = pd.DataFrame(predictions_AR)
    pred_AR.index = futuro.index

    # Both differences are taken against the unfiltered series (futuro.X).
    AR_predictions = pd.DataFrame({
    "GT":futuro.X,
    "X":pred_AR.X_prediction,
    "Xf":pred_AR.Xf_prediction,
    "diff_X": futuro.X - pred_AR.X_prediction,
    "diff_Xf":futuro.X - pred_AR.Xf_prediction},index=futuro.index)

    return AR_predictions
Example #11
0
def time_series(ts_dict, num_pred=7, title="Efficiency"):
    '''Models and predicts time series from data.

    For every series in ``ts_dict`` the last ``num_pred`` points are held
    out, an AR model (weekly frequency) is fitted on the rest (skipping the
    first point) and ``num_pred`` values are forecast. Each series
    contributes a history trace and a prediction trace to one figure.

    Args:
        ts_dict: Mapping of label -> pandas Series.
        num_pred: Number of trailing points to forecast.
        title: Figure title.

    Returns:
        Dict with the ``figure``, and the last training value
        (``curr_val``) and first forecast (``first_pred``) of the last
        series processed.

    Raises:
        ValueError: If ``ts_dict`` is empty.
    '''
    if not ts_dict:
        raise ValueError("ts_dict must contain at least one series")

    data = []
    for k in ts_dict:
        ts = ts_dict[k]
        split = len(ts) - num_pred
        train, test = ts[1:split], ts[split:]

        # train autoregression
        model_fit = AR(train, freq="W").fit()

        # make predictions
        predictions = model_fit.predict(start=len(train),
                                        end=len(train) + len(test) - 1,
                                        dynamic=False)

        # Prepend the last training point so the prediction trace joins the
        # history trace. The slice follows num_pred rather than the fixed
        # 8/7 offsets, which were only right for the default num_pred=7.
        predict = pd.concat([ts[split - 1:split], predictions])

        line_predict = go.Scatter(x=predict.index,
                                  y=predict.values,
                                  name="prediccion " + k)
        line_hist = go.Scatter(x=train.index,
                               y=train.values,
                               name="historicos " + k)

        data += [line_hist, line_predict]

    layout = go.Layout(title=title)
    figure = go.Figure(data=data, layout=layout)

    # .iloc makes the positional access explicit on label-indexed Series.
    return ({
        'figure': figure,
        'curr_val': train.iloc[-1],
        'first_pred': predictions.iloc[0]
    })
Example #12
0
def fit_ar(outputs, inputs, guessed_dim):
    """Fits an AR model of order p = guessed_dim.

    With several output dimensions, the parameters are fitted on each
    dimension separately and averaged. Without exogenous inputs an ``AR``
    model is fitted (lag chosen by BIC up to ``guessed_dim``) and the result
    is zero-padded to ``guessed_dim`` entries; with inputs an ``ARMA`` model
    of order ``(guessed_dim, 0)`` is fitted instead.

    Args:
      outputs: Array with the output values from the LDS.
      inputs: Array with exogenous inputs values.
      guessed_dim: Guessed hidden dimension.

    Returns:
      - Fitted AR coefficients; zeros when the ARMA fit raises
        ``ValueError`` or ``LinAlgError`` (a ConvergenceWarning is issued).
    """
    if outputs.shape[1] > 1:
        # If there are multiple output dimensions, fit autoregressive params on
        # each dimension separately and average. range replaces the
        # Python-2-only xrange.
        params_list = [
            fit_ar(outputs[:, j:j + 1], inputs, guessed_dim)
            for j in range(outputs.shape[1])
        ]
        return np.mean(np.concatenate([a.reshape(1, -1) for a in params_list]),
                       axis=0)
    if inputs is None:
        model = AR(outputs).fit(ic='bic',
                                trend='c',
                                maxlag=guessed_dim,
                                disp=0)
        # Skip the trend term(s) and zero-pad when fewer than guessed_dim
        # lags were selected.
        arparams = np.zeros(guessed_dim)
        arparams[:model.k_ar] = model.params[model.k_trend:]
        return arparams

    model = ARMA(outputs, order=(guessed_dim, 0), exog=inputs)
    try:
        arma_model = model.fit(start_ar_lags=guessed_dim,
                               trend='c',
                               disp=0)
        return arma_model.arparams
    except (ValueError, np.linalg.LinAlgError) as e:
        warnings.warn(str(e), sm_exceptions.ConvergenceWarning)
        return np.zeros(guessed_dim)
Example #13
0
def returnpred(p, m, file='SIH.csv'):
    """Return an HTML div plotting AR predictions for one product and month.

    Rows of ``file`` matching product ``p`` and month ``m`` are averaged per
    day. The column at position 5 of that result is modelled: the first 25
    days train an AR model, which then predicts from index 25 through
    ``len(series) + 10``.

    Args:
        p: Value matched against the ``Product_Name`` column.
        m: Value matched against the ``Month`` column.
        file: CSV path to read.

    Returns:
        The plot as a plotly ``div`` string (plotly.js not embedded).
    """
    dataset = pd.read_csv(file)
    x1 = dataset.loc[(dataset['Product_Name'] == p) & (dataset['Month'] == m)]
    y1 = x1.groupby('Day').mean()
    y1 = y1.rename(columns={'Month': 'days'})
    y = y1.iloc[:, 5]
    n1 = len(y)
    train1 = y[0:25]
    test1 = y[25:n1]
    model_fit_AR = AR(train1).fit()
    predictions_AR = model_fit_AR.predict(start=25, end=n1 + 10)

    fig = plt.figure()
    try:
        plt.plot(test1)
        plt.plot(predictions_AR, color='red')
        plt.title("Future Predictions of different company")
        plt.legend(['Original', 'Predictions'])

        plotly_fig = tls.mpl_to_plotly(fig)
    finally:
        # pyplot keeps every figure alive until it is closed; without this,
        # each call would leave one more figure in memory.
        plt.close(fig)

    plotly_fig['layout']['width'] = 1200
    plot_div = plot(plotly_fig, output_type='div', include_plotlyjs=False)
    return plot_div
Example #14
0
    def get_stat_AR_coefficients(self, signals, max_lag):
        """Get the auto-regression coefficients for a set of time series signals.

            Args:
                signals: 2-D array of waveforms, one per column (it is indexed
                         as ``signals[:, i]``).
                max_lag (int): The maximum lag of the AR model.  Each signal contributes ``max_lag + 1``
                               parameters; shorter fits are zero padded and longer ones truncated.

            Returns DataFrame: A dataframe that contains a single row where each column is a parameter coefficient,
            the parameters of all signals laid out one after another.
        """
        n_params = max_lag + 1
        per_signal = []
        for i in range(np.shape(signals)[1]):
            column = signals[:, i]

            # The AR model throws for some constant signals.  The signals should have been normalized into z-scores, in
            # which case the parameters for an all zero signal are all zero.
            if self.is_constant_signal(column) and signals[0, i] == 0:
                parameters = np.zeros(n_params)
            else:
                params = AR(column).fit(maxlag=max_lag, ic=None).params
                n_fit = np.shape(params)[0]
                if n_fit < n_params:
                    parameters = np.pad(params, (0, n_params - n_fit),
                                        'constant',
                                        constant_values=0)
                else:
                    # Also covers the exact-length case; truncating to
                    # n_params keeps every signal the same width.
                    parameters = params[:n_params]

            per_signal.append(parameters)

        # Join once at the end rather than growing an array with np.append,
        # and return an empty row when there are no signals.
        coefficients = np.concatenate(per_signal) if per_signal else np.zeros(0)
        return pd.DataFrame(coefficients).T
def main(csv_file_path):
    """Rolling-window AR evaluation on the samples of a CSV file.

    The samples are split into train and test. For each of ``PREDICTIONS``
    steps, an AR model is fitted on the current window and the next value
    is predicted. The true test value is then appended and the oldest
    sample dropped, so the window length stays constant. Each step and the
    final test MSE are printed.

    Returns:
        Tuple ``(test, prediction_list)``.
    """
    train, test = split_samples(load_csv(csv_file_path))

    window = list(train)  # working copy of the training samples
    prediction_list = []
    for step in range(PREDICTIONS):
        fitted = AR(window).fit()

        next_index = len(window)
        forecast = fitted.predict_using_learned_params(
            start=next_index, end=next_index, dynamic=False)
        prediction_list.append(forecast[0])

        actual = test[step]
        window.append(actual)
        print('predicted={pred_value}, expected={real_value}'.format(
            pred_value=prediction_list[-1], real_value=actual))

        # Drop the oldest sample so the window keeps its length.
        window = window[1:]

    error = mean_squared_error(test, prediction_list)
    print('Test MSE = {mse_value}'.format(mse_value=error))

    return test, prediction_list
Example #16
0
 def generate_AR_para(self, rawwave, filtered=False, wavt=False, AR_order=10):
     """Return the AR parameters of a (optionally pre-filtered) waveform.

     Args:
         rawwave: Input signal.
         filtered: When truthy, the signal is filtered before fitting.
         wavt: Selects the filter when ``filtered`` is truthy: truthy uses
             ``self.wavelet_transform``, falsy uses
             ``self.selective_freq_range`` with ``high_freq=30`` and
             ``low_freq=1.5``.
         AR_order: ``maxlag`` passed to the AR fit.

     Returns:
         The ``params`` of the fitted AR model.
     """
     # Flags are tested by truthiness instead of `== True` / `== False`.
     if not filtered:
         series = rawwave
     elif wavt:
         series, _ = self.wavelet_transform(rawwave)
     else:
         series, _, _ = self.selective_freq_range(rawwave, high_freq=30, low_freq=1.5)

     return AR(series).fit(maxlag=AR_order).params
    predictions.append(y_hat)

# Hand the predictions built above to the submission generator and report
# their mean absolute error against the test targets.
submission_generator.generate(predictions)
test_score = mean_absolute_error(test_y, predictions)
print('Test MAE: %.3f' % test_score)
# plot predictions vs expected
plt.plot(test_y, label="real values")
plt.plot(predictions, color='red', label="predictions")
plt.legend(loc='upper left')
plt.show()

# Implementing Auto Regression Model
# Training Autoregression
# NOTE(review): this prints column 1 of `train`, while the model below is
# fitted on `train_y` -- presumably the same data; confirm.
print(train[:, 1])
model = AR(train_y)
model_fit = model.fit()
print('Lag: %s' % model_fit.k_ar)
print('Coefficients: %s' % model_fit.params)

# Making predictions
# One out-of-sample value per test point; `predictions` and the plot below
# now refer to the AR model.
predictions = model_fit.predict(start=len(train_y), end=len(train_y) + len(test_y) - 1, dynamic=False)
error = mean_absolute_error(test_y, predictions)
print('Test MAE: %.3f' % error)

# Plotting results
plt.plot(test_y, label="real values")
plt.plot(predictions, color='red', label="predictions")
plt.legend(loc='upper left')

plt.show()
Example #18
0
 def ar(self):
     """Fit an AR model on ``self.inputs`` and return its parameters."""
     return AR(self.inputs).fit().params
pd.plotting.autocorrelation_plot(sales_data['sales'])

# sales_data['sales'].corr(sales_data['sales'].shift(12))
# decomposed = seasonal_decompose(sales_data['sales'], model='additive')
# x = decomposed.plot()

# First difference of the sales series; the leading NaN is dropped below.
sales_data['stationary'] = sales_data['sales'].diff()
#creating model
# create train/test datasets
X = sales_data['stationary'].dropna()
train_data = X[1:len(X) - 12]
# Hold out the last 12 observations. The former X[X[len(X) - 12:]] indexed
# the series by its own values instead of slicing it.
test_data = X[len(X) - 12:]

# train the autoregression model
model = AR(train_data)
model_fitted = model.fit()

print('The lag value chose is: %s' % model_fitted.k_ar)
print('The coefficients of the model are:\n %s' % model_fitted.params)

# One forecast per held-out observation.
predictions = model_fitted.predict(start=len(train_data),
                                   end=len(train_data) + len(test_data) - 1,
                                   dynamic=False)

# create a comparison dataframe
compare_df = pd.concat([sales_data['stationary'].tail(12), predictions],
                       axis=1).rename(columns={
                           'stationary': 'actual',
                           0: 'predicted'
                       })
Example #20
0
def test_resids_mle():
    """Residuals of an MLE-fitted lag-1 AR model span the whole sunspot series."""
    sunspots = sm.datasets.sunspots.load_pandas()
    endog = sunspots.endog
    # Constructing AR is expected to emit a FutureWarning.
    with pytest.warns(FutureWarning):
        model = AR(np.asarray(endog))
    fitted = model.fit(1, method='mle', disp=-1)
    assert fitted.resid.shape[0] == endog.shape[0]
324.87	,
322.41	,
323.64	,
322.73	,
324.45	,
326.65	,
325.71	,
327.95	,
327.45	,
328.19	,
330.92]

# Autoregression (AR) example
# fit model
modelAR = AR(data)
modelAR_fit = modelAR.fit()
# make prediction
# start == end == len(data): request only the first value past the data.
yhatAR = modelAR_fit.predict(len(data), len(data))
print(yhatAR)
# End Autoregression

# Moving Average (MA) example
# fit model
# order=(0, 1): no autoregressive terms, one moving-average term.
modelMA = ARMA(data, order=(0, 1))
modelMA_fit = modelMA.fit(disp=False)
# make prediction
yhatMA = modelMA_fit.predict(len(data), len(data))
print(yhatMA)
# End Moving Average

# # Autoregressive Moving Average (ARMA) example
Example #22
0
            yield
        finally:
            sys.stdout = old_stdout


if len(sys.argv) < 2:
	print("Select a country")
else:
	def difference(dataset):
		"""Return the differences between consecutive entries of ``dataset``."""
		return numpy.array(
			[dataset[i] - dataset[i - 1] for i in range(1, len(dataset))])

	# The lower-cased first argument names both the input CSV and the
	# output directory.
	country = sys.argv[1].lower()
	out_dir = 'data/' + country

	# load dataset
	series = read_csv('tmp/' + country + '.csv', header=0, index_col=0)
	X = difference(series.values)
	# fit model
	model_fit = AR(X).fit(maxlag=6, disp=False)
	# save model to file
	model_fit.save(out_dir + '/ar_model.pkl')
	# save the differenced dataset
	numpy.save(out_dir + '/ar_data.npy', X)
	# save the last observation
	numpy.save(out_dir + '/ar_obs.npy', [series.values[-1]])

Example #23
0
data_pk_half = data_pk_half.fillna(0)
#%%
# Keep only rows where all three columns are present. Chaining dropna()
# yields the same frame as dropna(inplace=True) on the selection, without
# mutating an object derived from data2 in place (which can trigger pandas'
# SettingWithCopyWarning).
data_pk_full = data2[['COUNTY', 'Year', 'PK (FULL DAY)']].dropna()
# No NaN is left after dropna(); the fillna call is kept so the result is
# produced the same way as before.
data_pk_full = data_pk_full.fillna(0)
#%%
dataframe_list = []
for county in data_kg_full.COUNTY.unique():
    county1 = data2[data2.COUNTY == county]
    series = pd.Series(
        county1['KG (FULL DAY)'].to_list(),
        index=county1['Year'].to_list())  # create lagged dataset
    X = series.values
    train, test = X[1:len(X) - 7], X[len(X) - 7:]
    model = AR(train)
    model_fit = model.fit()
    print('Lag: %s' % model_fit.k_ar)
    print('Coefficients: %s' % model_fit.params)
    # make predictions
    print(len(train))
    print(len(train) + len(test) - 1)
    predictions_list = []
    initial_list = []
    new_list = X.tolist()
    print(new_list)
    initial_list.append(new_list[0])
    initial_list.extend(train.tolist())
    predictions = model_fit.predict(start=len(train),
                                    end=len(train) + len(test) + 5,
                                    dynamic=False)
    for i in range(len(predictions)):
Example #24
0
def autoregression(list_statistics):
    """Fit an AR model and forecast the ten steps following the series.

    Returns the predictions for indices ``len(list_statistics)`` through
    ``len(list_statistics) + 9``.
    """
    fitted = AR(list_statistics).fit()
    first_step = len(list_statistics)
    return fitted.predict(first_step, first_step + 9)
Example #25
0
# Make a prediction given regression coefficients and lagged observations
def predict(coef, history):
	"""Return ``coef[0]`` plus each later coefficient times its lagged value.

	``coef[i]`` (for ``i >= 1``) multiplies ``history[-i]``, so ``coef[1]``
	applies to the most recent observation.
	"""
	yhat = coef[0]
	for lag, weight in enumerate(coef[1:], start=1):
		yhat += weight * history[-lag]
	return yhat

# NOTE(review): Series.from_csv is not available in current pandas releases;
# confirm the pandas version this script targets.
series = Series.from_csv('../data/nifty.csv', header=0)
# split dataset
# The model works on the differenced series; the first 66% is training data.
X = difference(series.values)
size = int(len(X) * 0.66)
train, test = X[0:size], X[size:]
# train autoregression
model = AR(train)
model_fit = model.fit(maxlag=6, disp=False)
window = model_fit.k_ar
coef = model_fit.params
# walk forward over time steps in test
# History starts as a copy of the training data and grows by one real
# observation after every forecast.
history = [train[i] for i in range(len(train))]
predictions = list()
for t in range(len(test)):
	yhat = predict(coef, history)
	obs = test[t]
	predictions.append(yhat)
	history.append(obs)
# The error is computed on the differenced values, not on the original series.
error = mean_squared_error(test, predictions)
print('Test MSE: %.3f' % error)
# plot
pyplot.plot(test)
pyplot.plot(predictions, color='red')
Example #26
0
plt.show()

"""Remove seasonal effect and store as column in dateframe."""
x = df.Average - df.Season
df['x'] = pd.Series(x)

"""Plot autocorrelation and partial autocorrelation."""
#autocorrelation_plot(x)
#plot_pacf(x, lags=50)
#plt.show()
# We clearly see that lag 3 is the right choice.


#sm.OLS(x, lag_func(x))
# NOTE(review): the fit below does not fix the lag at 3; it passes maxlag=40
# with ic='aic'. The code after it reads only params[0..2] -- confirm the
# selected lag really is 3.
# trend='nc' fits without a constant term.
model = AR(x)
model_fit = model.fit(maxlag = 40, ic = 'aic', trend = 'nc')
print('Lag: %s' % model_fit.k_ar)
print('Coefficients: %s' % model_fit.params)
# The value printed under the 'Residuals' label is model_fit.sigma2.
print('Residuals: %s' % model_fit.sigma2)

"""Find CAR(3) coefficients from AR(3) coefficients."""
# Each alpha is derived from the previous ones and one fitted parameter.
alpha1 = 3 - model_fit.params[0]
alpha2 = 2 * alpha1- model_fit.params[1] - 3
alpha3 = alpha2 - alpha1 - model_fit.params[2] + 1
#print(alpha1, alpha2, alpha3)


"""Create column with year, month and day."""
df['Year'] = pd.DatetimeIndex(df['Date']).year
df['Month'] = pd.DatetimeIndex(df['Date']).month
df['Day'] = pd.DatetimeIndex(df['Date']).day
Example #27
0
def ARcast(data,time,dt=False,axis=-1,missing=0):
    """
    Forecast the data by using AutoRegressive method.

    The code resamples the data onto an evenly spaced time grid,
    finds the gaps in the original sampling and fills
    those points by using the AR method.

    Parameters
    ----------
    data : ~numpy.ndarray
        n dimensional data.
        Data must have the same number of elements as the time
        along ``axis``.
    time : astropy.time.core.Time
        The time for each data point.
    dt : (optional) float
        The interval of time between data points, in seconds.
        Defaults to the spacing of the first two time stamps.
    axis : (optional) int
        An axis to forecast.
    missing : (optional) float
        The missing value of the data.
        It may be due to data alignment.

    Returns
    -------
    ARdata : ~numpy.ndarray
        Autoregressed data on the evenly spaced grid.
    tf : ~numpy.ndarray
        Times of the ARdata points, in seconds from the first input time.

    Notes
    -----
    Input time must be the astropy.time.core.Time,
    but output time is the ~numpy.ndarray.

    References
    ----------
    `AR model <https://en.wikipedia.org/wiki/Autoregressive_model>`_.
    `statsmodels.tsa.ar_model.AR <http://statsmodels.sourceforge.net/devel/generated/statsmodels.tsa.ar_model.AR.html>`_.

    Example
    -------
    >>> from fisspy.analysis.forecast import ARcast
    >>> ARdata, tf = ARcast(data,t,dt=20.,axis=1)
    """
    if not dt:
        # Same conversion as for `t` below, so the default step is in
        # seconds like the rest of the time axis (the raw .value of a time
        # difference is in days).
        dt=(time[1]-time[0]).value*24*3600

    shape=list(data.shape)
    shape0=list(data.shape)
    if shape[axis]!=len(time):
        raise ValueError('The size of data is different from the size of time.')

    # Elapsed time in seconds since the first sample.
    t=(time-time[0])*24*3600
    t=t.value
    tf=np.arange(t[0],t[-1],dt,dtype=float)

    # Resample onto the regular grid tf.
    interp=interp1d(t,data,axis=axis)
    datai=interp(tf)

    # Move the forecast axis to the front and flatten the remaining axes.
    shape.pop(axis)
    ind=[shape0.index(i) for i in shape]
    ind=[axis]+ind
    datat=datai.transpose(ind)

    shapei=datat.shape
    # int(): np.prod of an empty shape is a float, which reshape rejects for
    # 1-D input.
    datat=datat.reshape((shapei[0],int(np.prod(shapei[1:]))))
    shapet=datat.shape

    # Gaps: samples whose distance to the previous one is at least two steps.
    td=t-np.roll(t,1)
    addi=np.where(td >= dt*2)[0]

    for wh in addi:
        for i in range(shapet[1]):
            y=datat[:,i]
            wh2=wh+int(td[wh]/dt-1)
            if (y==missing).sum()<4:
                # Fit on the whole column and overwrite the gap with the
                # model's predictions.
                bar=AR(y)
                car=bar.fit()
                dar=car.predict(int(wh),int(wh2))
                datat[wh:wh2+1,i]=dar
            else:
                datat[wh:wh2+1,i]=missing
    datat=datat.reshape((shapei))

    # NOTE(review): the same permutation `ind` is applied again here rather
    # than its inverse -- confirm the output axis order for data with more
    # than two dimensions.
    return datat.transpose(ind), tf
                
Example #28
0
def AR_model(s_y):
    # Fits an AR model on s_y with up to 50 lags, predicts from index 100
    # through len(s_y), and left-pads the result with 99 zeros.
    # NOTE(review): nothing is returned in the lines visible here; the
    # function looks truncated -- confirm whether `yhat` should be returned.
    model = AR(s_y)
    model_fit = model.fit(maxlag=50)
    yhat = model_fit.predict(100, len(s_y))
    yhat = np.hstack([np.zeros([99]), yhat])
    def update(self):
        """Update the variance, refit the AR predictor and append new timeouts.

        Steps, in order: update ``self.variance``; fit an AR model on the
        most recent arrivals and store a one-step prediction; clamp that
        prediction using recent ``self.predictedArrivals``; append summary
        statistics of recent absolute ``self.resultDifferences``; append one
        timeout per key of ``self.constrains``.
        """

        # Weighted update with self.beta of the absolute difference between
        # the previous prediction and the last arrival time, rounded to 2
        # decimals.
        self.variance = round(
            self.beta * self.variance +
            (1 - self.beta) * abs(self.prediction - self.last_arrival_time), 2)

        from statsmodels.tsa.ar_model import AR

        # Fit on at most self.window arrivals ending just before self.index.
        begin = max(0, self.index - self.window)
        data = self.arrivals[begin:self.index]
        # fit model
        model = AR(data)
        model_fit = model.fit()
        self.model = model_fit
        # make prediction
        self.prediction = model_fit.predict(len(data), len(data))[0]

        # Bounds for the prediction: with fewer than 4 predicted arrivals the
        # plain min/max of the recent ones; otherwise the second smallest and
        # second largest of them.
        minVal = 0
        maxVal = 0
        if len(self.predictedArrivals) < 4:
            minVal = min(self.predictedArrivals[-self.windowArrival:])
            maxVal = max(self.predictedArrivals[-self.windowArrival:])
        else:
            cut = self.predictedArrivals[-self.windowArrival:]
            cut.sort()
            minVal = cut[1]
            maxVal = cut[-2]

        # Clamp the prediction into [minVal, maxVal].
        if self.prediction < minVal:
            self.prediction = minVal
        elif self.prediction > maxVal:
            self.prediction = maxVal

        # Statistics over the absolute values of (at most) the last 300
        # result differences, after reject_outliers_2.
        interset = self.resultDifferences[-300:] if len(
            self.resultDifferences) > 300 else self.resultDifferences
        intersetWO = np.abs(array(interset))
        interset = reject_outliers_2(intersetWO)

        self.meanPE.append(np.mean(interset))
        self.variancePE.append(interset.var())
        self.stdPE.append(np.std(interset))
        self.medianPE.append(np.median(interset))
        self.rmsPE.append(math.sqrt(np.square(interset).mean()))

        # interset now refers to (at most) the last 200 raw differences; it
        # is only used by the code after `continue` below.
        interset = self.resultDifferences[-200:] if len(
            self.resultDifferences) > 200 else self.resultDifferences

        # -100 marks "no previous predicted arrival".
        previousArrival = -100
        if len(self.predictedArrivals) > 1:
            previousArrival = self.predictedArrivals[-1]

        for key in self.constrains:

            # Timeout = prediction plus this key's K factor times the variance.
            new_timeout = new_timeout = round(
                self.prediction + (self.constraints_to_K[key]) * self.variance,
                3)
            if previousArrival != -100:
                predTrend = 1 if self.prediction - previousArrival > 0 else 0

                # This branch recomputes the same expression as above, so it
                # does not change new_timeout.
                if predTrend == int(self.altPred):
                    new_timeout = round(
                        self.prediction +
                        (self.constraints_to_K[key]) * self.variance, 3)

            self.timeouts[key].append(new_timeout)
            continue

            # NOTE(review): everything below is unreachable because of the
            # `continue` above.
            index = math.ceil(len(interset) * key)
            index = min(index, len(interset) - 1)
            #interset = [abs(x) for x in interset]
            interset.sort()
            element = interset[index]
            margin = (keySmoother2(key) * 2 * np.median(interset)) + element
            extreme = interset[-2] if len(interset) > 1 else interset[-1]
            new_timeout = round(self.prediction + extreme, 3)
Example #30
0
def test_roots():
    """The root of a lag-1 AR fit equals the reciprocal of its last parameter."""
    sunspots = sm.datasets.sunspots.load_pandas()
    # Constructing AR is expected to emit a FutureWarning.
    with pytest.warns(FutureWarning):
        model = AR(np.asarray(sunspots.endog))
    fitted = model.fit(1)
    expected = np.array([1. / fitted.params[-1]])
    assert_almost_equal(fitted.roots, expected)
    def __projections(self, indicators, baseyear):
        """
        Generates indicator level projections till current year.

        This treats each indicator for each country as a time series. The
        projections are made using an AR(n) model, where n is capped by the
        number of observations and ``MAX_LAG``.

        A series is projected only when it belongs to a country in ``SSA``
        and an indicator in ``indicators``, needs between 1 and 5 years of
        projection and has more than 5 observations. Other series of those
        countries/indicators are counted as ignored.

        indicators: all indicators to project
        baseyear: year to project to.

        returns: a dataframe with columns Country Name, Country Code,
        Indicator Name, Indicator Code, year, value (empty when nothing was
        projected)
        """
        start_time = time()

        pdf = self.df.copy(deep=True)
        pdf['year_idx'] = pd.to_datetime(pdf.year, format='%Y')
        pdf = pdf.set_index('year_idx').to_period(freq='Y')

        out_columns = [
            'Country Name', 'Country Code', 'Indicator Name',
            'Indicator Code', 'year', 'value'
        ]

        cnt = 0
        ign = 0

        # Per-series results; concatenated once at the end instead of
        # re-copying a growing dataframe on every iteration.
        frames = []

        for (country, ind), grp in pdf.groupby(
                ['Country Code', 'Indicator Code']):

            # No projections needed for countries outside Sub-Saharan Africa
            # or for indicators that were not requested.
            if country not in SSA or ind not in indicators:
                continue

            # Years for which projection is needed
            years = np.arange(grp.year.max() + 1, baseyear + 1)

            # observations available in this time series
            obs = len(grp)

            # Maximum lag to consider for the AR model
            lag = min(obs - 1, MAX_LAG)

            logger.debug(
                "Country: {}, Indicator: {}, observations: {}, maxlag: {}, num years to project: {}"
                .format(country, ind, obs, lag, len(years)))

            if not (0 < years.size <= 5 and obs > 5):
                # Don't do projections if relatively recent data isn't
                # available or isn't needed.
                ign += 1
                continue

            # Do some interpolation if needed
            X = grp.value.copy(deep=True)
            X = X.resample('Y').sum()
            X = X.interpolate()

            # Fit and score an AR(n) model
            model = AR(X, missing='raise')
            model_fit = model.fit(maxlag=lag, trend='nc')

            pred = model_fit.predict(start=str(years.min()),
                                     end=str(years.max()))
            cnt += 1

            # Conform to the overall dataframe
            curr_df = pd.DataFrame()
            curr_df['value'] = pred
            curr_df['Country Code'] = country
            curr_df['Indicator Code'] = ind
            # .iloc makes the "first row" access explicitly positional on
            # the period-indexed group.
            curr_df['Country Name'] = grp['Country Name'].iloc[0]
            curr_df['Indicator Name'] = grp['Indicator Name'].iloc[0]
            curr_df.reset_index(inplace=True)
            curr_df.rename(columns={'index': "year"}, inplace=True)
            frames.append(curr_df[out_columns])

        logger.info(
            "Projections made for {} time series ({} ignored or not needed).".
            format(cnt, ign))
        logger.info("Projections made in {:3.2f} sec.".format(time() -
                                                              start_time))

        if not frames:
            # Nothing qualified: return an empty frame with the expected
            # columns instead of failing on the missing 'year' column.
            return pd.DataFrame(columns=out_columns)

        proj_df = pd.concat(frames, ignore_index=True)

        # Change the year from period to integer
        proj_df.year = proj_df.year.apply(lambda x: int(x.strftime("%Y")))

        return proj_df
Example #32
0
# Interactive inspection of the `wine` data: first rows, last rows, both
# stacked together, the shape, and finally the whole object.
wine.head()
wine.tail()
wine.head().append(wine.tail())
wine.shape
wine
# Author's note: 176 observations
# Training split: the first 108 observations.
wineTrg = wine[0:108] # Up to December '88
# The models below are created from these 108 observations.
# Validation split: everything after the training observations.
wineVal = wine[108:] # From January '89 until end
wineTrg
wineVal
#%%%%%
# 3-observation moving averages of the training split.
wineTrg.rolling(window=3)
# Centered window: each mean is labelled at the middle of its window.
wine_ma3c = wineTrg.rolling(window=3, center=True).mean()
wine_ma3c
# Non-centered window: each mean is labelled at the end of its window.
wine_ma3 = wineTrg.rolling(window=3, center=False).mean()
wine_ma3


#%%% Exponential Smoothing




from statsmodels.tsa.ar_model import AR
# Fit an autoregressive model on the training split with default settings.
model1 = AR(wineTrg)
model1_fit = model1.fit()
# Predict only the observation index that immediately follows the training
# data (start and end are the same index).
yhat1 = model1_fit.predict(start=len(wineTrg), end=len(wineTrg))
print(yhat1)
from random import random
# ---------------------------------------------------------------------------
# AR example on a contrived dataset
# ---------------------------------------------------------------------------
# Noisy ramp: every integer 1..99 plus uniform noise drawn from [0, 3).
xdata = range(1, 100)
ydata = [point + 3 * random() for point in xdata]

# Pin both axes to 0..100, then draw and show the scatter plot.
plt.xlim(0, 100)
plt.ylim(0, 100)
plt.scatter(xdata, ydata, s=10)
plt.show()
print()

# Fit the autoregressive model with default settings.
model = AR(ydata)
model_fit = model.fit()

# Predict observation indices 90 through 110; the series holds 99 points, so
# the later indices lie beyond the supplied data.
yhat = model_fit.predict(start=90, end=110)
print('Predicted value for Auto Regression ', yhat)
print("=" * 50)
# ---------------------------------------------------------------------------
# MA example
# ---------------------------------------------------------------------------
# An ARMA model of order (0, 1), i.e. no autoregressive lags and one
# moving-average lag, fitted on the same contrived series with fit output
# disabled.
model = ARMA(ydata, order=(0, 1))
model_fit = model.fit(disp=False)

# Predict the same index range (90 through 110) as the AR example.
yhat = model_fit.predict(start=90, end=110)
print('Predicted value for Moving Average 0,1 ', yhat)
print("=" * 50)
#matplotlib.rcParams['xtick.labelsize'] = 12
#matplotlib.rcParams['ytick.labelsize'] = 12
#matplotlib.rcParams['text.color'] = 'k'
#rcParams['figure.figsize'] = 18, 8

df = pd.read_csv("CHARTEVENTS_HR_FILTERED.csv")
# CSV columns: ,SUBJECT_ID,HADM_ID,ICUSTAY_ID,CHARTTIME,HEART_RATE

# Keep only subject 36 and the two columns needed below. Rows and columns are
# selected in a single .loc call and copied explicitly, so the CHARTTIME
# assignment that follows writes to an independent frame instead of a
# chained-selection result of `df` (which triggers SettingWithCopyWarning).
heart_rate_36 = df.loc[df['SUBJECT_ID'] == 36,
                       ['CHARTTIME', 'HEART_RATE']].copy()

# Make the index a datetime, reduce to one (mean) reading per hour and fill
# the hours without readings by linear interpolation.
heart_rate_36['CHARTTIME'] = pd.to_datetime(heart_rate_36['CHARTTIME'])
heart_rate_36 = heart_rate_36.set_index('CHARTTIME')
heart_rate_36_resampled = heart_rate_36.resample('H').mean()
heart_rate_36_resampled = heart_rate_36_resampled.interpolate(method='linear')

print("Original data points: " + str(len(heart_rate_36)))
print("Resampled hourly data points: " + str(len(heart_rate_36_resampled)))
print(plt.style.available)

# Autoregression (AR): fit on the hourly resampled series with default
# settings.
model = AR(heart_rate_36_resampled)
model_fit = model.fit()

# Forecast from the first index after the resampled data through the index
# 24 positions beyond it (both ends are passed to predict).
heart_rate_36_forecast = model_fit.predict(
    start=len(heart_rate_36_resampled),
    end=len(heart_rate_36_resampled) + 24,
)

# Overlay the raw readings, the hourly series and the forecast in one figure.
plt.figure(figsize=(16, 8))
plt.plot(heart_rate_36, label='Original')
plt.plot(heart_rate_36_resampled, label='Resampled')
plt.plot(heart_rate_36_forecast, label='AR Forecast')
plt.legend(loc='best')
plt.show()
Example #35
0
    for i in range(1, len(coef)):
        yhat += coef[i] * history[-i]
    return yhat


series = pd.read_csv('daily-total-female-births.csv', header=0, index_col=0,
                     parse_dates=True, squeeze=True)

# Split our dataset: the first 66% of the differenced values is used for
# training, the remainder for testing.
X = difference(series.values)
size = int(len(X) * 0.66)
# The original line read `[0:size]` without the sequence name, which is a
# syntax error; the training slice has to be taken from X.
train, test = X[0:size], X[size:]

# Train our autoregressive model
model = AR(train)
model_fit = model.fit(maxlag=6, disp=False)
window = model_fit.k_ar
coef = model_fit.params

# Make walk-forward predictions: predict one step from the current history,
# then append the actual observation to the history before the next step.
history = list(train)
predictions = []
for obs in test:
    yhat = predict(coef, history)
    predictions.append(yhat)
    history.append(obs)

rmse = sqrt(mean_squared_error(test, predictions))
print('Test RMSE: %.3f' % rmse)
Example #36
0
# In[30]:

len(newthr1)

# In[45]:

# train = newthr1[0:12095]
# test = newthr1[12094:]
# Small split: observations 0..99 for training, 100..119 for testing.
train = newthr1[0:100]
test = newthr1[100:120]
predictions = []

# In[46]:

# Fit an autoregressive model on the training window with default settings.
model_ar = AR(train)
model_ar_fit = model_ar.fit()

# In[48]:

# `end` is inclusive in predict(), so one forecast per test observation means
# stopping at len(train) + len(test) - 1. The previous end=120 produced one
# value more than the test window holds.
predictions = model_ar_fit.predict(start=len(train),
                                   end=len(train) + len(test) - 1)

# In[49]:

# Plot the held-out observations and the forecasts (red) on the same axes.
plt.plot(test)
plt.plot(predictions, color='red')

# In[50]:

predictions

# In[38]:
Example #37
0
def get_lpc(trame, order=20):
    """Fit an autoregressive model to ``trame`` and return its parameters.

    Args:
        trame: Sequence of samples, handed unchanged to ``AR``.
        order: Maximum lag passed to ``AR.fit``. Defaults to 20, the value
            that used to be hard-coded, so existing callers are unaffected.

    Returns:
        The ``params`` attribute of the fitted model.
    """
    return AR(trame).fit(maxlag=order).params