def generate_test_dataset():
    """Build a synthetic regression data set with a level shift in ``y``.

    Three covariates are sampled from the same ARMA process (AR lag
    polynomial ``[1, 0.9]``, MA polynomial ``[1]``) and offset by 8, 12 and 3.
    ``y`` is a noisy linear combination of them (``x3`` has weight 0.0) and
    has 5 added to every entry from position 70 onward.

    Returns
    -------
    pandas.DataFrame
        Columns ``y``, ``x1``, ``x2``, ``x3`` and ``t`` (running 1..100).
    """
    n_obs = 100
    process = ArmaProcess(np.r_[1, 0.9], np.array([1]))
    # Keep the draw order (x1, x2, x3, then the noise) unchanged.
    x1 = 8 + process.generate_sample(nsample=n_obs)
    x2 = 12 + process.generate_sample(nsample=n_obs)
    x3 = 3 + process.generate_sample(nsample=n_obs)
    y = 1.2 * x1 + 4.0 * x2 + 0.0 * x3 + np.random.normal(size=n_obs)
    y[70:] += 5
    columns = {"y": y, "x1": x1, "x2": x2, "x3": x3, "t": range(1, n_obs + 1)}
    return pd.DataFrame(columns)
def _gen_ar2(do_fit=True):
    """Simulate an AR(2) series (coefficients 0.9 and 0.05) and optionally fit it.

    Parameters
    ----------
    do_fit : bool
        When truthy, an ARMA(2, 0) model is fitted to the simulated series.

    Returns
    -------
    tuple
        ``(y, fit)`` where ``fit`` is the fitted result, or ``None`` when
        ``do_fit`` is falsy.
    """
    coefs = np.r_[0.9, 0.05]
    # The lag polynomial carries the negated AR coefficients after the lag-0 one.
    process = ArmaProcess(ar=np.r_[1, -coefs], nobs=500)
    # NOTE(review): generate_sample() is called without nsample, so the
    # sample length is whatever the library default is -- confirm.
    y = process.generate_sample()
    assert process.isstationary and process.isinvertible
    if not do_fit:
        return y, None
    return y, sm.tsa.ARMA(y, (2, 0)).fit()
def arima_process(size, ar_coefs, ma_coefs, d=0):
    """Simulate a series from an ARIMA model.

    ``size + d`` values are requested from the ARMA process and the result
    is cumulatively summed ``d`` times (cumulative summing keeps the length).

    Parameters
    ----------
    size : int
        Base number of samples requested.
    ar_coefs, ma_coefs
        Lag polynomials handed directly to ``ArmaProcess``.
    d : int
        Number of integration (cumulative-sum) passes.

    Returns
    -------
    pandas.Series
        The integrated series.
    """
    series = ArmaProcess(ar_coefs, ma_coefs).generate_sample(size + d)
    for _ in six.moves.range(d):
        series = np.cumsum(series)
    return pd.Series(series)
def generate_noise(timepoints=200, scale=0.01):
    """Return a noise series from an ARMA process with AR polynomial ``[1, -0.12]``.

    The global NumPy random state is re-seeded with 12345 on every call.

    Parameters
    ----------
    timepoints : int
        Number of samples requested from the process.
    scale : float
        Forwarded to ``generate_sample`` as ``scale``.
    """
    np.random.seed(12345)
    autocorr = 0.12
    # The coefficient enters the lag polynomial with its sign flipped.
    lag_poly = np.array([1, -autocorr])
    return ArmaProcess(lag_poly).generate_sample(timepoints, scale=scale, axis=0)
def simulate_ar1_time_series():
    """Plot two simulated AR(1) series of 1000 points in stacked subplots.

    The top panel uses the lag polynomial ``[1, -0.9]`` (AR parameter +0.9),
    the bottom panel ``[1, 0.9]`` (AR parameter -0.9). The figure is shown
    with ``plt.show()``; nothing is returned.
    """
    lag_polys = (
        np.array([1, -0.9]),  # Plot 1: AR parameter = +0.9
        np.array([1, 0.9]),   # Plot 2: AR parameter = -0.9
    )
    for panel, ar_poly in enumerate(lag_polys, start=1):
        plt.subplot(2, 1, panel)
        process = ArmaProcess(ar_poly, np.array([1]))
        plt.plot(process.generate_sample(nsample=1000))
    plt.show()
def test_default_causal_cto_no_signal():
    """With no shift added to the post-period, the p-value must exceed 0.05."""
    np.random.seed(1)
    process = ArmaProcess(np.r_[1, 0.9], np.array([1]))
    covariate = 100 + process.generate_sample(nsample=100)
    response = 1.2 * covariate + np.random.normal(size=100)
    frame = pd.DataFrame({'y': response, 'X': covariate}, columns=['y', 'X'])
    # Pre-period rows 0..69, post-period rows 70..99.
    impact = CausalImpact(frame, [0, 69], [70, 99])
    assert impact.p_value > 0.05
def sample_MA_process_ARMA(mu, theta, realisations):
    """Sample a moving-average process via ``ArmaProcess``.

    The global NumPy random state is re-seeded with 1234 on every call.

    Parameters
    ----------
    mu
        Accepted but not used by this function.
    theta : array-like
        MA coefficients excluding the lag-0 term (a leading 1 is prepended).
    realisations : int
        Number of samples requested.
    """
    np.random.seed(1234)

    def standard_normal(size):
        # Innovation generator handed to generate_sample as ``distrvs``.
        return np.random.normal(0, 1, size)

    # Prepend the lag-0 coefficient to both polynomials; there are no AR terms.
    ar_poly = np.r_[1, np.array([])]
    ma_poly = np.r_[1, np.array(theta)]
    process = ArmaProcess(ar_poly, ma_poly)
    return process.generate_sample(nsample=realisations, distrvs=standard_normal)
def ar_example1():
    """Plot two simulated AR(1) series (1000 points each) in stacked subplots.

    The top panel uses the lag polynomial ``[1, -0.9]`` (AR parameter +0.9),
    the bottom panel ``[1, 0.9]`` (AR parameter -0.9). The figure is shown
    with ``plt.show()``; nothing is returned.
    """
    from statsmodels.tsa.arima_process import ArmaProcess

    no_ma = np.array([1])
    panels = (
        (1, np.array([1, -0.9])),  # Plot 1: AR parameter = +0.9
        (2, np.array([1, 0.9])),   # Plot 2: AR parameter = -0.9
    )
    for position, ar_poly in panels:
        plt.subplot(2, 1, position)
        simulated = ArmaProcess(ar_poly, no_ma).generate_sample(nsample=1000)
        plt.plot(simulated)
    plt.show()
def sample_random_walk_arma(X0, realisations):
    """Sample from an ARMA process with AR polynomial ``[1, -1]`` and MA ``[1, 0]``.

    The global NumPy random state is re-seeded with 1234 on every call.

    Parameters
    ----------
    X0
        Accepted but not used by this function.
    realisations : int
        Number of samples requested.
    """
    np.random.seed(1234)
    unit_coef = np.array([1])
    zero_coef = np.array([0])
    # Prepend the lag-0 term; the AR coefficient enters with its sign flipped.
    ar_poly = np.r_[1, -unit_coef]
    ma_poly = np.r_[1, zero_coef]
    return ArmaProcess(ar_poly, ma_poly).generate_sample(nsample=realisations)
def test_lower_upper_percentile():
    """Check that the model reports the [2.5, 97.5] percentile bounds.

    The data are a seeded ARMA covariate ``X`` and a noisy linear response
    ``y``; rows 0..69 form the pre-period and rows 70..99 the post-period.
    """
    np.random.seed(1)
    ar = np.r_[1, 0.9]
    ma = np.array([1])
    arma_process = ArmaProcess(ar, ma)
    X = 100 + arma_process.generate_sample(nsample=100)
    y = 1.2 * X + np.random.normal(size=100)
    data = pd.DataFrame({'y': y, 'X': X}, columns=['y', 'X'])
    ci = CausalImpact(data, [0, 69], [70, 99])
    # Previously a bare ``==`` expression whose result was discarded, so the
    # test could never fail. assert_allclose works for list or array returns
    # and tolerates floating-point rounding in the percentile arithmetic.
    np.testing.assert_allclose(ci.lower_upper_percentile, [2.5, 97.5])
def data():
    """Return a two-column DataFrame: a noisy response and its ARMA covariate.

    Column 0 holds ``1.2 * X + noise`` and column 1 holds ``X``, where ``X``
    is 1 plus a 100-point sample from an ARMA process with AR lag polynomial
    ``[1, 0.9]``.
    """
    process = ArmaProcess(np.r_[1, 0.9], np.array([1]))
    covariate = (1 + process.generate_sample(nsample=100)).reshape(-1, 1)
    response = 1.2 * covariate + np.random.normal(size=(100, 1))
    return pd.DataFrame(np.concatenate((response, covariate), axis=1))
def get_ts(N, p=0, q=0):
    """Build an ARMA process and draw a sample from it.

    p is the number of poles
    q is the number of zeros

    The AR and MA polynomials come from ``poles(p // 2)`` and
    ``poles(q // 2)`` respectively.
    NOTE(review): the original note says this generates stable processes;
    that depends on the ``poles`` helper, which is not visible here.

    Returns the ``(model, sample)`` pair, where ``sample`` comes from
    ``model.generate_sample(N)``.
    """
    ar_poly = poles(p // 2)
    ma_poly = poles(q // 2)
    process = ArmaProcess(ar_poly, ma_poly)
    return process, process.generate_sample(N)
def estimate_order_of_model_pacf():
    """Show PACF plots (20 lags) for a simulated AR(1) and a simulated AR(2).

    Each series has 5000 points; one figure is shown per series.
    """
    no_ma = np.array([1])
    lag_polys = (
        np.array([1, -0.6]),        # AR(1) with phi = +0.6
        np.array([1, -0.6, -0.3]),  # AR(2) with phi1 = +0.6, phi2 = +0.3
    )
    for ar_poly in lag_polys:
        simulated = ArmaProcess(ar_poly, no_ma).generate_sample(nsample=5000)
        plot_pacf(simulated, lags=20)
        plt.show()
def test_causal_cto_w_positive_signal_no_standardization():
    """A +1 shift in the post-period must give p < 0.05 with standardize=False."""
    np.random.seed(1)
    process = ArmaProcess(np.r_[1, 0.9], np.array([1]))
    covariate = 100 + process.generate_sample(nsample=100)
    response = 1.2 * covariate + np.random.normal(size=100)
    # Inject the effect into the post-period (rows 70 onward).
    response[70:] += 1
    frame = pd.DataFrame({'y': response, 'X': covariate}, columns=['y', 'X'])
    impact = CausalImpact(frame, [0, 69], [70, 99], standardize=False)
    assert impact.p_value < 0.05
def simulate_ar2_time_series():
    """Simulate 1000 points of an ARMA process and plot series, ACF and PACF.

    Three stacked axes are created: the raw series (top), its ACF (middle)
    and its PACF (bottom). The figure is shown with ``plt.show()``.
    """
    fig, (series_ax, acf_ax, pacf_ax) = plt.subplots(3, 1)
    # NOTE(review): the lag-0 AR coefficient here is 2, not the usual 1 --
    # confirm how the library treats a non-unit leading coefficient.
    ar_poly = np.array([2, -0.9, -0.8])
    process = ArmaProcess(ar_poly, np.array([1]))
    sample = process.generate_sample(nsample=1000)
    series_ax.plot(sample)
    series_ax.set_title("AR(2, [0.9, 0.8]), MA(1, 0)")
    plot_acf(sample, ax=acf_ax)
    plot_pacf(sample, ax=pacf_ax)
    plt.show()
def equivalence_of_ar1_and_ma_infinity(intraday):
    """Plot the ACF of a long MA process with geometrically decaying weights.

    The MA polynomial is ``0.8**i`` for ``i = 0..29``; 5000 points are
    simulated and the ACF is drawn for 30 lags.

    ``intraday`` is accepted but not used by this function.
    """
    ma_weights = [0.8**lag for lag in range(30)]
    process = ArmaProcess(np.array([1]), ma_weights)
    sample = process.generate_sample(nsample=5000)
    plot_acf(sample, lags=30)
    plt.show()
def test_from_model(self):
    """``from_estimation`` and ``from_coeffs`` must build equivalent processes.

    An ARMA(1, 1) model is fitted to a seeded simulation, then the process
    rebuilt from the fit result is compared with the one rebuilt from the
    fitted coefficients.
    """
    n_obs = 1000
    true_process = ArmaProcess([1, -.8], [1, .3], 1000)
    rng = np.random.RandomState(12345)
    sample = true_process.generate_sample(n_obs, burnin=100,
                                          distrvs=rng.standard_normal)
    fit = ARMA(sample, (1, 1)).fit(disp=False)
    from_fit = ArmaProcess.from_estimation(fit)
    from_coefs = ArmaProcess.from_coeffs(fit.arparams, fit.maparams, n_obs)
    compared = ("arcoefs", "macoefs", "nobs", "isinvertible", "isstationary")
    for attr in compared:
        assert_equal(getattr(from_fit, attr), getattr(from_coefs, attr))
def do_one_res_bootstrap(n, slope_hat, residuals):
    """Run one residual-bootstrap replication and return its T statistic.

    A series of ``n + 1`` points is simulated from an AR process with lag
    polynomial ``[1, -slope_hat]``, using innovations resampled from
    ``residuals``. The slope is re-estimated from the lagged pairs
    ``(data[0:n-1], data[1:n])`` and studentised against ``slope_hat``.

    Returns
    -------
    ``(slope - slope_hat) / stderr``
    """
    def resample(size):
        # Innovations are drawn with replacement from the supplied residuals.
        return np.random.choice(residuals, size)

    process = ArmaProcess(np.array([1, -slope_hat]), np.array([1]))
    series = process.generate_sample(nsample=n + 1, scale=1, distrvs=resample)
    lagged = series[:n - 1]
    current = series[1:n]
    slope = solve(lagged, current)
    stderr = calc_StdErr(slope, lagged, current)
    return (slope - slope_hat) / stderr
def test_simulated_y_default_model():
    """Check shape and central range of ``simulated_y`` for the default model.

    ``simulated_y`` must have shape (1000, 30), and the 5th/95th percentiles
    of its row means must fall strictly inside (119, 121).
    """
    np.random.seed(1)
    process = ArmaProcess(np.r_[1, 0.9], np.array([1]))
    covariate = 100 + process.generate_sample(nsample=100)
    response = 1.2 * covariate + np.random.normal(size=100)
    frame = pd.DataFrame({'y': response, 'X': covariate}, columns=['y', 'X'])
    impact = CausalImpact(frame, [0, 69], [70, 99])
    assert impact.simulated_y.shape == (1000, 30)
    row_means = impact.simulated_y.mean(axis=1)
    lower, upper = np.percentile(row_means, [5, 95])
    assert lower > 119
    assert upper < 121
def mc_ar1_ARMA(self, phi, std, n, N=1000):
    """Monte-Carlo simulation of AR(1) processes.

    input:
    phi .. (estimated) lag-1 autocorrelation
    std .. (estimated) standard deviation of noise
    n .. length of original time series
    N .. number of MC simulations

    Returns the output of ``generate_sample`` requested with
    ``nsample=(N, n)`` along ``axis=1`` and a burn-in of 1000.
    """
    # phi enters the AR lag polynomial with its sign flipped.
    ar_poly = np.array([1, -phi])
    ma_poly = np.array([1])
    process = ArmaProcess(ar_poly, ma_poly, nobs=n)
    return process.generate_sample(nsample=(N, n), scale=std, axis=1,
                                   burnin=1000)
def compare_the_acf_for_several_ar_time_series():
    """Plot the ACF of simulated AR(1) series for phi = 0.9, -0.9 and 0.3.

    One axis per parameter, sharing the x axis; each series has 1000 points.
    """
    phis = [0.9, -0.9, 0.3]
    fig, axes = plt.subplots(3, 1, sharex=True)
    for ax, phi in zip(axes, phis):
        process = ArmaProcess(np.array([1, -phi]), np.array([1]))
        plot_acf(process.generate_sample(nsample=1000), ax=ax)
        ax.set_title("AR parameter φ = %4.2f" % (phi))
    fig.suptitle("Comparison of ACF")
    plt.show()
def make_trend(series_len, method='rw', arma=[.25, .6], rw_loc=0.0, rw_scale=0.1, seed=1):
    """Generate a time-series trend with one of several methods.

    Parameters
    ----------
    series_len: int
        Total length of series
    method: str ['arma', 'rw']
        In case of `'rw'`, a simple random walk process will be used. For
        `'arma'`, `statsmodels` is used to simulate a simple ARMA(1, 1)
        process
    arma: list
        List ``[arparam, maparam]`` of size 2 used by the `'arma'` method.
        The list is only read, never modified.
    rw_loc: float
        Location parameter of the random-walk increments (`'rw'` only)
    rw_scale: float
        Scale parameter of the random-walk increments (`'rw'` only)
    seed: int
        Seed passed into `np.random.default_rng()`; it drives both methods

    Returns
    -------
    np.array-like
        Simulated trend with length equal to `series_len`

    Raises
    ------
    IllegalArgument
        If `method` is neither `'rw'` nor `'arma'`.

    Notes
    -----
    1. ARMA process: https://www.statsmodels.org/stable/generated/statsmodels.tsa.arima_process.ArmaProcess.html
    """
    if method == "rw":
        rng = np.random.default_rng(seed)
        trend = np.cumsum(rng.normal(rw_loc, rw_scale, series_len))
    elif method == "arma":
        arparams = np.array([arma[0]])
        maparams = np.array([arma[1]])
        # add zero-lag and negate
        ar = np.r_[1, -arparams]
        # add zero-lag
        ma = np.r_[1, maparams]
        arma_process = ArmaProcess(ar, ma)
        # Draw the innovations from the seeded generator. Previously this
        # branch ignored `seed` and used the global NumPy random state, so
        # results were not reproducible despite the documented parameter.
        rng = np.random.default_rng(seed)
        trend = arma_process.generate_sample(series_len,
                                             distrvs=rng.standard_normal)
    else:
        raise IllegalArgument("Invalid trend method.")
    return trend
def estimating_an_ar_model():
    """Simulate an AR(1) series with phi = 0.9, fit an AR(1) model, print it.

    Returns
    -------
    tuple
        ``(simulated_data, fit_result)``.
    """
    process = ArmaProcess(np.array([1, -0.9]), np.array([1]))
    sample = process.generate_sample(nsample=1000)

    # ARMA with order (1, 0) is an AR(1) model.
    fit = ARMA(sample, order=(1, 0)).fit()

    print(fit.summary())
    print("When the true phi=0.9, the estimate of phi (and the constant) are:")
    print(fit.params)
    return sample, fit
def generate_signal(go_onset=2, ss_onset=12, fs_onset=22,
                    go_pwr=1, ss_pwr=2, fs_pwr=3,
                    noise=0,
                    design_resolution=0.1,
                    duration=40,
                    stim_duration=1,
                    tr=1):
    """Simulate a response signal for three conditions (go, ss, fs).

    For each condition a boxcar of ``stim_duration`` starting at its onset is
    built on a grid of step ``design_resolution``, convolved with the output
    of ``hemodynamic_models._gamma_difference_hrf`` and truncated to the grid
    length. The design is then subsampled every ``tr / design_resolution``
    steps and multiplied by the per-condition amplitudes.

    When ``noise > 0`` the global NumPy state is re-seeded with 12345 and a
    sample from an ARMA process with AR polynomial ``[1, -0.12]`` (scaled by
    ``noise``) is added to the signal.

    Returns
    -------
    The signal, with noise added when requested.
    """
    rho = 0.12
    cond_order = [0, 1, 2]
    betas = np.array([go_pwr, ss_pwr, fs_pwr])
    onsets = np.array([go_onset, ss_onset, fs_onset])

    # Convert everything from time units to design-grid steps.
    onset_steps = (onsets / design_resolution).astype(int)
    n_steps = int(duration / design_resolution)
    stim_steps = int(stim_duration / design_resolution)
    steps_per_tr = int(tr / design_resolution)

    n_conditions = onsets.shape[0]
    design = np.zeros((n_steps, n_conditions))
    weights = np.zeros((n_conditions, 1))
    for col, (cond, start) in enumerate(zip(cond_order, onset_steps)):
        # Boxcar for this condition, then convolve and trim to grid length.
        design[start:start + stim_steps, col] = 1
        hrf = hemodynamic_models._gamma_difference_hrf(
            tr, oversampling=steps_per_tr)
        design[:, col] = np.convolve(design[:, col], hrf)[0:design.shape[0]]
        # Amplitude for this column depends on its condition.
        weights[col, :] = betas[cond]

    # Subsample the design so it is back at TR resolution.
    design = design[::steps_per_tr, :]
    signal = np.squeeze(design @ weights)

    if not (noise > 0.0):
        return signal

    np.random.seed(12345)
    n_trs = int(duration / tr)
    # rho enters the AR lag polynomial with its sign flipped.
    lag_poly = np.array([1, -rho])
    err = ArmaProcess(lag_poly).generate_sample(n_trs, scale=noise, axis=0)
    return signal + err
def get_AR(process, iters, params):
    """Simulate an AR series and the matching one-step 1% value-at-risk.

    Parameters
    ----------
    process
        Stored in the metadata under ``"NAME"``.
    iters : int
        Number of samples requested; stored as ``"number_of_iterations"``.
    params : dict or None
        Must contain ``'AR'``: a list of lag-polynomial coefficients after
        the lag-0 term. ``None`` selects ``{'AR': [-0.999]}``.

    Returns
    -------
    tuple
        ``(metadata, data)`` where ``data`` is a DataFrame with columns
        ``'Returns'`` and ``'True_VAR_0.01'``. The first ``len(ar)`` samples
        are dropped so both columns line up.
    """
    # Identity check: ``== None`` can invoke a custom or elementwise __eq__.
    if params is None:
        params = {'AR': [-0.999]}
    metadata = {"NAME": process, "number_of_iterations": iters}
    metadata.update(params)

    # Prepend the lag-0 coefficient.
    ar = np.array([1] + params['AR'])
    ar_object = ArmaProcess(ar, [1])
    y = ar_object.generate_sample(nsample=iters)

    n_coefs = len(ar)
    # Weighted sum of lagged values (negated polynomial coefficients), offset
    # by the 1% standard-normal quantile.
    lagged_terms = [
        y[i:-(n_coefs - i)] * (-1 * ar[n_coefs - i])
        for i in range(1, n_coefs)
    ]
    true_var = sum(lagged_terms) + sps.norm.ppf(0.01)
    y = y[n_coefs:]
    data = pd.DataFrame(data={'Returns': y, 'True_VAR_0.01': true_var})
    return metadata, data
def estimate_order_of_model_information_criteria():
    """Fit AR(p) for p = 0..6 to a simulated AR(2) series and plot the BIC.

    The series has 5000 points with lag polynomial ``[1, -0.6, -0.3]``.
    Only orders 1..6 are plotted.
    """
    process = ArmaProcess(np.array([1, -0.6, -0.3]), np.array([1]))
    sample = process.generate_sample(nsample=5000)

    bic = np.zeros(7)
    for order in range(7):
        # Record the BIC of the AR(order) fit.
        bic[order] = ARMA(sample, order=(order, 0)).fit().bic

    plt.plot(range(1, 7), bic[1:7], marker='o')
    plt.xlabel('Order of AR Model')
    plt.ylabel('Bayesian Information Criterion')
    plt.show()
def simulate_ar_process(phi=0.9, plot=False):
    """
    Simulate 1000 points of an AR(1) process.

    The AR array starts with the lag-0 coefficient of 1, followed by the
    coefficient with the opposite sign: for phi = 0.9 the second element is
    -0.9. The MA part is ignored, so its array is just [1].

    :param phi: AR(1) coefficient
    :param plot: when truthy, plot the simulated series and show the figure
    :return: the simulated series
    """
    lag_poly = np.array([1, -phi])
    process = ArmaProcess(lag_poly, np.array([1]))
    sample = process.generate_sample(nsample=1000)
    if plot:
        plt.plot(sample)
        plt.show()
    return sample
def estimating_an_ma_model():
    """Simulate an MA(1) series with theta = -0.9, fit an MA(1) model, print it.

    Returns
    -------
    The fit result.
    """
    process = ArmaProcess(np.array([1]), np.array([1, -0.9]))
    sample = process.generate_sample(nsample=1000)

    # ARMA with order (0, 1) is an MA(1) model.
    fit = ARMA(sample, order=(0, 1)).fit()

    print(fit.summary())
    print(
        "When the true theta=-0.9, the estimate of theta (and the constant) are:"
    )
    print(fit.params)
    return fit
def plot_MA(maxit=100, N=365, T=1000, save=False, name='img/results/emission.png'):
    """Generate emission model output with the transition replaced by an MA process.

    Each of the ``maxit`` runs draws ``N`` points from the MA process, adds
    ``3*sin(2*pi*t/N)``, and min-max scales the row to obtain ``z``. ``x`` is
    obtained by passing ``z`` to ``emission`` together with an array holding
    ``N`` copies of ``T``. The per-time-step mean and the 2.5%/97.5% quantile
    band of both are then plotted.

    Args:
        maxit (int): Number of samples
        N (int): Size of time range.
        T (int): Constant test size.
        save (bool, optional): Whether to save the figure; off by default.
        name (str, optional): Path to save the plot to.
    """
    ma_process = ArmaProcess(ma=[.2, -.4, .2, -.7])

    z = np.zeros((maxit, N))
    x = np.zeros((maxit, N))
    for run in range(maxit):
        row = z[run, :]  # view into z, so writes below land in z
        # z[t] sample: MA draw plus one sine period over the time range
        row[:] = ma_process.generate_sample(nsample=N) + 3*np.sin(np.array(range(N))/N*2*np.pi)
        # min-max scale the row
        row[:] = (row - row.min()) / (row.max() - row.min())
        # x[t] sample
        x[run, :] = emission(row, np.array([T for _ in range(N)]), 1, 50)

    def mean_and_band(samples):
        # Mean and 2.5%/97.5% quantiles across runs, per time step.
        return samples.mean(axis=0), np.quantile(samples, [.025, .975], axis=0)

    z_mu, z_ci = mean_and_band(z)
    x_mu, x_ci = mean_and_band(x)

    steps = range(N)
    fig1, ax1 = plt.subplots()
    layers = (
        (z_mu, z_ci, 'red', 'z[t]'),
        (x_mu, x_ci, 'blue', 'x[t]'),
    )
    for centre, band, colour, label in layers:
        ax1.plot(steps, centre, color=colour, label=label)
        ax1.fill_between(steps, band[0, :], band[1, :], color=colour, alpha=.1)
    ax1.set_xlabel('Time')
    ax1.set_ylabel('Value')
    ax1.legend()
    if save:
        fig1.savefig(name)
def test_simulated_y_custom_model():
    """Check shape and central range of ``simulated_y`` with a user-supplied model.

    The custom model is an ``UnobservedComponents`` local-level model built
    on the standardized pre-period data (rows 0..69).
    """
    np.random.seed(1)
    process = ArmaProcess(np.r_[1, 0.9], np.array([1]))
    covariate = 100 + process.generate_sample(nsample=100)
    response = 1.2 * covariate + np.random.normal(size=100)
    frame = pd.DataFrame({'y': response, 'X': covariate}, columns=['y', 'X'])

    split = 70
    pre_normed, _ = standardize(frame.iloc[:split])
    custom_model = UnobservedComponents(
        endog=pre_normed['y'].iloc[0:split],
        level='llevel',
        exog=pre_normed['X'].iloc[0:split])

    impact = CausalImpact(frame, [0, 69], [70, 99], model=custom_model)
    assert impact.simulated_y.shape == (1000, 30)
    row_means = impact.simulated_y.mean(axis=1)
    lower, upper = np.percentile(row_means, [5, 95])
    assert lower > 119
    assert upper < 121
arma_t.isinvertible() # <codecell> arma_t.isstationary() # <rawcell> # * What does this mean? # <codecell> fig = plt.figure(figsize=(12,8)) ax = fig.add_subplot(111) ax.plot(arma_t.generate_sample(size=50)); # <codecell> arparams = np.array([1, .35, -.15, .55, .1]) maparams = np.array([1, .65]) arma_t = ArmaProcess(arparams, maparams) arma_t.isstationary() # <codecell> arma_rvs = arma_t.generate_sample(size=500, burnin=250, scale=2.5) # <codecell> fig = plt.figure(figsize=(12,8))
, 0.8 , 0.8 2 , 0.8 3 , … Simulate 5000 observations of the MA(30) model Plot the ACF of the simulated series ''' # import the modules for simulating data and plotting the ACF from statsmodels.tsa.arima_process import ArmaProcess from statsmodels.graphics.tsaplots import plot_acf # Build a list MA parameters ma = [0.8**i for i in range(30)] # Simulate the MA(30) model ar = np.array([1]) AR_object = ArmaProcess(ar, ma) simulated_data = AR_object.generate_sample(nsample=5000) # Plot the ACF plot_acf(simulated_data, lags=30) plt.show()
100XP Import the class ArmaProcess in the arima_process module. Plot the simulated AR procesees: Let ar1 represent an array of the AR parameters [1, −ϕ − ϕ ] as explained above. For now, the MA parmater array, ma1, will contain just the lag-zero coefficient of one. With parameters ar1 and ma1, create an instance of the class ArmaProcess(ar,ma) called AR_object1. Simulate 1000 data points from the object you just created, AR_object1, using the method .generate_sample(). Plot the simulated data in a subplot. Repeat for the other AR parameter. ''' # import the module for simulating data from statsmodels.tsa.arima_process import ArmaProcess # Plot 1: AR parameter = +0.9 plt.subplot(2,1,1) ar1 = np.array([1, -0.9]) ma1 = np.array([1]) AR_object1 = ArmaProcess(ar1, ma1) simulated_data_1 = AR_object1.generate_sample(nsample=1000) plt.plot(simulated_data_1) # Plot 2: AR parameter = -0.9 plt.subplot(2,1,2) ar2 = np.array([1, 0.9]) ma2 = np.array([1]) AR_object2 = ArmaProcess(ar2, ma2) simulated_data_2 = AR_object2.generate_sample(nsample=1000) plt.plot(simulated_data_2) plt.show()
axes[3].set_ylabel('Irregular') fig.tight_layout() return fig if __name__ == "__main__": import numpy as np from statsmodels.tsa.arima_process import ArmaProcess np.random.seed(123) ar = [1, .35, .8] ma = [1, .8] arma = ArmaProcess(ar, ma, nobs=100) assert arma.isstationary() assert arma.isinvertible() y = arma.generate_sample() dates = pd.date_range("1/1/1990", periods=len(y), freq='M') ts = pd.TimeSeries(y, index=dates) xpath = "/home/skipper/src/x12arima/x12a" try: results = x13_arima_analysis(xpath, ts) except: print("Caught exception") results = x13_arima_analysis(xpath, ts, log=False) # import pandas as pd # seas_y = pd.read_csv("usmelec.csv") # seas_y = pd.TimeSeries(seas_y["usmelec"].values,