def get_sample_data(self, m=ModelType.ar, p=1, q=1, n=None, b=0):
    """Simulate an AR(p), MA(q) or ARMA(p, q) series with generated coefficients.

    Args:
        m: Member of ``SerialCorrelation.ModelType`` selecting the process
            (``ar``, ``ma`` or ``arma``).
        p: Number of AR coefficients to generate; ignored for ``ma``.
        q: Number of MA coefficients to generate; ignored for ``ar``.
        n: Number of samples to generate. ``None`` falls back to the
            instance's ``__n_samples`` attribute.
        b: Forwarded to ``arma_generate_sample`` as ``burnin``.

    Returns:
        Tuple ``(alphas, betas, data)``: the AR coefficients, the MA
        coefficients (a single 0.0 for the part the model does not have),
        and the simulated series.

    Raises:
        ValueError: If ``m`` is not one of the supported model types.
    """
    # NOTE(review): the meaning of the third positional argument (True) is
    # defined by Validator and is not visible here.
    Validator.validate_attribute(p, int, True)
    Validator.validate_attribute(q, int, True)

    if n is None:
        n = self.__n_samples

    model_types = SerialCorrelation.ModelType
    # A pure AR(p) model has its MA betas set to 0, and a pure MA(q) model
    # has its AR alphas set to 0.
    if m is model_types.ar:
        alphas = self.get_alt_diminishing_random_list(size=p)
        betas = np.array([0.])  # value of q does not matter in this case
    elif m is model_types.ma:
        alphas = np.array([0.])  # value of p does not matter in this case
        betas = self.get_diminishing_random_list(size=q)
    elif m is model_types.arma:
        alphas = self.get_alt_diminishing_random_list(size=p)
        betas = self.get_diminishing_random_list(size=q)
    else:
        # Without this branch an unknown model type would surface later as
        # an unbound-local error on ``alphas``.
        raise ValueError("unsupported model type: {!r}".format(m))

    # The generator takes lag polynomials: the zero-lag coefficient (1) is
    # included explicitly and the AR coefficients are negated.
    ar = np.r_[1, -alphas]
    ma = np.r_[1, betas]
    data = smt.arma_generate_sample(ar=ar, ma=ma, nsample=n, burnin=b)
    return alphas, betas, data
def gen_ARMAsample(self, alphas, betas, samples=1000, burn=4000, plot=False):
    '''Generate a sample from an ARMA process.

    Arguments:
    ---------
    alphas  : 1D numpy array with the AR coefficients. They are placed
              after the zero-lag 1 unchanged (no sign flip is applied
              here), so they must already follow the sign convention of
              the AR lag polynomial expected by arma_generate_sample.
    betas   : 1D numpy array with the MA coefficients
    samples : number of samples to return
    burn    : passed on as ``burnin`` to arma_generate_sample
    plot    : if True, show the generated series in a new figure

    Return:
    ---------
    y : ARMA sample'''
    # Lag polynomials: prepend the zero-lag coefficient (1).
    ar_poly = np.r_[1, alphas]
    ma_poly = np.r_[1, betas]

    y = smt.arma_generate_sample(ar=ar_poly, ma=ma_poly,
                                 nsample=samples, burnin=burn)

    if plot:
        # The model order excludes the zero-lag term, hence the "- 1".
        ar_order = len(ar_poly) - 1
        ma_order = len(ma_poly) - 1
        plt.figure()
        plt.title('ARMA sample: AR(%d) MA(%d)' % (ar_order, ma_order))
        plt.plot(y, '-k', linewidth=0.7)
        plt.tight_layout()
        plt.show()

    return y
def f5(self):
    """Return 20001 samples of a fixed, hard-coded ARMA process.

    The AR lag polynomial is a leading 1 followed by the negated entries of
    the coefficient vector below; the MA polynomial is the second vector
    used as-is. The first 300 generated points are requested as burn-in and
    ``sigma=0.02`` is forwarded to the generator.
    """
    ar_terms = np.array([1, -.24, .13, .3, -.2, .3, -.3])
    ma_terms = np.array([
        1, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2,
        1.2, 1.2, .9
    ])

    ar_poly = np.r_[1, -ar_terms]
    ma_poly = np.r_[ma_terms]

    return smt.arma_generate_sample(
        ar=ar_poly,
        ma=ma_poly,
        nsample=20001,
        burnin=300,
        sigma=0.02,
    )
def MA():
    """Simulate an MA(1) series (beta = 0.6), plot it and fit an ARMA(0, 1).

    Prints the summary of the fitted model; returns nothing.
    """
    n_obs = 1000
    max_lag = 30

    # AR part is switched off (alpha = 0); only the MA coefficient is set.
    ar_coefs = np.array([0.])
    ma_coefs = np.array([0.6])

    # Lag polynomials: zero-lag 1 first, AR coefficients negated.
    ar_poly = np.r_[1, -ar_coefs]
    ma_poly = np.r_[1, ma_coefs]

    series = smt.arma_generate_sample(ar=ar_poly, ma=ma_poly, nsample=n_obs)
    tsplot(series, lags=30)

    fitted = smt.ARMA(series, order=(0, 1)).fit(
        maxlag=max_lag, method='mle', trend='nc')
    print(fitted.summary())
def AR_p():
    """Simulate an AR(1) and an AR(2) series, plot each and fit AR models.

    For both series the estimated coefficients and the AIC-selected lag
    order are printed next to the true values. Returns nothing.
    """
    np.random.seed(1)

    # ---- AR(1), built by explicit recursion ----
    n_samples = 1000
    a = 0.6
    # The array first holds the white-noise draws and is overwritten in
    # place. At t == 0 the index -1 wraps around to the last draw, which
    # has not been overwritten yet.
    series = np.random.normal(size=n_samples)
    for t in range(n_samples):
        series[t] = a * series[t - 1] + series[t]
    tsplot(series, lags=30)

    ar1_fit = smt.AR(series).fit(maxlag=30, ic='aic', trend='nc')
    ar1_order = smt.AR(series).select_order(maxlag=30, ic='aic', trend='nc')
    print('\nalpha estimate: {:3.5f} | best lag order = {}'.format(
        ar1_fit.params[0], ar1_order))
    print('\ntrue alpha = {} | true order = {}'.format(a, 1))

    # ---- AR(2), generated from lag polynomials ----
    ar_coefs = np.array([.666, -.333])
    ma_coefs = np.array([0.])  # no MA part for a pure AR(p) model
    # Zero-lag coefficient (1) goes first; AR coefficients are negated.
    ar_poly = np.r_[1, -ar_coefs]
    ma_poly = np.r_[1, ma_coefs]
    ar2_series = smt.arma_generate_sample(ar=ar_poly, ma=ma_poly, nsample=1000)
    tsplot(ar2_series, lags=30)

    lag_cap = 10
    ar2_fit = smt.AR(ar2_series).fit(maxlag=lag_cap, ic='aic', trend='nc')
    ar2_order = smt.AR(ar2_series).select_order(
        maxlag=lag_cap, ic='aic', trend='nc')
    print('\ncoef estimate: {:3.4f} {:3.4f} | best lag order = {}'.format(
        ar2_fit.params[0], ar2_fit.params[1], ar2_order))
    print('\ntrue coefs = {} | true order = {}'.format([.666, -.333], 2))
def _generate_ARMA(self):
    """Generate ``self.n_series`` ARMA series of ``self.datapoints`` points.

    ``self.p`` supplies the AR coefficients and ``self.q`` the MA
    coefficients. When ``self.seed`` is not None the global NumPy random
    state is seeded first. One tenth of ``self.datapoints`` (truncated to an
    int) is requested as burn-in.

    Returns:
        A NumPy array built from the list of generated series.
    """
    if self.seed is not None:
        np.random.seed(self.seed)

    # Lag polynomials: zero-lag 1 first, AR coefficients negated.
    ar_poly = np.r_[1, -np.array(self.p)]
    ma_poly = np.r_[1, np.array(self.q)]
    burn_in = int(self.datapoints / 10)

    series = [
        smt.arma_generate_sample(ar=ar_poly, ma=ma_poly,
                                 nsample=self.datapoints, burnin=burn_in)
        for _ in range(self.n_series)
    ]
    return np.array(series)
# Plot graphs xt.plot(ax=ax_xt) ax_xt.set_title("Time Series") plot_acf(xt, lags=50, ax=ax_acf) plot_pacf(xt, lags=50, ax=ax_pacf) plt.tight_layout() return None # Number of samples n = 600 # Generate MA(1) dataset ar = np.r_[1, -0] ma = np.r_[1, 0.7] ma1_data = smtsa.arma_generate_sample(ar=ar, ma=ma, nsample=n) plotds(ma1_data) # Generate MA(2) dataset ar = np.r_[1, -0] ma = np.r_[1, 0.6, 0.7] ma2_data = smtsa.arma_generate_sample(ar=ar, ma=ma, nsample=n) plotds(ma2_data) # Generate MA(3) dataset ar = np.r_[1, -0] ma = np.r_[1, 0.6, 0.7, 0.5] ma3_data = smtsa.arma_generate_sample(ar=ar, ma=ma, nsample=n) plotds(ma3_data) # Build MA(1) model
# In[19]:

# Simulate an AR(2) process
n = 1000
alphas = np.array([.444, .333])
betas = np.array([0.])  # no MA part for a pure AR(p) model

# The generator takes lag polynomials: the zero-lag value (1) is given
# explicitly and the AR alphas are negated.
# For more information see the examples at statsmodels.org
ar = np.r_[1, -alphas]
ma = np.r_[1, betas]

ar2 = smt.arma_generate_sample(ar=ar, ma=ma, nsample=n)
_ = tsplot(ar2, lags=12, title="AR(2) process")

# ## AR(2) process -- has ACF tailing out and PACF cutting off at lag=2

# In[20]:

# Simulate an MA(1) process
n = 1000
alphas = np.array([0.])  # AR part switched off
betas = np.array([0.8])

# Add the zero-lag value and negate the alphas
ar = np.r_[1, -alphas]
ma = np.r_[1, betas]

ma1 = smt.arma_generate_sample(ar=ar, ma=ma, nsample=n)
# Plot graphs xt.plot(ax=ax_xt) ax_xt.set_title('Time Series') plot_acf(xt, lags=50, ax=ax_acf) plot_pacf(xt, lags=50, ax=ax_pacf) plt.tight_layout() return None # Number of samples n = 600 # Generate AR(1) dataset ar = np.r_[1, -0.6] ma = np.r_[1, 0] ar1_data = smtsa.arma_generate_sample(ar=ar, ma=ma, nsample=n) plotds(ar1_data) # Generate AR(2) dataset ar = np.r_[1, 0.6, 0.7] ma = np.r_[1, 0] ar2_data = smtsa.arma_generate_sample(ar=ar, ma=ma, nsample=n) plotds(ar2_data) # Generate AR(3) dataset ar = np.r_[1, 0.6, 0.7, 0.5] ma = np.r_[1, 0] ar3_data = smtsa.arma_generate_sample(ar=ar, ma=ma, nsample=n) plotds(ar3_data) # Build AR(1) model
plt.legend()
plt.grid(True)
plt.show()

#%%
np.random.seed(12345)

arparams = np.array([.75, -.25])
maparams = np.array([.65, .35])

# Lag polynomials: zero-lag 1 first; the AR parameters are negated.
ar = np.r_[1, -arparams]
ma = np.r_[1, maparams]

#%% Generate Samples
num_samples = 250
y = smt.arma_generate_sample(ar, ma, nsample=num_samples)

# Attach a daily date index starting 2020-01-01.
dates = pd.date_range(start='2020-01-01', freq="D", periods=num_samples)
y = pd.Series(y, index=dates)

# First 70% of the observations for training, the remainder for testing.
set_size = len(y)
train_size = int(len(y)*0.7)
train_y, test_y = y.iloc[:train_size], y.iloc[train_size:]
print(train_y, len(train_y))

#%% tsplot
tsplot(train_y)

#%% fit model
# ARIMA(2, 0, 2) without a trend term, fitted on the training split.
model = smt.arima.ARIMA(train_y, order=(2, 0, 2), trend='n').fit()
model.params
a = 0.6
# x and w name the same array: the noise draws are overwritten in place by
# the AR(1) recursion (at t == 0 the index -1 wraps to the last draw).
x = w = np.random.normal(size=n_samples)
for t in range(n_samples):
    x[t] = a*x[t-1] + w[t]

limit = 12
_ = tsplot(x, lags=limit, title="AR(1)process")

# MA(2) simulated process
n = 1000
alphas = np.array([0.])  # AR part switched off
betas = np.array([0.6, 0.4])

# Lag polynomials: zero-lag 1 first, alphas negated.
ar = np.r_[1, -alphas]
ma = np.r_[1, betas]

ma3 = smt.arma_generate_sample(ar=ar, ma=ma, nsample=n)
_ = tsplot(ma3, lags=12, title="MA(2) process")

###--------------------------------------------------------------------
### ARIMA with prophet
from fbprophet import Prophet

# Prophet requires a pandas DataFrame in the configuration below
# (date column named DS and the value column named Y).
# Sum item_cnt_day per date_block_num, then index the totals by
# month-start dates from 2013-01-01 to 2015-10-01.
ts = sales.groupby(["date_block_num"])["item_cnt_day"].sum()
ts.index = pd.date_range(start='2013-01-01', end='2015-10-01', freq='MS')
ts = ts.reset_index()
# Plot graphs xt.plot(ax=ax_xt) ax_xt.set_title('Time Series') plot_acf(xt, lags=50, ax=ax_acf) plot_pacf(xt, lags=50, ax=ax_pacf) plt.tight_layout() return None # Number of samples n = 600 # Generate AR(1) dataset ar = np.r_[1, 0.6] ma = np.r_[1, 0.3] ar1ma1_data = smtsa.arma_generate_sample(ar=ar, ma=ma, nsample=n) plotds(ar1ma1_data ) # Impluse response curve plt.plot(arima_process.arma_impulse_response(ar, ma, nobs=20)) plt.ylabel("Impact") plt.xlabel("Lag") # Build AR(1) model ar1ma1 = smtsa.ARMA(ar1ma1_data.tolist(), order=(1, 1)).fit( maxlag=30, method='mle', trend='nc') ar1ma1.summary() # Optimize ARMA parameters
def _scale_to_unit_range(samples):
    """Min-max scale a 2-D (n_series, n_steps) array into [-1, 1].

    Returns the scaled data reshaped to (n_series, n_steps, 1) together
    with the fitted scaler.
    """
    scalar = MinMaxScaler(feature_range=(-1, 1))
    scalar.fit(samples)
    scaled = scalar.transform(samples)
    # Shape is passed positionally-equivalent via the method, which avoids
    # the deprecated ``newshape=`` keyword of ``np.reshape``.
    return scaled.reshape(len(scaled), samples.shape[1], 1), scalar


def _simulate_ar1(coef, n_steps):
    """Return one AR(1) path x[t] = coef * x[t-1] + w[t], starting from 0."""
    noise = np.random.normal(size=n_steps)
    path = np.empty(n_steps)
    previous = 0.0
    for t in range(n_steps):
        previous = coef * previous + noise[t]
        path[t] = previous
    return path


def get_data(option, conditional=False):
    """Build 10000 scaled synthetic series of length 100.

    Args:
        option: Which process to simulate.
            1 -- ARMA(2, 2) with alphas (0.5, -0.25) and betas (0.5, -0.3),
                 using a burn-in of 10 samples.
            2 -- MA(1) with beta 0.6.
            3 -- AR(1) with alpha 0.6.
        conditional: When true, also return the conditioning values that
            describe the generating coefficients.

    Returns:
        ``(samples, scalar)``, or ``(samples, cond_arr, scalar)`` when
        ``conditional`` is true. ``samples`` has shape (10000, 100, 1) and is
        min-max scaled to [-1, 1]; ``scalar`` is the fitted ``MinMaxScaler``.
        ``cond_arr`` is, for option 1, a list with one (2, 2, 100) array per
        series holding the alphas and betas; for options 2 and 3 it is a
        (10000, 100) array filled with the single coefficient.

    Raises:
        ValueError: If ``option`` is not 1, 2 or 3.
    """
    n_lists = 10000
    n = 100

    if option == 1:
        alphas = np.array([0.5, -0.25])
        betas = np.array([0.5, -0.3])
        # Lag polynomials: zero-lag 1 first, alphas negated.
        ar = np.r_[1, -alphas]
        ma = np.r_[1, betas]
        burn = int(n / 10)  # number of samples to discard before fit

        # NOTE(review): this first draw is never used; it is kept only so
        # that seeded runs consume the random stream exactly as before.
        smt.arma_generate_sample(ar=ar, ma=ma, nsample=n, burnin=burn)

        samples = np.array([
            smt.arma_generate_sample(ar=ar, ma=ma, nsample=n, burnin=burn)
            for _ in range(n_lists)
        ])
        samples, scalar = _scale_to_unit_range(samples)
        if conditional:
            # One (2, 2, n) array per series, repeating [alphas, betas]
            # for every time step.
            cond_arr = [np.array([[alphas, betas]] * n).T
                        for _ in range(n_lists)]
            return samples, cond_arr, scalar
        return samples, scalar

    if option == 2:
        b = 0.6
        alphas = np.array([0.])
        betas = np.array([b])
        # add zero-lag and negate alphas
        ar = np.r_[1, -alphas]
        ma = np.r_[1, betas]

        samples = np.array([
            smt.arma_generate_sample(ar=ar, ma=ma, nsample=n)
            for _ in range(n_lists)
        ])
        samples, scalar = _scale_to_unit_range(samples)
        if conditional:
            return samples, np.full((n_lists, n), b), scalar
        return samples, scalar

    if option == 3:
        # Autoregressive model: each value is regressed on its own
        # previous value, so the recursion must feed back the generated
        # series rather than the raw noise.
        a = 0.6
        samples = np.array([_simulate_ar1(a, n) for _ in range(n_lists)])
        samples, scalar = _scale_to_unit_range(samples)
        if conditional:
            return samples, np.full((n_lists, n), a), scalar
        return samples, scalar

    raise ValueError("option must be 1, 2 or 3, got {!r}".format(option))
# Plot graphs xt.plot(ax=ax_xt) ax_xt.set_title('Time Series') plot_acf(xt, lags=50, ax=ax_acf) plot_pacf(xt, lags=50, ax=ax_pacf) plt.tight_layout() return None # Number of samples n = 600 # Generate AR(1) dataset ar = np.r_[1, -0.6] ma = np.r_[1, 0] ar1_data = smtsa.arma_generate_sample(ar=ar, ma=ma, nsample=n) plotds(ar1_data) # Generate AR(2) dataset ar = np.r_[1, 0.6, 0.7] ma = np.r_[1, 0] ar2_data = smtsa.arma_generate_sample(ar=ar, ma=ma, nsample=n) plotds(ar2_data) # Generate AR(3) dataset ar = np.r_[1, 0.6, 0.7, 0.5] ma = np.r_[1, 0] ar3_data = smtsa.arma_generate_sample(ar=ar, ma=ma, nsample=n) plotds(ar3_data)