def setupClass(cls):
    """Fit ARMA(4, 1) without a constant and attach the forecast fixtures.

    The fitted results are stored on ``cls.res1`` together with a 10-step
    forecast (``forecast_res``) and its errors (``forecast_err``); the
    reference results are stored on ``cls.res2``.
    """
    series = y_arma[:, 2]
    cls.res1 = ARMA(series).fit(order=(4, 1), trend='nc', disp=-1)
    # forecast(10) is unpacked into three values; the third one is discarded.
    point_forecast, forecast_error, _unused = cls.res1.forecast(10)
    cls.res1.forecast_res = point_forecast
    cls.res1.forecast_err = forecast_error
    cls.res2 = results_arma.Y_arma41()
    cls.decimal_maroots = DECIMAL_3
def forecast_out_model(data, order=(3, 0)):
    """Produce rolling one-step forecasts of the parameters of one model.

    The first half of ``data`` is passed through unchanged; every later row
    is replaced by a one-step forecast from a model fitted on the preceding
    window (ARMA for a single column, VAR otherwise).

    Parameters
    ----------
    data : DataFrame
        Parameters for one model only
    order : tuple
        ARMA order; ``order[0]`` is also used as ``maxlags`` for the VAR.

    Returns
    -------
    data : DataFrame
        Predicted parameters. The same structure as input.

    """
    nobs = data.shape[0]
    window = nobs // 2
    maxlags = order[0]
    pieces = [data[:window]]
    for first in range(nobs - window):
        chunk = data[first:first + window]
        if data.shape[1] == 1:
            fitted = ARMA(chunk, order=order).fit(method='css', disp=False)
            step = fitted.forecast(1)[0]
        else:
            fitted = VAR(chunk).fit(maxlags=maxlags)
            step = fitted.forecast(np.atleast_2d(chunk), 1)
        pieces.append(step)
    return np.vstack(pieces)
def test_compare_arma():
    """Preliminary comparison of the ARMA fit with ``Arma`` LS and MLE fits.

    The parameter ratios are compared with ones at ``decimal=1``, which
    corresponds to a threshold of about 5% difference. No random seed is set
    for now, and the results returned by the fit methods are incomplete.
    """
    sample = fa.ArmaFft([1, -0.5], [1., 0.4], 40).generate_sample(
        size=200, burnin=1000)

    reference = ARMA(sample).fit((1, 1), trend='nc', disp=-1)
    ref_params = reference.params

    conditional = Arma(sample)
    ls_fit = conditional.fit(order=(1, 1))
    mle_fit = conditional.fit_mle(order=(1, 1), start_params=[0.4, 0.4, 1.],
                                  disp=0)

    unit_ratio = np.ones(ref_params.shape)
    assert_almost_equal(ls_fit[0] / ref_params, unit_ratio, decimal=1)
    # The MLE params also contain the variance estimate as the last element.
    assert_almost_equal(mle_fit.params[:-1] / ref_params, unit_ratio,
                        decimal=1)
def certain_model(self, p, q):
    """Fit an ARMA(p, q) model with the CSS method and cache its outputs.

    On success the fit results, order, BIC, in-sample prediction and a copy
    of the residuals are stored on ``self``. On failure a hint is printed
    and attributes assigned before the failure keep their new values.
    """
    model = ARMA(self.data_ts, order=(p, q))
    try:
        self.properModel = model.fit(disp=-1, method='css')
        self.p = p
        self.q = q
        self.bic = self.properModel.bic
        self.predict_ts = self.properModel.predict()
        self.resid_ts = deepcopy(self.properModel.resid)
    except Exception:
        # ``Exception`` rather than a bare except, so KeyboardInterrupt and
        # SystemExit are no longer swallowed. The parenthesised print works
        # on both Python 2 and Python 3.
        print('You can not fit the model with this parameter p,q, '
              'please use the get_proper_model method to get the best model')
def proper_model(timeseries, maxLag):
    """Grid-search ARMA(p, q) orders below ``maxLag`` and keep the lowest BIC.

    Parameters
    ----------
    timeseries : array-like
        Series handed to ``ARMA``.
    maxLag : int
        Exclusive upper bound for both ``p`` and ``q``.

    Returns
    -------
    The fit results with the smallest BIC, or ``None`` when no candidate
    could be fitted (previously this raised ``UnboundLocalError``).
    """
    init_bic = 1000000000
    model_return = None
    for p in np.arange(maxLag):
        for q in np.arange(maxLag):
            model = ARMA(timeseries, order=(p, q))
            try:
                results_ARMA = model.fit(disp=0, method='css')
            except Exception:
                # Orders that cannot be fitted are skipped on purpose.
                continue
            bic = results_ARMA.bic
            if bic < init_bic:
                model_return = results_ARMA
                init_bic = bic
    return model_return
def MAmodel(order_value, shareFeature_data):
    """Fit an MA(``order_value``) model and predict the tail of the series.

    Parameters
    ----------
    order_value : int
        Moving-average order ``q``; the model is ``ARMA(order=(0, q))``.
    shareFeature_data : array-like with a ``size`` attribute
        Series to model.

    Returns
    -------
    Predictions from index ``trainSize`` (70% of the data plus one) through
    index ``dataSize``.

    NOTE(review): the model is fitted on the full series, not only on the
    training part; the unused train/test slices were removed. Confirm whether
    fitting on the training slice was intended.
    """
    dataSize = shareFeature_data.size
    # Index where the held-out 30% starts.
    trainSize = int(dataSize * 70 / 100 + 1)

    model_ma = ARMA(shareFeature_data, order=(0, order_value))
    res = model_ma.fit()
    return res.predict(start=trainSize, end=dataSize)
def get_arma_forecast(ts, forecast_start, forecast_periods, pq_order, pickle_path=None):
    """Forecast ``forecast_periods`` steps of ``ts`` from ``forecast_start``.

    Parameters
    ----------
    ts : Series
        Series whose index contains ``forecast_start``.
    forecast_start : str
        Date string, parsed to locate the start position in ``ts.index``.
    forecast_periods : int
        Number of steps after the start position to predict.
    pq_order : tuple
        ``(p, q)`` order used when a new model is fitted.
    pickle_path : str, optional
        Path to pickled fit results. When ``None`` a new ARMA model is
        fitted on ``ts.values``.

    Returns
    -------
    Series
        The predictions after dropping the first predicted value, indexed by
        the matching dates.
    """
    dates = ts.index
    start = dates.get_loc(pandas.datetools.parse(forecast_start))
    end = start + forecast_periods
    # The original tested ``pickle is None`` (the module), so the fitting
    # branch could never run.
    if pickle_path is None:
        arma_fitted = ARMA(ts.values, order=pq_order).fit()
    else:
        # NOTE: unpickling can execute arbitrary code; only load trusted
        # files.
        with open(pickle_path, "rb") as handle:
            arma_fitted = pickle.load(handle)
    forecast_values = arma_fitted.predict(start, end)
    # predict(start, end) is indexed against forecast_periods + 1 dates; the
    # previous hard-coded periods=19 only matched forecast_periods == 18.
    forecast_index = date_range(forecast_start, periods=forecast_periods + 1)
    return Series(forecast_values[1:], index=forecast_index[1:])
def Do_ARMA(WIFIAPTag, TrainTime, PredictTime, p, q, Draw=False):
    """Fit ARMA(p, q) on the two days before the prediction window and predict.

    The training slice spans from two days before ``PredictTime[0]`` up to
    ten minutes before it. ``TrainTime`` is accepted but not used. When
    ``Draw`` equals ``True`` the fitted values, the training series and the
    prediction are plotted. Returns the prediction for
    ``PredictTime[0]`` .. ``PredictTime[1]``.
    """
    from statsmodels.tsa.arima_model import ARMA

    window_start = PredictTime[0] - timedelta(days=2)
    window_end = PredictTime[0] - timedelta(minutes=10)
    series = Get_Part_of_TimeSeries(GetTimeSeries(WIFIAPTag),
                                    [window_start, window_end])

    fitted = ARMA(series, (p, q)).fit()
    forecast = fitted.predict(start=str(PredictTime[0]),
                              end=str(PredictTime[1]))

    if Draw == True:
        plt.rc('figure', figsize=(12, 8))
        plt.plot(fitted.fittedvalues, 'r')
        plt.plot(series)
        plt.plot(forecast, 'g-')
    return forecast
def create_model(self, p=None, q=None):
    """Fit an ARMA model, selecting the order automatically when none is given.

    When both ``p`` and ``q`` are ``None`` the order chosen by
    ``self._proper_model()`` (``self.p``, ``self.q``) is used. On success the
    fit results, the order actually used, the BIC, the in-sample prediction
    and a copy of the residuals are stored on ``self``. Failures are printed
    rather than raised.
    """
    if p is None and q is None:
        self._proper_model()
        # Keep the selected order. The original wrote the ``None`` arguments
        # back to self.p / self.q after the fit, losing the selection.
        p, q = self.p, self.q
    model = ARMA(self.data_ts, order=(p, q))
    try:
        self.properModel = model.fit(disp=-1, method="css")
        self.p = p
        self.q = q
        self.bic = self.properModel.bic
        self.predict_ts = self.properModel.predict()
        self.resid_ts = deepcopy(self.properModel.resid)
    except Exception as e:
        print("create_model ERROR!\n", e)
def proper_model(timeseries, maxLag):
    """Return the CSS-fitted ARMA(p, q) results with the lowest BIC.

    Every order with ``0 <= p, q < maxLag`` is tried; orders that fail to
    fit are skipped. Returns ``None`` when nothing could be fitted.
    """
    best_bic = 1000000000
    best_results = None
    for p in np.arange(maxLag):
        for q in np.arange(maxLag):
            candidate = ARMA(timeseries, order=(p, q))
            try:
                fitted = candidate.fit(disp=0, method='css')
            except Exception:
                # Narrowed from a bare ``except:`` so KeyboardInterrupt and
                # SystemExit still propagate.
                continue
            if fitted.bic < best_bic:
                best_results = fitted
                best_bic = fitted.bic
    return best_results
def sm_arma(file='weather.npy', p=3, q=3, n=30):
    """
    Build an ARMA model with statsmodel and predict future n values.

    All orders (i, j) with 1 <= i <= p and 1 <= j <= q are fitted by MLE
    with a constant; the one with the lowest AIC is plotted against the
    differenced data.

    Parameters:
        file (str): data file
        p (int): maximum order of autoregressive model
        q (int): maximum order of moving average model
        n (int): number of values to predict
    Return:
        aic (float): aic of optimal model
    """
    # Initialize the data and parameters for the data
    z = np.diff(np.load(file))
    l = len(z)
    min_aic = np.inf
    bestp = 0
    bestq = 0
    best_fit = None
    datetime_col = pd.date_range(start='04-13-2019t19:56', periods=(l), freq="1h")
    data = pd.DataFrame(z, index=datetime_col, columns=["weather"])

    # Identify the best model; the fitted results are kept so the winner
    # does not have to be refitted afterwards.
    for i in range(1, p + 1):
        for j in range(1, q + 1):
            fitted = ARMA(z, order=(i, j)).fit(method='mle', trend='c')
            if fitted.aic < min_aic:
                min_aic = fitted.aic
                bestp = i
                bestq = j
                best_fit = fitted

    if best_fit is None:
        # No candidate improved on the initial AIC; fall back to the
        # (bestp, bestq) = (0, 0) fit as before.
        best_fit = ARMA(z, order=(bestp, bestq)).fit(method='mle', trend='c')

    # The horizon now follows ``n`` (it was hard-coded to 30), so the
    # prediction length matches the l + n + 1 dates built below.
    pred = best_fit.predict(start=0, end=(l + n))

    # Plotting the best fit models and results
    plt.figure(figsize=(12, 8))
    datetime_col = pd.date_range(start='04-13-2019t19:56', periods=(l + n + 1), freq="1h")
    pred_df = pd.DataFrame(pred, index=datetime_col, columns=["weather"])
    data["weather"].plot(label="Data")
    pred_df["weather"].plot(label="Predicted")
    plt.title("Stats ARMA(" + str(bestp) + "," + str(bestq) + ")")
    plt.ylabel("Change in Temperature")
    plt.xlabel("Dates")
    plt.legend()
    plt.show()
    return min_aic
def predict():
    """Compare stats models.

    Reads ``N_CRIMES`` from ``crime_gr.csv``, holds out the last ten values
    and prints the RMSE of a ten-step forecast for an autoregression, a
    moving-average model and simple exponential smoothing.
    """
    crimes = pd.read_csv('crime_gr.csv')['N_CRIMES']
    train = list(crimes[:-10])
    test = list(crimes[-10:])
    first_step = len(train)
    last_step = len(train) + 9

    def report(label, fitted):
        # Forecast the held-out range and print its error.
        forecast = fitted.predict(first_step, last_step)
        print(label, rmse(test, forecast))

    report('Autoregression', AR(train).fit())
    report('Moving average', ARMA(train, order=(0, 1)).fit(disp=False))
    report('Exp', SimpleExpSmoothing(train).fit())
def _proper_model(self):
    """Search ARMA(p, q) orders below ``self.maxLag`` for a lower BIC.

    Whenever a candidate's BIC is below ``self.bic``, the order, fit
    results, BIC, a copy of the residuals and the in-sample prediction are
    stored on ``self``. Orders that cannot be fitted are skipped.
    """
    for p in np.arange(self.maxLag):
        for q in np.arange(self.maxLag):
            model = ARMA(self.data_ts, order=(p, q))
            try:
                results_ARMA = model.fit(disp=-1, method='css')
            except Exception:
                # Narrowed from a bare ``except:`` so KeyboardInterrupt and
                # SystemExit are not swallowed during the search.
                continue
            bic = results_ARMA.bic
            if bic < self.bic:
                self.p = p
                self.q = q
                self.properModel = results_ARMA
                self.bic = bic
                self.resid_ts = deepcopy(self.properModel.resid)
                self.predict_ts = self.properModel.predict()
def errorHandle(timeSer):
    """Pick the AR order among 3, 5, 7 and 8 with the lowest AIC.

    Parameters
    ----------
    timeSer : array-like
        Series fitted with ``ARMA(order=(i, 0))`` for each candidate ``i``.

    Returns
    -------
    int
        The candidate order with the smallest AIC.

    Raises
    ------
    ValueError
        If no candidate yields a comparable AIC (for example all NaN).
    """
    # float('inf') replaces the Python-2-only sys.maxint.
    best_aic = float('inf')
    p = None
    for i in [3, 5, 7, 8]:
        current_aic = ARMA(timeSer, order=(i, 0)).fit(disp=0).aic
        if current_aic < best_aic:
            # The running minimum was never updated before, so the last
            # candidate always won.
            best_aic = current_aic
            p = i
    if p is None:
        raise ValueError('no candidate AR order produced a comparable AIC')
    return p
def fit(self, series: TimeSeries):
    """Fit ARIMA when ``self.d > 0``, otherwise ARMA, on ``series.values()``.

    The parent ``fit`` runs first; the fit results are stored in
    ``self.model``.
    """
    super().fit(series)
    if self.d > 0:
        estimator = ARIMA(series.values(), order=(self.p, self.d, self.q))
    else:
        estimator = ARMA(series.values(), order=(self.p, self.q))
    self.model = estimator.fit(disp=0)
def update(self):
    """Refit an MA(1) model on the recent arrivals and predict the next value.

    The training window is the last ``self.window`` entries of
    ``self.arrivals`` before ``self.index``. On success the fit results are
    stored in ``self.model`` and the one-step prediction in
    ``self.prediction`` (the method then returns ``None``). If the fit
    fails, ``0`` is returned and the stored state is left untouched.
    """
    begin = max(0, self.index - self.window)
    data = self.arrivals[begin:self.index]

    model = ARMA(data, order=(0, 1))
    try:
        model_fit = model.fit(disp=False)
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt and
        # SystemExit propagate.
        return 0
    self.model = model_fit

    # Predict the first step after the window.
    self.prediction = model_fit.predict(len(data), len(data))[0]
def _safe_arma_fit(y, order, model_kw, trend, fit_kw, start_params=None):
    """Fit an ARMA model, returning ``None`` instead of raising.

    A ``ValueError`` triggers a single retry with start parameters of 0.1
    when the caller supplied none and the error text matches the check
    below; any other failure yields ``None``.
    """
    try:
        model = ARMA(y, order=order, **model_kw)
        return model.fit(disp=0, trend=trend, start_params=start_params,
                         **fit_kw)
    except LinAlgError:
        # SVD convergence failure on badly misspecified models
        return None
    except ValueError as error:
        if start_params is not None:
            # User-supplied (or retry) start_params only get one chance, so
            # do not recurse again.
            return None
        # Same test as before, covering py2 (``.message``) and py3.
        # NOTE(review): the two halves test opposite conditions on
        # 'initial'; preserved as found, confirm the intent.
        message_check = (hasattr(error, 'message')
                         and 'initial' not in error.message)
        if not (message_check or 'initial' in str(error)):
            return None
        # Try a little harder; this should really be handled in fit.
        retry_params = [.1] * sum(order)
        if trend == 'c':
            retry_params = [.1] + retry_params
        return _safe_arma_fit(y, order, model_kw, trend, fit_kw,
                              retry_params)
    except:  # no idea what happened
        return None
def fit_ar(outputs, inputs, guessed_dim):
    """Fits an AR model of order p = guessed_dim.

    With several output columns, each column is fitted separately and the
    coefficients are averaged. Without exogenous inputs an ``AR`` model is
    fitted with BIC lag selection and the coefficients are zero-padded to
    ``guessed_dim``; with inputs an ``ARMA(guessed_dim, 0)`` model with
    ``exog`` is fitted.

    Args:
      outputs: Array with the output values from the LDS.
      inputs: Array with exogenous inputs values.
      guessed_dim: Guessed hidden dimension.

    Returns:
      - Fitted AR coefficients. All zeros (plus a ConvergenceWarning) if the
        ARMA fit raises ValueError or LinAlgError.
    """
    if outputs.shape[1] > 1:
        # If there are multiple output dimensions, fit autoregressive params
        # on each dimension separately and average. ``range`` replaces the
        # Python-2-only ``xrange``.
        params_list = [
            fit_ar(outputs[:, j:j + 1], inputs, guessed_dim)
            for j in range(outputs.shape[1])
        ]
        return np.mean(
            np.concatenate([a.reshape(1, -1) for a in params_list]), axis=0)
    if inputs is None:
        model = AR(outputs).fit(ic='bic', trend='c', maxlag=guessed_dim,
                                disp=0)
        arparams = np.zeros(guessed_dim)
        # Skip the trend term(s); only the selected k_ar lags are filled.
        arparams[:model.k_ar] = model.params[model.k_trend:]
        return arparams
    model = ARMA(outputs, order=(guessed_dim, 0), exog=inputs)
    try:
        arma_model = model.fit(start_ar_lags=guessed_dim, trend='c', disp=0)
        return arma_model.arparams
    except (ValueError, np.linalg.LinAlgError) as e:
        warnings.warn(str(e), sm_exceptions.ConvergenceWarning)
        return np.zeros(guessed_dim)
def setupClass(cls):
    """Fit ARMA(0, 2) with a constant by CSS and load the reference results."""
    series = y_arma[:, 11]
    estimator = ARMA(series)
    cls.res1 = estimator.fit(order=(0, 2), trend="c", method="css", disp=-1)
    cls.res2 = results_arma.Y_arma02c("css")
def setupClass(cls):
    """Fit ARMA(1, 4) with a constant and load the reference results.

    When ``fast_kalman`` is truthy the t-value precision is set to 0 and the
    covariance precision is lowered by one.
    """
    series = y_arma[:, 7]
    estimator = ARMA(series)
    cls.res1 = estimator.fit(order=(1, 4), trend="c", disp=-1)
    cls.res2 = results_arma.Y_arma14c()
    if not fast_kalman:
        return
    cls.decimal_t = 0
    cls.decimal_cov_params -= 1
def cons_similarity(dat):
    """Build three similarity matrices, one per axis of ``dat``.

    Returns ``[aux0, aux1, aux2]``:

    * ``aux0`` - exp(-d**2) of the cosine distance between the normalized
      axis-1 sums;
    * ``aux1`` - a banded matrix whose n-th off-diagonals hold
      ``|rho|**n``, with ``rho`` the AR(1) coefficient of the series summed
      over the other axes;
    * ``aux2`` - exp of a weighted squared difference between columns of
      the axis-1 sums.

    # assumes dat is a 3-D array - TODO confirm against callers
    """
    from statsmodels.tsa.arima_model import ARMA

    siz = dat.shape
    temp = np.sum(dat, axis=1)

    # 1. similarity from the cosine distance of the normalized sums
    tagvector = normalize(np.sum(dat, axis=1))
    cos_dist = 1 - cosine_similarity(tagvector)
    aux0 = np.exp(-(cos_dist**2))

    # 2. temporal similarity, based on the ACF of an AR(1) model
    ts = np.sum(np.sum(dat, axis=0), axis=1)
    ar1_fit = ARMA(ts, (1, 0)).fit()
    rho = np.abs(ar1_fit.arparams)
    n_time = siz[1]
    aux1 = np.diag(np.ones(n_time))
    for lag in range(1, n_time):
        band = np.ones(n_time - lag)
        aux1 = (aux1 + np.diag(band, -lag) * rho**lag
                + np.diag(np.ones(n_time - lag), lag) * rho**lag)

    # 3. correlation between topics
    n_topics = siz[2]
    aux2 = np.diag(np.ones(n_topics))
    Pl = np.sum(temp, axis=1) / np.sum(temp)
    for i in range(n_topics):
        for j in range(n_topics):
            scaled = (temp[:, i] - temp[:, j]) / np.max(temp, 1)
            aux2[i, j] = np.exp(-np.sum((scaled**2) * Pl))

    return [aux0, aux1, aux2]
def ou_estimation(cum_residual):
    """Estimate an AR(1) fit quality and reversion speed for every column.

    Parameters
    ----------
    cum_residual : DataFrame
        One cumulative residual series per column.

    Returns
    -------
    dict
        ``'R_square'``: Series of ``1 - var(resid) / var(series)`` and
        ``'k'``: Series of ``-log(b) * 252`` where ``b`` is the second fitted
        parameter. Columns whose fit fails get ``0.1`` for both values.
    """
    # Variance of each stock's cumulative residual series.
    SST = cum_residual.var()
    R_square, k = [], []
    for i in range(cum_residual.shape[1]):
        try:
            arma = ARMA(cum_residual.iloc[:, i], order=(1, 0)).fit(
                maxiter=100, disp=-1)
        except Exception:
            # Narrowed from a bare ``except:``; failed fits keep the
            # placeholder values.
            R_square.append(0.1)
            k.append(0.1)
            continue
        # Positional access. ``params[1]`` and ``SST[i]`` would be looked up
        # by label on an integer-labelled index, which need not be column i.
        b = np.asarray(arma.params)[1]
        SSE = arma.resid.var()
        R_square.append(1 - SSE / SST.iloc[i])
        k.append(-np.log(b) * 252)
    R_square = pd.Series(R_square, index=cum_residual.columns, name='R_square')
    k = pd.Series(k, index=cum_residual.columns, name='k')
    result = {'R_square': R_square, 'k': k}
    return result
def get_best_arma_model(df):
    """
    loops through all arma models with orders 0..4 and returns the best one
    (lowest AIC) based on the dataframe passed

    :param df: data handed to ``ARMA``
    :return: ``(best_aic, best_order, best_mdl)``; ``(inf, None, None)`` when
        no order could be fitted
    """
    best_aic = np.inf
    best_order = None
    best_mdl = None

    rng = range(5)  # [0, 1, 2, 3, 4]
    for i in rng:
        for j in rng:
            try:
                tmp_mdl = ARMA(df, order=(i, j)).fit(method='mle', trend='nc', disp=-1)
                tmp_aic = tmp_mdl.aic
                if tmp_aic < best_aic:
                    best_aic = tmp_aic
                    best_order = (i, j)
                    best_mdl = tmp_mdl
            except Exception:
                # Narrowed from a bare ``except:``; orders that fail to fit
                # are skipped without hiding KeyboardInterrupt/SystemExit.
                continue
    print('aic: {:6.5f} | order: {}'.format(best_aic, best_order))
    return best_aic, best_order, best_mdl
def ouMLEfit(x, dt=1, verbose=False):
    '''
    Maximum likelihood OU fit, based on
    1. MLE estimates of AR(1) parameters of x
    2. OU parameters from the AR(1) fit

    The OU process fitted to the data is: dX = k(x-mu)dt + sigma*dW

    Parameters
    ----------
    x : list or array of float
        Observations.
    dt : float, optional
        Time/distance between observations. The default is 1.
    verbose : bool, optional
        Print the fit summary when true.

    Returns
    -------
    mu : float
        Mean of the process; x converges to mu.
    k : float
        Reversion rate.
    sigma : float
        Uncertainty in the process.
    '''
    from statsmodels.tsa.arima_model import ARMA

    # AR(1) fit
    mod = ARMA(x, order=(1, 0))
    result = mod.fit()
    if verbose:
        print(result.summary())

    mu, b = result.params
    # NOTE(review): the usual AR(1)->OU mapping is k = -log(b) / dt (no
    # square root) and sigma = se * sqrt(-2 * log(b) / ((1 - b**2) * dt)).
    # The formulas below are kept as found - confirm before relying on them.
    k = np.sqrt(-np.log(b) / dt)
    se = np.std(result.resid)
    sigma = se * np.sqrt((1 - 2 * np.log(b)) / ((1 - b**2) * dt))
    # The unused intermediate ``a = (1 - exp(-k*dt)) * mu`` was removed.
    return (mu, k, sigma)
def modelAR(timeSer, path, diff, date, lags=20, showFig=True):
    """Fit an AR(p) model and report autocorrelation of the squared residuals.

    ``p`` comes from ``p_q_choice(timeSer)`` and the fitted series from
    ``productDiff(path, diff, date)``. The squared residuals are published
    in the module-level ``resid2``. Two ``close`` threads (arguments 5 and
    10) are started before any plotting. Returns a DataFrame indexed by
    ``lag`` with columns ``AC``, ``Q`` and ``P-value`` for lags 1..25.
    """
    global resid2

    p = p_q_choice(timeSer)
    print('check here1')
    ser = productDiff(path, diff, date)
    print('check here2')
    order = (p, 0)
    print('check here3')
    print(p)
    fitted = ARMA(ser, order).fit(disp=0)
    print('check here4')

    resid = fitted.resid
    resid2 = np.square(resid)

    closers = [Thread(target=close, args=(delay, )) for delay in (5, 10)]
    for closer in closers:
        closer.start()

    if showFig:
        # PACF of the series and of the squared residuals.
        fig = plt.figure(figsize=(20, 6))
        upper = fig.add_subplot(211)
        fig = plot_pacf(ser, lags=lags, ax=upper)
        lower = fig.add_subplot(212)
        fig = plot_pacf(resid2, lags=lags, ax=lower)

        # Residuals and squared residuals.
        plt.figure(figsize=(20, 12))
        plt.subplot(211)
        plt.plot(resid, label='resid')
        plt.legend()
        plt.subplot(212)
        plt.plot(resid2, label='resid ** 2')
        plt.legend(loc=0)
        plt.show()

    autocorr, q_stat, p_value = acf(resid2, nlags=25, qstat=True)
    table = np.c_[range(1, 26), autocorr[1:], q_stat, p_value]
    output = DataFrame(table, columns=['lag', 'AC', 'Q', 'P-value'])
    return output.set_index('lag')
# fit: builds and fits one model per training frame in self._X_train.
#   1. Each frame is shifted so that values are positive (using the frame minimum
#      when it is negative), log-transformed and first-differenced; the last log
#      row is kept in self._final_logs.
#   2. Frames with more than one column get a `vector_ar` model, single-column
#      frames get an ARMA(arma_p, arma_q) model (orders read from hyperparams).
#   3. Multi-column models go through lag-order selection (`select_order(...).aic`)
#      with fallbacks on LinAlgError / ValueError; ARMA models are fitted directly.
#   Fitted results are appended to self._fits; returns CallResult(None).
# NOTE(review): after a LinAlgError `lags` is halved, but `select_order` is always
#   called with hyperparams['max_lags'], so the halved value does not appear to
#   reach the retry - confirm.
# NOTE(review): this source is whitespace-collapsed, so the nesting of the
#   while/try/else clauses cannot be determined here; the code is left untouched.
def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: ''' fits VAR model. Evaluates different lag orders up to maxlags, eval criterion = AIC ''' # log transformation for standardization, difference, drop NAs self._mins = [year.values.min() if year.values.min() < 0 else 0 for year in self._X_train] self._values = [year.apply(lambda x: x - min + 1) for year, min in zip(self._X_train, self._mins)] self._values = [np.log(year.values) for year in self._values] self._final_logs = [year[-1:,] for year in self._values] self._values = [np.diff(year,axis=0) for year in self._values] # use ARMA model in case some data only has 1 variable ## TODO - calculate these params automatically / hyperparams arma_p = self.hyperparams['arma_p'] arma_q = self.hyperparams['arma_q'] models = [vector_ar(vals, dates = original.index) if vals.shape[1] > 1 \ else ARMA(vals, order = (arma_p, arma_q), dates = original.index) for vals, original in zip(self._values, self._X_train)] self._fits = [] for vals, model in zip(self._values, models): # iteratively try fewer lags if problems with matrix decomposition if vals.shape[1] > 1: lags = self.hyperparams['max_lags'] while lags > 1: try: lags = model.select_order(maxlags = self.hyperparams['max_lags']).aic logging.debug('Successfully performed model order selection. Optimal order = {} lags'.format(lags)) if lags == 0: logging.debug('At least 1 coefficient is needed for prediction. Setting lag order to 1') lags = 1 self._lag_order = lags self._fits.append(model.fit(lags)) else: self._lag_order = lags self._fits.append(model.fit(lags)) break except np.linalg.LinAlgError: lags = lags // 2 logging.debug('Matrix decomposition error because max lag order is too high. 
Trying max lag order {}'.format(lags)) else: lags = self.hyperparams['max_lags'] while lags > 1: try: self._fits.append(model.fit(lags)) self._lag_order = lags logging.debug('Successfully fit model with lag order {}'.format(lags)) break except ValueError: logging.debug('Value Error - lag order {} is too large for the model. Trying lag order {} instead'.format(lags, lags - 1)) lags -=1 else: self._fits.append(model.fit(lags)) self._lag_order = lags logging.debug('Successfully fit model with lag order {}'.format(lags)) else: self._fits.append(model.fit(disp = -1)) return CallResult(None)
def arma2(week):
    """Predict the next weekly per-movie gross with an ARMA(2, 1) model.

    Reads the 2018 records of the ``weekly`` collection up to (excluding)
    ``week``, prints the average gross per movie for each week, then prints
    the one-step prediction.
    """
    weekly = db['weekly']
    averages = []
    for record in weekly.find({"Year": "2018"}):
        if int(record['Week#']) >= week:
            break
        overall = record['OverallGross($)'].replace(",", "")
        movies = record['TotalMovies']
        averages.append(int(overall) / int(movies))
    print(averages)

    # fit model
    fitted = ARMA(averages, order=(2, 1)).fit(disp=False)
    # make prediction for the step right after the sample
    next_step = len(averages)
    print(fitted.predict(next_step, next_step))
def setupClass(cls):
    """Fit ARMA(1, 1) without a constant by CSS and load the reference results."""
    series = y_arma[:, 0]
    estimator = ARMA(series)
    cls.res1 = estimator.fit(order=(1, 1), method="css", trend='nc', disp=-1)
    cls.res2 = results_arma.Y_arma11("css")
    cls.decimal_t = DECIMAL_1
def movingAverage():
    """Predict the Nov. 30 per-movie gross with an MA(1) model.

    Collects the per-movie gross of every ``Nov. 30`` record in the
    ``daily`` collection, drops the last value, prints the series before and
    after dropping, and prints the one-step prediction.
    """
    daily = db['daily']
    averages = []
    for record in daily.find({"Date": "Nov. 30"}):
        record['Year']  # read but unused; kept so a missing key still raises
        tracked = record['MoviesTracked']
        gross = record['Gross($)'].replace(",", "")
        averages.append(int(gross) / int(tracked))
    print(averages)
    del averages[-1]
    print(averages)

    # fit model
    fitted = ARMA(averages, order=(0, 1)).fit(disp=False)
    # make prediction for the step right after the sample
    next_step = len(averages)
    print(fitted.predict(next_step, next_step))
def draw_ar(ts, w):
    """Fit an AR(w) model, run a Ljung-Box check and plot fit against data.

    NOTE(review): the residual used for the Ljung-Box test comes from the
    outer-scope names ``xdata_pred`` / ``xdata`` (and ``lagnum``), not from
    this function's own prediction - confirm that this is intended.
    """
    fitted = ARMA(ts, order=(w, 0)).fit(disp=-1)
    ts_predict = fitted.predict()

    # Residuals; only the positive ones are tested.
    pred_error = (xdata_pred - xdata).dropna()
    pred_error = pred_error[pred_error > 0]

    lb, p = acorr_ljungbox(pred_error, lags=lagnum)
    # A p-value below 0.05 is treated as "not white noise".
    rejected = (p < 0.05).sum()
    if rejected > 0:
        print('模型ARIMA(0,1,1)不符合白噪声检验')
    else:
        print('模型ARIMA(0,1,1)符合白噪声检验')

    plt.clf()
    plt.plot(ts_predict, label="PDT")
    plt.plot(ts, label="ORG")
    plt.legend(loc="best")
    plt.title("AR Test %s" % w)
def fit_model(self, data, order=None, name="ARMA"):
    """Fit an ARMA or ARIMA model on ``data`` with the CSS method.

    Parameters
    ----------
    data : Series
        Input series; NaNs are dropped before fitting.
    order : tuple, optional
        Model order. When ``None`` it is chosen by ``self._proper_model``.
        For ARIMA the third element is always ``len(self.O_datas)``.
    name : str
        ``"ARMA"`` or ``"ARIMA"``.

    Returns
    -------
    The fit results. The order used is stored in ``self.order`` and printed.

    Raises
    ------
    ValueError
        If ``name`` is neither ``"ARMA"`` nor ``"ARIMA"`` (previously this
        surfaced later as an unbound ``model``).
    """
    if name == "ARMA":
        if order is None:
            order = self._proper_model(data=data)
        model = ARMA(data.dropna(), order=order)
    elif name == "ARIMA":
        if order is None:
            order = self._proper_model(data=data.dropna())
        order = order[0], order[1], len(self.O_datas)
        model = ARIMA(data.dropna(), order=order)
    else:
        raise ValueError("name must be 'ARMA' or 'ARIMA', got %r" % (name,))
    self.order = order
    print(order)
    result_arma = model.fit(disp=-1, method='css')
    return result_arma
def plot_sequence(self, y):
    """Plot ``y``, then plot a generated AR(1) sample and fit AR(1) to it.

    The x axis built for the input ``y`` is reused for the generated
    sample. ``seq_len``, ``c`` and ``d_model`` are read from the enclosing
    scope.
    """
    sns.set()
    x = np.linspace(0, seq_len, y.shape[0])
    plt.plot(x, y)
    plt.show()

    dataset = ARDataset(phis=[0.9], length=seq_len, c=c, noise_dim=d_model)
    loader, _, _ = dataset.data_loaders(16)
    # First item of the first batch, first feature channel.
    sample = next(loader)[0, :, 0].view(-1)
    y = sample.detach().cpu().numpy()
    plt.plot(x, y)
    plt.show()

    ar = ARMA(y, order=(1, 0))
    model_fit = ar.fit(trend='nc')
def _proper_model(self):
    """Search ARMA(p, q) orders below ``self.maxLag`` for a lower BIC.

    Progress and fit errors are printed. When a candidate's BIC is below
    ``self.bic`` the order, the fit results and the BIC are stored on
    ``self``.
    """
    for p in np.arange(self.maxLag):
        for q in np.arange(self.maxLag):
            print("p, q, bic:", p, q, self.bic)
            model = ARMA(self.data_ts, order=(p, q))
            try:
                result_ARMA = model.fit(disp=-1, method="css")
            except Exception as e:
                print("_proper_model Error!\n", e)
                continue
            bic = result_ARMA.bic
            if bic < self.bic:
                self.p = p
                self.q = q
                # Store the fit results, not the unfitted model:
                # ``properModel`` is read elsewhere for ``.bic``,
                # ``.predict()`` and ``.resid``, which only results provide.
                self.properModel = result_ARMA
                self.bic = bic
def autoregressiveMovingAverage2(day):
    """Predict the per-movie gross of day ``day`` with an ARMA(2, 1) model.

    Collects the 2018 per-movie gross from the ``daily`` collection, prints
    the actual value at index ``day``, trains on the values before it and
    prints the one-step prediction.
    """
    daily = db['daily']
    averages = []
    for record in daily.find({"Year": 2018}):
        record['Year']  # read but unused; kept so a missing key still raises
        tracked = record['MoviesTracked']
        gross = record['Gross($)'].replace(",", "")
        averages.append(int(gross) / int(tracked))
    print(averages[day])
    averages = averages[0:day]
    print(averages)

    # fit model
    fitted = ARMA(averages, order=(2, 1)).fit(disp=False)
    # make prediction for the step right after the sample
    next_step = len(averages)
    print(fitted.predict(next_step, next_step))
def mc_ar1_spectrum(self, data=None, N=1000, filter_type=None, filter_cutoff=None, spectrum='mtm'):
    """ calculates the Monte-Carlo spectrum with 1, 2.5, 5, 95, 97.5, 99 percentiles

    An AR(1) model is fitted to ``data``; ``N`` surrogate series are
    generated from its coefficient and noise level, optionally filtered,
    and their spectra are summarized.

    input:
    data           .. time series (defaults to ``self.ts``), must be an ndarray
    N              .. number of MC simulations
    filter_type    .. None, 'lowpass' or 'chebychev'
    filter_cutoff  .. int > 1, required when filter_type is given
    spectrum       .. 'mtm': multi-taper method, 'per': periodogram, 'Welch'

    output:
    mc_spectrum    .. dict with 'median', 'freq' and one entry per percentile

    raises:
    ValueError     .. if ``spectrum`` is not one of the three options
                      (previously an unbound ``freq`` error)
    """
    estimator_names = {'mtm': 'mtspectrum', 'per': 'periodogram', 'Welch': 'Welch'}
    if spectrum not in estimator_names:
        raise ValueError("spectrum must be 'mtm', 'per' or 'Welch', got %r" % (spectrum,))
    estimate = getattr(self, estimator_names[spectrum])

    if data is None:
        data = np.array(self.ts)
    assert type(data) == np.ndarray

    AM = ARMA(endog=data, order=(1, 0)).fit()
    phi, std = AM.arparams[0], np.sqrt(AM.sigma2)
    mc = self.mc_ar1_ARMA(phi=phi, std=std, n=len(data), N=N)

    if filter_type is not None:
        assert filter_type in ['lowpass', 'chebychev']
        assert type(filter_cutoff) == int
        assert filter_cutoff > 1
        # datapoints to remove from either end due to filter edge effects
        n = int(filter_cutoff / 2) + 1
        mc = mc[:, n:-n]
        if filter_type == 'lowpass':
            mc = lowpass(mc.T, filter_cutoff).T
        elif filter_type == 'chebychev':
            mc = chebychev(mc.T, filter_cutoff).T

    # A call without data only provides the frequency axis length.
    freq, _ = estimate()
    mc_spectra = np.zeros((N, len(freq)))
    for i in range(N):
        freq, mc_spectra[i, :] = estimate(data=mc[i, :])

    mc_spectrum = {}
    mc_spectrum['median'] = np.median(mc_spectra, axis=0)
    mc_spectrum['freq'] = freq
    for p in [1, 2.5, 5, 95, 97.5, 99]:
        mc_spectrum[str(p)] = np.percentile(mc_spectra, p, axis=0)
    return mc_spectrum
def _proper_model(self):
    """Grid-search ARMA orders and keep the one that lowers ``self.bic``.

    For every ``p`` and ``q`` below ``self.maxLag`` a CSS fit is attempted.
    A fit with a BIC below the stored one replaces the stored order,
    results, BIC, residual copy and in-sample prediction.
    """
    for p in np.arange(self.maxLag):
        for q in np.arange(self.maxLag):
            candidate = ARMA(self.data_ts, order=(p, q))
            try:
                fitted = candidate.fit(disp=-1, method='css')
            except Exception:
                # Was a bare ``except:``; unfittable orders are still
                # skipped, but interrupts are no longer swallowed.
                continue
            bic = fitted.bic
            if bic < self.bic:
                self.p = p
                self.q = q
                self.properModel = fitted
                self.bic = bic
                self.resid_ts = deepcopy(self.properModel.resid)
                self.predict_ts = self.properModel.predict()
def main():
    """Explore and model the Canadian average-temperature series.

    Loads ``FILE_NAME``, keeps the Canadian records from 1900 on, plots the
    series and its 12-period rolling mean, tests stationarity, prints the
    ARMA order selection, then fits ARMA(3, 3) and plots and predicts
    2003-01-01 .. 2023-12-01.
    """
    df = pd.read_csv(FILE_NAME, sep=',', skipinitialspace=True, encoding='utf-8')
    df = df.drop('AverageTemperatureUncertainty', axis=1)
    df = df[df.Country == 'Canada']
    df = df.drop('Country', axis=1)
    df.index = pd.to_datetime(df.dt)
    df = df.drop('dt', axis=1)
    # ``.loc`` replaces ``.ix``, which has been removed from pandas.
    df = df.loc['1900-01-01':]
    df = df.sort_index()

    # Display AT
    df.AverageTemperature.fillna(method='pad', inplace=True)
    mp.plot(df.AverageTemperature)
    mp.show()

    # Rolling Mean
    df.AverageTemperature.plot.line(style='b', legend=True, grid=True, label='Avg. Temperature (AT)')
    ax = df.AverageTemperature.rolling(window=12).mean().plot.line(style='r', legend=True, label='Mean AT')
    ax.set_xlabel('Date')
    mp.legend(loc='best')
    mp.title('Weather timeseries visualization')
    mp.show()

    test_stationarity(df.AverageTemperature)

    res = arma_order_select_ic(df.AverageTemperature, ic=['aic', 'bic'], trend='nc',
                               max_ar=4, max_ma=4, fit_kw={'method': 'css-mle'})
    # Parenthesised so it runs on both Python 2 and Python 3.
    print(res)

    # Fit the model
    ts = pd.Series(df.AverageTemperature, index=df.index)
    model = ARMA(ts, order=(3, 3))
    results = model.fit(trend='nc', method='css-mle')
    print(results.summary2())

    # Plot the model
    fig, ax = mp.subplots(figsize=(10, 8))
    fig = results.plot_predict('01/01/2003', '12/01/2023', ax=ax)
    ax.legend(loc='lower left')
    mp.title('Weather Time Series prediction')
    mp.show()

    predictions = results.predict('01/01/2003', '12/01/2023')
def test_glsar_arima(self):
    """Compare an iterated GLSAR(3) fit with a CSS ARMA(3, 0) fit with exog.

    Also checks the length and content of the GLSAR iteration history.
    """
    from statsmodels.tsa.arima_model import ARMA

    endog = self.res.model.endog
    exog = self.res.model.exog

    glsar = GLSAR(endog, exog, 3)
    glsar_res = glsar.iterative_fit(10)

    arma = ARMA(endog, order=(3, 0), exog=exog[:, :-1])
    arma_res = arma.fit(method='css', iprint=0, disp=0)

    # Positions of the ARMA parameters that line up with the GLSAR ones.
    aligned = [1, 2, 0]
    assert_allclose(glsar_res.params, arma_res.params[aligned],
                    atol=0.01, rtol=1e-3)
    assert_allclose(glsar_res.model.rho, arma_res.params[3:],
                    atol=0.05, rtol=1e-3)
    assert_allclose(glsar_res.bse, arma_res.bse[aligned],
                    atol=0.015, rtol=1e-3)

    assert_equal(len(glsar_res.history['params']), 5)
    # this should be identical, history has last fit
    assert_equal(glsar_res.history['params'][-1], glsar_res.params)

    second = glsar.iterative_fit(4, rtol=0)
    assert_equal(len(second.history['params']), 4)
    assert_equal(len(second.history['rho']), 4)
def predict_arma_next_days(self, item):
    """Forecast ``self.n_days`` values of ``df_train[item]`` out of sample.

    The series is sorted by index and truncated at position ``self.fc``; an
    ARMA(``self.p``, ``self.q``) model with daily frequency is fitted on the
    truncated history.

    Returns ``(history, value at self.fc, forecast)``.
    """
    series = df_train[item].sort_index()
    ts_last_day = series[self.fc]   # the real value right after the history
    history = series[0:self.fc]     # everything before ``self.fc``

    fitted = ARMA(history, order=(self.p, self.q), freq='D').fit(disp=False)

    forecast = _arma_predict_out_of_sample(
        fitted.params, self.n_days, fitted.resid,
        fitted.k_ar, fitted.k_ma, fitted.k_trend, fitted.k_exog,
        endog=history, exog=None, start=len(history))
    return history, ts_last_day, forecast
print 'p-value: ', d_order0[1] print'Critical values: ', d_order0[4] if d_order0[0] > d_order0[4]['5%']: print 'Time Series is nonstationary' else: print 'Time Series is stationary' # # selecting parameter order = sm.tsa.arma_order_select_ic(ts_diff_1, max_ar=6, max_ma=3, ic=['aic']) # print order try: # ARMA model model = ARMA(ts_diff_1,(order['aic_min_order'][0],order['aic_min_order'][1])) predict_diff_1 = model.fit(disp=False).forecast(14)[0] # restore predict = np.cumsum(predict_diff_1) predict = predict + np.mean(ts[-7:]) # use continuity is better stander = sklearn.preprocessing.StandardScaler() predict = stander.fit_transform(predict) predict = stander.fit(ts[-7:]).inverse_transform(predict) predict = np.round(predict) print predict predict_result = np.vstack((predict_result,predict))
print 'Results of Dickey-Fuller Test:' dftest = adfuller(timeseries, autolag='AIC') dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used']) for key,value in dftest[4].items(): dfoutput['Critical Value (%s)'%key] = value print dfoutput ts_log = np.log(ts) # ts_log_diff = ts_log - ts_log.shift() # ts_log_diff.dropna(inplace=True) # test_stationarity(ts_log_diff) from statsmodels.tsa.arima_model import ARIMA, ARMAResults, ARMA arma_mod30 = ARMA(ts_log , (2,1)).fit() # # print ts_log # # print arma_mod30 predict_sunspots = arma_mod30.predict('26-09-2014 00:00', '26-04-2015 23:00', dynamic=True) p_exp = np.exp(predict_sunspots) print len(p_exp) # from math import round f = open("output.csv","w") f.write("Datetime,Count\n") for i in range(1,len(p_exp)): f.write(datetime_col[i-1] + "," + str(int(round(p_exp[i])))) f.write("\n") f.write("26-04-2015 23:00,"+str(int(round(p_exp[5111])))) f.write("\n")
, of one of the simulated series that you generated in the earlier exercise. Since the parameters are known for a simulated series, it is a good way to understand the estimation routines before applying them to real data. For simulated_data_1 with a true ϕ of 0.9, you will print out the estimate of ϕ. In addition, you will also print out the entire output that is produced when you fit a time series, so you can get an idea of what other tests and summary statistics are available in statsmodels. INSTRUCTIONS 100XP Import the class ARMA in the module statsmodels.tsa.arima_model. Create an instance of the ARMA class called mod using the simulated data simulated_data_1 and the order (p,q) of the model (in this case, for an AR(1), order=(1,0)). Fit the model mod using the method .fit() and save it in a results object called res. Print out the entire summary of results using the .summary() method. Just print out an estimate of the constant and ϕ using the .params attribute (no parentheses). ''' # Import the ARMA module from statsmodels from statsmodels.tsa.arima_model import ARMA # Fit an AR(1) model to the first simulated data mod = ARMA(simulated_data_1, order=(1,0)) res = mod.fit() # Print out summary information on the fit print(res.summary()) # Print out the estimate for the constant and for phi print("When the true phi=0.9, the estimate of phi (and the constant) are:") print(res.params)
print res[0] proc = ArmaProcess.from_coeffs(res[0][: order[0]], res[0][: order[1]]) print ar, ma proc.nobs = nobs # TODO: bug nobs is None, not needed ?, used in ArmaProcess.__repr__ print proc.ar, proc.ma print proc.ar_roots(), proc.ma_roots() from statsmodels.tsa.arma_mle import Arma modn = Arma(x) resn = modn.fit_mle(order=order) moda = ARMA(x, order=order) resa = moda.fit(trend="nc") print "\nparameter estimates" print "ls ", res[0] print "norm", resn.params print "t ", res2.params print "A ", resa.params print "\nstandard deviation of parameter estimates" # print 'ls ', res[0] #TODO: not available yet print "norm", resn.bse print "t ", res2.bse print "A ", resa.bse print "A/t-1", resa.bse / res2.bse[:3] - 1
plt.title('Partial Autocorrelation Function (p=1)') plt.tight_layout() ''' In this plot, the two dotted lines on either side of 0 are the confidence intervals. These can be used to determine the p and q values as: - p: The lag value where the PACF chart crosses the upper confidence interval for the first time, in this case p=1. - q: The lag value where the ACF chart crosses the upper confidence interval for the first time, in this case q=1. ''' ''' ### Fit ARMA model with statsmodels 1. Define the model by calling `ARMA()` and passing in the p and q parameters. 2. The model is prepared on the training data by calling the `fit()` function. 3. Predictions can be made by calling the `predict()` function and specifying the index of the time or times to be predicted. ''' from statsmodels.tsa.arima_model import ARMA model = ARMA(x, order=(1,1)).fit() # fit model print(model.summary()) plt.plot(x) plt.plot(model.predict(), color='red') plt.title('RSS: %.4f'% sum((model.fittedvalues-x)**2))
plt.figure() plt.plot(modc.error_estimate) #plt.show() from statsmodels.miscmodels.tmodel import TArma modct = TArma(x) reslst = modc.fit(order=(1,1)) print(reslst[0]) rescmt = modct.fit_mle(order=(1,1), start_params=[-0.4,0.4, 10, 1.],maxiter=500, maxfun=500) print(rescmt.params) from statsmodels.tsa.arima_model import ARMA mkf = ARMA(x) ##rkf = mkf.fit((1,1)) ##rkf.params rkf = mkf.fit((1,1), trend='nc') print(rkf.params) from statsmodels.tsa.arima_process import arma_generate_sample np.random.seed(12345) y_arma22 = arma_generate_sample([1.,-.85,.35, -0.1],[1,.25,-.7], nsample=1000) ##arma22 = ARMA(y_arma22) ##res22 = arma22.fit(trend = 'nc', order=(2,2)) ##print 'kf ',res22.params ##res22css = arma22.fit(method='css',trend = 'nc', order=(2,2)) ##print 'css', res22css.params mod22 = Arma(y_arma22) resls22 = mod22.fit(order=(2,2))
print(res[0]) proc = ArmaProcess.from_coeffs(res[0][:order[0]], res[0][:order[1]]) print(ar, ma) proc.nobs = nobs # TODO: bug nobs is None, not needed ?, used in ArmaProcess.__repr__ print(proc.ar, proc.ma) print(proc.ar_roots(), proc.ma_roots()) from statsmodels.tsa.arma_mle import Arma modn = Arma(x) resn = modn.fit_mle(order=order) moda = ARMA(x, order=order) resa = moda.fit( trend='nc') print('\nparameter estimates') print('ls ', res[0]) print('norm', resn.params) print('t ', res2.params) print('A ', resa.params) print('\nstandard deviation of parameter estimates') #print 'ls ', res[0] #TODO: not available yet print('norm', resn.bse) print('t ', res2.bse) print('A ', resa.bse) print('A/t-1', resa.bse / res2.bse[:3] - 1)
# ACF and PACF plots.
def draw_acf_pacf(ts, lags=31):
    """Show the autocorrelation and partial autocorrelation of ``ts``.

    Parameters
    ----------
    ts : Series
        Series to analyse.
    lags : int
        Number of lags to plot. The parameter was previously ignored and 31
        was always used; 31 is now the default so default calls are
        unchanged.
    """
    f = plt.figure(facecolor='white')
    ax1 = f.add_subplot(211)
    plot_acf(ts, lags=lags, ax=ax1)
    ax2 = f.add_subplot(212)
    plot_pacf(ts, lags=lags, ax=ax2)
    plt.show()


ts_log = np.log(ts)
diff_12 = ts_log.diff(1)
diff_12.dropna(inplace=True)

model = ARMA(diff_12, order=(1, 1))
result_arma = model.fit(disp=-1, method='css')

predict_ts = result_arma.predict()
# Undo the first-order differencing.
diff_shift_ts = diff_12.shift(1)
diff_recover_1 = predict_ts.add(diff_shift_ts)
# NOTE(review): this step scales by 12 and subtracts an 11-period rolling sum
# although no moving average is applied in the lines above - confirm.
rol_sum = ts_log.rolling(window=11).sum()
rol_recover = diff_recover_1*12 - rol_sum.shift(1)
# Undo the log transform.
log_recover = np.exp(rol_recover)
log_recover.dropna(inplace=True)
def queryandinsert():
    """Query recent online counts, fit an ARMA model for 'JP' and plot the result.

    Main entry point, meant to be called from the script's main routine only;
    the original author warns against calling it from other packages.

    Steps performed by the visible code:

    1. Read connection settings with ``getdbinfo`` for 'OMS', 'GTBU' and
       'REMOTE' and open one MySQLdb connection and cursor for each.
    2. Run ``queryStatementRemote`` on the 'REMOTE' cursor and feed the rows,
       fetched through ``fetchsome``, into ``accumulatOnlineNumber`` together
       with the global ``testingDict``.
    3. Build a pandas Series for country 'JP', take its log, compute ACF/PACF
       and fit ``ARMA(tsJP_log, (5, 2))``.
    4. Produce 300 out-of-sample steps twice (private statsmodels helper and
       ``res.forecast``), print them and write a plotly HTML file.

    NOTE(review): the other SQL statements defined below are not executed in
    the visible code, ``starttime`` is not read again, and neither the
    connections nor the cursors are closed here.

    Returns
    -------
    None
    """
    global gtbuDict  # stores query data from the GTBU database
    global omsDict  # stores query data from the OMS database
    global presisDict
    global counter
    global testingDict
    starttime = datetime.datetime.now()
    print len(presisDict)
    print "connect to databae!"
    # Read the connection settings through the project's own helper.
    querydbinfoOMS = getdbinfo('OMS')
    querydbnameOMS = "wifi_data"
    querydbinfoGTBU = getdbinfo("GTBU")
    querydbnameGTBU = "ucloudplatform"
    insertdbinfo = getdbinfo('REMOTE')
    insertdbname = 'login_history'
    # Print the database information for verification.
    # NOTE(review): this prints every entry of querydbinfoOMS, which also
    # holds the 'pwd' key used for the connection below.
    for key, value in querydbinfoOMS.iteritems():
        print key + " : " + str(value)

    # Online numbers of the last two days for JP/DE/TR, oldest first.
    queryStatementRemote = """ SELECT epochTime,visitcountry,onlinenum FROM t_fordemo WHERE butype =2 AND visitcountry IN ('JP','DE','TR') AND epochTime BETWEEN DATE_SUB(NOW(),INTERVAL 2 DAY) AND NOW() ORDER BY epochTime ASC """
    # Get the online data which will be used to calculate the daily user
    # number (the daily user number is bigger than the max number, and the
    # max number is what is actually used in this scenario).
    queryStatementTraining = """ SELECT t1,t2,DATEDIFF(t2,t1) AS dif,imei,visitcountry FROM ( SELECT DATE(logindatetime) AS t1,DATE(logoutdatetime) AS t2, imei,visitcountry FROM t_usmguserloginlog WHERE visitcountry IN ('JP','DE','TR') ) AS z GROUP BY t1,t2,imei """
    # (output data) Get the max online number for each of these countries
    # every day. This record is incomplete due to constant network partitions,
    # so input and output dates have to be aligned by day.
    queryStatementOnline =""" SELECT epochTime,visitcountry,MAX(onlinenum) FROM ( SELECT DATE(epochTime) AS epochTime,visitcountry,onlinenum FROM t_fordemo WHERE butype =2 and visitcountry IN ('JP','DE','TR') ) AS z GROUP BY epochTime,visitcountry """
    # (input data) Get the order information which will be used to calculate
    # the daily maximum number for each country. This number can be far larger
    # than the real number for some countries.
    querystatementOMS = """ SELECT DATE(date_goabroad),DATE(date_repatriate),DATEDIFF(date_repatriate,date_goabroad),imei,package_id FROM tbl_order_basic WHERE imei IS NOT NULL AND (DATE(date_repatriate)) > '2016-01-01' AND DATE(date_goabroad) < DATE(NOW()) ORDER BY date_repatriate ASC """
    # NOTE(review): in the WHERE clause below AND binds tighter than OR, so
    # the data_status filter does not apply to the date_repatriate branch --
    # confirm that this is intended.
    querystatementOMSCount = """ SELECT date_goabroad,date_repatriate,DATEDIFF(date_repatriate,date_goabroad),t1.package_id,t3.iso2 FROM tbl_order_basic AS t1 LEFT JOIN tbl_package_countries AS t2 ON t1.package_id = t2.package_id LEFT JOIN tbl_country AS t3 ON t2.country_id = t3.pk_global_id WHERE t1.data_status = 0 AND DATE(date_goabroad) BETWEEN DATE(NOW()) AND DATE_ADD(NOW(),INTERVAL 3 MONTH) OR ( DATE(date_repatriate) >= DATE(NOW()) ) """

    # Establish the connections to the MySQL databases.
    querydbGTBU = MySQLdb.connect(user = querydbinfoGTBU['usr'],
                                  passwd = querydbinfoGTBU['pwd'],
                                  host = querydbinfoGTBU['host'],
                                  port = querydbinfoGTBU['port'],
                                  db = querydbnameGTBU)
    querydbOMS = MySQLdb.connect(user = querydbinfoOMS['usr'],
                                 passwd = querydbinfoOMS['pwd'],
                                 host = querydbinfoOMS['host'],
                                 port = querydbinfoOMS['port'],
                                 db = querydbnameOMS)
    insertdb = MySQLdb.connect(user = insertdbinfo['usr'],
                               passwd = insertdbinfo['pwd'],
                               host = insertdbinfo['host'],
                               port = insertdbinfo['port'],
                               db = insertdbname)
    queryCurGTBU = querydbGTBU.cursor()
    queryCurOMS = querydbOMS.cursor()
    insertCur = insertdb.cursor()

    print "executing query!!! By using generator!!!"
    # Only the 'REMOTE' cursor is used for a query in the visible code.
    insertCur.execute(queryStatementRemote)
    remoteGenerator = fetchsome(insertCur,100)  # fetchsome is a generator which fetches a certain number of rows each time
    for row in remoteGenerator:
        accumulatOnlineNumber(row,testingDict)

    # getTestingList returns an indexable result: [0] is used as the time
    # index, [1] as the country list and [2] as per-country data.
    onlineList = getTestingList(testingDict)
    countryList = onlineList[1]
    jpIndex = countryList.index('JP')
    datalist = onlineList[2][jpIndex]
    timelist = onlineList[0]
    tsJP = Series(datalist,index = timelist)
    df = DataFrame()
    df['JP'] = tsJP
    print df.index
    print df.columns
    print df

    # Model the log of the JP series.
    tsJP_log = np.log(tsJP)
    lag_acf = acf(tsJP_log,nlags=200)
    lag_pacf = pacf(tsJP_log,nlags=200,method='ols')
    # model = ARIMA(tsJP_log,order=(2,1,2))
    model = ARMA(tsJP_log,(5,2))
    res = model.fit(disp=-1)
    print "Here is the fit result"
    print res

    # Collect the pieces of the fit result that are passed to the
    # out-of-sample prediction helper below.
    params = res.params
    residuals = res.resid
    p = res.k_ar
    q = res.k_ma
    k_exog = res.k_exog
    k_trend = res.k_trend
    steps = 300
    # Out-of-sample prediction starting right after the last observation.
    # NOTE(review): the leading underscore suggests a private statsmodels
    # helper -- confirm it is safe to depend on.
    newP = _arma_predict_out_of_sample(params, steps, residuals, p, q, k_trend, k_exog, endog=tsJP_log, exog=None, start=len(tsJP_log))
    # forecast returns a triple: point forecast, standard error and
    # confidence interval.
    newF,stdF,confiF = res.forecast(steps)
    print newP
    # Back-transform from the log scale.
    newP = np.exp(newP)
    print newP
    print " Forecast below!!"
    print newF
    newF = np.exp(newF)
    print newF
    print stdF
    # NOTE(review): exponentiating the standard error of a log-scale forecast
    # does not give a standard error on the original scale -- confirm intent.
    stdF = np.exp(stdF)
    print stdF

    # The x axis is just the position index of the ACF values.
    x_axis = range(len(lag_acf))
    y_axis = lag_acf
    # Despite its variable name, this trace shows the ACF values.
    onlineEWMA=go.Scatter(
        x = x_axis,
        y = y_axis,
        mode = 'lines+markers',
        name = "lag_acf"
    )
    # NOTE(review): x_axis has len(lag_acf) points while newP has `steps`
    # values -- check that the two lengths match.
    onlinePre=go.Scatter(
        x = x_axis,
        y = newP,
        mode = 'lines+markers',
        name = "predictJP"
    )
    layout = dict(title = 'predicewma',
                  xaxis = dict(title = 'Date'),
                  yaxis = dict(title = 'online Number'),
                  )
    data = [onlineEWMA,onlinePre]
    fig = dict(data=data, layout=layout)
    # Write the chart as an HTML file without opening a browser.
    plot(fig,filename ="/ukl/apache-tomcat-7.0.67/webapps/demoplotly/EWMAprediction.html",auto_open=False)
def test_reset_trend():
    """Fit one ARMA instance with and then without a constant.

    The same model object is fitted twice with order (1, 1): first with
    ``trend="c"``, then with ``trend="nc"``. The first result must carry
    exactly one parameter more than the second.
    """
    series = y_arma[:, 0]
    model = ARMA(series)
    shared_kwargs = dict(order=(1, 1), disp=-1)

    # Order matters: the constant-trend fit has to run before the
    # no-constant fit on the very same model instance.
    with_const = model.fit(trend="c", **shared_kwargs)
    without_const = model.fit(trend="nc", **shared_kwargs)

    n_with = len(with_const.params)
    n_without = len(without_const.params)
    assert_equal(n_with, n_without + 1)