def arima_predict(train_dat, n_predictions, p=2, d=0, q=0):
    """Fit an ARIMA(p, d, q) model and predict the steps after the training data.

    Args:
        train_dat: Sequence of numeric observations used to fit the model.
        n_predictions: Number of steps to predict past the end of ``train_dat``.
        p: Autoregressive order (default 2).
        d: Differencing order (default 0).
        q: Moving-average order (default 0).

    Returns:
        The result of ``predict`` for positions ``len(train_dat)`` through
        ``len(train_dat) + n_predictions - 1`` (inclusive).
    """
    # ``np.float`` was only an alias of the builtin ``float`` and no longer
    # exists in current NumPy; the builtin yields the same float64 array.
    endog = np.array(train_dat).astype(float)
    model = ARIMA(endog, [p, d, q])
    fitted = model.fit(trend='c', disp=False)
    first_step = len(train_dat)
    last_step = first_step + n_predictions - 1
    return fitted.predict(first_step, last_step, exog=None, dynamic=False)
def ARIMA_forcast2(self):
    """Rolling one-step ARIMA(1,1,1) forecast over 288 steps, then plot it.

    Every iteration refits the model on the current history, records the
    one-step forecast, and then extends the history with the next value taken
    from rows 286..573 of the module-level ``vr_df2_ts``.  No log transform is
    applied.  Note that this silences all warnings process-wide.
    """
    import warnings
    warnings.filterwarnings('ignore')

    n_steps = 288
    history = vr_df2_ts.values
    actuals = vr_df2_ts.values[286:574]

    one_step = []
    for step in range(n_steps):
        fitted = ARIMA(history, order=(1, 1, 1)).fit(disp=-1)
        one_step.append(fitted.forecast()[0])
        # Feed the next value into the history before the next refit.
        history = np.append(history, actuals[step])

    forecast = pd.Series(
        one_step,
        index=pd.date_range(start='2017-02-09 00:00:00', periods=288, freq='5min'),
    )

    # Re-index the comparison slice onto the same 5-minute grid as the forecast.
    exog = vr_df2_ts.iloc[286:574]
    exog.set_index(
        pd.date_range(start='2017-02-09 00:00:00', periods=288, freq='5min'),
        inplace=True,
    )

    plt.plot(vr_df2_ts)
    plt.plot(exog, 'g')
    plt.plot(forecast, 'r')
def forecast_by_cluster(self, hold_out_n, n_ahead, order, exog):
    """Forecast each cluster column of ``self.ds_agg_by_c`` ``n_ahead`` steps ahead.

    Args:
        hold_out_n: When positive, that many trailing rows are held out of the
            fitting data (and of ``exog``).
        n_ahead: Number of steps to forecast per cluster.
        order: ARIMA order passed straight to the model.
        exog: Optional exogenous regressors, or ``None``.  Its trailing rows
            (``hold_out_n`` of them, or ``n_ahead`` when nothing is held out)
            are passed to ``forecast``; the rows before them are used to fit.

    Side effects:
        Stores the ``(n_ahead, n_clusters)`` forecast array on ``self.ds_c_for``.
    """
    history = self.ds_agg_by_c
    exog_fit = exog_future = None

    if hold_out_n > 0:
        history = history[:-hold_out_n]
        tail = hold_out_n
    else:
        tail = n_ahead
    if exog is not None:
        exog_fit = exog[:-tail]
        exog_future = exog[-tail:]

    forecasts = np.zeros((n_ahead, self.n_clusters))
    for c in tqdm(range(self.n_clusters)):
        series = history[:, c]
        if sum(series) == 0:
            # Nothing to model; the column keeps its zero forecast.
            forecasts[:, c] = 0
            continue
        fitted = ARIMA(series, exog=exog_fit, order=order).fit()
        forecasts[:, c] = fitted.forecast(n_ahead, exog=exog_future, alpha=.95)[0]

    self.ds_c_for = forecasts
def arimamodel(ts):
    """Fit ARIMA(2,1,2) models for ``ts``, print a 60-step forecast and plot the fit.

    Two models are fitted: one on the first series returned by ``trend(ts)``
    (used for the plot and the RSS title) and one on ``ts`` itself (used for
    the printed 60-step forecast).

    Args:
        ts: The time series to model; also plotted in red for comparison.
    """
    ts_log, ts_log_diff = trend(ts)
    result_ARIMA = ARIMA(ts_log, order=(2, 1, 2)).fit(disp=-1)

    # The fitted results object already exposes ``forecast``; there is no need
    # to wrap it in another results class.
    m = ARIMA(ts, order=(2, 1, 2)).fit()
    pre = m.forecast(steps=60)
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print(pre)

    # Bring the fitted values back to the original scale before plotting.
    predictions_ARIMA = backorg(result_ARIMA, ts_log)
    plt.plot(predictions_ARIMA)
    plt.plot(ts, color='red')
    plt.title('RSS: %.4F' % np.sum((result_ARIMA.fittedvalues - ts_log_diff)**2))
    plt.show()
def get_grouped_data(self, forecast=False):
    """Return monthly totals next to the cumulative sum, optionally with a forecast.

    Args:
        forecast: When true, fit an ARIMA(10,1,0) model on the cumulative sum
            and append a 12-step ``forecast`` column.

    Returns:
        A DataFrame with the columns ``'total added'`` and ``'cumulative sum'``
        (ordered by whichever source series is longer), plus ``'forecast'``
        when requested.
    """
    cdf = self.cumulative_sum()
    gdf = self.group_by('M')

    # Build the frame on the longer series so none of its rows are dropped.
    if cdf.shape[0] > gdf.shape[0]:
        df = cdf.to_frame()
        df.columns = ['cumulative sum']
        df['total added'] = gdf.to_frame()['event']
    else:
        df = gdf.to_frame()
        df.columns = ['total added']
        df['cumulative sum'] = cdf.to_frame()['event']

    if not forecast:
        return df

    mtotals = pd.to_numeric(df['cumulative sum'], downcast='float')
    model_fit = ARIMA(mtotals, order=(10, 1, 0)).fit(disp=0)
    # Separate name so the boolean ``forecast`` flag is not overwritten.
    predicted = model_fit.forecast(steps=12)[0]

    # The range holds more month-ends than forecast steps; zip pairs only the
    # first 12.  ``to_pydatetime`` replaces the removed ``Timestamp.to_datetime``.
    dates = pd.date_range('2017-04-30', '2018-06-01', freq='M')
    records = list(zip(dates.to_pydatetime(), predicted))
    ndf = pd.DataFrame.from_records(records)
    ndf.columns = ['date', 'forecast']
    ndf.set_index(['date'], inplace=True)
    return pd.concat([df, ndf], axis=1)
def mamodel(ts):
    """Fit an ARIMA(0,1,1) model and plot its fitted values with the RSS.

    ``trend(ts)`` supplies two series: the first is modelled, the second is
    plotted and compared against the fitted values.
    """
    log_series, log_diff = trend(ts)
    fitted = ARIMA(log_series, order=(0, 1, 1)).fit(disp=-1)

    plt.plot(log_diff)
    plt.plot(fitted.fittedvalues, color='red')
    rss = np.sum((fitted.fittedvalues - log_diff)**2)
    plt.title('RSS: %.4F' % rss)
    plt.show(block=False)
def armodel(ts):
    """Fit an ARIMA(1,1,0) model and plot its fitted values with the RSS.

    ``trend(ts)`` supplies two series: the first is modelled, the second is
    plotted and compared against the fitted values.
    """
    log_series, log_diff = trend(ts)
    fitted = ARIMA(log_series, order=(1, 1, 0)).fit(disp=-1)

    plt.plot(log_diff)
    plt.plot(fitted.fittedvalues, color='red')
    rss = np.sum((fitted.fittedvalues - log_diff)**2)
    plt.title('RSS: %.4F' % rss)
    plt.show(block=False)
def ARIMA_fit(self):
    """Fit an ARIMA(5,1,5) model on the module-level ``ts_log`` and plot the fit.

    The fitted results are stored on ``self.results_ARIMA``, their summary is
    printed, and the fitted values are plotted over ``ts_log_diff`` with the
    RSS against its ``'in_tpkts'`` column in the title.
    """
    # order=(p, d, q); AR or MA alone can be modelled by setting q or p to 0.
    model = ARIMA(ts_log, order=(5, 1, 5))
    # Keep a local handle as well: the statements below read the results by
    # this name, which would otherwise be undefined.
    results_ARIMA = model.fit(disp=-1)
    self.results_ARIMA = results_ARIMA
    print(results_ARIMA.summary())
    plt.plot(ts_log_diff)
    plt.plot(results_ARIMA.fittedvalues, color='r')
    plt.title('RSS: %.4f'% sum((results_ARIMA.fittedvalues-ts_log_diff['in_tpkts'])**2))
def fit(self):
    """Fit ARIMA(2,1,1) on ``self.df`` and return a three-step prediction.

    Returns:
        ``None`` when ``self.df`` has fewer than ``self.t_window`` rows.
        Otherwise the exponential of: the value at position ``t_window - 1``
        plus the cumulative sum of the dynamic predictions for positions
        ``t_window`` through ``t_window + 2``.
    """
    if len(self.df) < self.t_window:
        return None

    results_ARIMA = ARIMA(self.df, order=(2, 1, 1)).fit(disp=-1)
    forecast = results_ARIMA.predict(start=self.t_window,
                                     end=self.t_window + 2,
                                     dynamic=True)
    forecast = forecast.cumsum()

    # ``.ix`` no longer exists in pandas; ``.iloc`` selects the same row by
    # position.  NOTE(review): assumes the lookup was meant to be positional.
    base = self.df.iloc[self.t_window - 1]
    predictions_ARIMA_log = pd.Series(base, index=forecast.index)
    predictions_ARIMA_log = predictions_ARIMA_log.add(forecast, fill_value=0)
    return np.exp(predictions_ARIMA_log)
def ARIMA_fun( data ):
    """Fit an ARIMA model and return its fitted values, plotting them over ``data``.

    The MA order is ``int()`` of the first element of the second value returned
    by ``acf(..., qstat=True)``.
    NOTE(review): the model is fitted on the module-level ``orig_data``, not on
    the ``data`` argument -- confirm this is intended.
    """
    # The PACF result is not used below; the call is kept as in the original flow.
    pacf(data, nlags=20, method='ols')
    _acf_values, second_output, _third_output = acf(data, nlags=20, qstat=True, unbiased=True)

    ma_order = int(second_output[0])
    fitted = ARIMA(orig_data, order=(1, 1, ma_order)).fit(disp=-1)

    plt.subplot(121)
    plt.plot(data)
    plt.plot(fitted.fittedvalues)
    return fitted.fittedvalues
def objfunc(order, *params):
    """Objective for an ARIMA order search: the fit's AIC, or ``inf`` if unusable.

    Args:
        order: Candidate ARIMA order.
        *params: Extra positional arguments from the optimiser.

    Returns:
        ``res.aic`` of the fitted model, or ``float('inf')`` when building or
        fitting the model fails or the AIC is NaN.
    """
    # NOTE(review): the whole ``params`` tuple is passed on as the series;
    # confirm against the caller whether a single element was intended.
    series = params
    try:
        mod = ARIMA(series, order, exog=None)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            res = mod.fit(disp=0, solver='bfgs', maxiter=5000)
    except Exception:
        # Any modelling failure makes this order unusable.  KeyboardInterrupt
        # and SystemExit are deliberately allowed to propagate.
        return float('inf')
    if math.isnan(res.aic):
        return float('inf')
    return res.aic
def pridictNextNdays(self,train):
    """Forecast ``self.next_ndays`` steps of the selected column of ``train``.

    The model is an ARIMA(self.p, self.d, self.q) with daily frequency, fitted
    on ``train[self.selected][self.start_train:self.end_train]``.

    Returns:
        The first element of the tuple returned by ``forecast``.
    """
    window = train[self.selected][self.start_train:self.end_train]
    order = (self.p, self.d, self.q)
    fitted = ARIMA(window, order=order, freq='D').fit(disp=False)
    # forecast() yields three values; only the first is handed back.
    point_forecast, _stderr, _conf_int = fitted.forecast(steps=self.next_ndays, alpha=.05)
    return point_forecast
def testArima(self,train):
    """Forecast ``self.next_ndays`` steps and pair them with the observed values.

    The model is an ARIMA(self.p, self.d, self.q) fitted on
    ``train[self.selected][self.start_train:self.end_train]``.

    Returns:
        A dict with ``'real'`` (the observations at positions
        ``end_train .. end_train + next_ndays - 1`` as a list) and
        ``'pridiction'`` (the first element of the forecast tuple).
    """
    observed = train[self.selected]
    window = observed[self.start_train:self.end_train]
    fitted = ARIMA(window, order=(self.p, self.d, self.q)).fit(disp=False)
    forecast, _stderr, _conf_int = fitted.forecast(steps=self.next_ndays, alpha=.05)
    actual = list(observed)[self.end_train:self.end_train + self.next_ndays]
    return {'real': actual, 'pridiction': forecast}
def predict_arima_next_days(self, item):
    """Forecast ``self.n_days`` steps for column ``item`` of the global ``df_train``.

    Returns:
        A tuple ``(history, last_value, forecast)``: the index-sorted series up
        to (not including) position ``self.fc``, the entry at ``self.fc``, and
        the first element of the forecast tuple.
    """
    series = df_train[item].sort_index()
    ts_last_day = series[self.fc]   # the actual value the forecast starts at
    history = series[0:self.fc]     # everything before it

    fitted = ARIMA(history, order=(self.p, self.d, self.q)).fit(disp=False)
    forecast, _stderr, _conf_int = fitted.forecast(steps=self.n_days, alpha=.05)
    return history, ts_last_day, forecast
def arima(ts, forecast_window):
    """Fit ARIMA(0,1,2) on ``log(ts)`` and return a forecast on the original scale.

    Args:
        ts: Series to model (logged before fitting).
        forecast_window: Number of steps past the last observation to predict.

    Returns:
        ``exp`` of the predictions shifted by the last log observation and by
        their own cumulative sum.
    """
    logger.info(ts)
    start = int(ts.count() - 1)
    end = int(start + forecast_window)

    ts_log = np.log(ts)
    results = ARIMA(ts_log, order=(0, 1, 2)).fit(disp=-1)
    prediction = results.predict(start=start, end=end, dynamic=True)

    future = pd.Series(prediction, copy=True)
    cumsum = future.cumsum()
    # ``.ix`` no longer exists in pandas; ``.iloc[-1]`` is the last observation.
    prediction_future = future.add(ts_log.iloc[-1])
    # NOTE(review): this adds the cumulative sum on top of the predictions
    # themselves -- confirm the double contribution is intended.
    prediction_future = prediction_future.add(cumsum)
    return np.exp(prediction_future)
class ARIMAModelResult:
    """An ARIMA fit together with its test-set MSE, ordered by that MSE.

    Instances compare by ``model_fitness`` (lower is "smaller"), so a list of
    results can be sorted or passed to ``min`` to pick the best model.
    """

    def __init__(self, autoregressive_periods, integrated_order,
                 moving_average_model_periods, training_data, test):
        """Fit the model on ``training_data`` and score it against ``test``.

        Args:
            autoregressive_periods: AR order (p).
            integrated_order: Differencing order (d).
            moving_average_model_periods: MA order (q).
            training_data: Series the model is fitted on.
            test: Held-out values; its length sets the forecast horizon.
        """
        self.autoregressive_periods = autoregressive_periods
        self.integrated_order = integrated_order
        self.moving_average_model_periods = moving_average_model_periods
        self.model = ARIMA(training_data,
                           order=(
                               self.autoregressive_periods,
                               self.integrated_order,
                               self.moving_average_model_periods
                           )
                           )
        self.fit = self.model.fit()
        self.aic = self.fit.aic
        self.predictions = self.fit.forecast(steps=len(test))[0]
        self.model_fitness = mean_squared_error(test, self.predictions)

    # Comparisons return NotImplemented for operands without ``model_fitness``
    # so Python can fall back to its default handling instead of raising
    # AttributeError from inside the comparison.
    def __eq__(self, other):
        if not hasattr(other, 'model_fitness'):
            return NotImplemented
        return self.model_fitness == other.model_fitness

    def __lt__(self, other):
        if not hasattr(other, 'model_fitness'):
            return NotImplemented
        return self.model_fitness < other.model_fitness

    def __gt__(self, other):
        if not hasattr(other, 'model_fitness'):
            return NotImplemented
        return self.model_fitness > other.model_fitness

    def __str__(self):
        return "Autoregressive periods: {}\nIntegraded Order: {}\nMoving Average Model Periods: {}\n Predictions: {}\nMSE: {}".format(
            self.autoregressive_periods,
            self.integrated_order,
            self.moving_average_model_periods,
            self.predictions,
            self.model_fitness
        )
def predictFutureProfit(df, forward):
    """Return, per asset, the last value of a dynamic ARIMA(1,1,0) reconstruction.

    For every asset from ``get_assets(df)`` the log series is modelled, the
    dynamic predictions from position 2 onward are cumulatively summed onto the
    first log value, and the exponential of the final entry is recorded.

    Args:
        df: Frame with one column per asset.
        forward: Not used by this function.

    Returns:
        Dict mapping each asset to its final predicted value.
    """
    results = {}
    for asset in get_assets(df):
        ts = df[asset]
        ts_log = np.log(ts)
        results_ARIMA = ARIMA(ts_log, order=(1, 1, 0)).fit(disp=-1)
        predictions_diff = results_ARIMA.predict(2, len(ts.index) - 1, dynamic=True)
        predictions_diff_cumsum = predictions_diff.cumsum()
        # ``.ix`` no longer exists in pandas; use explicit positional access
        # for the first and last rows.
        predictions_log = pd.Series(ts_log.iloc[0], index=ts_log.index)
        predictions_log = predictions_log.add(predictions_diff_cumsum, fill_value=0)
        predictions = np.exp(predictions_log)
        results[asset] = predictions.iloc[-1]
    return results
def arima(self):
    """Fit ARIMA(4,1,3) on the close prices and plot a dynamic level prediction.

    The prediction runs from the tenth-from-last date to ``day_history / 5``
    days (truncated to an int) past the last date.
    """
    kl = self.get_kline()
    closes = self.get_close_price(kl)
    dates = self.get_date(kl)

    horizon_end = dates[-1] + timedelta(days=int(self.day_history/5))
    print("predict date:", dates[-1],"--->", horizon_end)

    series = pd.Series(closes, index=dates)
    print(series)

    fitted = ARIMA(series, order=(4, 1, 3)).fit()   # (p, d, q)
    pred = fitted.predict(dates[-10], horizon_end, dynamic=True, typ='levels')

    plt.figure(figsize=(12, 8))
    plt.plot(series, 'ro-')
    plt.xticks(rotation=45)
    plt.plot(pred, 'go-')
    plt.show()
def fitArima(ts):
    """Fit ARIMA(0,1,1) on the lag-7 differenced log series; return original vs predicted.

    Returns:
        A two-column DataFrame with the keys ``'original'`` (``ts``) and
        ``'predicted'`` (the in-sample predictions mapped back through the
        lag-7 shift, the order-``d`` shift and ``exp``).
    """
    import statsmodels.api as sm

    p, d, q = 0, 1, 1
    log_ts = np.log(ts)
    lagged_log = log_ts.shift(7)
    # The first 7 entries have no lag-7 partner, so they are dropped.
    seasonal_diff = (log_ts - lagged_log)[7:]

    model = ARIMA(seasonal_diff, [p, d, q], exog=None, freq='D', missing='none')
    fitted = model.fit(trend='c', disp=False)
    raw_pred = fitted.predict(exog=None, dynamic=False)

    pred = pd.Series(raw_pred, index=seasonal_diff.index[d:])
    pred = np.exp(lagged_log + seasonal_diff.shift(d) + pred)
    return pd.concat([ts, pred], axis=1, keys=['original', 'predicted'])
def programmer_5():
    """Fit ARIMA(0,1,1) on a disk-usage column and Ljung-Box test its errors.

    Reads ``data/discdata_processed.xls``, leaves out the last 5 rows, fits the
    model, and prints whether any of the first 12 lags has a p-value below
    0.05, followed by the test statistics.
    """
    discfile = "data/discdata_processed.xls"
    # Number of lags used for the residual test.
    lagnum = 12

    frame = pd.read_excel(discfile, index_col="COLLECTTIME")
    frame = frame.iloc[:len(frame) - 5]
    xdata = frame["CWXT_DB:184:D:\\"]

    # Train the model, predict in levels, and compute the prediction errors.
    fitted = ARIMA(xdata, (0, 1, 1)).fit()
    xdata_pred = fitted.predict(typ="levels")
    pred_error = (xdata_pred - xdata).dropna()

    lb, pvalues = acorr_ljungbox(pred_error, lags=lagnum)
    rejected = (pvalues < 0.05).sum()
    if rejected > 0:
        verdict = u"模型ARIMA(0,1,1)不符合白噪声检验"
    else:
        verdict = u"模型ARIMA(0,1,1)符合白噪声检验"
    print(verdict)
    print(lb)
def run_arima(self):  # use current build
    '''
    DEPRECATED: Primarily used for testing/debugging.
    Runs statsmodels ARIMA.

    Indexes ``X_train`` and ``y_train`` by their ``'date'`` column, fits an
    ARIMA(2,1,2) on the float-cast target, and stores the model and results on
    ``self.arimod`` / ``self.aresults``.
    '''
    self.xts = self.X_train.set_index('date')
    # ``astype`` returns a new object and has no ``inplace`` option, so the
    # converted frame must be assigned.
    self.yts = self.y_train.set_index('date').astype('float')
    # Exogenous regressors (self.xts) are intentionally not passed.
    self.arimod = ARIMA(endog=self.yts, order=(2, 1, 2))
    self.aresults = self.arimod.fit()
def arima_model(accounts):
    """Fit an ARIMA model for each account.

    For every ``(account_type, account)`` key the entries of ``ARIMA_ORDERS``
    are tried in sequence and the first one that fits without a ``ValueError``
    or ``LinAlgError`` is kept.  Accounts for which no order fits are omitted.

    Returns:
        Dict mapping ``(account_type, account)`` to the fitted results.
    """
    fitted_by_account = {}
    for account_type, account in accounts:
        key = (account_type, account)
        series = accounts[key]
        series.name = account

        # The model order is unknown up front, so walk the candidates.
        for candidate in ARIMA_ORDERS:
            try:
                fitted = ARIMA(series, order=candidate).fit()
            except (ValueError, np.linalg.LinAlgError):
                continue
            fitted_by_account[key] = fitted
            break
    return fitted_by_account
def ARIMA_forcast3(self):
    """Forecast 2016 steps of ``ACTIVE_FLOWS`` on seasonally differenced data.

    The first 7000 values of the global ``vr_df['ACTIVE_FLOWS']`` are
    differenced with a cycle of 288, modelled with ARIMA(1,1,1), and each
    forecast step is inverted against a history that is extended with the
    actual observation for that step.  The inverted values are collected
    locally; nothing is returned.
    """
    cycle = 288  # alternative cycle length: 2016

    # Load the training slice and take the seasonal difference.
    series = pd.Series(vr_df['ACTIVE_FLOWS'][0:7000])
    X = series.values
    differenced = difference(X, cycle)

    # Fit once, then forecast the whole horizon in one call.
    model_fit = ARIMA(differenced, order=(1, 1, 1)).fit(disp=0)
    forecast = model_fit.forecast(steps=2016)[0]

    # Undo the differencing one step at a time.
    history = list(X)
    forecast_values = []
    for offset, yhat in enumerate(forecast):
        forecast_values.append(inverse_difference(history, yhat, cycle))
        history.append(vr_df['ACTIVE_FLOWS'][7000 + offset])
def ARIMA_forecast4(self):
    """Walk-forward ARIMA(1,1,1) forecast of ``DELETED_FLOWS`` in 12-step chunks.

    Starting with 7318 training points of the global ``vr_df``, each round
    seasonally differences the data (cycle 288), fits the model, forecasts 12
    steps, and inverts the differencing using actual observations where they
    exist and the forecast itself otherwise.  The training window then grows by
    12 points.  The inverted values are collected locally; nothing is returned.
    """
    # parameters
    num_train_init = 7318
    num_forecast = 12  # one day = 288 data points
    cycle = 288        # 288 samples per day
    field = 'DELETED_FLOWS'

    forecast_values = []
    for i in range(0, int(len(vr_df) / num_forecast)):
        # Stop once the training window would run past the data.
        num_train_current = i * num_forecast + num_train_init
        if num_train_current > len(vr_df):
            break

        series = pd.Series(vr_df[field][0:num_train_current])
        # Make the data stationary with a seasonal difference.
        X = series.values
        differenced = difference(X, cycle)

        model_fit = ARIMA(differenced, order=(1, 1, 1)).fit(disp=0)
        forecast = model_fit.forecast(steps=num_forecast)[0]

        # Invert the differenced forecast step by step.
        history = [x for x in X]
        for step, yhat in enumerate(forecast, start=1):
            inverted = inverse_difference(history, yhat, cycle)
            forecast_values.append(inverted)
            try:
                history.append(vr_df[field][num_train_current + step - 1])
            except (KeyError, IndexError):
                # Past the end of the observed data: fall back to the forecast.
                history.append(inverted)
def __init__(self, autoregressive_periods, integrated_order,
             moving_average_model_periods, training_data, test):
    """Fit an ARIMA model on ``training_data`` and score it against ``test``.

    Stores the three order components, the model, the fitted results, their
    AIC, a forecast as long as ``test``, and the mean squared error between
    ``test`` and that forecast (``model_fitness``).
    """
    self.autoregressive_periods = autoregressive_periods
    self.integrated_order = integrated_order
    self.moving_average_model_periods = moving_average_model_periods

    order = (
        self.autoregressive_periods,
        self.integrated_order,
        self.moving_average_model_periods,
    )
    self.model = ARIMA(training_data, order=order)
    self.fit = self.model.fit()
    self.aic = self.fit.aic

    horizon = len(test)
    self.predictions = self.fit.forecast(steps=horizon)[0]
    self.model_fitness = mean_squared_error(test, self.predictions)
def getLikelihood(endog,exog, order = None,n_forecasted_data=1):
    """Fit an ARIMA model on ``endog`` and return its forecast.

    Args:
        endog: Series to model.
        exog: Only forwarded to the order search (as an argument to ``objfunc``).
        order: ARIMA order.  When ``None`` it is chosen by a brute-force grid
            search over ``objfunc``, falling back to ``[1, 1, 3]`` if the
            search raises.
        n_forecasted_data: Number of steps to forecast.

    Returns:
        The first element of the value returned by ``forecast``.
    """
    # Automatically determine the order when none is given.
    if order is None:
        from scipy.optimize import brute
        grid = (slice(1, 3, 1), slice(1, 3, 1), slice(0, 3, 1))
        # Single-argument print calls behave the same on Python 2 and 3.
        print("############################################")
        print(endog)
        print("############################################")
        try:
            order = brute(objfunc, grid, args=(exog, endog), finish=None)
            order = order.astype(int)
        except Exception:
            # Search failed: use a fixed default order.
            order = [1, 1, 3]

    # Fit the given data (endog) with the chosen order.
    print("*********************************************")
    # Two spaces reproduce the output of the former two-statement print.
    print("Choose order of  " + str(order))
    print("*********************************************")
    model = ARIMA(endog, order).fit(full_output=False, disp=False)

    # The first element of x holds the forecast values.
    x = model.forecast(n_forecasted_data)
    return x[0]
def previsao_matematica(reservatId, data):
    """Forecast 180 steps for a reservoir with ARMA(1,1) on a lag-1 difference.

    Args:
        reservatId: Identifier passed to ``predict_info.getSeries``.
        data: Second argument passed to ``predict_info.getSeries``.

    Returns:
        Dict with ``'calculado'`` (True once the forecast is made),
        ``'volumes'`` (the non-negative inverted forecasts formatted with four
        decimals) and ``'dias'`` (how many such values were kept).
    """
    # ``Series.from_array`` no longer exists in pandas; the plain constructor
    # builds the same series.
    seriesArray = Series(predict_info.getSeries(reservatId, data))
    seriesValues = seriesArray.values
    mathDict = {'calculado': False, 'volumes': [], 'dias': 0}

    days_in_year = 1
    differenced = predict_info.difference(seriesValues, days_in_year)

    # Fit the model and make a multi-step out-of-sample forecast.
    model_fit = ARIMA(differenced, order=(1, 0, 1)).fit(disp=-1)
    forecast = model_fit.forecast(steps=180)[0]

    # Invert the differenced forecast into usable values.
    mathDict['calculado'] = True
    history = [x for x in seriesValues]
    for yhat in forecast:
        inverted = predict_info.inverse_difference(history, yhat, days_in_year)
        history.append(inverted)
        if inverted >= 0.0:
            mathDict['volumes'].append("%.4f" % round((inverted), 4))
            # Counts only the days whose volume was kept.
            mathDict['dias'] = mathDict['dias'] + 1
    return mathDict
def get_arima_predictions(y, train_subset, order = [1,0,0], X = None):
    """In-sample predictions on the training subset, then expanding-window ones.

    Args:
        y: Target series.
        train_subset: Indices of the initial training observations.
        order: ARIMA order (the default list is never mutated here).
        X: Optional 2-D exogenous array aligned with ``y``, or ``None``.

    Returns:
        Array holding the in-sample predictions followed by one appended value
        for every position after ``max(train_subset)``.
    """
    # ``X == None`` compares elementwise on arrays and cannot be used in an
    # ``if``; identity is the correct test.
    has_exog = X is not None

    if not has_exog:
        arima = ARIMA(y[train_subset], order = order).fit()
        predictions = arima.predict()
    else:
        arima = ARIMA(y[train_subset], order = order, exog = X[train_subset,:]).fit()
        predictions = arima.predict(exog = X[train_subset,:])

    # Refit on all data before position i and append the last predicted value.
    # NOTE(review): the prediction end point is ``len(y) + i`` -- confirm that
    # this horizon (rather than a one-step-ahead value) is intended.
    for i in range(max(train_subset)+1,len(y)):
        if not has_exog:
            arima = ARIMA(y[0:i], order = order).fit()
            predictions = np.append(predictions, arima.predict(0, len(y) + i)[-1])
        else:
            arima = ARIMA(y[0:i], order = order, exog = X[0:i,:]).fit()
            predictions = np.append(predictions, arima.predict(0, len(y) + i, exog = X[0:i+1,:])[-1])
    return predictions
# Initialize local variable for time series trailer_series = subtype_result['Trailer'] # trailer_series = subtype_result_day['Trailer'] # trailer_series = subtype_result['Trailer'].resample('MS').sum() X = trailer_series.values train, test = X[0:-52], X[-52:] history = [x for x in train] # print(history) predictions = list() for t in range(len(test)): # fit model model = ARIMA(history, order=(4, 1, 0)) model_fit = model.fit(disp=False, trend='c') # single step forecast yhat = model_fit.forecast()[0] predictions.append(yhat) history.append(test[t]) # evaluate forecasts rmse = sqrt(mean_squared_error(test, predictions)) print('Test RMSE: %.3f' % rmse) print(model_fit.summary()) # model_fit.plot_diagnostics(figsize=(16, 8)) # Plot The Forecast plt.plot(test, color='#ff6832')
def main():
    """Run the TSLA closing-price ARIMA experiment end to end.

    Steps, in order: parse arguments and configure logging; load and clean the
    daily close series; optionally plot and log-transform it; split it into
    train and test; decompose the training series; difference and fit ARIMA
    models to the trend and residual components; report the training and
    testing loss; optionally plot and score several comparison models; and
    finally log the predictions for 2020-12-07 through 2020-12-11.

    Relies on helpers defined outside this function (``add_args``,
    ``data_loader``, ``app_entropy``, ``decompose``, ``diff``, ``ARIMA_model``,
    ``model_predict``, ``loss``) and writes several PNG files when
    ``args.plot`` is set.
    """
    # parse arguments
    args = add_args()
    # set the logger level: DEBUG by default, INFO unless --verbose
    logger.setLevel(logging.DEBUG)
    if not args.verbose:
        logger.setLevel(logging.INFO)
    logger.debug("--------DEBUG enviroment start---------")
    # show the hyperparameters
    logger.info("---------hyperparameter setting---------")
    logger.info(args)
    # set the random seed
    np.random.seed(args.seed)
    # data fetching
    logger.info("-------------Data fetching-------------")
    tickers = \
        [
            ("TSLA", "yahoo"),  # 0, TESLA Stock
        ]
    # check that the data range is legal; otherwise use 3 months.
    if args.month <= 0 or args.month > 24:
        logger.warning("The data range is illegal. Turn to use default 3")
        args.month = 3
    tsla_df = data_loader(
        tickers, args.month)[0]  # first frame returned for the ticker list
    tsla_close = tsla_df["Close"].resample(
        'D').ffill()  # fill the series forward onto a daily grid
    # data cleaning
    logger.info("-------------Data cleaning-------------")
    if np.sum(tsla_close.isnull()) > 0:
        logger.debug(
            "The time series contain missing values & we use interpolation to resolve this issue"
        )
        tsla_close = tsla_close.interpolate(method='polynomial',
                                            order=2,
                                            limit_direction='forward',
                                            axis=0)
        # Any value that is still missing after interpolation is dropped.
        tsla_close = tsla_close.dropna()
    logger.debug(tsla_close)
    # plot the close series with its rolling mean and rolling std
    if args.plot:
        fig = plt.gcf()
        fig.set_size_inches(18.5, 10.5)
        plt.plot(tsla_close, label="Series")
        # rolling window = 5% of the series length
        plt.plot(tsla_close.rolling(int(.05 * len(tsla_close))).mean(),
                 '--',
                 label="Rolling mean")
        plt.plot(tsla_close.rolling(int(.05 * len(tsla_close))).std(),
                 ":",
                 label="Rolling Std")
        plt.legend(loc="best")
        plt.savefig("tesla_description.png")
    # optional log transformation
    if args.log:
        tsla_close = tsla_close.apply(np.log)  # log transformation
    # Estimate the forecastability of the time series:
    # approximate entropy quantifies the regularity and unpredictability of
    # fluctuations in time-series data.  Smaller values indicate that the data
    # is more regular and predictable.
    logger.info("The approximate entropy: " + str(
        app_entropy(U=np.array(tsla_close), r=0.2 * np.std(np.array(tsla_close)))))
    # data splitting
    logger.info("-------------Data splitting------------")
    # check that split_ratio is legal; otherwise use 0.7.
    if args.split_ratio > 1 or round(len(tsla_close) * args.split_ratio) <= 0:
        logger.warning("Splitting ratio is illegal. Turn to use default 0.7")
        args.split_ratio = 0.7
    train = tsla_close[0:round(len(tsla_close) * args.split_ratio)]
    test = tsla_close[round(len(tsla_close) * args.split_ratio):]
    # time series decomposition
    logger.info("-------------decomposition-------------")
    # check that the seasonal period is legal; otherwise use 7.
    if args.period < 2:
        logger.warning("Seasonal period is illegal. Turn to use default 7.")
        args.period = 7
    trend, seasonal, residual = decompose(train, args.period, args.plot)
    # difference the trend and residual components
    logger.debug("-----------------Diff-----------------")
    trend_diff, trend_diff_counts = diff(trend, args.plot, "trend", args.diff)
    logger.debug("trend diff counts: " + str(trend_diff_counts))
    residual_diff, residual_diff_counts = diff(residual, args.plot,
                                               "residual", args.diff)
    logger.debug("residual diff counts: " + str(residual_diff_counts))
    # ARIMA model
    logger.info("-----------ARIMA construction----------")
    trend_model_fit, trend_model_order = ARIMA_model(trend_diff, args,
                                                     "trend_diff")
    # reported as (order[0], number of differences taken, order[1])
    logger.info("Trend model parameters: " + str(
        tuple([trend_model_order[0], trend_diff_counts, trend_model_order[1]]))
                )
    residual_model_fit, residual_model_order = ARIMA_model(
        residual_diff, args, "residual_diff")
    logger.info("Residual model parameters: " + str(
        tuple([
            residual_model_order[0], residual_diff_counts,
            residual_model_order[1]
        ])))
    # model summary (best effort)
    # NOTE(review): the bare ``except`` below also swallows KeyboardInterrupt.
    try:
        logger.debug("---------trend model summary----------")
        logger.debug(trend_model_fit.summary())
    except:
        logger.warning("Error occurs in summary, simply skip")
        pass
    try:
        logger.debug("---------resid model summary----------")
        logger.debug(residual_model_fit.summary())
    except:
        logger.warning("Error occurs in summary, simply skip")
        pass
    if args.plot:
        # residual plots of trend model
        trend_model_fit.resid.plot()
        plt.savefig("resid_plt_trend.png")
        plt.close()
        trend_model_fit.resid.plot(kind='kde')
        plt.savefig("kde_resid_plt_trend.png")
        plt.close()
        # residual plots of residual model
        residual_model_fit.resid.plot()
        plt.savefig("resid_plt_residual.png")
        plt.close()
        residual_model_fit.resid.plot(kind='kde')
        plt.savefig("kde_resid_plt_residual.png")
        plt.close()
    logger.debug("-----trend model residual describe----")
    logger.debug(trend_model_fit.resid.describe())  # summary statistics
    logger.debug("-----resid model residual describe----")
    logger.debug(residual_model_fit.resid.describe())  # summary statistics
    # loss calculation
    logger.info("-----------Loss calculation------------")
    # in-sample sequence (eighth argument False, empty start/end strings)
    fit_seq = model_predict(trend_model_fit, residual_model_fit, trend,
                            residual, seasonal, trend_diff_counts,
                            residual_diff_counts, False, "", "", args.period)
    if args.log:
        # undo the log transformation before computing the loss
        fit_seq = np.exp(fit_seq)
        train = train.apply(np.exp)
    logger.debug(fit_seq)
    # calculate training loss
    training_loss = loss(fit_seq, np.array(train), args.loss)
    logger.info("Training loss: " + str(training_loss))
    # plot train and fitted values in one graph.
    if args.plot:
        plt.figure()
        plt.plot(fit_seq, color='red', label='fit')
        plt.plot(np.array(train), color='blue', label='train')
        plt.legend(loc='best')
        plt.savefig('fit_vs_train.png')
        plt.close()
    if list(test):
        # predict over the date range spanned by the test set
        pred_seq = model_predict(trend_model_fit, residual_model_fit, trend,
                                 residual, seasonal, trend_diff_counts,
                                 residual_diff_counts, True,
                                 str(test.index.tolist()[0]),
                                 str(test.index.tolist()[-1]), args.period)
        if args.log:
            pred_seq = np.exp(pred_seq)
            test = test.apply(np.exp)
        logger.debug(pred_seq)
        # calculate testing loss
        testing_loss = loss(pred_seq, np.array(test), args.loss)
        logger.info("Testing loss: " + str(testing_loss))
        # plot test and predicted value in one graph.
        if args.plot:
            plt.figure()
            plt.plot(pred_seq, color="red", label="pred")
            plt.plot(np.array(test), color="blue", label="test")
            plt.legend(loc="best")
            plt.savefig("pred_vs_test.png")
            plt.close()
    # plot a performance comparison of several models on the train set.
    if args.plot:
        logger.info("-----------Model Comparison------------")
        plt.figure()
        # actual value
        plt.plot(np.array(train), color='blue', label="actual")
        # auto-ARIMA with seasonal decomposition
        plt.plot(fit_seq[1:],
                 color='green',
                 label='ARIMA with seasonal decomposition')
        # simple auto-ARIMA
        auto_arima_model_fit, _ = ARIMA_model(train, args, "auto_arima")
        plt.plot(np.array(auto_arima_model_fit.fittedvalues),
                 color='yellow',
                 label='Auto ARIMA')
        # auto-ARIMA with log transformation.
        auto_log_arima_fit, _ = ARIMA_model(train.apply(np.log), args,
                                            "auto_arima")
        plt.plot(np.array(auto_log_arima_fit.fittedvalues.apply(np.exp)),
                 color='brown',
                 label='Auto ARIMA with log')
        # rolling mean
        plt.plot(np.array(train.rolling(int(.05 * len(train))).mean()),
                 '--',
                 label="Rolling mean")
        # ordinary ARIMA with a fixed (1, 0, 1) order
        plt.plot(np.array(ARIMA(train, (1, 0, 1)).fit(disp=0).fittedvalues),
                 color="coral",
                 label="Ordinary ARIMA")
        plt.legend(loc="best")
        plt.xlabel("days from " +
                   str(train.index.tolist()[0]).replace(" 00:00:00", ""))
        plt.ylabel("stock prices")
        plt.title("Actual Stock Price Compared with Forecasted Stock Price")
        plt.grid(True)
        plt.tight_layout()
        plt.savefig("model_comparison.png")
        plt.close()
        if list(test):
            # collect the testing loss of every comparison model
            loss_dict = dict()
            # auto-ARIMA with seasonal decomposition (computed earlier)
            loss_dict["auto sarima"] = testing_loss
            # simple auto-ARIMA
            loss_dict["auto arima"] = loss(
                np.array(
                    auto_arima_model_fit.predict(
                        start=str(test.index.tolist()[0]),
                        end=str(test.index.tolist()[-1]),
                        dynamic=True)), np.array(test), args.loss)
            # auto-ARIMA with log transformation.
            loss_dict["auto arima log"] = loss(
                np.array(
                    auto_log_arima_fit.predict(
                        start=str(test.index.tolist()[0]),
                        end=str(test.index.tolist()[-1]),
                        dynamic=True).apply(np.exp)), np.array(test),
                args.loss)
            # ordinary ARIMA
            loss_dict["arima"] = loss(
                np.array(
                    ARIMA(train, (1, 0, 1)).fit(disp=0).predict(
                        start=str(test.index.tolist()[0]),
                        end=str(test.index.tolist()[-1]),
                        dynamic=True)), np.array(test), args.loss)
            logger.info(loss_dict)
            # bar chart of the collected losses
            plt.figure(figsize=(12, 6))
            loss_df = pd.DataFrame.from_dict(loss_dict, orient='index')
            plt.bar(loss_df.index.tolist(), loss_df.iloc[:, 0])
            plt.ylabel("RMSE")
            plt.legend('')
            plt.title("RMSE for Difference Models on Test Data")
            plt.savefig("RMSE_model_comparison.png")
            plt.close()
    # prediction for a fixed, hard-coded date range
    logger.info("--------------prediction---------------")
    prediction = model_predict(trend_model_fit, residual_model_fit, trend,
                               residual, seasonal, trend_diff_counts,
                               residual_diff_counts, True,
                               "2020-12-07 00:00:00", "2020-12-11 00:00:00",
                               args.period)
    if args.log:
        prediction = np.exp(prediction)
    logger.info("2020-12-07 predicted value: " + str(prediction[0]))
    logger.info("2020-12-08 predicted value: " + str(prediction[1]))
    logger.info("2020-12-09 predicted value: " + str(prediction[2]))
    logger.info("2020-12-10 predicted value: " + str(prediction[3]))
    logger.info("2020-12-11 predicted value: " + str(prediction[4]))
    logger.info("--------------Process ends-------------")
color='gray') # lowwer置信区间 plt.axhline(y=1.96 / np.sqrt(len(ts_log_diff)), linestyle='--', color='gray') # upper置信区间 plt.title('Autocorrelation Function') # p的获取:PACF图中曲线第一次穿过上置信区间.这里p取2 plt.subplot(122) plt.plot(lag_pacf) plt.axhline(y=0, linestyle='--', color='gray') plt.axhline(y=-1.96 / np.sqrt(len(ts_log_diff)), linestyle='--', color='gray') plt.axhline(y=1.96 / np.sqrt(len(ts_log_diff)), linestyle='--', color='gray') plt.title('Partial Autocorrelation Function') plt.tight_layout() plt.show() # AR model model = ARIMA(ts_log, order=(2, 1, 0)) result_AR = model.fit(disp=-1) plt.plot(ts_log_diff) plt.plot(result_AR.fittedvalues, color='red') plt.title('AR model RSS:%.4f' % sum(result_AR.fittedvalues - ts_log_diff)**2) plt.show() # MA model model = ARIMA(ts_log, order=(0, 1, 2)) result_MA = model.fit(disp=-1) plt.plot(ts_log_diff) plt.plot(result_MA.fittedvalues, color='red') plt.title('MA model RSS:%.4f' % sum(result_MA.fittedvalues - ts_log_diff)**2) plt.show() # ARIMA 将两个结合起来 效果更好
def arima_models(ts_log, p, d, q):
    """Fit an ARIMA(p, d, q) model to ``ts_log`` and return the fitted results."""
    order = (p, d, q)
    return ARIMA(ts_log, order=order).fit(disp=-1)
def TrainTimeSeries(dataset, p, d, q, freq):
    """Build (but do not fit) an ARIMA(p, d, q) model for ``dataset``.

    Args:
        dataset: Series to model.
        p: Autoregressive order.
        d: Differencing order.
        q: Moving-average order.
        freq: The model is only built when this is greater than zero.

    Returns:
        The unfitted ``ARIMA`` model, or ``None`` when ``freq`` is not positive.
    """
    # Make the "no model" outcome explicit rather than leaving the result
    # undefined on this path.
    if not freq > 0:
        return None
    return ARIMA(dataset, order=(p, d, q))
def gen_ohlcv(interval: int) -> go.Figure:
    """Build the BTCUSD candlestick chart with the predicted-close overlay.

    Fetches candles, fits an ARIMA(3,1,0) on the last 60 log returns, writes
    the one-step prediction for the next minute into ``config.df_pred``, and
    draws the latest 50 candles with up to 30 recent predictions.

    Args:
        interval: Refresh tick supplied by the caller; not read in the body.

    Returns:
        The assembled figure.
    """
    candles = bitfinex_candles_api()
    candles["log_ret"] = np.log(candles.Close) - np.log(candles.Close.shift(1))
    print("\ndata df loaded, starting prediction...\n")

    # Online training and forecast on the most recent 60 returns.
    recent_returns = candles.tail(60)["log_ret"]
    fitted = ARIMA(recent_returns, order=(3, 1, 0)).fit(disp=0)
    pred = fitted.forecast()[0]
    print("\nprediction ended, writing to output df...")

    # Store the raw prediction and the close price it implies for the next minute.
    last_bar = candles.tail(1)
    next_dt = last_bar.index[0] + pd.Timedelta("1 minute")
    config.df_pred.loc[next_dt] = [
        pred[0],
        (np.exp(pred) * last_bar.Close.values)[0],
    ]
    print("\nnext datetime is {}...".format(next_dt))

    # Position just past the row that was written.
    loc = config.df_pred.index.get_loc(next_dt) + 1
    print("\nloc is {}...".format(loc))

    # Window of at most the 30 most recent predictions for plotting.
    window_start = max(0, loc - 30)
    window_stop = min(loc, len(candles))
    df_pred_plot = config.df_pred.iloc[window_start:window_stop].sort_index()
    print("\n set pred df for plotting...\n", df_pred_plot)

    shown = candles.tail(50)
    trace_ohlc = go.Candlestick(
        x=shown.index,
        open=shown["Open"],
        close=shown["Close"],
        high=shown["High"],
        low=shown["Low"],
        opacity=0.5,
        hoverinfo="skip",
        name="BTCUSD",
    )

    trace_line = go.Scatter(
        x=df_pred_plot.index,
        y=df_pred_plot.pred_Close,
        line_color="yellow",
        mode="lines+markers",
        name="Predicted Close",
    )

    background = config.app_color["graph_bg"]
    x_axis = {"showline": False, "showgrid": False, "zeroline": False}
    y_axis = {
        "showgrid": True,
        "showline": True,
        "fixedrange": True,
        "zeroline": True,
        "gridcolor": config.app_color["graph_line"],
        "title": "Price (USD$)",
    }
    layout = go.Layout(
        plot_bgcolor=background,
        paper_bgcolor=background,
        font={"color": "#fff"},
        height=700,
        xaxis=x_axis,
        yaxis=y_axis,
    )

    return go.Figure(data=[trace_ohlc, trace_line], layout=layout)
def StartARIMAForecasting(Actual, P, D, Q):
    """Fit ARIMA(P, D, Q) on ``Actual`` and return its default forecast.

    Returns:
        The first element of the value returned by ``forecast()``.
    """
    fitted = ARIMA(Actual, order=(P, D, Q)).fit(disp=0)
    outcome = fitted.forecast()
    return outcome[0]
# NOTE(review): these two appends finish logic that starts before this
# excerpt; their original nesting cannot be confirmed from here.
ptime.append(temp)
sumtime.append(len(temp))

# Dataset: build a float series indexed by the timestamps.
from statsmodels.tsa.arima_model import ARIMA
time = [float(i) for i in time]
time = pd.Series(time, index=tstamp)

# Train/test split: the last 100 observations are held out.
size = int(len(time) - 100)
train, test = time[0:size], time[size:len(time)]
history = [x for x in train]
predictions = list()

# Train an ARIMA(3, 1, 5) model on the training history.
model = ARIMA(history, order=(3, 1, 5))
model_fit = model.fit(disp=0)

# Forecast the next 100 data points in one call.
output = model_fit.forecast(steps=100)[0]
output = [x for x in output]
test = [x for x in test]
fig = plt.figure(figsize=(10, 5))

# Plot the predicted vs. observed values.
ax = fig.add_subplot(111)
ax.plot(test, label="Observed")
ax.plot(output, label="Predicted")
plt.xlabel("Time")
plt.ylabel("Number of Crime")
data.append(train_x2) data.append(train_x3) #print(data[0]) #print(train_x) for currentdata in data: TS = currentdata final_aic = math.inf final_bic = math.inf final_order = (0, 0, 0) #print(final_order) for p in range(0, 3): for d in range(1, 3): for q in range(0, 3): try: model = ARIMA(TS, order=(p, d, q)) #print(p,q,d) results_ARIMA = model.fit(disp=-1) current_aic = results_ARIMA.aic #compute AIC error on the model formed so far current_bic = results_ARIMA.bic #compute BIC error on the model formed so far #print(p,d,q) if ( current_bic < final_bic and current_aic < final_aic ): #if current error is minimum then update all the order,model etc final_aic = current_aic final_bic = current_bic final_order = (p, d, q) '''results_final_ARIMA = final_arima.fit() print(results_final_ARIMA.summary()) #final_accuracy = accuracy(model)''' except (ValueError, RuntimeError, TypeError, NameError):
df.diff().plot()
# In other words, we make the time series "stationary".

# ======================= ARIMA =======================
# The ARIMA model combines these two concepts: it uses the correlation with
# previous time steps to make a forecast.
# ARIMA = AutoRegression + I (remove trend) + Moving Average
# This is the same idea as seen in the visualisation above.
# (The moving average applies to the errors, which we do not use here.)
from statsmodels.tsa.arima_model import ARIMA

arima = ARIMA(df.consumption, order=(5, 1, 0))
model_fit = arima.fit()  # (disp=0)

# forecast() of this API returns a tuple whose first element is the array of
# point forecasts, so it has no .plot(); draw the one-step forecast as a
# reference line on the data plot instead.
prediction = model_fit.forecast()
ax = df.plot()
ax.axhline(prediction[0][0], color='red', linestyle='--',
           label='one-step forecast')
ax.legend()
print(model_fit.summary())

# ================== Train/Test split ==================

''' END OF THE INTERMEDIATE COURSE '''
# rolling_mean = df_log.rolling(window=12).mean() # df_log_minus_mean = df_log - rolling_mean # df_log_minus_mean.dropna(inplace=True) # get_stationarity(df_log_minus_mean) # # rolling_mean_exp_decay = df_log.ewm(halflife=12, min_periods=0, adjust=True).mean() # df_log_exp_decay = df_log - rolling_mean_exp_decay # df_log_exp_decay.dropna(inplace=True) # get_stationarity(df_log_exp_decay) # # df_log_shift = df_log - df_log.shift() # df_log_shift.dropna(inplace=True) # get_stationarity(df_log_shift) decomposition = seasonal_decompose(df_log) model = ARIMA(df_log, order=(5, 1, 0)) results = model.fit(disp=-1) #plt.plot(df_log_shift) plt.plot(results.fittedvalues, color='red') plt.show() predictions_ARIMA_diff = pd.Series(results.fittedvalues, copy=True) predictions_ARIMA_diff_cumsum = predictions_ARIMA_diff.cumsum() predictions_ARIMA_log = pd.Series(df_log['PKB'].iloc[0], index=df_log.index) predictions_ARIMA_log = predictions_ARIMA_log.add( predictions_ARIMA_diff_cumsum, fill_value=0) predictions_ARIMA = np.exp(predictions_ARIMA_log) plt.plot(df) predictions_ARIMA.head() plt.plot(predictions_ARIMA)
# pandas.tools.plotting was removed from pandas; prefer the current location
# and fall back to the old one only on very old installations.
try:
    from pandas.plotting import autocorrelation_plot
except ImportError:
    from pandas.tools.plotting import autocorrelation_plot


def parser(x):
    """Parse a date label by prefixing '190', e.g. '1-01' -> January 1901."""
    return datetime.strptime('190' + x, '%Y-%m')


series = read_csv('shampoo-sales.csv', header=0, parse_dates=[0],
                  index_col=0, squeeze=True, date_parser=parser)
print(series.head())
series.plot()
pyplot.show()
autocorrelation_plot(series)
pyplot.show()

# fit model
model = ARIMA(series, order=(5, 1, 0))
model_fit = model.fit(disp=0)
print(model_fit.summary())

# plot residual errors: first as a line plot, then as a density estimate
residuals = DataFrame(model_fit.resid)
residuals.plot()
pyplot.show()
residuals.plot(kind='kde')
pyplot.show()
print(residuals.describe())

# http://www.statsmodels.org/devel/generated/statsmodels.tsa.arima_model.ARIMA.predict.html
X = series.values
def time_series_analysis(max_steps=300):
    """Fit ARIMA(6, 1, 0) to the 15-minute electricity data and walk-forward forecast it.

    Reads its inputs from CSV files in the working directory, prints the model
    summary and residual statistics, shows a KDE plot of the residuals, then
    refits the model once per step on every observation seen so far and
    forecasts the next one. Finally it prints the MSE and hands the expected
    and predicted values to ``generate_comparison_plot`` and ``cal_error``.

    Args:
        max_steps: Upper bound on the number of one-step-ahead forecasts. The
            run is also capped at the size of the hold-out split, so a short
            series no longer fails with ``IndexError``.

    Returns:
        None. Results are printed and plotted.
    """
    import time

    from pandas import read_csv
    from sklearn.metrics import mean_squared_error
    from statsmodels.tsa.arima_model import ARIMA

    # None of the values below feed the model; they are kept so the random
    # stream and the printed diagnostics stay exactly as before.
    index1 = randint(1, 1450)  # hard coded
    index2 = randint(1, 1412)
    data, label = get_time_series_index_based_method1(index1)
    series = pd.read_csv('elec.csv', header=0, parse_dates=[0], index_col=0,
                         squeeze=True)
    series1 = read_csv('shampoo.csv', header=0, parse_dates=[0], index_col=0,
                       squeeze=True)
    print("series1.head")
    print(series1.head())
    print("series.head")
    print(series.head())

    series2 = read_csv('ElectricityBy15Minutes.csv', header=0,
                       parse_dates=[0], index_col=0, squeeze=True,
                       names=["day", "value"])

    # Fit once on the full series to inspect the summary and the residuals.
    model = ARIMA(series2, order=(6, 1, 0))
    model_fit = model.fit(disp=0)
    print(model_fit.summary())
    residuals = DataFrame(model_fit.resid)
    residuals.plot(kind='kde')
    pyplot.show()
    print(residuals.describe())

    X = series2.values
    print("X is ")
    print(X)
    size = int(len(X) * 0.66)
    train, test = X[0:size], X[size:len(X)]
    history = list(train)
    predictions = list()

    # NOTE(review): the purpose of this pause is not visible here - confirm
    # before removing it.
    time.sleep(5)

    # Never step past the end of the hold-out split.
    steps = min(max_steps, len(test))
    for t in range(steps):
        model = ARIMA(history, order=(6, 1, 0))
        model_fit = model.fit(disp=0)
        output = model_fit.forecast()
        yhat = output[0]
        predictions.append(yhat)
        obs = test[t]
        # The true observation (not the forecast) extends the training data.
        history.append(obs)
        print('predicted=%f, expected=%f' % (yhat, obs))

    # Same values the loop appended to ``history``, but also correct when
    # ``steps`` is 0 (``history[-0:]`` would be the whole list).
    expected = list(test[:steps])

    print("test-----")
    print(test[:10])
    print("history-----")
    print(history[-10:])
    print("predictions-----")
    print(predictions)
    error = mean_squared_error(expected, predictions)
    print('Test MSE: %.3f' % error)
    print(predictions)
    generate_comparison_plot(expected, predictions)
    print(cal_error(expected, predictions))
#print(sc_rolmean,sc_rolstd) check_adfuller(turb_ma_diff['turb(uS)']) check_mean_std(turb_ma_diff, 'Turb(FNU)') #%% #X = series.values train, test = turb_train.values, turb_test.values turb_history = [x for x in train] turb_predictions = list() turb_diff = list() k = 1921 for t in range(len(test)): model = ARIMA(turb_history, order=(1, 0, 1)) model_fit = model.fit(disp=0) output = model_fit.forecast() yhat = output[0] turb_predictions.append(yhat) obs = test[t] turb_history.append(obs) diff = obs - yhat turb_diff.append(diff) print( 'TurbParameter Index= %d, predicted=%f, expected=%f, difference = %f' % (k, yhat, obs, diff)) k = k + 1 if (k == 3000): break #test1, test2 = tts(test,test_size = 337, random_state=0, shuffle=False)
# Read the observations and keep only the 'value' column.
data = pd.read_csv('wholedata.csv')
series = data['value']

# Work on a float32 array; the final 1% of the points is the hold-out set.
X = series.values.astype('float32')
train_size = int(len(X) * 0.99)
train = X[:train_size]
test = X[train_size:]

# Walk-forward validation: refit on everything seen so far, forecast one
# step, then reveal the true observation to the next refit.
history = list(train)
predictions = []
for i, obs in enumerate(test):
    model = ARIMA(history, order=(2, 1, 3))
    model_fit = model.fit(trend='nc', disp=0)
    yhat = model_fit.forecast()[0]
    predictions.append(yhat)
    history.append(obs)

# Forecast errors: summary statistics, then histogram above density plot.
residuals = DataFrame(
    [actual - forecast for actual, forecast in zip(test, predictions)])
print(residuals.describe())
pyplot.figure()
pyplot.subplot(211)
residuals.hist(ax=pyplot.gca())
pyplot.subplot(212)
residuals.plot(kind='kde', ax=pyplot.gca())
# # Statsmodels also includes things like ARMA and ARIMA models that can be used to make predictions from time series. This data is not necessarily very stationary and often has strong periodic effects, so these may not necessarily work very well. I'll look at ARIMA predictions for the same set of very high viewcount pages.

# In[54]:

from statsmodels.tsa.arima_model import ARIMA
import warnings

cols = train.columns[1:-1]
for key in top_pages:
    data = np.array(train.loc[top_pages[key], cols], 'f')
    result = None
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore')
        # Try the richer model first and fall back to a smaller one.
        for order in ([2, 1, 4], [2, 1, 2]):
            try:
                arima = ARIMA(data, order)
                result = arima.fit(disp=False)
                break
            except Exception:
                # Fitting failed for this order; Ctrl-C still propagates.
                continue
    if result is None:
        # Neither order could be fitted: report the page and move on instead
        # of calling .predict on None.
        print(train.loc[top_pages[key], 'Page'])
        print('\tARIMA failed')
        continue
    #print(result.params)
    pred = result.predict(2, 599, typ='levels')
    x = list(range(600))
    i = 0
    plt.plot(x[2:len(data)], data[2:], label='Data')
    plt.plot(x[2:], pred, label='ARIMA Model')
from statsmodels.tsa.arima_model import ARIMA
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt

st.write(""" AdventureWorks Forecasting """)

# Load the data and index it by the parsed 'RateChangeDate' column.
df = pd.read_csv('csv_data/forecast_data.csv', index_col=False)
df['RateChangeDate'] = pd.to_datetime(df['RateChangeDate'])
df = df.set_index('RateChangeDate')
ts = df['paidAmountSum']

# Show the raw data as a chart and as a table.
st.text('This is a forecast')
st.line_chart(df)
st.dataframe(df)

# Fit ARIMA(1, 1, 1) on the paid-amount series.
model = ARIMA(ts, order=(1, 1, 1))
results = model.fit()

# results.plot_predict(1, 220)
# The sidebar slider picks the end index of the plotted prediction.
values = st.sidebar.slider("Forecast Range", 200, 300)
forecast_figure = results.plot_predict(1, values)
st.pyplot(forecast_figure)
# Dependencies
from random import random
from statsmodels.tsa.arima_model import ARIMA

# Synthetic upward-trending data: each integer 1..999 plus noise in [0, 1).
dataset = [x + random() for x in range(1, 1000)]

# Fit an ARIMA(1, 1, 1) model without printing convergence output.
arima = ARIMA(dataset, order=(1, 1, 1))
arima_fit = arima.fit(disp=False)

# Predict the single step just past the end of the data, on the original
# (undifferenced) scale.
next_index = len(dataset)
y = arima_fit.predict(next_index, next_index, typ='levels')
print(y)

# Try fiddling with the parameters of the ARIMA model.
elif i == len(dataframe): break else: i += 1 #remove_points_with_propagation(df["9"][:1000]) print('Counter: ' + str(counter)) print('Number of rows with 0 as value: ' + str((df["9"] == 0).astype(int).sum(axis=0))) series.plot() plt.show() #Creating model model = ARIMA(series, order=(1, 0, 3)) model_fit = model.fit(disp=0) print(model_fit.summary()) # Plot residual errors residuals = pd.DataFrame(model_fit.resid) fig, ax = plt.subplots(1, 2) residuals.plot(title="Residuals", ax=ax[0]) residuals.plot(kind='kde', title='Density', ax=ax[1]) plt.show() model_fit.plot_predict(dynamic=False) plt.xlim(["2018-10-08", "2018-10-12"]) plt.ylabel("Voltage") plt.show()
print('\t{}: {}'.format(key, value)) korona_log = np.log(korona_death) plt.plot(korona_log) plt.show() rolling_mean = korona_log.rolling(window=12).mean() korona_log_minus_mean = korona_log - rolling_mean korona_log_minus_mean.dropna(inplace=True) plt.plot(korona_log_minus_mean) plt.show() korona_log_shift = korona_log - korona_log.shift() korona_log_shift.dropna(inplace=True) plt.plot(korona_log_shift) plt.show() model = ARIMA(new_korona.death, order=(1, 1, 2)) model_fit = model.fit(disp=0) print(model_fit.summary()) residuals = DataFrame(model_fit.resid) residuals.plot() pyplot.show() residuals.plot(kind='kde') pyplot.show() print(residuals.describe()) model_fit.plot_predict(dynamic=False) plt.show()
get_ipython().magic('matplotlib inline') dta = (series - series.mean()) / (series.std()) plt.acorr(dta,maxlags = len(dta) -1, linestyle = "solid", usevlines = False, marker='') plt.show() autocorrelation_plot(series) plt.show() # In[29]: from pandas import datetime from pandas import DataFrame from statsmodels.tsa.arima_model import ARIMA # fit the model arima_mod = ARIMA(series,order=(5,1,0)) arima_mod_fit = arima_mod.fit(disp=0) print(arima_mod_fit.summary()) #Residual Errors residuals = DataFrame(arima_mod_fit.resid) residuals.plot() plt.show() residuals.plot(kind='kde') plt.show() print(residuals.describe()) # In[30]: from pandas import read_csv
def arimaForecast(ts):
    """Walk-forward ARIMA(3, 1, 4) forecast of ``ts['high']`` with a threshold backtest.

    The last 100 observations are held out. For each of them the model is
    refitted on everything seen so far, one step is forecast, and the
    forecast drives buy/sell calls on a ``Backtester``. Every 24 steps a bar
    chart of the absolute errors for that period is saved under
    ``testingarimaplots/``.

    Args:
        ts: Table-like object providing a ``'high'`` column (``.values``) and
            an ``.index``.

    Returns:
        None. Returns early (before plotting) if the wealth check made every
        100 steps finds wealth at or below 53.
    """
    #X=ts['Close'].values
    X = ts['high'].values
    #size=int(len(X)*0.98)
    size = len(X) - 100
    train, test = X[0:size], X[size:len(X)]
    test_length = len(test)
    preds = []
    history = list(train)

    backtest = Backtester(500, ts['high'].values, ts.index)
    backtest.buy(train[-1], len(train) - 1)
    bought_at = train[-1]
    sold_at = None

    minGlobalDifference = math.inf
    dailyforecast = []
    globaldiffs = []
    n_forecast = 0

    for i in range(test_length):
        print(i, " : ", test_length)
        if i != 0 and i % 24 == 0:
            # Close the finished 24-step period: save its error chart and
            # fold its smallest error into the global minimum.
            plt.delaxes()
            plt.bar(np.arange(len(dailyforecast)), dailyforecast)
            plt.savefig(
                "testingarimaplots/error_histogram{}.png".format(n_forecast))
            print("SUMMARY OF PERIOD MEAN OF DIFFERENCES: {}".format(
                mean(dailyforecast)))
            if min(dailyforecast) < minGlobalDifference:
                minGlobalDifference = min(dailyforecast)
            print("STARTING FORECASTING PERIOD (DAY AHEAD)")
            # The former rebuild of the last 24 history entries from ``test``
            # was dropped: history already ends with exactly those values.
            dailyforecast = []
            n_forecast += 1

        model = ARIMA(history, order=(3, 1, 4))
        fit = model.fit(disp=0)
        out = fit.forecast()

        # Sell when the forecast is 18% above the last buy reference; buy
        # back when it is 18% below the last sell reference.
        if out[0] >= bought_at * 1.18:
            backtest.sell(test[i], len(train) + i)
            sold_at = out[0]
        if sold_at is not None:
            if out[0] <= sold_at * 0.82:
                backtest.buy(test[i], len(train) + i)
                bought_at = out[0]

        if i % 100 == 0:
            print("WEALTH {} ".format(backtest.getWealth(len(train) + i)))
            if backtest.getWealth(len(train) + i) <= 53:
                return

        preds.append(out[0])
        diff = abs(preds[-1] - test[i])
        dailyforecast.append(diff[0])
        globaldiffs.append(diff[0])
        # The true observation (not the forecast) extends the history.
        history.append(test[i])
        print("PREDICTED {} EXPECTED {} DIFFERENCE {}".format(
            out[0], test[i], diff[0]))

    # Include the final, possibly partial, period in the global minimum.
    if dailyforecast:
        minGlobalDifference = min(minGlobalDifference, min(dailyforecast))

    plt.plot(preds)
    plt.plot(test)
    plt.show()
    # Take the square root so the printed value really is the RMSE.
    rmse = math.sqrt(mean_squared_error(test, preds))
    print(
        "RMSE: {}, SMALLEST DIFF BETWEEN REAL AND PREDICTED {} MEAN OF ABSOLUTE DIFFERENCES {}"
        .format(rmse, minGlobalDifference, mean(globaldiffs)))
ts_log_diff.plot(figsize=(15, 6)) test_stationarity(ts_log_diff.dropna()) #.dropna(inplace=True)) # Using decomposition method to decompose time series from pylab import rcParams plt.figure(5) rcParams['figure.figsize'] = 15, 6 #decomposition = sm.tsa.seasonal_decompose(ts_log, model = 'additive') decomposition = sm.tsa.seasonal_decompose(ts_log, freq=12, model='additive') decomposition.plot() ### Commented: ở đây decomposition ko có giá trị trả về. Tức là hàm seasonal_decompose ko trả ra kết quả gì cả ### nguyên nhân là do: gọi chuỗi dừng trong Arima, vì ts_log ko có tính dừng, nên khi gọi hàm này ra, kết quả ko có # Build ARIMA model arima_model = ARIMA(ts_log, order=(2, 1, 2)) arima_model_fit = arima_model.fit(disp=-1) plt.figure(6) plt.plot(ts_log_diff) plt.plot(arima_model_fit.fittedvalues, color='red') plt.title('RSS: %.4f' % np.nansum( (arima_model_fit.fittedvalues - ts_log_diff)**2)) # Read summary of ARIMA model print(arima_model_fit.summary()) # Convert predicted values to original scale predictions_ARIMA_diff = pd.Series(arima_model_fit.fittedvalues, copy=True) print(predictions_ARIMA_diff.head() ) # these are fitted values on the transformed data
plt.axhline(y=0,linestyle='--',color='gray') #plot acf plt.subplot(122) plt.plot(lag_pacf) plt.axhline(y=0,linestyle='--',color='gray') # In[26]: from statsmodels.tsa.arima_model import ARIMA #AR Model model = ARIMA(indexedDataset_logScale,order=(2, 1, 2)) # print(model) result_AR= model.fit(disp=-1) plt.plot(datasetLogDiffShifting) plt.plot(result_AR.fittedvalues,color='red') plt.title('RSS: %.4f'%sum((result_AR.fittedvalues-datasetLogDiffShifting["#Passengers"])**2)) print('Plotting AR Model') # In[27]: from statsmodels.tsa.arima_model import ARIMA #MA Model
def ARIMA_model(time_series_diff, args, name, max_attempts=None):
    """Select an ARMA order by information criterion and fit it, retrying on failure.

    Args:
        time_series_diff: stationary time series after differencing.
        args: parsed arguments. Reads ``plot``, ``ic``, ``tol``, ``method``,
            ``max_ar`` and ``max_ma``; ``ic``, ``tol`` and ``method`` are
            overwritten in place with defaults when their values are rejected.
        name: name of ``time_series_diff``; ACF/PACF plots are only saved for
            "trend_diff" and "residual_diff".
        max_attempts: optional cap on the number of fit attempts. ``None``
            (the default) keeps retrying until a fit succeeds.

    Returns:
        Tuple ``(model_fit, min_order)``: the fitted model and the (p, q)
        order chosen by ``arma_order_select_ic``.

    Raises:
        Exception: the last fitting error, re-raised once ``max_attempts``
            attempts have failed.
    """
    if args.plot and name in ["trend_diff", "residual_diff"]:
        fig, axes = plt.subplots(1, 2, figsize=(16, 3), dpi=100)
        plot_acf(time_series_diff.tolist(),
                 lags=min(50, len(time_series_diff) - 1),
                 ax=axes[0])
        plot_pacf(time_series_diff.tolist(),
                  lags=min(50, len(time_series_diff) - 1),
                  ax=axes[1])
        plt.savefig(name + "_acf_pacf.png")
        plt.close()

    # check if args.ic is illegal.
    if args.ic not in ["bic", "aic"]:
        logger.warning(
            "The information criteria is illegal. Turn to default ic: BIC")
        args.ic = "bic"

    # check the value of convergence tol.
    if args.tol > 0.01:
        logger.warning(
            "The convergence tolerance is too large. Turn to use default value: 1e-8"
        )
        args.tol = 1e-8

    # check the likelihood function used.
    if args.method not in ["css-mle", "mle", "css"]:
        logger.warning(
            "The likelihood function is illegal. Turn to default choice: css-mle"
        )
        args.method = "css-mle"

    evaluate = sm.tsa.arma_order_select_ic(time_series_diff,
                                           ic=args.ic,
                                           trend="c",
                                           max_ar=args.max_ar,
                                           max_ma=args.max_ma)

    # get the parameter for ARIMA model.
    min_order = evaluate[args.ic + "_min_order"]

    attempts = 0
    while True:
        attempts += 1
        # d is 0 because the series was already differenced by the caller.
        model = ARIMA(time_series_diff,
                      order=(min_order[0], 0, min_order[1]))
        try:
            # Random starting values (constant + AR + MA terms): each retry
            # starts the optimiser from a different point.
            model_fit = model.fit(
                disp=False,
                start_params=np.random.rand(min_order[0] + min_order[1] + 1),
                method=args.method,
                # Some posts' experimentation suggests that ARIMA models may
                # be less likely to converge with the trend term disabled,
                # especially when using more than zero MA terms.
                trend="c",
                transparams=True,
                # gives the best RMSE & execution time.
                solver="lbfgs",
                tol=args.tol,  # The convergence tolerance. Default is 1e-08.
            )
        except Exception:
            # Unlike a bare ``except:``, this lets KeyboardInterrupt and
            # SystemExit end the retry loop.
            if max_attempts is not None and attempts >= max_attempts:
                raise
            logger.warning("Error occurs, try another starting parameters.")
        else:
            return model_fit, min_order
plt.show() from collections import deque items = deque(np.asarray(fft_df['absolute'].tolist())) items.rotate(int(np.floor(len(fft_df)/2))) plt.figure(figsize=(10, 7), dpi=80) plt.stem(items) plt.title('Figure 4: Components of Fourier transforms') plt.show() from statsmodels.tsa.arima_model import ARIMA from pandas import DataFrame from pandas import datetime series = data_FT['GS'] model = ARIMA(series, order=(5, 1, 0)) model_fit = model.fit(disp=0) print(model_fit.summary()) from pandas.tools.plotting import autocorrelation_plot autocorrelation_plot(series) plt.figure(figsize=(10, 7), dpi=80) plt.show() plt.figure(figsize=(12, 6), dpi=100) plt.plot(test, label='Real') plt.plot(predictions, color='red', label='Predicted') plt.xlabel('Days') plt.ylabel('USD') plt.title('Figure 5: ARIMA model on GS stock') plt.legend() plt.show()
def smape(y_true, y_pred):
    """Return the symmetric mean absolute percentage error (0-200 scale).

    Both inputs are flattened to 1-D first. Without that, an (n,) array of
    observations and a list of n one-element forecast arrays broadcast to an
    (n, n) matrix and the mean is taken over every pairing.

    Args:
        y_true: observed values.
        y_pred: predicted values, same number of elements as ``y_true``.

    Returns:
        The mean of ``200 * |y_true - y_pred| / (|y_true| + |y_pred|)``.
    """
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()
    return np.mean(
        np.abs(y_true - y_pred) * 200 / (np.abs(y_true) + np.abs(y_pred)))


# In[18]:

train_val = train_set['Open'].values
test_val = test_set['Open'].values

history = [x for x in train_val]
print(type(history))  # this is list of training data

# Walk-forward validation: refit on everything seen so far, forecast one
# step, then add the true observation to the history.
prediction = list()
for t in range(len(test_val)):
    model = ARIMA(history, order=(3, 1, 0))
    model_fit = model.fit(disp=0)
    output = model_fit.forecast()
    yhat = output[0]
    prediction.append(yhat)
    obs = test_val[t]
    history.append(obs)

error = mean_squared_error(test_val, prediction)
# Use % so the value is formatted into the message instead of being printed
# after the raw format string.
print("Mean squared error : %0.3f" % error)
error2 = smape(test_val, prediction)
print("Symmetric mean absolute percentage error: %0.3f" % error2)

# In[19]:

print('Testing Mean Squared Error: %.3f' % error)
print("Symmetric mean absolute percentage error: %0.3f" % error2)
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
from statsmodels.tsa.arima_model import ARIMA

SERIES = [16, 20, 32, 40, 20, 18, 11, 21, 4, 6, 31, 48, 43, 49, 37]

# Fit ARIMA(4, 1, 1) without printing convergence output.
model = ARIMA(SERIES, order=(4, 1, 1))
model_fit = model.fit(disp=0)

# typ='levels' asks for predictions on the original scale rather than on the
# differenced series.
prediction = model_fit.predict(16, 19, typ='levels')

# Call form works on both Python 2 and Python 3; the statement form is a
# syntax error on Python 3.
print(prediction)
def parser(x):
    """Build a datetime from a date label by prefixing it with '190'."""
    full_label = '190' + x
    return datetime.strptime(full_label, '%Y-%m')


series = read_csv('shampoo-sales.csv',
                  header=0,
                  parse_dates=[0],
                  index_col=0,
                  squeeze=True,
                  date_parser=parser)

# The first 66% of the observations train the model; the rest is held out.
X = series.values
size = int(len(X) * 0.66)
train, test = X[:size], X[size:]

# Walk-forward validation: refit on all data seen so far, forecast one step,
# then reveal the true observation.
history = list(train)
predictions = []
for t, obs in enumerate(test):
    model = ARIMA(history, order=(5, 1, 0))
    model_fit = model.fit(disp=0)
    output = model_fit.forecast()
    yhat = output[0]
    predictions.append(yhat)
    history.append(obs)
    print('predicted=%f, expected=%f' % (yhat, obs))

error = mean_squared_error(test, predictions)
print('Test MSE: %.3f' % error)

# Plot the held-out observations against the forecasts (red).
pyplot.plot(test)
pyplot.plot(predictions, color='red')
pyplot.show()
#-*- coding: utf-8 -*-
# Determine the best p and q values (d is fixed at 1) by minimum BIC.
import pandas as pd

# Parameter initialisation
discfile = '../data/discdata_processed.xls'
data = pd.read_excel(discfile, index_col='COLLECTTIME')
data = data.iloc[:len(data) - 5]  # leave out the last 5 observations
xdata = data['CWXT_DB:184:D:\\']

from statsmodels.tsa.arima_model import ARIMA

# Order selection: as a rule of thumb the order does not exceed length/10.
pmax = int(len(xdata) / 10)
qmax = int(len(xdata) / 10)

bic_matrix = []  # BIC matrix: rows are p, columns are q
for p in range(pmax + 1):
    tmp = []
    for q in range(qmax + 1):
        try:
            # Some (p, q) combinations fail to fit; skip those.
            tmp.append(ARIMA(xdata, (p, 1, q)).fit().bic)
        except Exception:
            # NaN (not None) keeps the matrix numeric so idxmin works.
            tmp.append(float('nan'))
    bic_matrix.append(tmp)

# Force a float dtype so a row or column of failures cannot turn the frame
# into object dtype.
bic_matrix = pd.DataFrame(bic_matrix).astype(float)

# Flatten with stack, then idxmin gives the (p, q) position of the minimum.
p, q = bic_matrix.stack().idxmin()
print(u'BIC最小的p值和q值为:%s、%s' % (p, q))
mpl.rcParams['font.sans-serif'] = [u'SimHei'] mpl.rcParams['axes.unicode_minus'] = False x = data['Passengers'].astype(np.float) x = np.log(x) print x.head(10) show = 'prime' # 'diff', 'ma', 'prime' d = 1 diff = x - x.shift(periods=d) ma = x.rolling(window=12).mean() xma = x - ma p = 2 q = 2 model = ARIMA(endog=x, order=(p, d, q)) # 自回归函数p,差分d,移动平均数q arima = model.fit(disp=-1) # disp<0:不输出过程 prediction = arima.fittedvalues print type(prediction) y = prediction.cumsum() + x[0] mse = ((x - y)**2).mean() rmse = np.sqrt(mse) plt.figure(facecolor='w') if show == 'diff': plt.plot(x, 'r-', lw=2, label=u'原始数据') plt.plot(diff, 'g-', lw=2, label=u'%d阶差分' % d) #plt.plot(prediction, 'r-', lw=2, label=u'预测数据') title = u'乘客人数变化曲线 - 取对数' elif show == 'ma': #plt.plot(x, 'r-', lw=2, label=u'原始数据')
model_fit = model.fit(maxlag=1, method='mle', disp=-1) # make prediction yhat = model_fit.predict(0, len(response)+10) createPlot(yhat, response) # In[343]: # Autoregressive Integrated Moving Average # ## Autoregressive Integrated Moving Average # # http://www.statsmodels.org/dev/generated/statsmodels.tsa.arima_model.ARIMA.html#statsmodels.tsa.arima_model.ARIMA # # https://otexts.com/fpp2/non-seasonal-arima.html model = ARIMA(endog = response, order=(1, 0, 1)) model_fit = model.fit(disp=False) # make prediction yhat = model_fit.predict(0, len(response)+10) createPlot(yhat, response) # In[344]: # Seasonal Autoregressive Integrated Moving Average # ## Seasonal Autoregressive Integrated Moving Average # http://www.statsmodels.org/dev/generated/statsmodels.tsa.statespace.sarimax.SARIMAX.html # fit model model = SARIMAX(response, order=(1, 1, 1), seasonal_order=(1, 1, 1, 1))