def test_corner_cases():
    """Check ``auto_arima`` argument validation and a constant-series fit."""
    # An unknown error_action string has to be rejected.
    assert_raises(ValueError, auto_arima, wineind,
                  error_action='some-bad-string')

    # Everything below may emit warnings; silence them for the duration.
    with warnings.catch_warnings(record=False):
        warnings.simplefilter('ignore')

        # A constant series should result in a quick fit.
        auto_arima(np.ones(10), suppress_warnings=True)

        # With return_valid_fits=True the result must be iterable.
        valid_fits = auto_arima(np.ones(10),
                                suppress_warnings=True,
                                return_valid_fits=True)
        assert hasattr(valid_fits, '__iter__')

        # we did this in 0.1-alpha:
        # show that with <= 3 samples, using a non-aic metric reverts to AIC
        # try:
        #     _ = auto_arima(np.arange(3), information_criterion='bic',
        #                    seasonal=False, suppress_warnings=True)
        # except ValueError:  # this happens because it can't fit such
        #     pass            # small data...

        # A negative n_fits must fail.
        assert_raises(ValueError, auto_arima, np.ones(10),
                      random=True, n_fits=-1)

        # max_p smaller than start_p must fail.
        assert_raises(ValueError, auto_arima, np.ones(10),
                      start_p=5, max_p=0)
def test_small_samples():
    """Run a stepwise search on only the first eight ``lynx`` observations."""
    # if n_samples < 10, test the new starting p, d, Q
    short_series = lynx[:8]
    auto_arima(
        short_series,
        stepwise=True,
        error_action='ignore',
        suppress_warnings=True,
    )
def test_with_seasonality6():
    """Fit when the search bounds leave exactly one (p, q) candidate."""
    # show that we can fit an ARIMA where the max_p|q == start_p|q
    search_kwargs = {
        'start_p': 0,
        'max_p': 0,
        'd': 0,
        'start_q': 0,
        'max_q': 0,
        'seasonal': False,
        'max_order': np.inf,
        'suppress_warnings': True,
    }
    auto_arima(hr, **search_kwargs)
def test_failing_model_fit():
    """A random search with ``error_action='raise'`` must surface ValueError."""
    search_kwargs = dict(
        seasonal=True,
        m=2,
        random=True,
        random_state=1,
        n_fits=2,
        error_action='raise',
        suppress_warnings=True,
    )
    # The original author noted the failure as:
    # ValueError('non-invertible starting MA parameters found')
    with pytest.raises(ValueError):
        auto_arima(wineind, **search_kwargs)
def test_many_orders():
    """Stepwise search over p, q in 1..5 on a power-transformed ``lynx``."""
    # Power transform (x**lam - 1) / lam with lam = 0.5.
    exponent = 0.5
    transformed = ((lynx ** exponent) - 1) / exponent

    auto_arima(
        transformed,
        d=0,
        start_p=1,
        max_p=5,
        start_q=1,
        max_q=5,
        stepwise=True,
        suppress_warnings=True,
    )
def autoarima(self, data, pre_len=7):
    """Fit an auto-ARIMA model to ``data`` and forecast ``pre_len`` periods.

    The differencing orders are first estimated with ``nsdiffs`` and
    ``ndiffs``. A stepwise search is then run with those orders; if that
    search raises, a fallback non-seasonal search with ``d=0``, ``D=0`` and
    ``max_q=0`` is run instead. The fitted model and forecast are handed to
    ``self._get_model``.

    Parameters
    ----------
    data : sequence
        The observed series (must support ``len``).
    pre_len : int, optional (default=7)
        Number of future periods to predict.

    Returns
    -------
    list
        The forecast, i.e. ``predict(n_periods=pre_len).tolist()``.
    """
    # NOTE(review): D is estimated with m=3 while the model is searched with
    # m=12 -- confirm this mismatch is intended.
    D_f = nsdiffs(data, m=3, max_D=5, test='ch')
    d_f = ndiffs(data, alpha=0.05, test='kpss', max_d=5)

    # Seasonal terms are only searched when there are more than 30 points.
    seasonal = len(data) > 30

    # Settings shared by the primary and the fallback search.
    shared = dict(
        start_p=0,
        start_q=0,
        max_p=3,
        m=12,
        start_P=0,
        trace=False,
        error_action='ignore',  # don't want to know if an order does not work
        suppress_warnings=True,  # don't want convergence warnings
        stepwise=True,
    )

    try:
        stepwise_fit = auto_arima(data, max_q=3, seasonal=seasonal,
                                  d=d_f, D=D_f, **shared)
    except Exception:
        # Was a bare ``except:``, which also trapped KeyboardInterrupt and
        # SystemExit; only genuine errors should trigger the fallback.
        stepwise_fit = auto_arima(data, max_q=0, seasonal=False,
                                  d=0, D=0, **shared)

    output = stepwise_fit.predict(n_periods=pre_len).tolist()
    self._get_model({
        'model_name': 'autoarima',
        'model': stepwise_fit,
        'pred': output,
        'org_data': data,
        'pre_len': pre_len
    })
    return output
def train(self, y_train, order=None, seasonal_order=None):
    """Pick SARIMA orders with ``auto_arima`` and fit a SARIMAX model.

    y_train: training dataset
    order: ARIMA order. Example: (1,1,0)
    seasonal_order: ARIMA seasonal order. Example: (0,1,0)

    NOTE(review): ``order`` and ``seasonal_order`` are accepted but never
    read here; the orders always come from the search -- confirm intent.

    Returns the fitted SARIMAX result, which is also stored on
    ``self.model``; the selected orders are stored on ``self.model_params``.
    """
    search = auto_arima(
        y_train,
        seasonal=True,
        m=12,
        # d=1, D=1,
        start_p=0,
        max_p=6,
        start_q=1,
        max_q=3,
        start_P=0,
        stepwise=True,
        trace=True,
        error_action='ignore',
        suppress_warnings=True,
    )  # .fit(y_train)

    warnings.filterwarnings("ignore")  # specify to ignore warning messages

    sarimax = sm.tsa.statespace.SARIMAX(
        y_train,
        order=search.order,
        seasonal_order=search.seasonal_order,
        enforce_stationarity=False,
        enforce_invertibility=False,
    )
    self.model = sarimax.fit(disp=False)
    self.model_params = (search.order, search.seasonal_order)
    return self.model
def gaojier(differsets):
    """Return a one-step-ahead auto-ARIMA forecast for each entry.

    ``differsets`` is indexed by position ``0 .. len(differsets) - 1``; one
    non-seasonal stepwise model (BIC, ADF test) is fitted per entry and the
    first predicted value is collected.
    """
    search_kwargs = dict(
        start_p=1,
        start_q=1,
        max_p=6,
        max_q=6,
        max_d=6,
        max_order=None,
        seasonal=False,
        m=1,
        test='adf',
        trace=False,
        error_action='ignore',  # don't want to know if an order does not work
        suppress_warnings=True,  # don't want convergence warnings
        stepwise=True,  # set to stepwise
        information_criterion='bic',
        # NOTE(review): 'njob' looks like a misspelling of 'n_jobs' --
        # confirm against the auto_arima signature before changing it.
        njob=-1,
    )

    forecast_result = []
    # Look entries up by position rather than iterating the container, so
    # that containers whose iteration does not yield their items (the
    # container type is not visible here) are handled as before.
    for position in np.arange(len(differsets)):
        fitted = auto_arima(differsets[position], **search_kwargs)
        next_value = fitted.predict(1)[0]
        forecast_result.append(next_value)
    return forecast_result
def a_arima(timeseries):
    """Search a seasonal ARIMA on ``timeseries``, refit, forecast and plot.

    The model order is chosen on ``timeseries``, the chosen model is refitted
    on the module-level ``train`` series, and a 182-period forecast is
    plotted next to the module-level ``test`` series with ``iplot``.

    NOTE(review): ``train`` and ``test`` are not parameters; they must exist
    in the enclosing module. The forecast is indexed by ``test.index``, so
    ``test`` presumably holds exactly 182 rows -- confirm.

    Returns
    -------
    None
    """
    stepwise_model = auto_arima(timeseries,
                                start_p=1, start_q=1,
                                max_p=3, max_q=3,
                                m=12, start_P=0, seasonal=True,
                                d=1, D=1,
                                trace=True,
                                error_action='ignore',
                                suppress_warnings=True,
                                stepwise=True)
    print(stepwise_model.aic())

    # Train the Model
    stepwise_model.fit(train)

    # Forecast
    future_forecast = stepwise_model.predict(n_periods=182)
    # The keyword is ``columns``; the previous ``column=`` is not accepted
    # by the DataFrame constructor and raised TypeError.
    future_forecast = pd.DataFrame(future_forecast,
                                   index=test.index,
                                   columns=['Prediction'])
    pd.concat([test, future_forecast], axis=1).iplot()
def fit(self):
    """Search the best seasonal ARIMA for the stored data and refit it.

    The selected model goes to ``self.model`` and the value returned by the
    final ``fit`` call goes to ``self.results``. The AIC and orders are
    printed; when ``self._comparing`` is truthy the same information is
    passed to ``self.save_aic`` together with the first and last index
    labels of the data.
    """
    series = self.__data
    self.model = auto_arima(
        series,
        seasonal=True, m=12,
        d=1, D=1,
        start_p=1, max_p=3,
        start_q=1, max_q=3,
        start_P=0,
        stepwise=True,
        trace=True,
        error_action='ignore',
        suppress_warnings=True,
    )

    print('Best found AIC: %f' % (self.model.aic()))

    order = self.model.order
    seasonal_order = self.model.seasonal_order
    model_terms = (
        order[0], order[1], order[2],
        seasonal_order[0], seasonal_order[1],
        seasonal_order[2], seasonal_order[3],
    )
    print('Arima model (%d, %d, %d) x (%d, %d, %d, %d) ' % model_terms)

    # self.train = self.__data.loc['1985-01-01':'2016-12-01']
    # self.test = self.__data.loc['2015-01-01':]

    if self._comparing:
        line_to_write = (
            'AIC : {}, model ({}, {}, {}) x ({}, {}, {}, {})'.format(
                self.model.aic(), *model_terms))
        self.save_aic(series.index[0], series.index[-1], line_to_write)

    self.results = self.model.fit(series)
def auto_arima_predict(data):
    """Fit a seasonal auto-ARIMA on ``Close`` and forecast the hold-out span.

    Fewer than 4293 rows are split 4/5 train, 1/5 validation. Otherwise the
    last 973 rows are the validation part and the rows from position
    ``-4293`` up to it are the training part. Training, validation and
    forecast are plotted and the RMSE on the validation part is printed.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a ``Close`` column (sliced by position).

    Returns
    -------
    pandas.Series
        The ``Prediction`` column, indexed like the validation part.
    """
    n_rows = len(data)
    if n_rows < 4293:
        split_at = round(n_rows * 4 / 5)
        train = data[:split_at]
        valid = data[split_at:]
    else:
        train = data[-4293:n_rows - 973]
        valid = data[-973:]

    training = train['Close']
    validation = valid['Close']

    model = auto_arima(training,
                       start_p=1, start_q=1,
                       max_p=3, max_q=3,
                       m=12, start_P=0, seasonal=True,
                       d=1, D=1,
                       trace=True,
                       error_action='ignore',
                       suppress_warnings=True)
    model.fit(training)

    # Forecast exactly one value per validation row. The previous horizon,
    # round(len(data) / 5), equals len(valid) on the 4/5 split but not on
    # the fixed 973-row split, where the DataFrame below could not be built.
    forecast = model.predict(n_periods=len(valid))
    forecast = pd.DataFrame(forecast, index=valid.index,
                            columns=['Prediction'])

    plt.plot(training)
    plt.plot(validation)
    plt.plot(forecast['Prediction'])
    plt.show()

    # Root-mean-square error of the forecast against the validation data.
    print(np.sqrt(np.mean(np.power(
        (np.array(validation) - np.array(forecast['Prediction'])), 2))))
    return forecast['Prediction']
def fit(self, train_size='2016-12-01'):
    """Search the best seasonal ARIMA, split the data and refit on the train part.

    Parameters
    ----------
    train_size : str or int, optional (default='2016-12-01')
        Upper bound of the training slice ``self.__data[:train_size]``.
        A string is converted with ``pd.to_datetime`` and located in the
        data index; an integer is used as a position.

    Side effects
    ------------
    Sets ``self.model``, ``self.train`` and ``self.test``, and prints the
    AIC and the selected orders. ``self.test`` starts 30 positions before
    the split position when that position is at least 30, otherwise at the
    split position itself.

    Raises
    ------
    ValueError
        From ``list.index`` when a string ``train_size`` is not a label of
        the data index.
    """
    self.model = auto_arima(self.__data,
                            start_p=1, start_q=1,
                            max_p=3, max_q=3,
                            m=12, start_P=0, seasonal=True,
                            d=1, D=1,
                            trace=True,
                            error_action='ignore',
                            suppress_warnings=True,
                            stepwise=True)

    print('Best found AIC: %f' % (self.model.aic()))
    print('Arima model (%d, %d, %d) x (%d, %d, %d, %d) ' %
          (tuple(self.model.order[:3]) +
           tuple(self.model.seasonal_order[:4])))

    # self.train = self.__data.loc['1985-01-01':'2016-12-01']
    # self.test = self.__data.loc['2015-01-01':]
    self.train = self.__data[:train_size]

    # isinstance also accepts str subclasses, which the former
    # ``type(train_size) is str`` sent down the integer branch.
    if isinstance(train_size, str):
        split_pos = list(self.__data.index).index(pd.to_datetime(train_size))
    else:
        split_pos = train_size

    # Both branches applied the same rule: step back 30 positions if possible.
    index = split_pos - 30 if split_pos >= 30 else split_pos

    self.test = self.__data[index:]
    self.model.fit(self.train)
def auto(self, **kwargs):
    """Find the best ARIMA with ``auto_arima`` and wrap the outcome.

    @see pyramid.arima.auto_arima

    Parameters
    ----------
    **kwargs
        Forwarded unchanged to ``auto_arima``.

    Returns
    -------
    list or None
        A one-element list built with ``self.from_instance`` when a single
        ``ARIMA`` comes back; one fresh ``PyramidWrapper`` per entry when a
        list comes back; ``None`` for any other result type.
    """
    # Library.
    from pyramid.arima import auto_arima
    from pyramid.arima.arima import ARIMA

    # Compute auto_arima.
    found = auto_arima(**kwargs)

    # Single model: wrap it through this instance.
    if isinstance(found, ARIMA):
        return [self.from_instance(found)]

    # Several models: one new wrapper each.
    if isinstance(found, list):
        wrapped = []
        for fitted in found:
            wrapped.append(PyramidWrapper().from_instance(fitted))
        return wrapped

    return None
def auto_fit_(self, init_p, init_q, max_p, max_q, m, init_P, d, D, seasonal,
              stepwise):
    """Tune the ARIMA parameters on the first 80% of ``self.timeseries``.

    Parameters
    ----------
    init_p, init_q, init_P
        Passed to ``auto_arima`` as ``start_p``, ``start_q`` and ``start_P``.
    max_p, max_q, m, d, D, seasonal, stepwise
        Passed to ``auto_arima`` under the same names.

    Returns
    -------
    The value of ``summary()`` of the selected model.
    """
    # Only the training part is needed here: the leading 80% of the series.
    train_size = int(0.8 * len(self.timeseries))
    train = self.timeseries[0:train_size]

    stepwise_fit = auto_arima(
        train,
        start_p=init_p,
        start_q=init_q,
        max_p=max_p,
        max_q=max_q,
        m=m,
        start_P=init_P,
        # Honour the caller's choice; this was hard-coded to True, which
        # silently ignored the ``seasonal`` argument.
        seasonal=seasonal,
        d=d,
        D=D,
        trace=True,
        error_action='ignore',  # don't want to know if an order does not work
        suppress_warnings=True,  # don't want convergence warnings
        stepwise=stepwise)  # set to stepwise

    return stepwise_fit.summary()
def auto_arimax(X, test_split=0.2):
    """Hold out the tail of ``X``, fit a seasonal auto-ARIMA, plot and score.

    Prepare the data before calling, otherwise the x axis labels will not be
    plotted::

        data['Date'] = data['Date'].astype('datetime64[ns]')
        data.set_index("Date", inplace=True)

    The figure is drawn but not saved here; save it from the caller.

    Parameters
    ----------
    X : pandas.DataFrame
        Single-column frame (both parts get a one-element column list).
    test_split : float, optional (default=0.2)
        Fraction of rows, taken from the end, used as the test part.

    Returns
    -------
    The value of ``rmse`` between the test data and the predictions.

    Raises
    ------
    ValueError
        If the split would leave the training or the test part empty.
    """
    n_rows = X.shape[0]
    test_samples = int(n_rows * test_split)
    # With test_samples == 0, X[:-0] is empty and X[-0:] is everything,
    # so the split would be inverted; reject it explicitly.
    if not 0 < test_samples < n_rows:
        raise ValueError(
            'test_split=%r leaves an empty train or test set for %d rows'
            % (test_split, n_rows))

    train_data, test_data = X[:-test_samples], X[-test_samples:]
    train_data.columns = ["Training Data"]
    test_data.columns = ["Test Data"]

    # Search on the training part only: searching on all of X let the
    # hold-out rows influence the selected order. The returned model is
    # already fitted on the data it was searched on, so no refit is needed.
    stepwise_model = auto_arima(train_data,
                                start_p=1, start_q=1,
                                max_p=3, max_q=3,
                                m=12, start_P=0, seasonal=True,
                                d=1, D=1,
                                trace=True,
                                error_action='ignore',
                                suppress_warnings=True,
                                stepwise=True)

    predictions = stepwise_model.predict(n_periods=len(test_data))
    predictions = pd.DataFrame(predictions, index=test_data.index,
                               columns=['Prediction'])

    result = pd.concat([train_data, test_data, predictions], axis=1)
    result.plot()

    return rmse(np.array(test_data).flatten(),
                np.array(predictions).flatten())
def multi_process(i, df, date):
    """Forecast the next value for stock symbol ``i`` with auto-ARIMA.

    The rows of ``df`` for this symbol are split at ``date``: up to the last
    713 rows with ``Date <= date`` form the history, and the first row after
    ``date`` is the value to compare against. The model is fitted on the
    first difference of the history, and the one-step forecast is added back
    onto the last observed value.

    Parameters
    ----------
    i
        Value matched against the ``Stock Symbol`` column.
    df : pandas.DataFrame
        Frame with ``Stock Symbol`` and ``Date`` columns; the first remaining
        column is the value that is read.
    date
        Split point compared against the parsed ``Date`` column.

    Returns
    -------
    list or None
        ``[i, last_day_value, next_day_forecast, AIC, next_day_value]``, or
        ``None`` (after printing a message) when any step fails.
    """
    try:
        df2 = df.loc[df['Stock Symbol'] == i].drop(columns='Stock Symbol')
        df2['Date'] = pd.to_datetime(df2['Date'],
                                     format='%Y-%m-%d',
                                     errors='coerce')

        df2t = df2[df2['Date'] <= date].iloc[-713:, ].set_index('Date')
        df2v = df2[df2['Date'] > date].iloc[:1, ].set_index('Date')
        df2t_2 = df2t.diff().dropna()

        model = auto_arima(df2t_2,
                           trace=False,
                           error_action='ignore',
                           suppress_warnings=True)
        model.fit(df2t_2)

        last_day_value = df2t.iloc[-1:, ].values[0][0]
        # Undo the differencing: predicted change + last observed level.
        next_day_forecast = model.predict(n_periods=1)[0] + last_day_value
        AIC = model.aic()
        next_day_value = df2v.iloc[:1, ].values[0][0]

        return [i, last_day_value, next_day_forecast, AIC, next_day_value]
    except Exception:
        # Best effort per symbol. Was a bare ``except:``, which also trapped
        # KeyboardInterrupt/SystemExit and made a batch run hard to stop.
        print('Error with ', i)
        return None
def test_with_seasonality5():
    """Random seasonal search with exogenous regressors returns an iterable."""
    # can we fit the same thing with an exogenous array of predictors?
    # also make it stationary and make sure that works...
    exog = rs.rand(wineind.shape[0], 4)

    search_kwargs = dict(
        start_p=1, start_q=1,
        max_p=2, max_q=2,
        m=12, start_P=0, seasonal=True,
        d=1, D=None,
        n_jobs=1,
        stationary=True,
        random=True, random_state=42,
        # NOTE(review): the original comment read "only fit 2" although
        # n_fits is 5 -- confirm which is intended.
        n_fits=5,
        return_valid_fits=True,
        error_action='ignore',
        suppress_warnings=True,
        exogenous=exog,
    )
    all_res = auto_arima(wineind, **search_kwargs)

    # show it is a list
    assert hasattr(all_res, '__iter__')
def test_the_r_src():
    """Compare a fixed-order fit and an auto search on ``abc`` with R's values."""
    # this is the test the R code provides
    manual_fit = ARIMA(order=(2, 0, 1), trend='c',
                       suppress_warnings=True).fit(abc)

    # the R code's AIC = ~135
    assert abs(135 - manual_fit.aic()) < 1.0

    # the R code's BIC = ~145
    assert abs(145 - manual_fit.bic()) < 1.0

    # R's coefficients:
    #     ar1      ar2     ma1    mean
    # -0.6515  -0.2449  0.8012  5.0370
    # note that statsmodels' mean is on the front, not the end.
    expected_params = np.array([5.0370, -0.6515, -0.2449, 0.8012])
    assert_almost_equal(manual_fit.params(), expected_params, decimal=2)

    # > fit = forecast::auto.arima(abc, max.p=5, max.d=5, max.q=5,
    #                              max.order=100, stepwise=F)
    searched_fit = auto_arima(
        abc,
        max_p=5, max_d=5, max_q=5, max_order=100,
        seasonal=False,
        trend='c',
        suppress_warnings=True,
        error_action='ignore',
    )

    # this differs from the R fit with a slightly higher AIC...
    assert abs(137 - searched_fit.aic()) < 1.0  # R's is 135.28
def train(self, base_dataset, start_date=None, end_date=None):
    """
    Trains the model.

    :param Dataset base_dataset: The dataset used to extract the training
        set in accordance with the date range.
    :param start_date: The minimum date for the records used in the
        training set.
    :type start_date: datetime.datetime or None
    :param end_date: The maximum date for the records used in the
        training set.
    :type end_date: datetime.datetime or None
    :raises ValueError: if both dates are given and ``start_date`` is later
        than ``end_date``.
    """
    range_is_bounded = start_date is not None and end_date is not None
    if range_is_bounded and start_date > end_date:
        raise ValueError('Invalid training date range')

    frame = base_dataset.getDataframe(ticker_symbol=self.ticker_symbol,
                                      from_date=start_date,
                                      to_date=end_date)

    # Remember the span that was actually covered by the returned records.
    record_dates = frame.date
    self.training_start = min(record_dates)
    self.training_end = max(record_dates)

    # Pre-Processing
    # ARIMA only receives a sequence of values in the training,
    # so only the sequence of closing prices is needed.
    closing_prices = frame.close

    self.model = auto_arima(
        closing_prices,
        seasonal=True, m=12,
        d=1, D=1,
        start_p=1, max_p=3,
        start_q=1, max_q=3,
        start_P=0,
        trace=True,
        error_action='ignore',
        suppress_warnings=True,
    )
    self.model.fit(closing_prices)
def calculate_prediction_data_arima(past_data, date_to):
    """Forecast every non-empty column of ``past_data`` up to ``date_to``.

    For each key of ``past_data`` whose mapping has at least one entry, a
    seasonal auto-ARIMA is fitted and its forecast is stored under date
    strings formatted with ``consts.DATE_FORMAT``. The collected forecasts
    are passed through ``filter_unrealistic_values`` before being returned.

    Returns
    -------
    dict
        ``{column: {date string: forecast value}}`` after filtering.
    """
    result = dict()

    for col in past_data.keys():
        history = past_data[col]
        if len(list(history.keys())) == 0:
            # Nothing recorded for this column; it gets no entry at all.
            continue

        result[col] = dict()

        factor = pd.DataFrame.from_dict(history, orient='index')
        factor.index = pd.to_datetime(factor.index)

        stepwise_model = auto_arima(
            factor,
            seasonal=True, m=12,
            d=1, D=1,
            start_p=1, max_p=3,
            start_q=1, max_q=3,
            start_P=0,
            stepwise=True,
            trace=True,
            error_action='ignore',
            suppress_warnings=True,
        )
        stepwise_model.fit(factor)

        n_periods = util.get_prediction_periods(date_to)
        future = stepwise_model.predict(n_periods=n_periods)
        future_dt = util.get_prediction_datetimes_dt(datetime.now(), date_to)

        # Pair each forecast timestamp with the forecast at the same position.
        for position, moment in enumerate(future_dt):
            result[col][moment.strftime(consts.DATE_FORMAT)] = \
                future[position]

    return filter_unrealistic_values(result)
def fit(self):
    """Train the model: run ``auto_arima`` on ``self.ts_train``.

    The search bounds and switches are read from the instance; the selected
    model is stored on ``self.fittedModel``.
    """
    # self.fittedModel = auto_arima(y=self.ts_train)
    self.fittedModel = auto_arima(
        y=self.ts_train,
        # search bounds configured on the instance
        start_p=self.start_p,
        start_q=self.start_q,
        max_p=self.max_p,
        max_q=self.max_q,
        max_d=self.max_d,
        max_order=self.max_order,
        seasonal=self.seasonal,
        m=self.m,
        # start_P=2,
        # start_Q=0,
        # D=1,
        # max_Q=0,
        maxiter=1000,
        test='adf',
        information_criterion='bic',
        stepwise=self.stepwise,
        trace=False,
        error_action='ignore',  # don't want to know if an order does not work
        suppress_warnings=True,  # don't want convergence warnings
        # NOTE(review): 'njob' looks like a misspelling of 'n_jobs' --
        # confirm against the auto_arima signature before changing it.
        njob=-1,
    )
def ArimaxKLX(trainDF, predDF):
    """Forecast ``OrderQuantity`` with month and year as exogenous regressors.

    Parameters
    ----------
    trainDF : pandas.DataFrame
        Training frame with a datetime ``MonthYear`` column and an
        ``OrderQuantity`` column.
    predDF : pandas.DataFrame
        Frame with a datetime ``MonthYear`` column; one forecast is produced
        per row.

    Returns
    -------
    The forecast, rounded to whole numbers and floored at zero.
    """
    TrainExogenousDF = pd.DataFrame({
        'Month': trainDF['MonthYear'].dt.month,
        'Year': trainDF['MonthYear'].dt.year,
    })
    TestExogenousDF = pd.DataFrame({
        'Month': predDF['MonthYear'].dt.month,
        'Year': predDF['MonthYear'].dt.year,
    })

    arimax_fit = auto_arima(trainDF['OrderQuantity'],
                            exogenous=TrainExogenousDF,
                            start_p=0, start_q=0,
                            max_p=6, max_d=2, max_q=6,
                            start_P=0, start_Q=0,
                            max_P=6, max_D=2, max_Q=6,
                            max_order=6,
                            seasonal=True,
                            stationary=False,
                            information_criterion='aic',
                            stepwise=False,
                            trace=False,
                            test='adf',
                            seasonal_test='ocsb',
                            error_action='ignore',
                            suppress_warnings=True,
                            enforce_stationarity=False)

    ArimaxForecast = arimax_fit.predict(len(predDF),
                                        exogenous=TestExogenousDF)
    ArimaxForecast = np.round(ArimaxForecast, 0)
    # Floor at zero. The previous np.clip(x, 0, np.max(x)) put its upper
    # bound below the lower one when every forecast was negative, so the
    # negative values were not floored.
    ArimaxForecast = np.maximum(ArimaxForecast, 0)
    return ArimaxForecast
def predicting(data, pltname):
    """Fit an auto-ARIMA on a shifted ``cycles`` series, forecast and plot.

    Steps, in order:

    1. keep the ``_time`` and ``cycles`` columns of ``data``;
    2. set ``forecast_out`` to the ceiling of 20% of the row count;
    3. add a ``label`` column equal to ``cycles`` shifted by
       ``-forecast_out`` rows, and drop the rows left without a label;
    4. run ``auto_arima`` on ``label``, refit, predict ``forecast_out``
       periods;
    5. plot, save the figure under a fixed directory plus ``pltname``, and
       show it.

    Returns the predictions as a DataFrame with a single ``label`` column.
    """
    data = data[['_time', 'cycles']]
    original = data['cycles']
    n = len(data)
    # print("length of data", n)
    # Forecast horizon: 20% of the number of rows, rounded up.
    forecast_out = int(math.ceil(0.2 * (n)))
    # print("forecast_out", forecast_out)
    # NOTE(review): this assigns into a column selection of the caller's
    # frame; pandas may emit SettingWithCopyWarning here -- confirm.
    data['label'] = data['cycles'].shift(-forecast_out)
    # The last forecast_out rows have no label after the shift; drop them.
    data.dropna(inplace=True)
    original = data['cycles']
    original = original.to_frame(name='cycles')
    # Selecting one column yields a Series ...
    d1 = data['label']
    # ... so convert it back into a DataFrame.
    d1 = d1.to_frame(name='label')
    data = data['label']
    data = data.to_frame(name='label')
    # print("last value", data.iat[len(data)-1, 0])
    # divide into train and validation set
    # train = data[:int(0.8*(len(data)))]
    # test = data[int(0.8*(len(data))):]
    model = auto_arima(data,
                       trace=True,
                       start_p=0,
                       start_q=0,
                       start_P=0,
                       start_Q=0,
                       max_p=3,
                       max_q=3,
                       max_P=3,
                       max_Q=3,
                       seasonal=True,
                       stepwise=False,
                       suppress_warnings=True,
                       D=1,
                       max_D=10,
                       error_action='ignore',
                       approximation=False)  # change 3 to 10
    # fitting model
    model.fit(data)
    # print(model.summary())
    y_pred = model.predict(n_periods=forecast_out)
    y_pred = pd.DataFrame(y_pred, columns=['label'])
    # print("first element", y_pred.iat[0, 0])
    # Stack the known labels and the predictions on top of each other.
    conn = pd.concat([d1, y_pred], axis=0)
    n = conn.size - forecast_out
    plt.figure(0)
    # Known part in yellow: cycles on the x axis against label on the y axis.
    plt.plot(original[:n], conn[:n], 'y')
    # Difference between the last cycles value and the one forecast_out - 1
    # rows before it; used to offset the x values of the predicted part.
    diff = original.iat[len(original) - 1,
                        0] - original.iat[len(original) - forecast_out, 0]
    # Predicted part in red.
    plt.plot(original[-forecast_out:] + diff, conn[-forecast_out:], 'r')
    # NOTE(review): the output directory is a hard-coded absolute path.
    plt.savefig("/Users/Arunima_Sharma/Desktop/py/flask/static/" + pltname)
    plt.show()
    return y_pred
def test_with_seasonality3():
    """Stepwise seasonal search with ``D=None`` and tracing switched on."""
    # show we can estimate D even when it's not there...
    search_kwargs = {
        'start_p': 1,
        'start_q': 1,
        'max_p': 2,
        'max_q': 2,
        'm': 12,
        'start_P': 0,
        'seasonal': True,
        'd': 1,
        'D': None,
        'error_action': 'ignore',
        'suppress_warnings': True,
        'trace': True,  # get the coverage on trace
        'random_state': 42,
        'stepwise': True,
    }
    auto_arima(wineind, **search_kwargs)
def forecast_traces(balances, weeks=52) -> List[Scatter]:
    """ Forecast next weeks based on balance history with ARIMA

    Returns three traces, in drawing order: the lower error band, the upper
    error band and the forecast line. Every trace starts from the last known
    balance. The band half-width is twice the combined standard deviation of
    the model residuals and the standard error of the balances.
    """
    last_balance_date = balances.index.max()
    last_balance = balances.loc[last_balance_date]

    # NOTE(review): x holds `weeks` dates while every y below holds
    # `weeks + 1` values (anchor + forecast) -- confirm this is intended.
    X_forecast = list(
        pd.date_range(last_balance_date, periods=weeks, freq='W'))

    model = auto_arima(balances,
                       trend=[1, 1],
                       error_action='ignore',
                       suppress_warnings=True)
    forecast = model.predict(n_periods=weeks)

    # error estimation
    model_error = np.std(model.resid())
    sampling_error = np.sqrt(balances.var() / len(balances))
    forecast_error = 2 * np.sqrt(model_error**2 + sampling_error**2)

    # A forecast that dips below the last known balance is flagged as bad.
    bad_forecast = np.min(forecast) < last_balance
    palette_key = 'EXP' if bad_forecast else 'INC'

    def anchored(values):
        # Prepend the last known balance so the trace joins the history.
        return [last_balance] + list(values)

    centre_trace = Scatter(
        name='forecast',
        x=X_forecast,
        y=anchored(forecast),
        mode='lines',
        line={'dash': 'dash', 'color': color(palette_key)},
        opacity=0.8)
    upper_trace = Scatter(
        name='',
        x=X_forecast,
        y=anchored(forecast + forecast_error),
        fill='tonexty',
        fillcolor=(color(palette_key, alpha=0.2)),
        line={'color': 'transparent'},
        showlegend=False)
    lower_trace = Scatter(
        name='',
        x=X_forecast,
        y=anchored(forecast - forecast_error),
        fill='tozeroy',
        fillcolor='#8881',
        line={'color': 'transparent'},
        showlegend=False)

    return [lower_trace, upper_trace, centre_trace]
def test_force_polynomial_error():
    """A simple polynomial series fails without regressors, fits with them."""
    x = np.array([1, 2, 3, 4, 5, 6])
    d = 2
    xreg = None

    with pytest.raises(ValueError) as ve:
        auto_arima(x, d=d, D=0, seasonal=False, exogenous=xreg)
    # Check the message once the block has exited: a statement placed after
    # the raising call inside the block never runs. ``ve.value`` is the
    # exception itself, so the check does not depend on how ExceptionInfo
    # renders as a string.
    assert 'simple polynomial' in str(ve.value), str(ve.value)

    # but it should pass when xreg is not none
    xreg = rs.rand(x.shape[0], 2)
    _ = auto_arima(x,
                   d=d,
                   D=0,
                   seasonal=False,
                   exogenous=xreg,
                   error_action='ignore',
                   suppress_warnings=True)
def fitFunc(yvals, ndays):
    """Fit an auto-ARIMA on ``yvals`` and forecast ``ndays`` periods.

    Returns
    -------
    list
        ``[last forecast value, history positions, yvals,
        forecast positions, forecast]`` where the positions are ``range``
        objects continuing from the history into the forecast.
    """
    model = auto_arima(yvals,
                       trace=True,
                       error_action='ignore',
                       suppress_warnings=True)
    model.fit(yvals)

    forecast = model.predict(ndays)

    n_observed = len(yvals)
    history_axis = range(0, n_observed)
    forecast_axis = range(n_observed, n_observed + len(forecast))

    return [forecast[-1], history_axis, yvals, forecast_axis, forecast]
def build_arima_model(data, p=0, d=0, q=0, predict=False, show_qqplot=False):
    """Fit an ARIMA model to ``data`` and forecast 2014-09-01 .. 2014-09-30.

    With ``p == d == q == 0`` the order is chosen by ``auto_arima``;
    otherwise a statsmodels ``ARIMA(p, d, q)`` is fitted.

    Parameters
    ----------
    data
        Series to model; it is also plotted with ``data.plot``.
    p, d, q : int, optional (default=0)
        ARIMA order; all zero selects the automatic search.
    predict : bool, optional (default=False)
        When true, also plot in-sample predictions indexed by the days
        2014-04-01 .. 2014-08-30.
    show_qqplot : bool, optional (default=False)
        When true, draw a Q-Q plot of the residuals rounded to 8 decimals.

    Returns
    -------
    pandas.DataFrame
        Column ``forest`` with the 30 forecast values, indexed by day.
    """
    use_auto = p == 0 and d == 0 and q == 0

    forest_date = [
        day.strftime('%Y-%m-%d')
        for day in pd.date_range(start='2014-09-01', end='2014-09-30')
    ]

    if use_auto:
        model = auto_arima(data)
        model.fit(data)
        results = None
    else:
        model = sm.tsa.ARIMA(data, order=(p, d, q))
        results = model.fit()

    # Rolling forecast (disabled)
    # forest = []
    # for i in range(0, len(forest_date)-7):
    #     if i % 7 == 0:
    #         temp = []
    #         temp += results.forecast(steps=7)[0].tolist()
    #         temp_pd = pd.Series(temp,
    #                             index=pd.to_datetime(forest_date[i:i + 7]))
    #         img = img.append(temp_pd)
    #         forest += temp
    #         model = None
    #         model = sm.tsa.ARIMA(img, order=(p, d, q))
    #         results = model.fit()
    # forest += results.forecast(steps=2)[0].tolist()

    if show_qqplot:
        # ``results`` only exists on the statsmodels path; on the automatic
        # path this used to raise NameError. Take the residuals from the
        # auto-ARIMA model there instead.
        resid = model.resid() if use_auto else results.resid.values
        for i, value in enumerate(resid):
            resid[i] = round(value, 8)
        qqplot(resid)

    # In-sample prediction over the historical data
    if predict:
        if use_auto:
            in_sample = model.predict_in_sample(start=0, end=151,
                                                dynamic=False)
        else:
            in_sample = results.predict(start='2014-04-03',
                                        end='2014-08-30',
                                        dynamic=False)
        predict_date = [
            day.strftime('%Y-%m-%d')
            for day in pd.date_range(start='2014-04-01', end='2014-08-30')
        ]
        predict_df = pd.DataFrame({'forest': in_sample},
                                  index=pd.to_datetime(predict_date))
        fig, ax = plt.subplots(figsize=(12, 8))
        ax = data.plot(ax=ax)
        ax = predict_df.plot(ax=ax)

    if use_auto:
        forest = model.predict(n_periods=30)
    else:
        forest = results.forecast(steps=30)[0]

    forest_df = pd.DataFrame({'forest': forest},
                             index=pd.to_datetime(forest_date))
    fig, ax = plt.subplots(figsize=(12, 8))
    ax = data.plot(ax=ax)
    ax = forest_df.plot(ax=ax)
    plt.show()
    return forest_df
def test_many_orders():
    """The search on the power-transformed ``lynx`` must raise ValueError."""
    # show that auto-arima can't fit this data for some reason...
    exponent = 0.5
    transformed = ((lynx ** exponent) - 1) / exponent

    try:
        auto_arima(transformed,
                   d=0,
                   start_p=1, max_p=5,
                   start_q=1, max_q=5,
                   n_jobs=-1,
                   suppress_warnings=True,
                   maxiter=10)  # shorter max iter
    except ValueError:
        raised = True
    else:
        raised = False

    assert raised
def arimamodelling(timeseries):
    """Return the model selected by ``auto_arima`` with p, q searched up to 10."""
    return auto_arima(
        timeseries,
        start_p=1,
        max_p=10,
        start_q=1,
        max_q=10,
        trace=True,
        error_action='ignore',
        suppress_warnings=True,
    )
# Keep columns 1..36 and forecast 12 periods ahead for each of the first
# 20 rows; every row is treated as one series.
data1 = data1.iloc[:, 1:37]
no_of_column = len(data1.columns)
forecasted = pd.DataFrame()

# Forecasts are collected here and turned into a frame once, after the loop.
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call.
_forecast_rows = []

for row in data1.iloc[0:20, :].iterrows():
    index, data = row

    # Skip rows whose last three values sum to zero.
    if np.sum(data[no_of_column - 3:no_of_column]) == 0:
        print("Next")
        continue

    # Replace zeros with the row median before fitting.
    data = data.replace(0, np.median(data))

    stepwise_model = auto_arima(data,
                                start_p=1, start_q=1,
                                max_p=3, max_q=3,
                                m=12, start_P=0, seasonal=True,
                                d=1, D=1,
                                trace=True,
                                error_action='ignore',
                                suppress_warnings=True,
                                stepwise=True)
    print(index)
    print(row)

    future_forecast = stepwise_model.predict(n_periods=12)
    _forecast_rows.append(pd.Series(future_forecast))

if _forecast_rows:
    # One row per processed series, columns 0..11, fresh 0..k-1 row index.
    forecasted = pd.DataFrame(_forecast_rows).reset_index(drop=True)

# Example: plot the last processed series next to its forecast.
plt.plot(data)
plt.plot(future_forecast)
print(future_forecast)