def cities():
    """Return the theft figures per city for the requested window as JSON.

    The ``start`` and ``end`` query-string arguments bound
    ``CityQuantity.date_occurrence`` (both ends inclusive). When ``start``
    is on or after 2018-01-01, a 12-step forecast from the pickled model is
    added as an extra series and the labels are replaced by the months of
    2018.
    """
    date_format = '%Y-%m-%d'
    start = request.args.get('start')
    end = request.args.get('end')

    query = CityQuantity.query.join(City).with_entities(
        City.name, CityQuantity.date_occurrence, CityQuantity.theft)
    query = query.filter(CityQuantity.date_occurrence >= start)
    query = query.filter(CityQuantity.date_occurrence <= end)
    values = __transform(query.all())

    # Windows starting before 2018 are answered from stored rows only.
    forecast_from = datetime.strptime('2018-01-01', date_format)
    if datetime.strptime(start, date_format) < forecast_from:
        return jsonify(values)

    loaded = ARIMAResults.load(ROOT + 'ssp/v1/cities.pkl')
    months = pandas.date_range(start='1/1/2018', end='12/1/2018', freq='MS')
    labels = months.format(
        formatter=lambda current: current.strftime('%Y-%m'))
    predicted = [int(round(step)) for step in loaded.forecast(steps=12)[0]]
    total = sum(predicted)

    values["labels"] = labels
    values["total"] = values["total"] + total
    values["data"].append({
        "values": predicted,
        "label": "Goiânia Previsão",
        "total": total
    })
    return jsonify(values)
def main():
    """Fit a SARIMAX(1, 0, 1) model on the 'mag' column and print a prediction.

    Reads ``./query_large.csv`` (indexed by its parsed 'time' column), fits
    the model, saves it to ``arima_normal.pkl``, reloads it and prints the
    prediction for observations ``start_index`` .. ``start_index + 6``,
    where ``start_index`` is the first command-line argument.

    Raises:
        SystemExit: if no start index was given on the command line.
        ValueError: if the start index is not an integer.
    """
    # Validate the command line first so a missing argument does not cost a
    # full model fit before failing.
    if len(sys.argv) < 2:
        raise SystemExit('usage: <script> START_INDEX')
    start_index = int(sys.argv[1])
    end_index = start_index + 6  #datetime(1925, 12, 26)

    data = pd.read_csv("./query_large.csv",
                       sep=",",
                       parse_dates=['time'],
                       index_col='time',
                       squeeze=True)  #, date_parser=parser)
    fit = data['mag']

    model = sm.tsa.statespace.SARIMAX(fit, order=(1, 0, 1))
    model_fit = model.fit(disp=0)
    model_fit.save('arima_normal.pkl')

    # Round-trip through the pickle so the prediction uses the saved model.
    model = ARIMAResults.load('arima_normal.pkl')
    forecast = model.predict(start=start_index, end=end_index)
    print(forecast)
# Load the finalized model and make a prediction.
from pandas import Series
from statsmodels.tsa.arima_model import ARIMAResults
from math import exp
from math import log
import numpy


def boxcox_inverse(value, lam):
    """Invert a Box-Cox transform of ``value`` for the parameter ``lam``."""
    # lam == 0 is handled separately because the general formula divides
    # by lam.
    return exp(value) if lam == 0 else exp(log(lam * value + 1) / lam)


model_fit = ARIMAResults.load('Wmodel2.pkl')
lam = numpy.load('Wmodel_lambda.npy')
# Element 0 of the forecast result is mapped back to the original scale.
yhat = boxcox_inverse(model_fit.forecast()[0], lam)
print('Predicted: %.3f' % yhat)

# Evaluate the finalized model on the validation dataset.
from pandas import Series
from matplotlib import pyplot
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.arima_model import ARIMAResults
from scipy.stats import boxcox
from sklearn.metrics import mean_squared_error
from math import sqrt
from math import exp
from math import log
def test():
    """Load the results pickled in 'model.pkl' and print a 7-step forecast."""
    fitted = ARIMAResults.load('model.pkl')
    # Only element 0 of the forecast result is printed.
    print(fitted.forecast(steps=7)[0])
def load_models(self):
    """Load one pickled model per key of ``self.describe``.

    For every key ``k`` the file ``<self.model_path>/model_<k>.pkl`` is
    loaded and stored under ``self.describe[k]['model']``.
    """
    for key in self.describe:
        pickle_path = f'{self.model_path}/model_{key}.pkl'
        self.describe[key]['model'] = ARIMAResults.load(pickle_path)
numpy.save('mmodel_bias.npy', [bias])

# Load the finalized model and make a prediction.
from pandas import Series
from statsmodels.tsa.arima_model import ARIMAResults
from math import exp
from math import log
import numpy


def boxcox_inverse(value, lam):
    """Invert a Box-Cox transform of ``value`` for the parameter ``lam``."""
    if lam == 0:
        return exp(value)
    log_term = log(lam * value + 1)
    return exp(log_term / lam)


model_fit = ARIMAResults.load('bwmodel.pkl')
lam = numpy.load('bwmodel_lambda.npy')
bias = numpy.load('bwmodel_bias.npy')
# The stored bias is added after undoing the Box-Cox transform.
yhat = bias + boxcox_inverse(model_fit.forecast()[0], lam)
print('Predicted: %.3f' % yhat)

# Evaluate the finalized model on the validation dataset.
from pandas import Series
from matplotlib import pyplot
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.arima_model import ARIMAResults
from scipy.stats import boxcox
from sklearn.metrics import mean_squared_error
from math import sqrt
def load(self):
    """Load the results pickled at ``self.outfile_estimation_parameters``.

    The loaded object is stored on ``self.model``.
    """
    source = self.outfile_estimation_parameters
    self.model = ARIMAResults.load(source)
squeeze=True) X = series.values X = X.astype('float32') series = read_csv('shampoo-ARIMA-validate.csv', header=None, index_col=0, parse_dates=True, squeeze=True) y = series.values y = y.astype('float32') history = [x for x in X] predictions = list() model_fit = ARIMAResults.load('shampoo-ARIMA-model-fin.pkl') bias = load('sAfb.npy') # compare predictions to validation for i in range(len(y)): p_hist = processing(array(history), 0) station = diff(p_hist) model = ARIMA(station, order=(3, 1, 0)) model_fit = model.fit(trend='nc', disp=0) yhat = model_fit.forecast()[0] yhat = bias + inverse_diff(p_hist, yhat) predictions.append(asscalar(yhat)) obs = y[i] history = p_hist.tolist() history.append(obs) print('Predicted= %.3f, Expected= %.3f' % (yhat, obs))
def load_model(model_path):
    """Return the fitted results object pickled at ``model_path``."""
    fitted_results = ARIMAResults.load(model_path)
    return fitted_results
from statsmodels.tsa.arima_model import ARIMAResults
import sys

if __name__ == '__main__':
    # argv[1] names the model file; argv[2] is the position to look up,
    # wrapped modulo 163731.
    sn, raw_time = sys.argv[1], sys.argv[2]
    t_time = int(raw_time) % 163731

    load_model = ARIMAResults.load('model/{}.model'.format(sn))
    print(str(sn))

    values = load_model.fittedvalues
    print(values[t_time])
def load(self):
    """Load the pickled results named by ``self._build_model_file_name()``.

    The loaded object is stored on ``self.model``.
    """
    # Earlier versions loaded a fixed path instead:
    # self.model = ARIMAResults.load(self.config.base_dir + "models/final/ARIMA.pkl")
    model_file = self._build_model_file_name()
    self.model = ARIMAResults.load(model_file)
def mlwilldoit(time):
    """Return the forecast for the month of ``time`` as a string.

    ``time`` is a 'YYYY-MM-DD' string. The number of forecast steps is
    ``monthdelta`` between 2017-07-01 and ``time``; the last value of the
    resulting forecast is returned.
    """
    date_format = "%Y-%m-%d"
    arima_fitted_model = ARIMAResults.load("models/arima_model.pkl")
    origin = datetime.strptime("2017-07-01", date_format)
    target = datetime.strptime(time, date_format)
    steps = monthdelta(origin, target)
    point_forecasts = arima_fitted_model.forecast(steps)[0]
    return str(point_forecasts[-1])
# Take input: the crime category to predict.
cat = input("input the category you want to predict (Theft, Assault, Drug, Sex Offenses, Other Offenses)")

# Model-file stems keyed by the lower-cased user input. Input that matches
# none of the keys is kept unchanged.
category_names = {
    "theft": "Theft",
    "assault": "Assault",
    "drug": "Drug",
    "sex offenses": "Sexoffense",
    "other offenses": "Offenses",
}
cat = category_names.get(cat.lower(), cat)

# Find the model file for the category.
name = '{}.pkl'.format(cat)

# Get latitude and longitude.
location_x = float(input('latitude'))
location_y = float(input('longtitude'))

# Get the location possibility.
possibility = run(location_x, location_y, file)

# Predict: scale the first forecast element by the location possibility.
model = ARIMAResults.load(name)
raw = model.forecast()[0]
print("The possibility of crime is: ", raw*possibility*100, "%")
def validate_arima_model(csv_file_name):
    """Evaluate saved ARIMA models with rolling forecasts on four data splits.

    For each of the 70/30, 80/20, 90/10 and 95/5 splits this reads the
    training and validation CSVs from ``data/datasets/``, asks the user for
    the (p, d, q) order, loads the matching pickled model from
    ``data/saved_models/``, produces one forecast per validation
    observation (refitting on the growing history after the first one),
    reports the performance measures and plots expected vs. predicted.

    Args:
        csv_file_name: Name of the source CSV; the part before '.csv' is the
            stem of every dataset and model file name.

    Raises:
        ValueError: if an entered p, d or q value is not an integer.
    """
    base_name = csv_file_name.split('.csv')[0]
    # (file-name suffix, label shown to the user) for every evaluated split.
    splits = (
        ('70_30', '70% - 30%'),
        ('80_20', '80% - 20%'),
        ('90_10', '90% - 10%'),
        ('95_5', '95% - 5%'),
    )
    for suffix, label in splits:
        _validate_arima_split(base_name, suffix, label)


def _validate_arima_split(base_name, suffix, label):
    """Run the interactive rolling-forecast evaluation for a single split."""
    dataset = read_csv('data/datasets/' + base_name + '_dataset_' + suffix + '.csv',
                       header=0, index_col=0, parse_dates=True, squeeze=True)
    history = list(dataset.values.astype('float32'))
    validation = read_csv('data/datasets/' + base_name + '_validation_' + suffix + '.csv',
                          header=0, index_col=0, parse_dates=True, squeeze=True)
    y = validation.values.astype('float32')

    print()
    print("==========================================================")
    print("For " + label + " we have...")
    print(
        "Enter (p, d, q) extracted by the conclusions made by stationarity and ACF/PACF plots"
    )
    p = input("Enter p value (Autoregression (AR) --> p): ")
    d = input("Enter d value (differencing --> d): ")
    q = input("Enter q value (Moving Average (MA) --> q): ")
    # Parse once; the same order is reused for every refit below.
    order = (int(p), int(d), int(q))
    print('ARIMA(%d, %d, %d)' % order)

    # load model
    print("Loading model...")
    model_fit = ARIMAResults.load('data/saved_models/' + base_name + '_arima_' + suffix + '.pkl')

    # The first prediction comes from the saved model; no bias correction
    # is applied.
    print("Starting model evaluation...")
    yhat = float(model_fit.forecast()[0])
    predictions = [yhat]
    history.append(y[0])
    print('>Predicted=%.3f, Expected=%.3f' % (yhat, y[0]))

    # Rolling forecasts: refit on the history extended by each observation.
    warnings.filterwarnings("ignore")
    for obs in y[1:]:
        model_fit = ARIMA(history, order=order).fit()
        yhat = float(model_fit.forecast()[0])
        predictions.append(yhat)
        history.append(obs)
        print('>Predicted=%.3f, Expected=%.3f' % (yhat, obs))

    # report performance
    calculate_forecasting_performance_measures(y, predictions)
    calculate_correlation_index(y, predictions)
    print("Model evaluation finished...")
    print("==========================================================")
    print()
    pyplot.plot(y)
    pyplot.plot(predictions, color='red')
    pyplot.show()
# save and load an ARIMA model with a workaround from pandas import read_csv from statsmodels.tsa.arima_model import ARIMA from statsmodels.tsa.arima_model import ARIMAResults # monkey patch around bug in ARIMA class def __getnewargs__(self): return ((self.endog), (self.k_lags, self.k_diff, self.k_ma)) ARIMA.__getnewargs__ = __getnewargs__ # load data series = read_csv('daily-total-female-births.csv', header=0, index_col=0, parse_dates=True, squeeze=True) # prepare data X = series.values X = X.astype('float32') # fit model model = ARIMA(X, order=(1, 1, 1)) model_fit = model.fit() # save model model_fit.save('model.pkl') # load model loaded = ARIMAResults.load('model.pkl')
def model_with_arima(ts, train_size, order, seasonal_order=(), seasonal_freq=None, trend=None,
                     grid_search=False, path_to_model=None, verbose=False, ds_name='DS', var_name='Value'):
    """Model a time series with an ARIMA forecast.

    Besides returning the result, this saves a plot (PNG) and the data (CSV)
    under './save/datasets/<ds_name>/arima/'.

    Inputs:
        ts [pd Series]: A pandas Series with a DatetimeIndex and a column for numerical values.
        train_size [float]: The percentage of data to use for training, as a float (e.g., 0.66).
        order [tuple]: The order hyperparameters (p,d,q) for this ARIMA model.

    Optional Inputs:
        seasonal_order [tuple]: The seasonal order hyperparameters (P,D,Q) for this SARIMA model. When specifying
                                these, 'seasonal_freq' must also be given.
        seasonal_freq [int]: The freq hyperparameter for this SARIMA model, i.e., the number of samples that make
                             up one seasonal cycle.
        trend [str]: The trend hyperparameter for an SARIMA model.
        grid_search [bool]: When True, perform a grid search to set values for the 'order' and 'seasonal order'
                            hyperparameters. Note this overrides any given (p,d,q)(P,D,Q) hyperparameter values.
                            Default is False.
        path_to_model [str]: Path to a *.pkl file of a trained (S)ARIMA model. When set, no training will be done
                             because that model will be used.
        verbose [bool]: Currently unused; the verbose-only plots are not implemented in this function.
        ds_name [str]: The dataset name, used in the plot title and in the output paths. Default is 'DS'.
        var_name [str]: The name of the dependent variable in the time series. Default is 'Value'.

    Outputs:
        ts_with_arima [pd DataFrame]: Indexed by 'Time', with the original values in column 'var_name' and the
                                      model predictions in column 'ARIMA'.

    Raises:
        ValueError: When 'grid_search' is combined with 'path_to_model', or when exactly one of
                    'seasonal_order' (3 values) and 'seasonal_freq' is given without grid searching.

    Example:
        time_series_with_arima = model_with_arima(time_series, train_size=0.8, order=(12,0,0),
                                                  seasonal_order=(0,1,0), seasonal_freq=365, verbose=False)
    """

    # Finalize ARIMA/SARIMA hyperparameters
    if grid_search and path_to_model is not None:
        raise ValueError('\'grid_search\' should be False when specifying a path to a pre-trained ARIMA model.')

    if (seasonal_freq is not None) and (len(seasonal_order) == 3) and (grid_search is False):
        seasonal_order = seasonal_order + (seasonal_freq,)  # (P,D,Q,freq)
    elif (seasonal_freq is not None) and (len(seasonal_order) != 3) and (grid_search is False):
        raise ValueError('\'seasonal_order\' must be a tuple of 3 integers when specifying a seasonal frequency and not grid searching.')
    elif (seasonal_freq is None) and (len(seasonal_order) == 3) and (grid_search is False):
        raise ValueError('\'seasonal_freq\' must be given when specifying a seasonal order and not grid searching.')

    if grid_search:
        if seasonal_freq is None:  # ARIMA grid search
            print('No seasonal frequency was given, so grid searching ARIMA(p,d,q) hyperparameters.')
            order = grid_search_arima_params(ts)
            print('Grid search found hyperparameters: ' + str(order) + '\n')
        else:  # SARIMA grid search
            print('Seasonal frequency was given, so grid searching ARIMA(p,d,q)(P,D,Q) hyperparameters.')
            order, seasonal_order, trend = grid_search_sarima_params(ts, seasonal_freq)
            print('Grid search found hyperparameters: ' + str(order) + str(seasonal_order) + '\n')

    # Train or load ARIMA/SARIMA model
    X = ts
    split = int(len(X) * train_size)
    train, test = X[0:split], X[split:len(X)]
    # TODO: an outlier threshold (2 stds of the training data?) is still to be decided.

    # A 4-element seasonal order (P,D,Q,freq) selects SARIMA; anything shorter means plain ARIMA.
    is_seasonal = len(seasonal_order) >= 4
    if not is_seasonal:
        trained_model = ARIMA(train, order=order)
    else:
        # TODO: consider enforce_stationarity=False and enforce_invertibility=False, unless that prevents from
        # detecting 2 DSs not right for ARIMA
        trained_model = SARIMAX(train, order=order, seasonal_order=seasonal_order, trend=trend)

    if path_to_model is not None:
        # load pre-trained model
        print('Loading model: ' + path_to_model)
        trained_model_fit = ARIMAResults.load(path_to_model)
    else:
        current_time = str(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
        print('Before fitting: ' + current_time + '\n')
        trained_model_fit = trained_model.fit(disp=1)
        current_time = str(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
        print('After fitting: ' + current_time + '\n')

    print(trained_model_fit.summary())

    # Forecast with the trained ARIMA/SARIMA model; predictions start at the second observation.
    predictions = trained_model_fit.predict(start=1, end=len(X)-1, typ='levels')
    predict_index = pd.Index(X.index[1:len(X)])
    predictions_with_dates = pd.Series(predictions.values, index=predict_index)

    # Plot the forecast
    if not is_seasonal:  # ARIMA title
        title_text = ds_name + ' with ' + str(order) + ' ARIMA Forecast'
    else:  # SARIMA title
        title_text = ds_name + ' with ' + str(order) + '_' + str(seasonal_order) + '_' + str(trend) + ' ARIMA Forecast'
    ax = X.plot(color='#192C87', title=title_text, label=var_name, figsize=(14, 6))
    if len(test) > 0:
        test.plot(color='#441594', label='Test Data')
    predictions_with_dates.plot(color='#0CCADC', label='ARIMA Forecast')
    ax.set(xlabel='Time', ylabel=var_name)
    pyplot.legend(loc='best')

    # One naming rule for both output files, so the plot and the CSV always carry the same suffix.
    if train_size == 1:
        size_label = 'full'
    elif train_size == 0.5:
        size_label = 'half'
    else:
        size_label = str(train_size)
    percent_dir = str(int(train_size * 100)) + ' percent/'

    # save the plot (it is cleared afterwards instead of being shown)
    plot_filename = ds_name + '_with_arima_' + size_label + '.png'
    plot_path = './save/datasets/' + ds_name + '/arima/plots/' + percent_dir
    os.makedirs(plot_path, exist_ok=True)
    pyplot.savefig(plot_path + plot_filename, dpi=500)
    pyplot.clf()

    # Save data to proper directory with encoded file name
    ts_with_arima = pd.DataFrame({'ARIMA': predictions_with_dates, var_name: ts})
    ts_with_arima.rename_axis('Time', axis='index', inplace=True)  # name index 'Time'
    column_names = [var_name, 'ARIMA']  # column order
    ts_with_arima = ts_with_arima.reindex(columns=column_names)  # sort columns in specified order
    data_filename = ds_name + '_with_arima_' + size_label + '.csv'
    data_path = './save/datasets/' + ds_name + '/arima/data/' + percent_dir
    os.makedirs(data_path, exist_ok=True)
    ts_with_arima.to_csv(data_path + data_filename)

    return ts_with_arima
def graph_print(called_status, graph_to_print, training_data, test_data, len_training):
    """Draw the training, actual and predicted quantities into the Tk window.

    When ``called_status`` is 0, a 60-step forecast is computed from the
    pickled model for the module-level ``called_item``, exponentiated and
    appended to the training data as consecutive months. The result is
    cached in the globals ``original_data_plot`` and ``predicted_data``;
    any other status redraws from those cached globals.

    Args:
        called_status: 0 to compute the forecast, anything else to reuse it.
        graph_to_print: "Line Graph" or "Scatter Graph".
        training_data: DataFrame indexed by 'Month' with a 'Quantity' column.
        test_data: Actual data plotted alongside the forecast.
        len_training: Number of leading rows that are original training data.

    Returns:
        Always 1, the status to pass on the next call.
    """
    global original_data_plot, predicted_data
    if called_status == 0:
        # The model is only needed when a new forecast is computed.
        results_ar = ARIMAResults.load("../trained_model/" + called_item + ".pkl")
        pred = results_ar.forecast(steps=60)[0]
        s = np.exp(pd.Series(pred.tolist(), copy=False))

        training_data = training_data.reset_index()
        last_month = training_data.Month[len(training_data) - 1]
        year, month = last_month.year, last_month.month

        # Build all future rows first and concatenate once. Appending row by
        # row copies the frame each time, and DataFrame.append no longer
        # exists in pandas 2.x.
        future_rows = []
        for quantity in s:
            if month < 12:
                month += 1
            else:
                year += 1
                month = 1
            future_rows.append({'Month': datetime(year, month, 1), 'Quantity': quantity})
        training_data = pd.concat([training_data, pd.DataFrame(future_rows)], ignore_index=True)

        training_data.set_index("Month", inplace=True)
        original_data_plot = training_data[:len_training]
        predicted_data = training_data[len_training:]

    fig = plt.figure(figsize=(4.5, 4), dpi=90)
    plt.title(title_print_first_word + " of " + called_item)
    if (graph_to_print == "Line Graph"):
        plt.plot(original_data_plot, color='black', label="Training Data")
        plt.plot(test_data, color='blue', label="Actual Data")
        plt.plot(predicted_data, color='red', label="Predicted Data")
    elif (graph_to_print == "Scatter Graph"):
        plt.scatter(original_data_plot.index.values, original_data_plot['Quantity'],
                    s=10, color='black', label="Training Data")
        plt.scatter(test_data.index.values, test_data['Quantity'],
                    s=10, color='blue', label="Actual Data")
        plt.scatter(predicted_data.index.values, predicted_data['Quantity'],
                    s=10, color='red', label="Predicted Data")
    plt.xlabel("Date")
    plt.ylabel("Quantity in Kg")
    plt.legend(loc='best')

    # Embed the figure in the Tk window (the module-level ``window``).
    canvas = FigureCanvasTkAgg(fig, master=window)
    canvas.draw()
    canvas.get_tk_widget().grid(row=1, column=0, columnspan=4, padx=7, pady=5, ipadx=20)
    return 1
squeeze=True) X = series.values X = X.astype('float32') series = read_csv('champagne-ARIMA-validate.csv', header=None, index_col=0, parse_dates=True, squeeze=True) y = series.values y = y.astype('float32') history = [x for x in X] predictions = list() model_fit = ARIMAResults.load('champagne-ARIMA-model-fin.pkl') bias = load('cAfb.npy') # compare predictions to validation for i in range(len(y)): p_hist = processing(array(history), 1) station = diff(p_hist) model = ARIMA(station, order=(0, 2, 2)) model_fit = model.fit(trend='nc', disp=0) yhat = model_fit.forecast()[0] yhat = bias + inverse_diff(p_hist, yhat) predictions.append(asscalar(yhat)) obs = y[i] history = p_hist.tolist() history.append(obs) print('Predicted= %.3f, Expected= %.3f' % (yhat, obs))
import pandas as pd
from pandas import Series
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.arima_model import ARIMAResults

# pandas has no `read_series` function, so the original call raised
# AttributeError before the model was loaded. `read_csv` is the supported
# reader.
# NOTE(review): the CSV layout is not visible here. Add index_col /
# parse_dates (and squeeze to a Series) if later code needs that shape.
data = pd.read_csv('./data/BTC-USD(PB).csv')

# Load the previously saved ARIMA results and show them.
model_load = ARIMAResults.load('./models/ARIMA BTC-USD AR')
print(model_load)
def load(source):
    """Return an ``ArimaForecaster`` whose ``model`` is loaded from ``source``."""
    forecaster = ArimaForecaster()
    forecaster.model = ARIMAResults.load(source)
    return forecaster
from statsmodels.tsa.arima_model import ARIMAResults
from scipy.stats import boxcox
from sklearn.metrics import mean_squared_error
from math import sqrt
from math import exp
from math import log
import numpy

# invert box-cox transform

# load and prepare datasets
dataset = Series.from_csv('ddata2.csv')
X = dataset.values.astype('float32')
history = [x for x in X]
validation = Series.from_csv('ValidationDailyData.csv')
y = validation.values.astype('float32')

# load model
model_fit = ARIMAResults.load('dmodel.pkl')
bias = numpy.load('dmodel_bias.npy')
#lam = numpy.load('dmodel_lambda.npy')

# make first prediction: the saved model's forecast plus the stored bias
predictions = list()
yhat = bias + model_fit.forecast()[0]
#yhat = boxcox_inverse(yhat, lam)
predictions.append(yhat)
history.append(y[0])
# '%.3f' for both values: the original '%3.f' printed the expected value
# with no decimals.
print('>Predicted=%.3f, Expected=%.3f' % (yhat, y[0]))

# rolling forecasts
for i in range(1, len(y)):
    # transform
    # predict
    model = ARIMA(history, order=(4, 0, 0))
#Take the relevant values pred = np.array(history_[-output_seq_length:]) #Retrieve the actual target values test_y_ = test_y[seq * output_seq_length:] #Calculate MAPE and store in list mape_score = data_handler.MAPE( test_y_.reshape(-1)[:output_seq_length], pred) mape_scores.append(mape_score) mape_sum += mape_score #Print a list of MAPE scores for the test data ad their average print(round(mape_sum / (len(test_y) // output_seq_length), 3)) print(mape_scores) elif (train == 'test'): #Declare and fit model model = ARIMA(differenced, order=order) model = ARIMAResults.load(path_to_model) #Testing the first sequence history = np.append(train_x, test_x[:output_seq_length]) preds = model.forecast(output_seq_length)[0] pred = list() #Removing float64 types for convenient plotting for i in range(len(preds)): pred.append(int(preds[i])) #Restore seasonality for prediction in pred: np.append( history, data_handler.add_seasonality(history, prediction,
# Load the finalized model and make a prediction.
from pandas import Series
from statsmodels.tsa.arima_model import ARIMAResults
from math import exp
from math import log
import numpy


def boxcox_inverse(value, lam):
    """Invert a Box-Cox transform of ``value`` for the parameter ``lam``."""
    # With lam == 0 the value itself is the exponent.
    exponent = value if lam == 0 else log(lam * value + 1) / lam
    return exp(exponent)


model_fit = ARIMAResults.load('MModel.pkl')
lam = numpy.load('MModel_lambda.npy')
bias = numpy.load('MModel_bias.npy')

# forecast() is unpacked into the point forecast, its standard error and
# the confidence interval.
yhat, stderr, conf = model_fit.forecast()
yhat = bias + boxcox_inverse(yhat, lam)
print('Forecast: %.3f' % yhat)
print('Standard Error: %.3f' % stderr)
print('95%% Confidence Interval: %.3f to %.3f' % (conf[0][0], conf[0][1]))

# Evaluate the finalized model on the validation dataset.
from pandas import Series
from matplotlib import pyplot
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.arima_model import ARIMAResults
from scipy.stats import boxcox
from sklearn.metrics import mean_squared_error
def load_model():
    """Return the fitted results object pickled in './model.pkl'."""
    fitted_results = ARIMAResults.load('./model.pkl')
    return fitted_results
from statsmodels.tsa.arima_model import ARIMAResults
import numpy


# Invert a differenced value: add back the observation `interval` steps
# before the end of `history`.
def inverse_difference(history, yhat, interval=1):
    return yhat + history[-interval]


series = read_csv(r'data\dataset.csv', header=None, index_col=0, parse_dates=True, squeeze=True)
months_in_year = 12
model_fit = ARIMAResults.load(r'data\model.pkl')
bias = numpy.load(r'data\model_bias.npy')
# One-step forecast, mapped back by adding the value from 12 observations
# earlier and the stored bias.
yhat = float(model_fit.forecast()[0])
yhat = bias + inverse_difference(series.values, yhat, months_in_year)
print('Predicted: %.3f' % yhat)

#%% Validate model
# Validation using validation.csv (test set)
# 1. Load the model and predict the next 12 months.
#    The forecasts beyond the first one will start
#    to degrade quickly.
# 2. Rolling forecast: update the transform and the model
#    for each time step (preferred). This means that
#    we step over the lead times in the validation
#    dataset and take the observations as an update to the history.
def forecast_next(m_name):
    """Return ``predict()`` of the results pickled in ``<m_name>.pkl``."""
    pickle_name = m_name + '.pkl'
    return ARIMAResults.load(pickle_name).predict()
def prediction():
    """Print the saved model's next forecast plus the stored bias.

    Reads 'model.pkl' and 'model_bias.npy' from the working directory.
    """
    fitted = ARIMAResults.load('model.pkl')
    bias = np.load('model_bias.npy')
    point_forecast = float(fitted.forecast()[0])
    print('Predicted: %.3f' % (bias + point_forecast))
from pandas import Series
from statsmodels.tsa.arima_model import ARIMAResults
import numpy
from matplotlib import pyplot


# Invert a differenced value: add back the observation `interval` steps
# before the end of `history`.
def inverse_difference(history, yhat, interval=1):
    return yhat + history[-interval]


predictions = list()
series = Series.from_csv('dataset_training.csv')
months_in_year = 12
model_fit = ARIMAResults.load('sales_model.pkl')
bias = numpy.load('model_bias.npy')
# The first prediction is the raw one-step forecast (no bias and no inverse
# differencing applied).
yhat = float(model_fit.forecast()[0])
predictions.append(yhat)
# NOTE(review): each pass feeds the previous yhat back in and adds the same
# series value (12 from the end) plus the bias; the model is never asked for
# a new forecast and `i` is unused. Confirm this is the intended 60-step
# projection.
for i in range(1, 60):
    yhat = bias + inverse_difference(series.values, yhat, months_in_year)
    predictions.append(yhat)
# Plot the accumulated predictions.
pyplot.plot(predictions, color='red')
pyplot.show()
for i in range(interval, len(dataset)): value = dataset[i] - dataset[i - interval] diff.append(value) return diff # invert differenced value def inverse_difference(history, yhat, interval=1): return yhat + history[-interval] # load and prepare datasets dataset = Series.from_csv('trainsetar1.csv') X = dataset.values.astype('float32') history = [x for x in X] months_in_year = 12 validation = Series.from_csv('testsetar1.csv') y = validation.values.astype('float32') # load model model_fit = ARIMAResults.load('model1.pkl') bias = numpy.load('model_bias1.npy') # make first prediction predictions = list() yhat = float(model_fit.forecast()[0]) yhat = bias + inverse_difference(history, yhat, months_in_year) predictions.append(yhat) history.append(y[0]) print('>Predicted=%.d, Expected=%.d' % (yhat, y[0])) # rolling forecasts for i in range(1, len(y)): # difference data months_in_year = 12 diff = difference(history, months_in_year) # predict model = ARIMA(diff, order=(0,0,1))
print("p,q")
print(p, q)
# Build the ARIMA model (the original comment says ARIMA(0, 1, 1); the order
# actually used is (p, 1, q))
order = (p, 1, q)
train_X = diff_1_df[:]
arima_model = ARIMA(train_X, order).fit()
# Model report
# print(arima_model.summary2())
# Save the model
arima_model.save('./data/arima_model.h5')
# # load model
arima_model = ARIMAResults.load('./data/arima_model.h5')
# Predict the data for the next two days
predict_data_02 = arima_model.predict(start=len(train_X), end=len(train_X) + 1, dynamic=False)
# Predict the historical data
predict_data = arima_model.predict(dynamic=False)
# Inverse log-differencing
# original_series = np.exp(train_X.values[1:] + np.log(dau.values[1:-1]))
# predict_series = np.exp(predict_data.values + np.log(dau.values[1:-1]))
# Inverse differencing
original_series = train_X.values[1:] + dau.values[1:-1]
predict_series = predict_data.values + dau.values[1:-1]