def cities():
    # query per-city theft counts within the requested date range
    start = request.args.get('start')
    end = request.args.get('end')
    rows = CityQuantity.query.join(City).with_entities(
        City.name, CityQuantity.date_occurrence, CityQuantity.theft).filter(
            CityQuantity.date_occurrence >= start).filter(
                CityQuantity.date_occurrence <= end).all()
    values = __transform(rows)
    # when the range reaches into 2018, append a 12-month ARIMA forecast
    if datetime.strptime(start, '%Y-%m-%d') >= datetime.strptime(
            '2018-01-01', '%Y-%m-%d'):
        loaded = ARIMAResults.load(ROOT + 'ssp/v1/cities.pkl')
        # build 'YYYY-MM' labels for the twelve forecast months of 2018
        d = pandas.date_range(start='1/1/2018', end='12/1/2018', freq='MS')
        d = d.format(formatter=lambda current: current.strftime('%Y-%m'))
        x = [int(round(x)) for x in loaded.forecast(steps=12)[0]]
        total = sum(x)
        values["labels"] = d
        values["total"] = values["total"] + total
        values["data"].append({
            "values": x,
            "label": "Goiânia Previsão",
            "total": total
        })
    return jsonify(values)
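A minimal client-side sketch for exercising this endpoint (an addition for illustration, assuming the view above is registered under a route such as '/cities'; the route decorator is not shown in the snippet):

import requests

resp = requests.get('http://localhost:5000/cities',
                    params={'start': '2018-01-01', 'end': '2018-12-01'})
payload = resp.json()
# 'labels', 'total' and 'data' come from the JSON structure built above
print(payload['labels'], payload['total'])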
Example 2
def main():

    data = pd.read_csv("./query_large.csv",
                       sep=",",
                       parse_dates=['time'],
                       index_col='time',
                       squeeze=True)

    # fit a SARIMAX(1, 0, 1) model to the magnitude column
    fit = data['mag']
    model = sm.tsa.statespace.SARIMAX(fit, order=(1, 0, 1))
    model_fit = model.fit(disp=0)

    residuals = DataFrame(model_fit.resid)

    # persist the fitted model, then reload it for prediction
    model_fit.save('arima_normal.pkl')
    timestamp = lambda s: datetime.strptime(s, "%d/%m/%Y")  # date parser helper (unused here)

    model = ARIMAResults.load('arima_normal.pkl')

    # predict a seven-step window starting at the index given on the command line
    start_index = int(sys.argv[1])
    end_index = start_index + 6
    forecast = model.predict(start=start_index, end=end_index)
    print(forecast)
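Usage sketch (assumed from the sys.argv handling above): invoking the script as, e.g., 'python main.py 100' prints the model's predictions for indices 100 through 106.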
Example 3
# load the finalized model and make a prediction
from pandas import Series
from statsmodels.tsa.arima_model import ARIMAResults
from math import exp
from math import log
import numpy


# invert box-cox transform
def boxcox_inverse(value, lam):
    if lam == 0:
        return exp(value)
    return exp(log(lam * value + 1) / lam)


model_fit = ARIMAResults.load('Wmodel2.pkl')
lam = numpy.load('Wmodel_lambda.npy')
yhat = model_fit.forecast()[0]
yhat = boxcox_inverse(yhat, lam)
print('Predicted: %.3f' % yhat)
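As a quick sanity check (an added sketch, not part of the original recipe), boxcox_inverse should undo scipy's boxcox transform:

from scipy.stats import boxcox

check_values = [1.0, 2.0, 3.0, 4.0, 5.0]
transformed, lam_check = boxcox(check_values)
# round-tripping through boxcox_inverse should recover the original values
recovered = [boxcox_inverse(v, lam_check) for v in transformed]
print(recovered)  # approximately [1.0, 2.0, 3.0, 4.0, 5.0]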

# evaluate the finalized model on the validation dataset
from pandas import Series
from matplotlib import pyplot
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.arima_model import ARIMAResults
from scipy.stats import boxcox
from sklearn.metrics import mean_squared_error
from math import sqrt
from math import exp
from math import log
Example 4
def test():
    # load model
    model_fit = ARIMAResults.load('model.pkl')
    forecast = model_fit.forecast(steps=7)[0]
    print(forecast)
Example 5
def load_models(self):
    for i in self.describe:
        self.describe[i]['model'] = ARIMAResults.load(
            f'{self.model_path}/model_{i}.pkl')
Example 6
numpy.save('mmodel_bias.npy', [bias])


# load the finalized model and make a prediction
from pandas import Series
from statsmodels.tsa.arima_model import ARIMAResults
from math import exp
from math import log
import numpy


# invert box-cox transform
def boxcox_inverse(value, lam):
    if lam == 0:
        return exp(value)
    return exp(log(lam * value + 1) / lam)


model_fit = ARIMAResults.load('bwmodel.pkl')
lam = numpy.load('bwmodel_lambda.npy')
bias = numpy.load('bwmodel_bias.npy')
yhat = model_fit.forecast()[0]
yhat = bias + boxcox_inverse(yhat, lam)
print('Predicted: %.3f' % yhat)


# evaluate the finalized model on the validation dataset
from pandas import Series
from matplotlib import pyplot
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.arima_model import ARIMAResults
from scipy.stats import boxcox
from sklearn.metrics import mean_squared_error
from math import sqrt
Example 7
def load(self):
    self.model = ARIMAResults.load(self.outfile_estimation_parameters)
Example 8
                  squeeze=True)
X = series.values
X = X.astype('float32')

series = read_csv('shampoo-ARIMA-validate.csv',
                  header=None,
                  index_col=0,
                  parse_dates=True,
                  squeeze=True)
y = series.values
y = y.astype('float32')

history = [x for x in X]
predictions = list()

model_fit = ARIMAResults.load('shampoo-ARIMA-model-fin.pkl')
bias = load('sAfb.npy')

# compare predictions to validation
for i in range(len(y)):
    p_hist = processing(array(history), 0)
    station = diff(p_hist)
    model = ARIMA(station, order=(3, 1, 0))
    model_fit = model.fit(trend='nc', disp=0)
    yhat = model_fit.forecast()[0]
    yhat = bias + inverse_diff(p_hist, yhat)
    predictions.append(asscalar(yhat))
    obs = y[i]
    history = p_hist.tolist()
    history.append(obs)
    print('Predicted= %.3f, Expected= %.3f' % (yhat, obs))
Example 9
def load_model(model_path):
    return ARIMAResults.load(model_path)
Example 10
from statsmodels.tsa.arima_model import ARIMAResults
import sys

if __name__ == '__main__':
    sn = sys.argv[1]
    t_time = sys.argv[2]
    t_time = int(t_time) % 163731

    load_model = ARIMAResults.load('model/' + str(sn) + '.model')
    print(str(sn))
    values = load_model.fittedvalues
    print(values[t_time])
Example 11
def load(self):
    # self.model = ARIMAResults.load(self.config.base_dir + "models/final/ARIMA.pkl")
    self.model = ARIMAResults.load(self._build_model_file_name())
Example 12
def mlwilldoit(time):
    arima_fitted_model = ARIMAResults.load("models/arima_model.pkl")
    # number of monthly steps from the 2017-07-01 reference date to the requested date
    diff = monthdelta(datetime.strptime("2017-07-01", "%Y-%m-%d"), datetime.strptime(time, "%Y-%m-%d"))
    return str(arima_fitted_model.forecast(diff)[0][-1])
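The monthdelta helper is not defined in this snippet; a minimal stand-in, assuming it returns the whole number of calendar months between two dates, might look like:

def monthdelta(d1, d2):
    # whole calendar months from d1 to d2 (assumed behavior)
    return (d2.year - d1.year) * 12 + (d2.month - d1.month)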
Example 13
# take input
cat = input("input the category you want to predict (Theft, Assault, Drug, Sex Offenses, Other Offenses)")

if cat.lower() == "theft":
    cat = "Theft"
elif cat.lower() == "assault":
    cat = "Assault"
elif cat.lower() == "drug":
    cat = "Drug"
elif cat.lower() == "sex offenses":
    cat = "Sexoffense"
elif cat.lower() == "other offenses":
    cat = "Offenses"

# find the saved model for the chosen category
name = cat + '.pkl'

# get latitude and longitude
location_x = float(input('latitude'))
location_y = float(input('longitude'))

# get the location-based possibility
possibility = run(location_x, location_y, file)

# predict
model = ARIMAResults.load(name)
raw = model.forecast()[0]

print("The possibility of crime is: ", raw * possibility * 100, "%")

Example 14
def validate_arima_model(csv_file_name):
    # load data for 70% - 30%
    dataset = read_csv('data/datasets/' + csv_file_name.split('.csv')[0] +
                       '_dataset_70_30.csv',
                       header=0,
                       index_col=0,
                       parse_dates=True,
                       squeeze=True)
    X = dataset.values.astype('float32')
    history = [x for x in X]
    validation = read_csv('data/datasets/' + csv_file_name.split('.csv')[0] +
                          '_validation_70_30.csv',
                          header=0,
                          index_col=0,
                          parse_dates=True,
                          squeeze=True)
    y = validation.values.astype('float32')
    print()
    print("==========================================================")
    print("For 70% - 30% we have...")
    print("Enter (p, d, q) based on the stationarity checks and ACF/PACF plots")
    p = input("Enter p value (Autoregression (AR) --> p): ")
    d = input("Enter d value (differencing --> d): ")
    q = input("Enter q value (Moving Average (MA) --> q): ")
    print('ARIMA(%d, %d, %d)' % (int(p), int(d), int(q)))
    # load model
    print("Loading model...")
    model_fit = ARIMAResults.load('data/saved_models/' +
                                  csv_file_name.split('.csv')[0] +
                                  '_arima_70_30.pkl')
    # bias = numpy.load('model_bias.npy')
    # make first prediction
    print("Starting model evaluation...")
    predictions = list()
    # yhat = bias + float(model_fit.forecast()[0])
    yhat = float(model_fit.forecast()[0])
    predictions.append(yhat)
    history.append(y[0])
    print('>Predicted=%.3f, Expected=%.3f' % (yhat, y[0]))
    # rolling forecasts
    for i in range(1, len(y)):
        # predict
        warnings.filterwarnings("ignore")
        model = ARIMA(history, order=(int(p), int(d), int(q)))
        model_fit = model.fit()
        # yhat = bias + float(model_fit.forecast()[0])
        yhat = float(model_fit.forecast()[0])
        predictions.append(yhat)
        # observation
        obs = y[i]
        history.append(obs)
        print('>Predicted=%.3f, Expected=%.3f' % (yhat, obs))
    # report performance
    calculate_forecasting_performance_measures(y, predictions)
    calculate_correlation_index(y, predictions)
    print("Model evaluation finished...")
    print("==========================================================")
    print()
    pyplot.plot(y)
    pyplot.plot(predictions, color='red')
    pyplot.show()

    # ==================================================================================================================
    # load data for 80% - 20%
    dataset = read_csv('data/datasets/' + csv_file_name.split('.csv')[0] +
                       '_dataset_80_20.csv',
                       header=0,
                       index_col=0,
                       parse_dates=True,
                       squeeze=True)
    X = dataset.values.astype('float32')
    history = [x for x in X]
    validation = read_csv('data/datasets/' + csv_file_name.split('.csv')[0] +
                          '_validation_80_20.csv',
                          header=0,
                          index_col=0,
                          parse_dates=True,
                          squeeze=True)
    y = validation.values.astype('float32')
    print()
    print("==========================================================")
    print("For 80% - 20% we have...")
    print("Enter (p, d, q) based on the stationarity checks and ACF/PACF plots")
    p = input("Enter p value (Autoregression (AR) --> p): ")
    d = input("Enter d value (differencing --> d): ")
    q = input("Enter q value (Moving Average (MA) --> q): ")
    print('ARIMA(%d, %d, %d)' % (int(p), int(d), int(q)))
    # load model
    print("Loading model...")
    model_fit = ARIMAResults.load('data/saved_models/' +
                                  csv_file_name.split('.csv')[0] +
                                  '_arima_80_20.pkl')
    # bias = numpy.load('model_bias.npy')
    # make first prediction
    print("Starting model evaluation...")
    predictions = list()
    # yhat = bias + float(model_fit.forecast()[0])
    yhat = float(model_fit.forecast()[0])
    predictions.append(yhat)
    history.append(y[0])
    print('>Predicted=%.3f, Expected=%.3f' % (yhat, y[0]))
    # rolling forecasts
    for i in range(1, len(y)):
        # predict
        warnings.filterwarnings("ignore")
        model = ARIMA(history, order=(int(p), int(d), int(q)))
        model_fit = model.fit()
        # yhat = bias + float(model_fit.forecast()[0])
        yhat = float(model_fit.forecast()[0])
        predictions.append(yhat)
        # observation
        obs = y[i]
        history.append(obs)
        print('>Predicted=%.3f, Expected=%.3f' % (yhat, obs))
    # report performance
    calculate_forecasting_performance_measures(y, predictions)
    calculate_correlation_index(y, predictions)
    print("Model evaluation finished...")
    print("==========================================================")
    print()
    pyplot.plot(y)
    pyplot.plot(predictions, color='red')
    pyplot.show()

    # ==================================================================================================================
    # load data for 90% - 10%
    dataset = read_csv('data/datasets/' + csv_file_name.split('.csv')[0] +
                       '_dataset_90_10.csv',
                       header=0,
                       index_col=0,
                       parse_dates=True,
                       squeeze=True)
    X = dataset.values.astype('float32')
    history = [x for x in X]
    validation = read_csv('data/datasets/' + csv_file_name.split('.csv')[0] +
                          '_validation_90_10.csv',
                          header=0,
                          index_col=0,
                          parse_dates=True,
                          squeeze=True)
    y = validation.values.astype('float32')
    print()
    print("==========================================================")
    print("For 90% - 10% we have...")
    print("Enter (p, d, q) based on the stationarity checks and ACF/PACF plots")
    p = input("Enter p value (Autoregression (AR) --> p): ")
    d = input("Enter d value (differencing --> d): ")
    q = input("Enter q value (Moving Average (MA) --> q): ")
    print('ARIMA(%d, %d, %d)' % (int(p), int(d), int(q)))
    # load model
    print("Loading model...")
    model_fit = ARIMAResults.load('data/saved_models/' +
                                  csv_file_name.split('.csv')[0] +
                                  '_arima_90_10.pkl')
    # bias = numpy.load('model_bias.npy')
    # make first prediction
    print("Starting model evaluation...")
    predictions = list()
    # yhat = bias + float(model_fit.forecast()[0])
    yhat = float(model_fit.forecast()[0])
    predictions.append(yhat)
    history.append(y[0])
    print('>Predicted=%.3f, Expected=%.3f' % (yhat, y[0]))
    # rolling forecasts
    for i in range(1, len(y)):
        # predict
        warnings.filterwarnings("ignore")
        model = ARIMA(history, order=(int(p), int(d), int(q)))
        model_fit = model.fit()
        # yhat = bias + float(model_fit.forecast()[0])
        yhat = float(model_fit.forecast()[0])
        predictions.append(yhat)
        # observation
        obs = y[i]
        history.append(obs)
        print('>Predicted=%.3f, Expected=%.3f' % (yhat, obs))
    # report performance
    calculate_forecasting_performance_measures(y, predictions)
    calculate_correlation_index(y, predictions)
    print("Model evaluation finished...")
    print("==========================================================")
    print()
    pyplot.plot(y)
    pyplot.plot(predictions, color='red')
    pyplot.show()

    # ==================================================================================================================
    # load data for 95% - 5%
    dataset = read_csv('data/datasets/' + csv_file_name.split('.csv')[0] +
                       '_dataset_95_5.csv',
                       header=0,
                       index_col=0,
                       parse_dates=True,
                       squeeze=True)
    X = dataset.values.astype('float32')
    history = [x for x in X]
    validation = read_csv('data/datasets/' + csv_file_name.split('.csv')[0] +
                          '_validation_95_5.csv',
                          header=0,
                          index_col=0,
                          parse_dates=True,
                          squeeze=True)
    y = validation.values.astype('float32')
    print()
    print("==========================================================")
    print("For 95% - 5% we have...")
    print("Enter (p, d, q) based on the stationarity checks and ACF/PACF plots")
    p = input("Enter p value (Autoregression (AR) --> p): ")
    d = input("Enter d value (differencing --> d): ")
    q = input("Enter q value (Moving Average (MA) --> q): ")
    print('ARIMA(%d, %d, %d)' % (int(p), int(d), int(q)))
    # load model
    print("Loading model...")
    model_fit = ARIMAResults.load('data/saved_models/' +
                                  csv_file_name.split('.csv')[0] +
                                  '_arima_95_5.pkl')
    # bias = numpy.load('model_bias.npy')
    # make first prediction
    print("Starting model evaluation...")
    predictions = list()
    # yhat = bias + float(model_fit.forecast()[0])
    yhat = float(model_fit.forecast()[0])
    predictions.append(yhat)
    history.append(y[0])
    print('>Predicted=%.3f, Expected=%.3f' % (yhat, y[0]))
    # rolling forecasts
    for i in range(1, len(y)):
        # predict
        warnings.filterwarnings("ignore")
        model = ARIMA(history, order=(int(p), int(d), int(q)))
        model_fit = model.fit()
        # yhat = bias + float(model_fit.forecast()[0])
        yhat = float(model_fit.forecast()[0])
        predictions.append(yhat)
        # observation
        obs = y[i]
        history.append(obs)
        print('>Predicted=%.3f, Expected=%.3f' % (yhat, obs))
    # report performance
    calculate_forecasting_performance_measures(y, predictions)
    calculate_correlation_index(y, predictions)
    print("Model evaluation finished...")
    print("==========================================================")
    print()
    pyplot.plot(y)
    pyplot.plot(predictions, color='red')
    pyplot.show()
Example 15

# save and load an ARIMA model with a workaround
from pandas import read_csv
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.arima_model import ARIMAResults


# monkey patch around bug in ARIMA class
def __getnewargs__(self):
    return ((self.endog), (self.k_lags, self.k_diff, self.k_ma))


ARIMA.__getnewargs__ = __getnewargs__

# load data
series = read_csv('daily-total-female-births.csv',
                  header=0,
                  index_col=0,
                  parse_dates=True,
                  squeeze=True)
# prepare data
X = series.values
X = X.astype('float32')
# fit model
model = ARIMA(X, order=(1, 1, 1))
model_fit = model.fit()
# save model
model_fit.save('model.pkl')
# load model
loaded = ARIMAResults.load('model.pkl')
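As a quick check (an added sketch, not part of the original recipe), the loaded results should reproduce the one-step forecast of the fit they were saved from:

# both objects should produce the same one-step-ahead forecast
print(model_fit.forecast()[0])
print(loaded.forecast()[0])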
Example 16
def model_with_arima(ts, train_size, order, seasonal_order=(), seasonal_freq=None, trend=None,
                                grid_search=False, path_to_model=None, verbose=False, ds_name='DS', var_name='Value'):
    """Model a time series with an ARIMA forecast.

       Inputs:
           ts [pd Series]:     A pandas Series with a DatetimeIndex and a column for numerical values.
           train_size [float]: The percentage of data to use for training, as a float (e.g., 0.66).
           order [tuple]:      The order hyperparameters (p,d,q) for this ARIMA model.


       Optional Inputs:
           seasonal_order [tuple]: The seasonal order hyperparameters (P,D,Q) for this SARIMA model. When specifying these, 'seasonal_freq' must also be given.
           seasonal_freq [int]:    The freq hyperparameter for this SARIMA model, i.e., the number of samples that make up one seasonal cycle.
           trend [str]:            The trend hyperparameter for an SARIMA model.
           grid_search [bool]:     When True, perform a grid search to set values for the 'order' and 'seasonal order' hyperparameters.
                                   Note this overrides any given (p,d,q)(P,D,Q) hyperparameter values. Default is False.
           path_to_model [str]:    Path to a *.pkl file of a trained (S)ARIMA model. When set, no training will be done because that model will be used.
           verbose [bool]:         When True, show ACF and PACF plots before grid searching, plot residual training errors after fitting the model,
                                   and print predicted v. expected values during outlier detection. TODO: mention plot w/ forecast & outliers once it's under an "if verbose"
           ds_name [str]:          The name of the dataset, used in plot titles and output file paths. Default is 'DS'.
           var_name [str]:         The name of the dependent variable in the time series.
                                   Default is 'Value'.


       Outputs:
           ts_with_arima [pd DataFrame]: The input time series as a DataFrame, with the original values in a column named by
                                         'var_name' and the model's predictions in an 'ARIMA' column.

       Optional Outputs:
           None

       Example:
           time_series_with_arima = model_with_arima(time_series, train_size=0.8, order=(12,0,0),
                                                                             seasonal_order=(0,1,0), seasonal_freq=365,
                                                                             verbose=False)
    """

    # Finalize ARIMA/SARIMA hyperparameters
    if grid_search and path_to_model is not None:
        raise ValueError('\'grid_search\' should be False when specifying a path to a pre-trained ARIMA model.')

    if (seasonal_freq is not None) and (len(seasonal_order) == 3) and (grid_search is False):
        seasonal_order = seasonal_order + (seasonal_freq,)  # (P,D,Q,freq)
    elif (seasonal_freq is not None) and (len(seasonal_order) != 3) and (grid_search is False):
        raise ValueError('\'seasonal_order\' must be a tuple of 3 integers when specifying a seasonal frequency and not grid searching.')
    elif (seasonal_freq is None) and (len(seasonal_order) == 3) and (grid_search is False):
        raise ValueError('\'seasonal_freq\' must be given when specifying a seasonal order and not grid searching.')

    if grid_search:
        # if verbose:
        #     lag_acf = acf(ts, nlags=20)
        #     lag_pacf = pacf(ts, nlags=20, method='ols')
        #     pyplot.show()
        if seasonal_freq is None:  # ARIMA grid search
            print('No seasonal frequency was given, so grid searching ARIMA(p,d,q) hyperparameters.')
            order = grid_search_arima_params(ts)
            print('Grid search found hyperparameters: ' + str(order) + '\n')
        else:  # SARIMA grid search
            print('Seasonal frequency was given, so grid searching ARIMA(p,d,q)(P,D,Q) hyperparameters.')
            order, seasonal_order, trend = grid_search_sarima_params(ts, seasonal_freq)
            print('Grid search found hyperparameters: ' + str(order) + str(seasonal_order) + '\n')

    # Train or load ARIMA/SARIMA model
    X = ts
    split = int(len(X) * train_size)
    train, test = X[0:split], X[split:len(X)]
    threshold = float(train.values.std(ddof=0)) * 2.0  # TODO: 2stds; finalize/decide std scheme (pass it in?)

    if len(seasonal_order) < 4:
        trained_model = ARIMA(train, order=order)
    else:
        # TODO: consider enforce_stationarity=False and enforce_invertibility=False, unless that prevents from detecting 2 DSs not right for ARIMA
        trained_model = SARIMAX(train, order=order, seasonal_order=seasonal_order, trend=trend)

    if path_to_model is not None:
        # load pre-trained model
        print('Loading model: ' + path_to_model)
        trained_model_fit = ARIMAResults.load(path_to_model)
    else:
        current_time = str(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
        print('Before fitting: ' + current_time + '\n')

        trained_model_fit = trained_model.fit(disp=1)

        current_time = str(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
        print('After fitting: ' + current_time + '\n')
        # # save the just-trained model
        # try:
        #     current_time = str(datetime.now().strftime("%Y-%m-%dT%H-%M-%S"))
        #     filename = 'SARIMA_' + var_name + '_' + train_size + '_' + str(order) + '_' + str(seasonal_order) + '_' + current_time + '.pkl'
        #     model_dir = 'Models/'
        #     if not os.path.exists(model_dir):
        #         os.makedirs(model_dir)
        #     filename = model_dir + filename
        #     trained_model_fit.save(filename)
        # except Exception as e:
        #     print('Saving model failed:')
        #     print(e)

    print(trained_model_fit.summary())

    # if verbose:
    #     # plot residual errors
    #     residuals = pd.DataFrame(trained_model_fit.resid)
    #     residuals.plot(title='Training Model Fit Residual Errors')
    #     pyplot.show()
    #     residuals.plot(kind='kde', title='Training Model Fit Residual Error Density')
    #     pyplot.show()
    #     print('\n')

    # Forecast with the trained ARIMA/SARIMA model
    predictions = trained_model_fit.predict(start=1, end=len(X)-1, typ='levels')
    predict_index = pd.Index(X.index[1:len(X)])
    predictions_with_dates = pd.Series(predictions.values, index=predict_index)
    errors = pd.Series()


    # try:
    #     model_error = sqrt(mean_squared_error(X[1:len(X)], predictions_with_dates))
    #     print('RMSE: %.3f' % model_error)
    #     if len(test) > 0:
    #         test_error = mean_squared_error(test, predictions_with_dates[test.index[0]:test.index[-1]])
    #         print('Test MSE: %.3f' % test_error)
    # except Exception as e:
    #     print('Forecast error calculation failed:')
    #     print(e)

    # Plot the forecast and outliers
    if len(seasonal_order) < 4:  # ARIMA title
        title_text = ds_name + ' with ' + str(order) + ' ARIMA Forecast'
    else:  # SARIMA title
        title_text = ds_name + ' with ' + str(order) + '_' + str(seasonal_order) + '_' + str(trend) + ' ARIMA Forecast'
    ax = X.plot(color='#192C87', title=title_text, label=var_name, figsize=(14, 6))
    if len(test) > 0:
        test.plot(color='#441594', label='Test Data')
    predictions_with_dates.plot(color='#0CCADC', label='ARIMA Forecast')
    ax.set(xlabel='Time', ylabel=var_name)
    pyplot.legend(loc='best')

    # save the plot before showing it
    if train_size == 1:
        plot_filename = ds_name + '_with_arima_full.png'
    elif train_size == 0.5:
        plot_filename = ds_name + '_with_arima_half.png'
    else:
        plot_filename = ds_name + '_with_arima_' + str(train_size) + '.png'
    plot_path = './save/datasets/' + ds_name + '/arima/plots/' + str(int(train_size*100)) + ' percent/'
    if not os.path.exists(plot_path):
        os.makedirs(plot_path)
    pyplot.savefig(plot_path + plot_filename, dpi=500)

    #pyplot.show()
    pyplot.clf()

    # Save data to proper directory with encoded file name
    ts_with_arima = pd.DataFrame({'ARIMA': predictions_with_dates, var_name: ts})
    ts_with_arima.rename_axis('Time', axis='index', inplace=True)  # name index 'Time'
    column_names = [var_name, 'ARIMA']  # column order
    ts_with_arima = ts_with_arima.reindex(columns=column_names)  # sort columns in specified order

    if train_size == 1:
        data_filename = ds_name + '_with_arima_full.csv'
    elif train_size == 0.5:
        data_filename = ds_name + '_with_arima_half.csv'
    else:
        data_filename = ds_name + '_with_arima_' + str(train_size) + '.csv'
    data_path = './save/datasets/' + ds_name + '/arima/data/' + str(int(train_size * 100)) + ' percent/'
    if not os.path.exists(data_path):
        os.makedirs(data_path)
    ts_with_arima.to_csv(data_path + data_filename)

    return ts_with_arima
Example 17
    def graph_print(called_status, graph_to_print, training_data, test_data,
                    len_training):
        results_ar = ARIMAResults.load("../trained_model/" + called_item +
                                       ".pkl")
        global original_data_plot, predicted_data
        if called_status == 0:

            #making prediction
            pred = results_ar.forecast(steps=60)[0]
            y = pred.tolist()
            s = pd.Series(y, copy=False)
            s = np.exp(s)
            training_data = training_data.reset_index()
            for x in range(len(s)):
                if (training_data.Month[len(training_data) - 1].month < 12):
                    m = training_data.Month[len(training_data) - 1].month + 1
                    y = training_data.Month[len(training_data) - 1].year
                else:
                    y = training_data.Month[len(training_data) - 1].year + 1
                    m = 1
                d = '{}-{}'.format(y, m)
                d = datetime.strptime(d, '%Y-%m')
                training_data = training_data.append(
                    {
                        'Month': d,
                        'Quantity': s[x]
                    }, ignore_index=True)
            training_data.set_index("Month", inplace=True)
            original_data_plot = training_data[:len_training]
            predicted_data = training_data[len_training:]
        fig = plt.figure(figsize=(4.5, 4), dpi=90)
        plt.title(title_print_first_word + " of " + called_item)
        if (graph_to_print == "Line Graph"):
            plt.plot(original_data_plot, color='black', label="Training Data")
            plt.plot(test_data, color='blue', label="Actual Data")
            plt.plot(predicted_data, color='red', label="Predicted Data")
        elif (graph_to_print == "Scatter Graph"):
            plt.scatter(original_data_plot.index.values,
                        original_data_plot['Quantity'],
                        s=10,
                        color='black',
                        label="Training Data")
            plt.scatter(test_data.index.values,
                        test_data['Quantity'],
                        s=10,
                        color='blue',
                        label="Actual Data")
            plt.scatter(predicted_data.index.values,
                        predicted_data['Quantity'],
                        s=10,
                        color='red',
                        label="Predicted Data")

        plt.xlabel("Date")
        plt.ylabel("Quantity in Kg")
        plt.legend(loc='best')
        # You can make the x-axis labels vertical using the rotation argument

        # specify the window as master
        canvas = FigureCanvasTkAgg(fig, master=window)
        canvas.draw()
        canvas.get_tk_widget().grid(row=1,
                                    column=0,
                                    columnspan=4,
                                    padx=7,
                                    pady=5,
                                    ipadx=20)
        called_status = 1
        return called_status
Example 18
                  squeeze=True)
X = series.values
X = X.astype('float32')

series = read_csv('champagne-ARIMA-validate.csv',
                  header=None,
                  index_col=0,
                  parse_dates=True,
                  squeeze=True)
y = series.values
y = y.astype('float32')

history = [x for x in X]
predictions = list()

model_fit = ARIMAResults.load('champagne-ARIMA-model-fin.pkl')
bias = load('cAfb.npy')

# compare predictions to validation
for i in range(len(y)):
    p_hist = processing(array(history), 1)
    station = diff(p_hist)
    model = ARIMA(station, order=(0, 2, 2))
    model_fit = model.fit(trend='nc', disp=0)
    yhat = model_fit.forecast()[0]
    yhat = bias + inverse_diff(p_hist, yhat)
    predictions.append(asscalar(yhat))
    obs = y[i]
    history = p_hist.tolist()
    history.append(obs)
    print('Predicted= %.3f, Expected= %.3f' % (yhat, obs))
Example 19
import pandas as pd
from pandas import Series
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.arima_model import ARIMAResults

data = pd.read_csv('./data/BTC-USD(PB).csv')
model_load = ARIMAResults.load('./models/ARIMA BTC-USD AR')

print(model_load)
Example 20

def load(source):
    # load model
    f = ArimaForecaster()
    f.model = ARIMAResults.load(source)
    return f
Example 21
from statsmodels.tsa.arima_model import ARIMAResults
from scipy.stats import boxcox
from sklearn.metrics import mean_squared_error
from math import sqrt
from math import exp
from math import log
import numpy
# load and prepare datasets
dataset = Series.from_csv('ddata2.csv')
X = dataset.values.astype('float32')
history = [x for x in X]
validation = Series.from_csv('ValidationDailyData.csv')
y = validation.values.astype('float32')
# load model
model_fit = ARIMAResults.load('dmodel.pkl')
bias = numpy.load('dmodel_bias.npy')
#lam = numpy.load('dmodel_lambda.npy')
# make first prediction
predictions = list()
yhat = bias + model_fit.forecast()[0]
#yhat = boxcox_inverse(yhat, lam)
predictions.append(yhat)
history.append(y[0])
print('>Predicted=%.3f, Expected=%.3f' % (yhat, y[0]))
# rolling forecasts
for i in range(1, len(y)):
    # transform

    # predict
    model = ARIMA(history, order=(4, 0, 0))
Example 22
        #Take the relevant values
        pred = np.array(history_[-output_seq_length:])
        #Retrieve the actual target values
        test_y_ = test_y[seq * output_seq_length:]
        #Calculate MAPE and store in list
        mape_score = data_handler.MAPE(
            test_y_.reshape(-1)[:output_seq_length], pred)
        mape_scores.append(mape_score)
        mape_sum += mape_score
    # Print a list of MAPE scores for the test data and their average
    print(round(mape_sum / (len(test_y) // output_seq_length), 3))
    print(mape_scores)
elif (train == 'test'):
    #Declare and fit model
    model = ARIMA(differenced, order=order)
    model = ARIMAResults.load(path_to_model)

    #Testing the first sequence
    history = np.append(train_x, test_x[:output_seq_length])
    preds = model.forecast(output_seq_length)[0]
    pred = list()

    #Removing float64 types for convenient plotting
    for i in range(len(preds)):
        pred.append(int(preds[i]))

    #Restore seasonality
    for prediction in pred:
        np.append(
            history,
            data_handler.add_seasonality(history, prediction,
Example 23
# load the finalized model and make a prediction
from pandas import Series
from statsmodels.tsa.arima_model import ARIMAResults
from math import exp
from math import log
import numpy


# invert box-cox transform
def boxcox_inverse(value, lam):
    if lam == 0:
        return exp(value)
    return exp(log(lam * value + 1) / lam)


model_fit = ARIMAResults.load('MModel.pkl')
lam = numpy.load('MModel_lambda.npy')
bias = numpy.load('MModel_bias.npy')
yhat, stderr, conf = model_fit.forecast()
yhat = bias + boxcox_inverse(yhat, lam)
print('Forecast: %.3f' % yhat)
print('Standard Error: %.3f' % stderr)
print('95%% Confidence Interval: %.3f to %.3f' % (conf[0][0], conf[0][1]))

# evaluate the finalized model on the validation dataset
from pandas import Series
from matplotlib import pyplot
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.arima_model import ARIMAResults
from scipy.stats import boxcox
from sklearn.metrics import mean_squared_error
Example 24
def load_model():
    return ARIMAResults.load('./model.pkl')
Example 25
from statsmodels.tsa.arima_model import ARIMAResults
import numpy


# invert differenced value
def inverse_difference(history, yhat, interval=1):
    return yhat + history[-interval]
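
# quick illustration (an added sketch, not part of the original script): with
# monthly data and interval=12, a value forecast on the seasonally differenced
# scale is restored by adding the observation from 12 steps back
example_history = [10.0] * 12 + [15.0]
print(inverse_difference(example_history, 2.0, 12))  # 2.0 + 10.0 -> 12.0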


series = read_csv(r'data\dataset.csv',
                  header=None,
                  index_col=0,
                  parse_dates=True,
                  squeeze=True)
months_in_year = 12
model_fit = ARIMAResults.load(r'data\model.pkl')
bias = numpy.load(r'data\model_bias.npy')
yhat = float(model_fit.forecast()[0])
yhat = bias + inverse_difference(series.values, yhat, months_in_year)
print('Predicted: %.3f' % yhat)

#%% Validate model
#   Validation using validation.csv (test set)
#   1. Load the model and predict the next 12 months.
#      Forecasts beyond the first step will start
#      to degrade quickly.
#   2. Rolling forecast: update the transform and model
#      at each time step (preferred). This means that
#      we step over the lead times in the validation
#      dataset and take each observation as an update to the history.
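A minimal sketch of option 2 (the rolling forecast described above), assuming the difference()/inverse_difference() helpers, a bias term, and a validation series y as in the neighboring examples; the (0, 0, 1) order is illustrative:

history = [x for x in series.values]
predictions = list()
for t in range(len(y)):
    # re-apply the seasonal transform to the updated history
    diff = difference(history, months_in_year)
    # re-fit the model at every step
    model = ARIMA(diff, order=(0, 0, 1))
    model_fit = model.fit(disp=0)
    yhat = bias + inverse_difference(history, float(model_fit.forecast()[0]), months_in_year)
    predictions.append(yhat)
    # take the observation as an update to the history
    history.append(y[t])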
Example 26
def forecast_next(m_name):
    loaded = ARIMAResults.load(m_name + '.pkl')
    return loaded.predict()
Example 27
def prediction():
    model_fit = ARIMAResults.load('model.pkl')
    bias = np.load('model_bias.npy')
    yhat = bias + float(model_fit.forecast()[0])
    print('Predicted: %.3f' % yhat)
Example 28
from pandas import Series
from statsmodels.tsa.arima_model import ARIMAResults
import numpy
from matplotlib import pyplot


# invert differenced value
def inverse_difference(history, yhat, interval=1):
    return yhat + history[-interval]


predictions = list()
series = Series.from_csv('dataset_training.csv')
months_in_year = 12
model_fit = ARIMAResults.load('sales_model.pkl')
bias = numpy.load('model_bias.npy')
yhat = float(model_fit.forecast()[0])
predictions.append(yhat)
for i in range(1, 60):
    yhat = bias + inverse_difference(series.values, yhat, months_in_year)
    predictions.append(yhat)

pyplot.plot(predictions, color='red')
pyplot.show()
Example 29

# create a differenced series
def difference(dataset, interval=1):
    diff = list()
    for i in range(interval, len(dataset)):
        value = dataset[i] - dataset[i - interval]
        diff.append(value)
    return diff

# invert differenced value
def inverse_difference(history, yhat, interval=1):
    return yhat + history[-interval]

# load and prepare datasets
dataset = Series.from_csv('trainsetar1.csv')
X = dataset.values.astype('float32')
history = [x for x in X]
months_in_year = 12
validation = Series.from_csv('testsetar1.csv')
y = validation.values.astype('float32')
# load model
model_fit = ARIMAResults.load('model1.pkl')
bias = numpy.load('model_bias1.npy')
# make first prediction
predictions = list()
yhat = float(model_fit.forecast()[0])
yhat = bias + inverse_difference(history, yhat, months_in_year)
predictions.append(yhat)
history.append(y[0])
print('>Predicted=%.3f, Expected=%.3f' % (yhat, y[0]))
# rolling forecasts
for i in range(1, len(y)):
    # difference data
    months_in_year = 12
    diff = difference(history, months_in_year)
    # predict
    model = ARIMA(diff, order=(0, 0, 1))
Example 30
File: arima.py Project: lbship/Blog
print("p,q")
print(p, q)

# build the ARIMA(0, 1, 1) model
order = (p, 1, q)
train_X = diff_1_df[:]
arima_model = ARIMA(train_X, order).fit()

# model summary
# print(arima_model.summary2())

# save the model
arima_model.save('./data/arima_model.h5')

# load model
arima_model = ARIMAResults.load('./data/arima_model.h5')

# forecast the next two days
predict_data_02 = arima_model.predict(start=len(train_X),
                                      end=len(train_X) + 1,
                                      dynamic=False)

# predict the historical (in-sample) data
predict_data = arima_model.predict(dynamic=False)

# invert the log differencing
# original_series = np.exp(train_X.values[1:] + np.log(dau.values[1:-1]))
# predict_series = np.exp(predict_data.values + np.log(dau.values[1:-1]))
# invert the differencing
original_series = train_X.values[1:] + dau.values[1:-1]
predict_series = predict_data.values + dau.values[1:-1]