Python SARIMAX.forecastの例、statsmodels.tsa.statespace.sarimax.SARIMAX.forecast Pythonの例

コード例 #1

0

ファイルを表示

ファイル: sarimax.py プロジェクト: koehnden/ts_forecasting

def order_selection(train,
                    test,
                    params,
                    loss_func=mean_squared_error,
                    **loss_kwargs):
    """Grid-search SARIMAX keyword arguments by hold-out loss.

    Parameters
    ----------
    train : array-like
        Series every candidate model is fitted on.
    test : array-like
        Hold-out series; ``test.shape[0]`` steps are forecast and scored.
    params : dict
        Maps a SARIMAX keyword-argument name to an iterable of candidate
        values. Every combination of the values is tried.
    loss_func : callable
        Called as ``loss_func(test, yhat, **loss_kwargs)``; lower is better.
    **loss_kwargs
        Extra keyword arguments forwarded to ``loss_func``.

    Returns
    -------
    tuple
        ``(best_params, best_score)``. When no candidate could be fitted the
        result is ``(None, inf)``.
    """
    best_score, best_params = float("inf"), None
    keys, values = zip(*params.items())
    grid = [dict(zip(keys, combo)) for combo in itertools.product(*values)]

    # Silence statsmodels warnings about unconverged models, but only while
    # the search runs, so the caller's warning filters are left untouched.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        for candidate in grid:
            try:
                model_fit = SARIMAX(train, **candidate).fit()
            except Exception:
                # This combination cannot be fitted; try the next one.
                continue

            yhat = model_fit.forecast(test.shape[0])
            loss = loss_func(test, yhat, **loss_kwargs)

            if loss < best_score:
                best_score, best_params = loss, candidate
                print(best_score)

    if best_params is None:
        print('No ARIMA configuration could be fitted')
    else:
        print('Best ARIMA%s Loss=%.3f' % (best_params, best_score))
    return best_params, best_score

コード例 #2

0

ファイルを表示

ファイル: cleaning_functions.py プロジェクト: mattcarr17/time_series_project

def sarima_models_top_18():
    """Fit one SARIMAX model per hard-coded code and forecast 12 steps each.

    The i-th entry of ``new_sarima_orders`` supplies the ``(order,
    seasonal_order)`` pair for the i-th entry of ``codes``.

    Returns
    -------
    dict
        Maps each code to the ``.values`` array of its 12-step forecast.
    """
    new_sarima_orders = [
        ((1, 1, 1), (1, 1, 1, 12)), ((1, 1, 1), (0, 1, 1, 12)),
        ((1, 1, 1), (0, 1, 1, 12)), ((1, 1, 1), (1, 1, 1, 12)),
        ((1, 1, 1), (0, 1, 1, 12)), ((1, 1, 1), (1, 1, 1, 12)),
        ((1, 1, 1), (0, 1, 1, 12)), ((0, 1, 1), (1, 1, 1, 12)),
        ((1, 1, 1), (0, 1, 1, 12)), ((1, 1, 1), (1, 1, 1, 12)),
        ((1, 1, 1), (1, 1, 1, 12)), ((1, 1, 1), (0, 1, 1, 12)),
        ((0, 1, 1), (0, 1, 1, 12)), ((1, 1, 1), (1, 1, 0, 12)),
        ((0, 1, 1), (1, 1, 1, 12)), ((1, 1, 1), (1, 1, 1, 12)),
        ((1, 1, 1), (1, 1, 1, 12)), ((1, 1, 1), (0, 1, 1, 12))
    ]

    codes = [
        60804, 60085, 60110, 60104, 60505, 60651, 60073, 60436, 60120, 60165,
        60160, 60641, 60432, 46327, 60633, 46324, 60099, 46394
    ]

    data = load_data_top_27()

    forecasts = {}
    # Both lists have 18 entries, so pairing them is the same as indexing.
    for code, (order, seasonal_order) in zip(codes, new_sarima_orders):
        unfitted = SARIMAX(data.loc[:, code],
                           order=order,
                           seasonal_order=seasonal_order,
                           enforce_invertibility=False,
                           enforce_stationarity=False)
        fitted = unfitted.fit()
        forecasts[code] = fitted.forecast(steps=12).values

    return forecasts

コード例 #3

0

ファイルを表示

def _fit_styrene_model(data):
    """Fit SARIMAX on log(Styrene) with Oil_Lag and Gas_Lag as exogenous regressors."""
    return SARIMAX(np.log(data['Styrene']),
                   order=(1, 1, 2),
                   seasonal_order=(0, 0, 1, 12),
                   enforce_invertibility=False,
                   exog=data[['Oil_Lag', 'Gas_Lag']]).fit()


def process_data6():
    """Walk forward over the actuals, writing the 6-step-ahead forecasts to CSV.

    For every row of the actuals workbook the current model forecasts six
    steps ahead, holding that row's ``Oil_Lag`` / ``Gas_Lag`` values constant
    over the horizon. The sixth forecast is kept (exponentiated, because the
    model is fitted on the log of ``Styrene``). The row is then appended to
    the training data and the model is refitted.

    The collected forecasts are written to ``../../Data/Results.csv`` with
    the columns ``timestamp`` and ``value`` (rounded to 2 decimals).
    """
    series = pd.read_excel('../../Data/Styrene-Net Industry Average 2010-2015.xlsx', header=0,
                           index_col=0, parse_dates=True)
    series.index.freq = 'MS'

    data = series.copy()

    actuals = pd.read_excel('../../Data/Styrene-Net Industry Average 2015-2018 Actuals.xlsx',
                            header=0, index_col=0, parse_dates=True)

    actuals.index.freq = 'MS'

    # Test ranges
    data = data['2010-01-01':]

    model = _fit_styrene_model(data)

    exog_cols = ['Oil_Lag', 'Gas_Lag']
    horizon = 6
    preds = []

    for i in actuals.index:
        # One-row frame holding the current actual observation.
        row = pd.DataFrame(actuals.loc[i, :]).T

        # The original built five copies of this row by hand and assigned to
        # the ``set_index`` attribute (which changes no index); the net effect
        # was the same exogenous row repeated for every forecast step.
        future_exog = pd.concat([row[exog_cols]] * horizon)

        yhat_log = model.forecast(steps=horizon, exog=future_exog)
        # Keep only the last step of the horizon, selected by position.
        yhat = np.exp(yhat_log.iloc[[horizon - 1]])
        preds.append(yhat)

        # Add the observed row to the history and refit for the next step.
        data = pd.concat([data, row], axis=0)
        model = _fit_styrene_model(data)

    df = pd.DataFrame({'timestamp': [p.index[0] for p in preds],
                       'value': [round(p.iloc[0], 2) for p in preds]})
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df.to_csv('../../Data/Results.csv', index=False)

コード例 #4

0

ファイルを表示

ファイル: sarima.py プロジェクト: RedLicorice/crypto-forecast

class SARIMAModel(SMModel):
    """Wrapper around statsmodels SARIMAX using only the (p, d, q) order."""

    type = [ModelType.CONTINUOUS_PRICE, ModelType.UNIVARIATE]
    name = 'statsmodels.arima'
    default_params = {'order': (1, 1, 1)}

    @with_params
    def fit(self, x, **kwargs):
        """Fit SARIMAX on ``x`` with ``kwargs['params']['order']``.

        Returns the fitted results object, or ``None`` (after logging) when
        fitting raises ``ValueError`` or ``LinAlgError``. On failure
        ``self.model`` is reset to ``None`` so that ``predict`` cannot
        silently reuse a model from an earlier fit.
        """
        params = kwargs.get('params')
        try:
            self.model = SARIMAX(x, order=params['order']) \
                    .fit(disp=params.get('disp', 0))
            return self.model
        except (ValueError, np.linalg.LinAlgError):
            self.model = None
            logger.error('ARIMA convergence error (order {} {} {})'.format(
                params['order'][0], params['order'][1], params['order'][2]))
            return None

    def predict(self, x, **kwargs):
        """Forecast ``x.shape[0]`` steps and pass them to ``to_discrete_double``.

        Returns ``None`` when no fitted model is available or when
        forecasting raises ``ValueError`` or ``LinAlgError``.
        """
        # getattr guards against predict() being called before any fit().
        if not getattr(self, 'model', None):
            return None
        try:
            forecast = self.model.forecast(steps=x.shape[0])
            return to_discrete_double(forecast, -0.01, 0.01)
        except (ValueError, np.linalg.LinAlgError):
            logger.error('ARIMA convergence error (order {} {} {})'.format(
                self.params['order'][0], self.params['order'][1],
                self.params['order'][2]))
            return None

    @with_x
    def get_grid_search_configs(self, **kwargs):
        """Build one config dict per (p, d, q) combination to be searched.

        p and q range over 0..5. d ranges over 0..5 as well, unless the ADF
        test on ``x_train`` rejects a unit root (p-value < 0.05), in which
        case d is fixed at 0.
        """
        x_train = kwargs.get('x_train')
        x_test = kwargs.get('x_test')

        p_values = range(0, 6)
        d_values = range(0, 6)
        q_values = range(0, 6)
        # adfuller returns a tuple: index 0 is the statistic, 1 the p-value.
        adf = adfuller(x_train)
        if adf[1] < 0.05:  # Null rejected: series is stationary, no differencing
            logger.info('Series is stationary, no need for differencing')
            d_values = [0]

        # Same iteration order as nested p / d / q loops.
        return [
            {
                'params': {
                    'order': (p, d, q)
                },
                'x_train': x_train,
                'x_test': x_test
            }
            for p in p_values
            for d in d_values
            for q in q_values
        ]

コード例 #5

0

ファイルを表示

 def f_ARIMA(self, O_Train, O_Test, order1, seasonal_order1):
     """Fit SARIMAX on ``O_Train`` and forecast ``len(O_Test)`` steps.

     Returns the forecasts as a Series named ``pred``, indexed by the
     ``Date`` column obtained from ``O_Test.reset_index()``.
     """
     unfitted = SARIMAX(O_Train,
                        order=order1,
                        seasonal_order=seasonal_order1)
     fitted = unfitted.fit()

     # Test dates, renumbered 0..n-1 so they line up with the forecasts.
     test_frame = O_Test.reset_index()
     horizon = len(O_Test)
     forecast_frame = pd.DataFrame(fitted.forecast(horizon))
     forecast_frame.reset_index(drop=True, inplace=True)

     combined = pd.concat([test_frame['Date'], forecast_frame], axis=1)
     combined.columns = ['Date', 'pred']
     return combined.set_index('Date')['pred']

コード例 #6

0

ファイルを表示

def sarima_prediction(data, pollutant, p, q, length=1):
    """Fit a SARIMA model with seasonal period 7 and forecast ``length`` steps.

    The differencing order (regular and seasonal) is 1 when ``pollutant`` is
    ``'O3'`` and 0 otherwise. ``p`` and ``q`` are the non-seasonal AR and MA
    orders; the seasonal AR and MA orders are fixed at 1.
    """
    seasonal_period = 7
    diff_order = 1 if pollutant == 'O3' else 0
    unfitted = SARIMAX(np.asarray(data),
                       order=(p, diff_order, q),
                       seasonal_order=(1, diff_order, 1, seasonal_period),
                       initialization='approximate_diffuse')
    fitted = unfitted.fit()
    # Forecast the requested number of future values (one by default).
    return fitted.forecast(length)

コード例 #7

0

ファイルを表示

class SARIMA_regressor(BaseEstimator, RegressorMixin):
    """scikit-learn style regressor backed by a statsmodels SARIMAX model."""

    def __init__(
        self,
        endog_col,
        exog_cols,
        order,
        seasonal_order,
        measurement_error=True,
    ):
        """Store the model configuration; nothing is fitted here.

        Parameters
        ----------
        endog_col : str
            Column of X holding the endogenous series.
        exog_cols : list
            Columns of X holding the exogenous regressors.
        order : tuple
            (p, d, q) ARIMA order.
        seasonal_order : tuple
            (P, D, Q, s) seasonal order.
        measurement_error : bool, optional
            Whether ``endog_col`` is assumed to carry measurement error,
            by default True.
        """
        self.measurement_error = measurement_error
        self.seasonal_order = seasonal_order
        self.order = order
        self.exog_cols = exog_cols
        self.endog_col = endog_col

    def fit(self, X, y=None):
        """Fit SARIMAX on the configured columns of ``X``; ``y`` is unused.

        Returns ``self`` with the fitted results stored in ``self.model``.
        """
        endog = X[self.endog_col]
        exog = X[self.exog_cols]
        unfitted = SARIMAX(
            endog,
            exog=exog,
            order=self.order,
            seasonal_order=self.seasonal_order,
            measurement_error=self.measurement_error,
        )
        self.model = unfitted.fit()
        return self

    def predict(self, X):
        """Forecast one step per row of ``X`` past the end of the fitted data.

        Parameters
        ----------
        X : DataFrame-like
            Supplies the forecast horizon (``X.shape[0]``) and the future
            exogenous values (``X[self.exog_cols]``).
        """
        horizon = X.shape[0]
        future_exog = X[self.exog_cols]
        return self.model.forecast(horizon, exog=future_exog)

コード例 #8

0

ファイルを表示

def model_sarima(df, steps, kwargs):
    """Fit SARIMAX on ``df`` and forecast ``steps`` values.

    Parameters
    ----------
    df : array-like
        Series to fit on.
    steps : int
        Number of steps to forecast.
    kwargs : dict
        Must provide ``'order'`` and ``'seasonal_order'``. When ``'fourier'``
        is present and truthy, the terms from ``_get_fourier_terms(df, steps)``
        are used as exogenous regressors for training and forecasting.

    Returns
    -------
    The forecast with its index reset to 0..steps-1, or ``None`` when the
    model cannot be built or fitted.
    """
    exog_to_train, exog_to_test = None, None
    if kwargs.get('fourier'):
        exog_to_train, exog_to_test = _get_fourier_terms(df, steps)

    # train
    try:
        model = SARIMAX(df,
                        order=kwargs['order'],
                        seasonal_order=kwargs['seasonal_order'],
                        exog=exog_to_train)
        model = model.fit(disp=-1)
    except Exception:
        # Best effort: a failed fit is reported as "no forecast". Unlike a
        # bare ``except``, KeyboardInterrupt and SystemExit still propagate.
        return None
    # predict
    return model.forecast(steps, exog=exog_to_test).reset_index(drop=True)

コード例 #9

0

ファイルを表示

    def DomesticModelMaking(self):
        """Fit SARIMAX on ``AvgNetFare``, forecast 30 steps and save to CSV.

        Re-indexes ``self.Domesticdata`` by ``InvoiceDate`` (this mutates the
        instance), fits the model, appends the 30 forecast values to the
        historical ``AvgNetFare`` column and writes the result to
        ``PredictedDomesticDataset.csv``.
        """
        self.Domesticdata = self.Domesticdata.set_index('InvoiceDate')

        sar = SARIMAX(
            self.Domesticdata['AvgNetFare'],
            order=(6, 2, 4),
            seasonal_order=(6, 2, 4, 12),
            trend='n',
        )
        sar = sar.fit()
        pred = sar.forecast(steps=30)
        # A dict keeps the forecast values; passing ``columns=`` with a name
        # different from the Series' own name would give an all-NaN column.
        pred = pd.DataFrame({'AvgNetFare': pred})

        predDomestic = pd.DataFrame(self.Domesticdata['AvgNetFare'])
        # Keep the history's index name so the CSV header stays the same.
        pred = pred.rename_axis(predDomestic.index.name)
        # The combined frame must be assigned back; the original discarded
        # the result of ``append`` and wrote only the history.
        predDomestic = pd.concat([predDomestic, pred])

        predDomestic.to_csv('PredictedDomesticDataset.csv')

コード例 #10

0

ファイルを表示

ファイル: test_ar.py プロジェクト: Gys19/statsmodels

def test_autoreg_predict_forecast_equiv(reset_randomstate):
    """AutoReg forecast/predict agree; SARIMAX forecasts share their index."""
    series = np.random.normal(size=1000)
    nobs = series.shape[0]
    dates = pd.date_range("2020-1-1", freq="D", periods=nobs)
    # Turn the white noise into an AR(1) path in place.
    for t in range(1, nobs):
        series[t] = 0.95 * series[t - 1] + series[t]
    y = pd.Series(series, index=dates)

    autoreg_res = AutoReg(y, trend="c", lags=1, old_names=False).fit()
    by_steps = autoreg_res.forecast(12)
    by_position = autoreg_res.predict(nobs, nobs + 11)
    by_date = autoreg_res.forecast("2022-10-08")
    assert_series_equal(by_steps, by_position)
    assert_series_equal(by_steps, by_date)

    sarimax_model = SARIMAX(y, order=(1, 0, 0), trend="c")
    sarimax_res = sarimax_model.fit(disp=False)
    sarimax_forecast = sarimax_res.forecast(12)
    pd.testing.assert_index_equal(by_steps.index, sarimax_forecast.index)

コード例 #11

0

ファイルを表示

def predict_next_sales(best_params, dataset):
    """Validate the chosen SARIMA configuration, then forecast 3 steps ahead.

    ``best_params`` unpacks to ``(order, seasonal_order, trend)``. A
    walk-forward validation over the test split prints the RMSE and saves a
    comparison plot. The model fitted on the full ``dataset`` then produces a
    3-step forecast, which is printed and saved as a bar chart and a line plot.
    """
    order, sorder, trend = best_params
    final_model = SARIMAX(dataset,
                          order=order,
                          seasonal_order=sorder,
                          trend=trend,
                          enforce_stationarity=False,
                          enforce_invertibility=False).fit(disp=False)

    # Walk-forward validation: forecast, then reveal the true observation.
    train, test = train_test_split(dataset, num_test)
    history = [x for x in train]
    walk_forward = []
    for step in range(len(test)):
        walk_forward.append(sarima_forecast(history, best_params))
        history.append(test[step])

    # Report the validation error and save the comparison plot.
    error_2019 = measure_rmse(test, walk_forward)
    print("Estimated RMSE is ", error_2019)
    plt.close()
    plt.plot(walk_forward)
    plt.plot(history[-len(test):])
    plt.savefig("Final Model 2018 estimated sales.png")
    plt.close()

    # Forecast from the model fitted on the full dataset.
    future = final_model.forecast(3)
    print("Predictions are")
    print(future)

    # Bar chart of the forecast.
    future.plot.bar()
    plt.savefig("2019 Forecast Bar Chart.png")
    plt.close()

    # Line plot of the forecast.
    future.plot()
    plt.savefig("2019 Forecast Line plot.png")

コード例 #12

0

ファイルを表示

ファイル: Analysis.py プロジェクト: lnguyen7-ops/Improve_neighborhood_safety_by_predictive_patrolling

def arima_best(fh, train, val, p_range, d_range, q_range, loss_metric="MSE"):
    '''
    Grid-search the (p, d, q) order of a SARIMAX model by validation loss.

    fh : int. Forecast horizon. The validation set may be longer than the
            horizon, but only its first fh values are scored, so the search
            matches how the model is used afterwards.
    train : series the candidate models are fitted on.
    val : validation series.
    p_range, d_range, q_range : tuples unpacked into range(), e.g. (0, 3).
    loss_metric : name handed to loss_func to obtain the scoring callable.

    Returns (best_model, (best_p, best_d, best_q)); every entry is None when
    no candidate scored below infinity.
    '''
    target = val[:fh]  # true values to score against
    lowest_loss = float("inf")
    winner = None
    winning_order = (None, None, None)
    for p in range(*p_range):
        for d in range(*d_range):
            for q in range(*q_range):
                candidate = SARIMAX(train,
                                    order=(p, d, q),
                                    seasonal_order=(4, 1, 2, 8),
                                    enforce_stationarity=False,
                                    enforce_invertibility=False,
                                    trend=None)
                fitted = candidate.fit(maxiter=100, method="powell")
                forecast = fitted.forecast(fh)
                score = loss_func(loss_metric, tensor=False)(target, forecast)
                if score < lowest_loss:
                    lowest_loss = score
                    winner = fitted
                    winning_order = (p, d, q)
    return winner, winning_order

コード例 #13

0

ファイルを表示

ファイル: Analysis.py プロジェクト: lnguyen7-ops/Improve_neighborhood_safety_by_predictive_patrolling

def arima_evaluate(model, test, fh=8, refit=None, metric=MAPE):
    '''
    Forecast fh steps with a fitted SARIMAX model and score against test.

    model : fitted SARIMAX results object.
    test : pd time series. Test data set; only its first fh values are used.
    fh : int. Forecast horizon.
    refit : pd time series or None. When given and non-empty, a new model
            with the same (p, d, q) order is fitted on it, using the previous
            parameters as starting values. None (the default) and an empty
            series both mean "do not refit".
    metric : callable taking (predicted_array, true_array).

    Returns (pred, true, loss).
    '''
    # The default is None rather than pd.Series(): a default built at
    # definition time is shared between calls.
    if refit is not None and not refit.empty:
        start_params = model.params  # previous estimates as starting values
        p_d_q = (model.model.k_ar_params, model.model.k_diff,
                 model.model.k_ma_params)
        # NOTE(review): only the non-seasonal order is carried over. If the
        # previous model had seasonal terms, start_params may not match the
        # new model's parameter vector - confirm with the callers.
        model = SARIMAX(refit,
                        order=p_d_q,
                        enforce_stationarity=False,
                        enforce_invertibility=False,
                        trend=None).fit(start_params, maxiter=1000)
    pred = model.forecast(steps=fh)  # forecast values
    true = test[:fh]  # true values
    loss = metric(pred.array, true.array)
    return pred, true, loss

コード例 #14

0

ファイルを表示

def sarimax_forecast(df):
    '''Forecast the next missing ``price`` value with SARIMAX.

    Rows with a known ``price`` form the training history. The first row
    with a missing ``price`` is the one forecast, and its remaining columns
    are used as exogenous regressors.

    Returns ``(forecast, past)``. ``forecast`` is a one-row DataFrame with
    the columns ``price``, ``lower_interval`` and ``upper_interval``, indexed
    by ``date_time``; all three are NaN when ``temp`` is missing for that
    row. ``past`` is the last historical row of the first column
    (presumably ``price`` - confirm the column order).
    '''
    # split past and future
    past = df[~df.price.isnull()]
    future = df[df.price.isnull()].drop('price', axis=1)
    # forecast for next time point only
    future = future.iloc[:1, :]
    # .iloc selects by position; a plain [0] on a date-indexed Series is a
    # label lookup that only worked through a deprecated fallback.
    if future.temp.isnull().iloc[0]:
        # weather forecast data is not available for that hour
        forecast = np.nan
        lower = np.nan
        upper = np.nan
        print('weather data is not available')
    else:
        past.index = pd.DatetimeIndex(past.index.values,
                                      freq=past.index.inferred_freq)
        # Build Model
        sarima = SARIMAX(past.price,
                         exog=past.drop('price', axis=1),
                         order=(1, 1, 1),
                         seasonal_order=(1, 0, 2, 7))
        sarima = sarima.fit(maxiter=300)
        # One forecast call yields both the point forecast and its interval.
        results = sarima.get_forecast(1, exog=future, alpha=0.05)
        forecast = results.predicted_mean
        interval = results.conf_int()
        lower = interval['lower price'].iloc[0]
        upper = interval['upper price'].iloc[0]

    # create forecast df with datetimeIndex
    forecast = pd.DataFrame(dict(price=forecast,
                                 lower_interval=lower,
                                 upper_interval=upper),
                            index=future.index)
    forecast.index.name = 'date_time'
    past = past.iloc[-1:, 0]
    return forecast, past

コード例 #15

0

ファイルを表示

def sarimax_forecast(hour=11):
    '''Forecast the next missing ``price`` for the given hour of the day.

    hour: passed to ``get_data(hour=hour)`` to load the data.

    Returns ``(forecast, past)``. ``forecast`` is a one-row DataFrame with
    the columns ``price``, ``lower`` and ``upper`` (all NaN when ``temp`` is
    missing for the forecast row). ``past`` is the last historical row of
    the first column (presumably ``price`` - confirm the column order).
    '''

    df_all = get_data(hour=hour)

    # split past and future
    past = df_all[~df_all.price.isnull()]
    future = df_all[df_all.price.isnull()].drop('price', axis=1)

    # forecast for the next time point only
    future = future.iloc[:1, :]
    # .iloc selects by position regardless of the index labels.
    if future.temp.isnull().iloc[0]:
        forecast = np.array([np.nan])
        confidence_int = pd.DataFrame(
            {
                'lower price': np.nan,
                'upper price': np.nan
            }, index=['x'])

    else:
        past.index = pd.DatetimeIndex(past.index.values,
                                      freq=past.index.inferred_freq)
        # Build Model
        sarima = SARIMAX(past.price,
                         past.drop('price', axis=1),
                         order=(1, 1, 1),
                         seasonal_order=(1, 0, 2, 7))
        sarima = sarima.fit(maxiter=300)
        # One forecast call yields both the point forecast and its interval.
        results = sarima.get_forecast(1, exog=future, alpha=0.05)
        forecast = results.predicted_mean
        confidence_int = results.conf_int()
    # create forecast df with datetimeIndex
    lower = confidence_int['lower price'].iloc[0]
    upper = confidence_int['upper price'].iloc[0]
    forecast = pd.DataFrame(dict(price=forecast, lower=lower, upper=upper),
                            index=future.index)
    past = past.iloc[-1:, 0]
    return forecast, past

コード例 #16

0

ファイルを表示

def predictionArima(df):
    """Run rolling-window SARIMAX forecasts and collect them via ``app``.

    For every window size ``n`` in ``PSW`` and every horizon index ``v`` in
    ``range(inter)``, a model is fitted on each length-``n`` slice of ``df``
    and ``v + 1`` steps are forecast. Each forecast, with the matching
    ``sugarValue`` test slice, is handed to ``app``, which returns the
    updated results frame.

    Returns the accumulated results DataFrame and prints the elapsed time.
    """
    start_time = time.time()
    window = pd.DataFrame(columns=[
        'Current test', 'Current prediction', 'MSE',
        'Glycemia prediction RMSE (mg/dl)', 'PSW',
        'Prediction Horizon (minutes)'
    ])

    for n in PSW:
        for v in range(0, inter):

            interval = (v + 1) * 15
            windo = n / 12

            for x in range(len(df) - n - v):
                train = df.iloc[x:n + x]
                test = df.iloc[n + x:n + x + v + 1]

                model = SARIMAX(train,
                                order=orderArima,
                                enforce_stationarity=False,
                                enforce_invertibility=False).fit()

                # The keyword is ``steps``. The original passed ``step=``,
                # which does not set the horizon, so the forecast did not
                # cover the v + 1 values in ``test``.
                pred = model.forecast(steps=v + 1).values

                window = app(window, train, test['sugarValue'], pred, interval,
                             windo)

    print("--- %s Seconds for computation ---" % (time.time() - start_time))
    return window

コード例 #17

0

ファイルを表示

    def sarimaParaSelect(self, classNo, trainLabel, testLabel, useAic=False):
        """Pick the (p, q) pair whose SARIMA forecast has the lowest RMSE.

        The training labels are transformed with ``log(x + 1)`` and indexed
        by the first ``len(trainLabel)`` entries of ``self.dtIndex``. Each
        candidate ``(p, 1, q) x (0, 1, 1, 7)`` model forecasts
        ``len(testLabel)`` steps, the forecasts are mapped back with
        ``exp(x) - 1`` and compared with ``testLabel``.

        With ``useAic=True`` a candidate must also have a lower AIC than the
        current best to replace it.

        Returns ``((p, q), best_forecast)`` and records the pair in
        ``self.ParaChoose[classNo]``. Raises ``ValueError`` when no candidate
        produced an RMSE below 90000.
        """
        dataLength = len(trainLabel)
        # Building the Series from already-transformed floats avoids writing
        # floats one by one into a Series that may have an integer dtype.
        data = pd.Series([log(value + 1) for value in trainLabel],
                         index=pd.Index(self.dtIndex[0:dataLength]))

        minBias = 99999.0
        minAic = 99999.0
        (ar, ma) = (0, 0)
        bestOutput = None
        label = array(testLabel)
        for p, q in [(1, 1), (0, 1), (1, 2), (2, 0), (2, 1), (2, 2)]:
            try:
                model = SARIMAX(data,
                                order=(p, 1, q),
                                seasonal_order=(0, 1, 1, 7)).fit()
                output = array(
                    [exp(value) - 1 for value in model.forecast(len(testLabel))])
                bias = math.sqrt(
                    sum((output - label) * (output - label)) / len(testLabel))
                if bias < minBias and (not useAic or model.aic < minAic):
                    (ar, ma) = (p, q)
                    minBias = bias
                    minAic = model.aic
                    bestOutput = output
            except Exception:
                # Best effort: skip a candidate that cannot be fitted or
                # scored, without swallowing KeyboardInterrupt/SystemExit.
                continue

        if minBias < 90000.0:
            self.ParaChoose[classNo] = (ar, ma)
            return ((ar, ma), bestOutput)
        raise ValueError(
            'no SARIMA candidate could be fitted for class {}'.format(classNo))

コード例 #18

0

ファイルを表示

                      enforce_invertibility=False).fit()
# NOTE: outside a notebook the summary is built and then discarded.
agile_model.summary()

# Suppress PyCharm's type-checker inspection for the NumPy call below.
# noinspection PyTypeChecker
# Predict over the test date range and exponentiate the result
# (presumably the model was fitted on log-transformed data - TODO confirm).
agile_model_pred = np.exp(
    agile_model.predict(start=test_first_date,
                        end=test_last_date,
                        dynamic=True,
                        typ='levels'))

# Report the error of the test-range prediction against the test data.
print(f'MAPE {np.round(mean_abs_pct_error(test_data,agile_model_pred),2)}%')
# print(f'MAE:{np.round(mean_absolute_error(test_data,agile_model_pred),2)}')

# noinspection PyTypeChecker
# Forecast two steps past the end of the fitted data and exponentiate.
agile_model_forecast = np.exp(agile_model.forecast(steps=2))
print(agile_model_forecast)


def plot_prediciton(training_data, agile_model, agile_model_pred,
                    original_data):
    """Plot the model's fitted values plus predictions against the original data.

    The in-sample fitted values are reconstructed as the training data minus
    the model residuals (the first observation is skipped), then extended
    with ``agile_model_pred``. ``original_data`` is plotted from its second
    observation onward.
    """
    model_data = training_data.values[1:].reshape(-1) - agile_model.resid[1:]
    model_data = pd.concat((model_data, agile_model_pred))
    plt.figure(figsize=(16, 6))
    plt.plot(model_data)
    plt.plot(original_data[1:])
    # The labels must be one list: two positional arguments are read as
    # (handles, labels), not as two labels.
    plt.legend(['Model Forecast', 'Original Data'])
    plt.show()


plot_prediciton(train_data, agile_model, agile_model_pred, df['Last'])

コード例 #19

0

ファイルを表示

ファイル: finaldomesticforecasting.py プロジェクト: shrikantrepository/Heroku-NetFare

                  n_jobs=1,
                  station)

auto = auto.fit(xTrain)
pred = auto.predict(len(xTest))

# Notebook-style expressions: the values are computed and discarded.
mean_squared_error(xTest, pred)
np.sqrt(mean_squared_error(xTest, pred))

# Use of SARIMAX
# NOTE(review): the seasonal period here is 1; recent statsmodels versions
# may reject that - confirm whether 12 was intended.
sar = SARIMAX(
    xTrain,
    order=(6, 2, 4),
    seasonal_order=(6, 2, 4, 1),
    trend='n',
)
sar = sar.fit()

pred = sar.forecast(steps=len(xTest))

print(mean_squared_error(xTest, pred))
print(np.sqrt(mean_squared_error(xTest, pred)))

import pickle

# Saving model to disk (the context manager closes the file handle)
with open('model.pkl', 'wb') as model_file:
    pickle.dump(sar, model_file)

# Loading model to compare the results
with open('model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

# The original ``[[2020 - 01 - 01]]`` does not parse in Python 3 (leading
# zeros in integer literals). Predict for that date instead.
# NOTE(review): this assumes xTrain has a date index - confirm.
print(model.predict(start='2020-01-01', end='2020-01-01'))

コード例 #20

0

ファイルを表示

ファイル: SARIMAX2.py プロジェクト: AwakeLithiumFlower/weather_predict

# bestModel:             SARIMAX(0, 1, 1)x(1, 1, 1, 52)

# Best model found during testing
best_model = SARIMAX(df_day_train.tmax,
                     order=(0, 1, 1),
                     seasonal_order=(1, 1, 1, 52)).fit(disp=-1)

# tsa.plot_acf(best_model.resid[13:].values.squeeze(), lags=48,)
# # The plot below checks the residuals: it confirms that they follow a
# # normal distribution and show no lag effects.
# best_model.plot_diagnostics(lags=30, figsize=(16, 12))
# df_month2 = df_month_test[['tmax']]
# best_model.predict()  set the start and end time
# The invboxcox function restores a Box-Cox transformed series
# df_month2['forecast'] = invboxcox(best_model.forecast(steps=5), lmbda)
# Forecast the next 500 steps
df_day2 = best_model.forecast(500)
# plt.figure(figsize=(15, 7))
# Show the data
plt.plot(df_day2)
df_day_train.tmax.plot(color='r', ls='--', label='Origin')
# Save the figure
plt.savefig('长春week.png')
plt.show()

# Compute the RMSE
# Slice the forecast data
df_day2 = df_day2['20-':'2013']
# print(np.sqrt(sum((df_day2-ts)**2)/ts.size))

# save = pd.DataFrame(df_day2, columns = ['data', 'tmax'])
# Save the forecast data
コード例 #21

0

ファイルを表示

ファイル: sarima.py プロジェクト: griseldo91/ElectricityPricePredictor

def train_sarima(data=False,
                 hour=11,
                 split_date='2019-10-22 11:00:00',
                 n=30,
                 exog=False):
    '''Walk-forward one-day-ahead SARIMA(X) forecasts for n days.

    data: DataFrame to use; when left as a bool the data is loaded with
        get_daily(hour) (no exog) or get_all(hour) (with exog).
    hour: hour of the day passed to the loader.
    split_date: rows up to and including this timestamp form ``train``; the
        following n days form ``test``.
    n: number of days that will be forecasted.
    exog: False for plain SARIMA; otherwise a tuple
        (list of exog feature columns, order, seasonal_order).

    returns forecast (DataFrame with a ``price`` column indexed like test),
    lower, upper (lists of interval bounds), mape, mase, train, test
    '''

    if isinstance(data, bool):
        if isinstance(exog, bool):
            df = get_daily(hour=hour)
        else:
            df = get_all(hour=hour)
    else:
        df = data
    # formatting split_date
    split_date = pd.DatetimeIndex(np.array([split_date]))
    # train and test are used for scoring and are returned to the caller
    train = df[(df.index <= split_date[0])]
    test = df[(df.index > split_date[0]) & \
                      (df.index <= (split_date + pd.Timedelta(days=n))[0])]
    # will collect following information from forecast
    forecasts = []
    upper = []
    lower = []
    # loop over to get walk forward forecast for n days
    for i in range(1, n + 1):
        # walk one day forward to set train_set
        predict_date = df[df.index == split_date[0]].index + pd.Timedelta(
            days=i)
        train_set = df[df.index < predict_date[0]]
        train_set.index = pd.DatetimeIndex(train_set.index.values,
                                           freq=train_set.index.inferred_freq)
        if isinstance(exog, bool):
            # Build Model without exogenous features
            sarima = SARIMAX(train_set,
                             order=(1, 1, 1),
                             seasonal_order=(1, 0, 2, 7))
            sarima = sarima.fit(maxiter=200)
            results = sarima.get_forecast(1, alpha=0.05)
        else:
            # Build Model with exogenous features
            sarima = SARIMAX(train_set.price,
                             exog=train_set[exog[0]],
                             order=exog[1],
                             seasonal_order=exog[2])
            sarima = sarima.fit(maxiter=200)
            # features of the day being forecast
            exog_fore = test[test.index == predict_date[0]][exog[0]]
            results = sarima.get_forecast(1, exog=exog_fore, alpha=0.05)

        # One forecast call yields both the point forecast and its interval.
        # .iloc selects by position; a plain [0] on these date-indexed
        # results is a label lookup that only worked through a deprecated
        # fallback.
        confidence_int = results.conf_int()
        lower.append(confidence_int['lower price'].iloc[0])
        upper.append(confidence_int['upper price'].iloc[0])
        forecasts.append(results.predicted_mean.iloc[0])

    # calculate the mape
    mape = get_mape(test.price, forecasts)
    mase = get_mase(test.price, forecasts, train.price)
    # create forecast df with datetimeIndex
    forecast = pd.DataFrame(forecasts, index=test.index, columns=['price'])

    return forecast, lower, upper, mape, mase, train, test

コード例 #22

0

ファイルを表示

ファイル: SARIMAX加注释.py プロジェクト: AwakeLithiumFlower/weather_predict

# Load the data file
df = pd.read_csv('长春.csv',encoding='utf-8')
# Use the parsed dates as the time index. Assigning to ``df.ds`` would only
# set an attribute on the frame, not create a column.
df.index = pd.to_datetime(df.date)
# Make the mean-temperature column float; astype returns a new Series, so
# the result has to be assigned back.
df['平均气温'] = df['平均气温'].astype('double')
# Show the data before training
df.drop(['date'], axis=1, inplace=True)
df.平均气温.plot(color='r', ls='--', label='Origin')
plt.show()
# Resample to daily means
df_day = df.resample('D').mean()
# Select the training period
df_day_train = df_day['2017-5-31':'2020-5-31']
# Best model found during testing
best_model=SARIMAX(df_day_train.平均气温, order=(1, 1, 1),seasonal_order=(1, 1, 1, 90)).fit(disp=-1)
# Forecast the next 90 days
df_day2 = best_model.forecast(90)
# plt.figure(figsize=(15, 7))
# Show the data
plt.plot(df_day2)
df_day_train.平均气温.plot(color='r', ls='--', label='Origin')
# Save the figure
plt.savefig('长春daytave.png')
plt.show()
# Slice the forecast data
df_day2=df_day2['2020-5-31':'2025-5-31']
# Save the forecast data
df_day2.to_csv('长春daytave.csv')

コード例 #23

0

ファイルを表示

import itertools

# Result accumulators, one entry per fitted configuration.
# NOTE(review): ``error`` and ``BIC`` were not initialised in the visible
# code; they are created here next to ``AIC`` and ``label`` - confirm they
# are not meant to carry values from earlier in the script.
AIC = []
BIC = []
error = []
label = []

# Every (p, d, q, P, D, Q) combination with each order in {0, 1, 2}, in the
# same order as six nested loops with p outermost.
for p, d, q, P, D, Q in itertools.product(range(0, 3), repeat=6):
    model_fit = SARIMAX(
        training,
        order=(p, d, q),
        seasonal_order=(P, D, Q, 12),
        enforce_stationarity=False,
        enforce_invertibility=False).fit(disp=-1)
    forecast = model_fit.forecast(len(testing))
    # Numeric label built from the digits p d q P D Q followed by 12
    # (leading zeros are lost by the int conversion).
    label.append(int(''.join(str(part) for part in (p, d, q, P, D, Q, 12))))
    error.append(mse(testing, forecast))
    AIC.append(model_fit.aic)
    BIC.append(model_fit.bic)
    print('ARIMA:', p, d, q, 'Seasonal:', P, D, Q)

# Convert the results into a dataframe using pandas
import pandas as pd

# -1 lets NumPy infer the row count instead of hard-coding 3 ** 6 = 729.
BIC = pd.DataFrame(np.asarray(BIC).reshape(-1, 1))

コード例 #24

0

ファイルを表示

    def fit_sarimax(self):
        """Fit a fixed-order SARIMAX model and chart its 5-step forecast.

        The model is fitted on every row of ``self.data_lag`` except the
        last, with ``fallecimientos`` as the endogenous series and
        ``casos_total`` as the exogenous regressor. Five steps are then
        forecast using ``casos_total`` from ``self.forecast``.

        :return: a tuple ``(e, fig, summary)`` where ``e`` is a one-row
            DataFrame with the first forecast value and its absolute error
            against the last ``fallecimientos`` value in ``self.dt``, ``fig``
            is a bar chart of observed and forecast values, and ``summary``
            is the fitted model's summary.
        """

        # sarimax= auto_arima(y=self.data_lag[["fallecimientos"]],
        #                    exogenous=self.data_lag[["casos_total"]],
        #                    start_p=1, start_q=1,
        #                    test='adf',
        #                    max_p=2, max_q=2, m=7,
        #                    start_P=0, seasonal=True,
        #                    d=None, D=1, trace=False,
        #                    error_action='ignore',
        #                    suppress_warnings=True,
        #                    stepwise=True)

        # Everything except the final row is used for fitting.
        history = self.data_lag.iloc[:-1]
        fitted = SARIMAX(
            endog=history[["fallecimientos"]],
            exog=history[["casos_total"]],
            order=(0, 0, 3),
            seasonal_order=(0, 0, 0, 0),
        ).fit()

        model_summary = fitted.summary()
        future_exog = self.forecast[["casos_total"]]
        predictions = pd.DataFrame(fitted.forecast(steps=5, exog=future_exog))

        # Label of the last row of self.dt (looked up with .loc below).
        last_row = len(self.dt) - 1
        first_prediction = predictions.iloc[0, 0]
        last_observed = self.dt.loc[last_row, "fallecimientos"]
        e = pd.DataFrame({
            "Modelo": "SARIMAX",
            "Predicción de hoy": [first_prediction],
            "Error de hoy": [abs(first_prediction - last_observed)],
        })

        # Every forecast row starts at the last date in self.dt and is then
        # shifted forward by its own row position in days.
        last_date = self.dt.loc[last_row, "fecha"]
        predictions["fecha"] = last_date
        predictions.columns = ["fallecimientos", "fecha"]
        predictions.reset_index(drop=True, inplace=True)
        for offset in range(len(self.forecast)):
            shifted = predictions.fecha[offset] + timedelta(days=offset)
            predictions.loc[offset, "fecha"] = shifted

        # The first forecast row carries the same date as the last row of
        # self.dt, so it is left out of the combined frame.
        observed = self.dt[["fallecimientos", "fecha"]]
        new = pd.concat([observed, predictions.iloc[1:]], axis=0)

        is_observed = new.fecha <= last_date
        new["Predicciones"] = np.where(is_observed, "Real", "Pred")

        fig = px.bar(new,
                     x="fecha",
                     y="fallecimientos",
                     color="Predicciones")

        # predictions.columns =["Predicciones_Fallecimientos", "fecha"]
        #
        # load = str(self.dt.loc[len(self.dt)-1, "fecha"] - timedelta(days=1))
        # load = load[0:10] + "_.pkl"
        #
        # with open(load, "rb") as file:
        #     historic = pickle.load(file)
        # predictions["Error"] = 0
        # p=pd.concat([predictions.reset_index(drop=True), historic], ignore_index=True)
        # p = p.loc[p.fecha <= self.dt.loc[len(self.dt)-1, "fecha"],:]
        # p.reset_index(drop=True, inplace=True)
        # for i in range(0,len(p)):
        #     if self.dt.loc[len(self.dt)-1,"fecha"] == p.loc[i,"fecha"]:
        #         p.loc[i,"Error"] = np.sqrt((self.dt.loc[len(self.dt)-1,"fallecimientos"] - p.loc[i,"Predicciones_Fallecimientos"])**2)
        #
        # save = str(self.dt.loc[len(self.dt)-1, "fecha"])
        # save = save[0:10] + "_.pkl"
        #
        # with open(save, "wb") as file:
        #     pickle.dump(p, file)

        return e, fig, model_summary

コード例 #25

0

ファイルを表示

ファイル: SARIMAX2.py プロジェクト: AwakeLithiumFlower/weather_predict

#     if aic < best_aic:
#         best_model = model
#         best_aic = aic
#         best_param = parameters
#     results.append([parameters, model.aic])
#
# result_table = pd.DataFrame(results)
# result_table.columns = ['parameters', 'aic']
# print(result_table.sort_values(by='aic', ascending=True).head())
# print(best_model.summary())

# bestModel:             SARIMAX(0, 1, 1)x(1, 1, 1, 12)
# NOTE(review): the note above records a seasonal period of 12, while the
# model fitted below uses 52 - confirm which one is intended.

# Fit the chosen configuration on the daily training series (`tmax` column).
best_model=SARIMAX(df_day_train.tmax, order=(0, 1, 1),seasonal_order=(1, 1, 1, 52)).fit(disp=-1)

# tsa.plot_acf(best_model.resid[13:].values.squeeze(), lags=48,)
# # The plot below checks the residuals: it confirms they are normally distributed with no lag effect.
# best_model.plot_diagnostics(lags=30, figsize=(16, 12))
# df_month2 = df_month_test[['tmax']]
# best_model.predict()  set the start and end times
# the invboxcox function is used to invert the Box-Cox transformed series
# df_month2['forecast'] = invboxcox(best_model.forecast(steps=5), lmbda)
# Forecast the next 1000 steps after the end of the training data.
df_day2 = best_model.forecast(1000)
# plt.figure(figsize=(15, 7))
# Plot the forecast together with the training series.
plt.plot(df_day2)
df_day_train.tmax.plot(color='r', ls='--', label='Origin')
plt.show()

# Compute the MSE

コード例 #26

0

ファイルを表示

#print(predictions)

# Build the error report for the test window by comparing the actual
# 'AveragePrice' column with 'Predicted_AveragePrice'.
# (check_error and compare_test_df are defined outside this excerpt.)
error_test = check_error(compare_test_df['AveragePrice'],
                         compare_test_df['Predicted_AveragePrice'],
                         name_col='Value Comp. Pred.vs. Fit',
                         index_name='Testing Base')

# Plot the comparison and print the error report.
print(' TEST and PREDICTION')
plot_compare_error(compare_test_df, len(compare_test_df) - 1)
print(error_test)

#dti = pd.date_range(data_index_max, periods=5, freq='W-SUN')
print("____________________________")
print("Forecast for one period")
# NOTE(review): what `[0]` selects here (the first forecast value, or the
# first item of a tuple result) depends on the class of `model`, which is not
# visible in this excerpt - confirm.
print(model.forecast()[0])
#print("on")
#print(  dti[1] )
# Number of steps for the multi-step forecast.
nstepsfor = int(15)
pred_uc = model.forecast(steps=nstepsfor)[0]

#print(pred_ci = pred_uc.conf_int())

# NOTE(review): the heading below is printed, but no confidence intervals are
# computed or printed in this excerpt - only the step count follows.
print("CONFIDENCE INTERVALS")
print("____________________________")
print("Forecast for")
print(nstepsfor)

#for t in range(0,nstepsfor):
#    print(pred_uc[t])

コード例 #27

0

ファイルを表示

ファイル: ARMA family prediction.py プロジェクト: AntonYurievNikolov/PythonTests

        
        X_Test_CS.Country = le.fit_transform(X_Test_CS.Country)
        X_Test_CS['State'] = le.fit_transform(X_Test_CS['State'])
        
        X_Test_CS_Min_Date = X_Test_CS['Date'].min()
        X_Train_CS_Max_Date = X_Train_CS['Date'].max()


        #SARIMA Data
        model1 = SARIMAX(y1_Train_CS, order=(1,1,0), 
                        #seasonal_order=(1,1,0,12),
                        measurement_error=True).fit(disp=False)    
        model2 = SARIMAX(y2_Train_CS, order=(1,1,0), 
                        #seasonal_order=(1,1,0,12),
                        measurement_error=True).fit(disp=False)   
        y1_xpred = model1.forecast(X_Test_CS[X_Test_CS['Date'] > X_Train_CS_Max_Date].shape[0])
        y2_xpred = model2.forecast(X_Test_CS[X_Test_CS['Date'] > X_Train_CS_Max_Date].shape[0])
        
        train_confirmed_y1 = X_Train_CS[(X_Train_CS['Date'] >=  X_Test_CS_Min_Date)]['ConfirmedCases']
        train_confirmed_y2 = X_Train_CS[(X_Train_CS['Date'] >=  X_Test_CS_Min_Date)]['Fatalities']
        
        y1_xpred = np.concatenate((train_confirmed_y1,y1_xpred), axis = 0)
        y2_xpred = np.concatenate((train_confirmed_y2,y2_xpred), axis = 0)
        
        
        #Simple Linear Model witnout Enchancing the Data
        #After we transform them they should roughly follow linear regression trend
        X_Train_CS = X_Train_CS.loc[:, ['State', 'Country', 'Date']]
#        y1_Train_CS = y1_Train_CS.apply(lambda x: np.log1p(x))
#        y2_Train_CS = y2_Train_CS.apply(lambda x: np.log1p(x))
#        train_confirmed_y1 = train_confirmed_y1.apply(lambda x: np.log1p(x))

コード例 #28

0

ファイルを表示

            results = mod.fit()
            if results.aic < a:
                a = results.aic
                s = 'ARIMA{}x{} - AIC:{}'.format(param, param_seasonal, results.aic)
        except:
            continue
print(s)
'''
pdq = (0, 1, 1)
PDQ = (1, 1, 1, 4)
model_train = SARIMAX(train.REVENUE,
                      order=pdq,
                      seasonal_order=PDQ,
                      enforce_stationarity=False).fit()
predict_train = model_train.forecast(test_size + 1)

model_run = SARIMAX(df.REVENUE, order=pdq, seasonal_order=PDQ).fit()
predict_run = model_run.forecast(1)

#residual = predict_train - test
'''
print(model_train.summary())
model_train.plot_diagnostics()
'''
print(predict_run[0])
plt.plot(df.REVENUE, label='df', marker='o')
plt.plot(predict_train, label='SARIMA', marker='o', linestyle='--')
plt.plot(predict_run, label='SARIMA_RUN', marker='o')
plt.legend(loc='best')

コード例 #29

0

ファイルを表示

class SARIMAXModel(ModelStrategy):
    '''
    A class for a Seasonal Autoregressive Integrated Moving Average Model and the standard operations on it
    '''
    def __init__(self, hparams, log_dir=None):
        '''
        Reads the SARIMAX hyperparameters and initializes the base strategy
        :param hparams: A dict of hyperparameters. Recognized keys are AUTO_PARAMS, TREND_P, TREND_D, TREND_Q,
                        SEASONAL_P, SEASONAL_D, SEASONAL_Q and M; missing keys fall back to the defaults below
        :param log_dir: Passed through to the base class constructor
        '''
        univariate = True
        model = None
        name = 'SARIMAX'
        self.auto_params = hparams.get('AUTO_PARAMS', False)
        self.trend_p = int(hparams.get('TREND_P', 10))
        self.trend_d = int(hparams.get('TREND_D', 2))
        self.trend_q = int(hparams.get('TREND_Q', 0))
        self.seasonal_p = int(hparams.get('SEASONAL_P', 5))
        self.seasonal_d = int(hparams.get('SEASONAL_D', 2))
        self.seasonal_q = int(hparams.get('SEASONAL_Q', 0))
        self.m = int(hparams.get('M', 12))
        super().__init__(model, univariate, name, log_dir=log_dir)

    def fit(self, dataset):
        '''
        Fits a SARIMAX forecasting model
        Note: the columns of the passed DataFrame are renamed in place ('Date' -> 'ds', 'Consumption' -> 'y').
        :param dataset: A Pandas DataFrame with 2 columns: Date and Consumption
        '''
        if dataset.shape[1] != 2:
            raise Exception(
                'Univariate models cannot fit with datasets with more than 1 feature.'
            )
        dataset.rename(columns={'Date': 'ds', 'Consumption': 'y'},
                       inplace=True)
        series = dataset.set_index('ds')
        if self.auto_params:
            # The search bounds are twice the configured orders. They must be
            # read from the trend_* / seasonal_* attributes set in __init__;
            # this class never defines self.p, self.d or self.q.
            best_model = pmdarima.auto_arima(
                series,
                seasonal=True,
                stationary=False,
                m=self.m,
                information_criterion='aic',
                max_order=2 * (self.trend_p + self.trend_q),
                max_p=2 * self.trend_p,
                max_d=2 * self.trend_d,
                max_q=2 * self.trend_q,
                max_P=2 * self.seasonal_p,
                max_D=2 * self.seasonal_d,
                max_Q=2 * self.seasonal_q,
                error_action='ignore'
            )  # Automatically determine model parameters
            order = best_model.order
            seasonal_order = best_model.seasonal_order
            print("Best SARIMAX params: (p, d, q):", best_model.order,
                  " and  (P, D, Q, s):", best_model.seasonal_order)
        else:
            order = (self.trend_p, self.trend_d, self.trend_q)
            seasonal_order = (self.seasonal_p, self.seasonal_d,
                              self.seasonal_q, self.m)
        self.model = SARIMAX(series,
                             order=order,
                             seasonal_order=seasonal_order,
                             enforce_stationarity=True,
                             enforce_invertibility=True).fit()
        print(self.model.summary())

    def evaluate(self, train_set, test_set, save_dir=None, plot=False):
        '''
        Evaluates performance of SARIMAX model on test set
        Note: the columns of both passed DataFrames are renamed in place ('Date' -> 'ds', 'Consumption' -> 'y').
        :param train_set: A Pandas DataFrame with 2 columns: Date and Consumption
        :param test_set: A Pandas DataFrame with 2 columns: Date and Consumption
        :param save_dir: Directory in which to save forecast metrics
        :param plot: Flag indicating whether to plot the forecast evaluation
        :return: The result of self.evaluate_forecast on the combined train/test DataFrame
        '''
        # Imported locally so this method does not depend on a module-level alias.
        import pandas as pd

        column_names = {'Date': 'ds', 'Consumption': 'y'}
        train_set.rename(columns=column_names, inplace=True)
        test_set.rename(columns=column_names, inplace=True)
        train_set = train_set.set_index('ds')
        test_set = test_set.set_index('ds')
        # In-sample fitted values for the training rows, out-of-sample
        # forecast for the test rows.
        train_set["model"] = self.model.fittedvalues
        test_set["forecast"] = self.forecast(
            test_set.shape[0])['Consumption'].tolist()

        # DataFrame.append was removed in pandas 2.0; pd.concat with its
        # default arguments stacks the rows the same way.
        df_forecast = pd.concat([train_set, test_set]).rename(
            columns={'y': 'gt'})
        test_metrics = self.evaluate_forecast(df_forecast,
                                              save_dir=save_dir,
                                              plot=plot)
        return test_metrics

    def forecast(self, days, recent_data=None):
        '''
        Create a forecast for the test set. Note that this is different than obtaining predictions for the test set.
        The model makes a prediction for the provided example, then uses the result for the next prediction.
        Repeat this process for a specified number of days.
        :param days: Number of days into the future to produce a forecast for
        :param recent_data: A factual example for the first prediction (not used by this implementation)
        :return: A DataFrame with columns 'Date' and 'Consumption'
        '''
        # Move the forecast's index into a regular column so the result has
        # two columns, then give them the external names.
        forecast_df = self.model.forecast(steps=days).reset_index(level=0)
        forecast_df.columns = ['Date', 'Consumption']
        return forecast_df

    def save(self, save_dir, scaler_dir=None):
        '''
        Saves the model to disk
        :param save_dir: Directory in which to save the model
        :param scaler_dir: Not used by this implementation
        '''
        if self.model:
            # NOTE(review): self.train_date is not set in this class -
            # presumably provided by ModelStrategy; confirm.
            model_path = os.path.join(save_dir,
                                      self.name + self.train_date + '.pkl')
            self.model.save(model_path)  # Serialize and save the model object

    def load(self, model_path, scaler_path=None):
        '''
        Loads the model from disk
        :param model_path: Path to saved model
        :param scaler_path: Not used by this implementation
        '''
        if os.path.splitext(model_path)[1] != '.pkl':
            raise Exception('Model file path for ' + self.name +
                            ' must have ".pkl" extension.')
        self.model = SARIMAXResults.load(model_path)

コード例 #30

0

ファイルを表示

ファイル: Stacked Predictions.py プロジェクト: AntonYurievNikolov/PythonTests

        adjusted_y_train_fatalities = y_train_fatalities[
            idx:]  #.values.reshape(-1, 1)
        idx = X_pred[X_pred[feature_use] == 0].shape[0]
        adjusted_X_pred = X_pred[idx:][feature_use].values.reshape(-1, 1)

        pred_data = test[(test['Country_Region'] == country)
                         & (test['Province_State'] == province)]
        max_train_date = train[(train['Country_Region'] == country) & (
            train['Province_State'] == province)]['Date'].max()
        min_test_date = pred_data['Date'].min()
        model = SARIMAX(
            adjusted_y_train_confirmed,
            order=(1, 1, 0),
            #seasonal_order=(1,1,0,12),
            measurement_error=True).fit(disp=False)
        y_hat_confirmed = model.forecast(
            pred_data[pred_data['Date'] > max_train_date].shape[0])
        y_train_confirmed = train[(train['Country_Region'] == country)
                                  & (train['Province_State'] == province) &
                                  (train['Date'] >=
                                   min_test_date)]['ConfirmedCases'].values
        y_hat_confirmed = np.concatenate((y_train_confirmed, y_hat_confirmed),
                                         axis=0)

        model = SARIMAX(
            adjusted_y_train_fatalities,
            order=(1, 1, 0),
            #seasonal_order=(1,1,0,12),
            measurement_error=True).fit(disp=False)
        y_hat_fatalities = model.forecast(
            pred_data[pred_data['Date'] > max_train_date].shape[0])
        y_train_fatalities = train[(train['Country_Region'] == country)