def processing_df3(df3):
    """Process the df3 data."""
    df3['date'] = pd.to_datetime(df3['date'], format='%Y%m%d')
    # The data covers the 30 days from Jan 17 to Feb 15, so map each date to an integer in 0-29
    df3['day'] = df3['date'].dt.day.apply(
        lambda x: x - 17 if x >= 17 else x + 14)
    df3['weekday'] = df3['date'].dt.weekday  # 0-6, the day of the week
    # Add a label indicating whether the arrival city is Beijing
    df3['city'] = df3['arrival_city'].apply(lambda x: 1 if x == '北京市' else 0)
    inbj = df3[df3['city'] == 1]  # inbound records (arrival city is Beijing)
    outbj = df3[df3['city'] == 0]  # outbound records (arrival city is not Beijing)
    index_mean_in = inbj.groupby(['day']).mean()['index']
    index_std_in = inbj.groupby(['day']).std()['index']
    index_mean_in_copy = index_mean_in.copy()
    index_mean_in_copy.index = np.arange(30)
    dict_imi = dict(index_mean_in_copy)
    index_std_in_copy = index_std_in.copy()
    index_std_in_copy.index = np.arange(30)
    dict_isi = dict(index_std_in_copy)
    index_mean_out = outbj.groupby(['day']).mean()['index']
    index_std_out = outbj.groupby(['day']).std()['index']
    index_mean_out_copy = index_mean_out.copy()
    index_mean_out_copy.index = np.arange(30)
    dict_imo = dict(index_mean_out_copy)
    index_std_out_copy = index_std_out.copy()
    index_std_out_copy.index = np.arange(30)
    dict_iso = dict(index_std_out_copy)
    # Forecast the input feature values needed for the test data (days 30-38)
    ar_model1 = AR(index_mean_in.values).fit()
    pred_index_mean_in = ar_model1.predict(len(index_mean_in),
                                           len(index_mean_in) + 8, dynamic=True)
    ar_model2 = AR(index_std_in.values).fit()
    pred_index_std_in = ar_model2.predict(len(index_std_in),
                                          len(index_std_in) + 8, dynamic=True)
    ar_model3 = AR(index_mean_out.values).fit()
    pred_index_mean_out = ar_model3.predict(len(index_mean_out),
                                            len(index_mean_out) + 8,
                                            dynamic=True)
    ar_model4 = AR(index_std_out.values).fit()
    pred_index_std_out = ar_model4.predict(len(index_std_out),
                                           len(index_std_out) + 8, dynamic=True)

    pred_index_mean_in = pd.Series(pred_index_mean_in,
                                   index=np.arange(30, 39, 1))
    dict_pimi = dict(pred_index_mean_in)
    pred_index_std_in = pd.Series(pred_index_std_in, index=np.arange(30, 39, 1))
    dict_pisi = dict(pred_index_std_in)
    pred_index_mean_out = pd.Series(pred_index_mean_out,
                                    index=np.arange(30, 39, 1))
    dict_pimo = dict(pred_index_mean_out)
    pred_index_std_out = pd.Series(pred_index_std_out,
                                   index=np.arange(30, 39, 1))
    dict_piso = dict(pred_index_std_out)

    return dict_imi, dict_isi, dict_imo, dict_iso, dict_pimi, dict_pisi, dict_pimo, dict_piso
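# A hypothetical usage sketch (not part of the original project): build a toy
# frame with the columns the function above expects -- 'date' (YYYYMMDD string),
# 'arrival_city' and 'index' -- covering the 30 observed days, then unpack the
# eight feature dicts. The year 2020 and the city values are assumptions, and
# the legacy pandas/statsmodels AR stack this snippet targets is required.
def _processing_df3_demo():
    np.random.seed(0)
    days = pd.date_range('2020-01-17', periods=30).strftime('%Y%m%d')
    # at least two Beijing rows per day so the per-day std() is defined
    cities = ['北京市', '北京市', '上海市', '广州市']
    toy = pd.DataFrame([{'date': d, 'arrival_city': c,
                         'index': np.random.uniform(1, 5)}
                        for d in days for c in cities])
    (dict_imi, dict_isi, dict_imo, dict_iso,
     dict_pimi, dict_pisi, dict_pimo, dict_piso) = processing_df3(toy)
    return dict_pimi  # predicted into-Beijing daily means for days 30-38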
def factorEstimator(series):
    # Keep the first 258 raw observations; for every later position, fit an AR
    # model (lag order chosen by BIC) on a rolling 258-observation window and
    # record the in-sample prediction for the window's final point.
    ans = []

    for i in range(0, (len(series) - 258)):
        arModel = AR(series[i:(i + 258)])
        res = arModel.fit(ic='bic')
        ans.append(arModel.predict(res.params)[-1])

    return pd.Series(list(series[:258]) + list(ans))
Example #3
def test_ar_dates():
    # just make sure they work
    data = sm.datasets.sunspots.load(as_pandas=False)
    dates = date_range(start='1700', periods=len(data.endog), freq='A')
    endog = Series(data.endog, index=dates)
    with pytest.warns(FutureWarning):
        ar_model = AR(endog, freq='A').fit(maxlag=9, method='mle', disp=-1)
    pred = ar_model.predict(start='2005', end='2015')
    predict_dates = date_range(start='2005', end='2016', freq='A')[:11]

    assert_equal(ar_model.data.predict_dates, predict_dates)
    assert_equal(pred.index, predict_dates)
Example #4
    def do_forecast_ar_model(self, today, train, test):
        # train autoregression
        model_fit = AR(train.fillna(0)).fit()
        logging.info("Fitted AR...")

        AResults = model_fit.predict(start=len(train),
                                     end=len(train) + len(test) - 1)
        logging.info("Predicted AR")

        mse = self.utils_cl.compute_mse(test, AResults)
        mae = self.utils_cl.compute_mae(test, AResults)
        mase = self.utils_cl.compute_mase(today, test, AResults)

        logging.info("Exit do_forecast_ar_model")
        return AResults, mse, mae, mase
Example #5
class AutoRegression:
    def __init__(self, train_series, p):
        self.train_series = train_series
        self.ar = AR(train_series)
        self.p = p

    def fit(self):
        self.ar = self.ar.fit(disp=0)

    def preprocessing(self):
        pass

    def predict(self, step_size):
        return self.ar.predict(start=len(self.train_series),
                               end=(len(self.train_series) + step_size - 1),
                               dynamic=False)
Example #6
def predict(x, y, pred):

    #degree is unused here; x and the exponential weights below are only needed
    #for the (commented-out) weighted LinearRegression variant
    mu = 0.9
    ns = len(y)
    weights = np.ones(ns) * mu
    for k in range(ns):
        weights[k] = weights[k]**k
    weights = np.flip(weights, 0)

    # Fitting an autoregressive model to the series y
    from sklearn.linear_model import LinearRegression  # kept for the commented-out variant
    lr = AR(y)
    #lr.fit(x, y, sample_weight=weights)
    lr_fit = lr.fit()
    # Forecast one value per entry of pred, starting right after the end of y
    # (assumes pred holds the future points to predict for)
    y_pred = lr_fit.predict(start=len(y), end=len(y) + len(pred) - 1)
    return y_pred
Example #7
def project(ser, start, end):
    """Fit AR model to series and project to end of index. Primarily
    useful for filling in missing values at the end of time series to
    ensure they match.

            ser: series to fit trend to
            start: date to begin fitting
            end: date to end fitting

        Returns:
            new_ser: series with missing end values replaced by fitted
                     values."""

    from statsmodels.tsa.ar_model import AR

    trend_mod = AR(ser[start:end]).fit()

    return trend_mod.predict(start=trend_mod.k_ar, end=ser.index.shape[0])
Example #8
class AutoRegression:
    def __init__(self, ts_data, num_output):
        self.ts_data = ts_data
        self.t = len(self.ts_data)
        self.num_output = num_output
        self.model = None
        self.fitted_params = None
        self.pred_test_output = None

    def train_ar(self, lag):
        self.model = AR(self.ts_data[: -self.num_output]).fit(maxlag=lag, trend='nc')
        self.fitted_params = self.model.params[::-1]
        # forecast out-of-sample data by rolling the predicted values
        self.pred_test_output = self.model.predict(start=self.t - self.num_output, end=self.t - 1)

    def evaluate(self):
        true = self.ts_data[-self.num_output:]
        pred = self.pred_test_output
        return smape(true, pred)[0]
Example #9
class ArRetrainAgent(Agent):
    """Agent that uses autoregression with public data only.
       It fits its model again after each day.

    Attributes:
        model: an autoregression model trained each day on all available history
               used for predicting future aggregate energy consumption.
    """
    def __init__(self, account=ACCOUNT_0, logging=True, **kwargs):
        super(ArRetrainAgent, self).__init__(account, logging, **kwargs)
        self.model = AR(self.aggregate_history).fit()
        self.log('ArRetrainAgent')

    def predict_for_tomorrow(self):
        # need to predict all starting from train_amt, but only return last NUM_PREDICTIONS
        # there is a 1 day offset between the period predicted for and the training data
        train_amt = len(self.aggregate_history)
        self.model = AR(self.aggregate_history).fit()
        predictions = self.model.predict(start=train_amt,
                                         end=train_amt + 2 * NUM_PREDICTIONS -
                                         1,
                                         dynamic=False)[-NUM_PREDICTIONS:]
        return list(map(int, predictions))
Example #11
def AR_prediction(data, test_data, test_for_AR, ar_summary):
    print("AR_prediction() start execute")
    tickers = test_for_AR
    log_returns = to_log_return(data)
    with open('prediction_results/AR_model_prediction.txt',
              'w') as results_file:
        for ticker in tickers:
            print("AR_prediction() start execute in ticker: " + ticker)
            log_rtn = log_returns[ticker].dropna()
            result = AR(log_rtn).fit(ar_summary[ticker][0])
            result_show = result.predict(test_data.index[0],
                                         test_data.index[-1])
            test_log_returns = to_log_return(test_data)
            test_log_rtn = test_log_returns[ticker].dropna()
            test_log_rtn = test_log_rtn[result_show.index]
            visualization(test_log_rtn, result_show, 'AR_prediction', ticker)
            rmse = sqrt(
                sum((result_show - test_log_rtn).dropna()**2) /
                test_log_rtn.size)
            message = "The prediction for {} is \n {}\n RMSE:{}\n".format(
                ticker, result_show, rmse)
            results_file.write(message)

    return
Example #12
class AutoRegressionModel(Model):
    def __init__(self, series_name, dataset, freq='2H'):
        """
        Start modelling a time series
        :param series_name: name of the series
        :param dataset: dataframe (pandas) with datapoints
        :param freq: resampling frequency for the datetime index
        :param m: the seasonality factor
        :param d: the de-trending differencing factor
        :param d_large: the de-seasonality differencing factor
        """
        super().__init__(series_name, dataset)
        self._model = None
        self._dataset = dataset

        self.forecast_values = None
        self.is_stationary = False
        self._dataset.columns = ['ds', 'y']
        # self._dataset['ds'] = pd.to_datetime(self._dataset['ds'], unit='s')

        self._dataset['datetime'] = pd.to_datetime(self._dataset['ds'],
                                                   unit='s')
        self._dataset = self._dataset.set_index('datetime')
        self._dataset.drop(['ds'], axis=1, inplace=True)
        self._dataset.head()
        self._dataset = self._dataset.asfreq(freq=freq, method="pad")

    def create_model(self):
        series = pd.Series(self._dataset['y'], index=self._dataset.index)
        self._model = AR(series, missing='drop')
        self._model = self._model.fit()

    def do_forecast(self, update=False):
        """
        When a model is present, a set of forecasted future values can be generated.
        :param update:
        :return:
        """
        # freq = pd.Timedelta(self._find_frequency(self._dataset['ds'])).ceil('H')
        # periods = int(datetime.timedelta(days=7) / freq)
        # print(freq, periods)
        #
        # if periods < 20:
        #     periods = 20
        # l = 0/0

        if update or self.forecast_values is None:
            yhat = self._model.predict(len(self._dataset),
                                       len(self._dataset) + 200)
            # yhat = self._model.predict(start=1, end=5)
            indexed_forecast_values = []
            # values = yhat.to_frame()
            values = pd.DataFrame({'ds': yhat.index, 'yhat': yhat.values})
            for index, row in values.iterrows():
                indexed_forecast_values.append([
                    int(
                        time.mktime(
                            datetime.datetime.strptime(str(
                                row['ds']), "%Y-%m-%d %H:%M:%S").timetuple())),
                    row['yhat']
                ])
            self.forecast_values = indexed_forecast_values
            return self.forecast_values
        else:
            return self.forecast_values
    regressor = SVR(kernel='rbf')
    regressor.fit(X_train, y_train)
    y_pred = regressor.predict(X_test)
    r2_test = mean_squared_error(y_test, y_pred)
    K.clear_session()
    model = Sequential()
    model.add(
        Dense(50,
              input_shape=(X_test.shape[1], ),
              activation='relu',
              kernel_initializer='lecun_uniform'))
    model.add(Dense(50, input_shape=(X_test.shape[1], ), activation='relu'))
    model.add(Dense(1))
    model.compile(optimizer=Adam(lr=0.001), loss='mean_squared_error')
    model.fit(X_train, y_train, batch_size=12, epochs=24, verbose=0)
    y_pred = model.predict(X_test)
    print('MSE: %f' % (mean_squared_error(y_test, y_pred)))
    plt.figure(figsize=(16, 8))
    plt.plot(sc.inverse_transform(y_test), label='Resampled')
    plt.plot(sc.inverse_transform(y_pred), label='Forecast')
    plt.legend(loc='best')
    plt.show()
    plot = False
else:
    sys.exit("Error: Invalid model '" + model_type + "' specified!")

if plot:
    plt.figure(figsize=(16, 8))
    plt.plot(heart_rate, label='Original')
    plt.plot(heart_rate_resampled, label='Resampled')
    plt.plot(heart_rate_forecast, label=model_type + ' Forecast')


# -

# That's not so good! Let's calculate the mean absolute percentage error (MAPE):

mape(df_valid, forecast)
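# The mape() helper comes from an earlier cell of this notebook; for reference,
# here is a hypothetical sketch of a typical implementation (it may differ from
# the notebook's own definition):
import numpy as np

def mape_sketch(actual, forecast):
    a = np.asarray(actual, dtype=float).ravel()
    f = np.asarray(forecast, dtype=float).ravel()[:len(a)]
    return np.mean(np.abs((a - f) / a))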

# Now let's try a larger model by increasing the order of the AR. It captures a longer-term trend now.

model = AR(df_train)
model = model.fit(maxlag=7,  trend='nc')
start = len(df_train)
end = len(df_train) + len(df_valid) - 1
forecast = model.predict(start, end)
fig = plt.figure()
ax = fig.gca()
df_train['target'].plot(ax=ax, legend=True, label="Train")
df_valid['target'].plot(ax=ax, legend=True, label="Actual")
forecast.plot(ax=ax, legend=True, label="Forecast")
mape(df_valid, forecast)

# Note that the MAPE is lower, meaning it is a better fit

# <div class="alert alert-success">
#   <h2>Exercise</h2>
#
#   Try a few other values yourself and see if you get a better/lower result than mape=0.4 (see the sketch below this list)
#   
#   - try trend='nc', which makes it return to the mean.
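# A sketch of one possible sweep for the exercise above (an illustration under
# the same df_train/df_valid/mape setup, not the intended solution):
best = None
for lag_order in [3, 5, 7, 10, 14]:
    for tr in ['c', 'nc']:
        fit = AR(df_train).fit(maxlag=lag_order, trend=tr)
        fc = fit.predict(len(df_train), len(df_train) + len(df_valid) - 1)
        score = mape(df_valid, fc)
        if best is None or score < best[0]:
            best = (score, lag_order, tr)
print('best mape=%.3f with maxlag=%d, trend=%s' % best)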
Example #15
def test_ar_predict_no_fit():
    data = sm.datasets.sunspots.load(as_pandas=False)
    with pytest.warns(FutureWarning):
        mod = AR(data.endog)
    with pytest.raises(RuntimeError, match='Model must be fit'):
        mod.predict([.1])
Example #16
lag = X[-window_size:]
np.save('man_data.npy', lag)

#Save the last observation
np.save('man_obs.npy', [series.values[-1]])


################### Making time-series predictions

#Load the model
model = ARResults.load('ar_model.pkl')
data = np.load('ar_data.npy')
last_ob = np.load('ar_obs.npy')

#Make predictions
predictions = model.predict(start = len(data), end = len(data))

#Transform the predictions (undo the differencing)
yhat = predictions[0] + last_ob[0]
print('Prediction: %f' % yhat)


#This could also be done manually, as follows
def predict(coef, history):
    yhat = coef[0]
    for i in range(1, len(coef)):
        yhat += coef[i]*history[-i]
    return yhat

#Load the saved model coefficients
coef = np.load('man_model.npy')
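#The script is truncated here; a hypothetical continuation (mirroring the
#AR-model branch above) would apply the saved coefficients to the saved lag
#window and then undo the differencing:
lag = np.load('man_data.npy')
last_ob = np.load('man_obs.npy')
yhat_manual = predict(coef, lag) + last_ob[0]
print('Prediction: %f' % yhat_manual)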
Example #17
          train_y,
          epochs=1,
          batch_size=batch_size,
          validation_data=(test_X, test_y),
          verbose=2,
          shuffle=False)
model.reset_states()
# plot history
#pyplot.plot(history.history['loss'], label='train')
#pyplot.plot(history.history['val_loss'], label='test')
#pyplot.legend()
#pyplot.show()

# make a prediction
# test data should be predicted with batch size
yhat = model.predict(test_X, batch_size)

# invert scaling for forecast
# take every 7 variables on certain time ranges like 0 to 6 and 7 to 13 and so forth
# assign this variables to dictionary
# make inverse transform
t1 = list()
t2 = list()
dic = {}
for g in range(0, n_seq, features):
    for j in range(features - 1, n_seq, features):
        t1.append(g)
        t1 = list(set(t1))
        t2.append(j)
        t2 = list(set(t2))
        t1 = sorted(t1)
Example #18
def auto_regressive(data, p=6):
    """ Auto regressive mode to predict accident rate in Jan 2017 """
    model = AR(data).fit(maxlag=p)
    return model.predict(len(data), len(data), dynamic=False)
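# Hypothetical usage sketch (the toy monthly_rates data is made up): feed the
# accident-rate history through Dec 2016 and forecast the single next month.
import numpy as np
monthly_rates = np.random.RandomState(1).poisson(20, size=36).astype(float)
jan_2017_forecast = auto_regressive(monthly_rates, p=6)
print(jan_2017_forecast)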
Example #19
class PredictPrices(object):
    """
    This class trains on a time series data and forecast the next day value
    """
    def __init__(self, data_path=None, inspect_data=False, n_estimators=10, number_of_test_values=1):
        """
        :param data_path:string The location of the data file (loads CSV files only)
        :param inspect_data:bool Go over all the input values and make sure everything comes as expected
        :param n_estimators:int The amount of estimators to use
        :param number_of_test_values:int The amount of given test values
        """
        self.logger = _logger()
        self.logger.info('Prices prediction process begins')
        self.data_path = data_path
        self.number_of_test_values = number_of_test_values
        self.inspect_data = inspect_data
        self.n_estimators = n_estimators
        self.validate_input()
        self.products = []
        self.model = None
        self.model_fit = None
        self.t = Timer()

    def train(self, x_train, x_test):
        self.model = AR(x_train)
        history = [x_train[i] for i in range(len(x_train))]
        min_diff = math.inf
        optimized_maxlag = 0
        best_trend = vanilla_predictor = None
        for i in range(1, len(x_train)):
            for trend in [None, 'nc']:
                for vanilla in [True, False]:
                    if trend is None:
                        self.model_fit = self.model.fit(maxlag=i, disp=False)
                    else:
                        self.model_fit = self.model.fit(maxlag=i, disp=False, trend=trend)
                    y_predicted = self.predict(history, vanilla_predictor=vanilla)
                    temp_diff = abs(y_predicted - x_test)

                    if temp_diff < min_diff:
                        best_trend = trend
                        min_diff = temp_diff
                        optimized_maxlag = i
                        vanilla_predictor = vanilla
        if best_trend is None:
            self.model_fit = self.model.fit(maxlag=optimized_maxlag, disp=False)
        else:
            self.model_fit = self.model.fit(maxlag=optimized_maxlag, disp=False, trend=best_trend)
        return self.model_fit, history, vanilla_predictor

    def predict(self, history, vanilla_predictor=False):
        coef = self.model_fit.params
        if vanilla_predictor:
            return self.model.predict(params=coef)[0]

        yhat = coef[0]
        for i in range(1, len(coef)):
            yhat += coef[i] * history[-i]
        return yhat

    def import_and_arrange_data(self):
        train = pd.read_csv(self.data_path)
        train = train.fillna(
            train.mean())  # Empty cells (if there are any) are replaced with the average of the product prices
        if self.inspect_data:
            self.logger.info(train.describe())
        self.products = train.columns.tolist()

        values_per_product = {}
        for product in self.products:
            values_per_product[product] = train[product].values.tolist()
        return values_per_product

    def import_train_and_predict(self):
        t = Timer()
        x_test_predicted = []
        x_test = []
        train_per_product = self.import_and_arrange_data()

        for product, values in train_per_product.items():
            if self.number_of_test_values >= len(values):
                raise Exception('The number of test values must be smaller than the amount of train set values')

            x_train = values[:-self.number_of_test_values]
            x_test.append(values[-self.number_of_test_values])
            self.model_fit, history, vanilla_predictor = self.train(x_train, values[-self.number_of_test_values])
            x_test_predicted.append(self.predict(history, vanilla_predictor))

        for x_test_, x_predicted in zip(x_test, x_test_predicted):
            self.logger.info('Predicted value: %.3f, real value: %s' % (x_predicted, x_test_))
            self.logger.info('Abs diff between predicted and true value: %.3f\n' % (abs(x_test_ - x_predicted)))

        self.logger.info('Price prediction took %s' % t.print_timer())

    def validate_input(self):
        self.logger.info('Validating input')
        if self.data_path is None:
            raise Exception('You must provide the location of the data')
        if self.data_path[-3:] != 'csv':
            raise TypeError('This code support CSV files only')
        if self.number_of_test_values <= 0:
            raise ValueError('The number of test values must be greater than 0')
        if not isinstance(self.number_of_test_values, int):
            raise TypeError('The number of test values must be integer')
        if not isinstance(self.inspect_data, bool):
            raise TypeError('Inspect data must be Boolean')
        if not isinstance(self.n_estimators, int):
            raise TypeError('The number of estimators must be an integer')
        self.logger.info('Validation complete')
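# Hypothetical usage sketch (the CSV path below is an assumption): each column
# of the file is one product's price history; the last value of every column is
# held out, predicted, and compared against the truth.
if __name__ == '__main__':
    predictor = PredictPrices(data_path='prices.csv', number_of_test_values=1)
    predictor.import_train_and_predict()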
Example #20
plt.ylabel('unit')
plt.title('MA fitting result')
plt.legend(loc="best")
plt.show()

#AR fitting result

#AR order parameter p
ar_p=2
#Fit an AR(2) model
model_fit=AR(data_df).fit(ar_p)
#Print the AR(2) regression coefficients
print('AR(%d) coefficients:' % ar_p, model_fit.params)

#Use the fitted AR(2) model to get predictions through the forecast year (ROC year 101, i.e. 101+1911=2012)
ar_result=model_fit.predict(end=pd.datetime.strptime(str(101+1911), '%Y')).tolist()
#Since an AR(p) model has no predictions for the first p years, pad them with NaN
for i in range(0,ar_p):
    ar_result.insert(0,np.nan)
#The prediction for ROC year 101 is the last element of ar_result
print("Prediction for ROC year 101:", ar_result[-1])

#Plot
plt.plot(data_df.index,data_df.values, label='real')
#Extend the x-axis with the forecast period (ROC year 101)
xlist=data_df.index.tolist()
xlist.append(str(101+1911))

plt.plot(xlist,ar_result, color='red', linestyle='--', label='AR(%d)'%(ar_p))
plt.xlabel('year')
plt.ylabel('unit')
# %% # Exploring the data

plt.plot(data[::12], '-')
plt.show()
plt.scatter(data.index[::12], data[::12], s=2)
plt.show()
#yearly mean temperature
data['temp'].groupby(data.index.year).mean()
plt.plot(data['temp'].groupby(data.index.year).mean())
plt.show()
#monthly mean temperature
data['temp'].groupby(data.index.month).mean()
plt.plot(data['temp'].groupby(data.index.month).mean())
plt.show()

data['temp'].groupby(data.index.year).count()

# %%
data['moving_average'] = data['temp'].rolling(12).mean()

# %% # auto-regressor from stats model
from statsmodels.tsa.ar_model import AR
data['temp']

ar = AR(data['temp'], dates=None, missing='drop')

ar_res = ar.fit()  # fit() returns an ARResults object; keep it to forecast from

ar_res.predict(start='2013-01-01', end='2013-05-01')
Example #22
File: utils.py Project: sar2160/GP
def fit_AR(series, t_pred =12 ):
    ar = AR(endog= series)
    ar_fit = ar.fit(maxlag=1)
    pred = ar.predict(params= ar_fit.params, end=t_pred)
    return np.round(pred,0)
Example #23
plot_acf(df.temp.tolist(), lags=30, ax=axes[0])
plot_pacf(df.temp.tolist(), lags=30, ax=axes[1])
fig.savefig('../Images/3_pacf.png')

# Create train data
train_df = df["temp"][:-13]
date = df.index[:-13]

# 1.  AR model

# with statsmodel
from statsmodels.tsa.ar_model import AR
ar = AR(train_df, dates=date).fit(maxlag=52, ic='aic')

# prediction is
ar_predict = ar.predict('2019-10-22', '2020-10-21')

# Visualization
fig = go.Figure()
fig.add_trace(go.Scatter(name="Raw Data", x=df.index, y=df.temp))
fig.add_trace(
    go.Scatter(name="AR model Prediction", x=ar_predict.index, y=ar_predict))
fig.update_xaxes(rangeslider_visible=True)
fig.update_layout(title_text="AR MODEL",
                  xaxis_title="Date",
                  yaxis_title="Temperature, C")
plotly.offline.plot(fig, filename=r'../Images/4_AR.png')

# 2. ARMA Model
# with statsmodel, aic check of params
from statsmodels.tsa import stattools as st
Example #24
class VarModel(BaseModel):
    def __init__(self, feat_id, run_id, data=None):
        self.model_type = 'VAR'
        self.opt_p = 1
        super().__init__(feat_id, run_id, data)

    def train(self, data):
        if len(data.columns) > 1:
            self.model = VAR(data)
            self.opt_p = self.model.select_order(30).aic
        else:
            self.model = AR(data)
            self.opt_p = self.model.select_order(30, 'aic')

    def save(self):
        joblib.dump(
            self.opt_p,
            os.path.join(
                'temp', self.run_id, 'models', 'VAR', '{}_VAR.pkl'.format(
                    replace_multiple(
                        self.feat_id,
                        ['/', '\\', ':', '?', '*', '"', '<', '>', '|'], "x"))))

    def load(self):
        self.opt_p = \
            joblib.load(os.path.join('runs', self.run_id, 'models', 'VAR',
                                     '{}_VAR.pkl'.format(replace_multiple(self.feat_id,
                                                                          ['/', '\\',
                                                                           ':', '?',
                                                                           '*', '"',
                                                                           '<', '>',
                                                                           '|'],
                                                                          "x"))))

    def result(self, history, actual, prediction, forecast, anomaly_scores):
        mse = mean_squared_error(actual, prediction)
        mae = mean_absolute_error(actual, prediction)
        rmse = np.sqrt(mse)

        anomaly_scores['points'] = anomaly_scores.index

        future_alert = anomaly_scores.tail(len(forecast))
        past_alert = anomaly_scores.iloc[:len(history)]
        future_alert = future_alert[future_alert['outlier'] == -1]
        past_alert = past_alert[past_alert['outlier'] == -1]

        output = {
            'history': history.tolist(),
            'expected': prediction.tolist(),
            'forecast': forecast.tolist(),
            'rmse': rmse,
            'mse': mse,
            'mae': mae,
            'future_alerts': future_alert.fillna(0).to_dict(orient='records'),
            'past_alerts': past_alert.fillna(0).to_dict(orient='records'),
            'model': self.model_type
        }
        return output

    def predict(self, data, start_idx, end_idx):
        if len(data.columns) > 1:
            self.model = VAR(data)
            result = self.model.fit(self.opt_p)
            y_pred = self.model.predict(result.params,
                                        start=start_idx,
                                        end=end_idx,
                                        lags=self.opt_p)
            return pd.DataFrame(data=y_pred, columns=data.columns.values)
        else:
            self.model = AR(data)
            self.model = self.model.fit(self.opt_p)
            y_pred = self.model.predict(start=start_idx, end=end_idx)
            return pd.DataFrame(data=y_pred, columns=data.columns.values)
plt.legend()
plt.show()

# plot
y_train.plot(color="blue")
y_test.plot(color="pink")
predictions.plot(color="purple")

#######################---ARIMA---##############################

from pmdarima.arima import auto_arima

model = auto_arima(y_train,
                   trace=True,
                   error_action='ignore',
                   suppress_warnings=True)
model.fit(y_train)

forecast = model.predict(n_periods=len(y_test))
forecast = pd.DataFrame(forecast, index=y_test.index, columns=['Prediction'])
print("R2_score:%.3f" % r2_score(y_test, forecast))

#plot the predictions for validation set
plt.plot(y_train, label='Train')
plt.plot(y_test, label='Valid')
plt.plot(forecast, label='Prediction')
plt.legend()
plt.show()

#################################################################
Example #26
#%%

#build a basic lagged model
labels = gas.pop('RSGASSN')
X_train, X_test, y_train, y_test = train_test_split(gas,
                                                    labels,
                                                    test_size=.25,
                                                    shuffle=False)

model = AR(y_train)
model = model.fit()

print('Lag: ' + str(model.k_ar))
print('Coefficients: ' + str(model.params))

predictions = model.predict(start=len(y_train), end=len(gas) - 1)

plt.plot(y_test)
plt.plot(predictions, color='red')
plt.show()

#%%

#now build a lagged model using all features
from statsmodels.tsa.vector_ar.var_model import VAR

gas = pd.concat([gas, labels], axis=1)

model = VAR(endog=gas[:75])
model = model.fit()
train, test = diff[diff.index < pd.to_datetime('2014-08-01')], diff[
    diff.index > pd.to_datetime('2014-07-31')]
model_test = ARIMA(train, order=(3, 0, 5)).fit()
predict = model_test.forecast(steps=31)[0]
x = []
x.append(train[-1])
for i in predict:
    x.append(x[-1] + i)
y = x[1:]
df = test.reset_index()
df['pred'] = y
df.set_index('report_date', inplace=True)
df.plot(figsize=(12, 8))
plt.show()

# In[408]:

#AR(3)
from statsmodels.tsa.ar_model import AR
ar_3 = AR(diff).fit(ic='hqic')
ar_3_pre = ar_3.predict()

# In[411]:

diff_copy = diff[diff.index > pd.to_datetime('2014-04-13')]
ardf = diff_copy.reset_index()
ardf['pred'] = ar_3_pre.values
ardf.set_index('report_date', inplace=True)
ardf.plot(figsize=(12, 8))
plt.show()
Example #28
# Let's train the regressor we want to work with:

# In[136]:

model = RandomForestRegressor()
model.fit(XX_train, yy_train)

# We reuse the previous evaluation code but this time we make predictions with this regressor

# In[137]:

# walk-forward validation
history = [x for x in train]
prediction_sl = list()
for i in range(len(test)):
    yhat = model.predict(XX_test[i, :])[0]
    yhat = inverse_difference(history, yhat, months_in_year)
    prediction_sl.append(yhat)
    # observation
    obs = test[i]
    history.append(obs)
    prediction_sl[i] = yhat
#	print('>Predicted=%.3f, Expected=%3.f' % (yhat, obs))
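# inverse_difference() is defined in an earlier cell; a hypothetical sketch of
# the usual helper (it simply adds back the observation from one season ago):
def inverse_difference_sketch(history, yhat, interval=1):
    return yhat + history[-interval]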

# In[138]:

rmse = sqrt(mean_squared_error(test, prediction_sl))
print('RMSE: %.3f' % rmse)

# In[139]: