Python Holt.predict Beispiele, statsmodels.tsa.api.Holt.predict Python Beispiele

Beispiel #1

0

Datei anzeigen

def multi_output(input1):
    """Fit Holt's model to one column of the global ``df`` and plot the result.

    Parameters
    ----------
    input1 : str
        Name of the column of the module-level ``df`` to model.

    Returns
    -------
    tuple
        ``(fig, mae_text, mape_text)`` where ``fig`` is the line plot of the
        observed values, the fitted values and a 7-step forecast, and the
        two strings report the rounded error metrics of the fit.
    """
    observed = df[input1]
    model = Holt(observed).fit()  # fit the Exponential Smoothing model
    exp_sm = model.fittedvalues  # fitted values of the model

    # calculate the mean absolute error
    mae = np.round(mean_absolute_error(observed, exp_sm), decimals=2)

    # calculate the mean absolute percentage error
    # Only strictly positive observations enter the MAPE. Select the fitted
    # values with the same mask so every observation stays paired with its own
    # fitted value, wherever the non-positive entries occur in the series.
    positive = np.asarray(observed) > 0
    y_true = np.asarray(observed)[positive]  # actual observations
    y_pred = np.asarray(exp_sm)[positive]  # fitted/predicted observations
    mape = np.round(mean_absolute_percentage_error(y_true, y_pred), decimals=2)

    # find out the 7-day forecast
    preds = model.predict(start=len(df), end=len(df) + 6)
    # Build 8 daily stamps starting at the last observed date and drop the
    # first one. This equals the old ``closed='right'`` behaviour but works on
    # every pandas version (``closed`` was removed from date_range in 2.0).
    last_date = df['Date'].iloc[-1]
    dates = pd.date_range(last_date, periods=8)[1:]

    # line plot showing the observed/actual datapoints, fitted datapoints and forecasts
    fig = px.line(df, x='Date', y=input1, title='Number of COVID19 cases')
    fig['data'][0]['showlegend'] = True
    fig['data'][0]['name'] = 'Actual Values'
    fig.add_scatter(x=df['Date'],
                    y=exp_sm,
                    mode='lines',
                    name='Exponential Smoother')
    fig.add_scatter(x=dates, y=preds, mode='lines', name='Forecasts')

    return fig, 'Mean Absolute Error of the Fits: {}'.format(
        mae), 'Mean Absolute Percentage Error of the Fits: {}'.format(mape)

Beispiel #2

0

Datei anzeigen

def run_holts(train, validate, target_variable, exponential, smoothing_level=.1, smoothing_slope=.1):
    """Fit Holt's trend model with fixed smoothing values and predict the validation span.

    The model is built on ``train[target_variable]`` and fitted without
    optimisation, using the given ``smoothing_level`` and ``smoothing_slope``.
    Predictions run from the first to the last label of ``validate.index``.

    Returns the fitted results object and the predictions, in that order.
    """
    fit_options = {
        "smoothing_level": smoothing_level,
        "smoothing_slope": smoothing_slope,
        "optimized": False,
    }
    fitted = Holt(train[target_variable], exponential=exponential).fit(**fit_options)

    span_start = validate.index[0]
    span_end = validate.index[-1]
    forecast = fitted.predict(start=span_start, end=span_end)

    return fitted, forecast

Beispiel #3

0

Datei anzeigen

def twocolorball_holt_forecast(df):
    """Predict the next value of each of the seven ball columns with Holt's method.

    Columns "红球1" to "红球6" and then "蓝球" are processed in that order. Each
    one is fitted with randomly drawn smoothing parameters in {0.1, ..., 1.0}
    and no optimisation. The first predicted value of each fit is truncated
    to ``int``. The resulting list is printed and returned.
    """
    column_names = ["红球%d" % position for position in range(1, 7)] + ["蓝球"]
    forecasts = []
    for column in column_names:
        model = Holt(np.asarray(df[column]))
        # Draw the level first and the slope second, as the keyword order implies.
        level = random.randint(1, 10) / 10
        slope = random.randint(1, 10) / 10
        fitted = model.fit(smoothing_level=level,
                           smoothing_slope=slope,
                           optimized=False)
        next_values = fitted.predict()
        forecasts.append(int(next_values[0]))
    print(forecasts)
    return forecasts

Beispiel #4

0

Datei anzeigen

def holt_forecast(df):
    """Run a Holt forecast per ball column and collect the numbers via ``add_number_pool``.

    Positions 1-6 use the columns "红球1" to "红球6"; position 7 uses "蓝球" and
    is flagged as the blue ball. Each column is fitted with randomly drawn
    smoothing parameters (no optimisation). The first predicted value is
    rounded to the nearest integer and handed to ``add_number_pool`` together
    with the running pool and the blue-ball flag.

    Returns the pool as returned by the last ``add_number_pool`` call.
    """
    print("==== 逐一对每位数字进行霍尔特预测 ====")
    pool = []
    for position in range(1, 8):
        is_blue = position >= 7
        column = "蓝球" if is_blue else "红球%d" % position
        model = Holt(np.asarray(df[column]))
        level = random.randint(1, 10) / 10
        slope = random.randint(1, 10) / 10
        fitted = model.fit(smoothing_level=level,
                           smoothing_slope=slope,
                           optimized=False)
        next_value = fitted.predict()[0]
        pool = add_number_pool(pool, int(round(next_value, 0)), is_blue)
    # (debug output of the resulting pool is intentionally disabled)
    return pool

Beispiel #5

0

Datei anzeigen

def holts(train, validate, yhat_df):
    """Fill ``yhat_df`` with damped Holt predictions for every column of ``train``.

    For each column a non-exponential, damped Holt model is fitted with
    ``smoothing_level=.1``, ``smoothing_slope=.1`` and ``optimized=True``.
    Predictions span the first to the last label of ``validate.index`` and
    are rounded to two decimals before being stored under the same column
    name in ``yhat_df``.

    Returns the (mutated) ``yhat_df``.
    """
    for column in train.columns:
        fitted = Holt(train[column], exponential=False, damped=True).fit(
            smoothing_level=.1,
            smoothing_slope=.1,
            optimized=True,
        )
        # The index bounds are looked up per column, after the fit.
        forecast = fitted.predict(start=validate.index[0],
                                  end=validate.index[-1])
        yhat_df[column] = round(forecast, 2)
    return yhat_df

Beispiel #6

0

Datei anzeigen

Datei: modeling.py Projekt: Johndsalas/earths_temperature_time_series

def holt(train, validate, target_var, eval_df):
    """Evaluate Holt's linear trend model on ``target_var`` and record its RMSE.

    A non-exponential Holt model is fitted on ``train[target_var]`` with fixed
    smoothing values (0.1 / 0.1, no optimisation). Predictions over the span of
    ``validate.index`` are rounded to four decimals and placed in a frame
    indexed like ``validate``. That frame is passed to ``plot_and_eval``, and
    the returned RMSE is added to ``eval_df`` through ``append``.

    Returns the value returned by ``append``.
    """
    model_type = "Holt's Linear Trend"

    fitted = Holt(train[target_var], exponential=False).fit(
        smoothing_level=.1,
        smoothing_slope=.1,
        optimized=False,
    )
    temps = fitted.predict(start=validate.index[0], end=validate.index[-1])

    # Placeholder frame on the validation index; the column is overwritten next.
    yhat = pd.DataFrame({target_var: '1'}, index=validate.index)
    yhat[target_var] = round(temps, 4)

    rmse = plot_and_eval(train, validate, yhat, target_var, model_type)
    return append(model_type, target_var, rmse, eval_df)

Beispiel #7

0

Datei anzeigen

Datei: time_series_test.py Projekt: hymanHu/thorn_bird

def time_series_fun_5():
    """Forecast the next three months with Holt's method and plot monthly means.

    Reads ``/temp/time_series_data.csv``, fits a Holt model with fixed
    smoothing values to the ``count`` column, forecasts one value per
    2-hour step for the 90 days after the last observation, then plots the
    monthly mean of history plus forecast against the monthly mean of the
    history alone. Returns nothing; the only outputs are the plot window.
    """
    # Read the CSV file and drop the unused column.
    df = pd.read_csv("/temp/time_series_data.csv").drop(labels="Unnamed: 0", axis=1)

    # Timestamp of the last observation.
    last_data = df.iloc[-1]
    last_time = datetime.strptime(last_data["date"], "%Y-%m-%d %H:%M:%S")

    # Timestamps of the forecast horizon: the next three months in 2-hour steps.
    Holt_forecast_start = last_time + timedelta(hours=2)
    Holt_forecast_end = last_time + timedelta(days=90)
    datetime_index = pd.date_range(start=Holt_forecast_start, end=Holt_forecast_end, freq="2H")

    # Fit the model on the historical counts with fixed weights (between 0 and 1).
    fit_model = Holt(np.asarray(df["count"])).fit(smoothing_level=0.7, smoothing_slope=0.1, optimized=False)

    # Out-of-sample forecast, one value per future timestamp. Predicting from
    # start=0 would return in-sample fitted values and mislabel them as future data.
    data = fit_model.forecast(len(datetime_index))
    forecast_frame = DataFrame(data=list(zip(datetime_index, data)), columns=["date", "count"])
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    Holt_forecast_dataFrame = pd.concat([df, forecast_frame])
    Holt_forecast_dataFrame["count"] = Holt_forecast_dataFrame["count"].apply(int)

    # Resample to monthly means. Only "count" is aggregated, because the
    # non-numeric "date" column cannot be averaged.
    df.index = pd.to_datetime(df["date"], format="%Y-%m-%d %H:%M:%S")
    df = df[["count"]].resample(rule="M").mean()
    Holt_forecast_dataFrame.index = pd.to_datetime(Holt_forecast_dataFrame["date"], format="%Y-%m-%d %H:%M:%S")
    Holt_forecast_dataFrame = Holt_forecast_dataFrame[["count"]].resample(rule="M").mean()

    # Draw the line chart.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.plot(Holt_forecast_dataFrame.index, Holt_forecast_dataFrame["count"], label="预测数据", linewidth=2)
    # The second line is the history alone, so it gets its own legend label
    # (both lines previously shared the "forecast data" label).
    plt.plot(df.index, df["count"], label="历史数据", linewidth=2)
    # Title and axis labels.
    plt.title("铁路购票预测图")
    plt.xlabel("时间")
    plt.ylabel("每月购票均值")
    plt.legend(loc='upper left')
    # Show the figure.
    plt.show()

Beispiel #8

0

Datei anzeigen

  predict = np.exp(predict_log)
  plt.plot(given_set['Count'], label = 'given_set')
  plt.plot(predict, color='red', label = 'Predict')
  plt.title('RMSE: %.4f'% (np.sqrt(np.dot(predict, given_set['Count']))/given_set.shape[0]))
  plt.show()

# Predict 2014-06-25 .. 2014-09-25 with the previously fitted ARIMA results
# and hand the prediction to check_prediction_diff together with `valid`.
ARIMA_predict_diff = results_ARIMA.predict(start='2014-06-25', end='2014-09-25')
check_prediction_diff(ARIMA_predict_diff,valid)

"""**SARIMAX**"""

import statsmodels.api as sm

# Fit SARIMAX(2,1,4)x(0,1,1,7) on the training counts and store its dynamic
# prediction for the validation window next to the validation data.
y_hat_avg = valid.copy()
fit1 = sm.tsa.statespace.SARIMAX(Train.Count, order=(2,1,4), seasonal_order=(0,1,1,7)).fit()
y_hat_avg['SARIMA'] = fit1.predict(start='2014-06-25', end='2014-09-25', dynamic=True)
# Plot training data, validation data and the SARIMA prediction in one figure.
plt.figure(figsize=(16,8))
plt.plot(Train['Count'], label='Train')
plt.plot(valid['Count'], label='vaild')
plt.plot(y_hat_avg['SARIMA'], label='SARIMA')
plt.legend(loc='best')
plt.show()

# Root-mean-squared error of the SARIMA prediction on the validation window.
rmse = sqrt(mean_squared_error(valid.Count,y_hat_avg.SARIMA))
print(rmse)

# Predict 2014-09-26 .. 2015-04-26 and attach the result to `test`.
predict = fit1.predict(start='2014-09-26', end='2015-04-26', dynamic=True)
test['prediction'] = predict
# Left-join `test` with `test_original` on day/month/year, keep the hour
# coming from `test_original` (the `_y` suffix) and drop the helper columns.
merge = pd.merge(test, test_original, on=('day','month','year'), how='left')
merge['Hour'] = merge['Hour_y']
merge = merge.drop(['year', 'month', 'Datetime', 'Hour_x', 'Hour_y'], axis=1)

Beispiel #9

0

Datei anzeigen

Datei: time_series_analysis.py Projekt: risg99/Data-Science

    plt.title('RMSE: %.4f'% (np.sqrt(np.dot(predict, given_set['Count']))/given_set.shape[0]))
    plt.show()
    
# Predict the values for the validation window (2014-06-25 .. 2014-09-25)
# with the previously fitted ARIMA results and pass them to
# check_prediction_diff together with `valid`.

ARIMA_predict_diff=results_ARIMA.predict(start="2014-06-25", end="2014-09-25")
check_prediction_diff(ARIMA_predict_diff, valid)


# In[133]:


# Fit SARIMAX(2,1,4)x(0,1,1,7) on the training counts, store its dynamic
# prediction for the validation window, and plot it against train and valid.
import statsmodels.api as sm
y_hat_avg = valid.copy()
fit1 = sm.tsa.statespace.SARIMAX(Train.Count, order=(2, 1, 4),seasonal_order=(0,1,1,7)).fit()
y_hat_avg['SARIMA'] = fit1.predict(start="2014-6-25", end="2014-9-25", dynamic=True)
plt.figure(figsize=(16,8))
plt.plot( Train['Count'], label='Train')
plt.plot(valid['Count'], label='Valid')
plt.plot(y_hat_avg['SARIMA'], label='SARIMA')
plt.legend(loc='best')
plt.show()


# In[134]:


# Root-mean-squared error of the SARIMA prediction on the validation window.
rms = sqrt(mean_squared_error(valid.Count, y_hat_avg.SARIMA))
print(rms)

Beispiel #10

0

Datei anzeigen

# Using the Holt-Winters method will be the best option among the rest of the models because of the seasonality factor.
# The Holt-Winters seasonal method comprises the forecast equation and three smoothing equations:
# one for the level, one for the trend and one for the seasonal component, each with its own smoothing parameter.

# Plot the SPP load as daily average.
# According to the original author's note, the red line represents the training dataset,
# the green line the test dataset and the blue line the forecasted values
# (the colours are chosen inside `draw`, which is defined elsewhere).
name = 'method 6'
draw(train, test, predicted6, name)
# Root-mean-squared error of the method 6 forecast on the 'CSWS' column.
rms_Method6 = sqrt(mean_squared_error(test['CSWS'], predicted6['CSWS']))
print("rms_Method6:", rms_Method6)

# Method 7: ARIMA
# Autoregressive Integrated Moving Average.
# ARIMA models aim to describe the correlations in the data with each other.
# Fit SARIMAX(2,1,4)x(0,1,1,7) on the training 'CSWS' series and store its
# dynamic prediction for 2018-1-1 .. 2018-5-31 in a copy of the test frame.
predicted7 = test.copy()
fit1 = sm.tsa.statespace.SARIMAX(train['CSWS'],
                                 order=(2, 1, 4),
                                 seasonal_order=(0, 1, 1, 7)).fit()
predicted7['CSWS'] = fit1.predict(start="2018-1-1",
                                  end="2018-5-31",
                                  dynamic=True)

# Plot the SPP load as daily average.
# According to the original author's note, the red line represents the training dataset,
# the green line the test dataset and the blue line the forecasted values
# (the colours are chosen inside `draw`, which is defined elsewhere).
name = 'method 7'
draw(train, test, predicted7, name)
# Score the method 7 forecast itself; the error was previously computed from
# `predicted6`, which just repeated the method 6 RMSE.
rms_Method7 = sqrt(mean_squared_error(test['CSWS'], predicted7['CSWS']))
print("rms_Method7:", rms_Method7)

Beispiel #11

0

Datei anzeigen

Datei: COVID_7.py Projekt: rucha80/Covid19-Predictions

                                                  dynamic=True)
            rms_arimas.append(
                sqrt(mean_squared_error(test.total_cases, y_hat_avg.SARIMA)))
        except:
            continue

# Pair every candidate order with its RMSE and pick the order with the smallest error.
# NOTE(review): the search loop above skips failed fits with `continue`; if
# `params` still contains those candidates, the pairs built by zip() may be
# misaligned. Confirm against the code that fills `params`.
data_tuples = list(zip(params, rms_arimas))
rms = pd.DataFrame(data_tuples, columns=['Parameters', 'RMS value'])
# Take idxmin() of the column (a Series) so it returns a scalar row label.
# Calling int() on the one-element Series returned by DataFrame.idxmin() is
# deprecated in recent pandas.
minimum = int(rms['RMS value'].idxmin())
parameters = params[minimum]

# SARIMA
# Refit with the selected order and no seasonal component (seasonal_order is
# all zeros), with stationarity/invertibility enforcement switched off, then
# store the dynamic prediction for 2020-06-01 .. 2020-06-05 cast to int.
y_hat_avg = test.copy()
fit1 = sm.tsa.statespace.SARIMAX(train.total_cases,
                                 order=parameters,
                                 seasonal_order=(0, 0, 0, 0),
                                 enforce_stationarity=False,
                                 enforce_invertibility=False).fit()
y_hat_avg['SARIMA'] = fit1.predict(start="2020-06-01",
                                   end="2020-06-05",
                                   dynamic=True).astype(int)
# Plot train, test and the prediction in one figure.
plt.figure(figsize=(16, 8))
plt.plot(train['total_cases'], label='Train')
plt.plot(test['total_cases'], label='Test')
plt.plot(y_hat_avg['SARIMA'], label='SARIMA')
plt.title("ARIMA Forecast")
plt.legend(loc='best')
plt.show()
# Root-mean-squared error of the prediction on the test window.
rms_arima = sqrt(mean_squared_error(test.total_cases, y_hat_avg.SARIMA))
print(rms_arima)

Beispiel #12

0

Datei anzeigen

                       ascending=True,
                       inplace=True,
                       na_position='last')
# Exploratory checks (notebook-style bare expressions: their values are only
# shown in an interactive session).
# NOTE(review): the heatmap uses `delhidata` while everything below uses
# `delhidata3` - confirm this is intended.
sns.heatmap(delhidata.isnull(), cbar=True)
delhidata3.tail()
delhidata3.isna().sum()
delhidata3.info()
# Index by date so that time-based interpolation can fill the gaps.
delhidata3.set_index(['date'], inplace=True)
delhidata3.shape
delhidata3.isnull().sum()
delhidata3_time = delhidata3.interpolate(method='time')
delhidata3_time.plot()

from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt
# Fit Holt's model on the whole interpolated pm25 series with default settings.
hw = Holt(delhidata3_time["pm25"]).fit()
hw_pred = hw.predict(start=2100, end=2616)
# In-sample predictions for positions 0..2100 (2101 values), scored against
# the first 2101 observations.
hw_train = hw.predict(start=0, end=2100)
hw_rmse_train = np.sqrt(
    mean_squared_error(hw_train, delhidata3_time["pm25"].iloc[:2101]))
hw_rmse_train  #52

# Predictions for positions 2102..2616, scored against the observations from
# position 2102 on.
# NOTE(review): position 2101 is in neither the train nor the test slice, and
# the model was fitted on the full series, so this is not a held-out score.
hw_test = hw.predict(start=2102, end=2616)
hw_rmse_test = np.sqrt(
    mean_squared_error(hw_test, delhidata3_time["pm25"].iloc[2102:]))
hw_rmse_test
# 43.31605304441928
plt.plot(hw_test, color='red')
# NOTE(review): `delhidata3_akima` is not defined in the visible code and the
# slice starts at 2103 rather than 2102 - confirm both.
plt.plot(delhidata3_akima['pm25'].iloc[2103:])

import pickle

# Persist the fitted Holt model. The context manager guarantees the file is
# flushed and closed, even if pickling raises.
with open('holts_model.pkl', 'wb') as model_file:
    pickle.dump(hw, model_file)

Beispiel #13

0

Datei anzeigen

Datei: timepredict.py Projekt: monkeyshare/clean_data

                            start_P=0,
                            seasonal=True,
                            d=1,
                            D=1,
                            trace=True,
                            error_action='ignore',
                            suppress_warnings=True,
                            stepwise=True)
# Fit the auto-ARIMA model configured above and forecast one value per test row.
autoarimamodel.fit(train)
y_hat['autoarima'] = autoarimamodel.predict(n_periods=test.shape[0])

# Fit SARIMAX(2,1,4)x(0,1,1,7) on the training '人数' column and store its
# dynamic prediction from the first to the last label of the test index.
fit1 = sm.tsa.statespace.SARIMAX(train['人数'],
                                 order=(2, 1, 4),
                                 seasonal_order=(0, 1, 1, 7)).fit()
y_hat['SARIMA'] = fit1.predict(start=list(test.index)[0],
                               end=list(test.index)[-1],
                               dynamic=True)
# The string literal below is disabled code (seasonal decomposition plot and
# an ADF test on the training series); it is evaluated and discarded.
'''
# 分离季节性、趋势性
sm.tsa.seasonal_decompose(train['人数']).plot()
result=sm.tsa.stattools.adfuller(train['人数'])
plt.show()
'''
# NOTE(review): this import comes after `autoarimamodel` is already used
# above - presumably the name is also imported earlier in the file; confirm.
from pyramid.arima import auto_arima

# Plot train, test and the baseline forecasts held in `y_hat` in one figure.
plt.figure(figsize=(12, 8))
plt.plot(train.index, train['人数'], label='Train')
plt.plot(test.index, test['人数'], label='Test')
plt.plot(y_hat.index, y_hat['naive'], label='Naive Forecast')
plt.plot(y_hat.index, y_hat['avg_forcast'], label='avg_forcast')
plt.plot(y_hat.index, y_hat['moving_avg_forcast'], label='moving_avg_forcast')

Beispiel #14

0

Datei anzeigen

    y_hat_avg['Holt_Winter'] = fit1.forecast(len(test_df))
    plt.figure()
    plt.plot(train_df['Count'], label='Train')
    plt.plot(test_df['Count'], label='Test')
    plt.plot(y_hat_avg['Holt_Winter'], label='Holt_Winter')
    plt.legend(loc='best')
    plt.savefig(output_folder + 'holt_winter.png')
    plt.close()

    rms_holt_winter = sqrt(mean_squared_error(test_df.Count, y_hat_avg.Holt_Winter))
    logger.debug('holt-winter model root-mean-squared error: %.3f' % rms_holt_winter)

    y_hat_avg = test_df.copy()
    fit1 = SARIMAX(train_df.Count, order=(2, 1, 4), seasonal_order=(0, 1, 1, 7)).fit()

    y_hat_avg['SARIMA'] = fit1.predict(start=test_df.index[0], end=test_df.index[-1], dynamic=True)
    plt.figure(figsize=(12, 8))
    plt.plot(train_df['Count'], label='Train')
    plt.plot(test_df['Count'], label='Test')
    plt.plot(y_hat_avg['SARIMA'], label='SARIMA')
    plt.legend(loc='best')
    plt.savefig(output_folder + 'sarimax.png')
    plt.close()

    rms_sarimax = sqrt(mean_squared_error(test_df.Count, y_hat_avg.SARIMA))
    logger.debug('SARIMAX root-mean-squared error: %.3f' % rms_sarimax)

    logger.debug('done')
    finish_time = time.time()
    elapsed_hours, elapsed_remainder = divmod(finish_time - start_time, 3600)
    elapsed_minutes, elapsed_seconds = divmod(elapsed_remainder, 60)

Beispiel #15

0

Datei anzeigen

Datei: views.py Projekt: Alex111cat/SmartGarbage

 def get_object(self, queryset=None):
     for i in os.listdir('media'):
         os.remove('media' + '/' + i)
     warnings.filterwarnings("ignore")  # отключает предупреждения
     methods = self.kwargs['methods']
     methods = methods.split('+')
     if self.kwargs['slug']:
         slug = self.kwargs['slug']
         try:
             obj = Modules.objects.get(m_module=slug)
             obj.no_active = None
             if obj.m_is_active == False:
                 obj.no_active = "Модуль неактивен"
             else:
                 rms_arr, data_pred, labels_pred, pars = [], [], [], []
                 try:
                     queryset = Containers.objects.filter(
                         c_module__m_module=slug, c_incr__isnull=False)
                     for item in queryset:
                         labels_pred.append(item.c_date.date())
                         data_pred.append(item.c_incr)
                     dd = np.asarray(data_pred)
                     df = pd.DataFrame(data=dd,
                                       index=pd.to_datetime(labels_pred),
                                       columns=['value'])
                     max_period = Analitics.objects.filter(
                         a_module__m_module=obj.m_module).aggregate(
                             Max('a_period'))
                     forecast_period = int(max_period['a_period__max'])
                     train = df[0:-forecast_period]
                     test = df[-forecast_period:]
                     # df = df.resample('D').mean()
                     # train = train.resample('D').mean()
                     # test = test.resample('D').mean()
                     y_hat_avg = test.copy()
                     plt.rcParams.update({'font.size': 14})
                     # проверка на стационарность
                     analiz, d_7 = self.stationarity(train.value)
                     for item in methods:
                         start = time.time()
                         rms = 1000000000.0
                         # ===================================================================================
                         if item == 'Наивный подход':
                             y_hat_avg['naive'] = dd[len(train) - 1]
                             # Расчет среднеквадратичной ошибки (RMSE)
                             rms = sqrt(
                                 mean_squared_error(test.value,
                                                    y_hat_avg.naive))
                             duration = time.time() - start
                             plt.figure(figsize=(16, 10))
                             plt.plot(train.index,
                                      train['value'],
                                      label='Train')
                             plt.plot(test.index,
                                      test['value'],
                                      label='Test')
                             plt.plot(y_hat_avg.index,
                                      y_hat_avg['naive'],
                                      label='Naive Forecast')
                             plt.legend(loc='best')
                             plt.title("Naive Forecast \n (RMSE = " +
                                       str(round(rms, 10)) + ", time = " +
                                       str(round(duration, 3)) + "c)",
                                       fontsize=35,
                                       fontweight='bold')
                             plt.savefig('media/naive_forecast.png')
                             pars.append(None)
                         # ===================================================================================
                         elif item == 'Простое среднее':
                             y_hat_avg['avg_forecast'] = train[
                                 'value'].mean()
                             duration = time.time() - start
                             plt.figure(figsize=(16, 10))
                             plt.plot(train.index,
                                      train['value'],
                                      label='Train')
                             plt.plot(test.index,
                                      test['value'],
                                      label='Test')
                             plt.plot(y_hat_avg['avg_forecast'],
                                      label='Average Forecast')
                             plt.legend(loc='best')
                             rms = sqrt(
                                 mean_squared_error(test.value,
                                                    y_hat_avg.avg_forecast))
                             plt.title("Average Forecast \n (RMSE = " +
                                       str(round(rms, 10)) + ", time = " +
                                       str(round(duration, 3)) + "c)",
                                       fontsize=35,
                                       fontweight='bold')
                             plt.savefig('media/average_forecast.png')
                             pars.append(None)
                         # ===================================================================================
                         elif item == 'Скользящее среднее':
                             y_hat_avg['moving_avg_forecast'] = train[
                                 'value'].rolling(48).mean().iloc[-1]
                             rms = sqrt(
                                 mean_squared_error(
                                     test.value,
                                     y_hat_avg.moving_avg_forecast))
                             duration = time.time() - start
                             plt.figure(figsize=(16, 10))
                             plt.plot(train.index,
                                      train['value'],
                                      label='Train')
                             plt.plot(test.index,
                                      test['value'],
                                      label='Test')
                             plt.plot(y_hat_avg['moving_avg_forecast'],
                                      label='Moving Average Forecast')
                             plt.legend(loc='best')
                             plt.title(
                                 "Moving Average Forecast \n  (RMSE = " +
                                 str(round(rms, 10)) + ", time = " +
                                 str(round(duration, 3)) + "c)",
                                 fontsize=35,
                                 fontweight='bold')
                             plt.savefig('media/mov_avg_forecast.png')
                             pars.append(None)
                         # ===================================================================================
                         elif item == 'Простое экспоненциальное сглаживание':
                             for s_l in np.arange(0, 1, 0.1):
                                 fit2_curr = SimpleExpSmoothing(
                                     np.asarray(train['value'])).fit(
                                         smoothing_level=s_l,
                                         optimized=False)
                                 y_hat_avg['SES'] = fit2_curr.forecast(
                                     len(test))
                                 rms_curr = sqrt(
                                     mean_squared_error(
                                         test.value, y_hat_avg.SES))
                                 if (rms_curr < rms):
                                     rms = rms_curr
                                     plt.plot(y_hat_avg['SES'], label='SES')
                                     fit2 = fit2_curr
                                     p4 = {'s_l': round(s_l, 4)}
                             y_hat_avg['SES'] = fit2.forecast(len(test))
                             duration = time.time() - start
                             plt.figure(figsize=(16, 10))
                             plt.plot(train.index,
                                      train['value'],
                                      label='Train')
                             plt.plot(test.index,
                                      test['value'],
                                      label='Test')
                             plt.plot(y_hat_avg['SES'], label='SES')
                             plt.legend(loc='best')
                             plt.title(
                                 "Simple Exponential Smoothing  \n (RMSE = "
                                 + str(round(rms, 10)) + ", time = " +
                                 str(round(duration, 3)) + "c)",
                                 fontsize=35,
                                 fontweight='bold')
                             plt.savefig('media/ses.png')
                             pars.append(p4)
                         # ===================================================================================
                         elif item == 'Метод линейного тренда Холта':
                             for s_l in np.arange(0, 1, 0.1):
                                 for s_s in np.arange(0, 1, 0.1):
                                     fit1_curr = Holt(
                                         np.asarray(train['value'])).fit(
                                             smoothing_level=s_l,
                                             smoothing_trend=s_s)
                                     y_hat_avg[
                                         'Holt_linear'] = fit1_curr.forecast(
                                             len(test))
                                     rms_curr = sqrt(
                                         mean_squared_error(
                                             test.value,
                                             y_hat_avg.Holt_linear))
                                     if (rms_curr < rms):
                                         rms = rms_curr
                                         fit1 = fit1_curr
                                         p5 = {
                                             's_l': round(s_l, 4),
                                             's_s': round(s_s, 4)
                                         }
                             duration = time.time() - start
                             y_hat_avg['Holt_linear'] = fit1.forecast(
                                 len(test))
                             plt.figure(figsize=(16, 10))
                             plt.plot(train.index,
                                      train['value'],
                                      label='Train')
                             plt.plot(test.index,
                                      test['value'],
                                      label='Test')
                             plt.plot(y_hat_avg['Holt_linear'],
                                      label='Holt_linear')
                             plt.legend(loc='best')
                             plt.title(
                                 "Holt linear trend method \n  (RMSE = " +
                                 str(round(rms, 10)) + ", time = " +
                                 str(round(duration, 3)) + "c)",
                                 fontsize=35,
                                 fontweight='bold')
                             plt.savefig('media/holt_linear.png')
                             pars.append(p5)
                         # ===================================================================================
                         elif item == 'Метод Холта-Винтерса':
                             params = ['add', None]
                             for t in params:
                                 for s in params:
                                     for s_p in [7, 12]:
                                         try:
                                             fit1_curr = ExponentialSmoothing(
                                                 np.asarray(train['value']),
                                                 seasonal_periods=s_p,
                                                 trend=t,
                                                 seasonal=s,
                                             ).fit()
                                             y_hat_avg[
                                                 'Holt_Winter'] = fit1_curr.forecast(
                                                     len(test))
                                             rms_curr = sqrt(
                                                 mean_squared_error(
                                                     test.value,
                                                     y_hat_avg.Holt_Winter))
                                             if (rms_curr < rms):
                                                 rms = rms_curr
                                                 fit1 = fit1_curr
                                                 p6 = {
                                                     's_p': s_p,
                                                     't': t,
                                                     's': s
                                                 }
                                         except:
                                             pass
                             duration = time.time() - start
                             y_hat_avg['Holt_Winter'] = fit1.forecast(
                                 len(test))
                             plt.figure(figsize=(16, 10))
                             plt.plot(train.index,
                                      train['value'],
                                      label='Train')
                             plt.plot(test.index,
                                      test['value'],
                                      label='Test')
                             plt.plot(y_hat_avg['Holt_Winter'],
                                      label='Holt_Winter')
                             plt.legend(loc='best')
                             plt.title(" Holt-Winters method \n (RMSE = " +
                                       str(round(rms, 10)) + ", time = " +
                                       str(round(duration, 3)) + "c)",
                                       fontsize=35,
                                       fontweight='bold')
                             plt.savefig('media/holt_winter.png')
                             pars.append(p6)
                         # ===================================================================================
                         elif item == 'SARIMA':
                             y_hat_avg = test.copy()
                             p = q = range(0, 4)
                             D = range(0, 2)
                             m = [7, 12]
                             pdq = list(itertools.product(p, d_7, q))
                             seasonal_pdq = [
                                 (x[0], x[1], x[2], x[3]) for x in list(
                                     itertools.product(p, D, q, m))
                             ]
                             for param in pdq:
                                 for param_seasonal in seasonal_pdq:
                                     try:
                                         fit1_curr = sm.tsa.statespace.SARIMAX(
                                             train.value,
                                             order=param,
                                             seasonal_order=param_seasonal,
                                             enforce_stationarity=False,
                                             enforce_invertibility=False
                                         ).fit()
                                         y_hat_avg[
                                             'SARIMA'] = fit1_curr.predict(
                                                 start=test.index[0].date(),
                                                 end=self.get_today().date(
                                                 ),
                                                 dynamic=True)
                                         rms_curr = sqrt(
                                             mean_squared_error(
                                                 test.value,
                                                 y_hat_avg.SARIMA))
                                         if (rms_curr < rms):
                                             rms = rms_curr
                                             fit1 = fit1_curr
                                             p7 = {
                                                 'p': param[0],
                                                 'd': param[1],
                                                 'q': param[2],
                                                 'P': param_seasonal[0],
                                                 'D': param_seasonal[1],
                                                 'Q': param_seasonal[2],
                                                 'm': param_seasonal[3]
                                             }
                                     except:
                                         pass
                             duration = time.time() - start
                             y_hat_avg['SARIMA'] = fit1.predict(
                                 start=test.index[0].date(),
                                 end=self.get_today().date(),
                                 dynamic=True)
                             plt.figure(figsize=(16, 10))
                             plt.plot(train['value'], label='Train')
                             plt.plot(test['value'], label='Test')
                             plt.plot(y_hat_avg['SARIMA'], label='SARIMA')
                             plt.legend(loc='best')
                             plt.title(" SARIMA method \n  (RMSE = " +
                                       str(round(rms, 10)) + ", time = " +
                                       str(round(duration, 3)) + "c)",
                                       fontsize=35,
                                       fontweight='bold')
                             plt.savefig('media/arima.png')
                             pars.append(p7)
                     # ===========================================================================================
                         elif item == 'LSTM':
                             # transform data to be stationary
                             # transform data to be supervised learning
                             if analiz[0] != 'Стационарный':
                                 supervised = self.timeseries_to_supervised(
                                     self.difference(data_pred, 1), 1)
                             else:
                                 supervised = self.timeseries_to_supervised(
                                     data_pred, 1)
                             supervised_values = supervised.values
                             # split data into train and test-sets
                             train_lstm, test_lstm = supervised_values[
                                 0:-len(test)], supervised_values[-len(test
                                                                       ):]
                             # transform the scale of the data
                             scaler, train_scaled, test_scaled = self.scale(
                                 train_lstm, test_lstm)
                             # walk-forward validation on the test data
                             error_scores, pred = list(), list()
                             for r in range(5):
                                 # fit the model
                                 lstm_model = self.fit_lstm(
                                     train_scaled, 1, 5, 5)
                                 # forecast the entire training dataset to build up state for forecasting
                                 train_reshaped = train_scaled[:, 0].reshape(
                                     len(train_scaled), 1, 1)
                                 lstm_model.predict(train_reshaped,
                                                    batch_size=1)
                                 # walk-forward validation on the test data
                                 predictions = list()
                                 for i in range(len(test_scaled)):
                                     # make one-step forecast
                                     X, y = test_scaled[
                                         i, 0:-1], test_scaled[i, -1]
                                     yhat = self.forecast_lstm(
                                         lstm_model, 1, X)
                                     # invert scaling
                                     yhat = self.invert_scale(
                                         scaler, X, yhat)
                                     if analiz[0] != 'Стационарный':
                                         # invert differencing
                                         yhat = self.inverse_difference(
                                             data_pred, yhat,
                                             len(test_scaled) + 1 - i)
                                     # store forecast
                                     predictions.append(yhat)
                                 # report performance
                                 rms = sqrt(
                                     mean_squared_error(
                                         test.value, predictions))
                                 error_scores.append(rms)
                                 pred.append(predictions)
                             rms = np.array(error_scores).min()
                             i_min = error_scores.index(rms)
                             predictions = pred[i_min]
                             duration = time.time() - start
                             plt.figure(figsize=(16, 10))
                             plt.plot(train.index,
                                      train['value'],
                                      label='Train')
                             plt.plot(test.index,
                                      test['value'],
                                      label='Test')
                             plt.plot(test.index, predictions, label='LSTM')
                             plt.legend(loc='best')
                             plt.title("LSTM \n (RMSE = " +
                                       str(round(rms, 10)) + ", time = " +
                                       str(round(duration, 3)) + "c)",
                                       fontsize=35,
                                       fontweight='bold')
                             plt.savefig('media/lstm.png')
                             pars.append(None)
                             # # ===========================================================================================
                         rms_arr.append(rms)
                     obj.data_pred = data_pred
                     i = rms_arr.index(np.array(rms_arr).min())
                     if i < 7:
                         obj.pars = pars[i]
                     obj.method = methods[i]
                     obj.data_max = np.array(data_pred).max()
                     obj.data_min = np.array(data_pred).min()
                     obj.data_mean = np.array(data_pred).mean().round()
                     obj.data_std = np.array(data_pred).std().round()
                     obj.rms_min = round(np.array(rms_arr).min(), 10)
                     obj.analiz = analiz
                 except Containers.DoesNotExist:
                     pass
         except Modules.DoesNotExist:
             pass
     else:
         obj = None
     return obj

Beispiel #16

0

Datei anzeigen

# Compute the root mean squared error of the ARIMA predictions.
# The predictions come as a Series (per the original note), so both sides are
# converted to plain float arrays and compared position by position instead of
# relying on index alignment.
print("\n Root mean squared error for ARIMA model\n")
# RMSE = sqrt(mean((prediction - actual) ** 2)).
# The previous expression, sqrt(dot(prediction, actual)) / n, is not an error
# measure: it depends on the magnitude of the series, not on the forecast error.
ARIMA_rms = np.sqrt(
    np.mean(
        (np.asarray(ARIMA_predict, dtype=float)
         - np.asarray(valid['Count'], dtype=float)) ** 2
    )
)
print(ARIMA_rms)

############################################################################################################################################################


                                                                #6.SARIMA model on daily time series
    #Extension of ARIMA; This takes seasonality also into account
# Build the seasonal ARIMA specification on the training counts
# (seasonal_order's last entry, 7, is the seasonal period), then fit it.
sarima_spec = sm.tsa.statespace.SARIMAX(
    Train['Count'],
    order=(1, 1, 1),
    seasonal_order=(1, 1, 1, 7),
)
fit1 = sarima_spec.fit()

# Dynamic prediction over a fixed date window, stored as a new y_hat column.
y_hat['SARIMA'] = fit1.predict(
    start="2014-6-25",
    end="2014-9-25",
    dynamic=True,
)

# Draw training data, validation data and the forecast on a single figure.
plt.figure(figsize=(16, 8))
plt.plot(Train['Count'], label='Train')
plt.plot(valid['Count'], label='Valid')
plt.plot(y_hat['SARIMA'], label='SARIMAX')
plt.legend(loc='best')
plt.title('SARIMA')
plt.show()

# Report the root mean squared error of the forecast against the validation counts.
print("\n Root mean squared error for SARIMA model on daily time series model\n")
sarima_mse = mean_squared_error(valid['Count'], y_hat['SARIMA'])
SARIMA_rms = sqrt(sarima_mse)
print(SARIMA_rms)

Beispiel #17

0

Datei anzeigen

Datei: model.py Projekt: MarcPlunkett/Data-science-portfolio

# Sum the two prediction components (entries missing on one side count as 0),
# then exponentiate -- presumably to undo an earlier log transform (confirm
# against the code that builds comb_predict1).
comb_predict1 = comb_predict1.add(comb_predict, fill_value=0)
comb_predict = np.exp(comb_predict1)

plt.plot(train_validate['Count'], label="Valid")
plt.plot(comb_predict, color='red', label="Predict")
plt.legend(loc='best')
# RMSE = sqrt(mean((prediction - actual) ** 2)), compared position by position.
# The previous title used sqrt(dot(prediction, actual)) / n, which is not an
# error measure and does not shrink as the forecast gets better.
squared_errors = (np.asarray(comb_predict, dtype=float)
                  - np.asarray(train_validate['Count'], dtype=float)) ** 2
plt.title('RMSE: %.4f' % np.sqrt(np.mean(squared_errors)))
plt.show()

# Seasonal ARIMA (SARIMAX): like ARIMA, but it also models the seasonal
# pattern of the series.

import statsmodels.api as sm

# Forecasts are written into a copy, leaving train_validate itself unchanged.
y_hat_avg = train_validate.copy()

sarimax_spec = sm.tsa.statespace.SARIMAX(
    train_set['Count'],
    order=(2, 1, 4),
    seasonal_order=(0, 1, 1, 7),
)
fit1 = sarimax_spec.fit()

# Dynamic prediction over the fixed validation date window.
y_hat_avg['SARIMAX'] = fit1.predict(
    start='2014-06-25',
    end='2014-09-22',
    dynamic=True,
)

# Training data, validation data and the forecast on one figure.
plt.figure(figsize=(16, 8))
plt.plot(train_set['Count'], label='Train')
plt.plot(train_validate['Count'], label='Valid')
plt.plot(y_hat_avg['SARIMAX'], label='SARIMA')
plt.legend(loc='best')
plt.show()

# Root mean squared error of the forecast; the bare name on the last line
# displays the value when this runs as a notebook cell.
sarimax_mse = mean_squared_error(train_validate['Count'], y_hat_avg['SARIMAX'])
rms = sqrt(sarimax_mse)
rms

Beispiel #18

0

Datei anzeigen

import pandas as pd  # script-local import: pd.concat replaces DataFrame.append

# Holt-Winters: additive trend and additive seasonality with a period of 7,
# fitted on the raw training counts and forecast over the length of the test set.
y_hat_avg = test.copy()
fit1 = ExponentialSmoothing(np.asarray(train['Count']), seasonal_periods=7, trend='add', seasonal='add').fit()
y_hat_avg['Holt_Winter'] = fit1.forecast(len(test))
# plt.figure(figsize=(16, 8))
# plt.plot(train['Count'], label='Train')
# plt.plot(test['Count'], label='Test')
# plt.plot(y_hat_avg['Holt_Winter'], label='Holt_Winter')
# plt.legend(loc='best')
# plt.show()

# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; concatenating
# a one-row frame with ignore_index=True yields the same table on old and new pandas.
holt_winters_rmse = sqrt(mean_squared_error(test.Count, y_hat_avg.Holt_Winter))
RMSE = pd.concat(
    [RMSE, pd.DataFrame([{"method": 'Holt-Winters Method', "result": holt_winters_rmse}])],
    ignore_index=True)

# Method 7 – ARIMA
# Seasonal ARIMA fitted on the training counts, predicted dynamically over the
# fixed test date window.
y_hat_avg = test.copy()
fit1 = sm.tsa.statespace.SARIMAX(train.Count, order=(2, 1, 4), seasonal_order=(0, 1, 1, 7)).fit()
y_hat_avg['SARIMA'] = fit1.predict(start="2013-11-1", end="2013-12-31", dynamic=True)
plt.figure(figsize=(16, 8))
# plt.plot(train['Count'], label='Train')
# plt.plot(test['Count'], label='Test')
plt.plot(y_hat_avg['SARIMA'], label='SARIMA')
plt.legend(loc='best')
plt.show()

sarima_rmse = sqrt(mean_squared_error(test.Count, y_hat_avg.SARIMA))
RMSE = pd.concat(
    [RMSE, pd.DataFrame([{"method": 'ARIMA', "result": sarima_rmse}])],
    ignore_index=True)

print(RMSE.head(10))