import pandas as pd
from statsmodels.tsa.api import ExponentialSmoothing


def forecast(data):
    shop_train = data['RPRICE']
    fst_month_forecst = data.index.max() + pd.DateOffset(months=1)
    lst_month_forecst = fst_month_forecst + pd.DateOffset(months=11)
    shop_train.index = pd.DatetimeIndex(shop_train.index.values,
                                        freq=shop_train.index.inferred_freq)

    model = ExponentialSmoothing(shop_train,
                                 seasonal='mul',
                                 seasonal_periods=12).fit()
    shop_forecast = model.predict(start=fst_month_forecst,
                                  end=lst_month_forecst)

    # Name the forecast Series so reset_index() yields a proper FORECAST column
    df_frc = (shop_forecast.rename('FORECAST')
              .reset_index()
              .rename(columns={'index': 'MDATE'}))
    df_frc['REPORT'] = data['REPORT'].iloc[0]
    df_frc = df_frc[['MDATE', 'REPORT', 'FORECAST']]

    # Make some graphics; make_graph is expected to be a module-level flag
    if make_graph:
        df_shop_forec = data.copy()
        df_shop_forec['FITVAL'] = model.fittedvalues
        df_shop_forec = df_shop_forec.merge(df_frc,
                                            how='outer',
                                            on=['MDATE', 'REPORT'])
        make_img2(df_shop_forec, False)

    return df_frc
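A minimal usage sketch for forecast() above (the input frame is an illustrative assumption; only the RPRICE/REPORT column names and the monthly index come from the code, and make_graph/make_img2 are module-level names from the original source):

import numpy as np

make_graph = False  # flag consumed inside forecast()

# Hypothetical input: three years of monthly prices for one shop/report
idx = pd.date_range('2016-01-01', periods=36, freq='MS')
sample = pd.DataFrame({'RPRICE': 100 + 10 * np.sin(np.arange(36) * 2 * np.pi / 12),
                       'REPORT': 'SHOP_A'}, index=idx)
df_frc = forecast(sample)  # 12 rows: MDATE, REPORT, FORECAST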
Example #2

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from math import sqrt
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.api import ExponentialSmoothing


def Holt_Winters(paramsList=['pollution.csv', '0.93', 'pm', 'humidity', 'date'],
                 specialParams=['7']):
    path = paramsList[0]
    trainRows = float(paramsList[1])
    saveto = 'result.csv'
    df = pd.read_csv(path, usecols=paramsList[2:])
    allRows = df.shape[0]
    season = specialParams[0]

    train = df[0:int(allRows*trainRows)].copy()
    test = df[int(allRows*trainRows):].copy()  # no +1: don't drop a row at the split

    df['Timestamp'] = pd.to_datetime(df[paramsList[-1]], format='%Y/%m/%d %H:%M')
    df.index = df['Timestamp']
    df = df.resample('D').mean()

    train['Timestamp'] = pd.to_datetime(train[paramsList[-1]], format='%Y/%m/%d %H:%M')
    train.index = train['Timestamp']
    train = train.resample('D').mean()

    test['Timestamp'] = pd.to_datetime(test[paramsList[-1]], format='%Y/%m/%d %H:%M')
    test.index = test['Timestamp']
    test = test.resample('D').mean()

    y_hat = test.copy()
    nullArray = train.copy()
    nullArray['time'] = train.index
    # ---- the code above is generic / reusable ----------------------------


    for i in range(2, len(paramsList)-1):
        print("entering loop")
        fit1 = ExponentialSmoothing(np.asarray(train[paramsList[i]]), seasonal_periods=int(season), trend='add', seasonal='add').fit()
        # The model was fit on a plain ndarray (integer index), so forecast over
        # the test horizon rather than predicting between hard-coded date strings
        y_hat[paramsList[i]] = fit1.forecast(len(test))
        y_hat[paramsList[i]] = round(y_hat[paramsList[i]], 2)
        print("finished fitting")
        rms = sqrt(mean_squared_error(test[paramsList[i]], y_hat[paramsList[i]]))
        print(rms)

        y_hat['Holt_Winter'] = fit1.forecast(len(test))
        plt.figure(figsize=(16, 8))
        plt.plot(train[paramsList[i]], label='Train')
        plt.plot(test[paramsList[i]], label='Test')
        plt.plot(y_hat[paramsList[i]], label='Holt_Winter')
        plt.legend(loc='best')
        plt.show()

    y_hat['time'] = test.index
    # Align columns before stacking; y_hat carries an extra 'Holt_Winter' column
    yhat_naive = np.array(y_hat[nullArray.columns])
    nArray = np.array(nullArray)
    newArray = np.concatenate((nArray, yhat_naive), axis=0)
    s = pd.DataFrame(newArray, columns=paramsList[2:])
    for i in range(2, len(paramsList)-1):
        # Blank out the training rows; .loc avoids chained assignment
        s.loc[:int(len(s)*trainRows)-1, paramsList[i]] = ""
    s.to_csv(saveto, index=False, header=True, float_format='%.2f')
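A minimal invocation sketch for Holt_Winters() above, using its own defaults (a pollution.csv with pm/humidity/date columns and %Y/%m/%d %H:%M timestamps is assumed to exist):

# 93% of rows for training, seasonal period of 7 days, output in result.csv
Holt_Winters(paramsList=['pollution.csv', '0.93', 'pm', 'humidity', 'date'],
             specialParams=['7'])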
Example #3

y_hat_avg = test.copy()
fit1 = ExponentialSmoothing(
    np.asarray(train['Monthly Consumption of Type A Medicine']),
    seasonal_periods=12,
    trend='add',
    seasonal='add',
).fit()
y_hat_avg['Holt_Winter'] = fit1.forecast(len(test))
plt.figure(figsize=(16, 8))
plt.plot(train['Monthly Consumption of Type A Medicine'], label='Train')
plt.plot(test['Monthly Consumption of Type A Medicine'], label='Test')
plt.plot(y_hat_avg['Holt_Winter'], label='Holt_Winter')
plt.legend(loc='best')
plt.show()

rms = sqrt(
    mean_squared_error(test['Monthly Consumption of Type A Medicine'],
                       y_hat_avg.Holt_Winter))
print(rms)
p = range(35, 45)  # assuming '35 - 45' (an empty range) was a typo for '35, 45'; p is unused below
plt.plot(y_hat_avg['Holt_Winter'], label='Holt_Winter')
plt.show()
pred = fit1.predict(start=0, end=40)
plt.plot(pred, label="pred")
plt.plot(df['Monthly Consumption of Type A Medicine'], label="df")
plt.legend()
plt.show()

#ExponentialSmoothing.predict(np.asarray(train['Monthly Consumption of Type A Medicine']))
Example #4



# In[301]:


#rms = sqrt(mean_squared_error(test.Amount, y_hat_avg.Holt_Winter))
#print(rms)
#RMSE = 23.9614925662


# In[316]:


model.predict()


# In[321]:


pred = model.forecast(12)
print(pred)


# In[ ]:


pred_imonth = input + 1  # assumes 'input' holds the last month index from an earlier cell (it shadows the builtin)

Example #5

    plt.figure(figsize=(12, 8))
    plt.plot(train['Count'], label='Train')
    plt.plot(test['Count'], label='Test')
    plt.plot(y_hat_HoltWinter['Holt_Winter'], label='Holt_Winter')
    plt.legend(loc='best')
    plt.title("Holt-Winters季节性预测法")

    rms = sqrt(mean_squared_error(test['Count'], y_hat_HoltWinter['Holt_Winter']))
    print("Holt-Winters季节性预测模型RMS:" + str(rms))
    # endregion

    # region ARIMA (autoregressive integrated moving average) model
    import statsmodels.api as sm

    y_hat_avg = test.copy()
    fit1 = sm.tsa.statespace.SARIMAX(train.Count, order=(2, 1, 4), seasonal_order=(0, 1, 1, 7)).fit()
    y_hat_avg['ARIMA'] = fit1.predict(start="2013-11-1", end="2013-12-31", dynamic=True)

    plt.figure(figsize=(12, 8))
    plt.plot(train['Count'], label='Train')
    plt.plot(test['Count'], label='Test')
    plt.plot(y_hat_avg['ARIMA'], label='ARIMA')
    plt.legend(loc='best')
    plt.title("ARIMA自回归移动平均法")

    rms = sqrt(mean_squared_error(test['Count'], y_hat_avg['ARIMA']))
    print("自回归移动平均模型(ARIMA)RMS:" + str(rms))
    # endregion

    plt.show()
Example #6

indata = indata.reset_index()
indata = indata.drop("index", axis=1)

outdata = data[data['YEAR'] >= 2017]  # years 2017 and 2018 become the out-of-sample data to test the model
outdata = outdata.reset_index()
outdata = outdata.drop("index", axis=1)

#### Code logic to implement the Holt-Winters method with a seasonality cycle of 12
y_hw = indata.copy()
fit2 = ExponentialSmoothing(np.asarray(y_hw['Total_apax']), seasonal_periods=12, trend='add', seasonal='mul').fit()

# plotting the Holt-Winters prediction against the actual data
y_hw_plot = pd.concat([indata, outdata])  # combine in- and out-of-sample to track the prediction over the entire launch
y_hw_plot = y_hw_plot.reset_index()
y_hw_plot = y_hw_plot.drop("index", axis=1)
y_hw_plot['Holt_Winter'] = fit2.predict(start=0, end=len(y_hw_plot) - 1)  # predict with the fitted parameters

# calculating the RMS value for the Holt-Winters method:
rms_holt_winters = sqrt(mean_squared_error(y_hw_plot.Total_apax, y_hw_plot.Holt_Winter))

trace_real = go.Scatter(x=y_hw_plot['monthyear'], y=y_hw_plot['Total_apax'], mode='lines', name='real')
trace_predict = go.Scatter(x=y_hw_plot['monthyear'], y=y_hw_plot['Holt_Winter'], mode='lines', name='predict')

data_plot = [trace_real, trace_predict]
layout = go.Layout(
    title="HoltWinter method ::: RMS :: " + str(rms_holt_winters)
)
fig = go.Figure(data=data_plot, layout=layout)
plot(fig, filename="holt_winter.html")

# Implementing auto-ARIMA for the same market

Example #7

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from statsmodels.tsa.api import ExponentialSmoothing
from statsmodels.tsa.seasonal import seasonal_decompose


def preprocess_load_data_forec(dataframe,
                               quarter_hour=True,
                               short_term=True,
                               scaler=None,
                               n_ahead=1,
                               calendars=None):
    """Pre-process load data for forecasting: scale, split into train/test,
    de-seasonalize, and construct features.

    Expects a pandas DataFrame with a DatetimeIndex and a 'load' column
    containing the load data in MW with no missing values. Resolution is
    quarter-hourly if quarter_hour=True, otherwise assumed to be hourly.
    """

    # use GW for convenience and readability later, also the standard-scaled values are smaller
    dataframe = dataframe / 1000

    # split data first so the scaler and de-seasonalizing can be trained on the train set properly
    train_df_o, test_df_o = train_test_split(dataframe,
                                             test_size=0.2,
                                             shuffle=False)
    if scaler is None:
        scaler = StandardScaler()
        scaler.fit(np.array(train_df_o['load']).reshape(-1, 1))
    train_df = pd.DataFrame(
        {'load': scaler.transform(np.array(train_df_o['load']).reshape(-1, 1)).squeeze()},
        index=train_df_o.index)
    test_df = pd.DataFrame(
        {'load': scaler.transform(np.array(test_df_o['load']).reshape(-1, 1)).squeeze()},
        index=test_df_o.index)

    # deseasonalize
    offset_train = pd.DataFrame(0, index=train_df.index, columns=['load'])
    offset_test = pd.DataFrame(0, index=test_df.index, columns=['load'])
    # decomp and train Holt Winters on decomp
    seasonal_periods = [24, 24 * 7]
    freq = 'H'

    if quarter_hour:
        seasonal_periods = [p * 4 for p in seasonal_periods]
        freq = '15T'

    for p in seasonal_periods:
        decomp = seasonal_decompose(train_df, period=p)
        exp = ExponentialSmoothing(decomp.seasonal,
                                   seasonal_periods=p,
                                   seasonal='add',
                                   freq=freq).fit()

        train_pred = exp.predict(start=train_df.index[0],
                                 end=train_df.index[-1])
        test_pred = exp.predict(start=test_df.index[0], end=test_df.index[-1])
        train_df['load'] = (train_df['load'] - train_pred)
        test_df['load'] = (test_df['load'] - test_pred)

        offset_train['load'] = offset_train['load'] + train_pred
        offset_test['load'] = offset_test['load'] + test_pred

    # construct features (construct_features is a helper defined elsewhere in the source module)
    train_df = construct_features(dataframe=train_df,
                                  offset=offset_train,
                                  short_term=short_term,
                                  quarter_hour=quarter_hour,
                                  n_ahead=n_ahead,
                                  calendars=calendars)
    test_df = construct_features(dataframe=test_df,
                                 offset=offset_test,
                                 short_term=short_term,
                                 quarter_hour=quarter_hour,
                                 n_ahead=n_ahead,
                                 calendars=calendars)

    return train_df, test_df, scaler
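A minimal usage sketch for the function above (the synthetic hourly load series is an illustrative assumption; only the 'load' column name and the DatetimeIndex requirement come from the code, and construct_features must be importable from the same module):

import numpy as np
import pandas as pd

# Hypothetical hourly load in MW over two years with daily and weekly cycles
idx = pd.date_range('2020-01-01', periods=24 * 730, freq='H')
hours = np.arange(len(idx))
load = (50_000
        + 8_000 * np.sin(hours * 2 * np.pi / 24)
        + 3_000 * np.sin(hours * 2 * np.pi / (24 * 7)))
df = pd.DataFrame({'load': load}, index=idx)

train_df, test_df, scaler = preprocess_load_data_forec(df, quarter_hour=False)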
Example #8

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt

sales_tg = pd.read_csv('forecasting/SalesTG/Variant Report.csv',
                       dtype={'Variant SKU': 'str'},
                       parse_dates=['Issued At'])
sales_2010 = (sales_tg[(sales_tg['Customer Type'] == 'consumer') & (sales_tg['Variant SKU'] == '2010')]
              [['Customer Name', 'Quantity', 'Issued At', 'Location Name']]
              .reset_index()
              .sort_values('Issued At'))
sales_2010.head(20)

sales_2010[sales_2010['Issued At'] < '2017-01-01']['Quantity'].sum()

sales_2010_month = sales_2010.set_index('Issued At').resample('M').sum().drop('index', axis=1)

sales_2010_month.plot()

# seasonal_periods has no effect unless a seasonal component is set;
# assuming an additive seasonal component was intended here
holt_model = ExponentialSmoothing(endog=sales_2010_month,
                                  seasonal='add',
                                  seasonal_periods=12).fit(smoothing_level=0.6)
import datetime
holt_model.predict(start=datetime.date(2016, 6, 1), end='2017-12-31')
Example #9

scaler.fit(df)

train = df[8036:-365]
test = df[-365:]

#---------1. PREDICTING RELATIVE HUMIDITY------------

print("Predicting humidity")

y_hat_avg1 = test.copy()
fit1 = ExponentialSmoothing(np.asarray(train['humidity']),
                            seasonal_periods=2,
                            trend='add',
                            seasonal='add').fit()
#y_hat_avg['Holt_Winter'] = fit1.forecast(len(test))
# Note: with the model fit on a plain array, predict(start=0, end=364) returns
# the first 365 in-sample fitted values, not forecasts for the test window
y_hat_avg1['Holt_Winter'] = fit1.predict(start=0, end=364)

#plotting

plt.figure()
plt.plot(train['humidity'], label='Train')
plt.plot(test['humidity'], label='Test')
plt.plot(y_hat_avg1['Holt_Winter'], label='Holt-Winters')

plt.title('RH1')
plt.xlabel('date')
plt.ylabel('Relative humidity')

plt.legend(loc='best')
#plt.show()
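
For a genuinely out-of-sample comparison, the fitted model's forecast method could be used over the test horizon instead of in-sample predictions. A minimal sketch, assuming sqrt and mean_squared_error are imported as in the earlier examples:

# Out-of-sample alternative: forecast len(test) steps past the training sample
y_hat_avg1['Holt_Winter_oos'] = fit1.forecast(len(test))
rms_oos = sqrt(mean_squared_error(test['humidity'], y_hat_avg1['Holt_Winter_oos']))
print(rms_oos)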