def ES(data=None, horizon=24, alpha=0.3):
    """
    Build ES model => Train model => Get forecasts.

    :param data: history data; handed to ``SimpleExpSmoothing`` and must
        support ``len()``.
    :param horizon: Length of forecasts (number of future time steps).
    :param alpha: smoothing level passed to ``fit(smoothing_level=...)``.
    :return: Forecasts for the next ``horizon`` time steps, in whatever
        container ``predict`` returns.
    """
    model = SimpleExpSmoothing(data).fit(smoothing_level=alpha)
    # ``predict`` treats ``end`` as inclusive, so the last requested position
    # is ``horizon - 1`` steps after the first out-of-sample one; otherwise
    # horizon + 1 values come back.
    first_step = len(data)
    fcasts = model.predict(start=first_step, end=first_step + horizon - 1)
    return fcasts
Example #2
0
def twocolorball_ses_forecast(df):
    """
    Produce one simple-exponential-smoothing prediction per ball column.

    The six red-ball columns and the blue-ball column of ``df`` are each
    smoothed with a randomly drawn smoothing level (0.1 .. 1.0), and the
    first value returned by ``predict()`` is truncated to an int.

    :param df: frame holding the red-ball 1..6 and blue-ball columns.
    :return: list of seven ints, in column order; the list is also printed.
    """
    ball_columns = ["红球%d" % n for n in range(1, 7)] + ["蓝球"]
    picks = []
    for name in ball_columns:
        model = SimpleExpSmoothing(np.asarray(df[name]))
        # A fresh random smoothing level is drawn for every column.
        alpha = random.randint(1, 10) / 10
        fitted = model.fit(smoothing_level=alpha, optimized=False)
        next_value = fitted.predict()
        picks.append(int(next_value[0]))
    print(picks)
    return picks
Example #3
0
def ses_forecast(df):
    """
    Predict each ball column with SES and collect the results in a pool.

    Every column (six red balls, then the blue ball) is smoothed with a
    randomly drawn smoothing level (0.1 .. 1.0). The first predicted value is
    rounded to the nearest int and handed to ``add_number_pool`` together
    with a flag telling whether it belongs to the blue ball.

    :param df: frame holding the red-ball 1..6 and blue-ball columns.
    :return: the pool as returned by the last ``add_number_pool`` call.
    """
    print("==== 逐一对每位数字进行 SES 预测 ====")
    ball_columns = ["红球%d" % n for n in range(1, 7)] + ["蓝球"]
    pool = []
    for position, name in enumerate(ball_columns, start=1):
        model = SimpleExpSmoothing(np.asarray(df[name]))
        alpha = random.randint(1, 10) / 10
        fitted = model.fit(smoothing_level=alpha, optimized=False)
        next_value = fitted.predict()
        rounded = int(round(next_value[0], 0))
        # Only the seventh column is the blue ball.
        pool = add_number_pool(pool, rounded, position >= 7)
    return pool
Example #4
0
def time_series_fun_4(csv_path="/temp/time_series_data.csv"):
    """
    Plot monthly means of the history and of history plus a 90-day forecast.

    The CSV is expected to provide a ``date`` column formatted as
    ``%Y-%m-%d %H:%M:%S``, a ``count`` column and a leftover
    ``Unnamed: 0`` column, which is dropped. A simple exponential smoothing
    model (smoothing level 0.7) is fitted on ``count`` and used to forecast
    one value every two hours for the 90 days after the last observation.

    :param csv_path: location of the CSV file to read.
    :return: None; the chart is shown with ``plt.show()``.
    """
    # Read the CSV file and drop the unused column.
    history = pd.read_csv(csv_path).drop(labels="Unnamed: 0", axis=1)
    history["date"] = pd.to_datetime(history["date"],
                                     format="%Y-%m-%d %H:%M:%S")

    # Timestamps to forecast: every 2 hours, starting right after the last
    # observation and covering the following three months.
    last_time = history["date"].iloc[-1]
    forecast_index = pd.date_range(start=last_time + timedelta(hours=2),
                                   end=last_time + timedelta(days=90),
                                   freq="2H")

    # Fit on the historical counts with a fixed smoothing level (0 - 1).
    fit_model = SimpleExpSmoothing(np.asarray(history["count"])).fit(
        smoothing_level=0.7, optimized=False)
    # Forecast out of sample. Predicting from position 0 would only return
    # in-sample fitted values of the oldest observations, not a forecast.
    forecast_values = fit_model.forecast(len(forecast_index))

    forecast_frame = DataFrame({"date": forecast_index,
                                "count": forecast_values})
    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    combined = pd.concat([history, forecast_frame], ignore_index=True)
    combined["count"] = combined["count"].apply(int)

    # Resample to monthly means. Only "count" is aggregated so that
    # non-numeric columns cannot break mean().
    history_monthly = history.set_index("date")[["count"]].resample(
        rule="M").mean()
    combined_monthly = combined.set_index("date")[["count"]].resample(
        rule="M").mean()

    # Draw the line chart; SimHei is selected so the CJK labels render.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.plot(combined_monthly.index, combined_monthly["count"],
             label="预测数据", linewidth=2)
    # The history line gets its own legend entry instead of repeating the
    # forecast label.
    plt.plot(history_monthly.index, history_monthly["count"],
             label="历史数据", linewidth=2)
    # Title and axis labels.
    plt.title("铁路购票预测图")
    plt.xlabel("时间")
    plt.ylabel("每月购票均值")
    plt.legend(loc='upper left')
    # Show the figure.
    plt.show()
Example #5
0
SARIMA(0, 0, 2)x(0, 2, 2, 7)7 - AIC:2756.7590501587865
SARIMA(0, 1, 2)x(2, 2, 2, 7)7 - AIC:2750.7295981905936


# In[ ]:


# Peek at the last rows of the frame (only displayed when run as a notebook cell).
df.tail()


# In[ ]:


# Fit a seasonal ARIMA on the Amount column with order (0, 1, 2) and
# seasonal order (2, 2, 2, 7), then predict positions 130..139 dynamically,
# i.e. each prediction feeds on earlier predictions rather than observations.
# NOTE(review): 130..139 is presumably the ten steps right after the sample —
# confirm against len(df).
fit2 = sm.tsa.statespace.SARIMAX(df.Amount, order=(0, 1, 2),seasonal_order=(2, 2, 2, 7)).fit()
Next_10weeks = fit2.predict(start=130, end=139, dynamic=True)
# Plot the observed series together with the SARIMA predictions.
plt.figure(figsize=(8,4))
plt.plot( df['Amount'], label='df')
plt.plot(Next_10weeks, label='SARIMA')
plt.legend(loc='best')
plt.show()


# In[49]:


# ARIMA order is (p, d, q):
#   p -> number of lagged periods used by the autoregressive part
#   d -> order of integration, i.e. how many times the series is differenced
#   q -> number of lagged periods used by the moving-average part
# NOTE(review): `statsmodels.tsa.arima_model` was deprecated and later removed
# from statsmodels; newer releases expose ARIMA from
# `statsmodels.tsa.arima.model` — confirm against the installed version.
from statsmodels.tsa.arima_model import ARIMA
# Build (but do not yet fit) an ARIMA(3, 2, 0) model on `train`.
model_arima = ARIMA(train,order=(3, 2, 0))
Example #6
0
                    plt.plot(valid['Count'], label='Valid')
                    plt.plot(y_hat, label='SARIMA')
                    plt.title("p={}, d={}, q={}".format(p, d, q))
                    plt.legend(loc='best')
                    plt.savefig("results/p={}, d={}, q={}".format(p, d, q))
                except:
                    continue


# Calculate RMSE
# Fit SARIMAX(3, 1, 2)x(0, 1, 1, 7) on the training counts, predict the
# validation positions dynamically and record the RMSE in the `rmse` table.
fit1 = sm.tsa.statespace.SARIMAX(train.Count,
                                 order=(3, 1, 2),
                                 seasonal_order=(0, 1, 1, 7),
                                 enforce_stationarity=False,
                                 enforce_invertibility=False).fit()
y_hat = fit1.predict(start=16055, end=18285, dynamic=True)
rmse.loc[len(rmse)] = "SARIMAX 312_", sqrt(MSE(valid.Count, y_hat))

# Submission using SARIMAX Model 312
submission = pd.read_csv("data/Sample_Submission_QChS6c3.csv")
# The submission model has exactly the same data and orders as `fit1`, so the
# existing fit is reused instead of estimating it a second time.
predict = fit1.predict(start=18286, end=23397, dynamic=True)
# Assign by position: `predict` is indexed from the forecast start, so a plain
# Series assignment would align on index and fill the column with NaN.
submission["Count"] = predict.values
submission['ID'] = test['ID']

# Converting the final submission to csv format
submission.to_csv("submissions/5.csv", index=False)
def TIME_SERIES_ALGO(df, bool_stat):
    """
    Benchmark several forecasting models on ``df`` and log each RMSE.

    Each model is trained on the ``train`` part and scored on the ``test``
    part returned by ``train_test_split``. Every score, or the exception that
    prevented it, is passed to ``insert_into_database``. When
    ``log_transformation`` reports a usable log-transformed copy, each model
    is also run on that copy; its forecasts are mapped back with ``np.exp``
    and scored against ``np.exp(test)`` so both sides share the original
    scale (assumes ``log_transformation`` applies a natural log, as the
    ``np.exp`` inversion implies -- TODO confirm).

    Models run, in order: naive, simple average, moving average (windows
    25..89), simple exponential smoothing, Holt (undamped and damped),
    Holt-Winters, a SARIMAX grid search and auto_arima.

    :param df: history data; its first column is the series to forecast.
    :param bool_stat: accepted for interface compatibility; not used here.
    :return: dict mapping a model label to its RMSE, for the models that
        record one (moving-average and SARIMAX grid scores only go to the
        database).
    """
    dict_rmse = dict()

    bool_log, df_log = log_transformation(df)
    col = df.columns[0]

    # 1.. NAIVE APPROACH
    # Repeat the most recent training value across the whole test window.
    try:
        train, test = train_test_split(df)

        # .iloc[-1] replaces the removed ``.ix`` accessor for "last row".
        y_prd = np.asarray([train.iloc[-1].values[0]] * test.shape[0])

        rs_naive = sqrt(mean_squared_error(test[col].values, y_prd))
        print(rs_naive)
        dict_rmse["naive"] = rs_naive
        insert_into_database("NAIVE", rs_naive, "{}")

        if bool_log:
            # Same approach on the log-transformed data.
            train, test = train_test_split(df_log)

            y_prd = np.asarray([train.iloc[-1].values[0]] * test.shape[0])
            y_prd = np.exp(y_prd)

            rs_naive_log = sqrt(
                mean_squared_error(np.exp(test[col].values), y_prd))
            print(rs_naive_log)
            dict_rmse["naive_log"] = rs_naive_log
            insert_into_database("NAIVE", rs_naive_log, "{}")

    except Exception as e:
        insert_into_database("NAIVE", None, e)
        print(("error in modelling in naive approach,{}".format(e)))

    # 2..SIMPLE AVERAGE
    # Forecast the training mean for every test step.
    try:
        train, test = train_test_split(df)
        mean_forecast = train[col].mean()
        y_prd = np.asarray([mean_forecast] * test.shape[0])
        rs_mean = sqrt(mean_squared_error(test[col].values, y_prd))
        dict_rmse["simple_avg"] = rs_mean
        insert_into_database("SIMPLE_AVG", rs_mean, "{}")

        if bool_log:
            train, test = train_test_split(df_log)
            mean_forecast = train[col].mean()
            y_prd = np.exp(np.asarray([mean_forecast] * test.shape[0]))

            rs_mean = sqrt(
                mean_squared_error(np.exp(test[col].values), y_prd))
            dict_rmse["simple_avg_log"] = rs_mean
            insert_into_database("SIMPLE_AVG", rs_mean, "{}")

    except Exception as e:
        insert_into_database("SIMPLE_AVG", None, e)
        print(("error in simple average,{}".format(e)))

    # 3..MOVING AVERAGE
    # Work in progress: every window size is scored and logged, but no
    # single window is recorded in dict_rmse.
    try:
        train, test = train_test_split(df)
        for i in range(25, 90):
            # Only the rolling mean at the last training row is used, since
            # that is the value carried forward as the forecast.
            mean_moving = train[col].rolling(i).mean().iloc[-1]
            print(mean_moving)
            y_prd = np.asarray([mean_moving] * test.shape[0])
            rs_moving = sqrt(mean_squared_error(test[col].values, y_prd))
            insert_into_database("MVG_AVG", rs_moving, "{}")

    except Exception as e:
        insert_into_database("MVG_AVG", None, e)
        print(("error in moving average,{}".format(e)))

    try:
        if bool_log:
            for i in range(25, 90):
                train, test = train_test_split(df_log)

                mean_moving = train[col].rolling(i).mean().iloc[-1]

                y_prd = np.array([mean_moving] * test.shape[0])
                print(y_prd)
                y_prd = np.exp(y_prd)

                rs_moving_log = sqrt(
                    mean_squared_error(np.exp(test[col].values), y_prd))
                insert_into_database("MVG_AVERAGE", rs_moving_log, "{}")

    except Exception as e:
        insert_into_database("MVG_AVERAGE", None, e)
        print(("error in log moving average model, {}".format(e)))

    # 4.. SIMPLE EXPONENTIAL SMOOTHING
    try:
        train, test = train_test_split(df)
        # Fit on the training part only; fitting on the full frame would
        # leak the test window into the model.
        fit2 = SimpleExpSmoothing(train[col]).fit(smoothing_level=0.6,
                                                  optimized=False)
        y_prd = fit2.forecast(len(test))
        print(y_prd)

        rs_simple = sqrt(mean_squared_error(test[col].values, y_prd))
        dict_rmse["simple"] = rs_simple
        insert_into_database("SIMPLE_EXP", rs_simple, "{}")

    except Exception as e:
        print(("error is simple exp without log,{}".format(e)))
        insert_into_database("SIMPLE_EXP", None, e)

    try:
        if bool_log:
            train, test = train_test_split(df_log)
            fit2 = SimpleExpSmoothing(train[col]).fit(smoothing_level=0.6,
                                                      optimized=False)
            y_prd = np.exp(fit2.forecast(len(test)))
            rs_simple = sqrt(
                mean_squared_error(np.exp(test[col].values), y_prd))
            dict_rmse["simple_log"] = rs_simple
            insert_into_database("SIMPLE_EXP", rs_simple, "{}")

    except Exception as e:
        insert_into_database("SIMPLE_EXP", None, e)
        print(("simple exponential smoothing log,{}".format(e)))

    # HOLT LINEAR METHOD FOR FORECASTING (undamped)
    try:
        train, test = train_test_split(df)
        fit2 = Holt(train[col], exponential=True, damped=False).fit()
        y_prd = fit2.predict(test.index.values[0],
                             test.index.values[test.shape[0] - 1])
        rs_hotl = sqrt(mean_squared_error(test[col].values, y_prd))
        dict_rmse["rs_hotl"] = rs_hotl
        insert_into_database("HOLT_LINEAR", rs_hotl, "{}")

        if bool_log:
            # The log run has to train on the log-transformed split;
            # otherwise np.exp is applied to forecasts that were never logged.
            train, test = train_test_split(df_log)
            fit2 = Holt(train[col], exponential=True, damped=False).fit()
            y_prd = fit2.predict(test.index.values[0],
                                 test.index.values[test.shape[0] - 1])
            y_prd = np.exp(y_prd)
            rs_hotl_log = sqrt(
                mean_squared_error(np.exp(test[col].values), y_prd))
            dict_rmse["rs_hotl_log"] = rs_hotl_log
            insert_into_database("HOLT_LINEAR", rs_hotl_log, "{}")

    except Exception as e:
        insert_into_database("HOLT_LINEAR", None, e)
        print((
            "error in HOLT linear forecasting in without damped.{}".format(e)))

    # HOLT LINEAR METHOD FOR FORECASTING (damped trend)
    try:
        # Split explicitly instead of relying on variables left over from
        # the previous section.
        train, test = train_test_split(df)
        fit2 = Holt(train[col], exponential=True, damped=True).fit()
        y_prd = fit2.predict(test.index.values[0],
                             test.index.values[test.shape[0] - 1])
        rs_holtld = sqrt(mean_squared_error(test[col].values, y_prd))
        dict_rmse["rs_holtld"] = rs_holtld
        insert_into_database("HOLT_LINEAR", rs_holtld, "{}")

        if bool_log:
            train, test = train_test_split(df_log)
            fit2 = Holt(train[col], exponential=True, damped=True).fit()
            y_prd = fit2.predict(test.index.values[0],
                                 test.index.values[test.shape[0] - 1])
            y_prd = np.exp(y_prd)
            rs_holtld_log = sqrt(
                mean_squared_error(np.exp(test[col].values), y_prd))
            # Separate key so the log score no longer overwrites the
            # non-log one.
            dict_rmse["rs_holtld_log"] = rs_holtld_log
            insert_into_database("HOLT_LINEAR", rs_holtld_log, "{}")

    except Exception as e:
        print(("error in HOLT linear smoothing  damped,{}".format(e)))
        insert_into_database("HOLT_LINEAR", None, e)

    # HOLT WINTERS FORECASTING..
    try:
        train, test = train_test_split(df)
        # Train on the training part; the test part is only used for scoring.
        fit2 = ExponentialSmoothing(train[col],
                                    trend="mul",
                                    seasonal="mul",
                                    seasonal_periods=12).fit()
        y_prd = fit2.predict(test.index.values[0],
                             test.index.values[test.shape[0] - 1])
        rs_hlw = sqrt(mean_squared_error(test[col].values, y_prd))
        print(rs_hlw)
        dict_rmse["rs_hlw"] = rs_hlw
        insert_into_database("HOLT_WINTER", rs_hlw, "{}")

        if bool_log:
            train, test = train_test_split(df_log)
            fit2 = ExponentialSmoothing(train[col],
                                        trend="add",
                                        seasonal="add",
                                        seasonal_periods=12).fit()
            y_prd = fit2.predict(test.index.values[0],
                                 test.index.values[test.shape[0] - 1])
            y_prd = np.exp(y_prd)
            rs_hlw_log = sqrt(
                mean_squared_error(np.exp(test[col].values), y_prd))
            print(rs_hlw_log)
            dict_rmse["rs_hlw_log"] = rs_hlw_log
            insert_into_database("HOLT_WINTER", rs_hlw_log, "{}")

    except Exception as e:
        print(("error in HOLT winter forecasting,{}".format(e)))
        insert_into_database("HOLT_WINTER", None, e)

    # ARIMA MODEL....
    # NOTE: an ARIMA grid search used to run here and is currently disabled.
    # It differenced the series (up to twice) while test_stationary() flagged
    # it, took candidate orders from get_params_p() / get_params_q(), and
    # fitted ARIMA(p, 0, q) for each pair, logging each RMSE under "ARIMA".

    # .. SARIMAX
    # Grid search over (p, d, q) in {0, 1}^3 for both the regular and the
    # seasonal order, for each candidate seasonal period.
    try:
        train, test = train_test_split(df)
        p = d = q = list(range(0, 2))
        non_seas = list(itertools.product(p, d, q))
        lis = [1, 3, 6, 12, 24, 56]

        for i in lis:
            sea_so = [(sp, sd, sq, i) for sp, sd, sq in non_seas]

            for j in non_seas:
                for k in sea_so:
                    try:
                        model = SARIMAX(train,
                                        order=j,
                                        seasonal_order=k,
                                        enforce_stationarity=False,
                                        enforce_invertibility=False).fit()
                        y_prd = model.predict(
                            start=test.index.values[0],
                            end=test.index.values[test.shape[0] - 1])

                        rs = sqrt(mean_squared_error(test.values, y_prd))

                        print(rs)
                        insert_into_database("SARIMAX", rs, "{}")
                    except Exception as e:
                        # One failing combination must not stop the search.
                        print(("error while training the SARIMAX MODELS,{}".
                               format(e)))
                        insert_into_database("SARIMAX", None, e)

    except Exception as e:
        print(("error in seasonal_arima,{}".format(e)))
        insert_into_database("SARIMAX", None, e)

    # ..AUTO_ARIMA..
    try:
        train, test = train_test_split(df)
        model = auto_arima(train,
                           start_p=1,
                           start_q=1,
                           start_P=1,
                           start_Q=1,
                           max_p=5,
                           max_q=5,
                           max_P=5,
                           max_Q=1,
                           d=1,
                           D=1,
                           seasonal=True)
        model = model.fit(train)
        y_prd = model.predict(n_periods=len(test))
        rs = sqrt(mean_squared_error(test.values, y_prd))
        print("results in auto_Arima", rs)
        dict_rmse["auto_arima"] = rs
        insert_into_database("AUTO_ARIMA", rs, "{}")

    except Exception as e:

        print("error in auto_Arima,{}".format(e))
        insert_into_database("Auto_arima", None, e)

    # Hand the collected scores back; previously they were built and dropped.
    return dict_rmse
Example #8
0
 def get_pred(self, module):
     """
     Forecast increments for ``module`` and store the planned date on it.

     The series is built from the ``c_incr`` values (keyed by ``c_date``) of
     the module's containers. If today's uncollected container already has a
     fill level of 100 or more, the plan date is today. Otherwise the method
     named by ``module.m_method`` forecasts ``forecast_period`` steps,
     ``self.get_count_days`` turns the current fill level and the forecasts
     into a number of days, and the plan date is today plus that many days.
     ``module`` is saved in both cases.

     :param module: object providing ``m_module``, ``m_height``,
         ``m_method`` and ``params``; its ``m_plan`` is overwritten.
     :return: None.
     """
     labels_pred, data_pred, predictions = [], [], []
     # Build the time series.
     # NOTE(review): no explicit order_by here — presumably the model's
     # default ordering is chronological; confirm.
     queryset = Containers.objects.filter(
         c_module__m_module=module.m_module, c_incr__isnull=False)
     # Most recent record whose c_curr equals the module height; the days
     # elapsed since then shorten the forecast window below.
     latest_at_height = Containers.objects.filter(
         c_module__m_module=module.m_module,
         c_curr=module.m_height).latest('c_date')
     days = (self.get_today().date() - latest_at_height.c_date.date()).days
     for it in queryset:
         labels_pred.append(it.c_date.date())
         data_pred.append(it.c_incr)
     dd = np.asarray(data_pred)
     df = pd.DataFrame(data=dd,
                       index=pd.to_datetime(labels_pred),
                       columns=['value'])
     # df = df.resample('D').mean()  # reduces the number of outliers
     obj = get_object_or_404(Containers,
                             c_module__m_module=module.m_module,
                             c_date=self.get_today(),
                             c_is_collected=False)
     fill_level = obj.fill_level
     if fill_level >= 100:
         module.m_plan = self.get_today().date()
     else:
         max_period = Analitics.objects.filter(
             a_module__m_module=module.m_module).aggregate(Max('a_period'))
         forecast_period = int(max_period['a_period__max']) + 2 - days
         method = str(module.m_method)
         if method == 'Наивный подход':
             # Naive approach: repeat the last observed value.
             predictions = [dd[len(dd) - 1]] * forecast_period
         elif method == 'Простое среднее':
             # Simple average of the whole series.
             predictions = [df['value'].mean()] * forecast_period
         elif method == 'Скользящее среднее':
             # Moving average over the last 48 observations.
             predictions = [df['value'].rolling(48).mean().iloc[-1]
                            ] * forecast_period
         elif method == 'Простое экспоненциальное сглаживание':
             # Simple exponential smoothing.
             fit = SimpleExpSmoothing(np.asarray(df['value'])).fit(
                 smoothing_level=module.params['s_l'], optimized=False)
             predictions = fit.forecast(forecast_period)
         elif method == 'Метод линейного тренда Холта':
             # Holt's linear trend method.
             fit = Holt(np.asarray(df['value'])).fit(
                 smoothing_level=module.params['s_l'],
                 smoothing_slope=module.params['s_s'])
             predictions = fit.forecast(forecast_period)
         elif method == 'Метод Холта-Винтерса':
             # Holt-Winters method.
             fit = ExponentialSmoothing(
                 np.asarray(df['value']),
                 seasonal_periods=module.params['s_p'],
                 trend=module.params['t'],
                 seasonal=module.params['s'],
             ).fit()
             predictions = fit.forecast(forecast_period)
         elif method == 'SARIMA':
             fit = sm.tsa.statespace.SARIMAX(
                 df.value,
                 order=(module.params['p'], module.params['d'],
                        module.params['q']),
                 seasonal_order=(module.params['P'], module.params['D'],
                                 module.params['Q'],
                                 module.params['m'])).fit()
             s_date = self.get_today() + datetime.timedelta(1)
             e_date = self.get_today() + datetime.timedelta(forecast_period)
             predictions = fit.predict(start=s_date.date(),
                                       end=e_date.date(),
                                       dynamic=True)
         elif method == 'LSTM':
             analiz, _ = self.stationarity(df.value)
             # One model is trained per forecast step; each forecast is
             # appended to data_pred and so feeds the next step.
             for _step in range(forecast_period):
                 if analiz[0] != 'Стационарный':
                     # Series reported as non-stationary: difference it.
                     data_pred = self.difference(data_pred, 1)
                 supervised = self.timeseries_to_supervised(data_pred, 1)
                 supervised_values = supervised.values
                 # Every row but the last trains the model; the last row is
                 # the single "test" sample to forecast from.
                 train_lstm = supervised_values[0:len(supervised_values) -
                                                1]
                 test_lstm = supervised_values[len(supervised_values) - 1:]
                 scaler = MinMaxScaler(feature_range=(-1, 1))
                 scaler = scaler.fit(train_lstm)
                 # transform train
                 train = train_lstm.reshape(train_lstm.shape[0],
                                            train_lstm.shape[1])
                 train_scaled = scaler.transform(train)
                 # fit the model
                 lstm_model = self.fit_lstm(train_scaled, 1, 5, 5)
                 # forecast the entire training dataset to build up state for forecasting
                 train_reshaped = train_scaled[:, 0].reshape(
                     len(train_scaled), 1, 1)
                 lstm_model.predict(train_reshaped, batch_size=1)
                 # walk-forward validation on the test data
                 # transform test
                 test = test_lstm.reshape(test_lstm.shape[0],
                                          test_lstm.shape[1])
                 test_scaled = scaler.transform(test)
                 for row in range(len(test_scaled)):
                     # make one-step forecast
                     X = test_scaled[row, 0:-1]
                     yhat = self.forecast_lstm(lstm_model, 1, X)
                     # invert scaling
                     yhat = self.invert_scale(scaler, X, yhat)
                     if analiz[0] != 'Стационарный':
                         # invert differencing
                         yhat = self.inverse_difference(
                             data_pred, yhat,
                             len(test_scaled) + 1 - row)
                     # store forecast
                     predictions.append(round(yhat))
                     data_pred.append(yhat)
         count = self.get_count_days(fill_level, predictions)
         date_plan = self.get_today() + datetime.timedelta(days=count)
         module.m_plan = date_plan.date()
     module.save()