예제 #1
0
def arima(window_values):
    """Forecast one step ahead from a weighted copy of ``window_values``.

    The first ten entries of a deep copy of ``window_values`` are scaled by
    fixed, increasing weights, an ARMA(0, 0) model is fitted to that scaled
    copy, and the one-step out-of-sample forecast is multiplied by 10 before
    it is returned.  ``window_values`` itself is not modified.
    """
    scale_factors = [0.06, 0.07, 0.08, 0.09, 0.1, 0.1, 0.11, 0.12, 0.13, 0.14]
    scaled = deepcopy(window_values)
    for position, factor in enumerate(scale_factors):
        scaled[position] *= factor

    fitted = sm.tsa.ARMA(scaled, order=(0, 0)).fit()

    one_step = _arma_predict_out_of_sample(
        fitted.params,
        1,
        fitted.resid,
        fitted.k_ar,
        fitted.k_ma,
        fitted.k_trend,
        fitted.k_exog,
        endog=scaled,
        exog=None,
        start=len(scaled),
    )
    return one_step * 10
예제 #2
0
def arima_predict_out_of_sample(res):
    '''
    Return the one-step-ahead out-of-sample forecast of a fitted model.

    res = results from statsmodels.tsa.arima_model.ARIMA().fit(X, y)

    The forecast is built from the results object that is passed in: its
    parameters, residuals and orders, plus the series held by its model
    (``res.model.endog``).  Nothing is refitted and no module-level data is
    read.

    NOTE(review): for a fit with differencing the model's ``endog`` is
    presumably the differenced series, so the forecast would be on that
    scale -- confirm against the caller's expectations.
    '''
    # this is the nsteps ahead predictor function
    from statsmodels.tsa.arima_model import _arma_predict_out_of_sample

    # the series the model was estimated on
    endog = res.model.endog

    # tack the returned value on to the series, then update residuals, to
    # forecast further ahead
    return _arma_predict_out_of_sample(res.params,
                                       1,
                                       res.resid,
                                       res.k_ar,
                                       res.k_ma,
                                       res.k_trend,
                                       res.k_exog,
                                       endog=endog,
                                       exog=None,
                                       start=len(endog))
예제 #3
0
파일: arma.py 프로젝트: fndjjx/practice
def arma(data, p, q, n):
    """Pick the best ARMA order by in-sample error and forecast one step.

    Every order ``(i, j)`` with ``i in range(p)`` and ``j in range(q)`` is
    fitted to ``data``.  Each fit is scored by the sum of squared differences
    between its dynamic prediction starting at index ``n`` and ``data[n:]``.
    The lowest-scoring order is refitted and used for the forecast.  The
    sorted candidate list and the selected orders are printed.

    :param data: the series to model
    :param p: exclusive upper bound of the AR orders tried
    :param q: exclusive upper bound of the MA orders tried
    :param n: index at which the dynamic in-sample prediction starts
    :return: the first value of the one-step out-of-sample forecast
    :raises IndexError: if no candidate order could be fitted and scored
    """
    result = []
    for i in range(p):
        for j in range(q):
            try:
                candidate = sm.tsa.ARMA(data, (i, j)).fit()
                predicted = candidate.predict(n, dynamic=True)
                error = sum((predicted - np.array(data[n:])) ** 2)
            except Exception:
                # Best effort: an order that cannot be fitted or scored is
                # skipped.  ``Exception`` (not a bare ``except``) so that
                # Ctrl-C still interrupts the search.
                continue
            result.append((i, j, error))

    if not result:
        raise IndexError(
            "no ARMA order could be fitted (p=%r, q=%r)" % (p, q))

    result.sort(key=lambda entry: entry[2])
    select_p, select_q = result[0][0], result[0][1]
    # Single-argument print calls behave the same on Python 2 and 3.
    print(result)
    print(select_p)
    print(select_q)
    arma_mod = sm.tsa.ARMA(data, (select_p, select_q)).fit()

    forecast = _arma_predict_out_of_sample(arma_mod.params,
                                           1,
                                           arma_mod.resid,
                                           arma_mod.k_ar,
                                           arma_mod.k_ma,
                                           arma_mod.k_trend,
                                           arma_mod.k_exog,
                                           endog=data,
                                           exog=None,
                                           start=len(data))
    return forecast[0]
예제 #4
0
def MA_predict(data, p=2, w=None, step=1):
    """
    Forecast ``step`` values with a (weighted) moving average of ``data``.

    :param data: ts data
    :param p: p parameter of MA; capped at ``len(data)``
    :param w: weight of WMA; reversed before use, equal weights ``1 / p``
        when it is ``None``
    :param step: predict step
    :return: the result of ``_arma_predict_out_of_sample`` called with the
        weights as parameters, ``p`` AR terms, no MA terms, no trend and no
        exogenous regressors
    """
    window = min(len(data), p)
    if w is None:
        coefficients = [1.0 / window] * window
    else:
        coefficients = w[::-1]

    # Single zero residual placeholder; there are no MA terms to feed.
    placeholder_residuals = [0]
    return _arma_predict_out_of_sample(coefficients,
                                       step,
                                       placeholder_residuals,
                                       window,
                                       0,
                                       0,
                                       0,
                                       endog=data)
예제 #5
0
    def predict_arma_next_days(self, item):
        """Fit an ARMA model on one column's history and forecast ahead.

        Column ``item`` of the module-level ``df_train`` is sorted by index
        and split at ``self.fc``: everything before that position is the
        training history, the entry at ``self.fc`` is kept as the actual
        value.  An ARMA(``self.p``, ``self.q``) model with daily frequency is
        fitted on the history and ``self.n_days`` steps are forecast from its
        end.

        :return: ``(history, actual_at_fc, forecast)``
        """
        series = df_train[item].sort_index()
        actual_at_fc = series[self.fc]
        history = series[0:self.fc]

        fitted = ARMA(history, order=(self.p, self.q), freq='D').fit(disp=False)

        forecast = _arma_predict_out_of_sample(
            fitted.params,
            self.n_days,
            fitted.resid,
            fitted.k_ar,
            fitted.k_ma,
            fitted.k_trend,
            fitted.k_exog,
            endog=history,
            exog=None,
            start=len(history),
        )
        return history, actual_at_fc, forecast
예제 #6
0
def forecast_transfer(params, step, model_name, endog):
    """Forecast ``step`` values of ``endog`` with externally supplied params.

    :param params: model coefficients, passed through as given rather than
        read from ``model_name``
    :param step: number of steps to forecast
    :param model_name: fitted results object; its residuals, AR/MA orders,
        trend/exog counts and the estimation method of its model are used
    :param endog: series the forecast is based on
    :return: the out-of-sample forecast
    """
    # Only the out-of-sample prediction is needed; no separate
    # ``model_name.forecast`` run is made and no module-level settings are
    # read.
    return _arma_predict_out_of_sample(params, step,
                                       model_name.resid, model_name.k_ar,
                                       model_name.k_ma, model_name.k_trend,
                                       model_name.k_exog, endog,
                                       exog=None,
                                       method=model_name.model.method)
예제 #7
0
    def optPredictedValue(self, train_data):
        """
        :description fit an ARMA(15, 5) model on the first 123 observations
            and forecast the following 60.  ``train_data[123:183]`` is stored
            in ``self.test_data`` and the forecast in ``self.pre_result``;
            the ACF, PACF and forecast are printed, and ``train_data`` plus
            the forecast (in red) are plotted
        :param train_data: model train data
        :return: None
        """
        self.test_data = train_data[123:183]

        history = pd.Series(train_data[:123])
        history.index = pd.Index(sm.tsa.datetools.dates_from_range('2001', '2123'))

        # Autocorrelation and partial autocorrelation, 20 lags each.
        print(sm.tsa.acf(history, nlags=20))
        print(sm.tsa.pacf(history, nlags=20))

        fitted = sm.tsa.ARMA(history, (15, 5)).fit(disp=-1, trend="c", solver='powell', method="css")

        # Everything the out-of-sample predictor needs comes from the fit.
        horizon = 60
        self.pre_result = _arma_predict_out_of_sample(
            fitted.params,
            horizon,
            fitted.resid,
            fitted.k_ar,
            fitted.k_ma,
            fitted.k_trend,
            fitted.k_exog,
            endog=history,
            exog=None,
            method='ols',
            start=len(history),
        )
        print(self.pre_result)

        plt.plot(range(183), train_data)
        plt.plot(range(123, 183), self.pre_result, 'red')
        plt.show()
예제 #8
0
    def predict_arma_next_days(self, item, Xy='train'):
        """Forecast ``self.n_days`` values of one column from a 100-point window.

        ``item`` selects a column of the module-level ``df_train`` when
        ``Xy == 'train'`` and of ``df_test`` otherwise.  The column is sorted
        by index; the entry at ``self.fc`` is kept as the actual value and
        the slice ``[self.fc - 100:self.fc]`` is the fitting window.  An
        ARMA(``self.p``, ``self.q``) model with daily frequency is fitted on
        the window and the forecast starts right after it.

        :return: ``(window, actual_at_fc, forecast)``
        """
        # Background reading:
        # http://statsmodels.sourceforge.net/devel/examples/generated/ex_dates.html
        # https://groups.google.com/forum/#!msg/pystatsmodels/_ItLBVpePIY/nBiP3fn4kDkJ
        # https://github.com/statsmodels/statsmodels/issues/1857
        # http://stackoverflow.com/questions/27931571/arma-predict-for-out-of-sample-forecast-does-not-work-with-floating-points
        # https://bicorner.com/2015/11/16/time-series-analysis-using-ipython/
        # http://stackoverflow.com/questions/35593759/python-arima-model-predicted-values-are-shifted
        # http://www.statsmodels.org/dev/examples/notebooks/generated/statespace_sarimax_stata.html
        #
        # freq : str {'B','D','W','M','A', 'Q'}
        #     'B' business day (Mon. - Fri.), 'D' daily, 'W' weekly,
        #     'M' monthly, 'A' annual, 'Q' quarterly
        source = df_train if Xy == 'train' else df_test
        series = source[item].sort_index()
        actual_at_fc = series[self.fc]  # returned alongside the forecast

        window_start = self.fc - 100
        window = series[window_start:self.fc]

        fitted = ARMA(window, order=(self.p, self.q), freq='D').fit(disp=False)

        forecast = _arma_predict_out_of_sample(
            fitted.params,
            self.n_days,
            fitted.resid,
            fitted.k_ar,
            fitted.k_ma,
            fitted.k_trend,
            fitted.k_exog,
            endog=window,
            exog=None,
            start=len(window),
        )
        return window, actual_at_fc, forecast
예제 #9
0
def _arma_predict(rw, data, steps, order):
    """Forecast ``steps`` values of ``data`` from the fitted results ``rw``.

    ``rw.params`` and ``rw.resid`` are converted to numpy arrays, the AR and
    MA orders are taken from ``order[0]`` and ``order[1]``, and the predictor
    is called with ``start=0`` and ``method='mle'``.
    """
    coefficients = np.array(rw.params)
    residuals = np.array(rw.resid)
    ar_order = order[0]
    ma_order = order[1]
    return _arma_predict_out_of_sample(
        coefficients,
        steps,
        residuals,
        ar_order,
        ma_order,
        rw.k_trend,
        rw.k_exog,
        data,
        exog=None,
        start=0,
        method='mle',
    )
예제 #10
0
파일: ts.py 프로젝트: bigdig/Thesis
 def predict(self):
     """Return the one-step forecast following the end of the stored series.

     The fitted results are read from ``self.model`` and the series from
     ``self.model_info.endog``; the forecast starts at ``data.shape[0]``.
     """
     fitted = self.model
     series = self.model_info.endog
     n_obs = series.shape[0]
     return _arma_predict_out_of_sample(
         fitted.params,
         1,
         fitted.resid,
         fitted.k_ar,
         fitted.k_ma,
         fitted.k_trend,
         fitted.k_exog,
         endog=series,
         exog=None,
         start=n_obs,
     )
예제 #11
0
def arma_predict(data, step):
    """Fit an ARMA(2, 1) model to ``data`` and forecast ``step`` values.

    The model is fitted with the default trend setting and the forecast
    starts right after the last observation of ``data``.
    """
    fitted = sm.tsa.ARMA(data, (2, 1)).fit()

    return _arma_predict_out_of_sample(
        fitted.params,
        step,
        fitted.resid,
        fitted.k_ar,
        fitted.k_ma,
        fitted.k_trend,
        fitted.k_exog,
        endog=data,
        exog=None,
        start=len(data),
    )
예제 #12
0
def main():
    """Run the three-part CSUSHPISA exercise.

    Q1 prints an augmented Dickey-Fuller test of the series, Q2 plots its
    ACF, PACF and the series itself, and Q3 fits an AR(1) model to it and
    prints a 4-step out-of-sample forecast.
    """

    # Q1
    df = pd.read_excel('C:\\Users\\hpatil\\Desktop\\fredgraph.xls',
                       skiprows=10)
    print(statsmodels.tsa.stattools.adfuller(df['CSUSHPISA'], maxlag=1))

    # The t-stat seems to be higher than the 1, 5 and 10 values
    # (presumably the 1%/5%/10% critical values), so we cannot reject the
    # hypothesis gamma = 0: a unit root is possible.

    # Q2
    # The level series (not its first difference) is analysed, which matches
    # the ARIMA(1, 0, 0) conclusion below.
    series = df['CSUSHPISA']
    fig = plt.figure(figsize=(12, 8))
    ax1 = fig.add_subplot(311)
    fig = statsmodels.graphics.tsaplots.plot_acf(series, lags=200, ax=ax1)
    ax2 = fig.add_subplot(312)
    fig = statsmodels.graphics.tsaplots.plot_pacf(series, lags=200, ax=ax2)
    # The third subplot is added last so that plt.plot draws into it.
    fig.add_subplot(313)
    plt.plot(series)
    plt.show()

    # From the graphs plotted, the ACF decreases very slowly and the PACF
    # cuts off at 2.  Thus we conclude this to be an ARIMA(1,0,0) = AR(1)
    # model.

    # Q3
    from statsmodels.tsa.arima_model import _arma_predict_out_of_sample
    values = series.values
    res = sm.tsa.ARMA(values, (1, 0)).fit()

    print(
        _arma_predict_out_of_sample(res.params,
                                    4,
                                    res.resid,
                                    res.k_ar,
                                    res.k_ma,
                                    res.k_trend,
                                    res.k_exog,
                                    endog=values,
                                    exog=None,
                                    start=len(values)))
예제 #13
0
def _arima_predict(rw, data, steps, order):
    """Forecast ``steps`` values of ``data`` from the fitted results ``rw``.

    ``data`` is differenced ``d`` times, the ARMA forecast is made on the
    differenced series, and the result is integrated back using the last
    ``d`` observations of ``data``.

    :param rw: fitted results; ``params``, ``resid``, ``k_trend`` and
        ``k_exog`` are read from it
    :param data: the undifferenced series
    :param steps: number of steps to forecast
    :param order: ``(p, d, q)`` -- AR order, differencing order, MA order
    :return: the forecast after undoing the differencing
    """
    d = order[1]
    _endog = np.diff(data, n=d)
    forecast = _arma_predict_out_of_sample(np.array(rw.params),
                                           steps,
                                           np.array(rw.resid),
                                           order[0],
                                           order[2],
                                           rw.k_trend,
                                           rw.k_exog,
                                           _endog,
                                           exog=None,
                                           method='css-mle')

    if d == 0:
        # Nothing was differenced, so the ARMA forecast is already on the
        # scale of ``data``.  ``data[-0:]`` would select the whole series and
        # there are no starting levels to integrate from, so the step below
        # must not run for d == 0.
        return forecast

    endog = data[-d:]
    forecast = unintegrate(forecast, unintegrate_levels(endog, d))[d:]
    return forecast
예제 #14
0
def MA_predict(data, p, w=None, step=1):
    """Forecast ``step`` values with a (weighted) moving average of ``data``.

    :param data: ts data
    :param p: number of trailing observations that are averaged
    :param w: optional weights; reversed before use.  When ``None`` or
        empty, equal weights ``1 / p`` are used
    :param step: predict step
    :return: the result of ``_arma_predict_out_of_sample`` called with the
        weights as parameters, ``p`` AR terms, no MA terms, no trend and no
        exogenous regressors
    """
    # ``w is None`` must be tested before slicing: ``None[::-1]`` raises
    # TypeError, and ``or`` on a numpy array is ambiguous.
    if w is None or len(w) == 0:
        weights = [1.0 / p] * p
    else:
        weights = w[::-1]
    residuals = [0]
    q = 0
    k_exog = 0
    k_trend = 0
    res = _arma_predict_out_of_sample(weights, step, residuals, p, q,
                                      k_trend, k_exog, endog=data)
    return res
예제 #15
0
def armaPredict(data, ratio):
    """Forecast the tail of every series in ``data`` with an ARMA model.

    For each ``data[i]`` the leading ``int(ratio * len(data[i]))`` points are
    the training data.  The ARMA order is selected by AIC, the model is
    fitted, 48 steps are forecast and all but the first are kept.  If any of
    that fails, a naive fallback is used instead: each value is the current
    point plus its difference from the previous point.

    :param data: indexed as ``data[i, :k]`` and ``data[i, j, 0]``
        (NOTE(review): presumably a 3-D numpy array -- confirm with caller)
    :param ratio: fraction of each series used for training
    :return: list with one forecast array per series
    """
    s = time.time()
    # (Original note: difference twice at the start and feed in the most
    # stationary data -- no differencing is done in this function.)
    result = []
    for i in range(len(data)):
        print('loop', i)
        split = int(ratio * len(data[i]))
        try:
            train_data = data[i, :split].ravel()
            order = sm.tsa.arma_order_select_ic(train_data,
                                                ic='aic')['aic_min_order']
            # Build the ARMA model with the selected order.
            model = ARMA(train_data, order=order)
            res = model.fit()
            # This step is debatable: it is unclear what ARMA itself uses to
            # produce its final result.
            pre = _arma_predict_out_of_sample(res.params,
                                              48,
                                              res.resid,
                                              res.k_ar,
                                              res.k_ma,
                                              res.k_trend,
                                              res.k_exog,
                                              endog=train_data,
                                              exog=None,
                                              start=len(train_data))
            result.append(pre[1:])
        except Exception:
            # Fitting can fail for many reasons; fall back to a simple direct
            # prediction.  ``Exception`` (not a bare ``except``) so that
            # Ctrl-C still stops a long run.
            pre = np.zeros(len(data[i]) - split - 1)
            for j in range(len(pre)):
                index = split + j
                # Prediction = value at this point plus the difference
                # between this point and the one before it.
                pre[j] = data[i, index, 0] + (data[i, index, 0] -
                                              data[i, index - 1, 0])
            result.append(pre)
        print(len(result))
    e = time.time()
    print('arma predict time:', e - s, 's')
    return result
예제 #16
0
def predict_out_of_sample_ARMA(data, AR=3, MA=0):
    """Fit an ARMA(AR, MA) model to ``data`` and forecast one step ahead.

    :param data: the series to model
    :param AR: autoregressive order
    :param MA: moving-average order
    :return: the one-step out-of-sample forecast following ``data``
    """
    res = sm.tsa.ARMA(data, order=(AR, MA)).fit(trend="nc")
    # get what you need for predicting one-step ahead
    params = res.params
    residuals = res.resid
    p = res.k_ar
    q = res.k_ma
    k_exog = res.k_exog
    k_trend = res.k_trend
    steps = 1
    # The forecast must be based on the series the model was fitted to, so
    # ``endog`` and ``start`` both refer to ``data``.
    prediction = _arma_predict_out_of_sample(params,
                                             steps,
                                             residuals,
                                             p,
                                             q,
                                             k_trend,
                                             k_exog,
                                             endog=data,
                                             exog=None,
                                             start=len(data))
    return prediction
예제 #17
0
    def predict_arma_next_days(self, item):
        """Forecast ``self.n_days`` values of column ``item`` of ``df_train``.

        The column is sorted by index.  The entry at ``self.fc`` is returned
        as the actual value, and the slice ``[0:self.fc]`` before it is the
        history on which an ARMA(``self.p``, ``self.q``) model with daily
        frequency is fitted.  The forecast starts at the end of the history.

        :return: ``(history, actual_at_fc, forecast)``
        """
        ordered = df_train[item].sort_index()
        actual_at_fc = ordered[self.fc]
        history = ordered[0:self.fc]

        arma_model = ARMA(history, order=(self.p, self.q), freq='D')
        fit_result = arma_model.fit(disp=False)

        forecast = _arma_predict_out_of_sample(
            fit_result.params,
            self.n_days,
            fit_result.resid,
            fit_result.k_ar,
            fit_result.k_ma,
            fit_result.k_trend,
            fit_result.k_exog,
            endog=history,
            exog=None,
            start=len(history),
        )
        return history, actual_at_fc, forecast
예제 #18
0
파일: arma.py 프로젝트: renewday/practice
def arma(data, p, q, n):
    """Pick the best ARMA order by in-sample error and forecast one step.

    Every order ``(i, j)`` with ``i in range(p)`` and ``j in range(q)`` is
    fitted to ``data``.  Each fit is scored by the sum of squared differences
    between its dynamic prediction starting at index ``n`` and ``data[n:]``.
    The lowest-scoring order is refitted and used for the forecast.  The
    sorted candidate list and the selected orders are printed.

    :param data: the series to model
    :param p: exclusive upper bound of the AR orders tried
    :param q: exclusive upper bound of the MA orders tried
    :param n: index at which the dynamic in-sample prediction starts
    :return: the first value of the one-step out-of-sample forecast
    :raises IndexError: if no candidate order could be fitted and scored
    """
    result = []
    for i in range(p):
        for j in range(q):
            try:
                candidate = sm.tsa.ARMA(data, (i, j)).fit()
                predicted = candidate.predict(n, dynamic=True)
                error = sum((predicted - np.array(data[n:])) ** 2)
            except Exception:
                # Best effort: an order that cannot be fitted or scored is
                # skipped.  ``Exception`` (not a bare ``except``) so that
                # Ctrl-C still interrupts the search.
                continue
            result.append((i, j, error))

    if not result:
        raise IndexError(
            "no ARMA order could be fitted (p=%r, q=%r)" % (p, q))

    result.sort(key=lambda entry: entry[2])
    select_p, select_q = result[0][0], result[0][1]
    # Single-argument print calls behave the same on Python 2 and 3.
    print(result)
    print(select_p)
    print(select_q)
    arma_mod = sm.tsa.ARMA(data, (select_p, select_q)).fit()

    forecast = _arma_predict_out_of_sample(arma_mod.params,
                                           1,
                                           arma_mod.resid,
                                           arma_mod.k_ar,
                                           arma_mod.k_ma,
                                           arma_mod.k_trend,
                                           arma_mod.k_exog,
                                           endog=data,
                                           exog=None,
                                           start=len(data))
    return forecast[0]
예제 #19
0
def wma(data, p, w=None, step=1):
    """ Predict the next value(s) of a series with a weighted moving average.

    :param list data: ts data
    :param int p: p parameter of MA, use the length of data (from right)
    :param list w: weight of WMA; reversed before use.  Anything that is not
        a ``list`` is ignored and equal weights ``1 / p`` are used instead
    :param int step: predict step
    :return: the predict of wma
    """
    if isinstance(w, list):
        coefficients = w[::-1]
    else:
        coefficients = [1.0 / p] * int(p)

    # Single zero residual placeholder; no MA terms, trend or exog are used.
    placeholder_residuals = [0]
    return _arma_predict_out_of_sample(coefficients,
                                       step,
                                       placeholder_residuals,
                                       p,
                                       0,
                                       0,
                                       0,
                                       endog=data)
예제 #20
0
파일: arma.py 프로젝트: harsshal/WQU
def main():
    """Run the three-part CSUSHPISA exercise.

    Q1 prints an augmented Dickey-Fuller test of the series, Q2 plots its
    ACF, PACF and the series itself, and Q3 fits an AR(1) model to it and
    prints a 4-step out-of-sample forecast.
    """

    # Q1
    df = pd.read_excel('C:\\Users\\hpatil\\Desktop\\fredgraph.xls',skiprows=10)
    print(statsmodels.tsa.stattools.adfuller(df['CSUSHPISA'],maxlag=1))

    # The t-stat seems to be higher than the 1, 5 and 10 values
    # (presumably the 1%/5%/10% critical values), so we cannot reject the
    # hypothesis gamma = 0: a unit root is possible.

    # Q2
    # The level series (not its first difference) is analysed, which matches
    # the ARIMA(1, 0, 0) conclusion below.
    series = df['CSUSHPISA']
    fig = plt.figure(figsize=(12,8))
    ax1 = fig.add_subplot(311)
    fig = statsmodels.graphics.tsaplots.plot_acf(series, lags=200, ax=ax1)
    ax2 = fig.add_subplot(312)
    fig = statsmodels.graphics.tsaplots.plot_pacf(series, lags=200, ax=ax2)
    # The third subplot is added last so that plt.plot draws into it.
    fig.add_subplot(313)
    plt.plot(series)
    plt.show()

    # From the graphs plotted, the ACF decreases very slowly and the PACF
    # cuts off at 2.  Thus we conclude this to be an ARIMA(1,0,0) = AR(1)
    # model.

    # Q3
    from statsmodels.tsa.arima_model import _arma_predict_out_of_sample
    values = series.values
    res = sm.tsa.ARMA(values, (1, 0)).fit()

    print(_arma_predict_out_of_sample(res.params, 4, res.resid, res.k_ar, res.k_ma, res.k_trend, res.k_exog, endog=values, exog=None, start=len(values)))
예제 #21
0
def forecast_oot():
    """Forecast out-of-time sales for every SKU in every sales file.

    For each file in the module-level ``sales_files``:

    * the pickled model hyper-parameters are loaded from ``result_path``;
    * the sales data is read from ``data_path``;
    * for every SKU and every training window in ``test_period`` the
      transformed weekly sales are forecast over the out-of-time window at
      the same position in ``oot_period``, using the stored ARMA parameters
      of that SKU;
    * all forecasts are concatenated and written as CSV to ``result_path``.

    The working directory is changed with ``os.chdir`` along the way and is
    left at ``result_path``.
    """
    # Training windows as [first week, last week]
    # (NOTE(review): values look like YYYYWW week codes -- confirm).
    test_period = [
        [201636, 201643],
        [201640, 201647],
        [201645, 201652],
        [201649, 201704],
        [201701, 201708]
    ]

    # Out-of-time windows, matched to ``test_period`` by position.
    oot_period = [
        [201648, 201652],
        [201701, 201704],
        [201705, 201708],
        [201709, 201713],
        [201714, 201717]
    ]

    for filename in sales_files:

        # Load the model hyperparameters for file
        model_file_name = rename_file(filename, 'HyperParameters', 'pickle')
        os.chdir(result_path)
        # NOTE: pickle.load can execute arbitrary code; only load
        # hyper-parameter files produced by this project.
        with open(model_file_name, 'rb') as model_file:
            model_params = pickle.load(model_file)

        # Read sku-sales data for forecasting
        os.chdir(data_path)
        df = pd.read_csv(filename)

        sku_group = df.groupby('ForecastUnitCode', as_index=False)
        sku_list = sku_group.groups.keys()

        total_predictions = []

        for sku in sku_list:
            df_sku = df[df.ForecastUnitCode.isin([sku])]
            print('-----------------------------------------------------')
            print('Result for SKU:', sku)
            for period_index, period in enumerate(test_period):

                x_train = df_sku[
                    (df_sku.ForecastWeek >= period[0]) &
                    (df_sku.ForecastWeek <= period[1])
                    ]
                x_train = x_train['Weekly_Volume_Sales'].reset_index(drop=True)
                x_log = transform_data(x_train)
                history = [x for x in x_log]

                for model_param in model_params:
                    if model_param['sku'] != sku:
                        continue

                    _, d_order, _ = model_param['best_cfg']

                    if d_order > 0:
                        print('Difference SKU %s with order %d' % (sku, d_order))
                        # No second order differencing exists in our model,
                        # hence only 1st order is required.
                        # NOTE(review): ``history`` is differenced again for
                        # every further entry matching this SKU -- confirm
                        # there is at most one entry per SKU.
                        history = difference(history)

                    print('week:', period_index)
                    params = model_param['params']
                    residuals = model_param['residuals']
                    p = model_param['p']
                    q = model_param['q']
                    k_exog = model_param['k_exog']
                    k_trend = model_param['k_trend']

                    oot_first, oot_last = oot_period[period_index]
                    y_real = df_sku[
                        (df_sku.ForecastWeek >= oot_first) &
                        (df_sku.ForecastWeek <= oot_last)
                    ].reset_index(drop=True)

                    # Forecast exactly as many weeks as there are actuals.
                    steps = len(y_real)

                    y_predicted_log = _arma_predict_out_of_sample(
                        params, steps, residuals, p, q, k_trend, k_exog,
                        endog=history, exog=None, start=len(history))

                    y_predicted = revert_to_order(y_predicted_log, x_log, d_order)
                    y_pred_series = pd.Series(y_predicted)

                    # Drop the first column before attaching the forecast.
                    y_real.drop(y_real.columns[[0]], axis=1, inplace=True)
                    y_real['Predicted_Weekly_Volume_Sales'] = y_pred_series
                    total_predictions.append(y_real)

        # Save predicted sales for respective files
        result_file_name = rename_file(filename, 'Result', 'csv')

        result_df = pd.concat(total_predictions)
        result_df.reset_index(drop=True, inplace=True)

        # Change path to result folder
        os.chdir(result_path)
        result_df.to_csv(result_file_name, sep=',')
        print('Forecasting completed for %s' % filename)
def _forecast_sku(df, models, analysis_windows, category, country):
    """Build weekly and monthly future forecasts for every SKU in ``df``.

    The observation window, future window and number of forecast steps are
    read from ``analysis_windows['future_forecast']``.  For each SKU the
    transformed ``actualVolume`` of the observation window is forecast with
    the stored ARMA parameters of every entry in ``models`` whose ``'sku'``
    matches; the rounded forecast is attached as ``forecastVolume`` to the
    SKU's rows inside the future window.

    :return: dict with ``'weeklyForecast'`` (the concatenated weekly frames)
        and ``'monthlyForecast'`` (the result of ``_monthly_sku_forecast``)
    """
    # Future forecast at week level
    sku_list = df.groupby('sku', as_index=False).groups.keys()

    future_cfg = analysis_windows['future_forecast']
    obs_window = future_cfg['obs_window_future']
    future_window = future_cfg['future_window']
    horizon = future_cfg['forecast_steps']
    forecast_select = future_cfg['forecast_select']  # read but not used below

    weekly_frames = []

    for sku in sku_list:
        sku_rows = df[df['sku'].isin([sku])]

        observed = sku_rows[(sku_rows['forecastWeek'] >= obs_window[0])
                            & (sku_rows['forecastWeek'] <= obs_window[1])]
        observed = observed['actualVolume'].reset_index(drop=True)
        observed_log = transform_data(observed)
        obs_mat = list(observed_log)

        for model in models:
            if model['sku'] != sku:
                continue

            p_order, d_order, q_order = model['best_cfg']

            if d_order > 0:
                print('Difference SKU %s with order %d' % (sku, d_order))
                obs_mat = difference(obs_mat)

            # Forecast
            y_pred_log = _arma_predict_out_of_sample(
                params=model['params'],
                steps=horizon,
                errors=model['residuals'],
                p=model['p'],
                q=model['q'],
                k_trend=model['k_trend'],
                k_exog=model['k_exog'],
                endog=obs_mat,
                start=len(obs_mat),
            )
            y_pred_series = pd.Series(
                revert_to_order(y_pred_log, observed_log, d_order))

            # Rows of the future window receive the forecast volumes.
            in_future = ((sku_rows['forecastWeek'] >= future_window[0])
                         & (sku_rows['forecastWeek'] <= future_window[1]))
            y_hat = sku_rows[in_future].reset_index(drop=True)
            y_hat['forecastVolume'] = round(y_pred_series, 0)

            weekly_frames.append(y_hat)

    res_week_forecast = pd.concat(weekly_frames)
    res_week_forecast.reset_index(drop=True, inplace=True)

    res_month_forecast = _monthly_sku_forecast(df=res_week_forecast,
                                               category=category,
                                               country=country)

    return {
        'weeklyForecast': res_week_forecast,
        'monthlyForecast': res_month_forecast
    }
예제 #23
0
    # Fit an ARMA(2, 0) model with trend="nc" on rows i..end-1 of the 'blue'
    # column.  `i` and `end` are presumably set by an enclosing loop whose
    # header is outside this excerpt.
    res = sm.tsa.ARMA(data['blue'].iloc[i:end], (2, 0)).fit(trend="nc")

    # get what you need for predicting one-step ahead
    params = res.params
    residuals = res.resid
    p = res.k_ar
    q = res.k_ma
    k_exog = res.k_exog
    k_trend = res.k_trend
    steps = 1

    # Forecast starting right after the last row of the fitting window.
    pred = _arma_predict_out_of_sample(params,
                                       steps,
                                       residuals,
                                       p,
                                       q,
                                       k_trend,
                                       k_exog,
                                       endog=data['blue'].iloc[i:end],
                                       exog=None,
                                       start=len(data['blue'].iloc[i:end]))
    # Collect the forecast of this window.
    out_sample_AR.append(pred)

# In[110]:

# 1-step ahead prediction for VAR model

# Container for the VAR forecasts (nothing is appended within this excerpt).
out_sample_VAR = []
# Slide a 100-row window over `data`, 112 start positions in total.
for i in range(0, 112):
    end = 100 + i
    model = api.VAR(data.iloc[i:end])
    # NOTE(review): the argument 3 is presumably the lag order -- confirm.
    results = model.fit(3)
예제 #24
0
# Overwrite one entry of the parameters before forecasting.
# NOTE(review): 'ma.L1.value' is presumably the first MA lag coefficient of
# a series named 'value'; `params`, `arma_mod` and `a_pd` are defined
# outside this excerpt.
params['ma.L1.value'] = 1
steps = 2
residuals = arma_mod.resid
p = arma_mod.k_ar
q = arma_mod.k_ma
k_exog = arma_mod.k_exog
k_trend = arma_mod.k_trend
y = a_pd

from statsmodels.tsa.arima_model import _arma_predict_out_of_sample

# Two-step forecast after the end of `y`; the return value is not stored.
_arma_predict_out_of_sample(params,
                            steps,
                            residuals,
                            p,
                            q,
                            k_trend,
                            k_exog,
                            endog=y,
                            exog=None,
                            start=len(y))

# -------------------------- Example --------------------------

import numpy as np
from scipy import stats
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm

from statsmodels.graphics.api import qqplot
예제 #25
0
    # Build one feature row: for every column name in itemrList, append the
    # last observed value of the fitting window followed by a one-step ARIMA
    # forecast for that column.
    # NOTE(review): this is the body of an outer loop that is not visible here
    # (`fc`, `fit_length`, `dateframe`, `x_sample`, `y_sample`, `p`, `d`, `q`
    # all come from outside the fragment).
    temp = []
    for iterm in itemrList:
    	# print "iterm = ",iterm
    	selector = dateframe[iterm]
    	timeSerize = selector[fc - fit_length:fc]
    	# print "timeSerize = ",len(timeSerize)
        model = ARIMA(timeSerize, order=(p,d,q), freq='D')
        fitting = model.fit(disp=False)
        params = fitting.params
        residuals = fitting.resid
        # NOTE(review): p and q are overwritten here with the fitted model's
        # k_ar / k_ma, and the same names feed order=(p,d,q) for the next
        # column -- confirm this carry-over is intended.
        p = fitting.k_ar
        q = fitting.k_ma
        k_exog = fitting.k_exog
        k_trend = fitting.k_trend
        # One-step-ahead forecast starting right after the fitting window
        forecast = _arma_predict_out_of_sample(params, 1, residuals, p, q, k_trend, k_exog, endog=timeSerize, exog=None, start=len(timeSerize))

        # forecast, fcasterr, conf_int = fitting.forecast(steps=1, alpha=.05)
        # Last value inside the fitting window (index fc-1)
        real = selector[fc-1:fc]
        # print "forecast = ",(forecast,type(forecast))
        # print "real = ",(real,type(real))
        temp.append(float(real))
        temp.append(float(forecast))
        # print "temp = ",temp
    x_sample.append(temp)
    # Target: the 'Close' value at position fc, i.e. the step being forecast
    y = dateframe['Close'][fc:fc+1]
    y_sample.append(float(y))


# Convert the collected rows to an array and report its shape (Python 2 print)
x_sample = np.array(x_sample)
print "x_sample = ",x_sample.shape
예제 #26
0
def train(filename):
    """
    Train ARIMA models per material, keep the lowest-MSE model and save it.

    For every material and every window in the module-level ``train_period``
    list, ``evaluate_models`` is run over ``p_range`` / ``d_range`` /
    ``q_range``.  The best model of each window is collected,
    ``find_best_model`` picks one per material, and the list of winners is
    written to ``<app_settings['model_path']>/model.pickle`` through
    ``save_model_to_disk``.

    NOTE(review): ``filename`` is not used -- both the input CSV path and the
    output file name are hard-coded below.  It is kept so existing callers
    keep working.

    :param filename: name of the input file (currently ignored)
    :return: None
    """
    # Positions of the start / end date inside each period pair
    begin = 0
    end = 1

    # NOTE(review): hard-coded, machine-specific input path.
    df = pd.read_csv(r'C:\Users\ashok.swarna\bosch_agg.csv')
    df['To_Date'] = pd.to_datetime(df.To_Date, format='%m/%d/%Y')

    # Restrict the run to a single material
    target_material = 'M303.160.117'
    df_1 = df[df['Material'].isin([target_material])]

    material_group = df_1.groupby('Material', as_index=False)
    material_list = material_group.groups.keys()

    material_best_model = []

    for material in material_list:
        print()
        print(material)

        # Select the material's rows to train & validate the model
        df_sku = df_1[df_1['Material'].isin([material])]

        price = get_unitprice(material, material_values, unit_price)
        best_period_models = []

        for period_index, tp in enumerate(train_period):
            print()
            print()

            # Rows from the beginning to the end of the train period
            df_train_period = df_sku[
                (df_sku['To_Date'] >= tp[begin]) & (df_sku['To_Date'] <= tp[end])]

            df_for_loss = df_train_period[['agg_closing_stock', 'Total_Issue_quantities']]

            # Rows from the beginning to the end of the in-time validation period
            df_validation_period = df_sku[
                (df_sku['To_Date'] >= validation_period[period_index][begin]) & (
                        df_sku['To_Date'] <= validation_period[period_index][end])
                ]

            df_mse_period = df_sku[
                (df_sku['To_Date'] >= mse_period[period_index][begin]) & (
                        df_sku['To_Date'] <= mse_period[period_index][end])
                ]
            print('%d train samples for %d period.' % (len(df_train_period), (period_index + 1)))
            print('%d validation samples for %d period.' % (len(df_validation_period), (period_index + 1)))
            print('%d mse samples for %d period.' % (len(df_mse_period), (period_index + 1)))

            # Select sales data for training & validation
            train_sales = df_train_period['Total_Issue_quantities'].reset_index(drop=True)
            validation_sales = df_validation_period['Total_Issue_quantities'].reset_index(drop=True)
            mse_sales = df_mse_period['Total_Issue_quantities'].reset_index(drop=True)

            train_valid_set = (train_sales, validation_sales, mse_sales)

            # Evaluate the best model of the selected train period
            (best_score, best_cfg, best_params, best_residuals, best_p, best_q,
             best_k_exog, best_k_trend, best_intercept, y_predict_log) = evaluate_models(
                train_valid_set, p_range, d_range, q_range, df_for_loss, price)

            # Forecast 4 steps with the selected model.  The AR/MA orders must
            # be the ones the parameters were estimated with (previously they
            # were hard-coded to 1/1, which mis-slices ``best_params`` for any
            # other order).
            # NOTE(review): ``endog`` is the date column, not a sales series,
            # and the result is never used -- confirm what was intended here.
            y_pred_log = _arma_predict_out_of_sample(params=best_params, steps=4, errors=best_residuals,
                                                     p=best_p, q=best_q, k_trend=best_k_trend,
                                                     k_exog=best_k_exog, endog=df_sku.To_Date)

            # 'Material' previously referenced the undefined name ``sku``
            best_period_model = {'best_cfg': best_cfg, 'mse': best_score, 'Material': material,
                                 'week': (period_index + 1),
                                 'residuals': best_residuals, 'p': best_p, 'q': best_q, 'k_exog': best_k_exog,
                                 'k_trend': best_k_trend,
                                 'params': best_params, 'intercept': best_intercept}
            best_period_models.append(best_period_model)

        # Select the best model over all periods
        best_model = find_best_model(best_period_models)

        # Add to the list of best models
        material_best_model.append(best_model)
        print('____________________________________________________________________________________________')
        print('____________________________________________________________________________________________')

    # Save the models to disk
    model_path = app_settings['model_path']
    model_file_name = 'model.pickle'

    model_file_path = path.join(model_path, model_file_name)
    # Previously referenced the undefined name ``sku_best_model``
    save_model_to_disk(model_file_path, material_best_model)

    print('Training completed')
예제 #27
0
        pass
    # Collect what is needed for predicting one step ahead.
    # NOTE(review): `res`, `dataset`, `count`, `j`, `vals`, `mseFile` and
    # `absFile` are defined outside this fragment (presumably by an enclosing
    # loop over `j`) -- confirm against the full script.
    params = res.params
    residuals = res.resid
    p = res.k_ar
    q = res.k_ma
    k_exog = res.k_exog
    k_trend = res.k_trend
    steps = 1

    # NOTE(review): the bare `except: pass` hides every failure.  When the
    # call raises, `prediction` is not assigned here, so the prints below use
    # whatever value it had before this block (or fail if it had none).
    try:
        prediction = _arma_predict_out_of_sample(params,
                                                 steps,
                                                 residuals,
                                                 p,
                                                 q,
                                                 k_trend,
                                                 k_exog,
                                                 endog=dataset,
                                                 exog=None,
                                                 start=len(dataset))
    except:
        pass
    count += 1

    print('Point %d prediction = %f' % (j * 100, prediction))
    print('Target: %f' % vals[j * 100])
    # Append "<point>\t<squared error>" to the MSE log
    with open(mseFile, 'a+') as rf:
        rf.writelines(
            ['%d\t%f\n' % (j * 100, (prediction - vals[j * 100])**2)])
    # Append "<point>\t<error>" to the second log.
    # NOTE(review): this writes the signed difference, not its absolute value,
    # despite the file variable being called absFile.
    with open(absFile, 'a+') as rf:
        rf.writelines(['%d\t%f\n' % (j * 100, (prediction - vals[j * 100]))])
예제 #28
0
def predict(pq_file, train_file):
    """
    Forecast 60 steps of play counts per artist with ARMA and write a CSV.

    ``pq_file`` holds one ``artist_id,p,q`` line per artist.  ``train_file``
    is read in groups of four lines: the first line of a group carries the
    artist id (its last two characters are dropped) and the second line the
    comma-separated integer series.

    For every artist an ARMA model is fitted on ``series[105:183]``, trying
    the orders ``(p, q)``, ``(p, q - p)`` and ``(0, q)`` in turn.  If all of
    them fail, the observed values ``series[123:123 + steps]`` are stored in
    place of a forecast and ``[]`` in place of a model.  Exactly one entry per
    artist is appended to the model / forecast / score lists, so the forecasts
    stay aligned with the artist ids when the result file is written.

    The forecasts are written to ``./new_mars_tianchi_artist_plays_predict.csv``
    as ``artist_id,value,date`` lines and the sum of all scores is printed.

    :param pq_file: path of the file with the per-artist (p, q) orders
    :param train_file: path of the file with the per-artist training series
    :return: None
    :raises IOError: if either input file does not exist
    """
    steps = 60  # forecast horizon; also needed by the last-resort fallback

    # Report every missing input, then fail explicitly instead of crashing
    # later on an unbound file handle.
    missing_input = False
    if not os.path.isfile(pq_file):
        print("pq_file error")
        missing_input = True
    if not os.path.isfile(train_file):
        print("train_file error")
        missing_input = True
    if missing_input:
        raise IOError("pq_file and/or train_file does not exist")

    with open(pq_file, 'r') as pq_fd:
        pq_cont = pq_fd.readlines()
    with open(train_file, 'r') as train_fd:
        train_cont = train_fd.readlines()

    play_data = []
    artist_id = []
    score = []
    for index in range(1, len(train_cont), 4):
        play_data.append(train_cont[index])
        artist_id.append(train_cont[index - 1][:-2])
    print(len(play_data))

    artist_pq = {}
    for item in pq_cont:
        oneline_list = item.split(',')
        print(oneline_list)
        # strip() instead of [:-1]: a last line without a trailing newline
        # must not lose its final digit.
        artist_pq[oneline_list[0]] = [int(oneline_list[1]), int(oneline_list[2].strip())]

    arma_model = []
    pre_data = []
    for one_id, one_train_data in zip(artist_id, play_data):
        p, q = artist_pq[one_id]
        list_data = [int(value) for value in one_train_data.split(',')]
        dta = pd.Series(list_data[105:183])
        dta.index = pd.Index(sm.tsa.datetools.dates_from_range('2046', '2123'))
        actual = list_data[123:123 + steps]

        # Candidate orders, tried in sequence until one fits and forecasts.
        for attempt, order in enumerate(((p, q), (p, q - p), (0, q))):
            try:
                one_model = sm.tsa.ARMA(dta, order).fit(disp=-1, trend="c", solver='powell', method="css")
                pre_result = _arma_predict_out_of_sample(one_model.params, steps, one_model.resid,
                                                         one_model.k_ar, one_model.k_ma,
                                                         one_model.k_trend, one_model.k_exog,
                                                         endog=dta, exog=None, method='ols',
                                                         start=len(dta))
                one_score = Calculate_score(actual, pre_result)
            except Exception:
                # Nothing has been appended yet, so the lists stay aligned.
                continue
            # Only the first candidate clamps very poor scores to 0.
            if attempt == 0 and one_score < -100:
                one_score = 0
            break
        else:
            # Every candidate failed: fall back to the observed values.
            one_model = []
            pre_result = actual
            one_score = Calculate_score(actual, pre_result)
            print(list_data)
            print(one_id)

        arma_model.append(one_model)
        pre_data.append(pre_result)
        score.append(one_score)

    # Integer date labels for the 60 forecast steps (2015-09-01..30, 2015-10-01..30)
    date_list = list(range(20150901, 20150931)) + list(range(20151001, 20151031))

    with open("./new_mars_tianchi_artist_plays_predict.csv", 'w') as result_data_fd:
        for item, art_id in zip(pre_data, artist_id):
            for value, one_date in zip(item, date_list):
                result_data_fd.write("%s,%s,%s\n" % (art_id, str(int(value)), one_date))
    print(sum(score))
예제 #29
0
def predict(data, Ds, AL, steps):
    """
    Build three ``steps``-ahead forecasts for the first series in ``data``.

    1. "ARMA": one ARMA(3, 0) fit on the series without its last ``steps + 1``
       points, forecast ``steps`` values in a single call.
    2. "rARMA": the model is refitted for each horizon and a one-step forecast
       is written into a deep copy of ``data``.
       NOTE(review): the refits read from the copy, but the forecast call is
       given the original series as ``endog`` -- confirm this is intended.
    3. "Summarized": ``(symmetric-padded tail of Ds + linear trend of AL) / 10``.

    Each result is a Series that starts with the last value before the
    forecast window and is indexed by the matching daily dates.  All three
    series are printed.

    :param data: mapping / frame whose first key selects the series to forecast
    :param Ds: sequence that is padded symmetrically to supply ``steps`` values
    :param AL: sequence a straight line is fitted to via ``linreg``
    :param steps: number of points to forecast
    :return: tuple ``(arma_t, arma_t_r, summarized_t)``
    """
    first_key = data.keys()[0]
    series = data[first_key]
    n_obs = len(series)
    anchor = n_obs - steps - 1  # position of the last value before the window

    def window_index():
        # Dates of the anchor point plus the `steps` forecast positions
        return DatetimeIndex([series.index[anchor + i] for i in range(steps + 1)], freq="D")

    # --- N-step prediction using ARMA on the initial time series ---------
    fitted = sm.tsa.ARMA(series[0:anchor], (3, 0)).fit()
    direct = _arma_predict_out_of_sample(fitted.params, steps, fitted.resid,
                                         fitted.k_ar, fitted.k_ma,
                                         fitted.k_trend, fitted.k_exog,
                                         endog=series, exog=None, start=n_obs - steps)

    direct_values = [series[anchor]] + list(direct)
    arma_t = Series(direct_values, index=window_index())

    print("ARMA: \n",arma_t)

    # --- N-step prediction using recursive (refitted) ARMA ---------------
    pred = deepcopy(data)
    one_step = 1
    for horizon in range(steps, 0, -one_step):
        refit = sm.tsa.ARMA(pred[first_key][0:(n_obs - 1 - horizon)], (3, 0)).fit()
        pred[first_key][n_obs - horizon] = _arma_predict_out_of_sample(
            refit.params, one_step, refit.resid,
            refit.k_ar, refit.k_ma, refit.k_trend, refit.k_exog,
            endog=series, exog=None, start=n_obs - horizon)[0]

    recursive_values = [series[anchor]]
    recursive_values.extend(pred[first_key][n_obs - steps:(n_obs + 1)])
    arma_t_r = Series(recursive_values, index=window_index())

    print("rARMA: \n",arma_t_r)

    # --- N-step prediction using summarization features ------------------
    padded = np.pad(Ds, steps, mode='symmetric')
    tail = list(padded[len(padded) - steps:])

    # The result is unused here; the call is kept as in the original code.
    _mean, _std = stanDev(series)

    slope, intercept = linreg(range(len(AL)), AL)
    trend = [slope*index + intercept for index in range(len(AL)+steps)]

    summary = [(tail[i]+trend[len(AL)-1+i])/10 for i in range(steps)]

    summary_values = [series[anchor]] + summary
    summarized_t = Series(summary_values, index=window_index())
    print("Summarized: \n",summarized_t)

    return (arma_t, arma_t_r, summarized_t)
예제 #30
0
    def model_fit_pred(self, num):
        """
        Fit an ARMA model per artist and forecast ``num`` steps ahead.

        For each ``(play_data, artist_id)`` pair the comma-separated series is
        parsed, ``series[45:123]`` / ``series[123:]`` are stored as train /
        test data and a model is fitted on ``series[105:183]``.  The initial
        orders come from ``self.get_cut_off_value`` applied to the ACF (p) and
        the PACF (q).
        NOTE(review): textbook identification reads the AR order off the PACF
        and the MA order off the ACF -- confirm this assignment is intended.

        The orders ``(p, q)``, ``(p, q - p)`` and ``(0, q)`` are tried in
        turn.  If all of them fail, the observed values
        ``series[123:123 + steps]`` are stored in place of a forecast, ``[]``
        in place of a model and ``0, 0`` as orders.  Exactly one entry per
        artist is appended to ``arma_model``, ``ar_p_value``, ``ma_q_value``,
        ``pre_data`` and ``score`` so the lists stay aligned with
        ``artist_id``.

        One ``artist_id,p,q`` line per artist is written to
        ``./artist_pq_value.txt``.

        :param num: number of steps to forecast (converted with ``int``)
        :return: None (no success flag is returned)
        """
        steps = int(num)  # needed by every branch, including the fallback

        with open('./artist_pq_value.txt', 'w') as artist_pq_value_fd:
            for item, artist_id in zip(self.play_data, self.artist_id):
                list_data = [int(value) for value in item.split(',')]

                # divide train data and test data
                self.train_data.append(list_data[45:123])
                self.test_data.append(list_data[123:])
                dta = pd.Series(list_data[105:183])
                dta.index = pd.Index(sm.tsa.datetools.dates_from_range('2046', '2123'))
                p = self.get_cut_off_value(sm.tsa.acf(dta, nlags=10))
                q = self.get_cut_off_value(sm.tsa.pacf(dta, nlags=10))
                actual = list_data[123:123 + steps]

                # Candidate orders, tried in sequence until one works.
                for attempt, (try_p, try_q) in enumerate(((p, q), (p, q - p), (0, q))):
                    try:
                        one_model = sm.tsa.ARMA(dta, (try_p, try_q)).fit(
                            disp=-1, trend="c", solver='powell', method="css")
                        pre_result = _arma_predict_out_of_sample(
                            one_model.params, steps, one_model.resid,
                            one_model.k_ar, one_model.k_ma,
                            one_model.k_trend, one_model.k_exog,
                            endog=dta, exog=None, method='ols', start=len(dta))
                        one_score = self.Calculate_score(actual, pre_result)
                    except Exception:
                        # Nothing has been appended yet, so the lists stay aligned.
                        continue
                    # Only the first candidate clamps very poor scores to 0.
                    if attempt == 0 and one_score < -100:
                        one_score = 0
                    self.ar_p_value.append(try_p)
                    self.ma_q_value.append(try_q)
                    # Orders reported in the output file are those of the fit
                    p = one_model.k_ar
                    q = one_model.k_ma
                    break
                else:
                    # Every candidate failed: fall back to the observed values.
                    one_model = []
                    pre_result = actual
                    p = 0
                    q = 0
                    self.ar_p_value.append(p)
                    self.ma_q_value.append(q)
                    one_score = self.Calculate_score(actual, pre_result)
                    print(list_data)
                    print(artist_id)

                self.arma_model.append(one_model)
                self.pre_data.append(pre_result)
                self.score.append(one_score)

                artist_pq_value_fd.write("%s,%d,%d\n" % (artist_id, p, q))
예제 #31
0
파일: ts.py 프로젝트: Karlos7692/Thesis
 def predict(self):
     """Return the one-step-ahead out-of-sample forecast of ``self.model``.

     The forecast starts right after the last observation of
     ``self.model_info.endog`` and uses the parameters, residuals and
     order information stored on ``self.model``.
     """
     fitted = self.model
     endog = self.model_info.endog
     model_args = (fitted.params, 1, fitted.resid, fitted.k_ar, fitted.k_ma,
                   fitted.k_trend, fitted.k_exog)
     return _arma_predict_out_of_sample(*model_args,
                                        endog=endog,
                                        exog=None,
                                        start=endog.shape[0])
예제 #32
0
# In[663]:

# NOTE(review): the test below is applied to the autocorrelation function of
# the residuals, not to the residuals themselves -- confirm this is intended.
sm.stats.diagnostic.acorr_ljungbox(sm.tsa.stattools.acf(arma_mod.resid),lags=20)


# We start with the <a href="https://en.wikipedia.org/wiki/Ljung%E2%80%93Box_test" > Ljung–Box test </a>. It tests whether any of a group of autocorrelations of a time series is different from zero. The Ljung–Box test is reported to have better small-sample properties than the Box–Pierce statistic.
# The p-values (the second array) are all larger than 5%, so the hypothesis of no autocorrelation is not rejected, which suggests the model fits reasonably well.

# In[664]:

# Buffer of 3900 points: the fitted values are copied into the first
# len(my_data) entries and the next `sample` entries receive the forecast.
# NOTE(review): the AR/MA orders are hard-coded as 3 and 1 here, and the
# forecast is multiplied by `sample` (10) before it is stored -- confirm that
# scaling is intentional.
size=3900
sample=10
predict_data=np.zeros(size)
predict_data[0:len(my_data)] = np.asarray(fitresult[0:len(my_data)]).copy()
from statsmodels.tsa.arima_model import _arma_predict_out_of_sample
predict_data[len(my_data):len(my_data)+sample]= _arma_predict_out_of_sample(arma_mod.params, sample, arma_mod.resid, 3, 1, arma_mod.k_trend, arma_mod.k_exog, endog=my_data, exog=None, start=len(my_data))*sample


# Above, we predict future values based on our fitted model.

# In[665]:

# Plot the original data, the fitted values and the predicted values,
# zoomed in on x = 3750..3850.
xlim(3750,3850)
ylim(-0.03,0.03)
plot(my_data,'r',linewidth=2, label="Original Data")
plot(fitresult,'b', linewidth=2, label="ARMA(3,1) Model Fitted")
plot(predict_data[0:len(my_data)+sample], 'g',linewidth=2, linestyle='--', label="Predict Values")
legend(loc='upper right')


# The prediction extends 10 steps beyond the data. <br>
예제 #33
0
def predict_with_residues(filenames):
    """
    Forecast out-of-time weekly sales per SKU from stored ARIMA parameters.

    For every file the pickled model parameters are loaded from
    ``result_path`` and the sales data is read from ``data_path``.  For each
    SKU and each test period the transformed (and, if ``d > 0``, differenced)
    sales history is fed to ``_arma_predict_out_of_sample`` with the SKU's
    stored parameters to forecast 4 steps (5 when the matching out-of-time
    period has more than 4 rows).  A second, promo-adjusted forecast adds the
    weighted promo columns taken from ``best_ols_params['pvalues']``.  Both
    forecasts are reverted with ``revert_to_order`` and stored next to the
    actuals; the combined frame is written as a CSV into ``result_path``.

    Side effects: changes the process working directory with ``os.chdir``.

    :param filenames: iterable of data file names to process
    :return: None
    """
    # Sets of 8 weeks fed into ARIMA for forecasting out-of-time samples
    test_period = [[201706, 201713],
                   [201710, 201717],
                   [201714, 201721]
                   ]

    # Out-of-time period that belongs to the test period at the same position
    oot_period = [[201714, 201717],
                  [201718, 201721],
                  [201722, 201726]
                  ]

    for filename in filenames:

        # Load the model hyperparameters for the file
        model_file_name = rename_file(filename, 'HyperParameters', 'pickle')
        os.chdir(result_path)
        # NOTE(security): unpickling executes arbitrary code; only load model
        # files that this project produced itself.
        with open(model_file_name, 'rb') as model_file:
            model_params = pickle.load(model_file)

        # Read SKU sales data for forecasting
        os.chdir(data_path)
        df = pd.read_csv(filename)

        sku_group = df.groupby('ForecastUnitCode', as_index=False)
        sku_list = sku_group.groups.keys()

        total_predictions = []

        for sku in sku_list:
            df_sku = df[df.ForecastUnitCode.isin([sku])]
            print('-----------------------------------------------------')
            print('Result for SKU:', sku)
            for period_index, period in enumerate(test_period):

                # Data set to be fed for forecasting
                x_valid = df_sku[
                    (df_sku.ForecastWeek >= period[0]) &
                    (df_sku.ForecastWeek <= period[1])
                    ]
                x_valid_sales = x_valid['Weekly_Volume_Sales'].reset_index(drop=True)
                x_log = transform_data(x_valid_sales)
                history = list(x_log)

                for model_param in model_params:
                    if model_param['sku'] != sku:
                        continue

                    # Only the differencing order is needed from the config
                    _, d_order, _ = model_param['best_cfg']

                    if d_order > 0:
                        print('Difference SKU %s with order %d' % (sku, d_order))
                        # No second order differencing exists in our model, hence only 1st order is required
                        history = difference(history)

                    print('Period:', period_index + 1)
                    params = model_param['params']
                    residuals = model_param['residuals']
                    p = model_param['p']
                    q = model_param['q']
                    k_exog = model_param['k_exog']
                    k_trend = model_param['k_trend']
                    best_ols_params = model_param['best_ols_params']

                    steps = 4

                    y_actual = df_sku[
                        (df_sku.ForecastWeek >= oot_period[period_index][0]) & (
                                df_sku.ForecastWeek <= oot_period[period_index][1])].reset_index(
                        drop=True)

                    # The last out-of-time period spans five weeks
                    if len(y_actual) > 4:
                        steps = 5

                    y_predicted_log = _arma_predict_out_of_sample(params, steps, residuals, p, q, k_trend, k_exog,
                                                                  endog=history, exog=None, start=len(history))

                    y_pred_promo_log = y_predicted_log.copy()

                    try:
                        if len(best_ols_params['pvalues'].keys()) > 1:
                            # SKU has promo significance
                            promo_sig_values = []
                            ols_pvalues_dict = best_ols_params['pvalues']
                            for index, row in y_actual.iterrows():
                                result = 0
                                for key, value in ols_pvalues_dict.items():
                                    if key != 'const':
                                        result += (row[key] * value)
                                promo_sig_values.append(result)

                            y_pred_promo_log += promo_sig_values

                    except KeyError:
                        # A promo column or the 'pvalues' entry is missing:
                        # skip this model entry.
                        print('Key error %s' % sku)
                        continue

                    print()
                    print('-----------------')
                    print()

                    y_predicted = revert_to_order(y_predicted_log, x_log, d_order)
                    y_pred_series = pd.Series(y_predicted)

                    # Drop the first column of the actuals before adding the forecasts
                    y_actual.drop(y_actual.columns[[0]], axis=1, inplace=True)
                    y_actual['Predicted_Weekly_Volume_Sales'] = y_pred_series

                    y_predicted_promo = revert_to_order(y_pred_promo_log, x_log, d_order)
                    y_pred_promo_series = pd.Series(y_predicted_promo)
                    y_actual['Promo_Weekly_Volume_Sales'] = y_pred_promo_series
                    total_predictions.append(y_actual)

        # Save predicted sales for the respective file
        result_file_name = rename_file(filename, 'Result', 'csv')

        result_df = pd.concat(total_predictions)
        result_df.reset_index(drop=True, inplace=True)

        # Change path to the result folder
        os.chdir(result_path)
        result_df.to_csv(result_file_name, sep=',')
        print('Forecasting completed for %s' %filename)
예제 #34
0
# NOTE(review): `arima100`, `params`, `residuals`, `p` and `hpi_log` are
# defined earlier in the original script and are not visible in this fragment.
q = arima100.k_ma
k_exog = arima100.k_exog
k_trend = arima100.k_trend
steps = 4
#
# Obtain the Information Criterion (IC) values
# (bare expressions: the values are only shown in an interactive session)
#
arima100.aic  # Akaike Information Criterion (AIC)
arima100.bic  # Bayesian Information Criterion (BIC)

# Forecast `steps` values beyond the end of hpi_log with the private
# out-of-sample helper
pred = _arma_predict_out_of_sample(params,
                                   steps,
                                   residuals,
                                   p,
                                   q,
                                   k_trend,
                                   k_exog,
                                   endog=hpi_log,
                                   exog=None,
                                   start=len(hpi_log))
#
# Provide one-, two-, three- and four-month forecasts; np.exp is applied to
# the predictions before they are labelled
#
output = pd.Series(np.exp(pred),
                   index=[
                       'One-month Forecast', 'Two-month Forecast',
                       'Three-month Forecast', 'Four-month Forecast'
                   ])

# NOTE(review): the message mentions three forecasts, but output[0:4] prints
# all four.
print('Results for one month, two-month and three-month forecasts:',
      output[0:4])
예제 #35
0
파일: ewma.py 프로젝트: whsdu/glocalme
def queryandinsert():
    """ This is the main function which will be call by main... it integrate several other functions.
    Please do not call this function in other pack, otherwise it will cause unexpected result!!!!"""
    global gtbuDict             # gtbuDict, being used to store query data from gtbu database.....
    global omsDict              # being used to store query data from OMS database.....
    global presisDict
    global counter
    global testingDict

    starttime = datetime.datetime.now()

    print len(presisDict)
    print "connect to databae!"

    # connect to the database use my own toolkits
    querydbinfoOMS = getdbinfo('OMS')
    querydbnameOMS = "wifi_data"

    querydbinfoGTBU = getdbinfo("GTBU")
    querydbnameGTBU = "ucloudplatform"

    insertdbinfo = getdbinfo('REMOTE')
    insertdbname = 'login_history'

    # print the database information for verification
    for key, value in querydbinfoOMS.iteritems():
        print key + " : " + str(value)

    queryStatementRemote = """
    SELECT epochTime,visitcountry,onlinenum
    FROM t_fordemo
    WHERE butype =2 AND visitcountry IN ('JP','DE','TR') AND epochTime BETWEEN DATE_SUB(NOW(),INTERVAL 2 DAY) AND NOW()
    ORDER BY epochTime ASC
    """
    # get the online data which will be used to calculate the daily uer number ( Daily user number is bigger than the max number...
    # and the max number is actually what being used in this scenario
    queryStatementTraining = """
    SELECT t1,t2,DATEDIFF(t2,t1) AS dif,imei,visitcountry FROM
    (
    SELECT DATE(logindatetime) AS t1,DATE(logoutdatetime) AS t2, imei,visitcountry
    FROM t_usmguserloginlog
    WHERE visitcountry IN ('JP','DE','TR')
    ) AS z
    GROUP BY t1,t2,imei
    """

    # (output data) get the max online number for each of these countries every day ( this record is incomplete due to the constant network partition
    # therefore a lot of corresponding operation is necessary for aligning the input and output date by day!...
    queryStatementOnline ="""
    SELECT epochTime,visitcountry,MAX(onlinenum)
    FROM
    (
    SELECT DATE(epochTime) AS epochTime,visitcountry,onlinenum
    FROM t_fordemo
    WHERE butype =2 and visitcountry IN ('JP','DE','TR')
    ) AS z
    GROUP BY epochTime,visitcountry
    """

    # (input data) get the order number information which will be used to calculate the daily maximum number for each country...
    # this number could be ridiculously large with respect to the real number for some specific countries.
    querystatementOMS = """
    SELECT DATE(date_goabroad),DATE(date_repatriate),DATEDIFF(date_repatriate,date_goabroad),imei,package_id FROM tbl_order_basic
    WHERE imei IS NOT NULL AND (DATE(date_repatriate)) > '2016-01-01' AND DATE(date_goabroad) < DATE(NOW())
    ORDER BY date_repatriate ASC
    """

    querystatementOMSCount = """
    SELECT  date_goabroad,date_repatriate,DATEDIFF(date_repatriate,date_goabroad),t1.package_id,t3.iso2 FROM tbl_order_basic AS t1
    LEFT JOIN tbl_package_countries AS t2
    ON t1.package_id = t2.package_id
    LEFT JOIN tbl_country AS t3
    ON t2.country_id = t3.pk_global_id
    WHERE t1.data_status = 0 AND DATE(date_goabroad) BETWEEN DATE(NOW()) AND DATE_ADD(NOW(),INTERVAL 3 MONTH) OR
    (
    DATE(date_repatriate) >= DATE(NOW())
    )
    """

    # establish connection to the mysql databases................
    querydbGTBU = MySQLdb.connect(user = querydbinfoGTBU['usr'],
                                  passwd = querydbinfoGTBU['pwd'],
                                  host = querydbinfoGTBU['host'],
                                  port = querydbinfoGTBU['port'],
                                  db = querydbnameGTBU)
    querydbOMS = MySQLdb.connect(user = querydbinfoOMS['usr'],
                                 passwd = querydbinfoOMS['pwd'],
                                 host = querydbinfoOMS['host'],
                                 port = querydbinfoOMS['port'],
                                 db = querydbnameOMS)
    insertdb = MySQLdb.connect(user = insertdbinfo['usr'],
                               passwd = insertdbinfo['pwd'],
                               host = insertdbinfo['host'],
                               port = insertdbinfo['port'],
                               db = insertdbname)

    queryCurGTBU = querydbGTBU.cursor()
    queryCurOMS = querydbOMS.cursor()
    insertCur = insertdb.cursor()


    print "executing query!!! By using generator!!!"
    insertCur.execute(queryStatementRemote)
    remoteGenerator = fetchsome(insertCur,100) #fetchsome is a generator which will fetch a certain number of query each time.

    for row in remoteGenerator:
        accumulatOnlineNumber(row,testingDict)

    onlineList = getTestingList(testingDict)

    countryList = onlineList[1]
    jpIndex = countryList.index('JP')
    datalist = onlineList[2][jpIndex]
    timelist = onlineList[0]

    tsJP = Series(datalist,index = timelist)
    df = DataFrame()
    df['JP'] = tsJP

    print df.index
    print df.columns

    print df

    tsJP_log = np.log(tsJP)
    lag_acf = acf(tsJP_log,nlags=200)
    lag_pacf = pacf(tsJP_log,nlags=200,method='ols')

    # model = ARIMA(tsJP_log,order=(2,1,2))
    model = ARMA(tsJP_log,(5,2))
    res = model.fit(disp=-1)


    print "Here is the fit result"
    print res

    params = res.params
    residuals = res.resid
    p = res.k_ar
    q = res.k_ma
    k_exog = res.k_exog
    k_trend = res.k_trend
    steps = 300

    newP = _arma_predict_out_of_sample(params, steps, residuals, p, q, k_trend, k_exog, endog=tsJP_log, exog=None, start=len(tsJP_log))
    newF,stdF,confiF = res.forecast(steps)

    print newP
    newP = np.exp(newP)
    print newP

    print " Forecast below!!"
    print newF
    newF = np.exp(newF)
    print newF
    print stdF
    stdF = np.exp(stdF)
    print stdF

    x_axis = range(len(lag_acf))
    y_axis = lag_acf

    onlineEWMA=go.Scatter(
        x = x_axis,
        y = y_axis,
        mode = 'lines+markers',
        name = "lag_acf"
    )

    onlinePre=go.Scatter(
        x = x_axis,
        y = newP,
        mode = 'lines+markers',
        name = "predictJP"
    )

    layout = dict(title = 'predicewma',
              xaxis = dict(title = 'Date'),
              yaxis = dict(title = 'online Number'),
              )

    data = [onlineEWMA,onlinePre]
    fig = dict(data=data, layout=layout)

    plot(fig,filename ="/ukl/apache-tomcat-7.0.67/webapps/demoplotly/EWMAprediction.html",auto_open=False)
예제 #36
0
train = y[: 200]
test = y[200 :240]

y = range(1000)
random.shuffle(y)
#for i in range(1, 250):
#    y[i] += y[i - 1]
train = y[: 500]
test = y[500 :700]

#  Now, optionally, we can add some dates information. For this example, we'll use a pandas time series.
res = sm.tsa.stattools.arma_order_select_ic(train, ic='aic')
arma_mod = sm.tsa.ARMA(train, order=res.aic_min_order)
arma_res = arma_mod.fit(trend='nc', disp=-1)

#print res.params
# get what you need for predicting one-step ahead
params = arma_res.params
residuals = arma_res.resid
p = arma_res.k_ar
q = arma_res.k_ma
k_exog = arma_res.k_exog
k_trend = arma_res.k_trend
steps = 300

print y[700: ]
print _arma_predict_out_of_sample(params, steps, residuals, p, q, k_trend, k_exog, endog=test, exog=None, start=len(test))

#for y1, y2 in zip(y, y_):
#    print '%f\t%f' % (y1, y2)
예제 #37
0
        # Fitting window: the slice of ``selector`` from fc - fit_length up to fc.
        timeSerize = selector[fc - fit_length:fc]
        # print "timeSerize = ",len(timeSerize)
        # Fit an ARIMA(p, d, q) model on the window (declared daily frequency).
        model = ARIMA(timeSerize, order=(p, d, q), freq='D')
        fitting = model.fit(disp=False)
        # Pull out what the out-of-sample predictor needs.
        params = fitting.params
        residuals = fitting.resid
        # NOTE(review): p and q are rebound here and are also the inputs to
        # ``order=(p, d, q)`` above, so a later pass through this code would
        # reuse the fitted values — confirm this is intended.
        p = fitting.k_ar
        q = fitting.k_ma
        k_exog = fitting.k_exog
        k_trend = fitting.k_trend
        # Forecast a single step (steps=1) starting right after the window.
        forecast = _arma_predict_out_of_sample(params,
                                               1,
                                               residuals,
                                               p,
                                               q,
                                               k_trend,
                                               k_exog,
                                               endog=timeSerize,
                                               exog=None,
                                               start=len(timeSerize))

        # forecast, fcasterr, conf_int = fitting.forecast(steps=1, alpha=.05)
        # One-element slice of ``selector`` just before fc (the last value of
        # the fitting window).
        real = selector[fc - 1:fc]
        # print "forecast = ",(forecast,type(forecast))
        # print "real = ",(real,type(real))
        # Append the pair (window's last value, forecast) to the feature row.
        temp.append(float(real))
        temp.append(float(forecast))
    # print "temp = ",temp
    # Store the finished feature row and its target: the 'Close' value at fc.
    x_sample.append(temp)
    y = dateframe['Close'][fc:fc + 1]
    y_sample.append(float(y))
예제 #38
0
    #  as you need a couple of measurements first. This means that the first predictions can be off a bit.)
    # Absolute prediction error per test position (stays 0 where no prediction is made).
    prediction_residuals_abs = np.zeros(test_length)
    count = 0  # number of positions whose absolute error exceeded ``threshold``
    count_pos = 0  # not updated in the visible part of this fragment
    #threshold = 0.3

    # Make the predictions for the test data, using only the ARMA model generated with the training data
    for position in range(measures_needed, test_length):
        #print("Looking at residuals:", prediction_residuals[position-measures_needed:position])
        #print("And previous values:", series_test[position-measures_needed:position])
        # One-step prediction from the previous ``measures_needed`` observations
        # and the residuals of the previous ``measures_needed`` predictions.
        predictions[position] = _arma_predict_out_of_sample(
            params,
            1,
            prediction_residuals[position - measures_needed:position],
            p,
            q,
            k_trend,
            k_exog,
            endog=series_test[position - measures_needed:position],
            exog=None,
            start=measures_needed)
        # Record the signed error so it can feed later predictions, and its
        # absolute value for thresholding.
        prediction_residuals[
            position] = series_test[position] - predictions[position]
        resi_abs = np.abs(series_test[position] - predictions[position])
        prediction_residuals_abs[position] = resi_abs
        # We don't throw any alarms for the first max(p, q) predictions,
        # as the first couple of predictions are typically more off then others
        # NOTE(review): the guard below skips positions up to
        # 2 * measures_needed; whether that equals "the first max(p, q)
        # predictions" depends on how measures_needed is defined — confirm.
        if position > 2 * measures_needed and resi_abs > threshold:
            count += 1
            # Index of the offending observation; only referenced by the
            # commented-out print below in this fragment.
            time = series_test[position:position + 1].index
            #print("Alert on {}: {} positive".format(time, attack_at_time(time)))
예제 #39
0
#
#
#
# r,q,p = sm.tsa.acf(resid.values.squeeze(), qstat=True)
# data = np.c_[range(1,41), r[1:], q, p]
# table = pd.DataFrame(data, columns=['lag', "AC", "Q", "Prob(>Q)"])
# # print(table.set_index('lag'))
#
# predict_sunspots = arma_mod20.forecast(30, alpha=.1)
# # print(predict_sunspots)
#
# # fig, ax = plt.subplots(figsize=(12, 8))
# # ax = dta.ix['2001':].plot(ax=ax)
# # fig = arma_mod30.plot_predict(83, 203, alpha=.1, exog=s_dta[80:183], dynamic=False, ax=ax, plot_insample=True)
# # fig.show()

# Gather the fitted model's parameters, residuals and orders for the
# out-of-sample predictor.
params, residuals = arma_mod30.params, arma_mod30.resid
p, q = arma_mod30.k_ar, arma_mod30.k_ma
k_exog, k_trend = arma_mod30.k_exog, arma_mod30.k_trend
steps = 20

# Forecast ``steps`` values beyond the end of ``dta``.
pre_result = _arma_predict_out_of_sample(
    params, steps, residuals, p, q, k_trend, k_exog,
    endog=dta, exog=None, method='ols', start=len(dta))
# print "#################################"
print(pre_result)

# Draw a 20-point slice of ``s_dta`` and the forecast (in red) on one figure.
plt.plot(s_dta[123:143])
plt.plot(pre_result, 'red')
plt.show()