Example #1
0
def aa(j, p, d, q):
    """Plot series ``ap_df[j]`` together with its ARIMA(p, d, q) predictions.

    The current ``pltf`` figure is cleared, the observed series is drawn, an
    ARIMA model declared with daily frequency is fitted on it and the
    predictions for positions 1 through 255 are drawn on top.
    """
    pltf.clf()
    series = ap_df[j]
    series.plot()
    fitted = ARIMA(series, (p, d, q), freq='D').fit()
    predicted = fitted.predict(1, 255)
    predicted.plot()
Example #2
0
def forecastARIMA(df, H, P, D, Q):
    """Fit an ARIMA(P, D, Q) model on ``df`` and forecast ``H`` steps ahead.

    ``df`` is accessed through ``df.values``, ``len(df)`` and
    ``df.index.stop``, so its index must expose a ``stop`` attribute.

    Returns:
        A 6-tuple ``(pred_series, fc_series, th, lower_series, upper_series,
        summary)``. The ``*_series`` entries are JSON strings produced with
        ``orient='values'``, ``th`` is the ``dumps`` encoding of the forecast
        positions ``len(df) .. len(df) + H - 1`` and ``summary`` is a dict
        holding the order, the AR followed by MA parameters, and the HTML
        model summary.
    """
    order = (P, D, Q)
    fitted = ARIMA(df.values[:], order=order).fit(trend='nc')

    # Point forecasts, their standard errors and the confidence bounds.
    fc, se, confint = fitted.forecast(H)

    first_new = df.index.stop
    index_of_fc = pd.RangeIndex(start=first_new, stop=first_new + H)
    n_obs = len(df)
    th = dumps(list(range(n_obs, n_obs + H)))

    def as_json(values):
        return pd.Series(values, index=index_of_fc).to_json(orient='values')

    fc_series = as_json(fc)
    lower_series = as_json(confint[:, 0])
    upper_series = as_json(confint[:, 1])

    summary = {
        'order': order,
        'params': list(fitted.arparams) + list(fitted.maparams),
        'summary': fitted.summary().as_html(),
    }

    # First try with ``typ='linear'``; if that raises, retry without it.
    last_new = first_new + H - 1
    try:
        pred = fitted.predict(first_new, last_new, typ='linear').tolist()
    except Exception:
        pred = fitted.predict(first_new, last_new).tolist()

    pred_series = pd.Series(pred).to_json(orient='values')

    return pred_series, fc_series, th, lower_series, upper_series, summary
Example #3
0
def in_sample_prediction(p, q, y_true, train_ratio):
    """Plot dynamic and one-step-ahead ARMA(p, q) predictions over the test span.

    The model is fitted on the *whole* series; ``train_ratio`` only decides
    where the plotted "train" part ends and the predicted part starts.
    Nothing is returned, the figure is shown with ``plt.show()``.
    """
    if isinstance(y_true, pd.Series):
        # Dates are dropped: only the position of a day relative to the start
        # of the test period matters. Per the original author this also avoids
        # problems such as a single NaN in the middle of the data.
        y_true = y_true.values

    n_obs = len(y_true)
    t = round(train_ratio * n_obs)

    # Fitted on all of the data, not just on the first ``t`` points.
    fitted = ARIMA(y_true, order=(p, 0, q)).fit()

    observed_head = y_true[:t]  # plotted only; the fit is not restricted to it
    last_position = n_obs - 1

    pred_index = np.arange(t + 1, n_obs + 1)
    dynamic_predictions = fitted.predict(start=t, end=last_position, dynamic=True)
    one_step_ahead_predictions = fitted.predict(start=t, end=last_position, dynamic=False)

    plt.figure(figsize=(16, 4))
    curves = (
        (np.arange(1, t + 1), observed_head, "Observed (train)"),
        (pred_index, y_true[t:], "Test period (truth)"),
        (pred_index, dynamic_predictions, "Dynamic pred"),
        (pred_index, one_step_ahead_predictions, "1-step pred"),
    )
    for xs, ys, label in curves:
        plt.plot(xs, ys, label=label, marker="o", ms=4)
    plt.legend()
    plt.title(f"[train_ratio={train_ratio}] Resultats de prédiction pour AR={p} MA={q}")
    plt.xlabel("Jour du test")
    plt.xticks()
    plt.show()
Example #4
0
def buildARIMA(data, start, end):
    """Fit an ARIMA model with the order from ``definePQ`` and predict ``start..end``.

    ``typ='levels'`` is only passed to ``predict`` when the middle element of
    the order is non-zero. The prediction is passed through ``np.exp`` before
    being returned (presumably ``data`` is log-transformed; confirm with the
    caller).
    """
    p, c, q = definePQ(data)
    fitted = ARIMA(data, (p, c, q)).fit()

    predict_kwargs = {'start': start, 'end': end}
    if c != 0:
        predict_kwargs['typ'] = 'levels'

    return np.exp(fitted.predict(**predict_kwargs))
Example #5
0
    def calc_predictions(self, error_times: [int]) -> bool:
        """Fit an auto-selected ARIMA model on the training window and store its forecast.

        The training and test slices are taken from ``self.window`` and
        re-indexed with ``self.index_train`` / ``self.index_test`` converted to
        datetimes. On success ``self.order``, ``self.prediction_values_all``
        and ``self.prediction_values`` are updated.

        Args:
            error_times: Offsets that are added to
                ``self.pw.userData.train_length()`` to select entries of the
                forecast. NOTE(review): the ``+`` below is only element-wise
                if this is array-like (e.g. a NumPy array), not a plain list.

        Returns:
            True when the selected order is non-zero and predictions were
            stored, False otherwise. (The failure branch used to return the
            tuple ``(False, None)``, which is truthy and contradicts the
            annotated ``bool`` return type.)
        """
        train = self.window[:len(self.index_train) - 1]
        train.index = pd.to_datetime(self.index_train)

        test = self.window[len(self.index_train):]
        test.index = pd.to_datetime(self.index_test)

        stepwise_fit = auto_arima(train,
                                  seasonal=False,
                                  start_p=3,
                                  start_d=1,
                                  start_q=2,
                                  trace=True,
                                  error_action='ignore',
                                  suppress_warnings=False,
                                  stepwise=True)

        # An all-zero order means no usable model was found.
        if not sum(stepwise_fit.order):
            return False

        self.order = stepwise_fit.order
        preds, conf_int = stepwise_fit.predict(n_periods=len(test),
                                               return_conf_int=True)
        prediction = pd.Series(preds, index=test.index)

        # NOTE(review): this second fit and its prediction are not used for the
        # stored values below; they are kept so the call sequence is unchanged.
        model = ARIMA(train, order=stepwise_fit.order).fit(disp=0)
        if stepwise_fit.order[1] == 0:
            data = model.predict(end=3600 + 180)
        else:
            data = model.predict(typ='levels', end=3600 + 180)

        # NOTE(review): ``sampleTime`` is not defined in this method while the
        # step uses ``self.sample_time`` - confirm it is a module-level name.
        index = np.arange(self.pw.userData.train_length() + sampleTime,
                          self.pw.userData.simlength * 60 + 1,
                          self.sample_time)
        self.prediction_values_all = prediction
        self.prediction_values_all.index = index
        self.prediction_values = self.prediction_values_all.loc[
            error_times + self.pw.userData.train_length()]
        self.prediction_values = self.prediction_values.tolist()
        return True
Example #6
0
def arima(demand, validation_points):
    """Pick an ARIMA order by rolling validation and refit it on all of ``demand``.

    Every ``(p, d, q)`` combination of ``params.p``, ``params.d`` and
    ``params.q`` except the first one is evaluated: for each split produced by
    ``split_data`` the model is fitted on the training part and the mean
    squared error of its ``params.validation_steps`` forecasts is recorded.
    The order with the lowest mean error wins.

    Args:
        demand: The series to model.
        validation_points: Passed to ``split_data``; also bounds the number of
            splits (``range(1, validation_points - 1)``).

    Returns:
        ``[best_order, best_error, best_fit]`` where ``best_fit`` is the best
        order refitted on the whole of ``demand``.

    Raises:
        ValueError: If the grid leaves no candidate order to evaluate.
    """
    fit_kwargs = {'solver': 'bfgs', 'transparams': True, 'method': 'mle'}
    pdq = list(itertools.product(params.p, params.d, params.q))

    arima_best_model = None
    arima_best_error = float('Inf')
    for param in pdq:
        # The first grid point is deliberately skipped, as before.
        if param == pdq[0]:
            continue

        arima_cv_error = []
        for split_count in range(1, validation_points - 1):
            demand_train, demand_valid = split_data(demand, validation_points, split_count)
            if len(demand_valid) != params.validation_steps:
                break
            try:
                arima_fit = ARIMA(demand_train, order=tuple(param)).fit(**fit_kwargs)
                arima_fcast = arima_fit.predict(
                    start=len(demand_train),
                    end=len(demand_train) + params.validation_steps - 1,
                    typ='levels')
                # Record the error of every successful split (previously the
                # append only ran in the except branch, with a stale value).
                arima_cv_error.append(mean_squared_error(demand_valid, arima_fcast))
            except Exception:
                # Best effort: a split that cannot be fitted is reported and skipped.
                traceback.print_exc()

        # No usable split, or only NaN errors, counts as an infinite error.
        arima_mean_error = np.nanmean(arima_cv_error) if arima_cv_error else float('Inf')
        if math.isnan(arima_mean_error):
            arima_mean_error = float('Inf')

        # The first evaluated candidate always becomes the initial best.
        if arima_best_model is None or arima_mean_error < arima_best_error:
            arima_best_error = arima_mean_error
            arima_best_model = param

    if arima_best_model is None:
        raise ValueError('no candidate ARIMA order to evaluate')

    arima_best_fit = ARIMA(demand, order=tuple(arima_best_model)).fit(**fit_kwargs)
    return [arima_best_model, arima_best_error, arima_best_fit]
Example #7
0
def arima(ahead, start_exp, n_samples, labels):
    """Accumulate absolute ARIMA forecast errors over an expanding window.

    For every ``test_sample`` in ``range(start_exp, n_samples - ahead)`` and
    every row ``j`` of ``labels`` a model is fitted on
    ``labels.iloc[j, :test_sample - 1]`` and its predictions are compared with
    ``labels.iloc[j, test_sample:test_sample + ahead]``. An ARIMA(2, 0, 2) fit
    is tried first; if it fails, ARIMA(1, 0, 0) is used. Rows whose history
    sums to zero are predicted as all zeros without fitting.

    Args:
        ahead: Number of steps compared per forecast.
        start_exp: First value of ``test_sample``.
        n_samples: Upper bound used for the ``test_sample`` range.
        labels: DataFrame indexed positionally (``.iloc``) by row and time.

    Returns:
        ``(error, var)``: ``error`` is the per-step absolute error summed over
        all rows and test samples; ``var[k]`` lists, per test sample, the
        absolute error at step ``k`` summed over the rows.
    """
    var = [[] for _ in range(ahead)]
    error = np.zeros(ahead)

    for test_sample in range(start_exp, n_samples - ahead):
        print(test_sample)
        err = 0
        for j in range(labels.shape[0]):
            ds = labels.iloc[j, :test_sample - 1].reset_index()
            history = ds.iloc[:, 1]

            if sum(history) == 0:
                yhat = [0] * ahead
            else:
                try:
                    fit2 = ARIMA(history.values, (2, 0, 2)).fit()
                except Exception:
                    # Fall back to a simpler model; ``Exception`` (rather than
                    # a bare except) keeps Ctrl-C / SystemExit working.
                    fit2 = ARIMA(history.values, (1, 0, 0)).fit()
                # NOTE(review): ``start..end`` spans ``ahead - 1`` positions
                # while ``y_me`` below holds ``ahead`` values - confirm the
                # intended horizon (an earlier variant used ``ahead - 1``).
                yhat = abs(
                    fit2.predict(start=test_sample,
                                 end=(test_sample + ahead - 2)))
            y_me = labels.iloc[j, test_sample:test_sample + ahead]
            e = abs(yhat - y_me.values)
            err += e
            error += e

        for idx in range(ahead):
            var[idx].append(err[idx])
    return error, var
 def arima_(self, train, test, seasonal):
     """Fit the order chosen by ``auto_arima`` and record it if it beats ``self.rmse``.

     When ``auto_arima`` reports a non-zero seasonal order a SARIMAX model is
     fitted, otherwise a plain ARIMA model. The fitted model forecasts the
     ``len(test)`` positions following ``train`` and its RMSE against ``test``
     is compared with ``self.rmse``.

     Side effects: sets ``self.__arimaOrder__`` (and ``self.__seasonalOrder__``
     for the seasonal case); updates ``self.rmse`` and ``self.__model__`` when
     the new RMSE is lower than the stored one.
     """
     arima_order = auto_arima(train, seasonal=seasonal, information_criterion='aic')
     order = arima_order.order
     seasonal_order = arima_order.seasonal_order

     start = len(train)
     end = start + len(test) - 1

     # Only one of the two models is fitted per call; the other keeps an
     # infinite RMSE so that the comparison below is always well defined
     # (previously the unfitted model's RMSE was an unbound name).
     rmse_arima = float('inf')
     rmse_sarima = float('inf')

     if seasonal_order != (0, 0, 0, 0):
         sarima = SARIMAX(train, order=order, seasonal_order=seasonal_order).fit()
         # Predict from the fitted SARIMAX results (was the undefined ``sar_forecast``).
         sar_pred = sarima.predict(start=start, end=end, dynamic=False, typ='levels')
         rmse_sarima = rootMeanSquaredError(test, sar_pred)
         self.__arimaOrder__ = order
         self.__seasonalOrder__ = seasonal_order
     else:
         # Fit on the ``train`` argument (was the undefined ``train_data``).
         arima_model = ARIMA(train, order=order).fit()
         arima_pred = arima_model.predict(start, end, dynamic=False, typ='levels')
         rmse_arima = rootMeanSquaredError(test, arima_pred)
         self.__arimaOrder__ = order

     if rmse_arima < rmse_sarima:
         if rmse_arima < self.rmse:
             self.rmse = rmse_arima
             self.__model__ = 'arima'
     else:
         if rmse_sarima < self.rmse:
             self.rmse = rmse_sarima
             self.__model__ = 'sarima'
def predict_trend(df_train):
    """Fit an ARIMA(p, 1, q) model on ``df_train`` and predict Q1 2020.

    ``p`` and ``q`` are the AIC-minimising orders that
    ``arma_order_select_ic`` reports for the first difference of ``df_train``
    (searching up to AR 6 and MA 4). The prediction covers '2020-01-01'
    through '2020-03-31'.
    """
    differenced = df_train.diff().dropna()
    selection = arma_order_select_ic(differenced, max_ar=6, max_ma=4, ic='aic')
    best = selection['aic_min_order']
    fitted = ARIMA(df_train, order=(best[0], 1, best[1])).fit()
    # Three months of predictions.
    return fitted.predict('2020-01-01', '2020-03-31')
Example #10
0
class ARIMAModel(StatModels):
    """Grid-searched ARIMA model built on top of ``StatModels``."""

    def __init__(self, params):
        """Store the candidate p, q and d lists found on ``params``."""
        super(ARIMAModel, self).__init__(params)
        self.p_ls = params.p_ls
        self.q_ls = params.q_ls
        self.d_ls = params.d_ls
        self.name = "ARIMA"

    def validate(self):
        """Fit every (p, d, q) combination and keep the one with the lowest RMSE.

        Each candidate is fitted on ``self.train``; its predictions for
        ``self.start .. self.end`` are scored against ``self.validation`` with
        ``EvalMetrics(...).val_rmse()``. The best fitted model is stored in
        ``self.saved_model``; ``self.model`` is left at the last candidate.
        """
        best_value = float("inf")
        for p in self.p_ls:
            for q in self.q_ls:
                for d in self.d_ls:
                    # ARIMA expects the order as (p, d, q); it used to be
                    # passed as (p, q, d), swapping the differencing and MA
                    # orders.
                    self.model = ARIMA(self.train, order=(p, d, q))
                    self.model = self.model.fit(disp=0)
                    predictions = self.model.predict(start=self.start,
                                                     end=self.end)
                    eval_metric = EvalMetrics(self.validation, predictions)
                    rmse = eval_metric.val_rmse()

                    if rmse < best_value:
                        best_value = rmse
                        self.saved_model = self.model
Example #11
0
def get_forecast(org_ts,
                 forecast_periods,
                 orders=(2, 1, 2),
                 seasonal_orders=(0, 1, 1, 48),
                 freq='30min'):
    '''
    Produce forecast values for a time series.
    :param org_ts:  the original data
    :param forecast_periods:    how many points to forecast
    :param orders:  the p, d, q values. p and q relate to the ACF/PACF and d is
        the differencing order; the original author recommends finding
        suitable values with auto_arima (get_suitable_orders) first
    :param seasonal_orders: same as above; the last entry is the period of the
        series
    :param freq:    the spacing between two consecutive points
    :return:    the forecast values, as a Series indexed by forecast time
    '''
    fitted = ARIMA(order=orders, seasonal_order=seasonal_orders).fit(y=org_ts)
    predicted = fitted.predict(n_periods=forecast_periods)

    # The forecast index starts one observed step after the last observation.
    last_stamp = org_ts.index[-1]
    observed_step = last_stamp - org_ts.index[-2]
    forecast_index = pd.date_range(last_stamp + observed_step,
                                   periods=forecast_periods,
                                   freq=freq)
    return pd.Series(predicted, index=forecast_index)
Example #12
0
def arima_predictions(dataset, pdq, frequency, number_of_predictions):
    """Forecast prices with ARIMA on a differenced series and undo the differencing.

    The values of ``dataset`` are transformed by ``differenciate_dataset``
    with ``frequency``; an ARIMA model of order ``pdq`` is fitted on the result
    and asked for positions ``len .. len + number_of_predictions`` (inclusive
    bounds as passed to ``predict``). Each forecast is added back onto the
    value ``frequency`` steps earlier in the running price history.

    Returns:
        DataFrame indexed by 'time' with a 'Close' column. The first row
        repeats the last observed row of ``dataset``; later timestamps advance
        by one day, or by three days when the current day is a Friday
        (``isoweekday() == 5``).
    """
    price_data = dataset.values
    primed_price_data = differenciate_dataset(price_data, frequency)
    n_primed = len(primed_price_data)
    forecast = ARIMA(primed_price_data, order=pdq).fit().predict(
        start=n_primed, end=n_primed + number_of_predictions)

    history = price_data.tolist()
    last_stamp = dataset.index[-1]
    my_predictions = [dataset.iloc[-1].values]
    my_index = [last_stamp]
    day_offset = 0
    for step in forecast:
        restored = step + history[-frequency]
        history.append(restored)
        my_predictions.append(round(restored[0], 2))
        # Jump over the weekend when the current day is a Friday.
        on_friday = (last_stamp + timedelta(days=day_offset)).isoweekday() == 5
        day_offset += 3 if on_friday else 1
        my_index.append(last_stamp + timedelta(days=day_offset))

    frame = pd.DataFrame(data={'Close': my_predictions, 'time': my_index})
    return frame.set_index('time')
Example #13
0
def test(train_set, test_set):
    """Fit ARIMA(1, 1, 1) per store and compute R^2 scores of the forecasts.

    For every ``storeId`` in ``train_set`` the 'Inside' values (indexed by
    'dateTime') are modelled and the model forecasts as many steps as the
    store has rows in ``test_set``. The shapes of the concatenated truth and
    prediction arrays are printed. Per-store and overall R^2 values are
    computed but not returned; the function returns None.
    """
    truth_parts = []
    pred_parts = []
    for store_id in train_set['storeId'].unique():
        store_train = train_set[train_set['storeId'] == store_id].iloc[:, :6]
        store_test = test_set[test_set['storeId'] == store_id].iloc[:, :6]

        train_series = pd.Series(store_train['Inside'].values,
                                 store_train['dateTime'].values)
        test_series = pd.Series(store_test['Inside'].values,
                                store_test['dateTime'].values)

        n_train = train_series.shape[0]
        fitted = ARIMA(train_series, order=(1, 1, 1)).fit()
        pred = fitted.predict(n_train,
                              n_train + test_series.shape[0] - 1,
                              typ='levels')

        truth_parts.append(test_series.values)
        pred_parts.append(pred.values)

    y_true = np.concatenate(truth_parts)
    y_pred = np.concatenate(pred_parts)

    print(y_true.shape)
    print(y_pred.shape)

    df_result = pd.DataFrame({
        'storeId': test_set['storeId'].values,
        'Inside': y_true,
        'Inside_pred': y_pred,
    })

    r2_list = []
    for store_id in df_result['storeId'].unique():
        df_store = df_result[df_result['storeId'] == store_id]
        r2_list.append(
            (store_id, r2_score(df_store['Inside'], df_store['Inside_pred'])))

    df_result2 = pd.DataFrame(r2_list, columns=['storeId', 'r2'])

    # Notebook leftovers: both results are computed and then discarded.
    r2_score(df_result['Inside'], df_result['Inside_pred'])
    df_result2.describe()

    return
Example #14
0
def arimaModelCheck():
    '''
    Model check: fit ARIMA(0,1,1) and test its residuals for white noise.
    :return: None (the verdict is printed)
    '''
    discfile = 'data/discdata_processed.xls'
    # Number of residual lags to test.
    lagnum = 12

    frame = pd.read_excel(discfile, index_col='COLLECTTIME')
    # The last 5 rows are left out.
    xdata = frame.iloc[:len(frame) - 5]['CWXT_DB:184:D:\\']

    # Build and fit the ARIMA(0,1,1) model.
    from statsmodels.tsa.arima_model import ARIMA
    fitted = ARIMA(xdata, (0, 1, 1)).fit()
    # In-sample prediction on the original scale, then the residuals.
    xdata_pred = fitted.predict(typ='levels')
    pred_error = (xdata_pred - xdata).dropna()

    # Ljung-Box white-noise test on the residuals.
    from statsmodels.stats.diagnostic import acorr_ljungbox
    lb, p = acorr_ljungbox(pred_error, lags=lagnum)

    # Any p-value below 0.05 is taken to mean the residuals are not white noise.
    if (p < 0.05).any():
        print(u'模型ARIMA(0,1,1)不符合白噪声检验')
    else:
        print(u'模型ARIMA(0,1,1)符合白噪声检验')
def evaluate_prediction_for_stock(stock_close_rtn, split=0.7):
    """Evaluate an ARIMA forecast of stock close returns on a hold-out set.

    The series is split into a training part (the first ``split`` fraction,
    rounded up) and a test part (the remainder). The model is fitted on the
    training part, forecasts exactly the test window and is scored by RMSE.

    Args:
        stock_close_rtn: Series of close returns (positional ``.iloc`` access).
        split: Fraction of observations used for training.

    Returns:
        ``(predicted, rmse)``: the forecasts for the test window and their
        root-mean-squared error against the test values.

    Raises:
        ValueError: If the split leaves no observations in the test set.
    """
    total_length = stock_close_rtn.shape[0]
    split_point = int(np.ceil(total_length * split))
    training_set = stock_close_rtn.iloc[:split_point]
    test_set = stock_close_rtn.iloc[split_point:]
    if len(test_set) == 0:
        raise ValueError('split leaves no observations in the test set')

    # get the p, q values
    diff_series = (stock_close_rtn - stock_close_rtn.shift()).dropna()
    p, q = find_params_arima_acf_pacf(diff_series)

    # NOTE(review): the order tuple is (p, d, q), so ``q`` lands in the
    # differencing slot here - confirm this is intended.
    model = ARIMA(training_set.values, (p, q, 0)).fit()

    # Forecast the positions right after the training data, one per test
    # observation. ``predict(end=len(test_set))`` returned in-sample values
    # from the start of the training set, which do not line up with test_set.
    n_train = len(training_set)
    predicted = model.predict(start=n_train, end=n_train + len(test_set) - 1)

    rmse = np.sqrt(mse(test_set.values, predicted))

    return predicted, rmse
Example #16
0
def arima_model(data):
    """Fit ARIMA(1, 0, 0) on ``data`` and return the next 7 predictions as a list.

    The predictions cover positions ``len(data)`` through ``len(data) + 6``
    and are requested with ``typ='levels'``.
    """
    fitted = ARIMA(data, order=(1, 0, 0)).fit()
    fitted.summary()  # result is not used
    first_step = len(data)
    forecast = fitted.predict(start=first_step, end=first_step + 6, typ='levels')
    return forecast.tolist()
Example #17
0
class ARIMAModel(Model):
    """ARIMA model with automatic order selection, built on top of ``Model``."""

    def __init__(self):
        Model.__init__(self)
        self._order = None

    def select_order_brute_force(self):
        """Evaluate the AIC at the BIC-selected ARMA order with d=1 via ``brute``.

        Each slice of the grid spans a single value, so the search visits the
        one point ``(bic_p, 1, bic_q)``; ``brute`` returns that grid point.
        """
        def objfunc(order, endog, exog):
            from statsmodels.tsa.arima_model import ARIMA
            fit = ARIMA(endog, order, exog).fit(full_output=False)
            return fit.aic

        ts = self.get_series()
        bic = arma_order_select_ic(ts).bic_min_order
        grid = (slice(bic[0], bic[0] + 1,
                      1), slice(1, 2, 1), slice(bic[1], bic[1] + 1, 1))
        from scipy.optimize import brute
        return brute(objfunc, grid, args=(ts, None), finish=None)

    def select_order(self):
        """Return ``(p, d, q)`` with the smallest d in 0..2 whose series is stationary.

        p and q are the BIC-minimising ARMA orders of the series differenced
        d times. d=2 is used as the last resort without a stationarity check.
        """
        ts = self.get_series()
        if is_stationary(ts):
            bic = arma_order_select_ic(ts).bic_min_order
            return bic[0], 0, bic[1]

        ts1diff = ts.diff(periods=1).dropna()
        if is_stationary(ts1diff):
            bic = arma_order_select_ic(ts1diff).bic_min_order
            return bic[0], 1, bic[1]

        # Second-order differencing is the difference of the first difference.
        # ``ts.diff(periods=2)`` is a lag-2 difference instead, which is not
        # the series an ARIMA model with d=2 works on.
        ts2diff = ts1diff.diff(periods=1).dropna()
        bic = arma_order_select_ic(ts2diff).bic_min_order

        return bic[0], 2, bic[1]

    def get_fitted_values(self):
        """Return the fitted values of the model created by ``auto``."""
        return self._model.fittedvalues

    def auto(self):
        """Select the order and fit the model on the series.

        The sampling period is taken from the first two index entries and
        passed to ARIMA as a ``Second`` frequency.
        """
        ts = self.get_series()
        self._period = ts.index[1] - ts.index[0]
        freq = Second(self._period.total_seconds())
        self._order = self.select_order()
        self._model = ARIMA(self.get_series(), order=self._order,
                            freq=freq).fit()

    def predict(self):
        """Predict ``self._predict`` periods past the last fitted timestamp.

        For differenced models (d > 0) the forecast is shifted by
        ``self.max() - self.min()``.
        """
        start_date = self._model.fittedvalues.index[-1]
        end_date = start_date + self._predict * self._period
        forecast = self._model.predict(start_date.isoformat(),
                                       end_date.isoformat())

        if self._order[1] > 0:
            shift = self.max() - self.min()
            forecast += shift

        return forecast
Example #18
0
class ARIMAModel(Model):
    """ARIMA model over ``self.ts`` with BIC/AIC based order selection."""

    def __init__(self, ts):
        Model.__init__(self, ts)
        self.order = None

    def select_order_brute_force(self):
        """Evaluate the AIC at the BIC-selected ARMA order with d=1 via ``brute``.

        Each slice of the grid spans a single value, so the search visits the
        one point ``(bic_p, 1, bic_q)``; ``brute`` returns that grid point.
        """
        def objfunc(order, endog, exog):
            from statsmodels.tsa.arima_model import ARIMA
            fit = ARIMA(endog, order, exog).fit(full_output=False)
            return fit.aic

        bic = arma_order_select_ic(self.ts, max_ar=6, max_ma=4).bic_min_order
        grid = (slice(bic[0], bic[0] + 1,
                      1), slice(1, 2, 1), slice(bic[1], bic[1] + 1, 1))
        from scipy.optimize import brute
        return brute(objfunc, grid, args=(self.ts, None), finish=None)

    def _select_order_impl(self, ic):
        """Return ``(p, d, q)`` with the smallest d in 0..2 whose series is stationary.

        p and q come from ``_arma_order_selector`` applied with criterion
        ``ic`` to the series differenced d times. d=2 is the last resort and
        is used without a stationarity check.
        """
        if is_stationary(self.ts):
            bic = _arma_order_selector(self.ts, ic)
            return bic[0], 0, bic[1]

        ts1diff = self.ts.diff(periods=1).dropna()
        if is_stationary(ts1diff):
            bic = _arma_order_selector(ts1diff, ic)
            return bic[0], 1, bic[1]

        # Second-order differencing is the difference of the first difference.
        # ``self.ts.diff(periods=2)`` is a lag-2 difference instead, which is
        # not the series an ARIMA model with d=2 works on.
        ts2diff = ts1diff.diff(periods=1).dropna()
        bic = _arma_order_selector(ts2diff, ic)

        return bic[0], 2, bic[1]

    def select_order(self):
        """Select the order using the 'bic' criterion."""
        return self._select_order_impl('bic')

    def reselect_order(self):
        """Select the order using the 'aic' criterion."""
        return self._select_order_impl('aic')

    def auto(self, order=None):
        """Fit the model with ``order``, or with the selected order when None.

        The sampling period is taken from the first two index entries of
        ``self.ts``; the model is fitted with ``method='css'``.
        """
        self.period = self.ts.index[1] - self.ts.index[0]
        self.order = order if order is not None else self.select_order()
        logging.debug('Model order is {}'.format(self.order))
        self.model = ARIMA(self.ts, order=self.order).fit(disp=False,
                                                          method='css')

    def predict(self, length):
        """Predict ``length`` periods past the last fitted timestamp.

        For differenced models (d > 0) the forecast is shifted by the absolute
        gap between the last fitted value and the last observation.
        """
        start_date = self.model.fittedvalues.index[-1]
        end_date = start_date + length * self.period
        forecast = self.model.predict(start_date.isoformat(),
                                      end_date.isoformat())

        if self.order[1] > 0:
            shift = abs(self.model.fittedvalues[-1] - self.ts[-1])
            forecast += shift

        return forecast
Example #19
0
class ArimaModel:
    """
    ARIMA (autoregressive integrated moving average) model wrapper.
    """
    def __init__(self, data: pd.DataFrame, p: int, d: int, q: int):
        '''
        Store the data and the (p, d, q) order; no model is built yet.
        '''
        self.data = data
        self.p = p
        self.d = d
        self.q = q
        # Fitted results, set by _train(); None until then.
        self.fitted = None

    def _define(self):
        """
        Build the (unfitted) ARIMA model and store it in ``self.model``.
        """
        self.model = ARIMA(self.data, order=(self.p, self.d, self.q))

    def show_params(self):
        """
        Display p, d, q
        """
        print('ARIMA Model: p={0} d={1} q={2}'.format(self.p, self.d, self.q))

    def _results(self):
        '''
        Return the fitted results, or ``self.model`` if _train() has not run.

        The fallback keeps callers working that assigned fitted results to
        ``self.model`` themselves.
        '''
        return self.fitted if self.fitted is not None else self.model

    def _train(self):
        '''
        Fit ``self.model``; the results are remembered and returned.
        '''
        self.fitted = self.model.fit(disp=-1, method='css', start_ar_lags=13)
        return self.fitted

    def _predict(self, start, end):
        '''
        Return the predictions for ``start``..``end`` with ``typ='levels'``.

        The result used to be dropped (no ``return``) and the call went to the
        unfitted model.
        '''
        return self._results().predict(start=start, end=end, typ='levels')

    def _forecast(self, pred_len: int):
        '''
        Return the forecast for the next ``pred_len`` steps.
        '''
        return self._results().forecast(pred_len)
Example #20
0
def ARIMA_50(city, max):
    """
    Fit an ARIMA(5, 1, 1) model on the rows before ``max`` and predict up to
    the last date of ``city``.

    Params:
    city -- time-series dataframe object containing Date and ZHVI columns
    max -- datetime object from index of city representing peak ZHVI

    NOTE(review): ``start`` is not defined inside this function; it is assumed
    to be a module-level name - confirm, or pass the intended start explicitly.
    """
    before = city[city['Date'] < max]
    # ``predict(start, end)`` is the call shape of fitted results, so the model
    # has to be fitted first (it used to be called on the unfitted model).
    results = ARIMA(before, (5, 1, 1)).fit()

    return results.predict(start, city['Date'].iloc[-1])
Example #21
0
def get_arima_predictions(y, train_subset, order = [1,0,0], X = None):
    """Return in-sample predictions followed by expanding-window predictions.

    A model is first fitted on ``y[train_subset]`` and its in-sample
    predictions are taken. Then, for every position ``i`` after the largest
    training index, a model is refitted on ``y[0:i]`` and one more value is
    appended.

    Args:
        y: Target values, indexable by ``train_subset`` and by slices.
        train_subset: Indices of the initial training observations.
        order: ARIMA ``(p, d, q)`` order; it is only read, never modified.
        X: Optional exogenous matrix indexed ``X[rows, :]``; None disables it.

    Returns:
        Array of predictions.
    """
    # ``X == None`` is evaluated element-wise for arrays and cannot be used in
    # an ``if``; identity comparison is the correct test for "no exog".
    use_exog = X is not None

    if use_exog:
        arima = ARIMA(y[train_subset], order = order,
                      exog = X[train_subset,:]).fit()
        predictions = arima.predict(exog = X[train_subset,:])
    else:
        arima = ARIMA(y[train_subset], order = order).fit()
        predictions = arima.predict()

    for i in range(max(train_subset)+1,len(y)):
        # NOTE(review): the appended value is the last element of
        # ``predict(0, len(y) + i)``, i.e. the prediction at position
        # ``len(y) + i`` rather than at ``i`` - confirm the intended horizon.
        if use_exog:
            arima = ARIMA(y[0:i], order = order, exog = X[0:i,:]).fit()
            latest = arima.predict(0, len(y) + i, exog = X[0:i+1,:])[-1]
        else:
            arima = ARIMA(y[0:i], order = order).fit()
            latest = arima.predict(0, len(y) + i)[-1]
        predictions = np.append(predictions, latest)
    return predictions
 def arima_forecast(self, history):
     """Forecast the 7 steps after ``history`` with an ARIMA(7, 0, 0) model.

     ``history`` is converted with ``self.make_series``; the predictions cover
     positions ``len(series)`` through ``len(series) + 6``.
     """
     series = self.make_series(history)
     fitted = ARIMA(series, order=(7, 0, 0)).fit(disp=False)
     first_step = len(series)
     return fitted.predict(first_step, first_step + 6)
Example #23
0
 def arima_forecast(self, ts,  p, i, q,):
     """Fit ARIMA(p, i, q) on ``ts`` and forecast one step ahead.

     Returns a pair: the first element of ``forecast(1)`` and the model's
     ``summary2()``.
     """
     fitted = ARIMA(ts, order=(p, i, q)).fit(disp=-1)
     # In-sample predictions are computed but not used (plotting is disabled).
     fitted.predict()
     one_step = fitted.forecast(1)
     next_ret = one_step[0]
     return next_ret, fitted.summary2()
Example #24
0
def arima_predictions(data, num_future_days=7):
    """Predict future values of ``data`` with ARIMA(3, 1, 0) on its logarithm.

    The model is fitted on ``np.log(data)``; predictions for positions
    ``len(data) + 1`` through ``len(data) + num_future_days`` are requested
    with ``typ='levels'`` and mapped back with ``np.exp``.
    """
    # Work on log prices to adjust for volatility.
    fitted = ARIMA(np.log(data), order=(3, 1, 0)).fit(disp=0)

    n_obs = len(data)
    log_forecast = fitted.predict(n_obs + 1,
                                  n_obs + num_future_days,
                                  typ='levels')

    # Undo the log transform.
    return np.exp(log_forecast)
Example #25
0
def recursive_estimation(T1_size, T2=30, steps_ahead=1):
  """Refit an ARIMA(0, 1, 1) model on a growing window and return an RMSPE.

  For each ``j`` in ``range(T2)`` the model is built on the first
  ``T1_size + j`` values of ``Y[0]`` and a value is appended to ``forecasts``.

  NOTE(review): ``Y``, ``T1``, ``T`` and ``forecasts`` are not defined in this
  function and are assumed to be module-level names; ``steps_ahead`` is never
  used - confirm against the rest of the file.
  """
  i = 0
  for j in range(T2):
    # Training window grows by one observation per iteration.
    train_data = Y[i][:(T1_size+j)]
    IMA_model = ARIMA(train_data, order=(0, 1, 1))
    # NOTE(review): the fitted ``results`` are not used below.
    results = IMA_model.fit(trend='nc')

    # NOTE(review): predict is called on the unfitted model with an observed
    # value as its only argument - presumably a forecast from ``results`` was
    # intended; verify.
    fc = IMA_model.predict(Y[i][T1+j+1])
    forecasts.append(fc)

    # Recomputed on every iteration; only the last value is returned.
    RMSPE = np.sqrt(np.mean(np.square(((Y[i][T-T2:] - forecasts) / Y[i][T-T2])), axis=0))
  return RMSPE
Example #26
0
def predstl():
    """Write per-artist play predictions to the predictions CSV file.

    For each of the 50 artists the log of ``ap_df[j]`` is split into a
    period-7 seasonal part, a period-30 seasonal part and a remainder. The
    remainder is modelled with ARIMA(2, 0, 1); the seasonal parts are extended
    by repeating their last cycle. The recombined, exponentiated series is
    rounded up and the rows for positions 184..243 are written as
    ``[artist_id, play_num, date]`` (and printed).

    NOTE(review): the file is opened in binary mode, which matches Python 2
    ``csv`` usage; under Python 3 ``csv.writer`` needs a text-mode file.
    """
    predict_file_path = "./data/mars_tianchi_artist_plays_predict.csv"
    # The context manager closes the file even if a fit or lookup fails midway.
    with open(predict_file_path, 'wb') as fp:
        fpwriter = csv.writer(fp,
                              delimiter=',',
                              quotechar='"',
                              quoting=csv.QUOTE_NONE)
        for j in range(0, 50):
            orig = np.log(ap_df[j])

            # Weekly (period 7) seasonal component and its last cycle.
            stl_w = sm.tsa.seasonal_decompose(orig.tolist(), freq=7)
            stl_w_se = stl_w.seasonal
            w_s = stl_w_se[-7:]

            # Monthly (period 30) seasonal component of what is left.
            stl_w_rest = orig - stl_w_se
            stl_m = sm.tsa.seasonal_decompose(
                np.nan_to_num(stl_w_rest).tolist(), freq=30)
            stl_m_se = stl_m.seasonal
            m_s = stl_m_se[-30:]

            rest = stl_w_rest - stl_m_se
            rest_s = pd.Series(rest, index=d, name='artist' + str(j) + 'rest')

            order = (2, 0, 1)
            model = ARIMA(rest_s, order, freq='D').fit()
            rest_pred_nda = model.predict(1, 244).values

            # Extend the weekly component by 8 full cycles plus 5 days (61 values).
            for _ in range(0, 8):
                stl_w_se = np.append(stl_w_se, w_s)
            stl_w_se = np.append(stl_w_se, w_s[:5])

            # Extend the monthly component by one zero plus 2 cycles (61 values).
            stl_m_se = np.append(stl_m_se, 0)
            stl_m_se = np.append(stl_m_se, m_s)
            stl_m_se = np.append(stl_m_se, m_s)

            compose_stl = stl_w_se + stl_m_se + rest_pred_nda
            fit_ap = np.exp(compose_stl)

            artist_id = artists_rank_to_id[j]
            for idx in range(184, 244):
                date = rank_to_date[idx]
                play_num = int(math.ceil(fit_ap[idx]))
                if play_num < 0:
                    play_num = 0
                row = [artist_id, play_num, date]
                print(row)
                fpwriter.writerow(row)
Example #27
0
def arima(df, horizon):
    """Select an ARIMA order by AIC and estimate its rolling-origin MAPE.

    The differencing order comes from ``adf_test`` on the first column of
    ``df`` (capped at 2). p and q are searched over 0..5 and the order with the
    lowest AIC is kept. The chosen order is then refitted on growing training
    windows starting at 70% of the data, each time forecasting ``horizon``
    steps; positive MAPE values are averaged.

    Args:
        df: DataFrame whose first column is the series to model.
        horizon: Number of steps forecast in each evaluation window.

    Returns:
        Dict with keys 'order', 'AIC' and 'MAPE'. 'MAPE' stays at 100 when no
        evaluation window produced a positive value.
    """
    col = df.columns[0]
    d = min(adf_test(df[col]), 2)

    AIC = ARIMA(df[col], order=(0, d, 0)).fit().aic
    details = {'order': (0, d, 0), 'AIC': AIC, 'MAPE': 100}

    for p in range(0, 6):
        for q in range(0, 6):
            if p == 0 and q == 0:
                continue  # (0, d, 0) is the baseline fitted above
            try:
                aic = ARIMA(df[col], order=(p, d, q)).fit().aic
            except Exception:
                # Best effort: orders that cannot be fitted are skipped.
                # ``Exception`` (not a bare except) keeps Ctrl-C working.
                continue
            if aic < AIC:
                AIC = aic
                details['order'] = (p, d, q)
                details['AIC'] = aic

    n_train = int(len(df) * 0.7)
    n_records = len(df)
    k = 0
    mape = 0
    for l in range(n_train, n_records - horizon + 1):
        try:
            train, test = df.iloc[0:l], df.iloc[l:l + horizon]
            results = ARIMA(train[col], order=details['order']).fit()
            start = len(train)
            end = len(train) + len(test) - 1
            test_predictions = results.predict(start=start,
                                               end=end,
                                               dynamic=False,
                                               typ='levels')
            x = mean_absolute_percentage_error(test, test_predictions)
        except Exception:
            # Best effort: a window that fails is left out of the average.
            continue
        if x > 0:
            mape += x
            k += 1

    if k > 0:
        details['MAPE'] = mape / k
    return details
Example #28
0
def auto_arima(ts):
    """Grid-search ARIMA orders in {0, 1, 2}^3 by AIC and predict positions 60..71.

    Orders whose fit raises ``ValueError`` are scored with 1e6. The best order
    is refitted on ``ts`` and its predictions are requested with
    ``typ='levels'``.
    """
    def objfunc(order, x):
        candidate = ARIMA(x, order)
        try:
            fitted = candidate.fit(disp=0)
        except ValueError:
            return 1e6
        return fitted.aic

    search_grid = tuple(slice(0, 3, 1) for _ in range(3))
    res = brute(objfunc, search_grid, args=[ts], finish=None)
    best_order = list(map(int, res))
    best_model = ARIMA(ts, best_order).fit(disp=0)
    return best_model.predict(60, 71, typ='levels')
def predict_stock(stock_close_rtn, n_steps=5, plot=False):
    """Given the close returns of a stock (as a dataframe), predict the next n_step values.

    Args:
        stock_close_rtn: Close returns; ``.values`` and ``len()`` are used.
        n_steps: Number of future steps to predict (at least 1).
        plot: When true, fitting output is displayed and the last 10
            observations plus the forecast are plotted with a zero line.

    Returns:
        The predictions for the ``n_steps`` positions following the data.

    Raises:
        ValueError: If ``n_steps`` is smaller than 1.
    """
    if n_steps < 1:
        raise ValueError('n_steps must be at least 1')

    diff_series = (stock_close_rtn - stock_close_rtn.shift()).dropna()
    p, q = find_params_arima(diff_series)

    # NOTE(review): the order tuple is (p, d, q), so ``q`` lands in the
    # differencing slot here - confirm this is intended.
    model = ARIMA(stock_close_rtn.values, (p, q, 0)).fit(disp=plot)

    # Out-of-sample window right after the observed data.
    # ``predict(end=n_steps)`` returned in-sample values from the start instead.
    n_obs = len(stock_close_rtn)
    predicted = model.predict(start=n_obs, end=n_obs + n_steps - 1)

    if plot:
        model.plot_predict(n_obs - 10, n_obs + n_steps)
        # Draw the zero line only when plotting was requested.
        plt.axhline(y=0, linestyle='--', color='gray')

    return predicted
class ArimaModel():
    """ARIMA model with exogenous regressors behind a fit/predict interface."""

    def __init__(self, order):
        self.order = order

    def fit(self, Y, X):
        """Fit ARIMA with ``self.order`` on ``Y.values`` using ``X.values`` as exog."""
        unfitted = ARIMA(Y.values,
                         order=self.order,
                         exog=X.values,
                         freq=None)
        self.model = unfitted.fit(disp=0)

    def predict(self, Y, X, X_test):
        """Return predictions for positions 0..len(X_test)-1, indexed like ``X_test``.

        ``Y`` and ``X`` are accepted but not used.
        """
        last_position = X_test.shape[0] - 1
        values = self.model.predict(start=0, end=last_position, exog=X_test)
        return pd.Series(data=values, index=X_test.index)
Example #31
0
class Arima:
    """ARIMA(3, 1, 0) model over the target column named in ``cfg``."""

    def __init__(self, df, cfg):
        target = cfg['target_feature']
        self.series = df[target]
        self.model = ARIMA(self.series, order=(3, 1, 0))

    def fit_model(self):
        """Fit the model, replace ``self.model`` with the results and print the summary."""
        fitted = self.model.fit(disp=0)
        self.model = fitted
        print(fitted.summary())

    def plot_autocorrelation(self):
        """Show the autocorrelation plot of the series."""
        autocorrelation_plot(self.series)
        plt.show()

    def predict_arima(self, series):
        """Forward ``series`` as the single positional argument of ``self.model.predict``."""
        prediction = self.model.predict(series)
        return prediction
def programmer_5():
    """Fit ARIMA(0,1,1) on the disc data, test its residuals for white noise
    and print the verdict followed by the Ljung-Box statistics."""
    discfile = "data/discdata_processed.xls"
    # Number of residual lags to test.
    lagnum = 12

    frame = pd.read_excel(discfile, index_col="COLLECTTIME")
    # The last 5 rows are left out.
    xdata = frame.iloc[:len(frame) - 5]["CWXT_DB:184:D:\\"]

    # Fit the model, predict on the original scale and compute the residuals.
    fitted = ARIMA(xdata, (0, 1, 1)).fit()
    xdata_pred = fitted.predict(typ="levels")
    pred_error = (xdata_pred - xdata).dropna()

    lb, p = acorr_ljungbox(pred_error, lags=lagnum)
    # Any p-value below 0.05 is taken to mean the residuals are not white noise.
    if (p < 0.05).any():
        print(u"模型ARIMA(0,1,1)不符合白噪声检验")
    else:
        print(u"模型ARIMA(0,1,1)符合白噪声检验")
    print(lb)
Example #33
0
    ax = feature_indiv.plot(y='value',use_index=True)
    if feature == 'mood':
        ax.set_ylim((0,10))
    ax.set_xlim((min(feature_indiv.index),max(feature_indiv.index)))
    fig = ax.get_figure()
    plt.show(block=False)
    plt.close(fig)
# Print every individual id; the per-individual plotting calls are disabled.
for individual in indiv_ids:
    print individual
    #plot_histogram(individual, 'mood')
    #plot_series(individual, 'mood')
#%%
# Expanding-window evaluation of an ARIMA model of order [1,0,0]:
# fit on the first 30 daily values and take the in-sample predictions ...
y = get_feature_by_day(feature, current_indiv).values
train_subset = range(30)
arima = ARIMA(y[train_subset], [1,0,0]).fit()
predictions = arima.predict()
# ... then, for every later position i, refit on y[0:i] and append one value.
# NOTE(review): the appended value is the last element of
# predict(0, len(y) + i), i.e. the prediction at position len(y) + i rather
# than at i - confirm the intended horizon.
for i in range(max(train_subset)+1,len(y)):
    arima = ARIMA(y[0:i], [1,0,0]).fit()
    predictions = np.append(predictions,arima.predict(0, len(y) + i)[-1])
# Reload the feature (without .values), attach the predictions and plot both.
y = get_feature_by_day(feature, current_indiv)
y['preds'] = predictions
y.plot()
# RMSE between predictions and observed values (result is not stored).
rmse(y['preds'].values,y['value'].values)
#%%
# Ad-hoc prediction with the last fitted model (result is not stored).
arima.predict(start = min(y.index), end = 50)
from statsmodels.tsa.stattools import acf, pacf
def get_feature_by_day(feature, current_indiv):
    y = get_feature(feature, current_indiv)
    avg_features = ['mood', 'circumplex.valence', 'circumplex.arousal']
    sum_features = [s for s in feature_names if s not in avg_features]
# -*- coding: utf-8 -*-
# Model diagnostic check: fit ARIMA(0,1,1) and test its residuals for white noise.
import pandas as pd

# Parameter initialisation
discfile = '../data/discdata_processed.xls'
lagnum = 12  # number of residual lags to test

data = pd.read_excel(discfile, index_col='COLLECTTIME')
data = data.iloc[: len(data) - 5]  # leave out the last 5 observations
xdata = data['CWXT_DB:184:D:\\']

from statsmodels.tsa.arima_model import ARIMA  # used to build the ARIMA(0,1,1) model

arima = ARIMA(xdata, (0, 1, 1)).fit()  # build and fit the model
xdata_pred = arima.predict(typ='levels')  # predict

print "-------預測模型------------\n", xdata_pred

pred_error = (xdata_pred - xdata).dropna()  # compute the residuals

from statsmodels.stats.diagnostic import acorr_ljungbox  # white-noise test

lb, p = acorr_ljungbox(pred_error, lags=lagnum)
h = (p < 0.05).sum()  # a p-value below 0.05 is taken as non-white-noise
if h > 0:
    print(u'模型ARIMA(0,1,1)不符合白噪声检验')
else:
    print(u'模型ARIMA(0,1,1)符合白噪声检验')