def get_arima_prediction(price_data: np.ndarray,
                         order: Tuple[int, int, int],
                         allow_short: bool = True) -> int:
    """Turn a one-step ARIMA price forecast into a -1 / 0 / 1 signal.

    The forecasted return (relative to the last observed price) is compared
    against a dead band of 0.1 times the sample standard deviation of the
    historical percentage changes.

    Args:
        price_data: Observed prices; the last element is the reference price.
        order: ARIMA ``(p, d, q)`` order passed straight to ``ARIMA``.
        allow_short: When False, a -1 signal is clamped to 0.

    Returns:
        1 if the forecasted return is above the band, -1 if it is below the
        negative band, 0 otherwise.
    """
    # NOTE: ``order`` used to be written ``order=Tuple[...]``, which made the
    # typing alias the *default value*; it is now a proper annotation.
    volatility = pd.Series(price_data).pct_change().std(ddof=1)
    forecast = ARIMA(price_data, order=order).fit().forecast(steps=1)
    last_price = price_data[-1]
    forecasted_returns = ((forecast - last_price) / last_price)[0]

    threshold = 0.1 * volatility
    if forecasted_returns > threshold:
        prediction = 1
    elif forecasted_returns < -threshold:
        prediction = -1
    else:
        prediction = 0

    if not allow_short:
        prediction = max(prediction, 0)

    # Internal invariant, not input validation.
    assert prediction in [-1, 0, 1]
    return prediction
Esempio n. 2
0
def test_low_memory():
    """Check that ``fit(low_memory=True)`` matches a regular fit."""
    # Only the first 50 observations of the 'infl' column are used.
    endog = dta['infl'].iloc[:50]
    model = ARIMA(endog, order=(1, 0, 0), concentrate_scale=True)

    full_res = model.fit()
    lean_res = model.fit(low_memory=True)

    # Both fits must agree on the parameters and the log-likelihood.
    assert_allclose(lean_res.params, full_res.params)
    assert_allclose(lean_res.llf, full_res.llf)

    # Fitting with low_memory must not alter the model's own setting.
    assert_equal(model.ssm.memory_conserve, 0)

    # Spot-check a few outputs that the low-memory fit leaves unset.
    for attr in ('llf_obs', 'predicted_state', 'filtered_state',
                 'smoothed_state'):
        assert_(getattr(lean_res, attr) is None)
Esempio n. 3
0
def in_sample_forecast(ts_data):
    """Forecast the tail of ``ts_data`` one step at a time and score it.

    The series is split at ``TRAINING_DATA_SPLIT_RATIO``. For every held-out
    point an ARIMA model of order ``ARIMA_ORDER`` is refitted on the history
    so far, a single value is forecast, and the true observation is then
    appended to the history.

    Returns:
        ``(model_predictions, error)`` where ``error`` is the mean squared
        error between the held-out data and the predictions.
    """
    split_at = int(len(ts_data) * TRAINING_DATA_SPLIT_RATIO)
    training_data = ts_data[0:split_at]
    test_data = ts_data[split_at:]

    history = list(training_data)
    model_predictions = []
    for time_point in range(len(test_data)):
        fitted = ARIMA(history, order=ARIMA_ORDER).fit()
        yhat = fitted.forecast()[0]
        model_predictions.append(yhat)

        actual_value = test_data[time_point]
        print(actual_value, yhat)
        # Feed the real observation back in before the next refit.
        history.append(actual_value)

    error = mean_squared_error(test_data, model_predictions)
    return model_predictions, error
def predict_next(df, steps, time_range):
    """Extend the training data with ARIMA forecasts and score the hold-out.

    The last 10% of ``df`` (unshuffled) is held out. Unless ``time_range`` is
    ``'Y'``, a seasonal ARIMA model is fitted on the last column of the
    training part and forecasts are produced for the hold-out length plus
    ``steps`` further periods. Negative or NaN forecasts are replaced by 0
    and all forecasts are truncated to ``int``.

    Args:
        df: Frame whose ``reset_index()`` yields a ``'Date'`` column and that
            has a ``'count'`` column (presumably the last column - confirm).
        steps: Number of periods to forecast beyond the hold-out.
        time_range: ``'W'`` advances dates by 7 days, ``'M'`` by one month;
            any other value leaves the date unchanged; ``'Y'`` skips
            forecasting altogether.

    Returns:
        ``(return_df, train_size, model_error)``: the training rows followed
        by the forecast rows (indexed by ``'Date'``), the number of training
        rows, and the mean squared error over the hold-out rows.
    """
    # Local import: the original block did not reference pandas by name.
    import pandas as pd

    df = df.reset_index()
    df_train, df_test = train_test_split(df, test_size=0.1, shuffle=False)
    train_size = df_train.index.size
    test_size = df_test.index.size

    return_df = df_train
    if time_range != 'Y':
        history = list(df_train.iloc[:, -1])
        model_fit = ARIMA(history, order=(1, 1, 0),
                          seasonal_order=(0, 1, 0, 24)).fit()
        forecast = model_fit.get_forecast(steps=test_size + steps)
        model_predictions = list(forecast.summary_frame()['mean'])

        date = df_train['Date'].iloc[-1]
        new_rows = []
        for predict in model_predictions:
            if time_range == 'W':
                date += relativedelta(days=+7)
            elif time_range == 'M':
                date += relativedelta(months=+1)

            if predict < 0 or math.isnan(predict):
                predict = 0
            new_rows.append({'Date': date, 'count': int(predict)})

        # One concat instead of DataFrame.append per row (append was removed
        # in pandas 2.0 and copied the whole frame on every call).
        return_df = pd.concat([df_train, pd.DataFrame(new_rows)],
                              ignore_index=True)

    # The first forecast row sits at position ``train_size`` and lines up
    # with the first hold-out row; the previous label slice started one row
    # too late.
    # NOTE(review): with time_range == 'Y' there are no forecast rows, so the
    # metric call receives mismatched lengths - same as before; confirm the
    # intended behaviour.
    predicted = return_df['count'].iloc[train_size:train_size + test_size]
    model_error = metrics.mean_squared_error(df_test['count'].tolist(),
                                             predicted.tolist())
    return_df = return_df.set_index('Date')
    return return_df, train_size, model_error
Esempio n. 5
0
    def get(self):
        """Fit an ARIMA model on the load data and upload it to HDFS.

        Reads the dataset via ``get_data_cassandra()``, fits an ARIMA(7, 0, 5)
        model on the ``total_estimated_load`` column, saves the fitted model
        under ``model_local_path`` and uploads that file to
        ``model_hdfs_remote_path``.

        Returns:
            A dict with the order used (``'best_cfg'``) and a ``'status'``.
        """
        # Fetch the dataset used for fitting.
        df = get_data_cassandra()

        print(df.head())
        X = df['total_estimated_load'].values

        # ARIMA order (p, d, q) <=> (AR, I, MA); fixed here, not searched.
        p_values = 7
        d_values = 0
        q_values = 5
        best_cfg = (p_values, d_values, q_values)

        # Train the model with the chosen order.
        model_fit = ARIMA(X, order=best_cfg).fit()

        # exist_ok=True avoids the check-then-create race of testing
        # os.path.exists first.
        os.makedirs(model_local_path, exist_ok=True)
        local_model_file = model_local_path + model_name
        model_fit.save(local_model_file)

        # Connect to the HDFS client.
        client = InsecureClient(url='http://namenode:9870', user='******')

        # Create the remote folder when status() reports it as missing.
        if client.status(model_hdfs_remote_path, strict=False) is None:
            client.makedirs(model_hdfs_remote_path)

        # Copy the model file to HDFS, replacing any previous version.
        client.upload(model_hdfs_remote_path, local_model_file,
                      overwrite=True)

        print(client.list(model_hdfs_remote_path))

        return {'best_cfg': best_cfg, 'status': 'Terminated'}
        def fitted_vals_arima(self):
            """Pick the forecast (from raw or fitted cases) scoring higher.

            Fits ARIMA on ``self.cases``, then runs ``self.arima_model`` on
            both the raw cases and the in-sample fitted values. Each result's
            value for the day after the last valid index is compared with
            ``self.today_val`` via ``abs(percentage deviation - 100)``.

            Returns:
                ``(score, forecast.tail(6))`` for whichever candidate has the
                larger score; ties go to the fitted-values candidate.
            """
            def _score(value):
                # A zero today_val cannot be used as a denominator; score 0.
                try:
                    deviation = float(
                        abs((self.today_val - value) * 100) / self.today_val)
                except ZeroDivisionError:
                    return 0
                return abs(deviation - 100)

            fitted_cases = ARIMA(self.cases.astype(float),
                                 order=self.order).fit().fittedvalues

            one_day = datetime.timedelta(days=1)
            latest_date = self.cases.last_valid_index() + one_day

            actual_cases = self.arima_model(self.cases, step=6)
            fitted_values_cases = self.arima_model(fitted_cases, step=6)

            actual_cases_float = float(actual_cases.loc[latest_date])
            pred_cases_float = float(fitted_values_cases.loc[latest_date])
            self.today_val = float(self.today_val)

            print("today_val : " + str(self.today_val) + "prediction :" +
                  str(pred_cases_float))
            error_pred = _score(pred_cases_float)

            print("today_val : " + str(self.today_val) + "actual :" +
                  str(actual_cases_float))
            error_actual = _score(actual_cases_float)

            if error_actual > error_pred:
                return error_actual, actual_cases.tail(6)
            return error_pred, fitted_values_cases.tail(6)
Esempio n. 7
0
    def ARIMA_predict(self, df):
        '''
        ==Function==
        Choose the ARIMA order according to ``self.order_method`` (asking the
        user for input where needed), fit ARIMA on ``df`` and predict over
        the date span of the test split.

        ==Returns==
        res = .fit()
        atrain, atest = train and test set used for ARIMA
        arima_title = title to be used in a plot
        a_pred = predictions from ARIMA model
        order = order used in ARIMA

        ==Raises==
        ValueError if ``self.order_method`` is not one of
        'predetermined', 'auto', 'manual' or 'select'.
        '''
        method = self.order_method.lower()
        if method == 'predetermined':
            order = (2, 0, 0)
        elif method == 'auto':
            order = self.auto_pdq(df)
        elif method == 'manual':
            print(
                'CAUTION: MANUAL IS VERY COMPUTATIONALLY EXPENSIVE (~20 minutes) \nPlease enter "ok" to proceed'
            )
            confirmation = input()
            if confirmation.lower() == 'ok':
                print('Please hold')
                order = self.best_order(df)
            else:
                print('Changing to Auto')
                order = self.auto_pdq(df)
        elif method == 'select':
            print('Please input each parameter')
            ord_p = int(input('p:'))
            ord_d = int(input('d:'))
            ord_q = int(input('q:'))
            order = (ord_p, ord_d, ord_q)
        else:
            # Fail early and clearly instead of hitting an unbound ``order``
            # further down.
            raise ValueError(
                f'Unknown order_method {self.order_method!r}; expected '
                "'predetermined', 'auto', 'manual' or 'select'")

        atrain, atest = self.train_test(df)
        atest_s, atest_e = atest.index.date[0], atest.index.date[-1]
        # The model is fitted on the full ``df``, not only on ``atrain``.
        res = ARIMA(df, order=order).fit()
        a_pred = res.predict(atest_s, atest_e)
        arima_title = f'ARIMA {order}         MSE={round(mean_squared_error(atest,a_pred),5)}'
        return res, atrain, atest, arima_title, a_pred, order
Esempio n. 8
0
def evaluate_arima_model(X, arima_order):
    """Return the RMSE of one-step forecasts on the second half of ``X``.

    The first 50% of ``X`` seeds the history. For each remaining observation
    the history is differenced with an interval of 12 via ``difference``,
    ARIMA is fitted on the result, one value is forecast and mapped back with
    ``inverse_difference``; the true observation then joins the history.
    """
    months_in_year = 12

    X = X.astype('float32')
    train_size = int(len(X) * 0.50)
    train, test = X[0:train_size], X[train_size:]

    history = list(train)
    predictions = []
    for t in range(len(test)):
        diff = difference(history, months_in_year)
        fitted = ARIMA(diff, order=arima_order).fit()
        yhat = inverse_difference(history, fitted.forecast()[0],
                                  months_in_year)
        predictions.append(yhat)
        history.append(test[t])

    # Out-of-sample error over every held-out point.
    return sqrt(mean_squared_error(test, predictions))
def arima_man_forecast(ts, f, order, name):
    """Plot train data, test data and an ARIMA forecast with its interval.

    Args:
        ts: Series to split; the first ``int(len(ts) * f)`` points train the
            model, the rest are plotted as the test segment.
        f: Fraction of ``ts`` used for training.
        order: ARIMA ``(p, d, q)`` order.
        name: Prefix for the plot title.
    """
    plt.figure()
    cut = int(len(ts) * f)
    train, test = ts[:cut], ts[cut:]

    fit = ARIMA(train, order=order).fit()
    plt.plot(train)
    plt.plot(test)

    forecast = fit.get_forecast(len(test))
    plt.plot(forecast.predicted_mean)

    # The interval columns are looked up by the hard-coded series name
    # "Close Price".
    ci = forecast.conf_int()
    lower, upper = ci["lower Close Price"], ci["upper Close Price"]
    plt.fill_between(x=test.index, y1=lower, y2=upper,
                     color=(0.5, 0.5, 0.5, 0.2))

    plt.title(name + " | [p,d,q] : " + str(order))
    plt.legend(["train", "test", "forecast", "95% confidence"])
    plt.show()
def evaluate_arima_model(X, arima_order):
    """Return the MSE of one-step ARIMA forecasts on the last 34% of ``X``.

    The first 66% of ``X.values`` seeds the history. For each later
    observation ARIMA is refitted on the history, one value is forecast and
    the true observation is appended. If fitting or forecasting fails
    part-way through, the error is computed over the predictions made up to
    that point (best effort).

    Args:
        X: Object exposing ``.values`` (e.g. a pandas Series).
        arima_order: ARIMA ``(p, d, q)`` order.

    Returns:
        Mean squared error between the predictions and the matching prefix
        of the held-out data.
    """
    values = X.values
    train_size = int(len(values) * 0.66)
    train, test = values[0:train_size], values[train_size:]
    history = list(train)
    predictions = []
    try:
        for observation in test:
            model_fit = ARIMA(history, order=arima_order).fit()
            predictions.append(model_fit.forecast()[0])
            history.append(observation)
    except Exception:
        # Deliberately best effort: keep whatever was predicted so far.
        # ``Exception`` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        pass

    # ``predictions`` can never be longer than ``test``, so trimming the
    # observations to its length covers both the full and the partial case.
    return mean_squared_error(test[:len(predictions)], predictions)
Esempio n. 11
0
    def arima(df, *, ar, i, ma, fit=True, freq='B'):
        """
        Create an ARIMA object for modeling time series.

        The `close` column is re-sampled to `freq` and gaps introduced by
        that re-sampling are forward-filled before modeling.

        Parameters:
            - df: The dataframe containing the stock closing price as `close`
                  and with a time index.
            - ar: The autoregressive order (p).
            - i: The differenced order (d).
            - ma: The moving average order (q).
            - fit: Whether or not to return the fitted model,
                   defaults to `True`.
            - freq: The frequency of the data. Default is 1 business day ('B').

        Returns:
            The fitted results when `fit` is true, otherwise the unfitted
            `statsmodels` ARIMA model.
        """
        # ``.ffill()`` is the non-deprecated spelling of
        # ``fillna(method='ffill')``.
        arima_model = ARIMA(
            df.close.asfreq(freq).ffill(), order=(ar, i, ma)
        )
        return arima_model.fit() if fit else arima_model
Esempio n. 12
0
def perform_arima(train_X, train_y, model_name, order):
    """Build and fit an ARIMA or SARIMAX model with exogenous regressors.

    Args:
        train_X: Exogenous data passed as ``exog``.
        train_y: Endogenous series.
        model_name: Either ``'arima'`` or ``'sarimax'``.
        order: ``(p, d, q)`` order.

    Returns:
        ``(model, results)`` - the unfitted model and its fit results.

    Raises:
        KeyError: If ``model_name`` is neither ``'arima'`` nor ``'sarimax'``.
    """
    if model_name not in ('arima', 'sarimax'):
        raise KeyError(
            'Invalid model selection, choose either "arima" or "sarimax"!')

    # Both classes take the same arguments here; stationarity and
    # invertibility enforcement are switched off for both.
    model_cls = ARIMA if model_name == 'arima' else SARIMAX
    model = model_cls(train_y,
                      exog=train_X,
                      order=order,
                      enforce_invertibility=False,
                      enforce_stationarity=False)

    results = model.fit()
    return model, results
Esempio n. 13
0
    def __parameters_selection_arima(self, candles):
        """Pick the ARIMA ``(p, 1, q)`` order with the lowest AIC.

        For more than 500 candles the search is skipped and ``(1, 1, 1)`` is
        returned. Otherwise p and q each range over 1..2 with d fixed at 1.

        Returns:
            The best ``(p, 1, q)`` tuple, or ``()`` if no candidate could be
            fitted.
        """
        if len(candles) > 500:
            return (1, 1, 1)

        best_aic = float('inf')
        best_params = ()

        for p in range(1, 3):
            for q in range(1, 3):
                try:
                    model_fit = ARIMA(candles, order=(p, 1, q)).fit()
                except Exception:
                    # Skip orders that cannot be fitted.
                    continue
                # Compare every successfully fitted candidate. Previously
                # this check sat outside the inner loop, so only the last fit
                # per ``p`` was considered and a failed fit reused a stale
                # (or undefined) result.
                aic = model_fit.aic
                if aic < best_aic:
                    best_aic = aic
                    best_params = (p, 1, q)
        return best_params
Esempio n. 14
0
def arima_model(data):
    """Fit a seasonal ARIMA model to ``data`` and return the fit results.

    The order is fixed at ``(1, 1, 2)`` with seasonal order
    ``(1, 1, 2, 12)``.
    """
    # The unused ``pmdarima`` import was dropped so the function no longer
    # requires that package to be installed.
    from statsmodels.tsa.arima.model import ARIMA

    return ARIMA(data, order=(1, 1, 2), seasonal_order=(1, 1, 2, 12)).fit()
Esempio n. 15
0
    def grid_search(self):
        """Print the AIC of ARIMA(p, 1, q) for p, q in 1..6 and the best one.

        Models are fitted on ``self.x`` with ``self.df.index`` as dates.
        Nothing is returned; the lowest AIC and its ``(p, q)`` are printed
        at the end.
        """
        report = "({},1,{}), ({},1,{},144) - AIC = {}."
        best_aic = np.inf
        for p in range(1, 7):
            for q in range(1, 7):
                # P and Q only feed the printed message; the seasonal part of
                # the model is not passed to ARIMA.
                for P in range(1, 2):
                    for Q in range(1, 2):
                        fitted = ARIMA(
                            self.x,
                            order=(p, 1, q),
                            dates=self.df.index,
                        ).fit()
                        aic = fitted.aic
                        print(report.format(p, q, P, Q, aic))
                        if aic < best_aic:
                            best_aic = aic
                            best_order = (p, q)
        print(best_aic, best_order)
Esempio n. 16
0
    def fit(self, train):
        """Select an ARIMA order with ``pm.auto_arima`` and fit that model.

        Stores the ``'Close'`` series in ``self.history``, the ARIMA model in
        ``self.model`` and its fit results in ``self.results``.
        """
        series = train['Close']
        # Keep the stock price series for later use.
        self.history = series

        search_options = dict(
            start_p=1,
            start_q=1,
            max_p=3,
            max_q=3,
            m=12,
            start_P=0,
            seasonal=False,
            d=self.d,
            D=1,
            trace=True,
            error_action='ignore',
            suppress_warnings=True,
            stepwise=True,
        )
        auto_model = pm.auto_arima(series, **search_options)

        # Refit with statsmodels' ARIMA using the selected (p, d, q).
        self.model = ARIMA(series, order=auto_model.order)
        self.results = self.model.fit()
Esempio n. 17
0
def q3_d():
    """Plot rolling one-step forecasts of log(WMT) from two ARIMA variants.

    Starting at the position of '2016-03-31', each iteration fits an
    ARIMA(0, 1, 1) model and the same model with seasonal order
    (0, 1, 1, 4) on all rows before the current position, and records the
    one-step forecast of each. Both forecast paths are plotted against the
    observed log series.
    """
    print("begin")
    df = get_data("data/HW5_WMT.xlsx", "HW5_WMT")
    df.index = pd.to_datetime(df.index, format='%Y%m%d')
    log_wmt = np.log(df['WMT'])
    df['first_difference'] = log_wmt - np.log(df['WMT']).shift(1)
    df['season_difference'] = np.log(df['WMT']) - np.log(df['WMT']).shift(4)

    start_loc = df.index.get_loc('2016-03-31')
    df_test = df.tail(len(df.index) - start_loc)
    df_test = df_test.head(df_test.index.get_loc('2020-03-31'))
    print(df_test)

    rst_arima_list = []
    rst_airline_list = []
    for offset in range(len(df_test.index)):
        # The training window grows by one row per forecast.
        log_train = np.log(df.head(start_loc + offset)['WMT'])
        plain_fit = ARIMA(log_train, order=(0, 1, 1)).fit()
        airline_fit = ARIMA(log_train,
                            order=(0, 1, 1),
                            seasonal_order=(0, 1, 1, 4)).fit()
        rst_arima_list.append(plain_fit.forecast()[0])
        rst_airline_list.append(airline_fit.forecast()[0])

    plt.plot(df_test.index, rst_arima_list, label='ARIMA Model')
    plt.plot(df_test.index, rst_airline_list, label='AIRLINE Model')
    np.log(df_test['WMT']).plot(label='Reality')
    plt.legend()
    plt.show()
Esempio n. 18
0
def AutoARIMA(stock):
    """Forecast 15 steps of log close prices with an auto-selected ARIMA.

    Returns:
        ``(forecasted_series, lower_series, upper_series)`` - the forecast
        and the bounds of its 75% interval, indexed by the first 15 test
        dates. The MAPE against the test data is printed.
    """
    # Seasonality check; the result is not used further in this function.
    decompose(stock)

    # Work in the logarithmic domain.
    dfClean = np.log(stock.stockValue['Close'])

    # Train on rows 10 .. 98% of the data; the remainder is the test set.
    split_at = int(len(dfClean) * 0.98)
    train_data = dfClean[10:split_at]
    test_data = dfClean[split_at:]

    # Identify (p, d, q) with auto_arima.
    model_autoARIMA = auto_arima(train_data, start_p=5, start_q=5,
                                 test='adf',
                                 max_p=7, max_q=7,
                                 m=1,
                                 d=None,
                                 seasonal=False,
                                 start_P=0,
                                 D=0,
                                 trace=True,
                                 error_action='ignore',
                                 suppress_warnings=True,
                                 stepwise=True)
    print(model_autoARIMA.summary())

    # Train the ARIMA model with the selected order.
    fitted = ARIMA(train_data, order=model_autoARIMA.order).fit()

    # Forecast.
    forecastedSteps = 15
    model_predictions = fitted.get_forecast(steps=forecastedSteps)
    forecasted_value = model_predictions.predicted_mean
    horizon_index = test_data.index[:forecastedSteps]
    forecasted_series = pd.Series(forecasted_value.values,
                                  index=horizon_index)

    confidence = model_predictions.conf_int(alpha=0.25)  # 75% interval
    lower_series = pd.Series(confidence['lower Close'].values,
                             index=horizon_index)
    upper_series = pd.Series(confidence['upper Close'].values,
                             index=horizon_index)

    mape = np.mean(np.abs(forecasted_value.values - test_data[:forecastedSteps].values)/np.abs(test_data[:forecastedSteps].values))
    print('MAPE: {:.2%}'.format(mape))
    return forecasted_series, lower_series, upper_series
Esempio n. 19
0
def arima(filename):
    """Forecast the tail of an integer series and write it to ``sent.xlsx``.

    ``filename`` holds one integer per line. The first 66% of the values
    seed the history; for each later value an ARIMA(5, 1, 0) model is
    refitted, one value is forecast, and the observation is appended.
    Predictions go to column A and observations to column B (from row 2),
    and the RMSE is written to ``C2``.
    """
    # ``with`` guarantees the file is closed; blank lines (such as the one
    # produced by a trailing newline) are skipped instead of crashing int().
    with open(filename) as ts:
        tsA = [int(line) for line in ts.read().split('\n') if line.strip()]

    # Open the Excel workbook.
    book = Workbook()
    sheet = book.active

    sheet['A1'] = "Predicted"
    sheet['B1'] = "Expected"
    sheet['C1'] = "Error"

    # NOTE(review): A3 is overwritten by the second prediction in the loop
    # below whenever there are at least two test points - confirm the
    # intended cell for the date.
    sheet['A3'] = time.strftime("%x")

    # Split into train and test sets.
    size = int(len(tsA) * 0.66)
    train, test = tsA[0:size], tsA[size:len(tsA)]
    history = list(train)
    predictions = []

    # Refit on the growing history for every test point.
    for row, obs in enumerate(test, start=2):
        model_fit = ARIMA(history, order=(5, 1, 0)).fit()
        yhat = model_fit.forecast()[0]
        predictions.append(yhat)
        history.append(obs)
        sheet['A%d' % row] = yhat
        sheet['B%d' % row] = obs

    # Evaluate forecasts.
    rmse = sqrt(mean_squared_error(test, predictions))
    sheet['C2'] = '%.3f' % rmse
    book.save("sent.xlsx")
Esempio n. 20
0
def test_invalid():
    """Check that invalid ``ARIMA.fit`` option combinations raise ValueError.

    Only options specific to ``ARIMA`` are covered here, not those that
    would raise errors in SARIMAXSpecification.
    """
    endog = dta['infl'].iloc[:50]
    ar1 = ARIMA(endog, order=(1, 0, 0))

    # An unknown estimation method is rejected.
    assert_raises(ValueError, ar1.fit, method='not_a_method')

    # Fixed parameters are only supported by 'statespace'.
    with ar1.fix_params({'ar.L1': 0.5}):
        assert_raises(ValueError, ar1.fit, method='yule_walker')

    # Model-level values cannot be overridden through fit().
    assert_raises(ValueError, ar1.fit, method='statespace', method_kwargs={
        'enforce_stationarity': False})

    # start_params is only valid for MLE methods.
    assert_raises(ValueError, ar1.fit, method='yule_walker',
                  start_params=[0.5, 1.])

    # has_exog and gls=False with a non-statespace method.
    ar1_const = ARIMA(endog, order=(1, 0, 0), trend='c')
    assert_raises(ValueError, ar1_const.fit, method='yule_walker', gls=False)

    # Non-stationary parameters.
    trending_ar = ARIMA(np.arange(100) * 1.0, order=(1, 0, 0), trend='n')
    assert_raises(ValueError, trending_ar.fit, method='hannan_rissanen')

    # Non-invertible parameters.
    trending_ma = ARIMA(np.arange(20) * 1.0, order=(0, 0, 1), trend='n')
    assert_raises(ValueError, trending_ma.fit, method='hannan_rissanen')
Esempio n. 21
0
def forecast_arima(df: pd.DataFrame, cols: list, with_graph: bool = True):
    """Fit ARIMA(0, 1, 0) per column, forecast the last 50 rows and report.

    For every column in ``cols`` the model is fitted on all but the last
    ``steps`` rows, the held-out rows are forecast, and the forecast summary,
    confidence intervals, RMSE and MAE are printed.

    Args:
        df: Frame holding the series.
        cols: Column names to process.
        with_graph: When true, plot the forecast against the held-out data.
    """
    lag = 0
    order = 1
    moving_avg_model = 0
    steps = 50

    for col in cols:
        # The hold-out window is derived from ``steps`` everywhere, so
        # changing it in one place keeps the fit, the metrics and the plot
        # aligned.
        actual = df[col][-steps:]
        model_fit = ARIMA(df[col].iloc[:-steps],
                          order=(lag, order, moving_avg_model)).fit()

        model_for = model_fit.get_forecast(steps=steps, alpha=0.05)
        print('\t==== Summary of forecast ARIMA(%d, %d, %d) ====\n' %
              (lag, order, moving_avg_model))
        print(model_for.summary_frame(), model_for.conf_int(), sep='\n')
        print('RMSE: %f\nMAE: %f' %
              (rmse(actual, model_for.predicted_mean),
               meanabs(actual, model_for.predicted_mean)))
        print()

        if with_graph:
            plt.figure(figsize=(12, 5))
            plt.xlabel(col)
            plt.title('Forecast for %s using ARIMA(%d, %d, %d)' %
                      (col, lag, order, moving_avg_model))

            # Labels were swapped before: the forecast is the estimate and
            # the held-out data is the actual series.
            ax1 = model_for.predicted_mean.plot(color='blue',
                                                grid=True,
                                                label='Estimated')
            ax2 = actual.plot(color='red',
                              grid=True,
                              secondary_y=True,
                              label='Actual')

            h1, l1 = ax1.get_legend_handles_labels()
            h2, l2 = ax2.get_legend_handles_labels()

            plt.legend(h1 + h2, l1 + l2, loc=2)
            plt.show()
Esempio n. 22
0
    def test_mle(self):
        """Compare an MLE ``AR`` fit with an ``ARIMA(9, 0, 0)`` fit (#3945).

        Checks predict with no constant.
        """
        endog = self.res1.model.endog
        last_ten = slice(-10, None)

        with pytest.warns(FutureWarning):
            res0 = AR(endog).fit(maxlag=9, method='mle', trend='nc', disp=0)
        # NOTE(review): this compares res0's fitted values with themselves,
        # so it can never fail - confirm whether another result was meant.
        assert_allclose(res0.fittedvalues[last_ten],
                        res0.fittedvalues[last_ten],
                        rtol=0.015)

        res_arma = ARIMA(endog, order=(9, 0, 0), trend="n").fit()
        # The last ARIMA parameter is dropped before comparing.
        assert_allclose(res0.params, res_arma.params[:-1], rtol=1e-2)
        assert_allclose(res0.fittedvalues[last_ten],
                        res_arma.fittedvalues[last_ten],
                        rtol=1e-4)
Esempio n. 23
0
def train():
    """Fit an ARIMA(0, 0, 1) model on the differenced data and save it.

    Reads ``SmtExpMngr/models/train_data.csv``, differences the values with
    ``difference`` and writes the fitted model and a fixed bias constant
    next to the input file.
    """
    # Load the data as a single series indexed by its first column.
    series = read_csv('SmtExpMngr/models/train_data.csv',
                      header=None,
                      index_col=0,
                      parse_dates=True,
                      squeeze=True)

    # Prepare and difference the data.
    X = series.values.astype('float32')
    months_in_year = 12  # kept from the original; not passed to difference()
    diff = difference(X)

    # Fit the model.
    model_fit = ARIMA(diff, order=(0, 0, 1)).fit()

    # Bias constant; could be calculated from the in-sample mean residual.
    bias = 165.904728

    # Persist the model and the bias.
    model_fit.save('SmtExpMngr/models/model.pkl')
    numpy.save('SmtExpMngr/models/model_bias.npy', [bias])
    print("Tarined")
Esempio n. 24
0
def arima_forecast(df):
    """Fit ARIMA(7, 1, 1) on ``df.values`` and return predictions 1..90.

    Returns:
        A single-column DataFrame with the predictions for positions 1 to 90
        (inclusive), each truncated to ``int``.
    """
    # Unused date/differencing locals were removed; the model is fitted on
    # the raw (undifferenced) values, as before.
    model_fit = ARIMA(df.values, order=(7, 1, 1)).fit()

    # NOTE(review): start/end are integer positions relative to the start of
    # the fitted data, not calendar dates - confirm this is the intended
    # forecast window.
    forecast = model_fit.predict(start=1, end=90)

    return pd.DataFrame([int(x) for x in forecast])
Esempio n. 25
0
def arima(df, n_pred=5):
    '''
    Create forecasts by using an ARIMA(4, 1, 0) model

    Each forecast is fed back into the history before the next fit, so later
    predictions build on earlier ones.

    Input
    ------
    df: Dataframe with stock data; the column at position 3 is modelled
    n_pred: number of values to be predicted

    Output
    ------
    predicted_set: List of Predicted values
    history_set : List of Historical values followed by the predictions

    '''
    # Every row of the frame is used for training.
    n_rows = int(df.shape[0])
    training_set = df.iloc[:n_rows, 3:4].values

    # hist_set keeps one-element rows (model input); history_set is flat.
    hist_set = list(training_set)
    history_set = list(training_set.ravel())
    predicted_set = []

    for _ in range(n_pred):
        fitted = ARIMA(hist_set, order=(4, 1, 0)).fit()
        pred_value = fitted.forecast()[0]
        predicted_set.append(pred_value)
        hist_set.append([pred_value])
        history_set.append(pred_value)

    print('PREDICTED: ', predicted_set)

    return predicted_set, history_set
Esempio n. 26
0
def run_arima(chunked_data, price_col='y', n_prediction_units=1):
    """Fit ARIMA(0, 1, 0) on each chunk and attach forecasts to its target.

    Args:
        chunked_data: Iterable of ``(x_i, y_i)`` dataframe pairs.
        price_col: Column of ``x_i`` to model.
        n_prediction_units: Number of steps to forecast per chunk.

    Returns:
        The list of ``y_i`` frames, each with a new ``'yhat'`` column. The
        frames are modified in place.
    """
    # Silence convergence warnings from ARIMA (this changes the
    # process-wide warning filters).
    warnings.simplefilter('ignore', ConvergenceWarning)

    results = []
    for x_i, y_i in chunked_data:
        fitted = ARIMA(x_i[price_col].values, order=(0, 1, 0)).fit()
        yhat = fitted.forecast(steps=n_prediction_units)

        # Keep at most as many predictions as there are target rows.
        y_i['yhat'] = yhat[:len(y_i)]
        results.append(y_i)

    return results
Esempio n. 27
0
    def test_03(self):
        """Compare server-side PMML scoring with statsmodels forecasts.

        Fits an ARIMA(2, 1, 2) model with constant trend, exports it to PMML,
        scores 5 steps on the server and checks forecasts and 95% interval
        bounds against the local results.
        """
        ts_data = self.getData()
        f_name='arima212_c_car_sold.pmml'
        result = StateSpaceARIMA(ts_data,order=(2,1,2),trend = 'c').fit()
        StatsmodelsToPmml(result, f_name, conf_int=[95])

        model_name = self.adapa_utility.upload_to_zserver(f_name)
        z_pred = self.adapa_utility.score_in_zserver(model_name, {'h':5},'TS')
        forecasts=result.get_forecast(5)

        # Output keys and interval columns are derived from the series name.
        series_name = ts_data.squeeze().name
        server_out = z_pred['outputs'][0]

        z_forecasts = list(server_out['predicted_'+series_name].values())
        model_forecasts = forecasts.predicted_mean.values.tolist()

        z_conf_int_95_upper = list(server_out['conf_int_95_upper_'+series_name].values())
        model_conf_int_95_upper = forecasts.conf_int()['upper '+series_name].tolist()

        z_conf_int_95_lower = list(server_out['conf_int_95_lower_'+series_name].values())
        model_conf_int_95_lower = forecasts.conf_int()['lower '+series_name].tolist()

        self.assertEqual(np.allclose(z_forecasts,model_forecasts),True)
        self.assertEqual(np.allclose(z_conf_int_95_upper, model_conf_int_95_upper),True)
        self.assertEqual(np.allclose(z_conf_int_95_lower, model_conf_int_95_lower),True)
Esempio n. 28
0
def arima_cross_validation(data,
                           order,
                           initial=12 * 15,
                           horizon=12,
                           period=6,
                           verbose=False):
    """Return per-fold RMSEs from expanding-window ARIMA forecasts.

    The number of folds is ``(len(data) - initial - horizon) // period``.
    Fold ``i`` trains on ``data[:n]`` and forecasts the next ``horizon``
    points, with ``n`` growing by ``period`` per fold so that the last fold
    ends at the end of ``data``.

    Args:
        data: Series to model (fitted with ``freq='MS'``).
        order: ARIMA ``(p, d, q)`` order.
        initial: Only used to derive the number of folds.
        horizon: Forecast length per fold.
        period: Spacing between consecutive fold cut-offs.
        verbose: Print fold details when true.

    Returns:
        List of RMSE values, one per fold.
    """
    total = len(data)
    k = (total - initial - horizon) // period
    if verbose:
        print('Cross validating over', str(k), 'folds.')

    rmses = []
    for i in range(1, k + 1):
        n = total - horizon - ((k - i) * period)
        fitted = ARIMA(data[:n], order=order, freq='MS').fit()
        y_hat = fitted.get_forecast(steps=horizon).predicted_mean.to_numpy()
        y = data[n:n + horizon].to_numpy()
        rmse = np.sqrt(mean_squared_error(y, y_hat))
        if verbose:
            print(
                f'fold {i}: train[0:{n}], test[{n}:{n+horizon}] of {len(data)}, rmse={rmse}'
            )
        rmses.append(rmse)

    return rmses
Esempio n. 29
0
def get_sScore(pResiduals, kappa=252/30):
    """Compute a score per column from AR(1) fits on cumulative residuals.

    For each column of the cumulated residuals an AR(1) model is fitted.
    Columns whose ``-log(b) * 60`` exceeds ``kappa`` (``b`` being the AR
    coefficient) get an equilibrium mean ``a / (1 - b)`` and an equilibrium
    standard deviation ``sqrt(var / (1 - b**2))``. The returned score is
    ``(last cumulative value - centred mean) / sigma``.

    Args:
        pResiduals: Frame of residuals with a datetime-like index (it is
            converted with ``to_period('D')``).
        kappa: Threshold applied to ``-log(b) * 60``.

    Returns:
        Series of scores; columns that did not pass the threshold come out
        as NaN or are absent, depending on pandas alignment.
    """
    cum_resid = pd.DataFrame(pResiduals.cumsum())
    cum_resid.index = cum_resid.index.to_period('D')
    m = pd.Series(index = cum_resid.index)
    sigma_eq = pd.Series(index = cum_resid.columns)

    for col in cum_resid.columns:
        ar1_fit = ARIMA(cum_resid[col], order=(1,0,0)).fit()
        # The fitted constant is read but replaced below by the mean of the
        # AR(1) innovations.
        a = ar1_fit.params['const']
        b = ar1_fit.params['ar.L1']

        if -np.log(b) * 60 > kappa:
            column = cum_resid[col]
            innovations = (column - column.shift(1) * b)[1:]
            a = innovations.mean()
            centred = innovations - a
            m[col] = a/(1-b)
            sigma_eq[col] = math.sqrt(centred.var()/(1-b*b))

    # Drop entries never assigned, then centre the means across columns.
    m_valid = m.dropna()
    m_centred = m_valid - m_valid.mean()
    last_values = cum_resid.iloc[-1,:]
    sigma_valid = sigma_eq.dropna()
    return (last_values - m_centred)/sigma_valid
Esempio n. 30
0
def test_get_model_results():
    """Fit ARIMA(1, 1, 0) with trend 'ct' on one spread column and log it.

    The fit summary is logged at INFO level and the logger's handlers are
    cleared afterwards.
    """
    import logging

    from statsmodels.tsa.arima.model import ARIMA
    import nb_credit_spread as cslibrary

    cslib = cslibrary.creditspread()
    start_date = '2009-01-31'
    ytw_delta = cslib.get_ytw_from_date_delta(
        start=start_date, srcfile='src/YTW-All-Values.xlsx')

    endog_col = 'CS-Aaa-3MO-DCF'
    order = (1, 1, 0)
    # No exogenous regressors are used.
    endog, exog = ytw_delta[endog_col], None
    model_fit = ARIMA(endog=endog, exog=exog, order=order, trend='ct').fit()

    log = old_setlogger('test_get_model_results', logging.INFO)
    log.info(model_fit.summary())

    # Detach handlers so they do not accumulate on the named logger.
    if log.hasHandlers():
        log.handlers.clear()