コード例 #1
0
ファイル: Models.py プロジェクト: arminmirrezai/ufscase
 def _model(self, ts, dummies, stationary, trend, diff, method='lbfgs'):
     """
     Fit a seasonal ARIMA model on ``ts`` and store it in ``self.model``.

     If ``self.get_hyperparams()`` is empty, the orders are searched with
     ``pm.auto_arima``; otherwise a ``pm.ARIMA`` is built from the stored
     'Order', 'Seasonal order' and 'Trend' entries and fitted directly.

     :param ts: series to fit; its index entries must expose ``year`` and
         ``month`` and it needs at least three observations
     :param dummies: currently ignored -- the exogenous regressors are
         always taken from ``self.x_train``
     :param stationary: passed to ``pm.auto_arima`` (search branch only)
     :param trend: passed to ``pm.auto_arima`` (search branch only)
     :param diff: passed to ``pm.auto_arima`` as ``d`` (search branch only)
     :param method: optimisation method passed to pmdarima
     """
     # Function-scope import so the method does not depend on a file-level
     # import being present.
     from ast import literal_eval

     # TODO implement robust and corona variable
     exog = self.x_train
     years = ts.index[-1].year - ts.index[0].year + 1
     # Seasonal period: 52 when the first and third observations fall in the
     # same or the following month (presumably weekly data), otherwise 12.
     month_gap = ts.index[2].month - ts.index[0].month
     periods = 52 if month_gap in {0, 1} else 12
     hyper_params = self.get_hyperparams()
     if len(hyper_params) == 0:
         sarimax = pm.auto_arima(y=ts,
                                 X=exog,
                                 seasonal=True,
                                 stationary=stationary,
                                 d=diff,
                                 max_p=10,
                                 method=method,
                                 trend=trend,
                                 with_intercept=True,
                                 max_order=None,
                                 max_P=int(years / 2),
                                 D=pm.arima.nsdiffs(ts, periods),
                                 m=periods,
                                 stepwise=True,
                                 maxiter=45,
                                 sarimax_kwargs={'cov_type': None})
     else:
         # The stored orders are strings; literal_eval parses tuple literals
         # without executing arbitrary code the way eval() would.
         order = literal_eval(hyper_params['Order'])
         seasonal_order = literal_eval(hyper_params['Seasonal order'])
         sarimax = pm.ARIMA(order=order,
                            seasonal_order=seasonal_order,
                            method=method,
                            maxiter=45,
                            trend=hyper_params['Trend']).fit(y=ts, X=exog)
     self.model = sarimax
コード例 #2
0
    def fit(self, price_indices):
        """Fit one ARIMA model per capital on its relative price changes.

        The (p, d, q) order is searched once, on the first capital, with the
        AR order pinned to 0 (``start_p=0, max_p=0``), and the MA order is
        raised to at least 4.  That order is then reused for every capital.

        For each capital ``self.model[name]`` holds:
          * ``'ma'``   -- the fitted MA coefficients,
          * ``'data'`` -- the last ``d + q`` relative changes,
          * ``'size'`` -- the number of observations minus ``d + q``.

        :param price_indices: mapping-like object; iterating it yields the
            capital names and indexing by name yields a price sequence
        """
        self.model = {}
        self.order = None
        for capital_name in price_indices:
            entry = {'ma': None, 'data': None, 'size': 0}
            self.model[capital_name] = entry

            # Convert price levels into period-over-period relative changes;
            # the first element has no predecessor and is set to 0.
            seq_price = price_indices[capital_name].copy()
            previous = seq_price[:-1]
            seq_price[1:] = (seq_price[1:] - previous) / previous
            seq_price[0] = 0

            if self.order is None:
                searched = pm.auto_arima(seq_price,
                                         seasonal=False,
                                         start_p=0,
                                         max_p=0,
                                         start_q=3,
                                         max_q=50,
                                         trace=True)
                best = searched.get_params()['order']
                # Enforce a minimum MA order of 4.
                self.order = best if best[2] >= 4 else (best[0], best[1], 4)

            estimator = pm.ARIMA(order=self.order, with_intercept=False)
            estimator.fit(seq_price)

            tail = self.order[1] + self.order[2]
            entry['ma'] = estimator.maparams()
            entry['data'] = seq_price[-tail:]
            entry['size'] = seq_price.shape[0] - tail
コード例 #3
0
 def __init__(self,
              target_column: str,
              order: tuple,
              seasonal_order: tuple,
              method: str = 'lbfgs',
              use_exog: bool = False,
              with_intercept: bool = True,
              trend: str = None,
              log: bool = False,
              power_transf: bool = False,
              one_step_ahead: bool = False):
     """
     Configure a pmdarima-backed (S)ARIMA(X) model wrapper.

     The underlying ``pmdarima.ARIMA`` is created with ``maxiter=50``,
     ``disp=1``, ``enforce_stationarity=False`` and warnings suppressed.

     :param target_column: target_column for prediction
     :param order: (p, d, q) of (S)ARIMA(X) model
     :param seasonal_order: (P, D, Q, m) of (S)ARIMA(X) model; ``m`` is also
         handed to the base class as ``seasonal_periods``
     :param method: method to use for optimization
     :param use_exog: use exogenous input
     :param with_intercept: use intercept
     :param trend: trend component (stored on the instance only)
     :param log: use log transform
     :param power_transf: use power transform
     :param one_step_ahead: perform one step ahead prediction
     """
     super().__init__(target_column=target_column,
                      seasonal_periods=seasonal_order[3],
                      name='(S)ARIMA(X)',
                      one_step_ahead=one_step_ahead)

     arima_settings = {
         'order': order,
         'seasonal_order': seasonal_order,
         'maxiter': 50,
         'disp': 1,
         'method': method,
         'with_intercept': with_intercept,
         'enforce_stationarity': False,
         'suppress_warnings': True,
     }
     self.model = pmdarima.ARIMA(**arima_settings)

     self.use_exog = use_exog
     self.exog_cols_dropped = None
     self.trend = trend
     self.log = log
     # A power transformer is only instantiated when it was requested.
     if power_transf:
         self.power_transformer = sklearn.preprocessing.PowerTransformer()
     else:
         self.power_transformer = None
     self.contains_zeros = False
コード例 #4
0
def _arima_train(table, input_cols, p, d, q, intercept=True):
    """Fit one ARIMA(p, d, q) model per input column and collect the results.

    For every column the fitted estimator, its coefficient array and its AIC
    are stored in the model dict under ``'arima_<column>'``,
    ``'coefficients_array_<column>'`` and ``'aic_<column>'``; a markdown
    summary is accumulated and stored under ``'_repr_brtc_'``.

    :param table: object indexable by column name (``table[column]``)
    :param input_cols: iterable of column names to model
    :param p: autoregressive order
    :param d: differencing order
    :param q: moving-average order
    :param intercept: forwarded to ``pm.ARIMA`` as ``with_intercept``
    :return: ``{'model': <model dict>}``
    """
    model = _model_dict('arima_model')
    rb = BrtcReprBuilder()

    rb.addMD(
        strip_margin("""
        |## ARIMA Train Result
        |
        """))

    for column in input_cols:
        # ``fit`` returns the estimator itself, so each column needs its own
        # estimator.  Re-fitting a single shared instance would make every
        # stored 'arima_<column>' entry point at the model of the last column.
        arima_fit = pm.ARIMA(order=(p, d, q),
                             with_intercept=intercept).fit(table[column])
        coefficients = arima_fit.params()
        aic = arima_fit.aic()
        model['arima_' + str(column)] = arima_fit

        rb.addMD(
            strip_margin("""
        |### Column : {col}
        |
        | - (p,d,q) order : ({p_val}, {d_val}, {q_val})
        | - Intercept : {itc}
        | - Coefficients Array : {ca}
        | - AIC : {aic}
        |
        """.format(col=column,
                   p_val=p,
                   d_val=d,
                   q_val=q,
                   itc=intercept,
                   ca=str(coefficients.tolist()),
                   aic=aic)))
        model['coefficients_array_' + str(column)] = coefficients
        model['aic_' + str(column)] = aic

    model['input_columns'] = input_cols
    model['intercept'] = intercept

    model['_repr_brtc_'] = rb.get()

    return {'model': model}
コード例 #5
0
def arima(price, window, desc):
    """Rolling one-step-ahead ARIMA forecast over ``price``.

    For every position ``i >= window`` the previous ``window`` observations
    are used to pick a non-seasonal order with ``auto_arima`` (p and q capped
    at 3); a ``pm.ARIMA`` with that order is then fitted on the same window
    and its one-step forecast is stored at ``pred[i]``.  Windows containing
    NaN are skipped, so their forecast stays NaN.

    :param price: array of observations (must expose ``shape`` and slicing)
    :param window: number of trailing observations used for each fit
    :param desc: label forwarded to the ``tnrange`` progress bar
    :return: array shaped like ``price``; NaN wherever no forecast was made
    """
    pred = np.full(price.shape, np.nan)
    for i in tnrange(window, price.shape[0], desc=desc):

        train = price[i - window:i]

        if np.any(np.isnan(train)):
            continue

        with warnings.catch_warnings():
            # Silence the noisy but harmless numerical warnings:
            #   'Inverting' -- uninvertible hessian
            #   'invalid'   -- invalid value encountered in true_divide
            #   'overflow'  -- overflow encountered in exp
            #   'divide'    -- divide by zero encountered in true_divide
            # The ConvergenceWarning ('Maximum Likelihood optimization
            # failed to converge') is not filtered.
            for message in ('Inverting', 'invalid', 'overflow', 'divide'):
                warnings.filterwarnings('ignore', message)

            # auto_arima already returns a fitted model, so its order can be
            # read directly; fitting it a second time is wasted work.
            searched = auto_arima(train,
                                  max_p=3,
                                  max_q=3,
                                  seasonal=False,
                                  trace=False,
                                  error_action='ignore',
                                  suppress_warnings=True)
            order = searched.get_params()['order']

            # Fit and predict
            model = pm.ARIMA(order=order)
            model.fit(train)
            # predict() yields a length-1 sequence; take its element
            # explicitly, because implicit conversion of a 1-element array
            # to a scalar is deprecated in recent NumPy releases.
            pred[i] = np.ravel(model.predict(1))[0]

    return pred
コード例 #6
0
def train_model_graph(df):
    """Plot historical bills together with a 12-month SARIMA forecast.

    A ``pm.ARIMA`` with order (1, 1, 2) and seasonal order (0, 1, 1, 12) is
    fitted on ``df['TOTAL']``; the forecast is plotted against monthly dates
    that continue from the last entry of ``df['BILL_DATE']``.

    :param df: frame with ``'BILL_DATE'`` and ``'TOTAL'`` columns; must not
        be empty
    :return: the figure holding a 'Historical Bills' and a 'Predicted Bills'
        trace
    """
    horizon = 12  # number of future months forecast and plotted

    current_dates = [pd.Timestamp(i) for i in df['BILL_DATE'].values]
    last_bill = current_dates[-1]
    future_dates = [
        last_bill + pd.DateOffset(months=i) for i in range(1, horizon + 1)
    ]

    y = df['TOTAL'].values
    arima = pm.ARIMA(order=(1, 1, 2), seasonal_order=(0, 1, 1, 12))
    arima.fit(y)
    forecasts = arima.predict(horizon)

    fig = px.line()
    # Each trace gets an x axis of exactly the same length as its y values;
    # the historical trace must not include the future dates.
    fig.add_scatter(x=current_dates, y=y, name='Historical Bills')
    fig.add_scatter(x=future_dates,
                    y=forecasts,
                    mode='lines',
                    name='Predicted Bills')

    return fig
コード例 #7
0
def get_arima(data, train_len, test_len):
    """Walk-forward one-step ARIMA forecast over the tail of ``data``.

    The last ``train_len + test_len`` observations are split into a training
    head and a test tail.  A non-seasonal order is selected once with
    ``auto_arima``; then, for every test point, a fresh ``pm.ARIMA`` with
    that order is fitted on everything seen so far, its one-step forecast is
    recorded, and the true value is appended to the training data.

    :param data: pandas object supporting ``tail``/``head``/``values``
    :param train_len: number of observations in the initial training set
    :param test_len: number of observations forecast one at a time
    :return: ``(prediction, mse, rmse, mape)``
    """
    # prepare train and test data
    data = data.tail(test_len + train_len).reset_index(drop=True)
    train = data.head(train_len).values.tolist()
    test = data.tail(test_len).values.tolist()

    # auto_arima returns a fitted model, so the selected order can be read
    # straight away without fitting it again.
    searched = auto_arima(train,
                          max_p=3,
                          max_q=3,
                          seasonal=False,
                          trace=True,
                          error_action='ignore',
                          suppress_warnings=True)
    order = searched.get_params()['order']
    print('ARIMA order:', order, '\n')

    # Generate predictions
    prediction = []
    for step, actual in enumerate(test, start=1):
        model = pm.ARIMA(order=order)
        model.fit(train)
        print('working on', step, 'of', test_len,
              '-- ' + str(int(100 * step / test_len)) + '% complete')
        # Only the next value is needed, so forecast a single period
        # instead of the default horizon.
        prediction.append(model.predict(n_periods=1)[0])
        train.append(actual)

    # Generate error data
    mse = mean_squared_error(test, prediction)
    rmse = mse**0.5
    mape = mean_absolute_percentage_error(pd.Series(test),
                                          pd.Series(prediction))
    return prediction, mse, rmse, mape
コード例 #8
0
"""
print(__doc__)

# Author: Taylor Smith <*****@*****.**>

import pmdarima as pm
import joblib  # for persistence
import os

# #############################################################################
# Load the data and split it into separate pieces
# Load the wineind series and cut it at position 125 into two pieces.
y = pm.datasets.load_wineind()
train, test = y[:125], y[125:]

# Fit a seasonal ARIMA: order (1, 1, 2), seasonal order (0, 1, 1, 12).
# NOTE(review): the model is fitted on the full series ``y``; ``train`` is
# not used by the visible code -- confirm this is intended.
arima = pm.ARIMA(order=(1, 1, 2), seasonal_order=(0, 1, 1, 12))
arima.fit(y)

# #############################################################################
# Persist a model and create predictions after re-loading it
pickle_tgt = "arima.pkl"
try:
    # Pickle it
    joblib.dump(arima, pickle_tgt, compress=3)

    # Load the model up, create predictions
    arima_loaded = joblib.load(pickle_tgt)
    preds = arima_loaded.predict(n_periods=test.shape[0])
    print("Predictions: %r" % preds)

finally:
コード例 #9
0
import numpy as np
import pmdarima as pm
from pmdarima import model_selection

# Report which pmdarima version produced the results below.
print("pmdarima version: %s" % pm.__version__)

# Load the data and split it into separate pieces
# (the first 165 observations form the training set).
data = pm.datasets.load_wineind()
train, test = model_selection.train_test_split(data, train_size=165)

# Even though we have a dedicated train/test split, we can (and should) still
# use cross-validation on our training set to get a good estimate of the model
# performance. We can choose which model is better based on how it performs
# over various folds.
# model1 passes an all-zero seasonal order; model2 uses a seasonal order
# with period 12.
model1 = pm.ARIMA(order=(2, 1, 1), seasonal_order=(0, 0, 0, 1))
model2 = pm.ARIMA(order=(1, 1, 2), seasonal_order=(0, 1, 1, 12))
# Both models are scored with the same splitter: window_size=100, step=24,
# h=1.
cv = model_selection.SlidingWindowForecastCV(window_size=100, step=24, h=1)

# Score both candidates on the training data only, using the 'smape' metric.
model1_cv_scores = model_selection.cross_val_score(
    model1, train, scoring='smape', cv=cv, verbose=2)

model2_cv_scores = model_selection.cross_val_score(
    model2, train, scoring='smape', cv=cv, verbose=2)

print("Model 1 CV scores: {}".format(model1_cv_scores.tolist()))
print("Model 2 CV scores: {}".format(model2_cv_scores.tolist()))

# Pick based on which has a lower mean error rate
# (the comparison itself happens after these two averages).
m1_average_error = np.average(model1_cv_scores)
m2_average_error = np.average(model2_cv_scores)
コード例 #10
0
import numpy as np
import pmdarima as pm
from pmdarima import model_selection

# Report which pmdarima version produced the results below.
print("pmdarima version: %s" % pm.__version__)

# Load the data and split it into separate pieces
# (the first 165 observations form the training set).
data = pm.datasets.load_wineind()
train, test = model_selection.train_test_split(data, train_size=165)

# Even though we have a dedicated train/test split, we can (and should) still
# use cross-validation on our training set to get a good estimate of the model
# performance. We can choose which model is better based on how it performs
# over various folds.
# model1 sets no seasonal order; model2 uses a seasonal order with period 12
# and has its warnings suppressed.
model1 = pm.ARIMA(order=(2, 1, 1))
model2 = pm.ARIMA(order=(1, 1, 2),
                  seasonal_order=(0, 1, 1, 12),
                  suppress_warnings=True)
# Both models are scored with the same splitter: window_size=100, step=24,
# h=1.
cv = model_selection.SlidingWindowForecastCV(window_size=100, step=24, h=1)

# Score both candidates on the training data only, using the 'smape' metric.
model1_cv_scores = model_selection.cross_val_score(
    model1, train, scoring='smape', cv=cv, verbose=2)

model2_cv_scores = model_selection.cross_val_score(
    model2, train, scoring='smape', cv=cv, verbose=2)

print("Model 1 CV scores: {}".format(model1_cv_scores.tolist()))
print("Model 2 CV scores: {}".format(model2_cv_scores.tolist()))

# Pick based on which has a lower mean error rate
コード例 #11
0
def evaluate(predictOffest, trainY, testY, params):
    """Walk-forward evaluation of a seasonal ARIMA model.

    The model is fitted on ``trainY``.  Then, window by window, it forecasts
    ``predictOffest`` steps, the forecast is clipped (values below 0.01
    become 0, values above 1 become 1), scored against the matching slice of
    ``testY``, and the model is updated with that slice.  The returned
    metrics are the averages over all scored windows.

    The number of scored windows is ``len(testY) // predictOffest - 1``.

    :param predictOffest: steps forecast (and consumed from ``testY``) per
        window
    :param trainY: training observations
    :param testY: hold-out observations
    :param params: optional mapping that may override ``"order"`` and/or
        ``"seasonal_order"``; ``None`` keeps the defaults
    :return: dict with the keys ``"mae"``, ``"rmse"``, ``"mase"``, ``"r2"``
        (all 0 when no window could be scored)
    """
    print("ARIMA evaluation start!")
    start = time.time()

    m = {
        "mae": 0,
        "rmse": 0,
        "mase": 0,
        "r2": 0,
    }

    order = (1, 1, 2)
    seasonal_order = (0, 1, 1, 12)

    if params is not None:
        if "order" in params:
            order = tuple(params["order"])
        if "seasonal_order" in params:
            # Must override the seasonal order, not the plain order.
            seasonal_order = tuple(params["seasonal_order"])

    arima = pm.ARIMA(order=order, seasonal_order=seasonal_order)
    arima.fit(trainY)

    count = len(testY) // predictOffest - 1
    print("ARIMA count:{} predictOffest:{} len(testY):{}".format(
        count, predictOffest, len(testY)))

    for i in range(count):
        start1 = time.time()
        forecasts = arima.predict(predictOffest)

        forecasts = [0 if a_ < 0.01 else a_ for a_ in forecasts]
        forecasts = [1 if a_ > 1 else a_ for a_ in forecasts]

        # Because the loop stops one window early, (i + 1) * predictOffest
        # is always below len(testY); a plain slice is a full window.
        updateT = testY[i * predictOffest:(i + 1) * predictOffest]

        trainY = np.concatenate((trainY, updateT), axis=None)

        arima.update(updateT)

        _m = ModelUtils.getMetrics(updateT, forecasts, trainY)

        # Accumulate every metric under its own key so the final division
        # yields a true average over the windows.
        for key in m:
            m[key] += _m[key]
        end1 = time.time()
        print("{}/{}".format(i, count) +
              (' ARIMA sub MAE: %.2f' % _m["mae"]) +
              ",spent: %.4fs" % (end1 - start1))

    xlen = len(testY) // predictOffest
    print("ARIMA xlen:{} predictOffest:{}".format(xlen, predictOffest))

    # Average over the windows that were actually scored; with no scored
    # window the metrics stay at 0 instead of dividing by zero.
    if count > 0:
        for key in m:
            m[key] = m[key] / count

    end = time.time()
    print((' ARIMA MAE: %.2f' % m["mae"]) + ",spent: %.4fs" % (end - start))

    return m
コード例 #12
0
def predictFutureStats(player_id, sorted_Matches, all_player_stats_rows,
                       stat_index, topN):
    """Predict a player's next-season statistic from similar players.

    Steps:
      1. The target player's rows are selected (column 0 holds the player
         id, column 2 the age, column ``stat_index`` the statistic).
      2. The statistic series is extended by one value: the last value when
         the series is all zeros, otherwise the one-step forecast of an
         ARIMA(0, 0, 0) model.
      3. For the ``topN`` best matches, their statistic at each of the
         target player's ages fills ``Ref`` and their statistic at the
         target's next-season age fills ``Ref_next``.
      4. Weights are obtained by minimising ``weighted_sum_objective``
         (with ``arg1`` = extended target series, ``arg2`` = ``Ref``)
         starting from all ones.

    :param player_id: id of the target player
    :param sorted_Matches: sequence of ``(match_player_id, weight)`` entries
    :param all_player_stats_rows: 2-D array holding the rows of all players
    :param stat_index: column index of the statistic to predict
    :param topN: maximum number of matched players to use
    :return: ``np.mean(Ref_next * weights)``
    :raises IndexError: when the target player has no rows, or a matched
        player has no row for the next-season age
    """
    topN = min(len(sorted_Matches), topN)

    # Get stats of target player
    p_indx = all_player_stats_rows[:, 0] == player_id
    player_stats_rows = all_player_stats_rows[p_indx, :]

    # player_stats_rows =  np.array(stats_cursor.execute('SELECT * FROM Stats where player_id="'+player_id+'"').fetchall())
    player_stat_X = player_stats_rows[:, 2].astype(int)
    player_stat_Y = player_stats_rows[:, stat_index].astype(float)

    next_season_age = int(player_stats_rows[-1, :][2]) + 1

    # ARIMA. Use arima as endogenous regression on the new season and append
    # it to player_stat_Y array
    if np.all(player_stat_Y == 0):
        player_stat_Y = np.append(player_stat_Y, player_stat_Y[-1])
    else:
        model = pm.ARIMA(order=(0, 0, 0), maxiter=100, method='powell')
        fitted = model.fit(player_stat_Y)
        APRED = fitted.predict(1)[0]
        player_stat_Y = np.append(player_stat_Y, APRED)

    player_stat_X = np.append(player_stat_X, next_season_age)

    # Matrix to hold the statistics of the topN players. We will use this to
    # calculate the prediction weights for the target player.
    Ref = np.zeros([len(player_stat_X), topN])
    Ref_weights = np.zeros(topN)
    # Statistics of the topN players for the next season. We will use this
    # together with the calculated weights to generate the prediction.
    Ref_next = np.zeros([topN])

    # Get stats of topN matched players
    for i in range(topN):

        Ref_weights[i] = sorted_Matches[i][1]
        match_player_id = sorted_Matches[i][0]
        m_indx = all_player_stats_rows[:, 0] == match_player_id

        match_player_stats_rows = all_player_stats_rows[m_indx, :]
        match_player_stat_X = match_player_stats_rows[:, 2].astype(int)
        match_player_stat_Y = match_player_stats_rows[:,
                                                      stat_index].astype(float)

        # populate Reference matrix only at x locations (i.e. age) given by
        # target player
        for s, age in enumerate(player_stat_X):
            loc = np.where(match_player_stat_X == age)
            if loc[0].size > 0:
                # Store the first matching value as a scalar, the same way
                # the next-season lookup below does; assigning the whole
                # selection array to one cell is deprecated for a single
                # match and raises for several.
                Ref[s, i] = match_player_stat_Y[loc][0]

        # append the stat from the next season (i.e. to be predicted)
        next_season_match_stat = match_player_stat_Y[np.where(
            match_player_stat_X == next_season_age)][0]
        Ref_next[i] = next_season_match_stat

    # Remove any entries in the Ref array where all players have 0 values
    # (only consumed by the alternative solvers kept in comments below).
    non_zero_indx = [t for t in range(Ref.shape[0]) if any(Ref[t, :] > 0)]

    # weighted SSE
    objective_fun = functools.partial(weighted_sum_objective,
                                      arg1=player_stat_Y,
                                      arg2=Ref)
    x0 = np.ones([topN])
    out = minimize(objective_fun, x0, options={'disp': False, 'maxiter': 200})
    predicted_stats = np.mean(Ref_next * out.x)

    # Various linear combination
    # [x,resid,rank,s] = np.linalg.lstsq(Ref[non_zero_indx,:],player_stat_Y[non_zero_indx]) #least-squares solution to a linear matrix equation (Numpy)
    # [x,resid]=nnls(Ref[non_zero_indx,:],player_stat_Y[non_zero_indx])  #non-negative least squares
    # out=lsq_linear(Ref[non_zero_indx,:],player_stat_Y[non_zero_indx], bounds=(0, np.inf)) # least squares with bound constraints (Scipy)
    # predicted_stats=np.sum(out.x.T*Ref_next) #Linear combination of players

    # #using cvxpy
    # x = cvx.Variable(topN)
    # A=Ref[non_zero_indx,:]
    # b=player_stat_Y[non_zero_indx]
    # objective = cvx.Minimize(cvx.sum_squares(A*x - b))
    # constraints = [cvx.sum(x) == 1, x>=0] #convex
    # prob = cvx.Problem(objective, constraints)
    # result = prob.solve()
    # predicted_stats=np.sum(x.value.T*Ref_next)

    # #using regression type 1
    # x_train=Ref  #topN-dim features x num_seasons observations
    # y_train=player_stat_Y #num_seasons labels
    # # model = KNeighborsRegressor(n_neighbors=3)
    # model =RandomForestRegressor(max_depth=2, random_state=0)
    # model.fit(x_train, y_train)
    # predicted_stats=model.predict(Ref_next.reshape(1,-1))[0] #topN-dim prediction feature vector

    # # using regression type 2
    # x_train=Ref.T
    # y_train=Ref_next
    # # model = KNeighborsRegressor(n_neighbors=3)
    # model =RandomForestRegressor(max_depth=4, random_state=0)
    # #model.fit(x_train, y_train)
    # model.fit(x_train, y_train, sample_weight=Ref_weights)
    # predicted_stats=model.predict(player_stat_Y.reshape(1,-1))[0]
    # if math.isnan(predicted_stats):
    #     predicted_stats=0

    return predicted_stats
コード例 #13
0
        exog_pred_series_2 = new_temp[year_2020_index + 1:]
        exog_pred_series_2 = exog_pred_series_2["tmax"]
        exog_pred_2 = np.expand_dims(exog_pred_series_2.to_numpy(),axis=1)
   
    
        exog_train_series = pd.concat([exog_train_series_1,exog_train_series_2],axis=1)
        exog_train = exog_train_series.to_numpy()
        
        
        
        exog_pred_series = pd.concat([exog_pred_series_1,exog_pred_series_2],axis=1)
        exog_pred = exog_pred_series.to_numpy()

        #change parameters accordingly
        my_order = (0, 1, 2)
        smodel = pm.ARIMA(order=my_order)
        

        smodel_fit = smodel.fit(train,exogenous=exog_train)
        fitted = smodel.predict(n_periods=n_periods, exogenous=exog_pred) #
        fitted_series = pd.Series(fitted,name='Value')
        fitted_series = pd.concat([years,fitted_series],axis=1)
  
        final = pd.concat([ind,fitted_series],axis=0, ignore_index=True)
        plt.plot(final["Value"])
        plt.show()
        
        final.to_csv(path_to_save + index_files[i], index=False)


コード例 #14
0
ファイル: arima_cv.py プロジェクト: TravorLZH/ml-stuff
#mv=np.ones_like(mesh[0],dtype=np.int)*6
# One row per candidate: (p, d, q, P, D, Q, m).
paramsV = np.array([*mesh]).T
if stochastic:
    # Random search: keep only ``ncandidates`` randomly chosen candidates.
    np.random.shuffle(paramsV)
    paramsV = paramsV[:ncandidates]
print(paramsV.shape)

params_final = np.zeros_like(paramsV[0])
# Start from +inf so the first successfully fitted candidate is always
# accepted; a fixed finite threshold would silently select nothing whenever
# every validation MSE exceeds it.
least_err = np.inf
test_err = 0
yhat = []

for params in paramsV:
    print("Training ARIMA(%d,%d,%d) seasonal=(%d,%d,%d,%d)" % tuple(params))
    model = pm.ARIMA(order=tuple(params[:3]),
                     seasonal_order=tuple(params[3:]),
                     suppress_warnings=True)
    try:
        model.fit(train)
    except Exception as ex:
        # Best effort: a candidate that cannot be fitted is reported and
        # skipped so the search continues with the remaining ones.
        print("Error occurred: %s" % ex)
        continue
    # Forecast the validation span followed by the test span in one call.
    y = model.predict(n_periods=nV + nT)
    err = np.square(y[:nV] - validation_y).mean()
    print("MSE=%.5f" % err)
    if least_err > err:
        # New best candidate on the validation span; remember its forecast
        # and its error on the test span.
        least_err = err
        yhat = y
        params_final = params
        test_err = np.square(yhat[nV:] - test_y).mean()
コード例 #15
0
"""
print(__doc__)

# Author: Taylor Smith <*****@*****.**>

import numpy as np
import pmdarima as pm
from pmdarima import model_selection
from matplotlib import pyplot as plt

# Report which pmdarima version produced the results below.
print("pmdarima version: %s" % pm.__version__)

# Load the data; the whole series is handed to the cross-validator below
# (no manual train/test split is made in this part of the script).
y = pm.datasets.load_wineind()
est = pm.ARIMA(order=(1, 1, 2),
               seasonal_order=(0, 1, 1, 12),
               suppress_warnings=True)
# Splitter settings: window_size=150, step=4, h=4.
cv = model_selection.SlidingWindowForecastCV(window_size=150, step=4, h=4)
# averaging="median" is the option chosen for combining the fold forecasts.
predictions = model_selection.cross_val_predict(est,
                                                y,
                                                cv=cv,
                                                verbose=2,
                                                averaging="median")

# plot the predictions over the original series
x_axis = np.arange(y.shape[0])
n_test = predictions.shape[0]

# The predictions are drawn over the last ``n_test`` positions of the series.
plt.plot(x_axis, y, alpha=0.75, c='b')
plt.plot(x_axis[-n_test:], predictions, alpha=0.75, c='g')  # Forecasts
plt.title("Cross-validated wineind forecasts")
コード例 #16
0
 def build_model(self, y_train):
     """Create an unfitted ``pm.ARIMA`` for ``y_train`` and keep it on ``self``.

     The order comes from ``self.find_order(y_train)``; the estimator is
     stored in ``self.model``.

     :param y_train: training series handed to ``find_order``
     :return: ``self``, so calls can be chained
     """
     self.model = pm.ARIMA(order=self.find_order(y_train))
     return self