import numpy as np
from pmdarima import model_selection


def cross_val(endog_series, exog_series, model1, model2, model3):
    cv = model_selection.RollingForecastCV(step=5, h=14, initial=160)
    model1_cv_scores = model_selection.cross_val_score(
        model1,
        y=endog_series,
        exogenous=exog_series,
        scoring='mean_absolute_error',
        cv=cv)
    model2_cv_scores = model_selection.cross_val_score(
        model2,
        y=endog_series,
        exogenous=exog_series,
        scoring='mean_absolute_error',
        cv=cv)
    model3_cv_scores = model_selection.cross_val_score(
        model3,
        y=endog_series,
        exogenous=exog_series,
        scoring='mean_absolute_error',
        cv=cv)

    # Filter the nan scores
    model1_cv_scores = model1_cv_scores[~(np.isnan(model1_cv_scores))]
    model2_cv_scores = model2_cv_scores[~(np.isnan(model2_cv_scores))]
    model3_cv_scores = model3_cv_scores[~(np.isnan(model3_cv_scores))]

    # Format and print the CV scores for each model
    model1_cv_scoreslist = ["%.4f" % elem for elem in model1_cv_scores]
    model2_cv_scoreslist = ["%.4f" % elem for elem in model2_cv_scores]
    model3_cv_scoreslist = ["%.4f" % elem for elem in model3_cv_scores]
    print("Model 1 CV scores: {}".format(model1_cv_scoreslist))
    print("Model 2 CV scores: {}".format(model2_cv_scoreslist))
    print("Model 3 CV scores: {}".format(model3_cv_scoreslist))

    # Pick the model with the lowest average error rate
    m1_average_error = np.average(model1_cv_scores)
    m2_average_error = np.average(model2_cv_scores)
    m3_average_error = np.average(model3_cv_scores)
    errors = [m1_average_error, m2_average_error, m3_average_error]
    models = [model1, model2, model3]

    # Select the best model (lowest average error, ignoring NaN averages) and print it:
    if np.isnan(np.nanmin(errors)):
        better_index = 0
    else:
        better_index = errors.index(np.nanmin(errors))

    best_order = models[better_index].order
    print("Lowest average MAE: {} (model{})".format(errors[better_index],
                                                    better_index + 1))
    print("Best model order: {}".format(best_order))

    return best_order
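
# A minimal usage sketch (not part of the original snippet), assuming a
# pmdarima version in which cross_val_score still accepts the `exogenous`
# keyword (it was renamed to `X` in later releases). wineind is used here only
# because it is long enough for the hard-coded RollingForecastCV(initial=160).
import pmdarima as pm

y = pm.datasets.load_wineind()
candidate1 = pm.ARIMA(order=(1, 1, 1))   # illustrative orders only
candidate2 = pm.ARIMA(order=(2, 1, 1))
candidate3 = pm.ARIMA(order=(1, 1, 2))

best = cross_val(y, None, candidate1, candidate2, candidate3)
print("Selected order: {}".format(best))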
Example 2
import warnings

import numpy as np
from pmdarima.model_selection import cross_val_score
from statsmodels.tools.sm_exceptions import ConvergenceWarning


def _cross_validate_single_model(model, group_series, metrics, cross_validator,
                                 error_score, exog, verbosity):
    # Cross-validate a single model for each requested metric and return the
    # mean and standard deviation of the fold scores.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=RuntimeWarning)
        warnings.filterwarnings("ignore", category=ConvergenceWarning)

        if isinstance(metrics, str):
            metrics = [metrics]

        output = {}
        for metric in metrics:
            scores = cross_val_score(
                estimator=model,
                y=group_series,
                X=exog,
                scoring=metric,
                cv=cross_validator,
                verbose=verbosity,
                error_score=error_score,
            )
            output[f"{metric}_mean"] = np.mean(scores)
            output[f"{metric}_stddev"] = np.std(scores)

    return output
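
# A minimal, hypothetical invocation of the helper above (the names and values
# below are assumptions, not part of the original code): one series, two
# metrics, and a sliding-window cross-validator from pmdarima.
import pmdarima as pm
from pmdarima import model_selection

y = pm.datasets.load_wineind()
cv = model_selection.SlidingWindowForecastCV(window_size=100, step=12, h=12)

summary = _cross_validate_single_model(
    model=pm.ARIMA(order=(1, 1, 2), seasonal_order=(0, 1, 1, 12)),
    group_series=y,
    metrics=["smape", "mean_absolute_error"],
    cross_validator=cv,
    error_score=np.nan,
    exog=None,
    verbosity=0,
)
print(summary)  # e.g. {'smape_mean': ..., 'smape_stddev': ..., ...}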
print("pmdarima version: %s" % pm.__version__)

# Load the data and split it into separate pieces
data = pm.datasets.load_wineind()
train, test = model_selection.train_test_split(data, train_size=165)

# Even though we have a dedicated train/test split, we can (and should) still
# use cross-validation on our training set to get a good estimate of the model
# performance. We can choose which model is better based on how it performs
# over various folds.
model1 = pm.ARIMA(order=(2, 1, 1), seasonal_order=(0, 0, 0, 1))
model2 = pm.ARIMA(order=(1, 1, 2), seasonal_order=(0, 1, 1, 12))
cv = model_selection.SlidingWindowForecastCV(window_size=100, step=24, h=1)

model1_cv_scores = model_selection.cross_val_score(
    model1, train, scoring='smape', cv=cv, verbose=2)

model2_cv_scores = model_selection.cross_val_score(
    model2, train, scoring='smape', cv=cv, verbose=2)

print("Model 1 CV scores: {}".format(model1_cv_scores.tolist()))
print("Model 2 CV scores: {}".format(model2_cv_scores.tolist()))

# Pick based on which has a lower mean error rate
m1_average_error = np.average(model1_cv_scores)
m2_average_error = np.average(model2_cv_scores)
errors = [m1_average_error, m2_average_error]
models = [model1, model2]

# print out the answer
better_index = np.argmin(errors)  # type: int
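
# The snippet ends right after computing better_index; a hedged continuation
# (not necessarily the original script) might report the winner and then check
# it against the held-out test split created earlier:
from pmdarima.metrics import smape

print("Lowest average SMAPE: {} (model{})".format(
    errors[better_index], better_index + 1))

best_model = models[better_index].fit(train)
preds = best_model.predict(n_periods=test.shape[0])
print("Test SMAPE: %.3f" % smape(test, preds))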
Example 4
        df {pd.DataFrame} -- DataFrame of races

    Keyword Arguments:
        n_forecasts {int} -- number of steps ahead (default: {1})

    Returns:
        str -- The prediction, confidence intervals, and average cross
               validation error of the next race time
    """
    runner_df = df.loc[df.name.str.contains(name, case=False)]
    if runner_df.empty:
        return
    minutes = runner_df.time.dt.seconds / 60.0
    model = pm.auto_arima(minutes, seasonal=False, suppress_warnings=True)
    pred, conf_int = model.predict(n_forecasts, return_conf_int=True)
    cv_score = cross_val_score(model, minutes, scoring='mean_absolute_error')
    mean_cv_score = np.mean(cv_score)

    def formatter(num: float) -> str:
        return str(datetime.timedelta(minutes=num)).split('.')[0]

    pred_format = formatter(pred[0])
    conf_int_format = [formatter(x) for x in conf_int[0]]
    mean_cv_score_format = formatter(mean_cv_score)
    pred_string = (f'Results for {runner_df.name.unique()[0]}\n'
                   f'The prediction for the next 42 km race'
                   f' is {pred_format} with 95 % confidence'
                   f' interval ({conf_int_format[0]},'
                   f' {conf_int_format[1]})\n'
                   f'The average cross validation error score is'
                   f' {mean_cv_score_format}.\n')
    return pred_string
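
# A hypothetical call to the truncated function above (its def line is missing
# from the snippet, so the name `predict_next_race` is assumed here). The body
# only requires a DataFrame with a 'name' column and a timedelta 'time' column,
# plus the datetime / pandas / numpy / pmdarima imports used inside it.
import datetime

import numpy as np
import pandas as pd
import pmdarima as pm

races = pd.DataFrame({
    "name": ["Jane Doe"] * 20,
    "time": [datetime.timedelta(minutes=230 - 1.5 * i) for i in range(20)],
})
print(predict_next_race(races, name="jane"))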
Example 5
def calculate_cv_metrics(model, endog, metric, cv):
    # Cross-validate, then average the fold scores, ignoring any NaN folds.
    cv_metric = model_selection.cross_val_score(
        model, endog, cv=cv, scoring=metric, verbose=0)
    return cv_metric[~np.isnan(cv_metric)].mean()
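
# A quick, hypothetical call of the helper above, reusing the wineind series
# and a sliding-window cross-validator; the specific order is illustrative.
import numpy as np
import pmdarima as pm
from pmdarima import model_selection

y = pm.datasets.load_wineind()
cv = model_selection.SlidingWindowForecastCV(window_size=100, step=24, h=1)
model = pm.ARIMA(order=(1, 1, 2), seasonal_order=(0, 1, 1, 12))

print(calculate_cv_metrics(model, y, "smape", cv))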