# Beispiel #1 (snippet separator from paste source)
# 0 (vote count from paste source)
def arima(df, cfg):
    """Fit a seasonal ARIMA selected by auto_arima and evaluate sliding-window
    forecasts on the test set, reporting MSE plus 80%/95% prediction-interval
    coverage and width for each horizon step.
    """
    train, test = train_test_split(df, cfg['test_size'])
    scaler = MinMaxScaler(feature_range=(10 ** (-10), 1))
    train['y'] = scaler.fit_transform(train.values.reshape(-1, 1))
    test['y'] = scaler.transform(test.values.reshape(-1, 1))

    auto_model = auto_arima(train, start_p=1, start_q=1, max_p=11, max_q=11, max_d=3, max_P=5, max_Q=5, max_D=3,
                            m=12, start_P=1, start_Q=1, seasonal=True, d=None, D=None, suppress_warnings=True,
                            stepwise=True, information_criterion='aicc')

    print(auto_model.summary())

    horizon = cfg['forecast_horizon']
    n_windows = len(test) - horizon
    pred_arima = np.zeros([n_windows, horizon])
    conf_int_arima_80 = np.zeros([n_windows, horizon, 2])
    conf_int_arima_95 = np.zeros([n_windows, horizon, 2])

    for w in range(n_windows):
        # Two predict calls: identical point forecasts, only the interval level differs.
        point_forecast, band_95 = auto_model.predict(n_periods=horizon,
                                                     return_conf_int=True, alpha=1 - 0.95)
        _, band_80 = auto_model.predict(n_periods=horizon,
                                        return_conf_int=True, alpha=1 - 0.8)
        pred_arima[w] = point_forecast
        conf_int_arima_80[w] = band_80
        conf_int_arima_95[w] = band_95
        # Slide the window forward by folding the next observed test value into the model.
        auto_model.update(y=[test.values[w]])

    # Store results
    mse_arima, coverage_arima_95, coverage_arima_80, width_arima_95, width_arima_80 = [], [], [], [], []

    for step in range(horizon):
        actuals = test.values[step:len(test) - horizon + step]

        # ARIMA mean squared error (MSE):
        mse_arima.append(mean_squared_error(test[step:len(test) - horizon + step], pred_arima[:, step]))

        # ARIMA 80% PI
        coverage_arima_80.append(compute_coverage(upper_limits=conf_int_arima_80[:, step, 1],
                                                  lower_limits=conf_int_arima_80[:, step, 0],
                                                  actual_values=actuals))
        width_arima_80.append(np.mean(conf_int_arima_80[:, step, 1] - conf_int_arima_80[:, step, 0], axis=0))

        # ARIMA 95% PI
        coverage_arima_95.append(compute_coverage(upper_limits=conf_int_arima_95[:, step, 1],
                                                  lower_limits=conf_int_arima_95[:, step, 0],
                                                  actual_values=actuals))
        width_arima_95.append(np.mean(conf_int_arima_95[:, step, 1] - conf_int_arima_95[:, step, 0], axis=0))

    print('================ ARIMA =================')
    print('Mean MSE', np.mean(mse_arima))
    print('MSE sliding window', mse_arima)
    print('Coverage of 80% PI sliding window', coverage_arima_80)
    print('Width of 80% PI sliding window', width_arima_80)
    print('Coverage of 95% PI sliding window', coverage_arima_95)
    print('Width of 95% PI sliding window', width_arima_95)
    return mse_arima, coverage_arima_80, coverage_arima_95, width_arima_80, width_arima_95
def main():
    """Run walk-forward MC-dropout forecasting over every column of the loaded
    dataset and report mean 80%/95% prediction-interval coverage per horizon step.
    """
    # Load data
    df, cfg = load_data()
    print(df.shape)
    # Coverage matrices: one row per time series, one column per horizon step
    coverage_80pi = np.zeros([len(df.columns), cfg['forecasting_horizon']])
    coverage_95pi = np.zeros([len(df.columns), cfg['forecasting_horizon']])
    print(df)
    # Pre train autoencoder
    # encoder, scaler = pre_training(df, cfg)

    # BUG FIX: `scaler` was only defined in the commented-out pre_training() call
    # above, so the loop below raised NameError. Create a scaler explicitly.
    scaler = MinMaxScaler()

    # Train over all time series in df
    # (.items() replaces .iteritems(), which was removed in pandas 2.0)
    for i, (columnName, columnData) in enumerate(df.items()):
        print('Column Name : ', columnName)
        # print('Column Contents : ', columnData.values)
        df_i = scaler.fit_transform(df[[columnName]].values)
        prediction_sequence, mc_mean, mc_median, total_uncertainty, quantile_80, quantile_95, test = walk_forward_validation(
            df_i, cfg)
        for j in range(cfg['forecasting_horizon']):
            # 1.28 / 1.96 are the two-sided normal quantiles for 80% / 95% intervals
            coverage_80pi[i, j] = compute_coverage(
                upper_limits=mc_mean[:, j] + 1.28 * total_uncertainty[:, j],
                lower_limits=mc_mean[:, j] - 1.28 * total_uncertainty[:, j],
                actual_values=test)
            coverage_95pi[i, j] = compute_coverage(
                upper_limits=mc_mean[:, j] + 1.96 * total_uncertainty[:, j],
                lower_limits=mc_mean[:, j] - 1.96 * total_uncertainty[:, j],
                actual_values=test)
        # plot_predictions(df_i, mc_mean, [mc_mean - 1.28 * total_uncertainty, mc_mean + 1.28 * total_uncertainty],
        #                 [mc_mean - 1.96 * total_uncertainty, mc_mean + 1.96 * total_uncertainty])

    # Print coverage for each forecasting horizon
    for j in range(cfg['forecasting_horizon']):
        print('Mean intervals over', len(df.columns.values), 'data sets')
        print('80%-prediction interval coverage: ', j,
              np.mean(coverage_80pi[:, j]))
        print('95%-prediction interval coverage: ', j,
              np.mean(coverage_95pi[:, j]))
def monte_carlo_forecast(test, model, cfg, inherent_noise, encoder=None):
    """Monte Carlo dropout forecasting over each series in `test`.

    For every series, runs cfg['number_of_mc_forward_passes'] stochastic forward
    passes, aggregates them into a mean/std, prints per-series MSE and 95%-PI
    coverage, and plots predictions with 80%/95% bands.

    Returns the raw prediction tensor of shape
    (n_series, n_mc_passes, series_len - sequence_length, forecasting_horizon).
    """
    prediction_sequence = np.zeros([
        len(test), cfg['number_of_mc_forward_passes'],
        len(test[0]) - cfg['sequence_length'], cfg['forecasting_horizon']
    ])
    if encoder:
        # NOTE(review): `enc` is built but never used below — presumably a
        # leftover from an autoencoder experiment; confirm before removing.
        enc = K.function([encoder.layers[0].input,
                          K.learning_phase()], [encoder.layers[-1].output])
    # Backend function with an explicit learning-phase input so dropout can stay
    # active at prediction time (MC dropout).
    func = K.function(
        [model.layers[0].input, K.learning_phase()], [model.layers[-1].output])
    # Number of MC samples
    coverage_95_pi_list = []
    print("=== Forwarding", cfg['number_of_mc_forward_passes'], "passes ===")
    for l in range(len(test)):
        for j in tqdm.tqdm(range(cfg['number_of_mc_forward_passes'])):
            # Seed the rolling history with the first sequence_length observations.
            history = [x for x in test[l, :cfg['sequence_length']]]
            # Prediction horizon / test length
            for i in range(len(test[l]) - cfg['sequence_length']):
                # fit model and make forecast for history
                x_input = np.array(history[-cfg['sequence_length']:]).reshape(
                    (1, cfg['sequence_length'], 1))
                # NOTE(review): the learning-phase slot is fed
                # cfg['dropout_rate_test']; K.learning_phase() is normally 0/1 —
                # confirm this value is intended.
                mc_sample = func([x_input, cfg['dropout_rate_test']])[0]
                # store forecast in list of predictions
                if cfg['multi_step_prediction']:
                    # Recursive multi-step: feed the model its own sample back.
                    history.append(mc_sample[0, 0])
                else:
                    # Teacher forcing: append the observed value instead.
                    history.append(test[l, i + cfg['sequence_length']])
                prediction_sequence[l, j, i] = mc_sample
        # Aggregate the MC samples for series l across forward passes.
        mean = prediction_sequence[l].mean(axis=0)
        std = prediction_sequence[l].std(axis=0)
        mse = mean_squared_error(test[l, cfg['sequence_length']:], mean)
        # Total predictive uncertainty combines inherent (aleatoric) noise with
        # the MC (epistemic) spread; here inherent_noise is treated as a std.
        uncertainty = np.sqrt(inherent_noise**2 + std**2)
        print('MSE test set', l, mse)
        coverage_95pi = compute_coverage(
            upper_limits=mean + 1.96 * uncertainty,
            lower_limits=mean - 1.96 * uncertainty,
            actual_values=test[l, cfg['sequence_length']:])
        print('95%-prediction interval coverage: ', coverage_95pi)
        coverage_95_pi_list.append(coverage_95pi)
        plot_predictions(
            test[l], mean, mse,
            np.array([mean - 1.28 * uncertainty, mean + 1.28 * uncertainty]),
            np.array([mean - 1.96 * uncertainty, mean + 1.96 * uncertainty]),
            cfg)
    print('Average 95%-prediction interval coverage: ',
          np.mean(coverage_95_pi_list))
    return prediction_sequence
def baseline_models(df, cfg):
    """Fit two baselines on df['y'] — Holt-Winters exponential smoothing and an
    auto-selected SARIMA — forecast the holdout tail, and report the ARIMA
    95%-prediction-interval coverage.

    Returns (pred_arima, pred_es, conf_int_arima) as arrays over the holdout.
    """
    test_size = int(len(df) - cfg['sequence_length'])
    training_series = df['y'].iloc[:-test_size]

    # Additive-trend, additive-seasonal exponential smoothing baseline.
    es_model = ExponentialSmoothing(training_series,
                                    seasonal_periods=12,
                                    trend='add',
                                    seasonal='add').fit(optimized=True)
    pred_es = es_model.predict(start=df.index[-test_size], end=df.index[-1])

    # Stepwise auto-ARIMA baseline with fixed differencing orders.
    auto_model = auto_arima(training_series,
                            start_p=1, start_q=1,
                            max_p=3, max_q=3,
                            m=12,
                            start_P=1, start_Q=1,
                            seasonal=True,
                            d=1, D=1,
                            suppress_warnings=True,
                            stepwise=True)
    print('Auto arima', auto_model.aic())
    print(auto_model.summary())

    # alpha=0.05 -> 95% confidence band.
    pred_arima, conf_int_arima = auto_model.predict(n_periods=test_size,
                                                    return_conf_int=True,
                                                    alpha=0.05)

    coverage_95pi = compute_coverage(upper_limits=conf_int_arima[:, 1],
                                     lower_limits=conf_int_arima[:, 0],
                                     actual_values=df['y'].iloc[-test_size:])
    print('ARIMA 95%-prediction interval coverage: ', coverage_95pi)

    pred_es = np.asarray(pred_es)
    pred_arima = np.asarray(pred_arima)

    return pred_arima, pred_es, conf_int_arima
# Beispiel #5 (snippet separator from paste source)
# 0 (vote count from paste source)
def exponential_smoothing(df, cfg):
    """Select an ETS model by AICc, forecast a sliding test window, and evaluate
    MSE plus 80%/95% prediction-interval coverage and width per horizon step.

    Returns (mse_es, coverage_es_80, coverage_es_95, width_es_80, width_es_95),
    each a list with one entry per forecast-horizon step.
    """
    train, test = train_test_split(df, cfg['test_size'])
    # Scale into (0, 1]; the tiny lower bound keeps values strictly positive,
    # which multiplicative seasonality requires.
    scaler = MinMaxScaler(feature_range=(10 ** (-10), 1))
    train['y'] = scaler.fit_transform(train.values.reshape(-1, 1))
    test['y'] = scaler.transform(test.values.reshape(-1, 1))

    # Grid-search trend/seasonal combinations (incl. damped additive trend) by AICc.
    trends = [None, 'add', 'add_damped']
    seasons = [None, 'add', 'mul']
    best_model_parameters = [None, None, False]  # trend, season, damped
    best_aicc = np.inf
    for trend in trends:
        for season in seasons:
            if trend == 'add_damped':
                trend = 'add'
                damped = True
            else:
                damped = False
            model_es = ExponentialSmoothing(train, seasonal_periods=12,
                                            trend=trend, seasonal=season,
                                            damped=damped)
            model_es = model_es.fit(optimized=True)
            if model_es.aicc < best_aicc:
                best_model_parameters = [trend, season, damped]
                best_aicc = model_es.aicc
    # Refit the winning configuration.
    model_es = ExponentialSmoothing(train, seasonal_periods=12,
                                    trend=best_model_parameters[0], seasonal=best_model_parameters[1],
                                    damped=best_model_parameters[2])
    model_es = model_es.fit(optimized=True)
    print(model_es.params)
    print('ETS: T=', best_model_parameters[0], ', S=', best_model_parameters[1], ', damped=', best_model_parameters[2])
    print('AICc', model_es.aicc)
    # BUG FIX: was `len(train - 2)`, which subtracts 2 from the *values* and
    # leaves the length unchanged; the intended denominator is len(train) - 2.
    residual_variance = model_es.sse / (len(train) - 2)
    var = []
    alpha = model_es.params['smoothing_level']
    beta = model_es.params['smoothing_slope']
    gamma = model_es.params['smoothing_seasonal']
    # Analytic h-step forecast-variance formulas for the additive ETS classes.
    for j in range(cfg['forecast_horizon']):
        s = 12          # seasonal period
        h = j + 1       # forecast step (1-based)
        k = int((h - 1) / s)  # complete seasonal cycles within the horizon

        if best_model_parameters[1] == 'add':
            if best_model_parameters[0] == 'add':
                var.append(residual_variance * (1 + (h - 1) * (alpha**2 + alpha*h*beta + h / 6 * (2 * h - 1) * beta ** 2)
                                                + k*gamma*(2*alpha + gamma + beta*s*(k + 1))))
            else:
                var.append(
                    residual_variance * (1 + (h - 1) * alpha ** 2 + k * gamma * (2 * alpha + gamma)))
        elif best_model_parameters[1] == 'mul':
            # NOTE(review): linear-in-h growth is a crude stand-in here —
            # multiplicative seasonality has no simple closed-form variance; confirm.
            var.append(residual_variance*h)
        else:
            if best_model_parameters[0] == 'add':
                var.append(
                    residual_variance*(1 + (h - 1)*(alpha ** 2 + alpha * h * beta + h / 6 * (2 * h - 1) * beta ** 2)))
            else:
                var.append(residual_variance * (1 + (h - 1) * alpha ** 2))
    pred_es = np.zeros([len(test) - cfg['forecast_horizon'], cfg['forecast_horizon']])
    conf_int_es_80 = np.zeros([len(test) - cfg['forecast_horizon'], cfg['forecast_horizon'], 2])
    conf_int_es_95 = np.zeros([len(test) - cfg['forecast_horizon'], cfg['forecast_horizon'], 2])

    for i in range(len(test) - cfg['forecast_horizon']):
        # Model is never refit on test data, so window i is the (i+1..i+h)-step-
        # ahead forecast from the end of training.
        # NOTE(review): `var` corresponds to steps 1..h, not i+1..i+h — the
        # intervals for later windows may be too narrow; confirm.
        pred_es[i] = model_es.forecast(steps=cfg['forecast_horizon'] + i)[-cfg['forecast_horizon']:]
        conf_int_es_80[i, :, 0] = pred_es[i] - 1.28 * np.sqrt(var)
        conf_int_es_80[i, :, 1] = pred_es[i] + 1.28 * np.sqrt(var)
        conf_int_es_95[i, :, 0] = pred_es[i] - 1.96 * np.sqrt(var)
        conf_int_es_95[i, :, 1] = pred_es[i] + 1.96 * np.sqrt(var)

    # Store results
    mse_es, coverage_es_95, coverage_es_80, width_es_95, width_es_80 = [], [], [], [], []

    for i in range(cfg['forecast_horizon']):
        # Exponential Smoothing MSE
        mse_es.append(mean_squared_error(test[i:len(test) - cfg['forecast_horizon'] + i], pred_es[:, i]))

        # Exponential Smoothing 80% PI
        coverage_es_80.append(compute_coverage(upper_limits=conf_int_es_80[:, i, 1],
                                               lower_limits=conf_int_es_80[:, i, 0],
                                               actual_values=test.values[i:len(test) - cfg['forecast_horizon'] + i]))
        width_es_80.append(np.mean(conf_int_es_80[:, i, 1] - conf_int_es_80[:, i, 0], axis=0))

        # Exponential Smoothing 95% PI
        coverage_es_95.append(compute_coverage(upper_limits=conf_int_es_95[:, i, 1],
                                               lower_limits=conf_int_es_95[:, i, 0],
                                               actual_values=test.values[i:len(test) - cfg['forecast_horizon'] + i]))
        width_es_95.append(np.mean(conf_int_es_95[:, i, 1] - conf_int_es_95[:, i, 0], axis=0))

    print('================ ES ====================')
    print('MSE sliding window', mse_es)
    print('Mean MSE', np.mean(mse_es))
    print('Coverage of 80% PI sliding window', coverage_es_80)
    print('Width of 80% PI sliding window', width_es_80)
    print('Coverage of 95% PI sliding window', coverage_es_95)
    print('Width of 95% PI sliding window', width_es_95)
    return mse_es, coverage_es_80, coverage_es_95, width_es_80, width_es_95
# Beispiel #6 (snippet separator from paste source)
# 0 (vote count from paste source)
def sliding_monte_carlo_forecast(train, test, model, cfg, inherent_noise):
    """Sliding-window MC-dropout forecast over the test set.

    For each window start l, runs cfg['number_of_mc_forward_passes'] stochastic
    passes of window_length recursive steps, then folds test[l] into the
    conditioning history and slides forward. Returns per-step MSE, 95%/80% PI
    coverage, and PI widths.
    """
    window_length = int(cfg['forecast_horizon'])
    # NOTE(review): both cfg['forecast_horizon'] and cfg['forecasting_horizon']
    # are used on the next line — confirm the two keys are meant to differ.
    prediction_sequence = np.zeros([len(test)-window_length, cfg['number_of_mc_forward_passes'], window_length, cfg['forecasting_horizon']])
    # Backend function with an explicit learning-phase input so dropout stays
    # active at prediction time (MC dropout).
    func = K.function([model.layers[0].input, K.learning_phase()], [model.layers[-1].output])
    # Number of MC samples
    forward_validation_set = [x for x in train]
    print("=== Forwarding", cfg['number_of_mc_forward_passes'], "passes ===")
    for l in tqdm.tqdm(range(len(test)-window_length)):
        for j in range(cfg['number_of_mc_forward_passes']):
            history = [x for x in forward_validation_set]
            # Prediction horizon / test length
            for i in range(window_length):
                # fit model and make forecast for history
                x_input = np.array(history[-cfg['sequence_length']:]).reshape((1, cfg['sequence_length'], 1))
                mc_sample = func([x_input, cfg['dropout_rate_test']])[0]
                # store forecast in list of predictions
                if cfg['multi_step_prediction']:
                    # Recursive multi-step: feed the model its own sample back.
                    history.append(mc_sample[0, 0])
                else:
                    # NOTE(review): appends test[i] regardless of the window start
                    # l; for l > 0 the teacher-forced value is presumably
                    # test[l + i] — confirm before trusting single-step results.
                    history.append(test[i])
                prediction_sequence[l, j, i] = mc_sample
        # Slide the window: the observed value joins the conditioning history.
        forward_validation_set.append(test[l])
        """
        total_uncertainty = np.sqrt(inherent_noise + np.var(prediction_sequence[l], axis=0))
        mean = prediction_sequence[l].mean(axis=0)
        t = np.linspace(1, window_length, window_length)
        mean = mean[:, 0]
        total_uncertainty = total_uncertainty[:, 0]

        plt.figure()
        plt.title("Time Series Forecasting")
        plt.plot(t, mean, label='Mean')
        plt.plot(t, test[l:window_length+l])
        plt.fill_between(t, mean - 1.28*total_uncertainty, mean + 1.28*total_uncertainty,
                         alpha=0.5, edgecolor='#CC4F1B', facecolor='#FF9848', label='80%-PI')
        plt.fill_between(t, mean - 1.96*total_uncertainty, mean + 1.96*total_uncertainty,
                         alpha=0.2, edgecolor='#CC4F1B', facecolor='#FF9848', label='95%-PI')
        plt.legend()
        plt.show()
        """

    mse_sliding = []
    coverage_95_pi, width_95_pi = [], []
    coverage_80_pi, width_80_pi = [], []
    for i in range(window_length):
        # NOTE(review): inherent_noise is added un-squared here (treated as a
        # variance), while monte_carlo_forecast uses inherent_noise**2 — confirm
        # the units are consistent across the two functions.
        total_uncertainty = np.sqrt(inherent_noise + np.var(prediction_sequence[:, :, i], axis=1))
        mean = prediction_sequence[:, :, i].mean(axis=1)
        mse_sliding.append(mean_squared_error(test[i:len(test)-window_length+i], mean))
        coverage_95_pi.append(compute_coverage(upper_limits=mean + 1.96*total_uncertainty,
                                               lower_limits=mean - 1.96*total_uncertainty,
                                               actual_values=test[i:len(test)-window_length+i]))
        coverage_80_pi.append(compute_coverage(upper_limits=mean + 1.28 * total_uncertainty,
                                               lower_limits=mean - 1.28 * total_uncertainty,
                                               actual_values=test[i:len(test) - window_length + i]))
        # Interval width is symmetric: 2 * z * sigma; [0] picks the first horizon column.
        width_95_pi.append(2*1.96*np.mean(total_uncertainty, axis=0)[0])
        width_80_pi.append(2*1.28*np.mean(total_uncertainty, axis=0)[0])
    """
    t = np.linspace(1, window_length, window_length)
    plt.figure()
    plt.plot(t, coverage_95_pi)
    plt.title('95% Prediction Interval Coverage')
    plt.xlabel('Forecast length (months)')
    plt.ylabel('Average coverage')
    plt.show()

    plt.figure()
    plt.plot(t, width_95_pi)
    plt.title('95% Prediction Interval Width')
    plt.xlabel('Forecast length (months)')
    plt.ylabel('Width of prediction interval')
    plt.show()
    """
    """
    print(prediction_sequence[0, :, 0, 0].mean())
    print(inherent_noise)
    print(np.sqrt(inherent_noise + np.var(prediction_sequence[0, :, 0], axis=0)))
    plt.hist(prediction_sequence[0, :, 0, 0], color='blue', edgecolor='black',
             bins=int(50), density=True)
    plt.title('Histogram of predictions')
    plt.xlabel('Predicted value')
    plt.ylabel('Density')
    plt.axvline(prediction_sequence[0, :, 0, 0].mean(), color='b', linewidth=1)
    plt.axvline(prediction_sequence[0, :, 0, 0].mean() - 1.96*np.sqrt(inherent_noise + np.var(prediction_sequence[0, :, 0], axis=0)), color='r', linewidth=1)
    plt.axvline(prediction_sequence[0, :, 0, 0].mean() + 1.96*np.sqrt(inherent_noise + np.var(prediction_sequence[0, :, 0], axis=0)), color='r', linewidth=1)
    min_ylim, max_ylim = plt.ylim()
    plt.text(prediction_sequence[0, :, 0, 0].mean() * 1.01, max_ylim * 0.95, 'Mean: {:.3f}'.format(prediction_sequence[0, :, 0, 0].mean()))
    plt.text(
        (prediction_sequence[0, :, 0, 0].mean() - 1.96*np.sqrt(inherent_noise + np.var(prediction_sequence[0, :, 0]))) * 0.9,
        max_ylim * 0.8, '95% PI: {:.3f}'.format(prediction_sequence[0, :, 0, 0].mean() - 1.96*np.sqrt(inherent_noise + np.var(prediction_sequence[0, :, 0], axis=0))[0]))
    plt.text(
        (prediction_sequence[0, :, 0, 0].mean() + 1.96*np.sqrt(inherent_noise + np.var(prediction_sequence[0, :, 0]))) * 0.9,
        max_ylim * 0.8, '95% PI: {:.3f}'.format(prediction_sequence[0, :, 0, 0].mean() + 1.96*np.sqrt(inherent_noise + np.var(prediction_sequence[0, :, 0], axis=0))[0]))
    plt.show()
    """
    return mse_sliding, coverage_95_pi, width_95_pi, coverage_80_pi, width_80_pi
# Beispiel #7 (snippet separator from paste source)
# 0 (vote count from paste source)
def baseline_models(df, coverage, cfg):
    """Evaluate ES and auto-ARIMA baselines with sliding-window forecasts at a
    single interval level `coverage` (e.g. 0.95), reporting MSE, PI coverage and
    PI width per horizon step.

    Returns (mse_arima, coverage_arima, width_arima) for the ARIMA baseline;
    the ES metrics are printed only.
    """
    train, test = train_test_split(df, cfg['test_size'])
    # Scale into (0, 1]; tiny lower bound keeps values strictly positive.
    scaler = MinMaxScaler(feature_range=(10**(-10), 1))
    train['y'] = scaler.fit_transform(train.values.reshape(-1, 1))
    test['y'] = scaler.transform(test.values.reshape(-1, 1))

    # Fixed ETS configuration (no grid search here).
    trend = None
    seasonal = 'add'
    model_es = ExponentialSmoothing(train, seasonal_periods=12,
                                    trend=trend, seasonal=seasonal, damped=False)
    model_es = model_es.fit(optimized=True)
    print('ETS: T=', trend, ', S=', seasonal)
    print('AICc', model_es.aicc)
    # BUG FIX: was `len(train-2)`, which subtracts 2 from the *values* and leaves
    # the length unchanged; the intended denominator is len(train) - 2.
    residual_variance = model_es.sse / (len(train) - 2)
    var = []
    alpha = model_es.params['smoothing_level']
    beta = model_es.params['smoothing_slope']
    gamma = model_es.params['smoothing_seasonal']
    # Analytic h-step forecast-variance formulas for the additive ETS classes.
    for j in range(cfg['forecast_horizon']):
        s = 12          # seasonal period
        h = j + 1       # forecast step (1-based)
        k = int((h - 1) / s)  # complete seasonal cycles within the horizon
        if seasonal == 'add':
            if trend == 'add':
                var.append(residual_variance * (1 + (h - 1) * (alpha**2 + alpha*h*beta + h / 6 * (2 * h - 1) * beta ** 2)
                                                + k*gamma*(2*alpha + gamma + beta*s*(k + 1))))
            else:
                var.append(
                    residual_variance * (1 + (h - 1) * alpha ** 2 + k * gamma * (2 * alpha + gamma)))
        else:
            if trend == 'add':
                var.append(
                    residual_variance*(1 + (h - 1)*(alpha ** 2 + alpha * h * beta + h / 6 * (2 * h - 1) * beta ** 2)))
            else:
                var.append(residual_variance * (1 + (h - 1) * alpha ** 2))

    auto_model = auto_arima(train, start_p=1, start_q=1, max_p=11, max_q=11, max_d=3, max_P=5, max_Q=5, max_D=3,
                            m=12, start_P=1, start_Q=1, seasonal=True, d=None, D=None, suppress_warnings=True,
                            stepwise=True, information_criterion='aicc')

    print(auto_model.summary())

    pred_es = np.zeros([len(test)-cfg['forecast_horizon'], cfg['forecast_horizon']])
    conf_int_es = np.zeros([len(test)-cfg['forecast_horizon'], cfg['forecast_horizon'], 2])
    pred_arima = np.zeros([len(test) - cfg['forecast_horizon'], cfg['forecast_horizon']])
    conf_int_arima = np.zeros([len(test) - cfg['forecast_horizon'], cfg['forecast_horizon'], 2])
    for i in range(len(test)-cfg['forecast_horizon']):
        forecast_arima = auto_model.predict(n_periods=cfg['forecast_horizon'],
                                            return_conf_int=True, alpha=1-coverage)
        # ES model is never refit on test data, so window i is the (i+1..i+h)-step-
        # ahead forecast from the end of training.
        pred_es[i] = model_es.forecast(steps=cfg['forecast_horizon']+i)[-cfg['forecast_horizon']:]

        # Symmetric normal interval at the requested coverage level.
        conf_int_es[i, :, 0] = pred_es[i]-st.norm.ppf(1-(1-coverage)/2)*np.sqrt(var)
        conf_int_es[i, :, 1] = pred_es[i]+st.norm.ppf(1-(1-coverage)/2)*np.sqrt(var)
        pred_arima[i] = forecast_arima[0]
        conf_int_arima[i] = forecast_arima[1]
        # Slide the ARIMA window by folding in the next observed test value.
        auto_model.update(y=[test.values[i]])

    mse_arima, coverage_arima, width_arima = [], [], []
    mse_es, coverage_es, width_es = [], [], []

    for i in range(cfg['forecast_horizon']):
        mse_arima.append(mean_squared_error(test[i:len(test)-cfg['forecast_horizon']+i], pred_arima[:, i]))
        coverage_arima.append(compute_coverage(upper_limits=conf_int_arima[:, i, 1],
                                               lower_limits=conf_int_arima[:, i, 0],
                                               actual_values=test.values[i:len(test)-cfg['forecast_horizon']+i]))
        width_arima.append(np.mean(conf_int_arima[:, i, 1]-conf_int_arima[:, i, 0], axis=0))
        mse_es.append(mean_squared_error(test[i:len(test) - cfg['forecast_horizon'] + i], pred_es[:, i]))
        coverage_es.append(compute_coverage(upper_limits=conf_int_es[:, i, 1],
                                            lower_limits=conf_int_es[:, i, 0],
                                            actual_values=test.values[i:len(test) - cfg['forecast_horizon'] + i]))
        width_es.append(np.mean(conf_int_es[:, i, 1] - conf_int_es[:, i, 0], axis=0))

    print('ARIMA')
    print('MSE sliding window', mse_arima)
    print('Coverage', coverage*100, 'PI sliding window', coverage_arima)
    print('Width', coverage*100, 'PI sliding window', width_arima)
    print(np.mean(mse_arima))
    print('ES')
    print('MSE sliding window', mse_es)
    print('Coverage', coverage * 100, 'PI sliding window', coverage_es)
    print('Width', coverage * 100, 'PI sliding window', width_es)
    print(np.mean(mse_es))
    return mse_arima, coverage_arima, width_arima
def baseline_models(df, cfg):
    """Fit exponential-smoothing and auto-SARIMA baselines on the training split,
    report 80%/95% ARIMA interval coverage on the test split, and plot both
    forecasts with the ARIMA prediction bands.

    Returns (pred_arima, pred_es) as arrays over the test split.
    """
    train, test = train_test_split(df['y'], cfg['test_size'])

    # Additive-trend, additive-seasonal exponential smoothing baseline.
    es_model = ExponentialSmoothing(train,
                                    seasonal_periods=12,
                                    trend='add',
                                    seasonal='add').fit(optimized=True)
    pred_es = es_model.predict(start=df.index[-len(test)], end=df.index[-1])

    # Stepwise auto-ARIMA with fixed differencing orders.
    auto_model = auto_arima(train,
                            start_p=1, start_q=1,
                            max_p=3, max_q=3,
                            m=12,
                            start_P=1, start_Q=1,
                            seasonal=True,
                            d=1, D=1,
                            suppress_warnings=True,
                            stepwise=True)
    print('Auto arima', auto_model.aic())
    print(auto_model.summary())

    # One predict call per interval level; the point forecasts are identical.
    pred_arima, conf_int_95_arima = auto_model.predict(n_periods=len(test),
                                                       return_conf_int=True,
                                                       alpha=0.05)
    _, conf_int_80_arima = auto_model.predict(n_periods=len(test),
                                              return_conf_int=True,
                                              alpha=0.2)

    coverage_95pi = compute_coverage(upper_limits=conf_int_95_arima[:, 1],
                                     lower_limits=conf_int_95_arima[:, 0],
                                     actual_values=test)
    coverage_80pi = compute_coverage(upper_limits=conf_int_80_arima[:, 1],
                                     lower_limits=conf_int_80_arima[:, 0],
                                     actual_values=test)
    print('ARIMA 95%-prediction interval coverage: ', coverage_95pi)
    print('ARIMA 80%-prediction interval coverage: ', coverage_80pi)

    pred_es = np.asarray(pred_es)
    pred_arima = np.asarray(pred_arima)

    # Plot data, both forecasts, and the two ARIMA prediction bands.
    x_data = np.linspace(1, len(df), len(df))
    x_predictions = np.linspace(len(train) + 1, len(df), len(test))
    plt.figure()
    plt.title("SARIMA Time Series Forecasting")
    plt.plot(x_data, df, label='Data')
    plt.plot(x_predictions, pred_arima, label='SARIMA')
    plt.plot(x_predictions, pred_es, label='Exponential Smoothing')
    plt.fill_between(x_predictions,
                     conf_int_80_arima[:, 0],
                     conf_int_80_arima[:, 1],
                     alpha=0.5,
                     edgecolor='#CC4F1B',
                     facecolor='#FF9848',
                     label='80%-PI')
    plt.fill_between(x_predictions,
                     conf_int_95_arima[:, 0],
                     conf_int_95_arima[:, 1],
                     alpha=0.2,
                     edgecolor='#CC4F1B',
                     facecolor='#FF9848',
                     label='95%-PI')
    plt.legend()
    plt.show()
    return pred_arima, pred_es
def pipeline(df, cfg):
    """End-to-end pipeline for one series: split, train, estimate inherent noise
    on the validation set, run MC-dropout forecasting on the test set, and
    compute MSE plus 80%/95% prediction-interval coverage.
    """
    train_and_val, test = train_test_split(df, cfg['test_size'])
    print('Length train', len(train_and_val))
    print('Length test', len(test))

    train, val = train_test_split(train_and_val, cfg['validation_size'])
    train_x, train_y = split_sequence(train, cfg)
    # Prepend the last sequence_length training points so the first validation
    # window has a full input sequence.
    val_x, val_y = split_sequence(
        np.concatenate([train[-cfg['sequence_length']:], val]), cfg)

    # If using an encoder, extract features from training data,
    model = train_model(train_x, train_y, cfg, val_x, val_y)

    # Compute inherent noise on validation set
    history = [x for x in train]
    y_hat = np.zeros([len(val), train_y.shape[1]])
    for i in range(len(val)):
        x_input = np.array(history[-cfg['sequence_length']:]).reshape(
            (1, cfg['sequence_length'], 1))
        y_hat[i] = model.predict(x_input)[0]
        # Teacher forcing: the observed validation value extends the history.
        history.append(val[i])
    inherent_noise = np.zeros(cfg['forecasting_horizon'])
    print(y_hat.shape)
    print(val.shape)
    """
    plt.figure()
    plt.plot(np.linspace(1, len(val), len(val)), val, label='Val')
    plt.plot(np.linspace(1, len(val), len(val)), y_hat, label='Pred')
    plt.legend()
    plt.show()
    """

    for i in range(cfg['forecasting_horizon']):
        # `[:-i or None]` yields the full slice when i == 0.
        # NOTE(review): despite the name, this stores the validation MSE (a
        # variance), which matches sqrt(noise + var) below — confirm naming.
        inherent_noise[i] = mean_squared_error(val[i:], y_hat[:-i or None, 0])
    print('Validation mse: ', inherent_noise)
    # Predict sequence over testing set using Monte Carlo dropout with n forward passes
    # train_x, train_y = split_sequence(train_and_val, cfg)
    # model = train_model(train_x, train_y, cfg)

    # coverage_95_pi_sliding_window, width_95_pi_sliding_window, mse_sliding_window = sliding_monte_carlo_forecast(train_and_val, test, model, cfg, inherent_noise)
    # NOTE(review): the monte_carlo_forecast defined elsewhere in this file takes
    # (test, model, cfg, inherent_noise, encoder=None); this 4-positional-arg call
    # presumably targets a different variant — confirm which one is in scope here.
    prediction_sequence = monte_carlo_forecast(train_and_val, test, model, cfg)

    # Compute mean and uncertainty for the Monte Carlo estimates
    mc_mean = np.zeros(
        [prediction_sequence.shape[1], prediction_sequence.shape[2]])
    mc_uncertainty = np.zeros(
        [prediction_sequence.shape[1], prediction_sequence.shape[2]])
    for i in range(cfg['forecasting_horizon']):
        mc_mean[:, i] = np.mean(prediction_sequence[:, :, i], axis=0)
        mc_uncertainty[:, i] = np.var(prediction_sequence[:, :, i], axis=0)
    # Add inherent noise and uncertainty obtained from Monte Carlo samples
    print(np.mean(inherent_noise))
    print(np.mean(mc_uncertainty))
    total_uncertainty = np.sqrt(inherent_noise + mc_uncertainty)

    # estimate prediction error
    mse = mean_squared_error(test, mc_mean)
    print(' > %.5f' % mse)

    # Compute quantiles of the Monte Carlo estimates
    # NOTE(review): coverage_80pi/coverage_95pi are overwritten on every loop
    # iteration, so the returned values reflect only the LAST horizon step —
    # confirm this is intentional.
    for i in range(cfg['forecasting_horizon']):
        coverage_80pi = compute_coverage(
            upper_limits=mc_mean[:, i] + 1.28 * total_uncertainty[:, i],
            lower_limits=mc_mean[:, i] - 1.28 * total_uncertainty[:, i],
            actual_values=test)
        coverage_95pi = compute_coverage(
            upper_limits=mc_mean[:, i] + 1.96 * total_uncertainty[:, i],
            lower_limits=mc_mean[:, i] - 1.96 * total_uncertainty[:, i],
            actual_values=test)
        # print('80%-prediction interval coverage: ', i, coverage_80pi)
        # print('95%-prediction interval coverage: ', i, coverage_95pi)

    return prediction_sequence, mc_mean, total_uncertainty, mse, coverage_80pi, coverage_95pi, inherent_noise
def walk_forward_validation(train_and_val, test, cfg):
    """Train a model, estimate its inherent noise on a validation split, and
    evaluate Monte Carlo dropout forecasts on the test set.

    Args:
        train_and_val: 2-D array of sequences; split into train/validation
            parts via ``cfg['validation_size']``.
        test: held-out test data, forwarded to ``monte_carlo_forecast``.
        cfg: config dict; keys read here: 'validation_size',
            'sequence_length', 'forecasting_horizon'.

    Returns:
        (prediction_sequence, mc_mean, total_uncertainty, mse,
         coverage_80pi, coverage_95pi, inherent_noise**2)

    NOTE(review): ``coverage_80pi``/``coverage_95pi`` are overwritten on every
    horizon step of the final loop, so only the LAST step's coverage is
    returned — confirm that is intended.
    """

    train, val = train_test_split(train_and_val, cfg['validation_size'])
    # Window the sequences into (input, target) pairs for supervised training.
    train_x, train_y = split_multiple_sequences(train, cfg)
    val_x, val_y = split_multiple_sequences(val, cfg)

    # If using an encoder, extract features from training data,
    model = train_model(train_x, train_y, cfg, val_x, val_y)

    # Compute inherent noise on validation set: walk one-step-ahead
    # predictions forward over every validation sequence.
    y_hat = np.zeros(
        [len(val),
         len(val[0]) - cfg['sequence_length'], train_y.shape[1]])
    for i in range(len(val)):
        # Seed the history with the first `sequence_length` observed values.
        history = [x for x in val[i, :cfg['sequence_length']]]
        for j in range(len(val[i]) - cfg['sequence_length']):
            x_input = np.array(history[-cfg['sequence_length']:]).reshape(
                (1, cfg['sequence_length'], 1))
            y_hat[i, j] = model.predict(x_input)[0]
            # Append the OBSERVED value (not the prediction): teacher forcing.
            history.append(val[i, j + cfg['sequence_length']])
    inherent_noise = np.zeros(cfg['forecasting_horizon'])
    # Debug output and plot; plt.show() blocks until the window is closed.
    print(y_hat.shape)
    print(val.shape)
    plt.figure()
    plt.plot(val[0, cfg['sequence_length']:])
    plt.plot(y_hat[0])
    plt.show()
    # Per-horizon-step RMSE between held-out values and predictions.
    # NOTE(review): `y_hat[:, :, -i or None, 0]` applies FOUR subscripts to a
    # 3-D array and raises IndexError for i >= 1 (for i == 0, `-0 or None`
    # evaluates to None, i.e. np.newaxis). It was likely meant to be
    # `y_hat[:, :-i or None, 0]` (drop the last i predictions) — confirm.
    for i in range(cfg['forecasting_horizon']):
        inherent_noise[i] = measure_rmse(
            val[:, i + cfg['sequence_length']:].flatten(),
            y_hat[:, :, -i or None, 0].flatten())
    print('Validation mse: ', inherent_noise**2)
    # Predict sequence over testing set using Monte Carlo dropout with n forward passes
    # train_x, train_y = split_sequence(train_and_val, cfg)
    # model = train_model(train_x, train_y, cfg)

    # NOTE(review): argument list differs from the other call site in this
    # file, monte_carlo_forecast(train_and_val, test, model, cfg) — verify
    # which signature the helper actually has.
    prediction_sequence = monte_carlo_forecast(test, model, cfg,
                                               inherent_noise)

    # Compute mean and uncertainty for the Monte Carlo estimates
    mc_mean = np.zeros(
        [prediction_sequence.shape[1], prediction_sequence.shape[2]])
    mc_uncertainty = np.zeros(
        [prediction_sequence.shape[1], prediction_sequence.shape[2]])
    for i in range(cfg['forecasting_horizon']):
        # Mean / standard deviation over the MC forward passes (axis 0).
        mc_mean[:, i] = prediction_sequence[:, :, i].mean(axis=0)
        mc_uncertainty[:, i] = prediction_sequence[:, :, i].std(axis=0)
    # Add inherent noise and uncertainty obtained from Monte Carlo samples:
    # both are standard deviations, so they combine in quadrature.
    print(np.mean(inherent_noise))
    print(np.mean(mc_uncertainty))
    total_uncertainty = np.sqrt(inherent_noise**2 + mc_uncertainty**2)

    # estimate prediction error
    mse = mean_squared_error(test, mc_mean)
    print(' > %.5f' % mse)

    # Compute quantiles of the Monte Carlo estimates
    # (1.28 / 1.96 are the two-sided 80% / 95% normal quantiles).
    for i in range(cfg['forecasting_horizon']):
        coverage_80pi = compute_coverage(
            upper_limits=mc_mean[:, i] + 1.28 * total_uncertainty[:, i],
            lower_limits=mc_mean[:, i] - 1.28 * total_uncertainty[:, i],
            actual_values=test)
        coverage_95pi = compute_coverage(
            upper_limits=mc_mean[:, i] + 1.96 * total_uncertainty[:, i],
            lower_limits=mc_mean[:, i] - 1.96 * total_uncertainty[:, i],
            actual_values=test)
        # print('80%-prediction interval coverage: ', i, coverage_80pi)
        # print('95%-prediction interval coverage: ', i, coverage_95pi)

    # The noise is returned squared, i.e. as a variance (matches the print).
    return prediction_sequence, mc_mean, total_uncertainty, mse, coverage_80pi, coverage_95pi, inherent_noise**2
Beispiel #11
0
def baseline_models(df, cfg):
    """Benchmark exponential smoothing (ETS) and seasonal ARIMA forecasts.

    Runs an ADF stationarity check, grid-searches the ETS trend/seasonal
    configuration by AICc, fits a seasonal ARIMA via ``auto_arima``, and then
    walks forward across the test set collecting
    ``cfg['forecast_horizon']``-step forecasts together with 80%/95%
    prediction intervals for both models.

    Args:
        df: DataFrame whose 'y' column holds the target series
            (weekly data assumed: seasonal period 52 — TODO confirm).
        cfg: dict with keys 'test_size' and 'forecast_horizon'.

    Returns:
        Per-horizon-step lists, in this (historical) order:
        (mse_arima, coverage_arima_80, coverage_arima_95, width_arima_80,
         width_arima_95, coverage_es_80, mse_es, coverage_es_95,
         width_es_80, width_es_95)
    """
    # Augmented Dickey-Fuller test — informational only; result is not used.
    result = adfuller(df['y'].values)
    print('ADF Statistic: %f' % result[0])
    print('p-value: %f' % result[1])

    train, test = train_test_split(df['y'], cfg['test_size'])

    # Grid search over ETS trend/seasonal combinations, scored by AICc.
    # 'add_damped' is shorthand for an additive trend with damping.
    trends = [None, 'add', 'add_damped']
    seasons = [None, 'add', 'mul']
    best_model_parameters = [None, None, False]  # trend, season, damped
    best_aicc = np.inf
    for trend_option in trends:
        # BUG FIX: the original rebound the loop variable `trend` inside the
        # season loop, so for 'add_damped' only the FIRST season was actually
        # fitted damped; the remaining seasons duplicated the plain 'add' fits.
        damped = trend_option == 'add_damped'
        trend = 'add' if damped else trend_option
        for season in seasons:
            model_es = ExponentialSmoothing(train,
                                            seasonal_periods=52,
                                            trend=trend,
                                            seasonal=season,
                                            damped=damped)
            model_es = model_es.fit(optimized=True)
            if model_es.aicc < best_aicc:
                best_model_parameters = [trend, season, damped]
                best_aicc = model_es.aicc

    # Refit the winning configuration.
    model_es = ExponentialSmoothing(train,
                                    seasonal_periods=52,
                                    trend=best_model_parameters[0],
                                    seasonal=best_model_parameters[1],
                                    damped=best_model_parameters[2])
    model_es = model_es.fit(optimized=True)
    print('ETS: T=', best_model_parameters[0], ', S=',
          best_model_parameters[1], ', damped=', best_model_parameters[2])
    print('AICc', model_es.aicc)

    # Residual variance with a 2-degrees-of-freedom correction.
    # BUG FIX: was `len(train - 2)`, which equals len(train) (elementwise
    # subtraction does not change the length).
    residual_variance = model_es.sse / (len(train) - 2)

    # Closed-form approximation of the h-step-ahead ETS forecast variance
    # from the fitted smoothing parameters.
    alpha = model_es.params['smoothing_level']
    beta = model_es.params['smoothing_slope']
    gamma = model_es.params['smoothing_seasonal']
    # BUG FIX: was s = 12 (monthly period) while the model is fitted with
    # seasonal_periods=52; use the same seasonal period as the model.
    s = 52
    var = []
    for j in range(cfg['forecast_horizon']):
        h = j + 1
        k = int((h - 1) / s)  # complete seasonal cycles within horizon h
        var.append(residual_variance *
                   (1 + (h - 1) * (alpha**2 + alpha * h * beta + h / 6 *
                                   (2 * h - 1) * beta**2) + k * gamma *
                    (2 * alpha + gamma + beta * s * (k + 1))))
    # Loop-invariant: hoisted out of the walk-forward loop below.
    std_es = np.sqrt(var)

    # Stepwise seasonal ARIMA order search on the training series.
    auto_model = auto_arima(train,
                            start_p=1,
                            start_q=1,
                            max_p=3,
                            max_q=3,
                            m=52,
                            start_P=1,
                            start_Q=1,
                            seasonal=True,
                            d=1,
                            D=1,
                            suppress_warnings=True,
                            stepwise=True)

    print(auto_model.summary())

    horizon = cfg['forecast_horizon']
    n_origins = len(test) - horizon

    pred_es = np.zeros([n_origins, horizon])
    conf_int_es_80 = np.zeros([n_origins, horizon, 2])
    conf_int_es_95 = np.zeros([n_origins, horizon, 2])

    pred_arima = np.zeros([n_origins, horizon])
    conf_int_arima_80 = np.zeros([n_origins, horizon, 2])
    conf_int_arima_95 = np.zeros([n_origins, horizon, 2])

    # Walk forward over the test set: forecast `horizon` steps from every
    # origin, then feed the next observed test point into the ARIMA model.
    for i in range(n_origins):
        forecast_arima_95 = auto_model.predict(n_periods=horizon,
                                               return_conf_int=True,
                                               alpha=1 - 0.95)
        forecast_arima_80 = auto_model.predict(n_periods=horizon,
                                               return_conf_int=True,
                                               alpha=1 - 0.8)
        # ETS always forecasts from the end of train; keep the last
        # `horizon` steps so the window starts at test position i.
        pred_es[i] = model_es.forecast(steps=horizon + i)[-horizon:]
        conf_int_es_80[i, :, 0] = pred_es[i] - 1.28 * std_es
        conf_int_es_80[i, :, 1] = pred_es[i] + 1.28 * std_es
        conf_int_es_95[i, :, 0] = pred_es[i] - 1.96 * std_es
        conf_int_es_95[i, :, 1] = pred_es[i] + 1.96 * std_es
        pred_arima[i] = forecast_arima_95[0]
        conf_int_arima_80[i] = forecast_arima_80[1]
        conf_int_arima_95[i] = forecast_arima_95[1]
        auto_model.update(y=[test.values[i]])

    # Store results
    mse_arima, coverage_arima_95, coverage_arima_80, width_arima_95, width_arima_80 = [], [], [], [], []
    mse_es, coverage_es_95, coverage_es_80, width_es_95, width_es_80 = [], [], [], [], []

    for i in range(horizon):
        # Actual observations aligned with forecast step i of every origin.
        actuals = test.values[i:len(test) - horizon + i]

        # ARIMA mean squared error (MSE):
        mse_arima.append(
            mean_squared_error(test[i:len(test) - horizon + i],
                               pred_arima[:, i]))

        # ARIMA 80% PI
        coverage_arima_80.append(
            compute_coverage(upper_limits=conf_int_arima_80[:, i, 1],
                             lower_limits=conf_int_arima_80[:, i, 0],
                             actual_values=actuals))
        width_arima_80.append(
            np.mean(conf_int_arima_80[:, i, 1] - conf_int_arima_80[:, i, 0]))

        # ARIMA 95% PI
        coverage_arima_95.append(
            compute_coverage(upper_limits=conf_int_arima_95[:, i, 1],
                             lower_limits=conf_int_arima_95[:, i, 0],
                             actual_values=actuals))
        width_arima_95.append(
            np.mean(conf_int_arima_95[:, i, 1] - conf_int_arima_95[:, i, 0]))

        # Exponential Smoothing MSE
        mse_es.append(
            mean_squared_error(test[i:len(test) - horizon + i],
                               pred_es[:, i]))

        # Exponential Smoothing 80% PI
        coverage_es_80.append(
            compute_coverage(upper_limits=conf_int_es_80[:, i, 1],
                             lower_limits=conf_int_es_80[:, i, 0],
                             actual_values=actuals))
        width_es_80.append(
            np.mean(conf_int_es_80[:, i, 1] - conf_int_es_80[:, i, 0]))

        # Exponential Smoothing 95% PI
        coverage_es_95.append(
            compute_coverage(upper_limits=conf_int_es_95[:, i, 1],
                             lower_limits=conf_int_es_95[:, i, 0],
                             actual_values=actuals))
        width_es_95.append(
            np.mean(conf_int_es_95[:, i, 1] - conf_int_es_95[:, i, 0]))

    print('================ ARIMA =================')
    print('Mean MSE', np.mean(mse_arima))
    print('MSE sliding window', mse_arima)
    print('Coverage of 80% PI sliding window', coverage_arima_80)
    print('Width of 80% PI sliding window', width_arima_80)
    print('Coverage of 95% PI sliding window', coverage_arima_95)
    print('Width of 95% PI sliding window', width_arima_95)
    print('================ ES ====================')
    # BUG FIX: labels/values were wrong here — 'MSE sliding window' printed
    # the mean and 'Mean MSE' printed the ARIMA (not ES) MSEs.
    print('Mean MSE', np.mean(mse_es))
    print('MSE sliding window', mse_es)
    print('Coverage of 80% PI sliding window', coverage_es_80)
    print('Width of 80% PI sliding window', width_es_80)
    print('Coverage of 95% PI sliding window', coverage_es_95)
    print('Width of 95% PI sliding window', width_es_95)
    return mse_arima, coverage_arima_80, coverage_arima_95, width_arima_80, width_arima_95, coverage_es_80, mse_es, \
           coverage_es_95, width_es_80, width_es_95
Beispiel #12
0
def arima(df, cfg):
    """Sliding-window evaluation of a seasonal ARIMA chosen by ``auto_arima``.

    The model order is selected on the training portion by AICc, then the
    fitted model is rolled forward over the test set: at every origin it
    issues a ``cfg['forecast_horizon']``-step forecast with 80% and 95%
    prediction intervals before being updated with the next observation.

    Args:
        df: DataFrame whose 'y' column holds the target series.
        cfg: dict with keys 'test_size' and 'forecast_horizon'.

    Returns:
        Per-horizon-step lists:
        (mse_arima, coverage_arima_80, coverage_arima_95,
         width_arima_80, width_arima_95)
    """
    train, test = train_test_split(df['y'], cfg['test_size'])

    # Stepwise order search over a small (p, q, P, Q) grid, scored by AICc;
    # differencing orders d/D are chosen automatically.
    auto_model = auto_arima(train,
                            start_p=1, start_q=1,
                            max_p=3, max_q=3, max_d=1,
                            max_P=1, max_Q=1, max_D=1,
                            m=52, start_P=0, start_Q=0,
                            seasonal=True, d=None, D=None,
                            suppress_warnings=True, stepwise=True,
                            information_criterion='aicc')

    print(auto_model.summary())

    horizon = cfg['forecast_horizon']
    n_origins = len(test) - horizon

    pred_arima = np.zeros([n_origins, horizon])
    conf_int_arima_80 = np.zeros([n_origins, horizon, 2])
    conf_int_arima_95 = np.zeros([n_origins, horizon, 2])

    # Walk forward: forecast, record the intervals, then feed in one
    # observed test point so the next origin starts one step later.
    for origin in range(n_origins):
        mean_95, ci_95 = auto_model.predict(n_periods=horizon,
                                            return_conf_int=True,
                                            alpha=1 - 0.95)
        ci_80 = auto_model.predict(n_periods=horizon,
                                   return_conf_int=True,
                                   alpha=1 - 0.8)[1]
        pred_arima[origin] = mean_95
        conf_int_arima_80[origin] = ci_80
        conf_int_arima_95[origin] = ci_95
        auto_model.update(y=[test.values[origin]])

    # Per-horizon-step accuracy and interval statistics.
    mse_arima = []
    coverage_arima_80, width_arima_80 = [], []
    coverage_arima_95, width_arima_95 = [], []

    for step in range(horizon):
        # Observations aligned with forecast step `step` of every origin.
        actuals = test.values[step:len(test) - horizon + step]

        mse_arima.append(
            mean_squared_error(test[step:len(test) - horizon + step],
                               pred_arima[:, step]))

        coverage_arima_80.append(
            compute_coverage(upper_limits=conf_int_arima_80[:, step, 1],
                             lower_limits=conf_int_arima_80[:, step, 0],
                             actual_values=actuals))
        width_arima_80.append(
            np.mean(conf_int_arima_80[:, step, 1] -
                    conf_int_arima_80[:, step, 0]))

        coverage_arima_95.append(
            compute_coverage(upper_limits=conf_int_arima_95[:, step, 1],
                             lower_limits=conf_int_arima_95[:, step, 0],
                             actual_values=actuals))
        width_arima_95.append(
            np.mean(conf_int_arima_95[:, step, 1] -
                    conf_int_arima_95[:, step, 0]))

    print('================ ARIMA =================')
    print('Mean MSE', np.mean(mse_arima))
    print('MSE sliding window', mse_arima)
    print('Coverage of 80% PI sliding window', coverage_arima_80)
    print('Width of 80% PI sliding window', width_arima_80)
    print('Coverage of 95% PI sliding window', coverage_arima_95)
    print('Width of 95% PI sliding window', width_arima_95)
    return mse_arima, coverage_arima_80, coverage_arima_95, width_arima_80, width_arima_95