Example #1
0
def test_dlt_full_reproducibility(synthetic_data, estimator_type,
                                  regressor_signs, seasonality):
    """Check that two identically configured DLTFull runs agree.

    Two separate ``DLTFull`` instances are built with the same arguments;
    each one is fitted on the training frame and then used to predict on
    the test frame. The posterior samples, the regression coefficients and
    the prediction frames of the two runs are compared afterwards.
    """
    train_df, test_df, _ = synthetic_data

    def run_once():
        # A new instance must be created for every run to reset the seed;
        # both fit and predict contain random generation processes.
        model = DLTFull(response_col='response',
                        date_col='week',
                        regressor_col=train_df.columns.tolist()[2:],
                        regressor_sign=regressor_signs,
                        prediction_percentiles=[5, 95],
                        seasonality=seasonality,
                        num_warmup=50,
                        verbose=False,
                        estimator_type=estimator_type)
        model.fit(train_df)
        samples = copy(model._posterior_samples)
        forecast = model.predict(test_df)
        coefs = model.get_regression_coefs()
        return samples, forecast, coefs

    # the runs are strictly sequential: the second model is only built
    # after the first one has finished fitting and predicting
    posteriors_first, predict_df_first, regression_out_first = run_once()
    posteriors_second, predict_df_second, regression_out_second = run_once()

    # both runs must expose the same posterior keys
    assert set(posteriors_first.keys()) == set(posteriors_second.keys())

    # posterior draws must be reproducible, key by key
    for key, draws in posteriors_first.items():
        assert np.allclose(draws, posteriors_second[key])

    # regression coefficients are already covered by the posterior check;
    # they are compared again because end users retrieve them very often
    assert regression_out_first.equals(regression_out_second)

    # predictions must be reproducible as well
    assert predict_df_first.equals(predict_df_second)
Example #2
0
def test_dlt_full_with_regression(synthetic_data, estimator_type,
                                  regressor_signs):
    """Fit DLTFull with regressors and check the output shapes.

    Every training column from the third one onwards is passed as a
    regressor. The prediction frame must have 51 rows and the four expected
    columns; the regression coefficient frame must have shape ``(6, 3)``
    with one row per regressor column.
    """
    train_df, test_df, _ = synthetic_data

    model_kwargs = dict(
        response_col='response',
        date_col='week',
        regressor_col=train_df.columns.tolist()[2:],
        regressor_sign=regressor_signs,
        prediction_percentiles=[5, 95],
        seasonality=52,
        num_warmup=50,
        verbose=False,
        estimator_type=estimator_type,
    )
    model = DLTFull(**model_kwargs)

    model.fit(train_df)
    forecast = model.predict(test_df)
    coef_table = model.get_regression_coefs()

    wanted_columns = [
        'week', 'prediction_lower', 'prediction', 'prediction_upper'
    ]

    # prediction frame: one row per test period, one column per expected name
    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns

    # coefficient frame: fixed shape, one row per regressor column
    assert coef_table.shape == (6, 3)
    assert coef_table.shape[0] == len(train_df.columns.tolist()[2:])
Example #3
0
def test_dlt_full_univariate(synthetic_data, estimator_type):
    """Fit DLTFull without regressors and check initial values and outputs.

    After fitting, the object returned by ``get_init_values`` must be a
    ``DLTInitializer`` with ``s == 52`` whose call result holds an
    ``init_sea`` entry of shape ``(51,)``. The prediction frame must have
    51 rows and the percentile-named columns, and the posterior sample
    container must have length 12.
    """
    train_df, test_df, _ = synthetic_data

    model = DLTFull(
        response_col='response',
        date_col='week',
        prediction_percentiles=[5, 95],
        seasonality=52,
        num_warmup=50,
        verbose=False,
        estimator_type=estimator_type,
    )
    model.fit(train_df)

    # initializer checks come before predict, as in the fit-then-inspect flow
    initializer = model.get_init_values()
    assert isinstance(initializer, DLTInitializer)
    assert initializer.s == 52
    generated = initializer()
    assert generated['init_sea'].shape == (51, )

    forecast = model.predict(test_df)

    wanted_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']
    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
    assert len(model._posterior_samples) == 12
Example #4
0
def test_dlt_non_seasonal_fit(synthetic_data, estimator_type):
    """Fit DLTFull with only the column names and the estimator type.

    No seasonality, regressors or prediction percentiles are passed to the
    constructor. The prediction frame must have 51 rows with exactly the
    ``week`` and ``prediction`` columns, and the posterior sample container
    must have length 11.
    """
    train_df, test_df, _ = synthetic_data

    model = DLTFull(
        response_col='response',
        date_col='week',
        estimator_type=estimator_type,
    )
    model.fit(train_df)
    forecast = model.predict(test_df)

    wanted_columns = ['week', 'prediction']
    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
    assert len(model._posterior_samples) == 11
Example #5
0
def test_dlt_full_with_regression(synthetic_data, estimator_type,
                                  regressor_signs):
    """Fit DLTFull with signed regressors and check init values and outputs.

    Every training column from the third one onwards is passed as a
    regressor together with ``regressor_signs``. The test checks the
    generated initial values (seasonality and one beta vector per sign that
    occurs), the prediction and coefficient frame shapes, and upper bounds
    on the number of non-negative and non-positive coefficients.
    """
    train_df, test_df, _ = synthetic_data

    model = DLTFull(
        response_col='response',
        date_col='week',
        regressor_col=train_df.columns.tolist()[2:],
        regressor_sign=regressor_signs,
        prediction_percentiles=[5, 95],
        seasonality=52,
        num_warmup=50,
        verbose=False,
        estimator_type=estimator_type,
    )
    model.fit(train_df)

    initializer = model.get_init_values()
    assert isinstance(initializer, DLTInitializer)
    generated = initializer()
    assert generated['init_sea'].shape == (51, )

    # each sign that occurs must have an initial beta vector with one entry
    # per regressor carrying that sign
    beta_key_by_sign = (('+', 'pr_beta'), ('-', 'nr_beta'), ('=', 'rr_beta'))
    for sign, beta_key in beta_key_by_sign:
        occurrences = regressor_signs.count(sign)
        if occurrences > 0:
            assert generated[beta_key].shape == (occurrences, )

    forecast = model.predict(test_df)
    coef_table = model.get_regression_coefs()

    wanted_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']
    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
    assert coef_table.shape == (6, 3)
    assert coef_table.shape[0] == len(train_df.columns.tolist()[2:])

    # non-negative coefficients are bounded by the '+' and '=' counts,
    # non-positive coefficients by the '-' and '=' counts
    num_non_negative = np.sum(coef_table['coefficient'].values >= 0)
    assert num_non_negative <= \
        regressor_signs.count('+') + regressor_signs.count('=')
    num_non_positive = np.sum(coef_table['coefficient'].values <= 0)
    assert num_non_positive <= \
        regressor_signs.count('-') + regressor_signs.count('=')
Example #6
0
def test_dlt_full_univariate(synthetic_data, estimator_type):
    """Fit DLTFull without regressors and check the prediction output.

    The prediction frame must have 51 rows and the percentile-named
    columns, and the posterior sample container must have length 13.
    """
    train_df, test_df, _ = synthetic_data

    model = DLTFull(
        response_col='response',
        date_col='week',
        prediction_percentiles=[5, 95],
        seasonality=52,
        num_warmup=50,
        verbose=False,
        estimator_type=estimator_type,
    )
    model.fit(train_df)
    forecast = model.predict(test_df)

    wanted_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']
    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
    assert len(model._posterior_samples) == 13
Example #7
0
                                         incl_smooth_params=True)

    trace_plot = plot_posterior_params(dlt,
                                       kind='trace',
                                       incl_trend_params=True,
                                       incl_smooth_params=True)

    pair_plot = plot_posterior_params(dlt,
                                      kind='pair',
                                      pair_type='reg',
                                      incl_trend_params=False,
                                      incl_smooth_params=False)

    num_periods = 12 * 1
    freq = 1
    date_col = dlt.date_col
    last_dt = (dlt.date_col.dt.to_pydatetime())[-1]
    dts = [
        last_dt + timedelta(days=x * freq) for x in range(1, num_periods + 1)
    ]
    future_df = pd.DataFrame(dts, columns=[date_col])

    predicted_df_dlt = dlt.predict(df=future_df, decompose=True)

    plot_predicted_data(training_actual_df=credit_agg_short[-90:],
                        predicted_df=predicted_df_dlt[-90:],
                        test_actual_df=test_df,
                        date_col=dlt.date_col,
                        actual_col='pct_chg_in_sales_from_prev_mnth',
                        pred_col='predicted_pct_chg_in_sales')