Beispiel #1
0
def test_dlt_predict_mixed_regular_positive(iclaims_training_data):
    """Predictions must not depend on the order in which regressors are listed.

    Two ``DLTMAP`` models are fit on the same frame with the same three
    regressors. Only the listing order differs; each sign stays attached to
    its column ('trend.filling' is '+', the other two are '=').
    """
    data = iclaims_training_data
    shared_kwargs = dict(
        response_col='claims',
        date_col='week',
        seasonality=52,
        seed=8888,
    )
    # (regressor columns, matching signs) for the two models under comparison
    orderings = (
        (['trend.unemploy', 'trend.filling', 'trend.job'], ['=', '+', '=']),
        (['trend.unemploy', 'trend.job', 'trend.filling'], ['=', '=', '+']),
    )

    predictions = []
    for columns, signs in orderings:
        model = DLTMAP(
            regressor_col=columns,
            regressor_sign=signs,
            **shared_kwargs,
        )
        model.fit(data)
        predictions.append(model.predict(data)['prediction'].values)

    first, second = predictions
    assert np.allclose(first, second)
Beispiel #2
0
def test_dlt_predict_all_positive_reg(iclaims_training_data):
    """Check the decomposed 'regression' column when all three signs are '+'.

    The test passes when at least one entry of that column is truthy.
    """
    data = iclaims_training_data
    regressors = ['trend.unemploy', 'trend.filling', 'trend.job']

    model = DLTMAP(
        response_col='claims',
        date_col='week',
        regressor_col=regressors,
        regressor_sign=['+'] * len(regressors),
        seasonality=52,
        seed=8888,
    )
    model.fit(data)

    decomposed = model.predict(data, decompose=True)
    regression_component = decomposed['regression'].values
    assert any(regression_component)
Beispiel #3
0
def test_dlt_map_global_trend(synthetic_data, global_trend_option):
    """Fit ``DLTMAP`` with the given ``global_trend_option`` and check the forecast frame.

    The prediction on the test frame is expected to have 51 rows and exactly
    the columns 'week' and 'prediction', in that order.
    """
    # the third element of the fixture is not needed here
    train_df, test_df, _ = synthetic_data

    model = DLTMAP(
        response_col='response',
        date_col='week',
        seasonality=52,
        global_trend_option=global_trend_option,
    )
    model.fit(train_df)
    forecast = model.predict(test_df)

    wanted_columns = ['week', 'prediction']
    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
Beispiel #4
0
def test_dlt_map_univariate(synthetic_data):
    """Fit ``DLTMAP`` without regressors and check forecast layout and parameter count.

    Expects a 51-row forecast with columns 'week' and 'prediction', and 12
    entries in ``_posterior_samples``.
    """
    # the third element of the fixture is not needed here
    train_df, test_df, _ = synthetic_data

    model = DLTMAP(
        response_col='response',
        date_col='week',
        seasonality=52,
        num_warmup=50,
        verbose=False,
    )
    model.fit(train_df)
    forecast = model.predict(test_df)

    wanted_columns = ['week', 'prediction']
    # no `lp__` parameter in optimizing()
    wanted_parameter_count = 12

    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
    assert len(model._posterior_samples) == wanted_parameter_count
Beispiel #5
0
def test_dlt_map_single_regressor(iclaims_training_data):
    """Fit ``DLTMAP`` on log-transformed claims with a single regressor.

    Expects one prediction row per input row, the columns
    'week', 'prediction_5', 'prediction', 'prediction_95' (in that order),
    and 13 entries in ``_posterior_samples``.
    """
    # Work on a private copy: writing the log-transformed column into the
    # fixture object itself would leak into any other test sharing that
    # object (e.g. if the fixture has module or session scope).
    df = iclaims_training_data.copy()
    df['claims'] = np.log(df['claims'])
    regressor_col = ['trend.unemploy']

    dlt = DLTMAP(
        response_col='claims',
        date_col='week',
        regressor_col=regressor_col,
        seasonality=52,
        seed=8888,
    )
    dlt.fit(df)
    predicted_df = dlt.predict(df)

    expected_num_parameters = 13
    expected_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']

    assert predicted_df.shape[0] == df.shape[0]
    assert predicted_df.columns.tolist() == expected_columns
    assert len(dlt._posterior_samples) == expected_num_parameters
Beispiel #6
0
def test_dlt_map_univariate(synthetic_data):
    """Fit ``DLTMAP`` without regressors; check initial values, forecast and parameter count.

    After fitting, ``get_init_values()`` must return a ``DLTInitializer`` with
    ``s == 52`` whose call result holds an 'init_sea' array of shape (51,).
    The forecast must have 51 rows and the columns
    'week', 'prediction_5', 'prediction', 'prediction_95'; 12 entries are
    expected in ``_posterior_samples``.
    """
    # the third element of the fixture is not needed here
    train_df, test_df, _ = synthetic_data

    model = DLTMAP(
        response_col='response',
        date_col='week',
        seasonality=52,
        num_warmup=50,
        verbose=False,
    )
    model.fit(train_df)

    # inspect the initializer before predicting
    initializer = model.get_init_values()
    assert isinstance(initializer, DLTInitializer)
    assert initializer.s == 52
    generated = initializer()
    assert generated['init_sea'].shape == (51, )

    forecast = model.predict(test_df)

    wanted_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']
    wanted_parameter_count = 12

    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
    assert len(model._posterior_samples) == wanted_parameter_count
Beispiel #7
0
def test_dlt_fixed_sm_input(synthetic_data, level_sm_input,
                            seasonality_sm_input, slope_sm_input):
    """Fit ``DLTMAP`` with the supplied ``*_sm_input`` values and regressors.

    Expects a 51-row forecast with the columns
    'week', 'prediction_5', 'prediction', 'prediction_95', and a regression
    coefficient table of shape (6, 3) with one row per regressor.
    """
    # the third element of the fixture is not needed here
    train_df, test_df, _ = synthetic_data
    # every column after the first two is passed to the model as a regressor
    regressors = train_df.columns.tolist()[2:]

    model = DLTMAP(
        response_col='response',
        date_col='week',
        regressor_col=regressors,
        level_sm_input=level_sm_input,
        seasonality_sm_input=seasonality_sm_input,
        slope_sm_input=slope_sm_input,
        seasonality=52,
        num_warmup=50,
        verbose=False,
    )
    model.fit(train_df)
    forecast = model.predict(test_df)

    coef_table = model.get_regression_coefs()
    coef_rows = coef_table.shape[0]

    wanted_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']

    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
    assert coef_table.shape == (6, 3)
    assert coef_rows == len(regressors)
Beispiel #8
0
def test_dlt_map_reproducibility(synthetic_data, seasonality):
    """Two identically configured ``DLTMAP`` runs must agree.

    Both the 'map' entry of ``_aggregate_posteriors`` and the 'prediction'
    column of the forecast are compared with ``np.allclose``.
    """
    # the third element of the fixture is not needed here
    train_df, test_df, _ = synthetic_data

    def fit_and_predict():
        # note a new instance must be created to reset the seed
        # note both fit and predict contain random generation processes
        model = DLTMAP(
            response_col='response',
            date_col='week',
            prediction_percentiles=[5, 95],
            seasonality=seasonality,
        )
        model.fit(train_df)
        map_posteriors = copy(model._aggregate_posteriors['map'])
        forecast = model.predict(test_df)
        return map_posteriors, forecast

    # first fit and predict
    posteriors1, prediction1 = fit_and_predict()
    # second fit and predict
    posteriors2, prediction2 = fit_and_predict()

    # assert same posterior keys
    assert set(posteriors1.keys()) == set(posteriors2.keys())

    # assert posterior draws are reproducible
    for name, draws in posteriors1.items():
        assert np.allclose(draws, posteriors2[name])

    # assert prediction is reproducible
    first_values = prediction1['prediction'].values
    second_values = prediction2['prediction'].values
    assert np.allclose(first_values, second_values)
Beispiel #9
0
def test_dlt_grid_tuning(synthetic_data, param_grid):
    """Run ``grid_search_orbit`` over ``param_grid`` with a ``DLTMAP`` model.

    Checks that the first best-parameter entry has the grid's keys, that the
    tuning frame's columns are the grid keys plus 'metrics', and that the
    tuning frame has shape (9, 3).
    """
    # only the training frame of the fixture is used
    train_df, _, _ = synthetic_data

    model = DLTMAP(response_col='response', date_col='week', seasonality=52)

    best_params, tuned_df = grid_search_orbit(
        param_grid,
        model=model,
        df=train_df,
        min_train_len=80,
        incremental_len=20,
        forecast_len=20,
        metrics=None,
        criteria=None,
        verbose=True,
    )

    wanted_columns = set(param_grid.keys()) | {'metrics'}

    assert best_params[0].keys() == param_grid.keys()
    assert set(tuned_df.columns.to_list()) == wanted_columns
    assert tuned_df.shape == (9, 3)
Beispiel #10
0
def test_dlt_mixed_signs_and_order(iclaims_training_data, regressor_signs):
    """Predictions must be invariant to regressor order and to column order.

    One model is built with the regressors in their raw order and one with
    the regressors (and their signs) permuted the same way. Each model
    predicts on the original frame and on a frame whose regressor columns
    are permuted. All four prediction vectors must agree within
    ``atol=1e-3``.
    """
    # Work on a private copy: writing the log-transformed column into the
    # fixture object itself would leak into any other test sharing that
    # object (e.g. if the fixture has module or session scope).
    df = iclaims_training_data.copy()
    df['claims'] = np.log(df['claims'])

    raw_regressor_col = ['trend.unemploy', 'trend.filling', 'trend.job']
    # the same permutation is applied to columns and signs so each sign
    # stays attached to its regressor
    permutation = [1, 2, 0]
    new_regressor_col = [raw_regressor_col[idx] for idx in permutation]
    new_regressor_signs = [regressor_signs[idx] for idx in permutation]
    # mixing ordering of cols in df of prediction
    new_df = df[['claims', 'week'] + new_regressor_col]

    dlt = DLTMAP(
        response_col='claims',
        date_col='week',
        regressor_col=raw_regressor_col,
        regressor_sign=regressor_signs,
        seasonality=52,
        seed=8888,
    )
    dlt.fit(df)
    predicted_df_v1 = dlt.predict(df)
    predicted_df_v2 = dlt.predict(new_df)

    # mixing ordering of signs
    dlt_new = DLTMAP(
        response_col='claims',
        date_col='week',
        regressor_col=new_regressor_col,
        regressor_sign=new_regressor_signs,
        seasonality=52,
        seed=8888,
    )
    dlt_new.fit(df)
    predicted_df_v3 = dlt_new.predict(df)
    predicted_df_v4 = dlt_new.predict(new_df)

    pred_v1 = predicted_df_v1['prediction'].values
    pred_v2 = predicted_df_v2['prediction'].values
    pred_v3 = predicted_df_v3['prediction'].values
    pred_v4 = predicted_df_v4['prediction'].values

    # they should all be identical; ordering of signs or columns in
    # prediction should not matter
    assert np.allclose(pred_v1, pred_v2, atol=1e-3)
    assert np.allclose(pred_v1, pred_v3, atol=1e-3)
    assert np.allclose(pred_v1, pred_v4, atol=1e-3)