Example #1
0
def test_lgt_full_fit(synthetic_data, seasonality, estimator_type):
    """Fit ``LGTFull``, predict, and check the output layout and posterior size.

    The model is built for the given ``seasonality`` and ``estimator_type``
    with 5/95 prediction percentiles, fitted on the training frame and used
    to predict on the test frame.  The assertions cover the prediction
    shape, the prediction column names, and the number of entries in
    ``_posterior_samples``.
    """
    train_df, test_df, _ = synthetic_data

    uses_stan = issubclass(estimator_type, StanEstimator)

    # no `lp__` in pyro, hence one entry fewer than in the Stan case
    num_params = 11 if uses_stan else 10
    if seasonality == 52:
        num_params += 2

    # each estimator family gets its own (small) sampling budget
    sampler_kwargs = {'num_warmup': 50} if uses_stan else {'num_steps': 10}

    model = LGTFull(
        response_col='response',
        date_col='week',
        prediction_percentiles=[5, 95],
        seasonality=seasonality,
        verbose=False,
        estimator_type=estimator_type,
        **sampler_kwargs
    )
    model.fit(train_df)
    forecast = model.predict(test_df)

    wanted_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']

    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
    assert len(model._posterior_samples) == num_params
Example #2
0
def test_lgt_non_seasonal_fit_pyro(synthetic_data):
    """Fit a non-seasonal ``LGTFull`` with ``PyroEstimatorVI`` and check outputs.

    Asserts on the prediction frame's shape and column names, and on the
    number of entries stored in ``_posterior_samples``.
    """
    train_df, test_df, _ = synthetic_data

    model = LGTFull(
        response_col='response',
        date_col='week',
        estimator_type=PyroEstimatorVI,
        num_steps=10,
    )
    model.fit(train_df)
    forecast = model.predict(test_df)

    wanted_columns = ['week', 'prediction']

    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
    # no `lp__` in pyro
    assert len(model._posterior_samples) == 10
Example #3
0
def test_lgt_non_seasonal_fit(synthetic_data, estimator_type):
    """Fit a non-seasonal ``LGTFull`` with the given estimator and check outputs.

    The model is built with ``num_warmup=50``; the test then asserts on the
    prediction frame's shape and column names, and on the number of entries
    stored in ``_posterior_samples``.
    """
    train_df, test_df, _ = synthetic_data

    model = LGTFull(response_col='response',
                    date_col='week',
                    estimator_type=estimator_type,
                    num_warmup=50)
    model.fit(train_df)
    forecast = model.predict(test_df)

    wanted_columns = ['week', 'prediction']

    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
    assert len(model._posterior_samples) == 11
Example #4
0
def test_prediction_percentiles(iclaims_training_data, prediction_percentiles):
    """Check prediction column names for a given ``prediction_percentiles``.

    A seasonal ``LGTFull`` is fitted on the training frame and asked to
    predict on that same frame, first plainly and then with
    ``decompose=True``.  In both cases the column names must match the
    expected percentile suffixes and the row count must equal the input's.
    """
    df = iclaims_training_data

    model = LGTFull(response_col='claims',
                    date_col='week',
                    seasonality=52,
                    seed=8888,
                    prediction_percentiles=prediction_percentiles)

    # a falsy setting is expected to give the 5/95 suffixes only;
    # any other setting is expected to add a `_10` suffix as well
    suffixes = (
        ['_5', '_10', '', '_95'] if prediction_percentiles
        else ['_5', '', '_95']
    )

    model.fit(df)

    # plain prediction: one column per suffix
    plain = model.predict(df)
    wanted_plain = ['week'] + ["prediction" + sfx for sfx in suffixes]
    assert plain.columns.tolist() == wanted_plain
    assert plain.shape[0] == df.shape[0]

    # decomposed prediction: every component carries every suffix
    decomposed = model.predict(df, decompose=True)
    components = [
        'prediction',
        PredictedComponents.TREND.value,
        PredictedComponents.SEASONALITY.value,
        PredictedComponents.REGRESSION.value,
    ]
    wanted_decomposed = ['week'] + [
        component + sfx for component in components for sfx in suffixes
    ]
    assert decomposed.columns.tolist() == wanted_decomposed
    assert decomposed.shape[0] == df.shape[0]
Example #5
0
def test_lgt_full_univariate_pyro(synthetic_data):
    """Fit a seasonal ``LGTFull`` with ``PyroEstimatorVI`` and check outputs.

    Asserts on the prediction frame's shape and column names (including the
    lower/upper bound columns), and on the number of entries stored in
    ``_posterior_samples``.
    """
    train_df, test_df, _ = synthetic_data

    model = LGTFull(
        response_col='response',
        date_col='week',
        prediction_percentiles=[5, 95],
        seasonality=52,
        num_steps=10,
        verbose=False,
        estimator_type=PyroEstimatorVI,
    )
    model.fit(train_df)
    forecast = model.predict(test_df)

    wanted_columns = ['week', 'prediction_lower', 'prediction', 'prediction_upper']

    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
    # no `lp__` in pyro
    assert len(model._posterior_samples) == 12
Example #6
0
def test_lgt_full_with_regression(synthetic_data, estimator_type, regressor_signs):
    """Fit ``LGTFull`` with regressors and check predictions and coefficients.

    Every column of the training frame from the third onwards is used as a
    regressor.  Stan-based estimators are given ``num_warmup=50`` and
    Pyro-based ones ``num_steps=10``; for any other estimator type the test
    returns without exercising the model.
    """
    train_df, test_df, _ = synthetic_data

    # choose the sampling budget matching the estimator family
    if issubclass(estimator_type, StanEstimator):
        sampler_kwargs = {'num_warmup': 50}
    elif issubclass(estimator_type, PyroEstimator):
        sampler_kwargs = {'num_steps': 10}
    else:
        # neither Stan nor Pyro: nothing to check
        return None

    model = LGTFull(
        response_col='response',
        date_col='week',
        regressor_col=train_df.columns.tolist()[2:],
        regressor_sign=regressor_signs,
        prediction_percentiles=[5, 95],
        seasonality=52,
        verbose=False,
        estimator_type=estimator_type,
        **sampler_kwargs
    )
    model.fit(train_df)
    forecast = model.predict(test_df)
    coef_table = model.get_regression_coefs()

    wanted_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']

    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
    assert coef_table.shape == (6, 3)
    # one coefficient row per regressor column
    assert coef_table.shape[0] == len(train_df.columns.tolist()[2:])
Example #7
0
def test_lgt_full_fit(synthetic_data, seasonality, estimator_type):
    """Fit ``LGTFull``, inspect its init values, predict, and check outputs.

    Besides the prediction shape, column names and posterior size, this
    test checks the object returned by ``get_init_values()``: for a truthy
    ``seasonality`` it must be an ``LGTInitializer`` whose call yields an
    ``init_sea`` entry of shape ``(51,)``; otherwise it must be falsy.
    """
    train_df, test_df, _ = synthetic_data

    # sampling budget depends on the estimator family; other types get none
    if issubclass(estimator_type, StanEstimator):
        sampler_kwargs = {'num_warmup': 50, 'num_sample': 50}
    elif issubclass(estimator_type, PyroEstimator):
        sampler_kwargs = {'num_steps': 10}
    else:
        sampler_kwargs = {}

    # seasonal runs are expected to carry two additional posterior entries
    num_params = 12 if seasonality == 52 else 10

    model = LGTFull(
        response_col='response',
        date_col='week',
        prediction_percentiles=[5, 95],
        seasonality=seasonality,
        verbose=False,
        estimator_type=estimator_type,
        **sampler_kwargs
    )
    model.fit(train_df)

    initializer = model.get_init_values()
    if not seasonality:
        assert not initializer
    else:
        assert isinstance(initializer, LGTInitializer)
        assert initializer.s == 52
        generated = initializer()
        assert generated['init_sea'].shape == (51, )

    forecast = model.predict(test_df)

    wanted_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']

    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
    assert len(model._posterior_samples) == num_params
Example #8
0
def test_lgt_full_reproducibility(synthetic_data, estimator_type,
                                  regressor_signs, seasonality):
    """Check that two identically configured ``LGTFull`` runs agree.

    Two separate model instances are built with the same arguments, each is
    fitted on the training frame and used to predict on the test frame.
    The posterior samples, the regression coefficients and the predictions
    of the two runs must match.
    """
    train_df, test_df, _ = synthetic_data

    def build_model():
        # every call hands back a fresh, identically configured instance
        return LGTFull(
            response_col='response',
            date_col='week',
            regressor_col=train_df.columns.tolist()[2:],
            regressor_sign=regressor_signs,
            prediction_percentiles=[5, 95],
            seasonality=seasonality,
            num_warmup=50,
            verbose=False,
            estimator_type=estimator_type,
        )

    # first run: fit, snapshot posteriors, predict, collect coefficients
    model_a = build_model()
    model_a.fit(train_df)
    draws_a = copy(model_a._posterior_samples)
    forecast_a = model_a.predict(test_df)
    coefs_a = model_a.get_regression_coefs()

    # second run
    # note a new instance must be created to reset the seed
    # note both fit and predict contain random generation processes
    model_b = build_model()
    model_b.fit(train_df)
    draws_b = copy(model_b._posterior_samples)
    forecast_b = model_b.predict(test_df)
    coefs_b = model_b.get_regression_coefs()

    # both runs must expose the same posterior keys
    assert set(draws_a.keys()) == set(draws_b.keys())

    # posterior draws must be reproducible, key by key
    for name, draw in draws_a.items():
        assert np.allclose(draw, draws_b[name])

    # regression coefficients are already covered by the posterior check,
    # but are compared again since end users retrieve them very commonly
    assert coefs_a.equals(coefs_b)

    # predictions must be reproducible as well
    assert forecast_a.equals(forecast_b)