Esempio n. 1
0
def test_backtester_test_metrics(iclaims_training_data, metrics):
    """Check that ``BackTester.score`` reports exactly the requested metrics.

    ``metrics`` may be ``None`` (the backtester's default metrics are then
    expected), a list of metric callables, or a single metric callable.
    """
    model = LGTMAP(
        response_col='claims',
        date_col='week',
        seasonality=1,
        verbose=False,
    )
    bt = BackTester(
        model=model,
        df=iclaims_training_data,
        forecast_len=3,
        n_splits=1,
    )
    bt.fit_predict()

    scores = bt.score(metrics=metrics)
    reported_names = set(scores['metric_name'].tolist())

    # Normalise the three accepted shapes of `metrics` into one iterable of
    # callables so the expected names can be derived in a single place.
    if metrics is None:
        metric_fns = bt._default_metrics
    elif isinstance(metrics, list):
        metric_fns = metrics
    else:
        metric_fns = [metrics]

    expected_names = {fn.__name__ for fn in metric_fns}
    assert expected_names == reported_names
Esempio n. 2
0
def test_lgt_predict_all_positive_reg(iclaims_training_data):
    """Fit LGTMAP with three positive-signed regressors and check that the
    decomposed prediction contains at least one truthy ``regression`` value.
    """
    regressors = ['trend.unemploy', 'trend.filling', 'trend.job']
    model = LGTMAP(
        response_col='claims',
        date_col='week',
        regressor_col=regressors,
        regressor_sign=['+'] * len(regressors),
        seasonality=52,
        seed=8888,
    )

    model.fit(iclaims_training_data)
    decomposed = model.predict(iclaims_training_data, decompose=True)

    regression_component = decomposed['regression'].values
    assert any(regression_component)
Esempio n. 3
0
def test_backtester_with_training_data(iclaims_training_data):
    """Score a backtest with training metrics included and verify the metric
    names reported for the test and training partitions.

    Test metrics are expected to match every default metric; training metrics
    are expected only for default metrics whose callable signature is exactly
    ``{'actual', 'predicted'}``.
    """
    model = LGTMAP(
        response_col='claims',
        date_col='week',
        seasonality=1,
        verbose=False,
    )
    bt = BackTester(
        model=model,
        df=iclaims_training_data,
        min_train_len=100,
        incremental_len=100,
        forecast_len=20,
    )
    bt.fit_predict()

    scores = bt.score(include_training_metrics=True)
    is_train = scores['is_training_metric']
    reported_test = set(scores.loc[~is_train, 'metric_name'].tolist())
    reported_train = set(scores.loc[is_train, 'metric_name'].tolist())

    expected_test = {fn.__name__ for fn in bt._default_metrics}
    expected_train = {
        fn.__name__
        for fn in bt._default_metrics
        if bt._get_metric_callable_signature(fn) == {'actual', 'predicted'}
    }

    assert expected_test == reported_test
    assert expected_train == reported_train
Esempio n. 4
0
def test_lgt_map_univariate(synthetic_data, estimator_type):
    """Fit a univariate LGTMAP model and check the prediction frame layout
    and the number of stored posterior parameters.
    """
    train_df, test_df, _ = synthetic_data

    model = LGTMAP(
        response_col='response',
        date_col='week',
        seasonality=52,
        verbose=False,
        estimator_type=estimator_type,
    )
    model.fit(train_df)
    forecast = model.predict(test_df)

    columns = ['week', 'prediction']

    assert forecast.shape == (51, len(columns))
    assert forecast.columns.tolist() == columns
    # no `lp__` parameter in optimizing()
    assert len(model._posterior_samples) == 12
Esempio n. 5
0
def test_lgt_map_single_regressor(iclaims_training_data):
    """Fit LGTMAP on log-transformed claims with a single regressor and check
    the prediction frame layout and the number of posterior parameters.
    """
    # Work on a copy: log-transforming the fixture frame in place would leak
    # the transformed column into any other user of the same DataFrame object
    # (e.g. if the fixture is cached or has a scope wider than one test).
    df = iclaims_training_data.copy()
    df['claims'] = np.log(df['claims'])
    regressor_col = ['trend.unemploy']

    lgt = LGTMAP(
        response_col='claims',
        date_col='week',
        regressor_col=regressor_col,
        seasonality=52,
        seed=8888,
    )
    lgt.fit(df)
    predicted_df = lgt.predict(df)

    expected_num_parameters = 13
    expected_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']

    # One prediction row per input row.
    assert predicted_df.shape[0] == df.shape[0]
    assert predicted_df.columns.tolist() == expected_columns
    assert len(lgt._posterior_samples) == expected_num_parameters
Esempio n. 6
0
def test_lgt_fixed_sm_input(synthetic_data, level_sm_input,
                            seasonality_sm_input, slope_sm_input):
    """Fit LGTMAP with the supplied ``*_sm_input`` values and verify the
    prediction frame and the regression-coefficient table shapes.

    Every column of the training frame from the third onward is used as a
    regressor.
    """
    train_df, test_df, _ = synthetic_data

    model = LGTMAP(
        response_col='response',
        date_col='week',
        regressor_col=train_df.columns.tolist()[2:],
        level_sm_input=level_sm_input,
        seasonality_sm_input=seasonality_sm_input,
        slope_sm_input=slope_sm_input,
        seasonality=52,
        verbose=False,
    )
    model.fit(train_df)

    forecast = model.predict(test_df, n_bootstrap_draw=100)
    coef_table = model.get_regression_coefs()
    n_coef_rows = coef_table.shape[0]

    columns = ['week', 'prediction_5', 'prediction', 'prediction_95']

    assert forecast.shape == (51, len(columns))
    assert forecast.columns.tolist() == columns
    assert coef_table.shape == (6, 3)
    # One coefficient row per regressor column passed to the model.
    assert n_coef_rows == len(train_df.columns.tolist()[2:])
Esempio n. 7
0
def test_lgt_map_fit(synthetic_data, seasonality, estimator_type):
    """Fit LGTMAP, inspect the init-values object it exposes, and check the
    prediction frame layout and posterior parameter count.
    """
    train_df, test_df, _ = synthetic_data

    model = LGTMAP(
        response_col='response',
        date_col='week',
        seasonality=seasonality,
        verbose=False,
        estimator_type=estimator_type,
    )
    model.fit(train_df)

    initializer = model.get_init_values()
    if not seasonality:
        # Without seasonality the test expects a falsy init-values result.
        assert not initializer
    else:
        assert isinstance(initializer, LGTInitializer)
        assert initializer.s == 52
        generated = initializer()
        assert generated['init_sea'].shape == (51, )

    forecast = model.predict(test_df)

    columns = ['week', 'prediction_5', 'prediction', 'prediction_95']
    # Two extra parameters are expected when seasonality is 52.
    n_params = 12 if seasonality == 52 else 10

    assert forecast.shape == (51, len(columns))
    assert forecast.columns.tolist() == columns
    assert len(model._posterior_samples) == n_params
Esempio n. 8
0
def test_lgt_map_reproducibility(synthetic_data, seasonality):
    """Two independently constructed LGTMAP models with identical arguments
    must yield matching MAP posteriors and matching point predictions.
    """
    train_df, test_df, _ = synthetic_data

    def fit_and_predict():
        # note a new instance must be created to reset the seed
        # note both fit and predict contain random generation processes
        model = LGTMAP(
            response_col='response',
            date_col='week',
            prediction_percentiles=[5, 95],
            seasonality=seasonality,
        )
        model.fit(train_df)
        map_posteriors = copy(model._aggregate_posteriors['map'])
        return map_posteriors, model.predict(test_df)

    first_posteriors, first_prediction = fit_and_predict()
    second_posteriors, second_prediction = fit_and_predict()

    # assert same posterior keys
    assert set(first_posteriors.keys()) == set(second_posteriors.keys())

    # assert posterior draws are reproducible
    for name, draws in first_posteriors.items():
        assert np.allclose(draws, second_posteriors[name])

    # assert prediction is reproducible
    assert np.allclose(
        first_prediction['prediction'].values,
        second_prediction['prediction'].values,
    )
Esempio n. 9
0
def test_lgt_map_fit(synthetic_data, seasonality, estimator_type):
    """Fit LGTMAP and check the prediction frame layout and the number of
    posterior parameters for the given ``seasonality``.
    """
    train_df, test_df, _ = synthetic_data

    model = LGTMAP(
        response_col='response',
        date_col='week',
        seasonality=seasonality,
        verbose=False,
        estimator_type=estimator_type,
    )
    model.fit(train_df)
    forecast = model.predict(test_df)

    columns = ['week', 'prediction_5', 'prediction', 'prediction_95']
    # Two extra parameters are expected when seasonality is 52.
    n_params = 12 if seasonality == 52 else 10

    assert forecast.shape == (51, len(columns))
    assert forecast.columns.tolist() == columns
    assert len(model._posterior_samples) == n_params
Esempio n. 10
0
def test_backtester_sceduler_args(iclaims_training_data, scheduler_args):
    """Run a backtest configured by ``scheduler_args`` and check that every
    reported ``smape`` value is strictly positive.
    """
    model = LGTMAP(
        response_col='claims',
        date_col='week',
        seasonality=1,
        verbose=False,
    )
    # `scheduler_args` is forwarded verbatim as keyword arguments.
    bt = BackTester(model=model, df=iclaims_training_data, **scheduler_args)
    bt.fit_predict()

    scores = bt.score(metrics=[smape])
    smape_values = scores['metric_values'].values
    assert np.all(smape_values > 0)
Esempio n. 11
0
def test_lgt_grid_tuning(synthetic_data, param_grid):
    """Run ``grid_search_orbit`` over ``param_grid`` and verify the keys of
    the best parameters and the layout of the tuning result frame.
    """
    train_df, _, _ = synthetic_data

    model = LGTMAP(response_col='response', date_col='week', seasonality=52)

    search_kwargs = dict(
        model=model,
        df=train_df,
        min_train_len=80,
        incremental_len=20,
        forecast_len=20,
        metrics=None,
        criteria=None,
        verbose=True,
    )
    best_params, tuned_df = grid_search_orbit(param_grid, **search_kwargs)

    # The result frame holds one column per tuned parameter plus 'metrics'.
    expected_columns = set(param_grid.keys()) | {'metrics'}

    assert best_params[0].keys() == param_grid.keys()
    assert set(tuned_df.columns.to_list()) == expected_columns
    assert tuned_df.shape == (9, 3)
Esempio n. 12
0
def test_backtester_test_data_only(iclaims_training_data):
    """Score a backtest with default arguments and check that the reported
    metric names match the backtester's default metrics.
    """
    model = LGTMAP(
        response_col='claims',
        date_col='week',
        seasonality=1,
        verbose=False,
    )
    bt = BackTester(
        model=model,
        df=iclaims_training_data,
        min_train_len=100,
        incremental_len=100,
        forecast_len=20,
    )
    bt.fit_predict()

    scores = bt.score()
    reported_names = set(scores['metric_name'].tolist())
    expected_names = {fn.__name__ for fn in bt._default_metrics}

    assert expected_names == reported_names
Esempio n. 13
0
def test_lgt_mixed_signs_and_order(iclaims_training_data, regressor_signs):
    """Predictions must not depend on the ordering of regressors/signs given
    to the model, nor on the column ordering of the frame passed to predict.

    Two models are fitted on the same data, one with the regressors (and
    their signs) in the original order and one with both reversed; each
    predicts on the original frame and on a column-reordered frame. All four
    predictions are expected to agree within ``atol=1e-3``.
    """
    # Work on a copy: log-transforming the fixture frame in place would leak
    # the transformed column into any other user of the same DataFrame object
    # (e.g. if the fixture is cached or has a scope wider than one test).
    df = iclaims_training_data.copy()
    df['claims'] = np.log(df['claims'])
    raw_regressor_col = ['trend.unemploy', 'trend.filling', 'trend.job']
    reversed_order = [2, 1, 0]
    new_regressor_col = [raw_regressor_col[idx] for idx in reversed_order]
    new_regressor_signs = [regressor_signs[idx] for idx in reversed_order]
    # mixing the ordering of columns in the frame used for prediction
    new_df = df[['claims', 'week'] + new_regressor_col]

    lgt = LGTMAP(
        response_col='claims',
        date_col='week',
        regressor_col=raw_regressor_col,
        regressor_sign=regressor_signs,
        seasonality=52,
        seed=8888,
    )
    lgt.fit(df)
    predicted_df_v1 = lgt.predict(df)
    predicted_df_v2 = lgt.predict(new_df)

    # mixing the ordering of regressors and their signs
    lgt_new = LGTMAP(
        response_col='claims',
        date_col='week',
        regressor_col=new_regressor_col,
        regressor_sign=new_regressor_signs,
        seasonality=52,
        seed=8888,
    )
    lgt_new.fit(df)
    predicted_df_v3 = lgt_new.predict(df)
    predicted_df_v4 = lgt_new.predict(new_df)

    pred_v1 = predicted_df_v1['prediction'].values
    pred_v2 = predicted_df_v2['prediction'].values
    pred_v3 = predicted_df_v3['prediction'].values
    pred_v4 = predicted_df_v4['prediction'].values

    # they should all be identical; the ordering of signs or of columns in
    # the prediction frame should not matter
    assert np.allclose(pred_v1, pred_v2, atol=1e-3)
    assert np.allclose(pred_v1, pred_v3, atol=1e-3)
    assert np.allclose(pred_v1, pred_v4, atol=1e-3)