def test_ktrlite_dual_seas(make_daily_data, seasonality_fs_order):
    """Fit KTRLite with two seasonality periods and check in-sample predictions.

    Args:
        make_daily_data: fixture (defined outside this block) that unpacks to
            three items; only the first, the training frame, is used here.
        seasonality_fs_order: value forwarded unchanged to the model's
            ``seasonality_fs_order`` argument; supplied from outside this block.

    The test checks the shape and column names of the in-sample prediction,
    the number of entries in ``_posterior_samples``, and that the in-sample
    SMAPE does not exceed ``SMAPE_TOLERANCE``.
    """
    train_df, _, _ = make_daily_data

    model = KTRLite(
        response_col='response',
        date_col='date',
        seasonality=[7, 365.25],
        seasonality_fs_order=seasonality_fs_order,
        estimator='stan-map',
        n_bootstrap_draws=-1,
    )
    model.fit(train_df)
    fitted_df = model.predict(train_df)

    # With n_bootstrap_draws=-1 the test expects point predictions only.
    wanted_columns = ['date', 'prediction']
    wanted_num_parameters = 6

    assert fitted_df.shape == (train_df.shape[0], len(wanted_columns))
    assert fitted_df.columns.tolist() == wanted_columns
    assert len(model._posterior_samples) == wanted_num_parameters

    actual = train_df['response'].values
    predicted = fitted_df['prediction'].values
    assert smape(actual, predicted) <= SMAPE_TOLERANCE
def test_ktrlite_predict_decompose(make_daily_data):
    """Check the columns returned by ``predict(..., decompose=True)``.

    Args:
        make_daily_data: fixture (defined outside this block) that unpacks to
            three items; the first two are used as training and testing frames.

    The model is fit with seasonality periods 7 and 365.25 and
    ``n_bootstrap_draws=1e4``. The test expects the decomposed prediction to
    carry a ``_5`` / point / ``_95`` triplet for the prediction, the trend and
    each seasonality component, with one row per row of the testing frame.
    """
    # Third fixture item is not needed by this test.
    train_df, test_df, _ = make_daily_data

    ktrlite = KTRLite(
        response_col='response',
        date_col='date',
        seasonality=[7, 365.25],
        seasonality_fs_order=[2, 5],
        estimator='stan-map',
        n_bootstrap_draws=1e4,
    )
    ktrlite.fit(train_df)
    predict_df = ktrlite.predict(test_df, decompose=True)

    expected_columns = [
        'date',
        'prediction_5', 'prediction', 'prediction_95',
        'trend_5', 'trend', 'trend_95',
        'seasonality_7_5', 'seasonality_7', 'seasonality_7_95',
        'seasonality_365.25_5', 'seasonality_365.25', 'seasonality_365.25_95',
    ]
    # Derive the row count from the frame being predicted rather than
    # hard-coding the fixture's size.
    expected_shape = (test_df.shape[0], len(expected_columns))
    expected_num_parameters = 6

    assert predict_df.shape == expected_shape
    assert predict_df.columns.tolist() == expected_columns
    assert len(ktrlite._posterior_samples) == expected_num_parameters
def test_ktrlite_level_segments(make_daily_data, level_segments):
    """Fit KTRLite with a given ``level_segments`` and check knots and levels.

    Args:
        make_daily_data: fixture (defined outside this block) that unpacks to
            three items; the first two are used as training and testing frames.
        level_segments: value forwarded to the model's ``level_segments``
            argument; supplied from outside this block. It is used in integer
            arithmetic below, so it is expected to be numeric.

    Besides the usual prediction shape/column checks, the test asserts that
    the number of level knots is ``level_segments + 1`` or
    ``level_segments + 2`` and that ``get_levels()`` returns one row per
    training observation.
    """
    # Third fixture item is not needed by this test.
    train_df, test_df, _ = make_daily_data

    ktrlite = KTRLite(
        response_col='response',
        date_col='date',
        level_segments=level_segments,
        estimator='stan-map',
        n_bootstrap_draws=-1,
    )
    ktrlite.fit(train_df)
    predict_df = ktrlite.predict(test_df)

    expected_columns = ['date', 'prediction']
    # Derive the row count from the frame being predicted rather than
    # hard-coding the fixture's size.
    expected_shape = (test_df.shape[0], len(expected_columns))
    expected_num_parameters = 4

    assert predict_df.shape == expected_shape
    assert predict_df.columns.tolist() == expected_columns
    assert len(ktrlite._posterior_samples) == expected_num_parameters

    knots_df = ktrlite.get_level_knots()
    levels_df = ktrlite.get_levels()

    # Either count is accepted by this test.
    assert knots_df.shape[0] in [level_segments + 1, level_segments + 2]
    assert levels_df.shape[0] == ktrlite.get_training_meta()['num_of_obs']
def test_ktrlite_level_knot_distance(make_daily_data, level_knot_distance):
    """Fit KTRLite with a given ``level_knot_distance`` and check predictions.

    Args:
        make_daily_data: fixture (defined outside this block) that unpacks to
            three items; the first two are used as training and testing frames.
        level_knot_distance: value forwarded unchanged to the model's
            ``level_knot_distance`` argument; supplied from outside this block.

    With ``n_bootstrap_draws=1e4`` the test expects the prediction frame to
    contain ``prediction_5`` and ``prediction_95`` next to the point
    prediction, with one row per row of the testing frame.
    """
    # Third fixture item is not needed by this test.
    train_df, test_df, _ = make_daily_data

    ktrlite = KTRLite(
        response_col='response',
        date_col='date',
        level_knot_distance=level_knot_distance,
        estimator='stan-map',
        n_bootstrap_draws=1e4,
    )
    ktrlite.fit(train_df)
    predict_df = ktrlite.predict(test_df)

    expected_columns = ['date', 'prediction_5', 'prediction', 'prediction_95']
    # Derive the row count from the frame being predicted rather than
    # hard-coding the fixture's size.
    expected_shape = (test_df.shape[0], len(expected_columns))
    expected_num_parameters = 4

    assert predict_df.shape == expected_shape
    assert predict_df.columns.tolist() == expected_columns
    assert len(ktrlite._posterior_samples) == expected_num_parameters
def test_ktrlite_hourly_data(ca_hourly_electricity_data):
    """Fit KTRLite on the hourly electricity fixture and check in-sample fit.

    Args:
        ca_hourly_electricity_data: fixture (defined outside this block) that
            unpacks to a training and a testing frame; only the training
            frame is used for fitting and prediction here.

    The model uses the ``SDGE`` column as response, ``Dates`` as the date
    column and three seasonality periods (24, 7, 365.25). The test checks the
    prediction shape and columns, the number of entries in
    ``_posterior_samples``, and that the in-sample SMAPE does not exceed
    ``SMAPE_TOLERANCE``.
    """
    train_df, test_df = ca_hourly_electricity_data

    model = KTRLite(
        response_col='SDGE',
        date_col='Dates',
        seasonality=[24, 7, 365.25],
        seasonality_fs_order=[3, 3, 5],
        estimator='stan-map',
        n_bootstrap_draws=-1,
    )
    model.fit(train_df)
    fitted_df = model.predict(train_df)

    # With n_bootstrap_draws=-1 the test expects point predictions only.
    wanted_columns = ['Dates', 'prediction']
    wanted_num_parameters = 6

    assert fitted_df.shape == (train_df.shape[0], len(wanted_columns))
    assert fitted_df.columns.tolist() == wanted_columns
    assert len(model._posterior_samples) == wanted_num_parameters

    actual = train_df['SDGE'].values
    predicted = fitted_df['prediction'].values
    assert smape(actual, predicted) <= SMAPE_TOLERANCE
def test_ktrlite_seas_segments(make_daily_data, seas_segments):
    """Fit KTRLite with a given ``seasonality_segments`` and check predictions.

    Args:
        make_daily_data: fixture (defined outside this block) that unpacks to
            three items; the first two are used as training and testing frames.
        seas_segments: value forwarded unchanged to the model's
            ``seasonality_segments`` argument; supplied from outside this block.

    The model is fit with seasonality periods 7 and 365.25 and
    ``level_segments=10``. The test checks the shape and columns of the
    out-of-sample prediction and the number of entries in
    ``_posterior_samples``.
    """
    # Third fixture item is not needed by this test.
    train_df, test_df, _ = make_daily_data

    ktrlite = KTRLite(
        response_col='response',
        date_col='date',
        seasonality=[7, 365.25],
        seasonality_fs_order=[2, 5],
        level_segments=10,
        seasonality_segments=seas_segments,
        estimator='stan-map',
        n_bootstrap_draws=-1,
    )
    ktrlite.fit(train_df)
    predict_df = ktrlite.predict(test_df)

    expected_columns = ['date', 'prediction']
    # Derive the row count from the frame being predicted rather than
    # hard-coding the fixture's size.
    expected_shape = (test_df.shape[0], len(expected_columns))
    expected_num_parameters = 6

    assert predict_df.shape == expected_shape
    assert predict_df.columns.tolist() == expected_columns
    assert len(ktrlite._posterior_samples) == expected_num_parameters
def test_backtester_ktr_and_missing_val(make_daily_data, missing_flag):
    """Run BackTester with a KTRLite model, optionally with missing responses.

    Args:
        make_daily_data: fixture (defined outside this block) that unpacks to
            three items; the first two frames are concatenated into a single
            frame for backtesting.
        missing_flag: when truthy, two ``response`` values are set to NaN
            before backtesting; supplied from outside this block.

    The test checks that three splits are produced, that the score frame has
    six rows and three columns, and that the metric values are non-NaN. When
    ``missing_flag`` is set, ``rmsse`` is instead expected to be NaN while
    every other metric stays non-NaN.
    """
    train_df, test_df, _ = make_daily_data
    full_df = pd.concat([train_df, test_df], axis=0, ignore_index=True)

    if missing_flag:
        # create a missing value in testing
        full_df.loc[full_df.shape[0] - 3, 'response'] = np.nan
        # create a missing value in training
        full_df.loc[10, 'response'] = np.nan

    model = KTRLite(
        date_col='date',
        response_col='response',
        seasonality=[365.25],
        verbose=False,
    )
    backtester = BackTester(
        model=model,
        df=full_df,
        n_splits=3,
        incremental_len=100,
        forecast_len=20,
    )
    backtester.fit_predict()

    split_keys = backtester.get_predicted_df()['split_key'].tolist()
    assert set(split_keys) == {0, 1, 2}

    score_df = backtester.score(include_training_metrics=False)
    num_testing_metrics = 6
    assert score_df.shape == (num_testing_metrics, 3)

    testing_scores = score_df[~score_df['is_training_metric']]

    if not missing_flag:
        all_vals = testing_scores['metric_values'].values
        assert not np.isnan(all_vals).any()
        return

    # rmsse is the only one not working for null values; otherwise, they
    # should have valid values
    names = testing_scores['metric_name']
    valid_vals = testing_scores.loc[names != 'rmsse', 'metric_values'].values
    assert not np.isnan(valid_vals).any()

    rmsse_vals = testing_scores.loc[names == 'rmsse', 'metric_values'].values
    assert np.isnan(rmsse_vals).all()