def test_backtest_regression(self):
    """Backtest regression models with covariates and check the r2 scores.

    Exercises timestamp, integer, float and default ``start`` values,
    verifies that too-small ``start`` values raise ``ValueError``, and
    covers multivariate covariates with and without a stride.
    """
    noise = gt(mean=2, length=50)
    wave = st(length=50)
    target = wave
    first_forecast = pd.Timestamp('20000201')

    # Covariates are rebuilt from renamed data frames so that their
    # columns are called "Value<i>" instead of "<i>".
    two_columns = noise.stack(wave).pd_dataframe()
    features = TimeSeries(
        two_columns.rename({"0": "Value0", "1": "Value1"}, axis=1)
    )
    three_columns = (noise + wave).stack(noise).stack(wave).pd_dataframe()
    features_multivariate = TimeSeries(
        three_columns.rename(
            {"0": "Value0", "1": "Value1", "2": "Value2"}, axis=1
        )
    )

    # Arguments shared by every scored backtest below.
    scored = dict(series=target, forecast_horizon=3, metric=r2_score)

    # univariate feature test
    linear = LinearRegressionModel(lags=None, lags_exog=[0, 1])
    score = linear.backtest(
        covariates=features,
        start=first_forecast,
        last_points_only=True,
        **scored,
    )
    self.assertGreater(score, 0.95)

    # Using an int or float value for start
    for start_value in (30, 0.5):
        forest = RandomForest(lags=12, lags_exog=[0])
        score = forest.backtest(
            covariates=features, start=start_value, **scored
        )
        self.assertGreater(score, 0.95)

    # Using a too small start value
    for too_small in (0, 0.01):
        with self.assertRaises(ValueError):
            RandomForest(lags=12).backtest(
                series=target, start=too_small, forecast_horizon=3
            )

    # Using RandomForest's start default value
    score = RandomForest(lags=12).backtest(**scored)
    self.assertGreater(score, 0.95)

    # multivariate feature test
    forest = RandomForest(lags=12, lags_exog=[0, 1])
    score = forest.backtest(
        covariates=features_multivariate, start=first_forecast, **scored
    )
    self.assertGreater(score, 0.95)

    # multivariate with stride
    forest = RandomForest(lags=12, lags_exog=[0])
    score = forest.backtest(
        covariates=features_multivariate,
        start=first_forecast,
        last_points_only=True,
        stride=3,
        **scored,
    )
    self.assertGreater(score, 0.95)
def test_backtest_regression(self):
    """Backtest ``StandardRegressionModel`` and expect a perfect r2 score.

    Covers timestamp, integer, float and default ``start`` values, checks
    that too-small ``start`` values raise ``ValueError``, and exercises
    multivariate features, a multivariate target and strided historical
    forecasts.
    """
    noise = gt(mean=2, length=50)
    wave = st(length=50)
    mixed = noise + wave
    features = [mixed, noise]
    features_multivariate = [mixed.stack(noise), noise]
    target = st(length=50)
    first_forecast = pd.Timestamp('20000201')

    # univariate feature test
    model = StandardRegressionModel(15)
    score = model.backtest(
        features, target, first_forecast, 3, metric=r2_score
    )
    self.assertEqual(score, 1.0)

    # Using an int or float value for start
    for start_value in (30, 0.5):
        model = StandardRegressionModel(15)
        score = model.backtest(
            features,
            target,
            start=start_value,
            forecast_horizon=3,
            metric=r2_score,
        )
        self.assertEqual(score, 1.0)

    # Using a too small start value
    for too_small in (0, 0.01):
        with self.assertRaises(ValueError):
            StandardRegressionModel(15).backtest(
                features, target, start=too_small, forecast_horizon=3
            )

    # Using StandardRegressionModel's start default value
    model = StandardRegressionModel(15)
    score = model.backtest(
        features, target, forecast_horizon=3, metric=r2_score
    )
    self.assertEqual(score, 1.0)

    # multivariate feature test
    model = StandardRegressionModel(15)
    score = model.backtest(
        features_multivariate,
        target,
        first_forecast,
        forecast_horizon=3,
        metric=r2_score,
    )
    self.assertEqual(score, 1.0)

    # multivariate target
    doubled_target = target.stack(target)
    model = StandardRegressionModel(15)
    score = model.backtest(
        features_multivariate,
        doubled_target,
        first_forecast,
        forecast_horizon=3,
        metric=r2_score,
    )
    self.assertEqual(score, 1.0)

    # multivariate target with stride
    model = StandardRegressionModel(15)
    hist = model.historical_forecasts(
        features_multivariate,
        doubled_target,
        first_forecast,
        forecast_horizon=3,
        stride=3,
        last_points_only=True,
    )
    self.assertEqual(r2_score(doubled_target, hist), 1.0)
    # Consecutive forecast timestamps must be exactly 3 days apart.
    forecast_times = hist.time_index()
    gap = forecast_times[1] - forecast_times[0]
    self.assertEqual(gap.days, 3)
def test_backtest_regression(self):
    """Backtest regression models with future covariates and check r2 scores.

    Covers timestamp, integer, float and default ``start`` values, an
    explicit ``train_length``, rejection of too-small ``start`` values
    (``ValueError``), multivariate covariates with two training window
    sizes, and a strided backtest.
    """
    # Seed NumPy's global RNG before the series are generated.
    np.random.seed(4)

    gaussian_series = gt(mean=2, length=50)
    sine_series = st(length=50)
    features = gaussian_series.stack(sine_series)
    features_multivariate = (
        (gaussian_series + sine_series).stack(gaussian_series).stack(sine_series)
    )
    target = sine_series

    features = features.with_columns_renamed(
        features.components, ["Value0", "Value1"]
    )
    features_multivariate = features_multivariate.with_columns_renamed(
        features_multivariate.components, ["Value0", "Value1", "Value2"]
    )

    # univariate feature test
    score = LinearRegressionModel(
        lags=None, lags_future_covariates=[0, -1]
    ).backtest(
        series=target,
        future_covariates=features,
        start=pd.Timestamp("20000201"),
        forecast_horizon=3,
        metric=r2_score,
        last_points_only=True,
    )
    self.assertGreater(score, 0.9)

    # univariate feature test + train length
    score = LinearRegressionModel(
        lags=None, lags_future_covariates=[0, -1]
    ).backtest(
        series=target,
        future_covariates=features,
        start=pd.Timestamp("20000201"),
        train_length=20,
        forecast_horizon=3,
        metric=r2_score,
        last_points_only=True,
    )
    self.assertGreater(score, 0.9)

    # Using an int or float value for start
    score = RandomForest(
        lags=12, lags_future_covariates=[0], random_state=0
    ).backtest(
        series=target,
        future_covariates=features,
        start=30,
        forecast_horizon=3,
        metric=r2_score,
    )
    self.assertGreater(score, 0.9)

    score = RandomForest(
        lags=12, lags_future_covariates=[0], random_state=0
    ).backtest(
        series=target,
        future_covariates=features,
        start=0.5,
        forecast_horizon=3,
        metric=r2_score,
    )
    self.assertGreater(score, 0.9)

    # Using a too small start value
    with self.assertRaises(ValueError):
        RandomForest(lags=12).backtest(series=target, start=0, forecast_horizon=3)

    with self.assertRaises(ValueError):
        RandomForest(lags=12).backtest(
            series=target, start=0.01, forecast_horizon=3
        )

    # Using RandomForest's start default value
    score = RandomForest(lags=12, random_state=0).backtest(
        series=target, forecast_horizon=3, metric=r2_score
    )
    self.assertGreater(score, 0.95)

    # multivariate feature test
    score = RandomForest(
        lags=12, lags_future_covariates=[0, -1], random_state=0
    ).backtest(
        series=target,
        future_covariates=features_multivariate,
        start=pd.Timestamp("20000201"),
        forecast_horizon=3,
        metric=r2_score,
    )
    self.assertGreater(score, 0.94)

    # multivariate feature test with train window 35
    score_35 = RandomForest(
        lags=12, lags_future_covariates=[0, -1], random_state=0
    ).backtest(
        series=target,
        train_length=35,
        future_covariates=features_multivariate,
        start=pd.Timestamp("20000201"),
        forecast_horizon=3,
        metric=r2_score,
    )
    # The message needs a "%s" placeholder: without it the extra argument
    # cannot be merged into the message and the score is never logged.
    logger.info(
        "Score for multivariate feature test with train window 35 is: %s",
        score_35,
    )
    self.assertGreater(score_35, 0.92)

    # multivariate feature test with train window 45
    score_45 = RandomForest(
        lags=12, lags_future_covariates=[0, -1], random_state=0
    ).backtest(
        series=target,
        train_length=45,
        future_covariates=features_multivariate,
        start=pd.Timestamp("20000201"),
        forecast_horizon=3,
        metric=r2_score,
    )
    logger.info(
        "Score for multivariate feature test with train window 45 is: %s",
        score_45,
    )
    self.assertGreater(score_45, 0.94)
    # The longer training window is expected to score strictly higher.
    self.assertGreater(score_45, score_35)

    # multivariate with stride
    score = RandomForest(
        lags=12, lags_future_covariates=[0], random_state=0
    ).backtest(
        series=target,
        future_covariates=features_multivariate,
        start=pd.Timestamp("20000201"),
        forecast_horizon=3,
        metric=r2_score,
        last_points_only=True,
        stride=3,
    )
    self.assertGreater(score, 0.9)
def test_backtest_regression(self):
    """Backtest ``StandardRegressionModel`` and expect perfect predictions.

    Each backtest result is scored against the target with ``r2_score``.
    Covers timestamp, integer, float and default ``start`` values, checks
    that too-small ``start`` values raise ``ValueError``, and exercises
    multivariate features, a multivariate target and a strided backtest.
    """
    noise = gt(mean=2, length=50)
    wave = st(length=50)
    mixed = noise + wave
    features = [mixed, noise]
    features_multivariate = [mixed.stack(noise), noise]
    target = st(length=50)
    first_forecast = pd.Timestamp('20000201')

    # univariate feature test
    model = StandardRegressionModel(15)
    pred = model.backtest(features, target, first_forecast, 3)
    self.assertEqual(r2_score(pred, target), 1.0)

    # Using an int or float value for start
    for start_value in (30, 0.5):
        model = StandardRegressionModel(15)
        pred = model.backtest(
            features, target, start=start_value, forecast_horizon=3
        )
        self.assertEqual(r2_score(pred, target), 1.0)

    # Using a too small start value
    for too_small in (0, 0.01):
        with self.assertRaises(ValueError):
            StandardRegressionModel(15).backtest(
                features, target, start=too_small, forecast_horizon=3
            )

    # Using StandardRegressionModel's start default value
    model = StandardRegressionModel(15)
    pred = model.backtest(features, target, forecast_horizon=3)
    self.assertEqual(r2_score(pred, target), 1.0)

    # multivariate feature test
    model = StandardRegressionModel(15)
    pred = model.backtest(features_multivariate, target, first_forecast, 3)
    self.assertEqual(r2_score(pred, target), 1.0)

    # multivariate target
    model = StandardRegressionModel(15)
    pred = model.backtest(
        features_multivariate, target.stack(target), first_forecast, 3
    )
    self.assertEqual(r2_score(pred, target.stack(target)), 1.0)

    # multivariate target with stride
    model = StandardRegressionModel(15)
    pred = model.backtest(
        features_multivariate,
        target.stack(target),
        first_forecast,
        3,
        stride=3,
    )
    self.assertEqual(r2_score(pred, target.stack(target)), 1.0)
    # Consecutive prediction timestamps must be exactly 3 days apart.
    pred_times = pred.time_index()
    gap = pred_times[1] - pred_times[0]
    self.assertEqual(gap.days, 3)