def test_get_df():
    """Checks ``DataLoader.get_df`` on a daily and an hourly dataset.

    Verifies the column names and shape of the loaded frames, and that
    asking for a dataset name with no matching file raises ``ValueError``
    with a message naming the missing file path, the data directory and
    the dataset names found in that directory.
    """
    # Function-scope import so the block does not depend on a file-level
    # ``import re``.
    import re

    dl = DataLoader()

    # Daily data
    data_path = dl.get_data_home(data_dir=None, data_sub_dir="daily")
    df = dl.get_df(data_path=data_path, data_name="daily_peyton_manning")
    assert list(df.columns) == ["ts", "y"]
    assert df.shape == (2905, 2)

    # Hourly data
    data_path = dl.get_data_home(data_dir=None, data_sub_dir="hourly")
    df = dl.get_df(data_path=data_path, data_name="hourly_parking")
    assert list(df.columns) == [
        "SystemCodeNumber",
        "Capacity",
        "Occupancy",
        "LastUpdated",
    ]
    assert df.shape == (35717, 4)

    # Error due to wrong file name
    data_path = dl.get_data_home(data_dir=None, data_sub_dir="daily")
    file_path = os.path.join(data_path, "parking.csv")
    file_names = dl.get_data_names(data_path=data_path)
    # ``match`` is treated as a regular expression, so every interpolated
    # value is escaped. Without this, paths containing regex metacharacters
    # (e.g. Windows backslashes, ``+``, ``(``) or the brackets of the list
    # repr would corrupt the pattern. The literal message text is left as is.
    expected_message = (
        fr"Given file path '{re.escape(str(file_path))}' is not found. Available datasets "
        fr"in data directory '{re.escape(str(data_path))}' are {re.escape(str(file_names))}\.")
    with pytest.raises(ValueError, match=expected_message):
        dl.get_df(data_path=data_path, data_name="parking")
def test_get_data_names():
    """Checks the dataset names reported by ``DataLoader.get_data_names``.

    The default data home must yield an empty list, while the "daily"
    sub-directory must yield exactly the expected names (order ignored).
    """
    loader = DataLoader()

    # The top-level data folder is expected to hold no .csv files itself,
    # so an empty list comes back.
    root_dir = loader.get_data_home()
    root_names = loader.get_data_names(data_path=root_dir)
    assert root_names == []

    # The "daily" folder: compare as sets because ordering is not checked.
    daily_dir = loader.get_data_home(data_sub_dir="daily")
    daily_names = loader.get_data_names(data_path=daily_dir)
    expected_daily = {
        "daily_temperature_australia",
        "daily_demand_order",
        "daily_female_births",
        "daily_istanbul_stock",
        "daily_peyton_manning",
    }
    assert set(daily_names) == expected_daily
def test_get_data_home():
    """Checks the directory returned by ``DataLoader.get_data_home``.

    Covers the default call, a call with a sub-directory, and the
    ``ValueError`` raised for a data directory that does not exist.
    """
    loader = DataLoader()

    def last_component(path):
        """Returns the final component of ``path`` after normalization."""
        return os.path.basename(os.path.normpath(path))

    # Default parameters
    assert last_component(loader.get_data_home()) == "data"

    # With subdirectory
    assert last_component(loader.get_data_home(data_sub_dir="daily")) == "daily"

    # Error due to non existing folder
    data_dir = "/home/data"
    expected_message = f"Requested data directory '{data_dir}' does not exist."
    with pytest.raises(ValueError, match=expected_message):
        loader.get_data_home(data_dir=data_dir)
def test_benchmark_silverkite_template_with_real_data():
    """Runs ``benchmark_silverkite_template`` on the daily female-births data.

    Each option list holds a single item to keep the run short; the first
    returned result is then checked field by field.
    """
    # One entry per list to speed up the test case.
    horizons = [30]
    cv_limits = [3]
    algorithms = ["linear"]
    selection_metric = EvaluationMetricEnum.MeanSquaredError
    evaluation_metric = EvaluationMetricParam(
        cv_selection_metric=selection_metric.name)

    # Real data
    loader = DataLoader()
    daily_dir = loader.get_data_home(data_sub_dir="daily")
    data_name = "daily_female_births"
    births = loader.get_df(data_path=daily_dir, data_name=data_name)
    metadata = MetadataParam(time_col="Date", value_col="Births", freq="D")

    all_results = benchmark_silverkite_template(
        data_name=data_name,
        df=births,
        metadata=metadata,
        evaluation_metric=evaluation_metric,
        forecast_horizons=horizons,
        fit_algorithms=algorithms,
        max_cvs=cv_limits)
    first_result = all_results[0]

    # Expected value for each reported field of the first result.
    expected_fields = {
        "data_name": data_name,
        "forecast_model_name": "silverkite_linear",
        "train_period": births.shape[0],
        "forecast_horizon": 30,
        "cv_folds": 3,
    }
    for field, expected in expected_fields.items():
        assert first_result[field] == expected
def test_estimator_plot_components_from_forecaster():
    """Tests the estimator's ``plot_components`` function after the
    Forecaster has set everything up at the top-most level."""
    # Real data (female births), forecast through the model template.
    loader = DataLoader()
    daily_dir = loader.get_data_home(data_sub_dir="daily")
    births = loader.get_df(data_path=daily_dir, data_name="daily_female_births")

    metadata = MetadataParam(time_col="Date", value_col="Births", freq="D")
    seasonality = {
        "yearly_seasonality": True,
        "quarterly_seasonality": True,
        "weekly_seasonality": True,
        "daily_seasonality": False,
    }
    model_components = ModelComponentsParam(seasonality=seasonality)

    forecaster = Forecaster()
    config = ForecastConfig(
        model_template=ModelTemplateEnum.SILVERKITE.name,
        forecast_horizon=30,  # forecast 1 month
        coverage=0.95,  # 95% prediction intervals
        metadata_param=metadata,
        model_components_param=model_components)
    forecast_result = forecaster.run_forecast_config(df=births, config=config)

    # The estimator is the last element of the last entry in the model's
    # steps (presumably a (name, estimator) pair -- TODO confirm).
    final_step = forecast_result.model.steps[-1]
    estimator = final_step[-1]
    assert estimator.plot_components()