def test_pandas_datasets(self):
    """Regression-check Pandas file-based datasets across disk save patterns.

    For each save pattern this loads the stored regression dataset through
    ``get_regression_artifact``, creates a new dataset with
    ``DatasetCreator.create`` using that same save pattern, and hands both
    to ``compare_datasets``. Every pattern runs in its own ``subTest``.
    """
    registered_name = "PandasFileBasedDataset"
    name_template = "titanic-regression-{test_name}"
    disk_save_patterns = (
        "pandas_disk_json",
        "pandas_disk_csv",
        "pandas_disk_parquet",
    )

    for save_pattern in disk_save_patterns:
        with self.subTest(
            "Pandas Dataset Regression with Titanic", save_pattern=save_pattern
        ):
            # Build a fresh mapping per subtest instead of mutating one
            # shared dict: nothing the creator keeps or changes can leak
            # into the next save pattern.
            save_patterns = {"dataset": [save_pattern]}

            regression_dataset = self.get_regression_artifact(
                "dataset", name=name_template.format(test_name=save_pattern)
            )

            # The new dataset is created while the "filestore" path is
            # registered to ARTIFACTS_PATH.
            with FILEPATH_REGISTRY.context_register("filestore", ARTIFACTS_PATH):
                new_dataset = DatasetCreator.create(
                    registered_name=registered_name,
                    name=name_template.format(test_name=f"{save_pattern}_new"),
                    save_patterns=save_patterns,
                    **dataset_kwargs_template,
                )

            # NOTE(review): comparison is done after the filestore context
            # has exited; confirm compare_datasets does not need the
            # registered path.
            self.compare_datasets(new_dataset, regression_dataset)
def get_regression_artifact(self, persistable_type, **filters):
    """Load a stored persistable and its external files.

    Args:
        persistable_type: Suffix selecting the ``PersistableLoader`` method
            to call, i.e. ``load_<persistable_type>``.
        **filters: Keyword arguments forwarded unchanged to that loader
            method.

    Returns:
        The object returned by the loader, after ``load_external_files()``
        has been called on it.
    """
    loader_name = f"load_{persistable_type}"
    # Both the load and the external-file load run while the "filestore"
    # path is registered to ARTIFACTS_PATH.
    with FILEPATH_REGISTRY.context_register("filestore", ARTIFACTS_PATH):
        load = getattr(PersistableLoader, loader_name)
        artifact = load(**filters)
        artifact.load_external_files()
    return artifact