def test_exists(self, file_format, tmp_path, sample_spark_df):
    """Check that ``exists()`` is falsy before a save and truthy after it.

    The data set is pointed at a ``test_data`` path under ``tmp_path`` and
    is created with the supplied ``file_format``.
    """
    target = tmp_path / "test_data"
    data_set = SparkDataSet(filepath=target.as_posix(), file_format=file_format)

    # Nothing has been saved through this data set yet.
    assert not data_set.exists()

    data_set.save(sample_spark_df)

    # After saving, the same data set must report that it exists.
    assert data_set.exists()
def test_exists_raises_error(self, mocker):
    """Check that ``exists()`` surfaces an unrelated ``AnalysisException``.

    ``exists`` should raise all errors except for ``AnalysisException``s
    clearly indicating a missing file. Here ``_get_spark`` is patched to
    raise an ``AnalysisException`` carrying the message "Other Exception",
    and the call is expected to fail with a ``DataSetError`` matching it.
    """
    data_set = SparkDataSet(filepath="")
    unrelated_failure = AnalysisException("Other Exception", [])
    mocker.patch.object(data_set, "_get_spark", side_effect=unrelated_failure)

    with pytest.raises(DataSetError, match="Other Exception"):
        data_set.exists()
def test_versioning_existing_dataset(
    self, versioned_dataset_local, sample_spark_df
):
    """Save a versioned dataset over an existing non-versioned one.

    A plain ``SparkDataSet`` is first saved at the versioned dataset's own
    file path, then the versioned dataset is saved as well.

    Note: because SparkDataSet saves to a directory even if non-versioned,
    an error is not expected.
    """
    shared_path = versioned_dataset_local._filepath.as_posix()
    plain_data_set = SparkDataSet(filepath=shared_path)

    # Write the non-versioned dataset first so the location is occupied.
    plain_data_set.save(sample_spark_df)
    assert plain_data_set.exists()

    # The versioned save on the same location must still succeed.
    versioned_dataset_local.save(sample_spark_df)
    assert versioned_dataset_local.exists()