def test_exists(self, file_format, tmp_path, sample_spark_df):
    """Check that ``exists()`` is falsy before a save and truthy after it.

    The data set is pointed at ``test_data`` underneath ``tmp_path`` and is
    constructed with the supplied ``file_format``.
    """
    target = tmp_path / "test_data"
    data_set = SparkDataSet(filepath=str(target), file_format=file_format)

    # Nothing has been saved through the data set yet.
    assert not data_set.exists()

    data_set.save(sample_spark_df)

    assert data_set.exists()
def test_exists(file_format):
    """Check that ``exists()`` is falsy before a save and truthy after it.

    The data set is pointed at ``test_data`` inside a temporary directory
    that is removed when the ``with`` block exits.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        data_set = SparkDataSet(
            filepath=join(scratch_dir, "test_data"),
            file_format=file_format,
        )
        sample_df = _get_sample_spark_data_frame().coalesce(1)

        # Nothing has been saved through the data set yet.
        assert not data_set.exists()

        data_set.save(sample_df)

        assert data_set.exists()
def test_exists_raises_error(self, mocker):
    """Check that ``exists()`` re-raises an unrelated ``AnalysisException``.

    ``exists`` should raise all errors except for ``AnalysisException``s
    clearly indicating a missing file. Here ``_get_spark`` is patched to
    raise one with an unrelated message, and the test expects a
    ``DataSetError`` whose text matches that message.
    """
    data_set = SparkDataSet(filepath="")
    unrelated_failure = AnalysisException("Other Exception", [])
    mocker.patch.object(data_set, "_get_spark", side_effect=unrelated_failure)

    with pytest.raises(DataSetError, match="Other Exception"):
        data_set.exists()
def test_exists_raises_error(monkeypatch):
    """Check that ``exists()`` re-raises an unrelated ``AnalysisException``.

    ``exists`` should raise all errors except for ``AnalysisException``s
    clearly indicating a missing file. Here ``_get_spark`` is replaced with
    a callable that raises one with an unrelated message, and the test
    expects a ``DataSetError`` carrying that message.
    """

    def _raise_other_exception():
        raise AnalysisException("Other Exception", [])

    data_set = SparkDataSet(filepath="")
    monkeypatch.setattr(data_set, "_get_spark", _raise_other_exception)

    with pytest.raises(DataSetError) as exc_info:
        data_set.exists()

    # Checked after the block: code following the raising call inside
    # ``pytest.raises`` would never execute.
    assert "Other Exception" in str(exc_info.value)