Ejemplo n.º 1
0
    def test_exists(self, file_format, tmp_path, sample_spark_df):
        """Check ``exists()`` before and after saving a data frame.

        A ``SparkDataSet`` is pointed at ``tmp_path / "test_data"`` with the
        given ``file_format``; it must report that nothing exists until
        ``save`` has been called with ``sample_spark_df``.
        """
        target = tmp_path / "test_data"
        data_set = SparkDataSet(
            filepath=target.as_posix(),
            file_format=file_format,
        )

        # Nothing has been saved through this data set yet.
        assert not data_set.exists()

        # After saving, the data set must report that it exists.
        data_set.save(sample_spark_df)
        assert data_set.exists()
Ejemplo n.º 2
0
    def test_exists_raises_error(self, mocker):
        """Check that ``exists()`` does not swallow unrelated errors.

        ``exists`` should raise all errors except for ``AnalysisException``s
        clearly indicating a missing file. Here ``_get_spark`` is patched to
        raise an ``AnalysisException`` with an unrelated message, and the call
        is expected to fail with a ``DataSetError`` matching that message.
        """
        data_set = SparkDataSet(filepath="")
        unrelated_error = AnalysisException("Other Exception", [])
        mocker.patch.object(data_set, "_get_spark", side_effect=unrelated_error)

        with pytest.raises(DataSetError, match="Other Exception"):
            data_set.exists()
Ejemplo n.º 3
0
 def test_versioning_existing_dataset(self, versioned_dataset_local,
                                      sample_spark_df):
     """Save a versioned dataset over an existing non-versioned one.

     A plain ``SparkDataSet`` is first saved at the versioned dataset's
     ``_filepath``; the versioned dataset is then saved as well. Note: because
     SparkDataSet saves to a directory even if non-versioned, an error is not
     expected, and both datasets must report that they exist.
     """
     shared_path = versioned_dataset_local._filepath.as_posix()
     plain_data_set = SparkDataSet(filepath=shared_path)

     # Write the non-versioned data first so the location is already occupied.
     plain_data_set.save(sample_spark_df)
     assert plain_data_set.exists()

     # Saving the versioned dataset on top must succeed.
     versioned_dataset_local.save(sample_spark_df)
     assert versioned_dataset_local.exists()