Esempio n. 1
0
 def test_load_parquet(self, tmp_path, sample_pandas_df):
     """Check that data saved via ParquetDataSet loads through SparkDataSet."""
     parquet_location = str(tmp_path / "data")
     # Write the pandas frame first, then read the same path back with Spark.
     ParquetDataSet(filepath=parquet_location).save(sample_pandas_df)
     loaded = SparkDataSet(filepath=parquet_location).load()
     assert loaded.count() == 4
 def test_save_and_load_non_existing_dir(self, tmp_path, dummy_dataframe):
     """Round-trip a frame through a file inside a non-existing directory."""
     target = tmp_path / "non-existing" / FILENAME
     parquet_ds = ParquetDataSet(filepath=target.as_posix())
     parquet_ds.save(dummy_dataframe)
     # The reloaded frame must match what was saved.
     assert_frame_equal(dummy_dataframe, parquet_ds.load())
    def test_save_and_load(self, tmp_path, dummy_dataframe):
        """Save a frame, reload it, and verify a single file was written."""
        parquet_ds = ParquetDataSet(filepath=(tmp_path / FILENAME).as_posix())
        parquet_ds.save(dummy_dataframe)
        round_tripped = parquet_ds.load()
        assert_frame_equal(dummy_dataframe, round_tripped)
        assert parquet_ds._fs_open_args_load == {}

        # tmp_path must hold exactly one entry, and it must be a regular file.
        entries = list(tmp_path.iterdir())
        assert all(entry.is_file() for entry in entries)
        assert len(entries) == 1
    def test_write_to_dir(self, dummy_dataframe, tmp_path):
        """Saving must raise DataSetError when filepath is a directory."""
        directory_path = tmp_path.as_posix()
        message = "Saving ParquetDataSet to a directory is not supported"
        data_set = ParquetDataSet(filepath=directory_path)

        # Only the save call is expected to raise, so construction stays outside.
        with pytest.raises(DataSetError, match=message):
            data_set.save(dummy_dataframe)