Exemple #1
0
 def test_error_on_using_partition_cols_and_partition_on(self, fp, df_full):
     """Check that supplying both partitioning keywords raises ``ValueError``.

     ``to_parquet`` is called with the fastparquet engine and with the same
     column list passed as both ``partition_on`` and ``partition_cols``; the
     call is expected to raise ``ValueError``.

     ``fp`` is not used directly in the body -- presumably a fixture that
     guarantees fastparquet is available (confirm against conftest).
     ``df_full`` is the frame being written.
     """
     # GH #23283
     partition_cols = ['bool', 'int']
     with tm.ensure_clean_dir() as path:
         # Keep the raises-scope limited to the call under test, so that a
         # ValueError coming from creating or removing the temporary
         # directory cannot make this test pass spuriously.
         with pytest.raises(ValueError):
             df_full.to_parquet(path, engine="fastparquet", compression=None,
                                partition_on=partition_cols,
                                partition_cols=partition_cols)
Exemple #2
0
 def test_partition_on_supported(self, fp, df_full):
     """Write with fastparquet using ``partition_on`` and inspect the result.

     After writing ``df_full`` partitioned on two columns, the target path
     must exist and the ``cats`` attribute of the re-opened
     ``fastparquet.ParquetFile`` must contain exactly two entries.
     """
     # GH #23283
     columns = ['bool', 'int']
     with tm.ensure_clean_dir() as target:
         df_full.to_parquet(
             target,
             engine="fastparquet",
             compression=None,
             partition_on=columns,
         )
         assert os.path.exists(target)

         import fastparquet

         written = fastparquet.ParquetFile(target, False)
         cats = written.cats
         assert len(cats) == 2
Exemple #3
0
 def test_partition_cols_supported(self, pa, df_full):
     """Write with ``partition_cols`` and verify the partition names.

     ``df_full`` is written partitioned on two columns; the dataset is then
     re-opened with ``pyarrow.parquet.ParquetDataset`` and its partition
     names must be exactly those two columns.
     """
     # GH #23283
     columns = ['bool', 'int']
     with tm.ensure_clean_dir() as target:
         df_full.to_parquet(target, partition_cols=columns, compression=None)

         import pyarrow.parquet as pq

         written = pq.ParquetDataset(target, validate_schema=False)
         # Check the count first, then the exact set of names.
         assert len(written.partitions.partition_names) == 2
         assert written.partitions.partition_names == set(columns)
Exemple #4
0
    def test_partition_cols_string(self, pa, df_full):
        """Write with ``partition_cols`` given as a single string.

        The column name is passed as a bare string rather than a list; the
        dataset re-opened through ``pyarrow.parquet.ParquetDataset`` must
        report exactly that one partition name.
        """
        # GH #27117
        column = "bool"
        expected_names = {column}
        with tm.ensure_clean_dir() as target:
            df_full.to_parquet(target, partition_cols=column, compression=None)

            import pyarrow.parquet as pq

            written = pq.ParquetDataset(target, validate_schema=False)
            # Check the count first, then the exact set of names.
            assert len(written.partitions.partition_names) == 1
            assert written.partitions.partition_names == expected_names
Exemple #5
0
    def test_partition_cols_string(self, fp, df_full):
        """Write with fastparquet and ``partition_cols`` given as a string.

        The column name is passed as a bare string rather than a list; the
        target path must exist afterwards and the ``cats`` attribute of the
        re-opened ``fastparquet.ParquetFile`` must contain exactly one entry.
        """
        # GH #27117
        column = "bool"
        with tm.ensure_clean_dir() as target:
            df_full.to_parquet(
                target, engine="fastparquet", partition_cols=column, compression=None
            )
            assert os.path.exists(target)

            import fastparquet  # noqa: F811

            written = fastparquet.ParquetFile(target, False)
            cats = written.cats
            assert len(cats) == 1
Exemple #6
0
def test_create_temp_directory():
    """Check the lifetime of the path yielded by ``tm.ensure_clean_dir``.

    Inside the ``with`` block the path must exist and be a directory; once
    the block is left, the path must no longer exist.
    """
    with tm.ensure_clean_dir() as tmp_dir:
        # While the context is active the directory is present on disk.
        assert os.path.exists(tmp_dir)
        assert os.path.isdir(tmp_dir)

    # Leaving the context must have removed it.
    assert not os.path.exists(tmp_dir)
Exemple #7
0
def test_create_temp_directory():
    """Verify ``tm.ensure_clean_dir`` yields a directory that is later removed.

    The yielded path is checked for existence and for being a directory
    while the context is open, and for absence after the context closes.
    """
    with tm.ensure_clean_dir() as directory:
        # Existence is asserted before the directory check, as two separate
        # assertions, so a failure pinpoints which property was violated.
        assert os.path.exists(directory)
        assert os.path.isdir(directory)

    # The context manager is expected to clean up on exit.
    assert not os.path.exists(directory)