def test_build_parquet_pyarrow(self):
    """
    Test compilation to Parquet via the pyarrow library.

    Forces the pyarrow backend through the QUILT_PARQUET_LIBRARY env var,
    builds the test package from build.yml, and checks that the csv/tsv/xls
    dataframes all deserialize with matching shapes.
    """
    # Force the pyarrow Parquet backend for the duration of this test.
    os.environ["QUILT_PARQUET_LIBRARY"] = ParquetLib.ARROW.value
    Package.reset_parquet_lib()
    try:
        mydir = os.path.dirname(__file__)
        path = os.path.join(mydir, './build.yml')
        build.build_package('test_arrow', PACKAGE, path)
        # TODO load DFs based on contents of .yml file at path
        # not hardcoded vals (this will require loading modules from variable
        # names, probably using __module__)
        from quilt.data.test_arrow.groot import dataframes, README

        csv = dataframes.csv()
        # BUGFIX: was dataframes.csv(), which made the tsv comparisons below vacuous.
        tsv = dataframes.tsv()
        xls = dataframes.xls()
        rows = len(csv.index)
        assert rows == len(tsv.index) and rows == len(xls.index), \
            'Expected dataframes to have same # rows'
        cols = len(csv.columns)
        print(csv.columns, xls.columns, tsv.columns)
        assert cols == len(tsv.columns) and cols == len(xls.columns), \
            'Expected dataframes to have same # columns'
        assert os.path.exists(README())
        # TODO add more integrity checks, incl. negative test cases
        assert Package.get_parquet_lib() is ParquetLib.ARROW
    finally:
        # Always restore the environment, even when an assertion above fails,
        # so subsequent tests see the default Parquet backend.
        del os.environ["QUILT_PARQUET_LIBRARY"]
def _test_dataframes(self, dataframes):
    """
    Shared integrity checks for a built `dataframes` group node.

    Verifies that the csv/tsv/xls nodes deserialize with identical shapes,
    that xls_skip honors its skip-rows configuration, and that the nulls
    node round-trips column dtypes (strings, ints, floats, nullable ints).
    """
    csv = dataframes.csv()
    # BUGFIX: was dataframes.csv(), which made the tsv comparisons below vacuous.
    tsv = dataframes.tsv()
    xls = dataframes.xls()
    xls_skip = dataframes.xls_skip()
    rows = len(csv.index)
    assert rows == len(tsv.index) and rows == len(xls.index), \
        'Expected dataframes to have same # rows'
    cols = len(csv.columns)
    assert cols == len(tsv.columns) and cols == len(xls.columns), \
        'Expected dataframes to have same # columns'
    assert xls_skip.shape == (9997, 13), \
        'Expected 9,997 Rows and 13 Columns'
    # Verify that column dtypes survive serialization to Parquet.
    nulls = dataframes.nulls()
    assert ptypes.is_string_dtype(nulls['strings']), \
        'Expected column of strings to deserialize as strings'
    assert ptypes.is_integer_dtype(nulls['integers']), \
        'Expected column of integers to deserialize as integers'
    assert ptypes.is_float_dtype(nulls['floats']), \
        'Expected column of floats to deserialize as floats'
    # Ints with nulls can only deserialize as a numeric (float) dtype.
    assert ptypes.is_numeric_dtype(nulls['integers_nulled']), \
        'Expected column of ints with nulls to deserialize as numeric'
def test_build_parquet_default(self):
    """
    Test compilation to Parquet via the default library.

    Builds the test package from build.yml with the default Parquet backend
    and checks that the csv/tsv/xls dataframes all deserialize with matching
    shapes and that the README file materializes on disk.
    """
    Package.reset_parquet_lib()
    mydir = os.path.dirname(__file__)
    path = os.path.join(mydir, './build.yml')
    build.build_package('test_parquet', PACKAGE, path)
    # TODO load DFs based on contents of .yml file at PATH
    # not hardcoded vals (this will require loading modules from variable
    # names, probably using __module__)
    from quilt.data.test_parquet.groot import dataframes, README

    csv = dataframes.csv()
    # BUGFIX: was dataframes.csv(), which made the tsv comparisons below vacuous.
    tsv = dataframes.tsv()
    xls = dataframes.xls()
    rows = len(csv.index)
    assert rows == len(tsv.index) and rows == len(xls.index), \
        'Expected dataframes to have same # rows'
    assert os.path.exists(README())
    cols = len(csv.columns)
    print(csv.columns, xls.columns, tsv.columns)
    assert cols == len(tsv.columns) and cols == len(xls.columns), \
        'Expected dataframes to have same # columns'