def test_version_str_repr(self, load_version, save_version):
    """Check that ``str()`` of an instance mentions the version only when set.

    Besides the version, both the plain and the versioned instance must
    show the file path, the class name, the protocol, the writer arguments
    and the default ``save_args`` / ``load_args``.
    """
    filepath = "test.xlsx"
    plain = ExcelDataSet(filepath=filepath)
    versioned = ExcelDataSet(
        filepath=filepath, version=Version(load_version, save_version)
    )
    plain_repr = str(plain)
    versioned_repr = str(versioned)

    # Only the versioned instance may advertise a version.
    assert "version" not in plain_repr
    expected_version = "version=Version(load={}, save='{}')".format(
        load_version, save_version
    )
    assert expected_version in versioned_repr

    # Fragments that must be present regardless of versioning; the last two
    # are the default save_args and load_args.
    shared_fragments = (
        filepath,
        "ExcelDataSet",
        "protocol",
        "writer_args",
        "save_args={'index': False}",
        "load_args={'engine': xlrd}",
    )
    for fragment in shared_fragments:
        assert fragment in versioned_repr
        assert fragment in plain_repr
def excel_data_set(filepath_excel, load_args, save_args, fs_args):
    """Return an ``ExcelDataSet`` for ``filepath_excel``.

    The load, save and filesystem arguments are forwarded unchanged to the
    constructor as keyword arguments.
    """
    options = {
        "load_args": load_args,
        "save_args": save_args,
        "fs_args": fs_args,
    }
    return ExcelDataSet(filepath=filepath_excel, **options)
def test_http_filesystem_no_versioning(self):
    """A versioned data set on an HTTPS path must raise ``DataSetError``."""
    url = "https://example.com/file.xlsx"
    expected_message = r"HTTP\(s\) DataSet doesn't support versioning\."
    with pytest.raises(DataSetError, match=expected_message):
        ExcelDataSet(filepath=url, version=Version(None, None))
def test_protocol_usage(self, filepath, instance_type):
    """Check the filesystem type and the protocol-free path of a data set.

    The expected path is whatever follows the first ``PROTOCOL_DELIMITER``
    in ``filepath`` (or the whole string when the delimiter is absent).
    """
    dataset = ExcelDataSet(filepath=filepath)
    assert isinstance(dataset._fs, instance_type)

    # Last piece of an at-most-one split: the part after the delimiter.
    *_, expected_path = filepath.split(PROTOCOL_DELIMITER, 1)
    stored_path = dataset._filepath
    assert str(stored_path) == expected_path
    assert isinstance(stored_path, PurePosixPath)
def excel_multisheet_data_set(filepath_excel, save_args, fs_args):
    """Return an ``ExcelDataSet`` whose load arguments set ``sheet_name=None``.

    The save and filesystem arguments are forwarded unchanged.
    """
    # NOTE(review): sheet_name=None presumably requests every sheet
    # (pandas read_excel semantics) -- confirm against ExcelDataSet's loader.
    return ExcelDataSet(
        filepath=filepath_excel,
        load_args={"sheet_name": None},
        save_args=save_args,
        fs_args=fs_args,
    )
def test_protocol_usage(self, filepath, instance_type):
    """Check the filesystem type and the protocol-free path of a data set.

    For the ``https`` protocol the expected path is computed by splitting
    on ``"://"``; otherwise the filesystem's own ``_strip_protocol`` is used.
    """
    dataset = ExcelDataSet(filepath=filepath)
    assert isinstance(dataset._fs, instance_type)

    # _strip_protocol() doesn't strip http(s) protocol, so split manually.
    expected_path = (
        filepath.split("://")[-1]
        if dataset._protocol == "https"
        else dataset._fs._strip_protocol(filepath)
    )
    stored_path = dataset._filepath
    assert str(stored_path) == expected_path
    assert isinstance(stored_path, PurePosixPath)
def setup_excel_dataset(path):
    """Save a small single-column frame through a new ``ExcelDataSet``.

    Args:
        path: Passed positionally to the ``ExcelDataSet`` constructor.

    Returns:
        A ``(data set, DataFrame)`` tuple, where the DataFrame is the one
        that was saved.
    """
    frame = pd.DataFrame({"col1": [1, 2]})
    dataset = ExcelDataSet(path)
    dataset.save(frame)
    return dataset, frame
def versioned_excel_data_set(filepath_excel, load_version, save_version):
    """Return an ``ExcelDataSet`` for ``filepath_excel`` with a ``Version``.

    The ``Version`` is built from ``load_version`` and ``save_version``.
    """
    version = Version(load_version, save_version)
    return ExcelDataSet(filepath=filepath_excel, version=version)
def test_catalog_release(self, mocker):
    """``release()`` must invalidate the filesystem cache for the file path."""
    filepath = "test.xlsx"
    # Patch before constructing the data set so that it receives the mock.
    patched_factory = mocker.patch("fsspec.filesystem")
    filesystem = patched_factory.return_value

    ExcelDataSet(filepath=filepath).release()

    filesystem.invalidate_cache.assert_called_once_with(filepath)