def test_filedatasource_df(file_type, filedatasource_cfg_and_file):
    """Check the DataFrame a FileDataSource produces for ``file_type``.

    The config dict and the raw file bytes are obtained by calling
    ``filedatasource_cfg_and_file`` with the requested file type.
    """
    config, payload = filedatasource_cfg_and_file(file_type)
    # Hand the data source an in-memory buffer instead of a real path, so
    # nothing has to exist on disk for the read to succeed.
    config["path"] = BytesIO(payload)
    source = mllp_ds.FileDataSource("bla", config)

    frame = source.get_dataframe()

    column_a = frame["a"]
    assert str(column_a.dtype) == "float64"
    assert column_a[1] == 2.3
    assert frame["b"][0] == "ad"
def test_filedatasource_df_chunksize(filedatasource_cfg_and_file):
    """Check that ``chunksize=1`` yields separate DataFrame chunks.

    With a chunk size the result must not be a single DataFrame; it must be
    an iterable that unpacks into exactly two DataFrames for the CSV sample.
    """
    config, payload = filedatasource_cfg_and_file("csv")
    # In-memory stand-in for the file that would normally be opened by path.
    config["path"] = BytesIO(payload)
    source = mllp_ds.FileDataSource("bla", config)

    chunks = source.get_dataframe(chunksize=1)
    # Tuple-unpacking doubles as a check that exactly two chunks come back.
    first_chunk, second_chunk = chunks

    assert not isinstance(chunks, pd.DataFrame)
    for chunk in (first_chunk, second_chunk):
        assert isinstance(chunk, pd.DataFrame)
def test_filedatasource_notimplemented(filedatasource_cfg_and_file):
    """Check the errors raised for unsupported FileDataSource usage.

    Covers three situations: a CSV-configured source, a text-file-configured
    source, and a config whose ``type`` is not a known file type.
    """
    # --- CSV config: ``params`` is rejected and raw access is a type error.
    config, _ = filedatasource_cfg_and_file("csv")
    tabular_source = mllp_ds.FileDataSource("bla", config)
    with pytest.raises(NotImplementedError):
        tabular_source.get_dataframe(params={"a": "hallo"})
    with pytest.raises(TypeError, match="get_dataframe"):
        tabular_source.get_raw()

    # --- Text-file config: ``params``/``chunksize`` are rejected and
    # DataFrame access is a type error.
    config, _ = filedatasource_cfg_and_file("text_file")
    raw_source = mllp_ds.FileDataSource("bla", config)
    for unsupported_kwargs in ({"params": {"a": "hallo"}}, {"chunksize": 5}):
        with pytest.raises(NotImplementedError):
            raw_source.get_raw(**unsupported_kwargs)
    with pytest.raises(TypeError, match="get_raw"):
        raw_source.get_dataframe()

    # --- Unknown type: construction itself must fail.
    config["type"] = "sausage"
    with pytest.raises(TypeError, match="file type"):
        mllp_ds.FileDataSource("bla", config)
def test_filedatasource_df_dtypes(file_type, filedatasource_cfg_and_file):
    """Check the column dtypes of the DataFrame when a dtypes file is given.

    ``filedatasource_cfg_and_file`` is called with ``dtypes=...`` and returns
    the config, the data file bytes and the dtypes file bytes. Both files are
    swapped for in-memory buffers before the data source is built.

    NOTE(review): presumably the dtypes file is what turns column "a" into
    ``object`` instead of an inferred numeric dtype -- confirm against the
    fixture contents.
    """
    cfg, file, dtfile = filedatasource_cfg_and_file(
        file_type, dtypes="some_filename.dtypes"
    )
    # The config is expected to already carry a dtypes path; checking it
    # explicitly replaces the leftover debug prints that used to read the key.
    assert "dtypes_path" in cfg

    # Sort-of mocking the files: in-memory buffers stand in for real paths.
    cfg["path"] = BytesIO(file)
    cfg["dtypes_path"] = BytesIO(dtfile)

    ds = mllp_ds.FileDataSource("some_datasource", cfg)
    df = ds.get_dataframe()

    assert str(df["a"].dtype) == "object"
    assert str(df["d"].dtype) == "float64"
def test_filedatasource_raw(file_type, filedatasource_cfg_and_file):
    """Check that ``get_raw`` returns the file contents unchanged.

    ``builtins.open`` is replaced by a ``mock_open`` serving the fixture's
    file contents, so no real file is touched. The result must be
    "Hello world!" as either ``bytes`` or ``str``; any other type fails.
    """
    cfg, file = filedatasource_cfg_and_file(file_type)
    mo = mock.mock_open(read_data=file)
    # Give the mocked handle a ``name`` attribute like a real file object has.
    mo.return_value.name = "./foobar"

    # Keep the patch scoped to the code under test only, so that evaluating
    # and reporting the assertions below never goes through the mocked open.
    with mock.patch("builtins.open", mo, create=True):
        ds = mllp_ds.FileDataSource("bla", cfg)
        raw = ds.get_raw()

    if isinstance(raw, bytes):
        assert raw == b"Hello world!"
    elif isinstance(raw, str):
        assert raw == "Hello world!"
    else:
        # Raise explicitly: a bare ``assert False`` carries no diagnostic and
        # is removed entirely when Python runs with -O.
        raise AssertionError(
            "Unsupported type returned by get_raw: {!r}".format(type(raw))
        )