def test_read_write_insecure(self):
    """
    Check that restricted typed DataFrames refuse disallowed formats.

    A type built with ``secure()`` must raise ``UnsupportedOperationError``
    for a plain-HTTP URL and ``FormatInsecureError`` when reading or writing
    any suffix of a format whose ``is_secure`` is false. A type built with
    ``recommended_only()`` must raise ``FormatDiscouragedError`` for any
    suffix of a format whose ``is_recommended`` is false.
    """
    strict_type = TypedDfBuilder("a").secure().build()
    picky_type = TypedDfBuilder("a").recommended_only().build()
    with pytest.raises(UnsupportedOperationError):
        # noinspection HttpUrlsUsage
        strict_type.read_url("http://google.com")  # nosec
    strict_df = strict_type.new_df()
    picky_df = picky_type.new_df()
    # Walk every (format, suffix) combination in declaration order.
    combos = ((fmt, suffix) for fmt in FileFormat for suffix in fmt.suffixes)
    for fmt, suffix in combos:
        try:
            with tmpfile(suffix) as path:
                # should always complain about insecurity FIRST
                if not fmt.is_secure:
                    with pytest.raises(FormatInsecureError):
                        strict_type.read_file(path)
                    with pytest.raises(FormatInsecureError):
                        strict_df.write_file(path)
                # Make sure nothing is left on disk before the next checks.
                path.unlink(missing_ok=True)
                if not fmt.is_recommended:
                    with pytest.raises(FormatDiscouragedError):
                        picky_type.read_file(path)
                    with pytest.raises(FormatDiscouragedError):
                        picky_df.write_file(path)
        except Exception:
            # Record which suffix broke, then let the failure propagate.
            logger.error(f"Failed on suffix {suffix}")
            raise
def test_dir_hash(self):
    """
    Check writing and verifying a directory-level checksum file.

    Writes a CSV with ``dir_hash=True``, then asserts that the directory
    checksum file exists, that it lists exactly the written path, and that
    the recorded hash is 64 characters long. Finally reads the file back
    both with ``dir_hash=True`` and with the explicit ``hex_hash``.

    The directory checksum file is removed in ``finally`` so the test does
    not leave an artifact behind.
    """
    hash_dir = None
    try:
        t = TypedDfBuilder("a").reserve("x", "y").build()
        df = t.convert(pd.DataFrame([pd.Series(dict(x="cat", y="kitten"))]))
        with tmpfile(".csv") as path:
            hash_dir = Checksums().get_dirsum_of_file(path)
            # Start from a clean slate so stale entries cannot affect the key check.
            hash_dir.unlink(missing_ok=True)
            df.write_file(path, dir_hash=True)
            assert hash_dir.exists()
            got = Checksums().load_dirsum_exact(hash_dir)
            assert list(got.keys()) == [path]
            hit = got[path]
            assert len(hit) == 64
            t.read_file(path, dir_hash=True)
            t.read_file(path, hex_hash=hit)
    finally:
        # The checksum file sits beside the temp file; remove it explicitly.
        if hash_dir is not None:
            hash_dir.unlink(missing_ok=True)
def test_file_hash(self):
    """
    Check writing and verifying a per-file checksum file.

    Writes a CSV with ``file_hash=True``, then asserts that the checksum
    file exists, that it refers to the written path, and that the recorded
    hash is 64 characters long. Finally reads the file back both with
    ``file_hash=True`` and with the explicit ``hex_hash``.

    The checksum file is removed in ``finally`` so the test does not leave
    an artifact behind.
    """
    hash_file = None
    try:
        t = TypedDfBuilder("a").reserve("x", "y").build()
        df = t.convert(pd.DataFrame([pd.Series(dict(x="cat", y="dog"))]))
        # unfortunately, the file that gets output is os-dependent
        # \n vs \r\n is an issue, so we can't check the exact hash
        with tmpfile(".csv") as path:
            df.write_file(path, file_hash=True)
            hash_file = Checksums().get_filesum_of_file(path)
            assert hash_file.exists()
            got = Checksums().load_filesum_of_file(path)
            assert got.file_path == path
            hit = got.hash_value
            assert len(hit) == 64
            t.read_file(path, file_hash=True)
            t.read_file(path, hex_hash=hit)
    finally:
        # The checksum sidecar is a separate file; remove it explicitly.
        if hash_file is not None:
            hash_file.unlink(missing_ok=True)
def test_attrs_hard(self):
    """
    Check that a NumPy array stored in ``attrs`` round-trips via the sidecar.

    A 2x2 zero matrix is stored under ``attrs["matrix"]``, the frame is
    written with ``attrs=True``, and the ``.attrs.json`` sidecar must exist.
    Reading back with ``attrs=True`` is expected to yield nested lists of
    the string ``"0.0"``.
    """
    typed = TypedDfBuilder("a").reserve("x", "y").build()
    frame = typed.convert(pd.DataFrame([pd.Series({"x": "cat", "y": "kitten"})]))
    frame.attrs["matrix"] = np.zeros((2, 2))
    expected = {"matrix": [["0.0", "0.0"], ["0.0", "0.0"]]}
    sidecar = None
    try:
        with tmpfile(".csv") as path:
            frame.write_file(path, attrs=True)
            sidecar = Path(f"{path}.attrs.json")
            assert sidecar.exists()
            loaded = typed.read_file(path, attrs=True)
            assert loaded.attrs == expected
    finally:
        # Remove the sidecar explicitly once the test is done.
        if sidecar is not None:
            sidecar.unlink(missing_ok=True)
def test_attrs(self):
    """
    Check that a simple string in ``attrs`` round-trips via the sidecar.

    Writes a frame with ``attrs["fruit"] = "apple"`` using ``attrs=True``,
    asserts that the ``.attrs.json`` sidecar exists and holds the expected
    JSON once whitespace is ignored, then reads the frame back with
    ``attrs=True`` and compares ``attrs``.
    """
    meta = None
    try:
        t = TypedDfBuilder("a").reserve("x", "y").build()
        df = t.convert(pd.DataFrame([pd.Series(dict(x="cat", y="kitten"))]))
        df.attrs["fruit"] = "apple"
        with tmpfile(".csv") as path:
            df.write_file(path, attrs=True)
            meta = Path(str(path) + ".attrs.json")
            assert meta.exists()
            raw = meta.read_text(encoding="utf-8")
            # Drop ALL whitespace (newlines, indentation, separator spaces) so
            # the check does not depend on how the JSON was pretty-printed.
            # The expected literal must therefore contain no whitespace either.
            data = "".join(raw.split())
            assert data == '{"fruit":"apple"}'
            df = t.read_file(path, attrs=True)
            assert df.attrs == {"fruit": "apple"}
    finally:
        # Remove the sidecar explicitly once the test is done.
        if meta is not None:
            meta.unlink(missing_ok=True)