コード例 #1
0
 def test_reserve_dtype(self):
     """Check dtype handling for a reserved column.

     The string ``"0.5"`` in reserved column ``x`` (dtype ``np.float32``)
     must come out of ``convert`` as the number ``0.5``; a value that
     cannot be parsed (``"kitten"``) must raise ``ValueError``.
     """
     typed = TypedDfBuilder("a").reserve("x", dtype=np.float32).build()

     parseable = pd.DataFrame([pd.Series({"x": "0.5"})])
     converted = typed.convert(parseable)
     assert converted.column_names() == ["x"]
     assert converted.to_numpy().tolist() == [[0.5]]

     with pytest.raises(ValueError):
         typed.convert(pd.DataFrame([pd.Series({"x": "kitten"})]))
コード例 #2
0
 def test_drop(self):
     """Check that a column registered via ``drop`` is removed by ``convert``.

     Columns that were not registered for dropping (here ``zz``) are kept.
     """
     typed = TypedDfBuilder("a").reserve("column").drop("trash").build()
     typing_info: DfTyping = typed.get_typing()
     assert typing_info.columns_to_drop == {"trash"}

     # Input without a "trash" column: both columns are kept.
     without_trash = pd.DataFrame([pd.Series({"x": "x", "zz": "y"})])
     result = typed.convert(without_trash)
     assert result.column_names() == ["x", "zz"]

     # Input with a "trash" column: only "x" remains.
     with_trash = pd.DataFrame([pd.Series({"x": "x", "trash": "y"})])
     result = typed.convert(with_trash)
     assert result.column_names() == ["x"]
コード例 #3
0
 def test_condition(self):
     """Check that ``verify`` conditions are recorded and enforced.

     A type built with ``always_ok`` must list that condition in its
     typing and accept an empty frame; a type built with ``always_fail``
     must raise ``VerificationFailedError`` from ``convert``.

     NOTE(review): ``always_ok`` / ``always_fail`` are defined outside this
     block; presumably they accept / reject every frame -- confirm.
     """
     t = TypedDfBuilder("a").verify(always_ok).build()
     typ: DfTyping = t.get_typing()
     assert typ.required_columns == []
     assert typ.required_index_names == []
     assert typ.verifications == [always_ok]
     TypedDf(pd.DataFrame())
     # Exercise the passing path on the type that was actually built;
     # without this the ``always_ok`` condition is registered but never run.
     t.convert(pd.DataFrame())
     t = TypedDfBuilder("a").verify(always_fail).build()
     with pytest.raises(VerificationFailedError):
         t.convert(pd.DataFrame())
コード例 #4
0
 def test_no_overwrite(self):
     """Check that ``overwrite=False`` refuses to replace an existing file.

     The same frame is written twice to one path with ``overwrite=False``;
     the second write must raise ``FileExistsError``.
     """
     typed = TypedDfBuilder("a").reserve("x", "y").build()
     raw = pd.DataFrame([pd.Series({"x": "cat", "y": "dog"})])
     frame = typed.convert(raw)
     with tmpfile(".csv") as target:
         # First write creates the file ...
         frame.write_file(target, overwrite=False)
         # ... so the second one must be rejected.
         with pytest.raises(FileExistsError):
             frame.write_file(target, overwrite=False)
コード例 #5
0
 def test_mkdir(self):
     """Check the ``mkdirs`` option of ``write_file``.

     Writing into a ``tmpdir()`` path with ``mkdirs=True`` must succeed,
     while the same kind of write without ``mkdirs`` must raise
     ``FileNotFoundError``.
     """
     typed = TypedDfBuilder("a").reserve("x", "y").build()
     raw = pd.DataFrame([pd.Series({"x": "cat", "y": "dog"})])
     frame = typed.convert(raw)

     # With mkdirs=True the write is expected to go through.
     with tmpdir() as folder:
         frame.write_file(folder / "a.csv", mkdirs=True)

     # Without mkdirs the write must fail.
     with tmpdir() as folder:
         with pytest.raises(FileNotFoundError):
             frame.write_file(folder / "b.csv")
コード例 #6
0
 def test_pass_io_options(self):
     """Check that write kwargs registered on the builder reach the writer.

     ``sep="&"`` is registered for ``FileFormat.csv``; the written file
     must then use ``&`` as the field separator.
     """
     builder = TypedDfBuilder("a").reserve("x", "y")
     typed = builder.add_write_kwargs(FileFormat.csv, sep="&").build()
     raw = pd.DataFrame([pd.Series({"x": "cat", "y": "dog"})])
     frame = typed.convert(raw)
     with tmpfile(".csv") as target:
         frame.write_file(target)
         written = target.read_text(encoding="utf8")
         rows = written.splitlines()
         assert rows == ["x&y", "cat&dog"]
コード例 #7
0
 def test_dir_hash(self):
     """Check writing and reading with a directory-level checksum file.

     After ``write_file(..., dir_hash=True)`` the directory checksum file
     must exist and hold exactly one entry, keyed by the written path,
     whose value is 64 characters long. The file must then be readable
     both with ``dir_hash=True`` and with that value as ``hex_hash``.
     """
     typed = TypedDfBuilder("a").reserve("x", "y").build()
     raw = pd.DataFrame([pd.Series({"x": "cat", "y": "kitten"})])
     frame = typed.convert(raw)
     with tmpfile(".csv") as target:
         dirsum_path = Checksums().get_dirsum_of_file(target)
         # Start without a checksum file so the write has to create it.
         dirsum_path.unlink(missing_ok=True)
         frame.write_file(target, dir_hash=True)
         assert dirsum_path.exists()

         entries = Checksums().load_dirsum_exact(dirsum_path)
         assert list(entries.keys()) == [target]
         digest = entries[target]
         assert len(digest) == 64

         # Reading must validate against both the file and an explicit hash.
         typed.read_file(target, dir_hash=True)
         typed.read_file(target, hex_hash=digest)
コード例 #8
0
 def test_file_hash(self):
     """Check writing and reading with a per-file checksum.

     After ``write_file(..., file_hash=True)`` the per-file checksum file
     must exist, refer to the written path and carry a 64-character hash.
     The file must then be readable both with ``file_hash=True`` and with
     that value as ``hex_hash``.
     """
     typed = TypedDfBuilder("a").reserve("x", "y").build()
     raw = pd.DataFrame([pd.Series({"x": "cat", "y": "dog"})])
     frame = typed.convert(raw)
     # The written bytes are OS-dependent (\n vs \r\n), so the exact hash
     # cannot be pinned here; only its length is checked.
     with tmpfile(".csv") as target:
         frame.write_file(target, file_hash=True)
         filesum_path = Checksums().get_filesum_of_file(target)
         assert filesum_path.exists()

         record = Checksums().load_filesum_of_file(target)
         assert record.file_path == target
         digest = record.hash_value
         assert len(digest) == 64

         typed.read_file(target, file_hash=True)
         typed.read_file(target, hex_hash=digest)
コード例 #9
0
 def test_strict(self):
     """Check ``strict`` for columns and index names independently.

     Three configurations are covered:

     * strict columns only: an extra index level is accepted, an extra
       column raises ``UnexpectedColumnError``;
     * strict index only: an extra column is accepted, an extra index
       level raises ``UnexpectedIndexNameError``;
     * neither strict: an extra column is accepted.
     """
     # strict columns but not index
     t = TypedDfBuilder("a").strict(index=False, cols=True).build()
     typ: DfTyping = t.get_typing()
     assert typ.more_indices_allowed
     assert not typ.more_columns_allowed
     t.convert(pd.DataFrame([pd.Series(dict(x="x"))]).set_index("x"))
     with pytest.raises(UnexpectedColumnError):
         t.convert(pd.DataFrame([pd.Series(dict(x="x"))]))
     # strict index but not columns
     t = TypedDfBuilder("a").strict(True, False).build()
     typ = t.get_typing()
     assert typ.more_columns_allowed
     assert not typ.more_indices_allowed
     t.convert(pd.DataFrame([pd.Series(dict(x="x"))]))
     # Build and sanity-check the input outside the ``raises`` block so
     # that only ``convert`` can satisfy the expected exception.
     df = PrettyDf(pd.DataFrame([pd.Series(dict(x="x"))]).set_index("x"))
     assert df.index_names() == ["x"]
     assert df.column_names() == []
     with pytest.raises(UnexpectedIndexNameError):
         t.convert(df)
     # neither strict
     t = TypedDfBuilder("a").strict(False, False).build()
     t.convert(pd.DataFrame([pd.Series(dict(x="x"))]))
コード例 #10
0
 def test_attrs_hard(self):
     """Check the attrs round trip for a non-trivial value (a NumPy matrix).

     A 2x2 zero matrix stored in ``df.attrs`` is written next to the CSV
     as ``<path>.attrs.json`` and must read back as nested lists of the
     strings ``"0.0"``.
     """
     attrs_file = None
     try:
         typed = TypedDfBuilder("a").reserve("x", "y").build()
         raw = pd.DataFrame([pd.Series({"x": "cat", "y": "kitten"})])
         frame = typed.convert(raw)
         frame.attrs["matrix"] = np.zeros((2, 2))
         with tmpfile(".csv") as target:
             frame.write_file(target, attrs=True)
             attrs_file = Path(str(target) + ".attrs.json")
             assert attrs_file.exists()
             frame = typed.read_file(target, attrs=True)
             expected = {"matrix": [["0.0", "0.0"], ["0.0", "0.0"]]}
             assert frame.attrs == expected
     finally:
         # The attrs side file is removed explicitly here.
         if attrs_file is not None:
             attrs_file.unlink(missing_ok=True)
コード例 #11
0
 def test_attrs(self):
     """Check the attrs round trip for a plain string value.

     ``df.attrs`` is written next to the CSV as ``<path>.attrs.json``. With
     newlines and two-space runs stripped, the file content must equal
     ``{"fruit": "apple"}``, and reading with ``attrs=True`` must restore
     the same mapping.
     """
     attrs_file = None
     try:
         typed = TypedDfBuilder("a").reserve("x", "y").build()
         raw = pd.DataFrame([pd.Series({"x": "cat", "y": "kitten"})])
         frame = typed.convert(raw)
         frame.attrs["fruit"] = "apple"
         with tmpfile(".csv") as target:
             frame.write_file(target, attrs=True)
             attrs_file = Path(str(target) + ".attrs.json")
             assert attrs_file.exists()
             # Strip newlines and two-space runs before comparing.
             text = attrs_file.read_text(encoding="utf-8")
             compact = text.replace("\n", "").replace("  ", "")
             assert compact == '{"fruit": "apple"}'
             frame = typed.read_file(target, attrs=True)
             assert frame.attrs == {"fruit": "apple"}
     finally:
         # The attrs side file is removed explicitly here.
         if attrs_file is not None:
             attrs_file.unlink(missing_ok=True)