def test_fancy(self):
    """A built typed class keeps the given name and doc, and converts frames.

    Checks ``__name__`` and ``__doc__`` on the built class, then that the
    converted sample data is a ``TypedDf`` whose class carries the same name.
    """
    builder = TypedDfs.typed("a class", doc="A doc")
    built_cls = builder.build()
    assert built_cls.__name__ == "a class"
    assert built_cls.__doc__ == "A doc"
    converted = built_cls.convert(pd.DataFrame(sample_data()))
    assert isinstance(converted, TypedDf)
    assert converted.__class__.__name__ == "a class"
def test_fancy_with_no_index(self):
    """Columns required with ``index=False`` stay as columns after conversion.

    The converted frame must report no index names and exactly the three
    required column names, in order.
    """
    builder = TypedDfs.typed("a class").require("abc", "123", "xyz", index=False)
    built_cls = builder.build()
    converted = built_cls.convert(pd.DataFrame(sample_data()))
    assert converted.index_names() == []
    assert converted.column_names() == ["abc", "123", "xyz"]
    assert isinstance(converted, TypedDf)
    assert converted.__class__.__name__ == "a class"
def test_matrix(self):
    """CLI help for a boolean matrix type has the expected long and short text."""
    builder = TypedDfs.matrix("pretty bird matrix")
    builder = builder.doc(
        "A table of species of pretty birds and whether they like each other."
    )
    builder = builder.dtype(bool)
    matrix_cls = builder.build()
    cli_help = DfCliHelp.help(matrix_cls)

    # --- long text ---
    txt = cli_help.get_long_text()
    lines = txt.splitlines()
    # three fixed lines, one more line, then one line per FileFormat member
    expected_lines = 3 + 1 + len(FileFormat)
    assert len(lines) == expected_lines, f"{len(lines)} != {expected_lines}: {txt}"
    assert "whether they like each other" in lines[1]
    assert "cast to bool" in lines[2]

    # --- short text ---
    txt = cli_help.get_short_text(recommended_only=True)
    lines = txt.splitlines()
    expected_lines = 4
    assert len(lines) == expected_lines, f"{len(lines)} != {expected_lines}: {txt}"
    assert "Boolean (bool)" in lines[2]
def test_wrap_multilayer(self):
    """Wrapping a frame with multi-level columns yields sensible column names.

    Multi-level columns are not fully supported yet; this only checks that
    the result is reasonable.
    """
    row_labels = ["yes", "no", "maybe"]
    # The second tuple is deliberately longer and nests a tuple as its last level.
    column_tuples = [
        ("animal", "cat"),
        ("animal", "armadillo", ("person", "matt")),
    ]
    multi_columns = pd.MultiIndex.from_tuples(column_tuples)
    plain = pd.DataFrame(
        data=np.zeros((3, 2)),
        index=row_labels,
        columns=multi_columns,
    )
    wrapped = TypedDfs.wrap(plain)
    expected_names = [
        ("animal", "cat", np.nan),
        ("animal", "armadillo", ("person", "matt")),
    ]
    assert wrapped.column_names() == expected_names
def test_typed(self):
    """CLI help for a typed table lists its columns in long and short form.

    The table has three required columns (species, prettiness, cuteness) and
    one reserved column (lifespan).
    """
    builder = TypedDfs.typed("pretty bird table")
    builder = builder.doc(
        "A table of species of pretty birds and their characteristics."
    )
    builder = builder.require("species", dtype=str)
    builder = builder.require("prettiness", dtype=float)
    builder = builder.require("cuteness", dtype=np.int16)
    builder = builder.reserve("lifespan", dtype=Period)
    table_cls = builder.build()
    cli_help = DfCliHelp.help(table_cls)

    # --- long text ---
    txt = cli_help.get_long_text(nl="\n")
    lines = txt.splitlines()
    header_and_doc = 1 + 1  # header and docstring
    required_section = 1 + 3  # required columns
    optional_section = 1 + 1 + 1  # optional columns, inc. "any others allowed" (e.g.)
    formats_section = 1 + len(FileFormat)  # file formats
    expected_lines = (
        header_and_doc + required_section + optional_section + formats_section
    )
    assert len(lines) == expected_lines, f"{len(lines)} != {expected_lines}: " + txt
    assert "pretty bird table" in lines[0]
    assert "table of species" in lines[1]
    assert "columns" in lines[2].lower()
    assert "- species (string)" in lines[3]
    for column_entry in (
        "- prettiness (floating-point)",
        "- cuteness (integer)",
        "- lifespan (time period)",
    ):
        assert column_entry in txt
    # at least one of the accepted "not recommended" markers must appear
    assert any(
        marker in txt for marker in ("[avoid]", "[discouraged]", "[not recommended]")
    )

    # --- short text ---
    txt = cli_help.get_short_text(recommended_only=True, nl="\n")
    lines = txt.splitlines()
    expected_lines = 4  # 1 for header, 1 for doc, 1 for formats, 1 for typing
    assert len(lines) == expected_lines, f"{len(lines)} != {expected_lines}: {txt}"
    assert "pretty bird table" in lines[0]
    assert "table of species" in lines[1]
    assert "species (str)" in lines[2]
    assert "prettiness (float)" in lines[2]
    assert "lifespan (period)" in lines[2]
    assert "csv" in lines[3]
def test_no_name_fancy(self):
    """Building a typed class with ``None`` as its name raises ``TypeError``."""
    missing_name = None
    with pytest.raises(TypeError):
        # noinspection PyTypeChecker
        TypedDfs.typed(missing_name).build()
def test_no_name_simple(self):
    """Creating an untyped class with ``None`` as its name raises ``TypeError``."""
    missing_name = None
    with pytest.raises(TypeError):
        # noinspection PyTypeChecker
        TypedDfs.untyped(missing_name)
def test_empty_simple(self):
    """Converting an empty DataFrame with an untyped class gives no columns."""
    untyped_cls = TypedDfs.untyped("a class")
    converted = untyped_cls.convert(pd.DataFrame())
    column_labels = list(converted.columns)
    assert column_labels == []
def test_wrap(self):
    """``TypedDfs.wrap`` returns a ``BaseDf``; the plain input is not an ``AbsDf``."""
    plain = pd.DataFrame({})
    wrapped = TypedDfs.wrap(plain)
    # the original frame must still be a plain pandas object
    assert not isinstance(plain, AbsDf)
    assert isinstance(wrapped, BaseDf)
def test_missing(self):
    """Converting the sample data with a strict class requiring "qqq" fails.

    The conversion is expected to raise ``TypedDfs.MissingColumnError``.
    """
    builder = TypedDfs.typed("a class").require("qqq", index=False)
    strict_cls = builder.strict().build()
    sample = pd.DataFrame(sample_data())
    with pytest.raises(TypedDfs.MissingColumnError):
        strict_cls.convert(sample)
def test_extra_index(self):
    """Converting the sample data with a strict class requiring only "xyz" fails.

    The conversion is expected to raise ``TypedDfs.UnexpectedColumnError``.
    """
    builder = TypedDfs.typed("a class").require("xyz", index=False)
    strict_cls = builder.strict().build()
    sample = pd.DataFrame(sample_data())
    with pytest.raises(TypedDfs.UnexpectedColumnError):
        strict_cls.convert(sample)