def test_allow_different_columns(self):
    # Concatenating a tab that has column "B" onto an input that only has
    # column "A" should keep both columns, with NaN wherever a tab lacks
    # the column.
    first_table = pd.DataFrame({"A": [1, 2]})
    second_tab = TabOutput(
        "tab-2",
        "Tab 2",
        {"B": RenderColumn("B", "number", "{}")},
        pd.DataFrame({"B": [3, 4]}),
    )
    params = {
        "tabs": [second_tab],
        "add_source_column": False,
        "source_column_name": "",
    }

    outcome = render(
        first_table,
        params=params,
        tab_name="Tab 1",
        input_columns={"A": RenderColumn("A", "number", "{}")},
    )

    # The expected frame also pins column ordering: "A" first, then "B".
    expected = pd.DataFrame(
        {
            "A": [1, 2, np.nan, np.nan],
            "B": [np.nan, np.nan, 3, 4],
        }
    )
    assert_frame_equal(outcome, expected)
def test_add_source_column(self):
    # With "add_source_column" enabled, the output should gain a column
    # (named by "source_column_name") holding the tab name of each row.
    first_table = pd.DataFrame({"A": [1, 2]})
    second_tab = TabOutput(
        "tab-2",
        "Tab 2",
        {"A": RenderColumn("A", "number", "{}")},
        pd.DataFrame({"A": [3, 4]}),
    )
    params = {
        "tabs": [second_tab],
        "add_source_column": True,
        "source_column_name": "S",
    }

    outcome = render(
        first_table,
        params=params,
        tab_name="Tab 1",
        input_columns={"A": RenderColumn("A", "number", "{}")},
    )

    # The source column is expected to be the _first_ column.
    wanted = pd.DataFrame(
        {
            "S": ["Tab 1", "Tab 1", "Tab 2", "Tab 2"],
            "A": [1, 2, 3, 4],
        }
    )
    # The source column is expected to be categorical, so the repeated tab
    # names are not stored as copied bytes.
    wanted["S"] = wanted["S"].astype("category")
    assert_frame_equal(outcome, wanted)
def test_prevent_overwrite(self):
    # Asking to add column "B" from the right tab when the left table
    # already has a "B" should yield an error message, not a table.
    left_table = pd.DataFrame({"A": [1, 2, 3], "B": ["x", "y", "z"]})
    right_table = pd.DataFrame({"A": ["1", "2"], "B": ["X", "Y"]})
    right_tab = TabOutput(
        "slug",
        "Tab 2",
        {
            "A": RenderColumn("A", "number", "{}"),
            "B": RenderColumn("B", "text", None),
        },
        right_table,
    )
    params = {
        "right_tab": right_tab,
        "join_columns": {"on": ["A"], "right": ["B"]},
        "type": "left",
    }

    outcome = render(
        left_table,
        params,
        input_columns={
            "A": RenderColumn("A", "number", "{}"),
            "B": RenderColumn("B", "text", None),
        },
    )

    expected_message = (
        'You tried to add "B" from Tab 2, but your table already has that '
        "column. Please rename the column in one of the tabs, or unselect "
        "the column."
    )
    self.assertEqual(outcome, expected_message)
def test_error_different_types(self):
    # Column "A" is text in the input tab and number in "Tab 2"; the
    # concatenation should report that mismatch as an error message.
    first_table = pd.DataFrame({"A": ["x", "y"]})
    second_tab = TabOutput(
        "tab-2",
        "Tab 2",
        {"A": RenderColumn("A", "number", "{}")},
        pd.DataFrame({"A": [3, 4]}),
    )
    params = {
        "tabs": [second_tab],
        "add_source_column": False,
        "source_column_name": "",
    }

    outcome = render(
        first_table,
        params=params,
        tab_name="Tab 1",
        input_columns={"A": RenderColumn("A", "text", None)},
    )

    expected_message = (
        'Cannot concatenate column "A" of type "number" in "Tab 2" to '
        'column "A" of type "text" in "Tab 1". Please convert one or the '
        "other so they are the same type."
    )
    self.assertEqual(outcome, expected_message)
def test_on_types_differ(self):
    # The join column "A" is declared number on the left and text on the
    # right; the join should report the type mismatch as an error message.
    left_table = pd.DataFrame({"A": [1, 2, 3], "B": ["x", "y", "z"]})
    right_table = pd.DataFrame({"A": ["1", "2"], "C": ["X", "Y"]})
    right_tab = TabOutput(
        "slug",
        "Tab 2",
        {
            "A": RenderColumn("A", "text", None),
            "C": RenderColumn("C", "text", None),
        },
        right_table,
    )
    params = {
        "right_tab": right_tab,
        "join_columns": {"on": ["A"], "right": ["C"]},
        "type": "left",
    }

    outcome = render(
        left_table,
        params,
        input_columns={
            "A": RenderColumn("A", "number", "{}"),
            "B": RenderColumn("B", "text", None),
        },
    )

    expected_message = (
        'Column "A" is *number* in this tab and *text* in Tab 2. '
        "Please convert one or the other so they are both the same type."
    )
    self.assertEqual(outcome, expected_message)
def test_import_columns_without_formats(self):
    # Importing a tab with number, datetime and text columns: the output
    # dataframe should equal the tab's dataframe, and "column_formats"
    # should list only the column that carries a format string ("A").
    source_table = pd.DataFrame(
        {
            "A": [1, 2, 3],
            "B": pd.Series(
                ["2012-01-01", "2015-02-03", "2019-05-23"],
                dtype="datetime64[ns]",
            ),
            "C": ["a", "b", "c"],
        }
    )
    empty_input = pd.DataFrame()
    source_columns = {
        "A": RenderColumn("A", "number", "{,.2f}"),
        "B": RenderColumn("B", "datetime", None),
        "C": RenderColumn("C", "text", None),
    }
    params = {"tab": TabOutput("tab-2", "Tab 2", source_columns, source_table)}

    outcome = render(empty_input, params)

    assert_frame_equal(outcome["dataframe"], source_table)
    self.assertEqual(outcome["column_formats"], {"A": "{,.2f}"})
def test_happy_path(self):
    # Importing a one-column number tab should return that tab's data and
    # the column's format string.
    empty_input = pd.DataFrame()
    source_tab = TabOutput(
        "tab-2",
        "Tab 2",
        {"A": RenderColumn("A", "number", "{}")},
        pd.DataFrame({"A": [3, 4]}),
    )

    outcome = render(empty_input, {"tab": source_tab})

    assert_frame_equal(outcome["dataframe"], pd.DataFrame({"A": [3, 4]}))
    self.assertEqual(outcome["column_formats"], {"A": "{}"})
def test_right_join_delete_unused_categories_in_input_columns(self):
    # Right join where the left table has categorical values ('b', 'd')
    # with no match on the right.
    left_table = pd.DataFrame(
        {
            # join column
            "A": pd.Series(["a", "b"], dtype="category"),
            # other column
            "B": pd.Series(["c", "d"], dtype="category"),
        }
    )
    right_table = pd.DataFrame(
        {
            # join column
            "A": pd.Series(["a"], dtype="category"),
            "C": ["e"],
        }
    )
    right_tab = TabOutput(
        "slug",
        "Tab 2",
        {
            "A": RenderColumn("A", "text", None),
            "C": RenderColumn("C", "text", None),
        },
        right_table,
    )
    params = {
        "right_tab": right_tab,
        "join_columns": {"on": ["A"], "right": ["C"]},
        "type": "right",
    }

    outcome = render(
        left_table,
        params,
        input_columns={
            "A": RenderColumn("A", "text", None),
            "B": RenderColumn("B", "text", None),
        },
    )

    # 'b' and 'd' do not appear in the joined rows, so they should not
    # remain as categories in the output columns.
    expected = pd.DataFrame(
        {
            "A": pd.Series(["a"], dtype="category"),
            "B": pd.Series(["c"], dtype="category"),
            "C": ["e"],
        }
    )
    assert_frame_equal(outcome["dataframe"], expected)
def test_happy_path(self):
    # Concatenating two tabs that share a single number column "A" should
    # stack the second tab's rows after the first tab's rows.
    first_table = pd.DataFrame({"A": [1, 2]})
    second_tab = TabOutput(
        "tab-2",
        "Tab 2",
        {"A": RenderColumn("A", "number", "{}")},
        pd.DataFrame({"A": [3, 4]}),
    )
    params = {
        "tabs": [second_tab],
        "add_source_column": False,
        "source_column_name": "",
    }

    outcome = render(
        first_table,
        params=params,
        tab_name="Tab 1",
        input_columns={"A": RenderColumn("A", "number", "{}")},
    )

    assert_frame_equal(outcome, pd.DataFrame({"A": [1, 2, 3, 4]}))
def test_coerce_categories_and_str(self):
    # The input tab stores "A" as a categorical and the other tab stores it
    # as plain strings; the concatenated column is expected to be plain
    # strings.
    categorical_table = pd.DataFrame({"A": ["a", "b"]}, dtype="category")
    string_tab = TabOutput(
        "tab-2",
        "Tab 2",
        {"A": RenderColumn("A", "text", None)},
        pd.DataFrame({"A": ["c", "d"]}),
    )
    params = {
        "tabs": [string_tab],
        "add_source_column": False,
        "source_column_name": "",
    }

    outcome = render(
        categorical_table,
        params=params,
        tab_name="Tab 1",
        input_columns={"A": RenderColumn("A", "text", None)},
    )

    # Expected output has no categorical dtype.
    expected = pd.DataFrame({"A": ["a", "b", "c", "d"]})
    assert_frame_equal(outcome, expected)
def test_inner_join_delete_unused_categories_in_all_columns(self):
    # Inner join where both sides have categorical values that match
    # nothing on the other side.
    left_table = pd.DataFrame(
        {
            # join column
            "A": pd.Series(["a", "b"], dtype="category"),
            # other column
            "B": pd.Series(["c", "d"], dtype="category"),
        }
    )
    right_table = pd.DataFrame(
        {
            # join column
            "A": pd.Series(["a", "x"], dtype="category"),
            # other column
            "C": pd.Series(["e", "y"], dtype="category"),
        }
    )
    right_tab = TabOutput(
        "slug",
        "Tab 2",
        {
            "A": RenderColumn("A", "text", None),
            "C": RenderColumn("C", "text", None),
        },
        right_table,
    )
    params = {
        "right_tab": right_tab,
        "join_columns": {"on": ["A"], "right": ["C"]},
        "type": "inner",
    }

    outcome = render(
        left_table,
        params,
        input_columns={
            "A": RenderColumn("A", "text", None),
            "B": RenderColumn("B", "text", None),
        },
    )

    # 'b', 'd', 'x' and 'y' do not appear in the joined rows, so the
    # resulting categorical dtypes should not contain them.
    expected = pd.DataFrame(
        {"A": ["a"], "B": ["c"], "C": ["e"]},
        dtype="category",
    )
    assert_frame_equal(outcome["dataframe"], expected)
def test_left(self):
    # Left join on "A", pulling columns "C" and "D" from the right tab.
    # Left rows with no match on the right get NaN in the added columns.
    left_table = pd.DataFrame({"A": [1, 2, 3], "B": ["x", "y", "z"]})
    right_table = pd.DataFrame(
        {"A": [1, 2], "C": ["X", "Y"], "D": [0.1, 0.2]}
    )
    right_tab = TabOutput(
        "slug",
        "name",
        {
            "A": RenderColumn("A", "number", "{:,.2f}"),
            "C": RenderColumn("C", "text", None),
            "D": RenderColumn("D", "number", "{:,}"),
        },
        right_table,
    )
    params = {
        "right_tab": right_tab,
        "join_columns": {"on": ["A"], "right": ["C", "D"]},
        "type": "left",
    }

    outcome = render(
        left_table,
        params,
        input_columns={
            "A": RenderColumn("A", "number", "{:d}"),
            "B": RenderColumn("B", "text", None),
        },
    )

    expected_table = pd.DataFrame(
        {
            "A": [1, 2, 3],
            "B": ["x", "y", "z"],
            "C": ["X", "Y", np.nan],
            "D": [0.1, 0.2, np.nan],
        }
    )
    assert_frame_equal(outcome["dataframe"], expected_table)
    # Formats are expected only for the columns added from the right tab.
    self.assertEqual(outcome["column_formats"], {"C": None, "D": "{:,}"})
def test_left_join_delete_unused_categories_in_added_columns(self):
    # Left join where the right tab has a row (A == 'z', B == 'y') that
    # matches nothing on the left.
    left_table = pd.DataFrame({"A": ["a", "b"]}, dtype="category")
    right_table = pd.DataFrame(
        {
            "A": pd.Series(["a", "z"], dtype="category"),
            "B": pd.Series(["x", "y"], dtype="category"),
        }
    )
    right_tab = TabOutput(
        "slug",
        "Tab 2",
        {
            "A": RenderColumn("A", "text", None),
            "B": RenderColumn("B", "text", None),
        },
        right_table,
    )
    params = {
        "right_tab": right_tab,
        "join_columns": {"on": ["A"], "right": ["B"]},
        "type": "left",
    }

    outcome = render(
        left_table,
        params,
        input_columns={"A": RenderColumn("A", "text", None)},
    )

    # The unmatched 'z' row never reaches the result, so its 'B' value
    # ('y') should not linger as a category of the added 'B' column.
    expected = pd.DataFrame(
        {
            "A": pd.Series(["a", "b"], dtype="category"),
            "B": pd.Series(["x", np.nan], dtype="category"),
        }
    )
    assert_frame_equal(outcome["dataframe"], expected)
def test_import_empty_tab(self):
    # Importing a tab with no columns and no rows should give an empty
    # dataframe and no column formats.
    empty_tab = TabOutput("tab-2", "Tab 2", {}, pd.DataFrame())

    outcome = render(pd.DataFrame(), {"tab": empty_tab})

    assert_frame_equal(outcome["dataframe"], pd.DataFrame())
    self.assertEqual(outcome["column_formats"], {})