def test_allow_different_columns(self):
    """Tabs holding different columns combine into one NaN-padded table.

    This tab only has "A" and "Tab 2" only has "B"; the expected frame
    contains both columns, with NaN wherever a tab lacked the column.
    """
    second_tab = TabOutput(
        "tab-2",
        "Tab 2",
        {"B": RenderColumn("B", "number", "{}")},
        pd.DataFrame({"B": [3, 4]}),
    )
    actual = render(
        pd.DataFrame({"A": [1, 2]}),
        params={
            "tabs": [second_tab],
            "add_source_column": False,
            "source_column_name": "",
        },
        tab_name="Tab 1",
        input_columns={"A": RenderColumn("A", "number", "{}")},
    )
    # The comparison below covers the ordering of the columns, too.
    expected = pd.DataFrame(
        {"A": [1, 2, np.nan, np.nan], "B": [np.nan, np.nan, 3, 4]}
    )
    assert_frame_equal(actual, expected)
def test_error_different_types(self):
    """A text "A" here and a number "A" in "Tab 2" yield an error message.

    render() is expected to return the message string itself rather than
    a table.
    """
    number_tab = TabOutput(
        "tab-2",
        "Tab 2",
        {"A": RenderColumn("A", "number", "{}")},
        pd.DataFrame({"A": [3, 4]}),
    )
    outcome = render(
        pd.DataFrame({"A": ["x", "y"]}),
        params={
            "tabs": [number_tab],
            "add_source_column": False,
            "source_column_name": "",
        },
        tab_name="Tab 1",
        input_columns={"A": RenderColumn("A", "text", None)},
    )
    expected_message = (
        'Cannot concatenate column "A" of type "number" in "Tab 2" to '
        'column "A" of type "text" in "Tab 1". Please convert one or the '
        "other so they are the same type."
    )
    self.assertEqual(outcome, expected_message)
def test_add_source_column(self):
    """With add_source_column=True, a leading "S" column names each row's tab."""
    second_tab = TabOutput(
        "tab-2",
        "Tab 2",
        {"A": RenderColumn("A", "number", "{}")},
        pd.DataFrame({"A": [3, 4]}),
    )
    actual = render(
        pd.DataFrame({"A": [1, 2]}),
        params={
            "tabs": [second_tab],
            "add_source_column": True,
            "source_column_name": "S",
        },
        tab_name="Tab 1",
        input_columns={"A": RenderColumn("A", "number", "{}")},
    )
    # The source column is expected _before_ the data columns.
    wanted = pd.DataFrame(
        {
            "S": ["Tab 1", "Tab 1", "Tab 2", "Tab 2"],
            "A": [1, 2, 3, 4],
        }
    )
    # The source column should be categorical, so the repeated tab names
    # are not stored as useless copied bytes.
    wanted["S"] = wanted["S"].astype("category")
    assert_frame_equal(actual, wanted)
def test_prevent_overwrite(self):
    """Adding "B" from "Tab 2" when this tab already has "B" is an error.

    render() is expected to return the message string itself.
    """
    this_table = pd.DataFrame({"A": [1, 2, 3], "B": ["x", "y", "z"]})
    # NOTE(review): "A" holds strings here although it is declared as a
    # "number" column below -- confirm whether that mismatch is intentional.
    other_table = pd.DataFrame({"A": ["1", "2"], "B": ["X", "Y"]})
    other_tab = TabOutput(
        "slug",
        "Tab 2",
        {
            "A": RenderColumn("A", "number", "{}"),
            "B": RenderColumn("B", "text", None),
        },
        other_table,
    )
    outcome = render(
        this_table,
        {
            "right_tab": other_tab,
            "join_columns": {"on": ["A"], "right": ["B"]},
            "type": "left",
        },
        input_columns={
            "A": RenderColumn("A", "number", "{}"),
            "B": RenderColumn("B", "text", None),
        },
    )
    expected_message = (
        'You tried to add "B" from Tab 2, but your table already has that '
        "column. Please rename the column in one of the tabs, or unselect "
        "the column."
    )
    self.assertEqual(outcome, expected_message)
def test_on_types_differ(self):
    """Joining on "A" when it is number here but text in "Tab 2" is an error.

    render() is expected to return the message string itself.
    """
    this_table = pd.DataFrame({"A": [1, 2, 3], "B": ["x", "y", "z"]})
    other_table = pd.DataFrame({"A": ["1", "2"], "C": ["X", "Y"]})
    other_tab = TabOutput(
        "slug",
        "Tab 2",
        {
            "A": RenderColumn("A", "text", None),
            "C": RenderColumn("C", "text", None),
        },
        other_table,
    )
    outcome = render(
        this_table,
        {
            "right_tab": other_tab,
            "join_columns": {"on": ["A"], "right": ["C"]},
            "type": "left",
        },
        input_columns={
            "A": RenderColumn("A", "number", "{}"),
            "B": RenderColumn("B", "text", None),
        },
    )
    expected_message = (
        'Column "A" is *number* in this tab and *text* in Tab 2. '
        "Please convert one or the other so they are both the same type."
    )
    self.assertEqual(outcome, expected_message)
def test_import_columns_without_formats(self):
    """Columns whose format is None are left out of "column_formats".

    The tab's table must come back unchanged; only "A", the one column
    with a format string, is expected among the returned formats.
    """
    source = pd.DataFrame(
        {
            "A": [1, 2, 3],
            "B": pd.Series(
                ["2012-01-01", "2015-02-03", "2019-05-23"],
                dtype="datetime64[ns]",
            ),
            "C": ["a", "b", "c"],
        }
    )
    tab = TabOutput(
        "tab-2",
        "Tab 2",
        {
            "A": RenderColumn("A", "number", "{,.2f}"),
            "B": RenderColumn("B", "datetime", None),
            "C": RenderColumn("C", "text", None),
        },
        source,
    )
    outcome = render(pd.DataFrame(), {"tab": tab})
    assert_frame_equal(outcome["dataframe"], source)
    self.assertEqual(outcome["column_formats"], {"A": "{,.2f}"})
def test_convert_float(self):
    """Number columns come out as strings rendered with each column's format.

    1.111 under "{:.2f}" is expected as "1.11", and 2.6 under "{:d}" is
    expected as "2".
    """
    table = pd.DataFrame({"A": [1.111], "B": [2.6]})
    columns = {
        "A": RenderColumn("A", "number", "{:.2f}"),
        "B": RenderColumn("B", "number", "{:d}"),
    }
    actual = render(table, {"colnames": ["A", "B"]}, input_columns=columns)
    expected = pd.DataFrame({"A": ["1.11"], "B": ["2"]})
    assert_frame_equal(actual, expected)
def test_convert_null(self):
    """A null in a number column stays null while other values become text."""
    table = pd.DataFrame({"A": [1, np.nan]})
    columns = {"A": RenderColumn("A", "number", "{:,d}")}
    actual = render(table, {"colnames": ["A"]}, input_columns=columns)
    expected = pd.DataFrame({"A": ["1", np.nan]})
    assert_frame_equal(actual, expected)
def test_convert_str(self):
    """A text column passes through with its values unchanged."""
    table = pd.DataFrame({"A": ["a"]})
    columns = {"A": RenderColumn("A", "text", None)}
    actual = render(table, {"colnames": ["A"]}, input_columns=columns)
    expected = pd.DataFrame({"A": ["a"]})
    assert_frame_equal(actual, expected)
def test_arrow_date32_column(self):
    """A date32 field maps to a "date" column whose format is its "unit"."""
    field = pa.field("A", pa.date32(), metadata={"unit": "month"})
    columns = arrow_schema_to_render_columns(pa.schema([field]))
    self.assertEqual(columns, {"A": RenderColumn("A", "date", "month")})
def test_arrow_schema_uint8_column(self):
    """A uint8 field maps to a "number" column carrying its "format" metadata."""
    field = pa.field("A", pa.uint8(), metadata={"format": "{:,d}"})
    columns = arrow_schema_to_render_columns(pa.schema([field]))
    self.assertEqual(columns, {"A": RenderColumn("A", "number", "{:,d}")})
def test_arrow_timestamp_column(self):
    """A timestamp("ns") field maps to a "timestamp" column with no format."""
    field = pa.field("A", pa.timestamp("ns"))
    columns = arrow_schema_to_render_columns(pa.schema([field]))
    self.assertEqual(columns, {"A": RenderColumn("A", "timestamp", None)})
def test_arrow_schema_category_column(self):
    """A dictionary(int32 -> string) field maps to a plain "text" column."""
    field = pa.field("A", pa.dictionary(pa.int32(), pa.string()))
    columns = arrow_schema_to_render_columns(pa.schema([field]))
    self.assertEqual(columns, {"A": RenderColumn("A", "text", None)})
def test_convert_numbers_all_null(self):
    """An all-null float64 column is expected back as all-null object dtype."""
    table = pd.DataFrame({"A": [np.nan, np.nan]}, dtype=np.float64)
    columns = {"A": RenderColumn("A", "number", "{:d}")}
    actual = render(table, {"colnames": ["A"]}, input_columns=columns)
    expected = pd.DataFrame({"A": [np.nan, np.nan]}, dtype=object)
    assert_frame_equal(actual, expected)
def test_NOP(self):
    """With no columns selected (as when first applied), nothing changes."""
    table = pd.DataFrame({"A": [0.006]})
    columns = {"A": RenderColumn("A", "number", "{:.2f}")}
    # An empty "colnames" list should make render() a no-op.
    actual = render(table, {"colnames": []}, input_columns=columns)
    expected = pd.DataFrame({"A": [0.006]})
    assert_frame_equal(actual, expected)
def test_right_join_delete_unused_categories_in_input_columns(self):
    """A right join drops categories of this tab's columns that go unused."""
    this_table = pd.DataFrame(
        {
            # "A" is the join column; "B" is just carried along.
            "A": pd.Series(["a", "b"], dtype="category"),
            "B": pd.Series(["c", "d"], dtype="category"),
        }
    )
    other_table = pd.DataFrame(
        {
            "A": pd.Series(["a"], dtype="category"),  # join column
            "C": ["e"],
        }
    )
    other_tab = TabOutput(
        "slug",
        "Tab 2",
        {
            "A": RenderColumn("A", "text", None),
            "C": RenderColumn("C", "text", None),
        },
        other_table,
    )
    outcome = render(
        this_table,
        {
            "right_tab": other_tab,
            "join_columns": {"on": ["A"], "right": ["C"]},
            "type": "right",
        },
        input_columns={
            "A": RenderColumn("A", "text", None),
            "B": RenderColumn("B", "text", None),
        },
    )
    # 'b' and 'd' never reach the result, so they must not remain as
    # categories in the result's dtypes.
    expected = pd.DataFrame(
        {
            "A": pd.Series(["a"], dtype="category"),
            "B": pd.Series(["c"], dtype="category"),
            "C": ["e"],
        }
    )
    assert_frame_equal(outcome["dataframe"], expected)
def test_happy_path(self):
    """Two tabs sharing number column "A" combine into one four-row table."""
    second_tab = TabOutput(
        "tab-2",
        "Tab 2",
        {"A": RenderColumn("A", "number", "{}")},
        pd.DataFrame({"A": [3, 4]}),
    )
    actual = render(
        pd.DataFrame({"A": [1, 2]}),
        params={
            "tabs": [second_tab],
            "add_source_column": False,
            "source_column_name": "",
        },
        tab_name="Tab 1",
        input_columns={"A": RenderColumn("A", "number", "{}")},
    )
    expected = pd.DataFrame({"A": [1, 2, 3, 4]})
    assert_frame_equal(actual, expected)
def test_convert_datetime(self):
    """Datetime values are expected back as "YYYY-MM-DD" strings."""
    table = pd.DataFrame(
        {"A": [np.datetime64("2018-01-01"), np.datetime64("2019-02-13")]}
    )
    columns = {"A": RenderColumn("A", "datetime", None)}
    actual = render(table, {"colnames": ["A"]}, input_columns=columns)
    expected = pd.DataFrame({"A": ["2018-01-01", "2019-02-13"]})
    assert_frame_equal(actual, expected)
def test_inner_join_delete_unused_categories_in_all_columns(self):
    """An inner join drops unused categories from every output column."""
    this_table = pd.DataFrame(
        {
            # "A" is the join column; "B" is just carried along.
            "A": pd.Series(["a", "b"], dtype="category"),
            "B": pd.Series(["c", "d"], dtype="category"),
        }
    )
    other_table = pd.DataFrame(
        {
            # "A" is the join column; "C" is the column being added.
            "A": pd.Series(["a", "x"], dtype="category"),
            "C": pd.Series(["e", "y"], dtype="category"),
        }
    )
    other_tab = TabOutput(
        "slug",
        "Tab 2",
        {
            "A": RenderColumn("A", "text", None),
            "C": RenderColumn("C", "text", None),
        },
        other_table,
    )
    outcome = render(
        this_table,
        {
            "right_tab": other_tab,
            "join_columns": {"on": ["A"], "right": ["C"]},
            "type": "inner",
        },
        input_columns={
            "A": RenderColumn("A", "text", None),
            "B": RenderColumn("B", "text", None),
        },
    )
    # 'b', 'd', 'x' and 'y' are absent from the result, so none of the
    # result's categorical dtypes should still list them.
    expected = pd.DataFrame(
        {"A": ["a"], "B": ["c"], "C": ["e"]}, dtype="category"
    )
    assert_frame_equal(outcome["dataframe"], expected)
def test_coerce_categories_and_str(self):
    """A categorical "A" combined with a plain-string "A" comes out as strings."""
    categorical_table = pd.DataFrame({"A": ["a", "b"]}, dtype="category")
    string_tab = TabOutput(
        "tab-2",
        "Tab 2",
        {"A": RenderColumn("A", "text", None)},
        pd.DataFrame({"A": ["c", "d"]}),  # plain str, not category
    )
    actual = render(
        categorical_table,
        params={
            "tabs": [string_tab],
            "add_source_column": False,
            "source_column_name": "",
        },
        tab_name="Tab 1",
        input_columns={"A": RenderColumn("A", "text", None)},
    )
    # The expected column is plain str, not category.
    expected = pd.DataFrame({"A": ["a", "b", "c", "d"]})
    assert_frame_equal(actual, expected)
def test_left_join_delete_unused_categories_in_added_columns(self):
    """A left join drops unused categories from the columns it adds."""
    this_table = pd.DataFrame({"A": ["a", "b"]}, dtype="category")
    other_table = pd.DataFrame(
        {
            "A": pd.Series(["a", "z"], dtype="category"),
            "B": pd.Series(["x", "y"], dtype="category"),
        }
    )
    other_tab = TabOutput(
        "slug",
        "Tab 2",
        {
            "A": RenderColumn("A", "text", None),
            "B": RenderColumn("B", "text", None),
        },
        other_table,
    )
    outcome = render(
        this_table,
        {
            "right_tab": other_tab,
            "join_columns": {"on": ["A"], "right": ["B"]},
            "type": "left",
        },
        input_columns={"A": RenderColumn("A", "text", None)},
    )
    # Only 'x' from "B" is expected in the result; 'y' (paired with join
    # key 'z') is not, so it should not remain a category of "B".
    expected = pd.DataFrame(
        {
            "A": pd.Series(["a", "b"], dtype="category"),
            "B": pd.Series(["x", np.nan], dtype="category"),
        }
    )
    assert_frame_equal(outcome["dataframe"], expected)
def test_happy_path(self):
    """The "tab" param's table and column format come back in the result."""
    tab = TabOutput(
        "tab-2",
        "Tab 2",
        {"A": RenderColumn("A", "number", "{}")},
        pd.DataFrame({"A": [3, 4]}),
    )
    outcome = render(pd.DataFrame(), {"tab": tab})
    assert_frame_equal(outcome["dataframe"], pd.DataFrame({"A": [3, 4]}))
    self.assertEqual(outcome["column_formats"], {"A": "{}"})
def test_left(self):
    """A left join keeps every row of this tab and adds "C" and "D".

    The row with no match (A == 3) gets NaN in the added columns, and
    "column_formats" is expected to list the added columns only.
    """
    this_table = pd.DataFrame({"A": [1, 2, 3], "B": ["x", "y", "z"]})
    other_table = pd.DataFrame(
        {"A": [1, 2], "C": ["X", "Y"], "D": [0.1, 0.2]}
    )
    other_tab = TabOutput(
        "slug",
        "name",
        {
            "A": RenderColumn("A", "number", "{:,.2f}"),
            "C": RenderColumn("C", "text", None),
            "D": RenderColumn("D", "number", "{:,}"),
        },
        other_table,
    )
    outcome = render(
        this_table,
        {
            "right_tab": other_tab,
            "join_columns": {"on": ["A"], "right": ["C", "D"]},
            "type": "left",
        },
        input_columns={
            "A": RenderColumn("A", "number", "{:d}"),
            "B": RenderColumn("B", "text", None),
        },
    )
    expected_table = pd.DataFrame(
        {
            "A": [1, 2, 3],
            "B": ["x", "y", "z"],
            "C": ["X", "Y", np.nan],
            "D": [0.1, 0.2, np.nan],
        }
    )
    assert_frame_equal(outcome["dataframe"], expected_table)
    self.assertEqual(outcome["column_formats"], {"C": None, "D": "{:,}"})
def test_arrow_schema_text_column(self):
    """A string field maps to a "text" column with no format."""
    field = pa.field("A", pa.string())
    columns = arrow_schema_to_render_columns(pa.schema([field]))
    self.assertEqual(columns, {"A": RenderColumn("A", "text", None)})