def test_allow_different_columns(self):
     # Tab 1 only has "A" and Tab 2 only has "B": the expected output keeps
     # both columns and holds NaN wherever a tab lacks the column.
     number_format = "{}"
     other_tab = TabOutput(
         "tab-2",
         "Tab 2",
         {"B": RenderColumn("B", "number", number_format)},
         pd.DataFrame({"B": [3, 4]}),
     )
     settings = {
         "tabs": [other_tab],
         "add_source_column": False,
         "source_column_name": "",
     }
     result = render(
         pd.DataFrame({"A": [1, 2]}),
         params=settings,
         tab_name="Tab 1",
         input_columns={"A": RenderColumn("A", "number", number_format)},
     )
     # The comparison below also pins column order ("A" before "B").
     expected = pd.DataFrame({
         "A": [1, 2, np.nan, np.nan],
         "B": [np.nan, np.nan, 3, 4],
     })
     assert_frame_equal(result, expected)
 def test_error_different_types(self):
     # "A" is declared text in Tab 1 but number in Tab 2; the expected
     # result is an error message string, not a dataframe.
     number_tab = TabOutput(
         "tab-2",
         "Tab 2",
         {"A": RenderColumn("A", "number", "{}")},
         pd.DataFrame({"A": [3, 4]}),
     )
     settings = {
         "tabs": [number_tab],
         "add_source_column": False,
         "source_column_name": "",
     }
     result = render(
         pd.DataFrame({"A": ["x", "y"]}),
         params=settings,
         tab_name="Tab 1",
         input_columns={"A": RenderColumn("A", "text", None)},
     )
     expected_message = (
         'Cannot concatenate column "A" of type "number" in "Tab 2" to '
         'column "A" of type "text" in "Tab 1". Please convert one or the '
         "other so they are the same type."
     )
     self.assertEqual(result, expected_message)
 def test_add_source_column(self):
     # With add_source_column=True and source_column_name="S", the expected
     # output carries an "S" column naming the tab each row came from.
     number_format = "{}"
     second_tab = TabOutput(
         "tab-2",
         "Tab 2",
         {"A": RenderColumn("A", "number", number_format)},
         pd.DataFrame({"A": [3, 4]}),
     )
     result = render(
         pd.DataFrame({"A": [1, 2]}),
         params={
             "tabs": [second_tab],
             "add_source_column": True,
             "source_column_name": "S",
         },
         tab_name="Tab 1",
         input_columns={"A": RenderColumn("A", "number", number_format)},
     )
     # The source column is expected _first_ ...
     expected = pd.DataFrame({
         "S": ["Tab 1", "Tab 1", "Tab 2", "Tab 2"],
         "A": [1, 2, 3, 4],
     })
     # ... and as a categorical, so the repeated tab names are not stored
     # as uselessly copied bytes.
     expected = expected.astype({"S": "category"})
     assert_frame_equal(result, expected)
    def test_prevent_overwrite(self):
        # Both tabs already have a "B" column and "B" is requested from the
        # right tab; the expected result is an error message string.
        right_columns = {
            "A": RenderColumn("A", "number", "{}"),
            "B": RenderColumn("B", "text", None),
        }
        right_tab = TabOutput(
            "slug",
            "Tab 2",
            right_columns,
            pd.DataFrame({"A": ["1", "2"], "B": ["X", "Y"]}),
        )
        params = {
            "right_tab": right_tab,
            "join_columns": {"on": ["A"], "right": ["B"]},
            "type": "left",
        }
        result = render(
            pd.DataFrame({"A": [1, 2, 3], "B": ["x", "y", "z"]}),
            params,
            input_columns={
                "A": RenderColumn("A", "number", "{}"),
                "B": RenderColumn("B", "text", None),
            },
        )

        expected_message = (
            'You tried to add "B" from Tab 2, but your table already has that '
            "column. Please rename the column in one of the tabs, or unselect "
            "the column."
        )
        self.assertEqual(result, expected_message)
    def test_on_types_differ(self):
        # The join column "A" is number on the left and text on the right;
        # the expected result is an error message naming both types.
        right_tab = TabOutput(
            "slug",
            "Tab 2",
            {
                "A": RenderColumn("A", "text", None),
                "C": RenderColumn("C", "text", None),
            },
            pd.DataFrame({"A": ["1", "2"], "C": ["X", "Y"]}),
        )
        left_columns = {
            "A": RenderColumn("A", "number", "{}"),
            "B": RenderColumn("B", "text", None),
        }
        result = render(
            pd.DataFrame({"A": [1, 2, 3], "B": ["x", "y", "z"]}),
            {
                "right_tab": right_tab,
                "join_columns": {"on": ["A"], "right": ["C"]},
                "type": "left",
            },
            input_columns=left_columns,
        )

        expected_message = (
            'Column "A" is *number* in this tab and *text* in Tab 2. '
            "Please convert one or the other so they are both the same type."
        )
        self.assertEqual(result, expected_message)
    def test_import_columns_without_formats(self):
        # The tab mixes number, datetime and text columns; only the number
        # column "A" is expected to show up in "column_formats".
        timestamps = pd.Series(
            ["2012-01-01", "2015-02-03", "2019-05-23"],
            dtype="datetime64[ns]",
        )
        dataframe = pd.DataFrame({
            "A": [1, 2, 3],
            "B": timestamps,
            "C": ["a", "b", "c"],
        })
        tab_columns = {
            "A": RenderColumn("A", "number", "{,.2f}"),
            "B": RenderColumn("B", "datetime", None),
            "C": RenderColumn("C", "text", None),
        }

        result = render(
            pd.DataFrame(),
            {"tab": TabOutput("tab-2", "Tab 2", tab_columns, dataframe)},
        )

        assert_frame_equal(result["dataframe"], dataframe)
        self.assertEqual(result["column_formats"], {"A": "{,.2f}"})
 def test_convert_float(self):
     # Each selected number column is expected back as text rendered with
     # that column's own format: "{:.2f}" for A, "{:d}" for B.
     source = pd.DataFrame({"A": [1.111], "B": [2.6]})
     column_specs = {
         "A": RenderColumn("A", "number", "{:.2f}"),
         "B": RenderColumn("B", "number", "{:d}"),
     }
     result = render(source, {"colnames": ["A", "B"]}, input_columns=column_specs)
     expected = pd.DataFrame({"A": ["1.11"], "B": ["2"]})
     assert_frame_equal(result, expected)
 def test_convert_null(self):
     # A null in a number column is expected to stay null while the
     # non-null value becomes its formatted string.
     source = pd.DataFrame({"A": [1, np.nan]})
     column_specs = {"A": RenderColumn("A", "number", "{:,d}")}
     result = render(source, {"colnames": ["A"]}, input_columns=column_specs)
     expected = pd.DataFrame({"A": ["1", np.nan]})
     assert_frame_equal(result, expected)
 def test_convert_str(self):
     # A text column is expected to come back with the same values.
     source = pd.DataFrame({"A": ["a"]})
     column_specs = {"A": RenderColumn("A", "text", None)}
     result = render(source, {"colnames": ["A"]}, input_columns=column_specs)
     expected = pd.DataFrame({"A": ["a"]})
     assert_frame_equal(result, expected)
Beispiel #10
0
 def test_arrow_date32_column(self):
     # A date32 field with metadata {"unit": "month"} is expected to map to
     # RenderColumn("A", "date", "month").
     date_field = pa.field("A", pa.date32(), metadata={"unit": "month"})
     actual = arrow_schema_to_render_columns(pa.schema([date_field]))
     expected = {"A": RenderColumn("A", "date", "month")}
     self.assertEqual(actual, expected)
Beispiel #11
0
 def test_arrow_schema_uint8_column(self):
     # A uint8 field with metadata {"format": "{:,d}"} is expected to map to
     # a "number" RenderColumn carrying that same format string.
     number_format = "{:,d}"
     uint8_field = pa.field("A", pa.uint8(), metadata={"format": number_format})
     actual = arrow_schema_to_render_columns(pa.schema([uint8_field]))
     expected = {"A": RenderColumn("A", "number", number_format)}
     self.assertEqual(actual, expected)
Beispiel #12
0
 def test_arrow_timestamp_column(self):
     # A nanosecond timestamp field without metadata is expected to map to
     # RenderColumn("A", "timestamp", None).
     timestamp_field = pa.field("A", pa.timestamp("ns"))
     actual = arrow_schema_to_render_columns(pa.schema([timestamp_field]))
     expected = {"A": RenderColumn("A", "timestamp", None)}
     self.assertEqual(actual, expected)
Beispiel #13
0
 def test_arrow_schema_category_column(self):
     # A dictionary-encoded string field (int32 indices) is expected to map
     # to a plain "text" RenderColumn.
     dictionary_type = pa.dictionary(pa.int32(), pa.string())
     schema = pa.schema([pa.field("A", dictionary_type)])
     actual = arrow_schema_to_render_columns(schema)
     expected = {"A": RenderColumn("A", "text", None)}
     self.assertEqual(actual, expected)
 def test_convert_numbers_all_null(self):
     # An all-null float64 column is expected back as an all-null column of
     # dtype object.
     nulls = [np.nan, np.nan]
     source = pd.DataFrame({"A": nulls}, dtype=np.float64)
     column_specs = {"A": RenderColumn("A", "number", "{:d}")}
     result = render(source, {"colnames": ["A"]}, input_columns=column_specs)
     expected = pd.DataFrame({"A": nulls}, dtype=object)
     assert_frame_equal(result, expected)
 def test_NOP(self):
     # When first applied, no columns are selected ("colnames" is empty), so
     # the input is expected back unchanged.
     source = pd.DataFrame({"A": [0.006]})
     column_specs = {"A": RenderColumn("A", "number", "{:.2f}")}
     result = render(source, {"colnames": []}, input_columns=column_specs)
     expected = pd.DataFrame({"A": [0.006]})
     assert_frame_equal(result, expected)
 def test_right_join_delete_unused_categories_in_input_columns(self):
     # Left tab: "A" is the join column, "B" is an extra column; both are
     # categorical with two categories each.
     left = pd.DataFrame(
         {
             "A": pd.Series(["a", "b"], dtype="category"),
             "B": pd.Series(["c", "d"], dtype="category"),
         }
     )
     # Right tab: categorical join column "A" plus a plain "C" column.
     right = pd.DataFrame(
         {"A": pd.Series(["a"], dtype="category"), "C": ["e"]}
     )
     right_tab = TabOutput(
         "slug",
         "Tab 2",
         {
             "A": RenderColumn("A", "text", None),
             "C": RenderColumn("C", "text", None),
         },
         right,
     )
     params = {
         "right_tab": right_tab,
         "join_columns": {"on": ["A"], "right": ["C"]},
         "type": "right",
     }
     result = render(
         left,
         params,
         input_columns={
             "A": RenderColumn("A", "text", None),
             "B": RenderColumn("B", "text", None),
         },
     )
     # The values 'b' and 'd' do not appear in the result, so they should
     # not remain as categories in the result dataframe either.
     expected = pd.DataFrame(
         {
             "A": pd.Series(["a"], dtype="category"),
             "B": pd.Series(["c"], dtype="category"),
             "C": ["e"],
         }
     )
     assert_frame_equal(result["dataframe"], expected)
 def test_happy_path(self):
     # Two tabs with the same number column "A": the expected output has
     # Tab 1's rows followed by Tab 2's rows.
     number_format = "{}"
     second_tab = TabOutput(
         "tab-2",
         "Tab 2",
         {"A": RenderColumn("A", "number", number_format)},
         pd.DataFrame({"A": [3, 4]}),
     )
     settings = {
         "tabs": [second_tab],
         "add_source_column": False,
         "source_column_name": "",
     }
     result = render(
         pd.DataFrame({"A": [1, 2]}),
         params=settings,
         tab_name="Tab 1",
         input_columns={"A": RenderColumn("A", "number", number_format)},
     )
     expected = pd.DataFrame({"A": [1, 2, 3, 4]})
     assert_frame_equal(result, expected)
 def test_convert_datetime(self):
     # Datetime values are expected back as "YYYY-MM-DD" strings.
     timestamps = [
         np.datetime64("2018-01-01"),
         np.datetime64("2019-02-13"),
     ]
     source = pd.DataFrame({"A": timestamps})
     column_specs = {"A": RenderColumn("A", "datetime", None)}
     result = render(source, {"colnames": ["A"]}, input_columns=column_specs)
     expected = pd.DataFrame({"A": ["2018-01-01", "2019-02-13"]})
     assert_frame_equal(result, expected)
 def test_inner_join_delete_unused_categories_in_all_columns(self):
     # In both tabs "A" is the join column and the second column is an
     # extra one; every column is categorical with two categories.
     left = pd.DataFrame(
         {
             "A": pd.Series(["a", "b"], dtype="category"),
             "B": pd.Series(["c", "d"], dtype="category"),
         }
     )
     right = pd.DataFrame(
         {
             "A": pd.Series(["a", "x"], dtype="category"),
             "C": pd.Series(["e", "y"], dtype="category"),
         }
     )
     right_tab = TabOutput(
         "slug",
         "Tab 2",
         {
             "A": RenderColumn("A", "text", None),
             "C": RenderColumn("C", "text", None),
         },
         right,
     )
     params = {
         "right_tab": right_tab,
         "join_columns": {"on": ["A"], "right": ["C"]},
         "type": "inner",
     }
     result = render(
         left,
         params,
         input_columns={
             "A": RenderColumn("A", "text", None),
             "B": RenderColumn("B", "text", None),
         },
     )
     # The values 'b', 'd', 'x' and 'y' do not appear in the result, so the
     # result dtypes should not list them as categories.
     expected = pd.DataFrame(
         {"A": ["a"], "B": ["c"], "C": ["e"]}, dtype="category"
     )
     assert_frame_equal(result["dataframe"], expected)
 def test_coerce_categories_and_str(self):
     # Tab 1 stores "A" as a categorical, Tab 2 as plain strings; the
     # expected output is a plain string column.
     categorical_input = pd.DataFrame({"A": ["a", "b"]}, dtype="category")
     string_tab = TabOutput(
         "tab-2",
         "Tab 2",
         {"A": RenderColumn("A", "text", None)},
         pd.DataFrame({"A": ["c", "d"]}),
     )
     settings = {
         "tabs": [string_tab],
         "add_source_column": False,
         "source_column_name": "",
     }
     result = render(
         categorical_input,
         params=settings,
         tab_name="Tab 1",
         input_columns={"A": RenderColumn("A", "text", None)},
     )
     expected = pd.DataFrame({"A": ["a", "b", "c", "d"]})
     assert_frame_equal(result, expected)
 def test_left_join_delete_unused_categories_in_added_columns(self):
     # Left join on categorical "A"; the right tab contributes categorical
     # "B", whose value 'y' belongs to a row ('z') that has no match.
     left = pd.DataFrame({"A": ["a", "b"]}, dtype="category")
     right = pd.DataFrame(
         {
             "A": pd.Series(["a", "z"], dtype="category"),
             "B": pd.Series(["x", "y"], dtype="category"),
         }
     )
     right_tab = TabOutput(
         "slug",
         "Tab 2",
         {
             "A": RenderColumn("A", "text", None),
             "B": RenderColumn("B", "text", None),
         },
         right,
     )
     params = {
         "right_tab": right_tab,
         "join_columns": {"on": ["A"], "right": ["B"]},
         "type": "left",
     }
     result = render(
         left,
         params,
         input_columns={"A": RenderColumn("A", "text", None)},
     )
     # The expected "B" column is built from ["x", NaN] only, so its
     # categories hold nothing that came from the unmatched right row.
     expected = pd.DataFrame(
         {
             "A": pd.Series(["a", "b"], dtype="category"),
             "B": pd.Series(["x", np.nan], dtype="category"),
         }
     )
     assert_frame_equal(result["dataframe"], expected)
 def test_happy_path(self):
     # Starting from an empty input, the result is expected to carry the
     # referenced tab's dataframe and the format of its number column.
     tab_frame = pd.DataFrame({"A": [3, 4]})
     tab = TabOutput(
         "tab-2",
         "Tab 2",
         {"A": RenderColumn("A", "number", "{}")},
         tab_frame,
     )
     result = render(pd.DataFrame(), {"tab": tab})
     assert_frame_equal(result["dataframe"], pd.DataFrame({"A": [3, 4]}))
     self.assertEqual(result["column_formats"], {"A": "{}"})
 def test_left(self):
     # Left join on "A", pulling "C" and "D" from the right tab. Left row
     # A=3 has no match, so NaN is expected in its "C" and "D" cells.
     left = pd.DataFrame({"A": [1, 2, 3], "B": ["x", "y", "z"]})
     right = pd.DataFrame({"A": [1, 2], "C": ["X", "Y"], "D": [0.1, 0.2]})
     right_tab = TabOutput(
         "slug",
         "name",
         {
             "A": RenderColumn("A", "number", "{:,.2f}"),
             "C": RenderColumn("C", "text", None),
             "D": RenderColumn("D", "number", "{:,}"),
         },
         right,
     )
     params = {
         "right_tab": right_tab,
         "join_columns": {"on": ["A"], "right": ["C", "D"]},
         "type": "left",
     }
     result = render(
         left,
         params,
         input_columns={
             "A": RenderColumn("A", "number", "{:d}"),
             "B": RenderColumn("B", "text", None),
         },
     )
     expected_frame = pd.DataFrame(
         {
             "A": [1, 2, 3],
             "B": ["x", "y", "z"],
             "C": ["X", "Y", np.nan],
             "D": [0.1, 0.2, np.nan],
         }
     )
     assert_frame_equal(result["dataframe"], expected_frame)
     # Only the added columns are expected in "column_formats".
     self.assertEqual(result["column_formats"], {"C": None, "D": "{:,}"})
Beispiel #24
0
 def test_arrow_schema_text_column(self):
     # A plain string field is expected to map to
     # RenderColumn("A", "text", None).
     schema = pa.schema([pa.field("A", pa.string())])
     actual = arrow_schema_to_render_columns(schema)
     expected = {"A": RenderColumn("A", "text", None)}
     self.assertEqual(actual, expected)