Exemple #1
0
    def test_prevent_overwrite(self):
        """Adding a right-tab column that already exists yields an error.

        Both tabs carry a "B" column and the join asks to add the right
        tab's "B", so ``render`` is expected to return a message string
        rather than a joined table.
        """
        input_table = pd.DataFrame({'A': [1, 2, 3], 'B': ['x', 'y', 'z']})
        other_table = pd.DataFrame({'A': ['1', '2'], 'B': ['X', 'Y']})
        other_columns = {
            'A': RenderColumn('A', 'number', '{}'),
            'B': RenderColumn('B', 'text', None),
        }
        params = {
            'right_tab': TabOutput(
                'slug', 'Tab 2', other_columns, other_table
            ),
            'join_columns': {'on': ['A'], 'right': ['B']},
            'type': 'left',
        }
        input_columns = {
            'A': RenderColumn('A', 'number', '{}'),
            'B': RenderColumn('B', 'text', None),
        }

        result = render(input_table, params, input_columns=input_columns)

        expected_message = (
            'You tried to add "B" from Tab 2, but your table already has that '
            'column. Please rename the column in one of the tabs, or unselect '
            'the column.'
        )
        self.assertEqual(result, expected_message)
Exemple #2
0
 def test_left(self):
     """Left join keeps every input row and appends the chosen columns.

     The input row with A=3 has no match in the right tab, so its added
     "C" and "D" cells are expected to be NaN. ``column_formats`` is
     expected to list only the added columns, with the right tab's
     formats.
     """
     input_table = pd.DataFrame({'A': [1, 2, 3], 'B': ['x', 'y', 'z']})
     other_table = pd.DataFrame(
         {'A': [1, 2], 'C': ['X', 'Y'], 'D': [0.1, 0.2]}
     )
     other_columns = {
         'A': RenderColumn('A', 'number', '{:,.2f}'),
         'C': RenderColumn('C', 'text', None),
         'D': RenderColumn('D', 'number', '{:,}'),
     }
     params = {
         'right_tab': TabOutput('slug', 'name', other_columns, other_table),
         'join_columns': {'on': ['A'], 'right': ['C', 'D']},
         'type': 'left',
     }
     input_columns = {
         'A': RenderColumn('A', 'number', '{:d}'),
         'B': RenderColumn('B', 'text', None),
     }

     result = render(input_table, params, input_columns=input_columns)

     expected_table = pd.DataFrame({
         'A': [1, 2, 3],
         'B': ['x', 'y', 'z'],
         'C': ['X', 'Y', np.nan],
         'D': [0.1, 0.2, np.nan],
     })
     assert_frame_equal(result['dataframe'], expected_table)
     self.assertEqual(result['column_formats'], {'C': None, 'D': '{:,}'})
Exemple #3
0
 def test_left_join_delete_unused_categories_in_added_columns(self):
     """Left join drops categories that no longer occur in added columns."""
     input_table = pd.DataFrame({'A': ['a', 'b']}, dtype='category')
     other_table = pd.DataFrame({
         'A': pd.Series(['a', 'z'], dtype='category'),
         'B': pd.Series(['x', 'y'], dtype='category'),
     })
     other_columns = {
         'A': RenderColumn('A', 'text', None),
         'B': RenderColumn('B', 'text', None),
     }
     params = {
         'right_tab': TabOutput('slug', 'Tab 2', other_columns, other_table),
         'join_columns': {'on': ['A'], 'right': ['B']},
         'type': 'left',
     }
     input_columns = {'A': RenderColumn('A', 'text', None)}

     result = render(input_table, params, input_columns=input_columns)

     # The right-tab row keyed 'z' matches nothing on the left, so its 'B'
     # value 'y' never reaches the result and must not linger as a category
     # of the added 'B' column.
     expected_table = pd.DataFrame({
         'A': pd.Series(['a', 'b'], dtype='category'),
         'B': pd.Series(['x', np.nan], dtype='category'),
     })
     assert_frame_equal(result['dataframe'], expected_table)
Exemple #4
0
 def test_inner_join_delete_unused_categories_in_all_columns(self):
     """Inner join drops unused categories from every output column."""
     input_table = pd.DataFrame({
         'A': pd.Series(['a', 'b'], dtype='category'),  # join key
         'B': pd.Series(['c', 'd'], dtype='category'),  # carried along
     })
     other_table = pd.DataFrame({
         'A': pd.Series(['a', 'x'], dtype='category'),  # join key
         'C': pd.Series(['e', 'y'], dtype='category'),  # column to add
     })
     other_columns = {
         'A': RenderColumn('A', 'text', None),
         'C': RenderColumn('C', 'text', None),
     }
     params = {
         'right_tab': TabOutput('slug', 'Tab 2', other_columns, other_table),
         'join_columns': {'on': ['A'], 'right': ['C']},
         'type': 'inner',
     }
     input_columns = {
         'A': RenderColumn('A', 'text', None),
         'B': RenderColumn('B', 'text', None),
     }

     result = render(input_table, params, input_columns=input_columns)

     # Only the A='a' row survives the inner join, so 'b', 'd', 'x' and 'y'
     # are absent from the result and must be absent from the dtypes too.
     expected_table = pd.DataFrame(
         {'A': ['a'], 'B': ['c'], 'C': ['e']},
         dtype='category',
     )
     assert_frame_equal(result['dataframe'], expected_table)
    def test_prevent_overwrite(self):
        """Refuse to add a right-tab column that clashes with an input one.

        "B" exists in both tabs and is requested from the right tab, so
        ``render`` is expected to hand back an explanatory message string.
        """
        this_tab = pd.DataFrame({"A": [1, 2, 3], "B": ["x", "y", "z"]})
        tab2 = pd.DataFrame({"A": ["1", "2"], "B": ["X", "Y"]})
        tab2_columns = {
            "A": RenderColumn("A", "number", "{}"),
            "B": RenderColumn("B", "text", None),
        }
        tab2_output = TabOutput("slug", "Tab 2", tab2_columns, tab2)
        params = {
            "right_tab": tab2_output,
            "join_columns": {"on": ["A"], "right": ["B"]},
            "type": "left",
        }
        this_tab_columns = {
            "A": RenderColumn("A", "number", "{}"),
            "B": RenderColumn("B", "text", None),
        }

        result = render(this_tab, params, input_columns=this_tab_columns)

        expected_message = (
            'You tried to add "B" from Tab 2, but your table already has that '
            "column. Please rename the column in one of the tabs, or unselect "
            "the column."
        )
        self.assertEqual(result, expected_message)
    def test_on_types_differ(self):
        """Joining on a column whose declared type differs is an error.

        "A" is declared *number* for the input tab and *text* for the right
        tab, so ``render`` is expected to return a message string.
        """
        this_tab = pd.DataFrame({"A": [1, 2, 3], "B": ["x", "y", "z"]})
        tab2 = pd.DataFrame({"A": ["1", "2"], "C": ["X", "Y"]})
        tab2_columns = {
            "A": RenderColumn("A", "text", None),
            "C": RenderColumn("C", "text", None),
        }
        tab2_output = TabOutput("slug", "Tab 2", tab2_columns, tab2)
        params = {
            "right_tab": tab2_output,
            "join_columns": {"on": ["A"], "right": ["C"]},
            "type": "left",
        }
        this_tab_columns = {
            "A": RenderColumn("A", "number", "{}"),
            "B": RenderColumn("B", "text", None),
        }

        result = render(this_tab, params, input_columns=this_tab_columns)

        expected_message = (
            'Column "A" is *number* in this tab and *text* in Tab 2. '
            "Please convert one or the other so they are both the same type."
        )
        self.assertEqual(result, expected_message)
 def test_inner_join_delete_unused_categories_in_all_columns(self):
     """Inner join prunes categories that vanish from any output column."""
     this_tab = pd.DataFrame({
         "A": pd.Series(["a", "b"], dtype="category"),  # join key
         "B": pd.Series(["c", "d"], dtype="category"),  # carried along
     })
     tab2 = pd.DataFrame({
         "A": pd.Series(["a", "x"], dtype="category"),  # join key
         "C": pd.Series(["e", "y"], dtype="category"),  # column to add
     })
     tab2_columns = {
         "A": RenderColumn("A", "text", None),
         "C": RenderColumn("C", "text", None),
     }
     tab2_output = TabOutput("slug", "Tab 2", tab2_columns, tab2)
     params = {
         "right_tab": tab2_output,
         "join_columns": {"on": ["A"], "right": ["C"]},
         "type": "inner",
     }
     this_tab_columns = {
         "A": RenderColumn("A", "text", None),
         "B": RenderColumn("B", "text", None),
     }

     result = render(this_tab, params, input_columns=this_tab_columns)

     # Only the A="a" row is common to both tabs; 'b', 'd', 'x' and 'y' do
     # not appear in the result, so the dtypes should not contain them.
     expected_table = pd.DataFrame(
         {"A": ["a"], "B": ["c"], "C": ["e"]},
         dtype="category",
     )
     assert_frame_equal(result["dataframe"], expected_table)
 def test_right_join_delete_unused_categories_in_input_columns(self):
     """Right join prunes categories that vanish from the input columns."""
     this_tab = pd.DataFrame({
         "A": pd.Series(["a", "b"], dtype="category"),  # join key
         "B": pd.Series(["c", "d"], dtype="category"),  # carried along
     })
     tab2 = pd.DataFrame({
         "A": pd.Series(["a"], dtype="category"),  # join key
         "C": ["e"],
     })
     tab2_columns = {
         "A": RenderColumn("A", "text", None),
         "C": RenderColumn("C", "text", None),
     }
     tab2_output = TabOutput("slug", "Tab 2", tab2_columns, tab2)
     params = {
         "right_tab": tab2_output,
         "join_columns": {"on": ["A"], "right": ["C"]},
         "type": "right",
     }
     this_tab_columns = {
         "A": RenderColumn("A", "text", None),
         "B": RenderColumn("B", "text", None),
     }

     result = render(this_tab, params, input_columns=this_tab_columns)

     # The right tab only has A="a", so the input row holding 'b' and 'd'
     # is dropped; those values should not remain as categories.
     expected_table = pd.DataFrame({
         "A": pd.Series(["a"], dtype="category"),
         "B": pd.Series(["c"], dtype="category"),
         "C": ["e"],
     })
     assert_frame_equal(result["dataframe"], expected_table)
 def test_left(self):
     """Left join preserves all input rows and adds the selected columns.

     A=3 is missing from the right tab, so the added "C" and "D" values
     for that row are expected to be NaN. ``column_formats`` is expected
     to cover just the added columns, using the right tab's formats.
     """
     this_tab = pd.DataFrame({"A": [1, 2, 3], "B": ["x", "y", "z"]})
     tab2 = pd.DataFrame({"A": [1, 2], "C": ["X", "Y"], "D": [0.1, 0.2]})
     tab2_columns = {
         "A": RenderColumn("A", "number", "{:,.2f}"),
         "C": RenderColumn("C", "text", None),
         "D": RenderColumn("D", "number", "{:,}"),
     }
     tab2_output = TabOutput("slug", "name", tab2_columns, tab2)
     params = {
         "right_tab": tab2_output,
         "join_columns": {"on": ["A"], "right": ["C", "D"]},
         "type": "left",
     }
     this_tab_columns = {
         "A": RenderColumn("A", "number", "{:d}"),
         "B": RenderColumn("B", "text", None),
     }

     result = render(this_tab, params, input_columns=this_tab_columns)

     expected_table = pd.DataFrame({
         "A": [1, 2, 3],
         "B": ["x", "y", "z"],
         "C": ["X", "Y", np.nan],
         "D": [0.1, 0.2, np.nan],
     })
     assert_frame_equal(result["dataframe"], expected_table)
     self.assertEqual(result["column_formats"], {"C": None, "D": "{:,}"})
 def test_left_join_delete_unused_categories_in_added_columns(self):
     """Left join prunes categories that never reach the added columns."""
     this_tab = pd.DataFrame({"A": ["a", "b"]}, dtype="category")
     tab2 = pd.DataFrame({
         "A": pd.Series(["a", "z"], dtype="category"),
         "B": pd.Series(["x", "y"], dtype="category"),
     })
     tab2_columns = {
         "A": RenderColumn("A", "text", None),
         "B": RenderColumn("B", "text", None),
     }
     tab2_output = TabOutput("slug", "Tab 2", tab2_columns, tab2)
     params = {
         "right_tab": tab2_output,
         "join_columns": {"on": ["A"], "right": ["B"]},
         "type": "left",
     }
     this_tab_columns = {"A": RenderColumn("A", "text", None)}

     result = render(this_tab, params, input_columns=this_tab_columns)

     # The right-tab row keyed 'z' has no partner on the left, so its 'B'
     # value 'y' is absent from the result and should not be a category of
     # the added 'B' column.
     expected_table = pd.DataFrame({
         "A": pd.Series(["a", "b"], dtype="category"),
         "B": pd.Series(["x", np.nan], dtype="category"),
     })
     assert_frame_equal(result["dataframe"], expected_table)
Exemple #11
0
    def test_on_types_differ(self):
        """Mismatched declared types on the join column produce an error.

        The input tab declares "A" as *number* while the right tab declares
        it as *text*; ``render`` is expected to return a message string.
        """
        input_table = pd.DataFrame({'A': [1, 2, 3], 'B': ['x', 'y', 'z']})
        other_table = pd.DataFrame({'A': ['1', '2'], 'C': ['X', 'Y']})
        other_columns = {
            'A': RenderColumn('A', 'text', None),
            'C': RenderColumn('C', 'text', None),
        }
        params = {
            'right_tab': TabOutput(
                'slug', 'Tab 2', other_columns, other_table
            ),
            'join_columns': {'on': ['A'], 'right': ['C']},
            'type': 'left',
        }
        input_columns = {
            'A': RenderColumn('A', 'number', '{}'),
            'B': RenderColumn('B', 'text', None),
        }

        result = render(input_table, params, input_columns=input_columns)

        expected_message = (
            'Column "A" is *number* in this tab and *text* in Tab 2. '
            'Please convert one or the other so they are both the same type.'
        )
        self.assertEqual(result, expected_message)
Exemple #12
0
 def test_left(self):
     """Join two tabs on "A" and expect every input row to be kept.

     Here ``render`` is given plain-string column selections and a
     numeric ``type`` of 0, and its return value is compared directly
     against a DataFrame. NOTE(review): 0 presumably selects a left join,
     going by the test name -- confirm against ``render``.
     """
     input_table = pd.DataFrame({'A': [1, 2, 3], 'B': ['x', 'y', 'z']})
     other_table = pd.DataFrame({'A': [1, 2], 'C': ['X', 'Y']})
     other_columns = {
         'A': RenderColumn('A', 'number'),
         'C': RenderColumn('C', 'text'),
     }
     params = {
         'right_tab': TabOutput('slug', 'name', other_columns, other_table),
         'join_columns': {'on': 'A', 'right': 'C'},
         'type': 0,
     }
     input_columns = {
         'A': RenderColumn('A', 'number'),
         'B': RenderColumn('B', 'text'),
     }

     result = render(input_table, params, input_columns=input_columns)

     # A=3 has no counterpart in the right tab, hence NaN in added "C".
     expected_table = pd.DataFrame({
         'A': [1, 2, 3],
         'B': ['x', 'y', 'z'],
         'C': ['X', 'Y', np.nan],
     })
     assert_frame_equal(result, expected_table)