Exemple #1
0
    def test_subclass_align(self):
        # GH 12983
        df1 = tm.SubclassedDataFrame(
            {"a": [1, 3, 5], "b": [1, 3, 5]}, index=list("ACE")
        )
        df2 = tm.SubclassedDataFrame(
            {"c": [1, 2, 4], "d": [1, 2, 4]}, index=list("ABD")
        )

        res1, res2 = df1.align(df2, axis=0)
        exp1 = tm.SubclassedDataFrame(
            {"a": [1, np.nan, 3, np.nan, 5], "b": [1, np.nan, 3, np.nan, 5]},
            index=list("ABCDE"),
        )
        exp2 = tm.SubclassedDataFrame(
            {"c": [1, 2, np.nan, 4, np.nan], "d": [1, 2, np.nan, 4, np.nan]},
            index=list("ABCDE"),
        )
        assert isinstance(res1, tm.SubclassedDataFrame)
        tm.assert_frame_equal(res1, exp1)
        assert isinstance(res2, tm.SubclassedDataFrame)
        tm.assert_frame_equal(res2, exp2)

        res1, res2 = df1.a.align(df2.c)
        assert isinstance(res1, tm.SubclassedSeries)
        tm.assert_series_equal(res1, exp1.a)
        assert isinstance(res2, tm.SubclassedSeries)
        tm.assert_series_equal(res2, exp2.c)
Exemple #2
0
 def test_replace_list_method(self):
     # https://github.com/pandas-dev/pandas/pull/46018
     df = tm.SubclassedDataFrame({"A": [0, 1, 2]})
     result = df.replace([1, 2], method="ffill")
     expected = tm.SubclassedDataFrame({"A": [0, 0, 0]})
     assert isinstance(result, tm.SubclassedDataFrame)
     tm.assert_frame_equal(result, expected)
Exemple #3
0
    def test_subclassed_wide_to_long(self):
        # GH 9762

        np.random.seed(123)
        x = np.random.randn(3)
        df = tm.SubclassedDataFrame(
            {
                "A1970": {0: "a", 1: "b", 2: "c"},
                "A1980": {0: "d", 1: "e", 2: "f"},
                "B1970": {0: 2.5, 1: 1.2, 2: 0.7},
                "B1980": {0: 3.2, 1: 1.3, 2: 0.1},
                "X": dict(zip(range(3), x)),
            }
        )

        df["id"] = df.index
        exp_data = {
            "X": x.tolist() + x.tolist(),
            "A": ["a", "b", "c", "d", "e", "f"],
            "B": [2.5, 1.2, 0.7, 3.2, 1.3, 0.1],
            "year": [1970, 1970, 1970, 1980, 1980, 1980],
            "id": [0, 1, 2, 0, 1, 2],
        }
        expected = tm.SubclassedDataFrame(exp_data)
        expected = expected.set_index(["id", "year"])[["X", "A", "B"]]
        long_frame = pd.wide_to_long(df, ["A", "B"], i="id", j="year")

        tm.assert_frame_equal(long_frame, expected)
Exemple #4
0
    def test_subclassed_count(self):

        df = tm.SubclassedDataFrame(
            {
                "Person": ["John", "Myla", "Lewis", "John", "Myla"],
                "Age": [24.0, np.nan, 21.0, 33, 26],
                "Single": [False, True, True, True, False],
            }
        )
        result = df.count()
        assert isinstance(result, tm.SubclassedSeries)

        df = tm.SubclassedDataFrame({"A": [1, 0, 3], "B": [0, 5, 6], "C": [7, 8, 0]})
        result = df.count()
        assert isinstance(result, tm.SubclassedSeries)

        df = tm.SubclassedDataFrame(
            [[10, 11, 12, 13], [20, 21, 22, 23], [30, 31, 32, 33], [40, 41, 42, 43]],
            index=MultiIndex.from_tuples(
                list(zip(list("AABB"), list("cdcd"))), names=["aaa", "ccc"]
            ),
            columns=MultiIndex.from_tuples(
                list(zip(list("WWXX"), list("yzyz"))), names=["www", "yyy"]
            ),
        )
        result = df.count(level=1)
        assert isinstance(result, tm.SubclassedDataFrame)

        df = tm.SubclassedDataFrame()
        result = df.count()
        assert isinstance(result, tm.SubclassedSeries)
Exemple #5
0
    def test_subclass_pivot(self):
        # GH 15564
        df = tm.SubclassedDataFrame({
            "index": ["A", "B", "C", "C", "B", "A"],
            "columns": ["One", "One", "One", "Two", "Two", "Two"],
            "values": [1.0, 2.0, 3.0, 3.0, 2.0, 1.0],
        })

        pivoted = df.pivot(index="index", columns="columns", values="values")

        expected = tm.SubclassedDataFrame({
            "One": {
                "A": 1.0,
                "B": 2.0,
                "C": 3.0
            },
            "Two": {
                "A": 1.0,
                "B": 2.0,
                "C": 3.0
            },
        })

        expected.index.name, expected.columns.name = "index", "columns"

        tm.assert_frame_equal(pivoted, expected)
Exemple #6
0
    def test_subclass_align_combinations(self):
        # GH 12983
        df = tm.SubclassedDataFrame({"a": [1, 3, 5], "b": [1, 3, 5]}, index=list("ACE"))
        s = tm.SubclassedSeries([1, 2, 4], index=list("ABD"), name="x")

        # frame + series
        res1, res2 = df.align(s, axis=0)
        exp1 = tm.SubclassedDataFrame(
            {"a": [1, np.nan, 3, np.nan, 5], "b": [1, np.nan, 3, np.nan, 5]},
            index=list("ABCDE"),
        )
        # name is lost when
        exp2 = tm.SubclassedSeries(
            [1, 2, np.nan, 4, np.nan], index=list("ABCDE"), name="x"
        )

        assert isinstance(res1, tm.SubclassedDataFrame)
        tm.assert_frame_equal(res1, exp1)
        assert isinstance(res2, tm.SubclassedSeries)
        tm.assert_series_equal(res2, exp2)

        # series + frame
        res1, res2 = s.align(df)
        assert isinstance(res1, tm.SubclassedSeries)
        tm.assert_series_equal(res1, exp2)
        assert isinstance(res2, tm.SubclassedDataFrame)
        tm.assert_frame_equal(res2, exp1)
Exemple #7
0
    def test_subclass_stack_multi(self):
        # GH 15564
        df = tm.SubclassedDataFrame(
            [[10, 11, 12, 13], [20, 21, 22, 23], [30, 31, 32, 33],
             [40, 41, 42, 43]],
            index=MultiIndex.from_tuples(list(zip(list("AABB"), list("cdcd"))),
                                         names=["aaa", "ccc"]),
            columns=MultiIndex.from_tuples(list(zip(list("WWXX"),
                                                    list("yzyz"))),
                                           names=["www", "yyy"]),
        )

        exp = tm.SubclassedDataFrame(
            [
                [10, 12],
                [11, 13],
                [20, 22],
                [21, 23],
                [30, 32],
                [31, 33],
                [40, 42],
                [41, 43],
            ],
            index=MultiIndex.from_tuples(
                list(zip(list("AAAABBBB"), list("ccddccdd"),
                         list("yzyzyzyz"))),
                names=["aaa", "ccc", "yyy"],
            ),
            columns=Index(["W", "X"], name="www"),
        )

        res = df.stack()
        tm.assert_frame_equal(res, exp)

        res = df.stack("yyy")
        tm.assert_frame_equal(res, exp)

        exp = tm.SubclassedDataFrame(
            [
                [10, 11],
                [12, 13],
                [20, 21],
                [22, 23],
                [30, 31],
                [32, 33],
                [40, 41],
                [42, 43],
            ],
            index=MultiIndex.from_tuples(
                list(zip(list("AAAABBBB"), list("ccddccdd"),
                         list("WXWXWXWX"))),
                names=["aaa", "ccc", "www"],
            ),
            columns=Index(["y", "z"], name="yyy"),
        )

        res = df.stack("www")
        tm.assert_frame_equal(res, exp)
Exemple #8
0
    def test_duplicated(self):

        df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
        result = df.duplicated()
        assert isinstance(result, tm.SubclassedSeries)

        df = tm.SubclassedDataFrame()
        result = df.duplicated()
        assert isinstance(result, tm.SubclassedSeries)
Exemple #9
0
    def test_subclassed_apply(self):
        # GH 19822

        def check_row_subclass(row):
            assert isinstance(row, tm.SubclassedSeries)

        def stretch(row):
            if row["variable"] == "height":
                row["value"] += 0.5
            return row

        df = tm.SubclassedDataFrame(
            [
                ["John", "Doe", "height", 5.5],
                ["Mary", "Bo", "height", 6.0],
                ["John", "Doe", "weight", 130],
                ["Mary", "Bo", "weight", 150],
            ],
            columns=["first", "last", "variable", "value"],
        )

        df.apply(lambda x: check_row_subclass(x))
        df.apply(lambda x: check_row_subclass(x), axis=1)

        expected = tm.SubclassedDataFrame(
            [
                ["John", "Doe", "height", 6.0],
                ["Mary", "Bo", "height", 6.5],
                ["John", "Doe", "weight", 130],
                ["Mary", "Bo", "weight", 150],
            ],
            columns=["first", "last", "variable", "value"],
        )

        result = df.apply(lambda x: stretch(x), axis=1)
        assert isinstance(result, tm.SubclassedDataFrame)
        tm.assert_frame_equal(result, expected)

        expected = tm.SubclassedDataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3],
                                           [1, 2, 3]])

        result = df.apply(lambda x: tm.SubclassedSeries([1, 2, 3]), axis=1)
        assert isinstance(result, tm.SubclassedDataFrame)
        tm.assert_frame_equal(result, expected)

        result = df.apply(lambda x: [1, 2, 3], axis=1, result_type="expand")
        assert isinstance(result, tm.SubclassedDataFrame)
        tm.assert_frame_equal(result, expected)

        expected = tm.SubclassedSeries([[1, 2, 3], [1, 2, 3], [1, 2, 3],
                                        [1, 2, 3]])

        result = df.apply(lambda x: [1, 2, 3], axis=1)
        assert not isinstance(result, tm.SubclassedDataFrame)
        tm.assert_series_equal(result, expected)
Exemple #10
0
    def test_dot(self):

        df = tm.SubclassedDataFrame([[0, 1, -2, -1], [1, 1, 1, 1]])
        s = tm.SubclassedSeries([1, 1, 2, 1])
        result = df.dot(s)
        assert isinstance(result, tm.SubclassedSeries)

        df = tm.SubclassedDataFrame([[0, 1, -2, -1], [1, 1, 1, 1]])
        s = tm.SubclassedDataFrame([1, 1, 2, 1])
        result = df.dot(s)
        assert isinstance(result, tm.SubclassedDataFrame)
Exemple #11
0
    def test_corrwith(self):
        index = ["a", "b", "c", "d", "e"]
        columns = ["one", "two", "three", "four"]
        df1 = tm.SubclassedDataFrame(
            np.random.randn(5, 4), index=index, columns=columns
        )
        df2 = tm.SubclassedDataFrame(
            np.random.randn(4, 4), index=index[:4], columns=columns
        )
        correls = df1.corrwith(df2, axis=1, drop=True, method="kendall")

        assert isinstance(correls, (tm.SubclassedSeries))
Exemple #12
0
    def test_subclass_unstack_multi_mixed(self):
        # GH 15564
        df = tm.SubclassedDataFrame(
            [
                [10, 11, 12.0, 13.0],
                [20, 21, 22.0, 23.0],
                [30, 31, 32.0, 33.0],
                [40, 41, 42.0, 43.0],
            ],
            index=MultiIndex.from_tuples(list(zip(list("AABB"), list("cdcd"))),
                                         names=["aaa", "ccc"]),
            columns=MultiIndex.from_tuples(list(zip(list("WWXX"),
                                                    list("yzyz"))),
                                           names=["www", "yyy"]),
        )

        exp = tm.SubclassedDataFrame(
            [
                [10, 20, 11, 21, 12.0, 22.0, 13.0, 23.0],
                [30, 40, 31, 41, 32.0, 42.0, 33.0, 43.0],
            ],
            index=Index(["A", "B"], name="aaa"),
            columns=MultiIndex.from_tuples(
                list(zip(list("WWWWXXXX"), list("yyzzyyzz"),
                         list("cdcdcdcd"))),
                names=["www", "yyy", "ccc"],
            ),
        )

        res = df.unstack()
        tm.assert_frame_equal(res, exp)

        res = df.unstack("ccc")
        tm.assert_frame_equal(res, exp)

        exp = tm.SubclassedDataFrame(
            [
                [10, 30, 11, 31, 12.0, 32.0, 13.0, 33.0],
                [20, 40, 21, 41, 22.0, 42.0, 23.0, 43.0],
            ],
            index=Index(["c", "d"], name="ccc"),
            columns=MultiIndex.from_tuples(
                list(zip(list("WWWWXXXX"), list("yyzzyyzz"),
                         list("ABABABAB"))),
                names=["www", "yyy", "aaa"],
            ),
        )

        res = df.unstack("aaa")
        tm.assert_frame_equal(res, exp)
Exemple #13
0
def test_groupby_preserves_metadata():
    # GH-37343
    custom_df = tm.SubclassedDataFrame({"a": [1, 2, 3], "b": [1, 1, 2], "c": [7, 8, 9]})
    assert "testattr" in custom_df._metadata
    custom_df.testattr = "hello"
    for _, group_df in custom_df.groupby("c"):
        assert group_df.testattr == "hello"
Exemple #14
0
    def test_indexing_sliced(self):
        # GH 11559
        df = tm.SubclassedDataFrame(
            {"X": [1, 2, 3], "Y": [4, 5, 6], "Z": [7, 8, 9]}, index=["a", "b", "c"]
        )
        res = df.loc[:, "X"]
        exp = tm.SubclassedSeries([1, 2, 3], index=list("abc"), name="X")
        tm.assert_series_equal(res, exp)
        assert isinstance(res, tm.SubclassedSeries)

        res = df.iloc[:, 1]
        exp = tm.SubclassedSeries([4, 5, 6], index=list("abc"), name="Y")
        tm.assert_series_equal(res, exp)
        assert isinstance(res, tm.SubclassedSeries)

        res = df.loc[:, "Z"]
        exp = tm.SubclassedSeries([7, 8, 9], index=list("abc"), name="Z")
        tm.assert_series_equal(res, exp)
        assert isinstance(res, tm.SubclassedSeries)

        res = df.loc["a", :]
        exp = tm.SubclassedSeries([1, 4, 7], index=list("XYZ"), name="a")
        tm.assert_series_equal(res, exp)
        assert isinstance(res, tm.SubclassedSeries)

        res = df.iloc[1, :]
        exp = tm.SubclassedSeries([2, 5, 8], index=list("XYZ"), name="b")
        tm.assert_series_equal(res, exp)
        assert isinstance(res, tm.SubclassedSeries)

        res = df.loc["c", :]
        exp = tm.SubclassedSeries([3, 6, 9], index=list("XYZ"), name="c")
        tm.assert_series_equal(res, exp)
        assert isinstance(res, tm.SubclassedSeries)
def test_groupby_preserves_metadata():
    # GH-37343
    custom_df = tm.SubclassedDataFrame({"a": [1, 2, 3], "b": [1, 1, 2], "c": [7, 8, 9]})
    assert "testattr" in custom_df._metadata
    custom_df.testattr = "hello"
    for _, group_df in custom_df.groupby("c"):
        assert group_df.testattr == "hello"

    # GH-45314
    def func(group):
        assert isinstance(group, tm.SubclassedDataFrame)
        assert hasattr(group, "testattr")
        return group.testattr

    result = custom_df.groupby("c").apply(func)
    expected = tm.SubclassedSeries(["hello"] * 3, index=Index([7, 8, 9], name="c"))
    tm.assert_series_equal(result, expected)

    def func2(group):
        assert isinstance(group, tm.SubclassedSeries)
        assert hasattr(group, "testattr")
        return group.testattr

    custom_series = tm.SubclassedSeries([1, 2, 3])
    custom_series.testattr = "hello"
    result = custom_series.groupby(custom_df["c"]).apply(func2)
    tm.assert_series_equal(result, expected)
    result = custom_series.groupby(custom_df["c"]).agg(func2)
    tm.assert_series_equal(result, expected)
Exemple #16
0
 def test_equals_subclass(self):
     # https://github.com/pandas-dev/pandas/pull/34402
     # allow subclass in both directions
     df1 = DataFrame({"a": [1, 2, 3]})
     df2 = tm.SubclassedDataFrame({"a": [1, 2, 3]})
     assert df1.equals(df2)
     assert df2.equals(df1)
Exemple #17
0
    def test_isin(self):

        df = tm.SubclassedDataFrame(
            {"num_legs": [2, 4], "num_wings": [2, 0]}, index=["falcon", "dog"]
        )
        result = df.isin([0, 2])
        assert isinstance(result, tm.SubclassedDataFrame)
Exemple #18
0
    def test_memory_usage(self):

        df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
        result = df.memory_usage()
        assert isinstance(result, tm.SubclassedSeries)

        result = df.memory_usage(index=False)
        assert isinstance(result, tm.SubclassedSeries)
Exemple #19
0
    def test_subclass_unstack(self):
        # GH 15564
        s = tm.SubclassedSeries([1, 2, 3, 4], index=[list("aabb"), list("xyxy")])

        res = s.unstack()
        exp = tm.SubclassedDataFrame({"x": [1, 3], "y": [2, 4]}, index=["a", "b"])

        tm.assert_frame_equal(res, exp)
Exemple #20
0
    def test_convert_dtypes_preserves_subclass(self, gpd_style_subclass_df):
        # GH 43668
        df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
        result = df.convert_dtypes()
        assert isinstance(result, tm.SubclassedDataFrame)

        result = gpd_style_subclass_df.convert_dtypes()
        assert isinstance(result, type(gpd_style_subclass_df))
Exemple #21
0
    def test_idx(self, idx_method):

        df = tm.SubclassedDataFrame({
            "A": [1, 2, 3],
            "B": [4, 5, 6],
            "C": [7, 8, 9]
        })
        result = getattr(df, idx_method)()
        assert isinstance(result, tm.SubclassedSeries)
Exemple #22
0
    def test_subclassed_reductions(self, all_reductions):
        # GH 25596

        df = tm.SubclassedDataFrame({
            "A": [1, 2, 3],
            "B": [4, 5, 6],
            "C": [7, 8, 9]
        })
        result = getattr(df, all_reductions)()
        assert isinstance(result, tm.SubclassedSeries)
Exemple #23
0
    def test_idxmax_preserves_subclass(self):
        # GH 28330

        df = tm.SubclassedDataFrame({
            "A": [1, 2, 3],
            "B": [4, 5, 6],
            "C": [7, 8, 9]
        })
        result = df.idxmax()
        assert isinstance(result, tm.SubclassedSeries)
Exemple #24
0
    def test_astype_preserves_subclass(self):
        # GH#40810
        df = tm.SubclassedDataFrame({
            "A": [1, 2, 3],
            "B": [4, 5, 6],
            "C": [7, 8, 9]
        })

        result = df.astype({"A": np.int64, "B": np.int32, "C": np.float64})
        assert isinstance(result, tm.SubclassedDataFrame)
Exemple #25
0
    def test_subclassed_melt(self):
        # GH 15564
        cheese = tm.SubclassedDataFrame({
            "first": ["John", "Mary"],
            "last": ["Doe", "Bo"],
            "height": [5.5, 6.0],
            "weight": [130, 150],
        })

        melted = pd.melt(cheese, id_vars=["first", "last"])

        expected = tm.SubclassedDataFrame(
            [
                ["John", "Doe", "height", 5.5],
                ["Mary", "Bo", "height", 6.0],
                ["John", "Doe", "weight", 130],
                ["Mary", "Bo", "weight", 150],
            ],
            columns=["first", "last", "variable", "value"],
        )

        tm.assert_frame_equal(melted, expected)
Exemple #26
0
    def test_subclass_unstack(self):
        # GH 15564
        df = tm.SubclassedDataFrame(
            [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
            index=["a", "b", "c"],
            columns=["X", "Y", "Z"],
        )

        res = df.unstack()
        exp = tm.SubclassedSeries(
            [1, 4, 7, 2, 5, 8, 3, 6, 9], index=[list("XXXYYYZZZ"), list("abcabcabc")]
        )

        tm.assert_series_equal(res, exp)
Exemple #27
0
    def test_subclass_stack(self):
        # GH 15564
        df = tm.SubclassedDataFrame(
            [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
            index=["a", "b", "c"],
            columns=["X", "Y", "Z"],
        )

        res = df.stack()
        exp = tm.SubclassedSeries(
            [1, 2, 3, 4, 5, 6, 7, 8, 9], index=[list("aaabbbccc"), list("XYZXYZXYZ")]
        )

        tm.assert_series_equal(res, exp)
Exemple #28
0
    def test_supported_for_subclass_dataframe(self):
        data = {"a": [1, 2], "b": [3, 4]}
        sdf = tm.SubclassedDataFrame(data, dtype=np.intp)

        expected = DataFrame(data, dtype=np.intp)

        with ensure_clean_path("temp.h5") as path:
            sdf.to_hdf(path, "df")
            result = read_hdf(path, "df")
            tm.assert_frame_equal(result, expected)

        with ensure_clean_path("temp.h5") as path:
            with HDFStore(path) as store:
                store.put("df", sdf)
            result = read_hdf(path, "df")
            tm.assert_frame_equal(result, expected)
Exemple #29
0
    def test_dataframe_metadata(self):
        df = tm.SubclassedDataFrame(
            {"X": [1, 2, 3], "Y": [1, 2, 3]}, index=["a", "b", "c"]
        )
        df.testattr = "XXX"

        assert df.testattr == "XXX"
        assert df[["X"]].testattr == "XXX"
        assert df.loc[["a", "b"], :].testattr == "XXX"
        assert df.iloc[[0, 1], :].testattr == "XXX"

        # see gh-9776
        assert df.iloc[0:1, :].testattr == "XXX"

        # see gh-10553
        unpickled = tm.round_trip_pickle(df)
        tm.assert_frame_equal(df, unpickled)
        assert df._metadata == unpickled._metadata
        assert df.testattr == unpickled.testattr
Exemple #30
0
    def test_asof(self):

        N = 3
        rng = pd.date_range("1/1/1990", periods=N, freq="53s")
        df = tm.SubclassedDataFrame(
            {
                "A": [np.nan, np.nan, np.nan],
                "B": [np.nan, np.nan, np.nan],
                "C": [np.nan, np.nan, np.nan],
            },
            index=rng,
        )

        result = df.asof(rng[-2:])
        assert isinstance(result, tm.SubclassedDataFrame)

        result = df.asof(rng[-2])
        assert isinstance(result, tm.SubclassedSeries)

        result = df.asof("1989-12-31")
        assert isinstance(result, tm.SubclassedSeries)