def test_column_dups2(self):
        # Checks ``drop`` and ``dropna`` on frames whose column labels contain
        # duplicates.

        def _build_frame():
            # Three random float columns ("A".."C") and one string column ("D"),
            # five rows each.
            return DataFrame(
                {
                    "A": np.random.randn(5),
                    "B": np.random.randn(5),
                    "C": np.random.randn(5),
                    "D": ["a", "b", "c", "d", "e"],
                }
            )

        # drop buggy GH 6240
        base = _build_frame()
        wanted = base.take([0, 1, 1], axis=1)
        # Positions [2, 0, 1, 2, 1] give labels C, A, B, C, B; dropping "C"
        # should leave A, B, B, i.e. the same as taking positions [0, 1, 1].
        with_dups = base.take([2, 0, 1, 2, 1], axis=1)
        assert_frame_equal(with_dups.drop("C", axis=1), wanted)

        # dropna
        frame = _build_frame()
        frame.iloc[2, [0, 1, 2]] = np.nan  # row 2: every float column missing
        frame.iloc[0, 0] = np.nan
        frame.iloc[1, 1] = np.nan
        frame.iloc[:, 3] = np.nan  # last column missing in every row

        # Reference result is computed while the labels are still unique.
        wanted = frame.dropna(subset=["A", "B", "C"], how="all")

        dup_labels = ["A", "A", "B", "C"]
        wanted.columns = dup_labels
        frame.columns = dup_labels

        # With the duplicated labels, ["A", "C"] names positional columns 0, 1
        # and 3; column 3 is entirely NaN, so the same rows are expected to be
        # dropped as in the reference above.
        assert_frame_equal(frame.dropna(subset=["A", "C"], how="all"), wanted)
Example #2
0
    def test_astype(self):
        # Casts ``self.tzframe`` to object and to naive ``datetime64[ns]`` and
        # compares each result against a hand-built frame.
        # NOTE(review): ``self.tzframe`` is built outside this method; the
        # expected values below imply three rows with columns of naive,
        # US/Eastern and CET timestamps and NaT in the middle row of the two
        # tz-aware columns -- confirm against the fixture.

        # astype(object): one list per column, transposed into row-major form
        naive_col = [
            Timestamp("2013-01-01 00:00:00"),
            Timestamp("2013-01-02 00:00:00"),
            Timestamp("2013-01-03 00:00:00"),
        ]
        eastern_col = [
            Timestamp("2013-01-01 00:00:00-0500", tz="US/Eastern"),
            pd.NaT,
            Timestamp("2013-01-03 00:00:00-0500", tz="US/Eastern"),
        ]
        cet_col = [
            Timestamp("2013-01-01 00:00:00+0100", tz="CET"),
            pd.NaT,
            Timestamp("2013-01-03 00:00:00+0100", tz="CET"),
        ]
        object_values = np.array([naive_col, eastern_col, cet_col], dtype=object).T

        as_object = self.tzframe.astype(object)
        wanted_object = DataFrame(object_values, index=self.tzframe.index, columns=self.tzframe.columns)
        assert_frame_equal(as_object, wanted_object)

        # astype("datetime64[ns]"): tz-aware columns are expected as their UTC
        # wall times with the timezone stripped
        as_naive = self.tzframe.astype("datetime64[ns]")

        def _utc_wall_times(zone):
            localized = date_range("20130101", periods=3, tz=zone)
            return localized.tz_convert("UTC").tz_localize(None)

        wanted_naive = DataFrame(
            {
                "A": date_range("20130101", periods=3),
                "B": _utc_wall_times("US/Eastern"),
                "C": _utc_wall_times("CET"),
            }
        )
        wanted_naive.iloc[1, 1] = pd.NaT
        wanted_naive.iloc[1, 2] = pd.NaT
        assert_frame_equal(as_naive, wanted_naive)
Example #3
0
    def test_replace_mixed(self):
        # Exercises ``DataFrame.replace`` on mixed-dtype frames: NaN round
        # trips on ``self.mixed_frame``, then dtype upcasting when the
        # replacement value does not fit the original column dtype.

        def _float_pair():
            return Series([1.0, 2.0], dtype="float64")

        def _int_pair(values):
            return Series(values, dtype="int64")

        # NOTE(review): ``.ix`` is deprecated/removed in later pandas releases.
        # Whether 5:20 and -10: are positional or label-based depends on the
        # index of ``self.mixed_frame``, which is not visible here -- confirm
        # before porting to ``.loc``/``.iloc``.
        self.mixed_frame.ix[5:20, "foo"] = nan
        self.mixed_frame.ix[-10:, "A"] = nan

        # Replacing NaN must match fillna, and replacing back must restore
        # the original frame, for both an int and a float sentinel.
        for sentinel in (-18, -1e8):
            replaced = self.mixed_frame.replace(np.nan, sentinel)
            filled = self.mixed_frame.fillna(value=sentinel)
            assert_frame_equal(replaced, filled)
            assert_frame_equal(replaced.replace(sentinel, nan), self.mixed_frame)

        # int block upcasting
        frame = DataFrame({"A": _float_pair(), "B": _int_pair([0, 1])})
        wanted = DataFrame({"A": _float_pair(), "B": Series([0.5, 1], dtype="float64")})
        assert_frame_equal(frame.replace(0, 0.5), wanted)

        # same replacement, applied in place
        frame.replace(0, 0.5, inplace=True)
        assert_frame_equal(frame, wanted)

        # int block splitting: only "B" becomes float, "C" stays int64
        frame = DataFrame(
            {
                "A": _float_pair(),
                "B": _int_pair([0, 1]),
                "C": _int_pair([1, 2]),
            }
        )
        wanted = DataFrame(
            {
                "A": _float_pair(),
                "B": Series([0.5, 1], dtype="float64"),
                "C": _int_pair([1, 2]),
            }
        )
        assert_frame_equal(frame.replace(0, 0.5), wanted)

        # to object block upcasting
        frame = DataFrame({"A": _float_pair(), "B": _int_pair([0, 1])})
        wanted = DataFrame({"A": Series([1, "foo"], dtype="object"), "B": _int_pair([0, 1])})
        assert_frame_equal(frame.replace(2, "foo"), wanted)

        wanted = DataFrame(
            {
                "A": Series(["foo", "bar"], dtype="object"),
                "B": Series([0, "foo"], dtype="object"),
            }
        )
        assert_frame_equal(frame.replace([1, 2], ["foo", "bar"]), wanted)

        # test case from (origin not recorded): each 3 is replaced by the
        # mean of its own column, passed as a {column: value} dict
        frame = DataFrame({"A": _int_pair([3, 0]), "B": _int_pair([0, 3])})
        replaced = frame.replace(3, frame.mean().to_dict())
        wanted = frame.copy().astype("float64")
        means = frame.mean()
        wanted.iloc[0, 0] = means[0]
        wanted.iloc[1, 1] = means[1]
        assert_frame_equal(replaced, wanted)
Example #4
0
    def test_datetime_with_tz_dtypes(self):
        # Checks the per-column dtypes reported for a frame that mixes a naive
        # datetime column with two timezone-aware ones.
        naive = date_range("20130101", periods=3)
        eastern = date_range("20130101", periods=3, tz="US/Eastern")
        cet = date_range("20130101", periods=3, tz="CET")
        tzframe = DataFrame({"A": naive, "B": eastern, "C": cet})

        # Blank out the middle row of both tz-aware columns before reading
        # the dtypes.
        for position in (1, 2):
            tzframe.iloc[1, position] = pd.NaT

        observed = tzframe.dtypes.sort_index()

        wanted_dtypes = [
            np.dtype("datetime64[ns]"),
            com.DatetimeTZDtype("datetime64[ns, US/Eastern]"),
            com.DatetimeTZDtype("datetime64[ns, CET]"),
        ]
        wanted = Series(wanted_dtypes, ["A", "B", "C"])

        assert_series_equal(observed, wanted)