def test_insert(self):
        # Exercises DataFrame.insert placement, the per-dtype column counts
        # after adding/replacing columns of other dtypes, rejection of an
        # already-present column name, and preservation of ``columns.name``.
        frame = DataFrame(np.random.randn(5, 3), index=np.arange(5), columns=["c", "b", "a"])

        def check_dtype_counts(**counts):
            # element-wise comparison of the reported counts with ``counts``
            matches = frame.get_dtype_counts() == Series(counts)
            self.assertTrue(matches.all())

        # a copy of "a" goes to the very front
        frame.insert(0, "foo", frame["a"])
        self.assert_numpy_array_equal(frame.columns, ["foo", "c", "b", "a"])
        assert_almost_equal(frame["a"], frame["foo"])

        # a copy of "c" goes into the middle
        frame.insert(2, "bar", frame["c"])
        self.assert_numpy_array_equal(frame.columns, ["foo", "c", "bar", "b", "a"])
        assert_almost_equal(frame["c"], frame["bar"])

        # diff dtype

        # new item
        frame["x"] = frame["a"].astype("float32")
        check_dtype_counts(float64=5, float32=1)

        # replacing current (in different block)
        frame["a"] = frame["a"].astype("float32")
        check_dtype_counts(float64=4, float32=2)

        frame["y"] = frame["a"].astype("int32")
        check_dtype_counts(float64=4, float32=2, int32=1)

        # inserting under a name that is already a column must fail
        with assertRaisesRegexp(ValueError, "already exists"):
            frame.insert(1, "a", frame["b"])
        self.assertRaises(ValueError, frame.insert, 1, "c", frame["b"])

        # preserve columns name field
        frame.columns.name = "some_name"
        frame.insert(0, "baz", frame["c"])
        self.assertEqual(frame.columns.name, "some_name")
Example #2
0
    def test_fillna_dtype_conversion(self):
        # Checks the dtypes (and values) produced by fillna on an all-missing
        # object frame, on an empty float block, and its equivalence with
        # replace(np.nan, value).

        # make sure that fillna on an empty frame works
        df = DataFrame(index=["A", "B", "C"], columns=[1, 2, 3, 4, 5])
        result = df.get_dtype_counts().sort_values()
        expected = Series({"object": 5})
        assert_series_equal(result, expected)

        filled = df.fillna(1)
        # The expected frame used to be built and then overwritten without
        # ever being compared; assert on it so the filled values are checked
        # in addition to the dtype counts below.
        expected_frame = DataFrame(1, index=["A", "B", "C"], columns=[1, 2, 3, 4, 5])
        assert_frame_equal(filled, expected_frame)

        result = filled.get_dtype_counts().sort_values()
        expected = Series({"int64": 5})
        assert_series_equal(result, expected)

        # empty block
        df = DataFrame(index=lrange(3), columns=["A", "B"], dtype="float64")
        result = df.fillna("nan")
        expected = DataFrame("nan", index=lrange(3), columns=["A", "B"])
        assert_frame_equal(result, expected)

        # equiv of replace
        df = DataFrame(dict(A=[1, np.nan], B=[1.0, 2.0]))
        for v in ["", 1, np.nan, 1.0]:
            expected = df.replace(np.nan, v)
            result = df.fillna(v)
            assert_frame_equal(result, expected)
Example #3
0
    def test_timedeltas(self):
        # Tracks the dtype counts of a frame holding datetime and timedelta
        # columns as a derived datetime column and an integer column are added.
        dates = Series(date_range("2012-1-1", periods=3, freq="D"))
        deltas = Series([timedelta(days=n) for n in range(3)])
        frame = DataFrame(dict(A=dates, B=deltas))

        def check_counts(counts):
            # both sides are ordered by value before being compared
            observed = frame.get_dtype_counts().sort_values()
            wanted = Series(counts).sort_values()
            assert_series_equal(observed, wanted)

        check_counts({"datetime64[ns]": 1, "timedelta64[ns]": 1})

        # datetime + timedelta yields another datetime column
        frame["C"] = frame["A"] + frame["B"]
        check_counts({"datetime64[ns]": 2, "timedelta64[ns]": 1})

        # mixed int types
        frame["D"] = 1
        check_counts({"datetime64[ns]": 2, "timedelta64[ns]": 1, "int64": 1})
Example #4
0
    def test_insert(self):
        # DataFrame.insert: placement, dtype counts after assigning columns of
        # other dtypes, duplicate-name errors, columns.name preservation, and
        # overwriting a column that was first filled from the index.
        frame = DataFrame(np.random.randn(5, 3), index=np.arange(5), columns=["c", "b", "a"])

        frame.insert(0, "foo", frame["a"])
        columns_with_foo = Index(["foo", "c", "b", "a"])
        self.assert_index_equal(frame.columns, columns_with_foo)
        tm.assert_series_equal(frame["a"], frame["foo"], check_names=False)

        frame.insert(2, "bar", frame["c"])
        columns_with_bar = Index(["foo", "c", "bar", "b", "a"])
        self.assert_index_equal(frame.columns, columns_with_bar)
        tm.assert_almost_equal(frame["c"], frame["bar"], check_names=False)

        # diff dtype
        def counts_match(expected_counts):
            # True when every reported dtype count equals the expected one
            return (frame.get_dtype_counts() == Series(expected_counts)).all()

        # new item
        frame["x"] = frame["a"].astype("float32")
        self.assertTrue(counts_match(dict(float64=5, float32=1)))

        # replacing current (in different block)
        frame["a"] = frame["a"].astype("float32")
        self.assertTrue(counts_match(dict(float64=4, float32=2)))

        frame["y"] = frame["a"].astype("int32")
        self.assertTrue(counts_match(dict(float64=4, float32=2, int32=1)))

        # an existing column name is rejected
        with assertRaisesRegexp(ValueError, "already exists"):
            frame.insert(1, "a", frame["b"])
        self.assertRaises(ValueError, frame.insert, 1, "c", frame["b"])

        # preserve columns name field
        frame.columns.name = "some_name"
        frame.insert(0, "baz", frame["c"])
        self.assertEqual(frame.columns.name, "some_name")

        # GH 13522
        relabelled = DataFrame(index=["A", "B", "C"])
        relabelled["X"] = relabelled.index
        relabelled["X"] = ["x", "y", "z"]
        exp = DataFrame(data={"X": ["x", "y", "z"]}, index=["A", "B", "C"])
        assert_frame_equal(relabelled, exp)
Example #5
0
    def test_frame_no_datetime64_dtype(self):
        # A tz-localized range placed in a frame is expected to report the
        # "M8[ns]" dtype, and the mixed frame below three datetime64[ns]
        # columns plus one object column.
        weekly = date_range("2011/1/1", "2012/1/1", freq="W-FRI")
        weekly_tz = weekly.tz_localize(self.tzstr("US/Eastern"))

        frame = DataFrame({"A": "foo", "B": weekly_tz}, index=weekly)
        self.assertEqual(frame["B"].dtype, "M8[ns]")

        # GH 2810 (with timezones)
        columns = {
            "dr": weekly,
            "dr_tz": weekly_tz,
            "datetimes_naive": [stamp.to_pydatetime() for stamp in weekly],
            "datetimes_with_tz": [stamp.to_pydatetime() for stamp in weekly_tz],
        }
        observed = DataFrame(columns).get_dtype_counts()
        wanted = Series({"datetime64[ns]": 3, "object": 1})
        assert_series_equal(observed, wanted)
Example #6
0
    def test_unstack_dtypes(self):
        # Verifies the dtype counts reported after unstacking frames with a
        # single dtype and with mixed dtypes, then compares two routes to an
        # unstacked frame whose "D" column was cast to int64.

        # GH 2929
        values = [[1, 1, 3, 4], [1, 2, 3, 4], [2, 1, 3, 4], [2, 2, 3, 4]]
        frame = DataFrame(values, columns=list("ABCD"))
        assert_series_equal(frame.get_dtype_counts(), Series({"int64": 4}))

        # single dtype
        indexed = frame.set_index(["A", "B"])
        unstacked = indexed.unstack("B")
        assert_series_equal(unstacked.get_dtype_counts(), Series({"int64": 4}))

        # mixed
        indexed = frame.set_index(["A", "B"])
        indexed["C"] = 3.0
        unstacked = indexed.unstack("B")
        assert_series_equal(unstacked.get_dtype_counts(), Series({"int64": 2, "float64": 2}))

        indexed["D"] = "foo"
        unstacked = indexed.unstack("B")
        assert_series_equal(unstacked.get_dtype_counts(), Series({"float64": 2, "object": 2}))

        # GH7405
        float_pairs = (
            (np.zeros(5), np.zeros(5)),
            (np.arange(5, dtype="f8"), np.arange(5, 10, dtype="f8")),
        )
        for c_vals, d_vals in float_pairs:
            frame = DataFrame(
                {"A": ["a"] * 5, "C": c_vals, "D": d_vals, "B": pd.date_range("2012-01-01", periods=5)}
            )

            # route 1: slice first, unstack, then cast the resulting column
            right = frame.iloc[:3].copy(deep=True).set_index(["A", "B"]).unstack(0)
            right[("D", "a")] = right[("D", "a")].astype("int64")

            # route 2: cast on the indexed frame, then slice and unstack
            indexed = frame.set_index(["A", "B"])
            indexed["D"] = indexed["D"].astype("int64")
            left = indexed.iloc[:3].unstack(0)

            self.assertEqual(left.shape, (3, 2))
            assert_frame_equal(left, right)
    def test_get_numeric_data(self):
        # Checks the dtype counts of a small mixed frame and that
        # _get_numeric_data keeps only the numeric columns (all of them,
        # none of them, or a subset, depending on the frame).
        datetime64name = np.dtype("M8[ns]").name
        objectname = np.dtype(np.object_).name

        df = DataFrame({"a": 1.0, "b": 2, "c": "foo", "f": Timestamp("20010102")}, index=np.arange(10))
        # sort_index returns a new Series rather than sorting in place, so the
        # sorted objects have to be rebound for the ordering to take effect.
        result = df.get_dtype_counts().sort_index()
        expected = Series({"int64": 1, "float64": 1, datetime64name: 1, objectname: 1}).sort_index()
        assert_series_equal(result, expected)

        df = DataFrame(
            {
                "a": 1.0,
                "b": 2,
                "c": "foo",
                "d": np.array([1.0] * 10, dtype="float32"),
                "e": np.array([1] * 10, dtype="int32"),
                "f": np.array([1] * 10, dtype="int16"),
                "g": Timestamp("20010102"),
            },
            index=np.arange(10),
        )

        # label-based selection via .loc (replaces the deprecated .ix)
        result = df._get_numeric_data()
        expected = df.loc[:, ["a", "b", "d", "e", "f"]]
        assert_frame_equal(result, expected)

        # a frame without numeric columns yields a frame with no columns
        only_obj = df.loc[:, ["c", "g"]]
        result = only_obj._get_numeric_data()
        expected = df.loc[:, []]
        assert_frame_equal(result, expected)

        df = DataFrame.from_dict({"a": [1, 2], "b": ["foo", "bar"], "c": [np.pi, np.e]})
        result = df._get_numeric_data()
        expected = DataFrame.from_dict({"a": [1, 2], "c": [np.pi, np.e]})
        assert_frame_equal(result, expected)

        # an all-numeric frame comes back unchanged
        df = result.copy()
        result = df._get_numeric_data()
        expected = df
        assert_frame_equal(result, expected)
Example #8
0
    def test_frame_no_datetime64_dtype(self):
        # after 7822
        # these retain the timezones on dict construction
        weekly = date_range("2011/1/1", "2012/1/1", freq="W-FRI")
        weekly_tz = weekly.tz_localize(self.tzstr("US/Eastern"))
        tz_expected = DatetimeTZDtype("ns", weekly_tz.tzinfo)

        frame = DataFrame({"A": "foo", "B": weekly_tz}, index=weekly)
        self.assertEqual(frame["B"].dtype, tz_expected)

        # GH 2810 (with timezones)
        columns = {
            "dr": weekly,
            "dr_tz": weekly_tz,
            "datetimes_naive": [stamp.to_pydatetime() for stamp in weekly],
            "datetimes_with_tz": [stamp.to_pydatetime() for stamp in weekly_tz],
        }
        observed = DataFrame(columns).get_dtype_counts().sort_index()
        # two naive and two tz-aware datetime columns are expected
        wanted = Series({"datetime64[ns]": 2, str(tz_expected): 2}).sort_index()
        tm.assert_series_equal(observed, wanted)
    def test_construction_with_mixed(self):
        # test construction edge cases with mixed types

        # f7u12, this does not work without extensive workaround
        data = [
            [datetime(2001, 1, 5), nan, datetime(2001, 1, 2)],
            [datetime(2000, 1, 2), datetime(2000, 1, 3), datetime(2000, 1, 1)],
        ]
        df = DataFrame(data)

        # check dtypes
        result = df.get_dtype_counts().sort_values()
        expected = Series({"datetime64[ns]": 3})
        # This comparison was previously missing: result/expected were
        # overwritten below without ever being checked.
        assert_series_equal(result, expected)

        # mixed-type frames: a scalar datetime / timedelta assigned as a
        # column should be stored with the ns-resolution dtypes
        self.mixed_frame["datetime"] = datetime.now()
        self.mixed_frame["timedelta"] = timedelta(days=1, seconds=1)
        self.assertEqual(self.mixed_frame["datetime"].dtype, "M8[ns]")
        self.assertEqual(self.mixed_frame["timedelta"].dtype, "m8[ns]")
        result = self.mixed_frame.get_dtype_counts().sort_values()
        expected = Series({"float64": 4, "object": 1, "datetime64[ns]": 1, "timedelta64[ns]": 1}).sort_values()
        assert_series_equal(result, expected)