Example #1
0
class TestToXArray:
    """Tests for ``Series.to_xarray`` and the round trip back to a Series."""

    # Same skip helper as ``test_to_xarray`` below; it replaces a hand-rolled
    # ``skipif`` whose ``_XARRAY_INSTALLED and`` clause was redundant.
    @td.skip_if_no("xarray", min_version="0.10.0")
    @pytest.mark.parametrize("index", tm.all_index_generator(6))
    def test_to_xarray_index_types(self, index):
        """Convert a 6-element Series with each generated index type."""
        from xarray import DataArray

        s = Series(range(6), index=index)
        s.index.name = "foo"
        result = s.to_xarray()
        # repr must not raise
        repr(result)
        assert len(result) == 6
        assert len(result.coords) == 1
        tm.assert_almost_equal(list(result.coords.keys()), ["foo"])
        assert isinstance(result, DataArray)

        # idempotency
        tm.assert_series_equal(
            result.to_series(), s, check_index_type=False, check_categorical=True
        )

    @td.skip_if_no("xarray", min_version="0.7.0")
    def test_to_xarray(self):
        """Convert an empty Series and a MultiIndex-ed Series."""
        from xarray import DataArray

        # empty object-dtype Series with a named index
        s = Series([], dtype=object)
        s.index.name = "foo"
        result = s.to_xarray()
        assert len(result) == 0
        assert len(result.coords) == 1
        tm.assert_almost_equal(list(result.coords.keys()), ["foo"])
        assert isinstance(result, DataArray)

        # two-level MultiIndex; the index is assigned directly, so naming the
        # default index first (as the original did) had no effect
        s = Series(range(6))
        s.index = pd.MultiIndex.from_product(
            [["a", "b"], range(3)], names=["one", "two"]
        )
        result = s.to_xarray()
        assert len(result) == 2
        tm.assert_almost_equal(list(result.coords.keys()), ["one", "two"])
        assert isinstance(result, DataArray)
        tm.assert_series_equal(result.to_series(), s)
Example #2
0
class Generic:
    @property
    def _ndim(self):
        """Value of ``_AXIS_LEN`` on the type under test (``self._typ``)."""
        typ = self._typ
        return typ._AXIS_LEN

    def _axes(self):
        """Return ``_AXIS_ORDERS`` of the type under test (``self._typ``)."""
        typ = self._typ
        return typ._AXIS_ORDERS

    def _construct(self, shape, value=None, dtype=None, **kwargs):
        """
        Construct an object of type ``self._typ`` for the given shape.

        Parameters
        ----------
        shape : int or sequence of int
            An int is expanded to ``(shape,) * self._ndim``.
        value : scalar, array or None, default None
            * None: the data is drawn from ``np.random.randn``.
            * the scalar ``"empty"``: no data is passed (``None``), the dtype
              is forced to float64 and the info-axis keyword is dropped.
            * any other scalar: the data is filled with that value.
            * an array: it is raveled and each element is repeated so the
              result fills ``shape``.
        dtype : optional
            Passed to ``np.empty`` for scalar values and to the constructor.
        **kwargs
            Forwarded to the ``self._typ`` constructor.

        Raises
        ------
        Exception
            If an array ``value`` does not divide evenly into ``shape``.
        """
        if isinstance(shape, int):
            shape = tuple([shape] * self._ndim)
        if value is not None:
            if is_scalar(value):
                if value == "empty":
                    arr = None
                    dtype = np.float64

                    # remove the info axis
                    kwargs.pop(self._typ._info_axis_name, None)
                else:
                    arr = np.empty(shape, dtype=dtype)
                    arr.fill(value)
            else:
                fshape = np.prod(shape)
                arr = value.ravel()
                if fshape % arr.shape[0] != 0:
                    raise Exception("invalid value passed in _construct")

                # np.repeat expects an integer count; floor division is exact
                # here because divisibility was checked above.
                repeats = fshape // arr.shape[0]
                arr = np.repeat(arr, repeats).reshape(shape)
        else:
            arr = np.random.randn(*shape)
        return self._typ(arr, dtype=dtype, **kwargs)

    def _compare(self, result, expected):
        """Hand ``result`` and ``expected`` to the class's ``_comparator``."""
        comparator = self._comparator
        comparator(result, expected)

    def test_rename(self):
        """Rename each axis with a function, a dict and a Series mapper."""
        # single axis
        upper = list("ABCD")
        lower_by_upper = {label: label.lower() for label in upper}
        # relabeling values passed into self.rename
        mappers = [str.lower, lower_by_upper, Series(lower_by_upper)]

        for axis in self._axes():
            obj = self._construct(4, **{axis: upper})

            for mapper in mappers:
                # rename a single axis
                renamed = obj.rename(**{axis: mapper})
                expected = obj.copy()
                setattr(expected, axis, list("abcd"))
                self._compare(renamed, expected)

        # multiple axes at once

    def test_get_numeric_data(self):
        """Check ``_get_numeric_data`` and ``_get_bool_data`` selections."""
        size = 4
        axis_names = self._typ._AXIS_NAMES
        labels = {axis_names[pos]: list(range(size)) for pos in range(self._ndim)}

        # get the numeric data
        randoms = self._construct(size, **labels)
        self._compare(randoms._get_numeric_data(), randoms)

        # non-inclusion
        bool_part = randoms._get_bool_data()
        nothing = self._construct(size, value="empty", **labels)
        self._compare(bool_part, nothing)

        # get the bool data
        flags = np.array([True, True, False, True])
        bools = self._construct(size, value=flags, **labels)
        self._compare(bools._get_numeric_data(), bools)

        # _get_numeric_data includes _get_bool_data, so can't test for
        # non-inclusion

    def test_nonzero(self):
        """Truth-testing an object raises ValueError in every form (GH 4633)."""
        # look at the boolean/nonzero behavior for objects
        msg = f"The truth value of a {self._typ.__name__} is ambiguous"

        # each probe builds the expression whose truth value is requested
        probes = (lambda o: o == 0, lambda o: o == 1, lambda o: o)

        # default (random) data, constant data and all-NaN data
        for extra in ({}, {"value": 1}, {"value": np.nan}):
            obj = self._construct(shape=4, **extra)
            for probe in probes:
                with pytest.raises(ValueError, match=msg):
                    bool(probe(obj))

        # empty
        empty = self._construct(shape=0)
        with pytest.raises(ValueError, match=msg):
            bool(empty)

        # invalid behaviors

        first = self._construct(shape=4, value=1)
        second = self._construct(shape=4, value=1)

        with pytest.raises(ValueError, match=msg):
            if first:
                pass

        with pytest.raises(ValueError, match=msg):
            first and second
        with pytest.raises(ValueError, match=msg):
            first or second
        with pytest.raises(ValueError, match=msg):
            not first

    def test_downcast(self):
        """Check ``_data.downcast(dtypes="infer")`` on int and float data."""

        def downcast_inferred(obj):
            # copy ``obj`` and replace its ``_data`` with the downcast version
            out = obj.copy()
            out._data = obj._data.downcast(dtypes="infer")
            return out

        # test close downcasting

        ints = self._construct(shape=4, value=9, dtype=np.int64)
        self._compare(downcast_inferred(ints), ints)

        whole = self._construct(shape=4, value=9.0)
        as_int = whole.astype(np.int64)
        self._compare(downcast_inferred(whole), as_int)

        fractional = self._construct(shape=4, value=9.5)
        self._compare(downcast_inferred(fractional), fractional)

        # are close
        nearly_whole = self._construct(shape=4, value=9.000000000005)
        downcasted = downcast_inferred(nearly_whole)
        self._compare(downcasted, nearly_whole.astype(np.int64))

    def test_constructor_compound_dtypes(self):
        """Compound dtypes should raise NotImplementedError (see gh-5191)."""
        compound = [("A", "datetime64[h]"), ("B", "str"), ("C", "int32")]
        msg = (
            "compound dtypes are not implemented "
            f"in the {self._typ.__name__} constructor"
        )

        with pytest.raises(NotImplementedError, match=msg):
            self._construct(shape=3, value=1, dtype=compound)

        # these work (though results may be unexpected)
        for simple in ("int64", "float64", "M8[ns]"):
            self._construct(shape=3, value=1, dtype=simple)

    def check_metadata(self, x, y=None):
        """
        Assert on every attribute named in ``x._metadata``.

        With ``y`` given, each attribute of ``x`` must equal the same attribute
        of ``y``; without it, each attribute of ``x`` must be None. Missing
        attributes are read as None.
        """
        for attr in x._metadata:
            actual = getattr(x, attr, None)
            if y is not None:
                assert actual == getattr(y, attr, None)
                continue
            assert actual is None

    def test_metadata_propagation(self):
        """Check that the metadata matches up on the resulting ops."""
        arithmetic = ("__add__", "__sub__", "__truediv__", "__mul__")
        comparisons = ("__eq__", "__le__", "__ge__")

        left = self._construct(shape=3)
        left.name = "foo"
        right = self._construct(shape=3)
        right.name = "bar"

        # ----------
        # preserving
        # ----------

        # simple ops with scalars
        for dunder in arithmetic:
            self.check_metadata(left, getattr(left, dunder)(1))

        # ops with like
        for dunder in arithmetic:
            self.check_metadata(left, getattr(left, dunder)(left))

        # simple boolean
        for dunder in comparisons:
            same = getattr(left, dunder)(left)
            self.check_metadata(left, same)
            self.check_metadata(left, same & same)
            self.check_metadata(left, same | same)

        # combine_first
        self.check_metadata(left, left.combine_first(right))

        # ---------------------------
        # non-preserving (by default)
        # ---------------------------

        # add non-like
        self.check_metadata(left + right)

        # simple boolean
        for dunder in comparisons:

            # this is a name matching op
            same = getattr(left, dunder)(left)
            mixed = getattr(left, dunder)(right)
            self.check_metadata(mixed)
            self.check_metadata(same & mixed)
            self.check_metadata(same | mixed)

    @pytest.mark.parametrize("index", tm.all_index_generator(10))
    def test_head_tail(self, index):
        """Compare ``head``/``tail`` with the matching ``iloc`` slices (GH5370)."""
        obj = self._construct(shape=10)
        setattr(obj, obj._get_axis_name(0), index)

        obj.head()

        check = self._compare
        check(obj.head(), obj.iloc[:5])
        check(obj.tail(), obj.iloc[-5:])

        # 0-len
        check(obj.head(0), obj.iloc[0:0])
        check(obj.tail(0), obj.iloc[0:0])

        # bounded
        past_end = len(obj) + 1
        check(obj.head(past_end), obj)
        check(obj.tail(past_end), obj)

        # neg index
        check(obj.head(-3), obj.head(7))
        check(obj.tail(-3), obj.tail(7))

    def test_sample(self):
        """
        Exercise ``sample`` on a 10-long object (issue 2419).

        Covers reproducibility via ``random_state`` and the global NumPy seed,
        validation of ``n`` / ``frac``, the length of the result, and
        validation and NaN/None handling of ``weights``.
        """
        o = self._construct(shape=10)

        ###
        # Check behavior of random_state argument
        ###

        # Check for stability when receives seed or random state -- run 10
        # times.
        for test in range(10):
            seed = np.random.randint(0, 100)
            self._compare(
                o.sample(n=4, random_state=seed),
                o.sample(n=4, random_state=seed),
            )

            self._compare(
                o.sample(frac=0.7, random_state=seed),
                o.sample(frac=0.7, random_state=seed),
            )

            self._compare(
                o.sample(n=4, random_state=np.random.RandomState(test)),
                o.sample(n=4, random_state=np.random.RandomState(test)),
            )

            self._compare(
                o.sample(frac=0.7, random_state=np.random.RandomState(test)),
                o.sample(frac=0.7, random_state=np.random.RandomState(test)),
            )

            self._compare(
                o.sample(
                    frac=2, replace=True, random_state=np.random.RandomState(test)
                ),
                o.sample(
                    frac=2, replace=True, random_state=np.random.RandomState(test)
                ),
            )

            # re-seeding the global generator must reproduce both draws
            os1, os2 = [], []
            for _ in range(2):
                np.random.seed(test)
                os1.append(o.sample(n=4))
                os2.append(o.sample(frac=0.7))
            self._compare(*os1)
            self._compare(*os2)

        # Check for error when random_state argument invalid.
        with pytest.raises(ValueError):
            o.sample(random_state="astring!")

        ###
        # Check behavior of `frac` and `N`
        ###

        # Giving both frac and N throws error
        with pytest.raises(ValueError):
            o.sample(n=3, frac=0.3)

        # Check that raises right error for negative lengths
        with pytest.raises(ValueError):
            o.sample(n=-3)
        with pytest.raises(ValueError):
            o.sample(frac=-0.3)

        # Make sure float values of `n` give error
        with pytest.raises(ValueError):
            o.sample(n=3.2)

        # Check lengths are right. The comparison must sit outside ``len``;
        # ``len(x == 4)`` only measured the element-wise comparison result and
        # never checked the sample size.
        assert len(o.sample(n=4)) == 4
        assert len(o.sample(frac=0.34)) == 3
        assert len(o.sample(frac=0.36)) == 4

        ###
        # Check weights
        ###

        # Weight length must be right
        with pytest.raises(ValueError):
            o.sample(n=3, weights=[0, 1])

        with pytest.raises(ValueError):
            bad_weights = [0.5] * 11
            o.sample(n=3, weights=bad_weights)

        with pytest.raises(ValueError):
            bad_weight_series = Series([0, 0, 0.2])
            o.sample(n=4, weights=bad_weight_series)

        # Check won't accept negative weights
        with pytest.raises(ValueError):
            bad_weights = [-0.1] * 10
            o.sample(n=3, weights=bad_weights)

        # Check inf and -inf throw errors:
        with pytest.raises(ValueError):
            weights_with_inf = [0.1] * 10
            weights_with_inf[0] = np.inf
            o.sample(n=3, weights=weights_with_inf)

        with pytest.raises(ValueError):
            weights_with_ninf = [0.1] * 10
            weights_with_ninf[0] = -np.inf
            o.sample(n=3, weights=weights_with_ninf)

        # All zeros raises errors
        zero_weights = [0] * 10
        with pytest.raises(ValueError):
            o.sample(n=3, weights=zero_weights)

        # All missing weights
        nan_weights = [np.nan] * 10
        with pytest.raises(ValueError):
            o.sample(n=3, weights=nan_weights)

        # Check np.nan are replaced by zeros.
        weights_with_nan = [np.nan] * 10
        weights_with_nan[5] = 0.5
        self._compare(o.sample(n=1, axis=0, weights=weights_with_nan), o.iloc[5:6])

        # Check None are also replaced by zeros.
        weights_with_None = [None] * 10
        weights_with_None[5] = 0.5
        self._compare(o.sample(n=1, axis=0, weights=weights_with_None), o.iloc[5:6])

    def test_sample_upsampling_without_replacement(self):
        """``frac`` > 1 with ``replace=False`` raises ValueError (GH27451)."""
        frame = pd.DataFrame({"A": list("abc")})
        expected_error = (
            "Replace has to be set to `True` when "
            "upsampling the population `frac` > 1."
        )
        with pytest.raises(ValueError, match=expected_error):
            frame.sample(frac=2, replace=False)

    def test_sample_is_copy(self):
        """Adding a column to a sampled frame emits no warning."""
        # GH-27357, GH-30784: ensure the result of sample is an actual copy and
        # doesn't track the parent dataframe / doesn't give SettingWithCopy warnings
        parent = pd.DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"])
        sampled = parent.sample(3)

        with tm.assert_produces_warning(None):
            sampled["d"] = 1

    def test_size_compat(self):
        """``size`` equals the product of ``shape`` (GH8846)."""
        # size property should be defined
        obj = self._construct(shape=10)
        n_axes = len(obj.axes)
        assert obj.size == np.prod(obj.shape)
        assert obj.size == 10 ** n_axes

    def test_split_compat(self):
        """``np.array_split`` returns the requested number of pieces."""
        # xref GH8846
        obj = self._construct(shape=10)
        for sections in (5, 2):
            assert len(np.array_split(obj, sections)) == sections

    # See gh-12301
    def test_stat_unexpected_keyword(self):
        """Reductions reject an unknown keyword argument with TypeError."""
        obj = self._construct(5)
        # the same four reductions the original spelled out one by one
        for reducer in ("max", "var", "sum", "any"):
            with pytest.raises(TypeError, match="unexpected keyword"):
                getattr(obj, reducer)(epic="Star Wars")

    @pytest.mark.parametrize("func", ["sum", "cumsum", "any", "var"])
    def test_api_compat(self, func):
        """Bound methods expose matching ``__name__`` / ``__qualname__``."""
        # GH 12021
        # compat for __name__, __qualname__
        bound = getattr(self._construct(5), func)
        assert bound.__name__ == func
        assert bound.__qualname__.endswith(func)

    def test_stat_non_defaults_args(self):
        """Reductions reject the ``out`` argument with ValueError."""
        obj = self._construct(5)
        out = np.array([0])
        # the same four reductions the original spelled out one by one
        for reducer in ("max", "var", "sum", "any"):
            with pytest.raises(
                ValueError, match="the 'out' parameter is not supported"
            ):
                getattr(obj, reducer)(out=out)

    def test_truncate_out_of_bounds(self):
        """``truncate`` with bounds outside the data returns it whole (GH11382)."""
        trailing = [1] * (self._ndim - 1)

        # (length of the first axis, an ``after`` bound past that length):
        # first the small case, then the big one
        for length, beyond in ((2e3, 3e3), (2e6, 3e6)):
            obj = self._construct([int(length)] + trailing, dtype="int8", value=1)
            self._compare(obj.truncate(), obj)
            self._compare(obj.truncate(before=0, after=beyond), obj)
            self._compare(obj.truncate(before=-1, after=length), obj)

    @pytest.mark.parametrize(
        "func",
        [
            copy,
            deepcopy,
            lambda x: x.copy(deep=False),
            lambda x: x.copy(deep=True),
        ],
    )
    @pytest.mark.parametrize("shape", [0, 1, 2])
    def test_copy_and_deepcopy(self, shape, func):
        """Every copy flavour yields a distinct but equal object (GH 15444)."""
        original = self._construct(shape)
        duplicate = func(original)
        assert duplicate is not original
        self._compare(duplicate, original)

    @pytest.mark.parametrize(
        "periods,fill_method,limit,exp",
        [
            (1, "ffill", None, [np.nan, np.nan, np.nan, 1, 1, 1.5, 0, 0]),
            (1, "ffill", 1, [np.nan, np.nan, np.nan, 1, 1, 1.5, 0, np.nan]),
            (1, "bfill", None, [np.nan, 0, 0, 1, 1, 1.5, np.nan, np.nan]),
            (1, "bfill", 1, [np.nan, np.nan, 0, 1, 1, 1.5, np.nan, np.nan]),
            (-1, "ffill", None, [np.nan, np.nan, -0.5, -0.5, -0.6, 0, 0, np.nan]),
            (-1, "ffill", 1, [np.nan, np.nan, -0.5, -0.5, -0.6, 0, np.nan, np.nan]),
            (-1, "bfill", None, [0, 0, -0.5, -0.5, -0.6, np.nan, np.nan, np.nan]),
            (-1, "bfill", 1, [np.nan, 0, -0.5, -0.5, -0.6, np.nan, np.nan, np.nan]),
        ],
    )
    def test_pct_change(self, periods, fill_method, limit, exp):
        """Compare ``pct_change`` output against the expected values ``exp``."""
        vals = [np.nan, np.nan, 1, 2, 4, 10, np.nan, np.nan]
        obj = self._typ(vals)
        res = obj.pct_change(periods=periods, fill_method=fill_method, limit=limit)

        # pick the container and assertion matching the exact type under test
        if type(obj) is DataFrame:
            box, check = DataFrame, tm.assert_frame_equal
        else:
            box, check = Series, tm.assert_series_equal
        check(res, box(exp))
Example #3
0
class TestFancy(Base):
    """ pure get/set item & fancy indexing """
    def test_setitem_ndarray_1d(self):
        """
        Setting a 1d ndarray must match the length of the indexer (GH5508).

        A 4-element array assigned to a 3-row selection raises ValueError; the
        same array assigned to a 4-row selection is stored and read back.
        """
        # len of indexer vs length of the 1d ndarray
        df = DataFrame(index=Index(np.arange(1, 11)))
        df["foo"] = np.zeros(10, dtype=np.float64)
        # ``np.complex`` was only an alias of the builtin ``complex`` and has
        # been removed from NumPy; complex128 is the dtype it resolved to.
        df["bar"] = np.zeros(10, dtype=np.complex128)

        # invalid
        with pytest.raises(ValueError):
            df.loc[df.index[2:5], "bar"] = np.array([2.33j, 1.23 + 0.1j, 2.2, 1.0])

        # valid
        df.loc[df.index[2:6], "bar"] = np.array([2.33j, 1.23 + 0.1j, 2.2, 1.0])

        result = df.loc[df.index[2:6], "bar"]
        expected = Series(
            [2.33j, 1.23 + 0.1j, 2.2, 1.0], index=[3, 4, 5, 6], name="bar"
        )
        tm.assert_series_equal(result, expected)

        # dtype getting changed?
        df = DataFrame(index=Index(np.arange(1, 11)))
        df["foo"] = np.zeros(10, dtype=np.float64)
        df["bar"] = np.zeros(10, dtype=np.complex128)

        with pytest.raises(ValueError):
            df[2:5] = np.arange(1, 4) * 1j

    @pytest.mark.parametrize(
        "index", tm.all_index_generator(5), ids=lambda x: type(x).__name__
    )
    @pytest.mark.parametrize(
        "obj",
        [
            lambda i: Series(np.arange(len(i)), index=i),
            lambda i: DataFrame(np.random.randn(len(i), len(i)), index=i, columns=i),
        ],
        ids=["Series", "DataFrame"],
    )
    @pytest.mark.parametrize(
        "idxr, idxr_id",
        [
            (lambda x: x, "getitem"),
            (lambda x: x.loc, "loc"),
            (lambda x: x.iloc, "iloc"),
        ],
    )
    def test_getitem_ndarray_3d(self, index, obj, idxr, idxr_id):
        """Index a Series/DataFrame with a 3d ndarray key (GH 25567)."""
        target = obj(index)
        indexer = idxr(target)
        key = np.random.randint(5, size=(2, 2, 2))

        # any one of these messages is accepted
        msg = "|".join(
            [
                r"Buffer has wrong number of dimensions \(expected 1, got 3\)",
                "Cannot index with multidimensional key",
                r"Wrong number of dimensions. values.ndim != ndim \[3 != 1\]",
                "Index data must be 1-dimensional",
            ]
        )

        # plain Series.__getitem__ on these index kinds is only expected to
        # warn; every other combination must also raise
        warn_only_kinds = [
            "string",
            "datetime64",
            "period",
            "timedelta64",
            "boolean",
            "categorical",
        ]
        warns_without_raising = (
            isinstance(target, Series)
            and idxr_id == "getitem"
            and index.inferred_type in warn_only_kinds
        )

        if warns_without_raising:
            with tm.assert_produces_warning(
                DeprecationWarning, check_stacklevel=False
            ):
                indexer[key]
        else:
            with pytest.raises(ValueError, match=msg):
                with tm.assert_produces_warning(DeprecationWarning):
                    indexer[key]

    @pytest.mark.parametrize(
        "index", tm.all_index_generator(5), ids=lambda x: type(x).__name__
    )
    @pytest.mark.parametrize(
        "obj",
        [
            lambda i: Series(np.arange(len(i)), index=i),
            lambda i: DataFrame(np.random.randn(len(i), len(i)), index=i, columns=i),
        ],
        ids=["Series", "DataFrame"],
    )
    @pytest.mark.parametrize(
        "idxr, idxr_id",
        [
            (lambda x: x, "setitem"),
            (lambda x: x.loc, "loc"),
            (lambda x: x.iloc, "iloc"),
        ],
    )
    def test_setitem_ndarray_3d(self, index, obj, idxr, idxr_id):
        """Assign through a 3d ndarray key on a Series/DataFrame (GH 25567)."""
        target = obj(index)
        indexer = idxr(target)
        key = np.random.randint(5, size=(2, 2, 2))

        # any one of these messages is accepted
        msg = "|".join(
            [
                r"Buffer has wrong number of dimensions \(expected 1, got 3\)",
                # AttributeError
                "'pandas._libs.interval.IntervalTree' object has no attribute "
                "'get_loc'",
                # TypeError
                "unhashable type: 'numpy.ndarray'",
                # TypeError
                "No matching signature found",
                # pandas.core.indexing.IndexingError
                r"^\[\[\[",
                "Index data must be 1-dimensional",
            ]
        )

        # index kinds for which plain Series.__setitem__ is expected to succeed
        accepting_kinds = [
            "floating",
            "string",
            "datetime64",
            "period",
            "timedelta64",
            "boolean",
            "categorical",
        ]
        succeeds = idxr_id == "iloc" or (
            isinstance(target, Series)
            and idxr_id == "setitem"
            and index.inferred_type in accepting_kinds
        )

        if succeeds:
            indexer[key] = 0
        else:
            with pytest.raises((ValueError, AttributeError), match=msg):
                indexer[key] = 0

    def test_inf_upcast(self):
        """Using ``np.inf`` as a label converts the axis to float (GH 16957)."""
        # We should be able to use np.inf as a key
        # np.inf should cause an index to convert to float

        # Test with np.inf in rows
        by_row = DataFrame(columns=[0])
        by_row.loc[1] = 1
        by_row.loc[2] = 2
        by_row.loc[np.inf] = 3

        # make sure we can look up the value
        assert by_row.loc[np.inf, 0] == 3

        tm.assert_index_equal(by_row.index, pd.Float64Index([1, 2, np.inf]))

        # Test with np.inf in columns
        by_col = DataFrame()
        by_col.loc[0, 0] = 1
        by_col.loc[1, 1] = 2
        by_col.loc[0, np.inf] = 3

        tm.assert_index_equal(by_col.columns, pd.Float64Index([0, 1, np.inf]))

    def test_setitem_dtype_upcast(self):
        """Setting a value of another type changes only the target column."""
        # GH3216
        frame = DataFrame([{"a": 1}, {"a": 3, "b": 2}])
        frame["c"] = np.nan
        assert frame["c"].dtype == np.float64

        frame.loc[0, "c"] = "foo"
        expected = DataFrame(
            [
                {"a": 1, "b": np.nan, "c": "foo"},
                {"a": 3, "b": 2, "c": np.nan},
            ]
        )
        tm.assert_frame_equal(frame, expected)

        # GH10280
        labels = {"index": list("ab"), "columns": ["foo", "bar", "baz"]}
        ints = DataFrame(np.arange(6, dtype="int64").reshape(2, 3), **labels)

        for val in [3.14, "wxyz"]:
            left = ints.copy()
            left.loc["a", "bar"] = val
            right = DataFrame([[0, val, 2], [3, 4, 5]], **labels)

            tm.assert_frame_equal(left, right)
            # the untouched columns keep their integer dtype
            assert is_integer_dtype(left["foo"])
            assert is_integer_dtype(left["baz"])

        left = DataFrame(np.arange(6, dtype="int64").reshape(2, 3) / 10.0, **labels)
        left.loc["a", "bar"] = "wxyz"

        right = DataFrame([[0, "wxyz", 0.2], [0.3, 0.4, 0.5]], **labels)

        tm.assert_frame_equal(left, right)
        # the untouched columns keep their float dtype
        assert is_float_dtype(left["foo"])
        assert is_float_dtype(left["baz"])

    def test_dups_fancy_indexing(self):
        """Label selection on frames with duplicate row or column labels."""
        missing = "with any missing labels"

        # GH 3455

        frame = tm.makeCustomDataframe(10, 3)
        frame.columns = ["a", "a", "b"]
        tm.assert_index_equal(frame[["b", "a"]].columns, Index(["b", "a", "a"]))

        # across dtypes
        frame = DataFrame(
            [[1, 2, 1.0, 2.0, 3.0, "foo", "bar"]], columns=list("aaaaaaa")
        )
        frame.head()
        str(frame)
        relabeled = DataFrame([[1, 2, 1.0, 2.0, 3.0, "foo", "bar"]])
        relabeled.columns = list("aaaaaaa")

        # TODO(wesm): unused?
        frame.iloc[:, 4]
        relabeled.iloc[:, 4]

        tm.assert_frame_equal(frame, relabeled)

        # GH 3561, dups not in selected order
        frame = DataFrame(
            {
                "test": [5, 7, 9, 11],
                "test1": [4.0, 5, 6, 7],
                "other": list("abcd"),
            },
            index=["A", "A", "B", "C"],
        )
        rows = ["C", "B"]
        expected = DataFrame(
            {"test": [11, 9], "test1": [7.0, 6], "other": ["d", "c"]},
            index=rows,
        )
        tm.assert_frame_equal(frame.loc[rows], expected)
        tm.assert_frame_equal(frame.loc[Index(rows)], expected)

        rows = ["C", "B", "E"]
        with pytest.raises(KeyError, match=missing):
            frame.loc[rows]

        # see GH5553, make sure we use the right indexer
        rows = ["F", "G", "H", "C", "B", "E"]
        with pytest.raises(KeyError, match=missing):
            frame.loc[rows]

        # List containing only missing label
        dfnu = DataFrame(np.random.randn(5, 3), index=list("AABCD"))
        none_found = re.escape(
            "\"None of [Index(['E'], dtype='object')] are in the [index]\""
        )
        with pytest.raises(KeyError, match=none_found):
            dfnu.loc[["E"]]

        # ToDo: check_index_type can be True after GH 11497

        # GH 4619; duplicate indexer with missing label
        for column in ([0, 1, 2], list("abc")):
            frame = DataFrame({"A": column})
            with pytest.raises(KeyError, match=missing):
                frame.loc[[0, 8, 0]]

        # non unique with non unique selector
        frame = DataFrame({"test": [5, 7, 9, 11]}, index=["A", "A", "B", "C"])
        with pytest.raises(KeyError, match=missing):
            frame.loc[["A", "A", "E"]]

    def test_dups_fancy_indexing2(self):
        """``.loc`` list selection with duplicate labels on either axis."""
        # GH 5835
        # dups on index and missing values
        dup_cols = DataFrame(
            np.random.randn(5, 5), columns=["A", "B", "B", "B", "A"]
        )

        with pytest.raises(KeyError, match="with any missing labels"):
            dup_cols.loc[:, ["A", "B", "C"]]

        # GH 6504, multi-axis indexing
        dup_rows = DataFrame(
            np.random.randn(9, 2),
            index=[1, 1, 1, 2, 2, 2, 3, 3, 3],
            columns=["a", "b"],
        )

        # rows only
        tm.assert_frame_equal(dup_rows.loc[[1, 2]], dup_rows.iloc[0:6])

        # columns only
        tm.assert_frame_equal(dup_rows.loc[:, ["a", "b"]], dup_rows)

        # both axes at once
        tm.assert_frame_equal(
            dup_rows.loc[[1, 2], ["a", "b"]], dup_rows.iloc[0:6, :]
        )

    @pytest.mark.parametrize("case", [lambda s: s, lambda s: s.loc])
    def test_duplicate_int_indexing(self, case):
        """List and scalar lookup of a duplicated int label agree (GH 17347)."""
        ser = pd.Series(range(3), index=[1, 1, 3])
        by_scalar = ser[1]
        by_list = case(ser)[[1]]
        tm.assert_series_equal(by_list, by_scalar)

    def test_indexing_mixed_frame_bug(self):
        """Masked ``.loc`` assignment updates a mixed-dtype frame (GH3492)."""
        frame = DataFrame(
            {
                "a": {1: "aaa", 2: "bbb", 3: "ccc"},
                "b": {1: 111, 2: 222, 3: 333},
            }
        )

        # this works, new column is created correctly
        frame["test"] = frame["a"].apply(lambda v: "_" if v == "aaa" else v)

        # this does not work, ie column test is not changed
        flagged = frame["test"] == "_"
        replacement = frame.loc[flagged, "a"].apply(
            lambda v: "-----" if v == "aaa" else v
        )
        frame.loc[flagged, "test"] = replacement
        assert frame.iloc[0, 2] == "-----"

    def test_multitype_list_index_access(self):
        """List selection with absent labels raises KeyError (GH 10610)."""
        columns = ["a", 20, 21, 22, 23]
        frame = DataFrame(np.random.random((10, 5)), columns=columns)

        expected_error = re.escape("'[-8, 26] not in index'")
        with pytest.raises(KeyError, match=expected_error):
            frame[[22, 26, -8]]
        assert frame[21].shape[0] == frame.shape[0]

    def test_set_index_nan(self):
        """``set_index`` / ``reset_index`` round-trips a NaN key (GH 3586)."""
        row_labels = range(17, 31)

        def by_row(values):
            # key the 14 values by the row labels 17..30
            return dict(zip(row_labels, values))

        frame = DataFrame(
            {
                "PRuid": by_row(
                    ["nonQC"] * 3
                    + ["10", "11", "12", "13", "24", "35", "46", "47", "48", "59", "10"]
                ),
                "QC": by_row([0.0] * 3 + [np.nan] * 4 + [1.0] + [np.nan] * 6),
                "data": by_row(
                    [
                        7.9544899999999998,
                        8.0142609999999994,
                        7.8591520000000008,
                        0.86140349999999999,
                        0.87853110000000001,
                        0.8427041999999999,
                        0.78587700000000005,
                        0.73062459999999996,
                        0.81668560000000001,
                        0.81927080000000008,
                        0.80705009999999999,
                        0.81440240000000008,
                        0.80140849999999997,
                        0.81307740000000006,
                    ]
                ),
                "year": by_row([2006, 2007, 2008] + [1985] * 10 + [1986]),
            }
        ).reset_index()

        indexed = frame.set_index(["year", "PRuid", "QC"])
        round_tripped = indexed.reset_index().reindex(columns=frame.columns)
        tm.assert_frame_equal(round_tripped, frame)

    def test_multi_assign(self):

        # GH 3626, an assignment of a sub-df to a df
        df = DataFrame({
            "FC": ["a", "b", "a", "b", "a", "b"],
            "PF": [0, 0, 0, 0, 1, 1],
            "col1": list(range(6)),
            "col2": list(range(6, 12)),
        })
        df.iloc[1, 0] = np.nan
        df2 = df.copy()

        mask = ~df2.FC.isna()
        cols = ["col1", "col2"]

        dft = df2 * 2
        dft.iloc[3, 3] = np.nan

        expected = DataFrame({
            "FC": ["a", np.nan, "a", "b", "a", "b"],
            "PF": [0, 0, 0, 0, 1, 1],
            "col1": Series([0, 1, 4, 6, 8, 10]),
            "col2": [12, 7, 16, np.nan, 20, 22],
        })

        # frame on rhs
        df2.loc[mask, cols] = dft.loc[mask, cols]
        tm.assert_frame_equal(df2, expected)

        df2.loc[mask, cols] = dft.loc[mask, cols]
        tm.assert_frame_equal(df2, expected)

        # with an ndarray on rhs
        # coerces to float64 because values has float64 dtype
        # GH 14001
        expected = DataFrame({
            "FC": ["a", np.nan, "a", "b", "a", "b"],
            "PF": [0, 0, 0, 0, 1, 1],
            "col1": [0.0, 1.0, 4.0, 6.0, 8.0, 10.0],
            "col2": [12, 7, 16, np.nan, 20, 22],
        })
        df2 = df.copy()
        df2.loc[mask, cols] = dft.loc[mask, cols].values
        tm.assert_frame_equal(df2, expected)
        df2.loc[mask, cols] = dft.loc[mask, cols].values
        tm.assert_frame_equal(df2, expected)

        # broadcasting on the rhs is required
        df = DataFrame(
            dict(
                A=[1, 2, 0, 0, 0],
                B=[0, 0, 0, 10, 11],
                C=[0, 0, 0, 10, 11],
                D=[3, 4, 5, 6, 7],
            ))

        expected = df.copy()
        mask = expected["A"] == 0
        for col in ["A", "B"]:
            expected.loc[mask, col] = df["D"]

        df.loc[df["A"] == 0, ["A", "B"]] = df["D"]
        tm.assert_frame_equal(df, expected)

    def test_setitem_list(self):

        # GH 6043
        # iloc with a list
        df = DataFrame(index=[0, 1], columns=[0])
        df.iloc[1, 0] = [1, 2, 3]
        df.iloc[1, 0] = [1, 2]

        result = DataFrame(index=[0, 1], columns=[0])
        result.iloc[1, 0] = [1, 2]

        tm.assert_frame_equal(result, df)

        # iloc with an object
        class TO:
            def __init__(self, value):
                self.value = value

            def __str__(self) -> str:
                return "[{0}]".format(self.value)

            __repr__ = __str__

            def __eq__(self, other) -> bool:
                return self.value == other.value

            def view(self):
                return self

        df = DataFrame(index=[0, 1], columns=[0])
        df.iloc[1, 0] = TO(1)
        df.iloc[1, 0] = TO(2)

        result = DataFrame(index=[0, 1], columns=[0])
        result.iloc[1, 0] = TO(2)

        tm.assert_frame_equal(result, df)

        # remains object dtype even after setting it back
        df = DataFrame(index=[0, 1], columns=[0])
        df.iloc[1, 0] = TO(1)
        df.iloc[1, 0] = np.nan
        result = DataFrame(index=[0, 1], columns=[0])

        tm.assert_frame_equal(result, df)

    def test_string_slice(self):
        """String keys against object-dtype or empty indexes raise KeyError.

        GH 14424: string indexing against a datetime-like index stored with
        object dtype should raise ``KeyError``.
        """
        # Both lookup spellings are expected to fail the same way.
        lookups = (
            lambda frame: frame["2011"],
            lambda frame: frame.loc["2011", 0],
        )

        object_index = Index([pd.Timestamp("2011-01-01")], dtype=object)
        df = DataFrame([1], object_index)
        assert df.index.is_all_dates
        for lookup in lookups:
            with pytest.raises(KeyError, match="'2011'"):
                lookup(df)

        df = DataFrame()
        assert not df.index.is_all_dates
        for lookup in lookups:
            with pytest.raises(KeyError, match="'2011'"):
                lookup(df)

    def test_astype_assignment(self):

        # GH4312 (iloc)
        df_orig = DataFrame([["1", "2", "3", ".4", 5, 6.0, "foo"]],
                            columns=list("ABCDEFG"))

        df = df_orig.copy()
        df.iloc[:, 0:2] = df.iloc[:, 0:2].astype(np.int64)
        expected = DataFrame([[1, 2, "3", ".4", 5, 6.0, "foo"]],
                             columns=list("ABCDEFG"))
        tm.assert_frame_equal(df, expected)

        df = df_orig.copy()
        df.iloc[:, 0:2] = df.iloc[:, 0:2]._convert(datetime=True, numeric=True)
        expected = DataFrame([[1, 2, "3", ".4", 5, 6.0, "foo"]],
                             columns=list("ABCDEFG"))
        tm.assert_frame_equal(df, expected)

        # GH5702 (loc)
        df = df_orig.copy()
        df.loc[:, "A"] = df.loc[:, "A"].astype(np.int64)
        expected = DataFrame([[1, "2", "3", ".4", 5, 6.0, "foo"]],
                             columns=list("ABCDEFG"))
        tm.assert_frame_equal(df, expected)

        df = df_orig.copy()
        df.loc[:, ["B", "C"]] = df.loc[:, ["B", "C"]].astype(np.int64)
        expected = DataFrame([["1", 2, 3, ".4", 5, 6.0, "foo"]],
                             columns=list("ABCDEFG"))
        tm.assert_frame_equal(df, expected)

        # full replacements / no nans
        df = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]})
        df.iloc[:, 0] = df["A"].astype(np.int64)
        expected = DataFrame({"A": [1, 2, 3, 4]})
        tm.assert_frame_equal(df, expected)

        df = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]})
        df.loc[:, "A"] = df["A"].astype(np.int64)
        expected = DataFrame({"A": [1, 2, 3, 4]})
        tm.assert_frame_equal(df, expected)

    @pytest.mark.parametrize(
        "index,val",
        [
            # plain integer / mixed object indexes
            (Index([0, 1, 2]), 2),
            (Index([0, 1, "2"]), "2"),
            # finite value next to inf / nan
            (Index([0, 1, 2, np.inf, 4]), 4),
            (Index([0, 1, 2, np.nan, 4]), 4),
            # inf / nan themselves
            (Index([0, 1, 2, np.inf]), np.inf),
            (Index([0, 1, 2, np.nan]), np.nan),
        ],
    )
    def test_index_contains(self, index, val):
        """``val in index`` is true for each parametrized pair."""
        found = val in index
        assert found

    @pytest.mark.parametrize(
        "index,val",
        [
            (Index([0, 1, 2]), "2"),
            (Index([0, 1, "2"]), 2),
            (Index([0, 1, 2, np.inf]), 4),
            (Index([0, 1, 2, np.nan]), 4),
            (Index([0, 1, 2, np.inf]), np.nan),
            (Index([0, 1, 2, np.nan]), np.inf),
            # Checking if np.inf in an int64 index should not cause an
            # OverflowError. Related to GH 16957.
            # The indexes are built with Index(..., dtype=...) rather than the
            # Int64Index/UInt64Index classes, which pandas 2.0 removed.
            (Index([0, 1, 2], dtype=np.int64), np.inf),
            (Index([0, 1, 2], dtype=np.int64), np.nan),
            (Index([0, 1, 2], dtype=np.uint64), np.inf),
            (Index([0, 1, 2], dtype=np.uint64), np.nan),
        ],
    )
    def test_index_not_contains(self, index, val):
        """``val in index`` is false for each parametrized pair."""
        assert val not in index

    @pytest.mark.parametrize(
        "index,val",
        [
            (Index([0, 1, "2"]), 0),
            (Index([0, 1, "2"]), "2"),
        ],
    )
    def test_mixed_index_contains(self, index, val):
        """Members of a mixed int/str index are found with their own type."""
        # GH 19860
        found = val in index
        assert found

    @pytest.mark.parametrize(
        "index,val",
        [
            (Index([0, 1, "2"]), "1"),
            (Index([0, 1, "2"]), 2),
        ],
    )
    def test_mixed_index_not_contains(self, index, val):
        """A value of the other type is not found in a mixed int/str index."""
        # GH 19860
        found = val in index
        assert not found

    def test_contains_with_float_index(self):
        # GH#22085
        integer_index = pd.Int64Index([0, 1, 2, 3])
        uinteger_index = pd.UInt64Index([0, 1, 2, 3])
        float_index = pd.Float64Index([0.1, 1.1, 2.2, 3.3])

        for index in (integer_index, uinteger_index):
            assert 1.1 not in index
            assert 1.0 in index
            assert 1 in index

        assert 1.1 in float_index
        assert 1.0 not in float_index
        assert 1 not in float_index

    def test_index_type_coercion(self):

        # GH 11836
        # if we have an index type and set it with something that looks
        # to numpy like the same, but is actually, not
        # (e.g. setting with a float or string '0')
        # then we need to coerce to object

        # integer indexes
        for s in [Series(range(5)), Series(range(5), index=range(1, 6))]:

            assert s.index.is_integer()

            for indexer in [lambda x: x.loc, lambda x: x]:
                s2 = s.copy()
                indexer(s2)[0.1] = 0
                assert s2.index.is_floating()
                assert indexer(s2)[0.1] == 0

                s2 = s.copy()
                indexer(s2)[0.0] = 0
                exp = s.index
                if 0 not in s:
                    exp = Index(s.index.tolist() + [0])
                tm.assert_index_equal(s2.index, exp)

                s2 = s.copy()
                indexer(s2)["0"] = 0
                assert s2.index.is_object()

        for s in [Series(range(5), index=np.arange(5.0))]:

            assert s.index.is_floating()

            for idxr in [lambda x: x.loc, lambda x: x]:

                s2 = s.copy()
                idxr(s2)[0.1] = 0
                assert s2.index.is_floating()
                assert idxr(s2)[0.1] == 0

                s2 = s.copy()
                idxr(s2)[0.0] = 0
                tm.assert_index_equal(s2.index, s.index)

                s2 = s.copy()
                idxr(s2)["0"] = 0
                assert s2.index.is_object()
Example #4
0
class TestToXArray:
    """Checks for ``DataFrame.to_xarray`` and the round trip back to a frame."""

    @staticmethod
    def _mixed_frame():
        """Return the three-row frame with eight differently-typed columns."""
        return DataFrame({
            "a": list("abc"),
            "b": list(range(1, 4)),
            "c": np.arange(3, 6).astype("u1"),
            "d": np.arange(4.0, 7.0, dtype="float64"),
            "e": [True, False, True],
            "f": pd.Categorical(list("abc")),
            "g": pd.date_range("20130101", periods=3),
            "h": pd.date_range("20130101", periods=3, tz="US/Eastern"),
        })

    # The version is only inspected when xarray is installed, because ``or``
    # short-circuits on the first operand.
    @pytest.mark.skipif(
        not _XARRAY_INSTALLED
        or LooseVersion(xarray.__version__) < LooseVersion("0.10.0"),
        reason="xarray >= 0.10.0 required",
    )
    @pytest.mark.parametrize("index", tm.all_index_generator(3))
    def test_to_xarray_index_types(self, index):
        """Convert with each generated index type and round-trip the frame."""
        from xarray import Dataset

        df = self._mixed_frame()
        df.index = index
        df.index.name = "foo"
        df.columns.name = "bar"

        dataset = df.to_xarray()
        assert dataset.dims["foo"] == 3
        assert len(dataset.coords) == 1
        assert len(dataset.data_vars) == 8
        tm.assert_almost_equal(list(dataset.coords.keys()), ["foo"])
        assert isinstance(dataset, Dataset)

        # idempotency
        # categoricals are not preserved
        # datetimes w/tz are preserved
        # column names are lost
        expected = df.copy()
        expected["f"] = expected["f"].astype(object)
        expected.columns.name = None
        round_tripped = dataset.to_dataframe()
        tm.assert_frame_equal(
            round_tripped,
            expected,
            check_index_type=False,
            check_categorical=False,
        )

    @td.skip_if_no("xarray", min_version="0.7.0")
    def test_to_xarray(self):
        """Convert an empty slice and a MultiIndexed frame."""
        from xarray import Dataset

        df = self._mixed_frame()
        df.index.name = "foo"

        empty = df[0:0].to_xarray()
        assert empty.dims["foo"] == 0
        assert isinstance(empty, Dataset)

        # available in 0.7.1
        # MultiIndex
        levels = [["a"], range(3)]
        df.index = pd.MultiIndex.from_product(levels, names=["one", "two"])
        dataset = df.to_xarray()
        assert dataset.dims["one"] == 1
        assert dataset.dims["two"] == 3
        assert len(dataset.coords) == 2
        assert len(dataset.data_vars) == 8
        tm.assert_almost_equal(list(dataset.coords.keys()), ["one", "two"])
        assert isinstance(dataset, Dataset)

        round_tripped = dataset.to_dataframe()
        expected = df.copy()
        expected["f"] = expected["f"].astype(object)
        expected.columns.name = None
        tm.assert_frame_equal(round_tripped, expected, check_index_type=False)
Example #5
0
class TestSeriesMap:
    def test_map(self, datetime_series):
        """``Series.map`` with Series, dict, function and categorical mappers."""
        _, data = tm.getMixedTypeDict()

        source = Series(data["B"], index=data["C"])
        target = Series(data["C"][:4], index=data["D"][:4])

        # The mapper can be a Series or, equivalently, a plain dict.
        for mapper in (source, source.to_dict()):
            merged = target.map(mapper)
            for key, value in merged.items():
                assert value == source[target[key]]

        # function
        doubled = datetime_series.map(lambda x: x * 2)
        tm.assert_series_equal(doubled, datetime_series * 2)

        # GH 10324
        numbers = Series([1, 2, 3, 4])
        parity_cat = Series(["even", "odd", "even", "odd"], dtype="category")
        parity_obj = Series(["even", "odd", "even", "odd"])

        expected = Series(["odd", "even", "odd", np.nan], dtype="category")
        tm.assert_series_equal(numbers.map(parity_cat), expected)
        expected = Series(["odd", "even", "odd", np.nan])
        tm.assert_series_equal(numbers.map(parity_obj), expected)

        # Mapper indexed by a CategoricalIndex vs. a plain Index.
        letters = Series(["a", "b", "c", "d"])
        by_cat_index = Series(
            [1, 2, 3, 4], index=pd.CategoricalIndex(["b", "c", "d", "e"]))
        by_plain_index = Series([1, 2, 3, 4], index=Index(["b", "c", "d", "e"]))

        expected = Series([np.nan, 1, 2, 3])
        tm.assert_series_equal(letters.map(by_cat_index), expected)
        expected = Series([np.nan, 1, 2, 3])
        tm.assert_series_equal(letters.map(by_plain_index), expected)

        # Categorical values as well as a CategoricalIndex.
        letters = Series(["a", "b", "c", "d"])
        by_cat_index = Series(
            ["B", "C", "D", "E"],
            dtype="category",
            index=pd.CategoricalIndex(["b", "c", "d", "e"]),
        )
        by_plain_index = Series(
            ["B", "C", "D", "E"], index=Index(["b", "c", "d", "e"]))

        expected_values = pd.Categorical(
            [np.nan, "B", "C", "D"], categories=["B", "C", "D", "E"])
        expected = Series(expected_values)
        tm.assert_series_equal(letters.map(by_cat_index), expected)
        expected = Series([np.nan, "B", "C", "D"])
        tm.assert_series_equal(letters.map(by_plain_index), expected)

    @pytest.mark.parametrize("index", tm.all_index_generator(10))
    def test_map_empty(self, index):
        """Mapping through an empty dict gives NaN for every element."""
        ser = Series(index)
        mapped = ser.map({})

        all_nan = pd.Series(np.nan, index=ser.index)
        tm.assert_series_equal(mapped, all_nan)

    def test_map_compat(self):
        """Boolean values are looked up in a dict keyed by True/False."""
        # related GH 8024
        labels = [1, 2, 3]
        flags = Series([True, True, False], index=labels)

        mapped = flags.map({True: "foo", False: "bar"})

        tm.assert_series_equal(
            mapped, Series(["foo", "foo", "bar"], index=labels))

    def test_map_int(self):
        """Mapping floats through an int Series gives float64 with NaN gaps.

        ``np.float64`` is used for the dtype checks because the ``np.float_``
        alias was removed in NumPy 2.0; both name the same type on older
        versions.
        """
        left = Series({"a": 1.0, "b": 2.0, "c": 3.0, "d": 4})
        right = Series({1: 11, 2: 22, 3: 33})

        assert left.dtype == np.float64
        assert issubclass(right.dtype.type, np.integer)

        merged = left.map(right)
        assert merged.dtype == np.float64
        # 4 has no entry in ``right`` so it maps to NaN; 3 does have one.
        assert isna(merged["d"])
        assert not isna(merged["c"])

    def test_map_type_inference(self):
        """A function returning ``np.where`` results maps to an integer dtype."""
        ser = Series(range(3))

        def zero_or_one(x):
            return np.where(x == 0, 0, 1)

        mapped = ser.map(zero_or_one)
        assert issubclass(mapped.dtype.type, np.integer)

    def test_map_decimal(self, string_series):
        """Mapping to ``Decimal`` gives an object Series of Decimal values."""
        from decimal import Decimal

        def to_decimal(x):
            return Decimal(str(x))

        mapped = string_series.map(to_decimal)
        assert mapped.dtype == np.object_
        assert isinstance(mapped[0], Decimal)

    def test_map_na_exclusion(self):
        """``na_action="ignore"`` doubling matches ``s * 2`` element-wise."""
        ser = Series([1.5, np.nan, 3, np.nan, 5])

        mapped = ser.map(lambda x: x * 2, na_action="ignore")

        tm.assert_series_equal(mapped, ser * 2)

    def test_map_dict_with_tuple_keys(self):
        """
        Map tuple values through a dict that has tuple keys.

        Due to new MultiIndex-ing behaviour in v0.14.0, dicts with tuple
        keys passed to map were being converted to a multi-index, which
        prevented tuple values from being mapped properly.
        """
        # GH 18496
        label_mappings = {(1, ): "A", (2, ): "B", (3, 4): "A", (5, 6): "B"}
        df = pd.DataFrame({"a": [(1, ), (2, ), (3, 4), (5, 6)]})

        df["labels"] = df["a"].map(label_mappings)
        df["expected_labels"] = pd.Series(["A", "B", "A", "B"], index=df.index)

        # All labels should be filled now
        actual = df["labels"]
        wanted = df["expected_labels"]
        tm.assert_series_equal(actual, wanted, check_names=False)

    def test_map_counter(self):
        """Keys absent from a ``Counter`` mapper map to its default of 0."""
        labels = [1, 2, 3]
        ser = Series(["a", "b", "c"], index=labels)

        # "a" is deliberately left out of the counter.
        counter = Counter({"b": 5, "c": 1})

        mapped = ser.map(counter)
        tm.assert_series_equal(mapped, Series([0, 5, 1], index=labels))

    def test_map_defaultdict(self):
        """Keys absent from a ``defaultdict`` mapper use its default factory."""
        labels = ["a", "b", "c"]
        ser = Series([1, 2, 3], index=labels)

        # Only key 1 is stored; 2 and 3 fall back to the factory value.
        default_dict = defaultdict(lambda: "blank", {1: "stuff"})

        mapped = ser.map(default_dict)
        tm.assert_series_equal(
            mapped, Series(["stuff", "blank", "blank"], index=labels))

    def test_map_dict_na_key(self):
        """A ``np.nan`` dict key is used for NaN values in the Series."""
        # https://github.com/pandas-dev/pandas/issues/17648
        # Checks that np.nan key is appropriately mapped
        mapper = {1: "a", 2: "b", np.nan: "c"}
        ser = Series([1, 2, np.nan])

        mapped = ser.map(mapper)

        tm.assert_series_equal(mapped, Series(["a", "b", "c"]))

    def test_map_dict_subclass_with_missing(self):
        """
        Map through a dict subclass whose ``__missing__`` supplies a default.

        Values without an entry get the ``__missing__`` result (GH #15999).
        """
        class DictWithMissing(dict):
            def __missing__(self, key):
                return "missing"

        mapper = DictWithMissing({3: "three"})
        ser = Series([1, 2, 3])

        mapped = ser.map(mapper)

        tm.assert_series_equal(
            mapped, Series(["missing", "missing", "three"]))

    def test_map_dict_subclass_without_missing(self):
        """A dict subclass without ``__missing__`` maps absent keys to NaN."""
        class DictWithoutMissing(dict):
            pass

        mapper = DictWithoutMissing({3: "three"})
        ser = Series([1, 2, 3])

        mapped = ser.map(mapper)

        tm.assert_series_equal(mapped, Series([np.nan, np.nan, "three"]))

    def test_map_abc_mapping(self, non_mapping_dict_subclass):
        """A non-dict mapping from the fixture works as a ``map`` mapper."""
        # https://github.com/pandas-dev/pandas/issues/29733
        # Check collections.abc.Mapping support as mapper for Series.map
        mapper = non_mapping_dict_subclass({3: "three"})
        ser = Series([1, 2, 3])

        mapped = ser.map(mapper)

        tm.assert_series_equal(mapped, Series([np.nan, np.nan, "three"]))

    def test_map_abc_mapping_with_missing(self, non_mapping_dict_subclass):
        """``__missing__`` on a non-dict mapping does not affect ``map``."""
        # https://github.com/pandas-dev/pandas/issues/29733
        # Check collections.abc.Mapping support as mapper for Series.map
        class NonDictMappingWithMissing(non_mapping_dict_subclass):
            def __missing__(self, key):
                return "missing"

        mapper = NonDictMappingWithMissing({3: "three"})
        ser = Series([1, 2, 3])

        mapped = ser.map(mapper)

        # __missing__ is a dict concept, not a Mapping concept,
        # so it should not change the result!
        tm.assert_series_equal(mapped, Series([np.nan, np.nan, "three"]))

    def test_map_box(self):
        """``apply`` passes boxed Timestamp/Timedelta/Period scalars."""

        def describe_timestamp(x):
            return f"{type(x).__name__}_{x.day}_{x.tz}"

        # naive datetimes: boxed value must be Timestamp instance
        naive = pd.Series(
            [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")])
        assert naive.dtype == "datetime64[ns]"
        res = naive.apply(describe_timestamp)
        tm.assert_series_equal(
            res, pd.Series(["Timestamp_1_None", "Timestamp_2_None"]))

        # tz-aware datetimes
        aware = pd.Series([
            pd.Timestamp("2011-01-01", tz="US/Eastern"),
            pd.Timestamp("2011-01-02", tz="US/Eastern"),
        ])
        assert aware.dtype == "datetime64[ns, US/Eastern]"
        res = aware.apply(describe_timestamp)
        tm.assert_series_equal(
            res,
            pd.Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"]))

        # timedelta
        deltas = pd.Series([pd.Timedelta("1 days"), pd.Timedelta("2 days")])
        assert deltas.dtype == "timedelta64[ns]"
        res = deltas.apply(lambda x: f"{type(x).__name__}_{x.days}")
        tm.assert_series_equal(res, pd.Series(["Timedelta_1", "Timedelta_2"]))

        # period
        periods = pd.Series([
            pd.Period("2011-01-01", freq="M"),
            pd.Period("2011-01-02", freq="M"),
        ])
        assert periods.dtype == "Period[M]"
        res = periods.apply(lambda x: f"{type(x).__name__}_{x.freqstr}")
        tm.assert_series_equal(res, pd.Series(["Period_M", "Period_M"]))

    def test_map_categorical(self):
        """``map`` on an ordered categorical Series.

        Lower-casing every value gives the expected lower-cased ordered
        categorical, mapping everything to "A" gives object dtype, and
        ``na_action="ignore"`` raises ``NotImplementedError``.
        """
        values = pd.Categorical(list("ABBABCD"),
                                categories=list("DCBA"),
                                ordered=True)
        s = pd.Series(values, name="XX", index=list("abcdefg"))

        result = s.map(lambda x: x.lower())
        exp_values = pd.Categorical(list("abbabcd"),
                                    categories=list("dcba"),
                                    ordered=True)
        exp = pd.Series(exp_values, name="XX", index=list("abcdefg"))
        tm.assert_series_equal(result, exp)
        tm.assert_categorical_equal(result.values, exp_values)

        result = s.map(lambda x: "A")
        exp = pd.Series(["A"] * 7, name="XX", index=list("abcdefg"))
        tm.assert_series_equal(result, exp)
        # np.object_ rather than the np.object alias, which NumPy deprecated
        # in 1.20 and removed in 1.24.
        assert result.dtype == np.object_

        with pytest.raises(NotImplementedError):
            s.map(lambda x: x, na_action="ignore")

    def test_map_datetimetz(self):
        """``map`` on a tz-aware datetime Series."""
        tz = "Asia/Tokyo"
        hourly = pd.date_range("2011-01-01", "2011-01-02", freq="H")
        ser = pd.Series(hourly.tz_localize(tz), name="XX")

        # keep tz
        shifted = ser.map(lambda x: x + pd.offsets.Day())
        next_day = pd.date_range("2011-01-02", "2011-01-03", freq="H")
        expected = pd.Series(next_day.tz_localize(tz), name="XX")
        tm.assert_series_equal(shifted, expected)

        # change dtype
        # GH 14506 : Returned dtype changed from int32 to int64
        hours = ser.map(lambda x: x.hour)
        expected = pd.Series(list(range(24)) + [0], name="XX", dtype=np.int64)
        tm.assert_series_equal(hours, expected)

        with pytest.raises(NotImplementedError):
            ser.map(lambda x: x, na_action="ignore")

        # not vectorized
        def tz_name(x):
            # Raises unless the mapper is handed individual Timestamps.
            if isinstance(x, pd.Timestamp):
                return str(x.tz)
            raise ValueError

        names = ser.map(tz_name)
        expected = pd.Series(["Asia/Tokyo"] * 25, name="XX")
        tm.assert_series_equal(names, expected)

    @pytest.mark.parametrize(
        "vals,mapping,exp",
        [
            (list("abc"), {np.nan: "not NaN"}, [np.nan] * 3 + ["not NaN"]),
            (list("abc"), {"a": "a letter"}, ["a letter"] + [np.nan] * 3),
            (list(range(3)), {0: 42}, [42] + [np.nan] * 3),
        ],
    )
    def test_map_missing_mixed(self, vals, mapping, exp):
        """Dict mapping over values that have a trailing NaN appended."""
        # GH20495
        with_nan = pd.Series(vals + [np.nan])

        mapped = with_nan.map(mapping)

        tm.assert_series_equal(mapped, pd.Series(exp))

    @pytest.mark.parametrize(
        "dti,exp",
        [
            (
                Series([1, 2], index=pd.DatetimeIndex([0, 31536000000])),
                DataFrame(np.repeat([[1, 2]], 2, axis=0), dtype="int64"),
            ),
            (
                tm.makeTimeSeries(nper=30),
                DataFrame(np.repeat([[1, 2]], 30, axis=0), dtype="int64"),
            ),
        ],
    )
    def test_apply_series_on_date_time_index_aware_series(self, dti, exp):
        """``apply`` returning a Series over a UTC-localized index works."""
        # GH 25959
        # Calling apply on a localized time series should not cause an error
        aware_index = dti.tz_localize("UTC").index
        as_series = pd.Series(aware_index)

        result = as_series.apply(lambda _: pd.Series([1, 2]))

        tm.assert_frame_equal(result, exp)

    def test_apply_scaler_on_date_time_index_aware_series(self):
        """``apply`` returning a scalar over a UTC-localized index works."""
        # GH 25959
        # Calling apply on a localized time series should not cause an error
        localized = tm.makeTimeSeries(nper=30).tz_localize("UTC")
        as_series = pd.Series(localized.index)

        result = as_series.apply(lambda _: 1)

        all_ones = pd.Series(np.ones(30), dtype="int64")
        tm.assert_series_equal(result, all_ones)

    def test_map_float_to_string_precision(self):
        """``str`` applied through ``map`` keeps the float's full digits."""
        # GH 13228
        def stringify(val):
            return str(val)

        one_third = pd.Series(1 / 3)
        as_text = one_third.map(stringify).to_dict()

        assert as_text == {0: "0.3333333333333333"}