Beispiel #1
0
    def test_constructor_bool(self):
        # GH 10648
        data = np.array([False, False, True, True, False, False])
        arr = SparseArray(data, fill_value=False, dtype=bool)

        assert arr.dtype == SparseDtype(bool)
        tm.assert_numpy_array_equal(arr.sp_values, np.array([True, True]))
        # Behavior change: np.asarray densifies.
        # tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr))
        tm.assert_numpy_array_equal(arr.sp_index.indices,
                                    np.array([2, 3], np.int32))

        dense = arr.to_dense()
        assert dense.dtype == bool
        tm.assert_numpy_array_equal(dense, data)
Beispiel #2
0
 def test_dataframe_dummies_mix_default(self, df, sparse, dtype):
     result = get_dummies(df, sparse=sparse, dtype=dtype)
     if sparse:
         arr = SparseArray
         typ = SparseDtype(dtype, 0)
     else:
         arr = np.array
         typ = dtype
     expected = DataFrame({'C': [1, 2, 3],
                           'A_a': arr([1, 0, 1], dtype=typ),
                           'A_b': arr([0, 1, 0], dtype=typ),
                           'B_b': arr([1, 1, 0], dtype=typ),
                           'B_c': arr([0, 0, 1], dtype=typ)})
     expected = expected[['C', 'A_a', 'A_b', 'B_b', 'B_c']]
     assert_frame_equal(result, expected)
Beispiel #3
0
 def test_frame_basic_dtypes(self):
     for _, row in self.sdf.iterrows():
         assert row.dtype == SparseDtype(object)
     tm.assert_sp_series_equal(self.sdf['string'],
                               self.string_series,
                               check_names=False)
     tm.assert_sp_series_equal(self.sdf['int'],
                               self.int_series,
                               check_names=False)
     tm.assert_sp_series_equal(self.sdf['float'],
                               self.float_series,
                               check_names=False)
     tm.assert_sp_series_equal(self.sdf['object'],
                               self.object_series,
                               check_names=False)
Beispiel #4
0
    def test_astype(self):
        # float -> float
        arr = SparseArray([None, None, 0, 2])
        result = arr.astype("Sparse[float32]")
        expected = SparseArray([None, None, 0, 2], dtype=np.dtype("float32"))
        tm.assert_sp_array_equal(result, expected)

        dtype = SparseDtype("float64", fill_value=0)
        result = arr.astype(dtype)
        expected = SparseArray._simple_new(
            np.array([0.0, 2.0], dtype=dtype.subtype), IntIndex(4, [2, 3]), dtype
        )
        tm.assert_sp_array_equal(result, expected)

        dtype = SparseDtype("int64", 0)
        result = arr.astype(dtype)
        expected = SparseArray._simple_new(
            np.array([0, 2], dtype=np.int64), IntIndex(4, [2, 3]), dtype
        )
        tm.assert_sp_array_equal(result, expected)

        arr = SparseArray([0, np.nan, 0, 1], fill_value=0)
        with pytest.raises(ValueError, match="NA"):
            arr.astype("Sparse[i8]")
Beispiel #5
0
def test_from_to_scipy_object(spmatrix, fill_value):
    # GH 4343
    dtype = object
    columns = list('cd')
    index = list('ab')

    if (spmatrix is scipy.sparse.dok_matrix
            and LooseVersion(scipy.__version__) >= LooseVersion('0.19.0')):
        pytest.skip("dok_matrix from object does not work in SciPy >= 0.19")

    # Make one ndarray and from it one sparse matrix, both to be used for
    # constructing frames and comparing results
    arr = np.eye(2, dtype=dtype)
    try:
        spm = spmatrix(arr)
        assert spm.dtype == arr.dtype
    except (TypeError, AssertionError):
        # If conversion to sparse fails for this spmatrix type and arr.dtype,
        # then the combination is not currently supported in NumPy, so we
        # can just skip testing it thoroughly
        return

    sdf = SparseDataFrame(spm,
                          index=index,
                          columns=columns,
                          default_fill_value=fill_value)

    # Expected result construction is kind of tricky for all
    # dtype-fill_value combinations; easiest to cast to something generic
    # and except later on
    rarr = arr.astype(object)
    rarr[arr == 0] = np.nan
    expected = SparseDataFrame(rarr, index=index, columns=columns).fillna(
        fill_value if fill_value is not None else np.nan)

    # Assert frame is as expected
    sdf_obj = sdf.astype(SparseDtype(object, fill_value))
    tm.assert_sp_frame_equal(sdf_obj, expected)
    tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense())

    # Assert spmatrices equal
    assert dict(sdf.to_coo().todok()) == dict(spm.todok())

    # Ensure dtype is preserved if possible
    res_dtype = object
    tm.assert_contains_all(sdf.dtypes.apply(lambda dtype: dtype.subtype),
                           {np.dtype(res_dtype)})
    assert sdf.to_coo().dtype == res_dtype
Beispiel #6
0
    def test_constructor_float32(self):
        # GH 10648
        data = np.array([1., np.nan, 3], dtype=np.float32)
        arr = SparseArray(data, dtype=np.float32)

        assert arr.dtype == SparseDtype(np.float32)
        tm.assert_numpy_array_equal(arr.sp_values,
                                    np.array([1, 3], dtype=np.float32))
        # Behavior change: np.asarray densifies.
        # tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr))
        tm.assert_numpy_array_equal(arr.sp_index.indices,
                                    np.array([0, 2], dtype=np.int32))

        for dense in [arr.to_dense(), arr.values]:
            assert dense.dtype == np.float32
            tm.assert_numpy_array_equal(dense, data)
Beispiel #7
0
def test_apply_nonuq():
    orig = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=['a', 'a', 'c'])
    sparse = orig.to_sparse()
    res = sparse.apply(lambda s: s[0], axis=1)
    exp = orig.apply(lambda s: s[0], axis=1)

    # dtype must be kept
    assert res.dtype == SparseDtype(np.int64)

    # ToDo: apply must return subclassed dtype
    assert isinstance(res, Series)
    tm.assert_series_equal(res.to_dense(), exp)

    # df.T breaks
    sparse = orig.T.to_sparse()
    res = sparse.apply(lambda s: s[0], axis=0)  # noqa
    exp = orig.T.apply(lambda s: s[0], axis=0)
Beispiel #8
0
 def test_dataframe_dummies_mix_default(self, df, sparse, dtype):
     result = get_dummies(df, sparse=sparse, dtype=dtype)
     if sparse:
         arr = SparseArray
         typ = SparseDtype(dtype, 0)
     else:
         arr = np.array
         typ = dtype
     expected = DataFrame({
         "C": [1, 2, 3],
         "A_a": arr([1, 0, 1], dtype=typ),
         "A_b": arr([0, 1, 0], dtype=typ),
         "B_b": arr([1, 1, 0], dtype=typ),
         "B_c": arr([0, 0, 1], dtype=typ),
     })
     expected = expected[["C", "A_a", "A_b", "B_b", "B_c"]]
     assert_frame_equal(result, expected)
Beispiel #9
0
    def test_dataframe_dummies_with_categorical(self, df, sparse, dtype):
        df['cat'] = pd.Categorical(['x', 'y', 'y'])
        result = get_dummies(df, sparse=sparse, dtype=dtype).sort_index(axis=1)
        if sparse:
            arr = SparseArray
            typ = SparseDtype(dtype, 0)
        else:
            arr = np.array
            typ = dtype

        expected = DataFrame({'C': [1, 2, 3],
                              'A_a': arr([1, 0, 1], dtype=typ),
                              'A_b': arr([0, 1, 0], dtype=typ),
                              'B_b': arr([1, 1, 0], dtype=typ),
                              'B_c': arr([0, 0, 1], dtype=typ),
                              'cat_x': arr([1, 0, 0], dtype=typ),
                              'cat_y': arr([0, 1, 1], dtype=typ)
                              }).sort_index(axis=1)

        assert_frame_equal(result, expected)
Beispiel #10
0
    def test_dataframe_dummies_with_categorical(self, df, sparse, dtype):
        df["cat"] = pd.Categorical(["x", "y", "y"])
        result = get_dummies(df, sparse=sparse, dtype=dtype).sort_index(axis=1)
        if sparse:
            arr = SparseArray
            typ = SparseDtype(dtype, 0)
        else:
            arr = np.array
            typ = dtype

        expected = DataFrame({
            "C": [1, 2, 3],
            "A_a": arr([1, 0, 1], dtype=typ),
            "A_b": arr([0, 1, 0], dtype=typ),
            "B_b": arr([1, 1, 0], dtype=typ),
            "B_c": arr([0, 0, 1], dtype=typ),
            "cat_x": arr([1, 0, 0], dtype=typ),
            "cat_y": arr([0, 1, 1], dtype=typ),
        }).sort_index(axis=1)

        assert_frame_equal(result, expected)
Beispiel #11
0
    def tests_indexing_with_sparse(self):
        # GH 13985

        for kind in ['integer', 'block']:
            for fill in [True, False, np.nan]:
                arr = pd.SparseArray([1, 2, 3], kind=kind)
                indexer = pd.SparseArray([True, False, True],
                                         fill_value=fill,
                                         dtype=bool)

                tm.assert_sp_array_equal(
                    pd.SparseArray([1, 3], kind=kind),
                    arr[indexer],
                )

                s = pd.SparseSeries(arr,
                                    index=['a', 'b', 'c'],
                                    dtype=np.float64)

                exp = pd.SparseSeries([1, 3],
                                      index=['a', 'c'],
                                      dtype=SparseDtype(
                                          np.float64, s.fill_value),
                                      kind=kind)
                tm.assert_sp_series_equal(s[indexer], exp)
                tm.assert_sp_series_equal(s.loc[indexer], exp)
                tm.assert_sp_series_equal(s.iloc[indexer], exp)

                indexer = pd.SparseSeries(indexer, index=['a', 'b', 'c'])
                tm.assert_sp_series_equal(s[indexer], exp)
                tm.assert_sp_series_equal(s.loc[indexer], exp)

                msg = ("iLocation based boolean indexing cannot use an "
                       "indexable as a mask")
                with tm.assert_raises_regex(ValueError, msg):
                    s.iloc[indexer]
Beispiel #12
0
def test_construct_from_string(string, expected):
    result = SparseDtype.construct_from_string(string)
    assert result == expected
Beispiel #13
0
def test__get_dtype_sparse():
    ser = pd.SparseSeries([1, 2], dtype="int32")
    expected = SparseDtype("int32")
    assert com._get_dtype(ser) == expected
    assert com._get_dtype(ser.dtype) == expected
Beispiel #14
0
class TestSparseArray:
    def setup_method(self, method):
        self.arr_data = np.array(
            [np.nan, np.nan, 1, 2, 3, np.nan, 4, 5, np.nan, 6])
        self.arr = SparseArray(self.arr_data)
        self.zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0)

    def test_constructor_dtype(self):
        arr = SparseArray([np.nan, 1, 2, np.nan])
        assert arr.dtype == SparseDtype(np.float64, np.nan)
        assert arr.dtype.subtype == np.float64
        assert np.isnan(arr.fill_value)

        arr = SparseArray([np.nan, 1, 2, np.nan], fill_value=0)
        assert arr.dtype == SparseDtype(np.float64, 0)
        assert arr.fill_value == 0

        arr = SparseArray([0, 1, 2, 4], dtype=np.float64)
        assert arr.dtype == SparseDtype(np.float64, np.nan)
        assert np.isnan(arr.fill_value)

        arr = SparseArray([0, 1, 2, 4], dtype=np.int64)
        assert arr.dtype == SparseDtype(np.int64, 0)
        assert arr.fill_value == 0

        arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=np.int64)
        assert arr.dtype == SparseDtype(np.int64, 0)
        assert arr.fill_value == 0

        arr = SparseArray([0, 1, 2, 4], dtype=None)
        assert arr.dtype == SparseDtype(np.int64, 0)
        assert arr.fill_value == 0

        arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=None)
        assert arr.dtype == SparseDtype(np.int64, 0)
        assert arr.fill_value == 0

    def test_constructor_dtype_str(self):
        result = SparseArray([1, 2, 3], dtype="int")
        expected = SparseArray([1, 2, 3], dtype=int)
        tm.assert_sp_array_equal(result, expected)

    def test_constructor_sparse_dtype(self):
        result = SparseArray([1, 0, 0, 1], dtype=SparseDtype("int64", -1))
        expected = SparseArray([1, 0, 0, 1], fill_value=-1, dtype=np.int64)
        tm.assert_sp_array_equal(result, expected)
        assert result.sp_values.dtype == np.dtype("int64")

    def test_constructor_sparse_dtype_str(self):
        result = SparseArray([1, 0, 0, 1], dtype="Sparse[int32]")
        expected = SparseArray([1, 0, 0, 1], dtype=np.int32)
        tm.assert_sp_array_equal(result, expected)
        assert result.sp_values.dtype == np.dtype("int32")

    def test_constructor_object_dtype(self):
        # GH 11856
        arr = SparseArray(["A", "A", np.nan, "B"], dtype=np.object)
        assert arr.dtype == SparseDtype(np.object)
        assert np.isnan(arr.fill_value)

        arr = SparseArray(["A", "A", np.nan, "B"],
                          dtype=np.object,
                          fill_value="A")
        assert arr.dtype == SparseDtype(np.object, "A")
        assert arr.fill_value == "A"

        # GH 17574
        data = [False, 0, 100.0, 0.0]
        arr = SparseArray(data, dtype=np.object, fill_value=False)
        assert arr.dtype == SparseDtype(np.object, False)
        assert arr.fill_value is False
        arr_expected = np.array(data, dtype=np.object)
        it = (type(x) == type(y) and x == y for x, y in zip(arr, arr_expected))
        assert np.fromiter(it, dtype=np.bool).all()

    @pytest.mark.parametrize("dtype", [SparseDtype(int, 0), int])
    def test_constructor_na_dtype(self, dtype):
        with pytest.raises(ValueError, match="Cannot convert"):
            SparseArray([0, 1, np.nan], dtype=dtype)

    def test_constructor_spindex_dtype(self):
        arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]))
        # XXX: Behavior change: specifying SparseIndex no longer changes the
        # fill_value
        expected = SparseArray([0, 1, 2, 0], kind="integer")
        tm.assert_sp_array_equal(arr, expected)
        assert arr.dtype == SparseDtype(np.int64)
        assert arr.fill_value == 0

        arr = SparseArray(
            data=[1, 2, 3],
            sparse_index=IntIndex(4, [1, 2, 3]),
            dtype=np.int64,
            fill_value=0,
        )
        exp = SparseArray([0, 1, 2, 3], dtype=np.int64, fill_value=0)
        tm.assert_sp_array_equal(arr, exp)
        assert arr.dtype == SparseDtype(np.int64)
        assert arr.fill_value == 0

        arr = SparseArray(data=[1, 2],
                          sparse_index=IntIndex(4, [1, 2]),
                          fill_value=0,
                          dtype=np.int64)
        exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=np.int64)
        tm.assert_sp_array_equal(arr, exp)
        assert arr.dtype == SparseDtype(np.int64)
        assert arr.fill_value == 0

        arr = SparseArray(
            data=[1, 2, 3],
            sparse_index=IntIndex(4, [1, 2, 3]),
            dtype=None,
            fill_value=0,
        )
        exp = SparseArray([0, 1, 2, 3], dtype=None)
        tm.assert_sp_array_equal(arr, exp)
        assert arr.dtype == SparseDtype(np.int64)
        assert arr.fill_value == 0

    @pytest.mark.parametrize("sparse_index", [None, IntIndex(1, [0])])
    def test_constructor_spindex_dtype_scalar(self, sparse_index):
        # scalar input
        arr = SparseArray(data=1, sparse_index=sparse_index, dtype=None)
        exp = SparseArray([1], dtype=None)
        tm.assert_sp_array_equal(arr, exp)
        assert arr.dtype == SparseDtype(np.int64)
        assert arr.fill_value == 0

        arr = SparseArray(data=1, sparse_index=IntIndex(1, [0]), dtype=None)
        exp = SparseArray([1], dtype=None)
        tm.assert_sp_array_equal(arr, exp)
        assert arr.dtype == SparseDtype(np.int64)
        assert arr.fill_value == 0

    def test_constructor_spindex_dtype_scalar_broadcasts(self):
        arr = SparseArray(data=[1, 2],
                          sparse_index=IntIndex(4, [1, 2]),
                          fill_value=0,
                          dtype=None)
        exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=None)
        tm.assert_sp_array_equal(arr, exp)
        assert arr.dtype == SparseDtype(np.int64)
        assert arr.fill_value == 0

    @pytest.mark.parametrize(
        "data, fill_value",
        [
            (np.array([1, 2]), 0),
            (np.array([1.0, 2.0]), np.nan),
            ([True, False], False),
            ([pd.Timestamp("2017-01-01")], pd.NaT),
        ],
    )
    def test_constructor_inferred_fill_value(self, data, fill_value):
        result = SparseArray(data).fill_value

        if pd.isna(fill_value):
            assert pd.isna(result)
        else:
            assert result == fill_value

    @pytest.mark.parametrize("format", ["coo", "csc", "csr"])
    @pytest.mark.parametrize(
        "size",
        [
            pytest.param(
                0, marks=td.skip_if_np_lt("1.16", reason="NumPy-11383")), 10
        ],
    )
    @td.skip_if_no_scipy
    def test_from_spmatrix(self, size, format):
        import scipy.sparse

        mat = scipy.sparse.random(size, 1, density=0.5, format=format)
        result = SparseArray.from_spmatrix(mat)

        result = np.asarray(result)
        expected = mat.toarray().ravel()
        tm.assert_numpy_array_equal(result, expected)

    @td.skip_if_no_scipy
    def test_from_spmatrix_raises(self):
        import scipy.sparse

        mat = scipy.sparse.eye(5, 4, format="csc")

        with pytest.raises(ValueError, match="not '4'"):
            SparseArray.from_spmatrix(mat)

    @pytest.mark.parametrize(
        "scalar,dtype",
        [
            (False, SparseDtype(bool, False)),
            (0.0, SparseDtype("float64", 0)),
            (1, SparseDtype("int64", 1)),
            ("z", SparseDtype("object", "z")),
        ],
    )
    def test_scalar_with_index_infer_dtype(self, scalar, dtype):
        # GH 19163
        arr = SparseArray(scalar, index=[1, 2, 3], fill_value=scalar)
        exp = SparseArray([scalar, scalar, scalar], fill_value=scalar)

        tm.assert_sp_array_equal(arr, exp)

        assert arr.dtype == dtype
        assert exp.dtype == dtype

    def test_get_item(self):

        assert np.isnan(self.arr[1])
        assert self.arr[2] == 1
        assert self.arr[7] == 5

        assert self.zarr[0] == 0
        assert self.zarr[2] == 1
        assert self.zarr[7] == 5

        errmsg = re.compile("bounds")

        with pytest.raises(IndexError, match=errmsg):
            self.arr[11]

        with pytest.raises(IndexError, match=errmsg):
            self.arr[-11]

        assert self.arr[-1] == self.arr[len(self.arr) - 1]

    def test_take_scalar_raises(self):
        msg = "'indices' must be an array, not a scalar '2'."
        with pytest.raises(ValueError, match=msg):
            self.arr.take(2)

    def test_take(self):
        exp = SparseArray(np.take(self.arr_data, [2, 3]))
        tm.assert_sp_array_equal(self.arr.take([2, 3]), exp)

        exp = SparseArray(np.take(self.arr_data, [0, 1, 2]))
        tm.assert_sp_array_equal(self.arr.take([0, 1, 2]), exp)

    def test_take_fill_value(self):
        data = np.array([1, np.nan, 0, 3, 0])
        sparse = SparseArray(data, fill_value=0)

        exp = SparseArray(np.take(data, [0]), fill_value=0)
        tm.assert_sp_array_equal(sparse.take([0]), exp)

        exp = SparseArray(np.take(data, [1, 3, 4]), fill_value=0)
        tm.assert_sp_array_equal(sparse.take([1, 3, 4]), exp)

    def test_take_negative(self):
        exp = SparseArray(np.take(self.arr_data, [-1]))
        tm.assert_sp_array_equal(self.arr.take([-1]), exp)

        exp = SparseArray(np.take(self.arr_data, [-4, -3, -2]))
        tm.assert_sp_array_equal(self.arr.take([-4, -3, -2]), exp)

    @pytest.mark.parametrize("fill_value", [0, None, np.nan])
    def test_shift_fill_value(self, fill_value):
        # GH #24128
        sparse = SparseArray(np.array([1, 0, 0, 3, 0]), fill_value=8.0)
        res = sparse.shift(1, fill_value=fill_value)
        if isna(fill_value):
            fill_value = res.dtype.na_value
        exp = SparseArray(np.array([fill_value, 1, 0, 0, 3]), fill_value=8.0)
        tm.assert_sp_array_equal(res, exp)

    def test_bad_take(self):
        with pytest.raises(IndexError, match="bounds"):
            self.arr.take([11])

    def test_take_filling(self):
        # similar tests as GH 12631
        sparse = SparseArray([np.nan, np.nan, 1, np.nan, 4])
        result = sparse.take(np.array([1, 0, -1]))
        expected = SparseArray([np.nan, np.nan, 4])
        tm.assert_sp_array_equal(result, expected)

        # XXX: test change: fill_value=True -> allow_fill=True
        result = sparse.take(np.array([1, 0, -1]), allow_fill=True)
        expected = SparseArray([np.nan, np.nan, np.nan])
        tm.assert_sp_array_equal(result, expected)

        # allow_fill=False
        result = sparse.take(np.array([1, 0, -1]),
                             allow_fill=False,
                             fill_value=True)
        expected = SparseArray([np.nan, np.nan, 4])
        tm.assert_sp_array_equal(result, expected)

        msg = "Invalid value in 'indices'"
        with pytest.raises(ValueError, match=msg):
            sparse.take(np.array([1, 0, -2]), allow_fill=True)

        with pytest.raises(ValueError, match=msg):
            sparse.take(np.array([1, 0, -5]), allow_fill=True)

        with pytest.raises(IndexError):
            sparse.take(np.array([1, -6]))
        with pytest.raises(IndexError):
            sparse.take(np.array([1, 5]))
        with pytest.raises(IndexError):
            sparse.take(np.array([1, 5]), allow_fill=True)

    def test_take_filling_fill_value(self):
        # same tests as GH 12631
        sparse = SparseArray([np.nan, 0, 1, 0, 4], fill_value=0)
        result = sparse.take(np.array([1, 0, -1]))
        expected = SparseArray([0, np.nan, 4], fill_value=0)
        tm.assert_sp_array_equal(result, expected)

        # fill_value
        result = sparse.take(np.array([1, 0, -1]), allow_fill=True)
        # XXX: behavior change.
        # the old way of filling self.fill_value doesn't follow EA rules.
        # It's supposed to be self.dtype.na_value (nan in this case)
        expected = SparseArray([0, np.nan, np.nan], fill_value=0)
        tm.assert_sp_array_equal(result, expected)

        # allow_fill=False
        result = sparse.take(np.array([1, 0, -1]),
                             allow_fill=False,
                             fill_value=True)
        expected = SparseArray([0, np.nan, 4], fill_value=0)
        tm.assert_sp_array_equal(result, expected)

        msg = "Invalid value in 'indices'."
        with pytest.raises(ValueError, match=msg):
            sparse.take(np.array([1, 0, -2]), allow_fill=True)
        with pytest.raises(ValueError, match=msg):
            sparse.take(np.array([1, 0, -5]), allow_fill=True)

        with pytest.raises(IndexError):
            sparse.take(np.array([1, -6]))
        with pytest.raises(IndexError):
            sparse.take(np.array([1, 5]))
        with pytest.raises(IndexError):
            sparse.take(np.array([1, 5]), fill_value=True)

    def test_take_filling_all_nan(self):
        sparse = SparseArray([np.nan, np.nan, np.nan, np.nan, np.nan])
        # XXX: did the default kind from take change?
        result = sparse.take(np.array([1, 0, -1]))
        expected = SparseArray([np.nan, np.nan, np.nan], kind="block")
        tm.assert_sp_array_equal(result, expected)

        result = sparse.take(np.array([1, 0, -1]), fill_value=True)
        expected = SparseArray([np.nan, np.nan, np.nan], kind="block")
        tm.assert_sp_array_equal(result, expected)

        with pytest.raises(IndexError):
            sparse.take(np.array([1, -6]))
        with pytest.raises(IndexError):
            sparse.take(np.array([1, 5]))
        with pytest.raises(IndexError):
            sparse.take(np.array([1, 5]), fill_value=True)

    def test_set_item(self):
        def setitem():
            self.arr[5] = 3

        def setslice():
            self.arr[1:5] = 2

        with pytest.raises(TypeError, match="assignment via setitem"):
            setitem()

        with pytest.raises(TypeError, match="assignment via setitem"):
            setslice()

    def test_constructor_from_too_large_array(self):
        with pytest.raises(TypeError, match="expected dimension <= 1 data"):
            SparseArray(np.arange(10).reshape((2, 5)))

    def test_constructor_from_sparse(self):
        res = SparseArray(self.zarr)
        assert res.fill_value == 0
        assert_almost_equal(res.sp_values, self.zarr.sp_values)

    def test_constructor_copy(self):
        cp = SparseArray(self.arr, copy=True)
        cp.sp_values[:3] = 0
        assert not (self.arr.sp_values[:3] == 0).any()

        not_copy = SparseArray(self.arr)
        not_copy.sp_values[:3] = 0
        assert (self.arr.sp_values[:3] == 0).all()

    def test_constructor_bool(self):
        # GH 10648
        data = np.array([False, False, True, True, False, False])
        arr = SparseArray(data, fill_value=False, dtype=bool)

        assert arr.dtype == SparseDtype(bool)
        tm.assert_numpy_array_equal(arr.sp_values, np.array([True, True]))
        # Behavior change: np.asarray densifies.
        # tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr))
        tm.assert_numpy_array_equal(arr.sp_index.indices,
                                    np.array([2, 3], np.int32))

        dense = arr.to_dense()
        assert dense.dtype == bool
        tm.assert_numpy_array_equal(dense, data)

    def test_constructor_bool_fill_value(self):
        arr = SparseArray([True, False, True], dtype=None)
        assert arr.dtype == SparseDtype(np.bool)
        assert not arr.fill_value

        arr = SparseArray([True, False, True], dtype=np.bool)
        assert arr.dtype == SparseDtype(np.bool)
        assert not arr.fill_value

        arr = SparseArray([True, False, True], dtype=np.bool, fill_value=True)
        assert arr.dtype == SparseDtype(np.bool, True)
        assert arr.fill_value

    def test_constructor_float32(self):
        # GH 10648
        data = np.array([1.0, np.nan, 3], dtype=np.float32)
        arr = SparseArray(data, dtype=np.float32)

        assert arr.dtype == SparseDtype(np.float32)
        tm.assert_numpy_array_equal(arr.sp_values,
                                    np.array([1, 3], dtype=np.float32))
        # Behavior change: np.asarray densifies.
        # tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr))
        tm.assert_numpy_array_equal(arr.sp_index.indices,
                                    np.array([0, 2], dtype=np.int32))

        dense = arr.to_dense()
        assert dense.dtype == np.float32
        tm.assert_numpy_array_equal(dense, data)

    def test_astype(self):
        # float -> float
        arr = SparseArray([None, None, 0, 2])
        result = arr.astype("Sparse[float32]")
        expected = SparseArray([None, None, 0, 2], dtype=np.dtype("float32"))
        tm.assert_sp_array_equal(result, expected)

        dtype = SparseDtype("float64", fill_value=0)
        result = arr.astype(dtype)
        expected = SparseArray._simple_new(
            np.array([0.0, 2.0], dtype=dtype.subtype), IntIndex(4, [2, 3]),
            dtype)
        tm.assert_sp_array_equal(result, expected)

        dtype = SparseDtype("int64", 0)
        result = arr.astype(dtype)
        expected = SparseArray._simple_new(np.array([0, 2], dtype=np.int64),
                                           IntIndex(4, [2, 3]), dtype)
        tm.assert_sp_array_equal(result, expected)

        arr = SparseArray([0, np.nan, 0, 1], fill_value=0)
        with pytest.raises(ValueError, match="NA"):
            arr.astype("Sparse[i8]")

    def test_astype_bool(self):
        a = pd.SparseArray([1, 0, 0, 1], dtype=SparseDtype(int, 0))
        result = a.astype(bool)
        expected = SparseArray([True, 0, 0, True], dtype=SparseDtype(bool, 0))
        tm.assert_sp_array_equal(result, expected)

        # update fill value
        result = a.astype(SparseDtype(bool, False))
        expected = SparseArray([True, False, False, True],
                               dtype=SparseDtype(bool, False))
        tm.assert_sp_array_equal(result, expected)

    def test_astype_all(self, any_real_dtype):
        vals = np.array([1, 2, 3])
        arr = SparseArray(vals, fill_value=1)
        typ = np.dtype(any_real_dtype)
        res = arr.astype(typ)
        assert res.dtype == SparseDtype(typ, 1)
        assert res.sp_values.dtype == typ

        tm.assert_numpy_array_equal(np.asarray(res.to_dense()),
                                    vals.astype(typ))

    @pytest.mark.parametrize(
        "array, dtype, expected",
        [
            (
                SparseArray([0, 1]),
                "float",
                SparseArray([0.0, 1.0], dtype=SparseDtype(float, 0.0)),
            ),
            (SparseArray([0, 1]), bool, SparseArray([False, True])),
            (
                SparseArray([0, 1], fill_value=1),
                bool,
                SparseArray([False, True], dtype=SparseDtype(bool, True)),
            ),
            pytest.param(
                SparseArray([0, 1]),
                "datetime64[ns]",
                SparseArray(
                    np.array([0, 1], dtype="datetime64[ns]"),
                    dtype=SparseDtype("datetime64[ns]", pd.Timestamp("1970")),
                ),
                marks=[pytest.mark.xfail(reason="NumPy-7619")],
            ),
            (
                SparseArray([0, 1, 10]),
                str,
                SparseArray(["0", "1", "10"], dtype=SparseDtype(str, "0")),
            ),
            (SparseArray(["10", "20"]), float, SparseArray([10.0, 20.0])),
            (
                SparseArray([0, 1, 0]),
                object,
                SparseArray([0, 1, 0], dtype=SparseDtype(object, 0)),
            ),
        ],
    )
    def test_astype_more(self, array, dtype, expected):
        result = array.astype(dtype)
        tm.assert_sp_array_equal(result, expected)

    def test_astype_nan_raises(self):
        arr = SparseArray([1.0, np.nan])
        with pytest.raises(ValueError, match="Cannot convert non-finite"):
            arr.astype(int)

    def test_set_fill_value(self):
        arr = SparseArray([1.0, np.nan, 2.0], fill_value=np.nan)
        arr.fill_value = 2
        assert arr.fill_value == 2

        arr = SparseArray([1, 0, 2], fill_value=0, dtype=np.int64)
        arr.fill_value = 2
        assert arr.fill_value == 2

        # XXX: this seems fine? You can construct an integer
        # sparsearray with NaN fill value, why not update one?
        # coerces to int
        # msg = "unable to set fill_value 3\\.1 to int64 dtype"
        # with pytest.raises(ValueError, match=msg):
        arr.fill_value = 3.1
        assert arr.fill_value == 3.1

        # msg = "unable to set fill_value nan to int64 dtype"
        # with pytest.raises(ValueError, match=msg):
        arr.fill_value = np.nan
        assert np.isnan(arr.fill_value)

        arr = SparseArray([True, False, True], fill_value=False, dtype=np.bool)
        arr.fill_value = True
        assert arr.fill_value

        # coerces to bool
        # msg = "unable to set fill_value 0 to bool dtype"
        # with pytest.raises(ValueError, match=msg):
        arr.fill_value = 0
        assert arr.fill_value == 0

        # msg = "unable to set fill_value nan to bool dtype"
        # with pytest.raises(ValueError, match=msg):
        arr.fill_value = np.nan
        assert np.isnan(arr.fill_value)

    @pytest.mark.parametrize("val", [[1, 2, 3], np.array([1, 2]), (1, 2, 3)])
    def test_set_fill_invalid_non_scalar(self, val):
        arr = SparseArray([True, False, True], fill_value=False, dtype=np.bool)
        msg = "fill_value must be a scalar"

        with pytest.raises(ValueError, match=msg):
            arr.fill_value = val

    def test_copy(self):
        arr2 = self.arr.copy()
        assert arr2.sp_values is not self.arr.sp_values
        assert arr2.sp_index is self.arr.sp_index

    def test_values_asarray(self):
        assert_almost_equal(self.arr.to_dense(), self.arr_data)

    @pytest.mark.parametrize(
        "data,shape,dtype",
        [
            ([0, 0, 0, 0, 0], (5, ), None),
            ([], (0, ), None),
            ([0], (1, ), None),
            (["A", "A", np.nan, "B"], (4, ), np.object),
        ],
    )
    def test_shape(self, data, shape, dtype):
        # GH 21126
        out = SparseArray(data, dtype=dtype)
        assert out.shape == shape

    @pytest.mark.parametrize(
        "vals",
        [
            [np.nan, np.nan, np.nan, np.nan, np.nan],
            [1, np.nan, np.nan, 3, np.nan],
            [1, np.nan, 0, 3, 0],
        ],
    )
    @pytest.mark.parametrize("fill_value", [None, 0])
    def test_dense_repr(self, vals, fill_value):
        vals = np.array(vals)
        arr = SparseArray(vals, fill_value=fill_value)

        res = arr.to_dense()
        tm.assert_numpy_array_equal(res, vals)

        with tm.assert_produces_warning(FutureWarning):
            res2 = arr.get_values()

        tm.assert_numpy_array_equal(res2, vals)

    def test_getitem(self):
        def _checkit(i):
            assert_almost_equal(self.arr[i], self.arr.to_dense()[i])

        for i in range(len(self.arr)):
            _checkit(i)
            _checkit(-i)

    def test_getitem_arraylike_mask(self):
        arr = SparseArray([0, 1, 2])
        result = arr[[True, False, True]]
        expected = SparseArray([0, 2])
        tm.assert_sp_array_equal(result, expected)

    def test_getslice(self):
        result = self.arr[:-3]
        exp = SparseArray(self.arr.to_dense()[:-3])
        tm.assert_sp_array_equal(result, exp)

        result = self.arr[-4:]
        exp = SparseArray(self.arr.to_dense()[-4:])
        tm.assert_sp_array_equal(result, exp)

        # two corner cases from Series
        result = self.arr[-12:]
        exp = SparseArray(self.arr)
        tm.assert_sp_array_equal(result, exp)

        result = self.arr[:-12]
        exp = SparseArray(self.arr.to_dense()[:0])
        tm.assert_sp_array_equal(result, exp)

    def test_getslice_tuple(self):
        dense = np.array([np.nan, 0, 3, 4, 0, 5, np.nan, np.nan, 0])

        sparse = SparseArray(dense)
        res = sparse[4:, ]  # noqa: E231
        exp = SparseArray(dense[4:, ])  # noqa: E231
        tm.assert_sp_array_equal(res, exp)

        sparse = SparseArray(dense, fill_value=0)
        res = sparse[4:, ]  # noqa: E231
        exp = SparseArray(dense[4:, ], fill_value=0)  # noqa: E231
        tm.assert_sp_array_equal(res, exp)

        with pytest.raises(IndexError):
            sparse[4:, :]

        with pytest.raises(IndexError):
            # check numpy compat
            dense[4:, :]

    def test_boolean_slice_empty(self):
        arr = pd.SparseArray([0, 1, 2])
        res = arr[[False, False, False]]
        assert res.dtype == arr.dtype

    @pytest.mark.parametrize(
        "op", ["add", "sub", "mul", "truediv", "floordiv", "pow"])
    def test_binary_operators(self, op):
        op = getattr(operator, op)
        data1 = np.random.randn(20)
        data2 = np.random.randn(20)

        data1[::2] = np.nan
        data2[::3] = np.nan

        arr1 = SparseArray(data1)
        arr2 = SparseArray(data2)

        data1[::2] = 3
        data2[::3] = 3
        farr1 = SparseArray(data1, fill_value=3)
        farr2 = SparseArray(data2, fill_value=3)

        def _check_op(op, first, second):
            res = op(first, second)
            exp = SparseArray(op(first.to_dense(), second.to_dense()),
                              fill_value=first.fill_value)
            assert isinstance(res, SparseArray)
            assert_almost_equal(res.to_dense(), exp.to_dense())

            res2 = op(first, second.to_dense())
            assert isinstance(res2, SparseArray)
            tm.assert_sp_array_equal(res, res2)

            res3 = op(first.to_dense(), second)
            assert isinstance(res3, SparseArray)
            tm.assert_sp_array_equal(res, res3)

            res4 = op(first, 4)
            assert isinstance(res4, SparseArray)

            # Ignore this if the actual op raises (e.g. pow).
            try:
                exp = op(first.to_dense(), 4)
                exp_fv = op(first.fill_value, 4)
            except ValueError:
                pass
            else:
                assert_almost_equal(res4.fill_value, exp_fv)
                assert_almost_equal(res4.to_dense(), exp)

        with np.errstate(all="ignore"):
            for first_arr, second_arr in [(arr1, arr2), (farr1, farr2)]:
                _check_op(op, first_arr, second_arr)

    def test_pickle(self):
        def _check_roundtrip(obj):
            unpickled = tm.round_trip_pickle(obj)
            tm.assert_sp_array_equal(unpickled, obj)

        _check_roundtrip(self.arr)
        _check_roundtrip(self.zarr)

    def test_generator_warnings(self):
        sp_arr = SparseArray([1, 2, 3])
        with warnings.catch_warnings(record=True) as w:
            warnings.filterwarnings(action="always",
                                    category=DeprecationWarning)
            warnings.filterwarnings(action="always",
                                    category=PendingDeprecationWarning)
            for _ in sp_arr:
                pass
            assert len(w) == 0

    def test_fillna(self):
        s = SparseArray([1, np.nan, np.nan, 3, np.nan])
        res = s.fillna(-1)
        exp = SparseArray([1, -1, -1, 3, -1], fill_value=-1, dtype=np.float64)
        tm.assert_sp_array_equal(res, exp)

        s = SparseArray([1, np.nan, np.nan, 3, np.nan], fill_value=0)
        res = s.fillna(-1)
        exp = SparseArray([1, -1, -1, 3, -1], fill_value=0, dtype=np.float64)
        tm.assert_sp_array_equal(res, exp)

        s = SparseArray([1, np.nan, 0, 3, 0])
        res = s.fillna(-1)
        exp = SparseArray([1, -1, 0, 3, 0], fill_value=-1, dtype=np.float64)
        tm.assert_sp_array_equal(res, exp)

        s = SparseArray([1, np.nan, 0, 3, 0], fill_value=0)
        res = s.fillna(-1)
        exp = SparseArray([1, -1, 0, 3, 0], fill_value=0, dtype=np.float64)
        tm.assert_sp_array_equal(res, exp)

        s = SparseArray([np.nan, np.nan, np.nan, np.nan])
        res = s.fillna(-1)
        exp = SparseArray([-1, -1, -1, -1], fill_value=-1, dtype=np.float64)
        tm.assert_sp_array_equal(res, exp)

        s = SparseArray([np.nan, np.nan, np.nan, np.nan], fill_value=0)
        res = s.fillna(-1)
        exp = SparseArray([-1, -1, -1, -1], fill_value=0, dtype=np.float64)
        tm.assert_sp_array_equal(res, exp)

        # float dtype's fill_value is np.nan, replaced by -1
        s = SparseArray([0.0, 0.0, 0.0, 0.0])
        res = s.fillna(-1)
        exp = SparseArray([0.0, 0.0, 0.0, 0.0], fill_value=-1)
        tm.assert_sp_array_equal(res, exp)

        # int dtype shouldn't have missing. No changes.
        s = SparseArray([0, 0, 0, 0])
        assert s.dtype == SparseDtype(np.int64)
        assert s.fill_value == 0
        res = s.fillna(-1)
        tm.assert_sp_array_equal(res, s)

        s = SparseArray([0, 0, 0, 0], fill_value=0)
        assert s.dtype == SparseDtype(np.int64)
        assert s.fill_value == 0
        res = s.fillna(-1)
        exp = SparseArray([0, 0, 0, 0], fill_value=0)
        tm.assert_sp_array_equal(res, exp)

        # fill_value can be nan if there is no missing hole.
        # only fill_value will be changed
        s = SparseArray([0, 0, 0, 0], fill_value=np.nan)
        assert s.dtype == SparseDtype(np.int64, fill_value=np.nan)
        assert np.isnan(s.fill_value)
        res = s.fillna(-1)
        exp = SparseArray([0, 0, 0, 0], fill_value=-1)
        tm.assert_sp_array_equal(res, exp)

    def test_fillna_overlap(self):
        s = SparseArray([1, np.nan, np.nan, 3, np.nan])
        # filling with existing value doesn't replace existing value with
        # fill_value, i.e. existing 3 remains in sp_values
        res = s.fillna(3)
        exp = np.array([1, 3, 3, 3, 3], dtype=np.float64)
        tm.assert_numpy_array_equal(res.to_dense(), exp)

        s = SparseArray([1, np.nan, np.nan, 3, np.nan], fill_value=0)
        res = s.fillna(3)
        exp = SparseArray([1, 3, 3, 3, 3], fill_value=0, dtype=np.float64)
        tm.assert_sp_array_equal(res, exp)

    def test_nonzero(self):
        # Tests regression #21172.
        sa = pd.SparseArray(
            [float("nan"),
             float("nan"), 1, 0, 0, 2, 0, 0, 0, 3, 0, 0])
        expected = np.array([2, 5, 9], dtype=np.int32)
        result, = sa.nonzero()
        tm.assert_numpy_array_equal(expected, result)

        sa = pd.SparseArray([0, 0, 1, 0, 0, 2, 0, 0, 0, 3, 0, 0])
        result, = sa.nonzero()
        tm.assert_numpy_array_equal(expected, result)
Beispiel #15
0
def test_from_sparse_dtype():
    dtype = SparseDtype("float", 0)
    result = SparseDtype(dtype)
    assert result.fill_value == 0
Beispiel #16
0
def test_parse_subtype(string, expected):
    subtype, _ = SparseDtype._parse_subtype(string)
    assert subtype == expected
Beispiel #17
0
def test_construct_from_string_raises():
    with pytest.raises(TypeError):
        SparseDtype.construct_from_string('not a dtype')
Beispiel #18
0
 def test_constructor_sparse_dtype(self):
     result = SparseArray([1, 0, 0, 1], dtype=SparseDtype('int64', -1))
     expected = SparseArray([1, 0, 0, 1], fill_value=-1, dtype=np.int64)
     tm.assert_sp_array_equal(result, expected)
     assert result.sp_values.dtype == np.dtype('int64')
Beispiel #19
0
    b = SparseDtype(dtype, fill_value)
    assert a == b
    assert b == a


def test_nans_equal():
    a = SparseDtype(float, float("nan"))
    b = SparseDtype(float, np.nan)
    assert a == b
    assert b == a


@pytest.mark.parametrize(
    "a, b",
    [
        (SparseDtype("float64"), SparseDtype("float32")),
        (SparseDtype("float64"), SparseDtype("float64", 0)),
        (SparseDtype("float64"), SparseDtype("datetime64[ns]", np.nan)),
        (SparseDtype(int, pd.NaT), SparseDtype(float, pd.NaT)),
        (SparseDtype("float64"), np.dtype("float64")),
    ],
)
def test_not_equal(a, b):
    assert a != b


def test_construct_from_string_raises():
    with pytest.raises(TypeError):
        SparseDtype.construct_from_string("not a dtype")

Beispiel #20
0
def test_construct_from_string_raises():
    with pytest.raises(TypeError):
        SparseDtype.construct_from_string("not a dtype")
Beispiel #21
0
def test_nans_equal():
    a = SparseDtype(float, float("nan"))
    b = SparseDtype(float, np.nan)
    assert a == b
    assert b == a
Beispiel #22
0
def test_equal(dtype, fill_value):
    a = SparseDtype(dtype, fill_value)
    b = SparseDtype(dtype, fill_value)
    assert a == b
    assert b == a
Beispiel #23
0
def test_from_sparse_dtype_fill_value():
    dtype = SparseDtype("int", 1)
    result = SparseDtype(dtype, fill_value=2)
    expected = SparseDtype("int", 2)
    assert result == expected
Beispiel #24
0
def test_parse_subtype(string, expected):
    subtype, _ = SparseDtype._parse_subtype(string)
    assert subtype == expected
Beispiel #25
0
def test_construct_from_string_fill_value_raises(string):
    with tm.assert_raises_regex(TypeError, 'fill_value in the string is not'):
        SparseDtype.construct_from_string(string)
Beispiel #26
0
def test_equal(dtype, fill_value):
    a = SparseDtype(dtype, fill_value)
    b = SparseDtype(dtype, fill_value)
    assert a == b
    assert b == a


def test_nans_equal():
    a = SparseDtype(float, float('nan'))
    b = SparseDtype(float, np.nan)
    assert a == b
    assert b == a


@pytest.mark.parametrize('a, b', [
    (SparseDtype('float64'), SparseDtype('float32')),
    (SparseDtype('float64'), SparseDtype('float64', 0)),
    (SparseDtype('float64'), SparseDtype('datetime64[ns]', np.nan)),
    (SparseDtype(int, pd.NaT), SparseDtype(float, pd.NaT)),
    (SparseDtype('float64'), np.dtype('float64')),
])
def test_not_equal(a, b):
    assert a != b


def test_construct_from_string_raises():
    with pytest.raises(TypeError):
        SparseDtype.construct_from_string('not a dtype')


@pytest.mark.parametrize("dtype, expected", [
Beispiel #27
0
class TestSparseArray(object):
    def setup_method(self, method):
        self.arr_data = np.array([nan, nan, 1, 2, 3, nan, 4, 5, nan, 6])
        self.arr = SparseArray(self.arr_data)
        self.zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0)

    def test_constructor_dtype(self):
        arr = SparseArray([np.nan, 1, 2, np.nan])
        assert arr.dtype == SparseDtype(np.float64, np.nan)
        assert arr.dtype.subtype == np.float64
        assert np.isnan(arr.fill_value)

        arr = SparseArray([np.nan, 1, 2, np.nan], fill_value=0)
        assert arr.dtype == SparseDtype(np.float64, 0)
        assert arr.fill_value == 0

        arr = SparseArray([0, 1, 2, 4], dtype=np.float64)
        assert arr.dtype == SparseDtype(np.float64, np.nan)
        assert np.isnan(arr.fill_value)

        arr = SparseArray([0, 1, 2, 4], dtype=np.int64)
        assert arr.dtype == SparseDtype(np.int64, 0)
        assert arr.fill_value == 0

        arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=np.int64)
        assert arr.dtype == SparseDtype(np.int64, 0)
        assert arr.fill_value == 0

        arr = SparseArray([0, 1, 2, 4], dtype=None)
        assert arr.dtype == SparseDtype(np.int64, 0)
        assert arr.fill_value == 0

        arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=None)
        assert arr.dtype == SparseDtype(np.int64, 0)
        assert arr.fill_value == 0

    def test_constructor_dtype_str(self):
        result = SparseArray([1, 2, 3], dtype='int')
        expected = SparseArray([1, 2, 3], dtype=int)
        tm.assert_sp_array_equal(result, expected)

    def test_constructor_sparse_dtype(self):
        result = SparseArray([1, 0, 0, 1], dtype=SparseDtype('int64', -1))
        expected = SparseArray([1, 0, 0, 1], fill_value=-1, dtype=np.int64)
        tm.assert_sp_array_equal(result, expected)
        assert result.sp_values.dtype == np.dtype('int64')

    def test_constructor_sparse_dtype_str(self):
        result = SparseArray([1, 0, 0, 1], dtype='Sparse[int32]')
        expected = SparseArray([1, 0, 0, 1], dtype=np.int32)
        tm.assert_sp_array_equal(result, expected)
        assert result.sp_values.dtype == np.dtype('int32')

    def test_constructor_object_dtype(self):
        # GH 11856
        arr = SparseArray(['A', 'A', np.nan, 'B'], dtype=np.object)
        assert arr.dtype == SparseDtype(np.object)
        assert np.isnan(arr.fill_value)

        arr = SparseArray(['A', 'A', np.nan, 'B'],
                          dtype=np.object,
                          fill_value='A')
        assert arr.dtype == SparseDtype(np.object, 'A')
        assert arr.fill_value == 'A'

        # GH 17574
        data = [False, 0, 100.0, 0.0]
        arr = SparseArray(data, dtype=np.object, fill_value=False)
        assert arr.dtype == SparseDtype(np.object, False)
        assert arr.fill_value is False
        arr_expected = np.array(data, dtype=np.object)
        it = (type(x) == type(y) and x == y for x, y in zip(arr, arr_expected))
        assert np.fromiter(it, dtype=np.bool).all()

    @pytest.mark.parametrize("dtype", [SparseDtype(int, 0), int])
    def test_constructor_na_dtype(self, dtype):
        with pytest.raises(ValueError, match="Cannot convert"):
            SparseArray([0, 1, np.nan], dtype=dtype)

    def test_constructor_spindex_dtype(self):
        arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]))
        # XXX: Behavior change: specifying SparseIndex no longer changes the
        # fill_value
        expected = SparseArray([0, 1, 2, 0], kind='integer')
        tm.assert_sp_array_equal(arr, expected)
        assert arr.dtype == SparseDtype(np.int64)
        assert arr.fill_value == 0

        arr = SparseArray(data=[1, 2, 3],
                          sparse_index=IntIndex(4, [1, 2, 3]),
                          dtype=np.int64,
                          fill_value=0)
        exp = SparseArray([0, 1, 2, 3], dtype=np.int64, fill_value=0)
        tm.assert_sp_array_equal(arr, exp)
        assert arr.dtype == SparseDtype(np.int64)
        assert arr.fill_value == 0

        arr = SparseArray(data=[1, 2],
                          sparse_index=IntIndex(4, [1, 2]),
                          fill_value=0,
                          dtype=np.int64)
        exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=np.int64)
        tm.assert_sp_array_equal(arr, exp)
        assert arr.dtype == SparseDtype(np.int64)
        assert arr.fill_value == 0

        arr = SparseArray(data=[1, 2, 3],
                          sparse_index=IntIndex(4, [1, 2, 3]),
                          dtype=None,
                          fill_value=0)
        exp = SparseArray([0, 1, 2, 3], dtype=None)
        tm.assert_sp_array_equal(arr, exp)
        assert arr.dtype == SparseDtype(np.int64)
        assert arr.fill_value == 0

    @pytest.mark.parametrize("sparse_index", [
        None,
        IntIndex(1, [0]),
    ])
    def test_constructor_spindex_dtype_scalar(self, sparse_index):
        # scalar input
        arr = SparseArray(data=1, sparse_index=sparse_index, dtype=None)
        exp = SparseArray([1], dtype=None)
        tm.assert_sp_array_equal(arr, exp)
        assert arr.dtype == SparseDtype(np.int64)
        assert arr.fill_value == 0

        arr = SparseArray(data=1, sparse_index=IntIndex(1, [0]), dtype=None)
        exp = SparseArray([1], dtype=None)
        tm.assert_sp_array_equal(arr, exp)
        assert arr.dtype == SparseDtype(np.int64)
        assert arr.fill_value == 0

    def test_constructor_spindex_dtype_scalar_broadcasts(self):
        arr = SparseArray(data=[1, 2],
                          sparse_index=IntIndex(4, [1, 2]),
                          fill_value=0,
                          dtype=None)
        exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=None)
        tm.assert_sp_array_equal(arr, exp)
        assert arr.dtype == SparseDtype(np.int64)
        assert arr.fill_value == 0

    @pytest.mark.parametrize('data, fill_value', [
        (np.array([1, 2]), 0),
        (np.array([1.0, 2.0]), np.nan),
        ([True, False], False),
        ([pd.Timestamp('2017-01-01')], pd.NaT),
    ])
    def test_constructor_inferred_fill_value(self, data, fill_value):
        result = SparseArray(data).fill_value

        if pd.isna(fill_value):
            assert pd.isna(result)
        else:
            assert result == fill_value

    @pytest.mark.parametrize('scalar,dtype',
                             [(False, SparseDtype(bool, False)),
                              (0.0, SparseDtype('float64', 0)),
                              (1, SparseDtype('int64', 1)),
                              ('z', SparseDtype('object', 'z'))])
    def test_scalar_with_index_infer_dtype(self, scalar, dtype):
        # GH 19163
        arr = SparseArray(scalar, index=[1, 2, 3], fill_value=scalar)
        exp = SparseArray([scalar, scalar, scalar], fill_value=scalar)

        tm.assert_sp_array_equal(arr, exp)

        assert arr.dtype == dtype
        assert exp.dtype == dtype

    @pytest.mark.parametrize("fill", [1, np.nan, 0])
    def test_sparse_series_round_trip(self, kind, fill):
        # see gh-13999
        arr = SparseArray([np.nan, 1, np.nan, 2, 3],
                          kind=kind,
                          fill_value=fill)
        res = SparseArray(SparseSeries(arr))
        tm.assert_sp_array_equal(arr, res)

        arr = SparseArray([0, 0, 0, 1, 1, 2],
                          dtype=np.int64,
                          kind=kind,
                          fill_value=fill)
        res = SparseArray(SparseSeries(arr), dtype=np.int64)
        tm.assert_sp_array_equal(arr, res)

        res = SparseArray(SparseSeries(arr))
        tm.assert_sp_array_equal(arr, res)

    @pytest.mark.parametrize("fill", [True, False, np.nan])
    def test_sparse_series_round_trip2(self, kind, fill):
        # see gh-13999
        arr = SparseArray([True, False, True, True],
                          dtype=np.bool,
                          kind=kind,
                          fill_value=fill)
        res = SparseArray(SparseSeries(arr))
        tm.assert_sp_array_equal(arr, res)

        res = SparseArray(SparseSeries(arr))
        tm.assert_sp_array_equal(arr, res)

    def test_get_item(self):

        assert np.isnan(self.arr[1])
        assert self.arr[2] == 1
        assert self.arr[7] == 5

        assert self.zarr[0] == 0
        assert self.zarr[2] == 1
        assert self.zarr[7] == 5

        errmsg = re.compile("bounds")

        with pytest.raises(IndexError, match=errmsg):
            self.arr[11]

        with pytest.raises(IndexError, match=errmsg):
            self.arr[-11]

        assert self.arr[-1] == self.arr[len(self.arr) - 1]

    def test_take_scalar_raises(self):
        msg = "'indices' must be an array, not a scalar '2'."
        with pytest.raises(ValueError, match=msg):
            self.arr.take(2)

    def test_take(self):
        exp = SparseArray(np.take(self.arr_data, [2, 3]))
        tm.assert_sp_array_equal(self.arr.take([2, 3]), exp)

        exp = SparseArray(np.take(self.arr_data, [0, 1, 2]))
        tm.assert_sp_array_equal(self.arr.take([0, 1, 2]), exp)

    def test_take_fill_value(self):
        data = np.array([1, np.nan, 0, 3, 0])
        sparse = SparseArray(data, fill_value=0)

        exp = SparseArray(np.take(data, [0]), fill_value=0)
        tm.assert_sp_array_equal(sparse.take([0]), exp)

        exp = SparseArray(np.take(data, [1, 3, 4]), fill_value=0)
        tm.assert_sp_array_equal(sparse.take([1, 3, 4]), exp)

    def test_take_negative(self):
        exp = SparseArray(np.take(self.arr_data, [-1]))
        tm.assert_sp_array_equal(self.arr.take([-1]), exp)

        exp = SparseArray(np.take(self.arr_data, [-4, -3, -2]))
        tm.assert_sp_array_equal(self.arr.take([-4, -3, -2]), exp)

    def test_bad_take(self):
        with pytest.raises(IndexError, match="bounds"):
            self.arr.take([11])

    def test_take_filling(self):
        # similar tests as GH 12631
        sparse = SparseArray([np.nan, np.nan, 1, np.nan, 4])
        result = sparse.take(np.array([1, 0, -1]))
        expected = SparseArray([np.nan, np.nan, 4])
        tm.assert_sp_array_equal(result, expected)

        # XXX: test change: fill_value=True -> allow_fill=True
        result = sparse.take(np.array([1, 0, -1]), allow_fill=True)
        expected = SparseArray([np.nan, np.nan, np.nan])
        tm.assert_sp_array_equal(result, expected)

        # allow_fill=False
        result = sparse.take(np.array([1, 0, -1]),
                             allow_fill=False,
                             fill_value=True)
        expected = SparseArray([np.nan, np.nan, 4])
        tm.assert_sp_array_equal(result, expected)

        msg = "Invalid value in 'indices'"
        with pytest.raises(ValueError, match=msg):
            sparse.take(np.array([1, 0, -2]), allow_fill=True)

        with pytest.raises(ValueError, match=msg):
            sparse.take(np.array([1, 0, -5]), allow_fill=True)

        with pytest.raises(IndexError):
            sparse.take(np.array([1, -6]))
        with pytest.raises(IndexError):
            sparse.take(np.array([1, 5]))
        with pytest.raises(IndexError):
            sparse.take(np.array([1, 5]), allow_fill=True)

    def test_take_filling_fill_value(self):
        # same tests as GH 12631
        sparse = SparseArray([np.nan, 0, 1, 0, 4], fill_value=0)
        result = sparse.take(np.array([1, 0, -1]))
        expected = SparseArray([0, np.nan, 4], fill_value=0)
        tm.assert_sp_array_equal(result, expected)

        # fill_value
        result = sparse.take(np.array([1, 0, -1]), allow_fill=True)
        # XXX: behavior change.
        # the old way of filling self.fill_value doesn't follow EA rules.
        # It's supposed to be self.dtype.na_value (nan in this case)
        expected = SparseArray([0, np.nan, np.nan], fill_value=0)
        tm.assert_sp_array_equal(result, expected)

        # allow_fill=False
        result = sparse.take(np.array([1, 0, -1]),
                             allow_fill=False,
                             fill_value=True)
        expected = SparseArray([0, np.nan, 4], fill_value=0)
        tm.assert_sp_array_equal(result, expected)

        msg = ("Invalid value in 'indices'.")
        with pytest.raises(ValueError, match=msg):
            sparse.take(np.array([1, 0, -2]), allow_fill=True)
        with pytest.raises(ValueError, match=msg):
            sparse.take(np.array([1, 0, -5]), allow_fill=True)

        with pytest.raises(IndexError):
            sparse.take(np.array([1, -6]))
        with pytest.raises(IndexError):
            sparse.take(np.array([1, 5]))
        with pytest.raises(IndexError):
            sparse.take(np.array([1, 5]), fill_value=True)

    def test_take_filling_all_nan(self):
        sparse = SparseArray([np.nan, np.nan, np.nan, np.nan, np.nan])
        # XXX: did the default kind from take change?
        result = sparse.take(np.array([1, 0, -1]))
        expected = SparseArray([np.nan, np.nan, np.nan], kind='block')
        tm.assert_sp_array_equal(result, expected)

        result = sparse.take(np.array([1, 0, -1]), fill_value=True)
        expected = SparseArray([np.nan, np.nan, np.nan], kind='block')
        tm.assert_sp_array_equal(result, expected)

        with pytest.raises(IndexError):
            sparse.take(np.array([1, -6]))
        with pytest.raises(IndexError):
            sparse.take(np.array([1, 5]))
        with pytest.raises(IndexError):
            sparse.take(np.array([1, 5]), fill_value=True)

    def test_set_item(self):
        def setitem():
            self.arr[5] = 3

        def setslice():
            self.arr[1:5] = 2

        with pytest.raises(TypeError, match="item assignment"):
            setitem()

        with pytest.raises(TypeError, match="item assignment"):
            setslice()

    def test_constructor_from_too_large_array(self):
        with pytest.raises(TypeError, match="expected dimension <= 1 data"):
            SparseArray(np.arange(10).reshape((2, 5)))

    def test_constructor_from_sparse(self):
        res = SparseArray(self.zarr)
        assert res.fill_value == 0
        assert_almost_equal(res.sp_values, self.zarr.sp_values)

    def test_constructor_copy(self):
        cp = SparseArray(self.arr, copy=True)
        cp.sp_values[:3] = 0
        assert not (self.arr.sp_values[:3] == 0).any()

        not_copy = SparseArray(self.arr)
        not_copy.sp_values[:3] = 0
        assert (self.arr.sp_values[:3] == 0).all()

    def test_constructor_bool(self):
        # GH 10648
        data = np.array([False, False, True, True, False, False])
        arr = SparseArray(data, fill_value=False, dtype=bool)

        assert arr.dtype == SparseDtype(bool)
        tm.assert_numpy_array_equal(arr.sp_values, np.array([True, True]))
        # Behavior change: np.asarray densifies.
        # tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr))
        tm.assert_numpy_array_equal(arr.sp_index.indices,
                                    np.array([2, 3], np.int32))

        for dense in [arr.to_dense(), arr.values]:
            assert dense.dtype == bool
            tm.assert_numpy_array_equal(dense, data)

    def test_constructor_bool_fill_value(self):
        arr = SparseArray([True, False, True], dtype=None)
        assert arr.dtype == SparseDtype(np.bool)
        assert not arr.fill_value

        arr = SparseArray([True, False, True], dtype=np.bool)
        assert arr.dtype == SparseDtype(np.bool)
        assert not arr.fill_value

        arr = SparseArray([True, False, True], dtype=np.bool, fill_value=True)
        assert arr.dtype == SparseDtype(np.bool, True)
        assert arr.fill_value

    def test_constructor_float32(self):
        # GH 10648
        data = np.array([1., np.nan, 3], dtype=np.float32)
        arr = SparseArray(data, dtype=np.float32)

        assert arr.dtype == SparseDtype(np.float32)
        tm.assert_numpy_array_equal(arr.sp_values,
                                    np.array([1, 3], dtype=np.float32))
        # Behavior change: np.asarray densifies.
        # tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr))
        tm.assert_numpy_array_equal(arr.sp_index.indices,
                                    np.array([0, 2], dtype=np.int32))

        for dense in [arr.to_dense(), arr.values]:
            assert dense.dtype == np.float32
            tm.assert_numpy_array_equal(dense, data)

    def test_astype(self):
        # float -> float
        arr = SparseArray([None, None, 0, 2])
        result = arr.astype("Sparse[float32]")
        expected = SparseArray([None, None, 0, 2], dtype=np.dtype('float32'))
        tm.assert_sp_array_equal(result, expected)

        dtype = SparseDtype("float64", fill_value=0)
        result = arr.astype(dtype)
        expected = SparseArray._simple_new(
            np.array([0., 2.], dtype=dtype.subtype), IntIndex(4, [2, 3]),
            dtype)
        tm.assert_sp_array_equal(result, expected)

        dtype = SparseDtype("int64", 0)
        result = arr.astype(dtype)
        expected = SparseArray._simple_new(np.array([0, 2], dtype=np.int64),
                                           IntIndex(4, [2, 3]), dtype)
        tm.assert_sp_array_equal(result, expected)

        arr = SparseArray([0, np.nan, 0, 1], fill_value=0)
        with pytest.raises(ValueError, match='NA'):
            arr.astype('Sparse[i8]')

    def test_astype_bool(self):
        a = pd.SparseArray([1, 0, 0, 1], dtype=SparseDtype(int, 0))
        result = a.astype(bool)
        expected = SparseArray([True, 0, 0, True], dtype=SparseDtype(bool, 0))
        tm.assert_sp_array_equal(result, expected)

        # update fill value
        result = a.astype(SparseDtype(bool, False))
        expected = SparseArray([True, False, False, True],
                               dtype=SparseDtype(bool, False))
        tm.assert_sp_array_equal(result, expected)

    def test_astype_all(self, any_real_dtype):
        vals = np.array([1, 2, 3])
        arr = SparseArray(vals, fill_value=1)
        typ = np.dtype(any_real_dtype)
        res = arr.astype(typ)
        assert res.dtype == SparseDtype(typ, 1)
        assert res.sp_values.dtype == typ

        tm.assert_numpy_array_equal(np.asarray(res.values), vals.astype(typ))

    def test_set_fill_value(self):
        arr = SparseArray([1., np.nan, 2.], fill_value=np.nan)
        arr.fill_value = 2
        assert arr.fill_value == 2

        arr = SparseArray([1, 0, 2], fill_value=0, dtype=np.int64)
        arr.fill_value = 2
        assert arr.fill_value == 2

        # XXX: this seems fine? You can construct an integer
        # sparsearray with NaN fill value, why not update one?
        # coerces to int
        # msg = "unable to set fill_value 3\\.1 to int64 dtype"
        # with pytest.raises(ValueError, match=msg):
        arr.fill_value = 3.1
        assert arr.fill_value == 3.1

        # msg = "unable to set fill_value nan to int64 dtype"
        # with pytest.raises(ValueError, match=msg):
        arr.fill_value = np.nan
        assert np.isnan(arr.fill_value)

        arr = SparseArray([True, False, True], fill_value=False, dtype=np.bool)
        arr.fill_value = True
        assert arr.fill_value

        # coerces to bool
        # msg = "unable to set fill_value 0 to bool dtype"
        # with pytest.raises(ValueError, match=msg):
        arr.fill_value = 0
        assert arr.fill_value == 0

        # msg = "unable to set fill_value nan to bool dtype"
        # with pytest.raises(ValueError, match=msg):
        arr.fill_value = np.nan
        assert np.isnan(arr.fill_value)

    @pytest.mark.parametrize("val", [[1, 2, 3], np.array([1, 2]), (1, 2, 3)])
    def test_set_fill_invalid_non_scalar(self, val):
        arr = SparseArray([True, False, True], fill_value=False, dtype=np.bool)
        msg = "fill_value must be a scalar"

        with pytest.raises(ValueError, match=msg):
            arr.fill_value = val

    def test_copy_shallow(self):
        arr2 = self.arr.copy(deep=False)
        assert arr2.sp_values is self.arr.sp_values
        assert arr2.sp_index is self.arr.sp_index

    def test_values_asarray(self):
        assert_almost_equal(self.arr.values, self.arr_data)
        assert_almost_equal(self.arr.to_dense(), self.arr_data)

    @pytest.mark.parametrize('data,shape,dtype',
                             [([0, 0, 0, 0, 0], (5, ), None),
                              ([], (0, ), None), ([0], (1, ), None),
                              (['A', 'A', np.nan, 'B'], (4, ), np.object)])
    def test_shape(self, data, shape, dtype):
        # GH 21126
        out = SparseArray(data, dtype=dtype)
        assert out.shape == shape

    @pytest.mark.parametrize("vals", [
        [np.nan, np.nan, np.nan, np.nan, np.nan],
        [1, np.nan, np.nan, 3, np.nan],
        [1, np.nan, 0, 3, 0],
    ])
    @pytest.mark.parametrize("method", ["to_dense", "get_values"])
    @pytest.mark.parametrize("fill_value", [None, 0])
    def test_dense_repr(self, vals, fill_value, method):
        vals = np.array(vals)
        arr = SparseArray(vals, fill_value=fill_value)
        dense_func = getattr(arr, method)

        res = dense_func()
        tm.assert_numpy_array_equal(res, vals)

    def test_getitem(self):
        def _checkit(i):
            assert_almost_equal(self.arr[i], self.arr.values[i])

        for i in range(len(self.arr)):
            _checkit(i)
            _checkit(-i)

    def test_getitem_arraylike_mask(self):
        arr = SparseArray([0, 1, 2])
        result = arr[[True, False, True]]
        expected = SparseArray([0, 2])
        tm.assert_sp_array_equal(result, expected)

    def test_getslice(self):
        result = self.arr[:-3]
        exp = SparseArray(self.arr.values[:-3])
        tm.assert_sp_array_equal(result, exp)

        result = self.arr[-4:]
        exp = SparseArray(self.arr.values[-4:])
        tm.assert_sp_array_equal(result, exp)

        # two corner cases from Series
        result = self.arr[-12:]
        exp = SparseArray(self.arr)
        tm.assert_sp_array_equal(result, exp)

        result = self.arr[:-12]
        exp = SparseArray(self.arr.values[:0])
        tm.assert_sp_array_equal(result, exp)

    def test_getslice_tuple(self):
        dense = np.array([np.nan, 0, 3, 4, 0, 5, np.nan, np.nan, 0])

        sparse = SparseArray(dense)
        res = sparse[4:, ]
        exp = SparseArray(dense[4:, ])
        tm.assert_sp_array_equal(res, exp)

        sparse = SparseArray(dense, fill_value=0)
        res = sparse[4:, ]
        exp = SparseArray(dense[4:, ], fill_value=0)
        tm.assert_sp_array_equal(res, exp)

        with pytest.raises(IndexError):
            sparse[4:, :]

        with pytest.raises(IndexError):
            # check numpy compat
            dense[4:, :]

    def test_boolean_slice_empty(self):
        arr = pd.SparseArray([0, 1, 2])
        res = arr[[False, False, False]]
        assert res.dtype == arr.dtype

    @pytest.mark.parametrize(
        "op", ["add", "sub", "mul", "truediv", "floordiv", "pow"])
    def test_binary_operators(self, op):
        op = getattr(operator, op)
        data1 = np.random.randn(20)
        data2 = np.random.randn(20)

        data1[::2] = np.nan
        data2[::3] = np.nan

        arr1 = SparseArray(data1)
        arr2 = SparseArray(data2)

        data1[::2] = 3
        data2[::3] = 3
        farr1 = SparseArray(data1, fill_value=3)
        farr2 = SparseArray(data2, fill_value=3)

        def _check_op(op, first, second):
            res = op(first, second)
            exp = SparseArray(op(first.values, second.values),
                              fill_value=first.fill_value)
            assert isinstance(res, SparseArray)
            assert_almost_equal(res.values, exp.values)

            res2 = op(first, second.values)
            assert isinstance(res2, SparseArray)
            tm.assert_sp_array_equal(res, res2)

            res3 = op(first.values, second)
            assert isinstance(res3, SparseArray)
            tm.assert_sp_array_equal(res, res3)

            res4 = op(first, 4)
            assert isinstance(res4, SparseArray)

            # Ignore this if the actual op raises (e.g. pow).
            try:
                exp = op(first.values, 4)
                exp_fv = op(first.fill_value, 4)
            except ValueError:
                pass
            else:
                assert_almost_equal(res4.fill_value, exp_fv)
                assert_almost_equal(res4.values, exp)

        with np.errstate(all="ignore"):
            for first_arr, second_arr in [(arr1, arr2), (farr1, farr2)]:
                _check_op(op, first_arr, second_arr)

    def test_pickle(self):
        def _check_roundtrip(obj):
            unpickled = tm.round_trip_pickle(obj)
            tm.assert_sp_array_equal(unpickled, obj)

        _check_roundtrip(self.arr)
        _check_roundtrip(self.zarr)

    def test_generator_warnings(self):
        sp_arr = SparseArray([1, 2, 3])
        with warnings.catch_warnings(record=True) as w:
            warnings.filterwarnings(action='always',
                                    category=DeprecationWarning)
            warnings.filterwarnings(action='always',
                                    category=PendingDeprecationWarning)
            for _ in sp_arr:
                pass
            assert len(w) == 0

    def test_fillna(self):
        s = SparseArray([1, np.nan, np.nan, 3, np.nan])
        res = s.fillna(-1)
        exp = SparseArray([1, -1, -1, 3, -1], fill_value=-1, dtype=np.float64)
        tm.assert_sp_array_equal(res, exp)

        s = SparseArray([1, np.nan, np.nan, 3, np.nan], fill_value=0)
        res = s.fillna(-1)
        exp = SparseArray([1, -1, -1, 3, -1], fill_value=0, dtype=np.float64)
        tm.assert_sp_array_equal(res, exp)

        s = SparseArray([1, np.nan, 0, 3, 0])
        res = s.fillna(-1)
        exp = SparseArray([1, -1, 0, 3, 0], fill_value=-1, dtype=np.float64)
        tm.assert_sp_array_equal(res, exp)

        s = SparseArray([1, np.nan, 0, 3, 0], fill_value=0)
        res = s.fillna(-1)
        exp = SparseArray([1, -1, 0, 3, 0], fill_value=0, dtype=np.float64)
        tm.assert_sp_array_equal(res, exp)

        s = SparseArray([np.nan, np.nan, np.nan, np.nan])
        res = s.fillna(-1)
        exp = SparseArray([-1, -1, -1, -1], fill_value=-1, dtype=np.float64)
        tm.assert_sp_array_equal(res, exp)

        s = SparseArray([np.nan, np.nan, np.nan, np.nan], fill_value=0)
        res = s.fillna(-1)
        exp = SparseArray([-1, -1, -1, -1], fill_value=0, dtype=np.float64)
        tm.assert_sp_array_equal(res, exp)

        # float dtype's fill_value is np.nan, replaced by -1
        s = SparseArray([0., 0., 0., 0.])
        res = s.fillna(-1)
        exp = SparseArray([0., 0., 0., 0.], fill_value=-1)
        tm.assert_sp_array_equal(res, exp)

        # int dtype shouldn't have missing. No changes.
        s = SparseArray([0, 0, 0, 0])
        assert s.dtype == SparseDtype(np.int64)
        assert s.fill_value == 0
        res = s.fillna(-1)
        tm.assert_sp_array_equal(res, s)

        s = SparseArray([0, 0, 0, 0], fill_value=0)
        assert s.dtype == SparseDtype(np.int64)
        assert s.fill_value == 0
        res = s.fillna(-1)
        exp = SparseArray([0, 0, 0, 0], fill_value=0)
        tm.assert_sp_array_equal(res, exp)

        # fill_value can be nan if there is no missing hole.
        # only fill_value will be changed
        s = SparseArray([0, 0, 0, 0], fill_value=np.nan)
        assert s.dtype == SparseDtype(np.int64, fill_value=np.nan)
        assert np.isnan(s.fill_value)
        res = s.fillna(-1)
        exp = SparseArray([0, 0, 0, 0], fill_value=-1)
        tm.assert_sp_array_equal(res, exp)

    def test_fillna_overlap(self):
        s = SparseArray([1, np.nan, np.nan, 3, np.nan])
        # filling with existing value doesn't replace existing value with
        # fill_value, i.e. existing 3 remains in sp_values
        res = s.fillna(3)
        exp = np.array([1, 3, 3, 3, 3], dtype=np.float64)
        tm.assert_numpy_array_equal(res.to_dense(), exp)

        s = SparseArray([1, np.nan, np.nan, 3, np.nan], fill_value=0)
        res = s.fillna(3)
        exp = SparseArray([1, 3, 3, 3, 3], fill_value=0, dtype=np.float64)
        tm.assert_sp_array_equal(res, exp)
Beispiel #28
0
def test_construct_from_string(string, expected):
    result = SparseDtype.construct_from_string(string)
    assert result == expected
Beispiel #29
0
    ('category', CategoricalDtype()),
    ('datetime64[ns, US/Eastern]', DatetimeTZDtype('ns', 'US/Eastern')),
])
def test_registry_find(dtype, expected):
    assert registry.find(dtype) == expected


@pytest.mark.parametrize('dtype, expected',
                         [(str, False), (int, False), (bool, True),
                          (np.bool, True), (np.array(['a', 'b']), False),
                          (pd.Series([1, 2]), False),
                          (np.array([True, False]), True),
                          (pd.Series([True, False]), True),
                          (pd.SparseSeries([True, False]), True),
                          (pd.SparseArray([True, False]), True),
                          (SparseDtype(bool), True)])
def test_is_bool_dtype(dtype, expected):
    result = is_bool_dtype(dtype)
    assert result is expected


@pytest.mark.parametrize("check", [
    is_categorical_dtype,
    is_datetime64tz_dtype,
    is_period_dtype,
    is_datetime64_ns_dtype,
    is_datetime64_dtype,
    is_interval_dtype,
    is_datetime64_any_dtype,
    is_string_dtype,
    is_bool_dtype,
Beispiel #30
0
def test_construct_from_string_fill_value_raises(string):
    with pytest.raises(TypeError, match="fill_value in the string is not"):
        SparseDtype.construct_from_string(string)
Beispiel #31
0
def test_construct_from_string_fill_value_raises(string):
    with tm.assert_raises_regex(TypeError, 'fill_value in the string is not'):
        SparseDtype.construct_from_string(string)
Beispiel #32
0
def test_str_uses_object():
    result = SparseDtype(str).subtype
    assert result == np.dtype("object")
Beispiel #33
0
    (pd.Series([1, 2], dtype=np.dtype('int16')), np.dtype('int16')),
    (pd.Series(['a', 'b']), np.dtype(object)),
    (pd.Index([1, 2]), np.dtype('int64')),
    (pd.Index(['a', 'b']), np.dtype(object)),
    ('category', 'category'),
    (pd.Categorical(['a', 'b']).dtype, CategoricalDtype(['a', 'b'])),
    (pd.Categorical(['a', 'b']), CategoricalDtype(['a', 'b'])),
    (pd.CategoricalIndex(['a', 'b']).dtype, CategoricalDtype(['a', 'b'])),
    (pd.CategoricalIndex(['a', 'b']), CategoricalDtype(['a', 'b'])),
    (CategoricalDtype(), CategoricalDtype()),
    (CategoricalDtype(['a', 'b']), CategoricalDtype()),
    (pd.DatetimeIndex([1, 2]), np.dtype('=M8[ns]')),
    (pd.DatetimeIndex([1, 2]).dtype, np.dtype('=M8[ns]')),
    ('<M8[ns]', np.dtype('<M8[ns]')),
    ('datetime64[ns, Europe/London]', DatetimeTZDtype('ns', 'Europe/London')),
    (pd.SparseSeries([1, 2], dtype='int32'), SparseDtype('int32')),
    (pd.SparseSeries([1, 2], dtype='int32').dtype, SparseDtype('int32')),
    (PeriodDtype(freq='D'), PeriodDtype(freq='D')),
    ('period[D]', PeriodDtype(freq='D')),
    (IntervalDtype(), IntervalDtype()),
])
def test__get_dtype(input_param, result):
    assert com._get_dtype(input_param) == result


@pytest.mark.parametrize('input_param',
                         [None, 1, 1.2, 'random string',
                          pd.DataFrame([1, 2])])
def test__get_dtype_fails(input_param):
    # python objects
    pytest.raises(TypeError, com._get_dtype, input_param)
Beispiel #34
0
def test_construct_from_string_fill_value_raises(string):
    with pytest.raises(TypeError, match='fill_value in the string is not'):
        SparseDtype.construct_from_string(string)