コード例 #1
0
def _cast_sparse_series_op(left, right, opname):
    """
    Upcast both operands of a SparseSeries operation to float64 when an
    integer floor-division or modulo has a zero among its divisor values.

    Parameters
    ----------
    left : SparseArray
    right : SparseArray
    opname : str
        Operator name; surrounding underscores are stripped before matching.

    Returns
    -------
    left : SparseArray
    right : SparseArray
    """
    from pandas.core.sparse.api import SparseDtype

    opname = opname.strip("_")

    # TODO: This should be moved to the array?
    if not (is_integer_dtype(left) and is_integer_dtype(right)):
        return left, right

    # The divisor is ``right`` for the forward ops and ``left`` for the
    # reflected ones; any other operator is passed through untouched.
    if opname in ("floordiv", "mod"):
        divisor = right
    elif opname in ("rfloordiv", "rmod"):
        divisor = left
    else:
        return left, right

    # series coerces to float64 if result should have NaN/inf
    if (divisor.to_dense() == 0).any():
        left = left.astype(SparseDtype(np.float64, left.fill_value))
        right = right.astype(SparseDtype(np.float64, right.fill_value))

    return left, right
コード例 #2
0
ファイル: test_arithmetics.py プロジェクト: fudp/pandas-1
    def test_mixed_array_comparison(self, kind):
        """
        Run the comparison-operator checks on a NaN-containing sparse array
        paired with an int64 sparse array, for several fill_value settings.
        """
        # int32 NI ATM
        rdtype = "int64"

        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=rdtype)

        # Default fill values on both sides; this pairing is additionally
        # checked with the right-hand operand multiplied by 0.
        lhs = self._klass(values, kind=kind)
        rhs = self._klass(rvalues, kind=kind)
        assert rhs.dtype == SparseDtype(rdtype)
        self._check_comparison_ops(lhs, rhs, values, rvalues)
        self._check_comparison_ops(lhs, rhs * 0, values, rvalues * 0)

        # (extra kwargs for the left array, extra kwargs for the right
        # array, dtype expected on the right array)
        fill_cases = [
            ({"fill_value": 0}, {}, SparseDtype(rdtype)),
            ({"fill_value": 0}, {"fill_value": 0}, SparseDtype(rdtype)),
            ({"fill_value": 1}, {"fill_value": 2},
             SparseDtype(rdtype, fill_value=2)),
        ]
        for lhs_kwargs, rhs_kwargs, rhs_dtype in fill_cases:
            lhs = self._klass(values, kind=kind, **lhs_kwargs)
            rhs = self._klass(rvalues, kind=kind, **rhs_kwargs)
            assert rhs.dtype == rhs_dtype
            self._check_comparison_ops(lhs, rhs, values, rvalues)
コード例 #3
0
    def test_astype(self):
        """
        Check SparseArray.astype for Sparse[float32], float64 with fill 0 and
        int64 with fill 0, and that an array containing NaN cannot be cast
        to Sparse[i8].
        """
        # float -> float
        arr = SparseArray([None, None, 0, 2])
        as_float32 = arr.astype("Sparse[float32]")
        float32_ref = SparseArray([None, None, 0, 2],
                                  dtype=np.dtype('float32'))
        tm.assert_sp_array_equal(as_float32, float32_ref)

        # float64 with fill_value 0; the reference is assembled through the
        # private constructor from explicit values and index
        float_dtype = SparseDtype("float64", fill_value=0)
        as_float64 = arr.astype(float_dtype)
        float64_values = np.array([0., 2.], dtype=float_dtype.subtype)
        float64_ref = SparseArray._simple_new(float64_values,
                                              IntIndex(4, [2, 3]),
                                              float_dtype)
        tm.assert_sp_array_equal(as_float64, float64_ref)

        # int64 with fill_value 0
        int_dtype = SparseDtype("int64", 0)
        as_int64 = arr.astype(int_dtype)
        int64_values = np.array([0, 2], dtype=np.int64)
        int64_ref = SparseArray._simple_new(int64_values,
                                            IntIndex(4, [2, 3]),
                                            int_dtype)
        tm.assert_sp_array_equal(as_int64, int64_ref)

        with_nan = SparseArray([0, np.nan, 0, 1], fill_value=0)
        with pytest.raises(ValueError, match='NA'):
            with_nan.astype('Sparse[i8]')
コード例 #4
0
    def test_mixed_array_comparison(self):
        """
        Run the comparison-operator checks on a NaN-containing sparse array
        paired with an int64 sparse array, for both index kinds and several
        fill_value settings.
        """
        # (fill kwargs for the left array, fill kwargs for the right array,
        # fill kwargs of the dtype expected on the right array)
        fill_cases = [
            ({"fill_value": 0}, {}, {}),
            ({"fill_value": 0}, {"fill_value": 0}, {}),
            ({"fill_value": 1}, {"fill_value": 2}, {"fill_value": 2}),
        ]

        # int32 NI ATM
        for rdtype in ['int64']:
            values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
            rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=rdtype)

            for kind in ['integer', 'block']:
                # Default fill values; also checked against the right-hand
                # operand multiplied by 0.
                lhs = self._klass(values, kind=kind)
                rhs = self._klass(rvalues, kind=kind)
                assert rhs.dtype == SparseDtype(rdtype)
                self._check_comparison_ops(lhs, rhs, values, rvalues)
                self._check_comparison_ops(lhs, rhs * 0, values, rvalues * 0)

                for lhs_fill, rhs_fill, dtype_fill in fill_cases:
                    lhs = self._klass(values, kind=kind, **lhs_fill)
                    rhs = self._klass(rvalues, kind=kind, **rhs_fill)
                    assert rhs.dtype == SparseDtype(rdtype, **dtype_fill)
                    self._check_comparison_ops(lhs, rhs, values, rvalues)
コード例 #5
0
ファイル: test_array.py プロジェクト: zhengpingwan/pandas
    def test_constructor_spindex_dtype(self):
        """
        Build SparseArrays from explicit data plus a sparse index and compare
        each one, its dtype and its fill_value with a reference array built
        from dense values.
        """
        def check_int64_zero_fill(built, reference):
            # Assertions shared by every case below.
            tm.assert_sp_array_equal(built, reference)
            assert built.dtype == SparseDtype(np.int64)
            assert built.fill_value == 0

        # XXX: Behavior change: specifying SparseIndex no longer changes the
        # fill_value
        check_int64_zero_fill(
            SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2])),
            SparseArray([0, 1, 2, 0], kind='integer'))

        check_int64_zero_fill(
            SparseArray(data=[1, 2, 3],
                        sparse_index=IntIndex(4, [1, 2, 3]),
                        dtype=np.int64,
                        fill_value=0),
            SparseArray([0, 1, 2, 3], dtype=np.int64, fill_value=0))

        check_int64_zero_fill(
            SparseArray(data=[1, 2],
                        sparse_index=IntIndex(4, [1, 2]),
                        fill_value=0,
                        dtype=np.int64),
            SparseArray([0, 1, 2, 0], fill_value=0, dtype=np.int64))

        check_int64_zero_fill(
            SparseArray(data=[1, 2, 3],
                        sparse_index=IntIndex(4, [1, 2, 3]),
                        dtype=None,
                        fill_value=0),
            SparseArray([0, 1, 2, 3], dtype=None))
コード例 #6
0
    def test_mixed_array_float_int(self, kind, mix, all_arithmetic_functions):
        """
        Run the numeric-operator checks for one arithmetic function on a
        NaN-containing sparse array paired with an int64 sparse array, for
        several fill_value settings.
        """
        op = all_arithmetic_functions
        rdtype = "int64"

        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=rdtype)

        # Default fill values; also checked against the right-hand operand
        # multiplied by 0.
        lhs = self._klass(values, kind=kind)
        rhs = self._klass(rvalues, kind=kind)
        assert rhs.dtype == SparseDtype(rdtype)
        self._check_numeric_ops(lhs, rhs, values, rvalues, mix, op)
        self._check_numeric_ops(lhs, rhs * 0, values, rvalues * 0, mix, op)

        # (extra kwargs for the left array, extra kwargs for the right
        # array, dtype expected on the right array)
        fill_cases = [
            ({"fill_value": 0}, {}, SparseDtype(rdtype)),
            ({"fill_value": 0}, {"fill_value": 0}, SparseDtype(rdtype)),
            ({"fill_value": 1}, {"fill_value": 2},
             SparseDtype(rdtype, fill_value=2)),
        ]
        for lhs_kwargs, rhs_kwargs, rhs_dtype in fill_cases:
            lhs = self._klass(values, kind=kind, **lhs_kwargs)
            rhs = self._klass(rvalues, kind=kind, **rhs_kwargs)
            assert rhs.dtype == rhs_dtype
            self._check_numeric_ops(lhs, rhs, values, rvalues, mix, op)
コード例 #7
0
    def test_fillna(self):
        """
        Check SparseArray.fillna(-1) for float arrays with and without an
        explicit fill_value, and for integer arrays.
        """
        nan = np.nan

        # (input data, constructor kwargs, expected data, expected fill)
        float_cases = [
            ([1, nan, nan, 3, nan], {}, [1, -1, -1, 3, -1], -1),
            ([1, nan, nan, 3, nan], {"fill_value": 0},
             [1, -1, -1, 3, -1], 0),
            ([1, nan, 0, 3, 0], {}, [1, -1, 0, 3, 0], -1),
            ([1, nan, 0, 3, 0], {"fill_value": 0}, [1, -1, 0, 3, 0], 0),
            ([nan, nan, nan, nan], {}, [-1, -1, -1, -1], -1),
            ([nan, nan, nan, nan], {"fill_value": 0}, [-1, -1, -1, -1], 0),
        ]
        for data, ctor_kwargs, filled, expected_fill in float_cases:
            source = SparseArray(data, **ctor_kwargs)
            res = source.fillna(-1)
            exp = SparseArray(filled, fill_value=expected_fill,
                              dtype=np.float64)
            tm.assert_sp_array_equal(res, exp)

        # float dtype's fill_value is np.nan, replaced by -1
        zeros_float = SparseArray([0., 0., 0., 0.])
        res = zeros_float.fillna(-1)
        tm.assert_sp_array_equal(
            res, SparseArray([0., 0., 0., 0.], fill_value=-1))

        # int dtype shouldn't have missing. No changes.
        zeros_int = SparseArray([0, 0, 0, 0])
        assert zeros_int.dtype == SparseDtype(np.int64)
        assert zeros_int.fill_value == 0
        res = zeros_int.fillna(-1)
        tm.assert_sp_array_equal(res, zeros_int)

        zeros_int = SparseArray([0, 0, 0, 0], fill_value=0)
        assert zeros_int.dtype == SparseDtype(np.int64)
        assert zeros_int.fill_value == 0
        res = zeros_int.fillna(-1)
        tm.assert_sp_array_equal(
            res, SparseArray([0, 0, 0, 0], fill_value=0))

        # fill_value can be nan if there is no missing hole.
        # only fill_value will be changed
        nan_filled = SparseArray([0, 0, 0, 0], fill_value=np.nan)
        assert nan_filled.dtype == SparseDtype(np.int64, fill_value=np.nan)
        assert np.isnan(nan_filled.fill_value)
        res = nan_filled.fillna(-1)
        tm.assert_sp_array_equal(
            res, SparseArray([0, 0, 0, 0], fill_value=-1))
コード例 #8
0
    def test_astype_bool(self):
        """
        Cast an integer SparseArray to bool, once via the plain ``bool`` type
        and once via a SparseDtype carrying a ``False`` fill value.
        """
        source = pd.SparseArray([1, 0, 0, 1], dtype=SparseDtype(int, 0))

        via_type = source.astype(bool)
        tm.assert_sp_array_equal(
            via_type,
            SparseArray([True, 0, 0, True], dtype=SparseDtype(bool, 0)))

        # update fill value
        bool_false = SparseDtype(bool, False)
        via_dtype = source.astype(bool_false)
        tm.assert_sp_array_equal(
            via_dtype,
            SparseArray([True, False, False, True],
                        dtype=SparseDtype(bool, False)))
コード例 #9
0
    def test_constructor_bool_fill_value(self):
        """
        Check the dtype and fill_value of boolean SparseArrays built with an
        inferred dtype, an explicit bool dtype, and an explicit fill_value.
        """
        # The builtin ``bool`` is used instead of the ``np.bool`` alias:
        # NumPy deprecated that alias in 1.20 and removed it in 1.24, where
        # it raises AttributeError. On older NumPy the two are the same
        # object, so the assertions are unchanged.
        arr = SparseArray([True, False, True], dtype=None)
        assert arr.dtype == SparseDtype(bool)
        assert not arr.fill_value

        arr = SparseArray([True, False, True], dtype=bool)
        assert arr.dtype == SparseDtype(bool)
        assert not arr.fill_value

        arr = SparseArray([True, False, True], dtype=bool, fill_value=True)
        assert arr.dtype == SparseDtype(bool, True)
        assert arr.fill_value
コード例 #10
0
    def test_constructor_spindex_dtype_scalar(self, sparse_index):
        """
        Build a SparseArray from the scalar 1, first with the ``sparse_index``
        fixture and then with an explicit IntIndex, and compare each with
        ``SparseArray([1])``.
        """
        # scalar input
        for index in (sparse_index, IntIndex(1, [0])):
            built = SparseArray(data=1, sparse_index=index, dtype=None)
            reference = SparseArray([1], dtype=None)
            tm.assert_sp_array_equal(built, reference)
            assert built.dtype == SparseDtype(np.int64)
            assert built.fill_value == 0
コード例 #11
0
    def test_getitem_int_dtype(self):
        """
        Slice an integer SparseSeries with a step of 2 and check the values,
        index and int64 dtype, with and without an explicit fill_value of 0.
        """
        # GH 8292
        for fill_kwargs in ({}, {"fill_value": 0}):
            series = pd.SparseSeries([0, 1, 2, 3, 4, 5, 6], name='xxx',
                                     **fill_kwargs)
            sliced = series[::2]
            reference = pd.SparseSeries([0, 2, 4, 6], index=[0, 2, 4, 6],
                                        name='xxx', **fill_kwargs)
            tm.assert_sp_series_equal(sliced, reference)
            assert sliced.dtype == SparseDtype(np.int64)
コード例 #12
0
    def test_int_array(self):
        """
        Run the numeric-operator checks on two int64 sparse arrays for both
        index kinds and several fill_value settings, verifying each array's
        dtype as it is built.
        """
        # have to specify dtype explicitly until fixing GH 667
        dtype = np.int64

        values = self._base([0, 1, 2, 0, 0, 0, 1, 2, 1, 0], dtype=dtype)
        rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=dtype)

        def build(data, kind, expected_dtype, **fill):
            # Construct a sparse array and verify its dtype straight away.
            sparse = self._klass(data, dtype=dtype, kind=kind, **fill)
            assert sparse.dtype == expected_dtype
            return sparse

        for kind in ['integer', 'block']:
            # Default fill values; also checked against the right-hand
            # operand multiplied by 0.
            a = build(values, kind, SparseDtype(dtype))
            b = build(rvalues, kind, SparseDtype(dtype))
            self._check_numeric_ops(a, b, values, rvalues)
            self._check_numeric_ops(a, b * 0, values, rvalues * 0)

            a = build(values, kind, SparseDtype(dtype), fill_value=0)
            b = build(rvalues, kind, SparseDtype(dtype))
            self._check_numeric_ops(a, b, values, rvalues)

            a = build(values, kind, SparseDtype(dtype), fill_value=0)
            b = build(rvalues, kind, SparseDtype(dtype), fill_value=0)
            self._check_numeric_ops(a, b, values, rvalues)

            a = build(values, kind, SparseDtype(dtype, fill_value=1),
                      fill_value=1)
            b = build(rvalues, kind, SparseDtype(dtype, fill_value=2),
                      fill_value=2)
            self._check_numeric_ops(a, b, values, rvalues)
コード例 #13
0
    def test_int_array(self, kind, mix, all_arithmetic_functions):
        """
        Run the numeric-operator checks for one arithmetic function on two
        int64 sparse arrays across several fill_value settings, verifying
        each array's dtype as it is built.
        """
        op = all_arithmetic_functions

        # have to specify dtype explicitly until fixing GH 667
        dtype = np.int64

        values = self._base([0, 1, 2, 0, 0, 0, 1, 2, 1, 0], dtype=dtype)
        rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=dtype)

        # Each case: (fill kwargs for a, dtype expected on a,
        #             fill kwargs for b, dtype expected on b,
        #             whether to repeat the check with b multiplied by 0)
        cases = [
            ({}, SparseDtype(dtype), {}, SparseDtype(dtype), True),
            ({"fill_value": 0}, SparseDtype(dtype),
             {}, SparseDtype(dtype), False),
            ({"fill_value": 0}, SparseDtype(dtype),
             {"fill_value": 0}, SparseDtype(dtype), False),
            ({"fill_value": 1}, SparseDtype(dtype, fill_value=1),
             {"fill_value": 2}, SparseDtype(dtype, fill_value=2), False),
        ]
        for a_fill, a_dtype, b_fill, b_dtype, with_zeroed in cases:
            a = self._klass(values, dtype=dtype, kind=kind, **a_fill)
            assert a.dtype == a_dtype
            b = self._klass(rvalues, dtype=dtype, kind=kind, **b_fill)
            assert b.dtype == b_dtype

            self._check_numeric_ops(a, b, values, rvalues, mix, op)
            if with_zeroed:
                self._check_numeric_ops(a, b * 0, values, rvalues * 0,
                                        mix, op)
コード例 #14
0
    def tests_indexing_with_sparse(self, kind, fill):
        """
        Index a SparseArray and a SparseSeries with a boolean sparse mask,
        and check that ``.iloc`` rejects a SparseSeries mask.
        """
        # see gh-13985
        arr = pd.SparseArray([1, 2, 3], kind=kind)
        mask = pd.SparseArray([True, False, True],
                              fill_value=fill,
                              dtype=bool)

        masked = arr[mask]
        kept = pd.SparseArray([1, 3], kind=kind)
        tm.assert_sp_array_equal(kept, masked)

        series = pd.SparseSeries(arr, index=["a", "b", "c"],
                                 dtype=np.float64)
        float_dtype = SparseDtype(np.float64, series.fill_value)
        expected = pd.SparseSeries([1, 3], index=["a", "c"], kind=kind,
                                   dtype=float_dtype)

        # A SparseArray mask works through plain, label and positional
        # indexing alike.
        for accessor in (series, series.loc, series.iloc):
            tm.assert_sp_series_equal(accessor[mask], expected)

        # A SparseSeries mask works for plain and label indexing ...
        mask = pd.SparseSeries(mask, index=["a", "b", "c"])
        for accessor in (series, series.loc):
            tm.assert_sp_series_equal(accessor[mask], expected)

        # ... but is rejected by positional indexing.
        msg = ("iLocation based boolean indexing cannot "
               "use an indexable as a mask")
        with pytest.raises(ValueError, match=msg):
            series.iloc[mask]
コード例 #15
0
ファイル: test_reshape.py プロジェクト: ziggi0703/pandas
    def test_dataframe_dummies_with_na(self, df, sparse, dtype):
        """
        Check get_dummies on a frame whose row 3 is set to all-NaN, with and
        without the extra ``dummy_na`` indicator columns.
        """
        df.loc[3, :] = [np.nan, np.nan, np.nan]
        result = get_dummies(df, dummy_na=True, sparse=sparse, dtype=dtype)
        result = result.sort_index(axis=1)

        # Pick the array constructor and dtype matching the ``sparse`` flag.
        if sparse:
            arr, typ = SparseArray, SparseDtype(dtype, 0)
        else:
            arr, typ = np.array, dtype

        indicator_columns = [
            ('A_a', [1, 0, 1, 0]),
            ('A_b', [0, 1, 0, 0]),
            ('A_nan', [0, 0, 0, 1]),
            ('B_b', [1, 1, 0, 0]),
            ('B_c', [0, 0, 1, 0]),
            ('B_nan', [0, 0, 0, 1]),
        ]
        expected_data = {'C': [1, 2, 3, np.nan]}
        for column, flags in indicator_columns:
            expected_data[column] = arr(flags, dtype=typ)
        expected = DataFrame(expected_data).sort_index(axis=1)

        assert_frame_equal(result, expected)

        # Without dummy_na the *_nan indicator columns are not expected.
        result = get_dummies(df, dummy_na=False, sparse=sparse, dtype=dtype)
        expected = expected[['C', 'A_a', 'A_b', 'B_b', 'B_c']]
        assert_frame_equal(result, expected)
コード例 #16
0
def test_inferred_dtype(dtype, fill_value):
    """
    Check that ``SparseDtype(dtype)`` infers ``fill_value`` as its default.

    A missing-value ``fill_value`` is matched by NA-ness and exact type,
    anything else by equality.
    """
    inferred = SparseDtype(dtype).fill_value
    if not pd.isna(fill_value):
        assert inferred == fill_value
    else:
        assert pd.isna(inferred) and type(inferred) == type(fill_value)
コード例 #17
0
 def test_constructor_spindex_dtype_scalar_broadcasts(self):
     """
     Build a SparseArray from two values plus an IntIndex of length 4 and
     compare it with the equivalent array built from dense values.
     """
     index = IntIndex(4, [1, 2])
     built = SparseArray(data=[1, 2], sparse_index=index,
                         fill_value=0, dtype=None)
     reference = SparseArray([0, 1, 2, 0], fill_value=0, dtype=None)
     tm.assert_sp_array_equal(built, reference)
     assert built.dtype == SparseDtype(np.int64)
     assert built.fill_value == 0
コード例 #18
0
    def test_constructor_dtype(self):
        """
        Check the dtype and fill_value a SparseArray ends up with for float
        and integer inputs, with and without explicit dtype / fill_value.
        """
        # Float data, default fill value.
        arr = SparseArray([np.nan, 1, 2, np.nan])
        assert arr.dtype == SparseDtype(np.float64, np.nan)
        assert arr.dtype.subtype == np.float64
        assert np.isnan(arr.fill_value)

        # Float data, explicit fill value of 0.
        arr = SparseArray([np.nan, 1, 2, np.nan], fill_value=0)
        assert arr.dtype == SparseDtype(np.float64, 0)
        assert arr.fill_value == 0

        # Integer data forced to float64.
        arr = SparseArray([0, 1, 2, 4], dtype=np.float64)
        assert arr.dtype == SparseDtype(np.float64, np.nan)
        assert np.isnan(arr.fill_value)

        # Integer data: every combination below is expected to give int64
        # with fill value 0.
        int_kwargs = (
            {"dtype": np.int64},
            {"fill_value": 0, "dtype": np.int64},
            {"dtype": None},
            {"fill_value": 0, "dtype": None},
        )
        for kwargs in int_kwargs:
            arr = SparseArray([0, 1, 2, 4], **kwargs)
            assert arr.dtype == SparseDtype(np.int64, 0)
            assert arr.fill_value == 0
コード例 #19
0
 def test_concat_bug(self):
     """
     Concatenate a SparseDataFrame holding an all-NaN column (fill value 0)
     with an empty-column SparseDataFrame and check column "A" of the result.
     """
     from pandas.core.sparse.api import SparseDtype

     nan_column = pd.SparseArray([np.nan, np.nan], fill_value=0)
     left = pd.SparseDataFrame({"A": nan_column})
     right = pd.SparseDataFrame({"B": []})
     combined = pd.concat([left, right], sort=False)
     res = combined[['A']]

     expected_column = pd.SparseArray([np.nan, np.nan],
                                      dtype=SparseDtype(float, 0))
     exp = pd.DataFrame({"A": expected_column})
     tm.assert_frame_equal(res, exp)
コード例 #20
0
    def test_astype_all(self, any_real_dtype):
        """
        Cast a SparseArray with fill value 1 to the ``any_real_dtype``
        fixture's dtype and check the sparse dtype, the stored-values dtype
        and the dense values.
        """
        target = np.dtype(any_real_dtype)
        vals = np.array([1, 2, 3])

        res = SparseArray(vals, fill_value=1).astype(target)
        assert res.dtype == SparseDtype(target, 1)
        assert res.sp_values.dtype == target

        dense = np.asarray(res.values)
        tm.assert_numpy_array_equal(dense, vals.astype(target))
コード例 #21
0
ファイル: test_array.py プロジェクト: 09acp/Dash-Examples
    def test_constructor_object_dtype(self):
        """
        Check object-dtype SparseArrays: the default and an explicit
        fill_value, and that a ``False`` fill value keeps mixed
        bool/int/float elements distinct by type.
        """
        # The builtins ``object`` and ``bool`` are used instead of the
        # ``np.object`` / ``np.bool`` aliases: NumPy deprecated those aliases
        # in 1.20 and removed them in 1.24, where they raise AttributeError.
        # On older NumPy they are the same objects as the builtins.

        # GH 11856
        arr = SparseArray(["A", "A", np.nan, "B"], dtype=object)
        assert arr.dtype == SparseDtype(object)
        assert np.isnan(arr.fill_value)

        arr = SparseArray(["A", "A", np.nan, "B"], dtype=object, fill_value="A")
        assert arr.dtype == SparseDtype(object, "A")
        assert arr.fill_value == "A"

        # GH 17574
        data = [False, 0, 100.0, 0.0]
        arr = SparseArray(data, dtype=object, fill_value=False)
        assert arr.dtype == SparseDtype(object, False)
        assert arr.fill_value is False
        arr_expected = np.array(data, dtype=object)
        # Exact type equality is intentional: False, 0 and 0.0 compare equal
        # with ==, so the element types must match as well.
        it = (type(x) == type(y) and x == y for x, y in zip(arr, arr_expected))
        assert np.fromiter(it, dtype=bool).all()
コード例 #22
0
ファイル: test_reshape.py プロジェクト: 09acp/Dash-Examples
 def test_get_dummies_all_sparse(self):
     """
     Check that get_dummies with ``sparse=True`` on a single encoded column
     gives uint8 sparse indicator columns with fill value 0.
     """
     frame = pd.DataFrame({"A": [1, 2]})
     result = pd.get_dummies(frame, columns=["A"], sparse=True)

     uint8_sparse = SparseDtype("uint8", 0)
     first = SparseArray([1, 0], dtype=uint8_sparse)
     second = SparseArray([0, 1], dtype=uint8_sparse)
     expected = pd.DataFrame({"A_1": first, "A_2": second})
     tm.assert_frame_equal(result, expected)
コード例 #23
0
def test_setting_fill_value_updates():
    """
    Assign a NaN fill_value to a SparseArray created with fill value 0 and
    compare the result with an array assembled through the private
    constructor.
    """
    arr = SparseArray([0.0, np.nan], fill_value=0)
    arr.fill_value = np.nan

    # use private constructor to get the index right
    # otherwise both nans would be un-stored.
    nan_dtype = SparseDtype(float, np.nan)
    stored = np.array([np.nan])
    positions = IntIndex(2, [1])
    expected = SparseArray._simple_new(
        sparse_array=stored,
        sparse_index=positions,
        dtype=nan_dtype,
    )
    tm.assert_sp_array_equal(arr, expected)
コード例 #24
0
 def test_frame_basic_dtypes(self):
     """
     Check that every row of ``self.sdf`` has sparse object dtype and that
     each column equals the matching ``self.<name>_series`` attribute.
     """
     for _, row in self.sdf.iterrows():
         assert row.dtype == SparseDtype(object)

     # Column 'x' is compared with the attribute ``self.x_series``.
     for column in ('string', 'int', 'float', 'object'):
         tm.assert_sp_series_equal(self.sdf[column],
                                   getattr(self, column + '_series'),
                                   check_names=False)
コード例 #25
0
ファイル: test_array.py プロジェクト: 09acp/Dash-Examples
    def test_constructor_bool(self):
        """
        Build a boolean SparseArray with fill value ``False`` and check its
        dtype, stored values, stored positions and dense form.
        """
        # GH 10648
        data = np.array([False, False, True, True, False, False])
        arr = SparseArray(data, fill_value=False, dtype=bool)

        assert arr.dtype == SparseDtype(bool)

        stored_ref = np.array([True, True])
        tm.assert_numpy_array_equal(arr.sp_values, stored_ref)
        # Behavior change: np.asarray densifies.
        # tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr))
        positions_ref = np.array([2, 3], np.int32)
        tm.assert_numpy_array_equal(arr.sp_index.indices, positions_ref)

        densified = arr.to_dense()
        assert densified.dtype == bool
        tm.assert_numpy_array_equal(densified, data)
コード例 #26
0
ファイル: test_reshape.py プロジェクト: yx586/pandas
 def test_dataframe_dummies_mix_default(self, df, sparse, dtype):
     """
     Check get_dummies with default column selection: column 'C' is
     expected unchanged, followed by indicator columns for 'A' and 'B'.
     """
     result = get_dummies(df, sparse=sparse, dtype=dtype)

     # Pick the array constructor and dtype matching the ``sparse`` flag.
     if sparse:
         arr, typ = SparseArray, SparseDtype(dtype, 0)
     else:
         arr, typ = np.array, dtype

     indicator_columns = [
         ('A_a', [1, 0, 1]),
         ('A_b', [0, 1, 0]),
         ('B_b', [1, 1, 0]),
         ('B_c', [0, 0, 1]),
     ]
     expected_data = {'C': [1, 2, 3]}
     for column, flags in indicator_columns:
         expected_data[column] = arr(flags, dtype=typ)

     expected = DataFrame(expected_data)
     expected = expected[['C', 'A_a', 'A_b', 'B_b', 'B_c']]
     assert_frame_equal(result, expected)
コード例 #27
0
def test_from_to_scipy_object(spmatrix, fill_value):
    """
    Round-trip an object-dtype 2x2 identity matrix through ``spmatrix`` and
    SparseDataFrame, comparing frames, COO contents and dtypes.

    The test is skipped for dok_matrix on SciPy >= 0.19 and returns early
    when ``spmatrix`` cannot represent the object array.
    """
    # GH 4343
    dtype = object
    columns = list('cd')
    index = list('ab')

    dok_unsupported = (
        spmatrix is scipy.sparse.dok_matrix
        and LooseVersion(scipy.__version__) >= LooseVersion('0.19.0'))
    if dok_unsupported:
        pytest.skip("dok_matrix from object does not work in SciPy >= 0.19")

    # Make one ndarray and from it one sparse matrix, both to be used for
    # constructing frames and comparing results
    arr = np.eye(2, dtype=dtype)
    try:
        spm = spmatrix(arr)
        assert spm.dtype == arr.dtype
    except (TypeError, AssertionError):
        # If conversion to sparse fails for this spmatrix type and arr.dtype,
        # then the combination is not currently supported in NumPy, so we
        # can just skip testing it thoroughly
        return

    sdf = SparseDataFrame(spm,
                          index=index,
                          columns=columns,
                          default_fill_value=fill_value)

    # Expected result construction is kind of tricky for all
    # dtype-fill_value combinations; easiest to cast to something generic
    # and except later on
    rarr = arr.astype(object)
    rarr[arr == 0] = np.nan
    na_replacement = np.nan if fill_value is None else fill_value
    expected = SparseDataFrame(rarr, index=index, columns=columns)
    expected = expected.fillna(na_replacement)

    # Assert frame is as expected
    sdf_obj = sdf.astype(SparseDtype(object, fill_value))
    tm.assert_sp_frame_equal(sdf_obj, expected)
    tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense())

    # Assert spmatrices equal
    assert dict(sdf.to_coo().todok()) == dict(spm.todok())

    # Ensure dtype is preserved if possible
    res_dtype = object
    subtypes = sdf.dtypes.apply(lambda col_dtype: col_dtype.subtype)
    tm.assert_contains_all(subtypes, {np.dtype(res_dtype)})
    assert sdf.to_coo().dtype == res_dtype
コード例 #28
0
    def test_constructor_float32(self):
        """
        Build a float32 SparseArray from data containing NaN and check its
        dtype, stored values, stored positions and both dense forms.
        """
        # GH 10648
        data = np.array([1., np.nan, 3], dtype=np.float32)
        arr = SparseArray(data, dtype=np.float32)

        assert arr.dtype == SparseDtype(np.float32)

        stored_ref = np.array([1, 3], dtype=np.float32)
        tm.assert_numpy_array_equal(arr.sp_values, stored_ref)
        # Behavior change: np.asarray densifies.
        # tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr))
        positions_ref = np.array([0, 2], dtype=np.int32)
        tm.assert_numpy_array_equal(arr.sp_index.indices, positions_ref)

        # Both densifying accessors are expected to give back the input.
        for densified in (arr.to_dense(), arr.values):
            assert densified.dtype == np.float32
            tm.assert_numpy_array_equal(densified, data)
コード例 #29
0
ファイル: test_reshape.py プロジェクト: 09acp/Dash-Examples
 def test_dataframe_dummies_mix_default(self, df, sparse, dtype):
     """
     Check get_dummies with default column selection: column "C" is
     expected unchanged, followed by indicator columns for "A" and "B".
     """
     result = get_dummies(df, sparse=sparse, dtype=dtype)

     # Pick the array constructor and dtype matching the ``sparse`` flag.
     arr = SparseArray if sparse else np.array
     typ = SparseDtype(dtype, 0) if sparse else dtype

     def indicator(flags):
         # One expected dummy column, in the representation chosen above.
         return arr(flags, dtype=typ)

     expected = DataFrame({
         "C": [1, 2, 3],
         "A_a": indicator([1, 0, 1]),
         "A_b": indicator([0, 1, 0]),
         "B_b": indicator([1, 1, 0]),
         "B_c": indicator([0, 0, 1]),
     })
     column_order = ["C", "A_a", "A_b", "B_b", "B_c"]
     expected = expected[column_order]
     assert_frame_equal(result, expected)
コード例 #30
0
ファイル: test_apply.py プロジェクト: zhengpingwan/pandas
def test_apply_nonuq():
    orig = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=['a', 'a', 'c'])
    sparse = orig.to_sparse()
    res = sparse.apply(lambda s: s[0], axis=1)
    exp = orig.apply(lambda s: s[0], axis=1)

    # dtype must be kept
    assert res.dtype == SparseDtype(np.int64)

    # ToDo: apply must return subclassed dtype
    assert isinstance(res, Series)
    tm.assert_series_equal(res.to_dense(), exp)

    # df.T breaks
    sparse = orig.T.to_sparse()
    res = sparse.apply(lambda s: s[0], axis=0)  # noqa
    exp = orig.T.apply(lambda s: s[0], axis=0)