def _cast_sparse_series_op(left, right, opname): """ For SparseSeries operation, coerce to float64 if the result is expected to have NaN or inf values Parameters ---------- left : SparseArray right : SparseArray opname : str Returns ------- left : SparseArray right : SparseArray """ from pandas.core.sparse.api import SparseDtype opname = opname.strip("_") # TODO: This should be moved to the array? if is_integer_dtype(left) and is_integer_dtype(right): # series coerces to float64 if result should have NaN/inf if opname in ("floordiv", "mod") and (right.to_dense() == 0).any(): left = left.astype(SparseDtype(np.float64, left.fill_value)) right = right.astype(SparseDtype(np.float64, right.fill_value)) elif opname in ("rfloordiv", "rmod") and (left.to_dense() == 0).any(): left = left.astype(SparseDtype(np.float64, left.fill_value)) right = right.astype(SparseDtype(np.float64, right.fill_value)) return left, right
def test_mixed_array_comparison(self, kind): rdtype = "int64" # int32 NI ATM values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=rdtype) a = self._klass(values, kind=kind) b = self._klass(rvalues, kind=kind) assert b.dtype == SparseDtype(rdtype) self._check_comparison_ops(a, b, values, rvalues) self._check_comparison_ops(a, b * 0, values, rvalues * 0) a = self._klass(values, kind=kind, fill_value=0) b = self._klass(rvalues, kind=kind) assert b.dtype == SparseDtype(rdtype) self._check_comparison_ops(a, b, values, rvalues) a = self._klass(values, kind=kind, fill_value=0) b = self._klass(rvalues, kind=kind, fill_value=0) assert b.dtype == SparseDtype(rdtype) self._check_comparison_ops(a, b, values, rvalues) a = self._klass(values, kind=kind, fill_value=1) b = self._klass(rvalues, kind=kind, fill_value=2) assert b.dtype == SparseDtype(rdtype, fill_value=2) self._check_comparison_ops(a, b, values, rvalues)
def test_astype(self): # float -> float arr = SparseArray([None, None, 0, 2]) result = arr.astype("Sparse[float32]") expected = SparseArray([None, None, 0, 2], dtype=np.dtype('float32')) tm.assert_sp_array_equal(result, expected) dtype = SparseDtype("float64", fill_value=0) result = arr.astype(dtype) expected = SparseArray._simple_new(np.array([0., 2.], dtype=dtype.subtype), IntIndex(4, [2, 3]), dtype) tm.assert_sp_array_equal(result, expected) dtype = SparseDtype("int64", 0) result = arr.astype(dtype) expected = SparseArray._simple_new(np.array([0, 2], dtype=np.int64), IntIndex(4, [2, 3]), dtype) tm.assert_sp_array_equal(result, expected) arr = SparseArray([0, np.nan, 0, 1], fill_value=0) with pytest.raises(ValueError, match='NA'): arr.astype('Sparse[i8]')
def test_mixed_array_comparison(self): # int32 NI ATM for rdtype in ['int64']: values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=rdtype) for kind in ['integer', 'block']: a = self._klass(values, kind=kind) b = self._klass(rvalues, kind=kind) assert b.dtype == SparseDtype(rdtype) self._check_comparison_ops(a, b, values, rvalues) self._check_comparison_ops(a, b * 0, values, rvalues * 0) a = self._klass(values, kind=kind, fill_value=0) b = self._klass(rvalues, kind=kind) assert b.dtype == SparseDtype(rdtype) self._check_comparison_ops(a, b, values, rvalues) a = self._klass(values, kind=kind, fill_value=0) b = self._klass(rvalues, kind=kind, fill_value=0) assert b.dtype == SparseDtype(rdtype) self._check_comparison_ops(a, b, values, rvalues) a = self._klass(values, kind=kind, fill_value=1) b = self._klass(rvalues, kind=kind, fill_value=2) assert b.dtype == SparseDtype(rdtype, fill_value=2) self._check_comparison_ops(a, b, values, rvalues)
def test_constructor_spindex_dtype(self): arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2])) # XXX: Behavior change: specifying SparseIndex no longer changes the # fill_value expected = SparseArray([0, 1, 2, 0], kind='integer') tm.assert_sp_array_equal(arr, expected) assert arr.dtype == SparseDtype(np.int64) assert arr.fill_value == 0 arr = SparseArray(data=[1, 2, 3], sparse_index=IntIndex(4, [1, 2, 3]), dtype=np.int64, fill_value=0) exp = SparseArray([0, 1, 2, 3], dtype=np.int64, fill_value=0) tm.assert_sp_array_equal(arr, exp) assert arr.dtype == SparseDtype(np.int64) assert arr.fill_value == 0 arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]), fill_value=0, dtype=np.int64) exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=np.int64) tm.assert_sp_array_equal(arr, exp) assert arr.dtype == SparseDtype(np.int64) assert arr.fill_value == 0 arr = SparseArray(data=[1, 2, 3], sparse_index=IntIndex(4, [1, 2, 3]), dtype=None, fill_value=0) exp = SparseArray([0, 1, 2, 3], dtype=None) tm.assert_sp_array_equal(arr, exp) assert arr.dtype == SparseDtype(np.int64) assert arr.fill_value == 0
def test_mixed_array_float_int(self, kind, mix, all_arithmetic_functions): op = all_arithmetic_functions rdtype = "int64" values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=rdtype) a = self._klass(values, kind=kind) b = self._klass(rvalues, kind=kind) assert b.dtype == SparseDtype(rdtype) self._check_numeric_ops(a, b, values, rvalues, mix, op) self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op) a = self._klass(values, kind=kind, fill_value=0) b = self._klass(rvalues, kind=kind) assert b.dtype == SparseDtype(rdtype) self._check_numeric_ops(a, b, values, rvalues, mix, op) a = self._klass(values, kind=kind, fill_value=0) b = self._klass(rvalues, kind=kind, fill_value=0) assert b.dtype == SparseDtype(rdtype) self._check_numeric_ops(a, b, values, rvalues, mix, op) a = self._klass(values, kind=kind, fill_value=1) b = self._klass(rvalues, kind=kind, fill_value=2) assert b.dtype == SparseDtype(rdtype, fill_value=2) self._check_numeric_ops(a, b, values, rvalues, mix, op)
def test_fillna(self): s = SparseArray([1, np.nan, np.nan, 3, np.nan]) res = s.fillna(-1) exp = SparseArray([1, -1, -1, 3, -1], fill_value=-1, dtype=np.float64) tm.assert_sp_array_equal(res, exp) s = SparseArray([1, np.nan, np.nan, 3, np.nan], fill_value=0) res = s.fillna(-1) exp = SparseArray([1, -1, -1, 3, -1], fill_value=0, dtype=np.float64) tm.assert_sp_array_equal(res, exp) s = SparseArray([1, np.nan, 0, 3, 0]) res = s.fillna(-1) exp = SparseArray([1, -1, 0, 3, 0], fill_value=-1, dtype=np.float64) tm.assert_sp_array_equal(res, exp) s = SparseArray([1, np.nan, 0, 3, 0], fill_value=0) res = s.fillna(-1) exp = SparseArray([1, -1, 0, 3, 0], fill_value=0, dtype=np.float64) tm.assert_sp_array_equal(res, exp) s = SparseArray([np.nan, np.nan, np.nan, np.nan]) res = s.fillna(-1) exp = SparseArray([-1, -1, -1, -1], fill_value=-1, dtype=np.float64) tm.assert_sp_array_equal(res, exp) s = SparseArray([np.nan, np.nan, np.nan, np.nan], fill_value=0) res = s.fillna(-1) exp = SparseArray([-1, -1, -1, -1], fill_value=0, dtype=np.float64) tm.assert_sp_array_equal(res, exp) # float dtype's fill_value is np.nan, replaced by -1 s = SparseArray([0., 0., 0., 0.]) res = s.fillna(-1) exp = SparseArray([0., 0., 0., 0.], fill_value=-1) tm.assert_sp_array_equal(res, exp) # int dtype shouldn't have missing. No changes. s = SparseArray([0, 0, 0, 0]) assert s.dtype == SparseDtype(np.int64) assert s.fill_value == 0 res = s.fillna(-1) tm.assert_sp_array_equal(res, s) s = SparseArray([0, 0, 0, 0], fill_value=0) assert s.dtype == SparseDtype(np.int64) assert s.fill_value == 0 res = s.fillna(-1) exp = SparseArray([0, 0, 0, 0], fill_value=0) tm.assert_sp_array_equal(res, exp) # fill_value can be nan if there is no missing hole. # only fill_value will be changed s = SparseArray([0, 0, 0, 0], fill_value=np.nan) assert s.dtype == SparseDtype(np.int64, fill_value=np.nan) assert np.isnan(s.fill_value) res = s.fillna(-1) exp = SparseArray([0, 0, 0, 0], fill_value=-1) tm.assert_sp_array_equal(res, exp)
def test_astype_bool(self): a = pd.SparseArray([1, 0, 0, 1], dtype=SparseDtype(int, 0)) result = a.astype(bool) expected = SparseArray([True, 0, 0, True], dtype=SparseDtype(bool, 0)) tm.assert_sp_array_equal(result, expected) # update fill value result = a.astype(SparseDtype(bool, False)) expected = SparseArray([True, False, False, True], dtype=SparseDtype(bool, False)) tm.assert_sp_array_equal(result, expected)
def test_constructor_bool_fill_value(self): arr = SparseArray([True, False, True], dtype=None) assert arr.dtype == SparseDtype(np.bool) assert not arr.fill_value arr = SparseArray([True, False, True], dtype=np.bool) assert arr.dtype == SparseDtype(np.bool) assert not arr.fill_value arr = SparseArray([True, False, True], dtype=np.bool, fill_value=True) assert arr.dtype == SparseDtype(np.bool, True) assert arr.fill_value
def test_constructor_spindex_dtype_scalar(self, sparse_index): # scalar input arr = SparseArray(data=1, sparse_index=sparse_index, dtype=None) exp = SparseArray([1], dtype=None) tm.assert_sp_array_equal(arr, exp) assert arr.dtype == SparseDtype(np.int64) assert arr.fill_value == 0 arr = SparseArray(data=1, sparse_index=IntIndex(1, [0]), dtype=None) exp = SparseArray([1], dtype=None) tm.assert_sp_array_equal(arr, exp) assert arr.dtype == SparseDtype(np.int64) assert arr.fill_value == 0
def test_getitem_int_dtype(self): # GH 8292 s = pd.SparseSeries([0, 1, 2, 3, 4, 5, 6], name='xxx') res = s[::2] exp = pd.SparseSeries([0, 2, 4, 6], index=[0, 2, 4, 6], name='xxx') tm.assert_sp_series_equal(res, exp) assert res.dtype == SparseDtype(np.int64) s = pd.SparseSeries([0, 1, 2, 3, 4, 5, 6], fill_value=0, name='xxx') res = s[::2] exp = pd.SparseSeries([0, 2, 4, 6], index=[0, 2, 4, 6], fill_value=0, name='xxx') tm.assert_sp_series_equal(res, exp) assert res.dtype == SparseDtype(np.int64)
def test_int_array(self): # have to specify dtype explicitly until fixing GH 667 dtype = np.int64 values = self._base([0, 1, 2, 0, 0, 0, 1, 2, 1, 0], dtype=dtype) rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=dtype) for kind in ['integer', 'block']: a = self._klass(values, dtype=dtype, kind=kind) assert a.dtype == SparseDtype(dtype) b = self._klass(rvalues, dtype=dtype, kind=kind) assert b.dtype == SparseDtype(dtype) self._check_numeric_ops(a, b, values, rvalues) self._check_numeric_ops(a, b * 0, values, rvalues * 0) a = self._klass(values, fill_value=0, dtype=dtype, kind=kind) assert a.dtype == SparseDtype(dtype) b = self._klass(rvalues, dtype=dtype, kind=kind) assert b.dtype == SparseDtype(dtype) self._check_numeric_ops(a, b, values, rvalues) a = self._klass(values, fill_value=0, dtype=dtype, kind=kind) assert a.dtype == SparseDtype(dtype) b = self._klass(rvalues, fill_value=0, dtype=dtype, kind=kind) assert b.dtype == SparseDtype(dtype) self._check_numeric_ops(a, b, values, rvalues) a = self._klass(values, fill_value=1, dtype=dtype, kind=kind) assert a.dtype == SparseDtype(dtype, fill_value=1) b = self._klass(rvalues, fill_value=2, dtype=dtype, kind=kind) assert b.dtype == SparseDtype(dtype, fill_value=2) self._check_numeric_ops(a, b, values, rvalues)
def test_int_array(self, kind, mix, all_arithmetic_functions): op = all_arithmetic_functions # have to specify dtype explicitly until fixing GH 667 dtype = np.int64 values = self._base([0, 1, 2, 0, 0, 0, 1, 2, 1, 0], dtype=dtype) rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=dtype) a = self._klass(values, dtype=dtype, kind=kind) assert a.dtype == SparseDtype(dtype) b = self._klass(rvalues, dtype=dtype, kind=kind) assert b.dtype == SparseDtype(dtype) self._check_numeric_ops(a, b, values, rvalues, mix, op) self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op) a = self._klass(values, fill_value=0, dtype=dtype, kind=kind) assert a.dtype == SparseDtype(dtype) b = self._klass(rvalues, dtype=dtype, kind=kind) assert b.dtype == SparseDtype(dtype) self._check_numeric_ops(a, b, values, rvalues, mix, op) a = self._klass(values, fill_value=0, dtype=dtype, kind=kind) assert a.dtype == SparseDtype(dtype) b = self._klass(rvalues, fill_value=0, dtype=dtype, kind=kind) assert b.dtype == SparseDtype(dtype) self._check_numeric_ops(a, b, values, rvalues, mix, op) a = self._klass(values, fill_value=1, dtype=dtype, kind=kind) assert a.dtype == SparseDtype(dtype, fill_value=1) b = self._klass(rvalues, fill_value=2, dtype=dtype, kind=kind) assert b.dtype == SparseDtype(dtype, fill_value=2) self._check_numeric_ops(a, b, values, rvalues, mix, op)
def tests_indexing_with_sparse(self, kind, fill): # see gh-13985 arr = pd.SparseArray([1, 2, 3], kind=kind) indexer = pd.SparseArray([True, False, True], fill_value=fill, dtype=bool) expected = arr[indexer] result = pd.SparseArray([1, 3], kind=kind) tm.assert_sp_array_equal(result, expected) s = pd.SparseSeries(arr, index=["a", "b", "c"], dtype=np.float64) expected = pd.SparseSeries([1, 3], index=["a", "c"], kind=kind, dtype=SparseDtype(np.float64, s.fill_value)) tm.assert_sp_series_equal(s[indexer], expected) tm.assert_sp_series_equal(s.loc[indexer], expected) tm.assert_sp_series_equal(s.iloc[indexer], expected) indexer = pd.SparseSeries(indexer, index=["a", "b", "c"]) tm.assert_sp_series_equal(s[indexer], expected) tm.assert_sp_series_equal(s.loc[indexer], expected) msg = ("iLocation based boolean indexing cannot " "use an indexable as a mask") with pytest.raises(ValueError, match=msg): s.iloc[indexer]
def test_dataframe_dummies_with_na(self, df, sparse, dtype): df.loc[3, :] = [np.nan, np.nan, np.nan] result = get_dummies(df, dummy_na=True, sparse=sparse, dtype=dtype).sort_index(axis=1) if sparse: arr = SparseArray typ = SparseDtype(dtype, 0) else: arr = np.array typ = dtype expected = DataFrame({ 'C': [1, 2, 3, np.nan], 'A_a': arr([1, 0, 1, 0], dtype=typ), 'A_b': arr([0, 1, 0, 0], dtype=typ), 'A_nan': arr([0, 0, 0, 1], dtype=typ), 'B_b': arr([1, 1, 0, 0], dtype=typ), 'B_c': arr([0, 0, 1, 0], dtype=typ), 'B_nan': arr([0, 0, 0, 1], dtype=typ) }).sort_index(axis=1) assert_frame_equal(result, expected) result = get_dummies(df, dummy_na=False, sparse=sparse, dtype=dtype) expected = expected[['C', 'A_a', 'A_b', 'B_b', 'B_c']] assert_frame_equal(result, expected)
def test_inferred_dtype(dtype, fill_value): sparse_dtype = SparseDtype(dtype) result = sparse_dtype.fill_value if pd.isna(fill_value): assert pd.isna(result) and type(result) == type(fill_value) else: assert result == fill_value
def test_constructor_spindex_dtype_scalar_broadcasts(self): arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]), fill_value=0, dtype=None) exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=None) tm.assert_sp_array_equal(arr, exp) assert arr.dtype == SparseDtype(np.int64) assert arr.fill_value == 0
def test_constructor_dtype(self): arr = SparseArray([np.nan, 1, 2, np.nan]) assert arr.dtype == SparseDtype(np.float64, np.nan) assert arr.dtype.subtype == np.float64 assert np.isnan(arr.fill_value) arr = SparseArray([np.nan, 1, 2, np.nan], fill_value=0) assert arr.dtype == SparseDtype(np.float64, 0) assert arr.fill_value == 0 arr = SparseArray([0, 1, 2, 4], dtype=np.float64) assert arr.dtype == SparseDtype(np.float64, np.nan) assert np.isnan(arr.fill_value) arr = SparseArray([0, 1, 2, 4], dtype=np.int64) assert arr.dtype == SparseDtype(np.int64, 0) assert arr.fill_value == 0 arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=np.int64) assert arr.dtype == SparseDtype(np.int64, 0) assert arr.fill_value == 0 arr = SparseArray([0, 1, 2, 4], dtype=None) assert arr.dtype == SparseDtype(np.int64, 0) assert arr.fill_value == 0 arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=None) assert arr.dtype == SparseDtype(np.int64, 0) assert arr.fill_value == 0
def test_concat_bug(self): from pandas.core.sparse.api import SparseDtype x = pd.SparseDataFrame({"A": pd.SparseArray([np.nan, np.nan], fill_value=0)}) y = pd.SparseDataFrame({"B": []}) res = pd.concat([x, y], sort=False)[['A']] exp = pd.DataFrame({"A": pd.SparseArray([np.nan, np.nan], dtype=SparseDtype(float, 0))}) tm.assert_frame_equal(res, exp)
def test_astype_all(self, any_real_dtype): vals = np.array([1, 2, 3]) arr = SparseArray(vals, fill_value=1) typ = np.dtype(any_real_dtype) res = arr.astype(typ) assert res.dtype == SparseDtype(typ, 1) assert res.sp_values.dtype == typ tm.assert_numpy_array_equal(np.asarray(res.values), vals.astype(typ))
def test_constructor_object_dtype(self): # GH 11856 arr = SparseArray(["A", "A", np.nan, "B"], dtype=np.object) assert arr.dtype == SparseDtype(np.object) assert np.isnan(arr.fill_value) arr = SparseArray(["A", "A", np.nan, "B"], dtype=np.object, fill_value="A") assert arr.dtype == SparseDtype(np.object, "A") assert arr.fill_value == "A" # GH 17574 data = [False, 0, 100.0, 0.0] arr = SparseArray(data, dtype=np.object, fill_value=False) assert arr.dtype == SparseDtype(np.object, False) assert arr.fill_value is False arr_expected = np.array(data, dtype=np.object) it = (type(x) == type(y) and x == y for x, y in zip(arr, arr_expected)) assert np.fromiter(it, dtype=np.bool).all()
def test_get_dummies_all_sparse(self): df = pd.DataFrame({"A": [1, 2]}) result = pd.get_dummies(df, columns=["A"], sparse=True) dtype = SparseDtype("uint8", 0) expected = pd.DataFrame({ "A_1": SparseArray([1, 0], dtype=dtype), "A_2": SparseArray([0, 1], dtype=dtype), }) tm.assert_frame_equal(result, expected)
def test_setting_fill_value_updates(): arr = SparseArray([0.0, np.nan], fill_value=0) arr.fill_value = np.nan # use private constructor to get the index right # otherwise both nans would be un-stored. expected = SparseArray._simple_new( sparse_array=np.array([np.nan]), sparse_index=IntIndex(2, [1]), dtype=SparseDtype(float, np.nan), ) tm.assert_sp_array_equal(arr, expected)
def test_frame_basic_dtypes(self): for _, row in self.sdf.iterrows(): assert row.dtype == SparseDtype(object) tm.assert_sp_series_equal(self.sdf['string'], self.string_series, check_names=False) tm.assert_sp_series_equal(self.sdf['int'], self.int_series, check_names=False) tm.assert_sp_series_equal(self.sdf['float'], self.float_series, check_names=False) tm.assert_sp_series_equal(self.sdf['object'], self.object_series, check_names=False)
def test_constructor_bool(self): # GH 10648 data = np.array([False, False, True, True, False, False]) arr = SparseArray(data, fill_value=False, dtype=bool) assert arr.dtype == SparseDtype(bool) tm.assert_numpy_array_equal(arr.sp_values, np.array([True, True])) # Behavior change: np.asarray densifies. # tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr)) tm.assert_numpy_array_equal(arr.sp_index.indices, np.array([2, 3], np.int32)) dense = arr.to_dense() assert dense.dtype == bool tm.assert_numpy_array_equal(dense, data)
def test_dataframe_dummies_mix_default(self, df, sparse, dtype): result = get_dummies(df, sparse=sparse, dtype=dtype) if sparse: arr = SparseArray typ = SparseDtype(dtype, 0) else: arr = np.array typ = dtype expected = DataFrame({'C': [1, 2, 3], 'A_a': arr([1, 0, 1], dtype=typ), 'A_b': arr([0, 1, 0], dtype=typ), 'B_b': arr([1, 1, 0], dtype=typ), 'B_c': arr([0, 0, 1], dtype=typ)}) expected = expected[['C', 'A_a', 'A_b', 'B_b', 'B_c']] assert_frame_equal(result, expected)
def test_from_to_scipy_object(spmatrix, fill_value): # GH 4343 dtype = object columns = list('cd') index = list('ab') if (spmatrix is scipy.sparse.dok_matrix and LooseVersion(scipy.__version__) >= LooseVersion('0.19.0')): pytest.skip("dok_matrix from object does not work in SciPy >= 0.19") # Make one ndarray and from it one sparse matrix, both to be used for # constructing frames and comparing results arr = np.eye(2, dtype=dtype) try: spm = spmatrix(arr) assert spm.dtype == arr.dtype except (TypeError, AssertionError): # If conversion to sparse fails for this spmatrix type and arr.dtype, # then the combination is not currently supported in NumPy, so we # can just skip testing it thoroughly return sdf = SparseDataFrame(spm, index=index, columns=columns, default_fill_value=fill_value) # Expected result construction is kind of tricky for all # dtype-fill_value combinations; easiest to cast to something generic # and except later on rarr = arr.astype(object) rarr[arr == 0] = np.nan expected = SparseDataFrame(rarr, index=index, columns=columns).fillna( fill_value if fill_value is not None else np.nan) # Assert frame is as expected sdf_obj = sdf.astype(SparseDtype(object, fill_value)) tm.assert_sp_frame_equal(sdf_obj, expected) tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense()) # Assert spmatrices equal assert dict(sdf.to_coo().todok()) == dict(spm.todok()) # Ensure dtype is preserved if possible res_dtype = object tm.assert_contains_all(sdf.dtypes.apply(lambda dtype: dtype.subtype), {np.dtype(res_dtype)}) assert sdf.to_coo().dtype == res_dtype
def test_constructor_float32(self): # GH 10648 data = np.array([1., np.nan, 3], dtype=np.float32) arr = SparseArray(data, dtype=np.float32) assert arr.dtype == SparseDtype(np.float32) tm.assert_numpy_array_equal(arr.sp_values, np.array([1, 3], dtype=np.float32)) # Behavior change: np.asarray densifies. # tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr)) tm.assert_numpy_array_equal(arr.sp_index.indices, np.array([0, 2], dtype=np.int32)) for dense in [arr.to_dense(), arr.values]: assert dense.dtype == np.float32 tm.assert_numpy_array_equal(dense, data)
def test_dataframe_dummies_mix_default(self, df, sparse, dtype): result = get_dummies(df, sparse=sparse, dtype=dtype) if sparse: arr = SparseArray typ = SparseDtype(dtype, 0) else: arr = np.array typ = dtype expected = DataFrame({ "C": [1, 2, 3], "A_a": arr([1, 0, 1], dtype=typ), "A_b": arr([0, 1, 0], dtype=typ), "B_b": arr([1, 1, 0], dtype=typ), "B_c": arr([0, 0, 1], dtype=typ), }) expected = expected[["C", "A_a", "A_b", "B_b", "B_c"]] assert_frame_equal(result, expected)
def test_apply_nonuq(): orig = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=['a', 'a', 'c']) sparse = orig.to_sparse() res = sparse.apply(lambda s: s[0], axis=1) exp = orig.apply(lambda s: s[0], axis=1) # dtype must be kept assert res.dtype == SparseDtype(np.int64) # ToDo: apply must return subclassed dtype assert isinstance(res, Series) tm.assert_series_equal(res.to_dense(), exp) # df.T breaks sparse = orig.T.to_sparse() res = sparse.apply(lambda s: s[0], axis=0) # noqa exp = orig.T.apply(lambda s: s[0], axis=0)