def test_isna(self, data_missing):
    """Check ``isna`` on the array, on a Series of it, and on an emptied Series.

    The expected result is ``[True, False]`` stored with a boolean sparse
    dtype whose fill value is ``pd.isna`` of the input's own fill value.
    """
    fill_is_na = pd.isna(data_missing.dtype.fill_value)
    sparse_bool = SparseDtype(bool, fill_is_na)
    expected_array = SparseArray([True, False], dtype=sparse_bool)

    # Array-level check.
    self.assert_equal(pd.isna(data_missing), expected_array)

    # Series-level check.
    series = pd.Series(data_missing)
    self.assert_series_equal(series.isna(), pd.Series(expected_array))

    # GH 21189
    emptied = series.drop([0, 1])
    expected_empty = pd.Series([], dtype=sparse_bool)
    self.assert_series_equal(emptied.isna(), expected_empty)
def test_binary_ufunc_with_series(
    flip, shuffle, sparse, ufunc, arrays_for_binary_ufunc
):
    """Binary ufunc on two Series equals the ufunc on the underlying arrays.

    Test that
      * func(pd.Series(a), pd.Series(b)) == pd.Series(ufunc(a, b))
        with alignment between the indices
    """
    left, right = arrays_for_binary_ufunc
    if sparse:
        left = SparseArray(left, dtype=pd.SparseDtype("int64", 0))
        right = SparseArray(right, dtype=pd.SparseDtype("int64", 0))

    # Both operands share a name, so the result is expected to carry it.
    name = "name"
    series = pd.Series(left, name=name)
    other = pd.Series(right, name=name)

    # Drawn whether or not ``shuffle`` is set.
    idx = np.random.permutation(len(left))

    if not shuffle:
        index = series.index
    else:
        other = other.take(idx)
        # The expected index is the one alignment yields for the first operand.
        aligned = other.align(series) if flip else series.align(other)
        index = aligned[0].index

    if flip:
        # Swapped operand order.
        array_args = (right, left)
        series_args = (other, series)
    else:
        array_args = (left, right)
        series_args = (series, other)

    expected = pd.Series(ufunc(*array_args), index=index, name=name)
    result = ufunc(*series_args)
    tm.assert_series_equal(result, expected)
def test_multiple_output_ufunc(sparse, arrays_for_binary_ufunc):
    """``np.modf`` on a Series returns a tuple of Series matching the array result.

    Each output is compared against a Series built from the corresponding
    array-level output, carrying the input Series' name.
    """
    values, _unused = arrays_for_binary_ufunc
    if sparse:
        values = SparseArray(values)

    series = pd.Series(values, name="name")
    result = np.modf(series)
    expected = np.modf(values)

    assert isinstance(result, tuple)
    assert isinstance(expected, tuple)

    # Compare the two outputs position by position.
    for position in (0, 1):
        wanted = pd.Series(expected[position], name="name")
        tm.assert_series_equal(result[position], wanted)
def test_is_extension_array_dtype(check_scipy):
    """Check which objects ``com.is_extension_array_dtype`` accepts.

    Lists, NumPy arrays and a tz-naive DatetimeIndex must be rejected;
    Categorical, SparseArray, tz-aware datetime data and Series holding
    them must be accepted. When ``check_scipy`` is truthy, a scipy sparse
    matrix must be rejected as well.
    """
    rejected = (
        [1, 2, 3],
        np.array([1, 2, 3]),
        pd.DatetimeIndex([1, 2, 3]),
    )
    for obj in rejected:
        assert not com.is_extension_array_dtype(obj)

    cat = pd.Categorical([1, 2, 3])
    tz_dtype = DatetimeTZDtype("ns", tz="US/Eastern")
    accepted = (
        cat,
        pd.Series(cat),
        SparseArray([1, 2, 3]),
        pd.DatetimeIndex(["2000"], tz="US/Eastern"),
        pd.Series([], dtype=tz_dtype),
    )
    for obj in accepted:
        assert com.is_extension_array_dtype(obj)

    if check_scipy:
        import scipy.sparse

        matrix = scipy.sparse.bsr_matrix([1, 2, 3])
        assert not com.is_extension_array_dtype(matrix)
def _compare_other(self, data_for_compare: SparseArray, comparison_op, other):
    """Apply ``comparison_op`` to a SparseArray and ``other`` and verify the result.

    The result must be a boolean SparseArray equal to the one built from
    the dense comparison, with a fill value derived from comparing the
    operands' fill values (or the array's fill value against all of
    ``other`` when ``other`` is not a SparseArray).
    """
    result = comparison_op(data_for_compare, other)
    assert isinstance(result, SparseArray)
    assert result.dtype.subtype == np.bool_

    own_fill = data_for_compare.fill_value
    if isinstance(other, SparseArray):
        fill_value = comparison_op(own_fill, other.fill_value)
    else:
        # The fill value only holds if it compares true against every element.
        elementwise = comparison_op(np.asarray(own_fill), np.asarray(other))
        fill_value = np.all(elementwise)

    dense_comparison = comparison_op(
        data_for_compare.to_dense(), np.asarray(other)
    )
    expected = SparseArray(
        dense_comparison,
        fill_value=fill_value,
        dtype=np.bool_,
    )
    tm.assert_sp_array_equal(result, expected)
def test_binary_ufunc_scalar(ufunc, sparse, flip, arrays_for_binary_ufunc):
    """Binary ufunc between a Series and a scalar matches the array-level result.

    Test that
      * ufunc(pd.Series, scalar) == pd.Series(ufunc(array, scalar))
      * ufunc(pd.Series, scalar) == ufunc(scalar, pd.Series)
    """
    values, _unused = arrays_for_binary_ufunc
    if sparse:
        values = SparseArray(values)

    scalar = 2
    series = pd.Series(values, name="name")

    if flip:
        # Scalar goes first.
        series_args = (scalar, series)
        array_args = (scalar, values)
    else:
        series_args = (series, scalar)
        array_args = (values, scalar)

    expected = pd.Series(ufunc(*array_args), name="name")
    result = ufunc(*series_args)
    tm.assert_series_equal(result, expected)
def _compare_other(self, s, data, op_name, other):
    """Check a comparison op on sparse data, both array-wise and Series-wise.

    The incoming ``s`` is not consulted; the Series under test is rebuilt
    from ``data``.
    """
    op = self.get_op_from_name(op_name)

    # array
    array_result = pd.Series(op(data, other))
    # hard to test the fill value, since we don't know what expected
    # is in general.
    # Rely on tests in `tests/sparse` to validate that.
    assert isinstance(array_result.dtype, SparseDtype)
    assert array_result.dtype.subtype == np.dtype("bool")

    with np.errstate(all="ignore"):
        dense_comparison = op(np.asarray(data), np.asarray(other))
        # Reuse the observed fill value rather than predicting it.
        sparse_expected = SparseArray(
            dense_comparison,
            fill_value=array_result.values.fill_value,
        )
        expected = pd.Series(sparse_expected)

    tm.assert_series_equal(array_result, expected)

    # series
    series = pd.Series(data)
    series_result = op(series, other)
    tm.assert_series_equal(series_result, expected)
), # Category (["a", "b"], "category", pd.Categorical(["a", "b"])), ( ["a", "b"], pd.CategoricalDtype(None, ordered=True), pd.Categorical(["a", "b"], ordered=True), ), # Interval ( [pd.Interval(1, 2), pd.Interval(3, 4)], "interval", IntervalArray.from_tuples([(1, 2), (3, 4)]), ), # Sparse ([0, 1], "Sparse[int64]", SparseArray([0, 1], dtype="int64")), # IntegerNA ([1, None], "Int16", integer_array([1, None], dtype="Int16")), (pd.Series([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))), # String (["a", None], "string", StringArray._from_sequence(["a", None])), (["a", None], pd.StringDtype(), StringArray._from_sequence(["a", None])), # Boolean ([True, None], "boolean", BooleanArray._from_sequence([True, None])), ([True, None], pd.BooleanDtype(), BooleanArray._from_sequence([True, None])), # Index (pd.Index([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))), # Series[EA] returns the EA ( pd.Series(pd.Categorical(["a", "b"], categories=["a", "b", "c"])), None,
def data_missing_for_sorting(request):
    """Return a SparseArray of ``[2, NaN, 1]`` filled with ``request.param``."""
    values = [2, np.nan, 1]
    return SparseArray(values, fill_value=request.param)
def gen(count):
    """Yield ``count`` SparseArrays, each from a fresh ``make_data`` call.

    ``request`` and ``make_data`` are resolved from outside this function;
    ``request.param`` is used both as the ``make_data`` argument and as the
    fill value.
    """
    yield from (
        SparseArray(make_data(request.param), fill_value=request.param)
        for _ in range(count)
    )
def data_for_twos(request):
    """Return a 100-element SparseArray in which every value is 2.

    ``request`` is accepted but not read.
    """
    ones = np.ones(100)
    twos = ones * 2
    return SparseArray(twos)
def setup(self, indices, allow_fill):
    """Store a SparseArray on ``self.sp_arr`` for the benchmark.

    The dense input comes from ``make_array`` with one million elements,
    ``1e-5`` as its second argument, a fill value of ``0.0`` and a float64
    dtype. ``indices`` and ``allow_fill`` are not read here.
    """
    size = 1_000_000
    fill = 0.0
    dense = make_array(size, 1e-5, fill, np.float64)
    self.sp_arr = SparseArray(dense, fill_value=fill)
def setup(self, func, fill_value):
    """Store a SparseArray on ``self.sp_arr`` built with ``fill_value``.

    The dense input comes from ``make_array`` with one million elements,
    ``1e-5`` as its second argument and a float64 dtype. ``func`` is not
    read here.
    """
    size = 1_000_000
    dense = make_array(size, 1e-5, fill_value, np.float64)
    self.sp_arr = SparseArray(dense, fill_value=fill_value)
def setup(self, dense_proportion, fill_value):
    """Store two independently generated SparseArrays on ``self``.

    Each array is built from its own ``make_array`` call (one million
    int64 elements) and saved as ``self.array1`` and ``self.array2``.
    """
    size = 1_000_000

    def build():
        # One make_array call per operand, so the two arrays are generated separately.
        dense = make_array(size, dense_proportion, fill_value, np.int64)
        return SparseArray(dense, fill_value=fill_value)

    self.array1 = build()
    self.array2 = build()
def data_zeros(request):
    """Return a 100-element integer SparseArray of zeros.

    ``request.param`` supplies the fill value.
    """
    zeros = np.zeros(100, dtype=int)
    return SparseArray(zeros, fill_value=request.param)
def data(request):
    """SparseArray for semantics tests.

    The values come from ``make_data(request.param)``, and the same
    ``request.param`` is used as the array's fill value.
    """
    return SparseArray(make_data(request.param), fill_value=request.param)
def setup(self):
    """Store a SparseArray on ``self.sp_arr`` for the benchmark.

    The dense input comes from ``make_array`` with one million elements,
    ``1e-5`` as its second argument, NaN as the fill value and a float64
    dtype. The SparseArray itself is built with its default fill value.
    """
    size = 1_000_000
    dense = make_array(size, 1e-5, np.nan, np.float64)
    self.sp_arr = SparseArray(dense)
def data_missing(request):
    """Length 2 array with [NA, Valid]"""
    values = [np.nan, 1]
    return SparseArray(values, fill_value=request.param)
def time_sparse_array(self, dense_proportion, fill_value, dtype):
    """Construct a SparseArray from ``self.array`` and discard it.

    ``dense_proportion`` is not read here.
    """
    options = {"fill_value": fill_value, "dtype": dtype}
    SparseArray(self.array, **options)
def data_for_sorting(request):
    """Return a SparseArray of ``[2, 3, 1]`` filled with ``request.param``."""
    values = [2, 3, 1]
    return SparseArray(values, fill_value=request.param)
def test_is_scipy_sparse():
    """A scipy sparse matrix is recognised as scipy-sparse; a SparseArray is not."""
    from scipy.sparse import bsr_matrix

    scipy_matrix = bsr_matrix([1, 2, 3])
    pandas_sparse = SparseArray([1, 2, 3])

    assert com.is_scipy_sparse(scipy_matrix)
    assert not com.is_scipy_sparse(pandas_sparse)
def data_for_grouping(request):
    """Return an 8-element SparseArray with repeated values and two NaNs.

    ``request.param`` supplies the fill value.
    """
    values = [1, 1, np.nan, np.nan, 2, 2, 1, 3]
    return SparseArray(values, fill_value=request.param)
def data_for_compare(request):
    """Return a 10-element SparseArray mixing zeros, a NaN, negatives and positives.

    ``request.param`` supplies the fill value.
    """
    values = [0, 0, np.nan, -2, -1, 4, 2, 3, 0, 0]
    return SparseArray(values, fill_value=request.param)