def test_is_extension_type(check_scipy):
    """Check which containers ``com.is_extension_type`` accepts and rejects.

    ``check_scipy`` gates the additional assertion that a scipy sparse
    matrix is rejected; scipy is imported only when it is truthy.
    """
    # A plain list, an ndarray and a tz-naive DatetimeIndex must be rejected.
    for plain in ([1, 2, 3], np.array([1, 2, 3]), pd.DatetimeIndex([1, 2, 3])):
        assert not com.is_extension_type(plain)

    # Categorical, sparse and tz-aware datetime containers must be accepted.
    categorical = pd.Categorical([1, 2, 3])
    tz_dtype = DatetimeTZDtype("ns", tz="US/Eastern")
    accepted = (
        categorical,
        pd.Series(categorical),
        SparseArray([1, 2, 3]),
        pd.DatetimeIndex(["2000"], tz="US/Eastern"),
        pd.Series([], dtype=tz_dtype),
    )
    for extension_obj in accepted:
        assert com.is_extension_type(extension_obj)

    if not check_scipy:
        return

    import scipy.sparse

    # A scipy sparse matrix must be rejected as well.
    assert not com.is_extension_type(scipy.sparse.bsr_matrix([1, 2, 3]))
def test_binary_ufunc_scalar(ufunc, sparse, flip, arrays_for_binary_ufunc):
    """Check a binary ufunc applied to a Series and a scalar.

    Verifies that
      * ufunc(pd.Series, scalar) == pd.Series(ufunc(array, scalar))
      * ufunc(pd.Series, scalar) == ufunc(scalar, pd.Series)

    ``sparse`` wraps the input in a SparseArray first; ``flip`` swaps the
    operand order so the scalar comes first.
    """
    values, _ = arrays_for_binary_ufunc
    if sparse:
        values = SparseArray(values)

    scalar = 2
    ser = pd.Series(values, name="name")

    # Build both operand tuples with the same ordering.
    if flip:
        array_operands = (scalar, values)
        series_operands = (scalar, ser)
    else:
        array_operands = (values, scalar)
        series_operands = (ser, scalar)

    expected = pd.Series(ufunc(*array_operands), name="name")
    result = ufunc(*series_operands)
    tm.assert_series_equal(result, expected)
def _compare_other(self, s, data, op_name, other):
    """Check a comparison op on sparse ``data`` against a dense reference.

    The operator is looked up through ``self.get_op_from_name(op_name)``.
    It is applied first to ``data`` directly and then to ``pd.Series(data)``;
    both results must equal a sparse Series built from the same comparison
    on the dense values. The incoming ``s`` argument is not read.
    """
    compare = self.get_op_from_name(op_name)

    # array operand
    array_result = pd.Series(compare(data, other))
    result_dtype = array_result.dtype

    # The fill value is hard to test here, since the expected fill value is
    # not known in general. Rely on tests in `tests/sparse` to validate that.
    assert isinstance(result_dtype, SparseDtype)
    assert result_dtype.subtype == np.dtype("bool")

    with np.errstate(all="ignore"):
        dense_outcome = compare(np.asarray(data), np.asarray(other))
        # Reuse the fill value of the actual result, for the reason above.
        sparse_outcome = SparseArray(
            dense_outcome, fill_value=array_result.values.fill_value
        )
        expected = pd.Series(sparse_outcome)
    tm.assert_series_equal(array_result, expected)

    # series operand
    series_result = compare(pd.Series(data), other)
    tm.assert_series_equal(series_result, expected)
def data_missing(request):
    """Length 2 array with [NA, Valid]; fill value taken from ``request.param``."""
    na_then_valid = [np.nan, 1]
    fill = request.param
    return SparseArray(na_then_valid, fill_value=fill)
def data_for_twos(request):
    """Return a length-100 SparseArray in which every value is 2.0.

    ``request`` is accepted but not used.
    """
    twos = np.full(100, 2.0)
    return SparseArray(twos)
def data(request):
    """Return the SparseArray used for semantics tests.

    ``request.param`` is passed to ``make_data`` to produce the values and
    is also used as the array's fill value.
    """
    fill = request.param
    return SparseArray(make_data(fill), fill_value=fill)
def data_for_compare(request):
    """Return a length-10 SparseArray for comparison tests.

    The values mix zeros, one NaN, and negative and positive numbers; the
    fill value is taken from ``request.param``.
    """
    values = [0, 0, np.nan, -2, -1, 4, 2, 3, 0, 0]
    fill = request.param
    return SparseArray(values, fill_value=fill)
def data_missing_for_sorting(request):
    """Return SparseArray([2, NaN, 1]) with fill value ``request.param``."""
    values = [2, np.nan, 1]
    fill = request.param
    return SparseArray(values, fill_value=fill)
def time_sparse_array(self, dense_proportion, fill_value, dtype):
    """Construct a SparseArray from ``self.array`` and discard the result.

    Only ``fill_value`` and ``dtype`` are forwarded to the constructor;
    ``dense_proportion`` is accepted but not read here.
    """
    source = self.array
    SparseArray(source, dtype=dtype, fill_value=fill_value)
def setup(self):
    """Build ``self.sp_arr`` from a one-million-element float64 array.

    The dense input comes from ``make_array`` with NaN as its third argument.
    """
    # NOTE(review): 1e-5 is presumably the dense proportion -- confirm
    # against make_array's signature.
    dense = make_array(1_000_000, 1e-5, np.nan, np.float64)
    self.sp_arr = SparseArray(dense)
def setup(self, indices, allow_fill):
    """Build ``self.sp_arr`` from a one-million-element float64 array.

    The same fill value, 0.0, is passed to ``make_array`` and to the
    SparseArray. ``indices`` and ``allow_fill`` are accepted but not read here.
    """
    zero_fill = 0.0
    size = 1_000_000
    # NOTE(review): 1e-5 is presumably the dense proportion -- confirm
    # against make_array's signature.
    dense = make_array(size, 1e-5, zero_fill, np.float64)
    self.sp_arr = SparseArray(dense, fill_value=zero_fill)
def setup(self, func, fill_value):
    """Build ``self.sp_arr`` from a one-million-element float64 array.

    ``fill_value`` is passed both to ``make_array`` and to the SparseArray.
    ``func`` is accepted but not read here.
    """
    size = 1_000_000
    # NOTE(review): 1e-5 is presumably the dense proportion -- confirm
    # against make_array's signature.
    dense = make_array(size, 1e-5, fill_value, np.float64)
    self.sp_arr = SparseArray(dense, fill_value=fill_value)
def setup(self, dense_proportion, fill_value):
    """Build ``self.array1`` and ``self.array2`` as int64-based SparseArrays.

    Each array comes from its own ``make_array`` call with one million
    elements, using the given ``dense_proportion`` and ``fill_value``.
    """
    size = 1_000_000

    # Keep the make_array calls in this order; they may draw from shared state.
    first_dense = make_array(size, dense_proportion, fill_value, np.int64)
    self.array1 = SparseArray(first_dense, fill_value=fill_value)

    second_dense = make_array(size, dense_proportion, fill_value, np.int64)
    self.array2 = SparseArray(second_dense, fill_value=fill_value)
def data_zeros(request):
    """Return a SparseArray of 100 integer zeros with fill value ``request.param``."""
    zeros = np.zeros(100, dtype=int)
    fill = request.param
    return SparseArray(zeros, fill_value=fill)
def gen(count):
    """Yield ``count`` SparseArrays, each built by a fresh ``make_data`` call.

    ``request`` is not a parameter of this function; it is resolved from
    the surrounding scope.
    """
    yield from (
        SparseArray(make_data(request.param), fill_value=request.param)
        for _ in range(count)
    )
def data_for_sorting(request):
    """Return SparseArray([2, 3, 1]) with fill value ``request.param``."""
    values = [2, 3, 1]
    fill = request.param
    return SparseArray(values, fill_value=fill)
def test_is_scipy_sparse():
    """``com.is_scipy_sparse`` accepts a scipy matrix and rejects a SparseArray."""
    import scipy.sparse as scipy_sparse

    scipy_matrix = scipy_sparse.bsr_matrix([1, 2, 3])
    assert com.is_scipy_sparse(scipy_matrix)

    pandas_sparse = SparseArray([1, 2, 3])
    assert not com.is_scipy_sparse(pandas_sparse)
def data_for_grouping(request):
    """Return a length-8 SparseArray for grouping tests.

    The values are [1, 1, NaN, NaN, 2, 2, 1, 3]; the fill value is taken
    from ``request.param``.
    """
    values = [1, 1, np.nan, np.nan, 2, 2, 1, 3]
    fill = request.param
    return SparseArray(values, fill_value=fill)
), # Category (["a", "b"], "category", pd.Categorical(["a", "b"])), ( ["a", "b"], pd.CategoricalDtype(None, ordered=True), pd.Categorical(["a", "b"], ordered=True), ), # Interval ( [pd.Interval(1, 2), pd.Interval(3, 4)], "interval", IntervalArray.from_tuples([(1, 2), (3, 4)]), ), # Sparse ([0, 1], "Sparse[int64]", SparseArray([0, 1], dtype="int64")), # IntegerNA ([1, None], "Int16", integer_array([1, None], dtype="Int16")), (pd.Series([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))), # String (["a", None], "string", StringArray._from_sequence(["a", None])), (["a", None], pd.StringDtype(), StringArray._from_sequence(["a", None])), # Boolean ([True, None], "boolean", BooleanArray._from_sequence([True, None])), ([True, None], pd.BooleanDtype(), BooleanArray._from_sequence([True, None])), # Index (pd.Index([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))), # Series[EA] returns the EA ( pd.Series(pd.Categorical(["a", "b"], categories=["a", "b", "c"])), None,