Exemple #1
0
    def test_isna(self, data_missing):
        """Check ``isna`` on a sparse array, on a Series of it, and on an emptied Series.

        The expected result is a boolean sparse array whose fill value is
        ``pd.isna`` of the input's own fill value.
        """
        fill_is_na = pd.isna(data_missing.dtype.fill_value)
        bool_dtype = SparseDtype(bool, fill_is_na)
        expected_arr = SparseArray([True, False], dtype=bool_dtype)

        # Array-level check.
        self.assert_equal(pd.isna(data_missing), expected_arr)

        # Series-level check.
        wrapped = pd.Series(data_missing)
        self.assert_series_equal(wrapped.isna(), pd.Series(expected_arr))

        # GH 21189: after dropping both rows, isna() is compared against an
        # empty Series carrying the same sparse boolean dtype.
        emptied = pd.Series(data_missing).drop([0, 1])
        self.assert_series_equal(
            emptied.isna(), pd.Series([], dtype=bool_dtype)
        )
Exemple #2
0
def test_binary_ufunc_with_series(
    flip, shuffle, sparse, ufunc, arrays_for_binary_ufunc
):
    # Test that
    #   * func(pd.Series(a), pd.Series(b)) == pd.Series(ufunc(a, b))
    #   with alignment between the indices
    a1, a2 = arrays_for_binary_ufunc
    if sparse:
        a1 = SparseArray(a1, dtype=pd.SparseDtype("int64", 0))
        a2 = SparseArray(a2, dtype=pd.SparseDtype("int64", 0))

    name = "name"  # op(pd.Series, array) preserves the name.
    series = pd.Series(a1, name=name)
    other = pd.Series(a2, name=name)

    idx = np.random.permutation(len(a1))

    if shuffle:
        other = other.take(idx)
        if flip:
            index = other.align(series)[0].index
        else:
            index = series.align(other)[0].index
    else:
        index = series.index

    array_args = (a1, a2)
    series_args = (series, other)  # ufunc(series, array)

    if flip:
        array_args = tuple(reversed(array_args))
        series_args = tuple(reversed(series_args))  # ufunc(array, series)

    expected = pd.Series(ufunc(*array_args), index=index, name=name)
    result = ufunc(*series_args)
    tm.assert_series_equal(result, expected)
Exemple #3
0
def test_multiple_output_ufunc(sparse, arrays_for_binary_ufunc):
    # Test that the same conditions from unary input apply to multi-output
    # ufuncs
    arr, _ = arrays_for_binary_ufunc

    if sparse:
        arr = SparseArray(arr)

    series = pd.Series(arr, name="name")
    result = np.modf(series)
    expected = np.modf(arr)

    assert isinstance(result, tuple)
    assert isinstance(expected, tuple)

    tm.assert_series_equal(result[0], pd.Series(expected[0], name="name"))
    tm.assert_series_equal(result[1], pd.Series(expected[1], name="name"))
Exemple #4
0
def test_is_extension_array_dtype(check_scipy):
    assert not com.is_extension_array_dtype([1, 2, 3])
    assert not com.is_extension_array_dtype(np.array([1, 2, 3]))
    assert not com.is_extension_array_dtype(pd.DatetimeIndex([1, 2, 3]))

    cat = pd.Categorical([1, 2, 3])
    assert com.is_extension_array_dtype(cat)
    assert com.is_extension_array_dtype(pd.Series(cat))
    assert com.is_extension_array_dtype(SparseArray([1, 2, 3]))
    assert com.is_extension_array_dtype(pd.DatetimeIndex(["2000"], tz="US/Eastern"))

    dtype = DatetimeTZDtype("ns", tz="US/Eastern")
    s = pd.Series([], dtype=dtype)
    assert com.is_extension_array_dtype(s)

    if check_scipy:
        import scipy.sparse

        assert not com.is_extension_array_dtype(scipy.sparse.bsr_matrix([1, 2, 3]))
Exemple #5
0
    def _compare_other(self, data_for_compare: SparseArray, comparison_op,
                       other):
        op = comparison_op

        result = op(data_for_compare, other)
        assert isinstance(result, SparseArray)
        assert result.dtype.subtype == np.bool_

        if isinstance(other, SparseArray):
            fill_value = op(data_for_compare.fill_value, other.fill_value)
        else:
            fill_value = np.all(
                op(np.asarray(data_for_compare.fill_value), np.asarray(other)))

            expected = SparseArray(
                op(data_for_compare.to_dense(), np.asarray(other)),
                fill_value=fill_value,
                dtype=np.bool_,
            )
        tm.assert_sp_array_equal(result, expected)
Exemple #6
0
def test_binary_ufunc_scalar(ufunc, sparse, flip, arrays_for_binary_ufunc):
    # Test that
    #   * ufunc(pd.Series, scalar) == pd.Series(ufunc(array, scalar))
    #   * ufunc(pd.Series, scalar) == ufunc(scalar, pd.Series)
    arr, _ = arrays_for_binary_ufunc
    if sparse:
        arr = SparseArray(arr)
    other = 2
    series = pd.Series(arr, name="name")

    series_args = (series, other)
    array_args = (arr, other)

    if flip:
        series_args = tuple(reversed(series_args))
        array_args = tuple(reversed(array_args))

    expected = pd.Series(ufunc(*array_args), name="name")
    result = ufunc(*series_args)

    tm.assert_series_equal(result, expected)
Exemple #7
0
    def _compare_other(self, s, data, op_name, other):
        """Check a named comparison op on ``data``, as an array and as a Series.

        The incoming ``s`` is not read; the Series under test is rebuilt from
        ``data``.
        """
        op = self.get_op_from_name(op_name)

        # Array path.
        array_result = pd.Series(op(data, other))
        # The fill value is hard to predict in general, so it is taken from
        # the actual result; `tests/sparse` is relied on to validate it.
        result_dtype = array_result.dtype
        assert isinstance(result_dtype, SparseDtype)
        assert result_dtype.subtype == np.dtype("bool")

        with np.errstate(all="ignore"):
            dense_comparison = op(np.asarray(data), np.asarray(other))
            expected = pd.Series(
                SparseArray(
                    dense_comparison,
                    fill_value=array_result.values.fill_value,
                )
            )

        tm.assert_series_equal(array_result, expected)

        # Series path.
        series_result = op(pd.Series(data), other)
        tm.assert_series_equal(series_result, expected)
Exemple #8
0
 ),
 # Category
 (["a", "b"], "category", pd.Categorical(["a", "b"])),
 (
     ["a", "b"],
     pd.CategoricalDtype(None, ordered=True),
     pd.Categorical(["a", "b"], ordered=True),
 ),
 # Interval
 (
     [pd.Interval(1, 2), pd.Interval(3, 4)],
     "interval",
     IntervalArray.from_tuples([(1, 2), (3, 4)]),
 ),
 # Sparse
 ([0, 1], "Sparse[int64]", SparseArray([0, 1], dtype="int64")),
 # IntegerNA
 ([1, None], "Int16", integer_array([1, None], dtype="Int16")),
 (pd.Series([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))),
 # String
 (["a", None], "string", StringArray._from_sequence(["a", None])),
 (["a", None], pd.StringDtype(), StringArray._from_sequence(["a", None])),
 # Boolean
 ([True, None], "boolean", BooleanArray._from_sequence([True, None])),
 ([True, None], pd.BooleanDtype(), BooleanArray._from_sequence([True, None])),
 # Index
 (pd.Index([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))),
 # Series[EA] returns the EA
 (
     pd.Series(pd.Categorical(["a", "b"], categories=["a", "b", "c"])),
     None,
Exemple #9
0
def data_missing_for_sorting(request):
    """Return the sparse array ``[2, NaN, 1]`` filled with ``request.param``."""
    values = [2, np.nan, 1]
    fill = request.param
    return SparseArray(values, fill_value=fill)
Exemple #10
0
 def gen(count):
     """Lazily yield ``count`` SparseArrays built from ``make_data(request.param)``."""
     yield from (
         SparseArray(make_data(request.param), fill_value=request.param)
         for _ in range(count)
     )
Exemple #11
0
def data_for_twos(request):
    """Return a 100-element float SparseArray in which every value is 2."""
    twos = np.full(100, 2.0)
    return SparseArray(twos)
Exemple #12
0
 def setup(self, indices, allow_fill):
     """Store a zero-filled float64 SparseArray on ``self.sp_arr``.

     ``indices`` and ``allow_fill`` are not read here.
     """
     zero_fill = 0.0
     # NOTE(review): arguments are presumably (length, dense proportion,
     # fill value, dtype) -- confirm against make_array.
     dense = make_array(1_000_000, 1e-5, zero_fill, np.float64)
     self.sp_arr = SparseArray(dense, fill_value=zero_fill)
Exemple #13
0
 def setup(self, func, fill_value):
     """Store a float64 SparseArray using ``fill_value`` on ``self.sp_arr``.

     ``func`` is not read here.
     """
     n_elements = 1_000_000
     # NOTE(review): arguments are presumably (length, dense proportion,
     # fill value, dtype) -- confirm against make_array.
     dense = make_array(n_elements, 1e-5, fill_value, np.float64)
     self.sp_arr = SparseArray(dense, fill_value=fill_value)
Exemple #14
0
 def setup(self, dense_proportion, fill_value):
     """Store two int64 SparseArrays on ``self.array1`` and ``self.array2``.

     Each is built from its own ``make_array`` call with the same arguments.
     """
     n_elements = 1_000_000
     # Build and assign one array at a time, first array1 and then array2.
     for attr_name in ("array1", "array2"):
         dense = make_array(n_elements, dense_proportion, fill_value, np.int64)
         setattr(self, attr_name, SparseArray(dense, fill_value=fill_value))
def data_zeros(request):
    """Return 100 integer zeros as a SparseArray filled with ``request.param``."""
    zeros = np.zeros(100, dtype=int)
    fill = request.param
    return SparseArray(zeros, fill_value=fill)
Exemple #16
0
def data(request):
    """Return a SparseArray of ``make_data(request.param)`` filled with ``request.param``.

    NOTE(review): presumably length 100, as the earlier docstring stated --
    confirm against make_data.
    """
    fill = request.param
    return SparseArray(make_data(fill), fill_value=fill)
Exemple #17
0
 def setup(self):
     """Store a float64 SparseArray with the default fill value on ``self.sp_arr``."""
     # NOTE(review): arguments are presumably (length, dense proportion,
     # fill value, dtype) -- confirm against make_array.
     dense = make_array(1_000_000, 1e-5, np.nan, np.float64)
     self.sp_arr = SparseArray(dense)
Exemple #18
0
def data_missing(request):
    """Return the length-2 sparse array ``[NaN, 1]`` filled with ``request.param``."""
    na_then_valid = [np.nan, 1]
    return SparseArray(na_then_valid, fill_value=request.param)
Exemple #19
0
 def time_sparse_array(self, dense_proportion, fill_value, dtype):
     """Construct a SparseArray from ``self.array`` and discard it.

     ``dense_proportion`` is not read here.
     """
     dense = self.array
     SparseArray(dense, dtype=dtype, fill_value=fill_value)
Exemple #20
0
def data_for_sorting(request):
    """Return the sparse array ``[2, 3, 1]`` filled with ``request.param``."""
    unsorted_values = [2, 3, 1]
    return SparseArray(unsorted_values, fill_value=request.param)
Exemple #21
0
def test_is_scipy_sparse():
    """Check that ``com.is_scipy_sparse`` accepts a scipy matrix but not a SparseArray."""
    from scipy import sparse as scipy_sparse

    scipy_matrix = scipy_sparse.bsr_matrix([1, 2, 3])
    assert com.is_scipy_sparse(scipy_matrix)

    pandas_sparse = SparseArray([1, 2, 3])
    assert not com.is_scipy_sparse(pandas_sparse)
Exemple #22
0
def data_for_grouping(request):
    """Return an 8-element sparse array with repeated values and two NaNs.

    The fill value is taken from ``request.param``.
    """
    values = [1, 1, np.nan, np.nan, 2, 2, 1, 3]
    return SparseArray(values, fill_value=request.param)
Exemple #23
0
def data_for_compare(request):
    """Return a 10-element sparse array mixing zeros, a NaN and signed values.

    The fill value is taken from ``request.param``.
    """
    values = [0, 0, np.nan, -2, -1, 4, 2, 3, 0, 0]
    return SparseArray(values, fill_value=request.param)