Beispiel #1
0
    def _cmp_method(self, other, op):
        """
        Compare this array element-wise against ``other``.

        The comparison function is looked up in ``ARROW_CMP_FUNCS`` under
        ``op.__name__`` and applied to ``self._data``.

        Parameters
        ----------
        other : ArrowStringArray, np.ndarray, list or scalar
            Right-hand operand of the comparison.
        op : callable
            Comparison operator; its ``__name__`` selects the entry in
            ``ARROW_CMP_FUNCS``, and it is called directly in the scalar
            fallback path.

        Returns
        -------
        BooleanArray or NotImplemented
            ``NotImplemented`` when ``other`` is neither one of the supported
            containers nor a scalar.
        """
        from pandas.arrays import BooleanArray

        compare = ARROW_CMP_FUNCS[op.__name__]

        if isinstance(other, ArrowStringArray):
            compared = compare(self._data, other._data)
        elif isinstance(other, (np.ndarray, list)):
            compared = compare(self._data, other)
        elif not is_scalar(other):
            # Unsupported operand type: defer to the other operand.
            return NotImplemented
        else:
            try:
                compared = compare(self._data, pa.scalar(other))
            except (pa.lib.ArrowNotImplementedError, pa.lib.ArrowInvalid):
                # The scalar could not be handled on the arrow side, so apply
                # ``op`` to the non-missing positions only and carry the
                # missing positions through as the mask of the result.
                na_mask = isna(self) | isna(other)
                keep = ~na_mask
                values = np.zeros(len(self), dtype="bool")
                values[keep] = op(np.array(self)[keep], other)
                return BooleanArray(values, na_mask)

        # The conversion route depends on the ``pa_version_under2p0`` flag.
        as_numpy = (
            compared.to_pandas().values
            if pa_version_under2p0
            else compared.to_numpy()
        )
        return BooleanArray._from_sequence(as_numpy)
Beispiel #2
0
 # Interval
 (
     [pd.Interval(1, 2), pd.Interval(3, 4)],
     "interval",
     IntervalArray.from_tuples([(1, 2), (3, 4)]),
 ),
 # Sparse
 ([0, 1], "Sparse[int64]", SparseArray([0, 1], dtype="int64")),
 # IntegerNA
 ([1, None], "Int16", integer_array([1, None], dtype="Int16")),
 (pd.Series([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))),
 # String
 (["a", None], "string", StringArray._from_sequence(["a", None])),
 (["a", None], pd.StringDtype(), StringArray._from_sequence(["a", None])),
 # Boolean
 ([True, None], "boolean", BooleanArray._from_sequence([True, None])),
 ([True, None], pd.BooleanDtype(), BooleanArray._from_sequence([True, None])),
 # Index
 (pd.Index([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))),
 # Series[EA] returns the EA
 (
     pd.Series(pd.Categorical(["a", "b"], categories=["a", "b", "c"])),
     None,
     pd.Categorical(["a", "b"], categories=["a", "b", "c"]),
 ),
 # "3rd party" EAs work
 ([decimal.Decimal(0), decimal.Decimal(1)], "decimal", to_decimal([0, 1])),
 # pass an ExtensionArray, but a different dtype
 (
     period_array(["2000", "2001"], freq="D"),
     "category",