Example #1
0
    def __sub__(self, other):
        other = lib.item_from_zerodim(other)
        if isinstance(other, (ABCSeries, ABCDataFrame)):
            return NotImplemented

        # scalar others
        elif other is NaT:
            result = self._sub_nat()
        elif isinstance(other, (Tick, timedelta, np.timedelta64)):
            result = self._add_delta(-other)
        elif isinstance(other, DateOffset):
            # specifically _not_ a Tick
            result = self._add_offset(-other)
        elif isinstance(other, (datetime, np.datetime64)):
            result = self._sub_datetimelike_scalar(other)
        elif lib.is_integer(other):
            # This check must come after the check for np.timedelta64
            # as is_integer returns True for these
            maybe_integer_op_deprecated(self)
            result = self._time_shift(-other)

        elif isinstance(other, Period):
            result = self._sub_period(other)

        # array-like others
        elif is_timedelta64_dtype(other):
            # TimedeltaIndex, ndarray[timedelta64]
            result = self._add_delta(-other)
        elif is_offsetlike(other):
            # Array/Index of DateOffset objects
            result = self._addsub_offset_array(other, operator.sub)
        elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
            # DatetimeIndex, ndarray[datetime64]
            result = self._sub_datetime_arraylike(other)
        elif is_period_dtype(other):
            # PeriodIndex
            result = self._sub_period_array(other)
        elif is_integer_dtype(other):
            maybe_integer_op_deprecated(self)
            result = self._addsub_int_array(other, operator.sub)
        elif isinstance(other, ABCIndexClass):
            raise TypeError("cannot subtract {cls} and {typ}"
                            .format(cls=type(self).__name__,
                                    typ=type(other).__name__))
        elif is_float_dtype(other):
            # Explicitly catch invalid dtypes
            raise TypeError("cannot subtract {dtype}-dtype from {cls}"
                            .format(dtype=other.dtype,
                                    cls=type(self).__name__))
        elif is_extension_array_dtype(other):
            # Categorical op will raise; defer explicitly
            return NotImplemented
        else:  # pragma: no cover
            return NotImplemented

        if is_timedelta64_dtype(result) and isinstance(result, np.ndarray):
            from pandas.core.arrays import TimedeltaArrayMixin
            # TODO: infer freq?
            return TimedeltaArrayMixin(result)
        return result
Example #2
0
        def __add__(self, other):
            from pandas.core.index import Index
            from pandas.core.indexes.timedeltas import TimedeltaIndex
            from pandas.tseries.offsets import DateOffset

            other = lib.item_from_zerodim(other)
            if isinstance(other, ABCSeries):
                return NotImplemented
            elif is_timedelta64_dtype(other):
                return self._add_delta(other)
            elif isinstance(other, (DateOffset, timedelta)):
                return self._add_delta(other)
            elif is_offsetlike(other):
                # Array/Index of DateOffset objects
                return self._add_offset_array(other)
            elif isinstance(self, TimedeltaIndex) and isinstance(other, Index):
                if hasattr(other, '_add_delta'):
                    return other._add_delta(self)
                raise TypeError("cannot add TimedeltaIndex and {typ}"
                                .format(typ=type(other)))
            elif is_integer(other):
                return self.shift(other)
            elif isinstance(other, (datetime, np.datetime64)):
                return self._add_datelike(other)
            elif isinstance(other, Index):
                return self._add_datelike(other)
            elif is_integer_dtype(other) and self.freq is None:
                # GH#19123
                raise NullFrequencyError("Cannot shift with no freq")
            else:  # pragma: no cover
                return NotImplemented
Example #3
0
    def _evaluate_compare(self, other, op):
        """
        We have been called because a comparison between
        8 aware arrays. numpy >= 1.11 will
        now warn about NaT comparisons
        """
        # Called by comparison methods when comparing datetimelike
        # with datetimelike

        if not isinstance(other, type(self)):
            # coerce to a similar object
            if not is_list_like(other):
                # scalar
                other = [other]
            elif lib.is_scalar(lib.item_from_zerodim(other)):
                # ndarray scalar
                other = [other.item()]
            other = type(self)(other)

        # compare
        result = op(self.asi8, other.asi8)

        # technically we could support bool dtyped Index
        # for now just return the indexing array directly
        mask = (self._isnan) | (other._isnan)

        filler = iNaT
        if is_bool_dtype(result):
            filler = False

        result[mask] = filler
        return result
Example #4
0
        def cmp_method(self, other):

            op_name = op.__name__
            mask = None

            if isinstance(other, (ABCSeries, ABCIndexClass)):
                # Rely on pandas to unbox and dispatch to us.
                return NotImplemented

            if isinstance(other, IntegerArray):
                other, mask = other._data, other._mask

            elif is_list_like(other):
                other = np.asarray(other)
                if other.ndim > 0 and len(self) != len(other):
                    raise ValueError('Lengths must match to compare')

            other = lib.item_from_zerodim(other)

            # numpy will show a DeprecationWarning on invalid elementwise
            # comparisons, this will raise in the future
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", "elementwise", FutureWarning)
                with np.errstate(all='ignore'):
                    result = op(self._data, other)

            # nans propagate
            if mask is None:
                mask = self._mask
            else:
                mask = self._mask | mask

            result[mask] = op_name == 'ne'
            return result
Example #5
0
 def test_isscalar_numpy_zerodim_arrays(self):
     for zerodim in [np.array(1), np.array('foobar'),
                     np.array(np.datetime64('2014-01-01')),
                     np.array(np.timedelta64(1, 'h')),
                     np.array(np.datetime64('NaT'))]:
         assert not is_scalar(zerodim)
         assert is_scalar(lib.item_from_zerodim(zerodim))
Example #6
0
    def __mul__(self, other):
        other = lib.item_from_zerodim(other)

        if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)):
            return NotImplemented

        if is_scalar(other):
            # numpy will accept float and int, raise TypeError for others
            result = self._data * other
            freq = None
            if self.freq is not None and not isna(other):
                freq = self.freq * other
            return type(self)(result, freq=freq)

        if not hasattr(other, "dtype"):
            # list, tuple
            other = np.array(other)
        if len(other) != len(self) and not is_timedelta64_dtype(other):
            # Exclude timedelta64 here so we correctly raise TypeError
            #  for that instead of ValueError
            raise ValueError("Cannot multiply with unequal lengths")

        if is_object_dtype(other):
            # this multiplication will succeed only if all elements of other
            #  are int or float scalars, so we will end up with
            #  timedelta64[ns]-dtyped result
            result = [self[n] * other[n] for n in range(len(self))]
            result = np.array(result)
            return type(self)(result)

        # numpy will accept float or int dtype, raise TypeError for others
        result = self._data * other
        return type(self)(result)
Example #7
0
    def wrapper(self, other):
        op = getattr(self.asi8, opname)

        other = lib.item_from_zerodim(other)
        if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)):
            return NotImplemented

        if is_list_like(other) and len(other) != len(self):
            raise ValueError("Lengths must match")

        if isinstance(other, Period):
            self._check_compatible_with(other)

            result = op(other.ordinal)
        elif isinstance(other, cls):
            self._check_compatible_with(other)

            result = op(other.asi8)

            mask = self._isnan | other._isnan
            if mask.any():
                result[mask] = nat_result

            return result
        elif other is NaT:
            result = np.empty(len(self.asi8), dtype=bool)
            result.fill(nat_result)
        else:
            other = Period(other, freq=self.freq)
            result = op(other.ordinal)

        if self._hasnans:
            result[self._isnan] = nat_result

        return result
Example #8
0
        def __sub__(self, other):
            from pandas import Index

            other = lib.item_from_zerodim(other)
            if isinstance(other, (ABCSeries, ABCDataFrame)):
                return NotImplemented

            # scalar others
            elif other is NaT:
                result = self._sub_nat()
            elif isinstance(other, (Tick, timedelta, np.timedelta64)):
                result = self._add_delta(-other)
            elif isinstance(other, DateOffset):
                # specifically _not_ a Tick
                result = self._add_offset(-other)
            elif isinstance(other, (datetime, np.datetime64)):
                result = self._sub_datelike(other)
            elif is_integer(other):
                # This check must come after the check for np.timedelta64
                # as is_integer returns True for these
                result = self.shift(-other)
            elif isinstance(other, Period):
                result = self._sub_period(other)

            # array-like others
            elif is_timedelta64_dtype(other):
                # TimedeltaIndex, ndarray[timedelta64]
                result = self._add_delta(-other)
            elif is_offsetlike(other):
                # Array/Index of DateOffset objects
                result = self._addsub_offset_array(other, operator.sub)
            elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
                # DatetimeIndex, ndarray[datetime64]
                result = self._sub_datelike(other)
            elif isinstance(other, Index):
                raise TypeError("cannot subtract {cls} and {typ}"
                                .format(cls=type(self).__name__,
                                        typ=type(other).__name__))
            elif is_integer_dtype(other) and self.freq is None:
                # GH#19123
                raise NullFrequencyError("Cannot shift with no freq")

            elif is_float_dtype(other):
                # Explicitly catch invalid dtypes
                raise TypeError("cannot subtract {dtype}-dtype from {cls}"
                                .format(dtype=other.dtype,
                                        cls=type(self).__name__))
            else:  # pragma: no cover
                return NotImplemented

            if result is NotImplemented:
                return NotImplemented
            elif not isinstance(result, Index):
                # Index.__new__ will choose appropriate subclass for dtype
                result = Index(result)
            res_name = ops.get_op_result_name(self, other)
            result.name = res_name
            return result
Example #9
0
    def __rmod__(self, other):
        # Note: This is a naive implementation, can likely be optimized
        if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
            return NotImplemented

        other = lib.item_from_zerodim(other)
        if isinstance(other, (timedelta, np.timedelta64, Tick)):
            other = Timedelta(other)
        return other - (other // self) * self
Example #10
0
    def __add__(self, other):
        other = lib.item_from_zerodim(other)
        if isinstance(other, (ABCSeries, ABCDataFrame)):
            return NotImplemented

        # scalar others
        elif other is NaT:
            result = self._add_nat()
        elif isinstance(other, (Tick, timedelta, np.timedelta64)):
            result = self._add_delta(other)
        elif isinstance(other, DateOffset):
            # specifically _not_ a Tick
            result = self._add_offset(other)
        elif isinstance(other, (datetime, np.datetime64)):
            result = self._add_datetimelike_scalar(other)
        elif lib.is_integer(other):
            # This check must come after the check for np.timedelta64
            # as is_integer returns True for these
            maybe_integer_op_deprecated(self)
            result = self._time_shift(other)

        # array-like others
        elif is_timedelta64_dtype(other):
            # TimedeltaIndex, ndarray[timedelta64]
            result = self._add_delta(other)
        elif is_offsetlike(other):
            # Array/Index of DateOffset objects
            result = self._addsub_offset_array(other, operator.add)
        elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
            # DatetimeIndex, ndarray[datetime64]
            return self._add_datetime_arraylike(other)
        elif is_integer_dtype(other):
            maybe_integer_op_deprecated(self)
            result = self._addsub_int_array(other, operator.add)
        elif is_float_dtype(other):
            # Explicitly catch invalid dtypes
            raise TypeError("cannot add {dtype}-dtype to {cls}"
                            .format(dtype=other.dtype,
                                    cls=type(self).__name__))
        elif is_period_dtype(other):
            # if self is a TimedeltaArray and other is a PeriodArray with
            #  a timedelta-like (i.e. Tick) freq, this operation is valid.
            #  Defer to the PeriodArray implementation.
            # In remaining cases, this will end up raising TypeError.
            return NotImplemented
        elif is_extension_array_dtype(other):
            # Categorical op will raise; defer explicitly
            return NotImplemented
        else:  # pragma: no cover
            return NotImplemented

        if is_timedelta64_dtype(result) and isinstance(result, np.ndarray):
            from pandas.core.arrays import TimedeltaArrayMixin
            # TODO: infer freq?
            return TimedeltaArrayMixin(result)
        return result
Example #11
0
        def __add__(self, other):
            other = lib.item_from_zerodim(other)
            if isinstance(other, (ABCSeries, ABCDataFrame)):
                return NotImplemented

            # scalar others
            elif other is NaT:
                result = self._add_nat()
            elif isinstance(other, (Tick, timedelta, np.timedelta64)):
                result = self._add_delta(other)
            elif isinstance(other, DateOffset):
                # specifically _not_ a Tick
                result = self._add_offset(other)
            elif isinstance(other, (datetime, np.datetime64)):
                result = self._add_datelike(other)
            elif is_integer(other):
                # This check must come after the check for np.timedelta64
                # as is_integer returns True for these
                result = self.shift(other)

            # array-like others
            elif is_timedelta64_dtype(other):
                # TimedeltaIndex, ndarray[timedelta64]
                result = self._add_delta(other)
            elif is_offsetlike(other):
                # Array/Index of DateOffset objects
                result = self._addsub_offset_array(other, operator.add)
            elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
                # DatetimeIndex, ndarray[datetime64]
                return self._add_datelike(other)
            elif is_integer_dtype(other):
                result = self._addsub_int_array(other, operator.add)
            elif is_float_dtype(other) or is_period_dtype(other):
                # Explicitly catch invalid dtypes
                raise TypeError("cannot add {dtype}-dtype to {cls}"
                                .format(dtype=other.dtype,
                                        cls=type(self).__name__))
            elif is_categorical_dtype(other):
                # Categorical op will raise; defer explicitly
                return NotImplemented
            else:  # pragma: no cover
                return NotImplemented

            if result is NotImplemented:
                return NotImplemented
            elif not isinstance(result, Index):
                # Index.__new__ will choose appropriate subclass for dtype
                result = Index(result)
            res_name = ops.get_op_result_name(self, other)
            result.name = res_name
            return result
Example #12
0
    def __truediv__(self, other):
        # timedelta / X is well-defined for timedelta-like or numeric X
        other = lib.item_from_zerodim(other)

        if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
            return NotImplemented

        if isinstance(other, (timedelta, np.timedelta64, Tick)):
            other = Timedelta(other)
            if other is NaT:
                # specifically timedelta64-NaT
                result = np.empty(self.shape, dtype=np.float64)
                result.fill(np.nan)
                return result

            # otherwise, dispatch to Timedelta implementation
            return self._data / other

        elif lib.is_scalar(other):
            # assume it is numeric
            result = self._data / other
            freq = None
            if self.freq is not None:
                # Tick division is not implemented, so operate on Timedelta
                freq = self.freq.delta / other
            return type(self)(result, freq=freq)

        if not hasattr(other, "dtype"):
            # e.g. list, tuple
            other = np.array(other)

        if len(other) != len(self):
            raise ValueError("Cannot divide vectors with unequal lengths")

        elif is_timedelta64_dtype(other):
            # let numpy handle it
            return self._data / other

        elif is_object_dtype(other):
            # Note: we do not do type inference on the result, so either
            #  an object array or numeric-dtyped (if numpy does inference)
            #  will be returned.  GH#23829
            result = [self[n] / other[n] for n in range(len(self))]
            result = np.array(result)
            return result

        else:
            result = self._data / other
            return type(self)(result)
Example #13
0
    def __rfloordiv__(self, other):
        if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
            return NotImplemented

        other = lib.item_from_zerodim(other)
        if is_scalar(other):
            if isinstance(other, (timedelta, np.timedelta64, Tick)):
                other = Timedelta(other)
                if other is NaT:
                    # treat this specifically as timedelta-NaT
                    result = np.empty(self.shape, dtype=np.float64)
                    result.fill(np.nan)
                    return result

                # dispatch to Timedelta implementation
                result = other.__floordiv__(self._data)
                return result

            raise TypeError("Cannot divide {typ} by {cls}"
                            .format(typ=type(other).__name__,
                                    cls=type(self).__name__))

        if not hasattr(other, "dtype"):
            # list, tuple
            other = np.array(other)
        if len(other) != len(self):
            raise ValueError("Cannot divide with unequal lengths")

        elif is_timedelta64_dtype(other):
            other = type(self)(other)

            # numpy timedelta64 does not natively support floordiv, so operate
            #  on the i8 values
            result = other.asi8 // self.asi8
            mask = self._isnan | other._isnan
            if mask.any():
                result = result.astype(np.int64)
                result[mask] = np.nan
            return result

        elif is_object_dtype(other):
            result = [other[n] // self[n] for n in range(len(self))]
            result = np.array(result)
            return result

        else:
            dtype = getattr(other, "dtype", type(other).__name__)
            raise TypeError("Cannot divide {typ} by {cls}"
                            .format(typ=dtype, cls=type(self).__name__))
Example #14
0
        def __sub__(self, other):
            from pandas.core.index import Index
            from pandas.core.indexes.datetimes import DatetimeIndex
            from pandas.core.indexes.timedeltas import TimedeltaIndex
            from pandas.tseries.offsets import DateOffset

            other = lib.item_from_zerodim(other)
            if isinstance(other, ABCSeries):
                return NotImplemented
            elif is_timedelta64_dtype(other):
                result = self._add_delta(-other)
            elif isinstance(other, (DateOffset, timedelta)):
                result = self._add_delta(-other)
            elif is_offsetlike(other):
                # Array/Index of DateOffset objects
                result = self._sub_offset_array(other)
            elif isinstance(self, TimedeltaIndex) and isinstance(other, Index):
                if not isinstance(other, TimedeltaIndex):
                    raise TypeError("cannot subtract TimedeltaIndex and {typ}"
                                    .format(typ=type(other).__name__))
                result = self._add_delta(-other)
            elif isinstance(other, DatetimeIndex):
                result = self._sub_datelike(other)
            elif is_integer(other):
                # This check must come after the check for timedelta64_dtype
                # or else it will incorrectly catch np.timedelta64 objects
                result = self.shift(-other)
            elif isinstance(other, (datetime, np.datetime64)):
                result = self._sub_datelike(other)
            elif isinstance(other, Period):
                result = self._sub_period(other)
            elif isinstance(other, Index):
                raise TypeError("cannot subtract {typ1} and {typ2}"
                                .format(typ1=type(self).__name__,
                                        typ2=type(other).__name__))
            elif is_integer_dtype(other) and self.freq is None:
                # GH#19123
                raise NullFrequencyError("Cannot shift with no freq")
            else:  # pragma: no cover
                return NotImplemented

            if result is not NotImplemented:
                res_name = ops.get_op_result_name(self, other)
                result.name = res_name
            return result
Example #15
0
    def __rtruediv__(self, other):
        # X / timedelta is defined only for timedelta-like X
        other = lib.item_from_zerodim(other)

        if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
            return NotImplemented

        if isinstance(other, (timedelta, np.timedelta64, Tick)):
            other = Timedelta(other)
            if other is NaT:
                # specifically timedelta64-NaT
                result = np.empty(self.shape, dtype=np.float64)
                result.fill(np.nan)
                return result

            # otherwise, dispatch to Timedelta implementation
            return other / self._data

        elif lib.is_scalar(other):
            raise TypeError("Cannot divide {typ} by {cls}"
                            .format(typ=type(other).__name__,
                                    cls=type(self).__name__))

        if not hasattr(other, "dtype"):
            # e.g. list, tuple
            other = np.array(other)

        if len(other) != len(self):
            raise ValueError("Cannot divide vectors with unequal lengths")

        elif is_timedelta64_dtype(other):
            # let numpy handle it
            return other / self._data

        elif is_object_dtype(other):
            # Note: unlike in __truediv__, we do not _need_ to do type#
            #  inference on the result.  It does not raise, a numeric array
            #  is returned.  GH#23829
            result = [other[n] / self[n] for n in range(len(self))]
            return np.array(result)

        else:
            raise TypeError("Cannot divide {dtype} data by {cls}"
                            .format(dtype=other.dtype,
                                    cls=type(self).__name__))
Example #16
0
def _wrap_result(name, data, sparse_index, fill_value, dtype=None):
    """ wrap op result to have correct dtype """
    if name.startswith('__'):
        # e.g. __eq__ --> eq
        name = name[2:-2]

    if name in ('eq', 'ne', 'lt', 'gt', 'le', 'ge'):
        dtype = np.bool

    fill_value = lib.item_from_zerodim(fill_value)

    if is_bool_dtype(dtype):
        # fill_value may be np.bool_
        fill_value = bool(fill_value)
    return SparseArray(data,
                       sparse_index=sparse_index,
                       fill_value=fill_value,
                       dtype=dtype)
Example #17
0
        def __add__(self, other):
            from pandas import DateOffset

            other = lib.item_from_zerodim(other)
            if isinstance(other, ABCSeries):
                return NotImplemented

            # scalar others
            elif other is NaT:
                result = self._add_nat()
            elif isinstance(other, (DateOffset, timedelta, np.timedelta64)):
                result = self._add_delta(other)
            elif isinstance(other, (datetime, np.datetime64)):
                result = self._add_datelike(other)
            elif is_integer(other):
                # This check must come after the check for np.timedelta64
                # as is_integer returns True for these
                result = self.shift(other)

            # array-like others
            elif is_timedelta64_dtype(other):
                # TimedeltaIndex, ndarray[timedelta64]
                result = self._add_delta(other)
            elif is_offsetlike(other):
                # Array/Index of DateOffset objects
                result = self._addsub_offset_array(other, operator.add)
            elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
                # DatetimeIndex, ndarray[datetime64]
                return self._add_datelike(other)
            elif is_integer_dtype(other) and self.freq is None:
                # GH#19123
                raise NullFrequencyError("Cannot shift with no freq")
            else:  # pragma: no cover
                return NotImplemented

            if result is NotImplemented:
                return NotImplemented
            elif not isinstance(result, Index):
                # Index.__new__ will choose appropriate subclass for dtype
                result = Index(result)
            res_name = ops.get_op_result_name(self, other)
            result.name = res_name
            return result
Example #18
0
    def wrapper(self, other):
        other = lib.item_from_zerodim(other)
        if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)):
            return NotImplemented

        if _is_convertible_to_td(other) or other is NaT:
            try:
                other = Timedelta(other)
            except ValueError:
                # failed to parse as timedelta
                return ops.invalid_comparison(self, other, op)

            result = op(self.view('i8'), other.value)
            if isna(other):
                result.fill(nat_result)

        elif not is_list_like(other):
            return ops.invalid_comparison(self, other, op)

        elif len(other) != len(self):
            raise ValueError("Lengths must match")

        else:
            try:
                other = type(self)._from_sequence(other)._data
            except (ValueError, TypeError):
                return ops.invalid_comparison(self, other, op)

            result = op(self.view('i8'), other.view('i8'))
            result = com.values_from_object(result)

            o_mask = np.array(isna(other))
            if o_mask.any():
                result[o_mask] = nat_result

        if self._hasnans:
            result[self._isnan] = nat_result

        return result
Example #19
0
        def __sub__(self, other):
            from pandas.core.index import Index
            from pandas.core.indexes.datetimes import DatetimeIndex
            from pandas.core.indexes.timedeltas import TimedeltaIndex
            from pandas.tseries.offsets import DateOffset

            other = lib.item_from_zerodim(other)
            if isinstance(other, ABCSeries):
                return NotImplemented
            elif is_timedelta64_dtype(other):
                return self._add_delta(-other)
            elif isinstance(other, (DateOffset, timedelta)):
                return self._add_delta(-other)
            elif is_offsetlike(other):
                # Array/Index of DateOffset objects
                return self._sub_offset_array(other)
            elif isinstance(self, TimedeltaIndex) and isinstance(other, Index):
                if not isinstance(other, TimedeltaIndex):
                    raise TypeError("cannot subtract TimedeltaIndex and {typ}"
                                    .format(typ=type(other).__name__))
                return self._add_delta(-other)
            elif isinstance(other, DatetimeIndex):
                return self._sub_datelike(other)
            elif is_integer(other):
                return self.shift(-other)
            elif isinstance(other, (datetime, np.datetime64)):
                return self._sub_datelike(other)
            elif isinstance(other, Period):
                return self._sub_period(other)
            elif isinstance(other, Index):
                raise TypeError("cannot subtract {typ1} and {typ2}"
                                .format(typ1=type(self).__name__,
                                        typ2=type(other).__name__))

            else:  # pragma: no cover
                return NotImplemented
Example #20
0
def dispatch_to_series(left, right, func, axis=None):
    """
    Evaluate the frame operation func(left, right) by evaluating
    column-by-column, dispatching to the Series implementation.

    Parameters
    ----------
    left : DataFrame
    right : scalar or DataFrame
    func : arithmetic or comparison operator
    axis : {None, 0, 1, "index", "columns"}

    Returns
    -------
    DataFrame
    """
    # Get the appropriate array-op to apply to each column/block's values.
    array_op = get_array_op(func)

    right = lib.item_from_zerodim(right)
    if lib.is_scalar(right) or np.ndim(right) == 0:
        bm = left._mgr.apply(array_op, right=right)
        return type(left)(bm)

    elif isinstance(right, ABCDataFrame):
        assert left.index.equals(right.index)
        assert left.columns.equals(right.columns)
        # TODO: The previous assertion `assert right._indexed_same(left)`
        #  fails in cases with empty columns reached via
        #  _frame_arith_method_with_reindex

        bm = left._mgr.operate_blockwise(right._mgr, array_op)
        return type(left)(bm)

    elif isinstance(right, ABCSeries) and axis == "columns":
        # We only get here if called via _combine_series_frame,
        # in which case we specifically want to operate row-by-row
        assert right.index.equals(left.columns)

        if right.dtype == "timedelta64[ns]":
            # ensure we treat NaT values as the correct dtype
            # Note: we do not do this unconditionally as it may be lossy or
            #  expensive for EA dtypes.
            right = np.asarray(right)
        else:
            right = right._values

        arrays = [
            array_op(l, r) for l, r in zip(left._iter_column_arrays(), right)
        ]

    elif isinstance(right, ABCSeries):
        assert right.index.equals(left.index)  # Handle other cases later
        right = right._values

        arrays = [array_op(l, right) for l in left._iter_column_arrays()]

    else:
        # Remaining cases have less-obvious dispatch rules
        raise NotImplementedError(right)

    return type(left)._from_arrays(arrays,
                                   left.columns,
                                   left.index,
                                   verify_integrity=False)
Example #21
0
def logical_op(left: ArrayLike, right: Any, op) -> ArrayLike:
    """
    Evaluate a logical operation `|`, `&`, or `^`.

    Parameters
    ----------
    left : np.ndarray or ExtensionArray
    right : object
        Cannot be a DataFrame, Series, or Index.
    op : {operator.and_, operator.or_, operator.xor}
        Or one of the reversed variants from roperator.

    Returns
    -------
    ndarray or ExtensionArray
    """
    fill_int = lambda x: x

    def fill_bool(x, left=None):
        # if `left` is specifically not-boolean, we do not cast to bool
        if x.dtype.kind in ["c", "f", "O"]:
            # dtypes that can hold NA
            mask = isna(x)
            if mask.any():
                x = x.astype(object)
                x[mask] = False

        if left is None or is_bool_dtype(left.dtype):
            x = x.astype(bool)
        return x

    is_self_int_dtype = is_integer_dtype(left.dtype)

    right = lib.item_from_zerodim(right)
    if is_list_like(right) and not hasattr(right, "dtype"):
        # e.g. list, tuple
        right = construct_1d_object_array_from_listlike(right)

    # NB: We assume extract_array has already been called on left and right
    lvalues = maybe_upcast_datetimelike_array(left)
    rvalues = right

    if should_extension_dispatch(lvalues, rvalues):
        # Call the method on lvalues
        res_values = op(lvalues, rvalues)

    else:
        if isinstance(rvalues, np.ndarray):
            is_other_int_dtype = is_integer_dtype(rvalues.dtype)
            rvalues = rvalues if is_other_int_dtype else fill_bool(
                rvalues, lvalues)

        else:
            # i.e. scalar
            is_other_int_dtype = lib.is_integer(rvalues)

        # For int vs int `^`, `|`, `&` are bitwise operators and return
        #   integer dtypes.  Otherwise these are boolean ops
        filler = fill_int if is_self_int_dtype and is_other_int_dtype else fill_bool

        res_values = na_logical_op(lvalues, rvalues, op)
        res_values = filler(res_values)  # type: ignore

    return res_values
Example #22
0
def sanitize_array(
    data,
    index: Optional[Index],
    dtype: Optional[DtypeObj] = None,
    copy: bool = False,
    raise_cast_failure: bool = True,
) -> ArrayLike:
    """
    Sanitize input data to an ndarray or ExtensionArray, copy if specified,
    coerce to the dtype if specified.

    Parameters
    ----------
    data : Any
    index : Index or None, default None
    dtype : np.dtype, ExtensionDtype, or None, default None
    copy : bool, default False
    raise_cast_failure : bool, default True

    Returns
    -------
    np.ndarray or ExtensionArray

    Notes
    -----
    raise_cast_failure=False is only intended to be True when called from the
    DataFrame constructor, as the dtype keyword there may be interpreted as only
    applying to a subset of columns, see GH#24435.
    """
    if isinstance(data, ma.MaskedArray):
        data = sanitize_masked_array(data)

    # extract ndarray or ExtensionArray, ensure we have no PandasArray
    data = extract_array(data, extract_numpy=True)

    if isinstance(data, np.ndarray) and data.ndim == 0:
        if dtype is None:
            dtype = data.dtype
        data = lib.item_from_zerodim(data)

    # GH#846
    if isinstance(data, np.ndarray):

        if dtype is not None and is_float_dtype(
                data.dtype) and is_integer_dtype(dtype):
            # possibility of nan -> garbage
            try:
                subarr = _try_cast(data, dtype, copy, True)
            except ValueError:
                subarr = np.array(data, copy=copy)
        else:
            # we will try to copy by-definition here
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    elif isinstance(data, ABCExtensionArray):
        # it is already ensured above this is not a PandasArray
        subarr = data

        if dtype is not None:
            subarr = subarr.astype(dtype, copy=copy)
        elif copy:
            subarr = subarr.copy()
        return subarr

    elif isinstance(data,
                    (list, tuple, abc.Set, abc.ValuesView)) and len(data) > 0:
        # TODO: deque, array.array
        if isinstance(data, (set, frozenset)):
            # Raise only for unordered sets, e.g., not for dict_keys
            raise TypeError(f"'{type(data).__name__}' type is unordered")
        data = list(data)

        if dtype is not None:
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)
        else:
            subarr = maybe_convert_platform(data)
            # error: Incompatible types in assignment (expression has type
            # "Union[ExtensionArray, ndarray, List[Any]]", variable has type
            # "ExtensionArray")
            subarr = maybe_cast_to_datetime(subarr,
                                            dtype)  # type: ignore[assignment]

    elif isinstance(data, range):
        # GH#16804
        arr = np.arange(data.start, data.stop, data.step, dtype="int64")
        subarr = _try_cast(arr, dtype, copy, raise_cast_failure)

    elif not is_list_like(data):
        if index is None:
            raise ValueError(
                "index must be specified when data is not list-like")
        subarr = construct_1d_arraylike_from_scalar(data, len(index), dtype)

    else:
        # realize e.g. generators
        # TODO: non-standard array-likes we can convert to ndarray more efficiently?
        data = list(data)
        subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    subarr = _sanitize_ndim(subarr, data, dtype, index)

    if not (is_extension_array_dtype(subarr.dtype)
            or is_extension_array_dtype(dtype)):
        # error: Argument 1 to "_sanitize_str_dtypes" has incompatible type
        # "ExtensionArray"; expected "ndarray"
        subarr = _sanitize_str_dtypes(
            subarr,
            data,
            dtype,
            copy  # type: ignore[arg-type]
        )

        is_object_or_str_dtype = is_object_dtype(dtype) or is_string_dtype(
            dtype)
        if is_object_dtype(subarr.dtype) and not is_object_or_str_dtype:
            inferred = lib.infer_dtype(subarr, skipna=False)
            if inferred in {"interval", "period"}:
                subarr = array(subarr)
                subarr = extract_array(subarr, extract_numpy=True)

    return subarr
Example #23
0
def sanitize_array(
    data,
    index: Index | None,
    dtype: DtypeObj | None = None,
    copy: bool = False,
    raise_cast_failure: bool = True,
    *,
    allow_2d: bool = False,
) -> ArrayLike:
    """
    Sanitize input data to an ndarray or ExtensionArray, copy if specified,
    coerce to the dtype if specified.

    Parameters
    ----------
    data : Any
    index : Index or None, default None
    dtype : np.dtype, ExtensionDtype, or None, default None
    copy : bool, default False
    raise_cast_failure : bool, default True
    allow_2d : bool, default False
        If False, raise if we have a 2D Arraylike.

    Returns
    -------
    np.ndarray or ExtensionArray

    Notes
    -----
    raise_cast_failure=False is only intended to be True when called from the
    DataFrame constructor, as the dtype keyword there may be interpreted as only
    applying to a subset of columns, see GH#24435.
    """
    if isinstance(data, ma.MaskedArray):
        data = sanitize_masked_array(data)

    # extract ndarray or ExtensionArray, ensure we have no PandasArray
    data = extract_array(data, extract_numpy=True)

    if isinstance(data, np.ndarray) and data.ndim == 0:
        if dtype is None:
            dtype = data.dtype
        data = lib.item_from_zerodim(data)
    elif isinstance(data, range):
        # GH#16804
        data = range_to_ndarray(data)
        copy = False

    if not is_list_like(data):
        if index is None:
            raise ValueError("index must be specified when data is not list-like")
        data = construct_1d_arraylike_from_scalar(data, len(index), dtype)
        return data

    # GH#846
    if isinstance(data, np.ndarray):

        if dtype is not None and is_float_dtype(data.dtype) and is_integer_dtype(dtype):
            # possibility of nan -> garbage
            try:
                subarr = _try_cast(data, dtype, copy, True)
            except ValueError:
                subarr = np.array(data, copy=copy)
        else:
            # we will try to copy by-definition here
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    elif isinstance(data, ABCExtensionArray):
        # it is already ensured above this is not a PandasArray
        subarr = data

        if dtype is not None:
            subarr = subarr.astype(dtype, copy=copy)
        elif copy:
            subarr = subarr.copy()
        return subarr

    else:
        if isinstance(data, (set, frozenset)):
            # Raise only for unordered sets, e.g., not for dict_keys
            raise TypeError(f"'{type(data).__name__}' type is unordered")

        # materialize e.g. generators, convert e.g. tuples, abc.ValueView
        # TODO: non-standard array-likes we can convert to ndarray more efficiently?
        data = list(data)

        if dtype is not None or len(data) == 0:
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)
        else:
            # TODO: copy?
            subarr = maybe_convert_platform(data)
            if subarr.dtype == object:
                # Argument 1 to "maybe_infer_to_datetimelike" has incompatible
                # type "Union[ExtensionArray, ndarray]"; expected "ndarray"
                subarr = maybe_infer_to_datetimelike(subarr)  # type: ignore[arg-type]

    subarr = _sanitize_ndim(subarr, data, dtype, index, allow_2d=allow_2d)

    if not (
        isinstance(subarr.dtype, ExtensionDtype) or isinstance(dtype, ExtensionDtype)
    ):
        subarr = _sanitize_str_dtypes(subarr, data, dtype, copy)

        is_object_or_str_dtype = is_object_dtype(dtype) or is_string_dtype(dtype)
        if is_object_dtype(subarr.dtype) and not is_object_or_str_dtype:
            inferred = lib.infer_dtype(subarr, skipna=False)
            if inferred in {"interval", "period"}:
                subarr = array(subarr)
                subarr = extract_array(subarr, extract_numpy=True)

    return subarr
Example #24
0
    def wrapper(self, other, axis=None):
        # Validate the axis parameter
        if axis is not None:
            self._get_axis_number(axis)

        if isinstance(other, ABCSeries):
            name = com._maybe_match_name(self, other)
            if not self._indexed_same(other):
                msg = 'Can only compare identically-labeled Series objects'
                raise ValueError(msg)
            return self._constructor(na_op(self.values, other.values),
                                     index=self.index, name=name)
        elif isinstance(other, ABCDataFrame):  # pragma: no cover
            return NotImplemented
        elif isinstance(other, (np.ndarray, pd.Index)):
            # do not check length of zerodim array
            # as it will broadcast
            if (not is_scalar(lib.item_from_zerodim(other)) and
                    len(self) != len(other)):
                raise ValueError('Lengths must match to compare')

            if isinstance(other, ABCPeriodIndex):
                # temp workaround until fixing GH 13637
                # tested in test_nat_comparisons
                # (pandas.tests.series.test_operators.TestSeriesOperators)
                return self._constructor(na_op(self.values,
                                               other.astype(object).values),
                                         index=self.index)

            return self._constructor(na_op(self.values, np.asarray(other)),
                                     index=self.index).__finalize__(self)

        elif isinstance(other, pd.Categorical):
            if not is_categorical_dtype(self):
                msg = ("Cannot compare a Categorical for op {op} with Series "
                       "of dtype {typ}.\nIf you want to compare values, use "
                       "'series <op> np.asarray(other)'.")
                raise TypeError(msg.format(op=op, typ=self.dtype))

        if is_categorical_dtype(self):
            # cats are a special case as get_values() would return an ndarray,
            # which would then not take categories ordering into account
            # we can go directly to op, as the na_op would just test again and
            # dispatch to it.
            with np.errstate(all='ignore'):
                res = op(self.values, other)
        else:
            values = self.get_values()
            if isinstance(other, (list, np.ndarray)):
                other = np.asarray(other)

            with np.errstate(all='ignore'):
                res = na_op(values, other)
            if is_scalar(res):
                raise TypeError('Could not compare {typ} type with Series'
                                .format(typ=type(other)))

            # always return a full value series here
            res = com._values_from_object(res)

        res = pd.Series(res, index=self.index, name=self.name, dtype='bool')
        return res
Example #25
0
def to_timedelta(arg, unit=None, errors="raise"):
    """
    Convert argument to timedelta.

    Timedeltas are absolute differences in times, expressed in difference
    units (e.g. days, hours, minutes, seconds). This method converts
    an argument from a recognized timedelta format / value into
    a Timedelta type.

    Parameters
    ----------
    arg : str, timedelta, list-like or Series
        The data to be converted to timedelta.

        .. deprecated:: 1.2
            Strings with units 'M', 'Y' and 'y' do not represent
            unambiguous timedelta values and will be removed in a future version

    unit : str, optional
        Denotes the unit of the arg for numeric `arg`. Defaults to ``"ns"``.

        Possible values:

        * 'W'
        * 'D' / 'days' / 'day'
        * 'hours' / 'hour' / 'hr' / 'h'
        * 'm' / 'minute' / 'min' / 'minutes' / 'T'
        * 'S' / 'seconds' / 'sec' / 'second'
        * 'ms' / 'milliseconds' / 'millisecond' / 'milli' / 'millis' / 'L'
        * 'us' / 'microseconds' / 'microsecond' / 'micro' / 'micros' / 'U'
        * 'ns' / 'nanoseconds' / 'nano' / 'nanos' / 'nanosecond' / 'N'

        .. versionchanged:: 1.1.0

           Must not be specified when `arg` context strings and
           ``errors="raise"``.

    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
        - If 'raise', then invalid parsing will raise an exception.
        - If 'coerce', then invalid parsing will be set as NaT.
        - If 'ignore', then invalid parsing will return the input.

    Returns
    -------
    timedelta64 or numpy.array of timedelta64
        Output type returned if parsing succeeded.

    See Also
    --------
    DataFrame.astype : Cast argument to a specified dtype.
    to_datetime : Convert argument to datetime.
    convert_dtypes : Convert dtypes.

    Notes
    -----
    If the precision is higher than nanoseconds, the precision of the duration is
    truncated to nanoseconds for string inputs.

    Examples
    --------
    Parsing a single string to a Timedelta:

    >>> pd.to_timedelta('1 days 06:05:01.00003')
    Timedelta('1 days 06:05:01.000030')
    >>> pd.to_timedelta('15.5us')
    Timedelta('0 days 00:00:00.000015500')

    Parsing a list or array of strings:

    >>> pd.to_timedelta(['1 days 06:05:01.00003', '15.5us', 'nan'])
    TimedeltaIndex(['1 days 06:05:01.000030', '0 days 00:00:00.000015500', NaT],
                   dtype='timedelta64[ns]', freq=None)

    Converting numbers by specifying the `unit` keyword argument:

    >>> pd.to_timedelta(np.arange(5), unit='s')
    TimedeltaIndex(['0 days 00:00:00', '0 days 00:00:01', '0 days 00:00:02',
                    '0 days 00:00:03', '0 days 00:00:04'],
                   dtype='timedelta64[ns]', freq=None)
    >>> pd.to_timedelta(np.arange(5), unit='d')
    TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'],
                   dtype='timedelta64[ns]', freq=None)
    """
    if unit is not None:
        unit = parse_timedelta_unit(unit)

    if errors not in ("ignore", "raise", "coerce"):
        raise ValueError(
            "errors must be one of 'ignore', 'raise', or 'coerce'.")

    if unit in {"Y", "y", "M"}:
        raise ValueError(
            "Units 'M', 'Y', and 'y' are no longer supported, as they do not "
            "represent unambiguous timedelta values durations.")

    if arg is None:
        return arg
    elif isinstance(arg, ABCSeries):
        values = _convert_listlike(arg._values, unit=unit, errors=errors)
        return arg._constructor(values, index=arg.index, name=arg.name)
    elif isinstance(arg, ABCIndex):
        return _convert_listlike(arg, unit=unit, errors=errors, name=arg.name)
    elif isinstance(arg, np.ndarray) and arg.ndim == 0:
        # extract array scalar and process below
        arg = lib.item_from_zerodim(arg)
    elif is_list_like(arg) and getattr(arg, "ndim", 1) == 1:
        return _convert_listlike(arg, unit=unit, errors=errors)
    elif getattr(arg, "ndim", 1) > 1:
        raise TypeError(
            "arg must be a string, timedelta, list, tuple, 1-d array, or Series"
        )

    if isinstance(arg, str) and unit is not None:
        raise ValueError(
            "unit must not be specified if the input is/contains a str")

    # ...so it must be a scalar value. Return scalar.
    return _coerce_scalar_to_timedelta_type(arg, unit=unit, errors=errors)
Example #26
0
def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike:
    """
    Evaluate a comparison operation `=`, `!=`, `>=`, `>`, `<=`, or `<`.

    Note: the caller is responsible for ensuring that numpy warnings are
    suppressed (with np.errstate(all="ignore")) if needed.

    Parameters
    ----------
    left : np.ndarray or ExtensionArray
    right : object
        Cannot be a DataFrame, Series, or Index.
    op : {operator.eq, operator.ne, operator.gt, operator.ge, operator.lt, operator.le}

    Returns
    -------
    ndarray or ExtensionArray
    """
    # NB: We assume extract_array has already been called on left and right
    lvalues = ensure_wrapped_if_datetimelike(left)
    rvalues = ensure_wrapped_if_datetimelike(right)

    rvalues = lib.item_from_zerodim(rvalues)
    if isinstance(rvalues, list):
        # We don't catch tuple here bc we may be comparing e.g. MultiIndex
        #  to a tuple that represents a single entry, see test_compare_tuple_strs
        rvalues = np.asarray(rvalues)

    if isinstance(rvalues, (np.ndarray, ABCExtensionArray)):
        # TODO: make this treatment consistent across ops and classes.
        #  We are not catching all listlikes here (e.g. frozenset, tuple)
        #  The ambiguous case is object-dtype.  See GH#27803
        if len(lvalues) != len(rvalues):
            raise ValueError("Lengths must match to compare", lvalues.shape,
                             rvalues.shape)

    if should_extension_dispatch(lvalues, rvalues) or (
        (isinstance(rvalues,
                    (Timedelta, BaseOffset, Timestamp)) or right is NaT)
            and not is_object_dtype(lvalues.dtype)):
        # Call the method on lvalues
        res_values = op(lvalues, rvalues)

    elif is_scalar(rvalues) and isna(rvalues):  # TODO: but not pd.NA?
        # numpy does not like comparisons vs None
        if op is operator.ne:
            res_values = np.ones(lvalues.shape, dtype=bool)
        else:
            res_values = np.zeros(lvalues.shape, dtype=bool)

    elif is_numeric_v_string_like(lvalues, rvalues):
        # GH#36377 going through the numexpr path would incorrectly raise
        return invalid_comparison(lvalues, rvalues, op)

    elif is_object_dtype(lvalues.dtype) or isinstance(rvalues, str):
        res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues)

    else:
        res_values = _na_arithmetic_op(lvalues, rvalues, op, is_cmp=True)

    return res_values
Example #27
0
    def wrapper(self, other, axis=None):
        # Validate the axis parameter
        if axis is not None:
            self._get_axis_number(axis)

        if isinstance(other, ABCDataFrame):  # pragma: no cover
            # Defer to DataFrame implementation; fail early
            return NotImplemented

        elif isinstance(other, ABCSeries):
            name = com._maybe_match_name(self, other)
            if not self._indexed_same(other):
                msg = 'Can only compare identically-labeled Series objects'
                raise ValueError(msg)
            res_values = na_op(self.values, other.values)
            return self._constructor(res_values, index=self.index, name=name)

        elif isinstance(other, (np.ndarray, pd.Index)):
            # do not check length of zerodim array
            # as it will broadcast
            if (not is_scalar(lib.item_from_zerodim(other))
                    and len(self) != len(other)):
                raise ValueError('Lengths must match to compare')

            res_values = na_op(self.values, np.asarray(other))
            return self._constructor(res_values,
                                     index=self.index).__finalize__(self)

        elif (isinstance(other, pd.Categorical)
              and not is_categorical_dtype(self)):
            raise TypeError(
                "Cannot compare a Categorical for op {op} with "
                "Series of dtype {typ}.\nIf you want to compare "
                "values, use 'series <op> np.asarray(other)'.".format(
                    op=op, typ=self.dtype))

        elif is_scalar(other) and isna(other):
            # numpy does not like comparisons vs None
            if op is operator.ne:
                res_values = np.ones(len(self), dtype=bool)
            else:
                res_values = np.zeros(len(self), dtype=bool)
            return self._constructor(res_values,
                                     index=self.index,
                                     name=self.name,
                                     dtype='bool')

        if is_categorical_dtype(self):
            # cats are a special case as get_values() would return an ndarray,
            # which would then not take categories ordering into account
            # we can go directly to op, as the na_op would just test again and
            # dispatch to it.
            with np.errstate(all='ignore'):
                res = op(self.values, other)
        else:
            values = self.get_values()
            if isinstance(other, (list, np.ndarray)):
                other = np.asarray(other)

            with np.errstate(all='ignore'):
                res = na_op(values, other)
            if is_scalar(res):
                raise TypeError(
                    'Could not compare {typ} type with Series'.format(
                        typ=type(other)))

            # always return a full value series here
            res = com._values_from_object(res)

        res = pd.Series(res, index=self.index, name=self.name, dtype='bool')
        return res
Example #28
0
def comparison_op(
    left: Union[np.ndarray, ABCExtensionArray], right: Any, op
) -> Union[np.ndarray, ABCExtensionArray]:
    """
    Evaluate a comparison operation `=`, `!=`, `>=`, `>`, `<=`, or `<`.

    Parameters
    ----------
    left : np.ndarray or ExtensionArray
    right : object
        Cannot be a DataFrame, Series, or Index.
    op : {operator.eq, operator.ne, operator.gt, operator.ge, operator.lt, operator.le}

    Returns
    -------
    ndarrray or ExtensionArray
    """

    # NB: We assume extract_array has already been called on left and right
    lvalues = left
    rvalues = right

    rvalues = lib.item_from_zerodim(rvalues)
    if isinstance(rvalues, list):
        # TODO: same for tuples?
        rvalues = np.asarray(rvalues)

    if isinstance(rvalues, (np.ndarray, ABCExtensionArray, ABCIndexClass)):
        # TODO: make this treatment consistent across ops and classes.
        #  We are not catching all listlikes here (e.g. frozenset, tuple)
        #  The ambiguous case is object-dtype.  See GH#27803
        if len(lvalues) != len(rvalues):
            raise ValueError("Lengths must match to compare")

    if should_extension_dispatch(lvalues, rvalues):
        res_values = dispatch_to_extension_op(op, lvalues, rvalues)

    elif is_scalar(rvalues) and isna(rvalues):
        # numpy does not like comparisons vs None
        if op is operator.ne:
            res_values = np.ones(len(lvalues), dtype=bool)
        else:
            res_values = np.zeros(len(lvalues), dtype=bool)

    elif is_object_dtype(lvalues.dtype):
        res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues)

    else:
        op_name = "__{op}__".format(op=op.__name__)
        method = getattr(lvalues, op_name)
        with np.errstate(all="ignore"):
            res_values = method(rvalues)

        if res_values is NotImplemented:
            res_values = invalid_comparison(lvalues, rvalues, op)
        if is_scalar(res_values):
            raise TypeError(
                "Could not compare {typ} type with Series".format(typ=type(rvalues))
            )

    return res_values
Example #29
0
def dispatch_to_series(left, right, func, str_rep=None, axis=None):
    """
    Evaluate the frame operation func(left, right) by evaluating
    column-by-column, dispatching to the Series implementation.

    Parameters
    ----------
    left : DataFrame
    right : scalar or DataFrame
    func : arithmetic or comparison operator
    str_rep : str or None, default None
    axis : {None, 0, 1, "index", "columns"}

    Returns
    -------
    DataFrame
    """
    # Note: we use iloc to access columns for compat with cases
    #       with non-unique columns.
    import pandas.core.computation.expressions as expressions

    right = lib.item_from_zerodim(right)
    if lib.is_scalar(right) or np.ndim(right) == 0:

        def column_op(a, b):
            return {i: func(a.iloc[:, i], b) for i in range(len(a.columns))}

    elif isinstance(right, ABCDataFrame):
        assert right._indexed_same(left)

        def column_op(a, b):
            return {i: func(a.iloc[:, i], b.iloc[:, i]) for i in range(len(a.columns))}

    elif isinstance(right, ABCSeries) and axis == "columns":
        # We only get here if called via _combine_frame_series,
        # in which case we specifically want to operate row-by-row
        assert right.index.equals(left.columns)

        if right.dtype == "timedelta64[ns]":
            # ensure we treat NaT values as the correct dtype
            # Note: we do not do this unconditionally as it may be lossy or
            #  expensive for EA dtypes.
            right = np.asarray(right)

            def column_op(a, b):
                return {i: func(a.iloc[:, i], b[i]) for i in range(len(a.columns))}

        else:

            def column_op(a, b):
                return {i: func(a.iloc[:, i], b.iloc[i]) for i in range(len(a.columns))}

    elif isinstance(right, ABCSeries):
        assert right.index.equals(left.index)  # Handle other cases later

        def column_op(a, b):
            return {i: func(a.iloc[:, i], b) for i in range(len(a.columns))}

    else:
        # Remaining cases have less-obvious dispatch rules
        raise NotImplementedError(right)

    new_data = expressions.evaluate(column_op, str_rep, left, right)
    return new_data
Example #30
0
def dispatch_to_series(left, right, func, axis: Optional[int] = None):
    """
    Evaluate the frame operation func(left, right) by evaluating
    column-by-column, dispatching to the Series implementation.

    Parameters
    ----------
    left : DataFrame
    right : scalar, Series, or DataFrame
    func : arithmetic or comparison operator
    axis : {None, 0, 1}

    Returns
    -------
    DataFrame
    """
    # Get the appropriate array-op to apply to each column/block's values.
    array_op = get_array_op(func)

    right = lib.item_from_zerodim(right)
    if not is_list_like(right):
        # i.e. scalar, faster than checking np.ndim(right) == 0
        bm = left._mgr.apply(array_op, right=right)
        return type(left)(bm)

    elif isinstance(right, ABCDataFrame):
        assert left.index.equals(right.index)
        assert left.columns.equals(right.columns)
        # TODO: The previous assertion `assert right._indexed_same(left)`
        #  fails in cases with empty columns reached via
        #  _frame_arith_method_with_reindex

        bm = left._mgr.operate_blockwise(right._mgr, array_op)
        return type(left)(bm)

    elif isinstance(right, ABCSeries) and axis == 1:
        # axis=1 means we want to operate row-by-row
        assert right.index.equals(left.columns)

        right = right._values
        # maybe_align_as_frame ensures we do not have an ndarray here
        assert not isinstance(right, np.ndarray)

        arrays = [
            array_op(l, r) for l, r in zip(left._iter_column_arrays(), right)
        ]

    elif isinstance(right, ABCSeries):
        assert right.index.equals(left.index)  # Handle other cases later
        right = right._values

        arrays = [array_op(l, right) for l in left._iter_column_arrays()]

    else:
        # Remaining cases have less-obvious dispatch rules
        raise NotImplementedError(right)

    return type(left)._from_arrays(arrays,
                                   left.columns,
                                   left.index,
                                   verify_integrity=False)
Example #31
0
        def __sub__(self, other):
            from pandas import Index

            other = lib.item_from_zerodim(other)
            if isinstance(other, (ABCSeries, ABCDataFrame)):
                return NotImplemented

            # scalar others
            elif other is NaT:
                result = self._sub_nat()
            elif isinstance(other, (Tick, timedelta, np.timedelta64)):
                result = self._add_delta(-other)
            elif isinstance(other, DateOffset):
                # specifically _not_ a Tick
                result = self._add_offset(-other)
            elif isinstance(other, (datetime, np.datetime64)):
                result = self._sub_datelike(other)
            elif is_integer(other):
                # This check must come after the check for np.timedelta64
                # as is_integer returns True for these
                result = self.shift(-other)
            elif isinstance(other, Period):
                result = self._sub_period(other)

            # array-like others
            elif is_timedelta64_dtype(other):
                # TimedeltaIndex, ndarray[timedelta64]
                result = self._add_delta(-other)
            elif is_offsetlike(other):
                # Array/Index of DateOffset objects
                result = self._addsub_offset_array(other, operator.sub)
            elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
                # DatetimeIndex, ndarray[datetime64]
                result = self._sub_datelike(other)
            elif is_period_dtype(other):
                # PeriodIndex
                result = self._sub_period_array(other)
            elif is_integer_dtype(other):
                result = self._addsub_int_array(other, operator.sub)
            elif isinstance(other, Index):
                raise TypeError("cannot subtract {cls} and {typ}".format(
                    cls=type(self).__name__, typ=type(other).__name__))
            elif is_float_dtype(other):
                # Explicitly catch invalid dtypes
                raise TypeError(
                    "cannot subtract {dtype}-dtype from {cls}".format(
                        dtype=other.dtype, cls=type(self).__name__))
            elif is_categorical_dtype(other):
                # Categorical op will raise; defer explicitly
                return NotImplemented
            else:  # pragma: no cover
                return NotImplemented

            if result is NotImplemented:
                return NotImplemented
            elif not isinstance(result, Index):
                # Index.__new__ will choose appropriate subclass for dtype
                result = Index(result)
            res_name = ops.get_op_result_name(self, other)
            result.name = res_name
            return result
Example #32
0
    def wrapper(self, other, axis=None):
        # Validate the axis parameter
        if axis is not None:
            self._get_axis_number(axis)

        if isinstance(other, ABCDataFrame):  # pragma: no cover
            # Defer to DataFrame implementation; fail early
            return NotImplemented

        elif isinstance(other, ABCSeries):
            name = com._maybe_match_name(self, other)
            if not self._indexed_same(other):
                msg = 'Can only compare identically-labeled Series objects'
                raise ValueError(msg)
            res_values = na_op(self.values, other.values)
            return self._constructor(res_values, index=self.index, name=name)

        elif isinstance(other, (np.ndarray, pd.Index)):
            # do not check length of zerodim array
            # as it will broadcast
            if (not is_scalar(lib.item_from_zerodim(other)) and
                    len(self) != len(other)):
                raise ValueError('Lengths must match to compare')

            res_values = na_op(self.values, np.asarray(other))
            return self._constructor(res_values,
                                     index=self.index).__finalize__(self)

        elif (isinstance(other, pd.Categorical) and
              not is_categorical_dtype(self)):
            raise TypeError("Cannot compare a Categorical for op {op} with "
                            "Series of dtype {typ}.\nIf you want to compare "
                            "values, use 'series <op> np.asarray(other)'."
                            .format(op=op, typ=self.dtype))

        elif is_scalar(other) and isna(other):
            # numpy does not like comparisons vs None
            if op is operator.ne:
                res_values = np.ones(len(self), dtype=bool)
            else:
                res_values = np.zeros(len(self), dtype=bool)
            return self._constructor(res_values, index=self.index,
                                     name=self.name, dtype='bool')

        if is_categorical_dtype(self):
            # cats are a special case as get_values() would return an ndarray,
            # which would then not take categories ordering into account
            # we can go directly to op, as the na_op would just test again and
            # dispatch to it.
            with np.errstate(all='ignore'):
                res = op(self.values, other)
        else:
            values = self.get_values()
            if isinstance(other, (list, np.ndarray)):
                other = np.asarray(other)

            with np.errstate(all='ignore'):
                res = na_op(values, other)
            if is_scalar(res):
                raise TypeError('Could not compare {typ} type with Series'
                                .format(typ=type(other)))

            # always return a full value series here
            res = com._values_from_object(res)

        res = pd.Series(res, index=self.index, name=self.name, dtype='bool')
        return res
Example #33
0
def quantile_with_mask(
    values: np.ndarray,
    mask: np.ndarray,
    fill_value,
    qs,
    interpolation: str,
    axis: int,
) -> np.ndarray:
    """
    Compute the quantiles of the given values for each quantile in `qs`.

    Parameters
    ----------
    values : np.ndarray
        For ExtensionArray, this is _values_for_factorize()[0]
    mask : np.ndarray[bool]
        mask = isna(values)
        For ExtensionArray, this is computed before calling _value_for_factorize
    fill_value : Scalar
        The value to interpret fill NA entries with
        For ExtensionArray, this is _values_for_factorize()[1]
    qs : a scalar or list of the quantiles to be computed
    interpolation : str
        Type of interpolation
    axis : int
        Axis along which to compute quantiles.

    Returns
    -------
    np.ndarray

    Notes
    -----
    Assumes values is already 2D.  For ExtensionArray this means np.atleast_2d
    has been called on _values_for_factorize()[0]
    """
    is_empty = values.shape[axis] == 0
    orig_scalar = not is_list_like(qs)
    if orig_scalar:
        # make list-like, unpack later
        qs = [qs]

    if is_empty:
        # create the array of na_values
        # 2d len(values) * len(qs)
        flat = np.array([fill_value] * len(qs))
        result = np.repeat(flat, len(values)).reshape(len(values), len(qs))
    else:
        # asarray needed for Sparse, see GH#24600
        result = nanpercentile(
            values,
            np.array(qs) * 100,
            axis=axis,
            na_value=fill_value,
            mask=mask,
            ndim=values.ndim,
            interpolation=interpolation,
        )

        result = np.array(result, copy=False)
        result = result.T

    if orig_scalar:
        assert result.shape[-1] == 1, result.shape
        result = result[..., 0]
        result = lib.item_from_zerodim(result)

    return result
Example #34
0
    def __floordiv__(self, other):
        if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
            return NotImplemented

        other = lib.item_from_zerodim(other)
        if is_scalar(other):
            if isinstance(other, (timedelta, np.timedelta64, Tick)):
                other = Timedelta(other)
                if other is NaT:
                    # treat this specifically as timedelta-NaT
                    result = np.empty(self.shape, dtype=np.float64)
                    result.fill(np.nan)
                    return result

                # dispatch to Timedelta implementation
                result = other.__rfloordiv__(self._data)
                return result

            # at this point we should only have numeric scalars; anything
            #  else will raise
            result = self.asi8 // other
            result[self._isnan] = iNaT
            freq = None
            if self.freq is not None:
                # Note: freq gets division, not floor-division
                freq = self.freq / other
            return type(self)(result.view('m8[ns]'), freq=freq)

        if not hasattr(other, "dtype"):
            # list, tuple
            other = np.array(other)
        if len(other) != len(self):
            raise ValueError("Cannot divide with unequal lengths")

        elif is_timedelta64_dtype(other):
            other = type(self)(other)

            # numpy timedelta64 does not natively support floordiv, so operate
            #  on the i8 values
            result = self.asi8 // other.asi8
            mask = self._isnan | other._isnan
            if mask.any():
                result = result.astype(np.int64)
                result[mask] = np.nan
            return result

        elif is_object_dtype(other):
            result = [self[n] // other[n] for n in range(len(self))]
            result = np.array(result)
            if lib.infer_dtype(result) == 'timedelta':
                result, _ = sequence_to_td64ns(result)
                return type(self)(result)
            return result

        elif is_integer_dtype(other) or is_float_dtype(other):
            result = self._data // other
            return type(self)(result)

        else:
            dtype = getattr(other, "dtype", type(other).__name__)
            raise TypeError("Cannot divide {typ} by {cls}".format(
                typ=dtype, cls=type(self).__name__))
Example #35
0
    def wrapper(self, other):

        res_name = get_op_result_name(self, other)
        other = lib.item_from_zerodim(other)

        # TODO: shouldn't we be applying finalize whenever
        #  not isinstance(other, ABCSeries)?
        finalizer = (
            lambda x: x.__finalize__(self)
            if isinstance(other, (np.ndarray, ABCIndexClass))
            else x
        )

        if isinstance(other, list):
            # TODO: same for tuples?
            other = np.asarray(other)

        if isinstance(other, ABCDataFrame):  # pragma: no cover
            # Defer to DataFrame implementation; fail early
            return NotImplemented

        if isinstance(other, ABCSeries) and not self._indexed_same(other):
            raise ValueError("Can only compare identically-labeled Series objects")

        elif isinstance(
            other, (np.ndarray, ABCExtensionArray, ABCIndexClass, ABCSeries)
        ):
            # TODO: make this treatment consistent across ops and classes.
            #  We are not catching all listlikes here (e.g. frozenset, tuple)
            #  The ambiguous case is object-dtype.  See GH#27803
            if len(self) != len(other):
                raise ValueError("Lengths must match to compare")

        lvalues = extract_array(self, extract_numpy=True)
        rvalues = extract_array(other, extract_numpy=True)

        if should_extension_dispatch(lvalues, rvalues):
            res_values = dispatch_to_extension_op(op, lvalues, rvalues)

        elif is_scalar(rvalues) and isna(rvalues):
            # numpy does not like comparisons vs None
            if op is operator.ne:
                res_values = np.ones(len(lvalues), dtype=bool)
            else:
                res_values = np.zeros(len(lvalues), dtype=bool)

        else:
            with np.errstate(all="ignore"):
                res_values = na_op(lvalues, rvalues)
            if is_scalar(res_values):
                raise TypeError(
                    "Could not compare {typ} type with Series".format(typ=type(rvalues))
                )

        result = self._constructor(res_values, index=self.index)
        result = finalizer(result)

        # Set the result's name after finalizer is called because finalizer
        #  would set it back to self.name
        result.name = res_name
        return result
Example #36
0
def dispatch_to_series(left, right, func, axis=None):
    """
    Evaluate the frame operation func(left, right) by evaluating
    column-by-column, dispatching to the Series implementation.

    Parameters
    ----------
    left : DataFrame
    right : scalar or DataFrame
    func : arithmetic or comparison operator
    axis : {None, 0, 1, "index", "columns"}

    Returns
    -------
    DataFrame
    """
    # Note: we use iloc to access columns for compat with cases
    #       with non-unique columns.
    import pandas.core.computation.expressions as expressions

    right = lib.item_from_zerodim(right)
    if lib.is_scalar(right) or np.ndim(right) == 0:

        # Get the appropriate array-op to apply to each block's values.
        array_op = get_array_op(func)
        bm = left._mgr.apply(array_op, right=right)
        return type(left)(bm)

    elif isinstance(right, ABCDataFrame):
        assert left.index.equals(right.index)
        assert left.columns.equals(right.columns)
        # TODO: The previous assertion `assert right._indexed_same(left)`
        #  fails in cases with empty columns reached via
        #  _frame_arith_method_with_reindex

        array_op = get_array_op(func)
        bm = left._mgr.operate_blockwise(right._mgr, array_op)
        return type(left)(bm)

    elif isinstance(right, ABCSeries) and axis == "columns":
        # We only get here if called via _combine_series_frame,
        # in which case we specifically want to operate row-by-row
        assert right.index.equals(left.columns)

        if right.dtype == "timedelta64[ns]":
            # ensure we treat NaT values as the correct dtype
            # Note: we do not do this unconditionally as it may be lossy or
            #  expensive for EA dtypes.
            right = np.asarray(right)

            def column_op(a, b):
                return {i: func(a.iloc[:, i], b[i]) for i in range(len(a.columns))}

        else:

            def column_op(a, b):
                return {i: func(a.iloc[:, i], b.iloc[i]) for i in range(len(a.columns))}

    elif isinstance(right, ABCSeries):
        assert right.index.equals(left.index)  # Handle other cases later

        def column_op(a, b):
            return {i: func(a.iloc[:, i], b) for i in range(len(a.columns))}

    else:
        # Remaining cases have less-obvious dispatch rules
        raise NotImplementedError(right)

    new_data = expressions.evaluate(column_op, left, right)
    return new_data
Example #37
0
def sanitize_array(
    data,
    index: Index | None,
    dtype: DtypeObj | None = None,
    copy: bool = False,
    raise_cast_failure: bool = True,
    *,
    allow_2d: bool = False,
) -> ArrayLike:
    """
    Sanitize input data to an ndarray or ExtensionArray, copy if specified,
    coerce to the dtype if specified.

    Parameters
    ----------
    data : Any
    index : Index or None, default None
    dtype : np.dtype, ExtensionDtype, or None, default None
    copy : bool, default False
    raise_cast_failure : bool, default True
    allow_2d : bool, default False
        If False, raise if we have a 2D Arraylike.

    Returns
    -------
    np.ndarray or ExtensionArray

    Notes
    -----
    raise_cast_failure=False is only intended to be True when called from the
    DataFrame constructor, as the dtype keyword there may be interpreted as only
    applying to a subset of columns, see GH#24435.
    """
    if isinstance(data, ma.MaskedArray):
        data = sanitize_masked_array(data)

    if isinstance(dtype, PandasDtype):
        # Avoid ending up with a PandasArray
        dtype = dtype.numpy_dtype

    # extract ndarray or ExtensionArray, ensure we have no PandasArray
    data = extract_array(data, extract_numpy=True)

    if isinstance(data, np.ndarray) and data.ndim == 0:
        if dtype is None:
            dtype = data.dtype
        data = lib.item_from_zerodim(data)
    elif isinstance(data, range):
        # GH#16804
        data = range_to_ndarray(data)
        copy = False

    if not is_list_like(data):
        if index is None:
            raise ValueError(
                "index must be specified when data is not list-like")
        data = construct_1d_arraylike_from_scalar(data, len(index), dtype)
        return data

    # GH#846
    if isinstance(data, np.ndarray):
        if isinstance(data, np.matrix):
            data = data.A

        if dtype is not None and is_float_dtype(
                data.dtype) and is_integer_dtype(dtype):
            # possibility of nan -> garbage
            try:
                subarr = _try_cast(data, dtype, copy, True)
            except IntCastingNaNError:
                warnings.warn(
                    "In a future version, passing float-dtype values containing NaN "
                    "and an integer dtype will raise IntCastingNaNError "
                    "(subclass of ValueError) instead of silently ignoring the "
                    "passed dtype. To retain the old behavior, call Series(arr) or "
                    "DataFrame(arr) without passing a dtype.",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
                subarr = np.array(data, copy=copy)
            except ValueError:
                if not raise_cast_failure:
                    # i.e. called via DataFrame constructor
                    warnings.warn(
                        "In a future version, passing float-dtype values and an "
                        "integer dtype to DataFrame will retain floating dtype "
                        "if they cannot be cast losslessly (matching Series behavior). "
                        "To retain the old behavior, use DataFrame(data).astype(dtype)",
                        FutureWarning,
                        stacklevel=find_stack_level(),
                    )
                    # GH#40110 until the deprecation is enforced, we _dont_
                    #  ignore the dtype for DataFrame, and _do_ cast even though
                    #  it is lossy.
                    dtype = cast(np.dtype, dtype)
                    return np.array(data, dtype=dtype, copy=copy)
                subarr = np.array(data, copy=copy)
        else:
            # we will try to copy by-definition here
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    elif isinstance(data, ABCExtensionArray):
        # it is already ensured above this is not a PandasArray
        subarr = data

        if dtype is not None:
            subarr = subarr.astype(dtype, copy=copy)
        elif copy:
            subarr = subarr.copy()

    else:
        if isinstance(data, (set, frozenset)):
            # Raise only for unordered sets, e.g., not for dict_keys
            raise TypeError(f"'{type(data).__name__}' type is unordered")

        # materialize e.g. generators, convert e.g. tuples, abc.ValueView
        if hasattr(data, "__array__"):
            # e.g. dask array GH#38645
            data = np.asarray(data)
        else:
            data = list(data)

        if dtype is not None or len(data) == 0:
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)
        else:
            subarr = maybe_convert_platform(data)
            if subarr.dtype == object:
                subarr = cast(np.ndarray, subarr)
                subarr = maybe_infer_to_datetimelike(subarr)

    subarr = _sanitize_ndim(subarr, data, dtype, index, allow_2d=allow_2d)

    if isinstance(subarr, np.ndarray):
        # at this point we should have dtype be None or subarr.dtype == dtype
        dtype = cast(np.dtype, dtype)
        subarr = _sanitize_str_dtypes(subarr, data, dtype, copy)

    return subarr
Example #38
0
def comparison_op(
    left: ArrayLike,
    right: Any,
    op,
    str_rep: Optional[str] = None,
) -> ArrayLike:
    """
    Evaluate a comparison operation `=`, `!=`, `>=`, `>`, `<=`, or `<`.

    Parameters
    ----------
    left : np.ndarray or ExtensionArray
    right : object
        Cannot be a DataFrame, Series, or Index.
    op : {operator.eq, operator.ne, operator.gt, operator.ge, operator.lt, operator.le}

    Returns
    -------
    ndarray or ExtensionArray
    """
    # NB: We assume extract_array has already been called on left and right
    lvalues = left
    rvalues = right

    rvalues = lib.item_from_zerodim(rvalues)
    if isinstance(rvalues, list):
        # TODO: same for tuples?
        rvalues = np.asarray(rvalues)

    if isinstance(rvalues, (np.ndarray, ABCExtensionArray)):
        # TODO: make this treatment consistent across ops and classes.
        #  We are not catching all listlikes here (e.g. frozenset, tuple)
        #  The ambiguous case is object-dtype.  See GH#27803
        if len(lvalues) != len(rvalues):
            if _can_broadcast(lvalues, rvalues):
                return _broadcast_comparison_op(lvalues, rvalues, op)
            raise ValueError("Lengths must match to compare", lvalues.shape,
                             rvalues.shape)

    if should_extension_dispatch(lvalues, rvalues):
        res_values = dispatch_to_extension_op(op, lvalues, rvalues)

    elif is_scalar(rvalues) and isna(rvalues):
        # numpy does not like comparisons vs None
        if op is operator.ne:
            res_values = np.ones(lvalues.shape, dtype=bool)
        else:
            res_values = np.zeros(lvalues.shape, dtype=bool)

    elif is_object_dtype(lvalues.dtype):
        res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues)

    else:
        with np.errstate(all="ignore"):
            res_values = na_arithmetic_op(lvalues,
                                          rvalues,
                                          op,
                                          str_rep,
                                          is_cmp=True)

    return res_values
Example #39
0
def dispatch_to_series(left, right, func, str_rep=None, axis=None):
    """
    Evaluate the frame operation func(left, right) by evaluating
    column-by-column, dispatching to the Series implementation.

    Parameters
    ----------
    left : DataFrame
    right : scalar or DataFrame
    func : arithmetic or comparison operator
    str_rep : str or None, default None
    axis : {None, 0, 1, "index", "columns"}

    Returns
    -------
    DataFrame
    """
    # Note: we use iloc to access columns for compat with cases
    #       with non-unique columns.
    import pandas.core.computation.expressions as expressions

    right = lib.item_from_zerodim(right)
    if lib.is_scalar(right) or np.ndim(right) == 0:

        def column_op(a, b):
            return {i: func(a.iloc[:, i], b) for i in range(len(a.columns))}

    elif isinstance(right, ABCDataFrame):
        assert right._indexed_same(left)

        def column_op(a, b):
            return {
                i: func(a.iloc[:, i], b.iloc[:, i])
                for i in range(len(a.columns))
            }

    elif isinstance(right, ABCSeries) and axis == "columns":
        # We only get here if called via left._combine_match_columns,
        # in which case we specifically want to operate row-by-row
        assert right.index.equals(left.columns)

        def column_op(a, b):
            return {
                i: func(a.iloc[:, i], b.iloc[i])
                for i in range(len(a.columns))
            }

    elif isinstance(right, ABCSeries):
        assert right.index.equals(left.index)  # Handle other cases later

        def column_op(a, b):
            return {i: func(a.iloc[:, i], b) for i in range(len(a.columns))}

    else:
        # Remaining cases have less-obvious dispatch rules
        raise NotImplementedError(right)

    new_data = expressions.evaluate(column_op, str_rep, left, right)

    result = left._constructor(new_data, index=left.index, copy=False)
    # Pin columns instead of passing to constructor for compat with
    # non-unique columns case
    result.columns = left.columns
    return result
Example #40
0
    def __sub__(self, other):
        other = lib.item_from_zerodim(other)
        if isinstance(other, (ABCSeries, ABCDataFrame)):
            return NotImplemented

        # scalar others
        elif other is NaT:
            result = self._sub_nat()
        elif isinstance(other, (Tick, timedelta, np.timedelta64)):
            result = self._add_delta(-other)
        elif isinstance(other, DateOffset):
            # specifically _not_ a Tick
            result = self._add_offset(-other)
        elif isinstance(other, (datetime, np.datetime64)):
            result = self._sub_datetimelike_scalar(other)
        elif lib.is_integer(other):
            # This check must come after the check for np.timedelta64
            # as is_integer returns True for these
            if not is_period_dtype(self):
                maybe_integer_op_deprecated(self)
            result = self._time_shift(-other)

        elif isinstance(other, Period):
            result = self._sub_period(other)

        # array-like others
        elif is_timedelta64_dtype(other):
            # TimedeltaIndex, ndarray[timedelta64]
            result = self._add_delta(-other)
        elif is_offsetlike(other):
            # Array/Index of DateOffset objects
            result = self._addsub_offset_array(other, operator.sub)
        elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
            # DatetimeIndex, ndarray[datetime64]
            result = self._sub_datetime_arraylike(other)
        elif is_period_dtype(other):
            # PeriodIndex
            result = self._sub_period_array(other)
        elif is_integer_dtype(other):
            if not is_period_dtype(self):
                maybe_integer_op_deprecated(self)
            result = self._addsub_int_array(other, operator.sub)
        elif isinstance(other, ABCIndexClass):
            raise TypeError("cannot subtract {cls} and {typ}"
                            .format(cls=type(self).__name__,
                                    typ=type(other).__name__))
        elif is_float_dtype(other):
            # Explicitly catch invalid dtypes
            raise TypeError("cannot subtract {dtype}-dtype from {cls}"
                            .format(dtype=other.dtype,
                                    cls=type(self).__name__))
        elif is_extension_array_dtype(other):
            # Categorical op will raise; defer explicitly
            return NotImplemented
        else:  # pragma: no cover
            return NotImplemented

        if is_timedelta64_dtype(result) and isinstance(result, np.ndarray):
            from pandas.core.arrays import TimedeltaArrayMixin
            # TODO: infer freq?
            return TimedeltaArrayMixin(result)
        return result
Example #41
0
    def wrapper(self, other, axis=None):
        # Validate the axis parameter
        if axis is not None:
            self._get_axis_number(axis)

        res_name = get_op_result_name(self, other)
        other = lib.item_from_zerodim(other)

        # TODO: shouldn't we be applying finalize whenever
        #  not isinstance(other, ABCSeries)?
        finalizer = (lambda x: x.__finalize__(self)
                     if isinstance(other, (np.ndarray, ABCIndexClass)) else x)

        if isinstance(other, list):
            # TODO: same for tuples?
            other = np.asarray(other)

        if isinstance(other, ABCDataFrame):  # pragma: no cover
            # Defer to DataFrame implementation; fail early
            return NotImplemented

        if isinstance(other, ABCSeries) and not self._indexed_same(other):
            raise ValueError(
                "Can only compare identically-labeled Series objects")
        elif (is_list_like(other) and len(other) != len(self)
              and not isinstance(other, (set, frozenset))):
            raise ValueError("Lengths must match")

        elif isinstance(other, (np.ndarray, ABCIndexClass, ABCSeries)):
            # TODO: make this treatment consistent across ops and classes.
            #  We are not catching all listlikes here (e.g. frozenset, tuple)
            #  The ambiguous case is object-dtype.  See GH#27803
            if len(self) != len(other):
                raise ValueError("Lengths must match to compare")

        if is_categorical_dtype(self):
            # Dispatch to Categorical implementation; CategoricalIndex
            # behavior is non-canonical GH#19513
            res_values = dispatch_to_extension_op(op, self, other)

        elif is_datetime64_dtype(self) or is_datetime64tz_dtype(self):
            # Dispatch to DatetimeIndex to ensure identical
            # Series/Index behavior
            from pandas.core.arrays import DatetimeArray

            res_values = dispatch_to_extension_op(op, DatetimeArray(self),
                                                  other)

        elif is_timedelta64_dtype(self):
            from pandas.core.arrays import TimedeltaArray

            res_values = dispatch_to_extension_op(op, TimedeltaArray(self),
                                                  other)

        elif is_extension_array_dtype(self) or (is_extension_array_dtype(other)
                                                and not is_scalar(other)):
            # Note: the `not is_scalar(other)` condition rules out
            #  e.g. other == "category"
            res_values = dispatch_to_extension_op(op, self, other)

        elif is_scalar(other) and isna(other):
            # numpy does not like comparisons vs None
            if op is operator.ne:
                res_values = np.ones(len(self), dtype=bool)
            else:
                res_values = np.zeros(len(self), dtype=bool)

        else:
            lvalues = extract_array(self, extract_numpy=True)
            rvalues = extract_array(other, extract_numpy=True)

            with np.errstate(all="ignore"):
                res_values = na_op(lvalues, rvalues)
            if is_scalar(res_values):
                raise TypeError(
                    "Could not compare {typ} type with Series".format(
                        typ=type(other)))

        result = self._constructor(res_values, index=self.index)
        # rename is needed in case res_name is None and result.name
        #  is not.
        return finalizer(result).rename(res_name)
Example #42
0
    def __floordiv__(self, other):
        if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
            return NotImplemented

        other = lib.item_from_zerodim(other)
        if is_scalar(other):
            if isinstance(other, (timedelta, np.timedelta64, Tick)):
                other = Timedelta(other)
                if other is NaT:
                    # treat this specifically as timedelta-NaT
                    result = np.empty(self.shape, dtype=np.float64)
                    result.fill(np.nan)
                    return result

                # dispatch to Timedelta implementation
                result = other.__rfloordiv__(self._data)
                return result

            # at this point we should only have numeric scalars; anything
            #  else will raise
            result = self.asi8 // other
            result[self._isnan] = iNaT
            freq = None
            if self.freq is not None:
                # Note: freq gets division, not floor-division
                freq = self.freq / other
            return type(self)(result.view('m8[ns]'), freq=freq)

        if not hasattr(other, "dtype"):
            # list, tuple
            other = np.array(other)
        if len(other) != len(self):
            raise ValueError("Cannot divide with unequal lengths")

        elif is_timedelta64_dtype(other):
            other = type(self)(other)

            # numpy timedelta64 does not natively support floordiv, so operate
            #  on the i8 values
            result = self.asi8 // other.asi8
            mask = self._isnan | other._isnan
            if mask.any():
                result = result.astype(np.int64)
                result[mask] = np.nan
            return result

        elif is_object_dtype(other):
            result = [self[n] // other[n] for n in range(len(self))]
            result = np.array(result)
            if lib.infer_dtype(result, skipna=False) == 'timedelta':
                result, _ = sequence_to_td64ns(result)
                return type(self)(result)
            return result

        elif is_integer_dtype(other) or is_float_dtype(other):
            result = self._data // other
            return type(self)(result)

        else:
            dtype = getattr(other, "dtype", type(other).__name__)
            raise TypeError("Cannot divide {typ} by {cls}"
                            .format(typ=dtype, cls=type(self).__name__))
Example #43
0
def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike:
    """
    Evaluate a comparison operation `=`, `!=`, `>=`, `>`, `<=`, or `<`.

    Parameters
    ----------
    left : np.ndarray or ExtensionArray
    right : object
        Cannot be a DataFrame, Series, or Index.
    op : {operator.eq, operator.ne, operator.gt, operator.ge, operator.lt, operator.le}

    Returns
    -------
    ndarray or ExtensionArray
    """
    # NB: We assume extract_array has already been called on left and right
    lvalues = ensure_wrapped_if_datetimelike(left)
    rvalues = right

    rvalues = lib.item_from_zerodim(rvalues)
    if isinstance(rvalues, list):
        # TODO: same for tuples?
        rvalues = np.asarray(rvalues)

    if isinstance(rvalues, (np.ndarray, ABCExtensionArray)):
        # TODO: make this treatment consistent across ops and classes.
        #  We are not catching all listlikes here (e.g. frozenset, tuple)
        #  The ambiguous case is object-dtype.  See GH#27803
        if len(lvalues) != len(rvalues):
            raise ValueError("Lengths must match to compare", lvalues.shape,
                             rvalues.shape)

    if should_extension_dispatch(lvalues, rvalues):
        # Call the method on lvalues
        res_values = op(lvalues, rvalues)

    elif is_scalar(rvalues) and isna(rvalues):
        # numpy does not like comparisons vs None
        if op is operator.ne:
            res_values = np.ones(lvalues.shape, dtype=bool)
        else:
            res_values = np.zeros(lvalues.shape, dtype=bool)

    elif is_numeric_v_string_like(lvalues, rvalues):
        # GH#36377 going through the numexpr path would incorrectly raise
        return invalid_comparison(lvalues, rvalues, op)

    elif is_object_dtype(lvalues.dtype):
        res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues)

    else:
        with warnings.catch_warnings():
            # suppress warnings from numpy about element-wise comparison
            warnings.simplefilter("ignore", DeprecationWarning)
            with np.errstate(all="ignore"):
                res_values = _na_arithmetic_op(lvalues,
                                               rvalues,
                                               op,
                                               is_cmp=True)

    return res_values
Example #44
0
def sanitize_array(
    data,
    index: Optional[Index],
    dtype: Optional[DtypeObj] = None,
    copy: bool = False,
    raise_cast_failure: bool = False,
) -> ArrayLike:
    """
    Sanitize input data to an ndarray or ExtensionArray, copy if specified,
    coerce to the dtype if specified.
    """

    if isinstance(data, ma.MaskedArray):
        data = sanitize_masked_array(data)

    # extract ndarray or ExtensionArray, ensure we have no PandasArray
    data = extract_array(data, extract_numpy=True)

    if isinstance(data, np.ndarray) and data.ndim == 0:
        if dtype is None:
            dtype = data.dtype
        data = lib.item_from_zerodim(data)

    # GH#846
    if isinstance(data, np.ndarray):

        if dtype is not None and is_float_dtype(
                data.dtype) and is_integer_dtype(dtype):
            # possibility of nan -> garbage
            try:
                subarr = _try_cast(data, dtype, copy, True)
            except ValueError:
                subarr = np.array(data, copy=copy)
        else:
            # we will try to copy by-definition here
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    elif isinstance(data, ABCExtensionArray):
        # it is already ensured above this is not a PandasArray
        subarr = data

        if dtype is not None:
            subarr = subarr.astype(dtype, copy=copy)
        elif copy:
            subarr = subarr.copy()
        return subarr

    elif isinstance(data,
                    (list, tuple, abc.Set, abc.ValuesView)) and len(data) > 0:
        # TODO: deque, array.array
        if isinstance(data, set):
            # Raise only for unordered sets, e.g., not for dict_keys
            raise TypeError("Set type is unordered")
        data = list(data)

        if dtype is not None:
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)
        else:
            subarr = maybe_convert_platform(data)
            subarr = maybe_cast_to_datetime(subarr, dtype)

    elif isinstance(data, range):
        # GH#16804
        arr = np.arange(data.start, data.stop, data.step, dtype="int64")
        subarr = _try_cast(arr, dtype, copy, raise_cast_failure)

    elif not is_list_like(data):
        if index is None:
            raise ValueError(
                "index must be specified when data is not list-like")
        subarr = construct_1d_arraylike_from_scalar(data, len(index), dtype)

    else:
        subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    subarr = _sanitize_ndim(subarr, data, dtype, index)

    if not (is_extension_array_dtype(subarr.dtype)
            or is_extension_array_dtype(dtype)):
        subarr = _sanitize_str_dtypes(subarr, data, dtype, copy)

        is_object_or_str_dtype = is_object_dtype(dtype) or is_string_dtype(
            dtype)
        if is_object_dtype(subarr.dtype) and not is_object_or_str_dtype:
            inferred = lib.infer_dtype(subarr, skipna=False)
            if inferred in {"interval", "period"}:
                subarr = array(subarr)

    return subarr
Example #45
0
    def wrapper(self, other, axis=None):
        # Validate the axis parameter
        if axis is not None:
            self._get_axis_number(axis)

        if isinstance(other, ABCSeries):
            name = com._maybe_match_name(self, other)
            if not self._indexed_same(other):
                msg = 'Can only compare identically-labeled Series objects'
                raise ValueError(msg)
            return self._constructor(na_op(self.values, other.values),
                                     index=self.index,
                                     name=name)
        elif isinstance(other, ABCDataFrame):  # pragma: no cover
            return NotImplemented
        elif isinstance(other, (np.ndarray, pd.Index)):
            # do not check length of zerodim array
            # as it will broadcast
            if (not is_scalar(lib.item_from_zerodim(other))
                    and len(self) != len(other)):
                raise ValueError('Lengths must match to compare')

            if isinstance(other, ABCPeriodIndex):
                # temp workaround until fixing GH 13637
                # tested in test_nat_comparisons
                # (pandas.tests.series.test_operators.TestSeriesOperators)
                return self._constructor(na_op(self.values,
                                               other.astype(object).values),
                                         index=self.index)

            return self._constructor(na_op(self.values, np.asarray(other)),
                                     index=self.index).__finalize__(self)

        elif isinstance(other, pd.Categorical):
            if not is_categorical_dtype(self):
                msg = ("Cannot compare a Categorical for op {op} with Series "
                       "of dtype {typ}.\nIf you want to compare values, use "
                       "'series <op> np.asarray(other)'.")
                raise TypeError(msg.format(op=op, typ=self.dtype))

        if is_categorical_dtype(self):
            # cats are a special case as get_values() would return an ndarray,
            # which would then not take categories ordering into account
            # we can go directly to op, as the na_op would just test again and
            # dispatch to it.
            with np.errstate(all='ignore'):
                res = op(self.values, other)
        else:
            values = self.get_values()
            if isinstance(other, (list, np.ndarray)):
                other = np.asarray(other)

            with np.errstate(all='ignore'):
                res = na_op(values, other)
            if is_scalar(res):
                raise TypeError(
                    'Could not compare {typ} type with Series'.format(
                        typ=type(other)))

            # always return a full value series here
            res = com._values_from_object(res)

        res = pd.Series(res, index=self.index, name=self.name, dtype='bool')
        return res