def test_is_datetimelike_v_numeric():
    """Check ``com.is_datetimelike_v_numeric`` on scalar and array operands."""
    # Built directly with NumPy (microsecond unit, matching what a stdlib
    # ``datetime`` converts to) instead of going through the deprecated
    # ``pd.datetime`` alias.
    dt = np.datetime64("2017-01-01", "us")

    # Operands of the same kind are not a datetimelike-vs-numeric pair.
    assert not com.is_datetimelike_v_numeric(1, 1)
    assert not com.is_datetimelike_v_numeric(dt, dt)
    assert not com.is_datetimelike_v_numeric(np.array([1]), np.array([2]))
    assert not com.is_datetimelike_v_numeric(np.array([dt]), np.array([dt]))

    # Mixed operands must be detected regardless of argument order.
    assert com.is_datetimelike_v_numeric(1, dt)
    assert com.is_datetimelike_v_numeric(dt, 1)
    assert com.is_datetimelike_v_numeric(np.array([dt]), 1)
    assert com.is_datetimelike_v_numeric(np.array([1]), dt)
    assert com.is_datetimelike_v_numeric(np.array([dt]), np.array([1]))
def test_is_datetimelike_v_numeric():
    """Check ``com.is_datetimelike_v_numeric`` on scalar and array operands."""
    # Built directly with NumPy (microsecond unit, matching what a stdlib
    # ``datetime`` converts to) instead of going through the deprecated
    # ``pd.datetime`` alias.
    dt = np.datetime64("2017-01-01", "us")

    # Pairs whose operands are of the same kind: expected to be rejected.
    same_kind_pairs = [
        (1, 1),
        (dt, dt),
        (np.array([1]), np.array([2])),
        (np.array([dt]), np.array([dt])),
    ]
    for lhs, rhs in same_kind_pairs:
        assert not com.is_datetimelike_v_numeric(lhs, rhs)

    # Pairs mixing a datetimelike with a numeric: expected to be detected in
    # either argument order (the scalar/scalar case is checked both ways).
    mixed_pairs = [
        (1, dt),
        (dt, 1),
        (np.array([dt]), 1),
        (np.array([1]), dt),
        (np.array([dt]), np.array([1])),
    ]
    for lhs, rhs in mixed_pairs:
        assert com.is_datetimelike_v_numeric(lhs, rhs)
def na_op(x, y):
    """
    Evaluate the enclosing comparison on ``x`` and ``y`` with NA handling.

    ``op``, ``name`` and ``masker`` are free variables supplied by the
    enclosing scope (not visible in this block): the comparison callable, the
    dunder name looked up on ``x``, and the value written at masked positions.

    Raises
    ------
    TypeError
        If a datetimelike operand is compared with a numeric one, or if the
        dunder method returns ``NotImplemented``.
    """
    # Categorical operands handle the comparison themselves.
    if is_categorical_dtype(x):
        return op(x, y)
    if is_categorical_dtype(y) and not is_scalar(y):
        return op(y, x)

    if is_object_dtype(x.dtype):
        return _comp_method_OBJECT_ARRAY(op, x, y)

    # Reject datetimelike-vs-numeric before any i8 view is taken; once viewed
    # as integers such a comparison would silently succeed.
    if is_datetimelike_v_numeric(x, y):
        raise TypeError("invalid type comparison")

    y_is_scalar = is_scalar(y)

    # numpy does not like comparisons vs None: a missing scalar gives a
    # constant answer that depends only on whether this is ``__ne__``.
    if y_is_scalar and isna(y):
        constant = np.ones if name == '__ne__' else np.zeros
        return constant(len(x), dtype=bool)

    # datetime/timedelta operands are compared through their i8 view; the
    # NA positions are recorded first so they can be overwritten afterwards.
    na_positions = None
    if needs_i8_conversion(x) or (not y_is_scalar and needs_i8_conversion(y)):
        if y_is_scalar:
            na_positions = isna(x)
            y = libindex.convert_scalar(x, com._values_from_object(y))
        else:
            na_positions = isna(x) | isna(y)
            y = y.view('i8')
        x = x.view('i8')

    try:
        with np.errstate(all='ignore'):
            result = getattr(x, name)(y)
        if result is NotImplemented:
            raise TypeError("invalid type comparison")
    except AttributeError:
        # Fall back to the operator itself when the dunder lookup/call fails.
        result = op(x, y)

    if na_positions is not None and na_positions.any():
        result[na_positions] = masker

    return result
def na_op(x, y):
    """
    Evaluate the enclosing comparison on ``x`` and ``y`` with NA handling.

    Relies on ``op`` (comparison callable), ``name`` (dunder method name) and
    ``masker`` (fill value for masked positions) from the enclosing scope,
    which is not visible in this block.

    Raises
    ------
    TypeError
        For datetimelike-vs-numeric operands, or when the dunder method
        returns ``NotImplemented``.
    """
    # Dispatch to the categorical when either operand is one.
    if is_categorical_dtype(x):
        return op(x, y)
    if is_categorical_dtype(y) and not is_scalar(y):
        return op(y, x)

    # Object arrays have their own comparison routine.
    if is_object_dtype(x.dtype):
        return _comp_method_OBJECT_ARRAY(op, x, y)

    # Must be checked before the i8 conversion below, otherwise datetime64
    # (viewed as i8) against integers would be allowed.
    if is_datetimelike_v_numeric(x, y):
        raise TypeError("invalid type comparison")

    scalar_other = is_scalar(y)

    # Comparison against a missing scalar: all True for ``__ne__``, all False
    # for every other comparison.
    if scalar_other and isna(y):
        if name == '__ne__':
            return np.ones(len(x), dtype=bool)
        return np.zeros(len(x), dtype=bool)

    mask = None
    convert = needs_i8_conversion(x) or (
        not scalar_other and needs_i8_conversion(y)
    )
    if convert:
        # Record NA positions before switching to the integer view.
        if scalar_other:
            mask = isna(x)
            y = libindex.convert_scalar(x, com._values_from_object(y))
        else:
            mask = isna(x) | isna(y)
            y = y.view('i8')
        x = x.view('i8')

    try:
        with np.errstate(all='ignore'):
            result = getattr(x, name)(y)
        if result is NotImplemented:
            raise TypeError("invalid type comparison")
    except AttributeError:
        result = op(x, y)

    # Overwrite the positions that were NA in either operand.
    if mask is not None and mask.any():
        result[mask] = masker

    return result
def na_op(x, y):
    """
    Evaluate the enclosing comparison on ``x`` and ``y`` with NA handling.

    ``op``, ``op_name`` and ``masker`` come from the enclosing scope (not
    visible in this block). Invalid comparisons are delegated to
    ``invalid_comparison`` rather than raised here.
    """
    # TODO: should have guarantees on what x, y can be type-wise.
    # Extension dtypes are not called here.

    # Guard that a case once handled here (non-scalar categorical ``y``) is
    # no longer reachable.
    assert not (is_categorical_dtype(y) and not is_scalar(y))

    if is_object_dtype(x.dtype):
        return _comp_method_OBJECT_ARRAY(op, x, y)

    if is_datetimelike_v_numeric(x, y):
        return invalid_comparison(x, y, op)

    # ``x`` itself is expected never to need an i8 view at this point; only a
    # non-scalar ``y`` may trigger the conversion below.
    assert not needs_i8_conversion(x)

    na_mask = None
    if not is_scalar(y) and needs_i8_conversion(y):
        # Record NA positions before both operands are viewed as integers.
        na_mask = isna(x) | isna(y)
        y = y.view("i8")
        x = x.view("i8")

    method = getattr(x, op_name, None)
    if method is None:
        result = op(x, y)
    else:
        with np.errstate(all="ignore"):
            result = method(y)
        if result is NotImplemented:
            return invalid_comparison(x, y, op)

    if na_mask is not None and na_mask.any():
        result[na_mask] = masker

    return result
def na_op(x, y):
    """
    Evaluate the enclosing comparison on ``x`` and ``y``.

    ``op`` and ``op_name`` come from the enclosing scope (not visible in this
    block). Invalid comparisons are delegated to ``invalid_comparison``.
    """
    # TODO: should have guarantees on what x, y can be type-wise.
    # Extension dtypes are not called here.
    if is_object_dtype(x.dtype):
        return comp_method_OBJECT_ARRAY(op, x, y)

    if is_datetimelike_v_numeric(x, y):
        return invalid_comparison(x, y, op)

    compare = getattr(x, op_name)
    # Floating-point warnings raised by the comparison are suppressed.
    with np.errstate(all="ignore"):
        result = compare(y)

    if result is NotImplemented:
        return invalid_comparison(x, y, op)
    return result
def array_equivalent(
    left,
    right,
    strict_nan: bool = False,
    dtype_equal: bool = False,
) -> bool:
    """
    True if two arrays, left and right, have equal non-NaN elements, and NaNs
    in corresponding locations.  False otherwise. It is assumed that left and
    right are NumPy arrays of the same dtype. The behavior of this function
    (particularly with respect to NaNs) is not defined if the dtypes are
    different.

    Parameters
    ----------
    left, right : ndarrays
    strict_nan : bool, default False
        If True, consider NaN and None to be different.
    dtype_equal : bool, default False
        Whether `left` and `right` are known to have the same dtype
        according to `is_dtype_equal`. Some methods, like
        `BlockManager.equals`, require that the dtypes match. Setting this
        to ``True`` can improve performance, but will give different results
        for arrays that are equal but different dtypes.

    Returns
    -------
    b : bool
        Returns True if the arrays are equivalent.

    Examples
    --------
    >>> array_equivalent(
    ...     np.array([1, 2, np.nan]),
    ...     np.array([1, 2, np.nan]))
    True
    >>> array_equivalent(
    ...     np.array([1, np.nan, 2]),
    ...     np.array([1, 2, np.nan]))
    False
    """
    left = np.asarray(left)
    right = np.asarray(right)

    # Arrays of different shape can never be equivalent.
    if left.shape != right.shape:
        return False

    if dtype_equal:
        # Fast path when the dtypes are required to match (Block.equals):
        # dispatch on the left dtype only.
        if left.dtype.kind in "fc":
            return _array_equivalent_float(left, right)
        if is_datetimelike_v_numeric(left.dtype, right.dtype):
            return False
        if needs_i8_conversion(left.dtype):
            return _array_equivalent_datetimelike(left, right)
        if is_string_or_object_np_dtype(left.dtype):
            # TODO: fastpath for pandas' StringDtype
            return _array_equivalent_object(left, right, strict_nan)
        return np.array_equal(left, right)

    # Slow path: the dtypes are allowed to differ.

    # Object arrays can contain None, NaN and NaT; string dtypes must come
    # to this path too (NumPy 1.7.1 compat).
    # Note: `in "OSU"` is non-trivially faster than `in ["O", "S", "U"]`
    #  or `in ("O", "S", "U")`
    if left.dtype.kind in "OSU" or right.dtype.kind in "OSU":
        return _array_equivalent_object(left, right, strict_nan)

    # NaNs can occur in float and complex arrays.
    if is_float_dtype(left.dtype) or is_complex_dtype(left.dtype):
        if not (left.size and right.size):
            # Empty arrays (of equal shape) are trivially equivalent.
            return True
        same_value = left == right
        both_missing = isna(left) & isna(right)
        return (same_value | both_missing).all()

    if is_datetimelike_v_numeric(left, right):
        # GH#29553 avoid numpy deprecation warning
        return False

    if needs_i8_conversion(left.dtype) or needs_i8_conversion(right.dtype):
        # datetime64, timedelta64, Period: only comparable when the dtypes
        # agree, and then through their integer view.
        if not is_dtype_equal(left.dtype, right.dtype):
            return False
        left = left.view("i8")
        right = right.view("i8")

    # Structured dtypes: the dtypes themselves must match first.
    involves_void = left.dtype.type is np.void or right.dtype.type is np.void
    if involves_void and left.dtype != right.dtype:
        return False

    return np.array_equal(left, right)
def array_equivalent(left, right, strict_nan: bool = False) -> bool:
    """
    True if two arrays, left and right, have equal non-NaN elements, and NaNs
    in corresponding locations.  False otherwise. It is assumed that left and
    right are NumPy arrays of the same dtype. The behavior of this function
    (particularly with respect to NaNs) is not defined if the dtypes are
    different.

    Parameters
    ----------
    left, right : ndarrays
    strict_nan : bool, default False
        If True, consider NaN and None to be different.

    Returns
    -------
    b : bool
        Returns True if the arrays are equivalent.

    Examples
    --------
    >>> array_equivalent(
    ...     np.array([1, 2, np.nan]),
    ...     np.array([1, 2, np.nan]))
    True
    >>> array_equivalent(
    ...     np.array([1, np.nan, 2]),
    ...     np.array([1, 2, np.nan]))
    False
    """
    left, right = np.asarray(left), np.asarray(right)

    # shape compat
    if left.shape != right.shape:
        return False

    # Object arrays can contain None, NaN and NaT.
    # String dtypes must come to this path too (NumPy 1.7.1 compat).
    if is_string_dtype(left) or is_string_dtype(right):
        if not strict_nan:
            # isna considers NaN and None to be equivalent.
            return lib.array_equivalent_object(
                ensure_object(left.ravel()), ensure_object(right.ravel())
            )

        # Strict mode: each kind of missing value only matches itself.
        for left_value, right_value in zip(left, right):
            if left_value is NaT and right_value is not NaT:
                return False

            elif left_value is libmissing.NA and right_value is not libmissing.NA:
                return False

            elif isinstance(left_value, float) and np.isnan(left_value):
                if not isinstance(right_value, float) or not np.isnan(right_value):
                    return False
            else:
                try:
                    if np.any(np.asarray(left_value != right_value)):
                        return False
                except TypeError as err:
                    message = str(err)
                    if "Cannot compare tz-naive" in message:
                        # tzawareness compat failure, see GH#28507
                        return False
                    elif "boolean value of NA is ambiguous" in message:
                        return False
                    raise
        return True

    # NaNs can occur in float and complex arrays.
    if is_float_dtype(left.dtype) or is_complex_dtype(left.dtype):
        # Empty arrays are trivially equivalent. ``size`` is the exact
        # element count; multiplying the shape out with ``np.prod`` builds a
        # temporary and uses the platform default integer, which can wrap.
        if not (left.size and right.size):
            return True
        return ((left == right) | (isna(left) & isna(right))).all()

    elif is_datetimelike_v_numeric(left, right):
        # GH#29553 avoid numpy deprecation warning
        return False

    elif needs_i8_conversion(left.dtype) or needs_i8_conversion(right.dtype):
        # datetime64, timedelta64, Period: only comparable when the dtypes
        # agree, and then through their integer view.
        if not is_dtype_equal(left.dtype, right.dtype):
            return False

        left = left.view("i8")
        right = right.view("i8")

    # if we have structured dtypes, compare the dtypes first
    if left.dtype.type is np.void or right.dtype.type is np.void:
        if left.dtype != right.dtype:
            return False

    return np.array_equal(left, right)
def compare_or_regex_search(
    a: ArrayLike,
    b: Union[Scalar, Pattern],
    regex: bool = False,
    mask: Optional[ArrayLike] = None,
) -> Union[ArrayLike, bool]:
    """
    Compare two array_like inputs of the same shape or two scalar values

    Calls operator.eq or re.search, depending on regex argument. If regex is
    True, perform an element-wise regex matching.

    Parameters
    ----------
    a : array_like
    b : scalar or regex pattern
    regex : bool, default False
    mask : array_like or None (default)
        Boolean selector of the elements of ``a`` to compare. When omitted
        and ``a`` is an ndarray while ``b`` is not, the non-NA positions of
        ``a`` are used.

    Returns
    -------
    mask : array_like of bool

    Raises
    ------
    TypeError
        If ``a`` is an ndarray but the comparison collapses to a scalar.
    """

    def _check_comparison_types(
        result: Union[ArrayLike, bool], a: ArrayLike, b: Union[Scalar, Pattern]
    ):
        """
        Raises an error if the two arrays (a,b) cannot be compared.
        Otherwise, returns the comparison result as expected.
        """
        if is_scalar(result) and isinstance(a, np.ndarray):
            left_name = f"ndarray(dtype={a.dtype})"
            right_name = type(b).__name__
            raise TypeError(
                f"Cannot compare types {repr(left_name)} and {repr(right_name)}"
            )

    if not regex:
        op = lambda x: operator.eq(x, b)
    else:
        # Only strings can be searched; anything else never matches.
        op = np.vectorize(
            lambda x: bool(re.search(b, x))
            if isinstance(x, str) and isinstance(b, (str, Pattern))
            else False
        )

    # GH#32621 use mask to avoid comparing to NAs
    if mask is None and isinstance(a, np.ndarray) and not isinstance(b, np.ndarray):
        mask = np.reshape(~(isna(a)), a.shape)

    # Filter only when there really is a mask: indexing an ndarray with
    # ``None`` does not select anything, it prepends a length-1 axis.
    filtered = isinstance(a, np.ndarray) and mask is not None
    if filtered:
        a = a[mask]

    if is_numeric_v_string_like(a, b):
        # GH#29553 avoid deprecation warnings from numpy
        # Nothing matches; report that for every position of the input, not
        # just for the subset that survived the mask.
        full_shape = mask.shape if filtered else a.shape
        return np.zeros(full_shape, dtype=bool)

    elif is_datetimelike_v_numeric(a, b):
        # GH#29553 avoid deprecation warnings from numpy
        _check_comparison_types(False, a, b)
        return False

    result = op(a)

    if isinstance(result, np.ndarray) and mask is not None:
        # The shape of the mask can differ to that of the result
        # since we may compare only a subset of a's or b's elements
        tmp = np.zeros(mask.shape, dtype=np.bool_)
        tmp[mask] = result
        result = tmp

    _check_comparison_types(result, a, b)
    return result
def compare_or_regex_search(
    a: ArrayLike, b: Scalar | Pattern, regex: bool, mask: np.ndarray
) -> ArrayLike | bool:
    """
    Compare two array-like inputs of the same shape or two scalar values

    Calls operator.eq or re.search, depending on regex argument. If regex is
    True, perform an element-wise regex matching.

    Parameters
    ----------
    a : array-like
    b : scalar or regex pattern
    regex : bool
    mask : np.ndarray[bool]
        Selector of the elements of ``a`` to compare.

    Returns
    -------
    mask : array-like of bool

    Raises
    ------
    TypeError
        If ``a`` is an ndarray but the comparison collapses to a scalar.
    """
    # A missing ``b`` matches exactly the positions the mask excludes.
    if isna(b):
        return ~mask

    def _check_comparison_types(
        result: ArrayLike | bool, a: ArrayLike, b: Scalar | Pattern
    ):
        """
        Raises an error if the two arrays (a,b) cannot be compared.
        Otherwise, returns the comparison result as expected.
        """
        if is_scalar(result) and isinstance(a, np.ndarray):
            left_name = f"ndarray(dtype={a.dtype})"
            right_name = type(b).__name__
            raise TypeError(
                f"Cannot compare types {repr(left_name)} and {repr(right_name)}"
            )

    # NOTE(review): ``should_use_regex`` is defined elsewhere; presumably it
    # decides whether ``b`` is usable as a pattern - confirm against its
    # definition.
    if not regex or not should_use_regex(regex, b):
        # TODO: should use missing.mask_missing?
        op = lambda x: operator.eq(x, b)
    else:
        # Only strings can be searched; anything else never matches.
        op = np.vectorize(
            lambda x: bool(re.search(b, x))
            if isinstance(x, str) and isinstance(b, (str, Pattern))
            else False
        )

    # GH#32621 use mask to avoid comparing to NAs
    # Guarded the same way as the re-expansion further down: indexing an
    # ndarray with ``None`` would prepend an axis instead of filtering.
    filtered = isinstance(a, np.ndarray) and mask is not None
    if filtered:
        a = a[mask]

    if is_numeric_v_string_like(a, b):
        # GH#29553 avoid deprecation warnings from numpy
        # Nothing matches; report that for every position of the input, not
        # just for the subset that survived the mask.
        full_shape = mask.shape if filtered else a.shape
        return np.zeros(full_shape, dtype=bool)

    elif is_datetimelike_v_numeric(a, b):
        # GH#29553 avoid deprecation warnings from numpy
        _check_comparison_types(False, a, b)
        return False

    result = op(a)

    if isinstance(result, np.ndarray) and mask is not None:
        # The shape of the mask can differ to that of the result
        # since we may compare only a subset of a's or b's elements
        tmp = np.zeros(mask.shape, dtype=np.bool_)
        np.place(tmp, mask, result)
        result = tmp

    _check_comparison_types(result, a, b)
    return result