def mask_missing(arr, values_to_mask): """ Return a masking array of same size/shape as arr with entries equaling any member of values_to_mask set to True """ dtype, values_to_mask = infer_dtype_from_array(values_to_mask) try: values_to_mask = np.array(values_to_mask, dtype=dtype) except Exception: values_to_mask = np.array(values_to_mask, dtype=object) na_mask = isna(values_to_mask) nonna = values_to_mask[~na_mask] mask = None for x in nonna: if mask is None: # numpy elementwise comparison warning if is_numeric_v_string_like(arr, x): mask = False else: mask = arr == x # if x is a string and arr is not, then we get False and we must # expand the mask to size arr.shape if is_scalar(mask): mask = np.zeros(arr.shape, dtype=bool) else: # numpy elementwise comparison warning if is_numeric_v_string_like(arr, x): mask |= False else: mask |= arr == x if na_mask.any(): if mask is None: mask = isna(arr) else: mask |= isna(arr) # GH 21977 if mask is None: mask = np.zeros(arr.shape, dtype=bool) return mask
def test_is_numeric_v_string_like(): assert not com.is_numeric_v_string_like(1, 1) assert not com.is_numeric_v_string_like(1, "foo") assert not com.is_numeric_v_string_like("foo", "foo") assert not com.is_numeric_v_string_like(np.array([1]), np.array([2])) assert not com.is_numeric_v_string_like( np.array(["foo"]), np.array(["foo"])) assert com.is_numeric_v_string_like(np.array([1]), "foo") assert com.is_numeric_v_string_like("foo", np.array([1])) assert com.is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"])) assert com.is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2]))
def mask_missing(arr, values_to_mask): """ Return a masking array of same size/shape as arr with entries equaling any member of values_to_mask set to True """ dtype, values_to_mask = infer_dtype_from_array(values_to_mask) try: values_to_mask = np.array(values_to_mask, dtype=dtype) except Exception: values_to_mask = np.array(values_to_mask, dtype=object) na_mask = isnull(values_to_mask) nonna = values_to_mask[~na_mask] mask = None for x in nonna: if mask is None: # numpy elementwise comparison warning if is_numeric_v_string_like(arr, x): mask = False else: mask = arr == x # if x is a string and arr is not, then we get False and we must # expand the mask to size arr.shape if is_scalar(mask): mask = np.zeros(arr.shape, dtype=bool) else: # numpy elementwise comparison warning if is_numeric_v_string_like(arr, x): mask |= False else: mask |= arr == x if na_mask.any(): if mask is None: mask = isnull(arr) else: mask |= isnull(arr) return mask
def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike: """ Evaluate a comparison operation `=`, `!=`, `>=`, `>`, `<=`, or `<`. Note: the caller is responsible for ensuring that numpy warnings are suppressed (with np.errstate(all="ignore")) if needed. Parameters ---------- left : np.ndarray or ExtensionArray right : object Cannot be a DataFrame, Series, or Index. op : {operator.eq, operator.ne, operator.gt, operator.ge, operator.lt, operator.le} Returns ------- ndarray or ExtensionArray """ # NB: We assume extract_array has already been called on left and right lvalues = ensure_wrapped_if_datetimelike(left) rvalues = ensure_wrapped_if_datetimelike(right) rvalues = lib.item_from_zerodim(rvalues) if isinstance(rvalues, list): # TODO: same for tuples? rvalues = np.asarray(rvalues) if isinstance(rvalues, (np.ndarray, ABCExtensionArray)): # TODO: make this treatment consistent across ops and classes. # We are not catching all listlikes here (e.g. frozenset, tuple) # The ambiguous case is object-dtype. See GH#27803 if len(lvalues) != len(rvalues): raise ValueError("Lengths must match to compare", lvalues.shape, rvalues.shape) if should_extension_dispatch(lvalues, rvalues): # Call the method on lvalues res_values = op(lvalues, rvalues) elif is_scalar(rvalues) and isna(rvalues): # numpy does not like comparisons vs None if op is operator.ne: res_values = np.ones(lvalues.shape, dtype=bool) else: res_values = np.zeros(lvalues.shape, dtype=bool) elif is_numeric_v_string_like(lvalues, rvalues): # GH#36377 going through the numexpr path would incorrectly raise return invalid_comparison(lvalues, rvalues, op) elif is_object_dtype(lvalues.dtype): res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues) else: res_values = _na_arithmetic_op(lvalues, rvalues, op, is_cmp=True) return res_values
def test_is_numeric_v_string_like(): assert not com.is_numeric_v_string_like(np.array([1]), 1) assert not com.is_numeric_v_string_like(np.array([1]), np.array([2])) assert not com.is_numeric_v_string_like(np.array(["foo"]), np.array(["foo"])) assert com.is_numeric_v_string_like(np.array([1]), "foo") assert com.is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"])) assert com.is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2]))
def mask_missing(arr: ArrayLike, values_to_mask) -> npt.NDArray[np.bool_]: """ Return a masking array of same size/shape as arr with entries equaling any member of values_to_mask set to True Parameters ---------- arr : ArrayLike values_to_mask: list, tuple, or scalar Returns ------- np.ndarray[bool] """ # When called from Block.replace/replace_list, values_to_mask is a scalar # known to be holdable by arr. # When called from Series._single_replace, values_to_mask is tuple or list dtype, values_to_mask = infer_dtype_from(values_to_mask) # error: Argument "dtype" to "array" has incompatible type "Union[dtype[Any], # ExtensionDtype]"; expected "Union[dtype[Any], None, type, _SupportsDType, str, # Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any], # _DTypeDict, Tuple[Any, Any]]]" values_to_mask = np.array(values_to_mask, dtype=dtype) # type: ignore[arg-type] na_mask = isna(values_to_mask) nonna = values_to_mask[~na_mask] # GH 21977 mask = np.zeros(arr.shape, dtype=bool) for x in nonna: if is_numeric_v_string_like(arr, x): # GH#29553 prevent numpy deprecation warnings pass else: new_mask = arr == x if not isinstance(new_mask, np.ndarray): # usually BooleanArray new_mask = new_mask.to_numpy(dtype=bool, na_value=False) mask |= new_mask if na_mask.any(): mask |= isna(arr) return mask
def mask_missing(arr: ArrayLike, values_to_mask) -> np.ndarray: """ Return a masking array of same size/shape as arr with entries equaling any member of values_to_mask set to True Parameters ---------- arr : ArrayLike values_to_mask: list, tuple, or scalar Returns ------- np.ndarray[bool] """ # When called from Block.replace/replace_list, values_to_mask is a scalar # known to be holdable by arr. # When called from Series._single_replace, values_to_mask is tuple or list dtype, values_to_mask = infer_dtype_from(values_to_mask) values_to_mask = np.array(values_to_mask, dtype=dtype) na_mask = isna(values_to_mask) nonna = values_to_mask[~na_mask] # GH 21977 mask = np.zeros(arr.shape, dtype=bool) for x in nonna: if is_numeric_v_string_like(arr, x): # GH#29553 prevent numpy deprecation warnings pass else: mask |= arr == x if na_mask.any(): mask |= isna(arr) return mask
def compare_or_regex_search( a: ArrayLike, b: Union[Scalar, Pattern], regex: bool = False, mask: Optional[ArrayLike] = None, ) -> Union[ArrayLike, bool]: """ Compare two array_like inputs of the same shape or two scalar values Calls operator.eq or re.search, depending on regex argument. If regex is True, perform an element-wise regex matching. Parameters ---------- a : array_like b : scalar or regex pattern regex : bool, default False mask : array_like or None (default) Returns ------- mask : array_like of bool """ def _check_comparison_types( result: Union[ArrayLike, bool], a: ArrayLike, b: Union[Scalar, Pattern] ): """ Raises an error if the two arrays (a,b) cannot be compared. Otherwise, returns the comparison result as expected. """ if is_scalar(result) and isinstance(a, np.ndarray): type_names = [type(a).__name__, type(b).__name__] if isinstance(a, np.ndarray): type_names[0] = f"ndarray(dtype={a.dtype})" raise TypeError( f"Cannot compare types {repr(type_names[0])} and {repr(type_names[1])}" ) if not regex: op = lambda x: operator.eq(x, b) else: op = np.vectorize( lambda x: bool(re.search(b, x)) if isinstance(x, str) and isinstance(b, (str, Pattern)) else False ) # GH#32621 use mask to avoid comparing to NAs if mask is None and isinstance(a, np.ndarray) and not isinstance(b, np.ndarray): mask = np.reshape(~(isna(a)), a.shape) if isinstance(a, np.ndarray): a = a[mask] if is_numeric_v_string_like(a, b): # GH#29553 avoid deprecation warnings from numpy return np.zeros(a.shape, dtype=bool) elif is_datetimelike_v_numeric(a, b): # GH#29553 avoid deprecation warnings from numpy _check_comparison_types(False, a, b) return False result = op(a) if isinstance(result, np.ndarray) and mask is not None: # The shape of the mask can differ to that of the result # since we may compare only a subset of a's or b's elements tmp = np.zeros(mask.shape, dtype=np.bool_) tmp[mask] = result result = tmp _check_comparison_types(result, a, b) return result
def compare_or_regex_search(a: ArrayLike, b: Scalar | Pattern, regex: bool, mask: np.ndarray) -> ArrayLike | bool: """ Compare two array-like inputs of the same shape or two scalar values Calls operator.eq or re.search, depending on regex argument. If regex is True, perform an element-wise regex matching. Parameters ---------- a : array-like b : scalar or regex pattern regex : bool mask : np.ndarray[bool] Returns ------- mask : array-like of bool """ if isna(b): return ~mask def _check_comparison_types(result: ArrayLike | bool, a: ArrayLike, b: Scalar | Pattern): """ Raises an error if the two arrays (a,b) cannot be compared. Otherwise, returns the comparison result as expected. """ if is_scalar(result) and isinstance(a, np.ndarray): type_names = [type(a).__name__, type(b).__name__] type_names[0] = f"ndarray(dtype={a.dtype})" raise TypeError( f"Cannot compare types {repr(type_names[0])} and {repr(type_names[1])}" ) if not regex or not should_use_regex(regex, b): # TODO: should use missing.mask_missing? op = lambda x: operator.eq(x, b) else: op = np.vectorize(lambda x: bool(re.search(b, x)) if isinstance( x, str) and isinstance(b, (str, Pattern)) else False) # GH#32621 use mask to avoid comparing to NAs if isinstance(a, np.ndarray): a = a[mask] if is_numeric_v_string_like(a, b): # GH#29553 avoid deprecation warnings from numpy return np.zeros(a.shape, dtype=bool) elif is_datetimelike_v_numeric(a, b): # GH#29553 avoid deprecation warnings from numpy _check_comparison_types(False, a, b) return False result = op(a) if isinstance(result, np.ndarray) and mask is not None: # The shape of the mask can differ to that of the result # since we may compare only a subset of a's or b's elements tmp = np.zeros(mask.shape, dtype=np.bool_) np.place(tmp, mask, result) result = tmp _check_comparison_types(result, a, b) return result