Example #1
0
def mask_missing(arr, values_to_mask):
    """
    Return a masking array of same size/shape as arr
    with entries equaling any member of values_to_mask set to True
    """
    dtype, values_to_mask = infer_dtype_from_array(values_to_mask)

    try:
        values_to_mask = np.array(values_to_mask, dtype=dtype)

    except Exception:
        values_to_mask = np.array(values_to_mask, dtype=object)

    na_mask = isna(values_to_mask)
    nonna = values_to_mask[~na_mask]

    mask = None
    for x in nonna:
        if mask is None:

            # numpy elementwise comparison warning
            if is_numeric_v_string_like(arr, x):
                mask = False
            else:
                mask = arr == x

            # if x is a string and arr is not, then we get False and we must
            # expand the mask to size arr.shape
            if is_scalar(mask):
                mask = np.zeros(arr.shape, dtype=bool)
        else:

            # numpy elementwise comparison warning
            if is_numeric_v_string_like(arr, x):
                mask |= False
            else:
                mask |= arr == x

    if na_mask.any():
        if mask is None:
            mask = isna(arr)
        else:
            mask |= isna(arr)

    # GH 21977
    if mask is None:
        mask = np.zeros(arr.shape, dtype=bool)

    return mask
Example #2
0
def test_is_numeric_v_string_like():
    assert not com.is_numeric_v_string_like(1, 1)
    assert not com.is_numeric_v_string_like(1, "foo")
    assert not com.is_numeric_v_string_like("foo", "foo")
    assert not com.is_numeric_v_string_like(np.array([1]), np.array([2]))
    assert not com.is_numeric_v_string_like(
        np.array(["foo"]), np.array(["foo"]))

    assert com.is_numeric_v_string_like(np.array([1]), "foo")
    assert com.is_numeric_v_string_like("foo", np.array([1]))
    assert com.is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"]))
    assert com.is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2]))
Example #3
0
def mask_missing(arr, values_to_mask):
    """
    Return a masking array of same size/shape as arr
    with entries equaling any member of values_to_mask set to True
    """
    dtype, values_to_mask = infer_dtype_from_array(values_to_mask)

    try:
        values_to_mask = np.array(values_to_mask, dtype=dtype)

    except Exception:
        values_to_mask = np.array(values_to_mask, dtype=object)

    na_mask = isnull(values_to_mask)
    nonna = values_to_mask[~na_mask]

    mask = None
    for x in nonna:
        if mask is None:

            # numpy elementwise comparison warning
            if is_numeric_v_string_like(arr, x):
                mask = False
            else:
                mask = arr == x

            # if x is a string and arr is not, then we get False and we must
            # expand the mask to size arr.shape
            if is_scalar(mask):
                mask = np.zeros(arr.shape, dtype=bool)
        else:

            # numpy elementwise comparison warning
            if is_numeric_v_string_like(arr, x):
                mask |= False
            else:
                mask |= arr == x

    if na_mask.any():
        if mask is None:
            mask = isnull(arr)
        else:
            mask |= isnull(arr)

    return mask
Example #4
0
def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike:
    """
    Evaluate a comparison operation `=`, `!=`, `>=`, `>`, `<=`, or `<`.

    Note: the caller is responsible for ensuring that numpy warnings are
    suppressed (with np.errstate(all="ignore")) if needed.

    Parameters
    ----------
    left : np.ndarray or ExtensionArray
    right : object
        Cannot be a DataFrame, Series, or Index.
    op : {operator.eq, operator.ne, operator.gt, operator.ge, operator.lt, operator.le}

    Returns
    -------
    ndarray or ExtensionArray
    """
    # NB: We assume extract_array has already been called on left and right
    lvalues = ensure_wrapped_if_datetimelike(left)
    rvalues = ensure_wrapped_if_datetimelike(right)

    rvalues = lib.item_from_zerodim(rvalues)
    if isinstance(rvalues, list):
        # TODO: same for tuples?
        rvalues = np.asarray(rvalues)

    if isinstance(rvalues, (np.ndarray, ABCExtensionArray)):
        # TODO: make this treatment consistent across ops and classes.
        #  We are not catching all listlikes here (e.g. frozenset, tuple)
        #  The ambiguous case is object-dtype.  See GH#27803
        if len(lvalues) != len(rvalues):
            raise ValueError("Lengths must match to compare", lvalues.shape,
                             rvalues.shape)

    if should_extension_dispatch(lvalues, rvalues):
        # Call the method on lvalues
        res_values = op(lvalues, rvalues)

    elif is_scalar(rvalues) and isna(rvalues):
        # numpy does not like comparisons vs None
        if op is operator.ne:
            res_values = np.ones(lvalues.shape, dtype=bool)
        else:
            res_values = np.zeros(lvalues.shape, dtype=bool)

    elif is_numeric_v_string_like(lvalues, rvalues):
        # GH#36377 going through the numexpr path would incorrectly raise
        return invalid_comparison(lvalues, rvalues, op)

    elif is_object_dtype(lvalues.dtype):
        res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues)

    else:
        res_values = _na_arithmetic_op(lvalues, rvalues, op, is_cmp=True)

    return res_values
Example #5
0
def test_is_numeric_v_string_like():
    assert not com.is_numeric_v_string_like(np.array([1]), 1)
    assert not com.is_numeric_v_string_like(np.array([1]), np.array([2]))
    assert not com.is_numeric_v_string_like(np.array(["foo"]), np.array(["foo"]))

    assert com.is_numeric_v_string_like(np.array([1]), "foo")
    assert com.is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"]))
    assert com.is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2]))
Example #6
0
def mask_missing(arr: ArrayLike, values_to_mask) -> npt.NDArray[np.bool_]:
    """
    Return a masking array of same size/shape as arr
    with entries equaling any member of values_to_mask set to True

    Parameters
    ----------
    arr : ArrayLike
    values_to_mask: list, tuple, or scalar

    Returns
    -------
    np.ndarray[bool]
    """
    # When called from Block.replace/replace_list, values_to_mask is a scalar
    #  known to be holdable by arr.
    # When called from Series._single_replace, values_to_mask is tuple or list
    dtype, values_to_mask = infer_dtype_from(values_to_mask)
    # error: Argument "dtype" to "array" has incompatible type "Union[dtype[Any],
    # ExtensionDtype]"; expected "Union[dtype[Any], None, type, _SupportsDType, str,
    # Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any],
    # _DTypeDict, Tuple[Any, Any]]]"
    values_to_mask = np.array(values_to_mask,
                              dtype=dtype)  # type: ignore[arg-type]

    na_mask = isna(values_to_mask)
    nonna = values_to_mask[~na_mask]

    # GH 21977
    mask = np.zeros(arr.shape, dtype=bool)
    for x in nonna:
        if is_numeric_v_string_like(arr, x):
            # GH#29553 prevent numpy deprecation warnings
            pass
        else:
            new_mask = arr == x
            if not isinstance(new_mask, np.ndarray):
                # usually BooleanArray
                new_mask = new_mask.to_numpy(dtype=bool, na_value=False)
            mask |= new_mask

    if na_mask.any():
        mask |= isna(arr)

    return mask
Example #7
0
def mask_missing(arr: ArrayLike, values_to_mask) -> np.ndarray:
    """
    Return a masking array of same size/shape as arr
    with entries equaling any member of values_to_mask set to True

    Parameters
    ----------
    arr : ArrayLike
    values_to_mask: list, tuple, or scalar

    Returns
    -------
    np.ndarray[bool]
    """
    # When called from Block.replace/replace_list, values_to_mask is a scalar
    #  known to be holdable by arr.
    # When called from Series._single_replace, values_to_mask is tuple or list
    dtype, values_to_mask = infer_dtype_from(values_to_mask)
    values_to_mask = np.array(values_to_mask, dtype=dtype)

    na_mask = isna(values_to_mask)
    nonna = values_to_mask[~na_mask]

    # GH 21977
    mask = np.zeros(arr.shape, dtype=bool)
    for x in nonna:
        if is_numeric_v_string_like(arr, x):
            # GH#29553 prevent numpy deprecation warnings
            pass
        else:
            mask |= arr == x

    if na_mask.any():
        mask |= isna(arr)

    return mask
Example #8
0
def compare_or_regex_search(
    a: ArrayLike,
    b: Union[Scalar, Pattern],
    regex: bool = False,
    mask: Optional[ArrayLike] = None,
) -> Union[ArrayLike, bool]:
    """
    Compare two array_like inputs of the same shape or two scalar values

    Calls operator.eq or re.search, depending on regex argument. If regex is
    True, perform an element-wise regex matching.

    Parameters
    ----------
    a : array_like
    b : scalar or regex pattern
    regex : bool, default False
    mask : array_like or None (default)

    Returns
    -------
    mask : array_like of bool
    """

    def _check_comparison_types(
        result: Union[ArrayLike, bool], a: ArrayLike, b: Union[Scalar, Pattern]
    ):
        """
        Raises an error if the two arrays (a,b) cannot be compared.
        Otherwise, returns the comparison result as expected.
        """
        if is_scalar(result) and isinstance(a, np.ndarray):
            type_names = [type(a).__name__, type(b).__name__]

            if isinstance(a, np.ndarray):
                type_names[0] = f"ndarray(dtype={a.dtype})"

            raise TypeError(
                f"Cannot compare types {repr(type_names[0])} and {repr(type_names[1])}"
            )

    if not regex:
        op = lambda x: operator.eq(x, b)
    else:
        op = np.vectorize(
            lambda x: bool(re.search(b, x))
            if isinstance(x, str) and isinstance(b, (str, Pattern))
            else False
        )

    # GH#32621 use mask to avoid comparing to NAs
    if mask is None and isinstance(a, np.ndarray) and not isinstance(b, np.ndarray):
        mask = np.reshape(~(isna(a)), a.shape)
    if isinstance(a, np.ndarray):
        a = a[mask]

    if is_numeric_v_string_like(a, b):
        # GH#29553 avoid deprecation warnings from numpy
        return np.zeros(a.shape, dtype=bool)

    elif is_datetimelike_v_numeric(a, b):
        # GH#29553 avoid deprecation warnings from numpy
        _check_comparison_types(False, a, b)
        return False

    result = op(a)

    if isinstance(result, np.ndarray) and mask is not None:
        # The shape of the mask can differ to that of the result
        # since we may compare only a subset of a's or b's elements
        tmp = np.zeros(mask.shape, dtype=np.bool_)
        tmp[mask] = result
        result = tmp

    _check_comparison_types(result, a, b)
    return result
Example #9
0
def compare_or_regex_search(a: ArrayLike, b: Scalar | Pattern, regex: bool,
                            mask: np.ndarray) -> ArrayLike | bool:
    """
    Compare two array-like inputs of the same shape or two scalar values

    Calls operator.eq or re.search, depending on regex argument. If regex is
    True, perform an element-wise regex matching.

    Parameters
    ----------
    a : array-like
    b : scalar or regex pattern
    regex : bool
    mask : np.ndarray[bool]

    Returns
    -------
    mask : array-like of bool
    """
    if isna(b):
        return ~mask

    def _check_comparison_types(result: ArrayLike | bool, a: ArrayLike,
                                b: Scalar | Pattern):
        """
        Raises an error if the two arrays (a,b) cannot be compared.
        Otherwise, returns the comparison result as expected.
        """
        if is_scalar(result) and isinstance(a, np.ndarray):
            type_names = [type(a).__name__, type(b).__name__]

            type_names[0] = f"ndarray(dtype={a.dtype})"

            raise TypeError(
                f"Cannot compare types {repr(type_names[0])} and {repr(type_names[1])}"
            )

    if not regex or not should_use_regex(regex, b):
        # TODO: should use missing.mask_missing?
        op = lambda x: operator.eq(x, b)
    else:
        op = np.vectorize(lambda x: bool(re.search(b, x)) if isinstance(
            x, str) and isinstance(b, (str, Pattern)) else False)

    # GH#32621 use mask to avoid comparing to NAs
    if isinstance(a, np.ndarray):
        a = a[mask]

    if is_numeric_v_string_like(a, b):
        # GH#29553 avoid deprecation warnings from numpy
        return np.zeros(a.shape, dtype=bool)

    elif is_datetimelike_v_numeric(a, b):
        # GH#29553 avoid deprecation warnings from numpy
        _check_comparison_types(False, a, b)
        return False

    result = op(a)

    if isinstance(result, np.ndarray) and mask is not None:
        # The shape of the mask can differ to that of the result
        # since we may compare only a subset of a's or b's elements
        tmp = np.zeros(mask.shape, dtype=np.bool_)
        np.place(tmp, mask, result)
        result = tmp

    _check_comparison_types(result, a, b)
    return result