Example #1
0
def test_match_substring_regex():
    """Check ``pc.match_substring_regex`` with and without ``ignore_case``.

    The same anchored pattern is applied to an all-lowercase array and to a
    mixed-case array; null inputs are expected to stay null in the output.
    """
    pattern = "^a?b"

    # Default call on lowercase data.
    lowercase = pa.array(["ab", "abc", "ba", "c", None])
    matched = pc.match_substring_regex(lowercase, pattern)
    assert pa.array([True, True, True, False, None]).equals(matched)

    # Mixed-case data: outcome depends on the ignore_case flag.
    mixed_case = pa.array(["aB", "Abc", "BA", "c", None])
    cases = (
        (True, [True, True, True, False, None]),
        (False, [False, False, False, False, None]),
    )
    for ignore_case, wanted in cases:
        matched = pc.match_substring_regex(
            mixed_case, pattern, ignore_case=ignore_case
        )
        assert pa.array(wanted).equals(matched)
Example #2
0
 def _str_endswith(self, pat, na=None):
     """Return a boolean mask of the elements that end with ``pat``.

     ``pat`` is treated literally: it is regex-escaped and anchored with a
     trailing ``$`` before being passed to ``pc.match_substring_regex``.
     When ``na`` is not missing (per ``isna``), missing entries of the
     result are replaced by ``bool(na)``.  If ``pc`` has no
     ``match_substring_regex`` attribute, the parent implementation is used.
     """
     if not hasattr(pc, "match_substring_regex"):
         return super()._str_endswith(pat, na)

     arrow_mask = pc.match_substring_regex(self._data, re.escape(pat) + "$")
     mask = BooleanDtype().__from_arrow__(arrow_mask)
     if not isna(na):
         # Fill the missing positions with the caller's replacement value.
         mask[isna(mask)] = bool(na)
     return mask
Example #3
0
    def _str_endswith(self, pat, na=None):
        """Return a boolean mask of the elements that end with ``pat``.

        ``pat`` is regex-escaped and anchored with a trailing ``$`` so that
        ``pc.match_substring_regex`` matches it literally.  When ``na`` is
        not missing (per ``isna``), missing entries of the result are
        replaced by ``bool(na)``.  When ``pa_version_under4p0`` is set, the
        parent implementation is used instead.
        """
        if not pa_version_under4p0:
            arrow_mask = pc.match_substring_regex(
                self._data, re.escape(pat) + "$"
            )
            mask = BooleanDtype().__from_arrow__(arrow_mask)
            if not isna(na):
                # Fill the missing positions with the caller's replacement.
                mask[isna(mask)] = bool(na)
            return mask

        return super()._str_endswith(pat, na)
Example #4
0
 def _str_startswith(self, pat, na=None):
     """Return a boolean mask of the elements that start with ``pat``.

     ``pat`` is treated literally: it is regex-escaped and prefixed with
     ``^`` before being passed to ``pc.match_substring_regex``.  When ``na``
     is not missing (per ``isna``), missing entries of the result are
     replaced by ``bool(na)``.
     """
     # match_substring_regex was added in pyarrow 4.0.0; without it, defer
     # to the parent implementation.
     if not hasattr(pc, "match_substring_regex"):
         return super()._str_startswith(pat, na)

     arrow_mask = pc.match_substring_regex(self._data, "^" + re.escape(pat))
     mask = BooleanDtype().__from_arrow__(arrow_mask)
     if not isna(na):
         # Fill the missing positions with the caller's replacement value.
         mask[isna(mask)] = bool(na)
     return mask
Example #5
0
            return lib.map_infer_mask(arr, f, mask.view("uint8"))

    def _str_contains(self,
                      pat,
                      case=True,
                      flags=0,
                      na=np.nan,
                      regex: bool = True):
        """Test whether each element contains ``pat``.

        Parameters
        ----------
        pat : str
            Pattern (when ``regex`` is truthy) or literal substring to look
            for.
        case : bool, default True
            Truthy for case-sensitive matching, falsy for case-insensitive.
        flags : int, default 0
            Regex flags.  Any truthy value delegates the whole call to the
            parent implementation.
        na : object, default np.nan
            When not missing (per ``isna``), missing entries of the result
            are replaced by ``bool(na)``.
        regex : bool, default True
            If truthy, ``pat`` is passed to ``pc.match_substring_regex``;
            otherwise ``pc.match_substring`` is used.

        Returns
        -------
        The value produced by ``BooleanDtype().__from_arrow__`` on the
        pyarrow result, or whatever the parent implementation returns on
        the fallback paths.
        """
        if flags:
            return super()._str_contains(pat, case, flags, na, regex)

        if regex:
            # Use truthiness (``not case``) rather than ``case is False`` so
            # that falsy values such as ``0`` or ``numpy.bool_(False)`` also
            # take the fallback, consistent with the ``case`` test in the
            # literal branch below.  The arrow call here is made without any
            # case-insensitivity option, so it must not serve that request.
            if pa_version_under4p0 or not case:
                return super()._str_contains(pat, case, flags, na, regex)
            result = pc.match_substring_regex(self._data, pat)
        elif case:
            result = pc.match_substring(self._data, pat)
        else:
            # Case-insensitive literal match: upper-case both the data and
            # the needle before comparing.
            result = pc.match_substring(pc.utf8_upper(self._data),
                                        pat.upper())
        result = BooleanDtype().__from_arrow__(result)
        if not isna(na):
            result[isna(result)] = bool(na)
        return result

    def _str_startswith(self, pat: str, na=None):
        if pa_version_under4p0:
            return super()._str_startswith(pat, na)
Example #6
0
def test_match_substring_regex():
    """Check ``pc.match_substring_regex`` with an anchored pattern."""
    pattern = "^a?b"
    values = pa.array(["ab", "abc", "ba", "c", None])
    matched = pc.match_substring_regex(values, pattern)
    # The null input is expected to produce a null output.
    assert pa.array([True, True, True, False, None]).equals(matched)